summaryrefslogtreecommitdiffstats
path: root/drivers/dma
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/dma')
-rw-r--r--drivers/dma/Kconfig809
-rw-r--r--drivers/dma/Makefile90
-rw-r--r--drivers/dma/TODO12
-rw-r--r--drivers/dma/acpi-dma.c481
-rw-r--r--drivers/dma/altera-msgdma.c963
-rw-r--r--drivers/dma/amba-pl08x.c3073
-rw-r--r--drivers/dma/apple-admac.c957
-rw-r--r--drivers/dma/at_hdmac.c2156
-rw-r--r--drivers/dma/at_hdmac_regs.h478
-rw-r--r--drivers/dma/at_xdmac.c2263
-rw-r--r--drivers/dma/bcm-sba-raid.c1769
-rw-r--r--drivers/dma/bcm2835-dma.c1048
-rw-r--r--drivers/dma/bestcomm/Kconfig37
-rw-r--r--drivers/dma/bestcomm/Makefile15
-rw-r--r--drivers/dma/bestcomm/ata.c152
-rw-r--r--drivers/dma/bestcomm/bcom_ata_task.c64
-rw-r--r--drivers/dma/bestcomm/bcom_fec_rx_task.c75
-rw-r--r--drivers/dma/bestcomm/bcom_fec_tx_task.c88
-rw-r--r--drivers/dma/bestcomm/bcom_gen_bd_rx_task.c59
-rw-r--r--drivers/dma/bestcomm/bcom_gen_bd_tx_task.c65
-rw-r--r--drivers/dma/bestcomm/bestcomm.c526
-rw-r--r--drivers/dma/bestcomm/fec.c265
-rw-r--r--drivers/dma/bestcomm/gen_bd.c350
-rw-r--r--drivers/dma/bestcomm/sram.c174
-rw-r--r--drivers/dma/dma-axi-dmac.c1066
-rw-r--r--drivers/dma/dma-jz4780.c1141
-rw-r--r--drivers/dma/dmaengine.c1655
-rw-r--r--drivers/dma/dmaengine.h201
-rw-r--r--drivers/dma/dmatest.c1360
-rw-r--r--drivers/dma/dw-axi-dmac/Makefile2
-rw-r--r--drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c1578
-rw-r--r--drivers/dma/dw-axi-dmac/dw-axi-dmac.h399
-rw-r--r--drivers/dma/dw-edma/Kconfig19
-rw-r--r--drivers/dma/dw-edma/Makefile7
-rw-r--r--drivers/dma/dw-edma/dw-edma-core.c1036
-rw-r--r--drivers/dma/dw-edma/dw-edma-core.h155
-rw-r--r--drivers/dma/dw-edma/dw-edma-pcie.c364
-rw-r--r--drivers/dma/dw-edma/dw-edma-v0-core.c511
-rw-r--r--drivers/dma/dw-edma/dw-edma-v0-core.h28
-rw-r--r--drivers/dma/dw-edma/dw-edma-v0-debugfs.c314
-rw-r--r--drivers/dma/dw-edma/dw-edma-v0-debugfs.h27
-rw-r--r--drivers/dma/dw-edma/dw-edma-v0-regs.h233
-rw-r--r--drivers/dma/dw/Kconfig36
-rw-r--r--drivers/dma/dw/Makefile13
-rw-r--r--drivers/dma/dw/acpi.c55
-rw-r--r--drivers/dma/dw/core.c1320
-rw-r--r--drivers/dma/dw/dw.c137
-rw-r--r--drivers/dma/dw/idma32.c291
-rw-r--r--drivers/dma/dw/internal.h93
-rw-r--r--drivers/dma/dw/of.c122
-rw-r--r--drivers/dma/dw/pci.c147
-rw-r--r--drivers/dma/dw/platform.c220
-rw-r--r--drivers/dma/dw/regs.h409
-rw-r--r--drivers/dma/dw/rzn1-dmamux.c158
-rw-r--r--drivers/dma/ep93xx_dma.c1434
-rw-r--r--drivers/dma/fsl-dpaa2-qdma/Kconfig9
-rw-r--r--drivers/dma/fsl-dpaa2-qdma/Makefile3
-rw-r--r--drivers/dma/fsl-dpaa2-qdma/dpaa2-qdma.c841
-rw-r--r--drivers/dma/fsl-dpaa2-qdma/dpaa2-qdma.h153
-rw-r--r--drivers/dma/fsl-dpaa2-qdma/dpdmai.c397
-rw-r--r--drivers/dma/fsl-dpaa2-qdma/dpdmai.h179
-rw-r--r--drivers/dma/fsl-edma-common.c748
-rw-r--r--drivers/dma/fsl-edma-common.h254
-rw-r--r--drivers/dma/fsl-edma.c519
-rw-r--r--drivers/dma/fsl-qdma.c1312
-rw-r--r--drivers/dma/fsl_raid.c897
-rw-r--r--drivers/dma/fsl_raid.h306
-rw-r--r--drivers/dma/fsldma.c1430
-rw-r--r--drivers/dma/fsldma.h265
-rw-r--r--drivers/dma/hisi_dma.c1053
-rw-r--r--drivers/dma/hsu/Kconfig10
-rw-r--r--drivers/dma/hsu/Makefile6
-rw-r--r--drivers/dma/hsu/hsu.c512
-rw-r--r--drivers/dma/hsu/hsu.h127
-rw-r--r--drivers/dma/hsu/pci.c135
-rw-r--r--drivers/dma/idma64.c704
-rw-r--r--drivers/dma/idma64.h229
-rw-r--r--drivers/dma/idxd/Makefile12
-rw-r--r--drivers/dma/idxd/bus.c91
-rw-r--r--drivers/dma/idxd/cdev.c410
-rw-r--r--drivers/dma/idxd/compat.c107
-rw-r--r--drivers/dma/idxd/device.c1507
-rw-r--r--drivers/dma/idxd/dma.c364
-rw-r--r--drivers/dma/idxd/idxd.h681
-rw-r--r--drivers/dma/idxd/init.c810
-rw-r--r--drivers/dma/idxd/irq.c497
-rw-r--r--drivers/dma/idxd/perfmon.c662
-rw-r--r--drivers/dma/idxd/perfmon.h119
-rw-r--r--drivers/dma/idxd/registers.h516
-rw-r--r--drivers/dma/idxd/submit.c217
-rw-r--r--drivers/dma/idxd/sysfs.c1818
-rw-r--r--drivers/dma/img-mdc-dma.c1090
-rw-r--r--drivers/dma/imx-dma.c1255
-rw-r--r--drivers/dma/imx-sdma.c2382
-rw-r--r--drivers/dma/ioat/Makefile3
-rw-r--r--drivers/dma/ioat/dca.c331
-rw-r--r--drivers/dma/ioat/dma.c1061
-rw-r--r--drivers/dma/ioat/dma.h406
-rw-r--r--drivers/dma/ioat/hw.h271
-rw-r--r--drivers/dma/ioat/init.c1455
-rw-r--r--drivers/dma/ioat/prep.c737
-rw-r--r--drivers/dma/ioat/registers.h258
-rw-r--r--drivers/dma/ioat/sysfs.c166
-rw-r--r--drivers/dma/iop-adma.c1554
-rw-r--r--drivers/dma/iop-adma.h914
-rw-r--r--drivers/dma/ipu/Makefile2
-rw-r--r--drivers/dma/ipu/ipu_idmac.c1802
-rw-r--r--drivers/dma/ipu/ipu_intern.h173
-rw-r--r--drivers/dma/ipu/ipu_irq.c367
-rw-r--r--drivers/dma/k3dma.c1044
-rw-r--r--drivers/dma/lgm/Kconfig10
-rw-r--r--drivers/dma/lgm/Makefile2
-rw-r--r--drivers/dma/lgm/lgm-dma.c1740
-rw-r--r--drivers/dma/lpc18xx-dmamux.c179
-rw-r--r--drivers/dma/mcf-edma.c331
-rw-r--r--drivers/dma/mediatek/Kconfig38
-rw-r--r--drivers/dma/mediatek/Makefile4
-rw-r--r--drivers/dma/mediatek/mtk-cqdma.c938
-rw-r--r--drivers/dma/mediatek/mtk-hsdma.c1056
-rw-r--r--drivers/dma/mediatek/mtk-uart-apdma.c656
-rw-r--r--drivers/dma/milbeaut-hdmac.c578
-rw-r--r--drivers/dma/milbeaut-xdmac.c416
-rw-r--r--drivers/dma/mmp_pdma.c1153
-rw-r--r--drivers/dma/mmp_tdma.c770
-rw-r--r--drivers/dma/moxart-dma.c678
-rw-r--r--drivers/dma/mpc512x_dma.c1125
-rw-r--r--drivers/dma/mv_xor.c1468
-rw-r--r--drivers/dma/mv_xor.h193
-rw-r--r--drivers/dma/mv_xor_v2.c923
-rw-r--r--drivers/dma/mxs-dma.c845
-rw-r--r--drivers/dma/nbpfaxi.c1530
-rw-r--r--drivers/dma/of-dma.c371
-rw-r--r--drivers/dma/owl-dma.c1278
-rw-r--r--drivers/dma/pch_dma.c995
-rw-r--r--drivers/dma/pl330.c3277
-rw-r--r--drivers/dma/plx_dma.c636
-rw-r--r--drivers/dma/ppc4xx/Makefile2
-rw-r--r--drivers/dma/ppc4xx/adma.c4629
-rw-r--r--drivers/dma/ppc4xx/adma.h190
-rw-r--r--drivers/dma/ppc4xx/dma.h220
-rw-r--r--drivers/dma/ppc4xx/xor.h107
-rw-r--r--drivers/dma/ptdma/Kconfig13
-rw-r--r--drivers/dma/ptdma/Makefile10
-rw-r--r--drivers/dma/ptdma/ptdma-debugfs.c106
-rw-r--r--drivers/dma/ptdma/ptdma-dev.c309
-rw-r--r--drivers/dma/ptdma/ptdma-dmaengine.c413
-rw-r--r--drivers/dma/ptdma/ptdma-pci.c243
-rw-r--r--drivers/dma/ptdma/ptdma.h337
-rw-r--r--drivers/dma/pxa_dma.c1469
-rw-r--r--drivers/dma/qcom/Kconfig54
-rw-r--r--drivers/dma/qcom/Makefile8
-rw-r--r--drivers/dma/qcom/bam_dma.c1492
-rw-r--r--drivers/dma/qcom/gpi.c2315
-rw-r--r--drivers/dma/qcom/hidma.c976
-rw-r--r--drivers/dma/qcom/hidma.h160
-rw-r--r--drivers/dma/qcom/hidma_dbg.c165
-rw-r--r--drivers/dma/qcom/hidma_ll.c855
-rw-r--r--drivers/dma/qcom/hidma_mgmt.c440
-rw-r--r--drivers/dma/qcom/hidma_mgmt.h31
-rw-r--r--drivers/dma/qcom/hidma_mgmt_sys.c285
-rw-r--r--drivers/dma/qcom/qcom_adm.c953
-rw-r--r--drivers/dma/s3c24xx-dma.c1428
-rw-r--r--drivers/dma/sa11x0-dma.c1102
-rw-r--r--drivers/dma/sf-pdma/Kconfig7
-rw-r--r--drivers/dma/sf-pdma/Makefile1
-rw-r--r--drivers/dma/sf-pdma/sf-pdma.c625
-rw-r--r--drivers/dma/sf-pdma/sf-pdma.h119
-rw-r--r--drivers/dma/sh/Kconfig58
-rw-r--r--drivers/dma/sh/Makefile18
-rw-r--r--drivers/dma/sh/rcar-dmac.c2052
-rw-r--r--drivers/dma/sh/rz-dmac.c1003
-rw-r--r--drivers/dma/sh/shdma-arm.h48
-rw-r--r--drivers/dma/sh/shdma-base.c1052
-rw-r--r--drivers/dma/sh/shdma.h61
-rw-r--r--drivers/dma/sh/shdmac.c938
-rw-r--r--drivers/dma/sh/usb-dmac.c914
-rw-r--r--drivers/dma/sprd-dma.c1309
-rw-r--r--drivers/dma/st_fdma.c877
-rw-r--r--drivers/dma/st_fdma.h245
-rw-r--r--drivers/dma/ste_dma40.c3729
-rw-r--r--drivers/dma/ste_dma40_ll.c448
-rw-r--r--drivers/dma/ste_dma40_ll.h470
-rw-r--r--drivers/dma/stm32-dma.c1794
-rw-r--r--drivers/dma/stm32-dmamux.c403
-rw-r--r--drivers/dma/stm32-mdma.c1834
-rw-r--r--drivers/dma/sun4i-dma.c1310
-rw-r--r--drivers/dma/sun6i-dma.c1503
-rw-r--r--drivers/dma/tegra186-gpc-dma.c1521
-rw-r--r--drivers/dma/tegra20-apb-dma.c1693
-rw-r--r--drivers/dma/tegra210-adma.c992
-rw-r--r--drivers/dma/ti/Kconfig62
-rw-r--r--drivers/dma/ti/Makefile14
-rw-r--r--drivers/dma/ti/cppi41.c1261
-rw-r--r--drivers/dma/ti/dma-crossbar.c477
-rw-r--r--drivers/dma/ti/edma.c2699
-rw-r--r--drivers/dma/ti/k3-psil-am62.c186
-rw-r--r--drivers/dma/ti/k3-psil-am64.c158
-rw-r--r--drivers/dma/ti/k3-psil-am654.c175
-rw-r--r--drivers/dma/ti/k3-psil-j7200.c242
-rw-r--r--drivers/dma/ti/k3-psil-j721e.c377
-rw-r--r--drivers/dma/ti/k3-psil-j721s2.c175
-rw-r--r--drivers/dma/ti/k3-psil-priv.h47
-rw-r--r--drivers/dma/ti/k3-psil.c103
-rw-r--r--drivers/dma/ti/k3-udma-glue.c1439
-rw-r--r--drivers/dma/ti/k3-udma-private.c181
-rw-r--r--drivers/dma/ti/k3-udma.c5538
-rw-r--r--drivers/dma/ti/k3-udma.h164
-rw-r--r--drivers/dma/ti/omap-dma.c1958
-rw-r--r--drivers/dma/timb_dma.c773
-rw-r--r--drivers/dma/txx9dmac.c1306
-rw-r--r--drivers/dma/txx9dmac.h304
-rw-r--r--drivers/dma/uniphier-mdmac.c501
-rw-r--r--drivers/dma/uniphier-xdmac.c611
-rw-r--r--drivers/dma/virt-dma.c142
-rw-r--r--drivers/dma/virt-dma.h227
-rw-r--r--drivers/dma/xgene-dma.c1833
-rw-r--r--drivers/dma/xilinx/Makefile4
-rw-r--r--drivers/dma/xilinx/xilinx_dma.c3229
-rw-r--r--drivers/dma/xilinx/xilinx_dpdma.c1777
-rw-r--r--drivers/dma/xilinx/zynqmp_dma.c1182
220 files changed, 155342 insertions, 0 deletions
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
new file mode 100644
index 000000000..81de833cc
--- /dev/null
+++ b/drivers/dma/Kconfig
@@ -0,0 +1,809 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# DMA engine configuration
+#
+
+menuconfig DMADEVICES
+ bool "DMA Engine support"
+ depends on HAS_DMA
+ help
+ DMA engines can do asynchronous data transfers without
+ involving the host CPU. Currently, this framework can be
+ used to offload memory copies in the network stack and
+ RAID operations in the MD driver. This menu only presents
+ DMA Device drivers supported by the configured arch, it may
+ be empty in some cases.
+
+config DMADEVICES_DEBUG
+ bool "DMA Engine debugging"
+ depends on DMADEVICES != n
+ help
+ This is an option for use by developers; most people should
+ say N here. This enables DMA engine core and driver debugging.
+
+config DMADEVICES_VDEBUG
+ bool "DMA Engine verbose debugging"
+ depends on DMADEVICES_DEBUG != n
+ help
+ This is an option for use by developers; most people should
+ say N here. This enables deeper (more verbose) debugging of
+ the DMA engine core and drivers.
+
+
+if DMADEVICES
+
+comment "DMA Devices"
+
+#core
+config ASYNC_TX_ENABLE_CHANNEL_SWITCH
+ bool
+
+config ARCH_HAS_ASYNC_TX_FIND_CHANNEL
+ bool
+
+config DMA_ENGINE
+ bool
+
+config DMA_VIRTUAL_CHANNELS
+ tristate
+
+config DMA_ACPI
+ def_bool y
+ depends on ACPI
+
+config DMA_OF
+ def_bool y
+ depends on OF
+ select DMA_ENGINE
+
+#devices
+config ALTERA_MSGDMA
+ tristate "Altera / Intel mSGDMA Engine"
+ depends on HAS_IOMEM
+ select DMA_ENGINE
+ help
+ Enable support for Altera / Intel mSGDMA controller.
+
+config AMBA_PL08X
+ bool "ARM PrimeCell PL080 or PL081 support"
+ depends on ARM_AMBA
+ select DMA_ENGINE
+ select DMA_VIRTUAL_CHANNELS
+ help
+ Say yes if your platform has a PL08x DMAC device which can
+ provide DMA engine support. This includes the original ARM
+ PL080 and PL081, Samsungs PL080 derivative and Faraday
+ Technology's FTDMAC020 PL080 derivative.
+
+config AMCC_PPC440SPE_ADMA
+ tristate "AMCC PPC440SPe ADMA support"
+ depends on 440SPe || 440SP
+ select DMA_ENGINE
+ select DMA_ENGINE_RAID
+ select ARCH_HAS_ASYNC_TX_FIND_CHANNEL
+ select ASYNC_TX_ENABLE_CHANNEL_SWITCH
+ help
+ Enable support for the AMCC PPC440SPe RAID engines.
+
+config APPLE_ADMAC
+ tristate "Apple ADMAC support"
+ depends on ARCH_APPLE || COMPILE_TEST
+ select DMA_ENGINE
+ default ARCH_APPLE
+ help
+ Enable support for Audio DMA Controller found on Apple Silicon SoCs.
+
+config AT_HDMAC
+ tristate "Atmel AHB DMA support"
+ depends on ARCH_AT91
+ select DMA_ENGINE
+ help
+ Support the Atmel AHB DMA controller.
+
+config AT_XDMAC
+ tristate "Atmel XDMA support"
+ depends on ARCH_AT91
+ select DMA_ENGINE
+ help
+ Support the Atmel XDMA controller.
+
+config AXI_DMAC
+ tristate "Analog Devices AXI-DMAC DMA support"
+ depends on MICROBLAZE || NIOS2 || ARCH_ZYNQ || ARCH_ZYNQMP || ARCH_INTEL_SOCFPGA || COMPILE_TEST
+ select DMA_ENGINE
+ select DMA_VIRTUAL_CHANNELS
+ select REGMAP_MMIO
+ help
+ Enable support for the Analog Devices AXI-DMAC peripheral. This DMA
+ controller is often used in Analog Devices' reference designs for FPGA
+ platforms.
+
+config BCM_SBA_RAID
+ tristate "Broadcom SBA RAID engine support"
+ depends on ARM64 || COMPILE_TEST
+ depends on MAILBOX && RAID6_PQ
+ select DMA_ENGINE
+ select DMA_ENGINE_RAID
+ select ASYNC_TX_DISABLE_XOR_VAL_DMA
+ select ASYNC_TX_DISABLE_PQ_VAL_DMA
+ default m if ARCH_BCM_IPROC
+ help
+ Enable support for Broadcom SBA RAID Engine. The SBA RAID
+ engine is available on most of the Broadcom iProc SoCs. It
+ has the capability to offload memcpy, xor and pq computation
+ for raid5/6.
+
+config DMA_BCM2835
+ tristate "BCM2835 DMA engine support"
+ depends on ARCH_BCM2835
+ select DMA_ENGINE
+ select DMA_VIRTUAL_CHANNELS
+
+config DMA_JZ4780
+ tristate "JZ4780 DMA support"
+ depends on MIPS || COMPILE_TEST
+ select DMA_ENGINE
+ select DMA_VIRTUAL_CHANNELS
+ help
+ This selects support for the DMA controller in Ingenic JZ4780 SoCs.
+ If you have a board based on such a SoC and wish to use DMA for
+ devices which can use the DMA controller, say Y or M here.
+
+config DMA_SA11X0
+ tristate "SA-11x0 DMA support"
+ depends on ARCH_SA1100 || COMPILE_TEST
+ select DMA_ENGINE
+ select DMA_VIRTUAL_CHANNELS
+ help
+ Support the DMA engine found on Intel StrongARM SA-1100 and
+ SA-1110 SoCs. This DMA engine can only be used with on-chip
+ devices.
+
+config DMA_SUN4I
+ tristate "Allwinner A10 DMA SoCs support"
+ depends on MACH_SUN4I || MACH_SUN5I || MACH_SUN7I
+ default (MACH_SUN4I || MACH_SUN5I || MACH_SUN7I)
+ select DMA_ENGINE
+ select DMA_VIRTUAL_CHANNELS
+ help
+ Enable support for the DMA controller present in the sun4i,
+ sun5i and sun7i Allwinner ARM SoCs.
+
+config DMA_SUN6I
+ tristate "Allwinner A31 SoCs DMA support"
+ depends on ARCH_SUNXI || COMPILE_TEST
+ depends on RESET_CONTROLLER
+ select DMA_ENGINE
+ select DMA_VIRTUAL_CHANNELS
+ help
+ Support for the DMA engine first found in Allwinner A31 SoCs.
+
+config DW_AXI_DMAC
+ tristate "Synopsys DesignWare AXI DMA support"
+ depends on OF
+ depends on HAS_IOMEM
+ select DMA_ENGINE
+ select DMA_VIRTUAL_CHANNELS
+ help
+ Enable support for Synopsys DesignWare AXI DMA controller.
+ NOTE: This driver wasn't tested on 64 bit platform because
+ of lack 64 bit platform with Synopsys DW AXI DMAC.
+
+config EP93XX_DMA
+ bool "Cirrus Logic EP93xx DMA support"
+ depends on ARCH_EP93XX || COMPILE_TEST
+ select DMA_ENGINE
+ help
+ Enable support for the Cirrus Logic EP93xx M2P/M2M DMA controller.
+
+config FSL_DMA
+ tristate "Freescale Elo series DMA support"
+ depends on FSL_SOC
+ select DMA_ENGINE
+ select ASYNC_TX_ENABLE_CHANNEL_SWITCH
+ help
+ Enable support for the Freescale Elo series DMA controllers.
+ The Elo is the DMA controller on some mpc82xx and mpc83xx parts, the
+ EloPlus is on mpc85xx and mpc86xx and Pxxx parts, and the Elo3 is on
+ some Txxx and Bxxx parts.
+
+config FSL_EDMA
+ tristate "Freescale eDMA engine support"
+ depends on OF
+ depends on HAS_IOMEM
+ select DMA_ENGINE
+ select DMA_VIRTUAL_CHANNELS
+ help
+ Support the Freescale eDMA engine with programmable channel
+ multiplexing capability for DMA request sources(slot).
+ This module can be found on Freescale Vybrid and LS-1 SoCs.
+
+config FSL_QDMA
+ tristate "NXP Layerscape qDMA engine support"
+ depends on ARM || ARM64
+ select DMA_ENGINE
+ select DMA_VIRTUAL_CHANNELS
+ select DMA_ENGINE_RAID
+ select ASYNC_TX_ENABLE_CHANNEL_SWITCH
+ help
+ Support the NXP Layerscape qDMA engine with command queue and legacy mode.
+ Channel virtualization is supported through enqueuing of DMA jobs to,
+ or dequeuing DMA jobs from, different work queues.
+ This module can be found on NXP Layerscape SoCs.
+ The qdma driver only work on SoCs with a DPAA hardware block.
+
+config FSL_RAID
+ tristate "Freescale RAID engine Support"
+ depends on FSL_SOC && !ASYNC_TX_ENABLE_CHANNEL_SWITCH
+ select DMA_ENGINE
+ select DMA_ENGINE_RAID
+ help
+ Enable support for Freescale RAID Engine. RAID Engine is
+ available on some QorIQ SoCs (like P5020/P5040). It has
+ the capability to offload memcpy, xor and pq computation
+ for raid5/6.
+
+config HISI_DMA
+ tristate "HiSilicon DMA Engine support"
+ depends on ARCH_HISI || COMPILE_TEST
+ depends on PCI_MSI
+ select DMA_ENGINE
+ select DMA_VIRTUAL_CHANNELS
+ help
+ Support HiSilicon Kunpeng DMA engine.
+
+config IMG_MDC_DMA
+ tristate "IMG MDC support"
+ depends on MIPS || COMPILE_TEST
+ depends on MFD_SYSCON
+ select DMA_ENGINE
+ select DMA_VIRTUAL_CHANNELS
+ help
+ Enable support for the IMG multi-threaded DMA controller (MDC).
+
+config IMX_DMA
+ tristate "i.MX DMA support"
+ depends on ARCH_MXC
+ select DMA_ENGINE
+ help
+ Support the i.MX DMA engine. This engine is integrated into
+ Freescale i.MX1/21/27 chips.
+
+config IMX_SDMA
+ tristate "i.MX SDMA support"
+ depends on ARCH_MXC
+ select DMA_ENGINE
+ select DMA_VIRTUAL_CHANNELS
+ help
+ Support the i.MX SDMA engine. This engine is integrated into
+ Freescale i.MX25/31/35/51/53/6 chips.
+
+config INTEL_IDMA64
+ tristate "Intel integrated DMA 64-bit support"
+ depends on HAS_IOMEM
+ select DMA_ENGINE
+ select DMA_VIRTUAL_CHANNELS
+ help
+ Enable DMA support for Intel Low Power Subsystem such as found on
+ Intel Skylake PCH.
+
+config INTEL_IDXD_BUS
+ tristate
+ default INTEL_IDXD
+
+config INTEL_IDXD
+ tristate "Intel Data Accelerators support"
+ depends on PCI && X86_64 && !UML
+ depends on PCI_MSI
+ depends on PCI_PASID
+ depends on SBITMAP
+ select DMA_ENGINE
+ help
+ Enable support for the Intel(R) data accelerators present
+ in Intel Xeon CPU.
+
+ Say Y if you have such a platform.
+
+ If unsure, say N.
+
+config INTEL_IDXD_COMPAT
+ bool "Legacy behavior for idxd driver"
+ depends on PCI && X86_64
+ select INTEL_IDXD_BUS
+ help
+ Compatible driver to support old /sys/bus/dsa/drivers/dsa behavior.
+ The old behavior performed driver bind/unbind for device and wq
+ devices all under the dsa driver. The compat driver will emulate
+ the legacy behavior in order to allow existing support apps (i.e.
+ accel-config) to continue function. It is expected that accel-config
+ v3.2 and earlier will need the compat mode. A distro with later
+ accel-config version can disable this compat config.
+
+ Say Y if you have old applications that require such behavior.
+
+ If unsure, say N.
+
+# Config symbol that collects all the dependencies that's necessary to
+# support shared virtual memory for the devices supported by idxd.
+config INTEL_IDXD_SVM
+ bool "Accelerator Shared Virtual Memory Support"
+ depends on INTEL_IDXD
+ depends on INTEL_IOMMU_SVM
+ depends on PCI_PRI
+ depends on PCI_PASID
+ depends on PCI_IOV
+
+config INTEL_IDXD_PERFMON
+ bool "Intel Data Accelerators performance monitor support"
+ depends on INTEL_IDXD
+ help
+ Enable performance monitor (pmu) support for the Intel(R)
+ data accelerators present in Intel Xeon CPU. With this
+ enabled, perf can be used to monitor the DSA (Intel Data
+ Streaming Accelerator) events described in the Intel DSA
+ spec.
+
+ If unsure, say N.
+
+config INTEL_IOATDMA
+ tristate "Intel I/OAT DMA support"
+ depends on PCI && X86_64 && !UML
+ select DMA_ENGINE
+ select DMA_ENGINE_RAID
+ select DCA
+ help
+ Enable support for the Intel(R) I/OAT DMA engine present
+ in recent Intel Xeon chipsets.
+
+ Say Y here if you have such a chipset.
+
+ If unsure, say N.
+
+config INTEL_IOP_ADMA
+ tristate "Intel IOP32x ADMA support"
+ depends on ARCH_IOP32X || COMPILE_TEST
+ select DMA_ENGINE
+ select ASYNC_TX_ENABLE_CHANNEL_SWITCH
+ help
+ Enable support for the Intel(R) IOP Series RAID engines.
+
+config K3_DMA
+ tristate "Hisilicon K3 DMA support"
+ depends on ARCH_HI3xxx || ARCH_HISI || COMPILE_TEST
+ select DMA_ENGINE
+ select DMA_VIRTUAL_CHANNELS
+ help
+ Support the DMA engine for Hisilicon K3 platform
+ devices.
+
+config LPC18XX_DMAMUX
+ bool "NXP LPC18xx/43xx DMA MUX for PL080"
+ depends on ARCH_LPC18XX || COMPILE_TEST
+ depends on OF && AMBA_PL08X
+ select MFD_SYSCON
+ help
+ Enable support for DMA on NXP LPC18xx/43xx platforms
+ with PL080 and multiplexed DMA request lines.
+
+config MCF_EDMA
+ tristate "Freescale eDMA engine support, ColdFire mcf5441x SoCs"
+ depends on M5441x || COMPILE_TEST
+ select DMA_ENGINE
+ select DMA_VIRTUAL_CHANNELS
+ help
+ Support the Freescale ColdFire eDMA engine, 64-channel
+ implementation that performs complex data transfers with
+ minimal intervention from a host processor.
+ This module can be found on Freescale ColdFire mcf5441x SoCs.
+
+config MILBEAUT_HDMAC
+ tristate "Milbeaut AHB DMA support"
+ depends on ARCH_MILBEAUT || COMPILE_TEST
+ depends on OF
+ select DMA_ENGINE
+ select DMA_VIRTUAL_CHANNELS
+ help
+ Say yes here to support the Socionext Milbeaut
+ HDMAC device.
+
+config MILBEAUT_XDMAC
+ tristate "Milbeaut AXI DMA support"
+ depends on ARCH_MILBEAUT || COMPILE_TEST
+ depends on OF
+ select DMA_ENGINE
+ select DMA_VIRTUAL_CHANNELS
+ help
+ Say yes here to support the Socionext Milbeaut
+ XDMAC device.
+
+config MMP_PDMA
+ tristate "MMP PDMA support"
+ depends on ARCH_MMP || ARCH_PXA || COMPILE_TEST
+ select DMA_ENGINE
+ help
+ Support the MMP PDMA engine for PXA and MMP platform.
+
+config MMP_TDMA
+ tristate "MMP Two-Channel DMA support"
+ depends on ARCH_MMP || COMPILE_TEST
+ select DMA_ENGINE
+ select GENERIC_ALLOCATOR
+ help
+ Support the MMP Two-Channel DMA engine.
+ This engine used for MMP Audio DMA and pxa910 SQU.
+
+config MOXART_DMA
+ tristate "MOXART DMA support"
+ depends on ARCH_MOXART
+ select DMA_ENGINE
+ select DMA_VIRTUAL_CHANNELS
+ help
+ Enable support for the MOXA ART SoC DMA controller.
+
+ Say Y here if you enabled MMP ADMA, otherwise say N.
+
+config MPC512X_DMA
+ tristate "Freescale MPC512x built-in DMA engine support"
+ depends on PPC_MPC512x || PPC_MPC831x
+ select DMA_ENGINE
+ help
+ Enable support for the Freescale MPC512x built-in DMA engine.
+
+config MV_XOR
+ bool "Marvell XOR engine support"
+ depends on PLAT_ORION || ARCH_MVEBU || COMPILE_TEST
+ select DMA_ENGINE
+ select DMA_ENGINE_RAID
+ select ASYNC_TX_ENABLE_CHANNEL_SWITCH
+ help
+ Enable support for the Marvell XOR engine.
+
+config MV_XOR_V2
+ bool "Marvell XOR engine version 2 support "
+ depends on ARM64
+ select DMA_ENGINE
+ select DMA_ENGINE_RAID
+ select ASYNC_TX_ENABLE_CHANNEL_SWITCH
+ select GENERIC_MSI_IRQ_DOMAIN
+ help
+ Enable support for the Marvell version 2 XOR engine.
+
+ This engine provides acceleration for copy, XOR and RAID6
+ operations, and is available on Marvell Armada 7K and 8K
+ platforms.
+
+config MXS_DMA
+ bool "MXS DMA support"
+ depends on ARCH_MXS || ARCH_MXC || COMPILE_TEST
+ select STMP_DEVICE
+ select DMA_ENGINE
+ help
+ Support the MXS DMA engine. This engine including APBH-DMA
+ and APBX-DMA is integrated into some Freescale chips.
+
+config MX3_IPU
+ bool "MX3x Image Processing Unit support"
+ depends on ARCH_MXC
+ select DMA_ENGINE
+ default y
+ help
+ If you plan to use the Image Processing unit in the i.MX3x, say
+ Y here. If unsure, select Y.
+
+config MX3_IPU_IRQS
+ int "Number of dynamically mapped interrupts for IPU"
+ depends on MX3_IPU
+ range 2 137
+ default 4
+ help
+ Out of 137 interrupt sources on i.MX31 IPU only very few are used.
+ To avoid bloating the irq_desc[] array we allocate a sufficient
+ number of IRQ slots and map them dynamically to specific sources.
+
+config NBPFAXI_DMA
+ tristate "Renesas Type-AXI NBPF DMA support"
+ select DMA_ENGINE
+ depends on ARM || COMPILE_TEST
+ help
+ Support for "Type-AXI" NBPF DMA IPs from Renesas
+
+config OWL_DMA
+ tristate "Actions Semi Owl SoCs DMA support"
+ depends on ARCH_ACTIONS
+ select DMA_ENGINE
+ select DMA_VIRTUAL_CHANNELS
+ help
+ Enable support for the Actions Semi Owl SoCs DMA controller.
+
+config PCH_DMA
+ tristate "Intel EG20T PCH / LAPIS Semicon IOH(ML7213/ML7223/ML7831) DMA"
+ depends on PCI && (X86_32 || COMPILE_TEST)
+ select DMA_ENGINE
+ help
+ Enable support for Intel EG20T PCH DMA engine.
+
+ This driver also can be used for LAPIS Semiconductor IOH(Input/
+ Output Hub), ML7213, ML7223 and ML7831.
+ ML7213 IOH is for IVI(In-Vehicle Infotainment) use, ML7223 IOH is
+ for MP(Media Phone) use and ML7831 IOH is for general purpose use.
+ ML7213/ML7223/ML7831 is companion chip for Intel Atom E6xx series.
+ ML7213/ML7223/ML7831 is completely compatible for Intel EG20T PCH.
+
+config PL330_DMA
+ tristate "DMA API Driver for PL330"
+ select DMA_ENGINE
+ depends on ARM_AMBA
+ help
+ Select if your platform has one or more PL330 DMACs.
+ You need to provide platform specific settings via
+ platform_data for a dma-pl330 device.
+
+config PXA_DMA
+ bool "PXA DMA support"
+ depends on (ARCH_MMP || ARCH_PXA)
+ select DMA_ENGINE
+ select DMA_VIRTUAL_CHANNELS
+ help
+ Support the DMA engine for PXA. It is also compatible with MMP PDMA
+ platform. The internal DMA IP of all PXA variants is supported, with
+ 16 to 32 channels for peripheral to memory or memory to memory
+ transfers.
+
+config PLX_DMA
+ tristate "PLX ExpressLane PEX Switch DMA Engine Support"
+ depends on PCI
+ select DMA_ENGINE
+ help
+ Some PLX ExpressLane PCI Switches support additional DMA engines.
+ These are exposed via extra functions on the switch's
+ upstream port. Each function exposes one DMA channel.
+
+config STE_DMA40
+ bool "ST-Ericsson DMA40 support"
+ depends on ARCH_U8500
+ select DMA_ENGINE
+ help
+ Support for ST-Ericsson DMA40 controller
+
+config ST_FDMA
+ tristate "ST FDMA dmaengine support"
+ depends on ARCH_STI
+ depends on REMOTEPROC
+ select ST_SLIM_REMOTEPROC
+ select DMA_ENGINE
+ select DMA_VIRTUAL_CHANNELS
+ help
+ Enable support for ST FDMA controller.
+ It supports 16 independent DMA channels, accepts up to 32 DMA requests
+
+ Say Y here if you have such a chipset.
+ If unsure, say N.
+
+config STM32_DMA
+ bool "STMicroelectronics STM32 DMA support"
+ depends on ARCH_STM32 || COMPILE_TEST
+ select DMA_ENGINE
+ select DMA_VIRTUAL_CHANNELS
+ help
+ Enable support for the on-chip DMA controller on STMicroelectronics
+ STM32 MCUs.
+ If you have a board based on such a MCU and wish to use DMA say Y
+ here.
+
+config STM32_DMAMUX
+ bool "STMicroelectronics STM32 dma multiplexer support"
+ depends on STM32_DMA || COMPILE_TEST
+ help
+ Enable support for the on-chip DMA multiplexer on STMicroelectronics
+ STM32 MCUs.
+ If you have a board based on such a MCU and wish to use DMAMUX say Y
+ here.
+
+config STM32_MDMA
+ bool "STMicroelectronics STM32 master dma support"
+ depends on ARCH_STM32 || COMPILE_TEST
+ depends on OF
+ select DMA_ENGINE
+ select DMA_VIRTUAL_CHANNELS
+ help
+ Enable support for the on-chip MDMA controller on STMicroelectronics
+ STM32 platforms.
+ If you have a board based on STM32 SoC and wish to use the master DMA
+ say Y here.
+
+config SPRD_DMA
+ tristate "Spreadtrum DMA support"
+ depends on ARCH_SPRD || COMPILE_TEST
+ select DMA_ENGINE
+ select DMA_VIRTUAL_CHANNELS
+ help
+ Enable support for the on-chip DMA controller on Spreadtrum platform.
+
+config S3C24XX_DMAC
+ bool "Samsung S3C24XX DMA support"
+ depends on ARCH_S3C24XX || COMPILE_TEST
+ select DMA_ENGINE
+ select DMA_VIRTUAL_CHANNELS
+ help
+ Support for the Samsung S3C24XX DMA controller driver. The
+ DMA controller is having multiple DMA channels which can be
+ configured for different peripherals like audio, UART, SPI.
+ The DMA controller can transfer data from memory to peripheral,
+ periphal to memory, periphal to periphal and memory to memory.
+
+config TXX9_DMAC
+ tristate "Toshiba TXx9 SoC DMA support"
+ depends on MACH_TX49XX
+ select DMA_ENGINE
+ help
+ Support the TXx9 SoC internal DMA controller. This can be
+ integrated in chips such as the Toshiba TX4927/38/39.
+
+config TEGRA186_GPC_DMA
+ tristate "NVIDIA Tegra GPC DMA support"
+ depends on (ARCH_TEGRA || COMPILE_TEST) && ARCH_DMA_ADDR_T_64BIT
+ depends on IOMMU_API
+ select DMA_ENGINE
+ help
+ Support for the NVIDIA Tegra General Purpose Central DMA controller.
+ The DMA controller has multiple DMA channels which can be configured
+ for different peripherals like UART, SPI, etc which are on APB bus.
+ This DMA controller transfers data from memory to peripheral FIFO
+ or vice versa. It also supports memory to memory data transfer.
+
+config TEGRA20_APB_DMA
+ tristate "NVIDIA Tegra20 APB DMA support"
+ depends on ARCH_TEGRA || COMPILE_TEST
+ select DMA_ENGINE
+ help
+ Support for the NVIDIA Tegra20 APB DMA controller driver. The
+ DMA controller is having multiple DMA channel which can be
+ configured for different peripherals like audio, UART, SPI,
+ I2C etc which is in APB bus.
+ This DMA controller transfers data from memory to peripheral fifo
+ or vice versa. It does not support memory to memory data transfer.
+
+config TEGRA210_ADMA
+ tristate "NVIDIA Tegra210 ADMA support"
+ depends on (ARCH_TEGRA_210_SOC || COMPILE_TEST)
+ select DMA_ENGINE
+ select DMA_VIRTUAL_CHANNELS
+ help
+ Support for the NVIDIA Tegra210 ADMA controller driver. The
+ DMA controller has multiple DMA channels and is used to service
+ various audio clients in the Tegra210 audio processing engine
+ (APE). This DMA controller transfers data from memory to
+ peripheral and vice versa. It does not support memory to
+ memory data transfer.
+
+config TIMB_DMA
+ tristate "Timberdale FPGA DMA support"
+ depends on MFD_TIMBERDALE || COMPILE_TEST
+ select DMA_ENGINE
+ help
+ Enable support for the Timberdale FPGA DMA engine.
+
+config UNIPHIER_MDMAC
+ tristate "UniPhier MIO DMAC"
+ depends on ARCH_UNIPHIER || COMPILE_TEST
+ depends on OF
+ select DMA_ENGINE
+ select DMA_VIRTUAL_CHANNELS
+ help
+ Enable support for the MIO DMAC (Media I/O DMA controller) on the
+ UniPhier platform. This DMA controller is used as the external
+ DMA engine of the SD/eMMC controllers of the LD4, Pro4, sLD8 SoCs.
+
+config UNIPHIER_XDMAC
+ tristate "UniPhier XDMAC support"
+ depends on ARCH_UNIPHIER || COMPILE_TEST
+ depends on OF
+ select DMA_ENGINE
+ select DMA_VIRTUAL_CHANNELS
+ help
+ Enable support for the XDMAC (external DMA controller) on the
+ UniPhier platform. This DMA controller can transfer data from
+ memory to memory, memory to peripheral and peripheral to memory.
+
+config XGENE_DMA
+ tristate "APM X-Gene DMA support"
+ depends on ARCH_XGENE || COMPILE_TEST
+ select DMA_ENGINE
+ select DMA_ENGINE_RAID
+ select ASYNC_TX_ENABLE_CHANNEL_SWITCH
+ help
+ Enable support for the APM X-Gene SoC DMA engine.
+
+config XILINX_DMA
+ tristate "Xilinx AXI DMAS Engine"
+ depends on (ARCH_ZYNQ || MICROBLAZE || ARM64)
+ select DMA_ENGINE
+ help
+ Enable support for Xilinx AXI VDMA Soft IP.
+
+ AXI VDMA engine provides high-bandwidth direct memory access
+ between memory and AXI4-Stream video type target
+ peripherals including peripherals which support AXI4-
+ Stream Video Protocol. It has two stream interfaces/
+ channels, Memory Mapped to Stream (MM2S) and Stream to
+ Memory Mapped (S2MM) for the data transfers.
+ AXI CDMA engine provides high-bandwidth direct memory access
+ between a memory-mapped source address and a memory-mapped
+ destination address.
+ AXI DMA engine provides high-bandwidth one dimensional direct
+ memory access between memory and AXI4-Stream target peripherals.
+ AXI MCDMA engine provides high-bandwidth direct memory access
+ between memory and AXI4-Stream target peripherals. It provides
+ the scatter gather interface with multiple channels independent
+ configuration support.
+
+config XILINX_ZYNQMP_DMA
+ tristate "Xilinx ZynqMP DMA Engine"
+ depends on ARCH_ZYNQ || MICROBLAZE || ARM64 || COMPILE_TEST
+ select DMA_ENGINE
+ help
+ Enable support for Xilinx ZynqMP DMA controller.
+
+config XILINX_ZYNQMP_DPDMA
+ tristate "Xilinx DPDMA Engine"
+ depends on HAS_IOMEM && OF
+ select DMA_ENGINE
+ select DMA_VIRTUAL_CHANNELS
+ help
+ Enable support for Xilinx ZynqMP DisplayPort DMA. Choose this option
+ if you have a Xilinx ZynqMP SoC with a DisplayPort subsystem. The
+ driver provides the dmaengine required by the DisplayPort subsystem
+ display driver.
+
+# driver files
+source "drivers/dma/bestcomm/Kconfig"
+
+source "drivers/dma/mediatek/Kconfig"
+
+source "drivers/dma/ptdma/Kconfig"
+
+source "drivers/dma/qcom/Kconfig"
+
+source "drivers/dma/dw/Kconfig"
+
+source "drivers/dma/dw-edma/Kconfig"
+
+source "drivers/dma/hsu/Kconfig"
+
+source "drivers/dma/sf-pdma/Kconfig"
+
+source "drivers/dma/sh/Kconfig"
+
+source "drivers/dma/ti/Kconfig"
+
+source "drivers/dma/fsl-dpaa2-qdma/Kconfig"
+
+source "drivers/dma/lgm/Kconfig"
+
+# clients
+comment "DMA Clients"
+ depends on DMA_ENGINE
+
+config ASYNC_TX_DMA
+ bool "Async_tx: Offload support for the async_tx api"
+ depends on DMA_ENGINE
+ help
+ This allows the async_tx api to take advantage of offload engines for
+ memcpy, memset, xor, and raid6 p+q operations. If your platform has
+ a dma engine that can perform raid operations and you have enabled
+ MD_RAID456 say Y.
+
+ If unsure, say N.
+
+config DMATEST
+ tristate "DMA Test client"
+ depends on DMA_ENGINE
+ select DMA_ENGINE_RAID
+ help
+ Simple DMA test client. Say N unless you're debugging a
+ DMA Device driver.
+
+config DMA_ENGINE_RAID
+ bool
+
+endif
diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile
new file mode 100644
index 000000000..10f7d4241
--- /dev/null
+++ b/drivers/dma/Makefile
@@ -0,0 +1,90 @@
+# SPDX-License-Identifier: GPL-2.0
+#dmaengine debug flags
+subdir-ccflags-$(CONFIG_DMADEVICES_DEBUG) := -DDEBUG
+subdir-ccflags-$(CONFIG_DMADEVICES_VDEBUG) += -DVERBOSE_DEBUG
+
+#core
+obj-$(CONFIG_DMA_ENGINE) += dmaengine.o
+obj-$(CONFIG_DMA_VIRTUAL_CHANNELS) += virt-dma.o
+obj-$(CONFIG_DMA_ACPI) += acpi-dma.o
+obj-$(CONFIG_DMA_OF) += of-dma.o
+
+#dmatest
+obj-$(CONFIG_DMATEST) += dmatest.o
+
+#devices
+obj-$(CONFIG_ALTERA_MSGDMA) += altera-msgdma.o
+obj-$(CONFIG_AMBA_PL08X) += amba-pl08x.o
+obj-$(CONFIG_AMCC_PPC440SPE_ADMA) += ppc4xx/
+obj-$(CONFIG_AMD_PTDMA) += ptdma/
+obj-$(CONFIG_APPLE_ADMAC) += apple-admac.o
+obj-$(CONFIG_AT_HDMAC) += at_hdmac.o
+obj-$(CONFIG_AT_XDMAC) += at_xdmac.o
+obj-$(CONFIG_AXI_DMAC) += dma-axi-dmac.o
+obj-$(CONFIG_BCM_SBA_RAID) += bcm-sba-raid.o
+obj-$(CONFIG_DMA_BCM2835) += bcm2835-dma.o
+obj-$(CONFIG_DMA_JZ4780) += dma-jz4780.o
+obj-$(CONFIG_DMA_SA11X0) += sa11x0-dma.o
+obj-$(CONFIG_DMA_SUN4I) += sun4i-dma.o
+obj-$(CONFIG_DMA_SUN6I) += sun6i-dma.o
+obj-$(CONFIG_DW_AXI_DMAC) += dw-axi-dmac/
+obj-$(CONFIG_DW_DMAC_CORE) += dw/
+obj-$(CONFIG_DW_EDMA) += dw-edma/
+obj-$(CONFIG_EP93XX_DMA) += ep93xx_dma.o
+obj-$(CONFIG_FSL_DMA) += fsldma.o
+obj-$(CONFIG_FSL_EDMA) += fsl-edma.o fsl-edma-common.o
+obj-$(CONFIG_MCF_EDMA) += mcf-edma.o fsl-edma-common.o
+obj-$(CONFIG_FSL_QDMA) += fsl-qdma.o
+obj-$(CONFIG_FSL_RAID) += fsl_raid.o
+obj-$(CONFIG_HISI_DMA) += hisi_dma.o
+obj-$(CONFIG_HSU_DMA) += hsu/
+obj-$(CONFIG_IMG_MDC_DMA) += img-mdc-dma.o
+obj-$(CONFIG_IMX_DMA) += imx-dma.o
+obj-$(CONFIG_IMX_SDMA) += imx-sdma.o
+obj-$(CONFIG_INTEL_IDMA64) += idma64.o
+obj-$(CONFIG_INTEL_IOATDMA) += ioat/
+obj-y += idxd/
+obj-$(CONFIG_INTEL_IOP_ADMA) += iop-adma.o
+obj-$(CONFIG_K3_DMA) += k3dma.o
+obj-$(CONFIG_LPC18XX_DMAMUX) += lpc18xx-dmamux.o
+obj-$(CONFIG_MILBEAUT_HDMAC) += milbeaut-hdmac.o
+obj-$(CONFIG_MILBEAUT_XDMAC) += milbeaut-xdmac.o
+obj-$(CONFIG_MMP_PDMA) += mmp_pdma.o
+obj-$(CONFIG_MMP_TDMA) += mmp_tdma.o
+obj-$(CONFIG_MOXART_DMA) += moxart-dma.o
+obj-$(CONFIG_MPC512X_DMA) += mpc512x_dma.o
+obj-$(CONFIG_MV_XOR) += mv_xor.o
+obj-$(CONFIG_MV_XOR_V2) += mv_xor_v2.o
+obj-$(CONFIG_MXS_DMA) += mxs-dma.o
+obj-$(CONFIG_MX3_IPU) += ipu/
+obj-$(CONFIG_NBPFAXI_DMA) += nbpfaxi.o
+obj-$(CONFIG_OWL_DMA) += owl-dma.o
+obj-$(CONFIG_PCH_DMA) += pch_dma.o
+obj-$(CONFIG_PL330_DMA) += pl330.o
+obj-$(CONFIG_PLX_DMA) += plx_dma.o
+obj-$(CONFIG_PPC_BESTCOMM) += bestcomm/
+obj-$(CONFIG_PXA_DMA) += pxa_dma.o
+obj-$(CONFIG_RENESAS_DMA) += sh/
+obj-$(CONFIG_SF_PDMA) += sf-pdma/
+obj-$(CONFIG_STE_DMA40) += ste_dma40.o ste_dma40_ll.o
+obj-$(CONFIG_STM32_DMA) += stm32-dma.o
+obj-$(CONFIG_STM32_DMAMUX) += stm32-dmamux.o
+obj-$(CONFIG_STM32_MDMA) += stm32-mdma.o
+obj-$(CONFIG_SPRD_DMA) += sprd-dma.o
+obj-$(CONFIG_S3C24XX_DMAC) += s3c24xx-dma.o
+obj-$(CONFIG_TXX9_DMAC) += txx9dmac.o
+obj-$(CONFIG_TEGRA186_GPC_DMA) += tegra186-gpc-dma.o
+obj-$(CONFIG_TEGRA20_APB_DMA) += tegra20-apb-dma.o
+obj-$(CONFIG_TEGRA210_ADMA) += tegra210-adma.o
+obj-$(CONFIG_TIMB_DMA) += timb_dma.o
+obj-$(CONFIG_UNIPHIER_MDMAC) += uniphier-mdmac.o
+obj-$(CONFIG_UNIPHIER_XDMAC) += uniphier-xdmac.o
+obj-$(CONFIG_XGENE_DMA) += xgene-dma.o
+obj-$(CONFIG_ST_FDMA) += st_fdma.o
+obj-$(CONFIG_FSL_DPAA2_QDMA) += fsl-dpaa2-qdma/
+obj-$(CONFIG_INTEL_LDMA) += lgm/
+
+obj-y += mediatek/
+obj-y += qcom/
+obj-y += ti/
+obj-y += xilinx/
diff --git a/drivers/dma/TODO b/drivers/dma/TODO
new file mode 100644
index 000000000..b8045cd42
--- /dev/null
+++ b/drivers/dma/TODO
@@ -0,0 +1,12 @@
+TODO for slave dma
+
+1. Move remaining drivers to use new slave interface
+2. Remove old slave pointer machansim
+3. Make issue_pending to start the transaction in below drivers
+ - mpc512x_dma
+ - imx-dma
+ - imx-sdma
+ - mxs-dma.c
+ - intel_mid_dma
+4. Check other subsystems for dma drivers and merge/move to dmaengine
+5. Remove dma_slave_config's dma direction.
diff --git a/drivers/dma/acpi-dma.c b/drivers/dma/acpi-dma.c
new file mode 100644
index 000000000..5906eae26
--- /dev/null
+++ b/drivers/dma/acpi-dma.c
@@ -0,0 +1,481 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * ACPI helpers for DMA request / controller
+ *
+ * Based on of-dma.c
+ *
+ * Copyright (C) 2013, Intel Corporation
+ * Authors: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+ * Mika Westerberg <mika.westerberg@linux.intel.com>
+ */
+
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+#include <linux/ioport.h>
+#include <linux/acpi.h>
+#include <linux/acpi_dma.h>
+#include <linux/property.h>
+
+static LIST_HEAD(acpi_dma_list);
+static DEFINE_MUTEX(acpi_dma_lock);
+
+/**
+ * acpi_dma_parse_resource_group - match device and parse resource group
+ * @grp: CSRT resource group
+ * @adev: ACPI device to match with
+ * @adma: struct acpi_dma of the given DMA controller
+ *
+ * In order to match a device from DSDT table to the corresponding CSRT device
+ * we use MMIO address and IRQ.
+ *
+ * Return:
+ * 1 on success, 0 when no information is available, or appropriate errno value
+ * on error.
+ */
+static int acpi_dma_parse_resource_group(const struct acpi_csrt_group *grp,
+ struct acpi_device *adev, struct acpi_dma *adma)
+{
+ const struct acpi_csrt_shared_info *si;
+ struct list_head resource_list;
+ struct resource_entry *rentry;
+ resource_size_t mem = 0, irq = 0;
+ int ret;
+
+ if (grp->shared_info_length != sizeof(struct acpi_csrt_shared_info))
+ return -ENODEV;
+
+ INIT_LIST_HEAD(&resource_list);
+ ret = acpi_dev_get_resources(adev, &resource_list, NULL, NULL);
+ if (ret <= 0)
+ return 0;
+
+ list_for_each_entry(rentry, &resource_list, node) {
+ if (resource_type(rentry->res) == IORESOURCE_MEM)
+ mem = rentry->res->start;
+ else if (resource_type(rentry->res) == IORESOURCE_IRQ)
+ irq = rentry->res->start;
+ }
+
+ acpi_dev_free_resource_list(&resource_list);
+
+ /* Consider initial zero values as resource not found */
+ if (mem == 0 && irq == 0)
+ return 0;
+
+ si = (const struct acpi_csrt_shared_info *)&grp[1];
+
+ /* Match device by MMIO */
+ if (si->mmio_base_low != lower_32_bits(mem) ||
+ si->mmio_base_high != upper_32_bits(mem))
+ return 0;
+
+ /*
+ * acpi_gsi_to_irq() can't be used because some platforms do not save
+ * registered IRQs in the MP table. Instead we just try to register
+ * the GSI, which is the core part of the above mentioned function.
+ */
+ ret = acpi_register_gsi(NULL, si->gsi_interrupt, si->interrupt_mode, si->interrupt_polarity);
+ if (ret < 0)
+ return 0;
+
+ /* Match device by Linux vIRQ */
+ if (ret != irq)
+ return 0;
+
+ dev_dbg(&adev->dev, "matches with %.4s%04X (rev %u)\n",
+ (char *)&grp->vendor_id, grp->device_id, grp->revision);
+
+ /* Check if the request line range is available */
+ if (si->base_request_line == 0 && si->num_handshake_signals == 0)
+ return 0;
+
+ /* Set up DMA mask based on value from CSRT */
+ ret = dma_coerce_mask_and_coherent(&adev->dev,
+ DMA_BIT_MASK(si->dma_address_width));
+ if (ret)
+ return 0;
+
+ adma->base_request_line = si->base_request_line;
+ adma->end_request_line = si->base_request_line +
+ si->num_handshake_signals - 1;
+
+ dev_dbg(&adev->dev, "request line base: 0x%04x end: 0x%04x\n",
+ adma->base_request_line, adma->end_request_line);
+
+ return 1;
+}
+
+/**
+ * acpi_dma_parse_csrt - parse CSRT to exctract additional DMA resources
+ * @adev: ACPI device to match with
+ * @adma: struct acpi_dma of the given DMA controller
+ *
+ * CSRT or Core System Resources Table is a proprietary ACPI table
+ * introduced by Microsoft. This table can contain devices that are not in
+ * the system DSDT table. In particular DMA controllers might be described
+ * here.
+ *
+ * We are using this table to get the request line range of the specific DMA
+ * controller to be used later.
+ */
+static void acpi_dma_parse_csrt(struct acpi_device *adev, struct acpi_dma *adma)
+{
+ struct acpi_csrt_group *grp, *end;
+ struct acpi_table_csrt *csrt;
+ acpi_status status;
+ int ret;
+
+ status = acpi_get_table(ACPI_SIG_CSRT, 0,
+ (struct acpi_table_header **)&csrt);
+ if (ACPI_FAILURE(status)) {
+ if (status != AE_NOT_FOUND)
+ dev_warn(&adev->dev, "failed to get the CSRT table\n");
+ return;
+ }
+
+ grp = (struct acpi_csrt_group *)(csrt + 1);
+ end = (struct acpi_csrt_group *)((void *)csrt + csrt->header.length);
+
+ while (grp < end) {
+ ret = acpi_dma_parse_resource_group(grp, adev, adma);
+ if (ret < 0) {
+ dev_warn(&adev->dev,
+ "error in parsing resource group\n");
+ break;
+ }
+
+ grp = (struct acpi_csrt_group *)((void *)grp + grp->length);
+ }
+
+ acpi_put_table((struct acpi_table_header *)csrt);
+}
+
+/**
+ * acpi_dma_controller_register - Register a DMA controller to ACPI DMA helpers
+ * @dev: struct device of DMA controller
+ * @acpi_dma_xlate: translation function which converts a dma specifier
+ * into a dma_chan structure
+ * @data: pointer to controller specific data to be used by
+ * translation function
+ *
+ * Allocated memory should be freed with appropriate acpi_dma_controller_free()
+ * call.
+ *
+ * Return:
+ * 0 on success or appropriate errno value on error.
+ */
+int acpi_dma_controller_register(struct device *dev,
+ struct dma_chan *(*acpi_dma_xlate)
+ (struct acpi_dma_spec *, struct acpi_dma *),
+ void *data)
+{
+ struct acpi_device *adev;
+ struct acpi_dma *adma;
+
+ if (!dev || !acpi_dma_xlate)
+ return -EINVAL;
+
+ /* Check if the device was enumerated by ACPI */
+ adev = ACPI_COMPANION(dev);
+ if (!adev)
+ return -EINVAL;
+
+ adma = kzalloc(sizeof(*adma), GFP_KERNEL);
+ if (!adma)
+ return -ENOMEM;
+
+ adma->dev = dev;
+ adma->acpi_dma_xlate = acpi_dma_xlate;
+ adma->data = data;
+
+ acpi_dma_parse_csrt(adev, adma);
+
+ /* Now queue acpi_dma controller structure in list */
+ mutex_lock(&acpi_dma_lock);
+ list_add_tail(&adma->dma_controllers, &acpi_dma_list);
+ mutex_unlock(&acpi_dma_lock);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(acpi_dma_controller_register);
+
+/**
+ * acpi_dma_controller_free - Remove a DMA controller from ACPI DMA helpers list
+ * @dev: struct device of DMA controller
+ *
+ * Memory allocated by acpi_dma_controller_register() is freed here.
+ *
+ * Return:
+ * 0 on success or appropriate errno value on error.
+ */
+int acpi_dma_controller_free(struct device *dev)
+{
+ struct acpi_dma *adma;
+
+ if (!dev)
+ return -EINVAL;
+
+ mutex_lock(&acpi_dma_lock);
+
+ list_for_each_entry(adma, &acpi_dma_list, dma_controllers)
+ if (adma->dev == dev) {
+ list_del(&adma->dma_controllers);
+ mutex_unlock(&acpi_dma_lock);
+ kfree(adma);
+ return 0;
+ }
+
+ mutex_unlock(&acpi_dma_lock);
+ return -ENODEV;
+}
+EXPORT_SYMBOL_GPL(acpi_dma_controller_free);
+
+static void devm_acpi_dma_release(struct device *dev, void *res)
+{
+ acpi_dma_controller_free(dev);
+}
+
+/**
+ * devm_acpi_dma_controller_register - resource managed acpi_dma_controller_register()
+ * @dev: device that is registering this DMA controller
+ * @acpi_dma_xlate: translation function
+ * @data: pointer to controller specific data
+ *
+ * Managed acpi_dma_controller_register(). DMA controller registered by this
+ * function are automatically freed on driver detach. See
+ * acpi_dma_controller_register() for more information.
+ *
+ * Return:
+ * 0 on success or appropriate errno value on error.
+ */
+int devm_acpi_dma_controller_register(struct device *dev,
+ struct dma_chan *(*acpi_dma_xlate)
+ (struct acpi_dma_spec *, struct acpi_dma *),
+ void *data)
+{
+ void *res;
+ int ret;
+
+ res = devres_alloc(devm_acpi_dma_release, 0, GFP_KERNEL);
+ if (!res)
+ return -ENOMEM;
+
+ ret = acpi_dma_controller_register(dev, acpi_dma_xlate, data);
+ if (ret) {
+ devres_free(res);
+ return ret;
+ }
+ devres_add(dev, res);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(devm_acpi_dma_controller_register);
+
+/**
+ * devm_acpi_dma_controller_free - resource managed acpi_dma_controller_free()
+ * @dev: device that is unregistering as DMA controller
+ *
+ * Unregister a DMA controller registered with
+ * devm_acpi_dma_controller_register(). Normally this function will not need to
+ * be called and the resource management code will ensure that the resource is
+ * freed.
+ */
+void devm_acpi_dma_controller_free(struct device *dev)
+{
+ WARN_ON(devres_release(dev, devm_acpi_dma_release, NULL, NULL));
+}
+EXPORT_SYMBOL_GPL(devm_acpi_dma_controller_free);
+
+/**
+ * acpi_dma_update_dma_spec - prepare dma specifier to pass to translation function
+ * @adma: struct acpi_dma of DMA controller
+ * @dma_spec: dma specifier to update
+ *
+ * Accordingly to ACPI 5.0 Specification Table 6-170 "Fixed DMA Resource
+ * Descriptor":
+ * DMA Request Line bits is a platform-relative number uniquely
+ * identifying the request line assigned. Request line-to-Controller
+ * mapping is done in a controller-specific OS driver.
+ * That's why we can safely adjust slave_id when the appropriate controller is
+ * found.
+ *
+ * Return:
+ * 0, if no information is avaiable, -1 on mismatch, and 1 otherwise.
+ */
+static int acpi_dma_update_dma_spec(struct acpi_dma *adma,
+ struct acpi_dma_spec *dma_spec)
+{
+ /* Set link to the DMA controller device */
+ dma_spec->dev = adma->dev;
+
+ /* Check if the request line range is available */
+ if (adma->base_request_line == 0 && adma->end_request_line == 0)
+ return 0;
+
+ /* Check if slave_id falls to the range */
+ if (dma_spec->slave_id < adma->base_request_line ||
+ dma_spec->slave_id > adma->end_request_line)
+ return -1;
+
+ /*
+ * Here we adjust slave_id. It should be a relative number to the base
+ * request line.
+ */
+ dma_spec->slave_id -= adma->base_request_line;
+
+ return 1;
+}
+
+struct acpi_dma_parser_data {
+ struct acpi_dma_spec dma_spec;
+ size_t index;
+ size_t n;
+};
+
+/**
+ * acpi_dma_parse_fixed_dma - Parse FixedDMA ACPI resources to a DMA specifier
+ * @res: struct acpi_resource to get FixedDMA resources from
+ * @data: pointer to a helper struct acpi_dma_parser_data
+ */
+static int acpi_dma_parse_fixed_dma(struct acpi_resource *res, void *data)
+{
+ struct acpi_dma_parser_data *pdata = data;
+
+ if (res->type == ACPI_RESOURCE_TYPE_FIXED_DMA) {
+ struct acpi_resource_fixed_dma *dma = &res->data.fixed_dma;
+
+ if (pdata->n++ == pdata->index) {
+ pdata->dma_spec.chan_id = dma->channels;
+ pdata->dma_spec.slave_id = dma->request_lines;
+ }
+ }
+
+ /* Tell the ACPI core to skip this resource */
+ return 1;
+}
+
+/**
+ * acpi_dma_request_slave_chan_by_index - Get the DMA slave channel
+ * @dev: struct device to get DMA request from
+ * @index: index of FixedDMA descriptor for @dev
+ *
+ * Return:
+ * Pointer to appropriate dma channel on success or an error pointer.
+ */
+struct dma_chan *acpi_dma_request_slave_chan_by_index(struct device *dev,
+ size_t index)
+{
+ struct acpi_dma_parser_data pdata;
+ struct acpi_dma_spec *dma_spec = &pdata.dma_spec;
+ struct acpi_device *adev = ACPI_COMPANION(dev);
+ struct list_head resource_list;
+ struct acpi_dma *adma;
+ struct dma_chan *chan = NULL;
+ int found;
+ int ret;
+
+ memset(&pdata, 0, sizeof(pdata));
+ pdata.index = index;
+
+ /* Initial values for the request line and channel */
+ dma_spec->chan_id = -1;
+ dma_spec->slave_id = -1;
+
+ INIT_LIST_HEAD(&resource_list);
+ ret = acpi_dev_get_resources(adev, &resource_list,
+ acpi_dma_parse_fixed_dma, &pdata);
+ acpi_dev_free_resource_list(&resource_list);
+ if (ret < 0)
+ return ERR_PTR(ret);
+
+ if (dma_spec->slave_id < 0 || dma_spec->chan_id < 0)
+ return ERR_PTR(-ENODEV);
+
+ mutex_lock(&acpi_dma_lock);
+
+ list_for_each_entry(adma, &acpi_dma_list, dma_controllers) {
+ /*
+ * We are not going to call translation function if slave_id
+ * doesn't fall to the request range.
+ */
+ found = acpi_dma_update_dma_spec(adma, dma_spec);
+ if (found < 0)
+ continue;
+ chan = adma->acpi_dma_xlate(dma_spec, adma);
+ /*
+ * Try to get a channel only from the DMA controller that
+ * matches the slave_id. See acpi_dma_update_dma_spec()
+ * description for the details.
+ */
+ if (found > 0 || chan)
+ break;
+ }
+
+ mutex_unlock(&acpi_dma_lock);
+ return chan ? chan : ERR_PTR(-EPROBE_DEFER);
+}
+EXPORT_SYMBOL_GPL(acpi_dma_request_slave_chan_by_index);
+
+/**
+ * acpi_dma_request_slave_chan_by_name - Get the DMA slave channel
+ * @dev: struct device to get DMA request from
+ * @name: represents corresponding FixedDMA descriptor for @dev
+ *
+ * In order to support both Device Tree and ACPI in a single driver we
+ * translate the names "tx" and "rx" here based on the most common case where
+ * the first FixedDMA descriptor is TX and second is RX.
+ *
+ * If the device has "dma-names" property the FixedDMA descriptor indices
+ * are retrieved based on those. Otherwise the function falls back using
+ * hardcoded indices.
+ *
+ * Return:
+ * Pointer to appropriate dma channel on success or an error pointer.
+ */
+struct dma_chan *acpi_dma_request_slave_chan_by_name(struct device *dev,
+ const char *name)
+{
+ int index;
+
+ index = device_property_match_string(dev, "dma-names", name);
+ if (index < 0) {
+ if (!strcmp(name, "tx"))
+ index = 0;
+ else if (!strcmp(name, "rx"))
+ index = 1;
+ else
+ return ERR_PTR(-ENODEV);
+ }
+
+ dev_dbg(dev, "Looking for DMA channel \"%s\" at index %d...\n", name, index);
+ return acpi_dma_request_slave_chan_by_index(dev, index);
+}
+EXPORT_SYMBOL_GPL(acpi_dma_request_slave_chan_by_name);
+
+/**
+ * acpi_dma_simple_xlate - Simple ACPI DMA engine translation helper
+ * @dma_spec: pointer to ACPI DMA specifier
+ * @adma: pointer to ACPI DMA controller data
+ *
+ * A simple translation function for ACPI based devices. Passes &struct
+ * dma_spec to the DMA controller driver provided filter function.
+ *
+ * Return:
+ * Pointer to the channel if found or %NULL otherwise.
+ */
+struct dma_chan *acpi_dma_simple_xlate(struct acpi_dma_spec *dma_spec,
+ struct acpi_dma *adma)
+{
+ struct acpi_dma_filter_info *info = adma->data;
+
+ if (!info || !info->filter_fn)
+ return NULL;
+
+ return dma_request_channel(info->dma_cap, info->filter_fn, dma_spec);
+}
+EXPORT_SYMBOL_GPL(acpi_dma_simple_xlate);
diff --git a/drivers/dma/altera-msgdma.c b/drivers/dma/altera-msgdma.c
new file mode 100644
index 000000000..4153c2edb
--- /dev/null
+++ b/drivers/dma/altera-msgdma.c
@@ -0,0 +1,963 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * DMA driver for Altera mSGDMA IP core
+ *
+ * Copyright (C) 2017 Stefan Roese <sr@denx.de>
+ *
+ * Based on drivers/dma/xilinx/zynqmp_dma.c, which is:
+ * Copyright (C) 2016 Xilinx, Inc. All rights reserved.
+ */
+
+#include <linux/bitops.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmapool.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/iopoll.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/of_dma.h>
+
+#include "dmaengine.h"
+
+#define MSGDMA_MAX_TRANS_LEN U32_MAX
+#define MSGDMA_DESC_NUM 1024
+
+/**
+ * struct msgdma_extended_desc - implements an extended descriptor
+ * @read_addr_lo: data buffer source address low bits
+ * @write_addr_lo: data buffer destination address low bits
+ * @len: the number of bytes to transfer per descriptor
+ * @burst_seq_num: bit 31:24 write burst
+ * bit 23:16 read burst
+ * bit 15:00 sequence number
+ * @stride: bit 31:16 write stride
+ * bit 15:00 read stride
+ * @read_addr_hi: data buffer source address high bits
+ * @write_addr_hi: data buffer destination address high bits
+ * @control: characteristics of the transfer
+ */
+struct msgdma_extended_desc {
+ u32 read_addr_lo;
+ u32 write_addr_lo;
+ u32 len;
+ u32 burst_seq_num;
+ u32 stride;
+ u32 read_addr_hi;
+ u32 write_addr_hi;
+ u32 control;
+};
+
+/* mSGDMA descriptor control field bit definitions */
+#define MSGDMA_DESC_CTL_SET_CH(x) ((x) & 0xff)
+#define MSGDMA_DESC_CTL_GEN_SOP BIT(8)
+#define MSGDMA_DESC_CTL_GEN_EOP BIT(9)
+#define MSGDMA_DESC_CTL_PARK_READS BIT(10)
+#define MSGDMA_DESC_CTL_PARK_WRITES BIT(11)
+#define MSGDMA_DESC_CTL_END_ON_EOP BIT(12)
+#define MSGDMA_DESC_CTL_END_ON_LEN BIT(13)
+#define MSGDMA_DESC_CTL_TR_COMP_IRQ BIT(14)
+#define MSGDMA_DESC_CTL_EARLY_IRQ BIT(15)
+#define MSGDMA_DESC_CTL_TR_ERR_IRQ GENMASK(23, 16)
+#define MSGDMA_DESC_CTL_EARLY_DONE BIT(24)
+
+/*
+ * Writing "1" the "go" bit commits the entire descriptor into the
+ * descriptor FIFO(s)
+ */
+#define MSGDMA_DESC_CTL_GO BIT(31)
+
+/* Tx buffer control flags */
+#define MSGDMA_DESC_CTL_TX_FIRST (MSGDMA_DESC_CTL_GEN_SOP | \
+ MSGDMA_DESC_CTL_TR_ERR_IRQ | \
+ MSGDMA_DESC_CTL_GO)
+
+#define MSGDMA_DESC_CTL_TX_MIDDLE (MSGDMA_DESC_CTL_TR_ERR_IRQ | \
+ MSGDMA_DESC_CTL_GO)
+
+#define MSGDMA_DESC_CTL_TX_LAST (MSGDMA_DESC_CTL_GEN_EOP | \
+ MSGDMA_DESC_CTL_TR_COMP_IRQ | \
+ MSGDMA_DESC_CTL_TR_ERR_IRQ | \
+ MSGDMA_DESC_CTL_GO)
+
+#define MSGDMA_DESC_CTL_TX_SINGLE (MSGDMA_DESC_CTL_GEN_SOP | \
+ MSGDMA_DESC_CTL_GEN_EOP | \
+ MSGDMA_DESC_CTL_TR_COMP_IRQ | \
+ MSGDMA_DESC_CTL_TR_ERR_IRQ | \
+ MSGDMA_DESC_CTL_GO)
+
+#define MSGDMA_DESC_CTL_RX_SINGLE (MSGDMA_DESC_CTL_END_ON_EOP | \
+ MSGDMA_DESC_CTL_END_ON_LEN | \
+ MSGDMA_DESC_CTL_TR_COMP_IRQ | \
+ MSGDMA_DESC_CTL_EARLY_IRQ | \
+ MSGDMA_DESC_CTL_TR_ERR_IRQ | \
+ MSGDMA_DESC_CTL_GO)
+
+/* mSGDMA extended descriptor stride definitions */
+#define MSGDMA_DESC_STRIDE_RD 0x00000001
+#define MSGDMA_DESC_STRIDE_WR 0x00010000
+#define MSGDMA_DESC_STRIDE_RW 0x00010001
+
+/* mSGDMA dispatcher control and status register map */
+#define MSGDMA_CSR_STATUS 0x00 /* Read / Clear */
+#define MSGDMA_CSR_CONTROL 0x04 /* Read / Write */
+#define MSGDMA_CSR_RW_FILL_LEVEL 0x08 /* 31:16 - write fill level */
+ /* 15:00 - read fill level */
+#define MSGDMA_CSR_RESP_FILL_LEVEL 0x0c /* response FIFO fill level */
+#define MSGDMA_CSR_RW_SEQ_NUM 0x10 /* 31:16 - write seq number */
+ /* 15:00 - read seq number */
+
+/* mSGDMA CSR status register bit definitions */
+#define MSGDMA_CSR_STAT_BUSY BIT(0)
+#define MSGDMA_CSR_STAT_DESC_BUF_EMPTY BIT(1)
+#define MSGDMA_CSR_STAT_DESC_BUF_FULL BIT(2)
+#define MSGDMA_CSR_STAT_RESP_BUF_EMPTY BIT(3)
+#define MSGDMA_CSR_STAT_RESP_BUF_FULL BIT(4)
+#define MSGDMA_CSR_STAT_STOPPED BIT(5)
+#define MSGDMA_CSR_STAT_RESETTING BIT(6)
+#define MSGDMA_CSR_STAT_STOPPED_ON_ERR BIT(7)
+#define MSGDMA_CSR_STAT_STOPPED_ON_EARLY BIT(8)
+#define MSGDMA_CSR_STAT_IRQ BIT(9)
+#define MSGDMA_CSR_STAT_MASK GENMASK(9, 0)
+#define MSGDMA_CSR_STAT_MASK_WITHOUT_IRQ GENMASK(8, 0)
+
+#define DESC_EMPTY (MSGDMA_CSR_STAT_DESC_BUF_EMPTY | \
+ MSGDMA_CSR_STAT_RESP_BUF_EMPTY)
+
+/* mSGDMA CSR control register bit definitions */
+#define MSGDMA_CSR_CTL_STOP BIT(0)
+#define MSGDMA_CSR_CTL_RESET BIT(1)
+#define MSGDMA_CSR_CTL_STOP_ON_ERR BIT(2)
+#define MSGDMA_CSR_CTL_STOP_ON_EARLY BIT(3)
+#define MSGDMA_CSR_CTL_GLOBAL_INTR BIT(4)
+#define MSGDMA_CSR_CTL_STOP_DESCS BIT(5)
+
+/* mSGDMA CSR fill level bits */
+#define MSGDMA_CSR_WR_FILL_LEVEL_GET(v) (((v) & 0xffff0000) >> 16)
+#define MSGDMA_CSR_RD_FILL_LEVEL_GET(v) ((v) & 0x0000ffff)
+#define MSGDMA_CSR_RESP_FILL_LEVEL_GET(v) ((v) & 0x0000ffff)
+
+#define MSGDMA_CSR_SEQ_NUM_GET(v) (((v) & 0xffff0000) >> 16)
+
+/* mSGDMA response register map */
+#define MSGDMA_RESP_BYTES_TRANSFERRED 0x00
+#define MSGDMA_RESP_STATUS 0x04
+
+/* mSGDMA response register bit definitions */
+#define MSGDMA_RESP_EARLY_TERM BIT(8)
+#define MSGDMA_RESP_ERR_MASK 0xff
+
+/**
+ * struct msgdma_sw_desc - implements a sw descriptor
+ * @async_tx: support for the async_tx api
+ * @hw_desc: assosiated HW descriptor
+ * @node: node to move from the free list to the tx list
+ * @tx_list: transmit list node
+ */
+struct msgdma_sw_desc {
+ struct dma_async_tx_descriptor async_tx;
+ struct msgdma_extended_desc hw_desc;
+ struct list_head node;
+ struct list_head tx_list;
+};
+
+/*
+ * struct msgdma_device - DMA device structure
+ */
+struct msgdma_device {
+ spinlock_t lock;
+ struct device *dev;
+ struct tasklet_struct irq_tasklet;
+ struct list_head pending_list;
+ struct list_head free_list;
+ struct list_head active_list;
+ struct list_head done_list;
+ u32 desc_free_cnt;
+ bool idle;
+
+ struct dma_device dmadev;
+ struct dma_chan dmachan;
+ dma_addr_t hw_desq;
+ struct msgdma_sw_desc *sw_desq;
+ unsigned int npendings;
+
+ struct dma_slave_config slave_cfg;
+
+ int irq;
+
+ /* mSGDMA controller */
+ void __iomem *csr;
+
+ /* mSGDMA descriptors */
+ void __iomem *desc;
+
+ /* mSGDMA response */
+ void __iomem *resp;
+};
+
+#define to_mdev(chan) container_of(chan, struct msgdma_device, dmachan)
+#define tx_to_desc(tx) container_of(tx, struct msgdma_sw_desc, async_tx)
+
+/**
+ * msgdma_get_descriptor - Get the sw descriptor from the pool
+ * @mdev: Pointer to the Altera mSGDMA device structure
+ *
+ * Return: The sw descriptor
+ */
+static struct msgdma_sw_desc *msgdma_get_descriptor(struct msgdma_device *mdev)
+{
+ struct msgdma_sw_desc *desc;
+ unsigned long flags;
+
+ spin_lock_irqsave(&mdev->lock, flags);
+ desc = list_first_entry(&mdev->free_list, struct msgdma_sw_desc, node);
+ list_del(&desc->node);
+ spin_unlock_irqrestore(&mdev->lock, flags);
+
+ INIT_LIST_HEAD(&desc->tx_list);
+
+ return desc;
+}
+
+/**
+ * msgdma_free_descriptor - Issue pending transactions
+ * @mdev: Pointer to the Altera mSGDMA device structure
+ * @desc: Transaction descriptor pointer
+ */
+static void msgdma_free_descriptor(struct msgdma_device *mdev,
+ struct msgdma_sw_desc *desc)
+{
+ struct msgdma_sw_desc *child, *next;
+
+ mdev->desc_free_cnt++;
+ list_add_tail(&desc->node, &mdev->free_list);
+ list_for_each_entry_safe(child, next, &desc->tx_list, node) {
+ mdev->desc_free_cnt++;
+ list_move_tail(&child->node, &mdev->free_list);
+ }
+}
+
+/**
+ * msgdma_free_desc_list - Free descriptors list
+ * @mdev: Pointer to the Altera mSGDMA device structure
+ * @list: List to parse and delete the descriptor
+ */
+static void msgdma_free_desc_list(struct msgdma_device *mdev,
+ struct list_head *list)
+{
+ struct msgdma_sw_desc *desc, *next;
+
+ list_for_each_entry_safe(desc, next, list, node)
+ msgdma_free_descriptor(mdev, desc);
+}
+
+/**
+ * msgdma_desc_config - Configure the descriptor
+ * @desc: Hw descriptor pointer
+ * @dst: Destination buffer address
+ * @src: Source buffer address
+ * @len: Transfer length
+ * @stride: Read/write stride value to set
+ */
+static void msgdma_desc_config(struct msgdma_extended_desc *desc,
+ dma_addr_t dst, dma_addr_t src, size_t len,
+ u32 stride)
+{
+ /* Set lower 32bits of src & dst addresses in the descriptor */
+ desc->read_addr_lo = lower_32_bits(src);
+ desc->write_addr_lo = lower_32_bits(dst);
+
+ /* Set upper 32bits of src & dst addresses in the descriptor */
+ desc->read_addr_hi = upper_32_bits(src);
+ desc->write_addr_hi = upper_32_bits(dst);
+
+ desc->len = len;
+ desc->stride = stride;
+ desc->burst_seq_num = 0; /* 0 will result in max burst length */
+
+ /*
+ * Don't set interrupt on xfer end yet, this will be done later
+ * for the "last" descriptor
+ */
+ desc->control = MSGDMA_DESC_CTL_TR_ERR_IRQ | MSGDMA_DESC_CTL_GO |
+ MSGDMA_DESC_CTL_END_ON_LEN;
+}
+
+/**
+ * msgdma_desc_config_eod - Mark the descriptor as end descriptor
+ * @desc: Hw descriptor pointer
+ */
+static void msgdma_desc_config_eod(struct msgdma_extended_desc *desc)
+{
+ desc->control |= MSGDMA_DESC_CTL_TR_COMP_IRQ;
+}
+
+/**
+ * msgdma_tx_submit - Submit DMA transaction
+ * @tx: Async transaction descriptor pointer
+ *
+ * Return: cookie value
+ */
+static dma_cookie_t msgdma_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+ struct msgdma_device *mdev = to_mdev(tx->chan);
+ struct msgdma_sw_desc *new;
+ dma_cookie_t cookie;
+ unsigned long flags;
+
+ new = tx_to_desc(tx);
+ spin_lock_irqsave(&mdev->lock, flags);
+ cookie = dma_cookie_assign(tx);
+
+ list_add_tail(&new->node, &mdev->pending_list);
+ spin_unlock_irqrestore(&mdev->lock, flags);
+
+ return cookie;
+}
+
+/**
+ * msgdma_prep_memcpy - prepare descriptors for memcpy transaction
+ * @dchan: DMA channel
+ * @dma_dst: Destination buffer address
+ * @dma_src: Source buffer address
+ * @len: Transfer length
+ * @flags: transfer ack flags
+ *
+ * Return: Async transaction descriptor on success and NULL on failure
+ */
+static struct dma_async_tx_descriptor *
+msgdma_prep_memcpy(struct dma_chan *dchan, dma_addr_t dma_dst,
+ dma_addr_t dma_src, size_t len, ulong flags)
+{
+ struct msgdma_device *mdev = to_mdev(dchan);
+ struct msgdma_sw_desc *new, *first = NULL;
+ struct msgdma_extended_desc *desc;
+ size_t copy;
+ u32 desc_cnt;
+ unsigned long irqflags;
+
+ desc_cnt = DIV_ROUND_UP(len, MSGDMA_MAX_TRANS_LEN);
+
+ spin_lock_irqsave(&mdev->lock, irqflags);
+ if (desc_cnt > mdev->desc_free_cnt) {
+ spin_unlock_irqrestore(&mdev->lock, irqflags);
+ dev_dbg(mdev->dev, "mdev %p descs are not available\n", mdev);
+ return NULL;
+ }
+ mdev->desc_free_cnt -= desc_cnt;
+ spin_unlock_irqrestore(&mdev->lock, irqflags);
+
+ do {
+ /* Allocate and populate the descriptor */
+ new = msgdma_get_descriptor(mdev);
+
+ copy = min_t(size_t, len, MSGDMA_MAX_TRANS_LEN);
+ desc = &new->hw_desc;
+ msgdma_desc_config(desc, dma_dst, dma_src, copy,
+ MSGDMA_DESC_STRIDE_RW);
+ len -= copy;
+ dma_src += copy;
+ dma_dst += copy;
+ if (!first)
+ first = new;
+ else
+ list_add_tail(&new->node, &first->tx_list);
+ } while (len);
+
+ msgdma_desc_config_eod(desc);
+ async_tx_ack(&first->async_tx);
+ first->async_tx.flags = flags;
+
+ return &first->async_tx;
+}
+
+/**
+ * msgdma_prep_slave_sg - prepare descriptors for a slave sg transaction
+ *
+ * @dchan: DMA channel
+ * @sgl: Destination scatter list
+ * @sg_len: Number of entries in destination scatter list
+ * @dir: DMA transfer direction
+ * @flags: transfer ack flags
+ * @context: transfer context (unused)
+ */
+static struct dma_async_tx_descriptor *
+msgdma_prep_slave_sg(struct dma_chan *dchan, struct scatterlist *sgl,
+ unsigned int sg_len, enum dma_transfer_direction dir,
+ unsigned long flags, void *context)
+
+{
+ struct msgdma_device *mdev = to_mdev(dchan);
+ struct dma_slave_config *cfg = &mdev->slave_cfg;
+ struct msgdma_sw_desc *new, *first = NULL;
+ void *desc = NULL;
+ size_t len, avail;
+ dma_addr_t dma_dst, dma_src;
+ u32 desc_cnt = 0, i;
+ struct scatterlist *sg;
+ u32 stride;
+ unsigned long irqflags;
+
+ for_each_sg(sgl, sg, sg_len, i)
+ desc_cnt += DIV_ROUND_UP(sg_dma_len(sg), MSGDMA_MAX_TRANS_LEN);
+
+ spin_lock_irqsave(&mdev->lock, irqflags);
+ if (desc_cnt > mdev->desc_free_cnt) {
+ spin_unlock_irqrestore(&mdev->lock, irqflags);
+ dev_dbg(mdev->dev, "mdev %p descs are not available\n", mdev);
+ return NULL;
+ }
+ mdev->desc_free_cnt -= desc_cnt;
+ spin_unlock_irqrestore(&mdev->lock, irqflags);
+
+ avail = sg_dma_len(sgl);
+
+ /* Run until we are out of scatterlist entries */
+ while (true) {
+ /* Allocate and populate the descriptor */
+ new = msgdma_get_descriptor(mdev);
+
+ desc = &new->hw_desc;
+ len = min_t(size_t, avail, MSGDMA_MAX_TRANS_LEN);
+
+ if (dir == DMA_MEM_TO_DEV) {
+ dma_src = sg_dma_address(sgl) + sg_dma_len(sgl) - avail;
+ dma_dst = cfg->dst_addr;
+ stride = MSGDMA_DESC_STRIDE_RD;
+ } else {
+ dma_src = cfg->src_addr;
+ dma_dst = sg_dma_address(sgl) + sg_dma_len(sgl) - avail;
+ stride = MSGDMA_DESC_STRIDE_WR;
+ }
+ msgdma_desc_config(desc, dma_dst, dma_src, len, stride);
+ avail -= len;
+
+ if (!first)
+ first = new;
+ else
+ list_add_tail(&new->node, &first->tx_list);
+
+ /* Fetch the next scatterlist entry */
+ if (avail == 0) {
+ if (sg_len == 0)
+ break;
+ sgl = sg_next(sgl);
+ if (sgl == NULL)
+ break;
+ sg_len--;
+ avail = sg_dma_len(sgl);
+ }
+ }
+
+ msgdma_desc_config_eod(desc);
+ first->async_tx.flags = flags;
+
+ return &first->async_tx;
+}
+
+static int msgdma_dma_config(struct dma_chan *dchan,
+ struct dma_slave_config *config)
+{
+ struct msgdma_device *mdev = to_mdev(dchan);
+
+ memcpy(&mdev->slave_cfg, config, sizeof(*config));
+
+ return 0;
+}
+
+static void msgdma_reset(struct msgdma_device *mdev)
+{
+ u32 val;
+ int ret;
+
+ /* Reset mSGDMA */
+ iowrite32(MSGDMA_CSR_STAT_MASK, mdev->csr + MSGDMA_CSR_STATUS);
+ iowrite32(MSGDMA_CSR_CTL_RESET, mdev->csr + MSGDMA_CSR_CONTROL);
+
+ ret = readl_poll_timeout(mdev->csr + MSGDMA_CSR_STATUS, val,
+ (val & MSGDMA_CSR_STAT_RESETTING) == 0,
+ 1, 10000);
+ if (ret)
+ dev_err(mdev->dev, "DMA channel did not reset\n");
+
+ /* Clear all status bits */
+ iowrite32(MSGDMA_CSR_STAT_MASK, mdev->csr + MSGDMA_CSR_STATUS);
+
+ /* Enable the DMA controller including interrupts */
+ iowrite32(MSGDMA_CSR_CTL_STOP_ON_ERR | MSGDMA_CSR_CTL_STOP_ON_EARLY |
+ MSGDMA_CSR_CTL_GLOBAL_INTR, mdev->csr + MSGDMA_CSR_CONTROL);
+
+ mdev->idle = true;
+};
+
+static void msgdma_copy_one(struct msgdma_device *mdev,
+ struct msgdma_sw_desc *desc)
+{
+ void __iomem *hw_desc = mdev->desc;
+
+ /*
+ * Check if the DESC FIFO it not full. If its full, we need to wait
+ * for at least one entry to become free again
+ */
+ while (ioread32(mdev->csr + MSGDMA_CSR_STATUS) &
+ MSGDMA_CSR_STAT_DESC_BUF_FULL)
+ mdelay(1);
+
+ /*
+ * The descriptor needs to get copied into the descriptor FIFO
+ * of the DMA controller. The descriptor will get flushed to the
+ * FIFO, once the last word (control word) is written. Since we
+ * are not 100% sure that memcpy() writes all word in the "correct"
+ * oder (address from low to high) on all architectures, we make
+ * sure this control word is written last by single coding it and
+ * adding some write-barriers here.
+ */
+ memcpy((void __force *)hw_desc, &desc->hw_desc,
+ sizeof(desc->hw_desc) - sizeof(u32));
+
+ /* Write control word last to flush this descriptor into the FIFO */
+ mdev->idle = false;
+ wmb();
+ iowrite32(desc->hw_desc.control, hw_desc +
+ offsetof(struct msgdma_extended_desc, control));
+ wmb();
+}
+
+/**
+ * msgdma_copy_desc_to_fifo - copy descriptor(s) into controller FIFO
+ * @mdev: Pointer to the Altera mSGDMA device structure
+ * @desc: Transaction descriptor pointer
+ */
+static void msgdma_copy_desc_to_fifo(struct msgdma_device *mdev,
+ struct msgdma_sw_desc *desc)
+{
+ struct msgdma_sw_desc *sdesc, *next;
+
+ msgdma_copy_one(mdev, desc);
+
+ list_for_each_entry_safe(sdesc, next, &desc->tx_list, node)
+ msgdma_copy_one(mdev, sdesc);
+}
+
+/**
+ * msgdma_start_transfer - Initiate the new transfer
+ * @mdev: Pointer to the Altera mSGDMA device structure
+ */
+static void msgdma_start_transfer(struct msgdma_device *mdev)
+{
+ struct msgdma_sw_desc *desc;
+
+ if (!mdev->idle)
+ return;
+
+ desc = list_first_entry_or_null(&mdev->pending_list,
+ struct msgdma_sw_desc, node);
+ if (!desc)
+ return;
+
+ list_splice_tail_init(&mdev->pending_list, &mdev->active_list);
+ msgdma_copy_desc_to_fifo(mdev, desc);
+}
+
+/**
+ * msgdma_issue_pending - Issue pending transactions
+ * @chan: DMA channel pointer
+ */
+static void msgdma_issue_pending(struct dma_chan *chan)
+{
+ struct msgdma_device *mdev = to_mdev(chan);
+ unsigned long flags;
+
+ spin_lock_irqsave(&mdev->lock, flags);
+ msgdma_start_transfer(mdev);
+ spin_unlock_irqrestore(&mdev->lock, flags);
+}
+
+/**
+ * msgdma_chan_desc_cleanup - Cleanup the completed descriptors
+ * @mdev: Pointer to the Altera mSGDMA device structure
+ */
+static void msgdma_chan_desc_cleanup(struct msgdma_device *mdev)
+{
+ struct msgdma_sw_desc *desc, *next;
+
+ list_for_each_entry_safe(desc, next, &mdev->done_list, node) {
+ struct dmaengine_desc_callback cb;
+
+ list_del(&desc->node);
+
+ dmaengine_desc_get_callback(&desc->async_tx, &cb);
+ if (dmaengine_desc_callback_valid(&cb)) {
+ spin_unlock(&mdev->lock);
+ dmaengine_desc_callback_invoke(&cb, NULL);
+ spin_lock(&mdev->lock);
+ }
+
+ /* Run any dependencies, then free the descriptor */
+ msgdma_free_descriptor(mdev, desc);
+ }
+}
+
+/**
+ * msgdma_complete_descriptor - Mark the active descriptor as complete
+ * @mdev: Pointer to the Altera mSGDMA device structure
+ */
+static void msgdma_complete_descriptor(struct msgdma_device *mdev)
+{
+ struct msgdma_sw_desc *desc;
+
+ desc = list_first_entry_or_null(&mdev->active_list,
+ struct msgdma_sw_desc, node);
+ if (!desc)
+ return;
+ list_del(&desc->node);
+ dma_cookie_complete(&desc->async_tx);
+ list_add_tail(&desc->node, &mdev->done_list);
+}
+
+/**
+ * msgdma_free_descriptors - Free channel descriptors
+ * @mdev: Pointer to the Altera mSGDMA device structure
+ */
+static void msgdma_free_descriptors(struct msgdma_device *mdev)
+{
+ msgdma_free_desc_list(mdev, &mdev->active_list);
+ msgdma_free_desc_list(mdev, &mdev->pending_list);
+ msgdma_free_desc_list(mdev, &mdev->done_list);
+}
+
+/**
+ * msgdma_free_chan_resources - Free channel resources
+ * @dchan: DMA channel pointer
+ */
+static void msgdma_free_chan_resources(struct dma_chan *dchan)
+{
+ struct msgdma_device *mdev = to_mdev(dchan);
+ unsigned long flags;
+
+ spin_lock_irqsave(&mdev->lock, flags);
+ msgdma_free_descriptors(mdev);
+ spin_unlock_irqrestore(&mdev->lock, flags);
+ kfree(mdev->sw_desq);
+}
+
+/**
+ * msgdma_alloc_chan_resources - Allocate channel resources
+ * @dchan: DMA channel
+ *
+ * Return: Number of descriptors on success and failure value on error
+ */
+static int msgdma_alloc_chan_resources(struct dma_chan *dchan)
+{
+ struct msgdma_device *mdev = to_mdev(dchan);
+ struct msgdma_sw_desc *desc;
+ int i;
+
+ mdev->sw_desq = kcalloc(MSGDMA_DESC_NUM, sizeof(*desc), GFP_NOWAIT);
+ if (!mdev->sw_desq)
+ return -ENOMEM;
+
+ mdev->idle = true;
+ mdev->desc_free_cnt = MSGDMA_DESC_NUM;
+
+ INIT_LIST_HEAD(&mdev->free_list);
+
+ for (i = 0; i < MSGDMA_DESC_NUM; i++) {
+ desc = mdev->sw_desq + i;
+ dma_async_tx_descriptor_init(&desc->async_tx, &mdev->dmachan);
+ desc->async_tx.tx_submit = msgdma_tx_submit;
+ list_add_tail(&desc->node, &mdev->free_list);
+ }
+
+ return MSGDMA_DESC_NUM;
+}
+
+/**
+ * msgdma_tasklet - Schedule completion tasklet
+ * @t: Pointer to the Altera sSGDMA channel structure
+ */
+static void msgdma_tasklet(struct tasklet_struct *t)
+{
+ struct msgdma_device *mdev = from_tasklet(mdev, t, irq_tasklet);
+ u32 count;
+ u32 __maybe_unused size;
+ u32 __maybe_unused status;
+ unsigned long flags;
+
+ spin_lock_irqsave(&mdev->lock, flags);
+
+ if (mdev->resp) {
+ /* Read number of responses that are available */
+ count = ioread32(mdev->csr + MSGDMA_CSR_RESP_FILL_LEVEL);
+ dev_dbg(mdev->dev, "%s (%d): response count=%d\n",
+ __func__, __LINE__, count);
+ } else {
+ count = 1;
+ }
+
+ while (count--) {
+ /*
+ * Read both longwords to purge this response from the FIFO
+ * On Avalon-MM implementations, size and status do not
+ * have any real values, like transferred bytes or error
+ * bits. So we need to just drop these values.
+ */
+ if (mdev->resp) {
+ size = ioread32(mdev->resp +
+ MSGDMA_RESP_BYTES_TRANSFERRED);
+ status = ioread32(mdev->resp +
+ MSGDMA_RESP_STATUS);
+ }
+
+ msgdma_complete_descriptor(mdev);
+ msgdma_chan_desc_cleanup(mdev);
+ }
+
+ spin_unlock_irqrestore(&mdev->lock, flags);
+}
+
+/**
+ * msgdma_irq_handler - Altera mSGDMA Interrupt handler
+ * @irq: IRQ number
+ * @data: Pointer to the Altera mSGDMA device structure
+ *
+ * Return: IRQ_HANDLED/IRQ_NONE
+ */
+static irqreturn_t msgdma_irq_handler(int irq, void *data)
+{
+ struct msgdma_device *mdev = data;
+ u32 status;
+
+ status = ioread32(mdev->csr + MSGDMA_CSR_STATUS);
+ if ((status & MSGDMA_CSR_STAT_BUSY) == 0) {
+ /* Start next transfer if the DMA controller is idle */
+ spin_lock(&mdev->lock);
+ mdev->idle = true;
+ msgdma_start_transfer(mdev);
+ spin_unlock(&mdev->lock);
+ }
+
+ tasklet_schedule(&mdev->irq_tasklet);
+
+ /* Clear interrupt in mSGDMA controller */
+ iowrite32(MSGDMA_CSR_STAT_IRQ, mdev->csr + MSGDMA_CSR_STATUS);
+
+ return IRQ_HANDLED;
+}
+
+/**
+ * msgdma_dev_remove() - Device remove function
+ * @mdev: Pointer to the Altera mSGDMA device structure
+ */
+static void msgdma_dev_remove(struct msgdma_device *mdev)
+{
+ if (!mdev)
+ return;
+
+ devm_free_irq(mdev->dev, mdev->irq, mdev);
+ tasklet_kill(&mdev->irq_tasklet);
+ list_del(&mdev->dmachan.device_node);
+}
+
+static int request_and_map(struct platform_device *pdev, const char *name,
+ struct resource **res, void __iomem **ptr,
+ bool optional)
+{
+ struct resource *region;
+ struct device *device = &pdev->dev;
+
+ *res = platform_get_resource_byname(pdev, IORESOURCE_MEM, name);
+ if (*res == NULL) {
+ if (optional) {
+ *ptr = NULL;
+ dev_info(device, "optional resource %s not defined\n",
+ name);
+ return 0;
+ }
+ dev_err(device, "mandatory resource %s not defined\n", name);
+ return -ENODEV;
+ }
+
+ region = devm_request_mem_region(device, (*res)->start,
+ resource_size(*res), dev_name(device));
+ if (region == NULL) {
+ dev_err(device, "unable to request %s\n", name);
+ return -EBUSY;
+ }
+
+ *ptr = devm_ioremap(device, region->start,
+ resource_size(region));
+ if (*ptr == NULL) {
+ dev_err(device, "ioremap of %s failed!", name);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+/**
+ * msgdma_probe - Driver probe function
+ * @pdev: Pointer to the platform_device structure
+ *
+ * Return: '0' on success and failure value on error
+ */
+static int msgdma_probe(struct platform_device *pdev)
+{
+ struct msgdma_device *mdev;
+ struct dma_device *dma_dev;
+ struct resource *dma_res;
+ int ret;
+
+ mdev = devm_kzalloc(&pdev->dev, sizeof(*mdev), GFP_NOWAIT);
+ if (!mdev)
+ return -ENOMEM;
+
+ mdev->dev = &pdev->dev;
+
+ /* Map CSR space */
+ ret = request_and_map(pdev, "csr", &dma_res, &mdev->csr, false);
+ if (ret)
+ return ret;
+
+ /* Map (extended) descriptor space */
+ ret = request_and_map(pdev, "desc", &dma_res, &mdev->desc, false);
+ if (ret)
+ return ret;
+
+ /* Map response space */
+ ret = request_and_map(pdev, "resp", &dma_res, &mdev->resp, true);
+ if (ret)
+ return ret;
+
+ platform_set_drvdata(pdev, mdev);
+
+ /* Get interrupt nr from platform data */
+ mdev->irq = platform_get_irq(pdev, 0);
+ if (mdev->irq < 0)
+ return -ENXIO;
+
+ ret = devm_request_irq(&pdev->dev, mdev->irq, msgdma_irq_handler,
+ 0, dev_name(&pdev->dev), mdev);
+ if (ret)
+ return ret;
+
+ tasklet_setup(&mdev->irq_tasklet, msgdma_tasklet);
+
+ dma_cookie_init(&mdev->dmachan);
+
+ spin_lock_init(&mdev->lock);
+
+ INIT_LIST_HEAD(&mdev->active_list);
+ INIT_LIST_HEAD(&mdev->pending_list);
+ INIT_LIST_HEAD(&mdev->done_list);
+ INIT_LIST_HEAD(&mdev->free_list);
+
+ dma_dev = &mdev->dmadev;
+
+ /* Set DMA capabilities */
+ dma_cap_zero(dma_dev->cap_mask);
+ dma_cap_set(DMA_MEMCPY, dma_dev->cap_mask);
+ dma_cap_set(DMA_SLAVE, dma_dev->cap_mask);
+
+ dma_dev->src_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_4_BYTES);
+ dma_dev->dst_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_4_BYTES);
+ dma_dev->directions = BIT(DMA_MEM_TO_DEV) | BIT(DMA_DEV_TO_MEM) |
+ BIT(DMA_MEM_TO_MEM);
+ dma_dev->residue_granularity = DMA_RESIDUE_GRANULARITY_DESCRIPTOR;
+
+ /* Init DMA link list */
+ INIT_LIST_HEAD(&dma_dev->channels);
+
+ /* Set base routines */
+ dma_dev->device_tx_status = dma_cookie_status;
+ dma_dev->device_issue_pending = msgdma_issue_pending;
+ dma_dev->dev = &pdev->dev;
+
+ dma_dev->copy_align = DMAENGINE_ALIGN_4_BYTES;
+ dma_dev->device_prep_dma_memcpy = msgdma_prep_memcpy;
+ dma_dev->device_prep_slave_sg = msgdma_prep_slave_sg;
+ dma_dev->device_config = msgdma_dma_config;
+
+ dma_dev->device_alloc_chan_resources = msgdma_alloc_chan_resources;
+ dma_dev->device_free_chan_resources = msgdma_free_chan_resources;
+
+ mdev->dmachan.device = dma_dev;
+ list_add_tail(&mdev->dmachan.device_node, &dma_dev->channels);
+
+ /* Set DMA mask to 64 bits */
+ ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
+ if (ret) {
+ dev_warn(&pdev->dev, "unable to set coherent mask to 64");
+ goto fail;
+ }
+
+ msgdma_reset(mdev);
+
+ ret = dma_async_device_register(dma_dev);
+ if (ret)
+ goto fail;
+
+ ret = of_dma_controller_register(pdev->dev.of_node,
+ of_dma_xlate_by_chan_id, dma_dev);
+ if (ret == -EINVAL)
+ dev_warn(&pdev->dev, "device was not probed from DT");
+ else if (ret && ret != -ENODEV)
+ goto fail;
+
+ dev_notice(&pdev->dev, "Altera mSGDMA driver probe success\n");
+
+ return 0;
+
+fail:
+ msgdma_dev_remove(mdev);
+
+ return ret;
+}
+
+/**
+ * msgdma_remove() - Driver remove function
+ * @pdev: Pointer to the platform_device structure
+ *
+ * Return: Always '0'
+ */
+static int msgdma_remove(struct platform_device *pdev)
+{
+ struct msgdma_device *mdev = platform_get_drvdata(pdev);
+
+ if (pdev->dev.of_node)
+ of_dma_controller_free(pdev->dev.of_node);
+ dma_async_device_unregister(&mdev->dmadev);
+ msgdma_dev_remove(mdev);
+
+ dev_notice(&pdev->dev, "Altera mSGDMA driver removed\n");
+
+ return 0;
+}
+
+#ifdef CONFIG_OF
+static const struct of_device_id msgdma_match[] = {
+ { .compatible = "altr,socfpga-msgdma", },
+ { }
+};
+
+MODULE_DEVICE_TABLE(of, msgdma_match);
+#endif
+
+static struct platform_driver msgdma_driver = {
+ .driver = {
+ .name = "altera-msgdma",
+ .of_match_table = of_match_ptr(msgdma_match),
+ },
+ .probe = msgdma_probe,
+ .remove = msgdma_remove,
+};
+
+module_platform_driver(msgdma_driver);
+
+MODULE_ALIAS("platform:altera-msgdma");
+MODULE_DESCRIPTION("Altera mSGDMA driver");
+MODULE_AUTHOR("Stefan Roese <sr@denx.de>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/dma/amba-pl08x.c b/drivers/dma/amba-pl08x.c
new file mode 100644
index 000000000..eea8bd33b
--- /dev/null
+++ b/drivers/dma/amba-pl08x.c
@@ -0,0 +1,3073 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2006 ARM Ltd.
+ * Copyright (c) 2010 ST-Ericsson SA
+ * Copyirght (c) 2017 Linaro Ltd.
+ *
+ * Author: Peter Pearse <peter.pearse@arm.com>
+ * Author: Linus Walleij <linus.walleij@linaro.org>
+ *
+ * Documentation: ARM DDI 0196G == PL080
+ * Documentation: ARM DDI 0218E == PL081
+ * Documentation: S3C6410 User's Manual == PL080S
+ *
+ * PL080 & PL081 both have 16 sets of DMA signals that can be routed to any
+ * channel.
+ *
+ * The PL080 has 8 channels available for simultaneous use, and the PL081
+ * has only two channels. So on these DMA controllers the number of channels
+ * and the number of incoming DMA signals are two totally different things.
+ * It is usually not possible to theoretically handle all physical signals,
+ * so a multiplexing scheme with possible denial of use is necessary.
+ *
+ * The PL080 has a dual bus master, PL081 has a single master.
+ *
+ * PL080S is a version modified by Samsung and used in S3C64xx SoCs.
+ * It differs in following aspects:
+ * - CH_CONFIG register at different offset,
+ * - separate CH_CONTROL2 register for transfer size,
+ * - bigger maximum transfer size,
+ * - 8-word aligned LLI, instead of 4-word, due to extra CCTL2 word,
+ * - no support for peripheral flow control.
+ *
+ * Memory to peripheral transfer may be visualized as
+ * Get data from memory to DMAC
+ * Until no data left
+ * On burst request from peripheral
+ * Destination burst from DMAC to peripheral
+ * Clear burst request
+ * Raise terminal count interrupt
+ *
+ * For peripherals with a FIFO:
+ * Source burst size == half the depth of the peripheral FIFO
+ * Destination burst size == the depth of the peripheral FIFO
+ *
+ * (Bursts are irrelevant for mem to mem transfers - there are no burst
+ * signals, the DMA controller will simply facilitate its AHB master.)
+ *
+ * ASSUMES default (little) endianness for DMA transfers
+ *
+ * The PL08x has two flow control settings:
+ * - DMAC flow control: the transfer size defines the number of transfers
+ * which occur for the current LLI entry, and the DMAC raises TC at the
+ * end of every LLI entry. Observed behaviour shows the DMAC listening
+ * to both the BREQ and SREQ signals (contrary to documented),
+ * transferring data if either is active. The LBREQ and LSREQ signals
+ * are ignored.
+ *
+ * - Peripheral flow control: the transfer size is ignored (and should be
+ * zero). The data is transferred from the current LLI entry, until
+ * after the final transfer signalled by LBREQ or LSREQ. The DMAC
+ * will then move to the next LLI entry. Unsupported by PL080S.
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/pl08x.h>
+#include <linux/debugfs.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/dmaengine.h>
+#include <linux/dmapool.h>
+#include <linux/dma-mapping.h>
+#include <linux/export.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_dma.h>
+#include <linux/pm_runtime.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/amba/pl080.h>
+
+#include "dmaengine.h"
+#include "virt-dma.h"
+
+#define DRIVER_NAME "pl08xdmac"
+
+#define PL80X_DMA_BUSWIDTHS \
+ BIT(DMA_SLAVE_BUSWIDTH_UNDEFINED) | \
+ BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | \
+ BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \
+ BIT(DMA_SLAVE_BUSWIDTH_4_BYTES)
+
+static struct amba_driver pl08x_amba_driver;
+struct pl08x_driver_data;
+
+/**
+ * struct vendor_data - vendor-specific config parameters for PL08x derivatives
+ * @config_offset: offset to the configuration register
+ * @channels: the number of channels available in this variant
+ * @signals: the number of request signals available from the hardware
+ * @dualmaster: whether this version supports dual AHB masters or not.
+ * @nomadik: whether this variant is a ST Microelectronics Nomadik, where the
+ * channels have Nomadik security extension bits that need to be checked
+ * for permission before use and some registers are missing
+ * @pl080s: whether this variant is a Samsung PL080S, which has separate
+ * register and LLI word for transfer size.
+ * @ftdmac020: whether this variant is a Faraday Technology FTDMAC020
+ * @max_transfer_size: the maximum single element transfer size for this
+ * PL08x variant.
+ */
+struct vendor_data {
+ u8 config_offset;
+ u8 channels;
+ u8 signals;
+ bool dualmaster;
+ bool nomadik;
+ bool pl080s;
+ bool ftdmac020;
+ u32 max_transfer_size;
+};
+
+/**
+ * struct pl08x_bus_data - information of source or destination
+ * busses for a transfer
+ * @addr: current address
+ * @maxwidth: the maximum width of a transfer on this bus
+ * @buswidth: the width of this bus in bytes: 1, 2 or 4
+ */
+struct pl08x_bus_data {
+ dma_addr_t addr;
+ u8 maxwidth;
+ u8 buswidth;
+};
+
+#define IS_BUS_ALIGNED(bus) IS_ALIGNED((bus)->addr, (bus)->buswidth)
+
+/**
+ * struct pl08x_phy_chan - holder for the physical channels
+ * @id: physical index to this channel
+ * @base: memory base address for this physical channel
+ * @reg_config: configuration address for this physical channel
+ * @reg_control: control address for this physical channel
+ * @reg_src: transfer source address register
+ * @reg_dst: transfer destination address register
+ * @reg_lli: transfer LLI address register
+ * @reg_busy: if the variant has a special per-channel busy register,
+ * this contains a pointer to it
+ * @lock: a lock to use when altering an instance of this struct
+ * @serving: the virtual channel currently being served by this physical
+ * channel
+ * @locked: channel unavailable for the system, e.g. dedicated to secure
+ * world
+ * @ftdmac020: channel is on a FTDMAC020
+ * @pl080s: channel is on a PL08s
+ */
+struct pl08x_phy_chan {
+ unsigned int id;
+ void __iomem *base;
+ void __iomem *reg_config;
+ void __iomem *reg_control;
+ void __iomem *reg_src;
+ void __iomem *reg_dst;
+ void __iomem *reg_lli;
+ void __iomem *reg_busy;
+ spinlock_t lock;
+ struct pl08x_dma_chan *serving;
+ bool locked;
+ bool ftdmac020;
+ bool pl080s;
+};
+
+/**
+ * struct pl08x_sg - structure containing data per sg
+ * @src_addr: src address of sg
+ * @dst_addr: dst address of sg
+ * @len: transfer len in bytes
+ * @node: node for txd's dsg_list
+ */
+struct pl08x_sg {
+ dma_addr_t src_addr;
+ dma_addr_t dst_addr;
+ size_t len;
+ struct list_head node;
+};
+
+/**
+ * struct pl08x_txd - wrapper for struct dma_async_tx_descriptor
+ * @vd: virtual DMA descriptor
+ * @dsg_list: list of children sg's
+ * @llis_bus: DMA memory address (physical) start for the LLIs
+ * @llis_va: virtual memory address start for the LLIs
+ * @cctl: control reg values for current txd
+ * @ccfg: config reg values for current txd
+ * @done: this marks completed descriptors, which should not have their
+ * mux released.
+ * @cyclic: indicate cyclic transfers
+ */
+struct pl08x_txd {
+ struct virt_dma_desc vd;
+ struct list_head dsg_list;
+ dma_addr_t llis_bus;
+ u32 *llis_va;
+ /* Default cctl value for LLIs */
+ u32 cctl;
+ /*
+ * Settings to be put into the physical channel when we
+ * trigger this txd. Other registers are in llis_va[0].
+ */
+ u32 ccfg;
+ bool done;
+ bool cyclic;
+};
+
+/**
+ * enum pl08x_dma_chan_state - holds the PL08x specific virtual channel
+ * states
+ * @PL08X_CHAN_IDLE: the channel is idle
+ * @PL08X_CHAN_RUNNING: the channel has allocated a physical transport
+ * channel and is running a transfer on it
+ * @PL08X_CHAN_PAUSED: the channel has allocated a physical transport
+ * channel, but the transfer is currently paused
+ * @PL08X_CHAN_WAITING: the channel is waiting for a physical transport
+ * channel to become available (only pertains to memcpy channels)
+ */
+enum pl08x_dma_chan_state {
+ PL08X_CHAN_IDLE,
+ PL08X_CHAN_RUNNING,
+ PL08X_CHAN_PAUSED,
+ PL08X_CHAN_WAITING,
+};
+
+/**
+ * struct pl08x_dma_chan - this structure wraps a DMA ENGINE channel
+ * @vc: wrapped virtual channel
+ * @phychan: the physical channel utilized by this channel, if there is one
+ * @name: name of channel
+ * @cd: channel platform data
+ * @cfg: slave configuration
+ * @at: active transaction on this channel
+ * @host: a pointer to the host (internal use)
+ * @state: whether the channel is idle, paused, running etc
+ * @slave: whether this channel is a device (slave) or for memcpy
+ * @signal: the physical DMA request signal which this channel is using
+ * @mux_use: count of descriptors using this DMA request signal setting
+ * @waiting_at: time in jiffies when this channel moved to waiting state
+ */
+struct pl08x_dma_chan {
+ struct virt_dma_chan vc;
+ struct pl08x_phy_chan *phychan;
+ const char *name;
+ struct pl08x_channel_data *cd;
+ struct dma_slave_config cfg;
+ struct pl08x_txd *at;
+ struct pl08x_driver_data *host;
+ enum pl08x_dma_chan_state state;
+ bool slave;
+ int signal;
+ unsigned mux_use;
+ unsigned long waiting_at;
+};
+
+/**
+ * struct pl08x_driver_data - the local state holder for the PL08x
+ * @slave: optional slave engine for this instance
+ * @memcpy: memcpy engine for this instance
+ * @has_slave: the PL08x has a slave engine (routed signals)
+ * @base: virtual memory base (remapped) for the PL08x
+ * @adev: the corresponding AMBA (PrimeCell) bus entry
+ * @vd: vendor data for this PL08x variant
+ * @pd: platform data passed in from the platform/machine
+ * @phy_chans: array of data for the physical channels
+ * @pool: a pool for the LLI descriptors
+ * @lli_buses: bitmask to or in to LLI pointer selecting AHB port for LLI
+ * fetches
+ * @mem_buses: set to indicate memory transfers on AHB2.
+ * @lli_words: how many words are used in each LLI item for this variant
+ */
+struct pl08x_driver_data {
+ struct dma_device slave;
+ struct dma_device memcpy;
+ bool has_slave;
+ void __iomem *base;
+ struct amba_device *adev;
+ const struct vendor_data *vd;
+ struct pl08x_platform_data *pd;
+ struct pl08x_phy_chan *phy_chans;
+ struct dma_pool *pool;
+ u8 lli_buses;
+ u8 mem_buses;
+ u8 lli_words;
+};
+
+/*
+ * PL08X specific defines
+ */
+
+/* The order of words in an LLI. */
+#define PL080_LLI_SRC 0
+#define PL080_LLI_DST 1
+#define PL080_LLI_LLI 2
+#define PL080_LLI_CCTL 3
+#define PL080S_LLI_CCTL2 4
+
+/* Total words in an LLI. */
+#define PL080_LLI_WORDS 4
+#define PL080S_LLI_WORDS 8
+
+/*
+ * Number of LLIs in each LLI buffer allocated for one transfer
+ * (maximum times we call dma_pool_alloc on this pool without freeing)
+ */
+#define MAX_NUM_TSFR_LLIS 512
+#define PL08X_ALIGN 8
+
+static inline struct pl08x_dma_chan *to_pl08x_chan(struct dma_chan *chan)
+{
+ return container_of(chan, struct pl08x_dma_chan, vc.chan);
+}
+
+static inline struct pl08x_txd *to_pl08x_txd(struct dma_async_tx_descriptor *tx)
+{
+ return container_of(tx, struct pl08x_txd, vd.tx);
+}
+
+/*
+ * Mux handling.
+ *
+ * This gives us the DMA request input to the PL08x primecell which the
+ * peripheral described by the channel data will be routed to, possibly
+ * via a board/SoC specific external MUX. One important point to note
+ * here is that this does not depend on the physical channel.
+ */
+static int pl08x_request_mux(struct pl08x_dma_chan *plchan)
+{
+ const struct pl08x_platform_data *pd = plchan->host->pd;
+ int ret;
+
+ if (plchan->mux_use++ == 0 && pd->get_xfer_signal) {
+ ret = pd->get_xfer_signal(plchan->cd);
+ if (ret < 0) {
+ plchan->mux_use = 0;
+ return ret;
+ }
+
+ plchan->signal = ret;
+ }
+ return 0;
+}
+
+static void pl08x_release_mux(struct pl08x_dma_chan *plchan)
+{
+ const struct pl08x_platform_data *pd = plchan->host->pd;
+
+ if (plchan->signal >= 0) {
+ WARN_ON(plchan->mux_use == 0);
+
+ if (--plchan->mux_use == 0 && pd->put_xfer_signal) {
+ pd->put_xfer_signal(plchan->cd, plchan->signal);
+ plchan->signal = -1;
+ }
+ }
+}
+
+/*
+ * Physical channel handling
+ */
+
+/* Whether a certain channel is busy or not */
+static int pl08x_phy_channel_busy(struct pl08x_phy_chan *ch)
+{
+ unsigned int val;
+
+ /* If we have a special busy register, take a shortcut */
+ if (ch->reg_busy) {
+ val = readl(ch->reg_busy);
+ return !!(val & BIT(ch->id));
+ }
+ val = readl(ch->reg_config);
+ return val & PL080_CONFIG_ACTIVE;
+}
+
+/*
+ * pl08x_write_lli() - Write an LLI into the DMA controller.
+ *
+ * The PL08x derivatives support linked lists, but the first item of the
+ * list containing the source, destination, control word and next LLI is
+ * ignored. Instead the driver has to write those values directly into the
+ * SRC, DST, LLI and control registers. On FTDMAC020 also the SIZE
+ * register need to be set up for the first transfer.
+ */
+static void pl08x_write_lli(struct pl08x_driver_data *pl08x,
+ struct pl08x_phy_chan *phychan, const u32 *lli, u32 ccfg)
+{
+ if (pl08x->vd->pl080s)
+ dev_vdbg(&pl08x->adev->dev,
+ "WRITE channel %d: csrc=0x%08x, cdst=0x%08x, "
+ "clli=0x%08x, cctl=0x%08x, cctl2=0x%08x, ccfg=0x%08x\n",
+ phychan->id, lli[PL080_LLI_SRC], lli[PL080_LLI_DST],
+ lli[PL080_LLI_LLI], lli[PL080_LLI_CCTL],
+ lli[PL080S_LLI_CCTL2], ccfg);
+ else
+ dev_vdbg(&pl08x->adev->dev,
+ "WRITE channel %d: csrc=0x%08x, cdst=0x%08x, "
+ "clli=0x%08x, cctl=0x%08x, ccfg=0x%08x\n",
+ phychan->id, lli[PL080_LLI_SRC], lli[PL080_LLI_DST],
+ lli[PL080_LLI_LLI], lli[PL080_LLI_CCTL], ccfg);
+
+ writel_relaxed(lli[PL080_LLI_SRC], phychan->reg_src);
+ writel_relaxed(lli[PL080_LLI_DST], phychan->reg_dst);
+ writel_relaxed(lli[PL080_LLI_LLI], phychan->reg_lli);
+
+ /*
+ * The FTMAC020 has a different layout in the CCTL word of the LLI
+ * and the CCTL register which is split in CSR and SIZE registers.
+ * Convert the LLI item CCTL into the proper values to write into
+ * the CSR and SIZE registers.
+ */
+ if (phychan->ftdmac020) {
+ u32 llictl = lli[PL080_LLI_CCTL];
+ u32 val = 0;
+
+ /* Write the transfer size (12 bits) to the size register */
+ writel_relaxed(llictl & FTDMAC020_LLI_TRANSFER_SIZE_MASK,
+ phychan->base + FTDMAC020_CH_SIZE);
+ /*
+ * Then write the control bits 28..16 to the control register
+ * by shuffleing the bits around to where they are in the
+ * main register. The mapping is as follows:
+ * Bit 28: TC_MSK - mask on all except last LLI
+ * Bit 27..25: SRC_WIDTH
+ * Bit 24..22: DST_WIDTH
+ * Bit 21..20: SRCAD_CTRL
+ * Bit 19..17: DSTAD_CTRL
+ * Bit 17: SRC_SEL
+ * Bit 16: DST_SEL
+ */
+ if (llictl & FTDMAC020_LLI_TC_MSK)
+ val |= FTDMAC020_CH_CSR_TC_MSK;
+ val |= ((llictl & FTDMAC020_LLI_SRC_WIDTH_MSK) >>
+ (FTDMAC020_LLI_SRC_WIDTH_SHIFT -
+ FTDMAC020_CH_CSR_SRC_WIDTH_SHIFT));
+ val |= ((llictl & FTDMAC020_LLI_DST_WIDTH_MSK) >>
+ (FTDMAC020_LLI_DST_WIDTH_SHIFT -
+ FTDMAC020_CH_CSR_DST_WIDTH_SHIFT));
+ val |= ((llictl & FTDMAC020_LLI_SRCAD_CTL_MSK) >>
+ (FTDMAC020_LLI_SRCAD_CTL_SHIFT -
+ FTDMAC020_CH_CSR_SRCAD_CTL_SHIFT));
+ val |= ((llictl & FTDMAC020_LLI_DSTAD_CTL_MSK) >>
+ (FTDMAC020_LLI_DSTAD_CTL_SHIFT -
+ FTDMAC020_CH_CSR_DSTAD_CTL_SHIFT));
+ if (llictl & FTDMAC020_LLI_SRC_SEL)
+ val |= FTDMAC020_CH_CSR_SRC_SEL;
+ if (llictl & FTDMAC020_LLI_DST_SEL)
+ val |= FTDMAC020_CH_CSR_DST_SEL;
+
+ /*
+ * Set up the bits that exist in the CSR but are not
+ * part the LLI, i.e. only gets written to the control
+ * register right here.
+ *
+ * FIXME: do not just handle memcpy, also handle slave DMA.
+ */
+ switch (pl08x->pd->memcpy_burst_size) {
+ default:
+ case PL08X_BURST_SZ_1:
+ val |= PL080_BSIZE_1 <<
+ FTDMAC020_CH_CSR_SRC_SIZE_SHIFT;
+ break;
+ case PL08X_BURST_SZ_4:
+ val |= PL080_BSIZE_4 <<
+ FTDMAC020_CH_CSR_SRC_SIZE_SHIFT;
+ break;
+ case PL08X_BURST_SZ_8:
+ val |= PL080_BSIZE_8 <<
+ FTDMAC020_CH_CSR_SRC_SIZE_SHIFT;
+ break;
+ case PL08X_BURST_SZ_16:
+ val |= PL080_BSIZE_16 <<
+ FTDMAC020_CH_CSR_SRC_SIZE_SHIFT;
+ break;
+ case PL08X_BURST_SZ_32:
+ val |= PL080_BSIZE_32 <<
+ FTDMAC020_CH_CSR_SRC_SIZE_SHIFT;
+ break;
+ case PL08X_BURST_SZ_64:
+ val |= PL080_BSIZE_64 <<
+ FTDMAC020_CH_CSR_SRC_SIZE_SHIFT;
+ break;
+ case PL08X_BURST_SZ_128:
+ val |= PL080_BSIZE_128 <<
+ FTDMAC020_CH_CSR_SRC_SIZE_SHIFT;
+ break;
+ case PL08X_BURST_SZ_256:
+ val |= PL080_BSIZE_256 <<
+ FTDMAC020_CH_CSR_SRC_SIZE_SHIFT;
+ break;
+ }
+
+ /* Protection flags */
+ if (pl08x->pd->memcpy_prot_buff)
+ val |= FTDMAC020_CH_CSR_PROT2;
+ if (pl08x->pd->memcpy_prot_cache)
+ val |= FTDMAC020_CH_CSR_PROT3;
+ /* We are the kernel, so we are in privileged mode */
+ val |= FTDMAC020_CH_CSR_PROT1;
+
+ writel_relaxed(val, phychan->reg_control);
+ } else {
+ /* Bits are just identical */
+ writel_relaxed(lli[PL080_LLI_CCTL], phychan->reg_control);
+ }
+
+ /* Second control word on the PL080s */
+ if (pl08x->vd->pl080s)
+ writel_relaxed(lli[PL080S_LLI_CCTL2],
+ phychan->base + PL080S_CH_CONTROL2);
+
+ writel(ccfg, phychan->reg_config);
+}
+
+/*
+ * Set the initial DMA register values i.e. those for the first LLI
+ * The next LLI pointer and the configuration interrupt bit have
+ * been set when the LLIs were constructed. Poke them into the hardware
+ * and start the transfer.
+ */
+static void pl08x_start_next_txd(struct pl08x_dma_chan *plchan)
+{
+ struct pl08x_driver_data *pl08x = plchan->host;
+ struct pl08x_phy_chan *phychan = plchan->phychan;
+ struct virt_dma_desc *vd = vchan_next_desc(&plchan->vc);
+ struct pl08x_txd *txd = to_pl08x_txd(&vd->tx);
+ u32 val;
+
+ list_del(&txd->vd.node);
+
+ plchan->at = txd;
+
+ /* Wait for channel inactive */
+ while (pl08x_phy_channel_busy(phychan))
+ cpu_relax();
+
+ pl08x_write_lli(pl08x, phychan, &txd->llis_va[0], txd->ccfg);
+
+ /* Enable the DMA channel */
+ /* Do not access config register until channel shows as disabled */
+ while (readl(pl08x->base + PL080_EN_CHAN) & BIT(phychan->id))
+ cpu_relax();
+
+ /* Do not access config register until channel shows as inactive */
+ if (phychan->ftdmac020) {
+ val = readl(phychan->reg_config);
+ while (val & FTDMAC020_CH_CFG_BUSY)
+ val = readl(phychan->reg_config);
+
+ val = readl(phychan->reg_control);
+ while (val & FTDMAC020_CH_CSR_EN)
+ val = readl(phychan->reg_control);
+
+ writel(val | FTDMAC020_CH_CSR_EN,
+ phychan->reg_control);
+ } else {
+ val = readl(phychan->reg_config);
+ while ((val & PL080_CONFIG_ACTIVE) ||
+ (val & PL080_CONFIG_ENABLE))
+ val = readl(phychan->reg_config);
+
+ writel(val | PL080_CONFIG_ENABLE, phychan->reg_config);
+ }
+}
+
+/*
+ * Pause the channel by setting the HALT bit.
+ *
+ * For M->P transfers, pause the DMAC first and then stop the peripheral -
+ * the FIFO can only drain if the peripheral is still requesting data.
+ * (note: this can still timeout if the DMAC FIFO never drains of data.)
+ *
+ * For P->M transfers, disable the peripheral first to stop it filling
+ * the DMAC FIFO, and then pause the DMAC.
+ */
+static void pl08x_pause_phy_chan(struct pl08x_phy_chan *ch)
+{
+ u32 val;
+ int timeout;
+
+ if (ch->ftdmac020) {
+ /* Use the enable bit on the FTDMAC020 */
+ val = readl(ch->reg_control);
+ val &= ~FTDMAC020_CH_CSR_EN;
+ writel(val, ch->reg_control);
+ return;
+ }
+
+ /* Set the HALT bit and wait for the FIFO to drain */
+ val = readl(ch->reg_config);
+ val |= PL080_CONFIG_HALT;
+ writel(val, ch->reg_config);
+
+ /* Wait for channel inactive */
+ for (timeout = 1000; timeout; timeout--) {
+ if (!pl08x_phy_channel_busy(ch))
+ break;
+ udelay(1);
+ }
+ if (pl08x_phy_channel_busy(ch))
+ pr_err("pl08x: channel%u timeout waiting for pause\n", ch->id);
+}
+
+static void pl08x_resume_phy_chan(struct pl08x_phy_chan *ch)
+{
+ u32 val;
+
+ /* Use the enable bit on the FTDMAC020 */
+ if (ch->ftdmac020) {
+ val = readl(ch->reg_control);
+ val |= FTDMAC020_CH_CSR_EN;
+ writel(val, ch->reg_control);
+ return;
+ }
+
+ /* Clear the HALT bit */
+ val = readl(ch->reg_config);
+ val &= ~PL080_CONFIG_HALT;
+ writel(val, ch->reg_config);
+}
+
+/*
+ * pl08x_terminate_phy_chan() stops the channel, clears the FIFO and
+ * clears any pending interrupt status. This should not be used for
+ * an on-going transfer, but as a method of shutting down a channel
+ * (eg, when it's no longer used) or terminating a transfer.
+ */
+static void pl08x_terminate_phy_chan(struct pl08x_driver_data *pl08x,
+ struct pl08x_phy_chan *ch)
+{
+ u32 val;
+
+ /* The layout for the FTDMAC020 is different */
+ if (ch->ftdmac020) {
+ /* Disable all interrupts */
+ val = readl(ch->reg_config);
+ val |= (FTDMAC020_CH_CFG_INT_ABT_MASK |
+ FTDMAC020_CH_CFG_INT_ERR_MASK |
+ FTDMAC020_CH_CFG_INT_TC_MASK);
+ writel(val, ch->reg_config);
+
+ /* Abort and disable channel */
+ val = readl(ch->reg_control);
+ val &= ~FTDMAC020_CH_CSR_EN;
+ val |= FTDMAC020_CH_CSR_ABT;
+ writel(val, ch->reg_control);
+
+ /* Clear ABT and ERR interrupt flags */
+ writel(BIT(ch->id) | BIT(ch->id + 16),
+ pl08x->base + PL080_ERR_CLEAR);
+ writel(BIT(ch->id), pl08x->base + PL080_TC_CLEAR);
+
+ return;
+ }
+
+ val = readl(ch->reg_config);
+ val &= ~(PL080_CONFIG_ENABLE | PL080_CONFIG_ERR_IRQ_MASK |
+ PL080_CONFIG_TC_IRQ_MASK);
+ writel(val, ch->reg_config);
+
+ writel(BIT(ch->id), pl08x->base + PL080_ERR_CLEAR);
+ writel(BIT(ch->id), pl08x->base + PL080_TC_CLEAR);
+}
+
+static u32 get_bytes_in_phy_channel(struct pl08x_phy_chan *ch)
+{
+ u32 val;
+ u32 bytes;
+
+ if (ch->ftdmac020) {
+ bytes = readl(ch->base + FTDMAC020_CH_SIZE);
+
+ val = readl(ch->reg_control);
+ val &= FTDMAC020_CH_CSR_SRC_WIDTH_MSK;
+ val >>= FTDMAC020_CH_CSR_SRC_WIDTH_SHIFT;
+ } else if (ch->pl080s) {
+ val = readl(ch->base + PL080S_CH_CONTROL2);
+ bytes = val & PL080S_CONTROL_TRANSFER_SIZE_MASK;
+
+ val = readl(ch->reg_control);
+ val &= PL080_CONTROL_SWIDTH_MASK;
+ val >>= PL080_CONTROL_SWIDTH_SHIFT;
+ } else {
+ /* Plain PL08x */
+ val = readl(ch->reg_control);
+ bytes = val & PL080_CONTROL_TRANSFER_SIZE_MASK;
+
+ val &= PL080_CONTROL_SWIDTH_MASK;
+ val >>= PL080_CONTROL_SWIDTH_SHIFT;
+ }
+
+ switch (val) {
+ case PL080_WIDTH_8BIT:
+ break;
+ case PL080_WIDTH_16BIT:
+ bytes *= 2;
+ break;
+ case PL080_WIDTH_32BIT:
+ bytes *= 4;
+ break;
+ }
+ return bytes;
+}
+
+static u32 get_bytes_in_lli(struct pl08x_phy_chan *ch, const u32 *llis_va)
+{
+ u32 val;
+ u32 bytes;
+
+ if (ch->ftdmac020) {
+ val = llis_va[PL080_LLI_CCTL];
+ bytes = val & FTDMAC020_LLI_TRANSFER_SIZE_MASK;
+
+ val = llis_va[PL080_LLI_CCTL];
+ val &= FTDMAC020_LLI_SRC_WIDTH_MSK;
+ val >>= FTDMAC020_LLI_SRC_WIDTH_SHIFT;
+ } else if (ch->pl080s) {
+ val = llis_va[PL080S_LLI_CCTL2];
+ bytes = val & PL080S_CONTROL_TRANSFER_SIZE_MASK;
+
+ val = llis_va[PL080_LLI_CCTL];
+ val &= PL080_CONTROL_SWIDTH_MASK;
+ val >>= PL080_CONTROL_SWIDTH_SHIFT;
+ } else {
+ /* Plain PL08x */
+ val = llis_va[PL080_LLI_CCTL];
+ bytes = val & PL080_CONTROL_TRANSFER_SIZE_MASK;
+
+ val &= PL080_CONTROL_SWIDTH_MASK;
+ val >>= PL080_CONTROL_SWIDTH_SHIFT;
+ }
+
+ switch (val) {
+ case PL080_WIDTH_8BIT:
+ break;
+ case PL080_WIDTH_16BIT:
+ bytes *= 2;
+ break;
+ case PL080_WIDTH_32BIT:
+ bytes *= 4;
+ break;
+ }
+ return bytes;
+}
+
+/* The channel should be paused when calling this */
+static u32 pl08x_getbytes_chan(struct pl08x_dma_chan *plchan)
+{
+ struct pl08x_driver_data *pl08x = plchan->host;
+ const u32 *llis_va, *llis_va_limit;
+ struct pl08x_phy_chan *ch;
+ dma_addr_t llis_bus;
+ struct pl08x_txd *txd;
+ u32 llis_max_words;
+ size_t bytes;
+ u32 clli;
+
+ ch = plchan->phychan;
+ txd = plchan->at;
+
+ if (!ch || !txd)
+ return 0;
+
+ /*
+ * Follow the LLIs to get the number of remaining
+ * bytes in the currently active transaction.
+ */
+ clli = readl(ch->reg_lli) & ~PL080_LLI_LM_AHB2;
+
+ /* First get the remaining bytes in the active transfer */
+ bytes = get_bytes_in_phy_channel(ch);
+
+ if (!clli)
+ return bytes;
+
+ llis_va = txd->llis_va;
+ llis_bus = txd->llis_bus;
+
+ llis_max_words = pl08x->lli_words * MAX_NUM_TSFR_LLIS;
+ BUG_ON(clli < llis_bus || clli >= llis_bus +
+ sizeof(u32) * llis_max_words);
+
+ /*
+ * Locate the next LLI - as this is an array,
+ * it's simple maths to find.
+ */
+ llis_va += (clli - llis_bus) / sizeof(u32);
+
+ llis_va_limit = llis_va + llis_max_words;
+
+ for (; llis_va < llis_va_limit; llis_va += pl08x->lli_words) {
+ bytes += get_bytes_in_lli(ch, llis_va);
+
+ /*
+ * A LLI pointer going backward terminates the LLI list
+ */
+ if (llis_va[PL080_LLI_LLI] <= clli)
+ break;
+ }
+
+ return bytes;
+}
+
+/*
+ * Allocate a physical channel for a virtual channel
+ *
+ * Try to locate a physical channel to be used for this transfer. If all
+ * are taken return NULL and the requester will have to cope by using
+ * some fallback PIO mode or retrying later.
+ */
+static struct pl08x_phy_chan *
+pl08x_get_phy_channel(struct pl08x_driver_data *pl08x,
+ struct pl08x_dma_chan *virt_chan)
+{
+ struct pl08x_phy_chan *ch = NULL;
+ unsigned long flags;
+ int i;
+
+ for (i = 0; i < pl08x->vd->channels; i++) {
+ ch = &pl08x->phy_chans[i];
+
+ spin_lock_irqsave(&ch->lock, flags);
+
+ if (!ch->locked && !ch->serving) {
+ ch->serving = virt_chan;
+ spin_unlock_irqrestore(&ch->lock, flags);
+ break;
+ }
+
+ spin_unlock_irqrestore(&ch->lock, flags);
+ }
+
+ if (i == pl08x->vd->channels) {
+ /* No physical channel available, cope with it */
+ return NULL;
+ }
+
+ return ch;
+}
+
+/* Mark the physical channel as free. Note, this write is atomic. */
+static inline void pl08x_put_phy_channel(struct pl08x_driver_data *pl08x,
+ struct pl08x_phy_chan *ch)
+{
+ ch->serving = NULL;
+}
+
+/*
+ * Try to allocate a physical channel. When successful, assign it to
+ * this virtual channel, and initiate the next descriptor. The
+ * virtual channel lock must be held at this point.
+ */
+static void pl08x_phy_alloc_and_start(struct pl08x_dma_chan *plchan)
+{
+ struct pl08x_driver_data *pl08x = plchan->host;
+ struct pl08x_phy_chan *ch;
+
+ ch = pl08x_get_phy_channel(pl08x, plchan);
+ if (!ch) {
+ dev_dbg(&pl08x->adev->dev, "no physical channel available for xfer on %s\n", plchan->name);
+ plchan->state = PL08X_CHAN_WAITING;
+ plchan->waiting_at = jiffies;
+ return;
+ }
+
+ dev_dbg(&pl08x->adev->dev, "allocated physical channel %d for xfer on %s\n",
+ ch->id, plchan->name);
+
+ plchan->phychan = ch;
+ plchan->state = PL08X_CHAN_RUNNING;
+ pl08x_start_next_txd(plchan);
+}
+
+static void pl08x_phy_reassign_start(struct pl08x_phy_chan *ch,
+ struct pl08x_dma_chan *plchan)
+{
+ struct pl08x_driver_data *pl08x = plchan->host;
+
+ dev_dbg(&pl08x->adev->dev, "reassigned physical channel %d for xfer on %s\n",
+ ch->id, plchan->name);
+
+ /*
+ * We do this without taking the lock; we're really only concerned
+ * about whether this pointer is NULL or not, and we're guaranteed
+ * that this will only be called when it _already_ is non-NULL.
+ */
+ ch->serving = plchan;
+ plchan->phychan = ch;
+ plchan->state = PL08X_CHAN_RUNNING;
+ pl08x_start_next_txd(plchan);
+}
+
+/*
+ * Free a physical DMA channel, potentially reallocating it to another
+ * virtual channel if we have any pending.
+ */
+static void pl08x_phy_free(struct pl08x_dma_chan *plchan)
+{
+ struct pl08x_driver_data *pl08x = plchan->host;
+ struct pl08x_dma_chan *p, *next;
+ unsigned long waiting_at;
+ retry:
+ next = NULL;
+ waiting_at = jiffies;
+
+ /*
+ * Find a waiting virtual channel for the next transfer.
+ * To be fair, time when each channel reached waiting state is compared
+ * to select channel that is waiting for the longest time.
+ */
+ list_for_each_entry(p, &pl08x->memcpy.channels, vc.chan.device_node)
+ if (p->state == PL08X_CHAN_WAITING &&
+ p->waiting_at <= waiting_at) {
+ next = p;
+ waiting_at = p->waiting_at;
+ }
+
+ if (!next && pl08x->has_slave) {
+ list_for_each_entry(p, &pl08x->slave.channels, vc.chan.device_node)
+ if (p->state == PL08X_CHAN_WAITING &&
+ p->waiting_at <= waiting_at) {
+ next = p;
+ waiting_at = p->waiting_at;
+ }
+ }
+
+ /* Ensure that the physical channel is stopped */
+ pl08x_terminate_phy_chan(pl08x, plchan->phychan);
+
+ if (next) {
+ bool success;
+
+ /*
+ * Eww. We know this isn't going to deadlock
+ * but lockdep probably doesn't.
+ */
+ spin_lock(&next->vc.lock);
+ /* Re-check the state now that we have the lock */
+ success = next->state == PL08X_CHAN_WAITING;
+ if (success)
+ pl08x_phy_reassign_start(plchan->phychan, next);
+ spin_unlock(&next->vc.lock);
+
+ /* If the state changed, try to find another channel */
+ if (!success)
+ goto retry;
+ } else {
+ /* No more jobs, so free up the physical channel */
+ pl08x_put_phy_channel(pl08x, plchan->phychan);
+ }
+
+ plchan->phychan = NULL;
+ plchan->state = PL08X_CHAN_IDLE;
+}
+
+/*
+ * LLI handling
+ */
+
+static inline unsigned int
+pl08x_get_bytes_for_lli(struct pl08x_driver_data *pl08x,
+ u32 cctl,
+ bool source)
+{
+ u32 val;
+
+ if (pl08x->vd->ftdmac020) {
+ if (source)
+ val = (cctl & FTDMAC020_LLI_SRC_WIDTH_MSK) >>
+ FTDMAC020_LLI_SRC_WIDTH_SHIFT;
+ else
+ val = (cctl & FTDMAC020_LLI_DST_WIDTH_MSK) >>
+ FTDMAC020_LLI_DST_WIDTH_SHIFT;
+ } else {
+ if (source)
+ val = (cctl & PL080_CONTROL_SWIDTH_MASK) >>
+ PL080_CONTROL_SWIDTH_SHIFT;
+ else
+ val = (cctl & PL080_CONTROL_DWIDTH_MASK) >>
+ PL080_CONTROL_DWIDTH_SHIFT;
+ }
+
+ switch (val) {
+ case PL080_WIDTH_8BIT:
+ return 1;
+ case PL080_WIDTH_16BIT:
+ return 2;
+ case PL080_WIDTH_32BIT:
+ return 4;
+ default:
+ break;
+ }
+ BUG();
+ return 0;
+}
+
+static inline u32 pl08x_lli_control_bits(struct pl08x_driver_data *pl08x,
+ u32 cctl,
+ u8 srcwidth, u8 dstwidth,
+ size_t tsize)
+{
+ u32 retbits = cctl;
+
+ /*
+ * Remove all src, dst and transfer size bits, then set the
+ * width and size according to the parameters. The bit offsets
+ * are different in the FTDMAC020 so we need to accound for this.
+ */
+ if (pl08x->vd->ftdmac020) {
+ retbits &= ~FTDMAC020_LLI_DST_WIDTH_MSK;
+ retbits &= ~FTDMAC020_LLI_SRC_WIDTH_MSK;
+ retbits &= ~FTDMAC020_LLI_TRANSFER_SIZE_MASK;
+
+ switch (srcwidth) {
+ case 1:
+ retbits |= PL080_WIDTH_8BIT <<
+ FTDMAC020_LLI_SRC_WIDTH_SHIFT;
+ break;
+ case 2:
+ retbits |= PL080_WIDTH_16BIT <<
+ FTDMAC020_LLI_SRC_WIDTH_SHIFT;
+ break;
+ case 4:
+ retbits |= PL080_WIDTH_32BIT <<
+ FTDMAC020_LLI_SRC_WIDTH_SHIFT;
+ break;
+ default:
+ BUG();
+ break;
+ }
+
+ switch (dstwidth) {
+ case 1:
+ retbits |= PL080_WIDTH_8BIT <<
+ FTDMAC020_LLI_DST_WIDTH_SHIFT;
+ break;
+ case 2:
+ retbits |= PL080_WIDTH_16BIT <<
+ FTDMAC020_LLI_DST_WIDTH_SHIFT;
+ break;
+ case 4:
+ retbits |= PL080_WIDTH_32BIT <<
+ FTDMAC020_LLI_DST_WIDTH_SHIFT;
+ break;
+ default:
+ BUG();
+ break;
+ }
+
+ tsize &= FTDMAC020_LLI_TRANSFER_SIZE_MASK;
+ retbits |= tsize << FTDMAC020_LLI_TRANSFER_SIZE_SHIFT;
+ } else {
+ retbits &= ~PL080_CONTROL_DWIDTH_MASK;
+ retbits &= ~PL080_CONTROL_SWIDTH_MASK;
+ retbits &= ~PL080_CONTROL_TRANSFER_SIZE_MASK;
+
+ switch (srcwidth) {
+ case 1:
+ retbits |= PL080_WIDTH_8BIT <<
+ PL080_CONTROL_SWIDTH_SHIFT;
+ break;
+ case 2:
+ retbits |= PL080_WIDTH_16BIT <<
+ PL080_CONTROL_SWIDTH_SHIFT;
+ break;
+ case 4:
+ retbits |= PL080_WIDTH_32BIT <<
+ PL080_CONTROL_SWIDTH_SHIFT;
+ break;
+ default:
+ BUG();
+ break;
+ }
+
+ switch (dstwidth) {
+ case 1:
+ retbits |= PL080_WIDTH_8BIT <<
+ PL080_CONTROL_DWIDTH_SHIFT;
+ break;
+ case 2:
+ retbits |= PL080_WIDTH_16BIT <<
+ PL080_CONTROL_DWIDTH_SHIFT;
+ break;
+ case 4:
+ retbits |= PL080_WIDTH_32BIT <<
+ PL080_CONTROL_DWIDTH_SHIFT;
+ break;
+ default:
+ BUG();
+ break;
+ }
+
+ tsize &= PL080_CONTROL_TRANSFER_SIZE_MASK;
+ retbits |= tsize << PL080_CONTROL_TRANSFER_SIZE_SHIFT;
+ }
+
+ return retbits;
+}
+
+struct pl08x_lli_build_data {
+ struct pl08x_txd *txd;
+ struct pl08x_bus_data srcbus;
+ struct pl08x_bus_data dstbus;
+ size_t remainder;
+ u32 lli_bus;
+};
+
+/*
+ * Autoselect a master bus to use for the transfer. Slave will be the chosen as
+ * victim in case src & dest are not similarly aligned. i.e. If after aligning
+ * masters address with width requirements of transfer (by sending few byte by
+ * byte data), slave is still not aligned, then its width will be reduced to
+ * BYTE.
+ * - prefers the destination bus if both available
+ * - prefers bus with fixed address (i.e. peripheral)
+ */
+static void pl08x_choose_master_bus(struct pl08x_driver_data *pl08x,
+ struct pl08x_lli_build_data *bd,
+ struct pl08x_bus_data **mbus,
+ struct pl08x_bus_data **sbus,
+ u32 cctl)
+{
+ bool dst_incr;
+ bool src_incr;
+
+ /*
+ * The FTDMAC020 only supports memory-to-memory transfer, so
+ * source and destination always increase.
+ */
+ if (pl08x->vd->ftdmac020) {
+ dst_incr = true;
+ src_incr = true;
+ } else {
+ dst_incr = !!(cctl & PL080_CONTROL_DST_INCR);
+ src_incr = !!(cctl & PL080_CONTROL_SRC_INCR);
+ }
+
+ /*
+ * If either bus is not advancing, i.e. it is a peripheral, that
+ * one becomes master
+ */
+ if (!dst_incr) {
+ *mbus = &bd->dstbus;
+ *sbus = &bd->srcbus;
+ } else if (!src_incr) {
+ *mbus = &bd->srcbus;
+ *sbus = &bd->dstbus;
+ } else {
+ if (bd->dstbus.buswidth >= bd->srcbus.buswidth) {
+ *mbus = &bd->dstbus;
+ *sbus = &bd->srcbus;
+ } else {
+ *mbus = &bd->srcbus;
+ *sbus = &bd->dstbus;
+ }
+ }
+}
+
+/*
+ * Fills in one LLI for a certain transfer descriptor and advance the counter
+ */
+static void pl08x_fill_lli_for_desc(struct pl08x_driver_data *pl08x,
+ struct pl08x_lli_build_data *bd,
+ int num_llis, int len, u32 cctl, u32 cctl2)
+{
+ u32 offset = num_llis * pl08x->lli_words;
+ u32 *llis_va = bd->txd->llis_va + offset;
+ dma_addr_t llis_bus = bd->txd->llis_bus;
+
+ BUG_ON(num_llis >= MAX_NUM_TSFR_LLIS);
+
+ /* Advance the offset to next LLI. */
+ offset += pl08x->lli_words;
+
+ llis_va[PL080_LLI_SRC] = bd->srcbus.addr;
+ llis_va[PL080_LLI_DST] = bd->dstbus.addr;
+ llis_va[PL080_LLI_LLI] = (llis_bus + sizeof(u32) * offset);
+ llis_va[PL080_LLI_LLI] |= bd->lli_bus;
+ llis_va[PL080_LLI_CCTL] = cctl;
+ if (pl08x->vd->pl080s)
+ llis_va[PL080S_LLI_CCTL2] = cctl2;
+
+ if (pl08x->vd->ftdmac020) {
+ /* FIXME: only memcpy so far so both increase */
+ bd->srcbus.addr += len;
+ bd->dstbus.addr += len;
+ } else {
+ if (cctl & PL080_CONTROL_SRC_INCR)
+ bd->srcbus.addr += len;
+ if (cctl & PL080_CONTROL_DST_INCR)
+ bd->dstbus.addr += len;
+ }
+
+ BUG_ON(bd->remainder < len);
+
+ bd->remainder -= len;
+}
+
+static inline void prep_byte_width_lli(struct pl08x_driver_data *pl08x,
+ struct pl08x_lli_build_data *bd, u32 *cctl, u32 len,
+ int num_llis, size_t *total_bytes)
+{
+ *cctl = pl08x_lli_control_bits(pl08x, *cctl, 1, 1, len);
+ pl08x_fill_lli_for_desc(pl08x, bd, num_llis, len, *cctl, len);
+ (*total_bytes) += len;
+}
+
+#if 1
+static void pl08x_dump_lli(struct pl08x_driver_data *pl08x,
+ const u32 *llis_va, int num_llis)
+{
+ int i;
+
+ if (pl08x->vd->pl080s) {
+ dev_vdbg(&pl08x->adev->dev,
+ "%-3s %-9s %-10s %-10s %-10s %-10s %s\n",
+ "lli", "", "csrc", "cdst", "clli", "cctl", "cctl2");
+ for (i = 0; i < num_llis; i++) {
+ dev_vdbg(&pl08x->adev->dev,
+ "%3d @%p: 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x\n",
+ i, llis_va, llis_va[PL080_LLI_SRC],
+ llis_va[PL080_LLI_DST], llis_va[PL080_LLI_LLI],
+ llis_va[PL080_LLI_CCTL],
+ llis_va[PL080S_LLI_CCTL2]);
+ llis_va += pl08x->lli_words;
+ }
+ } else {
+ dev_vdbg(&pl08x->adev->dev,
+ "%-3s %-9s %-10s %-10s %-10s %s\n",
+ "lli", "", "csrc", "cdst", "clli", "cctl");
+ for (i = 0; i < num_llis; i++) {
+ dev_vdbg(&pl08x->adev->dev,
+ "%3d @%p: 0x%08x 0x%08x 0x%08x 0x%08x\n",
+ i, llis_va, llis_va[PL080_LLI_SRC],
+ llis_va[PL080_LLI_DST], llis_va[PL080_LLI_LLI],
+ llis_va[PL080_LLI_CCTL]);
+ llis_va += pl08x->lli_words;
+ }
+ }
+}
+#else
+static inline void pl08x_dump_lli(struct pl08x_driver_data *pl08x,
+ const u32 *llis_va, int num_llis) {}
+#endif
+
+/*
+ * This fills in the table of LLIs for the transfer descriptor
+ * Note that we assume we never have to change the burst sizes
+ * Return 0 for error
+ */
+static int pl08x_fill_llis_for_desc(struct pl08x_driver_data *pl08x,
+ struct pl08x_txd *txd)
+{
+ struct pl08x_bus_data *mbus, *sbus;
+ struct pl08x_lli_build_data bd;
+ int num_llis = 0;
+ u32 cctl, early_bytes = 0;
+ size_t max_bytes_per_lli, total_bytes;
+ u32 *llis_va, *last_lli;
+ struct pl08x_sg *dsg;
+
+ txd->llis_va = dma_pool_alloc(pl08x->pool, GFP_NOWAIT, &txd->llis_bus);
+ if (!txd->llis_va) {
+ dev_err(&pl08x->adev->dev, "%s no memory for llis\n", __func__);
+ return 0;
+ }
+
+ bd.txd = txd;
+ bd.lli_bus = (pl08x->lli_buses & PL08X_AHB2) ? PL080_LLI_LM_AHB2 : 0;
+ cctl = txd->cctl;
+
+ /* Find maximum width of the source bus */
+ bd.srcbus.maxwidth = pl08x_get_bytes_for_lli(pl08x, cctl, true);
+
+ /* Find maximum width of the destination bus */
+ bd.dstbus.maxwidth = pl08x_get_bytes_for_lli(pl08x, cctl, false);
+
+ list_for_each_entry(dsg, &txd->dsg_list, node) {
+ total_bytes = 0;
+ cctl = txd->cctl;
+
+ bd.srcbus.addr = dsg->src_addr;
+ bd.dstbus.addr = dsg->dst_addr;
+ bd.remainder = dsg->len;
+ bd.srcbus.buswidth = bd.srcbus.maxwidth;
+ bd.dstbus.buswidth = bd.dstbus.maxwidth;
+
+ pl08x_choose_master_bus(pl08x, &bd, &mbus, &sbus, cctl);
+
+ dev_vdbg(&pl08x->adev->dev,
+ "src=0x%08llx%s/%u dst=0x%08llx%s/%u len=%zu\n",
+ (u64)bd.srcbus.addr,
+ cctl & PL080_CONTROL_SRC_INCR ? "+" : "",
+ bd.srcbus.buswidth,
+ (u64)bd.dstbus.addr,
+ cctl & PL080_CONTROL_DST_INCR ? "+" : "",
+ bd.dstbus.buswidth,
+ bd.remainder);
+ dev_vdbg(&pl08x->adev->dev, "mbus=%s sbus=%s\n",
+ mbus == &bd.srcbus ? "src" : "dst",
+ sbus == &bd.srcbus ? "src" : "dst");
+
+ /*
+ * Zero length is only allowed if all these requirements are
+ * met:
+ * - flow controller is peripheral.
+ * - src.addr is aligned to src.width
+ * - dst.addr is aligned to dst.width
+ *
+ * sg_len == 1 should be true, as there can be two cases here:
+ *
+ * - Memory addresses are contiguous and are not scattered.
+ * Here, Only one sg will be passed by user driver, with
+ * memory address and zero length. We pass this to controller
+ * and after the transfer it will receive the last burst
+ * request from peripheral and so transfer finishes.
+ *
+ * - Memory addresses are scattered and are not contiguous.
+ * Here, Obviously as DMA controller doesn't know when a lli's
+ * transfer gets over, it can't load next lli. So in this
+ * case, there has to be an assumption that only one lli is
+ * supported. Thus, we can't have scattered addresses.
+ */
+ if (!bd.remainder) {
+ u32 fc;
+
+ /* FTDMAC020 only does memory-to-memory */
+ if (pl08x->vd->ftdmac020)
+ fc = PL080_FLOW_MEM2MEM;
+ else
+ fc = (txd->ccfg & PL080_CONFIG_FLOW_CONTROL_MASK) >>
+ PL080_CONFIG_FLOW_CONTROL_SHIFT;
+ if (!((fc >= PL080_FLOW_SRC2DST_DST) &&
+ (fc <= PL080_FLOW_SRC2DST_SRC))) {
+ dev_err(&pl08x->adev->dev, "%s sg len can't be zero",
+ __func__);
+ return 0;
+ }
+
+ if (!IS_BUS_ALIGNED(&bd.srcbus) ||
+ !IS_BUS_ALIGNED(&bd.dstbus)) {
+ dev_err(&pl08x->adev->dev,
+ "%s src & dst address must be aligned to src"
+ " & dst width if peripheral is flow controller",
+ __func__);
+ return 0;
+ }
+
+ cctl = pl08x_lli_control_bits(pl08x, cctl,
+ bd.srcbus.buswidth, bd.dstbus.buswidth,
+ 0);
+ pl08x_fill_lli_for_desc(pl08x, &bd, num_llis++,
+ 0, cctl, 0);
+ break;
+ }
+
+ /*
+ * Send byte by byte for following cases
+ * - Less than a bus width available
+ * - until master bus is aligned
+ */
+ if (bd.remainder < mbus->buswidth)
+ early_bytes = bd.remainder;
+ else if (!IS_BUS_ALIGNED(mbus)) {
+ early_bytes = mbus->buswidth -
+ (mbus->addr & (mbus->buswidth - 1));
+ if ((bd.remainder - early_bytes) < mbus->buswidth)
+ early_bytes = bd.remainder;
+ }
+
+ if (early_bytes) {
+ dev_vdbg(&pl08x->adev->dev,
+ "%s byte width LLIs (remain 0x%08zx)\n",
+ __func__, bd.remainder);
+ prep_byte_width_lli(pl08x, &bd, &cctl, early_bytes,
+ num_llis++, &total_bytes);
+ }
+
+ if (bd.remainder) {
+ /*
+ * Master now aligned
+ * - if slave is not then we must set its width down
+ */
+ if (!IS_BUS_ALIGNED(sbus)) {
+ dev_dbg(&pl08x->adev->dev,
+ "%s set down bus width to one byte\n",
+ __func__);
+
+ sbus->buswidth = 1;
+ }
+
+ /*
+ * Bytes transferred = tsize * src width, not
+ * MIN(buswidths)
+ */
+ max_bytes_per_lli = bd.srcbus.buswidth *
+ pl08x->vd->max_transfer_size;
+ dev_vdbg(&pl08x->adev->dev,
+ "%s max bytes per lli = %zu\n",
+ __func__, max_bytes_per_lli);
+
+ /*
+ * Make largest possible LLIs until less than one bus
+ * width left
+ */
+ while (bd.remainder > (mbus->buswidth - 1)) {
+ size_t lli_len, tsize, width;
+
+ /*
+ * If enough left try to send max possible,
+ * otherwise try to send the remainder
+ */
+ lli_len = min(bd.remainder, max_bytes_per_lli);
+
+ /*
+ * Check against maximum bus alignment:
+ * Calculate actual transfer size in relation to
+ * bus width an get a maximum remainder of the
+ * highest bus width - 1
+ */
+ width = max(mbus->buswidth, sbus->buswidth);
+ lli_len = (lli_len / width) * width;
+ tsize = lli_len / bd.srcbus.buswidth;
+
+ dev_vdbg(&pl08x->adev->dev,
+ "%s fill lli with single lli chunk of "
+ "size 0x%08zx (remainder 0x%08zx)\n",
+ __func__, lli_len, bd.remainder);
+
+ cctl = pl08x_lli_control_bits(pl08x, cctl,
+ bd.srcbus.buswidth, bd.dstbus.buswidth,
+ tsize);
+ pl08x_fill_lli_for_desc(pl08x, &bd, num_llis++,
+ lli_len, cctl, tsize);
+ total_bytes += lli_len;
+ }
+
+ /*
+ * Send any odd bytes
+ */
+ if (bd.remainder) {
+ dev_vdbg(&pl08x->adev->dev,
+ "%s align with boundary, send odd bytes (remain %zu)\n",
+ __func__, bd.remainder);
+ prep_byte_width_lli(pl08x, &bd, &cctl,
+ bd.remainder, num_llis++, &total_bytes);
+ }
+ }
+
+ if (total_bytes != dsg->len) {
+ dev_err(&pl08x->adev->dev,
+ "%s size of encoded lli:s don't match total txd, transferred 0x%08zx from size 0x%08zx\n",
+ __func__, total_bytes, dsg->len);
+ return 0;
+ }
+
+ if (num_llis >= MAX_NUM_TSFR_LLIS) {
+ dev_err(&pl08x->adev->dev,
+ "%s need to increase MAX_NUM_TSFR_LLIS from 0x%08x\n",
+ __func__, MAX_NUM_TSFR_LLIS);
+ return 0;
+ }
+ }
+
+ llis_va = txd->llis_va;
+ last_lli = llis_va + (num_llis - 1) * pl08x->lli_words;
+
+ if (txd->cyclic) {
+ /* Link back to the first LLI. */
+ last_lli[PL080_LLI_LLI] = txd->llis_bus | bd.lli_bus;
+ } else {
+ /* The final LLI terminates the LLI. */
+ last_lli[PL080_LLI_LLI] = 0;
+ /* The final LLI element shall also fire an interrupt. */
+ if (pl08x->vd->ftdmac020)
+ last_lli[PL080_LLI_CCTL] &= ~FTDMAC020_LLI_TC_MSK;
+ else
+ last_lli[PL080_LLI_CCTL] |= PL080_CONTROL_TC_IRQ_EN;
+ }
+
+ pl08x_dump_lli(pl08x, llis_va, num_llis);
+
+ return num_llis;
+}
+
+static void pl08x_free_txd(struct pl08x_driver_data *pl08x,
+ struct pl08x_txd *txd)
+{
+ struct pl08x_sg *dsg, *_dsg;
+
+ if (txd->llis_va)
+ dma_pool_free(pl08x->pool, txd->llis_va, txd->llis_bus);
+
+ list_for_each_entry_safe(dsg, _dsg, &txd->dsg_list, node) {
+ list_del(&dsg->node);
+ kfree(dsg);
+ }
+
+ kfree(txd);
+}
+
+static void pl08x_desc_free(struct virt_dma_desc *vd)
+{
+ struct pl08x_txd *txd = to_pl08x_txd(&vd->tx);
+ struct pl08x_dma_chan *plchan = to_pl08x_chan(vd->tx.chan);
+
+ dma_descriptor_unmap(&vd->tx);
+ if (!txd->done)
+ pl08x_release_mux(plchan);
+
+ pl08x_free_txd(plchan->host, txd);
+}
+
+static void pl08x_free_txd_list(struct pl08x_driver_data *pl08x,
+ struct pl08x_dma_chan *plchan)
+{
+ LIST_HEAD(head);
+
+ vchan_get_all_descriptors(&plchan->vc, &head);
+ vchan_dma_desc_free_list(&plchan->vc, &head);
+}
+
+/*
+ * The DMA ENGINE API
+ */
+static void pl08x_free_chan_resources(struct dma_chan *chan)
+{
+ /* Ensure all queued descriptors are freed */
+ vchan_free_chan_resources(to_virt_chan(chan));
+}
+
+/*
+ * Code accessing dma_async_is_complete() in a tight loop may give problems.
+ * If slaves are relying on interrupts to signal completion this function
+ * must not be called with interrupts disabled.
+ */
+static enum dma_status pl08x_dma_tx_status(struct dma_chan *chan,
+ dma_cookie_t cookie, struct dma_tx_state *txstate)
+{
+ struct pl08x_dma_chan *plchan = to_pl08x_chan(chan);
+ struct virt_dma_desc *vd;
+ unsigned long flags;
+ enum dma_status ret;
+ size_t bytes = 0;
+
+ ret = dma_cookie_status(chan, cookie, txstate);
+ if (ret == DMA_COMPLETE)
+ return ret;
+
+ /*
+ * There's no point calculating the residue if there's
+ * no txstate to store the value.
+ */
+ if (!txstate) {
+ if (plchan->state == PL08X_CHAN_PAUSED)
+ ret = DMA_PAUSED;
+ return ret;
+ }
+
+ spin_lock_irqsave(&plchan->vc.lock, flags);
+ ret = dma_cookie_status(chan, cookie, txstate);
+ if (ret != DMA_COMPLETE) {
+ vd = vchan_find_desc(&plchan->vc, cookie);
+ if (vd) {
+ /* On the issued list, so hasn't been processed yet */
+ struct pl08x_txd *txd = to_pl08x_txd(&vd->tx);
+ struct pl08x_sg *dsg;
+
+ list_for_each_entry(dsg, &txd->dsg_list, node)
+ bytes += dsg->len;
+ } else {
+ bytes = pl08x_getbytes_chan(plchan);
+ }
+ }
+ spin_unlock_irqrestore(&plchan->vc.lock, flags);
+
+ /*
+ * This cookie not complete yet
+ * Get number of bytes left in the active transactions and queue
+ */
+ dma_set_residue(txstate, bytes);
+
+ if (plchan->state == PL08X_CHAN_PAUSED && ret == DMA_IN_PROGRESS)
+ ret = DMA_PAUSED;
+
+ /* Whether waiting or running, we're in progress */
+ return ret;
+}
+
+/* PrimeCell DMA extension */
+struct burst_table {
+ u32 burstwords;
+ u32 reg;
+};
+
+static const struct burst_table burst_sizes[] = {
+ {
+ .burstwords = 256,
+ .reg = PL080_BSIZE_256,
+ },
+ {
+ .burstwords = 128,
+ .reg = PL080_BSIZE_128,
+ },
+ {
+ .burstwords = 64,
+ .reg = PL080_BSIZE_64,
+ },
+ {
+ .burstwords = 32,
+ .reg = PL080_BSIZE_32,
+ },
+ {
+ .burstwords = 16,
+ .reg = PL080_BSIZE_16,
+ },
+ {
+ .burstwords = 8,
+ .reg = PL080_BSIZE_8,
+ },
+ {
+ .burstwords = 4,
+ .reg = PL080_BSIZE_4,
+ },
+ {
+ .burstwords = 0,
+ .reg = PL080_BSIZE_1,
+ },
+};
+
+/*
+ * Given the source and destination available bus masks, select which
+ * will be routed to each port. We try to have source and destination
+ * on separate ports, but always respect the allowable settings.
+ */
+static u32 pl08x_select_bus(bool ftdmac020, u8 src, u8 dst)
+{
+ u32 cctl = 0;
+ u32 dst_ahb2;
+ u32 src_ahb2;
+
+ /* The FTDMAC020 use different bits to indicate src/dst bus */
+ if (ftdmac020) {
+ dst_ahb2 = FTDMAC020_LLI_DST_SEL;
+ src_ahb2 = FTDMAC020_LLI_SRC_SEL;
+ } else {
+ dst_ahb2 = PL080_CONTROL_DST_AHB2;
+ src_ahb2 = PL080_CONTROL_SRC_AHB2;
+ }
+
+ if (!(dst & PL08X_AHB1) || ((dst & PL08X_AHB2) && (src & PL08X_AHB1)))
+ cctl |= dst_ahb2;
+ if (!(src & PL08X_AHB1) || ((src & PL08X_AHB2) && !(dst & PL08X_AHB2)))
+ cctl |= src_ahb2;
+
+ return cctl;
+}
+
+static u32 pl08x_cctl(u32 cctl)
+{
+ cctl &= ~(PL080_CONTROL_SRC_AHB2 | PL080_CONTROL_DST_AHB2 |
+ PL080_CONTROL_SRC_INCR | PL080_CONTROL_DST_INCR |
+ PL080_CONTROL_PROT_MASK);
+
+ /* Access the cell in privileged mode, non-bufferable, non-cacheable */
+ return cctl | PL080_CONTROL_PROT_SYS;
+}
+
+static u32 pl08x_width(enum dma_slave_buswidth width)
+{
+ switch (width) {
+ case DMA_SLAVE_BUSWIDTH_1_BYTE:
+ return PL080_WIDTH_8BIT;
+ case DMA_SLAVE_BUSWIDTH_2_BYTES:
+ return PL080_WIDTH_16BIT;
+ case DMA_SLAVE_BUSWIDTH_4_BYTES:
+ return PL080_WIDTH_32BIT;
+ default:
+ return ~0;
+ }
+}
+
+static u32 pl08x_burst(u32 maxburst)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(burst_sizes); i++)
+ if (burst_sizes[i].burstwords <= maxburst)
+ break;
+
+ return burst_sizes[i].reg;
+}
+
+static u32 pl08x_get_cctl(struct pl08x_dma_chan *plchan,
+ enum dma_slave_buswidth addr_width, u32 maxburst)
+{
+ u32 width, burst, cctl = 0;
+
+ width = pl08x_width(addr_width);
+ if (width == ~0)
+ return ~0;
+
+ cctl |= width << PL080_CONTROL_SWIDTH_SHIFT;
+ cctl |= width << PL080_CONTROL_DWIDTH_SHIFT;
+
+ /*
+ * If this channel will only request single transfers, set this
+ * down to ONE element. Also select one element if no maxburst
+ * is specified.
+ */
+ if (plchan->cd->single)
+ maxburst = 1;
+
+ burst = pl08x_burst(maxburst);
+ cctl |= burst << PL080_CONTROL_SB_SIZE_SHIFT;
+ cctl |= burst << PL080_CONTROL_DB_SIZE_SHIFT;
+
+ return pl08x_cctl(cctl);
+}
+
+/*
+ * Slave transactions callback to the slave device to allow
+ * synchronization of slave DMA signals with the DMAC enable
+ */
+static void pl08x_issue_pending(struct dma_chan *chan)
+{
+ struct pl08x_dma_chan *plchan = to_pl08x_chan(chan);
+ unsigned long flags;
+
+ spin_lock_irqsave(&plchan->vc.lock, flags);
+ if (vchan_issue_pending(&plchan->vc)) {
+ if (!plchan->phychan && plchan->state != PL08X_CHAN_WAITING)
+ pl08x_phy_alloc_and_start(plchan);
+ }
+ spin_unlock_irqrestore(&plchan->vc.lock, flags);
+}
+
+static struct pl08x_txd *pl08x_get_txd(struct pl08x_dma_chan *plchan)
+{
+ struct pl08x_txd *txd = kzalloc(sizeof(*txd), GFP_NOWAIT);
+
+ if (txd)
+ INIT_LIST_HEAD(&txd->dsg_list);
+ return txd;
+}
+
+static u32 pl08x_memcpy_cctl(struct pl08x_driver_data *pl08x)
+{
+ u32 cctl = 0;
+
+ /* Conjure cctl */
+ switch (pl08x->pd->memcpy_burst_size) {
+ default:
+ dev_err(&pl08x->adev->dev,
+ "illegal burst size for memcpy, set to 1\n");
+ fallthrough;
+ case PL08X_BURST_SZ_1:
+ cctl |= PL080_BSIZE_1 << PL080_CONTROL_SB_SIZE_SHIFT |
+ PL080_BSIZE_1 << PL080_CONTROL_DB_SIZE_SHIFT;
+ break;
+ case PL08X_BURST_SZ_4:
+ cctl |= PL080_BSIZE_4 << PL080_CONTROL_SB_SIZE_SHIFT |
+ PL080_BSIZE_4 << PL080_CONTROL_DB_SIZE_SHIFT;
+ break;
+ case PL08X_BURST_SZ_8:
+ cctl |= PL080_BSIZE_8 << PL080_CONTROL_SB_SIZE_SHIFT |
+ PL080_BSIZE_8 << PL080_CONTROL_DB_SIZE_SHIFT;
+ break;
+ case PL08X_BURST_SZ_16:
+ cctl |= PL080_BSIZE_16 << PL080_CONTROL_SB_SIZE_SHIFT |
+ PL080_BSIZE_16 << PL080_CONTROL_DB_SIZE_SHIFT;
+ break;
+ case PL08X_BURST_SZ_32:
+ cctl |= PL080_BSIZE_32 << PL080_CONTROL_SB_SIZE_SHIFT |
+ PL080_BSIZE_32 << PL080_CONTROL_DB_SIZE_SHIFT;
+ break;
+ case PL08X_BURST_SZ_64:
+ cctl |= PL080_BSIZE_64 << PL080_CONTROL_SB_SIZE_SHIFT |
+ PL080_BSIZE_64 << PL080_CONTROL_DB_SIZE_SHIFT;
+ break;
+ case PL08X_BURST_SZ_128:
+ cctl |= PL080_BSIZE_128 << PL080_CONTROL_SB_SIZE_SHIFT |
+ PL080_BSIZE_128 << PL080_CONTROL_DB_SIZE_SHIFT;
+ break;
+ case PL08X_BURST_SZ_256:
+ cctl |= PL080_BSIZE_256 << PL080_CONTROL_SB_SIZE_SHIFT |
+ PL080_BSIZE_256 << PL080_CONTROL_DB_SIZE_SHIFT;
+ break;
+ }
+
+ switch (pl08x->pd->memcpy_bus_width) {
+ default:
+ dev_err(&pl08x->adev->dev,
+ "illegal bus width for memcpy, set to 8 bits\n");
+ fallthrough;
+ case PL08X_BUS_WIDTH_8_BITS:
+ cctl |= PL080_WIDTH_8BIT << PL080_CONTROL_SWIDTH_SHIFT |
+ PL080_WIDTH_8BIT << PL080_CONTROL_DWIDTH_SHIFT;
+ break;
+ case PL08X_BUS_WIDTH_16_BITS:
+ cctl |= PL080_WIDTH_16BIT << PL080_CONTROL_SWIDTH_SHIFT |
+ PL080_WIDTH_16BIT << PL080_CONTROL_DWIDTH_SHIFT;
+ break;
+ case PL08X_BUS_WIDTH_32_BITS:
+ cctl |= PL080_WIDTH_32BIT << PL080_CONTROL_SWIDTH_SHIFT |
+ PL080_WIDTH_32BIT << PL080_CONTROL_DWIDTH_SHIFT;
+ break;
+ }
+
+ /* Protection flags */
+ if (pl08x->pd->memcpy_prot_buff)
+ cctl |= PL080_CONTROL_PROT_BUFF;
+ if (pl08x->pd->memcpy_prot_cache)
+ cctl |= PL080_CONTROL_PROT_CACHE;
+
+ /* We are the kernel, so we are in privileged mode */
+ cctl |= PL080_CONTROL_PROT_SYS;
+
+ /* Both to be incremented or the code will break */
+ cctl |= PL080_CONTROL_SRC_INCR | PL080_CONTROL_DST_INCR;
+
+ if (pl08x->vd->dualmaster)
+ cctl |= pl08x_select_bus(false,
+ pl08x->mem_buses,
+ pl08x->mem_buses);
+
+ return cctl;
+}
+
+static u32 pl08x_ftdmac020_memcpy_cctl(struct pl08x_driver_data *pl08x)
+{
+ u32 cctl = 0;
+
+ /* Conjure cctl */
+ switch (pl08x->pd->memcpy_bus_width) {
+ default:
+ dev_err(&pl08x->adev->dev,
+ "illegal bus width for memcpy, set to 8 bits\n");
+ fallthrough;
+ case PL08X_BUS_WIDTH_8_BITS:
+ cctl |= PL080_WIDTH_8BIT << FTDMAC020_LLI_SRC_WIDTH_SHIFT |
+ PL080_WIDTH_8BIT << FTDMAC020_LLI_DST_WIDTH_SHIFT;
+ break;
+ case PL08X_BUS_WIDTH_16_BITS:
+ cctl |= PL080_WIDTH_16BIT << FTDMAC020_LLI_SRC_WIDTH_SHIFT |
+ PL080_WIDTH_16BIT << FTDMAC020_LLI_DST_WIDTH_SHIFT;
+ break;
+ case PL08X_BUS_WIDTH_32_BITS:
+ cctl |= PL080_WIDTH_32BIT << FTDMAC020_LLI_SRC_WIDTH_SHIFT |
+ PL080_WIDTH_32BIT << FTDMAC020_LLI_DST_WIDTH_SHIFT;
+ break;
+ }
+
+ /*
+ * By default mask the TC IRQ on all LLIs, it will be unmasked on
+ * the last LLI item by other code.
+ */
+ cctl |= FTDMAC020_LLI_TC_MSK;
+
+ /*
+ * Both to be incremented so leave bits FTDMAC020_LLI_SRCAD_CTL
+ * and FTDMAC020_LLI_DSTAD_CTL as zero
+ */
+ if (pl08x->vd->dualmaster)
+ cctl |= pl08x_select_bus(true,
+ pl08x->mem_buses,
+ pl08x->mem_buses);
+
+ return cctl;
+}
+
+/*
+ * Initialize a descriptor to be used by memcpy submit
+ */
+static struct dma_async_tx_descriptor *pl08x_prep_dma_memcpy(
+ struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
+ size_t len, unsigned long flags)
+{
+ struct pl08x_dma_chan *plchan = to_pl08x_chan(chan);
+ struct pl08x_driver_data *pl08x = plchan->host;
+ struct pl08x_txd *txd;
+ struct pl08x_sg *dsg;
+ int ret;
+
+ txd = pl08x_get_txd(plchan);
+ if (!txd) {
+ dev_err(&pl08x->adev->dev,
+ "%s no memory for descriptor\n", __func__);
+ return NULL;
+ }
+
+ dsg = kzalloc(sizeof(struct pl08x_sg), GFP_NOWAIT);
+ if (!dsg) {
+ pl08x_free_txd(pl08x, txd);
+ return NULL;
+ }
+ list_add_tail(&dsg->node, &txd->dsg_list);
+
+ dsg->src_addr = src;
+ dsg->dst_addr = dest;
+ dsg->len = len;
+ if (pl08x->vd->ftdmac020) {
+ /* Writing CCFG zero ENABLES all interrupts */
+ txd->ccfg = 0;
+ txd->cctl = pl08x_ftdmac020_memcpy_cctl(pl08x);
+ } else {
+ txd->ccfg = PL080_CONFIG_ERR_IRQ_MASK |
+ PL080_CONFIG_TC_IRQ_MASK |
+ PL080_FLOW_MEM2MEM << PL080_CONFIG_FLOW_CONTROL_SHIFT;
+ txd->cctl = pl08x_memcpy_cctl(pl08x);
+ }
+
+ ret = pl08x_fill_llis_for_desc(plchan->host, txd);
+ if (!ret) {
+ pl08x_free_txd(pl08x, txd);
+ return NULL;
+ }
+
+ return vchan_tx_prep(&plchan->vc, &txd->vd, flags);
+}
+
+static struct pl08x_txd *pl08x_init_txd(
+ struct dma_chan *chan,
+ enum dma_transfer_direction direction,
+ dma_addr_t *slave_addr)
+{
+ struct pl08x_dma_chan *plchan = to_pl08x_chan(chan);
+ struct pl08x_driver_data *pl08x = plchan->host;
+ struct pl08x_txd *txd;
+ enum dma_slave_buswidth addr_width;
+ int ret, tmp;
+ u8 src_buses, dst_buses;
+ u32 maxburst, cctl;
+
+ txd = pl08x_get_txd(plchan);
+ if (!txd) {
+ dev_err(&pl08x->adev->dev, "%s no txd\n", __func__);
+ return NULL;
+ }
+
+ /*
+ * Set up addresses, the PrimeCell configured address
+ * will take precedence since this may configure the
+ * channel target address dynamically at runtime.
+ */
+ if (direction == DMA_MEM_TO_DEV) {
+ cctl = PL080_CONTROL_SRC_INCR;
+ *slave_addr = plchan->cfg.dst_addr;
+ addr_width = plchan->cfg.dst_addr_width;
+ maxburst = plchan->cfg.dst_maxburst;
+ src_buses = pl08x->mem_buses;
+ dst_buses = plchan->cd->periph_buses;
+ } else if (direction == DMA_DEV_TO_MEM) {
+ cctl = PL080_CONTROL_DST_INCR;
+ *slave_addr = plchan->cfg.src_addr;
+ addr_width = plchan->cfg.src_addr_width;
+ maxburst = plchan->cfg.src_maxburst;
+ src_buses = plchan->cd->periph_buses;
+ dst_buses = pl08x->mem_buses;
+ } else {
+ pl08x_free_txd(pl08x, txd);
+ dev_err(&pl08x->adev->dev,
+ "%s direction unsupported\n", __func__);
+ return NULL;
+ }
+
+ cctl |= pl08x_get_cctl(plchan, addr_width, maxburst);
+ if (cctl == ~0) {
+ pl08x_free_txd(pl08x, txd);
+ dev_err(&pl08x->adev->dev,
+ "DMA slave configuration botched?\n");
+ return NULL;
+ }
+
+ txd->cctl = cctl | pl08x_select_bus(false, src_buses, dst_buses);
+
+ if (plchan->cfg.device_fc)
+ tmp = (direction == DMA_MEM_TO_DEV) ? PL080_FLOW_MEM2PER_PER :
+ PL080_FLOW_PER2MEM_PER;
+ else
+ tmp = (direction == DMA_MEM_TO_DEV) ? PL080_FLOW_MEM2PER :
+ PL080_FLOW_PER2MEM;
+
+ txd->ccfg = PL080_CONFIG_ERR_IRQ_MASK |
+ PL080_CONFIG_TC_IRQ_MASK |
+ tmp << PL080_CONFIG_FLOW_CONTROL_SHIFT;
+
+ ret = pl08x_request_mux(plchan);
+ if (ret < 0) {
+ pl08x_free_txd(pl08x, txd);
+ dev_dbg(&pl08x->adev->dev,
+ "unable to mux for transfer on %s due to platform restrictions\n",
+ plchan->name);
+ return NULL;
+ }
+
+ dev_dbg(&pl08x->adev->dev, "allocated DMA request signal %d for xfer on %s\n",
+ plchan->signal, plchan->name);
+
+ /* Assign the flow control signal to this channel */
+ if (direction == DMA_MEM_TO_DEV)
+ txd->ccfg |= plchan->signal << PL080_CONFIG_DST_SEL_SHIFT;
+ else
+ txd->ccfg |= plchan->signal << PL080_CONFIG_SRC_SEL_SHIFT;
+
+ return txd;
+}
+
+static int pl08x_tx_add_sg(struct pl08x_txd *txd,
+ enum dma_transfer_direction direction,
+ dma_addr_t slave_addr,
+ dma_addr_t buf_addr,
+ unsigned int len)
+{
+ struct pl08x_sg *dsg;
+
+ dsg = kzalloc(sizeof(struct pl08x_sg), GFP_NOWAIT);
+ if (!dsg)
+ return -ENOMEM;
+
+ list_add_tail(&dsg->node, &txd->dsg_list);
+
+ dsg->len = len;
+ if (direction == DMA_MEM_TO_DEV) {
+ dsg->src_addr = buf_addr;
+ dsg->dst_addr = slave_addr;
+ } else {
+ dsg->src_addr = slave_addr;
+ dsg->dst_addr = buf_addr;
+ }
+
+ return 0;
+}
+
+static struct dma_async_tx_descriptor *pl08x_prep_slave_sg(
+ struct dma_chan *chan, struct scatterlist *sgl,
+ unsigned int sg_len, enum dma_transfer_direction direction,
+ unsigned long flags, void *context)
+{
+ struct pl08x_dma_chan *plchan = to_pl08x_chan(chan);
+ struct pl08x_driver_data *pl08x = plchan->host;
+ struct pl08x_txd *txd;
+ struct scatterlist *sg;
+ int ret, tmp;
+ dma_addr_t slave_addr;
+
+ dev_dbg(&pl08x->adev->dev, "%s prepare transaction of %d bytes from %s\n",
+ __func__, sg_dma_len(sgl), plchan->name);
+
+ txd = pl08x_init_txd(chan, direction, &slave_addr);
+ if (!txd)
+ return NULL;
+
+ for_each_sg(sgl, sg, sg_len, tmp) {
+ ret = pl08x_tx_add_sg(txd, direction, slave_addr,
+ sg_dma_address(sg),
+ sg_dma_len(sg));
+ if (ret) {
+ pl08x_release_mux(plchan);
+ pl08x_free_txd(pl08x, txd);
+ dev_err(&pl08x->adev->dev, "%s no mem for pl080 sg\n",
+ __func__);
+ return NULL;
+ }
+ }
+
+ ret = pl08x_fill_llis_for_desc(plchan->host, txd);
+ if (!ret) {
+ pl08x_release_mux(plchan);
+ pl08x_free_txd(pl08x, txd);
+ return NULL;
+ }
+
+ return vchan_tx_prep(&plchan->vc, &txd->vd, flags);
+}
+
+static struct dma_async_tx_descriptor *pl08x_prep_dma_cyclic(
+ struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
+ size_t period_len, enum dma_transfer_direction direction,
+ unsigned long flags)
+{
+ struct pl08x_dma_chan *plchan = to_pl08x_chan(chan);
+ struct pl08x_driver_data *pl08x = plchan->host;
+ struct pl08x_txd *txd;
+ int ret, tmp;
+ dma_addr_t slave_addr;
+
+ dev_dbg(&pl08x->adev->dev,
+ "%s prepare cyclic transaction of %zd/%zd bytes %s %s\n",
+ __func__, period_len, buf_len,
+ direction == DMA_MEM_TO_DEV ? "to" : "from",
+ plchan->name);
+
+ txd = pl08x_init_txd(chan, direction, &slave_addr);
+ if (!txd)
+ return NULL;
+
+ txd->cyclic = true;
+ txd->cctl |= PL080_CONTROL_TC_IRQ_EN;
+ for (tmp = 0; tmp < buf_len; tmp += period_len) {
+ ret = pl08x_tx_add_sg(txd, direction, slave_addr,
+ buf_addr + tmp, period_len);
+ if (ret) {
+ pl08x_release_mux(plchan);
+ pl08x_free_txd(pl08x, txd);
+ return NULL;
+ }
+ }
+
+ ret = pl08x_fill_llis_for_desc(plchan->host, txd);
+ if (!ret) {
+ pl08x_release_mux(plchan);
+ pl08x_free_txd(pl08x, txd);
+ return NULL;
+ }
+
+ return vchan_tx_prep(&plchan->vc, &txd->vd, flags);
+}
+
+static int pl08x_config(struct dma_chan *chan,
+ struct dma_slave_config *config)
+{
+ struct pl08x_dma_chan *plchan = to_pl08x_chan(chan);
+ struct pl08x_driver_data *pl08x = plchan->host;
+
+ if (!plchan->slave)
+ return -EINVAL;
+
+ /* Reject definitely invalid configurations */
+ if (config->src_addr_width == DMA_SLAVE_BUSWIDTH_8_BYTES ||
+ config->dst_addr_width == DMA_SLAVE_BUSWIDTH_8_BYTES)
+ return -EINVAL;
+
+ if (config->device_fc && pl08x->vd->pl080s) {
+ dev_err(&pl08x->adev->dev,
+ "%s: PL080S does not support peripheral flow control\n",
+ __func__);
+ return -EINVAL;
+ }
+
+ plchan->cfg = *config;
+
+ return 0;
+}
+
+static int pl08x_terminate_all(struct dma_chan *chan)
+{
+ struct pl08x_dma_chan *plchan = to_pl08x_chan(chan);
+ struct pl08x_driver_data *pl08x = plchan->host;
+ unsigned long flags;
+
+ spin_lock_irqsave(&plchan->vc.lock, flags);
+ if (!plchan->phychan && !plchan->at) {
+ spin_unlock_irqrestore(&plchan->vc.lock, flags);
+ return 0;
+ }
+
+ plchan->state = PL08X_CHAN_IDLE;
+
+ if (plchan->phychan) {
+ /*
+ * Mark physical channel as free and free any slave
+ * signal
+ */
+ pl08x_phy_free(plchan);
+ }
+ /* Dequeue jobs and free LLIs */
+ if (plchan->at) {
+ vchan_terminate_vdesc(&plchan->at->vd);
+ plchan->at = NULL;
+ }
+ /* Dequeue jobs not yet fired as well */
+ pl08x_free_txd_list(pl08x, plchan);
+
+ spin_unlock_irqrestore(&plchan->vc.lock, flags);
+
+ return 0;
+}
+
+static void pl08x_synchronize(struct dma_chan *chan)
+{
+ struct pl08x_dma_chan *plchan = to_pl08x_chan(chan);
+
+ vchan_synchronize(&plchan->vc);
+}
+
+static int pl08x_pause(struct dma_chan *chan)
+{
+ struct pl08x_dma_chan *plchan = to_pl08x_chan(chan);
+ unsigned long flags;
+
+ /*
+ * Anything succeeds on channels with no physical allocation and
+ * no queued transfers.
+ */
+ spin_lock_irqsave(&plchan->vc.lock, flags);
+ if (!plchan->phychan && !plchan->at) {
+ spin_unlock_irqrestore(&plchan->vc.lock, flags);
+ return 0;
+ }
+
+ pl08x_pause_phy_chan(plchan->phychan);
+ plchan->state = PL08X_CHAN_PAUSED;
+
+ spin_unlock_irqrestore(&plchan->vc.lock, flags);
+
+ return 0;
+}
+
+static int pl08x_resume(struct dma_chan *chan)
+{
+ struct pl08x_dma_chan *plchan = to_pl08x_chan(chan);
+ unsigned long flags;
+
+ /*
+ * Anything succeeds on channels with no physical allocation and
+ * no queued transfers.
+ */
+ spin_lock_irqsave(&plchan->vc.lock, flags);
+ if (!plchan->phychan && !plchan->at) {
+ spin_unlock_irqrestore(&plchan->vc.lock, flags);
+ return 0;
+ }
+
+ pl08x_resume_phy_chan(plchan->phychan);
+ plchan->state = PL08X_CHAN_RUNNING;
+
+ spin_unlock_irqrestore(&plchan->vc.lock, flags);
+
+ return 0;
+}
+
+bool pl08x_filter_id(struct dma_chan *chan, void *chan_id)
+{
+ struct pl08x_dma_chan *plchan;
+ char *name = chan_id;
+
+ /* Reject channels for devices not bound to this driver */
+ if (chan->device->dev->driver != &pl08x_amba_driver.drv)
+ return false;
+
+ plchan = to_pl08x_chan(chan);
+
+ /* Check that the channel is not taken! */
+ if (!strcmp(plchan->name, name))
+ return true;
+
+ return false;
+}
+EXPORT_SYMBOL_GPL(pl08x_filter_id);
+
+static bool pl08x_filter_fn(struct dma_chan *chan, void *chan_id)
+{
+ struct pl08x_dma_chan *plchan = to_pl08x_chan(chan);
+
+ return plchan->cd == chan_id;
+}
+
+/*
+ * Just check that the device is there and active
+ * TODO: turn this bit on/off depending on the number of physical channels
+ * actually used, if it is zero... well shut it off. That will save some
+ * power. Cut the clock at the same time.
+ */
+static void pl08x_ensure_on(struct pl08x_driver_data *pl08x)
+{
+ /* The Nomadik variant does not have the config register */
+ if (pl08x->vd->nomadik)
+ return;
+ /* The FTDMAC020 variant does this in another register */
+ if (pl08x->vd->ftdmac020) {
+ writel(PL080_CONFIG_ENABLE, pl08x->base + FTDMAC020_CSR);
+ return;
+ }
+ writel(PL080_CONFIG_ENABLE, pl08x->base + PL080_CONFIG);
+}
+
+static irqreturn_t pl08x_irq(int irq, void *dev)
+{
+ struct pl08x_driver_data *pl08x = dev;
+ u32 mask = 0, err, tc, i;
+
+ /* check & clear - ERR & TC interrupts */
+ err = readl(pl08x->base + PL080_ERR_STATUS);
+ if (err) {
+ dev_err(&pl08x->adev->dev, "%s error interrupt, register value 0x%08x\n",
+ __func__, err);
+ writel(err, pl08x->base + PL080_ERR_CLEAR);
+ }
+ tc = readl(pl08x->base + PL080_TC_STATUS);
+ if (tc)
+ writel(tc, pl08x->base + PL080_TC_CLEAR);
+
+ if (!err && !tc)
+ return IRQ_NONE;
+
+ for (i = 0; i < pl08x->vd->channels; i++) {
+ if ((BIT(i) & err) || (BIT(i) & tc)) {
+ /* Locate physical channel */
+ struct pl08x_phy_chan *phychan = &pl08x->phy_chans[i];
+ struct pl08x_dma_chan *plchan = phychan->serving;
+ struct pl08x_txd *tx;
+
+ if (!plchan) {
+ dev_err(&pl08x->adev->dev,
+ "%s Error TC interrupt on unused channel: 0x%08x\n",
+ __func__, i);
+ continue;
+ }
+
+ spin_lock(&plchan->vc.lock);
+ tx = plchan->at;
+ if (tx && tx->cyclic) {
+ vchan_cyclic_callback(&tx->vd);
+ } else if (tx) {
+ plchan->at = NULL;
+ /*
+ * This descriptor is done, release its mux
+ * reservation.
+ */
+ pl08x_release_mux(plchan);
+ tx->done = true;
+ vchan_cookie_complete(&tx->vd);
+
+ /*
+ * And start the next descriptor (if any),
+ * otherwise free this channel.
+ */
+ if (vchan_next_desc(&plchan->vc))
+ pl08x_start_next_txd(plchan);
+ else
+ pl08x_phy_free(plchan);
+ }
+ spin_unlock(&plchan->vc.lock);
+
+ mask |= BIT(i);
+ }
+ }
+
+ return mask ? IRQ_HANDLED : IRQ_NONE;
+}
+
+static void pl08x_dma_slave_init(struct pl08x_dma_chan *chan)
+{
+ chan->slave = true;
+ chan->name = chan->cd->bus_id;
+ chan->cfg.src_addr = chan->cd->addr;
+ chan->cfg.dst_addr = chan->cd->addr;
+}
+
+/*
+ * Initialise the DMAC memcpy/slave channels.
+ * Make a local wrapper to hold required data
+ */
+static int pl08x_dma_init_virtual_channels(struct pl08x_driver_data *pl08x,
+ struct dma_device *dmadev, unsigned int channels, bool slave)
+{
+ struct pl08x_dma_chan *chan;
+ int i;
+
+ INIT_LIST_HEAD(&dmadev->channels);
+
+ /*
+ * Register as many memcpy as we have physical channels,
+ * we won't always be able to use all but the code will have
+ * to cope with that situation.
+ */
+ for (i = 0; i < channels; i++) {
+ chan = kzalloc(sizeof(*chan), GFP_KERNEL);
+ if (!chan)
+ return -ENOMEM;
+
+ chan->host = pl08x;
+ chan->state = PL08X_CHAN_IDLE;
+ chan->signal = -1;
+
+ if (slave) {
+ chan->cd = &pl08x->pd->slave_channels[i];
+ /*
+ * Some implementations have muxed signals, whereas some
+ * use a mux in front of the signals and need dynamic
+ * assignment of signals.
+ */
+ chan->signal = i;
+ pl08x_dma_slave_init(chan);
+ } else {
+ chan->cd = kzalloc(sizeof(*chan->cd), GFP_KERNEL);
+ if (!chan->cd) {
+ kfree(chan);
+ return -ENOMEM;
+ }
+ chan->cd->bus_id = "memcpy";
+ chan->cd->periph_buses = pl08x->pd->mem_buses;
+ chan->name = kasprintf(GFP_KERNEL, "memcpy%d", i);
+ if (!chan->name) {
+ kfree(chan->cd);
+ kfree(chan);
+ return -ENOMEM;
+ }
+ }
+ dev_dbg(&pl08x->adev->dev,
+ "initialize virtual channel \"%s\"\n",
+ chan->name);
+
+ chan->vc.desc_free = pl08x_desc_free;
+ vchan_init(&chan->vc, dmadev);
+ }
+ dev_info(&pl08x->adev->dev, "initialized %d virtual %s channels\n",
+ i, slave ? "slave" : "memcpy");
+ return i;
+}
+
+static void pl08x_free_virtual_channels(struct dma_device *dmadev)
+{
+ struct pl08x_dma_chan *chan = NULL;
+ struct pl08x_dma_chan *next;
+
+ list_for_each_entry_safe(chan,
+ next, &dmadev->channels, vc.chan.device_node) {
+ list_del(&chan->vc.chan.device_node);
+ kfree(chan);
+ }
+}
+
+#ifdef CONFIG_DEBUG_FS
+static const char *pl08x_state_str(enum pl08x_dma_chan_state state)
+{
+ switch (state) {
+ case PL08X_CHAN_IDLE:
+ return "idle";
+ case PL08X_CHAN_RUNNING:
+ return "running";
+ case PL08X_CHAN_PAUSED:
+ return "paused";
+ case PL08X_CHAN_WAITING:
+ return "waiting";
+ default:
+ break;
+ }
+ return "UNKNOWN STATE";
+}
+
+static int pl08x_debugfs_show(struct seq_file *s, void *data)
+{
+ struct pl08x_driver_data *pl08x = s->private;
+ struct pl08x_dma_chan *chan;
+ struct pl08x_phy_chan *ch;
+ unsigned long flags;
+ int i;
+
+ seq_printf(s, "PL08x physical channels:\n");
+ seq_printf(s, "CHANNEL:\tUSER:\n");
+ seq_printf(s, "--------\t-----\n");
+ for (i = 0; i < pl08x->vd->channels; i++) {
+ struct pl08x_dma_chan *virt_chan;
+
+ ch = &pl08x->phy_chans[i];
+
+ spin_lock_irqsave(&ch->lock, flags);
+ virt_chan = ch->serving;
+
+ seq_printf(s, "%d\t\t%s%s\n",
+ ch->id,
+ virt_chan ? virt_chan->name : "(none)",
+ ch->locked ? " LOCKED" : "");
+
+ spin_unlock_irqrestore(&ch->lock, flags);
+ }
+
+ seq_printf(s, "\nPL08x virtual memcpy channels:\n");
+ seq_printf(s, "CHANNEL:\tSTATE:\n");
+ seq_printf(s, "--------\t------\n");
+ list_for_each_entry(chan, &pl08x->memcpy.channels, vc.chan.device_node) {
+ seq_printf(s, "%s\t\t%s\n", chan->name,
+ pl08x_state_str(chan->state));
+ }
+
+ if (pl08x->has_slave) {
+ seq_printf(s, "\nPL08x virtual slave channels:\n");
+ seq_printf(s, "CHANNEL:\tSTATE:\n");
+ seq_printf(s, "--------\t------\n");
+ list_for_each_entry(chan, &pl08x->slave.channels,
+ vc.chan.device_node) {
+ seq_printf(s, "%s\t\t%s\n", chan->name,
+ pl08x_state_str(chan->state));
+ }
+ }
+
+ return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(pl08x_debugfs);
+
+static void init_pl08x_debugfs(struct pl08x_driver_data *pl08x)
+{
+ /* Expose a simple debugfs interface to view all clocks */
+ debugfs_create_file(dev_name(&pl08x->adev->dev), S_IFREG | S_IRUGO,
+ NULL, pl08x, &pl08x_debugfs_fops);
+}
+
+#else
+static inline void init_pl08x_debugfs(struct pl08x_driver_data *pl08x)
+{
+}
+#endif
+
+#ifdef CONFIG_OF
+static struct dma_chan *pl08x_find_chan_id(struct pl08x_driver_data *pl08x,
+ u32 id)
+{
+ struct pl08x_dma_chan *chan;
+
+ /* Trying to get a slave channel from something with no slave support */
+ if (!pl08x->has_slave)
+ return NULL;
+
+ list_for_each_entry(chan, &pl08x->slave.channels, vc.chan.device_node) {
+ if (chan->signal == id)
+ return &chan->vc.chan;
+ }
+
+ return NULL;
+}
+
+static struct dma_chan *pl08x_of_xlate(struct of_phandle_args *dma_spec,
+ struct of_dma *ofdma)
+{
+ struct pl08x_driver_data *pl08x = ofdma->of_dma_data;
+ struct dma_chan *dma_chan;
+ struct pl08x_dma_chan *plchan;
+
+ if (!pl08x)
+ return NULL;
+
+ if (dma_spec->args_count != 2) {
+ dev_err(&pl08x->adev->dev,
+ "DMA channel translation requires two cells\n");
+ return NULL;
+ }
+
+ dma_chan = pl08x_find_chan_id(pl08x, dma_spec->args[0]);
+ if (!dma_chan) {
+ dev_err(&pl08x->adev->dev,
+ "DMA slave channel not found\n");
+ return NULL;
+ }
+
+ plchan = to_pl08x_chan(dma_chan);
+ dev_dbg(&pl08x->adev->dev,
+ "translated channel for signal %d\n",
+ dma_spec->args[0]);
+
+ /* Augment channel data for applicable AHB buses */
+ plchan->cd->periph_buses = dma_spec->args[1];
+ return dma_get_slave_channel(dma_chan);
+}
+
+static int pl08x_of_probe(struct amba_device *adev,
+ struct pl08x_driver_data *pl08x,
+ struct device_node *np)
+{
+ struct pl08x_platform_data *pd;
+ struct pl08x_channel_data *chanp = NULL;
+ u32 val;
+ int ret;
+ int i;
+
+ pd = devm_kzalloc(&adev->dev, sizeof(*pd), GFP_KERNEL);
+ if (!pd)
+ return -ENOMEM;
+
+ /* Eligible bus masters for fetching LLIs */
+ if (of_property_read_bool(np, "lli-bus-interface-ahb1"))
+ pd->lli_buses |= PL08X_AHB1;
+ if (of_property_read_bool(np, "lli-bus-interface-ahb2"))
+ pd->lli_buses |= PL08X_AHB2;
+ if (!pd->lli_buses) {
+ dev_info(&adev->dev, "no bus masters for LLIs stated, assume all\n");
+ pd->lli_buses |= PL08X_AHB1 | PL08X_AHB2;
+ }
+
+ /* Eligible bus masters for memory access */
+ if (of_property_read_bool(np, "mem-bus-interface-ahb1"))
+ pd->mem_buses |= PL08X_AHB1;
+ if (of_property_read_bool(np, "mem-bus-interface-ahb2"))
+ pd->mem_buses |= PL08X_AHB2;
+ if (!pd->mem_buses) {
+ dev_info(&adev->dev, "no bus masters for memory stated, assume all\n");
+ pd->mem_buses |= PL08X_AHB1 | PL08X_AHB2;
+ }
+
+ /* Parse the memcpy channel properties */
+ ret = of_property_read_u32(np, "memcpy-burst-size", &val);
+ if (ret) {
+ dev_info(&adev->dev, "no memcpy burst size specified, using 1 byte\n");
+ val = 1;
+ }
+ switch (val) {
+ default:
+ dev_err(&adev->dev, "illegal burst size for memcpy, set to 1\n");
+ fallthrough;
+ case 1:
+ pd->memcpy_burst_size = PL08X_BURST_SZ_1;
+ break;
+ case 4:
+ pd->memcpy_burst_size = PL08X_BURST_SZ_4;
+ break;
+ case 8:
+ pd->memcpy_burst_size = PL08X_BURST_SZ_8;
+ break;
+ case 16:
+ pd->memcpy_burst_size = PL08X_BURST_SZ_16;
+ break;
+ case 32:
+ pd->memcpy_burst_size = PL08X_BURST_SZ_32;
+ break;
+ case 64:
+ pd->memcpy_burst_size = PL08X_BURST_SZ_64;
+ break;
+ case 128:
+ pd->memcpy_burst_size = PL08X_BURST_SZ_128;
+ break;
+ case 256:
+ pd->memcpy_burst_size = PL08X_BURST_SZ_256;
+ break;
+ }
+
+ ret = of_property_read_u32(np, "memcpy-bus-width", &val);
+ if (ret) {
+ dev_info(&adev->dev, "no memcpy bus width specified, using 8 bits\n");
+ val = 8;
+ }
+ switch (val) {
+ default:
+ dev_err(&adev->dev, "illegal bus width for memcpy, set to 8 bits\n");
+ fallthrough;
+ case 8:
+ pd->memcpy_bus_width = PL08X_BUS_WIDTH_8_BITS;
+ break;
+ case 16:
+ pd->memcpy_bus_width = PL08X_BUS_WIDTH_16_BITS;
+ break;
+ case 32:
+ pd->memcpy_bus_width = PL08X_BUS_WIDTH_32_BITS;
+ break;
+ }
+
+ /*
+ * Allocate channel data for all possible slave channels (one
+ * for each possible signal), channels will then be allocated
+ * for a device and have it's AHB interfaces set up at
+ * translation time.
+ */
+ if (pl08x->vd->signals) {
+ chanp = devm_kcalloc(&adev->dev,
+ pl08x->vd->signals,
+ sizeof(struct pl08x_channel_data),
+ GFP_KERNEL);
+ if (!chanp)
+ return -ENOMEM;
+
+ pd->slave_channels = chanp;
+ for (i = 0; i < pl08x->vd->signals; i++) {
+ /*
+ * chanp->periph_buses will be assigned at translation
+ */
+ chanp->bus_id = kasprintf(GFP_KERNEL, "slave%d", i);
+ chanp++;
+ }
+ pd->num_slave_channels = pl08x->vd->signals;
+ }
+
+ pl08x->pd = pd;
+
+ return of_dma_controller_register(adev->dev.of_node, pl08x_of_xlate,
+ pl08x);
+}
+#else
+static inline int pl08x_of_probe(struct amba_device *adev,
+ struct pl08x_driver_data *pl08x,
+ struct device_node *np)
+{
+ return -EINVAL;
+}
+#endif
+
+static int pl08x_probe(struct amba_device *adev, const struct amba_id *id)
+{
+ struct pl08x_driver_data *pl08x;
+ struct vendor_data *vd = id->data;
+ struct device_node *np = adev->dev.of_node;
+ u32 tsfr_size;
+ int ret = 0;
+ int i;
+
+ ret = amba_request_regions(adev, NULL);
+ if (ret)
+ return ret;
+
+ /* Ensure that we can do DMA */
+ ret = dma_set_mask_and_coherent(&adev->dev, DMA_BIT_MASK(32));
+ if (ret)
+ goto out_no_pl08x;
+
+ /* Create the driver state holder */
+ pl08x = kzalloc(sizeof(*pl08x), GFP_KERNEL);
+ if (!pl08x) {
+ ret = -ENOMEM;
+ goto out_no_pl08x;
+ }
+
+ /* Assign useful pointers to the driver state */
+ pl08x->adev = adev;
+ pl08x->vd = vd;
+
+ pl08x->base = ioremap(adev->res.start, resource_size(&adev->res));
+ if (!pl08x->base) {
+ ret = -ENOMEM;
+ goto out_no_ioremap;
+ }
+
+ if (vd->ftdmac020) {
+ u32 val;
+
+ val = readl(pl08x->base + FTDMAC020_REVISION);
+ dev_info(&pl08x->adev->dev, "FTDMAC020 %d.%d rel %d\n",
+ (val >> 16) & 0xff, (val >> 8) & 0xff, val & 0xff);
+ val = readl(pl08x->base + FTDMAC020_FEATURE);
+ dev_info(&pl08x->adev->dev, "FTDMAC020 %d channels, "
+ "%s built-in bridge, %s, %s linked lists\n",
+ (val >> 12) & 0x0f,
+ (val & BIT(10)) ? "no" : "has",
+ (val & BIT(9)) ? "AHB0 and AHB1" : "AHB0",
+ (val & BIT(8)) ? "supports" : "does not support");
+
+ /* Vendor data from feature register */
+ if (!(val & BIT(8)))
+ dev_warn(&pl08x->adev->dev,
+ "linked lists not supported, required\n");
+ vd->channels = (val >> 12) & 0x0f;
+ vd->dualmaster = !!(val & BIT(9));
+ }
+
+ /* Initialize memcpy engine */
+ dma_cap_set(DMA_MEMCPY, pl08x->memcpy.cap_mask);
+ pl08x->memcpy.dev = &adev->dev;
+ pl08x->memcpy.device_free_chan_resources = pl08x_free_chan_resources;
+ pl08x->memcpy.device_prep_dma_memcpy = pl08x_prep_dma_memcpy;
+ pl08x->memcpy.device_tx_status = pl08x_dma_tx_status;
+ pl08x->memcpy.device_issue_pending = pl08x_issue_pending;
+ pl08x->memcpy.device_config = pl08x_config;
+ pl08x->memcpy.device_pause = pl08x_pause;
+ pl08x->memcpy.device_resume = pl08x_resume;
+ pl08x->memcpy.device_terminate_all = pl08x_terminate_all;
+ pl08x->memcpy.device_synchronize = pl08x_synchronize;
+ pl08x->memcpy.src_addr_widths = PL80X_DMA_BUSWIDTHS;
+ pl08x->memcpy.dst_addr_widths = PL80X_DMA_BUSWIDTHS;
+ pl08x->memcpy.directions = BIT(DMA_MEM_TO_MEM);
+ pl08x->memcpy.residue_granularity = DMA_RESIDUE_GRANULARITY_SEGMENT;
+ if (vd->ftdmac020)
+ pl08x->memcpy.copy_align = DMAENGINE_ALIGN_4_BYTES;
+
+
+ /*
+ * Initialize slave engine, if the block has no signals, that means
+ * we have no slave support.
+ */
+ if (vd->signals) {
+ pl08x->has_slave = true;
+ dma_cap_set(DMA_SLAVE, pl08x->slave.cap_mask);
+ dma_cap_set(DMA_CYCLIC, pl08x->slave.cap_mask);
+ pl08x->slave.dev = &adev->dev;
+ pl08x->slave.device_free_chan_resources =
+ pl08x_free_chan_resources;
+ pl08x->slave.device_tx_status = pl08x_dma_tx_status;
+ pl08x->slave.device_issue_pending = pl08x_issue_pending;
+ pl08x->slave.device_prep_slave_sg = pl08x_prep_slave_sg;
+ pl08x->slave.device_prep_dma_cyclic = pl08x_prep_dma_cyclic;
+ pl08x->slave.device_config = pl08x_config;
+ pl08x->slave.device_pause = pl08x_pause;
+ pl08x->slave.device_resume = pl08x_resume;
+ pl08x->slave.device_terminate_all = pl08x_terminate_all;
+ pl08x->slave.device_synchronize = pl08x_synchronize;
+ pl08x->slave.src_addr_widths = PL80X_DMA_BUSWIDTHS;
+ pl08x->slave.dst_addr_widths = PL80X_DMA_BUSWIDTHS;
+ pl08x->slave.directions =
+ BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
+ pl08x->slave.residue_granularity =
+ DMA_RESIDUE_GRANULARITY_SEGMENT;
+ }
+
+ /* Get the platform data */
+ pl08x->pd = dev_get_platdata(&adev->dev);
+ if (!pl08x->pd) {
+ if (np) {
+ ret = pl08x_of_probe(adev, pl08x, np);
+ if (ret)
+ goto out_no_platdata;
+ } else {
+ dev_err(&adev->dev, "no platform data supplied\n");
+ ret = -EINVAL;
+ goto out_no_platdata;
+ }
+ } else {
+ pl08x->slave.filter.map = pl08x->pd->slave_map;
+ pl08x->slave.filter.mapcnt = pl08x->pd->slave_map_len;
+ pl08x->slave.filter.fn = pl08x_filter_fn;
+ }
+
+ /* By default, AHB1 only. If dualmaster, from platform */
+ pl08x->lli_buses = PL08X_AHB1;
+ pl08x->mem_buses = PL08X_AHB1;
+ if (pl08x->vd->dualmaster) {
+ pl08x->lli_buses = pl08x->pd->lli_buses;
+ pl08x->mem_buses = pl08x->pd->mem_buses;
+ }
+
+ if (vd->pl080s)
+ pl08x->lli_words = PL080S_LLI_WORDS;
+ else
+ pl08x->lli_words = PL080_LLI_WORDS;
+ tsfr_size = MAX_NUM_TSFR_LLIS * pl08x->lli_words * sizeof(u32);
+
+ /* A DMA memory pool for LLIs, align on 1-byte boundary */
+ pl08x->pool = dma_pool_create(DRIVER_NAME, &pl08x->adev->dev,
+ tsfr_size, PL08X_ALIGN, 0);
+ if (!pl08x->pool) {
+ ret = -ENOMEM;
+ goto out_no_lli_pool;
+ }
+
+ /* Turn on the PL08x */
+ pl08x_ensure_on(pl08x);
+
+ /* Clear any pending interrupts */
+ if (vd->ftdmac020)
+ /* This variant has error IRQs in bits 16-19 */
+ writel(0x0000FFFF, pl08x->base + PL080_ERR_CLEAR);
+ else
+ writel(0x000000FF, pl08x->base + PL080_ERR_CLEAR);
+ writel(0x000000FF, pl08x->base + PL080_TC_CLEAR);
+
+ /* Attach the interrupt handler */
+ ret = request_irq(adev->irq[0], pl08x_irq, 0, DRIVER_NAME, pl08x);
+ if (ret) {
+ dev_err(&adev->dev, "%s failed to request interrupt %d\n",
+ __func__, adev->irq[0]);
+ goto out_no_irq;
+ }
+
+ /* Initialize physical channels */
+ pl08x->phy_chans = kzalloc((vd->channels * sizeof(*pl08x->phy_chans)),
+ GFP_KERNEL);
+ if (!pl08x->phy_chans) {
+ ret = -ENOMEM;
+ goto out_no_phychans;
+ }
+
+ for (i = 0; i < vd->channels; i++) {
+ struct pl08x_phy_chan *ch = &pl08x->phy_chans[i];
+
+ ch->id = i;
+ ch->base = pl08x->base + PL080_Cx_BASE(i);
+ if (vd->ftdmac020) {
+ /* FTDMA020 has a special channel busy register */
+ ch->reg_busy = ch->base + FTDMAC020_CH_BUSY;
+ ch->reg_config = ch->base + FTDMAC020_CH_CFG;
+ ch->reg_control = ch->base + FTDMAC020_CH_CSR;
+ ch->reg_src = ch->base + FTDMAC020_CH_SRC_ADDR;
+ ch->reg_dst = ch->base + FTDMAC020_CH_DST_ADDR;
+ ch->reg_lli = ch->base + FTDMAC020_CH_LLP;
+ ch->ftdmac020 = true;
+ } else {
+ ch->reg_config = ch->base + vd->config_offset;
+ ch->reg_control = ch->base + PL080_CH_CONTROL;
+ ch->reg_src = ch->base + PL080_CH_SRC_ADDR;
+ ch->reg_dst = ch->base + PL080_CH_DST_ADDR;
+ ch->reg_lli = ch->base + PL080_CH_LLI;
+ }
+ if (vd->pl080s)
+ ch->pl080s = true;
+
+ spin_lock_init(&ch->lock);
+
+ /*
+ * Nomadik variants can have channels that are locked
+ * down for the secure world only. Lock up these channels
+ * by perpetually serving a dummy virtual channel.
+ */
+ if (vd->nomadik) {
+ u32 val;
+
+ val = readl(ch->reg_config);
+ if (val & (PL080N_CONFIG_ITPROT | PL080N_CONFIG_SECPROT)) {
+ dev_info(&adev->dev, "physical channel %d reserved for secure access only\n", i);
+ ch->locked = true;
+ }
+ }
+
+ dev_dbg(&adev->dev, "physical channel %d is %s\n",
+ i, pl08x_phy_channel_busy(ch) ? "BUSY" : "FREE");
+ }
+
+ /* Register as many memcpy channels as there are physical channels */
+ ret = pl08x_dma_init_virtual_channels(pl08x, &pl08x->memcpy,
+ pl08x->vd->channels, false);
+ if (ret <= 0) {
+ dev_warn(&pl08x->adev->dev,
+ "%s failed to enumerate memcpy channels - %d\n",
+ __func__, ret);
+ goto out_no_memcpy;
+ }
+
+ /* Register slave channels */
+ if (pl08x->has_slave) {
+ ret = pl08x_dma_init_virtual_channels(pl08x, &pl08x->slave,
+ pl08x->pd->num_slave_channels, true);
+ if (ret < 0) {
+ dev_warn(&pl08x->adev->dev,
+ "%s failed to enumerate slave channels - %d\n",
+ __func__, ret);
+ goto out_no_slave;
+ }
+ }
+
+ ret = dma_async_device_register(&pl08x->memcpy);
+ if (ret) {
+ dev_warn(&pl08x->adev->dev,
+ "%s failed to register memcpy as an async device - %d\n",
+ __func__, ret);
+ goto out_no_memcpy_reg;
+ }
+
+ if (pl08x->has_slave) {
+ ret = dma_async_device_register(&pl08x->slave);
+ if (ret) {
+ dev_warn(&pl08x->adev->dev,
+ "%s failed to register slave as an async device - %d\n",
+ __func__, ret);
+ goto out_no_slave_reg;
+ }
+ }
+
+ amba_set_drvdata(adev, pl08x);
+ init_pl08x_debugfs(pl08x);
+ dev_info(&pl08x->adev->dev, "DMA: PL%03x%s rev%u at 0x%08llx irq %d\n",
+ amba_part(adev), pl08x->vd->pl080s ? "s" : "", amba_rev(adev),
+ (unsigned long long)adev->res.start, adev->irq[0]);
+
+ return 0;
+
+out_no_slave_reg:
+ dma_async_device_unregister(&pl08x->memcpy);
+out_no_memcpy_reg:
+ if (pl08x->has_slave)
+ pl08x_free_virtual_channels(&pl08x->slave);
+out_no_slave:
+ pl08x_free_virtual_channels(&pl08x->memcpy);
+out_no_memcpy:
+ kfree(pl08x->phy_chans);
+out_no_phychans:
+ free_irq(adev->irq[0], pl08x);
+out_no_irq:
+ dma_pool_destroy(pl08x->pool);
+out_no_lli_pool:
+out_no_platdata:
+ iounmap(pl08x->base);
+out_no_ioremap:
+ kfree(pl08x);
+out_no_pl08x:
+ amba_release_regions(adev);
+ return ret;
+}
+
+/* PL080 has 8 channels and the PL080 have just 2 */
+static struct vendor_data vendor_pl080 = {
+ .config_offset = PL080_CH_CONFIG,
+ .channels = 8,
+ .signals = 16,
+ .dualmaster = true,
+ .max_transfer_size = PL080_CONTROL_TRANSFER_SIZE_MASK,
+};
+
+static struct vendor_data vendor_nomadik = {
+ .config_offset = PL080_CH_CONFIG,
+ .channels = 8,
+ .signals = 32,
+ .dualmaster = true,
+ .nomadik = true,
+ .max_transfer_size = PL080_CONTROL_TRANSFER_SIZE_MASK,
+};
+
+static struct vendor_data vendor_pl080s = {
+ .config_offset = PL080S_CH_CONFIG,
+ .channels = 8,
+ .signals = 32,
+ .pl080s = true,
+ .max_transfer_size = PL080S_CONTROL_TRANSFER_SIZE_MASK,
+};
+
+static struct vendor_data vendor_pl081 = {
+ .config_offset = PL080_CH_CONFIG,
+ .channels = 2,
+ .signals = 16,
+ .dualmaster = false,
+ .max_transfer_size = PL080_CONTROL_TRANSFER_SIZE_MASK,
+};
+
+static struct vendor_data vendor_ftdmac020 = {
+ .config_offset = PL080_CH_CONFIG,
+ .ftdmac020 = true,
+ .max_transfer_size = PL080_CONTROL_TRANSFER_SIZE_MASK,
+};
+
+static const struct amba_id pl08x_ids[] = {
+ /* Samsung PL080S variant */
+ {
+ .id = 0x0a141080,
+ .mask = 0xffffffff,
+ .data = &vendor_pl080s,
+ },
+ /* PL080 */
+ {
+ .id = 0x00041080,
+ .mask = 0x000fffff,
+ .data = &vendor_pl080,
+ },
+ /* PL081 */
+ {
+ .id = 0x00041081,
+ .mask = 0x000fffff,
+ .data = &vendor_pl081,
+ },
+ /* Nomadik 8815 PL080 variant */
+ {
+ .id = 0x00280080,
+ .mask = 0x00ffffff,
+ .data = &vendor_nomadik,
+ },
+ /* Faraday Technology FTDMAC020 */
+ {
+ .id = 0x0003b080,
+ .mask = 0x000fffff,
+ .data = &vendor_ftdmac020,
+ },
+ { 0, 0 },
+};
+
+MODULE_DEVICE_TABLE(amba, pl08x_ids);
+
+static struct amba_driver pl08x_amba_driver = {
+ .drv.name = DRIVER_NAME,
+ .id_table = pl08x_ids,
+ .probe = pl08x_probe,
+};
+
+static int __init pl08x_init(void)
+{
+ int retval;
+ retval = amba_driver_register(&pl08x_amba_driver);
+ if (retval)
+ printk(KERN_WARNING DRIVER_NAME
+ "failed to register as an AMBA device (%d)\n",
+ retval);
+ return retval;
+}
+subsys_initcall(pl08x_init);
diff --git a/drivers/dma/apple-admac.c b/drivers/dma/apple-admac.c
new file mode 100644
index 000000000..4cf8da77b
--- /dev/null
+++ b/drivers/dma/apple-admac.c
@@ -0,0 +1,957 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Driver for Audio DMA Controller (ADMAC) on t8103 (M1) and other Apple chips
+ *
+ * Copyright (C) The Asahi Linux Contributors
+ */
+
+#include <linux/bits.h>
+#include <linux/bitfield.h>
+#include <linux/device.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/of_dma.h>
+#include <linux/reset.h>
+#include <linux/spinlock.h>
+#include <linux/interrupt.h>
+
+#include "dmaengine.h"
+
+#define NCHANNELS_MAX 64
+#define IRQ_NOUTPUTS 4
+
+/*
+ * For allocation purposes we split the cache
+ * memory into blocks of fixed size (given in bytes).
+ */
+#define SRAM_BLOCK 2048
+
+#define RING_WRITE_SLOT GENMASK(1, 0)
+#define RING_READ_SLOT GENMASK(5, 4)
+#define RING_FULL BIT(9)
+#define RING_EMPTY BIT(8)
+#define RING_ERR BIT(10)
+
+#define STATUS_DESC_DONE BIT(0)
+#define STATUS_ERR BIT(6)
+
+#define FLAG_DESC_NOTIFY BIT(16)
+
+#define REG_TX_START 0x0000
+#define REG_TX_STOP 0x0004
+#define REG_RX_START 0x0008
+#define REG_RX_STOP 0x000c
+#define REG_IMPRINT 0x0090
+#define REG_TX_SRAM_SIZE 0x0094
+#define REG_RX_SRAM_SIZE 0x0098
+
+#define REG_CHAN_CTL(ch) (0x8000 + (ch) * 0x200)
+#define REG_CHAN_CTL_RST_RINGS BIT(0)
+
+#define REG_DESC_RING(ch) (0x8070 + (ch) * 0x200)
+#define REG_REPORT_RING(ch) (0x8074 + (ch) * 0x200)
+
+#define REG_RESIDUE(ch) (0x8064 + (ch) * 0x200)
+
+#define REG_BUS_WIDTH(ch) (0x8040 + (ch) * 0x200)
+
+#define BUS_WIDTH_8BIT 0x00
+#define BUS_WIDTH_16BIT 0x01
+#define BUS_WIDTH_32BIT 0x02
+#define BUS_WIDTH_FRAME_2_WORDS 0x10
+#define BUS_WIDTH_FRAME_4_WORDS 0x20
+
+#define REG_CHAN_SRAM_CARVEOUT(ch) (0x8050 + (ch) * 0x200)
+#define CHAN_SRAM_CARVEOUT_SIZE GENMASK(31, 16)
+#define CHAN_SRAM_CARVEOUT_BASE GENMASK(15, 0)
+
+#define REG_CHAN_FIFOCTL(ch) (0x8054 + (ch) * 0x200)
+#define CHAN_FIFOCTL_LIMIT GENMASK(31, 16)
+#define CHAN_FIFOCTL_THRESHOLD GENMASK(15, 0)
+
+#define REG_DESC_WRITE(ch) (0x10000 + ((ch) / 2) * 0x4 + ((ch) & 1) * 0x4000)
+#define REG_REPORT_READ(ch) (0x10100 + ((ch) / 2) * 0x4 + ((ch) & 1) * 0x4000)
+
+#define REG_TX_INTSTATE(idx) (0x0030 + (idx) * 4)
+#define REG_RX_INTSTATE(idx) (0x0040 + (idx) * 4)
+#define REG_GLOBAL_INTSTATE(idx) (0x0050 + (idx) * 4)
+#define REG_CHAN_INTSTATUS(ch, idx) (0x8010 + (ch) * 0x200 + (idx) * 4)
+#define REG_CHAN_INTMASK(ch, idx) (0x8020 + (ch) * 0x200 + (idx) * 4)
+
+struct admac_data;
+struct admac_tx;
+
+struct admac_chan {
+ unsigned int no;
+ struct admac_data *host;
+ struct dma_chan chan;
+ struct tasklet_struct tasklet;
+
+ u32 carveout;
+
+ spinlock_t lock;
+ struct admac_tx *current_tx;
+ int nperiod_acks;
+
+ /*
+ * We maintain a 'submitted' and 'issued' list mainly for interface
+ * correctness. Typical use of the driver (per channel) will be
+ * prepping, submitting and issuing a single cyclic transaction which
+ * will stay current until terminate_all is called.
+ */
+ struct list_head submitted;
+ struct list_head issued;
+
+ struct list_head to_free;
+};
+
+struct admac_sram {
+ u32 size;
+ /*
+ * SRAM_CARVEOUT has 16-bit fields, so the SRAM cannot be larger than
+ * 64K and a 32-bit bitfield over 2K blocks covers it.
+ */
+ u32 allocated;
+};
+
+struct admac_data {
+ struct dma_device dma;
+ struct device *dev;
+ __iomem void *base;
+ struct reset_control *rstc;
+
+ struct mutex cache_alloc_lock;
+ struct admac_sram txcache, rxcache;
+
+ int irq;
+ int irq_index;
+ int nchannels;
+ struct admac_chan channels[];
+};
+
+struct admac_tx {
+ struct dma_async_tx_descriptor tx;
+ bool cyclic;
+ dma_addr_t buf_addr;
+ dma_addr_t buf_end;
+ size_t buf_len;
+ size_t period_len;
+
+ size_t submitted_pos;
+ size_t reclaimed_pos;
+
+ struct list_head node;
+};
+
+static int admac_alloc_sram_carveout(struct admac_data *ad,
+ enum dma_transfer_direction dir,
+ u32 *out)
+{
+ struct admac_sram *sram;
+ int i, ret = 0, nblocks;
+
+ if (dir == DMA_MEM_TO_DEV)
+ sram = &ad->txcache;
+ else
+ sram = &ad->rxcache;
+
+ mutex_lock(&ad->cache_alloc_lock);
+
+ nblocks = sram->size / SRAM_BLOCK;
+ for (i = 0; i < nblocks; i++)
+ if (!(sram->allocated & BIT(i)))
+ break;
+
+ if (i < nblocks) {
+ *out = FIELD_PREP(CHAN_SRAM_CARVEOUT_BASE, i * SRAM_BLOCK) |
+ FIELD_PREP(CHAN_SRAM_CARVEOUT_SIZE, SRAM_BLOCK);
+ sram->allocated |= BIT(i);
+ } else {
+ ret = -EBUSY;
+ }
+
+ mutex_unlock(&ad->cache_alloc_lock);
+
+ return ret;
+}
+
+static void admac_free_sram_carveout(struct admac_data *ad,
+ enum dma_transfer_direction dir,
+ u32 carveout)
+{
+ struct admac_sram *sram;
+ u32 base = FIELD_GET(CHAN_SRAM_CARVEOUT_BASE, carveout);
+ int i;
+
+ if (dir == DMA_MEM_TO_DEV)
+ sram = &ad->txcache;
+ else
+ sram = &ad->rxcache;
+
+ if (WARN_ON(base >= sram->size))
+ return;
+
+ mutex_lock(&ad->cache_alloc_lock);
+ i = base / SRAM_BLOCK;
+ sram->allocated &= ~BIT(i);
+ mutex_unlock(&ad->cache_alloc_lock);
+}
+
+static void admac_modify(struct admac_data *ad, int reg, u32 mask, u32 val)
+{
+ void __iomem *addr = ad->base + reg;
+ u32 curr = readl_relaxed(addr);
+
+ writel_relaxed((curr & ~mask) | (val & mask), addr);
+}
+
+static struct admac_chan *to_admac_chan(struct dma_chan *chan)
+{
+ return container_of(chan, struct admac_chan, chan);
+}
+
+static struct admac_tx *to_admac_tx(struct dma_async_tx_descriptor *tx)
+{
+ return container_of(tx, struct admac_tx, tx);
+}
+
+static enum dma_transfer_direction admac_chan_direction(int channo)
+{
+ /* Channel directions are hardwired */
+ return (channo & 1) ? DMA_DEV_TO_MEM : DMA_MEM_TO_DEV;
+}
+
+static dma_cookie_t admac_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+ struct admac_tx *adtx = to_admac_tx(tx);
+ struct admac_chan *adchan = to_admac_chan(tx->chan);
+ unsigned long flags;
+ dma_cookie_t cookie;
+
+ spin_lock_irqsave(&adchan->lock, flags);
+ cookie = dma_cookie_assign(tx);
+ list_add_tail(&adtx->node, &adchan->submitted);
+ spin_unlock_irqrestore(&adchan->lock, flags);
+
+ return cookie;
+}
+
+static int admac_desc_free(struct dma_async_tx_descriptor *tx)
+{
+ kfree(to_admac_tx(tx));
+
+ return 0;
+}
+
+static struct dma_async_tx_descriptor *admac_prep_dma_cyclic(
+ struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
+ size_t period_len, enum dma_transfer_direction direction,
+ unsigned long flags)
+{
+ struct admac_chan *adchan = container_of(chan, struct admac_chan, chan);
+ struct admac_tx *adtx;
+
+ if (direction != admac_chan_direction(adchan->no))
+ return NULL;
+
+ adtx = kzalloc(sizeof(*adtx), GFP_NOWAIT);
+ if (!adtx)
+ return NULL;
+
+ adtx->cyclic = true;
+
+ adtx->buf_addr = buf_addr;
+ adtx->buf_len = buf_len;
+ adtx->buf_end = buf_addr + buf_len;
+ adtx->period_len = period_len;
+
+ adtx->submitted_pos = 0;
+ adtx->reclaimed_pos = 0;
+
+ dma_async_tx_descriptor_init(&adtx->tx, chan);
+ adtx->tx.tx_submit = admac_tx_submit;
+ adtx->tx.desc_free = admac_desc_free;
+
+ return &adtx->tx;
+}
+
+/*
+ * Write one hardware descriptor for a dmaengine cyclic transaction.
+ */
+static void admac_cyclic_write_one_desc(struct admac_data *ad, int channo,
+ struct admac_tx *tx)
+{
+ dma_addr_t addr;
+
+ addr = tx->buf_addr + (tx->submitted_pos % tx->buf_len);
+
+ /* If happens means we have buggy code */
+ WARN_ON_ONCE(addr + tx->period_len > tx->buf_end);
+
+ dev_dbg(ad->dev, "ch%d descriptor: addr=0x%pad len=0x%zx flags=0x%lx\n",
+ channo, &addr, tx->period_len, FLAG_DESC_NOTIFY);
+
+ writel_relaxed(lower_32_bits(addr), ad->base + REG_DESC_WRITE(channo));
+ writel_relaxed(upper_32_bits(addr), ad->base + REG_DESC_WRITE(channo));
+ writel_relaxed(tx->period_len, ad->base + REG_DESC_WRITE(channo));
+ writel_relaxed(FLAG_DESC_NOTIFY, ad->base + REG_DESC_WRITE(channo));
+
+ tx->submitted_pos += tx->period_len;
+ tx->submitted_pos %= 2 * tx->buf_len;
+}
+
+/*
+ * Write all the hardware descriptors for a dmaengine cyclic
+ * transaction there is space for.
+ */
+static void admac_cyclic_write_desc(struct admac_data *ad, int channo,
+ struct admac_tx *tx)
+{
+ int i;
+
+ for (i = 0; i < 4; i++) {
+ if (readl_relaxed(ad->base + REG_DESC_RING(channo)) & RING_FULL)
+ break;
+ admac_cyclic_write_one_desc(ad, channo, tx);
+ }
+}
+
+static int admac_ring_noccupied_slots(int ringval)
+{
+ int wrslot = FIELD_GET(RING_WRITE_SLOT, ringval);
+ int rdslot = FIELD_GET(RING_READ_SLOT, ringval);
+
+ if (wrslot != rdslot) {
+ return (wrslot + 4 - rdslot) % 4;
+ } else {
+ WARN_ON((ringval & (RING_FULL | RING_EMPTY)) == 0);
+
+ if (ringval & RING_FULL)
+ return 4;
+ else
+ return 0;
+ }
+}
+
+/*
+ * Read from hardware the residue of a cyclic dmaengine transaction.
+ */
+static u32 admac_cyclic_read_residue(struct admac_data *ad, int channo,
+ struct admac_tx *adtx)
+{
+ u32 ring1, ring2;
+ u32 residue1, residue2;
+ int nreports;
+ size_t pos;
+
+ ring1 = readl_relaxed(ad->base + REG_REPORT_RING(channo));
+ residue1 = readl_relaxed(ad->base + REG_RESIDUE(channo));
+ ring2 = readl_relaxed(ad->base + REG_REPORT_RING(channo));
+ residue2 = readl_relaxed(ad->base + REG_RESIDUE(channo));
+
+ if (residue2 > residue1) {
+ /*
+ * Controller must have loaded next descriptor between
+ * the two residue reads
+ */
+ nreports = admac_ring_noccupied_slots(ring1) + 1;
+ } else {
+ /* No descriptor load between the two reads, ring2 is safe to use */
+ nreports = admac_ring_noccupied_slots(ring2);
+ }
+
+ pos = adtx->reclaimed_pos + adtx->period_len * (nreports + 1) - residue2;
+
+ return adtx->buf_len - pos % adtx->buf_len;
+}
+
+static enum dma_status admac_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
+ struct dma_tx_state *txstate)
+{
+ struct admac_chan *adchan = to_admac_chan(chan);
+ struct admac_data *ad = adchan->host;
+ struct admac_tx *adtx;
+
+ enum dma_status ret;
+ size_t residue;
+ unsigned long flags;
+
+ ret = dma_cookie_status(chan, cookie, txstate);
+ if (ret == DMA_COMPLETE || !txstate)
+ return ret;
+
+ spin_lock_irqsave(&adchan->lock, flags);
+ adtx = adchan->current_tx;
+
+ if (adtx && adtx->tx.cookie == cookie) {
+ ret = DMA_IN_PROGRESS;
+ residue = admac_cyclic_read_residue(ad, adchan->no, adtx);
+ } else {
+ ret = DMA_IN_PROGRESS;
+ residue = 0;
+ list_for_each_entry(adtx, &adchan->issued, node) {
+ if (adtx->tx.cookie == cookie) {
+ residue = adtx->buf_len;
+ break;
+ }
+ }
+ }
+ spin_unlock_irqrestore(&adchan->lock, flags);
+
+ dma_set_residue(txstate, residue);
+ return ret;
+}
+
+static void admac_start_chan(struct admac_chan *adchan)
+{
+ struct admac_data *ad = adchan->host;
+ u32 startbit = 1 << (adchan->no / 2);
+
+ writel_relaxed(STATUS_DESC_DONE | STATUS_ERR,
+ ad->base + REG_CHAN_INTSTATUS(adchan->no, ad->irq_index));
+ writel_relaxed(STATUS_DESC_DONE | STATUS_ERR,
+ ad->base + REG_CHAN_INTMASK(adchan->no, ad->irq_index));
+
+ switch (admac_chan_direction(adchan->no)) {
+ case DMA_MEM_TO_DEV:
+ writel_relaxed(startbit, ad->base + REG_TX_START);
+ break;
+ case DMA_DEV_TO_MEM:
+ writel_relaxed(startbit, ad->base + REG_RX_START);
+ break;
+ default:
+ break;
+ }
+ dev_dbg(adchan->host->dev, "ch%d start\n", adchan->no);
+}
+
+static void admac_stop_chan(struct admac_chan *adchan)
+{
+ struct admac_data *ad = adchan->host;
+ u32 stopbit = 1 << (adchan->no / 2);
+
+ switch (admac_chan_direction(adchan->no)) {
+ case DMA_MEM_TO_DEV:
+ writel_relaxed(stopbit, ad->base + REG_TX_STOP);
+ break;
+ case DMA_DEV_TO_MEM:
+ writel_relaxed(stopbit, ad->base + REG_RX_STOP);
+ break;
+ default:
+ break;
+ }
+ dev_dbg(adchan->host->dev, "ch%d stop\n", adchan->no);
+}
+
+static void admac_reset_rings(struct admac_chan *adchan)
+{
+ struct admac_data *ad = adchan->host;
+
+ writel_relaxed(REG_CHAN_CTL_RST_RINGS,
+ ad->base + REG_CHAN_CTL(adchan->no));
+ writel_relaxed(0, ad->base + REG_CHAN_CTL(adchan->no));
+}
+
+static void admac_start_current_tx(struct admac_chan *adchan)
+{
+ struct admac_data *ad = adchan->host;
+ int ch = adchan->no;
+
+ admac_reset_rings(adchan);
+ writel_relaxed(0, ad->base + REG_CHAN_CTL(ch));
+
+ admac_cyclic_write_one_desc(ad, ch, adchan->current_tx);
+ admac_start_chan(adchan);
+ admac_cyclic_write_desc(ad, ch, adchan->current_tx);
+}
+
+static void admac_issue_pending(struct dma_chan *chan)
+{
+ struct admac_chan *adchan = to_admac_chan(chan);
+ struct admac_tx *tx;
+ unsigned long flags;
+
+ spin_lock_irqsave(&adchan->lock, flags);
+ list_splice_tail_init(&adchan->submitted, &adchan->issued);
+ if (!list_empty(&adchan->issued) && !adchan->current_tx) {
+ tx = list_first_entry(&adchan->issued, struct admac_tx, node);
+ list_del(&tx->node);
+
+ adchan->current_tx = tx;
+ adchan->nperiod_acks = 0;
+ admac_start_current_tx(adchan);
+ }
+ spin_unlock_irqrestore(&adchan->lock, flags);
+}
+
+static int admac_pause(struct dma_chan *chan)
+{
+ struct admac_chan *adchan = to_admac_chan(chan);
+
+ admac_stop_chan(adchan);
+
+ return 0;
+}
+
+static int admac_resume(struct dma_chan *chan)
+{
+ struct admac_chan *adchan = to_admac_chan(chan);
+
+ admac_start_chan(adchan);
+
+ return 0;
+}
+
+static int admac_terminate_all(struct dma_chan *chan)
+{
+ struct admac_chan *adchan = to_admac_chan(chan);
+ unsigned long flags;
+
+ spin_lock_irqsave(&adchan->lock, flags);
+ admac_stop_chan(adchan);
+ admac_reset_rings(adchan);
+
+ if (adchan->current_tx) {
+ list_add_tail(&adchan->current_tx->node, &adchan->to_free);
+ adchan->current_tx = NULL;
+ }
+ /*
+ * Descriptors can only be freed after the tasklet
+ * has been killed (in admac_synchronize).
+ */
+ list_splice_tail_init(&adchan->submitted, &adchan->to_free);
+ list_splice_tail_init(&adchan->issued, &adchan->to_free);
+ spin_unlock_irqrestore(&adchan->lock, flags);
+
+ return 0;
+}
+
+static void admac_synchronize(struct dma_chan *chan)
+{
+ struct admac_chan *adchan = to_admac_chan(chan);
+ struct admac_tx *adtx, *_adtx;
+ unsigned long flags;
+ LIST_HEAD(head);
+
+ spin_lock_irqsave(&adchan->lock, flags);
+ list_splice_tail_init(&adchan->to_free, &head);
+ spin_unlock_irqrestore(&adchan->lock, flags);
+
+ tasklet_kill(&adchan->tasklet);
+
+ list_for_each_entry_safe(adtx, _adtx, &head, node) {
+ list_del(&adtx->node);
+ admac_desc_free(&adtx->tx);
+ }
+}
+
+static int admac_alloc_chan_resources(struct dma_chan *chan)
+{
+ struct admac_chan *adchan = to_admac_chan(chan);
+ struct admac_data *ad = adchan->host;
+ int ret;
+
+ dma_cookie_init(&adchan->chan);
+ ret = admac_alloc_sram_carveout(ad, admac_chan_direction(adchan->no),
+ &adchan->carveout);
+ if (ret < 0)
+ return ret;
+
+ writel_relaxed(adchan->carveout,
+ ad->base + REG_CHAN_SRAM_CARVEOUT(adchan->no));
+ return 0;
+}
+
+static void admac_free_chan_resources(struct dma_chan *chan)
+{
+ struct admac_chan *adchan = to_admac_chan(chan);
+
+ admac_terminate_all(chan);
+ admac_synchronize(chan);
+ admac_free_sram_carveout(adchan->host, admac_chan_direction(adchan->no),
+ adchan->carveout);
+}
+
+static struct dma_chan *admac_dma_of_xlate(struct of_phandle_args *dma_spec,
+ struct of_dma *ofdma)
+{
+ struct admac_data *ad = (struct admac_data *) ofdma->of_dma_data;
+ unsigned int index;
+
+ if (dma_spec->args_count != 1)
+ return NULL;
+
+ index = dma_spec->args[0];
+
+ if (index >= ad->nchannels) {
+ dev_err(ad->dev, "channel index %u out of bounds\n", index);
+ return NULL;
+ }
+
+ return dma_get_slave_channel(&ad->channels[index].chan);
+}
+
+static int admac_drain_reports(struct admac_data *ad, int channo)
+{
+ int count;
+
+ for (count = 0; count < 4; count++) {
+ u32 countval_hi, countval_lo, unk1, flags;
+
+ if (readl_relaxed(ad->base + REG_REPORT_RING(channo)) & RING_EMPTY)
+ break;
+
+ countval_lo = readl_relaxed(ad->base + REG_REPORT_READ(channo));
+ countval_hi = readl_relaxed(ad->base + REG_REPORT_READ(channo));
+ unk1 = readl_relaxed(ad->base + REG_REPORT_READ(channo));
+ flags = readl_relaxed(ad->base + REG_REPORT_READ(channo));
+
+ dev_dbg(ad->dev, "ch%d report: countval=0x%llx unk1=0x%x flags=0x%x\n",
+ channo, ((u64) countval_hi) << 32 | countval_lo, unk1, flags);
+ }
+
+ return count;
+}
+
+static void admac_handle_status_err(struct admac_data *ad, int channo)
+{
+ bool handled = false;
+
+ if (readl_relaxed(ad->base + REG_DESC_RING(channo)) & RING_ERR) {
+ writel_relaxed(RING_ERR, ad->base + REG_DESC_RING(channo));
+ dev_err_ratelimited(ad->dev, "ch%d descriptor ring error\n", channo);
+ handled = true;
+ }
+
+ if (readl_relaxed(ad->base + REG_REPORT_RING(channo)) & RING_ERR) {
+ writel_relaxed(RING_ERR, ad->base + REG_REPORT_RING(channo));
+ dev_err_ratelimited(ad->dev, "ch%d report ring error\n", channo);
+ handled = true;
+ }
+
+ if (unlikely(!handled)) {
+ dev_err(ad->dev, "ch%d unknown error, masking errors as cause of IRQs\n", channo);
+ admac_modify(ad, REG_CHAN_INTMASK(channo, ad->irq_index),
+ STATUS_ERR, 0);
+ }
+}
+
+static void admac_handle_status_desc_done(struct admac_data *ad, int channo)
+{
+ struct admac_chan *adchan = &ad->channels[channo];
+ unsigned long flags;
+ int nreports;
+
+ writel_relaxed(STATUS_DESC_DONE,
+ ad->base + REG_CHAN_INTSTATUS(channo, ad->irq_index));
+
+ spin_lock_irqsave(&adchan->lock, flags);
+ nreports = admac_drain_reports(ad, channo);
+
+ if (adchan->current_tx) {
+ struct admac_tx *tx = adchan->current_tx;
+
+ adchan->nperiod_acks += nreports;
+ tx->reclaimed_pos += nreports * tx->period_len;
+ tx->reclaimed_pos %= 2 * tx->buf_len;
+
+ admac_cyclic_write_desc(ad, channo, tx);
+ tasklet_schedule(&adchan->tasklet);
+ }
+ spin_unlock_irqrestore(&adchan->lock, flags);
+}
+
+static void admac_handle_chan_int(struct admac_data *ad, int no)
+{
+ u32 cause = readl_relaxed(ad->base + REG_CHAN_INTSTATUS(no, ad->irq_index));
+
+ if (cause & STATUS_ERR)
+ admac_handle_status_err(ad, no);
+
+ if (cause & STATUS_DESC_DONE)
+ admac_handle_status_desc_done(ad, no);
+}
+
+static irqreturn_t admac_interrupt(int irq, void *devid)
+{
+ struct admac_data *ad = devid;
+ u32 rx_intstate, tx_intstate, global_intstate;
+ int i;
+
+ rx_intstate = readl_relaxed(ad->base + REG_RX_INTSTATE(ad->irq_index));
+ tx_intstate = readl_relaxed(ad->base + REG_TX_INTSTATE(ad->irq_index));
+ global_intstate = readl_relaxed(ad->base + REG_GLOBAL_INTSTATE(ad->irq_index));
+
+ if (!tx_intstate && !rx_intstate && !global_intstate)
+ return IRQ_NONE;
+
+ for (i = 0; i < ad->nchannels; i += 2) {
+ if (tx_intstate & 1)
+ admac_handle_chan_int(ad, i);
+ tx_intstate >>= 1;
+ }
+
+ for (i = 1; i < ad->nchannels; i += 2) {
+ if (rx_intstate & 1)
+ admac_handle_chan_int(ad, i);
+ rx_intstate >>= 1;
+ }
+
+ if (global_intstate) {
+ dev_warn(ad->dev, "clearing unknown global interrupt flag: %x\n",
+ global_intstate);
+ writel_relaxed(~(u32) 0, ad->base + REG_GLOBAL_INTSTATE(ad->irq_index));
+ }
+
+ return IRQ_HANDLED;
+}
+
+static void admac_chan_tasklet(struct tasklet_struct *t)
+{
+ struct admac_chan *adchan = from_tasklet(adchan, t, tasklet);
+ struct admac_tx *adtx;
+ struct dmaengine_desc_callback cb;
+ struct dmaengine_result tx_result;
+ int nacks;
+
+ spin_lock_irq(&adchan->lock);
+ adtx = adchan->current_tx;
+ nacks = adchan->nperiod_acks;
+ adchan->nperiod_acks = 0;
+ spin_unlock_irq(&adchan->lock);
+
+ if (!adtx || !nacks)
+ return;
+
+ tx_result.result = DMA_TRANS_NOERROR;
+ tx_result.residue = 0;
+
+ dmaengine_desc_get_callback(&adtx->tx, &cb);
+ while (nacks--)
+ dmaengine_desc_callback_invoke(&cb, &tx_result);
+}
+
+static int admac_device_config(struct dma_chan *chan,
+ struct dma_slave_config *config)
+{
+ struct admac_chan *adchan = to_admac_chan(chan);
+ struct admac_data *ad = adchan->host;
+ bool is_tx = admac_chan_direction(adchan->no) == DMA_MEM_TO_DEV;
+ int wordsize = 0;
+ u32 bus_width = 0;
+
+ switch (is_tx ? config->dst_addr_width : config->src_addr_width) {
+ case DMA_SLAVE_BUSWIDTH_1_BYTE:
+ wordsize = 1;
+ bus_width |= BUS_WIDTH_8BIT;
+ break;
+ case DMA_SLAVE_BUSWIDTH_2_BYTES:
+ wordsize = 2;
+ bus_width |= BUS_WIDTH_16BIT;
+ break;
+ case DMA_SLAVE_BUSWIDTH_4_BYTES:
+ wordsize = 4;
+ bus_width |= BUS_WIDTH_32BIT;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ /*
+ * We take port_window_size to be the number of words in a frame.
+ *
+ * The controller has some means of out-of-band signalling, to the peripheral,
+ * of words position in a frame. That's where the importance of this control
+ * comes from.
+ */
+ switch (is_tx ? config->dst_port_window_size : config->src_port_window_size) {
+ case 0 ... 1:
+ break;
+ case 2:
+ bus_width |= BUS_WIDTH_FRAME_2_WORDS;
+ break;
+ case 4:
+ bus_width |= BUS_WIDTH_FRAME_4_WORDS;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ writel_relaxed(bus_width, ad->base + REG_BUS_WIDTH(adchan->no));
+
+ /*
+ * By FIFOCTL_LIMIT we seem to set the maximal number of bytes allowed to be
+ * held in controller's per-channel FIFO. Transfers seem to be triggered
+ * around the time FIFO occupancy touches FIFOCTL_THRESHOLD.
+ *
+ * The numbers we set are more or less arbitrary.
+ */
+ writel_relaxed(FIELD_PREP(CHAN_FIFOCTL_LIMIT, 0x30 * wordsize)
+ | FIELD_PREP(CHAN_FIFOCTL_THRESHOLD, 0x18 * wordsize),
+ ad->base + REG_CHAN_FIFOCTL(adchan->no));
+
+ return 0;
+}
+
+static int admac_probe(struct platform_device *pdev)
+{
+ struct device_node *np = pdev->dev.of_node;
+ struct admac_data *ad;
+ struct dma_device *dma;
+ int nchannels;
+ int err, irq, i;
+
+ err = of_property_read_u32(np, "dma-channels", &nchannels);
+ if (err || nchannels > NCHANNELS_MAX) {
+ dev_err(&pdev->dev, "missing or invalid dma-channels property\n");
+ return -EINVAL;
+ }
+
+ ad = devm_kzalloc(&pdev->dev, struct_size(ad, channels, nchannels), GFP_KERNEL);
+ if (!ad)
+ return -ENOMEM;
+
+ platform_set_drvdata(pdev, ad);
+ ad->dev = &pdev->dev;
+ ad->nchannels = nchannels;
+ mutex_init(&ad->cache_alloc_lock);
+
+ /*
+ * The controller has 4 IRQ outputs. Try them all until
+ * we find one we can use.
+ */
+ for (i = 0; i < IRQ_NOUTPUTS; i++) {
+ irq = platform_get_irq_optional(pdev, i);
+ if (irq >= 0) {
+ ad->irq_index = i;
+ break;
+ }
+ }
+
+ if (irq < 0)
+ return dev_err_probe(&pdev->dev, irq, "no usable interrupt\n");
+ ad->irq = irq;
+
+ ad->base = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(ad->base))
+ return dev_err_probe(&pdev->dev, PTR_ERR(ad->base),
+ "unable to obtain MMIO resource\n");
+
+ ad->rstc = devm_reset_control_get_optional_shared(&pdev->dev, NULL);
+ if (IS_ERR(ad->rstc))
+ return PTR_ERR(ad->rstc);
+
+ dma = &ad->dma;
+
+ dma_cap_set(DMA_PRIVATE, dma->cap_mask);
+ dma_cap_set(DMA_CYCLIC, dma->cap_mask);
+
+ dma->dev = &pdev->dev;
+ dma->device_alloc_chan_resources = admac_alloc_chan_resources;
+ dma->device_free_chan_resources = admac_free_chan_resources;
+ dma->device_tx_status = admac_tx_status;
+ dma->device_issue_pending = admac_issue_pending;
+ dma->device_terminate_all = admac_terminate_all;
+ dma->device_synchronize = admac_synchronize;
+ dma->device_prep_dma_cyclic = admac_prep_dma_cyclic;
+ dma->device_config = admac_device_config;
+ dma->device_pause = admac_pause;
+ dma->device_resume = admac_resume;
+
+ dma->directions = BIT(DMA_MEM_TO_DEV) | BIT(DMA_DEV_TO_MEM);
+ dma->residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
+ dma->src_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) |
+ BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) |
+ BIT(DMA_SLAVE_BUSWIDTH_4_BYTES);
+ dma->dst_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) |
+ BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) |
+ BIT(DMA_SLAVE_BUSWIDTH_4_BYTES);
+
+ INIT_LIST_HEAD(&dma->channels);
+ for (i = 0; i < nchannels; i++) {
+ struct admac_chan *adchan = &ad->channels[i];
+
+ adchan->host = ad;
+ adchan->no = i;
+ adchan->chan.device = &ad->dma;
+ spin_lock_init(&adchan->lock);
+ INIT_LIST_HEAD(&adchan->submitted);
+ INIT_LIST_HEAD(&adchan->issued);
+ INIT_LIST_HEAD(&adchan->to_free);
+ list_add_tail(&adchan->chan.device_node, &dma->channels);
+ tasklet_setup(&adchan->tasklet, admac_chan_tasklet);
+ }
+
+ err = reset_control_reset(ad->rstc);
+ if (err)
+ return dev_err_probe(&pdev->dev, err,
+ "unable to trigger reset\n");
+
+ err = request_irq(irq, admac_interrupt, 0, dev_name(&pdev->dev), ad);
+ if (err) {
+ dev_err_probe(&pdev->dev, err,
+ "unable to register interrupt\n");
+ goto free_reset;
+ }
+
+ err = dma_async_device_register(&ad->dma);
+ if (err) {
+ dev_err_probe(&pdev->dev, err, "failed to register DMA device\n");
+ goto free_irq;
+ }
+
+ err = of_dma_controller_register(pdev->dev.of_node, admac_dma_of_xlate, ad);
+ if (err) {
+ dma_async_device_unregister(&ad->dma);
+ dev_err_probe(&pdev->dev, err, "failed to register with OF\n");
+ goto free_irq;
+ }
+
+ ad->txcache.size = readl_relaxed(ad->base + REG_TX_SRAM_SIZE);
+ ad->rxcache.size = readl_relaxed(ad->base + REG_RX_SRAM_SIZE);
+
+ dev_info(&pdev->dev, "Audio DMA Controller\n");
+ dev_info(&pdev->dev, "imprint %x TX cache %u RX cache %u\n",
+ readl_relaxed(ad->base + REG_IMPRINT), ad->txcache.size, ad->rxcache.size);
+
+ return 0;
+
+free_irq:
+ free_irq(ad->irq, ad);
+free_reset:
+ reset_control_rearm(ad->rstc);
+ return err;
+}
+
+static int admac_remove(struct platform_device *pdev)
+{
+ struct admac_data *ad = platform_get_drvdata(pdev);
+
+ of_dma_controller_free(pdev->dev.of_node);
+ dma_async_device_unregister(&ad->dma);
+ free_irq(ad->irq, ad);
+ reset_control_rearm(ad->rstc);
+
+ return 0;
+}
+
+static const struct of_device_id admac_of_match[] = {
+ { .compatible = "apple,admac", },
+ { }
+};
+MODULE_DEVICE_TABLE(of, admac_of_match);
+
+static struct platform_driver apple_admac_driver = {
+ .driver = {
+ .name = "apple-admac",
+ .of_match_table = admac_of_match,
+ },
+ .probe = admac_probe,
+ .remove = admac_remove,
+};
+module_platform_driver(apple_admac_driver);
+
+MODULE_AUTHOR("Martin Povišer <povik+lin@cutebit.org>");
+MODULE_DESCRIPTION("Driver for Audio DMA Controller (ADMAC) on Apple SoCs");
+MODULE_LICENSE("GPL");
diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c
new file mode 100644
index 000000000..858bd64f1
--- /dev/null
+++ b/drivers/dma/at_hdmac.c
@@ -0,0 +1,2156 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Driver for the Atmel AHB DMA Controller (aka HDMA or DMAC on AT91 systems)
+ *
+ * Copyright (C) 2008 Atmel Corporation
+ *
+ * This supports the Atmel AHB DMA Controller found in several Atmel SoCs.
+ * The only Atmel DMA Controller that is not covered by this driver is the one
+ * found on AT91SAM9263.
+ */
+
+#include <dt-bindings/dma/at91.h>
+#include <linux/clk.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmapool.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/of_dma.h>
+
+#include "at_hdmac_regs.h"
+#include "dmaengine.h"
+
+/*
+ * Glossary
+ * --------
+ *
+ * at_hdmac : Name of the ATmel AHB DMA Controller
+ * at_dma_ / atdma : ATmel DMA controller entity related
+ * atc_ / atchan : ATmel DMA Channel entity related
+ */
+
+#define ATC_DEFAULT_CFG (ATC_FIFOCFG_HALFFIFO)
+#define ATC_DEFAULT_CTRLB (ATC_SIF(AT_DMA_MEM_IF) \
+ |ATC_DIF(AT_DMA_MEM_IF))
+#define ATC_DMA_BUSWIDTHS\
+ (BIT(DMA_SLAVE_BUSWIDTH_UNDEFINED) |\
+ BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) |\
+ BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) |\
+ BIT(DMA_SLAVE_BUSWIDTH_4_BYTES))
+
+#define ATC_MAX_DSCR_TRIALS 10
+
+/*
+ * Initial number of descriptors to allocate for each channel. This could
+ * be increased during dma usage.
+ */
+static unsigned int init_nr_desc_per_channel = 64;
+module_param(init_nr_desc_per_channel, uint, 0644);
+MODULE_PARM_DESC(init_nr_desc_per_channel,
+ "initial descriptors per channel (default: 64)");
+
+/**
+ * struct at_dma_platform_data - Controller configuration parameters
+ * @nr_channels: Number of channels supported by hardware (max 8)
+ * @cap_mask: dma_capability flags supported by the platform
+ */
+struct at_dma_platform_data {
+ unsigned int nr_channels;
+ dma_cap_mask_t cap_mask;
+};
+
+/**
+ * struct at_dma_slave - Controller-specific information about a slave
+ * @dma_dev: required DMA master device
+ * @cfg: Platform-specific initializer for the CFG register
+ */
+struct at_dma_slave {
+ struct device *dma_dev;
+ u32 cfg;
+};
+
+/* prototypes */
+static dma_cookie_t atc_tx_submit(struct dma_async_tx_descriptor *tx);
+static void atc_issue_pending(struct dma_chan *chan);
+
+
+/*----------------------------------------------------------------------*/
+
+static inline unsigned int atc_get_xfer_width(dma_addr_t src, dma_addr_t dst,
+ size_t len)
+{
+ unsigned int width;
+
+ if (!((src | dst | len) & 3))
+ width = 2;
+ else if (!((src | dst | len) & 1))
+ width = 1;
+ else
+ width = 0;
+
+ return width;
+}
+
+static struct at_desc *atc_first_active(struct at_dma_chan *atchan)
+{
+ return list_first_entry(&atchan->active_list,
+ struct at_desc, desc_node);
+}
+
+static struct at_desc *atc_first_queued(struct at_dma_chan *atchan)
+{
+ return list_first_entry(&atchan->queue,
+ struct at_desc, desc_node);
+}
+
+/**
+ * atc_alloc_descriptor - allocate and return an initialized descriptor
+ * @chan: the channel to allocate descriptors for
+ * @gfp_flags: GFP allocation flags
+ *
+ * Note: The ack-bit is positioned in the descriptor flag at creation time
+ * to make initial allocation more convenient. This bit will be cleared
+ * and control will be given to client at usage time (during
+ * preparation functions).
+ */
+static struct at_desc *atc_alloc_descriptor(struct dma_chan *chan,
+ gfp_t gfp_flags)
+{
+ struct at_desc *desc = NULL;
+ struct at_dma *atdma = to_at_dma(chan->device);
+ dma_addr_t phys;
+
+ desc = dma_pool_zalloc(atdma->dma_desc_pool, gfp_flags, &phys);
+ if (desc) {
+ INIT_LIST_HEAD(&desc->tx_list);
+ dma_async_tx_descriptor_init(&desc->txd, chan);
+ /* txd.flags will be overwritten in prep functions */
+ desc->txd.flags = DMA_CTRL_ACK;
+ desc->txd.tx_submit = atc_tx_submit;
+ desc->txd.phys = phys;
+ }
+
+ return desc;
+}
+
+/**
+ * atc_desc_get - get an unused descriptor from free_list
+ * @atchan: channel we want a new descriptor for
+ */
+static struct at_desc *atc_desc_get(struct at_dma_chan *atchan)
+{
+ struct at_desc *desc, *_desc;
+ struct at_desc *ret = NULL;
+ unsigned long flags;
+ unsigned int i = 0;
+
+ spin_lock_irqsave(&atchan->lock, flags);
+ list_for_each_entry_safe(desc, _desc, &atchan->free_list, desc_node) {
+ i++;
+ if (async_tx_test_ack(&desc->txd)) {
+ list_del(&desc->desc_node);
+ ret = desc;
+ break;
+ }
+ dev_dbg(chan2dev(&atchan->chan_common),
+ "desc %p not ACKed\n", desc);
+ }
+ spin_unlock_irqrestore(&atchan->lock, flags);
+ dev_vdbg(chan2dev(&atchan->chan_common),
+ "scanned %u descriptors on freelist\n", i);
+
+ /* no more descriptor available in initial pool: create one more */
+ if (!ret)
+ ret = atc_alloc_descriptor(&atchan->chan_common, GFP_NOWAIT);
+
+ return ret;
+}
+
+/**
+ * atc_desc_put - move a descriptor, including any children, to the free list
+ * @atchan: channel we work on
+ * @desc: descriptor, at the head of a chain, to move to free list
+ */
+static void atc_desc_put(struct at_dma_chan *atchan, struct at_desc *desc)
+{
+ if (desc) {
+ struct at_desc *child;
+ unsigned long flags;
+
+ spin_lock_irqsave(&atchan->lock, flags);
+ list_for_each_entry(child, &desc->tx_list, desc_node)
+ dev_vdbg(chan2dev(&atchan->chan_common),
+ "moving child desc %p to freelist\n",
+ child);
+ list_splice_init(&desc->tx_list, &atchan->free_list);
+ dev_vdbg(chan2dev(&atchan->chan_common),
+ "moving desc %p to freelist\n", desc);
+ list_add(&desc->desc_node, &atchan->free_list);
+ spin_unlock_irqrestore(&atchan->lock, flags);
+ }
+}
+
+/**
+ * atc_desc_chain - build chain adding a descriptor
+ * @first: address of first descriptor of the chain
+ * @prev: address of previous descriptor of the chain
+ * @desc: descriptor to queue
+ *
+ * Called from prep_* functions
+ */
+static void atc_desc_chain(struct at_desc **first, struct at_desc **prev,
+ struct at_desc *desc)
+{
+ if (!(*first)) {
+ *first = desc;
+ } else {
+ /* inform the HW lli about chaining */
+ (*prev)->lli.dscr = desc->txd.phys;
+ /* insert the link descriptor to the LD ring */
+ list_add_tail(&desc->desc_node,
+ &(*first)->tx_list);
+ }
+ *prev = desc;
+}
+
+/**
+ * atc_dostart - starts the DMA engine for real
+ * @atchan: the channel we want to start
+ * @first: first descriptor in the list we want to begin with
+ *
+ * Called with atchan->lock held and bh disabled
+ */
+static void atc_dostart(struct at_dma_chan *atchan, struct at_desc *first)
+{
+ struct at_dma *atdma = to_at_dma(atchan->chan_common.device);
+
+ /* ASSERT: channel is idle */
+ if (atc_chan_is_enabled(atchan)) {
+ dev_err(chan2dev(&atchan->chan_common),
+ "BUG: Attempted to start non-idle channel\n");
+ dev_err(chan2dev(&atchan->chan_common),
+ " channel: s0x%x d0x%x ctrl0x%x:0x%x l0x%x\n",
+ channel_readl(atchan, SADDR),
+ channel_readl(atchan, DADDR),
+ channel_readl(atchan, CTRLA),
+ channel_readl(atchan, CTRLB),
+ channel_readl(atchan, DSCR));
+
+ /* The tasklet will hopefully advance the queue... */
+ return;
+ }
+
+ vdbg_dump_regs(atchan);
+
+ channel_writel(atchan, SADDR, 0);
+ channel_writel(atchan, DADDR, 0);
+ channel_writel(atchan, CTRLA, 0);
+ channel_writel(atchan, CTRLB, 0);
+ channel_writel(atchan, DSCR, first->txd.phys);
+ channel_writel(atchan, SPIP, ATC_SPIP_HOLE(first->src_hole) |
+ ATC_SPIP_BOUNDARY(first->boundary));
+ channel_writel(atchan, DPIP, ATC_DPIP_HOLE(first->dst_hole) |
+ ATC_DPIP_BOUNDARY(first->boundary));
+ /* Don't allow CPU to reorder channel enable. */
+ wmb();
+ dma_writel(atdma, CHER, atchan->mask);
+
+ vdbg_dump_regs(atchan);
+}
+
+/*
+ * atc_get_desc_by_cookie - get the descriptor of a cookie
+ * @atchan: the DMA channel
+ * @cookie: the cookie to get the descriptor for
+ */
+static struct at_desc *atc_get_desc_by_cookie(struct at_dma_chan *atchan,
+ dma_cookie_t cookie)
+{
+ struct at_desc *desc, *_desc;
+
+ list_for_each_entry_safe(desc, _desc, &atchan->queue, desc_node) {
+ if (desc->txd.cookie == cookie)
+ return desc;
+ }
+
+ list_for_each_entry_safe(desc, _desc, &atchan->active_list, desc_node) {
+ if (desc->txd.cookie == cookie)
+ return desc;
+ }
+
+ return NULL;
+}
+
+/**
+ * atc_calc_bytes_left - calculates the number of bytes left according to the
+ * value read from CTRLA.
+ *
+ * @current_len: the number of bytes left before reading CTRLA
+ * @ctrla: the value of CTRLA
+ */
+static inline int atc_calc_bytes_left(int current_len, u32 ctrla)
+{
+ u32 btsize = (ctrla & ATC_BTSIZE_MAX);
+ u32 src_width = ATC_REG_TO_SRC_WIDTH(ctrla);
+
+ /*
+ * According to the datasheet, when reading the Control A Register
+ * (ctrla), the Buffer Transfer Size (btsize) bitfield refers to the
+ * number of transfers completed on the Source Interface.
+ * So btsize is always a number of source width transfers.
+ */
+ return current_len - (btsize << src_width);
+}
+
+/**
+ * atc_get_bytes_left - get the number of bytes residue for a cookie
+ * @chan: DMA channel
+ * @cookie: transaction identifier to check status of
+ */
+static int atc_get_bytes_left(struct dma_chan *chan, dma_cookie_t cookie)
+{
+ struct at_dma_chan *atchan = to_at_dma_chan(chan);
+ struct at_desc *desc_first = atc_first_active(atchan);
+ struct at_desc *desc;
+ int ret;
+ u32 ctrla, dscr;
+ unsigned int i;
+
+ /*
+ * If the cookie doesn't match to the currently running transfer then
+ * we can return the total length of the associated DMA transfer,
+ * because it is still queued.
+ */
+ desc = atc_get_desc_by_cookie(atchan, cookie);
+ if (desc == NULL)
+ return -EINVAL;
+ else if (desc != desc_first)
+ return desc->total_len;
+
+ /* cookie matches to the currently running transfer */
+ ret = desc_first->total_len;
+
+ if (desc_first->lli.dscr) {
+ /* hardware linked list transfer */
+
+ /*
+ * Calculate the residue by removing the length of the child
+ * descriptors already transferred from the total length.
+ * To get the current child descriptor we can use the value of
+ * the channel's DSCR register and compare it against the value
+ * of the hardware linked list structure of each child
+ * descriptor.
+ *
+ * The CTRLA register provides us with the amount of data
+ * already read from the source for the current child
+ * descriptor. So we can compute a more accurate residue by also
+ * removing the number of bytes corresponding to this amount of
+ * data.
+ *
+ * However, the DSCR and CTRLA registers cannot be read both
+ * atomically. Hence a race condition may occur: the first read
+ * register may refer to one child descriptor whereas the second
+ * read may refer to a later child descriptor in the list
+ * because of the DMA transfer progression inbetween the two
+ * reads.
+ *
+ * One solution could have been to pause the DMA transfer, read
+ * the DSCR and CTRLA then resume the DMA transfer. Nonetheless,
+ * this approach presents some drawbacks:
+ * - If the DMA transfer is paused, RX overruns or TX underruns
+ * are more likey to occur depending on the system latency.
+ * Taking the USART driver as an example, it uses a cyclic DMA
+ * transfer to read data from the Receive Holding Register
+ * (RHR) to avoid RX overruns since the RHR is not protected
+ * by any FIFO on most Atmel SoCs. So pausing the DMA transfer
+ * to compute the residue would break the USART driver design.
+ * - The atc_pause() function masks interrupts but we'd rather
+ * avoid to do so for system latency purpose.
+ *
+ * Then we'd rather use another solution: the DSCR is read a
+ * first time, the CTRLA is read in turn, next the DSCR is read
+ * a second time. If the two consecutive read values of the DSCR
+ * are the same then we assume both refers to the very same
+ * child descriptor as well as the CTRLA value read inbetween
+ * does. For cyclic tranfers, the assumption is that a full loop
+ * is "not so fast".
+ * If the two DSCR values are different, we read again the CTRLA
+ * then the DSCR till two consecutive read values from DSCR are
+ * equal or till the maxium trials is reach.
+ * This algorithm is very unlikely not to find a stable value for
+ * DSCR.
+ */
+
+ dscr = channel_readl(atchan, DSCR);
+ rmb(); /* ensure DSCR is read before CTRLA */
+ ctrla = channel_readl(atchan, CTRLA);
+ for (i = 0; i < ATC_MAX_DSCR_TRIALS; ++i) {
+ u32 new_dscr;
+
+ rmb(); /* ensure DSCR is read after CTRLA */
+ new_dscr = channel_readl(atchan, DSCR);
+
+ /*
+ * If the DSCR register value has not changed inside the
+ * DMA controller since the previous read, we assume
+ * that both the dscr and ctrla values refers to the
+ * very same descriptor.
+ */
+ if (likely(new_dscr == dscr))
+ break;
+
+ /*
+ * DSCR has changed inside the DMA controller, so the
+ * previouly read value of CTRLA may refer to an already
+ * processed descriptor hence could be outdated.
+ * We need to update ctrla to match the current
+ * descriptor.
+ */
+ dscr = new_dscr;
+ rmb(); /* ensure DSCR is read before CTRLA */
+ ctrla = channel_readl(atchan, CTRLA);
+ }
+ if (unlikely(i == ATC_MAX_DSCR_TRIALS))
+ return -ETIMEDOUT;
+
+ /* for the first descriptor we can be more accurate */
+ if (desc_first->lli.dscr == dscr)
+ return atc_calc_bytes_left(ret, ctrla);
+
+ ret -= desc_first->len;
+ list_for_each_entry(desc, &desc_first->tx_list, desc_node) {
+ if (desc->lli.dscr == dscr)
+ break;
+
+ ret -= desc->len;
+ }
+
+ /*
+ * For the current descriptor in the chain we can calculate
+ * the remaining bytes using the channel's register.
+ */
+ ret = atc_calc_bytes_left(ret, ctrla);
+ } else {
+ /* single transfer */
+ ctrla = channel_readl(atchan, CTRLA);
+ ret = atc_calc_bytes_left(ret, ctrla);
+ }
+
+ return ret;
+}
+
+/**
+ * atc_chain_complete - finish work for one transaction chain
+ * @atchan: channel we work on
+ * @desc: descriptor at the head of the chain we want do complete
+ */
+static void
+atc_chain_complete(struct at_dma_chan *atchan, struct at_desc *desc)
+{
+ struct dma_async_tx_descriptor *txd = &desc->txd;
+ struct at_dma *atdma = to_at_dma(atchan->chan_common.device);
+ unsigned long flags;
+
+ dev_vdbg(chan2dev(&atchan->chan_common),
+ "descriptor %u complete\n", txd->cookie);
+
+ spin_lock_irqsave(&atchan->lock, flags);
+
+ /* mark the descriptor as complete for non cyclic cases only */
+ if (!atc_chan_is_cyclic(atchan))
+ dma_cookie_complete(txd);
+
+ spin_unlock_irqrestore(&atchan->lock, flags);
+
+ dma_descriptor_unmap(txd);
+ /* for cyclic transfers,
+ * no need to replay callback function while stopping */
+ if (!atc_chan_is_cyclic(atchan))
+ dmaengine_desc_get_callback_invoke(txd, NULL);
+
+ dma_run_dependencies(txd);
+
+ spin_lock_irqsave(&atchan->lock, flags);
+ /* move children to free_list */
+ list_splice_init(&desc->tx_list, &atchan->free_list);
+ /* add myself to free_list */
+ list_add(&desc->desc_node, &atchan->free_list);
+ spin_unlock_irqrestore(&atchan->lock, flags);
+
+ /* If the transfer was a memset, free our temporary buffer */
+ if (desc->memset_buffer) {
+ dma_pool_free(atdma->memset_pool, desc->memset_vaddr,
+ desc->memset_paddr);
+ desc->memset_buffer = false;
+ }
+}
+
+/**
+ * atc_advance_work - at the end of a transaction, move forward
+ * @atchan: channel where the transaction ended
+ */
+static void atc_advance_work(struct at_dma_chan *atchan)
+{
+ struct at_desc *desc;
+ unsigned long flags;
+
+ dev_vdbg(chan2dev(&atchan->chan_common), "advance_work\n");
+
+ spin_lock_irqsave(&atchan->lock, flags);
+ if (atc_chan_is_enabled(atchan) || list_empty(&atchan->active_list))
+ return spin_unlock_irqrestore(&atchan->lock, flags);
+
+ desc = atc_first_active(atchan);
+ /* Remove the transfer node from the active list. */
+ list_del_init(&desc->desc_node);
+ spin_unlock_irqrestore(&atchan->lock, flags);
+ atc_chain_complete(atchan, desc);
+
+ /* advance work */
+ spin_lock_irqsave(&atchan->lock, flags);
+ if (!list_empty(&atchan->active_list)) {
+ desc = atc_first_queued(atchan);
+ list_move_tail(&desc->desc_node, &atchan->active_list);
+ atc_dostart(atchan, desc);
+ }
+ spin_unlock_irqrestore(&atchan->lock, flags);
+}
+
+
+/**
+ * atc_handle_error - handle errors reported by DMA controller
+ * @atchan: channel where error occurs
+ */
+static void atc_handle_error(struct at_dma_chan *atchan)
+{
+ struct at_desc *bad_desc;
+ struct at_desc *desc;
+ struct at_desc *child;
+ unsigned long flags;
+
+ spin_lock_irqsave(&atchan->lock, flags);
+ /*
+ * The descriptor currently at the head of the active list is
+ * broked. Since we don't have any way to report errors, we'll
+ * just have to scream loudly and try to carry on.
+ */
+ bad_desc = atc_first_active(atchan);
+ list_del_init(&bad_desc->desc_node);
+
+ /* Try to restart the controller */
+ if (!list_empty(&atchan->active_list)) {
+ desc = atc_first_queued(atchan);
+ list_move_tail(&desc->desc_node, &atchan->active_list);
+ atc_dostart(atchan, desc);
+ }
+
+ /*
+ * KERN_CRITICAL may seem harsh, but since this only happens
+ * when someone submits a bad physical address in a
+ * descriptor, we should consider ourselves lucky that the
+ * controller flagged an error instead of scribbling over
+ * random memory locations.
+ */
+ dev_crit(chan2dev(&atchan->chan_common),
+ "Bad descriptor submitted for DMA!\n");
+ dev_crit(chan2dev(&atchan->chan_common),
+ " cookie: %d\n", bad_desc->txd.cookie);
+ atc_dump_lli(atchan, &bad_desc->lli);
+ list_for_each_entry(child, &bad_desc->tx_list, desc_node)
+ atc_dump_lli(atchan, &child->lli);
+
+ spin_unlock_irqrestore(&atchan->lock, flags);
+
+ /* Pretend the descriptor completed successfully */
+ atc_chain_complete(atchan, bad_desc);
+}
+
+/**
+ * atc_handle_cyclic - at the end of a period, run callback function
+ * @atchan: channel used for cyclic operations
+ */
+static void atc_handle_cyclic(struct at_dma_chan *atchan)
+{
+ struct at_desc *first = atc_first_active(atchan);
+ struct dma_async_tx_descriptor *txd = &first->txd;
+
+ dev_vdbg(chan2dev(&atchan->chan_common),
+ "new cyclic period llp 0x%08x\n",
+ channel_readl(atchan, DSCR));
+
+ dmaengine_desc_get_callback_invoke(txd, NULL);
+}
+
+/*-- IRQ & Tasklet ---------------------------------------------------*/
+
+static void atc_tasklet(struct tasklet_struct *t)
+{
+ struct at_dma_chan *atchan = from_tasklet(atchan, t, tasklet);
+
+ if (test_and_clear_bit(ATC_IS_ERROR, &atchan->status))
+ return atc_handle_error(atchan);
+
+ if (atc_chan_is_cyclic(atchan))
+ return atc_handle_cyclic(atchan);
+
+ atc_advance_work(atchan);
+}
+
+static irqreturn_t at_dma_interrupt(int irq, void *dev_id)
+{
+ struct at_dma *atdma = (struct at_dma *)dev_id;
+ struct at_dma_chan *atchan;
+ int i;
+ u32 status, pending, imr;
+ int ret = IRQ_NONE;
+
+ do {
+ imr = dma_readl(atdma, EBCIMR);
+ status = dma_readl(atdma, EBCISR);
+ pending = status & imr;
+
+ if (!pending)
+ break;
+
+ dev_vdbg(atdma->dma_common.dev,
+ "interrupt: status = 0x%08x, 0x%08x, 0x%08x\n",
+ status, imr, pending);
+
+ for (i = 0; i < atdma->dma_common.chancnt; i++) {
+ atchan = &atdma->chan[i];
+ if (pending & (AT_DMA_BTC(i) | AT_DMA_ERR(i))) {
+ if (pending & AT_DMA_ERR(i)) {
+ /* Disable channel on AHB error */
+ dma_writel(atdma, CHDR,
+ AT_DMA_RES(i) | atchan->mask);
+ /* Give information to tasklet */
+ set_bit(ATC_IS_ERROR, &atchan->status);
+ }
+ tasklet_schedule(&atchan->tasklet);
+ ret = IRQ_HANDLED;
+ }
+ }
+
+ } while (pending);
+
+ return ret;
+}
+
+
+/*-- DMA Engine API --------------------------------------------------*/
+
+/**
+ * atc_tx_submit - set the prepared descriptor(s) to be executed by the engine
+ * @tx: descriptor at the head of the transaction chain
+ *
+ * Queue chain if DMA engine is working already
+ *
+ * Cookie increment and adding to active_list or queue must be atomic
+ */
+static dma_cookie_t atc_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+ struct at_desc *desc = txd_to_at_desc(tx);
+ struct at_dma_chan *atchan = to_at_dma_chan(tx->chan);
+ dma_cookie_t cookie;
+ unsigned long flags;
+
+ spin_lock_irqsave(&atchan->lock, flags);
+ cookie = dma_cookie_assign(tx);
+
+ list_add_tail(&desc->desc_node, &atchan->queue);
+ spin_unlock_irqrestore(&atchan->lock, flags);
+
+ dev_vdbg(chan2dev(tx->chan), "tx_submit: queued %u\n",
+ desc->txd.cookie);
+ return cookie;
+}
+
+/**
+ * atc_prep_dma_interleaved - prepare memory to memory interleaved operation
+ * @chan: the channel to prepare operation on
+ * @xt: Interleaved transfer template
+ * @flags: tx descriptor status flags
+ */
+static struct dma_async_tx_descriptor *
+atc_prep_dma_interleaved(struct dma_chan *chan,
+ struct dma_interleaved_template *xt,
+ unsigned long flags)
+{
+ struct at_dma_chan *atchan = to_at_dma_chan(chan);
+ struct data_chunk *first;
+ struct at_desc *desc = NULL;
+ size_t xfer_count;
+ unsigned int dwidth;
+ u32 ctrla;
+ u32 ctrlb;
+ size_t len = 0;
+ int i;
+
+ if (unlikely(!xt || xt->numf != 1 || !xt->frame_size))
+ return NULL;
+
+ first = xt->sgl;
+
+ dev_info(chan2dev(chan),
+ "%s: src=%pad, dest=%pad, numf=%d, frame_size=%d, flags=0x%lx\n",
+ __func__, &xt->src_start, &xt->dst_start, xt->numf,
+ xt->frame_size, flags);
+
+ /*
+ * The controller can only "skip" X bytes every Y bytes, so we
+ * need to make sure we are given a template that fit that
+ * description, ie a template with chunks that always have the
+ * same size, with the same ICGs.
+ */
+ for (i = 0; i < xt->frame_size; i++) {
+ struct data_chunk *chunk = xt->sgl + i;
+
+ if ((chunk->size != xt->sgl->size) ||
+ (dmaengine_get_dst_icg(xt, chunk) != dmaengine_get_dst_icg(xt, first)) ||
+ (dmaengine_get_src_icg(xt, chunk) != dmaengine_get_src_icg(xt, first))) {
+ dev_err(chan2dev(chan),
+ "%s: the controller can transfer only identical chunks\n",
+ __func__);
+ return NULL;
+ }
+
+ len += chunk->size;
+ }
+
+ dwidth = atc_get_xfer_width(xt->src_start,
+ xt->dst_start, len);
+
+ xfer_count = len >> dwidth;
+ if (xfer_count > ATC_BTSIZE_MAX) {
+ dev_err(chan2dev(chan), "%s: buffer is too big\n", __func__);
+ return NULL;
+ }
+
+ ctrla = ATC_SRC_WIDTH(dwidth) |
+ ATC_DST_WIDTH(dwidth);
+
+ ctrlb = ATC_DEFAULT_CTRLB | ATC_IEN
+ | ATC_SRC_ADDR_MODE_INCR
+ | ATC_DST_ADDR_MODE_INCR
+ | ATC_SRC_PIP
+ | ATC_DST_PIP
+ | ATC_FC_MEM2MEM;
+
+ /* create the transfer */
+ desc = atc_desc_get(atchan);
+ if (!desc) {
+ dev_err(chan2dev(chan),
+ "%s: couldn't allocate our descriptor\n", __func__);
+ return NULL;
+ }
+
+ desc->lli.saddr = xt->src_start;
+ desc->lli.daddr = xt->dst_start;
+ desc->lli.ctrla = ctrla | xfer_count;
+ desc->lli.ctrlb = ctrlb;
+
+ desc->boundary = first->size >> dwidth;
+ desc->dst_hole = (dmaengine_get_dst_icg(xt, first) >> dwidth) + 1;
+ desc->src_hole = (dmaengine_get_src_icg(xt, first) >> dwidth) + 1;
+
+ desc->txd.cookie = -EBUSY;
+ desc->total_len = desc->len = len;
+
+ /* set end-of-link to the last link descriptor of list*/
+ set_desc_eol(desc);
+
+ desc->txd.flags = flags; /* client is in control of this ack */
+
+ return &desc->txd;
+}
+
+/**
+ * atc_prep_dma_memcpy - prepare a memcpy operation
+ * @chan: the channel to prepare operation on
+ * @dest: operation virtual destination address
+ * @src: operation virtual source address
+ * @len: operation length
+ * @flags: tx descriptor status flags
+ */
+static struct dma_async_tx_descriptor *
+atc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
+ size_t len, unsigned long flags)
+{
+ struct at_dma_chan *atchan = to_at_dma_chan(chan);
+ struct at_desc *desc = NULL;
+ struct at_desc *first = NULL;
+ struct at_desc *prev = NULL;
+ size_t xfer_count;
+ size_t offset;
+ unsigned int src_width;
+ unsigned int dst_width;
+ u32 ctrla;
+ u32 ctrlb;
+
+ dev_vdbg(chan2dev(chan), "prep_dma_memcpy: d%pad s%pad l0x%zx f0x%lx\n",
+ &dest, &src, len, flags);
+
+ if (unlikely(!len)) {
+ dev_dbg(chan2dev(chan), "prep_dma_memcpy: length is zero!\n");
+ return NULL;
+ }
+
+ ctrlb = ATC_DEFAULT_CTRLB | ATC_IEN
+ | ATC_SRC_ADDR_MODE_INCR
+ | ATC_DST_ADDR_MODE_INCR
+ | ATC_FC_MEM2MEM;
+
+ /*
+ * We can be a lot more clever here, but this should take care
+ * of the most common optimization.
+ */
+ src_width = dst_width = atc_get_xfer_width(src, dest, len);
+
+ ctrla = ATC_SRC_WIDTH(src_width) |
+ ATC_DST_WIDTH(dst_width);
+
+ for (offset = 0; offset < len; offset += xfer_count << src_width) {
+ xfer_count = min_t(size_t, (len - offset) >> src_width,
+ ATC_BTSIZE_MAX);
+
+ desc = atc_desc_get(atchan);
+ if (!desc)
+ goto err_desc_get;
+
+ desc->lli.saddr = src + offset;
+ desc->lli.daddr = dest + offset;
+ desc->lli.ctrla = ctrla | xfer_count;
+ desc->lli.ctrlb = ctrlb;
+
+ desc->txd.cookie = 0;
+ desc->len = xfer_count << src_width;
+
+ atc_desc_chain(&first, &prev, desc);
+ }
+
+ /* First descriptor of the chain embedds additional information */
+ first->txd.cookie = -EBUSY;
+ first->total_len = len;
+
+ /* set end-of-link to the last link descriptor of list*/
+ set_desc_eol(desc);
+
+ first->txd.flags = flags; /* client is in control of this ack */
+
+ return &first->txd;
+
+err_desc_get:
+ atc_desc_put(atchan, first);
+ return NULL;
+}
+
+static struct at_desc *atc_create_memset_desc(struct dma_chan *chan,
+ dma_addr_t psrc,
+ dma_addr_t pdst,
+ size_t len)
+{
+ struct at_dma_chan *atchan = to_at_dma_chan(chan);
+ struct at_desc *desc;
+ size_t xfer_count;
+
+ u32 ctrla = ATC_SRC_WIDTH(2) | ATC_DST_WIDTH(2);
+ u32 ctrlb = ATC_DEFAULT_CTRLB | ATC_IEN |
+ ATC_SRC_ADDR_MODE_FIXED |
+ ATC_DST_ADDR_MODE_INCR |
+ ATC_FC_MEM2MEM;
+
+ xfer_count = len >> 2;
+ if (xfer_count > ATC_BTSIZE_MAX) {
+ dev_err(chan2dev(chan), "%s: buffer is too big\n",
+ __func__);
+ return NULL;
+ }
+
+ desc = atc_desc_get(atchan);
+ if (!desc) {
+ dev_err(chan2dev(chan), "%s: can't get a descriptor\n",
+ __func__);
+ return NULL;
+ }
+
+ desc->lli.saddr = psrc;
+ desc->lli.daddr = pdst;
+ desc->lli.ctrla = ctrla | xfer_count;
+ desc->lli.ctrlb = ctrlb;
+
+ desc->txd.cookie = 0;
+ desc->len = len;
+
+ return desc;
+}
+
+/**
+ * atc_prep_dma_memset - prepare a memcpy operation
+ * @chan: the channel to prepare operation on
+ * @dest: operation virtual destination address
+ * @value: value to set memory buffer to
+ * @len: operation length
+ * @flags: tx descriptor status flags
+ */
+static struct dma_async_tx_descriptor *
+atc_prep_dma_memset(struct dma_chan *chan, dma_addr_t dest, int value,
+ size_t len, unsigned long flags)
+{
+ struct at_dma *atdma = to_at_dma(chan->device);
+ struct at_desc *desc;
+ void __iomem *vaddr;
+ dma_addr_t paddr;
+ char fill_pattern;
+
+ dev_vdbg(chan2dev(chan), "%s: d%pad v0x%x l0x%zx f0x%lx\n", __func__,
+ &dest, value, len, flags);
+
+ if (unlikely(!len)) {
+ dev_dbg(chan2dev(chan), "%s: length is zero!\n", __func__);
+ return NULL;
+ }
+
+ if (!is_dma_fill_aligned(chan->device, dest, 0, len)) {
+ dev_dbg(chan2dev(chan), "%s: buffer is not aligned\n",
+ __func__);
+ return NULL;
+ }
+
+ vaddr = dma_pool_alloc(atdma->memset_pool, GFP_NOWAIT, &paddr);
+ if (!vaddr) {
+ dev_err(chan2dev(chan), "%s: couldn't allocate buffer\n",
+ __func__);
+ return NULL;
+ }
+
+ /* Only the first byte of value is to be used according to dmaengine */
+ fill_pattern = (char)value;
+
+ *(u32*)vaddr = (fill_pattern << 24) |
+ (fill_pattern << 16) |
+ (fill_pattern << 8) |
+ fill_pattern;
+
+ desc = atc_create_memset_desc(chan, paddr, dest, len);
+ if (!desc) {
+ dev_err(chan2dev(chan), "%s: couldn't get a descriptor\n",
+ __func__);
+ goto err_free_buffer;
+ }
+
+ desc->memset_paddr = paddr;
+ desc->memset_vaddr = vaddr;
+ desc->memset_buffer = true;
+
+ desc->txd.cookie = -EBUSY;
+ desc->total_len = len;
+
+ /* set end-of-link on the descriptor */
+ set_desc_eol(desc);
+
+ desc->txd.flags = flags;
+
+ return &desc->txd;
+
+err_free_buffer:
+ dma_pool_free(atdma->memset_pool, vaddr, paddr);
+ return NULL;
+}
+
+static struct dma_async_tx_descriptor *
+atc_prep_dma_memset_sg(struct dma_chan *chan,
+ struct scatterlist *sgl,
+ unsigned int sg_len, int value,
+ unsigned long flags)
+{
+ struct at_dma_chan *atchan = to_at_dma_chan(chan);
+ struct at_dma *atdma = to_at_dma(chan->device);
+ struct at_desc *desc = NULL, *first = NULL, *prev = NULL;
+ struct scatterlist *sg;
+ void __iomem *vaddr;
+ dma_addr_t paddr;
+ size_t total_len = 0;
+ int i;
+
+ dev_vdbg(chan2dev(chan), "%s: v0x%x l0x%zx f0x%lx\n", __func__,
+ value, sg_len, flags);
+
+ if (unlikely(!sgl || !sg_len)) {
+ dev_dbg(chan2dev(chan), "%s: scatterlist is empty!\n",
+ __func__);
+ return NULL;
+ }
+
+ vaddr = dma_pool_alloc(atdma->memset_pool, GFP_NOWAIT, &paddr);
+ if (!vaddr) {
+ dev_err(chan2dev(chan), "%s: couldn't allocate buffer\n",
+ __func__);
+ return NULL;
+ }
+ *(u32*)vaddr = value;
+
+ for_each_sg(sgl, sg, sg_len, i) {
+ dma_addr_t dest = sg_dma_address(sg);
+ size_t len = sg_dma_len(sg);
+
+ dev_vdbg(chan2dev(chan), "%s: d%pad, l0x%zx\n",
+ __func__, &dest, len);
+
+ if (!is_dma_fill_aligned(chan->device, dest, 0, len)) {
+ dev_err(chan2dev(chan), "%s: buffer is not aligned\n",
+ __func__);
+ goto err_put_desc;
+ }
+
+ desc = atc_create_memset_desc(chan, paddr, dest, len);
+ if (!desc)
+ goto err_put_desc;
+
+ atc_desc_chain(&first, &prev, desc);
+
+ total_len += len;
+ }
+
+ /*
+ * Only set the buffer pointers on the last descriptor to
+ * avoid free'ing while we have our transfer still going
+ */
+ desc->memset_paddr = paddr;
+ desc->memset_vaddr = vaddr;
+ desc->memset_buffer = true;
+
+ first->txd.cookie = -EBUSY;
+ first->total_len = total_len;
+
+ /* set end-of-link on the descriptor */
+ set_desc_eol(desc);
+
+ first->txd.flags = flags;
+
+ return &first->txd;
+
+err_put_desc:
+ atc_desc_put(atchan, first);
+ return NULL;
+}
+
+/**
+ * atc_prep_slave_sg - prepare descriptors for a DMA_SLAVE transaction
+ * @chan: DMA channel
+ * @sgl: scatterlist to transfer to/from
+ * @sg_len: number of entries in @scatterlist
+ * @direction: DMA direction
+ * @flags: tx descriptor status flags
+ * @context: transaction context (ignored)
+ */
+static struct dma_async_tx_descriptor *
+atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
+ unsigned int sg_len, enum dma_transfer_direction direction,
+ unsigned long flags, void *context)
+{
+ struct at_dma_chan *atchan = to_at_dma_chan(chan);
+ struct at_dma_slave *atslave = chan->private;
+ struct dma_slave_config *sconfig = &atchan->dma_sconfig;
+ struct at_desc *first = NULL;
+ struct at_desc *prev = NULL;
+ u32 ctrla;
+ u32 ctrlb;
+ dma_addr_t reg;
+ unsigned int reg_width;
+ unsigned int mem_width;
+ unsigned int i;
+ struct scatterlist *sg;
+ size_t total_len = 0;
+
+ dev_vdbg(chan2dev(chan), "prep_slave_sg (%d): %s f0x%lx\n",
+ sg_len,
+ direction == DMA_MEM_TO_DEV ? "TO DEVICE" : "FROM DEVICE",
+ flags);
+
+ if (unlikely(!atslave || !sg_len)) {
+ dev_dbg(chan2dev(chan), "prep_slave_sg: sg length is zero!\n");
+ return NULL;
+ }
+
+ ctrla = ATC_SCSIZE(sconfig->src_maxburst)
+ | ATC_DCSIZE(sconfig->dst_maxburst);
+ ctrlb = ATC_IEN;
+
+ switch (direction) {
+ case DMA_MEM_TO_DEV:
+ reg_width = convert_buswidth(sconfig->dst_addr_width);
+ ctrla |= ATC_DST_WIDTH(reg_width);
+ ctrlb |= ATC_DST_ADDR_MODE_FIXED
+ | ATC_SRC_ADDR_MODE_INCR
+ | ATC_FC_MEM2PER
+ | ATC_SIF(atchan->mem_if) | ATC_DIF(atchan->per_if);
+ reg = sconfig->dst_addr;
+ for_each_sg(sgl, sg, sg_len, i) {
+ struct at_desc *desc;
+ u32 len;
+ u32 mem;
+
+ desc = atc_desc_get(atchan);
+ if (!desc)
+ goto err_desc_get;
+
+ mem = sg_dma_address(sg);
+ len = sg_dma_len(sg);
+ if (unlikely(!len)) {
+ dev_dbg(chan2dev(chan),
+ "prep_slave_sg: sg(%d) data length is zero\n", i);
+ goto err;
+ }
+ mem_width = 2;
+ if (unlikely(mem & 3 || len & 3))
+ mem_width = 0;
+
+ desc->lli.saddr = mem;
+ desc->lli.daddr = reg;
+ desc->lli.ctrla = ctrla
+ | ATC_SRC_WIDTH(mem_width)
+ | len >> mem_width;
+ desc->lli.ctrlb = ctrlb;
+ desc->len = len;
+
+ atc_desc_chain(&first, &prev, desc);
+ total_len += len;
+ }
+ break;
+ case DMA_DEV_TO_MEM:
+ reg_width = convert_buswidth(sconfig->src_addr_width);
+ ctrla |= ATC_SRC_WIDTH(reg_width);
+ ctrlb |= ATC_DST_ADDR_MODE_INCR
+ | ATC_SRC_ADDR_MODE_FIXED
+ | ATC_FC_PER2MEM
+ | ATC_SIF(atchan->per_if) | ATC_DIF(atchan->mem_if);
+
+ reg = sconfig->src_addr;
+ for_each_sg(sgl, sg, sg_len, i) {
+ struct at_desc *desc;
+ u32 len;
+ u32 mem;
+
+ desc = atc_desc_get(atchan);
+ if (!desc)
+ goto err_desc_get;
+
+ mem = sg_dma_address(sg);
+ len = sg_dma_len(sg);
+ if (unlikely(!len)) {
+ dev_dbg(chan2dev(chan),
+ "prep_slave_sg: sg(%d) data length is zero\n", i);
+ goto err;
+ }
+ mem_width = 2;
+ if (unlikely(mem & 3 || len & 3))
+ mem_width = 0;
+
+ desc->lli.saddr = reg;
+ desc->lli.daddr = mem;
+ desc->lli.ctrla = ctrla
+ | ATC_DST_WIDTH(mem_width)
+ | len >> reg_width;
+ desc->lli.ctrlb = ctrlb;
+ desc->len = len;
+
+ atc_desc_chain(&first, &prev, desc);
+ total_len += len;
+ }
+ break;
+ default:
+ return NULL;
+ }
+
+ /* set end-of-link to the last link descriptor of list*/
+ set_desc_eol(prev);
+
+ /* First descriptor of the chain embedds additional information */
+ first->txd.cookie = -EBUSY;
+ first->total_len = total_len;
+
+ /* first link descriptor of list is responsible of flags */
+ first->txd.flags = flags; /* client is in control of this ack */
+
+ return &first->txd;
+
+err_desc_get:
+ dev_err(chan2dev(chan), "not enough descriptors available\n");
+err:
+ atc_desc_put(atchan, first);
+ return NULL;
+}
+
+/*
+ * atc_dma_cyclic_check_values
+ * Check for too big/unaligned periods and unaligned DMA buffer
+ */
+static int
+atc_dma_cyclic_check_values(unsigned int reg_width, dma_addr_t buf_addr,
+ size_t period_len)
+{
+ if (period_len > (ATC_BTSIZE_MAX << reg_width))
+ goto err_out;
+ if (unlikely(period_len & ((1 << reg_width) - 1)))
+ goto err_out;
+ if (unlikely(buf_addr & ((1 << reg_width) - 1)))
+ goto err_out;
+
+ return 0;
+
+err_out:
+ return -EINVAL;
+}
+
+/*
+ * atc_dma_cyclic_fill_desc - Fill one period descriptor
+ */
+static int
+atc_dma_cyclic_fill_desc(struct dma_chan *chan, struct at_desc *desc,
+ unsigned int period_index, dma_addr_t buf_addr,
+ unsigned int reg_width, size_t period_len,
+ enum dma_transfer_direction direction)
+{
+ struct at_dma_chan *atchan = to_at_dma_chan(chan);
+ struct dma_slave_config *sconfig = &atchan->dma_sconfig;
+ u32 ctrla;
+
+ /* prepare common CRTLA value */
+ ctrla = ATC_SCSIZE(sconfig->src_maxburst)
+ | ATC_DCSIZE(sconfig->dst_maxburst)
+ | ATC_DST_WIDTH(reg_width)
+ | ATC_SRC_WIDTH(reg_width)
+ | period_len >> reg_width;
+
+ switch (direction) {
+ case DMA_MEM_TO_DEV:
+ desc->lli.saddr = buf_addr + (period_len * period_index);
+ desc->lli.daddr = sconfig->dst_addr;
+ desc->lli.ctrla = ctrla;
+ desc->lli.ctrlb = ATC_DST_ADDR_MODE_FIXED
+ | ATC_SRC_ADDR_MODE_INCR
+ | ATC_FC_MEM2PER
+ | ATC_SIF(atchan->mem_if)
+ | ATC_DIF(atchan->per_if);
+ desc->len = period_len;
+ break;
+
+ case DMA_DEV_TO_MEM:
+ desc->lli.saddr = sconfig->src_addr;
+ desc->lli.daddr = buf_addr + (period_len * period_index);
+ desc->lli.ctrla = ctrla;
+ desc->lli.ctrlb = ATC_DST_ADDR_MODE_INCR
+ | ATC_SRC_ADDR_MODE_FIXED
+ | ATC_FC_PER2MEM
+ | ATC_SIF(atchan->per_if)
+ | ATC_DIF(atchan->mem_if);
+ desc->len = period_len;
+ break;
+
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/**
+ * atc_prep_dma_cyclic - prepare the cyclic DMA transfer
+ * @chan: the DMA channel to prepare
+ * @buf_addr: physical DMA address where the buffer starts
+ * @buf_len: total number of bytes for the entire buffer
+ * @period_len: number of bytes for each period
+ * @direction: transfer direction, to or from device
+ * @flags: tx descriptor status flags
+ */
+static struct dma_async_tx_descriptor *
+atc_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
+ size_t period_len, enum dma_transfer_direction direction,
+ unsigned long flags)
+{
+ struct at_dma_chan *atchan = to_at_dma_chan(chan);
+ struct at_dma_slave *atslave = chan->private;
+ struct dma_slave_config *sconfig = &atchan->dma_sconfig;
+ struct at_desc *first = NULL;
+ struct at_desc *prev = NULL;
+ unsigned long was_cyclic;
+ unsigned int reg_width;
+ unsigned int periods = buf_len / period_len;
+ unsigned int i;
+
+ dev_vdbg(chan2dev(chan), "prep_dma_cyclic: %s buf@%pad - %d (%d/%d)\n",
+ direction == DMA_MEM_TO_DEV ? "TO DEVICE" : "FROM DEVICE",
+ &buf_addr,
+ periods, buf_len, period_len);
+
+ if (unlikely(!atslave || !buf_len || !period_len)) {
+ dev_dbg(chan2dev(chan), "prep_dma_cyclic: length is zero!\n");
+ return NULL;
+ }
+
+ was_cyclic = test_and_set_bit(ATC_IS_CYCLIC, &atchan->status);
+ if (was_cyclic) {
+ dev_dbg(chan2dev(chan), "prep_dma_cyclic: channel in use!\n");
+ return NULL;
+ }
+
+ if (unlikely(!is_slave_direction(direction)))
+ goto err_out;
+
+ if (direction == DMA_MEM_TO_DEV)
+ reg_width = convert_buswidth(sconfig->dst_addr_width);
+ else
+ reg_width = convert_buswidth(sconfig->src_addr_width);
+
+ /* Check for too big/unaligned periods and unaligned DMA buffer */
+ if (atc_dma_cyclic_check_values(reg_width, buf_addr, period_len))
+ goto err_out;
+
+ /* build cyclic linked list */
+ for (i = 0; i < periods; i++) {
+ struct at_desc *desc;
+
+ desc = atc_desc_get(atchan);
+ if (!desc)
+ goto err_desc_get;
+
+ if (atc_dma_cyclic_fill_desc(chan, desc, i, buf_addr,
+ reg_width, period_len, direction))
+ goto err_desc_get;
+
+ atc_desc_chain(&first, &prev, desc);
+ }
+
+ /* lets make a cyclic list */
+ prev->lli.dscr = first->txd.phys;
+
+ /* First descriptor of the chain embedds additional information */
+ first->txd.cookie = -EBUSY;
+ first->total_len = buf_len;
+
+ return &first->txd;
+
+err_desc_get:
+ dev_err(chan2dev(chan), "not enough descriptors available\n");
+ atc_desc_put(atchan, first);
+err_out:
+ clear_bit(ATC_IS_CYCLIC, &atchan->status);
+ return NULL;
+}
+
+static int atc_config(struct dma_chan *chan,
+ struct dma_slave_config *sconfig)
+{
+ struct at_dma_chan *atchan = to_at_dma_chan(chan);
+
+ dev_vdbg(chan2dev(chan), "%s\n", __func__);
+
+ /* Check if it is chan is configured for slave transfers */
+ if (!chan->private)
+ return -EINVAL;
+
+ memcpy(&atchan->dma_sconfig, sconfig, sizeof(*sconfig));
+
+ convert_burst(&atchan->dma_sconfig.src_maxburst);
+ convert_burst(&atchan->dma_sconfig.dst_maxburst);
+
+ return 0;
+}
+
+static int atc_pause(struct dma_chan *chan)
+{
+ struct at_dma_chan *atchan = to_at_dma_chan(chan);
+ struct at_dma *atdma = to_at_dma(chan->device);
+ int chan_id = atchan->chan_common.chan_id;
+ unsigned long flags;
+
+ dev_vdbg(chan2dev(chan), "%s\n", __func__);
+
+ spin_lock_irqsave(&atchan->lock, flags);
+
+ dma_writel(atdma, CHER, AT_DMA_SUSP(chan_id));
+ set_bit(ATC_IS_PAUSED, &atchan->status);
+
+ spin_unlock_irqrestore(&atchan->lock, flags);
+
+ return 0;
+}
+
+static int atc_resume(struct dma_chan *chan)
+{
+ struct at_dma_chan *atchan = to_at_dma_chan(chan);
+ struct at_dma *atdma = to_at_dma(chan->device);
+ int chan_id = atchan->chan_common.chan_id;
+ unsigned long flags;
+
+ dev_vdbg(chan2dev(chan), "%s\n", __func__);
+
+ if (!atc_chan_is_paused(atchan))
+ return 0;
+
+ spin_lock_irqsave(&atchan->lock, flags);
+
+ dma_writel(atdma, CHDR, AT_DMA_RES(chan_id));
+ clear_bit(ATC_IS_PAUSED, &atchan->status);
+
+ spin_unlock_irqrestore(&atchan->lock, flags);
+
+ return 0;
+}
+
+static int atc_terminate_all(struct dma_chan *chan)
+{
+ struct at_dma_chan *atchan = to_at_dma_chan(chan);
+ struct at_dma *atdma = to_at_dma(chan->device);
+ int chan_id = atchan->chan_common.chan_id;
+ unsigned long flags;
+
+ dev_vdbg(chan2dev(chan), "%s\n", __func__);
+
+ /*
+ * This is only called when something went wrong elsewhere, so
+ * we don't really care about the data. Just disable the
+ * channel. We still have to poll the channel enable bit due
+ * to AHB/HSB limitations.
+ */
+ spin_lock_irqsave(&atchan->lock, flags);
+
+ /* disabling channel: must also remove suspend state */
+ dma_writel(atdma, CHDR, AT_DMA_RES(chan_id) | atchan->mask);
+
+ /* confirm that this channel is disabled */
+ while (dma_readl(atdma, CHSR) & atchan->mask)
+ cpu_relax();
+
+ /* active_list entries will end up before queued entries */
+ list_splice_tail_init(&atchan->queue, &atchan->free_list);
+ list_splice_tail_init(&atchan->active_list, &atchan->free_list);
+
+ clear_bit(ATC_IS_PAUSED, &atchan->status);
+ /* if channel dedicated to cyclic operations, free it */
+ clear_bit(ATC_IS_CYCLIC, &atchan->status);
+
+ spin_unlock_irqrestore(&atchan->lock, flags);
+
+ return 0;
+}
+
+/**
+ * atc_tx_status - poll for transaction completion
+ * @chan: DMA channel
+ * @cookie: transaction identifier to check status of
+ * @txstate: if not %NULL updated with transaction state
+ *
+ * If @txstate is passed in, upon return it reflect the driver
+ * internal state and can be used with dma_async_is_complete() to check
+ * the status of multiple cookies without re-checking hardware state.
+ */
+static enum dma_status
+atc_tx_status(struct dma_chan *chan,
+ dma_cookie_t cookie,
+ struct dma_tx_state *txstate)
+{
+ struct at_dma_chan *atchan = to_at_dma_chan(chan);
+ unsigned long flags;
+ enum dma_status ret;
+ int bytes = 0;
+
+ ret = dma_cookie_status(chan, cookie, txstate);
+ if (ret == DMA_COMPLETE)
+ return ret;
+ /*
+ * There's no point calculating the residue if there's
+ * no txstate to store the value.
+ */
+ if (!txstate)
+ return DMA_ERROR;
+
+ spin_lock_irqsave(&atchan->lock, flags);
+
+ /* Get number of bytes left in the active transactions */
+ bytes = atc_get_bytes_left(chan, cookie);
+
+ spin_unlock_irqrestore(&atchan->lock, flags);
+
+ if (unlikely(bytes < 0)) {
+ dev_vdbg(chan2dev(chan), "get residual bytes error\n");
+ return DMA_ERROR;
+ } else {
+ dma_set_residue(txstate, bytes);
+ }
+
+ dev_vdbg(chan2dev(chan), "tx_status %d: cookie = %d residue = %d\n",
+ ret, cookie, bytes);
+
+ return ret;
+}
+
+/**
+ * atc_issue_pending - takes the first transaction descriptor in the pending
+ * queue and starts the transfer.
+ * @chan: target DMA channel
+ */
+static void atc_issue_pending(struct dma_chan *chan)
+{
+ struct at_dma_chan *atchan = to_at_dma_chan(chan);
+ struct at_desc *desc;
+ unsigned long flags;
+
+ dev_vdbg(chan2dev(chan), "issue_pending\n");
+
+ spin_lock_irqsave(&atchan->lock, flags);
+ if (atc_chan_is_enabled(atchan) || list_empty(&atchan->queue))
+ return spin_unlock_irqrestore(&atchan->lock, flags);
+
+ desc = atc_first_queued(atchan);
+ list_move_tail(&desc->desc_node, &atchan->active_list);
+ atc_dostart(atchan, desc);
+ spin_unlock_irqrestore(&atchan->lock, flags);
+}
+
+/**
+ * atc_alloc_chan_resources - allocate resources for DMA channel
+ * @chan: allocate descriptor resources for this channel
+ *
+ * return - the number of allocated descriptors
+ */
+static int atc_alloc_chan_resources(struct dma_chan *chan)
+{
+ struct at_dma_chan *atchan = to_at_dma_chan(chan);
+ struct at_dma *atdma = to_at_dma(chan->device);
+ struct at_desc *desc;
+ struct at_dma_slave *atslave;
+ int i;
+ u32 cfg;
+
+ dev_vdbg(chan2dev(chan), "alloc_chan_resources\n");
+
+ /* ASSERT: channel is idle */
+ if (atc_chan_is_enabled(atchan)) {
+ dev_dbg(chan2dev(chan), "DMA channel not idle ?\n");
+ return -EIO;
+ }
+
+ if (!list_empty(&atchan->free_list)) {
+ dev_dbg(chan2dev(chan), "can't allocate channel resources (channel not freed from a previous use)\n");
+ return -EIO;
+ }
+
+ cfg = ATC_DEFAULT_CFG;
+
+ atslave = chan->private;
+ if (atslave) {
+ /*
+ * We need controller-specific data to set up slave
+ * transfers.
+ */
+ BUG_ON(!atslave->dma_dev || atslave->dma_dev != atdma->dma_common.dev);
+
+ /* if cfg configuration specified take it instead of default */
+ if (atslave->cfg)
+ cfg = atslave->cfg;
+ }
+
+ /* Allocate initial pool of descriptors */
+ for (i = 0; i < init_nr_desc_per_channel; i++) {
+ desc = atc_alloc_descriptor(chan, GFP_KERNEL);
+ if (!desc) {
+ dev_err(atdma->dma_common.dev,
+ "Only %d initial descriptors\n", i);
+ break;
+ }
+ list_add_tail(&desc->desc_node, &atchan->free_list);
+ }
+
+ dma_cookie_init(chan);
+
+ /* channel parameters */
+ channel_writel(atchan, CFG, cfg);
+
+ dev_dbg(chan2dev(chan),
+ "alloc_chan_resources: allocated %d descriptors\n", i);
+
+ return i;
+}
+
+/**
+ * atc_free_chan_resources - free all channel resources
+ * @chan: DMA channel
+ */
+static void atc_free_chan_resources(struct dma_chan *chan)
+{
+ struct at_dma_chan *atchan = to_at_dma_chan(chan);
+ struct at_dma *atdma = to_at_dma(chan->device);
+ struct at_desc *desc, *_desc;
+ LIST_HEAD(list);
+
+ /* ASSERT: channel is idle */
+ BUG_ON(!list_empty(&atchan->active_list));
+ BUG_ON(!list_empty(&atchan->queue));
+ BUG_ON(atc_chan_is_enabled(atchan));
+
+ list_for_each_entry_safe(desc, _desc, &atchan->free_list, desc_node) {
+ dev_vdbg(chan2dev(chan), " freeing descriptor %p\n", desc);
+ list_del(&desc->desc_node);
+ /* free link descriptor */
+ dma_pool_free(atdma->dma_desc_pool, desc, desc->txd.phys);
+ }
+ list_splice_init(&atchan->free_list, &list);
+ atchan->status = 0;
+
+ /*
+ * Free atslave allocated in at_dma_xlate()
+ */
+ kfree(chan->private);
+ chan->private = NULL;
+
+ dev_vdbg(chan2dev(chan), "free_chan_resources: done\n");
+}
+
+#ifdef CONFIG_OF
+static bool at_dma_filter(struct dma_chan *chan, void *slave)
+{
+ struct at_dma_slave *atslave = slave;
+
+ if (atslave->dma_dev == chan->device->dev) {
+ chan->private = atslave;
+ return true;
+ } else {
+ return false;
+ }
+}
+
+static struct dma_chan *at_dma_xlate(struct of_phandle_args *dma_spec,
+ struct of_dma *of_dma)
+{
+ struct dma_chan *chan;
+ struct at_dma_chan *atchan;
+ struct at_dma_slave *atslave;
+ dma_cap_mask_t mask;
+ unsigned int per_id;
+ struct platform_device *dmac_pdev;
+
+ if (dma_spec->args_count != 2)
+ return NULL;
+
+ dmac_pdev = of_find_device_by_node(dma_spec->np);
+ if (!dmac_pdev)
+ return NULL;
+
+ dma_cap_zero(mask);
+ dma_cap_set(DMA_SLAVE, mask);
+
+ atslave = kmalloc(sizeof(*atslave), GFP_KERNEL);
+ if (!atslave) {
+ put_device(&dmac_pdev->dev);
+ return NULL;
+ }
+
+ atslave->cfg = ATC_DST_H2SEL_HW | ATC_SRC_H2SEL_HW;
+ /*
+ * We can fill both SRC_PER and DST_PER, one of these fields will be
+ * ignored depending on DMA transfer direction.
+ */
+ per_id = dma_spec->args[1] & AT91_DMA_CFG_PER_ID_MASK;
+ atslave->cfg |= ATC_DST_PER_MSB(per_id) | ATC_DST_PER(per_id)
+ | ATC_SRC_PER_MSB(per_id) | ATC_SRC_PER(per_id);
+ /*
+ * We have to translate the value we get from the device tree since
+ * the half FIFO configuration value had to be 0 to keep backward
+ * compatibility.
+ */
+ switch (dma_spec->args[1] & AT91_DMA_CFG_FIFOCFG_MASK) {
+ case AT91_DMA_CFG_FIFOCFG_ALAP:
+ atslave->cfg |= ATC_FIFOCFG_LARGESTBURST;
+ break;
+ case AT91_DMA_CFG_FIFOCFG_ASAP:
+ atslave->cfg |= ATC_FIFOCFG_ENOUGHSPACE;
+ break;
+ case AT91_DMA_CFG_FIFOCFG_HALF:
+ default:
+ atslave->cfg |= ATC_FIFOCFG_HALFFIFO;
+ }
+ atslave->dma_dev = &dmac_pdev->dev;
+
+ chan = dma_request_channel(mask, at_dma_filter, atslave);
+ if (!chan) {
+ put_device(&dmac_pdev->dev);
+ kfree(atslave);
+ return NULL;
+ }
+
+ atchan = to_at_dma_chan(chan);
+ atchan->per_if = dma_spec->args[0] & 0xff;
+ atchan->mem_if = (dma_spec->args[0] >> 16) & 0xff;
+
+ return chan;
+}
+#else
+static struct dma_chan *at_dma_xlate(struct of_phandle_args *dma_spec,
+ struct of_dma *of_dma)
+{
+ return NULL;
+}
+#endif
+
+/*-- Module Management -----------------------------------------------*/
+
+/* cap_mask is a multi-u32 bitfield, fill it with proper C code. */
+static struct at_dma_platform_data at91sam9rl_config = {
+ .nr_channels = 2,
+};
+static struct at_dma_platform_data at91sam9g45_config = {
+ .nr_channels = 8,
+};
+
+#if defined(CONFIG_OF)
+static const struct of_device_id atmel_dma_dt_ids[] = {
+ {
+ .compatible = "atmel,at91sam9rl-dma",
+ .data = &at91sam9rl_config,
+ }, {
+ .compatible = "atmel,at91sam9g45-dma",
+ .data = &at91sam9g45_config,
+ }, {
+ /* sentinel */
+ }
+};
+
+MODULE_DEVICE_TABLE(of, atmel_dma_dt_ids);
+#endif
+
+static const struct platform_device_id atdma_devtypes[] = {
+ {
+ .name = "at91sam9rl_dma",
+ .driver_data = (unsigned long) &at91sam9rl_config,
+ }, {
+ .name = "at91sam9g45_dma",
+ .driver_data = (unsigned long) &at91sam9g45_config,
+ }, {
+ /* sentinel */
+ }
+};
+
+static inline const struct at_dma_platform_data * __init at_dma_get_driver_data(
+ struct platform_device *pdev)
+{
+ if (pdev->dev.of_node) {
+ const struct of_device_id *match;
+ match = of_match_node(atmel_dma_dt_ids, pdev->dev.of_node);
+ if (match == NULL)
+ return NULL;
+ return match->data;
+ }
+ return (struct at_dma_platform_data *)
+ platform_get_device_id(pdev)->driver_data;
+}
+
+/**
+ * at_dma_off - disable DMA controller
+ * @atdma: the Atmel HDAMC device
+ */
+static void at_dma_off(struct at_dma *atdma)
+{
+ dma_writel(atdma, EN, 0);
+
+ /* disable all interrupts */
+ dma_writel(atdma, EBCIDR, -1L);
+
+ /* confirm that all channels are disabled */
+ while (dma_readl(atdma, CHSR) & atdma->all_chan_mask)
+ cpu_relax();
+}
+
+static int __init at_dma_probe(struct platform_device *pdev)
+{
+ struct resource *io;
+ struct at_dma *atdma;
+ size_t size;
+ int irq;
+ int err;
+ int i;
+ const struct at_dma_platform_data *plat_dat;
+
+ /* setup platform data for each SoC */
+ dma_cap_set(DMA_MEMCPY, at91sam9rl_config.cap_mask);
+ dma_cap_set(DMA_INTERLEAVE, at91sam9g45_config.cap_mask);
+ dma_cap_set(DMA_MEMCPY, at91sam9g45_config.cap_mask);
+ dma_cap_set(DMA_MEMSET, at91sam9g45_config.cap_mask);
+ dma_cap_set(DMA_MEMSET_SG, at91sam9g45_config.cap_mask);
+ dma_cap_set(DMA_PRIVATE, at91sam9g45_config.cap_mask);
+ dma_cap_set(DMA_SLAVE, at91sam9g45_config.cap_mask);
+
+ /* get DMA parameters from controller type */
+ plat_dat = at_dma_get_driver_data(pdev);
+ if (!plat_dat)
+ return -ENODEV;
+
+ io = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ if (!io)
+ return -EINVAL;
+
+ irq = platform_get_irq(pdev, 0);
+ if (irq < 0)
+ return irq;
+
+ size = sizeof(struct at_dma);
+ size += plat_dat->nr_channels * sizeof(struct at_dma_chan);
+ atdma = kzalloc(size, GFP_KERNEL);
+ if (!atdma)
+ return -ENOMEM;
+
+ /* discover transaction capabilities */
+ atdma->dma_common.cap_mask = plat_dat->cap_mask;
+ atdma->all_chan_mask = (1 << plat_dat->nr_channels) - 1;
+
+ size = resource_size(io);
+ if (!request_mem_region(io->start, size, pdev->dev.driver->name)) {
+ err = -EBUSY;
+ goto err_kfree;
+ }
+
+ atdma->regs = ioremap(io->start, size);
+ if (!atdma->regs) {
+ err = -ENOMEM;
+ goto err_release_r;
+ }
+
+ atdma->clk = clk_get(&pdev->dev, "dma_clk");
+ if (IS_ERR(atdma->clk)) {
+ err = PTR_ERR(atdma->clk);
+ goto err_clk;
+ }
+ err = clk_prepare_enable(atdma->clk);
+ if (err)
+ goto err_clk_prepare;
+
+ /* force dma off, just in case */
+ at_dma_off(atdma);
+
+ err = request_irq(irq, at_dma_interrupt, 0, "at_hdmac", atdma);
+ if (err)
+ goto err_irq;
+
+ platform_set_drvdata(pdev, atdma);
+
+ /* create a pool of consistent memory blocks for hardware descriptors */
+ atdma->dma_desc_pool = dma_pool_create("at_hdmac_desc_pool",
+ &pdev->dev, sizeof(struct at_desc),
+ 4 /* word alignment */, 0);
+ if (!atdma->dma_desc_pool) {
+ dev_err(&pdev->dev, "No memory for descriptors dma pool\n");
+ err = -ENOMEM;
+ goto err_desc_pool_create;
+ }
+
+ /* create a pool of consistent memory blocks for memset blocks */
+ atdma->memset_pool = dma_pool_create("at_hdmac_memset_pool",
+ &pdev->dev, sizeof(int), 4, 0);
+ if (!atdma->memset_pool) {
+ dev_err(&pdev->dev, "No memory for memset dma pool\n");
+ err = -ENOMEM;
+ goto err_memset_pool_create;
+ }
+
+ /* clear any pending interrupt */
+ while (dma_readl(atdma, EBCISR))
+ cpu_relax();
+
+ /* initialize channels related values */
+ INIT_LIST_HEAD(&atdma->dma_common.channels);
+ for (i = 0; i < plat_dat->nr_channels; i++) {
+ struct at_dma_chan *atchan = &atdma->chan[i];
+
+ atchan->mem_if = AT_DMA_MEM_IF;
+ atchan->per_if = AT_DMA_PER_IF;
+ atchan->chan_common.device = &atdma->dma_common;
+ dma_cookie_init(&atchan->chan_common);
+ list_add_tail(&atchan->chan_common.device_node,
+ &atdma->dma_common.channels);
+
+ atchan->ch_regs = atdma->regs + ch_regs(i);
+ spin_lock_init(&atchan->lock);
+ atchan->mask = 1 << i;
+
+ INIT_LIST_HEAD(&atchan->active_list);
+ INIT_LIST_HEAD(&atchan->queue);
+ INIT_LIST_HEAD(&atchan->free_list);
+
+ tasklet_setup(&atchan->tasklet, atc_tasklet);
+ atc_enable_chan_irq(atdma, i);
+ }
+
+ /* set base routines */
+ atdma->dma_common.device_alloc_chan_resources = atc_alloc_chan_resources;
+ atdma->dma_common.device_free_chan_resources = atc_free_chan_resources;
+ atdma->dma_common.device_tx_status = atc_tx_status;
+ atdma->dma_common.device_issue_pending = atc_issue_pending;
+ atdma->dma_common.dev = &pdev->dev;
+
+ /* set prep routines based on capability */
+ if (dma_has_cap(DMA_INTERLEAVE, atdma->dma_common.cap_mask))
+ atdma->dma_common.device_prep_interleaved_dma = atc_prep_dma_interleaved;
+
+ if (dma_has_cap(DMA_MEMCPY, atdma->dma_common.cap_mask))
+ atdma->dma_common.device_prep_dma_memcpy = atc_prep_dma_memcpy;
+
+ if (dma_has_cap(DMA_MEMSET, atdma->dma_common.cap_mask)) {
+ atdma->dma_common.device_prep_dma_memset = atc_prep_dma_memset;
+ atdma->dma_common.device_prep_dma_memset_sg = atc_prep_dma_memset_sg;
+ atdma->dma_common.fill_align = DMAENGINE_ALIGN_4_BYTES;
+ }
+
+ if (dma_has_cap(DMA_SLAVE, atdma->dma_common.cap_mask)) {
+ atdma->dma_common.device_prep_slave_sg = atc_prep_slave_sg;
+ /* controller can do slave DMA: can trigger cyclic transfers */
+ dma_cap_set(DMA_CYCLIC, atdma->dma_common.cap_mask);
+ atdma->dma_common.device_prep_dma_cyclic = atc_prep_dma_cyclic;
+ atdma->dma_common.device_config = atc_config;
+ atdma->dma_common.device_pause = atc_pause;
+ atdma->dma_common.device_resume = atc_resume;
+ atdma->dma_common.device_terminate_all = atc_terminate_all;
+ atdma->dma_common.src_addr_widths = ATC_DMA_BUSWIDTHS;
+ atdma->dma_common.dst_addr_widths = ATC_DMA_BUSWIDTHS;
+ atdma->dma_common.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
+ atdma->dma_common.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
+ }
+
+ dma_writel(atdma, EN, AT_DMA_ENABLE);
+
+ dev_info(&pdev->dev, "Atmel AHB DMA Controller ( %s%s%s), %d channels\n",
+ dma_has_cap(DMA_MEMCPY, atdma->dma_common.cap_mask) ? "cpy " : "",
+ dma_has_cap(DMA_MEMSET, atdma->dma_common.cap_mask) ? "set " : "",
+ dma_has_cap(DMA_SLAVE, atdma->dma_common.cap_mask) ? "slave " : "",
+ plat_dat->nr_channels);
+
+ err = dma_async_device_register(&atdma->dma_common);
+ if (err) {
+ dev_err(&pdev->dev, "Unable to register: %d.\n", err);
+ goto err_dma_async_device_register;
+ }
+
+ /*
+ * Do not return an error if the dmac node is not present in order to
+ * not break the existing way of requesting channel with
+ * dma_request_channel().
+ */
+ if (pdev->dev.of_node) {
+ err = of_dma_controller_register(pdev->dev.of_node,
+ at_dma_xlate, atdma);
+ if (err) {
+ dev_err(&pdev->dev, "could not register of_dma_controller\n");
+ goto err_of_dma_controller_register;
+ }
+ }
+
+ return 0;
+
+err_of_dma_controller_register:
+ dma_async_device_unregister(&atdma->dma_common);
+err_dma_async_device_register:
+ dma_pool_destroy(atdma->memset_pool);
+err_memset_pool_create:
+ dma_pool_destroy(atdma->dma_desc_pool);
+err_desc_pool_create:
+ free_irq(platform_get_irq(pdev, 0), atdma);
+err_irq:
+ clk_disable_unprepare(atdma->clk);
+err_clk_prepare:
+ clk_put(atdma->clk);
+err_clk:
+ iounmap(atdma->regs);
+ atdma->regs = NULL;
+err_release_r:
+ release_mem_region(io->start, size);
+err_kfree:
+ kfree(atdma);
+ return err;
+}
+
+static int at_dma_remove(struct platform_device *pdev)
+{
+ struct at_dma *atdma = platform_get_drvdata(pdev);
+ struct dma_chan *chan, *_chan;
+ struct resource *io;
+
+ at_dma_off(atdma);
+ if (pdev->dev.of_node)
+ of_dma_controller_free(pdev->dev.of_node);
+ dma_async_device_unregister(&atdma->dma_common);
+
+ dma_pool_destroy(atdma->memset_pool);
+ dma_pool_destroy(atdma->dma_desc_pool);
+ free_irq(platform_get_irq(pdev, 0), atdma);
+
+ list_for_each_entry_safe(chan, _chan, &atdma->dma_common.channels,
+ device_node) {
+ struct at_dma_chan *atchan = to_at_dma_chan(chan);
+
+ /* Disable interrupts */
+ atc_disable_chan_irq(atdma, chan->chan_id);
+
+ tasklet_kill(&atchan->tasklet);
+ list_del(&chan->device_node);
+ }
+
+ clk_disable_unprepare(atdma->clk);
+ clk_put(atdma->clk);
+
+ iounmap(atdma->regs);
+ atdma->regs = NULL;
+
+ io = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ release_mem_region(io->start, resource_size(io));
+
+ kfree(atdma);
+
+ return 0;
+}
+
+static void at_dma_shutdown(struct platform_device *pdev)
+{
+ struct at_dma *atdma = platform_get_drvdata(pdev);
+
+ at_dma_off(platform_get_drvdata(pdev));
+ clk_disable_unprepare(atdma->clk);
+}
+
+static int at_dma_prepare(struct device *dev)
+{
+ struct at_dma *atdma = dev_get_drvdata(dev);
+ struct dma_chan *chan, *_chan;
+
+ list_for_each_entry_safe(chan, _chan, &atdma->dma_common.channels,
+ device_node) {
+ struct at_dma_chan *atchan = to_at_dma_chan(chan);
+ /* wait for transaction completion (except in cyclic case) */
+ if (atc_chan_is_enabled(atchan) && !atc_chan_is_cyclic(atchan))
+ return -EAGAIN;
+ }
+ return 0;
+}
+
+static void atc_suspend_cyclic(struct at_dma_chan *atchan)
+{
+ struct dma_chan *chan = &atchan->chan_common;
+
+ /* Channel should be paused by user
+ * do it anyway even if it is not done already */
+ if (!atc_chan_is_paused(atchan)) {
+ dev_warn(chan2dev(chan),
+ "cyclic channel not paused, should be done by channel user\n");
+ atc_pause(chan);
+ }
+
+ /* now preserve additional data for cyclic operations */
+ /* next descriptor address in the cyclic list */
+ atchan->save_dscr = channel_readl(atchan, DSCR);
+
+ vdbg_dump_regs(atchan);
+}
+
+static int at_dma_suspend_noirq(struct device *dev)
+{
+ struct at_dma *atdma = dev_get_drvdata(dev);
+ struct dma_chan *chan, *_chan;
+
+ /* preserve data */
+ list_for_each_entry_safe(chan, _chan, &atdma->dma_common.channels,
+ device_node) {
+ struct at_dma_chan *atchan = to_at_dma_chan(chan);
+
+ if (atc_chan_is_cyclic(atchan))
+ atc_suspend_cyclic(atchan);
+ atchan->save_cfg = channel_readl(atchan, CFG);
+ }
+ atdma->save_imr = dma_readl(atdma, EBCIMR);
+
+ /* disable DMA controller */
+ at_dma_off(atdma);
+ clk_disable_unprepare(atdma->clk);
+ return 0;
+}
+
+static void atc_resume_cyclic(struct at_dma_chan *atchan)
+{
+ struct at_dma *atdma = to_at_dma(atchan->chan_common.device);
+
+ /* restore channel status for cyclic descriptors list:
+ * next descriptor in the cyclic list at the time of suspend */
+ channel_writel(atchan, SADDR, 0);
+ channel_writel(atchan, DADDR, 0);
+ channel_writel(atchan, CTRLA, 0);
+ channel_writel(atchan, CTRLB, 0);
+ channel_writel(atchan, DSCR, atchan->save_dscr);
+ dma_writel(atdma, CHER, atchan->mask);
+
+ /* channel pause status should be removed by channel user
+ * We cannot take the initiative to do it here */
+
+ vdbg_dump_regs(atchan);
+}
+
+static int at_dma_resume_noirq(struct device *dev)
+{
+ struct at_dma *atdma = dev_get_drvdata(dev);
+ struct dma_chan *chan, *_chan;
+
+ /* bring back DMA controller */
+ clk_prepare_enable(atdma->clk);
+ dma_writel(atdma, EN, AT_DMA_ENABLE);
+
+ /* clear any pending interrupt */
+ while (dma_readl(atdma, EBCISR))
+ cpu_relax();
+
+ /* restore saved data */
+ dma_writel(atdma, EBCIER, atdma->save_imr);
+ list_for_each_entry_safe(chan, _chan, &atdma->dma_common.channels,
+ device_node) {
+ struct at_dma_chan *atchan = to_at_dma_chan(chan);
+
+ channel_writel(atchan, CFG, atchan->save_cfg);
+ if (atc_chan_is_cyclic(atchan))
+ atc_resume_cyclic(atchan);
+ }
+ return 0;
+}
+
+static const struct dev_pm_ops at_dma_dev_pm_ops = {
+ .prepare = at_dma_prepare,
+ .suspend_noirq = at_dma_suspend_noirq,
+ .resume_noirq = at_dma_resume_noirq,
+};
+
+static struct platform_driver at_dma_driver = {
+ .remove = at_dma_remove,
+ .shutdown = at_dma_shutdown,
+ .id_table = atdma_devtypes,
+ .driver = {
+ .name = "at_hdmac",
+ .pm = &at_dma_dev_pm_ops,
+ .of_match_table = of_match_ptr(atmel_dma_dt_ids),
+ },
+};
+
+static int __init at_dma_init(void)
+{
+ return platform_driver_probe(&at_dma_driver, at_dma_probe);
+}
+subsys_initcall(at_dma_init);
+
+static void __exit at_dma_exit(void)
+{
+ platform_driver_unregister(&at_dma_driver);
+}
+module_exit(at_dma_exit);
+
+MODULE_DESCRIPTION("Atmel AHB DMA Controller driver");
+MODULE_AUTHOR("Nicolas Ferre <nicolas.ferre@atmel.com>");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:at_hdmac");
diff --git a/drivers/dma/at_hdmac_regs.h b/drivers/dma/at_hdmac_regs.h
new file mode 100644
index 000000000..d4d382d74
--- /dev/null
+++ b/drivers/dma/at_hdmac_regs.h
@@ -0,0 +1,478 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Header file for the Atmel AHB DMA Controller driver
+ *
+ * Copyright (C) 2008 Atmel Corporation
+ */
+#ifndef AT_HDMAC_REGS_H
+#define AT_HDMAC_REGS_H
+
+#define AT_DMA_MAX_NR_CHANNELS 8
+
+
+#define AT_DMA_GCFG 0x00 /* Global Configuration Register */
+#define AT_DMA_IF_BIGEND(i) (0x1 << (i)) /* AHB-Lite Interface i in Big-endian mode */
+#define AT_DMA_ARB_CFG (0x1 << 4) /* Arbiter mode. */
+#define AT_DMA_ARB_CFG_FIXED (0x0 << 4)
+#define AT_DMA_ARB_CFG_ROUND_ROBIN (0x1 << 4)
+
+#define AT_DMA_EN 0x04 /* Controller Enable Register */
+#define AT_DMA_ENABLE (0x1 << 0)
+
+#define AT_DMA_SREQ 0x08 /* Software Single Request Register */
+#define AT_DMA_SSREQ(x) (0x1 << ((x) << 1)) /* Request a source single transfer on channel x */
+#define AT_DMA_DSREQ(x) (0x1 << (1 + ((x) << 1))) /* Request a destination single transfer on channel x */
+
+#define AT_DMA_CREQ 0x0C /* Software Chunk Transfer Request Register */
+#define AT_DMA_SCREQ(x) (0x1 << ((x) << 1)) /* Request a source chunk transfer on channel x */
+#define AT_DMA_DCREQ(x) (0x1 << (1 + ((x) << 1))) /* Request a destination chunk transfer on channel x */
+
+#define AT_DMA_LAST 0x10 /* Software Last Transfer Flag Register */
+#define AT_DMA_SLAST(x) (0x1 << ((x) << 1)) /* This src rq is last tx of buffer on channel x */
+#define AT_DMA_DLAST(x) (0x1 << (1 + ((x) << 1))) /* This dst rq is last tx of buffer on channel x */
+
+#define AT_DMA_SYNC 0x14 /* Request Synchronization Register */
+#define AT_DMA_SYR(h) (0x1 << (h)) /* Synchronize handshake line h */
+
+/* Error, Chained Buffer transfer completed and Buffer transfer completed Interrupt registers */
+#define AT_DMA_EBCIER 0x18 /* Enable register */
+#define AT_DMA_EBCIDR 0x1C /* Disable register */
+#define AT_DMA_EBCIMR 0x20 /* Mask Register */
+#define AT_DMA_EBCISR 0x24 /* Status Register */
+#define AT_DMA_CBTC_OFFSET 8
+#define AT_DMA_ERR_OFFSET 16
+#define AT_DMA_BTC(x) (0x1 << (x))
+#define AT_DMA_CBTC(x) (0x1 << (AT_DMA_CBTC_OFFSET + (x)))
+#define AT_DMA_ERR(x) (0x1 << (AT_DMA_ERR_OFFSET + (x)))
+
+#define AT_DMA_CHER 0x28 /* Channel Handler Enable Register */
+#define AT_DMA_ENA(x) (0x1 << (x))
+#define AT_DMA_SUSP(x) (0x1 << ( 8 + (x)))
+#define AT_DMA_KEEP(x) (0x1 << (24 + (x)))
+
+#define AT_DMA_CHDR 0x2C /* Channel Handler Disable Register */
+#define AT_DMA_DIS(x) (0x1 << (x))
+#define AT_DMA_RES(x) (0x1 << ( 8 + (x)))
+
+#define AT_DMA_CHSR 0x30 /* Channel Handler Status Register */
+#define AT_DMA_EMPT(x) (0x1 << (16 + (x)))
+#define AT_DMA_STAL(x) (0x1 << (24 + (x)))
+
+
+#define AT_DMA_CH_REGS_BASE 0x3C /* Channel registers base address */
+#define ch_regs(x) (AT_DMA_CH_REGS_BASE + (x) * 0x28) /* Channel x base addr */
+
+/* Hardware register offset for each channel */
+#define ATC_SADDR_OFFSET 0x00 /* Source Address Register */
+#define ATC_DADDR_OFFSET 0x04 /* Destination Address Register */
+#define ATC_DSCR_OFFSET 0x08 /* Descriptor Address Register */
+#define ATC_CTRLA_OFFSET 0x0C /* Control A Register */
+#define ATC_CTRLB_OFFSET 0x10 /* Control B Register */
+#define ATC_CFG_OFFSET 0x14 /* Configuration Register */
+#define ATC_SPIP_OFFSET 0x18 /* Src PIP Configuration Register */
+#define ATC_DPIP_OFFSET 0x1C /* Dst PIP Configuration Register */
+
+
+/* Bitfield definitions */
+
+/* Bitfields in DSCR */
+#define ATC_DSCR_IF(i) (0x3 & (i)) /* Dsc feched via AHB-Lite Interface i */
+
+/* Bitfields in CTRLA */
+#define ATC_BTSIZE_MAX 0xFFFFUL /* Maximum Buffer Transfer Size */
+#define ATC_BTSIZE(x) (ATC_BTSIZE_MAX & (x)) /* Buffer Transfer Size */
+#define ATC_SCSIZE_MASK (0x7 << 16) /* Source Chunk Transfer Size */
+#define ATC_SCSIZE(x) (ATC_SCSIZE_MASK & ((x) << 16))
+#define ATC_SCSIZE_1 (0x0 << 16)
+#define ATC_SCSIZE_4 (0x1 << 16)
+#define ATC_SCSIZE_8 (0x2 << 16)
+#define ATC_SCSIZE_16 (0x3 << 16)
+#define ATC_SCSIZE_32 (0x4 << 16)
+#define ATC_SCSIZE_64 (0x5 << 16)
+#define ATC_SCSIZE_128 (0x6 << 16)
+#define ATC_SCSIZE_256 (0x7 << 16)
+#define ATC_DCSIZE_MASK (0x7 << 20) /* Destination Chunk Transfer Size */
+#define ATC_DCSIZE(x) (ATC_DCSIZE_MASK & ((x) << 20))
+#define ATC_DCSIZE_1 (0x0 << 20)
+#define ATC_DCSIZE_4 (0x1 << 20)
+#define ATC_DCSIZE_8 (0x2 << 20)
+#define ATC_DCSIZE_16 (0x3 << 20)
+#define ATC_DCSIZE_32 (0x4 << 20)
+#define ATC_DCSIZE_64 (0x5 << 20)
+#define ATC_DCSIZE_128 (0x6 << 20)
+#define ATC_DCSIZE_256 (0x7 << 20)
+#define ATC_SRC_WIDTH_MASK (0x3 << 24) /* Source Single Transfer Size */
+#define ATC_SRC_WIDTH(x) ((x) << 24)
+#define ATC_SRC_WIDTH_BYTE (0x0 << 24)
+#define ATC_SRC_WIDTH_HALFWORD (0x1 << 24)
+#define ATC_SRC_WIDTH_WORD (0x2 << 24)
+#define ATC_REG_TO_SRC_WIDTH(r) (((r) >> 24) & 0x3)
+#define ATC_DST_WIDTH_MASK (0x3 << 28) /* Destination Single Transfer Size */
+#define ATC_DST_WIDTH(x) ((x) << 28)
+#define ATC_DST_WIDTH_BYTE (0x0 << 28)
+#define ATC_DST_WIDTH_HALFWORD (0x1 << 28)
+#define ATC_DST_WIDTH_WORD (0x2 << 28)
+#define ATC_DONE (0x1 << 31) /* Tx Done (only written back in descriptor) */
+
+/* Bitfields in CTRLB */
+#define ATC_SIF(i) (0x3 & (i)) /* Src tx done via AHB-Lite Interface i */
+#define ATC_DIF(i) ((0x3 & (i)) << 4) /* Dst tx done via AHB-Lite Interface i */
+ /* Specify AHB interfaces */
+#define AT_DMA_MEM_IF 0 /* interface 0 as memory interface */
+#define AT_DMA_PER_IF 1 /* interface 1 as peripheral interface */
+
+#define ATC_SRC_PIP (0x1 << 8) /* Source Picture-in-Picture enabled */
+#define ATC_DST_PIP (0x1 << 12) /* Destination Picture-in-Picture enabled */
+#define ATC_SRC_DSCR_DIS (0x1 << 16) /* Src Descriptor fetch disable */
+#define ATC_DST_DSCR_DIS (0x1 << 20) /* Dst Descriptor fetch disable */
+#define ATC_FC_MASK (0x7 << 21) /* Choose Flow Controller */
+#define ATC_FC_MEM2MEM (0x0 << 21) /* Mem-to-Mem (DMA) */
+#define ATC_FC_MEM2PER (0x1 << 21) /* Mem-to-Periph (DMA) */
+#define ATC_FC_PER2MEM (0x2 << 21) /* Periph-to-Mem (DMA) */
+#define ATC_FC_PER2PER (0x3 << 21) /* Periph-to-Periph (DMA) */
+#define ATC_FC_PER2MEM_PER (0x4 << 21) /* Periph-to-Mem (Peripheral) */
+#define ATC_FC_MEM2PER_PER (0x5 << 21) /* Mem-to-Periph (Peripheral) */
+#define ATC_FC_PER2PER_SRCPER (0x6 << 21) /* Periph-to-Periph (Src Peripheral) */
+#define ATC_FC_PER2PER_DSTPER (0x7 << 21) /* Periph-to-Periph (Dst Peripheral) */
+#define ATC_SRC_ADDR_MODE_MASK (0x3 << 24)
+#define ATC_SRC_ADDR_MODE_INCR (0x0 << 24) /* Incrementing Mode */
+#define ATC_SRC_ADDR_MODE_DECR (0x1 << 24) /* Decrementing Mode */
+#define ATC_SRC_ADDR_MODE_FIXED (0x2 << 24) /* Fixed Mode */
+#define ATC_DST_ADDR_MODE_MASK (0x3 << 28)
+#define ATC_DST_ADDR_MODE_INCR (0x0 << 28) /* Incrementing Mode */
+#define ATC_DST_ADDR_MODE_DECR (0x1 << 28) /* Decrementing Mode */
+#define ATC_DST_ADDR_MODE_FIXED (0x2 << 28) /* Fixed Mode */
+#define ATC_IEN (0x1 << 30) /* BTC interrupt enable (active low) */
+#define ATC_AUTO (0x1 << 31) /* Auto multiple buffer tx enable */
+
+/* Bitfields in CFG */
+#define ATC_PER_MSB(h) ((0x30U & (h)) >> 4) /* Extract most significant bits of a handshaking identifier */
+
+#define ATC_SRC_PER(h) (0xFU & (h)) /* Channel src rq associated with periph handshaking ifc h */
+#define ATC_DST_PER(h) ((0xFU & (h)) << 4) /* Channel dst rq associated with periph handshaking ifc h */
+#define ATC_SRC_REP (0x1 << 8) /* Source Replay Mod */
+#define ATC_SRC_H2SEL (0x1 << 9) /* Source Handshaking Mod */
+#define ATC_SRC_H2SEL_SW (0x0 << 9)
+#define ATC_SRC_H2SEL_HW (0x1 << 9)
+#define ATC_SRC_PER_MSB(h) (ATC_PER_MSB(h) << 10) /* Channel src rq (most significant bits) */
+#define ATC_DST_REP (0x1 << 12) /* Destination Replay Mod */
+#define ATC_DST_H2SEL (0x1 << 13) /* Destination Handshaking Mod */
+#define ATC_DST_H2SEL_SW (0x0 << 13)
+#define ATC_DST_H2SEL_HW (0x1 << 13)
+#define ATC_DST_PER_MSB(h) (ATC_PER_MSB(h) << 14) /* Channel dst rq (most significant bits) */
+#define ATC_SOD (0x1 << 16) /* Stop On Done */
+#define ATC_LOCK_IF (0x1 << 20) /* Interface Lock */
+#define ATC_LOCK_B (0x1 << 21) /* AHB Bus Lock */
+#define ATC_LOCK_IF_L (0x1 << 22) /* Master Interface Arbiter Lock */
+#define ATC_LOCK_IF_L_CHUNK (0x0 << 22)
+#define ATC_LOCK_IF_L_BUFFER (0x1 << 22)
+#define ATC_AHB_PROT_MASK (0x7 << 24) /* AHB Protection */
+#define ATC_FIFOCFG_MASK (0x3 << 28) /* FIFO Request Configuration */
+#define ATC_FIFOCFG_LARGESTBURST (0x0 << 28)
+#define ATC_FIFOCFG_HALFFIFO (0x1 << 28)
+#define ATC_FIFOCFG_ENOUGHSPACE (0x2 << 28)
+
+/* Bitfields in SPIP */
+#define ATC_SPIP_HOLE(x) (0xFFFFU & (x))
+#define ATC_SPIP_BOUNDARY(x) ((0x3FF & (x)) << 16)
+
+/* Bitfields in DPIP */
+#define ATC_DPIP_HOLE(x) (0xFFFFU & (x))
+#define ATC_DPIP_BOUNDARY(x) ((0x3FF & (x)) << 16)
+
+
+/*-- descriptors -----------------------------------------------------*/
+
+/* LLI == Linked List Item; aka DMA buffer descriptor */
+struct at_lli {
+ /* values that are not changed by hardware */
+ u32 saddr;
+ u32 daddr;
+ /* value that may get written back: */
+ u32 ctrla;
+ /* more values that are not changed by hardware */
+ u32 ctrlb;
+ u32 dscr; /* chain to next lli */
+};
+
+/**
+ * struct at_desc - software descriptor
+ * @at_lli: hardware lli structure
+ * @txd: support for the async_tx api
+ * @desc_node: node on the channed descriptors list
+ * @len: descriptor byte count
+ * @total_len: total transaction byte count
+ */
+struct at_desc {
+ /* FIRST values the hardware uses */
+ struct at_lli lli;
+
+ /* THEN values for driver housekeeping */
+ struct list_head tx_list;
+ struct dma_async_tx_descriptor txd;
+ struct list_head desc_node;
+ size_t len;
+ size_t total_len;
+
+ /* Interleaved data */
+ size_t boundary;
+ size_t dst_hole;
+ size_t src_hole;
+
+ /* Memset temporary buffer */
+ bool memset_buffer;
+ dma_addr_t memset_paddr;
+ int *memset_vaddr;
+};
+
+static inline struct at_desc *
+txd_to_at_desc(struct dma_async_tx_descriptor *txd)
+{
+ return container_of(txd, struct at_desc, txd);
+}
+
+
+/*-- Channels --------------------------------------------------------*/
+
+/**
+ * atc_status - information bits stored in channel status flag
+ *
+ * Manipulated with atomic operations.
+ */
+enum atc_status {
+ ATC_IS_ERROR = 0,
+ ATC_IS_PAUSED = 1,
+ ATC_IS_CYCLIC = 24,
+};
+
+/**
+ * struct at_dma_chan - internal representation of an Atmel HDMAC channel
+ * @chan_common: common dmaengine channel object members
+ * @device: parent device
+ * @ch_regs: memory mapped register base
+ * @mask: channel index in a mask
+ * @per_if: peripheral interface
+ * @mem_if: memory interface
+ * @status: transmit status information from irq/prep* functions
+ * to tasklet (use atomic operations)
+ * @tasklet: bottom half to finish transaction work
+ * @save_cfg: configuration register that is saved on suspend/resume cycle
+ * @save_dscr: for cyclic operations, preserve next descriptor address in
+ * the cyclic list on suspend/resume cycle
+ * @dma_sconfig: configuration for slave transfers, passed via
+ * .device_config
+ * @lock: serializes enqueue/dequeue operations to descriptors lists
+ * @active_list: list of descriptors dmaengine is being running on
+ * @queue: list of descriptors ready to be submitted to engine
+ * @free_list: list of descriptors usable by the channel
+ */
+struct at_dma_chan {
+ struct dma_chan chan_common;
+ struct at_dma *device;
+ void __iomem *ch_regs;
+ u8 mask;
+ u8 per_if;
+ u8 mem_if;
+ unsigned long status;
+ struct tasklet_struct tasklet;
+ u32 save_cfg;
+ u32 save_dscr;
+ struct dma_slave_config dma_sconfig;
+
+ spinlock_t lock;
+
+ /* these other elements are all protected by lock */
+ struct list_head active_list;
+ struct list_head queue;
+ struct list_head free_list;
+};
+
+#define channel_readl(atchan, name) \
+ __raw_readl((atchan)->ch_regs + ATC_##name##_OFFSET)
+
+#define channel_writel(atchan, name, val) \
+ __raw_writel((val), (atchan)->ch_regs + ATC_##name##_OFFSET)
+
+static inline struct at_dma_chan *to_at_dma_chan(struct dma_chan *dchan)
+{
+ return container_of(dchan, struct at_dma_chan, chan_common);
+}
+
+/*
+ * Fix sconfig's burst size according to at_hdmac. We need to convert them as:
+ * 1 -> 0, 4 -> 1, 8 -> 2, 16 -> 3, 32 -> 4, 64 -> 5, 128 -> 6, 256 -> 7.
+ *
+ * This can be done by finding most significant bit set.
+ */
+static inline void convert_burst(u32 *maxburst)
+{
+ if (*maxburst > 1)
+ *maxburst = fls(*maxburst) - 2;
+ else
+ *maxburst = 0;
+}
+
+/*
+ * Fix sconfig's bus width according to at_hdmac.
+ * 1 byte -> 0, 2 bytes -> 1, 4 bytes -> 2.
+ */
+static inline u8 convert_buswidth(enum dma_slave_buswidth addr_width)
+{
+ switch (addr_width) {
+ case DMA_SLAVE_BUSWIDTH_2_BYTES:
+ return 1;
+ case DMA_SLAVE_BUSWIDTH_4_BYTES:
+ return 2;
+ default:
+ /* For 1 byte width or fallback */
+ return 0;
+ }
+}
+
+/*-- Controller ------------------------------------------------------*/
+
+/**
+ * struct at_dma - internal representation of an Atmel HDMA Controller
+ * @chan_common: common dmaengine dma_device object members
+ * @atdma_devtype: identifier of DMA controller compatibility
+ * @ch_regs: memory mapped register base
+ * @clk: dma controller clock
+ * @save_imr: interrupt mask register that is saved on suspend/resume cycle
+ * @all_chan_mask: all channels availlable in a mask
+ * @dma_desc_pool: base of DMA descriptor region (DMA address)
+ * @chan: channels table to store at_dma_chan structures
+ */
+struct at_dma {
+ struct dma_device dma_common;
+ void __iomem *regs;
+ struct clk *clk;
+ u32 save_imr;
+
+ u8 all_chan_mask;
+
+ struct dma_pool *dma_desc_pool;
+ struct dma_pool *memset_pool;
+ /* AT THE END channels table */
+ struct at_dma_chan chan[];
+};
+
+#define dma_readl(atdma, name) \
+ __raw_readl((atdma)->regs + AT_DMA_##name)
+#define dma_writel(atdma, name, val) \
+ __raw_writel((val), (atdma)->regs + AT_DMA_##name)
+
+static inline struct at_dma *to_at_dma(struct dma_device *ddev)
+{
+ return container_of(ddev, struct at_dma, dma_common);
+}
+
+
+/*-- Helper functions ------------------------------------------------*/
+
+static struct device *chan2dev(struct dma_chan *chan)
+{
+ return &chan->dev->device;
+}
+
+#if defined(VERBOSE_DEBUG)
+static void vdbg_dump_regs(struct at_dma_chan *atchan)
+{
+ struct at_dma *atdma = to_at_dma(atchan->chan_common.device);
+
+ dev_err(chan2dev(&atchan->chan_common),
+ " channel %d : imr = 0x%x, chsr = 0x%x\n",
+ atchan->chan_common.chan_id,
+ dma_readl(atdma, EBCIMR),
+ dma_readl(atdma, CHSR));
+
+ dev_err(chan2dev(&atchan->chan_common),
+ " channel: s0x%x d0x%x ctrl0x%x:0x%x cfg0x%x l0x%x\n",
+ channel_readl(atchan, SADDR),
+ channel_readl(atchan, DADDR),
+ channel_readl(atchan, CTRLA),
+ channel_readl(atchan, CTRLB),
+ channel_readl(atchan, CFG),
+ channel_readl(atchan, DSCR));
+}
+#else
+static void vdbg_dump_regs(struct at_dma_chan *atchan) {}
+#endif
+
+static void atc_dump_lli(struct at_dma_chan *atchan, struct at_lli *lli)
+{
+ dev_crit(chan2dev(&atchan->chan_common),
+ "desc: s%pad d%pad ctrl0x%x:0x%x l%pad\n",
+ &lli->saddr, &lli->daddr,
+ lli->ctrla, lli->ctrlb, &lli->dscr);
+}
+
+
+static void atc_setup_irq(struct at_dma *atdma, int chan_id, int on)
+{
+ u32 ebci;
+
+ /* enable interrupts on buffer transfer completion & error */
+ ebci = AT_DMA_BTC(chan_id)
+ | AT_DMA_ERR(chan_id);
+ if (on)
+ dma_writel(atdma, EBCIER, ebci);
+ else
+ dma_writel(atdma, EBCIDR, ebci);
+}
+
+static void atc_enable_chan_irq(struct at_dma *atdma, int chan_id)
+{
+ atc_setup_irq(atdma, chan_id, 1);
+}
+
+static void atc_disable_chan_irq(struct at_dma *atdma, int chan_id)
+{
+ atc_setup_irq(atdma, chan_id, 0);
+}
+
+
+/**
+ * atc_chan_is_enabled - test if given channel is enabled
+ * @atchan: channel we want to test status
+ */
+static inline int atc_chan_is_enabled(struct at_dma_chan *atchan)
+{
+ struct at_dma *atdma = to_at_dma(atchan->chan_common.device);
+
+ return !!(dma_readl(atdma, CHSR) & atchan->mask);
+}
+
+/**
+ * atc_chan_is_paused - test channel pause/resume status
+ * @atchan: channel we want to test status
+ */
+static inline int atc_chan_is_paused(struct at_dma_chan *atchan)
+{
+ return test_bit(ATC_IS_PAUSED, &atchan->status);
+}
+
+/**
+ * atc_chan_is_cyclic - test if given channel has cyclic property set
+ * @atchan: channel we want to test status
+ */
+static inline int atc_chan_is_cyclic(struct at_dma_chan *atchan)
+{
+ return test_bit(ATC_IS_CYCLIC, &atchan->status);
+}
+
+/**
+ * set_desc_eol - set end-of-link to descriptor so it will end transfer
+ * @desc: descriptor, signle or at the end of a chain, to end chain on
+ */
+static void set_desc_eol(struct at_desc *desc)
+{
+ u32 ctrlb = desc->lli.ctrlb;
+
+ ctrlb &= ~ATC_IEN;
+ ctrlb |= ATC_SRC_DSCR_DIS | ATC_DST_DSCR_DIS;
+
+ desc->lli.ctrlb = ctrlb;
+ desc->lli.dscr = 0;
+}
+
+#endif /* AT_HDMAC_REGS_H */
diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c
new file mode 100644
index 000000000..7919906b0
--- /dev/null
+++ b/drivers/dma/at_xdmac.c
@@ -0,0 +1,2263 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Driver for the Atmel Extensible DMA Controller (aka XDMAC on AT91 systems)
+ *
+ * Copyright (C) 2014 Atmel Corporation
+ *
+ * Author: Ludovic Desroches <ludovic.desroches@atmel.com>
+ */
+
+#include <asm/barrier.h>
+#include <dt-bindings/dma/at91.h>
+#include <linux/clk.h>
+#include <linux/dmaengine.h>
+#include <linux/dmapool.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/of_dma.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/pm.h>
+
+#include "dmaengine.h"
+
+/* Global registers */
+#define AT_XDMAC_GTYPE 0x00 /* Global Type Register */
+#define AT_XDMAC_NB_CH(i) (((i) & 0x1F) + 1) /* Number of Channels Minus One */
+#define AT_XDMAC_FIFO_SZ(i) (((i) >> 5) & 0x7FF) /* Number of Bytes */
+#define AT_XDMAC_NB_REQ(i) ((((i) >> 16) & 0x3F) + 1) /* Number of Peripheral Requests Minus One */
+#define AT_XDMAC_GCFG 0x04 /* Global Configuration Register */
+#define AT_XDMAC_WRHP(i) (((i) & 0xF) << 4)
+#define AT_XDMAC_WRMP(i) (((i) & 0xF) << 8)
+#define AT_XDMAC_WRLP(i) (((i) & 0xF) << 12)
+#define AT_XDMAC_RDHP(i) (((i) & 0xF) << 16)
+#define AT_XDMAC_RDMP(i) (((i) & 0xF) << 20)
+#define AT_XDMAC_RDLP(i) (((i) & 0xF) << 24)
+#define AT_XDMAC_RDSG(i) (((i) & 0xF) << 28)
+#define AT_XDMAC_GCFG_M2M (AT_XDMAC_RDLP(0xF) | AT_XDMAC_WRLP(0xF))
+#define AT_XDMAC_GCFG_P2M (AT_XDMAC_RDSG(0x1) | AT_XDMAC_RDHP(0x3) | \
+ AT_XDMAC_WRHP(0x5))
+#define AT_XDMAC_GWAC 0x08 /* Global Weighted Arbiter Configuration Register */
+#define AT_XDMAC_PW0(i) (((i) & 0xF) << 0)
+#define AT_XDMAC_PW1(i) (((i) & 0xF) << 4)
+#define AT_XDMAC_PW2(i) (((i) & 0xF) << 8)
+#define AT_XDMAC_PW3(i) (((i) & 0xF) << 12)
+#define AT_XDMAC_GWAC_M2M 0
+#define AT_XDMAC_GWAC_P2M (AT_XDMAC_PW0(0xF) | AT_XDMAC_PW2(0xF))
+
+#define AT_XDMAC_GIE 0x0C /* Global Interrupt Enable Register */
+#define AT_XDMAC_GID 0x10 /* Global Interrupt Disable Register */
+#define AT_XDMAC_GIM 0x14 /* Global Interrupt Mask Register */
+#define AT_XDMAC_GIS 0x18 /* Global Interrupt Status Register */
+#define AT_XDMAC_GE 0x1C /* Global Channel Enable Register */
+#define AT_XDMAC_GD 0x20 /* Global Channel Disable Register */
+#define AT_XDMAC_GS 0x24 /* Global Channel Status Register */
+#define AT_XDMAC_VERSION 0xFFC /* XDMAC Version Register */
+
+/* Channel relative registers offsets */
+#define AT_XDMAC_CIE 0x00 /* Channel Interrupt Enable Register */
+#define AT_XDMAC_CIE_BIE BIT(0) /* End of Block Interrupt Enable Bit */
+#define AT_XDMAC_CIE_LIE BIT(1) /* End of Linked List Interrupt Enable Bit */
+#define AT_XDMAC_CIE_DIE BIT(2) /* End of Disable Interrupt Enable Bit */
+#define AT_XDMAC_CIE_FIE BIT(3) /* End of Flush Interrupt Enable Bit */
+#define AT_XDMAC_CIE_RBEIE BIT(4) /* Read Bus Error Interrupt Enable Bit */
+#define AT_XDMAC_CIE_WBEIE BIT(5) /* Write Bus Error Interrupt Enable Bit */
+#define AT_XDMAC_CIE_ROIE BIT(6) /* Request Overflow Interrupt Enable Bit */
+#define AT_XDMAC_CID 0x04 /* Channel Interrupt Disable Register */
+#define AT_XDMAC_CID_BID BIT(0) /* End of Block Interrupt Disable Bit */
+#define AT_XDMAC_CID_LID BIT(1) /* End of Linked List Interrupt Disable Bit */
+#define AT_XDMAC_CID_DID BIT(2) /* End of Disable Interrupt Disable Bit */
+#define AT_XDMAC_CID_FID BIT(3) /* End of Flush Interrupt Disable Bit */
+#define AT_XDMAC_CID_RBEID BIT(4) /* Read Bus Error Interrupt Disable Bit */
+#define AT_XDMAC_CID_WBEID BIT(5) /* Write Bus Error Interrupt Disable Bit */
+#define AT_XDMAC_CID_ROID BIT(6) /* Request Overflow Interrupt Disable Bit */
+#define AT_XDMAC_CIM 0x08 /* Channel Interrupt Mask Register */
+#define AT_XDMAC_CIM_BIM BIT(0) /* End of Block Interrupt Mask Bit */
+#define AT_XDMAC_CIM_LIM BIT(1) /* End of Linked List Interrupt Mask Bit */
+#define AT_XDMAC_CIM_DIM BIT(2) /* End of Disable Interrupt Mask Bit */
+#define AT_XDMAC_CIM_FIM BIT(3) /* End of Flush Interrupt Mask Bit */
+#define AT_XDMAC_CIM_RBEIM BIT(4) /* Read Bus Error Interrupt Mask Bit */
+#define AT_XDMAC_CIM_WBEIM BIT(5) /* Write Bus Error Interrupt Mask Bit */
+#define AT_XDMAC_CIM_ROIM BIT(6) /* Request Overflow Interrupt Mask Bit */
+#define AT_XDMAC_CIS 0x0C /* Channel Interrupt Status Register */
+#define AT_XDMAC_CIS_BIS BIT(0) /* End of Block Interrupt Status Bit */
+#define AT_XDMAC_CIS_LIS BIT(1) /* End of Linked List Interrupt Status Bit */
+#define AT_XDMAC_CIS_DIS BIT(2) /* End of Disable Interrupt Status Bit */
+#define AT_XDMAC_CIS_FIS BIT(3) /* End of Flush Interrupt Status Bit */
+#define AT_XDMAC_CIS_RBEIS BIT(4) /* Read Bus Error Interrupt Status Bit */
+#define AT_XDMAC_CIS_WBEIS BIT(5) /* Write Bus Error Interrupt Status Bit */
+#define AT_XDMAC_CIS_ROIS BIT(6) /* Request Overflow Interrupt Status Bit */
+#define AT_XDMAC_CSA 0x10 /* Channel Source Address Register */
+#define AT_XDMAC_CDA 0x14 /* Channel Destination Address Register */
+#define AT_XDMAC_CNDA 0x18 /* Channel Next Descriptor Address Register */
+#define AT_XDMAC_CNDA_NDAIF(i) ((i) & 0x1) /* Channel x Next Descriptor Interface */
+#define AT_XDMAC_CNDA_NDA(i) ((i) & 0xfffffffc) /* Channel x Next Descriptor Address */
+#define AT_XDMAC_CNDC 0x1C /* Channel Next Descriptor Control Register */
+#define AT_XDMAC_CNDC_NDE (0x1 << 0) /* Channel x Next Descriptor Enable */
+#define AT_XDMAC_CNDC_NDSUP (0x1 << 1) /* Channel x Next Descriptor Source Update */
+#define AT_XDMAC_CNDC_NDDUP (0x1 << 2) /* Channel x Next Descriptor Destination Update */
+#define AT_XDMAC_CNDC_NDVIEW_MASK GENMASK(28, 27)
+#define AT_XDMAC_CNDC_NDVIEW_NDV0 (0x0 << 3) /* Channel x Next Descriptor View 0 */
+#define AT_XDMAC_CNDC_NDVIEW_NDV1 (0x1 << 3) /* Channel x Next Descriptor View 1 */
+#define AT_XDMAC_CNDC_NDVIEW_NDV2 (0x2 << 3) /* Channel x Next Descriptor View 2 */
+#define AT_XDMAC_CNDC_NDVIEW_NDV3 (0x3 << 3) /* Channel x Next Descriptor View 3 */
+#define AT_XDMAC_CUBC 0x20 /* Channel Microblock Control Register */
+#define AT_XDMAC_CBC 0x24 /* Channel Block Control Register */
+#define AT_XDMAC_CC 0x28 /* Channel Configuration Register */
+#define AT_XDMAC_CC_TYPE (0x1 << 0) /* Channel Transfer Type */
+#define AT_XDMAC_CC_TYPE_MEM_TRAN (0x0 << 0) /* Memory to Memory Transfer */
+#define AT_XDMAC_CC_TYPE_PER_TRAN (0x1 << 0) /* Peripheral to Memory or Memory to Peripheral Transfer */
+#define AT_XDMAC_CC_MBSIZE_MASK (0x3 << 1)
+#define AT_XDMAC_CC_MBSIZE_SINGLE (0x0 << 1)
+#define AT_XDMAC_CC_MBSIZE_FOUR (0x1 << 1)
+#define AT_XDMAC_CC_MBSIZE_EIGHT (0x2 << 1)
+#define AT_XDMAC_CC_MBSIZE_SIXTEEN (0x3 << 1)
+#define AT_XDMAC_CC_DSYNC (0x1 << 4) /* Channel Synchronization */
+#define AT_XDMAC_CC_DSYNC_PER2MEM (0x0 << 4)
+#define AT_XDMAC_CC_DSYNC_MEM2PER (0x1 << 4)
+#define AT_XDMAC_CC_PROT (0x1 << 5) /* Channel Protection */
+#define AT_XDMAC_CC_PROT_SEC (0x0 << 5)
+#define AT_XDMAC_CC_PROT_UNSEC (0x1 << 5)
+#define AT_XDMAC_CC_SWREQ (0x1 << 6) /* Channel Software Request Trigger */
+#define AT_XDMAC_CC_SWREQ_HWR_CONNECTED (0x0 << 6)
+#define AT_XDMAC_CC_SWREQ_SWR_CONNECTED (0x1 << 6)
+#define AT_XDMAC_CC_MEMSET (0x1 << 7) /* Channel Fill Block of memory */
+#define AT_XDMAC_CC_MEMSET_NORMAL_MODE (0x0 << 7)
+#define AT_XDMAC_CC_MEMSET_HW_MODE (0x1 << 7)
+#define AT_XDMAC_CC_CSIZE(i) ((0x7 & (i)) << 8) /* Channel Chunk Size */
+#define AT_XDMAC_CC_DWIDTH_OFFSET 11
+#define AT_XDMAC_CC_DWIDTH_MASK (0x3 << AT_XDMAC_CC_DWIDTH_OFFSET)
+#define AT_XDMAC_CC_DWIDTH(i) ((0x3 & (i)) << AT_XDMAC_CC_DWIDTH_OFFSET) /* Channel Data Width */
+#define AT_XDMAC_CC_DWIDTH_BYTE 0x0
+#define AT_XDMAC_CC_DWIDTH_HALFWORD 0x1
+#define AT_XDMAC_CC_DWIDTH_WORD 0x2
+#define AT_XDMAC_CC_DWIDTH_DWORD 0x3
+#define AT_XDMAC_CC_SIF(i) ((0x1 & (i)) << 13) /* Channel Source Interface Identifier */
+#define AT_XDMAC_CC_DIF(i) ((0x1 & (i)) << 14) /* Channel Destination Interface Identifier */
+#define AT_XDMAC_CC_SAM_MASK (0x3 << 16) /* Channel Source Addressing Mode */
+#define AT_XDMAC_CC_SAM_FIXED_AM (0x0 << 16)
+#define AT_XDMAC_CC_SAM_INCREMENTED_AM (0x1 << 16)
+#define AT_XDMAC_CC_SAM_UBS_AM (0x2 << 16)
+#define AT_XDMAC_CC_SAM_UBS_DS_AM (0x3 << 16)
+#define AT_XDMAC_CC_DAM_MASK (0x3 << 18) /* Channel Source Addressing Mode */
+#define AT_XDMAC_CC_DAM_FIXED_AM (0x0 << 18)
+#define AT_XDMAC_CC_DAM_INCREMENTED_AM (0x1 << 18)
+#define AT_XDMAC_CC_DAM_UBS_AM (0x2 << 18)
+#define AT_XDMAC_CC_DAM_UBS_DS_AM (0x3 << 18)
+#define AT_XDMAC_CC_INITD (0x1 << 21) /* Channel Initialization Terminated (read only) */
+#define AT_XDMAC_CC_INITD_TERMINATED (0x0 << 21)
+#define AT_XDMAC_CC_INITD_IN_PROGRESS (0x1 << 21)
+#define AT_XDMAC_CC_RDIP (0x1 << 22) /* Read in Progress (read only) */
+#define AT_XDMAC_CC_RDIP_DONE (0x0 << 22)
+#define AT_XDMAC_CC_RDIP_IN_PROGRESS (0x1 << 22)
+#define AT_XDMAC_CC_WRIP (0x1 << 23) /* Write in Progress (read only) */
+#define AT_XDMAC_CC_WRIP_DONE (0x0 << 23)
+#define AT_XDMAC_CC_WRIP_IN_PROGRESS (0x1 << 23)
+#define AT_XDMAC_CC_PERID(i) ((0x7f & (i)) << 24) /* Channel Peripheral Identifier */
+#define AT_XDMAC_CDS_MSP 0x2C /* Channel Data Stride Memory Set Pattern */
+#define AT_XDMAC_CSUS 0x30 /* Channel Source Microblock Stride */
+#define AT_XDMAC_CDUS 0x34 /* Channel Destination Microblock Stride */
+
+/* Microblock control members */
+#define AT_XDMAC_MBR_UBC_UBLEN_MAX 0xFFFFFFUL /* Maximum Microblock Length */
+#define AT_XDMAC_MBR_UBC_NDE (0x1 << 24) /* Next Descriptor Enable */
+#define AT_XDMAC_MBR_UBC_NSEN (0x1 << 25) /* Next Descriptor Source Update */
+#define AT_XDMAC_MBR_UBC_NDEN (0x1 << 26) /* Next Descriptor Destination Update */
+#define AT_XDMAC_MBR_UBC_NDV0 (0x0 << 27) /* Next Descriptor View 0 */
+#define AT_XDMAC_MBR_UBC_NDV1 (0x1 << 27) /* Next Descriptor View 1 */
+#define AT_XDMAC_MBR_UBC_NDV2 (0x2 << 27) /* Next Descriptor View 2 */
+#define AT_XDMAC_MBR_UBC_NDV3 (0x3 << 27) /* Next Descriptor View 3 */
+
+#define AT_XDMAC_MAX_CHAN 0x20
+#define AT_XDMAC_MAX_CSIZE 16 /* 16 data */
+#define AT_XDMAC_MAX_DWIDTH 8 /* 64 bits */
+#define AT_XDMAC_RESIDUE_MAX_RETRIES 5
+
+#define AT_XDMAC_DMA_BUSWIDTHS\
+ (BIT(DMA_SLAVE_BUSWIDTH_UNDEFINED) |\
+ BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) |\
+ BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) |\
+ BIT(DMA_SLAVE_BUSWIDTH_4_BYTES) |\
+ BIT(DMA_SLAVE_BUSWIDTH_8_BYTES))
+
+enum atc_status {
+ AT_XDMAC_CHAN_IS_CYCLIC = 0,
+ AT_XDMAC_CHAN_IS_PAUSED,
+};
+
+struct at_xdmac_layout {
+ /* Global Channel Read Suspend Register */
+ u8 grs;
+ /* Global Write Suspend Register */
+ u8 gws;
+ /* Global Channel Read Write Suspend Register */
+ u8 grws;
+ /* Global Channel Read Write Resume Register */
+ u8 grwr;
+ /* Global Channel Software Request Register */
+ u8 gswr;
+ /* Global channel Software Request Status Register */
+ u8 gsws;
+ /* Global Channel Software Flush Request Register */
+ u8 gswf;
+ /* Channel reg base */
+ u8 chan_cc_reg_base;
+ /* Source/Destination Interface must be specified or not */
+ bool sdif;
+ /* AXI queue priority configuration supported */
+ bool axi_config;
+};
+
+/* ----- Channels ----- */
+struct at_xdmac_chan {
+ struct dma_chan chan;
+ void __iomem *ch_regs;
+ u32 mask; /* Channel Mask */
+ u32 cfg; /* Channel Configuration Register */
+ u8 perid; /* Peripheral ID */
+ u8 perif; /* Peripheral Interface */
+ u8 memif; /* Memory Interface */
+ u32 save_cc;
+ u32 save_cim;
+ u32 save_cnda;
+ u32 save_cndc;
+ u32 irq_status;
+ unsigned long status;
+ struct tasklet_struct tasklet;
+ struct dma_slave_config sconfig;
+
+ spinlock_t lock;
+
+ struct list_head xfers_list;
+ struct list_head free_descs_list;
+};
+
+
+/* ----- Controller ----- */
+struct at_xdmac {
+ struct dma_device dma;
+ void __iomem *regs;
+ int irq;
+ struct clk *clk;
+ u32 save_gim;
+ u32 save_gs;
+ struct dma_pool *at_xdmac_desc_pool;
+ const struct at_xdmac_layout *layout;
+ struct at_xdmac_chan chan[];
+};
+
+
+/* ----- Descriptors ----- */
+
+/* Linked List Descriptor */
+struct at_xdmac_lld {
+ u32 mbr_nda; /* Next Descriptor Member */
+ u32 mbr_ubc; /* Microblock Control Member */
+ u32 mbr_sa; /* Source Address Member */
+ u32 mbr_da; /* Destination Address Member */
+ u32 mbr_cfg; /* Configuration Register */
+ u32 mbr_bc; /* Block Control Register */
+ u32 mbr_ds; /* Data Stride Register */
+ u32 mbr_sus; /* Source Microblock Stride Register */
+ u32 mbr_dus; /* Destination Microblock Stride Register */
+};
+
+/* 64-bit alignment needed to update CNDA and CUBC registers in an atomic way. */
+struct at_xdmac_desc {
+ struct at_xdmac_lld lld;
+ enum dma_transfer_direction direction;
+ struct dma_async_tx_descriptor tx_dma_desc;
+ struct list_head desc_node;
+ /* Following members are only used by the first descriptor */
+ bool active_xfer;
+ unsigned int xfer_size;
+ struct list_head descs_list;
+ struct list_head xfer_node;
+} __aligned(sizeof(u64));
+
+static const struct at_xdmac_layout at_xdmac_sama5d4_layout = {
+ .grs = 0x28,
+ .gws = 0x2C,
+ .grws = 0x30,
+ .grwr = 0x34,
+ .gswr = 0x38,
+ .gsws = 0x3C,
+ .gswf = 0x40,
+ .chan_cc_reg_base = 0x50,
+ .sdif = true,
+ .axi_config = false,
+};
+
+static const struct at_xdmac_layout at_xdmac_sama7g5_layout = {
+ .grs = 0x30,
+ .gws = 0x38,
+ .grws = 0x40,
+ .grwr = 0x44,
+ .gswr = 0x48,
+ .gsws = 0x4C,
+ .gswf = 0x50,
+ .chan_cc_reg_base = 0x60,
+ .sdif = false,
+ .axi_config = true,
+};
+
+static inline void __iomem *at_xdmac_chan_reg_base(struct at_xdmac *atxdmac, unsigned int chan_nb)
+{
+ return atxdmac->regs + (atxdmac->layout->chan_cc_reg_base + chan_nb * 0x40);
+}
+
+#define at_xdmac_read(atxdmac, reg) readl_relaxed((atxdmac)->regs + (reg))
+#define at_xdmac_write(atxdmac, reg, value) \
+ writel_relaxed((value), (atxdmac)->regs + (reg))
+
+#define at_xdmac_chan_read(atchan, reg) readl_relaxed((atchan)->ch_regs + (reg))
+#define at_xdmac_chan_write(atchan, reg, value) writel_relaxed((value), (atchan)->ch_regs + (reg))
+
+static inline struct at_xdmac_chan *to_at_xdmac_chan(struct dma_chan *dchan)
+{
+ return container_of(dchan, struct at_xdmac_chan, chan);
+}
+
+static struct device *chan2dev(struct dma_chan *chan)
+{
+ return &chan->dev->device;
+}
+
+static inline struct at_xdmac *to_at_xdmac(struct dma_device *ddev)
+{
+ return container_of(ddev, struct at_xdmac, dma);
+}
+
+static inline struct at_xdmac_desc *txd_to_at_desc(struct dma_async_tx_descriptor *txd)
+{
+ return container_of(txd, struct at_xdmac_desc, tx_dma_desc);
+}
+
+static inline int at_xdmac_chan_is_cyclic(struct at_xdmac_chan *atchan)
+{
+ return test_bit(AT_XDMAC_CHAN_IS_CYCLIC, &atchan->status);
+}
+
+static inline int at_xdmac_chan_is_paused(struct at_xdmac_chan *atchan)
+{
+ return test_bit(AT_XDMAC_CHAN_IS_PAUSED, &atchan->status);
+}
+
+static inline bool at_xdmac_chan_is_peripheral_xfer(u32 cfg)
+{
+ return cfg & AT_XDMAC_CC_TYPE_PER_TRAN;
+}
+
+static inline u8 at_xdmac_get_dwidth(u32 cfg)
+{
+ return (cfg & AT_XDMAC_CC_DWIDTH_MASK) >> AT_XDMAC_CC_DWIDTH_OFFSET;
+};
+
+static unsigned int init_nr_desc_per_channel = 64;
+module_param(init_nr_desc_per_channel, uint, 0644);
+MODULE_PARM_DESC(init_nr_desc_per_channel,
+ "initial descriptors per channel (default: 64)");
+
+
+static bool at_xdmac_chan_is_enabled(struct at_xdmac_chan *atchan)
+{
+ return at_xdmac_chan_read(atchan, AT_XDMAC_GS) & atchan->mask;
+}
+
+static void at_xdmac_off(struct at_xdmac *atxdmac)
+{
+ at_xdmac_write(atxdmac, AT_XDMAC_GD, -1L);
+
+ /* Wait that all chans are disabled. */
+ while (at_xdmac_read(atxdmac, AT_XDMAC_GS))
+ cpu_relax();
+
+ at_xdmac_write(atxdmac, AT_XDMAC_GID, -1L);
+}
+
+/* Call with lock hold. */
+static void at_xdmac_start_xfer(struct at_xdmac_chan *atchan,
+ struct at_xdmac_desc *first)
+{
+ struct at_xdmac *atxdmac = to_at_xdmac(atchan->chan.device);
+ u32 reg;
+
+ dev_vdbg(chan2dev(&atchan->chan), "%s: desc 0x%p\n", __func__, first);
+
+ /* Set transfer as active to not try to start it again. */
+ first->active_xfer = true;
+
+ /* Tell xdmac where to get the first descriptor. */
+ reg = AT_XDMAC_CNDA_NDA(first->tx_dma_desc.phys);
+ if (atxdmac->layout->sdif)
+ reg |= AT_XDMAC_CNDA_NDAIF(atchan->memif);
+
+ at_xdmac_chan_write(atchan, AT_XDMAC_CNDA, reg);
+
+ /*
+ * When doing non cyclic transfer we need to use the next
+ * descriptor view 2 since some fields of the configuration register
+ * depend on transfer size and src/dest addresses.
+ */
+ if (at_xdmac_chan_is_cyclic(atchan))
+ reg = AT_XDMAC_CNDC_NDVIEW_NDV1;
+ else if ((first->lld.mbr_ubc &
+ AT_XDMAC_CNDC_NDVIEW_MASK) == AT_XDMAC_MBR_UBC_NDV3)
+ reg = AT_XDMAC_CNDC_NDVIEW_NDV3;
+ else
+ reg = AT_XDMAC_CNDC_NDVIEW_NDV2;
+ /*
+ * Even if the register will be updated from the configuration in the
+ * descriptor when using view 2 or higher, the PROT bit won't be set
+ * properly. This bit can be modified only by using the channel
+ * configuration register.
+ */
+ at_xdmac_chan_write(atchan, AT_XDMAC_CC, first->lld.mbr_cfg);
+
+ reg |= AT_XDMAC_CNDC_NDDUP
+ | AT_XDMAC_CNDC_NDSUP
+ | AT_XDMAC_CNDC_NDE;
+ at_xdmac_chan_write(atchan, AT_XDMAC_CNDC, reg);
+
+ dev_vdbg(chan2dev(&atchan->chan),
+ "%s: CC=0x%08x CNDA=0x%08x, CNDC=0x%08x, CSA=0x%08x, CDA=0x%08x, CUBC=0x%08x\n",
+ __func__, at_xdmac_chan_read(atchan, AT_XDMAC_CC),
+ at_xdmac_chan_read(atchan, AT_XDMAC_CNDA),
+ at_xdmac_chan_read(atchan, AT_XDMAC_CNDC),
+ at_xdmac_chan_read(atchan, AT_XDMAC_CSA),
+ at_xdmac_chan_read(atchan, AT_XDMAC_CDA),
+ at_xdmac_chan_read(atchan, AT_XDMAC_CUBC));
+
+ at_xdmac_chan_write(atchan, AT_XDMAC_CID, 0xffffffff);
+ reg = AT_XDMAC_CIE_RBEIE | AT_XDMAC_CIE_WBEIE;
+ /*
+ * Request Overflow Error is only for peripheral synchronized transfers
+ */
+ if (at_xdmac_chan_is_peripheral_xfer(first->lld.mbr_cfg))
+ reg |= AT_XDMAC_CIE_ROIE;
+
+ /*
+ * There is no end of list when doing cyclic dma, we need to get
+ * an interrupt after each periods.
+ */
+ if (at_xdmac_chan_is_cyclic(atchan))
+ at_xdmac_chan_write(atchan, AT_XDMAC_CIE,
+ reg | AT_XDMAC_CIE_BIE);
+ else
+ at_xdmac_chan_write(atchan, AT_XDMAC_CIE,
+ reg | AT_XDMAC_CIE_LIE);
+ at_xdmac_write(atxdmac, AT_XDMAC_GIE, atchan->mask);
+ dev_vdbg(chan2dev(&atchan->chan),
+ "%s: enable channel (0x%08x)\n", __func__, atchan->mask);
+ wmb();
+ at_xdmac_write(atxdmac, AT_XDMAC_GE, atchan->mask);
+
+ dev_vdbg(chan2dev(&atchan->chan),
+ "%s: CC=0x%08x CNDA=0x%08x, CNDC=0x%08x, CSA=0x%08x, CDA=0x%08x, CUBC=0x%08x\n",
+ __func__, at_xdmac_chan_read(atchan, AT_XDMAC_CC),
+ at_xdmac_chan_read(atchan, AT_XDMAC_CNDA),
+ at_xdmac_chan_read(atchan, AT_XDMAC_CNDC),
+ at_xdmac_chan_read(atchan, AT_XDMAC_CSA),
+ at_xdmac_chan_read(atchan, AT_XDMAC_CDA),
+ at_xdmac_chan_read(atchan, AT_XDMAC_CUBC));
+
+}
+
+static dma_cookie_t at_xdmac_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+ struct at_xdmac_desc *desc = txd_to_at_desc(tx);
+ struct at_xdmac_chan *atchan = to_at_xdmac_chan(tx->chan);
+ dma_cookie_t cookie;
+ unsigned long irqflags;
+
+ spin_lock_irqsave(&atchan->lock, irqflags);
+ cookie = dma_cookie_assign(tx);
+
+ list_add_tail(&desc->xfer_node, &atchan->xfers_list);
+ spin_unlock_irqrestore(&atchan->lock, irqflags);
+
+ dev_vdbg(chan2dev(tx->chan), "%s: atchan 0x%p, add desc 0x%p to xfers_list\n",
+ __func__, atchan, desc);
+
+ return cookie;
+}
+
+static struct at_xdmac_desc *at_xdmac_alloc_desc(struct dma_chan *chan,
+ gfp_t gfp_flags)
+{
+ struct at_xdmac_desc *desc;
+ struct at_xdmac *atxdmac = to_at_xdmac(chan->device);
+ dma_addr_t phys;
+
+ desc = dma_pool_zalloc(atxdmac->at_xdmac_desc_pool, gfp_flags, &phys);
+ if (desc) {
+ INIT_LIST_HEAD(&desc->descs_list);
+ dma_async_tx_descriptor_init(&desc->tx_dma_desc, chan);
+ desc->tx_dma_desc.tx_submit = at_xdmac_tx_submit;
+ desc->tx_dma_desc.phys = phys;
+ }
+
+ return desc;
+}
+
+static void at_xdmac_init_used_desc(struct at_xdmac_desc *desc)
+{
+ memset(&desc->lld, 0, sizeof(desc->lld));
+ INIT_LIST_HEAD(&desc->descs_list);
+ desc->direction = DMA_TRANS_NONE;
+ desc->xfer_size = 0;
+ desc->active_xfer = false;
+}
+
+/* Call must be protected by lock. */
+static struct at_xdmac_desc *at_xdmac_get_desc(struct at_xdmac_chan *atchan)
+{
+ struct at_xdmac_desc *desc;
+
+ if (list_empty(&atchan->free_descs_list)) {
+ desc = at_xdmac_alloc_desc(&atchan->chan, GFP_NOWAIT);
+ } else {
+ desc = list_first_entry(&atchan->free_descs_list,
+ struct at_xdmac_desc, desc_node);
+ list_del(&desc->desc_node);
+ at_xdmac_init_used_desc(desc);
+ }
+
+ return desc;
+}
+
+static void at_xdmac_queue_desc(struct dma_chan *chan,
+ struct at_xdmac_desc *prev,
+ struct at_xdmac_desc *desc)
+{
+ if (!prev || !desc)
+ return;
+
+ prev->lld.mbr_nda = desc->tx_dma_desc.phys;
+ prev->lld.mbr_ubc |= AT_XDMAC_MBR_UBC_NDE;
+
+ dev_dbg(chan2dev(chan), "%s: chain lld: prev=0x%p, mbr_nda=%pad\n",
+ __func__, prev, &prev->lld.mbr_nda);
+}
+
+static inline void at_xdmac_increment_block_count(struct dma_chan *chan,
+ struct at_xdmac_desc *desc)
+{
+ if (!desc)
+ return;
+
+ desc->lld.mbr_bc++;
+
+ dev_dbg(chan2dev(chan),
+ "%s: incrementing the block count of the desc 0x%p\n",
+ __func__, desc);
+}
+
+static struct dma_chan *at_xdmac_xlate(struct of_phandle_args *dma_spec,
+ struct of_dma *of_dma)
+{
+ struct at_xdmac *atxdmac = of_dma->of_dma_data;
+ struct at_xdmac_chan *atchan;
+ struct dma_chan *chan;
+ struct device *dev = atxdmac->dma.dev;
+
+ if (dma_spec->args_count != 1) {
+ dev_err(dev, "dma phandler args: bad number of args\n");
+ return NULL;
+ }
+
+ chan = dma_get_any_slave_channel(&atxdmac->dma);
+ if (!chan) {
+ dev_err(dev, "can't get a dma channel\n");
+ return NULL;
+ }
+
+ atchan = to_at_xdmac_chan(chan);
+ atchan->memif = AT91_XDMAC_DT_GET_MEM_IF(dma_spec->args[0]);
+ atchan->perif = AT91_XDMAC_DT_GET_PER_IF(dma_spec->args[0]);
+ atchan->perid = AT91_XDMAC_DT_GET_PERID(dma_spec->args[0]);
+ dev_dbg(dev, "chan dt cfg: memif=%u perif=%u perid=%u\n",
+ atchan->memif, atchan->perif, atchan->perid);
+
+ return chan;
+}
+
+static int at_xdmac_compute_chan_conf(struct dma_chan *chan,
+ enum dma_transfer_direction direction)
+{
+ struct at_xdmac_chan *atchan = to_at_xdmac_chan(chan);
+ struct at_xdmac *atxdmac = to_at_xdmac(atchan->chan.device);
+ int csize, dwidth;
+
+ if (direction == DMA_DEV_TO_MEM) {
+ atchan->cfg =
+ AT91_XDMAC_DT_PERID(atchan->perid)
+ | AT_XDMAC_CC_DAM_INCREMENTED_AM
+ | AT_XDMAC_CC_SAM_FIXED_AM
+ | AT_XDMAC_CC_SWREQ_HWR_CONNECTED
+ | AT_XDMAC_CC_DSYNC_PER2MEM
+ | AT_XDMAC_CC_MBSIZE_SIXTEEN
+ | AT_XDMAC_CC_TYPE_PER_TRAN;
+ if (atxdmac->layout->sdif)
+ atchan->cfg |= AT_XDMAC_CC_DIF(atchan->memif) |
+ AT_XDMAC_CC_SIF(atchan->perif);
+
+ csize = ffs(atchan->sconfig.src_maxburst) - 1;
+ if (csize < 0) {
+ dev_err(chan2dev(chan), "invalid src maxburst value\n");
+ return -EINVAL;
+ }
+ atchan->cfg |= AT_XDMAC_CC_CSIZE(csize);
+ dwidth = ffs(atchan->sconfig.src_addr_width) - 1;
+ if (dwidth < 0) {
+ dev_err(chan2dev(chan), "invalid src addr width value\n");
+ return -EINVAL;
+ }
+ atchan->cfg |= AT_XDMAC_CC_DWIDTH(dwidth);
+ } else if (direction == DMA_MEM_TO_DEV) {
+ atchan->cfg =
+ AT91_XDMAC_DT_PERID(atchan->perid)
+ | AT_XDMAC_CC_DAM_FIXED_AM
+ | AT_XDMAC_CC_SAM_INCREMENTED_AM
+ | AT_XDMAC_CC_SWREQ_HWR_CONNECTED
+ | AT_XDMAC_CC_DSYNC_MEM2PER
+ | AT_XDMAC_CC_MBSIZE_SIXTEEN
+ | AT_XDMAC_CC_TYPE_PER_TRAN;
+ if (atxdmac->layout->sdif)
+ atchan->cfg |= AT_XDMAC_CC_DIF(atchan->perif) |
+ AT_XDMAC_CC_SIF(atchan->memif);
+
+ csize = ffs(atchan->sconfig.dst_maxburst) - 1;
+ if (csize < 0) {
+ dev_err(chan2dev(chan), "invalid src maxburst value\n");
+ return -EINVAL;
+ }
+ atchan->cfg |= AT_XDMAC_CC_CSIZE(csize);
+ dwidth = ffs(atchan->sconfig.dst_addr_width) - 1;
+ if (dwidth < 0) {
+ dev_err(chan2dev(chan), "invalid dst addr width value\n");
+ return -EINVAL;
+ }
+ atchan->cfg |= AT_XDMAC_CC_DWIDTH(dwidth);
+ }
+
+ dev_dbg(chan2dev(chan), "%s: cfg=0x%08x\n", __func__, atchan->cfg);
+
+ return 0;
+}
+
+/*
+ * Only check that maxburst and addr width values are supported by
+ * the controller but not that the configuration is good to perform the
+ * transfer since we don't know the direction at this stage.
+ */
+static int at_xdmac_check_slave_config(struct dma_slave_config *sconfig)
+{
+ if ((sconfig->src_maxburst > AT_XDMAC_MAX_CSIZE)
+ || (sconfig->dst_maxburst > AT_XDMAC_MAX_CSIZE))
+ return -EINVAL;
+
+ if ((sconfig->src_addr_width > AT_XDMAC_MAX_DWIDTH)
+ || (sconfig->dst_addr_width > AT_XDMAC_MAX_DWIDTH))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int at_xdmac_set_slave_config(struct dma_chan *chan,
+ struct dma_slave_config *sconfig)
+{
+ struct at_xdmac_chan *atchan = to_at_xdmac_chan(chan);
+
+ if (at_xdmac_check_slave_config(sconfig)) {
+ dev_err(chan2dev(chan), "invalid slave configuration\n");
+ return -EINVAL;
+ }
+
+ memcpy(&atchan->sconfig, sconfig, sizeof(atchan->sconfig));
+
+ return 0;
+}
+
+static struct dma_async_tx_descriptor *
+at_xdmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
+ unsigned int sg_len, enum dma_transfer_direction direction,
+ unsigned long flags, void *context)
+{
+ struct at_xdmac_chan *atchan = to_at_xdmac_chan(chan);
+ struct at_xdmac_desc *first = NULL, *prev = NULL;
+ struct scatterlist *sg;
+ int i;
+ unsigned int xfer_size = 0;
+ unsigned long irqflags;
+ struct dma_async_tx_descriptor *ret = NULL;
+
+ if (!sgl)
+ return NULL;
+
+ if (!is_slave_direction(direction)) {
+ dev_err(chan2dev(chan), "invalid DMA direction\n");
+ return NULL;
+ }
+
+ dev_dbg(chan2dev(chan), "%s: sg_len=%d, dir=%s, flags=0x%lx\n",
+ __func__, sg_len,
+ direction == DMA_MEM_TO_DEV ? "to device" : "from device",
+ flags);
+
+ /* Protect dma_sconfig field that can be modified by set_slave_conf. */
+ spin_lock_irqsave(&atchan->lock, irqflags);
+
+ if (at_xdmac_compute_chan_conf(chan, direction))
+ goto spin_unlock;
+
+ /* Prepare descriptors. */
+ for_each_sg(sgl, sg, sg_len, i) {
+ struct at_xdmac_desc *desc = NULL;
+ u32 len, mem, dwidth, fixed_dwidth;
+
+ len = sg_dma_len(sg);
+ mem = sg_dma_address(sg);
+ if (unlikely(!len)) {
+ dev_err(chan2dev(chan), "sg data length is zero\n");
+ goto spin_unlock;
+ }
+ dev_dbg(chan2dev(chan), "%s: * sg%d len=%u, mem=0x%08x\n",
+ __func__, i, len, mem);
+
+ desc = at_xdmac_get_desc(atchan);
+ if (!desc) {
+ dev_err(chan2dev(chan), "can't get descriptor\n");
+ if (first)
+ list_splice_tail_init(&first->descs_list,
+ &atchan->free_descs_list);
+ goto spin_unlock;
+ }
+
+ /* Linked list descriptor setup. */
+ if (direction == DMA_DEV_TO_MEM) {
+ desc->lld.mbr_sa = atchan->sconfig.src_addr;
+ desc->lld.mbr_da = mem;
+ } else {
+ desc->lld.mbr_sa = mem;
+ desc->lld.mbr_da = atchan->sconfig.dst_addr;
+ }
+ dwidth = at_xdmac_get_dwidth(atchan->cfg);
+ fixed_dwidth = IS_ALIGNED(len, 1 << dwidth)
+ ? dwidth
+ : AT_XDMAC_CC_DWIDTH_BYTE;
+ desc->lld.mbr_ubc = AT_XDMAC_MBR_UBC_NDV2 /* next descriptor view */
+ | AT_XDMAC_MBR_UBC_NDEN /* next descriptor dst parameter update */
+ | AT_XDMAC_MBR_UBC_NSEN /* next descriptor src parameter update */
+ | (len >> fixed_dwidth); /* microblock length */
+ desc->lld.mbr_cfg = (atchan->cfg & ~AT_XDMAC_CC_DWIDTH_MASK) |
+ AT_XDMAC_CC_DWIDTH(fixed_dwidth);
+ dev_dbg(chan2dev(chan),
+ "%s: lld: mbr_sa=%pad, mbr_da=%pad, mbr_ubc=0x%08x\n",
+ __func__, &desc->lld.mbr_sa, &desc->lld.mbr_da, desc->lld.mbr_ubc);
+
+ /* Chain lld. */
+ if (prev)
+ at_xdmac_queue_desc(chan, prev, desc);
+
+ prev = desc;
+ if (!first)
+ first = desc;
+
+ dev_dbg(chan2dev(chan), "%s: add desc 0x%p to descs_list 0x%p\n",
+ __func__, desc, first);
+ list_add_tail(&desc->desc_node, &first->descs_list);
+ xfer_size += len;
+ }
+
+
+ first->tx_dma_desc.flags = flags;
+ first->xfer_size = xfer_size;
+ first->direction = direction;
+ ret = &first->tx_dma_desc;
+
+spin_unlock:
+ spin_unlock_irqrestore(&atchan->lock, irqflags);
+ return ret;
+}
+
+static struct dma_async_tx_descriptor *
+at_xdmac_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr,
+ size_t buf_len, size_t period_len,
+ enum dma_transfer_direction direction,
+ unsigned long flags)
+{
+ struct at_xdmac_chan *atchan = to_at_xdmac_chan(chan);
+ struct at_xdmac_desc *first = NULL, *prev = NULL;
+ unsigned int periods = buf_len / period_len;
+ int i;
+ unsigned long irqflags;
+
+ dev_dbg(chan2dev(chan), "%s: buf_addr=%pad, buf_len=%zd, period_len=%zd, dir=%s, flags=0x%lx\n",
+ __func__, &buf_addr, buf_len, period_len,
+ direction == DMA_MEM_TO_DEV ? "mem2per" : "per2mem", flags);
+
+ if (!is_slave_direction(direction)) {
+ dev_err(chan2dev(chan), "invalid DMA direction\n");
+ return NULL;
+ }
+
+ if (test_and_set_bit(AT_XDMAC_CHAN_IS_CYCLIC, &atchan->status)) {
+ dev_err(chan2dev(chan), "channel currently used\n");
+ return NULL;
+ }
+
+ if (at_xdmac_compute_chan_conf(chan, direction))
+ return NULL;
+
+ for (i = 0; i < periods; i++) {
+ struct at_xdmac_desc *desc = NULL;
+
+ spin_lock_irqsave(&atchan->lock, irqflags);
+ desc = at_xdmac_get_desc(atchan);
+ if (!desc) {
+ dev_err(chan2dev(chan), "can't get descriptor\n");
+ if (first)
+ list_splice_tail_init(&first->descs_list,
+ &atchan->free_descs_list);
+ spin_unlock_irqrestore(&atchan->lock, irqflags);
+ return NULL;
+ }
+ spin_unlock_irqrestore(&atchan->lock, irqflags);
+ dev_dbg(chan2dev(chan),
+ "%s: desc=0x%p, tx_dma_desc.phys=%pad\n",
+ __func__, desc, &desc->tx_dma_desc.phys);
+
+ if (direction == DMA_DEV_TO_MEM) {
+ desc->lld.mbr_sa = atchan->sconfig.src_addr;
+ desc->lld.mbr_da = buf_addr + i * period_len;
+ } else {
+ desc->lld.mbr_sa = buf_addr + i * period_len;
+ desc->lld.mbr_da = atchan->sconfig.dst_addr;
+ }
+ desc->lld.mbr_cfg = atchan->cfg;
+ desc->lld.mbr_ubc = AT_XDMAC_MBR_UBC_NDV1
+ | AT_XDMAC_MBR_UBC_NDEN
+ | AT_XDMAC_MBR_UBC_NSEN
+ | period_len >> at_xdmac_get_dwidth(desc->lld.mbr_cfg);
+
+ dev_dbg(chan2dev(chan),
+ "%s: lld: mbr_sa=%pad, mbr_da=%pad, mbr_ubc=0x%08x\n",
+ __func__, &desc->lld.mbr_sa, &desc->lld.mbr_da, desc->lld.mbr_ubc);
+
+ /* Chain lld. */
+ if (prev)
+ at_xdmac_queue_desc(chan, prev, desc);
+
+ prev = desc;
+ if (!first)
+ first = desc;
+
+ dev_dbg(chan2dev(chan), "%s: add desc 0x%p to descs_list 0x%p\n",
+ __func__, desc, first);
+ list_add_tail(&desc->desc_node, &first->descs_list);
+ }
+
+ at_xdmac_queue_desc(chan, prev, first);
+ first->tx_dma_desc.flags = flags;
+ first->xfer_size = buf_len;
+ first->direction = direction;
+
+ return &first->tx_dma_desc;
+}
+
+static inline u32 at_xdmac_align_width(struct dma_chan *chan, dma_addr_t addr)
+{
+ u32 width;
+
+ /*
+ * Check address alignment to select the greater data width we
+ * can use.
+ *
+ * Some XDMAC implementations don't provide dword transfer, in
+ * this case selecting dword has the same behavior as
+ * selecting word transfers.
+ */
+ if (!(addr & 7)) {
+ width = AT_XDMAC_CC_DWIDTH_DWORD;
+ dev_dbg(chan2dev(chan), "%s: dwidth: double word\n", __func__);
+ } else if (!(addr & 3)) {
+ width = AT_XDMAC_CC_DWIDTH_WORD;
+ dev_dbg(chan2dev(chan), "%s: dwidth: word\n", __func__);
+ } else if (!(addr & 1)) {
+ width = AT_XDMAC_CC_DWIDTH_HALFWORD;
+ dev_dbg(chan2dev(chan), "%s: dwidth: half word\n", __func__);
+ } else {
+ width = AT_XDMAC_CC_DWIDTH_BYTE;
+ dev_dbg(chan2dev(chan), "%s: dwidth: byte\n", __func__);
+ }
+
+ return width;
+}
+
+static struct at_xdmac_desc *
+at_xdmac_interleaved_queue_desc(struct dma_chan *chan,
+ struct at_xdmac_chan *atchan,
+ struct at_xdmac_desc *prev,
+ dma_addr_t src, dma_addr_t dst,
+ struct dma_interleaved_template *xt,
+ struct data_chunk *chunk)
+{
+ struct at_xdmac_desc *desc;
+ u32 dwidth;
+ unsigned long flags;
+ size_t ublen;
+ /*
+ * WARNING: The channel configuration is set here since there is no
+ * dmaengine_slave_config call in this case. Moreover we don't know the
+ * direction, it involves we can't dynamically set the source and dest
+ * interface so we have to use the same one. Only interface 0 allows EBI
+ * access. Hopefully we can access DDR through both ports (at least on
+ * SAMA5D4x), so we can use the same interface for source and dest,
+ * that solves the fact we don't know the direction.
+ * ERRATA: Even if useless for memory transfers, the PERID has to not
+ * match the one of another channel. If not, it could lead to spurious
+ * flag status.
+ * For SAMA7G5x case, the SIF and DIF fields are no longer used.
+ * Thus, no need to have the SIF/DIF interfaces here.
+ * For SAMA5D4x and SAMA5D2x the SIF and DIF are already configured as
+ * zero.
+ */
+ u32 chan_cc = AT_XDMAC_CC_PERID(0x7f)
+ | AT_XDMAC_CC_MBSIZE_SIXTEEN
+ | AT_XDMAC_CC_TYPE_MEM_TRAN;
+
+ dwidth = at_xdmac_align_width(chan, src | dst | chunk->size);
+ if (chunk->size >= (AT_XDMAC_MBR_UBC_UBLEN_MAX << dwidth)) {
+ dev_dbg(chan2dev(chan),
+ "%s: chunk too big (%zu, max size %lu)...\n",
+ __func__, chunk->size,
+ AT_XDMAC_MBR_UBC_UBLEN_MAX << dwidth);
+ return NULL;
+ }
+
+ if (prev)
+ dev_dbg(chan2dev(chan),
+ "Adding items at the end of desc 0x%p\n", prev);
+
+ if (xt->src_inc) {
+ if (xt->src_sgl)
+ chan_cc |= AT_XDMAC_CC_SAM_UBS_AM;
+ else
+ chan_cc |= AT_XDMAC_CC_SAM_INCREMENTED_AM;
+ }
+
+ if (xt->dst_inc) {
+ if (xt->dst_sgl)
+ chan_cc |= AT_XDMAC_CC_DAM_UBS_AM;
+ else
+ chan_cc |= AT_XDMAC_CC_DAM_INCREMENTED_AM;
+ }
+
+ spin_lock_irqsave(&atchan->lock, flags);
+ desc = at_xdmac_get_desc(atchan);
+ spin_unlock_irqrestore(&atchan->lock, flags);
+ if (!desc) {
+ dev_err(chan2dev(chan), "can't get descriptor\n");
+ return NULL;
+ }
+
+ chan_cc |= AT_XDMAC_CC_DWIDTH(dwidth);
+
+ ublen = chunk->size >> dwidth;
+
+ desc->lld.mbr_sa = src;
+ desc->lld.mbr_da = dst;
+ desc->lld.mbr_sus = dmaengine_get_src_icg(xt, chunk);
+ desc->lld.mbr_dus = dmaengine_get_dst_icg(xt, chunk);
+
+ desc->lld.mbr_ubc = AT_XDMAC_MBR_UBC_NDV3
+ | AT_XDMAC_MBR_UBC_NDEN
+ | AT_XDMAC_MBR_UBC_NSEN
+ | ublen;
+ desc->lld.mbr_cfg = chan_cc;
+
+ dev_dbg(chan2dev(chan),
+ "%s: lld: mbr_sa=%pad, mbr_da=%pad, mbr_ubc=0x%08x, mbr_cfg=0x%08x\n",
+ __func__, &desc->lld.mbr_sa, &desc->lld.mbr_da,
+ desc->lld.mbr_ubc, desc->lld.mbr_cfg);
+
+ /* Chain lld. */
+ if (prev)
+ at_xdmac_queue_desc(chan, prev, desc);
+
+ return desc;
+}
+
+static struct dma_async_tx_descriptor *
+at_xdmac_prep_interleaved(struct dma_chan *chan,
+ struct dma_interleaved_template *xt,
+ unsigned long flags)
+{
+ struct at_xdmac_chan *atchan = to_at_xdmac_chan(chan);
+ struct at_xdmac_desc *prev = NULL, *first = NULL;
+ dma_addr_t dst_addr, src_addr;
+ size_t src_skip = 0, dst_skip = 0, len = 0;
+ struct data_chunk *chunk;
+ int i;
+
+ if (!xt || !xt->numf || (xt->dir != DMA_MEM_TO_MEM))
+ return NULL;
+
+ /*
+ * TODO: Handle the case where we have to repeat a chain of
+ * descriptors...
+ */
+ if ((xt->numf > 1) && (xt->frame_size > 1))
+ return NULL;
+
+ dev_dbg(chan2dev(chan), "%s: src=%pad, dest=%pad, numf=%zu, frame_size=%zu, flags=0x%lx\n",
+ __func__, &xt->src_start, &xt->dst_start, xt->numf,
+ xt->frame_size, flags);
+
+ src_addr = xt->src_start;
+ dst_addr = xt->dst_start;
+
+ if (xt->numf > 1) {
+ first = at_xdmac_interleaved_queue_desc(chan, atchan,
+ NULL,
+ src_addr, dst_addr,
+ xt, xt->sgl);
+ if (!first)
+ return NULL;
+
+ /* Length of the block is (BLEN+1) microblocks. */
+ for (i = 0; i < xt->numf - 1; i++)
+ at_xdmac_increment_block_count(chan, first);
+
+ dev_dbg(chan2dev(chan), "%s: add desc 0x%p to descs_list 0x%p\n",
+ __func__, first, first);
+ list_add_tail(&first->desc_node, &first->descs_list);
+ } else {
+ for (i = 0; i < xt->frame_size; i++) {
+ size_t src_icg = 0, dst_icg = 0;
+ struct at_xdmac_desc *desc;
+
+ chunk = xt->sgl + i;
+
+ dst_icg = dmaengine_get_dst_icg(xt, chunk);
+ src_icg = dmaengine_get_src_icg(xt, chunk);
+
+ src_skip = chunk->size + src_icg;
+ dst_skip = chunk->size + dst_icg;
+
+ dev_dbg(chan2dev(chan),
+ "%s: chunk size=%zu, src icg=%zu, dst icg=%zu\n",
+ __func__, chunk->size, src_icg, dst_icg);
+
+ desc = at_xdmac_interleaved_queue_desc(chan, atchan,
+ prev,
+ src_addr, dst_addr,
+ xt, chunk);
+ if (!desc) {
+ if (first)
+ list_splice_tail_init(&first->descs_list,
+ &atchan->free_descs_list);
+ return NULL;
+ }
+
+ if (!first)
+ first = desc;
+
+ dev_dbg(chan2dev(chan), "%s: add desc 0x%p to descs_list 0x%p\n",
+ __func__, desc, first);
+ list_add_tail(&desc->desc_node, &first->descs_list);
+
+ if (xt->src_sgl)
+ src_addr += src_skip;
+
+ if (xt->dst_sgl)
+ dst_addr += dst_skip;
+
+ len += chunk->size;
+ prev = desc;
+ }
+ }
+
+ first->tx_dma_desc.cookie = -EBUSY;
+ first->tx_dma_desc.flags = flags;
+ first->xfer_size = len;
+
+ return &first->tx_dma_desc;
+}
+
+static struct dma_async_tx_descriptor *
+at_xdmac_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
+ size_t len, unsigned long flags)
+{
+ struct at_xdmac_chan *atchan = to_at_xdmac_chan(chan);
+ struct at_xdmac_desc *first = NULL, *prev = NULL;
+ size_t remaining_size = len, xfer_size = 0, ublen;
+ dma_addr_t src_addr = src, dst_addr = dest;
+ u32 dwidth;
+ /*
+ * WARNING: We don't know the direction, it involves we can't
+ * dynamically set the source and dest interface so we have to use the
+ * same one. Only interface 0 allows EBI access. Hopefully we can
+ * access DDR through both ports (at least on SAMA5D4x), so we can use
+ * the same interface for source and dest, that solves the fact we
+ * don't know the direction.
+ * ERRATA: Even if useless for memory transfers, the PERID has to not
+ * match the one of another channel. If not, it could lead to spurious
+ * flag status.
+ * For SAMA7G5x case, the SIF and DIF fields are no longer used.
+ * Thus, no need to have the SIF/DIF interfaces here.
+ * For SAMA5D4x and SAMA5D2x the SIF and DIF are already configured as
+ * zero.
+ */
+ u32 chan_cc = AT_XDMAC_CC_PERID(0x7f)
+ | AT_XDMAC_CC_DAM_INCREMENTED_AM
+ | AT_XDMAC_CC_SAM_INCREMENTED_AM
+ | AT_XDMAC_CC_MBSIZE_SIXTEEN
+ | AT_XDMAC_CC_TYPE_MEM_TRAN;
+ unsigned long irqflags;
+
+ dev_dbg(chan2dev(chan), "%s: src=%pad, dest=%pad, len=%zd, flags=0x%lx\n",
+ __func__, &src, &dest, len, flags);
+
+ if (unlikely(!len))
+ return NULL;
+
+ dwidth = at_xdmac_align_width(chan, src_addr | dst_addr);
+
+ /* Prepare descriptors. */
+ while (remaining_size) {
+ struct at_xdmac_desc *desc = NULL;
+
+ dev_dbg(chan2dev(chan), "%s: remaining_size=%zu\n", __func__, remaining_size);
+
+ spin_lock_irqsave(&atchan->lock, irqflags);
+ desc = at_xdmac_get_desc(atchan);
+ spin_unlock_irqrestore(&atchan->lock, irqflags);
+ if (!desc) {
+ dev_err(chan2dev(chan), "can't get descriptor\n");
+ if (first)
+ list_splice_tail_init(&first->descs_list,
+ &atchan->free_descs_list);
+ return NULL;
+ }
+
+ /* Update src and dest addresses. */
+ src_addr += xfer_size;
+ dst_addr += xfer_size;
+
+ if (remaining_size >= AT_XDMAC_MBR_UBC_UBLEN_MAX << dwidth)
+ xfer_size = AT_XDMAC_MBR_UBC_UBLEN_MAX << dwidth;
+ else
+ xfer_size = remaining_size;
+
+ dev_dbg(chan2dev(chan), "%s: xfer_size=%zu\n", __func__, xfer_size);
+
+ /* Check remaining length and change data width if needed. */
+ dwidth = at_xdmac_align_width(chan,
+ src_addr | dst_addr | xfer_size);
+ chan_cc &= ~AT_XDMAC_CC_DWIDTH_MASK;
+ chan_cc |= AT_XDMAC_CC_DWIDTH(dwidth);
+
+ ublen = xfer_size >> dwidth;
+ remaining_size -= xfer_size;
+
+ desc->lld.mbr_sa = src_addr;
+ desc->lld.mbr_da = dst_addr;
+ desc->lld.mbr_ubc = AT_XDMAC_MBR_UBC_NDV2
+ | AT_XDMAC_MBR_UBC_NDEN
+ | AT_XDMAC_MBR_UBC_NSEN
+ | ublen;
+ desc->lld.mbr_cfg = chan_cc;
+
+ dev_dbg(chan2dev(chan),
+ "%s: lld: mbr_sa=%pad, mbr_da=%pad, mbr_ubc=0x%08x, mbr_cfg=0x%08x\n",
+ __func__, &desc->lld.mbr_sa, &desc->lld.mbr_da, desc->lld.mbr_ubc, desc->lld.mbr_cfg);
+
+ /* Chain lld. */
+ if (prev)
+ at_xdmac_queue_desc(chan, prev, desc);
+
+ prev = desc;
+ if (!first)
+ first = desc;
+
+ dev_dbg(chan2dev(chan), "%s: add desc 0x%p to descs_list 0x%p\n",
+ __func__, desc, first);
+ list_add_tail(&desc->desc_node, &first->descs_list);
+ }
+
+ first->tx_dma_desc.flags = flags;
+ first->xfer_size = len;
+
+ return &first->tx_dma_desc;
+}
+
+static struct at_xdmac_desc *at_xdmac_memset_create_desc(struct dma_chan *chan,
+ struct at_xdmac_chan *atchan,
+ dma_addr_t dst_addr,
+ size_t len,
+ int value)
+{
+ struct at_xdmac_desc *desc;
+ unsigned long flags;
+ size_t ublen;
+ u32 dwidth;
+ char pattern;
+ /*
+ * WARNING: The channel configuration is set here since there is no
+ * dmaengine_slave_config call in this case. Moreover we don't know the
+ * direction, it involves we can't dynamically set the source and dest
+ * interface so we have to use the same one. Only interface 0 allows EBI
+ * access. Hopefully we can access DDR through both ports (at least on
+ * SAMA5D4x), so we can use the same interface for source and dest,
+ * that solves the fact we don't know the direction.
+ * ERRATA: Even if useless for memory transfers, the PERID has to not
+ * match the one of another channel. If not, it could lead to spurious
+ * flag status.
+ * For SAMA7G5x case, the SIF and DIF fields are no longer used.
+ * Thus, no need to have the SIF/DIF interfaces here.
+ * For SAMA5D4x and SAMA5D2x the SIF and DIF are already configured as
+ * zero.
+ */
+ u32 chan_cc = AT_XDMAC_CC_PERID(0x7f)
+ | AT_XDMAC_CC_DAM_UBS_AM
+ | AT_XDMAC_CC_SAM_INCREMENTED_AM
+ | AT_XDMAC_CC_MBSIZE_SIXTEEN
+ | AT_XDMAC_CC_MEMSET_HW_MODE
+ | AT_XDMAC_CC_TYPE_MEM_TRAN;
+
+ dwidth = at_xdmac_align_width(chan, dst_addr);
+
+ if (len >= (AT_XDMAC_MBR_UBC_UBLEN_MAX << dwidth)) {
+ dev_err(chan2dev(chan),
+ "%s: Transfer too large, aborting...\n",
+ __func__);
+ return NULL;
+ }
+
+ spin_lock_irqsave(&atchan->lock, flags);
+ desc = at_xdmac_get_desc(atchan);
+ spin_unlock_irqrestore(&atchan->lock, flags);
+ if (!desc) {
+ dev_err(chan2dev(chan), "can't get descriptor\n");
+ return NULL;
+ }
+
+ chan_cc |= AT_XDMAC_CC_DWIDTH(dwidth);
+
+ /* Only the first byte of value is to be used according to dmaengine */
+ pattern = (char)value;
+
+ ublen = len >> dwidth;
+
+ desc->lld.mbr_da = dst_addr;
+ desc->lld.mbr_ds = (pattern << 24) |
+ (pattern << 16) |
+ (pattern << 8) |
+ pattern;
+ desc->lld.mbr_ubc = AT_XDMAC_MBR_UBC_NDV3
+ | AT_XDMAC_MBR_UBC_NDEN
+ | AT_XDMAC_MBR_UBC_NSEN
+ | ublen;
+ desc->lld.mbr_cfg = chan_cc;
+
+ dev_dbg(chan2dev(chan),
+ "%s: lld: mbr_da=%pad, mbr_ds=0x%08x, mbr_ubc=0x%08x, mbr_cfg=0x%08x\n",
+ __func__, &desc->lld.mbr_da, desc->lld.mbr_ds, desc->lld.mbr_ubc,
+ desc->lld.mbr_cfg);
+
+ return desc;
+}
+
+static struct dma_async_tx_descriptor *
+at_xdmac_prep_dma_memset(struct dma_chan *chan, dma_addr_t dest, int value,
+ size_t len, unsigned long flags)
+{
+ struct at_xdmac_chan *atchan = to_at_xdmac_chan(chan);
+ struct at_xdmac_desc *desc;
+
+ dev_dbg(chan2dev(chan), "%s: dest=%pad, len=%zu, pattern=0x%x, flags=0x%lx\n",
+ __func__, &dest, len, value, flags);
+
+ if (unlikely(!len))
+ return NULL;
+
+ desc = at_xdmac_memset_create_desc(chan, atchan, dest, len, value);
+ list_add_tail(&desc->desc_node, &desc->descs_list);
+
+ desc->tx_dma_desc.cookie = -EBUSY;
+ desc->tx_dma_desc.flags = flags;
+ desc->xfer_size = len;
+
+ return &desc->tx_dma_desc;
+}
+
+static struct dma_async_tx_descriptor *
+at_xdmac_prep_dma_memset_sg(struct dma_chan *chan, struct scatterlist *sgl,
+ unsigned int sg_len, int value,
+ unsigned long flags)
+{
+ struct at_xdmac_chan *atchan = to_at_xdmac_chan(chan);
+ struct at_xdmac_desc *desc, *pdesc = NULL,
+ *ppdesc = NULL, *first = NULL;
+ struct scatterlist *sg, *psg = NULL, *ppsg = NULL;
+ size_t stride = 0, pstride = 0, len = 0;
+ int i;
+
+ if (!sgl)
+ return NULL;
+
+ dev_dbg(chan2dev(chan), "%s: sg_len=%d, value=0x%x, flags=0x%lx\n",
+ __func__, sg_len, value, flags);
+
+ /* Prepare descriptors. */
+ for_each_sg(sgl, sg, sg_len, i) {
+ dev_dbg(chan2dev(chan), "%s: dest=%pad, len=%d, pattern=0x%x, flags=0x%lx\n",
+ __func__, &sg_dma_address(sg), sg_dma_len(sg),
+ value, flags);
+ desc = at_xdmac_memset_create_desc(chan, atchan,
+ sg_dma_address(sg),
+ sg_dma_len(sg),
+ value);
+ if (!desc && first)
+ list_splice_tail_init(&first->descs_list,
+ &atchan->free_descs_list);
+
+ if (!first)
+ first = desc;
+
+ /* Update our strides */
+ pstride = stride;
+ if (psg)
+ stride = sg_dma_address(sg) -
+ (sg_dma_address(psg) + sg_dma_len(psg));
+
+ /*
+ * The scatterlist API gives us only the address and
+ * length of each elements.
+ *
+ * Unfortunately, we don't have the stride, which we
+ * will need to compute.
+ *
+ * That make us end up in a situation like this one:
+ * len stride len stride len
+ * +-------+ +-------+ +-------+
+ * | N-2 | | N-1 | | N |
+ * +-------+ +-------+ +-------+
+ *
+ * We need all these three elements (N-2, N-1 and N)
+ * to actually take the decision on whether we need to
+ * queue N-1 or reuse N-2.
+ *
+ * We will only consider N if it is the last element.
+ */
+ if (ppdesc && pdesc) {
+ if ((stride == pstride) &&
+ (sg_dma_len(ppsg) == sg_dma_len(psg))) {
+ dev_dbg(chan2dev(chan),
+ "%s: desc 0x%p can be merged with desc 0x%p\n",
+ __func__, pdesc, ppdesc);
+
+ /*
+ * Increment the block count of the
+ * N-2 descriptor
+ */
+ at_xdmac_increment_block_count(chan, ppdesc);
+ ppdesc->lld.mbr_dus = stride;
+
+ /*
+ * Put back the N-1 descriptor in the
+ * free descriptor list
+ */
+ list_add_tail(&pdesc->desc_node,
+ &atchan->free_descs_list);
+
+ /*
+ * Make our N-1 descriptor pointer
+ * point to the N-2 since they were
+ * actually merged.
+ */
+ pdesc = ppdesc;
+
+ /*
+ * Rule out the case where we don't have
+ * pstride computed yet (our second sg
+ * element)
+ *
+ * We also want to catch the case where there
+ * would be a negative stride,
+ */
+ } else if (pstride ||
+ sg_dma_address(sg) < sg_dma_address(psg)) {
+ /*
+ * Queue the N-1 descriptor after the
+ * N-2
+ */
+ at_xdmac_queue_desc(chan, ppdesc, pdesc);
+
+ /*
+ * Add the N-1 descriptor to the list
+ * of the descriptors used for this
+ * transfer
+ */
+ list_add_tail(&desc->desc_node,
+ &first->descs_list);
+ dev_dbg(chan2dev(chan),
+ "%s: add desc 0x%p to descs_list 0x%p\n",
+ __func__, desc, first);
+ }
+ }
+
+ /*
+ * If we are the last element, just see if we have the
+ * same size than the previous element.
+ *
+ * If so, we can merge it with the previous descriptor
+ * since we don't care about the stride anymore.
+ */
+ if ((i == (sg_len - 1)) &&
+ sg_dma_len(psg) == sg_dma_len(sg)) {
+ dev_dbg(chan2dev(chan),
+ "%s: desc 0x%p can be merged with desc 0x%p\n",
+ __func__, desc, pdesc);
+
+ /*
+ * Increment the block count of the N-1
+ * descriptor
+ */
+ at_xdmac_increment_block_count(chan, pdesc);
+ pdesc->lld.mbr_dus = stride;
+
+ /*
+ * Put back the N descriptor in the free
+ * descriptor list
+ */
+ list_add_tail(&desc->desc_node,
+ &atchan->free_descs_list);
+ }
+
+ /* Update our descriptors */
+ ppdesc = pdesc;
+ pdesc = desc;
+
+ /* Update our scatter pointers */
+ ppsg = psg;
+ psg = sg;
+
+ len += sg_dma_len(sg);
+ }
+
+ first->tx_dma_desc.cookie = -EBUSY;
+ first->tx_dma_desc.flags = flags;
+ first->xfer_size = len;
+
+ return &first->tx_dma_desc;
+}
+
+static enum dma_status
+at_xdmac_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
+ struct dma_tx_state *txstate)
+{
+ struct at_xdmac_chan *atchan = to_at_xdmac_chan(chan);
+ struct at_xdmac *atxdmac = to_at_xdmac(atchan->chan.device);
+ struct at_xdmac_desc *desc, *_desc, *iter;
+ struct list_head *descs_list;
+ enum dma_status ret;
+ int residue, retry;
+ u32 cur_nda, check_nda, cur_ubc, mask, value;
+ u8 dwidth = 0;
+ unsigned long flags;
+ bool initd;
+
+ ret = dma_cookie_status(chan, cookie, txstate);
+ if (ret == DMA_COMPLETE || !txstate)
+ return ret;
+
+ spin_lock_irqsave(&atchan->lock, flags);
+
+ desc = list_first_entry(&atchan->xfers_list, struct at_xdmac_desc, xfer_node);
+
+ /*
+ * If the transfer has not been started yet, don't need to compute the
+ * residue, it's the transfer length.
+ */
+ if (!desc->active_xfer) {
+ dma_set_residue(txstate, desc->xfer_size);
+ goto spin_unlock;
+ }
+
+ residue = desc->xfer_size;
+ /*
+ * Flush FIFO: only relevant when the transfer is source peripheral
+ * synchronized. Flush is needed before reading CUBC because data in
+ * the FIFO are not reported by CUBC. Reporting a residue of the
+ * transfer length while we have data in FIFO can cause issue.
+ * Usecase: atmel USART has a timeout which means I have received
+ * characters but there is no more character received for a while. On
+ * timeout, it requests the residue. If the data are in the DMA FIFO,
+ * we will return a residue of the transfer length. It means no data
+ * received. If an application is waiting for these data, it will hang
+ * since we won't have another USART timeout without receiving new
+ * data.
+ */
+ mask = AT_XDMAC_CC_TYPE | AT_XDMAC_CC_DSYNC;
+ value = AT_XDMAC_CC_TYPE_PER_TRAN | AT_XDMAC_CC_DSYNC_PER2MEM;
+ if ((desc->lld.mbr_cfg & mask) == value) {
+ at_xdmac_write(atxdmac, atxdmac->layout->gswf, atchan->mask);
+ while (!(at_xdmac_chan_read(atchan, AT_XDMAC_CIS) & AT_XDMAC_CIS_FIS))
+ cpu_relax();
+ }
+
+ /*
+ * The easiest way to compute the residue should be to pause the DMA
+ * but doing this can lead to miss some data as some devices don't
+ * have FIFO.
+ * We need to read several registers because:
+ * - DMA is running therefore a descriptor change is possible while
+ * reading these registers
+ * - When the block transfer is done, the value of the CUBC register
+ * is set to its initial value until the fetch of the next descriptor.
+ * This value will corrupt the residue calculation so we have to skip
+ * it.
+ *
+ * INITD -------- ------------
+ * |____________________|
+ * _______________________ _______________
+ * NDA @desc2 \/ @desc3
+ * _______________________/\_______________
+ * __________ ___________ _______________
+ * CUBC 0 \/ MAX desc1 \/ MAX desc2
+ * __________/\___________/\_______________
+ *
+ * Since descriptors are aligned on 64 bits, we can assume that
+ * the update of NDA and CUBC is atomic.
+ * Memory barriers are used to ensure the read order of the registers.
+ * A max number of retries is set because unlikely it could never ends.
+ */
+ for (retry = 0; retry < AT_XDMAC_RESIDUE_MAX_RETRIES; retry++) {
+ check_nda = at_xdmac_chan_read(atchan, AT_XDMAC_CNDA) & 0xfffffffc;
+ rmb();
+ cur_ubc = at_xdmac_chan_read(atchan, AT_XDMAC_CUBC);
+ rmb();
+ initd = !!(at_xdmac_chan_read(atchan, AT_XDMAC_CC) & AT_XDMAC_CC_INITD);
+ rmb();
+ cur_nda = at_xdmac_chan_read(atchan, AT_XDMAC_CNDA) & 0xfffffffc;
+ rmb();
+
+ if ((check_nda == cur_nda) && initd)
+ break;
+ }
+
+ if (unlikely(retry >= AT_XDMAC_RESIDUE_MAX_RETRIES)) {
+ ret = DMA_ERROR;
+ goto spin_unlock;
+ }
+
+ /*
+ * Flush FIFO: only relevant when the transfer is source peripheral
+ * synchronized. Another flush is needed here because CUBC is updated
+ * when the controller sends the data write command. It can lead to
+ * report data that are not written in the memory or the device. The
+ * FIFO flush ensures that data are really written.
+ */
+ if ((desc->lld.mbr_cfg & mask) == value) {
+ at_xdmac_write(atxdmac, atxdmac->layout->gswf, atchan->mask);
+ while (!(at_xdmac_chan_read(atchan, AT_XDMAC_CIS) & AT_XDMAC_CIS_FIS))
+ cpu_relax();
+ }
+
+ /*
+ * Remove size of all microblocks already transferred and the current
+ * one. Then add the remaining size to transfer of the current
+ * microblock.
+ */
+ descs_list = &desc->descs_list;
+ list_for_each_entry_safe(iter, _desc, descs_list, desc_node) {
+ dwidth = at_xdmac_get_dwidth(iter->lld.mbr_cfg);
+ residue -= (iter->lld.mbr_ubc & 0xffffff) << dwidth;
+ if ((iter->lld.mbr_nda & 0xfffffffc) == cur_nda) {
+ desc = iter;
+ break;
+ }
+ }
+ residue += cur_ubc << dwidth;
+
+ dma_set_residue(txstate, residue);
+
+ dev_dbg(chan2dev(chan),
+ "%s: desc=0x%p, tx_dma_desc.phys=%pad, tx_status=%d, cookie=%d, residue=%d\n",
+ __func__, desc, &desc->tx_dma_desc.phys, ret, cookie, residue);
+
+spin_unlock:
+ spin_unlock_irqrestore(&atchan->lock, flags);
+ return ret;
+}
+
+static void at_xdmac_advance_work(struct at_xdmac_chan *atchan)
+{
+ struct at_xdmac_desc *desc;
+
+ /*
+ * If channel is enabled, do nothing, advance_work will be triggered
+ * after the interruption.
+ */
+ if (at_xdmac_chan_is_enabled(atchan) || list_empty(&atchan->xfers_list))
+ return;
+
+ desc = list_first_entry(&atchan->xfers_list, struct at_xdmac_desc,
+ xfer_node);
+ dev_vdbg(chan2dev(&atchan->chan), "%s: desc 0x%p\n", __func__, desc);
+ if (!desc->active_xfer)
+ at_xdmac_start_xfer(atchan, desc);
+}
+
+static void at_xdmac_handle_cyclic(struct at_xdmac_chan *atchan)
+{
+ struct at_xdmac_desc *desc;
+ struct dma_async_tx_descriptor *txd;
+
+ spin_lock_irq(&atchan->lock);
+ dev_dbg(chan2dev(&atchan->chan), "%s: status=0x%08x\n",
+ __func__, atchan->irq_status);
+ if (list_empty(&atchan->xfers_list)) {
+ spin_unlock_irq(&atchan->lock);
+ return;
+ }
+ desc = list_first_entry(&atchan->xfers_list, struct at_xdmac_desc,
+ xfer_node);
+ spin_unlock_irq(&atchan->lock);
+ txd = &desc->tx_dma_desc;
+ if (txd->flags & DMA_PREP_INTERRUPT)
+ dmaengine_desc_get_callback_invoke(txd, NULL);
+}
+
+/* Called with atchan->lock held. */
+static void at_xdmac_handle_error(struct at_xdmac_chan *atchan)
+{
+ struct at_xdmac *atxdmac = to_at_xdmac(atchan->chan.device);
+ struct at_xdmac_desc *bad_desc;
+
+ /*
+ * The descriptor currently at the head of the active list is
+ * broken. Since we don't have any way to report errors, we'll
+ * just have to scream loudly and try to continue with other
+ * descriptors queued (if any).
+ */
+ if (atchan->irq_status & AT_XDMAC_CIS_RBEIS)
+ dev_err(chan2dev(&atchan->chan), "read bus error!!!");
+ if (atchan->irq_status & AT_XDMAC_CIS_WBEIS)
+ dev_err(chan2dev(&atchan->chan), "write bus error!!!");
+ if (atchan->irq_status & AT_XDMAC_CIS_ROIS)
+ dev_err(chan2dev(&atchan->chan), "request overflow error!!!");
+
+ /* Channel must be disabled first as it's not done automatically */
+ at_xdmac_write(atxdmac, AT_XDMAC_GD, atchan->mask);
+ while (at_xdmac_read(atxdmac, AT_XDMAC_GS) & atchan->mask)
+ cpu_relax();
+
+ bad_desc = list_first_entry(&atchan->xfers_list,
+ struct at_xdmac_desc,
+ xfer_node);
+
+ /* Print bad descriptor's details if needed */
+ dev_dbg(chan2dev(&atchan->chan),
+ "%s: lld: mbr_sa=%pad, mbr_da=%pad, mbr_ubc=0x%08x\n",
+ __func__, &bad_desc->lld.mbr_sa, &bad_desc->lld.mbr_da,
+ bad_desc->lld.mbr_ubc);
+
+ /* Then continue with usual descriptor management */
+}
+
+static void at_xdmac_tasklet(struct tasklet_struct *t)
+{
+ struct at_xdmac_chan *atchan = from_tasklet(atchan, t, tasklet);
+ struct at_xdmac_desc *desc;
+ struct dma_async_tx_descriptor *txd;
+ u32 error_mask;
+
+ if (at_xdmac_chan_is_cyclic(atchan))
+ return at_xdmac_handle_cyclic(atchan);
+
+ error_mask = AT_XDMAC_CIS_RBEIS | AT_XDMAC_CIS_WBEIS |
+ AT_XDMAC_CIS_ROIS;
+
+ spin_lock_irq(&atchan->lock);
+
+ dev_dbg(chan2dev(&atchan->chan), "%s: status=0x%08x\n",
+ __func__, atchan->irq_status);
+
+ if (!(atchan->irq_status & AT_XDMAC_CIS_LIS) &&
+ !(atchan->irq_status & error_mask)) {
+ spin_unlock_irq(&atchan->lock);
+ return;
+ }
+
+ if (atchan->irq_status & error_mask)
+ at_xdmac_handle_error(atchan);
+
+ desc = list_first_entry(&atchan->xfers_list, struct at_xdmac_desc,
+ xfer_node);
+ dev_vdbg(chan2dev(&atchan->chan), "%s: desc 0x%p\n", __func__, desc);
+ if (!desc->active_xfer) {
+ dev_err(chan2dev(&atchan->chan), "Xfer not active: exiting");
+ spin_unlock_irq(&atchan->lock);
+ return;
+ }
+
+ txd = &desc->tx_dma_desc;
+ dma_cookie_complete(txd);
+ /* Remove the transfer from the transfer list. */
+ list_del(&desc->xfer_node);
+ spin_unlock_irq(&atchan->lock);
+
+ if (txd->flags & DMA_PREP_INTERRUPT)
+ dmaengine_desc_get_callback_invoke(txd, NULL);
+
+ dma_run_dependencies(txd);
+
+ spin_lock_irq(&atchan->lock);
+ /* Move the xfer descriptors into the free descriptors list. */
+ list_splice_tail_init(&desc->descs_list, &atchan->free_descs_list);
+ at_xdmac_advance_work(atchan);
+ spin_unlock_irq(&atchan->lock);
+}
+
+static irqreturn_t at_xdmac_interrupt(int irq, void *dev_id)
+{
+ struct at_xdmac *atxdmac = (struct at_xdmac *)dev_id;
+ struct at_xdmac_chan *atchan;
+ u32 imr, status, pending;
+ u32 chan_imr, chan_status;
+ int i, ret = IRQ_NONE;
+
+ do {
+ imr = at_xdmac_read(atxdmac, AT_XDMAC_GIM);
+ status = at_xdmac_read(atxdmac, AT_XDMAC_GIS);
+ pending = status & imr;
+
+ dev_vdbg(atxdmac->dma.dev,
+ "%s: status=0x%08x, imr=0x%08x, pending=0x%08x\n",
+ __func__, status, imr, pending);
+
+ if (!pending)
+ break;
+
+ /* We have to find which channel has generated the interrupt. */
+ for (i = 0; i < atxdmac->dma.chancnt; i++) {
+ if (!((1 << i) & pending))
+ continue;
+
+ atchan = &atxdmac->chan[i];
+ chan_imr = at_xdmac_chan_read(atchan, AT_XDMAC_CIM);
+ chan_status = at_xdmac_chan_read(atchan, AT_XDMAC_CIS);
+ atchan->irq_status = chan_status & chan_imr;
+ dev_vdbg(atxdmac->dma.dev,
+ "%s: chan%d: imr=0x%x, status=0x%x\n",
+ __func__, i, chan_imr, chan_status);
+ dev_vdbg(chan2dev(&atchan->chan),
+ "%s: CC=0x%08x CNDA=0x%08x, CNDC=0x%08x, CSA=0x%08x, CDA=0x%08x, CUBC=0x%08x\n",
+ __func__,
+ at_xdmac_chan_read(atchan, AT_XDMAC_CC),
+ at_xdmac_chan_read(atchan, AT_XDMAC_CNDA),
+ at_xdmac_chan_read(atchan, AT_XDMAC_CNDC),
+ at_xdmac_chan_read(atchan, AT_XDMAC_CSA),
+ at_xdmac_chan_read(atchan, AT_XDMAC_CDA),
+ at_xdmac_chan_read(atchan, AT_XDMAC_CUBC));
+
+ if (atchan->irq_status & (AT_XDMAC_CIS_RBEIS | AT_XDMAC_CIS_WBEIS))
+ at_xdmac_write(atxdmac, AT_XDMAC_GD, atchan->mask);
+
+ tasklet_schedule(&atchan->tasklet);
+ ret = IRQ_HANDLED;
+ }
+
+ } while (pending);
+
+ return ret;
+}
+
+static void at_xdmac_issue_pending(struct dma_chan *chan)
+{
+ struct at_xdmac_chan *atchan = to_at_xdmac_chan(chan);
+ unsigned long flags;
+
+ dev_dbg(chan2dev(&atchan->chan), "%s\n", __func__);
+
+ spin_lock_irqsave(&atchan->lock, flags);
+ at_xdmac_advance_work(atchan);
+ spin_unlock_irqrestore(&atchan->lock, flags);
+
+ return;
+}
+
+static int at_xdmac_device_config(struct dma_chan *chan,
+ struct dma_slave_config *config)
+{
+ struct at_xdmac_chan *atchan = to_at_xdmac_chan(chan);
+ int ret;
+ unsigned long flags;
+
+ dev_dbg(chan2dev(chan), "%s\n", __func__);
+
+ spin_lock_irqsave(&atchan->lock, flags);
+ ret = at_xdmac_set_slave_config(chan, config);
+ spin_unlock_irqrestore(&atchan->lock, flags);
+
+ return ret;
+}
+
+static int at_xdmac_device_pause(struct dma_chan *chan)
+{
+ struct at_xdmac_chan *atchan = to_at_xdmac_chan(chan);
+ struct at_xdmac *atxdmac = to_at_xdmac(atchan->chan.device);
+ unsigned long flags;
+
+ dev_dbg(chan2dev(chan), "%s\n", __func__);
+
+ if (test_and_set_bit(AT_XDMAC_CHAN_IS_PAUSED, &atchan->status))
+ return 0;
+
+ spin_lock_irqsave(&atchan->lock, flags);
+ at_xdmac_write(atxdmac, atxdmac->layout->grws, atchan->mask);
+ while (at_xdmac_chan_read(atchan, AT_XDMAC_CC)
+ & (AT_XDMAC_CC_WRIP | AT_XDMAC_CC_RDIP))
+ cpu_relax();
+ spin_unlock_irqrestore(&atchan->lock, flags);
+
+ return 0;
+}
+
+static int at_xdmac_device_resume(struct dma_chan *chan)
+{
+ struct at_xdmac_chan *atchan = to_at_xdmac_chan(chan);
+ struct at_xdmac *atxdmac = to_at_xdmac(atchan->chan.device);
+ unsigned long flags;
+
+ dev_dbg(chan2dev(chan), "%s\n", __func__);
+
+ spin_lock_irqsave(&atchan->lock, flags);
+ if (!at_xdmac_chan_is_paused(atchan)) {
+ spin_unlock_irqrestore(&atchan->lock, flags);
+ return 0;
+ }
+
+ at_xdmac_write(atxdmac, atxdmac->layout->grwr, atchan->mask);
+ clear_bit(AT_XDMAC_CHAN_IS_PAUSED, &atchan->status);
+ spin_unlock_irqrestore(&atchan->lock, flags);
+
+ return 0;
+}
+
+static int at_xdmac_device_terminate_all(struct dma_chan *chan)
+{
+ struct at_xdmac_desc *desc, *_desc;
+ struct at_xdmac_chan *atchan = to_at_xdmac_chan(chan);
+ struct at_xdmac *atxdmac = to_at_xdmac(atchan->chan.device);
+ unsigned long flags;
+
+ dev_dbg(chan2dev(chan), "%s\n", __func__);
+
+ spin_lock_irqsave(&atchan->lock, flags);
+ at_xdmac_write(atxdmac, AT_XDMAC_GD, atchan->mask);
+ while (at_xdmac_read(atxdmac, AT_XDMAC_GS) & atchan->mask)
+ cpu_relax();
+
+ /* Cancel all pending transfers. */
+ list_for_each_entry_safe(desc, _desc, &atchan->xfers_list, xfer_node) {
+ list_del(&desc->xfer_node);
+ list_splice_tail_init(&desc->descs_list,
+ &atchan->free_descs_list);
+ }
+
+ clear_bit(AT_XDMAC_CHAN_IS_PAUSED, &atchan->status);
+ clear_bit(AT_XDMAC_CHAN_IS_CYCLIC, &atchan->status);
+ spin_unlock_irqrestore(&atchan->lock, flags);
+
+ return 0;
+}
+
+static int at_xdmac_alloc_chan_resources(struct dma_chan *chan)
+{
+ struct at_xdmac_chan *atchan = to_at_xdmac_chan(chan);
+ struct at_xdmac_desc *desc;
+ int i;
+
+ if (at_xdmac_chan_is_enabled(atchan)) {
+ dev_err(chan2dev(chan),
+ "can't allocate channel resources (channel enabled)\n");
+ return -EIO;
+ }
+
+ if (!list_empty(&atchan->free_descs_list)) {
+ dev_err(chan2dev(chan),
+ "can't allocate channel resources (channel not free from a previous use)\n");
+ return -EIO;
+ }
+
+ for (i = 0; i < init_nr_desc_per_channel; i++) {
+ desc = at_xdmac_alloc_desc(chan, GFP_KERNEL);
+ if (!desc) {
+ if (i == 0) {
+ dev_warn(chan2dev(chan),
+ "can't allocate any descriptors\n");
+ return -EIO;
+ }
+ dev_warn(chan2dev(chan),
+ "only %d descriptors have been allocated\n", i);
+ break;
+ }
+ list_add_tail(&desc->desc_node, &atchan->free_descs_list);
+ }
+
+ dma_cookie_init(chan);
+
+ dev_dbg(chan2dev(chan), "%s: allocated %d descriptors\n", __func__, i);
+
+ return i;
+}
+
+static void at_xdmac_free_chan_resources(struct dma_chan *chan)
+{
+ struct at_xdmac_chan *atchan = to_at_xdmac_chan(chan);
+ struct at_xdmac *atxdmac = to_at_xdmac(chan->device);
+ struct at_xdmac_desc *desc, *_desc;
+
+ list_for_each_entry_safe(desc, _desc, &atchan->free_descs_list, desc_node) {
+ dev_dbg(chan2dev(chan), "%s: freeing descriptor %p\n", __func__, desc);
+ list_del(&desc->desc_node);
+ dma_pool_free(atxdmac->at_xdmac_desc_pool, desc, desc->tx_dma_desc.phys);
+ }
+
+ return;
+}
+
+static void at_xdmac_axi_config(struct platform_device *pdev)
+{
+ struct at_xdmac *atxdmac = (struct at_xdmac *)platform_get_drvdata(pdev);
+ bool dev_m2m = false;
+ u32 dma_requests;
+
+ if (!atxdmac->layout->axi_config)
+ return; /* Not supported */
+
+ if (!of_property_read_u32(pdev->dev.of_node, "dma-requests",
+ &dma_requests)) {
+ dev_info(&pdev->dev, "controller in mem2mem mode.\n");
+ dev_m2m = true;
+ }
+
+ if (dev_m2m) {
+ at_xdmac_write(atxdmac, AT_XDMAC_GCFG, AT_XDMAC_GCFG_M2M);
+ at_xdmac_write(atxdmac, AT_XDMAC_GWAC, AT_XDMAC_GWAC_M2M);
+ } else {
+ at_xdmac_write(atxdmac, AT_XDMAC_GCFG, AT_XDMAC_GCFG_P2M);
+ at_xdmac_write(atxdmac, AT_XDMAC_GWAC, AT_XDMAC_GWAC_P2M);
+ }
+}
+
+static int __maybe_unused atmel_xdmac_prepare(struct device *dev)
+{
+ struct at_xdmac *atxdmac = dev_get_drvdata(dev);
+ struct dma_chan *chan, *_chan;
+
+ list_for_each_entry_safe(chan, _chan, &atxdmac->dma.channels, device_node) {
+ struct at_xdmac_chan *atchan = to_at_xdmac_chan(chan);
+
+ /* Wait for transfer completion, except in cyclic case. */
+ if (at_xdmac_chan_is_enabled(atchan) && !at_xdmac_chan_is_cyclic(atchan))
+ return -EAGAIN;
+ }
+ return 0;
+}
+
+static int __maybe_unused atmel_xdmac_suspend(struct device *dev)
+{
+ struct at_xdmac *atxdmac = dev_get_drvdata(dev);
+ struct dma_chan *chan, *_chan;
+
+ list_for_each_entry_safe(chan, _chan, &atxdmac->dma.channels, device_node) {
+ struct at_xdmac_chan *atchan = to_at_xdmac_chan(chan);
+
+ atchan->save_cc = at_xdmac_chan_read(atchan, AT_XDMAC_CC);
+ if (at_xdmac_chan_is_cyclic(atchan)) {
+ if (!at_xdmac_chan_is_paused(atchan))
+ at_xdmac_device_pause(chan);
+ atchan->save_cim = at_xdmac_chan_read(atchan, AT_XDMAC_CIM);
+ atchan->save_cnda = at_xdmac_chan_read(atchan, AT_XDMAC_CNDA);
+ atchan->save_cndc = at_xdmac_chan_read(atchan, AT_XDMAC_CNDC);
+ }
+ }
+ atxdmac->save_gim = at_xdmac_read(atxdmac, AT_XDMAC_GIM);
+ atxdmac->save_gs = at_xdmac_read(atxdmac, AT_XDMAC_GS);
+
+ at_xdmac_off(atxdmac);
+ clk_disable_unprepare(atxdmac->clk);
+ return 0;
+}
+
+static int __maybe_unused atmel_xdmac_resume(struct device *dev)
+{
+ struct at_xdmac *atxdmac = dev_get_drvdata(dev);
+ struct at_xdmac_chan *atchan;
+ struct dma_chan *chan, *_chan;
+ struct platform_device *pdev = container_of(dev, struct platform_device, dev);
+ int i;
+ int ret;
+
+ ret = clk_prepare_enable(atxdmac->clk);
+ if (ret)
+ return ret;
+
+ at_xdmac_axi_config(pdev);
+
+ /* Clear pending interrupts. */
+ for (i = 0; i < atxdmac->dma.chancnt; i++) {
+ atchan = &atxdmac->chan[i];
+ while (at_xdmac_chan_read(atchan, AT_XDMAC_CIS))
+ cpu_relax();
+ }
+
+ at_xdmac_write(atxdmac, AT_XDMAC_GIE, atxdmac->save_gim);
+ list_for_each_entry_safe(chan, _chan, &atxdmac->dma.channels, device_node) {
+ atchan = to_at_xdmac_chan(chan);
+ at_xdmac_chan_write(atchan, AT_XDMAC_CC, atchan->save_cc);
+ if (at_xdmac_chan_is_cyclic(atchan)) {
+ if (at_xdmac_chan_is_paused(atchan))
+ at_xdmac_device_resume(chan);
+ at_xdmac_chan_write(atchan, AT_XDMAC_CNDA, atchan->save_cnda);
+ at_xdmac_chan_write(atchan, AT_XDMAC_CNDC, atchan->save_cndc);
+ at_xdmac_chan_write(atchan, AT_XDMAC_CIE, atchan->save_cim);
+ wmb();
+ if (atxdmac->save_gs & atchan->mask)
+ at_xdmac_write(atxdmac, AT_XDMAC_GE, atchan->mask);
+ }
+ }
+ return 0;
+}
+
+static int at_xdmac_probe(struct platform_device *pdev)
+{
+ struct at_xdmac *atxdmac;
+ int irq, nr_channels, i, ret;
+ void __iomem *base;
+ u32 reg;
+
+ irq = platform_get_irq(pdev, 0);
+ if (irq < 0)
+ return irq;
+
+ base = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(base))
+ return PTR_ERR(base);
+
+ /*
+ * Read number of xdmac channels, read helper function can't be used
+ * since atxdmac is not yet allocated and we need to know the number
+ * of channels to do the allocation.
+ */
+ reg = readl_relaxed(base + AT_XDMAC_GTYPE);
+ nr_channels = AT_XDMAC_NB_CH(reg);
+ if (nr_channels > AT_XDMAC_MAX_CHAN) {
+ dev_err(&pdev->dev, "invalid number of channels (%u)\n",
+ nr_channels);
+ return -EINVAL;
+ }
+
+ atxdmac = devm_kzalloc(&pdev->dev,
+ struct_size(atxdmac, chan, nr_channels),
+ GFP_KERNEL);
+ if (!atxdmac) {
+ dev_err(&pdev->dev, "can't allocate at_xdmac structure\n");
+ return -ENOMEM;
+ }
+
+ atxdmac->regs = base;
+ atxdmac->irq = irq;
+
+ atxdmac->layout = of_device_get_match_data(&pdev->dev);
+ if (!atxdmac->layout)
+ return -ENODEV;
+
+ atxdmac->clk = devm_clk_get(&pdev->dev, "dma_clk");
+ if (IS_ERR(atxdmac->clk)) {
+ dev_err(&pdev->dev, "can't get dma_clk\n");
+ return PTR_ERR(atxdmac->clk);
+ }
+
+ /* Do not use dev res to prevent races with tasklet */
+ ret = request_irq(atxdmac->irq, at_xdmac_interrupt, 0, "at_xdmac", atxdmac);
+ if (ret) {
+ dev_err(&pdev->dev, "can't request irq\n");
+ return ret;
+ }
+
+ ret = clk_prepare_enable(atxdmac->clk);
+ if (ret) {
+ dev_err(&pdev->dev, "can't prepare or enable clock\n");
+ goto err_free_irq;
+ }
+
+ atxdmac->at_xdmac_desc_pool =
+ dmam_pool_create(dev_name(&pdev->dev), &pdev->dev,
+ sizeof(struct at_xdmac_desc), 4, 0);
+ if (!atxdmac->at_xdmac_desc_pool) {
+ dev_err(&pdev->dev, "no memory for descriptors dma pool\n");
+ ret = -ENOMEM;
+ goto err_clk_disable;
+ }
+
+ dma_cap_set(DMA_CYCLIC, atxdmac->dma.cap_mask);
+ dma_cap_set(DMA_INTERLEAVE, atxdmac->dma.cap_mask);
+ dma_cap_set(DMA_MEMCPY, atxdmac->dma.cap_mask);
+ dma_cap_set(DMA_MEMSET, atxdmac->dma.cap_mask);
+ dma_cap_set(DMA_MEMSET_SG, atxdmac->dma.cap_mask);
+ dma_cap_set(DMA_SLAVE, atxdmac->dma.cap_mask);
+ /*
+ * Without DMA_PRIVATE the driver is not able to allocate more than
+ * one channel, second allocation fails in private_candidate.
+ */
+ dma_cap_set(DMA_PRIVATE, atxdmac->dma.cap_mask);
+ atxdmac->dma.dev = &pdev->dev;
+ atxdmac->dma.device_alloc_chan_resources = at_xdmac_alloc_chan_resources;
+ atxdmac->dma.device_free_chan_resources = at_xdmac_free_chan_resources;
+ atxdmac->dma.device_tx_status = at_xdmac_tx_status;
+ atxdmac->dma.device_issue_pending = at_xdmac_issue_pending;
+ atxdmac->dma.device_prep_dma_cyclic = at_xdmac_prep_dma_cyclic;
+ atxdmac->dma.device_prep_interleaved_dma = at_xdmac_prep_interleaved;
+ atxdmac->dma.device_prep_dma_memcpy = at_xdmac_prep_dma_memcpy;
+ atxdmac->dma.device_prep_dma_memset = at_xdmac_prep_dma_memset;
+ atxdmac->dma.device_prep_dma_memset_sg = at_xdmac_prep_dma_memset_sg;
+ atxdmac->dma.device_prep_slave_sg = at_xdmac_prep_slave_sg;
+ atxdmac->dma.device_config = at_xdmac_device_config;
+ atxdmac->dma.device_pause = at_xdmac_device_pause;
+ atxdmac->dma.device_resume = at_xdmac_device_resume;
+ atxdmac->dma.device_terminate_all = at_xdmac_device_terminate_all;
+ atxdmac->dma.src_addr_widths = AT_XDMAC_DMA_BUSWIDTHS;
+ atxdmac->dma.dst_addr_widths = AT_XDMAC_DMA_BUSWIDTHS;
+ atxdmac->dma.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
+ atxdmac->dma.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
+
+ /* Disable all chans and interrupts. */
+ at_xdmac_off(atxdmac);
+
+ /* Init channels. */
+ INIT_LIST_HEAD(&atxdmac->dma.channels);
+ for (i = 0; i < nr_channels; i++) {
+ struct at_xdmac_chan *atchan = &atxdmac->chan[i];
+
+ atchan->chan.device = &atxdmac->dma;
+ list_add_tail(&atchan->chan.device_node,
+ &atxdmac->dma.channels);
+
+ atchan->ch_regs = at_xdmac_chan_reg_base(atxdmac, i);
+ atchan->mask = 1 << i;
+
+ spin_lock_init(&atchan->lock);
+ INIT_LIST_HEAD(&atchan->xfers_list);
+ INIT_LIST_HEAD(&atchan->free_descs_list);
+ tasklet_setup(&atchan->tasklet, at_xdmac_tasklet);
+
+ /* Clear pending interrupts. */
+ while (at_xdmac_chan_read(atchan, AT_XDMAC_CIS))
+ cpu_relax();
+ }
+ platform_set_drvdata(pdev, atxdmac);
+
+ ret = dma_async_device_register(&atxdmac->dma);
+ if (ret) {
+ dev_err(&pdev->dev, "fail to register DMA engine device\n");
+ goto err_clk_disable;
+ }
+
+ ret = of_dma_controller_register(pdev->dev.of_node,
+ at_xdmac_xlate, atxdmac);
+ if (ret) {
+ dev_err(&pdev->dev, "could not register of dma controller\n");
+ goto err_dma_unregister;
+ }
+
+ dev_info(&pdev->dev, "%d channels, mapped at 0x%p\n",
+ nr_channels, atxdmac->regs);
+
+ at_xdmac_axi_config(pdev);
+
+ return 0;
+
+err_dma_unregister:
+ dma_async_device_unregister(&atxdmac->dma);
+err_clk_disable:
+ clk_disable_unprepare(atxdmac->clk);
+err_free_irq:
+ free_irq(atxdmac->irq, atxdmac);
+ return ret;
+}
+
+static int at_xdmac_remove(struct platform_device *pdev)
+{
+ struct at_xdmac *atxdmac = (struct at_xdmac *)platform_get_drvdata(pdev);
+ int i;
+
+ at_xdmac_off(atxdmac);
+ of_dma_controller_free(pdev->dev.of_node);
+ dma_async_device_unregister(&atxdmac->dma);
+ clk_disable_unprepare(atxdmac->clk);
+
+ free_irq(atxdmac->irq, atxdmac);
+
+ for (i = 0; i < atxdmac->dma.chancnt; i++) {
+ struct at_xdmac_chan *atchan = &atxdmac->chan[i];
+
+ tasklet_kill(&atchan->tasklet);
+ at_xdmac_free_chan_resources(&atchan->chan);
+ }
+
+ return 0;
+}
+
+static const struct dev_pm_ops __maybe_unused atmel_xdmac_dev_pm_ops = {
+ .prepare = atmel_xdmac_prepare,
+ SET_LATE_SYSTEM_SLEEP_PM_OPS(atmel_xdmac_suspend, atmel_xdmac_resume)
+};
+
+static const struct of_device_id atmel_xdmac_dt_ids[] = {
+ {
+ .compatible = "atmel,sama5d4-dma",
+ .data = &at_xdmac_sama5d4_layout,
+ }, {
+ .compatible = "microchip,sama7g5-dma",
+ .data = &at_xdmac_sama7g5_layout,
+ }, {
+ /* sentinel */
+ }
+};
+MODULE_DEVICE_TABLE(of, atmel_xdmac_dt_ids);
+
+static struct platform_driver at_xdmac_driver = {
+ .probe = at_xdmac_probe,
+ .remove = at_xdmac_remove,
+ .driver = {
+ .name = "at_xdmac",
+ .of_match_table = of_match_ptr(atmel_xdmac_dt_ids),
+ .pm = pm_ptr(&atmel_xdmac_dev_pm_ops),
+ }
+};
+
+static int __init at_xdmac_init(void)
+{
+ return platform_driver_register(&at_xdmac_driver);
+}
+subsys_initcall(at_xdmac_init);
+
+static void __exit at_xdmac_exit(void)
+{
+ platform_driver_unregister(&at_xdmac_driver);
+}
+module_exit(at_xdmac_exit);
+
+MODULE_DESCRIPTION("Atmel Extended DMA Controller driver");
+MODULE_AUTHOR("Ludovic Desroches <ludovic.desroches@atmel.com>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/dma/bcm-sba-raid.c b/drivers/dma/bcm-sba-raid.c
new file mode 100644
index 000000000..064761289
--- /dev/null
+++ b/drivers/dma/bcm-sba-raid.c
@@ -0,0 +1,1769 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright (C) 2017 Broadcom
+
+/*
+ * Broadcom SBA RAID Driver
+ *
+ * The Broadcom stream buffer accelerator (SBA) provides offloading
+ * capabilities for RAID operations. The SBA offload engine is accessible
+ * via Broadcom SoC specific ring manager. Two or more offload engines
+ * can share same Broadcom SoC specific ring manager due to this Broadcom
+ * SoC specific ring manager driver is implemented as a mailbox controller
+ * driver and offload engine drivers are implemented as mallbox clients.
+ *
+ * Typically, Broadcom SoC specific ring manager will implement larger
+ * number of hardware rings over one or more SBA hardware devices. By
+ * design, the internal buffer size of SBA hardware device is limited
+ * but all offload operations supported by SBA can be broken down into
+ * multiple small size requests and executed parallely on multiple SBA
+ * hardware devices for achieving high through-put.
+ *
+ * The Broadcom SBA RAID driver does not require any register programming
+ * except submitting request to SBA hardware device via mailbox channels.
+ * This driver implements a DMA device with one DMA channel using a single
+ * mailbox channel provided by Broadcom SoC specific ring manager driver.
+ * For having more SBA DMA channels, we can create more SBA device nodes
+ * in Broadcom SoC specific DTS based on number of hardware rings supported
+ * by Broadcom SoC ring manager.
+ */
+
+#include <linux/bitops.h>
+#include <linux/debugfs.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmaengine.h>
+#include <linux/list.h>
+#include <linux/mailbox_client.h>
+#include <linux/mailbox/brcm-message.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/slab.h>
+#include <linux/raid/pq.h>
+
+#include "dmaengine.h"
+
+/* ====== Driver macros and defines ===== */
+
+#define SBA_TYPE_SHIFT 48
+#define SBA_TYPE_MASK GENMASK(1, 0)
+#define SBA_TYPE_A 0x0
+#define SBA_TYPE_B 0x2
+#define SBA_TYPE_C 0x3
+#define SBA_USER_DEF_SHIFT 32
+#define SBA_USER_DEF_MASK GENMASK(15, 0)
+#define SBA_R_MDATA_SHIFT 24
+#define SBA_R_MDATA_MASK GENMASK(7, 0)
+#define SBA_C_MDATA_MS_SHIFT 18
+#define SBA_C_MDATA_MS_MASK GENMASK(1, 0)
+#define SBA_INT_SHIFT 17
+#define SBA_INT_MASK BIT(0)
+#define SBA_RESP_SHIFT 16
+#define SBA_RESP_MASK BIT(0)
+#define SBA_C_MDATA_SHIFT 8
+#define SBA_C_MDATA_MASK GENMASK(7, 0)
+#define SBA_C_MDATA_BNUMx_SHIFT(__bnum) (2 * (__bnum))
+#define SBA_C_MDATA_BNUMx_MASK GENMASK(1, 0)
+#define SBA_C_MDATA_DNUM_SHIFT 5
+#define SBA_C_MDATA_DNUM_MASK GENMASK(4, 0)
+#define SBA_C_MDATA_LS(__v) ((__v) & 0xff)
+#define SBA_C_MDATA_MS(__v) (((__v) >> 8) & 0x3)
+#define SBA_CMD_SHIFT 0
+#define SBA_CMD_MASK GENMASK(3, 0)
+#define SBA_CMD_ZERO_BUFFER 0x4
+#define SBA_CMD_ZERO_ALL_BUFFERS 0x8
+#define SBA_CMD_LOAD_BUFFER 0x9
+#define SBA_CMD_XOR 0xa
+#define SBA_CMD_GALOIS_XOR 0xb
+#define SBA_CMD_WRITE_BUFFER 0xc
+#define SBA_CMD_GALOIS 0xe
+
+#define SBA_MAX_REQ_PER_MBOX_CHANNEL 8192
+#define SBA_MAX_MSG_SEND_PER_MBOX_CHANNEL 8
+
+/* Driver helper macros */
+#define to_sba_request(tx) \
+ container_of(tx, struct sba_request, tx)
+#define to_sba_device(dchan) \
+ container_of(dchan, struct sba_device, dma_chan)
+
+/* ===== Driver data structures ===== */
+
+enum sba_request_flags {
+ SBA_REQUEST_STATE_FREE = 0x001,
+ SBA_REQUEST_STATE_ALLOCED = 0x002,
+ SBA_REQUEST_STATE_PENDING = 0x004,
+ SBA_REQUEST_STATE_ACTIVE = 0x008,
+ SBA_REQUEST_STATE_ABORTED = 0x010,
+ SBA_REQUEST_STATE_MASK = 0x0ff,
+ SBA_REQUEST_FENCE = 0x100,
+};
+
+struct sba_request {
+ /* Global state */
+ struct list_head node;
+ struct sba_device *sba;
+ u32 flags;
+ /* Chained requests management */
+ struct sba_request *first;
+ struct list_head next;
+ atomic_t next_pending_count;
+ /* BRCM message data */
+ struct brcm_message msg;
+ struct dma_async_tx_descriptor tx;
+ /* SBA commands */
+ struct brcm_sba_command cmds[];
+};
+
+enum sba_version {
+ SBA_VER_1 = 0,
+ SBA_VER_2
+};
+
+struct sba_device {
+ /* Underlying device */
+ struct device *dev;
+ /* DT configuration parameters */
+ enum sba_version ver;
+ /* Derived configuration parameters */
+ u32 max_req;
+ u32 hw_buf_size;
+ u32 hw_resp_size;
+ u32 max_pq_coefs;
+ u32 max_pq_srcs;
+ u32 max_cmd_per_req;
+ u32 max_xor_srcs;
+ u32 max_resp_pool_size;
+ u32 max_cmds_pool_size;
+ /* Maibox client and Mailbox channels */
+ struct mbox_client client;
+ struct mbox_chan *mchan;
+ struct device *mbox_dev;
+ /* DMA device and DMA channel */
+ struct dma_device dma_dev;
+ struct dma_chan dma_chan;
+ /* DMA channel resources */
+ void *resp_base;
+ dma_addr_t resp_dma_base;
+ void *cmds_base;
+ dma_addr_t cmds_dma_base;
+ spinlock_t reqs_lock;
+ bool reqs_fence;
+ struct list_head reqs_alloc_list;
+ struct list_head reqs_pending_list;
+ struct list_head reqs_active_list;
+ struct list_head reqs_aborted_list;
+ struct list_head reqs_free_list;
+ /* DebugFS directory entries */
+ struct dentry *root;
+};
+
+/* ====== Command helper routines ===== */
+
+static inline u64 __pure sba_cmd_enc(u64 cmd, u32 val, u32 shift, u32 mask)
+{
+ cmd &= ~((u64)mask << shift);
+ cmd |= ((u64)(val & mask) << shift);
+ return cmd;
+}
+
+static inline u32 __pure sba_cmd_load_c_mdata(u32 b0)
+{
+ return b0 & SBA_C_MDATA_BNUMx_MASK;
+}
+
+static inline u32 __pure sba_cmd_write_c_mdata(u32 b0)
+{
+ return b0 & SBA_C_MDATA_BNUMx_MASK;
+}
+
+static inline u32 __pure sba_cmd_xor_c_mdata(u32 b1, u32 b0)
+{
+ return (b0 & SBA_C_MDATA_BNUMx_MASK) |
+ ((b1 & SBA_C_MDATA_BNUMx_MASK) << SBA_C_MDATA_BNUMx_SHIFT(1));
+}
+
+static inline u32 __pure sba_cmd_pq_c_mdata(u32 d, u32 b1, u32 b0)
+{
+ return (b0 & SBA_C_MDATA_BNUMx_MASK) |
+ ((b1 & SBA_C_MDATA_BNUMx_MASK) << SBA_C_MDATA_BNUMx_SHIFT(1)) |
+ ((d & SBA_C_MDATA_DNUM_MASK) << SBA_C_MDATA_DNUM_SHIFT);
+}
+
+/* ====== General helper routines ===== */
+
+static struct sba_request *sba_alloc_request(struct sba_device *sba)
+{
+ bool found = false;
+ unsigned long flags;
+ struct sba_request *req = NULL;
+
+ spin_lock_irqsave(&sba->reqs_lock, flags);
+ list_for_each_entry(req, &sba->reqs_free_list, node) {
+ if (async_tx_test_ack(&req->tx)) {
+ list_move_tail(&req->node, &sba->reqs_alloc_list);
+ found = true;
+ break;
+ }
+ }
+ spin_unlock_irqrestore(&sba->reqs_lock, flags);
+
+ if (!found) {
+ /*
+ * We have no more free requests so, we peek
+ * mailbox channels hoping few active requests
+ * would have completed which will create more
+ * room for new requests.
+ */
+ mbox_client_peek_data(sba->mchan);
+ return NULL;
+ }
+
+ req->flags = SBA_REQUEST_STATE_ALLOCED;
+ req->first = req;
+ INIT_LIST_HEAD(&req->next);
+ atomic_set(&req->next_pending_count, 1);
+
+ dma_async_tx_descriptor_init(&req->tx, &sba->dma_chan);
+ async_tx_ack(&req->tx);
+
+ return req;
+}
+
+/* Note: Must be called with sba->reqs_lock held */
+static void _sba_pending_request(struct sba_device *sba,
+ struct sba_request *req)
+{
+ lockdep_assert_held(&sba->reqs_lock);
+ req->flags &= ~SBA_REQUEST_STATE_MASK;
+ req->flags |= SBA_REQUEST_STATE_PENDING;
+ list_move_tail(&req->node, &sba->reqs_pending_list);
+ if (list_empty(&sba->reqs_active_list))
+ sba->reqs_fence = false;
+}
+
+/* Note: Must be called with sba->reqs_lock held */
+static bool _sba_active_request(struct sba_device *sba,
+ struct sba_request *req)
+{
+ lockdep_assert_held(&sba->reqs_lock);
+ if (list_empty(&sba->reqs_active_list))
+ sba->reqs_fence = false;
+ if (sba->reqs_fence)
+ return false;
+ req->flags &= ~SBA_REQUEST_STATE_MASK;
+ req->flags |= SBA_REQUEST_STATE_ACTIVE;
+ list_move_tail(&req->node, &sba->reqs_active_list);
+ if (req->flags & SBA_REQUEST_FENCE)
+ sba->reqs_fence = true;
+ return true;
+}
+
+/* Note: Must be called with sba->reqs_lock held */
+static void _sba_abort_request(struct sba_device *sba,
+ struct sba_request *req)
+{
+ lockdep_assert_held(&sba->reqs_lock);
+ req->flags &= ~SBA_REQUEST_STATE_MASK;
+ req->flags |= SBA_REQUEST_STATE_ABORTED;
+ list_move_tail(&req->node, &sba->reqs_aborted_list);
+ if (list_empty(&sba->reqs_active_list))
+ sba->reqs_fence = false;
+}
+
+/* Note: Must be called with sba->reqs_lock held */
+static void _sba_free_request(struct sba_device *sba,
+ struct sba_request *req)
+{
+ lockdep_assert_held(&sba->reqs_lock);
+ req->flags &= ~SBA_REQUEST_STATE_MASK;
+ req->flags |= SBA_REQUEST_STATE_FREE;
+ list_move_tail(&req->node, &sba->reqs_free_list);
+ if (list_empty(&sba->reqs_active_list))
+ sba->reqs_fence = false;
+}
+
+static void sba_free_chained_requests(struct sba_request *req)
+{
+ unsigned long flags;
+ struct sba_request *nreq;
+ struct sba_device *sba = req->sba;
+
+ spin_lock_irqsave(&sba->reqs_lock, flags);
+
+ _sba_free_request(sba, req);
+ list_for_each_entry(nreq, &req->next, next)
+ _sba_free_request(sba, nreq);
+
+ spin_unlock_irqrestore(&sba->reqs_lock, flags);
+}
+
+static void sba_chain_request(struct sba_request *first,
+ struct sba_request *req)
+{
+ unsigned long flags;
+ struct sba_device *sba = req->sba;
+
+ spin_lock_irqsave(&sba->reqs_lock, flags);
+
+ list_add_tail(&req->next, &first->next);
+ req->first = first;
+ atomic_inc(&first->next_pending_count);
+
+ spin_unlock_irqrestore(&sba->reqs_lock, flags);
+}
+
+static void sba_cleanup_nonpending_requests(struct sba_device *sba)
+{
+ unsigned long flags;
+ struct sba_request *req, *req1;
+
+ spin_lock_irqsave(&sba->reqs_lock, flags);
+
+ /* Freeup all alloced request */
+ list_for_each_entry_safe(req, req1, &sba->reqs_alloc_list, node)
+ _sba_free_request(sba, req);
+
+ /* Set all active requests as aborted */
+ list_for_each_entry_safe(req, req1, &sba->reqs_active_list, node)
+ _sba_abort_request(sba, req);
+
+ /*
+ * Note: We expect that aborted request will be eventually
+ * freed by sba_receive_message()
+ */
+
+ spin_unlock_irqrestore(&sba->reqs_lock, flags);
+}
+
+static void sba_cleanup_pending_requests(struct sba_device *sba)
+{
+ unsigned long flags;
+ struct sba_request *req, *req1;
+
+ spin_lock_irqsave(&sba->reqs_lock, flags);
+
+ /* Freeup all pending request */
+ list_for_each_entry_safe(req, req1, &sba->reqs_pending_list, node)
+ _sba_free_request(sba, req);
+
+ spin_unlock_irqrestore(&sba->reqs_lock, flags);
+}
+
+static int sba_send_mbox_request(struct sba_device *sba,
+ struct sba_request *req)
+{
+ int ret = 0;
+
+ /* Send message for the request */
+ req->msg.error = 0;
+ ret = mbox_send_message(sba->mchan, &req->msg);
+ if (ret < 0) {
+ dev_err(sba->dev, "send message failed with error %d", ret);
+ return ret;
+ }
+
+ /* Check error returned by mailbox controller */
+ ret = req->msg.error;
+ if (ret < 0) {
+ dev_err(sba->dev, "message error %d", ret);
+ }
+
+ /* Signal txdone for mailbox channel */
+ mbox_client_txdone(sba->mchan, ret);
+
+ return ret;
+}
+
+/* Note: Must be called with sba->reqs_lock held */
+static void _sba_process_pending_requests(struct sba_device *sba)
+{
+ int ret;
+ u32 count;
+ struct sba_request *req;
+
+ /* Process few pending requests */
+ count = SBA_MAX_MSG_SEND_PER_MBOX_CHANNEL;
+ while (!list_empty(&sba->reqs_pending_list) && count) {
+ /* Get the first pending request */
+ req = list_first_entry(&sba->reqs_pending_list,
+ struct sba_request, node);
+
+ /* Try to make request active */
+ if (!_sba_active_request(sba, req))
+ break;
+
+ /* Send request to mailbox channel */
+ ret = sba_send_mbox_request(sba, req);
+ if (ret < 0) {
+ _sba_pending_request(sba, req);
+ break;
+ }
+
+ count--;
+ }
+}
+
+static void sba_process_received_request(struct sba_device *sba,
+ struct sba_request *req)
+{
+ unsigned long flags;
+ struct dma_async_tx_descriptor *tx;
+ struct sba_request *nreq, *first = req->first;
+
+ /* Process only after all chained requests are received */
+ if (!atomic_dec_return(&first->next_pending_count)) {
+ tx = &first->tx;
+
+ WARN_ON(tx->cookie < 0);
+ if (tx->cookie > 0) {
+ spin_lock_irqsave(&sba->reqs_lock, flags);
+ dma_cookie_complete(tx);
+ spin_unlock_irqrestore(&sba->reqs_lock, flags);
+ dmaengine_desc_get_callback_invoke(tx, NULL);
+ dma_descriptor_unmap(tx);
+ tx->callback = NULL;
+ tx->callback_result = NULL;
+ }
+
+ dma_run_dependencies(tx);
+
+ spin_lock_irqsave(&sba->reqs_lock, flags);
+
+ /* Free all requests chained to first request */
+ list_for_each_entry(nreq, &first->next, next)
+ _sba_free_request(sba, nreq);
+ INIT_LIST_HEAD(&first->next);
+
+ /* Free the first request */
+ _sba_free_request(sba, first);
+
+ /* Process pending requests */
+ _sba_process_pending_requests(sba);
+
+ spin_unlock_irqrestore(&sba->reqs_lock, flags);
+ }
+}
+
+static void sba_write_stats_in_seqfile(struct sba_device *sba,
+ struct seq_file *file)
+{
+ unsigned long flags;
+ struct sba_request *req;
+ u32 free_count = 0, alloced_count = 0;
+ u32 pending_count = 0, active_count = 0, aborted_count = 0;
+
+ spin_lock_irqsave(&sba->reqs_lock, flags);
+
+ list_for_each_entry(req, &sba->reqs_free_list, node)
+ if (async_tx_test_ack(&req->tx))
+ free_count++;
+
+ list_for_each_entry(req, &sba->reqs_alloc_list, node)
+ alloced_count++;
+
+ list_for_each_entry(req, &sba->reqs_pending_list, node)
+ pending_count++;
+
+ list_for_each_entry(req, &sba->reqs_active_list, node)
+ active_count++;
+
+ list_for_each_entry(req, &sba->reqs_aborted_list, node)
+ aborted_count++;
+
+ spin_unlock_irqrestore(&sba->reqs_lock, flags);
+
+ seq_printf(file, "maximum requests = %d\n", sba->max_req);
+ seq_printf(file, "free requests = %d\n", free_count);
+ seq_printf(file, "alloced requests = %d\n", alloced_count);
+ seq_printf(file, "pending requests = %d\n", pending_count);
+ seq_printf(file, "active requests = %d\n", active_count);
+ seq_printf(file, "aborted requests = %d\n", aborted_count);
+}
+
+/* ====== DMAENGINE callbacks ===== */
+
+static void sba_free_chan_resources(struct dma_chan *dchan)
+{
+ /*
+ * Channel resources are pre-alloced so we just free-up
+ * whatever we can so that we can re-use pre-alloced
+ * channel resources next time.
+ */
+ sba_cleanup_nonpending_requests(to_sba_device(dchan));
+}
+
+static int sba_device_terminate_all(struct dma_chan *dchan)
+{
+ /* Cleanup all pending requests */
+ sba_cleanup_pending_requests(to_sba_device(dchan));
+
+ return 0;
+}
+
+static void sba_issue_pending(struct dma_chan *dchan)
+{
+ unsigned long flags;
+ struct sba_device *sba = to_sba_device(dchan);
+
+ /* Process pending requests */
+ spin_lock_irqsave(&sba->reqs_lock, flags);
+ _sba_process_pending_requests(sba);
+ spin_unlock_irqrestore(&sba->reqs_lock, flags);
+}
+
+static dma_cookie_t sba_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+ unsigned long flags;
+ dma_cookie_t cookie;
+ struct sba_device *sba;
+ struct sba_request *req, *nreq;
+
+ if (unlikely(!tx))
+ return -EINVAL;
+
+ sba = to_sba_device(tx->chan);
+ req = to_sba_request(tx);
+
+ /* Assign cookie and mark all chained requests pending */
+ spin_lock_irqsave(&sba->reqs_lock, flags);
+ cookie = dma_cookie_assign(tx);
+ _sba_pending_request(sba, req);
+ list_for_each_entry(nreq, &req->next, next)
+ _sba_pending_request(sba, nreq);
+ spin_unlock_irqrestore(&sba->reqs_lock, flags);
+
+ return cookie;
+}
+
+static enum dma_status sba_tx_status(struct dma_chan *dchan,
+ dma_cookie_t cookie,
+ struct dma_tx_state *txstate)
+{
+ enum dma_status ret;
+ struct sba_device *sba = to_sba_device(dchan);
+
+ ret = dma_cookie_status(dchan, cookie, txstate);
+ if (ret == DMA_COMPLETE)
+ return ret;
+
+ mbox_client_peek_data(sba->mchan);
+
+ return dma_cookie_status(dchan, cookie, txstate);
+}
+
+static void sba_fillup_interrupt_msg(struct sba_request *req,
+ struct brcm_sba_command *cmds,
+ struct brcm_message *msg)
+{
+ u64 cmd;
+ u32 c_mdata;
+ dma_addr_t resp_dma = req->tx.phys;
+ struct brcm_sba_command *cmdsp = cmds;
+
+ /* Type-B command to load dummy data into buf0 */
+ cmd = sba_cmd_enc(0x0, SBA_TYPE_B,
+ SBA_TYPE_SHIFT, SBA_TYPE_MASK);
+ cmd = sba_cmd_enc(cmd, req->sba->hw_resp_size,
+ SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK);
+ c_mdata = sba_cmd_load_c_mdata(0);
+ cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata),
+ SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK);
+ cmd = sba_cmd_enc(cmd, SBA_CMD_LOAD_BUFFER,
+ SBA_CMD_SHIFT, SBA_CMD_MASK);
+ cmdsp->cmd = cmd;
+ *cmdsp->cmd_dma = cpu_to_le64(cmd);
+ cmdsp->flags = BRCM_SBA_CMD_TYPE_B;
+ cmdsp->data = resp_dma;
+ cmdsp->data_len = req->sba->hw_resp_size;
+ cmdsp++;
+
+ /* Type-A command to write buf0 to dummy location */
+ cmd = sba_cmd_enc(0x0, SBA_TYPE_A,
+ SBA_TYPE_SHIFT, SBA_TYPE_MASK);
+ cmd = sba_cmd_enc(cmd, req->sba->hw_resp_size,
+ SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK);
+ cmd = sba_cmd_enc(cmd, 0x1,
+ SBA_RESP_SHIFT, SBA_RESP_MASK);
+ c_mdata = sba_cmd_write_c_mdata(0);
+ cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata),
+ SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK);
+ cmd = sba_cmd_enc(cmd, SBA_CMD_WRITE_BUFFER,
+ SBA_CMD_SHIFT, SBA_CMD_MASK);
+ cmdsp->cmd = cmd;
+ *cmdsp->cmd_dma = cpu_to_le64(cmd);
+ cmdsp->flags = BRCM_SBA_CMD_TYPE_A;
+ if (req->sba->hw_resp_size) {
+ cmdsp->flags |= BRCM_SBA_CMD_HAS_RESP;
+ cmdsp->resp = resp_dma;
+ cmdsp->resp_len = req->sba->hw_resp_size;
+ }
+ cmdsp->flags |= BRCM_SBA_CMD_HAS_OUTPUT;
+ cmdsp->data = resp_dma;
+ cmdsp->data_len = req->sba->hw_resp_size;
+ cmdsp++;
+
+ /* Fillup brcm_message */
+ msg->type = BRCM_MESSAGE_SBA;
+ msg->sba.cmds = cmds;
+ msg->sba.cmds_count = cmdsp - cmds;
+ msg->ctx = req;
+ msg->error = 0;
+}
+
+static struct dma_async_tx_descriptor *
+sba_prep_dma_interrupt(struct dma_chan *dchan, unsigned long flags)
+{
+ struct sba_request *req = NULL;
+ struct sba_device *sba = to_sba_device(dchan);
+
+ /* Alloc new request */
+ req = sba_alloc_request(sba);
+ if (!req)
+ return NULL;
+
+ /*
+ * Force fence so that no requests are submitted
+ * until DMA callback for this request is invoked.
+ */
+ req->flags |= SBA_REQUEST_FENCE;
+
+ /* Fillup request message */
+ sba_fillup_interrupt_msg(req, req->cmds, &req->msg);
+
+ /* Init async_tx descriptor */
+ req->tx.flags = flags;
+ req->tx.cookie = -EBUSY;
+
+ return &req->tx;
+}
+
+static void sba_fillup_memcpy_msg(struct sba_request *req,
+ struct brcm_sba_command *cmds,
+ struct brcm_message *msg,
+ dma_addr_t msg_offset, size_t msg_len,
+ dma_addr_t dst, dma_addr_t src)
+{
+ u64 cmd;
+ u32 c_mdata;
+ dma_addr_t resp_dma = req->tx.phys;
+ struct brcm_sba_command *cmdsp = cmds;
+
+ /* Type-B command to load data into buf0 */
+ cmd = sba_cmd_enc(0x0, SBA_TYPE_B,
+ SBA_TYPE_SHIFT, SBA_TYPE_MASK);
+ cmd = sba_cmd_enc(cmd, msg_len,
+ SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK);
+ c_mdata = sba_cmd_load_c_mdata(0);
+ cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata),
+ SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK);
+ cmd = sba_cmd_enc(cmd, SBA_CMD_LOAD_BUFFER,
+ SBA_CMD_SHIFT, SBA_CMD_MASK);
+ cmdsp->cmd = cmd;
+ *cmdsp->cmd_dma = cpu_to_le64(cmd);
+ cmdsp->flags = BRCM_SBA_CMD_TYPE_B;
+ cmdsp->data = src + msg_offset;
+ cmdsp->data_len = msg_len;
+ cmdsp++;
+
+ /* Type-A command to write buf0 */
+ cmd = sba_cmd_enc(0x0, SBA_TYPE_A,
+ SBA_TYPE_SHIFT, SBA_TYPE_MASK);
+ cmd = sba_cmd_enc(cmd, msg_len,
+ SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK);
+ cmd = sba_cmd_enc(cmd, 0x1,
+ SBA_RESP_SHIFT, SBA_RESP_MASK);
+ c_mdata = sba_cmd_write_c_mdata(0);
+ cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata),
+ SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK);
+ cmd = sba_cmd_enc(cmd, SBA_CMD_WRITE_BUFFER,
+ SBA_CMD_SHIFT, SBA_CMD_MASK);
+ cmdsp->cmd = cmd;
+ *cmdsp->cmd_dma = cpu_to_le64(cmd);
+ cmdsp->flags = BRCM_SBA_CMD_TYPE_A;
+ if (req->sba->hw_resp_size) {
+ cmdsp->flags |= BRCM_SBA_CMD_HAS_RESP;
+ cmdsp->resp = resp_dma;
+ cmdsp->resp_len = req->sba->hw_resp_size;
+ }
+ cmdsp->flags |= BRCM_SBA_CMD_HAS_OUTPUT;
+ cmdsp->data = dst + msg_offset;
+ cmdsp->data_len = msg_len;
+ cmdsp++;
+
+ /* Fillup brcm_message */
+ msg->type = BRCM_MESSAGE_SBA;
+ msg->sba.cmds = cmds;
+ msg->sba.cmds_count = cmdsp - cmds;
+ msg->ctx = req;
+ msg->error = 0;
+}
+
+static struct sba_request *
+sba_prep_dma_memcpy_req(struct sba_device *sba,
+ dma_addr_t off, dma_addr_t dst, dma_addr_t src,
+ size_t len, unsigned long flags)
+{
+ struct sba_request *req = NULL;
+
+ /* Alloc new request */
+ req = sba_alloc_request(sba);
+ if (!req)
+ return NULL;
+ if (flags & DMA_PREP_FENCE)
+ req->flags |= SBA_REQUEST_FENCE;
+
+ /* Fillup request message */
+ sba_fillup_memcpy_msg(req, req->cmds, &req->msg,
+ off, len, dst, src);
+
+ /* Init async_tx descriptor */
+ req->tx.flags = flags;
+ req->tx.cookie = -EBUSY;
+
+ return req;
+}
+
+static struct dma_async_tx_descriptor *
+sba_prep_dma_memcpy(struct dma_chan *dchan, dma_addr_t dst, dma_addr_t src,
+ size_t len, unsigned long flags)
+{
+ size_t req_len;
+ dma_addr_t off = 0;
+ struct sba_device *sba = to_sba_device(dchan);
+ struct sba_request *first = NULL, *req;
+
+ /* Create chained requests where each request is upto hw_buf_size */
+ while (len) {
+ req_len = (len < sba->hw_buf_size) ? len : sba->hw_buf_size;
+
+ req = sba_prep_dma_memcpy_req(sba, off, dst, src,
+ req_len, flags);
+ if (!req) {
+ if (first)
+ sba_free_chained_requests(first);
+ return NULL;
+ }
+
+ if (first)
+ sba_chain_request(first, req);
+ else
+ first = req;
+
+ off += req_len;
+ len -= req_len;
+ }
+
+ return (first) ? &first->tx : NULL;
+}
+
+static void sba_fillup_xor_msg(struct sba_request *req,
+ struct brcm_sba_command *cmds,
+ struct brcm_message *msg,
+ dma_addr_t msg_offset, size_t msg_len,
+ dma_addr_t dst, dma_addr_t *src, u32 src_cnt)
+{
+ u64 cmd;
+ u32 c_mdata;
+ unsigned int i;
+ dma_addr_t resp_dma = req->tx.phys;
+ struct brcm_sba_command *cmdsp = cmds;
+
+ /* Type-B command to load data into buf0 */
+ cmd = sba_cmd_enc(0x0, SBA_TYPE_B,
+ SBA_TYPE_SHIFT, SBA_TYPE_MASK);
+ cmd = sba_cmd_enc(cmd, msg_len,
+ SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK);
+ c_mdata = sba_cmd_load_c_mdata(0);
+ cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata),
+ SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK);
+ cmd = sba_cmd_enc(cmd, SBA_CMD_LOAD_BUFFER,
+ SBA_CMD_SHIFT, SBA_CMD_MASK);
+ cmdsp->cmd = cmd;
+ *cmdsp->cmd_dma = cpu_to_le64(cmd);
+ cmdsp->flags = BRCM_SBA_CMD_TYPE_B;
+ cmdsp->data = src[0] + msg_offset;
+ cmdsp->data_len = msg_len;
+ cmdsp++;
+
+ /* Type-B commands to xor data with buf0 and put it back in buf0 */
+ for (i = 1; i < src_cnt; i++) {
+ cmd = sba_cmd_enc(0x0, SBA_TYPE_B,
+ SBA_TYPE_SHIFT, SBA_TYPE_MASK);
+ cmd = sba_cmd_enc(cmd, msg_len,
+ SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK);
+ c_mdata = sba_cmd_xor_c_mdata(0, 0);
+ cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata),
+ SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK);
+ cmd = sba_cmd_enc(cmd, SBA_CMD_XOR,
+ SBA_CMD_SHIFT, SBA_CMD_MASK);
+ cmdsp->cmd = cmd;
+ *cmdsp->cmd_dma = cpu_to_le64(cmd);
+ cmdsp->flags = BRCM_SBA_CMD_TYPE_B;
+ cmdsp->data = src[i] + msg_offset;
+ cmdsp->data_len = msg_len;
+ cmdsp++;
+ }
+
+ /* Type-A command to write buf0 */
+ cmd = sba_cmd_enc(0x0, SBA_TYPE_A,
+ SBA_TYPE_SHIFT, SBA_TYPE_MASK);
+ cmd = sba_cmd_enc(cmd, msg_len,
+ SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK);
+ cmd = sba_cmd_enc(cmd, 0x1,
+ SBA_RESP_SHIFT, SBA_RESP_MASK);
+ c_mdata = sba_cmd_write_c_mdata(0);
+ cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata),
+ SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK);
+ cmd = sba_cmd_enc(cmd, SBA_CMD_WRITE_BUFFER,
+ SBA_CMD_SHIFT, SBA_CMD_MASK);
+ cmdsp->cmd = cmd;
+ *cmdsp->cmd_dma = cpu_to_le64(cmd);
+ cmdsp->flags = BRCM_SBA_CMD_TYPE_A;
+ if (req->sba->hw_resp_size) {
+ cmdsp->flags |= BRCM_SBA_CMD_HAS_RESP;
+ cmdsp->resp = resp_dma;
+ cmdsp->resp_len = req->sba->hw_resp_size;
+ }
+ cmdsp->flags |= BRCM_SBA_CMD_HAS_OUTPUT;
+ cmdsp->data = dst + msg_offset;
+ cmdsp->data_len = msg_len;
+ cmdsp++;
+
+ /* Fillup brcm_message */
+ msg->type = BRCM_MESSAGE_SBA;
+ msg->sba.cmds = cmds;
+ msg->sba.cmds_count = cmdsp - cmds;
+ msg->ctx = req;
+ msg->error = 0;
+}
+
+static struct sba_request *
+sba_prep_dma_xor_req(struct sba_device *sba,
+ dma_addr_t off, dma_addr_t dst, dma_addr_t *src,
+ u32 src_cnt, size_t len, unsigned long flags)
+{
+ struct sba_request *req = NULL;
+
+ /* Alloc new request */
+ req = sba_alloc_request(sba);
+ if (!req)
+ return NULL;
+ if (flags & DMA_PREP_FENCE)
+ req->flags |= SBA_REQUEST_FENCE;
+
+ /* Fillup request message */
+ sba_fillup_xor_msg(req, req->cmds, &req->msg,
+ off, len, dst, src, src_cnt);
+
+ /* Init async_tx descriptor */
+ req->tx.flags = flags;
+ req->tx.cookie = -EBUSY;
+
+ return req;
+}
+
+static struct dma_async_tx_descriptor *
+sba_prep_dma_xor(struct dma_chan *dchan, dma_addr_t dst, dma_addr_t *src,
+ u32 src_cnt, size_t len, unsigned long flags)
+{
+ size_t req_len;
+ dma_addr_t off = 0;
+ struct sba_device *sba = to_sba_device(dchan);
+ struct sba_request *first = NULL, *req;
+
+ /* Sanity checks */
+ if (unlikely(src_cnt > sba->max_xor_srcs))
+ return NULL;
+
+ /* Create chained requests where each request is upto hw_buf_size */
+ while (len) {
+ req_len = (len < sba->hw_buf_size) ? len : sba->hw_buf_size;
+
+ req = sba_prep_dma_xor_req(sba, off, dst, src, src_cnt,
+ req_len, flags);
+ if (!req) {
+ if (first)
+ sba_free_chained_requests(first);
+ return NULL;
+ }
+
+ if (first)
+ sba_chain_request(first, req);
+ else
+ first = req;
+
+ off += req_len;
+ len -= req_len;
+ }
+
+ return (first) ? &first->tx : NULL;
+}
+
+static void sba_fillup_pq_msg(struct sba_request *req,
+ bool pq_continue,
+ struct brcm_sba_command *cmds,
+ struct brcm_message *msg,
+ dma_addr_t msg_offset, size_t msg_len,
+ dma_addr_t *dst_p, dma_addr_t *dst_q,
+ const u8 *scf, dma_addr_t *src, u32 src_cnt)
+{
+ u64 cmd;
+ u32 c_mdata;
+ unsigned int i;
+ dma_addr_t resp_dma = req->tx.phys;
+ struct brcm_sba_command *cmdsp = cmds;
+
+ if (pq_continue) {
+ /* Type-B command to load old P into buf0 */
+ if (dst_p) {
+ cmd = sba_cmd_enc(0x0, SBA_TYPE_B,
+ SBA_TYPE_SHIFT, SBA_TYPE_MASK);
+ cmd = sba_cmd_enc(cmd, msg_len,
+ SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK);
+ c_mdata = sba_cmd_load_c_mdata(0);
+ cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata),
+ SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK);
+ cmd = sba_cmd_enc(cmd, SBA_CMD_LOAD_BUFFER,
+ SBA_CMD_SHIFT, SBA_CMD_MASK);
+ cmdsp->cmd = cmd;
+ *cmdsp->cmd_dma = cpu_to_le64(cmd);
+ cmdsp->flags = BRCM_SBA_CMD_TYPE_B;
+ cmdsp->data = *dst_p + msg_offset;
+ cmdsp->data_len = msg_len;
+ cmdsp++;
+ }
+
+ /* Type-B command to load old Q into buf1 */
+ if (dst_q) {
+ cmd = sba_cmd_enc(0x0, SBA_TYPE_B,
+ SBA_TYPE_SHIFT, SBA_TYPE_MASK);
+ cmd = sba_cmd_enc(cmd, msg_len,
+ SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK);
+ c_mdata = sba_cmd_load_c_mdata(1);
+ cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata),
+ SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK);
+ cmd = sba_cmd_enc(cmd, SBA_CMD_LOAD_BUFFER,
+ SBA_CMD_SHIFT, SBA_CMD_MASK);
+ cmdsp->cmd = cmd;
+ *cmdsp->cmd_dma = cpu_to_le64(cmd);
+ cmdsp->flags = BRCM_SBA_CMD_TYPE_B;
+ cmdsp->data = *dst_q + msg_offset;
+ cmdsp->data_len = msg_len;
+ cmdsp++;
+ }
+ } else {
+ /* Type-A command to zero all buffers */
+ cmd = sba_cmd_enc(0x0, SBA_TYPE_A,
+ SBA_TYPE_SHIFT, SBA_TYPE_MASK);
+ cmd = sba_cmd_enc(cmd, msg_len,
+ SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK);
+ cmd = sba_cmd_enc(cmd, SBA_CMD_ZERO_ALL_BUFFERS,
+ SBA_CMD_SHIFT, SBA_CMD_MASK);
+ cmdsp->cmd = cmd;
+ *cmdsp->cmd_dma = cpu_to_le64(cmd);
+ cmdsp->flags = BRCM_SBA_CMD_TYPE_A;
+ cmdsp++;
+ }
+
+ /* Type-B commands for generate P onto buf0 and Q onto buf1 */
+ for (i = 0; i < src_cnt; i++) {
+ cmd = sba_cmd_enc(0x0, SBA_TYPE_B,
+ SBA_TYPE_SHIFT, SBA_TYPE_MASK);
+ cmd = sba_cmd_enc(cmd, msg_len,
+ SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK);
+ c_mdata = sba_cmd_pq_c_mdata(raid6_gflog[scf[i]], 1, 0);
+ cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata),
+ SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK);
+ cmd = sba_cmd_enc(cmd, SBA_C_MDATA_MS(c_mdata),
+ SBA_C_MDATA_MS_SHIFT, SBA_C_MDATA_MS_MASK);
+ cmd = sba_cmd_enc(cmd, SBA_CMD_GALOIS_XOR,
+ SBA_CMD_SHIFT, SBA_CMD_MASK);
+ cmdsp->cmd = cmd;
+ *cmdsp->cmd_dma = cpu_to_le64(cmd);
+ cmdsp->flags = BRCM_SBA_CMD_TYPE_B;
+ cmdsp->data = src[i] + msg_offset;
+ cmdsp->data_len = msg_len;
+ cmdsp++;
+ }
+
+ /* Type-A command to write buf0 */
+ if (dst_p) {
+ cmd = sba_cmd_enc(0x0, SBA_TYPE_A,
+ SBA_TYPE_SHIFT, SBA_TYPE_MASK);
+ cmd = sba_cmd_enc(cmd, msg_len,
+ SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK);
+ cmd = sba_cmd_enc(cmd, 0x1,
+ SBA_RESP_SHIFT, SBA_RESP_MASK);
+ c_mdata = sba_cmd_write_c_mdata(0);
+ cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata),
+ SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK);
+ cmd = sba_cmd_enc(cmd, SBA_CMD_WRITE_BUFFER,
+ SBA_CMD_SHIFT, SBA_CMD_MASK);
+ cmdsp->cmd = cmd;
+ *cmdsp->cmd_dma = cpu_to_le64(cmd);
+ cmdsp->flags = BRCM_SBA_CMD_TYPE_A;
+ if (req->sba->hw_resp_size) {
+ cmdsp->flags |= BRCM_SBA_CMD_HAS_RESP;
+ cmdsp->resp = resp_dma;
+ cmdsp->resp_len = req->sba->hw_resp_size;
+ }
+ cmdsp->flags |= BRCM_SBA_CMD_HAS_OUTPUT;
+ cmdsp->data = *dst_p + msg_offset;
+ cmdsp->data_len = msg_len;
+ cmdsp++;
+ }
+
+ /* Type-A command to write buf1 */
+ if (dst_q) {
+ cmd = sba_cmd_enc(0x0, SBA_TYPE_A,
+ SBA_TYPE_SHIFT, SBA_TYPE_MASK);
+ cmd = sba_cmd_enc(cmd, msg_len,
+ SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK);
+ cmd = sba_cmd_enc(cmd, 0x1,
+ SBA_RESP_SHIFT, SBA_RESP_MASK);
+ c_mdata = sba_cmd_write_c_mdata(1);
+ cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata),
+ SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK);
+ cmd = sba_cmd_enc(cmd, SBA_CMD_WRITE_BUFFER,
+ SBA_CMD_SHIFT, SBA_CMD_MASK);
+ cmdsp->cmd = cmd;
+ *cmdsp->cmd_dma = cpu_to_le64(cmd);
+ cmdsp->flags = BRCM_SBA_CMD_TYPE_A;
+ if (req->sba->hw_resp_size) {
+ cmdsp->flags |= BRCM_SBA_CMD_HAS_RESP;
+ cmdsp->resp = resp_dma;
+ cmdsp->resp_len = req->sba->hw_resp_size;
+ }
+ cmdsp->flags |= BRCM_SBA_CMD_HAS_OUTPUT;
+ cmdsp->data = *dst_q + msg_offset;
+ cmdsp->data_len = msg_len;
+ cmdsp++;
+ }
+
+ /* Fillup brcm_message */
+ msg->type = BRCM_MESSAGE_SBA;
+ msg->sba.cmds = cmds;
+ msg->sba.cmds_count = cmdsp - cmds;
+ msg->ctx = req;
+ msg->error = 0;
+}
+
+static struct sba_request *
+sba_prep_dma_pq_req(struct sba_device *sba, dma_addr_t off,
+ dma_addr_t *dst_p, dma_addr_t *dst_q, dma_addr_t *src,
+ u32 src_cnt, const u8 *scf, size_t len, unsigned long flags)
+{
+ struct sba_request *req = NULL;
+
+ /* Alloc new request */
+ req = sba_alloc_request(sba);
+ if (!req)
+ return NULL;
+ if (flags & DMA_PREP_FENCE)
+ req->flags |= SBA_REQUEST_FENCE;
+
+ /* Fillup request messages */
+ sba_fillup_pq_msg(req, dmaf_continue(flags),
+ req->cmds, &req->msg,
+ off, len, dst_p, dst_q, scf, src, src_cnt);
+
+ /* Init async_tx descriptor */
+ req->tx.flags = flags;
+ req->tx.cookie = -EBUSY;
+
+ return req;
+}
+
+static void sba_fillup_pq_single_msg(struct sba_request *req,
+ bool pq_continue,
+ struct brcm_sba_command *cmds,
+ struct brcm_message *msg,
+ dma_addr_t msg_offset, size_t msg_len,
+ dma_addr_t *dst_p, dma_addr_t *dst_q,
+ dma_addr_t src, u8 scf)
+{
+ u64 cmd;
+ u32 c_mdata;
+ u8 pos, dpos = raid6_gflog[scf];
+ dma_addr_t resp_dma = req->tx.phys;
+ struct brcm_sba_command *cmdsp = cmds;
+
+ if (!dst_p)
+ goto skip_p;
+
+ if (pq_continue) {
+ /* Type-B command to load old P into buf0 */
+ cmd = sba_cmd_enc(0x0, SBA_TYPE_B,
+ SBA_TYPE_SHIFT, SBA_TYPE_MASK);
+ cmd = sba_cmd_enc(cmd, msg_len,
+ SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK);
+ c_mdata = sba_cmd_load_c_mdata(0);
+ cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata),
+ SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK);
+ cmd = sba_cmd_enc(cmd, SBA_CMD_LOAD_BUFFER,
+ SBA_CMD_SHIFT, SBA_CMD_MASK);
+ cmdsp->cmd = cmd;
+ *cmdsp->cmd_dma = cpu_to_le64(cmd);
+ cmdsp->flags = BRCM_SBA_CMD_TYPE_B;
+ cmdsp->data = *dst_p + msg_offset;
+ cmdsp->data_len = msg_len;
+ cmdsp++;
+
+ /*
+ * Type-B commands to xor data with buf0 and put it
+ * back in buf0
+ */
+ cmd = sba_cmd_enc(0x0, SBA_TYPE_B,
+ SBA_TYPE_SHIFT, SBA_TYPE_MASK);
+ cmd = sba_cmd_enc(cmd, msg_len,
+ SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK);
+ c_mdata = sba_cmd_xor_c_mdata(0, 0);
+ cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata),
+ SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK);
+ cmd = sba_cmd_enc(cmd, SBA_CMD_XOR,
+ SBA_CMD_SHIFT, SBA_CMD_MASK);
+ cmdsp->cmd = cmd;
+ *cmdsp->cmd_dma = cpu_to_le64(cmd);
+ cmdsp->flags = BRCM_SBA_CMD_TYPE_B;
+ cmdsp->data = src + msg_offset;
+ cmdsp->data_len = msg_len;
+ cmdsp++;
+ } else {
+ /* Type-B command to load old P into buf0 */
+ cmd = sba_cmd_enc(0x0, SBA_TYPE_B,
+ SBA_TYPE_SHIFT, SBA_TYPE_MASK);
+ cmd = sba_cmd_enc(cmd, msg_len,
+ SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK);
+ c_mdata = sba_cmd_load_c_mdata(0);
+ cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata),
+ SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK);
+ cmd = sba_cmd_enc(cmd, SBA_CMD_LOAD_BUFFER,
+ SBA_CMD_SHIFT, SBA_CMD_MASK);
+ cmdsp->cmd = cmd;
+ *cmdsp->cmd_dma = cpu_to_le64(cmd);
+ cmdsp->flags = BRCM_SBA_CMD_TYPE_B;
+ cmdsp->data = src + msg_offset;
+ cmdsp->data_len = msg_len;
+ cmdsp++;
+ }
+
+ /* Type-A command to write buf0 */
+ cmd = sba_cmd_enc(0x0, SBA_TYPE_A,
+ SBA_TYPE_SHIFT, SBA_TYPE_MASK);
+ cmd = sba_cmd_enc(cmd, msg_len,
+ SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK);
+ cmd = sba_cmd_enc(cmd, 0x1,
+ SBA_RESP_SHIFT, SBA_RESP_MASK);
+ c_mdata = sba_cmd_write_c_mdata(0);
+ cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata),
+ SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK);
+ cmd = sba_cmd_enc(cmd, SBA_CMD_WRITE_BUFFER,
+ SBA_CMD_SHIFT, SBA_CMD_MASK);
+ cmdsp->cmd = cmd;
+ *cmdsp->cmd_dma = cpu_to_le64(cmd);
+ cmdsp->flags = BRCM_SBA_CMD_TYPE_A;
+ if (req->sba->hw_resp_size) {
+ cmdsp->flags |= BRCM_SBA_CMD_HAS_RESP;
+ cmdsp->resp = resp_dma;
+ cmdsp->resp_len = req->sba->hw_resp_size;
+ }
+ cmdsp->flags |= BRCM_SBA_CMD_HAS_OUTPUT;
+ cmdsp->data = *dst_p + msg_offset;
+ cmdsp->data_len = msg_len;
+ cmdsp++;
+
+skip_p:
+ if (!dst_q)
+ goto skip_q;
+
+ /* Type-A command to zero all buffers */
+ cmd = sba_cmd_enc(0x0, SBA_TYPE_A,
+ SBA_TYPE_SHIFT, SBA_TYPE_MASK);
+ cmd = sba_cmd_enc(cmd, msg_len,
+ SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK);
+ cmd = sba_cmd_enc(cmd, SBA_CMD_ZERO_ALL_BUFFERS,
+ SBA_CMD_SHIFT, SBA_CMD_MASK);
+ cmdsp->cmd = cmd;
+ *cmdsp->cmd_dma = cpu_to_le64(cmd);
+ cmdsp->flags = BRCM_SBA_CMD_TYPE_A;
+ cmdsp++;
+
+ if (dpos == 255)
+ goto skip_q_computation;
+ pos = (dpos < req->sba->max_pq_coefs) ?
+ dpos : (req->sba->max_pq_coefs - 1);
+
+ /*
+ * Type-B command to generate initial Q from data
+ * and store output into buf0
+ */
+ cmd = sba_cmd_enc(0x0, SBA_TYPE_B,
+ SBA_TYPE_SHIFT, SBA_TYPE_MASK);
+ cmd = sba_cmd_enc(cmd, msg_len,
+ SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK);
+ c_mdata = sba_cmd_pq_c_mdata(pos, 0, 0);
+ cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata),
+ SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK);
+ cmd = sba_cmd_enc(cmd, SBA_C_MDATA_MS(c_mdata),
+ SBA_C_MDATA_MS_SHIFT, SBA_C_MDATA_MS_MASK);
+ cmd = sba_cmd_enc(cmd, SBA_CMD_GALOIS,
+ SBA_CMD_SHIFT, SBA_CMD_MASK);
+ cmdsp->cmd = cmd;
+ *cmdsp->cmd_dma = cpu_to_le64(cmd);
+ cmdsp->flags = BRCM_SBA_CMD_TYPE_B;
+ cmdsp->data = src + msg_offset;
+ cmdsp->data_len = msg_len;
+ cmdsp++;
+
+ dpos -= pos;
+
+ /* Multiple Type-A command to generate final Q */
+ while (dpos) {
+ pos = (dpos < req->sba->max_pq_coefs) ?
+ dpos : (req->sba->max_pq_coefs - 1);
+
+ /*
+ * Type-A command to generate Q with buf0 and
+ * buf1 store result in buf0
+ */
+ cmd = sba_cmd_enc(0x0, SBA_TYPE_A,
+ SBA_TYPE_SHIFT, SBA_TYPE_MASK);
+ cmd = sba_cmd_enc(cmd, msg_len,
+ SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK);
+ c_mdata = sba_cmd_pq_c_mdata(pos, 0, 1);
+ cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata),
+ SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK);
+ cmd = sba_cmd_enc(cmd, SBA_C_MDATA_MS(c_mdata),
+ SBA_C_MDATA_MS_SHIFT, SBA_C_MDATA_MS_MASK);
+ cmd = sba_cmd_enc(cmd, SBA_CMD_GALOIS,
+ SBA_CMD_SHIFT, SBA_CMD_MASK);
+ cmdsp->cmd = cmd;
+ *cmdsp->cmd_dma = cpu_to_le64(cmd);
+ cmdsp->flags = BRCM_SBA_CMD_TYPE_A;
+ cmdsp++;
+
+ dpos -= pos;
+ }
+
+skip_q_computation:
+ if (pq_continue) {
+ /*
+ * Type-B command to XOR previous output with
+ * buf0 and write it into buf0
+ */
+ cmd = sba_cmd_enc(0x0, SBA_TYPE_B,
+ SBA_TYPE_SHIFT, SBA_TYPE_MASK);
+ cmd = sba_cmd_enc(cmd, msg_len,
+ SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK);
+ c_mdata = sba_cmd_xor_c_mdata(0, 0);
+ cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata),
+ SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK);
+ cmd = sba_cmd_enc(cmd, SBA_CMD_XOR,
+ SBA_CMD_SHIFT, SBA_CMD_MASK);
+ cmdsp->cmd = cmd;
+ *cmdsp->cmd_dma = cpu_to_le64(cmd);
+ cmdsp->flags = BRCM_SBA_CMD_TYPE_B;
+ cmdsp->data = *dst_q + msg_offset;
+ cmdsp->data_len = msg_len;
+ cmdsp++;
+ }
+
+ /* Type-A command to write buf0 */
+ cmd = sba_cmd_enc(0x0, SBA_TYPE_A,
+ SBA_TYPE_SHIFT, SBA_TYPE_MASK);
+ cmd = sba_cmd_enc(cmd, msg_len,
+ SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK);
+ cmd = sba_cmd_enc(cmd, 0x1,
+ SBA_RESP_SHIFT, SBA_RESP_MASK);
+ c_mdata = sba_cmd_write_c_mdata(0);
+ cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata),
+ SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK);
+ cmd = sba_cmd_enc(cmd, SBA_CMD_WRITE_BUFFER,
+ SBA_CMD_SHIFT, SBA_CMD_MASK);
+ cmdsp->cmd = cmd;
+ *cmdsp->cmd_dma = cpu_to_le64(cmd);
+ cmdsp->flags = BRCM_SBA_CMD_TYPE_A;
+ if (req->sba->hw_resp_size) {
+ cmdsp->flags |= BRCM_SBA_CMD_HAS_RESP;
+ cmdsp->resp = resp_dma;
+ cmdsp->resp_len = req->sba->hw_resp_size;
+ }
+ cmdsp->flags |= BRCM_SBA_CMD_HAS_OUTPUT;
+ cmdsp->data = *dst_q + msg_offset;
+ cmdsp->data_len = msg_len;
+ cmdsp++;
+
+skip_q:
+ /* Fillup brcm_message */
+ msg->type = BRCM_MESSAGE_SBA;
+ msg->sba.cmds = cmds;
+ msg->sba.cmds_count = cmdsp - cmds;
+ msg->ctx = req;
+ msg->error = 0;
+}
+
+static struct sba_request *
+sba_prep_dma_pq_single_req(struct sba_device *sba, dma_addr_t off,
+ dma_addr_t *dst_p, dma_addr_t *dst_q,
+ dma_addr_t src, u8 scf, size_t len,
+ unsigned long flags)
+{
+ struct sba_request *req = NULL;
+
+ /* Alloc new request */
+ req = sba_alloc_request(sba);
+ if (!req)
+ return NULL;
+ if (flags & DMA_PREP_FENCE)
+ req->flags |= SBA_REQUEST_FENCE;
+
+ /* Fillup request messages */
+ sba_fillup_pq_single_msg(req, dmaf_continue(flags),
+ req->cmds, &req->msg, off, len,
+ dst_p, dst_q, src, scf);
+
+ /* Init async_tx descriptor */
+ req->tx.flags = flags;
+ req->tx.cookie = -EBUSY;
+
+ return req;
+}
+
+static struct dma_async_tx_descriptor *
+sba_prep_dma_pq(struct dma_chan *dchan, dma_addr_t *dst, dma_addr_t *src,
+ u32 src_cnt, const u8 *scf, size_t len, unsigned long flags)
+{
+ u32 i, dst_q_index;
+ size_t req_len;
+ bool slow = false;
+ dma_addr_t off = 0;
+ dma_addr_t *dst_p = NULL, *dst_q = NULL;
+ struct sba_device *sba = to_sba_device(dchan);
+ struct sba_request *first = NULL, *req;
+
+ /* Sanity checks */
+ if (unlikely(src_cnt > sba->max_pq_srcs))
+ return NULL;
+ for (i = 0; i < src_cnt; i++)
+ if (sba->max_pq_coefs <= raid6_gflog[scf[i]])
+ slow = true;
+
+ /* Figure-out P and Q destination addresses */
+ if (!(flags & DMA_PREP_PQ_DISABLE_P))
+ dst_p = &dst[0];
+ if (!(flags & DMA_PREP_PQ_DISABLE_Q))
+ dst_q = &dst[1];
+
+ /* Create chained requests where each request is upto hw_buf_size */
+ while (len) {
+ req_len = (len < sba->hw_buf_size) ? len : sba->hw_buf_size;
+
+ if (slow) {
+ dst_q_index = src_cnt;
+
+ if (dst_q) {
+ for (i = 0; i < src_cnt; i++) {
+ if (*dst_q == src[i]) {
+ dst_q_index = i;
+ break;
+ }
+ }
+ }
+
+ if (dst_q_index < src_cnt) {
+ i = dst_q_index;
+ req = sba_prep_dma_pq_single_req(sba,
+ off, dst_p, dst_q, src[i], scf[i],
+ req_len, flags | DMA_PREP_FENCE);
+ if (!req)
+ goto fail;
+
+ if (first)
+ sba_chain_request(first, req);
+ else
+ first = req;
+
+ flags |= DMA_PREP_CONTINUE;
+ }
+
+ for (i = 0; i < src_cnt; i++) {
+ if (dst_q_index == i)
+ continue;
+
+ req = sba_prep_dma_pq_single_req(sba,
+ off, dst_p, dst_q, src[i], scf[i],
+ req_len, flags | DMA_PREP_FENCE);
+ if (!req)
+ goto fail;
+
+ if (first)
+ sba_chain_request(first, req);
+ else
+ first = req;
+
+ flags |= DMA_PREP_CONTINUE;
+ }
+ } else {
+ req = sba_prep_dma_pq_req(sba, off,
+ dst_p, dst_q, src, src_cnt,
+ scf, req_len, flags);
+ if (!req)
+ goto fail;
+
+ if (first)
+ sba_chain_request(first, req);
+ else
+ first = req;
+ }
+
+ off += req_len;
+ len -= req_len;
+ }
+
+ return (first) ? &first->tx : NULL;
+
+fail:
+ if (first)
+ sba_free_chained_requests(first);
+ return NULL;
+}
+
+/* ====== Mailbox callbacks ===== */
+
+static void sba_receive_message(struct mbox_client *cl, void *msg)
+{
+ struct brcm_message *m = msg;
+ struct sba_request *req = m->ctx;
+ struct sba_device *sba = req->sba;
+
+ /* Error count if message has error */
+ if (m->error < 0)
+ dev_err(sba->dev, "%s got message with error %d",
+ dma_chan_name(&sba->dma_chan), m->error);
+
+ /* Process received request */
+ sba_process_received_request(sba, req);
+}
+
+/* ====== Debugfs callbacks ====== */
+
+static int sba_debugfs_stats_show(struct seq_file *file, void *offset)
+{
+ struct sba_device *sba = dev_get_drvdata(file->private);
+
+ /* Write stats in file */
+ sba_write_stats_in_seqfile(sba, file);
+
+ return 0;
+}
+
+/* ====== Platform driver routines ===== */
+
+static int sba_prealloc_channel_resources(struct sba_device *sba)
+{
+ int i, j, ret = 0;
+ struct sba_request *req = NULL;
+
+ sba->resp_base = dma_alloc_coherent(sba->mbox_dev,
+ sba->max_resp_pool_size,
+ &sba->resp_dma_base, GFP_KERNEL);
+ if (!sba->resp_base)
+ return -ENOMEM;
+
+ sba->cmds_base = dma_alloc_coherent(sba->mbox_dev,
+ sba->max_cmds_pool_size,
+ &sba->cmds_dma_base, GFP_KERNEL);
+ if (!sba->cmds_base) {
+ ret = -ENOMEM;
+ goto fail_free_resp_pool;
+ }
+
+ spin_lock_init(&sba->reqs_lock);
+ sba->reqs_fence = false;
+ INIT_LIST_HEAD(&sba->reqs_alloc_list);
+ INIT_LIST_HEAD(&sba->reqs_pending_list);
+ INIT_LIST_HEAD(&sba->reqs_active_list);
+ INIT_LIST_HEAD(&sba->reqs_aborted_list);
+ INIT_LIST_HEAD(&sba->reqs_free_list);
+
+ for (i = 0; i < sba->max_req; i++) {
+ req = devm_kzalloc(sba->dev,
+ struct_size(req, cmds, sba->max_cmd_per_req),
+ GFP_KERNEL);
+ if (!req) {
+ ret = -ENOMEM;
+ goto fail_free_cmds_pool;
+ }
+ INIT_LIST_HEAD(&req->node);
+ req->sba = sba;
+ req->flags = SBA_REQUEST_STATE_FREE;
+ INIT_LIST_HEAD(&req->next);
+ atomic_set(&req->next_pending_count, 0);
+ for (j = 0; j < sba->max_cmd_per_req; j++) {
+ req->cmds[j].cmd = 0;
+ req->cmds[j].cmd_dma = sba->cmds_base +
+ (i * sba->max_cmd_per_req + j) * sizeof(u64);
+ req->cmds[j].cmd_dma_addr = sba->cmds_dma_base +
+ (i * sba->max_cmd_per_req + j) * sizeof(u64);
+ req->cmds[j].flags = 0;
+ }
+ memset(&req->msg, 0, sizeof(req->msg));
+ dma_async_tx_descriptor_init(&req->tx, &sba->dma_chan);
+ async_tx_ack(&req->tx);
+ req->tx.tx_submit = sba_tx_submit;
+ req->tx.phys = sba->resp_dma_base + i * sba->hw_resp_size;
+ list_add_tail(&req->node, &sba->reqs_free_list);
+ }
+
+ return 0;
+
+fail_free_cmds_pool:
+ dma_free_coherent(sba->mbox_dev,
+ sba->max_cmds_pool_size,
+ sba->cmds_base, sba->cmds_dma_base);
+fail_free_resp_pool:
+ dma_free_coherent(sba->mbox_dev,
+ sba->max_resp_pool_size,
+ sba->resp_base, sba->resp_dma_base);
+ return ret;
+}
+
+static void sba_freeup_channel_resources(struct sba_device *sba)
+{
+ dmaengine_terminate_all(&sba->dma_chan);
+ dma_free_coherent(sba->mbox_dev, sba->max_cmds_pool_size,
+ sba->cmds_base, sba->cmds_dma_base);
+ dma_free_coherent(sba->mbox_dev, sba->max_resp_pool_size,
+ sba->resp_base, sba->resp_dma_base);
+ sba->resp_base = NULL;
+ sba->resp_dma_base = 0;
+}
+
+static int sba_async_register(struct sba_device *sba)
+{
+ int ret;
+ struct dma_device *dma_dev = &sba->dma_dev;
+
+ /* Initialize DMA channel cookie */
+ sba->dma_chan.device = dma_dev;
+ dma_cookie_init(&sba->dma_chan);
+
+ /* Initialize DMA device capability mask */
+ dma_cap_zero(dma_dev->cap_mask);
+ dma_cap_set(DMA_INTERRUPT, dma_dev->cap_mask);
+ dma_cap_set(DMA_MEMCPY, dma_dev->cap_mask);
+ dma_cap_set(DMA_XOR, dma_dev->cap_mask);
+ dma_cap_set(DMA_PQ, dma_dev->cap_mask);
+
+ /*
+ * Set mailbox channel device as the base device of
+ * our dma_device because the actual memory accesses
+ * will be done by mailbox controller
+ */
+ dma_dev->dev = sba->mbox_dev;
+
+ /* Set base prep routines */
+ dma_dev->device_free_chan_resources = sba_free_chan_resources;
+ dma_dev->device_terminate_all = sba_device_terminate_all;
+ dma_dev->device_issue_pending = sba_issue_pending;
+ dma_dev->device_tx_status = sba_tx_status;
+
+ /* Set interrupt routine */
+ if (dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask))
+ dma_dev->device_prep_dma_interrupt = sba_prep_dma_interrupt;
+
+ /* Set memcpy routine */
+ if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask))
+ dma_dev->device_prep_dma_memcpy = sba_prep_dma_memcpy;
+
+ /* Set xor routine and capability */
+ if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) {
+ dma_dev->device_prep_dma_xor = sba_prep_dma_xor;
+ dma_dev->max_xor = sba->max_xor_srcs;
+ }
+
+ /* Set pq routine and capability */
+ if (dma_has_cap(DMA_PQ, dma_dev->cap_mask)) {
+ dma_dev->device_prep_dma_pq = sba_prep_dma_pq;
+ dma_set_maxpq(dma_dev, sba->max_pq_srcs, 0);
+ }
+
+ /* Initialize DMA device channel list */
+ INIT_LIST_HEAD(&dma_dev->channels);
+ list_add_tail(&sba->dma_chan.device_node, &dma_dev->channels);
+
+ /* Register with Linux async DMA framework*/
+ ret = dma_async_device_register(dma_dev);
+ if (ret) {
+ dev_err(sba->dev, "async device register error %d", ret);
+ return ret;
+ }
+
+ dev_info(sba->dev, "%s capabilities: %s%s%s%s\n",
+ dma_chan_name(&sba->dma_chan),
+ dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask) ? "interrupt " : "",
+ dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask) ? "memcpy " : "",
+ dma_has_cap(DMA_XOR, dma_dev->cap_mask) ? "xor " : "",
+ dma_has_cap(DMA_PQ, dma_dev->cap_mask) ? "pq " : "");
+
+ return 0;
+}
+
+static int sba_probe(struct platform_device *pdev)
+{
+ int ret = 0;
+ struct sba_device *sba;
+ struct platform_device *mbox_pdev;
+ struct of_phandle_args args;
+
+ /* Allocate main SBA struct */
+ sba = devm_kzalloc(&pdev->dev, sizeof(*sba), GFP_KERNEL);
+ if (!sba)
+ return -ENOMEM;
+
+ sba->dev = &pdev->dev;
+ platform_set_drvdata(pdev, sba);
+
+ /* Number of mailbox channels should be atleast 1 */
+ ret = of_count_phandle_with_args(pdev->dev.of_node,
+ "mboxes", "#mbox-cells");
+ if (ret <= 0)
+ return -ENODEV;
+
+ /* Determine SBA version from DT compatible string */
+ if (of_device_is_compatible(sba->dev->of_node, "brcm,iproc-sba"))
+ sba->ver = SBA_VER_1;
+ else if (of_device_is_compatible(sba->dev->of_node,
+ "brcm,iproc-sba-v2"))
+ sba->ver = SBA_VER_2;
+ else
+ return -ENODEV;
+
+ /* Derived Configuration parameters */
+ switch (sba->ver) {
+ case SBA_VER_1:
+ sba->hw_buf_size = 4096;
+ sba->hw_resp_size = 8;
+ sba->max_pq_coefs = 6;
+ sba->max_pq_srcs = 6;
+ break;
+ case SBA_VER_2:
+ sba->hw_buf_size = 4096;
+ sba->hw_resp_size = 8;
+ sba->max_pq_coefs = 30;
+ /*
+ * We can support max_pq_srcs == max_pq_coefs because
+ * we are limited by number of SBA commands that we can
+ * fit in one message for underlying ring manager HW.
+ */
+ sba->max_pq_srcs = 12;
+ break;
+ default:
+ return -EINVAL;
+ }
+ sba->max_req = SBA_MAX_REQ_PER_MBOX_CHANNEL;
+ sba->max_cmd_per_req = sba->max_pq_srcs + 3;
+ sba->max_xor_srcs = sba->max_cmd_per_req - 1;
+ sba->max_resp_pool_size = sba->max_req * sba->hw_resp_size;
+ sba->max_cmds_pool_size = sba->max_req *
+ sba->max_cmd_per_req * sizeof(u64);
+
+ /* Setup mailbox client */
+ sba->client.dev = &pdev->dev;
+ sba->client.rx_callback = sba_receive_message;
+ sba->client.tx_block = false;
+ sba->client.knows_txdone = true;
+ sba->client.tx_tout = 0;
+
+ /* Request mailbox channel */
+ sba->mchan = mbox_request_channel(&sba->client, 0);
+ if (IS_ERR(sba->mchan)) {
+ ret = PTR_ERR(sba->mchan);
+ goto fail_free_mchan;
+ }
+
+ /* Find-out underlying mailbox device */
+ ret = of_parse_phandle_with_args(pdev->dev.of_node,
+ "mboxes", "#mbox-cells", 0, &args);
+ if (ret)
+ goto fail_free_mchan;
+ mbox_pdev = of_find_device_by_node(args.np);
+ of_node_put(args.np);
+ if (!mbox_pdev) {
+ ret = -ENODEV;
+ goto fail_free_mchan;
+ }
+ sba->mbox_dev = &mbox_pdev->dev;
+
+ /* Prealloc channel resource */
+ ret = sba_prealloc_channel_resources(sba);
+ if (ret)
+ goto fail_free_mchan;
+
+ /* Check availability of debugfs */
+ if (!debugfs_initialized())
+ goto skip_debugfs;
+
+ /* Create debugfs root entry */
+ sba->root = debugfs_create_dir(dev_name(sba->dev), NULL);
+
+ /* Create debugfs stats entry */
+ debugfs_create_devm_seqfile(sba->dev, "stats", sba->root,
+ sba_debugfs_stats_show);
+
+skip_debugfs:
+
+ /* Register DMA device with Linux async framework */
+ ret = sba_async_register(sba);
+ if (ret)
+ goto fail_free_resources;
+
+ /* Print device info */
+ dev_info(sba->dev, "%s using SBAv%d mailbox channel from %s",
+ dma_chan_name(&sba->dma_chan), sba->ver+1,
+ dev_name(sba->mbox_dev));
+
+ return 0;
+
+fail_free_resources:
+ debugfs_remove_recursive(sba->root);
+ sba_freeup_channel_resources(sba);
+fail_free_mchan:
+ mbox_free_channel(sba->mchan);
+ return ret;
+}
+
+static int sba_remove(struct platform_device *pdev)
+{
+ struct sba_device *sba = platform_get_drvdata(pdev);
+
+ dma_async_device_unregister(&sba->dma_dev);
+
+ debugfs_remove_recursive(sba->root);
+
+ sba_freeup_channel_resources(sba);
+
+ mbox_free_channel(sba->mchan);
+
+ return 0;
+}
+
+static const struct of_device_id sba_of_match[] = {
+ { .compatible = "brcm,iproc-sba", },
+ { .compatible = "brcm,iproc-sba-v2", },
+ {},
+};
+MODULE_DEVICE_TABLE(of, sba_of_match);
+
+static struct platform_driver sba_driver = {
+ .probe = sba_probe,
+ .remove = sba_remove,
+ .driver = {
+ .name = "bcm-sba-raid",
+ .of_match_table = sba_of_match,
+ },
+};
+module_platform_driver(sba_driver);
+
+MODULE_DESCRIPTION("Broadcom SBA RAID driver");
+MODULE_AUTHOR("Anup Patel <anup.patel@broadcom.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/bcm2835-dma.c b/drivers/dma/bcm2835-dma.c
new file mode 100644
index 000000000..630dfbb01
--- /dev/null
+++ b/drivers/dma/bcm2835-dma.c
@@ -0,0 +1,1048 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * BCM2835 DMA engine support
+ *
+ * Author: Florian Meier <florian.meier@koalo.de>
+ * Copyright 2013
+ *
+ * Based on
+ * OMAP DMAengine support by Russell King
+ *
+ * BCM2708 DMA Driver
+ * Copyright (C) 2010 Broadcom
+ *
+ * Raspberry Pi PCM I2S ALSA Driver
+ * Copyright (c) by Phil Poole 2013
+ *
+ * MARVELL MMP Peripheral DMA Driver
+ * Copyright 2012 Marvell International Ltd.
+ */
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmapool.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/io.h>
+#include <linux/spinlock.h>
+#include <linux/of.h>
+#include <linux/of_dma.h>
+
+#include "virt-dma.h"
+
+#define BCM2835_DMA_MAX_DMA_CHAN_SUPPORTED 14
+#define BCM2835_DMA_CHAN_NAME_SIZE 8
+
+/**
+ * struct bcm2835_dmadev - BCM2835 DMA controller
+ * @ddev: DMA device
+ * @base: base address of register map
+ * @zero_page: bus address of zero page (to detect transactions copying from
+ * zero page and avoid accessing memory if so)
+ */
+struct bcm2835_dmadev {
+ struct dma_device ddev;
+ void __iomem *base;
+ dma_addr_t zero_page;
+};
+
+struct bcm2835_dma_cb {
+ uint32_t info;
+ uint32_t src;
+ uint32_t dst;
+ uint32_t length;
+ uint32_t stride;
+ uint32_t next;
+ uint32_t pad[2];
+};
+
+struct bcm2835_cb_entry {
+ struct bcm2835_dma_cb *cb;
+ dma_addr_t paddr;
+};
+
+struct bcm2835_chan {
+ struct virt_dma_chan vc;
+
+ struct dma_slave_config cfg;
+ unsigned int dreq;
+
+ int ch;
+ struct bcm2835_desc *desc;
+ struct dma_pool *cb_pool;
+
+ void __iomem *chan_base;
+ int irq_number;
+ unsigned int irq_flags;
+
+ bool is_lite_channel;
+};
+
+struct bcm2835_desc {
+ struct bcm2835_chan *c;
+ struct virt_dma_desc vd;
+ enum dma_transfer_direction dir;
+
+ unsigned int frames;
+ size_t size;
+
+ bool cyclic;
+
+ struct bcm2835_cb_entry cb_list[];
+};
+
+#define BCM2835_DMA_CS 0x00
+#define BCM2835_DMA_ADDR 0x04
+#define BCM2835_DMA_TI 0x08
+#define BCM2835_DMA_SOURCE_AD 0x0c
+#define BCM2835_DMA_DEST_AD 0x10
+#define BCM2835_DMA_LEN 0x14
+#define BCM2835_DMA_STRIDE 0x18
+#define BCM2835_DMA_NEXTCB 0x1c
+#define BCM2835_DMA_DEBUG 0x20
+
+/* DMA CS Control and Status bits */
+#define BCM2835_DMA_ACTIVE BIT(0) /* activate the DMA */
+#define BCM2835_DMA_END BIT(1) /* current CB has ended */
+#define BCM2835_DMA_INT BIT(2) /* interrupt status */
+#define BCM2835_DMA_DREQ BIT(3) /* DREQ state */
+#define BCM2835_DMA_ISPAUSED BIT(4) /* Pause requested or not active */
+#define BCM2835_DMA_ISHELD BIT(5) /* Is held by DREQ flow control */
+#define BCM2835_DMA_WAITING_FOR_WRITES BIT(6) /* waiting for last
+ * AXI-write to ack
+ */
+#define BCM2835_DMA_ERR BIT(8)
+#define BCM2835_DMA_PRIORITY(x) ((x & 15) << 16) /* AXI priority */
+#define BCM2835_DMA_PANIC_PRIORITY(x) ((x & 15) << 20) /* panic priority */
+/* current value of TI.BCM2835_DMA_WAIT_RESP */
+#define BCM2835_DMA_WAIT_FOR_WRITES BIT(28)
+#define BCM2835_DMA_DIS_DEBUG BIT(29) /* disable debug pause signal */
+#define BCM2835_DMA_ABORT BIT(30) /* Stop current CB, go to next, WO */
+#define BCM2835_DMA_RESET BIT(31) /* WO, self clearing */
+
+/* Transfer information bits - also bcm2835_cb.info field */
+#define BCM2835_DMA_INT_EN BIT(0)
+#define BCM2835_DMA_TDMODE BIT(1) /* 2D-Mode */
+#define BCM2835_DMA_WAIT_RESP BIT(3) /* wait for AXI-write to be acked */
+#define BCM2835_DMA_D_INC BIT(4)
+#define BCM2835_DMA_D_WIDTH BIT(5) /* 128bit writes if set */
+#define BCM2835_DMA_D_DREQ BIT(6) /* enable DREQ for destination */
+#define BCM2835_DMA_D_IGNORE BIT(7) /* ignore destination writes */
+#define BCM2835_DMA_S_INC BIT(8)
+#define BCM2835_DMA_S_WIDTH BIT(9) /* 128bit writes if set */
+#define BCM2835_DMA_S_DREQ BIT(10) /* enable SREQ for source */
+#define BCM2835_DMA_S_IGNORE BIT(11) /* ignore source reads - read 0 */
+#define BCM2835_DMA_BURST_LENGTH(x) ((x & 15) << 12)
+#define BCM2835_DMA_PER_MAP(x) ((x & 31) << 16) /* REQ source */
+#define BCM2835_DMA_WAIT(x) ((x & 31) << 21) /* add DMA-wait cycles */
+#define BCM2835_DMA_NO_WIDE_BURSTS BIT(26) /* no 2 beat write bursts */
+
+/* debug register bits */
+#define BCM2835_DMA_DEBUG_LAST_NOT_SET_ERR BIT(0)
+#define BCM2835_DMA_DEBUG_FIFO_ERR BIT(1)
+#define BCM2835_DMA_DEBUG_READ_ERR BIT(2)
+#define BCM2835_DMA_DEBUG_OUTSTANDING_WRITES_SHIFT 4
+#define BCM2835_DMA_DEBUG_OUTSTANDING_WRITES_BITS 4
+#define BCM2835_DMA_DEBUG_ID_SHIFT 16
+#define BCM2835_DMA_DEBUG_ID_BITS 9
+#define BCM2835_DMA_DEBUG_STATE_SHIFT 16
+#define BCM2835_DMA_DEBUG_STATE_BITS 9
+#define BCM2835_DMA_DEBUG_VERSION_SHIFT 25
+#define BCM2835_DMA_DEBUG_VERSION_BITS 3
+#define BCM2835_DMA_DEBUG_LITE BIT(28)
+
+/* shared registers for all dma channels */
+#define BCM2835_DMA_INT_STATUS 0xfe0
+#define BCM2835_DMA_ENABLE 0xff0
+
+#define BCM2835_DMA_DATA_TYPE_S8 1
+#define BCM2835_DMA_DATA_TYPE_S16 2
+#define BCM2835_DMA_DATA_TYPE_S32 4
+#define BCM2835_DMA_DATA_TYPE_S128 16
+
+/* Valid only for channels 0 - 14, 15 has its own base address */
+#define BCM2835_DMA_CHAN(n) ((n) << 8) /* Base address */
+#define BCM2835_DMA_CHANIO(base, n) ((base) + BCM2835_DMA_CHAN(n))
+
+/* the max dma length for different channels */
+#define MAX_DMA_LEN SZ_1G
+#define MAX_LITE_DMA_LEN (SZ_64K - 4)
+
+static inline size_t bcm2835_dma_max_frame_length(struct bcm2835_chan *c)
+{
+ /* lite and normal channels have different max frame length */
+ return c->is_lite_channel ? MAX_LITE_DMA_LEN : MAX_DMA_LEN;
+}
+
+/* how many frames of max_len size do we need to transfer len bytes */
+static inline size_t bcm2835_dma_frames_for_length(size_t len,
+ size_t max_len)
+{
+ return DIV_ROUND_UP(len, max_len);
+}
+
+static inline struct bcm2835_dmadev *to_bcm2835_dma_dev(struct dma_device *d)
+{
+ return container_of(d, struct bcm2835_dmadev, ddev);
+}
+
+static inline struct bcm2835_chan *to_bcm2835_dma_chan(struct dma_chan *c)
+{
+ return container_of(c, struct bcm2835_chan, vc.chan);
+}
+
+static inline struct bcm2835_desc *to_bcm2835_dma_desc(
+ struct dma_async_tx_descriptor *t)
+{
+ return container_of(t, struct bcm2835_desc, vd.tx);
+}
+
+static void bcm2835_dma_free_cb_chain(struct bcm2835_desc *desc)
+{
+ size_t i;
+
+ for (i = 0; i < desc->frames; i++)
+ dma_pool_free(desc->c->cb_pool, desc->cb_list[i].cb,
+ desc->cb_list[i].paddr);
+
+ kfree(desc);
+}
+
+static void bcm2835_dma_desc_free(struct virt_dma_desc *vd)
+{
+ bcm2835_dma_free_cb_chain(
+ container_of(vd, struct bcm2835_desc, vd));
+}
+
+static void bcm2835_dma_create_cb_set_length(
+ struct bcm2835_chan *chan,
+ struct bcm2835_dma_cb *control_block,
+ size_t len,
+ size_t period_len,
+ size_t *total_len,
+ u32 finalextrainfo)
+{
+ size_t max_len = bcm2835_dma_max_frame_length(chan);
+
+ /* set the length taking lite-channel limitations into account */
+ control_block->length = min_t(u32, len, max_len);
+
+ /* finished if we have no period_length */
+ if (!period_len)
+ return;
+
+ /*
+ * period_len means: that we need to generate
+ * transfers that are terminating at every
+ * multiple of period_len - this is typically
+ * used to set the interrupt flag in info
+ * which is required during cyclic transfers
+ */
+
+ /* have we filled in period_length yet? */
+ if (*total_len + control_block->length < period_len) {
+ /* update number of bytes in this period so far */
+ *total_len += control_block->length;
+ return;
+ }
+
+ /* calculate the length that remains to reach period_length */
+ control_block->length = period_len - *total_len;
+
+ /* reset total_length for next period */
+ *total_len = 0;
+
+ /* add extrainfo bits in info */
+ control_block->info |= finalextrainfo;
+}
+
+static inline size_t bcm2835_dma_count_frames_for_sg(
+ struct bcm2835_chan *c,
+ struct scatterlist *sgl,
+ unsigned int sg_len)
+{
+ size_t frames = 0;
+ struct scatterlist *sgent;
+ unsigned int i;
+ size_t plength = bcm2835_dma_max_frame_length(c);
+
+ for_each_sg(sgl, sgent, sg_len, i)
+ frames += bcm2835_dma_frames_for_length(
+ sg_dma_len(sgent), plength);
+
+ return frames;
+}
+
+/**
+ * bcm2835_dma_create_cb_chain - create a control block and fills data in
+ *
+ * @chan: the @dma_chan for which we run this
+ * @direction: the direction in which we transfer
+ * @cyclic: it is a cyclic transfer
+ * @info: the default info bits to apply per controlblock
+ * @frames: number of controlblocks to allocate
+ * @src: the src address to assign (if the S_INC bit is set
+ * in @info, then it gets incremented)
+ * @dst: the dst address to assign (if the D_INC bit is set
+ * in @info, then it gets incremented)
+ * @buf_len: the full buffer length (may also be 0)
+ * @period_len: the period length when to apply @finalextrainfo
+ * in addition to the last transfer
+ * this will also break some control-blocks early
+ * @finalextrainfo: additional bits in last controlblock
+ * (or when period_len is reached in case of cyclic)
+ * @gfp: the GFP flag to use for allocation
+ */
+static struct bcm2835_desc *bcm2835_dma_create_cb_chain(
+ struct dma_chan *chan, enum dma_transfer_direction direction,
+ bool cyclic, u32 info, u32 finalextrainfo, size_t frames,
+ dma_addr_t src, dma_addr_t dst, size_t buf_len,
+ size_t period_len, gfp_t gfp)
+{
+ struct bcm2835_chan *c = to_bcm2835_dma_chan(chan);
+ size_t len = buf_len, total_len;
+ size_t frame;
+ struct bcm2835_desc *d;
+ struct bcm2835_cb_entry *cb_entry;
+ struct bcm2835_dma_cb *control_block;
+
+ if (!frames)
+ return NULL;
+
+ /* allocate and setup the descriptor. */
+ d = kzalloc(struct_size(d, cb_list, frames), gfp);
+ if (!d)
+ return NULL;
+
+ d->c = c;
+ d->dir = direction;
+ d->cyclic = cyclic;
+
+ /*
+ * Iterate over all frames, create a control block
+ * for each frame and link them together.
+ */
+ for (frame = 0, total_len = 0; frame < frames; d->frames++, frame++) {
+ cb_entry = &d->cb_list[frame];
+ cb_entry->cb = dma_pool_alloc(c->cb_pool, gfp,
+ &cb_entry->paddr);
+ if (!cb_entry->cb)
+ goto error_cb;
+
+ /* fill in the control block */
+ control_block = cb_entry->cb;
+ control_block->info = info;
+ control_block->src = src;
+ control_block->dst = dst;
+ control_block->stride = 0;
+ control_block->next = 0;
+ /* set up length in control_block if requested */
+ if (buf_len) {
+ /* calculate length honoring period_length */
+ bcm2835_dma_create_cb_set_length(
+ c, control_block,
+ len, period_len, &total_len,
+ cyclic ? finalextrainfo : 0);
+
+ /* calculate new remaining length */
+ len -= control_block->length;
+ }
+
+ /* link this the last controlblock */
+ if (frame)
+ d->cb_list[frame - 1].cb->next = cb_entry->paddr;
+
+ /* update src and dst and length */
+ if (src && (info & BCM2835_DMA_S_INC))
+ src += control_block->length;
+ if (dst && (info & BCM2835_DMA_D_INC))
+ dst += control_block->length;
+
+ /* Length of total transfer */
+ d->size += control_block->length;
+ }
+
+ /* the last frame requires extra flags */
+ d->cb_list[d->frames - 1].cb->info |= finalextrainfo;
+
+ /* detect a size missmatch */
+ if (buf_len && (d->size != buf_len))
+ goto error_cb;
+
+ return d;
+error_cb:
+ bcm2835_dma_free_cb_chain(d);
+
+ return NULL;
+}
+
+static void bcm2835_dma_fill_cb_chain_with_sg(
+ struct dma_chan *chan,
+ enum dma_transfer_direction direction,
+ struct bcm2835_cb_entry *cb,
+ struct scatterlist *sgl,
+ unsigned int sg_len)
+{
+ struct bcm2835_chan *c = to_bcm2835_dma_chan(chan);
+ size_t len, max_len;
+ unsigned int i;
+ dma_addr_t addr;
+ struct scatterlist *sgent;
+
+ max_len = bcm2835_dma_max_frame_length(c);
+ for_each_sg(sgl, sgent, sg_len, i) {
+ for (addr = sg_dma_address(sgent), len = sg_dma_len(sgent);
+ len > 0;
+ addr += cb->cb->length, len -= cb->cb->length, cb++) {
+ if (direction == DMA_DEV_TO_MEM)
+ cb->cb->dst = addr;
+ else
+ cb->cb->src = addr;
+ cb->cb->length = min(len, max_len);
+ }
+ }
+}
+
+static void bcm2835_dma_abort(struct bcm2835_chan *c)
+{
+ void __iomem *chan_base = c->chan_base;
+ long int timeout = 10000;
+
+ /*
+ * A zero control block address means the channel is idle.
+ * (The ACTIVE flag in the CS register is not a reliable indicator.)
+ */
+ if (!readl(chan_base + BCM2835_DMA_ADDR))
+ return;
+
+ /* Write 0 to the active bit - Pause the DMA */
+ writel(0, chan_base + BCM2835_DMA_CS);
+
+ /* Wait for any current AXI transfer to complete */
+ while ((readl(chan_base + BCM2835_DMA_CS) &
+ BCM2835_DMA_WAITING_FOR_WRITES) && --timeout)
+ cpu_relax();
+
+ /* Peripheral might be stuck and fail to signal AXI write responses */
+ if (!timeout)
+ dev_err(c->vc.chan.device->dev,
+ "failed to complete outstanding writes\n");
+
+ writel(BCM2835_DMA_RESET, chan_base + BCM2835_DMA_CS);
+}
+
+static void bcm2835_dma_start_desc(struct bcm2835_chan *c)
+{
+ struct virt_dma_desc *vd = vchan_next_desc(&c->vc);
+ struct bcm2835_desc *d;
+
+ if (!vd) {
+ c->desc = NULL;
+ return;
+ }
+
+ list_del(&vd->node);
+
+ c->desc = d = to_bcm2835_dma_desc(&vd->tx);
+
+ writel(d->cb_list[0].paddr, c->chan_base + BCM2835_DMA_ADDR);
+ writel(BCM2835_DMA_ACTIVE, c->chan_base + BCM2835_DMA_CS);
+}
+
+static irqreturn_t bcm2835_dma_callback(int irq, void *data)
+{
+ struct bcm2835_chan *c = data;
+ struct bcm2835_desc *d;
+ unsigned long flags;
+
+ /* check the shared interrupt */
+ if (c->irq_flags & IRQF_SHARED) {
+ /* check if the interrupt is enabled */
+ flags = readl(c->chan_base + BCM2835_DMA_CS);
+ /* if not set then we are not the reason for the irq */
+ if (!(flags & BCM2835_DMA_INT))
+ return IRQ_NONE;
+ }
+
+ spin_lock_irqsave(&c->vc.lock, flags);
+
+ /*
+ * Clear the INT flag to receive further interrupts. Keep the channel
+ * active in case the descriptor is cyclic or in case the client has
+ * already terminated the descriptor and issued a new one. (May happen
+ * if this IRQ handler is threaded.) If the channel is finished, it
+ * will remain idle despite the ACTIVE flag being set.
+ */
+ writel(BCM2835_DMA_INT | BCM2835_DMA_ACTIVE,
+ c->chan_base + BCM2835_DMA_CS);
+
+ d = c->desc;
+
+ if (d) {
+ if (d->cyclic) {
+ /* call the cyclic callback */
+ vchan_cyclic_callback(&d->vd);
+ } else if (!readl(c->chan_base + BCM2835_DMA_ADDR)) {
+ vchan_cookie_complete(&c->desc->vd);
+ bcm2835_dma_start_desc(c);
+ }
+ }
+
+ spin_unlock_irqrestore(&c->vc.lock, flags);
+
+ return IRQ_HANDLED;
+}
+
+static int bcm2835_dma_alloc_chan_resources(struct dma_chan *chan)
+{
+ struct bcm2835_chan *c = to_bcm2835_dma_chan(chan);
+ struct device *dev = c->vc.chan.device->dev;
+
+ dev_dbg(dev, "Allocating DMA channel %d\n", c->ch);
+
+ /*
+ * Control blocks are 256 bit in length and must start at a 256 bit
+ * (32 byte) aligned address (BCM2835 ARM Peripherals, sec. 4.2.1.1).
+ */
+ c->cb_pool = dma_pool_create(dev_name(dev), dev,
+ sizeof(struct bcm2835_dma_cb), 32, 0);
+ if (!c->cb_pool) {
+ dev_err(dev, "unable to allocate descriptor pool\n");
+ return -ENOMEM;
+ }
+
+ return request_irq(c->irq_number, bcm2835_dma_callback,
+ c->irq_flags, "DMA IRQ", c);
+}
+
+static void bcm2835_dma_free_chan_resources(struct dma_chan *chan)
+{
+ struct bcm2835_chan *c = to_bcm2835_dma_chan(chan);
+
+ vchan_free_chan_resources(&c->vc);
+ free_irq(c->irq_number, c);
+ dma_pool_destroy(c->cb_pool);
+
+ dev_dbg(c->vc.chan.device->dev, "Freeing DMA channel %u\n", c->ch);
+}
+
+static size_t bcm2835_dma_desc_size(struct bcm2835_desc *d)
+{
+ return d->size;
+}
+
+static size_t bcm2835_dma_desc_size_pos(struct bcm2835_desc *d, dma_addr_t addr)
+{
+ unsigned int i;
+ size_t size;
+
+ for (size = i = 0; i < d->frames; i++) {
+ struct bcm2835_dma_cb *control_block = d->cb_list[i].cb;
+ size_t this_size = control_block->length;
+ dma_addr_t dma;
+
+ if (d->dir == DMA_DEV_TO_MEM)
+ dma = control_block->dst;
+ else
+ dma = control_block->src;
+
+ if (size)
+ size += this_size;
+ else if (addr >= dma && addr < dma + this_size)
+ size += dma + this_size - addr;
+ }
+
+ return size;
+}
+
+static enum dma_status bcm2835_dma_tx_status(struct dma_chan *chan,
+ dma_cookie_t cookie, struct dma_tx_state *txstate)
+{
+ struct bcm2835_chan *c = to_bcm2835_dma_chan(chan);
+ struct virt_dma_desc *vd;
+ enum dma_status ret;
+ unsigned long flags;
+
+ ret = dma_cookie_status(chan, cookie, txstate);
+ if (ret == DMA_COMPLETE || !txstate)
+ return ret;
+
+ spin_lock_irqsave(&c->vc.lock, flags);
+ vd = vchan_find_desc(&c->vc, cookie);
+ if (vd) {
+ txstate->residue =
+ bcm2835_dma_desc_size(to_bcm2835_dma_desc(&vd->tx));
+ } else if (c->desc && c->desc->vd.tx.cookie == cookie) {
+ struct bcm2835_desc *d = c->desc;
+ dma_addr_t pos;
+
+ if (d->dir == DMA_MEM_TO_DEV)
+ pos = readl(c->chan_base + BCM2835_DMA_SOURCE_AD);
+ else if (d->dir == DMA_DEV_TO_MEM)
+ pos = readl(c->chan_base + BCM2835_DMA_DEST_AD);
+ else
+ pos = 0;
+
+ txstate->residue = bcm2835_dma_desc_size_pos(d, pos);
+ } else {
+ txstate->residue = 0;
+ }
+
+ spin_unlock_irqrestore(&c->vc.lock, flags);
+
+ return ret;
+}
+
+static void bcm2835_dma_issue_pending(struct dma_chan *chan)
+{
+ struct bcm2835_chan *c = to_bcm2835_dma_chan(chan);
+ unsigned long flags;
+
+ spin_lock_irqsave(&c->vc.lock, flags);
+ if (vchan_issue_pending(&c->vc) && !c->desc)
+ bcm2835_dma_start_desc(c);
+
+ spin_unlock_irqrestore(&c->vc.lock, flags);
+}
+
+static struct dma_async_tx_descriptor *bcm2835_dma_prep_dma_memcpy(
+ struct dma_chan *chan, dma_addr_t dst, dma_addr_t src,
+ size_t len, unsigned long flags)
+{
+ struct bcm2835_chan *c = to_bcm2835_dma_chan(chan);
+ struct bcm2835_desc *d;
+ u32 info = BCM2835_DMA_D_INC | BCM2835_DMA_S_INC;
+ u32 extra = BCM2835_DMA_INT_EN | BCM2835_DMA_WAIT_RESP;
+ size_t max_len = bcm2835_dma_max_frame_length(c);
+ size_t frames;
+
+ /* if src, dst or len is not given return with an error */
+ if (!src || !dst || !len)
+ return NULL;
+
+ /* calculate number of frames */
+ frames = bcm2835_dma_frames_for_length(len, max_len);
+
+ /* allocate the CB chain - this also fills in the pointers */
+ d = bcm2835_dma_create_cb_chain(chan, DMA_MEM_TO_MEM, false,
+ info, extra, frames,
+ src, dst, len, 0, GFP_KERNEL);
+ if (!d)
+ return NULL;
+
+ return vchan_tx_prep(&c->vc, &d->vd, flags);
+}
+
+static struct dma_async_tx_descriptor *bcm2835_dma_prep_slave_sg(
+ struct dma_chan *chan,
+ struct scatterlist *sgl, unsigned int sg_len,
+ enum dma_transfer_direction direction,
+ unsigned long flags, void *context)
+{
+ struct bcm2835_chan *c = to_bcm2835_dma_chan(chan);
+ struct bcm2835_desc *d;
+ dma_addr_t src = 0, dst = 0;
+ u32 info = BCM2835_DMA_WAIT_RESP;
+ u32 extra = BCM2835_DMA_INT_EN;
+ size_t frames;
+
+ if (!is_slave_direction(direction)) {
+ dev_err(chan->device->dev,
+ "%s: bad direction?\n", __func__);
+ return NULL;
+ }
+
+ if (c->dreq != 0)
+ info |= BCM2835_DMA_PER_MAP(c->dreq);
+
+ if (direction == DMA_DEV_TO_MEM) {
+ if (c->cfg.src_addr_width != DMA_SLAVE_BUSWIDTH_4_BYTES)
+ return NULL;
+ src = c->cfg.src_addr;
+ info |= BCM2835_DMA_S_DREQ | BCM2835_DMA_D_INC;
+ } else {
+ if (c->cfg.dst_addr_width != DMA_SLAVE_BUSWIDTH_4_BYTES)
+ return NULL;
+ dst = c->cfg.dst_addr;
+ info |= BCM2835_DMA_D_DREQ | BCM2835_DMA_S_INC;
+ }
+
+ /* count frames in sg list */
+ frames = bcm2835_dma_count_frames_for_sg(c, sgl, sg_len);
+
+ /* allocate the CB chain */
+ d = bcm2835_dma_create_cb_chain(chan, direction, false,
+ info, extra,
+ frames, src, dst, 0, 0,
+ GFP_NOWAIT);
+ if (!d)
+ return NULL;
+
+ /* fill in frames with scatterlist pointers */
+ bcm2835_dma_fill_cb_chain_with_sg(chan, direction, d->cb_list,
+ sgl, sg_len);
+
+ return vchan_tx_prep(&c->vc, &d->vd, flags);
+}
+
+static struct dma_async_tx_descriptor *bcm2835_dma_prep_dma_cyclic(
+ struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
+ size_t period_len, enum dma_transfer_direction direction,
+ unsigned long flags)
+{
+ struct bcm2835_dmadev *od = to_bcm2835_dma_dev(chan->device);
+ struct bcm2835_chan *c = to_bcm2835_dma_chan(chan);
+ struct bcm2835_desc *d;
+ dma_addr_t src, dst;
+ u32 info = BCM2835_DMA_WAIT_RESP;
+ u32 extra = 0;
+ size_t max_len = bcm2835_dma_max_frame_length(c);
+ size_t frames;
+
+ /* Grab configuration */
+ if (!is_slave_direction(direction)) {
+ dev_err(chan->device->dev, "%s: bad direction?\n", __func__);
+ return NULL;
+ }
+
+ if (!buf_len) {
+ dev_err(chan->device->dev,
+ "%s: bad buffer length (= 0)\n", __func__);
+ return NULL;
+ }
+
+ if (flags & DMA_PREP_INTERRUPT)
+ extra |= BCM2835_DMA_INT_EN;
+ else
+ period_len = buf_len;
+
+ /*
+ * warn if buf_len is not a multiple of period_len - this may leed
+ * to unexpected latencies for interrupts and thus audiable clicks
+ */
+ if (buf_len % period_len)
+ dev_warn_once(chan->device->dev,
+ "%s: buffer_length (%zd) is not a multiple of period_len (%zd)\n",
+ __func__, buf_len, period_len);
+
+ /* Setup DREQ channel */
+ if (c->dreq != 0)
+ info |= BCM2835_DMA_PER_MAP(c->dreq);
+
+ if (direction == DMA_DEV_TO_MEM) {
+ if (c->cfg.src_addr_width != DMA_SLAVE_BUSWIDTH_4_BYTES)
+ return NULL;
+ src = c->cfg.src_addr;
+ dst = buf_addr;
+ info |= BCM2835_DMA_S_DREQ | BCM2835_DMA_D_INC;
+ } else {
+ if (c->cfg.dst_addr_width != DMA_SLAVE_BUSWIDTH_4_BYTES)
+ return NULL;
+ dst = c->cfg.dst_addr;
+ src = buf_addr;
+ info |= BCM2835_DMA_D_DREQ | BCM2835_DMA_S_INC;
+
+ /* non-lite channels can write zeroes w/o accessing memory */
+ if (buf_addr == od->zero_page && !c->is_lite_channel)
+ info |= BCM2835_DMA_S_IGNORE;
+ }
+
+ /* calculate number of frames */
+ frames = /* number of periods */
+ DIV_ROUND_UP(buf_len, period_len) *
+ /* number of frames per period */
+ bcm2835_dma_frames_for_length(period_len, max_len);
+
+ /*
+ * allocate the CB chain
+ * note that we need to use GFP_NOWAIT, as the ALSA i2s dmaengine
+ * implementation calls prep_dma_cyclic with interrupts disabled.
+ */
+ d = bcm2835_dma_create_cb_chain(chan, direction, true,
+ info, extra,
+ frames, src, dst, buf_len,
+ period_len, GFP_NOWAIT);
+ if (!d)
+ return NULL;
+
+ /* wrap around into a loop */
+ d->cb_list[d->frames - 1].cb->next = d->cb_list[0].paddr;
+
+ return vchan_tx_prep(&c->vc, &d->vd, flags);
+}
+
+static int bcm2835_dma_slave_config(struct dma_chan *chan,
+ struct dma_slave_config *cfg)
+{
+ struct bcm2835_chan *c = to_bcm2835_dma_chan(chan);
+
+ c->cfg = *cfg;
+
+ return 0;
+}
+
+static int bcm2835_dma_terminate_all(struct dma_chan *chan)
+{
+ struct bcm2835_chan *c = to_bcm2835_dma_chan(chan);
+ unsigned long flags;
+ LIST_HEAD(head);
+
+ spin_lock_irqsave(&c->vc.lock, flags);
+
+ /* stop DMA activity */
+ if (c->desc) {
+ vchan_terminate_vdesc(&c->desc->vd);
+ c->desc = NULL;
+ bcm2835_dma_abort(c);
+ }
+
+ vchan_get_all_descriptors(&c->vc, &head);
+ spin_unlock_irqrestore(&c->vc.lock, flags);
+ vchan_dma_desc_free_list(&c->vc, &head);
+
+ return 0;
+}
+
+static void bcm2835_dma_synchronize(struct dma_chan *chan)
+{
+ struct bcm2835_chan *c = to_bcm2835_dma_chan(chan);
+
+ vchan_synchronize(&c->vc);
+}
+
+static int bcm2835_dma_chan_init(struct bcm2835_dmadev *d, int chan_id,
+ int irq, unsigned int irq_flags)
+{
+ struct bcm2835_chan *c;
+
+ c = devm_kzalloc(d->ddev.dev, sizeof(*c), GFP_KERNEL);
+ if (!c)
+ return -ENOMEM;
+
+ c->vc.desc_free = bcm2835_dma_desc_free;
+ vchan_init(&c->vc, &d->ddev);
+
+ c->chan_base = BCM2835_DMA_CHANIO(d->base, chan_id);
+ c->ch = chan_id;
+ c->irq_number = irq;
+ c->irq_flags = irq_flags;
+
+ /* check in DEBUG register if this is a LITE channel */
+ if (readl(c->chan_base + BCM2835_DMA_DEBUG) &
+ BCM2835_DMA_DEBUG_LITE)
+ c->is_lite_channel = true;
+
+ return 0;
+}
+
+static void bcm2835_dma_free(struct bcm2835_dmadev *od)
+{
+ struct bcm2835_chan *c, *next;
+
+ list_for_each_entry_safe(c, next, &od->ddev.channels,
+ vc.chan.device_node) {
+ list_del(&c->vc.chan.device_node);
+ tasklet_kill(&c->vc.task);
+ }
+
+ dma_unmap_page_attrs(od->ddev.dev, od->zero_page, PAGE_SIZE,
+ DMA_TO_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
+}
+
+static const struct of_device_id bcm2835_dma_of_match[] = {
+ { .compatible = "brcm,bcm2835-dma", },
+ {},
+};
+MODULE_DEVICE_TABLE(of, bcm2835_dma_of_match);
+
+static struct dma_chan *bcm2835_dma_xlate(struct of_phandle_args *spec,
+ struct of_dma *ofdma)
+{
+ struct bcm2835_dmadev *d = ofdma->of_dma_data;
+ struct dma_chan *chan;
+
+ chan = dma_get_any_slave_channel(&d->ddev);
+ if (!chan)
+ return NULL;
+
+ /* Set DREQ from param */
+ to_bcm2835_dma_chan(chan)->dreq = spec->args[0];
+
+ return chan;
+}
+
+static int bcm2835_dma_probe(struct platform_device *pdev)
+{
+ struct bcm2835_dmadev *od;
+ struct resource *res;
+ void __iomem *base;
+ int rc;
+ int i, j;
+ int irq[BCM2835_DMA_MAX_DMA_CHAN_SUPPORTED + 1];
+ int irq_flags;
+ uint32_t chans_available;
+ char chan_name[BCM2835_DMA_CHAN_NAME_SIZE];
+
+ if (!pdev->dev.dma_mask)
+ pdev->dev.dma_mask = &pdev->dev.coherent_dma_mask;
+
+ rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
+ if (rc) {
+ dev_err(&pdev->dev, "Unable to set DMA mask\n");
+ return rc;
+ }
+
+ od = devm_kzalloc(&pdev->dev, sizeof(*od), GFP_KERNEL);
+ if (!od)
+ return -ENOMEM;
+
+ dma_set_max_seg_size(&pdev->dev, 0x3FFFFFFF);
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ base = devm_ioremap_resource(&pdev->dev, res);
+ if (IS_ERR(base))
+ return PTR_ERR(base);
+
+ od->base = base;
+
+ dma_cap_set(DMA_SLAVE, od->ddev.cap_mask);
+ dma_cap_set(DMA_PRIVATE, od->ddev.cap_mask);
+ dma_cap_set(DMA_CYCLIC, od->ddev.cap_mask);
+ dma_cap_set(DMA_MEMCPY, od->ddev.cap_mask);
+ od->ddev.device_alloc_chan_resources = bcm2835_dma_alloc_chan_resources;
+ od->ddev.device_free_chan_resources = bcm2835_dma_free_chan_resources;
+ od->ddev.device_tx_status = bcm2835_dma_tx_status;
+ od->ddev.device_issue_pending = bcm2835_dma_issue_pending;
+ od->ddev.device_prep_dma_cyclic = bcm2835_dma_prep_dma_cyclic;
+ od->ddev.device_prep_slave_sg = bcm2835_dma_prep_slave_sg;
+ od->ddev.device_prep_dma_memcpy = bcm2835_dma_prep_dma_memcpy;
+ od->ddev.device_config = bcm2835_dma_slave_config;
+ od->ddev.device_terminate_all = bcm2835_dma_terminate_all;
+ od->ddev.device_synchronize = bcm2835_dma_synchronize;
+ od->ddev.src_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_4_BYTES);
+ od->ddev.dst_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_4_BYTES);
+ od->ddev.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV) |
+ BIT(DMA_MEM_TO_MEM);
+ od->ddev.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
+ od->ddev.descriptor_reuse = true;
+ od->ddev.dev = &pdev->dev;
+ INIT_LIST_HEAD(&od->ddev.channels);
+
+ platform_set_drvdata(pdev, od);
+
+ od->zero_page = dma_map_page_attrs(od->ddev.dev, ZERO_PAGE(0), 0,
+ PAGE_SIZE, DMA_TO_DEVICE,
+ DMA_ATTR_SKIP_CPU_SYNC);
+ if (dma_mapping_error(od->ddev.dev, od->zero_page)) {
+ dev_err(&pdev->dev, "Failed to map zero page\n");
+ return -ENOMEM;
+ }
+
+ /* Request DMA channel mask from device tree */
+ if (of_property_read_u32(pdev->dev.of_node,
+ "brcm,dma-channel-mask",
+ &chans_available)) {
+ dev_err(&pdev->dev, "Failed to get channel mask\n");
+ rc = -EINVAL;
+ goto err_no_dma;
+ }
+
+ /* get irqs for each channel that we support */
+ for (i = 0; i <= BCM2835_DMA_MAX_DMA_CHAN_SUPPORTED; i++) {
+ /* skip masked out channels */
+ if (!(chans_available & (1 << i))) {
+ irq[i] = -1;
+ continue;
+ }
+
+ /* get the named irq */
+ snprintf(chan_name, sizeof(chan_name), "dma%i", i);
+ irq[i] = platform_get_irq_byname(pdev, chan_name);
+ if (irq[i] >= 0)
+ continue;
+
+ /* legacy device tree case handling */
+ dev_warn_once(&pdev->dev,
+ "missing interrupt-names property in device tree - legacy interpretation is used\n");
+ /*
+ * in case of channel >= 11
+ * use the 11th interrupt and that is shared
+ */
+ irq[i] = platform_get_irq(pdev, i < 11 ? i : 11);
+ }
+
+ /* get irqs for each channel */
+ for (i = 0; i <= BCM2835_DMA_MAX_DMA_CHAN_SUPPORTED; i++) {
+ /* skip channels without irq */
+ if (irq[i] < 0)
+ continue;
+
+ /* check if there are other channels that also use this irq */
+ irq_flags = 0;
+ for (j = 0; j <= BCM2835_DMA_MAX_DMA_CHAN_SUPPORTED; j++)
+ if ((i != j) && (irq[j] == irq[i])) {
+ irq_flags = IRQF_SHARED;
+ break;
+ }
+
+ /* initialize the channel */
+ rc = bcm2835_dma_chan_init(od, i, irq[i], irq_flags);
+ if (rc)
+ goto err_no_dma;
+ }
+
+ dev_dbg(&pdev->dev, "Initialized %i DMA channels\n", i);
+
+ /* Device-tree DMA controller registration */
+ rc = of_dma_controller_register(pdev->dev.of_node,
+ bcm2835_dma_xlate, od);
+ if (rc) {
+ dev_err(&pdev->dev, "Failed to register DMA controller\n");
+ goto err_no_dma;
+ }
+
+ rc = dma_async_device_register(&od->ddev);
+ if (rc) {
+ dev_err(&pdev->dev,
+ "Failed to register slave DMA engine device: %d\n", rc);
+ goto err_no_dma;
+ }
+
+ dev_dbg(&pdev->dev, "Load BCM2835 DMA engine driver\n");
+
+ return 0;
+
+err_no_dma:
+ bcm2835_dma_free(od);
+ return rc;
+}
+
+static int bcm2835_dma_remove(struct platform_device *pdev)
+{
+ struct bcm2835_dmadev *od = platform_get_drvdata(pdev);
+
+ dma_async_device_unregister(&od->ddev);
+ bcm2835_dma_free(od);
+
+ return 0;
+}
+
+static struct platform_driver bcm2835_dma_driver = {
+ .probe = bcm2835_dma_probe,
+ .remove = bcm2835_dma_remove,
+ .driver = {
+ .name = "bcm2835-dma",
+ .of_match_table = of_match_ptr(bcm2835_dma_of_match),
+ },
+};
+
+module_platform_driver(bcm2835_dma_driver);
+
+MODULE_ALIAS("platform:bcm2835-dma");
+MODULE_DESCRIPTION("BCM2835 DMA engine driver");
+MODULE_AUTHOR("Florian Meier <florian.meier@koalo.de>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/dma/bestcomm/Kconfig b/drivers/dma/bestcomm/Kconfig
new file mode 100644
index 000000000..5dd437295
--- /dev/null
+++ b/drivers/dma/bestcomm/Kconfig
@@ -0,0 +1,37 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Kconfig options for Bestcomm
+#
+
+config PPC_BESTCOMM
+ tristate "Bestcomm DMA engine support"
+ depends on PPC_MPC52xx
+ default n
+ select PPC_LIB_RHEAP
+ help
+ BestComm is the name of the communication coprocessor found
+ on the Freescale MPC5200 family of processor. Its usage is
+ optional for some drivers (like ATA), but required for
+ others (like FEC).
+
+ If you want to use drivers that require DMA operations,
+ answer Y or M. Otherwise say N.
+
+config PPC_BESTCOMM_ATA
+ tristate
+ depends on PPC_BESTCOMM
+ help
+ This option enables the support for the ATA task.
+
+config PPC_BESTCOMM_FEC
+ tristate
+ depends on PPC_BESTCOMM
+ help
+ This option enables the support for the FEC tasks.
+
+config PPC_BESTCOMM_GEN_BD
+ tristate
+ depends on PPC_BESTCOMM
+ help
+ This option enables the support for the GenBD tasks.
+
diff --git a/drivers/dma/bestcomm/Makefile b/drivers/dma/bestcomm/Makefile
new file mode 100644
index 000000000..8d1b33a2f
--- /dev/null
+++ b/drivers/dma/bestcomm/Makefile
@@ -0,0 +1,15 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for BestComm & co
+#
+
+bestcomm-core-objs := bestcomm.o sram.o
+bestcomm-ata-objs := ata.o bcom_ata_task.o
+bestcomm-fec-objs := fec.o bcom_fec_rx_task.o bcom_fec_tx_task.o
+bestcomm-gen-bd-objs := gen_bd.o bcom_gen_bd_rx_task.o bcom_gen_bd_tx_task.o
+
+obj-$(CONFIG_PPC_BESTCOMM) += bestcomm-core.o
+obj-$(CONFIG_PPC_BESTCOMM_ATA) += bestcomm-ata.o
+obj-$(CONFIG_PPC_BESTCOMM_FEC) += bestcomm-fec.o
+obj-$(CONFIG_PPC_BESTCOMM_GEN_BD) += bestcomm-gen-bd.o
+
diff --git a/drivers/dma/bestcomm/ata.c b/drivers/dma/bestcomm/ata.c
new file mode 100644
index 000000000..502a45d76
--- /dev/null
+++ b/drivers/dma/bestcomm/ata.c
@@ -0,0 +1,152 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Bestcomm ATA task driver
+ *
+ * Patterned after bestcomm/fec.c by Dale Farnsworth <dfarnsworth@mvista.com>
+ * 2003-2004 (c) MontaVista, Software, Inc.
+ *
+ * Copyright (C) 2006-2007 Sylvain Munaut <tnt@246tNt.com>
+ * Copyright (C) 2006 Freescale - John Rigby
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <asm/io.h>
+
+#include <linux/fsl/bestcomm/bestcomm.h>
+#include <linux/fsl/bestcomm/bestcomm_priv.h>
+#include <linux/fsl/bestcomm/ata.h>
+
+
+/* ======================================================================== */
+/* Task image/var/inc */
+/* ======================================================================== */
+
+/* ata task image */
+extern u32 bcom_ata_task[];
+
+/* ata task vars that need to be set before enabling the task */
+struct bcom_ata_var {
+ u32 enable; /* (u16*) address of task's control register */
+ u32 bd_base; /* (struct bcom_bd*) beginning of ring buffer */
+ u32 bd_last; /* (struct bcom_bd*) end of ring buffer */
+ u32 bd_start; /* (struct bcom_bd*) current bd */
+ u32 buffer_size; /* size of receive buffer */
+};
+
+/* ata task incs that need to be set before enabling the task */
+struct bcom_ata_inc {
+ u16 pad0;
+ s16 incr_bytes;
+ u16 pad1;
+ s16 incr_dst;
+ u16 pad2;
+ s16 incr_src;
+};
+
+
+/* ======================================================================== */
+/* Task support code */
+/* ======================================================================== */
+
+struct bcom_task *
+bcom_ata_init(int queue_len, int maxbufsize)
+{
+ struct bcom_task *tsk;
+ struct bcom_ata_var *var;
+ struct bcom_ata_inc *inc;
+
+ /* Prefetch breaks ATA DMA. Turn it off for ATA DMA */
+ bcom_disable_prefetch();
+
+ tsk = bcom_task_alloc(queue_len, sizeof(struct bcom_ata_bd), 0);
+ if (!tsk)
+ return NULL;
+
+ tsk->flags = BCOM_FLAGS_NONE;
+
+ bcom_ata_reset_bd(tsk);
+
+ var = (struct bcom_ata_var *) bcom_task_var(tsk->tasknum);
+ inc = (struct bcom_ata_inc *) bcom_task_inc(tsk->tasknum);
+
+ if (bcom_load_image(tsk->tasknum, bcom_ata_task)) {
+ bcom_task_free(tsk);
+ return NULL;
+ }
+
+ var->enable = bcom_eng->regs_base +
+ offsetof(struct mpc52xx_sdma, tcr[tsk->tasknum]);
+ var->bd_base = tsk->bd_pa;
+ var->bd_last = tsk->bd_pa + ((tsk->num_bd-1) * tsk->bd_size);
+ var->bd_start = tsk->bd_pa;
+ var->buffer_size = maxbufsize;
+
+ /* Configure some stuff */
+ bcom_set_task_pragma(tsk->tasknum, BCOM_ATA_PRAGMA);
+ bcom_set_task_auto_start(tsk->tasknum, tsk->tasknum);
+
+ out_8(&bcom_eng->regs->ipr[BCOM_INITIATOR_ATA_RX], BCOM_IPR_ATA_RX);
+ out_8(&bcom_eng->regs->ipr[BCOM_INITIATOR_ATA_TX], BCOM_IPR_ATA_TX);
+
+ out_be32(&bcom_eng->regs->IntPend, 1<<tsk->tasknum); /* Clear ints */
+
+ return tsk;
+}
+EXPORT_SYMBOL_GPL(bcom_ata_init);
+
+void bcom_ata_rx_prepare(struct bcom_task *tsk)
+{
+ struct bcom_ata_inc *inc;
+
+ inc = (struct bcom_ata_inc *) bcom_task_inc(tsk->tasknum);
+
+ inc->incr_bytes = -(s16)sizeof(u32);
+ inc->incr_src = 0;
+ inc->incr_dst = sizeof(u32);
+
+ bcom_set_initiator(tsk->tasknum, BCOM_INITIATOR_ATA_RX);
+}
+EXPORT_SYMBOL_GPL(bcom_ata_rx_prepare);
+
+void bcom_ata_tx_prepare(struct bcom_task *tsk)
+{
+ struct bcom_ata_inc *inc;
+
+ inc = (struct bcom_ata_inc *) bcom_task_inc(tsk->tasknum);
+
+ inc->incr_bytes = -(s16)sizeof(u32);
+ inc->incr_src = sizeof(u32);
+ inc->incr_dst = 0;
+
+ bcom_set_initiator(tsk->tasknum, BCOM_INITIATOR_ATA_TX);
+}
+EXPORT_SYMBOL_GPL(bcom_ata_tx_prepare);
+
+void bcom_ata_reset_bd(struct bcom_task *tsk)
+{
+ struct bcom_ata_var *var;
+
+ /* Reset all BD */
+ memset_io(tsk->bd, 0x00, tsk->num_bd * tsk->bd_size);
+
+ tsk->index = 0;
+ tsk->outdex = 0;
+
+ var = (struct bcom_ata_var *) bcom_task_var(tsk->tasknum);
+ var->bd_start = var->bd_base;
+}
+EXPORT_SYMBOL_GPL(bcom_ata_reset_bd);
+
+void bcom_ata_release(struct bcom_task *tsk)
+{
+ /* Nothing special for the ATA tasks */
+ bcom_task_free(tsk);
+}
+EXPORT_SYMBOL_GPL(bcom_ata_release);
+
+
+MODULE_DESCRIPTION("BestComm ATA task driver");
+MODULE_AUTHOR("John Rigby");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/bestcomm/bcom_ata_task.c b/drivers/dma/bestcomm/bcom_ata_task.c
new file mode 100644
index 000000000..9a1c349e9
--- /dev/null
+++ b/drivers/dma/bestcomm/bcom_ata_task.c
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Bestcomm ATA task microcode
+ *
+ * Copyright (c) 2004 Freescale Semiconductor, Inc.
+ *
+ * Created based on bestcom/code_dma/image_rtos1/dma_image.hex
+ */
+
+#include <asm/types.h>
+
+/*
+ * The header consists of the following fields:
+ * u32 magic;
+ * u8 desc_size;
+ * u8 var_size;
+ * u8 inc_size;
+ * u8 first_var;
+ * u8 reserved[8];
+ *
+ * The size fields contain the number of 32-bit words.
+ */
+
+u32 bcom_ata_task[] = {
+ /* header */
+ 0x4243544b,
+ 0x0e060709,
+ 0x00000000,
+ 0x00000000,
+
+ /* Task descriptors */
+ 0x8198009b, /* LCD: idx0 = var3; idx0 <= var2; idx0 += inc3 */
+ 0x13e00c08, /* DRD1A: var3 = var1; FN=0 MORE init=31 WS=0 RS=0 */
+ 0xb8000264, /* LCD: idx1 = *idx0, idx2 = var0; idx1 < var9; idx1 += inc4, idx2 += inc4 */
+ 0x10000f00, /* DRD1A: var3 = idx0; FN=0 MORE init=0 WS=0 RS=0 */
+ 0x60140002, /* DRD2A: EU0=0 EU1=0 EU2=0 EU3=2 EXT init=0 WS=2 RS=2 */
+ 0x0c8cfc8a, /* DRD2B1: *idx2 = EU3(); EU3(*idx2,var10) */
+ 0xd8988240, /* LCDEXT: idx1 = idx1; idx1 > var9; idx1 += inc0 */
+ 0xf845e011, /* LCDEXT: idx2 = *(idx0 + var00000015); ; idx2 += inc2 */
+ 0xb845e00a, /* LCD: idx3 = *(idx0 + var00000019); ; idx3 += inc1 */
+ 0x0bfecf90, /* DRD1A: *idx3 = *idx2; FN=0 TFD init=31 WS=3 RS=3 */
+ 0x9898802d, /* LCD: idx1 = idx1; idx1 once var0; idx1 += inc5 */
+ 0x64000005, /* DRD2A: EU0=0 EU1=0 EU2=0 EU3=5 INT EXT init=0 WS=0 RS=0 */
+ 0x0c0cf849, /* DRD2B1: *idx0 = EU3(); EU3(idx1,var9) */
+ 0x000001f8, /* NOP */
+
+ /* VAR[9]-VAR[14] */
+ 0x40000000,
+ 0x7fff7fff,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+
+ /* INC[0]-INC[6] */
+ 0x40000000,
+ 0xe0000000,
+ 0xe0000000,
+ 0xa000000c,
+ 0x20000000,
+ 0x00000000,
+ 0x00000000,
+};
+
diff --git a/drivers/dma/bestcomm/bcom_fec_rx_task.c b/drivers/dma/bestcomm/bcom_fec_rx_task.c
new file mode 100644
index 000000000..c610dc76a
--- /dev/null
+++ b/drivers/dma/bestcomm/bcom_fec_rx_task.c
@@ -0,0 +1,75 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Bestcomm FEC RX task microcode
+ *
+ * Copyright (c) 2004 Freescale Semiconductor, Inc.
+ *
+ * Automatically created based on BestCommAPI-2.2/code_dma/image_rtos1/dma_image.hex
+ * on Tue Mar 22 11:19:38 2005 GMT
+ */
+
+#include <asm/types.h>
+
+/*
+ * The header consists of the following fields:
+ * u32 magic;
+ * u8 desc_size;
+ * u8 var_size;
+ * u8 inc_size;
+ * u8 first_var;
+ * u8 reserved[8];
+ *
+ * The size fields contain the number of 32-bit words.
+ */
+
+u32 bcom_fec_rx_task[] = {
+ /* header */
+ 0x4243544b,
+ 0x18060709,
+ 0x00000000,
+ 0x00000000,
+
+ /* Task descriptors */
+ 0x808220e3, /* LCD: idx0 = var1, idx1 = var4; idx1 <= var3; idx0 += inc4, idx1 += inc3 */
+ 0x10601010, /* DRD1A: var4 = var2; FN=0 MORE init=3 WS=0 RS=0 */
+ 0xb8800264, /* LCD: idx2 = *idx1, idx3 = var0; idx2 < var9; idx2 += inc4, idx3 += inc4 */
+ 0x10001308, /* DRD1A: var4 = idx1; FN=0 MORE init=0 WS=0 RS=0 */
+ 0x60140002, /* DRD2A: EU0=0 EU1=0 EU2=0 EU3=2 EXT init=0 WS=2 RS=2 */
+ 0x0cccfcca, /* DRD2B1: *idx3 = EU3(); EU3(*idx3,var10) */
+ 0x80004000, /* LCDEXT: idx2 = 0x00000000; ; */
+ 0xb8c58029, /* LCD: idx3 = *(idx1 + var00000015); idx3 once var0; idx3 += inc5 */
+ 0x60000002, /* DRD2A: EU0=0 EU1=0 EU2=0 EU3=2 EXT init=0 WS=0 RS=0 */
+ 0x088cf8cc, /* DRD2B1: idx2 = EU3(); EU3(idx3,var12) */
+ 0x991982f2, /* LCD: idx2 = idx2, idx3 = idx3; idx2 > var11; idx2 += inc6, idx3 += inc2 */
+ 0x006acf80, /* DRD1A: *idx3 = *idx0; FN=0 init=3 WS=1 RS=1 */
+ 0x80004000, /* LCDEXT: idx2 = 0x00000000; ; */
+ 0x9999802d, /* LCD: idx3 = idx3; idx3 once var0; idx3 += inc5 */
+ 0x70000002, /* DRD2A: EU0=0 EU1=0 EU2=0 EU3=2 EXT MORE init=0 WS=0 RS=0 */
+ 0x034cfc4e, /* DRD2B1: var13 = EU3(); EU3(*idx1,var14) */
+ 0x00008868, /* DRD1A: idx2 = var13; FN=0 init=0 WS=0 RS=0 */
+ 0x99198341, /* LCD: idx2 = idx2, idx3 = idx3; idx2 > var13; idx2 += inc0, idx3 += inc1 */
+ 0x007ecf80, /* DRD1A: *idx3 = *idx0; FN=0 init=3 WS=3 RS=3 */
+ 0x99198272, /* LCD: idx2 = idx2, idx3 = idx3; idx2 > var9; idx2 += inc6, idx3 += inc2 */
+ 0x046acf80, /* DRD1A: *idx3 = *idx0; FN=0 INT init=3 WS=1 RS=1 */
+ 0x9819002d, /* LCD: idx2 = idx0; idx2 once var0; idx2 += inc5 */
+ 0x0060c790, /* DRD1A: *idx1 = *idx2; FN=0 init=3 WS=0 RS=0 */
+ 0x000001f8, /* NOP */
+
+ /* VAR[9]-VAR[14] */
+ 0x40000000,
+ 0x7fff7fff,
+ 0x00000000,
+ 0x00000003,
+ 0x40000008,
+ 0x43ffffff,
+
+ /* INC[0]-INC[6] */
+ 0x40000000,
+ 0xe0000000,
+ 0xe0000000,
+ 0xa0000008,
+ 0x20000000,
+ 0x00000000,
+ 0x4000ffff,
+};
+
diff --git a/drivers/dma/bestcomm/bcom_fec_tx_task.c b/drivers/dma/bestcomm/bcom_fec_tx_task.c
new file mode 100644
index 000000000..410b42695
--- /dev/null
+++ b/drivers/dma/bestcomm/bcom_fec_tx_task.c
@@ -0,0 +1,88 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Bestcomm FEC TX task microcode
+ *
+ * Copyright (c) 2004 Freescale Semiconductor, Inc.
+ *
+ * Automatically created based on BestCommAPI-2.2/code_dma/image_rtos1/dma_image.hex
+ * on Tue Mar 22 11:19:29 2005 GMT
+ */
+
+#include <asm/types.h>
+
+/*
+ * The header consists of the following fields:
+ * u32 magic;
+ * u8 desc_size;
+ * u8 var_size;
+ * u8 inc_size;
+ * u8 first_var;
+ * u8 reserved[8];
+ *
+ * The size fields contain the number of 32-bit words.
+ */
+
+u32 bcom_fec_tx_task[] = {
+ /* header */
+ 0x4243544b,
+ 0x2407070d,
+ 0x00000000,
+ 0x00000000,
+
+ /* Task descriptors */
+ 0x8018001b, /* LCD: idx0 = var0; idx0 <= var0; idx0 += inc3 */
+ 0x60000005, /* DRD2A: EU0=0 EU1=0 EU2=0 EU3=5 EXT init=0 WS=0 RS=0 */
+ 0x01ccfc0d, /* DRD2B1: var7 = EU3(); EU3(*idx0,var13) */
+ 0x8082a123, /* LCD: idx0 = var1, idx1 = var5; idx1 <= var4; idx0 += inc4, idx1 += inc3 */
+ 0x10801418, /* DRD1A: var5 = var3; FN=0 MORE init=4 WS=0 RS=0 */
+ 0xf88103a4, /* LCDEXT: idx2 = *idx1, idx3 = var2; idx2 < var14; idx2 += inc4, idx3 += inc4 */
+ 0x801a6024, /* LCD: idx4 = var0; ; idx4 += inc4 */
+ 0x10001708, /* DRD1A: var5 = idx1; FN=0 MORE init=0 WS=0 RS=0 */
+ 0x60140002, /* DRD2A: EU0=0 EU1=0 EU2=0 EU3=2 EXT init=0 WS=2 RS=2 */
+ 0x0cccfccf, /* DRD2B1: *idx3 = EU3(); EU3(*idx3,var15) */
+ 0x991a002c, /* LCD: idx2 = idx2, idx3 = idx4; idx2 once var0; idx2 += inc5, idx3 += inc4 */
+ 0x70000002, /* DRD2A: EU0=0 EU1=0 EU2=0 EU3=2 EXT MORE init=0 WS=0 RS=0 */
+ 0x024cfc4d, /* DRD2B1: var9 = EU3(); EU3(*idx1,var13) */
+ 0x60000003, /* DRD2A: EU0=0 EU1=0 EU2=0 EU3=3 EXT init=0 WS=0 RS=0 */
+ 0x0cccf247, /* DRD2B1: *idx3 = EU3(); EU3(var9,var7) */
+ 0x80004000, /* LCDEXT: idx2 = 0x00000000; ; */
+ 0xb8c80029, /* LCD: idx3 = *(idx1 + var0000001a); idx3 once var0; idx3 += inc5 */
+ 0x70000002, /* DRD2A: EU0=0 EU1=0 EU2=0 EU3=2 EXT MORE init=0 WS=0 RS=0 */
+ 0x088cf8d1, /* DRD2B1: idx2 = EU3(); EU3(idx3,var17) */
+ 0x00002f10, /* DRD1A: var11 = idx2; FN=0 init=0 WS=0 RS=0 */
+ 0x99198432, /* LCD: idx2 = idx2, idx3 = idx3; idx2 > var16; idx2 += inc6, idx3 += inc2 */
+ 0x008ac398, /* DRD1A: *idx0 = *idx3; FN=0 init=4 WS=1 RS=1 */
+ 0x80004000, /* LCDEXT: idx2 = 0x00000000; ; */
+ 0x9999802d, /* LCD: idx3 = idx3; idx3 once var0; idx3 += inc5 */
+ 0x70000002, /* DRD2A: EU0=0 EU1=0 EU2=0 EU3=2 EXT MORE init=0 WS=0 RS=0 */
+ 0x048cfc53, /* DRD2B1: var18 = EU3(); EU3(*idx1,var19) */
+ 0x60000008, /* DRD2A: EU0=0 EU1=0 EU2=0 EU3=8 EXT init=0 WS=0 RS=0 */
+ 0x088cf48b, /* DRD2B1: idx2 = EU3(); EU3(var18,var11) */
+ 0x99198481, /* LCD: idx2 = idx2, idx3 = idx3; idx2 > var18; idx2 += inc0, idx3 += inc1 */
+ 0x009ec398, /* DRD1A: *idx0 = *idx3; FN=0 init=4 WS=3 RS=3 */
+ 0x991983b2, /* LCD: idx2 = idx2, idx3 = idx3; idx2 > var14; idx2 += inc6, idx3 += inc2 */
+ 0x088ac398, /* DRD1A: *idx0 = *idx3; FN=0 TFD init=4 WS=1 RS=1 */
+ 0x9919002d, /* LCD: idx2 = idx2; idx2 once var0; idx2 += inc5 */
+ 0x60000005, /* DRD2A: EU0=0 EU1=0 EU2=0 EU3=5 EXT init=0 WS=0 RS=0 */
+ 0x0c4cf88e, /* DRD2B1: *idx1 = EU3(); EU3(idx2,var14) */
+ 0x000001f8, /* NOP */
+
+ /* VAR[13]-VAR[19] */
+ 0x0c000000,
+ 0x40000000,
+ 0x7fff7fff,
+ 0x00000000,
+ 0x00000003,
+ 0x40000004,
+ 0x43ffffff,
+
+ /* INC[0]-INC[6] */
+ 0x40000000,
+ 0xe0000000,
+ 0xe0000000,
+ 0xa0000008,
+ 0x20000000,
+ 0x00000000,
+ 0x4000ffff,
+};
+
diff --git a/drivers/dma/bestcomm/bcom_gen_bd_rx_task.c b/drivers/dma/bestcomm/bcom_gen_bd_rx_task.c
new file mode 100644
index 000000000..8dd38ede2
--- /dev/null
+++ b/drivers/dma/bestcomm/bcom_gen_bd_rx_task.c
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Bestcomm GenBD RX task microcode
+ *
+ * Copyright (C) 2006 AppSpec Computer Technologies Corp.
+ * Jeff Gibbons <jeff.gibbons@appspec.com>
+ * Copyright (c) 2004 Freescale Semiconductor, Inc.
+ *
+ * Based on BestCommAPI-2.2/code_dma/image_rtos1/dma_image.hex
+ * on Tue Mar 4 10:14:12 2006 GMT
+ */
+
+#include <asm/types.h>
+
+/*
+ * The header consists of the following fields:
+ * u32 magic;
+ * u8 desc_size;
+ * u8 var_size;
+ * u8 inc_size;
+ * u8 first_var;
+ * u8 reserved[8];
+ *
+ * The size fields contain the number of 32-bit words.
+ */
+
+u32 bcom_gen_bd_rx_task[] = {
+ /* header */
+ 0x4243544b,
+ 0x0d020409,
+ 0x00000000,
+ 0x00000000,
+
+ /* Task descriptors */
+ 0x808220da, /* LCD: idx0 = var1, idx1 = var4; idx1 <= var3; idx0 += inc3, idx1 += inc2 */
+ 0x13e01010, /* DRD1A: var4 = var2; FN=0 MORE init=31 WS=0 RS=0 */
+ 0xb880025b, /* LCD: idx2 = *idx1, idx3 = var0; idx2 < var9; idx2 += inc3, idx3 += inc3 */
+ 0x10001308, /* DRD1A: var4 = idx1; FN=0 MORE init=0 WS=0 RS=0 */
+ 0x60140002, /* DRD2A: EU0=0 EU1=0 EU2=0 EU3=2 EXT init=0 WS=2 RS=2 */
+ 0x0cccfcca, /* DRD2B1: *idx3 = EU3(); EU3(*idx3,var10) */
+ 0xd9190240, /* LCDEXT: idx2 = idx2; idx2 > var9; idx2 += inc0 */
+ 0xb8c5e009, /* LCD: idx3 = *(idx1 + var00000015); ; idx3 += inc1 */
+ 0x07fecf80, /* DRD1A: *idx3 = *idx0; FN=0 INT init=31 WS=3 RS=3 */
+ 0x99190024, /* LCD: idx2 = idx2; idx2 once var0; idx2 += inc4 */
+ 0x60000005, /* DRD2A: EU0=0 EU1=0 EU2=0 EU3=5 EXT init=0 WS=0 RS=0 */
+ 0x0c4cf889, /* DRD2B1: *idx1 = EU3(); EU3(idx2,var9) */
+ 0x000001f8, /* NOP */
+
+ /* VAR[9]-VAR[10] */
+ 0x40000000,
+ 0x7fff7fff,
+
+ /* INC[0]-INC[3] */
+ 0x40000000,
+ 0xe0000000,
+ 0xa0000008,
+ 0x20000000,
+};
+
diff --git a/drivers/dma/bestcomm/bcom_gen_bd_tx_task.c b/drivers/dma/bestcomm/bcom_gen_bd_tx_task.c
new file mode 100644
index 000000000..844dfe258
--- /dev/null
+++ b/drivers/dma/bestcomm/bcom_gen_bd_tx_task.c
@@ -0,0 +1,65 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Bestcomm GenBD TX task microcode
+ *
+ * Copyright (C) 2006 AppSpec Computer Technologies Corp.
+ * Jeff Gibbons <jeff.gibbons@appspec.com>
+ * Copyright (c) 2004 Freescale Semiconductor, Inc.
+ *
+ * Based on BestCommAPI-2.2/code_dma/image_rtos1/dma_image.hex
+ * on Tue Mar 4 10:14:12 2006 GMT
+ */
+
+#include <asm/types.h>
+
+/*
+ * The header consists of the following fields:
+ * u32 magic;
+ * u8 desc_size;
+ * u8 var_size;
+ * u8 inc_size;
+ * u8 first_var;
+ * u8 reserved[8];
+ *
+ * The size fields contain the number of 32-bit words.
+ */
+
+u32 bcom_gen_bd_tx_task[] = {
+ /* header */
+ 0x4243544b,
+ 0x0f040609,
+ 0x00000000,
+ 0x00000000,
+
+ /* Task descriptors */
+ 0x800220e3, /* LCD: idx0 = var0, idx1 = var4; idx1 <= var3; idx0 += inc4, idx1 += inc3 */
+ 0x13e01010, /* DRD1A: var4 = var2; FN=0 MORE init=31 WS=0 RS=0 */
+ 0xb8808264, /* LCD: idx2 = *idx1, idx3 = var1; idx2 < var9; idx2 += inc4, idx3 += inc4 */
+ 0x10001308, /* DRD1A: var4 = idx1; FN=0 MORE init=0 WS=0 RS=0 */
+ 0x60140002, /* DRD2A: EU0=0 EU1=0 EU2=0 EU3=2 EXT init=0 WS=2 RS=2 */
+ 0x0cccfcca, /* DRD2B1: *idx3 = EU3(); EU3(*idx3,var10) */
+ 0xd9190300, /* LCDEXT: idx2 = idx2; idx2 > var12; idx2 += inc0 */
+ 0xb8c5e009, /* LCD: idx3 = *(idx1 + var00000015); ; idx3 += inc1 */
+ 0x03fec398, /* DRD1A: *idx0 = *idx3; FN=0 init=31 WS=3 RS=3 */
+ 0x9919826a, /* LCD: idx2 = idx2, idx3 = idx3; idx2 > var9; idx2 += inc5, idx3 += inc2 */
+ 0x0feac398, /* DRD1A: *idx0 = *idx3; FN=0 TFD INT init=31 WS=1 RS=1 */
+ 0x99190036, /* LCD: idx2 = idx2; idx2 once var0; idx2 += inc6 */
+ 0x60000005, /* DRD2A: EU0=0 EU1=0 EU2=0 EU3=5 EXT init=0 WS=0 RS=0 */
+ 0x0c4cf889, /* DRD2B1: *idx1 = EU3(); EU3(idx2,var9) */
+ 0x000001f8, /* NOP */
+
+ /* VAR[9]-VAR[12] */
+ 0x40000000,
+ 0x7fff7fff,
+ 0x00000000,
+ 0x40000004,
+
+ /* INC[0]-INC[5] */
+ 0x40000000,
+ 0xe0000000,
+ 0xe0000000,
+ 0xa0000008,
+ 0x20000000,
+ 0x4000ffff,
+};
+
diff --git a/drivers/dma/bestcomm/bestcomm.c b/drivers/dma/bestcomm/bestcomm.c
new file mode 100644
index 000000000..eabbcfcaa
--- /dev/null
+++ b/drivers/dma/bestcomm/bestcomm.c
@@ -0,0 +1,526 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Driver for MPC52xx processor BestComm peripheral controller
+ *
+ * Copyright (C) 2006-2007 Sylvain Munaut <tnt@246tNt.com>
+ * Copyright (C) 2005 Varma Electronics Oy,
+ * ( by Andrey Volkov <avolkov@varma-el.com> )
+ * Copyright (C) 2003-2004 MontaVista, Software, Inc.
+ * ( by Dale Farnsworth <dfarnsworth@mvista.com> )
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_device.h>
+#include <linux/of_irq.h>
+#include <linux/of_platform.h>
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/mpc52xx.h>
+
+#include <linux/fsl/bestcomm/sram.h>
+#include <linux/fsl/bestcomm/bestcomm_priv.h>
+#include "linux/fsl/bestcomm/bestcomm.h"
+
+#define DRIVER_NAME "bestcomm-core"
+
+/* MPC5200 device tree match tables */
+static const struct of_device_id mpc52xx_sram_ids[] = {
+ { .compatible = "fsl,mpc5200-sram", },
+ { .compatible = "mpc5200-sram", },
+ {}
+};
+
+
+struct bcom_engine *bcom_eng = NULL;
+EXPORT_SYMBOL_GPL(bcom_eng); /* needed for inline functions */
+
+/* ======================================================================== */
+/* Public and private API */
+/* ======================================================================== */
+
+/* Private API */
+
+struct bcom_task *
+bcom_task_alloc(int bd_count, int bd_size, int priv_size)
+{
+ int i, tasknum = -1;
+ struct bcom_task *tsk;
+
+ /* Don't try to do anything if bestcomm init failed */
+ if (!bcom_eng)
+ return NULL;
+
+ /* Get and reserve a task num */
+ spin_lock(&bcom_eng->lock);
+
+ for (i=0; i<BCOM_MAX_TASKS; i++)
+ if (!bcom_eng->tdt[i].stop) { /* we use stop as a marker */
+ bcom_eng->tdt[i].stop = 0xfffffffful; /* dummy addr */
+ tasknum = i;
+ break;
+ }
+
+ spin_unlock(&bcom_eng->lock);
+
+ if (tasknum < 0)
+ return NULL;
+
+ /* Allocate our structure */
+ tsk = kzalloc(sizeof(struct bcom_task) + priv_size, GFP_KERNEL);
+ if (!tsk)
+ goto error;
+
+ tsk->tasknum = tasknum;
+ if (priv_size)
+ tsk->priv = (void*)tsk + sizeof(struct bcom_task);
+
+ /* Get IRQ of that task */
+ tsk->irq = irq_of_parse_and_map(bcom_eng->ofnode, tsk->tasknum);
+ if (!tsk->irq)
+ goto error;
+
+ /* Init the BDs, if needed */
+ if (bd_count) {
+ tsk->cookie = kmalloc_array(bd_count, sizeof(void *),
+ GFP_KERNEL);
+ if (!tsk->cookie)
+ goto error;
+
+ tsk->bd = bcom_sram_alloc(bd_count * bd_size, 4, &tsk->bd_pa);
+ if (!tsk->bd)
+ goto error;
+ memset_io(tsk->bd, 0x00, bd_count * bd_size);
+
+ tsk->num_bd = bd_count;
+ tsk->bd_size = bd_size;
+ }
+
+ return tsk;
+
+error:
+ if (tsk) {
+ if (tsk->irq)
+ irq_dispose_mapping(tsk->irq);
+ bcom_sram_free(tsk->bd);
+ kfree(tsk->cookie);
+ kfree(tsk);
+ }
+
+ bcom_eng->tdt[tasknum].stop = 0;
+
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(bcom_task_alloc);
+
+void
+bcom_task_free(struct bcom_task *tsk)
+{
+ /* Stop the task */
+ bcom_disable_task(tsk->tasknum);
+
+ /* Clear TDT */
+ bcom_eng->tdt[tsk->tasknum].start = 0;
+ bcom_eng->tdt[tsk->tasknum].stop = 0;
+
+ /* Free everything */
+ irq_dispose_mapping(tsk->irq);
+ bcom_sram_free(tsk->bd);
+ kfree(tsk->cookie);
+ kfree(tsk);
+}
+EXPORT_SYMBOL_GPL(bcom_task_free);
+
+int
+bcom_load_image(int task, u32 *task_image)
+{
+ struct bcom_task_header *hdr = (struct bcom_task_header *)task_image;
+ struct bcom_tdt *tdt;
+ u32 *desc, *var, *inc;
+ u32 *desc_src, *var_src, *inc_src;
+
+ /* Safety checks */
+ if (hdr->magic != BCOM_TASK_MAGIC) {
+ printk(KERN_ERR DRIVER_NAME
+ ": Trying to load invalid microcode\n");
+ return -EINVAL;
+ }
+
+ if ((task < 0) || (task >= BCOM_MAX_TASKS)) {
+ printk(KERN_ERR DRIVER_NAME
+ ": Trying to load invalid task %d\n", task);
+ return -EINVAL;
+ }
+
+ /* Initial load or reload */
+ tdt = &bcom_eng->tdt[task];
+
+ if (tdt->start) {
+ desc = bcom_task_desc(task);
+ if (hdr->desc_size != bcom_task_num_descs(task)) {
+ printk(KERN_ERR DRIVER_NAME
+ ": Trying to reload wrong task image "
+ "(%d size %d/%d)!\n",
+ task,
+ hdr->desc_size,
+ bcom_task_num_descs(task));
+ return -EINVAL;
+ }
+ } else {
+ phys_addr_t start_pa;
+
+ desc = bcom_sram_alloc(hdr->desc_size * sizeof(u32), 4, &start_pa);
+ if (!desc)
+ return -ENOMEM;
+
+ tdt->start = start_pa;
+ tdt->stop = start_pa + ((hdr->desc_size-1) * sizeof(u32));
+ }
+
+ var = bcom_task_var(task);
+ inc = bcom_task_inc(task);
+
+ /* Clear & copy */
+ memset_io(var, 0x00, BCOM_VAR_SIZE);
+ memset_io(inc, 0x00, BCOM_INC_SIZE);
+
+ desc_src = (u32 *)(hdr + 1);
+ var_src = desc_src + hdr->desc_size;
+ inc_src = var_src + hdr->var_size;
+
+ memcpy_toio(desc, desc_src, hdr->desc_size * sizeof(u32));
+ memcpy_toio(var + hdr->first_var, var_src, hdr->var_size * sizeof(u32));
+ memcpy_toio(inc, inc_src, hdr->inc_size * sizeof(u32));
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(bcom_load_image);
+
+void
+bcom_set_initiator(int task, int initiator)
+{
+ int i;
+ int num_descs;
+ u32 *desc;
+ int next_drd_has_initiator;
+
+ bcom_set_tcr_initiator(task, initiator);
+
+ /* Just setting tcr is apparently not enough due to some problem */
+ /* with it. So we just go thru all the microcode and replace in */
+ /* the DRD directly */
+
+ desc = bcom_task_desc(task);
+ next_drd_has_initiator = 1;
+ num_descs = bcom_task_num_descs(task);
+
+ for (i=0; i<num_descs; i++, desc++) {
+ if (!bcom_desc_is_drd(*desc))
+ continue;
+ if (next_drd_has_initiator)
+ if (bcom_desc_initiator(*desc) != BCOM_INITIATOR_ALWAYS)
+ bcom_set_desc_initiator(desc, initiator);
+ next_drd_has_initiator = !bcom_drd_is_extended(*desc);
+ }
+}
+EXPORT_SYMBOL_GPL(bcom_set_initiator);
+
+
+/* Public API */
+
+void
+bcom_enable(struct bcom_task *tsk)
+{
+ bcom_enable_task(tsk->tasknum);
+}
+EXPORT_SYMBOL_GPL(bcom_enable);
+
+void
+bcom_disable(struct bcom_task *tsk)
+{
+ bcom_disable_task(tsk->tasknum);
+}
+EXPORT_SYMBOL_GPL(bcom_disable);
+
+
+/* ======================================================================== */
+/* Engine init/cleanup */
+/* ======================================================================== */
+
+/* Function Descriptor table */
+/* this will need to be updated if Freescale changes their task code FDT */
+static u32 fdt_ops[] = {
+ 0xa0045670, /* FDT[48] - load_acc() */
+ 0x80045670, /* FDT[49] - unload_acc() */
+ 0x21800000, /* FDT[50] - and() */
+ 0x21e00000, /* FDT[51] - or() */
+ 0x21500000, /* FDT[52] - xor() */
+ 0x21400000, /* FDT[53] - andn() */
+ 0x21500000, /* FDT[54] - not() */
+ 0x20400000, /* FDT[55] - add() */
+ 0x20500000, /* FDT[56] - sub() */
+ 0x20800000, /* FDT[57] - lsh() */
+ 0x20a00000, /* FDT[58] - rsh() */
+ 0xc0170000, /* FDT[59] - crc8() */
+ 0xc0145670, /* FDT[60] - crc16() */
+ 0xc0345670, /* FDT[61] - crc32() */
+ 0xa0076540, /* FDT[62] - endian32() */
+ 0xa0000760, /* FDT[63] - endian16() */
+};
+
+
+static int bcom_engine_init(void)
+{
+ int task;
+ phys_addr_t tdt_pa, ctx_pa, var_pa, fdt_pa;
+ unsigned int tdt_size, ctx_size, var_size, fdt_size;
+
+ /* Allocate & clear SRAM zones for FDT, TDTs, contexts and vars/incs */
+ tdt_size = BCOM_MAX_TASKS * sizeof(struct bcom_tdt);
+ ctx_size = BCOM_MAX_TASKS * BCOM_CTX_SIZE;
+ var_size = BCOM_MAX_TASKS * (BCOM_VAR_SIZE + BCOM_INC_SIZE);
+ fdt_size = BCOM_FDT_SIZE;
+
+ bcom_eng->tdt = bcom_sram_alloc(tdt_size, sizeof(u32), &tdt_pa);
+ bcom_eng->ctx = bcom_sram_alloc(ctx_size, BCOM_CTX_ALIGN, &ctx_pa);
+ bcom_eng->var = bcom_sram_alloc(var_size, BCOM_VAR_ALIGN, &var_pa);
+ bcom_eng->fdt = bcom_sram_alloc(fdt_size, BCOM_FDT_ALIGN, &fdt_pa);
+
+ if (!bcom_eng->tdt || !bcom_eng->ctx || !bcom_eng->var || !bcom_eng->fdt) {
+ printk(KERN_ERR "DMA: SRAM alloc failed in engine init !\n");
+
+ bcom_sram_free(bcom_eng->tdt);
+ bcom_sram_free(bcom_eng->ctx);
+ bcom_sram_free(bcom_eng->var);
+ bcom_sram_free(bcom_eng->fdt);
+
+ return -ENOMEM;
+ }
+
+ memset_io(bcom_eng->tdt, 0x00, tdt_size);
+ memset_io(bcom_eng->ctx, 0x00, ctx_size);
+ memset_io(bcom_eng->var, 0x00, var_size);
+ memset_io(bcom_eng->fdt, 0x00, fdt_size);
+
+ /* Copy the FDT for the EU#3 */
+ memcpy_toio(&bcom_eng->fdt[48], fdt_ops, sizeof(fdt_ops));
+
+ /* Initialize Task base structure */
+ for (task=0; task<BCOM_MAX_TASKS; task++)
+ {
+ out_be16(&bcom_eng->regs->tcr[task], 0);
+ out_8(&bcom_eng->regs->ipr[task], 0);
+
+ bcom_eng->tdt[task].context = ctx_pa;
+ bcom_eng->tdt[task].var = var_pa;
+ bcom_eng->tdt[task].fdt = fdt_pa;
+
+ var_pa += BCOM_VAR_SIZE + BCOM_INC_SIZE;
+ ctx_pa += BCOM_CTX_SIZE;
+ }
+
+ out_be32(&bcom_eng->regs->taskBar, tdt_pa);
+
+ /* Init 'always' initiator */
+ out_8(&bcom_eng->regs->ipr[BCOM_INITIATOR_ALWAYS], BCOM_IPR_ALWAYS);
+
+ /* Disable COMM Bus Prefetch on the original 5200; it's broken */
+ if ((mfspr(SPRN_SVR) & MPC5200_SVR_MASK) == MPC5200_SVR)
+ bcom_disable_prefetch();
+
+ /* Init lock */
+ spin_lock_init(&bcom_eng->lock);
+
+ return 0;
+}
+
+static void
+bcom_engine_cleanup(void)
+{
+ int task;
+
+ /* Stop all tasks */
+ for (task=0; task<BCOM_MAX_TASKS; task++)
+ {
+ out_be16(&bcom_eng->regs->tcr[task], 0);
+ out_8(&bcom_eng->regs->ipr[task], 0);
+ }
+
+ out_be32(&bcom_eng->regs->taskBar, 0ul);
+
+ /* Release the SRAM zones */
+ bcom_sram_free(bcom_eng->tdt);
+ bcom_sram_free(bcom_eng->ctx);
+ bcom_sram_free(bcom_eng->var);
+ bcom_sram_free(bcom_eng->fdt);
+}
+
+
+/* ======================================================================== */
+/* OF platform driver */
+/* ======================================================================== */
+
+static int mpc52xx_bcom_probe(struct platform_device *op)
+{
+ struct device_node *ofn_sram;
+ struct resource res_bcom;
+
+ int rv;
+
+ /* Inform user we're ok so far */
+ printk(KERN_INFO "DMA: MPC52xx BestComm driver\n");
+
+ /* Get the bestcomm node */
+ of_node_get(op->dev.of_node);
+
+ /* Prepare SRAM */
+ ofn_sram = of_find_matching_node(NULL, mpc52xx_sram_ids);
+ if (!ofn_sram) {
+ printk(KERN_ERR DRIVER_NAME ": "
+ "No SRAM found in device tree\n");
+ rv = -ENODEV;
+ goto error_ofput;
+ }
+ rv = bcom_sram_init(ofn_sram, DRIVER_NAME);
+ of_node_put(ofn_sram);
+
+ if (rv) {
+ printk(KERN_ERR DRIVER_NAME ": "
+ "Error in SRAM init\n");
+ goto error_ofput;
+ }
+
+ /* Get a clean struct */
+ bcom_eng = kzalloc(sizeof(struct bcom_engine), GFP_KERNEL);
+ if (!bcom_eng) {
+ rv = -ENOMEM;
+ goto error_sramclean;
+ }
+
+ /* Save the node */
+ bcom_eng->ofnode = op->dev.of_node;
+
+ /* Get, reserve & map io */
+ if (of_address_to_resource(op->dev.of_node, 0, &res_bcom)) {
+ printk(KERN_ERR DRIVER_NAME ": "
+ "Can't get resource\n");
+ rv = -EINVAL;
+ goto error_sramclean;
+ }
+
+ if (!request_mem_region(res_bcom.start, resource_size(&res_bcom),
+ DRIVER_NAME)) {
+ printk(KERN_ERR DRIVER_NAME ": "
+ "Can't request registers region\n");
+ rv = -EBUSY;
+ goto error_sramclean;
+ }
+
+ bcom_eng->regs_base = res_bcom.start;
+ bcom_eng->regs = ioremap(res_bcom.start, sizeof(struct mpc52xx_sdma));
+ if (!bcom_eng->regs) {
+ printk(KERN_ERR DRIVER_NAME ": "
+ "Can't map registers\n");
+ rv = -ENOMEM;
+ goto error_release;
+ }
+
+ /* Now, do the real init */
+ rv = bcom_engine_init();
+ if (rv)
+ goto error_unmap;
+
+ /* Done ! */
+ printk(KERN_INFO "DMA: MPC52xx BestComm engine @%08lx ok !\n",
+ (long)bcom_eng->regs_base);
+
+ return 0;
+
+ /* Error path */
+error_unmap:
+ iounmap(bcom_eng->regs);
+error_release:
+ release_mem_region(res_bcom.start, sizeof(struct mpc52xx_sdma));
+error_sramclean:
+ kfree(bcom_eng);
+ bcom_sram_cleanup();
+error_ofput:
+ of_node_put(op->dev.of_node);
+
+ printk(KERN_ERR "DMA: MPC52xx BestComm init failed !\n");
+
+ return rv;
+}
+
+
+static int mpc52xx_bcom_remove(struct platform_device *op)
+{
+ /* Clean up the engine */
+ bcom_engine_cleanup();
+
+ /* Cleanup SRAM */
+ bcom_sram_cleanup();
+
+ /* Release regs */
+ iounmap(bcom_eng->regs);
+ release_mem_region(bcom_eng->regs_base, sizeof(struct mpc52xx_sdma));
+
+ /* Release the node */
+ of_node_put(bcom_eng->ofnode);
+
+ /* Release memory */
+ kfree(bcom_eng);
+ bcom_eng = NULL;
+
+ return 0;
+}
+
+static const struct of_device_id mpc52xx_bcom_of_match[] = {
+ { .compatible = "fsl,mpc5200-bestcomm", },
+ { .compatible = "mpc5200-bestcomm", },
+ {},
+};
+
+MODULE_DEVICE_TABLE(of, mpc52xx_bcom_of_match);
+
+
+static struct platform_driver mpc52xx_bcom_of_platform_driver = {
+ .probe = mpc52xx_bcom_probe,
+ .remove = mpc52xx_bcom_remove,
+ .driver = {
+ .name = DRIVER_NAME,
+ .of_match_table = mpc52xx_bcom_of_match,
+ },
+};
+
+
+/* ======================================================================== */
+/* Module */
+/* ======================================================================== */
+
+static int __init
+mpc52xx_bcom_init(void)
+{
+ return platform_driver_register(&mpc52xx_bcom_of_platform_driver);
+}
+
+static void __exit
+mpc52xx_bcom_exit(void)
+{
+ platform_driver_unregister(&mpc52xx_bcom_of_platform_driver);
+}
+
+/* If we're not a module, we must make sure everything is setup before */
+/* anyone tries to use us ... that's why we use subsys_initcall instead */
+/* of module_init. */
+subsys_initcall(mpc52xx_bcom_init);
+module_exit(mpc52xx_bcom_exit);
+
+MODULE_DESCRIPTION("Freescale MPC52xx BestComm DMA");
+MODULE_AUTHOR("Sylvain Munaut <tnt@246tNt.com>");
+MODULE_AUTHOR("Andrey Volkov <avolkov@varma-el.com>");
+MODULE_AUTHOR("Dale Farnsworth <dfarnsworth@mvista.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/bestcomm/fec.c b/drivers/dma/bestcomm/fec.c
new file mode 100644
index 000000000..3a4a2f791
--- /dev/null
+++ b/drivers/dma/bestcomm/fec.c
@@ -0,0 +1,265 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Bestcomm FEC tasks driver
+ *
+ * Copyright (C) 2006-2007 Sylvain Munaut <tnt@246tNt.com>
+ * Copyright (C) 2003-2004 MontaVista, Software, Inc.
+ * ( by Dale Farnsworth <dfarnsworth@mvista.com> )
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <asm/io.h>
+
+#include <linux/fsl/bestcomm/bestcomm.h>
+#include <linux/fsl/bestcomm/bestcomm_priv.h>
+#include <linux/fsl/bestcomm/fec.h>
+
+
+/* ======================================================================== */
+/* Task image/var/inc */
+/* ======================================================================== */
+
+/* fec tasks images */
+extern u32 bcom_fec_rx_task[];
+extern u32 bcom_fec_tx_task[];
+
+/* rx task vars that need to be set before enabling the task */
+struct bcom_fec_rx_var {
+ u32 enable; /* (u16*) address of task's control register */
+ u32 fifo; /* (u32*) address of fec's fifo */
+ u32 bd_base; /* (struct bcom_bd*) beginning of ring buffer */
+ u32 bd_last; /* (struct bcom_bd*) end of ring buffer */
+ u32 bd_start; /* (struct bcom_bd*) current bd */
+ u32 buffer_size; /* size of receive buffer */
+};
+
+/* rx task incs that need to be set before enabling the task */
+struct bcom_fec_rx_inc {
+ u16 pad0;
+ s16 incr_bytes;
+ u16 pad1;
+ s16 incr_dst;
+ u16 pad2;
+ s16 incr_dst_ma;
+};
+
+/* tx task vars that need to be set before enabling the task */
+struct bcom_fec_tx_var {
+ u32 DRD; /* (u32*) address of self-modified DRD */
+ u32 fifo; /* (u32*) address of fec's fifo */
+ u32 enable; /* (u16*) address of task's control register */
+ u32 bd_base; /* (struct bcom_bd*) beginning of ring buffer */
+ u32 bd_last; /* (struct bcom_bd*) end of ring buffer */
+ u32 bd_start; /* (struct bcom_bd*) current bd */
+ u32 buffer_size; /* set by uCode for each packet */
+};
+
+/* tx task incs that need to be set before enabling the task */
+struct bcom_fec_tx_inc {
+ u16 pad0;
+ s16 incr_bytes;
+ u16 pad1;
+ s16 incr_src;
+ u16 pad2;
+ s16 incr_src_ma;
+};
+
+/* private structure in the task */
+struct bcom_fec_priv {
+ phys_addr_t fifo;
+ int maxbufsize;
+};
+
+
+/* ======================================================================== */
+/* Task support code */
+/* ======================================================================== */
+
+struct bcom_task *
+bcom_fec_rx_init(int queue_len, phys_addr_t fifo, int maxbufsize)
+{
+ struct bcom_task *tsk;
+ struct bcom_fec_priv *priv;
+
+ tsk = bcom_task_alloc(queue_len, sizeof(struct bcom_fec_bd),
+ sizeof(struct bcom_fec_priv));
+ if (!tsk)
+ return NULL;
+
+ tsk->flags = BCOM_FLAGS_NONE;
+
+ priv = tsk->priv;
+ priv->fifo = fifo;
+ priv->maxbufsize = maxbufsize;
+
+ if (bcom_fec_rx_reset(tsk)) {
+ bcom_task_free(tsk);
+ return NULL;
+ }
+
+ return tsk;
+}
+EXPORT_SYMBOL_GPL(bcom_fec_rx_init);
+
+int
+bcom_fec_rx_reset(struct bcom_task *tsk)
+{
+ struct bcom_fec_priv *priv = tsk->priv;
+ struct bcom_fec_rx_var *var;
+ struct bcom_fec_rx_inc *inc;
+
+ /* Shutdown the task */
+ bcom_disable_task(tsk->tasknum);
+
+ /* Reset the microcode */
+ var = (struct bcom_fec_rx_var *) bcom_task_var(tsk->tasknum);
+ inc = (struct bcom_fec_rx_inc *) bcom_task_inc(tsk->tasknum);
+
+ if (bcom_load_image(tsk->tasknum, bcom_fec_rx_task))
+ return -1;
+
+ var->enable = bcom_eng->regs_base +
+ offsetof(struct mpc52xx_sdma, tcr[tsk->tasknum]);
+ var->fifo = (u32) priv->fifo;
+ var->bd_base = tsk->bd_pa;
+ var->bd_last = tsk->bd_pa + ((tsk->num_bd-1) * tsk->bd_size);
+ var->bd_start = tsk->bd_pa;
+ var->buffer_size = priv->maxbufsize;
+
+ inc->incr_bytes = -(s16)sizeof(u32); /* These should be in the */
+ inc->incr_dst = sizeof(u32); /* task image, but we stick */
+ inc->incr_dst_ma= sizeof(u8); /* to the official ones */
+
+ /* Reset the BDs */
+ tsk->index = 0;
+ tsk->outdex = 0;
+
+ memset_io(tsk->bd, 0x00, tsk->num_bd * tsk->bd_size);
+
+ /* Configure some stuff */
+ bcom_set_task_pragma(tsk->tasknum, BCOM_FEC_RX_BD_PRAGMA);
+ bcom_set_task_auto_start(tsk->tasknum, tsk->tasknum);
+
+ out_8(&bcom_eng->regs->ipr[BCOM_INITIATOR_FEC_RX], BCOM_IPR_FEC_RX);
+
+ out_be32(&bcom_eng->regs->IntPend, 1<<tsk->tasknum); /* Clear ints */
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(bcom_fec_rx_reset);
+
+void
+bcom_fec_rx_release(struct bcom_task *tsk)
+{
+ /* Nothing special for the FEC tasks */
+ bcom_task_free(tsk);
+}
+EXPORT_SYMBOL_GPL(bcom_fec_rx_release);
+
+
+
+ /* Return 2nd to last DRD */
+ /* This is an ugly hack, but at least it's only done
+ once at initialization */
+static u32 *self_modified_drd(int tasknum)
+{
+ u32 *desc;
+ int num_descs;
+ int drd_count;
+ int i;
+
+ num_descs = bcom_task_num_descs(tasknum);
+ desc = bcom_task_desc(tasknum) + num_descs - 1;
+ drd_count = 0;
+ for (i=0; i<num_descs; i++, desc--)
+ if (bcom_desc_is_drd(*desc) && ++drd_count == 3)
+ break;
+ return desc;
+}
+
+struct bcom_task *
+bcom_fec_tx_init(int queue_len, phys_addr_t fifo)
+{
+ struct bcom_task *tsk;
+ struct bcom_fec_priv *priv;
+
+ tsk = bcom_task_alloc(queue_len, sizeof(struct bcom_fec_bd),
+ sizeof(struct bcom_fec_priv));
+ if (!tsk)
+ return NULL;
+
+ tsk->flags = BCOM_FLAGS_ENABLE_TASK;
+
+ priv = tsk->priv;
+ priv->fifo = fifo;
+
+ if (bcom_fec_tx_reset(tsk)) {
+ bcom_task_free(tsk);
+ return NULL;
+ }
+
+ return tsk;
+}
+EXPORT_SYMBOL_GPL(bcom_fec_tx_init);
+
+int
+bcom_fec_tx_reset(struct bcom_task *tsk)
+{
+ struct bcom_fec_priv *priv = tsk->priv;
+ struct bcom_fec_tx_var *var;
+ struct bcom_fec_tx_inc *inc;
+
+ /* Shutdown the task */
+ bcom_disable_task(tsk->tasknum);
+
+ /* Reset the microcode */
+ var = (struct bcom_fec_tx_var *) bcom_task_var(tsk->tasknum);
+ inc = (struct bcom_fec_tx_inc *) bcom_task_inc(tsk->tasknum);
+
+ if (bcom_load_image(tsk->tasknum, bcom_fec_tx_task))
+ return -1;
+
+ var->enable = bcom_eng->regs_base +
+ offsetof(struct mpc52xx_sdma, tcr[tsk->tasknum]);
+ var->fifo = (u32) priv->fifo;
+ var->DRD = bcom_sram_va2pa(self_modified_drd(tsk->tasknum));
+ var->bd_base = tsk->bd_pa;
+ var->bd_last = tsk->bd_pa + ((tsk->num_bd-1) * tsk->bd_size);
+ var->bd_start = tsk->bd_pa;
+
+ inc->incr_bytes = -(s16)sizeof(u32); /* These should be in the */
+ inc->incr_src = sizeof(u32); /* task image, but we stick */
+ inc->incr_src_ma= sizeof(u8); /* to the official ones */
+
+ /* Reset the BDs */
+ tsk->index = 0;
+ tsk->outdex = 0;
+
+ memset_io(tsk->bd, 0x00, tsk->num_bd * tsk->bd_size);
+
+ /* Configure some stuff */
+ bcom_set_task_pragma(tsk->tasknum, BCOM_FEC_TX_BD_PRAGMA);
+ bcom_set_task_auto_start(tsk->tasknum, tsk->tasknum);
+
+ out_8(&bcom_eng->regs->ipr[BCOM_INITIATOR_FEC_TX], BCOM_IPR_FEC_TX);
+
+ out_be32(&bcom_eng->regs->IntPend, 1<<tsk->tasknum); /* Clear ints */
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(bcom_fec_tx_reset);
+
+void
+bcom_fec_tx_release(struct bcom_task *tsk)
+{
+ /* Nothing special for the FEC tasks */
+ bcom_task_free(tsk);
+}
+EXPORT_SYMBOL_GPL(bcom_fec_tx_release);
+
+
+MODULE_DESCRIPTION("BestComm FEC tasks driver");
+MODULE_AUTHOR("Dale Farnsworth <dfarnsworth@mvista.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/bestcomm/gen_bd.c b/drivers/dma/bestcomm/gen_bd.c
new file mode 100644
index 000000000..8a24a5cbc
--- /dev/null
+++ b/drivers/dma/bestcomm/gen_bd.c
@@ -0,0 +1,350 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Driver for MPC52xx processor BestComm General Buffer Descriptor
+ *
+ * Copyright (C) 2007 Sylvain Munaut <tnt@246tNt.com>
+ * Copyright (C) 2006 AppSpec Computer Technologies Corp.
+ * Jeff Gibbons <jeff.gibbons@appspec.com>
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <asm/errno.h>
+#include <asm/io.h>
+
+#include <asm/mpc52xx.h>
+#include <asm/mpc52xx_psc.h>
+
+#include <linux/fsl/bestcomm/bestcomm.h>
+#include <linux/fsl/bestcomm/bestcomm_priv.h>
+#include <linux/fsl/bestcomm/gen_bd.h>
+
+
+/* ======================================================================== */
+/* Task image/var/inc */
+/* ======================================================================== */
+
+/* gen_bd tasks images */
+extern u32 bcom_gen_bd_rx_task[];
+extern u32 bcom_gen_bd_tx_task[];
+
+/* rx task vars that need to be set before enabling the task */
+struct bcom_gen_bd_rx_var {
+ u32 enable; /* (u16*) address of task's control register */
+ u32 fifo; /* (u32*) address of gen_bd's fifo */
+ u32 bd_base; /* (struct bcom_bd*) beginning of ring buffer */
+ u32 bd_last; /* (struct bcom_bd*) end of ring buffer */
+ u32 bd_start; /* (struct bcom_bd*) current bd */
+ u32 buffer_size; /* size of receive buffer */
+};
+
+/* rx task incs that need to be set before enabling the task */
+struct bcom_gen_bd_rx_inc {
+ u16 pad0;
+ s16 incr_bytes;
+ u16 pad1;
+ s16 incr_dst;
+};
+
+/* tx task vars that need to be set before enabling the task */
+struct bcom_gen_bd_tx_var {
+ u32 fifo; /* (u32*) address of gen_bd's fifo */
+ u32 enable; /* (u16*) address of task's control register */
+ u32 bd_base; /* (struct bcom_bd*) beginning of ring buffer */
+ u32 bd_last; /* (struct bcom_bd*) end of ring buffer */
+ u32 bd_start; /* (struct bcom_bd*) current bd */
+ u32 buffer_size; /* set by uCode for each packet */
+};
+
+/* tx task incs that need to be set before enabling the task */
+struct bcom_gen_bd_tx_inc {
+ u16 pad0;
+ s16 incr_bytes;
+ u16 pad1;
+ s16 incr_src;
+ u16 pad2;
+ s16 incr_src_ma;
+};
+
+/* private structure */
+struct bcom_gen_bd_priv {
+ phys_addr_t fifo;
+ int initiator;
+ int ipr;
+ int maxbufsize;
+};
+
+
+/* ======================================================================== */
+/* Task support code */
+/* ======================================================================== */
+
+struct bcom_task *
+bcom_gen_bd_rx_init(int queue_len, phys_addr_t fifo,
+ int initiator, int ipr, int maxbufsize)
+{
+ struct bcom_task *tsk;
+ struct bcom_gen_bd_priv *priv;
+
+ tsk = bcom_task_alloc(queue_len, sizeof(struct bcom_gen_bd),
+ sizeof(struct bcom_gen_bd_priv));
+ if (!tsk)
+ return NULL;
+
+ tsk->flags = BCOM_FLAGS_NONE;
+
+ priv = tsk->priv;
+ priv->fifo = fifo;
+ priv->initiator = initiator;
+ priv->ipr = ipr;
+ priv->maxbufsize = maxbufsize;
+
+ if (bcom_gen_bd_rx_reset(tsk)) {
+ bcom_task_free(tsk);
+ return NULL;
+ }
+
+ return tsk;
+}
+EXPORT_SYMBOL_GPL(bcom_gen_bd_rx_init);
+
+int
+bcom_gen_bd_rx_reset(struct bcom_task *tsk)
+{
+ struct bcom_gen_bd_priv *priv = tsk->priv;
+ struct bcom_gen_bd_rx_var *var;
+ struct bcom_gen_bd_rx_inc *inc;
+
+ /* Shutdown the task */
+ bcom_disable_task(tsk->tasknum);
+
+ /* Reset the microcode */
+ var = (struct bcom_gen_bd_rx_var *) bcom_task_var(tsk->tasknum);
+ inc = (struct bcom_gen_bd_rx_inc *) bcom_task_inc(tsk->tasknum);
+
+ if (bcom_load_image(tsk->tasknum, bcom_gen_bd_rx_task))
+ return -1;
+
+ var->enable = bcom_eng->regs_base +
+ offsetof(struct mpc52xx_sdma, tcr[tsk->tasknum]);
+ var->fifo = (u32) priv->fifo;
+ var->bd_base = tsk->bd_pa;
+ var->bd_last = tsk->bd_pa + ((tsk->num_bd-1) * tsk->bd_size);
+ var->bd_start = tsk->bd_pa;
+ var->buffer_size = priv->maxbufsize;
+
+ inc->incr_bytes = -(s16)sizeof(u32);
+ inc->incr_dst = sizeof(u32);
+
+ /* Reset the BDs */
+ tsk->index = 0;
+ tsk->outdex = 0;
+
+ memset_io(tsk->bd, 0x00, tsk->num_bd * tsk->bd_size);
+
+ /* Configure some stuff */
+ bcom_set_task_pragma(tsk->tasknum, BCOM_GEN_RX_BD_PRAGMA);
+ bcom_set_task_auto_start(tsk->tasknum, tsk->tasknum);
+
+ out_8(&bcom_eng->regs->ipr[priv->initiator], priv->ipr);
+ bcom_set_initiator(tsk->tasknum, priv->initiator);
+
+ out_be32(&bcom_eng->regs->IntPend, 1<<tsk->tasknum); /* Clear ints */
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(bcom_gen_bd_rx_reset);
+
+void
+bcom_gen_bd_rx_release(struct bcom_task *tsk)
+{
+ /* Nothing special for the GenBD tasks */
+ bcom_task_free(tsk);
+}
+EXPORT_SYMBOL_GPL(bcom_gen_bd_rx_release);
+
+
+extern struct bcom_task *
+bcom_gen_bd_tx_init(int queue_len, phys_addr_t fifo,
+ int initiator, int ipr)
+{
+ struct bcom_task *tsk;
+ struct bcom_gen_bd_priv *priv;
+
+ tsk = bcom_task_alloc(queue_len, sizeof(struct bcom_gen_bd),
+ sizeof(struct bcom_gen_bd_priv));
+ if (!tsk)
+ return NULL;
+
+ tsk->flags = BCOM_FLAGS_NONE;
+
+ priv = tsk->priv;
+ priv->fifo = fifo;
+ priv->initiator = initiator;
+ priv->ipr = ipr;
+
+ if (bcom_gen_bd_tx_reset(tsk)) {
+ bcom_task_free(tsk);
+ return NULL;
+ }
+
+ return tsk;
+}
+EXPORT_SYMBOL_GPL(bcom_gen_bd_tx_init);
+
+int
+bcom_gen_bd_tx_reset(struct bcom_task *tsk)
+{
+ struct bcom_gen_bd_priv *priv = tsk->priv;
+ struct bcom_gen_bd_tx_var *var;
+ struct bcom_gen_bd_tx_inc *inc;
+
+ /* Shutdown the task */
+ bcom_disable_task(tsk->tasknum);
+
+ /* Reset the microcode */
+ var = (struct bcom_gen_bd_tx_var *) bcom_task_var(tsk->tasknum);
+ inc = (struct bcom_gen_bd_tx_inc *) bcom_task_inc(tsk->tasknum);
+
+ if (bcom_load_image(tsk->tasknum, bcom_gen_bd_tx_task))
+ return -1;
+
+ var->enable = bcom_eng->regs_base +
+ offsetof(struct mpc52xx_sdma, tcr[tsk->tasknum]);
+ var->fifo = (u32) priv->fifo;
+ var->bd_base = tsk->bd_pa;
+ var->bd_last = tsk->bd_pa + ((tsk->num_bd-1) * tsk->bd_size);
+ var->bd_start = tsk->bd_pa;
+
+ inc->incr_bytes = -(s16)sizeof(u32);
+ inc->incr_src = sizeof(u32);
+ inc->incr_src_ma = sizeof(u8);
+
+ /* Reset the BDs */
+ tsk->index = 0;
+ tsk->outdex = 0;
+
+ memset_io(tsk->bd, 0x00, tsk->num_bd * tsk->bd_size);
+
+ /* Configure some stuff */
+ bcom_set_task_pragma(tsk->tasknum, BCOM_GEN_TX_BD_PRAGMA);
+ bcom_set_task_auto_start(tsk->tasknum, tsk->tasknum);
+
+ out_8(&bcom_eng->regs->ipr[priv->initiator], priv->ipr);
+ bcom_set_initiator(tsk->tasknum, priv->initiator);
+
+ out_be32(&bcom_eng->regs->IntPend, 1<<tsk->tasknum); /* Clear ints */
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(bcom_gen_bd_tx_reset);
+
+void
+bcom_gen_bd_tx_release(struct bcom_task *tsk)
+{
+ /* Nothing special for the GenBD tasks */
+ bcom_task_free(tsk);
+}
+EXPORT_SYMBOL_GPL(bcom_gen_bd_tx_release);
+
+/* ---------------------------------------------------------------------
+ * PSC support code
+ */
+
+/**
+ * bcom_psc_parameters - Bestcomm initialization value table for PSC devices
+ *
+ * This structure is only used internally. It is a lookup table for PSC
+ * specific parameters to bestcomm tasks.
+ */
+static struct bcom_psc_params {
+ int rx_initiator;
+ int rx_ipr;
+ int tx_initiator;
+ int tx_ipr;
+} bcom_psc_params[] = {
+ [0] = {
+ .rx_initiator = BCOM_INITIATOR_PSC1_RX,
+ .rx_ipr = BCOM_IPR_PSC1_RX,
+ .tx_initiator = BCOM_INITIATOR_PSC1_TX,
+ .tx_ipr = BCOM_IPR_PSC1_TX,
+ },
+ [1] = {
+ .rx_initiator = BCOM_INITIATOR_PSC2_RX,
+ .rx_ipr = BCOM_IPR_PSC2_RX,
+ .tx_initiator = BCOM_INITIATOR_PSC2_TX,
+ .tx_ipr = BCOM_IPR_PSC2_TX,
+ },
+ [2] = {
+ .rx_initiator = BCOM_INITIATOR_PSC3_RX,
+ .rx_ipr = BCOM_IPR_PSC3_RX,
+ .tx_initiator = BCOM_INITIATOR_PSC3_TX,
+ .tx_ipr = BCOM_IPR_PSC3_TX,
+ },
+ [3] = {
+ .rx_initiator = BCOM_INITIATOR_PSC4_RX,
+ .rx_ipr = BCOM_IPR_PSC4_RX,
+ .tx_initiator = BCOM_INITIATOR_PSC4_TX,
+ .tx_ipr = BCOM_IPR_PSC4_TX,
+ },
+ [4] = {
+ .rx_initiator = BCOM_INITIATOR_PSC5_RX,
+ .rx_ipr = BCOM_IPR_PSC5_RX,
+ .tx_initiator = BCOM_INITIATOR_PSC5_TX,
+ .tx_ipr = BCOM_IPR_PSC5_TX,
+ },
+ [5] = {
+ .rx_initiator = BCOM_INITIATOR_PSC6_RX,
+ .rx_ipr = BCOM_IPR_PSC6_RX,
+ .tx_initiator = BCOM_INITIATOR_PSC6_TX,
+ .tx_ipr = BCOM_IPR_PSC6_TX,
+ },
+};
+
+/**
+ * bcom_psc_gen_bd_rx_init - Allocate a receive bcom_task for a PSC port
+ * @psc_num: Number of the PSC to allocate a task for
+ * @queue_len: number of buffer descriptors to allocate for the task
+ * @fifo: physical address of FIFO register
+ * @maxbufsize: Maximum receive data size in bytes.
+ *
+ * Allocate a bestcomm task structure for receiving data from a PSC.
+ */
+struct bcom_task * bcom_psc_gen_bd_rx_init(unsigned psc_num, int queue_len,
+ phys_addr_t fifo, int maxbufsize)
+{
+ if (psc_num >= MPC52xx_PSC_MAXNUM)
+ return NULL;
+
+ return bcom_gen_bd_rx_init(queue_len, fifo,
+ bcom_psc_params[psc_num].rx_initiator,
+ bcom_psc_params[psc_num].rx_ipr,
+ maxbufsize);
+}
+EXPORT_SYMBOL_GPL(bcom_psc_gen_bd_rx_init);
+
+/**
+ * bcom_psc_gen_bd_tx_init - Allocate a transmit bcom_task for a PSC port
+ * @psc_num: Number of the PSC to allocate a task for
+ * @queue_len: number of buffer descriptors to allocate for the task
+ * @fifo: physical address of FIFO register
+ *
+ * Allocate a bestcomm task structure for transmitting data to a PSC.
+ */
+struct bcom_task *
+bcom_psc_gen_bd_tx_init(unsigned psc_num, int queue_len, phys_addr_t fifo)
+{
+ struct psc;
+ return bcom_gen_bd_tx_init(queue_len, fifo,
+ bcom_psc_params[psc_num].tx_initiator,
+ bcom_psc_params[psc_num].tx_ipr);
+}
+EXPORT_SYMBOL_GPL(bcom_psc_gen_bd_tx_init);
+
+
+MODULE_DESCRIPTION("BestComm General Buffer Descriptor tasks driver");
+MODULE_AUTHOR("Jeff Gibbons <jeff.gibbons@appspec.com>");
+MODULE_LICENSE("GPL v2");
+
diff --git a/drivers/dma/bestcomm/sram.c b/drivers/dma/bestcomm/sram.c
new file mode 100644
index 000000000..c465758e7
--- /dev/null
+++ b/drivers/dma/bestcomm/sram.c
@@ -0,0 +1,174 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Simple memory allocator for on-board SRAM
+ *
+ * Maintainer : Sylvain Munaut <tnt@246tNt.com>
+ *
+ * Copyright (C) 2005 Sylvain Munaut <tnt@246tNt.com>
+ */
+
+#include <linux/err.h>
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+#include <linux/ioport.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+
+#include <asm/io.h>
+#include <asm/mmu.h>
+
+#include <linux/fsl/bestcomm/sram.h>
+
+
+/* Struct keeping our 'state' */
+struct bcom_sram *bcom_sram = NULL;
+EXPORT_SYMBOL_GPL(bcom_sram); /* needed for inline functions */
+
+
+/* ======================================================================== */
+/* Public API */
+/* ======================================================================== */
+/* DO NOT USE in interrupts, if needed in irq handler, we should use the
+ _irqsave version of the spin_locks */
+
+int bcom_sram_init(struct device_node *sram_node, char *owner)
+{
+ int rv;
+ const u32 *regaddr_p;
+ u64 regaddr64, size64;
+ unsigned int psize;
+
+ /* Create our state struct */
+ if (bcom_sram) {
+ printk(KERN_ERR "%s: bcom_sram_init: "
+ "Already initialized !\n", owner);
+ return -EBUSY;
+ }
+
+ bcom_sram = kmalloc(sizeof(struct bcom_sram), GFP_KERNEL);
+ if (!bcom_sram) {
+ printk(KERN_ERR "%s: bcom_sram_init: "
+ "Couldn't allocate internal state !\n", owner);
+ return -ENOMEM;
+ }
+
+ /* Get address and size of the sram */
+ regaddr_p = of_get_address(sram_node, 0, &size64, NULL);
+ if (!regaddr_p) {
+ printk(KERN_ERR "%s: bcom_sram_init: "
+ "Invalid device node !\n", owner);
+ rv = -EINVAL;
+ goto error_free;
+ }
+
+ regaddr64 = of_translate_address(sram_node, regaddr_p);
+
+ bcom_sram->base_phys = (phys_addr_t) regaddr64;
+ bcom_sram->size = (unsigned int) size64;
+
+ /* Request region */
+ if (!request_mem_region(bcom_sram->base_phys, bcom_sram->size, owner)) {
+ printk(KERN_ERR "%s: bcom_sram_init: "
+ "Couldn't request region !\n", owner);
+ rv = -EBUSY;
+ goto error_free;
+ }
+
+ /* Map SRAM */
+ /* sram is not really __iomem */
+ bcom_sram->base_virt = (void*) ioremap(bcom_sram->base_phys, bcom_sram->size);
+
+ if (!bcom_sram->base_virt) {
+ printk(KERN_ERR "%s: bcom_sram_init: "
+ "Map error SRAM zone 0x%08lx (0x%0x)!\n",
+ owner, (long)bcom_sram->base_phys, bcom_sram->size );
+ rv = -ENOMEM;
+ goto error_release;
+ }
+
+ /* Create an rheap (defaults to 32 bits word alignment) */
+ bcom_sram->rh = rh_create(4);
+
+ /* Attach the free zones */
+#if 0
+ /* Currently disabled ... for future use only */
+ reg_addr_p = of_get_property(sram_node, "available", &psize);
+#else
+ regaddr_p = NULL;
+ psize = 0;
+#endif
+
+ if (!regaddr_p || !psize) {
+ /* Attach the whole zone */
+ rh_attach_region(bcom_sram->rh, 0, bcom_sram->size);
+ } else {
+ /* Attach each zone independently */
+ while (psize >= 2 * sizeof(u32)) {
+ phys_addr_t zbase = of_translate_address(sram_node, regaddr_p);
+ rh_attach_region(bcom_sram->rh, zbase - bcom_sram->base_phys, regaddr_p[1]);
+ regaddr_p += 2;
+ psize -= 2 * sizeof(u32);
+ }
+ }
+
+ /* Init our spinlock */
+ spin_lock_init(&bcom_sram->lock);
+
+ return 0;
+
+error_release:
+ release_mem_region(bcom_sram->base_phys, bcom_sram->size);
+error_free:
+ kfree(bcom_sram);
+ bcom_sram = NULL;
+
+ return rv;
+}
+EXPORT_SYMBOL_GPL(bcom_sram_init);
+
+void bcom_sram_cleanup(void)
+{
+ /* Free resources */
+ if (bcom_sram) {
+ rh_destroy(bcom_sram->rh);
+ iounmap((void __iomem *)bcom_sram->base_virt);
+ release_mem_region(bcom_sram->base_phys, bcom_sram->size);
+ kfree(bcom_sram);
+ bcom_sram = NULL;
+ }
+}
+EXPORT_SYMBOL_GPL(bcom_sram_cleanup);
+
+void* bcom_sram_alloc(int size, int align, phys_addr_t *phys)
+{
+ unsigned long offset;
+
+ spin_lock(&bcom_sram->lock);
+ offset = rh_alloc_align(bcom_sram->rh, size, align, NULL);
+ spin_unlock(&bcom_sram->lock);
+
+ if (IS_ERR_VALUE(offset))
+ return NULL;
+
+ *phys = bcom_sram->base_phys + offset;
+ return bcom_sram->base_virt + offset;
+}
+EXPORT_SYMBOL_GPL(bcom_sram_alloc);
+
+void bcom_sram_free(void *ptr)
+{
+ unsigned long offset;
+
+ if (!ptr)
+ return;
+
+ offset = ptr - bcom_sram->base_virt;
+
+ spin_lock(&bcom_sram->lock);
+ rh_free(bcom_sram->rh, offset);
+ spin_unlock(&bcom_sram->lock);
+}
+EXPORT_SYMBOL_GPL(bcom_sram_free);
diff --git a/drivers/dma/dma-axi-dmac.c b/drivers/dma/dma-axi-dmac.c
new file mode 100644
index 000000000..f30dabc99
--- /dev/null
+++ b/drivers/dma/dma-axi-dmac.c
@@ -0,0 +1,1066 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Driver for the Analog Devices AXI-DMAC core
+ *
+ * Copyright 2013-2019 Analog Devices Inc.
+ * Author: Lars-Peter Clausen <lars@metafoo.de>
+ */
+
+#include <linux/bitfield.h>
+#include <linux/clk.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmaengine.h>
+#include <linux/err.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_dma.h>
+#include <linux/of_address.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+#include <linux/slab.h>
+#include <linux/fpga/adi-axi-common.h>
+
+#include <dt-bindings/dma/axi-dmac.h>
+
+#include "dmaengine.h"
+#include "virt-dma.h"
+
+/*
+ * The AXI-DMAC is a soft IP core that is used in FPGA designs. The core has
+ * various instantiation parameters which decided the exact feature set support
+ * by the core.
+ *
+ * Each channel of the core has a source interface and a destination interface.
+ * The number of channels and the type of the channel interfaces is selected at
+ * configuration time. A interface can either be a connected to a central memory
+ * interconnect, which allows access to system memory, or it can be connected to
+ * a dedicated bus which is directly connected to a data port on a peripheral.
+ * Given that those are configuration options of the core that are selected when
+ * it is instantiated this means that they can not be changed by software at
+ * runtime. By extension this means that each channel is uni-directional. It can
+ * either be device to memory or memory to device, but not both. Also since the
+ * device side is a dedicated data bus only connected to a single peripheral
+ * there is no address than can or needs to be configured for the device side.
+ */
+
+#define AXI_DMAC_REG_INTERFACE_DESC 0x10
+#define AXI_DMAC_DMA_SRC_TYPE_MSK GENMASK(13, 12)
+#define AXI_DMAC_DMA_SRC_TYPE_GET(x) FIELD_GET(AXI_DMAC_DMA_SRC_TYPE_MSK, x)
+#define AXI_DMAC_DMA_SRC_WIDTH_MSK GENMASK(11, 8)
+#define AXI_DMAC_DMA_SRC_WIDTH_GET(x) FIELD_GET(AXI_DMAC_DMA_SRC_WIDTH_MSK, x)
+#define AXI_DMAC_DMA_DST_TYPE_MSK GENMASK(5, 4)
+#define AXI_DMAC_DMA_DST_TYPE_GET(x) FIELD_GET(AXI_DMAC_DMA_DST_TYPE_MSK, x)
+#define AXI_DMAC_DMA_DST_WIDTH_MSK GENMASK(3, 0)
+#define AXI_DMAC_DMA_DST_WIDTH_GET(x) FIELD_GET(AXI_DMAC_DMA_DST_WIDTH_MSK, x)
+#define AXI_DMAC_REG_COHERENCY_DESC 0x14
+#define AXI_DMAC_DST_COHERENT_MSK BIT(0)
+#define AXI_DMAC_DST_COHERENT_GET(x) FIELD_GET(AXI_DMAC_DST_COHERENT_MSK, x)
+
+#define AXI_DMAC_REG_IRQ_MASK 0x80
+#define AXI_DMAC_REG_IRQ_PENDING 0x84
+#define AXI_DMAC_REG_IRQ_SOURCE 0x88
+
+#define AXI_DMAC_REG_CTRL 0x400
+#define AXI_DMAC_REG_TRANSFER_ID 0x404
+#define AXI_DMAC_REG_START_TRANSFER 0x408
+#define AXI_DMAC_REG_FLAGS 0x40c
+#define AXI_DMAC_REG_DEST_ADDRESS 0x410
+#define AXI_DMAC_REG_SRC_ADDRESS 0x414
+#define AXI_DMAC_REG_X_LENGTH 0x418
+#define AXI_DMAC_REG_Y_LENGTH 0x41c
+#define AXI_DMAC_REG_DEST_STRIDE 0x420
+#define AXI_DMAC_REG_SRC_STRIDE 0x424
+#define AXI_DMAC_REG_TRANSFER_DONE 0x428
+#define AXI_DMAC_REG_ACTIVE_TRANSFER_ID 0x42c
+#define AXI_DMAC_REG_STATUS 0x430
+#define AXI_DMAC_REG_CURRENT_SRC_ADDR 0x434
+#define AXI_DMAC_REG_CURRENT_DEST_ADDR 0x438
+#define AXI_DMAC_REG_PARTIAL_XFER_LEN 0x44c
+#define AXI_DMAC_REG_PARTIAL_XFER_ID 0x450
+
+#define AXI_DMAC_CTRL_ENABLE BIT(0)
+#define AXI_DMAC_CTRL_PAUSE BIT(1)
+
+#define AXI_DMAC_IRQ_SOT BIT(0)
+#define AXI_DMAC_IRQ_EOT BIT(1)
+
+#define AXI_DMAC_FLAG_CYCLIC BIT(0)
+#define AXI_DMAC_FLAG_LAST BIT(1)
+#define AXI_DMAC_FLAG_PARTIAL_REPORT BIT(2)
+
+#define AXI_DMAC_FLAG_PARTIAL_XFER_DONE BIT(31)
+
+/* The maximum ID allocated by the hardware is 31 */
+#define AXI_DMAC_SG_UNUSED 32U
+
+struct axi_dmac_sg {
+ dma_addr_t src_addr;
+ dma_addr_t dest_addr;
+ unsigned int x_len;
+ unsigned int y_len;
+ unsigned int dest_stride;
+ unsigned int src_stride;
+ unsigned int id;
+ unsigned int partial_len;
+ bool schedule_when_free;
+};
+
+struct axi_dmac_desc {
+ struct virt_dma_desc vdesc;
+ bool cyclic;
+ bool have_partial_xfer;
+
+ unsigned int num_submitted;
+ unsigned int num_completed;
+ unsigned int num_sgs;
+ struct axi_dmac_sg sg[];
+};
+
+struct axi_dmac_chan {
+ struct virt_dma_chan vchan;
+
+ struct axi_dmac_desc *next_desc;
+ struct list_head active_descs;
+ enum dma_transfer_direction direction;
+
+ unsigned int src_width;
+ unsigned int dest_width;
+ unsigned int src_type;
+ unsigned int dest_type;
+
+ unsigned int max_length;
+ unsigned int address_align_mask;
+ unsigned int length_align_mask;
+
+ bool hw_partial_xfer;
+ bool hw_cyclic;
+ bool hw_2d;
+};
+
+struct axi_dmac {
+ void __iomem *base;
+ int irq;
+
+ struct clk *clk;
+
+ struct dma_device dma_dev;
+ struct axi_dmac_chan chan;
+};
+
+static struct axi_dmac *chan_to_axi_dmac(struct axi_dmac_chan *chan)
+{
+ return container_of(chan->vchan.chan.device, struct axi_dmac,
+ dma_dev);
+}
+
+static struct axi_dmac_chan *to_axi_dmac_chan(struct dma_chan *c)
+{
+ return container_of(c, struct axi_dmac_chan, vchan.chan);
+}
+
+static struct axi_dmac_desc *to_axi_dmac_desc(struct virt_dma_desc *vdesc)
+{
+ return container_of(vdesc, struct axi_dmac_desc, vdesc);
+}
+
+static void axi_dmac_write(struct axi_dmac *axi_dmac, unsigned int reg,
+ unsigned int val)
+{
+ writel(val, axi_dmac->base + reg);
+}
+
+static int axi_dmac_read(struct axi_dmac *axi_dmac, unsigned int reg)
+{
+ return readl(axi_dmac->base + reg);
+}
+
+static int axi_dmac_src_is_mem(struct axi_dmac_chan *chan)
+{
+ return chan->src_type == AXI_DMAC_BUS_TYPE_AXI_MM;
+}
+
+static int axi_dmac_dest_is_mem(struct axi_dmac_chan *chan)
+{
+ return chan->dest_type == AXI_DMAC_BUS_TYPE_AXI_MM;
+}
+
+static bool axi_dmac_check_len(struct axi_dmac_chan *chan, unsigned int len)
+{
+ if (len == 0)
+ return false;
+ if ((len & chan->length_align_mask) != 0) /* Not aligned */
+ return false;
+ return true;
+}
+
+static bool axi_dmac_check_addr(struct axi_dmac_chan *chan, dma_addr_t addr)
+{
+ if ((addr & chan->address_align_mask) != 0) /* Not aligned */
+ return false;
+ return true;
+}
+
+static void axi_dmac_start_transfer(struct axi_dmac_chan *chan)
+{
+ struct axi_dmac *dmac = chan_to_axi_dmac(chan);
+ struct virt_dma_desc *vdesc;
+ struct axi_dmac_desc *desc;
+ struct axi_dmac_sg *sg;
+ unsigned int flags = 0;
+ unsigned int val;
+
+ val = axi_dmac_read(dmac, AXI_DMAC_REG_START_TRANSFER);
+ if (val) /* Queue is full, wait for the next SOT IRQ */
+ return;
+
+ desc = chan->next_desc;
+
+ if (!desc) {
+ vdesc = vchan_next_desc(&chan->vchan);
+ if (!vdesc)
+ return;
+ list_move_tail(&vdesc->node, &chan->active_descs);
+ desc = to_axi_dmac_desc(vdesc);
+ }
+ sg = &desc->sg[desc->num_submitted];
+
+ /* Already queued in cyclic mode. Wait for it to finish */
+ if (sg->id != AXI_DMAC_SG_UNUSED) {
+ sg->schedule_when_free = true;
+ return;
+ }
+
+ desc->num_submitted++;
+ if (desc->num_submitted == desc->num_sgs ||
+ desc->have_partial_xfer) {
+ if (desc->cyclic)
+ desc->num_submitted = 0; /* Start again */
+ else
+ chan->next_desc = NULL;
+ flags |= AXI_DMAC_FLAG_LAST;
+ } else {
+ chan->next_desc = desc;
+ }
+
+ sg->id = axi_dmac_read(dmac, AXI_DMAC_REG_TRANSFER_ID);
+
+ if (axi_dmac_dest_is_mem(chan)) {
+ axi_dmac_write(dmac, AXI_DMAC_REG_DEST_ADDRESS, sg->dest_addr);
+ axi_dmac_write(dmac, AXI_DMAC_REG_DEST_STRIDE, sg->dest_stride);
+ }
+
+ if (axi_dmac_src_is_mem(chan)) {
+ axi_dmac_write(dmac, AXI_DMAC_REG_SRC_ADDRESS, sg->src_addr);
+ axi_dmac_write(dmac, AXI_DMAC_REG_SRC_STRIDE, sg->src_stride);
+ }
+
+ /*
+ * If the hardware supports cyclic transfers and there is no callback to
+ * call and only a single segment, enable hw cyclic mode to avoid
+ * unnecessary interrupts.
+ */
+ if (chan->hw_cyclic && desc->cyclic && !desc->vdesc.tx.callback &&
+ desc->num_sgs == 1)
+ flags |= AXI_DMAC_FLAG_CYCLIC;
+
+ if (chan->hw_partial_xfer)
+ flags |= AXI_DMAC_FLAG_PARTIAL_REPORT;
+
+ axi_dmac_write(dmac, AXI_DMAC_REG_X_LENGTH, sg->x_len - 1);
+ axi_dmac_write(dmac, AXI_DMAC_REG_Y_LENGTH, sg->y_len - 1);
+ axi_dmac_write(dmac, AXI_DMAC_REG_FLAGS, flags);
+ axi_dmac_write(dmac, AXI_DMAC_REG_START_TRANSFER, 1);
+}
+
+static struct axi_dmac_desc *axi_dmac_active_desc(struct axi_dmac_chan *chan)
+{
+ return list_first_entry_or_null(&chan->active_descs,
+ struct axi_dmac_desc, vdesc.node);
+}
+
+static inline unsigned int axi_dmac_total_sg_bytes(struct axi_dmac_chan *chan,
+ struct axi_dmac_sg *sg)
+{
+ if (chan->hw_2d)
+ return sg->x_len * sg->y_len;
+ else
+ return sg->x_len;
+}
+
+static void axi_dmac_dequeue_partial_xfers(struct axi_dmac_chan *chan)
+{
+ struct axi_dmac *dmac = chan_to_axi_dmac(chan);
+ struct axi_dmac_desc *desc;
+ struct axi_dmac_sg *sg;
+ u32 xfer_done, len, id, i;
+ bool found_sg;
+
+ do {
+ len = axi_dmac_read(dmac, AXI_DMAC_REG_PARTIAL_XFER_LEN);
+ id = axi_dmac_read(dmac, AXI_DMAC_REG_PARTIAL_XFER_ID);
+
+ found_sg = false;
+ list_for_each_entry(desc, &chan->active_descs, vdesc.node) {
+ for (i = 0; i < desc->num_sgs; i++) {
+ sg = &desc->sg[i];
+ if (sg->id == AXI_DMAC_SG_UNUSED)
+ continue;
+ if (sg->id == id) {
+ desc->have_partial_xfer = true;
+ sg->partial_len = len;
+ found_sg = true;
+ break;
+ }
+ }
+ if (found_sg)
+ break;
+ }
+
+ if (found_sg) {
+ dev_dbg(dmac->dma_dev.dev,
+ "Found partial segment id=%u, len=%u\n",
+ id, len);
+ } else {
+ dev_warn(dmac->dma_dev.dev,
+ "Not found partial segment id=%u, len=%u\n",
+ id, len);
+ }
+
+ /* Check if we have any more partial transfers */
+ xfer_done = axi_dmac_read(dmac, AXI_DMAC_REG_TRANSFER_DONE);
+ xfer_done = !(xfer_done & AXI_DMAC_FLAG_PARTIAL_XFER_DONE);
+
+ } while (!xfer_done);
+}
+
+static void axi_dmac_compute_residue(struct axi_dmac_chan *chan,
+ struct axi_dmac_desc *active)
+{
+ struct dmaengine_result *rslt = &active->vdesc.tx_result;
+ unsigned int start = active->num_completed - 1;
+ struct axi_dmac_sg *sg;
+ unsigned int i, total;
+
+ rslt->result = DMA_TRANS_NOERROR;
+ rslt->residue = 0;
+
+ /*
+ * We get here if the last completed segment is partial, which
+ * means we can compute the residue from that segment onwards
+ */
+ for (i = start; i < active->num_sgs; i++) {
+ sg = &active->sg[i];
+ total = axi_dmac_total_sg_bytes(chan, sg);
+ rslt->residue += (total - sg->partial_len);
+ }
+}
+
+static bool axi_dmac_transfer_done(struct axi_dmac_chan *chan,
+ unsigned int completed_transfers)
+{
+ struct axi_dmac_desc *active;
+ struct axi_dmac_sg *sg;
+ bool start_next = false;
+
+ active = axi_dmac_active_desc(chan);
+ if (!active)
+ return false;
+
+ if (chan->hw_partial_xfer &&
+ (completed_transfers & AXI_DMAC_FLAG_PARTIAL_XFER_DONE))
+ axi_dmac_dequeue_partial_xfers(chan);
+
+ do {
+ sg = &active->sg[active->num_completed];
+ if (sg->id == AXI_DMAC_SG_UNUSED) /* Not yet submitted */
+ break;
+ if (!(BIT(sg->id) & completed_transfers))
+ break;
+ active->num_completed++;
+ sg->id = AXI_DMAC_SG_UNUSED;
+ if (sg->schedule_when_free) {
+ sg->schedule_when_free = false;
+ start_next = true;
+ }
+
+ if (sg->partial_len)
+ axi_dmac_compute_residue(chan, active);
+
+ if (active->cyclic)
+ vchan_cyclic_callback(&active->vdesc);
+
+ if (active->num_completed == active->num_sgs ||
+ sg->partial_len) {
+ if (active->cyclic) {
+ active->num_completed = 0; /* wrap around */
+ } else {
+ list_del(&active->vdesc.node);
+ vchan_cookie_complete(&active->vdesc);
+ active = axi_dmac_active_desc(chan);
+ }
+ }
+ } while (active);
+
+ return start_next;
+}
+
+static irqreturn_t axi_dmac_interrupt_handler(int irq, void *devid)
+{
+ struct axi_dmac *dmac = devid;
+ unsigned int pending;
+ bool start_next = false;
+
+ pending = axi_dmac_read(dmac, AXI_DMAC_REG_IRQ_PENDING);
+ if (!pending)
+ return IRQ_NONE;
+
+ axi_dmac_write(dmac, AXI_DMAC_REG_IRQ_PENDING, pending);
+
+ spin_lock(&dmac->chan.vchan.lock);
+ /* One or more transfers have finished */
+ if (pending & AXI_DMAC_IRQ_EOT) {
+ unsigned int completed;
+
+ completed = axi_dmac_read(dmac, AXI_DMAC_REG_TRANSFER_DONE);
+ start_next = axi_dmac_transfer_done(&dmac->chan, completed);
+ }
+ /* Space has become available in the descriptor queue */
+ if ((pending & AXI_DMAC_IRQ_SOT) || start_next)
+ axi_dmac_start_transfer(&dmac->chan);
+ spin_unlock(&dmac->chan.vchan.lock);
+
+ return IRQ_HANDLED;
+}
+
+static int axi_dmac_terminate_all(struct dma_chan *c)
+{
+ struct axi_dmac_chan *chan = to_axi_dmac_chan(c);
+ struct axi_dmac *dmac = chan_to_axi_dmac(chan);
+ unsigned long flags;
+ LIST_HEAD(head);
+
+ spin_lock_irqsave(&chan->vchan.lock, flags);
+ axi_dmac_write(dmac, AXI_DMAC_REG_CTRL, 0);
+ chan->next_desc = NULL;
+ vchan_get_all_descriptors(&chan->vchan, &head);
+ list_splice_tail_init(&chan->active_descs, &head);
+ spin_unlock_irqrestore(&chan->vchan.lock, flags);
+
+ vchan_dma_desc_free_list(&chan->vchan, &head);
+
+ return 0;
+}
+
+static void axi_dmac_synchronize(struct dma_chan *c)
+{
+ struct axi_dmac_chan *chan = to_axi_dmac_chan(c);
+
+ vchan_synchronize(&chan->vchan);
+}
+
+static void axi_dmac_issue_pending(struct dma_chan *c)
+{
+ struct axi_dmac_chan *chan = to_axi_dmac_chan(c);
+ struct axi_dmac *dmac = chan_to_axi_dmac(chan);
+ unsigned long flags;
+
+ axi_dmac_write(dmac, AXI_DMAC_REG_CTRL, AXI_DMAC_CTRL_ENABLE);
+
+ spin_lock_irqsave(&chan->vchan.lock, flags);
+ if (vchan_issue_pending(&chan->vchan))
+ axi_dmac_start_transfer(chan);
+ spin_unlock_irqrestore(&chan->vchan.lock, flags);
+}
+
+static struct axi_dmac_desc *axi_dmac_alloc_desc(unsigned int num_sgs)
+{
+ struct axi_dmac_desc *desc;
+ unsigned int i;
+
+ desc = kzalloc(struct_size(desc, sg, num_sgs), GFP_NOWAIT);
+ if (!desc)
+ return NULL;
+
+ for (i = 0; i < num_sgs; i++)
+ desc->sg[i].id = AXI_DMAC_SG_UNUSED;
+
+ desc->num_sgs = num_sgs;
+
+ return desc;
+}
+
+static struct axi_dmac_sg *axi_dmac_fill_linear_sg(struct axi_dmac_chan *chan,
+ enum dma_transfer_direction direction, dma_addr_t addr,
+ unsigned int num_periods, unsigned int period_len,
+ struct axi_dmac_sg *sg)
+{
+ unsigned int num_segments, i;
+ unsigned int segment_size;
+ unsigned int len;
+
+ /* Split into multiple equally sized segments if necessary */
+ num_segments = DIV_ROUND_UP(period_len, chan->max_length);
+ segment_size = DIV_ROUND_UP(period_len, num_segments);
+ /* Take care of alignment */
+ segment_size = ((segment_size - 1) | chan->length_align_mask) + 1;
+
+ for (i = 0; i < num_periods; i++) {
+ len = period_len;
+
+ while (len > segment_size) {
+ if (direction == DMA_DEV_TO_MEM)
+ sg->dest_addr = addr;
+ else
+ sg->src_addr = addr;
+ sg->x_len = segment_size;
+ sg->y_len = 1;
+ sg++;
+ addr += segment_size;
+ len -= segment_size;
+ }
+
+ if (direction == DMA_DEV_TO_MEM)
+ sg->dest_addr = addr;
+ else
+ sg->src_addr = addr;
+ sg->x_len = len;
+ sg->y_len = 1;
+ sg++;
+ addr += len;
+ }
+
+ return sg;
+}
+
+static struct dma_async_tx_descriptor *axi_dmac_prep_slave_sg(
+ struct dma_chan *c, struct scatterlist *sgl,
+ unsigned int sg_len, enum dma_transfer_direction direction,
+ unsigned long flags, void *context)
+{
+ struct axi_dmac_chan *chan = to_axi_dmac_chan(c);
+ struct axi_dmac_desc *desc;
+ struct axi_dmac_sg *dsg;
+ struct scatterlist *sg;
+ unsigned int num_sgs;
+ unsigned int i;
+
+ if (direction != chan->direction)
+ return NULL;
+
+ num_sgs = 0;
+ for_each_sg(sgl, sg, sg_len, i)
+ num_sgs += DIV_ROUND_UP(sg_dma_len(sg), chan->max_length);
+
+ desc = axi_dmac_alloc_desc(num_sgs);
+ if (!desc)
+ return NULL;
+
+ dsg = desc->sg;
+
+ for_each_sg(sgl, sg, sg_len, i) {
+ if (!axi_dmac_check_addr(chan, sg_dma_address(sg)) ||
+ !axi_dmac_check_len(chan, sg_dma_len(sg))) {
+ kfree(desc);
+ return NULL;
+ }
+
+ dsg = axi_dmac_fill_linear_sg(chan, direction, sg_dma_address(sg), 1,
+ sg_dma_len(sg), dsg);
+ }
+
+ desc->cyclic = false;
+
+ return vchan_tx_prep(&chan->vchan, &desc->vdesc, flags);
+}
+
+static struct dma_async_tx_descriptor *axi_dmac_prep_dma_cyclic(
+ struct dma_chan *c, dma_addr_t buf_addr, size_t buf_len,
+ size_t period_len, enum dma_transfer_direction direction,
+ unsigned long flags)
+{
+ struct axi_dmac_chan *chan = to_axi_dmac_chan(c);
+ struct axi_dmac_desc *desc;
+ unsigned int num_periods, num_segments;
+
+ if (direction != chan->direction)
+ return NULL;
+
+ if (!axi_dmac_check_len(chan, buf_len) ||
+ !axi_dmac_check_addr(chan, buf_addr))
+ return NULL;
+
+ if (period_len == 0 || buf_len % period_len)
+ return NULL;
+
+ num_periods = buf_len / period_len;
+ num_segments = DIV_ROUND_UP(period_len, chan->max_length);
+
+ desc = axi_dmac_alloc_desc(num_periods * num_segments);
+ if (!desc)
+ return NULL;
+
+ axi_dmac_fill_linear_sg(chan, direction, buf_addr, num_periods,
+ period_len, desc->sg);
+
+ desc->cyclic = true;
+
+ return vchan_tx_prep(&chan->vchan, &desc->vdesc, flags);
+}
+
+static struct dma_async_tx_descriptor *axi_dmac_prep_interleaved(
+ struct dma_chan *c, struct dma_interleaved_template *xt,
+ unsigned long flags)
+{
+ struct axi_dmac_chan *chan = to_axi_dmac_chan(c);
+ struct axi_dmac_desc *desc;
+ size_t dst_icg, src_icg;
+
+ if (xt->frame_size != 1)
+ return NULL;
+
+ if (xt->dir != chan->direction)
+ return NULL;
+
+ if (axi_dmac_src_is_mem(chan)) {
+ if (!xt->src_inc || !axi_dmac_check_addr(chan, xt->src_start))
+ return NULL;
+ }
+
+ if (axi_dmac_dest_is_mem(chan)) {
+ if (!xt->dst_inc || !axi_dmac_check_addr(chan, xt->dst_start))
+ return NULL;
+ }
+
+ dst_icg = dmaengine_get_dst_icg(xt, &xt->sgl[0]);
+ src_icg = dmaengine_get_src_icg(xt, &xt->sgl[0]);
+
+ if (chan->hw_2d) {
+ if (!axi_dmac_check_len(chan, xt->sgl[0].size) ||
+ xt->numf == 0)
+ return NULL;
+ if (xt->sgl[0].size + dst_icg > chan->max_length ||
+ xt->sgl[0].size + src_icg > chan->max_length)
+ return NULL;
+ } else {
+ if (dst_icg != 0 || src_icg != 0)
+ return NULL;
+ if (chan->max_length / xt->sgl[0].size < xt->numf)
+ return NULL;
+ if (!axi_dmac_check_len(chan, xt->sgl[0].size * xt->numf))
+ return NULL;
+ }
+
+ desc = axi_dmac_alloc_desc(1);
+ if (!desc)
+ return NULL;
+
+ if (axi_dmac_src_is_mem(chan)) {
+ desc->sg[0].src_addr = xt->src_start;
+ desc->sg[0].src_stride = xt->sgl[0].size + src_icg;
+ }
+
+ if (axi_dmac_dest_is_mem(chan)) {
+ desc->sg[0].dest_addr = xt->dst_start;
+ desc->sg[0].dest_stride = xt->sgl[0].size + dst_icg;
+ }
+
+ if (chan->hw_2d) {
+ desc->sg[0].x_len = xt->sgl[0].size;
+ desc->sg[0].y_len = xt->numf;
+ } else {
+ desc->sg[0].x_len = xt->sgl[0].size * xt->numf;
+ desc->sg[0].y_len = 1;
+ }
+
+ if (flags & DMA_CYCLIC)
+ desc->cyclic = true;
+
+ return vchan_tx_prep(&chan->vchan, &desc->vdesc, flags);
+}
+
+static void axi_dmac_free_chan_resources(struct dma_chan *c)
+{
+ vchan_free_chan_resources(to_virt_chan(c));
+}
+
+static void axi_dmac_desc_free(struct virt_dma_desc *vdesc)
+{
+ kfree(container_of(vdesc, struct axi_dmac_desc, vdesc));
+}
+
+static bool axi_dmac_regmap_rdwr(struct device *dev, unsigned int reg)
+{
+ switch (reg) {
+ case AXI_DMAC_REG_IRQ_MASK:
+ case AXI_DMAC_REG_IRQ_SOURCE:
+ case AXI_DMAC_REG_IRQ_PENDING:
+ case AXI_DMAC_REG_CTRL:
+ case AXI_DMAC_REG_TRANSFER_ID:
+ case AXI_DMAC_REG_START_TRANSFER:
+ case AXI_DMAC_REG_FLAGS:
+ case AXI_DMAC_REG_DEST_ADDRESS:
+ case AXI_DMAC_REG_SRC_ADDRESS:
+ case AXI_DMAC_REG_X_LENGTH:
+ case AXI_DMAC_REG_Y_LENGTH:
+ case AXI_DMAC_REG_DEST_STRIDE:
+ case AXI_DMAC_REG_SRC_STRIDE:
+ case AXI_DMAC_REG_TRANSFER_DONE:
+ case AXI_DMAC_REG_ACTIVE_TRANSFER_ID:
+ case AXI_DMAC_REG_STATUS:
+ case AXI_DMAC_REG_CURRENT_SRC_ADDR:
+ case AXI_DMAC_REG_CURRENT_DEST_ADDR:
+ case AXI_DMAC_REG_PARTIAL_XFER_LEN:
+ case AXI_DMAC_REG_PARTIAL_XFER_ID:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static const struct regmap_config axi_dmac_regmap_config = {
+ .reg_bits = 32,
+ .val_bits = 32,
+ .reg_stride = 4,
+ .max_register = AXI_DMAC_REG_PARTIAL_XFER_ID,
+ .readable_reg = axi_dmac_regmap_rdwr,
+ .writeable_reg = axi_dmac_regmap_rdwr,
+};
+
+static void axi_dmac_adjust_chan_params(struct axi_dmac_chan *chan)
+{
+ chan->address_align_mask = max(chan->dest_width, chan->src_width) - 1;
+
+ if (axi_dmac_dest_is_mem(chan) && axi_dmac_src_is_mem(chan))
+ chan->direction = DMA_MEM_TO_MEM;
+ else if (!axi_dmac_dest_is_mem(chan) && axi_dmac_src_is_mem(chan))
+ chan->direction = DMA_MEM_TO_DEV;
+ else if (axi_dmac_dest_is_mem(chan) && !axi_dmac_src_is_mem(chan))
+ chan->direction = DMA_DEV_TO_MEM;
+ else
+ chan->direction = DMA_DEV_TO_DEV;
+}
+
+/*
+ * The configuration stored in the devicetree matches the configuration
+ * parameters of the peripheral instance and allows the driver to know which
+ * features are implemented and how it should behave.
+ */
+static int axi_dmac_parse_chan_dt(struct device_node *of_chan,
+ struct axi_dmac_chan *chan)
+{
+ u32 val;
+ int ret;
+
+ ret = of_property_read_u32(of_chan, "reg", &val);
+ if (ret)
+ return ret;
+
+ /* We only support 1 channel for now */
+ if (val != 0)
+ return -EINVAL;
+
+ ret = of_property_read_u32(of_chan, "adi,source-bus-type", &val);
+ if (ret)
+ return ret;
+ if (val > AXI_DMAC_BUS_TYPE_FIFO)
+ return -EINVAL;
+ chan->src_type = val;
+
+ ret = of_property_read_u32(of_chan, "adi,destination-bus-type", &val);
+ if (ret)
+ return ret;
+ if (val > AXI_DMAC_BUS_TYPE_FIFO)
+ return -EINVAL;
+ chan->dest_type = val;
+
+ ret = of_property_read_u32(of_chan, "adi,source-bus-width", &val);
+ if (ret)
+ return ret;
+ chan->src_width = val / 8;
+
+ ret = of_property_read_u32(of_chan, "adi,destination-bus-width", &val);
+ if (ret)
+ return ret;
+ chan->dest_width = val / 8;
+
+ axi_dmac_adjust_chan_params(chan);
+
+ return 0;
+}
+
+static int axi_dmac_parse_dt(struct device *dev, struct axi_dmac *dmac)
+{
+ struct device_node *of_channels, *of_chan;
+ int ret;
+
+ of_channels = of_get_child_by_name(dev->of_node, "adi,channels");
+ if (of_channels == NULL)
+ return -ENODEV;
+
+ for_each_child_of_node(of_channels, of_chan) {
+ ret = axi_dmac_parse_chan_dt(of_chan, &dmac->chan);
+ if (ret) {
+ of_node_put(of_chan);
+ of_node_put(of_channels);
+ return -EINVAL;
+ }
+ }
+ of_node_put(of_channels);
+
+ return 0;
+}
+
+static int axi_dmac_read_chan_config(struct device *dev, struct axi_dmac *dmac)
+{
+ struct axi_dmac_chan *chan = &dmac->chan;
+ unsigned int val, desc;
+
+ desc = axi_dmac_read(dmac, AXI_DMAC_REG_INTERFACE_DESC);
+ if (desc == 0) {
+ dev_err(dev, "DMA interface register reads zero\n");
+ return -EFAULT;
+ }
+
+ val = AXI_DMAC_DMA_SRC_TYPE_GET(desc);
+ if (val > AXI_DMAC_BUS_TYPE_FIFO) {
+ dev_err(dev, "Invalid source bus type read: %d\n", val);
+ return -EINVAL;
+ }
+ chan->src_type = val;
+
+ val = AXI_DMAC_DMA_DST_TYPE_GET(desc);
+ if (val > AXI_DMAC_BUS_TYPE_FIFO) {
+ dev_err(dev, "Invalid destination bus type read: %d\n", val);
+ return -EINVAL;
+ }
+ chan->dest_type = val;
+
+ val = AXI_DMAC_DMA_SRC_WIDTH_GET(desc);
+ if (val == 0) {
+ dev_err(dev, "Source bus width is zero\n");
+ return -EINVAL;
+ }
+ /* widths are stored in log2 */
+ chan->src_width = 1 << val;
+
+ val = AXI_DMAC_DMA_DST_WIDTH_GET(desc);
+ if (val == 0) {
+ dev_err(dev, "Destination bus width is zero\n");
+ return -EINVAL;
+ }
+ chan->dest_width = 1 << val;
+
+ axi_dmac_adjust_chan_params(chan);
+
+ return 0;
+}
+
+static int axi_dmac_detect_caps(struct axi_dmac *dmac, unsigned int version)
+{
+ struct axi_dmac_chan *chan = &dmac->chan;
+
+ axi_dmac_write(dmac, AXI_DMAC_REG_FLAGS, AXI_DMAC_FLAG_CYCLIC);
+ if (axi_dmac_read(dmac, AXI_DMAC_REG_FLAGS) == AXI_DMAC_FLAG_CYCLIC)
+ chan->hw_cyclic = true;
+
+ axi_dmac_write(dmac, AXI_DMAC_REG_Y_LENGTH, 1);
+ if (axi_dmac_read(dmac, AXI_DMAC_REG_Y_LENGTH) == 1)
+ chan->hw_2d = true;
+
+ axi_dmac_write(dmac, AXI_DMAC_REG_X_LENGTH, 0xffffffff);
+ chan->max_length = axi_dmac_read(dmac, AXI_DMAC_REG_X_LENGTH);
+ if (chan->max_length != UINT_MAX)
+ chan->max_length++;
+
+ axi_dmac_write(dmac, AXI_DMAC_REG_DEST_ADDRESS, 0xffffffff);
+ if (axi_dmac_read(dmac, AXI_DMAC_REG_DEST_ADDRESS) == 0 &&
+ chan->dest_type == AXI_DMAC_BUS_TYPE_AXI_MM) {
+ dev_err(dmac->dma_dev.dev,
+ "Destination memory-mapped interface not supported.");
+ return -ENODEV;
+ }
+
+ axi_dmac_write(dmac, AXI_DMAC_REG_SRC_ADDRESS, 0xffffffff);
+ if (axi_dmac_read(dmac, AXI_DMAC_REG_SRC_ADDRESS) == 0 &&
+ chan->src_type == AXI_DMAC_BUS_TYPE_AXI_MM) {
+ dev_err(dmac->dma_dev.dev,
+ "Source memory-mapped interface not supported.");
+ return -ENODEV;
+ }
+
+ if (version >= ADI_AXI_PCORE_VER(4, 2, 'a'))
+ chan->hw_partial_xfer = true;
+
+ if (version >= ADI_AXI_PCORE_VER(4, 1, 'a')) {
+ axi_dmac_write(dmac, AXI_DMAC_REG_X_LENGTH, 0x00);
+ chan->length_align_mask =
+ axi_dmac_read(dmac, AXI_DMAC_REG_X_LENGTH);
+ } else {
+ chan->length_align_mask = chan->address_align_mask;
+ }
+
+ return 0;
+}
+
+static int axi_dmac_probe(struct platform_device *pdev)
+{
+ struct dma_device *dma_dev;
+ struct axi_dmac *dmac;
+ struct resource *res;
+ struct regmap *regmap;
+ unsigned int version;
+ int ret;
+
+ dmac = devm_kzalloc(&pdev->dev, sizeof(*dmac), GFP_KERNEL);
+ if (!dmac)
+ return -ENOMEM;
+
+ dmac->irq = platform_get_irq(pdev, 0);
+ if (dmac->irq < 0)
+ return dmac->irq;
+ if (dmac->irq == 0)
+ return -EINVAL;
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ dmac->base = devm_ioremap_resource(&pdev->dev, res);
+ if (IS_ERR(dmac->base))
+ return PTR_ERR(dmac->base);
+
+ dmac->clk = devm_clk_get(&pdev->dev, NULL);
+ if (IS_ERR(dmac->clk))
+ return PTR_ERR(dmac->clk);
+
+ ret = clk_prepare_enable(dmac->clk);
+ if (ret < 0)
+ return ret;
+
+ version = axi_dmac_read(dmac, ADI_AXI_REG_VERSION);
+
+ if (version >= ADI_AXI_PCORE_VER(4, 3, 'a'))
+ ret = axi_dmac_read_chan_config(&pdev->dev, dmac);
+ else
+ ret = axi_dmac_parse_dt(&pdev->dev, dmac);
+
+ if (ret < 0)
+ goto err_clk_disable;
+
+ INIT_LIST_HEAD(&dmac->chan.active_descs);
+
+ dma_set_max_seg_size(&pdev->dev, UINT_MAX);
+
+ dma_dev = &dmac->dma_dev;
+ dma_cap_set(DMA_SLAVE, dma_dev->cap_mask);
+ dma_cap_set(DMA_CYCLIC, dma_dev->cap_mask);
+ dma_cap_set(DMA_INTERLEAVE, dma_dev->cap_mask);
+ dma_dev->device_free_chan_resources = axi_dmac_free_chan_resources;
+ dma_dev->device_tx_status = dma_cookie_status;
+ dma_dev->device_issue_pending = axi_dmac_issue_pending;
+ dma_dev->device_prep_slave_sg = axi_dmac_prep_slave_sg;
+ dma_dev->device_prep_dma_cyclic = axi_dmac_prep_dma_cyclic;
+ dma_dev->device_prep_interleaved_dma = axi_dmac_prep_interleaved;
+ dma_dev->device_terminate_all = axi_dmac_terminate_all;
+ dma_dev->device_synchronize = axi_dmac_synchronize;
+ dma_dev->dev = &pdev->dev;
+ dma_dev->chancnt = 1;
+ dma_dev->src_addr_widths = BIT(dmac->chan.src_width);
+ dma_dev->dst_addr_widths = BIT(dmac->chan.dest_width);
+ dma_dev->directions = BIT(dmac->chan.direction);
+ dma_dev->residue_granularity = DMA_RESIDUE_GRANULARITY_DESCRIPTOR;
+ INIT_LIST_HEAD(&dma_dev->channels);
+
+ dmac->chan.vchan.desc_free = axi_dmac_desc_free;
+ vchan_init(&dmac->chan.vchan, dma_dev);
+
+ ret = axi_dmac_detect_caps(dmac, version);
+ if (ret)
+ goto err_clk_disable;
+
+ dma_dev->copy_align = (dmac->chan.address_align_mask + 1);
+
+ axi_dmac_write(dmac, AXI_DMAC_REG_IRQ_MASK, 0x00);
+
+ if (of_dma_is_coherent(pdev->dev.of_node)) {
+ ret = axi_dmac_read(dmac, AXI_DMAC_REG_COHERENCY_DESC);
+
+ if (version < ADI_AXI_PCORE_VER(4, 4, 'a') ||
+ !AXI_DMAC_DST_COHERENT_GET(ret)) {
+ dev_err(dmac->dma_dev.dev,
+ "Coherent DMA not supported in hardware");
+ ret = -EINVAL;
+ goto err_clk_disable;
+ }
+ }
+
+ ret = dma_async_device_register(dma_dev);
+ if (ret)
+ goto err_clk_disable;
+
+ ret = of_dma_controller_register(pdev->dev.of_node,
+ of_dma_xlate_by_chan_id, dma_dev);
+ if (ret)
+ goto err_unregister_device;
+
+ ret = request_irq(dmac->irq, axi_dmac_interrupt_handler, IRQF_SHARED,
+ dev_name(&pdev->dev), dmac);
+ if (ret)
+ goto err_unregister_of;
+
+ platform_set_drvdata(pdev, dmac);
+
+ regmap = devm_regmap_init_mmio(&pdev->dev, dmac->base,
+ &axi_dmac_regmap_config);
+ if (IS_ERR(regmap)) {
+ ret = PTR_ERR(regmap);
+ goto err_free_irq;
+ }
+
+ return 0;
+
+err_free_irq:
+ free_irq(dmac->irq, dmac);
+err_unregister_of:
+ of_dma_controller_free(pdev->dev.of_node);
+err_unregister_device:
+ dma_async_device_unregister(&dmac->dma_dev);
+err_clk_disable:
+ clk_disable_unprepare(dmac->clk);
+
+ return ret;
+}
+
+static int axi_dmac_remove(struct platform_device *pdev)
+{
+ struct axi_dmac *dmac = platform_get_drvdata(pdev);
+
+ of_dma_controller_free(pdev->dev.of_node);
+ free_irq(dmac->irq, dmac);
+ tasklet_kill(&dmac->chan.vchan.task);
+ dma_async_device_unregister(&dmac->dma_dev);
+ clk_disable_unprepare(dmac->clk);
+
+ return 0;
+}
+
+static const struct of_device_id axi_dmac_of_match_table[] = {
+ { .compatible = "adi,axi-dmac-1.00.a" },
+ { },
+};
+MODULE_DEVICE_TABLE(of, axi_dmac_of_match_table);
+
+static struct platform_driver axi_dmac_driver = {
+ .driver = {
+ .name = "dma-axi-dmac",
+ .of_match_table = axi_dmac_of_match_table,
+ },
+ .probe = axi_dmac_probe,
+ .remove = axi_dmac_remove,
+};
+module_platform_driver(axi_dmac_driver);
+
+MODULE_AUTHOR("Lars-Peter Clausen <lars@metafoo.de>");
+MODULE_DESCRIPTION("DMA controller driver for the AXI-DMAC controller");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/dma-jz4780.c b/drivers/dma/dma-jz4780.c
new file mode 100644
index 000000000..2a483802d
--- /dev/null
+++ b/drivers/dma/dma-jz4780.c
@@ -0,0 +1,1141 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Ingenic JZ4780 DMA controller
+ *
+ * Copyright (c) 2015 Imagination Technologies
+ * Author: Alex Smith <alex@alex-smith.me.uk>
+ */
+
+#include <linux/clk.h>
+#include <linux/dmapool.h>
+#include <linux/dma-mapping.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/of_dma.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+#include "dmaengine.h"
+#include "virt-dma.h"
+
+/* Global registers. */
+#define JZ_DMA_REG_DMAC 0x00
+#define JZ_DMA_REG_DIRQP 0x04
+#define JZ_DMA_REG_DDR 0x08
+#define JZ_DMA_REG_DDRS 0x0c
+#define JZ_DMA_REG_DCKE 0x10
+#define JZ_DMA_REG_DCKES 0x14
+#define JZ_DMA_REG_DCKEC 0x18
+#define JZ_DMA_REG_DMACP 0x1c
+#define JZ_DMA_REG_DSIRQP 0x20
+#define JZ_DMA_REG_DSIRQM 0x24
+#define JZ_DMA_REG_DCIRQP 0x28
+#define JZ_DMA_REG_DCIRQM 0x2c
+
+/* Per-channel registers. */
+#define JZ_DMA_REG_CHAN(n) (n * 0x20)
+#define JZ_DMA_REG_DSA 0x00
+#define JZ_DMA_REG_DTA 0x04
+#define JZ_DMA_REG_DTC 0x08
+#define JZ_DMA_REG_DRT 0x0c
+#define JZ_DMA_REG_DCS 0x10
+#define JZ_DMA_REG_DCM 0x14
+#define JZ_DMA_REG_DDA 0x18
+#define JZ_DMA_REG_DSD 0x1c
+
+#define JZ_DMA_DMAC_DMAE BIT(0)
+#define JZ_DMA_DMAC_AR BIT(2)
+#define JZ_DMA_DMAC_HLT BIT(3)
+#define JZ_DMA_DMAC_FAIC BIT(27)
+#define JZ_DMA_DMAC_FMSC BIT(31)
+
+#define JZ_DMA_DRT_AUTO 0x8
+
+#define JZ_DMA_DCS_CTE BIT(0)
+#define JZ_DMA_DCS_HLT BIT(2)
+#define JZ_DMA_DCS_TT BIT(3)
+#define JZ_DMA_DCS_AR BIT(4)
+#define JZ_DMA_DCS_DES8 BIT(30)
+
+#define JZ_DMA_DCM_LINK BIT(0)
+#define JZ_DMA_DCM_TIE BIT(1)
+#define JZ_DMA_DCM_STDE BIT(2)
+#define JZ_DMA_DCM_TSZ_SHIFT 8
+#define JZ_DMA_DCM_TSZ_MASK (0x7 << JZ_DMA_DCM_TSZ_SHIFT)
+#define JZ_DMA_DCM_DP_SHIFT 12
+#define JZ_DMA_DCM_SP_SHIFT 14
+#define JZ_DMA_DCM_DAI BIT(22)
+#define JZ_DMA_DCM_SAI BIT(23)
+
+#define JZ_DMA_SIZE_4_BYTE 0x0
+#define JZ_DMA_SIZE_1_BYTE 0x1
+#define JZ_DMA_SIZE_2_BYTE 0x2
+#define JZ_DMA_SIZE_16_BYTE 0x3
+#define JZ_DMA_SIZE_32_BYTE 0x4
+#define JZ_DMA_SIZE_64_BYTE 0x5
+#define JZ_DMA_SIZE_128_BYTE 0x6
+
+#define JZ_DMA_WIDTH_32_BIT 0x0
+#define JZ_DMA_WIDTH_8_BIT 0x1
+#define JZ_DMA_WIDTH_16_BIT 0x2
+
+#define JZ_DMA_BUSWIDTHS (BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | \
+ BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \
+ BIT(DMA_SLAVE_BUSWIDTH_4_BYTES))
+
+#define JZ4780_DMA_CTRL_OFFSET 0x1000
+
+/* macros for use with jz4780_dma_soc_data.flags */
+#define JZ_SOC_DATA_ALLOW_LEGACY_DT BIT(0)
+#define JZ_SOC_DATA_PROGRAMMABLE_DMA BIT(1)
+#define JZ_SOC_DATA_PER_CHAN_PM BIT(2)
+#define JZ_SOC_DATA_NO_DCKES_DCKEC BIT(3)
+#define JZ_SOC_DATA_BREAK_LINKS BIT(4)
+
+/**
+ * struct jz4780_dma_hwdesc - descriptor structure read by the DMA controller.
+ * @dcm: value for the DCM (channel command) register
+ * @dsa: source address
+ * @dta: target address
+ * @dtc: transfer count (number of blocks of the transfer size specified in DCM
+ * to transfer) in the low 24 bits, offset of the next descriptor from the
+ * descriptor base address in the upper 8 bits.
+ */
+struct jz4780_dma_hwdesc {
+ u32 dcm;
+ u32 dsa;
+ u32 dta;
+ u32 dtc;
+};
+
+/* Size of allocations for hardware descriptor blocks. */
+#define JZ_DMA_DESC_BLOCK_SIZE PAGE_SIZE
+#define JZ_DMA_MAX_DESC \
+ (JZ_DMA_DESC_BLOCK_SIZE / sizeof(struct jz4780_dma_hwdesc))
+
+struct jz4780_dma_desc {
+ struct virt_dma_desc vdesc;
+
+ struct jz4780_dma_hwdesc *desc;
+ dma_addr_t desc_phys;
+ unsigned int count;
+ enum dma_transaction_type type;
+ u32 transfer_type;
+ u32 status;
+};
+
+struct jz4780_dma_chan {
+ struct virt_dma_chan vchan;
+ unsigned int id;
+ struct dma_pool *desc_pool;
+
+ u32 transfer_type_tx, transfer_type_rx;
+ u32 transfer_shift;
+ struct dma_slave_config config;
+
+ struct jz4780_dma_desc *desc;
+ unsigned int curr_hwdesc;
+};
+
+struct jz4780_dma_soc_data {
+ unsigned int nb_channels;
+ unsigned int transfer_ord_max;
+ unsigned long flags;
+};
+
+struct jz4780_dma_dev {
+ struct dma_device dma_device;
+ void __iomem *chn_base;
+ void __iomem *ctrl_base;
+ struct clk *clk;
+ unsigned int irq;
+ const struct jz4780_dma_soc_data *soc_data;
+
+ u32 chan_reserved;
+ struct jz4780_dma_chan chan[];
+};
+
+struct jz4780_dma_filter_data {
+ u32 transfer_type_tx, transfer_type_rx;
+ int channel;
+};
+
+static inline struct jz4780_dma_chan *to_jz4780_dma_chan(struct dma_chan *chan)
+{
+ return container_of(chan, struct jz4780_dma_chan, vchan.chan);
+}
+
+static inline struct jz4780_dma_desc *to_jz4780_dma_desc(
+ struct virt_dma_desc *vdesc)
+{
+ return container_of(vdesc, struct jz4780_dma_desc, vdesc);
+}
+
+static inline struct jz4780_dma_dev *jz4780_dma_chan_parent(
+ struct jz4780_dma_chan *jzchan)
+{
+ return container_of(jzchan->vchan.chan.device, struct jz4780_dma_dev,
+ dma_device);
+}
+
+static inline u32 jz4780_dma_chn_readl(struct jz4780_dma_dev *jzdma,
+ unsigned int chn, unsigned int reg)
+{
+ return readl(jzdma->chn_base + reg + JZ_DMA_REG_CHAN(chn));
+}
+
+static inline void jz4780_dma_chn_writel(struct jz4780_dma_dev *jzdma,
+ unsigned int chn, unsigned int reg, u32 val)
+{
+ writel(val, jzdma->chn_base + reg + JZ_DMA_REG_CHAN(chn));
+}
+
+static inline u32 jz4780_dma_ctrl_readl(struct jz4780_dma_dev *jzdma,
+ unsigned int reg)
+{
+ return readl(jzdma->ctrl_base + reg);
+}
+
+static inline void jz4780_dma_ctrl_writel(struct jz4780_dma_dev *jzdma,
+ unsigned int reg, u32 val)
+{
+ writel(val, jzdma->ctrl_base + reg);
+}
+
+static inline void jz4780_dma_chan_enable(struct jz4780_dma_dev *jzdma,
+ unsigned int chn)
+{
+ if (jzdma->soc_data->flags & JZ_SOC_DATA_PER_CHAN_PM) {
+ unsigned int reg;
+
+ if (jzdma->soc_data->flags & JZ_SOC_DATA_NO_DCKES_DCKEC)
+ reg = JZ_DMA_REG_DCKE;
+ else
+ reg = JZ_DMA_REG_DCKES;
+
+ jz4780_dma_ctrl_writel(jzdma, reg, BIT(chn));
+ }
+}
+
+static inline void jz4780_dma_chan_disable(struct jz4780_dma_dev *jzdma,
+ unsigned int chn)
+{
+ if ((jzdma->soc_data->flags & JZ_SOC_DATA_PER_CHAN_PM) &&
+ !(jzdma->soc_data->flags & JZ_SOC_DATA_NO_DCKES_DCKEC))
+ jz4780_dma_ctrl_writel(jzdma, JZ_DMA_REG_DCKEC, BIT(chn));
+}
+
+static struct jz4780_dma_desc *
+jz4780_dma_desc_alloc(struct jz4780_dma_chan *jzchan, unsigned int count,
+ enum dma_transaction_type type,
+ enum dma_transfer_direction direction)
+{
+ struct jz4780_dma_desc *desc;
+
+ if (count > JZ_DMA_MAX_DESC)
+ return NULL;
+
+ desc = kzalloc(sizeof(*desc), GFP_NOWAIT);
+ if (!desc)
+ return NULL;
+
+ desc->desc = dma_pool_alloc(jzchan->desc_pool, GFP_NOWAIT,
+ &desc->desc_phys);
+ if (!desc->desc) {
+ kfree(desc);
+ return NULL;
+ }
+
+ desc->count = count;
+ desc->type = type;
+
+ if (direction == DMA_DEV_TO_MEM)
+ desc->transfer_type = jzchan->transfer_type_rx;
+ else
+ desc->transfer_type = jzchan->transfer_type_tx;
+
+ return desc;
+}
+
+static void jz4780_dma_desc_free(struct virt_dma_desc *vdesc)
+{
+ struct jz4780_dma_desc *desc = to_jz4780_dma_desc(vdesc);
+ struct jz4780_dma_chan *jzchan = to_jz4780_dma_chan(vdesc->tx.chan);
+
+ dma_pool_free(jzchan->desc_pool, desc->desc, desc->desc_phys);
+ kfree(desc);
+}
+
+static u32 jz4780_dma_transfer_size(struct jz4780_dma_chan *jzchan,
+ unsigned long val, u32 *shift)
+{
+ struct jz4780_dma_dev *jzdma = jz4780_dma_chan_parent(jzchan);
+ int ord = ffs(val) - 1;
+
+ /*
+ * 8 byte transfer sizes unsupported so fall back on 4. If it's larger
+ * than the maximum, just limit it. It is perfectly safe to fall back
+ * in this way since we won't exceed the maximum burst size supported
+ * by the device, the only effect is reduced efficiency. This is better
+ * than refusing to perform the request at all.
+ */
+ if (ord == 3)
+ ord = 2;
+ else if (ord > jzdma->soc_data->transfer_ord_max)
+ ord = jzdma->soc_data->transfer_ord_max;
+
+ *shift = ord;
+
+ switch (ord) {
+ case 0:
+ return JZ_DMA_SIZE_1_BYTE;
+ case 1:
+ return JZ_DMA_SIZE_2_BYTE;
+ case 2:
+ return JZ_DMA_SIZE_4_BYTE;
+ case 4:
+ return JZ_DMA_SIZE_16_BYTE;
+ case 5:
+ return JZ_DMA_SIZE_32_BYTE;
+ case 6:
+ return JZ_DMA_SIZE_64_BYTE;
+ default:
+ return JZ_DMA_SIZE_128_BYTE;
+ }
+}
+
+static int jz4780_dma_setup_hwdesc(struct jz4780_dma_chan *jzchan,
+ struct jz4780_dma_hwdesc *desc, dma_addr_t addr, size_t len,
+ enum dma_transfer_direction direction)
+{
+ struct dma_slave_config *config = &jzchan->config;
+ u32 width, maxburst, tsz;
+
+ if (direction == DMA_MEM_TO_DEV) {
+ desc->dcm = JZ_DMA_DCM_SAI;
+ desc->dsa = addr;
+ desc->dta = config->dst_addr;
+
+ width = config->dst_addr_width;
+ maxburst = config->dst_maxburst;
+ } else {
+ desc->dcm = JZ_DMA_DCM_DAI;
+ desc->dsa = config->src_addr;
+ desc->dta = addr;
+
+ width = config->src_addr_width;
+ maxburst = config->src_maxburst;
+ }
+
+ /*
+ * This calculates the maximum transfer size that can be used with the
+ * given address, length, width and maximum burst size. The address
+ * must be aligned to the transfer size, the total length must be
+ * divisible by the transfer size, and we must not use more than the
+ * maximum burst specified by the user.
+ */
+ tsz = jz4780_dma_transfer_size(jzchan, addr | len | (width * maxburst),
+ &jzchan->transfer_shift);
+
+ switch (width) {
+ case DMA_SLAVE_BUSWIDTH_1_BYTE:
+ case DMA_SLAVE_BUSWIDTH_2_BYTES:
+ break;
+ case DMA_SLAVE_BUSWIDTH_4_BYTES:
+ width = JZ_DMA_WIDTH_32_BIT;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ desc->dcm |= tsz << JZ_DMA_DCM_TSZ_SHIFT;
+ desc->dcm |= width << JZ_DMA_DCM_SP_SHIFT;
+ desc->dcm |= width << JZ_DMA_DCM_DP_SHIFT;
+
+ desc->dtc = len >> jzchan->transfer_shift;
+ return 0;
+}
+
+static struct dma_async_tx_descriptor *jz4780_dma_prep_slave_sg(
+ struct dma_chan *chan, struct scatterlist *sgl, unsigned int sg_len,
+ enum dma_transfer_direction direction, unsigned long flags,
+ void *context)
+{
+ struct jz4780_dma_chan *jzchan = to_jz4780_dma_chan(chan);
+ struct jz4780_dma_dev *jzdma = jz4780_dma_chan_parent(jzchan);
+ struct jz4780_dma_desc *desc;
+ unsigned int i;
+ int err;
+
+ desc = jz4780_dma_desc_alloc(jzchan, sg_len, DMA_SLAVE, direction);
+ if (!desc)
+ return NULL;
+
+ for (i = 0; i < sg_len; i++) {
+ err = jz4780_dma_setup_hwdesc(jzchan, &desc->desc[i],
+ sg_dma_address(&sgl[i]),
+ sg_dma_len(&sgl[i]),
+ direction);
+ if (err < 0) {
+ jz4780_dma_desc_free(&jzchan->desc->vdesc);
+ return NULL;
+ }
+
+ desc->desc[i].dcm |= JZ_DMA_DCM_TIE;
+
+ if (i != (sg_len - 1) &&
+ !(jzdma->soc_data->flags & JZ_SOC_DATA_BREAK_LINKS)) {
+ /* Automatically proceed to the next descriptor. */
+ desc->desc[i].dcm |= JZ_DMA_DCM_LINK;
+
+ /*
+ * The upper 8 bits of the DTC field in the descriptor
+ * must be set to (offset from descriptor base of next
+ * descriptor >> 4).
+ */
+ desc->desc[i].dtc |=
+ (((i + 1) * sizeof(*desc->desc)) >> 4) << 24;
+ }
+ }
+
+ return vchan_tx_prep(&jzchan->vchan, &desc->vdesc, flags);
+}
+
+static struct dma_async_tx_descriptor *jz4780_dma_prep_dma_cyclic(
+ struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
+ size_t period_len, enum dma_transfer_direction direction,
+ unsigned long flags)
+{
+ struct jz4780_dma_chan *jzchan = to_jz4780_dma_chan(chan);
+ struct jz4780_dma_desc *desc;
+ unsigned int periods, i;
+ int err;
+
+ if (buf_len % period_len)
+ return NULL;
+
+ periods = buf_len / period_len;
+
+ desc = jz4780_dma_desc_alloc(jzchan, periods, DMA_CYCLIC, direction);
+ if (!desc)
+ return NULL;
+
+ for (i = 0; i < periods; i++) {
+ err = jz4780_dma_setup_hwdesc(jzchan, &desc->desc[i], buf_addr,
+ period_len, direction);
+ if (err < 0) {
+ jz4780_dma_desc_free(&jzchan->desc->vdesc);
+ return NULL;
+ }
+
+ buf_addr += period_len;
+
+ /*
+ * Set the link bit to indicate that the controller should
+ * automatically proceed to the next descriptor. In
+ * jz4780_dma_begin(), this will be cleared if we need to issue
+ * an interrupt after each period.
+ */
+ desc->desc[i].dcm |= JZ_DMA_DCM_TIE | JZ_DMA_DCM_LINK;
+
+ /*
+ * The upper 8 bits of the DTC field in the descriptor must be
+ * set to (offset from descriptor base of next descriptor >> 4).
+ * If this is the last descriptor, link it back to the first,
+ * i.e. leave offset set to 0, otherwise point to the next one.
+ */
+ if (i != (periods - 1)) {
+ desc->desc[i].dtc |=
+ (((i + 1) * sizeof(*desc->desc)) >> 4) << 24;
+ }
+ }
+
+ return vchan_tx_prep(&jzchan->vchan, &desc->vdesc, flags);
+}
+
+static struct dma_async_tx_descriptor *jz4780_dma_prep_dma_memcpy(
+ struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
+ size_t len, unsigned long flags)
+{
+ struct jz4780_dma_chan *jzchan = to_jz4780_dma_chan(chan);
+ struct jz4780_dma_desc *desc;
+ u32 tsz;
+
+ desc = jz4780_dma_desc_alloc(jzchan, 1, DMA_MEMCPY, 0);
+ if (!desc)
+ return NULL;
+
+ tsz = jz4780_dma_transfer_size(jzchan, dest | src | len,
+ &jzchan->transfer_shift);
+
+ desc->transfer_type = JZ_DMA_DRT_AUTO;
+
+ desc->desc[0].dsa = src;
+ desc->desc[0].dta = dest;
+ desc->desc[0].dcm = JZ_DMA_DCM_TIE | JZ_DMA_DCM_SAI | JZ_DMA_DCM_DAI |
+ tsz << JZ_DMA_DCM_TSZ_SHIFT |
+ JZ_DMA_WIDTH_32_BIT << JZ_DMA_DCM_SP_SHIFT |
+ JZ_DMA_WIDTH_32_BIT << JZ_DMA_DCM_DP_SHIFT;
+ desc->desc[0].dtc = len >> jzchan->transfer_shift;
+
+ return vchan_tx_prep(&jzchan->vchan, &desc->vdesc, flags);
+}
+
+static void jz4780_dma_begin(struct jz4780_dma_chan *jzchan)
+{
+ struct jz4780_dma_dev *jzdma = jz4780_dma_chan_parent(jzchan);
+ struct virt_dma_desc *vdesc;
+ unsigned int i;
+ dma_addr_t desc_phys;
+
+ if (!jzchan->desc) {
+ vdesc = vchan_next_desc(&jzchan->vchan);
+ if (!vdesc)
+ return;
+
+ list_del(&vdesc->node);
+
+ jzchan->desc = to_jz4780_dma_desc(vdesc);
+ jzchan->curr_hwdesc = 0;
+
+ if (jzchan->desc->type == DMA_CYCLIC && vdesc->tx.callback) {
+ /*
+ * The DMA controller doesn't support triggering an
+ * interrupt after processing each descriptor, only
+ * after processing an entire terminated list of
+ * descriptors. For a cyclic DMA setup the list of
+ * descriptors is not terminated so we can never get an
+ * interrupt.
+ *
+ * If the user requested a callback for a cyclic DMA
+ * setup then we workaround this hardware limitation
+ * here by degrading to a set of unlinked descriptors
+ * which we will submit in sequence in response to the
+ * completion of processing the previous descriptor.
+ */
+ for (i = 0; i < jzchan->desc->count; i++)
+ jzchan->desc->desc[i].dcm &= ~JZ_DMA_DCM_LINK;
+ }
+ } else {
+ /*
+ * There is an existing transfer, therefore this must be one
+ * for which we unlinked the descriptors above. Advance to the
+ * next one in the list.
+ */
+ jzchan->curr_hwdesc =
+ (jzchan->curr_hwdesc + 1) % jzchan->desc->count;
+ }
+
+ /* Enable the channel's clock. */
+ jz4780_dma_chan_enable(jzdma, jzchan->id);
+
+ /* Use 4-word descriptors. */
+ jz4780_dma_chn_writel(jzdma, jzchan->id, JZ_DMA_REG_DCS, 0);
+
+ /* Set transfer type. */
+ jz4780_dma_chn_writel(jzdma, jzchan->id, JZ_DMA_REG_DRT,
+ jzchan->desc->transfer_type);
+
+ /*
+ * Set the transfer count. This is redundant for a descriptor-driven
+ * transfer. However, there can be a delay between the transfer start
+ * time and when DTCn reg contains the new transfer count. Setting
+ * it explicitly ensures residue is computed correctly at all times.
+ */
+ jz4780_dma_chn_writel(jzdma, jzchan->id, JZ_DMA_REG_DTC,
+ jzchan->desc->desc[jzchan->curr_hwdesc].dtc);
+
+ /* Write descriptor address and initiate descriptor fetch. */
+ desc_phys = jzchan->desc->desc_phys +
+ (jzchan->curr_hwdesc * sizeof(*jzchan->desc->desc));
+ jz4780_dma_chn_writel(jzdma, jzchan->id, JZ_DMA_REG_DDA, desc_phys);
+ jz4780_dma_ctrl_writel(jzdma, JZ_DMA_REG_DDRS, BIT(jzchan->id));
+
+ /* Enable the channel. */
+ jz4780_dma_chn_writel(jzdma, jzchan->id, JZ_DMA_REG_DCS,
+ JZ_DMA_DCS_CTE);
+}
+
+static void jz4780_dma_issue_pending(struct dma_chan *chan)
+{
+ struct jz4780_dma_chan *jzchan = to_jz4780_dma_chan(chan);
+ unsigned long flags;
+
+ spin_lock_irqsave(&jzchan->vchan.lock, flags);
+
+ if (vchan_issue_pending(&jzchan->vchan) && !jzchan->desc)
+ jz4780_dma_begin(jzchan);
+
+ spin_unlock_irqrestore(&jzchan->vchan.lock, flags);
+}
+
+static int jz4780_dma_terminate_all(struct dma_chan *chan)
+{
+ struct jz4780_dma_chan *jzchan = to_jz4780_dma_chan(chan);
+ struct jz4780_dma_dev *jzdma = jz4780_dma_chan_parent(jzchan);
+ unsigned long flags;
+ LIST_HEAD(head);
+
+ spin_lock_irqsave(&jzchan->vchan.lock, flags);
+
+ /* Clear the DMA status and stop the transfer. */
+ jz4780_dma_chn_writel(jzdma, jzchan->id, JZ_DMA_REG_DCS, 0);
+ if (jzchan->desc) {
+ vchan_terminate_vdesc(&jzchan->desc->vdesc);
+ jzchan->desc = NULL;
+ }
+
+ jz4780_dma_chan_disable(jzdma, jzchan->id);
+
+ vchan_get_all_descriptors(&jzchan->vchan, &head);
+
+ spin_unlock_irqrestore(&jzchan->vchan.lock, flags);
+
+ vchan_dma_desc_free_list(&jzchan->vchan, &head);
+ return 0;
+}
+
+static void jz4780_dma_synchronize(struct dma_chan *chan)
+{
+ struct jz4780_dma_chan *jzchan = to_jz4780_dma_chan(chan);
+ struct jz4780_dma_dev *jzdma = jz4780_dma_chan_parent(jzchan);
+
+ vchan_synchronize(&jzchan->vchan);
+ jz4780_dma_chan_disable(jzdma, jzchan->id);
+}
+
+static int jz4780_dma_config(struct dma_chan *chan,
+ struct dma_slave_config *config)
+{
+ struct jz4780_dma_chan *jzchan = to_jz4780_dma_chan(chan);
+
+ if ((config->src_addr_width == DMA_SLAVE_BUSWIDTH_8_BYTES)
+ || (config->dst_addr_width == DMA_SLAVE_BUSWIDTH_8_BYTES))
+ return -EINVAL;
+
+ /* Copy the reset of the slave configuration, it is used later. */
+ memcpy(&jzchan->config, config, sizeof(jzchan->config));
+
+ return 0;
+}
+
+static size_t jz4780_dma_desc_residue(struct jz4780_dma_chan *jzchan,
+ struct jz4780_dma_desc *desc, unsigned int next_sg)
+{
+ struct jz4780_dma_dev *jzdma = jz4780_dma_chan_parent(jzchan);
+ unsigned int count = 0;
+ unsigned int i;
+
+ for (i = next_sg; i < desc->count; i++)
+ count += desc->desc[i].dtc & GENMASK(23, 0);
+
+ if (next_sg != 0)
+ count += jz4780_dma_chn_readl(jzdma, jzchan->id,
+ JZ_DMA_REG_DTC);
+
+ return count << jzchan->transfer_shift;
+}
+
+static enum dma_status jz4780_dma_tx_status(struct dma_chan *chan,
+ dma_cookie_t cookie, struct dma_tx_state *txstate)
+{
+ struct jz4780_dma_chan *jzchan = to_jz4780_dma_chan(chan);
+ struct virt_dma_desc *vdesc;
+ enum dma_status status;
+ unsigned long flags;
+ unsigned long residue = 0;
+
+ spin_lock_irqsave(&jzchan->vchan.lock, flags);
+
+ status = dma_cookie_status(chan, cookie, txstate);
+ if ((status == DMA_COMPLETE) || (txstate == NULL))
+ goto out_unlock_irqrestore;
+
+ vdesc = vchan_find_desc(&jzchan->vchan, cookie);
+ if (vdesc) {
+ /* On the issued list, so hasn't been processed yet */
+ residue = jz4780_dma_desc_residue(jzchan,
+ to_jz4780_dma_desc(vdesc), 0);
+ } else if (cookie == jzchan->desc->vdesc.tx.cookie) {
+ residue = jz4780_dma_desc_residue(jzchan, jzchan->desc,
+ jzchan->curr_hwdesc + 1);
+ }
+ dma_set_residue(txstate, residue);
+
+ if (vdesc && jzchan->desc && vdesc == &jzchan->desc->vdesc
+ && jzchan->desc->status & (JZ_DMA_DCS_AR | JZ_DMA_DCS_HLT))
+ status = DMA_ERROR;
+
+out_unlock_irqrestore:
+ spin_unlock_irqrestore(&jzchan->vchan.lock, flags);
+ return status;
+}
+
+static bool jz4780_dma_chan_irq(struct jz4780_dma_dev *jzdma,
+ struct jz4780_dma_chan *jzchan)
+{
+ const unsigned int soc_flags = jzdma->soc_data->flags;
+ struct jz4780_dma_desc *desc = jzchan->desc;
+ u32 dcs;
+ bool ack = true;
+
+ spin_lock(&jzchan->vchan.lock);
+
+ dcs = jz4780_dma_chn_readl(jzdma, jzchan->id, JZ_DMA_REG_DCS);
+ jz4780_dma_chn_writel(jzdma, jzchan->id, JZ_DMA_REG_DCS, 0);
+
+ if (dcs & JZ_DMA_DCS_AR) {
+ dev_warn(&jzchan->vchan.chan.dev->device,
+ "address error (DCS=0x%x)\n", dcs);
+ }
+
+ if (dcs & JZ_DMA_DCS_HLT) {
+ dev_warn(&jzchan->vchan.chan.dev->device,
+ "channel halt (DCS=0x%x)\n", dcs);
+ }
+
+ if (jzchan->desc) {
+ jzchan->desc->status = dcs;
+
+ if ((dcs & (JZ_DMA_DCS_AR | JZ_DMA_DCS_HLT)) == 0) {
+ if (jzchan->desc->type == DMA_CYCLIC) {
+ vchan_cyclic_callback(&jzchan->desc->vdesc);
+
+ jz4780_dma_begin(jzchan);
+ } else if (dcs & JZ_DMA_DCS_TT) {
+ if (!(soc_flags & JZ_SOC_DATA_BREAK_LINKS) ||
+ (jzchan->curr_hwdesc + 1 == desc->count)) {
+ vchan_cookie_complete(&desc->vdesc);
+ jzchan->desc = NULL;
+ }
+
+ jz4780_dma_begin(jzchan);
+ } else {
+ /* False positive - continue the transfer */
+ ack = false;
+ jz4780_dma_chn_writel(jzdma, jzchan->id,
+ JZ_DMA_REG_DCS,
+ JZ_DMA_DCS_CTE);
+ }
+ }
+ } else {
+ dev_err(&jzchan->vchan.chan.dev->device,
+ "channel IRQ with no active transfer\n");
+ }
+
+ spin_unlock(&jzchan->vchan.lock);
+
+ return ack;
+}
+
+static irqreturn_t jz4780_dma_irq_handler(int irq, void *data)
+{
+ struct jz4780_dma_dev *jzdma = data;
+ unsigned int nb_channels = jzdma->soc_data->nb_channels;
+ unsigned long pending;
+ u32 dmac;
+ int i;
+
+ pending = jz4780_dma_ctrl_readl(jzdma, JZ_DMA_REG_DIRQP);
+
+ for_each_set_bit(i, &pending, nb_channels) {
+ if (jz4780_dma_chan_irq(jzdma, &jzdma->chan[i]))
+ pending &= ~BIT(i);
+ }
+
+ /* Clear halt and address error status of all channels. */
+ dmac = jz4780_dma_ctrl_readl(jzdma, JZ_DMA_REG_DMAC);
+ dmac &= ~(JZ_DMA_DMAC_HLT | JZ_DMA_DMAC_AR);
+ jz4780_dma_ctrl_writel(jzdma, JZ_DMA_REG_DMAC, dmac);
+
+ /* Clear interrupt pending status. */
+ jz4780_dma_ctrl_writel(jzdma, JZ_DMA_REG_DIRQP, pending);
+
+ return IRQ_HANDLED;
+}
+
+static int jz4780_dma_alloc_chan_resources(struct dma_chan *chan)
+{
+ struct jz4780_dma_chan *jzchan = to_jz4780_dma_chan(chan);
+
+ jzchan->desc_pool = dma_pool_create(dev_name(&chan->dev->device),
+ chan->device->dev,
+ JZ_DMA_DESC_BLOCK_SIZE,
+ PAGE_SIZE, 0);
+ if (!jzchan->desc_pool) {
+ dev_err(&chan->dev->device,
+ "failed to allocate descriptor pool\n");
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static void jz4780_dma_free_chan_resources(struct dma_chan *chan)
+{
+ struct jz4780_dma_chan *jzchan = to_jz4780_dma_chan(chan);
+
+ vchan_free_chan_resources(&jzchan->vchan);
+ dma_pool_destroy(jzchan->desc_pool);
+ jzchan->desc_pool = NULL;
+}
+
+static bool jz4780_dma_filter_fn(struct dma_chan *chan, void *param)
+{
+ struct jz4780_dma_chan *jzchan = to_jz4780_dma_chan(chan);
+ struct jz4780_dma_dev *jzdma = jz4780_dma_chan_parent(jzchan);
+ struct jz4780_dma_filter_data *data = param;
+
+
+ if (data->channel > -1) {
+ if (data->channel != jzchan->id)
+ return false;
+ } else if (jzdma->chan_reserved & BIT(jzchan->id)) {
+ return false;
+ }
+
+ jzchan->transfer_type_tx = data->transfer_type_tx;
+ jzchan->transfer_type_rx = data->transfer_type_rx;
+
+ return true;
+}
+
+static struct dma_chan *jz4780_of_dma_xlate(struct of_phandle_args *dma_spec,
+ struct of_dma *ofdma)
+{
+ struct jz4780_dma_dev *jzdma = ofdma->of_dma_data;
+ dma_cap_mask_t mask = jzdma->dma_device.cap_mask;
+ struct jz4780_dma_filter_data data;
+
+ if (dma_spec->args_count == 2) {
+ data.transfer_type_tx = dma_spec->args[0];
+ data.transfer_type_rx = dma_spec->args[0];
+ data.channel = dma_spec->args[1];
+ } else if (dma_spec->args_count == 3) {
+ data.transfer_type_tx = dma_spec->args[0];
+ data.transfer_type_rx = dma_spec->args[1];
+ data.channel = dma_spec->args[2];
+ } else {
+ return NULL;
+ }
+
+ if (data.channel > -1) {
+ if (data.channel >= jzdma->soc_data->nb_channels) {
+ dev_err(jzdma->dma_device.dev,
+ "device requested non-existent channel %u\n",
+ data.channel);
+ return NULL;
+ }
+
+ /* Can only select a channel marked as reserved. */
+ if (!(jzdma->chan_reserved & BIT(data.channel))) {
+ dev_err(jzdma->dma_device.dev,
+ "device requested unreserved channel %u\n",
+ data.channel);
+ return NULL;
+ }
+
+ jzdma->chan[data.channel].transfer_type_tx = data.transfer_type_tx;
+ jzdma->chan[data.channel].transfer_type_rx = data.transfer_type_rx;
+
+ return dma_get_slave_channel(
+ &jzdma->chan[data.channel].vchan.chan);
+ } else {
+ return __dma_request_channel(&mask, jz4780_dma_filter_fn, &data,
+ ofdma->of_node);
+ }
+}
+
+static int jz4780_dma_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ const struct jz4780_dma_soc_data *soc_data;
+ struct jz4780_dma_dev *jzdma;
+ struct jz4780_dma_chan *jzchan;
+ struct dma_device *dd;
+ struct resource *res;
+ int i, ret;
+
+ if (!dev->of_node) {
+ dev_err(dev, "This driver must be probed from devicetree\n");
+ return -EINVAL;
+ }
+
+ soc_data = device_get_match_data(dev);
+ if (!soc_data)
+ return -EINVAL;
+
+ jzdma = devm_kzalloc(dev, struct_size(jzdma, chan,
+ soc_data->nb_channels), GFP_KERNEL);
+ if (!jzdma)
+ return -ENOMEM;
+
+ jzdma->soc_data = soc_data;
+ platform_set_drvdata(pdev, jzdma);
+
+ jzdma->chn_base = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(jzdma->chn_base))
+ return PTR_ERR(jzdma->chn_base);
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+ if (res) {
+ jzdma->ctrl_base = devm_ioremap_resource(dev, res);
+ if (IS_ERR(jzdma->ctrl_base))
+ return PTR_ERR(jzdma->ctrl_base);
+ } else if (soc_data->flags & JZ_SOC_DATA_ALLOW_LEGACY_DT) {
+ /*
+ * On JZ4780, if the second memory resource was not supplied,
+ * assume we're using an old devicetree, and calculate the
+ * offset to the control registers.
+ */
+ jzdma->ctrl_base = jzdma->chn_base + JZ4780_DMA_CTRL_OFFSET;
+ } else {
+ dev_err(dev, "failed to get I/O memory\n");
+ return -EINVAL;
+ }
+
+ jzdma->clk = devm_clk_get(dev, NULL);
+ if (IS_ERR(jzdma->clk)) {
+ dev_err(dev, "failed to get clock\n");
+ ret = PTR_ERR(jzdma->clk);
+ return ret;
+ }
+
+ clk_prepare_enable(jzdma->clk);
+
+ /* Property is optional, if it doesn't exist the value will remain 0. */
+ of_property_read_u32_index(dev->of_node, "ingenic,reserved-channels",
+ 0, &jzdma->chan_reserved);
+
+ dd = &jzdma->dma_device;
+
+ /*
+ * The real segment size limit is dependent on the size unit selected
+ * for the transfer. Because the size unit is selected automatically
+ * and may be as small as 1 byte, use a safe limit of 2^24-1 bytes to
+ * ensure the 24-bit transfer count in the descriptor cannot overflow.
+ */
+ dma_set_max_seg_size(dev, 0xffffff);
+
+ dma_cap_set(DMA_MEMCPY, dd->cap_mask);
+ dma_cap_set(DMA_SLAVE, dd->cap_mask);
+ dma_cap_set(DMA_CYCLIC, dd->cap_mask);
+
+ dd->dev = dev;
+ dd->copy_align = DMAENGINE_ALIGN_4_BYTES;
+ dd->device_alloc_chan_resources = jz4780_dma_alloc_chan_resources;
+ dd->device_free_chan_resources = jz4780_dma_free_chan_resources;
+ dd->device_prep_slave_sg = jz4780_dma_prep_slave_sg;
+ dd->device_prep_dma_cyclic = jz4780_dma_prep_dma_cyclic;
+ dd->device_prep_dma_memcpy = jz4780_dma_prep_dma_memcpy;
+ dd->device_config = jz4780_dma_config;
+ dd->device_terminate_all = jz4780_dma_terminate_all;
+ dd->device_synchronize = jz4780_dma_synchronize;
+ dd->device_tx_status = jz4780_dma_tx_status;
+ dd->device_issue_pending = jz4780_dma_issue_pending;
+ dd->src_addr_widths = JZ_DMA_BUSWIDTHS;
+ dd->dst_addr_widths = JZ_DMA_BUSWIDTHS;
+ dd->directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
+ dd->residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
+ dd->max_sg_burst = JZ_DMA_MAX_DESC;
+
+ /*
+ * Enable DMA controller, mark all channels as not programmable.
+ * Also set the FMSC bit - it increases MSC performance, so it makes
+ * little sense not to enable it.
+ */
+ jz4780_dma_ctrl_writel(jzdma, JZ_DMA_REG_DMAC, JZ_DMA_DMAC_DMAE |
+ JZ_DMA_DMAC_FAIC | JZ_DMA_DMAC_FMSC);
+
+ if (soc_data->flags & JZ_SOC_DATA_PROGRAMMABLE_DMA)
+ jz4780_dma_ctrl_writel(jzdma, JZ_DMA_REG_DMACP, 0);
+
+ INIT_LIST_HEAD(&dd->channels);
+
+ for (i = 0; i < soc_data->nb_channels; i++) {
+ jzchan = &jzdma->chan[i];
+ jzchan->id = i;
+
+ vchan_init(&jzchan->vchan, dd);
+ jzchan->vchan.desc_free = jz4780_dma_desc_free;
+ }
+
+ /*
+ * On JZ4760, chan0 won't enable properly the first time.
+ * Enabling then disabling chan1 will magically make chan0 work
+ * correctly.
+ */
+ jz4780_dma_chan_enable(jzdma, 1);
+ jz4780_dma_chan_disable(jzdma, 1);
+
+ ret = platform_get_irq(pdev, 0);
+ if (ret < 0)
+ goto err_disable_clk;
+
+ jzdma->irq = ret;
+
+ ret = request_irq(jzdma->irq, jz4780_dma_irq_handler, 0, dev_name(dev),
+ jzdma);
+ if (ret) {
+ dev_err(dev, "failed to request IRQ %u!\n", jzdma->irq);
+ goto err_disable_clk;
+ }
+
+ ret = dmaenginem_async_device_register(dd);
+ if (ret) {
+ dev_err(dev, "failed to register device\n");
+ goto err_free_irq;
+ }
+
+ /* Register with OF DMA helpers. */
+ ret = of_dma_controller_register(dev->of_node, jz4780_of_dma_xlate,
+ jzdma);
+ if (ret) {
+ dev_err(dev, "failed to register OF DMA controller\n");
+ goto err_free_irq;
+ }
+
+ dev_info(dev, "JZ4780 DMA controller initialised\n");
+ return 0;
+
+err_free_irq:
+ free_irq(jzdma->irq, jzdma);
+
+err_disable_clk:
+ clk_disable_unprepare(jzdma->clk);
+ return ret;
+}
+
+static int jz4780_dma_remove(struct platform_device *pdev)
+{
+ struct jz4780_dma_dev *jzdma = platform_get_drvdata(pdev);
+ int i;
+
+ of_dma_controller_free(pdev->dev.of_node);
+
+ clk_disable_unprepare(jzdma->clk);
+ free_irq(jzdma->irq, jzdma);
+
+ for (i = 0; i < jzdma->soc_data->nb_channels; i++)
+ tasklet_kill(&jzdma->chan[i].vchan.task);
+
+ return 0;
+}
+
+static const struct jz4780_dma_soc_data jz4740_dma_soc_data = {
+ .nb_channels = 6,
+ .transfer_ord_max = 5,
+ .flags = JZ_SOC_DATA_BREAK_LINKS,
+};
+
+static const struct jz4780_dma_soc_data jz4725b_dma_soc_data = {
+ .nb_channels = 6,
+ .transfer_ord_max = 5,
+ .flags = JZ_SOC_DATA_PER_CHAN_PM | JZ_SOC_DATA_NO_DCKES_DCKEC |
+ JZ_SOC_DATA_BREAK_LINKS,
+};
+
+static const struct jz4780_dma_soc_data jz4760_dma_soc_data = {
+ .nb_channels = 5,
+ .transfer_ord_max = 6,
+ .flags = JZ_SOC_DATA_PER_CHAN_PM | JZ_SOC_DATA_NO_DCKES_DCKEC,
+};
+
+static const struct jz4780_dma_soc_data jz4760_mdma_soc_data = {
+ .nb_channels = 2,
+ .transfer_ord_max = 6,
+ .flags = JZ_SOC_DATA_PER_CHAN_PM | JZ_SOC_DATA_NO_DCKES_DCKEC,
+};
+
+static const struct jz4780_dma_soc_data jz4760_bdma_soc_data = {
+ .nb_channels = 3,
+ .transfer_ord_max = 6,
+ .flags = JZ_SOC_DATA_PER_CHAN_PM | JZ_SOC_DATA_NO_DCKES_DCKEC,
+};
+
+static const struct jz4780_dma_soc_data jz4760b_dma_soc_data = {
+ .nb_channels = 5,
+ .transfer_ord_max = 6,
+ .flags = JZ_SOC_DATA_PER_CHAN_PM,
+};
+
+static const struct jz4780_dma_soc_data jz4760b_mdma_soc_data = {
+ .nb_channels = 2,
+ .transfer_ord_max = 6,
+ .flags = JZ_SOC_DATA_PER_CHAN_PM,
+};
+
+static const struct jz4780_dma_soc_data jz4760b_bdma_soc_data = {
+ .nb_channels = 3,
+ .transfer_ord_max = 6,
+ .flags = JZ_SOC_DATA_PER_CHAN_PM,
+};
+
+static const struct jz4780_dma_soc_data jz4770_dma_soc_data = {
+ .nb_channels = 6,
+ .transfer_ord_max = 6,
+ .flags = JZ_SOC_DATA_PER_CHAN_PM,
+};
+
+static const struct jz4780_dma_soc_data jz4780_dma_soc_data = {
+ .nb_channels = 32,
+ .transfer_ord_max = 7,
+ .flags = JZ_SOC_DATA_ALLOW_LEGACY_DT | JZ_SOC_DATA_PROGRAMMABLE_DMA,
+};
+
+static const struct jz4780_dma_soc_data x1000_dma_soc_data = {
+ .nb_channels = 8,
+ .transfer_ord_max = 7,
+ .flags = JZ_SOC_DATA_PROGRAMMABLE_DMA,
+};
+
+static const struct jz4780_dma_soc_data x1830_dma_soc_data = {
+ .nb_channels = 32,
+ .transfer_ord_max = 7,
+ .flags = JZ_SOC_DATA_PROGRAMMABLE_DMA,
+};
+
+static const struct of_device_id jz4780_dma_dt_match[] = {
+ { .compatible = "ingenic,jz4740-dma", .data = &jz4740_dma_soc_data },
+ { .compatible = "ingenic,jz4725b-dma", .data = &jz4725b_dma_soc_data },
+ { .compatible = "ingenic,jz4760-dma", .data = &jz4760_dma_soc_data },
+ { .compatible = "ingenic,jz4760-mdma", .data = &jz4760_mdma_soc_data },
+ { .compatible = "ingenic,jz4760-bdma", .data = &jz4760_bdma_soc_data },
+ { .compatible = "ingenic,jz4760b-dma", .data = &jz4760b_dma_soc_data },
+ { .compatible = "ingenic,jz4760b-mdma", .data = &jz4760b_mdma_soc_data },
+ { .compatible = "ingenic,jz4760b-bdma", .data = &jz4760b_bdma_soc_data },
+ { .compatible = "ingenic,jz4770-dma", .data = &jz4770_dma_soc_data },
+ { .compatible = "ingenic,jz4780-dma", .data = &jz4780_dma_soc_data },
+ { .compatible = "ingenic,x1000-dma", .data = &x1000_dma_soc_data },
+ { .compatible = "ingenic,x1830-dma", .data = &x1830_dma_soc_data },
+ {},
+};
+MODULE_DEVICE_TABLE(of, jz4780_dma_dt_match);
+
+static struct platform_driver jz4780_dma_driver = {
+ .probe = jz4780_dma_probe,
+ .remove = jz4780_dma_remove,
+ .driver = {
+ .name = "jz4780-dma",
+ .of_match_table = jz4780_dma_dt_match,
+ },
+};
+
+static int __init jz4780_dma_init(void)
+{
+ return platform_driver_register(&jz4780_dma_driver);
+}
+subsys_initcall(jz4780_dma_init);
+
+static void __exit jz4780_dma_exit(void)
+{
+ platform_driver_unregister(&jz4780_dma_driver);
+}
+module_exit(jz4780_dma_exit);
+
+MODULE_AUTHOR("Alex Smith <alex@alex-smith.me.uk>");
+MODULE_DESCRIPTION("Ingenic JZ4780 DMA controller driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
new file mode 100644
index 000000000..892e83892
--- /dev/null
+++ b/drivers/dma/dmaengine.c
@@ -0,0 +1,1655 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright(c) 2004 - 2006 Intel Corporation. All rights reserved.
+ */
+
+/*
+ * This code implements the DMA subsystem. It provides a HW-neutral interface
+ * for other kernel code to use asynchronous memory copy capabilities,
+ * if present, and allows different HW DMA drivers to register as providing
+ * this capability.
+ *
+ * Due to the fact we are accelerating what is already a relatively fast
+ * operation, the code goes to great lengths to avoid additional overhead,
+ * such as locking.
+ *
+ * LOCKING:
+ *
+ * The subsystem keeps a global list of dma_device structs it is protected by a
+ * mutex, dma_list_mutex.
+ *
+ * A subsystem can get access to a channel by calling dmaengine_get() followed
+ * by dma_find_channel(), or if it has need for an exclusive channel it can call
+ * dma_request_channel(). Once a channel is allocated a reference is taken
+ * against its corresponding driver to disable removal.
+ *
+ * Each device has a channels list, which runs unlocked but is never modified
+ * once the device is registered, it's just setup by the driver.
+ *
+ * See Documentation/driver-api/dmaengine for more details
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/platform_device.h>
+#include <linux/dma-mapping.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/device.h>
+#include <linux/dmaengine.h>
+#include <linux/hardirq.h>
+#include <linux/spinlock.h>
+#include <linux/percpu.h>
+#include <linux/rcupdate.h>
+#include <linux/mutex.h>
+#include <linux/jiffies.h>
+#include <linux/rculist.h>
+#include <linux/idr.h>
+#include <linux/slab.h>
+#include <linux/acpi.h>
+#include <linux/acpi_dma.h>
+#include <linux/of_dma.h>
+#include <linux/mempool.h>
+#include <linux/numa.h>
+
+#include "dmaengine.h"
+
+static DEFINE_MUTEX(dma_list_mutex);
+static DEFINE_IDA(dma_ida);
+static LIST_HEAD(dma_device_list);
+static long dmaengine_ref_count;
+
+/* --- debugfs implementation --- */
+#ifdef CONFIG_DEBUG_FS
+#include <linux/debugfs.h>
+
+static struct dentry *rootdir;
+
+static void dmaengine_debug_register(struct dma_device *dma_dev)
+{
+ dma_dev->dbg_dev_root = debugfs_create_dir(dev_name(dma_dev->dev),
+ rootdir);
+ if (IS_ERR(dma_dev->dbg_dev_root))
+ dma_dev->dbg_dev_root = NULL;
+}
+
+static void dmaengine_debug_unregister(struct dma_device *dma_dev)
+{
+ debugfs_remove_recursive(dma_dev->dbg_dev_root);
+ dma_dev->dbg_dev_root = NULL;
+}
+
+static void dmaengine_dbg_summary_show(struct seq_file *s,
+ struct dma_device *dma_dev)
+{
+ struct dma_chan *chan;
+
+ list_for_each_entry(chan, &dma_dev->channels, device_node) {
+ if (chan->client_count) {
+ seq_printf(s, " %-13s| %s", dma_chan_name(chan),
+ chan->dbg_client_name ?: "in-use");
+
+ if (chan->router)
+ seq_printf(s, " (via router: %s)\n",
+ dev_name(chan->router->dev));
+ else
+ seq_puts(s, "\n");
+ }
+ }
+}
+
+static int dmaengine_summary_show(struct seq_file *s, void *data)
+{
+ struct dma_device *dma_dev = NULL;
+
+ mutex_lock(&dma_list_mutex);
+ list_for_each_entry(dma_dev, &dma_device_list, global_node) {
+ seq_printf(s, "dma%d (%s): number of channels: %u\n",
+ dma_dev->dev_id, dev_name(dma_dev->dev),
+ dma_dev->chancnt);
+
+ if (dma_dev->dbg_summary_show)
+ dma_dev->dbg_summary_show(s, dma_dev);
+ else
+ dmaengine_dbg_summary_show(s, dma_dev);
+
+ if (!list_is_last(&dma_dev->global_node, &dma_device_list))
+ seq_puts(s, "\n");
+ }
+ mutex_unlock(&dma_list_mutex);
+
+ return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(dmaengine_summary);
+
+static void __init dmaengine_debugfs_init(void)
+{
+ rootdir = debugfs_create_dir("dmaengine", NULL);
+
+ /* /sys/kernel/debug/dmaengine/summary */
+ debugfs_create_file("summary", 0444, rootdir, NULL,
+ &dmaengine_summary_fops);
+}
+#else
+static inline void dmaengine_debugfs_init(void) { }
+static inline int dmaengine_debug_register(struct dma_device *dma_dev)
+{
+ return 0;
+}
+
+static inline void dmaengine_debug_unregister(struct dma_device *dma_dev) { }
+#endif /* DEBUG_FS */
+
+/* --- sysfs implementation --- */
+
+#define DMA_SLAVE_NAME "slave"
+
+/**
+ * dev_to_dma_chan - convert a device pointer to its sysfs container object
+ * @dev: device node
+ *
+ * Must be called under dma_list_mutex.
+ */
+static struct dma_chan *dev_to_dma_chan(struct device *dev)
+{
+ struct dma_chan_dev *chan_dev;
+
+ chan_dev = container_of(dev, typeof(*chan_dev), device);
+ return chan_dev->chan;
+}
+
+static ssize_t memcpy_count_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct dma_chan *chan;
+ unsigned long count = 0;
+ int i;
+ int err;
+
+ mutex_lock(&dma_list_mutex);
+ chan = dev_to_dma_chan(dev);
+ if (chan) {
+ for_each_possible_cpu(i)
+ count += per_cpu_ptr(chan->local, i)->memcpy_count;
+ err = sprintf(buf, "%lu\n", count);
+ } else
+ err = -ENODEV;
+ mutex_unlock(&dma_list_mutex);
+
+ return err;
+}
+static DEVICE_ATTR_RO(memcpy_count);
+
+static ssize_t bytes_transferred_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct dma_chan *chan;
+ unsigned long count = 0;
+ int i;
+ int err;
+
+ mutex_lock(&dma_list_mutex);
+ chan = dev_to_dma_chan(dev);
+ if (chan) {
+ for_each_possible_cpu(i)
+ count += per_cpu_ptr(chan->local, i)->bytes_transferred;
+ err = sprintf(buf, "%lu\n", count);
+ } else
+ err = -ENODEV;
+ mutex_unlock(&dma_list_mutex);
+
+ return err;
+}
+static DEVICE_ATTR_RO(bytes_transferred);
+
+static ssize_t in_use_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct dma_chan *chan;
+ int err;
+
+ mutex_lock(&dma_list_mutex);
+ chan = dev_to_dma_chan(dev);
+ if (chan)
+ err = sprintf(buf, "%d\n", chan->client_count);
+ else
+ err = -ENODEV;
+ mutex_unlock(&dma_list_mutex);
+
+ return err;
+}
+static DEVICE_ATTR_RO(in_use);
+
+static struct attribute *dma_dev_attrs[] = {
+ &dev_attr_memcpy_count.attr,
+ &dev_attr_bytes_transferred.attr,
+ &dev_attr_in_use.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(dma_dev);
+
+static void chan_dev_release(struct device *dev)
+{
+ struct dma_chan_dev *chan_dev;
+
+ chan_dev = container_of(dev, typeof(*chan_dev), device);
+ kfree(chan_dev);
+}
+
+static struct class dma_devclass = {
+ .name = "dma",
+ .dev_groups = dma_dev_groups,
+ .dev_release = chan_dev_release,
+};
+
+/* --- client and device registration --- */
+
+/* enable iteration over all operation types */
+static dma_cap_mask_t dma_cap_mask_all;
+
+/**
+ * struct dma_chan_tbl_ent - tracks channel allocations per core/operation
+ * @chan: associated channel for this entry
+ */
+struct dma_chan_tbl_ent {
+ struct dma_chan *chan;
+};
+
+/* percpu lookup table for memory-to-memory offload providers */
+static struct dma_chan_tbl_ent __percpu *channel_table[DMA_TX_TYPE_END];
+
+static int __init dma_channel_table_init(void)
+{
+ enum dma_transaction_type cap;
+ int err = 0;
+
+ bitmap_fill(dma_cap_mask_all.bits, DMA_TX_TYPE_END);
+
+ /* 'interrupt', 'private', and 'slave' are channel capabilities,
+ * but are not associated with an operation so they do not need
+ * an entry in the channel_table
+ */
+ clear_bit(DMA_INTERRUPT, dma_cap_mask_all.bits);
+ clear_bit(DMA_PRIVATE, dma_cap_mask_all.bits);
+ clear_bit(DMA_SLAVE, dma_cap_mask_all.bits);
+
+ for_each_dma_cap_mask(cap, dma_cap_mask_all) {
+ channel_table[cap] = alloc_percpu(struct dma_chan_tbl_ent);
+ if (!channel_table[cap]) {
+ err = -ENOMEM;
+ break;
+ }
+ }
+
+ if (err) {
+ pr_err("dmaengine dma_channel_table_init failure: %d\n", err);
+ for_each_dma_cap_mask(cap, dma_cap_mask_all)
+ free_percpu(channel_table[cap]);
+ }
+
+ return err;
+}
+arch_initcall(dma_channel_table_init);
+
+/**
+ * dma_chan_is_local - checks if the channel is in the same NUMA-node as the CPU
+ * @chan: DMA channel to test
+ * @cpu: CPU index which the channel should be close to
+ *
+ * Returns true if the channel is in the same NUMA-node as the CPU.
+ */
+static bool dma_chan_is_local(struct dma_chan *chan, int cpu)
+{
+ int node = dev_to_node(chan->device->dev);
+ return node == NUMA_NO_NODE ||
+ cpumask_test_cpu(cpu, cpumask_of_node(node));
+}
+
+/**
+ * min_chan - finds the channel with min count and in the same NUMA-node as the CPU
+ * @cap: capability to match
+ * @cpu: CPU index which the channel should be close to
+ *
+ * If some channels are close to the given CPU, the one with the lowest
+ * reference count is returned. Otherwise, CPU is ignored and only the
+ * reference count is taken into account.
+ *
+ * Must be called under dma_list_mutex.
+ */
+static struct dma_chan *min_chan(enum dma_transaction_type cap, int cpu)
+{
+ struct dma_device *device;
+ struct dma_chan *chan;
+ struct dma_chan *min = NULL;
+ struct dma_chan *localmin = NULL;
+
+ list_for_each_entry(device, &dma_device_list, global_node) {
+ if (!dma_has_cap(cap, device->cap_mask) ||
+ dma_has_cap(DMA_PRIVATE, device->cap_mask))
+ continue;
+ list_for_each_entry(chan, &device->channels, device_node) {
+ if (!chan->client_count)
+ continue;
+ if (!min || chan->table_count < min->table_count)
+ min = chan;
+
+ if (dma_chan_is_local(chan, cpu))
+ if (!localmin ||
+ chan->table_count < localmin->table_count)
+ localmin = chan;
+ }
+ }
+
+ chan = localmin ? localmin : min;
+
+ if (chan)
+ chan->table_count++;
+
+ return chan;
+}
+
+/**
+ * dma_channel_rebalance - redistribute the available channels
+ *
+ * Optimize for CPU isolation (each CPU gets a dedicated channel for an
+ * operation type) in the SMP case, and operation isolation (avoid
+ * multi-tasking channels) in the non-SMP case.
+ *
+ * Must be called under dma_list_mutex.
+ */
+static void dma_channel_rebalance(void)
+{
+ struct dma_chan *chan;
+ struct dma_device *device;
+ int cpu;
+ int cap;
+
+ /* undo the last distribution */
+ for_each_dma_cap_mask(cap, dma_cap_mask_all)
+ for_each_possible_cpu(cpu)
+ per_cpu_ptr(channel_table[cap], cpu)->chan = NULL;
+
+ list_for_each_entry(device, &dma_device_list, global_node) {
+ if (dma_has_cap(DMA_PRIVATE, device->cap_mask))
+ continue;
+ list_for_each_entry(chan, &device->channels, device_node)
+ chan->table_count = 0;
+ }
+
+ /* don't populate the channel_table if no clients are available */
+ if (!dmaengine_ref_count)
+ return;
+
+ /* redistribute available channels */
+ for_each_dma_cap_mask(cap, dma_cap_mask_all)
+ for_each_online_cpu(cpu) {
+ chan = min_chan(cap, cpu);
+ per_cpu_ptr(channel_table[cap], cpu)->chan = chan;
+ }
+}
+
+static int dma_device_satisfies_mask(struct dma_device *device,
+ const dma_cap_mask_t *want)
+{
+ dma_cap_mask_t has;
+
+ bitmap_and(has.bits, want->bits, device->cap_mask.bits,
+ DMA_TX_TYPE_END);
+ return bitmap_equal(want->bits, has.bits, DMA_TX_TYPE_END);
+}
+
+static struct module *dma_chan_to_owner(struct dma_chan *chan)
+{
+ return chan->device->owner;
+}
+
+/**
+ * balance_ref_count - catch up the channel reference count
+ * @chan: channel to balance ->client_count versus dmaengine_ref_count
+ *
+ * Must be called under dma_list_mutex.
+ */
+static void balance_ref_count(struct dma_chan *chan)
+{
+ struct module *owner = dma_chan_to_owner(chan);
+
+ while (chan->client_count < dmaengine_ref_count) {
+ __module_get(owner);
+ chan->client_count++;
+ }
+}
+
+static void dma_device_release(struct kref *ref)
+{
+ struct dma_device *device = container_of(ref, struct dma_device, ref);
+
+ list_del_rcu(&device->global_node);
+ dma_channel_rebalance();
+
+ if (device->device_release)
+ device->device_release(device);
+}
+
+static void dma_device_put(struct dma_device *device)
+{
+ lockdep_assert_held(&dma_list_mutex);
+ kref_put(&device->ref, dma_device_release);
+}
+
+/**
+ * dma_chan_get - try to grab a DMA channel's parent driver module
+ * @chan: channel to grab
+ *
+ * Must be called under dma_list_mutex.
+ */
+static int dma_chan_get(struct dma_chan *chan)
+{
+ struct module *owner = dma_chan_to_owner(chan);
+ int ret;
+
+ /* The channel is already in use, update client count */
+ if (chan->client_count) {
+ __module_get(owner);
+ chan->client_count++;
+ return 0;
+ }
+
+ if (!try_module_get(owner))
+ return -ENODEV;
+
+ ret = kref_get_unless_zero(&chan->device->ref);
+ if (!ret) {
+ ret = -ENODEV;
+ goto module_put_out;
+ }
+
+ /* allocate upon first client reference */
+ if (chan->device->device_alloc_chan_resources) {
+ ret = chan->device->device_alloc_chan_resources(chan);
+ if (ret < 0)
+ goto err_out;
+ }
+
+ chan->client_count++;
+
+ if (!dma_has_cap(DMA_PRIVATE, chan->device->cap_mask))
+ balance_ref_count(chan);
+
+ return 0;
+
+err_out:
+ dma_device_put(chan->device);
+module_put_out:
+ module_put(owner);
+ return ret;
+}
+
+/**
+ * dma_chan_put - drop a reference to a DMA channel's parent driver module
+ * @chan: channel to release
+ *
+ * Must be called under dma_list_mutex.
+ */
+static void dma_chan_put(struct dma_chan *chan)
+{
+ /* This channel is not in use, bail out */
+ if (!chan->client_count)
+ return;
+
+ chan->client_count--;
+
+ /* This channel is not in use anymore, free it */
+ if (!chan->client_count && chan->device->device_free_chan_resources) {
+ /* Make sure all operations have completed */
+ dmaengine_synchronize(chan);
+ chan->device->device_free_chan_resources(chan);
+ }
+
+ /* If the channel is used via a DMA request router, free the mapping */
+ if (chan->router && chan->router->route_free) {
+ chan->router->route_free(chan->router->dev, chan->route_data);
+ chan->router = NULL;
+ chan->route_data = NULL;
+ }
+
+ dma_device_put(chan->device);
+ module_put(dma_chan_to_owner(chan));
+}
+
+enum dma_status dma_sync_wait(struct dma_chan *chan, dma_cookie_t cookie)
+{
+ enum dma_status status;
+ unsigned long dma_sync_wait_timeout = jiffies + msecs_to_jiffies(5000);
+
+ dma_async_issue_pending(chan);
+ do {
+ status = dma_async_is_tx_complete(chan, cookie, NULL, NULL);
+ if (time_after_eq(jiffies, dma_sync_wait_timeout)) {
+ dev_err(chan->device->dev, "%s: timeout!\n", __func__);
+ return DMA_ERROR;
+ }
+ if (status != DMA_IN_PROGRESS)
+ break;
+ cpu_relax();
+ } while (1);
+
+ return status;
+}
+EXPORT_SYMBOL(dma_sync_wait);
+
+/**
+ * dma_find_channel - find a channel to carry out the operation
+ * @tx_type: transaction type
+ */
+struct dma_chan *dma_find_channel(enum dma_transaction_type tx_type)
+{
+ return this_cpu_read(channel_table[tx_type]->chan);
+}
+EXPORT_SYMBOL(dma_find_channel);
+
+/**
+ * dma_issue_pending_all - flush all pending operations across all channels
+ */
+void dma_issue_pending_all(void)
+{
+ struct dma_device *device;
+ struct dma_chan *chan;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(device, &dma_device_list, global_node) {
+ if (dma_has_cap(DMA_PRIVATE, device->cap_mask))
+ continue;
+ list_for_each_entry(chan, &device->channels, device_node)
+ if (chan->client_count)
+ device->device_issue_pending(chan);
+ }
+ rcu_read_unlock();
+}
+EXPORT_SYMBOL(dma_issue_pending_all);
+
+int dma_get_slave_caps(struct dma_chan *chan, struct dma_slave_caps *caps)
+{
+ struct dma_device *device;
+
+ if (!chan || !caps)
+ return -EINVAL;
+
+ device = chan->device;
+
+ /* check if the channel supports slave transactions */
+ if (!(test_bit(DMA_SLAVE, device->cap_mask.bits) ||
+ test_bit(DMA_CYCLIC, device->cap_mask.bits)))
+ return -ENXIO;
+
+ /*
+ * Check whether it reports it uses the generic slave
+ * capabilities, if not, that means it doesn't support any
+ * kind of slave capabilities reporting.
+ */
+ if (!device->directions)
+ return -ENXIO;
+
+ caps->src_addr_widths = device->src_addr_widths;
+ caps->dst_addr_widths = device->dst_addr_widths;
+ caps->directions = device->directions;
+ caps->min_burst = device->min_burst;
+ caps->max_burst = device->max_burst;
+ caps->max_sg_burst = device->max_sg_burst;
+ caps->residue_granularity = device->residue_granularity;
+ caps->descriptor_reuse = device->descriptor_reuse;
+ caps->cmd_pause = !!device->device_pause;
+ caps->cmd_resume = !!device->device_resume;
+ caps->cmd_terminate = !!device->device_terminate_all;
+
+ /*
+ * DMA engine device might be configured with non-uniformly
+ * distributed slave capabilities per device channels. In this
+ * case the corresponding driver may provide the device_caps
+ * callback to override the generic capabilities with
+ * channel-specific ones.
+ */
+ if (device->device_caps)
+ device->device_caps(chan, caps);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(dma_get_slave_caps);
+
+static struct dma_chan *private_candidate(const dma_cap_mask_t *mask,
+ struct dma_device *dev,
+ dma_filter_fn fn, void *fn_param)
+{
+ struct dma_chan *chan;
+
+ if (mask && !dma_device_satisfies_mask(dev, mask)) {
+ dev_dbg(dev->dev, "%s: wrong capabilities\n", __func__);
+ return NULL;
+ }
+ /* devices with multiple channels need special handling as we need to
+ * ensure that all channels are either private or public.
+ */
+ if (dev->chancnt > 1 && !dma_has_cap(DMA_PRIVATE, dev->cap_mask))
+ list_for_each_entry(chan, &dev->channels, device_node) {
+ /* some channels are already publicly allocated */
+ if (chan->client_count)
+ return NULL;
+ }
+
+ list_for_each_entry(chan, &dev->channels, device_node) {
+ if (chan->client_count) {
+ dev_dbg(dev->dev, "%s: %s busy\n",
+ __func__, dma_chan_name(chan));
+ continue;
+ }
+ if (fn && !fn(chan, fn_param)) {
+ dev_dbg(dev->dev, "%s: %s filter said false\n",
+ __func__, dma_chan_name(chan));
+ continue;
+ }
+ return chan;
+ }
+
+ return NULL;
+}
+
+static struct dma_chan *find_candidate(struct dma_device *device,
+ const dma_cap_mask_t *mask,
+ dma_filter_fn fn, void *fn_param)
+{
+ struct dma_chan *chan = private_candidate(mask, device, fn, fn_param);
+ int err;
+
+ if (chan) {
+ /* Found a suitable channel, try to grab, prep, and return it.
+ * We first set DMA_PRIVATE to disable balance_ref_count as this
+ * channel will not be published in the general-purpose
+ * allocator
+ */
+ dma_cap_set(DMA_PRIVATE, device->cap_mask);
+ device->privatecnt++;
+ err = dma_chan_get(chan);
+
+ if (err) {
+ if (err == -ENODEV) {
+ dev_dbg(device->dev, "%s: %s module removed\n",
+ __func__, dma_chan_name(chan));
+ list_del_rcu(&device->global_node);
+ } else
+ dev_dbg(device->dev,
+ "%s: failed to get %s: (%d)\n",
+ __func__, dma_chan_name(chan), err);
+
+ if (--device->privatecnt == 0)
+ dma_cap_clear(DMA_PRIVATE, device->cap_mask);
+
+ chan = ERR_PTR(err);
+ }
+ }
+
+ return chan ? chan : ERR_PTR(-EPROBE_DEFER);
+}
+
+/**
+ * dma_get_slave_channel - try to get specific channel exclusively
+ * @chan: target channel
+ */
+struct dma_chan *dma_get_slave_channel(struct dma_chan *chan)
+{
+ /* lock against __dma_request_channel */
+ mutex_lock(&dma_list_mutex);
+
+ if (chan->client_count == 0) {
+ struct dma_device *device = chan->device;
+ int err;
+
+ dma_cap_set(DMA_PRIVATE, device->cap_mask);
+ device->privatecnt++;
+ err = dma_chan_get(chan);
+ if (err) {
+ dev_dbg(chan->device->dev,
+ "%s: failed to get %s: (%d)\n",
+ __func__, dma_chan_name(chan), err);
+ chan = NULL;
+ if (--device->privatecnt == 0)
+ dma_cap_clear(DMA_PRIVATE, device->cap_mask);
+ }
+ } else
+ chan = NULL;
+
+ mutex_unlock(&dma_list_mutex);
+
+
+ return chan;
+}
+EXPORT_SYMBOL_GPL(dma_get_slave_channel);
+
+struct dma_chan *dma_get_any_slave_channel(struct dma_device *device)
+{
+ dma_cap_mask_t mask;
+ struct dma_chan *chan;
+
+ dma_cap_zero(mask);
+ dma_cap_set(DMA_SLAVE, mask);
+
+ /* lock against __dma_request_channel */
+ mutex_lock(&dma_list_mutex);
+
+ chan = find_candidate(device, &mask, NULL, NULL);
+
+ mutex_unlock(&dma_list_mutex);
+
+ return IS_ERR(chan) ? NULL : chan;
+}
+EXPORT_SYMBOL_GPL(dma_get_any_slave_channel);
+
+/**
+ * __dma_request_channel - try to allocate an exclusive channel
+ * @mask: capabilities that the channel must satisfy
+ * @fn: optional callback to disposition available channels
+ * @fn_param: opaque parameter to pass to dma_filter_fn()
+ * @np: device node to look for DMA channels
+ *
+ * Returns pointer to appropriate DMA channel on success or NULL.
+ */
+struct dma_chan *__dma_request_channel(const dma_cap_mask_t *mask,
+ dma_filter_fn fn, void *fn_param,
+ struct device_node *np)
+{
+ struct dma_device *device, *_d;
+ struct dma_chan *chan = NULL;
+
+ /* Find a channel */
+ mutex_lock(&dma_list_mutex);
+ list_for_each_entry_safe(device, _d, &dma_device_list, global_node) {
+ /* Finds a DMA controller with matching device node */
+ if (np && device->dev->of_node && np != device->dev->of_node)
+ continue;
+
+ chan = find_candidate(device, mask, fn, fn_param);
+ if (!IS_ERR(chan))
+ break;
+
+ chan = NULL;
+ }
+ mutex_unlock(&dma_list_mutex);
+
+ pr_debug("%s: %s (%s)\n",
+ __func__,
+ chan ? "success" : "fail",
+ chan ? dma_chan_name(chan) : NULL);
+
+ return chan;
+}
+EXPORT_SYMBOL_GPL(__dma_request_channel);
+
+static const struct dma_slave_map *dma_filter_match(struct dma_device *device,
+ const char *name,
+ struct device *dev)
+{
+ int i;
+
+ if (!device->filter.mapcnt)
+ return NULL;
+
+ for (i = 0; i < device->filter.mapcnt; i++) {
+ const struct dma_slave_map *map = &device->filter.map[i];
+
+ if (!strcmp(map->devname, dev_name(dev)) &&
+ !strcmp(map->slave, name))
+ return map;
+ }
+
+ return NULL;
+}
+
+/**
+ * dma_request_chan - try to allocate an exclusive slave channel
+ * @dev: pointer to client device structure
+ * @name: slave channel name
+ *
+ * Returns pointer to appropriate DMA channel on success or an error pointer.
+ */
+struct dma_chan *dma_request_chan(struct device *dev, const char *name)
+{
+ struct dma_device *d, *_d;
+ struct dma_chan *chan = NULL;
+
+ /* If device-tree is present get slave info from here */
+ if (dev->of_node)
+ chan = of_dma_request_slave_channel(dev->of_node, name);
+
+ /* If device was enumerated by ACPI get slave info from here */
+ if (has_acpi_companion(dev) && !chan)
+ chan = acpi_dma_request_slave_chan_by_name(dev, name);
+
+ if (PTR_ERR(chan) == -EPROBE_DEFER)
+ return chan;
+
+ if (!IS_ERR_OR_NULL(chan))
+ goto found;
+
+ /* Try to find the channel via the DMA filter map(s) */
+ mutex_lock(&dma_list_mutex);
+ list_for_each_entry_safe(d, _d, &dma_device_list, global_node) {
+ dma_cap_mask_t mask;
+ const struct dma_slave_map *map = dma_filter_match(d, name, dev);
+
+ if (!map)
+ continue;
+
+ dma_cap_zero(mask);
+ dma_cap_set(DMA_SLAVE, mask);
+
+ chan = find_candidate(d, &mask, d->filter.fn, map->param);
+ if (!IS_ERR(chan))
+ break;
+ }
+ mutex_unlock(&dma_list_mutex);
+
+ if (IS_ERR(chan))
+ return chan;
+ if (!chan)
+ return ERR_PTR(-EPROBE_DEFER);
+
+found:
+#ifdef CONFIG_DEBUG_FS
+ chan->dbg_client_name = kasprintf(GFP_KERNEL, "%s:%s", dev_name(dev),
+ name);
+#endif
+
+ chan->name = kasprintf(GFP_KERNEL, "dma:%s", name);
+ if (!chan->name)
+ return chan;
+ chan->slave = dev;
+
+ if (sysfs_create_link(&chan->dev->device.kobj, &dev->kobj,
+ DMA_SLAVE_NAME))
+ dev_warn(dev, "Cannot create DMA %s symlink\n", DMA_SLAVE_NAME);
+ if (sysfs_create_link(&dev->kobj, &chan->dev->device.kobj, chan->name))
+ dev_warn(dev, "Cannot create DMA %s symlink\n", chan->name);
+
+ return chan;
+}
+EXPORT_SYMBOL_GPL(dma_request_chan);
+
+/**
+ * dma_request_chan_by_mask - allocate a channel satisfying certain capabilities
+ * @mask: capabilities that the channel must satisfy
+ *
+ * Returns pointer to appropriate DMA channel on success or an error pointer.
+ */
+struct dma_chan *dma_request_chan_by_mask(const dma_cap_mask_t *mask)
+{
+ struct dma_chan *chan;
+
+ if (!mask)
+ return ERR_PTR(-ENODEV);
+
+ chan = __dma_request_channel(mask, NULL, NULL, NULL);
+ if (!chan) {
+ mutex_lock(&dma_list_mutex);
+ if (list_empty(&dma_device_list))
+ chan = ERR_PTR(-EPROBE_DEFER);
+ else
+ chan = ERR_PTR(-ENODEV);
+ mutex_unlock(&dma_list_mutex);
+ }
+
+ return chan;
+}
+EXPORT_SYMBOL_GPL(dma_request_chan_by_mask);
+
+void dma_release_channel(struct dma_chan *chan)
+{
+ mutex_lock(&dma_list_mutex);
+ WARN_ONCE(chan->client_count != 1,
+ "chan reference count %d != 1\n", chan->client_count);
+ dma_chan_put(chan);
+ /* drop PRIVATE cap enabled by __dma_request_channel() */
+ if (--chan->device->privatecnt == 0)
+ dma_cap_clear(DMA_PRIVATE, chan->device->cap_mask);
+
+ if (chan->slave) {
+ sysfs_remove_link(&chan->dev->device.kobj, DMA_SLAVE_NAME);
+ sysfs_remove_link(&chan->slave->kobj, chan->name);
+ kfree(chan->name);
+ chan->name = NULL;
+ chan->slave = NULL;
+ }
+
+#ifdef CONFIG_DEBUG_FS
+ kfree(chan->dbg_client_name);
+ chan->dbg_client_name = NULL;
+#endif
+ mutex_unlock(&dma_list_mutex);
+}
+EXPORT_SYMBOL_GPL(dma_release_channel);
+
+/**
+ * dmaengine_get - register interest in dma_channels
+ */
+void dmaengine_get(void)
+{
+ struct dma_device *device, *_d;
+ struct dma_chan *chan;
+ int err;
+
+ mutex_lock(&dma_list_mutex);
+ dmaengine_ref_count++;
+
+ /* try to grab channels */
+ list_for_each_entry_safe(device, _d, &dma_device_list, global_node) {
+ if (dma_has_cap(DMA_PRIVATE, device->cap_mask))
+ continue;
+ list_for_each_entry(chan, &device->channels, device_node) {
+ err = dma_chan_get(chan);
+ if (err == -ENODEV) {
+ /* module removed before we could use it */
+ list_del_rcu(&device->global_node);
+ break;
+ } else if (err)
+ dev_dbg(chan->device->dev,
+ "%s: failed to get %s: (%d)\n",
+ __func__, dma_chan_name(chan), err);
+ }
+ }
+
+ /* if this is the first reference and there were channels
+ * waiting we need to rebalance to get those channels
+ * incorporated into the channel table
+ */
+ if (dmaengine_ref_count == 1)
+ dma_channel_rebalance();
+ mutex_unlock(&dma_list_mutex);
+}
+EXPORT_SYMBOL(dmaengine_get);
+
+/**
+ * dmaengine_put - let DMA drivers be removed when ref_count == 0
+ */
+void dmaengine_put(void)
+{
+ struct dma_device *device, *_d;
+ struct dma_chan *chan;
+
+ mutex_lock(&dma_list_mutex);
+ dmaengine_ref_count--;
+ BUG_ON(dmaengine_ref_count < 0);
+ /* drop channel references */
+ list_for_each_entry_safe(device, _d, &dma_device_list, global_node) {
+ if (dma_has_cap(DMA_PRIVATE, device->cap_mask))
+ continue;
+ list_for_each_entry(chan, &device->channels, device_node)
+ dma_chan_put(chan);
+ }
+ mutex_unlock(&dma_list_mutex);
+}
+EXPORT_SYMBOL(dmaengine_put);
+
+static bool device_has_all_tx_types(struct dma_device *device)
+{
+ /* A device that satisfies this test has channels that will never cause
+ * an async_tx channel switch event as all possible operation types can
+ * be handled.
+ */
+ #ifdef CONFIG_ASYNC_TX_DMA
+ if (!dma_has_cap(DMA_INTERRUPT, device->cap_mask))
+ return false;
+ #endif
+
+ #if IS_ENABLED(CONFIG_ASYNC_MEMCPY)
+ if (!dma_has_cap(DMA_MEMCPY, device->cap_mask))
+ return false;
+ #endif
+
+ #if IS_ENABLED(CONFIG_ASYNC_XOR)
+ if (!dma_has_cap(DMA_XOR, device->cap_mask))
+ return false;
+
+ #ifndef CONFIG_ASYNC_TX_DISABLE_XOR_VAL_DMA
+ if (!dma_has_cap(DMA_XOR_VAL, device->cap_mask))
+ return false;
+ #endif
+ #endif
+
+ #if IS_ENABLED(CONFIG_ASYNC_PQ)
+ if (!dma_has_cap(DMA_PQ, device->cap_mask))
+ return false;
+
+ #ifndef CONFIG_ASYNC_TX_DISABLE_PQ_VAL_DMA
+ if (!dma_has_cap(DMA_PQ_VAL, device->cap_mask))
+ return false;
+ #endif
+ #endif
+
+ return true;
+}
+
+static int get_dma_id(struct dma_device *device)
+{
+ int rc = ida_alloc(&dma_ida, GFP_KERNEL);
+
+ if (rc < 0)
+ return rc;
+ device->dev_id = rc;
+ return 0;
+}
+
+static int __dma_async_device_channel_register(struct dma_device *device,
+ struct dma_chan *chan)
+{
+ int rc;
+
+ chan->local = alloc_percpu(typeof(*chan->local));
+ if (!chan->local)
+ return -ENOMEM;
+ chan->dev = kzalloc(sizeof(*chan->dev), GFP_KERNEL);
+ if (!chan->dev) {
+ rc = -ENOMEM;
+ goto err_free_local;
+ }
+
+ /*
+ * When the chan_id is a negative value, we are dynamically adding
+ * the channel. Otherwise we are static enumerating.
+ */
+ chan->chan_id = ida_alloc(&device->chan_ida, GFP_KERNEL);
+ if (chan->chan_id < 0) {
+ pr_err("%s: unable to alloc ida for chan: %d\n",
+ __func__, chan->chan_id);
+ rc = chan->chan_id;
+ goto err_free_dev;
+ }
+
+ chan->dev->device.class = &dma_devclass;
+ chan->dev->device.parent = device->dev;
+ chan->dev->chan = chan;
+ chan->dev->dev_id = device->dev_id;
+ dev_set_name(&chan->dev->device, "dma%dchan%d",
+ device->dev_id, chan->chan_id);
+ rc = device_register(&chan->dev->device);
+ if (rc)
+ goto err_out_ida;
+ chan->client_count = 0;
+ device->chancnt++;
+
+ return 0;
+
+ err_out_ida:
+ ida_free(&device->chan_ida, chan->chan_id);
+ err_free_dev:
+ kfree(chan->dev);
+ err_free_local:
+ free_percpu(chan->local);
+ chan->local = NULL;
+ return rc;
+}
+
+int dma_async_device_channel_register(struct dma_device *device,
+ struct dma_chan *chan)
+{
+ int rc;
+
+ rc = __dma_async_device_channel_register(device, chan);
+ if (rc < 0)
+ return rc;
+
+ dma_channel_rebalance();
+ return 0;
+}
+EXPORT_SYMBOL_GPL(dma_async_device_channel_register);
+
+static void __dma_async_device_channel_unregister(struct dma_device *device,
+ struct dma_chan *chan)
+{
+ if (chan->local == NULL)
+ return;
+
+ WARN_ONCE(!device->device_release && chan->client_count,
+ "%s called while %d clients hold a reference\n",
+ __func__, chan->client_count);
+ mutex_lock(&dma_list_mutex);
+ device->chancnt--;
+ chan->dev->chan = NULL;
+ mutex_unlock(&dma_list_mutex);
+ ida_free(&device->chan_ida, chan->chan_id);
+ device_unregister(&chan->dev->device);
+ free_percpu(chan->local);
+}
+
+void dma_async_device_channel_unregister(struct dma_device *device,
+ struct dma_chan *chan)
+{
+ __dma_async_device_channel_unregister(device, chan);
+ dma_channel_rebalance();
+}
+EXPORT_SYMBOL_GPL(dma_async_device_channel_unregister);
+
+/**
+ * dma_async_device_register - registers DMA devices found
+ * @device: pointer to &struct dma_device
+ *
+ * After calling this routine the structure should not be freed except in the
+ * device_release() callback which will be called after
+ * dma_async_device_unregister() is called and no further references are taken.
+ */
+int dma_async_device_register(struct dma_device *device)
+{
+ int rc;
+ struct dma_chan* chan;
+
+ if (!device)
+ return -ENODEV;
+
+ /* validate device routines */
+ if (!device->dev) {
+ pr_err("DMAdevice must have dev\n");
+ return -EIO;
+ }
+
+ device->owner = device->dev->driver->owner;
+
+ if (dma_has_cap(DMA_MEMCPY, device->cap_mask) && !device->device_prep_dma_memcpy) {
+ dev_err(device->dev,
+ "Device claims capability %s, but op is not defined\n",
+ "DMA_MEMCPY");
+ return -EIO;
+ }
+
+ if (dma_has_cap(DMA_XOR, device->cap_mask) && !device->device_prep_dma_xor) {
+ dev_err(device->dev,
+ "Device claims capability %s, but op is not defined\n",
+ "DMA_XOR");
+ return -EIO;
+ }
+
+ if (dma_has_cap(DMA_XOR_VAL, device->cap_mask) && !device->device_prep_dma_xor_val) {
+ dev_err(device->dev,
+ "Device claims capability %s, but op is not defined\n",
+ "DMA_XOR_VAL");
+ return -EIO;
+ }
+
+ if (dma_has_cap(DMA_PQ, device->cap_mask) && !device->device_prep_dma_pq) {
+ dev_err(device->dev,
+ "Device claims capability %s, but op is not defined\n",
+ "DMA_PQ");
+ return -EIO;
+ }
+
+ if (dma_has_cap(DMA_PQ_VAL, device->cap_mask) && !device->device_prep_dma_pq_val) {
+ dev_err(device->dev,
+ "Device claims capability %s, but op is not defined\n",
+ "DMA_PQ_VAL");
+ return -EIO;
+ }
+
+ if (dma_has_cap(DMA_MEMSET, device->cap_mask) && !device->device_prep_dma_memset) {
+ dev_err(device->dev,
+ "Device claims capability %s, but op is not defined\n",
+ "DMA_MEMSET");
+ return -EIO;
+ }
+
+ if (dma_has_cap(DMA_INTERRUPT, device->cap_mask) && !device->device_prep_dma_interrupt) {
+ dev_err(device->dev,
+ "Device claims capability %s, but op is not defined\n",
+ "DMA_INTERRUPT");
+ return -EIO;
+ }
+
+ if (dma_has_cap(DMA_CYCLIC, device->cap_mask) && !device->device_prep_dma_cyclic) {
+ dev_err(device->dev,
+ "Device claims capability %s, but op is not defined\n",
+ "DMA_CYCLIC");
+ return -EIO;
+ }
+
+ if (dma_has_cap(DMA_INTERLEAVE, device->cap_mask) && !device->device_prep_interleaved_dma) {
+ dev_err(device->dev,
+ "Device claims capability %s, but op is not defined\n",
+ "DMA_INTERLEAVE");
+ return -EIO;
+ }
+
+
+ if (!device->device_tx_status) {
+ dev_err(device->dev, "Device tx_status is not defined\n");
+ return -EIO;
+ }
+
+
+ if (!device->device_issue_pending) {
+ dev_err(device->dev, "Device issue_pending is not defined\n");
+ return -EIO;
+ }
+
+ if (!device->device_release)
+ dev_dbg(device->dev,
+ "WARN: Device release is not defined so it is not safe to unbind this driver while in use\n");
+
+ kref_init(&device->ref);
+
+ /* note: this only matters in the
+ * CONFIG_ASYNC_TX_ENABLE_CHANNEL_SWITCH=n case
+ */
+ if (device_has_all_tx_types(device))
+ dma_cap_set(DMA_ASYNC_TX, device->cap_mask);
+
+ rc = get_dma_id(device);
+ if (rc != 0)
+ return rc;
+
+ ida_init(&device->chan_ida);
+
+ /* represent channels in sysfs. Probably want devs too */
+ list_for_each_entry(chan, &device->channels, device_node) {
+ rc = __dma_async_device_channel_register(device, chan);
+ if (rc < 0)
+ goto err_out;
+ }
+
+ mutex_lock(&dma_list_mutex);
+ /* take references on public channels */
+ if (dmaengine_ref_count && !dma_has_cap(DMA_PRIVATE, device->cap_mask))
+ list_for_each_entry(chan, &device->channels, device_node) {
+ /* if clients are already waiting for channels we need
+ * to take references on their behalf
+ */
+ if (dma_chan_get(chan) == -ENODEV) {
+ /* note we can only get here for the first
+ * channel as the remaining channels are
+ * guaranteed to get a reference
+ */
+ rc = -ENODEV;
+ mutex_unlock(&dma_list_mutex);
+ goto err_out;
+ }
+ }
+ list_add_tail_rcu(&device->global_node, &dma_device_list);
+ if (dma_has_cap(DMA_PRIVATE, device->cap_mask))
+ device->privatecnt++; /* Always private */
+ dma_channel_rebalance();
+ mutex_unlock(&dma_list_mutex);
+
+ dmaengine_debug_register(device);
+
+ return 0;
+
+err_out:
+ /* if we never registered a channel just release the idr */
+ if (!device->chancnt) {
+ ida_free(&dma_ida, device->dev_id);
+ return rc;
+ }
+
+ list_for_each_entry(chan, &device->channels, device_node) {
+ if (chan->local == NULL)
+ continue;
+ mutex_lock(&dma_list_mutex);
+ chan->dev->chan = NULL;
+ mutex_unlock(&dma_list_mutex);
+ device_unregister(&chan->dev->device);
+ free_percpu(chan->local);
+ }
+ return rc;
+}
+EXPORT_SYMBOL(dma_async_device_register);
+
+/**
+ * dma_async_device_unregister - unregister a DMA device
+ * @device: pointer to &struct dma_device
+ *
+ * This routine is called by dma driver exit routines, dmaengine holds module
+ * references to prevent it being called while channels are in use.
+ */
+void dma_async_device_unregister(struct dma_device *device)
+{
+ struct dma_chan *chan, *n;
+
+ dmaengine_debug_unregister(device);
+
+ list_for_each_entry_safe(chan, n, &device->channels, device_node)
+ __dma_async_device_channel_unregister(device, chan);
+
+ mutex_lock(&dma_list_mutex);
+ /*
+ * setting DMA_PRIVATE ensures the device being torn down will not
+ * be used in the channel_table
+ */
+ dma_cap_set(DMA_PRIVATE, device->cap_mask);
+ dma_channel_rebalance();
+ ida_free(&dma_ida, device->dev_id);
+ dma_device_put(device);
+ mutex_unlock(&dma_list_mutex);
+}
+EXPORT_SYMBOL(dma_async_device_unregister);
+
+static void dmam_device_release(struct device *dev, void *res)
+{
+ struct dma_device *device;
+
+ device = *(struct dma_device **)res;
+ dma_async_device_unregister(device);
+}
+
+/**
+ * dmaenginem_async_device_register - registers DMA devices found
+ * @device: pointer to &struct dma_device
+ *
+ * The operation is managed and will be undone on driver detach.
+ */
+int dmaenginem_async_device_register(struct dma_device *device)
+{
+ void *p;
+ int ret;
+
+ p = devres_alloc(dmam_device_release, sizeof(void *), GFP_KERNEL);
+ if (!p)
+ return -ENOMEM;
+
+ ret = dma_async_device_register(device);
+ if (!ret) {
+ *(struct dma_device **)p = device;
+ devres_add(device->dev, p);
+ } else {
+ devres_free(p);
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL(dmaenginem_async_device_register);
+
+struct dmaengine_unmap_pool {
+ struct kmem_cache *cache;
+ const char *name;
+ mempool_t *pool;
+ size_t size;
+};
+
+#define __UNMAP_POOL(x) { .size = x, .name = "dmaengine-unmap-" __stringify(x) }
+static struct dmaengine_unmap_pool unmap_pool[] = {
+ __UNMAP_POOL(2),
+ #if IS_ENABLED(CONFIG_DMA_ENGINE_RAID)
+ __UNMAP_POOL(16),
+ __UNMAP_POOL(128),
+ __UNMAP_POOL(256),
+ #endif
+};
+
+static struct dmaengine_unmap_pool *__get_unmap_pool(int nr)
+{
+ int order = get_count_order(nr);
+
+ switch (order) {
+ case 0 ... 1:
+ return &unmap_pool[0];
+#if IS_ENABLED(CONFIG_DMA_ENGINE_RAID)
+ case 2 ... 4:
+ return &unmap_pool[1];
+ case 5 ... 7:
+ return &unmap_pool[2];
+ case 8:
+ return &unmap_pool[3];
+#endif
+ default:
+ BUG();
+ return NULL;
+ }
+}
+
+static void dmaengine_unmap(struct kref *kref)
+{
+ struct dmaengine_unmap_data *unmap = container_of(kref, typeof(*unmap), kref);
+ struct device *dev = unmap->dev;
+ int cnt, i;
+
+ cnt = unmap->to_cnt;
+ for (i = 0; i < cnt; i++)
+ dma_unmap_page(dev, unmap->addr[i], unmap->len,
+ DMA_TO_DEVICE);
+ cnt += unmap->from_cnt;
+ for (; i < cnt; i++)
+ dma_unmap_page(dev, unmap->addr[i], unmap->len,
+ DMA_FROM_DEVICE);
+ cnt += unmap->bidi_cnt;
+ for (; i < cnt; i++) {
+ if (unmap->addr[i] == 0)
+ continue;
+ dma_unmap_page(dev, unmap->addr[i], unmap->len,
+ DMA_BIDIRECTIONAL);
+ }
+ cnt = unmap->map_cnt;
+ mempool_free(unmap, __get_unmap_pool(cnt)->pool);
+}
+
+void dmaengine_unmap_put(struct dmaengine_unmap_data *unmap)
+{
+ if (unmap)
+ kref_put(&unmap->kref, dmaengine_unmap);
+}
+EXPORT_SYMBOL_GPL(dmaengine_unmap_put);
+
+static void dmaengine_destroy_unmap_pool(void)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(unmap_pool); i++) {
+ struct dmaengine_unmap_pool *p = &unmap_pool[i];
+
+ mempool_destroy(p->pool);
+ p->pool = NULL;
+ kmem_cache_destroy(p->cache);
+ p->cache = NULL;
+ }
+}
+
+static int __init dmaengine_init_unmap_pool(void)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(unmap_pool); i++) {
+ struct dmaengine_unmap_pool *p = &unmap_pool[i];
+ size_t size;
+
+ size = sizeof(struct dmaengine_unmap_data) +
+ sizeof(dma_addr_t) * p->size;
+
+ p->cache = kmem_cache_create(p->name, size, 0,
+ SLAB_HWCACHE_ALIGN, NULL);
+ if (!p->cache)
+ break;
+ p->pool = mempool_create_slab_pool(1, p->cache);
+ if (!p->pool)
+ break;
+ }
+
+ if (i == ARRAY_SIZE(unmap_pool))
+ return 0;
+
+ dmaengine_destroy_unmap_pool();
+ return -ENOMEM;
+}
+
+struct dmaengine_unmap_data *
+dmaengine_get_unmap_data(struct device *dev, int nr, gfp_t flags)
+{
+ struct dmaengine_unmap_data *unmap;
+
+ unmap = mempool_alloc(__get_unmap_pool(nr)->pool, flags);
+ if (!unmap)
+ return NULL;
+
+ memset(unmap, 0, sizeof(*unmap));
+ kref_init(&unmap->kref);
+ unmap->dev = dev;
+ unmap->map_cnt = nr;
+
+ return unmap;
+}
+EXPORT_SYMBOL(dmaengine_get_unmap_data);
+
+void dma_async_tx_descriptor_init(struct dma_async_tx_descriptor *tx,
+ struct dma_chan *chan)
+{
+ tx->chan = chan;
+ #ifdef CONFIG_ASYNC_TX_ENABLE_CHANNEL_SWITCH
+ spin_lock_init(&tx->lock);
+ #endif
+}
+EXPORT_SYMBOL(dma_async_tx_descriptor_init);
+
+static inline int desc_check_and_set_metadata_mode(
+ struct dma_async_tx_descriptor *desc, enum dma_desc_metadata_mode mode)
+{
+ /* Make sure that the metadata mode is not mixed */
+ if (!desc->desc_metadata_mode) {
+ if (dmaengine_is_metadata_mode_supported(desc->chan, mode))
+ desc->desc_metadata_mode = mode;
+ else
+ return -ENOTSUPP;
+ } else if (desc->desc_metadata_mode != mode) {
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int dmaengine_desc_attach_metadata(struct dma_async_tx_descriptor *desc,
+ void *data, size_t len)
+{
+ int ret;
+
+ if (!desc)
+ return -EINVAL;
+
+ ret = desc_check_and_set_metadata_mode(desc, DESC_METADATA_CLIENT);
+ if (ret)
+ return ret;
+
+ if (!desc->metadata_ops || !desc->metadata_ops->attach)
+ return -ENOTSUPP;
+
+ return desc->metadata_ops->attach(desc, data, len);
+}
+EXPORT_SYMBOL_GPL(dmaengine_desc_attach_metadata);
+
+void *dmaengine_desc_get_metadata_ptr(struct dma_async_tx_descriptor *desc,
+ size_t *payload_len, size_t *max_len)
+{
+ int ret;
+
+ if (!desc)
+ return ERR_PTR(-EINVAL);
+
+ ret = desc_check_and_set_metadata_mode(desc, DESC_METADATA_ENGINE);
+ if (ret)
+ return ERR_PTR(ret);
+
+ if (!desc->metadata_ops || !desc->metadata_ops->get_ptr)
+ return ERR_PTR(-ENOTSUPP);
+
+ return desc->metadata_ops->get_ptr(desc, payload_len, max_len);
+}
+EXPORT_SYMBOL_GPL(dmaengine_desc_get_metadata_ptr);
+
+int dmaengine_desc_set_metadata_len(struct dma_async_tx_descriptor *desc,
+ size_t payload_len)
+{
+ int ret;
+
+ if (!desc)
+ return -EINVAL;
+
+ ret = desc_check_and_set_metadata_mode(desc, DESC_METADATA_ENGINE);
+ if (ret)
+ return ret;
+
+ if (!desc->metadata_ops || !desc->metadata_ops->set_len)
+ return -ENOTSUPP;
+
+ return desc->metadata_ops->set_len(desc, payload_len);
+}
+EXPORT_SYMBOL_GPL(dmaengine_desc_set_metadata_len);
+
+/**
+ * dma_wait_for_async_tx - spin wait for a transaction to complete
+ * @tx: in-flight transaction to wait on
+ */
+enum dma_status
+dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx)
+{
+ unsigned long dma_sync_wait_timeout = jiffies + msecs_to_jiffies(5000);
+
+ if (!tx)
+ return DMA_COMPLETE;
+
+ while (tx->cookie == -EBUSY) {
+ if (time_after_eq(jiffies, dma_sync_wait_timeout)) {
+ dev_err(tx->chan->device->dev,
+ "%s timeout waiting for descriptor submission\n",
+ __func__);
+ return DMA_ERROR;
+ }
+ cpu_relax();
+ }
+ return dma_sync_wait(tx->chan, tx->cookie);
+}
+EXPORT_SYMBOL_GPL(dma_wait_for_async_tx);
+
+/**
+ * dma_run_dependencies - process dependent operations on the target channel
+ * @tx: transaction with dependencies
+ *
+ * Helper routine for DMA drivers to process (start) dependent operations
+ * on their target channel.
+ */
+void dma_run_dependencies(struct dma_async_tx_descriptor *tx)
+{
+ struct dma_async_tx_descriptor *dep = txd_next(tx);
+ struct dma_async_tx_descriptor *dep_next;
+ struct dma_chan *chan;
+
+ if (!dep)
+ return;
+
+ /* we'll submit tx->next now, so clear the link */
+ txd_clear_next(tx);
+ chan = dep->chan;
+
+ /* keep submitting up until a channel switch is detected
+ * in that case we will be called again as a result of
+ * processing the interrupt from async_tx_channel_switch
+ */
+ for (; dep; dep = dep_next) {
+ txd_lock(dep);
+ txd_clear_parent(dep);
+ dep_next = txd_next(dep);
+ if (dep_next && dep_next->chan == chan)
+ txd_clear_next(dep); /* ->next will be submitted */
+ else
+ dep_next = NULL; /* submit current dep and terminate */
+ txd_unlock(dep);
+
+ dep->tx_submit(dep);
+ }
+
+ chan->device->device_issue_pending(chan);
+}
+EXPORT_SYMBOL_GPL(dma_run_dependencies);
+
+static int __init dma_bus_init(void)
+{
+ int err = dmaengine_init_unmap_pool();
+
+ if (err)
+ return err;
+
+ err = class_register(&dma_devclass);
+ if (!err)
+ dmaengine_debugfs_init();
+
+ return err;
+}
+arch_initcall(dma_bus_init);
diff --git a/drivers/dma/dmaengine.h b/drivers/dma/dmaengine.h
new file mode 100644
index 000000000..53f16d3f0
--- /dev/null
+++ b/drivers/dma/dmaengine.h
@@ -0,0 +1,201 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * The contents of this file are private to DMA engine drivers, and is not
+ * part of the API to be used by DMA engine users.
+ */
+#ifndef DMAENGINE_H
+#define DMAENGINE_H
+
+#include <linux/bug.h>
+#include <linux/dmaengine.h>
+
+/**
+ * dma_cookie_init - initialize the cookies for a DMA channel
+ * @chan: dma channel to initialize
+ */
+static inline void dma_cookie_init(struct dma_chan *chan)
+{
+ chan->cookie = DMA_MIN_COOKIE;
+ chan->completed_cookie = DMA_MIN_COOKIE;
+}
+
+/**
+ * dma_cookie_assign - assign a DMA engine cookie to the descriptor
+ * @tx: descriptor needing cookie
+ *
+ * Assign a unique non-zero per-channel cookie to the descriptor.
+ * Note: caller is expected to hold a lock to prevent concurrency.
+ */
+static inline dma_cookie_t dma_cookie_assign(struct dma_async_tx_descriptor *tx)
+{
+ struct dma_chan *chan = tx->chan;
+ dma_cookie_t cookie;
+
+ cookie = chan->cookie + 1;
+ if (cookie < DMA_MIN_COOKIE)
+ cookie = DMA_MIN_COOKIE;
+ tx->cookie = chan->cookie = cookie;
+
+ return cookie;
+}
+
+/**
+ * dma_cookie_complete - complete a descriptor
+ * @tx: descriptor to complete
+ *
+ * Mark this descriptor complete by updating the channels completed
+ * cookie marker. Zero the descriptors cookie to prevent accidental
+ * repeated completions.
+ *
+ * Note: caller is expected to hold a lock to prevent concurrency.
+ */
+static inline void dma_cookie_complete(struct dma_async_tx_descriptor *tx)
+{
+ BUG_ON(tx->cookie < DMA_MIN_COOKIE);
+ tx->chan->completed_cookie = tx->cookie;
+ tx->cookie = 0;
+}
+
+/**
+ * dma_cookie_status - report cookie status
+ * @chan: dma channel
+ * @cookie: cookie we are interested in
+ * @state: dma_tx_state structure to return last/used cookies
+ *
+ * Report the status of the cookie, filling in the state structure if
+ * non-NULL. No locking is required.
+ */
+static inline enum dma_status dma_cookie_status(struct dma_chan *chan,
+ dma_cookie_t cookie, struct dma_tx_state *state)
+{
+ dma_cookie_t used, complete;
+
+ used = chan->cookie;
+ complete = chan->completed_cookie;
+ barrier();
+ if (state) {
+ state->last = complete;
+ state->used = used;
+ state->residue = 0;
+ state->in_flight_bytes = 0;
+ }
+ return dma_async_is_complete(cookie, complete, used);
+}
+
+static inline void dma_set_residue(struct dma_tx_state *state, u32 residue)
+{
+ if (state)
+ state->residue = residue;
+}
+
+static inline void dma_set_in_flight_bytes(struct dma_tx_state *state,
+ u32 in_flight_bytes)
+{
+ if (state)
+ state->in_flight_bytes = in_flight_bytes;
+}
+
+struct dmaengine_desc_callback {
+ dma_async_tx_callback callback;
+ dma_async_tx_callback_result callback_result;
+ void *callback_param;
+};
+
+/**
+ * dmaengine_desc_get_callback - get the passed in callback function
+ * @tx: tx descriptor
+ * @cb: temp struct to hold the callback info
+ *
+ * Fill the passed in cb struct with what's available in the passed in
+ * tx descriptor struct
+ * No locking is required.
+ */
+static inline void
+dmaengine_desc_get_callback(struct dma_async_tx_descriptor *tx,
+ struct dmaengine_desc_callback *cb)
+{
+ cb->callback = tx->callback;
+ cb->callback_result = tx->callback_result;
+ cb->callback_param = tx->callback_param;
+}
+
+/**
+ * dmaengine_desc_callback_invoke - call the callback function in cb struct
+ * @cb: temp struct that is holding the callback info
+ * @result: transaction result
+ *
+ * Call the callback function provided in the cb struct with the parameter
+ * in the cb struct.
+ * Locking is dependent on the driver.
+ */
+static inline void
+dmaengine_desc_callback_invoke(struct dmaengine_desc_callback *cb,
+ const struct dmaengine_result *result)
+{
+ struct dmaengine_result dummy_result = {
+ .result = DMA_TRANS_NOERROR,
+ .residue = 0
+ };
+
+ if (cb->callback_result) {
+ if (!result)
+ result = &dummy_result;
+ cb->callback_result(cb->callback_param, result);
+ } else if (cb->callback) {
+ cb->callback(cb->callback_param);
+ }
+}
+
+/**
+ * dmaengine_desc_get_callback_invoke - get the callback in tx descriptor and
+ * then immediately call the callback.
+ * @tx: dma async tx descriptor
+ * @result: transaction result
+ *
+ * Call dmaengine_desc_get_callback() and dmaengine_desc_callback_invoke()
+ * in a single function since no work is necessary in between for the driver.
+ * Locking is dependent on the driver.
+ */
+static inline void
+dmaengine_desc_get_callback_invoke(struct dma_async_tx_descriptor *tx,
+ const struct dmaengine_result *result)
+{
+ struct dmaengine_desc_callback cb;
+
+ dmaengine_desc_get_callback(tx, &cb);
+ dmaengine_desc_callback_invoke(&cb, result);
+}
+
+/**
+ * dmaengine_desc_callback_valid - verify the callback is valid in cb
+ * @cb: callback info struct
+ *
+ * Return a bool that verifies whether callback in cb is valid or not.
+ * No locking is required.
+ */
+static inline bool
+dmaengine_desc_callback_valid(struct dmaengine_desc_callback *cb)
+{
+ return cb->callback || cb->callback_result;
+}
+
+struct dma_chan *dma_get_slave_channel(struct dma_chan *chan);
+struct dma_chan *dma_get_any_slave_channel(struct dma_device *device);
+
+#ifdef CONFIG_DEBUG_FS
+#include <linux/debugfs.h>
+
+static inline struct dentry *
+dmaengine_get_debugfs_root(struct dma_device *dma_dev) {
+ return dma_dev->dbg_dev_root;
+}
+#else
+struct dentry;
+static inline struct dentry *
+dmaengine_get_debugfs_root(struct dma_device *dma_dev)
+{
+ return NULL;
+}
+#endif /* CONFIG_DEBUG_FS */
+
+#endif
diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c
new file mode 100644
index 000000000..ffe621695
--- /dev/null
+++ b/drivers/dma/dmatest.c
@@ -0,0 +1,1360 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * DMA Engine test module
+ *
+ * Copyright (C) 2007 Atmel Corporation
+ * Copyright (C) 2013 Intel Corporation
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/err.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmaengine.h>
+#include <linux/freezer.h>
+#include <linux/init.h>
+#include <linux/kthread.h>
+#include <linux/sched/task.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/random.h>
+#include <linux/slab.h>
+#include <linux/wait.h>
+
+static unsigned int test_buf_size = 16384;
+module_param(test_buf_size, uint, 0644);
+MODULE_PARM_DESC(test_buf_size, "Size of the memcpy test buffer");
+
+static char test_device[32];
+module_param_string(device, test_device, sizeof(test_device), 0644);
+MODULE_PARM_DESC(device, "Bus ID of the DMA Engine to test (default: any)");
+
+static unsigned int threads_per_chan = 1;
+module_param(threads_per_chan, uint, 0644);
+MODULE_PARM_DESC(threads_per_chan,
+ "Number of threads to start per channel (default: 1)");
+
+static unsigned int max_channels;
+module_param(max_channels, uint, 0644);
+MODULE_PARM_DESC(max_channels,
+ "Maximum number of channels to use (default: all)");
+
+static unsigned int iterations;
+module_param(iterations, uint, 0644);
+MODULE_PARM_DESC(iterations,
+ "Iterations before stopping test (default: infinite)");
+
+static unsigned int dmatest;
+module_param(dmatest, uint, 0644);
+MODULE_PARM_DESC(dmatest,
+ "dmatest 0-memcpy 1-memset (default: 0)");
+
+static unsigned int xor_sources = 3;
+module_param(xor_sources, uint, 0644);
+MODULE_PARM_DESC(xor_sources,
+ "Number of xor source buffers (default: 3)");
+
+static unsigned int pq_sources = 3;
+module_param(pq_sources, uint, 0644);
+MODULE_PARM_DESC(pq_sources,
+ "Number of p+q source buffers (default: 3)");
+
+static int timeout = 3000;
+module_param(timeout, int, 0644);
+MODULE_PARM_DESC(timeout, "Transfer Timeout in msec (default: 3000), "
+ "Pass -1 for infinite timeout");
+
+static bool noverify;
+module_param(noverify, bool, 0644);
+MODULE_PARM_DESC(noverify, "Disable data verification (default: verify)");
+
+static bool norandom;
+module_param(norandom, bool, 0644);
+MODULE_PARM_DESC(norandom, "Disable random offset setup (default: random)");
+
+static bool verbose;
+module_param(verbose, bool, 0644);
+MODULE_PARM_DESC(verbose, "Enable \"success\" result messages (default: off)");
+
+static int alignment = -1;
+module_param(alignment, int, 0644);
+MODULE_PARM_DESC(alignment, "Custom data address alignment taken as 2^(alignment) (default: not used (-1))");
+
+static unsigned int transfer_size;
+module_param(transfer_size, uint, 0644);
+MODULE_PARM_DESC(transfer_size, "Optional custom transfer size in bytes (default: not used (0))");
+
+static bool polled;
+module_param(polled, bool, 0644);
+MODULE_PARM_DESC(polled, "Use polling for completion instead of interrupts");
+
+/**
+ * struct dmatest_params - test parameters.
+ * @buf_size: size of the memcpy test buffer
+ * @channel: bus ID of the channel to test
+ * @device: bus ID of the DMA Engine to test
+ * @threads_per_chan: number of threads to start per channel
+ * @max_channels: maximum number of channels to use
+ * @iterations: iterations before stopping test
+ * @xor_sources: number of xor source buffers
+ * @pq_sources: number of p+q source buffers
+ * @timeout: transfer timeout in msec, -1 for infinite timeout
+ * @noverify: disable data verification
+ * @norandom: disable random offset setup
+ * @alignment: custom data address alignment taken as 2^alignment
+ * @transfer_size: custom transfer size in bytes
+ * @polled: use polling for completion instead of interrupts
+ */
+struct dmatest_params {
+ unsigned int buf_size;
+ char channel[20];
+ char device[32];
+ unsigned int threads_per_chan;
+ unsigned int max_channels;
+ unsigned int iterations;
+ unsigned int xor_sources;
+ unsigned int pq_sources;
+ int timeout;
+ bool noverify;
+ bool norandom;
+ int alignment;
+ unsigned int transfer_size;
+ bool polled;
+};
+
+/**
+ * struct dmatest_info - test information.
+ * @params: test parameters
+ * @channels: channels under test
+ * @nr_channels: number of channels under test
+ * @lock: access protection to the fields of this structure
+ * @did_init: module has been initialized completely
+ * @last_error: test has faced configuration issues
+ */
+static struct dmatest_info {
+ /* Test parameters */
+ struct dmatest_params params;
+
+ /* Internal state */
+ struct list_head channels;
+ unsigned int nr_channels;
+ int last_error;
+ struct mutex lock;
+ bool did_init;
+} test_info = {
+ .channels = LIST_HEAD_INIT(test_info.channels),
+ .lock = __MUTEX_INITIALIZER(test_info.lock),
+};
+
+static int dmatest_run_set(const char *val, const struct kernel_param *kp);
+static int dmatest_run_get(char *val, const struct kernel_param *kp);
+static const struct kernel_param_ops run_ops = {
+ .set = dmatest_run_set,
+ .get = dmatest_run_get,
+};
+static bool dmatest_run;
+module_param_cb(run, &run_ops, &dmatest_run, 0644);
+MODULE_PARM_DESC(run, "Run the test (default: false)");
+
+static int dmatest_chan_set(const char *val, const struct kernel_param *kp);
+static int dmatest_chan_get(char *val, const struct kernel_param *kp);
+static const struct kernel_param_ops multi_chan_ops = {
+ .set = dmatest_chan_set,
+ .get = dmatest_chan_get,
+};
+
+static char test_channel[20];
+static struct kparam_string newchan_kps = {
+ .string = test_channel,
+ .maxlen = 20,
+};
+module_param_cb(channel, &multi_chan_ops, &newchan_kps, 0644);
+MODULE_PARM_DESC(channel, "Bus ID of the channel to test (default: any)");
+
+static int dmatest_test_list_get(char *val, const struct kernel_param *kp);
+static const struct kernel_param_ops test_list_ops = {
+ .get = dmatest_test_list_get,
+};
+module_param_cb(test_list, &test_list_ops, NULL, 0444);
+MODULE_PARM_DESC(test_list, "Print current test list");
+
+/* Maximum amount of mismatched bytes in buffer to print */
+#define MAX_ERROR_COUNT 32
+
+/*
+ * Initialization patterns. All bytes in the source buffer has bit 7
+ * set, all bytes in the destination buffer has bit 7 cleared.
+ *
+ * Bit 6 is set for all bytes which are to be copied by the DMA
+ * engine. Bit 5 is set for all bytes which are to be overwritten by
+ * the DMA engine.
+ *
+ * The remaining bits are the inverse of a counter which increments by
+ * one for each byte address.
+ */
+#define PATTERN_SRC 0x80
+#define PATTERN_DST 0x00
+#define PATTERN_COPY 0x40
+#define PATTERN_OVERWRITE 0x20
+#define PATTERN_COUNT_MASK 0x1f
+#define PATTERN_MEMSET_IDX 0x01
+
+/* Fixed point arithmetic ops */
+#define FIXPT_SHIFT 8
+#define FIXPNT_MASK 0xFF
+#define FIXPT_TO_INT(a) ((a) >> FIXPT_SHIFT)
+#define INT_TO_FIXPT(a) ((a) << FIXPT_SHIFT)
+#define FIXPT_GET_FRAC(a) ((((a) & FIXPNT_MASK) * 100) >> FIXPT_SHIFT)
+
+/* poor man's completion - we want to use wait_event_freezable() on it */
+struct dmatest_done {
+ bool done;
+ wait_queue_head_t *wait;
+};
+
+struct dmatest_data {
+ u8 **raw;
+ u8 **aligned;
+ unsigned int cnt;
+ unsigned int off;
+};
+
+struct dmatest_thread {
+ struct list_head node;
+ struct dmatest_info *info;
+ struct task_struct *task;
+ struct dma_chan *chan;
+ struct dmatest_data src;
+ struct dmatest_data dst;
+ enum dma_transaction_type type;
+ wait_queue_head_t done_wait;
+ struct dmatest_done test_done;
+ bool done;
+ bool pending;
+};
+
+struct dmatest_chan {
+ struct list_head node;
+ struct dma_chan *chan;
+ struct list_head threads;
+};
+
+static DECLARE_WAIT_QUEUE_HEAD(thread_wait);
+static bool wait;
+
+static bool is_threaded_test_run(struct dmatest_info *info)
+{
+ struct dmatest_chan *dtc;
+
+ list_for_each_entry(dtc, &info->channels, node) {
+ struct dmatest_thread *thread;
+
+ list_for_each_entry(thread, &dtc->threads, node) {
+ if (!thread->done && !thread->pending)
+ return true;
+ }
+ }
+
+ return false;
+}
+
+static bool is_threaded_test_pending(struct dmatest_info *info)
+{
+ struct dmatest_chan *dtc;
+
+ list_for_each_entry(dtc, &info->channels, node) {
+ struct dmatest_thread *thread;
+
+ list_for_each_entry(thread, &dtc->threads, node) {
+ if (thread->pending)
+ return true;
+ }
+ }
+
+ return false;
+}
+
+static int dmatest_wait_get(char *val, const struct kernel_param *kp)
+{
+ struct dmatest_info *info = &test_info;
+ struct dmatest_params *params = &info->params;
+
+ if (params->iterations)
+ wait_event(thread_wait, !is_threaded_test_run(info));
+ wait = true;
+ return param_get_bool(val, kp);
+}
+
+static const struct kernel_param_ops wait_ops = {
+ .get = dmatest_wait_get,
+ .set = param_set_bool,
+};
+module_param_cb(wait, &wait_ops, &wait, 0444);
+MODULE_PARM_DESC(wait, "Wait for tests to complete (default: false)");
+
+static bool dmatest_match_channel(struct dmatest_params *params,
+ struct dma_chan *chan)
+{
+ if (params->channel[0] == '\0')
+ return true;
+ return strcmp(dma_chan_name(chan), params->channel) == 0;
+}
+
+static bool dmatest_match_device(struct dmatest_params *params,
+ struct dma_device *device)
+{
+ if (params->device[0] == '\0')
+ return true;
+ return strcmp(dev_name(device->dev), params->device) == 0;
+}
+
+static unsigned long dmatest_random(void)
+{
+ unsigned long buf;
+
+ get_random_bytes(&buf, sizeof(buf));
+ return buf;
+}
+
+static inline u8 gen_inv_idx(u8 index, bool is_memset)
+{
+ u8 val = is_memset ? PATTERN_MEMSET_IDX : index;
+
+ return ~val & PATTERN_COUNT_MASK;
+}
+
+static inline u8 gen_src_value(u8 index, bool is_memset)
+{
+ return PATTERN_SRC | gen_inv_idx(index, is_memset);
+}
+
+static inline u8 gen_dst_value(u8 index, bool is_memset)
+{
+ return PATTERN_DST | gen_inv_idx(index, is_memset);
+}
+
+static void dmatest_init_srcs(u8 **bufs, unsigned int start, unsigned int len,
+ unsigned int buf_size, bool is_memset)
+{
+ unsigned int i;
+ u8 *buf;
+
+ for (; (buf = *bufs); bufs++) {
+ for (i = 0; i < start; i++)
+ buf[i] = gen_src_value(i, is_memset);
+ for ( ; i < start + len; i++)
+ buf[i] = gen_src_value(i, is_memset) | PATTERN_COPY;
+ for ( ; i < buf_size; i++)
+ buf[i] = gen_src_value(i, is_memset);
+ buf++;
+ }
+}
+
+static void dmatest_init_dsts(u8 **bufs, unsigned int start, unsigned int len,
+ unsigned int buf_size, bool is_memset)
+{
+ unsigned int i;
+ u8 *buf;
+
+ for (; (buf = *bufs); bufs++) {
+ for (i = 0; i < start; i++)
+ buf[i] = gen_dst_value(i, is_memset);
+ for ( ; i < start + len; i++)
+ buf[i] = gen_dst_value(i, is_memset) |
+ PATTERN_OVERWRITE;
+ for ( ; i < buf_size; i++)
+ buf[i] = gen_dst_value(i, is_memset);
+ }
+}
+
+static void dmatest_mismatch(u8 actual, u8 pattern, unsigned int index,
+ unsigned int counter, bool is_srcbuf, bool is_memset)
+{
+ u8 diff = actual ^ pattern;
+ u8 expected = pattern | gen_inv_idx(counter, is_memset);
+ const char *thread_name = current->comm;
+
+ if (is_srcbuf)
+ pr_warn("%s: srcbuf[0x%x] overwritten! Expected %02x, got %02x\n",
+ thread_name, index, expected, actual);
+ else if ((pattern & PATTERN_COPY)
+ && (diff & (PATTERN_COPY | PATTERN_OVERWRITE)))
+ pr_warn("%s: dstbuf[0x%x] not copied! Expected %02x, got %02x\n",
+ thread_name, index, expected, actual);
+ else if (diff & PATTERN_SRC)
+ pr_warn("%s: dstbuf[0x%x] was copied! Expected %02x, got %02x\n",
+ thread_name, index, expected, actual);
+ else
+ pr_warn("%s: dstbuf[0x%x] mismatch! Expected %02x, got %02x\n",
+ thread_name, index, expected, actual);
+}
+
+static unsigned int dmatest_verify(u8 **bufs, unsigned int start,
+ unsigned int end, unsigned int counter, u8 pattern,
+ bool is_srcbuf, bool is_memset)
+{
+ unsigned int i;
+ unsigned int error_count = 0;
+ u8 actual;
+ u8 expected;
+ u8 *buf;
+ unsigned int counter_orig = counter;
+
+ for (; (buf = *bufs); bufs++) {
+ counter = counter_orig;
+ for (i = start; i < end; i++) {
+ actual = buf[i];
+ expected = pattern | gen_inv_idx(counter, is_memset);
+ if (actual != expected) {
+ if (error_count < MAX_ERROR_COUNT)
+ dmatest_mismatch(actual, pattern, i,
+ counter, is_srcbuf,
+ is_memset);
+ error_count++;
+ }
+ counter++;
+ }
+ }
+
+ if (error_count > MAX_ERROR_COUNT)
+ pr_warn("%s: %u errors suppressed\n",
+ current->comm, error_count - MAX_ERROR_COUNT);
+
+ return error_count;
+}
+
+
+static void dmatest_callback(void *arg)
+{
+ struct dmatest_done *done = arg;
+ struct dmatest_thread *thread =
+ container_of(done, struct dmatest_thread, test_done);
+ if (!thread->done) {
+ done->done = true;
+ wake_up_all(done->wait);
+ } else {
+ /*
+ * If thread->done, it means that this callback occurred
+ * after the parent thread has cleaned up. This can
+ * happen in the case that driver doesn't implement
+ * the terminate_all() functionality and a dma operation
+ * did not occur within the timeout period
+ */
+ WARN(1, "dmatest: Kernel memory may be corrupted!!\n");
+ }
+}
+
+static unsigned int min_odd(unsigned int x, unsigned int y)
+{
+ unsigned int val = min(x, y);
+
+ return val % 2 ? val : val - 1;
+}
+
+static void result(const char *err, unsigned int n, unsigned int src_off,
+ unsigned int dst_off, unsigned int len, unsigned long data)
+{
+ if (IS_ERR_VALUE(data)) {
+ pr_info("%s: result #%u: '%s' with src_off=0x%x dst_off=0x%x len=0x%x (%ld)\n",
+ current->comm, n, err, src_off, dst_off, len, data);
+ } else {
+ pr_info("%s: result #%u: '%s' with src_off=0x%x dst_off=0x%x len=0x%x (%lu)\n",
+ current->comm, n, err, src_off, dst_off, len, data);
+ }
+}
+
+static void dbg_result(const char *err, unsigned int n, unsigned int src_off,
+ unsigned int dst_off, unsigned int len,
+ unsigned long data)
+{
+ pr_debug("%s: result #%u: '%s' with src_off=0x%x dst_off=0x%x len=0x%x (%lu)\n",
+ current->comm, n, err, src_off, dst_off, len, data);
+}
+
+#define verbose_result(err, n, src_off, dst_off, len, data) ({ \
+ if (verbose) \
+ result(err, n, src_off, dst_off, len, data); \
+ else \
+ dbg_result(err, n, src_off, dst_off, len, data);\
+})
+
+static unsigned long long dmatest_persec(s64 runtime, unsigned int val)
+{
+ unsigned long long per_sec = 1000000;
+
+ if (runtime <= 0)
+ return 0;
+
+ /* drop precision until runtime is 32-bits */
+ while (runtime > UINT_MAX) {
+ runtime >>= 1;
+ per_sec <<= 1;
+ }
+
+ per_sec *= val;
+ per_sec = INT_TO_FIXPT(per_sec);
+ do_div(per_sec, runtime);
+
+ return per_sec;
+}
+
+static unsigned long long dmatest_KBs(s64 runtime, unsigned long long len)
+{
+ return FIXPT_TO_INT(dmatest_persec(runtime, len >> 10));
+}
+
+static void __dmatest_free_test_data(struct dmatest_data *d, unsigned int cnt)
+{
+ unsigned int i;
+
+ for (i = 0; i < cnt; i++)
+ kfree(d->raw[i]);
+
+ kfree(d->aligned);
+ kfree(d->raw);
+}
+
+static void dmatest_free_test_data(struct dmatest_data *d)
+{
+ __dmatest_free_test_data(d, d->cnt);
+}
+
+static int dmatest_alloc_test_data(struct dmatest_data *d,
+ unsigned int buf_size, u8 align)
+{
+ unsigned int i = 0;
+
+ d->raw = kcalloc(d->cnt + 1, sizeof(u8 *), GFP_KERNEL);
+ if (!d->raw)
+ return -ENOMEM;
+
+ d->aligned = kcalloc(d->cnt + 1, sizeof(u8 *), GFP_KERNEL);
+ if (!d->aligned)
+ goto err;
+
+ for (i = 0; i < d->cnt; i++) {
+ d->raw[i] = kmalloc(buf_size + align, GFP_KERNEL);
+ if (!d->raw[i])
+ goto err;
+
+ /* align to alignment restriction */
+ if (align)
+ d->aligned[i] = PTR_ALIGN(d->raw[i], align);
+ else
+ d->aligned[i] = d->raw[i];
+ }
+
+ return 0;
+err:
+ __dmatest_free_test_data(d, i);
+ return -ENOMEM;
+}
+
+/*
+ * This function repeatedly tests DMA transfers of various lengths and
+ * offsets for a given operation type until it is told to exit by
+ * kthread_stop(). There may be multiple threads running this function
+ * in parallel for a single channel, and there may be multiple channels
+ * being tested in parallel.
+ *
+ * Before each test, the source and destination buffer is initialized
+ * with a known pattern. This pattern is different depending on
+ * whether it's in an area which is supposed to be copied or
+ * overwritten, and different in the source and destination buffers.
+ * So if the DMA engine doesn't copy exactly what we tell it to copy,
+ * we'll notice.
+ */
+static int dmatest_func(void *data)
+{
+ struct dmatest_thread *thread = data;
+ struct dmatest_done *done = &thread->test_done;
+ struct dmatest_info *info;
+ struct dmatest_params *params;
+ struct dma_chan *chan;
+ struct dma_device *dev;
+ struct device *dma_dev;
+ unsigned int error_count;
+ unsigned int failed_tests = 0;
+ unsigned int total_tests = 0;
+ dma_cookie_t cookie;
+ enum dma_status status;
+ enum dma_ctrl_flags flags;
+ u8 *pq_coefs = NULL;
+ int ret;
+ unsigned int buf_size;
+ struct dmatest_data *src;
+ struct dmatest_data *dst;
+ int i;
+ ktime_t ktime, start, diff;
+ ktime_t filltime = 0;
+ ktime_t comparetime = 0;
+ s64 runtime = 0;
+ unsigned long long total_len = 0;
+ unsigned long long iops = 0;
+ u8 align = 0;
+ bool is_memset = false;
+ dma_addr_t *srcs;
+ dma_addr_t *dma_pq;
+
+ set_freezable();
+
+ ret = -ENOMEM;
+
+ smp_rmb();
+ thread->pending = false;
+ info = thread->info;
+ params = &info->params;
+ chan = thread->chan;
+ dev = chan->device;
+ dma_dev = dmaengine_get_dma_device(chan);
+
+ src = &thread->src;
+ dst = &thread->dst;
+ if (thread->type == DMA_MEMCPY) {
+ align = params->alignment < 0 ? dev->copy_align :
+ params->alignment;
+ src->cnt = dst->cnt = 1;
+ } else if (thread->type == DMA_MEMSET) {
+ align = params->alignment < 0 ? dev->fill_align :
+ params->alignment;
+ src->cnt = dst->cnt = 1;
+ is_memset = true;
+ } else if (thread->type == DMA_XOR) {
+ /* force odd to ensure dst = src */
+ src->cnt = min_odd(params->xor_sources | 1, dev->max_xor);
+ dst->cnt = 1;
+ align = params->alignment < 0 ? dev->xor_align :
+ params->alignment;
+ } else if (thread->type == DMA_PQ) {
+ /* force odd to ensure dst = src */
+ src->cnt = min_odd(params->pq_sources | 1, dma_maxpq(dev, 0));
+ dst->cnt = 2;
+ align = params->alignment < 0 ? dev->pq_align :
+ params->alignment;
+
+ pq_coefs = kmalloc(params->pq_sources + 1, GFP_KERNEL);
+ if (!pq_coefs)
+ goto err_thread_type;
+
+ for (i = 0; i < src->cnt; i++)
+ pq_coefs[i] = 1;
+ } else
+ goto err_thread_type;
+
+ /* Check if buffer count fits into map count variable (u8) */
+ if ((src->cnt + dst->cnt) >= 255) {
+ pr_err("too many buffers (%d of 255 supported)\n",
+ src->cnt + dst->cnt);
+ goto err_free_coefs;
+ }
+
+ buf_size = params->buf_size;
+ if (1 << align > buf_size) {
+ pr_err("%u-byte buffer too small for %d-byte alignment\n",
+ buf_size, 1 << align);
+ goto err_free_coefs;
+ }
+
+ if (dmatest_alloc_test_data(src, buf_size, align) < 0)
+ goto err_free_coefs;
+
+ if (dmatest_alloc_test_data(dst, buf_size, align) < 0)
+ goto err_src;
+
+ set_user_nice(current, 10);
+
+ srcs = kcalloc(src->cnt, sizeof(dma_addr_t), GFP_KERNEL);
+ if (!srcs)
+ goto err_dst;
+
+ dma_pq = kcalloc(dst->cnt, sizeof(dma_addr_t), GFP_KERNEL);
+ if (!dma_pq)
+ goto err_srcs_array;
+
+ /*
+ * src and dst buffers are freed by ourselves below
+ */
+ if (params->polled)
+ flags = DMA_CTRL_ACK;
+ else
+ flags = DMA_CTRL_ACK | DMA_PREP_INTERRUPT;
+
+ ktime = ktime_get();
+ while (!(kthread_should_stop() ||
+ (params->iterations && total_tests >= params->iterations))) {
+ struct dma_async_tx_descriptor *tx = NULL;
+ struct dmaengine_unmap_data *um;
+ dma_addr_t *dsts;
+ unsigned int len;
+
+ total_tests++;
+
+ if (params->transfer_size) {
+ if (params->transfer_size >= buf_size) {
+ pr_err("%u-byte transfer size must be lower than %u-buffer size\n",
+ params->transfer_size, buf_size);
+ break;
+ }
+ len = params->transfer_size;
+ } else if (params->norandom) {
+ len = buf_size;
+ } else {
+ len = dmatest_random() % buf_size + 1;
+ }
+
+ /* Do not alter transfer size explicitly defined by user */
+ if (!params->transfer_size) {
+ len = (len >> align) << align;
+ if (!len)
+ len = 1 << align;
+ }
+ total_len += len;
+
+ if (params->norandom) {
+ src->off = 0;
+ dst->off = 0;
+ } else {
+ src->off = dmatest_random() % (buf_size - len + 1);
+ dst->off = dmatest_random() % (buf_size - len + 1);
+
+ src->off = (src->off >> align) << align;
+ dst->off = (dst->off >> align) << align;
+ }
+
+ if (!params->noverify) {
+ start = ktime_get();
+ dmatest_init_srcs(src->aligned, src->off, len,
+ buf_size, is_memset);
+ dmatest_init_dsts(dst->aligned, dst->off, len,
+ buf_size, is_memset);
+
+ diff = ktime_sub(ktime_get(), start);
+ filltime = ktime_add(filltime, diff);
+ }
+
+ um = dmaengine_get_unmap_data(dma_dev, src->cnt + dst->cnt,
+ GFP_KERNEL);
+ if (!um) {
+ failed_tests++;
+ result("unmap data NULL", total_tests,
+ src->off, dst->off, len, ret);
+ continue;
+ }
+
+ um->len = buf_size;
+ for (i = 0; i < src->cnt; i++) {
+ void *buf = src->aligned[i];
+ struct page *pg = virt_to_page(buf);
+ unsigned long pg_off = offset_in_page(buf);
+
+ um->addr[i] = dma_map_page(dma_dev, pg, pg_off,
+ um->len, DMA_TO_DEVICE);
+ srcs[i] = um->addr[i] + src->off;
+ ret = dma_mapping_error(dma_dev, um->addr[i]);
+ if (ret) {
+ result("src mapping error", total_tests,
+ src->off, dst->off, len, ret);
+ goto error_unmap_continue;
+ }
+ um->to_cnt++;
+ }
+ /* map with DMA_BIDIRECTIONAL to force writeback/invalidate */
+ dsts = &um->addr[src->cnt];
+ for (i = 0; i < dst->cnt; i++) {
+ void *buf = dst->aligned[i];
+ struct page *pg = virt_to_page(buf);
+ unsigned long pg_off = offset_in_page(buf);
+
+ dsts[i] = dma_map_page(dma_dev, pg, pg_off, um->len,
+ DMA_BIDIRECTIONAL);
+ ret = dma_mapping_error(dma_dev, dsts[i]);
+ if (ret) {
+ result("dst mapping error", total_tests,
+ src->off, dst->off, len, ret);
+ goto error_unmap_continue;
+ }
+ um->bidi_cnt++;
+ }
+
+ if (thread->type == DMA_MEMCPY)
+ tx = dev->device_prep_dma_memcpy(chan,
+ dsts[0] + dst->off,
+ srcs[0], len, flags);
+ else if (thread->type == DMA_MEMSET)
+ tx = dev->device_prep_dma_memset(chan,
+ dsts[0] + dst->off,
+ *(src->aligned[0] + src->off),
+ len, flags);
+ else if (thread->type == DMA_XOR)
+ tx = dev->device_prep_dma_xor(chan,
+ dsts[0] + dst->off,
+ srcs, src->cnt,
+ len, flags);
+ else if (thread->type == DMA_PQ) {
+ for (i = 0; i < dst->cnt; i++)
+ dma_pq[i] = dsts[i] + dst->off;
+ tx = dev->device_prep_dma_pq(chan, dma_pq, srcs,
+ src->cnt, pq_coefs,
+ len, flags);
+ }
+
+ if (!tx) {
+ result("prep error", total_tests, src->off,
+ dst->off, len, ret);
+ msleep(100);
+ goto error_unmap_continue;
+ }
+
+ done->done = false;
+ if (!params->polled) {
+ tx->callback = dmatest_callback;
+ tx->callback_param = done;
+ }
+ cookie = tx->tx_submit(tx);
+
+ if (dma_submit_error(cookie)) {
+ result("submit error", total_tests, src->off,
+ dst->off, len, ret);
+ msleep(100);
+ goto error_unmap_continue;
+ }
+
+ if (params->polled) {
+ status = dma_sync_wait(chan, cookie);
+ dmaengine_terminate_sync(chan);
+ if (status == DMA_COMPLETE)
+ done->done = true;
+ } else {
+ dma_async_issue_pending(chan);
+
+ wait_event_freezable_timeout(thread->done_wait,
+ done->done,
+ msecs_to_jiffies(params->timeout));
+
+ status = dma_async_is_tx_complete(chan, cookie, NULL,
+ NULL);
+ }
+
+ if (!done->done) {
+ result("test timed out", total_tests, src->off, dst->off,
+ len, 0);
+ goto error_unmap_continue;
+ } else if (status != DMA_COMPLETE &&
+ !(dma_has_cap(DMA_COMPLETION_NO_ORDER,
+ dev->cap_mask) &&
+ status == DMA_OUT_OF_ORDER)) {
+ result(status == DMA_ERROR ?
+ "completion error status" :
+ "completion busy status", total_tests, src->off,
+ dst->off, len, ret);
+ goto error_unmap_continue;
+ }
+
+ dmaengine_unmap_put(um);
+
+ if (params->noverify) {
+ verbose_result("test passed", total_tests, src->off,
+ dst->off, len, 0);
+ continue;
+ }
+
+ start = ktime_get();
+ pr_debug("%s: verifying source buffer...\n", current->comm);
+ error_count = dmatest_verify(src->aligned, 0, src->off,
+ 0, PATTERN_SRC, true, is_memset);
+ error_count += dmatest_verify(src->aligned, src->off,
+ src->off + len, src->off,
+ PATTERN_SRC | PATTERN_COPY, true, is_memset);
+ error_count += dmatest_verify(src->aligned, src->off + len,
+ buf_size, src->off + len,
+ PATTERN_SRC, true, is_memset);
+
+ pr_debug("%s: verifying dest buffer...\n", current->comm);
+ error_count += dmatest_verify(dst->aligned, 0, dst->off,
+ 0, PATTERN_DST, false, is_memset);
+
+ error_count += dmatest_verify(dst->aligned, dst->off,
+ dst->off + len, src->off,
+ PATTERN_SRC | PATTERN_COPY, false, is_memset);
+
+ error_count += dmatest_verify(dst->aligned, dst->off + len,
+ buf_size, dst->off + len,
+ PATTERN_DST, false, is_memset);
+
+ diff = ktime_sub(ktime_get(), start);
+ comparetime = ktime_add(comparetime, diff);
+
+ if (error_count) {
+ result("data error", total_tests, src->off, dst->off,
+ len, error_count);
+ failed_tests++;
+ } else {
+ verbose_result("test passed", total_tests, src->off,
+ dst->off, len, 0);
+ }
+
+ continue;
+
+error_unmap_continue:
+ dmaengine_unmap_put(um);
+ failed_tests++;
+ }
+ ktime = ktime_sub(ktime_get(), ktime);
+ ktime = ktime_sub(ktime, comparetime);
+ ktime = ktime_sub(ktime, filltime);
+ runtime = ktime_to_us(ktime);
+
+ ret = 0;
+ kfree(dma_pq);
+err_srcs_array:
+ kfree(srcs);
+err_dst:
+ dmatest_free_test_data(dst);
+err_src:
+ dmatest_free_test_data(src);
+err_free_coefs:
+ kfree(pq_coefs);
+err_thread_type:
+ iops = dmatest_persec(runtime, total_tests);
+ pr_info("%s: summary %u tests, %u failures %llu.%02llu iops %llu KB/s (%d)\n",
+ current->comm, total_tests, failed_tests,
+ FIXPT_TO_INT(iops), FIXPT_GET_FRAC(iops),
+ dmatest_KBs(runtime, total_len), ret);
+
+ /* terminate all transfers on specified channels */
+ if (ret || failed_tests)
+ dmaengine_terminate_sync(chan);
+
+ thread->done = true;
+ wake_up(&thread_wait);
+
+ return ret;
+}
+
+static void dmatest_cleanup_channel(struct dmatest_chan *dtc)
+{
+ struct dmatest_thread *thread;
+ struct dmatest_thread *_thread;
+ int ret;
+
+ list_for_each_entry_safe(thread, _thread, &dtc->threads, node) {
+ ret = kthread_stop(thread->task);
+ pr_debug("thread %s exited with status %d\n",
+ thread->task->comm, ret);
+ list_del(&thread->node);
+ put_task_struct(thread->task);
+ kfree(thread);
+ }
+
+ /* terminate all transfers on specified channels */
+ dmaengine_terminate_sync(dtc->chan);
+
+ kfree(dtc);
+}
+
+static int dmatest_add_threads(struct dmatest_info *info,
+ struct dmatest_chan *dtc, enum dma_transaction_type type)
+{
+ struct dmatest_params *params = &info->params;
+ struct dmatest_thread *thread;
+ struct dma_chan *chan = dtc->chan;
+ char *op;
+ unsigned int i;
+
+ if (type == DMA_MEMCPY)
+ op = "copy";
+ else if (type == DMA_MEMSET)
+ op = "set";
+ else if (type == DMA_XOR)
+ op = "xor";
+ else if (type == DMA_PQ)
+ op = "pq";
+ else
+ return -EINVAL;
+
+ for (i = 0; i < params->threads_per_chan; i++) {
+ thread = kzalloc(sizeof(struct dmatest_thread), GFP_KERNEL);
+ if (!thread) {
+ pr_warn("No memory for %s-%s%u\n",
+ dma_chan_name(chan), op, i);
+ break;
+ }
+ thread->info = info;
+ thread->chan = dtc->chan;
+ thread->type = type;
+ thread->test_done.wait = &thread->done_wait;
+ init_waitqueue_head(&thread->done_wait);
+ smp_wmb();
+ thread->task = kthread_create(dmatest_func, thread, "%s-%s%u",
+ dma_chan_name(chan), op, i);
+ if (IS_ERR(thread->task)) {
+ pr_warn("Failed to create thread %s-%s%u\n",
+ dma_chan_name(chan), op, i);
+ kfree(thread);
+ break;
+ }
+
+ /* srcbuf and dstbuf are allocated by the thread itself */
+ get_task_struct(thread->task);
+ list_add_tail(&thread->node, &dtc->threads);
+ thread->pending = true;
+ }
+
+ return i;
+}
+
+static int dmatest_add_channel(struct dmatest_info *info,
+ struct dma_chan *chan)
+{
+ struct dmatest_chan *dtc;
+ struct dma_device *dma_dev = chan->device;
+ unsigned int thread_count = 0;
+ int cnt;
+
+ dtc = kmalloc(sizeof(struct dmatest_chan), GFP_KERNEL);
+ if (!dtc) {
+ pr_warn("No memory for %s\n", dma_chan_name(chan));
+ return -ENOMEM;
+ }
+
+ dtc->chan = chan;
+ INIT_LIST_HEAD(&dtc->threads);
+
+ if (dma_has_cap(DMA_COMPLETION_NO_ORDER, dma_dev->cap_mask) &&
+ info->params.polled) {
+ info->params.polled = false;
+ pr_warn("DMA_COMPLETION_NO_ORDER, polled disabled\n");
+ }
+
+ if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask)) {
+ if (dmatest == 0) {
+ cnt = dmatest_add_threads(info, dtc, DMA_MEMCPY);
+ thread_count += cnt > 0 ? cnt : 0;
+ }
+ }
+
+ if (dma_has_cap(DMA_MEMSET, dma_dev->cap_mask)) {
+ if (dmatest == 1) {
+ cnt = dmatest_add_threads(info, dtc, DMA_MEMSET);
+ thread_count += cnt > 0 ? cnt : 0;
+ }
+ }
+
+ if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) {
+ cnt = dmatest_add_threads(info, dtc, DMA_XOR);
+ thread_count += cnt > 0 ? cnt : 0;
+ }
+ if (dma_has_cap(DMA_PQ, dma_dev->cap_mask)) {
+ cnt = dmatest_add_threads(info, dtc, DMA_PQ);
+ thread_count += cnt > 0 ? cnt : 0;
+ }
+
+ pr_info("Added %u threads using %s\n",
+ thread_count, dma_chan_name(chan));
+
+ list_add_tail(&dtc->node, &info->channels);
+ info->nr_channels++;
+
+ return 0;
+}
+
+static bool filter(struct dma_chan *chan, void *param)
+{
+ return dmatest_match_channel(param, chan) && dmatest_match_device(param, chan->device);
+}
+
+static void request_channels(struct dmatest_info *info,
+ enum dma_transaction_type type)
+{
+ dma_cap_mask_t mask;
+
+ dma_cap_zero(mask);
+ dma_cap_set(type, mask);
+ for (;;) {
+ struct dmatest_params *params = &info->params;
+ struct dma_chan *chan;
+
+ chan = dma_request_channel(mask, filter, params);
+ if (chan) {
+ if (dmatest_add_channel(info, chan)) {
+ dma_release_channel(chan);
+ break; /* add_channel failed, punt */
+ }
+ } else
+ break; /* no more channels available */
+ if (params->max_channels &&
+ info->nr_channels >= params->max_channels)
+ break; /* we have all we need */
+ }
+}
+
+static void add_threaded_test(struct dmatest_info *info)
+{
+ struct dmatest_params *params = &info->params;
+
+ /* Copy test parameters */
+ params->buf_size = test_buf_size;
+ strscpy(params->channel, strim(test_channel), sizeof(params->channel));
+ strscpy(params->device, strim(test_device), sizeof(params->device));
+ params->threads_per_chan = threads_per_chan;
+ params->max_channels = max_channels;
+ params->iterations = iterations;
+ params->xor_sources = xor_sources;
+ params->pq_sources = pq_sources;
+ params->timeout = timeout;
+ params->noverify = noverify;
+ params->norandom = norandom;
+ params->alignment = alignment;
+ params->transfer_size = transfer_size;
+ params->polled = polled;
+
+ request_channels(info, DMA_MEMCPY);
+ request_channels(info, DMA_MEMSET);
+ request_channels(info, DMA_XOR);
+ request_channels(info, DMA_PQ);
+}
+
+static void run_pending_tests(struct dmatest_info *info)
+{
+ struct dmatest_chan *dtc;
+ unsigned int thread_count = 0;
+
+ list_for_each_entry(dtc, &info->channels, node) {
+ struct dmatest_thread *thread;
+
+ thread_count = 0;
+ list_for_each_entry(thread, &dtc->threads, node) {
+ wake_up_process(thread->task);
+ thread_count++;
+ }
+ pr_info("Started %u threads using %s\n",
+ thread_count, dma_chan_name(dtc->chan));
+ }
+}
+
+static void stop_threaded_test(struct dmatest_info *info)
+{
+ struct dmatest_chan *dtc, *_dtc;
+ struct dma_chan *chan;
+
+ list_for_each_entry_safe(dtc, _dtc, &info->channels, node) {
+ list_del(&dtc->node);
+ chan = dtc->chan;
+ dmatest_cleanup_channel(dtc);
+ pr_debug("dropped channel %s\n", dma_chan_name(chan));
+ dma_release_channel(chan);
+ }
+
+ info->nr_channels = 0;
+}
+
+static void start_threaded_tests(struct dmatest_info *info)
+{
+ /* we might be called early to set run=, defer running until all
+ * parameters have been evaluated
+ */
+ if (!info->did_init)
+ return;
+
+ run_pending_tests(info);
+}
+
+static int dmatest_run_get(char *val, const struct kernel_param *kp)
+{
+ struct dmatest_info *info = &test_info;
+
+ mutex_lock(&info->lock);
+ if (is_threaded_test_run(info)) {
+ dmatest_run = true;
+ } else {
+ if (!is_threaded_test_pending(info))
+ stop_threaded_test(info);
+ dmatest_run = false;
+ }
+ mutex_unlock(&info->lock);
+
+ return param_get_bool(val, kp);
+}
+
+static int dmatest_run_set(const char *val, const struct kernel_param *kp)
+{
+ struct dmatest_info *info = &test_info;
+ int ret;
+
+ mutex_lock(&info->lock);
+ ret = param_set_bool(val, kp);
+ if (ret) {
+ mutex_unlock(&info->lock);
+ return ret;
+ } else if (dmatest_run) {
+ if (!is_threaded_test_pending(info)) {
+ /*
+ * We have nothing to run. This can be due to:
+ */
+ ret = info->last_error;
+ if (ret) {
+ /* 1) Misconfiguration */
+ pr_err("Channel misconfigured, can't continue\n");
+ mutex_unlock(&info->lock);
+ return ret;
+ } else {
+ /* 2) We rely on defaults */
+ pr_info("No channels configured, continue with any\n");
+ if (!is_threaded_test_run(info))
+ stop_threaded_test(info);
+ add_threaded_test(info);
+ }
+ }
+ start_threaded_tests(info);
+ } else {
+ stop_threaded_test(info);
+ }
+
+ mutex_unlock(&info->lock);
+
+ return ret;
+}
+
+static int dmatest_chan_set(const char *val, const struct kernel_param *kp)
+{
+ struct dmatest_info *info = &test_info;
+ struct dmatest_chan *dtc;
+ char chan_reset_val[20];
+ int ret;
+
+ mutex_lock(&info->lock);
+ ret = param_set_copystring(val, kp);
+ if (ret) {
+ mutex_unlock(&info->lock);
+ return ret;
+ }
+ /*Clear any previously run threads */
+ if (!is_threaded_test_run(info) && !is_threaded_test_pending(info))
+ stop_threaded_test(info);
+ /* Reject channels that are already registered */
+ if (is_threaded_test_pending(info)) {
+ list_for_each_entry(dtc, &info->channels, node) {
+ if (strcmp(dma_chan_name(dtc->chan),
+ strim(test_channel)) == 0) {
+ dtc = list_last_entry(&info->channels,
+ struct dmatest_chan,
+ node);
+ strscpy(chan_reset_val,
+ dma_chan_name(dtc->chan),
+ sizeof(chan_reset_val));
+ ret = -EBUSY;
+ goto add_chan_err;
+ }
+ }
+ }
+
+ add_threaded_test(info);
+
+ /* Check if channel was added successfully */
+ if (!list_empty(&info->channels)) {
+ /*
+ * if new channel was not successfully added, revert the
+ * "test_channel" string to the name of the last successfully
+ * added channel. exception for when users issues empty string
+ * to channel parameter.
+ */
+ dtc = list_last_entry(&info->channels, struct dmatest_chan, node);
+ if ((strcmp(dma_chan_name(dtc->chan), strim(test_channel)) != 0)
+ && (strcmp("", strim(test_channel)) != 0)) {
+ ret = -EINVAL;
+ strscpy(chan_reset_val, dma_chan_name(dtc->chan),
+ sizeof(chan_reset_val));
+ goto add_chan_err;
+ }
+
+ } else {
+ /* Clear test_channel if no channels were added successfully */
+ strscpy(chan_reset_val, "", sizeof(chan_reset_val));
+ ret = -EBUSY;
+ goto add_chan_err;
+ }
+
+ info->last_error = ret;
+ mutex_unlock(&info->lock);
+
+ return ret;
+
+add_chan_err:
+ param_set_copystring(chan_reset_val, kp);
+ info->last_error = ret;
+ mutex_unlock(&info->lock);
+
+ return ret;
+}
+
+static int dmatest_chan_get(char *val, const struct kernel_param *kp)
+{
+ struct dmatest_info *info = &test_info;
+
+ mutex_lock(&info->lock);
+ if (!is_threaded_test_run(info) && !is_threaded_test_pending(info)) {
+ stop_threaded_test(info);
+ strscpy(test_channel, "", sizeof(test_channel));
+ }
+ mutex_unlock(&info->lock);
+
+ return param_get_string(val, kp);
+}
+
+static int dmatest_test_list_get(char *val, const struct kernel_param *kp)
+{
+ struct dmatest_info *info = &test_info;
+ struct dmatest_chan *dtc;
+ unsigned int thread_count = 0;
+
+ list_for_each_entry(dtc, &info->channels, node) {
+ struct dmatest_thread *thread;
+
+ thread_count = 0;
+ list_for_each_entry(thread, &dtc->threads, node) {
+ thread_count++;
+ }
+ pr_info("%u threads using %s\n",
+ thread_count, dma_chan_name(dtc->chan));
+ }
+
+ return 0;
+}
+
+static int __init dmatest_init(void)
+{
+ struct dmatest_info *info = &test_info;
+ struct dmatest_params *params = &info->params;
+
+ if (dmatest_run) {
+ mutex_lock(&info->lock);
+ add_threaded_test(info);
+ run_pending_tests(info);
+ mutex_unlock(&info->lock);
+ }
+
+ if (params->iterations && wait)
+ wait_event(thread_wait, !is_threaded_test_run(info));
+
+ /* module parameters are stable, inittime tests are started,
+ * let userspace take over 'run' control
+ */
+ info->did_init = true;
+
+ return 0;
+}
+/* when compiled-in wait for drivers to load first */
+late_initcall(dmatest_init);
+
+static void __exit dmatest_exit(void)
+{
+ struct dmatest_info *info = &test_info;
+
+ mutex_lock(&info->lock);
+ stop_threaded_test(info);
+ mutex_unlock(&info->lock);
+}
+module_exit(dmatest_exit);
+
+MODULE_AUTHOR("Haavard Skinnemoen (Atmel)");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/dw-axi-dmac/Makefile b/drivers/dma/dw-axi-dmac/Makefile
new file mode 100644
index 000000000..4eb2f1639
--- /dev/null
+++ b/drivers/dma/dw-axi-dmac/Makefile
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+obj-$(CONFIG_DW_AXI_DMAC) += dw-axi-dmac-platform.o
diff --git a/drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c b/drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c
new file mode 100644
index 000000000..152c5d985
--- /dev/null
+++ b/drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c
@@ -0,0 +1,1578 @@
+// SPDX-License-Identifier: GPL-2.0
+// (C) 2017-2018 Synopsys, Inc. (www.synopsys.com)
+
+/*
+ * Synopsys DesignWare AXI DMA Controller driver.
+ *
+ * Author: Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>
+ */
+
+#include <linux/bitops.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/dmaengine.h>
+#include <linux/dmapool.h>
+#include <linux/dma-mapping.h>
+#include <linux/err.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/iopoll.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_dma.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/property.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+
+#include "dw-axi-dmac.h"
+#include "../dmaengine.h"
+#include "../virt-dma.h"
+
+/*
+ * The set of bus widths supported by the DMA controller. DW AXI DMAC supports
+ * master data bus width up to 512 bits (for both AXI master interfaces), but
+ * it depends on IP block configuration.
+ */
+#define AXI_DMA_BUSWIDTHS \
+ (DMA_SLAVE_BUSWIDTH_1_BYTE | \
+ DMA_SLAVE_BUSWIDTH_2_BYTES | \
+ DMA_SLAVE_BUSWIDTH_4_BYTES | \
+ DMA_SLAVE_BUSWIDTH_8_BYTES | \
+ DMA_SLAVE_BUSWIDTH_16_BYTES | \
+ DMA_SLAVE_BUSWIDTH_32_BYTES | \
+ DMA_SLAVE_BUSWIDTH_64_BYTES)
+
+static inline void
+axi_dma_iowrite32(struct axi_dma_chip *chip, u32 reg, u32 val)
+{
+ iowrite32(val, chip->regs + reg);
+}
+
+static inline u32 axi_dma_ioread32(struct axi_dma_chip *chip, u32 reg)
+{
+ return ioread32(chip->regs + reg);
+}
+
+static inline void
+axi_chan_iowrite32(struct axi_dma_chan *chan, u32 reg, u32 val)
+{
+ iowrite32(val, chan->chan_regs + reg);
+}
+
+static inline u32 axi_chan_ioread32(struct axi_dma_chan *chan, u32 reg)
+{
+ return ioread32(chan->chan_regs + reg);
+}
+
+static inline void
+axi_chan_iowrite64(struct axi_dma_chan *chan, u32 reg, u64 val)
+{
+ /*
+ * We split one 64 bit write for two 32 bit write as some HW doesn't
+ * support 64 bit access.
+ */
+ iowrite32(lower_32_bits(val), chan->chan_regs + reg);
+ iowrite32(upper_32_bits(val), chan->chan_regs + reg + 4);
+}
+
+static inline void axi_chan_config_write(struct axi_dma_chan *chan,
+ struct axi_dma_chan_config *config)
+{
+ u32 cfg_lo, cfg_hi;
+
+ cfg_lo = (config->dst_multblk_type << CH_CFG_L_DST_MULTBLK_TYPE_POS |
+ config->src_multblk_type << CH_CFG_L_SRC_MULTBLK_TYPE_POS);
+ if (chan->chip->dw->hdata->reg_map_8_channels) {
+ cfg_hi = config->tt_fc << CH_CFG_H_TT_FC_POS |
+ config->hs_sel_src << CH_CFG_H_HS_SEL_SRC_POS |
+ config->hs_sel_dst << CH_CFG_H_HS_SEL_DST_POS |
+ config->src_per << CH_CFG_H_SRC_PER_POS |
+ config->dst_per << CH_CFG_H_DST_PER_POS |
+ config->prior << CH_CFG_H_PRIORITY_POS;
+ } else {
+ cfg_lo |= config->src_per << CH_CFG2_L_SRC_PER_POS |
+ config->dst_per << CH_CFG2_L_DST_PER_POS;
+ cfg_hi = config->tt_fc << CH_CFG2_H_TT_FC_POS |
+ config->hs_sel_src << CH_CFG2_H_HS_SEL_SRC_POS |
+ config->hs_sel_dst << CH_CFG2_H_HS_SEL_DST_POS |
+ config->prior << CH_CFG2_H_PRIORITY_POS;
+ }
+ axi_chan_iowrite32(chan, CH_CFG_L, cfg_lo);
+ axi_chan_iowrite32(chan, CH_CFG_H, cfg_hi);
+}
+
+static inline void axi_dma_disable(struct axi_dma_chip *chip)
+{
+ u32 val;
+
+ val = axi_dma_ioread32(chip, DMAC_CFG);
+ val &= ~DMAC_EN_MASK;
+ axi_dma_iowrite32(chip, DMAC_CFG, val);
+}
+
+static inline void axi_dma_enable(struct axi_dma_chip *chip)
+{
+ u32 val;
+
+ val = axi_dma_ioread32(chip, DMAC_CFG);
+ val |= DMAC_EN_MASK;
+ axi_dma_iowrite32(chip, DMAC_CFG, val);
+}
+
+static inline void axi_dma_irq_disable(struct axi_dma_chip *chip)
+{
+ u32 val;
+
+ val = axi_dma_ioread32(chip, DMAC_CFG);
+ val &= ~INT_EN_MASK;
+ axi_dma_iowrite32(chip, DMAC_CFG, val);
+}
+
+static inline void axi_dma_irq_enable(struct axi_dma_chip *chip)
+{
+ u32 val;
+
+ val = axi_dma_ioread32(chip, DMAC_CFG);
+ val |= INT_EN_MASK;
+ axi_dma_iowrite32(chip, DMAC_CFG, val);
+}
+
+static inline void axi_chan_irq_disable(struct axi_dma_chan *chan, u32 irq_mask)
+{
+ u32 val;
+
+ if (likely(irq_mask == DWAXIDMAC_IRQ_ALL)) {
+ axi_chan_iowrite32(chan, CH_INTSTATUS_ENA, DWAXIDMAC_IRQ_NONE);
+ } else {
+ val = axi_chan_ioread32(chan, CH_INTSTATUS_ENA);
+ val &= ~irq_mask;
+ axi_chan_iowrite32(chan, CH_INTSTATUS_ENA, val);
+ }
+}
+
+static inline void axi_chan_irq_set(struct axi_dma_chan *chan, u32 irq_mask)
+{
+ axi_chan_iowrite32(chan, CH_INTSTATUS_ENA, irq_mask);
+}
+
+static inline void axi_chan_irq_sig_set(struct axi_dma_chan *chan, u32 irq_mask)
+{
+ axi_chan_iowrite32(chan, CH_INTSIGNAL_ENA, irq_mask);
+}
+
+static inline void axi_chan_irq_clear(struct axi_dma_chan *chan, u32 irq_mask)
+{
+ axi_chan_iowrite32(chan, CH_INTCLEAR, irq_mask);
+}
+
+static inline u32 axi_chan_irq_read(struct axi_dma_chan *chan)
+{
+ return axi_chan_ioread32(chan, CH_INTSTATUS);
+}
+
+static inline void axi_chan_disable(struct axi_dma_chan *chan)
+{
+ u32 val;
+
+ val = axi_dma_ioread32(chan->chip, DMAC_CHEN);
+ val &= ~(BIT(chan->id) << DMAC_CHAN_EN_SHIFT);
+ if (chan->chip->dw->hdata->reg_map_8_channels)
+ val |= BIT(chan->id) << DMAC_CHAN_EN_WE_SHIFT;
+ else
+ val |= BIT(chan->id) << DMAC_CHAN_EN2_WE_SHIFT;
+ axi_dma_iowrite32(chan->chip, DMAC_CHEN, val);
+}
+
+static inline void axi_chan_enable(struct axi_dma_chan *chan)
+{
+ u32 val;
+
+ val = axi_dma_ioread32(chan->chip, DMAC_CHEN);
+ if (chan->chip->dw->hdata->reg_map_8_channels)
+ val |= BIT(chan->id) << DMAC_CHAN_EN_SHIFT |
+ BIT(chan->id) << DMAC_CHAN_EN_WE_SHIFT;
+ else
+ val |= BIT(chan->id) << DMAC_CHAN_EN_SHIFT |
+ BIT(chan->id) << DMAC_CHAN_EN2_WE_SHIFT;
+ axi_dma_iowrite32(chan->chip, DMAC_CHEN, val);
+}
+
+static inline bool axi_chan_is_hw_enable(struct axi_dma_chan *chan)
+{
+ u32 val;
+
+ val = axi_dma_ioread32(chan->chip, DMAC_CHEN);
+
+ return !!(val & (BIT(chan->id) << DMAC_CHAN_EN_SHIFT));
+}
+
+static void axi_dma_hw_init(struct axi_dma_chip *chip)
+{
+ int ret;
+ u32 i;
+
+ for (i = 0; i < chip->dw->hdata->nr_channels; i++) {
+ axi_chan_irq_disable(&chip->dw->chan[i], DWAXIDMAC_IRQ_ALL);
+ axi_chan_disable(&chip->dw->chan[i]);
+ }
+ ret = dma_set_mask_and_coherent(chip->dev, DMA_BIT_MASK(64));
+ if (ret)
+ dev_warn(chip->dev, "Unable to set coherent mask\n");
+}
+
+static u32 axi_chan_get_xfer_width(struct axi_dma_chan *chan, dma_addr_t src,
+ dma_addr_t dst, size_t len)
+{
+ u32 max_width = chan->chip->dw->hdata->m_data_width;
+
+ return __ffs(src | dst | len | BIT(max_width));
+}
+
+static inline const char *axi_chan_name(struct axi_dma_chan *chan)
+{
+ return dma_chan_name(&chan->vc.chan);
+}
+
+static struct axi_dma_desc *axi_desc_alloc(u32 num)
+{
+ struct axi_dma_desc *desc;
+
+ desc = kzalloc(sizeof(*desc), GFP_NOWAIT);
+ if (!desc)
+ return NULL;
+
+ desc->hw_desc = kcalloc(num, sizeof(*desc->hw_desc), GFP_NOWAIT);
+ if (!desc->hw_desc) {
+ kfree(desc);
+ return NULL;
+ }
+
+ return desc;
+}
+
+static struct axi_dma_lli *axi_desc_get(struct axi_dma_chan *chan,
+ dma_addr_t *addr)
+{
+ struct axi_dma_lli *lli;
+ dma_addr_t phys;
+
+ lli = dma_pool_zalloc(chan->desc_pool, GFP_NOWAIT, &phys);
+ if (unlikely(!lli)) {
+ dev_err(chan2dev(chan), "%s: not enough descriptors available\n",
+ axi_chan_name(chan));
+ return NULL;
+ }
+
+ atomic_inc(&chan->descs_allocated);
+ *addr = phys;
+
+ return lli;
+}
+
+static void axi_desc_put(struct axi_dma_desc *desc)
+{
+ struct axi_dma_chan *chan = desc->chan;
+ int count = atomic_read(&chan->descs_allocated);
+ struct axi_dma_hw_desc *hw_desc;
+ int descs_put;
+
+ for (descs_put = 0; descs_put < count; descs_put++) {
+ hw_desc = &desc->hw_desc[descs_put];
+ dma_pool_free(chan->desc_pool, hw_desc->lli, hw_desc->llp);
+ }
+
+ kfree(desc->hw_desc);
+ kfree(desc);
+ atomic_sub(descs_put, &chan->descs_allocated);
+ dev_vdbg(chan2dev(chan), "%s: %d descs put, %d still allocated\n",
+ axi_chan_name(chan), descs_put,
+ atomic_read(&chan->descs_allocated));
+}
+
+static void vchan_desc_put(struct virt_dma_desc *vdesc)
+{
+ axi_desc_put(vd_to_axi_desc(vdesc));
+}
+
+static enum dma_status
+dma_chan_tx_status(struct dma_chan *dchan, dma_cookie_t cookie,
+ struct dma_tx_state *txstate)
+{
+ struct axi_dma_chan *chan = dchan_to_axi_dma_chan(dchan);
+ struct virt_dma_desc *vdesc;
+ enum dma_status status;
+ u32 completed_length;
+ unsigned long flags;
+ u32 completed_blocks;
+ size_t bytes = 0;
+ u32 length;
+ u32 len;
+
+ status = dma_cookie_status(dchan, cookie, txstate);
+ if (status == DMA_COMPLETE || !txstate)
+ return status;
+
+ spin_lock_irqsave(&chan->vc.lock, flags);
+
+ vdesc = vchan_find_desc(&chan->vc, cookie);
+ if (vdesc) {
+ length = vd_to_axi_desc(vdesc)->length;
+ completed_blocks = vd_to_axi_desc(vdesc)->completed_blocks;
+ len = vd_to_axi_desc(vdesc)->hw_desc[0].len;
+ completed_length = completed_blocks * len;
+ bytes = length - completed_length;
+ }
+
+ spin_unlock_irqrestore(&chan->vc.lock, flags);
+ dma_set_residue(txstate, bytes);
+
+ return status;
+}
+
+static void write_desc_llp(struct axi_dma_hw_desc *desc, dma_addr_t adr)
+{
+ desc->lli->llp = cpu_to_le64(adr);
+}
+
+static void write_chan_llp(struct axi_dma_chan *chan, dma_addr_t adr)
+{
+ axi_chan_iowrite64(chan, CH_LLP, adr);
+}
+
+static void dw_axi_dma_set_byte_halfword(struct axi_dma_chan *chan, bool set)
+{
+ u32 offset = DMAC_APB_BYTE_WR_CH_EN;
+ u32 reg_width, val;
+
+ if (!chan->chip->apb_regs) {
+ dev_dbg(chan->chip->dev, "apb_regs not initialized\n");
+ return;
+ }
+
+ reg_width = __ffs(chan->config.dst_addr_width);
+ if (reg_width == DWAXIDMAC_TRANS_WIDTH_16)
+ offset = DMAC_APB_HALFWORD_WR_CH_EN;
+
+ val = ioread32(chan->chip->apb_regs + offset);
+
+ if (set)
+ val |= BIT(chan->id);
+ else
+ val &= ~BIT(chan->id);
+
+ iowrite32(val, chan->chip->apb_regs + offset);
+}
+/* Called in chan locked context */
+static void axi_chan_block_xfer_start(struct axi_dma_chan *chan,
+ struct axi_dma_desc *first)
+{
+ u32 priority = chan->chip->dw->hdata->priority[chan->id];
+ struct axi_dma_chan_config config = {};
+ u32 irq_mask;
+ u8 lms = 0; /* Select AXI0 master for LLI fetching */
+
+ if (unlikely(axi_chan_is_hw_enable(chan))) {
+ dev_err(chan2dev(chan), "%s is non-idle!\n",
+ axi_chan_name(chan));
+
+ return;
+ }
+
+ axi_dma_enable(chan->chip);
+
+ config.dst_multblk_type = DWAXIDMAC_MBLK_TYPE_LL;
+ config.src_multblk_type = DWAXIDMAC_MBLK_TYPE_LL;
+ config.tt_fc = DWAXIDMAC_TT_FC_MEM_TO_MEM_DMAC;
+ config.prior = priority;
+ config.hs_sel_dst = DWAXIDMAC_HS_SEL_HW;
+ config.hs_sel_src = DWAXIDMAC_HS_SEL_HW;
+ switch (chan->direction) {
+ case DMA_MEM_TO_DEV:
+ dw_axi_dma_set_byte_halfword(chan, true);
+ config.tt_fc = chan->config.device_fc ?
+ DWAXIDMAC_TT_FC_MEM_TO_PER_DST :
+ DWAXIDMAC_TT_FC_MEM_TO_PER_DMAC;
+ if (chan->chip->apb_regs)
+ config.dst_per = chan->id;
+ else
+ config.dst_per = chan->hw_handshake_num;
+ break;
+ case DMA_DEV_TO_MEM:
+ config.tt_fc = chan->config.device_fc ?
+ DWAXIDMAC_TT_FC_PER_TO_MEM_SRC :
+ DWAXIDMAC_TT_FC_PER_TO_MEM_DMAC;
+ if (chan->chip->apb_regs)
+ config.src_per = chan->id;
+ else
+ config.src_per = chan->hw_handshake_num;
+ break;
+ default:
+ break;
+ }
+ axi_chan_config_write(chan, &config);
+
+ write_chan_llp(chan, first->hw_desc[0].llp | lms);
+
+ irq_mask = DWAXIDMAC_IRQ_DMA_TRF | DWAXIDMAC_IRQ_ALL_ERR;
+ axi_chan_irq_sig_set(chan, irq_mask);
+
+ /* Generate 'suspend' status but don't generate interrupt */
+ irq_mask |= DWAXIDMAC_IRQ_SUSPENDED;
+ axi_chan_irq_set(chan, irq_mask);
+
+ axi_chan_enable(chan);
+}
+
+static void axi_chan_start_first_queued(struct axi_dma_chan *chan)
+{
+ struct axi_dma_desc *desc;
+ struct virt_dma_desc *vd;
+
+ vd = vchan_next_desc(&chan->vc);
+ if (!vd)
+ return;
+
+ desc = vd_to_axi_desc(vd);
+ dev_vdbg(chan2dev(chan), "%s: started %u\n", axi_chan_name(chan),
+ vd->tx.cookie);
+ axi_chan_block_xfer_start(chan, desc);
+}
+
+static void dma_chan_issue_pending(struct dma_chan *dchan)
+{
+ struct axi_dma_chan *chan = dchan_to_axi_dma_chan(dchan);
+ unsigned long flags;
+
+ spin_lock_irqsave(&chan->vc.lock, flags);
+ if (vchan_issue_pending(&chan->vc))
+ axi_chan_start_first_queued(chan);
+ spin_unlock_irqrestore(&chan->vc.lock, flags);
+}
+
+static void dw_axi_dma_synchronize(struct dma_chan *dchan)
+{
+ struct axi_dma_chan *chan = dchan_to_axi_dma_chan(dchan);
+
+ vchan_synchronize(&chan->vc);
+}
+
+static int dma_chan_alloc_chan_resources(struct dma_chan *dchan)
+{
+ struct axi_dma_chan *chan = dchan_to_axi_dma_chan(dchan);
+
+ /* ASSERT: channel is idle */
+ if (axi_chan_is_hw_enable(chan)) {
+ dev_err(chan2dev(chan), "%s is non-idle!\n",
+ axi_chan_name(chan));
+ return -EBUSY;
+ }
+
+ /* LLI address must be aligned to a 64-byte boundary */
+ chan->desc_pool = dma_pool_create(dev_name(chan2dev(chan)),
+ chan->chip->dev,
+ sizeof(struct axi_dma_lli),
+ 64, 0);
+ if (!chan->desc_pool) {
+ dev_err(chan2dev(chan), "No memory for descriptors\n");
+ return -ENOMEM;
+ }
+ dev_vdbg(dchan2dev(dchan), "%s: allocating\n", axi_chan_name(chan));
+
+ pm_runtime_get(chan->chip->dev);
+
+ return 0;
+}
+
+static void dma_chan_free_chan_resources(struct dma_chan *dchan)
+{
+ struct axi_dma_chan *chan = dchan_to_axi_dma_chan(dchan);
+
+ /* ASSERT: channel is idle */
+ if (axi_chan_is_hw_enable(chan))
+ dev_err(dchan2dev(dchan), "%s is non-idle!\n",
+ axi_chan_name(chan));
+
+ axi_chan_disable(chan);
+ axi_chan_irq_disable(chan, DWAXIDMAC_IRQ_ALL);
+
+ vchan_free_chan_resources(&chan->vc);
+
+ dma_pool_destroy(chan->desc_pool);
+ chan->desc_pool = NULL;
+ dev_vdbg(dchan2dev(dchan),
+ "%s: free resources, descriptor still allocated: %u\n",
+ axi_chan_name(chan), atomic_read(&chan->descs_allocated));
+
+ pm_runtime_put(chan->chip->dev);
+}
+
+static void dw_axi_dma_set_hw_channel(struct axi_dma_chan *chan, bool set)
+{
+ struct axi_dma_chip *chip = chan->chip;
+ unsigned long reg_value, val;
+
+ if (!chip->apb_regs) {
+ dev_err(chip->dev, "apb_regs not initialized\n");
+ return;
+ }
+
+ /*
+ * An unused DMA channel has a default value of 0x3F.
+ * Lock the DMA channel by assign a handshake number to the channel.
+ * Unlock the DMA channel by assign 0x3F to the channel.
+ */
+ if (set)
+ val = chan->hw_handshake_num;
+ else
+ val = UNUSED_CHANNEL;
+
+ reg_value = lo_hi_readq(chip->apb_regs + DMAC_APB_HW_HS_SEL_0);
+
+ /* Channel is already allocated, set handshake as per channel ID */
+ /* 64 bit write should handle for 8 channels */
+
+ reg_value &= ~(DMA_APB_HS_SEL_MASK <<
+ (chan->id * DMA_APB_HS_SEL_BIT_SIZE));
+ reg_value |= (val << (chan->id * DMA_APB_HS_SEL_BIT_SIZE));
+ lo_hi_writeq(reg_value, chip->apb_regs + DMAC_APB_HW_HS_SEL_0);
+
+ return;
+}
+
+/*
+ * If DW_axi_dmac sees CHx_CTL.ShadowReg_Or_LLI_Last bit of the fetched LLI
+ * as 1, it understands that the current block is the final block in the
+ * transfer and completes the DMA transfer operation at the end of current
+ * block transfer.
+ */
+static void set_desc_last(struct axi_dma_hw_desc *desc)
+{
+ u32 val;
+
+ val = le32_to_cpu(desc->lli->ctl_hi);
+ val |= CH_CTL_H_LLI_LAST;
+ desc->lli->ctl_hi = cpu_to_le32(val);
+}
+
+static void write_desc_sar(struct axi_dma_hw_desc *desc, dma_addr_t adr)
+{
+ desc->lli->sar = cpu_to_le64(adr);
+}
+
+static void write_desc_dar(struct axi_dma_hw_desc *desc, dma_addr_t adr)
+{
+ desc->lli->dar = cpu_to_le64(adr);
+}
+
+static void set_desc_src_master(struct axi_dma_hw_desc *desc)
+{
+ u32 val;
+
+ /* Select AXI0 for source master */
+ val = le32_to_cpu(desc->lli->ctl_lo);
+ val &= ~CH_CTL_L_SRC_MAST;
+ desc->lli->ctl_lo = cpu_to_le32(val);
+}
+
+static void set_desc_dest_master(struct axi_dma_hw_desc *hw_desc,
+ struct axi_dma_desc *desc)
+{
+ u32 val;
+
+ /* Select AXI1 for source master if available */
+ val = le32_to_cpu(hw_desc->lli->ctl_lo);
+ if (desc->chan->chip->dw->hdata->nr_masters > 1)
+ val |= CH_CTL_L_DST_MAST;
+ else
+ val &= ~CH_CTL_L_DST_MAST;
+
+ hw_desc->lli->ctl_lo = cpu_to_le32(val);
+}
+
+static int dw_axi_dma_set_hw_desc(struct axi_dma_chan *chan,
+ struct axi_dma_hw_desc *hw_desc,
+ dma_addr_t mem_addr, size_t len)
+{
+ unsigned int data_width = BIT(chan->chip->dw->hdata->m_data_width);
+ unsigned int reg_width;
+ unsigned int mem_width;
+ dma_addr_t device_addr;
+ size_t axi_block_ts;
+ size_t block_ts;
+ u32 ctllo, ctlhi;
+ u32 burst_len;
+
+ axi_block_ts = chan->chip->dw->hdata->block_size[chan->id];
+
+ mem_width = __ffs(data_width | mem_addr | len);
+ if (mem_width > DWAXIDMAC_TRANS_WIDTH_32)
+ mem_width = DWAXIDMAC_TRANS_WIDTH_32;
+
+ if (!IS_ALIGNED(mem_addr, 4)) {
+ dev_err(chan->chip->dev, "invalid buffer alignment\n");
+ return -EINVAL;
+ }
+
+ switch (chan->direction) {
+ case DMA_MEM_TO_DEV:
+ reg_width = __ffs(chan->config.dst_addr_width);
+ device_addr = chan->config.dst_addr;
+ ctllo = reg_width << CH_CTL_L_DST_WIDTH_POS |
+ mem_width << CH_CTL_L_SRC_WIDTH_POS |
+ DWAXIDMAC_CH_CTL_L_NOINC << CH_CTL_L_DST_INC_POS |
+ DWAXIDMAC_CH_CTL_L_INC << CH_CTL_L_SRC_INC_POS;
+ block_ts = len >> mem_width;
+ break;
+ case DMA_DEV_TO_MEM:
+ reg_width = __ffs(chan->config.src_addr_width);
+ device_addr = chan->config.src_addr;
+ ctllo = reg_width << CH_CTL_L_SRC_WIDTH_POS |
+ mem_width << CH_CTL_L_DST_WIDTH_POS |
+ DWAXIDMAC_CH_CTL_L_INC << CH_CTL_L_DST_INC_POS |
+ DWAXIDMAC_CH_CTL_L_NOINC << CH_CTL_L_SRC_INC_POS;
+ block_ts = len >> reg_width;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if (block_ts > axi_block_ts)
+ return -EINVAL;
+
+ hw_desc->lli = axi_desc_get(chan, &hw_desc->llp);
+ if (unlikely(!hw_desc->lli))
+ return -ENOMEM;
+
+ ctlhi = CH_CTL_H_LLI_VALID;
+
+ if (chan->chip->dw->hdata->restrict_axi_burst_len) {
+ burst_len = chan->chip->dw->hdata->axi_rw_burst_len;
+ ctlhi |= CH_CTL_H_ARLEN_EN | CH_CTL_H_AWLEN_EN |
+ burst_len << CH_CTL_H_ARLEN_POS |
+ burst_len << CH_CTL_H_AWLEN_POS;
+ }
+
+ hw_desc->lli->ctl_hi = cpu_to_le32(ctlhi);
+
+ if (chan->direction == DMA_MEM_TO_DEV) {
+ write_desc_sar(hw_desc, mem_addr);
+ write_desc_dar(hw_desc, device_addr);
+ } else {
+ write_desc_sar(hw_desc, device_addr);
+ write_desc_dar(hw_desc, mem_addr);
+ }
+
+ hw_desc->lli->block_ts_lo = cpu_to_le32(block_ts - 1);
+
+ ctllo |= DWAXIDMAC_BURST_TRANS_LEN_4 << CH_CTL_L_DST_MSIZE_POS |
+ DWAXIDMAC_BURST_TRANS_LEN_4 << CH_CTL_L_SRC_MSIZE_POS;
+ hw_desc->lli->ctl_lo = cpu_to_le32(ctllo);
+
+ set_desc_src_master(hw_desc);
+
+ hw_desc->len = len;
+ return 0;
+}
+
+static size_t calculate_block_len(struct axi_dma_chan *chan,
+ dma_addr_t dma_addr, size_t buf_len,
+ enum dma_transfer_direction direction)
+{
+ u32 data_width, reg_width, mem_width;
+ size_t axi_block_ts, block_len;
+
+ axi_block_ts = chan->chip->dw->hdata->block_size[chan->id];
+
+ switch (direction) {
+ case DMA_MEM_TO_DEV:
+ data_width = BIT(chan->chip->dw->hdata->m_data_width);
+ mem_width = __ffs(data_width | dma_addr | buf_len);
+ if (mem_width > DWAXIDMAC_TRANS_WIDTH_32)
+ mem_width = DWAXIDMAC_TRANS_WIDTH_32;
+
+ block_len = axi_block_ts << mem_width;
+ break;
+ case DMA_DEV_TO_MEM:
+ reg_width = __ffs(chan->config.src_addr_width);
+ block_len = axi_block_ts << reg_width;
+ break;
+ default:
+ block_len = 0;
+ }
+
+ return block_len;
+}
+
+static struct dma_async_tx_descriptor *
+dw_axi_dma_chan_prep_cyclic(struct dma_chan *dchan, dma_addr_t dma_addr,
+ size_t buf_len, size_t period_len,
+ enum dma_transfer_direction direction,
+ unsigned long flags)
+{
+ struct axi_dma_chan *chan = dchan_to_axi_dma_chan(dchan);
+ struct axi_dma_hw_desc *hw_desc = NULL;
+ struct axi_dma_desc *desc = NULL;
+ dma_addr_t src_addr = dma_addr;
+ u32 num_periods, num_segments;
+ size_t axi_block_len;
+ u32 total_segments;
+ u32 segment_len;
+ unsigned int i;
+ int status;
+ u64 llp = 0;
+ u8 lms = 0; /* Select AXI0 master for LLI fetching */
+
+ num_periods = buf_len / period_len;
+
+ axi_block_len = calculate_block_len(chan, dma_addr, buf_len, direction);
+ if (axi_block_len == 0)
+ return NULL;
+
+ num_segments = DIV_ROUND_UP(period_len, axi_block_len);
+ segment_len = DIV_ROUND_UP(period_len, num_segments);
+
+ total_segments = num_periods * num_segments;
+
+ desc = axi_desc_alloc(total_segments);
+ if (unlikely(!desc))
+ goto err_desc_get;
+
+ chan->direction = direction;
+ desc->chan = chan;
+ chan->cyclic = true;
+ desc->length = 0;
+ desc->period_len = period_len;
+
+ for (i = 0; i < total_segments; i++) {
+ hw_desc = &desc->hw_desc[i];
+
+ status = dw_axi_dma_set_hw_desc(chan, hw_desc, src_addr,
+ segment_len);
+ if (status < 0)
+ goto err_desc_get;
+
+ desc->length += hw_desc->len;
+ /* Set end-of-link to the linked descriptor, so that cyclic
+ * callback function can be triggered during interrupt.
+ */
+ set_desc_last(hw_desc);
+
+ src_addr += segment_len;
+ }
+
+ llp = desc->hw_desc[0].llp;
+
+ /* Managed transfer list */
+ do {
+ hw_desc = &desc->hw_desc[--total_segments];
+ write_desc_llp(hw_desc, llp | lms);
+ llp = hw_desc->llp;
+ } while (total_segments);
+
+ dw_axi_dma_set_hw_channel(chan, true);
+
+ return vchan_tx_prep(&chan->vc, &desc->vd, flags);
+
+err_desc_get:
+ if (desc)
+ axi_desc_put(desc);
+
+ return NULL;
+}
+
+static struct dma_async_tx_descriptor *
+dw_axi_dma_chan_prep_slave_sg(struct dma_chan *dchan, struct scatterlist *sgl,
+ unsigned int sg_len,
+ enum dma_transfer_direction direction,
+ unsigned long flags, void *context)
+{
+ struct axi_dma_chan *chan = dchan_to_axi_dma_chan(dchan);
+ struct axi_dma_hw_desc *hw_desc = NULL;
+ struct axi_dma_desc *desc = NULL;
+ u32 num_segments, segment_len;
+ unsigned int loop = 0;
+ struct scatterlist *sg;
+ size_t axi_block_len;
+ u32 len, num_sgs = 0;
+ unsigned int i;
+ dma_addr_t mem;
+ int status;
+ u64 llp = 0;
+ u8 lms = 0; /* Select AXI0 master for LLI fetching */
+
+ if (unlikely(!is_slave_direction(direction) || !sg_len))
+ return NULL;
+
+ mem = sg_dma_address(sgl);
+ len = sg_dma_len(sgl);
+
+ axi_block_len = calculate_block_len(chan, mem, len, direction);
+ if (axi_block_len == 0)
+ return NULL;
+
+ for_each_sg(sgl, sg, sg_len, i)
+ num_sgs += DIV_ROUND_UP(sg_dma_len(sg), axi_block_len);
+
+ desc = axi_desc_alloc(num_sgs);
+ if (unlikely(!desc))
+ goto err_desc_get;
+
+ desc->chan = chan;
+ desc->length = 0;
+ chan->direction = direction;
+
+ for_each_sg(sgl, sg, sg_len, i) {
+ mem = sg_dma_address(sg);
+ len = sg_dma_len(sg);
+ num_segments = DIV_ROUND_UP(sg_dma_len(sg), axi_block_len);
+ segment_len = DIV_ROUND_UP(sg_dma_len(sg), num_segments);
+
+ do {
+ hw_desc = &desc->hw_desc[loop++];
+ status = dw_axi_dma_set_hw_desc(chan, hw_desc, mem, segment_len);
+ if (status < 0)
+ goto err_desc_get;
+
+ desc->length += hw_desc->len;
+ len -= segment_len;
+ mem += segment_len;
+ } while (len >= segment_len);
+ }
+
+ /* Set end-of-link to the last link descriptor of list */
+ set_desc_last(&desc->hw_desc[num_sgs - 1]);
+
+ /* Managed transfer list */
+ do {
+ hw_desc = &desc->hw_desc[--num_sgs];
+ write_desc_llp(hw_desc, llp | lms);
+ llp = hw_desc->llp;
+ } while (num_sgs);
+
+ dw_axi_dma_set_hw_channel(chan, true);
+
+ return vchan_tx_prep(&chan->vc, &desc->vd, flags);
+
+err_desc_get:
+ if (desc)
+ axi_desc_put(desc);
+
+ return NULL;
+}
+
+static struct dma_async_tx_descriptor *
+dma_chan_prep_dma_memcpy(struct dma_chan *dchan, dma_addr_t dst_adr,
+ dma_addr_t src_adr, size_t len, unsigned long flags)
+{
+ struct axi_dma_chan *chan = dchan_to_axi_dma_chan(dchan);
+ size_t block_ts, max_block_ts, xfer_len;
+ struct axi_dma_hw_desc *hw_desc = NULL;
+ struct axi_dma_desc *desc = NULL;
+ u32 xfer_width, reg, num;
+ u64 llp = 0;
+ u8 lms = 0; /* Select AXI0 master for LLI fetching */
+
+ dev_dbg(chan2dev(chan), "%s: memcpy: src: %pad dst: %pad length: %zd flags: %#lx",
+ axi_chan_name(chan), &src_adr, &dst_adr, len, flags);
+
+ max_block_ts = chan->chip->dw->hdata->block_size[chan->id];
+ xfer_width = axi_chan_get_xfer_width(chan, src_adr, dst_adr, len);
+ num = DIV_ROUND_UP(len, max_block_ts << xfer_width);
+ desc = axi_desc_alloc(num);
+ if (unlikely(!desc))
+ goto err_desc_get;
+
+ desc->chan = chan;
+ num = 0;
+ desc->length = 0;
+ while (len) {
+ xfer_len = len;
+
+ hw_desc = &desc->hw_desc[num];
+ /*
+ * Take care for the alignment.
+ * Actually source and destination widths can be different, but
+ * make them same to be simpler.
+ */
+ xfer_width = axi_chan_get_xfer_width(chan, src_adr, dst_adr, xfer_len);
+
+ /*
+ * block_ts indicates the total number of data of width
+ * to be transferred in a DMA block transfer.
+ * BLOCK_TS register should be set to block_ts - 1
+ */
+ block_ts = xfer_len >> xfer_width;
+ if (block_ts > max_block_ts) {
+ block_ts = max_block_ts;
+ xfer_len = max_block_ts << xfer_width;
+ }
+
+ hw_desc->lli = axi_desc_get(chan, &hw_desc->llp);
+ if (unlikely(!hw_desc->lli))
+ goto err_desc_get;
+
+ write_desc_sar(hw_desc, src_adr);
+ write_desc_dar(hw_desc, dst_adr);
+ hw_desc->lli->block_ts_lo = cpu_to_le32(block_ts - 1);
+
+ reg = CH_CTL_H_LLI_VALID;
+ if (chan->chip->dw->hdata->restrict_axi_burst_len) {
+ u32 burst_len = chan->chip->dw->hdata->axi_rw_burst_len;
+
+ reg |= (CH_CTL_H_ARLEN_EN |
+ burst_len << CH_CTL_H_ARLEN_POS |
+ CH_CTL_H_AWLEN_EN |
+ burst_len << CH_CTL_H_AWLEN_POS);
+ }
+ hw_desc->lli->ctl_hi = cpu_to_le32(reg);
+
+ reg = (DWAXIDMAC_BURST_TRANS_LEN_4 << CH_CTL_L_DST_MSIZE_POS |
+ DWAXIDMAC_BURST_TRANS_LEN_4 << CH_CTL_L_SRC_MSIZE_POS |
+ xfer_width << CH_CTL_L_DST_WIDTH_POS |
+ xfer_width << CH_CTL_L_SRC_WIDTH_POS |
+ DWAXIDMAC_CH_CTL_L_INC << CH_CTL_L_DST_INC_POS |
+ DWAXIDMAC_CH_CTL_L_INC << CH_CTL_L_SRC_INC_POS);
+ hw_desc->lli->ctl_lo = cpu_to_le32(reg);
+
+ set_desc_src_master(hw_desc);
+ set_desc_dest_master(hw_desc, desc);
+
+ hw_desc->len = xfer_len;
+ desc->length += hw_desc->len;
+ /* update the length and addresses for the next loop cycle */
+ len -= xfer_len;
+ dst_adr += xfer_len;
+ src_adr += xfer_len;
+ num++;
+ }
+
+ /* Set end-of-link to the last link descriptor of list */
+ set_desc_last(&desc->hw_desc[num - 1]);
+ /* Managed transfer list */
+ do {
+ hw_desc = &desc->hw_desc[--num];
+ write_desc_llp(hw_desc, llp | lms);
+ llp = hw_desc->llp;
+ } while (num);
+
+ return vchan_tx_prep(&chan->vc, &desc->vd, flags);
+
+err_desc_get:
+ if (desc)
+ axi_desc_put(desc);
+ return NULL;
+}
+
+static int dw_axi_dma_chan_slave_config(struct dma_chan *dchan,
+ struct dma_slave_config *config)
+{
+ struct axi_dma_chan *chan = dchan_to_axi_dma_chan(dchan);
+
+ memcpy(&chan->config, config, sizeof(*config));
+
+ return 0;
+}
+
+static void axi_chan_dump_lli(struct axi_dma_chan *chan,
+ struct axi_dma_hw_desc *desc)
+{
+ if (!desc->lli) {
+ dev_err(dchan2dev(&chan->vc.chan), "NULL LLI\n");
+ return;
+ }
+
+ dev_err(dchan2dev(&chan->vc.chan),
+ "SAR: 0x%llx DAR: 0x%llx LLP: 0x%llx BTS 0x%x CTL: 0x%x:%08x",
+ le64_to_cpu(desc->lli->sar),
+ le64_to_cpu(desc->lli->dar),
+ le64_to_cpu(desc->lli->llp),
+ le32_to_cpu(desc->lli->block_ts_lo),
+ le32_to_cpu(desc->lli->ctl_hi),
+ le32_to_cpu(desc->lli->ctl_lo));
+}
+
+static void axi_chan_list_dump_lli(struct axi_dma_chan *chan,
+ struct axi_dma_desc *desc_head)
+{
+ int count = atomic_read(&chan->descs_allocated);
+ int i;
+
+ for (i = 0; i < count; i++)
+ axi_chan_dump_lli(chan, &desc_head->hw_desc[i]);
+}
+
+static noinline void axi_chan_handle_err(struct axi_dma_chan *chan, u32 status)
+{
+ struct virt_dma_desc *vd;
+ unsigned long flags;
+
+ spin_lock_irqsave(&chan->vc.lock, flags);
+
+ axi_chan_disable(chan);
+
+ /* The bad descriptor currently is in the head of vc list */
+ vd = vchan_next_desc(&chan->vc);
+ if (!vd) {
+ dev_err(chan2dev(chan), "BUG: %s, IRQ with no descriptors\n",
+ axi_chan_name(chan));
+ goto out;
+ }
+ /* Remove the completed descriptor from issued list */
+ list_del(&vd->node);
+
+ /* WARN about bad descriptor */
+ dev_err(chan2dev(chan),
+ "Bad descriptor submitted for %s, cookie: %d, irq: 0x%08x\n",
+ axi_chan_name(chan), vd->tx.cookie, status);
+ axi_chan_list_dump_lli(chan, vd_to_axi_desc(vd));
+
+ vchan_cookie_complete(vd);
+
+ /* Try to restart the controller */
+ axi_chan_start_first_queued(chan);
+
+out:
+ spin_unlock_irqrestore(&chan->vc.lock, flags);
+}
+
+static void axi_chan_block_xfer_complete(struct axi_dma_chan *chan)
+{
+ int count = atomic_read(&chan->descs_allocated);
+ struct axi_dma_hw_desc *hw_desc;
+ struct axi_dma_desc *desc;
+ struct virt_dma_desc *vd;
+ unsigned long flags;
+ u64 llp;
+ int i;
+
+ spin_lock_irqsave(&chan->vc.lock, flags);
+ if (unlikely(axi_chan_is_hw_enable(chan))) {
+ dev_err(chan2dev(chan), "BUG: %s caught DWAXIDMAC_IRQ_DMA_TRF, but channel not idle!\n",
+ axi_chan_name(chan));
+ axi_chan_disable(chan);
+ }
+
+ /* The completed descriptor currently is in the head of vc list */
+ vd = vchan_next_desc(&chan->vc);
+ if (!vd) {
+ dev_err(chan2dev(chan), "BUG: %s, IRQ with no descriptors\n",
+ axi_chan_name(chan));
+ goto out;
+ }
+
+ if (chan->cyclic) {
+ desc = vd_to_axi_desc(vd);
+ if (desc) {
+ llp = lo_hi_readq(chan->chan_regs + CH_LLP);
+ for (i = 0; i < count; i++) {
+ hw_desc = &desc->hw_desc[i];
+ if (hw_desc->llp == llp) {
+ axi_chan_irq_clear(chan, hw_desc->lli->status_lo);
+ hw_desc->lli->ctl_hi |= CH_CTL_H_LLI_VALID;
+ desc->completed_blocks = i;
+
+ if (((hw_desc->len * (i + 1)) % desc->period_len) == 0)
+ vchan_cyclic_callback(vd);
+ break;
+ }
+ }
+
+ axi_chan_enable(chan);
+ }
+ } else {
+ /* Remove the completed descriptor from issued list before completing */
+ list_del(&vd->node);
+ vchan_cookie_complete(vd);
+
+ /* Submit queued descriptors after processing the completed ones */
+ axi_chan_start_first_queued(chan);
+ }
+
+out:
+ spin_unlock_irqrestore(&chan->vc.lock, flags);
+}
+
+static irqreturn_t dw_axi_dma_interrupt(int irq, void *dev_id)
+{
+ struct axi_dma_chip *chip = dev_id;
+ struct dw_axi_dma *dw = chip->dw;
+ struct axi_dma_chan *chan;
+
+ u32 status, i;
+
+ /* Disable DMAC interrupts. We'll enable them after processing channels */
+ axi_dma_irq_disable(chip);
+
+ /* Poll, clear and process every channel interrupt status */
+ for (i = 0; i < dw->hdata->nr_channels; i++) {
+ chan = &dw->chan[i];
+ status = axi_chan_irq_read(chan);
+ axi_chan_irq_clear(chan, status);
+
+ dev_vdbg(chip->dev, "%s %u IRQ status: 0x%08x\n",
+ axi_chan_name(chan), i, status);
+
+ if (status & DWAXIDMAC_IRQ_ALL_ERR)
+ axi_chan_handle_err(chan, status);
+ else if (status & DWAXIDMAC_IRQ_DMA_TRF)
+ axi_chan_block_xfer_complete(chan);
+ }
+
+ /* Re-enable interrupts */
+ axi_dma_irq_enable(chip);
+
+ return IRQ_HANDLED;
+}
+
+static int dma_chan_terminate_all(struct dma_chan *dchan)
+{
+ struct axi_dma_chan *chan = dchan_to_axi_dma_chan(dchan);
+ u32 chan_active = BIT(chan->id) << DMAC_CHAN_EN_SHIFT;
+ unsigned long flags;
+ u32 val;
+ int ret;
+ LIST_HEAD(head);
+
+ axi_chan_disable(chan);
+
+ ret = readl_poll_timeout_atomic(chan->chip->regs + DMAC_CHEN, val,
+ !(val & chan_active), 1000, 10000);
+ if (ret == -ETIMEDOUT)
+ dev_warn(dchan2dev(dchan),
+ "%s failed to stop\n", axi_chan_name(chan));
+
+ if (chan->direction != DMA_MEM_TO_MEM)
+ dw_axi_dma_set_hw_channel(chan, false);
+ if (chan->direction == DMA_MEM_TO_DEV)
+ dw_axi_dma_set_byte_halfword(chan, false);
+
+ spin_lock_irqsave(&chan->vc.lock, flags);
+
+ vchan_get_all_descriptors(&chan->vc, &head);
+
+ chan->cyclic = false;
+ spin_unlock_irqrestore(&chan->vc.lock, flags);
+
+ vchan_dma_desc_free_list(&chan->vc, &head);
+
+ dev_vdbg(dchan2dev(dchan), "terminated: %s\n", axi_chan_name(chan));
+
+ return 0;
+}
+
+static int dma_chan_pause(struct dma_chan *dchan)
+{
+ struct axi_dma_chan *chan = dchan_to_axi_dma_chan(dchan);
+ unsigned long flags;
+ unsigned int timeout = 20; /* timeout iterations */
+ u32 val;
+
+ spin_lock_irqsave(&chan->vc.lock, flags);
+
+ if (chan->chip->dw->hdata->reg_map_8_channels) {
+ val = axi_dma_ioread32(chan->chip, DMAC_CHEN);
+ val |= BIT(chan->id) << DMAC_CHAN_SUSP_SHIFT |
+ BIT(chan->id) << DMAC_CHAN_SUSP_WE_SHIFT;
+ axi_dma_iowrite32(chan->chip, DMAC_CHEN, val);
+ } else {
+ val = axi_dma_ioread32(chan->chip, DMAC_CHSUSPREG);
+ val |= BIT(chan->id) << DMAC_CHAN_SUSP2_SHIFT |
+ BIT(chan->id) << DMAC_CHAN_SUSP2_WE_SHIFT;
+ axi_dma_iowrite32(chan->chip, DMAC_CHSUSPREG, val);
+ }
+
+ do {
+ if (axi_chan_irq_read(chan) & DWAXIDMAC_IRQ_SUSPENDED)
+ break;
+
+ udelay(2);
+ } while (--timeout);
+
+ axi_chan_irq_clear(chan, DWAXIDMAC_IRQ_SUSPENDED);
+
+ chan->is_paused = true;
+
+ spin_unlock_irqrestore(&chan->vc.lock, flags);
+
+ return timeout ? 0 : -EAGAIN;
+}
+
+/* Called in chan locked context */
+static inline void axi_chan_resume(struct axi_dma_chan *chan)
+{
+ u32 val;
+
+ if (chan->chip->dw->hdata->reg_map_8_channels) {
+ val = axi_dma_ioread32(chan->chip, DMAC_CHEN);
+ val &= ~(BIT(chan->id) << DMAC_CHAN_SUSP_SHIFT);
+ val |= (BIT(chan->id) << DMAC_CHAN_SUSP_WE_SHIFT);
+ axi_dma_iowrite32(chan->chip, DMAC_CHEN, val);
+ } else {
+ val = axi_dma_ioread32(chan->chip, DMAC_CHSUSPREG);
+ val &= ~(BIT(chan->id) << DMAC_CHAN_SUSP2_SHIFT);
+ val |= (BIT(chan->id) << DMAC_CHAN_SUSP2_WE_SHIFT);
+ axi_dma_iowrite32(chan->chip, DMAC_CHSUSPREG, val);
+ }
+
+ chan->is_paused = false;
+}
+
+static int dma_chan_resume(struct dma_chan *dchan)
+{
+ struct axi_dma_chan *chan = dchan_to_axi_dma_chan(dchan);
+ unsigned long flags;
+
+ spin_lock_irqsave(&chan->vc.lock, flags);
+
+ if (chan->is_paused)
+ axi_chan_resume(chan);
+
+ spin_unlock_irqrestore(&chan->vc.lock, flags);
+
+ return 0;
+}
+
+static int axi_dma_suspend(struct axi_dma_chip *chip)
+{
+ axi_dma_irq_disable(chip);
+ axi_dma_disable(chip);
+
+ clk_disable_unprepare(chip->core_clk);
+ clk_disable_unprepare(chip->cfgr_clk);
+
+ return 0;
+}
+
+static int axi_dma_resume(struct axi_dma_chip *chip)
+{
+ int ret;
+
+ ret = clk_prepare_enable(chip->cfgr_clk);
+ if (ret < 0)
+ return ret;
+
+ ret = clk_prepare_enable(chip->core_clk);
+ if (ret < 0)
+ return ret;
+
+ axi_dma_enable(chip);
+ axi_dma_irq_enable(chip);
+
+ return 0;
+}
+
+static int __maybe_unused axi_dma_runtime_suspend(struct device *dev)
+{
+ struct axi_dma_chip *chip = dev_get_drvdata(dev);
+
+ return axi_dma_suspend(chip);
+}
+
+static int __maybe_unused axi_dma_runtime_resume(struct device *dev)
+{
+ struct axi_dma_chip *chip = dev_get_drvdata(dev);
+
+ return axi_dma_resume(chip);
+}
+
+static struct dma_chan *dw_axi_dma_of_xlate(struct of_phandle_args *dma_spec,
+ struct of_dma *ofdma)
+{
+ struct dw_axi_dma *dw = ofdma->of_dma_data;
+ struct axi_dma_chan *chan;
+ struct dma_chan *dchan;
+
+ dchan = dma_get_any_slave_channel(&dw->dma);
+ if (!dchan)
+ return NULL;
+
+ chan = dchan_to_axi_dma_chan(dchan);
+ chan->hw_handshake_num = dma_spec->args[0];
+ return dchan;
+}
+
+static int parse_device_properties(struct axi_dma_chip *chip)
+{
+ struct device *dev = chip->dev;
+ u32 tmp, carr[DMAC_MAX_CHANNELS];
+ int ret;
+
+ ret = device_property_read_u32(dev, "dma-channels", &tmp);
+ if (ret)
+ return ret;
+ if (tmp == 0 || tmp > DMAC_MAX_CHANNELS)
+ return -EINVAL;
+
+ chip->dw->hdata->nr_channels = tmp;
+ if (tmp <= DMA_REG_MAP_CH_REF)
+ chip->dw->hdata->reg_map_8_channels = true;
+
+ ret = device_property_read_u32(dev, "snps,dma-masters", &tmp);
+ if (ret)
+ return ret;
+ if (tmp == 0 || tmp > DMAC_MAX_MASTERS)
+ return -EINVAL;
+
+ chip->dw->hdata->nr_masters = tmp;
+
+ ret = device_property_read_u32(dev, "snps,data-width", &tmp);
+ if (ret)
+ return ret;
+ if (tmp > DWAXIDMAC_TRANS_WIDTH_MAX)
+ return -EINVAL;
+
+ chip->dw->hdata->m_data_width = tmp;
+
+ ret = device_property_read_u32_array(dev, "snps,block-size", carr,
+ chip->dw->hdata->nr_channels);
+ if (ret)
+ return ret;
+ for (tmp = 0; tmp < chip->dw->hdata->nr_channels; tmp++) {
+ if (carr[tmp] == 0 || carr[tmp] > DMAC_MAX_BLK_SIZE)
+ return -EINVAL;
+
+ chip->dw->hdata->block_size[tmp] = carr[tmp];
+ }
+
+ ret = device_property_read_u32_array(dev, "snps,priority", carr,
+ chip->dw->hdata->nr_channels);
+ if (ret)
+ return ret;
+ /* Priority value must be programmed within [0:nr_channels-1] range */
+ for (tmp = 0; tmp < chip->dw->hdata->nr_channels; tmp++) {
+ if (carr[tmp] >= chip->dw->hdata->nr_channels)
+ return -EINVAL;
+
+ chip->dw->hdata->priority[tmp] = carr[tmp];
+ }
+
+ /* axi-max-burst-len is optional property */
+ ret = device_property_read_u32(dev, "snps,axi-max-burst-len", &tmp);
+ if (!ret) {
+ if (tmp > DWAXIDMAC_ARWLEN_MAX + 1)
+ return -EINVAL;
+ if (tmp < DWAXIDMAC_ARWLEN_MIN + 1)
+ return -EINVAL;
+
+ chip->dw->hdata->restrict_axi_burst_len = true;
+ chip->dw->hdata->axi_rw_burst_len = tmp;
+ }
+
+ return 0;
+}
+
+static int dw_probe(struct platform_device *pdev)
+{
+ struct device_node *node = pdev->dev.of_node;
+ struct axi_dma_chip *chip;
+ struct resource *mem;
+ struct dw_axi_dma *dw;
+ struct dw_axi_dma_hcfg *hdata;
+ u32 i;
+ int ret;
+
+ chip = devm_kzalloc(&pdev->dev, sizeof(*chip), GFP_KERNEL);
+ if (!chip)
+ return -ENOMEM;
+
+ dw = devm_kzalloc(&pdev->dev, sizeof(*dw), GFP_KERNEL);
+ if (!dw)
+ return -ENOMEM;
+
+ hdata = devm_kzalloc(&pdev->dev, sizeof(*hdata), GFP_KERNEL);
+ if (!hdata)
+ return -ENOMEM;
+
+ chip->dw = dw;
+ chip->dev = &pdev->dev;
+ chip->dw->hdata = hdata;
+
+ chip->irq = platform_get_irq(pdev, 0);
+ if (chip->irq < 0)
+ return chip->irq;
+
+ mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ chip->regs = devm_ioremap_resource(chip->dev, mem);
+ if (IS_ERR(chip->regs))
+ return PTR_ERR(chip->regs);
+
+ if (of_device_is_compatible(node, "intel,kmb-axi-dma")) {
+ chip->apb_regs = devm_platform_ioremap_resource(pdev, 1);
+ if (IS_ERR(chip->apb_regs))
+ return PTR_ERR(chip->apb_regs);
+ }
+
+ chip->core_clk = devm_clk_get(chip->dev, "core-clk");
+ if (IS_ERR(chip->core_clk))
+ return PTR_ERR(chip->core_clk);
+
+ chip->cfgr_clk = devm_clk_get(chip->dev, "cfgr-clk");
+ if (IS_ERR(chip->cfgr_clk))
+ return PTR_ERR(chip->cfgr_clk);
+
+ ret = parse_device_properties(chip);
+ if (ret)
+ return ret;
+
+ dw->chan = devm_kcalloc(chip->dev, hdata->nr_channels,
+ sizeof(*dw->chan), GFP_KERNEL);
+ if (!dw->chan)
+ return -ENOMEM;
+
+ ret = devm_request_irq(chip->dev, chip->irq, dw_axi_dma_interrupt,
+ IRQF_SHARED, KBUILD_MODNAME, chip);
+ if (ret)
+ return ret;
+
+ INIT_LIST_HEAD(&dw->dma.channels);
+ for (i = 0; i < hdata->nr_channels; i++) {
+ struct axi_dma_chan *chan = &dw->chan[i];
+
+ chan->chip = chip;
+ chan->id = i;
+ chan->chan_regs = chip->regs + COMMON_REG_LEN + i * CHAN_REG_LEN;
+ atomic_set(&chan->descs_allocated, 0);
+
+ chan->vc.desc_free = vchan_desc_put;
+ vchan_init(&chan->vc, &dw->dma);
+ }
+
+ /* Set capabilities */
+ dma_cap_set(DMA_MEMCPY, dw->dma.cap_mask);
+ dma_cap_set(DMA_SLAVE, dw->dma.cap_mask);
+ dma_cap_set(DMA_CYCLIC, dw->dma.cap_mask);
+
+ /* DMA capabilities */
+ dw->dma.chancnt = hdata->nr_channels;
+ dw->dma.max_burst = hdata->axi_rw_burst_len;
+ dw->dma.src_addr_widths = AXI_DMA_BUSWIDTHS;
+ dw->dma.dst_addr_widths = AXI_DMA_BUSWIDTHS;
+ dw->dma.directions = BIT(DMA_MEM_TO_MEM);
+ dw->dma.directions |= BIT(DMA_MEM_TO_DEV) | BIT(DMA_DEV_TO_MEM);
+ dw->dma.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
+
+ dw->dma.dev = chip->dev;
+ dw->dma.device_tx_status = dma_chan_tx_status;
+ dw->dma.device_issue_pending = dma_chan_issue_pending;
+ dw->dma.device_terminate_all = dma_chan_terminate_all;
+ dw->dma.device_pause = dma_chan_pause;
+ dw->dma.device_resume = dma_chan_resume;
+
+ dw->dma.device_alloc_chan_resources = dma_chan_alloc_chan_resources;
+ dw->dma.device_free_chan_resources = dma_chan_free_chan_resources;
+
+ dw->dma.device_prep_dma_memcpy = dma_chan_prep_dma_memcpy;
+ dw->dma.device_synchronize = dw_axi_dma_synchronize;
+ dw->dma.device_config = dw_axi_dma_chan_slave_config;
+ dw->dma.device_prep_slave_sg = dw_axi_dma_chan_prep_slave_sg;
+ dw->dma.device_prep_dma_cyclic = dw_axi_dma_chan_prep_cyclic;
+
+ /*
+ * Synopsis DesignWare AxiDMA datasheet mentioned Maximum
+ * supported blocks is 1024. Device register width is 4 bytes.
+ * Therefore, set constraint to 1024 * 4.
+ */
+ dw->dma.dev->dma_parms = &dw->dma_parms;
+ dma_set_max_seg_size(&pdev->dev, MAX_BLOCK_SIZE);
+ platform_set_drvdata(pdev, chip);
+
+ pm_runtime_enable(chip->dev);
+
+ /*
+ * We can't just call pm_runtime_get here instead of
+ * pm_runtime_get_noresume + axi_dma_resume because we need
+ * driver to work also without Runtime PM.
+ */
+ pm_runtime_get_noresume(chip->dev);
+ ret = axi_dma_resume(chip);
+ if (ret < 0)
+ goto err_pm_disable;
+
+ axi_dma_hw_init(chip);
+
+ pm_runtime_put(chip->dev);
+
+ ret = dmaenginem_async_device_register(&dw->dma);
+ if (ret)
+ goto err_pm_disable;
+
+ /* Register with OF helpers for DMA lookups */
+ ret = of_dma_controller_register(pdev->dev.of_node,
+ dw_axi_dma_of_xlate, dw);
+ if (ret < 0)
+ dev_warn(&pdev->dev,
+ "Failed to register OF DMA controller, fallback to MEM_TO_MEM mode\n");
+
+ dev_info(chip->dev, "DesignWare AXI DMA Controller, %d channels\n",
+ dw->hdata->nr_channels);
+
+ return 0;
+
+err_pm_disable:
+ pm_runtime_disable(chip->dev);
+
+ return ret;
+}
+
+static int dw_remove(struct platform_device *pdev)
+{
+ struct axi_dma_chip *chip = platform_get_drvdata(pdev);
+ struct dw_axi_dma *dw = chip->dw;
+ struct axi_dma_chan *chan, *_chan;
+ u32 i;
+
+ /* Enable clk before accessing to registers */
+ clk_prepare_enable(chip->cfgr_clk);
+ clk_prepare_enable(chip->core_clk);
+ axi_dma_irq_disable(chip);
+ for (i = 0; i < dw->hdata->nr_channels; i++) {
+ axi_chan_disable(&chip->dw->chan[i]);
+ axi_chan_irq_disable(&chip->dw->chan[i], DWAXIDMAC_IRQ_ALL);
+ }
+ axi_dma_disable(chip);
+
+ pm_runtime_disable(chip->dev);
+ axi_dma_suspend(chip);
+
+ devm_free_irq(chip->dev, chip->irq, chip);
+
+ of_dma_controller_free(chip->dev->of_node);
+
+ list_for_each_entry_safe(chan, _chan, &dw->dma.channels,
+ vc.chan.device_node) {
+ list_del(&chan->vc.chan.device_node);
+ tasklet_kill(&chan->vc.task);
+ }
+
+ return 0;
+}
+
+static const struct dev_pm_ops dw_axi_dma_pm_ops = {
+ SET_RUNTIME_PM_OPS(axi_dma_runtime_suspend, axi_dma_runtime_resume, NULL)
+};
+
+static const struct of_device_id dw_dma_of_id_table[] = {
+ { .compatible = "snps,axi-dma-1.01a" },
+ { .compatible = "intel,kmb-axi-dma" },
+ {}
+};
+MODULE_DEVICE_TABLE(of, dw_dma_of_id_table);
+
+static struct platform_driver dw_driver = {
+ .probe = dw_probe,
+ .remove = dw_remove,
+ .driver = {
+ .name = KBUILD_MODNAME,
+ .of_match_table = dw_dma_of_id_table,
+ .pm = &dw_axi_dma_pm_ops,
+ },
+};
+module_platform_driver(dw_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("Synopsys DesignWare AXI DMA Controller platform driver");
+MODULE_AUTHOR("Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>");
diff --git a/drivers/dma/dw-axi-dmac/dw-axi-dmac.h b/drivers/dma/dw-axi-dmac/dw-axi-dmac.h
new file mode 100644
index 000000000..e9d5eb0fd
--- /dev/null
+++ b/drivers/dma/dw-axi-dmac/dw-axi-dmac.h
@@ -0,0 +1,399 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+// (C) 2017-2018 Synopsys, Inc. (www.synopsys.com)
+
+/*
+ * Synopsys DesignWare AXI DMA Controller driver.
+ *
+ * Author: Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>
+ */
+
+#ifndef _AXI_DMA_PLATFORM_H
+#define _AXI_DMA_PLATFORM_H
+
+#include <linux/bitops.h>
+#include <linux/clk.h>
+#include <linux/device.h>
+#include <linux/dmaengine.h>
+#include <linux/types.h>
+
+#include "../virt-dma.h"
+
+#define DMAC_MAX_CHANNELS 16
+#define DMAC_MAX_MASTERS 2
+#define DMAC_MAX_BLK_SIZE 0x200000
+
+struct dw_axi_dma_hcfg {
+ u32 nr_channels;
+ u32 nr_masters;
+ u32 m_data_width;
+ u32 block_size[DMAC_MAX_CHANNELS];
+ u32 priority[DMAC_MAX_CHANNELS];
+ /* maximum supported axi burst length */
+ u32 axi_rw_burst_len;
+ /* Register map for DMAX_NUM_CHANNELS <= 8 */
+ bool reg_map_8_channels;
+ bool restrict_axi_burst_len;
+};
+
+struct axi_dma_chan {
+ struct axi_dma_chip *chip;
+ void __iomem *chan_regs;
+ u8 id;
+ u8 hw_handshake_num;
+ atomic_t descs_allocated;
+
+ struct dma_pool *desc_pool;
+ struct virt_dma_chan vc;
+
+ struct axi_dma_desc *desc;
+ struct dma_slave_config config;
+ enum dma_transfer_direction direction;
+ bool cyclic;
+ /* these other elements are all protected by vc.lock */
+ bool is_paused;
+};
+
+struct dw_axi_dma {
+ struct dma_device dma;
+ struct dw_axi_dma_hcfg *hdata;
+ struct device_dma_parameters dma_parms;
+
+ /* channels */
+ struct axi_dma_chan *chan;
+};
+
+struct axi_dma_chip {
+ struct device *dev;
+ int irq;
+ void __iomem *regs;
+ void __iomem *apb_regs;
+ struct clk *core_clk;
+ struct clk *cfgr_clk;
+ struct dw_axi_dma *dw;
+};
+
+/* LLI == Linked List Item */
+struct __packed axi_dma_lli {
+ __le64 sar;
+ __le64 dar;
+ __le32 block_ts_lo;
+ __le32 block_ts_hi;
+ __le64 llp;
+ __le32 ctl_lo;
+ __le32 ctl_hi;
+ __le32 sstat;
+ __le32 dstat;
+ __le32 status_lo;
+ __le32 status_hi;
+ __le32 reserved_lo;
+ __le32 reserved_hi;
+};
+
+struct axi_dma_hw_desc {
+ struct axi_dma_lli *lli;
+ dma_addr_t llp;
+ u32 len;
+};
+
+struct axi_dma_desc {
+ struct axi_dma_hw_desc *hw_desc;
+
+ struct virt_dma_desc vd;
+ struct axi_dma_chan *chan;
+ u32 completed_blocks;
+ u32 length;
+ u32 period_len;
+};
+
+struct axi_dma_chan_config {
+ u8 dst_multblk_type;
+ u8 src_multblk_type;
+ u8 dst_per;
+ u8 src_per;
+ u8 tt_fc;
+ u8 prior;
+ u8 hs_sel_dst;
+ u8 hs_sel_src;
+};
+
+static inline struct device *dchan2dev(struct dma_chan *dchan)
+{
+ return &dchan->dev->device;
+}
+
+static inline struct device *chan2dev(struct axi_dma_chan *chan)
+{
+ return &chan->vc.chan.dev->device;
+}
+
+static inline struct axi_dma_desc *vd_to_axi_desc(struct virt_dma_desc *vd)
+{
+ return container_of(vd, struct axi_dma_desc, vd);
+}
+
+static inline struct axi_dma_chan *vc_to_axi_dma_chan(struct virt_dma_chan *vc)
+{
+ return container_of(vc, struct axi_dma_chan, vc);
+}
+
+static inline struct axi_dma_chan *dchan_to_axi_dma_chan(struct dma_chan *dchan)
+{
+ return vc_to_axi_dma_chan(to_virt_chan(dchan));
+}
+
+
+#define COMMON_REG_LEN 0x100
+#define CHAN_REG_LEN 0x100
+
+/* Common registers offset */
+#define DMAC_ID 0x000 /* R DMAC ID */
+#define DMAC_COMPVER 0x008 /* R DMAC Component Version */
+#define DMAC_CFG 0x010 /* R/W DMAC Configuration */
+#define DMAC_CHEN 0x018 /* R/W DMAC Channel Enable */
+#define DMAC_CHEN_L 0x018 /* R/W DMAC Channel Enable 00-31 */
+#define DMAC_CHEN_H 0x01C /* R/W DMAC Channel Enable 32-63 */
+#define DMAC_CHSUSPREG 0x020 /* R/W DMAC Channel Suspend */
+#define DMAC_CHABORTREG 0x028 /* R/W DMAC Channel Abort */
+#define DMAC_INTSTATUS 0x030 /* R DMAC Interrupt Status */
+#define DMAC_COMMON_INTCLEAR 0x038 /* W DMAC Interrupt Clear */
+#define DMAC_COMMON_INTSTATUS_ENA 0x040 /* R DMAC Interrupt Status Enable */
+#define DMAC_COMMON_INTSIGNAL_ENA 0x048 /* R/W DMAC Interrupt Signal Enable */
+#define DMAC_COMMON_INTSTATUS 0x050 /* R DMAC Interrupt Status */
+#define DMAC_RESET 0x058 /* R DMAC Reset Register1 */
+
+/* DMA channel registers offset */
+#define CH_SAR 0x000 /* R/W Chan Source Address */
+#define CH_DAR 0x008 /* R/W Chan Destination Address */
+#define CH_BLOCK_TS 0x010 /* R/W Chan Block Transfer Size */
+#define CH_CTL 0x018 /* R/W Chan Control */
+#define CH_CTL_L 0x018 /* R/W Chan Control 00-31 */
+#define CH_CTL_H 0x01C /* R/W Chan Control 32-63 */
+#define CH_CFG 0x020 /* R/W Chan Configuration */
+#define CH_CFG_L 0x020 /* R/W Chan Configuration 00-31 */
+#define CH_CFG_H 0x024 /* R/W Chan Configuration 32-63 */
+#define CH_LLP 0x028 /* R/W Chan Linked List Pointer */
+#define CH_STATUS 0x030 /* R Chan Status */
+#define CH_SWHSSRC 0x038 /* R/W Chan SW Handshake Source */
+#define CH_SWHSDST 0x040 /* R/W Chan SW Handshake Destination */
+#define CH_BLK_TFR_RESUMEREQ 0x048 /* W Chan Block Transfer Resume Req */
+#define CH_AXI_ID 0x050 /* R/W Chan AXI ID */
+#define CH_AXI_QOS 0x058 /* R/W Chan AXI QOS */
+#define CH_SSTAT 0x060 /* R Chan Source Status */
+#define CH_DSTAT 0x068 /* R Chan Destination Status */
+#define CH_SSTATAR 0x070 /* R/W Chan Source Status Fetch Addr */
+#define CH_DSTATAR 0x078 /* R/W Chan Destination Status Fetch Addr */
+#define CH_INTSTATUS_ENA 0x080 /* R/W Chan Interrupt Status Enable */
+#define CH_INTSTATUS 0x088 /* R/W Chan Interrupt Status */
+#define CH_INTSIGNAL_ENA 0x090 /* R/W Chan Interrupt Signal Enable */
+#define CH_INTCLEAR 0x098 /* W Chan Interrupt Clear */
+
+/* These Apb registers are used by Intel KeemBay SoC */
+#define DMAC_APB_CFG 0x000 /* DMAC Apb Configuration Register */
+#define DMAC_APB_STAT 0x004 /* DMAC Apb Status Register */
+#define DMAC_APB_DEBUG_STAT_0 0x008 /* DMAC Apb Debug Status Register 0 */
+#define DMAC_APB_DEBUG_STAT_1 0x00C /* DMAC Apb Debug Status Register 1 */
+#define DMAC_APB_HW_HS_SEL_0 0x010 /* DMAC Apb HW HS register 0 */
+#define DMAC_APB_HW_HS_SEL_1 0x014 /* DMAC Apb HW HS register 1 */
+#define DMAC_APB_LPI 0x018 /* DMAC Apb Low Power Interface Reg */
+#define DMAC_APB_BYTE_WR_CH_EN 0x01C /* DMAC Apb Byte Write Enable */
+#define DMAC_APB_HALFWORD_WR_CH_EN 0x020 /* DMAC Halfword write enables */
+
+#define UNUSED_CHANNEL 0x3F /* Set unused DMA channel to 0x3F */
+#define DMA_APB_HS_SEL_BIT_SIZE 0x08 /* HW handshake bits per channel */
+#define DMA_APB_HS_SEL_MASK 0xFF /* HW handshake select masks */
+#define MAX_BLOCK_SIZE 0x1000 /* 1024 blocks * 4 bytes data width */
+#define DMA_REG_MAP_CH_REF 0x08 /* Channel count to choose register map */
+
+/* DMAC_CFG */
+#define DMAC_EN_POS 0
+#define DMAC_EN_MASK BIT(DMAC_EN_POS)
+
+#define INT_EN_POS 1
+#define INT_EN_MASK BIT(INT_EN_POS)
+
+/* DMAC_CHEN */
+#define DMAC_CHAN_EN_SHIFT 0
+#define DMAC_CHAN_EN_WE_SHIFT 8
+
+#define DMAC_CHAN_SUSP_SHIFT 16
+#define DMAC_CHAN_SUSP_WE_SHIFT 24
+
+/* DMAC_CHEN2 */
+#define DMAC_CHAN_EN2_WE_SHIFT 16
+
+/* DMAC_CHSUSP */
+#define DMAC_CHAN_SUSP2_SHIFT 0
+#define DMAC_CHAN_SUSP2_WE_SHIFT 16
+
+/* CH_CTL_H */
+#define CH_CTL_H_ARLEN_EN BIT(6)
+#define CH_CTL_H_ARLEN_POS 7
+#define CH_CTL_H_AWLEN_EN BIT(15)
+#define CH_CTL_H_AWLEN_POS 16
+
+enum {
+ DWAXIDMAC_ARWLEN_1 = 0,
+ DWAXIDMAC_ARWLEN_2 = 1,
+ DWAXIDMAC_ARWLEN_4 = 3,
+ DWAXIDMAC_ARWLEN_8 = 7,
+ DWAXIDMAC_ARWLEN_16 = 15,
+ DWAXIDMAC_ARWLEN_32 = 31,
+ DWAXIDMAC_ARWLEN_64 = 63,
+ DWAXIDMAC_ARWLEN_128 = 127,
+ DWAXIDMAC_ARWLEN_256 = 255,
+ DWAXIDMAC_ARWLEN_MIN = DWAXIDMAC_ARWLEN_1,
+ DWAXIDMAC_ARWLEN_MAX = DWAXIDMAC_ARWLEN_256
+};
+
+#define CH_CTL_H_LLI_LAST BIT(30)
+#define CH_CTL_H_LLI_VALID BIT(31)
+
+/* CH_CTL_L */
+#define CH_CTL_L_LAST_WRITE_EN BIT(30)
+
+#define CH_CTL_L_DST_MSIZE_POS 18
+#define CH_CTL_L_SRC_MSIZE_POS 14
+
+enum {
+ DWAXIDMAC_BURST_TRANS_LEN_1 = 0,
+ DWAXIDMAC_BURST_TRANS_LEN_4,
+ DWAXIDMAC_BURST_TRANS_LEN_8,
+ DWAXIDMAC_BURST_TRANS_LEN_16,
+ DWAXIDMAC_BURST_TRANS_LEN_32,
+ DWAXIDMAC_BURST_TRANS_LEN_64,
+ DWAXIDMAC_BURST_TRANS_LEN_128,
+ DWAXIDMAC_BURST_TRANS_LEN_256,
+ DWAXIDMAC_BURST_TRANS_LEN_512,
+ DWAXIDMAC_BURST_TRANS_LEN_1024
+};
+
+#define CH_CTL_L_DST_WIDTH_POS 11
+#define CH_CTL_L_SRC_WIDTH_POS 8
+
+#define CH_CTL_L_DST_INC_POS 6
+#define CH_CTL_L_SRC_INC_POS 4
+enum {
+ DWAXIDMAC_CH_CTL_L_INC = 0,
+ DWAXIDMAC_CH_CTL_L_NOINC
+};
+
+#define CH_CTL_L_DST_MAST BIT(2)
+#define CH_CTL_L_SRC_MAST BIT(0)
+
+/* CH_CFG_H */
+#define CH_CFG_H_PRIORITY_POS 17
+#define CH_CFG_H_DST_PER_POS 12
+#define CH_CFG_H_SRC_PER_POS 7
+#define CH_CFG_H_HS_SEL_DST_POS 4
+#define CH_CFG_H_HS_SEL_SRC_POS 3
+enum {
+ DWAXIDMAC_HS_SEL_HW = 0,
+ DWAXIDMAC_HS_SEL_SW
+};
+
+#define CH_CFG_H_TT_FC_POS 0
+enum {
+ DWAXIDMAC_TT_FC_MEM_TO_MEM_DMAC = 0,
+ DWAXIDMAC_TT_FC_MEM_TO_PER_DMAC,
+ DWAXIDMAC_TT_FC_PER_TO_MEM_DMAC,
+ DWAXIDMAC_TT_FC_PER_TO_PER_DMAC,
+ DWAXIDMAC_TT_FC_PER_TO_MEM_SRC,
+ DWAXIDMAC_TT_FC_PER_TO_PER_SRC,
+ DWAXIDMAC_TT_FC_MEM_TO_PER_DST,
+ DWAXIDMAC_TT_FC_PER_TO_PER_DST
+};
+
+/* CH_CFG_L */
+#define CH_CFG_L_DST_MULTBLK_TYPE_POS 2
+#define CH_CFG_L_SRC_MULTBLK_TYPE_POS 0
+enum {
+ DWAXIDMAC_MBLK_TYPE_CONTIGUOUS = 0,
+ DWAXIDMAC_MBLK_TYPE_RELOAD,
+ DWAXIDMAC_MBLK_TYPE_SHADOW_REG,
+ DWAXIDMAC_MBLK_TYPE_LL
+};
+
+/* CH_CFG2 */
+#define CH_CFG2_L_SRC_PER_POS 4
+#define CH_CFG2_L_DST_PER_POS 11
+
+#define CH_CFG2_H_TT_FC_POS 0
+#define CH_CFG2_H_HS_SEL_SRC_POS 3
+#define CH_CFG2_H_HS_SEL_DST_POS 4
+#define CH_CFG2_H_PRIORITY_POS 20
+
+/**
+ * DW AXI DMA channel interrupts
+ *
+ * @DWAXIDMAC_IRQ_NONE: Bitmask of no one interrupt
+ * @DWAXIDMAC_IRQ_BLOCK_TRF: Block transfer complete
+ * @DWAXIDMAC_IRQ_DMA_TRF: Dma transfer complete
+ * @DWAXIDMAC_IRQ_SRC_TRAN: Source transaction complete
+ * @DWAXIDMAC_IRQ_DST_TRAN: Destination transaction complete
+ * @DWAXIDMAC_IRQ_SRC_DEC_ERR: Source decode error
+ * @DWAXIDMAC_IRQ_DST_DEC_ERR: Destination decode error
+ * @DWAXIDMAC_IRQ_SRC_SLV_ERR: Source slave error
+ * @DWAXIDMAC_IRQ_DST_SLV_ERR: Destination slave error
+ * @DWAXIDMAC_IRQ_LLI_RD_DEC_ERR: LLI read decode error
+ * @DWAXIDMAC_IRQ_LLI_WR_DEC_ERR: LLI write decode error
+ * @DWAXIDMAC_IRQ_LLI_RD_SLV_ERR: LLI read slave error
+ * @DWAXIDMAC_IRQ_LLI_WR_SLV_ERR: LLI write slave error
+ * @DWAXIDMAC_IRQ_INVALID_ERR: LLI invalid error or Shadow register error
+ * @DWAXIDMAC_IRQ_MULTIBLKTYPE_ERR: Slave Interface Multiblock type error
+ * @DWAXIDMAC_IRQ_DEC_ERR: Slave Interface decode error
+ * @DWAXIDMAC_IRQ_WR2RO_ERR: Slave Interface write to read only error
+ * @DWAXIDMAC_IRQ_RD2RWO_ERR: Slave Interface read to write only error
+ * @DWAXIDMAC_IRQ_WRONCHEN_ERR: Slave Interface write to channel error
+ * @DWAXIDMAC_IRQ_SHADOWREG_ERR: Slave Interface shadow reg error
+ * @DWAXIDMAC_IRQ_WRONHOLD_ERR: Slave Interface hold error
+ * @DWAXIDMAC_IRQ_LOCK_CLEARED: Lock Cleared Status
+ * @DWAXIDMAC_IRQ_SRC_SUSPENDED: Source Suspended Status
+ * @DWAXIDMAC_IRQ_SUSPENDED: Channel Suspended Status
+ * @DWAXIDMAC_IRQ_DISABLED: Channel Disabled Status
+ * @DWAXIDMAC_IRQ_ABORTED: Channel Aborted Status
+ * @DWAXIDMAC_IRQ_ALL_ERR: Bitmask of all error interrupts
+ * @DWAXIDMAC_IRQ_ALL: Bitmask of all interrupts
+ */
+enum {
+ DWAXIDMAC_IRQ_NONE = 0,
+ DWAXIDMAC_IRQ_BLOCK_TRF = BIT(0),
+ DWAXIDMAC_IRQ_DMA_TRF = BIT(1),
+ DWAXIDMAC_IRQ_SRC_TRAN = BIT(3),
+ DWAXIDMAC_IRQ_DST_TRAN = BIT(4),
+ DWAXIDMAC_IRQ_SRC_DEC_ERR = BIT(5),
+ DWAXIDMAC_IRQ_DST_DEC_ERR = BIT(6),
+ DWAXIDMAC_IRQ_SRC_SLV_ERR = BIT(7),
+ DWAXIDMAC_IRQ_DST_SLV_ERR = BIT(8),
+ DWAXIDMAC_IRQ_LLI_RD_DEC_ERR = BIT(9),
+ DWAXIDMAC_IRQ_LLI_WR_DEC_ERR = BIT(10),
+ DWAXIDMAC_IRQ_LLI_RD_SLV_ERR = BIT(11),
+ DWAXIDMAC_IRQ_LLI_WR_SLV_ERR = BIT(12),
+ DWAXIDMAC_IRQ_INVALID_ERR = BIT(13),
+ DWAXIDMAC_IRQ_MULTIBLKTYPE_ERR = BIT(14),
+ DWAXIDMAC_IRQ_DEC_ERR = BIT(16),
+ DWAXIDMAC_IRQ_WR2RO_ERR = BIT(17),
+ DWAXIDMAC_IRQ_RD2RWO_ERR = BIT(18),
+ DWAXIDMAC_IRQ_WRONCHEN_ERR = BIT(19),
+ DWAXIDMAC_IRQ_SHADOWREG_ERR = BIT(20),
+ DWAXIDMAC_IRQ_WRONHOLD_ERR = BIT(21),
+ DWAXIDMAC_IRQ_LOCK_CLEARED = BIT(27),
+ DWAXIDMAC_IRQ_SRC_SUSPENDED = BIT(28),
+ DWAXIDMAC_IRQ_SUSPENDED = BIT(29),
+ DWAXIDMAC_IRQ_DISABLED = BIT(30),
+ DWAXIDMAC_IRQ_ABORTED = BIT(31),
+ DWAXIDMAC_IRQ_ALL_ERR = (GENMASK(21, 16) | GENMASK(14, 5)),
+ DWAXIDMAC_IRQ_ALL = GENMASK(31, 0)
+};
+
+enum {
+ DWAXIDMAC_TRANS_WIDTH_8 = 0,
+ DWAXIDMAC_TRANS_WIDTH_16,
+ DWAXIDMAC_TRANS_WIDTH_32,
+ DWAXIDMAC_TRANS_WIDTH_64,
+ DWAXIDMAC_TRANS_WIDTH_128,
+ DWAXIDMAC_TRANS_WIDTH_256,
+ DWAXIDMAC_TRANS_WIDTH_512,
+ DWAXIDMAC_TRANS_WIDTH_MAX = DWAXIDMAC_TRANS_WIDTH_512
+};
+
+#endif /* _AXI_DMA_PLATFORM_H */
diff --git a/drivers/dma/dw-edma/Kconfig b/drivers/dma/dw-edma/Kconfig
new file mode 100644
index 000000000..7ff17b2db
--- /dev/null
+++ b/drivers/dma/dw-edma/Kconfig
@@ -0,0 +1,19 @@
+# SPDX-License-Identifier: GPL-2.0
+
+config DW_EDMA
+ tristate "Synopsys DesignWare eDMA controller driver"
+ depends on PCI && PCI_MSI
+ select DMA_ENGINE
+ select DMA_VIRTUAL_CHANNELS
+ help
+ Support the Synopsys DesignWare eDMA controller, normally
+ implemented on endpoints SoCs.
+
+config DW_EDMA_PCIE
+ tristate "Synopsys DesignWare eDMA PCIe driver"
+ depends on PCI && PCI_MSI
+ select DW_EDMA
+ help
+ Provides a glue-logic between the Synopsys DesignWare
+ eDMA controller and an endpoint PCIe device. This also serves
+ as a reference design to whom desires to use this IP.
diff --git a/drivers/dma/dw-edma/Makefile b/drivers/dma/dw-edma/Makefile
new file mode 100644
index 000000000..8d45c0d56
--- /dev/null
+++ b/drivers/dma/dw-edma/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0
+
+obj-$(CONFIG_DW_EDMA) += dw-edma.o
+dw-edma-$(CONFIG_DEBUG_FS) := dw-edma-v0-debugfs.o
+dw-edma-objs := dw-edma-core.o \
+ dw-edma-v0-core.o $(dw-edma-y)
+obj-$(CONFIG_DW_EDMA_PCIE) += dw-edma-pcie.o
diff --git a/drivers/dma/dw-edma/dw-edma-core.c b/drivers/dma/dw-edma/dw-edma-core.c
new file mode 100644
index 000000000..ef4cdcf6b
--- /dev/null
+++ b/drivers/dma/dw-edma/dw-edma-core.c
@@ -0,0 +1,1036 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2018-2019 Synopsys, Inc. and/or its affiliates.
+ * Synopsys DesignWare eDMA core driver
+ *
+ * Author: Gustavo Pimentel <gustavo.pimentel@synopsys.com>
+ */
+
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/dmaengine.h>
+#include <linux/err.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/dma/edma.h>
+#include <linux/dma-mapping.h>
+
+#include "dw-edma-core.h"
+#include "dw-edma-v0-core.h"
+#include "../dmaengine.h"
+#include "../virt-dma.h"
+
+static inline
+struct device *dchan2dev(struct dma_chan *dchan)
+{
+ return &dchan->dev->device;
+}
+
+static inline
+struct device *chan2dev(struct dw_edma_chan *chan)
+{
+ return &chan->vc.chan.dev->device;
+}
+
+static inline
+struct dw_edma_desc *vd2dw_edma_desc(struct virt_dma_desc *vd)
+{
+ return container_of(vd, struct dw_edma_desc, vd);
+}
+
+static struct dw_edma_burst *dw_edma_alloc_burst(struct dw_edma_chunk *chunk)
+{
+ struct dw_edma_burst *burst;
+
+ burst = kzalloc(sizeof(*burst), GFP_NOWAIT);
+ if (unlikely(!burst))
+ return NULL;
+
+ INIT_LIST_HEAD(&burst->list);
+ if (chunk->burst) {
+ /* Create and add new element into the linked list */
+ chunk->bursts_alloc++;
+ list_add_tail(&burst->list, &chunk->burst->list);
+ } else {
+ /* List head */
+ chunk->bursts_alloc = 0;
+ chunk->burst = burst;
+ }
+
+ return burst;
+}
+
+static struct dw_edma_chunk *dw_edma_alloc_chunk(struct dw_edma_desc *desc)
+{
+ struct dw_edma_chip *chip = desc->chan->dw->chip;
+ struct dw_edma_chan *chan = desc->chan;
+ struct dw_edma_chunk *chunk;
+
+ chunk = kzalloc(sizeof(*chunk), GFP_NOWAIT);
+ if (unlikely(!chunk))
+ return NULL;
+
+ INIT_LIST_HEAD(&chunk->list);
+ chunk->chan = chan;
+ /* Toggling change bit (CB) in each chunk, this is a mechanism to
+ * inform the eDMA HW block that this is a new linked list ready
+ * to be consumed.
+ * - Odd chunks originate CB equal to 0
+ * - Even chunks originate CB equal to 1
+ */
+ chunk->cb = !(desc->chunks_alloc % 2);
+ if (chan->dir == EDMA_DIR_WRITE) {
+ chunk->ll_region.paddr = chip->ll_region_wr[chan->id].paddr;
+ chunk->ll_region.vaddr = chip->ll_region_wr[chan->id].vaddr;
+ } else {
+ chunk->ll_region.paddr = chip->ll_region_rd[chan->id].paddr;
+ chunk->ll_region.vaddr = chip->ll_region_rd[chan->id].vaddr;
+ }
+
+ if (desc->chunk) {
+ /* Create and add new element into the linked list */
+ if (!dw_edma_alloc_burst(chunk)) {
+ kfree(chunk);
+ return NULL;
+ }
+ desc->chunks_alloc++;
+ list_add_tail(&chunk->list, &desc->chunk->list);
+ } else {
+ /* List head */
+ chunk->burst = NULL;
+ desc->chunks_alloc = 0;
+ desc->chunk = chunk;
+ }
+
+ return chunk;
+}
+
+static struct dw_edma_desc *dw_edma_alloc_desc(struct dw_edma_chan *chan)
+{
+ struct dw_edma_desc *desc;
+
+ desc = kzalloc(sizeof(*desc), GFP_NOWAIT);
+ if (unlikely(!desc))
+ return NULL;
+
+ desc->chan = chan;
+ if (!dw_edma_alloc_chunk(desc)) {
+ kfree(desc);
+ return NULL;
+ }
+
+ return desc;
+}
+
+static void dw_edma_free_burst(struct dw_edma_chunk *chunk)
+{
+ struct dw_edma_burst *child, *_next;
+
+ /* Remove all the list elements */
+ list_for_each_entry_safe(child, _next, &chunk->burst->list, list) {
+ list_del(&child->list);
+ kfree(child);
+ chunk->bursts_alloc--;
+ }
+
+ /* Remove the list head */
+ kfree(child);
+ chunk->burst = NULL;
+}
+
+static void dw_edma_free_chunk(struct dw_edma_desc *desc)
+{
+ struct dw_edma_chunk *child, *_next;
+
+ if (!desc->chunk)
+ return;
+
+ /* Remove all the list elements */
+ list_for_each_entry_safe(child, _next, &desc->chunk->list, list) {
+ dw_edma_free_burst(child);
+ list_del(&child->list);
+ kfree(child);
+ desc->chunks_alloc--;
+ }
+
+ /* Remove the list head */
+ kfree(child);
+ desc->chunk = NULL;
+}
+
+static void dw_edma_free_desc(struct dw_edma_desc *desc)
+{
+ dw_edma_free_chunk(desc);
+ kfree(desc);
+}
+
+static void vchan_free_desc(struct virt_dma_desc *vdesc)
+{
+ dw_edma_free_desc(vd2dw_edma_desc(vdesc));
+}
+
+static int dw_edma_start_transfer(struct dw_edma_chan *chan)
+{
+ struct dw_edma_chunk *child;
+ struct dw_edma_desc *desc;
+ struct virt_dma_desc *vd;
+
+ vd = vchan_next_desc(&chan->vc);
+ if (!vd)
+ return 0;
+
+ desc = vd2dw_edma_desc(vd);
+ if (!desc)
+ return 0;
+
+ child = list_first_entry_or_null(&desc->chunk->list,
+ struct dw_edma_chunk, list);
+ if (!child)
+ return 0;
+
+ dw_edma_v0_core_start(child, !desc->xfer_sz);
+ desc->xfer_sz += child->ll_region.sz;
+ dw_edma_free_burst(child);
+ list_del(&child->list);
+ kfree(child);
+ desc->chunks_alloc--;
+
+ return 1;
+}
+
+static int dw_edma_device_config(struct dma_chan *dchan,
+ struct dma_slave_config *config)
+{
+ struct dw_edma_chan *chan = dchan2dw_edma_chan(dchan);
+
+ memcpy(&chan->config, config, sizeof(*config));
+ chan->configured = true;
+
+ return 0;
+}
+
+static int dw_edma_device_pause(struct dma_chan *dchan)
+{
+ struct dw_edma_chan *chan = dchan2dw_edma_chan(dchan);
+ int err = 0;
+
+ if (!chan->configured)
+ err = -EPERM;
+ else if (chan->status != EDMA_ST_BUSY)
+ err = -EPERM;
+ else if (chan->request != EDMA_REQ_NONE)
+ err = -EPERM;
+ else
+ chan->request = EDMA_REQ_PAUSE;
+
+ return err;
+}
+
+static int dw_edma_device_resume(struct dma_chan *dchan)
+{
+ struct dw_edma_chan *chan = dchan2dw_edma_chan(dchan);
+ int err = 0;
+
+ if (!chan->configured) {
+ err = -EPERM;
+ } else if (chan->status != EDMA_ST_PAUSE) {
+ err = -EPERM;
+ } else if (chan->request != EDMA_REQ_NONE) {
+ err = -EPERM;
+ } else {
+ chan->status = EDMA_ST_BUSY;
+ dw_edma_start_transfer(chan);
+ }
+
+ return err;
+}
+
+static int dw_edma_device_terminate_all(struct dma_chan *dchan)
+{
+ struct dw_edma_chan *chan = dchan2dw_edma_chan(dchan);
+ int err = 0;
+
+ if (!chan->configured) {
+ /* Do nothing */
+ } else if (chan->status == EDMA_ST_PAUSE) {
+ chan->status = EDMA_ST_IDLE;
+ chan->configured = false;
+ } else if (chan->status == EDMA_ST_IDLE) {
+ chan->configured = false;
+ } else if (dw_edma_v0_core_ch_status(chan) == DMA_COMPLETE) {
+ /*
+ * The channel is in a false BUSY state, probably didn't
+ * receive or lost an interrupt
+ */
+ chan->status = EDMA_ST_IDLE;
+ chan->configured = false;
+ } else if (chan->request > EDMA_REQ_PAUSE) {
+ err = -EPERM;
+ } else {
+ chan->request = EDMA_REQ_STOP;
+ }
+
+ return err;
+}
+
+static void dw_edma_device_issue_pending(struct dma_chan *dchan)
+{
+ struct dw_edma_chan *chan = dchan2dw_edma_chan(dchan);
+ unsigned long flags;
+
+ if (!chan->configured)
+ return;
+
+ spin_lock_irqsave(&chan->vc.lock, flags);
+ if (vchan_issue_pending(&chan->vc) && chan->request == EDMA_REQ_NONE &&
+ chan->status == EDMA_ST_IDLE) {
+ chan->status = EDMA_ST_BUSY;
+ dw_edma_start_transfer(chan);
+ }
+ spin_unlock_irqrestore(&chan->vc.lock, flags);
+}
+
+static enum dma_status
+dw_edma_device_tx_status(struct dma_chan *dchan, dma_cookie_t cookie,
+ struct dma_tx_state *txstate)
+{
+ struct dw_edma_chan *chan = dchan2dw_edma_chan(dchan);
+ struct dw_edma_desc *desc;
+ struct virt_dma_desc *vd;
+ unsigned long flags;
+ enum dma_status ret;
+ u32 residue = 0;
+
+ ret = dma_cookie_status(dchan, cookie, txstate);
+ if (ret == DMA_COMPLETE)
+ return ret;
+
+ if (ret == DMA_IN_PROGRESS && chan->status == EDMA_ST_PAUSE)
+ ret = DMA_PAUSED;
+
+ if (!txstate)
+ goto ret_residue;
+
+ spin_lock_irqsave(&chan->vc.lock, flags);
+ vd = vchan_find_desc(&chan->vc, cookie);
+ if (vd) {
+ desc = vd2dw_edma_desc(vd);
+ if (desc)
+ residue = desc->alloc_sz - desc->xfer_sz;
+ }
+ spin_unlock_irqrestore(&chan->vc.lock, flags);
+
+ret_residue:
+ dma_set_residue(txstate, residue);
+
+ return ret;
+}
+
+static struct dma_async_tx_descriptor *
+dw_edma_device_transfer(struct dw_edma_transfer *xfer)
+{
+ struct dw_edma_chan *chan = dchan2dw_edma_chan(xfer->dchan);
+ enum dma_transfer_direction dir = xfer->direction;
+ phys_addr_t src_addr, dst_addr;
+ struct scatterlist *sg = NULL;
+ struct dw_edma_chunk *chunk;
+ struct dw_edma_burst *burst;
+ struct dw_edma_desc *desc;
+ u32 cnt = 0;
+ int i;
+
+ if (!chan->configured)
+ return NULL;
+
+ /*
+ * Local Root Port/End-point Remote End-point
+ * +-----------------------+ PCIe bus +----------------------+
+ * | | +-+ | |
+ * | DEV_TO_MEM Rx Ch <----+ +---+ Tx Ch DEV_TO_MEM |
+ * | | | | | |
+ * | MEM_TO_DEV Tx Ch +----+ +---> Rx Ch MEM_TO_DEV |
+ * | | +-+ | |
+ * +-----------------------+ +----------------------+
+ *
+ * 1. Normal logic:
+ * If eDMA is embedded into the DW PCIe RP/EP and controlled from the
+ * CPU/Application side, the Rx channel (EDMA_DIR_READ) will be used
+ * for the device read operations (DEV_TO_MEM) and the Tx channel
+ * (EDMA_DIR_WRITE) - for the write operations (MEM_TO_DEV).
+ *
+ * 2. Inverted logic:
+ * If eDMA is embedded into a Remote PCIe EP and is controlled by the
+ * MWr/MRd TLPs sent from the CPU's PCIe host controller, the Tx
+ * channel (EDMA_DIR_WRITE) will be used for the device read operations
+ * (DEV_TO_MEM) and the Rx channel (EDMA_DIR_READ) - for the write
+ * operations (MEM_TO_DEV).
+ *
+ * It is the client driver responsibility to choose a proper channel
+ * for the DMA transfers.
+ */
+ if (chan->dw->chip->flags & DW_EDMA_CHIP_LOCAL) {
+ if ((chan->dir == EDMA_DIR_READ && dir != DMA_DEV_TO_MEM) ||
+ (chan->dir == EDMA_DIR_WRITE && dir != DMA_MEM_TO_DEV))
+ return NULL;
+ } else {
+ if ((chan->dir == EDMA_DIR_WRITE && dir != DMA_DEV_TO_MEM) ||
+ (chan->dir == EDMA_DIR_READ && dir != DMA_MEM_TO_DEV))
+ return NULL;
+ }
+
+ if (xfer->type == EDMA_XFER_CYCLIC) {
+ if (!xfer->xfer.cyclic.len || !xfer->xfer.cyclic.cnt)
+ return NULL;
+ } else if (xfer->type == EDMA_XFER_SCATTER_GATHER) {
+ if (xfer->xfer.sg.len < 1)
+ return NULL;
+ } else if (xfer->type == EDMA_XFER_INTERLEAVED) {
+ if (!xfer->xfer.il->numf)
+ return NULL;
+ if (xfer->xfer.il->numf > 0 && xfer->xfer.il->frame_size > 0)
+ return NULL;
+ } else {
+ return NULL;
+ }
+
+ desc = dw_edma_alloc_desc(chan);
+ if (unlikely(!desc))
+ goto err_alloc;
+
+ chunk = dw_edma_alloc_chunk(desc);
+ if (unlikely(!chunk))
+ goto err_alloc;
+
+ if (xfer->type == EDMA_XFER_INTERLEAVED) {
+ src_addr = xfer->xfer.il->src_start;
+ dst_addr = xfer->xfer.il->dst_start;
+ } else {
+ src_addr = chan->config.src_addr;
+ dst_addr = chan->config.dst_addr;
+ }
+
+ if (xfer->type == EDMA_XFER_CYCLIC) {
+ cnt = xfer->xfer.cyclic.cnt;
+ } else if (xfer->type == EDMA_XFER_SCATTER_GATHER) {
+ cnt = xfer->xfer.sg.len;
+ sg = xfer->xfer.sg.sgl;
+ } else if (xfer->type == EDMA_XFER_INTERLEAVED) {
+ if (xfer->xfer.il->numf > 0)
+ cnt = xfer->xfer.il->numf;
+ else
+ cnt = xfer->xfer.il->frame_size;
+ }
+
+ for (i = 0; i < cnt; i++) {
+ if (xfer->type == EDMA_XFER_SCATTER_GATHER && !sg)
+ break;
+
+ if (chunk->bursts_alloc == chan->ll_max) {
+ chunk = dw_edma_alloc_chunk(desc);
+ if (unlikely(!chunk))
+ goto err_alloc;
+ }
+
+ burst = dw_edma_alloc_burst(chunk);
+ if (unlikely(!burst))
+ goto err_alloc;
+
+ if (xfer->type == EDMA_XFER_CYCLIC)
+ burst->sz = xfer->xfer.cyclic.len;
+ else if (xfer->type == EDMA_XFER_SCATTER_GATHER)
+ burst->sz = sg_dma_len(sg);
+ else if (xfer->type == EDMA_XFER_INTERLEAVED)
+ burst->sz = xfer->xfer.il->sgl[i].size;
+
+ chunk->ll_region.sz += burst->sz;
+ desc->alloc_sz += burst->sz;
+
+ if (dir == DMA_DEV_TO_MEM) {
+ burst->sar = src_addr;
+ if (xfer->type == EDMA_XFER_CYCLIC) {
+ burst->dar = xfer->xfer.cyclic.paddr;
+ } else if (xfer->type == EDMA_XFER_SCATTER_GATHER) {
+ src_addr += sg_dma_len(sg);
+ burst->dar = sg_dma_address(sg);
+ /* Unlike the typical assumption by other
+ * drivers/IPs the peripheral memory isn't
+ * a FIFO memory, in this case, it's a
+ * linear memory and that why the source
+ * and destination addresses are increased
+ * by the same portion (data length)
+ */
+ } else if (xfer->type == EDMA_XFER_INTERLEAVED) {
+ burst->dar = dst_addr;
+ }
+ } else {
+ burst->dar = dst_addr;
+ if (xfer->type == EDMA_XFER_CYCLIC) {
+ burst->sar = xfer->xfer.cyclic.paddr;
+ } else if (xfer->type == EDMA_XFER_SCATTER_GATHER) {
+ dst_addr += sg_dma_len(sg);
+ burst->sar = sg_dma_address(sg);
+ /* Unlike the typical assumption by other
+ * drivers/IPs the peripheral memory isn't
+ * a FIFO memory, in this case, it's a
+ * linear memory and that why the source
+ * and destination addresses are increased
+ * by the same portion (data length)
+ */
+ } else if (xfer->type == EDMA_XFER_INTERLEAVED) {
+ burst->sar = src_addr;
+ }
+ }
+
+ if (xfer->type == EDMA_XFER_SCATTER_GATHER) {
+ sg = sg_next(sg);
+ } else if (xfer->type == EDMA_XFER_INTERLEAVED &&
+ xfer->xfer.il->frame_size > 0) {
+ struct dma_interleaved_template *il = xfer->xfer.il;
+ struct data_chunk *dc = &il->sgl[i];
+
+ if (il->src_sgl) {
+ src_addr += burst->sz;
+ src_addr += dmaengine_get_src_icg(il, dc);
+ }
+
+ if (il->dst_sgl) {
+ dst_addr += burst->sz;
+ dst_addr += dmaengine_get_dst_icg(il, dc);
+ }
+ }
+ }
+
+ return vchan_tx_prep(&chan->vc, &desc->vd, xfer->flags);
+
+err_alloc:
+ if (desc)
+ dw_edma_free_desc(desc);
+
+ return NULL;
+}
+
+static struct dma_async_tx_descriptor *
+dw_edma_device_prep_slave_sg(struct dma_chan *dchan, struct scatterlist *sgl,
+ unsigned int len,
+ enum dma_transfer_direction direction,
+ unsigned long flags, void *context)
+{
+ struct dw_edma_transfer xfer;
+
+ xfer.dchan = dchan;
+ xfer.direction = direction;
+ xfer.xfer.sg.sgl = sgl;
+ xfer.xfer.sg.len = len;
+ xfer.flags = flags;
+ xfer.type = EDMA_XFER_SCATTER_GATHER;
+
+ return dw_edma_device_transfer(&xfer);
+}
+
+static struct dma_async_tx_descriptor *
+dw_edma_device_prep_dma_cyclic(struct dma_chan *dchan, dma_addr_t paddr,
+ size_t len, size_t count,
+ enum dma_transfer_direction direction,
+ unsigned long flags)
+{
+ struct dw_edma_transfer xfer;
+
+ xfer.dchan = dchan;
+ xfer.direction = direction;
+ xfer.xfer.cyclic.paddr = paddr;
+ xfer.xfer.cyclic.len = len;
+ xfer.xfer.cyclic.cnt = count;
+ xfer.flags = flags;
+ xfer.type = EDMA_XFER_CYCLIC;
+
+ return dw_edma_device_transfer(&xfer);
+}
+
+static struct dma_async_tx_descriptor *
+dw_edma_device_prep_interleaved_dma(struct dma_chan *dchan,
+ struct dma_interleaved_template *ilt,
+ unsigned long flags)
+{
+ struct dw_edma_transfer xfer;
+
+ xfer.dchan = dchan;
+ xfer.direction = ilt->dir;
+ xfer.xfer.il = ilt;
+ xfer.flags = flags;
+ xfer.type = EDMA_XFER_INTERLEAVED;
+
+ return dw_edma_device_transfer(&xfer);
+}
+
+static void dw_edma_done_interrupt(struct dw_edma_chan *chan)
+{
+ struct dw_edma_desc *desc;
+ struct virt_dma_desc *vd;
+ unsigned long flags;
+
+ dw_edma_v0_core_clear_done_int(chan);
+
+ spin_lock_irqsave(&chan->vc.lock, flags);
+ vd = vchan_next_desc(&chan->vc);
+ if (vd) {
+ switch (chan->request) {
+ case EDMA_REQ_NONE:
+ desc = vd2dw_edma_desc(vd);
+ if (!desc->chunks_alloc) {
+ list_del(&vd->node);
+ vchan_cookie_complete(vd);
+ }
+
+ /* Continue transferring if there are remaining chunks or issued requests.
+ */
+ chan->status = dw_edma_start_transfer(chan) ? EDMA_ST_BUSY : EDMA_ST_IDLE;
+ break;
+
+ case EDMA_REQ_STOP:
+ list_del(&vd->node);
+ vchan_cookie_complete(vd);
+ chan->request = EDMA_REQ_NONE;
+ chan->status = EDMA_ST_IDLE;
+ break;
+
+ case EDMA_REQ_PAUSE:
+ chan->request = EDMA_REQ_NONE;
+ chan->status = EDMA_ST_PAUSE;
+ break;
+
+ default:
+ break;
+ }
+ }
+ spin_unlock_irqrestore(&chan->vc.lock, flags);
+}
+
+static void dw_edma_abort_interrupt(struct dw_edma_chan *chan)
+{
+ struct virt_dma_desc *vd;
+ unsigned long flags;
+
+ dw_edma_v0_core_clear_abort_int(chan);
+
+ spin_lock_irqsave(&chan->vc.lock, flags);
+ vd = vchan_next_desc(&chan->vc);
+ if (vd) {
+ list_del(&vd->node);
+ vchan_cookie_complete(vd);
+ }
+ spin_unlock_irqrestore(&chan->vc.lock, flags);
+ chan->request = EDMA_REQ_NONE;
+ chan->status = EDMA_ST_IDLE;
+}
+
+static irqreturn_t dw_edma_interrupt(int irq, void *data, bool write)
+{
+ struct dw_edma_irq *dw_irq = data;
+ struct dw_edma *dw = dw_irq->dw;
+ unsigned long total, pos, val;
+ unsigned long off;
+ u32 mask;
+
+ if (write) {
+ total = dw->wr_ch_cnt;
+ off = 0;
+ mask = dw_irq->wr_mask;
+ } else {
+ total = dw->rd_ch_cnt;
+ off = dw->wr_ch_cnt;
+ mask = dw_irq->rd_mask;
+ }
+
+ val = dw_edma_v0_core_status_done_int(dw, write ?
+ EDMA_DIR_WRITE :
+ EDMA_DIR_READ);
+ val &= mask;
+ for_each_set_bit(pos, &val, total) {
+ struct dw_edma_chan *chan = &dw->chan[pos + off];
+
+ dw_edma_done_interrupt(chan);
+ }
+
+ val = dw_edma_v0_core_status_abort_int(dw, write ?
+ EDMA_DIR_WRITE :
+ EDMA_DIR_READ);
+ val &= mask;
+ for_each_set_bit(pos, &val, total) {
+ struct dw_edma_chan *chan = &dw->chan[pos + off];
+
+ dw_edma_abort_interrupt(chan);
+ }
+
+ return IRQ_HANDLED;
+}
+
+static inline irqreturn_t dw_edma_interrupt_write(int irq, void *data)
+{
+ return dw_edma_interrupt(irq, data, true);
+}
+
+static inline irqreturn_t dw_edma_interrupt_read(int irq, void *data)
+{
+ return dw_edma_interrupt(irq, data, false);
+}
+
+static irqreturn_t dw_edma_interrupt_common(int irq, void *data)
+{
+ dw_edma_interrupt(irq, data, true);
+ dw_edma_interrupt(irq, data, false);
+
+ return IRQ_HANDLED;
+}
+
+static int dw_edma_alloc_chan_resources(struct dma_chan *dchan)
+{
+ struct dw_edma_chan *chan = dchan2dw_edma_chan(dchan);
+
+ if (chan->status != EDMA_ST_IDLE)
+ return -EBUSY;
+
+ return 0;
+}
+
+static void dw_edma_free_chan_resources(struct dma_chan *dchan)
+{
+ unsigned long timeout = jiffies + msecs_to_jiffies(5000);
+ int ret;
+
+ while (time_before(jiffies, timeout)) {
+ ret = dw_edma_device_terminate_all(dchan);
+ if (!ret)
+ break;
+
+ if (time_after_eq(jiffies, timeout))
+ return;
+
+ cpu_relax();
+ }
+}
+
+static int dw_edma_channel_setup(struct dw_edma *dw, bool write,
+ u32 wr_alloc, u32 rd_alloc)
+{
+ struct dw_edma_chip *chip = dw->chip;
+ struct dw_edma_region *dt_region;
+ struct device *dev = chip->dev;
+ struct dw_edma_chan *chan;
+ struct dw_edma_irq *irq;
+ struct dma_device *dma;
+ u32 alloc, off_alloc;
+ u32 i, j, cnt;
+ int err = 0;
+ u32 pos;
+
+ if (write) {
+ i = 0;
+ cnt = dw->wr_ch_cnt;
+ dma = &dw->wr_edma;
+ alloc = wr_alloc;
+ off_alloc = 0;
+ } else {
+ i = dw->wr_ch_cnt;
+ cnt = dw->rd_ch_cnt;
+ dma = &dw->rd_edma;
+ alloc = rd_alloc;
+ off_alloc = wr_alloc;
+ }
+
+ INIT_LIST_HEAD(&dma->channels);
+ for (j = 0; (alloc || dw->nr_irqs == 1) && j < cnt; j++, i++) {
+ chan = &dw->chan[i];
+
+ dt_region = devm_kzalloc(dev, sizeof(*dt_region), GFP_KERNEL);
+ if (!dt_region)
+ return -ENOMEM;
+
+ chan->vc.chan.private = dt_region;
+
+ chan->dw = dw;
+ chan->id = j;
+ chan->dir = write ? EDMA_DIR_WRITE : EDMA_DIR_READ;
+ chan->configured = false;
+ chan->request = EDMA_REQ_NONE;
+ chan->status = EDMA_ST_IDLE;
+
+ if (write)
+ chan->ll_max = (chip->ll_region_wr[j].sz / EDMA_LL_SZ);
+ else
+ chan->ll_max = (chip->ll_region_rd[j].sz / EDMA_LL_SZ);
+ chan->ll_max -= 1;
+
+ dev_vdbg(dev, "L. List:\tChannel %s[%u] max_cnt=%u\n",
+ write ? "write" : "read", j, chan->ll_max);
+
+ if (dw->nr_irqs == 1)
+ pos = 0;
+ else
+ pos = off_alloc + (j % alloc);
+
+ irq = &dw->irq[pos];
+
+ if (write)
+ irq->wr_mask |= BIT(j);
+ else
+ irq->rd_mask |= BIT(j);
+
+ irq->dw = dw;
+ memcpy(&chan->msi, &irq->msi, sizeof(chan->msi));
+
+ dev_vdbg(dev, "MSI:\t\tChannel %s[%u] addr=0x%.8x%.8x, data=0x%.8x\n",
+ write ? "write" : "read", j,
+ chan->msi.address_hi, chan->msi.address_lo,
+ chan->msi.data);
+
+ chan->vc.desc_free = vchan_free_desc;
+ vchan_init(&chan->vc, dma);
+
+ if (write) {
+ dt_region->paddr = chip->dt_region_wr[j].paddr;
+ dt_region->vaddr = chip->dt_region_wr[j].vaddr;
+ dt_region->sz = chip->dt_region_wr[j].sz;
+ } else {
+ dt_region->paddr = chip->dt_region_rd[j].paddr;
+ dt_region->vaddr = chip->dt_region_rd[j].vaddr;
+ dt_region->sz = chip->dt_region_rd[j].sz;
+ }
+
+ dw_edma_v0_core_device_config(chan);
+ }
+
+ /* Set DMA channel capabilities */
+ dma_cap_zero(dma->cap_mask);
+ dma_cap_set(DMA_SLAVE, dma->cap_mask);
+ dma_cap_set(DMA_CYCLIC, dma->cap_mask);
+ dma_cap_set(DMA_PRIVATE, dma->cap_mask);
+ dma_cap_set(DMA_INTERLEAVE, dma->cap_mask);
+ dma->directions = BIT(write ? DMA_DEV_TO_MEM : DMA_MEM_TO_DEV);
+ dma->src_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_4_BYTES);
+ dma->dst_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_4_BYTES);
+ dma->residue_granularity = DMA_RESIDUE_GRANULARITY_DESCRIPTOR;
+ dma->chancnt = cnt;
+
+ /* Set DMA channel callbacks */
+ dma->dev = chip->dev;
+ dma->device_alloc_chan_resources = dw_edma_alloc_chan_resources;
+ dma->device_free_chan_resources = dw_edma_free_chan_resources;
+ dma->device_config = dw_edma_device_config;
+ dma->device_pause = dw_edma_device_pause;
+ dma->device_resume = dw_edma_device_resume;
+ dma->device_terminate_all = dw_edma_device_terminate_all;
+ dma->device_issue_pending = dw_edma_device_issue_pending;
+ dma->device_tx_status = dw_edma_device_tx_status;
+ dma->device_prep_slave_sg = dw_edma_device_prep_slave_sg;
+ dma->device_prep_dma_cyclic = dw_edma_device_prep_dma_cyclic;
+ dma->device_prep_interleaved_dma = dw_edma_device_prep_interleaved_dma;
+
+ dma_set_max_seg_size(dma->dev, U32_MAX);
+
+ /* Register DMA device */
+ err = dma_async_device_register(dma);
+
+ return err;
+}
+
+static inline void dw_edma_dec_irq_alloc(int *nr_irqs, u32 *alloc, u16 cnt)
+{
+ if (*nr_irqs && *alloc < cnt) {
+ (*alloc)++;
+ (*nr_irqs)--;
+ }
+}
+
+static inline void dw_edma_add_irq_mask(u32 *mask, u32 alloc, u16 cnt)
+{
+ while (*mask * alloc < cnt)
+ (*mask)++;
+}
+
+static int dw_edma_irq_request(struct dw_edma *dw,
+ u32 *wr_alloc, u32 *rd_alloc)
+{
+ struct dw_edma_chip *chip = dw->chip;
+ struct device *dev = dw->chip->dev;
+ u32 wr_mask = 1;
+ u32 rd_mask = 1;
+ int i, err = 0;
+ u32 ch_cnt;
+ int irq;
+
+ ch_cnt = dw->wr_ch_cnt + dw->rd_ch_cnt;
+
+ if (chip->nr_irqs < 1 || !chip->ops->irq_vector)
+ return -EINVAL;
+
+ dw->irq = devm_kcalloc(dev, chip->nr_irqs, sizeof(*dw->irq), GFP_KERNEL);
+ if (!dw->irq)
+ return -ENOMEM;
+
+ if (chip->nr_irqs == 1) {
+ /* Common IRQ shared among all channels */
+ irq = chip->ops->irq_vector(dev, 0);
+ err = request_irq(irq, dw_edma_interrupt_common,
+ IRQF_SHARED, dw->name, &dw->irq[0]);
+ if (err) {
+ dw->nr_irqs = 0;
+ return err;
+ }
+
+ if (irq_get_msi_desc(irq))
+ get_cached_msi_msg(irq, &dw->irq[0].msi);
+
+ dw->nr_irqs = 1;
+ } else {
+ /* Distribute IRQs equally among all channels */
+ int tmp = chip->nr_irqs;
+
+ while (tmp && (*wr_alloc + *rd_alloc) < ch_cnt) {
+ dw_edma_dec_irq_alloc(&tmp, wr_alloc, dw->wr_ch_cnt);
+ dw_edma_dec_irq_alloc(&tmp, rd_alloc, dw->rd_ch_cnt);
+ }
+
+ dw_edma_add_irq_mask(&wr_mask, *wr_alloc, dw->wr_ch_cnt);
+ dw_edma_add_irq_mask(&rd_mask, *rd_alloc, dw->rd_ch_cnt);
+
+ for (i = 0; i < (*wr_alloc + *rd_alloc); i++) {
+ irq = chip->ops->irq_vector(dev, i);
+ err = request_irq(irq,
+ i < *wr_alloc ?
+ dw_edma_interrupt_write :
+ dw_edma_interrupt_read,
+ IRQF_SHARED, dw->name,
+ &dw->irq[i]);
+ if (err) {
+ dw->nr_irqs = i;
+ return err;
+ }
+
+ if (irq_get_msi_desc(irq))
+ get_cached_msi_msg(irq, &dw->irq[i].msi);
+ }
+
+ dw->nr_irqs = i;
+ }
+
+ return err;
+}
+
+int dw_edma_probe(struct dw_edma_chip *chip)
+{
+ struct device *dev;
+ struct dw_edma *dw;
+ u32 wr_alloc = 0;
+ u32 rd_alloc = 0;
+ int i, err;
+
+ if (!chip)
+ return -EINVAL;
+
+ dev = chip->dev;
+ if (!dev || !chip->ops)
+ return -EINVAL;
+
+ dw = devm_kzalloc(dev, sizeof(*dw), GFP_KERNEL);
+ if (!dw)
+ return -ENOMEM;
+
+ dw->chip = chip;
+
+ raw_spin_lock_init(&dw->lock);
+
+ dw->wr_ch_cnt = min_t(u16, chip->ll_wr_cnt,
+ dw_edma_v0_core_ch_count(dw, EDMA_DIR_WRITE));
+ dw->wr_ch_cnt = min_t(u16, dw->wr_ch_cnt, EDMA_MAX_WR_CH);
+
+ dw->rd_ch_cnt = min_t(u16, chip->ll_rd_cnt,
+ dw_edma_v0_core_ch_count(dw, EDMA_DIR_READ));
+ dw->rd_ch_cnt = min_t(u16, dw->rd_ch_cnt, EDMA_MAX_RD_CH);
+
+ if (!dw->wr_ch_cnt && !dw->rd_ch_cnt)
+ return -EINVAL;
+
+ dev_vdbg(dev, "Channels:\twrite=%d, read=%d\n",
+ dw->wr_ch_cnt, dw->rd_ch_cnt);
+
+ /* Allocate channels */
+ dw->chan = devm_kcalloc(dev, dw->wr_ch_cnt + dw->rd_ch_cnt,
+ sizeof(*dw->chan), GFP_KERNEL);
+ if (!dw->chan)
+ return -ENOMEM;
+
+ snprintf(dw->name, sizeof(dw->name), "dw-edma-core:%d", chip->id);
+
+ /* Disable eDMA, only to establish the ideal initial conditions */
+ dw_edma_v0_core_off(dw);
+
+ /* Request IRQs */
+ err = dw_edma_irq_request(dw, &wr_alloc, &rd_alloc);
+ if (err)
+ return err;
+
+ /* Setup write channels */
+ err = dw_edma_channel_setup(dw, true, wr_alloc, rd_alloc);
+ if (err)
+ goto err_irq_free;
+
+ /* Setup read channels */
+ err = dw_edma_channel_setup(dw, false, wr_alloc, rd_alloc);
+ if (err)
+ goto err_irq_free;
+
+ /* Turn debugfs on */
+ dw_edma_v0_core_debugfs_on(dw);
+
+ chip->dw = dw;
+
+ return 0;
+
+err_irq_free:
+ for (i = (dw->nr_irqs - 1); i >= 0; i--)
+ free_irq(chip->ops->irq_vector(dev, i), &dw->irq[i]);
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(dw_edma_probe);
+
+int dw_edma_remove(struct dw_edma_chip *chip)
+{
+ struct dw_edma_chan *chan, *_chan;
+ struct device *dev = chip->dev;
+ struct dw_edma *dw = chip->dw;
+ int i;
+
+ /* Disable eDMA */
+ dw_edma_v0_core_off(dw);
+
+ /* Free irqs */
+ for (i = (dw->nr_irqs - 1); i >= 0; i--)
+ free_irq(chip->ops->irq_vector(dev, i), &dw->irq[i]);
+
+ /* Deregister eDMA device */
+ dma_async_device_unregister(&dw->wr_edma);
+ list_for_each_entry_safe(chan, _chan, &dw->wr_edma.channels,
+ vc.chan.device_node) {
+ tasklet_kill(&chan->vc.task);
+ list_del(&chan->vc.chan.device_node);
+ }
+
+ dma_async_device_unregister(&dw->rd_edma);
+ list_for_each_entry_safe(chan, _chan, &dw->rd_edma.channels,
+ vc.chan.device_node) {
+ tasklet_kill(&chan->vc.task);
+ list_del(&chan->vc.chan.device_node);
+ }
+
+ /* Turn debugfs off */
+ dw_edma_v0_core_debugfs_off(dw);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(dw_edma_remove);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("Synopsys DesignWare eDMA controller core driver");
+MODULE_AUTHOR("Gustavo Pimentel <gustavo.pimentel@synopsys.com>");
diff --git a/drivers/dma/dw-edma/dw-edma-core.h b/drivers/dma/dw-edma/dw-edma-core.h
new file mode 100644
index 000000000..85df2d511
--- /dev/null
+++ b/drivers/dma/dw-edma/dw-edma-core.h
@@ -0,0 +1,155 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2018-2019 Synopsys, Inc. and/or its affiliates.
+ * Synopsys DesignWare eDMA core driver
+ *
+ * Author: Gustavo Pimentel <gustavo.pimentel@synopsys.com>
+ */
+
+#ifndef _DW_EDMA_CORE_H
+#define _DW_EDMA_CORE_H
+
+#include <linux/msi.h>
+#include <linux/dma/edma.h>
+
+#include "../virt-dma.h"
+
+#define EDMA_LL_SZ 24
+
+enum dw_edma_dir {
+ EDMA_DIR_WRITE = 0,
+ EDMA_DIR_READ
+};
+
+enum dw_edma_request {
+ EDMA_REQ_NONE = 0,
+ EDMA_REQ_STOP,
+ EDMA_REQ_PAUSE
+};
+
+enum dw_edma_status {
+ EDMA_ST_IDLE = 0,
+ EDMA_ST_PAUSE,
+ EDMA_ST_BUSY
+};
+
+enum dw_edma_xfer_type {
+ EDMA_XFER_SCATTER_GATHER = 0,
+ EDMA_XFER_CYCLIC,
+ EDMA_XFER_INTERLEAVED
+};
+
+struct dw_edma_chan;
+struct dw_edma_chunk;
+
+struct dw_edma_burst {
+ struct list_head list;
+ u64 sar;
+ u64 dar;
+ u32 sz;
+};
+
+struct dw_edma_chunk {
+ struct list_head list;
+ struct dw_edma_chan *chan;
+ struct dw_edma_burst *burst;
+
+ u32 bursts_alloc;
+
+ u8 cb;
+ struct dw_edma_region ll_region; /* Linked list */
+};
+
+struct dw_edma_desc {
+ struct virt_dma_desc vd;
+ struct dw_edma_chan *chan;
+ struct dw_edma_chunk *chunk;
+
+ u32 chunks_alloc;
+
+ u32 alloc_sz;
+ u32 xfer_sz;
+};
+
+struct dw_edma_chan {
+ struct virt_dma_chan vc;
+ struct dw_edma *dw;
+ int id;
+ enum dw_edma_dir dir;
+
+ u32 ll_max;
+
+ struct msi_msg msi;
+
+ enum dw_edma_request request;
+ enum dw_edma_status status;
+ u8 configured;
+
+ struct dma_slave_config config;
+};
+
+struct dw_edma_irq {
+ struct msi_msg msi;
+ u32 wr_mask;
+ u32 rd_mask;
+ struct dw_edma *dw;
+};
+
+struct dw_edma {
+ char name[20];
+
+ struct dma_device wr_edma;
+ u16 wr_ch_cnt;
+
+ struct dma_device rd_edma;
+ u16 rd_ch_cnt;
+
+ struct dw_edma_irq *irq;
+ int nr_irqs;
+
+ struct dw_edma_chan *chan;
+
+ raw_spinlock_t lock; /* Only for legacy */
+
+ struct dw_edma_chip *chip;
+#ifdef CONFIG_DEBUG_FS
+ struct dentry *debugfs;
+#endif /* CONFIG_DEBUG_FS */
+};
+
+struct dw_edma_sg {
+ struct scatterlist *sgl;
+ unsigned int len;
+};
+
+struct dw_edma_cyclic {
+ dma_addr_t paddr;
+ size_t len;
+ size_t cnt;
+};
+
+struct dw_edma_transfer {
+ struct dma_chan *dchan;
+ union dw_edma_xfer {
+ struct dw_edma_sg sg;
+ struct dw_edma_cyclic cyclic;
+ struct dma_interleaved_template *il;
+ } xfer;
+ enum dma_transfer_direction direction;
+ unsigned long flags;
+ enum dw_edma_xfer_type type;
+};
+
+static inline
+struct dw_edma_chan *vc2dw_edma_chan(struct virt_dma_chan *vc)
+{
+ return container_of(vc, struct dw_edma_chan, vc);
+}
+
+static inline
+struct dw_edma_chan *dchan2dw_edma_chan(struct dma_chan *dchan)
+{
+ return vc2dw_edma_chan(to_virt_chan(dchan));
+}
+
+#endif /* _DW_EDMA_CORE_H */
diff --git a/drivers/dma/dw-edma/dw-edma-pcie.c b/drivers/dma/dw-edma/dw-edma-pcie.c
new file mode 100644
index 000000000..d6b5e2463
--- /dev/null
+++ b/drivers/dma/dw-edma/dw-edma-pcie.c
@@ -0,0 +1,364 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2018-2019 Synopsys, Inc. and/or its affiliates.
+ * Synopsys DesignWare eDMA PCIe driver
+ *
+ * Author: Gustavo Pimentel <gustavo.pimentel@synopsys.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/device.h>
+#include <linux/dma/edma.h>
+#include <linux/pci-epf.h>
+#include <linux/msi.h>
+#include <linux/bitfield.h>
+
+#include "dw-edma-core.h"
+
+#define DW_PCIE_VSEC_DMA_ID 0x6
+#define DW_PCIE_VSEC_DMA_BAR GENMASK(10, 8)
+#define DW_PCIE_VSEC_DMA_MAP GENMASK(2, 0)
+#define DW_PCIE_VSEC_DMA_WR_CH GENMASK(9, 0)
+#define DW_PCIE_VSEC_DMA_RD_CH GENMASK(25, 16)
+
+#define DW_BLOCK(a, b, c) \
+ { \
+ .bar = a, \
+ .off = b, \
+ .sz = c, \
+ },
+
+struct dw_edma_block {
+ enum pci_barno bar;
+ off_t off;
+ size_t sz;
+};
+
+struct dw_edma_pcie_data {
+ /* eDMA registers location */
+ struct dw_edma_block rg;
+ /* eDMA memory linked list location */
+ struct dw_edma_block ll_wr[EDMA_MAX_WR_CH];
+ struct dw_edma_block ll_rd[EDMA_MAX_RD_CH];
+ /* eDMA memory data location */
+ struct dw_edma_block dt_wr[EDMA_MAX_WR_CH];
+ struct dw_edma_block dt_rd[EDMA_MAX_RD_CH];
+ /* Other */
+ enum dw_edma_map_format mf;
+ u8 irqs;
+ u16 wr_ch_cnt;
+ u16 rd_ch_cnt;
+};
+
+static const struct dw_edma_pcie_data snps_edda_data = {
+ /* eDMA registers location */
+ .rg.bar = BAR_0,
+ .rg.off = 0x00001000, /* 4 Kbytes */
+ .rg.sz = 0x00002000, /* 8 Kbytes */
+ /* eDMA memory linked list location */
+ .ll_wr = {
+ /* Channel 0 - BAR 2, offset 0 Mbytes, size 2 Kbytes */
+ DW_BLOCK(BAR_2, 0x00000000, 0x00000800)
+ /* Channel 1 - BAR 2, offset 2 Mbytes, size 2 Kbytes */
+ DW_BLOCK(BAR_2, 0x00200000, 0x00000800)
+ },
+ .ll_rd = {
+ /* Channel 0 - BAR 2, offset 4 Mbytes, size 2 Kbytes */
+ DW_BLOCK(BAR_2, 0x00400000, 0x00000800)
+ /* Channel 1 - BAR 2, offset 6 Mbytes, size 2 Kbytes */
+ DW_BLOCK(BAR_2, 0x00600000, 0x00000800)
+ },
+ /* eDMA memory data location */
+ .dt_wr = {
+ /* Channel 0 - BAR 2, offset 8 Mbytes, size 2 Kbytes */
+ DW_BLOCK(BAR_2, 0x00800000, 0x00000800)
+ /* Channel 1 - BAR 2, offset 9 Mbytes, size 2 Kbytes */
+ DW_BLOCK(BAR_2, 0x00900000, 0x00000800)
+ },
+ .dt_rd = {
+ /* Channel 0 - BAR 2, offset 10 Mbytes, size 2 Kbytes */
+ DW_BLOCK(BAR_2, 0x00a00000, 0x00000800)
+ /* Channel 1 - BAR 2, offset 11 Mbytes, size 2 Kbytes */
+ DW_BLOCK(BAR_2, 0x00b00000, 0x00000800)
+ },
+ /* Other */
+ .mf = EDMA_MF_EDMA_UNROLL,
+ .irqs = 1,
+ .wr_ch_cnt = 2,
+ .rd_ch_cnt = 2,
+};
+
+static int dw_edma_pcie_irq_vector(struct device *dev, unsigned int nr)
+{
+ return pci_irq_vector(to_pci_dev(dev), nr);
+}
+
+static const struct dw_edma_core_ops dw_edma_pcie_core_ops = {
+ .irq_vector = dw_edma_pcie_irq_vector,
+};
+
+static void dw_edma_pcie_get_vsec_dma_data(struct pci_dev *pdev,
+ struct dw_edma_pcie_data *pdata)
+{
+ u32 val, map;
+ u16 vsec;
+ u64 off;
+
+ vsec = pci_find_vsec_capability(pdev, PCI_VENDOR_ID_SYNOPSYS,
+ DW_PCIE_VSEC_DMA_ID);
+ if (!vsec)
+ return;
+
+ pci_read_config_dword(pdev, vsec + PCI_VNDR_HEADER, &val);
+ if (PCI_VNDR_HEADER_REV(val) != 0x00 ||
+ PCI_VNDR_HEADER_LEN(val) != 0x18)
+ return;
+
+ pci_dbg(pdev, "Detected PCIe Vendor-Specific Extended Capability DMA\n");
+ pci_read_config_dword(pdev, vsec + 0x8, &val);
+ map = FIELD_GET(DW_PCIE_VSEC_DMA_MAP, val);
+ if (map != EDMA_MF_EDMA_LEGACY &&
+ map != EDMA_MF_EDMA_UNROLL &&
+ map != EDMA_MF_HDMA_COMPAT)
+ return;
+
+ pdata->mf = map;
+ pdata->rg.bar = FIELD_GET(DW_PCIE_VSEC_DMA_BAR, val);
+
+ pci_read_config_dword(pdev, vsec + 0xc, &val);
+ pdata->wr_ch_cnt = min_t(u16, pdata->wr_ch_cnt,
+ FIELD_GET(DW_PCIE_VSEC_DMA_WR_CH, val));
+ pdata->rd_ch_cnt = min_t(u16, pdata->rd_ch_cnt,
+ FIELD_GET(DW_PCIE_VSEC_DMA_RD_CH, val));
+
+ pci_read_config_dword(pdev, vsec + 0x14, &val);
+ off = val;
+ pci_read_config_dword(pdev, vsec + 0x10, &val);
+ off <<= 32;
+ off |= val;
+ pdata->rg.off = off;
+}
+
+static int dw_edma_pcie_probe(struct pci_dev *pdev,
+ const struct pci_device_id *pid)
+{
+ struct dw_edma_pcie_data *pdata = (void *)pid->driver_data;
+ struct dw_edma_pcie_data vsec_data;
+ struct device *dev = &pdev->dev;
+ struct dw_edma_chip *chip;
+ int err, nr_irqs;
+ int i, mask;
+
+ /* Enable PCI device */
+ err = pcim_enable_device(pdev);
+ if (err) {
+ pci_err(pdev, "enabling device failed\n");
+ return err;
+ }
+
+ memcpy(&vsec_data, pdata, sizeof(struct dw_edma_pcie_data));
+
+ /*
+ * Tries to find if exists a PCIe Vendor-Specific Extended Capability
+ * for the DMA, if one exists, then reconfigures it.
+ */
+ dw_edma_pcie_get_vsec_dma_data(pdev, &vsec_data);
+
+ /* Mapping PCI BAR regions */
+ mask = BIT(vsec_data.rg.bar);
+ for (i = 0; i < vsec_data.wr_ch_cnt; i++) {
+ mask |= BIT(vsec_data.ll_wr[i].bar);
+ mask |= BIT(vsec_data.dt_wr[i].bar);
+ }
+ for (i = 0; i < vsec_data.rd_ch_cnt; i++) {
+ mask |= BIT(vsec_data.ll_rd[i].bar);
+ mask |= BIT(vsec_data.dt_rd[i].bar);
+ }
+ err = pcim_iomap_regions(pdev, mask, pci_name(pdev));
+ if (err) {
+ pci_err(pdev, "eDMA BAR I/O remapping failed\n");
+ return err;
+ }
+
+ pci_set_master(pdev);
+
+ /* DMA configuration */
+ err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
+ if (err) {
+ pci_err(pdev, "DMA mask 64 set failed\n");
+ return err;
+ }
+
+ /* Data structure allocation */
+ chip = devm_kzalloc(dev, sizeof(*chip), GFP_KERNEL);
+ if (!chip)
+ return -ENOMEM;
+
+ /* IRQs allocation */
+ nr_irqs = pci_alloc_irq_vectors(pdev, 1, vsec_data.irqs,
+ PCI_IRQ_MSI | PCI_IRQ_MSIX);
+ if (nr_irqs < 1) {
+ pci_err(pdev, "fail to alloc IRQ vector (number of IRQs=%u)\n",
+ nr_irqs);
+ return -EPERM;
+ }
+
+ /* Data structure initialization */
+ chip->dev = dev;
+ chip->id = pdev->devfn;
+
+ chip->mf = vsec_data.mf;
+ chip->nr_irqs = nr_irqs;
+ chip->ops = &dw_edma_pcie_core_ops;
+
+ chip->ll_wr_cnt = vsec_data.wr_ch_cnt;
+ chip->ll_rd_cnt = vsec_data.rd_ch_cnt;
+
+ chip->reg_base = pcim_iomap_table(pdev)[vsec_data.rg.bar];
+ if (!chip->reg_base)
+ return -ENOMEM;
+
+ for (i = 0; i < chip->ll_wr_cnt; i++) {
+ struct dw_edma_region *ll_region = &chip->ll_region_wr[i];
+ struct dw_edma_region *dt_region = &chip->dt_region_wr[i];
+ struct dw_edma_block *ll_block = &vsec_data.ll_wr[i];
+ struct dw_edma_block *dt_block = &vsec_data.dt_wr[i];
+
+ ll_region->vaddr = pcim_iomap_table(pdev)[ll_block->bar];
+ if (!ll_region->vaddr)
+ return -ENOMEM;
+
+ ll_region->vaddr += ll_block->off;
+ ll_region->paddr = pdev->resource[ll_block->bar].start;
+ ll_region->paddr += ll_block->off;
+ ll_region->sz = ll_block->sz;
+
+ dt_region->vaddr = pcim_iomap_table(pdev)[dt_block->bar];
+ if (!dt_region->vaddr)
+ return -ENOMEM;
+
+ dt_region->vaddr += dt_block->off;
+ dt_region->paddr = pdev->resource[dt_block->bar].start;
+ dt_region->paddr += dt_block->off;
+ dt_region->sz = dt_block->sz;
+ }
+
+ for (i = 0; i < chip->ll_rd_cnt; i++) {
+ struct dw_edma_region *ll_region = &chip->ll_region_rd[i];
+ struct dw_edma_region *dt_region = &chip->dt_region_rd[i];
+ struct dw_edma_block *ll_block = &vsec_data.ll_rd[i];
+ struct dw_edma_block *dt_block = &vsec_data.dt_rd[i];
+
+ ll_region->vaddr = pcim_iomap_table(pdev)[ll_block->bar];
+ if (!ll_region->vaddr)
+ return -ENOMEM;
+
+ ll_region->vaddr += ll_block->off;
+ ll_region->paddr = pdev->resource[ll_block->bar].start;
+ ll_region->paddr += ll_block->off;
+ ll_region->sz = ll_block->sz;
+
+ dt_region->vaddr = pcim_iomap_table(pdev)[dt_block->bar];
+ if (!dt_region->vaddr)
+ return -ENOMEM;
+
+ dt_region->vaddr += dt_block->off;
+ dt_region->paddr = pdev->resource[dt_block->bar].start;
+ dt_region->paddr += dt_block->off;
+ dt_region->sz = dt_block->sz;
+ }
+
+ /* Debug info */
+ if (chip->mf == EDMA_MF_EDMA_LEGACY)
+ pci_dbg(pdev, "Version:\teDMA Port Logic (0x%x)\n", chip->mf);
+ else if (chip->mf == EDMA_MF_EDMA_UNROLL)
+ pci_dbg(pdev, "Version:\teDMA Unroll (0x%x)\n", chip->mf);
+ else if (chip->mf == EDMA_MF_HDMA_COMPAT)
+ pci_dbg(pdev, "Version:\tHDMA Compatible (0x%x)\n", chip->mf);
+ else
+ pci_dbg(pdev, "Version:\tUnknown (0x%x)\n", chip->mf);
+
+ pci_dbg(pdev, "Registers:\tBAR=%u, off=0x%.8lx, sz=0x%zx bytes, addr(v=%p)\n",
+ vsec_data.rg.bar, vsec_data.rg.off, vsec_data.rg.sz,
+ chip->reg_base);
+
+
+ for (i = 0; i < chip->ll_wr_cnt; i++) {
+ pci_dbg(pdev, "L. List:\tWRITE CH%.2u, BAR=%u, off=0x%.8lx, sz=0x%zx bytes, addr(v=%p, p=%pa)\n",
+ i, vsec_data.ll_wr[i].bar,
+ vsec_data.ll_wr[i].off, chip->ll_region_wr[i].sz,
+ chip->ll_region_wr[i].vaddr, &chip->ll_region_wr[i].paddr);
+
+ pci_dbg(pdev, "Data:\tWRITE CH%.2u, BAR=%u, off=0x%.8lx, sz=0x%zx bytes, addr(v=%p, p=%pa)\n",
+ i, vsec_data.dt_wr[i].bar,
+ vsec_data.dt_wr[i].off, chip->dt_region_wr[i].sz,
+ chip->dt_region_wr[i].vaddr, &chip->dt_region_wr[i].paddr);
+ }
+
+ for (i = 0; i < chip->ll_rd_cnt; i++) {
+ pci_dbg(pdev, "L. List:\tREAD CH%.2u, BAR=%u, off=0x%.8lx, sz=0x%zx bytes, addr(v=%p, p=%pa)\n",
+ i, vsec_data.ll_rd[i].bar,
+ vsec_data.ll_rd[i].off, chip->ll_region_rd[i].sz,
+ chip->ll_region_rd[i].vaddr, &chip->ll_region_rd[i].paddr);
+
+ pci_dbg(pdev, "Data:\tREAD CH%.2u, BAR=%u, off=0x%.8lx, sz=0x%zx bytes, addr(v=%p, p=%pa)\n",
+ i, vsec_data.dt_rd[i].bar,
+ vsec_data.dt_rd[i].off, chip->dt_region_rd[i].sz,
+ chip->dt_region_rd[i].vaddr, &chip->dt_region_rd[i].paddr);
+ }
+
+ pci_dbg(pdev, "Nr. IRQs:\t%u\n", chip->nr_irqs);
+
+ /* Validating if PCI interrupts were enabled */
+ if (!pci_dev_msi_enabled(pdev)) {
+ pci_err(pdev, "enable interrupt failed\n");
+ return -EPERM;
+ }
+
+ /* Starting eDMA driver */
+ err = dw_edma_probe(chip);
+ if (err) {
+ pci_err(pdev, "eDMA probe failed\n");
+ return err;
+ }
+
+ /* Saving data structure reference */
+ pci_set_drvdata(pdev, chip);
+
+ return 0;
+}
+
+static void dw_edma_pcie_remove(struct pci_dev *pdev)
+{
+ struct dw_edma_chip *chip = pci_get_drvdata(pdev);
+ int err;
+
+ /* Stopping eDMA driver */
+ err = dw_edma_remove(chip);
+ if (err)
+ pci_warn(pdev, "can't remove device properly: %d\n", err);
+
+ /* Freeing IRQs */
+ pci_free_irq_vectors(pdev);
+}
+
+static const struct pci_device_id dw_edma_pcie_id_table[] = {
+ { PCI_DEVICE_DATA(SYNOPSYS, EDDA, &snps_edda_data) },
+ { }
+};
+MODULE_DEVICE_TABLE(pci, dw_edma_pcie_id_table);
+
+static struct pci_driver dw_edma_pcie_driver = {
+ .name = "dw-edma-pcie",
+ .id_table = dw_edma_pcie_id_table,
+ .probe = dw_edma_pcie_probe,
+ .remove = dw_edma_pcie_remove,
+};
+
+module_pci_driver(dw_edma_pcie_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("Synopsys DesignWare eDMA PCIe driver");
+MODULE_AUTHOR("Gustavo Pimentel <gustavo.pimentel@synopsys.com>");
diff --git a/drivers/dma/dw-edma/dw-edma-v0-core.c b/drivers/dma/dw-edma/dw-edma-v0-core.c
new file mode 100644
index 000000000..a3816ba63
--- /dev/null
+++ b/drivers/dma/dw-edma/dw-edma-v0-core.c
@@ -0,0 +1,511 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2018-2019 Synopsys, Inc. and/or its affiliates.
+ * Synopsys DesignWare eDMA v0 core
+ *
+ * Author: Gustavo Pimentel <gustavo.pimentel@synopsys.com>
+ */
+
+#include <linux/bitfield.h>
+
+#include "dw-edma-core.h"
+#include "dw-edma-v0-core.h"
+#include "dw-edma-v0-regs.h"
+#include "dw-edma-v0-debugfs.h"
+
+enum dw_edma_control {
+ DW_EDMA_V0_CB = BIT(0),
+ DW_EDMA_V0_TCB = BIT(1),
+ DW_EDMA_V0_LLP = BIT(2),
+ DW_EDMA_V0_LIE = BIT(3),
+ DW_EDMA_V0_RIE = BIT(4),
+ DW_EDMA_V0_CCS = BIT(8),
+ DW_EDMA_V0_LLE = BIT(9),
+};
+
+static inline struct dw_edma_v0_regs __iomem *__dw_regs(struct dw_edma *dw)
+{
+ return dw->chip->reg_base;
+}
+
+#define SET_32(dw, name, value) \
+ writel(value, &(__dw_regs(dw)->name))
+
+#define GET_32(dw, name) \
+ readl(&(__dw_regs(dw)->name))
+
+#define SET_RW_32(dw, dir, name, value) \
+ do { \
+ if ((dir) == EDMA_DIR_WRITE) \
+ SET_32(dw, wr_##name, value); \
+ else \
+ SET_32(dw, rd_##name, value); \
+ } while (0)
+
+#define GET_RW_32(dw, dir, name) \
+ ((dir) == EDMA_DIR_WRITE \
+ ? GET_32(dw, wr_##name) \
+ : GET_32(dw, rd_##name))
+
+#define SET_BOTH_32(dw, name, value) \
+ do { \
+ SET_32(dw, wr_##name, value); \
+ SET_32(dw, rd_##name, value); \
+ } while (0)
+
+#ifdef CONFIG_64BIT
+
+#define SET_64(dw, name, value) \
+ writeq(value, &(__dw_regs(dw)->name))
+
+#define GET_64(dw, name) \
+ readq(&(__dw_regs(dw)->name))
+
+#define SET_RW_64(dw, dir, name, value) \
+ do { \
+ if ((dir) == EDMA_DIR_WRITE) \
+ SET_64(dw, wr_##name, value); \
+ else \
+ SET_64(dw, rd_##name, value); \
+ } while (0)
+
+#define GET_RW_64(dw, dir, name) \
+ ((dir) == EDMA_DIR_WRITE \
+ ? GET_64(dw, wr_##name) \
+ : GET_64(dw, rd_##name))
+
+#define SET_BOTH_64(dw, name, value) \
+ do { \
+ SET_64(dw, wr_##name, value); \
+ SET_64(dw, rd_##name, value); \
+ } while (0)
+
+#endif /* CONFIG_64BIT */
+
+#define SET_COMPAT(dw, name, value) \
+ writel(value, &(__dw_regs(dw)->type.unroll.name))
+
+#define SET_RW_COMPAT(dw, dir, name, value) \
+ do { \
+ if ((dir) == EDMA_DIR_WRITE) \
+ SET_COMPAT(dw, wr_##name, value); \
+ else \
+ SET_COMPAT(dw, rd_##name, value); \
+ } while (0)
+
+static inline struct dw_edma_v0_ch_regs __iomem *
+__dw_ch_regs(struct dw_edma *dw, enum dw_edma_dir dir, u16 ch)
+{
+ if (dw->chip->mf == EDMA_MF_EDMA_LEGACY)
+ return &(__dw_regs(dw)->type.legacy.ch);
+
+ if (dir == EDMA_DIR_WRITE)
+ return &__dw_regs(dw)->type.unroll.ch[ch].wr;
+
+ return &__dw_regs(dw)->type.unroll.ch[ch].rd;
+}
+
+static inline void writel_ch(struct dw_edma *dw, enum dw_edma_dir dir, u16 ch,
+ u32 value, void __iomem *addr)
+{
+ if (dw->chip->mf == EDMA_MF_EDMA_LEGACY) {
+ u32 viewport_sel;
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&dw->lock, flags);
+
+ viewport_sel = FIELD_PREP(EDMA_V0_VIEWPORT_MASK, ch);
+ if (dir == EDMA_DIR_READ)
+ viewport_sel |= BIT(31);
+
+ writel(viewport_sel,
+ &(__dw_regs(dw)->type.legacy.viewport_sel));
+ writel(value, addr);
+
+ raw_spin_unlock_irqrestore(&dw->lock, flags);
+ } else {
+ writel(value, addr);
+ }
+}
+
+static inline u32 readl_ch(struct dw_edma *dw, enum dw_edma_dir dir, u16 ch,
+ const void __iomem *addr)
+{
+ u32 value;
+
+ if (dw->chip->mf == EDMA_MF_EDMA_LEGACY) {
+ u32 viewport_sel;
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&dw->lock, flags);
+
+ viewport_sel = FIELD_PREP(EDMA_V0_VIEWPORT_MASK, ch);
+ if (dir == EDMA_DIR_READ)
+ viewport_sel |= BIT(31);
+
+ writel(viewport_sel,
+ &(__dw_regs(dw)->type.legacy.viewport_sel));
+ value = readl(addr);
+
+ raw_spin_unlock_irqrestore(&dw->lock, flags);
+ } else {
+ value = readl(addr);
+ }
+
+ return value;
+}
+
+#define SET_CH_32(dw, dir, ch, name, value) \
+ writel_ch(dw, dir, ch, value, &(__dw_ch_regs(dw, dir, ch)->name))
+
+#define GET_CH_32(dw, dir, ch, name) \
+ readl_ch(dw, dir, ch, &(__dw_ch_regs(dw, dir, ch)->name))
+
+#define SET_LL_32(ll, value) \
+ writel(value, ll)
+
+#ifdef CONFIG_64BIT
+
+static inline void writeq_ch(struct dw_edma *dw, enum dw_edma_dir dir, u16 ch,
+ u64 value, void __iomem *addr)
+{
+ if (dw->chip->mf == EDMA_MF_EDMA_LEGACY) {
+ u32 viewport_sel;
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&dw->lock, flags);
+
+ viewport_sel = FIELD_PREP(EDMA_V0_VIEWPORT_MASK, ch);
+ if (dir == EDMA_DIR_READ)
+ viewport_sel |= BIT(31);
+
+ writel(viewport_sel,
+ &(__dw_regs(dw)->type.legacy.viewport_sel));
+ writeq(value, addr);
+
+ raw_spin_unlock_irqrestore(&dw->lock, flags);
+ } else {
+ writeq(value, addr);
+ }
+}
+
+static inline u64 readq_ch(struct dw_edma *dw, enum dw_edma_dir dir, u16 ch,
+ const void __iomem *addr)
+{
+ u64 value;
+
+ if (dw->chip->mf == EDMA_MF_EDMA_LEGACY) {
+ u32 viewport_sel;
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&dw->lock, flags);
+
+ viewport_sel = FIELD_PREP(EDMA_V0_VIEWPORT_MASK, ch);
+ if (dir == EDMA_DIR_READ)
+ viewport_sel |= BIT(31);
+
+ writel(viewport_sel,
+ &(__dw_regs(dw)->type.legacy.viewport_sel));
+ value = readq(addr);
+
+ raw_spin_unlock_irqrestore(&dw->lock, flags);
+ } else {
+ value = readq(addr);
+ }
+
+ return value;
+}
+
+#define SET_CH_64(dw, dir, ch, name, value) \
+ writeq_ch(dw, dir, ch, value, &(__dw_ch_regs(dw, dir, ch)->name))
+
+#define GET_CH_64(dw, dir, ch, name) \
+ readq_ch(dw, dir, ch, &(__dw_ch_regs(dw, dir, ch)->name))
+
+#define SET_LL_64(ll, value) \
+ writeq(value, ll)
+
+#endif /* CONFIG_64BIT */
+
+/* eDMA management callbacks */
+void dw_edma_v0_core_off(struct dw_edma *dw)
+{
+ SET_BOTH_32(dw, int_mask,
+ EDMA_V0_DONE_INT_MASK | EDMA_V0_ABORT_INT_MASK);
+ SET_BOTH_32(dw, int_clear,
+ EDMA_V0_DONE_INT_MASK | EDMA_V0_ABORT_INT_MASK);
+ SET_BOTH_32(dw, engine_en, 0);
+}
+
+u16 dw_edma_v0_core_ch_count(struct dw_edma *dw, enum dw_edma_dir dir)
+{
+ u32 num_ch;
+
+ if (dir == EDMA_DIR_WRITE)
+ num_ch = FIELD_GET(EDMA_V0_WRITE_CH_COUNT_MASK,
+ GET_32(dw, ctrl));
+ else
+ num_ch = FIELD_GET(EDMA_V0_READ_CH_COUNT_MASK,
+ GET_32(dw, ctrl));
+
+ if (num_ch > EDMA_V0_MAX_NR_CH)
+ num_ch = EDMA_V0_MAX_NR_CH;
+
+ return (u16)num_ch;
+}
+
+enum dma_status dw_edma_v0_core_ch_status(struct dw_edma_chan *chan)
+{
+ struct dw_edma *dw = chan->dw;
+ u32 tmp;
+
+ tmp = FIELD_GET(EDMA_V0_CH_STATUS_MASK,
+ GET_CH_32(dw, chan->dir, chan->id, ch_control1));
+
+ if (tmp == 1)
+ return DMA_IN_PROGRESS;
+ else if (tmp == 3)
+ return DMA_COMPLETE;
+ else
+ return DMA_ERROR;
+}
+
+void dw_edma_v0_core_clear_done_int(struct dw_edma_chan *chan)
+{
+ struct dw_edma *dw = chan->dw;
+
+ SET_RW_32(dw, chan->dir, int_clear,
+ FIELD_PREP(EDMA_V0_DONE_INT_MASK, BIT(chan->id)));
+}
+
+void dw_edma_v0_core_clear_abort_int(struct dw_edma_chan *chan)
+{
+ struct dw_edma *dw = chan->dw;
+
+ SET_RW_32(dw, chan->dir, int_clear,
+ FIELD_PREP(EDMA_V0_ABORT_INT_MASK, BIT(chan->id)));
+}
+
+u32 dw_edma_v0_core_status_done_int(struct dw_edma *dw, enum dw_edma_dir dir)
+{
+ return FIELD_GET(EDMA_V0_DONE_INT_MASK,
+ GET_RW_32(dw, dir, int_status));
+}
+
+u32 dw_edma_v0_core_status_abort_int(struct dw_edma *dw, enum dw_edma_dir dir)
+{
+ return FIELD_GET(EDMA_V0_ABORT_INT_MASK,
+ GET_RW_32(dw, dir, int_status));
+}
+
+static void dw_edma_v0_core_write_chunk(struct dw_edma_chunk *chunk)
+{
+ struct dw_edma_burst *child;
+ struct dw_edma_chan *chan = chunk->chan;
+ struct dw_edma_v0_lli __iomem *lli;
+ struct dw_edma_v0_llp __iomem *llp;
+ u32 control = 0, i = 0;
+ int j;
+
+ lli = chunk->ll_region.vaddr;
+
+ if (chunk->cb)
+ control = DW_EDMA_V0_CB;
+
+ j = chunk->bursts_alloc;
+ list_for_each_entry(child, &chunk->burst->list, list) {
+ j--;
+ if (!j) {
+ control |= DW_EDMA_V0_LIE;
+ if (!(chan->dw->chip->flags & DW_EDMA_CHIP_LOCAL))
+ control |= DW_EDMA_V0_RIE;
+ }
+ /* Channel control */
+ SET_LL_32(&lli[i].control, control);
+ /* Transfer size */
+ SET_LL_32(&lli[i].transfer_size, child->sz);
+ /* SAR */
+ #ifdef CONFIG_64BIT
+ SET_LL_64(&lli[i].sar.reg, child->sar);
+ #else /* CONFIG_64BIT */
+ SET_LL_32(&lli[i].sar.lsb, lower_32_bits(child->sar));
+ SET_LL_32(&lli[i].sar.msb, upper_32_bits(child->sar));
+ #endif /* CONFIG_64BIT */
+ /* DAR */
+ #ifdef CONFIG_64BIT
+ SET_LL_64(&lli[i].dar.reg, child->dar);
+ #else /* CONFIG_64BIT */
+ SET_LL_32(&lli[i].dar.lsb, lower_32_bits(child->dar));
+ SET_LL_32(&lli[i].dar.msb, upper_32_bits(child->dar));
+ #endif /* CONFIG_64BIT */
+ i++;
+ }
+
+ llp = (void __iomem *)&lli[i];
+ control = DW_EDMA_V0_LLP | DW_EDMA_V0_TCB;
+ if (!chunk->cb)
+ control |= DW_EDMA_V0_CB;
+
+ /* Channel control */
+ SET_LL_32(&llp->control, control);
+ /* Linked list */
+ #ifdef CONFIG_64BIT
+ SET_LL_64(&llp->llp.reg, chunk->ll_region.paddr);
+ #else /* CONFIG_64BIT */
+ SET_LL_32(&llp->llp.lsb, lower_32_bits(chunk->ll_region.paddr));
+ SET_LL_32(&llp->llp.msb, upper_32_bits(chunk->ll_region.paddr));
+ #endif /* CONFIG_64BIT */
+}
+
+void dw_edma_v0_core_start(struct dw_edma_chunk *chunk, bool first)
+{
+ struct dw_edma_chan *chan = chunk->chan;
+ struct dw_edma *dw = chan->dw;
+ u32 tmp;
+
+ dw_edma_v0_core_write_chunk(chunk);
+
+ if (first) {
+ /* Enable engine */
+ SET_RW_32(dw, chan->dir, engine_en, BIT(0));
+ if (dw->chip->mf == EDMA_MF_HDMA_COMPAT) {
+ switch (chan->id) {
+ case 0:
+ SET_RW_COMPAT(dw, chan->dir, ch0_pwr_en,
+ BIT(0));
+ break;
+ case 1:
+ SET_RW_COMPAT(dw, chan->dir, ch1_pwr_en,
+ BIT(0));
+ break;
+ case 2:
+ SET_RW_COMPAT(dw, chan->dir, ch2_pwr_en,
+ BIT(0));
+ break;
+ case 3:
+ SET_RW_COMPAT(dw, chan->dir, ch3_pwr_en,
+ BIT(0));
+ break;
+ case 4:
+ SET_RW_COMPAT(dw, chan->dir, ch4_pwr_en,
+ BIT(0));
+ break;
+ case 5:
+ SET_RW_COMPAT(dw, chan->dir, ch5_pwr_en,
+ BIT(0));
+ break;
+ case 6:
+ SET_RW_COMPAT(dw, chan->dir, ch6_pwr_en,
+ BIT(0));
+ break;
+ case 7:
+ SET_RW_COMPAT(dw, chan->dir, ch7_pwr_en,
+ BIT(0));
+ break;
+ }
+ }
+ /* Interrupt unmask - done, abort */
+ tmp = GET_RW_32(dw, chan->dir, int_mask);
+ tmp &= ~FIELD_PREP(EDMA_V0_DONE_INT_MASK, BIT(chan->id));
+ tmp &= ~FIELD_PREP(EDMA_V0_ABORT_INT_MASK, BIT(chan->id));
+ SET_RW_32(dw, chan->dir, int_mask, tmp);
+ /* Linked list error */
+ tmp = GET_RW_32(dw, chan->dir, linked_list_err_en);
+ tmp |= FIELD_PREP(EDMA_V0_LINKED_LIST_ERR_MASK, BIT(chan->id));
+ SET_RW_32(dw, chan->dir, linked_list_err_en, tmp);
+ /* Channel control */
+ SET_CH_32(dw, chan->dir, chan->id, ch_control1,
+ (DW_EDMA_V0_CCS | DW_EDMA_V0_LLE));
+ /* Linked list */
+ /* llp is not aligned on 64bit -> keep 32bit accesses */
+ SET_CH_32(dw, chan->dir, chan->id, llp.lsb,
+ lower_32_bits(chunk->ll_region.paddr));
+ SET_CH_32(dw, chan->dir, chan->id, llp.msb,
+ upper_32_bits(chunk->ll_region.paddr));
+ }
+ /* Doorbell */
+ SET_RW_32(dw, chan->dir, doorbell,
+ FIELD_PREP(EDMA_V0_DOORBELL_CH_MASK, chan->id));
+}
+
+int dw_edma_v0_core_device_config(struct dw_edma_chan *chan)
+{
+ struct dw_edma *dw = chan->dw;
+ u32 tmp = 0;
+
+ /* MSI done addr - low, high */
+ SET_RW_32(dw, chan->dir, done_imwr.lsb, chan->msi.address_lo);
+ SET_RW_32(dw, chan->dir, done_imwr.msb, chan->msi.address_hi);
+ /* MSI abort addr - low, high */
+ SET_RW_32(dw, chan->dir, abort_imwr.lsb, chan->msi.address_lo);
+ SET_RW_32(dw, chan->dir, abort_imwr.msb, chan->msi.address_hi);
+ /* MSI data - low, high */
+ switch (chan->id) {
+ case 0:
+ case 1:
+ tmp = GET_RW_32(dw, chan->dir, ch01_imwr_data);
+ break;
+
+ case 2:
+ case 3:
+ tmp = GET_RW_32(dw, chan->dir, ch23_imwr_data);
+ break;
+
+ case 4:
+ case 5:
+ tmp = GET_RW_32(dw, chan->dir, ch45_imwr_data);
+ break;
+
+ case 6:
+ case 7:
+ tmp = GET_RW_32(dw, chan->dir, ch67_imwr_data);
+ break;
+ }
+
+ if (chan->id & BIT(0)) {
+ /* Channel odd {1, 3, 5, 7} */
+ tmp &= EDMA_V0_CH_EVEN_MSI_DATA_MASK;
+ tmp |= FIELD_PREP(EDMA_V0_CH_ODD_MSI_DATA_MASK,
+ chan->msi.data);
+ } else {
+ /* Channel even {0, 2, 4, 6} */
+ tmp &= EDMA_V0_CH_ODD_MSI_DATA_MASK;
+ tmp |= FIELD_PREP(EDMA_V0_CH_EVEN_MSI_DATA_MASK,
+ chan->msi.data);
+ }
+
+ switch (chan->id) {
+ case 0:
+ case 1:
+ SET_RW_32(dw, chan->dir, ch01_imwr_data, tmp);
+ break;
+
+ case 2:
+ case 3:
+ SET_RW_32(dw, chan->dir, ch23_imwr_data, tmp);
+ break;
+
+ case 4:
+ case 5:
+ SET_RW_32(dw, chan->dir, ch45_imwr_data, tmp);
+ break;
+
+ case 6:
+ case 7:
+ SET_RW_32(dw, chan->dir, ch67_imwr_data, tmp);
+ break;
+ }
+
+ return 0;
+}
+
+/* eDMA debugfs callbacks */
+void dw_edma_v0_core_debugfs_on(struct dw_edma *dw)
+{
+ dw_edma_v0_debugfs_on(dw);
+}
+
+void dw_edma_v0_core_debugfs_off(struct dw_edma *dw)
+{
+ dw_edma_v0_debugfs_off(dw);
+}
diff --git a/drivers/dma/dw-edma/dw-edma-v0-core.h b/drivers/dma/dw-edma/dw-edma-v0-core.h
new file mode 100644
index 000000000..75aec6d31
--- /dev/null
+++ b/drivers/dma/dw-edma/dw-edma-v0-core.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2018-2019 Synopsys, Inc. and/or its affiliates.
+ * Synopsys DesignWare eDMA v0 core
+ *
+ * Author: Gustavo Pimentel <gustavo.pimentel@synopsys.com>
+ */
+
+#ifndef _DW_EDMA_V0_CORE_H
+#define _DW_EDMA_V0_CORE_H
+
+#include <linux/dma/edma.h>
+
+/* eDMA management callbacks */
+void dw_edma_v0_core_off(struct dw_edma *chan);
+u16 dw_edma_v0_core_ch_count(struct dw_edma *chan, enum dw_edma_dir dir);
+enum dma_status dw_edma_v0_core_ch_status(struct dw_edma_chan *chan);
+void dw_edma_v0_core_clear_done_int(struct dw_edma_chan *chan);
+void dw_edma_v0_core_clear_abort_int(struct dw_edma_chan *chan);
+u32 dw_edma_v0_core_status_done_int(struct dw_edma *chan, enum dw_edma_dir dir);
+u32 dw_edma_v0_core_status_abort_int(struct dw_edma *chan, enum dw_edma_dir dir);
+void dw_edma_v0_core_start(struct dw_edma_chunk *chunk, bool first);
+int dw_edma_v0_core_device_config(struct dw_edma_chan *chan);
+/* eDMA debug fs callbacks */
+void dw_edma_v0_core_debugfs_on(struct dw_edma *dw);
+void dw_edma_v0_core_debugfs_off(struct dw_edma *dw);
+
+#endif /* _DW_EDMA_V0_CORE_H */
diff --git a/drivers/dma/dw-edma/dw-edma-v0-debugfs.c b/drivers/dma/dw-edma/dw-edma-v0-debugfs.c
new file mode 100644
index 000000000..5226c9014
--- /dev/null
+++ b/drivers/dma/dw-edma/dw-edma-v0-debugfs.c
@@ -0,0 +1,314 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2018-2019 Synopsys, Inc. and/or its affiliates.
+ * Synopsys DesignWare eDMA v0 core
+ *
+ * Author: Gustavo Pimentel <gustavo.pimentel@synopsys.com>
+ */
+
+#include <linux/debugfs.h>
+#include <linux/bitfield.h>
+
+#include "dw-edma-v0-debugfs.h"
+#include "dw-edma-v0-regs.h"
+#include "dw-edma-core.h"
+
+#define REGS_ADDR(name) \
+ ((void __force *)&regs->name)
+#define REGISTER(name) \
+ { #name, REGS_ADDR(name) }
+
+#define WR_REGISTER(name) \
+ { #name, REGS_ADDR(wr_##name) }
+#define RD_REGISTER(name) \
+ { #name, REGS_ADDR(rd_##name) }
+
+#define WR_REGISTER_LEGACY(name) \
+ { #name, REGS_ADDR(type.legacy.wr_##name) }
+#define RD_REGISTER_LEGACY(name) \
+ { #name, REGS_ADDR(type.legacy.rd_##name) }
+
+#define WR_REGISTER_UNROLL(name) \
+ { #name, REGS_ADDR(type.unroll.wr_##name) }
+#define RD_REGISTER_UNROLL(name) \
+ { #name, REGS_ADDR(type.unroll.rd_##name) }
+
+#define WRITE_STR "write"
+#define READ_STR "read"
+#define CHANNEL_STR "channel"
+#define REGISTERS_STR "registers"
+
+static struct dw_edma *dw;
+static struct dw_edma_v0_regs __iomem *regs;
+
+static struct {
+ void __iomem *start;
+ void __iomem *end;
+} lim[2][EDMA_V0_MAX_NR_CH];
+
+struct debugfs_entries {
+ const char *name;
+ dma_addr_t *reg;
+};
+
+static int dw_edma_debugfs_u32_get(void *data, u64 *val)
+{
+ void __iomem *reg = (void __force __iomem *)data;
+ if (dw->chip->mf == EDMA_MF_EDMA_LEGACY &&
+ reg >= (void __iomem *)&regs->type.legacy.ch) {
+ void __iomem *ptr = &regs->type.legacy.ch;
+ u32 viewport_sel = 0;
+ unsigned long flags;
+ u16 ch;
+
+ for (ch = 0; ch < dw->wr_ch_cnt; ch++)
+ if (lim[0][ch].start >= reg && reg < lim[0][ch].end) {
+ ptr += (reg - lim[0][ch].start);
+ goto legacy_sel_wr;
+ }
+
+ for (ch = 0; ch < dw->rd_ch_cnt; ch++)
+ if (lim[1][ch].start >= reg && reg < lim[1][ch].end) {
+ ptr += (reg - lim[1][ch].start);
+ goto legacy_sel_rd;
+ }
+
+ return 0;
+legacy_sel_rd:
+ viewport_sel = BIT(31);
+legacy_sel_wr:
+ viewport_sel |= FIELD_PREP(EDMA_V0_VIEWPORT_MASK, ch);
+
+ raw_spin_lock_irqsave(&dw->lock, flags);
+
+ writel(viewport_sel, &regs->type.legacy.viewport_sel);
+ *val = readl(ptr);
+
+ raw_spin_unlock_irqrestore(&dw->lock, flags);
+ } else {
+ *val = readl(reg);
+ }
+
+ return 0;
+}
+DEFINE_DEBUGFS_ATTRIBUTE(fops_x32, dw_edma_debugfs_u32_get, NULL, "0x%08llx\n");
+
+static void dw_edma_debugfs_create_x32(const struct debugfs_entries entries[],
+ int nr_entries, struct dentry *dir)
+{
+ int i;
+
+ for (i = 0; i < nr_entries; i++) {
+ if (!debugfs_create_file_unsafe(entries[i].name, 0444, dir,
+ entries[i].reg, &fops_x32))
+ break;
+ }
+}
+
+static void dw_edma_debugfs_regs_ch(struct dw_edma_v0_ch_regs __iomem *regs,
+ struct dentry *dir)
+{
+ int nr_entries;
+ const struct debugfs_entries debugfs_regs[] = {
+ REGISTER(ch_control1),
+ REGISTER(ch_control2),
+ REGISTER(transfer_size),
+ REGISTER(sar.lsb),
+ REGISTER(sar.msb),
+ REGISTER(dar.lsb),
+ REGISTER(dar.msb),
+ REGISTER(llp.lsb),
+ REGISTER(llp.msb),
+ };
+
+ nr_entries = ARRAY_SIZE(debugfs_regs);
+ dw_edma_debugfs_create_x32(debugfs_regs, nr_entries, dir);
+}
+
+static void dw_edma_debugfs_regs_wr(struct dentry *dir)
+{
+ const struct debugfs_entries debugfs_regs[] = {
+ /* eDMA global registers */
+ WR_REGISTER(engine_en),
+ WR_REGISTER(doorbell),
+ WR_REGISTER(ch_arb_weight.lsb),
+ WR_REGISTER(ch_arb_weight.msb),
+ /* eDMA interrupts registers */
+ WR_REGISTER(int_status),
+ WR_REGISTER(int_mask),
+ WR_REGISTER(int_clear),
+ WR_REGISTER(err_status),
+ WR_REGISTER(done_imwr.lsb),
+ WR_REGISTER(done_imwr.msb),
+ WR_REGISTER(abort_imwr.lsb),
+ WR_REGISTER(abort_imwr.msb),
+ WR_REGISTER(ch01_imwr_data),
+ WR_REGISTER(ch23_imwr_data),
+ WR_REGISTER(ch45_imwr_data),
+ WR_REGISTER(ch67_imwr_data),
+ WR_REGISTER(linked_list_err_en),
+ };
+ const struct debugfs_entries debugfs_unroll_regs[] = {
+ /* eDMA channel context grouping */
+ WR_REGISTER_UNROLL(engine_chgroup),
+ WR_REGISTER_UNROLL(engine_hshake_cnt.lsb),
+ WR_REGISTER_UNROLL(engine_hshake_cnt.msb),
+ WR_REGISTER_UNROLL(ch0_pwr_en),
+ WR_REGISTER_UNROLL(ch1_pwr_en),
+ WR_REGISTER_UNROLL(ch2_pwr_en),
+ WR_REGISTER_UNROLL(ch3_pwr_en),
+ WR_REGISTER_UNROLL(ch4_pwr_en),
+ WR_REGISTER_UNROLL(ch5_pwr_en),
+ WR_REGISTER_UNROLL(ch6_pwr_en),
+ WR_REGISTER_UNROLL(ch7_pwr_en),
+ };
+ struct dentry *regs_dir, *ch_dir;
+ int nr_entries, i;
+ char name[16];
+
+ regs_dir = debugfs_create_dir(WRITE_STR, dir);
+ if (!regs_dir)
+ return;
+
+ nr_entries = ARRAY_SIZE(debugfs_regs);
+ dw_edma_debugfs_create_x32(debugfs_regs, nr_entries, regs_dir);
+
+ if (dw->chip->mf == EDMA_MF_HDMA_COMPAT) {
+ nr_entries = ARRAY_SIZE(debugfs_unroll_regs);
+ dw_edma_debugfs_create_x32(debugfs_unroll_regs, nr_entries,
+ regs_dir);
+ }
+
+ for (i = 0; i < dw->wr_ch_cnt; i++) {
+ snprintf(name, sizeof(name), "%s:%d", CHANNEL_STR, i);
+
+ ch_dir = debugfs_create_dir(name, regs_dir);
+ if (!ch_dir)
+ return;
+
+ dw_edma_debugfs_regs_ch(&regs->type.unroll.ch[i].wr, ch_dir);
+
+ lim[0][i].start = &regs->type.unroll.ch[i].wr;
+ lim[0][i].end = &regs->type.unroll.ch[i].padding_1[0];
+ }
+}
+
+static void dw_edma_debugfs_regs_rd(struct dentry *dir)
+{
+ const struct debugfs_entries debugfs_regs[] = {
+ /* eDMA global registers */
+ RD_REGISTER(engine_en),
+ RD_REGISTER(doorbell),
+ RD_REGISTER(ch_arb_weight.lsb),
+ RD_REGISTER(ch_arb_weight.msb),
+ /* eDMA interrupts registers */
+ RD_REGISTER(int_status),
+ RD_REGISTER(int_mask),
+ RD_REGISTER(int_clear),
+ RD_REGISTER(err_status.lsb),
+ RD_REGISTER(err_status.msb),
+ RD_REGISTER(linked_list_err_en),
+ RD_REGISTER(done_imwr.lsb),
+ RD_REGISTER(done_imwr.msb),
+ RD_REGISTER(abort_imwr.lsb),
+ RD_REGISTER(abort_imwr.msb),
+ RD_REGISTER(ch01_imwr_data),
+ RD_REGISTER(ch23_imwr_data),
+ RD_REGISTER(ch45_imwr_data),
+ RD_REGISTER(ch67_imwr_data),
+ };
+ const struct debugfs_entries debugfs_unroll_regs[] = {
+ /* eDMA channel context grouping */
+ RD_REGISTER_UNROLL(engine_chgroup),
+ RD_REGISTER_UNROLL(engine_hshake_cnt.lsb),
+ RD_REGISTER_UNROLL(engine_hshake_cnt.msb),
+ RD_REGISTER_UNROLL(ch0_pwr_en),
+ RD_REGISTER_UNROLL(ch1_pwr_en),
+ RD_REGISTER_UNROLL(ch2_pwr_en),
+ RD_REGISTER_UNROLL(ch3_pwr_en),
+ RD_REGISTER_UNROLL(ch4_pwr_en),
+ RD_REGISTER_UNROLL(ch5_pwr_en),
+ RD_REGISTER_UNROLL(ch6_pwr_en),
+ RD_REGISTER_UNROLL(ch7_pwr_en),
+ };
+ struct dentry *regs_dir, *ch_dir;
+ int nr_entries, i;
+ char name[16];
+
+ regs_dir = debugfs_create_dir(READ_STR, dir);
+ if (!regs_dir)
+ return;
+
+ nr_entries = ARRAY_SIZE(debugfs_regs);
+ dw_edma_debugfs_create_x32(debugfs_regs, nr_entries, regs_dir);
+
+ if (dw->chip->mf == EDMA_MF_HDMA_COMPAT) {
+ nr_entries = ARRAY_SIZE(debugfs_unroll_regs);
+ dw_edma_debugfs_create_x32(debugfs_unroll_regs, nr_entries,
+ regs_dir);
+ }
+
+ for (i = 0; i < dw->rd_ch_cnt; i++) {
+ snprintf(name, sizeof(name), "%s:%d", CHANNEL_STR, i);
+
+ ch_dir = debugfs_create_dir(name, regs_dir);
+ if (!ch_dir)
+ return;
+
+ dw_edma_debugfs_regs_ch(&regs->type.unroll.ch[i].rd, ch_dir);
+
+ lim[1][i].start = &regs->type.unroll.ch[i].rd;
+ lim[1][i].end = &regs->type.unroll.ch[i].padding_2[0];
+ }
+}
+
+static void dw_edma_debugfs_regs(void)
+{
+ const struct debugfs_entries debugfs_regs[] = {
+ REGISTER(ctrl_data_arb_prior),
+ REGISTER(ctrl),
+ };
+ struct dentry *regs_dir;
+ int nr_entries;
+
+ regs_dir = debugfs_create_dir(REGISTERS_STR, dw->debugfs);
+ if (!regs_dir)
+ return;
+
+ nr_entries = ARRAY_SIZE(debugfs_regs);
+ dw_edma_debugfs_create_x32(debugfs_regs, nr_entries, regs_dir);
+
+ dw_edma_debugfs_regs_wr(regs_dir);
+ dw_edma_debugfs_regs_rd(regs_dir);
+}
+
+void dw_edma_v0_debugfs_on(struct dw_edma *_dw)
+{
+ dw = _dw;
+ if (!dw)
+ return;
+
+ regs = dw->chip->reg_base;
+ if (!regs)
+ return;
+
+ dw->debugfs = debugfs_create_dir(dw->name, NULL);
+ if (!dw->debugfs)
+ return;
+
+ debugfs_create_u32("mf", 0444, dw->debugfs, &dw->chip->mf);
+ debugfs_create_u16("wr_ch_cnt", 0444, dw->debugfs, &dw->wr_ch_cnt);
+ debugfs_create_u16("rd_ch_cnt", 0444, dw->debugfs, &dw->rd_ch_cnt);
+
+ dw_edma_debugfs_regs();
+}
+
+void dw_edma_v0_debugfs_off(struct dw_edma *_dw)
+{
+ dw = _dw;
+ if (!dw)
+ return;
+
+ debugfs_remove_recursive(dw->debugfs);
+ dw->debugfs = NULL;
+}
diff --git a/drivers/dma/dw-edma/dw-edma-v0-debugfs.h b/drivers/dma/dw-edma/dw-edma-v0-debugfs.h
new file mode 100644
index 000000000..3391b86ed
--- /dev/null
+++ b/drivers/dma/dw-edma/dw-edma-v0-debugfs.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2018-2019 Synopsys, Inc. and/or its affiliates.
+ * Synopsys DesignWare eDMA v0 core
+ *
+ * Author: Gustavo Pimentel <gustavo.pimentel@synopsys.com>
+ */
+
+#ifndef _DW_EDMA_V0_DEBUG_FS_H
+#define _DW_EDMA_V0_DEBUG_FS_H
+
+#include <linux/dma/edma.h>
+
+#ifdef CONFIG_DEBUG_FS
+void dw_edma_v0_debugfs_on(struct dw_edma *dw);
+void dw_edma_v0_debugfs_off(struct dw_edma *dw);
+#else
+static inline void dw_edma_v0_debugfs_on(struct dw_edma *dw)
+{
+}
+
+static inline void dw_edma_v0_debugfs_off(struct dw_edma *dw)
+{
+}
+#endif /* CONFIG_DEBUG_FS */
+
+#endif /* _DW_EDMA_V0_DEBUG_FS_H */
diff --git a/drivers/dma/dw-edma/dw-edma-v0-regs.h b/drivers/dma/dw-edma/dw-edma-v0-regs.h
new file mode 100644
index 000000000..e175f7b20
--- /dev/null
+++ b/drivers/dma/dw-edma/dw-edma-v0-regs.h
@@ -0,0 +1,233 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2018-2019 Synopsys, Inc. and/or its affiliates.
+ * Synopsys DesignWare eDMA v0 core
+ *
+ * Author: Gustavo Pimentel <gustavo.pimentel@synopsys.com>
+ */
+
+#ifndef _DW_EDMA_V0_REGS_H
+#define _DW_EDMA_V0_REGS_H
+
+#include <linux/dmaengine.h>
+
+#define EDMA_V0_MAX_NR_CH 8
+#define EDMA_V0_VIEWPORT_MASK GENMASK(2, 0)
+#define EDMA_V0_DONE_INT_MASK GENMASK(7, 0)
+#define EDMA_V0_ABORT_INT_MASK GENMASK(23, 16)
+#define EDMA_V0_WRITE_CH_COUNT_MASK GENMASK(3, 0)
+#define EDMA_V0_READ_CH_COUNT_MASK GENMASK(19, 16)
+#define EDMA_V0_CH_STATUS_MASK GENMASK(6, 5)
+#define EDMA_V0_DOORBELL_CH_MASK GENMASK(2, 0)
+#define EDMA_V0_LINKED_LIST_ERR_MASK GENMASK(7, 0)
+
+#define EDMA_V0_CH_ODD_MSI_DATA_MASK GENMASK(31, 16)
+#define EDMA_V0_CH_EVEN_MSI_DATA_MASK GENMASK(15, 0)
+
+struct dw_edma_v0_ch_regs {
+ u32 ch_control1; /* 0x0000 */
+ u32 ch_control2; /* 0x0004 */
+ u32 transfer_size; /* 0x0008 */
+ union {
+ u64 reg; /* 0x000c..0x0010 */
+ struct {
+ u32 lsb; /* 0x000c */
+ u32 msb; /* 0x0010 */
+ };
+ } sar;
+ union {
+ u64 reg; /* 0x0014..0x0018 */
+ struct {
+ u32 lsb; /* 0x0014 */
+ u32 msb; /* 0x0018 */
+ };
+ } dar;
+ union {
+ u64 reg; /* 0x001c..0x0020 */
+ struct {
+ u32 lsb; /* 0x001c */
+ u32 msb; /* 0x0020 */
+ };
+ } llp;
+} __packed;
+
+struct dw_edma_v0_ch {
+ struct dw_edma_v0_ch_regs wr; /* 0x0200 */
+ u32 padding_1[55]; /* 0x0224..0x02fc */
+ struct dw_edma_v0_ch_regs rd; /* 0x0300 */
+ u32 padding_2[55]; /* 0x0324..0x03fc */
+} __packed;
+
+struct dw_edma_v0_unroll {
+ u32 padding_1; /* 0x00f8 */
+ u32 wr_engine_chgroup; /* 0x0100 */
+ u32 rd_engine_chgroup; /* 0x0104 */
+ union {
+ u64 reg; /* 0x0108..0x010c */
+ struct {
+ u32 lsb; /* 0x0108 */
+ u32 msb; /* 0x010c */
+ };
+ } wr_engine_hshake_cnt;
+ u32 padding_2[2]; /* 0x0110..0x0114 */
+ union {
+ u64 reg; /* 0x0120..0x0124 */
+ struct {
+ u32 lsb; /* 0x0120 */
+ u32 msb; /* 0x0124 */
+ };
+ } rd_engine_hshake_cnt;
+ u32 padding_3[2]; /* 0x0120..0x0124 */
+ u32 wr_ch0_pwr_en; /* 0x0128 */
+ u32 wr_ch1_pwr_en; /* 0x012c */
+ u32 wr_ch2_pwr_en; /* 0x0130 */
+ u32 wr_ch3_pwr_en; /* 0x0134 */
+ u32 wr_ch4_pwr_en; /* 0x0138 */
+ u32 wr_ch5_pwr_en; /* 0x013c */
+ u32 wr_ch6_pwr_en; /* 0x0140 */
+ u32 wr_ch7_pwr_en; /* 0x0144 */
+ u32 padding_4[8]; /* 0x0148..0x0164 */
+ u32 rd_ch0_pwr_en; /* 0x0168 */
+ u32 rd_ch1_pwr_en; /* 0x016c */
+ u32 rd_ch2_pwr_en; /* 0x0170 */
+ u32 rd_ch3_pwr_en; /* 0x0174 */
+ u32 rd_ch4_pwr_en; /* 0x0178 */
+ u32 rd_ch5_pwr_en; /* 0x018c */
+ u32 rd_ch6_pwr_en; /* 0x0180 */
+ u32 rd_ch7_pwr_en; /* 0x0184 */
+ u32 padding_5[30]; /* 0x0188..0x01fc */
+ struct dw_edma_v0_ch ch[EDMA_V0_MAX_NR_CH]; /* 0x0200..0x1120 */
+} __packed;
+
+struct dw_edma_v0_legacy {
+ u32 viewport_sel; /* 0x00f8 */
+ struct dw_edma_v0_ch_regs ch; /* 0x0100..0x0120 */
+} __packed;
+
+struct dw_edma_v0_regs {
+ /* eDMA global registers */
+ u32 ctrl_data_arb_prior; /* 0x0000 */
+ u32 padding_1; /* 0x0004 */
+ u32 ctrl; /* 0x0008 */
+ u32 wr_engine_en; /* 0x000c */
+ u32 wr_doorbell; /* 0x0010 */
+ u32 padding_2; /* 0x0014 */
+ union {
+ u64 reg; /* 0x0018..0x001c */
+ struct {
+ u32 lsb; /* 0x0018 */
+ u32 msb; /* 0x001c */
+ };
+ } wr_ch_arb_weight;
+ u32 padding_3[3]; /* 0x0020..0x0028 */
+ u32 rd_engine_en; /* 0x002c */
+ u32 rd_doorbell; /* 0x0030 */
+ u32 padding_4; /* 0x0034 */
+ union {
+ u64 reg; /* 0x0038..0x003c */
+ struct {
+ u32 lsb; /* 0x0038 */
+ u32 msb; /* 0x003c */
+ };
+ } rd_ch_arb_weight;
+ u32 padding_5[3]; /* 0x0040..0x0048 */
+ /* eDMA interrupts registers */
+ u32 wr_int_status; /* 0x004c */
+ u32 padding_6; /* 0x0050 */
+ u32 wr_int_mask; /* 0x0054 */
+ u32 wr_int_clear; /* 0x0058 */
+ u32 wr_err_status; /* 0x005c */
+ union {
+ u64 reg; /* 0x0060..0x0064 */
+ struct {
+ u32 lsb; /* 0x0060 */
+ u32 msb; /* 0x0064 */
+ };
+ } wr_done_imwr;
+ union {
+ u64 reg; /* 0x0068..0x006c */
+ struct {
+ u32 lsb; /* 0x0068 */
+ u32 msb; /* 0x006c */
+ };
+ } wr_abort_imwr;
+ u32 wr_ch01_imwr_data; /* 0x0070 */
+ u32 wr_ch23_imwr_data; /* 0x0074 */
+ u32 wr_ch45_imwr_data; /* 0x0078 */
+ u32 wr_ch67_imwr_data; /* 0x007c */
+ u32 padding_7[4]; /* 0x0080..0x008c */
+ u32 wr_linked_list_err_en; /* 0x0090 */
+ u32 padding_8[3]; /* 0x0094..0x009c */
+ u32 rd_int_status; /* 0x00a0 */
+ u32 padding_9; /* 0x00a4 */
+ u32 rd_int_mask; /* 0x00a8 */
+ u32 rd_int_clear; /* 0x00ac */
+ u32 padding_10; /* 0x00b0 */
+ union {
+ u64 reg; /* 0x00b4..0x00b8 */
+ struct {
+ u32 lsb; /* 0x00b4 */
+ u32 msb; /* 0x00b8 */
+ };
+ } rd_err_status;
+ u32 padding_11[2]; /* 0x00bc..0x00c0 */
+ u32 rd_linked_list_err_en; /* 0x00c4 */
+ u32 padding_12; /* 0x00c8 */
+ union {
+ u64 reg; /* 0x00cc..0x00d0 */
+ struct {
+ u32 lsb; /* 0x00cc */
+ u32 msb; /* 0x00d0 */
+ };
+ } rd_done_imwr;
+ union {
+ u64 reg; /* 0x00d4..0x00d8 */
+ struct {
+ u32 lsb; /* 0x00d4 */
+ u32 msb; /* 0x00d8 */
+ };
+ } rd_abort_imwr;
+ u32 rd_ch01_imwr_data; /* 0x00dc */
+ u32 rd_ch23_imwr_data; /* 0x00e0 */
+ u32 rd_ch45_imwr_data; /* 0x00e4 */
+ u32 rd_ch67_imwr_data; /* 0x00e8 */
+ u32 padding_13[4]; /* 0x00ec..0x00f8 */
+ /* eDMA channel context grouping */
+ union dw_edma_v0_type {
+ struct dw_edma_v0_legacy legacy; /* 0x00f8..0x0120 */
+ struct dw_edma_v0_unroll unroll; /* 0x00f8..0x1120 */
+ } type;
+} __packed;
+
+struct dw_edma_v0_lli {
+ u32 control;
+ u32 transfer_size;
+ union {
+ u64 reg;
+ struct {
+ u32 lsb;
+ u32 msb;
+ };
+ } sar;
+ union {
+ u64 reg;
+ struct {
+ u32 lsb;
+ u32 msb;
+ };
+ } dar;
+} __packed;
+
+struct dw_edma_v0_llp {
+ u32 control;
+ u32 reserved;
+ union {
+ u64 reg;
+ struct {
+ u32 lsb;
+ u32 msb;
+ };
+ } llp;
+} __packed;
+
+#endif /* _DW_EDMA_V0_REGS_H */
diff --git a/drivers/dma/dw/Kconfig b/drivers/dma/dw/Kconfig
new file mode 100644
index 000000000..a9828ddd6
--- /dev/null
+++ b/drivers/dma/dw/Kconfig
@@ -0,0 +1,36 @@
+# SPDX-License-Identifier: GPL-2.0
+
+#
+# DMA engine configuration for dw
+#
+
+config DW_DMAC_CORE
+ tristate
+ select DMA_ENGINE
+
+config DW_DMAC
+ tristate "Synopsys DesignWare AHB DMA platform driver"
+ depends on HAS_IOMEM
+ select DW_DMAC_CORE
+ help
+ Support the Synopsys DesignWare AHB DMA controller. This
+ can be integrated in chips such as the Intel Cherrytrail.
+
+config RZN1_DMAMUX
+ tristate "Renesas RZ/N1 DMAMUX driver"
+ depends on DW_DMAC
+ depends on ARCH_RZN1 || COMPILE_TEST
+ help
+ Support the Renesas RZ/N1 DMAMUX which is located in front of
+ the Synopsys DesignWare AHB DMA controller located on Renesas
+ SoCs.
+
+config DW_DMAC_PCI
+ tristate "Synopsys DesignWare AHB DMA PCI driver"
+ depends on PCI
+ depends on HAS_IOMEM
+ select DW_DMAC_CORE
+ help
+ Support the Synopsys DesignWare AHB DMA controller on the
+ platforms that enumerate it as a PCI device. For example,
+ Intel Medfield has integrated this GPDMA controller.
diff --git a/drivers/dma/dw/Makefile b/drivers/dma/dw/Makefile
new file mode 100644
index 000000000..e1796015f
--- /dev/null
+++ b/drivers/dma/dw/Makefile
@@ -0,0 +1,13 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_DW_DMAC_CORE) += dw_dmac_core.o
+dw_dmac_core-y := core.o dw.o idma32.o
+dw_dmac_core-$(CONFIG_ACPI) += acpi.o
+
+obj-$(CONFIG_DW_DMAC) += dw_dmac.o
+dw_dmac-y := platform.o
+dw_dmac-$(CONFIG_OF) += of.o
+
+obj-$(CONFIG_DW_DMAC_PCI) += dw_dmac_pci.o
+dw_dmac_pci-y := pci.o
+
+obj-$(CONFIG_RZN1_DMAMUX) += rzn1-dmamux.o
diff --git a/drivers/dma/dw/acpi.c b/drivers/dma/dw/acpi.c
new file mode 100644
index 000000000..c510c109d
--- /dev/null
+++ b/drivers/dma/dw/acpi.c
@@ -0,0 +1,55 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2013,2019 Intel Corporation
+
+#include <linux/acpi.h>
+#include <linux/acpi_dma.h>
+
+#include "internal.h"
+
+static bool dw_dma_acpi_filter(struct dma_chan *chan, void *param)
+{
+ struct acpi_dma_spec *dma_spec = param;
+ struct dw_dma_slave slave = {
+ .dma_dev = dma_spec->dev,
+ .src_id = dma_spec->slave_id,
+ .dst_id = dma_spec->slave_id,
+ .m_master = 0,
+ .p_master = 1,
+ };
+
+ return dw_dma_filter(chan, &slave);
+}
+
+void dw_dma_acpi_controller_register(struct dw_dma *dw)
+{
+ struct device *dev = dw->dma.dev;
+ struct acpi_dma_filter_info *info;
+ int ret;
+
+ if (!has_acpi_companion(dev))
+ return;
+
+ info = devm_kzalloc(dev, sizeof(*info), GFP_KERNEL);
+ if (!info)
+ return;
+
+ dma_cap_zero(info->dma_cap);
+ dma_cap_set(DMA_SLAVE, info->dma_cap);
+ info->filter_fn = dw_dma_acpi_filter;
+
+ ret = acpi_dma_controller_register(dev, acpi_dma_simple_xlate, info);
+ if (ret)
+ dev_err(dev, "could not register acpi_dma_controller\n");
+}
+EXPORT_SYMBOL_GPL(dw_dma_acpi_controller_register);
+
+void dw_dma_acpi_controller_free(struct dw_dma *dw)
+{
+ struct device *dev = dw->dma.dev;
+
+ if (!has_acpi_companion(dev))
+ return;
+
+ acpi_dma_controller_free(dev);
+}
+EXPORT_SYMBOL_GPL(dw_dma_acpi_controller_free);
diff --git a/drivers/dma/dw/core.c b/drivers/dma/dw/core.c
new file mode 100644
index 000000000..97ba3bfc1
--- /dev/null
+++ b/drivers/dma/dw/core.c
@@ -0,0 +1,1320 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Core driver for the Synopsys DesignWare DMA Controller
+ *
+ * Copyright (C) 2007-2008 Atmel Corporation
+ * Copyright (C) 2010-2011 ST Microelectronics
+ * Copyright (C) 2013 Intel Corporation
+ */
+
+#include <linux/bitops.h>
+#include <linux/delay.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmapool.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/pm_runtime.h>
+
+#include "../dmaengine.h"
+#include "internal.h"
+
+/*
+ * This supports the Synopsys "DesignWare AHB Central DMA Controller",
+ * (DW_ahb_dmac) which is used with various AMBA 2.0 systems (not all
+ * of which use ARM any more). See the "Databook" from Synopsys for
+ * information beyond what licensees probably provide.
+ */
+
+/* The set of bus widths supported by the DMA controller */
+#define DW_DMA_BUSWIDTHS \
+ BIT(DMA_SLAVE_BUSWIDTH_UNDEFINED) | \
+ BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | \
+ BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \
+ BIT(DMA_SLAVE_BUSWIDTH_4_BYTES)
+
+/*----------------------------------------------------------------------*/
+
+static struct device *chan2dev(struct dma_chan *chan)
+{
+ return &chan->dev->device;
+}
+
+static struct dw_desc *dwc_first_active(struct dw_dma_chan *dwc)
+{
+ return to_dw_desc(dwc->active_list.next);
+}
+
+static dma_cookie_t dwc_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+ struct dw_desc *desc = txd_to_dw_desc(tx);
+ struct dw_dma_chan *dwc = to_dw_dma_chan(tx->chan);
+ dma_cookie_t cookie;
+ unsigned long flags;
+
+ spin_lock_irqsave(&dwc->lock, flags);
+ cookie = dma_cookie_assign(tx);
+
+ /*
+ * REVISIT: We should attempt to chain as many descriptors as
+ * possible, perhaps even appending to those already submitted
+ * for DMA. But this is hard to do in a race-free manner.
+ */
+
+ list_add_tail(&desc->desc_node, &dwc->queue);
+ spin_unlock_irqrestore(&dwc->lock, flags);
+ dev_vdbg(chan2dev(tx->chan), "%s: queued %u\n",
+ __func__, desc->txd.cookie);
+
+ return cookie;
+}
+
+static struct dw_desc *dwc_desc_get(struct dw_dma_chan *dwc)
+{
+ struct dw_dma *dw = to_dw_dma(dwc->chan.device);
+ struct dw_desc *desc;
+ dma_addr_t phys;
+
+ desc = dma_pool_zalloc(dw->desc_pool, GFP_ATOMIC, &phys);
+ if (!desc)
+ return NULL;
+
+ dwc->descs_allocated++;
+ INIT_LIST_HEAD(&desc->tx_list);
+ dma_async_tx_descriptor_init(&desc->txd, &dwc->chan);
+ desc->txd.tx_submit = dwc_tx_submit;
+ desc->txd.flags = DMA_CTRL_ACK;
+ desc->txd.phys = phys;
+ return desc;
+}
+
+static void dwc_desc_put(struct dw_dma_chan *dwc, struct dw_desc *desc)
+{
+ struct dw_dma *dw = to_dw_dma(dwc->chan.device);
+ struct dw_desc *child, *_next;
+
+ if (unlikely(!desc))
+ return;
+
+ list_for_each_entry_safe(child, _next, &desc->tx_list, desc_node) {
+ list_del(&child->desc_node);
+ dma_pool_free(dw->desc_pool, child, child->txd.phys);
+ dwc->descs_allocated--;
+ }
+
+ dma_pool_free(dw->desc_pool, desc, desc->txd.phys);
+ dwc->descs_allocated--;
+}
+
+static void dwc_initialize(struct dw_dma_chan *dwc)
+{
+ struct dw_dma *dw = to_dw_dma(dwc->chan.device);
+
+ dw->initialize_chan(dwc);
+
+ /* Enable interrupts */
+ channel_set_bit(dw, MASK.XFER, dwc->mask);
+ channel_set_bit(dw, MASK.ERROR, dwc->mask);
+}
+
+/*----------------------------------------------------------------------*/
+
+static inline void dwc_dump_chan_regs(struct dw_dma_chan *dwc)
+{
+ dev_err(chan2dev(&dwc->chan),
+ " SAR: 0x%x DAR: 0x%x LLP: 0x%x CTL: 0x%x:%08x\n",
+ channel_readl(dwc, SAR),
+ channel_readl(dwc, DAR),
+ channel_readl(dwc, LLP),
+ channel_readl(dwc, CTL_HI),
+ channel_readl(dwc, CTL_LO));
+}
+
+static inline void dwc_chan_disable(struct dw_dma *dw, struct dw_dma_chan *dwc)
+{
+ channel_clear_bit(dw, CH_EN, dwc->mask);
+ while (dma_readl(dw, CH_EN) & dwc->mask)
+ cpu_relax();
+}
+
+/*----------------------------------------------------------------------*/
+
+/* Perform single block transfer */
+static inline void dwc_do_single_block(struct dw_dma_chan *dwc,
+ struct dw_desc *desc)
+{
+ struct dw_dma *dw = to_dw_dma(dwc->chan.device);
+ u32 ctllo;
+
+ /*
+ * Software emulation of LLP mode relies on interrupts to continue
+ * multi block transfer.
+ */
+ ctllo = lli_read(desc, ctllo) | DWC_CTLL_INT_EN;
+
+ channel_writel(dwc, SAR, lli_read(desc, sar));
+ channel_writel(dwc, DAR, lli_read(desc, dar));
+ channel_writel(dwc, CTL_LO, ctllo);
+ channel_writel(dwc, CTL_HI, lli_read(desc, ctlhi));
+ channel_set_bit(dw, CH_EN, dwc->mask);
+
+ /* Move pointer to next descriptor */
+ dwc->tx_node_active = dwc->tx_node_active->next;
+}
+
+/* Called with dwc->lock held and bh disabled */
+static void dwc_dostart(struct dw_dma_chan *dwc, struct dw_desc *first)
+{
+ struct dw_dma *dw = to_dw_dma(dwc->chan.device);
+ u8 lms = DWC_LLP_LMS(dwc->dws.m_master);
+ unsigned long was_soft_llp;
+
+ /* ASSERT: channel is idle */
+ if (dma_readl(dw, CH_EN) & dwc->mask) {
+ dev_err(chan2dev(&dwc->chan),
+ "%s: BUG: Attempted to start non-idle channel\n",
+ __func__);
+ dwc_dump_chan_regs(dwc);
+
+ /* The tasklet will hopefully advance the queue... */
+ return;
+ }
+
+ if (dwc->nollp) {
+ was_soft_llp = test_and_set_bit(DW_DMA_IS_SOFT_LLP,
+ &dwc->flags);
+ if (was_soft_llp) {
+ dev_err(chan2dev(&dwc->chan),
+ "BUG: Attempted to start new LLP transfer inside ongoing one\n");
+ return;
+ }
+
+ dwc_initialize(dwc);
+
+ first->residue = first->total_len;
+ dwc->tx_node_active = &first->tx_list;
+
+ /* Submit first block */
+ dwc_do_single_block(dwc, first);
+
+ return;
+ }
+
+ dwc_initialize(dwc);
+
+ channel_writel(dwc, LLP, first->txd.phys | lms);
+ channel_writel(dwc, CTL_LO, DWC_CTLL_LLP_D_EN | DWC_CTLL_LLP_S_EN);
+ channel_writel(dwc, CTL_HI, 0);
+ channel_set_bit(dw, CH_EN, dwc->mask);
+}
+
+static void dwc_dostart_first_queued(struct dw_dma_chan *dwc)
+{
+ struct dw_desc *desc;
+
+ if (list_empty(&dwc->queue))
+ return;
+
+ list_move(dwc->queue.next, &dwc->active_list);
+ desc = dwc_first_active(dwc);
+ dev_vdbg(chan2dev(&dwc->chan), "%s: started %u\n", __func__, desc->txd.cookie);
+ dwc_dostart(dwc, desc);
+}
+
+/*----------------------------------------------------------------------*/
+
+static void
+dwc_descriptor_complete(struct dw_dma_chan *dwc, struct dw_desc *desc,
+ bool callback_required)
+{
+ struct dma_async_tx_descriptor *txd = &desc->txd;
+ struct dw_desc *child;
+ unsigned long flags;
+ struct dmaengine_desc_callback cb;
+
+ dev_vdbg(chan2dev(&dwc->chan), "descriptor %u complete\n", txd->cookie);
+
+ spin_lock_irqsave(&dwc->lock, flags);
+ dma_cookie_complete(txd);
+ if (callback_required)
+ dmaengine_desc_get_callback(txd, &cb);
+ else
+ memset(&cb, 0, sizeof(cb));
+
+ /* async_tx_ack */
+ list_for_each_entry(child, &desc->tx_list, desc_node)
+ async_tx_ack(&child->txd);
+ async_tx_ack(&desc->txd);
+ dwc_desc_put(dwc, desc);
+ spin_unlock_irqrestore(&dwc->lock, flags);
+
+ dmaengine_desc_callback_invoke(&cb, NULL);
+}
+
+static void dwc_complete_all(struct dw_dma *dw, struct dw_dma_chan *dwc)
+{
+ struct dw_desc *desc, *_desc;
+ LIST_HEAD(list);
+ unsigned long flags;
+
+ spin_lock_irqsave(&dwc->lock, flags);
+ if (dma_readl(dw, CH_EN) & dwc->mask) {
+ dev_err(chan2dev(&dwc->chan),
+ "BUG: XFER bit set, but channel not idle!\n");
+
+ /* Try to continue after resetting the channel... */
+ dwc_chan_disable(dw, dwc);
+ }
+
+ /*
+ * Submit queued descriptors ASAP, i.e. before we go through
+ * the completed ones.
+ */
+ list_splice_init(&dwc->active_list, &list);
+ dwc_dostart_first_queued(dwc);
+
+ spin_unlock_irqrestore(&dwc->lock, flags);
+
+ list_for_each_entry_safe(desc, _desc, &list, desc_node)
+ dwc_descriptor_complete(dwc, desc, true);
+}
+
+/* Returns how many bytes were already received from source */
+static inline u32 dwc_get_sent(struct dw_dma_chan *dwc)
+{
+ struct dw_dma *dw = to_dw_dma(dwc->chan.device);
+ u32 ctlhi = channel_readl(dwc, CTL_HI);
+ u32 ctllo = channel_readl(dwc, CTL_LO);
+
+ return dw->block2bytes(dwc, ctlhi, ctllo >> 4 & 7);
+}
+
+static void dwc_scan_descriptors(struct dw_dma *dw, struct dw_dma_chan *dwc)
+{
+ dma_addr_t llp;
+ struct dw_desc *desc, *_desc;
+ struct dw_desc *child;
+ u32 status_xfer;
+ unsigned long flags;
+
+ spin_lock_irqsave(&dwc->lock, flags);
+ llp = channel_readl(dwc, LLP);
+ status_xfer = dma_readl(dw, RAW.XFER);
+
+ if (status_xfer & dwc->mask) {
+ /* Everything we've submitted is done */
+ dma_writel(dw, CLEAR.XFER, dwc->mask);
+
+ if (test_bit(DW_DMA_IS_SOFT_LLP, &dwc->flags)) {
+ struct list_head *head, *active = dwc->tx_node_active;
+
+ /*
+ * We are inside first active descriptor.
+ * Otherwise something is really wrong.
+ */
+ desc = dwc_first_active(dwc);
+
+ head = &desc->tx_list;
+ if (active != head) {
+ /* Update residue to reflect last sent descriptor */
+ if (active == head->next)
+ desc->residue -= desc->len;
+ else
+ desc->residue -= to_dw_desc(active->prev)->len;
+
+ child = to_dw_desc(active);
+
+ /* Submit next block */
+ dwc_do_single_block(dwc, child);
+
+ spin_unlock_irqrestore(&dwc->lock, flags);
+ return;
+ }
+
+ /* We are done here */
+ clear_bit(DW_DMA_IS_SOFT_LLP, &dwc->flags);
+ }
+
+ spin_unlock_irqrestore(&dwc->lock, flags);
+
+ dwc_complete_all(dw, dwc);
+ return;
+ }
+
+ if (list_empty(&dwc->active_list)) {
+ spin_unlock_irqrestore(&dwc->lock, flags);
+ return;
+ }
+
+ if (test_bit(DW_DMA_IS_SOFT_LLP, &dwc->flags)) {
+ dev_vdbg(chan2dev(&dwc->chan), "%s: soft LLP mode\n", __func__);
+ spin_unlock_irqrestore(&dwc->lock, flags);
+ return;
+ }
+
+ dev_vdbg(chan2dev(&dwc->chan), "%s: llp=%pad\n", __func__, &llp);
+
+ list_for_each_entry_safe(desc, _desc, &dwc->active_list, desc_node) {
+ /* Initial residue value */
+ desc->residue = desc->total_len;
+
+ /* Check first descriptors addr */
+ if (desc->txd.phys == DWC_LLP_LOC(llp)) {
+ spin_unlock_irqrestore(&dwc->lock, flags);
+ return;
+ }
+
+ /* Check first descriptors llp */
+ if (lli_read(desc, llp) == llp) {
+ /* This one is currently in progress */
+ desc->residue -= dwc_get_sent(dwc);
+ spin_unlock_irqrestore(&dwc->lock, flags);
+ return;
+ }
+
+ desc->residue -= desc->len;
+ list_for_each_entry(child, &desc->tx_list, desc_node) {
+ if (lli_read(child, llp) == llp) {
+ /* Currently in progress */
+ desc->residue -= dwc_get_sent(dwc);
+ spin_unlock_irqrestore(&dwc->lock, flags);
+ return;
+ }
+ desc->residue -= child->len;
+ }
+
+ /*
+ * No descriptors so far seem to be in progress, i.e.
+ * this one must be done.
+ */
+ spin_unlock_irqrestore(&dwc->lock, flags);
+ dwc_descriptor_complete(dwc, desc, true);
+ spin_lock_irqsave(&dwc->lock, flags);
+ }
+
+ dev_err(chan2dev(&dwc->chan),
+ "BUG: All descriptors done, but channel not idle!\n");
+
+ /* Try to continue after resetting the channel... */
+ dwc_chan_disable(dw, dwc);
+
+ dwc_dostart_first_queued(dwc);
+ spin_unlock_irqrestore(&dwc->lock, flags);
+}
+
+static inline void dwc_dump_lli(struct dw_dma_chan *dwc, struct dw_desc *desc)
+{
+ dev_crit(chan2dev(&dwc->chan), " desc: s0x%x d0x%x l0x%x c0x%x:%x\n",
+ lli_read(desc, sar),
+ lli_read(desc, dar),
+ lli_read(desc, llp),
+ lli_read(desc, ctlhi),
+ lli_read(desc, ctllo));
+}
+
+static void dwc_handle_error(struct dw_dma *dw, struct dw_dma_chan *dwc)
+{
+ struct dw_desc *bad_desc;
+ struct dw_desc *child;
+ unsigned long flags;
+
+ dwc_scan_descriptors(dw, dwc);
+
+ spin_lock_irqsave(&dwc->lock, flags);
+
+ /*
+ * The descriptor currently at the head of the active list is
+ * borked. Since we don't have any way to report errors, we'll
+ * just have to scream loudly and try to carry on.
+ */
+ bad_desc = dwc_first_active(dwc);
+ list_del_init(&bad_desc->desc_node);
+ list_move(dwc->queue.next, dwc->active_list.prev);
+
+ /* Clear the error flag and try to restart the controller */
+ dma_writel(dw, CLEAR.ERROR, dwc->mask);
+ if (!list_empty(&dwc->active_list))
+ dwc_dostart(dwc, dwc_first_active(dwc));
+
+ /*
+ * WARN may seem harsh, but since this only happens
+ * when someone submits a bad physical address in a
+ * descriptor, we should consider ourselves lucky that the
+ * controller flagged an error instead of scribbling over
+ * random memory locations.
+ */
+ dev_WARN(chan2dev(&dwc->chan), "Bad descriptor submitted for DMA!\n"
+ " cookie: %d\n", bad_desc->txd.cookie);
+ dwc_dump_lli(dwc, bad_desc);
+ list_for_each_entry(child, &bad_desc->tx_list, desc_node)
+ dwc_dump_lli(dwc, child);
+
+ spin_unlock_irqrestore(&dwc->lock, flags);
+
+ /* Pretend the descriptor completed successfully */
+ dwc_descriptor_complete(dwc, bad_desc, true);
+}
+
+static void dw_dma_tasklet(struct tasklet_struct *t)
+{
+ struct dw_dma *dw = from_tasklet(dw, t, tasklet);
+ struct dw_dma_chan *dwc;
+ u32 status_xfer;
+ u32 status_err;
+ unsigned int i;
+
+ status_xfer = dma_readl(dw, RAW.XFER);
+ status_err = dma_readl(dw, RAW.ERROR);
+
+ dev_vdbg(dw->dma.dev, "%s: status_err=%x\n", __func__, status_err);
+
+ for (i = 0; i < dw->dma.chancnt; i++) {
+ dwc = &dw->chan[i];
+ if (test_bit(DW_DMA_IS_CYCLIC, &dwc->flags))
+ dev_vdbg(dw->dma.dev, "Cyclic xfer is not implemented\n");
+ else if (status_err & (1 << i))
+ dwc_handle_error(dw, dwc);
+ else if (status_xfer & (1 << i))
+ dwc_scan_descriptors(dw, dwc);
+ }
+
+ /* Re-enable interrupts */
+ channel_set_bit(dw, MASK.XFER, dw->all_chan_mask);
+ channel_set_bit(dw, MASK.ERROR, dw->all_chan_mask);
+}
+
+static irqreturn_t dw_dma_interrupt(int irq, void *dev_id)
+{
+ struct dw_dma *dw = dev_id;
+ u32 status;
+
+ /* Check if we have any interrupt from the DMAC which is not in use */
+ if (!dw->in_use)
+ return IRQ_NONE;
+
+ status = dma_readl(dw, STATUS_INT);
+ dev_vdbg(dw->dma.dev, "%s: status=0x%x\n", __func__, status);
+
+ /* Check if we have any interrupt from the DMAC */
+ if (!status)
+ return IRQ_NONE;
+
+ /*
+ * Just disable the interrupts. We'll turn them back on in the
+ * softirq handler.
+ */
+ channel_clear_bit(dw, MASK.XFER, dw->all_chan_mask);
+ channel_clear_bit(dw, MASK.BLOCK, dw->all_chan_mask);
+ channel_clear_bit(dw, MASK.ERROR, dw->all_chan_mask);
+
+ status = dma_readl(dw, STATUS_INT);
+ if (status) {
+ dev_err(dw->dma.dev,
+ "BUG: Unexpected interrupts pending: 0x%x\n",
+ status);
+
+ /* Try to recover */
+ channel_clear_bit(dw, MASK.XFER, (1 << 8) - 1);
+ channel_clear_bit(dw, MASK.BLOCK, (1 << 8) - 1);
+ channel_clear_bit(dw, MASK.SRC_TRAN, (1 << 8) - 1);
+ channel_clear_bit(dw, MASK.DST_TRAN, (1 << 8) - 1);
+ channel_clear_bit(dw, MASK.ERROR, (1 << 8) - 1);
+ }
+
+ tasklet_schedule(&dw->tasklet);
+
+ return IRQ_HANDLED;
+}
+
+/*----------------------------------------------------------------------*/
+
+static struct dma_async_tx_descriptor *
+dwc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
+ size_t len, unsigned long flags)
+{
+ struct dw_dma_chan *dwc = to_dw_dma_chan(chan);
+ struct dw_dma *dw = to_dw_dma(chan->device);
+ struct dw_desc *desc;
+ struct dw_desc *first;
+ struct dw_desc *prev;
+ size_t xfer_count;
+ size_t offset;
+ u8 m_master = dwc->dws.m_master;
+ unsigned int src_width;
+ unsigned int dst_width;
+ unsigned int data_width = dw->pdata->data_width[m_master];
+ u32 ctllo, ctlhi;
+ u8 lms = DWC_LLP_LMS(m_master);
+
+ dev_vdbg(chan2dev(chan),
+ "%s: d%pad s%pad l0x%zx f0x%lx\n", __func__,
+ &dest, &src, len, flags);
+
+ if (unlikely(!len)) {
+ dev_dbg(chan2dev(chan), "%s: length is zero!\n", __func__);
+ return NULL;
+ }
+
+ dwc->direction = DMA_MEM_TO_MEM;
+
+ src_width = dst_width = __ffs(data_width | src | dest | len);
+
+ ctllo = dw->prepare_ctllo(dwc)
+ | DWC_CTLL_DST_WIDTH(dst_width)
+ | DWC_CTLL_SRC_WIDTH(src_width)
+ | DWC_CTLL_DST_INC
+ | DWC_CTLL_SRC_INC
+ | DWC_CTLL_FC_M2M;
+ prev = first = NULL;
+
+ for (offset = 0; offset < len; offset += xfer_count) {
+ desc = dwc_desc_get(dwc);
+ if (!desc)
+ goto err_desc_get;
+
+ ctlhi = dw->bytes2block(dwc, len - offset, src_width, &xfer_count);
+
+ lli_write(desc, sar, src + offset);
+ lli_write(desc, dar, dest + offset);
+ lli_write(desc, ctllo, ctllo);
+ lli_write(desc, ctlhi, ctlhi);
+ desc->len = xfer_count;
+
+ if (!first) {
+ first = desc;
+ } else {
+ lli_write(prev, llp, desc->txd.phys | lms);
+ list_add_tail(&desc->desc_node, &first->tx_list);
+ }
+ prev = desc;
+ }
+
+ if (flags & DMA_PREP_INTERRUPT)
+ /* Trigger interrupt after last block */
+ lli_set(prev, ctllo, DWC_CTLL_INT_EN);
+
+ prev->lli.llp = 0;
+ lli_clear(prev, ctllo, DWC_CTLL_LLP_D_EN | DWC_CTLL_LLP_S_EN);
+ first->txd.flags = flags;
+ first->total_len = len;
+
+ return &first->txd;
+
+err_desc_get:
+ dwc_desc_put(dwc, first);
+ return NULL;
+}
+
+static struct dma_async_tx_descriptor *
+dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
+ unsigned int sg_len, enum dma_transfer_direction direction,
+ unsigned long flags, void *context)
+{
+ struct dw_dma_chan *dwc = to_dw_dma_chan(chan);
+ struct dw_dma *dw = to_dw_dma(chan->device);
+ struct dma_slave_config *sconfig = &dwc->dma_sconfig;
+ struct dw_desc *prev;
+ struct dw_desc *first;
+ u32 ctllo, ctlhi;
+ u8 m_master = dwc->dws.m_master;
+ u8 lms = DWC_LLP_LMS(m_master);
+ dma_addr_t reg;
+ unsigned int reg_width;
+ unsigned int mem_width;
+ unsigned int data_width = dw->pdata->data_width[m_master];
+ unsigned int i;
+ struct scatterlist *sg;
+ size_t total_len = 0;
+
+ dev_vdbg(chan2dev(chan), "%s\n", __func__);
+
+ if (unlikely(!is_slave_direction(direction) || !sg_len))
+ return NULL;
+
+ dwc->direction = direction;
+
+ prev = first = NULL;
+
+ switch (direction) {
+ case DMA_MEM_TO_DEV:
+ reg_width = __ffs(sconfig->dst_addr_width);
+ reg = sconfig->dst_addr;
+ ctllo = dw->prepare_ctllo(dwc)
+ | DWC_CTLL_DST_WIDTH(reg_width)
+ | DWC_CTLL_DST_FIX
+ | DWC_CTLL_SRC_INC;
+
+ ctllo |= sconfig->device_fc ? DWC_CTLL_FC(DW_DMA_FC_P_M2P) :
+ DWC_CTLL_FC(DW_DMA_FC_D_M2P);
+
+ for_each_sg(sgl, sg, sg_len, i) {
+ struct dw_desc *desc;
+ u32 len, mem;
+ size_t dlen;
+
+ mem = sg_dma_address(sg);
+ len = sg_dma_len(sg);
+
+ mem_width = __ffs(data_width | mem | len);
+
+slave_sg_todev_fill_desc:
+ desc = dwc_desc_get(dwc);
+ if (!desc)
+ goto err_desc_get;
+
+ ctlhi = dw->bytes2block(dwc, len, mem_width, &dlen);
+
+ lli_write(desc, sar, mem);
+ lli_write(desc, dar, reg);
+ lli_write(desc, ctlhi, ctlhi);
+ lli_write(desc, ctllo, ctllo | DWC_CTLL_SRC_WIDTH(mem_width));
+ desc->len = dlen;
+
+ if (!first) {
+ first = desc;
+ } else {
+ lli_write(prev, llp, desc->txd.phys | lms);
+ list_add_tail(&desc->desc_node, &first->tx_list);
+ }
+ prev = desc;
+
+ mem += dlen;
+ len -= dlen;
+ total_len += dlen;
+
+ if (len)
+ goto slave_sg_todev_fill_desc;
+ }
+ break;
+ case DMA_DEV_TO_MEM:
+ reg_width = __ffs(sconfig->src_addr_width);
+ reg = sconfig->src_addr;
+ ctllo = dw->prepare_ctllo(dwc)
+ | DWC_CTLL_SRC_WIDTH(reg_width)
+ | DWC_CTLL_DST_INC
+ | DWC_CTLL_SRC_FIX;
+
+ ctllo |= sconfig->device_fc ? DWC_CTLL_FC(DW_DMA_FC_P_P2M) :
+ DWC_CTLL_FC(DW_DMA_FC_D_P2M);
+
+ for_each_sg(sgl, sg, sg_len, i) {
+ struct dw_desc *desc;
+ u32 len, mem;
+ size_t dlen;
+
+ mem = sg_dma_address(sg);
+ len = sg_dma_len(sg);
+
+slave_sg_fromdev_fill_desc:
+ desc = dwc_desc_get(dwc);
+ if (!desc)
+ goto err_desc_get;
+
+ ctlhi = dw->bytes2block(dwc, len, reg_width, &dlen);
+
+ lli_write(desc, sar, reg);
+ lli_write(desc, dar, mem);
+ lli_write(desc, ctlhi, ctlhi);
+ mem_width = __ffs(data_width | mem);
+ lli_write(desc, ctllo, ctllo | DWC_CTLL_DST_WIDTH(mem_width));
+ desc->len = dlen;
+
+ if (!first) {
+ first = desc;
+ } else {
+ lli_write(prev, llp, desc->txd.phys | lms);
+ list_add_tail(&desc->desc_node, &first->tx_list);
+ }
+ prev = desc;
+
+ mem += dlen;
+ len -= dlen;
+ total_len += dlen;
+
+ if (len)
+ goto slave_sg_fromdev_fill_desc;
+ }
+ break;
+ default:
+ return NULL;
+ }
+
+ if (flags & DMA_PREP_INTERRUPT)
+ /* Trigger interrupt after last block */
+ lli_set(prev, ctllo, DWC_CTLL_INT_EN);
+
+ prev->lli.llp = 0;
+ lli_clear(prev, ctllo, DWC_CTLL_LLP_D_EN | DWC_CTLL_LLP_S_EN);
+ first->total_len = total_len;
+
+ return &first->txd;
+
+err_desc_get:
+ dev_err(chan2dev(chan),
+ "not enough descriptors available. Direction %d\n", direction);
+ dwc_desc_put(dwc, first);
+ return NULL;
+}
+
+bool dw_dma_filter(struct dma_chan *chan, void *param)
+{
+ struct dw_dma_chan *dwc = to_dw_dma_chan(chan);
+ struct dw_dma_slave *dws = param;
+
+ if (dws->dma_dev != chan->device->dev)
+ return false;
+
+ /* permit channels in accordance with the channels mask */
+ if (dws->channels && !(dws->channels & dwc->mask))
+ return false;
+
+ /* We have to copy data since dws can be temporary storage */
+ memcpy(&dwc->dws, dws, sizeof(struct dw_dma_slave));
+
+ return true;
+}
+EXPORT_SYMBOL_GPL(dw_dma_filter);
+
+static int dwc_config(struct dma_chan *chan, struct dma_slave_config *sconfig)
+{
+ struct dw_dma_chan *dwc = to_dw_dma_chan(chan);
+ struct dw_dma *dw = to_dw_dma(chan->device);
+
+ memcpy(&dwc->dma_sconfig, sconfig, sizeof(*sconfig));
+
+ dwc->dma_sconfig.src_maxburst =
+ clamp(dwc->dma_sconfig.src_maxburst, 0U, dwc->max_burst);
+ dwc->dma_sconfig.dst_maxburst =
+ clamp(dwc->dma_sconfig.dst_maxburst, 0U, dwc->max_burst);
+
+ dw->encode_maxburst(dwc, &dwc->dma_sconfig.src_maxburst);
+ dw->encode_maxburst(dwc, &dwc->dma_sconfig.dst_maxburst);
+
+ return 0;
+}
+
+static void dwc_chan_pause(struct dw_dma_chan *dwc, bool drain)
+{
+ struct dw_dma *dw = to_dw_dma(dwc->chan.device);
+ unsigned int count = 20; /* timeout iterations */
+
+ dw->suspend_chan(dwc, drain);
+
+ while (!(channel_readl(dwc, CFG_LO) & DWC_CFGL_FIFO_EMPTY) && count--)
+ udelay(2);
+
+ set_bit(DW_DMA_IS_PAUSED, &dwc->flags);
+}
+
+static int dwc_pause(struct dma_chan *chan)
+{
+ struct dw_dma_chan *dwc = to_dw_dma_chan(chan);
+ unsigned long flags;
+
+ spin_lock_irqsave(&dwc->lock, flags);
+ dwc_chan_pause(dwc, false);
+ spin_unlock_irqrestore(&dwc->lock, flags);
+
+ return 0;
+}
+
+static inline void dwc_chan_resume(struct dw_dma_chan *dwc, bool drain)
+{
+ struct dw_dma *dw = to_dw_dma(dwc->chan.device);
+
+ dw->resume_chan(dwc, drain);
+
+ clear_bit(DW_DMA_IS_PAUSED, &dwc->flags);
+}
+
+static int dwc_resume(struct dma_chan *chan)
+{
+ struct dw_dma_chan *dwc = to_dw_dma_chan(chan);
+ unsigned long flags;
+
+ spin_lock_irqsave(&dwc->lock, flags);
+
+ if (test_bit(DW_DMA_IS_PAUSED, &dwc->flags))
+ dwc_chan_resume(dwc, false);
+
+ spin_unlock_irqrestore(&dwc->lock, flags);
+
+ return 0;
+}
+
+static int dwc_terminate_all(struct dma_chan *chan)
+{
+ struct dw_dma_chan *dwc = to_dw_dma_chan(chan);
+ struct dw_dma *dw = to_dw_dma(chan->device);
+ struct dw_desc *desc, *_desc;
+ unsigned long flags;
+ LIST_HEAD(list);
+
+ spin_lock_irqsave(&dwc->lock, flags);
+
+ clear_bit(DW_DMA_IS_SOFT_LLP, &dwc->flags);
+
+ dwc_chan_pause(dwc, true);
+
+ dwc_chan_disable(dw, dwc);
+
+ dwc_chan_resume(dwc, true);
+
+ /* active_list entries will end up before queued entries */
+ list_splice_init(&dwc->queue, &list);
+ list_splice_init(&dwc->active_list, &list);
+
+ spin_unlock_irqrestore(&dwc->lock, flags);
+
+ /* Flush all pending and queued descriptors */
+ list_for_each_entry_safe(desc, _desc, &list, desc_node)
+ dwc_descriptor_complete(dwc, desc, false);
+
+ return 0;
+}
+
+static struct dw_desc *dwc_find_desc(struct dw_dma_chan *dwc, dma_cookie_t c)
+{
+ struct dw_desc *desc;
+
+ list_for_each_entry(desc, &dwc->active_list, desc_node)
+ if (desc->txd.cookie == c)
+ return desc;
+
+ return NULL;
+}
+
+static u32 dwc_get_residue(struct dw_dma_chan *dwc, dma_cookie_t cookie)
+{
+ struct dw_desc *desc;
+ unsigned long flags;
+ u32 residue;
+
+ spin_lock_irqsave(&dwc->lock, flags);
+
+ desc = dwc_find_desc(dwc, cookie);
+ if (desc) {
+ if (desc == dwc_first_active(dwc)) {
+ residue = desc->residue;
+ if (test_bit(DW_DMA_IS_SOFT_LLP, &dwc->flags) && residue)
+ residue -= dwc_get_sent(dwc);
+ } else {
+ residue = desc->total_len;
+ }
+ } else {
+ residue = 0;
+ }
+
+ spin_unlock_irqrestore(&dwc->lock, flags);
+ return residue;
+}
+
+static enum dma_status
+dwc_tx_status(struct dma_chan *chan,
+ dma_cookie_t cookie,
+ struct dma_tx_state *txstate)
+{
+ struct dw_dma_chan *dwc = to_dw_dma_chan(chan);
+ enum dma_status ret;
+
+ ret = dma_cookie_status(chan, cookie, txstate);
+ if (ret == DMA_COMPLETE)
+ return ret;
+
+ dwc_scan_descriptors(to_dw_dma(chan->device), dwc);
+
+ ret = dma_cookie_status(chan, cookie, txstate);
+ if (ret == DMA_COMPLETE)
+ return ret;
+
+ dma_set_residue(txstate, dwc_get_residue(dwc, cookie));
+
+ if (test_bit(DW_DMA_IS_PAUSED, &dwc->flags) && ret == DMA_IN_PROGRESS)
+ return DMA_PAUSED;
+
+ return ret;
+}
+
+static void dwc_issue_pending(struct dma_chan *chan)
+{
+ struct dw_dma_chan *dwc = to_dw_dma_chan(chan);
+ unsigned long flags;
+
+ spin_lock_irqsave(&dwc->lock, flags);
+ if (list_empty(&dwc->active_list))
+ dwc_dostart_first_queued(dwc);
+ spin_unlock_irqrestore(&dwc->lock, flags);
+}
+
+/*----------------------------------------------------------------------*/
+
+void do_dw_dma_off(struct dw_dma *dw)
+{
+ dma_writel(dw, CFG, 0);
+
+ channel_clear_bit(dw, MASK.XFER, dw->all_chan_mask);
+ channel_clear_bit(dw, MASK.BLOCK, dw->all_chan_mask);
+ channel_clear_bit(dw, MASK.SRC_TRAN, dw->all_chan_mask);
+ channel_clear_bit(dw, MASK.DST_TRAN, dw->all_chan_mask);
+ channel_clear_bit(dw, MASK.ERROR, dw->all_chan_mask);
+
+ while (dma_readl(dw, CFG) & DW_CFG_DMA_EN)
+ cpu_relax();
+}
+
+void do_dw_dma_on(struct dw_dma *dw)
+{
+ dma_writel(dw, CFG, DW_CFG_DMA_EN);
+}
+
+static int dwc_alloc_chan_resources(struct dma_chan *chan)
+{
+ struct dw_dma_chan *dwc = to_dw_dma_chan(chan);
+ struct dw_dma *dw = to_dw_dma(chan->device);
+
+ dev_vdbg(chan2dev(chan), "%s\n", __func__);
+
+ /* ASSERT: channel is idle */
+ if (dma_readl(dw, CH_EN) & dwc->mask) {
+ dev_dbg(chan2dev(chan), "DMA channel not idle?\n");
+ return -EIO;
+ }
+
+ dma_cookie_init(chan);
+
+ /*
+ * NOTE: some controllers may have additional features that we
+ * need to initialize here, like "scatter-gather" (which
+ * doesn't mean what you think it means), and status writeback.
+ */
+
+ /*
+ * We need controller-specific data to set up slave transfers.
+ */
+ if (chan->private && !dw_dma_filter(chan, chan->private)) {
+ dev_warn(chan2dev(chan), "Wrong controller-specific data\n");
+ return -EINVAL;
+ }
+
+ /* Enable controller here if needed */
+ if (!dw->in_use)
+ do_dw_dma_on(dw);
+ dw->in_use |= dwc->mask;
+
+ return 0;
+}
+
+static void dwc_free_chan_resources(struct dma_chan *chan)
+{
+ struct dw_dma_chan *dwc = to_dw_dma_chan(chan);
+ struct dw_dma *dw = to_dw_dma(chan->device);
+ unsigned long flags;
+
+ dev_dbg(chan2dev(chan), "%s: descs allocated=%u\n", __func__,
+ dwc->descs_allocated);
+
+ /* ASSERT: channel is idle */
+ BUG_ON(!list_empty(&dwc->active_list));
+ BUG_ON(!list_empty(&dwc->queue));
+ BUG_ON(dma_readl(to_dw_dma(chan->device), CH_EN) & dwc->mask);
+
+ spin_lock_irqsave(&dwc->lock, flags);
+
+ /* Clear custom channel configuration */
+ memset(&dwc->dws, 0, sizeof(struct dw_dma_slave));
+
+ /* Disable interrupts */
+ channel_clear_bit(dw, MASK.XFER, dwc->mask);
+ channel_clear_bit(dw, MASK.BLOCK, dwc->mask);
+ channel_clear_bit(dw, MASK.ERROR, dwc->mask);
+
+ spin_unlock_irqrestore(&dwc->lock, flags);
+
+ /* Disable controller in case it was a last user */
+ dw->in_use &= ~dwc->mask;
+ if (!dw->in_use)
+ do_dw_dma_off(dw);
+
+ dev_vdbg(chan2dev(chan), "%s: done\n", __func__);
+}
+
+static void dwc_caps(struct dma_chan *chan, struct dma_slave_caps *caps)
+{
+ struct dw_dma_chan *dwc = to_dw_dma_chan(chan);
+
+ caps->max_burst = dwc->max_burst;
+
+ /*
+ * It might be crucial for some devices to have the hardware
+ * accelerated multi-block transfers supported, aka LLPs in DW DMAC
+ * notation. So if LLPs are supported then max_sg_burst is set to
+ * zero which means unlimited number of SG entries can be handled in a
+ * single DMA transaction, otherwise it's just one SG entry.
+ */
+ if (dwc->nollp)
+ caps->max_sg_burst = 1;
+ else
+ caps->max_sg_burst = 0;
+}
+
+int do_dma_probe(struct dw_dma_chip *chip)
+{
+ struct dw_dma *dw = chip->dw;
+ struct dw_dma_platform_data *pdata;
+ bool autocfg = false;
+ unsigned int dw_params;
+ unsigned int i;
+ int err;
+
+ dw->pdata = devm_kzalloc(chip->dev, sizeof(*dw->pdata), GFP_KERNEL);
+ if (!dw->pdata)
+ return -ENOMEM;
+
+ dw->regs = chip->regs;
+
+ pm_runtime_get_sync(chip->dev);
+
+ if (!chip->pdata) {
+ dw_params = dma_readl(dw, DW_PARAMS);
+ dev_dbg(chip->dev, "DW_PARAMS: 0x%08x\n", dw_params);
+
+ autocfg = dw_params >> DW_PARAMS_EN & 1;
+ if (!autocfg) {
+ err = -EINVAL;
+ goto err_pdata;
+ }
+
+ /* Reassign the platform data pointer */
+ pdata = dw->pdata;
+
+ /* Get hardware configuration parameters */
+ pdata->nr_channels = (dw_params >> DW_PARAMS_NR_CHAN & 7) + 1;
+ pdata->nr_masters = (dw_params >> DW_PARAMS_NR_MASTER & 3) + 1;
+ for (i = 0; i < pdata->nr_masters; i++) {
+ pdata->data_width[i] =
+ 4 << (dw_params >> DW_PARAMS_DATA_WIDTH(i) & 3);
+ }
+ pdata->block_size = dma_readl(dw, MAX_BLK_SIZE);
+
+ /* Fill platform data with the default values */
+ pdata->chan_allocation_order = CHAN_ALLOCATION_ASCENDING;
+ pdata->chan_priority = CHAN_PRIORITY_ASCENDING;
+ } else if (chip->pdata->nr_channels > DW_DMA_MAX_NR_CHANNELS) {
+ err = -EINVAL;
+ goto err_pdata;
+ } else {
+ memcpy(dw->pdata, chip->pdata, sizeof(*dw->pdata));
+
+ /* Reassign the platform data pointer */
+ pdata = dw->pdata;
+ }
+
+ dw->chan = devm_kcalloc(chip->dev, pdata->nr_channels, sizeof(*dw->chan),
+ GFP_KERNEL);
+ if (!dw->chan) {
+ err = -ENOMEM;
+ goto err_pdata;
+ }
+
+ /* Calculate all channel mask before DMA setup */
+ dw->all_chan_mask = (1 << pdata->nr_channels) - 1;
+
+ /* Force dma off, just in case */
+ dw->disable(dw);
+
+ /* Device and instance ID for IRQ and DMA pool */
+ dw->set_device_name(dw, chip->id);
+
+ /* Create a pool of consistent memory blocks for hardware descriptors */
+ dw->desc_pool = dmam_pool_create(dw->name, chip->dev,
+ sizeof(struct dw_desc), 4, 0);
+ if (!dw->desc_pool) {
+ dev_err(chip->dev, "No memory for descriptors dma pool\n");
+ err = -ENOMEM;
+ goto err_pdata;
+ }
+
+ tasklet_setup(&dw->tasklet, dw_dma_tasklet);
+
+ err = request_irq(chip->irq, dw_dma_interrupt, IRQF_SHARED,
+ dw->name, dw);
+ if (err)
+ goto err_pdata;
+
+ INIT_LIST_HEAD(&dw->dma.channels);
+ for (i = 0; i < pdata->nr_channels; i++) {
+ struct dw_dma_chan *dwc = &dw->chan[i];
+
+ dwc->chan.device = &dw->dma;
+ dma_cookie_init(&dwc->chan);
+ if (pdata->chan_allocation_order == CHAN_ALLOCATION_ASCENDING)
+ list_add_tail(&dwc->chan.device_node,
+ &dw->dma.channels);
+ else
+ list_add(&dwc->chan.device_node, &dw->dma.channels);
+
+ /* 7 is highest priority & 0 is lowest. */
+ if (pdata->chan_priority == CHAN_PRIORITY_ASCENDING)
+ dwc->priority = pdata->nr_channels - i - 1;
+ else
+ dwc->priority = i;
+
+ dwc->ch_regs = &__dw_regs(dw)->CHAN[i];
+ spin_lock_init(&dwc->lock);
+ dwc->mask = 1 << i;
+
+ INIT_LIST_HEAD(&dwc->active_list);
+ INIT_LIST_HEAD(&dwc->queue);
+
+ channel_clear_bit(dw, CH_EN, dwc->mask);
+
+ dwc->direction = DMA_TRANS_NONE;
+
+ /* Hardware configuration */
+ if (autocfg) {
+ unsigned int r = DW_DMA_MAX_NR_CHANNELS - i - 1;
+ void __iomem *addr = &__dw_regs(dw)->DWC_PARAMS[r];
+ unsigned int dwc_params = readl(addr);
+
+ dev_dbg(chip->dev, "DWC_PARAMS[%d]: 0x%08x\n", i,
+ dwc_params);
+
+ /*
+ * Decode maximum block size for given channel. The
+ * stored 4 bit value represents blocks from 0x00 for 3
+ * up to 0x0a for 4095.
+ */
+ dwc->block_size =
+ (4 << ((pdata->block_size >> 4 * i) & 0xf)) - 1;
+
+ /*
+ * According to the DW DMA databook the true scatter-
+ * gether LLPs aren't available if either multi-block
+ * config is disabled (CHx_MULTI_BLK_EN == 0) or the
+ * LLP register is hard-coded to zeros
+ * (CHx_HC_LLP == 1).
+ */
+ dwc->nollp =
+ (dwc_params >> DWC_PARAMS_MBLK_EN & 0x1) == 0 ||
+ (dwc_params >> DWC_PARAMS_HC_LLP & 0x1) == 1;
+ dwc->max_burst =
+ (0x4 << (dwc_params >> DWC_PARAMS_MSIZE & 0x7));
+ } else {
+ dwc->block_size = pdata->block_size;
+ dwc->nollp = !pdata->multi_block[i];
+ dwc->max_burst = pdata->max_burst[i] ?: DW_DMA_MAX_BURST;
+ }
+ }
+
+ /* Clear all interrupts on all channels. */
+ dma_writel(dw, CLEAR.XFER, dw->all_chan_mask);
+ dma_writel(dw, CLEAR.BLOCK, dw->all_chan_mask);
+ dma_writel(dw, CLEAR.SRC_TRAN, dw->all_chan_mask);
+ dma_writel(dw, CLEAR.DST_TRAN, dw->all_chan_mask);
+ dma_writel(dw, CLEAR.ERROR, dw->all_chan_mask);
+
+ /* Set capabilities */
+ dma_cap_set(DMA_SLAVE, dw->dma.cap_mask);
+ dma_cap_set(DMA_PRIVATE, dw->dma.cap_mask);
+ dma_cap_set(DMA_MEMCPY, dw->dma.cap_mask);
+
+ dw->dma.dev = chip->dev;
+ dw->dma.device_alloc_chan_resources = dwc_alloc_chan_resources;
+ dw->dma.device_free_chan_resources = dwc_free_chan_resources;
+
+ dw->dma.device_prep_dma_memcpy = dwc_prep_dma_memcpy;
+ dw->dma.device_prep_slave_sg = dwc_prep_slave_sg;
+
+ dw->dma.device_caps = dwc_caps;
+ dw->dma.device_config = dwc_config;
+ dw->dma.device_pause = dwc_pause;
+ dw->dma.device_resume = dwc_resume;
+ dw->dma.device_terminate_all = dwc_terminate_all;
+
+ dw->dma.device_tx_status = dwc_tx_status;
+ dw->dma.device_issue_pending = dwc_issue_pending;
+
+ /* DMA capabilities */
+ dw->dma.min_burst = DW_DMA_MIN_BURST;
+ dw->dma.max_burst = DW_DMA_MAX_BURST;
+ dw->dma.src_addr_widths = DW_DMA_BUSWIDTHS;
+ dw->dma.dst_addr_widths = DW_DMA_BUSWIDTHS;
+ dw->dma.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV) |
+ BIT(DMA_MEM_TO_MEM);
+ dw->dma.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
+
+ /*
+ * For now there is no hardware with non uniform maximum block size
+ * across all of the device channels, so we set the maximum segment
+ * size as the block size found for the very first channel.
+ */
+ dma_set_max_seg_size(dw->dma.dev, dw->chan[0].block_size);
+
+ err = dma_async_device_register(&dw->dma);
+ if (err)
+ goto err_dma_register;
+
+ dev_info(chip->dev, "DesignWare DMA Controller, %d channels\n",
+ pdata->nr_channels);
+
+ pm_runtime_put_sync_suspend(chip->dev);
+
+ return 0;
+
+err_dma_register:
+ free_irq(chip->irq, dw);
+err_pdata:
+ pm_runtime_put_sync_suspend(chip->dev);
+ return err;
+}
+
+int do_dma_remove(struct dw_dma_chip *chip)
+{
+ struct dw_dma *dw = chip->dw;
+ struct dw_dma_chan *dwc, *_dwc;
+
+ pm_runtime_get_sync(chip->dev);
+
+ do_dw_dma_off(dw);
+ dma_async_device_unregister(&dw->dma);
+
+ free_irq(chip->irq, dw);
+ tasklet_kill(&dw->tasklet);
+
+ list_for_each_entry_safe(dwc, _dwc, &dw->dma.channels,
+ chan.device_node) {
+ list_del(&dwc->chan.device_node);
+ channel_clear_bit(dw, CH_EN, dwc->mask);
+ }
+
+ pm_runtime_put_sync_suspend(chip->dev);
+ return 0;
+}
+
+int do_dw_dma_disable(struct dw_dma_chip *chip)
+{
+ struct dw_dma *dw = chip->dw;
+
+ dw->disable(dw);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(do_dw_dma_disable);
+
+int do_dw_dma_enable(struct dw_dma_chip *chip)
+{
+ struct dw_dma *dw = chip->dw;
+
+ dw->enable(dw);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(do_dw_dma_enable);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("Synopsys DesignWare DMA Controller core driver");
+MODULE_AUTHOR("Haavard Skinnemoen (Atmel)");
+MODULE_AUTHOR("Viresh Kumar <vireshk@kernel.org>");
diff --git a/drivers/dma/dw/dw.c b/drivers/dma/dw/dw.c
new file mode 100644
index 000000000..a4862263f
--- /dev/null
+++ b/drivers/dma/dw/dw.c
@@ -0,0 +1,137 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2007-2008 Atmel Corporation
+// Copyright (C) 2010-2011 ST Microelectronics
+// Copyright (C) 2013,2018 Intel Corporation
+
+#include <linux/bitops.h>
+#include <linux/dmaengine.h>
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+
+#include "internal.h"
+
+static void dw_dma_initialize_chan(struct dw_dma_chan *dwc)
+{
+ struct dw_dma *dw = to_dw_dma(dwc->chan.device);
+ u32 cfghi = is_slave_direction(dwc->direction) ? 0 : DWC_CFGH_FIFO_MODE;
+ u32 cfglo = DWC_CFGL_CH_PRIOR(dwc->priority);
+ bool hs_polarity = dwc->dws.hs_polarity;
+
+ cfghi |= DWC_CFGH_DST_PER(dwc->dws.dst_id);
+ cfghi |= DWC_CFGH_SRC_PER(dwc->dws.src_id);
+ cfghi |= DWC_CFGH_PROTCTL(dw->pdata->protctl);
+
+ /* Set polarity of handshake interface */
+ cfglo |= hs_polarity ? DWC_CFGL_HS_DST_POL | DWC_CFGL_HS_SRC_POL : 0;
+
+ channel_writel(dwc, CFG_LO, cfglo);
+ channel_writel(dwc, CFG_HI, cfghi);
+}
+
+static void dw_dma_suspend_chan(struct dw_dma_chan *dwc, bool drain)
+{
+ u32 cfglo = channel_readl(dwc, CFG_LO);
+
+ channel_writel(dwc, CFG_LO, cfglo | DWC_CFGL_CH_SUSP);
+}
+
+static void dw_dma_resume_chan(struct dw_dma_chan *dwc, bool drain)
+{
+ u32 cfglo = channel_readl(dwc, CFG_LO);
+
+ channel_writel(dwc, CFG_LO, cfglo & ~DWC_CFGL_CH_SUSP);
+}
+
+static u32 dw_dma_bytes2block(struct dw_dma_chan *dwc,
+ size_t bytes, unsigned int width, size_t *len)
+{
+ u32 block;
+
+ if ((bytes >> width) > dwc->block_size) {
+ block = dwc->block_size;
+ *len = dwc->block_size << width;
+ } else {
+ block = bytes >> width;
+ *len = bytes;
+ }
+
+ return block;
+}
+
+static size_t dw_dma_block2bytes(struct dw_dma_chan *dwc, u32 block, u32 width)
+{
+ return DWC_CTLH_BLOCK_TS(block) << width;
+}
+
+static u32 dw_dma_prepare_ctllo(struct dw_dma_chan *dwc)
+{
+ struct dma_slave_config *sconfig = &dwc->dma_sconfig;
+ u8 smsize = (dwc->direction == DMA_DEV_TO_MEM) ? sconfig->src_maxburst : 0;
+ u8 dmsize = (dwc->direction == DMA_MEM_TO_DEV) ? sconfig->dst_maxburst : 0;
+ u8 p_master = dwc->dws.p_master;
+ u8 m_master = dwc->dws.m_master;
+ u8 dms = (dwc->direction == DMA_MEM_TO_DEV) ? p_master : m_master;
+ u8 sms = (dwc->direction == DMA_DEV_TO_MEM) ? p_master : m_master;
+
+ return DWC_CTLL_LLP_D_EN | DWC_CTLL_LLP_S_EN |
+ DWC_CTLL_DST_MSIZE(dmsize) | DWC_CTLL_SRC_MSIZE(smsize) |
+ DWC_CTLL_DMS(dms) | DWC_CTLL_SMS(sms);
+}
+
+static void dw_dma_encode_maxburst(struct dw_dma_chan *dwc, u32 *maxburst)
+{
+ /*
+ * Fix burst size according to dw_dmac. We need to convert them as:
+ * 1 -> 0, 4 -> 1, 8 -> 2, 16 -> 3.
+ */
+ *maxburst = *maxburst > 1 ? fls(*maxburst) - 2 : 0;
+}
+
+static void dw_dma_set_device_name(struct dw_dma *dw, int id)
+{
+ snprintf(dw->name, sizeof(dw->name), "dw:dmac%d", id);
+}
+
+static void dw_dma_disable(struct dw_dma *dw)
+{
+ do_dw_dma_off(dw);
+}
+
+static void dw_dma_enable(struct dw_dma *dw)
+{
+ do_dw_dma_on(dw);
+}
+
+int dw_dma_probe(struct dw_dma_chip *chip)
+{
+ struct dw_dma *dw;
+
+ dw = devm_kzalloc(chip->dev, sizeof(*dw), GFP_KERNEL);
+ if (!dw)
+ return -ENOMEM;
+
+ /* Channel operations */
+ dw->initialize_chan = dw_dma_initialize_chan;
+ dw->suspend_chan = dw_dma_suspend_chan;
+ dw->resume_chan = dw_dma_resume_chan;
+ dw->prepare_ctllo = dw_dma_prepare_ctllo;
+ dw->encode_maxburst = dw_dma_encode_maxburst;
+ dw->bytes2block = dw_dma_bytes2block;
+ dw->block2bytes = dw_dma_block2bytes;
+
+ /* Device operations */
+ dw->set_device_name = dw_dma_set_device_name;
+ dw->disable = dw_dma_disable;
+ dw->enable = dw_dma_enable;
+
+ chip->dw = dw;
+ return do_dma_probe(chip);
+}
+EXPORT_SYMBOL_GPL(dw_dma_probe);
+
+int dw_dma_remove(struct dw_dma_chip *chip)
+{
+ return do_dma_remove(chip);
+}
+EXPORT_SYMBOL_GPL(dw_dma_remove);
diff --git a/drivers/dma/dw/idma32.c b/drivers/dma/dw/idma32.c
new file mode 100644
index 000000000..58f4078d8
--- /dev/null
+++ b/drivers/dma/dw/idma32.c
@@ -0,0 +1,291 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2013,2018,2020-2021 Intel Corporation
+
+#include <linux/bitops.h>
+#include <linux/dmaengine.h>
+#include <linux/errno.h>
+#include <linux/io.h>
+#include <linux/pci.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+
+#include "internal.h"
+
+#define DMA_CTL_CH(x) (0x1000 + (x) * 4)
+#define DMA_SRC_ADDR_FILLIN(x) (0x1100 + (x) * 4)
+#define DMA_DST_ADDR_FILLIN(x) (0x1200 + (x) * 4)
+#define DMA_XBAR_SEL(x) (0x1300 + (x) * 4)
+#define DMA_REGACCESS_CHID_CFG (0x1400)
+
+#define CTL_CH_TRANSFER_MODE_MASK GENMASK(1, 0)
+#define CTL_CH_TRANSFER_MODE_S2S 0
+#define CTL_CH_TRANSFER_MODE_S2D 1
+#define CTL_CH_TRANSFER_MODE_D2S 2
+#define CTL_CH_TRANSFER_MODE_D2D 3
+#define CTL_CH_RD_RS_MASK GENMASK(4, 3)
+#define CTL_CH_WR_RS_MASK GENMASK(6, 5)
+#define CTL_CH_RD_NON_SNOOP_BIT BIT(8)
+#define CTL_CH_WR_NON_SNOOP_BIT BIT(9)
+
+#define XBAR_SEL_DEVID_MASK GENMASK(15, 0)
+#define XBAR_SEL_RX_TX_BIT BIT(16)
+#define XBAR_SEL_RX_TX_SHIFT 16
+
+#define REGACCESS_CHID_MASK GENMASK(2, 0)
+
+static unsigned int idma32_get_slave_devfn(struct dw_dma_chan *dwc)
+{
+ struct device *slave = dwc->chan.slave;
+
+ if (!slave || !dev_is_pci(slave))
+ return 0;
+
+ return to_pci_dev(slave)->devfn;
+}
+
+static void idma32_initialize_chan_xbar(struct dw_dma_chan *dwc)
+{
+ struct dw_dma *dw = to_dw_dma(dwc->chan.device);
+ void __iomem *misc = __dw_regs(dw);
+ u32 cfghi = 0, cfglo = 0;
+ u8 dst_id, src_id;
+ u32 value;
+
+ /* DMA Channel ID Configuration register must be programmed first */
+ value = readl(misc + DMA_REGACCESS_CHID_CFG);
+
+ value &= ~REGACCESS_CHID_MASK;
+ value |= dwc->chan.chan_id;
+
+ writel(value, misc + DMA_REGACCESS_CHID_CFG);
+
+ /* Configure channel attributes */
+ value = readl(misc + DMA_CTL_CH(dwc->chan.chan_id));
+
+ value &= ~(CTL_CH_RD_NON_SNOOP_BIT | CTL_CH_WR_NON_SNOOP_BIT);
+ value &= ~(CTL_CH_RD_RS_MASK | CTL_CH_WR_RS_MASK);
+ value &= ~CTL_CH_TRANSFER_MODE_MASK;
+
+ switch (dwc->direction) {
+ case DMA_MEM_TO_DEV:
+ value |= CTL_CH_TRANSFER_MODE_D2S;
+ value |= CTL_CH_WR_NON_SNOOP_BIT;
+ break;
+ case DMA_DEV_TO_MEM:
+ value |= CTL_CH_TRANSFER_MODE_S2D;
+ value |= CTL_CH_RD_NON_SNOOP_BIT;
+ break;
+ default:
+ /*
+ * Memory-to-Memory and Device-to-Device are ignored for now.
+ *
+ * For Memory-to-Memory transfers we would need to set mode
+ * and disable snooping on both sides.
+ */
+ return;
+ }
+
+ writel(value, misc + DMA_CTL_CH(dwc->chan.chan_id));
+
+ /* Configure crossbar selection */
+ value = readl(misc + DMA_XBAR_SEL(dwc->chan.chan_id));
+
+ /* DEVFN selection */
+ value &= ~XBAR_SEL_DEVID_MASK;
+ value |= idma32_get_slave_devfn(dwc);
+
+ switch (dwc->direction) {
+ case DMA_MEM_TO_DEV:
+ value |= XBAR_SEL_RX_TX_BIT;
+ break;
+ case DMA_DEV_TO_MEM:
+ value &= ~XBAR_SEL_RX_TX_BIT;
+ break;
+ default:
+ /* Memory-to-Memory and Device-to-Device are ignored for now */
+ return;
+ }
+
+ writel(value, misc + DMA_XBAR_SEL(dwc->chan.chan_id));
+
+ /* Configure DMA channel low and high registers */
+ switch (dwc->direction) {
+ case DMA_MEM_TO_DEV:
+ dst_id = dwc->chan.chan_id;
+ src_id = dwc->dws.src_id;
+ break;
+ case DMA_DEV_TO_MEM:
+ dst_id = dwc->dws.dst_id;
+ src_id = dwc->chan.chan_id;
+ break;
+ default:
+ /* Memory-to-Memory and Device-to-Device are ignored for now */
+ return;
+ }
+
+ /* Set default burst alignment */
+ cfglo |= IDMA32C_CFGL_DST_BURST_ALIGN | IDMA32C_CFGL_SRC_BURST_ALIGN;
+
+ /* Low 4 bits of the request lines */
+ cfghi |= IDMA32C_CFGH_DST_PER(dst_id & 0xf);
+ cfghi |= IDMA32C_CFGH_SRC_PER(src_id & 0xf);
+
+ /* Request line extension (2 bits) */
+ cfghi |= IDMA32C_CFGH_DST_PER_EXT(dst_id >> 4 & 0x3);
+ cfghi |= IDMA32C_CFGH_SRC_PER_EXT(src_id >> 4 & 0x3);
+
+ channel_writel(dwc, CFG_LO, cfglo);
+ channel_writel(dwc, CFG_HI, cfghi);
+}
+
+static void idma32_initialize_chan_generic(struct dw_dma_chan *dwc)
+{
+ u32 cfghi = 0;
+ u32 cfglo = 0;
+
+ /* Set default burst alignment */
+ cfglo |= IDMA32C_CFGL_DST_BURST_ALIGN | IDMA32C_CFGL_SRC_BURST_ALIGN;
+
+ /* Low 4 bits of the request lines */
+ cfghi |= IDMA32C_CFGH_DST_PER(dwc->dws.dst_id & 0xf);
+ cfghi |= IDMA32C_CFGH_SRC_PER(dwc->dws.src_id & 0xf);
+
+ /* Request line extension (2 bits) */
+ cfghi |= IDMA32C_CFGH_DST_PER_EXT(dwc->dws.dst_id >> 4 & 0x3);
+ cfghi |= IDMA32C_CFGH_SRC_PER_EXT(dwc->dws.src_id >> 4 & 0x3);
+
+ channel_writel(dwc, CFG_LO, cfglo);
+ channel_writel(dwc, CFG_HI, cfghi);
+}
+
+static void idma32_suspend_chan(struct dw_dma_chan *dwc, bool drain)
+{
+ u32 cfglo = channel_readl(dwc, CFG_LO);
+
+ if (drain)
+ cfglo |= IDMA32C_CFGL_CH_DRAIN;
+
+ channel_writel(dwc, CFG_LO, cfglo | DWC_CFGL_CH_SUSP);
+}
+
+static void idma32_resume_chan(struct dw_dma_chan *dwc, bool drain)
+{
+ u32 cfglo = channel_readl(dwc, CFG_LO);
+
+ if (drain)
+ cfglo &= ~IDMA32C_CFGL_CH_DRAIN;
+
+ channel_writel(dwc, CFG_LO, cfglo & ~DWC_CFGL_CH_SUSP);
+}
+
+static u32 idma32_bytes2block(struct dw_dma_chan *dwc,
+ size_t bytes, unsigned int width, size_t *len)
+{
+ u32 block;
+
+ if (bytes > dwc->block_size) {
+ block = dwc->block_size;
+ *len = dwc->block_size;
+ } else {
+ block = bytes;
+ *len = bytes;
+ }
+
+ return block;
+}
+
+static size_t idma32_block2bytes(struct dw_dma_chan *dwc, u32 block, u32 width)
+{
+ return IDMA32C_CTLH_BLOCK_TS(block);
+}
+
+static u32 idma32_prepare_ctllo(struct dw_dma_chan *dwc)
+{
+ struct dma_slave_config *sconfig = &dwc->dma_sconfig;
+ u8 smsize = (dwc->direction == DMA_DEV_TO_MEM) ? sconfig->src_maxburst : 0;
+ u8 dmsize = (dwc->direction == DMA_MEM_TO_DEV) ? sconfig->dst_maxburst : 0;
+
+ return DWC_CTLL_LLP_D_EN | DWC_CTLL_LLP_S_EN |
+ DWC_CTLL_DST_MSIZE(dmsize) | DWC_CTLL_SRC_MSIZE(smsize);
+}
+
+static void idma32_encode_maxburst(struct dw_dma_chan *dwc, u32 *maxburst)
+{
+ *maxburst = *maxburst > 1 ? fls(*maxburst) - 1 : 0;
+}
+
+static void idma32_set_device_name(struct dw_dma *dw, int id)
+{
+ snprintf(dw->name, sizeof(dw->name), "idma32:dmac%d", id);
+}
+
+/*
+ * Program FIFO size of channels.
+ *
+ * By default full FIFO (512 bytes) is assigned to channel 0. Here we
+ * slice FIFO on equal parts between channels.
+ */
+static void idma32_fifo_partition(struct dw_dma *dw)
+{
+ u64 value = IDMA32C_FP_PSIZE_CH0(64) | IDMA32C_FP_PSIZE_CH1(64) |
+ IDMA32C_FP_UPDATE;
+ u64 fifo_partition = 0;
+
+ /* Fill FIFO_PARTITION low bits (Channels 0..1, 4..5) */
+ fifo_partition |= value << 0;
+
+ /* Fill FIFO_PARTITION high bits (Channels 2..3, 6..7) */
+ fifo_partition |= value << 32;
+
+ /* Program FIFO Partition registers - 64 bytes per channel */
+ idma32_writeq(dw, FIFO_PARTITION1, fifo_partition);
+ idma32_writeq(dw, FIFO_PARTITION0, fifo_partition);
+}
+
+static void idma32_disable(struct dw_dma *dw)
+{
+ do_dw_dma_off(dw);
+ idma32_fifo_partition(dw);
+}
+
+static void idma32_enable(struct dw_dma *dw)
+{
+ idma32_fifo_partition(dw);
+ do_dw_dma_on(dw);
+}
+
+int idma32_dma_probe(struct dw_dma_chip *chip)
+{
+ struct dw_dma *dw;
+
+ dw = devm_kzalloc(chip->dev, sizeof(*dw), GFP_KERNEL);
+ if (!dw)
+ return -ENOMEM;
+
+ /* Channel operations */
+ if (chip->pdata->quirks & DW_DMA_QUIRK_XBAR_PRESENT)
+ dw->initialize_chan = idma32_initialize_chan_xbar;
+ else
+ dw->initialize_chan = idma32_initialize_chan_generic;
+ dw->suspend_chan = idma32_suspend_chan;
+ dw->resume_chan = idma32_resume_chan;
+ dw->prepare_ctllo = idma32_prepare_ctllo;
+ dw->encode_maxburst = idma32_encode_maxburst;
+ dw->bytes2block = idma32_bytes2block;
+ dw->block2bytes = idma32_block2bytes;
+
+ /* Device operations */
+ dw->set_device_name = idma32_set_device_name;
+ dw->disable = idma32_disable;
+ dw->enable = idma32_enable;
+
+ chip->dw = dw;
+ return do_dma_probe(chip);
+}
+EXPORT_SYMBOL_GPL(idma32_dma_probe);
+
+int idma32_dma_remove(struct dw_dma_chip *chip)
+{
+ return do_dma_remove(chip);
+}
+EXPORT_SYMBOL_GPL(idma32_dma_remove);
diff --git a/drivers/dma/dw/internal.h b/drivers/dma/dw/internal.h
new file mode 100644
index 000000000..563ce7348
--- /dev/null
+++ b/drivers/dma/dw/internal.h
@@ -0,0 +1,93 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Driver for the Synopsys DesignWare DMA Controller
+ *
+ * Copyright (C) 2013 Intel Corporation
+ */
+
+#ifndef _DMA_DW_INTERNAL_H
+#define _DMA_DW_INTERNAL_H
+
+#include <linux/dma/dw.h>
+
+#include "regs.h"
+
+int do_dma_probe(struct dw_dma_chip *chip);
+int do_dma_remove(struct dw_dma_chip *chip);
+
+void do_dw_dma_on(struct dw_dma *dw);
+void do_dw_dma_off(struct dw_dma *dw);
+
+int do_dw_dma_disable(struct dw_dma_chip *chip);
+int do_dw_dma_enable(struct dw_dma_chip *chip);
+
+extern bool dw_dma_filter(struct dma_chan *chan, void *param);
+
+#ifdef CONFIG_ACPI
+void dw_dma_acpi_controller_register(struct dw_dma *dw);
+void dw_dma_acpi_controller_free(struct dw_dma *dw);
+#else /* !CONFIG_ACPI */
+static inline void dw_dma_acpi_controller_register(struct dw_dma *dw) {}
+static inline void dw_dma_acpi_controller_free(struct dw_dma *dw) {}
+#endif /* !CONFIG_ACPI */
+
+struct platform_device;
+
+#ifdef CONFIG_OF
+struct dw_dma_platform_data *dw_dma_parse_dt(struct platform_device *pdev);
+void dw_dma_of_controller_register(struct dw_dma *dw);
+void dw_dma_of_controller_free(struct dw_dma *dw);
+#else
+static inline struct dw_dma_platform_data *dw_dma_parse_dt(struct platform_device *pdev)
+{
+ return NULL;
+}
+static inline void dw_dma_of_controller_register(struct dw_dma *dw) {}
+static inline void dw_dma_of_controller_free(struct dw_dma *dw) {}
+#endif
+
+struct dw_dma_chip_pdata {
+ const struct dw_dma_platform_data *pdata;
+ int (*probe)(struct dw_dma_chip *chip);
+ int (*remove)(struct dw_dma_chip *chip);
+ struct dw_dma_chip *chip;
+};
+
+static __maybe_unused const struct dw_dma_chip_pdata dw_dma_chip_pdata = {
+ .probe = dw_dma_probe,
+ .remove = dw_dma_remove,
+};
+
+static const struct dw_dma_platform_data idma32_pdata = {
+ .nr_channels = 8,
+ .chan_allocation_order = CHAN_ALLOCATION_ASCENDING,
+ .chan_priority = CHAN_PRIORITY_ASCENDING,
+ .block_size = 131071,
+ .nr_masters = 1,
+ .data_width = {4},
+ .multi_block = {1, 1, 1, 1, 1, 1, 1, 1},
+};
+
+static __maybe_unused const struct dw_dma_chip_pdata idma32_chip_pdata = {
+ .pdata = &idma32_pdata,
+ .probe = idma32_dma_probe,
+ .remove = idma32_dma_remove,
+};
+
+static const struct dw_dma_platform_data xbar_pdata = {
+ .nr_channels = 8,
+ .chan_allocation_order = CHAN_ALLOCATION_ASCENDING,
+ .chan_priority = CHAN_PRIORITY_ASCENDING,
+ .block_size = 131071,
+ .nr_masters = 1,
+ .data_width = {4},
+ .quirks = DW_DMA_QUIRK_XBAR_PRESENT,
+};
+
+static __maybe_unused const struct dw_dma_chip_pdata xbar_chip_pdata = {
+ .pdata = &xbar_pdata,
+ .probe = idma32_dma_probe,
+ .remove = idma32_dma_remove,
+};
+
+#endif /* _DMA_DW_INTERNAL_H */
diff --git a/drivers/dma/dw/of.c b/drivers/dma/dw/of.c
new file mode 100644
index 000000000..523ca8068
--- /dev/null
+++ b/drivers/dma/dw/of.c
@@ -0,0 +1,122 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Platform driver for the Synopsys DesignWare DMA Controller
+ *
+ * Copyright (C) 2007-2008 Atmel Corporation
+ * Copyright (C) 2010-2011 ST Microelectronics
+ * Copyright (C) 2013 Intel Corporation
+ */
+
+#include <linux/of.h>
+#include <linux/of_dma.h>
+#include <linux/platform_device.h>
+
+#include "internal.h"
+
+static struct dma_chan *dw_dma_of_xlate(struct of_phandle_args *dma_spec,
+ struct of_dma *ofdma)
+{
+ struct dw_dma *dw = ofdma->of_dma_data;
+ struct dw_dma_slave slave = {
+ .dma_dev = dw->dma.dev,
+ };
+ dma_cap_mask_t cap;
+
+ if (dma_spec->args_count < 3 || dma_spec->args_count > 4)
+ return NULL;
+
+ slave.src_id = dma_spec->args[0];
+ slave.dst_id = dma_spec->args[0];
+ slave.m_master = dma_spec->args[1];
+ slave.p_master = dma_spec->args[2];
+ if (dma_spec->args_count >= 4)
+ slave.channels = dma_spec->args[3];
+
+ if (WARN_ON(slave.src_id >= DW_DMA_MAX_NR_REQUESTS ||
+ slave.dst_id >= DW_DMA_MAX_NR_REQUESTS ||
+ slave.m_master >= dw->pdata->nr_masters ||
+ slave.p_master >= dw->pdata->nr_masters ||
+ slave.channels >= BIT(dw->pdata->nr_channels)))
+ return NULL;
+
+ dma_cap_zero(cap);
+ dma_cap_set(DMA_SLAVE, cap);
+
+ /* TODO: there should be a simpler way to do this */
+ return dma_request_channel(cap, dw_dma_filter, &slave);
+}
+
+struct dw_dma_platform_data *dw_dma_parse_dt(struct platform_device *pdev)
+{
+ struct device_node *np = pdev->dev.of_node;
+ struct dw_dma_platform_data *pdata;
+ u32 tmp, arr[DW_DMA_MAX_NR_MASTERS];
+ u32 nr_masters;
+ u32 nr_channels;
+
+ if (of_property_read_u32(np, "dma-masters", &nr_masters))
+ return NULL;
+ if (nr_masters < 1 || nr_masters > DW_DMA_MAX_NR_MASTERS)
+ return NULL;
+
+ if (of_property_read_u32(np, "dma-channels", &nr_channels))
+ return NULL;
+ if (nr_channels > DW_DMA_MAX_NR_CHANNELS)
+ return NULL;
+
+ pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
+ if (!pdata)
+ return NULL;
+
+ pdata->nr_masters = nr_masters;
+ pdata->nr_channels = nr_channels;
+
+ of_property_read_u32(np, "chan_allocation_order", &pdata->chan_allocation_order);
+ of_property_read_u32(np, "chan_priority", &pdata->chan_priority);
+
+ of_property_read_u32(np, "block_size", &pdata->block_size);
+
+ /* Try deprecated property first */
+ if (!of_property_read_u32_array(np, "data_width", arr, nr_masters)) {
+ for (tmp = 0; tmp < nr_masters; tmp++)
+ pdata->data_width[tmp] = BIT(arr[tmp] & 0x07);
+ }
+
+ /* If "data_width" and "data-width" both provided use the latter one */
+ of_property_read_u32_array(np, "data-width", pdata->data_width, nr_masters);
+
+ memset32(pdata->multi_block, 1, nr_channels);
+ of_property_read_u32_array(np, "multi-block", pdata->multi_block, nr_channels);
+
+ memset32(pdata->max_burst, DW_DMA_MAX_BURST, nr_channels);
+ of_property_read_u32_array(np, "snps,max-burst-len", pdata->max_burst, nr_channels);
+
+ of_property_read_u32(np, "snps,dma-protection-control", &pdata->protctl);
+ if (pdata->protctl > CHAN_PROTCTL_MASK)
+ return NULL;
+
+ return pdata;
+}
+
+void dw_dma_of_controller_register(struct dw_dma *dw)
+{
+ struct device *dev = dw->dma.dev;
+ int ret;
+
+ if (!dev->of_node)
+ return;
+
+ ret = of_dma_controller_register(dev->of_node, dw_dma_of_xlate, dw);
+ if (ret)
+ dev_err(dev, "could not register of_dma_controller\n");
+}
+
+void dw_dma_of_controller_free(struct dw_dma *dw)
+{
+ struct device *dev = dw->dma.dev;
+
+ if (!dev->of_node)
+ return;
+
+ of_dma_controller_free(dev->of_node);
+}
diff --git a/drivers/dma/dw/pci.c b/drivers/dma/dw/pci.c
new file mode 100644
index 000000000..ad2d4d012
--- /dev/null
+++ b/drivers/dma/dw/pci.c
@@ -0,0 +1,147 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PCI driver for the Synopsys DesignWare DMA Controller
+ *
+ * Copyright (C) 2013 Intel Corporation
+ * Author: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+ */
+
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/device.h>
+
+#include "internal.h"
+
+static int dw_pci_probe(struct pci_dev *pdev, const struct pci_device_id *pid)
+{
+ const struct dw_dma_chip_pdata *drv_data = (void *)pid->driver_data;
+ struct dw_dma_chip_pdata *data;
+ struct dw_dma_chip *chip;
+ int ret;
+
+ ret = pcim_enable_device(pdev);
+ if (ret)
+ return ret;
+
+ ret = pcim_iomap_regions(pdev, 1 << 0, pci_name(pdev));
+ if (ret) {
+ dev_err(&pdev->dev, "I/O memory remapping failed\n");
+ return ret;
+ }
+
+ pci_set_master(pdev);
+ pci_try_set_mwi(pdev);
+
+ ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
+ if (ret)
+ return ret;
+
+ data = devm_kmemdup(&pdev->dev, drv_data, sizeof(*drv_data), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ chip = devm_kzalloc(&pdev->dev, sizeof(*chip), GFP_KERNEL);
+ if (!chip)
+ return -ENOMEM;
+
+ chip->dev = &pdev->dev;
+ chip->id = pdev->devfn;
+ chip->regs = pcim_iomap_table(pdev)[0];
+ chip->irq = pdev->irq;
+ chip->pdata = data->pdata;
+
+ data->chip = chip;
+
+ ret = data->probe(chip);
+ if (ret)
+ return ret;
+
+ dw_dma_acpi_controller_register(chip->dw);
+
+ pci_set_drvdata(pdev, data);
+
+ return 0;
+}
+
+static void dw_pci_remove(struct pci_dev *pdev)
+{
+ struct dw_dma_chip_pdata *data = pci_get_drvdata(pdev);
+ struct dw_dma_chip *chip = data->chip;
+ int ret;
+
+ dw_dma_acpi_controller_free(chip->dw);
+
+ ret = data->remove(chip);
+ if (ret)
+ dev_warn(&pdev->dev, "can't remove device properly: %d\n", ret);
+}
+
+#ifdef CONFIG_PM_SLEEP
+
+static int dw_pci_suspend_late(struct device *dev)
+{
+ struct dw_dma_chip_pdata *data = dev_get_drvdata(dev);
+ struct dw_dma_chip *chip = data->chip;
+
+ return do_dw_dma_disable(chip);
+};
+
+static int dw_pci_resume_early(struct device *dev)
+{
+ struct dw_dma_chip_pdata *data = dev_get_drvdata(dev);
+ struct dw_dma_chip *chip = data->chip;
+
+ return do_dw_dma_enable(chip);
+};
+
+#endif /* CONFIG_PM_SLEEP */
+
+static const struct dev_pm_ops dw_pci_dev_pm_ops = {
+ SET_LATE_SYSTEM_SLEEP_PM_OPS(dw_pci_suspend_late, dw_pci_resume_early)
+};
+
+static const struct pci_device_id dw_pci_id_table[] = {
+ /* Medfield (GPDMA) */
+ { PCI_VDEVICE(INTEL, 0x0827), (kernel_ulong_t)&dw_dma_chip_pdata },
+
+ /* BayTrail */
+ { PCI_VDEVICE(INTEL, 0x0f06), (kernel_ulong_t)&dw_dma_chip_pdata },
+ { PCI_VDEVICE(INTEL, 0x0f40), (kernel_ulong_t)&dw_dma_chip_pdata },
+
+ /* Merrifield */
+ { PCI_VDEVICE(INTEL, 0x11a2), (kernel_ulong_t)&idma32_chip_pdata },
+
+ /* Braswell */
+ { PCI_VDEVICE(INTEL, 0x2286), (kernel_ulong_t)&dw_dma_chip_pdata },
+ { PCI_VDEVICE(INTEL, 0x22c0), (kernel_ulong_t)&dw_dma_chip_pdata },
+
+ /* Elkhart Lake iDMA 32-bit (PSE DMA) */
+ { PCI_VDEVICE(INTEL, 0x4bb4), (kernel_ulong_t)&xbar_chip_pdata },
+ { PCI_VDEVICE(INTEL, 0x4bb5), (kernel_ulong_t)&xbar_chip_pdata },
+ { PCI_VDEVICE(INTEL, 0x4bb6), (kernel_ulong_t)&xbar_chip_pdata },
+
+ /* Haswell */
+ { PCI_VDEVICE(INTEL, 0x9c60), (kernel_ulong_t)&dw_dma_chip_pdata },
+
+ /* Broadwell */
+ { PCI_VDEVICE(INTEL, 0x9ce0), (kernel_ulong_t)&dw_dma_chip_pdata },
+
+ { }
+};
+MODULE_DEVICE_TABLE(pci, dw_pci_id_table);
+
+static struct pci_driver dw_pci_driver = {
+ .name = "dw_dmac_pci",
+ .id_table = dw_pci_id_table,
+ .probe = dw_pci_probe,
+ .remove = dw_pci_remove,
+ .driver = {
+ .pm = &dw_pci_dev_pm_ops,
+ },
+};
+
+module_pci_driver(dw_pci_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("Synopsys DesignWare DMA Controller PCI driver");
+MODULE_AUTHOR("Andy Shevchenko <andriy.shevchenko@linux.intel.com>");
diff --git a/drivers/dma/dw/platform.c b/drivers/dma/dw/platform.c
new file mode 100644
index 000000000..47f2292db
--- /dev/null
+++ b/drivers/dma/dw/platform.c
@@ -0,0 +1,220 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Platform driver for the Synopsys DesignWare DMA Controller
+ *
+ * Copyright (C) 2007-2008 Atmel Corporation
+ * Copyright (C) 2010-2011 ST Microelectronics
+ * Copyright (C) 2013 Intel Corporation
+ *
+ * Some parts of this driver are derived from the original dw_dmac.
+ */
+
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/clk.h>
+#include <linux/pm_runtime.h>
+#include <linux/platform_device.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/of.h>
+#include <linux/acpi.h>
+
+#include "internal.h"
+
+#define DRV_NAME "dw_dmac"
+
+static int dw_probe(struct platform_device *pdev)
+{
+ const struct dw_dma_chip_pdata *match;
+ struct dw_dma_chip_pdata *data;
+ struct dw_dma_chip *chip;
+ struct device *dev = &pdev->dev;
+ int err;
+
+ match = device_get_match_data(dev);
+ if (!match)
+ return -ENODEV;
+
+ data = devm_kmemdup(&pdev->dev, match, sizeof(*match), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ chip = devm_kzalloc(dev, sizeof(*chip), GFP_KERNEL);
+ if (!chip)
+ return -ENOMEM;
+
+ chip->irq = platform_get_irq(pdev, 0);
+ if (chip->irq < 0)
+ return chip->irq;
+
+ chip->regs = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(chip->regs))
+ return PTR_ERR(chip->regs);
+
+ err = dma_coerce_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
+ if (err)
+ return err;
+
+ if (!data->pdata)
+ data->pdata = dev_get_platdata(dev);
+ if (!data->pdata)
+ data->pdata = dw_dma_parse_dt(pdev);
+
+ chip->dev = dev;
+ chip->id = pdev->id;
+ chip->pdata = data->pdata;
+
+ data->chip = chip;
+
+ chip->clk = devm_clk_get_optional(chip->dev, "hclk");
+ if (IS_ERR(chip->clk))
+ return PTR_ERR(chip->clk);
+ err = clk_prepare_enable(chip->clk);
+ if (err)
+ return err;
+
+ pm_runtime_enable(&pdev->dev);
+
+ err = data->probe(chip);
+ if (err)
+ goto err_dw_dma_probe;
+
+ platform_set_drvdata(pdev, data);
+
+ dw_dma_of_controller_register(chip->dw);
+
+ dw_dma_acpi_controller_register(chip->dw);
+
+ return 0;
+
+err_dw_dma_probe:
+ pm_runtime_disable(&pdev->dev);
+ clk_disable_unprepare(chip->clk);
+ return err;
+}
+
+static int dw_remove(struct platform_device *pdev)
+{
+ struct dw_dma_chip_pdata *data = platform_get_drvdata(pdev);
+ struct dw_dma_chip *chip = data->chip;
+ int ret;
+
+ dw_dma_acpi_controller_free(chip->dw);
+
+ dw_dma_of_controller_free(chip->dw);
+
+ ret = data->remove(chip);
+ if (ret)
+ dev_warn(chip->dev, "can't remove device properly: %d\n", ret);
+
+ pm_runtime_disable(&pdev->dev);
+ clk_disable_unprepare(chip->clk);
+
+ return 0;
+}
+
+static void dw_shutdown(struct platform_device *pdev)
+{
+ struct dw_dma_chip_pdata *data = platform_get_drvdata(pdev);
+ struct dw_dma_chip *chip = data->chip;
+
+ /*
+ * We have to call do_dw_dma_disable() to stop any ongoing transfer. On
+ * some platforms we can't do that since DMA device is powered off.
+ * Moreover we have no possibility to check if the platform is affected
+ * or not. That's why we call pm_runtime_get_sync() / pm_runtime_put()
+ * unconditionally. On the other hand we can't use
+ * pm_runtime_suspended() because runtime PM framework is not fully
+ * used by the driver.
+ */
+ pm_runtime_get_sync(chip->dev);
+ do_dw_dma_disable(chip);
+ pm_runtime_put_sync_suspend(chip->dev);
+
+ clk_disable_unprepare(chip->clk);
+}
+
+#ifdef CONFIG_OF
+static const struct of_device_id dw_dma_of_id_table[] = {
+ { .compatible = "snps,dma-spear1340", .data = &dw_dma_chip_pdata },
+ { .compatible = "renesas,rzn1-dma", .data = &dw_dma_chip_pdata },
+ {}
+};
+MODULE_DEVICE_TABLE(of, dw_dma_of_id_table);
+#endif
+
+#ifdef CONFIG_ACPI
+static const struct acpi_device_id dw_dma_acpi_id_table[] = {
+ { "INTL9C60", (kernel_ulong_t)&dw_dma_chip_pdata },
+ { "80862286", (kernel_ulong_t)&dw_dma_chip_pdata },
+ { "808622C0", (kernel_ulong_t)&dw_dma_chip_pdata },
+
+ /* Elkhart Lake iDMA 32-bit (PSE DMA) */
+ { "80864BB4", (kernel_ulong_t)&xbar_chip_pdata },
+ { "80864BB5", (kernel_ulong_t)&xbar_chip_pdata },
+ { "80864BB6", (kernel_ulong_t)&xbar_chip_pdata },
+
+ { }
+};
+MODULE_DEVICE_TABLE(acpi, dw_dma_acpi_id_table);
+#endif
+
+#ifdef CONFIG_PM_SLEEP
+
+static int dw_suspend_late(struct device *dev)
+{
+ struct dw_dma_chip_pdata *data = dev_get_drvdata(dev);
+ struct dw_dma_chip *chip = data->chip;
+
+ do_dw_dma_disable(chip);
+ clk_disable_unprepare(chip->clk);
+
+ return 0;
+}
+
+static int dw_resume_early(struct device *dev)
+{
+ struct dw_dma_chip_pdata *data = dev_get_drvdata(dev);
+ struct dw_dma_chip *chip = data->chip;
+ int ret;
+
+ ret = clk_prepare_enable(chip->clk);
+ if (ret)
+ return ret;
+
+ return do_dw_dma_enable(chip);
+}
+
+#endif /* CONFIG_PM_SLEEP */
+
+static const struct dev_pm_ops dw_dev_pm_ops = {
+ SET_LATE_SYSTEM_SLEEP_PM_OPS(dw_suspend_late, dw_resume_early)
+};
+
+static struct platform_driver dw_driver = {
+ .probe = dw_probe,
+ .remove = dw_remove,
+ .shutdown = dw_shutdown,
+ .driver = {
+ .name = DRV_NAME,
+ .pm = &dw_dev_pm_ops,
+ .of_match_table = of_match_ptr(dw_dma_of_id_table),
+ .acpi_match_table = ACPI_PTR(dw_dma_acpi_id_table),
+ },
+};
+
+static int __init dw_init(void)
+{
+ return platform_driver_register(&dw_driver);
+}
+subsys_initcall(dw_init);
+
+static void __exit dw_exit(void)
+{
+ platform_driver_unregister(&dw_driver);
+}
+module_exit(dw_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("Synopsys DesignWare DMA Controller platform driver");
+MODULE_ALIAS("platform:" DRV_NAME);
diff --git a/drivers/dma/dw/regs.h b/drivers/dma/dw/regs.h
new file mode 100644
index 000000000..76654bd13
--- /dev/null
+++ b/drivers/dma/dw/regs.h
@@ -0,0 +1,409 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Driver for the Synopsys DesignWare AHB DMA Controller
+ *
+ * Copyright (C) 2005-2007 Atmel Corporation
+ * Copyright (C) 2010-2011 ST Microelectronics
+ * Copyright (C) 2016 Intel Corporation
+ */
+
+#include <linux/bitops.h>
+#include <linux/interrupt.h>
+#include <linux/dmaengine.h>
+
+#include <linux/io-64-nonatomic-hi-lo.h>
+
+#include "internal.h"
+
+#define DW_DMA_MAX_NR_REQUESTS 16
+
+/* flow controller */
+enum dw_dma_fc {
+ DW_DMA_FC_D_M2M,
+ DW_DMA_FC_D_M2P,
+ DW_DMA_FC_D_P2M,
+ DW_DMA_FC_D_P2P,
+ DW_DMA_FC_P_P2M,
+ DW_DMA_FC_SP_P2P,
+ DW_DMA_FC_P_M2P,
+ DW_DMA_FC_DP_P2P,
+};
+
+/*
+ * Redefine this macro to handle differences between 32- and 64-bit
+ * addressing, big vs. little endian, etc.
+ */
+#define DW_REG(name) u32 name; u32 __pad_##name
+
+/* Hardware register definitions. */
+struct dw_dma_chan_regs {
+ DW_REG(SAR); /* Source Address Register */
+ DW_REG(DAR); /* Destination Address Register */
+ DW_REG(LLP); /* Linked List Pointer */
+ u32 CTL_LO; /* Control Register Low */
+ u32 CTL_HI; /* Control Register High */
+ DW_REG(SSTAT);
+ DW_REG(DSTAT);
+ DW_REG(SSTATAR);
+ DW_REG(DSTATAR);
+ u32 CFG_LO; /* Configuration Register Low */
+ u32 CFG_HI; /* Configuration Register High */
+ DW_REG(SGR);
+ DW_REG(DSR);
+};
+
+struct dw_dma_irq_regs {
+ DW_REG(XFER);
+ DW_REG(BLOCK);
+ DW_REG(SRC_TRAN);
+ DW_REG(DST_TRAN);
+ DW_REG(ERROR);
+};
+
+struct dw_dma_regs {
+ /* per-channel registers */
+ struct dw_dma_chan_regs CHAN[DW_DMA_MAX_NR_CHANNELS];
+
+ /* irq handling */
+ struct dw_dma_irq_regs RAW; /* r */
+ struct dw_dma_irq_regs STATUS; /* r (raw & mask) */
+ struct dw_dma_irq_regs MASK; /* rw (set = irq enabled) */
+ struct dw_dma_irq_regs CLEAR; /* w (ack, affects "raw") */
+
+ DW_REG(STATUS_INT); /* r */
+
+ /* software handshaking */
+ DW_REG(REQ_SRC);
+ DW_REG(REQ_DST);
+ DW_REG(SGL_REQ_SRC);
+ DW_REG(SGL_REQ_DST);
+ DW_REG(LAST_SRC);
+ DW_REG(LAST_DST);
+
+ /* miscellaneous */
+ DW_REG(CFG);
+ DW_REG(CH_EN);
+ DW_REG(ID);
+ DW_REG(TEST);
+
+ /* iDMA 32-bit support */
+ DW_REG(CLASS_PRIORITY0);
+ DW_REG(CLASS_PRIORITY1);
+
+ /* optional encoded params, 0x3c8..0x3f7 */
+ u32 __reserved;
+
+ /* per-channel configuration registers */
+ u32 DWC_PARAMS[DW_DMA_MAX_NR_CHANNELS];
+ u32 MULTI_BLK_TYPE;
+ u32 MAX_BLK_SIZE;
+
+ /* top-level parameters */
+ u32 DW_PARAMS;
+
+ /* component ID */
+ u32 COMP_TYPE;
+ u32 COMP_VERSION;
+
+ /* iDMA 32-bit support */
+ DW_REG(FIFO_PARTITION0);
+ DW_REG(FIFO_PARTITION1);
+
+ DW_REG(SAI_ERR);
+ DW_REG(GLOBAL_CFG);
+};
+
+/* Bitfields in DW_PARAMS */
+#define DW_PARAMS_NR_CHAN 8 /* number of channels */
+#define DW_PARAMS_NR_MASTER 11 /* number of AHB masters */
+#define DW_PARAMS_DATA_WIDTH(n) (15 + 2 * (n))
+#define DW_PARAMS_DATA_WIDTH1 15 /* master 1 data width */
+#define DW_PARAMS_DATA_WIDTH2 17 /* master 2 data width */
+#define DW_PARAMS_DATA_WIDTH3 19 /* master 3 data width */
+#define DW_PARAMS_DATA_WIDTH4 21 /* master 4 data width */
+#define DW_PARAMS_EN 28 /* encoded parameters */
+
+/* Bitfields in DWC_PARAMS */
+#define DWC_PARAMS_MBLK_EN 11 /* multi block transfer */
+#define DWC_PARAMS_HC_LLP 13 /* set LLP register to zero */
+#define DWC_PARAMS_MSIZE 16 /* max group transaction size */
+
+/* bursts size */
+enum dw_dma_msize {
+ DW_DMA_MSIZE_1,
+ DW_DMA_MSIZE_4,
+ DW_DMA_MSIZE_8,
+ DW_DMA_MSIZE_16,
+ DW_DMA_MSIZE_32,
+ DW_DMA_MSIZE_64,
+ DW_DMA_MSIZE_128,
+ DW_DMA_MSIZE_256,
+};
+
+/* Bitfields in LLP */
+#define DWC_LLP_LMS(x) ((x) & 3) /* list master select */
+#define DWC_LLP_LOC(x) ((x) & ~3) /* next lli */
+
+/* Bitfields in CTL_LO */
+#define DWC_CTLL_INT_EN (1 << 0) /* irqs enabled? */
+#define DWC_CTLL_DST_WIDTH(n) ((n)<<1) /* bytes per element */
+#define DWC_CTLL_SRC_WIDTH(n) ((n)<<4)
+#define DWC_CTLL_DST_INC (0<<7) /* DAR update/not */
+#define DWC_CTLL_DST_DEC (1<<7)
+#define DWC_CTLL_DST_FIX (2<<7)
+#define DWC_CTLL_SRC_INC (0<<9) /* SAR update/not */
+#define DWC_CTLL_SRC_DEC (1<<9)
+#define DWC_CTLL_SRC_FIX (2<<9)
+#define DWC_CTLL_DST_MSIZE(n) ((n)<<11) /* burst, #elements */
+#define DWC_CTLL_SRC_MSIZE(n) ((n)<<14)
+#define DWC_CTLL_S_GATH_EN (1 << 17) /* src gather, !FIX */
+#define DWC_CTLL_D_SCAT_EN (1 << 18) /* dst scatter, !FIX */
+#define DWC_CTLL_FC(n) ((n) << 20)
+#define DWC_CTLL_FC_M2M (0 << 20) /* mem-to-mem */
+#define DWC_CTLL_FC_M2P (1 << 20) /* mem-to-periph */
+#define DWC_CTLL_FC_P2M (2 << 20) /* periph-to-mem */
+#define DWC_CTLL_FC_P2P (3 << 20) /* periph-to-periph */
+/* plus 4 transfer types for peripheral-as-flow-controller */
+#define DWC_CTLL_DMS(n) ((n)<<23) /* dst master select */
+#define DWC_CTLL_SMS(n) ((n)<<25) /* src master select */
+#define DWC_CTLL_LLP_D_EN (1 << 27) /* dest block chain */
+#define DWC_CTLL_LLP_S_EN (1 << 28) /* src block chain */
+
+/* Bitfields in CTL_HI */
+#define DWC_CTLH_BLOCK_TS_MASK GENMASK(11, 0)
+#define DWC_CTLH_BLOCK_TS(x) ((x) & DWC_CTLH_BLOCK_TS_MASK)
+#define DWC_CTLH_DONE (1 << 12)
+
+/* Bitfields in CFG_LO */
+#define DWC_CFGL_CH_PRIOR_MASK (0x7 << 5) /* priority mask */
+#define DWC_CFGL_CH_PRIOR(x) ((x) << 5) /* priority */
+#define DWC_CFGL_CH_SUSP (1 << 8) /* pause xfer */
+#define DWC_CFGL_FIFO_EMPTY (1 << 9) /* pause xfer */
+#define DWC_CFGL_HS_DST (1 << 10) /* handshake w/dst */
+#define DWC_CFGL_HS_SRC (1 << 11) /* handshake w/src */
+#define DWC_CFGL_LOCK_CH_XFER (0 << 12) /* scope of LOCK_CH */
+#define DWC_CFGL_LOCK_CH_BLOCK (1 << 12)
+#define DWC_CFGL_LOCK_CH_XACT (2 << 12)
+#define DWC_CFGL_LOCK_BUS_XFER (0 << 14) /* scope of LOCK_BUS */
+#define DWC_CFGL_LOCK_BUS_BLOCK (1 << 14)
+#define DWC_CFGL_LOCK_BUS_XACT (2 << 14)
+#define DWC_CFGL_LOCK_CH (1 << 15) /* channel lockout */
+#define DWC_CFGL_LOCK_BUS (1 << 16) /* busmaster lockout */
+#define DWC_CFGL_HS_DST_POL (1 << 18) /* dst handshake active low */
+#define DWC_CFGL_HS_SRC_POL (1 << 19) /* src handshake active low */
+#define DWC_CFGL_MAX_BURST(x) ((x) << 20)
+#define DWC_CFGL_RELOAD_SAR (1 << 30)
+#define DWC_CFGL_RELOAD_DAR (1 << 31)
+
+/* Bitfields in CFG_HI */
+#define DWC_CFGH_FCMODE (1 << 0)
+#define DWC_CFGH_FIFO_MODE (1 << 1)
+#define DWC_CFGH_PROTCTL(x) ((x) << 2)
+#define DWC_CFGH_PROTCTL_DATA (0 << 2) /* data access - always set */
+#define DWC_CFGH_PROTCTL_PRIV (1 << 2) /* privileged -> AHB HPROT[1] */
+#define DWC_CFGH_PROTCTL_BUFFER (2 << 2) /* bufferable -> AHB HPROT[2] */
+#define DWC_CFGH_PROTCTL_CACHE (4 << 2) /* cacheable -> AHB HPROT[3] */
+#define DWC_CFGH_DS_UPD_EN (1 << 5)
+#define DWC_CFGH_SS_UPD_EN (1 << 6)
+#define DWC_CFGH_SRC_PER(x) ((x) << 7)
+#define DWC_CFGH_DST_PER(x) ((x) << 11)
+
+/* Bitfields in SGR */
+#define DWC_SGR_SGI(x) ((x) << 0)
+#define DWC_SGR_SGC(x) ((x) << 20)
+
+/* Bitfields in DSR */
+#define DWC_DSR_DSI(x) ((x) << 0)
+#define DWC_DSR_DSC(x) ((x) << 20)
+
+/* Bitfields in CFG */
+#define DW_CFG_DMA_EN (1 << 0)
+
+/* iDMA 32-bit support */
+
+/* bursts size */
+enum idma32_msize {
+ IDMA32_MSIZE_1,
+ IDMA32_MSIZE_2,
+ IDMA32_MSIZE_4,
+ IDMA32_MSIZE_8,
+ IDMA32_MSIZE_16,
+ IDMA32_MSIZE_32,
+};
+
+/* Bitfields in CTL_HI */
+#define IDMA32C_CTLH_BLOCK_TS_MASK GENMASK(16, 0)
+#define IDMA32C_CTLH_BLOCK_TS(x) ((x) & IDMA32C_CTLH_BLOCK_TS_MASK)
+#define IDMA32C_CTLH_DONE (1 << 17)
+
+/* Bitfields in CFG_LO */
+#define IDMA32C_CFGL_DST_BURST_ALIGN (1 << 0) /* dst burst align */
+#define IDMA32C_CFGL_SRC_BURST_ALIGN (1 << 1) /* src burst align */
+#define IDMA32C_CFGL_CH_DRAIN (1 << 10) /* drain FIFO */
+#define IDMA32C_CFGL_DST_OPT_BL (1 << 20) /* optimize dst burst length */
+#define IDMA32C_CFGL_SRC_OPT_BL (1 << 21) /* optimize src burst length */
+
+/* Bitfields in CFG_HI */
+#define IDMA32C_CFGH_SRC_PER(x) ((x) << 0)
+#define IDMA32C_CFGH_DST_PER(x) ((x) << 4)
+#define IDMA32C_CFGH_RD_ISSUE_THD(x) ((x) << 8)
+#define IDMA32C_CFGH_RW_ISSUE_THD(x) ((x) << 18)
+#define IDMA32C_CFGH_SRC_PER_EXT(x) ((x) << 28) /* src peripheral extension */
+#define IDMA32C_CFGH_DST_PER_EXT(x) ((x) << 30) /* dst peripheral extension */
+
+/* Bitfields in FIFO_PARTITION */
+#define IDMA32C_FP_PSIZE_CH0(x) ((x) << 0)
+#define IDMA32C_FP_PSIZE_CH1(x) ((x) << 13)
+#define IDMA32C_FP_UPDATE (1 << 26)
+
+enum dw_dmac_flags {
+ DW_DMA_IS_CYCLIC = 0,
+ DW_DMA_IS_SOFT_LLP = 1,
+ DW_DMA_IS_PAUSED = 2,
+ DW_DMA_IS_INITIALIZED = 3,
+};
+
+struct dw_dma_chan {
+ struct dma_chan chan;
+ void __iomem *ch_regs;
+ u8 mask;
+ u8 priority;
+ enum dma_transfer_direction direction;
+
+ /* software emulation of the LLP transfers */
+ struct list_head *tx_node_active;
+
+ spinlock_t lock;
+
+ /* these other elements are all protected by lock */
+ unsigned long flags;
+ struct list_head active_list;
+ struct list_head queue;
+
+ unsigned int descs_allocated;
+
+ /* hardware configuration */
+ unsigned int block_size;
+ bool nollp;
+ u32 max_burst;
+
+ /* custom slave configuration */
+ struct dw_dma_slave dws;
+
+ /* configuration passed via .device_config */
+ struct dma_slave_config dma_sconfig;
+};
+
+static inline struct dw_dma_chan_regs __iomem *
+__dwc_regs(struct dw_dma_chan *dwc)
+{
+ return dwc->ch_regs;
+}
+
+#define channel_readl(dwc, name) \
+ readl(&(__dwc_regs(dwc)->name))
+#define channel_writel(dwc, name, val) \
+ writel((val), &(__dwc_regs(dwc)->name))
+
+static inline struct dw_dma_chan *to_dw_dma_chan(struct dma_chan *chan)
+{
+ return container_of(chan, struct dw_dma_chan, chan);
+}
+
+struct dw_dma {
+ struct dma_device dma;
+ char name[20];
+ void __iomem *regs;
+ struct dma_pool *desc_pool;
+ struct tasklet_struct tasklet;
+
+ /* channels */
+ struct dw_dma_chan *chan;
+ u8 all_chan_mask;
+ u8 in_use;
+
+ /* Channel operations */
+ void (*initialize_chan)(struct dw_dma_chan *dwc);
+ void (*suspend_chan)(struct dw_dma_chan *dwc, bool drain);
+ void (*resume_chan)(struct dw_dma_chan *dwc, bool drain);
+ u32 (*prepare_ctllo)(struct dw_dma_chan *dwc);
+ void (*encode_maxburst)(struct dw_dma_chan *dwc, u32 *maxburst);
+ u32 (*bytes2block)(struct dw_dma_chan *dwc, size_t bytes,
+ unsigned int width, size_t *len);
+ size_t (*block2bytes)(struct dw_dma_chan *dwc, u32 block, u32 width);
+
+ /* Device operations */
+ void (*set_device_name)(struct dw_dma *dw, int id);
+ void (*disable)(struct dw_dma *dw);
+ void (*enable)(struct dw_dma *dw);
+
+ /* platform data */
+ struct dw_dma_platform_data *pdata;
+};
+
+static inline struct dw_dma_regs __iomem *__dw_regs(struct dw_dma *dw)
+{
+ return dw->regs;
+}
+
+#define dma_readl(dw, name) \
+ readl(&(__dw_regs(dw)->name))
+#define dma_writel(dw, name, val) \
+ writel((val), &(__dw_regs(dw)->name))
+
+#define idma32_readq(dw, name) \
+ hi_lo_readq(&(__dw_regs(dw)->name))
+#define idma32_writeq(dw, name, val) \
+ hi_lo_writeq((val), &(__dw_regs(dw)->name))
+
+#define channel_set_bit(dw, reg, mask) \
+ dma_writel(dw, reg, ((mask) << 8) | (mask))
+#define channel_clear_bit(dw, reg, mask) \
+ dma_writel(dw, reg, ((mask) << 8) | 0)
+
+static inline struct dw_dma *to_dw_dma(struct dma_device *ddev)
+{
+ return container_of(ddev, struct dw_dma, dma);
+}
+
+/* LLI == Linked List Item; a.k.a. DMA block descriptor */
+struct dw_lli {
+ /* values that are not changed by hardware */
+ __le32 sar;
+ __le32 dar;
+ __le32 llp; /* chain to next lli */
+ __le32 ctllo;
+ /* values that may get written back: */
+ __le32 ctlhi;
+ /* sstat and dstat can snapshot peripheral register state.
+ * silicon config may discard either or both...
+ */
+ __le32 sstat;
+ __le32 dstat;
+};
+
+struct dw_desc {
+ /* FIRST values the hardware uses */
+ struct dw_lli lli;
+
+#define lli_set(d, reg, v) ((d)->lli.reg |= cpu_to_le32(v))
+#define lli_clear(d, reg, v) ((d)->lli.reg &= ~cpu_to_le32(v))
+#define lli_read(d, reg) le32_to_cpu((d)->lli.reg)
+#define lli_write(d, reg, v) ((d)->lli.reg = cpu_to_le32(v))
+
+ /* THEN values for driver housekeeping */
+ struct list_head desc_node;
+ struct list_head tx_list;
+ struct dma_async_tx_descriptor txd;
+ size_t len;
+ size_t total_len;
+ u32 residue;
+};
+
+#define to_dw_desc(h) list_entry(h, struct dw_desc, desc_node)
+
+static inline struct dw_desc *
+txd_to_dw_desc(struct dma_async_tx_descriptor *txd)
+{
+ return container_of(txd, struct dw_desc, txd);
+}
diff --git a/drivers/dma/dw/rzn1-dmamux.c b/drivers/dma/dw/rzn1-dmamux.c
new file mode 100644
index 000000000..f9912c3dd
--- /dev/null
+++ b/drivers/dma/dw/rzn1-dmamux.c
@@ -0,0 +1,158 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2022 Schneider-Electric
+ * Author: Miquel Raynal <miquel.raynal@bootlin.com
+ * Based on TI crossbar driver written by Peter Ujfalusi <peter.ujfalusi@ti.com>
+ */
+#include <linux/bitops.h>
+#include <linux/of_device.h>
+#include <linux/of_dma.h>
+#include <linux/slab.h>
+#include <linux/soc/renesas/r9a06g032-sysctrl.h>
+#include <linux/types.h>
+
+#define RNZ1_DMAMUX_NCELLS 6
+#define RZN1_DMAMUX_MAX_LINES 64
+#define RZN1_DMAMUX_LINES_PER_CTLR 16
+
+struct rzn1_dmamux_data {
+ struct dma_router dmarouter;
+ DECLARE_BITMAP(used_chans, 2 * RZN1_DMAMUX_LINES_PER_CTLR);
+};
+
+struct rzn1_dmamux_map {
+ unsigned int req_idx;
+};
+
+static void rzn1_dmamux_free(struct device *dev, void *route_data)
+{
+ struct rzn1_dmamux_data *dmamux = dev_get_drvdata(dev);
+ struct rzn1_dmamux_map *map = route_data;
+
+ dev_dbg(dev, "Unmapping DMAMUX request %u\n", map->req_idx);
+
+ clear_bit(map->req_idx, dmamux->used_chans);
+
+ kfree(map);
+}
+
+static void *rzn1_dmamux_route_allocate(struct of_phandle_args *dma_spec,
+ struct of_dma *ofdma)
+{
+ struct platform_device *pdev = of_find_device_by_node(ofdma->of_node);
+ struct rzn1_dmamux_data *dmamux = platform_get_drvdata(pdev);
+ struct rzn1_dmamux_map *map;
+ unsigned int dmac_idx, chan, val;
+ u32 mask;
+ int ret;
+
+ if (dma_spec->args_count != RNZ1_DMAMUX_NCELLS)
+ return ERR_PTR(-EINVAL);
+
+ map = kzalloc(sizeof(*map), GFP_KERNEL);
+ if (!map)
+ return ERR_PTR(-ENOMEM);
+
+ chan = dma_spec->args[0];
+ map->req_idx = dma_spec->args[4];
+ val = dma_spec->args[5];
+ dma_spec->args_count -= 2;
+
+ if (chan >= RZN1_DMAMUX_LINES_PER_CTLR) {
+ dev_err(&pdev->dev, "Invalid DMA request line: %u\n", chan);
+ ret = -EINVAL;
+ goto free_map;
+ }
+
+ if (map->req_idx >= RZN1_DMAMUX_MAX_LINES ||
+ (map->req_idx % RZN1_DMAMUX_LINES_PER_CTLR) != chan) {
+ dev_err(&pdev->dev, "Invalid MUX request line: %u\n", map->req_idx);
+ ret = -EINVAL;
+ goto free_map;
+ }
+
+ dmac_idx = map->req_idx >= RZN1_DMAMUX_LINES_PER_CTLR ? 1 : 0;
+ dma_spec->np = of_parse_phandle(ofdma->of_node, "dma-masters", dmac_idx);
+ if (!dma_spec->np) {
+ dev_err(&pdev->dev, "Can't get DMA master\n");
+ ret = -EINVAL;
+ goto free_map;
+ }
+
+ dev_dbg(&pdev->dev, "Mapping DMAMUX request %u to DMAC%u request %u\n",
+ map->req_idx, dmac_idx, chan);
+
+ if (test_and_set_bit(map->req_idx, dmamux->used_chans)) {
+ ret = -EBUSY;
+ goto free_map;
+ }
+
+ mask = BIT(map->req_idx);
+ ret = r9a06g032_sysctrl_set_dmamux(mask, val ? mask : 0);
+ if (ret)
+ goto clear_bitmap;
+
+ return map;
+
+clear_bitmap:
+ clear_bit(map->req_idx, dmamux->used_chans);
+free_map:
+ kfree(map);
+
+ return ERR_PTR(ret);
+}
+
+#ifdef CONFIG_OF
+static const struct of_device_id rzn1_dmac_match[] = {
+ { .compatible = "renesas,rzn1-dma" },
+ {}
+};
+#endif
+
+static int rzn1_dmamux_probe(struct platform_device *pdev)
+{
+ struct device_node *mux_node = pdev->dev.of_node;
+ const struct of_device_id *match;
+ struct device_node *dmac_node;
+ struct rzn1_dmamux_data *dmamux;
+
+ dmamux = devm_kzalloc(&pdev->dev, sizeof(*dmamux), GFP_KERNEL);
+ if (!dmamux)
+ return -ENOMEM;
+
+ dmac_node = of_parse_phandle(mux_node, "dma-masters", 0);
+ if (!dmac_node)
+ return dev_err_probe(&pdev->dev, -ENODEV, "Can't get DMA master node\n");
+
+ match = of_match_node(rzn1_dmac_match, dmac_node);
+ of_node_put(dmac_node);
+ if (!match)
+ return dev_err_probe(&pdev->dev, -EINVAL, "DMA master is not supported\n");
+
+ dmamux->dmarouter.dev = &pdev->dev;
+ dmamux->dmarouter.route_free = rzn1_dmamux_free;
+
+ platform_set_drvdata(pdev, dmamux);
+
+ return of_dma_router_register(mux_node, rzn1_dmamux_route_allocate,
+ &dmamux->dmarouter);
+}
+
+static const struct of_device_id rzn1_dmamux_match[] = {
+ { .compatible = "renesas,rzn1-dmamux" },
+ {}
+};
+MODULE_DEVICE_TABLE(of, rzn1_dmamux_match);
+
+static struct platform_driver rzn1_dmamux_driver = {
+ .driver = {
+ .name = "renesas,rzn1-dmamux",
+ .of_match_table = rzn1_dmamux_match,
+ },
+ .probe = rzn1_dmamux_probe,
+};
+module_platform_driver(rzn1_dmamux_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Miquel Raynal <miquel.raynal@bootlin.com");
+MODULE_DESCRIPTION("Renesas RZ/N1 DMAMUX driver");
diff --git a/drivers/dma/ep93xx_dma.c b/drivers/dma/ep93xx_dma.c
new file mode 100644
index 000000000..d19ea885c
--- /dev/null
+++ b/drivers/dma/ep93xx_dma.c
@@ -0,0 +1,1434 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Driver for the Cirrus Logic EP93xx DMA Controller
+ *
+ * Copyright (C) 2011 Mika Westerberg
+ *
+ * DMA M2P implementation is based on the original
+ * arch/arm/mach-ep93xx/dma-m2p.c which has following copyrights:
+ *
+ * Copyright (C) 2006 Lennert Buytenhek <buytenh@wantstofly.org>
+ * Copyright (C) 2006 Applied Data Systems
+ * Copyright (C) 2009 Ryan Mallon <rmallon@gmail.com>
+ *
+ * This driver is based on dw_dmac and amba-pl08x drivers.
+ */
+
+#include <linux/clk.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/dmaengine.h>
+#include <linux/module.h>
+#include <linux/mod_devicetable.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+#include <linux/platform_data/dma-ep93xx.h>
+
+#include "dmaengine.h"
+
+/* M2P registers */
+#define M2P_CONTROL 0x0000
+#define M2P_CONTROL_STALLINT BIT(0)
+#define M2P_CONTROL_NFBINT BIT(1)
+#define M2P_CONTROL_CH_ERROR_INT BIT(3)
+#define M2P_CONTROL_ENABLE BIT(4)
+#define M2P_CONTROL_ICE BIT(6)
+
+#define M2P_INTERRUPT 0x0004
+#define M2P_INTERRUPT_STALL BIT(0)
+#define M2P_INTERRUPT_NFB BIT(1)
+#define M2P_INTERRUPT_ERROR BIT(3)
+
+#define M2P_PPALLOC 0x0008
+#define M2P_STATUS 0x000c
+
+#define M2P_MAXCNT0 0x0020
+#define M2P_BASE0 0x0024
+#define M2P_MAXCNT1 0x0030
+#define M2P_BASE1 0x0034
+
+#define M2P_STATE_IDLE 0
+#define M2P_STATE_STALL 1
+#define M2P_STATE_ON 2
+#define M2P_STATE_NEXT 3
+
+/* M2M registers */
+#define M2M_CONTROL 0x0000
+#define M2M_CONTROL_DONEINT BIT(2)
+#define M2M_CONTROL_ENABLE BIT(3)
+#define M2M_CONTROL_START BIT(4)
+#define M2M_CONTROL_DAH BIT(11)
+#define M2M_CONTROL_SAH BIT(12)
+#define M2M_CONTROL_PW_SHIFT 9
+#define M2M_CONTROL_PW_8 (0 << M2M_CONTROL_PW_SHIFT)
+#define M2M_CONTROL_PW_16 (1 << M2M_CONTROL_PW_SHIFT)
+#define M2M_CONTROL_PW_32 (2 << M2M_CONTROL_PW_SHIFT)
+#define M2M_CONTROL_PW_MASK (3 << M2M_CONTROL_PW_SHIFT)
+#define M2M_CONTROL_TM_SHIFT 13
+#define M2M_CONTROL_TM_TX (1 << M2M_CONTROL_TM_SHIFT)
+#define M2M_CONTROL_TM_RX (2 << M2M_CONTROL_TM_SHIFT)
+#define M2M_CONTROL_NFBINT BIT(21)
+#define M2M_CONTROL_RSS_SHIFT 22
+#define M2M_CONTROL_RSS_SSPRX (1 << M2M_CONTROL_RSS_SHIFT)
+#define M2M_CONTROL_RSS_SSPTX (2 << M2M_CONTROL_RSS_SHIFT)
+#define M2M_CONTROL_RSS_IDE (3 << M2M_CONTROL_RSS_SHIFT)
+#define M2M_CONTROL_NO_HDSK BIT(24)
+#define M2M_CONTROL_PWSC_SHIFT 25
+
+#define M2M_INTERRUPT 0x0004
+#define M2M_INTERRUPT_MASK 6
+
+#define M2M_STATUS 0x000c
+#define M2M_STATUS_CTL_SHIFT 1
+#define M2M_STATUS_CTL_IDLE (0 << M2M_STATUS_CTL_SHIFT)
+#define M2M_STATUS_CTL_STALL (1 << M2M_STATUS_CTL_SHIFT)
+#define M2M_STATUS_CTL_MEMRD (2 << M2M_STATUS_CTL_SHIFT)
+#define M2M_STATUS_CTL_MEMWR (3 << M2M_STATUS_CTL_SHIFT)
+#define M2M_STATUS_CTL_BWCWAIT (4 << M2M_STATUS_CTL_SHIFT)
+#define M2M_STATUS_CTL_MASK (7 << M2M_STATUS_CTL_SHIFT)
+#define M2M_STATUS_BUF_SHIFT 4
+#define M2M_STATUS_BUF_NO (0 << M2M_STATUS_BUF_SHIFT)
+#define M2M_STATUS_BUF_ON (1 << M2M_STATUS_BUF_SHIFT)
+#define M2M_STATUS_BUF_NEXT (2 << M2M_STATUS_BUF_SHIFT)
+#define M2M_STATUS_BUF_MASK (3 << M2M_STATUS_BUF_SHIFT)
+#define M2M_STATUS_DONE BIT(6)
+
+#define M2M_BCR0 0x0010
+#define M2M_BCR1 0x0014
+#define M2M_SAR_BASE0 0x0018
+#define M2M_SAR_BASE1 0x001c
+#define M2M_DAR_BASE0 0x002c
+#define M2M_DAR_BASE1 0x0030
+
+#define DMA_MAX_CHAN_BYTES 0xffff
+#define DMA_MAX_CHAN_DESCRIPTORS 32
+
+struct ep93xx_dma_engine;
+static int ep93xx_dma_slave_config_write(struct dma_chan *chan,
+ enum dma_transfer_direction dir,
+ struct dma_slave_config *config);
+
+/**
+ * struct ep93xx_dma_desc - EP93xx specific transaction descriptor
+ * @src_addr: source address of the transaction
+ * @dst_addr: destination address of the transaction
+ * @size: size of the transaction (in bytes)
+ * @complete: this descriptor is completed
+ * @txd: dmaengine API descriptor
+ * @tx_list: list of linked descriptors
+ * @node: link used for putting this into a channel queue
+ */
+struct ep93xx_dma_desc {
+ u32 src_addr;
+ u32 dst_addr;
+ size_t size;
+ bool complete;
+ struct dma_async_tx_descriptor txd;
+ struct list_head tx_list;
+ struct list_head node;
+};
+
+/**
+ * struct ep93xx_dma_chan - an EP93xx DMA M2P/M2M channel
+ * @chan: dmaengine API channel
+ * @edma: pointer to the engine device
+ * @regs: memory mapped registers
+ * @irq: interrupt number of the channel
+ * @clk: clock used by this channel
+ * @tasklet: channel specific tasklet used for callbacks
+ * @lock: lock protecting the fields following
+ * @flags: flags for the channel
+ * @buffer: which buffer to use next (0/1)
+ * @active: flattened chain of descriptors currently being processed
+ * @queue: pending descriptors which are handled next
+ * @free_list: list of free descriptors which can be used
+ * @runtime_addr: physical address currently used as dest/src (M2M only). This
+ * is set via .device_config before slave operation is
+ * prepared
+ * @runtime_ctrl: M2M runtime values for the control register.
+ * @slave_config: slave configuration
+ *
+ * As EP93xx DMA controller doesn't support real chained DMA descriptors we
+ * will have slightly different scheme here: @active points to a head of
+ * flattened DMA descriptor chain.
+ *
+ * @queue holds pending transactions. These are linked through the first
+ * descriptor in the chain. When a descriptor is moved to the @active queue,
+ * the first and chained descriptors are flattened into a single list.
+ *
+ * @chan.private holds pointer to &struct ep93xx_dma_data which contains
+ * necessary channel configuration information. For memcpy channels this must
+ * be %NULL.
+ */
+struct ep93xx_dma_chan {
+ struct dma_chan chan;
+ const struct ep93xx_dma_engine *edma;
+ void __iomem *regs;
+ int irq;
+ struct clk *clk;
+ struct tasklet_struct tasklet;
+ /* protects the fields following */
+ spinlock_t lock;
+ unsigned long flags;
+/* Channel is configured for cyclic transfers */
+#define EP93XX_DMA_IS_CYCLIC 0
+
+ int buffer;
+ struct list_head active;
+ struct list_head queue;
+ struct list_head free_list;
+ u32 runtime_addr;
+ u32 runtime_ctrl;
+ struct dma_slave_config slave_config;
+};
+
+/**
+ * struct ep93xx_dma_engine - the EP93xx DMA engine instance
+ * @dma_dev: holds the dmaengine device
+ * @m2m: is this an M2M or M2P device
+ * @hw_setup: method which sets the channel up for operation
+ * @hw_synchronize: synchronizes DMA channel termination to current context
+ * @hw_shutdown: shuts the channel down and flushes whatever is left
+ * @hw_submit: pushes active descriptor(s) to the hardware
+ * @hw_interrupt: handle the interrupt
+ * @num_channels: number of channels for this instance
+ * @channels: array of channels
+ *
+ * There is one instance of this struct for the M2P channels and one for the
+ * M2M channels. hw_xxx() methods are used to perform operations which are
+ * different on M2M and M2P channels. These methods are called with channel
+ * lock held and interrupts disabled so they cannot sleep.
+ */
+struct ep93xx_dma_engine {
+ struct dma_device dma_dev;
+ bool m2m;
+ int (*hw_setup)(struct ep93xx_dma_chan *);
+ void (*hw_synchronize)(struct ep93xx_dma_chan *);
+ void (*hw_shutdown)(struct ep93xx_dma_chan *);
+ void (*hw_submit)(struct ep93xx_dma_chan *);
+ int (*hw_interrupt)(struct ep93xx_dma_chan *);
+#define INTERRUPT_UNKNOWN 0
+#define INTERRUPT_DONE 1
+#define INTERRUPT_NEXT_BUFFER 2
+
+ size_t num_channels;
+ struct ep93xx_dma_chan channels[];
+};
+
+static inline struct device *chan2dev(struct ep93xx_dma_chan *edmac)
+{
+ return &edmac->chan.dev->device;
+}
+
+static struct ep93xx_dma_chan *to_ep93xx_dma_chan(struct dma_chan *chan)
+{
+ return container_of(chan, struct ep93xx_dma_chan, chan);
+}
+
+/**
+ * ep93xx_dma_set_active - set new active descriptor chain
+ * @edmac: channel
+ * @desc: head of the new active descriptor chain
+ *
+ * Sets @desc to be the head of the new active descriptor chain. This is the
+ * chain which is processed next. The active list must be empty before calling
+ * this function.
+ *
+ * Called with @edmac->lock held and interrupts disabled.
+ */
+static void ep93xx_dma_set_active(struct ep93xx_dma_chan *edmac,
+ struct ep93xx_dma_desc *desc)
+{
+ BUG_ON(!list_empty(&edmac->active));
+
+ list_add_tail(&desc->node, &edmac->active);
+
+ /* Flatten the @desc->tx_list chain into @edmac->active list */
+ while (!list_empty(&desc->tx_list)) {
+ struct ep93xx_dma_desc *d = list_first_entry(&desc->tx_list,
+ struct ep93xx_dma_desc, node);
+
+ /*
+ * We copy the callback parameters from the first descriptor
+ * to all the chained descriptors. This way we can call the
+ * callback without having to find out the first descriptor in
+ * the chain. Useful for cyclic transfers.
+ */
+ d->txd.callback = desc->txd.callback;
+ d->txd.callback_param = desc->txd.callback_param;
+
+ list_move_tail(&d->node, &edmac->active);
+ }
+}
+
+/* Called with @edmac->lock held and interrupts disabled */
+static struct ep93xx_dma_desc *
+ep93xx_dma_get_active(struct ep93xx_dma_chan *edmac)
+{
+ return list_first_entry_or_null(&edmac->active,
+ struct ep93xx_dma_desc, node);
+}
+
+/**
+ * ep93xx_dma_advance_active - advances to the next active descriptor
+ * @edmac: channel
+ *
+ * Function advances active descriptor to the next in the @edmac->active and
+ * returns %true if we still have descriptors in the chain to process.
+ * Otherwise returns %false.
+ *
+ * When the channel is in cyclic mode always returns %true.
+ *
+ * Called with @edmac->lock held and interrupts disabled.
+ */
+static bool ep93xx_dma_advance_active(struct ep93xx_dma_chan *edmac)
+{
+ struct ep93xx_dma_desc *desc;
+
+ list_rotate_left(&edmac->active);
+
+ if (test_bit(EP93XX_DMA_IS_CYCLIC, &edmac->flags))
+ return true;
+
+ desc = ep93xx_dma_get_active(edmac);
+ if (!desc)
+ return false;
+
+ /*
+ * If txd.cookie is set it means that we are back in the first
+ * descriptor in the chain and hence done with it.
+ */
+ return !desc->txd.cookie;
+}
+
+/*
+ * M2P DMA implementation
+ */
+
+static void m2p_set_control(struct ep93xx_dma_chan *edmac, u32 control)
+{
+ writel(control, edmac->regs + M2P_CONTROL);
+ /*
+ * EP93xx User's Guide states that we must perform a dummy read after
+ * write to the control register.
+ */
+ readl(edmac->regs + M2P_CONTROL);
+}
+
+static int m2p_hw_setup(struct ep93xx_dma_chan *edmac)
+{
+ struct ep93xx_dma_data *data = edmac->chan.private;
+ u32 control;
+
+ writel(data->port & 0xf, edmac->regs + M2P_PPALLOC);
+
+ control = M2P_CONTROL_CH_ERROR_INT | M2P_CONTROL_ICE
+ | M2P_CONTROL_ENABLE;
+ m2p_set_control(edmac, control);
+
+ edmac->buffer = 0;
+
+ return 0;
+}
+
+static inline u32 m2p_channel_state(struct ep93xx_dma_chan *edmac)
+{
+ return (readl(edmac->regs + M2P_STATUS) >> 4) & 0x3;
+}
+
+static void m2p_hw_synchronize(struct ep93xx_dma_chan *edmac)
+{
+ unsigned long flags;
+ u32 control;
+
+ spin_lock_irqsave(&edmac->lock, flags);
+ control = readl(edmac->regs + M2P_CONTROL);
+ control &= ~(M2P_CONTROL_STALLINT | M2P_CONTROL_NFBINT);
+ m2p_set_control(edmac, control);
+ spin_unlock_irqrestore(&edmac->lock, flags);
+
+ while (m2p_channel_state(edmac) >= M2P_STATE_ON)
+ schedule();
+}
+
+static void m2p_hw_shutdown(struct ep93xx_dma_chan *edmac)
+{
+ m2p_set_control(edmac, 0);
+
+ while (m2p_channel_state(edmac) != M2P_STATE_IDLE)
+ dev_warn(chan2dev(edmac), "M2P: Not yet IDLE\n");
+}
+
+static void m2p_fill_desc(struct ep93xx_dma_chan *edmac)
+{
+ struct ep93xx_dma_desc *desc;
+ u32 bus_addr;
+
+ desc = ep93xx_dma_get_active(edmac);
+ if (!desc) {
+ dev_warn(chan2dev(edmac), "M2P: empty descriptor list\n");
+ return;
+ }
+
+ if (ep93xx_dma_chan_direction(&edmac->chan) == DMA_MEM_TO_DEV)
+ bus_addr = desc->src_addr;
+ else
+ bus_addr = desc->dst_addr;
+
+ if (edmac->buffer == 0) {
+ writel(desc->size, edmac->regs + M2P_MAXCNT0);
+ writel(bus_addr, edmac->regs + M2P_BASE0);
+ } else {
+ writel(desc->size, edmac->regs + M2P_MAXCNT1);
+ writel(bus_addr, edmac->regs + M2P_BASE1);
+ }
+
+ edmac->buffer ^= 1;
+}
+
+static void m2p_hw_submit(struct ep93xx_dma_chan *edmac)
+{
+ u32 control = readl(edmac->regs + M2P_CONTROL);
+
+ m2p_fill_desc(edmac);
+ control |= M2P_CONTROL_STALLINT;
+
+ if (ep93xx_dma_advance_active(edmac)) {
+ m2p_fill_desc(edmac);
+ control |= M2P_CONTROL_NFBINT;
+ }
+
+ m2p_set_control(edmac, control);
+}
+
+static int m2p_hw_interrupt(struct ep93xx_dma_chan *edmac)
+{
+ u32 irq_status = readl(edmac->regs + M2P_INTERRUPT);
+ u32 control;
+
+ if (irq_status & M2P_INTERRUPT_ERROR) {
+ struct ep93xx_dma_desc *desc = ep93xx_dma_get_active(edmac);
+
+ /* Clear the error interrupt */
+ writel(1, edmac->regs + M2P_INTERRUPT);
+
+ /*
+ * It seems that there is no easy way of reporting errors back
+ * to client so we just report the error here and continue as
+ * usual.
+ *
+ * Revisit this when there is a mechanism to report back the
+ * errors.
+ */
+ dev_err(chan2dev(edmac),
+ "DMA transfer failed! Details:\n"
+ "\tcookie : %d\n"
+ "\tsrc_addr : 0x%08x\n"
+ "\tdst_addr : 0x%08x\n"
+ "\tsize : %zu\n",
+ desc->txd.cookie, desc->src_addr, desc->dst_addr,
+ desc->size);
+ }
+
+ /*
+ * Even latest E2 silicon revision sometimes assert STALL interrupt
+ * instead of NFB. Therefore we treat them equally, basing on the
+ * amount of data we still have to transfer.
+ */
+ if (!(irq_status & (M2P_INTERRUPT_STALL | M2P_INTERRUPT_NFB)))
+ return INTERRUPT_UNKNOWN;
+
+ if (ep93xx_dma_advance_active(edmac)) {
+ m2p_fill_desc(edmac);
+ return INTERRUPT_NEXT_BUFFER;
+ }
+
+ /* Disable interrupts */
+ control = readl(edmac->regs + M2P_CONTROL);
+ control &= ~(M2P_CONTROL_STALLINT | M2P_CONTROL_NFBINT);
+ m2p_set_control(edmac, control);
+
+ return INTERRUPT_DONE;
+}
+
+/*
+ * M2M DMA implementation
+ */
+
+static int m2m_hw_setup(struct ep93xx_dma_chan *edmac)
+{
+ const struct ep93xx_dma_data *data = edmac->chan.private;
+ u32 control = 0;
+
+ if (!data) {
+ /* This is memcpy channel, nothing to configure */
+ writel(control, edmac->regs + M2M_CONTROL);
+ return 0;
+ }
+
+ switch (data->port) {
+ case EP93XX_DMA_SSP:
+ /*
+ * This was found via experimenting - anything less than 5
+ * causes the channel to perform only a partial transfer which
+ * leads to problems since we don't get DONE interrupt then.
+ */
+ control = (5 << M2M_CONTROL_PWSC_SHIFT);
+ control |= M2M_CONTROL_NO_HDSK;
+
+ if (data->direction == DMA_MEM_TO_DEV) {
+ control |= M2M_CONTROL_DAH;
+ control |= M2M_CONTROL_TM_TX;
+ control |= M2M_CONTROL_RSS_SSPTX;
+ } else {
+ control |= M2M_CONTROL_SAH;
+ control |= M2M_CONTROL_TM_RX;
+ control |= M2M_CONTROL_RSS_SSPRX;
+ }
+ break;
+
+ case EP93XX_DMA_IDE:
+ /*
+ * This IDE part is totally untested. Values below are taken
+ * from the EP93xx Users's Guide and might not be correct.
+ */
+ if (data->direction == DMA_MEM_TO_DEV) {
+ /* Worst case from the UG */
+ control = (3 << M2M_CONTROL_PWSC_SHIFT);
+ control |= M2M_CONTROL_DAH;
+ control |= M2M_CONTROL_TM_TX;
+ } else {
+ control = (2 << M2M_CONTROL_PWSC_SHIFT);
+ control |= M2M_CONTROL_SAH;
+ control |= M2M_CONTROL_TM_RX;
+ }
+
+ control |= M2M_CONTROL_NO_HDSK;
+ control |= M2M_CONTROL_RSS_IDE;
+ control |= M2M_CONTROL_PW_16;
+ break;
+
+ default:
+ return -EINVAL;
+ }
+
+ writel(control, edmac->regs + M2M_CONTROL);
+ return 0;
+}
+
+static void m2m_hw_shutdown(struct ep93xx_dma_chan *edmac)
+{
+ /* Just disable the channel */
+ writel(0, edmac->regs + M2M_CONTROL);
+}
+
+static void m2m_fill_desc(struct ep93xx_dma_chan *edmac)
+{
+ struct ep93xx_dma_desc *desc;
+
+ desc = ep93xx_dma_get_active(edmac);
+ if (!desc) {
+ dev_warn(chan2dev(edmac), "M2M: empty descriptor list\n");
+ return;
+ }
+
+ if (edmac->buffer == 0) {
+ writel(desc->src_addr, edmac->regs + M2M_SAR_BASE0);
+ writel(desc->dst_addr, edmac->regs + M2M_DAR_BASE0);
+ writel(desc->size, edmac->regs + M2M_BCR0);
+ } else {
+ writel(desc->src_addr, edmac->regs + M2M_SAR_BASE1);
+ writel(desc->dst_addr, edmac->regs + M2M_DAR_BASE1);
+ writel(desc->size, edmac->regs + M2M_BCR1);
+ }
+
+ edmac->buffer ^= 1;
+}
+
+static void m2m_hw_submit(struct ep93xx_dma_chan *edmac)
+{
+ struct ep93xx_dma_data *data = edmac->chan.private;
+ u32 control = readl(edmac->regs + M2M_CONTROL);
+
+ /*
+ * Since we allow clients to configure PW (peripheral width) we always
+ * clear PW bits here and then set them according what is given in
+ * the runtime configuration.
+ */
+ control &= ~M2M_CONTROL_PW_MASK;
+ control |= edmac->runtime_ctrl;
+
+ m2m_fill_desc(edmac);
+ control |= M2M_CONTROL_DONEINT;
+
+ if (ep93xx_dma_advance_active(edmac)) {
+ m2m_fill_desc(edmac);
+ control |= M2M_CONTROL_NFBINT;
+ }
+
+ /*
+ * Now we can finally enable the channel. For M2M channel this must be
+ * done _after_ the BCRx registers are programmed.
+ */
+ control |= M2M_CONTROL_ENABLE;
+ writel(control, edmac->regs + M2M_CONTROL);
+
+ if (!data) {
+ /*
+ * For memcpy channels the software trigger must be asserted
+ * in order to start the memcpy operation.
+ */
+ control |= M2M_CONTROL_START;
+ writel(control, edmac->regs + M2M_CONTROL);
+ }
+}
+
+/*
+ * According to EP93xx User's Guide, we should receive DONE interrupt when all
+ * M2M DMA controller transactions complete normally. This is not always the
+ * case - sometimes EP93xx M2M DMA asserts DONE interrupt when the DMA channel
+ * is still running (channel Buffer FSM in DMA_BUF_ON state, and channel
+ * Control FSM in DMA_MEM_RD state, observed at least in IDE-DMA operation).
+ * In effect, disabling the channel when only DONE bit is set could stop
+ * currently running DMA transfer. To avoid this, we use Buffer FSM and
+ * Control FSM to check current state of DMA channel.
+ */
+static int m2m_hw_interrupt(struct ep93xx_dma_chan *edmac)
+{
+ u32 status = readl(edmac->regs + M2M_STATUS);
+ u32 ctl_fsm = status & M2M_STATUS_CTL_MASK;
+ u32 buf_fsm = status & M2M_STATUS_BUF_MASK;
+ bool done = status & M2M_STATUS_DONE;
+ bool last_done;
+ u32 control;
+ struct ep93xx_dma_desc *desc;
+
+ /* Accept only DONE and NFB interrupts */
+ if (!(readl(edmac->regs + M2M_INTERRUPT) & M2M_INTERRUPT_MASK))
+ return INTERRUPT_UNKNOWN;
+
+ if (done) {
+ /* Clear the DONE bit */
+ writel(0, edmac->regs + M2M_INTERRUPT);
+ }
+
+ /*
+ * Check whether we are done with descriptors or not. This, together
+ * with DMA channel state, determines action to take in interrupt.
+ */
+ desc = ep93xx_dma_get_active(edmac);
+ last_done = !desc || desc->txd.cookie;
+
+ /*
+ * Use M2M DMA Buffer FSM and Control FSM to check current state of
+ * DMA channel. Using DONE and NFB bits from channel status register
+ * or bits from channel interrupt register is not reliable.
+ */
+ if (!last_done &&
+ (buf_fsm == M2M_STATUS_BUF_NO ||
+ buf_fsm == M2M_STATUS_BUF_ON)) {
+ /*
+ * Two buffers are ready for update when Buffer FSM is in
+ * DMA_NO_BUF state. Only one buffer can be prepared without
+ * disabling the channel or polling the DONE bit.
+ * To simplify things, always prepare only one buffer.
+ */
+ if (ep93xx_dma_advance_active(edmac)) {
+ m2m_fill_desc(edmac);
+ if (done && !edmac->chan.private) {
+ /* Software trigger for memcpy channel */
+ control = readl(edmac->regs + M2M_CONTROL);
+ control |= M2M_CONTROL_START;
+ writel(control, edmac->regs + M2M_CONTROL);
+ }
+ return INTERRUPT_NEXT_BUFFER;
+ } else {
+ last_done = true;
+ }
+ }
+
+ /*
+ * Disable the channel only when Buffer FSM is in DMA_NO_BUF state
+ * and Control FSM is in DMA_STALL state.
+ */
+ if (last_done &&
+ buf_fsm == M2M_STATUS_BUF_NO &&
+ ctl_fsm == M2M_STATUS_CTL_STALL) {
+ /* Disable interrupts and the channel */
+ control = readl(edmac->regs + M2M_CONTROL);
+ control &= ~(M2M_CONTROL_DONEINT | M2M_CONTROL_NFBINT
+ | M2M_CONTROL_ENABLE);
+ writel(control, edmac->regs + M2M_CONTROL);
+ return INTERRUPT_DONE;
+ }
+
+ /*
+ * Nothing to do this time.
+ */
+ return INTERRUPT_NEXT_BUFFER;
+}
+
+/*
+ * DMA engine API implementation
+ */
+
+static struct ep93xx_dma_desc *
+ep93xx_dma_desc_get(struct ep93xx_dma_chan *edmac)
+{
+ struct ep93xx_dma_desc *desc, *_desc;
+ struct ep93xx_dma_desc *ret = NULL;
+ unsigned long flags;
+
+ spin_lock_irqsave(&edmac->lock, flags);
+ list_for_each_entry_safe(desc, _desc, &edmac->free_list, node) {
+ if (async_tx_test_ack(&desc->txd)) {
+ list_del_init(&desc->node);
+
+ /* Re-initialize the descriptor */
+ desc->src_addr = 0;
+ desc->dst_addr = 0;
+ desc->size = 0;
+ desc->complete = false;
+ desc->txd.cookie = 0;
+ desc->txd.callback = NULL;
+ desc->txd.callback_param = NULL;
+
+ ret = desc;
+ break;
+ }
+ }
+ spin_unlock_irqrestore(&edmac->lock, flags);
+ return ret;
+}
+
+static void ep93xx_dma_desc_put(struct ep93xx_dma_chan *edmac,
+ struct ep93xx_dma_desc *desc)
+{
+ if (desc) {
+ unsigned long flags;
+
+ spin_lock_irqsave(&edmac->lock, flags);
+ list_splice_init(&desc->tx_list, &edmac->free_list);
+ list_add(&desc->node, &edmac->free_list);
+ spin_unlock_irqrestore(&edmac->lock, flags);
+ }
+}
+
+/**
+ * ep93xx_dma_advance_work - start processing the next pending transaction
+ * @edmac: channel
+ *
+ * If we have pending transactions queued and we are currently idling, this
+ * function takes the next queued transaction from the @edmac->queue and
+ * pushes it to the hardware for execution.
+ */
+static void ep93xx_dma_advance_work(struct ep93xx_dma_chan *edmac)
+{
+ struct ep93xx_dma_desc *new;
+ unsigned long flags;
+
+ spin_lock_irqsave(&edmac->lock, flags);
+ if (!list_empty(&edmac->active) || list_empty(&edmac->queue)) {
+ spin_unlock_irqrestore(&edmac->lock, flags);
+ return;
+ }
+
+ /* Take the next descriptor from the pending queue */
+ new = list_first_entry(&edmac->queue, struct ep93xx_dma_desc, node);
+ list_del_init(&new->node);
+
+ ep93xx_dma_set_active(edmac, new);
+
+ /* Push it to the hardware */
+ edmac->edma->hw_submit(edmac);
+ spin_unlock_irqrestore(&edmac->lock, flags);
+}
+
+static void ep93xx_dma_tasklet(struct tasklet_struct *t)
+{
+ struct ep93xx_dma_chan *edmac = from_tasklet(edmac, t, tasklet);
+ struct ep93xx_dma_desc *desc, *d;
+ struct dmaengine_desc_callback cb;
+ LIST_HEAD(list);
+
+ memset(&cb, 0, sizeof(cb));
+ spin_lock_irq(&edmac->lock);
+ /*
+ * If dma_terminate_all() was called before we get to run, the active
+ * list has become empty. If that happens we aren't supposed to do
+ * anything more than call ep93xx_dma_advance_work().
+ */
+ desc = ep93xx_dma_get_active(edmac);
+ if (desc) {
+ if (desc->complete) {
+ /* mark descriptor complete for non cyclic case only */
+ if (!test_bit(EP93XX_DMA_IS_CYCLIC, &edmac->flags))
+ dma_cookie_complete(&desc->txd);
+ list_splice_init(&edmac->active, &list);
+ }
+ dmaengine_desc_get_callback(&desc->txd, &cb);
+ }
+ spin_unlock_irq(&edmac->lock);
+
+ /* Pick up the next descriptor from the queue */
+ ep93xx_dma_advance_work(edmac);
+
+ /* Now we can release all the chained descriptors */
+ list_for_each_entry_safe(desc, d, &list, node) {
+ dma_descriptor_unmap(&desc->txd);
+ ep93xx_dma_desc_put(edmac, desc);
+ }
+
+ dmaengine_desc_callback_invoke(&cb, NULL);
+}
+
+static irqreturn_t ep93xx_dma_interrupt(int irq, void *dev_id)
+{
+ struct ep93xx_dma_chan *edmac = dev_id;
+ struct ep93xx_dma_desc *desc;
+ irqreturn_t ret = IRQ_HANDLED;
+
+ spin_lock(&edmac->lock);
+
+ desc = ep93xx_dma_get_active(edmac);
+ if (!desc) {
+ dev_warn(chan2dev(edmac),
+ "got interrupt while active list is empty\n");
+ spin_unlock(&edmac->lock);
+ return IRQ_NONE;
+ }
+
+ switch (edmac->edma->hw_interrupt(edmac)) {
+ case INTERRUPT_DONE:
+ desc->complete = true;
+ tasklet_schedule(&edmac->tasklet);
+ break;
+
+ case INTERRUPT_NEXT_BUFFER:
+ if (test_bit(EP93XX_DMA_IS_CYCLIC, &edmac->flags))
+ tasklet_schedule(&edmac->tasklet);
+ break;
+
+ default:
+ dev_warn(chan2dev(edmac), "unknown interrupt!\n");
+ ret = IRQ_NONE;
+ break;
+ }
+
+ spin_unlock(&edmac->lock);
+ return ret;
+}
+
+/**
+ * ep93xx_dma_tx_submit - set the prepared descriptor(s) to be executed
+ * @tx: descriptor to be executed
+ *
+ * Function will execute given descriptor on the hardware or if the hardware
+ * is busy, queue the descriptor to be executed later on. Returns cookie which
+ * can be used to poll the status of the descriptor.
+ */
+static dma_cookie_t ep93xx_dma_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+ struct ep93xx_dma_chan *edmac = to_ep93xx_dma_chan(tx->chan);
+ struct ep93xx_dma_desc *desc;
+ dma_cookie_t cookie;
+ unsigned long flags;
+
+ spin_lock_irqsave(&edmac->lock, flags);
+ cookie = dma_cookie_assign(tx);
+
+ desc = container_of(tx, struct ep93xx_dma_desc, txd);
+
+ /*
+ * If nothing is currently prosessed, we push this descriptor
+ * directly to the hardware. Otherwise we put the descriptor
+ * to the pending queue.
+ */
+ if (list_empty(&edmac->active)) {
+ ep93xx_dma_set_active(edmac, desc);
+ edmac->edma->hw_submit(edmac);
+ } else {
+ list_add_tail(&desc->node, &edmac->queue);
+ }
+
+ spin_unlock_irqrestore(&edmac->lock, flags);
+ return cookie;
+}
+
+/**
+ * ep93xx_dma_alloc_chan_resources - allocate resources for the channel
+ * @chan: channel to allocate resources
+ *
+ * Function allocates necessary resources for the given DMA channel and
+ * returns number of allocated descriptors for the channel. Negative errno
+ * is returned in case of failure.
+ */
+static int ep93xx_dma_alloc_chan_resources(struct dma_chan *chan)
+{
+ struct ep93xx_dma_chan *edmac = to_ep93xx_dma_chan(chan);
+ struct ep93xx_dma_data *data = chan->private;
+ const char *name = dma_chan_name(chan);
+ int ret, i;
+
+ /* Sanity check the channel parameters */
+ if (!edmac->edma->m2m) {
+ if (!data)
+ return -EINVAL;
+ if (data->port < EP93XX_DMA_I2S1 ||
+ data->port > EP93XX_DMA_IRDA)
+ return -EINVAL;
+ if (data->direction != ep93xx_dma_chan_direction(chan))
+ return -EINVAL;
+ } else {
+ if (data) {
+ switch (data->port) {
+ case EP93XX_DMA_SSP:
+ case EP93XX_DMA_IDE:
+ if (!is_slave_direction(data->direction))
+ return -EINVAL;
+ break;
+ default:
+ return -EINVAL;
+ }
+ }
+ }
+
+ if (data && data->name)
+ name = data->name;
+
+ ret = clk_prepare_enable(edmac->clk);
+ if (ret)
+ return ret;
+
+ ret = request_irq(edmac->irq, ep93xx_dma_interrupt, 0, name, edmac);
+ if (ret)
+ goto fail_clk_disable;
+
+ spin_lock_irq(&edmac->lock);
+ dma_cookie_init(&edmac->chan);
+ ret = edmac->edma->hw_setup(edmac);
+ spin_unlock_irq(&edmac->lock);
+
+ if (ret)
+ goto fail_free_irq;
+
+ for (i = 0; i < DMA_MAX_CHAN_DESCRIPTORS; i++) {
+ struct ep93xx_dma_desc *desc;
+
+ desc = kzalloc(sizeof(*desc), GFP_KERNEL);
+ if (!desc) {
+ dev_warn(chan2dev(edmac), "not enough descriptors\n");
+ break;
+ }
+
+ INIT_LIST_HEAD(&desc->tx_list);
+
+ dma_async_tx_descriptor_init(&desc->txd, chan);
+ desc->txd.flags = DMA_CTRL_ACK;
+ desc->txd.tx_submit = ep93xx_dma_tx_submit;
+
+ ep93xx_dma_desc_put(edmac, desc);
+ }
+
+ return i;
+
+fail_free_irq:
+ free_irq(edmac->irq, edmac);
+fail_clk_disable:
+ clk_disable_unprepare(edmac->clk);
+
+ return ret;
+}
+
+/**
+ * ep93xx_dma_free_chan_resources - release resources for the channel
+ * @chan: channel
+ *
+ * Function releases all the resources allocated for the given channel.
+ * The channel must be idle when this is called.
+ */
+static void ep93xx_dma_free_chan_resources(struct dma_chan *chan)
+{
+ struct ep93xx_dma_chan *edmac = to_ep93xx_dma_chan(chan);
+ struct ep93xx_dma_desc *desc, *d;
+ unsigned long flags;
+ LIST_HEAD(list);
+
+ BUG_ON(!list_empty(&edmac->active));
+ BUG_ON(!list_empty(&edmac->queue));
+
+ spin_lock_irqsave(&edmac->lock, flags);
+ edmac->edma->hw_shutdown(edmac);
+ edmac->runtime_addr = 0;
+ edmac->runtime_ctrl = 0;
+ edmac->buffer = 0;
+ list_splice_init(&edmac->free_list, &list);
+ spin_unlock_irqrestore(&edmac->lock, flags);
+
+ list_for_each_entry_safe(desc, d, &list, node)
+ kfree(desc);
+
+ clk_disable_unprepare(edmac->clk);
+ free_irq(edmac->irq, edmac);
+}
+
+/**
+ * ep93xx_dma_prep_dma_memcpy - prepare a memcpy DMA operation
+ * @chan: channel
+ * @dest: destination bus address
+ * @src: source bus address
+ * @len: size of the transaction
+ * @flags: flags for the descriptor
+ *
+ * Returns a valid DMA descriptor or %NULL in case of failure.
+ */
+static struct dma_async_tx_descriptor *
+ep93xx_dma_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest,
+ dma_addr_t src, size_t len, unsigned long flags)
+{
+ struct ep93xx_dma_chan *edmac = to_ep93xx_dma_chan(chan);
+ struct ep93xx_dma_desc *desc, *first;
+ size_t bytes, offset;
+
+ first = NULL;
+ for (offset = 0; offset < len; offset += bytes) {
+ desc = ep93xx_dma_desc_get(edmac);
+ if (!desc) {
+ dev_warn(chan2dev(edmac), "couldn't get descriptor\n");
+ goto fail;
+ }
+
+ bytes = min_t(size_t, len - offset, DMA_MAX_CHAN_BYTES);
+
+ desc->src_addr = src + offset;
+ desc->dst_addr = dest + offset;
+ desc->size = bytes;
+
+ if (!first)
+ first = desc;
+ else
+ list_add_tail(&desc->node, &first->tx_list);
+ }
+
+ first->txd.cookie = -EBUSY;
+ first->txd.flags = flags;
+
+ return &first->txd;
+fail:
+ ep93xx_dma_desc_put(edmac, first);
+ return NULL;
+}
+
+/**
+ * ep93xx_dma_prep_slave_sg - prepare a slave DMA operation
+ * @chan: channel
+ * @sgl: list of buffers to transfer
+ * @sg_len: number of entries in @sgl
+ * @dir: direction of tha DMA transfer
+ * @flags: flags for the descriptor
+ * @context: operation context (ignored)
+ *
+ * Returns a valid DMA descriptor or %NULL in case of failure.
+ */
+static struct dma_async_tx_descriptor *
+ep93xx_dma_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
+ unsigned int sg_len, enum dma_transfer_direction dir,
+ unsigned long flags, void *context)
+{
+ struct ep93xx_dma_chan *edmac = to_ep93xx_dma_chan(chan);
+ struct ep93xx_dma_desc *desc, *first;
+ struct scatterlist *sg;
+ int i;
+
+ if (!edmac->edma->m2m && dir != ep93xx_dma_chan_direction(chan)) {
+ dev_warn(chan2dev(edmac),
+ "channel was configured with different direction\n");
+ return NULL;
+ }
+
+ if (test_bit(EP93XX_DMA_IS_CYCLIC, &edmac->flags)) {
+ dev_warn(chan2dev(edmac),
+ "channel is already used for cyclic transfers\n");
+ return NULL;
+ }
+
+ ep93xx_dma_slave_config_write(chan, dir, &edmac->slave_config);
+
+ first = NULL;
+ for_each_sg(sgl, sg, sg_len, i) {
+ size_t len = sg_dma_len(sg);
+
+ if (len > DMA_MAX_CHAN_BYTES) {
+ dev_warn(chan2dev(edmac), "too big transfer size %zu\n",
+ len);
+ goto fail;
+ }
+
+ desc = ep93xx_dma_desc_get(edmac);
+ if (!desc) {
+ dev_warn(chan2dev(edmac), "couldn't get descriptor\n");
+ goto fail;
+ }
+
+ if (dir == DMA_MEM_TO_DEV) {
+ desc->src_addr = sg_dma_address(sg);
+ desc->dst_addr = edmac->runtime_addr;
+ } else {
+ desc->src_addr = edmac->runtime_addr;
+ desc->dst_addr = sg_dma_address(sg);
+ }
+ desc->size = len;
+
+ if (!first)
+ first = desc;
+ else
+ list_add_tail(&desc->node, &first->tx_list);
+ }
+
+ first->txd.cookie = -EBUSY;
+ first->txd.flags = flags;
+
+ return &first->txd;
+
+fail:
+ ep93xx_dma_desc_put(edmac, first);
+ return NULL;
+}
+
+/**
+ * ep93xx_dma_prep_dma_cyclic - prepare a cyclic DMA operation
+ * @chan: channel
+ * @dma_addr: DMA mapped address of the buffer
+ * @buf_len: length of the buffer (in bytes)
+ * @period_len: length of a single period
+ * @dir: direction of the operation
+ * @flags: tx descriptor status flags
+ *
+ * Prepares a descriptor for cyclic DMA operation. This means that once the
+ * descriptor is submitted, we will be submitting in a @period_len sized
+ * buffers and calling callback once the period has been elapsed. Transfer
+ * terminates only when client calls dmaengine_terminate_all() for this
+ * channel.
+ *
+ * Returns a valid DMA descriptor or %NULL in case of failure.
+ */
+static struct dma_async_tx_descriptor *
+ep93xx_dma_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t dma_addr,
+ size_t buf_len, size_t period_len,
+ enum dma_transfer_direction dir, unsigned long flags)
+{
+ struct ep93xx_dma_chan *edmac = to_ep93xx_dma_chan(chan);
+ struct ep93xx_dma_desc *desc, *first;
+ size_t offset = 0;
+
+ if (!edmac->edma->m2m && dir != ep93xx_dma_chan_direction(chan)) {
+ dev_warn(chan2dev(edmac),
+ "channel was configured with different direction\n");
+ return NULL;
+ }
+
+ if (test_and_set_bit(EP93XX_DMA_IS_CYCLIC, &edmac->flags)) {
+ dev_warn(chan2dev(edmac),
+ "channel is already used for cyclic transfers\n");
+ return NULL;
+ }
+
+ if (period_len > DMA_MAX_CHAN_BYTES) {
+ dev_warn(chan2dev(edmac), "too big period length %zu\n",
+ period_len);
+ return NULL;
+ }
+
+ ep93xx_dma_slave_config_write(chan, dir, &edmac->slave_config);
+
+ /* Split the buffer into period size chunks */
+ first = NULL;
+ for (offset = 0; offset < buf_len; offset += period_len) {
+ desc = ep93xx_dma_desc_get(edmac);
+ if (!desc) {
+ dev_warn(chan2dev(edmac), "couldn't get descriptor\n");
+ goto fail;
+ }
+
+ if (dir == DMA_MEM_TO_DEV) {
+ desc->src_addr = dma_addr + offset;
+ desc->dst_addr = edmac->runtime_addr;
+ } else {
+ desc->src_addr = edmac->runtime_addr;
+ desc->dst_addr = dma_addr + offset;
+ }
+
+ desc->size = period_len;
+
+ if (!first)
+ first = desc;
+ else
+ list_add_tail(&desc->node, &first->tx_list);
+ }
+
+ first->txd.cookie = -EBUSY;
+
+ return &first->txd;
+
+fail:
+ ep93xx_dma_desc_put(edmac, first);
+ return NULL;
+}
+
+/**
+ * ep93xx_dma_synchronize - Synchronizes the termination of transfers to the
+ * current context.
+ * @chan: channel
+ *
+ * Synchronizes the DMA channel termination to the current context. When this
+ * function returns it is guaranteed that all transfers for previously issued
+ * descriptors have stopped and it is safe to free the memory associated
+ * with them. Furthermore it is guaranteed that all complete callback functions
+ * for a previously submitted descriptor have finished running and it is safe to
+ * free resources accessed from within the complete callbacks.
+ */
+static void ep93xx_dma_synchronize(struct dma_chan *chan)
+{
+ struct ep93xx_dma_chan *edmac = to_ep93xx_dma_chan(chan);
+
+ if (edmac->edma->hw_synchronize)
+ edmac->edma->hw_synchronize(edmac);
+}
+
+/**
+ * ep93xx_dma_terminate_all - terminate all transactions
+ * @chan: channel
+ *
+ * Stops all DMA transactions. All descriptors are put back to the
+ * @edmac->free_list and callbacks are _not_ called.
+ */
+static int ep93xx_dma_terminate_all(struct dma_chan *chan)
+{
+ struct ep93xx_dma_chan *edmac = to_ep93xx_dma_chan(chan);
+ struct ep93xx_dma_desc *desc, *_d;
+ unsigned long flags;
+ LIST_HEAD(list);
+
+ spin_lock_irqsave(&edmac->lock, flags);
+ /* First we disable and flush the DMA channel */
+ edmac->edma->hw_shutdown(edmac);
+ clear_bit(EP93XX_DMA_IS_CYCLIC, &edmac->flags);
+ list_splice_init(&edmac->active, &list);
+ list_splice_init(&edmac->queue, &list);
+ /*
+ * We then re-enable the channel. This way we can continue submitting
+ * the descriptors by just calling ->hw_submit() again.
+ */
+ edmac->edma->hw_setup(edmac);
+ spin_unlock_irqrestore(&edmac->lock, flags);
+
+ list_for_each_entry_safe(desc, _d, &list, node)
+ ep93xx_dma_desc_put(edmac, desc);
+
+ return 0;
+}
+
+static int ep93xx_dma_slave_config(struct dma_chan *chan,
+ struct dma_slave_config *config)
+{
+ struct ep93xx_dma_chan *edmac = to_ep93xx_dma_chan(chan);
+
+ memcpy(&edmac->slave_config, config, sizeof(*config));
+
+ return 0;
+}
+
+static int ep93xx_dma_slave_config_write(struct dma_chan *chan,
+ enum dma_transfer_direction dir,
+ struct dma_slave_config *config)
+{
+ struct ep93xx_dma_chan *edmac = to_ep93xx_dma_chan(chan);
+ enum dma_slave_buswidth width;
+ unsigned long flags;
+ u32 addr, ctrl;
+
+ if (!edmac->edma->m2m)
+ return -EINVAL;
+
+ switch (dir) {
+ case DMA_DEV_TO_MEM:
+ width = config->src_addr_width;
+ addr = config->src_addr;
+ break;
+
+ case DMA_MEM_TO_DEV:
+ width = config->dst_addr_width;
+ addr = config->dst_addr;
+ break;
+
+ default:
+ return -EINVAL;
+ }
+
+ switch (width) {
+ case DMA_SLAVE_BUSWIDTH_1_BYTE:
+ ctrl = 0;
+ break;
+ case DMA_SLAVE_BUSWIDTH_2_BYTES:
+ ctrl = M2M_CONTROL_PW_16;
+ break;
+ case DMA_SLAVE_BUSWIDTH_4_BYTES:
+ ctrl = M2M_CONTROL_PW_32;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ spin_lock_irqsave(&edmac->lock, flags);
+ edmac->runtime_addr = addr;
+ edmac->runtime_ctrl = ctrl;
+ spin_unlock_irqrestore(&edmac->lock, flags);
+
+ return 0;
+}
+
+/**
+ * ep93xx_dma_tx_status - check if a transaction is completed
+ * @chan: channel
+ * @cookie: transaction specific cookie
+ * @state: state of the transaction is stored here if given
+ *
+ * This function can be used to query state of a given transaction.
+ */
+static enum dma_status ep93xx_dma_tx_status(struct dma_chan *chan,
+ dma_cookie_t cookie,
+ struct dma_tx_state *state)
+{
+ return dma_cookie_status(chan, cookie, state);
+}
+
+/**
+ * ep93xx_dma_issue_pending - push pending transactions to the hardware
+ * @chan: channel
+ *
+ * When this function is called, all pending transactions are pushed to the
+ * hardware and executed.
+ */
+static void ep93xx_dma_issue_pending(struct dma_chan *chan)
+{
+ ep93xx_dma_advance_work(to_ep93xx_dma_chan(chan));
+}
+
+static int __init ep93xx_dma_probe(struct platform_device *pdev)
+{
+ struct ep93xx_dma_platform_data *pdata = dev_get_platdata(&pdev->dev);
+ struct ep93xx_dma_engine *edma;
+ struct dma_device *dma_dev;
+ size_t edma_size;
+ int ret, i;
+
+ edma_size = pdata->num_channels * sizeof(struct ep93xx_dma_chan);
+ edma = kzalloc(sizeof(*edma) + edma_size, GFP_KERNEL);
+ if (!edma)
+ return -ENOMEM;
+
+ dma_dev = &edma->dma_dev;
+ edma->m2m = platform_get_device_id(pdev)->driver_data;
+ edma->num_channels = pdata->num_channels;
+
+ INIT_LIST_HEAD(&dma_dev->channels);
+ for (i = 0; i < pdata->num_channels; i++) {
+ const struct ep93xx_dma_chan_data *cdata = &pdata->channels[i];
+ struct ep93xx_dma_chan *edmac = &edma->channels[i];
+
+ edmac->chan.device = dma_dev;
+ edmac->regs = cdata->base;
+ edmac->irq = cdata->irq;
+ edmac->edma = edma;
+
+ edmac->clk = clk_get(NULL, cdata->name);
+ if (IS_ERR(edmac->clk)) {
+ dev_warn(&pdev->dev, "failed to get clock for %s\n",
+ cdata->name);
+ continue;
+ }
+
+ spin_lock_init(&edmac->lock);
+ INIT_LIST_HEAD(&edmac->active);
+ INIT_LIST_HEAD(&edmac->queue);
+ INIT_LIST_HEAD(&edmac->free_list);
+ tasklet_setup(&edmac->tasklet, ep93xx_dma_tasklet);
+
+ list_add_tail(&edmac->chan.device_node,
+ &dma_dev->channels);
+ }
+
+ dma_cap_zero(dma_dev->cap_mask);
+ dma_cap_set(DMA_SLAVE, dma_dev->cap_mask);
+ dma_cap_set(DMA_CYCLIC, dma_dev->cap_mask);
+
+ dma_dev->dev = &pdev->dev;
+ dma_dev->device_alloc_chan_resources = ep93xx_dma_alloc_chan_resources;
+ dma_dev->device_free_chan_resources = ep93xx_dma_free_chan_resources;
+ dma_dev->device_prep_slave_sg = ep93xx_dma_prep_slave_sg;
+ dma_dev->device_prep_dma_cyclic = ep93xx_dma_prep_dma_cyclic;
+ dma_dev->device_config = ep93xx_dma_slave_config;
+ dma_dev->device_synchronize = ep93xx_dma_synchronize;
+ dma_dev->device_terminate_all = ep93xx_dma_terminate_all;
+ dma_dev->device_issue_pending = ep93xx_dma_issue_pending;
+ dma_dev->device_tx_status = ep93xx_dma_tx_status;
+
+ dma_set_max_seg_size(dma_dev->dev, DMA_MAX_CHAN_BYTES);
+
+ if (edma->m2m) {
+ dma_cap_set(DMA_MEMCPY, dma_dev->cap_mask);
+ dma_dev->device_prep_dma_memcpy = ep93xx_dma_prep_dma_memcpy;
+
+ edma->hw_setup = m2m_hw_setup;
+ edma->hw_shutdown = m2m_hw_shutdown;
+ edma->hw_submit = m2m_hw_submit;
+ edma->hw_interrupt = m2m_hw_interrupt;
+ } else {
+ dma_cap_set(DMA_PRIVATE, dma_dev->cap_mask);
+
+ edma->hw_synchronize = m2p_hw_synchronize;
+ edma->hw_setup = m2p_hw_setup;
+ edma->hw_shutdown = m2p_hw_shutdown;
+ edma->hw_submit = m2p_hw_submit;
+ edma->hw_interrupt = m2p_hw_interrupt;
+ }
+
+ ret = dma_async_device_register(dma_dev);
+ if (unlikely(ret)) {
+ for (i = 0; i < edma->num_channels; i++) {
+ struct ep93xx_dma_chan *edmac = &edma->channels[i];
+ if (!IS_ERR_OR_NULL(edmac->clk))
+ clk_put(edmac->clk);
+ }
+ kfree(edma);
+ } else {
+ dev_info(dma_dev->dev, "EP93xx M2%s DMA ready\n",
+ edma->m2m ? "M" : "P");
+ }
+
+ return ret;
+}
+
+static const struct platform_device_id ep93xx_dma_driver_ids[] = {
+ { "ep93xx-dma-m2p", 0 },
+ { "ep93xx-dma-m2m", 1 },
+ { },
+};
+
+static struct platform_driver ep93xx_dma_driver = {
+ .driver = {
+ .name = "ep93xx-dma",
+ },
+ .id_table = ep93xx_dma_driver_ids,
+};
+
+static int __init ep93xx_dma_module_init(void)
+{
+ return platform_driver_probe(&ep93xx_dma_driver, ep93xx_dma_probe);
+}
+subsys_initcall(ep93xx_dma_module_init);
+
+MODULE_AUTHOR("Mika Westerberg <mika.westerberg@iki.fi>");
+MODULE_DESCRIPTION("EP93xx DMA driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/dma/fsl-dpaa2-qdma/Kconfig b/drivers/dma/fsl-dpaa2-qdma/Kconfig
new file mode 100644
index 000000000..258ed6be9
--- /dev/null
+++ b/drivers/dma/fsl-dpaa2-qdma/Kconfig
@@ -0,0 +1,9 @@
+menuconfig FSL_DPAA2_QDMA
+ tristate "NXP DPAA2 QDMA"
+ depends on ARM64
+ depends on FSL_MC_BUS && FSL_MC_DPIO
+ select DMA_ENGINE
+ select DMA_VIRTUAL_CHANNELS
+ help
+ NXP Data Path Acceleration Architecture 2 QDMA driver,
+ using the NXP MC bus driver.
diff --git a/drivers/dma/fsl-dpaa2-qdma/Makefile b/drivers/dma/fsl-dpaa2-qdma/Makefile
new file mode 100644
index 000000000..c1d0226f2
--- /dev/null
+++ b/drivers/dma/fsl-dpaa2-qdma/Makefile
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0
+# Makefile for the NXP DPAA2 qDMA controllers
+obj-$(CONFIG_FSL_DPAA2_QDMA) += dpaa2-qdma.o dpdmai.o
diff --git a/drivers/dma/fsl-dpaa2-qdma/dpaa2-qdma.c b/drivers/dma/fsl-dpaa2-qdma/dpaa2-qdma.c
new file mode 100644
index 000000000..8dd40d00a
--- /dev/null
+++ b/drivers/dma/fsl-dpaa2-qdma/dpaa2-qdma.c
@@ -0,0 +1,841 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright 2019 NXP
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/dmapool.h>
+#include <linux/of_irq.h>
+#include <linux/iommu.h>
+#include <linux/sys_soc.h>
+#include <linux/fsl/mc.h>
+#include <soc/fsl/dpaa2-io.h>
+
+#include "../virt-dma.h"
+#include "dpdmai.h"
+#include "dpaa2-qdma.h"
+
+static bool smmu_disable = true;
+
+static struct dpaa2_qdma_chan *to_dpaa2_qdma_chan(struct dma_chan *chan)
+{
+ return container_of(chan, struct dpaa2_qdma_chan, vchan.chan);
+}
+
+static struct dpaa2_qdma_comp *to_fsl_qdma_comp(struct virt_dma_desc *vd)
+{
+ return container_of(vd, struct dpaa2_qdma_comp, vdesc);
+}
+
+static int dpaa2_qdma_alloc_chan_resources(struct dma_chan *chan)
+{
+ struct dpaa2_qdma_chan *dpaa2_chan = to_dpaa2_qdma_chan(chan);
+ struct dpaa2_qdma_engine *dpaa2_qdma = dpaa2_chan->qdma;
+ struct device *dev = &dpaa2_qdma->priv->dpdmai_dev->dev;
+
+ dpaa2_chan->fd_pool = dma_pool_create("fd_pool", dev,
+ sizeof(struct dpaa2_fd),
+ sizeof(struct dpaa2_fd), 0);
+ if (!dpaa2_chan->fd_pool)
+ goto err;
+
+ dpaa2_chan->fl_pool = dma_pool_create("fl_pool", dev,
+ sizeof(struct dpaa2_fl_entry),
+ sizeof(struct dpaa2_fl_entry), 0);
+ if (!dpaa2_chan->fl_pool)
+ goto err_fd;
+
+ dpaa2_chan->sdd_pool =
+ dma_pool_create("sdd_pool", dev,
+ sizeof(struct dpaa2_qdma_sd_d),
+ sizeof(struct dpaa2_qdma_sd_d), 0);
+ if (!dpaa2_chan->sdd_pool)
+ goto err_fl;
+
+ return dpaa2_qdma->desc_allocated++;
+err_fl:
+ dma_pool_destroy(dpaa2_chan->fl_pool);
+err_fd:
+ dma_pool_destroy(dpaa2_chan->fd_pool);
+err:
+ return -ENOMEM;
+}
+
+static void dpaa2_qdma_free_chan_resources(struct dma_chan *chan)
+{
+ struct dpaa2_qdma_chan *dpaa2_chan = to_dpaa2_qdma_chan(chan);
+ struct dpaa2_qdma_engine *dpaa2_qdma = dpaa2_chan->qdma;
+ unsigned long flags;
+
+ LIST_HEAD(head);
+
+ spin_lock_irqsave(&dpaa2_chan->vchan.lock, flags);
+ vchan_get_all_descriptors(&dpaa2_chan->vchan, &head);
+ spin_unlock_irqrestore(&dpaa2_chan->vchan.lock, flags);
+
+ vchan_dma_desc_free_list(&dpaa2_chan->vchan, &head);
+
+ dpaa2_dpdmai_free_comp(dpaa2_chan, &dpaa2_chan->comp_used);
+ dpaa2_dpdmai_free_comp(dpaa2_chan, &dpaa2_chan->comp_free);
+
+ dma_pool_destroy(dpaa2_chan->fd_pool);
+ dma_pool_destroy(dpaa2_chan->fl_pool);
+ dma_pool_destroy(dpaa2_chan->sdd_pool);
+ dpaa2_qdma->desc_allocated--;
+}
+
+/*
+ * Request a command descriptor for enqueue.
+ */
+static struct dpaa2_qdma_comp *
+dpaa2_qdma_request_desc(struct dpaa2_qdma_chan *dpaa2_chan)
+{
+ struct dpaa2_qdma_priv *qdma_priv = dpaa2_chan->qdma->priv;
+ struct device *dev = &qdma_priv->dpdmai_dev->dev;
+ struct dpaa2_qdma_comp *comp_temp = NULL;
+ unsigned long flags;
+
+ spin_lock_irqsave(&dpaa2_chan->queue_lock, flags);
+ if (list_empty(&dpaa2_chan->comp_free)) {
+ spin_unlock_irqrestore(&dpaa2_chan->queue_lock, flags);
+ comp_temp = kzalloc(sizeof(*comp_temp), GFP_NOWAIT);
+ if (!comp_temp)
+ goto err;
+ comp_temp->fd_virt_addr =
+ dma_pool_alloc(dpaa2_chan->fd_pool, GFP_NOWAIT,
+ &comp_temp->fd_bus_addr);
+ if (!comp_temp->fd_virt_addr)
+ goto err_comp;
+
+ comp_temp->fl_virt_addr =
+ dma_pool_alloc(dpaa2_chan->fl_pool, GFP_NOWAIT,
+ &comp_temp->fl_bus_addr);
+ if (!comp_temp->fl_virt_addr)
+ goto err_fd_virt;
+
+ comp_temp->desc_virt_addr =
+ dma_pool_alloc(dpaa2_chan->sdd_pool, GFP_NOWAIT,
+ &comp_temp->desc_bus_addr);
+ if (!comp_temp->desc_virt_addr)
+ goto err_fl_virt;
+
+ comp_temp->qchan = dpaa2_chan;
+ return comp_temp;
+ }
+
+ comp_temp = list_first_entry(&dpaa2_chan->comp_free,
+ struct dpaa2_qdma_comp, list);
+ list_del(&comp_temp->list);
+ spin_unlock_irqrestore(&dpaa2_chan->queue_lock, flags);
+
+ comp_temp->qchan = dpaa2_chan;
+
+ return comp_temp;
+
+err_fl_virt:
+ dma_pool_free(dpaa2_chan->fl_pool,
+ comp_temp->fl_virt_addr,
+ comp_temp->fl_bus_addr);
+err_fd_virt:
+ dma_pool_free(dpaa2_chan->fd_pool,
+ comp_temp->fd_virt_addr,
+ comp_temp->fd_bus_addr);
+err_comp:
+ kfree(comp_temp);
+err:
+ dev_err(dev, "Failed to request descriptor\n");
+ return NULL;
+}
+
+static void
+dpaa2_qdma_populate_fd(u32 format, struct dpaa2_qdma_comp *dpaa2_comp)
+{
+ struct dpaa2_fd *fd;
+
+ fd = dpaa2_comp->fd_virt_addr;
+ memset(fd, 0, sizeof(struct dpaa2_fd));
+
+ /* fd populated */
+ dpaa2_fd_set_addr(fd, dpaa2_comp->fl_bus_addr);
+
+ /*
+ * Bypass memory translation, Frame list format, short length disable
+ * we need to disable BMT if fsl-mc use iova addr
+ */
+ if (smmu_disable)
+ dpaa2_fd_set_bpid(fd, QMAN_FD_BMT_ENABLE);
+ dpaa2_fd_set_format(fd, QMAN_FD_FMT_ENABLE | QMAN_FD_SL_DISABLE);
+
+ dpaa2_fd_set_frc(fd, format | QDMA_SER_CTX);
+}
+
+/* first frame list for descriptor buffer */
+static void
+dpaa2_qdma_populate_first_framel(struct dpaa2_fl_entry *f_list,
+ struct dpaa2_qdma_comp *dpaa2_comp,
+ bool wrt_changed)
+{
+ struct dpaa2_qdma_sd_d *sdd;
+
+ sdd = dpaa2_comp->desc_virt_addr;
+ memset(sdd, 0, 2 * (sizeof(*sdd)));
+
+ /* source descriptor CMD */
+ sdd->cmd = cpu_to_le32(QDMA_SD_CMD_RDTTYPE_COHERENT);
+ sdd++;
+
+ /* dest descriptor CMD */
+ if (wrt_changed)
+ sdd->cmd = cpu_to_le32(LX2160_QDMA_DD_CMD_WRTTYPE_COHERENT);
+ else
+ sdd->cmd = cpu_to_le32(QDMA_DD_CMD_WRTTYPE_COHERENT);
+
+ memset(f_list, 0, sizeof(struct dpaa2_fl_entry));
+
+ /* first frame list to source descriptor */
+ dpaa2_fl_set_addr(f_list, dpaa2_comp->desc_bus_addr);
+ dpaa2_fl_set_len(f_list, 0x20);
+ dpaa2_fl_set_format(f_list, QDMA_FL_FMT_SBF | QDMA_FL_SL_LONG);
+
+ /* bypass memory translation */
+ if (smmu_disable)
+ f_list->bpid = cpu_to_le16(QDMA_FL_BMT_ENABLE);
+}
+
+/* source and destination frame list */
+static void
+dpaa2_qdma_populate_frames(struct dpaa2_fl_entry *f_list,
+ dma_addr_t dst, dma_addr_t src,
+ size_t len, uint8_t fmt)
+{
+ /* source frame list to source buffer */
+ memset(f_list, 0, sizeof(struct dpaa2_fl_entry));
+
+ dpaa2_fl_set_addr(f_list, src);
+ dpaa2_fl_set_len(f_list, len);
+
+ /* single buffer frame or scatter gather frame */
+ dpaa2_fl_set_format(f_list, (fmt | QDMA_FL_SL_LONG));
+
+ /* bypass memory translation */
+ if (smmu_disable)
+ f_list->bpid = cpu_to_le16(QDMA_FL_BMT_ENABLE);
+
+ f_list++;
+
+ /* destination frame list to destination buffer */
+ memset(f_list, 0, sizeof(struct dpaa2_fl_entry));
+
+ dpaa2_fl_set_addr(f_list, dst);
+ dpaa2_fl_set_len(f_list, len);
+ dpaa2_fl_set_format(f_list, (fmt | QDMA_FL_SL_LONG));
+ /* single buffer frame or scatter gather frame */
+ dpaa2_fl_set_final(f_list, QDMA_FL_F);
+ /* bypass memory translation */
+ if (smmu_disable)
+ f_list->bpid = cpu_to_le16(QDMA_FL_BMT_ENABLE);
+}
+
+static struct dma_async_tx_descriptor
+*dpaa2_qdma_prep_memcpy(struct dma_chan *chan, dma_addr_t dst,
+ dma_addr_t src, size_t len, ulong flags)
+{
+ struct dpaa2_qdma_chan *dpaa2_chan = to_dpaa2_qdma_chan(chan);
+ struct dpaa2_qdma_engine *dpaa2_qdma;
+ struct dpaa2_qdma_comp *dpaa2_comp;
+ struct dpaa2_fl_entry *f_list;
+ bool wrt_changed;
+
+ dpaa2_qdma = dpaa2_chan->qdma;
+ dpaa2_comp = dpaa2_qdma_request_desc(dpaa2_chan);
+ if (!dpaa2_comp)
+ return NULL;
+
+ wrt_changed = (bool)dpaa2_qdma->qdma_wrtype_fixup;
+
+ /* populate Frame descriptor */
+ dpaa2_qdma_populate_fd(QDMA_FD_LONG_FORMAT, dpaa2_comp);
+
+ f_list = dpaa2_comp->fl_virt_addr;
+
+ /* first frame list for descriptor buffer (logn format) */
+ dpaa2_qdma_populate_first_framel(f_list, dpaa2_comp, wrt_changed);
+
+ f_list++;
+
+ dpaa2_qdma_populate_frames(f_list, dst, src, len, QDMA_FL_FMT_SBF);
+
+ return vchan_tx_prep(&dpaa2_chan->vchan, &dpaa2_comp->vdesc, flags);
+}
+
+static void dpaa2_qdma_issue_pending(struct dma_chan *chan)
+{
+ struct dpaa2_qdma_chan *dpaa2_chan = to_dpaa2_qdma_chan(chan);
+ struct dpaa2_qdma_comp *dpaa2_comp;
+ struct virt_dma_desc *vdesc;
+ struct dpaa2_fd *fd;
+ unsigned long flags;
+ int err;
+
+ spin_lock_irqsave(&dpaa2_chan->queue_lock, flags);
+ spin_lock(&dpaa2_chan->vchan.lock);
+ if (vchan_issue_pending(&dpaa2_chan->vchan)) {
+ vdesc = vchan_next_desc(&dpaa2_chan->vchan);
+ if (!vdesc)
+ goto err_enqueue;
+ dpaa2_comp = to_fsl_qdma_comp(vdesc);
+
+ fd = dpaa2_comp->fd_virt_addr;
+
+ list_del(&vdesc->node);
+ list_add_tail(&dpaa2_comp->list, &dpaa2_chan->comp_used);
+
+ err = dpaa2_io_service_enqueue_fq(NULL, dpaa2_chan->fqid, fd);
+ if (err) {
+ list_move_tail(&dpaa2_comp->list,
+ &dpaa2_chan->comp_free);
+ }
+ }
+err_enqueue:
+ spin_unlock(&dpaa2_chan->vchan.lock);
+ spin_unlock_irqrestore(&dpaa2_chan->queue_lock, flags);
+}
+
+static int __cold dpaa2_qdma_setup(struct fsl_mc_device *ls_dev)
+{
+ struct dpaa2_qdma_priv_per_prio *ppriv;
+ struct device *dev = &ls_dev->dev;
+ struct dpaa2_qdma_priv *priv;
+ u8 prio_def = DPDMAI_PRIO_NUM;
+ int err = -EINVAL;
+ int i;
+
+ priv = dev_get_drvdata(dev);
+
+ priv->dev = dev;
+ priv->dpqdma_id = ls_dev->obj_desc.id;
+
+ /* Get the handle for the DPDMAI this interface is associate with */
+ err = dpdmai_open(priv->mc_io, 0, priv->dpqdma_id, &ls_dev->mc_handle);
+ if (err) {
+ dev_err(dev, "dpdmai_open() failed\n");
+ return err;
+ }
+
+ dev_dbg(dev, "Opened dpdmai object successfully\n");
+
+ err = dpdmai_get_attributes(priv->mc_io, 0, ls_dev->mc_handle,
+ &priv->dpdmai_attr);
+ if (err) {
+ dev_err(dev, "dpdmai_get_attributes() failed\n");
+ goto exit;
+ }
+
+ if (priv->dpdmai_attr.version.major > DPDMAI_VER_MAJOR) {
+ err = -EINVAL;
+ dev_err(dev, "DPDMAI major version mismatch\n"
+ "Found %u.%u, supported version is %u.%u\n",
+ priv->dpdmai_attr.version.major,
+ priv->dpdmai_attr.version.minor,
+ DPDMAI_VER_MAJOR, DPDMAI_VER_MINOR);
+ goto exit;
+ }
+
+ if (priv->dpdmai_attr.version.minor > DPDMAI_VER_MINOR) {
+ err = -EINVAL;
+ dev_err(dev, "DPDMAI minor version mismatch\n"
+ "Found %u.%u, supported version is %u.%u\n",
+ priv->dpdmai_attr.version.major,
+ priv->dpdmai_attr.version.minor,
+ DPDMAI_VER_MAJOR, DPDMAI_VER_MINOR);
+ goto exit;
+ }
+
+ priv->num_pairs = min(priv->dpdmai_attr.num_of_priorities, prio_def);
+ ppriv = kcalloc(priv->num_pairs, sizeof(*ppriv), GFP_KERNEL);
+ if (!ppriv) {
+ err = -ENOMEM;
+ goto exit;
+ }
+ priv->ppriv = ppriv;
+
+ for (i = 0; i < priv->num_pairs; i++) {
+ err = dpdmai_get_rx_queue(priv->mc_io, 0, ls_dev->mc_handle,
+ i, &priv->rx_queue_attr[i]);
+ if (err) {
+ dev_err(dev, "dpdmai_get_rx_queue() failed\n");
+ goto exit;
+ }
+ ppriv->rsp_fqid = priv->rx_queue_attr[i].fqid;
+
+ err = dpdmai_get_tx_queue(priv->mc_io, 0, ls_dev->mc_handle,
+ i, &priv->tx_fqid[i]);
+ if (err) {
+ dev_err(dev, "dpdmai_get_tx_queue() failed\n");
+ goto exit;
+ }
+ ppriv->req_fqid = priv->tx_fqid[i];
+ ppriv->prio = i;
+ ppriv->priv = priv;
+ ppriv++;
+ }
+
+ return 0;
+exit:
+ dpdmai_close(priv->mc_io, 0, ls_dev->mc_handle);
+ return err;
+}
+
+static void dpaa2_qdma_fqdan_cb(struct dpaa2_io_notification_ctx *ctx)
+{
+ struct dpaa2_qdma_priv_per_prio *ppriv = container_of(ctx,
+ struct dpaa2_qdma_priv_per_prio, nctx);
+ struct dpaa2_qdma_comp *dpaa2_comp, *_comp_tmp;
+ struct dpaa2_qdma_priv *priv = ppriv->priv;
+ u32 n_chans = priv->dpaa2_qdma->n_chans;
+ struct dpaa2_qdma_chan *qchan;
+ const struct dpaa2_fd *fd_eq;
+ const struct dpaa2_fd *fd;
+ struct dpaa2_dq *dq;
+ int is_last = 0;
+ int found;
+ u8 status;
+ int err;
+ int i;
+
+ do {
+ err = dpaa2_io_service_pull_fq(NULL, ppriv->rsp_fqid,
+ ppriv->store);
+ } while (err);
+
+ while (!is_last) {
+ do {
+ dq = dpaa2_io_store_next(ppriv->store, &is_last);
+ } while (!is_last && !dq);
+ if (!dq) {
+ dev_err(priv->dev, "FQID returned no valid frames!\n");
+ continue;
+ }
+
+ /* obtain FD and process the error */
+ fd = dpaa2_dq_fd(dq);
+
+ status = dpaa2_fd_get_ctrl(fd) & 0xff;
+ if (status)
+ dev_err(priv->dev, "FD error occurred\n");
+ found = 0;
+ for (i = 0; i < n_chans; i++) {
+ qchan = &priv->dpaa2_qdma->chans[i];
+ spin_lock(&qchan->queue_lock);
+ if (list_empty(&qchan->comp_used)) {
+ spin_unlock(&qchan->queue_lock);
+ continue;
+ }
+ list_for_each_entry_safe(dpaa2_comp, _comp_tmp,
+ &qchan->comp_used, list) {
+ fd_eq = dpaa2_comp->fd_virt_addr;
+
+ if (le64_to_cpu(fd_eq->simple.addr) ==
+ le64_to_cpu(fd->simple.addr)) {
+ spin_lock(&qchan->vchan.lock);
+ vchan_cookie_complete(&
+ dpaa2_comp->vdesc);
+ spin_unlock(&qchan->vchan.lock);
+ found = 1;
+ break;
+ }
+ }
+ spin_unlock(&qchan->queue_lock);
+ if (found)
+ break;
+ }
+ }
+
+ dpaa2_io_service_rearm(NULL, ctx);
+}
+
+static int __cold dpaa2_qdma_dpio_setup(struct dpaa2_qdma_priv *priv)
+{
+ struct dpaa2_qdma_priv_per_prio *ppriv;
+ struct device *dev = priv->dev;
+ int err = -EINVAL;
+ int i, num;
+
+ num = priv->num_pairs;
+ ppriv = priv->ppriv;
+ for (i = 0; i < num; i++) {
+ ppriv->nctx.is_cdan = 0;
+ ppriv->nctx.desired_cpu = DPAA2_IO_ANY_CPU;
+ ppriv->nctx.id = ppriv->rsp_fqid;
+ ppriv->nctx.cb = dpaa2_qdma_fqdan_cb;
+ err = dpaa2_io_service_register(NULL, &ppriv->nctx, dev);
+ if (err) {
+ dev_err(dev, "Notification register failed\n");
+ goto err_service;
+ }
+
+ ppriv->store =
+ dpaa2_io_store_create(DPAA2_QDMA_STORE_SIZE, dev);
+ if (!ppriv->store) {
+ err = -ENOMEM;
+ dev_err(dev, "dpaa2_io_store_create() failed\n");
+ goto err_store;
+ }
+
+ ppriv++;
+ }
+ return 0;
+
+err_store:
+ dpaa2_io_service_deregister(NULL, &ppriv->nctx, dev);
+err_service:
+ ppriv--;
+ while (ppriv >= priv->ppriv) {
+ dpaa2_io_service_deregister(NULL, &ppriv->nctx, dev);
+ dpaa2_io_store_destroy(ppriv->store);
+ ppriv--;
+ }
+ return err;
+}
+
+static void dpaa2_dpmai_store_free(struct dpaa2_qdma_priv *priv)
+{
+ struct dpaa2_qdma_priv_per_prio *ppriv = priv->ppriv;
+ int i;
+
+ for (i = 0; i < priv->num_pairs; i++) {
+ dpaa2_io_store_destroy(ppriv->store);
+ ppriv++;
+ }
+}
+
+static void dpaa2_dpdmai_dpio_free(struct dpaa2_qdma_priv *priv)
+{
+ struct dpaa2_qdma_priv_per_prio *ppriv = priv->ppriv;
+ struct device *dev = priv->dev;
+ int i;
+
+ for (i = 0; i < priv->num_pairs; i++) {
+ dpaa2_io_service_deregister(NULL, &ppriv->nctx, dev);
+ ppriv++;
+ }
+}
+
+static int __cold dpaa2_dpdmai_bind(struct dpaa2_qdma_priv *priv)
+{
+ struct dpdmai_rx_queue_cfg rx_queue_cfg;
+ struct dpaa2_qdma_priv_per_prio *ppriv;
+ struct device *dev = priv->dev;
+ struct fsl_mc_device *ls_dev;
+ int i, num;
+ int err;
+
+ ls_dev = to_fsl_mc_device(dev);
+ num = priv->num_pairs;
+ ppriv = priv->ppriv;
+ for (i = 0; i < num; i++) {
+ rx_queue_cfg.options = DPDMAI_QUEUE_OPT_USER_CTX |
+ DPDMAI_QUEUE_OPT_DEST;
+ rx_queue_cfg.user_ctx = ppriv->nctx.qman64;
+ rx_queue_cfg.dest_cfg.dest_type = DPDMAI_DEST_DPIO;
+ rx_queue_cfg.dest_cfg.dest_id = ppriv->nctx.dpio_id;
+ rx_queue_cfg.dest_cfg.priority = ppriv->prio;
+ err = dpdmai_set_rx_queue(priv->mc_io, 0, ls_dev->mc_handle,
+ rx_queue_cfg.dest_cfg.priority,
+ &rx_queue_cfg);
+ if (err) {
+ dev_err(dev, "dpdmai_set_rx_queue() failed\n");
+ return err;
+ }
+
+ ppriv++;
+ }
+
+ return 0;
+}
+
+static int __cold dpaa2_dpdmai_dpio_unbind(struct dpaa2_qdma_priv *priv)
+{
+ struct dpaa2_qdma_priv_per_prio *ppriv = priv->ppriv;
+ struct device *dev = priv->dev;
+ struct fsl_mc_device *ls_dev;
+ int err = 0;
+ int i;
+
+ ls_dev = to_fsl_mc_device(dev);
+
+ for (i = 0; i < priv->num_pairs; i++) {
+ ppriv->nctx.qman64 = 0;
+ ppriv->nctx.dpio_id = 0;
+ ppriv++;
+ }
+
+ err = dpdmai_reset(priv->mc_io, 0, ls_dev->mc_handle);
+ if (err)
+ dev_err(dev, "dpdmai_reset() failed\n");
+
+ return err;
+}
+
+static void dpaa2_dpdmai_free_comp(struct dpaa2_qdma_chan *qchan,
+ struct list_head *head)
+{
+ struct dpaa2_qdma_comp *comp_tmp, *_comp_tmp;
+ unsigned long flags;
+
+ list_for_each_entry_safe(comp_tmp, _comp_tmp,
+ head, list) {
+ spin_lock_irqsave(&qchan->queue_lock, flags);
+ list_del(&comp_tmp->list);
+ spin_unlock_irqrestore(&qchan->queue_lock, flags);
+ dma_pool_free(qchan->fd_pool,
+ comp_tmp->fd_virt_addr,
+ comp_tmp->fd_bus_addr);
+ dma_pool_free(qchan->fl_pool,
+ comp_tmp->fl_virt_addr,
+ comp_tmp->fl_bus_addr);
+ dma_pool_free(qchan->sdd_pool,
+ comp_tmp->desc_virt_addr,
+ comp_tmp->desc_bus_addr);
+ kfree(comp_tmp);
+ }
+}
+
+static void dpaa2_dpdmai_free_channels(struct dpaa2_qdma_engine *dpaa2_qdma)
+{
+ struct dpaa2_qdma_chan *qchan;
+ int num, i;
+
+ num = dpaa2_qdma->n_chans;
+ for (i = 0; i < num; i++) {
+ qchan = &dpaa2_qdma->chans[i];
+ dpaa2_dpdmai_free_comp(qchan, &qchan->comp_used);
+ dpaa2_dpdmai_free_comp(qchan, &qchan->comp_free);
+ dma_pool_destroy(qchan->fd_pool);
+ dma_pool_destroy(qchan->fl_pool);
+ dma_pool_destroy(qchan->sdd_pool);
+ }
+}
+
+static void dpaa2_qdma_free_desc(struct virt_dma_desc *vdesc)
+{
+ struct dpaa2_qdma_comp *dpaa2_comp;
+ struct dpaa2_qdma_chan *qchan;
+ unsigned long flags;
+
+ dpaa2_comp = to_fsl_qdma_comp(vdesc);
+ qchan = dpaa2_comp->qchan;
+ spin_lock_irqsave(&qchan->queue_lock, flags);
+ list_move_tail(&dpaa2_comp->list, &qchan->comp_free);
+ spin_unlock_irqrestore(&qchan->queue_lock, flags);
+}
+
+static int dpaa2_dpdmai_init_channels(struct dpaa2_qdma_engine *dpaa2_qdma)
+{
+ struct dpaa2_qdma_priv *priv = dpaa2_qdma->priv;
+ struct dpaa2_qdma_chan *dpaa2_chan;
+ int num = priv->num_pairs;
+ int i;
+
+ INIT_LIST_HEAD(&dpaa2_qdma->dma_dev.channels);
+ for (i = 0; i < dpaa2_qdma->n_chans; i++) {
+ dpaa2_chan = &dpaa2_qdma->chans[i];
+ dpaa2_chan->qdma = dpaa2_qdma;
+ dpaa2_chan->fqid = priv->tx_fqid[i % num];
+ dpaa2_chan->vchan.desc_free = dpaa2_qdma_free_desc;
+ vchan_init(&dpaa2_chan->vchan, &dpaa2_qdma->dma_dev);
+ spin_lock_init(&dpaa2_chan->queue_lock);
+ INIT_LIST_HEAD(&dpaa2_chan->comp_used);
+ INIT_LIST_HEAD(&dpaa2_chan->comp_free);
+ }
+ return 0;
+}
+
+static int dpaa2_qdma_probe(struct fsl_mc_device *dpdmai_dev)
+{
+ struct device *dev = &dpdmai_dev->dev;
+ struct dpaa2_qdma_engine *dpaa2_qdma;
+ struct dpaa2_qdma_priv *priv;
+ int err;
+
+ priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return -ENOMEM;
+ dev_set_drvdata(dev, priv);
+ priv->dpdmai_dev = dpdmai_dev;
+
+ priv->iommu_domain = iommu_get_domain_for_dev(dev);
+ if (priv->iommu_domain)
+ smmu_disable = false;
+
+ /* obtain a MC portal */
+ err = fsl_mc_portal_allocate(dpdmai_dev, 0, &priv->mc_io);
+ if (err) {
+ if (err == -ENXIO)
+ err = -EPROBE_DEFER;
+ else
+ dev_err(dev, "MC portal allocation failed\n");
+ goto err_mcportal;
+ }
+
+ /* DPDMAI initialization */
+ err = dpaa2_qdma_setup(dpdmai_dev);
+ if (err) {
+ dev_err(dev, "dpaa2_dpdmai_setup() failed\n");
+ goto err_dpdmai_setup;
+ }
+
+ /* DPIO */
+ err = dpaa2_qdma_dpio_setup(priv);
+ if (err) {
+ dev_err(dev, "dpaa2_dpdmai_dpio_setup() failed\n");
+ goto err_dpio_setup;
+ }
+
+ /* DPDMAI binding to DPIO */
+ err = dpaa2_dpdmai_bind(priv);
+ if (err) {
+ dev_err(dev, "dpaa2_dpdmai_bind() failed\n");
+ goto err_bind;
+ }
+
+ /* DPDMAI enable */
+ err = dpdmai_enable(priv->mc_io, 0, dpdmai_dev->mc_handle);
+ if (err) {
+ dev_err(dev, "dpdmai_enable() failed\n");
+ goto err_enable;
+ }
+
+ dpaa2_qdma = kzalloc(sizeof(*dpaa2_qdma), GFP_KERNEL);
+ if (!dpaa2_qdma) {
+ err = -ENOMEM;
+ goto err_eng;
+ }
+
+ priv->dpaa2_qdma = dpaa2_qdma;
+ dpaa2_qdma->priv = priv;
+
+ dpaa2_qdma->desc_allocated = 0;
+ dpaa2_qdma->n_chans = NUM_CH;
+
+ dpaa2_dpdmai_init_channels(dpaa2_qdma);
+
+ if (soc_device_match(soc_fixup_tuning))
+ dpaa2_qdma->qdma_wrtype_fixup = true;
+ else
+ dpaa2_qdma->qdma_wrtype_fixup = false;
+
+ dma_cap_set(DMA_PRIVATE, dpaa2_qdma->dma_dev.cap_mask);
+ dma_cap_set(DMA_SLAVE, dpaa2_qdma->dma_dev.cap_mask);
+ dma_cap_set(DMA_MEMCPY, dpaa2_qdma->dma_dev.cap_mask);
+
+ dpaa2_qdma->dma_dev.dev = dev;
+ dpaa2_qdma->dma_dev.device_alloc_chan_resources =
+ dpaa2_qdma_alloc_chan_resources;
+ dpaa2_qdma->dma_dev.device_free_chan_resources =
+ dpaa2_qdma_free_chan_resources;
+ dpaa2_qdma->dma_dev.device_tx_status = dma_cookie_status;
+ dpaa2_qdma->dma_dev.device_prep_dma_memcpy = dpaa2_qdma_prep_memcpy;
+ dpaa2_qdma->dma_dev.device_issue_pending = dpaa2_qdma_issue_pending;
+
+ err = dma_async_device_register(&dpaa2_qdma->dma_dev);
+ if (err) {
+ dev_err(dev, "Can't register NXP QDMA engine.\n");
+ goto err_dpaa2_qdma;
+ }
+
+ return 0;
+
+err_dpaa2_qdma:
+ kfree(dpaa2_qdma);
+err_eng:
+ dpdmai_disable(priv->mc_io, 0, dpdmai_dev->mc_handle);
+err_enable:
+ dpaa2_dpdmai_dpio_unbind(priv);
+err_bind:
+ dpaa2_dpmai_store_free(priv);
+ dpaa2_dpdmai_dpio_free(priv);
+err_dpio_setup:
+ kfree(priv->ppriv);
+ dpdmai_close(priv->mc_io, 0, dpdmai_dev->mc_handle);
+err_dpdmai_setup:
+ fsl_mc_portal_free(priv->mc_io);
+err_mcportal:
+ kfree(priv);
+ dev_set_drvdata(dev, NULL);
+ return err;
+}
+
+static int dpaa2_qdma_remove(struct fsl_mc_device *ls_dev)
+{
+ struct dpaa2_qdma_engine *dpaa2_qdma;
+ struct dpaa2_qdma_priv *priv;
+ struct device *dev;
+
+ dev = &ls_dev->dev;
+ priv = dev_get_drvdata(dev);
+ dpaa2_qdma = priv->dpaa2_qdma;
+
+ dpdmai_disable(priv->mc_io, 0, ls_dev->mc_handle);
+ dpaa2_dpdmai_dpio_unbind(priv);
+ dpaa2_dpmai_store_free(priv);
+ dpaa2_dpdmai_dpio_free(priv);
+ dpdmai_close(priv->mc_io, 0, ls_dev->mc_handle);
+ fsl_mc_portal_free(priv->mc_io);
+ dev_set_drvdata(dev, NULL);
+ dpaa2_dpdmai_free_channels(dpaa2_qdma);
+
+ dma_async_device_unregister(&dpaa2_qdma->dma_dev);
+ kfree(priv);
+ kfree(dpaa2_qdma);
+
+ return 0;
+}
+
+static void dpaa2_qdma_shutdown(struct fsl_mc_device *ls_dev)
+{
+ struct dpaa2_qdma_priv *priv;
+ struct device *dev;
+
+ dev = &ls_dev->dev;
+ priv = dev_get_drvdata(dev);
+
+ dpdmai_disable(priv->mc_io, 0, ls_dev->mc_handle);
+ dpaa2_dpdmai_dpio_unbind(priv);
+ dpdmai_close(priv->mc_io, 0, ls_dev->mc_handle);
+ dpdmai_destroy(priv->mc_io, 0, ls_dev->mc_handle);
+}
+
+static const struct fsl_mc_device_id dpaa2_qdma_id_table[] = {
+ {
+ .vendor = FSL_MC_VENDOR_FREESCALE,
+ .obj_type = "dpdmai",
+ },
+ { .vendor = 0x0 }
+};
+
+static struct fsl_mc_driver dpaa2_qdma_driver = {
+ .driver = {
+ .name = "dpaa2-qdma",
+ .owner = THIS_MODULE,
+ },
+ .probe = dpaa2_qdma_probe,
+ .remove = dpaa2_qdma_remove,
+ .shutdown = dpaa2_qdma_shutdown,
+ .match_id_table = dpaa2_qdma_id_table
+};
+
+static int __init dpaa2_qdma_driver_init(void)
+{
+ return fsl_mc_driver_register(&(dpaa2_qdma_driver));
+}
+late_initcall(dpaa2_qdma_driver_init);
+
+static void __exit fsl_qdma_exit(void)
+{
+ fsl_mc_driver_unregister(&(dpaa2_qdma_driver));
+}
+module_exit(fsl_qdma_exit);
+
+MODULE_ALIAS("platform:fsl-dpaa2-qdma");
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("NXP Layerscape DPAA2 qDMA engine driver");
diff --git a/drivers/dma/fsl-dpaa2-qdma/dpaa2-qdma.h b/drivers/dma/fsl-dpaa2-qdma/dpaa2-qdma.h
new file mode 100644
index 000000000..03e2f4e0b
--- /dev/null
+++ b/drivers/dma/fsl-dpaa2-qdma/dpaa2-qdma.h
@@ -0,0 +1,153 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright 2019 NXP */
+
+#ifndef __DPAA2_QDMA_H
+#define __DPAA2_QDMA_H
+
+#define DPAA2_QDMA_STORE_SIZE 16
+#define NUM_CH 8
+
+struct dpaa2_qdma_sd_d {
+ u32 rsv:32;
+ union {
+ struct {
+ u32 ssd:12; /* souce stride distance */
+ u32 sss:12; /* souce stride size */
+ u32 rsv1:8;
+ } sdf;
+ struct {
+ u32 dsd:12; /* Destination stride distance */
+ u32 dss:12; /* Destination stride size */
+ u32 rsv2:8;
+ } ddf;
+ } df;
+ u32 rbpcmd; /* Route-by-port command */
+ u32 cmd;
+} __attribute__((__packed__));
+
+/* Source descriptor command read transaction type for RBP=0: */
+/* coherent copy of cacheable memory */
+#define QDMA_SD_CMD_RDTTYPE_COHERENT (0xb << 28)
+/* Destination descriptor command write transaction type for RBP=0: */
+/* coherent copy of cacheable memory */
+#define QDMA_DD_CMD_WRTTYPE_COHERENT (0x6 << 28)
+#define LX2160_QDMA_DD_CMD_WRTTYPE_COHERENT (0xb << 28)
+
+#define QMAN_FD_FMT_ENABLE BIT(0) /* frame list table enable */
+#define QMAN_FD_BMT_ENABLE BIT(15) /* bypass memory translation */
+#define QMAN_FD_BMT_DISABLE (0) /* bypass memory translation */
+#define QMAN_FD_SL_DISABLE (0) /* short lengthe disabled */
+#define QMAN_FD_SL_ENABLE BIT(14) /* short lengthe enabled */
+
+#define QDMA_FINAL_BIT_DISABLE (0) /* final bit disable */
+#define QDMA_FINAL_BIT_ENABLE BIT(31) /* final bit enable */
+
+#define QDMA_FD_SHORT_FORMAT BIT(11) /* short format */
+#define QDMA_FD_LONG_FORMAT (0) /* long format */
+#define QDMA_SER_DISABLE (8) /* no notification */
+#define QDMA_SER_CTX BIT(8) /* notification by FQD_CTX[fqid] */
+#define QDMA_SER_DEST (2 << 8) /* notification by destination desc */
+#define QDMA_SER_BOTH (3 << 8) /* soruce and dest notification */
+#define QDMA_FD_SPF_ENALBE BIT(30) /* source prefetch enable */
+
+#define QMAN_FD_VA_ENABLE BIT(14) /* Address used is virtual address */
+#define QMAN_FD_VA_DISABLE (0)/* Address used is a real address */
+/* Flow Context: 49bit physical address */
+#define QMAN_FD_CBMT_ENABLE BIT(15)
+#define QMAN_FD_CBMT_DISABLE (0) /* Flow Context: 64bit virtual address */
+#define QMAN_FD_SC_DISABLE (0) /* stashing control */
+
+#define QDMA_FL_FMT_SBF (0x0) /* Single buffer frame */
+#define QDMA_FL_FMT_SGE (0x2) /* Scatter gather frame */
+#define QDMA_FL_BMT_ENABLE BIT(15) /* enable bypass memory translation */
+#define QDMA_FL_BMT_DISABLE (0x0) /* enable bypass memory translation */
+#define QDMA_FL_SL_LONG (0x0)/* long length */
+#define QDMA_FL_SL_SHORT (0x1) /* short length */
+#define QDMA_FL_F (0x1)/* last frame list bit */
+
+/*Description of Frame list table structure*/
+struct dpaa2_qdma_chan {
+ struct dpaa2_qdma_engine *qdma;
+ struct virt_dma_chan vchan;
+ struct virt_dma_desc vdesc;
+ enum dma_status status;
+ u32 fqid;
+
+ /* spinlock used by dpaa2 qdma driver */
+ spinlock_t queue_lock;
+ struct dma_pool *fd_pool;
+ struct dma_pool *fl_pool;
+ struct dma_pool *sdd_pool;
+
+ struct list_head comp_used;
+ struct list_head comp_free;
+
+};
+
+struct dpaa2_qdma_comp {
+ dma_addr_t fd_bus_addr;
+ dma_addr_t fl_bus_addr;
+ dma_addr_t desc_bus_addr;
+ struct dpaa2_fd *fd_virt_addr;
+ struct dpaa2_fl_entry *fl_virt_addr;
+ struct dpaa2_qdma_sd_d *desc_virt_addr;
+ struct dpaa2_qdma_chan *qchan;
+ struct virt_dma_desc vdesc;
+ struct list_head list;
+};
+
+struct dpaa2_qdma_engine {
+ struct dma_device dma_dev;
+ u32 n_chans;
+ struct dpaa2_qdma_chan chans[NUM_CH];
+ int qdma_wrtype_fixup;
+ int desc_allocated;
+
+ struct dpaa2_qdma_priv *priv;
+};
+
+/*
+ * dpaa2_qdma_priv - driver private data
+ */
+struct dpaa2_qdma_priv {
+ int dpqdma_id;
+
+ struct iommu_domain *iommu_domain;
+ struct dpdmai_attr dpdmai_attr;
+ struct device *dev;
+ struct fsl_mc_io *mc_io;
+ struct fsl_mc_device *dpdmai_dev;
+ u8 num_pairs;
+
+ struct dpaa2_qdma_engine *dpaa2_qdma;
+ struct dpaa2_qdma_priv_per_prio *ppriv;
+
+ struct dpdmai_rx_queue_attr rx_queue_attr[DPDMAI_PRIO_NUM];
+ u32 tx_fqid[DPDMAI_PRIO_NUM];
+};
+
+struct dpaa2_qdma_priv_per_prio {
+ int req_fqid;
+ int rsp_fqid;
+ int prio;
+
+ struct dpaa2_io_store *store;
+ struct dpaa2_io_notification_ctx nctx;
+
+ struct dpaa2_qdma_priv *priv;
+};
+
+static struct soc_device_attribute soc_fixup_tuning[] = {
+ { .family = "QorIQ LX2160A"},
+ { /* sentinel */ }
+};
+
+/* FD pool size: one FD + 3 Frame list + 2 source/destination descriptor */
+#define FD_POOL_SIZE (sizeof(struct dpaa2_fd) + \
+ sizeof(struct dpaa2_fl_entry) * 3 + \
+ sizeof(struct dpaa2_qdma_sd_d) * 2)
+
+static void dpaa2_dpdmai_free_channels(struct dpaa2_qdma_engine *dpaa2_qdma);
+static void dpaa2_dpdmai_free_comp(struct dpaa2_qdma_chan *qchan,
+ struct list_head *head);
+#endif /* __DPAA2_QDMA_H */
diff --git a/drivers/dma/fsl-dpaa2-qdma/dpdmai.c b/drivers/dma/fsl-dpaa2-qdma/dpdmai.c
new file mode 100644
index 000000000..878662aaa
--- /dev/null
+++ b/drivers/dma/fsl-dpaa2-qdma/dpdmai.c
@@ -0,0 +1,397 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright 2019 NXP
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/io.h>
+#include <linux/fsl/mc.h>
+#include "dpdmai.h"
+
+struct dpdmai_rsp_get_attributes {
+ __le32 id;
+ u8 num_of_priorities;
+ u8 pad0[3];
+ __le16 major;
+ __le16 minor;
+};
+
+struct dpdmai_cmd_queue {
+ __le32 dest_id;
+ u8 priority;
+ u8 queue;
+ u8 dest_type;
+ u8 pad;
+ __le64 user_ctx;
+ union {
+ __le32 options;
+ __le32 fqid;
+ };
+};
+
+struct dpdmai_rsp_get_tx_queue {
+ __le64 pad;
+ __le32 fqid;
+};
+
+#define MC_CMD_OP(_cmd, _param, _offset, _width, _type, _arg) \
+ ((_cmd).params[_param] |= mc_enc((_offset), (_width), _arg))
+
+/* cmd, param, offset, width, type, arg_name */
+#define DPDMAI_CMD_CREATE(cmd, cfg) \
+do { \
+ MC_CMD_OP(cmd, 0, 8, 8, u8, (cfg)->priorities[0]);\
+ MC_CMD_OP(cmd, 0, 16, 8, u8, (cfg)->priorities[1]);\
+} while (0)
+
+static inline u64 mc_enc(int lsoffset, int width, u64 val)
+{
+ return (val & MAKE_UMASK64(width)) << lsoffset;
+}
+
+/**
+ * dpdmai_open() - Open a control session for the specified object
+ * @mc_io: Pointer to MC portal's I/O object
+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
+ * @dpdmai_id: DPDMAI unique ID
+ * @token: Returned token; use in subsequent API calls
+ *
+ * This function can be used to open a control session for an
+ * already created object; an object may have been declared in
+ * the DPL or by calling the dpdmai_create() function.
+ * This function returns a unique authentication token,
+ * associated with the specific object ID and the specific MC
+ * portal; this token must be used in all subsequent commands for
+ * this specific object.
+ *
+ * Return: '0' on Success; Error code otherwise.
+ */
+int dpdmai_open(struct fsl_mc_io *mc_io, u32 cmd_flags,
+ int dpdmai_id, u16 *token)
+{
+ struct fsl_mc_command cmd = { 0 };
+ __le64 *cmd_dpdmai_id;
+ int err;
+
+ /* prepare command */
+ cmd.header = mc_encode_cmd_header(DPDMAI_CMDID_OPEN,
+ cmd_flags, 0);
+
+ cmd_dpdmai_id = cmd.params;
+ *cmd_dpdmai_id = cpu_to_le32(dpdmai_id);
+
+ /* send command to mc*/
+ err = mc_send_command(mc_io, &cmd);
+ if (err)
+ return err;
+
+ /* retrieve response parameters */
+ *token = mc_cmd_hdr_read_token(&cmd);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(dpdmai_open);
+
+/**
+ * dpdmai_close() - Close the control session of the object
+ * @mc_io: Pointer to MC portal's I/O object
+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token: Token of DPDMAI object
+ *
+ * After this function is called, no further operations are
+ * allowed on the object without opening a new control session.
+ *
+ * Return: '0' on Success; Error code otherwise.
+ */
+int dpdmai_close(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token)
+{
+ struct fsl_mc_command cmd = { 0 };
+
+ /* prepare command */
+ cmd.header = mc_encode_cmd_header(DPDMAI_CMDID_CLOSE,
+ cmd_flags, token);
+
+ /* send command to mc*/
+ return mc_send_command(mc_io, &cmd);
+}
+EXPORT_SYMBOL_GPL(dpdmai_close);
+
+/**
+ * dpdmai_create() - Create the DPDMAI object
+ * @mc_io: Pointer to MC portal's I/O object
+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
+ * @cfg: Configuration structure
+ * @token: Returned token; use in subsequent API calls
+ *
+ * Create the DPDMAI object, allocate required resources and
+ * perform required initialization.
+ *
+ * The object can be created either by declaring it in the
+ * DPL file, or by calling this function.
+ *
+ * This function returns a unique authentication token,
+ * associated with the specific object ID and the specific MC
+ * portal; this token must be used in all subsequent calls to
+ * this specific object. For objects that are created using the
+ * DPL file, call dpdmai_open() function to get an authentication
+ * token first.
+ *
+ * Return: '0' on Success; Error code otherwise.
+ */
+int dpdmai_create(struct fsl_mc_io *mc_io, u32 cmd_flags,
+ const struct dpdmai_cfg *cfg, u16 *token)
+{
+ struct fsl_mc_command cmd = { 0 };
+ int err;
+
+ /* prepare command */
+ cmd.header = mc_encode_cmd_header(DPDMAI_CMDID_CREATE,
+ cmd_flags, 0);
+ DPDMAI_CMD_CREATE(cmd, cfg);
+
+ /* send command to mc*/
+ err = mc_send_command(mc_io, &cmd);
+ if (err)
+ return err;
+
+ /* retrieve response parameters */
+ *token = mc_cmd_hdr_read_token(&cmd);
+
+ return 0;
+}
+
+/**
+ * dpdmai_destroy() - Destroy the DPDMAI object and release all its resources.
+ * @mc_io: Pointer to MC portal's I/O object
+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token: Token of DPDMAI object
+ *
+ * Return: '0' on Success; error code otherwise.
+ */
+int dpdmai_destroy(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token)
+{
+ struct fsl_mc_command cmd = { 0 };
+
+ /* prepare command */
+ cmd.header = mc_encode_cmd_header(DPDMAI_CMDID_DESTROY,
+ cmd_flags, token);
+
+ /* send command to mc*/
+ return mc_send_command(mc_io, &cmd);
+}
+EXPORT_SYMBOL_GPL(dpdmai_destroy);
+
+/**
+ * dpdmai_enable() - Enable the DPDMAI, allow sending and receiving frames.
+ * @mc_io: Pointer to MC portal's I/O object
+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token: Token of DPDMAI object
+ *
+ * Return: '0' on Success; Error code otherwise.
+ */
+int dpdmai_enable(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token)
+{
+ struct fsl_mc_command cmd = { 0 };
+
+ /* prepare command */
+ cmd.header = mc_encode_cmd_header(DPDMAI_CMDID_ENABLE,
+ cmd_flags, token);
+
+ /* send command to mc*/
+ return mc_send_command(mc_io, &cmd);
+}
+EXPORT_SYMBOL_GPL(dpdmai_enable);
+
+/**
+ * dpdmai_disable() - Disable the DPDMAI, stop sending and receiving frames.
+ * @mc_io: Pointer to MC portal's I/O object
+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token: Token of DPDMAI object
+ *
+ * Return: '0' on Success; Error code otherwise.
+ */
+int dpdmai_disable(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token)
+{
+ struct fsl_mc_command cmd = { 0 };
+
+ /* prepare command */
+ cmd.header = mc_encode_cmd_header(DPDMAI_CMDID_DISABLE,
+ cmd_flags, token);
+
+ /* send command to mc*/
+ return mc_send_command(mc_io, &cmd);
+}
+EXPORT_SYMBOL_GPL(dpdmai_disable);
+
+/**
+ * dpdmai_reset() - Reset the DPDMAI, returns the object to initial state.
+ * @mc_io: Pointer to MC portal's I/O object
+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token: Token of DPDMAI object
+ *
+ * Return: '0' on Success; Error code otherwise.
+ */
+int dpdmai_reset(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token)
+{
+ struct fsl_mc_command cmd = { 0 };
+
+ /* prepare command */
+ cmd.header = mc_encode_cmd_header(DPDMAI_CMDID_RESET,
+ cmd_flags, token);
+
+ /* send command to mc*/
+ return mc_send_command(mc_io, &cmd);
+}
+EXPORT_SYMBOL_GPL(dpdmai_reset);
+
+/**
+ * dpdmai_get_attributes() - Retrieve DPDMAI attributes.
+ * @mc_io: Pointer to MC portal's I/O object
+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token: Token of DPDMAI object
+ * @attr: Returned object's attributes
+ *
+ * Return: '0' on Success; Error code otherwise.
+ */
+int dpdmai_get_attributes(struct fsl_mc_io *mc_io, u32 cmd_flags,
+ u16 token, struct dpdmai_attr *attr)
+{
+ struct dpdmai_rsp_get_attributes *rsp_params;
+ struct fsl_mc_command cmd = { 0 };
+ int err;
+
+ /* prepare command */
+ cmd.header = mc_encode_cmd_header(DPDMAI_CMDID_GET_ATTR,
+ cmd_flags, token);
+
+ /* send command to mc*/
+ err = mc_send_command(mc_io, &cmd);
+ if (err)
+ return err;
+
+ /* retrieve response parameters */
+ rsp_params = (struct dpdmai_rsp_get_attributes *)cmd.params;
+ attr->id = le32_to_cpu(rsp_params->id);
+ attr->version.major = le16_to_cpu(rsp_params->major);
+ attr->version.minor = le16_to_cpu(rsp_params->minor);
+ attr->num_of_priorities = rsp_params->num_of_priorities;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(dpdmai_get_attributes);
+
+/**
+ * dpdmai_set_rx_queue() - Set Rx queue configuration
+ * @mc_io: Pointer to MC portal's I/O object
+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token: Token of DPDMAI object
+ * @priority: Select the queue relative to number of
+ * priorities configured at DPDMAI creation
+ * @cfg: Rx queue configuration
+ *
+ * Return: '0' on Success; Error code otherwise.
+ */
+int dpdmai_set_rx_queue(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+ u8 priority, const struct dpdmai_rx_queue_cfg *cfg)
+{
+ struct dpdmai_cmd_queue *cmd_params;
+ struct fsl_mc_command cmd = { 0 };
+
+ /* prepare command */
+ cmd.header = mc_encode_cmd_header(DPDMAI_CMDID_SET_RX_QUEUE,
+ cmd_flags, token);
+
+ cmd_params = (struct dpdmai_cmd_queue *)cmd.params;
+ cmd_params->dest_id = cpu_to_le32(cfg->dest_cfg.dest_id);
+ cmd_params->priority = cfg->dest_cfg.priority;
+ cmd_params->queue = priority;
+ cmd_params->dest_type = cfg->dest_cfg.dest_type;
+ cmd_params->user_ctx = cpu_to_le64(cfg->user_ctx);
+ cmd_params->options = cpu_to_le32(cfg->options);
+
+ /* send command to mc*/
+ return mc_send_command(mc_io, &cmd);
+}
+EXPORT_SYMBOL_GPL(dpdmai_set_rx_queue);
+
+/**
+ * dpdmai_get_rx_queue() - Retrieve Rx queue attributes.
+ * @mc_io: Pointer to MC portal's I/O object
+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token: Token of DPDMAI object
+ * @priority: Select the queue relative to number of
+ * priorities configured at DPDMAI creation
+ * @attr: Returned Rx queue attributes
+ *
+ * Return: '0' on Success; Error code otherwise.
+ */
+int dpdmai_get_rx_queue(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+ u8 priority, struct dpdmai_rx_queue_attr *attr)
+{
+ struct dpdmai_cmd_queue *cmd_params;
+ struct fsl_mc_command cmd = { 0 };
+ int err;
+
+ /* prepare command */
+ cmd.header = mc_encode_cmd_header(DPDMAI_CMDID_GET_RX_QUEUE,
+ cmd_flags, token);
+
+ cmd_params = (struct dpdmai_cmd_queue *)cmd.params;
+ cmd_params->queue = priority;
+
+ /* send command to mc*/
+ err = mc_send_command(mc_io, &cmd);
+ if (err)
+ return err;
+
+ /* retrieve response parameters */
+ attr->dest_cfg.dest_id = le32_to_cpu(cmd_params->dest_id);
+ attr->dest_cfg.priority = cmd_params->priority;
+ attr->dest_cfg.dest_type = cmd_params->dest_type;
+ attr->user_ctx = le64_to_cpu(cmd_params->user_ctx);
+ attr->fqid = le32_to_cpu(cmd_params->fqid);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(dpdmai_get_rx_queue);
+
+/**
+ * dpdmai_get_tx_queue() - Retrieve Tx queue attributes.
+ * @mc_io: Pointer to MC portal's I/O object
+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token: Token of DPDMAI object
+ * @priority: Select the queue relative to number of
+ * priorities configured at DPDMAI creation
+ * @fqid: Returned Tx queue
+ *
+ * Return: '0' on Success; Error code otherwise.
+ */
+int dpdmai_get_tx_queue(struct fsl_mc_io *mc_io, u32 cmd_flags,
+ u16 token, u8 priority, u32 *fqid)
+{
+ struct dpdmai_rsp_get_tx_queue *rsp_params;
+ struct dpdmai_cmd_queue *cmd_params;
+ struct fsl_mc_command cmd = { 0 };
+ int err;
+
+ /* prepare command */
+ cmd.header = mc_encode_cmd_header(DPDMAI_CMDID_GET_TX_QUEUE,
+ cmd_flags, token);
+
+ cmd_params = (struct dpdmai_cmd_queue *)cmd.params;
+ cmd_params->queue = priority;
+
+ /* send command to mc*/
+ err = mc_send_command(mc_io, &cmd);
+ if (err)
+ return err;
+
+ /* retrieve response parameters */
+
+ rsp_params = (struct dpdmai_rsp_get_tx_queue *)cmd.params;
+ *fqid = le32_to_cpu(rsp_params->fqid);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(dpdmai_get_tx_queue);
+
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/fsl-dpaa2-qdma/dpdmai.h b/drivers/dma/fsl-dpaa2-qdma/dpdmai.h
new file mode 100644
index 000000000..b13b9bf0c
--- /dev/null
+++ b/drivers/dma/fsl-dpaa2-qdma/dpdmai.h
@@ -0,0 +1,179 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright 2019 NXP */
+
+#ifndef __FSL_DPDMAI_H
+#define __FSL_DPDMAI_H
+
+/* DPDMAI Version */
+#define DPDMAI_VER_MAJOR 2
+#define DPDMAI_VER_MINOR 2
+
+#define DPDMAI_CMD_BASE_VERSION 0
+#define DPDMAI_CMD_ID_OFFSET 4
+
+#define DPDMAI_CMDID_FORMAT(x) (((x) << DPDMAI_CMD_ID_OFFSET) | \
+ DPDMAI_CMD_BASE_VERSION)
+
+/* Command IDs */
+#define DPDMAI_CMDID_CLOSE DPDMAI_CMDID_FORMAT(0x800)
+#define DPDMAI_CMDID_OPEN DPDMAI_CMDID_FORMAT(0x80E)
+#define DPDMAI_CMDID_CREATE DPDMAI_CMDID_FORMAT(0x90E)
+#define DPDMAI_CMDID_DESTROY DPDMAI_CMDID_FORMAT(0x900)
+
+#define DPDMAI_CMDID_ENABLE DPDMAI_CMDID_FORMAT(0x002)
+#define DPDMAI_CMDID_DISABLE DPDMAI_CMDID_FORMAT(0x003)
+#define DPDMAI_CMDID_GET_ATTR DPDMAI_CMDID_FORMAT(0x004)
+#define DPDMAI_CMDID_RESET DPDMAI_CMDID_FORMAT(0x005)
+#define DPDMAI_CMDID_IS_ENABLED DPDMAI_CMDID_FORMAT(0x006)
+
+#define DPDMAI_CMDID_SET_IRQ DPDMAI_CMDID_FORMAT(0x010)
+#define DPDMAI_CMDID_GET_IRQ DPDMAI_CMDID_FORMAT(0x011)
+#define DPDMAI_CMDID_SET_IRQ_ENABLE DPDMAI_CMDID_FORMAT(0x012)
+#define DPDMAI_CMDID_GET_IRQ_ENABLE DPDMAI_CMDID_FORMAT(0x013)
+#define DPDMAI_CMDID_SET_IRQ_MASK DPDMAI_CMDID_FORMAT(0x014)
+#define DPDMAI_CMDID_GET_IRQ_MASK DPDMAI_CMDID_FORMAT(0x015)
+#define DPDMAI_CMDID_GET_IRQ_STATUS DPDMAI_CMDID_FORMAT(0x016)
+#define DPDMAI_CMDID_CLEAR_IRQ_STATUS DPDMAI_CMDID_FORMAT(0x017)
+
+#define DPDMAI_CMDID_SET_RX_QUEUE DPDMAI_CMDID_FORMAT(0x1A0)
+#define DPDMAI_CMDID_GET_RX_QUEUE DPDMAI_CMDID_FORMAT(0x1A1)
+#define DPDMAI_CMDID_GET_TX_QUEUE DPDMAI_CMDID_FORMAT(0x1A2)
+
+#define MC_CMD_HDR_TOKEN_O 32 /* Token field offset */
+#define MC_CMD_HDR_TOKEN_S 16 /* Token field size */
+
+#define MAKE_UMASK64(_width) \
+ ((u64)((_width) < 64 ? ((u64)1 << (_width)) - 1 : (u64)-1))
+
+/* Data Path DMA Interface API
+ * Contains initialization APIs and runtime control APIs for DPDMAI
+ */
+
+/**
+ * Maximum number of Tx/Rx priorities per DPDMAI object
+ */
+#define DPDMAI_PRIO_NUM 2
+
+/* DPDMAI queue modification options */
+
+/**
+ * Select to modify the user's context associated with the queue
+ */
+#define DPDMAI_QUEUE_OPT_USER_CTX 0x1
+
+/**
+ * Select to modify the queue's destination
+ */
+#define DPDMAI_QUEUE_OPT_DEST 0x2
+
+/**
+ * struct dpdmai_cfg - Structure representing DPDMAI configuration
+ * @priorities: Priorities for the DMA hardware processing; valid priorities are
+ * configured with values 1-8; the entry following last valid entry
+ * should be configured with 0
+ */
+struct dpdmai_cfg {
+ u8 priorities[DPDMAI_PRIO_NUM];
+};
+
+/**
+ * struct dpdmai_attr - Structure representing DPDMAI attributes
+ * @id: DPDMAI object ID
+ * @version: DPDMAI version
+ * @num_of_priorities: number of priorities
+ */
+struct dpdmai_attr {
+ int id;
+ /**
+ * struct version - DPDMAI version
+ * @major: DPDMAI major version
+ * @minor: DPDMAI minor version
+ */
+ struct {
+ u16 major;
+ u16 minor;
+ } version;
+ u8 num_of_priorities;
+};
+
+/**
+ * enum dpdmai_dest - DPDMAI destination types
+ * @DPDMAI_DEST_NONE: Unassigned destination; The queue is set in parked mode
+ * and does not generate FQDAN notifications; user is expected to dequeue
+ * from the queue based on polling or other user-defined method
+ * @DPDMAI_DEST_DPIO: The queue is set in schedule mode and generates FQDAN
+ * notifications to the specified DPIO; user is expected to dequeue
+ * from the queue only after notification is received
+ * @DPDMAI_DEST_DPCON: The queue is set in schedule mode and does not generate
+ * FQDAN notifications, but is connected to the specified DPCON object;
+ * user is expected to dequeue from the DPCON channel
+ */
+enum dpdmai_dest {
+ DPDMAI_DEST_NONE = 0,
+ DPDMAI_DEST_DPIO = 1,
+ DPDMAI_DEST_DPCON = 2
+};
+
+/**
+ * struct dpdmai_dest_cfg - Structure representing DPDMAI destination parameters
+ * @dest_type: Destination type
+ * @dest_id: Either DPIO ID or DPCON ID, depending on the destination type
+ * @priority: Priority selection within the DPIO or DPCON channel; valid values
+ * are 0-1 or 0-7, depending on the number of priorities in that
+ * channel; not relevant for 'DPDMAI_DEST_NONE' option
+ */
+struct dpdmai_dest_cfg {
+ enum dpdmai_dest dest_type;
+ int dest_id;
+ u8 priority;
+};
+
+/**
+ * struct dpdmai_rx_queue_cfg - DPDMAI RX queue configuration
+ * @options: Flags representing the suggested modifications to the queue;
+ * Use any combination of 'DPDMAI_QUEUE_OPT_<X>' flags
+ * @user_ctx: User context value provided in the frame descriptor of each
+ * dequeued frame;
+ * valid only if 'DPDMAI_QUEUE_OPT_USER_CTX' is contained in 'options'
+ * @dest_cfg: Queue destination parameters;
+ * valid only if 'DPDMAI_QUEUE_OPT_DEST' is contained in 'options'
+ */
+struct dpdmai_rx_queue_cfg {
+ struct dpdmai_dest_cfg dest_cfg;
+ u32 options;
+ u64 user_ctx;
+
+};
+
+/**
+ * struct dpdmai_rx_queue_attr - Structure representing attributes of Rx queues
+ * @user_ctx: User context value provided in the frame descriptor of each
+ * dequeued frame
+ * @dest_cfg: Queue destination configuration
+ * @fqid: Virtual FQID value to be used for dequeue operations
+ */
+struct dpdmai_rx_queue_attr {
+ struct dpdmai_dest_cfg dest_cfg;
+ u64 user_ctx;
+ u32 fqid;
+};
+
+int dpdmai_open(struct fsl_mc_io *mc_io, u32 cmd_flags,
+ int dpdmai_id, u16 *token);
+int dpdmai_close(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token);
+int dpdmai_destroy(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token);
+int dpdmai_create(struct fsl_mc_io *mc_io, u32 cmd_flags,
+ const struct dpdmai_cfg *cfg, u16 *token);
+int dpdmai_enable(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token);
+int dpdmai_disable(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token);
+int dpdmai_reset(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token);
+int dpdmai_get_attributes(struct fsl_mc_io *mc_io, u32 cmd_flags,
+ u16 token, struct dpdmai_attr *attr);
+int dpdmai_set_rx_queue(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+ u8 priority, const struct dpdmai_rx_queue_cfg *cfg);
+int dpdmai_get_rx_queue(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+ u8 priority, struct dpdmai_rx_queue_attr *attr);
+int dpdmai_get_tx_queue(struct fsl_mc_io *mc_io, u32 cmd_flags,
+ u16 token, u8 priority, u32 *fqid);
+
+#endif /* __FSL_DPDMAI_H */
diff --git a/drivers/dma/fsl-edma-common.c b/drivers/dma/fsl-edma-common.c
new file mode 100644
index 000000000..a06a1575a
--- /dev/null
+++ b/drivers/dma/fsl-edma-common.c
@@ -0,0 +1,748 @@
+// SPDX-License-Identifier: GPL-2.0+
+//
+// Copyright (c) 2013-2014 Freescale Semiconductor, Inc
+// Copyright (c) 2017 Sysam, Angelo Dureghello <angelo@sysam.it>
+
+#include <linux/dmapool.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/dma-mapping.h>
+
+#include "fsl-edma-common.h"
+
+#define EDMA_CR 0x00
+#define EDMA_ES 0x04
+#define EDMA_ERQ 0x0C
+#define EDMA_EEI 0x14
+#define EDMA_SERQ 0x1B
+#define EDMA_CERQ 0x1A
+#define EDMA_SEEI 0x19
+#define EDMA_CEEI 0x18
+#define EDMA_CINT 0x1F
+#define EDMA_CERR 0x1E
+#define EDMA_SSRT 0x1D
+#define EDMA_CDNE 0x1C
+#define EDMA_INTR 0x24
+#define EDMA_ERR 0x2C
+
+#define EDMA64_ERQH 0x08
+#define EDMA64_EEIH 0x10
+#define EDMA64_SERQ 0x18
+#define EDMA64_CERQ 0x19
+#define EDMA64_SEEI 0x1a
+#define EDMA64_CEEI 0x1b
+#define EDMA64_CINT 0x1c
+#define EDMA64_CERR 0x1d
+#define EDMA64_SSRT 0x1e
+#define EDMA64_CDNE 0x1f
+#define EDMA64_INTH 0x20
+#define EDMA64_INTL 0x24
+#define EDMA64_ERRH 0x28
+#define EDMA64_ERRL 0x2c
+
+#define EDMA_TCD 0x1000
+
+static void fsl_edma_enable_request(struct fsl_edma_chan *fsl_chan)
+{
+ struct edma_regs *regs = &fsl_chan->edma->regs;
+ u32 ch = fsl_chan->vchan.chan.chan_id;
+
+ if (fsl_chan->edma->drvdata->version == v1) {
+ edma_writeb(fsl_chan->edma, EDMA_SEEI_SEEI(ch), regs->seei);
+ edma_writeb(fsl_chan->edma, ch, regs->serq);
+ } else {
+ /* ColdFire is big endian, and accesses natively
+ * big endian I/O peripherals
+ */
+ iowrite8(EDMA_SEEI_SEEI(ch), regs->seei);
+ iowrite8(ch, regs->serq);
+ }
+}
+
+void fsl_edma_disable_request(struct fsl_edma_chan *fsl_chan)
+{
+ struct edma_regs *regs = &fsl_chan->edma->regs;
+ u32 ch = fsl_chan->vchan.chan.chan_id;
+
+ if (fsl_chan->edma->drvdata->version == v1) {
+ edma_writeb(fsl_chan->edma, ch, regs->cerq);
+ edma_writeb(fsl_chan->edma, EDMA_CEEI_CEEI(ch), regs->ceei);
+ } else {
+ /* ColdFire is big endian, and accesses natively
+ * big endian I/O peripherals
+ */
+ iowrite8(ch, regs->cerq);
+ iowrite8(EDMA_CEEI_CEEI(ch), regs->ceei);
+ }
+}
+EXPORT_SYMBOL_GPL(fsl_edma_disable_request);
+
+static void mux_configure8(struct fsl_edma_chan *fsl_chan, void __iomem *addr,
+ u32 off, u32 slot, bool enable)
+{
+ u8 val8;
+
+ if (enable)
+ val8 = EDMAMUX_CHCFG_ENBL | slot;
+ else
+ val8 = EDMAMUX_CHCFG_DIS;
+
+ iowrite8(val8, addr + off);
+}
+
+static void mux_configure32(struct fsl_edma_chan *fsl_chan, void __iomem *addr,
+ u32 off, u32 slot, bool enable)
+{
+ u32 val;
+
+ if (enable)
+ val = EDMAMUX_CHCFG_ENBL << 24 | slot;
+ else
+ val = EDMAMUX_CHCFG_DIS;
+
+ iowrite32(val, addr + off * 4);
+}
+
+void fsl_edma_chan_mux(struct fsl_edma_chan *fsl_chan,
+ unsigned int slot, bool enable)
+{
+ u32 ch = fsl_chan->vchan.chan.chan_id;
+ void __iomem *muxaddr;
+ unsigned int chans_per_mux, ch_off;
+ int endian_diff[4] = {3, 1, -1, -3};
+ u32 dmamux_nr = fsl_chan->edma->drvdata->dmamuxs;
+
+ chans_per_mux = fsl_chan->edma->n_chans / dmamux_nr;
+ ch_off = fsl_chan->vchan.chan.chan_id % chans_per_mux;
+
+ if (fsl_chan->edma->drvdata->mux_swap)
+ ch_off += endian_diff[ch_off % 4];
+
+ muxaddr = fsl_chan->edma->muxbase[ch / chans_per_mux];
+ slot = EDMAMUX_CHCFG_SOURCE(slot);
+
+ if (fsl_chan->edma->drvdata->version == v3)
+ mux_configure32(fsl_chan, muxaddr, ch_off, slot, enable);
+ else
+ mux_configure8(fsl_chan, muxaddr, ch_off, slot, enable);
+}
+EXPORT_SYMBOL_GPL(fsl_edma_chan_mux);
+
+static unsigned int fsl_edma_get_tcd_attr(enum dma_slave_buswidth addr_width)
+{
+ switch (addr_width) {
+ case 1:
+ return EDMA_TCD_ATTR_SSIZE_8BIT | EDMA_TCD_ATTR_DSIZE_8BIT;
+ case 2:
+ return EDMA_TCD_ATTR_SSIZE_16BIT | EDMA_TCD_ATTR_DSIZE_16BIT;
+ case 4:
+ return EDMA_TCD_ATTR_SSIZE_32BIT | EDMA_TCD_ATTR_DSIZE_32BIT;
+ case 8:
+ return EDMA_TCD_ATTR_SSIZE_64BIT | EDMA_TCD_ATTR_DSIZE_64BIT;
+ default:
+ return EDMA_TCD_ATTR_SSIZE_32BIT | EDMA_TCD_ATTR_DSIZE_32BIT;
+ }
+}
+
+void fsl_edma_free_desc(struct virt_dma_desc *vdesc)
+{
+ struct fsl_edma_desc *fsl_desc;
+ int i;
+
+ fsl_desc = to_fsl_edma_desc(vdesc);
+ for (i = 0; i < fsl_desc->n_tcds; i++)
+ dma_pool_free(fsl_desc->echan->tcd_pool, fsl_desc->tcd[i].vtcd,
+ fsl_desc->tcd[i].ptcd);
+ kfree(fsl_desc);
+}
+EXPORT_SYMBOL_GPL(fsl_edma_free_desc);
+
+int fsl_edma_terminate_all(struct dma_chan *chan)
+{
+ struct fsl_edma_chan *fsl_chan = to_fsl_edma_chan(chan);
+ unsigned long flags;
+ LIST_HEAD(head);
+
+ spin_lock_irqsave(&fsl_chan->vchan.lock, flags);
+ fsl_edma_disable_request(fsl_chan);
+ fsl_chan->edesc = NULL;
+ fsl_chan->idle = true;
+ vchan_get_all_descriptors(&fsl_chan->vchan, &head);
+ spin_unlock_irqrestore(&fsl_chan->vchan.lock, flags);
+ vchan_dma_desc_free_list(&fsl_chan->vchan, &head);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(fsl_edma_terminate_all);
+
+int fsl_edma_pause(struct dma_chan *chan)
+{
+ struct fsl_edma_chan *fsl_chan = to_fsl_edma_chan(chan);
+ unsigned long flags;
+
+ spin_lock_irqsave(&fsl_chan->vchan.lock, flags);
+ if (fsl_chan->edesc) {
+ fsl_edma_disable_request(fsl_chan);
+ fsl_chan->status = DMA_PAUSED;
+ fsl_chan->idle = true;
+ }
+ spin_unlock_irqrestore(&fsl_chan->vchan.lock, flags);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(fsl_edma_pause);
+
+int fsl_edma_resume(struct dma_chan *chan)
+{
+ struct fsl_edma_chan *fsl_chan = to_fsl_edma_chan(chan);
+ unsigned long flags;
+
+ spin_lock_irqsave(&fsl_chan->vchan.lock, flags);
+ if (fsl_chan->edesc) {
+ fsl_edma_enable_request(fsl_chan);
+ fsl_chan->status = DMA_IN_PROGRESS;
+ fsl_chan->idle = false;
+ }
+ spin_unlock_irqrestore(&fsl_chan->vchan.lock, flags);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(fsl_edma_resume);
+
+static void fsl_edma_unprep_slave_dma(struct fsl_edma_chan *fsl_chan)
+{
+ if (fsl_chan->dma_dir != DMA_NONE)
+ dma_unmap_resource(fsl_chan->vchan.chan.device->dev,
+ fsl_chan->dma_dev_addr,
+ fsl_chan->dma_dev_size,
+ fsl_chan->dma_dir, 0);
+ fsl_chan->dma_dir = DMA_NONE;
+}
+
+static bool fsl_edma_prep_slave_dma(struct fsl_edma_chan *fsl_chan,
+ enum dma_transfer_direction dir)
+{
+ struct device *dev = fsl_chan->vchan.chan.device->dev;
+ enum dma_data_direction dma_dir;
+ phys_addr_t addr = 0;
+ u32 size = 0;
+
+ switch (dir) {
+ case DMA_MEM_TO_DEV:
+ dma_dir = DMA_FROM_DEVICE;
+ addr = fsl_chan->cfg.dst_addr;
+ size = fsl_chan->cfg.dst_maxburst;
+ break;
+ case DMA_DEV_TO_MEM:
+ dma_dir = DMA_TO_DEVICE;
+ addr = fsl_chan->cfg.src_addr;
+ size = fsl_chan->cfg.src_maxburst;
+ break;
+ default:
+ dma_dir = DMA_NONE;
+ break;
+ }
+
+ /* Already mapped for this config? */
+ if (fsl_chan->dma_dir == dma_dir)
+ return true;
+
+ fsl_edma_unprep_slave_dma(fsl_chan);
+
+ fsl_chan->dma_dev_addr = dma_map_resource(dev, addr, size, dma_dir, 0);
+ if (dma_mapping_error(dev, fsl_chan->dma_dev_addr))
+ return false;
+ fsl_chan->dma_dev_size = size;
+ fsl_chan->dma_dir = dma_dir;
+
+ return true;
+}
+
+int fsl_edma_slave_config(struct dma_chan *chan,
+ struct dma_slave_config *cfg)
+{
+ struct fsl_edma_chan *fsl_chan = to_fsl_edma_chan(chan);
+
+ memcpy(&fsl_chan->cfg, cfg, sizeof(*cfg));
+ fsl_edma_unprep_slave_dma(fsl_chan);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(fsl_edma_slave_config);
+
+static size_t fsl_edma_desc_residue(struct fsl_edma_chan *fsl_chan,
+ struct virt_dma_desc *vdesc, bool in_progress)
+{
+ struct fsl_edma_desc *edesc = fsl_chan->edesc;
+ struct edma_regs *regs = &fsl_chan->edma->regs;
+ u32 ch = fsl_chan->vchan.chan.chan_id;
+ enum dma_transfer_direction dir = edesc->dirn;
+ dma_addr_t cur_addr, dma_addr;
+ size_t len, size;
+ int i;
+
+ /* calculate the total size in this desc */
+ for (len = i = 0; i < fsl_chan->edesc->n_tcds; i++)
+ len += le32_to_cpu(edesc->tcd[i].vtcd->nbytes)
+ * le16_to_cpu(edesc->tcd[i].vtcd->biter);
+
+ if (!in_progress)
+ return len;
+
+ if (dir == DMA_MEM_TO_DEV)
+ cur_addr = edma_readl(fsl_chan->edma, &regs->tcd[ch].saddr);
+ else
+ cur_addr = edma_readl(fsl_chan->edma, &regs->tcd[ch].daddr);
+
+ /* figure out the finished and calculate the residue */
+ for (i = 0; i < fsl_chan->edesc->n_tcds; i++) {
+ size = le32_to_cpu(edesc->tcd[i].vtcd->nbytes)
+ * le16_to_cpu(edesc->tcd[i].vtcd->biter);
+ if (dir == DMA_MEM_TO_DEV)
+ dma_addr = le32_to_cpu(edesc->tcd[i].vtcd->saddr);
+ else
+ dma_addr = le32_to_cpu(edesc->tcd[i].vtcd->daddr);
+
+ len -= size;
+ if (cur_addr >= dma_addr && cur_addr < dma_addr + size) {
+ len += dma_addr + size - cur_addr;
+ break;
+ }
+ }
+
+ return len;
+}
+
+enum dma_status fsl_edma_tx_status(struct dma_chan *chan,
+ dma_cookie_t cookie, struct dma_tx_state *txstate)
+{
+ struct fsl_edma_chan *fsl_chan = to_fsl_edma_chan(chan);
+ struct virt_dma_desc *vdesc;
+ enum dma_status status;
+ unsigned long flags;
+
+ status = dma_cookie_status(chan, cookie, txstate);
+ if (status == DMA_COMPLETE)
+ return status;
+
+ if (!txstate)
+ return fsl_chan->status;
+
+ spin_lock_irqsave(&fsl_chan->vchan.lock, flags);
+ vdesc = vchan_find_desc(&fsl_chan->vchan, cookie);
+ if (fsl_chan->edesc && cookie == fsl_chan->edesc->vdesc.tx.cookie)
+ txstate->residue =
+ fsl_edma_desc_residue(fsl_chan, vdesc, true);
+ else if (vdesc)
+ txstate->residue =
+ fsl_edma_desc_residue(fsl_chan, vdesc, false);
+ else
+ txstate->residue = 0;
+
+ spin_unlock_irqrestore(&fsl_chan->vchan.lock, flags);
+
+ return fsl_chan->status;
+}
+EXPORT_SYMBOL_GPL(fsl_edma_tx_status);
+
+static void fsl_edma_set_tcd_regs(struct fsl_edma_chan *fsl_chan,
+ struct fsl_edma_hw_tcd *tcd)
+{
+ struct fsl_edma_engine *edma = fsl_chan->edma;
+ struct edma_regs *regs = &fsl_chan->edma->regs;
+ u32 ch = fsl_chan->vchan.chan.chan_id;
+ u16 csr = 0;
+
+ /*
+ * TCD parameters are stored in struct fsl_edma_hw_tcd in little
+ * endian format. However, we need to load the TCD registers in
+ * big- or little-endian obeying the eDMA engine model endian,
+ * and this is performed from specific edma_write functions
+ */
+ edma_writew(edma, 0, &regs->tcd[ch].csr);
+
+ edma_writel(edma, (s32)tcd->saddr, &regs->tcd[ch].saddr);
+ edma_writel(edma, (s32)tcd->daddr, &regs->tcd[ch].daddr);
+
+ edma_writew(edma, (s16)tcd->attr, &regs->tcd[ch].attr);
+ edma_writew(edma, tcd->soff, &regs->tcd[ch].soff);
+
+ edma_writel(edma, (s32)tcd->nbytes, &regs->tcd[ch].nbytes);
+ edma_writel(edma, (s32)tcd->slast, &regs->tcd[ch].slast);
+
+ edma_writew(edma, (s16)tcd->citer, &regs->tcd[ch].citer);
+ edma_writew(edma, (s16)tcd->biter, &regs->tcd[ch].biter);
+ edma_writew(edma, (s16)tcd->doff, &regs->tcd[ch].doff);
+
+ edma_writel(edma, (s32)tcd->dlast_sga,
+ &regs->tcd[ch].dlast_sga);
+
+ if (fsl_chan->is_sw) {
+ csr = le16_to_cpu(tcd->csr);
+ csr |= EDMA_TCD_CSR_START;
+ tcd->csr = cpu_to_le16(csr);
+ }
+
+ edma_writew(edma, (s16)tcd->csr, &regs->tcd[ch].csr);
+}
+
+static inline
+void fsl_edma_fill_tcd(struct fsl_edma_hw_tcd *tcd, u32 src, u32 dst,
+ u16 attr, u16 soff, u32 nbytes, u32 slast, u16 citer,
+ u16 biter, u16 doff, u32 dlast_sga, bool major_int,
+ bool disable_req, bool enable_sg)
+{
+ u16 csr = 0;
+
+ /*
+ * eDMA hardware SGs require the TCDs to be stored in little
+ * endian format irrespective of the register endian model.
+ * So we put the value in little endian in memory, waiting
+ * for fsl_edma_set_tcd_regs doing the swap.
+ */
+ tcd->saddr = cpu_to_le32(src);
+ tcd->daddr = cpu_to_le32(dst);
+
+ tcd->attr = cpu_to_le16(attr);
+
+ tcd->soff = cpu_to_le16(soff);
+
+ tcd->nbytes = cpu_to_le32(nbytes);
+ tcd->slast = cpu_to_le32(slast);
+
+ tcd->citer = cpu_to_le16(EDMA_TCD_CITER_CITER(citer));
+ tcd->doff = cpu_to_le16(doff);
+
+ tcd->dlast_sga = cpu_to_le32(dlast_sga);
+
+ tcd->biter = cpu_to_le16(EDMA_TCD_BITER_BITER(biter));
+ if (major_int)
+ csr |= EDMA_TCD_CSR_INT_MAJOR;
+
+ if (disable_req)
+ csr |= EDMA_TCD_CSR_D_REQ;
+
+ if (enable_sg)
+ csr |= EDMA_TCD_CSR_E_SG;
+
+ tcd->csr = cpu_to_le16(csr);
+}
+
+static struct fsl_edma_desc *fsl_edma_alloc_desc(struct fsl_edma_chan *fsl_chan,
+ int sg_len)
+{
+ struct fsl_edma_desc *fsl_desc;
+ int i;
+
+ fsl_desc = kzalloc(struct_size(fsl_desc, tcd, sg_len), GFP_NOWAIT);
+ if (!fsl_desc)
+ return NULL;
+
+ fsl_desc->echan = fsl_chan;
+ fsl_desc->n_tcds = sg_len;
+ for (i = 0; i < sg_len; i++) {
+ fsl_desc->tcd[i].vtcd = dma_pool_alloc(fsl_chan->tcd_pool,
+ GFP_NOWAIT, &fsl_desc->tcd[i].ptcd);
+ if (!fsl_desc->tcd[i].vtcd)
+ goto err;
+ }
+ return fsl_desc;
+
+err:
+ while (--i >= 0)
+ dma_pool_free(fsl_chan->tcd_pool, fsl_desc->tcd[i].vtcd,
+ fsl_desc->tcd[i].ptcd);
+ kfree(fsl_desc);
+ return NULL;
+}
+
+struct dma_async_tx_descriptor *fsl_edma_prep_dma_cyclic(
+ struct dma_chan *chan, dma_addr_t dma_addr, size_t buf_len,
+ size_t period_len, enum dma_transfer_direction direction,
+ unsigned long flags)
+{
+ struct fsl_edma_chan *fsl_chan = to_fsl_edma_chan(chan);
+ struct fsl_edma_desc *fsl_desc;
+ dma_addr_t dma_buf_next;
+ int sg_len, i;
+ u32 src_addr, dst_addr, last_sg, nbytes;
+ u16 soff, doff, iter;
+
+ if (!is_slave_direction(direction))
+ return NULL;
+
+ if (!fsl_edma_prep_slave_dma(fsl_chan, direction))
+ return NULL;
+
+ sg_len = buf_len / period_len;
+ fsl_desc = fsl_edma_alloc_desc(fsl_chan, sg_len);
+ if (!fsl_desc)
+ return NULL;
+ fsl_desc->iscyclic = true;
+ fsl_desc->dirn = direction;
+
+ dma_buf_next = dma_addr;
+ if (direction == DMA_MEM_TO_DEV) {
+ fsl_chan->attr =
+ fsl_edma_get_tcd_attr(fsl_chan->cfg.dst_addr_width);
+ nbytes = fsl_chan->cfg.dst_addr_width *
+ fsl_chan->cfg.dst_maxburst;
+ } else {
+ fsl_chan->attr =
+ fsl_edma_get_tcd_attr(fsl_chan->cfg.src_addr_width);
+ nbytes = fsl_chan->cfg.src_addr_width *
+ fsl_chan->cfg.src_maxburst;
+ }
+
+ iter = period_len / nbytes;
+
+ for (i = 0; i < sg_len; i++) {
+ if (dma_buf_next >= dma_addr + buf_len)
+ dma_buf_next = dma_addr;
+
+ /* get next sg's physical address */
+ last_sg = fsl_desc->tcd[(i + 1) % sg_len].ptcd;
+
+ if (direction == DMA_MEM_TO_DEV) {
+ src_addr = dma_buf_next;
+ dst_addr = fsl_chan->dma_dev_addr;
+ soff = fsl_chan->cfg.dst_addr_width;
+ doff = 0;
+ } else {
+ src_addr = fsl_chan->dma_dev_addr;
+ dst_addr = dma_buf_next;
+ soff = 0;
+ doff = fsl_chan->cfg.src_addr_width;
+ }
+
+ fsl_edma_fill_tcd(fsl_desc->tcd[i].vtcd, src_addr, dst_addr,
+ fsl_chan->attr, soff, nbytes, 0, iter,
+ iter, doff, last_sg, true, false, true);
+ dma_buf_next += period_len;
+ }
+
+ return vchan_tx_prep(&fsl_chan->vchan, &fsl_desc->vdesc, flags);
+}
+EXPORT_SYMBOL_GPL(fsl_edma_prep_dma_cyclic);
+
+struct dma_async_tx_descriptor *fsl_edma_prep_slave_sg(
+ struct dma_chan *chan, struct scatterlist *sgl,
+ unsigned int sg_len, enum dma_transfer_direction direction,
+ unsigned long flags, void *context)
+{
+ struct fsl_edma_chan *fsl_chan = to_fsl_edma_chan(chan);
+ struct fsl_edma_desc *fsl_desc;
+ struct scatterlist *sg;
+ u32 src_addr, dst_addr, last_sg, nbytes;
+ u16 soff, doff, iter;
+ int i;
+
+ if (!is_slave_direction(direction))
+ return NULL;
+
+ if (!fsl_edma_prep_slave_dma(fsl_chan, direction))
+ return NULL;
+
+ fsl_desc = fsl_edma_alloc_desc(fsl_chan, sg_len);
+ if (!fsl_desc)
+ return NULL;
+ fsl_desc->iscyclic = false;
+ fsl_desc->dirn = direction;
+
+ if (direction == DMA_MEM_TO_DEV) {
+ fsl_chan->attr =
+ fsl_edma_get_tcd_attr(fsl_chan->cfg.dst_addr_width);
+ nbytes = fsl_chan->cfg.dst_addr_width *
+ fsl_chan->cfg.dst_maxburst;
+ } else {
+ fsl_chan->attr =
+ fsl_edma_get_tcd_attr(fsl_chan->cfg.src_addr_width);
+ nbytes = fsl_chan->cfg.src_addr_width *
+ fsl_chan->cfg.src_maxburst;
+ }
+
+ for_each_sg(sgl, sg, sg_len, i) {
+ if (direction == DMA_MEM_TO_DEV) {
+ src_addr = sg_dma_address(sg);
+ dst_addr = fsl_chan->dma_dev_addr;
+ soff = fsl_chan->cfg.dst_addr_width;
+ doff = 0;
+ } else {
+ src_addr = fsl_chan->dma_dev_addr;
+ dst_addr = sg_dma_address(sg);
+ soff = 0;
+ doff = fsl_chan->cfg.src_addr_width;
+ }
+
+ iter = sg_dma_len(sg) / nbytes;
+ if (i < sg_len - 1) {
+ last_sg = fsl_desc->tcd[(i + 1)].ptcd;
+ fsl_edma_fill_tcd(fsl_desc->tcd[i].vtcd, src_addr,
+ dst_addr, fsl_chan->attr, soff,
+ nbytes, 0, iter, iter, doff, last_sg,
+ false, false, true);
+ } else {
+ last_sg = 0;
+ fsl_edma_fill_tcd(fsl_desc->tcd[i].vtcd, src_addr,
+ dst_addr, fsl_chan->attr, soff,
+ nbytes, 0, iter, iter, doff, last_sg,
+ true, true, false);
+ }
+ }
+
+ return vchan_tx_prep(&fsl_chan->vchan, &fsl_desc->vdesc, flags);
+}
+EXPORT_SYMBOL_GPL(fsl_edma_prep_slave_sg);
+
+struct dma_async_tx_descriptor *fsl_edma_prep_memcpy(struct dma_chan *chan,
+ dma_addr_t dma_dst, dma_addr_t dma_src,
+ size_t len, unsigned long flags)
+{
+ struct fsl_edma_chan *fsl_chan = to_fsl_edma_chan(chan);
+ struct fsl_edma_desc *fsl_desc;
+
+ fsl_desc = fsl_edma_alloc_desc(fsl_chan, 1);
+ if (!fsl_desc)
+ return NULL;
+ fsl_desc->iscyclic = false;
+
+ fsl_chan->is_sw = true;
+
+ /* To match with copy_align and max_seg_size so 1 tcd is enough */
+ fsl_edma_fill_tcd(fsl_desc->tcd[0].vtcd, dma_src, dma_dst,
+ EDMA_TCD_ATTR_SSIZE_32BYTE | EDMA_TCD_ATTR_DSIZE_32BYTE,
+ 32, len, 0, 1, 1, 32, 0, true, true, false);
+
+ return vchan_tx_prep(&fsl_chan->vchan, &fsl_desc->vdesc, flags);
+}
+EXPORT_SYMBOL_GPL(fsl_edma_prep_memcpy);
+
+void fsl_edma_xfer_desc(struct fsl_edma_chan *fsl_chan)
+{
+ struct virt_dma_desc *vdesc;
+
+ lockdep_assert_held(&fsl_chan->vchan.lock);
+
+ vdesc = vchan_next_desc(&fsl_chan->vchan);
+ if (!vdesc)
+ return;
+ fsl_chan->edesc = to_fsl_edma_desc(vdesc);
+ fsl_edma_set_tcd_regs(fsl_chan, fsl_chan->edesc->tcd[0].vtcd);
+ fsl_edma_enable_request(fsl_chan);
+ fsl_chan->status = DMA_IN_PROGRESS;
+ fsl_chan->idle = false;
+}
+EXPORT_SYMBOL_GPL(fsl_edma_xfer_desc);
+
+void fsl_edma_issue_pending(struct dma_chan *chan)
+{
+ struct fsl_edma_chan *fsl_chan = to_fsl_edma_chan(chan);
+ unsigned long flags;
+
+ spin_lock_irqsave(&fsl_chan->vchan.lock, flags);
+
+ if (unlikely(fsl_chan->pm_state != RUNNING)) {
+ spin_unlock_irqrestore(&fsl_chan->vchan.lock, flags);
+ /* cannot submit due to suspend */
+ return;
+ }
+
+ if (vchan_issue_pending(&fsl_chan->vchan) && !fsl_chan->edesc)
+ fsl_edma_xfer_desc(fsl_chan);
+
+ spin_unlock_irqrestore(&fsl_chan->vchan.lock, flags);
+}
+EXPORT_SYMBOL_GPL(fsl_edma_issue_pending);
+
+int fsl_edma_alloc_chan_resources(struct dma_chan *chan)
+{
+ struct fsl_edma_chan *fsl_chan = to_fsl_edma_chan(chan);
+
+ fsl_chan->tcd_pool = dma_pool_create("tcd_pool", chan->device->dev,
+ sizeof(struct fsl_edma_hw_tcd),
+ 32, 0);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(fsl_edma_alloc_chan_resources);
+
+void fsl_edma_free_chan_resources(struct dma_chan *chan)
+{
+ struct fsl_edma_chan *fsl_chan = to_fsl_edma_chan(chan);
+ struct fsl_edma_engine *edma = fsl_chan->edma;
+ unsigned long flags;
+ LIST_HEAD(head);
+
+ spin_lock_irqsave(&fsl_chan->vchan.lock, flags);
+ fsl_edma_disable_request(fsl_chan);
+ if (edma->drvdata->dmamuxs)
+ fsl_edma_chan_mux(fsl_chan, 0, false);
+ fsl_chan->edesc = NULL;
+ vchan_get_all_descriptors(&fsl_chan->vchan, &head);
+ fsl_edma_unprep_slave_dma(fsl_chan);
+ spin_unlock_irqrestore(&fsl_chan->vchan.lock, flags);
+
+ vchan_dma_desc_free_list(&fsl_chan->vchan, &head);
+ dma_pool_destroy(fsl_chan->tcd_pool);
+ fsl_chan->tcd_pool = NULL;
+ fsl_chan->is_sw = false;
+}
+EXPORT_SYMBOL_GPL(fsl_edma_free_chan_resources);
+
+void fsl_edma_cleanup_vchan(struct dma_device *dmadev)
+{
+ struct fsl_edma_chan *chan, *_chan;
+
+ list_for_each_entry_safe(chan, _chan,
+ &dmadev->channels, vchan.chan.device_node) {
+ list_del(&chan->vchan.chan.device_node);
+ tasklet_kill(&chan->vchan.task);
+ }
+}
+EXPORT_SYMBOL_GPL(fsl_edma_cleanup_vchan);
+
+/*
+ * On the 32 channels Vybrid/mpc577x edma version (here called "v1"),
+ * register offsets are different compared to ColdFire mcf5441x 64 channels
+ * edma (here called "v2").
+ *
+ * This function sets up register offsets as per proper declared version
+ * so must be called in xxx_edma_probe() just after setting the
+ * edma "version" and "membase" appropriately.
+ */
+void fsl_edma_setup_regs(struct fsl_edma_engine *edma)
+{
+ edma->regs.cr = edma->membase + EDMA_CR;
+ edma->regs.es = edma->membase + EDMA_ES;
+ edma->regs.erql = edma->membase + EDMA_ERQ;
+ edma->regs.eeil = edma->membase + EDMA_EEI;
+
+ edma->regs.serq = edma->membase + ((edma->drvdata->version == v2) ?
+ EDMA64_SERQ : EDMA_SERQ);
+ edma->regs.cerq = edma->membase + ((edma->drvdata->version == v2) ?
+ EDMA64_CERQ : EDMA_CERQ);
+ edma->regs.seei = edma->membase + ((edma->drvdata->version == v2) ?
+ EDMA64_SEEI : EDMA_SEEI);
+ edma->regs.ceei = edma->membase + ((edma->drvdata->version == v2) ?
+ EDMA64_CEEI : EDMA_CEEI);
+ edma->regs.cint = edma->membase + ((edma->drvdata->version == v2) ?
+ EDMA64_CINT : EDMA_CINT);
+ edma->regs.cerr = edma->membase + ((edma->drvdata->version == v2) ?
+ EDMA64_CERR : EDMA_CERR);
+ edma->regs.ssrt = edma->membase + ((edma->drvdata->version == v2) ?
+ EDMA64_SSRT : EDMA_SSRT);
+ edma->regs.cdne = edma->membase + ((edma->drvdata->version == v2) ?
+ EDMA64_CDNE : EDMA_CDNE);
+ edma->regs.intl = edma->membase + ((edma->drvdata->version == v2) ?
+ EDMA64_INTL : EDMA_INTR);
+ edma->regs.errl = edma->membase + ((edma->drvdata->version == v2) ?
+ EDMA64_ERRL : EDMA_ERR);
+
+ if (edma->drvdata->version == v2) {
+ edma->regs.erqh = edma->membase + EDMA64_ERQH;
+ edma->regs.eeih = edma->membase + EDMA64_EEIH;
+ edma->regs.errh = edma->membase + EDMA64_ERRH;
+ edma->regs.inth = edma->membase + EDMA64_INTH;
+ }
+
+ edma->regs.tcd = edma->membase + EDMA_TCD;
+}
+EXPORT_SYMBOL_GPL(fsl_edma_setup_regs);
+
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/fsl-edma-common.h b/drivers/dma/fsl-edma-common.h
new file mode 100644
index 000000000..004ec4a6b
--- /dev/null
+++ b/drivers/dma/fsl-edma-common.h
@@ -0,0 +1,254 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Copyright 2013-2014 Freescale Semiconductor, Inc.
+ * Copyright 2018 Angelo Dureghello <angelo@sysam.it>
+ */
+#ifndef _FSL_EDMA_COMMON_H_
+#define _FSL_EDMA_COMMON_H_
+
+#include <linux/dma-direction.h>
+#include <linux/platform_device.h>
+#include "virt-dma.h"
+
+#define EDMA_CR_EDBG BIT(1)
+#define EDMA_CR_ERCA BIT(2)
+#define EDMA_CR_ERGA BIT(3)
+#define EDMA_CR_HOE BIT(4)
+#define EDMA_CR_HALT BIT(5)
+#define EDMA_CR_CLM BIT(6)
+#define EDMA_CR_EMLM BIT(7)
+#define EDMA_CR_ECX BIT(16)
+#define EDMA_CR_CX BIT(17)
+
+#define EDMA_SEEI_SEEI(x) ((x) & GENMASK(4, 0))
+#define EDMA_CEEI_CEEI(x) ((x) & GENMASK(4, 0))
+#define EDMA_CINT_CINT(x) ((x) & GENMASK(4, 0))
+#define EDMA_CERR_CERR(x) ((x) & GENMASK(4, 0))
+
+#define EDMA_TCD_ATTR_DSIZE(x) (((x) & GENMASK(2, 0)))
+#define EDMA_TCD_ATTR_DMOD(x) (((x) & GENMASK(4, 0)) << 3)
+#define EDMA_TCD_ATTR_SSIZE(x) (((x) & GENMASK(2, 0)) << 8)
+#define EDMA_TCD_ATTR_SMOD(x) (((x) & GENMASK(4, 0)) << 11)
+#define EDMA_TCD_ATTR_DSIZE_8BIT 0
+#define EDMA_TCD_ATTR_DSIZE_16BIT BIT(0)
+#define EDMA_TCD_ATTR_DSIZE_32BIT BIT(1)
+#define EDMA_TCD_ATTR_DSIZE_64BIT (BIT(0) | BIT(1))
+#define EDMA_TCD_ATTR_DSIZE_32BYTE (BIT(2) | BIT(0))
+#define EDMA_TCD_ATTR_SSIZE_8BIT 0
+#define EDMA_TCD_ATTR_SSIZE_16BIT (EDMA_TCD_ATTR_DSIZE_16BIT << 8)
+#define EDMA_TCD_ATTR_SSIZE_32BIT (EDMA_TCD_ATTR_DSIZE_32BIT << 8)
+#define EDMA_TCD_ATTR_SSIZE_64BIT (EDMA_TCD_ATTR_DSIZE_64BIT << 8)
+#define EDMA_TCD_ATTR_SSIZE_32BYTE (EDMA_TCD_ATTR_DSIZE_32BYTE << 8)
+
+#define EDMA_TCD_CITER_CITER(x) ((x) & GENMASK(14, 0))
+#define EDMA_TCD_BITER_BITER(x) ((x) & GENMASK(14, 0))
+
+#define EDMA_TCD_CSR_START BIT(0)
+#define EDMA_TCD_CSR_INT_MAJOR BIT(1)
+#define EDMA_TCD_CSR_INT_HALF BIT(2)
+#define EDMA_TCD_CSR_D_REQ BIT(3)
+#define EDMA_TCD_CSR_E_SG BIT(4)
+#define EDMA_TCD_CSR_E_LINK BIT(5)
+#define EDMA_TCD_CSR_ACTIVE BIT(6)
+#define EDMA_TCD_CSR_DONE BIT(7)
+
+#define EDMAMUX_CHCFG_DIS 0x0
+#define EDMAMUX_CHCFG_ENBL 0x80
+#define EDMAMUX_CHCFG_SOURCE(n) ((n) & 0x3F)
+
+#define DMAMUX_NR 2
+
+#define FSL_EDMA_BUSWIDTHS (BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | \
+ BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \
+ BIT(DMA_SLAVE_BUSWIDTH_4_BYTES) | \
+ BIT(DMA_SLAVE_BUSWIDTH_8_BYTES))
+enum fsl_edma_pm_state {
+ RUNNING = 0,
+ SUSPENDED,
+};
+
+struct fsl_edma_hw_tcd {
+ __le32 saddr;
+ __le16 soff;
+ __le16 attr;
+ __le32 nbytes;
+ __le32 slast;
+ __le32 daddr;
+ __le16 doff;
+ __le16 citer;
+ __le32 dlast_sga;
+ __le16 csr;
+ __le16 biter;
+};
+
+/*
+ * These are iomem pointers, for both v32 and v64.
+ */
+struct edma_regs {
+ void __iomem *cr;
+ void __iomem *es;
+ void __iomem *erqh;
+ void __iomem *erql; /* aka erq on v32 */
+ void __iomem *eeih;
+ void __iomem *eeil; /* aka eei on v32 */
+ void __iomem *seei;
+ void __iomem *ceei;
+ void __iomem *serq;
+ void __iomem *cerq;
+ void __iomem *cint;
+ void __iomem *cerr;
+ void __iomem *ssrt;
+ void __iomem *cdne;
+ void __iomem *inth;
+ void __iomem *intl;
+ void __iomem *errh;
+ void __iomem *errl;
+ struct fsl_edma_hw_tcd __iomem *tcd;
+};
+
+struct fsl_edma_sw_tcd {
+ dma_addr_t ptcd;
+ struct fsl_edma_hw_tcd *vtcd;
+};
+
+struct fsl_edma_chan {
+ struct virt_dma_chan vchan;
+ enum dma_status status;
+ enum fsl_edma_pm_state pm_state;
+ bool idle;
+ u32 slave_id;
+ struct fsl_edma_engine *edma;
+ struct fsl_edma_desc *edesc;
+ struct dma_slave_config cfg;
+ u32 attr;
+ bool is_sw;
+ struct dma_pool *tcd_pool;
+ dma_addr_t dma_dev_addr;
+ u32 dma_dev_size;
+ enum dma_data_direction dma_dir;
+ char chan_name[16];
+};
+
+struct fsl_edma_desc {
+ struct virt_dma_desc vdesc;
+ struct fsl_edma_chan *echan;
+ bool iscyclic;
+ enum dma_transfer_direction dirn;
+ unsigned int n_tcds;
+ struct fsl_edma_sw_tcd tcd[];
+};
+
+enum edma_version {
+ v1, /* 32ch, Vybrid, mpc57x, etc */
+ v2, /* 64ch Coldfire */
+ v3, /* 32ch, i.mx7ulp */
+};
+
+struct fsl_edma_drvdata {
+ enum edma_version version;
+ u32 dmamuxs;
+ bool has_dmaclk;
+ bool mux_swap;
+ int (*setup_irq)(struct platform_device *pdev,
+ struct fsl_edma_engine *fsl_edma);
+};
+
+struct fsl_edma_engine {
+ struct dma_device dma_dev;
+ void __iomem *membase;
+ void __iomem *muxbase[DMAMUX_NR];
+ struct clk *muxclk[DMAMUX_NR];
+ struct clk *dmaclk;
+ struct mutex fsl_edma_mutex;
+ const struct fsl_edma_drvdata *drvdata;
+ u32 n_chans;
+ int txirq;
+ int errirq;
+ bool big_endian;
+ struct edma_regs regs;
+ struct fsl_edma_chan chans[];
+};
+
+/*
+ * R/W functions for big- or little-endian registers:
+ * The eDMA controller's endian is independent of the CPU core's endian.
+ * For the big-endian IP module, the offset for 8-bit or 16-bit registers
+ * should also be swapped opposite to that in little-endian IP.
+ */
+static inline u32 edma_readl(struct fsl_edma_engine *edma, void __iomem *addr)
+{
+ if (edma->big_endian)
+ return ioread32be(addr);
+ else
+ return ioread32(addr);
+}
+
+static inline void edma_writeb(struct fsl_edma_engine *edma,
+ u8 val, void __iomem *addr)
+{
+ /* swap the reg offset for these in big-endian mode */
+ if (edma->big_endian)
+ iowrite8(val, (void __iomem *)((unsigned long)addr ^ 0x3));
+ else
+ iowrite8(val, addr);
+}
+
+static inline void edma_writew(struct fsl_edma_engine *edma,
+ u16 val, void __iomem *addr)
+{
+ /* swap the reg offset for these in big-endian mode */
+ if (edma->big_endian)
+ iowrite16be(val, (void __iomem *)((unsigned long)addr ^ 0x2));
+ else
+ iowrite16(val, addr);
+}
+
+static inline void edma_writel(struct fsl_edma_engine *edma,
+ u32 val, void __iomem *addr)
+{
+ if (edma->big_endian)
+ iowrite32be(val, addr);
+ else
+ iowrite32(val, addr);
+}
+
+static inline struct fsl_edma_chan *to_fsl_edma_chan(struct dma_chan *chan)
+{
+ return container_of(chan, struct fsl_edma_chan, vchan.chan);
+}
+
+static inline struct fsl_edma_desc *to_fsl_edma_desc(struct virt_dma_desc *vd)
+{
+ return container_of(vd, struct fsl_edma_desc, vdesc);
+}
+
+void fsl_edma_disable_request(struct fsl_edma_chan *fsl_chan);
+void fsl_edma_chan_mux(struct fsl_edma_chan *fsl_chan,
+ unsigned int slot, bool enable);
+void fsl_edma_free_desc(struct virt_dma_desc *vdesc);
+int fsl_edma_terminate_all(struct dma_chan *chan);
+int fsl_edma_pause(struct dma_chan *chan);
+int fsl_edma_resume(struct dma_chan *chan);
+int fsl_edma_slave_config(struct dma_chan *chan,
+ struct dma_slave_config *cfg);
+enum dma_status fsl_edma_tx_status(struct dma_chan *chan,
+ dma_cookie_t cookie, struct dma_tx_state *txstate);
+struct dma_async_tx_descriptor *fsl_edma_prep_dma_cyclic(
+ struct dma_chan *chan, dma_addr_t dma_addr, size_t buf_len,
+ size_t period_len, enum dma_transfer_direction direction,
+ unsigned long flags);
+struct dma_async_tx_descriptor *fsl_edma_prep_slave_sg(
+ struct dma_chan *chan, struct scatterlist *sgl,
+ unsigned int sg_len, enum dma_transfer_direction direction,
+ unsigned long flags, void *context);
+struct dma_async_tx_descriptor *fsl_edma_prep_memcpy(
+ struct dma_chan *chan, dma_addr_t dma_dst, dma_addr_t dma_src,
+ size_t len, unsigned long flags);
+void fsl_edma_xfer_desc(struct fsl_edma_chan *fsl_chan);
+void fsl_edma_issue_pending(struct dma_chan *chan);
+int fsl_edma_alloc_chan_resources(struct dma_chan *chan);
+void fsl_edma_free_chan_resources(struct dma_chan *chan);
+void fsl_edma_cleanup_vchan(struct dma_device *dmadev);
+void fsl_edma_setup_regs(struct fsl_edma_engine *edma);
+
+#endif /* _FSL_EDMA_COMMON_H_ */
diff --git a/drivers/dma/fsl-edma.c b/drivers/dma/fsl-edma.c
new file mode 100644
index 000000000..76cbf54ae
--- /dev/null
+++ b/drivers/dma/fsl-edma.c
@@ -0,0 +1,519 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * drivers/dma/fsl-edma.c
+ *
+ * Copyright 2013-2014 Freescale Semiconductor, Inc.
+ *
+ * Driver for the Freescale eDMA engine with flexible channel multiplexing
+ * capability for DMA request sources. The eDMA block can be found on some
+ * Vybrid and Layerscape SoCs.
+ */
+
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/clk.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/of_dma.h>
+#include <linux/dma-mapping.h>
+
+#include "fsl-edma-common.h"
+
+static void fsl_edma_synchronize(struct dma_chan *chan)
+{
+ struct fsl_edma_chan *fsl_chan = to_fsl_edma_chan(chan);
+
+ vchan_synchronize(&fsl_chan->vchan);
+}
+
+static irqreturn_t fsl_edma_tx_handler(int irq, void *dev_id)
+{
+ struct fsl_edma_engine *fsl_edma = dev_id;
+ unsigned int intr, ch;
+ struct edma_regs *regs = &fsl_edma->regs;
+ struct fsl_edma_chan *fsl_chan;
+
+ intr = edma_readl(fsl_edma, regs->intl);
+ if (!intr)
+ return IRQ_NONE;
+
+ for (ch = 0; ch < fsl_edma->n_chans; ch++) {
+ if (intr & (0x1 << ch)) {
+ edma_writeb(fsl_edma, EDMA_CINT_CINT(ch), regs->cint);
+
+ fsl_chan = &fsl_edma->chans[ch];
+
+ spin_lock(&fsl_chan->vchan.lock);
+
+ if (!fsl_chan->edesc) {
+ /* terminate_all called before */
+ spin_unlock(&fsl_chan->vchan.lock);
+ continue;
+ }
+
+ if (!fsl_chan->edesc->iscyclic) {
+ list_del(&fsl_chan->edesc->vdesc.node);
+ vchan_cookie_complete(&fsl_chan->edesc->vdesc);
+ fsl_chan->edesc = NULL;
+ fsl_chan->status = DMA_COMPLETE;
+ fsl_chan->idle = true;
+ } else {
+ vchan_cyclic_callback(&fsl_chan->edesc->vdesc);
+ }
+
+ if (!fsl_chan->edesc)
+ fsl_edma_xfer_desc(fsl_chan);
+
+ spin_unlock(&fsl_chan->vchan.lock);
+ }
+ }
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t fsl_edma_err_handler(int irq, void *dev_id)
+{
+ struct fsl_edma_engine *fsl_edma = dev_id;
+ unsigned int err, ch;
+ struct edma_regs *regs = &fsl_edma->regs;
+
+ err = edma_readl(fsl_edma, regs->errl);
+ if (!err)
+ return IRQ_NONE;
+
+ for (ch = 0; ch < fsl_edma->n_chans; ch++) {
+ if (err & (0x1 << ch)) {
+ fsl_edma_disable_request(&fsl_edma->chans[ch]);
+ edma_writeb(fsl_edma, EDMA_CERR_CERR(ch), regs->cerr);
+ fsl_edma->chans[ch].status = DMA_ERROR;
+ fsl_edma->chans[ch].idle = true;
+ }
+ }
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t fsl_edma_irq_handler(int irq, void *dev_id)
+{
+ if (fsl_edma_tx_handler(irq, dev_id) == IRQ_HANDLED)
+ return IRQ_HANDLED;
+
+ return fsl_edma_err_handler(irq, dev_id);
+}
+
+static struct dma_chan *fsl_edma_xlate(struct of_phandle_args *dma_spec,
+ struct of_dma *ofdma)
+{
+ struct fsl_edma_engine *fsl_edma = ofdma->of_dma_data;
+ struct dma_chan *chan, *_chan;
+ struct fsl_edma_chan *fsl_chan;
+ u32 dmamux_nr = fsl_edma->drvdata->dmamuxs;
+ unsigned long chans_per_mux = fsl_edma->n_chans / dmamux_nr;
+
+ if (dma_spec->args_count != 2)
+ return NULL;
+
+ mutex_lock(&fsl_edma->fsl_edma_mutex);
+ list_for_each_entry_safe(chan, _chan, &fsl_edma->dma_dev.channels, device_node) {
+ if (chan->client_count)
+ continue;
+ if ((chan->chan_id / chans_per_mux) == dma_spec->args[0]) {
+ chan = dma_get_slave_channel(chan);
+ if (chan) {
+ chan->device->privatecnt++;
+ fsl_chan = to_fsl_edma_chan(chan);
+ fsl_chan->slave_id = dma_spec->args[1];
+ fsl_edma_chan_mux(fsl_chan, fsl_chan->slave_id,
+ true);
+ mutex_unlock(&fsl_edma->fsl_edma_mutex);
+ return chan;
+ }
+ }
+ }
+ mutex_unlock(&fsl_edma->fsl_edma_mutex);
+ return NULL;
+}
+
+static int
+fsl_edma_irq_init(struct platform_device *pdev, struct fsl_edma_engine *fsl_edma)
+{
+ int ret;
+
+ fsl_edma->txirq = platform_get_irq_byname(pdev, "edma-tx");
+ if (fsl_edma->txirq < 0)
+ return fsl_edma->txirq;
+
+ fsl_edma->errirq = platform_get_irq_byname(pdev, "edma-err");
+ if (fsl_edma->errirq < 0)
+ return fsl_edma->errirq;
+
+ if (fsl_edma->txirq == fsl_edma->errirq) {
+ ret = devm_request_irq(&pdev->dev, fsl_edma->txirq,
+ fsl_edma_irq_handler, 0, "eDMA", fsl_edma);
+ if (ret) {
+ dev_err(&pdev->dev, "Can't register eDMA IRQ.\n");
+ return ret;
+ }
+ } else {
+ ret = devm_request_irq(&pdev->dev, fsl_edma->txirq,
+ fsl_edma_tx_handler, 0, "eDMA tx", fsl_edma);
+ if (ret) {
+ dev_err(&pdev->dev, "Can't register eDMA tx IRQ.\n");
+ return ret;
+ }
+
+ ret = devm_request_irq(&pdev->dev, fsl_edma->errirq,
+ fsl_edma_err_handler, 0, "eDMA err", fsl_edma);
+ if (ret) {
+ dev_err(&pdev->dev, "Can't register eDMA err IRQ.\n");
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+static int
+fsl_edma2_irq_init(struct platform_device *pdev,
+ struct fsl_edma_engine *fsl_edma)
+{
+ int i, ret, irq;
+ int count;
+
+ count = platform_irq_count(pdev);
+ dev_dbg(&pdev->dev, "%s Found %d interrupts\r\n", __func__, count);
+ if (count <= 2) {
+ dev_err(&pdev->dev, "Interrupts in DTS not correct.\n");
+ return -EINVAL;
+ }
+ /*
+ * 16 channel independent interrupts + 1 error interrupt on i.mx7ulp.
+ * 2 channel share one interrupt, for example, ch0/ch16, ch1/ch17...
+ * For now, just simply request irq without IRQF_SHARED flag, since 16
+ * channels are enough on i.mx7ulp whose M4 domain own some peripherals.
+ */
+ for (i = 0; i < count; i++) {
+ irq = platform_get_irq(pdev, i);
+ if (irq < 0)
+ return -ENXIO;
+
+ sprintf(fsl_edma->chans[i].chan_name, "eDMA2-CH%02d", i);
+
+ /* The last IRQ is for eDMA err */
+ if (i == count - 1)
+ ret = devm_request_irq(&pdev->dev, irq,
+ fsl_edma_err_handler,
+ 0, "eDMA2-ERR", fsl_edma);
+ else
+ ret = devm_request_irq(&pdev->dev, irq,
+ fsl_edma_tx_handler, 0,
+ fsl_edma->chans[i].chan_name,
+ fsl_edma);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static void fsl_edma_irq_exit(
+ struct platform_device *pdev, struct fsl_edma_engine *fsl_edma)
+{
+ if (fsl_edma->txirq == fsl_edma->errirq) {
+ devm_free_irq(&pdev->dev, fsl_edma->txirq, fsl_edma);
+ } else {
+ devm_free_irq(&pdev->dev, fsl_edma->txirq, fsl_edma);
+ devm_free_irq(&pdev->dev, fsl_edma->errirq, fsl_edma);
+ }
+}
+
+static void fsl_disable_clocks(struct fsl_edma_engine *fsl_edma, int nr_clocks)
+{
+ int i;
+
+ for (i = 0; i < nr_clocks; i++)
+ clk_disable_unprepare(fsl_edma->muxclk[i]);
+}
+
+static struct fsl_edma_drvdata vf610_data = {
+ .version = v1,
+ .dmamuxs = DMAMUX_NR,
+ .setup_irq = fsl_edma_irq_init,
+};
+
+static struct fsl_edma_drvdata ls1028a_data = {
+ .version = v1,
+ .dmamuxs = DMAMUX_NR,
+ .mux_swap = true,
+ .setup_irq = fsl_edma_irq_init,
+};
+
+static struct fsl_edma_drvdata imx7ulp_data = {
+ .version = v3,
+ .dmamuxs = 1,
+ .has_dmaclk = true,
+ .setup_irq = fsl_edma2_irq_init,
+};
+
+static const struct of_device_id fsl_edma_dt_ids[] = {
+ { .compatible = "fsl,vf610-edma", .data = &vf610_data},
+ { .compatible = "fsl,ls1028a-edma", .data = &ls1028a_data},
+ { .compatible = "fsl,imx7ulp-edma", .data = &imx7ulp_data},
+ { /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, fsl_edma_dt_ids);
+
+static int fsl_edma_probe(struct platform_device *pdev)
+{
+ const struct of_device_id *of_id =
+ of_match_device(fsl_edma_dt_ids, &pdev->dev);
+ struct device_node *np = pdev->dev.of_node;
+ struct fsl_edma_engine *fsl_edma;
+ const struct fsl_edma_drvdata *drvdata = NULL;
+ struct fsl_edma_chan *fsl_chan;
+ struct edma_regs *regs;
+ struct resource *res;
+ int len, chans;
+ int ret, i;
+
+ if (of_id)
+ drvdata = of_id->data;
+ if (!drvdata) {
+ dev_err(&pdev->dev, "unable to find driver data\n");
+ return -EINVAL;
+ }
+
+ ret = of_property_read_u32(np, "dma-channels", &chans);
+ if (ret) {
+ dev_err(&pdev->dev, "Can't get dma-channels.\n");
+ return ret;
+ }
+
+ len = sizeof(*fsl_edma) + sizeof(*fsl_chan) * chans;
+ fsl_edma = devm_kzalloc(&pdev->dev, len, GFP_KERNEL);
+ if (!fsl_edma)
+ return -ENOMEM;
+
+ fsl_edma->drvdata = drvdata;
+ fsl_edma->n_chans = chans;
+ mutex_init(&fsl_edma->fsl_edma_mutex);
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ fsl_edma->membase = devm_ioremap_resource(&pdev->dev, res);
+ if (IS_ERR(fsl_edma->membase))
+ return PTR_ERR(fsl_edma->membase);
+
+ fsl_edma_setup_regs(fsl_edma);
+ regs = &fsl_edma->regs;
+
+ if (drvdata->has_dmaclk) {
+ fsl_edma->dmaclk = devm_clk_get(&pdev->dev, "dma");
+ if (IS_ERR(fsl_edma->dmaclk)) {
+ dev_err(&pdev->dev, "Missing DMA block clock.\n");
+ return PTR_ERR(fsl_edma->dmaclk);
+ }
+
+ ret = clk_prepare_enable(fsl_edma->dmaclk);
+ if (ret) {
+ dev_err(&pdev->dev, "DMA clk block failed.\n");
+ return ret;
+ }
+ }
+
+ for (i = 0; i < fsl_edma->drvdata->dmamuxs; i++) {
+ char clkname[32];
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 1 + i);
+ fsl_edma->muxbase[i] = devm_ioremap_resource(&pdev->dev, res);
+ if (IS_ERR(fsl_edma->muxbase[i])) {
+ /* on error: disable all previously enabled clks */
+ fsl_disable_clocks(fsl_edma, i);
+ return PTR_ERR(fsl_edma->muxbase[i]);
+ }
+
+ sprintf(clkname, "dmamux%d", i);
+ fsl_edma->muxclk[i] = devm_clk_get(&pdev->dev, clkname);
+ if (IS_ERR(fsl_edma->muxclk[i])) {
+ dev_err(&pdev->dev, "Missing DMAMUX block clock.\n");
+ /* on error: disable all previously enabled clks */
+ fsl_disable_clocks(fsl_edma, i);
+ return PTR_ERR(fsl_edma->muxclk[i]);
+ }
+
+ ret = clk_prepare_enable(fsl_edma->muxclk[i]);
+ if (ret)
+ /* on error: disable all previously enabled clks */
+ fsl_disable_clocks(fsl_edma, i);
+
+ }
+
+ fsl_edma->big_endian = of_property_read_bool(np, "big-endian");
+
+ INIT_LIST_HEAD(&fsl_edma->dma_dev.channels);
+ for (i = 0; i < fsl_edma->n_chans; i++) {
+ struct fsl_edma_chan *fsl_chan = &fsl_edma->chans[i];
+
+ fsl_chan->edma = fsl_edma;
+ fsl_chan->pm_state = RUNNING;
+ fsl_chan->slave_id = 0;
+ fsl_chan->idle = true;
+ fsl_chan->dma_dir = DMA_NONE;
+ fsl_chan->vchan.desc_free = fsl_edma_free_desc;
+ vchan_init(&fsl_chan->vchan, &fsl_edma->dma_dev);
+
+ edma_writew(fsl_edma, 0x0, &regs->tcd[i].csr);
+ fsl_edma_chan_mux(fsl_chan, 0, false);
+ }
+
+ edma_writel(fsl_edma, ~0, regs->intl);
+ ret = fsl_edma->drvdata->setup_irq(pdev, fsl_edma);
+ if (ret)
+ return ret;
+
+ dma_cap_set(DMA_PRIVATE, fsl_edma->dma_dev.cap_mask);
+ dma_cap_set(DMA_SLAVE, fsl_edma->dma_dev.cap_mask);
+ dma_cap_set(DMA_CYCLIC, fsl_edma->dma_dev.cap_mask);
+ dma_cap_set(DMA_MEMCPY, fsl_edma->dma_dev.cap_mask);
+
+ fsl_edma->dma_dev.dev = &pdev->dev;
+ fsl_edma->dma_dev.device_alloc_chan_resources
+ = fsl_edma_alloc_chan_resources;
+ fsl_edma->dma_dev.device_free_chan_resources
+ = fsl_edma_free_chan_resources;
+ fsl_edma->dma_dev.device_tx_status = fsl_edma_tx_status;
+ fsl_edma->dma_dev.device_prep_slave_sg = fsl_edma_prep_slave_sg;
+ fsl_edma->dma_dev.device_prep_dma_cyclic = fsl_edma_prep_dma_cyclic;
+ fsl_edma->dma_dev.device_prep_dma_memcpy = fsl_edma_prep_memcpy;
+ fsl_edma->dma_dev.device_config = fsl_edma_slave_config;
+ fsl_edma->dma_dev.device_pause = fsl_edma_pause;
+ fsl_edma->dma_dev.device_resume = fsl_edma_resume;
+ fsl_edma->dma_dev.device_terminate_all = fsl_edma_terminate_all;
+ fsl_edma->dma_dev.device_synchronize = fsl_edma_synchronize;
+ fsl_edma->dma_dev.device_issue_pending = fsl_edma_issue_pending;
+
+ fsl_edma->dma_dev.src_addr_widths = FSL_EDMA_BUSWIDTHS;
+ fsl_edma->dma_dev.dst_addr_widths = FSL_EDMA_BUSWIDTHS;
+ fsl_edma->dma_dev.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
+
+ fsl_edma->dma_dev.copy_align = DMAENGINE_ALIGN_32_BYTES;
+ /* Per worst case 'nbytes = 1' take CITER as the max_seg_size */
+ dma_set_max_seg_size(fsl_edma->dma_dev.dev, 0x3fff);
+
+ platform_set_drvdata(pdev, fsl_edma);
+
+ ret = dma_async_device_register(&fsl_edma->dma_dev);
+ if (ret) {
+ dev_err(&pdev->dev,
+ "Can't register Freescale eDMA engine. (%d)\n", ret);
+ fsl_disable_clocks(fsl_edma, fsl_edma->drvdata->dmamuxs);
+ return ret;
+ }
+
+ ret = of_dma_controller_register(np, fsl_edma_xlate, fsl_edma);
+ if (ret) {
+ dev_err(&pdev->dev,
+ "Can't register Freescale eDMA of_dma. (%d)\n", ret);
+ dma_async_device_unregister(&fsl_edma->dma_dev);
+ fsl_disable_clocks(fsl_edma, fsl_edma->drvdata->dmamuxs);
+ return ret;
+ }
+
+ /* enable round robin arbitration */
+ edma_writel(fsl_edma, EDMA_CR_ERGA | EDMA_CR_ERCA, regs->cr);
+
+ return 0;
+}
+
+static int fsl_edma_remove(struct platform_device *pdev)
+{
+ struct device_node *np = pdev->dev.of_node;
+ struct fsl_edma_engine *fsl_edma = platform_get_drvdata(pdev);
+
+ fsl_edma_irq_exit(pdev, fsl_edma);
+ fsl_edma_cleanup_vchan(&fsl_edma->dma_dev);
+ of_dma_controller_free(np);
+ dma_async_device_unregister(&fsl_edma->dma_dev);
+ fsl_disable_clocks(fsl_edma, fsl_edma->drvdata->dmamuxs);
+
+ return 0;
+}
+
+static int fsl_edma_suspend_late(struct device *dev)
+{
+ struct fsl_edma_engine *fsl_edma = dev_get_drvdata(dev);
+ struct fsl_edma_chan *fsl_chan;
+ unsigned long flags;
+ int i;
+
+ for (i = 0; i < fsl_edma->n_chans; i++) {
+ fsl_chan = &fsl_edma->chans[i];
+ spin_lock_irqsave(&fsl_chan->vchan.lock, flags);
+ /* Make sure chan is idle or will force disable. */
+ if (unlikely(!fsl_chan->idle)) {
+ dev_warn(dev, "WARN: There is non-idle channel.");
+ fsl_edma_disable_request(fsl_chan);
+ fsl_edma_chan_mux(fsl_chan, 0, false);
+ }
+
+ fsl_chan->pm_state = SUSPENDED;
+ spin_unlock_irqrestore(&fsl_chan->vchan.lock, flags);
+ }
+
+ return 0;
+}
+
+static int fsl_edma_resume_early(struct device *dev)
+{
+ struct fsl_edma_engine *fsl_edma = dev_get_drvdata(dev);
+ struct fsl_edma_chan *fsl_chan;
+ struct edma_regs *regs = &fsl_edma->regs;
+ int i;
+
+ for (i = 0; i < fsl_edma->n_chans; i++) {
+ fsl_chan = &fsl_edma->chans[i];
+ fsl_chan->pm_state = RUNNING;
+ edma_writew(fsl_edma, 0x0, &regs->tcd[i].csr);
+ if (fsl_chan->slave_id != 0)
+ fsl_edma_chan_mux(fsl_chan, fsl_chan->slave_id, true);
+ }
+
+ edma_writel(fsl_edma, EDMA_CR_ERGA | EDMA_CR_ERCA, regs->cr);
+
+ return 0;
+}
+
+/*
+ * eDMA provides the service to others, so it should be suspend late
+ * and resume early. When eDMA suspend, all of the clients should stop
+ * the DMA data transmission and let the channel idle.
+ */
+static const struct dev_pm_ops fsl_edma_pm_ops = {
+ .suspend_late = fsl_edma_suspend_late,
+ .resume_early = fsl_edma_resume_early,
+};
+
+static struct platform_driver fsl_edma_driver = {
+ .driver = {
+ .name = "fsl-edma",
+ .of_match_table = fsl_edma_dt_ids,
+ .pm = &fsl_edma_pm_ops,
+ },
+ .probe = fsl_edma_probe,
+ .remove = fsl_edma_remove,
+};
+
+static int __init fsl_edma_init(void)
+{
+ return platform_driver_register(&fsl_edma_driver);
+}
+subsys_initcall(fsl_edma_init);
+
+static void __exit fsl_edma_exit(void)
+{
+ platform_driver_unregister(&fsl_edma_driver);
+}
+module_exit(fsl_edma_exit);
+
+MODULE_ALIAS("platform:fsl-edma");
+MODULE_DESCRIPTION("Freescale eDMA engine driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/fsl-qdma.c b/drivers/dma/fsl-qdma.c
new file mode 100644
index 000000000..045ead46e
--- /dev/null
+++ b/drivers/dma/fsl-qdma.c
@@ -0,0 +1,1312 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright 2014-2015 Freescale
+// Copyright 2018 NXP
+
+/*
+ * Driver for NXP Layerscape Queue Direct Memory Access Controller
+ *
+ * Author:
+ * Wen He <wen.he_1@nxp.com>
+ * Jiaheng Fan <jiaheng.fan@nxp.com>
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/of_irq.h>
+#include <linux/of_platform.h>
+#include <linux/of_dma.h>
+#include <linux/dma-mapping.h>
+
+#include "virt-dma.h"
+#include "fsldma.h"
+
+/* Register related definition */
+#define FSL_QDMA_DMR 0x0
+#define FSL_QDMA_DSR 0x4
+#define FSL_QDMA_DEIER 0xe00
+#define FSL_QDMA_DEDR 0xe04
+#define FSL_QDMA_DECFDW0R 0xe10
+#define FSL_QDMA_DECFDW1R 0xe14
+#define FSL_QDMA_DECFDW2R 0xe18
+#define FSL_QDMA_DECFDW3R 0xe1c
+#define FSL_QDMA_DECFQIDR 0xe30
+#define FSL_QDMA_DECBR 0xe34
+
+#define FSL_QDMA_BCQMR(x) (0xc0 + 0x100 * (x))
+#define FSL_QDMA_BCQSR(x) (0xc4 + 0x100 * (x))
+#define FSL_QDMA_BCQEDPA_SADDR(x) (0xc8 + 0x100 * (x))
+#define FSL_QDMA_BCQDPA_SADDR(x) (0xcc + 0x100 * (x))
+#define FSL_QDMA_BCQEEPA_SADDR(x) (0xd0 + 0x100 * (x))
+#define FSL_QDMA_BCQEPA_SADDR(x) (0xd4 + 0x100 * (x))
+#define FSL_QDMA_BCQIER(x) (0xe0 + 0x100 * (x))
+#define FSL_QDMA_BCQIDR(x) (0xe4 + 0x100 * (x))
+
+#define FSL_QDMA_SQDPAR 0x80c
+#define FSL_QDMA_SQEPAR 0x814
+#define FSL_QDMA_BSQMR 0x800
+#define FSL_QDMA_BSQSR 0x804
+#define FSL_QDMA_BSQICR 0x828
+#define FSL_QDMA_CQMR 0xa00
+#define FSL_QDMA_CQDSCR1 0xa08
+#define FSL_QDMA_CQDSCR2 0xa0c
+#define FSL_QDMA_CQIER 0xa10
+#define FSL_QDMA_CQEDR 0xa14
+#define FSL_QDMA_SQCCMR 0xa20
+
+/* Registers for bit and genmask */
+#define FSL_QDMA_CQIDR_SQT BIT(15)
+#define QDMA_CCDF_FORMAT BIT(29)
+#define QDMA_CCDF_SER BIT(30)
+#define QDMA_SG_FIN BIT(30)
+#define QDMA_SG_LEN_MASK GENMASK(29, 0)
+#define QDMA_CCDF_MASK GENMASK(28, 20)
+
+#define FSL_QDMA_DEDR_CLEAR GENMASK(31, 0)
+#define FSL_QDMA_BCQIDR_CLEAR GENMASK(31, 0)
+#define FSL_QDMA_DEIER_CLEAR GENMASK(31, 0)
+
+#define FSL_QDMA_BCQIER_CQTIE BIT(15)
+#define FSL_QDMA_BCQIER_CQPEIE BIT(23)
+#define FSL_QDMA_BSQICR_ICEN BIT(31)
+
+#define FSL_QDMA_BSQICR_ICST(x) ((x) << 16)
+#define FSL_QDMA_CQIER_MEIE BIT(31)
+#define FSL_QDMA_CQIER_TEIE BIT(0)
+#define FSL_QDMA_SQCCMR_ENTER_WM BIT(21)
+
+#define FSL_QDMA_BCQMR_EN BIT(31)
+#define FSL_QDMA_BCQMR_EI BIT(30)
+#define FSL_QDMA_BCQMR_CD_THLD(x) ((x) << 20)
+#define FSL_QDMA_BCQMR_CQ_SIZE(x) ((x) << 16)
+
+#define FSL_QDMA_BCQSR_QF BIT(16)
+#define FSL_QDMA_BCQSR_XOFF BIT(0)
+
+#define FSL_QDMA_BSQMR_EN BIT(31)
+#define FSL_QDMA_BSQMR_DI BIT(30)
+#define FSL_QDMA_BSQMR_CQ_SIZE(x) ((x) << 16)
+
+#define FSL_QDMA_BSQSR_QE BIT(17)
+
+#define FSL_QDMA_DMR_DQD BIT(30)
+#define FSL_QDMA_DSR_DB BIT(31)
+
+/* Size related definition */
+#define FSL_QDMA_QUEUE_MAX 8
+#define FSL_QDMA_COMMAND_BUFFER_SIZE 64
+#define FSL_QDMA_DESCRIPTOR_BUFFER_SIZE 32
+#define FSL_QDMA_CIRCULAR_DESC_SIZE_MIN 64
+#define FSL_QDMA_CIRCULAR_DESC_SIZE_MAX 16384
+#define FSL_QDMA_QUEUE_NUM_MAX 8
+
+/* Field definition for CMD */
+#define FSL_QDMA_CMD_RWTTYPE 0x4
+#define FSL_QDMA_CMD_LWC 0x2
+#define FSL_QDMA_CMD_RWTTYPE_OFFSET 28
+#define FSL_QDMA_CMD_NS_OFFSET 27
+#define FSL_QDMA_CMD_DQOS_OFFSET 24
+#define FSL_QDMA_CMD_WTHROTL_OFFSET 20
+#define FSL_QDMA_CMD_DSEN_OFFSET 19
+#define FSL_QDMA_CMD_LWC_OFFSET 16
+
+/* Field definition for Descriptor status */
+#define QDMA_CCDF_STATUS_RTE BIT(5)
+#define QDMA_CCDF_STATUS_WTE BIT(4)
+#define QDMA_CCDF_STATUS_CDE BIT(2)
+#define QDMA_CCDF_STATUS_SDE BIT(1)
+#define QDMA_CCDF_STATUS_DDE BIT(0)
+#define QDMA_CCDF_STATUS_MASK (QDMA_CCDF_STATUS_RTE | \
+ QDMA_CCDF_STATUS_WTE | \
+ QDMA_CCDF_STATUS_CDE | \
+ QDMA_CCDF_STATUS_SDE | \
+ QDMA_CCDF_STATUS_DDE)
+
+/* Field definition for Descriptor offset */
+#define QDMA_CCDF_OFFSET 20
+#define QDMA_SDDF_CMD(x) (((u64)(x)) << 32)
+
+/* Field definition for safe loop count*/
+#define FSL_QDMA_HALT_COUNT 1500
+#define FSL_QDMA_MAX_SIZE 16385
+#define FSL_QDMA_COMP_TIMEOUT 1000
+#define FSL_COMMAND_QUEUE_OVERFLLOW 10
+
+#define FSL_QDMA_BLOCK_BASE_OFFSET(fsl_qdma_engine, x) \
+ (((fsl_qdma_engine)->block_offset) * (x))
+
+/**
+ * struct fsl_qdma_format - This is the struct holding describing compound
+ * descriptor format with qDMA.
+ * @status: Command status and enqueue status notification.
+ * @cfg: Frame offset and frame format.
+ * @addr_lo: Holding the compound descriptor of the lower
+ * 32-bits address in memory 40-bit address.
+ * @addr_hi: Same as above member, but point high 8-bits in
+ * memory 40-bit address.
+ * @__reserved1: Reserved field.
+ * @cfg8b_w1: Compound descriptor command queue origin produced
+ * by qDMA and dynamic debug field.
+ * @data: Pointer to the memory 40-bit address, describes DMA
+ * source information and DMA destination information.
+ */
+struct fsl_qdma_format {
+ __le32 status;
+ __le32 cfg;
+ union {
+ struct {
+ __le32 addr_lo;
+ u8 addr_hi;
+ u8 __reserved1[2];
+ u8 cfg8b_w1;
+ } __packed;
+ __le64 data;
+ };
+} __packed;
+
+/* qDMA status notification pre information */
+struct fsl_pre_status {
+ u64 addr;
+ u8 queue;
+};
+
+static DEFINE_PER_CPU(struct fsl_pre_status, pre);
+
+struct fsl_qdma_chan {
+ struct virt_dma_chan vchan;
+ struct virt_dma_desc vdesc;
+ enum dma_status status;
+ struct fsl_qdma_engine *qdma;
+ struct fsl_qdma_queue *queue;
+};
+
+struct fsl_qdma_queue {
+ struct fsl_qdma_format *virt_head;
+ struct fsl_qdma_format *virt_tail;
+ struct list_head comp_used;
+ struct list_head comp_free;
+ struct dma_pool *comp_pool;
+ struct dma_pool *desc_pool;
+ spinlock_t queue_lock;
+ dma_addr_t bus_addr;
+ u32 n_cq;
+ u32 id;
+ struct fsl_qdma_format *cq;
+ void __iomem *block_base;
+};
+
+struct fsl_qdma_comp {
+ dma_addr_t bus_addr;
+ dma_addr_t desc_bus_addr;
+ struct fsl_qdma_format *virt_addr;
+ struct fsl_qdma_format *desc_virt_addr;
+ struct fsl_qdma_chan *qchan;
+ struct virt_dma_desc vdesc;
+ struct list_head list;
+};
+
+struct fsl_qdma_engine {
+ struct dma_device dma_dev;
+ void __iomem *ctrl_base;
+ void __iomem *status_base;
+ void __iomem *block_base;
+ u32 n_chans;
+ u32 n_queues;
+ struct mutex fsl_qdma_mutex;
+ int error_irq;
+ int *queue_irq;
+ u32 feature;
+ struct fsl_qdma_queue *queue;
+ struct fsl_qdma_queue **status;
+ struct fsl_qdma_chan *chans;
+ int block_number;
+ int block_offset;
+ int irq_base;
+ int desc_allocated;
+
+};
+
+static inline u64
+qdma_ccdf_addr_get64(const struct fsl_qdma_format *ccdf)
+{
+ return le64_to_cpu(ccdf->data) & (U64_MAX >> 24);
+}
+
+static inline void
+qdma_desc_addr_set64(struct fsl_qdma_format *ccdf, u64 addr)
+{
+ ccdf->addr_hi = upper_32_bits(addr);
+ ccdf->addr_lo = cpu_to_le32(lower_32_bits(addr));
+}
+
+static inline u8
+qdma_ccdf_get_queue(const struct fsl_qdma_format *ccdf)
+{
+ return ccdf->cfg8b_w1 & U8_MAX;
+}
+
+static inline int
+qdma_ccdf_get_offset(const struct fsl_qdma_format *ccdf)
+{
+ return (le32_to_cpu(ccdf->cfg) & QDMA_CCDF_MASK) >> QDMA_CCDF_OFFSET;
+}
+
+static inline void
+qdma_ccdf_set_format(struct fsl_qdma_format *ccdf, int offset)
+{
+ ccdf->cfg = cpu_to_le32(QDMA_CCDF_FORMAT |
+ (offset << QDMA_CCDF_OFFSET));
+}
+
+static inline int
+qdma_ccdf_get_status(const struct fsl_qdma_format *ccdf)
+{
+ return (le32_to_cpu(ccdf->status) & QDMA_CCDF_STATUS_MASK);
+}
+
+static inline void
+qdma_ccdf_set_ser(struct fsl_qdma_format *ccdf, int status)
+{
+ ccdf->status = cpu_to_le32(QDMA_CCDF_SER | status);
+}
+
+static inline void qdma_csgf_set_len(struct fsl_qdma_format *csgf, int len)
+{
+ csgf->cfg = cpu_to_le32(len & QDMA_SG_LEN_MASK);
+}
+
+static inline void qdma_csgf_set_f(struct fsl_qdma_format *csgf, int len)
+{
+ csgf->cfg = cpu_to_le32(QDMA_SG_FIN | (len & QDMA_SG_LEN_MASK));
+}
+
+static u32 qdma_readl(struct fsl_qdma_engine *qdma, void __iomem *addr)
+{
+ return FSL_DMA_IN(qdma, addr, 32);
+}
+
+static void qdma_writel(struct fsl_qdma_engine *qdma, u32 val,
+ void __iomem *addr)
+{
+ FSL_DMA_OUT(qdma, addr, val, 32);
+}
+
+static struct fsl_qdma_chan *to_fsl_qdma_chan(struct dma_chan *chan)
+{
+ return container_of(chan, struct fsl_qdma_chan, vchan.chan);
+}
+
+static struct fsl_qdma_comp *to_fsl_qdma_comp(struct virt_dma_desc *vd)
+{
+ return container_of(vd, struct fsl_qdma_comp, vdesc);
+}
+
+static void fsl_qdma_free_chan_resources(struct dma_chan *chan)
+{
+ struct fsl_qdma_chan *fsl_chan = to_fsl_qdma_chan(chan);
+ struct fsl_qdma_queue *fsl_queue = fsl_chan->queue;
+ struct fsl_qdma_engine *fsl_qdma = fsl_chan->qdma;
+ struct fsl_qdma_comp *comp_temp, *_comp_temp;
+ unsigned long flags;
+ LIST_HEAD(head);
+
+ spin_lock_irqsave(&fsl_chan->vchan.lock, flags);
+ vchan_get_all_descriptors(&fsl_chan->vchan, &head);
+ spin_unlock_irqrestore(&fsl_chan->vchan.lock, flags);
+
+ vchan_dma_desc_free_list(&fsl_chan->vchan, &head);
+
+ if (!fsl_queue->comp_pool && !fsl_queue->desc_pool)
+ return;
+
+ list_for_each_entry_safe(comp_temp, _comp_temp,
+ &fsl_queue->comp_used, list) {
+ dma_pool_free(fsl_queue->comp_pool,
+ comp_temp->virt_addr,
+ comp_temp->bus_addr);
+ dma_pool_free(fsl_queue->desc_pool,
+ comp_temp->desc_virt_addr,
+ comp_temp->desc_bus_addr);
+ list_del(&comp_temp->list);
+ kfree(comp_temp);
+ }
+
+ list_for_each_entry_safe(comp_temp, _comp_temp,
+ &fsl_queue->comp_free, list) {
+ dma_pool_free(fsl_queue->comp_pool,
+ comp_temp->virt_addr,
+ comp_temp->bus_addr);
+ dma_pool_free(fsl_queue->desc_pool,
+ comp_temp->desc_virt_addr,
+ comp_temp->desc_bus_addr);
+ list_del(&comp_temp->list);
+ kfree(comp_temp);
+ }
+
+ dma_pool_destroy(fsl_queue->comp_pool);
+ dma_pool_destroy(fsl_queue->desc_pool);
+
+ fsl_qdma->desc_allocated--;
+ fsl_queue->comp_pool = NULL;
+ fsl_queue->desc_pool = NULL;
+}
+
+static void fsl_qdma_comp_fill_memcpy(struct fsl_qdma_comp *fsl_comp,
+ dma_addr_t dst, dma_addr_t src, u32 len)
+{
+ u32 cmd;
+ struct fsl_qdma_format *sdf, *ddf;
+ struct fsl_qdma_format *ccdf, *csgf_desc, *csgf_src, *csgf_dest;
+
+ ccdf = fsl_comp->virt_addr;
+ csgf_desc = fsl_comp->virt_addr + 1;
+ csgf_src = fsl_comp->virt_addr + 2;
+ csgf_dest = fsl_comp->virt_addr + 3;
+ sdf = fsl_comp->desc_virt_addr;
+ ddf = fsl_comp->desc_virt_addr + 1;
+
+ memset(fsl_comp->virt_addr, 0, FSL_QDMA_COMMAND_BUFFER_SIZE);
+ memset(fsl_comp->desc_virt_addr, 0, FSL_QDMA_DESCRIPTOR_BUFFER_SIZE);
+ /* Head Command Descriptor(Frame Descriptor) */
+ qdma_desc_addr_set64(ccdf, fsl_comp->bus_addr + 16);
+ qdma_ccdf_set_format(ccdf, qdma_ccdf_get_offset(ccdf));
+ qdma_ccdf_set_ser(ccdf, qdma_ccdf_get_status(ccdf));
+ /* Status notification is enqueued to status queue. */
+ /* Compound Command Descriptor(Frame List Table) */
+ qdma_desc_addr_set64(csgf_desc, fsl_comp->desc_bus_addr);
+ /* It must be 32 as Compound S/G Descriptor */
+ qdma_csgf_set_len(csgf_desc, 32);
+ qdma_desc_addr_set64(csgf_src, src);
+ qdma_csgf_set_len(csgf_src, len);
+ qdma_desc_addr_set64(csgf_dest, dst);
+ qdma_csgf_set_len(csgf_dest, len);
+ /* This entry is the last entry. */
+ qdma_csgf_set_f(csgf_dest, len);
+ /* Descriptor Buffer */
+ cmd = cpu_to_le32(FSL_QDMA_CMD_RWTTYPE <<
+ FSL_QDMA_CMD_RWTTYPE_OFFSET);
+ sdf->data = QDMA_SDDF_CMD(cmd);
+
+ cmd = cpu_to_le32(FSL_QDMA_CMD_RWTTYPE <<
+ FSL_QDMA_CMD_RWTTYPE_OFFSET);
+ cmd |= cpu_to_le32(FSL_QDMA_CMD_LWC << FSL_QDMA_CMD_LWC_OFFSET);
+ ddf->data = QDMA_SDDF_CMD(cmd);
+}
+
+/*
+ * Pre-request full command descriptor for enqueue.
+ */
+static int fsl_qdma_pre_request_enqueue_desc(struct fsl_qdma_queue *queue)
+{
+ int i;
+ struct fsl_qdma_comp *comp_temp, *_comp_temp;
+
+ for (i = 0; i < queue->n_cq + FSL_COMMAND_QUEUE_OVERFLLOW; i++) {
+ comp_temp = kzalloc(sizeof(*comp_temp), GFP_KERNEL);
+ if (!comp_temp)
+ goto err_alloc;
+ comp_temp->virt_addr =
+ dma_pool_alloc(queue->comp_pool, GFP_KERNEL,
+ &comp_temp->bus_addr);
+ if (!comp_temp->virt_addr)
+ goto err_dma_alloc;
+
+ comp_temp->desc_virt_addr =
+ dma_pool_alloc(queue->desc_pool, GFP_KERNEL,
+ &comp_temp->desc_bus_addr);
+ if (!comp_temp->desc_virt_addr)
+ goto err_desc_dma_alloc;
+
+ list_add_tail(&comp_temp->list, &queue->comp_free);
+ }
+
+ return 0;
+
+err_desc_dma_alloc:
+ dma_pool_free(queue->comp_pool, comp_temp->virt_addr,
+ comp_temp->bus_addr);
+
+err_dma_alloc:
+ kfree(comp_temp);
+
+err_alloc:
+ list_for_each_entry_safe(comp_temp, _comp_temp,
+ &queue->comp_free, list) {
+ if (comp_temp->virt_addr)
+ dma_pool_free(queue->comp_pool,
+ comp_temp->virt_addr,
+ comp_temp->bus_addr);
+ if (comp_temp->desc_virt_addr)
+ dma_pool_free(queue->desc_pool,
+ comp_temp->desc_virt_addr,
+ comp_temp->desc_bus_addr);
+
+ list_del(&comp_temp->list);
+ kfree(comp_temp);
+ }
+
+ return -ENOMEM;
+}
+
+/*
+ * Request a command descriptor for enqueue.
+ */
+static struct fsl_qdma_comp
+*fsl_qdma_request_enqueue_desc(struct fsl_qdma_chan *fsl_chan)
+{
+ unsigned long flags;
+ struct fsl_qdma_comp *comp_temp;
+ int timeout = FSL_QDMA_COMP_TIMEOUT;
+ struct fsl_qdma_queue *queue = fsl_chan->queue;
+
+ while (timeout--) {
+ spin_lock_irqsave(&queue->queue_lock, flags);
+ if (!list_empty(&queue->comp_free)) {
+ comp_temp = list_first_entry(&queue->comp_free,
+ struct fsl_qdma_comp,
+ list);
+ list_del(&comp_temp->list);
+
+ spin_unlock_irqrestore(&queue->queue_lock, flags);
+ comp_temp->qchan = fsl_chan;
+ return comp_temp;
+ }
+ spin_unlock_irqrestore(&queue->queue_lock, flags);
+ udelay(1);
+ }
+
+ return NULL;
+}
+
+static struct fsl_qdma_queue
+*fsl_qdma_alloc_queue_resources(struct platform_device *pdev,
+ struct fsl_qdma_engine *fsl_qdma)
+{
+ int ret, len, i, j;
+ int queue_num, block_number;
+ unsigned int queue_size[FSL_QDMA_QUEUE_MAX];
+ struct fsl_qdma_queue *queue_head, *queue_temp;
+
+ queue_num = fsl_qdma->n_queues;
+ block_number = fsl_qdma->block_number;
+
+ if (queue_num > FSL_QDMA_QUEUE_MAX)
+ queue_num = FSL_QDMA_QUEUE_MAX;
+ len = sizeof(*queue_head) * queue_num * block_number;
+ queue_head = devm_kzalloc(&pdev->dev, len, GFP_KERNEL);
+ if (!queue_head)
+ return NULL;
+
+ ret = device_property_read_u32_array(&pdev->dev, "queue-sizes",
+ queue_size, queue_num);
+ if (ret) {
+ dev_err(&pdev->dev, "Can't get queue-sizes.\n");
+ return NULL;
+ }
+ for (j = 0; j < block_number; j++) {
+ for (i = 0; i < queue_num; i++) {
+ if (queue_size[i] > FSL_QDMA_CIRCULAR_DESC_SIZE_MAX ||
+ queue_size[i] < FSL_QDMA_CIRCULAR_DESC_SIZE_MIN) {
+ dev_err(&pdev->dev,
+ "Get wrong queue-sizes.\n");
+ return NULL;
+ }
+ queue_temp = queue_head + i + (j * queue_num);
+
+ queue_temp->cq =
+ dma_alloc_coherent(&pdev->dev,
+ sizeof(struct fsl_qdma_format) *
+ queue_size[i],
+ &queue_temp->bus_addr,
+ GFP_KERNEL);
+ if (!queue_temp->cq)
+ return NULL;
+ queue_temp->block_base = fsl_qdma->block_base +
+ FSL_QDMA_BLOCK_BASE_OFFSET(fsl_qdma, j);
+ queue_temp->n_cq = queue_size[i];
+ queue_temp->id = i;
+ queue_temp->virt_head = queue_temp->cq;
+ queue_temp->virt_tail = queue_temp->cq;
+ /*
+ * List for queue command buffer
+ */
+ INIT_LIST_HEAD(&queue_temp->comp_used);
+ spin_lock_init(&queue_temp->queue_lock);
+ }
+ }
+ return queue_head;
+}
+
+static struct fsl_qdma_queue
+*fsl_qdma_prep_status_queue(struct platform_device *pdev)
+{
+ int ret;
+ unsigned int status_size;
+ struct fsl_qdma_queue *status_head;
+ struct device_node *np = pdev->dev.of_node;
+
+ ret = of_property_read_u32(np, "status-sizes", &status_size);
+ if (ret) {
+ dev_err(&pdev->dev, "Can't get status-sizes.\n");
+ return NULL;
+ }
+ if (status_size > FSL_QDMA_CIRCULAR_DESC_SIZE_MAX ||
+ status_size < FSL_QDMA_CIRCULAR_DESC_SIZE_MIN) {
+ dev_err(&pdev->dev, "Get wrong status_size.\n");
+ return NULL;
+ }
+ status_head = devm_kzalloc(&pdev->dev,
+ sizeof(*status_head), GFP_KERNEL);
+ if (!status_head)
+ return NULL;
+
+ /*
+ * Buffer for queue command
+ */
+ status_head->cq = dma_alloc_coherent(&pdev->dev,
+ sizeof(struct fsl_qdma_format) *
+ status_size,
+ &status_head->bus_addr,
+ GFP_KERNEL);
+ if (!status_head->cq) {
+ devm_kfree(&pdev->dev, status_head);
+ return NULL;
+ }
+ status_head->n_cq = status_size;
+ status_head->virt_head = status_head->cq;
+ status_head->virt_tail = status_head->cq;
+ status_head->comp_pool = NULL;
+
+ return status_head;
+}
+
+static int fsl_qdma_halt(struct fsl_qdma_engine *fsl_qdma)
+{
+ u32 reg;
+ int i, j, count = FSL_QDMA_HALT_COUNT;
+ void __iomem *block, *ctrl = fsl_qdma->ctrl_base;
+
+ /* Disable the command queue and wait for idle state. */
+ reg = qdma_readl(fsl_qdma, ctrl + FSL_QDMA_DMR);
+ reg |= FSL_QDMA_DMR_DQD;
+ qdma_writel(fsl_qdma, reg, ctrl + FSL_QDMA_DMR);
+ for (j = 0; j < fsl_qdma->block_number; j++) {
+ block = fsl_qdma->block_base +
+ FSL_QDMA_BLOCK_BASE_OFFSET(fsl_qdma, j);
+ for (i = 0; i < FSL_QDMA_QUEUE_NUM_MAX; i++)
+ qdma_writel(fsl_qdma, 0, block + FSL_QDMA_BCQMR(i));
+ }
+ while (1) {
+ reg = qdma_readl(fsl_qdma, ctrl + FSL_QDMA_DSR);
+ if (!(reg & FSL_QDMA_DSR_DB))
+ break;
+ if (count-- < 0)
+ return -EBUSY;
+ udelay(100);
+ }
+
+ for (j = 0; j < fsl_qdma->block_number; j++) {
+ block = fsl_qdma->block_base +
+ FSL_QDMA_BLOCK_BASE_OFFSET(fsl_qdma, j);
+
+ /* Disable status queue. */
+ qdma_writel(fsl_qdma, 0, block + FSL_QDMA_BSQMR);
+
+ /*
+ * clear the command queue interrupt detect register for
+ * all queues.
+ */
+ qdma_writel(fsl_qdma, FSL_QDMA_BCQIDR_CLEAR,
+ block + FSL_QDMA_BCQIDR(0));
+ }
+
+ return 0;
+}
+
+static int
+fsl_qdma_queue_transfer_complete(struct fsl_qdma_engine *fsl_qdma,
+ void *block,
+ int id)
+{
+ bool duplicate;
+ u32 reg, i, count;
+ u8 completion_status;
+ struct fsl_qdma_queue *temp_queue;
+ struct fsl_qdma_format *status_addr;
+ struct fsl_qdma_comp *fsl_comp = NULL;
+ struct fsl_qdma_queue *fsl_queue = fsl_qdma->queue;
+ struct fsl_qdma_queue *fsl_status = fsl_qdma->status[id];
+
+ count = FSL_QDMA_MAX_SIZE;
+
+ while (count--) {
+ duplicate = 0;
+ reg = qdma_readl(fsl_qdma, block + FSL_QDMA_BSQSR);
+ if (reg & FSL_QDMA_BSQSR_QE)
+ return 0;
+
+ status_addr = fsl_status->virt_head;
+
+ if (qdma_ccdf_get_queue(status_addr) ==
+ __this_cpu_read(pre.queue) &&
+ qdma_ccdf_addr_get64(status_addr) ==
+ __this_cpu_read(pre.addr))
+ duplicate = 1;
+ i = qdma_ccdf_get_queue(status_addr) +
+ id * fsl_qdma->n_queues;
+ __this_cpu_write(pre.addr, qdma_ccdf_addr_get64(status_addr));
+ __this_cpu_write(pre.queue, qdma_ccdf_get_queue(status_addr));
+ temp_queue = fsl_queue + i;
+
+ spin_lock(&temp_queue->queue_lock);
+ if (list_empty(&temp_queue->comp_used)) {
+ if (!duplicate) {
+ spin_unlock(&temp_queue->queue_lock);
+ return -EAGAIN;
+ }
+ } else {
+ fsl_comp = list_first_entry(&temp_queue->comp_used,
+ struct fsl_qdma_comp, list);
+ if (fsl_comp->bus_addr + 16 !=
+ __this_cpu_read(pre.addr)) {
+ if (!duplicate) {
+ spin_unlock(&temp_queue->queue_lock);
+ return -EAGAIN;
+ }
+ }
+ }
+
+ if (duplicate) {
+ reg = qdma_readl(fsl_qdma, block + FSL_QDMA_BSQMR);
+ reg |= FSL_QDMA_BSQMR_DI;
+ qdma_desc_addr_set64(status_addr, 0x0);
+ fsl_status->virt_head++;
+ if (fsl_status->virt_head == fsl_status->cq
+ + fsl_status->n_cq)
+ fsl_status->virt_head = fsl_status->cq;
+ qdma_writel(fsl_qdma, reg, block + FSL_QDMA_BSQMR);
+ spin_unlock(&temp_queue->queue_lock);
+ continue;
+ }
+ list_del(&fsl_comp->list);
+
+ completion_status = qdma_ccdf_get_status(status_addr);
+
+ reg = qdma_readl(fsl_qdma, block + FSL_QDMA_BSQMR);
+ reg |= FSL_QDMA_BSQMR_DI;
+ qdma_desc_addr_set64(status_addr, 0x0);
+ fsl_status->virt_head++;
+ if (fsl_status->virt_head == fsl_status->cq + fsl_status->n_cq)
+ fsl_status->virt_head = fsl_status->cq;
+ qdma_writel(fsl_qdma, reg, block + FSL_QDMA_BSQMR);
+ spin_unlock(&temp_queue->queue_lock);
+
+ /* The completion_status is evaluated here
+ * (outside of spin lock)
+ */
+ if (completion_status) {
+ /* A completion error occurred! */
+ if (completion_status & QDMA_CCDF_STATUS_WTE) {
+ /* Write transaction error */
+ fsl_comp->vdesc.tx_result.result =
+ DMA_TRANS_WRITE_FAILED;
+ } else if (completion_status & QDMA_CCDF_STATUS_RTE) {
+ /* Read transaction error */
+ fsl_comp->vdesc.tx_result.result =
+ DMA_TRANS_READ_FAILED;
+ } else {
+ /* Command/source/destination
+ * description error
+ */
+ fsl_comp->vdesc.tx_result.result =
+ DMA_TRANS_ABORTED;
+ dev_err(fsl_qdma->dma_dev.dev,
+ "DMA status descriptor error %x\n",
+ completion_status);
+ }
+ }
+
+ spin_lock(&fsl_comp->qchan->vchan.lock);
+ vchan_cookie_complete(&fsl_comp->vdesc);
+ fsl_comp->qchan->status = DMA_COMPLETE;
+ spin_unlock(&fsl_comp->qchan->vchan.lock);
+ }
+
+ return 0;
+}
+
+static irqreturn_t fsl_qdma_error_handler(int irq, void *dev_id)
+{
+ unsigned int intr;
+ struct fsl_qdma_engine *fsl_qdma = dev_id;
+ void __iomem *status = fsl_qdma->status_base;
+ unsigned int decfdw0r;
+ unsigned int decfdw1r;
+ unsigned int decfdw2r;
+ unsigned int decfdw3r;
+
+ intr = qdma_readl(fsl_qdma, status + FSL_QDMA_DEDR);
+
+ if (intr) {
+ decfdw0r = qdma_readl(fsl_qdma, status + FSL_QDMA_DECFDW0R);
+ decfdw1r = qdma_readl(fsl_qdma, status + FSL_QDMA_DECFDW1R);
+ decfdw2r = qdma_readl(fsl_qdma, status + FSL_QDMA_DECFDW2R);
+ decfdw3r = qdma_readl(fsl_qdma, status + FSL_QDMA_DECFDW3R);
+ dev_err(fsl_qdma->dma_dev.dev,
+ "DMA transaction error! (%x: %x-%x-%x-%x)\n",
+ intr, decfdw0r, decfdw1r, decfdw2r, decfdw3r);
+ }
+
+ qdma_writel(fsl_qdma, FSL_QDMA_DEDR_CLEAR, status + FSL_QDMA_DEDR);
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t fsl_qdma_queue_handler(int irq, void *dev_id)
+{
+ int id;
+ unsigned int intr, reg;
+ struct fsl_qdma_engine *fsl_qdma = dev_id;
+ void __iomem *block, *ctrl = fsl_qdma->ctrl_base;
+
+ id = irq - fsl_qdma->irq_base;
+ if (id < 0 && id > fsl_qdma->block_number) {
+ dev_err(fsl_qdma->dma_dev.dev,
+ "irq %d is wrong irq_base is %d\n",
+ irq, fsl_qdma->irq_base);
+ }
+
+ block = fsl_qdma->block_base +
+ FSL_QDMA_BLOCK_BASE_OFFSET(fsl_qdma, id);
+
+ intr = qdma_readl(fsl_qdma, block + FSL_QDMA_BCQIDR(0));
+
+ if ((intr & FSL_QDMA_CQIDR_SQT) != 0)
+ intr = fsl_qdma_queue_transfer_complete(fsl_qdma, block, id);
+
+ if (intr != 0) {
+ reg = qdma_readl(fsl_qdma, ctrl + FSL_QDMA_DMR);
+ reg |= FSL_QDMA_DMR_DQD;
+ qdma_writel(fsl_qdma, reg, ctrl + FSL_QDMA_DMR);
+ qdma_writel(fsl_qdma, 0, block + FSL_QDMA_BCQIER(0));
+ dev_err(fsl_qdma->dma_dev.dev, "QDMA: status err!\n");
+ }
+
+ /* Clear all detected events and interrupts. */
+ qdma_writel(fsl_qdma, FSL_QDMA_BCQIDR_CLEAR,
+ block + FSL_QDMA_BCQIDR(0));
+
+ return IRQ_HANDLED;
+}
+
+static int
+fsl_qdma_irq_init(struct platform_device *pdev,
+ struct fsl_qdma_engine *fsl_qdma)
+{
+ int i;
+ int cpu;
+ int ret;
+ char irq_name[20];
+
+ fsl_qdma->error_irq =
+ platform_get_irq_byname(pdev, "qdma-error");
+ if (fsl_qdma->error_irq < 0)
+ return fsl_qdma->error_irq;
+
+ ret = devm_request_irq(&pdev->dev, fsl_qdma->error_irq,
+ fsl_qdma_error_handler, 0,
+ "qDMA error", fsl_qdma);
+ if (ret) {
+ dev_err(&pdev->dev, "Can't register qDMA controller IRQ.\n");
+ return ret;
+ }
+
+ for (i = 0; i < fsl_qdma->block_number; i++) {
+ sprintf(irq_name, "qdma-queue%d", i);
+ fsl_qdma->queue_irq[i] =
+ platform_get_irq_byname(pdev, irq_name);
+
+ if (fsl_qdma->queue_irq[i] < 0)
+ return fsl_qdma->queue_irq[i];
+
+ ret = devm_request_irq(&pdev->dev,
+ fsl_qdma->queue_irq[i],
+ fsl_qdma_queue_handler,
+ 0,
+ "qDMA queue",
+ fsl_qdma);
+ if (ret) {
+ dev_err(&pdev->dev,
+ "Can't register qDMA queue IRQ.\n");
+ return ret;
+ }
+
+ cpu = i % num_online_cpus();
+ ret = irq_set_affinity_hint(fsl_qdma->queue_irq[i],
+ get_cpu_mask(cpu));
+ if (ret) {
+ dev_err(&pdev->dev,
+ "Can't set cpu %d affinity to IRQ %d.\n",
+ cpu,
+ fsl_qdma->queue_irq[i]);
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+static void fsl_qdma_irq_exit(struct platform_device *pdev,
+ struct fsl_qdma_engine *fsl_qdma)
+{
+ int i;
+
+ devm_free_irq(&pdev->dev, fsl_qdma->error_irq, fsl_qdma);
+ for (i = 0; i < fsl_qdma->block_number; i++)
+ devm_free_irq(&pdev->dev, fsl_qdma->queue_irq[i], fsl_qdma);
+}
+
+static int fsl_qdma_reg_init(struct fsl_qdma_engine *fsl_qdma)
+{
+ u32 reg;
+ int i, j, ret;
+ struct fsl_qdma_queue *temp;
+ void __iomem *status = fsl_qdma->status_base;
+ void __iomem *block, *ctrl = fsl_qdma->ctrl_base;
+ struct fsl_qdma_queue *fsl_queue = fsl_qdma->queue;
+
+ /* Try to halt the qDMA engine first. */
+ ret = fsl_qdma_halt(fsl_qdma);
+ if (ret) {
+ dev_err(fsl_qdma->dma_dev.dev, "DMA halt failed!");
+ return ret;
+ }
+
+ for (i = 0; i < fsl_qdma->block_number; i++) {
+ /*
+ * Clear the command queue interrupt detect register for
+ * all queues.
+ */
+
+ block = fsl_qdma->block_base +
+ FSL_QDMA_BLOCK_BASE_OFFSET(fsl_qdma, i);
+ qdma_writel(fsl_qdma, FSL_QDMA_BCQIDR_CLEAR,
+ block + FSL_QDMA_BCQIDR(0));
+ }
+
+ for (j = 0; j < fsl_qdma->block_number; j++) {
+ block = fsl_qdma->block_base +
+ FSL_QDMA_BLOCK_BASE_OFFSET(fsl_qdma, j);
+ for (i = 0; i < fsl_qdma->n_queues; i++) {
+ temp = fsl_queue + i + (j * fsl_qdma->n_queues);
+ /*
+ * Initialize Command Queue registers to
+ * point to the first
+ * command descriptor in memory.
+ * Dequeue Pointer Address Registers
+ * Enqueue Pointer Address Registers
+ */
+
+ qdma_writel(fsl_qdma, temp->bus_addr,
+ block + FSL_QDMA_BCQDPA_SADDR(i));
+ qdma_writel(fsl_qdma, temp->bus_addr,
+ block + FSL_QDMA_BCQEPA_SADDR(i));
+
+ /* Initialize the queue mode. */
+ reg = FSL_QDMA_BCQMR_EN;
+ reg |= FSL_QDMA_BCQMR_CD_THLD(ilog2(temp->n_cq) - 4);
+ reg |= FSL_QDMA_BCQMR_CQ_SIZE(ilog2(temp->n_cq) - 6);
+ qdma_writel(fsl_qdma, reg, block + FSL_QDMA_BCQMR(i));
+ }
+
+ /*
+ * Workaround for erratum: ERR010812.
+ * We must enable XOFF to avoid the enqueue rejection occurs.
+ * Setting SQCCMR ENTER_WM to 0x20.
+ */
+
+ qdma_writel(fsl_qdma, FSL_QDMA_SQCCMR_ENTER_WM,
+ block + FSL_QDMA_SQCCMR);
+
+ /*
+ * Initialize status queue registers to point to the first
+ * command descriptor in memory.
+ * Dequeue Pointer Address Registers
+ * Enqueue Pointer Address Registers
+ */
+
+ qdma_writel(fsl_qdma, fsl_qdma->status[j]->bus_addr,
+ block + FSL_QDMA_SQEPAR);
+ qdma_writel(fsl_qdma, fsl_qdma->status[j]->bus_addr,
+ block + FSL_QDMA_SQDPAR);
+ /* Initialize status queue interrupt. */
+ qdma_writel(fsl_qdma, FSL_QDMA_BCQIER_CQTIE,
+ block + FSL_QDMA_BCQIER(0));
+ qdma_writel(fsl_qdma, FSL_QDMA_BSQICR_ICEN |
+ FSL_QDMA_BSQICR_ICST(5) | 0x8000,
+ block + FSL_QDMA_BSQICR);
+ qdma_writel(fsl_qdma, FSL_QDMA_CQIER_MEIE |
+ FSL_QDMA_CQIER_TEIE,
+ block + FSL_QDMA_CQIER);
+
+ /* Initialize the status queue mode. */
+ reg = FSL_QDMA_BSQMR_EN;
+ reg |= FSL_QDMA_BSQMR_CQ_SIZE(ilog2
+ (fsl_qdma->status[j]->n_cq) - 6);
+
+ qdma_writel(fsl_qdma, reg, block + FSL_QDMA_BSQMR);
+ reg = qdma_readl(fsl_qdma, block + FSL_QDMA_BSQMR);
+ }
+
+ /* Initialize controller interrupt register. */
+ qdma_writel(fsl_qdma, FSL_QDMA_DEDR_CLEAR, status + FSL_QDMA_DEDR);
+ qdma_writel(fsl_qdma, FSL_QDMA_DEIER_CLEAR, status + FSL_QDMA_DEIER);
+
+ reg = qdma_readl(fsl_qdma, ctrl + FSL_QDMA_DMR);
+ reg &= ~FSL_QDMA_DMR_DQD;
+ qdma_writel(fsl_qdma, reg, ctrl + FSL_QDMA_DMR);
+
+ return 0;
+}
+
+static struct dma_async_tx_descriptor *
+fsl_qdma_prep_memcpy(struct dma_chan *chan, dma_addr_t dst,
+ dma_addr_t src, size_t len, unsigned long flags)
+{
+ struct fsl_qdma_comp *fsl_comp;
+ struct fsl_qdma_chan *fsl_chan = to_fsl_qdma_chan(chan);
+
+ fsl_comp = fsl_qdma_request_enqueue_desc(fsl_chan);
+
+ if (!fsl_comp)
+ return NULL;
+
+ fsl_qdma_comp_fill_memcpy(fsl_comp, dst, src, len);
+
+ return vchan_tx_prep(&fsl_chan->vchan, &fsl_comp->vdesc, flags);
+}
+
+static void fsl_qdma_enqueue_desc(struct fsl_qdma_chan *fsl_chan)
+{
+ u32 reg;
+ struct virt_dma_desc *vdesc;
+ struct fsl_qdma_comp *fsl_comp;
+ struct fsl_qdma_queue *fsl_queue = fsl_chan->queue;
+ void __iomem *block = fsl_queue->block_base;
+
+ reg = qdma_readl(fsl_chan->qdma, block + FSL_QDMA_BCQSR(fsl_queue->id));
+ if (reg & (FSL_QDMA_BCQSR_QF | FSL_QDMA_BCQSR_XOFF))
+ return;
+ vdesc = vchan_next_desc(&fsl_chan->vchan);
+ if (!vdesc)
+ return;
+ list_del(&vdesc->node);
+ fsl_comp = to_fsl_qdma_comp(vdesc);
+
+ memcpy(fsl_queue->virt_head++,
+ fsl_comp->virt_addr, sizeof(struct fsl_qdma_format));
+ if (fsl_queue->virt_head == fsl_queue->cq + fsl_queue->n_cq)
+ fsl_queue->virt_head = fsl_queue->cq;
+
+ list_add_tail(&fsl_comp->list, &fsl_queue->comp_used);
+ barrier();
+ reg = qdma_readl(fsl_chan->qdma, block + FSL_QDMA_BCQMR(fsl_queue->id));
+ reg |= FSL_QDMA_BCQMR_EI;
+ qdma_writel(fsl_chan->qdma, reg, block + FSL_QDMA_BCQMR(fsl_queue->id));
+ fsl_chan->status = DMA_IN_PROGRESS;
+}
+
+static void fsl_qdma_free_desc(struct virt_dma_desc *vdesc)
+{
+ unsigned long flags;
+ struct fsl_qdma_comp *fsl_comp;
+ struct fsl_qdma_queue *fsl_queue;
+
+ fsl_comp = to_fsl_qdma_comp(vdesc);
+ fsl_queue = fsl_comp->qchan->queue;
+
+ spin_lock_irqsave(&fsl_queue->queue_lock, flags);
+ list_add_tail(&fsl_comp->list, &fsl_queue->comp_free);
+ spin_unlock_irqrestore(&fsl_queue->queue_lock, flags);
+}
+
+static void fsl_qdma_issue_pending(struct dma_chan *chan)
+{
+ unsigned long flags;
+ struct fsl_qdma_chan *fsl_chan = to_fsl_qdma_chan(chan);
+ struct fsl_qdma_queue *fsl_queue = fsl_chan->queue;
+
+ spin_lock_irqsave(&fsl_queue->queue_lock, flags);
+ spin_lock(&fsl_chan->vchan.lock);
+ if (vchan_issue_pending(&fsl_chan->vchan))
+ fsl_qdma_enqueue_desc(fsl_chan);
+ spin_unlock(&fsl_chan->vchan.lock);
+ spin_unlock_irqrestore(&fsl_queue->queue_lock, flags);
+}
+
+static void fsl_qdma_synchronize(struct dma_chan *chan)
+{
+ struct fsl_qdma_chan *fsl_chan = to_fsl_qdma_chan(chan);
+
+ vchan_synchronize(&fsl_chan->vchan);
+}
+
+static int fsl_qdma_terminate_all(struct dma_chan *chan)
+{
+ LIST_HEAD(head);
+ unsigned long flags;
+ struct fsl_qdma_chan *fsl_chan = to_fsl_qdma_chan(chan);
+
+ spin_lock_irqsave(&fsl_chan->vchan.lock, flags);
+ vchan_get_all_descriptors(&fsl_chan->vchan, &head);
+ spin_unlock_irqrestore(&fsl_chan->vchan.lock, flags);
+ vchan_dma_desc_free_list(&fsl_chan->vchan, &head);
+ return 0;
+}
+
+static int fsl_qdma_alloc_chan_resources(struct dma_chan *chan)
+{
+ int ret;
+ struct fsl_qdma_chan *fsl_chan = to_fsl_qdma_chan(chan);
+ struct fsl_qdma_engine *fsl_qdma = fsl_chan->qdma;
+ struct fsl_qdma_queue *fsl_queue = fsl_chan->queue;
+
+ if (fsl_queue->comp_pool && fsl_queue->desc_pool)
+ return fsl_qdma->desc_allocated;
+
+ INIT_LIST_HEAD(&fsl_queue->comp_free);
+
+ /*
+ * The dma pool for queue command buffer
+ */
+ fsl_queue->comp_pool =
+ dma_pool_create("comp_pool",
+ chan->device->dev,
+ FSL_QDMA_COMMAND_BUFFER_SIZE,
+ 64, 0);
+ if (!fsl_queue->comp_pool)
+ return -ENOMEM;
+
+ /*
+ * The dma pool for Descriptor(SD/DD) buffer
+ */
+ fsl_queue->desc_pool =
+ dma_pool_create("desc_pool",
+ chan->device->dev,
+ FSL_QDMA_DESCRIPTOR_BUFFER_SIZE,
+ 32, 0);
+ if (!fsl_queue->desc_pool)
+ goto err_desc_pool;
+
+ ret = fsl_qdma_pre_request_enqueue_desc(fsl_queue);
+ if (ret) {
+ dev_err(chan->device->dev,
+ "failed to alloc dma buffer for S/G descriptor\n");
+ goto err_mem;
+ }
+
+ fsl_qdma->desc_allocated++;
+ return fsl_qdma->desc_allocated;
+
+err_mem:
+ dma_pool_destroy(fsl_queue->desc_pool);
+err_desc_pool:
+ dma_pool_destroy(fsl_queue->comp_pool);
+ return -ENOMEM;
+}
+
+static int fsl_qdma_probe(struct platform_device *pdev)
+{
+ int ret, i;
+ int blk_num, blk_off;
+ u32 len, chans, queues;
+ struct resource *res;
+ struct fsl_qdma_chan *fsl_chan;
+ struct fsl_qdma_engine *fsl_qdma;
+ struct device_node *np = pdev->dev.of_node;
+
+ ret = of_property_read_u32(np, "dma-channels", &chans);
+ if (ret) {
+ dev_err(&pdev->dev, "Can't get dma-channels.\n");
+ return ret;
+ }
+
+ ret = of_property_read_u32(np, "block-offset", &blk_off);
+ if (ret) {
+ dev_err(&pdev->dev, "Can't get block-offset.\n");
+ return ret;
+ }
+
+ ret = of_property_read_u32(np, "block-number", &blk_num);
+ if (ret) {
+ dev_err(&pdev->dev, "Can't get block-number.\n");
+ return ret;
+ }
+
+ blk_num = min_t(int, blk_num, num_online_cpus());
+
+ len = sizeof(*fsl_qdma);
+ fsl_qdma = devm_kzalloc(&pdev->dev, len, GFP_KERNEL);
+ if (!fsl_qdma)
+ return -ENOMEM;
+
+ len = sizeof(*fsl_chan) * chans;
+ fsl_qdma->chans = devm_kzalloc(&pdev->dev, len, GFP_KERNEL);
+ if (!fsl_qdma->chans)
+ return -ENOMEM;
+
+ len = sizeof(struct fsl_qdma_queue *) * blk_num;
+ fsl_qdma->status = devm_kzalloc(&pdev->dev, len, GFP_KERNEL);
+ if (!fsl_qdma->status)
+ return -ENOMEM;
+
+ len = sizeof(int) * blk_num;
+ fsl_qdma->queue_irq = devm_kzalloc(&pdev->dev, len, GFP_KERNEL);
+ if (!fsl_qdma->queue_irq)
+ return -ENOMEM;
+
+ ret = of_property_read_u32(np, "fsl,dma-queues", &queues);
+ if (ret) {
+ dev_err(&pdev->dev, "Can't get queues.\n");
+ return ret;
+ }
+
+ fsl_qdma->desc_allocated = 0;
+ fsl_qdma->n_chans = chans;
+ fsl_qdma->n_queues = queues;
+ fsl_qdma->block_number = blk_num;
+ fsl_qdma->block_offset = blk_off;
+
+ mutex_init(&fsl_qdma->fsl_qdma_mutex);
+
+ for (i = 0; i < fsl_qdma->block_number; i++) {
+ fsl_qdma->status[i] = fsl_qdma_prep_status_queue(pdev);
+ if (!fsl_qdma->status[i])
+ return -ENOMEM;
+ }
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ fsl_qdma->ctrl_base = devm_ioremap_resource(&pdev->dev, res);
+ if (IS_ERR(fsl_qdma->ctrl_base))
+ return PTR_ERR(fsl_qdma->ctrl_base);
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+ fsl_qdma->status_base = devm_ioremap_resource(&pdev->dev, res);
+ if (IS_ERR(fsl_qdma->status_base))
+ return PTR_ERR(fsl_qdma->status_base);
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 2);
+ fsl_qdma->block_base = devm_ioremap_resource(&pdev->dev, res);
+ if (IS_ERR(fsl_qdma->block_base))
+ return PTR_ERR(fsl_qdma->block_base);
+ fsl_qdma->queue = fsl_qdma_alloc_queue_resources(pdev, fsl_qdma);
+ if (!fsl_qdma->queue)
+ return -ENOMEM;
+
+ ret = fsl_qdma_irq_init(pdev, fsl_qdma);
+ if (ret)
+ return ret;
+
+ fsl_qdma->irq_base = platform_get_irq_byname(pdev, "qdma-queue0");
+ if (fsl_qdma->irq_base < 0)
+ return fsl_qdma->irq_base;
+
+ fsl_qdma->feature = of_property_read_bool(np, "big-endian");
+ INIT_LIST_HEAD(&fsl_qdma->dma_dev.channels);
+
+ for (i = 0; i < fsl_qdma->n_chans; i++) {
+ struct fsl_qdma_chan *fsl_chan = &fsl_qdma->chans[i];
+
+ fsl_chan->qdma = fsl_qdma;
+ fsl_chan->queue = fsl_qdma->queue + i % (fsl_qdma->n_queues *
+ fsl_qdma->block_number);
+ fsl_chan->vchan.desc_free = fsl_qdma_free_desc;
+ vchan_init(&fsl_chan->vchan, &fsl_qdma->dma_dev);
+ }
+
+ dma_cap_set(DMA_MEMCPY, fsl_qdma->dma_dev.cap_mask);
+
+ fsl_qdma->dma_dev.dev = &pdev->dev;
+ fsl_qdma->dma_dev.device_free_chan_resources =
+ fsl_qdma_free_chan_resources;
+ fsl_qdma->dma_dev.device_alloc_chan_resources =
+ fsl_qdma_alloc_chan_resources;
+ fsl_qdma->dma_dev.device_tx_status = dma_cookie_status;
+ fsl_qdma->dma_dev.device_prep_dma_memcpy = fsl_qdma_prep_memcpy;
+ fsl_qdma->dma_dev.device_issue_pending = fsl_qdma_issue_pending;
+ fsl_qdma->dma_dev.device_synchronize = fsl_qdma_synchronize;
+ fsl_qdma->dma_dev.device_terminate_all = fsl_qdma_terminate_all;
+
+ ret = dma_set_mask(&pdev->dev, DMA_BIT_MASK(40));
+ if (ret) {
+ dev_err(&pdev->dev, "dma_set_mask failure.\n");
+ return ret;
+ }
+
+ platform_set_drvdata(pdev, fsl_qdma);
+
+ ret = dma_async_device_register(&fsl_qdma->dma_dev);
+ if (ret) {
+ dev_err(&pdev->dev,
+ "Can't register NXP Layerscape qDMA engine.\n");
+ return ret;
+ }
+
+ ret = fsl_qdma_reg_init(fsl_qdma);
+ if (ret) {
+ dev_err(&pdev->dev, "Can't Initialize the qDMA engine.\n");
+ return ret;
+ }
+
+ return 0;
+}
+
+static void fsl_qdma_cleanup_vchan(struct dma_device *dmadev)
+{
+ struct fsl_qdma_chan *chan, *_chan;
+
+ list_for_each_entry_safe(chan, _chan,
+ &dmadev->channels, vchan.chan.device_node) {
+ list_del(&chan->vchan.chan.device_node);
+ tasklet_kill(&chan->vchan.task);
+ }
+}
+
+static int fsl_qdma_remove(struct platform_device *pdev)
+{
+ int i;
+ struct fsl_qdma_queue *status;
+ struct device_node *np = pdev->dev.of_node;
+ struct fsl_qdma_engine *fsl_qdma = platform_get_drvdata(pdev);
+
+ fsl_qdma_irq_exit(pdev, fsl_qdma);
+ fsl_qdma_cleanup_vchan(&fsl_qdma->dma_dev);
+ of_dma_controller_free(np);
+ dma_async_device_unregister(&fsl_qdma->dma_dev);
+
+ for (i = 0; i < fsl_qdma->block_number; i++) {
+ status = fsl_qdma->status[i];
+ dma_free_coherent(&pdev->dev, sizeof(struct fsl_qdma_format) *
+ status->n_cq, status->cq, status->bus_addr);
+ }
+ return 0;
+}
+
+static const struct of_device_id fsl_qdma_dt_ids[] = {
+ { .compatible = "fsl,ls1021a-qdma", },
+ { /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, fsl_qdma_dt_ids);
+
+static struct platform_driver fsl_qdma_driver = {
+ .driver = {
+ .name = "fsl-qdma",
+ .of_match_table = fsl_qdma_dt_ids,
+ },
+ .probe = fsl_qdma_probe,
+ .remove = fsl_qdma_remove,
+};
+
+module_platform_driver(fsl_qdma_driver);
+
+MODULE_ALIAS("platform:fsl-qdma");
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("NXP Layerscape qDMA engine driver");
diff --git a/drivers/dma/fsl_raid.c b/drivers/dma/fsl_raid.c
new file mode 100644
index 000000000..fdf3500d9
--- /dev/null
+++ b/drivers/dma/fsl_raid.c
@@ -0,0 +1,897 @@
+/*
+ * drivers/dma/fsl_raid.c
+ *
+ * Freescale RAID Engine device driver
+ *
+ * Author:
+ * Harninder Rai <harninder.rai@freescale.com>
+ * Naveen Burmi <naveenburmi@freescale.com>
+ *
+ * Rewrite:
+ * Xuelin Shi <xuelin.shi@freescale.com>
+ *
+ * Copyright (c) 2010-2014 Freescale Semiconductor, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Freescale Semiconductor nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Theory of operation:
+ *
+ * General capabilities:
+ * RAID Engine (RE) block is capable of offloading XOR, memcpy and P/Q
+ * calculations required in RAID5 and RAID6 operations. RE driver
+ * registers with Linux's ASYNC layer as dma driver. RE hardware
+ * maintains strict ordering of the requests through chained
+ * command queueing.
+ *
+ * Data flow:
+ * Software RAID layer of Linux (MD layer) maintains RAID partitions,
+ * strips, stripes etc. It sends requests to the underlying ASYNC layer
+ * which further passes it to RE driver. ASYNC layer decides which request
+ * goes to which job ring of RE hardware. For every request processed by
+ * RAID Engine, driver gets an interrupt unless coalescing is set. The
+ * per job ring interrupt handler checks the status register for errors,
+ * clears the interrupt and leave the post interrupt processing to the irq
+ * thread.
+ */
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/of_irq.h>
+#include <linux/of_address.h>
+#include <linux/of_platform.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmapool.h>
+#include <linux/dmaengine.h>
+#include <linux/io.h>
+#include <linux/spinlock.h>
+#include <linux/slab.h>
+
+#include "dmaengine.h"
+#include "fsl_raid.h"
+
+#define FSL_RE_MAX_XOR_SRCS 16
+#define FSL_RE_MAX_PQ_SRCS 16
+#define FSL_RE_MIN_DESCS 256
+#define FSL_RE_MAX_DESCS (4 * FSL_RE_MIN_DESCS)
+#define FSL_RE_FRAME_FORMAT 0x1
+#define FSL_RE_MAX_DATA_LEN (1024*1024)
+
+#define to_fsl_re_dma_desc(tx) container_of(tx, struct fsl_re_desc, async_tx)
+
+/* Add descriptors into per chan software queue - submit_q */
+static dma_cookie_t fsl_re_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+ struct fsl_re_desc *desc;
+ struct fsl_re_chan *re_chan;
+ dma_cookie_t cookie;
+ unsigned long flags;
+
+ desc = to_fsl_re_dma_desc(tx);
+ re_chan = container_of(tx->chan, struct fsl_re_chan, chan);
+
+ spin_lock_irqsave(&re_chan->desc_lock, flags);
+ cookie = dma_cookie_assign(tx);
+ list_add_tail(&desc->node, &re_chan->submit_q);
+ spin_unlock_irqrestore(&re_chan->desc_lock, flags);
+
+ return cookie;
+}
+
+/* Copy descriptor from per chan software queue into hardware job ring */
+static void fsl_re_issue_pending(struct dma_chan *chan)
+{
+ struct fsl_re_chan *re_chan;
+ int avail;
+ struct fsl_re_desc *desc, *_desc;
+ unsigned long flags;
+
+ re_chan = container_of(chan, struct fsl_re_chan, chan);
+
+ spin_lock_irqsave(&re_chan->desc_lock, flags);
+ avail = FSL_RE_SLOT_AVAIL(
+ in_be32(&re_chan->jrregs->inbring_slot_avail));
+
+ list_for_each_entry_safe(desc, _desc, &re_chan->submit_q, node) {
+ if (!avail)
+ break;
+
+ list_move_tail(&desc->node, &re_chan->active_q);
+
+ memcpy(&re_chan->inb_ring_virt_addr[re_chan->inb_count],
+ &desc->hwdesc, sizeof(struct fsl_re_hw_desc));
+
+ re_chan->inb_count = (re_chan->inb_count + 1) &
+ FSL_RE_RING_SIZE_MASK;
+ out_be32(&re_chan->jrregs->inbring_add_job, FSL_RE_ADD_JOB(1));
+ avail--;
+ }
+ spin_unlock_irqrestore(&re_chan->desc_lock, flags);
+}
+
+static void fsl_re_desc_done(struct fsl_re_desc *desc)
+{
+ dma_cookie_complete(&desc->async_tx);
+ dma_descriptor_unmap(&desc->async_tx);
+ dmaengine_desc_get_callback_invoke(&desc->async_tx, NULL);
+}
+
+static void fsl_re_cleanup_descs(struct fsl_re_chan *re_chan)
+{
+ struct fsl_re_desc *desc, *_desc;
+ unsigned long flags;
+
+ spin_lock_irqsave(&re_chan->desc_lock, flags);
+ list_for_each_entry_safe(desc, _desc, &re_chan->ack_q, node) {
+ if (async_tx_test_ack(&desc->async_tx))
+ list_move_tail(&desc->node, &re_chan->free_q);
+ }
+ spin_unlock_irqrestore(&re_chan->desc_lock, flags);
+
+ fsl_re_issue_pending(&re_chan->chan);
+}
+
+static void fsl_re_dequeue(struct tasklet_struct *t)
+{
+ struct fsl_re_chan *re_chan = from_tasklet(re_chan, t, irqtask);
+ struct fsl_re_desc *desc, *_desc;
+ struct fsl_re_hw_desc *hwdesc;
+ unsigned long flags;
+ unsigned int count, oub_count;
+ int found;
+
+ fsl_re_cleanup_descs(re_chan);
+
+ spin_lock_irqsave(&re_chan->desc_lock, flags);
+ count = FSL_RE_SLOT_FULL(in_be32(&re_chan->jrregs->oubring_slot_full));
+ while (count--) {
+ found = 0;
+ hwdesc = &re_chan->oub_ring_virt_addr[re_chan->oub_count];
+ list_for_each_entry_safe(desc, _desc, &re_chan->active_q,
+ node) {
+ /* compare the hw dma addr to find the completed */
+ if (desc->hwdesc.lbea32 == hwdesc->lbea32 &&
+ desc->hwdesc.addr_low == hwdesc->addr_low) {
+ found = 1;
+ break;
+ }
+ }
+
+ if (found) {
+ fsl_re_desc_done(desc);
+ list_move_tail(&desc->node, &re_chan->ack_q);
+ } else {
+ dev_err(re_chan->dev,
+ "found hwdesc not in sw queue, discard it\n");
+ }
+
+ oub_count = (re_chan->oub_count + 1) & FSL_RE_RING_SIZE_MASK;
+ re_chan->oub_count = oub_count;
+
+ out_be32(&re_chan->jrregs->oubring_job_rmvd,
+ FSL_RE_RMVD_JOB(1));
+ }
+ spin_unlock_irqrestore(&re_chan->desc_lock, flags);
+}
+
+/* Per Job Ring interrupt handler */
+static irqreturn_t fsl_re_isr(int irq, void *data)
+{
+ struct fsl_re_chan *re_chan;
+ u32 irqstate, status;
+
+ re_chan = dev_get_drvdata((struct device *)data);
+
+ irqstate = in_be32(&re_chan->jrregs->jr_interrupt_status);
+ if (!irqstate)
+ return IRQ_NONE;
+
+ /*
+ * There's no way in upper layer (read MD layer) to recover from
+ * error conditions except restart everything. In long term we
+ * need to do something more than just crashing
+ */
+ if (irqstate & FSL_RE_ERROR) {
+ status = in_be32(&re_chan->jrregs->jr_status);
+ dev_err(re_chan->dev, "chan error irqstate: %x, status: %x\n",
+ irqstate, status);
+ }
+
+ /* Clear interrupt */
+ out_be32(&re_chan->jrregs->jr_interrupt_status, FSL_RE_CLR_INTR);
+
+ tasklet_schedule(&re_chan->irqtask);
+
+ return IRQ_HANDLED;
+}
+
+static enum dma_status fsl_re_tx_status(struct dma_chan *chan,
+ dma_cookie_t cookie,
+ struct dma_tx_state *txstate)
+{
+ return dma_cookie_status(chan, cookie, txstate);
+}
+
+static void fill_cfd_frame(struct fsl_re_cmpnd_frame *cf, u8 index,
+ size_t length, dma_addr_t addr, bool final)
+{
+ u32 efrl = length & FSL_RE_CF_LENGTH_MASK;
+
+ efrl |= final << FSL_RE_CF_FINAL_SHIFT;
+ cf[index].efrl32 = efrl;
+ cf[index].addr_high = upper_32_bits(addr);
+ cf[index].addr_low = lower_32_bits(addr);
+}
+
+static struct fsl_re_desc *fsl_re_init_desc(struct fsl_re_chan *re_chan,
+ struct fsl_re_desc *desc,
+ void *cf, dma_addr_t paddr)
+{
+ desc->re_chan = re_chan;
+ desc->async_tx.tx_submit = fsl_re_tx_submit;
+ dma_async_tx_descriptor_init(&desc->async_tx, &re_chan->chan);
+ INIT_LIST_HEAD(&desc->node);
+
+ desc->hwdesc.fmt32 = FSL_RE_FRAME_FORMAT << FSL_RE_HWDESC_FMT_SHIFT;
+ desc->hwdesc.lbea32 = upper_32_bits(paddr);
+ desc->hwdesc.addr_low = lower_32_bits(paddr);
+ desc->cf_addr = cf;
+ desc->cf_paddr = paddr;
+
+ desc->cdb_addr = (void *)(cf + FSL_RE_CF_DESC_SIZE);
+ desc->cdb_paddr = paddr + FSL_RE_CF_DESC_SIZE;
+
+ return desc;
+}
+
+static struct fsl_re_desc *fsl_re_chan_alloc_desc(struct fsl_re_chan *re_chan,
+ unsigned long flags)
+{
+ struct fsl_re_desc *desc = NULL;
+ void *cf;
+ dma_addr_t paddr;
+ unsigned long lock_flag;
+
+ fsl_re_cleanup_descs(re_chan);
+
+ spin_lock_irqsave(&re_chan->desc_lock, lock_flag);
+ if (!list_empty(&re_chan->free_q)) {
+ /* take one desc from free_q */
+ desc = list_first_entry(&re_chan->free_q,
+ struct fsl_re_desc, node);
+ list_del(&desc->node);
+
+ desc->async_tx.flags = flags;
+ }
+ spin_unlock_irqrestore(&re_chan->desc_lock, lock_flag);
+
+ if (!desc) {
+ desc = kzalloc(sizeof(*desc), GFP_NOWAIT);
+ if (!desc)
+ return NULL;
+
+ cf = dma_pool_alloc(re_chan->re_dev->cf_desc_pool, GFP_NOWAIT,
+ &paddr);
+ if (!cf) {
+ kfree(desc);
+ return NULL;
+ }
+
+ desc = fsl_re_init_desc(re_chan, desc, cf, paddr);
+ desc->async_tx.flags = flags;
+
+ spin_lock_irqsave(&re_chan->desc_lock, lock_flag);
+ re_chan->alloc_count++;
+ spin_unlock_irqrestore(&re_chan->desc_lock, lock_flag);
+ }
+
+ return desc;
+}
+
+static struct dma_async_tx_descriptor *fsl_re_prep_dma_genq(
+ struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src,
+ unsigned int src_cnt, const unsigned char *scf, size_t len,
+ unsigned long flags)
+{
+ struct fsl_re_chan *re_chan;
+ struct fsl_re_desc *desc;
+ struct fsl_re_xor_cdb *xor;
+ struct fsl_re_cmpnd_frame *cf;
+ u32 cdb;
+ unsigned int i, j;
+ unsigned int save_src_cnt = src_cnt;
+ int cont_q = 0;
+
+ re_chan = container_of(chan, struct fsl_re_chan, chan);
+ if (len > FSL_RE_MAX_DATA_LEN) {
+ dev_err(re_chan->dev, "genq tx length %zu, max length %d\n",
+ len, FSL_RE_MAX_DATA_LEN);
+ return NULL;
+ }
+
+ desc = fsl_re_chan_alloc_desc(re_chan, flags);
+ if (desc <= 0)
+ return NULL;
+
+ if (scf && (flags & DMA_PREP_CONTINUE)) {
+ cont_q = 1;
+ src_cnt += 1;
+ }
+
+ /* Filling xor CDB */
+ cdb = FSL_RE_XOR_OPCODE << FSL_RE_CDB_OPCODE_SHIFT;
+ cdb |= (src_cnt - 1) << FSL_RE_CDB_NRCS_SHIFT;
+ cdb |= FSL_RE_BLOCK_SIZE << FSL_RE_CDB_BLKSIZE_SHIFT;
+ cdb |= FSL_RE_INTR_ON_ERROR << FSL_RE_CDB_ERROR_SHIFT;
+ cdb |= FSL_RE_DATA_DEP << FSL_RE_CDB_DEPEND_SHIFT;
+ xor = desc->cdb_addr;
+ xor->cdb32 = cdb;
+
+ if (scf) {
+ /* compute q = src0*coef0^src1*coef1^..., * is GF(8) mult */
+ for (i = 0; i < save_src_cnt; i++)
+ xor->gfm[i] = scf[i];
+ if (cont_q)
+ xor->gfm[i++] = 1;
+ } else {
+ /* compute P, that is XOR all srcs */
+ for (i = 0; i < src_cnt; i++)
+ xor->gfm[i] = 1;
+ }
+
+ /* Filling frame 0 of compound frame descriptor with CDB */
+ cf = desc->cf_addr;
+ fill_cfd_frame(cf, 0, sizeof(*xor), desc->cdb_paddr, 0);
+
+ /* Fill CFD's 1st frame with dest buffer */
+ fill_cfd_frame(cf, 1, len, dest, 0);
+
+ /* Fill CFD's rest of the frames with source buffers */
+ for (i = 2, j = 0; j < save_src_cnt; i++, j++)
+ fill_cfd_frame(cf, i, len, src[j], 0);
+
+ if (cont_q)
+ fill_cfd_frame(cf, i++, len, dest, 0);
+
+ /* Setting the final bit in the last source buffer frame in CFD */
+ cf[i - 1].efrl32 |= 1 << FSL_RE_CF_FINAL_SHIFT;
+
+ return &desc->async_tx;
+}
+
+/*
+ * Prep function for P parity calculation.In RAID Engine terminology,
+ * XOR calculation is called GenQ calculation done through GenQ command
+ */
+static struct dma_async_tx_descriptor *fsl_re_prep_dma_xor(
+ struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src,
+ unsigned int src_cnt, size_t len, unsigned long flags)
+{
+ /* NULL let genq take all coef as 1 */
+ return fsl_re_prep_dma_genq(chan, dest, src, src_cnt, NULL, len, flags);
+}
+
+/*
+ * Prep function for P/Q parity calculation.In RAID Engine terminology,
+ * P/Q calculation is called GenQQ done through GenQQ command
+ */
+static struct dma_async_tx_descriptor *fsl_re_prep_dma_pq(
+ struct dma_chan *chan, dma_addr_t *dest, dma_addr_t *src,
+ unsigned int src_cnt, const unsigned char *scf, size_t len,
+ unsigned long flags)
+{
+ struct fsl_re_chan *re_chan;
+ struct fsl_re_desc *desc;
+ struct fsl_re_pq_cdb *pq;
+ struct fsl_re_cmpnd_frame *cf;
+ u32 cdb;
+ u8 *p;
+ int gfmq_len, i, j;
+ unsigned int save_src_cnt = src_cnt;
+
+ re_chan = container_of(chan, struct fsl_re_chan, chan);
+ if (len > FSL_RE_MAX_DATA_LEN) {
+ dev_err(re_chan->dev, "pq tx length is %zu, max length is %d\n",
+ len, FSL_RE_MAX_DATA_LEN);
+ return NULL;
+ }
+
+ /*
+ * RE requires at least 2 sources, if given only one source, we pass the
+ * second source same as the first one.
+ * With only one source, generating P is meaningless, only generate Q.
+ */
+ if (src_cnt == 1) {
+ struct dma_async_tx_descriptor *tx;
+ dma_addr_t dma_src[2];
+ unsigned char coef[2];
+
+ dma_src[0] = *src;
+ coef[0] = *scf;
+ dma_src[1] = *src;
+ coef[1] = 0;
+ tx = fsl_re_prep_dma_genq(chan, dest[1], dma_src, 2, coef, len,
+ flags);
+ if (tx)
+ desc = to_fsl_re_dma_desc(tx);
+
+ return tx;
+ }
+
+ /*
+ * During RAID6 array creation, Linux's MD layer gets P and Q
+ * calculated separately in two steps. But our RAID Engine has
+ * the capability to calculate both P and Q with a single command
+ * Hence to merge well with MD layer, we need to provide a hook
+ * here and call re_jq_prep_dma_genq() function
+ */
+
+ if (flags & DMA_PREP_PQ_DISABLE_P)
+ return fsl_re_prep_dma_genq(chan, dest[1], src, src_cnt,
+ scf, len, flags);
+
+ if (flags & DMA_PREP_CONTINUE)
+ src_cnt += 3;
+
+ desc = fsl_re_chan_alloc_desc(re_chan, flags);
+ if (desc <= 0)
+ return NULL;
+
+ /* Filling GenQQ CDB */
+ cdb = FSL_RE_PQ_OPCODE << FSL_RE_CDB_OPCODE_SHIFT;
+ cdb |= (src_cnt - 1) << FSL_RE_CDB_NRCS_SHIFT;
+ cdb |= FSL_RE_BLOCK_SIZE << FSL_RE_CDB_BLKSIZE_SHIFT;
+ cdb |= FSL_RE_BUFFER_OUTPUT << FSL_RE_CDB_BUFFER_SHIFT;
+ cdb |= FSL_RE_DATA_DEP << FSL_RE_CDB_DEPEND_SHIFT;
+
+ pq = desc->cdb_addr;
+ pq->cdb32 = cdb;
+
+ p = pq->gfm_q1;
+ /* Init gfm_q1[] */
+ for (i = 0; i < src_cnt; i++)
+ p[i] = 1;
+
+ /* Align gfm[] to 32bit */
+ gfmq_len = ALIGN(src_cnt, 4);
+
+ /* Init gfm_q2[] */
+ p += gfmq_len;
+ for (i = 0; i < src_cnt; i++)
+ p[i] = scf[i];
+
+ /* Filling frame 0 of compound frame descriptor with CDB */
+ cf = desc->cf_addr;
+ fill_cfd_frame(cf, 0, sizeof(struct fsl_re_pq_cdb), desc->cdb_paddr, 0);
+
+ /* Fill CFD's 1st & 2nd frame with dest buffers */
+ for (i = 1, j = 0; i < 3; i++, j++)
+ fill_cfd_frame(cf, i, len, dest[j], 0);
+
+ /* Fill CFD's rest of the frames with source buffers */
+ for (i = 3, j = 0; j < save_src_cnt; i++, j++)
+ fill_cfd_frame(cf, i, len, src[j], 0);
+
+ /* PQ computation continuation */
+ if (flags & DMA_PREP_CONTINUE) {
+ if (src_cnt - save_src_cnt == 3) {
+ p[save_src_cnt] = 0;
+ p[save_src_cnt + 1] = 0;
+ p[save_src_cnt + 2] = 1;
+ fill_cfd_frame(cf, i++, len, dest[0], 0);
+ fill_cfd_frame(cf, i++, len, dest[1], 0);
+ fill_cfd_frame(cf, i++, len, dest[1], 0);
+ } else {
+ dev_err(re_chan->dev, "PQ tx continuation error!\n");
+ return NULL;
+ }
+ }
+
+ /* Setting the final bit in the last source buffer frame in CFD */
+ cf[i - 1].efrl32 |= 1 << FSL_RE_CF_FINAL_SHIFT;
+
+ return &desc->async_tx;
+}
+
+/*
+ * Prep function for memcpy. In RAID Engine, memcpy is done through MOVE
+ * command. Logic of this function will need to be modified once multipage
+ * support is added in Linux's MD/ASYNC Layer
+ */
+static struct dma_async_tx_descriptor *fsl_re_prep_dma_memcpy(
+ struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
+ size_t len, unsigned long flags)
+{
+ struct fsl_re_chan *re_chan;
+ struct fsl_re_desc *desc;
+ size_t length;
+ struct fsl_re_cmpnd_frame *cf;
+ struct fsl_re_move_cdb *move;
+ u32 cdb;
+
+ re_chan = container_of(chan, struct fsl_re_chan, chan);
+
+ if (len > FSL_RE_MAX_DATA_LEN) {
+ dev_err(re_chan->dev, "cp tx length is %zu, max length is %d\n",
+ len, FSL_RE_MAX_DATA_LEN);
+ return NULL;
+ }
+
+ desc = fsl_re_chan_alloc_desc(re_chan, flags);
+ if (desc <= 0)
+ return NULL;
+
+ /* Filling move CDB */
+ cdb = FSL_RE_MOVE_OPCODE << FSL_RE_CDB_OPCODE_SHIFT;
+ cdb |= FSL_RE_BLOCK_SIZE << FSL_RE_CDB_BLKSIZE_SHIFT;
+ cdb |= FSL_RE_INTR_ON_ERROR << FSL_RE_CDB_ERROR_SHIFT;
+ cdb |= FSL_RE_DATA_DEP << FSL_RE_CDB_DEPEND_SHIFT;
+
+ move = desc->cdb_addr;
+ move->cdb32 = cdb;
+
+ /* Filling frame 0 of CFD with move CDB */
+ cf = desc->cf_addr;
+ fill_cfd_frame(cf, 0, sizeof(*move), desc->cdb_paddr, 0);
+
+ length = min_t(size_t, len, FSL_RE_MAX_DATA_LEN);
+
+ /* Fill CFD's 1st frame with dest buffer */
+ fill_cfd_frame(cf, 1, length, dest, 0);
+
+ /* Fill CFD's 2nd frame with src buffer */
+ fill_cfd_frame(cf, 2, length, src, 1);
+
+ return &desc->async_tx;
+}
+
+static int fsl_re_alloc_chan_resources(struct dma_chan *chan)
+{
+ struct fsl_re_chan *re_chan;
+ struct fsl_re_desc *desc;
+ void *cf;
+ dma_addr_t paddr;
+ int i;
+
+ re_chan = container_of(chan, struct fsl_re_chan, chan);
+ for (i = 0; i < FSL_RE_MIN_DESCS; i++) {
+ desc = kzalloc(sizeof(*desc), GFP_KERNEL);
+ if (!desc)
+ break;
+
+ cf = dma_pool_alloc(re_chan->re_dev->cf_desc_pool, GFP_KERNEL,
+ &paddr);
+ if (!cf) {
+ kfree(desc);
+ break;
+ }
+
+ INIT_LIST_HEAD(&desc->node);
+ fsl_re_init_desc(re_chan, desc, cf, paddr);
+
+ list_add_tail(&desc->node, &re_chan->free_q);
+ re_chan->alloc_count++;
+ }
+ return re_chan->alloc_count;
+}
+
+static void fsl_re_free_chan_resources(struct dma_chan *chan)
+{
+ struct fsl_re_chan *re_chan;
+ struct fsl_re_desc *desc;
+
+ re_chan = container_of(chan, struct fsl_re_chan, chan);
+ while (re_chan->alloc_count--) {
+ desc = list_first_entry(&re_chan->free_q,
+ struct fsl_re_desc,
+ node);
+
+ list_del(&desc->node);
+ dma_pool_free(re_chan->re_dev->cf_desc_pool, desc->cf_addr,
+ desc->cf_paddr);
+ kfree(desc);
+ }
+
+ if (!list_empty(&re_chan->free_q))
+ dev_err(re_chan->dev, "chan resource cannot be cleaned!\n");
+}
+
+static int fsl_re_chan_probe(struct platform_device *ofdev,
+ struct device_node *np, u8 q, u32 off)
+{
+ struct device *dev, *chandev;
+ struct fsl_re_drv_private *re_priv;
+ struct fsl_re_chan *chan;
+ struct dma_device *dma_dev;
+ u32 ptr;
+ u32 status;
+ int ret = 0, rc;
+ struct platform_device *chan_ofdev;
+
+ dev = &ofdev->dev;
+ re_priv = dev_get_drvdata(dev);
+ dma_dev = &re_priv->dma_dev;
+
+ chan = devm_kzalloc(dev, sizeof(*chan), GFP_KERNEL);
+ if (!chan)
+ return -ENOMEM;
+
+ /* create platform device for chan node */
+ chan_ofdev = of_platform_device_create(np, NULL, dev);
+ if (!chan_ofdev) {
+ dev_err(dev, "Not able to create ofdev for jr %d\n", q);
+ ret = -EINVAL;
+ goto err_free;
+ }
+
+ /* read reg property from dts */
+ rc = of_property_read_u32(np, "reg", &ptr);
+ if (rc) {
+ dev_err(dev, "Reg property not found in jr %d\n", q);
+ ret = -ENODEV;
+ goto err_free;
+ }
+
+ chan->jrregs = (struct fsl_re_chan_cfg *)((u8 *)re_priv->re_regs +
+ off + ptr);
+
+ /* read irq property from dts */
+ chan->irq = irq_of_parse_and_map(np, 0);
+ if (!chan->irq) {
+ dev_err(dev, "No IRQ defined for JR %d\n", q);
+ ret = -ENODEV;
+ goto err_free;
+ }
+
+ snprintf(chan->name, sizeof(chan->name), "re_jr%02d", q);
+
+ chandev = &chan_ofdev->dev;
+ tasklet_setup(&chan->irqtask, fsl_re_dequeue);
+
+ ret = request_irq(chan->irq, fsl_re_isr, 0, chan->name, chandev);
+ if (ret) {
+ dev_err(dev, "Unable to register interrupt for JR %d\n", q);
+ ret = -EINVAL;
+ goto err_free;
+ }
+
+ re_priv->re_jrs[q] = chan;
+ chan->chan.device = dma_dev;
+ chan->chan.private = chan;
+ chan->dev = chandev;
+ chan->re_dev = re_priv;
+
+ spin_lock_init(&chan->desc_lock);
+ INIT_LIST_HEAD(&chan->ack_q);
+ INIT_LIST_HEAD(&chan->active_q);
+ INIT_LIST_HEAD(&chan->submit_q);
+ INIT_LIST_HEAD(&chan->free_q);
+
+ chan->inb_ring_virt_addr = dma_pool_alloc(chan->re_dev->hw_desc_pool,
+ GFP_KERNEL, &chan->inb_phys_addr);
+ if (!chan->inb_ring_virt_addr) {
+ dev_err(dev, "No dma memory for inb_ring_virt_addr\n");
+ ret = -ENOMEM;
+ goto err_free;
+ }
+
+ chan->oub_ring_virt_addr = dma_pool_alloc(chan->re_dev->hw_desc_pool,
+ GFP_KERNEL, &chan->oub_phys_addr);
+ if (!chan->oub_ring_virt_addr) {
+ dev_err(dev, "No dma memory for oub_ring_virt_addr\n");
+ ret = -ENOMEM;
+ goto err_free_1;
+ }
+
+ /* Program the Inbound/Outbound ring base addresses and size */
+ out_be32(&chan->jrregs->inbring_base_h,
+ chan->inb_phys_addr & FSL_RE_ADDR_BIT_MASK);
+ out_be32(&chan->jrregs->oubring_base_h,
+ chan->oub_phys_addr & FSL_RE_ADDR_BIT_MASK);
+ out_be32(&chan->jrregs->inbring_base_l,
+ chan->inb_phys_addr >> FSL_RE_ADDR_BIT_SHIFT);
+ out_be32(&chan->jrregs->oubring_base_l,
+ chan->oub_phys_addr >> FSL_RE_ADDR_BIT_SHIFT);
+ out_be32(&chan->jrregs->inbring_size,
+ FSL_RE_RING_SIZE << FSL_RE_RING_SIZE_SHIFT);
+ out_be32(&chan->jrregs->oubring_size,
+ FSL_RE_RING_SIZE << FSL_RE_RING_SIZE_SHIFT);
+
+ /* Read LIODN value from u-boot */
+ status = in_be32(&chan->jrregs->jr_config_1) & FSL_RE_REG_LIODN_MASK;
+
+ /* Program the CFG reg */
+ out_be32(&chan->jrregs->jr_config_1,
+ FSL_RE_CFG1_CBSI | FSL_RE_CFG1_CBS0 | status);
+
+ dev_set_drvdata(chandev, chan);
+
+ /* Enable RE/CHAN */
+ out_be32(&chan->jrregs->jr_command, FSL_RE_ENABLE);
+
+ return 0;
+
+err_free_1:
+ dma_pool_free(chan->re_dev->hw_desc_pool, chan->inb_ring_virt_addr,
+ chan->inb_phys_addr);
+err_free:
+ return ret;
+}
+
+/* Probe function for RAID Engine */
+static int fsl_re_probe(struct platform_device *ofdev)
+{
+ struct fsl_re_drv_private *re_priv;
+ struct device_node *np;
+ struct device_node *child;
+ u32 off;
+ u8 ridx = 0;
+ struct dma_device *dma_dev;
+ struct resource *res;
+ int rc;
+ struct device *dev = &ofdev->dev;
+
+ re_priv = devm_kzalloc(dev, sizeof(*re_priv), GFP_KERNEL);
+ if (!re_priv)
+ return -ENOMEM;
+
+ res = platform_get_resource(ofdev, IORESOURCE_MEM, 0);
+ if (!res)
+ return -ENODEV;
+
+ /* IOMAP the entire RAID Engine region */
+ re_priv->re_regs = devm_ioremap(dev, res->start, resource_size(res));
+ if (!re_priv->re_regs)
+ return -EBUSY;
+
+ /* Program the RE mode */
+ out_be32(&re_priv->re_regs->global_config, FSL_RE_NON_DPAA_MODE);
+
+ /* Program Galois Field polynomial */
+ out_be32(&re_priv->re_regs->galois_field_config, FSL_RE_GFM_POLY);
+
+ dev_info(dev, "version %x, mode %x, gfp %x\n",
+ in_be32(&re_priv->re_regs->re_version_id),
+ in_be32(&re_priv->re_regs->global_config),
+ in_be32(&re_priv->re_regs->galois_field_config));
+
+ dma_dev = &re_priv->dma_dev;
+ dma_dev->dev = dev;
+ INIT_LIST_HEAD(&dma_dev->channels);
+ dma_set_mask(dev, DMA_BIT_MASK(40));
+
+ dma_dev->device_alloc_chan_resources = fsl_re_alloc_chan_resources;
+ dma_dev->device_tx_status = fsl_re_tx_status;
+ dma_dev->device_issue_pending = fsl_re_issue_pending;
+
+ dma_dev->max_xor = FSL_RE_MAX_XOR_SRCS;
+ dma_dev->device_prep_dma_xor = fsl_re_prep_dma_xor;
+ dma_cap_set(DMA_XOR, dma_dev->cap_mask);
+
+ dma_dev->max_pq = FSL_RE_MAX_PQ_SRCS;
+ dma_dev->device_prep_dma_pq = fsl_re_prep_dma_pq;
+ dma_cap_set(DMA_PQ, dma_dev->cap_mask);
+
+ dma_dev->device_prep_dma_memcpy = fsl_re_prep_dma_memcpy;
+ dma_cap_set(DMA_MEMCPY, dma_dev->cap_mask);
+
+ dma_dev->device_free_chan_resources = fsl_re_free_chan_resources;
+
+ re_priv->total_chans = 0;
+
+ re_priv->cf_desc_pool = dmam_pool_create("fsl_re_cf_desc_pool", dev,
+ FSL_RE_CF_CDB_SIZE,
+ FSL_RE_CF_CDB_ALIGN, 0);
+
+ if (!re_priv->cf_desc_pool) {
+ dev_err(dev, "No memory for fsl re_cf desc pool\n");
+ return -ENOMEM;
+ }
+
+ re_priv->hw_desc_pool = dmam_pool_create("fsl_re_hw_desc_pool", dev,
+ sizeof(struct fsl_re_hw_desc) * FSL_RE_RING_SIZE,
+ FSL_RE_FRAME_ALIGN, 0);
+ if (!re_priv->hw_desc_pool) {
+ dev_err(dev, "No memory for fsl re_hw desc pool\n");
+ return -ENOMEM;
+ }
+
+ dev_set_drvdata(dev, re_priv);
+
+ /* Parse Device tree to find out the total number of JQs present */
+ for_each_compatible_node(np, NULL, "fsl,raideng-v1.0-job-queue") {
+ rc = of_property_read_u32(np, "reg", &off);
+ if (rc) {
+ dev_err(dev, "Reg property not found in JQ node\n");
+ of_node_put(np);
+ return -ENODEV;
+ }
+ /* Find out the Job Rings present under each JQ */
+ for_each_child_of_node(np, child) {
+ rc = of_device_is_compatible(child,
+ "fsl,raideng-v1.0-job-ring");
+ if (rc) {
+ fsl_re_chan_probe(ofdev, child, ridx++, off);
+ re_priv->total_chans++;
+ }
+ }
+ }
+
+ dma_async_device_register(dma_dev);
+
+ return 0;
+}
+
+static void fsl_re_remove_chan(struct fsl_re_chan *chan)
+{
+ tasklet_kill(&chan->irqtask);
+
+ dma_pool_free(chan->re_dev->hw_desc_pool, chan->inb_ring_virt_addr,
+ chan->inb_phys_addr);
+
+ dma_pool_free(chan->re_dev->hw_desc_pool, chan->oub_ring_virt_addr,
+ chan->oub_phys_addr);
+}
+
+static int fsl_re_remove(struct platform_device *ofdev)
+{
+ struct fsl_re_drv_private *re_priv;
+ struct device *dev;
+ int i;
+
+ dev = &ofdev->dev;
+ re_priv = dev_get_drvdata(dev);
+
+ /* Cleanup chan related memory areas */
+ for (i = 0; i < re_priv->total_chans; i++)
+ fsl_re_remove_chan(re_priv->re_jrs[i]);
+
+ /* Unregister the driver */
+ dma_async_device_unregister(&re_priv->dma_dev);
+
+ return 0;
+}
+
+static const struct of_device_id fsl_re_ids[] = {
+ { .compatible = "fsl,raideng-v1.0", },
+ {}
+};
+MODULE_DEVICE_TABLE(of, fsl_re_ids);
+
+static struct platform_driver fsl_re_driver = {
+ .driver = {
+ .name = "fsl-raideng",
+ .of_match_table = fsl_re_ids,
+ },
+ .probe = fsl_re_probe,
+ .remove = fsl_re_remove,
+};
+
+module_platform_driver(fsl_re_driver);
+
+MODULE_AUTHOR("Harninder Rai <harninder.rai@freescale.com>");
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("Freescale RAID Engine Device Driver");
diff --git a/drivers/dma/fsl_raid.h b/drivers/dma/fsl_raid.h
new file mode 100644
index 000000000..69d743c04
--- /dev/null
+++ b/drivers/dma/fsl_raid.h
@@ -0,0 +1,306 @@
+/*
+ * drivers/dma/fsl_raid.h
+ *
+ * Freescale RAID Engine device driver
+ *
+ * Author:
+ * Harninder Rai <harninder.rai@freescale.com>
+ * Naveen Burmi <naveenburmi@freescale.com>
+ *
+ * Rewrite:
+ * Xuelin Shi <xuelin.shi@freescale.com>
+
+ * Copyright (c) 2010-2012 Freescale Semiconductor, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Freescale Semiconductor nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#define FSL_RE_MAX_CHANS 4
+#define FSL_RE_DPAA_MODE BIT(30)
+#define FSL_RE_NON_DPAA_MODE BIT(31)
+#define FSL_RE_GFM_POLY 0x1d000000
+#define FSL_RE_ADD_JOB(x) ((x) << 16)
+#define FSL_RE_RMVD_JOB(x) ((x) << 16)
+#define FSL_RE_CFG1_CBSI 0x08000000
+#define FSL_RE_CFG1_CBS0 0x00080000
+#define FSL_RE_SLOT_FULL_SHIFT 8
+#define FSL_RE_SLOT_FULL(x) ((x) >> FSL_RE_SLOT_FULL_SHIFT)
+#define FSL_RE_SLOT_AVAIL_SHIFT 8
+#define FSL_RE_SLOT_AVAIL(x) ((x) >> FSL_RE_SLOT_AVAIL_SHIFT)
+#define FSL_RE_PQ_OPCODE 0x1B
+#define FSL_RE_XOR_OPCODE 0x1A
+#define FSL_RE_MOVE_OPCODE 0x8
+#define FSL_RE_FRAME_ALIGN 16
+#define FSL_RE_BLOCK_SIZE 0x3 /* 4096 bytes */
+#define FSL_RE_CACHEABLE_IO 0x0
+#define FSL_RE_BUFFER_OUTPUT 0x0
+#define FSL_RE_INTR_ON_ERROR 0x1
+#define FSL_RE_DATA_DEP 0x1
+#define FSL_RE_ENABLE_DPI 0x0
+#define FSL_RE_RING_SIZE 0x400
+#define FSL_RE_RING_SIZE_MASK (FSL_RE_RING_SIZE - 1)
+#define FSL_RE_RING_SIZE_SHIFT 8
+#define FSL_RE_ADDR_BIT_SHIFT 4
+#define FSL_RE_ADDR_BIT_MASK (BIT(FSL_RE_ADDR_BIT_SHIFT) - 1)
+#define FSL_RE_ERROR 0x40000000
+#define FSL_RE_INTR 0x80000000
+#define FSL_RE_CLR_INTR 0x80000000
+#define FSL_RE_PAUSE 0x80000000
+#define FSL_RE_ENABLE 0x80000000
+#define FSL_RE_REG_LIODN_MASK 0x00000FFF
+
+#define FSL_RE_CDB_OPCODE_MASK 0xF8000000
+#define FSL_RE_CDB_OPCODE_SHIFT 27
+#define FSL_RE_CDB_EXCLEN_MASK 0x03000000
+#define FSL_RE_CDB_EXCLEN_SHIFT 24
+#define FSL_RE_CDB_EXCLQ1_MASK 0x00F00000
+#define FSL_RE_CDB_EXCLQ1_SHIFT 20
+#define FSL_RE_CDB_EXCLQ2_MASK 0x000F0000
+#define FSL_RE_CDB_EXCLQ2_SHIFT 16
+#define FSL_RE_CDB_BLKSIZE_MASK 0x0000C000
+#define FSL_RE_CDB_BLKSIZE_SHIFT 14
+#define FSL_RE_CDB_CACHE_MASK 0x00003000
+#define FSL_RE_CDB_CACHE_SHIFT 12
+#define FSL_RE_CDB_BUFFER_MASK 0x00000800
+#define FSL_RE_CDB_BUFFER_SHIFT 11
+#define FSL_RE_CDB_ERROR_MASK 0x00000400
+#define FSL_RE_CDB_ERROR_SHIFT 10
+#define FSL_RE_CDB_NRCS_MASK 0x0000003C
+#define FSL_RE_CDB_NRCS_SHIFT 6
+#define FSL_RE_CDB_DEPEND_MASK 0x00000008
+#define FSL_RE_CDB_DEPEND_SHIFT 3
+#define FSL_RE_CDB_DPI_MASK 0x00000004
+#define FSL_RE_CDB_DPI_SHIFT 2
+
+/*
+ * the largest cf block is 19*sizeof(struct cmpnd_frame), which is 304 bytes.
+ * here 19 = 1(cdb)+2(dest)+16(src), align to 64bytes, that is 320 bytes.
+ * the largest cdb block: struct pq_cdb which is 180 bytes, adding to cf block
+ * 320+180=500, align to 64bytes, that is 512 bytes.
+ */
+#define FSL_RE_CF_DESC_SIZE 320
+#define FSL_RE_CF_CDB_SIZE 512
+#define FSL_RE_CF_CDB_ALIGN 64
+
+struct fsl_re_ctrl {
+ /* General Configuration Registers */
+ __be32 global_config; /* Global Configuration Register */
+ u8 rsvd1[4];
+ __be32 galois_field_config; /* Galois Field Configuration Register */
+ u8 rsvd2[4];
+ __be32 jq_wrr_config; /* WRR Configuration register */
+ u8 rsvd3[4];
+ __be32 crc_config; /* CRC Configuration register */
+ u8 rsvd4[228];
+ __be32 system_reset; /* System Reset Register */
+ u8 rsvd5[252];
+ __be32 global_status; /* Global Status Register */
+ u8 rsvd6[832];
+ __be32 re_liodn_base; /* LIODN Base Register */
+ u8 rsvd7[1712];
+ __be32 re_version_id; /* Version ID register of RE */
+ __be32 re_version_id_2; /* Version ID 2 register of RE */
+ u8 rsvd8[512];
+ __be32 host_config; /* Host I/F Configuration Register */
+};
+
+struct fsl_re_chan_cfg {
+ /* Registers for JR interface */
+ __be32 jr_config_0; /* Job Queue Configuration 0 Register */
+ __be32 jr_config_1; /* Job Queue Configuration 1 Register */
+ __be32 jr_interrupt_status; /* Job Queue Interrupt Status Register */
+ u8 rsvd1[4];
+ __be32 jr_command; /* Job Queue Command Register */
+ u8 rsvd2[4];
+ __be32 jr_status; /* Job Queue Status Register */
+ u8 rsvd3[228];
+
+ /* Input Ring */
+ __be32 inbring_base_h; /* Inbound Ring Base Address Register - High */
+ __be32 inbring_base_l; /* Inbound Ring Base Address Register - Low */
+ __be32 inbring_size; /* Inbound Ring Size Register */
+ u8 rsvd4[4];
+ __be32 inbring_slot_avail; /* Inbound Ring Slot Available Register */
+ u8 rsvd5[4];
+ __be32 inbring_add_job; /* Inbound Ring Add Job Register */
+ u8 rsvd6[4];
+ __be32 inbring_cnsmr_indx; /* Inbound Ring Consumer Index Register */
+ u8 rsvd7[220];
+
+ /* Output Ring */
+ __be32 oubring_base_h; /* Outbound Ring Base Address Register - High */
+ __be32 oubring_base_l; /* Outbound Ring Base Address Register - Low */
+ __be32 oubring_size; /* Outbound Ring Size Register */
+ u8 rsvd8[4];
+ __be32 oubring_job_rmvd; /* Outbound Ring Job Removed Register */
+ u8 rsvd9[4];
+ __be32 oubring_slot_full; /* Outbound Ring Slot Full Register */
+ u8 rsvd10[4];
+ __be32 oubring_prdcr_indx; /* Outbound Ring Producer Index */
+};
+
+/*
+ * Command Descriptor Block (CDB) for unicast move command.
+ * In RAID Engine terms, memcpy is done through move command
+ */
+struct fsl_re_move_cdb {
+ __be32 cdb32;
+};
+
+/* Data protection/integrity related fields */
+#define FSL_RE_DPI_APPS_MASK 0xC0000000
+#define FSL_RE_DPI_APPS_SHIFT 30
+#define FSL_RE_DPI_REF_MASK 0x30000000
+#define FSL_RE_DPI_REF_SHIFT 28
+#define FSL_RE_DPI_GUARD_MASK 0x0C000000
+#define FSL_RE_DPI_GUARD_SHIFT 26
+#define FSL_RE_DPI_ATTR_MASK 0x03000000
+#define FSL_RE_DPI_ATTR_SHIFT 24
+#define FSL_RE_DPI_META_MASK 0x0000FFFF
+
+struct fsl_re_dpi {
+ __be32 dpi32;
+ __be32 ref;
+};
+
+/*
+ * CDB for GenQ command. In RAID Engine terminology, XOR is
+ * done through this command
+ */
+struct fsl_re_xor_cdb {
+ __be32 cdb32;
+ u8 gfm[16];
+ struct fsl_re_dpi dpi_dest_spec;
+ struct fsl_re_dpi dpi_src_spec[16];
+};
+
+/* CDB for no-op command */
+struct fsl_re_noop_cdb {
+ __be32 cdb32;
+};
+
+/*
+ * CDB for GenQQ command. In RAID Engine terminology, P/Q is
+ * done through this command
+ */
+struct fsl_re_pq_cdb {
+ __be32 cdb32;
+ u8 gfm_q1[16];
+ u8 gfm_q2[16];
+ struct fsl_re_dpi dpi_dest_spec[2];
+ struct fsl_re_dpi dpi_src_spec[16];
+};
+
+/* Compound frame */
+#define FSL_RE_CF_ADDR_HIGH_MASK 0x000000FF
+#define FSL_RE_CF_EXT_MASK 0x80000000
+#define FSL_RE_CF_EXT_SHIFT 31
+#define FSL_RE_CF_FINAL_MASK 0x40000000
+#define FSL_RE_CF_FINAL_SHIFT 30
+#define FSL_RE_CF_LENGTH_MASK 0x000FFFFF
+#define FSL_RE_CF_BPID_MASK 0x00FF0000
+#define FSL_RE_CF_BPID_SHIFT 16
+#define FSL_RE_CF_OFFSET_MASK 0x00001FFF
+
+struct fsl_re_cmpnd_frame {
+ __be32 addr_high;
+ __be32 addr_low;
+ __be32 efrl32;
+ __be32 rbro32;
+};
+
+/* Frame descriptor */
+#define FSL_RE_HWDESC_LIODN_MASK 0x3F000000
+#define FSL_RE_HWDESC_LIODN_SHIFT 24
+#define FSL_RE_HWDESC_BPID_MASK 0x00FF0000
+#define FSL_RE_HWDESC_BPID_SHIFT 16
+#define FSL_RE_HWDESC_ELIODN_MASK 0x0000F000
+#define FSL_RE_HWDESC_ELIODN_SHIFT 12
+#define FSL_RE_HWDESC_FMT_SHIFT 29
+#define FSL_RE_HWDESC_FMT_MASK (0x3 << FSL_RE_HWDESC_FMT_SHIFT)
+
+struct fsl_re_hw_desc {
+ __be32 lbea32;
+ __be32 addr_low;
+ __be32 fmt32;
+ __be32 status;
+};
+
+/* Raid Engine device private data */
+struct fsl_re_drv_private {
+ u8 total_chans;
+ struct dma_device dma_dev;
+ struct fsl_re_ctrl *re_regs;
+ struct fsl_re_chan *re_jrs[FSL_RE_MAX_CHANS];
+ struct dma_pool *cf_desc_pool;
+ struct dma_pool *hw_desc_pool;
+};
+
+/* Per job ring data structure */
+struct fsl_re_chan {
+ char name[16];
+ spinlock_t desc_lock; /* queue lock */
+ struct list_head ack_q; /* wait to acked queue */
+ struct list_head active_q; /* already issued on hw, not completed */
+ struct list_head submit_q;
+ struct list_head free_q; /* alloc available queue */
+ struct device *dev;
+ struct fsl_re_drv_private *re_dev;
+ struct dma_chan chan;
+ struct fsl_re_chan_cfg *jrregs;
+ int irq;
+ struct tasklet_struct irqtask;
+ u32 alloc_count;
+
+ /* hw descriptor ring for inbound queue*/
+ dma_addr_t inb_phys_addr;
+ struct fsl_re_hw_desc *inb_ring_virt_addr;
+ u32 inb_count;
+
+ /* hw descriptor ring for outbound queue */
+ dma_addr_t oub_phys_addr;
+ struct fsl_re_hw_desc *oub_ring_virt_addr;
+ u32 oub_count;
+};
+
+/* Async transaction descriptor */
+struct fsl_re_desc {
+ struct dma_async_tx_descriptor async_tx;
+ struct list_head node;
+ struct fsl_re_hw_desc hwdesc;
+ struct fsl_re_chan *re_chan;
+
+ /* hwdesc will point to cf_addr */
+ void *cf_addr;
+ dma_addr_t cf_paddr;
+
+ void *cdb_addr;
+ dma_addr_t cdb_paddr;
+ int status;
+};
diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c
new file mode 100644
index 000000000..f8459cc53
--- /dev/null
+++ b/drivers/dma/fsldma.c
@@ -0,0 +1,1430 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Freescale MPC85xx, MPC83xx DMA Engine support
+ *
+ * Copyright (C) 2007-2010 Freescale Semiconductor, Inc. All rights reserved.
+ *
+ * Author:
+ * Zhang Wei <wei.zhang@freescale.com>, Jul 2007
+ * Ebony Zhu <ebony.zhu@freescale.com>, May 2007
+ *
+ * Description:
+ * DMA engine driver for Freescale MPC8540 DMA controller, which is
+ * also fit for MPC8560, MPC8555, MPC8548, MPC8641, and etc.
+ * The support for MPC8349 DMA controller is also added.
+ *
+ * This driver instructs the DMA controller to issue the PCI Read Multiple
+ * command for PCI read operations, instead of using the default PCI Read Line
+ * command. Please be aware that this setting may result in read pre-fetching
+ * on some platforms.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/dmaengine.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmapool.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/of_platform.h>
+#include <linux/fsldma.h>
+#include "dmaengine.h"
+#include "fsldma.h"
+
+#define chan_dbg(chan, fmt, arg...) \
+ dev_dbg(chan->dev, "%s: " fmt, chan->name, ##arg)
+#define chan_err(chan, fmt, arg...) \
+ dev_err(chan->dev, "%s: " fmt, chan->name, ##arg)
+
+static const char msg_ld_oom[] = "No free memory for link descriptor";
+
+/*
+ * Register Helpers
+ */
+
+static void set_sr(struct fsldma_chan *chan, u32 val)
+{
+ FSL_DMA_OUT(chan, &chan->regs->sr, val, 32);
+}
+
+static u32 get_sr(struct fsldma_chan *chan)
+{
+ return FSL_DMA_IN(chan, &chan->regs->sr, 32);
+}
+
+static void set_mr(struct fsldma_chan *chan, u32 val)
+{
+ FSL_DMA_OUT(chan, &chan->regs->mr, val, 32);
+}
+
+static u32 get_mr(struct fsldma_chan *chan)
+{
+ return FSL_DMA_IN(chan, &chan->regs->mr, 32);
+}
+
+static void set_cdar(struct fsldma_chan *chan, dma_addr_t addr)
+{
+ FSL_DMA_OUT(chan, &chan->regs->cdar, addr | FSL_DMA_SNEN, 64);
+}
+
+static dma_addr_t get_cdar(struct fsldma_chan *chan)
+{
+ return FSL_DMA_IN(chan, &chan->regs->cdar, 64) & ~FSL_DMA_SNEN;
+}
+
+static void set_bcr(struct fsldma_chan *chan, u32 val)
+{
+ FSL_DMA_OUT(chan, &chan->regs->bcr, val, 32);
+}
+
+static u32 get_bcr(struct fsldma_chan *chan)
+{
+ return FSL_DMA_IN(chan, &chan->regs->bcr, 32);
+}
+
+/*
+ * Descriptor Helpers
+ */
+
+static void set_desc_cnt(struct fsldma_chan *chan,
+ struct fsl_dma_ld_hw *hw, u32 count)
+{
+ hw->count = CPU_TO_DMA(chan, count, 32);
+}
+
+static void set_desc_src(struct fsldma_chan *chan,
+ struct fsl_dma_ld_hw *hw, dma_addr_t src)
+{
+ u64 snoop_bits;
+
+ snoop_bits = ((chan->feature & FSL_DMA_IP_MASK) == FSL_DMA_IP_85XX)
+ ? ((u64)FSL_DMA_SATR_SREADTYPE_SNOOP_READ << 32) : 0;
+ hw->src_addr = CPU_TO_DMA(chan, snoop_bits | src, 64);
+}
+
+static void set_desc_dst(struct fsldma_chan *chan,
+ struct fsl_dma_ld_hw *hw, dma_addr_t dst)
+{
+ u64 snoop_bits;
+
+ snoop_bits = ((chan->feature & FSL_DMA_IP_MASK) == FSL_DMA_IP_85XX)
+ ? ((u64)FSL_DMA_DATR_DWRITETYPE_SNOOP_WRITE << 32) : 0;
+ hw->dst_addr = CPU_TO_DMA(chan, snoop_bits | dst, 64);
+}
+
+static void set_desc_next(struct fsldma_chan *chan,
+ struct fsl_dma_ld_hw *hw, dma_addr_t next)
+{
+ u64 snoop_bits;
+
+ snoop_bits = ((chan->feature & FSL_DMA_IP_MASK) == FSL_DMA_IP_83XX)
+ ? FSL_DMA_SNEN : 0;
+ hw->next_ln_addr = CPU_TO_DMA(chan, snoop_bits | next, 64);
+}
+
+static void set_ld_eol(struct fsldma_chan *chan, struct fsl_desc_sw *desc)
+{
+ u64 snoop_bits;
+
+ snoop_bits = ((chan->feature & FSL_DMA_IP_MASK) == FSL_DMA_IP_83XX)
+ ? FSL_DMA_SNEN : 0;
+
+ desc->hw.next_ln_addr = CPU_TO_DMA(chan,
+ DMA_TO_CPU(chan, desc->hw.next_ln_addr, 64) | FSL_DMA_EOL
+ | snoop_bits, 64);
+}
+
+/*
+ * DMA Engine Hardware Control Helpers
+ */
+
+static void dma_init(struct fsldma_chan *chan)
+{
+ /* Reset the channel */
+ set_mr(chan, 0);
+
+ switch (chan->feature & FSL_DMA_IP_MASK) {
+ case FSL_DMA_IP_85XX:
+ /* Set the channel to below modes:
+ * EIE - Error interrupt enable
+ * EOLNIE - End of links interrupt enable
+ * BWC - Bandwidth sharing among channels
+ */
+ set_mr(chan, FSL_DMA_MR_BWC | FSL_DMA_MR_EIE
+ | FSL_DMA_MR_EOLNIE);
+ break;
+ case FSL_DMA_IP_83XX:
+ /* Set the channel to below modes:
+ * EOTIE - End-of-transfer interrupt enable
+ * PRC_RM - PCI read multiple
+ */
+ set_mr(chan, FSL_DMA_MR_EOTIE | FSL_DMA_MR_PRC_RM);
+ break;
+ }
+}
+
+static int dma_is_idle(struct fsldma_chan *chan)
+{
+ u32 sr = get_sr(chan);
+ return (!(sr & FSL_DMA_SR_CB)) || (sr & FSL_DMA_SR_CH);
+}
+
+/*
+ * Start the DMA controller
+ *
+ * Preconditions:
+ * - the CDAR register must point to the start descriptor
+ * - the MRn[CS] bit must be cleared
+ */
+static void dma_start(struct fsldma_chan *chan)
+{
+ u32 mode;
+
+ mode = get_mr(chan);
+
+ if (chan->feature & FSL_DMA_CHAN_PAUSE_EXT) {
+ set_bcr(chan, 0);
+ mode |= FSL_DMA_MR_EMP_EN;
+ } else {
+ mode &= ~FSL_DMA_MR_EMP_EN;
+ }
+
+ if (chan->feature & FSL_DMA_CHAN_START_EXT) {
+ mode |= FSL_DMA_MR_EMS_EN;
+ } else {
+ mode &= ~FSL_DMA_MR_EMS_EN;
+ mode |= FSL_DMA_MR_CS;
+ }
+
+ set_mr(chan, mode);
+}
+
+static void dma_halt(struct fsldma_chan *chan)
+{
+ u32 mode;
+ int i;
+
+ /* read the mode register */
+ mode = get_mr(chan);
+
+ /*
+ * The 85xx controller supports channel abort, which will stop
+ * the current transfer. On 83xx, this bit is the transfer error
+ * mask bit, which should not be changed.
+ */
+ if ((chan->feature & FSL_DMA_IP_MASK) == FSL_DMA_IP_85XX) {
+ mode |= FSL_DMA_MR_CA;
+ set_mr(chan, mode);
+
+ mode &= ~FSL_DMA_MR_CA;
+ }
+
+ /* stop the DMA controller */
+ mode &= ~(FSL_DMA_MR_CS | FSL_DMA_MR_EMS_EN);
+ set_mr(chan, mode);
+
+ /* wait for the DMA controller to become idle */
+ for (i = 0; i < 100; i++) {
+ if (dma_is_idle(chan))
+ return;
+
+ udelay(10);
+ }
+
+ if (!dma_is_idle(chan))
+ chan_err(chan, "DMA halt timeout!\n");
+}
+
+/**
+ * fsl_chan_set_src_loop_size - Set source address hold transfer size
+ * @chan : Freescale DMA channel
+ * @size : Address loop size, 0 for disable loop
+ *
+ * The set source address hold transfer size. The source
+ * address hold or loop transfer size is when the DMA transfer
+ * data from source address (SA), if the loop size is 4, the DMA will
+ * read data from SA, SA + 1, SA + 2, SA + 3, then loop back to SA,
+ * SA + 1 ... and so on.
+ */
+static void fsl_chan_set_src_loop_size(struct fsldma_chan *chan, int size)
+{
+ u32 mode;
+
+ mode = get_mr(chan);
+
+ switch (size) {
+ case 0:
+ mode &= ~FSL_DMA_MR_SAHE;
+ break;
+ case 1:
+ case 2:
+ case 4:
+ case 8:
+ mode &= ~FSL_DMA_MR_SAHTS_MASK;
+ mode |= FSL_DMA_MR_SAHE | (__ilog2(size) << 14);
+ break;
+ }
+
+ set_mr(chan, mode);
+}
+
+/**
+ * fsl_chan_set_dst_loop_size - Set destination address hold transfer size
+ * @chan : Freescale DMA channel
+ * @size : Address loop size, 0 for disable loop
+ *
+ * The set destination address hold transfer size. The destination
+ * address hold or loop transfer size is when the DMA transfer
+ * data to destination address (TA), if the loop size is 4, the DMA will
+ * write data to TA, TA + 1, TA + 2, TA + 3, then loop back to TA,
+ * TA + 1 ... and so on.
+ */
+static void fsl_chan_set_dst_loop_size(struct fsldma_chan *chan, int size)
+{
+ u32 mode;
+
+ mode = get_mr(chan);
+
+ switch (size) {
+ case 0:
+ mode &= ~FSL_DMA_MR_DAHE;
+ break;
+ case 1:
+ case 2:
+ case 4:
+ case 8:
+ mode &= ~FSL_DMA_MR_DAHTS_MASK;
+ mode |= FSL_DMA_MR_DAHE | (__ilog2(size) << 16);
+ break;
+ }
+
+ set_mr(chan, mode);
+}
+
+/**
+ * fsl_chan_set_request_count - Set DMA Request Count for external control
+ * @chan : Freescale DMA channel
+ * @size : Number of bytes to transfer in a single request
+ *
+ * The Freescale DMA channel can be controlled by the external signal DREQ#.
+ * The DMA request count is how many bytes are allowed to transfer before
+ * pausing the channel, after which a new assertion of DREQ# resumes channel
+ * operation.
+ *
+ * A size of 0 disables external pause control. The maximum size is 1024.
+ */
+static void fsl_chan_set_request_count(struct fsldma_chan *chan, int size)
+{
+ u32 mode;
+
+ BUG_ON(size > 1024);
+
+ mode = get_mr(chan);
+ mode &= ~FSL_DMA_MR_BWC_MASK;
+ mode |= (__ilog2(size) << 24) & FSL_DMA_MR_BWC_MASK;
+
+ set_mr(chan, mode);
+}
+
+/**
+ * fsl_chan_toggle_ext_pause - Toggle channel external pause status
+ * @chan : Freescale DMA channel
+ * @enable : 0 is disabled, 1 is enabled.
+ *
+ * The Freescale DMA channel can be controlled by the external signal DREQ#.
+ * The DMA Request Count feature should be used in addition to this feature
+ * to set the number of bytes to transfer before pausing the channel.
+ */
+static void fsl_chan_toggle_ext_pause(struct fsldma_chan *chan, int enable)
+{
+ if (enable)
+ chan->feature |= FSL_DMA_CHAN_PAUSE_EXT;
+ else
+ chan->feature &= ~FSL_DMA_CHAN_PAUSE_EXT;
+}
+
+/**
+ * fsl_chan_toggle_ext_start - Toggle channel external start status
+ * @chan : Freescale DMA channel
+ * @enable : 0 is disabled, 1 is enabled.
+ *
+ * If enable the external start, the channel can be started by an
+ * external DMA start pin. So the dma_start() does not start the
+ * transfer immediately. The DMA channel will wait for the
+ * control pin asserted.
+ */
+static void fsl_chan_toggle_ext_start(struct fsldma_chan *chan, int enable)
+{
+ if (enable)
+ chan->feature |= FSL_DMA_CHAN_START_EXT;
+ else
+ chan->feature &= ~FSL_DMA_CHAN_START_EXT;
+}
+
+int fsl_dma_external_start(struct dma_chan *dchan, int enable)
+{
+ struct fsldma_chan *chan;
+
+ if (!dchan)
+ return -EINVAL;
+
+ chan = to_fsl_chan(dchan);
+
+ fsl_chan_toggle_ext_start(chan, enable);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(fsl_dma_external_start);
+
+static void append_ld_queue(struct fsldma_chan *chan, struct fsl_desc_sw *desc)
+{
+ struct fsl_desc_sw *tail = to_fsl_desc(chan->ld_pending.prev);
+
+ if (list_empty(&chan->ld_pending))
+ goto out_splice;
+
+ /*
+ * Add the hardware descriptor to the chain of hardware descriptors
+ * that already exists in memory.
+ *
+ * This will un-set the EOL bit of the existing transaction, and the
+ * last link in this transaction will become the EOL descriptor.
+ */
+ set_desc_next(chan, &tail->hw, desc->async_tx.phys);
+
+ /*
+ * Add the software descriptor and all children to the list
+ * of pending transactions
+ */
+out_splice:
+ list_splice_tail_init(&desc->tx_list, &chan->ld_pending);
+}
+
+static dma_cookie_t fsl_dma_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+ struct fsldma_chan *chan = to_fsl_chan(tx->chan);
+ struct fsl_desc_sw *desc = tx_to_fsl_desc(tx);
+ struct fsl_desc_sw *child;
+ dma_cookie_t cookie = -EINVAL;
+
+ spin_lock_bh(&chan->desc_lock);
+
+#ifdef CONFIG_PM
+ if (unlikely(chan->pm_state != RUNNING)) {
+ chan_dbg(chan, "cannot submit due to suspend\n");
+ spin_unlock_bh(&chan->desc_lock);
+ return -1;
+ }
+#endif
+
+ /*
+ * assign cookies to all of the software descriptors
+ * that make up this transaction
+ */
+ list_for_each_entry(child, &desc->tx_list, node) {
+ cookie = dma_cookie_assign(&child->async_tx);
+ }
+
+ /* put this transaction onto the tail of the pending queue */
+ append_ld_queue(chan, desc);
+
+ spin_unlock_bh(&chan->desc_lock);
+
+ return cookie;
+}
+
+/**
+ * fsl_dma_free_descriptor - Free descriptor from channel's DMA pool.
+ * @chan : Freescale DMA channel
+ * @desc: descriptor to be freed
+ */
+static void fsl_dma_free_descriptor(struct fsldma_chan *chan,
+ struct fsl_desc_sw *desc)
+{
+ list_del(&desc->node);
+ chan_dbg(chan, "LD %p free\n", desc);
+ dma_pool_free(chan->desc_pool, desc, desc->async_tx.phys);
+}
+
+/**
+ * fsl_dma_alloc_descriptor - Allocate descriptor from channel's DMA pool.
+ * @chan : Freescale DMA channel
+ *
+ * Return - The descriptor allocated. NULL for failed.
+ */
+static struct fsl_desc_sw *fsl_dma_alloc_descriptor(struct fsldma_chan *chan)
+{
+ struct fsl_desc_sw *desc;
+ dma_addr_t pdesc;
+
+ desc = dma_pool_zalloc(chan->desc_pool, GFP_ATOMIC, &pdesc);
+ if (!desc) {
+ chan_dbg(chan, "out of memory for link descriptor\n");
+ return NULL;
+ }
+
+ INIT_LIST_HEAD(&desc->tx_list);
+ dma_async_tx_descriptor_init(&desc->async_tx, &chan->common);
+ desc->async_tx.tx_submit = fsl_dma_tx_submit;
+ desc->async_tx.phys = pdesc;
+
+ chan_dbg(chan, "LD %p allocated\n", desc);
+
+ return desc;
+}
+
+/**
+ * fsldma_clean_completed_descriptor - free all descriptors which
+ * has been completed and acked
+ * @chan: Freescale DMA channel
+ *
+ * This function is used on all completed and acked descriptors.
+ * All descriptors should only be freed in this function.
+ */
+static void fsldma_clean_completed_descriptor(struct fsldma_chan *chan)
+{
+ struct fsl_desc_sw *desc, *_desc;
+
+ /* Run the callback for each descriptor, in order */
+ list_for_each_entry_safe(desc, _desc, &chan->ld_completed, node)
+ if (async_tx_test_ack(&desc->async_tx))
+ fsl_dma_free_descriptor(chan, desc);
+}
+
+/**
+ * fsldma_run_tx_complete_actions - cleanup a single link descriptor
+ * @chan: Freescale DMA channel
+ * @desc: descriptor to cleanup and free
+ * @cookie: Freescale DMA transaction identifier
+ *
+ * This function is used on a descriptor which has been executed by the DMA
+ * controller. It will run any callbacks, submit any dependencies.
+ */
+static dma_cookie_t fsldma_run_tx_complete_actions(struct fsldma_chan *chan,
+ struct fsl_desc_sw *desc, dma_cookie_t cookie)
+{
+ struct dma_async_tx_descriptor *txd = &desc->async_tx;
+ dma_cookie_t ret = cookie;
+
+ BUG_ON(txd->cookie < 0);
+
+ if (txd->cookie > 0) {
+ ret = txd->cookie;
+
+ dma_descriptor_unmap(txd);
+ /* Run the link descriptor callback function */
+ dmaengine_desc_get_callback_invoke(txd, NULL);
+ }
+
+ /* Run any dependencies */
+ dma_run_dependencies(txd);
+
+ return ret;
+}
+
+/**
+ * fsldma_clean_running_descriptor - move the completed descriptor from
+ * ld_running to ld_completed
+ * @chan: Freescale DMA channel
+ * @desc: the descriptor which is completed
+ *
+ * Free the descriptor directly if acked by async_tx api, or move it to
+ * queue ld_completed.
+ */
+static void fsldma_clean_running_descriptor(struct fsldma_chan *chan,
+ struct fsl_desc_sw *desc)
+{
+ /* Remove from the list of transactions */
+ list_del(&desc->node);
+
+ /*
+ * the client is allowed to attach dependent operations
+ * until 'ack' is set
+ */
+ if (!async_tx_test_ack(&desc->async_tx)) {
+ /*
+ * Move this descriptor to the list of descriptors which is
+ * completed, but still awaiting the 'ack' bit to be set.
+ */
+ list_add_tail(&desc->node, &chan->ld_completed);
+ return;
+ }
+
+ dma_pool_free(chan->desc_pool, desc, desc->async_tx.phys);
+}
+
+/**
+ * fsl_chan_xfer_ld_queue - transfer any pending transactions
+ * @chan : Freescale DMA channel
+ *
+ * HARDWARE STATE: idle
+ * LOCKING: must hold chan->desc_lock
+ */
+static void fsl_chan_xfer_ld_queue(struct fsldma_chan *chan)
+{
+ struct fsl_desc_sw *desc;
+
+ /*
+ * If the list of pending descriptors is empty, then we
+ * don't need to do any work at all
+ */
+ if (list_empty(&chan->ld_pending)) {
+ chan_dbg(chan, "no pending LDs\n");
+ return;
+ }
+
+ /*
+ * The DMA controller is not idle, which means that the interrupt
+ * handler will start any queued transactions when it runs after
+ * this transaction finishes
+ */
+ if (!chan->idle) {
+ chan_dbg(chan, "DMA controller still busy\n");
+ return;
+ }
+
+ /*
+ * If there are some link descriptors which have not been
+ * transferred, we need to start the controller
+ */
+
+ /*
+ * Move all elements from the queue of pending transactions
+ * onto the list of running transactions
+ */
+ chan_dbg(chan, "idle, starting controller\n");
+ desc = list_first_entry(&chan->ld_pending, struct fsl_desc_sw, node);
+ list_splice_tail_init(&chan->ld_pending, &chan->ld_running);
+
+ /*
+ * The 85xx DMA controller doesn't clear the channel start bit
+ * automatically at the end of a transfer. Therefore we must clear
+ * it in software before starting the transfer.
+ */
+ if ((chan->feature & FSL_DMA_IP_MASK) == FSL_DMA_IP_85XX) {
+ u32 mode;
+
+ mode = get_mr(chan);
+ mode &= ~FSL_DMA_MR_CS;
+ set_mr(chan, mode);
+ }
+
+ /*
+ * Program the descriptor's address into the DMA controller,
+ * then start the DMA transaction
+ */
+ set_cdar(chan, desc->async_tx.phys);
+ get_cdar(chan);
+
+ dma_start(chan);
+ chan->idle = false;
+}
+
+/**
+ * fsldma_cleanup_descriptors - cleanup link descriptors which are completed
+ * and move them to ld_completed to free until flag 'ack' is set
+ * @chan: Freescale DMA channel
+ *
+ * This function is used on descriptors which have been executed by the DMA
+ * controller. It will run any callbacks, submit any dependencies, then
+ * free these descriptors if flag 'ack' is set.
+ */
+static void fsldma_cleanup_descriptors(struct fsldma_chan *chan)
+{
+ struct fsl_desc_sw *desc, *_desc;
+ dma_cookie_t cookie = 0;
+ dma_addr_t curr_phys = get_cdar(chan);
+ int seen_current = 0;
+
+ fsldma_clean_completed_descriptor(chan);
+
+ /* Run the callback for each descriptor, in order */
+ list_for_each_entry_safe(desc, _desc, &chan->ld_running, node) {
+ /*
+ * do not advance past the current descriptor loaded into the
+ * hardware channel, subsequent descriptors are either in
+ * process or have not been submitted
+ */
+ if (seen_current)
+ break;
+
+ /*
+ * stop the search if we reach the current descriptor and the
+ * channel is busy
+ */
+ if (desc->async_tx.phys == curr_phys) {
+ seen_current = 1;
+ if (!dma_is_idle(chan))
+ break;
+ }
+
+ cookie = fsldma_run_tx_complete_actions(chan, desc, cookie);
+
+ fsldma_clean_running_descriptor(chan, desc);
+ }
+
+ /*
+ * Start any pending transactions automatically
+ *
+ * In the ideal case, we keep the DMA controller busy while we go
+ * ahead and free the descriptors below.
+ */
+ fsl_chan_xfer_ld_queue(chan);
+
+ if (cookie > 0)
+ chan->common.completed_cookie = cookie;
+}
+
+/**
+ * fsl_dma_alloc_chan_resources - Allocate resources for DMA channel.
+ * @chan : Freescale DMA channel
+ *
+ * This function will create a dma pool for descriptor allocation.
+ *
+ * Return - The number of descriptors allocated.
+ */
+static int fsl_dma_alloc_chan_resources(struct dma_chan *dchan)
+{
+ struct fsldma_chan *chan = to_fsl_chan(dchan);
+
+ /* Has this channel already been allocated? */
+ if (chan->desc_pool)
+ return 1;
+
+ /*
+ * We need the descriptor to be aligned to 32bytes
+ * for meeting FSL DMA specification requirement.
+ */
+ chan->desc_pool = dma_pool_create(chan->name, chan->dev,
+ sizeof(struct fsl_desc_sw),
+ __alignof__(struct fsl_desc_sw), 0);
+ if (!chan->desc_pool) {
+ chan_err(chan, "unable to allocate descriptor pool\n");
+ return -ENOMEM;
+ }
+
+ /* there is at least one descriptor free to be allocated */
+ return 1;
+}
+
+/**
+ * fsldma_free_desc_list - Free all descriptors in a queue
+ * @chan: Freescae DMA channel
+ * @list: the list to free
+ *
+ * LOCKING: must hold chan->desc_lock
+ */
+static void fsldma_free_desc_list(struct fsldma_chan *chan,
+ struct list_head *list)
+{
+ struct fsl_desc_sw *desc, *_desc;
+
+ list_for_each_entry_safe(desc, _desc, list, node)
+ fsl_dma_free_descriptor(chan, desc);
+}
+
+static void fsldma_free_desc_list_reverse(struct fsldma_chan *chan,
+ struct list_head *list)
+{
+ struct fsl_desc_sw *desc, *_desc;
+
+ list_for_each_entry_safe_reverse(desc, _desc, list, node)
+ fsl_dma_free_descriptor(chan, desc);
+}
+
+/**
+ * fsl_dma_free_chan_resources - Free all resources of the channel.
+ * @chan : Freescale DMA channel
+ */
+static void fsl_dma_free_chan_resources(struct dma_chan *dchan)
+{
+ struct fsldma_chan *chan = to_fsl_chan(dchan);
+
+ chan_dbg(chan, "free all channel resources\n");
+ spin_lock_bh(&chan->desc_lock);
+ fsldma_cleanup_descriptors(chan);
+ fsldma_free_desc_list(chan, &chan->ld_pending);
+ fsldma_free_desc_list(chan, &chan->ld_running);
+ fsldma_free_desc_list(chan, &chan->ld_completed);
+ spin_unlock_bh(&chan->desc_lock);
+
+ dma_pool_destroy(chan->desc_pool);
+ chan->desc_pool = NULL;
+}
+
+static struct dma_async_tx_descriptor *
+fsl_dma_prep_memcpy(struct dma_chan *dchan,
+ dma_addr_t dma_dst, dma_addr_t dma_src,
+ size_t len, unsigned long flags)
+{
+ struct fsldma_chan *chan;
+ struct fsl_desc_sw *first = NULL, *prev = NULL, *new;
+ size_t copy;
+
+ if (!dchan)
+ return NULL;
+
+ if (!len)
+ return NULL;
+
+ chan = to_fsl_chan(dchan);
+
+ do {
+
+ /* Allocate the link descriptor from DMA pool */
+ new = fsl_dma_alloc_descriptor(chan);
+ if (!new) {
+ chan_err(chan, "%s\n", msg_ld_oom);
+ goto fail;
+ }
+
+ copy = min(len, (size_t)FSL_DMA_BCR_MAX_CNT);
+
+ set_desc_cnt(chan, &new->hw, copy);
+ set_desc_src(chan, &new->hw, dma_src);
+ set_desc_dst(chan, &new->hw, dma_dst);
+
+ if (!first)
+ first = new;
+ else
+ set_desc_next(chan, &prev->hw, new->async_tx.phys);
+
+ new->async_tx.cookie = 0;
+ async_tx_ack(&new->async_tx);
+
+ prev = new;
+ len -= copy;
+ dma_src += copy;
+ dma_dst += copy;
+
+ /* Insert the link descriptor to the LD ring */
+ list_add_tail(&new->node, &first->tx_list);
+ } while (len);
+
+ new->async_tx.flags = flags; /* client is in control of this ack */
+ new->async_tx.cookie = -EBUSY;
+
+ /* Set End-of-link to the last link descriptor of new list */
+ set_ld_eol(chan, new);
+
+ return &first->async_tx;
+
+fail:
+ if (!first)
+ return NULL;
+
+ fsldma_free_desc_list_reverse(chan, &first->tx_list);
+ return NULL;
+}
+
+static int fsl_dma_device_terminate_all(struct dma_chan *dchan)
+{
+ struct fsldma_chan *chan;
+
+ if (!dchan)
+ return -EINVAL;
+
+ chan = to_fsl_chan(dchan);
+
+ spin_lock_bh(&chan->desc_lock);
+
+ /* Halt the DMA engine */
+ dma_halt(chan);
+
+ /* Remove and free all of the descriptors in the LD queue */
+ fsldma_free_desc_list(chan, &chan->ld_pending);
+ fsldma_free_desc_list(chan, &chan->ld_running);
+ fsldma_free_desc_list(chan, &chan->ld_completed);
+ chan->idle = true;
+
+ spin_unlock_bh(&chan->desc_lock);
+ return 0;
+}
+
+static int fsl_dma_device_config(struct dma_chan *dchan,
+ struct dma_slave_config *config)
+{
+ struct fsldma_chan *chan;
+ int size;
+
+ if (!dchan)
+ return -EINVAL;
+
+ chan = to_fsl_chan(dchan);
+
+ /* make sure the channel supports setting burst size */
+ if (!chan->set_request_count)
+ return -ENXIO;
+
+ /* we set the controller burst size depending on direction */
+ if (config->direction == DMA_MEM_TO_DEV)
+ size = config->dst_addr_width * config->dst_maxburst;
+ else
+ size = config->src_addr_width * config->src_maxburst;
+
+ chan->set_request_count(chan, size);
+ return 0;
+}
+
+
+/**
+ * fsl_dma_memcpy_issue_pending - Issue the DMA start command
+ * @chan : Freescale DMA channel
+ */
+static void fsl_dma_memcpy_issue_pending(struct dma_chan *dchan)
+{
+ struct fsldma_chan *chan = to_fsl_chan(dchan);
+
+ spin_lock_bh(&chan->desc_lock);
+ fsl_chan_xfer_ld_queue(chan);
+ spin_unlock_bh(&chan->desc_lock);
+}
+
+/**
+ * fsl_tx_status - Determine the DMA status
+ * @chan : Freescale DMA channel
+ */
+static enum dma_status fsl_tx_status(struct dma_chan *dchan,
+ dma_cookie_t cookie,
+ struct dma_tx_state *txstate)
+{
+ struct fsldma_chan *chan = to_fsl_chan(dchan);
+ enum dma_status ret;
+
+ ret = dma_cookie_status(dchan, cookie, txstate);
+ if (ret == DMA_COMPLETE)
+ return ret;
+
+ spin_lock_bh(&chan->desc_lock);
+ fsldma_cleanup_descriptors(chan);
+ spin_unlock_bh(&chan->desc_lock);
+
+ return dma_cookie_status(dchan, cookie, txstate);
+}
+
+/*----------------------------------------------------------------------------*/
+/* Interrupt Handling */
+/*----------------------------------------------------------------------------*/
+
+static irqreturn_t fsldma_chan_irq(int irq, void *data)
+{
+ struct fsldma_chan *chan = data;
+ u32 stat;
+
+ /* save and clear the status register */
+ stat = get_sr(chan);
+ set_sr(chan, stat);
+ chan_dbg(chan, "irq: stat = 0x%x\n", stat);
+
+ /* check that this was really our device */
+ stat &= ~(FSL_DMA_SR_CB | FSL_DMA_SR_CH);
+ if (!stat)
+ return IRQ_NONE;
+
+ if (stat & FSL_DMA_SR_TE)
+ chan_err(chan, "Transfer Error!\n");
+
+ /*
+ * Programming Error
+ * The DMA_INTERRUPT async_tx is a NULL transfer, which will
+ * trigger a PE interrupt.
+ */
+ if (stat & FSL_DMA_SR_PE) {
+ chan_dbg(chan, "irq: Programming Error INT\n");
+ stat &= ~FSL_DMA_SR_PE;
+ if (get_bcr(chan) != 0)
+ chan_err(chan, "Programming Error!\n");
+ }
+
+ /*
+ * For MPC8349, EOCDI event need to update cookie
+ * and start the next transfer if it exist.
+ */
+ if (stat & FSL_DMA_SR_EOCDI) {
+ chan_dbg(chan, "irq: End-of-Chain link INT\n");
+ stat &= ~FSL_DMA_SR_EOCDI;
+ }
+
+ /*
+ * If it current transfer is the end-of-transfer,
+ * we should clear the Channel Start bit for
+ * prepare next transfer.
+ */
+ if (stat & FSL_DMA_SR_EOLNI) {
+ chan_dbg(chan, "irq: End-of-link INT\n");
+ stat &= ~FSL_DMA_SR_EOLNI;
+ }
+
+ /* check that the DMA controller is really idle */
+ if (!dma_is_idle(chan))
+ chan_err(chan, "irq: controller not idle!\n");
+
+ /* check that we handled all of the bits */
+ if (stat)
+ chan_err(chan, "irq: unhandled sr 0x%08x\n", stat);
+
+ /*
+ * Schedule the tasklet to handle all cleanup of the current
+ * transaction. It will start a new transaction if there is
+ * one pending.
+ */
+ tasklet_schedule(&chan->tasklet);
+ chan_dbg(chan, "irq: Exit\n");
+ return IRQ_HANDLED;
+}
+
+static void dma_do_tasklet(struct tasklet_struct *t)
+{
+ struct fsldma_chan *chan = from_tasklet(chan, t, tasklet);
+
+ chan_dbg(chan, "tasklet entry\n");
+
+ spin_lock(&chan->desc_lock);
+
+ /* the hardware is now idle and ready for more */
+ chan->idle = true;
+
+ /* Run all cleanup for descriptors which have been completed */
+ fsldma_cleanup_descriptors(chan);
+
+ spin_unlock(&chan->desc_lock);
+
+ chan_dbg(chan, "tasklet exit\n");
+}
+
+static irqreturn_t fsldma_ctrl_irq(int irq, void *data)
+{
+ struct fsldma_device *fdev = data;
+ struct fsldma_chan *chan;
+ unsigned int handled = 0;
+ u32 gsr, mask;
+ int i;
+
+ gsr = (fdev->feature & FSL_DMA_BIG_ENDIAN) ? in_be32(fdev->regs)
+ : in_le32(fdev->regs);
+ mask = 0xff000000;
+ dev_dbg(fdev->dev, "IRQ: gsr 0x%.8x\n", gsr);
+
+ for (i = 0; i < FSL_DMA_MAX_CHANS_PER_DEVICE; i++) {
+ chan = fdev->chan[i];
+ if (!chan)
+ continue;
+
+ if (gsr & mask) {
+ dev_dbg(fdev->dev, "IRQ: chan %d\n", chan->id);
+ fsldma_chan_irq(irq, chan);
+ handled++;
+ }
+
+ gsr &= ~mask;
+ mask >>= 8;
+ }
+
+ return IRQ_RETVAL(handled);
+}
+
+static void fsldma_free_irqs(struct fsldma_device *fdev)
+{
+ struct fsldma_chan *chan;
+ int i;
+
+ if (fdev->irq) {
+ dev_dbg(fdev->dev, "free per-controller IRQ\n");
+ free_irq(fdev->irq, fdev);
+ return;
+ }
+
+ for (i = 0; i < FSL_DMA_MAX_CHANS_PER_DEVICE; i++) {
+ chan = fdev->chan[i];
+ if (chan && chan->irq) {
+ chan_dbg(chan, "free per-channel IRQ\n");
+ free_irq(chan->irq, chan);
+ }
+ }
+}
+
+static int fsldma_request_irqs(struct fsldma_device *fdev)
+{
+ struct fsldma_chan *chan;
+ int ret;
+ int i;
+
+ /* if we have a per-controller IRQ, use that */
+ if (fdev->irq) {
+ dev_dbg(fdev->dev, "request per-controller IRQ\n");
+ ret = request_irq(fdev->irq, fsldma_ctrl_irq, IRQF_SHARED,
+ "fsldma-controller", fdev);
+ return ret;
+ }
+
+ /* no per-controller IRQ, use the per-channel IRQs */
+ for (i = 0; i < FSL_DMA_MAX_CHANS_PER_DEVICE; i++) {
+ chan = fdev->chan[i];
+ if (!chan)
+ continue;
+
+ if (!chan->irq) {
+ chan_err(chan, "interrupts property missing in device tree\n");
+ ret = -ENODEV;
+ goto out_unwind;
+ }
+
+ chan_dbg(chan, "request per-channel IRQ\n");
+ ret = request_irq(chan->irq, fsldma_chan_irq, IRQF_SHARED,
+ "fsldma-chan", chan);
+ if (ret) {
+ chan_err(chan, "unable to request per-channel IRQ\n");
+ goto out_unwind;
+ }
+ }
+
+ return 0;
+
+out_unwind:
+ for (/* none */; i >= 0; i--) {
+ chan = fdev->chan[i];
+ if (!chan)
+ continue;
+
+ if (!chan->irq)
+ continue;
+
+ free_irq(chan->irq, chan);
+ }
+
+ return ret;
+}
+
+/*----------------------------------------------------------------------------*/
+/* OpenFirmware Subsystem */
+/*----------------------------------------------------------------------------*/
+
+static int fsl_dma_chan_probe(struct fsldma_device *fdev,
+ struct device_node *node, u32 feature, const char *compatible)
+{
+ struct fsldma_chan *chan;
+ struct resource res;
+ int err;
+
+ /* alloc channel */
+ chan = kzalloc(sizeof(*chan), GFP_KERNEL);
+ if (!chan) {
+ err = -ENOMEM;
+ goto out_return;
+ }
+
+ /* ioremap registers for use */
+ chan->regs = of_iomap(node, 0);
+ if (!chan->regs) {
+ dev_err(fdev->dev, "unable to ioremap registers\n");
+ err = -ENOMEM;
+ goto out_free_chan;
+ }
+
+ err = of_address_to_resource(node, 0, &res);
+ if (err) {
+ dev_err(fdev->dev, "unable to find 'reg' property\n");
+ goto out_iounmap_regs;
+ }
+
+ chan->feature = feature;
+ if (!fdev->feature)
+ fdev->feature = chan->feature;
+
+ /*
+ * If the DMA device's feature is different than the feature
+ * of its channels, report the bug
+ */
+ WARN_ON(fdev->feature != chan->feature);
+
+ chan->dev = fdev->dev;
+ chan->id = (res.start & 0xfff) < 0x300 ?
+ ((res.start - 0x100) & 0xfff) >> 7 :
+ ((res.start - 0x200) & 0xfff) >> 7;
+ if (chan->id >= FSL_DMA_MAX_CHANS_PER_DEVICE) {
+ dev_err(fdev->dev, "too many channels for device\n");
+ err = -EINVAL;
+ goto out_iounmap_regs;
+ }
+
+ fdev->chan[chan->id] = chan;
+ tasklet_setup(&chan->tasklet, dma_do_tasklet);
+ snprintf(chan->name, sizeof(chan->name), "chan%d", chan->id);
+
+ /* Initialize the channel */
+ dma_init(chan);
+
+ /* Clear cdar registers */
+ set_cdar(chan, 0);
+
+ switch (chan->feature & FSL_DMA_IP_MASK) {
+ case FSL_DMA_IP_85XX:
+ chan->toggle_ext_pause = fsl_chan_toggle_ext_pause;
+ fallthrough;
+ case FSL_DMA_IP_83XX:
+ chan->toggle_ext_start = fsl_chan_toggle_ext_start;
+ chan->set_src_loop_size = fsl_chan_set_src_loop_size;
+ chan->set_dst_loop_size = fsl_chan_set_dst_loop_size;
+ chan->set_request_count = fsl_chan_set_request_count;
+ }
+
+ spin_lock_init(&chan->desc_lock);
+ INIT_LIST_HEAD(&chan->ld_pending);
+ INIT_LIST_HEAD(&chan->ld_running);
+ INIT_LIST_HEAD(&chan->ld_completed);
+ chan->idle = true;
+#ifdef CONFIG_PM
+ chan->pm_state = RUNNING;
+#endif
+
+ chan->common.device = &fdev->common;
+ dma_cookie_init(&chan->common);
+
+ /* find the IRQ line, if it exists in the device tree */
+ chan->irq = irq_of_parse_and_map(node, 0);
+
+ /* Add the channel to DMA device channel list */
+ list_add_tail(&chan->common.device_node, &fdev->common.channels);
+
+ dev_info(fdev->dev, "#%d (%s), irq %d\n", chan->id, compatible,
+ chan->irq ? chan->irq : fdev->irq);
+
+ return 0;
+
+out_iounmap_regs:
+ iounmap(chan->regs);
+out_free_chan:
+ kfree(chan);
+out_return:
+ return err;
+}
+
+static void fsl_dma_chan_remove(struct fsldma_chan *chan)
+{
+ irq_dispose_mapping(chan->irq);
+ list_del(&chan->common.device_node);
+ iounmap(chan->regs);
+ kfree(chan);
+}
+
+static int fsldma_of_probe(struct platform_device *op)
+{
+ struct fsldma_device *fdev;
+ struct device_node *child;
+ unsigned int i;
+ int err;
+
+ fdev = kzalloc(sizeof(*fdev), GFP_KERNEL);
+ if (!fdev) {
+ err = -ENOMEM;
+ goto out_return;
+ }
+
+ fdev->dev = &op->dev;
+ INIT_LIST_HEAD(&fdev->common.channels);
+
+ /* ioremap the registers for use */
+ fdev->regs = of_iomap(op->dev.of_node, 0);
+ if (!fdev->regs) {
+ dev_err(&op->dev, "unable to ioremap registers\n");
+ err = -ENOMEM;
+ goto out_free;
+ }
+
+ /* map the channel IRQ if it exists, but don't hookup the handler yet */
+ fdev->irq = irq_of_parse_and_map(op->dev.of_node, 0);
+
+ dma_cap_set(DMA_MEMCPY, fdev->common.cap_mask);
+ dma_cap_set(DMA_SLAVE, fdev->common.cap_mask);
+ fdev->common.device_alloc_chan_resources = fsl_dma_alloc_chan_resources;
+ fdev->common.device_free_chan_resources = fsl_dma_free_chan_resources;
+ fdev->common.device_prep_dma_memcpy = fsl_dma_prep_memcpy;
+ fdev->common.device_tx_status = fsl_tx_status;
+ fdev->common.device_issue_pending = fsl_dma_memcpy_issue_pending;
+ fdev->common.device_config = fsl_dma_device_config;
+ fdev->common.device_terminate_all = fsl_dma_device_terminate_all;
+ fdev->common.dev = &op->dev;
+
+ fdev->common.src_addr_widths = FSL_DMA_BUSWIDTHS;
+ fdev->common.dst_addr_widths = FSL_DMA_BUSWIDTHS;
+ fdev->common.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
+ fdev->common.residue_granularity = DMA_RESIDUE_GRANULARITY_DESCRIPTOR;
+
+ dma_set_mask(&(op->dev), DMA_BIT_MASK(36));
+
+ platform_set_drvdata(op, fdev);
+
+ /*
+ * We cannot use of_platform_bus_probe() because there is no
+ * of_platform_bus_remove(). Instead, we manually instantiate every DMA
+ * channel object.
+ */
+ for_each_child_of_node(op->dev.of_node, child) {
+ if (of_device_is_compatible(child, "fsl,eloplus-dma-channel")) {
+ fsl_dma_chan_probe(fdev, child,
+ FSL_DMA_IP_85XX | FSL_DMA_BIG_ENDIAN,
+ "fsl,eloplus-dma-channel");
+ }
+
+ if (of_device_is_compatible(child, "fsl,elo-dma-channel")) {
+ fsl_dma_chan_probe(fdev, child,
+ FSL_DMA_IP_83XX | FSL_DMA_LITTLE_ENDIAN,
+ "fsl,elo-dma-channel");
+ }
+ }
+
+ /*
+ * Hookup the IRQ handler(s)
+ *
+ * If we have a per-controller interrupt, we prefer that to the
+ * per-channel interrupts to reduce the number of shared interrupt
+ * handlers on the same IRQ line
+ */
+ err = fsldma_request_irqs(fdev);
+ if (err) {
+ dev_err(fdev->dev, "unable to request IRQs\n");
+ goto out_free_fdev;
+ }
+
+ dma_async_device_register(&fdev->common);
+ return 0;
+
+out_free_fdev:
+ for (i = 0; i < FSL_DMA_MAX_CHANS_PER_DEVICE; i++) {
+ if (fdev->chan[i])
+ fsl_dma_chan_remove(fdev->chan[i]);
+ }
+ irq_dispose_mapping(fdev->irq);
+ iounmap(fdev->regs);
+out_free:
+ kfree(fdev);
+out_return:
+ return err;
+}
+
+static int fsldma_of_remove(struct platform_device *op)
+{
+ struct fsldma_device *fdev;
+ unsigned int i;
+
+ fdev = platform_get_drvdata(op);
+ dma_async_device_unregister(&fdev->common);
+
+ fsldma_free_irqs(fdev);
+
+ for (i = 0; i < FSL_DMA_MAX_CHANS_PER_DEVICE; i++) {
+ if (fdev->chan[i])
+ fsl_dma_chan_remove(fdev->chan[i]);
+ }
+ irq_dispose_mapping(fdev->irq);
+
+ iounmap(fdev->regs);
+ kfree(fdev);
+
+ return 0;
+}
+
+#ifdef CONFIG_PM
+static int fsldma_suspend_late(struct device *dev)
+{
+ struct fsldma_device *fdev = dev_get_drvdata(dev);
+ struct fsldma_chan *chan;
+ int i;
+
+ for (i = 0; i < FSL_DMA_MAX_CHANS_PER_DEVICE; i++) {
+ chan = fdev->chan[i];
+ if (!chan)
+ continue;
+
+ spin_lock_bh(&chan->desc_lock);
+ if (unlikely(!chan->idle))
+ goto out;
+ chan->regs_save.mr = get_mr(chan);
+ chan->pm_state = SUSPENDED;
+ spin_unlock_bh(&chan->desc_lock);
+ }
+ return 0;
+
+out:
+ for (; i >= 0; i--) {
+ chan = fdev->chan[i];
+ if (!chan)
+ continue;
+ chan->pm_state = RUNNING;
+ spin_unlock_bh(&chan->desc_lock);
+ }
+ return -EBUSY;
+}
+
+static int fsldma_resume_early(struct device *dev)
+{
+ struct fsldma_device *fdev = dev_get_drvdata(dev);
+ struct fsldma_chan *chan;
+ u32 mode;
+ int i;
+
+ for (i = 0; i < FSL_DMA_MAX_CHANS_PER_DEVICE; i++) {
+ chan = fdev->chan[i];
+ if (!chan)
+ continue;
+
+ spin_lock_bh(&chan->desc_lock);
+ mode = chan->regs_save.mr
+ & ~FSL_DMA_MR_CS & ~FSL_DMA_MR_CC & ~FSL_DMA_MR_CA;
+ set_mr(chan, mode);
+ chan->pm_state = RUNNING;
+ spin_unlock_bh(&chan->desc_lock);
+ }
+
+ return 0;
+}
+
+static const struct dev_pm_ops fsldma_pm_ops = {
+ .suspend_late = fsldma_suspend_late,
+ .resume_early = fsldma_resume_early,
+};
+#endif
+
+static const struct of_device_id fsldma_of_ids[] = {
+ { .compatible = "fsl,elo3-dma", },
+ { .compatible = "fsl,eloplus-dma", },
+ { .compatible = "fsl,elo-dma", },
+ {}
+};
+MODULE_DEVICE_TABLE(of, fsldma_of_ids);
+
+static struct platform_driver fsldma_of_driver = {
+ .driver = {
+ .name = "fsl-elo-dma",
+ .of_match_table = fsldma_of_ids,
+#ifdef CONFIG_PM
+ .pm = &fsldma_pm_ops,
+#endif
+ },
+ .probe = fsldma_of_probe,
+ .remove = fsldma_of_remove,
+};
+
+/*----------------------------------------------------------------------------*/
+/* Module Init / Exit */
+/*----------------------------------------------------------------------------*/
+
+static __init int fsldma_init(void)
+{
+ pr_info("Freescale Elo series DMA driver\n");
+ return platform_driver_register(&fsldma_of_driver);
+}
+
+static void __exit fsldma_exit(void)
+{
+ platform_driver_unregister(&fsldma_of_driver);
+}
+
+subsys_initcall(fsldma_init);
+module_exit(fsldma_exit);
+
+MODULE_DESCRIPTION("Freescale Elo series DMA driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/dma/fsldma.h b/drivers/dma/fsldma.h
new file mode 100644
index 000000000..308bed0a5
--- /dev/null
+++ b/drivers/dma/fsldma.h
@@ -0,0 +1,265 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2007-2010 Freescale Semiconductor, Inc. All rights reserved.
+ *
+ * Author:
+ * Zhang Wei <wei.zhang@freescale.com>, Jul 2007
+ * Ebony Zhu <ebony.zhu@freescale.com>, May 2007
+ */
+#ifndef __DMA_FSLDMA_H
+#define __DMA_FSLDMA_H
+
+#include <linux/device.h>
+#include <linux/dmapool.h>
+#include <linux/dmaengine.h>
+
+/* Define data structures needed by Freescale
+ * MPC8540 and MPC8349 DMA controller.
+ */
+#define FSL_DMA_MR_CS 0x00000001
+#define FSL_DMA_MR_CC 0x00000002
+#define FSL_DMA_MR_CA 0x00000008
+#define FSL_DMA_MR_EIE 0x00000040
+#define FSL_DMA_MR_XFE 0x00000020
+#define FSL_DMA_MR_EOLNIE 0x00000100
+#define FSL_DMA_MR_EOLSIE 0x00000080
+#define FSL_DMA_MR_EOSIE 0x00000200
+#define FSL_DMA_MR_CDSM 0x00000010
+#define FSL_DMA_MR_CTM 0x00000004
+#define FSL_DMA_MR_EMP_EN 0x00200000
+#define FSL_DMA_MR_EMS_EN 0x00040000
+#define FSL_DMA_MR_DAHE 0x00002000
+#define FSL_DMA_MR_SAHE 0x00001000
+
+#define FSL_DMA_MR_SAHTS_MASK 0x0000C000
+#define FSL_DMA_MR_DAHTS_MASK 0x00030000
+#define FSL_DMA_MR_BWC_MASK 0x0f000000
+
+/*
+ * Bandwidth/pause control determines how many bytes a given
+ * channel is allowed to transfer before the DMA engine pauses
+ * the current channel and switches to the next channel
+ */
+#define FSL_DMA_MR_BWC 0x0A000000
+
+/* Special MR definition for MPC8349 */
+#define FSL_DMA_MR_EOTIE 0x00000080
+#define FSL_DMA_MR_PRC_RM 0x00000800
+
+#define FSL_DMA_SR_CH 0x00000020
+#define FSL_DMA_SR_PE 0x00000010
+#define FSL_DMA_SR_CB 0x00000004
+#define FSL_DMA_SR_TE 0x00000080
+#define FSL_DMA_SR_EOSI 0x00000002
+#define FSL_DMA_SR_EOLSI 0x00000001
+#define FSL_DMA_SR_EOCDI 0x00000001
+#define FSL_DMA_SR_EOLNI 0x00000008
+
+#define FSL_DMA_SATR_SBPATMU 0x20000000
+#define FSL_DMA_SATR_STRANSINT_RIO 0x00c00000
+#define FSL_DMA_SATR_SREADTYPE_SNOOP_READ 0x00050000
+#define FSL_DMA_SATR_SREADTYPE_BP_IORH 0x00020000
+#define FSL_DMA_SATR_SREADTYPE_BP_NREAD 0x00040000
+#define FSL_DMA_SATR_SREADTYPE_BP_MREAD 0x00070000
+
+#define FSL_DMA_DATR_DBPATMU 0x20000000
+#define FSL_DMA_DATR_DTRANSINT_RIO 0x00c00000
+#define FSL_DMA_DATR_DWRITETYPE_SNOOP_WRITE 0x00050000
+#define FSL_DMA_DATR_DWRITETYPE_BP_FLUSH 0x00010000
+
+#define FSL_DMA_EOL ((u64)0x1)
+#define FSL_DMA_SNEN ((u64)0x10)
+#define FSL_DMA_EOSIE 0x8
+#define FSL_DMA_NLDA_MASK (~(u64)0x1f)
+
+#define FSL_DMA_BCR_MAX_CNT 0x03ffffffu
+
+#define FSL_DMA_DGSR_TE 0x80
+#define FSL_DMA_DGSR_CH 0x20
+#define FSL_DMA_DGSR_PE 0x10
+#define FSL_DMA_DGSR_EOLNI 0x08
+#define FSL_DMA_DGSR_CB 0x04
+#define FSL_DMA_DGSR_EOSI 0x02
+#define FSL_DMA_DGSR_EOLSI 0x01
+
+#define FSL_DMA_BUSWIDTHS (BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | \
+ BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \
+ BIT(DMA_SLAVE_BUSWIDTH_4_BYTES) | \
+ BIT(DMA_SLAVE_BUSWIDTH_8_BYTES))
+typedef u64 __bitwise v64;
+typedef u32 __bitwise v32;
+
+struct fsl_dma_ld_hw {
+ v64 src_addr;
+ v64 dst_addr;
+ v64 next_ln_addr;
+ v32 count;
+ v32 reserve;
+} __attribute__((aligned(32)));
+
+struct fsl_desc_sw {
+ struct fsl_dma_ld_hw hw;
+ struct list_head node;
+ struct list_head tx_list;
+ struct dma_async_tx_descriptor async_tx;
+} __attribute__((aligned(32)));
+
+struct fsldma_chan_regs {
+ u32 mr; /* 0x00 - Mode Register */
+ u32 sr; /* 0x04 - Status Register */
+ u64 cdar; /* 0x08 - Current descriptor address register */
+ u64 sar; /* 0x10 - Source Address Register */
+ u64 dar; /* 0x18 - Destination Address Register */
+ u32 bcr; /* 0x20 - Byte Count Register */
+ u64 ndar; /* 0x24 - Next Descriptor Address Register */
+};
+
+struct fsldma_chan;
+#define FSL_DMA_MAX_CHANS_PER_DEVICE 8
+
+struct fsldma_device {
+ void __iomem *regs; /* DGSR register base */
+ struct device *dev;
+ struct dma_device common;
+ struct fsldma_chan *chan[FSL_DMA_MAX_CHANS_PER_DEVICE];
+ u32 feature; /* The same as DMA channels */
+ int irq; /* Channel IRQ */
+};
+
+/* Define macros for fsldma_chan->feature property */
+#define FSL_DMA_LITTLE_ENDIAN 0x00000000
+#define FSL_DMA_BIG_ENDIAN 0x00000001
+
+#define FSL_DMA_IP_MASK 0x00000ff0
+#define FSL_DMA_IP_85XX 0x00000010
+#define FSL_DMA_IP_83XX 0x00000020
+
+#define FSL_DMA_CHAN_PAUSE_EXT 0x00001000
+#define FSL_DMA_CHAN_START_EXT 0x00002000
+
+#ifdef CONFIG_PM
+struct fsldma_chan_regs_save {
+ u32 mr;
+};
+
+enum fsldma_pm_state {
+ RUNNING = 0,
+ SUSPENDED,
+};
+#endif
+
+struct fsldma_chan {
+ char name[8]; /* Channel name */
+ struct fsldma_chan_regs __iomem *regs;
+ spinlock_t desc_lock; /* Descriptor operation lock */
+ /*
+ * Descriptors which are queued to run, but have not yet been
+ * submitted to the hardware for execution
+ */
+ struct list_head ld_pending;
+ /*
+ * Descriptors which are currently being executed by the hardware
+ */
+ struct list_head ld_running;
+ /*
+ * Descriptors which have finished execution by the hardware. These
+ * descriptors have already had their cleanup actions run. They are
+ * waiting for the ACK bit to be set by the async_tx API.
+ */
+ struct list_head ld_completed; /* Link descriptors queue */
+ struct dma_chan common; /* DMA common channel */
+ struct dma_pool *desc_pool; /* Descriptors pool */
+ struct device *dev; /* Channel device */
+ int irq; /* Channel IRQ */
+ int id; /* Raw id of this channel */
+ struct tasklet_struct tasklet;
+ u32 feature;
+ bool idle; /* DMA controller is idle */
+#ifdef CONFIG_PM
+ struct fsldma_chan_regs_save regs_save;
+ enum fsldma_pm_state pm_state;
+#endif
+
+ void (*toggle_ext_pause)(struct fsldma_chan *fsl_chan, int enable);
+ void (*toggle_ext_start)(struct fsldma_chan *fsl_chan, int enable);
+ void (*set_src_loop_size)(struct fsldma_chan *fsl_chan, int size);
+ void (*set_dst_loop_size)(struct fsldma_chan *fsl_chan, int size);
+ void (*set_request_count)(struct fsldma_chan *fsl_chan, int size);
+};
+
+#define to_fsl_chan(chan) container_of(chan, struct fsldma_chan, common)
+#define to_fsl_desc(lh) container_of(lh, struct fsl_desc_sw, node)
+#define tx_to_fsl_desc(tx) container_of(tx, struct fsl_desc_sw, async_tx)
+
+#ifdef CONFIG_PPC
+#define fsl_ioread32(p) in_le32(p)
+#define fsl_ioread32be(p) in_be32(p)
+#define fsl_iowrite32(v, p) out_le32(p, v)
+#define fsl_iowrite32be(v, p) out_be32(p, v)
+
+#ifdef __powerpc64__
+#define fsl_ioread64(p) in_le64(p)
+#define fsl_ioread64be(p) in_be64(p)
+#define fsl_iowrite64(v, p) out_le64(p, v)
+#define fsl_iowrite64be(v, p) out_be64(p, v)
+#else
+static u64 fsl_ioread64(const u64 __iomem *addr)
+{
+ u32 val_lo = in_le32((u32 __iomem *)addr);
+ u32 val_hi = in_le32((u32 __iomem *)addr + 1);
+
+ return ((u64)val_hi << 32) + val_lo;
+}
+
+static void fsl_iowrite64(u64 val, u64 __iomem *addr)
+{
+ out_le32((u32 __iomem *)addr + 1, val >> 32);
+ out_le32((u32 __iomem *)addr, (u32)val);
+}
+
+static u64 fsl_ioread64be(const u64 __iomem *addr)
+{
+ u32 val_hi = in_be32((u32 __iomem *)addr);
+ u32 val_lo = in_be32((u32 __iomem *)addr + 1);
+
+ return ((u64)val_hi << 32) + val_lo;
+}
+
+static void fsl_iowrite64be(u64 val, u64 __iomem *addr)
+{
+ out_be32((u32 __iomem *)addr, val >> 32);
+ out_be32((u32 __iomem *)addr + 1, (u32)val);
+}
+#endif
+#endif
+
+#if defined(CONFIG_ARM64) || defined(CONFIG_ARM)
+#define fsl_ioread32(p) ioread32(p)
+#define fsl_ioread32be(p) ioread32be(p)
+#define fsl_iowrite32(v, p) iowrite32(v, p)
+#define fsl_iowrite32be(v, p) iowrite32be(v, p)
+#define fsl_ioread64(p) ioread64(p)
+#define fsl_ioread64be(p) ioread64be(p)
+#define fsl_iowrite64(v, p) iowrite64(v, p)
+#define fsl_iowrite64be(v, p) iowrite64be(v, p)
+#endif
+
+#define FSL_DMA_IN(fsl_dma, addr, width) \
+ (((fsl_dma)->feature & FSL_DMA_BIG_ENDIAN) ? \
+ fsl_ioread##width##be(addr) : fsl_ioread##width(addr))
+
+#define FSL_DMA_OUT(fsl_dma, addr, val, width) \
+ (((fsl_dma)->feature & FSL_DMA_BIG_ENDIAN) ? \
+ fsl_iowrite##width##be(val, addr) : fsl_iowrite \
+ ##width(val, addr))
+
+#define DMA_TO_CPU(fsl_chan, d, width) \
+ (((fsl_chan)->feature & FSL_DMA_BIG_ENDIAN) ? \
+ be##width##_to_cpu((__force __be##width)(v##width)d) : \
+ le##width##_to_cpu((__force __le##width)(v##width)d))
+#define CPU_TO_DMA(fsl_chan, c, width) \
+ (((fsl_chan)->feature & FSL_DMA_BIG_ENDIAN) ? \
+ (__force v##width)cpu_to_be##width(c) : \
+ (__force v##width)cpu_to_le##width(c))
+
+#endif /* __DMA_FSLDMA_H */
diff --git a/drivers/dma/hisi_dma.c b/drivers/dma/hisi_dma.c
new file mode 100644
index 000000000..c1350a36f
--- /dev/null
+++ b/drivers/dma/hisi_dma.c
@@ -0,0 +1,1053 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright(c) 2019-2022 HiSilicon Limited. */
+
+#include <linux/bitfield.h>
+#include <linux/dmaengine.h>
+#include <linux/init.h>
+#include <linux/iopoll.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/spinlock.h>
+#include "virt-dma.h"
+
+/* HiSilicon DMA register common field define */
+#define HISI_DMA_Q_SQ_BASE_L 0x0
+#define HISI_DMA_Q_SQ_BASE_H 0x4
+#define HISI_DMA_Q_SQ_DEPTH 0x8
+#define HISI_DMA_Q_SQ_TAIL_PTR 0xc
+#define HISI_DMA_Q_CQ_BASE_L 0x10
+#define HISI_DMA_Q_CQ_BASE_H 0x14
+#define HISI_DMA_Q_CQ_DEPTH 0x18
+#define HISI_DMA_Q_CQ_HEAD_PTR 0x1c
+#define HISI_DMA_Q_CTRL0 0x20
+#define HISI_DMA_Q_CTRL0_QUEUE_EN BIT(0)
+#define HISI_DMA_Q_CTRL0_QUEUE_PAUSE BIT(4)
+#define HISI_DMA_Q_CTRL1 0x24
+#define HISI_DMA_Q_CTRL1_QUEUE_RESET BIT(0)
+#define HISI_DMA_Q_FSM_STS 0x30
+#define HISI_DMA_Q_FSM_STS_MASK GENMASK(3, 0)
+#define HISI_DMA_Q_ERR_INT_NUM0 0x84
+#define HISI_DMA_Q_ERR_INT_NUM1 0x88
+#define HISI_DMA_Q_ERR_INT_NUM2 0x8c
+
+/* HiSilicon IP08 DMA register and field define */
+#define HISI_DMA_HIP08_MODE 0x217C
+#define HISI_DMA_HIP08_Q_BASE 0x0
+#define HISI_DMA_HIP08_Q_CTRL0_ERR_ABORT_EN BIT(2)
+#define HISI_DMA_HIP08_Q_INT_STS 0x40
+#define HISI_DMA_HIP08_Q_INT_MSK 0x44
+#define HISI_DMA_HIP08_Q_INT_STS_MASK GENMASK(14, 0)
+#define HISI_DMA_HIP08_Q_ERR_INT_NUM3 0x90
+#define HISI_DMA_HIP08_Q_ERR_INT_NUM4 0x94
+#define HISI_DMA_HIP08_Q_ERR_INT_NUM5 0x98
+#define HISI_DMA_HIP08_Q_ERR_INT_NUM6 0x48
+#define HISI_DMA_HIP08_Q_CTRL0_SQCQ_DRCT BIT(24)
+
+/* HiSilicon IP09 DMA register and field define */
+#define HISI_DMA_HIP09_DMA_FLR_DISABLE 0xA00
+#define HISI_DMA_HIP09_DMA_FLR_DISABLE_B BIT(0)
+#define HISI_DMA_HIP09_Q_BASE 0x2000
+#define HISI_DMA_HIP09_Q_CTRL0_ERR_ABORT_EN GENMASK(31, 28)
+#define HISI_DMA_HIP09_Q_CTRL0_SQ_DRCT BIT(26)
+#define HISI_DMA_HIP09_Q_CTRL0_CQ_DRCT BIT(27)
+#define HISI_DMA_HIP09_Q_CTRL1_VA_ENABLE BIT(2)
+#define HISI_DMA_HIP09_Q_INT_STS 0x40
+#define HISI_DMA_HIP09_Q_INT_MSK 0x44
+#define HISI_DMA_HIP09_Q_INT_STS_MASK 0x1
+#define HISI_DMA_HIP09_Q_ERR_INT_STS 0x48
+#define HISI_DMA_HIP09_Q_ERR_INT_MSK 0x4C
+#define HISI_DMA_HIP09_Q_ERR_INT_STS_MASK GENMASK(18, 1)
+#define HISI_DMA_HIP09_PORT_CFG_REG(port_id) (0x800 + \
+ (port_id) * 0x20)
+#define HISI_DMA_HIP09_PORT_CFG_LINK_DOWN_MASK_B BIT(16)
+
+#define HISI_DMA_HIP09_MAX_PORT_NUM 16
+
+#define HISI_DMA_HIP08_MSI_NUM 32
+#define HISI_DMA_HIP08_CHAN_NUM 30
+#define HISI_DMA_HIP09_MSI_NUM 4
+#define HISI_DMA_HIP09_CHAN_NUM 4
+#define HISI_DMA_REVISION_HIP08B 0x21
+#define HISI_DMA_REVISION_HIP09A 0x30
+
+#define HISI_DMA_Q_OFFSET 0x100
+#define HISI_DMA_Q_DEPTH_VAL 1024
+
+#define PCI_BAR_2 2
+
+#define HISI_DMA_POLL_Q_STS_DELAY_US 10
+#define HISI_DMA_POLL_Q_STS_TIME_OUT_US 1000
+
+#define HISI_DMA_MAX_DIR_NAME_LEN 128
+
+/*
+ * The HIP08B(HiSilicon IP08) and HIP09A(HiSilicon IP09) are DMA iEPs, they
+ * have the same pci device id but different pci revision.
+ * Unfortunately, they have different register layouts, so two layout
+ * enumerations are defined.
+ */
+enum hisi_dma_reg_layout {
+ HISI_DMA_REG_LAYOUT_INVALID = 0,
+ HISI_DMA_REG_LAYOUT_HIP08,
+ HISI_DMA_REG_LAYOUT_HIP09
+};
+
+enum hisi_dma_mode {
+ EP = 0,
+ RC,
+};
+
+enum hisi_dma_chan_status {
+ DISABLE = -1,
+ IDLE = 0,
+ RUN,
+ CPL,
+ PAUSE,
+ HALT,
+ ABORT,
+ WAIT,
+ BUFFCLR,
+};
+
+struct hisi_dma_sqe {
+ __le32 dw0;
+#define OPCODE_MASK GENMASK(3, 0)
+#define OPCODE_SMALL_PACKAGE 0x1
+#define OPCODE_M2M 0x4
+#define LOCAL_IRQ_EN BIT(8)
+#define ATTR_SRC_MASK GENMASK(14, 12)
+ __le32 dw1;
+ __le32 dw2;
+#define ATTR_DST_MASK GENMASK(26, 24)
+ __le32 length;
+ __le64 src_addr;
+ __le64 dst_addr;
+};
+
+struct hisi_dma_cqe {
+ __le32 rsv0;
+ __le32 rsv1;
+ __le16 sq_head;
+ __le16 rsv2;
+ __le16 rsv3;
+ __le16 w0;
+#define STATUS_MASK GENMASK(15, 1)
+#define STATUS_SUCC 0x0
+#define VALID_BIT BIT(0)
+};
+
+struct hisi_dma_desc {
+ struct virt_dma_desc vd;
+ struct hisi_dma_sqe sqe;
+};
+
+struct hisi_dma_chan {
+ struct virt_dma_chan vc;
+ struct hisi_dma_dev *hdma_dev;
+ struct hisi_dma_sqe *sq;
+ struct hisi_dma_cqe *cq;
+ dma_addr_t sq_dma;
+ dma_addr_t cq_dma;
+ u32 sq_tail;
+ u32 cq_head;
+ u32 qp_num;
+ enum hisi_dma_chan_status status;
+ struct hisi_dma_desc *desc;
+};
+
+struct hisi_dma_dev {
+ struct pci_dev *pdev;
+ void __iomem *base;
+ struct dma_device dma_dev;
+ u32 chan_num;
+ u32 chan_depth;
+ enum hisi_dma_reg_layout reg_layout;
+ void __iomem *queue_base; /* queue region start of register */
+ struct hisi_dma_chan chan[];
+};
+
+#ifdef CONFIG_DEBUG_FS
+
+static const struct debugfs_reg32 hisi_dma_comm_chan_regs[] = {
+ {"DMA_QUEUE_SQ_DEPTH ", 0x0008ull},
+ {"DMA_QUEUE_SQ_TAIL_PTR ", 0x000Cull},
+ {"DMA_QUEUE_CQ_DEPTH ", 0x0018ull},
+ {"DMA_QUEUE_CQ_HEAD_PTR ", 0x001Cull},
+ {"DMA_QUEUE_CTRL0 ", 0x0020ull},
+ {"DMA_QUEUE_CTRL1 ", 0x0024ull},
+ {"DMA_QUEUE_FSM_STS ", 0x0030ull},
+ {"DMA_QUEUE_SQ_STS ", 0x0034ull},
+ {"DMA_QUEUE_CQ_TAIL_PTR ", 0x003Cull},
+ {"DMA_QUEUE_INT_STS ", 0x0040ull},
+ {"DMA_QUEUE_INT_MSK ", 0x0044ull},
+ {"DMA_QUEUE_INT_RO ", 0x006Cull},
+};
+
+static const struct debugfs_reg32 hisi_dma_hip08_chan_regs[] = {
+ {"DMA_QUEUE_BYTE_CNT ", 0x0038ull},
+ {"DMA_ERR_INT_NUM6 ", 0x0048ull},
+ {"DMA_QUEUE_DESP0 ", 0x0050ull},
+ {"DMA_QUEUE_DESP1 ", 0x0054ull},
+ {"DMA_QUEUE_DESP2 ", 0x0058ull},
+ {"DMA_QUEUE_DESP3 ", 0x005Cull},
+ {"DMA_QUEUE_DESP4 ", 0x0074ull},
+ {"DMA_QUEUE_DESP5 ", 0x0078ull},
+ {"DMA_QUEUE_DESP6 ", 0x007Cull},
+ {"DMA_QUEUE_DESP7 ", 0x0080ull},
+ {"DMA_ERR_INT_NUM0 ", 0x0084ull},
+ {"DMA_ERR_INT_NUM1 ", 0x0088ull},
+ {"DMA_ERR_INT_NUM2 ", 0x008Cull},
+ {"DMA_ERR_INT_NUM3 ", 0x0090ull},
+ {"DMA_ERR_INT_NUM4 ", 0x0094ull},
+ {"DMA_ERR_INT_NUM5 ", 0x0098ull},
+ {"DMA_QUEUE_SQ_STS2 ", 0x00A4ull},
+};
+
+static const struct debugfs_reg32 hisi_dma_hip09_chan_regs[] = {
+ {"DMA_QUEUE_ERR_INT_STS ", 0x0048ull},
+ {"DMA_QUEUE_ERR_INT_MSK ", 0x004Cull},
+ {"DFX_SQ_READ_ERR_PTR ", 0x0068ull},
+ {"DFX_DMA_ERR_INT_NUM0 ", 0x0084ull},
+ {"DFX_DMA_ERR_INT_NUM1 ", 0x0088ull},
+ {"DFX_DMA_ERR_INT_NUM2 ", 0x008Cull},
+ {"DFX_DMA_QUEUE_SQ_STS2 ", 0x00A4ull},
+};
+
+static const struct debugfs_reg32 hisi_dma_hip08_comm_regs[] = {
+ {"DMA_ECC_ERR_ADDR ", 0x2004ull},
+ {"DMA_ECC_ECC_CNT ", 0x2014ull},
+ {"COMMON_AND_CH_ERR_STS ", 0x2030ull},
+ {"LOCAL_CPL_ID_STS_0 ", 0x20E0ull},
+ {"LOCAL_CPL_ID_STS_1 ", 0x20E4ull},
+ {"LOCAL_CPL_ID_STS_2 ", 0x20E8ull},
+ {"LOCAL_CPL_ID_STS_3 ", 0x20ECull},
+ {"LOCAL_TLP_NUM ", 0x2158ull},
+ {"SQCQ_TLP_NUM ", 0x2164ull},
+ {"CPL_NUM ", 0x2168ull},
+ {"INF_BACK_PRESS_STS ", 0x2170ull},
+ {"DMA_CH_RAS_LEVEL ", 0x2184ull},
+ {"DMA_CM_RAS_LEVEL ", 0x2188ull},
+ {"DMA_CH_ERR_STS ", 0x2190ull},
+ {"DMA_CH_DONE_STS ", 0x2194ull},
+ {"DMA_SQ_TAG_STS_0 ", 0x21A0ull},
+ {"DMA_SQ_TAG_STS_1 ", 0x21A4ull},
+ {"DMA_SQ_TAG_STS_2 ", 0x21A8ull},
+ {"DMA_SQ_TAG_STS_3 ", 0x21ACull},
+ {"LOCAL_P_ID_STS_0 ", 0x21B0ull},
+ {"LOCAL_P_ID_STS_1 ", 0x21B4ull},
+ {"LOCAL_P_ID_STS_2 ", 0x21B8ull},
+ {"LOCAL_P_ID_STS_3 ", 0x21BCull},
+ {"DMA_PREBUFF_INFO_0 ", 0x2200ull},
+ {"DMA_CM_TABLE_INFO_0 ", 0x2220ull},
+ {"DMA_CM_CE_RO ", 0x2244ull},
+ {"DMA_CM_NFE_RO ", 0x2248ull},
+ {"DMA_CM_FE_RO ", 0x224Cull},
+};
+
+static const struct debugfs_reg32 hisi_dma_hip09_comm_regs[] = {
+ {"COMMON_AND_CH_ERR_STS ", 0x0030ull},
+ {"DMA_PORT_IDLE_STS ", 0x0150ull},
+ {"DMA_CH_RAS_LEVEL ", 0x0184ull},
+ {"DMA_CM_RAS_LEVEL ", 0x0188ull},
+ {"DMA_CM_CE_RO ", 0x0244ull},
+ {"DMA_CM_NFE_RO ", 0x0248ull},
+ {"DMA_CM_FE_RO ", 0x024Cull},
+ {"DFX_INF_BACK_PRESS_STS0 ", 0x1A40ull},
+ {"DFX_INF_BACK_PRESS_STS1 ", 0x1A44ull},
+ {"DFX_INF_BACK_PRESS_STS2 ", 0x1A48ull},
+ {"DFX_DMA_WRR_DISABLE ", 0x1A4Cull},
+ {"DFX_PA_REQ_TLP_NUM ", 0x1C00ull},
+ {"DFX_PA_BACK_TLP_NUM ", 0x1C04ull},
+ {"DFX_PA_RETRY_TLP_NUM ", 0x1C08ull},
+ {"DFX_LOCAL_NP_TLP_NUM ", 0x1C0Cull},
+ {"DFX_LOCAL_CPL_HEAD_TLP_NUM ", 0x1C10ull},
+ {"DFX_LOCAL_CPL_DATA_TLP_NUM ", 0x1C14ull},
+ {"DFX_LOCAL_CPL_EXT_DATA_TLP_NUM ", 0x1C18ull},
+ {"DFX_LOCAL_P_HEAD_TLP_NUM ", 0x1C1Cull},
+ {"DFX_LOCAL_P_ACK_TLP_NUM ", 0x1C20ull},
+ {"DFX_BUF_ALOC_PORT_REQ_NUM ", 0x1C24ull},
+ {"DFX_BUF_ALOC_PORT_RESULT_NUM ", 0x1C28ull},
+ {"DFX_BUF_FAIL_SIZE_NUM ", 0x1C2Cull},
+ {"DFX_BUF_ALOC_SIZE_NUM ", 0x1C30ull},
+ {"DFX_BUF_NP_RELEASE_SIZE_NUM ", 0x1C34ull},
+ {"DFX_BUF_P_RELEASE_SIZE_NUM ", 0x1C38ull},
+ {"DFX_BUF_PORT_RELEASE_SIZE_NUM ", 0x1C3Cull},
+ {"DFX_DMA_PREBUF_MEM0_ECC_ERR_ADDR ", 0x1CA8ull},
+ {"DFX_DMA_PREBUF_MEM0_ECC_CNT ", 0x1CACull},
+ {"DFX_DMA_LOC_NP_OSTB_ECC_ERR_ADDR ", 0x1CB0ull},
+ {"DFX_DMA_LOC_NP_OSTB_ECC_CNT ", 0x1CB4ull},
+ {"DFX_DMA_PREBUF_MEM1_ECC_ERR_ADDR ", 0x1CC0ull},
+ {"DFX_DMA_PREBUF_MEM1_ECC_CNT ", 0x1CC4ull},
+ {"DMA_CH_DONE_STS ", 0x02E0ull},
+ {"DMA_CH_ERR_STS ", 0x0320ull},
+};
+#endif /* CONFIG_DEBUG_FS*/
+
+static enum hisi_dma_reg_layout hisi_dma_get_reg_layout(struct pci_dev *pdev)
+{
+ if (pdev->revision == HISI_DMA_REVISION_HIP08B)
+ return HISI_DMA_REG_LAYOUT_HIP08;
+ else if (pdev->revision >= HISI_DMA_REVISION_HIP09A)
+ return HISI_DMA_REG_LAYOUT_HIP09;
+
+ return HISI_DMA_REG_LAYOUT_INVALID;
+}
+
+static u32 hisi_dma_get_chan_num(struct pci_dev *pdev)
+{
+ if (pdev->revision == HISI_DMA_REVISION_HIP08B)
+ return HISI_DMA_HIP08_CHAN_NUM;
+
+ return HISI_DMA_HIP09_CHAN_NUM;
+}
+
+static u32 hisi_dma_get_msi_num(struct pci_dev *pdev)
+{
+ if (pdev->revision == HISI_DMA_REVISION_HIP08B)
+ return HISI_DMA_HIP08_MSI_NUM;
+
+ return HISI_DMA_HIP09_MSI_NUM;
+}
+
+static u32 hisi_dma_get_queue_base(struct pci_dev *pdev)
+{
+ if (pdev->revision == HISI_DMA_REVISION_HIP08B)
+ return HISI_DMA_HIP08_Q_BASE;
+
+ return HISI_DMA_HIP09_Q_BASE;
+}
+
+static inline struct hisi_dma_chan *to_hisi_dma_chan(struct dma_chan *c)
+{
+ return container_of(c, struct hisi_dma_chan, vc.chan);
+}
+
+static inline struct hisi_dma_desc *to_hisi_dma_desc(struct virt_dma_desc *vd)
+{
+ return container_of(vd, struct hisi_dma_desc, vd);
+}
+
+static inline void hisi_dma_chan_write(void __iomem *base, u32 reg, u32 index,
+ u32 val)
+{
+ writel_relaxed(val, base + reg + index * HISI_DMA_Q_OFFSET);
+}
+
+static inline void hisi_dma_update_bit(void __iomem *addr, u32 pos, bool val)
+{
+ u32 tmp;
+
+ tmp = readl_relaxed(addr);
+ tmp = val ? tmp | pos : tmp & ~pos;
+ writel_relaxed(tmp, addr);
+}
+
+static void hisi_dma_pause_dma(struct hisi_dma_dev *hdma_dev, u32 index,
+ bool pause)
+{
+ void __iomem *addr;
+
+ addr = hdma_dev->queue_base + HISI_DMA_Q_CTRL0 +
+ index * HISI_DMA_Q_OFFSET;
+ hisi_dma_update_bit(addr, HISI_DMA_Q_CTRL0_QUEUE_PAUSE, pause);
+}
+
+static void hisi_dma_enable_dma(struct hisi_dma_dev *hdma_dev, u32 index,
+ bool enable)
+{
+ void __iomem *addr;
+
+ addr = hdma_dev->queue_base + HISI_DMA_Q_CTRL0 +
+ index * HISI_DMA_Q_OFFSET;
+ hisi_dma_update_bit(addr, HISI_DMA_Q_CTRL0_QUEUE_EN, enable);
+}
+
+static void hisi_dma_mask_irq(struct hisi_dma_dev *hdma_dev, u32 qp_index)
+{
+ void __iomem *q_base = hdma_dev->queue_base;
+
+ if (hdma_dev->reg_layout == HISI_DMA_REG_LAYOUT_HIP08)
+ hisi_dma_chan_write(q_base, HISI_DMA_HIP08_Q_INT_MSK,
+ qp_index, HISI_DMA_HIP08_Q_INT_STS_MASK);
+ else {
+ hisi_dma_chan_write(q_base, HISI_DMA_HIP09_Q_INT_MSK,
+ qp_index, HISI_DMA_HIP09_Q_INT_STS_MASK);
+ hisi_dma_chan_write(q_base, HISI_DMA_HIP09_Q_ERR_INT_MSK,
+ qp_index,
+ HISI_DMA_HIP09_Q_ERR_INT_STS_MASK);
+ }
+}
+
+static void hisi_dma_unmask_irq(struct hisi_dma_dev *hdma_dev, u32 qp_index)
+{
+ void __iomem *q_base = hdma_dev->queue_base;
+
+ if (hdma_dev->reg_layout == HISI_DMA_REG_LAYOUT_HIP08) {
+ hisi_dma_chan_write(q_base, HISI_DMA_HIP08_Q_INT_STS,
+ qp_index, HISI_DMA_HIP08_Q_INT_STS_MASK);
+ hisi_dma_chan_write(q_base, HISI_DMA_HIP08_Q_INT_MSK,
+ qp_index, 0);
+ } else {
+ hisi_dma_chan_write(q_base, HISI_DMA_HIP09_Q_INT_STS,
+ qp_index, HISI_DMA_HIP09_Q_INT_STS_MASK);
+ hisi_dma_chan_write(q_base, HISI_DMA_HIP09_Q_ERR_INT_STS,
+ qp_index,
+ HISI_DMA_HIP09_Q_ERR_INT_STS_MASK);
+ hisi_dma_chan_write(q_base, HISI_DMA_HIP09_Q_INT_MSK,
+ qp_index, 0);
+ hisi_dma_chan_write(q_base, HISI_DMA_HIP09_Q_ERR_INT_MSK,
+ qp_index, 0);
+ }
+}
+
+static void hisi_dma_do_reset(struct hisi_dma_dev *hdma_dev, u32 index)
+{
+ void __iomem *addr;
+
+ addr = hdma_dev->queue_base +
+ HISI_DMA_Q_CTRL1 + index * HISI_DMA_Q_OFFSET;
+ hisi_dma_update_bit(addr, HISI_DMA_Q_CTRL1_QUEUE_RESET, 1);
+}
+
+static void hisi_dma_reset_qp_point(struct hisi_dma_dev *hdma_dev, u32 index)
+{
+ void __iomem *q_base = hdma_dev->queue_base;
+
+ hisi_dma_chan_write(q_base, HISI_DMA_Q_SQ_TAIL_PTR, index, 0);
+ hisi_dma_chan_write(q_base, HISI_DMA_Q_CQ_HEAD_PTR, index, 0);
+}
+
+static void hisi_dma_reset_or_disable_hw_chan(struct hisi_dma_chan *chan,
+ bool disable)
+{
+ struct hisi_dma_dev *hdma_dev = chan->hdma_dev;
+ u32 index = chan->qp_num, tmp;
+ void __iomem *addr;
+ int ret;
+
+ hisi_dma_pause_dma(hdma_dev, index, true);
+ hisi_dma_enable_dma(hdma_dev, index, false);
+ hisi_dma_mask_irq(hdma_dev, index);
+
+ addr = hdma_dev->queue_base +
+ HISI_DMA_Q_FSM_STS + index * HISI_DMA_Q_OFFSET;
+
+ ret = readl_relaxed_poll_timeout(addr, tmp,
+ FIELD_GET(HISI_DMA_Q_FSM_STS_MASK, tmp) != RUN,
+ HISI_DMA_POLL_Q_STS_DELAY_US, HISI_DMA_POLL_Q_STS_TIME_OUT_US);
+ if (ret) {
+ dev_err(&hdma_dev->pdev->dev, "disable channel timeout!\n");
+ WARN_ON(1);
+ }
+
+ hisi_dma_do_reset(hdma_dev, index);
+ hisi_dma_reset_qp_point(hdma_dev, index);
+ hisi_dma_pause_dma(hdma_dev, index, false);
+
+ if (!disable) {
+ hisi_dma_enable_dma(hdma_dev, index, true);
+ hisi_dma_unmask_irq(hdma_dev, index);
+ }
+
+ ret = readl_relaxed_poll_timeout(addr, tmp,
+ FIELD_GET(HISI_DMA_Q_FSM_STS_MASK, tmp) == IDLE,
+ HISI_DMA_POLL_Q_STS_DELAY_US, HISI_DMA_POLL_Q_STS_TIME_OUT_US);
+ if (ret) {
+ dev_err(&hdma_dev->pdev->dev, "reset channel timeout!\n");
+ WARN_ON(1);
+ }
+}
+
+static void hisi_dma_free_chan_resources(struct dma_chan *c)
+{
+ struct hisi_dma_chan *chan = to_hisi_dma_chan(c);
+ struct hisi_dma_dev *hdma_dev = chan->hdma_dev;
+
+ hisi_dma_reset_or_disable_hw_chan(chan, false);
+ vchan_free_chan_resources(&chan->vc);
+
+ memset(chan->sq, 0, sizeof(struct hisi_dma_sqe) * hdma_dev->chan_depth);
+ memset(chan->cq, 0, sizeof(struct hisi_dma_cqe) * hdma_dev->chan_depth);
+ chan->sq_tail = 0;
+ chan->cq_head = 0;
+ chan->status = DISABLE;
+}
+
+static void hisi_dma_desc_free(struct virt_dma_desc *vd)
+{
+ kfree(to_hisi_dma_desc(vd));
+}
+
+static struct dma_async_tx_descriptor *
+hisi_dma_prep_dma_memcpy(struct dma_chan *c, dma_addr_t dst, dma_addr_t src,
+ size_t len, unsigned long flags)
+{
+ struct hisi_dma_chan *chan = to_hisi_dma_chan(c);
+ struct hisi_dma_desc *desc;
+
+ desc = kzalloc(sizeof(*desc), GFP_NOWAIT);
+ if (!desc)
+ return NULL;
+
+ desc->sqe.length = cpu_to_le32(len);
+ desc->sqe.src_addr = cpu_to_le64(src);
+ desc->sqe.dst_addr = cpu_to_le64(dst);
+
+ return vchan_tx_prep(&chan->vc, &desc->vd, flags);
+}
+
+static enum dma_status
+hisi_dma_tx_status(struct dma_chan *c, dma_cookie_t cookie,
+ struct dma_tx_state *txstate)
+{
+ return dma_cookie_status(c, cookie, txstate);
+}
+
+static void hisi_dma_start_transfer(struct hisi_dma_chan *chan)
+{
+ struct hisi_dma_sqe *sqe = chan->sq + chan->sq_tail;
+ struct hisi_dma_dev *hdma_dev = chan->hdma_dev;
+ struct hisi_dma_desc *desc;
+ struct virt_dma_desc *vd;
+
+ vd = vchan_next_desc(&chan->vc);
+ if (!vd) {
+ chan->desc = NULL;
+ return;
+ }
+ list_del(&vd->node);
+ desc = to_hisi_dma_desc(vd);
+ chan->desc = desc;
+
+ memcpy(sqe, &desc->sqe, sizeof(struct hisi_dma_sqe));
+
+ /* update other field in sqe */
+ sqe->dw0 = cpu_to_le32(FIELD_PREP(OPCODE_MASK, OPCODE_M2M));
+ sqe->dw0 |= cpu_to_le32(LOCAL_IRQ_EN);
+
+ /* make sure data has been updated in sqe */
+ wmb();
+
+ /* update sq tail, point to new sqe position */
+ chan->sq_tail = (chan->sq_tail + 1) % hdma_dev->chan_depth;
+
+ /* update sq_tail to trigger a new task */
+ hisi_dma_chan_write(hdma_dev->queue_base, HISI_DMA_Q_SQ_TAIL_PTR,
+ chan->qp_num, chan->sq_tail);
+}
+
+static void hisi_dma_issue_pending(struct dma_chan *c)
+{
+ struct hisi_dma_chan *chan = to_hisi_dma_chan(c);
+ unsigned long flags;
+
+ spin_lock_irqsave(&chan->vc.lock, flags);
+
+ if (vchan_issue_pending(&chan->vc) && !chan->desc)
+ hisi_dma_start_transfer(chan);
+
+ spin_unlock_irqrestore(&chan->vc.lock, flags);
+}
+
+static int hisi_dma_terminate_all(struct dma_chan *c)
+{
+ struct hisi_dma_chan *chan = to_hisi_dma_chan(c);
+ unsigned long flags;
+ LIST_HEAD(head);
+
+ spin_lock_irqsave(&chan->vc.lock, flags);
+
+ hisi_dma_pause_dma(chan->hdma_dev, chan->qp_num, true);
+ if (chan->desc) {
+ vchan_terminate_vdesc(&chan->desc->vd);
+ chan->desc = NULL;
+ }
+
+ vchan_get_all_descriptors(&chan->vc, &head);
+
+ spin_unlock_irqrestore(&chan->vc.lock, flags);
+
+ vchan_dma_desc_free_list(&chan->vc, &head);
+ hisi_dma_pause_dma(chan->hdma_dev, chan->qp_num, false);
+
+ return 0;
+}
+
+static void hisi_dma_synchronize(struct dma_chan *c)
+{
+ struct hisi_dma_chan *chan = to_hisi_dma_chan(c);
+
+ vchan_synchronize(&chan->vc);
+}
+
+static int hisi_dma_alloc_qps_mem(struct hisi_dma_dev *hdma_dev)
+{
+ size_t sq_size = sizeof(struct hisi_dma_sqe) * hdma_dev->chan_depth;
+ size_t cq_size = sizeof(struct hisi_dma_cqe) * hdma_dev->chan_depth;
+ struct device *dev = &hdma_dev->pdev->dev;
+ struct hisi_dma_chan *chan;
+ int i;
+
+ for (i = 0; i < hdma_dev->chan_num; i++) {
+ chan = &hdma_dev->chan[i];
+ chan->sq = dmam_alloc_coherent(dev, sq_size, &chan->sq_dma,
+ GFP_KERNEL);
+ if (!chan->sq)
+ return -ENOMEM;
+
+ chan->cq = dmam_alloc_coherent(dev, cq_size, &chan->cq_dma,
+ GFP_KERNEL);
+ if (!chan->cq)
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static void hisi_dma_init_hw_qp(struct hisi_dma_dev *hdma_dev, u32 index)
+{
+ struct hisi_dma_chan *chan = &hdma_dev->chan[index];
+ void __iomem *q_base = hdma_dev->queue_base;
+ u32 hw_depth = hdma_dev->chan_depth - 1;
+ void __iomem *addr;
+ u32 tmp;
+
+ /* set sq, cq base */
+ hisi_dma_chan_write(q_base, HISI_DMA_Q_SQ_BASE_L, index,
+ lower_32_bits(chan->sq_dma));
+ hisi_dma_chan_write(q_base, HISI_DMA_Q_SQ_BASE_H, index,
+ upper_32_bits(chan->sq_dma));
+ hisi_dma_chan_write(q_base, HISI_DMA_Q_CQ_BASE_L, index,
+ lower_32_bits(chan->cq_dma));
+ hisi_dma_chan_write(q_base, HISI_DMA_Q_CQ_BASE_H, index,
+ upper_32_bits(chan->cq_dma));
+
+ /* set sq, cq depth */
+ hisi_dma_chan_write(q_base, HISI_DMA_Q_SQ_DEPTH, index, hw_depth);
+ hisi_dma_chan_write(q_base, HISI_DMA_Q_CQ_DEPTH, index, hw_depth);
+
+ /* init sq tail and cq head */
+ hisi_dma_chan_write(q_base, HISI_DMA_Q_SQ_TAIL_PTR, index, 0);
+ hisi_dma_chan_write(q_base, HISI_DMA_Q_CQ_HEAD_PTR, index, 0);
+
+ /* init error interrupt stats */
+ hisi_dma_chan_write(q_base, HISI_DMA_Q_ERR_INT_NUM0, index, 0);
+ hisi_dma_chan_write(q_base, HISI_DMA_Q_ERR_INT_NUM1, index, 0);
+ hisi_dma_chan_write(q_base, HISI_DMA_Q_ERR_INT_NUM2, index, 0);
+
+ if (hdma_dev->reg_layout == HISI_DMA_REG_LAYOUT_HIP08) {
+ hisi_dma_chan_write(q_base, HISI_DMA_HIP08_Q_ERR_INT_NUM3,
+ index, 0);
+ hisi_dma_chan_write(q_base, HISI_DMA_HIP08_Q_ERR_INT_NUM4,
+ index, 0);
+ hisi_dma_chan_write(q_base, HISI_DMA_HIP08_Q_ERR_INT_NUM5,
+ index, 0);
+ hisi_dma_chan_write(q_base, HISI_DMA_HIP08_Q_ERR_INT_NUM6,
+ index, 0);
+ /*
+ * init SQ/CQ direction selecting register.
+ * "0" is to local side and "1" is to remote side.
+ */
+ addr = q_base + HISI_DMA_Q_CTRL0 + index * HISI_DMA_Q_OFFSET;
+ hisi_dma_update_bit(addr, HISI_DMA_HIP08_Q_CTRL0_SQCQ_DRCT, 0);
+
+ /*
+ * 0 - Continue to next descriptor if error occurs.
+ * 1 - Abort the DMA queue if error occurs.
+ */
+ hisi_dma_update_bit(addr,
+ HISI_DMA_HIP08_Q_CTRL0_ERR_ABORT_EN, 0);
+ } else {
+ addr = q_base + HISI_DMA_Q_CTRL0 + index * HISI_DMA_Q_OFFSET;
+
+ /*
+ * init SQ/CQ direction selecting register.
+ * "0" is to local side and "1" is to remote side.
+ */
+ hisi_dma_update_bit(addr, HISI_DMA_HIP09_Q_CTRL0_SQ_DRCT, 0);
+ hisi_dma_update_bit(addr, HISI_DMA_HIP09_Q_CTRL0_CQ_DRCT, 0);
+
+ /*
+ * 0 - Continue to next descriptor if error occurs.
+ * 1 - Abort the DMA queue if error occurs.
+ */
+
+ tmp = readl_relaxed(addr);
+ tmp &= ~HISI_DMA_HIP09_Q_CTRL0_ERR_ABORT_EN;
+ writel_relaxed(tmp, addr);
+
+ /*
+ * 0 - dma should process FLR whith CPU.
+ * 1 - dma not process FLR, only cpu process FLR.
+ */
+ addr = q_base + HISI_DMA_HIP09_DMA_FLR_DISABLE +
+ index * HISI_DMA_Q_OFFSET;
+ hisi_dma_update_bit(addr, HISI_DMA_HIP09_DMA_FLR_DISABLE_B, 0);
+
+ addr = q_base + HISI_DMA_Q_CTRL1 + index * HISI_DMA_Q_OFFSET;
+ hisi_dma_update_bit(addr, HISI_DMA_HIP09_Q_CTRL1_VA_ENABLE, 1);
+ }
+}
+
+static void hisi_dma_enable_qp(struct hisi_dma_dev *hdma_dev, u32 qp_index)
+{
+ hisi_dma_init_hw_qp(hdma_dev, qp_index);
+ hisi_dma_unmask_irq(hdma_dev, qp_index);
+ hisi_dma_enable_dma(hdma_dev, qp_index, true);
+}
+
+static void hisi_dma_disable_qp(struct hisi_dma_dev *hdma_dev, u32 qp_index)
+{
+ hisi_dma_reset_or_disable_hw_chan(&hdma_dev->chan[qp_index], true);
+}
+
+static void hisi_dma_enable_qps(struct hisi_dma_dev *hdma_dev)
+{
+ int i;
+
+ for (i = 0; i < hdma_dev->chan_num; i++) {
+ hdma_dev->chan[i].qp_num = i;
+ hdma_dev->chan[i].hdma_dev = hdma_dev;
+ hdma_dev->chan[i].vc.desc_free = hisi_dma_desc_free;
+ vchan_init(&hdma_dev->chan[i].vc, &hdma_dev->dma_dev);
+ hisi_dma_enable_qp(hdma_dev, i);
+ }
+}
+
+static void hisi_dma_disable_qps(struct hisi_dma_dev *hdma_dev)
+{
+ int i;
+
+ for (i = 0; i < hdma_dev->chan_num; i++) {
+ hisi_dma_disable_qp(hdma_dev, i);
+ tasklet_kill(&hdma_dev->chan[i].vc.task);
+ }
+}
+
+static irqreturn_t hisi_dma_irq(int irq, void *data)
+{
+ struct hisi_dma_chan *chan = data;
+ struct hisi_dma_dev *hdma_dev = chan->hdma_dev;
+ struct hisi_dma_desc *desc;
+ struct hisi_dma_cqe *cqe;
+ void __iomem *q_base;
+
+ spin_lock(&chan->vc.lock);
+
+ desc = chan->desc;
+ cqe = chan->cq + chan->cq_head;
+ q_base = hdma_dev->queue_base;
+ if (desc) {
+ chan->cq_head = (chan->cq_head + 1) % hdma_dev->chan_depth;
+ hisi_dma_chan_write(q_base, HISI_DMA_Q_CQ_HEAD_PTR,
+ chan->qp_num, chan->cq_head);
+ if (FIELD_GET(STATUS_MASK, cqe->w0) == STATUS_SUCC) {
+ vchan_cookie_complete(&desc->vd);
+ hisi_dma_start_transfer(chan);
+ } else {
+ dev_err(&hdma_dev->pdev->dev, "task error!\n");
+ }
+ }
+
+ spin_unlock(&chan->vc.lock);
+
+ return IRQ_HANDLED;
+}
+
+static int hisi_dma_request_qps_irq(struct hisi_dma_dev *hdma_dev)
+{
+ struct pci_dev *pdev = hdma_dev->pdev;
+ int i, ret;
+
+ for (i = 0; i < hdma_dev->chan_num; i++) {
+ ret = devm_request_irq(&pdev->dev, pci_irq_vector(pdev, i),
+ hisi_dma_irq, IRQF_SHARED, "hisi_dma",
+ &hdma_dev->chan[i]);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+/* This function enables all hw channels in a device */
+static int hisi_dma_enable_hw_channels(struct hisi_dma_dev *hdma_dev)
+{
+ int ret;
+
+ ret = hisi_dma_alloc_qps_mem(hdma_dev);
+ if (ret) {
+ dev_err(&hdma_dev->pdev->dev, "fail to allocate qp memory!\n");
+ return ret;
+ }
+
+ ret = hisi_dma_request_qps_irq(hdma_dev);
+ if (ret) {
+ dev_err(&hdma_dev->pdev->dev, "fail to request qp irq!\n");
+ return ret;
+ }
+
+ hisi_dma_enable_qps(hdma_dev);
+
+ return 0;
+}
+
+static void hisi_dma_disable_hw_channels(void *data)
+{
+ hisi_dma_disable_qps(data);
+}
+
+static void hisi_dma_set_mode(struct hisi_dma_dev *hdma_dev,
+ enum hisi_dma_mode mode)
+{
+ if (hdma_dev->reg_layout == HISI_DMA_REG_LAYOUT_HIP08)
+ writel_relaxed(mode == RC ? 1 : 0,
+ hdma_dev->base + HISI_DMA_HIP08_MODE);
+}
+
+static void hisi_dma_init_hw(struct hisi_dma_dev *hdma_dev)
+{
+ void __iomem *addr;
+ int i;
+
+ if (hdma_dev->reg_layout == HISI_DMA_REG_LAYOUT_HIP09) {
+ for (i = 0; i < HISI_DMA_HIP09_MAX_PORT_NUM; i++) {
+ addr = hdma_dev->base + HISI_DMA_HIP09_PORT_CFG_REG(i);
+ hisi_dma_update_bit(addr,
+ HISI_DMA_HIP09_PORT_CFG_LINK_DOWN_MASK_B, 1);
+ }
+ }
+}
+
+static void hisi_dma_init_dma_dev(struct hisi_dma_dev *hdma_dev)
+{
+ struct dma_device *dma_dev;
+
+ dma_dev = &hdma_dev->dma_dev;
+ dma_cap_set(DMA_MEMCPY, dma_dev->cap_mask);
+ dma_dev->device_free_chan_resources = hisi_dma_free_chan_resources;
+ dma_dev->device_prep_dma_memcpy = hisi_dma_prep_dma_memcpy;
+ dma_dev->device_tx_status = hisi_dma_tx_status;
+ dma_dev->device_issue_pending = hisi_dma_issue_pending;
+ dma_dev->device_terminate_all = hisi_dma_terminate_all;
+ dma_dev->device_synchronize = hisi_dma_synchronize;
+ dma_dev->directions = BIT(DMA_MEM_TO_MEM);
+ dma_dev->dev = &hdma_dev->pdev->dev;
+ INIT_LIST_HEAD(&dma_dev->channels);
+}
+
+/* --- debugfs implementation --- */
+#ifdef CONFIG_DEBUG_FS
+#include <linux/debugfs.h>
+static struct debugfs_reg32 *hisi_dma_get_ch_regs(struct hisi_dma_dev *hdma_dev,
+ u32 *regs_sz)
+{
+ struct device *dev = &hdma_dev->pdev->dev;
+ struct debugfs_reg32 *regs;
+ u32 regs_sz_comm;
+
+ regs_sz_comm = ARRAY_SIZE(hisi_dma_comm_chan_regs);
+
+ if (hdma_dev->reg_layout == HISI_DMA_REG_LAYOUT_HIP08)
+ *regs_sz = regs_sz_comm + ARRAY_SIZE(hisi_dma_hip08_chan_regs);
+ else
+ *regs_sz = regs_sz_comm + ARRAY_SIZE(hisi_dma_hip09_chan_regs);
+
+ regs = devm_kcalloc(dev, *regs_sz, sizeof(struct debugfs_reg32),
+ GFP_KERNEL);
+ if (!regs)
+ return NULL;
+ memcpy(regs, hisi_dma_comm_chan_regs, sizeof(hisi_dma_comm_chan_regs));
+
+ if (hdma_dev->reg_layout == HISI_DMA_REG_LAYOUT_HIP08)
+ memcpy(regs + regs_sz_comm, hisi_dma_hip08_chan_regs,
+ sizeof(hisi_dma_hip08_chan_regs));
+ else
+ memcpy(regs + regs_sz_comm, hisi_dma_hip09_chan_regs,
+ sizeof(hisi_dma_hip09_chan_regs));
+
+ return regs;
+}
+
+static int hisi_dma_create_chan_dir(struct hisi_dma_dev *hdma_dev)
+{
+ char dir_name[HISI_DMA_MAX_DIR_NAME_LEN];
+ struct debugfs_regset32 *regsets;
+ struct debugfs_reg32 *regs;
+ struct dentry *chan_dir;
+ struct device *dev;
+ u32 regs_sz;
+ int ret;
+ int i;
+
+ dev = &hdma_dev->pdev->dev;
+
+ regsets = devm_kcalloc(dev, hdma_dev->chan_num,
+ sizeof(*regsets), GFP_KERNEL);
+ if (!regsets)
+ return -ENOMEM;
+
+ regs = hisi_dma_get_ch_regs(hdma_dev, &regs_sz);
+ if (!regs)
+ return -ENOMEM;
+
+ for (i = 0; i < hdma_dev->chan_num; i++) {
+ regsets[i].regs = regs;
+ regsets[i].nregs = regs_sz;
+ regsets[i].base = hdma_dev->queue_base + i * HISI_DMA_Q_OFFSET;
+ regsets[i].dev = dev;
+
+ memset(dir_name, 0, HISI_DMA_MAX_DIR_NAME_LEN);
+ ret = sprintf(dir_name, "channel%d", i);
+ if (ret < 0)
+ return ret;
+
+ chan_dir = debugfs_create_dir(dir_name,
+ hdma_dev->dma_dev.dbg_dev_root);
+ debugfs_create_regset32("regs", 0444, chan_dir, &regsets[i]);
+ }
+
+ return 0;
+}
+
+static void hisi_dma_create_debugfs(struct hisi_dma_dev *hdma_dev)
+{
+ struct debugfs_regset32 *regset;
+ struct device *dev;
+ int ret;
+
+ dev = &hdma_dev->pdev->dev;
+
+ if (hdma_dev->dma_dev.dbg_dev_root == NULL)
+ return;
+
+ regset = devm_kzalloc(dev, sizeof(*regset), GFP_KERNEL);
+ if (!regset)
+ return;
+
+ if (hdma_dev->reg_layout == HISI_DMA_REG_LAYOUT_HIP08) {
+ regset->regs = hisi_dma_hip08_comm_regs;
+ regset->nregs = ARRAY_SIZE(hisi_dma_hip08_comm_regs);
+ } else {
+ regset->regs = hisi_dma_hip09_comm_regs;
+ regset->nregs = ARRAY_SIZE(hisi_dma_hip09_comm_regs);
+ }
+ regset->base = hdma_dev->base;
+ regset->dev = dev;
+
+ debugfs_create_regset32("regs", 0444,
+ hdma_dev->dma_dev.dbg_dev_root, regset);
+
+ ret = hisi_dma_create_chan_dir(hdma_dev);
+ if (ret < 0)
+ dev_info(&hdma_dev->pdev->dev, "fail to create debugfs for channels!\n");
+}
+#else
+static void hisi_dma_create_debugfs(struct hisi_dma_dev *hdma_dev) { }
+#endif /* CONFIG_DEBUG_FS*/
+/* --- debugfs implementation --- */
+
+static int hisi_dma_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+ enum hisi_dma_reg_layout reg_layout;
+ struct device *dev = &pdev->dev;
+ struct hisi_dma_dev *hdma_dev;
+ struct dma_device *dma_dev;
+ u32 chan_num;
+ u32 msi_num;
+ int ret;
+
+ reg_layout = hisi_dma_get_reg_layout(pdev);
+ if (reg_layout == HISI_DMA_REG_LAYOUT_INVALID) {
+ dev_err(dev, "unsupported device!\n");
+ return -EINVAL;
+ }
+
+ ret = pcim_enable_device(pdev);
+ if (ret) {
+ dev_err(dev, "failed to enable device mem!\n");
+ return ret;
+ }
+
+ ret = pcim_iomap_regions(pdev, 1 << PCI_BAR_2, pci_name(pdev));
+ if (ret) {
+ dev_err(dev, "failed to remap I/O region!\n");
+ return ret;
+ }
+
+ ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
+ if (ret)
+ return ret;
+
+ chan_num = hisi_dma_get_chan_num(pdev);
+ hdma_dev = devm_kzalloc(dev, struct_size(hdma_dev, chan, chan_num),
+ GFP_KERNEL);
+ if (!hdma_dev)
+ return -EINVAL;
+
+ hdma_dev->base = pcim_iomap_table(pdev)[PCI_BAR_2];
+ hdma_dev->pdev = pdev;
+ hdma_dev->chan_depth = HISI_DMA_Q_DEPTH_VAL;
+ hdma_dev->chan_num = chan_num;
+ hdma_dev->reg_layout = reg_layout;
+ hdma_dev->queue_base = hdma_dev->base + hisi_dma_get_queue_base(pdev);
+
+ pci_set_drvdata(pdev, hdma_dev);
+ pci_set_master(pdev);
+
+ msi_num = hisi_dma_get_msi_num(pdev);
+
+ /* This will be freed by 'pcim_release()'. See 'pcim_enable_device()' */
+ ret = pci_alloc_irq_vectors(pdev, msi_num, msi_num, PCI_IRQ_MSI);
+ if (ret < 0) {
+ dev_err(dev, "Failed to allocate MSI vectors!\n");
+ return ret;
+ }
+
+ hisi_dma_init_dma_dev(hdma_dev);
+
+ hisi_dma_set_mode(hdma_dev, RC);
+
+ hisi_dma_init_hw(hdma_dev);
+
+ ret = hisi_dma_enable_hw_channels(hdma_dev);
+ if (ret < 0) {
+ dev_err(dev, "failed to enable hw channel!\n");
+ return ret;
+ }
+
+ ret = devm_add_action_or_reset(dev, hisi_dma_disable_hw_channels,
+ hdma_dev);
+ if (ret)
+ return ret;
+
+ dma_dev = &hdma_dev->dma_dev;
+ ret = dmaenginem_async_device_register(dma_dev);
+ if (ret < 0) {
+ dev_err(dev, "failed to register device!\n");
+ return ret;
+ }
+
+ hisi_dma_create_debugfs(hdma_dev);
+
+ return 0;
+}
+
+static const struct pci_device_id hisi_dma_pci_tbl[] = {
+ { PCI_DEVICE(PCI_VENDOR_ID_HUAWEI, 0xa122) },
+ { 0, }
+};
+
+static struct pci_driver hisi_dma_pci_driver = {
+ .name = "hisi_dma",
+ .id_table = hisi_dma_pci_tbl,
+ .probe = hisi_dma_probe,
+};
+
+module_pci_driver(hisi_dma_pci_driver);
+
+MODULE_AUTHOR("Zhou Wang <wangzhou1@hisilicon.com>");
+MODULE_AUTHOR("Zhenfa Qiu <qiuzhenfa@hisilicon.com>");
+MODULE_DESCRIPTION("HiSilicon Kunpeng DMA controller driver");
+MODULE_LICENSE("GPL v2");
+MODULE_DEVICE_TABLE(pci, hisi_dma_pci_tbl);
diff --git a/drivers/dma/hsu/Kconfig b/drivers/dma/hsu/Kconfig
new file mode 100644
index 000000000..af102baec
--- /dev/null
+++ b/drivers/dma/hsu/Kconfig
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0-only
+# DMA engine configuration for hsu
+config HSU_DMA
+ tristate
+ select DMA_ENGINE
+ select DMA_VIRTUAL_CHANNELS
+
+config HSU_DMA_PCI
+ tristate
+ depends on HSU_DMA && PCI
diff --git a/drivers/dma/hsu/Makefile b/drivers/dma/hsu/Makefile
new file mode 100644
index 000000000..61829b1de
--- /dev/null
+++ b/drivers/dma/hsu/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0-only
+obj-$(CONFIG_HSU_DMA) += hsu_dma.o
+hsu_dma-objs := hsu.o
+
+obj-$(CONFIG_HSU_DMA_PCI) += hsu_dma_pci.o
+hsu_dma_pci-objs := pci.o
diff --git a/drivers/dma/hsu/hsu.c b/drivers/dma/hsu/hsu.c
new file mode 100644
index 000000000..af5a2e252
--- /dev/null
+++ b/drivers/dma/hsu/hsu.c
@@ -0,0 +1,512 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Core driver for the High Speed UART DMA
+ *
+ * Copyright (C) 2015 Intel Corporation
+ * Author: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+ *
+ * Partially based on the bits found in drivers/tty/serial/mfd.c.
+ */
+
+/*
+ * DMA channel allocation:
+ * 1. Even number chans are used for DMA Read (UART TX), odd chans for DMA
+ * Write (UART RX).
+ * 2. 0/1 channel are assigned to port 0, 2/3 chan to port 1, 4/5 chan to
+ * port 3, and so on.
+ */
+
+#include <linux/bits.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/percpu-defs.h>
+#include <linux/scatterlist.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/spinlock.h>
+
+#include "hsu.h"
+
+#define HSU_DMA_BUSWIDTHS \
+ BIT(DMA_SLAVE_BUSWIDTH_UNDEFINED) | \
+ BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | \
+ BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \
+ BIT(DMA_SLAVE_BUSWIDTH_3_BYTES) | \
+ BIT(DMA_SLAVE_BUSWIDTH_4_BYTES) | \
+ BIT(DMA_SLAVE_BUSWIDTH_8_BYTES) | \
+ BIT(DMA_SLAVE_BUSWIDTH_16_BYTES)
+
+static inline void hsu_chan_disable(struct hsu_dma_chan *hsuc)
+{
+ hsu_chan_writel(hsuc, HSU_CH_CR, 0);
+}
+
+static inline void hsu_chan_enable(struct hsu_dma_chan *hsuc)
+{
+ u32 cr = HSU_CH_CR_CHA;
+
+ if (hsuc->direction == DMA_MEM_TO_DEV)
+ cr &= ~HSU_CH_CR_CHD;
+ else if (hsuc->direction == DMA_DEV_TO_MEM)
+ cr |= HSU_CH_CR_CHD;
+
+ hsu_chan_writel(hsuc, HSU_CH_CR, cr);
+}
+
+static void hsu_dma_chan_start(struct hsu_dma_chan *hsuc)
+{
+ struct dma_slave_config *config = &hsuc->config;
+ struct hsu_dma_desc *desc = hsuc->desc;
+ u32 bsr = 0, mtsr = 0; /* to shut the compiler up */
+ u32 dcr = HSU_CH_DCR_CHSOE | HSU_CH_DCR_CHEI;
+ unsigned int i, count;
+
+ if (hsuc->direction == DMA_MEM_TO_DEV) {
+ bsr = config->dst_maxburst;
+ mtsr = config->dst_addr_width;
+ } else if (hsuc->direction == DMA_DEV_TO_MEM) {
+ bsr = config->src_maxburst;
+ mtsr = config->src_addr_width;
+ }
+
+ hsu_chan_disable(hsuc);
+
+ hsu_chan_writel(hsuc, HSU_CH_DCR, 0);
+ hsu_chan_writel(hsuc, HSU_CH_BSR, bsr);
+ hsu_chan_writel(hsuc, HSU_CH_MTSR, mtsr);
+
+ /* Set descriptors */
+ count = desc->nents - desc->active;
+ for (i = 0; i < count && i < HSU_DMA_CHAN_NR_DESC; i++) {
+ hsu_chan_writel(hsuc, HSU_CH_DxSAR(i), desc->sg[i].addr);
+ hsu_chan_writel(hsuc, HSU_CH_DxTSR(i), desc->sg[i].len);
+
+ /* Prepare value for DCR */
+ dcr |= HSU_CH_DCR_DESCA(i);
+ dcr |= HSU_CH_DCR_CHTOI(i); /* timeout bit, see HSU Errata 1 */
+
+ desc->active++;
+ }
+ /* Only for the last descriptor in the chain */
+ dcr |= HSU_CH_DCR_CHSOD(count - 1);
+ dcr |= HSU_CH_DCR_CHDI(count - 1);
+
+ hsu_chan_writel(hsuc, HSU_CH_DCR, dcr);
+
+ hsu_chan_enable(hsuc);
+}
+
+static void hsu_dma_stop_channel(struct hsu_dma_chan *hsuc)
+{
+ hsu_chan_disable(hsuc);
+ hsu_chan_writel(hsuc, HSU_CH_DCR, 0);
+}
+
+static void hsu_dma_start_channel(struct hsu_dma_chan *hsuc)
+{
+ hsu_dma_chan_start(hsuc);
+}
+
+static void hsu_dma_start_transfer(struct hsu_dma_chan *hsuc)
+{
+ struct virt_dma_desc *vdesc;
+
+ /* Get the next descriptor */
+ vdesc = vchan_next_desc(&hsuc->vchan);
+ if (!vdesc) {
+ hsuc->desc = NULL;
+ return;
+ }
+
+ list_del(&vdesc->node);
+ hsuc->desc = to_hsu_dma_desc(vdesc);
+
+ /* Start the channel with a new descriptor */
+ hsu_dma_start_channel(hsuc);
+}
+
+/*
+ * hsu_dma_get_status() - get DMA channel status
+ * @chip: HSUART DMA chip
+ * @nr: DMA channel number
+ * @status: pointer for DMA Channel Status Register value
+ *
+ * Description:
+ * The function reads and clears the DMA Channel Status Register, checks
+ * if it was a timeout interrupt and returns a corresponding value.
+ *
+ * Caller should provide a valid pointer for the DMA Channel Status
+ * Register value that will be returned in @status.
+ *
+ * Return:
+ * 1 for DMA timeout status, 0 for other DMA status, or error code for
+ * invalid parameters or no interrupt pending.
+ */
+int hsu_dma_get_status(struct hsu_dma_chip *chip, unsigned short nr,
+ u32 *status)
+{
+ struct hsu_dma_chan *hsuc;
+ unsigned long flags;
+ u32 sr;
+
+ /* Sanity check */
+ if (nr >= chip->hsu->nr_channels)
+ return -EINVAL;
+
+ hsuc = &chip->hsu->chan[nr];
+
+ /*
+ * No matter what situation, need read clear the IRQ status
+ * There is a bug, see Errata 5, HSD 2900918
+ */
+ spin_lock_irqsave(&hsuc->vchan.lock, flags);
+ sr = hsu_chan_readl(hsuc, HSU_CH_SR);
+ spin_unlock_irqrestore(&hsuc->vchan.lock, flags);
+
+ /* Check if any interrupt is pending */
+ sr &= ~(HSU_CH_SR_DESCE_ANY | HSU_CH_SR_CDESC_ANY);
+ if (!sr)
+ return -EIO;
+
+ /* Timeout IRQ, need wait some time, see Errata 2 */
+ if (sr & HSU_CH_SR_DESCTO_ANY)
+ udelay(2);
+
+ /*
+ * At this point, at least one of Descriptor Time Out, Channel Error
+ * or Descriptor Done bits must be set. Clear the Descriptor Time Out
+ * bits and if sr is still non-zero, it must be channel error or
+ * descriptor done which are higher priority than timeout and handled
+ * in hsu_dma_do_irq(). Else, it must be a timeout.
+ */
+ sr &= ~HSU_CH_SR_DESCTO_ANY;
+
+ *status = sr;
+
+ return sr ? 0 : 1;
+}
+EXPORT_SYMBOL_GPL(hsu_dma_get_status);
+
+/*
+ * hsu_dma_do_irq() - DMA interrupt handler
+ * @chip: HSUART DMA chip
+ * @nr: DMA channel number
+ * @status: Channel Status Register value
+ *
+ * Description:
+ * This function handles Channel Error and Descriptor Done interrupts.
+ * This function should be called after determining that the DMA interrupt
+ * is not a normal timeout interrupt, ie. hsu_dma_get_status() returned 0.
+ *
+ * Return:
+ * 0 for invalid channel number, 1 otherwise.
+ */
+int hsu_dma_do_irq(struct hsu_dma_chip *chip, unsigned short nr, u32 status)
+{
+ struct dma_chan_percpu *stat;
+ struct hsu_dma_chan *hsuc;
+ struct hsu_dma_desc *desc;
+ unsigned long flags;
+
+ /* Sanity check */
+ if (nr >= chip->hsu->nr_channels)
+ return 0;
+
+ hsuc = &chip->hsu->chan[nr];
+ stat = this_cpu_ptr(hsuc->vchan.chan.local);
+
+ spin_lock_irqsave(&hsuc->vchan.lock, flags);
+ desc = hsuc->desc;
+ if (desc) {
+ if (status & HSU_CH_SR_CHE) {
+ desc->status = DMA_ERROR;
+ } else if (desc->active < desc->nents) {
+ hsu_dma_start_channel(hsuc);
+ } else {
+ vchan_cookie_complete(&desc->vdesc);
+ desc->status = DMA_COMPLETE;
+ stat->bytes_transferred += desc->length;
+ hsu_dma_start_transfer(hsuc);
+ }
+ }
+ spin_unlock_irqrestore(&hsuc->vchan.lock, flags);
+
+ return 1;
+}
+EXPORT_SYMBOL_GPL(hsu_dma_do_irq);
+
+static struct hsu_dma_desc *hsu_dma_alloc_desc(unsigned int nents)
+{
+ struct hsu_dma_desc *desc;
+
+ desc = kzalloc(sizeof(*desc), GFP_NOWAIT);
+ if (!desc)
+ return NULL;
+
+ desc->sg = kcalloc(nents, sizeof(*desc->sg), GFP_NOWAIT);
+ if (!desc->sg) {
+ kfree(desc);
+ return NULL;
+ }
+
+ return desc;
+}
+
+static void hsu_dma_desc_free(struct virt_dma_desc *vdesc)
+{
+ struct hsu_dma_desc *desc = to_hsu_dma_desc(vdesc);
+
+ kfree(desc->sg);
+ kfree(desc);
+}
+
+static struct dma_async_tx_descriptor *hsu_dma_prep_slave_sg(
+ struct dma_chan *chan, struct scatterlist *sgl,
+ unsigned int sg_len, enum dma_transfer_direction direction,
+ unsigned long flags, void *context)
+{
+ struct hsu_dma_chan *hsuc = to_hsu_dma_chan(chan);
+ struct hsu_dma_desc *desc;
+ struct scatterlist *sg;
+ unsigned int i;
+
+ desc = hsu_dma_alloc_desc(sg_len);
+ if (!desc)
+ return NULL;
+
+ for_each_sg(sgl, sg, sg_len, i) {
+ desc->sg[i].addr = sg_dma_address(sg);
+ desc->sg[i].len = sg_dma_len(sg);
+
+ desc->length += sg_dma_len(sg);
+ }
+
+ desc->nents = sg_len;
+ desc->direction = direction;
+ /* desc->active = 0 by kzalloc */
+ desc->status = DMA_IN_PROGRESS;
+
+ return vchan_tx_prep(&hsuc->vchan, &desc->vdesc, flags);
+}
+
+static void hsu_dma_issue_pending(struct dma_chan *chan)
+{
+ struct hsu_dma_chan *hsuc = to_hsu_dma_chan(chan);
+ unsigned long flags;
+
+ spin_lock_irqsave(&hsuc->vchan.lock, flags);
+ if (vchan_issue_pending(&hsuc->vchan) && !hsuc->desc)
+ hsu_dma_start_transfer(hsuc);
+ spin_unlock_irqrestore(&hsuc->vchan.lock, flags);
+}
+
+static size_t hsu_dma_active_desc_size(struct hsu_dma_chan *hsuc)
+{
+ struct hsu_dma_desc *desc = hsuc->desc;
+ size_t bytes = 0;
+ int i;
+
+ for (i = desc->active; i < desc->nents; i++)
+ bytes += desc->sg[i].len;
+
+ i = HSU_DMA_CHAN_NR_DESC - 1;
+ do {
+ bytes += hsu_chan_readl(hsuc, HSU_CH_DxTSR(i));
+ } while (--i >= 0);
+
+ return bytes;
+}
+
+static enum dma_status hsu_dma_tx_status(struct dma_chan *chan,
+ dma_cookie_t cookie, struct dma_tx_state *state)
+{
+ struct hsu_dma_chan *hsuc = to_hsu_dma_chan(chan);
+ struct virt_dma_desc *vdesc;
+ enum dma_status status;
+ size_t bytes;
+ unsigned long flags;
+
+ status = dma_cookie_status(chan, cookie, state);
+ if (status == DMA_COMPLETE)
+ return status;
+
+ spin_lock_irqsave(&hsuc->vchan.lock, flags);
+ vdesc = vchan_find_desc(&hsuc->vchan, cookie);
+ if (hsuc->desc && cookie == hsuc->desc->vdesc.tx.cookie) {
+ bytes = hsu_dma_active_desc_size(hsuc);
+ dma_set_residue(state, bytes);
+ status = hsuc->desc->status;
+ } else if (vdesc) {
+ bytes = to_hsu_dma_desc(vdesc)->length;
+ dma_set_residue(state, bytes);
+ }
+ spin_unlock_irqrestore(&hsuc->vchan.lock, flags);
+
+ return status;
+}
+
+static int hsu_dma_slave_config(struct dma_chan *chan,
+ struct dma_slave_config *config)
+{
+ struct hsu_dma_chan *hsuc = to_hsu_dma_chan(chan);
+
+ memcpy(&hsuc->config, config, sizeof(hsuc->config));
+
+ return 0;
+}
+
+static int hsu_dma_pause(struct dma_chan *chan)
+{
+ struct hsu_dma_chan *hsuc = to_hsu_dma_chan(chan);
+ unsigned long flags;
+
+ spin_lock_irqsave(&hsuc->vchan.lock, flags);
+ if (hsuc->desc && hsuc->desc->status == DMA_IN_PROGRESS) {
+ hsu_chan_disable(hsuc);
+ hsuc->desc->status = DMA_PAUSED;
+ }
+ spin_unlock_irqrestore(&hsuc->vchan.lock, flags);
+
+ return 0;
+}
+
+static int hsu_dma_resume(struct dma_chan *chan)
+{
+ struct hsu_dma_chan *hsuc = to_hsu_dma_chan(chan);
+ unsigned long flags;
+
+ spin_lock_irqsave(&hsuc->vchan.lock, flags);
+ if (hsuc->desc && hsuc->desc->status == DMA_PAUSED) {
+ hsuc->desc->status = DMA_IN_PROGRESS;
+ hsu_chan_enable(hsuc);
+ }
+ spin_unlock_irqrestore(&hsuc->vchan.lock, flags);
+
+ return 0;
+}
+
+static int hsu_dma_terminate_all(struct dma_chan *chan)
+{
+ struct hsu_dma_chan *hsuc = to_hsu_dma_chan(chan);
+ unsigned long flags;
+ LIST_HEAD(head);
+
+ spin_lock_irqsave(&hsuc->vchan.lock, flags);
+
+ hsu_dma_stop_channel(hsuc);
+ if (hsuc->desc) {
+ hsu_dma_desc_free(&hsuc->desc->vdesc);
+ hsuc->desc = NULL;
+ }
+
+ vchan_get_all_descriptors(&hsuc->vchan, &head);
+ spin_unlock_irqrestore(&hsuc->vchan.lock, flags);
+ vchan_dma_desc_free_list(&hsuc->vchan, &head);
+
+ return 0;
+}
+
+static void hsu_dma_free_chan_resources(struct dma_chan *chan)
+{
+ vchan_free_chan_resources(to_virt_chan(chan));
+}
+
+static void hsu_dma_synchronize(struct dma_chan *chan)
+{
+ struct hsu_dma_chan *hsuc = to_hsu_dma_chan(chan);
+
+ vchan_synchronize(&hsuc->vchan);
+}
+
+int hsu_dma_probe(struct hsu_dma_chip *chip)
+{
+ struct hsu_dma *hsu;
+ void __iomem *addr = chip->regs + chip->offset;
+ unsigned short i;
+ int ret;
+
+ hsu = devm_kzalloc(chip->dev, sizeof(*hsu), GFP_KERNEL);
+ if (!hsu)
+ return -ENOMEM;
+
+ chip->hsu = hsu;
+
+ /* Calculate nr_channels from the IO space length */
+ hsu->nr_channels = (chip->length - chip->offset) / HSU_DMA_CHAN_LENGTH;
+
+ hsu->chan = devm_kcalloc(chip->dev, hsu->nr_channels,
+ sizeof(*hsu->chan), GFP_KERNEL);
+ if (!hsu->chan)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&hsu->dma.channels);
+ for (i = 0; i < hsu->nr_channels; i++) {
+ struct hsu_dma_chan *hsuc = &hsu->chan[i];
+
+ hsuc->vchan.desc_free = hsu_dma_desc_free;
+ vchan_init(&hsuc->vchan, &hsu->dma);
+
+ hsuc->direction = (i & 0x1) ? DMA_DEV_TO_MEM : DMA_MEM_TO_DEV;
+ hsuc->reg = addr + i * HSU_DMA_CHAN_LENGTH;
+ }
+
+ dma_cap_set(DMA_SLAVE, hsu->dma.cap_mask);
+ dma_cap_set(DMA_PRIVATE, hsu->dma.cap_mask);
+
+ hsu->dma.device_free_chan_resources = hsu_dma_free_chan_resources;
+
+ hsu->dma.device_prep_slave_sg = hsu_dma_prep_slave_sg;
+
+ hsu->dma.device_issue_pending = hsu_dma_issue_pending;
+ hsu->dma.device_tx_status = hsu_dma_tx_status;
+
+ hsu->dma.device_config = hsu_dma_slave_config;
+ hsu->dma.device_pause = hsu_dma_pause;
+ hsu->dma.device_resume = hsu_dma_resume;
+ hsu->dma.device_terminate_all = hsu_dma_terminate_all;
+ hsu->dma.device_synchronize = hsu_dma_synchronize;
+
+ hsu->dma.src_addr_widths = HSU_DMA_BUSWIDTHS;
+ hsu->dma.dst_addr_widths = HSU_DMA_BUSWIDTHS;
+ hsu->dma.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
+ hsu->dma.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
+
+ hsu->dma.dev = chip->dev;
+
+ dma_set_max_seg_size(hsu->dma.dev, HSU_CH_DxTSR_MASK);
+
+ ret = dma_async_device_register(&hsu->dma);
+ if (ret)
+ return ret;
+
+ dev_info(chip->dev, "Found HSU DMA, %d channels\n", hsu->nr_channels);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(hsu_dma_probe);
+
+int hsu_dma_remove(struct hsu_dma_chip *chip)
+{
+ struct hsu_dma *hsu = chip->hsu;
+ unsigned short i;
+
+ dma_async_device_unregister(&hsu->dma);
+
+ for (i = 0; i < hsu->nr_channels; i++) {
+ struct hsu_dma_chan *hsuc = &hsu->chan[i];
+
+ tasklet_kill(&hsuc->vchan.task);
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(hsu_dma_remove);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("High Speed UART DMA core driver");
+MODULE_AUTHOR("Andy Shevchenko <andriy.shevchenko@linux.intel.com>");
diff --git a/drivers/dma/hsu/hsu.h b/drivers/dma/hsu/hsu.h
new file mode 100644
index 000000000..3bca577b9
--- /dev/null
+++ b/drivers/dma/hsu/hsu.h
@@ -0,0 +1,127 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Driver for the High Speed UART DMA
+ *
+ * Copyright (C) 2015 Intel Corporation
+ *
+ * Partially based on the bits found in drivers/tty/serial/mfd.c.
+ */
+
+#ifndef __DMA_HSU_H__
+#define __DMA_HSU_H__
+
+#include <linux/bits.h>
+#include <linux/container_of.h>
+#include <linux/io.h>
+#include <linux/types.h>
+
+#include <linux/dma/hsu.h>
+
+#include "../virt-dma.h"
+
+#define HSU_CH_SR 0x00 /* channel status */
+#define HSU_CH_CR 0x04 /* channel control */
+#define HSU_CH_DCR 0x08 /* descriptor control */
+#define HSU_CH_BSR 0x10 /* FIFO buffer size */
+#define HSU_CH_MTSR 0x14 /* minimum transfer size */
+#define HSU_CH_DxSAR(x) (0x20 + 8 * (x)) /* desc start addr */
+#define HSU_CH_DxTSR(x) (0x24 + 8 * (x)) /* desc transfer size */
+#define HSU_CH_D0SAR 0x20 /* desc 0 start addr */
+#define HSU_CH_D0TSR 0x24 /* desc 0 transfer size */
+#define HSU_CH_D1SAR 0x28
+#define HSU_CH_D1TSR 0x2c
+#define HSU_CH_D2SAR 0x30
+#define HSU_CH_D2TSR 0x34
+#define HSU_CH_D3SAR 0x38
+#define HSU_CH_D3TSR 0x3c
+
+#define HSU_DMA_CHAN_NR_DESC 4
+#define HSU_DMA_CHAN_LENGTH 0x40
+
+/* Bits in HSU_CH_SR */
+#define HSU_CH_SR_DESCTO(x) BIT(8 + (x))
+#define HSU_CH_SR_DESCTO_ANY GENMASK(11, 8)
+#define HSU_CH_SR_CHE BIT(15)
+#define HSU_CH_SR_DESCE(x) BIT(16 + (x))
+#define HSU_CH_SR_DESCE_ANY GENMASK(19, 16)
+#define HSU_CH_SR_CDESC_ANY GENMASK(31, 30)
+
+/* Bits in HSU_CH_CR */
+#define HSU_CH_CR_CHA BIT(0)
+#define HSU_CH_CR_CHD BIT(1)
+
+/* Bits in HSU_CH_DCR */
+#define HSU_CH_DCR_DESCA(x) BIT(0 + (x))
+#define HSU_CH_DCR_CHSOD(x) BIT(8 + (x))
+#define HSU_CH_DCR_CHSOTO BIT(14)
+#define HSU_CH_DCR_CHSOE BIT(15)
+#define HSU_CH_DCR_CHDI(x) BIT(16 + (x))
+#define HSU_CH_DCR_CHEI BIT(23)
+#define HSU_CH_DCR_CHTOI(x) BIT(24 + (x))
+
+/* Bits in HSU_CH_DxTSR */
+#define HSU_CH_DxTSR_MASK GENMASK(15, 0)
+#define HSU_CH_DxTSR_TSR(x) ((x) & HSU_CH_DxTSR_MASK)
+
+struct hsu_dma_sg {
+ dma_addr_t addr;
+ unsigned int len;
+};
+
+struct hsu_dma_desc {
+ struct virt_dma_desc vdesc;
+ enum dma_transfer_direction direction;
+ struct hsu_dma_sg *sg;
+ unsigned int nents;
+ size_t length;
+ unsigned int active;
+ enum dma_status status;
+};
+
+static inline struct hsu_dma_desc *to_hsu_dma_desc(struct virt_dma_desc *vdesc)
+{
+ return container_of(vdesc, struct hsu_dma_desc, vdesc);
+}
+
+struct hsu_dma_chan {
+ struct virt_dma_chan vchan;
+
+ void __iomem *reg;
+
+ /* hardware configuration */
+ enum dma_transfer_direction direction;
+ struct dma_slave_config config;
+
+ struct hsu_dma_desc *desc;
+};
+
+static inline struct hsu_dma_chan *to_hsu_dma_chan(struct dma_chan *chan)
+{
+ return container_of(chan, struct hsu_dma_chan, vchan.chan);
+}
+
+static inline u32 hsu_chan_readl(struct hsu_dma_chan *hsuc, int offset)
+{
+ return readl(hsuc->reg + offset);
+}
+
+static inline void hsu_chan_writel(struct hsu_dma_chan *hsuc, int offset,
+ u32 value)
+{
+ writel(value, hsuc->reg + offset);
+}
+
+struct hsu_dma {
+ struct dma_device dma;
+
+ /* channels */
+ struct hsu_dma_chan *chan;
+ unsigned short nr_channels;
+};
+
+static inline struct hsu_dma *to_hsu_dma(struct dma_device *ddev)
+{
+ return container_of(ddev, struct hsu_dma, dma);
+}
+
+#endif /* __DMA_HSU_H__ */
diff --git a/drivers/dma/hsu/pci.c b/drivers/dma/hsu/pci.c
new file mode 100644
index 000000000..0fcc0c0c2
--- /dev/null
+++ b/drivers/dma/hsu/pci.c
@@ -0,0 +1,135 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * PCI driver for the High Speed UART DMA
+ *
+ * Copyright (C) 2015 Intel Corporation
+ * Author: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+ *
+ * Partially based on the bits found in drivers/tty/serial/mfd.c.
+ */
+
+#include <linux/bitops.h>
+#include <linux/device.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+
+#include "hsu.h"
+
+#define HSU_PCI_DMASR 0x00
+#define HSU_PCI_DMAISR 0x04
+
+#define HSU_PCI_CHAN_OFFSET 0x100
+
+#define PCI_DEVICE_ID_INTEL_MFLD_HSU_DMA 0x081e
+#define PCI_DEVICE_ID_INTEL_MRFLD_HSU_DMA 0x1192
+
+static irqreturn_t hsu_pci_irq(int irq, void *dev)
+{
+ struct hsu_dma_chip *chip = dev;
+ unsigned long dmaisr;
+ unsigned short i;
+ u32 status;
+ int ret = 0;
+ int err;
+
+ dmaisr = readl(chip->regs + HSU_PCI_DMAISR);
+ for_each_set_bit(i, &dmaisr, chip->hsu->nr_channels) {
+ err = hsu_dma_get_status(chip, i, &status);
+ if (err > 0)
+ ret |= 1;
+ else if (err == 0)
+ ret |= hsu_dma_do_irq(chip, i, status);
+ }
+
+ return IRQ_RETVAL(ret);
+}
+
+static void hsu_pci_dma_remove(void *chip)
+{
+ hsu_dma_remove(chip);
+}
+
+static int hsu_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+ struct device *dev = &pdev->dev;
+ struct hsu_dma_chip *chip;
+ int ret;
+
+ ret = pcim_enable_device(pdev);
+ if (ret)
+ return ret;
+
+ ret = pcim_iomap_regions(pdev, BIT(0), pci_name(pdev));
+ if (ret) {
+ dev_err(&pdev->dev, "I/O memory remapping failed\n");
+ return ret;
+ }
+
+ pci_set_master(pdev);
+ pci_try_set_mwi(pdev);
+
+ ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
+ if (ret)
+ return ret;
+
+ chip = devm_kzalloc(&pdev->dev, sizeof(*chip), GFP_KERNEL);
+ if (!chip)
+ return -ENOMEM;
+
+ ret = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_ALL_TYPES);
+ if (ret < 0)
+ return ret;
+
+ chip->dev = &pdev->dev;
+ chip->regs = pcim_iomap_table(pdev)[0];
+ chip->length = pci_resource_len(pdev, 0);
+ chip->offset = HSU_PCI_CHAN_OFFSET;
+ chip->irq = pci_irq_vector(pdev, 0);
+
+ ret = hsu_dma_probe(chip);
+ if (ret)
+ return ret;
+
+ ret = devm_add_action_or_reset(dev, hsu_pci_dma_remove, chip);
+ if (ret)
+ return ret;
+
+ ret = devm_request_irq(dev, chip->irq, hsu_pci_irq, 0, "hsu_dma_pci", chip);
+ if (ret)
+ return ret;
+
+ /*
+ * On Intel Tangier B0 and Anniedale the interrupt line, disregarding
+ * to have different numbers, is shared between HSU DMA and UART IPs.
+ * Thus on such SoCs we are expecting that IRQ handler is called in
+ * UART driver only. Instead of handling the spurious interrupt
+ * from HSU DMA here and waste CPU time and delay HSU UART interrupt
+ * handling, disable the interrupt entirely.
+ */
+ if (pdev->device == PCI_DEVICE_ID_INTEL_MRFLD_HSU_DMA)
+ disable_irq_nosync(chip->irq);
+
+ pci_set_drvdata(pdev, chip);
+
+ return 0;
+}
+
+static const struct pci_device_id hsu_pci_id_table[] = {
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_MFLD_HSU_DMA), 0 },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_MRFLD_HSU_DMA), 0 },
+ { }
+};
+MODULE_DEVICE_TABLE(pci, hsu_pci_id_table);
+
+static struct pci_driver hsu_pci_driver = {
+ .name = "hsu_dma_pci",
+ .id_table = hsu_pci_id_table,
+ .probe = hsu_pci_probe,
+};
+
+module_pci_driver(hsu_pci_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("High Speed UART DMA PCI driver");
+MODULE_AUTHOR("Andy Shevchenko <andriy.shevchenko@linux.intel.com>");
diff --git a/drivers/dma/idma64.c b/drivers/dma/idma64.c
new file mode 100644
index 000000000..f4c07ad3b
--- /dev/null
+++ b/drivers/dma/idma64.c
@@ -0,0 +1,704 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Core driver for the Intel integrated DMA 64-bit
+ *
+ * Copyright (C) 2015 Intel Corporation
+ * Author: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+ */
+
+#include <linux/bitops.h>
+#include <linux/delay.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmapool.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+#include <linux/dma/idma64.h>
+
+#include "idma64.h"
+
+/* For now we support only two channels */
+#define IDMA64_NR_CHAN 2
+
+/* ---------------------------------------------------------------------- */
+
+static struct device *chan2dev(struct dma_chan *chan)
+{
+ return &chan->dev->device;
+}
+
+/* ---------------------------------------------------------------------- */
+
+static void idma64_off(struct idma64 *idma64)
+{
+ unsigned short count = 100;
+
+ dma_writel(idma64, CFG, 0);
+
+ channel_clear_bit(idma64, MASK(XFER), idma64->all_chan_mask);
+ channel_clear_bit(idma64, MASK(BLOCK), idma64->all_chan_mask);
+ channel_clear_bit(idma64, MASK(SRC_TRAN), idma64->all_chan_mask);
+ channel_clear_bit(idma64, MASK(DST_TRAN), idma64->all_chan_mask);
+ channel_clear_bit(idma64, MASK(ERROR), idma64->all_chan_mask);
+
+ do {
+ cpu_relax();
+ } while (dma_readl(idma64, CFG) & IDMA64_CFG_DMA_EN && --count);
+}
+
+static void idma64_on(struct idma64 *idma64)
+{
+ dma_writel(idma64, CFG, IDMA64_CFG_DMA_EN);
+}
+
+/* ---------------------------------------------------------------------- */
+
+static void idma64_chan_init(struct idma64 *idma64, struct idma64_chan *idma64c)
+{
+ u32 cfghi = IDMA64C_CFGH_SRC_PER(1) | IDMA64C_CFGH_DST_PER(0);
+ u32 cfglo = 0;
+
+ /* Set default burst alignment */
+ cfglo |= IDMA64C_CFGL_DST_BURST_ALIGN | IDMA64C_CFGL_SRC_BURST_ALIGN;
+
+ channel_writel(idma64c, CFG_LO, cfglo);
+ channel_writel(idma64c, CFG_HI, cfghi);
+
+ /* Enable interrupts */
+ channel_set_bit(idma64, MASK(XFER), idma64c->mask);
+ channel_set_bit(idma64, MASK(ERROR), idma64c->mask);
+
+ /*
+ * Enforce the controller to be turned on.
+ *
+ * The iDMA is turned off in ->probe() and looses context during system
+ * suspend / resume cycle. That's why we have to enable it each time we
+ * use it.
+ */
+ idma64_on(idma64);
+}
+
+static void idma64_chan_stop(struct idma64 *idma64, struct idma64_chan *idma64c)
+{
+ channel_clear_bit(idma64, CH_EN, idma64c->mask);
+}
+
+static void idma64_chan_start(struct idma64 *idma64, struct idma64_chan *idma64c)
+{
+ struct idma64_desc *desc = idma64c->desc;
+ struct idma64_hw_desc *hw = &desc->hw[0];
+
+ channel_writeq(idma64c, SAR, 0);
+ channel_writeq(idma64c, DAR, 0);
+
+ channel_writel(idma64c, CTL_HI, IDMA64C_CTLH_BLOCK_TS(~0UL));
+ channel_writel(idma64c, CTL_LO, IDMA64C_CTLL_LLP_S_EN | IDMA64C_CTLL_LLP_D_EN);
+
+ channel_writeq(idma64c, LLP, hw->llp);
+
+ channel_set_bit(idma64, CH_EN, idma64c->mask);
+}
+
+static void idma64_stop_transfer(struct idma64_chan *idma64c)
+{
+ struct idma64 *idma64 = to_idma64(idma64c->vchan.chan.device);
+
+ idma64_chan_stop(idma64, idma64c);
+}
+
+static void idma64_start_transfer(struct idma64_chan *idma64c)
+{
+ struct idma64 *idma64 = to_idma64(idma64c->vchan.chan.device);
+ struct virt_dma_desc *vdesc;
+
+ /* Get the next descriptor */
+ vdesc = vchan_next_desc(&idma64c->vchan);
+ if (!vdesc) {
+ idma64c->desc = NULL;
+ return;
+ }
+
+ list_del(&vdesc->node);
+ idma64c->desc = to_idma64_desc(vdesc);
+
+ /* Configure the channel */
+ idma64_chan_init(idma64, idma64c);
+
+ /* Start the channel with a new descriptor */
+ idma64_chan_start(idma64, idma64c);
+}
+
+/* ---------------------------------------------------------------------- */
+
+static void idma64_chan_irq(struct idma64 *idma64, unsigned short c,
+ u32 status_err, u32 status_xfer)
+{
+ struct idma64_chan *idma64c = &idma64->chan[c];
+ struct idma64_desc *desc;
+
+ spin_lock(&idma64c->vchan.lock);
+ desc = idma64c->desc;
+ if (desc) {
+ if (status_err & (1 << c)) {
+ dma_writel(idma64, CLEAR(ERROR), idma64c->mask);
+ desc->status = DMA_ERROR;
+ } else if (status_xfer & (1 << c)) {
+ dma_writel(idma64, CLEAR(XFER), idma64c->mask);
+ desc->status = DMA_COMPLETE;
+ vchan_cookie_complete(&desc->vdesc);
+ idma64_start_transfer(idma64c);
+ }
+
+ /* idma64_start_transfer() updates idma64c->desc */
+ if (idma64c->desc == NULL || desc->status == DMA_ERROR)
+ idma64_stop_transfer(idma64c);
+ }
+ spin_unlock(&idma64c->vchan.lock);
+}
+
+static irqreturn_t idma64_irq(int irq, void *dev)
+{
+ struct idma64 *idma64 = dev;
+ u32 status = dma_readl(idma64, STATUS_INT);
+ u32 status_xfer;
+ u32 status_err;
+ unsigned short i;
+
+ dev_vdbg(idma64->dma.dev, "%s: status=%#x\n", __func__, status);
+
+ /* Check if we have any interrupt from the DMA controller */
+ if (!status)
+ return IRQ_NONE;
+
+ status_xfer = dma_readl(idma64, RAW(XFER));
+ status_err = dma_readl(idma64, RAW(ERROR));
+
+ for (i = 0; i < idma64->dma.chancnt; i++)
+ idma64_chan_irq(idma64, i, status_err, status_xfer);
+
+ return IRQ_HANDLED;
+}
+
+/* ---------------------------------------------------------------------- */
+
+static struct idma64_desc *idma64_alloc_desc(unsigned int ndesc)
+{
+ struct idma64_desc *desc;
+
+ desc = kzalloc(sizeof(*desc), GFP_NOWAIT);
+ if (!desc)
+ return NULL;
+
+ desc->hw = kcalloc(ndesc, sizeof(*desc->hw), GFP_NOWAIT);
+ if (!desc->hw) {
+ kfree(desc);
+ return NULL;
+ }
+
+ return desc;
+}
+
+static void idma64_desc_free(struct idma64_chan *idma64c,
+ struct idma64_desc *desc)
+{
+ struct idma64_hw_desc *hw;
+
+ if (desc->ndesc) {
+ unsigned int i = desc->ndesc;
+
+ do {
+ hw = &desc->hw[--i];
+ dma_pool_free(idma64c->pool, hw->lli, hw->llp);
+ } while (i);
+ }
+
+ kfree(desc->hw);
+ kfree(desc);
+}
+
+static void idma64_vdesc_free(struct virt_dma_desc *vdesc)
+{
+ struct idma64_chan *idma64c = to_idma64_chan(vdesc->tx.chan);
+
+ idma64_desc_free(idma64c, to_idma64_desc(vdesc));
+}
+
+static void idma64_hw_desc_fill(struct idma64_hw_desc *hw,
+ struct dma_slave_config *config,
+ enum dma_transfer_direction direction, u64 llp)
+{
+ struct idma64_lli *lli = hw->lli;
+ u64 sar, dar;
+ u32 ctlhi = IDMA64C_CTLH_BLOCK_TS(hw->len);
+ u32 ctllo = IDMA64C_CTLL_LLP_S_EN | IDMA64C_CTLL_LLP_D_EN;
+ u32 src_width, dst_width;
+
+ if (direction == DMA_MEM_TO_DEV) {
+ sar = hw->phys;
+ dar = config->dst_addr;
+ ctllo |= IDMA64C_CTLL_DST_FIX | IDMA64C_CTLL_SRC_INC |
+ IDMA64C_CTLL_FC_M2P;
+ src_width = __ffs(sar | hw->len | 4);
+ dst_width = __ffs(config->dst_addr_width);
+ } else { /* DMA_DEV_TO_MEM */
+ sar = config->src_addr;
+ dar = hw->phys;
+ ctllo |= IDMA64C_CTLL_DST_INC | IDMA64C_CTLL_SRC_FIX |
+ IDMA64C_CTLL_FC_P2M;
+ src_width = __ffs(config->src_addr_width);
+ dst_width = __ffs(dar | hw->len | 4);
+ }
+
+ lli->sar = sar;
+ lli->dar = dar;
+
+ lli->ctlhi = ctlhi;
+ lli->ctllo = ctllo |
+ IDMA64C_CTLL_SRC_MSIZE(config->src_maxburst) |
+ IDMA64C_CTLL_DST_MSIZE(config->dst_maxburst) |
+ IDMA64C_CTLL_DST_WIDTH(dst_width) |
+ IDMA64C_CTLL_SRC_WIDTH(src_width);
+
+ lli->llp = llp;
+}
+
+static void idma64_desc_fill(struct idma64_chan *idma64c,
+ struct idma64_desc *desc)
+{
+ struct dma_slave_config *config = &idma64c->config;
+ unsigned int i = desc->ndesc;
+ struct idma64_hw_desc *hw = &desc->hw[i - 1];
+ struct idma64_lli *lli = hw->lli;
+ u64 llp = 0;
+
+ /* Fill the hardware descriptors and link them to a list */
+ do {
+ hw = &desc->hw[--i];
+ idma64_hw_desc_fill(hw, config, desc->direction, llp);
+ llp = hw->llp;
+ desc->length += hw->len;
+ } while (i);
+
+ /* Trigger an interrupt after the last block is transfered */
+ lli->ctllo |= IDMA64C_CTLL_INT_EN;
+
+ /* Disable LLP transfer in the last block */
+ lli->ctllo &= ~(IDMA64C_CTLL_LLP_S_EN | IDMA64C_CTLL_LLP_D_EN);
+}
+
+static struct dma_async_tx_descriptor *idma64_prep_slave_sg(
+ struct dma_chan *chan, struct scatterlist *sgl,
+ unsigned int sg_len, enum dma_transfer_direction direction,
+ unsigned long flags, void *context)
+{
+ struct idma64_chan *idma64c = to_idma64_chan(chan);
+ struct idma64_desc *desc;
+ struct scatterlist *sg;
+ unsigned int i;
+
+ desc = idma64_alloc_desc(sg_len);
+ if (!desc)
+ return NULL;
+
+ for_each_sg(sgl, sg, sg_len, i) {
+ struct idma64_hw_desc *hw = &desc->hw[i];
+
+ /* Allocate DMA capable memory for hardware descriptor */
+ hw->lli = dma_pool_alloc(idma64c->pool, GFP_NOWAIT, &hw->llp);
+ if (!hw->lli) {
+ desc->ndesc = i;
+ idma64_desc_free(idma64c, desc);
+ return NULL;
+ }
+
+ hw->phys = sg_dma_address(sg);
+ hw->len = sg_dma_len(sg);
+ }
+
+ desc->ndesc = sg_len;
+ desc->direction = direction;
+ desc->status = DMA_IN_PROGRESS;
+
+ idma64_desc_fill(idma64c, desc);
+ return vchan_tx_prep(&idma64c->vchan, &desc->vdesc, flags);
+}
+
+static void idma64_issue_pending(struct dma_chan *chan)
+{
+ struct idma64_chan *idma64c = to_idma64_chan(chan);
+ unsigned long flags;
+
+ spin_lock_irqsave(&idma64c->vchan.lock, flags);
+ if (vchan_issue_pending(&idma64c->vchan) && !idma64c->desc)
+ idma64_start_transfer(idma64c);
+ spin_unlock_irqrestore(&idma64c->vchan.lock, flags);
+}
+
+static size_t idma64_active_desc_size(struct idma64_chan *idma64c)
+{
+ struct idma64_desc *desc = idma64c->desc;
+ struct idma64_hw_desc *hw;
+ size_t bytes = desc->length;
+ u64 llp = channel_readq(idma64c, LLP);
+ u32 ctlhi = channel_readl(idma64c, CTL_HI);
+ unsigned int i = 0;
+
+ do {
+ hw = &desc->hw[i];
+ if (hw->llp == llp)
+ break;
+ bytes -= hw->len;
+ } while (++i < desc->ndesc);
+
+ if (!i)
+ return bytes;
+
+ /* The current chunk is not fully transfered yet */
+ bytes += desc->hw[--i].len;
+
+ return bytes - IDMA64C_CTLH_BLOCK_TS(ctlhi);
+}
+
+static enum dma_status idma64_tx_status(struct dma_chan *chan,
+ dma_cookie_t cookie, struct dma_tx_state *state)
+{
+ struct idma64_chan *idma64c = to_idma64_chan(chan);
+ struct virt_dma_desc *vdesc;
+ enum dma_status status;
+ size_t bytes;
+ unsigned long flags;
+
+ status = dma_cookie_status(chan, cookie, state);
+ if (status == DMA_COMPLETE)
+ return status;
+
+ spin_lock_irqsave(&idma64c->vchan.lock, flags);
+ vdesc = vchan_find_desc(&idma64c->vchan, cookie);
+ if (idma64c->desc && cookie == idma64c->desc->vdesc.tx.cookie) {
+ bytes = idma64_active_desc_size(idma64c);
+ dma_set_residue(state, bytes);
+ status = idma64c->desc->status;
+ } else if (vdesc) {
+ bytes = to_idma64_desc(vdesc)->length;
+ dma_set_residue(state, bytes);
+ }
+ spin_unlock_irqrestore(&idma64c->vchan.lock, flags);
+
+ return status;
+}
+
+static void convert_burst(u32 *maxburst)
+{
+ if (*maxburst)
+ *maxburst = __fls(*maxburst);
+ else
+ *maxburst = 0;
+}
+
+static int idma64_slave_config(struct dma_chan *chan,
+ struct dma_slave_config *config)
+{
+ struct idma64_chan *idma64c = to_idma64_chan(chan);
+
+ memcpy(&idma64c->config, config, sizeof(idma64c->config));
+
+ convert_burst(&idma64c->config.src_maxburst);
+ convert_burst(&idma64c->config.dst_maxburst);
+
+ return 0;
+}
+
+static void idma64_chan_deactivate(struct idma64_chan *idma64c, bool drain)
+{
+ unsigned short count = 100;
+ u32 cfglo;
+
+ cfglo = channel_readl(idma64c, CFG_LO);
+ if (drain)
+ cfglo |= IDMA64C_CFGL_CH_DRAIN;
+ else
+ cfglo &= ~IDMA64C_CFGL_CH_DRAIN;
+
+ channel_writel(idma64c, CFG_LO, cfglo | IDMA64C_CFGL_CH_SUSP);
+ do {
+ udelay(1);
+ cfglo = channel_readl(idma64c, CFG_LO);
+ } while (!(cfglo & IDMA64C_CFGL_FIFO_EMPTY) && --count);
+}
+
+static void idma64_chan_activate(struct idma64_chan *idma64c)
+{
+ u32 cfglo;
+
+ cfglo = channel_readl(idma64c, CFG_LO);
+ channel_writel(idma64c, CFG_LO, cfglo & ~IDMA64C_CFGL_CH_SUSP);
+}
+
+static int idma64_pause(struct dma_chan *chan)
+{
+ struct idma64_chan *idma64c = to_idma64_chan(chan);
+ unsigned long flags;
+
+ spin_lock_irqsave(&idma64c->vchan.lock, flags);
+ if (idma64c->desc && idma64c->desc->status == DMA_IN_PROGRESS) {
+ idma64_chan_deactivate(idma64c, false);
+ idma64c->desc->status = DMA_PAUSED;
+ }
+ spin_unlock_irqrestore(&idma64c->vchan.lock, flags);
+
+ return 0;
+}
+
+static int idma64_resume(struct dma_chan *chan)
+{
+ struct idma64_chan *idma64c = to_idma64_chan(chan);
+ unsigned long flags;
+
+ spin_lock_irqsave(&idma64c->vchan.lock, flags);
+ if (idma64c->desc && idma64c->desc->status == DMA_PAUSED) {
+ idma64c->desc->status = DMA_IN_PROGRESS;
+ idma64_chan_activate(idma64c);
+ }
+ spin_unlock_irqrestore(&idma64c->vchan.lock, flags);
+
+ return 0;
+}
+
+static int idma64_terminate_all(struct dma_chan *chan)
+{
+ struct idma64_chan *idma64c = to_idma64_chan(chan);
+ unsigned long flags;
+ LIST_HEAD(head);
+
+ spin_lock_irqsave(&idma64c->vchan.lock, flags);
+ idma64_chan_deactivate(idma64c, true);
+ idma64_stop_transfer(idma64c);
+ if (idma64c->desc) {
+ idma64_vdesc_free(&idma64c->desc->vdesc);
+ idma64c->desc = NULL;
+ }
+ vchan_get_all_descriptors(&idma64c->vchan, &head);
+ spin_unlock_irqrestore(&idma64c->vchan.lock, flags);
+
+ vchan_dma_desc_free_list(&idma64c->vchan, &head);
+ return 0;
+}
+
+static void idma64_synchronize(struct dma_chan *chan)
+{
+ struct idma64_chan *idma64c = to_idma64_chan(chan);
+
+ vchan_synchronize(&idma64c->vchan);
+}
+
+static int idma64_alloc_chan_resources(struct dma_chan *chan)
+{
+ struct idma64_chan *idma64c = to_idma64_chan(chan);
+
+ /* Create a pool of consistent memory blocks for hardware descriptors */
+ idma64c->pool = dma_pool_create(dev_name(chan2dev(chan)),
+ chan->device->dev,
+ sizeof(struct idma64_lli), 8, 0);
+ if (!idma64c->pool) {
+ dev_err(chan2dev(chan), "No memory for descriptors\n");
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static void idma64_free_chan_resources(struct dma_chan *chan)
+{
+ struct idma64_chan *idma64c = to_idma64_chan(chan);
+
+ vchan_free_chan_resources(to_virt_chan(chan));
+ dma_pool_destroy(idma64c->pool);
+ idma64c->pool = NULL;
+}
+
+/* ---------------------------------------------------------------------- */
+
+#define IDMA64_BUSWIDTHS \
+ BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | \
+ BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \
+ BIT(DMA_SLAVE_BUSWIDTH_4_BYTES)
+
+static int idma64_probe(struct idma64_chip *chip)
+{
+ struct idma64 *idma64;
+ unsigned short nr_chan = IDMA64_NR_CHAN;
+ unsigned short i;
+ int ret;
+
+ idma64 = devm_kzalloc(chip->dev, sizeof(*idma64), GFP_KERNEL);
+ if (!idma64)
+ return -ENOMEM;
+
+ idma64->regs = chip->regs;
+ chip->idma64 = idma64;
+
+ idma64->chan = devm_kcalloc(chip->dev, nr_chan, sizeof(*idma64->chan),
+ GFP_KERNEL);
+ if (!idma64->chan)
+ return -ENOMEM;
+
+ idma64->all_chan_mask = (1 << nr_chan) - 1;
+
+ /* Turn off iDMA controller */
+ idma64_off(idma64);
+
+ ret = devm_request_irq(chip->dev, chip->irq, idma64_irq, IRQF_SHARED,
+ dev_name(chip->dev), idma64);
+ if (ret)
+ return ret;
+
+ INIT_LIST_HEAD(&idma64->dma.channels);
+ for (i = 0; i < nr_chan; i++) {
+ struct idma64_chan *idma64c = &idma64->chan[i];
+
+ idma64c->vchan.desc_free = idma64_vdesc_free;
+ vchan_init(&idma64c->vchan, &idma64->dma);
+
+ idma64c->regs = idma64->regs + i * IDMA64_CH_LENGTH;
+ idma64c->mask = BIT(i);
+ }
+
+ dma_cap_set(DMA_SLAVE, idma64->dma.cap_mask);
+ dma_cap_set(DMA_PRIVATE, idma64->dma.cap_mask);
+
+ idma64->dma.device_alloc_chan_resources = idma64_alloc_chan_resources;
+ idma64->dma.device_free_chan_resources = idma64_free_chan_resources;
+
+ idma64->dma.device_prep_slave_sg = idma64_prep_slave_sg;
+
+ idma64->dma.device_issue_pending = idma64_issue_pending;
+ idma64->dma.device_tx_status = idma64_tx_status;
+
+ idma64->dma.device_config = idma64_slave_config;
+ idma64->dma.device_pause = idma64_pause;
+ idma64->dma.device_resume = idma64_resume;
+ idma64->dma.device_terminate_all = idma64_terminate_all;
+ idma64->dma.device_synchronize = idma64_synchronize;
+
+ idma64->dma.src_addr_widths = IDMA64_BUSWIDTHS;
+ idma64->dma.dst_addr_widths = IDMA64_BUSWIDTHS;
+ idma64->dma.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
+ idma64->dma.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
+
+ idma64->dma.dev = chip->sysdev;
+
+ dma_set_max_seg_size(idma64->dma.dev, IDMA64C_CTLH_BLOCK_TS_MASK);
+
+ ret = dma_async_device_register(&idma64->dma);
+ if (ret)
+ return ret;
+
+ dev_info(chip->dev, "Found Intel integrated DMA 64-bit\n");
+ return 0;
+}
+
+static int idma64_remove(struct idma64_chip *chip)
+{
+ struct idma64 *idma64 = chip->idma64;
+ unsigned short i;
+
+ dma_async_device_unregister(&idma64->dma);
+
+ /*
+ * Explicitly call devm_request_irq() to avoid the side effects with
+ * the scheduled tasklets.
+ */
+ devm_free_irq(chip->dev, chip->irq, idma64);
+
+ for (i = 0; i < idma64->dma.chancnt; i++) {
+ struct idma64_chan *idma64c = &idma64->chan[i];
+
+ tasklet_kill(&idma64c->vchan.task);
+ }
+
+ return 0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+static int idma64_platform_probe(struct platform_device *pdev)
+{
+ struct idma64_chip *chip;
+ struct device *dev = &pdev->dev;
+ struct device *sysdev = dev->parent;
+ struct resource *mem;
+ int ret;
+
+ chip = devm_kzalloc(dev, sizeof(*chip), GFP_KERNEL);
+ if (!chip)
+ return -ENOMEM;
+
+ chip->irq = platform_get_irq(pdev, 0);
+ if (chip->irq < 0)
+ return chip->irq;
+
+ mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ chip->regs = devm_ioremap_resource(dev, mem);
+ if (IS_ERR(chip->regs))
+ return PTR_ERR(chip->regs);
+
+ ret = dma_coerce_mask_and_coherent(sysdev, DMA_BIT_MASK(64));
+ if (ret)
+ return ret;
+
+ chip->dev = dev;
+ chip->sysdev = sysdev;
+
+ ret = idma64_probe(chip);
+ if (ret)
+ return ret;
+
+ platform_set_drvdata(pdev, chip);
+ return 0;
+}
+
+static int idma64_platform_remove(struct platform_device *pdev)
+{
+ struct idma64_chip *chip = platform_get_drvdata(pdev);
+
+ return idma64_remove(chip);
+}
+
+static int __maybe_unused idma64_pm_suspend(struct device *dev)
+{
+ struct idma64_chip *chip = dev_get_drvdata(dev);
+
+ idma64_off(chip->idma64);
+ return 0;
+}
+
+static int __maybe_unused idma64_pm_resume(struct device *dev)
+{
+ struct idma64_chip *chip = dev_get_drvdata(dev);
+
+ idma64_on(chip->idma64);
+ return 0;
+}
+
+static const struct dev_pm_ops idma64_dev_pm_ops = {
+ SET_SYSTEM_SLEEP_PM_OPS(idma64_pm_suspend, idma64_pm_resume)
+};
+
+static struct platform_driver idma64_platform_driver = {
+ .probe = idma64_platform_probe,
+ .remove = idma64_platform_remove,
+ .driver = {
+ .name = LPSS_IDMA64_DRIVER_NAME,
+ .pm = &idma64_dev_pm_ops,
+ },
+};
+
+module_platform_driver(idma64_platform_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("iDMA64 core driver");
+MODULE_AUTHOR("Andy Shevchenko <andriy.shevchenko@linux.intel.com>");
+MODULE_ALIAS("platform:" LPSS_IDMA64_DRIVER_NAME);
diff --git a/drivers/dma/idma64.h b/drivers/dma/idma64.h
new file mode 100644
index 000000000..d013b5435
--- /dev/null
+++ b/drivers/dma/idma64.h
@@ -0,0 +1,229 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Driver for the Intel integrated DMA 64-bit
+ *
+ * Copyright (C) 2015 Intel Corporation
+ */
+
+#ifndef __DMA_IDMA64_H__
+#define __DMA_IDMA64_H__
+
+#include <linux/device.h>
+#include <linux/io.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+
+#include <linux/io-64-nonatomic-lo-hi.h>
+
+#include "virt-dma.h"
+
+/* Channel registers */
+
+#define IDMA64_CH_SAR 0x00 /* Source Address Register */
+#define IDMA64_CH_DAR 0x08 /* Destination Address Register */
+#define IDMA64_CH_LLP 0x10 /* Linked List Pointer */
+#define IDMA64_CH_CTL_LO 0x18 /* Control Register Low */
+#define IDMA64_CH_CTL_HI 0x1c /* Control Register High */
+#define IDMA64_CH_SSTAT 0x20
+#define IDMA64_CH_DSTAT 0x28
+#define IDMA64_CH_SSTATAR 0x30
+#define IDMA64_CH_DSTATAR 0x38
+#define IDMA64_CH_CFG_LO 0x40 /* Configuration Register Low */
+#define IDMA64_CH_CFG_HI 0x44 /* Configuration Register High */
+#define IDMA64_CH_SGR 0x48
+#define IDMA64_CH_DSR 0x50
+
+#define IDMA64_CH_LENGTH 0x58
+
+/* Bitfields in CTL_LO */
+#define IDMA64C_CTLL_INT_EN (1 << 0) /* irqs enabled? */
+#define IDMA64C_CTLL_DST_WIDTH(x) ((x) << 1) /* bytes per element */
+#define IDMA64C_CTLL_SRC_WIDTH(x) ((x) << 4)
+#define IDMA64C_CTLL_DST_INC (0 << 8) /* DAR update/not */
+#define IDMA64C_CTLL_DST_FIX (1 << 8)
+#define IDMA64C_CTLL_SRC_INC (0 << 10) /* SAR update/not */
+#define IDMA64C_CTLL_SRC_FIX (1 << 10)
+#define IDMA64C_CTLL_DST_MSIZE(x) ((x) << 11) /* burst, #elements */
+#define IDMA64C_CTLL_SRC_MSIZE(x) ((x) << 14)
+#define IDMA64C_CTLL_FC_M2P (1 << 20) /* mem-to-periph */
+#define IDMA64C_CTLL_FC_P2M (2 << 20) /* periph-to-mem */
+#define IDMA64C_CTLL_LLP_D_EN (1 << 27) /* dest block chain */
+#define IDMA64C_CTLL_LLP_S_EN (1 << 28) /* src block chain */
+
+/* Bitfields in CTL_HI */
+#define IDMA64C_CTLH_BLOCK_TS_MASK ((1 << 17) - 1)
+#define IDMA64C_CTLH_BLOCK_TS(x) ((x) & IDMA64C_CTLH_BLOCK_TS_MASK)
+#define IDMA64C_CTLH_DONE (1 << 17)
+
+/* Bitfields in CFG_LO */
+#define IDMA64C_CFGL_DST_BURST_ALIGN (1 << 0) /* dst burst align */
+#define IDMA64C_CFGL_SRC_BURST_ALIGN (1 << 1) /* src burst align */
+#define IDMA64C_CFGL_CH_SUSP (1 << 8)
+#define IDMA64C_CFGL_FIFO_EMPTY (1 << 9)
+#define IDMA64C_CFGL_CH_DRAIN (1 << 10) /* drain FIFO */
+#define IDMA64C_CFGL_DST_OPT_BL (1 << 20) /* optimize dst burst length */
+#define IDMA64C_CFGL_SRC_OPT_BL (1 << 21) /* optimize src burst length */
+
+/* Bitfields in CFG_HI */
+#define IDMA64C_CFGH_SRC_PER(x) ((x) << 0) /* src peripheral */
+#define IDMA64C_CFGH_DST_PER(x) ((x) << 4) /* dst peripheral */
+#define IDMA64C_CFGH_RD_ISSUE_THD(x) ((x) << 8)
+#define IDMA64C_CFGH_WR_ISSUE_THD(x) ((x) << 18)
+
+/* Interrupt registers */
+
+#define IDMA64_INT_XFER 0x00
+#define IDMA64_INT_BLOCK 0x08
+#define IDMA64_INT_SRC_TRAN 0x10
+#define IDMA64_INT_DST_TRAN 0x18
+#define IDMA64_INT_ERROR 0x20
+
+#define IDMA64_RAW(x) (0x2c0 + IDMA64_INT_##x) /* r */
+#define IDMA64_STATUS(x) (0x2e8 + IDMA64_INT_##x) /* r (raw & mask) */
+#define IDMA64_MASK(x) (0x310 + IDMA64_INT_##x) /* rw (set = irq enabled) */
+#define IDMA64_CLEAR(x) (0x338 + IDMA64_INT_##x) /* w (ack, affects "raw") */
+
+/* Common registers */
+
+#define IDMA64_STATUS_INT 0x360 /* r */
+#define IDMA64_CFG 0x398
+#define IDMA64_CH_EN 0x3a0
+
+/* Bitfields in CFG */
+#define IDMA64_CFG_DMA_EN (1 << 0)
+
+/* Hardware descriptor for Linked LIst transfers */
+struct idma64_lli {
+ u64 sar;
+ u64 dar;
+ u64 llp;
+ u32 ctllo;
+ u32 ctlhi;
+ u32 sstat;
+ u32 dstat;
+};
+
+struct idma64_hw_desc {
+ struct idma64_lli *lli;
+ dma_addr_t llp;
+ dma_addr_t phys;
+ unsigned int len;
+};
+
+struct idma64_desc {
+ struct virt_dma_desc vdesc;
+ enum dma_transfer_direction direction;
+ struct idma64_hw_desc *hw;
+ unsigned int ndesc;
+ size_t length;
+ enum dma_status status;
+};
+
+static inline struct idma64_desc *to_idma64_desc(struct virt_dma_desc *vdesc)
+{
+ return container_of(vdesc, struct idma64_desc, vdesc);
+}
+
+struct idma64_chan {
+ struct virt_dma_chan vchan;
+
+ void __iomem *regs;
+
+ /* hardware configuration */
+ enum dma_transfer_direction direction;
+ unsigned int mask;
+ struct dma_slave_config config;
+
+ void *pool;
+ struct idma64_desc *desc;
+};
+
+static inline struct idma64_chan *to_idma64_chan(struct dma_chan *chan)
+{
+ return container_of(chan, struct idma64_chan, vchan.chan);
+}
+
+#define channel_set_bit(idma64, reg, mask) \
+ dma_writel(idma64, reg, ((mask) << 8) | (mask))
+#define channel_clear_bit(idma64, reg, mask) \
+ dma_writel(idma64, reg, ((mask) << 8) | 0)
+
+static inline u32 idma64c_readl(struct idma64_chan *idma64c, int offset)
+{
+ return readl(idma64c->regs + offset);
+}
+
+static inline void idma64c_writel(struct idma64_chan *idma64c, int offset,
+ u32 value)
+{
+ writel(value, idma64c->regs + offset);
+}
+
+#define channel_readl(idma64c, reg) \
+ idma64c_readl(idma64c, IDMA64_CH_##reg)
+#define channel_writel(idma64c, reg, value) \
+ idma64c_writel(idma64c, IDMA64_CH_##reg, (value))
+
+static inline u64 idma64c_readq(struct idma64_chan *idma64c, int offset)
+{
+ return lo_hi_readq(idma64c->regs + offset);
+}
+
+static inline void idma64c_writeq(struct idma64_chan *idma64c, int offset,
+ u64 value)
+{
+ lo_hi_writeq(value, idma64c->regs + offset);
+}
+
+#define channel_readq(idma64c, reg) \
+ idma64c_readq(idma64c, IDMA64_CH_##reg)
+#define channel_writeq(idma64c, reg, value) \
+ idma64c_writeq(idma64c, IDMA64_CH_##reg, (value))
+
+struct idma64 {
+ struct dma_device dma;
+
+ void __iomem *regs;
+
+ /* channels */
+ unsigned short all_chan_mask;
+ struct idma64_chan *chan;
+};
+
+static inline struct idma64 *to_idma64(struct dma_device *ddev)
+{
+ return container_of(ddev, struct idma64, dma);
+}
+
+static inline u32 idma64_readl(struct idma64 *idma64, int offset)
+{
+ return readl(idma64->regs + offset);
+}
+
+static inline void idma64_writel(struct idma64 *idma64, int offset, u32 value)
+{
+ writel(value, idma64->regs + offset);
+}
+
+#define dma_readl(idma64, reg) \
+ idma64_readl(idma64, IDMA64_##reg)
+#define dma_writel(idma64, reg, value) \
+ idma64_writel(idma64, IDMA64_##reg, (value))
+
+/**
+ * struct idma64_chip - representation of iDMA 64-bit controller hardware
+ * @dev: struct device of the DMA controller
+ * @sysdev: struct device of the physical device that does DMA
+ * @irq: irq line
+ * @regs: memory mapped I/O space
+ * @idma64: struct idma64 that is filed by idma64_probe()
+ */
+struct idma64_chip {
+ struct device *dev;
+ struct device *sysdev;
+ int irq;
+ void __iomem *regs;
+ struct idma64 *idma64;
+};
+
+#endif /* __DMA_IDMA64_H__ */
diff --git a/drivers/dma/idxd/Makefile b/drivers/dma/idxd/Makefile
new file mode 100644
index 000000000..817ffa95a
--- /dev/null
+++ b/drivers/dma/idxd/Makefile
@@ -0,0 +1,12 @@
+ccflags-y += -DDEFAULT_SYMBOL_NAMESPACE=IDXD
+
+obj-$(CONFIG_INTEL_IDXD_BUS) += idxd_bus.o
+idxd_bus-y := bus.o
+
+obj-$(CONFIG_INTEL_IDXD) += idxd.o
+idxd-y := init.o irq.o device.o sysfs.o submit.o dma.o cdev.o
+
+idxd-$(CONFIG_INTEL_IDXD_PERFMON) += perfmon.o
+
+obj-$(CONFIG_INTEL_IDXD_COMPAT) += idxd_compat.o
+idxd_compat-y := compat.o
diff --git a/drivers/dma/idxd/bus.c b/drivers/dma/idxd/bus.c
new file mode 100644
index 000000000..6f8462105
--- /dev/null
+++ b/drivers/dma/idxd/bus.c
@@ -0,0 +1,91 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2021 Intel Corporation. All rights rsvd. */
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include "idxd.h"
+
+
+int __idxd_driver_register(struct idxd_device_driver *idxd_drv, struct module *owner,
+ const char *mod_name)
+{
+ struct device_driver *drv = &idxd_drv->drv;
+
+ if (!idxd_drv->type) {
+ pr_debug("driver type not set (%ps)\n", __builtin_return_address(0));
+ return -EINVAL;
+ }
+
+ drv->name = idxd_drv->name;
+ drv->bus = &dsa_bus_type;
+ drv->owner = owner;
+ drv->mod_name = mod_name;
+
+ return driver_register(drv);
+}
+EXPORT_SYMBOL_GPL(__idxd_driver_register);
+
+void idxd_driver_unregister(struct idxd_device_driver *idxd_drv)
+{
+ driver_unregister(&idxd_drv->drv);
+}
+EXPORT_SYMBOL_GPL(idxd_driver_unregister);
+
+static int idxd_config_bus_match(struct device *dev,
+ struct device_driver *drv)
+{
+ struct idxd_device_driver *idxd_drv =
+ container_of(drv, struct idxd_device_driver, drv);
+ struct idxd_dev *idxd_dev = confdev_to_idxd_dev(dev);
+ int i = 0;
+
+ while (idxd_drv->type[i] != IDXD_DEV_NONE) {
+ if (idxd_dev->type == idxd_drv->type[i])
+ return 1;
+ i++;
+ }
+
+ return 0;
+}
+
+static int idxd_config_bus_probe(struct device *dev)
+{
+ struct idxd_device_driver *idxd_drv =
+ container_of(dev->driver, struct idxd_device_driver, drv);
+ struct idxd_dev *idxd_dev = confdev_to_idxd_dev(dev);
+
+ return idxd_drv->probe(idxd_dev);
+}
+
+static void idxd_config_bus_remove(struct device *dev)
+{
+ struct idxd_device_driver *idxd_drv =
+ container_of(dev->driver, struct idxd_device_driver, drv);
+ struct idxd_dev *idxd_dev = confdev_to_idxd_dev(dev);
+
+ idxd_drv->remove(idxd_dev);
+}
+
+struct bus_type dsa_bus_type = {
+ .name = "dsa",
+ .match = idxd_config_bus_match,
+ .probe = idxd_config_bus_probe,
+ .remove = idxd_config_bus_remove,
+};
+EXPORT_SYMBOL_GPL(dsa_bus_type);
+
+static int __init dsa_bus_init(void)
+{
+ return bus_register(&dsa_bus_type);
+}
+module_init(dsa_bus_init);
+
+static void __exit dsa_bus_exit(void)
+{
+ bus_unregister(&dsa_bus_type);
+}
+module_exit(dsa_bus_exit);
+
+MODULE_DESCRIPTION("IDXD driver dsa_bus_type driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c
new file mode 100644
index 000000000..a9b96b187
--- /dev/null
+++ b/drivers/dma/idxd/cdev.c
@@ -0,0 +1,410 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2019 Intel Corporation. All rights rsvd. */
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/device.h>
+#include <linux/sched/task.h>
+#include <linux/intel-svm.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
+#include <linux/cdev.h>
+#include <linux/fs.h>
+#include <linux/poll.h>
+#include <linux/iommu.h>
+#include <uapi/linux/idxd.h>
+#include "registers.h"
+#include "idxd.h"
+
+struct idxd_cdev_context {
+ const char *name;
+ dev_t devt;
+ struct ida minor_ida;
+};
+
+/*
+ * ictx is an array based off of accelerator types. enum idxd_type
+ * is used as index
+ */
+static struct idxd_cdev_context ictx[IDXD_TYPE_MAX] = {
+ { .name = "dsa" },
+ { .name = "iax" }
+};
+
+struct idxd_user_context {
+ struct idxd_wq *wq;
+ struct task_struct *task;
+ unsigned int pasid;
+ unsigned int flags;
+ struct iommu_sva *sva;
+};
+
+static void idxd_cdev_dev_release(struct device *dev)
+{
+ struct idxd_cdev *idxd_cdev = dev_to_cdev(dev);
+ struct idxd_cdev_context *cdev_ctx;
+ struct idxd_wq *wq = idxd_cdev->wq;
+
+ cdev_ctx = &ictx[wq->idxd->data->type];
+ ida_simple_remove(&cdev_ctx->minor_ida, idxd_cdev->minor);
+ kfree(idxd_cdev);
+}
+
+static struct device_type idxd_cdev_device_type = {
+ .name = "idxd_cdev",
+ .release = idxd_cdev_dev_release,
+};
+
+static inline struct idxd_cdev *inode_idxd_cdev(struct inode *inode)
+{
+ struct cdev *cdev = inode->i_cdev;
+
+ return container_of(cdev, struct idxd_cdev, cdev);
+}
+
+static inline struct idxd_wq *inode_wq(struct inode *inode)
+{
+ struct idxd_cdev *idxd_cdev = inode_idxd_cdev(inode);
+
+ return idxd_cdev->wq;
+}
+
+static int idxd_cdev_open(struct inode *inode, struct file *filp)
+{
+ struct idxd_user_context *ctx;
+ struct idxd_device *idxd;
+ struct idxd_wq *wq;
+ struct device *dev;
+ int rc = 0;
+ struct iommu_sva *sva;
+ unsigned int pasid;
+
+ wq = inode_wq(inode);
+ idxd = wq->idxd;
+ dev = &idxd->pdev->dev;
+
+ dev_dbg(dev, "%s called: %d\n", __func__, idxd_wq_refcount(wq));
+
+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+ if (!ctx)
+ return -ENOMEM;
+
+ mutex_lock(&wq->wq_lock);
+
+ if (idxd_wq_refcount(wq) > 0 && wq_dedicated(wq)) {
+ rc = -EBUSY;
+ goto failed;
+ }
+
+ ctx->wq = wq;
+ filp->private_data = ctx;
+
+ if (device_user_pasid_enabled(idxd)) {
+ sva = iommu_sva_bind_device(dev, current->mm, NULL);
+ if (IS_ERR(sva)) {
+ rc = PTR_ERR(sva);
+ dev_err(dev, "pasid allocation failed: %d\n", rc);
+ goto failed;
+ }
+
+ pasid = iommu_sva_get_pasid(sva);
+ if (pasid == IOMMU_PASID_INVALID) {
+ iommu_sva_unbind_device(sva);
+ rc = -EINVAL;
+ goto failed;
+ }
+
+ ctx->sva = sva;
+ ctx->pasid = pasid;
+
+ if (wq_dedicated(wq)) {
+ rc = idxd_wq_set_pasid(wq, pasid);
+ if (rc < 0) {
+ iommu_sva_unbind_device(sva);
+ dev_err(dev, "wq set pasid failed: %d\n", rc);
+ goto failed;
+ }
+ }
+ }
+
+ idxd_wq_get(wq);
+ mutex_unlock(&wq->wq_lock);
+ return 0;
+
+ failed:
+ mutex_unlock(&wq->wq_lock);
+ kfree(ctx);
+ return rc;
+}
+
+static int idxd_cdev_release(struct inode *node, struct file *filep)
+{
+ struct idxd_user_context *ctx = filep->private_data;
+ struct idxd_wq *wq = ctx->wq;
+ struct idxd_device *idxd = wq->idxd;
+ struct device *dev = &idxd->pdev->dev;
+ int rc;
+
+ dev_dbg(dev, "%s called\n", __func__);
+ filep->private_data = NULL;
+
+ /* Wait for in-flight operations to complete. */
+ if (wq_shared(wq)) {
+ idxd_device_drain_pasid(idxd, ctx->pasid);
+ } else {
+ if (device_user_pasid_enabled(idxd)) {
+ /* The wq disable in the disable pasid function will drain the wq */
+ rc = idxd_wq_disable_pasid(wq);
+ if (rc < 0)
+ dev_err(dev, "wq disable pasid failed.\n");
+ } else {
+ idxd_wq_drain(wq);
+ }
+ }
+
+ if (ctx->sva)
+ iommu_sva_unbind_device(ctx->sva);
+ kfree(ctx);
+ mutex_lock(&wq->wq_lock);
+ idxd_wq_put(wq);
+ mutex_unlock(&wq->wq_lock);
+ return 0;
+}
+
+static int check_vma(struct idxd_wq *wq, struct vm_area_struct *vma,
+ const char *func)
+{
+ struct device *dev = &wq->idxd->pdev->dev;
+
+ if ((vma->vm_end - vma->vm_start) > PAGE_SIZE) {
+ dev_info_ratelimited(dev,
+ "%s: %s: mapping too large: %lu\n",
+ current->comm, func,
+ vma->vm_end - vma->vm_start);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int idxd_cdev_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+ struct idxd_user_context *ctx = filp->private_data;
+ struct idxd_wq *wq = ctx->wq;
+ struct idxd_device *idxd = wq->idxd;
+ struct pci_dev *pdev = idxd->pdev;
+ phys_addr_t base = pci_resource_start(pdev, IDXD_WQ_BAR);
+ unsigned long pfn;
+ int rc;
+
+ dev_dbg(&pdev->dev, "%s called\n", __func__);
+ rc = check_vma(wq, vma, __func__);
+ if (rc < 0)
+ return rc;
+
+ vma->vm_flags |= VM_DONTCOPY;
+ pfn = (base + idxd_get_wq_portal_full_offset(wq->id,
+ IDXD_PORTAL_LIMITED)) >> PAGE_SHIFT;
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+ vma->vm_private_data = ctx;
+
+ return io_remap_pfn_range(vma, vma->vm_start, pfn, PAGE_SIZE,
+ vma->vm_page_prot);
+}
+
+static __poll_t idxd_cdev_poll(struct file *filp,
+ struct poll_table_struct *wait)
+{
+ struct idxd_user_context *ctx = filp->private_data;
+ struct idxd_wq *wq = ctx->wq;
+ struct idxd_device *idxd = wq->idxd;
+ __poll_t out = 0;
+
+ poll_wait(filp, &wq->err_queue, wait);
+ spin_lock(&idxd->dev_lock);
+ if (idxd->sw_err.valid)
+ out = EPOLLIN | EPOLLRDNORM;
+ spin_unlock(&idxd->dev_lock);
+
+ return out;
+}
+
+static const struct file_operations idxd_cdev_fops = {
+ .owner = THIS_MODULE,
+ .open = idxd_cdev_open,
+ .release = idxd_cdev_release,
+ .mmap = idxd_cdev_mmap,
+ .poll = idxd_cdev_poll,
+};
+
+int idxd_cdev_get_major(struct idxd_device *idxd)
+{
+ return MAJOR(ictx[idxd->data->type].devt);
+}
+
+int idxd_wq_add_cdev(struct idxd_wq *wq)
+{
+ struct idxd_device *idxd = wq->idxd;
+ struct idxd_cdev *idxd_cdev;
+ struct cdev *cdev;
+ struct device *dev;
+ struct idxd_cdev_context *cdev_ctx;
+ int rc, minor;
+
+ idxd_cdev = kzalloc(sizeof(*idxd_cdev), GFP_KERNEL);
+ if (!idxd_cdev)
+ return -ENOMEM;
+
+ idxd_cdev->idxd_dev.type = IDXD_DEV_CDEV;
+ idxd_cdev->wq = wq;
+ cdev = &idxd_cdev->cdev;
+ dev = cdev_dev(idxd_cdev);
+ cdev_ctx = &ictx[wq->idxd->data->type];
+ minor = ida_simple_get(&cdev_ctx->minor_ida, 0, MINORMASK, GFP_KERNEL);
+ if (minor < 0) {
+ kfree(idxd_cdev);
+ return minor;
+ }
+ idxd_cdev->minor = minor;
+
+ device_initialize(dev);
+ dev->parent = wq_confdev(wq);
+ dev->bus = &dsa_bus_type;
+ dev->type = &idxd_cdev_device_type;
+ dev->devt = MKDEV(MAJOR(cdev_ctx->devt), minor);
+
+ rc = dev_set_name(dev, "%s/wq%u.%u", idxd->data->name_prefix, idxd->id, wq->id);
+ if (rc < 0)
+ goto err;
+
+ wq->idxd_cdev = idxd_cdev;
+ cdev_init(cdev, &idxd_cdev_fops);
+ rc = cdev_device_add(cdev, dev);
+ if (rc) {
+ dev_dbg(&wq->idxd->pdev->dev, "cdev_add failed: %d\n", rc);
+ goto err;
+ }
+
+ return 0;
+
+ err:
+ put_device(dev);
+ wq->idxd_cdev = NULL;
+ return rc;
+}
+
+void idxd_wq_del_cdev(struct idxd_wq *wq)
+{
+ struct idxd_cdev *idxd_cdev;
+
+ idxd_cdev = wq->idxd_cdev;
+ wq->idxd_cdev = NULL;
+ cdev_device_del(&idxd_cdev->cdev, cdev_dev(idxd_cdev));
+ put_device(cdev_dev(idxd_cdev));
+}
+
+static int idxd_user_drv_probe(struct idxd_dev *idxd_dev)
+{
+ struct idxd_wq *wq = idxd_dev_to_wq(idxd_dev);
+ struct idxd_device *idxd = wq->idxd;
+ int rc;
+
+ if (idxd->state != IDXD_DEV_ENABLED)
+ return -ENXIO;
+
+ /*
+ * User type WQ is enabled only when SVA is enabled for two reasons:
+ * - If no IOMMU or IOMMU Passthrough without SVA, userspace
+ * can directly access physical address through the WQ.
+ * - The IDXD cdev driver does not provide any ways to pin
+ * user pages and translate the address from user VA to IOVA or
+ * PA without IOMMU SVA. Therefore the application has no way
+ * to instruct the device to perform DMA function. This makes
+ * the cdev not usable for normal application usage.
+ */
+ if (!device_user_pasid_enabled(idxd)) {
+ idxd->cmd_status = IDXD_SCMD_WQ_USER_NO_IOMMU;
+ dev_dbg(&idxd->pdev->dev,
+ "User type WQ cannot be enabled without SVA.\n");
+
+ return -EOPNOTSUPP;
+ }
+
+ mutex_lock(&wq->wq_lock);
+ wq->type = IDXD_WQT_USER;
+ rc = drv_enable_wq(wq);
+ if (rc < 0)
+ goto err;
+
+ rc = idxd_wq_add_cdev(wq);
+ if (rc < 0) {
+ idxd->cmd_status = IDXD_SCMD_CDEV_ERR;
+ goto err_cdev;
+ }
+
+ idxd->cmd_status = 0;
+ mutex_unlock(&wq->wq_lock);
+ return 0;
+
+err_cdev:
+ drv_disable_wq(wq);
+err:
+ wq->type = IDXD_WQT_NONE;
+ mutex_unlock(&wq->wq_lock);
+ return rc;
+}
+
+static void idxd_user_drv_remove(struct idxd_dev *idxd_dev)
+{
+ struct idxd_wq *wq = idxd_dev_to_wq(idxd_dev);
+
+ mutex_lock(&wq->wq_lock);
+ idxd_wq_del_cdev(wq);
+ drv_disable_wq(wq);
+ wq->type = IDXD_WQT_NONE;
+ mutex_unlock(&wq->wq_lock);
+}
+
+static enum idxd_dev_type dev_types[] = {
+ IDXD_DEV_WQ,
+ IDXD_DEV_NONE,
+};
+
+struct idxd_device_driver idxd_user_drv = {
+ .probe = idxd_user_drv_probe,
+ .remove = idxd_user_drv_remove,
+ .name = "user",
+ .type = dev_types,
+};
+EXPORT_SYMBOL_GPL(idxd_user_drv);
+
+int idxd_cdev_register(void)
+{
+ int rc, i;
+
+ for (i = 0; i < IDXD_TYPE_MAX; i++) {
+ ida_init(&ictx[i].minor_ida);
+ rc = alloc_chrdev_region(&ictx[i].devt, 0, MINORMASK,
+ ictx[i].name);
+ if (rc)
+ goto err_free_chrdev_region;
+ }
+
+ return 0;
+
+err_free_chrdev_region:
+ for (i--; i >= 0; i--)
+ unregister_chrdev_region(ictx[i].devt, MINORMASK);
+
+ return rc;
+}
+
+void idxd_cdev_remove(void)
+{
+ int i;
+
+ for (i = 0; i < IDXD_TYPE_MAX; i++) {
+ unregister_chrdev_region(ictx[i].devt, MINORMASK);
+ ida_destroy(&ictx[i].minor_ida);
+ }
+}
diff --git a/drivers/dma/idxd/compat.c b/drivers/dma/idxd/compat.c
new file mode 100644
index 000000000..3df21615f
--- /dev/null
+++ b/drivers/dma/idxd/compat.c
@@ -0,0 +1,107 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2021 Intel Corporation. All rights rsvd. */
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/device/bus.h>
+#include "idxd.h"
+
+extern int device_driver_attach(struct device_driver *drv, struct device *dev);
+extern void device_driver_detach(struct device *dev);
+
+#define DRIVER_ATTR_IGNORE_LOCKDEP(_name, _mode, _show, _store) \
+ struct driver_attribute driver_attr_##_name = \
+ __ATTR_IGNORE_LOCKDEP(_name, _mode, _show, _store)
+
+static ssize_t unbind_store(struct device_driver *drv, const char *buf, size_t count)
+{
+ struct bus_type *bus = drv->bus;
+ struct device *dev;
+ int rc = -ENODEV;
+
+ dev = bus_find_device_by_name(bus, NULL, buf);
+ if (dev && dev->driver) {
+ device_driver_detach(dev);
+ rc = count;
+ }
+
+ return rc;
+}
+static DRIVER_ATTR_IGNORE_LOCKDEP(unbind, 0200, NULL, unbind_store);
+
+static ssize_t bind_store(struct device_driver *drv, const char *buf, size_t count)
+{
+ struct bus_type *bus = drv->bus;
+ struct device *dev;
+ struct device_driver *alt_drv = NULL;
+ int rc = -ENODEV;
+ struct idxd_dev *idxd_dev;
+
+ dev = bus_find_device_by_name(bus, NULL, buf);
+ if (!dev || dev->driver || drv != &dsa_drv.drv)
+ return -ENODEV;
+
+ idxd_dev = confdev_to_idxd_dev(dev);
+ if (is_idxd_dev(idxd_dev)) {
+ alt_drv = driver_find("idxd", bus);
+ } else if (is_idxd_wq_dev(idxd_dev)) {
+ struct idxd_wq *wq = confdev_to_wq(dev);
+
+ if (is_idxd_wq_kernel(wq))
+ alt_drv = driver_find("dmaengine", bus);
+ else if (is_idxd_wq_user(wq))
+ alt_drv = driver_find("user", bus);
+ }
+ if (!alt_drv)
+ return -ENODEV;
+
+ rc = device_driver_attach(alt_drv, dev);
+ if (rc < 0)
+ return rc;
+
+ return count;
+}
+static DRIVER_ATTR_IGNORE_LOCKDEP(bind, 0200, NULL, bind_store);
+
+static struct attribute *dsa_drv_compat_attrs[] = {
+ &driver_attr_bind.attr,
+ &driver_attr_unbind.attr,
+ NULL,
+};
+
+static const struct attribute_group dsa_drv_compat_attr_group = {
+ .attrs = dsa_drv_compat_attrs,
+};
+
+static const struct attribute_group *dsa_drv_compat_groups[] = {
+ &dsa_drv_compat_attr_group,
+ NULL,
+};
+
+static int idxd_dsa_drv_probe(struct idxd_dev *idxd_dev)
+{
+ return -ENODEV;
+}
+
+static void idxd_dsa_drv_remove(struct idxd_dev *idxd_dev)
+{
+}
+
+static enum idxd_dev_type dev_types[] = {
+ IDXD_DEV_NONE,
+};
+
+struct idxd_device_driver dsa_drv = {
+ .name = "dsa",
+ .probe = idxd_dsa_drv_probe,
+ .remove = idxd_dsa_drv_remove,
+ .type = dev_types,
+ .drv = {
+ .suppress_bind_attrs = true,
+ .groups = dsa_drv_compat_groups,
+ },
+};
+
+module_idxd_driver(dsa_drv);
+MODULE_IMPORT_NS(IDXD);
diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c
new file mode 100644
index 000000000..188f6b862
--- /dev/null
+++ b/drivers/dma/idxd/device.c
@@ -0,0 +1,1507 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2019 Intel Corporation. All rights rsvd. */
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
+#include <linux/dmaengine.h>
+#include <linux/irq.h>
+#include <linux/msi.h>
+#include <uapi/linux/idxd.h>
+#include "../dmaengine.h"
+#include "idxd.h"
+#include "registers.h"
+
+static void idxd_cmd_exec(struct idxd_device *idxd, int cmd_code, u32 operand,
+ u32 *status);
+static void idxd_device_wqs_clear_state(struct idxd_device *idxd);
+static void idxd_wq_disable_cleanup(struct idxd_wq *wq);
+
+/* Interrupt control bits */
+void idxd_unmask_error_interrupts(struct idxd_device *idxd)
+{
+ union genctrl_reg genctrl;
+
+ genctrl.bits = ioread32(idxd->reg_base + IDXD_GENCTRL_OFFSET);
+ genctrl.softerr_int_en = 1;
+ genctrl.halt_int_en = 1;
+ iowrite32(genctrl.bits, idxd->reg_base + IDXD_GENCTRL_OFFSET);
+}
+
+void idxd_mask_error_interrupts(struct idxd_device *idxd)
+{
+ union genctrl_reg genctrl;
+
+ genctrl.bits = ioread32(idxd->reg_base + IDXD_GENCTRL_OFFSET);
+ genctrl.softerr_int_en = 0;
+ genctrl.halt_int_en = 0;
+ iowrite32(genctrl.bits, idxd->reg_base + IDXD_GENCTRL_OFFSET);
+}
+
+static void free_hw_descs(struct idxd_wq *wq)
+{
+ int i;
+
+ for (i = 0; i < wq->num_descs; i++)
+ kfree(wq->hw_descs[i]);
+
+ kfree(wq->hw_descs);
+}
+
+static int alloc_hw_descs(struct idxd_wq *wq, int num)
+{
+ struct device *dev = &wq->idxd->pdev->dev;
+ int i;
+ int node = dev_to_node(dev);
+
+ wq->hw_descs = kcalloc_node(num, sizeof(struct dsa_hw_desc *),
+ GFP_KERNEL, node);
+ if (!wq->hw_descs)
+ return -ENOMEM;
+
+ for (i = 0; i < num; i++) {
+ wq->hw_descs[i] = kzalloc_node(sizeof(*wq->hw_descs[i]),
+ GFP_KERNEL, node);
+ if (!wq->hw_descs[i]) {
+ free_hw_descs(wq);
+ return -ENOMEM;
+ }
+ }
+
+ return 0;
+}
+
+static void free_descs(struct idxd_wq *wq)
+{
+ int i;
+
+ for (i = 0; i < wq->num_descs; i++)
+ kfree(wq->descs[i]);
+
+ kfree(wq->descs);
+}
+
+static int alloc_descs(struct idxd_wq *wq, int num)
+{
+ struct device *dev = &wq->idxd->pdev->dev;
+ int i;
+ int node = dev_to_node(dev);
+
+ wq->descs = kcalloc_node(num, sizeof(struct idxd_desc *),
+ GFP_KERNEL, node);
+ if (!wq->descs)
+ return -ENOMEM;
+
+ for (i = 0; i < num; i++) {
+ wq->descs[i] = kzalloc_node(sizeof(*wq->descs[i]),
+ GFP_KERNEL, node);
+ if (!wq->descs[i]) {
+ free_descs(wq);
+ return -ENOMEM;
+ }
+ }
+
+ return 0;
+}
+
+/* WQ control bits */
+int idxd_wq_alloc_resources(struct idxd_wq *wq)
+{
+ struct idxd_device *idxd = wq->idxd;
+ struct device *dev = &idxd->pdev->dev;
+ int rc, num_descs, i;
+
+ if (wq->type != IDXD_WQT_KERNEL)
+ return 0;
+
+ num_descs = wq_dedicated(wq) ? wq->size : wq->threshold;
+ wq->num_descs = num_descs;
+
+ rc = alloc_hw_descs(wq, num_descs);
+ if (rc < 0)
+ return rc;
+
+ wq->compls_size = num_descs * idxd->data->compl_size;
+ wq->compls = dma_alloc_coherent(dev, wq->compls_size, &wq->compls_addr, GFP_KERNEL);
+ if (!wq->compls) {
+ rc = -ENOMEM;
+ goto fail_alloc_compls;
+ }
+
+ rc = alloc_descs(wq, num_descs);
+ if (rc < 0)
+ goto fail_alloc_descs;
+
+ rc = sbitmap_queue_init_node(&wq->sbq, num_descs, -1, false, GFP_KERNEL,
+ dev_to_node(dev));
+ if (rc < 0)
+ goto fail_sbitmap_init;
+
+ for (i = 0; i < num_descs; i++) {
+ struct idxd_desc *desc = wq->descs[i];
+
+ desc->hw = wq->hw_descs[i];
+ if (idxd->data->type == IDXD_TYPE_DSA)
+ desc->completion = &wq->compls[i];
+ else if (idxd->data->type == IDXD_TYPE_IAX)
+ desc->iax_completion = &wq->iax_compls[i];
+ desc->compl_dma = wq->compls_addr + idxd->data->compl_size * i;
+ desc->id = i;
+ desc->wq = wq;
+ desc->cpu = -1;
+ }
+
+ return 0;
+
+ fail_sbitmap_init:
+ free_descs(wq);
+ fail_alloc_descs:
+ dma_free_coherent(dev, wq->compls_size, wq->compls, wq->compls_addr);
+ fail_alloc_compls:
+ free_hw_descs(wq);
+ return rc;
+}
+
+void idxd_wq_free_resources(struct idxd_wq *wq)
+{
+ struct device *dev = &wq->idxd->pdev->dev;
+
+ if (wq->type != IDXD_WQT_KERNEL)
+ return;
+
+ free_hw_descs(wq);
+ free_descs(wq);
+ dma_free_coherent(dev, wq->compls_size, wq->compls, wq->compls_addr);
+ sbitmap_queue_free(&wq->sbq);
+}
+
+int idxd_wq_enable(struct idxd_wq *wq)
+{
+ struct idxd_device *idxd = wq->idxd;
+ struct device *dev = &idxd->pdev->dev;
+ u32 status;
+
+ if (wq->state == IDXD_WQ_ENABLED) {
+ dev_dbg(dev, "WQ %d already enabled\n", wq->id);
+ return 0;
+ }
+
+ idxd_cmd_exec(idxd, IDXD_CMD_ENABLE_WQ, wq->id, &status);
+
+ if (status != IDXD_CMDSTS_SUCCESS &&
+ status != IDXD_CMDSTS_ERR_WQ_ENABLED) {
+ dev_dbg(dev, "WQ enable failed: %#x\n", status);
+ return -ENXIO;
+ }
+
+ wq->state = IDXD_WQ_ENABLED;
+ set_bit(wq->id, idxd->wq_enable_map);
+ dev_dbg(dev, "WQ %d enabled\n", wq->id);
+ return 0;
+}
+
+int idxd_wq_disable(struct idxd_wq *wq, bool reset_config)
+{
+ struct idxd_device *idxd = wq->idxd;
+ struct device *dev = &idxd->pdev->dev;
+ u32 status, operand;
+
+ dev_dbg(dev, "Disabling WQ %d\n", wq->id);
+
+ if (wq->state != IDXD_WQ_ENABLED) {
+ dev_dbg(dev, "WQ %d in wrong state: %d\n", wq->id, wq->state);
+ return 0;
+ }
+
+ operand = BIT(wq->id % 16) | ((wq->id / 16) << 16);
+ idxd_cmd_exec(idxd, IDXD_CMD_DISABLE_WQ, operand, &status);
+
+ if (status != IDXD_CMDSTS_SUCCESS) {
+ dev_dbg(dev, "WQ disable failed: %#x\n", status);
+ return -ENXIO;
+ }
+
+ if (reset_config)
+ idxd_wq_disable_cleanup(wq);
+ clear_bit(wq->id, idxd->wq_enable_map);
+ wq->state = IDXD_WQ_DISABLED;
+ dev_dbg(dev, "WQ %d disabled\n", wq->id);
+ return 0;
+}
+
+void idxd_wq_drain(struct idxd_wq *wq)
+{
+ struct idxd_device *idxd = wq->idxd;
+ struct device *dev = &idxd->pdev->dev;
+ u32 operand;
+
+ if (wq->state != IDXD_WQ_ENABLED) {
+ dev_dbg(dev, "WQ %d in wrong state: %d\n", wq->id, wq->state);
+ return;
+ }
+
+ dev_dbg(dev, "Draining WQ %d\n", wq->id);
+ operand = BIT(wq->id % 16) | ((wq->id / 16) << 16);
+ idxd_cmd_exec(idxd, IDXD_CMD_DRAIN_WQ, operand, NULL);
+}
+
+void idxd_wq_reset(struct idxd_wq *wq)
+{
+ struct idxd_device *idxd = wq->idxd;
+ struct device *dev = &idxd->pdev->dev;
+ u32 operand;
+
+ if (wq->state != IDXD_WQ_ENABLED) {
+ dev_dbg(dev, "WQ %d in wrong state: %d\n", wq->id, wq->state);
+ return;
+ }
+
+ operand = BIT(wq->id % 16) | ((wq->id / 16) << 16);
+ idxd_cmd_exec(idxd, IDXD_CMD_RESET_WQ, operand, NULL);
+ idxd_wq_disable_cleanup(wq);
+}
+
+int idxd_wq_map_portal(struct idxd_wq *wq)
+{
+ struct idxd_device *idxd = wq->idxd;
+ struct pci_dev *pdev = idxd->pdev;
+ struct device *dev = &pdev->dev;
+ resource_size_t start;
+
+ start = pci_resource_start(pdev, IDXD_WQ_BAR);
+ start += idxd_get_wq_portal_full_offset(wq->id, IDXD_PORTAL_LIMITED);
+
+ wq->portal = devm_ioremap(dev, start, IDXD_PORTAL_SIZE);
+ if (!wq->portal)
+ return -ENOMEM;
+
+ return 0;
+}
+
+void idxd_wq_unmap_portal(struct idxd_wq *wq)
+{
+ struct device *dev = &wq->idxd->pdev->dev;
+
+ devm_iounmap(dev, wq->portal);
+ wq->portal = NULL;
+ wq->portal_offset = 0;
+}
+
+void idxd_wqs_unmap_portal(struct idxd_device *idxd)
+{
+ int i;
+
+ for (i = 0; i < idxd->max_wqs; i++) {
+ struct idxd_wq *wq = idxd->wqs[i];
+
+ if (wq->portal)
+ idxd_wq_unmap_portal(wq);
+ }
+}
+
+static void __idxd_wq_set_priv_locked(struct idxd_wq *wq, int priv)
+{
+ struct idxd_device *idxd = wq->idxd;
+ union wqcfg wqcfg;
+ unsigned int offset;
+
+ offset = WQCFG_OFFSET(idxd, wq->id, WQCFG_PRIVL_IDX);
+ spin_lock(&idxd->dev_lock);
+ wqcfg.bits[WQCFG_PRIVL_IDX] = ioread32(idxd->reg_base + offset);
+ wqcfg.priv = priv;
+ wq->wqcfg->bits[WQCFG_PRIVL_IDX] = wqcfg.bits[WQCFG_PRIVL_IDX];
+ iowrite32(wqcfg.bits[WQCFG_PRIVL_IDX], idxd->reg_base + offset);
+ spin_unlock(&idxd->dev_lock);
+}
+
+static void __idxd_wq_set_pasid_locked(struct idxd_wq *wq, int pasid)
+{
+ struct idxd_device *idxd = wq->idxd;
+ union wqcfg wqcfg;
+ unsigned int offset;
+
+ offset = WQCFG_OFFSET(idxd, wq->id, WQCFG_PASID_IDX);
+ spin_lock(&idxd->dev_lock);
+ wqcfg.bits[WQCFG_PASID_IDX] = ioread32(idxd->reg_base + offset);
+ wqcfg.pasid_en = 1;
+ wqcfg.pasid = pasid;
+ wq->wqcfg->bits[WQCFG_PASID_IDX] = wqcfg.bits[WQCFG_PASID_IDX];
+ iowrite32(wqcfg.bits[WQCFG_PASID_IDX], idxd->reg_base + offset);
+ spin_unlock(&idxd->dev_lock);
+}
+
+int idxd_wq_set_pasid(struct idxd_wq *wq, int pasid)
+{
+ int rc;
+
+ rc = idxd_wq_disable(wq, false);
+ if (rc < 0)
+ return rc;
+
+ __idxd_wq_set_pasid_locked(wq, pasid);
+
+ rc = idxd_wq_enable(wq);
+ if (rc < 0)
+ return rc;
+
+ return 0;
+}
+
+int idxd_wq_disable_pasid(struct idxd_wq *wq)
+{
+ struct idxd_device *idxd = wq->idxd;
+ int rc;
+ union wqcfg wqcfg;
+ unsigned int offset;
+
+ rc = idxd_wq_disable(wq, false);
+ if (rc < 0)
+ return rc;
+
+ offset = WQCFG_OFFSET(idxd, wq->id, WQCFG_PASID_IDX);
+ spin_lock(&idxd->dev_lock);
+ wqcfg.bits[WQCFG_PASID_IDX] = ioread32(idxd->reg_base + offset);
+ wqcfg.pasid_en = 0;
+ wqcfg.pasid = 0;
+ iowrite32(wqcfg.bits[WQCFG_PASID_IDX], idxd->reg_base + offset);
+ spin_unlock(&idxd->dev_lock);
+
+ rc = idxd_wq_enable(wq);
+ if (rc < 0)
+ return rc;
+
+ return 0;
+}
+
+static void idxd_wq_disable_cleanup(struct idxd_wq *wq)
+{
+ struct idxd_device *idxd = wq->idxd;
+
+ lockdep_assert_held(&wq->wq_lock);
+ wq->state = IDXD_WQ_DISABLED;
+ memset(wq->wqcfg, 0, idxd->wqcfg_size);
+ wq->type = IDXD_WQT_NONE;
+ wq->threshold = 0;
+ wq->priority = 0;
+ wq->enqcmds_retries = IDXD_ENQCMDS_RETRIES;
+ clear_bit(WQ_FLAG_DEDICATED, &wq->flags);
+ clear_bit(WQ_FLAG_BLOCK_ON_FAULT, &wq->flags);
+ clear_bit(WQ_FLAG_ATS_DISABLE, &wq->flags);
+ memset(wq->name, 0, WQ_NAME_SIZE);
+ wq->max_xfer_bytes = WQ_DEFAULT_MAX_XFER;
+ idxd_wq_set_max_batch_size(idxd->data->type, wq, WQ_DEFAULT_MAX_BATCH);
+ if (wq->opcap_bmap)
+ bitmap_copy(wq->opcap_bmap, idxd->opcap_bmap, IDXD_MAX_OPCAP_BITS);
+}
+
+static void idxd_wq_device_reset_cleanup(struct idxd_wq *wq)
+{
+ lockdep_assert_held(&wq->wq_lock);
+
+ wq->size = 0;
+ wq->group = NULL;
+}
+
+static void idxd_wq_ref_release(struct percpu_ref *ref)
+{
+ struct idxd_wq *wq = container_of(ref, struct idxd_wq, wq_active);
+
+ complete(&wq->wq_dead);
+}
+
+int idxd_wq_init_percpu_ref(struct idxd_wq *wq)
+{
+ int rc;
+
+ memset(&wq->wq_active, 0, sizeof(wq->wq_active));
+ rc = percpu_ref_init(&wq->wq_active, idxd_wq_ref_release,
+ PERCPU_REF_ALLOW_REINIT, GFP_KERNEL);
+ if (rc < 0)
+ return rc;
+ reinit_completion(&wq->wq_dead);
+ reinit_completion(&wq->wq_resurrect);
+ return 0;
+}
+
+void __idxd_wq_quiesce(struct idxd_wq *wq)
+{
+ lockdep_assert_held(&wq->wq_lock);
+ reinit_completion(&wq->wq_resurrect);
+ percpu_ref_kill(&wq->wq_active);
+ complete_all(&wq->wq_resurrect);
+ wait_for_completion(&wq->wq_dead);
+}
+
+void idxd_wq_quiesce(struct idxd_wq *wq)
+{
+ mutex_lock(&wq->wq_lock);
+ __idxd_wq_quiesce(wq);
+ mutex_unlock(&wq->wq_lock);
+}
+
+/* Device control bits */
+static inline bool idxd_is_enabled(struct idxd_device *idxd)
+{
+ union gensts_reg gensts;
+
+ gensts.bits = ioread32(idxd->reg_base + IDXD_GENSTATS_OFFSET);
+
+ if (gensts.state == IDXD_DEVICE_STATE_ENABLED)
+ return true;
+ return false;
+}
+
+static inline bool idxd_device_is_halted(struct idxd_device *idxd)
+{
+ union gensts_reg gensts;
+
+ gensts.bits = ioread32(idxd->reg_base + IDXD_GENSTATS_OFFSET);
+
+ return (gensts.state == IDXD_DEVICE_STATE_HALT);
+}
+
+/*
+ * This is function is only used for reset during probe and will
+ * poll for completion. Once the device is setup with interrupts,
+ * all commands will be done via interrupt completion.
+ */
+int idxd_device_init_reset(struct idxd_device *idxd)
+{
+ struct device *dev = &idxd->pdev->dev;
+ union idxd_command_reg cmd;
+
+ if (idxd_device_is_halted(idxd)) {
+ dev_warn(&idxd->pdev->dev, "Device is HALTED!\n");
+ return -ENXIO;
+ }
+
+ memset(&cmd, 0, sizeof(cmd));
+ cmd.cmd = IDXD_CMD_RESET_DEVICE;
+ dev_dbg(dev, "%s: sending reset for init.\n", __func__);
+ spin_lock(&idxd->cmd_lock);
+ iowrite32(cmd.bits, idxd->reg_base + IDXD_CMD_OFFSET);
+
+ while (ioread32(idxd->reg_base + IDXD_CMDSTS_OFFSET) &
+ IDXD_CMDSTS_ACTIVE)
+ cpu_relax();
+ spin_unlock(&idxd->cmd_lock);
+ return 0;
+}
+
+static void idxd_cmd_exec(struct idxd_device *idxd, int cmd_code, u32 operand,
+ u32 *status)
+{
+ union idxd_command_reg cmd;
+ DECLARE_COMPLETION_ONSTACK(done);
+ u32 stat;
+ unsigned long flags;
+
+ if (idxd_device_is_halted(idxd)) {
+ dev_warn(&idxd->pdev->dev, "Device is HALTED!\n");
+ if (status)
+ *status = IDXD_CMDSTS_HW_ERR;
+ return;
+ }
+
+ memset(&cmd, 0, sizeof(cmd));
+ cmd.cmd = cmd_code;
+ cmd.operand = operand;
+ cmd.int_req = 1;
+
+ spin_lock_irqsave(&idxd->cmd_lock, flags);
+ wait_event_lock_irq(idxd->cmd_waitq,
+ !test_bit(IDXD_FLAG_CMD_RUNNING, &idxd->flags),
+ idxd->cmd_lock);
+
+ dev_dbg(&idxd->pdev->dev, "%s: sending cmd: %#x op: %#x\n",
+ __func__, cmd_code, operand);
+
+ idxd->cmd_status = 0;
+ __set_bit(IDXD_FLAG_CMD_RUNNING, &idxd->flags);
+ idxd->cmd_done = &done;
+ iowrite32(cmd.bits, idxd->reg_base + IDXD_CMD_OFFSET);
+
+ /*
+ * After command submitted, release lock and go to sleep until
+ * the command completes via interrupt.
+ */
+ spin_unlock_irqrestore(&idxd->cmd_lock, flags);
+ wait_for_completion(&done);
+ stat = ioread32(idxd->reg_base + IDXD_CMDSTS_OFFSET);
+ spin_lock(&idxd->cmd_lock);
+ if (status)
+ *status = stat;
+ idxd->cmd_status = stat & GENMASK(7, 0);
+
+ __clear_bit(IDXD_FLAG_CMD_RUNNING, &idxd->flags);
+ /* Wake up other pending commands */
+ wake_up(&idxd->cmd_waitq);
+ spin_unlock(&idxd->cmd_lock);
+}
+
+int idxd_device_enable(struct idxd_device *idxd)
+{
+ struct device *dev = &idxd->pdev->dev;
+ u32 status;
+
+ if (idxd_is_enabled(idxd)) {
+ dev_dbg(dev, "Device already enabled\n");
+ return -ENXIO;
+ }
+
+ idxd_cmd_exec(idxd, IDXD_CMD_ENABLE_DEVICE, 0, &status);
+
+ /* If the command is successful or if the device was enabled */
+ if (status != IDXD_CMDSTS_SUCCESS &&
+ status != IDXD_CMDSTS_ERR_DEV_ENABLED) {
+ dev_dbg(dev, "%s: err_code: %#x\n", __func__, status);
+ return -ENXIO;
+ }
+
+ idxd->state = IDXD_DEV_ENABLED;
+ return 0;
+}
+
+int idxd_device_disable(struct idxd_device *idxd)
+{
+ struct device *dev = &idxd->pdev->dev;
+ u32 status;
+
+ if (!idxd_is_enabled(idxd)) {
+ dev_dbg(dev, "Device is not enabled\n");
+ return 0;
+ }
+
+ idxd_cmd_exec(idxd, IDXD_CMD_DISABLE_DEVICE, 0, &status);
+
+ /* If the command is successful or if the device was disabled */
+ if (status != IDXD_CMDSTS_SUCCESS &&
+ !(status & IDXD_CMDSTS_ERR_DIS_DEV_EN)) {
+ dev_dbg(dev, "%s: err_code: %#x\n", __func__, status);
+ return -ENXIO;
+ }
+
+ idxd_device_clear_state(idxd);
+ return 0;
+}
+
+void idxd_device_reset(struct idxd_device *idxd)
+{
+ idxd_cmd_exec(idxd, IDXD_CMD_RESET_DEVICE, 0, NULL);
+ idxd_device_clear_state(idxd);
+ spin_lock(&idxd->dev_lock);
+ idxd_unmask_error_interrupts(idxd);
+ spin_unlock(&idxd->dev_lock);
+}
+
+void idxd_device_drain_pasid(struct idxd_device *idxd, int pasid)
+{
+ struct device *dev = &idxd->pdev->dev;
+ u32 operand;
+
+ operand = pasid;
+ dev_dbg(dev, "cmd: %u operand: %#x\n", IDXD_CMD_DRAIN_PASID, operand);
+ idxd_cmd_exec(idxd, IDXD_CMD_DRAIN_PASID, operand, NULL);
+ dev_dbg(dev, "pasid %d drained\n", pasid);
+}
+
+int idxd_device_request_int_handle(struct idxd_device *idxd, int idx, int *handle,
+ enum idxd_interrupt_type irq_type)
+{
+ struct device *dev = &idxd->pdev->dev;
+ u32 operand, status;
+
+ if (!(idxd->hw.cmd_cap & BIT(IDXD_CMD_REQUEST_INT_HANDLE)))
+ return -EOPNOTSUPP;
+
+ dev_dbg(dev, "get int handle, idx %d\n", idx);
+
+ operand = idx & GENMASK(15, 0);
+ if (irq_type == IDXD_IRQ_IMS)
+ operand |= CMD_INT_HANDLE_IMS;
+
+ dev_dbg(dev, "cmd: %u operand: %#x\n", IDXD_CMD_REQUEST_INT_HANDLE, operand);
+
+ idxd_cmd_exec(idxd, IDXD_CMD_REQUEST_INT_HANDLE, operand, &status);
+
+ if ((status & IDXD_CMDSTS_ERR_MASK) != IDXD_CMDSTS_SUCCESS) {
+ dev_dbg(dev, "request int handle failed: %#x\n", status);
+ return -ENXIO;
+ }
+
+ *handle = (status >> IDXD_CMDSTS_RES_SHIFT) & GENMASK(15, 0);
+
+ dev_dbg(dev, "int handle acquired: %u\n", *handle);
+ return 0;
+}
+
+int idxd_device_release_int_handle(struct idxd_device *idxd, int handle,
+ enum idxd_interrupt_type irq_type)
+{
+ struct device *dev = &idxd->pdev->dev;
+ u32 operand, status;
+ union idxd_command_reg cmd;
+
+ if (!(idxd->hw.cmd_cap & BIT(IDXD_CMD_RELEASE_INT_HANDLE)))
+ return -EOPNOTSUPP;
+
+ dev_dbg(dev, "release int handle, handle %d\n", handle);
+
+ memset(&cmd, 0, sizeof(cmd));
+ operand = handle & GENMASK(15, 0);
+
+ if (irq_type == IDXD_IRQ_IMS)
+ operand |= CMD_INT_HANDLE_IMS;
+
+ cmd.cmd = IDXD_CMD_RELEASE_INT_HANDLE;
+ cmd.operand = operand;
+
+ dev_dbg(dev, "cmd: %u operand: %#x\n", IDXD_CMD_RELEASE_INT_HANDLE, operand);
+
+ spin_lock(&idxd->cmd_lock);
+ iowrite32(cmd.bits, idxd->reg_base + IDXD_CMD_OFFSET);
+
+ while (ioread32(idxd->reg_base + IDXD_CMDSTS_OFFSET) & IDXD_CMDSTS_ACTIVE)
+ cpu_relax();
+ status = ioread32(idxd->reg_base + IDXD_CMDSTS_OFFSET);
+ spin_unlock(&idxd->cmd_lock);
+
+ if ((status & IDXD_CMDSTS_ERR_MASK) != IDXD_CMDSTS_SUCCESS) {
+ dev_dbg(dev, "release int handle failed: %#x\n", status);
+ return -ENXIO;
+ }
+
+ dev_dbg(dev, "int handle released.\n");
+ return 0;
+}
+
+/* Device configuration bits */
+static void idxd_engines_clear_state(struct idxd_device *idxd)
+{
+ struct idxd_engine *engine;
+ int i;
+
+ lockdep_assert_held(&idxd->dev_lock);
+ for (i = 0; i < idxd->max_engines; i++) {
+ engine = idxd->engines[i];
+ engine->group = NULL;
+ }
+}
+
+static void idxd_groups_clear_state(struct idxd_device *idxd)
+{
+ struct idxd_group *group;
+ int i;
+
+ lockdep_assert_held(&idxd->dev_lock);
+ for (i = 0; i < idxd->max_groups; i++) {
+ group = idxd->groups[i];
+ memset(&group->grpcfg, 0, sizeof(group->grpcfg));
+ group->num_engines = 0;
+ group->num_wqs = 0;
+ group->use_rdbuf_limit = false;
+ group->rdbufs_allowed = 0;
+ group->rdbufs_reserved = 0;
+ if (idxd->hw.version <= DEVICE_VERSION_2 && !tc_override) {
+ group->tc_a = 1;
+ group->tc_b = 1;
+ } else {
+ group->tc_a = -1;
+ group->tc_b = -1;
+ }
+ group->desc_progress_limit = 0;
+ group->batch_progress_limit = 0;
+ }
+}
+
+static void idxd_device_wqs_clear_state(struct idxd_device *idxd)
+{
+ int i;
+
+ for (i = 0; i < idxd->max_wqs; i++) {
+ struct idxd_wq *wq = idxd->wqs[i];
+
+ mutex_lock(&wq->wq_lock);
+ idxd_wq_disable_cleanup(wq);
+ idxd_wq_device_reset_cleanup(wq);
+ mutex_unlock(&wq->wq_lock);
+ }
+}
+
+void idxd_device_clear_state(struct idxd_device *idxd)
+{
+ /* IDXD is always disabled. Other states are cleared only when IDXD is configurable. */
+ if (test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) {
+ /*
+ * Clearing wq state is protected by wq lock.
+ * So no need to be protected by device lock.
+ */
+ idxd_device_wqs_clear_state(idxd);
+
+ spin_lock(&idxd->dev_lock);
+ idxd_groups_clear_state(idxd);
+ idxd_engines_clear_state(idxd);
+ } else {
+ spin_lock(&idxd->dev_lock);
+ }
+
+ idxd->state = IDXD_DEV_DISABLED;
+ spin_unlock(&idxd->dev_lock);
+}
+
+static void idxd_group_config_write(struct idxd_group *group)
+{
+ struct idxd_device *idxd = group->idxd;
+ struct device *dev = &idxd->pdev->dev;
+ int i;
+ u32 grpcfg_offset;
+
+ dev_dbg(dev, "Writing group %d cfg registers\n", group->id);
+
+ /* setup GRPWQCFG */
+ for (i = 0; i < GRPWQCFG_STRIDES; i++) {
+ grpcfg_offset = GRPWQCFG_OFFSET(idxd, group->id, i);
+ iowrite64(group->grpcfg.wqs[i], idxd->reg_base + grpcfg_offset);
+ dev_dbg(dev, "GRPCFG wq[%d:%d: %#x]: %#llx\n",
+ group->id, i, grpcfg_offset,
+ ioread64(idxd->reg_base + grpcfg_offset));
+ }
+
+ /* setup GRPENGCFG */
+ grpcfg_offset = GRPENGCFG_OFFSET(idxd, group->id);
+ iowrite64(group->grpcfg.engines, idxd->reg_base + grpcfg_offset);
+ dev_dbg(dev, "GRPCFG engs[%d: %#x]: %#llx\n", group->id,
+ grpcfg_offset, ioread64(idxd->reg_base + grpcfg_offset));
+
+ /* setup GRPFLAGS */
+ grpcfg_offset = GRPFLGCFG_OFFSET(idxd, group->id);
+ iowrite64(group->grpcfg.flags.bits, idxd->reg_base + grpcfg_offset);
+ dev_dbg(dev, "GRPFLAGS flags[%d: %#x]: %#llx\n",
+ group->id, grpcfg_offset,
+ ioread64(idxd->reg_base + grpcfg_offset));
+}
+
+static int idxd_groups_config_write(struct idxd_device *idxd)
+
+{
+ union gencfg_reg reg;
+ int i;
+ struct device *dev = &idxd->pdev->dev;
+
+ /* Setup bandwidth rdbuf limit */
+ if (idxd->hw.gen_cap.config_en && idxd->rdbuf_limit) {
+ reg.bits = ioread32(idxd->reg_base + IDXD_GENCFG_OFFSET);
+ reg.rdbuf_limit = idxd->rdbuf_limit;
+ iowrite32(reg.bits, idxd->reg_base + IDXD_GENCFG_OFFSET);
+ }
+
+ dev_dbg(dev, "GENCFG(%#x): %#x\n", IDXD_GENCFG_OFFSET,
+ ioread32(idxd->reg_base + IDXD_GENCFG_OFFSET));
+
+ for (i = 0; i < idxd->max_groups; i++) {
+ struct idxd_group *group = idxd->groups[i];
+
+ idxd_group_config_write(group);
+ }
+
+ return 0;
+}
+
+static bool idxd_device_pasid_priv_enabled(struct idxd_device *idxd)
+{
+ struct pci_dev *pdev = idxd->pdev;
+
+ if (pdev->pasid_enabled && (pdev->pasid_features & PCI_PASID_CAP_PRIV))
+ return true;
+ return false;
+}
+
+static int idxd_wq_config_write(struct idxd_wq *wq)
+{
+ struct idxd_device *idxd = wq->idxd;
+ struct device *dev = &idxd->pdev->dev;
+ u32 wq_offset;
+ int i, n;
+
+ if (!wq->group)
+ return 0;
+
+ /*
+ * Instead of memset the entire shadow copy of WQCFG, copy from the hardware after
+ * wq reset. This will copy back the sticky values that are present on some devices.
+ */
+ for (i = 0; i < WQCFG_STRIDES(idxd); i++) {
+ wq_offset = WQCFG_OFFSET(idxd, wq->id, i);
+ wq->wqcfg->bits[i] |= ioread32(idxd->reg_base + wq_offset);
+ }
+
+ if (wq->size == 0 && wq->type != IDXD_WQT_NONE)
+ wq->size = WQ_DEFAULT_QUEUE_DEPTH;
+
+ /* byte 0-3 */
+ wq->wqcfg->wq_size = wq->size;
+
+ /* bytes 4-7 */
+ wq->wqcfg->wq_thresh = wq->threshold;
+
+ /* byte 8-11 */
+ if (wq_dedicated(wq))
+ wq->wqcfg->mode = 1;
+
+ /*
+ * The WQ priv bit is set depending on the WQ type. priv = 1 if the
+ * WQ type is kernel to indicate privileged access. This setting only
+ * matters for dedicated WQ. According to the DSA spec:
+ * If the WQ is in dedicated mode, WQ PASID Enable is 1, and the
+ * Privileged Mode Enable field of the PCI Express PASID capability
+ * is 0, this field must be 0.
+ *
+ * In the case of a dedicated kernel WQ that is not able to support
+ * the PASID cap, then the configuration will be rejected.
+ */
+ if (wq_dedicated(wq) && wq->wqcfg->pasid_en &&
+ !idxd_device_pasid_priv_enabled(idxd) &&
+ wq->type == IDXD_WQT_KERNEL) {
+ idxd->cmd_status = IDXD_SCMD_WQ_NO_PRIV;
+ return -EOPNOTSUPP;
+ }
+
+ wq->wqcfg->priority = wq->priority;
+
+ if (idxd->hw.gen_cap.block_on_fault &&
+ test_bit(WQ_FLAG_BLOCK_ON_FAULT, &wq->flags))
+ wq->wqcfg->bof = 1;
+
+ if (idxd->hw.wq_cap.wq_ats_support)
+ wq->wqcfg->wq_ats_disable = test_bit(WQ_FLAG_ATS_DISABLE, &wq->flags);
+
+ /* bytes 12-15 */
+ wq->wqcfg->max_xfer_shift = ilog2(wq->max_xfer_bytes);
+ idxd_wqcfg_set_max_batch_shift(idxd->data->type, wq->wqcfg, ilog2(wq->max_batch_size));
+
+ /* bytes 32-63 */
+ if (idxd->hw.wq_cap.op_config && wq->opcap_bmap) {
+ memset(wq->wqcfg->op_config, 0, IDXD_MAX_OPCAP_BITS / 8);
+ for_each_set_bit(n, wq->opcap_bmap, IDXD_MAX_OPCAP_BITS) {
+ int pos = n % BITS_PER_LONG_LONG;
+ int idx = n / BITS_PER_LONG_LONG;
+
+ wq->wqcfg->op_config[idx] |= BIT(pos);
+ }
+ }
+
+ dev_dbg(dev, "WQ %d CFGs\n", wq->id);
+ for (i = 0; i < WQCFG_STRIDES(idxd); i++) {
+ wq_offset = WQCFG_OFFSET(idxd, wq->id, i);
+ iowrite32(wq->wqcfg->bits[i], idxd->reg_base + wq_offset);
+ dev_dbg(dev, "WQ[%d][%d][%#x]: %#x\n",
+ wq->id, i, wq_offset,
+ ioread32(idxd->reg_base + wq_offset));
+ }
+
+ return 0;
+}
+
+static int idxd_wqs_config_write(struct idxd_device *idxd)
+{
+ int i, rc;
+
+ for (i = 0; i < idxd->max_wqs; i++) {
+ struct idxd_wq *wq = idxd->wqs[i];
+
+ rc = idxd_wq_config_write(wq);
+ if (rc < 0)
+ return rc;
+ }
+
+ return 0;
+}
+
+static void idxd_group_flags_setup(struct idxd_device *idxd)
+{
+ int i;
+
+ /* TC-A 0 and TC-B 1 should be defaults */
+ for (i = 0; i < idxd->max_groups; i++) {
+ struct idxd_group *group = idxd->groups[i];
+
+ if (group->tc_a == -1)
+ group->tc_a = group->grpcfg.flags.tc_a = 0;
+ else
+ group->grpcfg.flags.tc_a = group->tc_a;
+ if (group->tc_b == -1)
+ group->tc_b = group->grpcfg.flags.tc_b = 1;
+ else
+ group->grpcfg.flags.tc_b = group->tc_b;
+ group->grpcfg.flags.use_rdbuf_limit = group->use_rdbuf_limit;
+ group->grpcfg.flags.rdbufs_reserved = group->rdbufs_reserved;
+ if (group->rdbufs_allowed)
+ group->grpcfg.flags.rdbufs_allowed = group->rdbufs_allowed;
+ else
+ group->grpcfg.flags.rdbufs_allowed = idxd->max_rdbufs;
+
+ group->grpcfg.flags.desc_progress_limit = group->desc_progress_limit;
+ group->grpcfg.flags.batch_progress_limit = group->batch_progress_limit;
+ }
+}
+
+static int idxd_engines_setup(struct idxd_device *idxd)
+{
+ int i, engines = 0;
+ struct idxd_engine *eng;
+ struct idxd_group *group;
+
+ for (i = 0; i < idxd->max_groups; i++) {
+ group = idxd->groups[i];
+ group->grpcfg.engines = 0;
+ }
+
+ for (i = 0; i < idxd->max_engines; i++) {
+ eng = idxd->engines[i];
+ group = eng->group;
+
+ if (!group)
+ continue;
+
+ group->grpcfg.engines |= BIT(eng->id);
+ engines++;
+ }
+
+ if (!engines)
+ return -EINVAL;
+
+ return 0;
+}
+
+static int idxd_wqs_setup(struct idxd_device *idxd)
+{
+ struct idxd_wq *wq;
+ struct idxd_group *group;
+ int i, j, configured = 0;
+ struct device *dev = &idxd->pdev->dev;
+
+ for (i = 0; i < idxd->max_groups; i++) {
+ group = idxd->groups[i];
+ for (j = 0; j < 4; j++)
+ group->grpcfg.wqs[j] = 0;
+ }
+
+ for (i = 0; i < idxd->max_wqs; i++) {
+ wq = idxd->wqs[i];
+ group = wq->group;
+
+ if (!wq->group)
+ continue;
+
+ if (wq_shared(wq) && !wq_shared_supported(wq)) {
+ idxd->cmd_status = IDXD_SCMD_WQ_NO_SWQ_SUPPORT;
+ dev_warn(dev, "No shared wq support but configured.\n");
+ return -EINVAL;
+ }
+
+ group->grpcfg.wqs[wq->id / 64] |= BIT(wq->id % 64);
+ configured++;
+ }
+
+ if (configured == 0) {
+ idxd->cmd_status = IDXD_SCMD_WQ_NONE_CONFIGURED;
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int idxd_device_config(struct idxd_device *idxd)
+{
+ int rc;
+
+ lockdep_assert_held(&idxd->dev_lock);
+ rc = idxd_wqs_setup(idxd);
+ if (rc < 0)
+ return rc;
+
+ rc = idxd_engines_setup(idxd);
+ if (rc < 0)
+ return rc;
+
+ idxd_group_flags_setup(idxd);
+
+ rc = idxd_wqs_config_write(idxd);
+ if (rc < 0)
+ return rc;
+
+ rc = idxd_groups_config_write(idxd);
+ if (rc < 0)
+ return rc;
+
+ return 0;
+}
+
+static int idxd_wq_load_config(struct idxd_wq *wq)
+{
+ struct idxd_device *idxd = wq->idxd;
+ struct device *dev = &idxd->pdev->dev;
+ int wqcfg_offset;
+ int i;
+
+ wqcfg_offset = WQCFG_OFFSET(idxd, wq->id, 0);
+ memcpy_fromio(wq->wqcfg, idxd->reg_base + wqcfg_offset, idxd->wqcfg_size);
+
+ wq->size = wq->wqcfg->wq_size;
+ wq->threshold = wq->wqcfg->wq_thresh;
+
+ /* The driver does not support shared WQ mode in read-only config yet */
+ if (wq->wqcfg->mode == 0 || wq->wqcfg->pasid_en)
+ return -EOPNOTSUPP;
+
+ set_bit(WQ_FLAG_DEDICATED, &wq->flags);
+
+ wq->priority = wq->wqcfg->priority;
+
+ wq->max_xfer_bytes = 1ULL << wq->wqcfg->max_xfer_shift;
+ idxd_wq_set_max_batch_size(idxd->data->type, wq, 1U << wq->wqcfg->max_batch_shift);
+
+ for (i = 0; i < WQCFG_STRIDES(idxd); i++) {
+ wqcfg_offset = WQCFG_OFFSET(idxd, wq->id, i);
+ dev_dbg(dev, "WQ[%d][%d][%#x]: %#x\n", wq->id, i, wqcfg_offset, wq->wqcfg->bits[i]);
+ }
+
+ return 0;
+}
+
+static void idxd_group_load_config(struct idxd_group *group)
+{
+ struct idxd_device *idxd = group->idxd;
+ struct device *dev = &idxd->pdev->dev;
+ int i, j, grpcfg_offset;
+
+ /*
+ * Load WQS bit fields
+ * Iterate through all 256 bits 64 bits at a time
+ */
+ for (i = 0; i < GRPWQCFG_STRIDES; i++) {
+ struct idxd_wq *wq;
+
+ grpcfg_offset = GRPWQCFG_OFFSET(idxd, group->id, i);
+ group->grpcfg.wqs[i] = ioread64(idxd->reg_base + grpcfg_offset);
+ dev_dbg(dev, "GRPCFG wq[%d:%d: %#x]: %#llx\n",
+ group->id, i, grpcfg_offset, group->grpcfg.wqs[i]);
+
+ if (i * 64 >= idxd->max_wqs)
+ break;
+
+ /* Iterate through all 64 bits and check for wq set */
+ for (j = 0; j < 64; j++) {
+ int id = i * 64 + j;
+
+ /* No need to check beyond max wqs */
+ if (id >= idxd->max_wqs)
+ break;
+
+ /* Set group assignment for wq if wq bit is set */
+ if (group->grpcfg.wqs[i] & BIT(j)) {
+ wq = idxd->wqs[id];
+ wq->group = group;
+ }
+ }
+ }
+
+ grpcfg_offset = GRPENGCFG_OFFSET(idxd, group->id);
+ group->grpcfg.engines = ioread64(idxd->reg_base + grpcfg_offset);
+ dev_dbg(dev, "GRPCFG engs[%d: %#x]: %#llx\n", group->id,
+ grpcfg_offset, group->grpcfg.engines);
+
+ /* Iterate through all 64 bits to check engines set */
+ for (i = 0; i < 64; i++) {
+ if (i >= idxd->max_engines)
+ break;
+
+ if (group->grpcfg.engines & BIT(i)) {
+ struct idxd_engine *engine = idxd->engines[i];
+
+ engine->group = group;
+ }
+ }
+
+ grpcfg_offset = GRPFLGCFG_OFFSET(idxd, group->id);
+ group->grpcfg.flags.bits = ioread64(idxd->reg_base + grpcfg_offset);
+ dev_dbg(dev, "GRPFLAGS flags[%d: %#x]: %#llx\n",
+ group->id, grpcfg_offset, group->grpcfg.flags.bits);
+}
+
+int idxd_device_load_config(struct idxd_device *idxd)
+{
+ union gencfg_reg reg;
+ int i, rc;
+
+ reg.bits = ioread32(idxd->reg_base + IDXD_GENCFG_OFFSET);
+ idxd->rdbuf_limit = reg.rdbuf_limit;
+
+ for (i = 0; i < idxd->max_groups; i++) {
+ struct idxd_group *group = idxd->groups[i];
+
+ idxd_group_load_config(group);
+ }
+
+ for (i = 0; i < idxd->max_wqs; i++) {
+ struct idxd_wq *wq = idxd->wqs[i];
+
+ rc = idxd_wq_load_config(wq);
+ if (rc < 0)
+ return rc;
+ }
+
+ return 0;
+}
+
+static void idxd_flush_pending_descs(struct idxd_irq_entry *ie)
+{
+ struct idxd_desc *desc, *itr;
+ struct llist_node *head;
+ LIST_HEAD(flist);
+ enum idxd_complete_type ctype;
+
+ spin_lock(&ie->list_lock);
+ head = llist_del_all(&ie->pending_llist);
+ if (head) {
+ llist_for_each_entry_safe(desc, itr, head, llnode)
+ list_add_tail(&desc->list, &ie->work_list);
+ }
+
+ list_for_each_entry_safe(desc, itr, &ie->work_list, list)
+ list_move_tail(&desc->list, &flist);
+ spin_unlock(&ie->list_lock);
+
+ list_for_each_entry_safe(desc, itr, &flist, list) {
+ struct dma_async_tx_descriptor *tx;
+
+ list_del(&desc->list);
+ ctype = desc->completion->status ? IDXD_COMPLETE_NORMAL : IDXD_COMPLETE_ABORT;
+ /*
+ * wq is being disabled. Any remaining descriptors are
+ * likely to be stuck and can be dropped. callback could
+ * point to code that is no longer accessible, for example
+ * if dmatest module has been unloaded.
+ */
+ tx = &desc->txd;
+ tx->callback = NULL;
+ tx->callback_result = NULL;
+ idxd_dma_complete_txd(desc, ctype, true);
+ }
+}
+
+static void idxd_device_set_perm_entry(struct idxd_device *idxd,
+ struct idxd_irq_entry *ie)
+{
+ union msix_perm mperm;
+
+ if (ie->pasid == INVALID_IOASID)
+ return;
+
+ mperm.bits = 0;
+ mperm.pasid = ie->pasid;
+ mperm.pasid_en = 1;
+ iowrite32(mperm.bits, idxd->reg_base + idxd->msix_perm_offset + ie->id * 8);
+}
+
+static void idxd_device_clear_perm_entry(struct idxd_device *idxd,
+ struct idxd_irq_entry *ie)
+{
+ iowrite32(0, idxd->reg_base + idxd->msix_perm_offset + ie->id * 8);
+}
+
+void idxd_wq_free_irq(struct idxd_wq *wq)
+{
+ struct idxd_device *idxd = wq->idxd;
+ struct idxd_irq_entry *ie = &wq->ie;
+
+ if (wq->type != IDXD_WQT_KERNEL)
+ return;
+
+ free_irq(ie->vector, ie);
+ idxd_flush_pending_descs(ie);
+ if (idxd->request_int_handles)
+ idxd_device_release_int_handle(idxd, ie->int_handle, IDXD_IRQ_MSIX);
+ idxd_device_clear_perm_entry(idxd, ie);
+ ie->vector = -1;
+ ie->int_handle = INVALID_INT_HANDLE;
+ ie->pasid = INVALID_IOASID;
+}
+
+int idxd_wq_request_irq(struct idxd_wq *wq)
+{
+ struct idxd_device *idxd = wq->idxd;
+ struct pci_dev *pdev = idxd->pdev;
+ struct device *dev = &pdev->dev;
+ struct idxd_irq_entry *ie;
+ int rc;
+
+ if (wq->type != IDXD_WQT_KERNEL)
+ return 0;
+
+ ie = &wq->ie;
+ ie->vector = pci_irq_vector(pdev, ie->id);
+ ie->pasid = device_pasid_enabled(idxd) ? idxd->pasid : INVALID_IOASID;
+ idxd_device_set_perm_entry(idxd, ie);
+
+ rc = request_threaded_irq(ie->vector, NULL, idxd_wq_thread, 0, "idxd-portal", ie);
+ if (rc < 0) {
+ dev_err(dev, "Failed to request irq %d.\n", ie->vector);
+ goto err_irq;
+ }
+
+ if (idxd->request_int_handles) {
+ rc = idxd_device_request_int_handle(idxd, ie->id, &ie->int_handle,
+ IDXD_IRQ_MSIX);
+ if (rc < 0)
+ goto err_int_handle;
+ } else {
+ ie->int_handle = ie->id;
+ }
+
+ return 0;
+
+err_int_handle:
+ ie->int_handle = INVALID_INT_HANDLE;
+ free_irq(ie->vector, ie);
+err_irq:
+ idxd_device_clear_perm_entry(idxd, ie);
+ ie->pasid = INVALID_IOASID;
+ return rc;
+}
+
+int drv_enable_wq(struct idxd_wq *wq)
+{
+ struct idxd_device *idxd = wq->idxd;
+ struct device *dev = &idxd->pdev->dev;
+ int rc = -ENXIO;
+
+ lockdep_assert_held(&wq->wq_lock);
+
+ if (idxd->state != IDXD_DEV_ENABLED) {
+ idxd->cmd_status = IDXD_SCMD_DEV_NOT_ENABLED;
+ goto err;
+ }
+
+ if (wq->state != IDXD_WQ_DISABLED) {
+ dev_dbg(dev, "wq %d already enabled.\n", wq->id);
+ idxd->cmd_status = IDXD_SCMD_WQ_ENABLED;
+ rc = -EBUSY;
+ goto err;
+ }
+
+ if (!wq->group) {
+ dev_dbg(dev, "wq %d not attached to group.\n", wq->id);
+ idxd->cmd_status = IDXD_SCMD_WQ_NO_GRP;
+ goto err;
+ }
+
+ if (strlen(wq->name) == 0) {
+ idxd->cmd_status = IDXD_SCMD_WQ_NO_NAME;
+ dev_dbg(dev, "wq %d name not set.\n", wq->id);
+ goto err;
+ }
+
+ /* Shared WQ checks */
+ if (wq_shared(wq)) {
+ if (!wq_shared_supported(wq)) {
+ idxd->cmd_status = IDXD_SCMD_WQ_NO_SVM;
+ dev_dbg(dev, "PASID not enabled and shared wq.\n");
+ goto err;
+ }
+ /*
+ * Shared wq with the threshold set to 0 means the user
+ * did not set the threshold or transitioned from a
+ * dedicated wq but did not set threshold. A value
+ * of 0 would effectively disable the shared wq. The
+ * driver does not allow a value of 0 to be set for
+ * threshold via sysfs.
+ */
+ if (wq->threshold == 0) {
+ idxd->cmd_status = IDXD_SCMD_WQ_NO_THRESH;
+ dev_dbg(dev, "Shared wq and threshold 0.\n");
+ goto err;
+ }
+ }
+
+ /*
+ * In the event that the WQ is configurable for pasid and priv bits.
+ * For kernel wq, the driver should setup the pasid, pasid_en, and priv bit.
+ * However, for non-kernel wq, the driver should only set the pasid_en bit for
+ * shared wq. A dedicated wq that is not 'kernel' type will configure pasid and
+ * pasid_en later on so there is no need to setup.
+ */
+ if (test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) {
+ int priv = 0;
+
+ if (wq_pasid_enabled(wq)) {
+ if (is_idxd_wq_kernel(wq) || wq_shared(wq)) {
+ u32 pasid = wq_dedicated(wq) ? idxd->pasid : 0;
+
+ __idxd_wq_set_pasid_locked(wq, pasid);
+ }
+ }
+
+ if (is_idxd_wq_kernel(wq))
+ priv = 1;
+ __idxd_wq_set_priv_locked(wq, priv);
+ }
+
+ rc = 0;
+ spin_lock(&idxd->dev_lock);
+ if (test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
+ rc = idxd_device_config(idxd);
+ spin_unlock(&idxd->dev_lock);
+ if (rc < 0) {
+ dev_dbg(dev, "Writing wq %d config failed: %d\n", wq->id, rc);
+ goto err;
+ }
+
+ rc = idxd_wq_enable(wq);
+ if (rc < 0) {
+ dev_dbg(dev, "wq %d enabling failed: %d\n", wq->id, rc);
+ goto err;
+ }
+
+ rc = idxd_wq_map_portal(wq);
+ if (rc < 0) {
+ idxd->cmd_status = IDXD_SCMD_WQ_PORTAL_ERR;
+ dev_dbg(dev, "wq %d portal mapping failed: %d\n", wq->id, rc);
+ goto err_map_portal;
+ }
+
+ wq->client_count = 0;
+
+ rc = idxd_wq_request_irq(wq);
+ if (rc < 0) {
+ idxd->cmd_status = IDXD_SCMD_WQ_IRQ_ERR;
+ dev_dbg(dev, "WQ %d irq setup failed: %d\n", wq->id, rc);
+ goto err_irq;
+ }
+
+ rc = idxd_wq_alloc_resources(wq);
+ if (rc < 0) {
+ idxd->cmd_status = IDXD_SCMD_WQ_RES_ALLOC_ERR;
+ dev_dbg(dev, "WQ resource alloc failed\n");
+ goto err_res_alloc;
+ }
+
+ rc = idxd_wq_init_percpu_ref(wq);
+ if (rc < 0) {
+ idxd->cmd_status = IDXD_SCMD_PERCPU_ERR;
+ dev_dbg(dev, "percpu_ref setup failed\n");
+ goto err_ref;
+ }
+
+ return 0;
+
+err_ref:
+ idxd_wq_free_resources(wq);
+err_res_alloc:
+ idxd_wq_free_irq(wq);
+err_irq:
+ idxd_wq_unmap_portal(wq);
+err_map_portal:
+ if (idxd_wq_disable(wq, false))
+ dev_dbg(dev, "wq %s disable failed\n", dev_name(wq_confdev(wq)));
+err:
+ return rc;
+}
+
+void drv_disable_wq(struct idxd_wq *wq)
+{
+ struct idxd_device *idxd = wq->idxd;
+ struct device *dev = &idxd->pdev->dev;
+
+ lockdep_assert_held(&wq->wq_lock);
+
+ if (idxd_wq_refcount(wq))
+ dev_warn(dev, "Clients has claim on wq %d: %d\n",
+ wq->id, idxd_wq_refcount(wq));
+
+ idxd_wq_unmap_portal(wq);
+ idxd_wq_drain(wq);
+ idxd_wq_free_irq(wq);
+ idxd_wq_reset(wq);
+ idxd_wq_free_resources(wq);
+ percpu_ref_exit(&wq->wq_active);
+ wq->type = IDXD_WQT_NONE;
+ wq->client_count = 0;
+}
+
+int idxd_device_drv_probe(struct idxd_dev *idxd_dev)
+{
+ struct idxd_device *idxd = idxd_dev_to_idxd(idxd_dev);
+ int rc = 0;
+
+ /*
+ * Device should be in disabled state for the idxd_drv to load. If it's in
+ * enabled state, then the device was altered outside of driver's control.
+ * If the state is in halted state, then we don't want to proceed.
+ */
+ if (idxd->state != IDXD_DEV_DISABLED) {
+ idxd->cmd_status = IDXD_SCMD_DEV_ENABLED;
+ return -ENXIO;
+ }
+
+ /* Device configuration */
+ spin_lock(&idxd->dev_lock);
+ if (test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
+ rc = idxd_device_config(idxd);
+ spin_unlock(&idxd->dev_lock);
+ if (rc < 0)
+ return -ENXIO;
+
+ /* Start device */
+ rc = idxd_device_enable(idxd);
+ if (rc < 0)
+ return rc;
+
+ /* Setup DMA device without channels */
+ rc = idxd_register_dma_device(idxd);
+ if (rc < 0) {
+ idxd_device_disable(idxd);
+ idxd->cmd_status = IDXD_SCMD_DEV_DMA_ERR;
+ return rc;
+ }
+
+ idxd->cmd_status = 0;
+ return 0;
+}
+
+void idxd_device_drv_remove(struct idxd_dev *idxd_dev)
+{
+ struct device *dev = &idxd_dev->conf_dev;
+ struct idxd_device *idxd = idxd_dev_to_idxd(idxd_dev);
+ int i;
+
+ for (i = 0; i < idxd->max_wqs; i++) {
+ struct idxd_wq *wq = idxd->wqs[i];
+ struct device *wq_dev = wq_confdev(wq);
+
+ if (wq->state == IDXD_WQ_DISABLED)
+ continue;
+ dev_warn(dev, "Active wq %d on disable %s.\n", i, dev_name(wq_dev));
+ device_release_driver(wq_dev);
+ }
+
+ idxd_unregister_dma_device(idxd);
+ idxd_device_disable(idxd);
+ if (test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
+ idxd_device_reset(idxd);
+}
+
+static enum idxd_dev_type dev_types[] = {
+ IDXD_DEV_DSA,
+ IDXD_DEV_IAX,
+ IDXD_DEV_NONE,
+};
+
+struct idxd_device_driver idxd_drv = {
+ .type = dev_types,
+ .probe = idxd_device_drv_probe,
+ .remove = idxd_device_drv_remove,
+ .name = "idxd",
+};
+EXPORT_SYMBOL_GPL(idxd_drv);
diff --git a/drivers/dma/idxd/dma.c b/drivers/dma/idxd/dma.c
new file mode 100644
index 000000000..e0874cb47
--- /dev/null
+++ b/drivers/dma/idxd/dma.c
@@ -0,0 +1,364 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2019 Intel Corporation. All rights rsvd. */
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/device.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
+#include <linux/dmaengine.h>
+#include <uapi/linux/idxd.h>
+#include "../dmaengine.h"
+#include "registers.h"
+#include "idxd.h"
+
+static inline struct idxd_wq *to_idxd_wq(struct dma_chan *c)
+{
+ struct idxd_dma_chan *idxd_chan;
+
+ idxd_chan = container_of(c, struct idxd_dma_chan, chan);
+ return idxd_chan->wq;
+}
+
+void idxd_dma_complete_txd(struct idxd_desc *desc,
+ enum idxd_complete_type comp_type,
+ bool free_desc)
+{
+ struct idxd_device *idxd = desc->wq->idxd;
+ struct dma_async_tx_descriptor *tx;
+ struct dmaengine_result res;
+ int complete = 1;
+
+ if (desc->completion->status == DSA_COMP_SUCCESS) {
+ res.result = DMA_TRANS_NOERROR;
+ } else if (desc->completion->status) {
+ if (idxd->request_int_handles && comp_type != IDXD_COMPLETE_ABORT &&
+ desc->completion->status == DSA_COMP_INT_HANDLE_INVAL &&
+ idxd_queue_int_handle_resubmit(desc))
+ return;
+ res.result = DMA_TRANS_WRITE_FAILED;
+ } else if (comp_type == IDXD_COMPLETE_ABORT) {
+ res.result = DMA_TRANS_ABORTED;
+ } else {
+ complete = 0;
+ }
+
+ tx = &desc->txd;
+ if (complete && tx->cookie) {
+ dma_cookie_complete(tx);
+ dma_descriptor_unmap(tx);
+ dmaengine_desc_get_callback_invoke(tx, &res);
+ tx->callback = NULL;
+ tx->callback_result = NULL;
+ }
+
+ if (free_desc)
+ idxd_free_desc(desc->wq, desc);
+}
+
+static void op_flag_setup(unsigned long flags, u32 *desc_flags)
+{
+ *desc_flags = IDXD_OP_FLAG_CRAV | IDXD_OP_FLAG_RCR;
+ if (flags & DMA_PREP_INTERRUPT)
+ *desc_flags |= IDXD_OP_FLAG_RCI;
+}
+
+static inline void set_completion_address(struct idxd_desc *desc,
+ u64 *compl_addr)
+{
+ *compl_addr = desc->compl_dma;
+}
+
+static inline void idxd_prep_desc_common(struct idxd_wq *wq,
+ struct dsa_hw_desc *hw, char opcode,
+ u64 addr_f1, u64 addr_f2, u64 len,
+ u64 compl, u32 flags)
+{
+ hw->flags = flags;
+ hw->opcode = opcode;
+ hw->src_addr = addr_f1;
+ hw->dst_addr = addr_f2;
+ hw->xfer_size = len;
+ /*
+ * For dedicated WQ, this field is ignored and HW will use the WQCFG.priv
+ * field instead. This field should be set to 1 for kernel descriptors.
+ */
+ hw->priv = 1;
+ hw->completion_addr = compl;
+}
+
+static struct dma_async_tx_descriptor *
+idxd_dma_prep_interrupt(struct dma_chan *c, unsigned long flags)
+{
+ struct idxd_wq *wq = to_idxd_wq(c);
+ u32 desc_flags;
+ struct idxd_desc *desc;
+
+ if (wq->state != IDXD_WQ_ENABLED)
+ return NULL;
+
+ op_flag_setup(flags, &desc_flags);
+ desc = idxd_alloc_desc(wq, IDXD_OP_BLOCK);
+ if (IS_ERR(desc))
+ return NULL;
+
+ idxd_prep_desc_common(wq, desc->hw, DSA_OPCODE_NOOP,
+ 0, 0, 0, desc->compl_dma, desc_flags);
+ desc->txd.flags = flags;
+ return &desc->txd;
+}
+
+static struct dma_async_tx_descriptor *
+idxd_dma_submit_memcpy(struct dma_chan *c, dma_addr_t dma_dest,
+ dma_addr_t dma_src, size_t len, unsigned long flags)
+{
+ struct idxd_wq *wq = to_idxd_wq(c);
+ u32 desc_flags;
+ struct idxd_device *idxd = wq->idxd;
+ struct idxd_desc *desc;
+
+ if (wq->state != IDXD_WQ_ENABLED)
+ return NULL;
+
+ if (len > idxd->max_xfer_bytes)
+ return NULL;
+
+ op_flag_setup(flags, &desc_flags);
+ desc = idxd_alloc_desc(wq, IDXD_OP_BLOCK);
+ if (IS_ERR(desc))
+ return NULL;
+
+ idxd_prep_desc_common(wq, desc->hw, DSA_OPCODE_MEMMOVE,
+ dma_src, dma_dest, len, desc->compl_dma,
+ desc_flags);
+
+ desc->txd.flags = flags;
+
+ return &desc->txd;
+}
+
+static int idxd_dma_alloc_chan_resources(struct dma_chan *chan)
+{
+ struct idxd_wq *wq = to_idxd_wq(chan);
+ struct device *dev = &wq->idxd->pdev->dev;
+
+ idxd_wq_get(wq);
+ dev_dbg(dev, "%s: client_count: %d\n", __func__,
+ idxd_wq_refcount(wq));
+ return 0;
+}
+
+static void idxd_dma_free_chan_resources(struct dma_chan *chan)
+{
+ struct idxd_wq *wq = to_idxd_wq(chan);
+ struct device *dev = &wq->idxd->pdev->dev;
+
+ idxd_wq_put(wq);
+ dev_dbg(dev, "%s: client_count: %d\n", __func__,
+ idxd_wq_refcount(wq));
+}
+
+static enum dma_status idxd_dma_tx_status(struct dma_chan *dma_chan,
+ dma_cookie_t cookie,
+ struct dma_tx_state *txstate)
+{
+ return DMA_OUT_OF_ORDER;
+}
+
+/*
+ * issue_pending() does not need to do anything since tx_submit() does the job
+ * already.
+ */
+static void idxd_dma_issue_pending(struct dma_chan *dma_chan)
+{
+}
+
+static dma_cookie_t idxd_dma_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+ struct dma_chan *c = tx->chan;
+ struct idxd_wq *wq = to_idxd_wq(c);
+ dma_cookie_t cookie;
+ int rc;
+ struct idxd_desc *desc = container_of(tx, struct idxd_desc, txd);
+
+ cookie = dma_cookie_assign(tx);
+
+ rc = idxd_submit_desc(wq, desc);
+ if (rc < 0) {
+ idxd_free_desc(wq, desc);
+ return rc;
+ }
+
+ return cookie;
+}
+
+static void idxd_dma_release(struct dma_device *device)
+{
+ struct idxd_dma_dev *idxd_dma = container_of(device, struct idxd_dma_dev, dma);
+
+ kfree(idxd_dma);
+}
+
+int idxd_register_dma_device(struct idxd_device *idxd)
+{
+ struct idxd_dma_dev *idxd_dma;
+ struct dma_device *dma;
+ struct device *dev = &idxd->pdev->dev;
+ int rc;
+
+ idxd_dma = kzalloc_node(sizeof(*idxd_dma), GFP_KERNEL, dev_to_node(dev));
+ if (!idxd_dma)
+ return -ENOMEM;
+
+ dma = &idxd_dma->dma;
+ INIT_LIST_HEAD(&dma->channels);
+ dma->dev = dev;
+
+ dma_cap_set(DMA_INTERRUPT, dma->cap_mask);
+ dma_cap_set(DMA_PRIVATE, dma->cap_mask);
+ dma_cap_set(DMA_COMPLETION_NO_ORDER, dma->cap_mask);
+ dma->device_release = idxd_dma_release;
+
+ dma->device_prep_dma_interrupt = idxd_dma_prep_interrupt;
+ if (idxd->hw.opcap.bits[0] & IDXD_OPCAP_MEMMOVE) {
+ dma_cap_set(DMA_MEMCPY, dma->cap_mask);
+ dma->device_prep_dma_memcpy = idxd_dma_submit_memcpy;
+ }
+
+ dma->device_tx_status = idxd_dma_tx_status;
+ dma->device_issue_pending = idxd_dma_issue_pending;
+ dma->device_alloc_chan_resources = idxd_dma_alloc_chan_resources;
+ dma->device_free_chan_resources = idxd_dma_free_chan_resources;
+
+ rc = dma_async_device_register(dma);
+ if (rc < 0) {
+ kfree(idxd_dma);
+ return rc;
+ }
+
+ idxd_dma->idxd = idxd;
+ /*
+ * This pointer is protected by the refs taken by the dma_chan. It will remain valid
+ * as long as there are outstanding channels.
+ */
+ idxd->idxd_dma = idxd_dma;
+ return 0;
+}
+
+void idxd_unregister_dma_device(struct idxd_device *idxd)
+{
+ dma_async_device_unregister(&idxd->idxd_dma->dma);
+}
+
+static int idxd_register_dma_channel(struct idxd_wq *wq)
+{
+ struct idxd_device *idxd = wq->idxd;
+ struct dma_device *dma = &idxd->idxd_dma->dma;
+ struct device *dev = &idxd->pdev->dev;
+ struct idxd_dma_chan *idxd_chan;
+ struct dma_chan *chan;
+ int rc, i;
+
+ idxd_chan = kzalloc_node(sizeof(*idxd_chan), GFP_KERNEL, dev_to_node(dev));
+ if (!idxd_chan)
+ return -ENOMEM;
+
+ chan = &idxd_chan->chan;
+ chan->device = dma;
+ list_add_tail(&chan->device_node, &dma->channels);
+
+ for (i = 0; i < wq->num_descs; i++) {
+ struct idxd_desc *desc = wq->descs[i];
+
+ dma_async_tx_descriptor_init(&desc->txd, chan);
+ desc->txd.tx_submit = idxd_dma_tx_submit;
+ }
+
+ rc = dma_async_device_channel_register(dma, chan);
+ if (rc < 0) {
+ kfree(idxd_chan);
+ return rc;
+ }
+
+ wq->idxd_chan = idxd_chan;
+ idxd_chan->wq = wq;
+ get_device(wq_confdev(wq));
+
+ return 0;
+}
+
+static void idxd_unregister_dma_channel(struct idxd_wq *wq)
+{
+ struct idxd_dma_chan *idxd_chan = wq->idxd_chan;
+ struct dma_chan *chan = &idxd_chan->chan;
+ struct idxd_dma_dev *idxd_dma = wq->idxd->idxd_dma;
+
+ dma_async_device_channel_unregister(&idxd_dma->dma, chan);
+ list_del(&chan->device_node);
+ kfree(wq->idxd_chan);
+ wq->idxd_chan = NULL;
+ put_device(wq_confdev(wq));
+}
+
+static int idxd_dmaengine_drv_probe(struct idxd_dev *idxd_dev)
+{
+ struct device *dev = &idxd_dev->conf_dev;
+ struct idxd_wq *wq = idxd_dev_to_wq(idxd_dev);
+ struct idxd_device *idxd = wq->idxd;
+ int rc;
+
+ if (idxd->state != IDXD_DEV_ENABLED)
+ return -ENXIO;
+
+ mutex_lock(&wq->wq_lock);
+ wq->type = IDXD_WQT_KERNEL;
+
+ rc = drv_enable_wq(wq);
+ if (rc < 0) {
+ dev_dbg(dev, "Enable wq %d failed: %d\n", wq->id, rc);
+ rc = -ENXIO;
+ goto err;
+ }
+
+ rc = idxd_register_dma_channel(wq);
+ if (rc < 0) {
+ idxd->cmd_status = IDXD_SCMD_DMA_CHAN_ERR;
+ dev_dbg(dev, "Failed to register dma channel\n");
+ goto err_dma;
+ }
+
+ idxd->cmd_status = 0;
+ mutex_unlock(&wq->wq_lock);
+ return 0;
+
+err_dma:
+ drv_disable_wq(wq);
+err:
+ wq->type = IDXD_WQT_NONE;
+ mutex_unlock(&wq->wq_lock);
+ return rc;
+}
+
+static void idxd_dmaengine_drv_remove(struct idxd_dev *idxd_dev)
+{
+ struct idxd_wq *wq = idxd_dev_to_wq(idxd_dev);
+
+ mutex_lock(&wq->wq_lock);
+ __idxd_wq_quiesce(wq);
+ idxd_unregister_dma_channel(wq);
+ drv_disable_wq(wq);
+ mutex_unlock(&wq->wq_lock);
+}
+
+static enum idxd_dev_type dev_types[] = {
+ IDXD_DEV_WQ,
+ IDXD_DEV_NONE,
+};
+
+struct idxd_device_driver idxd_dmaengine_drv = {
+ .probe = idxd_dmaengine_drv_probe,
+ .remove = idxd_dmaengine_drv_remove,
+ .name = "dmaengine",
+ .type = dev_types,
+};
+EXPORT_SYMBOL_GPL(idxd_dmaengine_drv);
diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h
new file mode 100644
index 000000000..7ced8d283
--- /dev/null
+++ b/drivers/dma/idxd/idxd.h
@@ -0,0 +1,681 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2019 Intel Corporation. All rights rsvd. */
+#ifndef _IDXD_H_
+#define _IDXD_H_
+
+#include <linux/sbitmap.h>
+#include <linux/dmaengine.h>
+#include <linux/percpu-rwsem.h>
+#include <linux/wait.h>
+#include <linux/cdev.h>
+#include <linux/idr.h>
+#include <linux/pci.h>
+#include <linux/ioasid.h>
+#include <linux/bitmap.h>
+#include <linux/perf_event.h>
+#include <uapi/linux/idxd.h>
+#include "registers.h"
+
+#define IDXD_DRIVER_VERSION "1.00"
+
+extern struct kmem_cache *idxd_desc_pool;
+extern bool tc_override;
+
+struct idxd_wq;
+struct idxd_dev;
+
+enum idxd_dev_type {
+ IDXD_DEV_NONE = -1,
+ IDXD_DEV_DSA = 0,
+ IDXD_DEV_IAX,
+ IDXD_DEV_WQ,
+ IDXD_DEV_GROUP,
+ IDXD_DEV_ENGINE,
+ IDXD_DEV_CDEV,
+ IDXD_DEV_MAX_TYPE,
+};
+
+struct idxd_dev {
+ struct device conf_dev;
+ enum idxd_dev_type type;
+};
+
+#define IDXD_REG_TIMEOUT 50
+#define IDXD_DRAIN_TIMEOUT 5000
+
+enum idxd_type {
+ IDXD_TYPE_UNKNOWN = -1,
+ IDXD_TYPE_DSA = 0,
+ IDXD_TYPE_IAX,
+ IDXD_TYPE_MAX,
+};
+
+#define IDXD_NAME_SIZE 128
+#define IDXD_PMU_EVENT_MAX 64
+
+#define IDXD_ENQCMDS_RETRIES 32
+#define IDXD_ENQCMDS_MAX_RETRIES 64
+
+struct idxd_device_driver {
+ const char *name;
+ enum idxd_dev_type *type;
+ int (*probe)(struct idxd_dev *idxd_dev);
+ void (*remove)(struct idxd_dev *idxd_dev);
+ struct device_driver drv;
+};
+
+extern struct idxd_device_driver dsa_drv;
+extern struct idxd_device_driver idxd_drv;
+extern struct idxd_device_driver idxd_dmaengine_drv;
+extern struct idxd_device_driver idxd_user_drv;
+
+#define INVALID_INT_HANDLE -1
+struct idxd_irq_entry {
+ int id;
+ int vector;
+ struct llist_head pending_llist;
+ struct list_head work_list;
+ /*
+ * Lock to protect access between irq thread process descriptor
+ * and irq thread processing error descriptor.
+ */
+ spinlock_t list_lock;
+ int int_handle;
+ ioasid_t pasid;
+};
+
+struct idxd_group {
+ struct idxd_dev idxd_dev;
+ struct idxd_device *idxd;
+ struct grpcfg grpcfg;
+ int id;
+ int num_engines;
+ int num_wqs;
+ bool use_rdbuf_limit;
+ u8 rdbufs_allowed;
+ u8 rdbufs_reserved;
+ int tc_a;
+ int tc_b;
+ int desc_progress_limit;
+ int batch_progress_limit;
+};
+
+struct idxd_pmu {
+ struct idxd_device *idxd;
+
+ struct perf_event *event_list[IDXD_PMU_EVENT_MAX];
+ int n_events;
+
+ DECLARE_BITMAP(used_mask, IDXD_PMU_EVENT_MAX);
+
+ struct pmu pmu;
+ char name[IDXD_NAME_SIZE];
+ int cpu;
+
+ int n_counters;
+ int counter_width;
+ int n_event_categories;
+
+ bool per_counter_caps_supported;
+ unsigned long supported_event_categories;
+
+ unsigned long supported_filters;
+ int n_filters;
+
+ struct hlist_node cpuhp_node;
+};
+
+#define IDXD_MAX_PRIORITY 0xf
+
+enum idxd_wq_state {
+ IDXD_WQ_DISABLED = 0,
+ IDXD_WQ_ENABLED,
+};
+
+enum idxd_wq_flag {
+ WQ_FLAG_DEDICATED = 0,
+ WQ_FLAG_BLOCK_ON_FAULT,
+ WQ_FLAG_ATS_DISABLE,
+};
+
+enum idxd_wq_type {
+ IDXD_WQT_NONE = 0,
+ IDXD_WQT_KERNEL,
+ IDXD_WQT_USER,
+};
+
+struct idxd_cdev {
+ struct idxd_wq *wq;
+ struct cdev cdev;
+ struct idxd_dev idxd_dev;
+ int minor;
+};
+
+#define IDXD_ALLOCATED_BATCH_SIZE 128U
+#define WQ_NAME_SIZE 1024
+#define WQ_TYPE_SIZE 10
+
+#define WQ_DEFAULT_QUEUE_DEPTH 16
+#define WQ_DEFAULT_MAX_XFER SZ_2M
+#define WQ_DEFAULT_MAX_BATCH 32
+
+enum idxd_op_type {
+ IDXD_OP_BLOCK = 0,
+ IDXD_OP_NONBLOCK = 1,
+};
+
+enum idxd_complete_type {
+ IDXD_COMPLETE_NORMAL = 0,
+ IDXD_COMPLETE_ABORT,
+ IDXD_COMPLETE_DEV_FAIL,
+};
+
+struct idxd_dma_chan {
+ struct dma_chan chan;
+ struct idxd_wq *wq;
+};
+
+struct idxd_wq {
+ void __iomem *portal;
+ u32 portal_offset;
+ unsigned int enqcmds_retries;
+ struct percpu_ref wq_active;
+ struct completion wq_dead;
+ struct completion wq_resurrect;
+ struct idxd_dev idxd_dev;
+ struct idxd_cdev *idxd_cdev;
+ struct wait_queue_head err_queue;
+ struct idxd_device *idxd;
+ int id;
+ struct idxd_irq_entry ie;
+ enum idxd_wq_type type;
+ struct idxd_group *group;
+ int client_count;
+ struct mutex wq_lock; /* mutex for workqueue */
+ u32 size;
+ u32 threshold;
+ u32 priority;
+ enum idxd_wq_state state;
+ unsigned long flags;
+ union wqcfg *wqcfg;
+ unsigned long *opcap_bmap;
+
+ struct dsa_hw_desc **hw_descs;
+ int num_descs;
+ union {
+ struct dsa_completion_record *compls;
+ struct iax_completion_record *iax_compls;
+ };
+ dma_addr_t compls_addr;
+ int compls_size;
+ struct idxd_desc **descs;
+ struct sbitmap_queue sbq;
+ struct idxd_dma_chan *idxd_chan;
+ char name[WQ_NAME_SIZE + 1];
+ u64 max_xfer_bytes;
+ u32 max_batch_size;
+};
+
+struct idxd_engine {
+ struct idxd_dev idxd_dev;
+ int id;
+ struct idxd_group *group;
+ struct idxd_device *idxd;
+};
+
+/* shadow registers */
+struct idxd_hw {
+ u32 version;
+ union gen_cap_reg gen_cap;
+ union wq_cap_reg wq_cap;
+ union group_cap_reg group_cap;
+ union engine_cap_reg engine_cap;
+ struct opcap opcap;
+ u32 cmd_cap;
+};
+
+enum idxd_device_state {
+ IDXD_DEV_HALTED = -1,
+ IDXD_DEV_DISABLED = 0,
+ IDXD_DEV_ENABLED,
+};
+
+enum idxd_device_flag {
+ IDXD_FLAG_CONFIGURABLE = 0,
+ IDXD_FLAG_CMD_RUNNING,
+ IDXD_FLAG_PASID_ENABLED,
+ IDXD_FLAG_USER_PASID_ENABLED,
+};
+
+struct idxd_dma_dev {
+ struct idxd_device *idxd;
+ struct dma_device dma;
+};
+
+struct idxd_driver_data {
+ const char *name_prefix;
+ enum idxd_type type;
+ struct device_type *dev_type;
+ int compl_size;
+ int align;
+};
+
+struct idxd_device {
+ struct idxd_dev idxd_dev;
+ struct idxd_driver_data *data;
+ struct list_head list;
+ struct idxd_hw hw;
+ enum idxd_device_state state;
+ unsigned long flags;
+ int id;
+ int major;
+ u32 cmd_status;
+ struct idxd_irq_entry ie; /* misc irq, msix 0 */
+
+ struct pci_dev *pdev;
+ void __iomem *reg_base;
+
+ spinlock_t dev_lock; /* spinlock for device */
+ spinlock_t cmd_lock; /* spinlock for device commands */
+ struct completion *cmd_done;
+ struct idxd_group **groups;
+ struct idxd_wq **wqs;
+ struct idxd_engine **engines;
+
+ struct iommu_sva *sva;
+ unsigned int pasid;
+
+ int num_groups;
+ int irq_cnt;
+ bool request_int_handles;
+
+ u32 msix_perm_offset;
+ u32 wqcfg_offset;
+ u32 grpcfg_offset;
+ u32 perfmon_offset;
+
+ u64 max_xfer_bytes;
+ u32 max_batch_size;
+ int max_groups;
+ int max_engines;
+ int max_rdbufs;
+ int max_wqs;
+ int max_wq_size;
+ int rdbuf_limit;
+ int nr_rdbufs; /* non-reserved read buffers */
+ unsigned int wqcfg_size;
+ unsigned long *wq_enable_map;
+
+ union sw_err_reg sw_err;
+ wait_queue_head_t cmd_waitq;
+
+ struct idxd_dma_dev *idxd_dma;
+ struct workqueue_struct *wq;
+ struct work_struct work;
+
+ struct idxd_pmu *idxd_pmu;
+
+ unsigned long *opcap_bmap;
+};
+
+/* IDXD software descriptor */
+struct idxd_desc {
+ union {
+ struct dsa_hw_desc *hw;
+ struct iax_hw_desc *iax_hw;
+ };
+ dma_addr_t desc_dma;
+ union {
+ struct dsa_completion_record *completion;
+ struct iax_completion_record *iax_completion;
+ };
+ dma_addr_t compl_dma;
+ struct dma_async_tx_descriptor txd;
+ struct llist_node llnode;
+ struct list_head list;
+ int id;
+ int cpu;
+ struct idxd_wq *wq;
+};
+
+/*
+ * This is software defined error for the completion status. We overload the error code
+ * that will never appear in completion status and only SWERR register.
+ */
+enum idxd_completion_status {
+ IDXD_COMP_DESC_ABORT = 0xff,
+};
+
+#define idxd_confdev(idxd) &idxd->idxd_dev.conf_dev
+#define wq_confdev(wq) &wq->idxd_dev.conf_dev
+#define engine_confdev(engine) &engine->idxd_dev.conf_dev
+#define group_confdev(group) &group->idxd_dev.conf_dev
+#define cdev_dev(cdev) &cdev->idxd_dev.conf_dev
+
+#define confdev_to_idxd_dev(dev) container_of(dev, struct idxd_dev, conf_dev)
+#define idxd_dev_to_idxd(idxd_dev) container_of(idxd_dev, struct idxd_device, idxd_dev)
+#define idxd_dev_to_wq(idxd_dev) container_of(idxd_dev, struct idxd_wq, idxd_dev)
+
+static inline struct idxd_device *confdev_to_idxd(struct device *dev)
+{
+ struct idxd_dev *idxd_dev = confdev_to_idxd_dev(dev);
+
+ return idxd_dev_to_idxd(idxd_dev);
+}
+
+static inline struct idxd_wq *confdev_to_wq(struct device *dev)
+{
+ struct idxd_dev *idxd_dev = confdev_to_idxd_dev(dev);
+
+ return idxd_dev_to_wq(idxd_dev);
+}
+
+static inline struct idxd_engine *confdev_to_engine(struct device *dev)
+{
+ struct idxd_dev *idxd_dev = confdev_to_idxd_dev(dev);
+
+ return container_of(idxd_dev, struct idxd_engine, idxd_dev);
+}
+
+static inline struct idxd_group *confdev_to_group(struct device *dev)
+{
+ struct idxd_dev *idxd_dev = confdev_to_idxd_dev(dev);
+
+ return container_of(idxd_dev, struct idxd_group, idxd_dev);
+}
+
+static inline struct idxd_cdev *dev_to_cdev(struct device *dev)
+{
+ struct idxd_dev *idxd_dev = confdev_to_idxd_dev(dev);
+
+ return container_of(idxd_dev, struct idxd_cdev, idxd_dev);
+}
+
+static inline void idxd_dev_set_type(struct idxd_dev *idev, int type)
+{
+ if (type >= IDXD_DEV_MAX_TYPE) {
+ idev->type = IDXD_DEV_NONE;
+ return;
+ }
+
+ idev->type = type;
+}
+
+static inline struct idxd_irq_entry *idxd_get_ie(struct idxd_device *idxd, int idx)
+{
+ return (idx == 0) ? &idxd->ie : &idxd->wqs[idx - 1]->ie;
+}
+
+static inline struct idxd_wq *ie_to_wq(struct idxd_irq_entry *ie)
+{
+ return container_of(ie, struct idxd_wq, ie);
+}
+
+static inline struct idxd_device *ie_to_idxd(struct idxd_irq_entry *ie)
+{
+ return container_of(ie, struct idxd_device, ie);
+}
+
+extern struct bus_type dsa_bus_type;
+
+extern bool support_enqcmd;
+extern struct ida idxd_ida;
+extern struct device_type dsa_device_type;
+extern struct device_type iax_device_type;
+extern struct device_type idxd_wq_device_type;
+extern struct device_type idxd_engine_device_type;
+extern struct device_type idxd_group_device_type;
+
+static inline bool is_dsa_dev(struct idxd_dev *idxd_dev)
+{
+ return idxd_dev->type == IDXD_DEV_DSA;
+}
+
+static inline bool is_iax_dev(struct idxd_dev *idxd_dev)
+{
+ return idxd_dev->type == IDXD_DEV_IAX;
+}
+
+static inline bool is_idxd_dev(struct idxd_dev *idxd_dev)
+{
+ return is_dsa_dev(idxd_dev) || is_iax_dev(idxd_dev);
+}
+
+static inline bool is_idxd_wq_dev(struct idxd_dev *idxd_dev)
+{
+ return idxd_dev->type == IDXD_DEV_WQ;
+}
+
+static inline bool is_idxd_wq_dmaengine(struct idxd_wq *wq)
+{
+ if (wq->type == IDXD_WQT_KERNEL && strcmp(wq->name, "dmaengine") == 0)
+ return true;
+ return false;
+}
+
+static inline bool is_idxd_wq_user(struct idxd_wq *wq)
+{
+ return wq->type == IDXD_WQT_USER;
+}
+
+static inline bool is_idxd_wq_kernel(struct idxd_wq *wq)
+{
+ return wq->type == IDXD_WQT_KERNEL;
+}
+
+static inline bool wq_dedicated(struct idxd_wq *wq)
+{
+ return test_bit(WQ_FLAG_DEDICATED, &wq->flags);
+}
+
+static inline bool wq_shared(struct idxd_wq *wq)
+{
+ return !test_bit(WQ_FLAG_DEDICATED, &wq->flags);
+}
+
+static inline bool device_pasid_enabled(struct idxd_device *idxd)
+{
+ return test_bit(IDXD_FLAG_PASID_ENABLED, &idxd->flags);
+}
+
+static inline bool device_user_pasid_enabled(struct idxd_device *idxd)
+{
+ return test_bit(IDXD_FLAG_USER_PASID_ENABLED, &idxd->flags);
+}
+
+static inline bool wq_pasid_enabled(struct idxd_wq *wq)
+{
+ return (is_idxd_wq_kernel(wq) && device_pasid_enabled(wq->idxd)) ||
+ (is_idxd_wq_user(wq) && device_user_pasid_enabled(wq->idxd));
+}
+
+static inline bool wq_shared_supported(struct idxd_wq *wq)
+{
+ return (support_enqcmd && wq_pasid_enabled(wq));
+}
+
+enum idxd_portal_prot {
+ IDXD_PORTAL_UNLIMITED = 0,
+ IDXD_PORTAL_LIMITED,
+};
+
+enum idxd_interrupt_type {
+ IDXD_IRQ_MSIX = 0,
+ IDXD_IRQ_IMS,
+};
+
+static inline int idxd_get_wq_portal_offset(enum idxd_portal_prot prot)
+{
+ return prot * 0x1000;
+}
+
+static inline int idxd_get_wq_portal_full_offset(int wq_id,
+ enum idxd_portal_prot prot)
+{
+ return ((wq_id * 4) << PAGE_SHIFT) + idxd_get_wq_portal_offset(prot);
+}
+
+#define IDXD_PORTAL_MASK (PAGE_SIZE - 1)
+
+/*
+ * Even though this function can be accessed by multiple threads, it is safe to use.
+ * At worst the address gets used more than once before it gets incremented. We don't
+ * hit a threshold until iops becomes many million times a second. So the occasional
+ * reuse of the same address is tolerable compare to using an atomic variable. This is
+ * safe on a system that has atomic load/store for 32bit integers. Given that this is an
+ * Intel iEP device, that should not be a problem.
+ */
+static inline void __iomem *idxd_wq_portal_addr(struct idxd_wq *wq)
+{
+ int ofs = wq->portal_offset;
+
+ wq->portal_offset = (ofs + sizeof(struct dsa_raw_desc)) & IDXD_PORTAL_MASK;
+ return wq->portal + ofs;
+}
+
+static inline void idxd_wq_get(struct idxd_wq *wq)
+{
+ wq->client_count++;
+}
+
+static inline void idxd_wq_put(struct idxd_wq *wq)
+{
+ wq->client_count--;
+}
+
+static inline int idxd_wq_refcount(struct idxd_wq *wq)
+{
+ return wq->client_count;
+};
+
+/*
+ * Intel IAA does not support batch processing.
+ * The max batch size of device, max batch size of wq and
+ * max batch shift of wqcfg should be always 0 on IAA.
+ */
+static inline void idxd_set_max_batch_size(int idxd_type, struct idxd_device *idxd,
+ u32 max_batch_size)
+{
+ if (idxd_type == IDXD_TYPE_IAX)
+ idxd->max_batch_size = 0;
+ else
+ idxd->max_batch_size = max_batch_size;
+}
+
+static inline void idxd_wq_set_max_batch_size(int idxd_type, struct idxd_wq *wq,
+ u32 max_batch_size)
+{
+ if (idxd_type == IDXD_TYPE_IAX)
+ wq->max_batch_size = 0;
+ else
+ wq->max_batch_size = max_batch_size;
+}
+
+static inline void idxd_wqcfg_set_max_batch_shift(int idxd_type, union wqcfg *wqcfg,
+ u32 max_batch_shift)
+{
+ if (idxd_type == IDXD_TYPE_IAX)
+ wqcfg->max_batch_shift = 0;
+ else
+ wqcfg->max_batch_shift = max_batch_shift;
+}
+
+int __must_check __idxd_driver_register(struct idxd_device_driver *idxd_drv,
+ struct module *module, const char *mod_name);
+#define idxd_driver_register(driver) \
+ __idxd_driver_register(driver, THIS_MODULE, KBUILD_MODNAME)
+
+void idxd_driver_unregister(struct idxd_device_driver *idxd_drv);
+
+#define module_idxd_driver(__idxd_driver) \
+ module_driver(__idxd_driver, idxd_driver_register, idxd_driver_unregister)
+
+int idxd_register_bus_type(void);
+void idxd_unregister_bus_type(void);
+int idxd_register_devices(struct idxd_device *idxd);
+void idxd_unregister_devices(struct idxd_device *idxd);
+int idxd_register_driver(void);
+void idxd_unregister_driver(void);
+void idxd_wqs_quiesce(struct idxd_device *idxd);
+bool idxd_queue_int_handle_resubmit(struct idxd_desc *desc);
+
+/* device interrupt control */
+irqreturn_t idxd_misc_thread(int vec, void *data);
+irqreturn_t idxd_wq_thread(int irq, void *data);
+void idxd_mask_error_interrupts(struct idxd_device *idxd);
+void idxd_unmask_error_interrupts(struct idxd_device *idxd);
+
+/* device control */
+int idxd_register_idxd_drv(void);
+void idxd_unregister_idxd_drv(void);
+int idxd_device_drv_probe(struct idxd_dev *idxd_dev);
+void idxd_device_drv_remove(struct idxd_dev *idxd_dev);
+int drv_enable_wq(struct idxd_wq *wq);
+void drv_disable_wq(struct idxd_wq *wq);
+int idxd_device_init_reset(struct idxd_device *idxd);
+int idxd_device_enable(struct idxd_device *idxd);
+int idxd_device_disable(struct idxd_device *idxd);
+void idxd_device_reset(struct idxd_device *idxd);
+void idxd_device_clear_state(struct idxd_device *idxd);
+int idxd_device_config(struct idxd_device *idxd);
+void idxd_device_drain_pasid(struct idxd_device *idxd, int pasid);
+int idxd_device_load_config(struct idxd_device *idxd);
+int idxd_device_request_int_handle(struct idxd_device *idxd, int idx, int *handle,
+ enum idxd_interrupt_type irq_type);
+int idxd_device_release_int_handle(struct idxd_device *idxd, int handle,
+ enum idxd_interrupt_type irq_type);
+
+/* work queue control */
+void idxd_wqs_unmap_portal(struct idxd_device *idxd);
+int idxd_wq_alloc_resources(struct idxd_wq *wq);
+void idxd_wq_free_resources(struct idxd_wq *wq);
+int idxd_wq_enable(struct idxd_wq *wq);
+int idxd_wq_disable(struct idxd_wq *wq, bool reset_config);
+void idxd_wq_drain(struct idxd_wq *wq);
+void idxd_wq_reset(struct idxd_wq *wq);
+int idxd_wq_map_portal(struct idxd_wq *wq);
+void idxd_wq_unmap_portal(struct idxd_wq *wq);
+int idxd_wq_set_pasid(struct idxd_wq *wq, int pasid);
+int idxd_wq_disable_pasid(struct idxd_wq *wq);
+void __idxd_wq_quiesce(struct idxd_wq *wq);
+void idxd_wq_quiesce(struct idxd_wq *wq);
+int idxd_wq_init_percpu_ref(struct idxd_wq *wq);
+void idxd_wq_free_irq(struct idxd_wq *wq);
+int idxd_wq_request_irq(struct idxd_wq *wq);
+
+/* submission */
+int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc);
+struct idxd_desc *idxd_alloc_desc(struct idxd_wq *wq, enum idxd_op_type optype);
+void idxd_free_desc(struct idxd_wq *wq, struct idxd_desc *desc);
+int idxd_enqcmds(struct idxd_wq *wq, void __iomem *portal, const void *desc);
+
+/* dmaengine */
+int idxd_register_dma_device(struct idxd_device *idxd);
+void idxd_unregister_dma_device(struct idxd_device *idxd);
+void idxd_parse_completion_status(u8 status, enum dmaengine_tx_result *res);
+void idxd_dma_complete_txd(struct idxd_desc *desc,
+ enum idxd_complete_type comp_type, bool free_desc);
+
+/* cdev */
+int idxd_cdev_register(void);
+void idxd_cdev_remove(void);
+int idxd_cdev_get_major(struct idxd_device *idxd);
+int idxd_wq_add_cdev(struct idxd_wq *wq);
+void idxd_wq_del_cdev(struct idxd_wq *wq);
+
+/* perfmon */
+#if IS_ENABLED(CONFIG_INTEL_IDXD_PERFMON)
+int perfmon_pmu_init(struct idxd_device *idxd);
+void perfmon_pmu_remove(struct idxd_device *idxd);
+void perfmon_counter_overflow(struct idxd_device *idxd);
+void perfmon_init(void);
+void perfmon_exit(void);
+#else
+static inline int perfmon_pmu_init(struct idxd_device *idxd) { return 0; }
+static inline void perfmon_pmu_remove(struct idxd_device *idxd) {}
+static inline void perfmon_counter_overflow(struct idxd_device *idxd) {}
+static inline void perfmon_init(void) {}
+static inline void perfmon_exit(void) {}
+#endif
+
+#endif
diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c
new file mode 100644
index 000000000..e0f49545d
--- /dev/null
+++ b/drivers/dma/idxd/init.c
@@ -0,0 +1,810 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2019 Intel Corporation. All rights rsvd. */
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/pci.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/workqueue.h>
+#include <linux/aer.h>
+#include <linux/fs.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
+#include <linux/device.h>
+#include <linux/idr.h>
+#include <linux/intel-svm.h>
+#include <linux/iommu.h>
+#include <uapi/linux/idxd.h>
+#include <linux/dmaengine.h>
+#include "../dmaengine.h"
+#include "registers.h"
+#include "idxd.h"
+#include "perfmon.h"
+
+MODULE_VERSION(IDXD_DRIVER_VERSION);
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Intel Corporation");
+MODULE_IMPORT_NS(IDXD);
+
+static bool sva = true;
+module_param(sva, bool, 0644);
+MODULE_PARM_DESC(sva, "Toggle SVA support on/off");
+
+bool tc_override;
+module_param(tc_override, bool, 0644);
+MODULE_PARM_DESC(tc_override, "Override traffic class defaults");
+
+#define DRV_NAME "idxd"
+
+bool support_enqcmd;
+DEFINE_IDA(idxd_ida);
+
+static struct idxd_driver_data idxd_driver_data[] = {
+ [IDXD_TYPE_DSA] = {
+ .name_prefix = "dsa",
+ .type = IDXD_TYPE_DSA,
+ .compl_size = sizeof(struct dsa_completion_record),
+ .align = 32,
+ .dev_type = &dsa_device_type,
+ },
+ [IDXD_TYPE_IAX] = {
+ .name_prefix = "iax",
+ .type = IDXD_TYPE_IAX,
+ .compl_size = sizeof(struct iax_completion_record),
+ .align = 64,
+ .dev_type = &iax_device_type,
+ },
+};
+
+static struct pci_device_id idxd_pci_tbl[] = {
+ /* DSA ver 1.0 platforms */
+ { PCI_DEVICE_DATA(INTEL, DSA_SPR0, &idxd_driver_data[IDXD_TYPE_DSA]) },
+
+ /* IAX ver 1.0 platforms */
+ { PCI_DEVICE_DATA(INTEL, IAX_SPR0, &idxd_driver_data[IDXD_TYPE_IAX]) },
+ { 0, }
+};
+MODULE_DEVICE_TABLE(pci, idxd_pci_tbl);
+
+static int idxd_setup_interrupts(struct idxd_device *idxd)
+{
+ struct pci_dev *pdev = idxd->pdev;
+ struct device *dev = &pdev->dev;
+ struct idxd_irq_entry *ie;
+ int i, msixcnt;
+ int rc = 0;
+
+ msixcnt = pci_msix_vec_count(pdev);
+ if (msixcnt < 0) {
+ dev_err(dev, "Not MSI-X interrupt capable.\n");
+ return -ENOSPC;
+ }
+ idxd->irq_cnt = msixcnt;
+
+ rc = pci_alloc_irq_vectors(pdev, msixcnt, msixcnt, PCI_IRQ_MSIX);
+ if (rc != msixcnt) {
+ dev_err(dev, "Failed enabling %d MSIX entries: %d\n", msixcnt, rc);
+ return -ENOSPC;
+ }
+ dev_dbg(dev, "Enabled %d msix vectors\n", msixcnt);
+
+
+ ie = idxd_get_ie(idxd, 0);
+ ie->vector = pci_irq_vector(pdev, 0);
+ rc = request_threaded_irq(ie->vector, NULL, idxd_misc_thread, 0, "idxd-misc", ie);
+ if (rc < 0) {
+ dev_err(dev, "Failed to allocate misc interrupt.\n");
+ goto err_misc_irq;
+ }
+ dev_dbg(dev, "Requested idxd-misc handler on msix vector %d\n", ie->vector);
+
+ for (i = 0; i < idxd->max_wqs; i++) {
+ int msix_idx = i + 1;
+
+ ie = idxd_get_ie(idxd, msix_idx);
+ ie->id = msix_idx;
+ ie->int_handle = INVALID_INT_HANDLE;
+ ie->pasid = INVALID_IOASID;
+
+ spin_lock_init(&ie->list_lock);
+ init_llist_head(&ie->pending_llist);
+ INIT_LIST_HEAD(&ie->work_list);
+ }
+
+ idxd_unmask_error_interrupts(idxd);
+ return 0;
+
+ err_misc_irq:
+ idxd_mask_error_interrupts(idxd);
+ pci_free_irq_vectors(pdev);
+ dev_err(dev, "No usable interrupts\n");
+ return rc;
+}
+
+static void idxd_cleanup_interrupts(struct idxd_device *idxd)
+{
+ struct pci_dev *pdev = idxd->pdev;
+ struct idxd_irq_entry *ie;
+ int msixcnt;
+
+ msixcnt = pci_msix_vec_count(pdev);
+ if (msixcnt <= 0)
+ return;
+
+ ie = idxd_get_ie(idxd, 0);
+ idxd_mask_error_interrupts(idxd);
+ free_irq(ie->vector, ie);
+ pci_free_irq_vectors(pdev);
+}
+
+static int idxd_setup_wqs(struct idxd_device *idxd)
+{
+ struct device *dev = &idxd->pdev->dev;
+ struct idxd_wq *wq;
+ struct device *conf_dev;
+ int i, rc;
+
+ idxd->wqs = kcalloc_node(idxd->max_wqs, sizeof(struct idxd_wq *),
+ GFP_KERNEL, dev_to_node(dev));
+ if (!idxd->wqs)
+ return -ENOMEM;
+
+ idxd->wq_enable_map = bitmap_zalloc_node(idxd->max_wqs, GFP_KERNEL, dev_to_node(dev));
+ if (!idxd->wq_enable_map) {
+ kfree(idxd->wqs);
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < idxd->max_wqs; i++) {
+ wq = kzalloc_node(sizeof(*wq), GFP_KERNEL, dev_to_node(dev));
+ if (!wq) {
+ rc = -ENOMEM;
+ goto err;
+ }
+
+ idxd_dev_set_type(&wq->idxd_dev, IDXD_DEV_WQ);
+ conf_dev = wq_confdev(wq);
+ wq->id = i;
+ wq->idxd = idxd;
+ device_initialize(wq_confdev(wq));
+ conf_dev->parent = idxd_confdev(idxd);
+ conf_dev->bus = &dsa_bus_type;
+ conf_dev->type = &idxd_wq_device_type;
+ rc = dev_set_name(conf_dev, "wq%d.%d", idxd->id, wq->id);
+ if (rc < 0) {
+ put_device(conf_dev);
+ goto err;
+ }
+
+ mutex_init(&wq->wq_lock);
+ init_waitqueue_head(&wq->err_queue);
+ init_completion(&wq->wq_dead);
+ init_completion(&wq->wq_resurrect);
+ wq->max_xfer_bytes = WQ_DEFAULT_MAX_XFER;
+ idxd_wq_set_max_batch_size(idxd->data->type, wq, WQ_DEFAULT_MAX_BATCH);
+ wq->enqcmds_retries = IDXD_ENQCMDS_RETRIES;
+ wq->wqcfg = kzalloc_node(idxd->wqcfg_size, GFP_KERNEL, dev_to_node(dev));
+ if (!wq->wqcfg) {
+ put_device(conf_dev);
+ rc = -ENOMEM;
+ goto err;
+ }
+
+ if (idxd->hw.wq_cap.op_config) {
+ wq->opcap_bmap = bitmap_zalloc(IDXD_MAX_OPCAP_BITS, GFP_KERNEL);
+ if (!wq->opcap_bmap) {
+ put_device(conf_dev);
+ rc = -ENOMEM;
+ goto err;
+ }
+ bitmap_copy(wq->opcap_bmap, idxd->opcap_bmap, IDXD_MAX_OPCAP_BITS);
+ }
+ idxd->wqs[i] = wq;
+ }
+
+ return 0;
+
+ err:
+ while (--i >= 0) {
+ wq = idxd->wqs[i];
+ conf_dev = wq_confdev(wq);
+ put_device(conf_dev);
+ }
+ return rc;
+}
+
+static int idxd_setup_engines(struct idxd_device *idxd)
+{
+ struct idxd_engine *engine;
+ struct device *dev = &idxd->pdev->dev;
+ struct device *conf_dev;
+ int i, rc;
+
+ idxd->engines = kcalloc_node(idxd->max_engines, sizeof(struct idxd_engine *),
+ GFP_KERNEL, dev_to_node(dev));
+ if (!idxd->engines)
+ return -ENOMEM;
+
+ for (i = 0; i < idxd->max_engines; i++) {
+ engine = kzalloc_node(sizeof(*engine), GFP_KERNEL, dev_to_node(dev));
+ if (!engine) {
+ rc = -ENOMEM;
+ goto err;
+ }
+
+ idxd_dev_set_type(&engine->idxd_dev, IDXD_DEV_ENGINE);
+ conf_dev = engine_confdev(engine);
+ engine->id = i;
+ engine->idxd = idxd;
+ device_initialize(conf_dev);
+ conf_dev->parent = idxd_confdev(idxd);
+ conf_dev->bus = &dsa_bus_type;
+ conf_dev->type = &idxd_engine_device_type;
+ rc = dev_set_name(conf_dev, "engine%d.%d", idxd->id, engine->id);
+ if (rc < 0) {
+ put_device(conf_dev);
+ goto err;
+ }
+
+ idxd->engines[i] = engine;
+ }
+
+ return 0;
+
+ err:
+ while (--i >= 0) {
+ engine = idxd->engines[i];
+ conf_dev = engine_confdev(engine);
+ put_device(conf_dev);
+ }
+ return rc;
+}
+
+static int idxd_setup_groups(struct idxd_device *idxd)
+{
+ struct device *dev = &idxd->pdev->dev;
+ struct device *conf_dev;
+ struct idxd_group *group;
+ int i, rc;
+
+ idxd->groups = kcalloc_node(idxd->max_groups, sizeof(struct idxd_group *),
+ GFP_KERNEL, dev_to_node(dev));
+ if (!idxd->groups)
+ return -ENOMEM;
+
+ for (i = 0; i < idxd->max_groups; i++) {
+ group = kzalloc_node(sizeof(*group), GFP_KERNEL, dev_to_node(dev));
+ if (!group) {
+ rc = -ENOMEM;
+ goto err;
+ }
+
+ idxd_dev_set_type(&group->idxd_dev, IDXD_DEV_GROUP);
+ conf_dev = group_confdev(group);
+ group->id = i;
+ group->idxd = idxd;
+ device_initialize(conf_dev);
+ conf_dev->parent = idxd_confdev(idxd);
+ conf_dev->bus = &dsa_bus_type;
+ conf_dev->type = &idxd_group_device_type;
+ rc = dev_set_name(conf_dev, "group%d.%d", idxd->id, group->id);
+ if (rc < 0) {
+ put_device(conf_dev);
+ goto err;
+ }
+
+ idxd->groups[i] = group;
+ if (idxd->hw.version <= DEVICE_VERSION_2 && !tc_override) {
+ group->tc_a = 1;
+ group->tc_b = 1;
+ } else {
+ group->tc_a = -1;
+ group->tc_b = -1;
+ }
+ }
+
+ return 0;
+
+ err:
+ while (--i >= 0) {
+ group = idxd->groups[i];
+ put_device(group_confdev(group));
+ }
+ return rc;
+}
+
+static void idxd_cleanup_internals(struct idxd_device *idxd)
+{
+ int i;
+
+ for (i = 0; i < idxd->max_groups; i++)
+ put_device(group_confdev(idxd->groups[i]));
+ for (i = 0; i < idxd->max_engines; i++)
+ put_device(engine_confdev(idxd->engines[i]));
+ for (i = 0; i < idxd->max_wqs; i++)
+ put_device(wq_confdev(idxd->wqs[i]));
+ destroy_workqueue(idxd->wq);
+}
+
+static int idxd_setup_internals(struct idxd_device *idxd)
+{
+ struct device *dev = &idxd->pdev->dev;
+ int rc, i;
+
+ init_waitqueue_head(&idxd->cmd_waitq);
+
+ rc = idxd_setup_wqs(idxd);
+ if (rc < 0)
+ goto err_wqs;
+
+ rc = idxd_setup_engines(idxd);
+ if (rc < 0)
+ goto err_engine;
+
+ rc = idxd_setup_groups(idxd);
+ if (rc < 0)
+ goto err_group;
+
+ idxd->wq = create_workqueue(dev_name(dev));
+ if (!idxd->wq) {
+ rc = -ENOMEM;
+ goto err_wkq_create;
+ }
+
+ return 0;
+
+ err_wkq_create:
+ for (i = 0; i < idxd->max_groups; i++)
+ put_device(group_confdev(idxd->groups[i]));
+ err_group:
+ for (i = 0; i < idxd->max_engines; i++)
+ put_device(engine_confdev(idxd->engines[i]));
+ err_engine:
+ for (i = 0; i < idxd->max_wqs; i++)
+ put_device(wq_confdev(idxd->wqs[i]));
+ err_wqs:
+ return rc;
+}
+
+static void idxd_read_table_offsets(struct idxd_device *idxd)
+{
+ union offsets_reg offsets;
+ struct device *dev = &idxd->pdev->dev;
+
+ offsets.bits[0] = ioread64(idxd->reg_base + IDXD_TABLE_OFFSET);
+ offsets.bits[1] = ioread64(idxd->reg_base + IDXD_TABLE_OFFSET + sizeof(u64));
+ idxd->grpcfg_offset = offsets.grpcfg * IDXD_TABLE_MULT;
+ dev_dbg(dev, "IDXD Group Config Offset: %#x\n", idxd->grpcfg_offset);
+ idxd->wqcfg_offset = offsets.wqcfg * IDXD_TABLE_MULT;
+ dev_dbg(dev, "IDXD Work Queue Config Offset: %#x\n", idxd->wqcfg_offset);
+ idxd->msix_perm_offset = offsets.msix_perm * IDXD_TABLE_MULT;
+ dev_dbg(dev, "IDXD MSIX Permission Offset: %#x\n", idxd->msix_perm_offset);
+ idxd->perfmon_offset = offsets.perfmon * IDXD_TABLE_MULT;
+ dev_dbg(dev, "IDXD Perfmon Offset: %#x\n", idxd->perfmon_offset);
+}
+
+static void multi_u64_to_bmap(unsigned long *bmap, u64 *val, int count)
+{
+ int i, j, nr;
+
+ for (i = 0, nr = 0; i < count; i++) {
+ for (j = 0; j < BITS_PER_LONG_LONG; j++) {
+ if (val[i] & BIT(j))
+ set_bit(nr, bmap);
+ nr++;
+ }
+ }
+}
+
+static void idxd_read_caps(struct idxd_device *idxd)
+{
+ struct device *dev = &idxd->pdev->dev;
+ int i;
+
+ /* reading generic capabilities */
+ idxd->hw.gen_cap.bits = ioread64(idxd->reg_base + IDXD_GENCAP_OFFSET);
+ dev_dbg(dev, "gen_cap: %#llx\n", idxd->hw.gen_cap.bits);
+
+ if (idxd->hw.gen_cap.cmd_cap) {
+ idxd->hw.cmd_cap = ioread32(idxd->reg_base + IDXD_CMDCAP_OFFSET);
+ dev_dbg(dev, "cmd_cap: %#x\n", idxd->hw.cmd_cap);
+ }
+
+ /* reading command capabilities */
+ if (idxd->hw.cmd_cap & BIT(IDXD_CMD_REQUEST_INT_HANDLE))
+ idxd->request_int_handles = true;
+
+ idxd->max_xfer_bytes = 1ULL << idxd->hw.gen_cap.max_xfer_shift;
+ dev_dbg(dev, "max xfer size: %llu bytes\n", idxd->max_xfer_bytes);
+ idxd_set_max_batch_size(idxd->data->type, idxd, 1U << idxd->hw.gen_cap.max_batch_shift);
+ dev_dbg(dev, "max batch size: %u\n", idxd->max_batch_size);
+ if (idxd->hw.gen_cap.config_en)
+ set_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags);
+
+ /* reading group capabilities */
+ idxd->hw.group_cap.bits =
+ ioread64(idxd->reg_base + IDXD_GRPCAP_OFFSET);
+ dev_dbg(dev, "group_cap: %#llx\n", idxd->hw.group_cap.bits);
+ idxd->max_groups = idxd->hw.group_cap.num_groups;
+ dev_dbg(dev, "max groups: %u\n", idxd->max_groups);
+ idxd->max_rdbufs = idxd->hw.group_cap.total_rdbufs;
+ dev_dbg(dev, "max read buffers: %u\n", idxd->max_rdbufs);
+ idxd->nr_rdbufs = idxd->max_rdbufs;
+
+ /* read engine capabilities */
+ idxd->hw.engine_cap.bits =
+ ioread64(idxd->reg_base + IDXD_ENGCAP_OFFSET);
+ dev_dbg(dev, "engine_cap: %#llx\n", idxd->hw.engine_cap.bits);
+ idxd->max_engines = idxd->hw.engine_cap.num_engines;
+ dev_dbg(dev, "max engines: %u\n", idxd->max_engines);
+
+ /* read workqueue capabilities */
+ idxd->hw.wq_cap.bits = ioread64(idxd->reg_base + IDXD_WQCAP_OFFSET);
+ dev_dbg(dev, "wq_cap: %#llx\n", idxd->hw.wq_cap.bits);
+ idxd->max_wq_size = idxd->hw.wq_cap.total_wq_size;
+ dev_dbg(dev, "total workqueue size: %u\n", idxd->max_wq_size);
+ idxd->max_wqs = idxd->hw.wq_cap.num_wqs;
+ dev_dbg(dev, "max workqueues: %u\n", idxd->max_wqs);
+ idxd->wqcfg_size = 1 << (idxd->hw.wq_cap.wqcfg_size + IDXD_WQCFG_MIN);
+ dev_dbg(dev, "wqcfg size: %u\n", idxd->wqcfg_size);
+
+ /* reading operation capabilities */
+ for (i = 0; i < 4; i++) {
+ idxd->hw.opcap.bits[i] = ioread64(idxd->reg_base +
+ IDXD_OPCAP_OFFSET + i * sizeof(u64));
+ dev_dbg(dev, "opcap[%d]: %#llx\n", i, idxd->hw.opcap.bits[i]);
+ }
+ multi_u64_to_bmap(idxd->opcap_bmap, &idxd->hw.opcap.bits[0], 4);
+}
+
+static struct idxd_device *idxd_alloc(struct pci_dev *pdev, struct idxd_driver_data *data)
+{
+ struct device *dev = &pdev->dev;
+ struct device *conf_dev;
+ struct idxd_device *idxd;
+ int rc;
+
+ idxd = kzalloc_node(sizeof(*idxd), GFP_KERNEL, dev_to_node(dev));
+ if (!idxd)
+ return NULL;
+
+ conf_dev = idxd_confdev(idxd);
+ idxd->pdev = pdev;
+ idxd->data = data;
+ idxd_dev_set_type(&idxd->idxd_dev, idxd->data->type);
+ idxd->id = ida_alloc(&idxd_ida, GFP_KERNEL);
+ if (idxd->id < 0)
+ return NULL;
+
+ idxd->opcap_bmap = bitmap_zalloc_node(IDXD_MAX_OPCAP_BITS, GFP_KERNEL, dev_to_node(dev));
+ if (!idxd->opcap_bmap) {
+ ida_free(&idxd_ida, idxd->id);
+ return NULL;
+ }
+
+ device_initialize(conf_dev);
+ conf_dev->parent = dev;
+ conf_dev->bus = &dsa_bus_type;
+ conf_dev->type = idxd->data->dev_type;
+ rc = dev_set_name(conf_dev, "%s%d", idxd->data->name_prefix, idxd->id);
+ if (rc < 0) {
+ put_device(conf_dev);
+ return NULL;
+ }
+
+ spin_lock_init(&idxd->dev_lock);
+ spin_lock_init(&idxd->cmd_lock);
+
+ return idxd;
+}
+
+static int idxd_enable_system_pasid(struct idxd_device *idxd)
+{
+ int flags;
+ unsigned int pasid;
+ struct iommu_sva *sva;
+
+ flags = SVM_FLAG_SUPERVISOR_MODE;
+
+ sva = iommu_sva_bind_device(&idxd->pdev->dev, NULL, &flags);
+ if (IS_ERR(sva)) {
+ dev_warn(&idxd->pdev->dev,
+ "iommu sva bind failed: %ld\n", PTR_ERR(sva));
+ return PTR_ERR(sva);
+ }
+
+ pasid = iommu_sva_get_pasid(sva);
+ if (pasid == IOMMU_PASID_INVALID) {
+ iommu_sva_unbind_device(sva);
+ return -ENODEV;
+ }
+
+ idxd->sva = sva;
+ idxd->pasid = pasid;
+ dev_dbg(&idxd->pdev->dev, "system pasid: %u\n", pasid);
+ return 0;
+}
+
+static void idxd_disable_system_pasid(struct idxd_device *idxd)
+{
+
+ iommu_sva_unbind_device(idxd->sva);
+ idxd->sva = NULL;
+}
+
+static int idxd_probe(struct idxd_device *idxd)
+{
+ struct pci_dev *pdev = idxd->pdev;
+ struct device *dev = &pdev->dev;
+ int rc;
+
+ dev_dbg(dev, "%s entered and resetting device\n", __func__);
+ rc = idxd_device_init_reset(idxd);
+ if (rc < 0)
+ return rc;
+
+ dev_dbg(dev, "IDXD reset complete\n");
+
+ if (IS_ENABLED(CONFIG_INTEL_IDXD_SVM) && sva) {
+ if (iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_SVA)) {
+ dev_warn(dev, "Unable to turn on user SVA feature.\n");
+ } else {
+ set_bit(IDXD_FLAG_USER_PASID_ENABLED, &idxd->flags);
+
+ if (idxd_enable_system_pasid(idxd))
+ dev_warn(dev, "No in-kernel DMA with PASID.\n");
+ else
+ set_bit(IDXD_FLAG_PASID_ENABLED, &idxd->flags);
+ }
+ } else if (!sva) {
+ dev_warn(dev, "User forced SVA off via module param.\n");
+ }
+
+ idxd_read_caps(idxd);
+ idxd_read_table_offsets(idxd);
+
+ rc = idxd_setup_internals(idxd);
+ if (rc)
+ goto err;
+
+ /* If the configs are readonly, then load them from device */
+ if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) {
+ dev_dbg(dev, "Loading RO device config\n");
+ rc = idxd_device_load_config(idxd);
+ if (rc < 0)
+ goto err_config;
+ }
+
+ rc = idxd_setup_interrupts(idxd);
+ if (rc)
+ goto err_config;
+
+ idxd->major = idxd_cdev_get_major(idxd);
+
+ rc = perfmon_pmu_init(idxd);
+ if (rc < 0)
+ dev_warn(dev, "Failed to initialize perfmon. No PMU support: %d\n", rc);
+
+ dev_dbg(dev, "IDXD device %d probed successfully\n", idxd->id);
+ return 0;
+
+ err_config:
+ idxd_cleanup_internals(idxd);
+ err:
+ if (device_pasid_enabled(idxd))
+ idxd_disable_system_pasid(idxd);
+ if (device_user_pasid_enabled(idxd))
+ iommu_dev_disable_feature(dev, IOMMU_DEV_FEAT_SVA);
+ return rc;
+}
+
+static void idxd_cleanup(struct idxd_device *idxd)
+{
+ struct device *dev = &idxd->pdev->dev;
+
+ perfmon_pmu_remove(idxd);
+ idxd_cleanup_interrupts(idxd);
+ idxd_cleanup_internals(idxd);
+ if (device_pasid_enabled(idxd))
+ idxd_disable_system_pasid(idxd);
+ if (device_user_pasid_enabled(idxd))
+ iommu_dev_disable_feature(dev, IOMMU_DEV_FEAT_SVA);
+}
+
+static int idxd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+ struct device *dev = &pdev->dev;
+ struct idxd_device *idxd;
+ struct idxd_driver_data *data = (struct idxd_driver_data *)id->driver_data;
+ int rc;
+
+ rc = pci_enable_device(pdev);
+ if (rc)
+ return rc;
+
+ dev_dbg(dev, "Alloc IDXD context\n");
+ idxd = idxd_alloc(pdev, data);
+ if (!idxd) {
+ rc = -ENOMEM;
+ goto err_idxd_alloc;
+ }
+
+ dev_dbg(dev, "Mapping BARs\n");
+ idxd->reg_base = pci_iomap(pdev, IDXD_MMIO_BAR, 0);
+ if (!idxd->reg_base) {
+ rc = -ENOMEM;
+ goto err_iomap;
+ }
+
+ dev_dbg(dev, "Set DMA masks\n");
+ rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
+ if (rc)
+ goto err;
+
+ dev_dbg(dev, "Set PCI master\n");
+ pci_set_master(pdev);
+ pci_set_drvdata(pdev, idxd);
+
+ idxd->hw.version = ioread32(idxd->reg_base + IDXD_VER_OFFSET);
+ rc = idxd_probe(idxd);
+ if (rc) {
+ dev_err(dev, "Intel(R) IDXD DMA Engine init failed\n");
+ goto err;
+ }
+
+ rc = idxd_register_devices(idxd);
+ if (rc) {
+ dev_err(dev, "IDXD sysfs setup failed\n");
+ goto err_dev_register;
+ }
+
+ dev_info(&pdev->dev, "Intel(R) Accelerator Device (v%x)\n",
+ idxd->hw.version);
+
+ return 0;
+
+ err_dev_register:
+ idxd_cleanup(idxd);
+ err:
+ pci_iounmap(pdev, idxd->reg_base);
+ err_iomap:
+ put_device(idxd_confdev(idxd));
+ err_idxd_alloc:
+ pci_disable_device(pdev);
+ return rc;
+}
+
+void idxd_wqs_quiesce(struct idxd_device *idxd)
+{
+ struct idxd_wq *wq;
+ int i;
+
+ for (i = 0; i < idxd->max_wqs; i++) {
+ wq = idxd->wqs[i];
+ if (wq->state == IDXD_WQ_ENABLED && wq->type == IDXD_WQT_KERNEL)
+ idxd_wq_quiesce(wq);
+ }
+}
+
+static void idxd_shutdown(struct pci_dev *pdev)
+{
+ struct idxd_device *idxd = pci_get_drvdata(pdev);
+ struct idxd_irq_entry *irq_entry;
+ int rc;
+
+ rc = idxd_device_disable(idxd);
+ if (rc)
+ dev_err(&pdev->dev, "Disabling device failed\n");
+
+ irq_entry = &idxd->ie;
+ synchronize_irq(irq_entry->vector);
+ idxd_mask_error_interrupts(idxd);
+ flush_workqueue(idxd->wq);
+}
+
+static void idxd_remove(struct pci_dev *pdev)
+{
+ struct idxd_device *idxd = pci_get_drvdata(pdev);
+ struct idxd_irq_entry *irq_entry;
+
+ idxd_unregister_devices(idxd);
+ /*
+ * When ->release() is called for the idxd->conf_dev, it frees all the memory related
+ * to the idxd context. The driver still needs those bits in order to do the rest of
+ * the cleanup. However, we do need to unbound the idxd sub-driver. So take a ref
+ * on the device here to hold off the freeing while allowing the idxd sub-driver
+ * to unbind.
+ */
+ get_device(idxd_confdev(idxd));
+ device_unregister(idxd_confdev(idxd));
+ idxd_shutdown(pdev);
+ if (device_pasid_enabled(idxd))
+ idxd_disable_system_pasid(idxd);
+
+ irq_entry = idxd_get_ie(idxd, 0);
+ free_irq(irq_entry->vector, irq_entry);
+ pci_free_irq_vectors(pdev);
+ pci_iounmap(pdev, idxd->reg_base);
+ if (device_user_pasid_enabled(idxd))
+ iommu_dev_disable_feature(&pdev->dev, IOMMU_DEV_FEAT_SVA);
+ pci_disable_device(pdev);
+ destroy_workqueue(idxd->wq);
+ perfmon_pmu_remove(idxd);
+ put_device(idxd_confdev(idxd));
+}
+
+static struct pci_driver idxd_pci_driver = {
+ .name = DRV_NAME,
+ .id_table = idxd_pci_tbl,
+ .probe = idxd_pci_probe,
+ .remove = idxd_remove,
+ .shutdown = idxd_shutdown,
+};
+
+static int __init idxd_init_module(void)
+{
+ int err;
+
+ /*
+ * If the CPU does not support MOVDIR64B or ENQCMDS, there's no point in
+ * enumerating the device. We can not utilize it.
+ */
+ if (!cpu_feature_enabled(X86_FEATURE_MOVDIR64B)) {
+ pr_warn("idxd driver failed to load without MOVDIR64B.\n");
+ return -ENODEV;
+ }
+
+ if (!cpu_feature_enabled(X86_FEATURE_ENQCMD))
+ pr_warn("Platform does not have ENQCMD(S) support.\n");
+ else
+ support_enqcmd = true;
+
+ perfmon_init();
+
+ err = idxd_driver_register(&idxd_drv);
+ if (err < 0)
+ goto err_idxd_driver_register;
+
+ err = idxd_driver_register(&idxd_dmaengine_drv);
+ if (err < 0)
+ goto err_idxd_dmaengine_driver_register;
+
+ err = idxd_driver_register(&idxd_user_drv);
+ if (err < 0)
+ goto err_idxd_user_driver_register;
+
+ err = idxd_cdev_register();
+ if (err)
+ goto err_cdev_register;
+
+ err = pci_register_driver(&idxd_pci_driver);
+ if (err)
+ goto err_pci_register;
+
+ return 0;
+
+err_pci_register:
+ idxd_cdev_remove();
+err_cdev_register:
+ idxd_driver_unregister(&idxd_user_drv);
+err_idxd_user_driver_register:
+ idxd_driver_unregister(&idxd_dmaengine_drv);
+err_idxd_dmaengine_driver_register:
+ idxd_driver_unregister(&idxd_drv);
+err_idxd_driver_register:
+ return err;
+}
+module_init(idxd_init_module);
+
+static void __exit idxd_exit_module(void)
+{
+ idxd_driver_unregister(&idxd_user_drv);
+ idxd_driver_unregister(&idxd_dmaengine_drv);
+ idxd_driver_unregister(&idxd_drv);
+ pci_unregister_driver(&idxd_pci_driver);
+ idxd_cdev_remove();
+ perfmon_exit();
+}
+module_exit(idxd_exit_module);
diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c
new file mode 100644
index 000000000..aa314ebec
--- /dev/null
+++ b/drivers/dma/idxd/irq.c
@@ -0,0 +1,497 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2019 Intel Corporation. All rights rsvd. */
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
+#include <linux/dmaengine.h>
+#include <linux/delay.h>
+#include <uapi/linux/idxd.h>
+#include "../dmaengine.h"
+#include "idxd.h"
+#include "registers.h"
+
+enum irq_work_type {
+ IRQ_WORK_NORMAL = 0,
+ IRQ_WORK_PROCESS_FAULT,
+};
+
+struct idxd_resubmit {
+ struct work_struct work;
+ struct idxd_desc *desc;
+};
+
+struct idxd_int_handle_revoke {
+ struct work_struct work;
+ struct idxd_device *idxd;
+};
+
+static void idxd_device_reinit(struct work_struct *work)
+{
+ struct idxd_device *idxd = container_of(work, struct idxd_device, work);
+ struct device *dev = &idxd->pdev->dev;
+ int rc, i;
+
+ idxd_device_reset(idxd);
+ rc = idxd_device_config(idxd);
+ if (rc < 0)
+ goto out;
+
+ rc = idxd_device_enable(idxd);
+ if (rc < 0)
+ goto out;
+
+ for (i = 0; i < idxd->max_wqs; i++) {
+ if (test_bit(i, idxd->wq_enable_map)) {
+ struct idxd_wq *wq = idxd->wqs[i];
+
+ rc = idxd_wq_enable(wq);
+ if (rc < 0) {
+ clear_bit(i, idxd->wq_enable_map);
+ dev_warn(dev, "Unable to re-enable wq %s\n",
+ dev_name(wq_confdev(wq)));
+ }
+ }
+ }
+
+ return;
+
+ out:
+ idxd_device_clear_state(idxd);
+}
+
+/*
+ * The function sends a drain descriptor for the interrupt handle. The drain ensures
+ * all descriptors with this interrupt handle is flushed and the interrupt
+ * will allow the cleanup of the outstanding descriptors.
+ */
+static void idxd_int_handle_revoke_drain(struct idxd_irq_entry *ie)
+{
+ struct idxd_wq *wq = ie_to_wq(ie);
+ struct idxd_device *idxd = wq->idxd;
+ struct device *dev = &idxd->pdev->dev;
+ struct dsa_hw_desc desc = {};
+ void __iomem *portal;
+ int rc;
+
+ /* Issue a simple drain operation with interrupt but no completion record */
+ desc.flags = IDXD_OP_FLAG_RCI;
+ desc.opcode = DSA_OPCODE_DRAIN;
+ desc.priv = 1;
+
+ if (ie->pasid != INVALID_IOASID)
+ desc.pasid = ie->pasid;
+ desc.int_handle = ie->int_handle;
+ portal = idxd_wq_portal_addr(wq);
+
+ /*
+ * The wmb() makes sure that the descriptor is all there before we
+ * issue.
+ */
+ wmb();
+ if (wq_dedicated(wq)) {
+ iosubmit_cmds512(portal, &desc, 1);
+ } else {
+ rc = idxd_enqcmds(wq, portal, &desc);
+ /* This should not fail unless hardware failed. */
+ if (rc < 0)
+ dev_warn(dev, "Failed to submit drain desc on wq %d\n", wq->id);
+ }
+}
+
+static void idxd_abort_invalid_int_handle_descs(struct idxd_irq_entry *ie)
+{
+ LIST_HEAD(flist);
+ struct idxd_desc *d, *t;
+ struct llist_node *head;
+
+ spin_lock(&ie->list_lock);
+ head = llist_del_all(&ie->pending_llist);
+ if (head) {
+ llist_for_each_entry_safe(d, t, head, llnode)
+ list_add_tail(&d->list, &ie->work_list);
+ }
+
+ list_for_each_entry_safe(d, t, &ie->work_list, list) {
+ if (d->completion->status == DSA_COMP_INT_HANDLE_INVAL)
+ list_move_tail(&d->list, &flist);
+ }
+ spin_unlock(&ie->list_lock);
+
+ list_for_each_entry_safe(d, t, &flist, list) {
+ list_del(&d->list);
+ idxd_dma_complete_txd(d, IDXD_COMPLETE_ABORT, true);
+ }
+}
+
+static void idxd_int_handle_revoke(struct work_struct *work)
+{
+ struct idxd_int_handle_revoke *revoke =
+ container_of(work, struct idxd_int_handle_revoke, work);
+ struct idxd_device *idxd = revoke->idxd;
+ struct pci_dev *pdev = idxd->pdev;
+ struct device *dev = &pdev->dev;
+ int i, new_handle, rc;
+
+ if (!idxd->request_int_handles) {
+ kfree(revoke);
+ dev_warn(dev, "Unexpected int handle refresh interrupt.\n");
+ return;
+ }
+
+ /*
+ * The loop attempts to acquire new interrupt handle for all interrupt
+ * vectors that supports a handle. If a new interrupt handle is acquired and the
+ * wq is kernel type, the driver will kill the percpu_ref to pause all
+ * ongoing descriptor submissions. The interrupt handle is then changed.
+ * After change, the percpu_ref is revived and all the pending submissions
+ * are woken to try again. A drain is sent to for the interrupt handle
+ * at the end to make sure all invalid int handle descriptors are processed.
+ */
+ for (i = 1; i < idxd->irq_cnt; i++) {
+ struct idxd_irq_entry *ie = idxd_get_ie(idxd, i);
+ struct idxd_wq *wq = ie_to_wq(ie);
+
+ if (ie->int_handle == INVALID_INT_HANDLE)
+ continue;
+
+ rc = idxd_device_request_int_handle(idxd, i, &new_handle, IDXD_IRQ_MSIX);
+ if (rc < 0) {
+ dev_warn(dev, "get int handle %d failed: %d\n", i, rc);
+ /*
+ * Failed to acquire new interrupt handle. Kill the WQ
+ * and release all the pending submitters. The submitters will
+ * get error return code and handle appropriately.
+ */
+ ie->int_handle = INVALID_INT_HANDLE;
+ idxd_wq_quiesce(wq);
+ idxd_abort_invalid_int_handle_descs(ie);
+ continue;
+ }
+
+ /* No change in interrupt handle, nothing needs to be done */
+ if (ie->int_handle == new_handle)
+ continue;
+
+ if (wq->state != IDXD_WQ_ENABLED || wq->type != IDXD_WQT_KERNEL) {
+ /*
+ * All the MSIX interrupts are allocated at once during probe.
+ * Therefore we need to update all interrupts even if the WQ
+ * isn't supporting interrupt operations.
+ */
+ ie->int_handle = new_handle;
+ continue;
+ }
+
+ mutex_lock(&wq->wq_lock);
+ reinit_completion(&wq->wq_resurrect);
+
+ /* Kill percpu_ref to pause additional descriptor submissions */
+ percpu_ref_kill(&wq->wq_active);
+
+ /* Wait for all submitters quiesce before we change interrupt handle */
+ wait_for_completion(&wq->wq_dead);
+
+ ie->int_handle = new_handle;
+
+ /* Revive percpu ref and wake up all the waiting submitters */
+ percpu_ref_reinit(&wq->wq_active);
+ complete_all(&wq->wq_resurrect);
+ mutex_unlock(&wq->wq_lock);
+
+ /*
+ * The delay here is to wait for all possible MOVDIR64B that
+ * are issued before percpu_ref_kill() has happened to have
+ * reached the PCIe domain before the drain is issued. The driver
+ * needs to ensure that the drain descriptor issued does not pass
+ * all the other issued descriptors that contain the invalid
+ * interrupt handle in order to ensure that the drain descriptor
+ * interrupt will allow the cleanup of all the descriptors with
+ * invalid interrupt handle.
+ */
+ if (wq_dedicated(wq))
+ udelay(100);
+ idxd_int_handle_revoke_drain(ie);
+ }
+ kfree(revoke);
+}
+
+static int process_misc_interrupts(struct idxd_device *idxd, u32 cause)
+{
+ struct device *dev = &idxd->pdev->dev;
+ union gensts_reg gensts;
+ u32 val = 0;
+ int i;
+ bool err = false;
+
+ if (cause & IDXD_INTC_HALT_STATE)
+ goto halt;
+
+ if (cause & IDXD_INTC_ERR) {
+ spin_lock(&idxd->dev_lock);
+ for (i = 0; i < 4; i++)
+ idxd->sw_err.bits[i] = ioread64(idxd->reg_base +
+ IDXD_SWERR_OFFSET + i * sizeof(u64));
+
+ iowrite64(idxd->sw_err.bits[0] & IDXD_SWERR_ACK,
+ idxd->reg_base + IDXD_SWERR_OFFSET);
+
+ if (idxd->sw_err.valid && idxd->sw_err.wq_idx_valid) {
+ int id = idxd->sw_err.wq_idx;
+ struct idxd_wq *wq = idxd->wqs[id];
+
+ if (wq->type == IDXD_WQT_USER)
+ wake_up_interruptible(&wq->err_queue);
+ } else {
+ int i;
+
+ for (i = 0; i < idxd->max_wqs; i++) {
+ struct idxd_wq *wq = idxd->wqs[i];
+
+ if (wq->type == IDXD_WQT_USER)
+ wake_up_interruptible(&wq->err_queue);
+ }
+ }
+
+ spin_unlock(&idxd->dev_lock);
+ val |= IDXD_INTC_ERR;
+
+ for (i = 0; i < 4; i++)
+ dev_warn(dev, "err[%d]: %#16.16llx\n",
+ i, idxd->sw_err.bits[i]);
+ err = true;
+ }
+
+ if (cause & IDXD_INTC_INT_HANDLE_REVOKED) {
+ struct idxd_int_handle_revoke *revoke;
+
+ val |= IDXD_INTC_INT_HANDLE_REVOKED;
+
+ revoke = kzalloc(sizeof(*revoke), GFP_ATOMIC);
+ if (revoke) {
+ revoke->idxd = idxd;
+ INIT_WORK(&revoke->work, idxd_int_handle_revoke);
+ queue_work(idxd->wq, &revoke->work);
+
+ } else {
+ dev_err(dev, "Failed to allocate work for int handle revoke\n");
+ idxd_wqs_quiesce(idxd);
+ }
+ }
+
+ if (cause & IDXD_INTC_CMD) {
+ val |= IDXD_INTC_CMD;
+ complete(idxd->cmd_done);
+ }
+
+ if (cause & IDXD_INTC_OCCUPY) {
+ /* Driver does not utilize occupancy interrupt */
+ val |= IDXD_INTC_OCCUPY;
+ }
+
+ if (cause & IDXD_INTC_PERFMON_OVFL) {
+ val |= IDXD_INTC_PERFMON_OVFL;
+ perfmon_counter_overflow(idxd);
+ }
+
+ val ^= cause;
+ if (val)
+ dev_warn_once(dev, "Unexpected interrupt cause bits set: %#x\n",
+ val);
+
+ if (!err)
+ return 0;
+
+halt:
+ gensts.bits = ioread32(idxd->reg_base + IDXD_GENSTATS_OFFSET);
+ if (gensts.state == IDXD_DEVICE_STATE_HALT) {
+ idxd->state = IDXD_DEV_HALTED;
+ if (gensts.reset_type == IDXD_DEVICE_RESET_SOFTWARE) {
+ /*
+ * If we need a software reset, we will throw the work
+ * on a system workqueue in order to allow interrupts
+ * for the device command completions.
+ */
+ INIT_WORK(&idxd->work, idxd_device_reinit);
+ queue_work(idxd->wq, &idxd->work);
+ } else {
+ idxd->state = IDXD_DEV_HALTED;
+ idxd_wqs_quiesce(idxd);
+ idxd_wqs_unmap_portal(idxd);
+ idxd_device_clear_state(idxd);
+ dev_err(&idxd->pdev->dev,
+ "idxd halted, need %s.\n",
+ gensts.reset_type == IDXD_DEVICE_RESET_FLR ?
+ "FLR" : "system reset");
+ return -ENXIO;
+ }
+ }
+
+ return 0;
+}
+
+irqreturn_t idxd_misc_thread(int vec, void *data)
+{
+ struct idxd_irq_entry *irq_entry = data;
+ struct idxd_device *idxd = ie_to_idxd(irq_entry);
+ int rc;
+ u32 cause;
+
+ cause = ioread32(idxd->reg_base + IDXD_INTCAUSE_OFFSET);
+ if (cause)
+ iowrite32(cause, idxd->reg_base + IDXD_INTCAUSE_OFFSET);
+
+ while (cause) {
+ rc = process_misc_interrupts(idxd, cause);
+ if (rc < 0)
+ break;
+ cause = ioread32(idxd->reg_base + IDXD_INTCAUSE_OFFSET);
+ if (cause)
+ iowrite32(cause, idxd->reg_base + IDXD_INTCAUSE_OFFSET);
+ }
+
+ return IRQ_HANDLED;
+}
+
+static void idxd_int_handle_resubmit_work(struct work_struct *work)
+{
+ struct idxd_resubmit *irw = container_of(work, struct idxd_resubmit, work);
+ struct idxd_desc *desc = irw->desc;
+ struct idxd_wq *wq = desc->wq;
+ int rc;
+
+ desc->completion->status = 0;
+ rc = idxd_submit_desc(wq, desc);
+ if (rc < 0) {
+ dev_dbg(&wq->idxd->pdev->dev, "Failed to resubmit desc %d to wq %d.\n",
+ desc->id, wq->id);
+ /*
+ * If the error is not -EAGAIN, it means the submission failed due to wq
+ * has been killed instead of ENQCMDS failure. Here the driver needs to
+ * notify the submitter of the failure by reporting abort status.
+ *
+ * -EAGAIN comes from ENQCMDS failure. idxd_submit_desc() will handle the
+ * abort.
+ */
+ if (rc != -EAGAIN) {
+ desc->completion->status = IDXD_COMP_DESC_ABORT;
+ idxd_dma_complete_txd(desc, IDXD_COMPLETE_ABORT, false);
+ }
+ idxd_free_desc(wq, desc);
+ }
+ kfree(irw);
+}
+
+bool idxd_queue_int_handle_resubmit(struct idxd_desc *desc)
+{
+ struct idxd_wq *wq = desc->wq;
+ struct idxd_device *idxd = wq->idxd;
+ struct idxd_resubmit *irw;
+
+ irw = kzalloc(sizeof(*irw), GFP_KERNEL);
+ if (!irw)
+ return false;
+
+ irw->desc = desc;
+ INIT_WORK(&irw->work, idxd_int_handle_resubmit_work);
+ queue_work(idxd->wq, &irw->work);
+ return true;
+}
+
+static void irq_process_pending_llist(struct idxd_irq_entry *irq_entry)
+{
+ struct idxd_desc *desc, *t;
+ struct llist_node *head;
+
+ head = llist_del_all(&irq_entry->pending_llist);
+ if (!head)
+ return;
+
+ llist_for_each_entry_safe(desc, t, head, llnode) {
+ u8 status = desc->completion->status & DSA_COMP_STATUS_MASK;
+
+ if (status) {
+ /*
+ * Check against the original status as ABORT is software defined
+ * and 0xff, which DSA_COMP_STATUS_MASK can mask out.
+ */
+ if (unlikely(desc->completion->status == IDXD_COMP_DESC_ABORT)) {
+ idxd_dma_complete_txd(desc, IDXD_COMPLETE_ABORT, true);
+ continue;
+ }
+
+ idxd_dma_complete_txd(desc, IDXD_COMPLETE_NORMAL, true);
+ } else {
+ spin_lock(&irq_entry->list_lock);
+ list_add_tail(&desc->list,
+ &irq_entry->work_list);
+ spin_unlock(&irq_entry->list_lock);
+ }
+ }
+}
+
+static void irq_process_work_list(struct idxd_irq_entry *irq_entry)
+{
+ LIST_HEAD(flist);
+ struct idxd_desc *desc, *n;
+
+ /*
+ * This lock protects list corruption from access of list outside of the irq handler
+ * thread.
+ */
+ spin_lock(&irq_entry->list_lock);
+ if (list_empty(&irq_entry->work_list)) {
+ spin_unlock(&irq_entry->list_lock);
+ return;
+ }
+
+ list_for_each_entry_safe(desc, n, &irq_entry->work_list, list) {
+ if (desc->completion->status) {
+ list_move_tail(&desc->list, &flist);
+ }
+ }
+
+ spin_unlock(&irq_entry->list_lock);
+
+ list_for_each_entry(desc, &flist, list) {
+ /*
+ * Check against the original status as ABORT is software defined
+ * and 0xff, which DSA_COMP_STATUS_MASK can mask out.
+ */
+ if (unlikely(desc->completion->status == IDXD_COMP_DESC_ABORT)) {
+ idxd_dma_complete_txd(desc, IDXD_COMPLETE_ABORT, true);
+ continue;
+ }
+
+ idxd_dma_complete_txd(desc, IDXD_COMPLETE_NORMAL, true);
+ }
+}
+
+irqreturn_t idxd_wq_thread(int irq, void *data)
+{
+ struct idxd_irq_entry *irq_entry = data;
+
+ /*
+ * There are two lists we are processing. The pending_llist is where
+ * submmiter adds all the submitted descriptor after sending it to
+ * the workqueue. It's a lockless singly linked list. The work_list
+ * is the common linux double linked list. We are in a scenario of
+ * multiple producers and a single consumer. The producers are all
+ * the kernel submitters of descriptors, and the consumer is the
+ * kernel irq handler thread for the msix vector when using threaded
+ * irq. To work with the restrictions of llist to remain lockless,
+ * we are doing the following steps:
+ * 1. Iterate through the work_list and process any completed
+ * descriptor. Delete the completed entries during iteration.
+ * 2. llist_del_all() from the pending list.
+ * 3. Iterate through the llist that was deleted from the pending list
+ * and process the completed entries.
+ * 4. If the entry is still waiting on hardware, list_add_tail() to
+ * the work_list.
+ */
+ irq_process_work_list(irq_entry);
+ irq_process_pending_llist(irq_entry);
+
+ return IRQ_HANDLED;
+}
diff --git a/drivers/dma/idxd/perfmon.c b/drivers/dma/idxd/perfmon.c
new file mode 100644
index 000000000..d73004f47
--- /dev/null
+++ b/drivers/dma/idxd/perfmon.c
@@ -0,0 +1,662 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2020 Intel Corporation. All rights rsvd. */
+
+#include <linux/sched/task.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
+#include "idxd.h"
+#include "perfmon.h"
+
+static ssize_t cpumask_show(struct device *dev, struct device_attribute *attr,
+ char *buf);
+
+static cpumask_t perfmon_dsa_cpu_mask;
+static bool cpuhp_set_up;
+static enum cpuhp_state cpuhp_slot;
+
+/*
+ * perf userspace reads this attribute to determine which cpus to open
+ * counters on. It's connected to perfmon_dsa_cpu_mask, which is
+ * maintained by the cpu hotplug handlers.
+ */
+static DEVICE_ATTR_RO(cpumask);
+
+static struct attribute *perfmon_cpumask_attrs[] = {
+ &dev_attr_cpumask.attr,
+ NULL,
+};
+
+static struct attribute_group cpumask_attr_group = {
+ .attrs = perfmon_cpumask_attrs,
+};
+
+/*
+ * These attributes specify the bits in the config word that the perf
+ * syscall uses to pass the event ids and categories to perfmon.
+ */
+DEFINE_PERFMON_FORMAT_ATTR(event_category, "config:0-3");
+DEFINE_PERFMON_FORMAT_ATTR(event, "config:4-31");
+
+/*
+ * These attributes specify the bits in the config1 word that the perf
+ * syscall uses to pass filter data to perfmon.
+ */
+DEFINE_PERFMON_FORMAT_ATTR(filter_wq, "config1:0-31");
+DEFINE_PERFMON_FORMAT_ATTR(filter_tc, "config1:32-39");
+DEFINE_PERFMON_FORMAT_ATTR(filter_pgsz, "config1:40-43");
+DEFINE_PERFMON_FORMAT_ATTR(filter_sz, "config1:44-51");
+DEFINE_PERFMON_FORMAT_ATTR(filter_eng, "config1:52-59");
+
+#define PERFMON_FILTERS_START 2
+#define PERFMON_FILTERS_MAX 5
+
+static struct attribute *perfmon_format_attrs[] = {
+ &format_attr_idxd_event_category.attr,
+ &format_attr_idxd_event.attr,
+ &format_attr_idxd_filter_wq.attr,
+ &format_attr_idxd_filter_tc.attr,
+ &format_attr_idxd_filter_pgsz.attr,
+ &format_attr_idxd_filter_sz.attr,
+ &format_attr_idxd_filter_eng.attr,
+ NULL,
+};
+
+static struct attribute_group perfmon_format_attr_group = {
+ .name = "format",
+ .attrs = perfmon_format_attrs,
+};
+
+static const struct attribute_group *perfmon_attr_groups[] = {
+ &perfmon_format_attr_group,
+ &cpumask_attr_group,
+ NULL,
+};
+
+static ssize_t cpumask_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ return cpumap_print_to_pagebuf(true, buf, &perfmon_dsa_cpu_mask);
+}
+
+static bool is_idxd_event(struct idxd_pmu *idxd_pmu, struct perf_event *event)
+{
+ return &idxd_pmu->pmu == event->pmu;
+}
+
+static int perfmon_collect_events(struct idxd_pmu *idxd_pmu,
+ struct perf_event *leader,
+ bool do_grp)
+{
+ struct perf_event *event;
+ int n, max_count;
+
+ max_count = idxd_pmu->n_counters;
+ n = idxd_pmu->n_events;
+
+ if (n >= max_count)
+ return -EINVAL;
+
+ if (is_idxd_event(idxd_pmu, leader)) {
+ idxd_pmu->event_list[n] = leader;
+ idxd_pmu->event_list[n]->hw.idx = n;
+ n++;
+ }
+
+ if (!do_grp)
+ return n;
+
+ for_each_sibling_event(event, leader) {
+ if (!is_idxd_event(idxd_pmu, event) ||
+ event->state <= PERF_EVENT_STATE_OFF)
+ continue;
+
+ if (n >= max_count)
+ return -EINVAL;
+
+ idxd_pmu->event_list[n] = event;
+ idxd_pmu->event_list[n]->hw.idx = n;
+ n++;
+ }
+
+ return n;
+}
+
+static void perfmon_assign_hw_event(struct idxd_pmu *idxd_pmu,
+ struct perf_event *event, int idx)
+{
+ struct idxd_device *idxd = idxd_pmu->idxd;
+ struct hw_perf_event *hwc = &event->hw;
+
+ hwc->idx = idx;
+ hwc->config_base = ioread64(CNTRCFG_REG(idxd, idx));
+ hwc->event_base = ioread64(CNTRCFG_REG(idxd, idx));
+}
+
+static int perfmon_assign_event(struct idxd_pmu *idxd_pmu,
+ struct perf_event *event)
+{
+ int i;
+
+ for (i = 0; i < IDXD_PMU_EVENT_MAX; i++)
+ if (!test_and_set_bit(i, idxd_pmu->used_mask))
+ return i;
+
+ return -EINVAL;
+}
+
+/*
+ * Check whether there are enough counters to satisfy that all the
+ * events in the group can actually be scheduled at the same time.
+ *
+ * To do this, create a fake idxd_pmu object so the event collection
+ * and assignment functions can be used without affecting the internal
+ * state of the real idxd_pmu object.
+ */
+static int perfmon_validate_group(struct idxd_pmu *pmu,
+ struct perf_event *event)
+{
+ struct perf_event *leader = event->group_leader;
+ struct idxd_pmu *fake_pmu;
+ int i, ret = 0, n, idx;
+
+ fake_pmu = kzalloc(sizeof(*fake_pmu), GFP_KERNEL);
+ if (!fake_pmu)
+ return -ENOMEM;
+
+ fake_pmu->pmu.name = pmu->pmu.name;
+ fake_pmu->n_counters = pmu->n_counters;
+
+ n = perfmon_collect_events(fake_pmu, leader, true);
+ if (n < 0) {
+ ret = n;
+ goto out;
+ }
+
+ fake_pmu->n_events = n;
+ n = perfmon_collect_events(fake_pmu, event, false);
+ if (n < 0) {
+ ret = n;
+ goto out;
+ }
+
+ fake_pmu->n_events = n;
+
+ for (i = 0; i < n; i++) {
+ event = fake_pmu->event_list[i];
+
+ idx = perfmon_assign_event(fake_pmu, event);
+ if (idx < 0) {
+ ret = idx;
+ goto out;
+ }
+ }
+out:
+ kfree(fake_pmu);
+
+ return ret;
+}
+
+static int perfmon_pmu_event_init(struct perf_event *event)
+{
+ struct idxd_device *idxd;
+ int ret = 0;
+
+ idxd = event_to_idxd(event);
+ event->hw.idx = -1;
+
+ if (event->attr.type != event->pmu->type)
+ return -ENOENT;
+
+ /* sampling not supported */
+ if (event->attr.sample_period)
+ return -EINVAL;
+
+ if (event->cpu < 0)
+ return -EINVAL;
+
+ if (event->pmu != &idxd->idxd_pmu->pmu)
+ return -EINVAL;
+
+ event->hw.event_base = ioread64(PERFMON_TABLE_OFFSET(idxd));
+ event->cpu = idxd->idxd_pmu->cpu;
+ event->hw.config = event->attr.config;
+
+ if (event->group_leader != event)
+ /* non-group events have themselves as leader */
+ ret = perfmon_validate_group(idxd->idxd_pmu, event);
+
+ return ret;
+}
+
+static inline u64 perfmon_pmu_read_counter(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct idxd_device *idxd;
+ int cntr = hwc->idx;
+
+ idxd = event_to_idxd(event);
+
+ return ioread64(CNTRDATA_REG(idxd, cntr));
+}
+
+static void perfmon_pmu_event_update(struct perf_event *event)
+{
+ struct idxd_device *idxd = event_to_idxd(event);
+ u64 prev_raw_count, new_raw_count, delta, p, n;
+ int shift = 64 - idxd->idxd_pmu->counter_width;
+ struct hw_perf_event *hwc = &event->hw;
+
+ do {
+ prev_raw_count = local64_read(&hwc->prev_count);
+ new_raw_count = perfmon_pmu_read_counter(event);
+ } while (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
+ new_raw_count) != prev_raw_count);
+
+ n = (new_raw_count << shift);
+ p = (prev_raw_count << shift);
+
+ delta = ((n - p) >> shift);
+
+ local64_add(delta, &event->count);
+}
+
+void perfmon_counter_overflow(struct idxd_device *idxd)
+{
+ int i, n_counters, max_loop = OVERFLOW_SIZE;
+ struct perf_event *event;
+ unsigned long ovfstatus;
+
+ n_counters = min(idxd->idxd_pmu->n_counters, OVERFLOW_SIZE);
+
+ ovfstatus = ioread32(OVFSTATUS_REG(idxd));
+
+ /*
+ * While updating overflowed counters, other counters behind
+ * them could overflow and be missed in a given pass.
+ * Normally this could happen at most n_counters times, but in
+ * theory a tiny counter width could result in continual
+ * overflows and endless looping. max_loop provides a
+ * failsafe in that highly unlikely case.
+ */
+ while (ovfstatus && max_loop--) {
+ /* Figure out which counter(s) overflowed */
+ for_each_set_bit(i, &ovfstatus, n_counters) {
+ unsigned long ovfstatus_clear = 0;
+
+ /* Update event->count for overflowed counter */
+ event = idxd->idxd_pmu->event_list[i];
+ perfmon_pmu_event_update(event);
+ /* Writing 1 to OVFSTATUS bit clears it */
+ set_bit(i, &ovfstatus_clear);
+ iowrite32(ovfstatus_clear, OVFSTATUS_REG(idxd));
+ }
+
+ ovfstatus = ioread32(OVFSTATUS_REG(idxd));
+ }
+
+ /*
+ * Should never happen. If so, it means a counter(s) looped
+ * around twice while this handler was running.
+ */
+ WARN_ON_ONCE(ovfstatus);
+}
+
+static inline void perfmon_reset_config(struct idxd_device *idxd)
+{
+ iowrite32(CONFIG_RESET, PERFRST_REG(idxd));
+ iowrite32(0, OVFSTATUS_REG(idxd));
+ iowrite32(0, PERFFRZ_REG(idxd));
+}
+
+static inline void perfmon_reset_counters(struct idxd_device *idxd)
+{
+ iowrite32(CNTR_RESET, PERFRST_REG(idxd));
+}
+
+static inline void perfmon_reset(struct idxd_device *idxd)
+{
+ perfmon_reset_config(idxd);
+ perfmon_reset_counters(idxd);
+}
+
+static void perfmon_pmu_event_start(struct perf_event *event, int mode)
+{
+ u32 flt_wq, flt_tc, flt_pg_sz, flt_xfer_sz, flt_eng = 0;
+ u64 cntr_cfg, cntrdata, event_enc, event_cat = 0;
+ struct hw_perf_event *hwc = &event->hw;
+ union filter_cfg flt_cfg;
+ union event_cfg event_cfg;
+ struct idxd_device *idxd;
+ int cntr;
+
+ idxd = event_to_idxd(event);
+
+ event->hw.idx = hwc->idx;
+ cntr = hwc->idx;
+
+ /* Obtain event category and event value from user space */
+ event_cfg.val = event->attr.config;
+ flt_cfg.val = event->attr.config1;
+ event_cat = event_cfg.event_cat;
+ event_enc = event_cfg.event_enc;
+
+ /* Obtain filter configuration from user space */
+ flt_wq = flt_cfg.wq;
+ flt_tc = flt_cfg.tc;
+ flt_pg_sz = flt_cfg.pg_sz;
+ flt_xfer_sz = flt_cfg.xfer_sz;
+ flt_eng = flt_cfg.eng;
+
+ if (flt_wq && test_bit(FLT_WQ, &idxd->idxd_pmu->supported_filters))
+ iowrite32(flt_wq, FLTCFG_REG(idxd, cntr, FLT_WQ));
+ if (flt_tc && test_bit(FLT_TC, &idxd->idxd_pmu->supported_filters))
+ iowrite32(flt_tc, FLTCFG_REG(idxd, cntr, FLT_TC));
+ if (flt_pg_sz && test_bit(FLT_PG_SZ, &idxd->idxd_pmu->supported_filters))
+ iowrite32(flt_pg_sz, FLTCFG_REG(idxd, cntr, FLT_PG_SZ));
+ if (flt_xfer_sz && test_bit(FLT_XFER_SZ, &idxd->idxd_pmu->supported_filters))
+ iowrite32(flt_xfer_sz, FLTCFG_REG(idxd, cntr, FLT_XFER_SZ));
+ if (flt_eng && test_bit(FLT_ENG, &idxd->idxd_pmu->supported_filters))
+ iowrite32(flt_eng, FLTCFG_REG(idxd, cntr, FLT_ENG));
+
+ /* Read the start value */
+ cntrdata = ioread64(CNTRDATA_REG(idxd, cntr));
+ local64_set(&event->hw.prev_count, cntrdata);
+
+ /* Set counter to event/category */
+ cntr_cfg = event_cat << CNTRCFG_CATEGORY_SHIFT;
+ cntr_cfg |= event_enc << CNTRCFG_EVENT_SHIFT;
+ /* Set interrupt on overflow and counter enable bits */
+ cntr_cfg |= (CNTRCFG_IRQ_OVERFLOW | CNTRCFG_ENABLE);
+
+ iowrite64(cntr_cfg, CNTRCFG_REG(idxd, cntr));
+}
+
+static void perfmon_pmu_event_stop(struct perf_event *event, int mode)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct idxd_device *idxd;
+ int i, cntr = hwc->idx;
+ u64 cntr_cfg;
+
+ idxd = event_to_idxd(event);
+
+ /* remove this event from event list */
+ for (i = 0; i < idxd->idxd_pmu->n_events; i++) {
+ if (event != idxd->idxd_pmu->event_list[i])
+ continue;
+
+ for (++i; i < idxd->idxd_pmu->n_events; i++)
+ idxd->idxd_pmu->event_list[i - 1] = idxd->idxd_pmu->event_list[i];
+ --idxd->idxd_pmu->n_events;
+ break;
+ }
+
+ cntr_cfg = ioread64(CNTRCFG_REG(idxd, cntr));
+ cntr_cfg &= ~CNTRCFG_ENABLE;
+ iowrite64(cntr_cfg, CNTRCFG_REG(idxd, cntr));
+
+ if (mode == PERF_EF_UPDATE)
+ perfmon_pmu_event_update(event);
+
+ event->hw.idx = -1;
+ clear_bit(cntr, idxd->idxd_pmu->used_mask);
+}
+
+static void perfmon_pmu_event_del(struct perf_event *event, int mode)
+{
+ perfmon_pmu_event_stop(event, PERF_EF_UPDATE);
+}
+
+static int perfmon_pmu_event_add(struct perf_event *event, int flags)
+{
+ struct idxd_device *idxd = event_to_idxd(event);
+ struct idxd_pmu *idxd_pmu = idxd->idxd_pmu;
+ struct hw_perf_event *hwc = &event->hw;
+ int idx, n;
+
+ n = perfmon_collect_events(idxd_pmu, event, false);
+ if (n < 0)
+ return n;
+
+ hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+ if (!(flags & PERF_EF_START))
+ hwc->state |= PERF_HES_ARCH;
+
+ idx = perfmon_assign_event(idxd_pmu, event);
+ if (idx < 0)
+ return idx;
+
+ perfmon_assign_hw_event(idxd_pmu, event, idx);
+
+ if (flags & PERF_EF_START)
+ perfmon_pmu_event_start(event, 0);
+
+ idxd_pmu->n_events = n;
+
+ return 0;
+}
+
+static void enable_perfmon_pmu(struct idxd_device *idxd)
+{
+ iowrite32(COUNTER_UNFREEZE, PERFFRZ_REG(idxd));
+}
+
+static void disable_perfmon_pmu(struct idxd_device *idxd)
+{
+ iowrite32(COUNTER_FREEZE, PERFFRZ_REG(idxd));
+}
+
+static void perfmon_pmu_enable(struct pmu *pmu)
+{
+ struct idxd_device *idxd = pmu_to_idxd(pmu);
+
+ enable_perfmon_pmu(idxd);
+}
+
+static void perfmon_pmu_disable(struct pmu *pmu)
+{
+ struct idxd_device *idxd = pmu_to_idxd(pmu);
+
+ disable_perfmon_pmu(idxd);
+}
+
+static void skip_filter(int i)
+{
+ int j;
+
+ for (j = i; j < PERFMON_FILTERS_MAX; j++)
+ perfmon_format_attrs[PERFMON_FILTERS_START + j] =
+ perfmon_format_attrs[PERFMON_FILTERS_START + j + 1];
+}
+
+static void idxd_pmu_init(struct idxd_pmu *idxd_pmu)
+{
+ int i;
+
+ for (i = 0 ; i < PERFMON_FILTERS_MAX; i++) {
+ if (!test_bit(i, &idxd_pmu->supported_filters))
+ skip_filter(i);
+ }
+
+ idxd_pmu->pmu.name = idxd_pmu->name;
+ idxd_pmu->pmu.attr_groups = perfmon_attr_groups;
+ idxd_pmu->pmu.task_ctx_nr = perf_invalid_context;
+ idxd_pmu->pmu.event_init = perfmon_pmu_event_init;
+ idxd_pmu->pmu.pmu_enable = perfmon_pmu_enable,
+ idxd_pmu->pmu.pmu_disable = perfmon_pmu_disable,
+ idxd_pmu->pmu.add = perfmon_pmu_event_add;
+ idxd_pmu->pmu.del = perfmon_pmu_event_del;
+ idxd_pmu->pmu.start = perfmon_pmu_event_start;
+ idxd_pmu->pmu.stop = perfmon_pmu_event_stop;
+ idxd_pmu->pmu.read = perfmon_pmu_event_update;
+ idxd_pmu->pmu.capabilities = PERF_PMU_CAP_NO_EXCLUDE;
+ idxd_pmu->pmu.module = THIS_MODULE;
+}
+
+void perfmon_pmu_remove(struct idxd_device *idxd)
+{
+ if (!idxd->idxd_pmu)
+ return;
+
+ cpuhp_state_remove_instance(cpuhp_slot, &idxd->idxd_pmu->cpuhp_node);
+ perf_pmu_unregister(&idxd->idxd_pmu->pmu);
+ kfree(idxd->idxd_pmu);
+ idxd->idxd_pmu = NULL;
+}
+
+static int perf_event_cpu_online(unsigned int cpu, struct hlist_node *node)
+{
+ struct idxd_pmu *idxd_pmu;
+
+ idxd_pmu = hlist_entry_safe(node, typeof(*idxd_pmu), cpuhp_node);
+
+ /* select the first online CPU as the designated reader */
+ if (cpumask_empty(&perfmon_dsa_cpu_mask)) {
+ cpumask_set_cpu(cpu, &perfmon_dsa_cpu_mask);
+ idxd_pmu->cpu = cpu;
+ }
+
+ return 0;
+}
+
+static int perf_event_cpu_offline(unsigned int cpu, struct hlist_node *node)
+{
+ struct idxd_pmu *idxd_pmu;
+ unsigned int target;
+
+ idxd_pmu = hlist_entry_safe(node, typeof(*idxd_pmu), cpuhp_node);
+
+ if (!cpumask_test_and_clear_cpu(cpu, &perfmon_dsa_cpu_mask))
+ return 0;
+
+ target = cpumask_any_but(cpu_online_mask, cpu);
+
+ /* migrate events if there is a valid target */
+ if (target < nr_cpu_ids)
+ cpumask_set_cpu(target, &perfmon_dsa_cpu_mask);
+ else
+ target = -1;
+
+ perf_pmu_migrate_context(&idxd_pmu->pmu, cpu, target);
+
+ return 0;
+}
+
+int perfmon_pmu_init(struct idxd_device *idxd)
+{
+ union idxd_perfcap perfcap;
+ struct idxd_pmu *idxd_pmu;
+ int rc = -ENODEV;
+
+ /*
+ * perfmon module initialization failed, nothing to do
+ */
+ if (!cpuhp_set_up)
+ return -ENODEV;
+
+ /*
+ * If perfmon_offset or num_counters is 0, it means perfmon is
+ * not supported on this hardware.
+ */
+ if (idxd->perfmon_offset == 0)
+ return -ENODEV;
+
+ idxd_pmu = kzalloc(sizeof(*idxd_pmu), GFP_KERNEL);
+ if (!idxd_pmu)
+ return -ENOMEM;
+
+ idxd_pmu->idxd = idxd;
+ idxd->idxd_pmu = idxd_pmu;
+
+ if (idxd->data->type == IDXD_TYPE_DSA) {
+ rc = sprintf(idxd_pmu->name, "dsa%d", idxd->id);
+ if (rc < 0)
+ goto free;
+ } else if (idxd->data->type == IDXD_TYPE_IAX) {
+ rc = sprintf(idxd_pmu->name, "iax%d", idxd->id);
+ if (rc < 0)
+ goto free;
+ } else {
+ goto free;
+ }
+
+ perfmon_reset(idxd);
+
+ perfcap.bits = ioread64(PERFCAP_REG(idxd));
+
+ /*
+ * If total perf counter is 0, stop further registration.
+ * This is necessary in order to support driver running on
+ * guest which does not have pmon support.
+ */
+ if (perfcap.num_perf_counter == 0)
+ goto free;
+
+ /* A counter width of 0 means it can't count */
+ if (perfcap.counter_width == 0)
+ goto free;
+
+ /* Overflow interrupt and counter freeze support must be available */
+ if (!perfcap.overflow_interrupt || !perfcap.counter_freeze)
+ goto free;
+
+ /* Number of event categories cannot be 0 */
+ if (perfcap.num_event_category == 0)
+ goto free;
+
+ /*
+ * We don't support per-counter capabilities for now.
+ */
+ if (perfcap.cap_per_counter)
+ goto free;
+
+ idxd_pmu->n_event_categories = perfcap.num_event_category;
+ idxd_pmu->supported_event_categories = perfcap.global_event_category;
+ idxd_pmu->per_counter_caps_supported = perfcap.cap_per_counter;
+
+ /* check filter capability. If 0, then filters are not supported */
+ idxd_pmu->supported_filters = perfcap.filter;
+ if (perfcap.filter)
+ idxd_pmu->n_filters = hweight8(perfcap.filter);
+
+ /* Store the total number of counters categories, and counter width */
+ idxd_pmu->n_counters = perfcap.num_perf_counter;
+ idxd_pmu->counter_width = perfcap.counter_width;
+
+ idxd_pmu_init(idxd_pmu);
+
+ rc = perf_pmu_register(&idxd_pmu->pmu, idxd_pmu->name, -1);
+ if (rc)
+ goto free;
+
+ rc = cpuhp_state_add_instance(cpuhp_slot, &idxd_pmu->cpuhp_node);
+ if (rc) {
+ perf_pmu_unregister(&idxd->idxd_pmu->pmu);
+ goto free;
+ }
+out:
+ return rc;
+free:
+ kfree(idxd_pmu);
+ idxd->idxd_pmu = NULL;
+
+ goto out;
+}
+
+void __init perfmon_init(void)
+{
+ int rc = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
+ "driver/dma/idxd/perf:online",
+ perf_event_cpu_online,
+ perf_event_cpu_offline);
+ if (WARN_ON(rc < 0))
+ return;
+
+ cpuhp_slot = rc;
+ cpuhp_set_up = true;
+}
+
+void __exit perfmon_exit(void)
+{
+ if (cpuhp_set_up)
+ cpuhp_remove_multi_state(cpuhp_slot);
+}
diff --git a/drivers/dma/idxd/perfmon.h b/drivers/dma/idxd/perfmon.h
new file mode 100644
index 000000000..9a081a1bc
--- /dev/null
+++ b/drivers/dma/idxd/perfmon.h
@@ -0,0 +1,119 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2020 Intel Corporation. All rights rsvd. */
+
+#ifndef _PERFMON_H_
+#define _PERFMON_H_
+
+#include <linux/slab.h>
+#include <linux/pci.h>
+#include <linux/sbitmap.h>
+#include <linux/dmaengine.h>
+#include <linux/percpu-rwsem.h>
+#include <linux/wait.h>
+#include <linux/cdev.h>
+#include <linux/uuid.h>
+#include <linux/idxd.h>
+#include <linux/perf_event.h>
+#include "registers.h"
+
+static inline struct idxd_pmu *event_to_pmu(struct perf_event *event)
+{
+ struct idxd_pmu *idxd_pmu;
+ struct pmu *pmu;
+
+ pmu = event->pmu;
+ idxd_pmu = container_of(pmu, struct idxd_pmu, pmu);
+
+ return idxd_pmu;
+}
+
+static inline struct idxd_device *event_to_idxd(struct perf_event *event)
+{
+ struct idxd_pmu *idxd_pmu;
+ struct pmu *pmu;
+
+ pmu = event->pmu;
+ idxd_pmu = container_of(pmu, struct idxd_pmu, pmu);
+
+ return idxd_pmu->idxd;
+}
+
+static inline struct idxd_device *pmu_to_idxd(struct pmu *pmu)
+{
+ struct idxd_pmu *idxd_pmu;
+
+ idxd_pmu = container_of(pmu, struct idxd_pmu, pmu);
+
+ return idxd_pmu->idxd;
+}
+
+enum dsa_perf_events {
+ DSA_PERF_EVENT_WQ = 0,
+ DSA_PERF_EVENT_ENGINE,
+ DSA_PERF_EVENT_ADDR_TRANS,
+ DSA_PERF_EVENT_OP,
+ DSA_PERF_EVENT_COMPL,
+ DSA_PERF_EVENT_MAX,
+};
+
+enum filter_enc {
+ FLT_WQ = 0,
+ FLT_TC,
+ FLT_PG_SZ,
+ FLT_XFER_SZ,
+ FLT_ENG,
+ FLT_MAX,
+};
+
+#define CONFIG_RESET 0x0000000000000001
+#define CNTR_RESET 0x0000000000000002
+#define CNTR_ENABLE 0x0000000000000001
+#define INTR_OVFL 0x0000000000000002
+
+#define COUNTER_FREEZE 0x00000000FFFFFFFF
+#define COUNTER_UNFREEZE 0x0000000000000000
+#define OVERFLOW_SIZE 32
+
+#define CNTRCFG_ENABLE BIT(0)
+#define CNTRCFG_IRQ_OVERFLOW BIT(1)
+#define CNTRCFG_CATEGORY_SHIFT 8
+#define CNTRCFG_EVENT_SHIFT 32
+
+#define PERFMON_TABLE_OFFSET(_idxd) \
+({ \
+ typeof(_idxd) __idxd = (_idxd); \
+ ((__idxd)->reg_base + (__idxd)->perfmon_offset); \
+})
+#define PERFMON_REG_OFFSET(idxd, offset) \
+ (PERFMON_TABLE_OFFSET(idxd) + (offset))
+
+#define PERFCAP_REG(idxd) (PERFMON_REG_OFFSET(idxd, IDXD_PERFCAP_OFFSET))
+#define PERFRST_REG(idxd) (PERFMON_REG_OFFSET(idxd, IDXD_PERFRST_OFFSET))
+#define OVFSTATUS_REG(idxd) (PERFMON_REG_OFFSET(idxd, IDXD_OVFSTATUS_OFFSET))
+#define PERFFRZ_REG(idxd) (PERFMON_REG_OFFSET(idxd, IDXD_PERFFRZ_OFFSET))
+
+#define FLTCFG_REG(idxd, cntr, flt) \
+ (PERFMON_REG_OFFSET(idxd, IDXD_FLTCFG_OFFSET) + ((cntr) * 32) + ((flt) * 4))
+
+#define CNTRCFG_REG(idxd, cntr) \
+ (PERFMON_REG_OFFSET(idxd, IDXD_CNTRCFG_OFFSET) + ((cntr) * 8))
+#define CNTRDATA_REG(idxd, cntr) \
+ (PERFMON_REG_OFFSET(idxd, IDXD_CNTRDATA_OFFSET) + ((cntr) * 8))
+#define CNTRCAP_REG(idxd, cntr) \
+ (PERFMON_REG_OFFSET(idxd, IDXD_CNTRCAP_OFFSET) + ((cntr) * 8))
+
+#define EVNTCAP_REG(idxd, category) \
+ (PERFMON_REG_OFFSET(idxd, IDXD_EVNTCAP_OFFSET) + ((category) * 8))
+
+#define DEFINE_PERFMON_FORMAT_ATTR(_name, _format) \
+static ssize_t __perfmon_idxd_##_name##_show(struct kobject *kobj, \
+ struct kobj_attribute *attr, \
+ char *page) \
+{ \
+ BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE); \
+ return sprintf(page, _format "\n"); \
+} \
+static struct kobj_attribute format_attr_idxd_##_name = \
+ __ATTR(_name, 0444, __perfmon_idxd_##_name##_show, NULL)
+
+#endif
diff --git a/drivers/dma/idxd/registers.h b/drivers/dma/idxd/registers.h
new file mode 100644
index 000000000..fe3b8d04f
--- /dev/null
+++ b/drivers/dma/idxd/registers.h
@@ -0,0 +1,516 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2019 Intel Corporation. All rights rsvd. */
+#ifndef _IDXD_REGISTERS_H_
+#define _IDXD_REGISTERS_H_
+
+/* PCI Config */
+#define PCI_DEVICE_ID_INTEL_DSA_SPR0 0x0b25
+#define PCI_DEVICE_ID_INTEL_IAX_SPR0 0x0cfe
+
+#define DEVICE_VERSION_1 0x100
+#define DEVICE_VERSION_2 0x200
+
+#define IDXD_MMIO_BAR 0
+#define IDXD_WQ_BAR 2
+#define IDXD_PORTAL_SIZE PAGE_SIZE
+
+/* MMIO Device BAR0 Registers */
+#define IDXD_VER_OFFSET 0x00
+#define IDXD_VER_MAJOR_MASK 0xf0
+#define IDXD_VER_MINOR_MASK 0x0f
+#define GET_IDXD_VER_MAJOR(x) (((x) & IDXD_VER_MAJOR_MASK) >> 4)
+#define GET_IDXD_VER_MINOR(x) ((x) & IDXD_VER_MINOR_MASK)
+
+union gen_cap_reg {
+ struct {
+ u64 block_on_fault:1;
+ u64 overlap_copy:1;
+ u64 cache_control_mem:1;
+ u64 cache_control_cache:1;
+ u64 cmd_cap:1;
+ u64 rsvd:3;
+ u64 dest_readback:1;
+ u64 drain_readback:1;
+ u64 rsvd2:6;
+ u64 max_xfer_shift:5;
+ u64 max_batch_shift:4;
+ u64 max_ims_mult:6;
+ u64 config_en:1;
+ u64 rsvd3:32;
+ };
+ u64 bits;
+} __packed;
+#define IDXD_GENCAP_OFFSET 0x10
+
+union wq_cap_reg {
+ struct {
+ u64 total_wq_size:16;
+ u64 num_wqs:8;
+ u64 wqcfg_size:4;
+ u64 rsvd:20;
+ u64 shared_mode:1;
+ u64 dedicated_mode:1;
+ u64 wq_ats_support:1;
+ u64 priority:1;
+ u64 occupancy:1;
+ u64 occupancy_int:1;
+ u64 op_config:1;
+ u64 rsvd3:9;
+ };
+ u64 bits;
+} __packed;
+#define IDXD_WQCAP_OFFSET 0x20
+#define IDXD_WQCFG_MIN 5
+
+union group_cap_reg {
+ struct {
+ u64 num_groups:8;
+ u64 total_rdbufs:8; /* formerly total_tokens */
+ u64 rdbuf_ctrl:1; /* formerly token_en */
+ u64 rdbuf_limit:1; /* formerly token_limit */
+ u64 progress_limit:1; /* descriptor and batch descriptor */
+ u64 rsvd:45;
+ };
+ u64 bits;
+} __packed;
+#define IDXD_GRPCAP_OFFSET 0x30
+
+union engine_cap_reg {
+ struct {
+ u64 num_engines:8;
+ u64 rsvd:56;
+ };
+ u64 bits;
+} __packed;
+
+#define IDXD_ENGCAP_OFFSET 0x38
+
+#define IDXD_OPCAP_NOOP 0x0001
+#define IDXD_OPCAP_BATCH 0x0002
+#define IDXD_OPCAP_MEMMOVE 0x0008
+struct opcap {
+ u64 bits[4];
+};
+
+#define IDXD_MAX_OPCAP_BITS 256U
+
+#define IDXD_OPCAP_OFFSET 0x40
+
+#define IDXD_TABLE_OFFSET 0x60
+union offsets_reg {
+ struct {
+ u64 grpcfg:16;
+ u64 wqcfg:16;
+ u64 msix_perm:16;
+ u64 ims:16;
+ u64 perfmon:16;
+ u64 rsvd:48;
+ };
+ u64 bits[2];
+} __packed;
+
+#define IDXD_TABLE_MULT 0x100
+
+#define IDXD_GENCFG_OFFSET 0x80
+union gencfg_reg {
+ struct {
+ u32 rdbuf_limit:8;
+ u32 rsvd:4;
+ u32 user_int_en:1;
+ u32 rsvd2:19;
+ };
+ u32 bits;
+} __packed;
+
+#define IDXD_GENCTRL_OFFSET 0x88
+union genctrl_reg {
+ struct {
+ u32 softerr_int_en:1;
+ u32 halt_int_en:1;
+ u32 rsvd:30;
+ };
+ u32 bits;
+} __packed;
+
+#define IDXD_GENSTATS_OFFSET 0x90
+union gensts_reg {
+ struct {
+ u32 state:2;
+ u32 reset_type:2;
+ u32 rsvd:28;
+ };
+ u32 bits;
+} __packed;
+
+enum idxd_device_status_state {
+ IDXD_DEVICE_STATE_DISABLED = 0,
+ IDXD_DEVICE_STATE_ENABLED,
+ IDXD_DEVICE_STATE_DRAIN,
+ IDXD_DEVICE_STATE_HALT,
+};
+
+enum idxd_device_reset_type {
+ IDXD_DEVICE_RESET_SOFTWARE = 0,
+ IDXD_DEVICE_RESET_FLR,
+ IDXD_DEVICE_RESET_WARM,
+ IDXD_DEVICE_RESET_COLD,
+};
+
+#define IDXD_INTCAUSE_OFFSET 0x98
+#define IDXD_INTC_ERR 0x01
+#define IDXD_INTC_CMD 0x02
+#define IDXD_INTC_OCCUPY 0x04
+#define IDXD_INTC_PERFMON_OVFL 0x08
+#define IDXD_INTC_HALT_STATE 0x10
+#define IDXD_INTC_INT_HANDLE_REVOKED 0x80000000
+
+#define IDXD_CMD_OFFSET 0xa0
+union idxd_command_reg {
+ struct {
+ u32 operand:20;
+ u32 cmd:5;
+ u32 rsvd:6;
+ u32 int_req:1;
+ };
+ u32 bits;
+} __packed;
+
+enum idxd_cmd {
+ IDXD_CMD_ENABLE_DEVICE = 1,
+ IDXD_CMD_DISABLE_DEVICE,
+ IDXD_CMD_DRAIN_ALL,
+ IDXD_CMD_ABORT_ALL,
+ IDXD_CMD_RESET_DEVICE,
+ IDXD_CMD_ENABLE_WQ,
+ IDXD_CMD_DISABLE_WQ,
+ IDXD_CMD_DRAIN_WQ,
+ IDXD_CMD_ABORT_WQ,
+ IDXD_CMD_RESET_WQ,
+ IDXD_CMD_DRAIN_PASID,
+ IDXD_CMD_ABORT_PASID,
+ IDXD_CMD_REQUEST_INT_HANDLE,
+ IDXD_CMD_RELEASE_INT_HANDLE,
+};
+
+#define CMD_INT_HANDLE_IMS 0x10000
+
+#define IDXD_CMDSTS_OFFSET 0xa8
+union cmdsts_reg {
+ struct {
+ u8 err;
+ u16 result;
+ u8 rsvd:7;
+ u8 active:1;
+ };
+ u32 bits;
+} __packed;
+#define IDXD_CMDSTS_ACTIVE 0x80000000
+#define IDXD_CMDSTS_ERR_MASK 0xff
+#define IDXD_CMDSTS_RES_SHIFT 8
+
+enum idxd_cmdsts_err {
+ IDXD_CMDSTS_SUCCESS = 0,
+ IDXD_CMDSTS_INVAL_CMD,
+ IDXD_CMDSTS_INVAL_WQIDX,
+ IDXD_CMDSTS_HW_ERR,
+ /* enable device errors */
+ IDXD_CMDSTS_ERR_DEV_ENABLED = 0x10,
+ IDXD_CMDSTS_ERR_CONFIG,
+ IDXD_CMDSTS_ERR_BUSMASTER_EN,
+ IDXD_CMDSTS_ERR_PASID_INVAL,
+ IDXD_CMDSTS_ERR_WQ_SIZE_ERANGE,
+ IDXD_CMDSTS_ERR_GRP_CONFIG,
+ IDXD_CMDSTS_ERR_GRP_CONFIG2,
+ IDXD_CMDSTS_ERR_GRP_CONFIG3,
+ IDXD_CMDSTS_ERR_GRP_CONFIG4,
+ /* enable wq errors */
+ IDXD_CMDSTS_ERR_DEV_NOTEN = 0x20,
+ IDXD_CMDSTS_ERR_WQ_ENABLED,
+ IDXD_CMDSTS_ERR_WQ_SIZE,
+ IDXD_CMDSTS_ERR_WQ_PRIOR,
+ IDXD_CMDSTS_ERR_WQ_MODE,
+ IDXD_CMDSTS_ERR_BOF_EN,
+ IDXD_CMDSTS_ERR_PASID_EN,
+ IDXD_CMDSTS_ERR_MAX_BATCH_SIZE,
+ IDXD_CMDSTS_ERR_MAX_XFER_SIZE,
+ /* disable device errors */
+ IDXD_CMDSTS_ERR_DIS_DEV_EN = 0x31,
+ /* disable WQ, drain WQ, abort WQ, reset WQ */
+ IDXD_CMDSTS_ERR_DEV_NOT_EN,
+ /* request interrupt handle */
+ IDXD_CMDSTS_ERR_INVAL_INT_IDX = 0x41,
+ IDXD_CMDSTS_ERR_NO_HANDLE,
+};
+
+#define IDXD_CMDCAP_OFFSET 0xb0
+
+#define IDXD_SWERR_OFFSET 0xc0
+#define IDXD_SWERR_VALID 0x00000001
+#define IDXD_SWERR_OVERFLOW 0x00000002
+#define IDXD_SWERR_ACK (IDXD_SWERR_VALID | IDXD_SWERR_OVERFLOW)
+union sw_err_reg {
+ struct {
+ u64 valid:1;
+ u64 overflow:1;
+ u64 desc_valid:1;
+ u64 wq_idx_valid:1;
+ u64 batch:1;
+ u64 fault_rw:1;
+ u64 priv:1;
+ u64 rsvd:1;
+ u64 error:8;
+ u64 wq_idx:8;
+ u64 rsvd2:8;
+ u64 operation:8;
+ u64 pasid:20;
+ u64 rsvd3:4;
+
+ u64 batch_idx:16;
+ u64 rsvd4:16;
+ u64 invalid_flags:32;
+
+ u64 fault_addr;
+
+ u64 rsvd5;
+ };
+ u64 bits[4];
+} __packed;
+
+union msix_perm {
+ struct {
+ u32 rsvd:2;
+ u32 ignore:1;
+ u32 pasid_en:1;
+ u32 rsvd2:8;
+ u32 pasid:20;
+ };
+ u32 bits;
+} __packed;
+
+union group_flags {
+ struct {
+ u64 tc_a:3;
+ u64 tc_b:3;
+ u64 rsvd:1;
+ u64 use_rdbuf_limit:1;
+ u64 rdbufs_reserved:8;
+ u64 rsvd2:4;
+ u64 rdbufs_allowed:8;
+ u64 rsvd3:4;
+ u64 desc_progress_limit:2;
+ u64 rsvd4:2;
+ u64 batch_progress_limit:2;
+ u64 rsvd5:26;
+ };
+ u64 bits;
+} __packed;
+
+struct grpcfg {
+ u64 wqs[4];
+ u64 engines;
+ union group_flags flags;
+} __packed;
+
+union wqcfg {
+ struct {
+ /* bytes 0-3 */
+ u16 wq_size;
+ u16 rsvd;
+
+ /* bytes 4-7 */
+ u16 wq_thresh;
+ u16 rsvd1;
+
+ /* bytes 8-11 */
+ u32 mode:1; /* shared or dedicated */
+ u32 bof:1; /* block on fault */
+ u32 wq_ats_disable:1;
+ u32 rsvd2:1;
+ u32 priority:4;
+ u32 pasid:20;
+ u32 pasid_en:1;
+ u32 priv:1;
+ u32 rsvd3:2;
+
+ /* bytes 12-15 */
+ u32 max_xfer_shift:5;
+ u32 max_batch_shift:4;
+ u32 rsvd4:23;
+
+ /* bytes 16-19 */
+ u16 occupancy_inth;
+ u16 occupancy_table_sel:1;
+ u16 rsvd5:15;
+
+ /* bytes 20-23 */
+ u16 occupancy_limit;
+ u16 occupancy_int_en:1;
+ u16 rsvd6:15;
+
+ /* bytes 24-27 */
+ u16 occupancy;
+ u16 occupancy_int:1;
+ u16 rsvd7:12;
+ u16 mode_support:1;
+ u16 wq_state:2;
+
+ /* bytes 28-31 */
+ u32 rsvd8;
+
+ /* bytes 32-63 */
+ u64 op_config[4];
+ };
+ u32 bits[16];
+} __packed;
+
+#define WQCFG_PASID_IDX 2
+#define WQCFG_PRIVL_IDX 2
+#define WQCFG_OCCUP_IDX 6
+
+#define WQCFG_OCCUP_MASK 0xffff
+
+/*
+ * This macro calculates the offset into the WQCFG register
+ * idxd - struct idxd *
+ * n - wq id
+ * ofs - the index of the 32b dword for the config register
+ *
+ * The WQCFG register block is divided into groups per each wq. The n index
+ * allows us to move to the register group that's for that particular wq.
+ * Each register is 32bits. The ofs gives us the number of register to access.
+ */
+#define WQCFG_OFFSET(_idxd_dev, n, ofs) \
+({\
+ typeof(_idxd_dev) __idxd_dev = (_idxd_dev); \
+ (__idxd_dev)->wqcfg_offset + (n) * (__idxd_dev)->wqcfg_size + sizeof(u32) * (ofs); \
+})
+
+#define WQCFG_STRIDES(_idxd_dev) ((_idxd_dev)->wqcfg_size / sizeof(u32))
+
+#define GRPCFG_SIZE 64
+#define GRPWQCFG_STRIDES 4
+
+/*
+ * This macro calculates the offset into the GRPCFG register
+ * idxd - struct idxd *
+ * n - wq id
+ * ofs - the index of the 32b dword for the config register
+ *
+ * The WQCFG register block is divided into groups per each wq. The n index
+ * allows us to move to the register group that's for that particular wq.
+ * Each register is 32bits. The ofs gives us the number of register to access.
+ */
+#define GRPWQCFG_OFFSET(idxd_dev, n, ofs) ((idxd_dev)->grpcfg_offset +\
+ (n) * GRPCFG_SIZE + sizeof(u64) * (ofs))
+#define GRPENGCFG_OFFSET(idxd_dev, n) ((idxd_dev)->grpcfg_offset + (n) * GRPCFG_SIZE + 32)
+#define GRPFLGCFG_OFFSET(idxd_dev, n) ((idxd_dev)->grpcfg_offset + (n) * GRPCFG_SIZE + 40)
+
+/* Following is performance monitor registers */
+#define IDXD_PERFCAP_OFFSET 0x0
+union idxd_perfcap {
+ struct {
+ u64 num_perf_counter:6;
+ u64 rsvd1:2;
+ u64 counter_width:8;
+ u64 num_event_category:4;
+ u64 global_event_category:16;
+ u64 filter:8;
+ u64 rsvd2:8;
+ u64 cap_per_counter:1;
+ u64 writeable_counter:1;
+ u64 counter_freeze:1;
+ u64 overflow_interrupt:1;
+ u64 rsvd3:8;
+ };
+ u64 bits;
+} __packed;
+
+#define IDXD_EVNTCAP_OFFSET 0x80
+union idxd_evntcap {
+ struct {
+ u64 events:28;
+ u64 rsvd:36;
+ };
+ u64 bits;
+} __packed;
+
+struct idxd_event {
+ union {
+ struct {
+ u32 event_category:4;
+ u32 events:28;
+ };
+ u32 val;
+ };
+} __packed;
+
+#define IDXD_CNTRCAP_OFFSET 0x800
+struct idxd_cntrcap {
+ union {
+ struct {
+ u32 counter_width:8;
+ u32 rsvd:20;
+ u32 num_events:4;
+ };
+ u32 val;
+ };
+ struct idxd_event events[];
+} __packed;
+
+#define IDXD_PERFRST_OFFSET 0x10
+union idxd_perfrst {
+ struct {
+ u32 perfrst_config:1;
+ u32 perfrst_counter:1;
+ u32 rsvd:30;
+ };
+ u32 val;
+} __packed;
+
+#define IDXD_OVFSTATUS_OFFSET 0x30
+#define IDXD_PERFFRZ_OFFSET 0x20
+#define IDXD_CNTRCFG_OFFSET 0x100
+union idxd_cntrcfg {
+ struct {
+ u64 enable:1;
+ u64 interrupt_ovf:1;
+ u64 global_freeze_ovf:1;
+ u64 rsvd1:5;
+ u64 event_category:4;
+ u64 rsvd2:20;
+ u64 events:28;
+ u64 rsvd3:4;
+ };
+ u64 val;
+} __packed;
+
+#define IDXD_FLTCFG_OFFSET 0x300
+
+#define IDXD_CNTRDATA_OFFSET 0x200
+union idxd_cntrdata {
+ struct {
+ u64 event_count_value;
+ };
+ u64 val;
+} __packed;
+
+union event_cfg {
+ struct {
+ u64 event_cat:4;
+ u64 event_enc:28;
+ };
+ u64 val;
+} __packed;
+
+union filter_cfg {
+ struct {
+ u64 wq:32;
+ u64 tc:8;
+ u64 pg_sz:4;
+ u64 xfer_sz:8;
+ u64 eng:8;
+ };
+ u64 val;
+} __packed;
+
+#endif
diff --git a/drivers/dma/idxd/submit.c b/drivers/dma/idxd/submit.c
new file mode 100644
index 000000000..c01db23e3
--- /dev/null
+++ b/drivers/dma/idxd/submit.c
@@ -0,0 +1,217 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2019 Intel Corporation. All rights rsvd. */
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <uapi/linux/idxd.h>
+#include "idxd.h"
+#include "registers.h"
+
+static struct idxd_desc *__get_desc(struct idxd_wq *wq, int idx, int cpu)
+{
+ struct idxd_desc *desc;
+ struct idxd_device *idxd = wq->idxd;
+
+ desc = wq->descs[idx];
+ memset(desc->hw, 0, sizeof(struct dsa_hw_desc));
+ memset(desc->completion, 0, idxd->data->compl_size);
+ desc->cpu = cpu;
+
+ if (device_pasid_enabled(idxd))
+ desc->hw->pasid = idxd->pasid;
+
+ return desc;
+}
+
+struct idxd_desc *idxd_alloc_desc(struct idxd_wq *wq, enum idxd_op_type optype)
+{
+ int cpu, idx;
+ struct idxd_device *idxd = wq->idxd;
+ DEFINE_SBQ_WAIT(wait);
+ struct sbq_wait_state *ws;
+ struct sbitmap_queue *sbq;
+
+ if (idxd->state != IDXD_DEV_ENABLED)
+ return ERR_PTR(-EIO);
+
+ sbq = &wq->sbq;
+ idx = sbitmap_queue_get(sbq, &cpu);
+ if (idx < 0) {
+ if (optype == IDXD_OP_NONBLOCK)
+ return ERR_PTR(-EAGAIN);
+ } else {
+ return __get_desc(wq, idx, cpu);
+ }
+
+ ws = &sbq->ws[0];
+ for (;;) {
+ sbitmap_prepare_to_wait(sbq, ws, &wait, TASK_INTERRUPTIBLE);
+ if (signal_pending_state(TASK_INTERRUPTIBLE, current))
+ break;
+ idx = sbitmap_queue_get(sbq, &cpu);
+ if (idx >= 0)
+ break;
+ schedule();
+ }
+
+ sbitmap_finish_wait(sbq, ws, &wait);
+ if (idx < 0)
+ return ERR_PTR(-EAGAIN);
+
+ return __get_desc(wq, idx, cpu);
+}
+
+void idxd_free_desc(struct idxd_wq *wq, struct idxd_desc *desc)
+{
+ int cpu = desc->cpu;
+
+ desc->cpu = -1;
+ sbitmap_queue_clear(&wq->sbq, desc->id, cpu);
+}
+
+static struct idxd_desc *list_abort_desc(struct idxd_wq *wq, struct idxd_irq_entry *ie,
+ struct idxd_desc *desc)
+{
+ struct idxd_desc *d, *n;
+
+ lockdep_assert_held(&ie->list_lock);
+ list_for_each_entry_safe(d, n, &ie->work_list, list) {
+ if (d == desc) {
+ list_del(&d->list);
+ return d;
+ }
+ }
+
+ /*
+ * At this point, the desc needs to be aborted is held by the completion
+ * handler where it has taken it off the pending list but has not added to the
+ * work list. It will be cleaned up by the interrupt handler when it sees the
+ * IDXD_COMP_DESC_ABORT for completion status.
+ */
+ return NULL;
+}
+
+static void llist_abort_desc(struct idxd_wq *wq, struct idxd_irq_entry *ie,
+ struct idxd_desc *desc)
+{
+ struct idxd_desc *d, *t, *found = NULL;
+ struct llist_node *head;
+ LIST_HEAD(flist);
+
+ desc->completion->status = IDXD_COMP_DESC_ABORT;
+ /*
+ * Grab the list lock so it will block the irq thread handler. This allows the
+ * abort code to locate the descriptor need to be aborted.
+ */
+ spin_lock(&ie->list_lock);
+ head = llist_del_all(&ie->pending_llist);
+ if (head) {
+ llist_for_each_entry_safe(d, t, head, llnode) {
+ if (d == desc) {
+ found = desc;
+ continue;
+ }
+
+ if (d->completion->status)
+ list_add_tail(&d->list, &flist);
+ else
+ list_add_tail(&d->list, &ie->work_list);
+ }
+ }
+
+ if (!found)
+ found = list_abort_desc(wq, ie, desc);
+ spin_unlock(&ie->list_lock);
+
+ if (found)
+ idxd_dma_complete_txd(found, IDXD_COMPLETE_ABORT, false);
+
+ /*
+ * completing the descriptor will return desc to allocator and
+ * the desc can be acquired by a different process and the
+ * desc->list can be modified. Delete desc from list so the
+ * list trasversing does not get corrupted by the other process.
+ */
+ list_for_each_entry_safe(d, t, &flist, list) {
+ list_del_init(&d->list);
+ idxd_dma_complete_txd(found, IDXD_COMPLETE_ABORT, true);
+ }
+}
+
+/*
+ * ENQCMDS typically fail when the WQ is inactive or busy. On host submission, the driver
+ * has better control of number of descriptors being submitted to a shared wq by limiting
+ * the number of driver allocated descriptors to the wq size. However, when the swq is
+ * exported to a guest kernel, it may be shared with multiple guest kernels. This means
+ * the likelihood of getting busy returned on the swq when submitting goes significantly up.
+ * Having a tunable retry mechanism allows the driver to keep trying for a bit before giving
+ * up. The sysfs knob can be tuned by the system administrator.
+ */
+int idxd_enqcmds(struct idxd_wq *wq, void __iomem *portal, const void *desc)
+{
+ unsigned int retries = wq->enqcmds_retries;
+ int rc;
+
+ do {
+ rc = enqcmds(portal, desc);
+ if (rc == 0)
+ break;
+ cpu_relax();
+ } while (retries--);
+
+ return rc;
+}
+
+int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc)
+{
+ struct idxd_device *idxd = wq->idxd;
+ struct idxd_irq_entry *ie = NULL;
+ u32 desc_flags = desc->hw->flags;
+ void __iomem *portal;
+ int rc;
+
+ if (idxd->state != IDXD_DEV_ENABLED)
+ return -EIO;
+
+ if (!percpu_ref_tryget_live(&wq->wq_active)) {
+ wait_for_completion(&wq->wq_resurrect);
+ if (!percpu_ref_tryget_live(&wq->wq_active))
+ return -ENXIO;
+ }
+
+ portal = idxd_wq_portal_addr(wq);
+
+ /*
+ * The wmb() flushes writes to coherent DMA data before
+ * possibly triggering a DMA read. The wmb() is necessary
+ * even on UP because the recipient is a device.
+ */
+ wmb();
+
+ /*
+ * Pending the descriptor to the lockless list for the irq_entry
+ * that we designated the descriptor to.
+ */
+ if (desc_flags & IDXD_OP_FLAG_RCI) {
+ ie = &wq->ie;
+ desc->hw->int_handle = ie->int_handle;
+ llist_add(&desc->llnode, &ie->pending_llist);
+ }
+
+ if (wq_dedicated(wq)) {
+ iosubmit_cmds512(portal, desc->hw, 1);
+ } else {
+ rc = idxd_enqcmds(wq, portal, desc->hw);
+ if (rc < 0) {
+ percpu_ref_put(&wq->wq_active);
+ /* abort operation frees the descriptor */
+ if (ie)
+ llist_abort_desc(wq, ie, desc);
+ return rc;
+ }
+ }
+
+ percpu_ref_put(&wq->wq_active);
+ return 0;
+}
diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c
new file mode 100644
index 000000000..6e1e14b37
--- /dev/null
+++ b/drivers/dma/idxd/sysfs.c
@@ -0,0 +1,1818 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2019 Intel Corporation. All rights rsvd. */
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/device.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
+#include <uapi/linux/idxd.h>
+#include "registers.h"
+#include "idxd.h"
+
+static char *idxd_wq_type_names[] = {
+ [IDXD_WQT_NONE] = "none",
+ [IDXD_WQT_KERNEL] = "kernel",
+ [IDXD_WQT_USER] = "user",
+};
+
+/* IDXD engine attributes */
+static ssize_t engine_group_id_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct idxd_engine *engine = confdev_to_engine(dev);
+
+ if (engine->group)
+ return sysfs_emit(buf, "%d\n", engine->group->id);
+ else
+ return sysfs_emit(buf, "%d\n", -1);
+}
+
+static ssize_t engine_group_id_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct idxd_engine *engine = confdev_to_engine(dev);
+ struct idxd_device *idxd = engine->idxd;
+ long id;
+ int rc;
+ struct idxd_group *prevg;
+
+ rc = kstrtol(buf, 10, &id);
+ if (rc < 0)
+ return -EINVAL;
+
+ if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
+ return -EPERM;
+
+ if (id > idxd->max_groups - 1 || id < -1)
+ return -EINVAL;
+
+ if (id == -1) {
+ if (engine->group) {
+ engine->group->num_engines--;
+ engine->group = NULL;
+ }
+ return count;
+ }
+
+ prevg = engine->group;
+
+ if (prevg)
+ prevg->num_engines--;
+ engine->group = idxd->groups[id];
+ engine->group->num_engines++;
+
+ return count;
+}
+
+static struct device_attribute dev_attr_engine_group =
+ __ATTR(group_id, 0644, engine_group_id_show,
+ engine_group_id_store);
+
+static struct attribute *idxd_engine_attributes[] = {
+ &dev_attr_engine_group.attr,
+ NULL,
+};
+
+static const struct attribute_group idxd_engine_attribute_group = {
+ .attrs = idxd_engine_attributes,
+};
+
+static const struct attribute_group *idxd_engine_attribute_groups[] = {
+ &idxd_engine_attribute_group,
+ NULL,
+};
+
+static void idxd_conf_engine_release(struct device *dev)
+{
+ struct idxd_engine *engine = confdev_to_engine(dev);
+
+ kfree(engine);
+}
+
+struct device_type idxd_engine_device_type = {
+ .name = "engine",
+ .release = idxd_conf_engine_release,
+ .groups = idxd_engine_attribute_groups,
+};
+
+/* Group attributes */
+
+static void idxd_set_free_rdbufs(struct idxd_device *idxd)
+{
+ int i, rdbufs;
+
+ for (i = 0, rdbufs = 0; i < idxd->max_groups; i++) {
+ struct idxd_group *g = idxd->groups[i];
+
+ rdbufs += g->rdbufs_reserved;
+ }
+
+ idxd->nr_rdbufs = idxd->max_rdbufs - rdbufs;
+}
+
+static ssize_t group_read_buffers_reserved_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct idxd_group *group = confdev_to_group(dev);
+
+ return sysfs_emit(buf, "%u\n", group->rdbufs_reserved);
+}
+
+static ssize_t group_tokens_reserved_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ dev_warn_once(dev, "attribute deprecated, see read_buffers_reserved.\n");
+ return group_read_buffers_reserved_show(dev, attr, buf);
+}
+
+static ssize_t group_read_buffers_reserved_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct idxd_group *group = confdev_to_group(dev);
+ struct idxd_device *idxd = group->idxd;
+ unsigned long val;
+ int rc;
+
+ rc = kstrtoul(buf, 10, &val);
+ if (rc < 0)
+ return -EINVAL;
+
+ if (idxd->data->type == IDXD_TYPE_IAX)
+ return -EOPNOTSUPP;
+
+ if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
+ return -EPERM;
+
+ if (idxd->state == IDXD_DEV_ENABLED)
+ return -EPERM;
+
+ if (val > idxd->max_rdbufs)
+ return -EINVAL;
+
+ if (val > idxd->nr_rdbufs + group->rdbufs_reserved)
+ return -EINVAL;
+
+ group->rdbufs_reserved = val;
+ idxd_set_free_rdbufs(idxd);
+ return count;
+}
+
+static ssize_t group_tokens_reserved_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ dev_warn_once(dev, "attribute deprecated, see read_buffers_reserved.\n");
+ return group_read_buffers_reserved_store(dev, attr, buf, count);
+}
+
+static struct device_attribute dev_attr_group_tokens_reserved =
+ __ATTR(tokens_reserved, 0644, group_tokens_reserved_show,
+ group_tokens_reserved_store);
+
+static struct device_attribute dev_attr_group_read_buffers_reserved =
+ __ATTR(read_buffers_reserved, 0644, group_read_buffers_reserved_show,
+ group_read_buffers_reserved_store);
+
+static ssize_t group_read_buffers_allowed_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct idxd_group *group = confdev_to_group(dev);
+
+ return sysfs_emit(buf, "%u\n", group->rdbufs_allowed);
+}
+
+static ssize_t group_tokens_allowed_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ dev_warn_once(dev, "attribute deprecated, see read_buffers_allowed.\n");
+ return group_read_buffers_allowed_show(dev, attr, buf);
+}
+
+static ssize_t group_read_buffers_allowed_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct idxd_group *group = confdev_to_group(dev);
+ struct idxd_device *idxd = group->idxd;
+ unsigned long val;
+ int rc;
+
+ rc = kstrtoul(buf, 10, &val);
+ if (rc < 0)
+ return -EINVAL;
+
+ if (idxd->data->type == IDXD_TYPE_IAX)
+ return -EOPNOTSUPP;
+
+ if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
+ return -EPERM;
+
+ if (idxd->state == IDXD_DEV_ENABLED)
+ return -EPERM;
+
+ if (val < 4 * group->num_engines ||
+ val > group->rdbufs_reserved + idxd->nr_rdbufs)
+ return -EINVAL;
+
+ group->rdbufs_allowed = val;
+ return count;
+}
+
+static ssize_t group_tokens_allowed_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ dev_warn_once(dev, "attribute deprecated, see read_buffers_allowed.\n");
+ return group_read_buffers_allowed_store(dev, attr, buf, count);
+}
+
+static struct device_attribute dev_attr_group_tokens_allowed =
+ __ATTR(tokens_allowed, 0644, group_tokens_allowed_show,
+ group_tokens_allowed_store);
+
+static struct device_attribute dev_attr_group_read_buffers_allowed =
+ __ATTR(read_buffers_allowed, 0644, group_read_buffers_allowed_show,
+ group_read_buffers_allowed_store);
+
+static ssize_t group_use_read_buffer_limit_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct idxd_group *group = confdev_to_group(dev);
+
+ return sysfs_emit(buf, "%u\n", group->use_rdbuf_limit);
+}
+
+static ssize_t group_use_token_limit_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ dev_warn_once(dev, "attribute deprecated, see use_read_buffer_limit.\n");
+ return group_use_read_buffer_limit_show(dev, attr, buf);
+}
+
+static ssize_t group_use_read_buffer_limit_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct idxd_group *group = confdev_to_group(dev);
+ struct idxd_device *idxd = group->idxd;
+ unsigned long val;
+ int rc;
+
+ rc = kstrtoul(buf, 10, &val);
+ if (rc < 0)
+ return -EINVAL;
+
+ if (idxd->data->type == IDXD_TYPE_IAX)
+ return -EOPNOTSUPP;
+
+ if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
+ return -EPERM;
+
+ if (idxd->state == IDXD_DEV_ENABLED)
+ return -EPERM;
+
+ if (idxd->rdbuf_limit == 0)
+ return -EPERM;
+
+ group->use_rdbuf_limit = !!val;
+ return count;
+}
+
+static ssize_t group_use_token_limit_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ dev_warn_once(dev, "attribute deprecated, see use_read_buffer_limit.\n");
+ return group_use_read_buffer_limit_store(dev, attr, buf, count);
+}
+
+static struct device_attribute dev_attr_group_use_token_limit =
+ __ATTR(use_token_limit, 0644, group_use_token_limit_show,
+ group_use_token_limit_store);
+
+static struct device_attribute dev_attr_group_use_read_buffer_limit =
+ __ATTR(use_read_buffer_limit, 0644, group_use_read_buffer_limit_show,
+ group_use_read_buffer_limit_store);
+
+static ssize_t group_engines_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct idxd_group *group = confdev_to_group(dev);
+ int i, rc = 0;
+ struct idxd_device *idxd = group->idxd;
+
+ for (i = 0; i < idxd->max_engines; i++) {
+ struct idxd_engine *engine = idxd->engines[i];
+
+ if (!engine->group)
+ continue;
+
+ if (engine->group->id == group->id)
+ rc += sysfs_emit_at(buf, rc, "engine%d.%d ", idxd->id, engine->id);
+ }
+
+ if (!rc)
+ return 0;
+ rc--;
+ rc += sysfs_emit_at(buf, rc, "\n");
+
+ return rc;
+}
+
+static struct device_attribute dev_attr_group_engines =
+ __ATTR(engines, 0444, group_engines_show, NULL);
+
+static ssize_t group_work_queues_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct idxd_group *group = confdev_to_group(dev);
+ int i, rc = 0;
+ struct idxd_device *idxd = group->idxd;
+
+ for (i = 0; i < idxd->max_wqs; i++) {
+ struct idxd_wq *wq = idxd->wqs[i];
+
+ if (!wq->group)
+ continue;
+
+ if (wq->group->id == group->id)
+ rc += sysfs_emit_at(buf, rc, "wq%d.%d ", idxd->id, wq->id);
+ }
+
+ if (!rc)
+ return 0;
+ rc--;
+ rc += sysfs_emit_at(buf, rc, "\n");
+
+ return rc;
+}
+
+static struct device_attribute dev_attr_group_work_queues =
+ __ATTR(work_queues, 0444, group_work_queues_show, NULL);
+
+static ssize_t group_traffic_class_a_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct idxd_group *group = confdev_to_group(dev);
+
+ return sysfs_emit(buf, "%d\n", group->tc_a);
+}
+
+static ssize_t group_traffic_class_a_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct idxd_group *group = confdev_to_group(dev);
+ struct idxd_device *idxd = group->idxd;
+ long val;
+ int rc;
+
+ rc = kstrtol(buf, 10, &val);
+ if (rc < 0)
+ return -EINVAL;
+
+ if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
+ return -EPERM;
+
+ if (idxd->state == IDXD_DEV_ENABLED)
+ return -EPERM;
+
+ if (idxd->hw.version <= DEVICE_VERSION_2 && !tc_override)
+ return -EPERM;
+
+ if (val < 0 || val > 7)
+ return -EINVAL;
+
+ group->tc_a = val;
+ return count;
+}
+
+static struct device_attribute dev_attr_group_traffic_class_a =
+ __ATTR(traffic_class_a, 0644, group_traffic_class_a_show,
+ group_traffic_class_a_store);
+
+static ssize_t group_traffic_class_b_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct idxd_group *group = confdev_to_group(dev);
+
+ return sysfs_emit(buf, "%d\n", group->tc_b);
+}
+
+static ssize_t group_traffic_class_b_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct idxd_group *group = confdev_to_group(dev);
+ struct idxd_device *idxd = group->idxd;
+ long val;
+ int rc;
+
+ rc = kstrtol(buf, 10, &val);
+ if (rc < 0)
+ return -EINVAL;
+
+ if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
+ return -EPERM;
+
+ if (idxd->state == IDXD_DEV_ENABLED)
+ return -EPERM;
+
+ if (idxd->hw.version <= DEVICE_VERSION_2 && !tc_override)
+ return -EPERM;
+
+ if (val < 0 || val > 7)
+ return -EINVAL;
+
+ group->tc_b = val;
+ return count;
+}
+
+static struct device_attribute dev_attr_group_traffic_class_b =
+ __ATTR(traffic_class_b, 0644, group_traffic_class_b_show,
+ group_traffic_class_b_store);
+
+static ssize_t group_desc_progress_limit_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct idxd_group *group = confdev_to_group(dev);
+
+ return sysfs_emit(buf, "%d\n", group->desc_progress_limit);
+}
+
+static ssize_t group_desc_progress_limit_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct idxd_group *group = confdev_to_group(dev);
+ int val, rc;
+
+ rc = kstrtoint(buf, 10, &val);
+ if (rc < 0)
+ return -EINVAL;
+
+ if (val & ~GENMASK(1, 0))
+ return -EINVAL;
+
+ group->desc_progress_limit = val;
+ return count;
+}
+
+static struct device_attribute dev_attr_group_desc_progress_limit =
+ __ATTR(desc_progress_limit, 0644, group_desc_progress_limit_show,
+ group_desc_progress_limit_store);
+
+static ssize_t group_batch_progress_limit_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct idxd_group *group = confdev_to_group(dev);
+
+ return sysfs_emit(buf, "%d\n", group->batch_progress_limit);
+}
+
+static ssize_t group_batch_progress_limit_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct idxd_group *group = confdev_to_group(dev);
+ int val, rc;
+
+ rc = kstrtoint(buf, 10, &val);
+ if (rc < 0)
+ return -EINVAL;
+
+ if (val & ~GENMASK(1, 0))
+ return -EINVAL;
+
+ group->batch_progress_limit = val;
+ return count;
+}
+
+static struct device_attribute dev_attr_group_batch_progress_limit =
+ __ATTR(batch_progress_limit, 0644, group_batch_progress_limit_show,
+ group_batch_progress_limit_store);
+static struct attribute *idxd_group_attributes[] = {
+ &dev_attr_group_work_queues.attr,
+ &dev_attr_group_engines.attr,
+ &dev_attr_group_use_token_limit.attr,
+ &dev_attr_group_use_read_buffer_limit.attr,
+ &dev_attr_group_tokens_allowed.attr,
+ &dev_attr_group_read_buffers_allowed.attr,
+ &dev_attr_group_tokens_reserved.attr,
+ &dev_attr_group_read_buffers_reserved.attr,
+ &dev_attr_group_traffic_class_a.attr,
+ &dev_attr_group_traffic_class_b.attr,
+ &dev_attr_group_desc_progress_limit.attr,
+ &dev_attr_group_batch_progress_limit.attr,
+ NULL,
+};
+
+static bool idxd_group_attr_progress_limit_invisible(struct attribute *attr,
+ struct idxd_device *idxd)
+{
+ return (attr == &dev_attr_group_desc_progress_limit.attr ||
+ attr == &dev_attr_group_batch_progress_limit.attr) &&
+ !idxd->hw.group_cap.progress_limit;
+}
+
+static bool idxd_group_attr_read_buffers_invisible(struct attribute *attr,
+ struct idxd_device *idxd)
+{
+ /*
+ * Intel IAA does not support Read Buffer allocation control,
+ * make these attributes invisible.
+ */
+ return (attr == &dev_attr_group_use_token_limit.attr ||
+ attr == &dev_attr_group_use_read_buffer_limit.attr ||
+ attr == &dev_attr_group_tokens_allowed.attr ||
+ attr == &dev_attr_group_read_buffers_allowed.attr ||
+ attr == &dev_attr_group_tokens_reserved.attr ||
+ attr == &dev_attr_group_read_buffers_reserved.attr) &&
+ idxd->data->type == IDXD_TYPE_IAX;
+}
+
+static umode_t idxd_group_attr_visible(struct kobject *kobj,
+ struct attribute *attr, int n)
+{
+ struct device *dev = container_of(kobj, struct device, kobj);
+ struct idxd_group *group = confdev_to_group(dev);
+ struct idxd_device *idxd = group->idxd;
+
+ if (idxd_group_attr_progress_limit_invisible(attr, idxd))
+ return 0;
+
+ if (idxd_group_attr_read_buffers_invisible(attr, idxd))
+ return 0;
+
+ return attr->mode;
+}
+
+static const struct attribute_group idxd_group_attribute_group = {
+ .attrs = idxd_group_attributes,
+ .is_visible = idxd_group_attr_visible,
+};
+
+static const struct attribute_group *idxd_group_attribute_groups[] = {
+ &idxd_group_attribute_group,
+ NULL,
+};
+
+static void idxd_conf_group_release(struct device *dev)
+{
+ struct idxd_group *group = confdev_to_group(dev);
+
+ kfree(group);
+}
+
+struct device_type idxd_group_device_type = {
+ .name = "group",
+ .release = idxd_conf_group_release,
+ .groups = idxd_group_attribute_groups,
+};
+
+/* IDXD work queue attribs */
+static ssize_t wq_clients_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct idxd_wq *wq = confdev_to_wq(dev);
+
+ return sysfs_emit(buf, "%d\n", wq->client_count);
+}
+
+static struct device_attribute dev_attr_wq_clients =
+ __ATTR(clients, 0444, wq_clients_show, NULL);
+
+static ssize_t wq_state_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct idxd_wq *wq = confdev_to_wq(dev);
+
+ switch (wq->state) {
+ case IDXD_WQ_DISABLED:
+ return sysfs_emit(buf, "disabled\n");
+ case IDXD_WQ_ENABLED:
+ return sysfs_emit(buf, "enabled\n");
+ }
+
+ return sysfs_emit(buf, "unknown\n");
+}
+
+static struct device_attribute dev_attr_wq_state =
+ __ATTR(state, 0444, wq_state_show, NULL);
+
+static ssize_t wq_group_id_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct idxd_wq *wq = confdev_to_wq(dev);
+
+ if (wq->group)
+ return sysfs_emit(buf, "%u\n", wq->group->id);
+ else
+ return sysfs_emit(buf, "-1\n");
+}
+
+static ssize_t wq_group_id_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct idxd_wq *wq = confdev_to_wq(dev);
+ struct idxd_device *idxd = wq->idxd;
+ long id;
+ int rc;
+ struct idxd_group *prevg, *group;
+
+ rc = kstrtol(buf, 10, &id);
+ if (rc < 0)
+ return -EINVAL;
+
+ if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
+ return -EPERM;
+
+ if (wq->state != IDXD_WQ_DISABLED)
+ return -EPERM;
+
+ if (id > idxd->max_groups - 1 || id < -1)
+ return -EINVAL;
+
+ if (id == -1) {
+ if (wq->group) {
+ wq->group->num_wqs--;
+ wq->group = NULL;
+ }
+ return count;
+ }
+
+ group = idxd->groups[id];
+ prevg = wq->group;
+
+ if (prevg)
+ prevg->num_wqs--;
+ wq->group = group;
+ group->num_wqs++;
+ return count;
+}
+
+static struct device_attribute dev_attr_wq_group_id =
+ __ATTR(group_id, 0644, wq_group_id_show, wq_group_id_store);
+
+static ssize_t wq_mode_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct idxd_wq *wq = confdev_to_wq(dev);
+
+ return sysfs_emit(buf, "%s\n", wq_dedicated(wq) ? "dedicated" : "shared");
+}
+
+static ssize_t wq_mode_store(struct device *dev,
+ struct device_attribute *attr, const char *buf,
+ size_t count)
+{
+ struct idxd_wq *wq = confdev_to_wq(dev);
+ struct idxd_device *idxd = wq->idxd;
+
+ if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
+ return -EPERM;
+
+ if (wq->state != IDXD_WQ_DISABLED)
+ return -EPERM;
+
+ if (sysfs_streq(buf, "dedicated")) {
+ set_bit(WQ_FLAG_DEDICATED, &wq->flags);
+ wq->threshold = 0;
+ } else if (sysfs_streq(buf, "shared")) {
+ clear_bit(WQ_FLAG_DEDICATED, &wq->flags);
+ } else {
+ return -EINVAL;
+ }
+
+ return count;
+}
+
+static struct device_attribute dev_attr_wq_mode =
+ __ATTR(mode, 0644, wq_mode_show, wq_mode_store);
+
+static ssize_t wq_size_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct idxd_wq *wq = confdev_to_wq(dev);
+
+ return sysfs_emit(buf, "%u\n", wq->size);
+}
+
+static int total_claimed_wq_size(struct idxd_device *idxd)
+{
+ int i;
+ int wq_size = 0;
+
+ for (i = 0; i < idxd->max_wqs; i++) {
+ struct idxd_wq *wq = idxd->wqs[i];
+
+ wq_size += wq->size;
+ }
+
+ return wq_size;
+}
+
+static ssize_t wq_size_store(struct device *dev,
+ struct device_attribute *attr, const char *buf,
+ size_t count)
+{
+ struct idxd_wq *wq = confdev_to_wq(dev);
+ unsigned long size;
+ struct idxd_device *idxd = wq->idxd;
+ int rc;
+
+ rc = kstrtoul(buf, 10, &size);
+ if (rc < 0)
+ return -EINVAL;
+
+ if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
+ return -EPERM;
+
+ if (idxd->state == IDXD_DEV_ENABLED)
+ return -EPERM;
+
+ if (size + total_claimed_wq_size(idxd) - wq->size > idxd->max_wq_size)
+ return -EINVAL;
+
+ wq->size = size;
+ return count;
+}
+
+static struct device_attribute dev_attr_wq_size =
+ __ATTR(size, 0644, wq_size_show, wq_size_store);
+
+static ssize_t wq_priority_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct idxd_wq *wq = confdev_to_wq(dev);
+
+ return sysfs_emit(buf, "%u\n", wq->priority);
+}
+
+static ssize_t wq_priority_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct idxd_wq *wq = confdev_to_wq(dev);
+ unsigned long prio;
+ struct idxd_device *idxd = wq->idxd;
+ int rc;
+
+ rc = kstrtoul(buf, 10, &prio);
+ if (rc < 0)
+ return -EINVAL;
+
+ if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
+ return -EPERM;
+
+ if (wq->state != IDXD_WQ_DISABLED)
+ return -EPERM;
+
+ if (prio > IDXD_MAX_PRIORITY)
+ return -EINVAL;
+
+ wq->priority = prio;
+ return count;
+}
+
+static struct device_attribute dev_attr_wq_priority =
+ __ATTR(priority, 0644, wq_priority_show, wq_priority_store);
+
+static ssize_t wq_block_on_fault_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct idxd_wq *wq = confdev_to_wq(dev);
+
+ return sysfs_emit(buf, "%u\n", test_bit(WQ_FLAG_BLOCK_ON_FAULT, &wq->flags));
+}
+
+static ssize_t wq_block_on_fault_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct idxd_wq *wq = confdev_to_wq(dev);
+ struct idxd_device *idxd = wq->idxd;
+ bool bof;
+ int rc;
+
+ if (!idxd->hw.gen_cap.block_on_fault)
+ return -EOPNOTSUPP;
+
+ if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
+ return -EPERM;
+
+ if (wq->state != IDXD_WQ_DISABLED)
+ return -ENXIO;
+
+ rc = kstrtobool(buf, &bof);
+ if (rc < 0)
+ return rc;
+
+ if (bof)
+ set_bit(WQ_FLAG_BLOCK_ON_FAULT, &wq->flags);
+ else
+ clear_bit(WQ_FLAG_BLOCK_ON_FAULT, &wq->flags);
+
+ return count;
+}
+
+static struct device_attribute dev_attr_wq_block_on_fault =
+ __ATTR(block_on_fault, 0644, wq_block_on_fault_show,
+ wq_block_on_fault_store);
+
+static ssize_t wq_threshold_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct idxd_wq *wq = confdev_to_wq(dev);
+
+ return sysfs_emit(buf, "%u\n", wq->threshold);
+}
+
+static ssize_t wq_threshold_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct idxd_wq *wq = confdev_to_wq(dev);
+ struct idxd_device *idxd = wq->idxd;
+ unsigned int val;
+ int rc;
+
+ rc = kstrtouint(buf, 0, &val);
+ if (rc < 0)
+ return -EINVAL;
+
+ if (val > wq->size || val <= 0)
+ return -EINVAL;
+
+ if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
+ return -EPERM;
+
+ if (wq->state != IDXD_WQ_DISABLED)
+ return -ENXIO;
+
+ if (test_bit(WQ_FLAG_DEDICATED, &wq->flags))
+ return -EINVAL;
+
+ wq->threshold = val;
+
+ return count;
+}
+
+static struct device_attribute dev_attr_wq_threshold =
+ __ATTR(threshold, 0644, wq_threshold_show, wq_threshold_store);
+
+static ssize_t wq_type_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct idxd_wq *wq = confdev_to_wq(dev);
+
+ switch (wq->type) {
+ case IDXD_WQT_KERNEL:
+ return sysfs_emit(buf, "%s\n", idxd_wq_type_names[IDXD_WQT_KERNEL]);
+ case IDXD_WQT_USER:
+ return sysfs_emit(buf, "%s\n", idxd_wq_type_names[IDXD_WQT_USER]);
+ case IDXD_WQT_NONE:
+ default:
+ return sysfs_emit(buf, "%s\n", idxd_wq_type_names[IDXD_WQT_NONE]);
+ }
+
+ return -EINVAL;
+}
+
+static ssize_t wq_type_store(struct device *dev,
+ struct device_attribute *attr, const char *buf,
+ size_t count)
+{
+ struct idxd_wq *wq = confdev_to_wq(dev);
+ enum idxd_wq_type old_type;
+
+ if (wq->state != IDXD_WQ_DISABLED)
+ return -EPERM;
+
+ old_type = wq->type;
+ if (sysfs_streq(buf, idxd_wq_type_names[IDXD_WQT_NONE]))
+ wq->type = IDXD_WQT_NONE;
+ else if (sysfs_streq(buf, idxd_wq_type_names[IDXD_WQT_KERNEL]))
+ wq->type = IDXD_WQT_KERNEL;
+ else if (sysfs_streq(buf, idxd_wq_type_names[IDXD_WQT_USER]))
+ wq->type = IDXD_WQT_USER;
+ else
+ return -EINVAL;
+
+ /* If we are changing queue type, clear the name */
+ if (wq->type != old_type)
+ memset(wq->name, 0, WQ_NAME_SIZE + 1);
+
+ return count;
+}
+
+static struct device_attribute dev_attr_wq_type =
+ __ATTR(type, 0644, wq_type_show, wq_type_store);
+
+static ssize_t wq_name_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct idxd_wq *wq = confdev_to_wq(dev);
+
+ return sysfs_emit(buf, "%s\n", wq->name);
+}
+
+static ssize_t wq_name_store(struct device *dev,
+ struct device_attribute *attr, const char *buf,
+ size_t count)
+{
+ struct idxd_wq *wq = confdev_to_wq(dev);
+ char *input, *pos;
+
+ if (wq->state != IDXD_WQ_DISABLED)
+ return -EPERM;
+
+ if (strlen(buf) > WQ_NAME_SIZE || strlen(buf) == 0)
+ return -EINVAL;
+
+ /*
+ * This is temporarily placed here until we have SVM support for
+ * dmaengine.
+ */
+ if (wq->type == IDXD_WQT_KERNEL && device_pasid_enabled(wq->idxd))
+ return -EOPNOTSUPP;
+
+ input = kstrndup(buf, count, GFP_KERNEL);
+ if (!input)
+ return -ENOMEM;
+
+ pos = strim(input);
+ memset(wq->name, 0, WQ_NAME_SIZE + 1);
+ sprintf(wq->name, "%s", pos);
+ kfree(input);
+ return count;
+}
+
+static struct device_attribute dev_attr_wq_name =
+ __ATTR(name, 0644, wq_name_show, wq_name_store);
+
+static ssize_t wq_cdev_minor_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct idxd_wq *wq = confdev_to_wq(dev);
+ int minor = -1;
+
+ mutex_lock(&wq->wq_lock);
+ if (wq->idxd_cdev)
+ minor = wq->idxd_cdev->minor;
+ mutex_unlock(&wq->wq_lock);
+
+ if (minor == -1)
+ return -ENXIO;
+ return sysfs_emit(buf, "%d\n", minor);
+}
+
+static struct device_attribute dev_attr_wq_cdev_minor =
+ __ATTR(cdev_minor, 0444, wq_cdev_minor_show, NULL);
+
+static int __get_sysfs_u64(const char *buf, u64 *val)
+{
+ int rc;
+
+ rc = kstrtou64(buf, 0, val);
+ if (rc < 0)
+ return -EINVAL;
+
+ if (*val == 0)
+ return -EINVAL;
+
+ *val = roundup_pow_of_two(*val);
+ return 0;
+}
+
+static ssize_t wq_max_transfer_size_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct idxd_wq *wq = confdev_to_wq(dev);
+
+ return sysfs_emit(buf, "%llu\n", wq->max_xfer_bytes);
+}
+
+static ssize_t wq_max_transfer_size_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct idxd_wq *wq = confdev_to_wq(dev);
+ struct idxd_device *idxd = wq->idxd;
+ u64 xfer_size;
+ int rc;
+
+ if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
+ return -EPERM;
+
+ if (wq->state != IDXD_WQ_DISABLED)
+ return -EPERM;
+
+ rc = __get_sysfs_u64(buf, &xfer_size);
+ if (rc < 0)
+ return rc;
+
+ if (xfer_size > idxd->max_xfer_bytes)
+ return -EINVAL;
+
+ wq->max_xfer_bytes = xfer_size;
+
+ return count;
+}
+
+static struct device_attribute dev_attr_wq_max_transfer_size =
+ __ATTR(max_transfer_size, 0644,
+ wq_max_transfer_size_show, wq_max_transfer_size_store);
+
+static ssize_t wq_max_batch_size_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct idxd_wq *wq = confdev_to_wq(dev);
+
+ return sysfs_emit(buf, "%u\n", wq->max_batch_size);
+}
+
+static ssize_t wq_max_batch_size_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct idxd_wq *wq = confdev_to_wq(dev);
+ struct idxd_device *idxd = wq->idxd;
+ u64 batch_size;
+ int rc;
+
+ if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
+ return -EPERM;
+
+ if (wq->state != IDXD_WQ_DISABLED)
+ return -EPERM;
+
+ rc = __get_sysfs_u64(buf, &batch_size);
+ if (rc < 0)
+ return rc;
+
+ if (batch_size > idxd->max_batch_size)
+ return -EINVAL;
+
+ idxd_wq_set_max_batch_size(idxd->data->type, wq, (u32)batch_size);
+
+ return count;
+}
+
+static struct device_attribute dev_attr_wq_max_batch_size =
+ __ATTR(max_batch_size, 0644, wq_max_batch_size_show, wq_max_batch_size_store);
+
+static ssize_t wq_ats_disable_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct idxd_wq *wq = confdev_to_wq(dev);
+
+ return sysfs_emit(buf, "%u\n", test_bit(WQ_FLAG_ATS_DISABLE, &wq->flags));
+}
+
+static ssize_t wq_ats_disable_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct idxd_wq *wq = confdev_to_wq(dev);
+ struct idxd_device *idxd = wq->idxd;
+ bool ats_dis;
+ int rc;
+
+ if (wq->state != IDXD_WQ_DISABLED)
+ return -EPERM;
+
+ if (!idxd->hw.wq_cap.wq_ats_support)
+ return -EOPNOTSUPP;
+
+ rc = kstrtobool(buf, &ats_dis);
+ if (rc < 0)
+ return rc;
+
+ if (ats_dis)
+ set_bit(WQ_FLAG_ATS_DISABLE, &wq->flags);
+ else
+ clear_bit(WQ_FLAG_ATS_DISABLE, &wq->flags);
+
+ return count;
+}
+
+static struct device_attribute dev_attr_wq_ats_disable =
+ __ATTR(ats_disable, 0644, wq_ats_disable_show, wq_ats_disable_store);
+
+static ssize_t wq_occupancy_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct idxd_wq *wq = confdev_to_wq(dev);
+ struct idxd_device *idxd = wq->idxd;
+ u32 occup, offset;
+
+ if (!idxd->hw.wq_cap.occupancy)
+ return -EOPNOTSUPP;
+
+ offset = WQCFG_OFFSET(idxd, wq->id, WQCFG_OCCUP_IDX);
+ occup = ioread32(idxd->reg_base + offset) & WQCFG_OCCUP_MASK;
+
+ return sysfs_emit(buf, "%u\n", occup);
+}
+
+static struct device_attribute dev_attr_wq_occupancy =
+ __ATTR(occupancy, 0444, wq_occupancy_show, NULL);
+
+static ssize_t wq_enqcmds_retries_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct idxd_wq *wq = confdev_to_wq(dev);
+
+ if (wq_dedicated(wq))
+ return -EOPNOTSUPP;
+
+ return sysfs_emit(buf, "%u\n", wq->enqcmds_retries);
+}
+
+static ssize_t wq_enqcmds_retries_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct idxd_wq *wq = confdev_to_wq(dev);
+ int rc;
+ unsigned int retries;
+
+ if (wq_dedicated(wq))
+ return -EOPNOTSUPP;
+
+ rc = kstrtouint(buf, 10, &retries);
+ if (rc < 0)
+ return rc;
+
+ if (retries > IDXD_ENQCMDS_MAX_RETRIES)
+ retries = IDXD_ENQCMDS_MAX_RETRIES;
+
+ wq->enqcmds_retries = retries;
+ return count;
+}
+
+static struct device_attribute dev_attr_wq_enqcmds_retries =
+ __ATTR(enqcmds_retries, 0644, wq_enqcmds_retries_show, wq_enqcmds_retries_store);
+
+static ssize_t wq_op_config_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct idxd_wq *wq = confdev_to_wq(dev);
+
+ return sysfs_emit(buf, "%*pb\n", IDXD_MAX_OPCAP_BITS, wq->opcap_bmap);
+}
+
+static int idxd_verify_supported_opcap(struct idxd_device *idxd, unsigned long *opmask)
+{
+ int bit;
+
+ /*
+ * The OPCAP is defined as 256 bits that represents each operation the device
+ * supports per bit. Iterate through all the bits and check if the input mask
+ * is set for bits that are not set in the OPCAP for the device. If no OPCAP
+ * bit is set and input mask has the bit set, then return error.
+ */
+ for_each_set_bit(bit, opmask, IDXD_MAX_OPCAP_BITS) {
+ if (!test_bit(bit, idxd->opcap_bmap))
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static ssize_t wq_op_config_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct idxd_wq *wq = confdev_to_wq(dev);
+ struct idxd_device *idxd = wq->idxd;
+ unsigned long *opmask;
+ int rc;
+
+ if (wq->state != IDXD_WQ_DISABLED)
+ return -EPERM;
+
+ opmask = bitmap_zalloc(IDXD_MAX_OPCAP_BITS, GFP_KERNEL);
+ if (!opmask)
+ return -ENOMEM;
+
+ rc = bitmap_parse(buf, count, opmask, IDXD_MAX_OPCAP_BITS);
+ if (rc < 0)
+ goto err;
+
+ rc = idxd_verify_supported_opcap(idxd, opmask);
+ if (rc < 0)
+ goto err;
+
+ bitmap_copy(wq->opcap_bmap, opmask, IDXD_MAX_OPCAP_BITS);
+
+ bitmap_free(opmask);
+ return count;
+
+err:
+ bitmap_free(opmask);
+ return rc;
+}
+
+static struct device_attribute dev_attr_wq_op_config =
+ __ATTR(op_config, 0644, wq_op_config_show, wq_op_config_store);
+
+static struct attribute *idxd_wq_attributes[] = {
+ &dev_attr_wq_clients.attr,
+ &dev_attr_wq_state.attr,
+ &dev_attr_wq_group_id.attr,
+ &dev_attr_wq_mode.attr,
+ &dev_attr_wq_size.attr,
+ &dev_attr_wq_priority.attr,
+ &dev_attr_wq_block_on_fault.attr,
+ &dev_attr_wq_threshold.attr,
+ &dev_attr_wq_type.attr,
+ &dev_attr_wq_name.attr,
+ &dev_attr_wq_cdev_minor.attr,
+ &dev_attr_wq_max_transfer_size.attr,
+ &dev_attr_wq_max_batch_size.attr,
+ &dev_attr_wq_ats_disable.attr,
+ &dev_attr_wq_occupancy.attr,
+ &dev_attr_wq_enqcmds_retries.attr,
+ &dev_attr_wq_op_config.attr,
+ NULL,
+};
+
+static bool idxd_wq_attr_op_config_invisible(struct attribute *attr,
+ struct idxd_device *idxd)
+{
+ return attr == &dev_attr_wq_op_config.attr &&
+ !idxd->hw.wq_cap.op_config;
+}
+
+static bool idxd_wq_attr_max_batch_size_invisible(struct attribute *attr,
+ struct idxd_device *idxd)
+{
+ /* Intel IAA does not support batch processing, make it invisible */
+ return attr == &dev_attr_wq_max_batch_size.attr &&
+ idxd->data->type == IDXD_TYPE_IAX;
+}
+
+static umode_t idxd_wq_attr_visible(struct kobject *kobj,
+ struct attribute *attr, int n)
+{
+ struct device *dev = container_of(kobj, struct device, kobj);
+ struct idxd_wq *wq = confdev_to_wq(dev);
+ struct idxd_device *idxd = wq->idxd;
+
+ if (idxd_wq_attr_op_config_invisible(attr, idxd))
+ return 0;
+
+ if (idxd_wq_attr_max_batch_size_invisible(attr, idxd))
+ return 0;
+
+ return attr->mode;
+}
+
+static const struct attribute_group idxd_wq_attribute_group = {
+ .attrs = idxd_wq_attributes,
+ .is_visible = idxd_wq_attr_visible,
+};
+
+static const struct attribute_group *idxd_wq_attribute_groups[] = {
+ &idxd_wq_attribute_group,
+ NULL,
+};
+
+static void idxd_conf_wq_release(struct device *dev)
+{
+ struct idxd_wq *wq = confdev_to_wq(dev);
+
+ bitmap_free(wq->opcap_bmap);
+ kfree(wq->wqcfg);
+ kfree(wq);
+}
+
+struct device_type idxd_wq_device_type = {
+ .name = "wq",
+ .release = idxd_conf_wq_release,
+ .groups = idxd_wq_attribute_groups,
+};
+
+/* IDXD device attribs */
+static ssize_t version_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct idxd_device *idxd = confdev_to_idxd(dev);
+
+ return sysfs_emit(buf, "%#x\n", idxd->hw.version);
+}
+static DEVICE_ATTR_RO(version);
+
+static ssize_t max_work_queues_size_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct idxd_device *idxd = confdev_to_idxd(dev);
+
+ return sysfs_emit(buf, "%u\n", idxd->max_wq_size);
+}
+static DEVICE_ATTR_RO(max_work_queues_size);
+
+static ssize_t max_groups_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct idxd_device *idxd = confdev_to_idxd(dev);
+
+ return sysfs_emit(buf, "%u\n", idxd->max_groups);
+}
+static DEVICE_ATTR_RO(max_groups);
+
+static ssize_t max_work_queues_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct idxd_device *idxd = confdev_to_idxd(dev);
+
+ return sysfs_emit(buf, "%u\n", idxd->max_wqs);
+}
+static DEVICE_ATTR_RO(max_work_queues);
+
+static ssize_t max_engines_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct idxd_device *idxd = confdev_to_idxd(dev);
+
+ return sysfs_emit(buf, "%u\n", idxd->max_engines);
+}
+static DEVICE_ATTR_RO(max_engines);
+
+static ssize_t numa_node_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct idxd_device *idxd = confdev_to_idxd(dev);
+
+ return sysfs_emit(buf, "%d\n", dev_to_node(&idxd->pdev->dev));
+}
+static DEVICE_ATTR_RO(numa_node);
+
+static ssize_t max_batch_size_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct idxd_device *idxd = confdev_to_idxd(dev);
+
+ return sysfs_emit(buf, "%u\n", idxd->max_batch_size);
+}
+static DEVICE_ATTR_RO(max_batch_size);
+
+static ssize_t max_transfer_size_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct idxd_device *idxd = confdev_to_idxd(dev);
+
+ return sysfs_emit(buf, "%llu\n", idxd->max_xfer_bytes);
+}
+static DEVICE_ATTR_RO(max_transfer_size);
+
+static ssize_t op_cap_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct idxd_device *idxd = confdev_to_idxd(dev);
+
+ return sysfs_emit(buf, "%*pb\n", IDXD_MAX_OPCAP_BITS, idxd->opcap_bmap);
+}
+static DEVICE_ATTR_RO(op_cap);
+
+static ssize_t gen_cap_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct idxd_device *idxd = confdev_to_idxd(dev);
+
+ return sysfs_emit(buf, "%#llx\n", idxd->hw.gen_cap.bits);
+}
+static DEVICE_ATTR_RO(gen_cap);
+
+static ssize_t configurable_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct idxd_device *idxd = confdev_to_idxd(dev);
+
+ return sysfs_emit(buf, "%u\n", test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags));
+}
+static DEVICE_ATTR_RO(configurable);
+
+static ssize_t clients_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct idxd_device *idxd = confdev_to_idxd(dev);
+ int count = 0, i;
+
+ spin_lock(&idxd->dev_lock);
+ for (i = 0; i < idxd->max_wqs; i++) {
+ struct idxd_wq *wq = idxd->wqs[i];
+
+ count += wq->client_count;
+ }
+ spin_unlock(&idxd->dev_lock);
+
+ return sysfs_emit(buf, "%d\n", count);
+}
+static DEVICE_ATTR_RO(clients);
+
+static ssize_t pasid_enabled_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct idxd_device *idxd = confdev_to_idxd(dev);
+
+ return sysfs_emit(buf, "%u\n", device_user_pasid_enabled(idxd));
+}
+static DEVICE_ATTR_RO(pasid_enabled);
+
+static ssize_t state_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct idxd_device *idxd = confdev_to_idxd(dev);
+
+ switch (idxd->state) {
+ case IDXD_DEV_DISABLED:
+ return sysfs_emit(buf, "disabled\n");
+ case IDXD_DEV_ENABLED:
+ return sysfs_emit(buf, "enabled\n");
+ case IDXD_DEV_HALTED:
+ return sysfs_emit(buf, "halted\n");
+ }
+
+ return sysfs_emit(buf, "unknown\n");
+}
+static DEVICE_ATTR_RO(state);
+
+static ssize_t errors_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct idxd_device *idxd = confdev_to_idxd(dev);
+ int i, out = 0;
+
+ spin_lock(&idxd->dev_lock);
+ for (i = 0; i < 4; i++)
+ out += sysfs_emit_at(buf, out, "%#018llx ", idxd->sw_err.bits[i]);
+ spin_unlock(&idxd->dev_lock);
+ out--;
+ out += sysfs_emit_at(buf, out, "\n");
+ return out;
+}
+static DEVICE_ATTR_RO(errors);
+
+static ssize_t max_read_buffers_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct idxd_device *idxd = confdev_to_idxd(dev);
+
+ return sysfs_emit(buf, "%u\n", idxd->max_rdbufs);
+}
+
+static ssize_t max_tokens_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ dev_warn_once(dev, "attribute deprecated, see max_read_buffers.\n");
+ return max_read_buffers_show(dev, attr, buf);
+}
+
+static DEVICE_ATTR_RO(max_tokens); /* deprecated */
+static DEVICE_ATTR_RO(max_read_buffers);
+
+static ssize_t read_buffer_limit_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct idxd_device *idxd = confdev_to_idxd(dev);
+
+ return sysfs_emit(buf, "%u\n", idxd->rdbuf_limit);
+}
+
+static ssize_t token_limit_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ dev_warn_once(dev, "attribute deprecated, see read_buffer_limit.\n");
+ return read_buffer_limit_show(dev, attr, buf);
+}
+
+static ssize_t read_buffer_limit_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct idxd_device *idxd = confdev_to_idxd(dev);
+ unsigned long val;
+ int rc;
+
+ rc = kstrtoul(buf, 10, &val);
+ if (rc < 0)
+ return -EINVAL;
+
+ if (idxd->state == IDXD_DEV_ENABLED)
+ return -EPERM;
+
+ if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
+ return -EPERM;
+
+ if (!idxd->hw.group_cap.rdbuf_limit)
+ return -EPERM;
+
+ if (val > idxd->hw.group_cap.total_rdbufs)
+ return -EINVAL;
+
+ idxd->rdbuf_limit = val;
+ return count;
+}
+
+static ssize_t token_limit_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ dev_warn_once(dev, "attribute deprecated, see read_buffer_limit\n");
+ return read_buffer_limit_store(dev, attr, buf, count);
+}
+
+static DEVICE_ATTR_RW(token_limit); /* deprecated */
+static DEVICE_ATTR_RW(read_buffer_limit);
+
+static ssize_t cdev_major_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct idxd_device *idxd = confdev_to_idxd(dev);
+
+ return sysfs_emit(buf, "%u\n", idxd->major);
+}
+static DEVICE_ATTR_RO(cdev_major);
+
+static ssize_t cmd_status_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct idxd_device *idxd = confdev_to_idxd(dev);
+
+ return sysfs_emit(buf, "%#x\n", idxd->cmd_status);
+}
+
+static ssize_t cmd_status_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct idxd_device *idxd = confdev_to_idxd(dev);
+
+ idxd->cmd_status = 0;
+ return count;
+}
+static DEVICE_ATTR_RW(cmd_status);
+
+static bool idxd_device_attr_max_batch_size_invisible(struct attribute *attr,
+ struct idxd_device *idxd)
+{
+ /* Intel IAA does not support batch processing, make it invisible */
+ return attr == &dev_attr_max_batch_size.attr &&
+ idxd->data->type == IDXD_TYPE_IAX;
+}
+
+static bool idxd_device_attr_read_buffers_invisible(struct attribute *attr,
+ struct idxd_device *idxd)
+{
+ /*
+ * Intel IAA does not support Read Buffer allocation control,
+ * make these attributes invisible.
+ */
+ return (attr == &dev_attr_max_tokens.attr ||
+ attr == &dev_attr_max_read_buffers.attr ||
+ attr == &dev_attr_token_limit.attr ||
+ attr == &dev_attr_read_buffer_limit.attr) &&
+ idxd->data->type == IDXD_TYPE_IAX;
+}
+
+static umode_t idxd_device_attr_visible(struct kobject *kobj,
+ struct attribute *attr, int n)
+{
+ struct device *dev = container_of(kobj, struct device, kobj);
+ struct idxd_device *idxd = confdev_to_idxd(dev);
+
+ if (idxd_device_attr_max_batch_size_invisible(attr, idxd))
+ return 0;
+
+ if (idxd_device_attr_read_buffers_invisible(attr, idxd))
+ return 0;
+
+ return attr->mode;
+}
+
+static struct attribute *idxd_device_attributes[] = {
+ &dev_attr_version.attr,
+ &dev_attr_max_groups.attr,
+ &dev_attr_max_work_queues.attr,
+ &dev_attr_max_work_queues_size.attr,
+ &dev_attr_max_engines.attr,
+ &dev_attr_numa_node.attr,
+ &dev_attr_max_batch_size.attr,
+ &dev_attr_max_transfer_size.attr,
+ &dev_attr_op_cap.attr,
+ &dev_attr_gen_cap.attr,
+ &dev_attr_configurable.attr,
+ &dev_attr_clients.attr,
+ &dev_attr_pasid_enabled.attr,
+ &dev_attr_state.attr,
+ &dev_attr_errors.attr,
+ &dev_attr_max_tokens.attr,
+ &dev_attr_max_read_buffers.attr,
+ &dev_attr_token_limit.attr,
+ &dev_attr_read_buffer_limit.attr,
+ &dev_attr_cdev_major.attr,
+ &dev_attr_cmd_status.attr,
+ NULL,
+};
+
+static const struct attribute_group idxd_device_attribute_group = {
+ .attrs = idxd_device_attributes,
+ .is_visible = idxd_device_attr_visible,
+};
+
+static const struct attribute_group *idxd_attribute_groups[] = {
+ &idxd_device_attribute_group,
+ NULL,
+};
+
+static void idxd_conf_device_release(struct device *dev)
+{
+ struct idxd_device *idxd = confdev_to_idxd(dev);
+
+ kfree(idxd->groups);
+ bitmap_free(idxd->wq_enable_map);
+ kfree(idxd->wqs);
+ kfree(idxd->engines);
+ ida_free(&idxd_ida, idxd->id);
+ bitmap_free(idxd->opcap_bmap);
+ kfree(idxd);
+}
+
+struct device_type dsa_device_type = {
+ .name = "dsa",
+ .release = idxd_conf_device_release,
+ .groups = idxd_attribute_groups,
+};
+
+struct device_type iax_device_type = {
+ .name = "iax",
+ .release = idxd_conf_device_release,
+ .groups = idxd_attribute_groups,
+};
+
+static int idxd_register_engine_devices(struct idxd_device *idxd)
+{
+ struct idxd_engine *engine;
+ int i, j, rc;
+
+ for (i = 0; i < idxd->max_engines; i++) {
+ engine = idxd->engines[i];
+ rc = device_add(engine_confdev(engine));
+ if (rc < 0)
+ goto cleanup;
+ }
+
+ return 0;
+
+cleanup:
+ j = i - 1;
+ for (; i < idxd->max_engines; i++) {
+ engine = idxd->engines[i];
+ put_device(engine_confdev(engine));
+ }
+
+ while (j--) {
+ engine = idxd->engines[j];
+ device_unregister(engine_confdev(engine));
+ }
+ return rc;
+}
+
+static int idxd_register_group_devices(struct idxd_device *idxd)
+{
+ struct idxd_group *group;
+ int i, j, rc;
+
+ for (i = 0; i < idxd->max_groups; i++) {
+ group = idxd->groups[i];
+ rc = device_add(group_confdev(group));
+ if (rc < 0)
+ goto cleanup;
+ }
+
+ return 0;
+
+cleanup:
+ j = i - 1;
+ for (; i < idxd->max_groups; i++) {
+ group = idxd->groups[i];
+ put_device(group_confdev(group));
+ }
+
+ while (j--) {
+ group = idxd->groups[j];
+ device_unregister(group_confdev(group));
+ }
+ return rc;
+}
+
+static int idxd_register_wq_devices(struct idxd_device *idxd)
+{
+ struct idxd_wq *wq;
+ int i, rc, j;
+
+ for (i = 0; i < idxd->max_wqs; i++) {
+ wq = idxd->wqs[i];
+ rc = device_add(wq_confdev(wq));
+ if (rc < 0)
+ goto cleanup;
+ }
+
+ return 0;
+
+cleanup:
+ j = i - 1;
+ for (; i < idxd->max_wqs; i++) {
+ wq = idxd->wqs[i];
+ put_device(wq_confdev(wq));
+ }
+
+ while (j--) {
+ wq = idxd->wqs[j];
+ device_unregister(wq_confdev(wq));
+ }
+ return rc;
+}
+
+int idxd_register_devices(struct idxd_device *idxd)
+{
+ struct device *dev = &idxd->pdev->dev;
+ int rc, i;
+
+ rc = device_add(idxd_confdev(idxd));
+ if (rc < 0)
+ return rc;
+
+ rc = idxd_register_wq_devices(idxd);
+ if (rc < 0) {
+ dev_dbg(dev, "WQ devices registering failed: %d\n", rc);
+ goto err_wq;
+ }
+
+ rc = idxd_register_engine_devices(idxd);
+ if (rc < 0) {
+ dev_dbg(dev, "Engine devices registering failed: %d\n", rc);
+ goto err_engine;
+ }
+
+ rc = idxd_register_group_devices(idxd);
+ if (rc < 0) {
+ dev_dbg(dev, "Group device registering failed: %d\n", rc);
+ goto err_group;
+ }
+
+ return 0;
+
+ err_group:
+ for (i = 0; i < idxd->max_engines; i++)
+ device_unregister(engine_confdev(idxd->engines[i]));
+ err_engine:
+ for (i = 0; i < idxd->max_wqs; i++)
+ device_unregister(wq_confdev(idxd->wqs[i]));
+ err_wq:
+ device_del(idxd_confdev(idxd));
+ return rc;
+}
+
+void idxd_unregister_devices(struct idxd_device *idxd)
+{
+ int i;
+
+ for (i = 0; i < idxd->max_wqs; i++) {
+ struct idxd_wq *wq = idxd->wqs[i];
+
+ device_unregister(wq_confdev(wq));
+ }
+
+ for (i = 0; i < idxd->max_engines; i++) {
+ struct idxd_engine *engine = idxd->engines[i];
+
+ device_unregister(engine_confdev(engine));
+ }
+
+ for (i = 0; i < idxd->max_groups; i++) {
+ struct idxd_group *group = idxd->groups[i];
+
+ device_unregister(group_confdev(group));
+ }
+}
+
+int idxd_register_bus_type(void)
+{
+ return bus_register(&dsa_bus_type);
+}
+
+void idxd_unregister_bus_type(void)
+{
+ bus_unregister(&dsa_bus_type);
+}
diff --git a/drivers/dma/img-mdc-dma.c b/drivers/dma/img-mdc-dma.c
new file mode 100644
index 000000000..e4ea107ce
--- /dev/null
+++ b/drivers/dma/img-mdc-dma.c
@@ -0,0 +1,1090 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * IMG Multi-threaded DMA Controller (MDC)
+ *
+ * Copyright (C) 2009,2012,2013 Imagination Technologies Ltd.
+ * Copyright (C) 2014 Google, Inc.
+ */
+
+#include <linux/clk.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmaengine.h>
+#include <linux/dmapool.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/mfd/syscon.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/of_dma.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/regmap.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+
+#include "dmaengine.h"
+#include "virt-dma.h"
+
+#define MDC_MAX_DMA_CHANNELS 32
+
+#define MDC_GENERAL_CONFIG 0x000
+#define MDC_GENERAL_CONFIG_LIST_IEN BIT(31)
+#define MDC_GENERAL_CONFIG_IEN BIT(29)
+#define MDC_GENERAL_CONFIG_LEVEL_INT BIT(28)
+#define MDC_GENERAL_CONFIG_INC_W BIT(12)
+#define MDC_GENERAL_CONFIG_INC_R BIT(8)
+#define MDC_GENERAL_CONFIG_PHYSICAL_W BIT(7)
+#define MDC_GENERAL_CONFIG_WIDTH_W_SHIFT 4
+#define MDC_GENERAL_CONFIG_WIDTH_W_MASK 0x7
+#define MDC_GENERAL_CONFIG_PHYSICAL_R BIT(3)
+#define MDC_GENERAL_CONFIG_WIDTH_R_SHIFT 0
+#define MDC_GENERAL_CONFIG_WIDTH_R_MASK 0x7
+
+#define MDC_READ_PORT_CONFIG 0x004
+#define MDC_READ_PORT_CONFIG_STHREAD_SHIFT 28
+#define MDC_READ_PORT_CONFIG_STHREAD_MASK 0xf
+#define MDC_READ_PORT_CONFIG_RTHREAD_SHIFT 24
+#define MDC_READ_PORT_CONFIG_RTHREAD_MASK 0xf
+#define MDC_READ_PORT_CONFIG_WTHREAD_SHIFT 16
+#define MDC_READ_PORT_CONFIG_WTHREAD_MASK 0xf
+#define MDC_READ_PORT_CONFIG_BURST_SIZE_SHIFT 4
+#define MDC_READ_PORT_CONFIG_BURST_SIZE_MASK 0xff
+#define MDC_READ_PORT_CONFIG_DREQ_ENABLE BIT(1)
+
+#define MDC_READ_ADDRESS 0x008
+
+#define MDC_WRITE_ADDRESS 0x00c
+
+#define MDC_TRANSFER_SIZE 0x010
+#define MDC_TRANSFER_SIZE_MASK 0xffffff
+
+#define MDC_LIST_NODE_ADDRESS 0x014
+
+#define MDC_CMDS_PROCESSED 0x018
+#define MDC_CMDS_PROCESSED_CMDS_PROCESSED_SHIFT 16
+#define MDC_CMDS_PROCESSED_CMDS_PROCESSED_MASK 0x3f
+#define MDC_CMDS_PROCESSED_INT_ACTIVE BIT(8)
+#define MDC_CMDS_PROCESSED_CMDS_DONE_SHIFT 0
+#define MDC_CMDS_PROCESSED_CMDS_DONE_MASK 0x3f
+
+#define MDC_CONTROL_AND_STATUS 0x01c
+#define MDC_CONTROL_AND_STATUS_CANCEL BIT(20)
+#define MDC_CONTROL_AND_STATUS_LIST_EN BIT(4)
+#define MDC_CONTROL_AND_STATUS_EN BIT(0)
+
+#define MDC_ACTIVE_TRANSFER_SIZE 0x030
+
+#define MDC_GLOBAL_CONFIG_A 0x900
+#define MDC_GLOBAL_CONFIG_A_THREAD_ID_WIDTH_SHIFT 16
+#define MDC_GLOBAL_CONFIG_A_THREAD_ID_WIDTH_MASK 0xff
+#define MDC_GLOBAL_CONFIG_A_DMA_CONTEXTS_SHIFT 8
+#define MDC_GLOBAL_CONFIG_A_DMA_CONTEXTS_MASK 0xff
+#define MDC_GLOBAL_CONFIG_A_SYS_DAT_WIDTH_SHIFT 0
+#define MDC_GLOBAL_CONFIG_A_SYS_DAT_WIDTH_MASK 0xff
+
+struct mdc_hw_list_desc {
+ u32 gen_conf;
+ u32 readport_conf;
+ u32 read_addr;
+ u32 write_addr;
+ u32 xfer_size;
+ u32 node_addr;
+ u32 cmds_done;
+ u32 ctrl_status;
+ /*
+ * Not part of the list descriptor, but instead used by the CPU to
+ * traverse the list.
+ */
+ struct mdc_hw_list_desc *next_desc;
+};
+
+struct mdc_tx_desc {
+ struct mdc_chan *chan;
+ struct virt_dma_desc vd;
+ dma_addr_t list_phys;
+ struct mdc_hw_list_desc *list;
+ bool cyclic;
+ bool cmd_loaded;
+ unsigned int list_len;
+ unsigned int list_period_len;
+ size_t list_xfer_size;
+ unsigned int list_cmds_done;
+};
+
+struct mdc_chan {
+ struct mdc_dma *mdma;
+ struct virt_dma_chan vc;
+ struct dma_slave_config config;
+ struct mdc_tx_desc *desc;
+ int irq;
+ unsigned int periph;
+ unsigned int thread;
+ unsigned int chan_nr;
+};
+
+struct mdc_dma_soc_data {
+ void (*enable_chan)(struct mdc_chan *mchan);
+ void (*disable_chan)(struct mdc_chan *mchan);
+};
+
+struct mdc_dma {
+ struct dma_device dma_dev;
+ void __iomem *regs;
+ struct clk *clk;
+ struct dma_pool *desc_pool;
+ struct regmap *periph_regs;
+ spinlock_t lock;
+ unsigned int nr_threads;
+ unsigned int nr_channels;
+ unsigned int bus_width;
+ unsigned int max_burst_mult;
+ unsigned int max_xfer_size;
+ const struct mdc_dma_soc_data *soc;
+ struct mdc_chan channels[MDC_MAX_DMA_CHANNELS];
+};
+
+static inline u32 mdc_readl(struct mdc_dma *mdma, u32 reg)
+{
+ return readl(mdma->regs + reg);
+}
+
+static inline void mdc_writel(struct mdc_dma *mdma, u32 val, u32 reg)
+{
+ writel(val, mdma->regs + reg);
+}
+
+static inline u32 mdc_chan_readl(struct mdc_chan *mchan, u32 reg)
+{
+ return mdc_readl(mchan->mdma, mchan->chan_nr * 0x040 + reg);
+}
+
+static inline void mdc_chan_writel(struct mdc_chan *mchan, u32 val, u32 reg)
+{
+ mdc_writel(mchan->mdma, val, mchan->chan_nr * 0x040 + reg);
+}
+
+static inline struct mdc_chan *to_mdc_chan(struct dma_chan *c)
+{
+ return container_of(to_virt_chan(c), struct mdc_chan, vc);
+}
+
+static inline struct mdc_tx_desc *to_mdc_desc(struct dma_async_tx_descriptor *t)
+{
+ struct virt_dma_desc *vdesc = container_of(t, struct virt_dma_desc, tx);
+
+ return container_of(vdesc, struct mdc_tx_desc, vd);
+}
+
+static inline struct device *mdma2dev(struct mdc_dma *mdma)
+{
+ return mdma->dma_dev.dev;
+}
+
+static inline unsigned int to_mdc_width(unsigned int bytes)
+{
+ return ffs(bytes) - 1;
+}
+
+static inline void mdc_set_read_width(struct mdc_hw_list_desc *ldesc,
+ unsigned int bytes)
+{
+ ldesc->gen_conf |= to_mdc_width(bytes) <<
+ MDC_GENERAL_CONFIG_WIDTH_R_SHIFT;
+}
+
+static inline void mdc_set_write_width(struct mdc_hw_list_desc *ldesc,
+ unsigned int bytes)
+{
+ ldesc->gen_conf |= to_mdc_width(bytes) <<
+ MDC_GENERAL_CONFIG_WIDTH_W_SHIFT;
+}
+
+static void mdc_list_desc_config(struct mdc_chan *mchan,
+ struct mdc_hw_list_desc *ldesc,
+ enum dma_transfer_direction dir,
+ dma_addr_t src, dma_addr_t dst, size_t len)
+{
+ struct mdc_dma *mdma = mchan->mdma;
+ unsigned int max_burst, burst_size;
+
+ ldesc->gen_conf = MDC_GENERAL_CONFIG_IEN | MDC_GENERAL_CONFIG_LIST_IEN |
+ MDC_GENERAL_CONFIG_LEVEL_INT | MDC_GENERAL_CONFIG_PHYSICAL_W |
+ MDC_GENERAL_CONFIG_PHYSICAL_R;
+ ldesc->readport_conf =
+ (mchan->thread << MDC_READ_PORT_CONFIG_STHREAD_SHIFT) |
+ (mchan->thread << MDC_READ_PORT_CONFIG_RTHREAD_SHIFT) |
+ (mchan->thread << MDC_READ_PORT_CONFIG_WTHREAD_SHIFT);
+ ldesc->read_addr = src;
+ ldesc->write_addr = dst;
+ ldesc->xfer_size = len - 1;
+ ldesc->node_addr = 0;
+ ldesc->cmds_done = 0;
+ ldesc->ctrl_status = MDC_CONTROL_AND_STATUS_LIST_EN |
+ MDC_CONTROL_AND_STATUS_EN;
+ ldesc->next_desc = NULL;
+
+ if (IS_ALIGNED(dst, mdma->bus_width) &&
+ IS_ALIGNED(src, mdma->bus_width))
+ max_burst = mdma->bus_width * mdma->max_burst_mult;
+ else
+ max_burst = mdma->bus_width * (mdma->max_burst_mult - 1);
+
+ if (dir == DMA_MEM_TO_DEV) {
+ ldesc->gen_conf |= MDC_GENERAL_CONFIG_INC_R;
+ ldesc->readport_conf |= MDC_READ_PORT_CONFIG_DREQ_ENABLE;
+ mdc_set_read_width(ldesc, mdma->bus_width);
+ mdc_set_write_width(ldesc, mchan->config.dst_addr_width);
+ burst_size = min(max_burst, mchan->config.dst_maxburst *
+ mchan->config.dst_addr_width);
+ } else if (dir == DMA_DEV_TO_MEM) {
+ ldesc->gen_conf |= MDC_GENERAL_CONFIG_INC_W;
+ ldesc->readport_conf |= MDC_READ_PORT_CONFIG_DREQ_ENABLE;
+ mdc_set_read_width(ldesc, mchan->config.src_addr_width);
+ mdc_set_write_width(ldesc, mdma->bus_width);
+ burst_size = min(max_burst, mchan->config.src_maxburst *
+ mchan->config.src_addr_width);
+ } else {
+ ldesc->gen_conf |= MDC_GENERAL_CONFIG_INC_R |
+ MDC_GENERAL_CONFIG_INC_W;
+ mdc_set_read_width(ldesc, mdma->bus_width);
+ mdc_set_write_width(ldesc, mdma->bus_width);
+ burst_size = max_burst;
+ }
+ ldesc->readport_conf |= (burst_size - 1) <<
+ MDC_READ_PORT_CONFIG_BURST_SIZE_SHIFT;
+}
+
+static void mdc_list_desc_free(struct mdc_tx_desc *mdesc)
+{
+ struct mdc_dma *mdma = mdesc->chan->mdma;
+ struct mdc_hw_list_desc *curr, *next;
+ dma_addr_t curr_phys, next_phys;
+
+ curr = mdesc->list;
+ curr_phys = mdesc->list_phys;
+ while (curr) {
+ next = curr->next_desc;
+ next_phys = curr->node_addr;
+ dma_pool_free(mdma->desc_pool, curr, curr_phys);
+ curr = next;
+ curr_phys = next_phys;
+ }
+}
+
+static void mdc_desc_free(struct virt_dma_desc *vd)
+{
+ struct mdc_tx_desc *mdesc = to_mdc_desc(&vd->tx);
+
+ mdc_list_desc_free(mdesc);
+ kfree(mdesc);
+}
+
+static struct dma_async_tx_descriptor *mdc_prep_dma_memcpy(
+ struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, size_t len,
+ unsigned long flags)
+{
+ struct mdc_chan *mchan = to_mdc_chan(chan);
+ struct mdc_dma *mdma = mchan->mdma;
+ struct mdc_tx_desc *mdesc;
+ struct mdc_hw_list_desc *curr, *prev = NULL;
+ dma_addr_t curr_phys;
+
+ if (!len)
+ return NULL;
+
+ mdesc = kzalloc(sizeof(*mdesc), GFP_NOWAIT);
+ if (!mdesc)
+ return NULL;
+ mdesc->chan = mchan;
+ mdesc->list_xfer_size = len;
+
+ while (len > 0) {
+ size_t xfer_size;
+
+ curr = dma_pool_alloc(mdma->desc_pool, GFP_NOWAIT, &curr_phys);
+ if (!curr)
+ goto free_desc;
+
+ if (prev) {
+ prev->node_addr = curr_phys;
+ prev->next_desc = curr;
+ } else {
+ mdesc->list_phys = curr_phys;
+ mdesc->list = curr;
+ }
+
+ xfer_size = min_t(size_t, mdma->max_xfer_size, len);
+
+ mdc_list_desc_config(mchan, curr, DMA_MEM_TO_MEM, src, dest,
+ xfer_size);
+
+ prev = curr;
+
+ mdesc->list_len++;
+ src += xfer_size;
+ dest += xfer_size;
+ len -= xfer_size;
+ }
+
+ return vchan_tx_prep(&mchan->vc, &mdesc->vd, flags);
+
+free_desc:
+ mdc_desc_free(&mdesc->vd);
+
+ return NULL;
+}
+
+static int mdc_check_slave_width(struct mdc_chan *mchan,
+ enum dma_transfer_direction dir)
+{
+ enum dma_slave_buswidth width;
+
+ if (dir == DMA_MEM_TO_DEV)
+ width = mchan->config.dst_addr_width;
+ else
+ width = mchan->config.src_addr_width;
+
+ switch (width) {
+ case DMA_SLAVE_BUSWIDTH_1_BYTE:
+ case DMA_SLAVE_BUSWIDTH_2_BYTES:
+ case DMA_SLAVE_BUSWIDTH_4_BYTES:
+ case DMA_SLAVE_BUSWIDTH_8_BYTES:
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if (width > mchan->mdma->bus_width)
+ return -EINVAL;
+
+ return 0;
+}
+
+static struct dma_async_tx_descriptor *mdc_prep_dma_cyclic(
+ struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
+ size_t period_len, enum dma_transfer_direction dir,
+ unsigned long flags)
+{
+ struct mdc_chan *mchan = to_mdc_chan(chan);
+ struct mdc_dma *mdma = mchan->mdma;
+ struct mdc_tx_desc *mdesc;
+ struct mdc_hw_list_desc *curr, *prev = NULL;
+ dma_addr_t curr_phys;
+
+ if (!buf_len && !period_len)
+ return NULL;
+
+ if (!is_slave_direction(dir))
+ return NULL;
+
+ if (mdc_check_slave_width(mchan, dir) < 0)
+ return NULL;
+
+ mdesc = kzalloc(sizeof(*mdesc), GFP_NOWAIT);
+ if (!mdesc)
+ return NULL;
+ mdesc->chan = mchan;
+ mdesc->cyclic = true;
+ mdesc->list_xfer_size = buf_len;
+ mdesc->list_period_len = DIV_ROUND_UP(period_len,
+ mdma->max_xfer_size);
+
+ while (buf_len > 0) {
+ size_t remainder = min(period_len, buf_len);
+
+ while (remainder > 0) {
+ size_t xfer_size;
+
+ curr = dma_pool_alloc(mdma->desc_pool, GFP_NOWAIT,
+ &curr_phys);
+ if (!curr)
+ goto free_desc;
+
+ if (!prev) {
+ mdesc->list_phys = curr_phys;
+ mdesc->list = curr;
+ } else {
+ prev->node_addr = curr_phys;
+ prev->next_desc = curr;
+ }
+
+ xfer_size = min_t(size_t, mdma->max_xfer_size,
+ remainder);
+
+ if (dir == DMA_MEM_TO_DEV) {
+ mdc_list_desc_config(mchan, curr, dir,
+ buf_addr,
+ mchan->config.dst_addr,
+ xfer_size);
+ } else {
+ mdc_list_desc_config(mchan, curr, dir,
+ mchan->config.src_addr,
+ buf_addr,
+ xfer_size);
+ }
+
+ prev = curr;
+
+ mdesc->list_len++;
+ buf_addr += xfer_size;
+ buf_len -= xfer_size;
+ remainder -= xfer_size;
+ }
+ }
+ prev->node_addr = mdesc->list_phys;
+
+ return vchan_tx_prep(&mchan->vc, &mdesc->vd, flags);
+
+free_desc:
+ mdc_desc_free(&mdesc->vd);
+
+ return NULL;
+}
+
+static struct dma_async_tx_descriptor *mdc_prep_slave_sg(
+ struct dma_chan *chan, struct scatterlist *sgl,
+ unsigned int sg_len, enum dma_transfer_direction dir,
+ unsigned long flags, void *context)
+{
+ struct mdc_chan *mchan = to_mdc_chan(chan);
+ struct mdc_dma *mdma = mchan->mdma;
+ struct mdc_tx_desc *mdesc;
+ struct scatterlist *sg;
+ struct mdc_hw_list_desc *curr, *prev = NULL;
+ dma_addr_t curr_phys;
+ unsigned int i;
+
+ if (!sgl)
+ return NULL;
+
+ if (!is_slave_direction(dir))
+ return NULL;
+
+ if (mdc_check_slave_width(mchan, dir) < 0)
+ return NULL;
+
+ mdesc = kzalloc(sizeof(*mdesc), GFP_NOWAIT);
+ if (!mdesc)
+ return NULL;
+ mdesc->chan = mchan;
+
+ for_each_sg(sgl, sg, sg_len, i) {
+ dma_addr_t buf = sg_dma_address(sg);
+ size_t buf_len = sg_dma_len(sg);
+
+ while (buf_len > 0) {
+ size_t xfer_size;
+
+ curr = dma_pool_alloc(mdma->desc_pool, GFP_NOWAIT,
+ &curr_phys);
+ if (!curr)
+ goto free_desc;
+
+ if (!prev) {
+ mdesc->list_phys = curr_phys;
+ mdesc->list = curr;
+ } else {
+ prev->node_addr = curr_phys;
+ prev->next_desc = curr;
+ }
+
+ xfer_size = min_t(size_t, mdma->max_xfer_size,
+ buf_len);
+
+ if (dir == DMA_MEM_TO_DEV) {
+ mdc_list_desc_config(mchan, curr, dir, buf,
+ mchan->config.dst_addr,
+ xfer_size);
+ } else {
+ mdc_list_desc_config(mchan, curr, dir,
+ mchan->config.src_addr,
+ buf, xfer_size);
+ }
+
+ prev = curr;
+
+ mdesc->list_len++;
+ mdesc->list_xfer_size += xfer_size;
+ buf += xfer_size;
+ buf_len -= xfer_size;
+ }
+ }
+
+ return vchan_tx_prep(&mchan->vc, &mdesc->vd, flags);
+
+free_desc:
+ mdc_desc_free(&mdesc->vd);
+
+ return NULL;
+}
+
+static void mdc_issue_desc(struct mdc_chan *mchan)
+{
+ struct mdc_dma *mdma = mchan->mdma;
+ struct virt_dma_desc *vd;
+ struct mdc_tx_desc *mdesc;
+ u32 val;
+
+ vd = vchan_next_desc(&mchan->vc);
+ if (!vd)
+ return;
+
+ list_del(&vd->node);
+
+ mdesc = to_mdc_desc(&vd->tx);
+ mchan->desc = mdesc;
+
+ dev_dbg(mdma2dev(mdma), "Issuing descriptor on channel %d\n",
+ mchan->chan_nr);
+
+ mdma->soc->enable_chan(mchan);
+
+ val = mdc_chan_readl(mchan, MDC_GENERAL_CONFIG);
+ val |= MDC_GENERAL_CONFIG_LIST_IEN | MDC_GENERAL_CONFIG_IEN |
+ MDC_GENERAL_CONFIG_LEVEL_INT | MDC_GENERAL_CONFIG_PHYSICAL_W |
+ MDC_GENERAL_CONFIG_PHYSICAL_R;
+ mdc_chan_writel(mchan, val, MDC_GENERAL_CONFIG);
+ val = (mchan->thread << MDC_READ_PORT_CONFIG_STHREAD_SHIFT) |
+ (mchan->thread << MDC_READ_PORT_CONFIG_RTHREAD_SHIFT) |
+ (mchan->thread << MDC_READ_PORT_CONFIG_WTHREAD_SHIFT);
+ mdc_chan_writel(mchan, val, MDC_READ_PORT_CONFIG);
+ mdc_chan_writel(mchan, mdesc->list_phys, MDC_LIST_NODE_ADDRESS);
+ val = mdc_chan_readl(mchan, MDC_CONTROL_AND_STATUS);
+ val |= MDC_CONTROL_AND_STATUS_LIST_EN;
+ mdc_chan_writel(mchan, val, MDC_CONTROL_AND_STATUS);
+}
+
+static void mdc_issue_pending(struct dma_chan *chan)
+{
+ struct mdc_chan *mchan = to_mdc_chan(chan);
+ unsigned long flags;
+
+ spin_lock_irqsave(&mchan->vc.lock, flags);
+ if (vchan_issue_pending(&mchan->vc) && !mchan->desc)
+ mdc_issue_desc(mchan);
+ spin_unlock_irqrestore(&mchan->vc.lock, flags);
+}
+
+static enum dma_status mdc_tx_status(struct dma_chan *chan,
+ dma_cookie_t cookie, struct dma_tx_state *txstate)
+{
+ struct mdc_chan *mchan = to_mdc_chan(chan);
+ struct mdc_tx_desc *mdesc;
+ struct virt_dma_desc *vd;
+ unsigned long flags;
+ size_t bytes = 0;
+ int ret;
+
+ ret = dma_cookie_status(chan, cookie, txstate);
+ if (ret == DMA_COMPLETE)
+ return ret;
+
+ if (!txstate)
+ return ret;
+
+ spin_lock_irqsave(&mchan->vc.lock, flags);
+ vd = vchan_find_desc(&mchan->vc, cookie);
+ if (vd) {
+ mdesc = to_mdc_desc(&vd->tx);
+ bytes = mdesc->list_xfer_size;
+ } else if (mchan->desc && mchan->desc->vd.tx.cookie == cookie) {
+ struct mdc_hw_list_desc *ldesc;
+ u32 val1, val2, done, processed, residue;
+ int i, cmds;
+
+ mdesc = mchan->desc;
+
+ /*
+ * Determine the number of commands that haven't been
+ * processed (handled by the IRQ handler) yet.
+ */
+ do {
+ val1 = mdc_chan_readl(mchan, MDC_CMDS_PROCESSED) &
+ ~MDC_CMDS_PROCESSED_INT_ACTIVE;
+ residue = mdc_chan_readl(mchan,
+ MDC_ACTIVE_TRANSFER_SIZE);
+ val2 = mdc_chan_readl(mchan, MDC_CMDS_PROCESSED) &
+ ~MDC_CMDS_PROCESSED_INT_ACTIVE;
+ } while (val1 != val2);
+
+ done = (val1 >> MDC_CMDS_PROCESSED_CMDS_DONE_SHIFT) &
+ MDC_CMDS_PROCESSED_CMDS_DONE_MASK;
+ processed = (val1 >> MDC_CMDS_PROCESSED_CMDS_PROCESSED_SHIFT) &
+ MDC_CMDS_PROCESSED_CMDS_PROCESSED_MASK;
+ cmds = (done - processed) %
+ (MDC_CMDS_PROCESSED_CMDS_DONE_MASK + 1);
+
+ /*
+ * If the command loaded event hasn't been processed yet, then
+ * the difference above includes an extra command.
+ */
+ if (!mdesc->cmd_loaded)
+ cmds--;
+ else
+ cmds += mdesc->list_cmds_done;
+
+ bytes = mdesc->list_xfer_size;
+ ldesc = mdesc->list;
+ for (i = 0; i < cmds; i++) {
+ bytes -= ldesc->xfer_size + 1;
+ ldesc = ldesc->next_desc;
+ }
+ if (ldesc) {
+ if (residue != MDC_TRANSFER_SIZE_MASK)
+ bytes -= ldesc->xfer_size - residue;
+ else
+ bytes -= ldesc->xfer_size + 1;
+ }
+ }
+ spin_unlock_irqrestore(&mchan->vc.lock, flags);
+
+ dma_set_residue(txstate, bytes);
+
+ return ret;
+}
+
+static unsigned int mdc_get_new_events(struct mdc_chan *mchan)
+{
+ u32 val, processed, done1, done2;
+ unsigned int ret;
+
+ val = mdc_chan_readl(mchan, MDC_CMDS_PROCESSED);
+ processed = (val >> MDC_CMDS_PROCESSED_CMDS_PROCESSED_SHIFT) &
+ MDC_CMDS_PROCESSED_CMDS_PROCESSED_MASK;
+ /*
+ * CMDS_DONE may have incremented between reading CMDS_PROCESSED
+ * and clearing INT_ACTIVE. Re-read CMDS_PROCESSED to ensure we
+ * didn't miss a command completion.
+ */
+ do {
+ val = mdc_chan_readl(mchan, MDC_CMDS_PROCESSED);
+
+ done1 = (val >> MDC_CMDS_PROCESSED_CMDS_DONE_SHIFT) &
+ MDC_CMDS_PROCESSED_CMDS_DONE_MASK;
+
+ val &= ~((MDC_CMDS_PROCESSED_CMDS_PROCESSED_MASK <<
+ MDC_CMDS_PROCESSED_CMDS_PROCESSED_SHIFT) |
+ MDC_CMDS_PROCESSED_INT_ACTIVE);
+
+ val |= done1 << MDC_CMDS_PROCESSED_CMDS_PROCESSED_SHIFT;
+
+ mdc_chan_writel(mchan, val, MDC_CMDS_PROCESSED);
+
+ val = mdc_chan_readl(mchan, MDC_CMDS_PROCESSED);
+
+ done2 = (val >> MDC_CMDS_PROCESSED_CMDS_DONE_SHIFT) &
+ MDC_CMDS_PROCESSED_CMDS_DONE_MASK;
+ } while (done1 != done2);
+
+ if (done1 >= processed)
+ ret = done1 - processed;
+ else
+ ret = ((MDC_CMDS_PROCESSED_CMDS_PROCESSED_MASK + 1) -
+ processed) + done1;
+
+ return ret;
+}
+
+static int mdc_terminate_all(struct dma_chan *chan)
+{
+ struct mdc_chan *mchan = to_mdc_chan(chan);
+ unsigned long flags;
+ LIST_HEAD(head);
+
+ spin_lock_irqsave(&mchan->vc.lock, flags);
+
+ mdc_chan_writel(mchan, MDC_CONTROL_AND_STATUS_CANCEL,
+ MDC_CONTROL_AND_STATUS);
+
+ if (mchan->desc) {
+ vchan_terminate_vdesc(&mchan->desc->vd);
+ mchan->desc = NULL;
+ }
+ vchan_get_all_descriptors(&mchan->vc, &head);
+
+ mdc_get_new_events(mchan);
+
+ spin_unlock_irqrestore(&mchan->vc.lock, flags);
+
+ vchan_dma_desc_free_list(&mchan->vc, &head);
+
+ return 0;
+}
+
+static void mdc_synchronize(struct dma_chan *chan)
+{
+ struct mdc_chan *mchan = to_mdc_chan(chan);
+
+ vchan_synchronize(&mchan->vc);
+}
+
+static int mdc_slave_config(struct dma_chan *chan,
+ struct dma_slave_config *config)
+{
+ struct mdc_chan *mchan = to_mdc_chan(chan);
+ unsigned long flags;
+
+ spin_lock_irqsave(&mchan->vc.lock, flags);
+ mchan->config = *config;
+ spin_unlock_irqrestore(&mchan->vc.lock, flags);
+
+ return 0;
+}
+
+static int mdc_alloc_chan_resources(struct dma_chan *chan)
+{
+ struct mdc_chan *mchan = to_mdc_chan(chan);
+ struct device *dev = mdma2dev(mchan->mdma);
+
+ return pm_runtime_get_sync(dev);
+}
+
+static void mdc_free_chan_resources(struct dma_chan *chan)
+{
+ struct mdc_chan *mchan = to_mdc_chan(chan);
+ struct mdc_dma *mdma = mchan->mdma;
+ struct device *dev = mdma2dev(mdma);
+
+ mdc_terminate_all(chan);
+ mdma->soc->disable_chan(mchan);
+ pm_runtime_put(dev);
+}
+
+static irqreturn_t mdc_chan_irq(int irq, void *dev_id)
+{
+ struct mdc_chan *mchan = (struct mdc_chan *)dev_id;
+ struct mdc_tx_desc *mdesc;
+ unsigned int i, new_events;
+
+ spin_lock(&mchan->vc.lock);
+
+ dev_dbg(mdma2dev(mchan->mdma), "IRQ on channel %d\n", mchan->chan_nr);
+
+ new_events = mdc_get_new_events(mchan);
+
+ if (!new_events)
+ goto out;
+
+ mdesc = mchan->desc;
+ if (!mdesc) {
+ dev_warn(mdma2dev(mchan->mdma),
+ "IRQ with no active descriptor on channel %d\n",
+ mchan->chan_nr);
+ goto out;
+ }
+
+ for (i = 0; i < new_events; i++) {
+ /*
+ * The first interrupt in a transfer indicates that the
+ * command list has been loaded, not that a command has
+ * been completed.
+ */
+ if (!mdesc->cmd_loaded) {
+ mdesc->cmd_loaded = true;
+ continue;
+ }
+
+ mdesc->list_cmds_done++;
+ if (mdesc->cyclic) {
+ mdesc->list_cmds_done %= mdesc->list_len;
+ if (mdesc->list_cmds_done % mdesc->list_period_len == 0)
+ vchan_cyclic_callback(&mdesc->vd);
+ } else if (mdesc->list_cmds_done == mdesc->list_len) {
+ mchan->desc = NULL;
+ vchan_cookie_complete(&mdesc->vd);
+ mdc_issue_desc(mchan);
+ break;
+ }
+ }
+out:
+ spin_unlock(&mchan->vc.lock);
+
+ return IRQ_HANDLED;
+}
+
+static struct dma_chan *mdc_of_xlate(struct of_phandle_args *dma_spec,
+ struct of_dma *ofdma)
+{
+ struct mdc_dma *mdma = ofdma->of_dma_data;
+ struct dma_chan *chan;
+
+ if (dma_spec->args_count != 3)
+ return NULL;
+
+ list_for_each_entry(chan, &mdma->dma_dev.channels, device_node) {
+ struct mdc_chan *mchan = to_mdc_chan(chan);
+
+ if (!(dma_spec->args[1] & BIT(mchan->chan_nr)))
+ continue;
+ if (dma_get_slave_channel(chan)) {
+ mchan->periph = dma_spec->args[0];
+ mchan->thread = dma_spec->args[2];
+ return chan;
+ }
+ }
+
+ return NULL;
+}
+
+#define PISTACHIO_CR_PERIPH_DMA_ROUTE(ch) (0x120 + 0x4 * ((ch) / 4))
+#define PISTACHIO_CR_PERIPH_DMA_ROUTE_SHIFT(ch) (8 * ((ch) % 4))
+#define PISTACHIO_CR_PERIPH_DMA_ROUTE_MASK 0x3f
+
+static void pistachio_mdc_enable_chan(struct mdc_chan *mchan)
+{
+ struct mdc_dma *mdma = mchan->mdma;
+
+ regmap_update_bits(mdma->periph_regs,
+ PISTACHIO_CR_PERIPH_DMA_ROUTE(mchan->chan_nr),
+ PISTACHIO_CR_PERIPH_DMA_ROUTE_MASK <<
+ PISTACHIO_CR_PERIPH_DMA_ROUTE_SHIFT(mchan->chan_nr),
+ mchan->periph <<
+ PISTACHIO_CR_PERIPH_DMA_ROUTE_SHIFT(mchan->chan_nr));
+}
+
+static void pistachio_mdc_disable_chan(struct mdc_chan *mchan)
+{
+ struct mdc_dma *mdma = mchan->mdma;
+
+ regmap_update_bits(mdma->periph_regs,
+ PISTACHIO_CR_PERIPH_DMA_ROUTE(mchan->chan_nr),
+ PISTACHIO_CR_PERIPH_DMA_ROUTE_MASK <<
+ PISTACHIO_CR_PERIPH_DMA_ROUTE_SHIFT(mchan->chan_nr),
+ 0);
+}
+
+static const struct mdc_dma_soc_data pistachio_mdc_data = {
+ .enable_chan = pistachio_mdc_enable_chan,
+ .disable_chan = pistachio_mdc_disable_chan,
+};
+
+static const struct of_device_id mdc_dma_of_match[] = {
+ { .compatible = "img,pistachio-mdc-dma", .data = &pistachio_mdc_data, },
+ { },
+};
+MODULE_DEVICE_TABLE(of, mdc_dma_of_match);
+
+static int img_mdc_runtime_suspend(struct device *dev)
+{
+ struct mdc_dma *mdma = dev_get_drvdata(dev);
+
+ clk_disable_unprepare(mdma->clk);
+
+ return 0;
+}
+
+static int img_mdc_runtime_resume(struct device *dev)
+{
+ struct mdc_dma *mdma = dev_get_drvdata(dev);
+
+ return clk_prepare_enable(mdma->clk);
+}
+
+static int mdc_dma_probe(struct platform_device *pdev)
+{
+ struct mdc_dma *mdma;
+ struct resource *res;
+ unsigned int i;
+ u32 val;
+ int ret;
+
+ mdma = devm_kzalloc(&pdev->dev, sizeof(*mdma), GFP_KERNEL);
+ if (!mdma)
+ return -ENOMEM;
+ platform_set_drvdata(pdev, mdma);
+
+ mdma->soc = of_device_get_match_data(&pdev->dev);
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ mdma->regs = devm_ioremap_resource(&pdev->dev, res);
+ if (IS_ERR(mdma->regs))
+ return PTR_ERR(mdma->regs);
+
+ mdma->periph_regs = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
+ "img,cr-periph");
+ if (IS_ERR(mdma->periph_regs))
+ return PTR_ERR(mdma->periph_regs);
+
+ mdma->clk = devm_clk_get(&pdev->dev, "sys");
+ if (IS_ERR(mdma->clk))
+ return PTR_ERR(mdma->clk);
+
+ dma_cap_zero(mdma->dma_dev.cap_mask);
+ dma_cap_set(DMA_SLAVE, mdma->dma_dev.cap_mask);
+ dma_cap_set(DMA_PRIVATE, mdma->dma_dev.cap_mask);
+ dma_cap_set(DMA_CYCLIC, mdma->dma_dev.cap_mask);
+ dma_cap_set(DMA_MEMCPY, mdma->dma_dev.cap_mask);
+
+ val = mdc_readl(mdma, MDC_GLOBAL_CONFIG_A);
+ mdma->nr_channels = (val >> MDC_GLOBAL_CONFIG_A_DMA_CONTEXTS_SHIFT) &
+ MDC_GLOBAL_CONFIG_A_DMA_CONTEXTS_MASK;
+ mdma->nr_threads =
+ 1 << ((val >> MDC_GLOBAL_CONFIG_A_THREAD_ID_WIDTH_SHIFT) &
+ MDC_GLOBAL_CONFIG_A_THREAD_ID_WIDTH_MASK);
+ mdma->bus_width =
+ (1 << ((val >> MDC_GLOBAL_CONFIG_A_SYS_DAT_WIDTH_SHIFT) &
+ MDC_GLOBAL_CONFIG_A_SYS_DAT_WIDTH_MASK)) / 8;
+ /*
+ * Although transfer sizes of up to MDC_TRANSFER_SIZE_MASK + 1 bytes
+ * are supported, this makes it possible for the value reported in
+ * MDC_ACTIVE_TRANSFER_SIZE to be ambiguous - an active transfer size
+ * of MDC_TRANSFER_SIZE_MASK may indicate either that 0 bytes or
+ * MDC_TRANSFER_SIZE_MASK + 1 bytes are remaining. To eliminate this
+ * ambiguity, restrict transfer sizes to one bus-width less than the
+ * actual maximum.
+ */
+ mdma->max_xfer_size = MDC_TRANSFER_SIZE_MASK + 1 - mdma->bus_width;
+
+ of_property_read_u32(pdev->dev.of_node, "dma-channels",
+ &mdma->nr_channels);
+ ret = of_property_read_u32(pdev->dev.of_node,
+ "img,max-burst-multiplier",
+ &mdma->max_burst_mult);
+ if (ret)
+ return ret;
+
+ mdma->dma_dev.dev = &pdev->dev;
+ mdma->dma_dev.device_prep_slave_sg = mdc_prep_slave_sg;
+ mdma->dma_dev.device_prep_dma_cyclic = mdc_prep_dma_cyclic;
+ mdma->dma_dev.device_prep_dma_memcpy = mdc_prep_dma_memcpy;
+ mdma->dma_dev.device_alloc_chan_resources = mdc_alloc_chan_resources;
+ mdma->dma_dev.device_free_chan_resources = mdc_free_chan_resources;
+ mdma->dma_dev.device_tx_status = mdc_tx_status;
+ mdma->dma_dev.device_issue_pending = mdc_issue_pending;
+ mdma->dma_dev.device_terminate_all = mdc_terminate_all;
+ mdma->dma_dev.device_synchronize = mdc_synchronize;
+ mdma->dma_dev.device_config = mdc_slave_config;
+
+ mdma->dma_dev.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
+ mdma->dma_dev.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
+ for (i = 1; i <= mdma->bus_width; i <<= 1) {
+ mdma->dma_dev.src_addr_widths |= BIT(i);
+ mdma->dma_dev.dst_addr_widths |= BIT(i);
+ }
+
+ INIT_LIST_HEAD(&mdma->dma_dev.channels);
+ for (i = 0; i < mdma->nr_channels; i++) {
+ struct mdc_chan *mchan = &mdma->channels[i];
+
+ mchan->mdma = mdma;
+ mchan->chan_nr = i;
+ mchan->irq = platform_get_irq(pdev, i);
+ if (mchan->irq < 0)
+ return mchan->irq;
+
+ ret = devm_request_irq(&pdev->dev, mchan->irq, mdc_chan_irq,
+ IRQ_TYPE_LEVEL_HIGH,
+ dev_name(&pdev->dev), mchan);
+ if (ret < 0)
+ return ret;
+
+ mchan->vc.desc_free = mdc_desc_free;
+ vchan_init(&mchan->vc, &mdma->dma_dev);
+ }
+
+ mdma->desc_pool = dmam_pool_create(dev_name(&pdev->dev), &pdev->dev,
+ sizeof(struct mdc_hw_list_desc),
+ 4, 0);
+ if (!mdma->desc_pool)
+ return -ENOMEM;
+
+ pm_runtime_enable(&pdev->dev);
+ if (!pm_runtime_enabled(&pdev->dev)) {
+ ret = img_mdc_runtime_resume(&pdev->dev);
+ if (ret)
+ return ret;
+ }
+
+ ret = dma_async_device_register(&mdma->dma_dev);
+ if (ret)
+ goto suspend;
+
+ ret = of_dma_controller_register(pdev->dev.of_node, mdc_of_xlate, mdma);
+ if (ret)
+ goto unregister;
+
+ dev_info(&pdev->dev, "MDC with %u channels and %u threads\n",
+ mdma->nr_channels, mdma->nr_threads);
+
+ return 0;
+
+unregister:
+ dma_async_device_unregister(&mdma->dma_dev);
+suspend:
+ if (!pm_runtime_enabled(&pdev->dev))
+ img_mdc_runtime_suspend(&pdev->dev);
+ pm_runtime_disable(&pdev->dev);
+ return ret;
+}
+
+static int mdc_dma_remove(struct platform_device *pdev)
+{
+ struct mdc_dma *mdma = platform_get_drvdata(pdev);
+ struct mdc_chan *mchan, *next;
+
+ of_dma_controller_free(pdev->dev.of_node);
+ dma_async_device_unregister(&mdma->dma_dev);
+
+ list_for_each_entry_safe(mchan, next, &mdma->dma_dev.channels,
+ vc.chan.device_node) {
+ list_del(&mchan->vc.chan.device_node);
+
+ devm_free_irq(&pdev->dev, mchan->irq, mchan);
+
+ tasklet_kill(&mchan->vc.task);
+ }
+
+ pm_runtime_disable(&pdev->dev);
+ if (!pm_runtime_status_suspended(&pdev->dev))
+ img_mdc_runtime_suspend(&pdev->dev);
+
+ return 0;
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int img_mdc_suspend_late(struct device *dev)
+{
+ struct mdc_dma *mdma = dev_get_drvdata(dev);
+ int i;
+
+ /* Check that all channels are idle */
+ for (i = 0; i < mdma->nr_channels; i++) {
+ struct mdc_chan *mchan = &mdma->channels[i];
+
+ if (unlikely(mchan->desc))
+ return -EBUSY;
+ }
+
+ return pm_runtime_force_suspend(dev);
+}
+
+static int img_mdc_resume_early(struct device *dev)
+{
+ return pm_runtime_force_resume(dev);
+}
+#endif /* CONFIG_PM_SLEEP */
+
+static const struct dev_pm_ops img_mdc_pm_ops = {
+ SET_RUNTIME_PM_OPS(img_mdc_runtime_suspend,
+ img_mdc_runtime_resume, NULL)
+ SET_LATE_SYSTEM_SLEEP_PM_OPS(img_mdc_suspend_late,
+ img_mdc_resume_early)
+};
+
+static struct platform_driver mdc_dma_driver = {
+ .driver = {
+ .name = "img-mdc-dma",
+ .pm = &img_mdc_pm_ops,
+ .of_match_table = of_match_ptr(mdc_dma_of_match),
+ },
+ .probe = mdc_dma_probe,
+ .remove = mdc_dma_remove,
+};
+module_platform_driver(mdc_dma_driver);
+
+MODULE_DESCRIPTION("IMG Multi-threaded DMA Controller (MDC) driver");
+MODULE_AUTHOR("Andrew Bresticker <abrestic@chromium.org>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/imx-dma.c b/drivers/dma/imx-dma.c
new file mode 100644
index 000000000..65c6094ce
--- /dev/null
+++ b/drivers/dma/imx-dma.c
@@ -0,0 +1,1255 @@
+// SPDX-License-Identifier: GPL-2.0+
+//
+// drivers/dma/imx-dma.c
+//
+// This file contains a driver for the Freescale i.MX DMA engine
+// found on i.MX1/21/27
+//
+// Copyright 2010 Sascha Hauer, Pengutronix <s.hauer@pengutronix.de>
+// Copyright 2012 Javier Martin, Vista Silicon <javier.martin@vista-silicon.com>
+
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/slab.h>
+#include <linux/platform_device.h>
+#include <linux/clk.h>
+#include <linux/dmaengine.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/of_dma.h>
+
+#include <asm/irq.h>
+#include <linux/dma/imx-dma.h>
+
+#include "dmaengine.h"
+#define IMXDMA_MAX_CHAN_DESCRIPTORS 16
+#define IMX_DMA_CHANNELS 16
+
+#define IMX_DMA_2D_SLOTS 2
+#define IMX_DMA_2D_SLOT_A 0
+#define IMX_DMA_2D_SLOT_B 1
+
+#define IMX_DMA_LENGTH_LOOP ((unsigned int)-1)
+#define IMX_DMA_MEMSIZE_32 (0 << 4)
+#define IMX_DMA_MEMSIZE_8 (1 << 4)
+#define IMX_DMA_MEMSIZE_16 (2 << 4)
+#define IMX_DMA_TYPE_LINEAR (0 << 10)
+#define IMX_DMA_TYPE_2D (1 << 10)
+#define IMX_DMA_TYPE_FIFO (2 << 10)
+
+#define IMX_DMA_ERR_BURST (1 << 0)
+#define IMX_DMA_ERR_REQUEST (1 << 1)
+#define IMX_DMA_ERR_TRANSFER (1 << 2)
+#define IMX_DMA_ERR_BUFFER (1 << 3)
+#define IMX_DMA_ERR_TIMEOUT (1 << 4)
+
+#define DMA_DCR 0x00 /* Control Register */
+#define DMA_DISR 0x04 /* Interrupt status Register */
+#define DMA_DIMR 0x08 /* Interrupt mask Register */
+#define DMA_DBTOSR 0x0c /* Burst timeout status Register */
+#define DMA_DRTOSR 0x10 /* Request timeout Register */
+#define DMA_DSESR 0x14 /* Transfer Error Status Register */
+#define DMA_DBOSR 0x18 /* Buffer overflow status Register */
+#define DMA_DBTOCR 0x1c /* Burst timeout control Register */
+#define DMA_WSRA 0x40 /* W-Size Register A */
+#define DMA_XSRA 0x44 /* X-Size Register A */
+#define DMA_YSRA 0x48 /* Y-Size Register A */
+#define DMA_WSRB 0x4c /* W-Size Register B */
+#define DMA_XSRB 0x50 /* X-Size Register B */
+#define DMA_YSRB 0x54 /* Y-Size Register B */
+#define DMA_SAR(x) (0x80 + ((x) << 6)) /* Source Address Registers */
+#define DMA_DAR(x) (0x84 + ((x) << 6)) /* Destination Address Registers */
+#define DMA_CNTR(x) (0x88 + ((x) << 6)) /* Count Registers */
+#define DMA_CCR(x) (0x8c + ((x) << 6)) /* Control Registers */
+#define DMA_RSSR(x) (0x90 + ((x) << 6)) /* Request source select Registers */
+#define DMA_BLR(x) (0x94 + ((x) << 6)) /* Burst length Registers */
+#define DMA_RTOR(x) (0x98 + ((x) << 6)) /* Request timeout Registers */
+#define DMA_BUCR(x) (0x98 + ((x) << 6)) /* Bus Utilization Registers */
+#define DMA_CCNR(x) (0x9C + ((x) << 6)) /* Channel counter Registers */
+
+#define DCR_DRST (1<<1)
+#define DCR_DEN (1<<0)
+#define DBTOCR_EN (1<<15)
+#define DBTOCR_CNT(x) ((x) & 0x7fff)
+#define CNTR_CNT(x) ((x) & 0xffffff)
+#define CCR_ACRPT (1<<14)
+#define CCR_DMOD_LINEAR (0x0 << 12)
+#define CCR_DMOD_2D (0x1 << 12)
+#define CCR_DMOD_FIFO (0x2 << 12)
+#define CCR_DMOD_EOBFIFO (0x3 << 12)
+#define CCR_SMOD_LINEAR (0x0 << 10)
+#define CCR_SMOD_2D (0x1 << 10)
+#define CCR_SMOD_FIFO (0x2 << 10)
+#define CCR_SMOD_EOBFIFO (0x3 << 10)
+#define CCR_MDIR_DEC (1<<9)
+#define CCR_MSEL_B (1<<8)
+#define CCR_DSIZ_32 (0x0 << 6)
+#define CCR_DSIZ_8 (0x1 << 6)
+#define CCR_DSIZ_16 (0x2 << 6)
+#define CCR_SSIZ_32 (0x0 << 4)
+#define CCR_SSIZ_8 (0x1 << 4)
+#define CCR_SSIZ_16 (0x2 << 4)
+#define CCR_REN (1<<3)
+#define CCR_RPT (1<<2)
+#define CCR_FRC (1<<1)
+#define CCR_CEN (1<<0)
+#define RTOR_EN (1<<15)
+#define RTOR_CLK (1<<14)
+#define RTOR_PSC (1<<13)
+
+enum imxdma_prep_type {
+ IMXDMA_DESC_MEMCPY,
+ IMXDMA_DESC_INTERLEAVED,
+ IMXDMA_DESC_SLAVE_SG,
+ IMXDMA_DESC_CYCLIC,
+};
+
+struct imx_dma_2d_config {
+ u16 xsr;
+ u16 ysr;
+ u16 wsr;
+ int count;
+};
+
+struct imxdma_desc {
+ struct list_head node;
+ struct dma_async_tx_descriptor desc;
+ enum dma_status status;
+ dma_addr_t src;
+ dma_addr_t dest;
+ size_t len;
+ enum dma_transfer_direction direction;
+ enum imxdma_prep_type type;
+ /* For memcpy and interleaved */
+ unsigned int config_port;
+ unsigned int config_mem;
+ /* For interleaved transfers */
+ unsigned int x;
+ unsigned int y;
+ unsigned int w;
+ /* For slave sg and cyclic */
+ struct scatterlist *sg;
+ unsigned int sgcount;
+};
+
+struct imxdma_channel {
+ int hw_chaining;
+ struct timer_list watchdog;
+ struct imxdma_engine *imxdma;
+ unsigned int channel;
+
+ struct tasklet_struct dma_tasklet;
+ struct list_head ld_free;
+ struct list_head ld_queue;
+ struct list_head ld_active;
+ int descs_allocated;
+ enum dma_slave_buswidth word_size;
+ dma_addr_t per_address;
+ u32 watermark_level;
+ struct dma_chan chan;
+ struct dma_async_tx_descriptor desc;
+ enum dma_status status;
+ int dma_request;
+ struct scatterlist *sg_list;
+ u32 ccr_from_device;
+ u32 ccr_to_device;
+ bool enabled_2d;
+ int slot_2d;
+ unsigned int irq;
+ struct dma_slave_config config;
+};
+
+enum imx_dma_type {
+ IMX1_DMA,
+ IMX21_DMA,
+ IMX27_DMA,
+};
+
+struct imxdma_engine {
+ struct device *dev;
+ struct dma_device dma_device;
+ void __iomem *base;
+ struct clk *dma_ahb;
+ struct clk *dma_ipg;
+ spinlock_t lock;
+ struct imx_dma_2d_config slots_2d[IMX_DMA_2D_SLOTS];
+ struct imxdma_channel channel[IMX_DMA_CHANNELS];
+ enum imx_dma_type devtype;
+ unsigned int irq;
+ unsigned int irq_err;
+
+};
+
+struct imxdma_filter_data {
+ struct imxdma_engine *imxdma;
+ int request;
+};
+
+static const struct of_device_id imx_dma_of_dev_id[] = {
+ {
+ .compatible = "fsl,imx1-dma", .data = (const void *)IMX1_DMA,
+ }, {
+ .compatible = "fsl,imx21-dma", .data = (const void *)IMX21_DMA,
+ }, {
+ .compatible = "fsl,imx27-dma", .data = (const void *)IMX27_DMA,
+ }, {
+ /* sentinel */
+ }
+};
+MODULE_DEVICE_TABLE(of, imx_dma_of_dev_id);
+
+static inline int is_imx1_dma(struct imxdma_engine *imxdma)
+{
+ return imxdma->devtype == IMX1_DMA;
+}
+
+static inline int is_imx27_dma(struct imxdma_engine *imxdma)
+{
+ return imxdma->devtype == IMX27_DMA;
+}
+
+static struct imxdma_channel *to_imxdma_chan(struct dma_chan *chan)
+{
+ return container_of(chan, struct imxdma_channel, chan);
+}
+
+static inline bool imxdma_chan_is_doing_cyclic(struct imxdma_channel *imxdmac)
+{
+ struct imxdma_desc *desc;
+
+ if (!list_empty(&imxdmac->ld_active)) {
+ desc = list_first_entry(&imxdmac->ld_active, struct imxdma_desc,
+ node);
+ if (desc->type == IMXDMA_DESC_CYCLIC)
+ return true;
+ }
+ return false;
+}
+
+
+
+static void imx_dmav1_writel(struct imxdma_engine *imxdma, unsigned val,
+ unsigned offset)
+{
+ __raw_writel(val, imxdma->base + offset);
+}
+
+static unsigned imx_dmav1_readl(struct imxdma_engine *imxdma, unsigned offset)
+{
+ return __raw_readl(imxdma->base + offset);
+}
+
+static int imxdma_hw_chain(struct imxdma_channel *imxdmac)
+{
+ struct imxdma_engine *imxdma = imxdmac->imxdma;
+
+ if (is_imx27_dma(imxdma))
+ return imxdmac->hw_chaining;
+ else
+ return 0;
+}
+
+/*
+ * imxdma_sg_next - prepare next chunk for scatter-gather DMA emulation
+ */
+static inline void imxdma_sg_next(struct imxdma_desc *d)
+{
+ struct imxdma_channel *imxdmac = to_imxdma_chan(d->desc.chan);
+ struct imxdma_engine *imxdma = imxdmac->imxdma;
+ struct scatterlist *sg = d->sg;
+ size_t now;
+
+ now = min_t(size_t, d->len, sg_dma_len(sg));
+ if (d->len != IMX_DMA_LENGTH_LOOP)
+ d->len -= now;
+
+ if (d->direction == DMA_DEV_TO_MEM)
+ imx_dmav1_writel(imxdma, sg->dma_address,
+ DMA_DAR(imxdmac->channel));
+ else
+ imx_dmav1_writel(imxdma, sg->dma_address,
+ DMA_SAR(imxdmac->channel));
+
+ imx_dmav1_writel(imxdma, now, DMA_CNTR(imxdmac->channel));
+
+ dev_dbg(imxdma->dev, " %s channel: %d dst 0x%08x, src 0x%08x, "
+ "size 0x%08x\n", __func__, imxdmac->channel,
+ imx_dmav1_readl(imxdma, DMA_DAR(imxdmac->channel)),
+ imx_dmav1_readl(imxdma, DMA_SAR(imxdmac->channel)),
+ imx_dmav1_readl(imxdma, DMA_CNTR(imxdmac->channel)));
+}
+
+static void imxdma_enable_hw(struct imxdma_desc *d)
+{
+ struct imxdma_channel *imxdmac = to_imxdma_chan(d->desc.chan);
+ struct imxdma_engine *imxdma = imxdmac->imxdma;
+ int channel = imxdmac->channel;
+ unsigned long flags;
+
+ dev_dbg(imxdma->dev, "%s channel %d\n", __func__, channel);
+
+ local_irq_save(flags);
+
+ imx_dmav1_writel(imxdma, 1 << channel, DMA_DISR);
+ imx_dmav1_writel(imxdma, imx_dmav1_readl(imxdma, DMA_DIMR) &
+ ~(1 << channel), DMA_DIMR);
+ imx_dmav1_writel(imxdma, imx_dmav1_readl(imxdma, DMA_CCR(channel)) |
+ CCR_CEN | CCR_ACRPT, DMA_CCR(channel));
+
+ if (!is_imx1_dma(imxdma) &&
+ d->sg && imxdma_hw_chain(imxdmac)) {
+ d->sg = sg_next(d->sg);
+ if (d->sg) {
+ u32 tmp;
+ imxdma_sg_next(d);
+ tmp = imx_dmav1_readl(imxdma, DMA_CCR(channel));
+ imx_dmav1_writel(imxdma, tmp | CCR_RPT | CCR_ACRPT,
+ DMA_CCR(channel));
+ }
+ }
+
+ local_irq_restore(flags);
+}
+
+static void imxdma_disable_hw(struct imxdma_channel *imxdmac)
+{
+ struct imxdma_engine *imxdma = imxdmac->imxdma;
+ int channel = imxdmac->channel;
+ unsigned long flags;
+
+ dev_dbg(imxdma->dev, "%s channel %d\n", __func__, channel);
+
+ if (imxdma_hw_chain(imxdmac))
+ del_timer(&imxdmac->watchdog);
+
+ local_irq_save(flags);
+ imx_dmav1_writel(imxdma, imx_dmav1_readl(imxdma, DMA_DIMR) |
+ (1 << channel), DMA_DIMR);
+ imx_dmav1_writel(imxdma, imx_dmav1_readl(imxdma, DMA_CCR(channel)) &
+ ~CCR_CEN, DMA_CCR(channel));
+ imx_dmav1_writel(imxdma, 1 << channel, DMA_DISR);
+ local_irq_restore(flags);
+}
+
+static void imxdma_watchdog(struct timer_list *t)
+{
+ struct imxdma_channel *imxdmac = from_timer(imxdmac, t, watchdog);
+ struct imxdma_engine *imxdma = imxdmac->imxdma;
+ int channel = imxdmac->channel;
+
+ imx_dmav1_writel(imxdma, 0, DMA_CCR(channel));
+
+ /* Tasklet watchdog error handler */
+ tasklet_schedule(&imxdmac->dma_tasklet);
+ dev_dbg(imxdma->dev, "channel %d: watchdog timeout!\n",
+ imxdmac->channel);
+}
+
+static irqreturn_t imxdma_err_handler(int irq, void *dev_id)
+{
+ struct imxdma_engine *imxdma = dev_id;
+ unsigned int err_mask;
+ int i, disr;
+ int errcode;
+
+ disr = imx_dmav1_readl(imxdma, DMA_DISR);
+
+ err_mask = imx_dmav1_readl(imxdma, DMA_DBTOSR) |
+ imx_dmav1_readl(imxdma, DMA_DRTOSR) |
+ imx_dmav1_readl(imxdma, DMA_DSESR) |
+ imx_dmav1_readl(imxdma, DMA_DBOSR);
+
+ if (!err_mask)
+ return IRQ_HANDLED;
+
+ imx_dmav1_writel(imxdma, disr & err_mask, DMA_DISR);
+
+ for (i = 0; i < IMX_DMA_CHANNELS; i++) {
+ if (!(err_mask & (1 << i)))
+ continue;
+ errcode = 0;
+
+ if (imx_dmav1_readl(imxdma, DMA_DBTOSR) & (1 << i)) {
+ imx_dmav1_writel(imxdma, 1 << i, DMA_DBTOSR);
+ errcode |= IMX_DMA_ERR_BURST;
+ }
+ if (imx_dmav1_readl(imxdma, DMA_DRTOSR) & (1 << i)) {
+ imx_dmav1_writel(imxdma, 1 << i, DMA_DRTOSR);
+ errcode |= IMX_DMA_ERR_REQUEST;
+ }
+ if (imx_dmav1_readl(imxdma, DMA_DSESR) & (1 << i)) {
+ imx_dmav1_writel(imxdma, 1 << i, DMA_DSESR);
+ errcode |= IMX_DMA_ERR_TRANSFER;
+ }
+ if (imx_dmav1_readl(imxdma, DMA_DBOSR) & (1 << i)) {
+ imx_dmav1_writel(imxdma, 1 << i, DMA_DBOSR);
+ errcode |= IMX_DMA_ERR_BUFFER;
+ }
+ /* Tasklet error handler */
+ tasklet_schedule(&imxdma->channel[i].dma_tasklet);
+
+ dev_warn(imxdma->dev,
+ "DMA timeout on channel %d -%s%s%s%s\n", i,
+ errcode & IMX_DMA_ERR_BURST ? " burst" : "",
+ errcode & IMX_DMA_ERR_REQUEST ? " request" : "",
+ errcode & IMX_DMA_ERR_TRANSFER ? " transfer" : "",
+ errcode & IMX_DMA_ERR_BUFFER ? " buffer" : "");
+ }
+ return IRQ_HANDLED;
+}
+
+static void dma_irq_handle_channel(struct imxdma_channel *imxdmac)
+{
+ struct imxdma_engine *imxdma = imxdmac->imxdma;
+ int chno = imxdmac->channel;
+ struct imxdma_desc *desc;
+ unsigned long flags;
+
+ spin_lock_irqsave(&imxdma->lock, flags);
+ if (list_empty(&imxdmac->ld_active)) {
+ spin_unlock_irqrestore(&imxdma->lock, flags);
+ goto out;
+ }
+
+ desc = list_first_entry(&imxdmac->ld_active,
+ struct imxdma_desc,
+ node);
+ spin_unlock_irqrestore(&imxdma->lock, flags);
+
+ if (desc->sg) {
+ u32 tmp;
+ desc->sg = sg_next(desc->sg);
+
+ if (desc->sg) {
+ imxdma_sg_next(desc);
+
+ tmp = imx_dmav1_readl(imxdma, DMA_CCR(chno));
+
+ if (imxdma_hw_chain(imxdmac)) {
+ /* FIXME: The timeout should probably be
+ * configurable
+ */
+ mod_timer(&imxdmac->watchdog,
+ jiffies + msecs_to_jiffies(500));
+
+ tmp |= CCR_CEN | CCR_RPT | CCR_ACRPT;
+ imx_dmav1_writel(imxdma, tmp, DMA_CCR(chno));
+ } else {
+ imx_dmav1_writel(imxdma, tmp & ~CCR_CEN,
+ DMA_CCR(chno));
+ tmp |= CCR_CEN;
+ }
+
+ imx_dmav1_writel(imxdma, tmp, DMA_CCR(chno));
+
+ if (imxdma_chan_is_doing_cyclic(imxdmac))
+ /* Tasklet progression */
+ tasklet_schedule(&imxdmac->dma_tasklet);
+
+ return;
+ }
+
+ if (imxdma_hw_chain(imxdmac)) {
+ del_timer(&imxdmac->watchdog);
+ return;
+ }
+ }
+
+out:
+ imx_dmav1_writel(imxdma, 0, DMA_CCR(chno));
+ /* Tasklet irq */
+ tasklet_schedule(&imxdmac->dma_tasklet);
+}
+
+static irqreturn_t dma_irq_handler(int irq, void *dev_id)
+{
+ struct imxdma_engine *imxdma = dev_id;
+ int i, disr;
+
+ if (!is_imx1_dma(imxdma))
+ imxdma_err_handler(irq, dev_id);
+
+ disr = imx_dmav1_readl(imxdma, DMA_DISR);
+
+ dev_dbg(imxdma->dev, "%s called, disr=0x%08x\n", __func__, disr);
+
+ imx_dmav1_writel(imxdma, disr, DMA_DISR);
+ for (i = 0; i < IMX_DMA_CHANNELS; i++) {
+ if (disr & (1 << i))
+ dma_irq_handle_channel(&imxdma->channel[i]);
+ }
+
+ return IRQ_HANDLED;
+}
+
+static int imxdma_xfer_desc(struct imxdma_desc *d)
+{
+ struct imxdma_channel *imxdmac = to_imxdma_chan(d->desc.chan);
+ struct imxdma_engine *imxdma = imxdmac->imxdma;
+ int slot = -1;
+ int i;
+
+ /* Configure and enable */
+ switch (d->type) {
+ case IMXDMA_DESC_INTERLEAVED:
+ /* Try to get a free 2D slot */
+ for (i = 0; i < IMX_DMA_2D_SLOTS; i++) {
+ if ((imxdma->slots_2d[i].count > 0) &&
+ ((imxdma->slots_2d[i].xsr != d->x) ||
+ (imxdma->slots_2d[i].ysr != d->y) ||
+ (imxdma->slots_2d[i].wsr != d->w)))
+ continue;
+ slot = i;
+ break;
+ }
+ if (slot < 0)
+ return -EBUSY;
+
+ imxdma->slots_2d[slot].xsr = d->x;
+ imxdma->slots_2d[slot].ysr = d->y;
+ imxdma->slots_2d[slot].wsr = d->w;
+ imxdma->slots_2d[slot].count++;
+
+ imxdmac->slot_2d = slot;
+ imxdmac->enabled_2d = true;
+
+ if (slot == IMX_DMA_2D_SLOT_A) {
+ d->config_mem &= ~CCR_MSEL_B;
+ d->config_port &= ~CCR_MSEL_B;
+ imx_dmav1_writel(imxdma, d->x, DMA_XSRA);
+ imx_dmav1_writel(imxdma, d->y, DMA_YSRA);
+ imx_dmav1_writel(imxdma, d->w, DMA_WSRA);
+ } else {
+ d->config_mem |= CCR_MSEL_B;
+ d->config_port |= CCR_MSEL_B;
+ imx_dmav1_writel(imxdma, d->x, DMA_XSRB);
+ imx_dmav1_writel(imxdma, d->y, DMA_YSRB);
+ imx_dmav1_writel(imxdma, d->w, DMA_WSRB);
+ }
+ /*
+ * We fall-through here intentionally, since a 2D transfer is
+ * similar to MEMCPY just adding the 2D slot configuration.
+ */
+ fallthrough;
+ case IMXDMA_DESC_MEMCPY:
+ imx_dmav1_writel(imxdma, d->src, DMA_SAR(imxdmac->channel));
+ imx_dmav1_writel(imxdma, d->dest, DMA_DAR(imxdmac->channel));
+ imx_dmav1_writel(imxdma, d->config_mem | (d->config_port << 2),
+ DMA_CCR(imxdmac->channel));
+
+ imx_dmav1_writel(imxdma, d->len, DMA_CNTR(imxdmac->channel));
+
+ dev_dbg(imxdma->dev,
+ "%s channel: %d dest=0x%08llx src=0x%08llx dma_length=%zu\n",
+ __func__, imxdmac->channel,
+ (unsigned long long)d->dest,
+ (unsigned long long)d->src, d->len);
+
+ break;
+ /* Cyclic transfer is the same as slave_sg with special sg configuration. */
+ case IMXDMA_DESC_CYCLIC:
+ case IMXDMA_DESC_SLAVE_SG:
+ if (d->direction == DMA_DEV_TO_MEM) {
+ imx_dmav1_writel(imxdma, imxdmac->per_address,
+ DMA_SAR(imxdmac->channel));
+ imx_dmav1_writel(imxdma, imxdmac->ccr_from_device,
+ DMA_CCR(imxdmac->channel));
+
+ dev_dbg(imxdma->dev,
+ "%s channel: %d sg=%p sgcount=%d total length=%zu dev_addr=0x%08llx (dev2mem)\n",
+ __func__, imxdmac->channel,
+ d->sg, d->sgcount, d->len,
+ (unsigned long long)imxdmac->per_address);
+ } else if (d->direction == DMA_MEM_TO_DEV) {
+ imx_dmav1_writel(imxdma, imxdmac->per_address,
+ DMA_DAR(imxdmac->channel));
+ imx_dmav1_writel(imxdma, imxdmac->ccr_to_device,
+ DMA_CCR(imxdmac->channel));
+
+ dev_dbg(imxdma->dev,
+ "%s channel: %d sg=%p sgcount=%d total length=%zu dev_addr=0x%08llx (mem2dev)\n",
+ __func__, imxdmac->channel,
+ d->sg, d->sgcount, d->len,
+ (unsigned long long)imxdmac->per_address);
+ } else {
+ dev_err(imxdma->dev, "%s channel: %d bad dma mode\n",
+ __func__, imxdmac->channel);
+ return -EINVAL;
+ }
+
+ imxdma_sg_next(d);
+
+ break;
+ default:
+ return -EINVAL;
+ }
+ imxdma_enable_hw(d);
+ return 0;
+}
+
+static void imxdma_tasklet(struct tasklet_struct *t)
+{
+ struct imxdma_channel *imxdmac = from_tasklet(imxdmac, t, dma_tasklet);
+ struct imxdma_engine *imxdma = imxdmac->imxdma;
+ struct imxdma_desc *desc, *next_desc;
+ unsigned long flags;
+
+ spin_lock_irqsave(&imxdma->lock, flags);
+
+ if (list_empty(&imxdmac->ld_active)) {
+ /* Someone might have called terminate all */
+ spin_unlock_irqrestore(&imxdma->lock, flags);
+ return;
+ }
+ desc = list_first_entry(&imxdmac->ld_active, struct imxdma_desc, node);
+
+ /* If we are dealing with a cyclic descriptor, keep it on ld_active
+ * and dont mark the descriptor as complete.
+ * Only in non-cyclic cases it would be marked as complete
+ */
+ if (imxdma_chan_is_doing_cyclic(imxdmac))
+ goto out;
+ else
+ dma_cookie_complete(&desc->desc);
+
+ /* Free 2D slot if it was an interleaved transfer */
+ if (imxdmac->enabled_2d) {
+ imxdma->slots_2d[imxdmac->slot_2d].count--;
+ imxdmac->enabled_2d = false;
+ }
+
+ list_move_tail(imxdmac->ld_active.next, &imxdmac->ld_free);
+
+ if (!list_empty(&imxdmac->ld_queue)) {
+ next_desc = list_first_entry(&imxdmac->ld_queue,
+ struct imxdma_desc, node);
+ list_move_tail(imxdmac->ld_queue.next, &imxdmac->ld_active);
+ if (imxdma_xfer_desc(next_desc) < 0)
+ dev_warn(imxdma->dev, "%s: channel: %d couldn't xfer desc\n",
+ __func__, imxdmac->channel);
+ }
+out:
+ spin_unlock_irqrestore(&imxdma->lock, flags);
+
+ dmaengine_desc_get_callback_invoke(&desc->desc, NULL);
+}
+
+static int imxdma_terminate_all(struct dma_chan *chan)
+{
+ struct imxdma_channel *imxdmac = to_imxdma_chan(chan);
+ struct imxdma_engine *imxdma = imxdmac->imxdma;
+ unsigned long flags;
+
+ imxdma_disable_hw(imxdmac);
+
+ spin_lock_irqsave(&imxdma->lock, flags);
+ list_splice_tail_init(&imxdmac->ld_active, &imxdmac->ld_free);
+ list_splice_tail_init(&imxdmac->ld_queue, &imxdmac->ld_free);
+ spin_unlock_irqrestore(&imxdma->lock, flags);
+ return 0;
+}
+
+static int imxdma_config_write(struct dma_chan *chan,
+ struct dma_slave_config *dmaengine_cfg,
+ enum dma_transfer_direction direction)
+{
+ struct imxdma_channel *imxdmac = to_imxdma_chan(chan);
+ struct imxdma_engine *imxdma = imxdmac->imxdma;
+ unsigned int mode = 0;
+
+ if (direction == DMA_DEV_TO_MEM) {
+ imxdmac->per_address = dmaengine_cfg->src_addr;
+ imxdmac->watermark_level = dmaengine_cfg->src_maxburst;
+ imxdmac->word_size = dmaengine_cfg->src_addr_width;
+ } else {
+ imxdmac->per_address = dmaengine_cfg->dst_addr;
+ imxdmac->watermark_level = dmaengine_cfg->dst_maxburst;
+ imxdmac->word_size = dmaengine_cfg->dst_addr_width;
+ }
+
+ switch (imxdmac->word_size) {
+ case DMA_SLAVE_BUSWIDTH_1_BYTE:
+ mode = IMX_DMA_MEMSIZE_8;
+ break;
+ case DMA_SLAVE_BUSWIDTH_2_BYTES:
+ mode = IMX_DMA_MEMSIZE_16;
+ break;
+ default:
+ case DMA_SLAVE_BUSWIDTH_4_BYTES:
+ mode = IMX_DMA_MEMSIZE_32;
+ break;
+ }
+
+ imxdmac->hw_chaining = 0;
+
+ imxdmac->ccr_from_device = (mode | IMX_DMA_TYPE_FIFO) |
+ ((IMX_DMA_MEMSIZE_32 | IMX_DMA_TYPE_LINEAR) << 2) |
+ CCR_REN;
+ imxdmac->ccr_to_device =
+ (IMX_DMA_MEMSIZE_32 | IMX_DMA_TYPE_LINEAR) |
+ ((mode | IMX_DMA_TYPE_FIFO) << 2) | CCR_REN;
+ imx_dmav1_writel(imxdma, imxdmac->dma_request,
+ DMA_RSSR(imxdmac->channel));
+
+ /* Set burst length */
+ imx_dmav1_writel(imxdma, imxdmac->watermark_level *
+ imxdmac->word_size, DMA_BLR(imxdmac->channel));
+
+ return 0;
+}
+
+static int imxdma_config(struct dma_chan *chan,
+ struct dma_slave_config *dmaengine_cfg)
+{
+ struct imxdma_channel *imxdmac = to_imxdma_chan(chan);
+
+ memcpy(&imxdmac->config, dmaengine_cfg, sizeof(*dmaengine_cfg));
+
+ return 0;
+}
+
+static enum dma_status imxdma_tx_status(struct dma_chan *chan,
+ dma_cookie_t cookie,
+ struct dma_tx_state *txstate)
+{
+ return dma_cookie_status(chan, cookie, txstate);
+}
+
+static dma_cookie_t imxdma_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+ struct imxdma_channel *imxdmac = to_imxdma_chan(tx->chan);
+ struct imxdma_engine *imxdma = imxdmac->imxdma;
+ dma_cookie_t cookie;
+ unsigned long flags;
+
+ spin_lock_irqsave(&imxdma->lock, flags);
+ list_move_tail(imxdmac->ld_free.next, &imxdmac->ld_queue);
+ cookie = dma_cookie_assign(tx);
+ spin_unlock_irqrestore(&imxdma->lock, flags);
+
+ return cookie;
+}
+
+static int imxdma_alloc_chan_resources(struct dma_chan *chan)
+{
+ struct imxdma_channel *imxdmac = to_imxdma_chan(chan);
+ struct imx_dma_data *data = chan->private;
+
+ if (data != NULL)
+ imxdmac->dma_request = data->dma_request;
+
+ while (imxdmac->descs_allocated < IMXDMA_MAX_CHAN_DESCRIPTORS) {
+ struct imxdma_desc *desc;
+
+ desc = kzalloc(sizeof(*desc), GFP_KERNEL);
+ if (!desc)
+ break;
+ memset(&desc->desc, 0, sizeof(struct dma_async_tx_descriptor));
+ dma_async_tx_descriptor_init(&desc->desc, chan);
+ desc->desc.tx_submit = imxdma_tx_submit;
+ /* txd.flags will be overwritten in prep funcs */
+ desc->desc.flags = DMA_CTRL_ACK;
+ desc->status = DMA_COMPLETE;
+
+ list_add_tail(&desc->node, &imxdmac->ld_free);
+ imxdmac->descs_allocated++;
+ }
+
+ if (!imxdmac->descs_allocated)
+ return -ENOMEM;
+
+ return imxdmac->descs_allocated;
+}
+
+static void imxdma_free_chan_resources(struct dma_chan *chan)
+{
+ struct imxdma_channel *imxdmac = to_imxdma_chan(chan);
+ struct imxdma_engine *imxdma = imxdmac->imxdma;
+ struct imxdma_desc *desc, *_desc;
+ unsigned long flags;
+
+ spin_lock_irqsave(&imxdma->lock, flags);
+
+ imxdma_disable_hw(imxdmac);
+ list_splice_tail_init(&imxdmac->ld_active, &imxdmac->ld_free);
+ list_splice_tail_init(&imxdmac->ld_queue, &imxdmac->ld_free);
+
+ spin_unlock_irqrestore(&imxdma->lock, flags);
+
+ list_for_each_entry_safe(desc, _desc, &imxdmac->ld_free, node) {
+ kfree(desc);
+ imxdmac->descs_allocated--;
+ }
+ INIT_LIST_HEAD(&imxdmac->ld_free);
+
+ kfree(imxdmac->sg_list);
+ imxdmac->sg_list = NULL;
+}
+
+static struct dma_async_tx_descriptor *imxdma_prep_slave_sg(
+ struct dma_chan *chan, struct scatterlist *sgl,
+ unsigned int sg_len, enum dma_transfer_direction direction,
+ unsigned long flags, void *context)
+{
+ struct imxdma_channel *imxdmac = to_imxdma_chan(chan);
+ struct scatterlist *sg;
+ int i, dma_length = 0;
+ struct imxdma_desc *desc;
+
+ if (list_empty(&imxdmac->ld_free) ||
+ imxdma_chan_is_doing_cyclic(imxdmac))
+ return NULL;
+
+ desc = list_first_entry(&imxdmac->ld_free, struct imxdma_desc, node);
+
+ for_each_sg(sgl, sg, sg_len, i) {
+ dma_length += sg_dma_len(sg);
+ }
+
+ imxdma_config_write(chan, &imxdmac->config, direction);
+
+ switch (imxdmac->word_size) {
+ case DMA_SLAVE_BUSWIDTH_4_BYTES:
+ if (sg_dma_len(sgl) & 3 || sgl->dma_address & 3)
+ return NULL;
+ break;
+ case DMA_SLAVE_BUSWIDTH_2_BYTES:
+ if (sg_dma_len(sgl) & 1 || sgl->dma_address & 1)
+ return NULL;
+ break;
+ case DMA_SLAVE_BUSWIDTH_1_BYTE:
+ break;
+ default:
+ return NULL;
+ }
+
+ desc->type = IMXDMA_DESC_SLAVE_SG;
+ desc->sg = sgl;
+ desc->sgcount = sg_len;
+ desc->len = dma_length;
+ desc->direction = direction;
+ if (direction == DMA_DEV_TO_MEM) {
+ desc->src = imxdmac->per_address;
+ } else {
+ desc->dest = imxdmac->per_address;
+ }
+ desc->desc.callback = NULL;
+ desc->desc.callback_param = NULL;
+
+ return &desc->desc;
+}
+
+static struct dma_async_tx_descriptor *imxdma_prep_dma_cyclic(
+ struct dma_chan *chan, dma_addr_t dma_addr, size_t buf_len,
+ size_t period_len, enum dma_transfer_direction direction,
+ unsigned long flags)
+{
+ struct imxdma_channel *imxdmac = to_imxdma_chan(chan);
+ struct imxdma_engine *imxdma = imxdmac->imxdma;
+ struct imxdma_desc *desc;
+ int i;
+ unsigned int periods = buf_len / period_len;
+
+ dev_dbg(imxdma->dev, "%s channel: %d buf_len=%zu period_len=%zu\n",
+ __func__, imxdmac->channel, buf_len, period_len);
+
+ if (list_empty(&imxdmac->ld_free) ||
+ imxdma_chan_is_doing_cyclic(imxdmac))
+ return NULL;
+
+ desc = list_first_entry(&imxdmac->ld_free, struct imxdma_desc, node);
+
+ kfree(imxdmac->sg_list);
+
+ imxdmac->sg_list = kcalloc(periods + 1,
+ sizeof(struct scatterlist), GFP_ATOMIC);
+ if (!imxdmac->sg_list)
+ return NULL;
+
+ sg_init_table(imxdmac->sg_list, periods);
+
+ for (i = 0; i < periods; i++) {
+ sg_assign_page(&imxdmac->sg_list[i], NULL);
+ imxdmac->sg_list[i].offset = 0;
+ imxdmac->sg_list[i].dma_address = dma_addr;
+ sg_dma_len(&imxdmac->sg_list[i]) = period_len;
+ dma_addr += period_len;
+ }
+
+ /* close the loop */
+ sg_chain(imxdmac->sg_list, periods + 1, imxdmac->sg_list);
+
+ desc->type = IMXDMA_DESC_CYCLIC;
+ desc->sg = imxdmac->sg_list;
+ desc->sgcount = periods;
+ desc->len = IMX_DMA_LENGTH_LOOP;
+ desc->direction = direction;
+ if (direction == DMA_DEV_TO_MEM) {
+ desc->src = imxdmac->per_address;
+ } else {
+ desc->dest = imxdmac->per_address;
+ }
+ desc->desc.callback = NULL;
+ desc->desc.callback_param = NULL;
+
+ imxdma_config_write(chan, &imxdmac->config, direction);
+
+ return &desc->desc;
+}
+
+static struct dma_async_tx_descriptor *imxdma_prep_dma_memcpy(
+ struct dma_chan *chan, dma_addr_t dest,
+ dma_addr_t src, size_t len, unsigned long flags)
+{
+ struct imxdma_channel *imxdmac = to_imxdma_chan(chan);
+ struct imxdma_engine *imxdma = imxdmac->imxdma;
+ struct imxdma_desc *desc;
+
+ dev_dbg(imxdma->dev, "%s channel: %d src=0x%llx dst=0x%llx len=%zu\n",
+ __func__, imxdmac->channel, (unsigned long long)src,
+ (unsigned long long)dest, len);
+
+ if (list_empty(&imxdmac->ld_free) ||
+ imxdma_chan_is_doing_cyclic(imxdmac))
+ return NULL;
+
+ desc = list_first_entry(&imxdmac->ld_free, struct imxdma_desc, node);
+
+ desc->type = IMXDMA_DESC_MEMCPY;
+ desc->src = src;
+ desc->dest = dest;
+ desc->len = len;
+ desc->direction = DMA_MEM_TO_MEM;
+ desc->config_port = IMX_DMA_MEMSIZE_32 | IMX_DMA_TYPE_LINEAR;
+ desc->config_mem = IMX_DMA_MEMSIZE_32 | IMX_DMA_TYPE_LINEAR;
+ desc->desc.callback = NULL;
+ desc->desc.callback_param = NULL;
+
+ return &desc->desc;
+}
+
+static struct dma_async_tx_descriptor *imxdma_prep_dma_interleaved(
+ struct dma_chan *chan, struct dma_interleaved_template *xt,
+ unsigned long flags)
+{
+ struct imxdma_channel *imxdmac = to_imxdma_chan(chan);
+ struct imxdma_engine *imxdma = imxdmac->imxdma;
+ struct imxdma_desc *desc;
+
+ dev_dbg(imxdma->dev, "%s channel: %d src_start=0x%llx dst_start=0x%llx\n"
+ " src_sgl=%s dst_sgl=%s numf=%zu frame_size=%zu\n", __func__,
+ imxdmac->channel, (unsigned long long)xt->src_start,
+ (unsigned long long) xt->dst_start,
+ xt->src_sgl ? "true" : "false", xt->dst_sgl ? "true" : "false",
+ xt->numf, xt->frame_size);
+
+ if (list_empty(&imxdmac->ld_free) ||
+ imxdma_chan_is_doing_cyclic(imxdmac))
+ return NULL;
+
+ if (xt->frame_size != 1 || xt->numf <= 0 || xt->dir != DMA_MEM_TO_MEM)
+ return NULL;
+
+ desc = list_first_entry(&imxdmac->ld_free, struct imxdma_desc, node);
+
+ desc->type = IMXDMA_DESC_INTERLEAVED;
+ desc->src = xt->src_start;
+ desc->dest = xt->dst_start;
+ desc->x = xt->sgl[0].size;
+ desc->y = xt->numf;
+ desc->w = xt->sgl[0].icg + desc->x;
+ desc->len = desc->x * desc->y;
+ desc->direction = DMA_MEM_TO_MEM;
+ desc->config_port = IMX_DMA_MEMSIZE_32;
+ desc->config_mem = IMX_DMA_MEMSIZE_32;
+ if (xt->src_sgl)
+ desc->config_mem |= IMX_DMA_TYPE_2D;
+ if (xt->dst_sgl)
+ desc->config_port |= IMX_DMA_TYPE_2D;
+ desc->desc.callback = NULL;
+ desc->desc.callback_param = NULL;
+
+ return &desc->desc;
+}
+
+static void imxdma_issue_pending(struct dma_chan *chan)
+{
+ struct imxdma_channel *imxdmac = to_imxdma_chan(chan);
+ struct imxdma_engine *imxdma = imxdmac->imxdma;
+ struct imxdma_desc *desc;
+ unsigned long flags;
+
+ spin_lock_irqsave(&imxdma->lock, flags);
+ if (list_empty(&imxdmac->ld_active) &&
+ !list_empty(&imxdmac->ld_queue)) {
+ desc = list_first_entry(&imxdmac->ld_queue,
+ struct imxdma_desc, node);
+
+ if (imxdma_xfer_desc(desc) < 0) {
+ dev_warn(imxdma->dev,
+ "%s: channel: %d couldn't issue DMA xfer\n",
+ __func__, imxdmac->channel);
+ } else {
+ list_move_tail(imxdmac->ld_queue.next,
+ &imxdmac->ld_active);
+ }
+ }
+ spin_unlock_irqrestore(&imxdma->lock, flags);
+}
+
+static bool imxdma_filter_fn(struct dma_chan *chan, void *param)
+{
+ struct imxdma_filter_data *fdata = param;
+ struct imxdma_channel *imxdma_chan = to_imxdma_chan(chan);
+
+ if (chan->device->dev != fdata->imxdma->dev)
+ return false;
+
+ imxdma_chan->dma_request = fdata->request;
+ chan->private = NULL;
+
+ return true;
+}
+
+static struct dma_chan *imxdma_xlate(struct of_phandle_args *dma_spec,
+ struct of_dma *ofdma)
+{
+ int count = dma_spec->args_count;
+ struct imxdma_engine *imxdma = ofdma->of_dma_data;
+ struct imxdma_filter_data fdata = {
+ .imxdma = imxdma,
+ };
+
+ if (count != 1)
+ return NULL;
+
+ fdata.request = dma_spec->args[0];
+
+ return dma_request_channel(imxdma->dma_device.cap_mask,
+ imxdma_filter_fn, &fdata);
+}
+
+static int __init imxdma_probe(struct platform_device *pdev)
+{
+ struct imxdma_engine *imxdma;
+ struct resource *res;
+ int ret, i;
+ int irq, irq_err;
+
+ imxdma = devm_kzalloc(&pdev->dev, sizeof(*imxdma), GFP_KERNEL);
+ if (!imxdma)
+ return -ENOMEM;
+
+ imxdma->dev = &pdev->dev;
+ imxdma->devtype = (uintptr_t)of_device_get_match_data(&pdev->dev);
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ imxdma->base = devm_ioremap_resource(&pdev->dev, res);
+ if (IS_ERR(imxdma->base))
+ return PTR_ERR(imxdma->base);
+
+ irq = platform_get_irq(pdev, 0);
+ if (irq < 0)
+ return irq;
+
+ imxdma->dma_ipg = devm_clk_get(&pdev->dev, "ipg");
+ if (IS_ERR(imxdma->dma_ipg))
+ return PTR_ERR(imxdma->dma_ipg);
+
+ imxdma->dma_ahb = devm_clk_get(&pdev->dev, "ahb");
+ if (IS_ERR(imxdma->dma_ahb))
+ return PTR_ERR(imxdma->dma_ahb);
+
+ ret = clk_prepare_enable(imxdma->dma_ipg);
+ if (ret)
+ return ret;
+ ret = clk_prepare_enable(imxdma->dma_ahb);
+ if (ret)
+ goto disable_dma_ipg_clk;
+
+ /* reset DMA module */
+ imx_dmav1_writel(imxdma, DCR_DRST, DMA_DCR);
+
+ if (is_imx1_dma(imxdma)) {
+ ret = devm_request_irq(&pdev->dev, irq,
+ dma_irq_handler, 0, "DMA", imxdma);
+ if (ret) {
+ dev_warn(imxdma->dev, "Can't register IRQ for DMA\n");
+ goto disable_dma_ahb_clk;
+ }
+ imxdma->irq = irq;
+
+ irq_err = platform_get_irq(pdev, 1);
+ if (irq_err < 0) {
+ ret = irq_err;
+ goto disable_dma_ahb_clk;
+ }
+
+ ret = devm_request_irq(&pdev->dev, irq_err,
+ imxdma_err_handler, 0, "DMA", imxdma);
+ if (ret) {
+ dev_warn(imxdma->dev, "Can't register ERRIRQ for DMA\n");
+ goto disable_dma_ahb_clk;
+ }
+ imxdma->irq_err = irq_err;
+ }
+
+ /* enable DMA module */
+ imx_dmav1_writel(imxdma, DCR_DEN, DMA_DCR);
+
+ /* clear all interrupts */
+ imx_dmav1_writel(imxdma, (1 << IMX_DMA_CHANNELS) - 1, DMA_DISR);
+
+ /* disable interrupts */
+ imx_dmav1_writel(imxdma, (1 << IMX_DMA_CHANNELS) - 1, DMA_DIMR);
+
+ INIT_LIST_HEAD(&imxdma->dma_device.channels);
+
+ dma_cap_set(DMA_SLAVE, imxdma->dma_device.cap_mask);
+ dma_cap_set(DMA_CYCLIC, imxdma->dma_device.cap_mask);
+ dma_cap_set(DMA_MEMCPY, imxdma->dma_device.cap_mask);
+ dma_cap_set(DMA_INTERLEAVE, imxdma->dma_device.cap_mask);
+
+ /* Initialize 2D global parameters */
+ for (i = 0; i < IMX_DMA_2D_SLOTS; i++)
+ imxdma->slots_2d[i].count = 0;
+
+ spin_lock_init(&imxdma->lock);
+
+ /* Initialize channel parameters */
+ for (i = 0; i < IMX_DMA_CHANNELS; i++) {
+ struct imxdma_channel *imxdmac = &imxdma->channel[i];
+
+ if (!is_imx1_dma(imxdma)) {
+ ret = devm_request_irq(&pdev->dev, irq + i,
+ dma_irq_handler, 0, "DMA", imxdma);
+ if (ret) {
+ dev_warn(imxdma->dev, "Can't register IRQ %d "
+ "for DMA channel %d\n",
+ irq + i, i);
+ goto disable_dma_ahb_clk;
+ }
+
+ imxdmac->irq = irq + i;
+ timer_setup(&imxdmac->watchdog, imxdma_watchdog, 0);
+ }
+
+ imxdmac->imxdma = imxdma;
+
+ INIT_LIST_HEAD(&imxdmac->ld_queue);
+ INIT_LIST_HEAD(&imxdmac->ld_free);
+ INIT_LIST_HEAD(&imxdmac->ld_active);
+
+ tasklet_setup(&imxdmac->dma_tasklet, imxdma_tasklet);
+ imxdmac->chan.device = &imxdma->dma_device;
+ dma_cookie_init(&imxdmac->chan);
+ imxdmac->channel = i;
+
+ /* Add the channel to the DMAC list */
+ list_add_tail(&imxdmac->chan.device_node,
+ &imxdma->dma_device.channels);
+ }
+
+ imxdma->dma_device.dev = &pdev->dev;
+
+ imxdma->dma_device.device_alloc_chan_resources = imxdma_alloc_chan_resources;
+ imxdma->dma_device.device_free_chan_resources = imxdma_free_chan_resources;
+ imxdma->dma_device.device_tx_status = imxdma_tx_status;
+ imxdma->dma_device.device_prep_slave_sg = imxdma_prep_slave_sg;
+ imxdma->dma_device.device_prep_dma_cyclic = imxdma_prep_dma_cyclic;
+ imxdma->dma_device.device_prep_dma_memcpy = imxdma_prep_dma_memcpy;
+ imxdma->dma_device.device_prep_interleaved_dma = imxdma_prep_dma_interleaved;
+ imxdma->dma_device.device_config = imxdma_config;
+ imxdma->dma_device.device_terminate_all = imxdma_terminate_all;
+ imxdma->dma_device.device_issue_pending = imxdma_issue_pending;
+
+ platform_set_drvdata(pdev, imxdma);
+
+ imxdma->dma_device.copy_align = DMAENGINE_ALIGN_4_BYTES;
+ dma_set_max_seg_size(imxdma->dma_device.dev, 0xffffff);
+
+ ret = dma_async_device_register(&imxdma->dma_device);
+ if (ret) {
+ dev_err(&pdev->dev, "unable to register\n");
+ goto disable_dma_ahb_clk;
+ }
+
+ if (pdev->dev.of_node) {
+ ret = of_dma_controller_register(pdev->dev.of_node,
+ imxdma_xlate, imxdma);
+ if (ret) {
+ dev_err(&pdev->dev, "unable to register of_dma_controller\n");
+ goto err_of_dma_controller;
+ }
+ }
+
+ return 0;
+
+err_of_dma_controller:
+ dma_async_device_unregister(&imxdma->dma_device);
+disable_dma_ahb_clk:
+ clk_disable_unprepare(imxdma->dma_ahb);
+disable_dma_ipg_clk:
+ clk_disable_unprepare(imxdma->dma_ipg);
+ return ret;
+}
+
+static void imxdma_free_irq(struct platform_device *pdev, struct imxdma_engine *imxdma)
+{
+ int i;
+
+ if (is_imx1_dma(imxdma)) {
+ disable_irq(imxdma->irq);
+ disable_irq(imxdma->irq_err);
+ }
+
+ for (i = 0; i < IMX_DMA_CHANNELS; i++) {
+ struct imxdma_channel *imxdmac = &imxdma->channel[i];
+
+ if (!is_imx1_dma(imxdma))
+ disable_irq(imxdmac->irq);
+
+ tasklet_kill(&imxdmac->dma_tasklet);
+ }
+}
+
+static int imxdma_remove(struct platform_device *pdev)
+{
+ struct imxdma_engine *imxdma = platform_get_drvdata(pdev);
+
+ imxdma_free_irq(pdev, imxdma);
+
+ dma_async_device_unregister(&imxdma->dma_device);
+
+ if (pdev->dev.of_node)
+ of_dma_controller_free(pdev->dev.of_node);
+
+ clk_disable_unprepare(imxdma->dma_ipg);
+ clk_disable_unprepare(imxdma->dma_ahb);
+
+ return 0;
+}
+
+static struct platform_driver imxdma_driver = {
+ .driver = {
+ .name = "imx-dma",
+ .of_match_table = imx_dma_of_dev_id,
+ },
+ .remove = imxdma_remove,
+};
+
+static int __init imxdma_module_init(void)
+{
+ return platform_driver_probe(&imxdma_driver, imxdma_probe);
+}
+subsys_initcall(imxdma_module_init);
+
+MODULE_AUTHOR("Sascha Hauer, Pengutronix <s.hauer@pengutronix.de>");
+MODULE_DESCRIPTION("i.MX dma driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c
new file mode 100644
index 000000000..b926abe4f
--- /dev/null
+++ b/drivers/dma/imx-sdma.c
@@ -0,0 +1,2382 @@
+// SPDX-License-Identifier: GPL-2.0+
+//
+// drivers/dma/imx-sdma.c
+//
+// This file contains a driver for the Freescale Smart DMA engine
+//
+// Copyright 2010 Sascha Hauer, Pengutronix <s.hauer@pengutronix.de>
+//
+// Based on code from Freescale:
+//
+// Copyright 2004-2009 Freescale Semiconductor, Inc. All Rights Reserved.
+
+#include <linux/init.h>
+#include <linux/iopoll.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/bitfield.h>
+#include <linux/bitops.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/sched.h>
+#include <linux/semaphore.h>
+#include <linux/spinlock.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/firmware.h>
+#include <linux/slab.h>
+#include <linux/platform_device.h>
+#include <linux/dmaengine.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_device.h>
+#include <linux/of_dma.h>
+#include <linux/workqueue.h>
+
+#include <asm/irq.h>
+#include <linux/dma/imx-dma.h>
+#include <linux/regmap.h>
+#include <linux/mfd/syscon.h>
+#include <linux/mfd/syscon/imx6q-iomuxc-gpr.h>
+
+#include "dmaengine.h"
+#include "virt-dma.h"
+
+/* SDMA registers */
+#define SDMA_H_C0PTR 0x000
+#define SDMA_H_INTR 0x004
+#define SDMA_H_STATSTOP 0x008
+#define SDMA_H_START 0x00c
+#define SDMA_H_EVTOVR 0x010
+#define SDMA_H_DSPOVR 0x014
+#define SDMA_H_HOSTOVR 0x018
+#define SDMA_H_EVTPEND 0x01c
+#define SDMA_H_DSPENBL 0x020
+#define SDMA_H_RESET 0x024
+#define SDMA_H_EVTERR 0x028
+#define SDMA_H_INTRMSK 0x02c
+#define SDMA_H_PSW 0x030
+#define SDMA_H_EVTERRDBG 0x034
+#define SDMA_H_CONFIG 0x038
+#define SDMA_ONCE_ENB 0x040
+#define SDMA_ONCE_DATA 0x044
+#define SDMA_ONCE_INSTR 0x048
+#define SDMA_ONCE_STAT 0x04c
+#define SDMA_ONCE_CMD 0x050
+#define SDMA_EVT_MIRROR 0x054
+#define SDMA_ILLINSTADDR 0x058
+#define SDMA_CHN0ADDR 0x05c
+#define SDMA_ONCE_RTB 0x060
+#define SDMA_XTRIG_CONF1 0x070
+#define SDMA_XTRIG_CONF2 0x074
+#define SDMA_CHNENBL0_IMX35 0x200
+#define SDMA_CHNENBL0_IMX31 0x080
+#define SDMA_CHNPRI_0 0x100
+#define SDMA_DONE0_CONFIG 0x1000
+
+/*
+ * Buffer descriptor status values.
+ */
+#define BD_DONE 0x01
+#define BD_WRAP 0x02
+#define BD_CONT 0x04
+#define BD_INTR 0x08
+#define BD_RROR 0x10
+#define BD_LAST 0x20
+#define BD_EXTD 0x80
+
+/*
+ * Data Node descriptor status values.
+ */
+#define DND_END_OF_FRAME 0x80
+#define DND_END_OF_XFER 0x40
+#define DND_DONE 0x20
+#define DND_UNUSED 0x01
+
+/*
+ * IPCV2 descriptor status values.
+ */
+#define BD_IPCV2_END_OF_FRAME 0x40
+
+#define IPCV2_MAX_NODES 50
+/*
+ * Error bit set in the CCB status field by the SDMA,
+ * in setbd routine, in case of a transfer error
+ */
+#define DATA_ERROR 0x10000000
+
+/*
+ * Buffer descriptor commands.
+ */
+#define C0_ADDR 0x01
+#define C0_LOAD 0x02
+#define C0_DUMP 0x03
+#define C0_SETCTX 0x07
+#define C0_GETCTX 0x03
+#define C0_SETDM 0x01
+#define C0_SETPM 0x04
+#define C0_GETDM 0x02
+#define C0_GETPM 0x08
+/*
+ * Change endianness indicator in the BD command field
+ */
+#define CHANGE_ENDIANNESS 0x80
+
+/*
+ * p_2_p watermark_level description
+ * Bits Name Description
+ * 0-7 Lower WML Lower watermark level
+ * 8 PS 1: Pad Swallowing
+ * 0: No Pad Swallowing
+ * 9 PA 1: Pad Adding
+ * 0: No Pad Adding
+ * 10 SPDIF If this bit is set both source
+ * and destination are on SPBA
+ * 11 Source Bit(SP) 1: Source on SPBA
+ * 0: Source on AIPS
+ * 12 Destination Bit(DP) 1: Destination on SPBA
+ * 0: Destination on AIPS
+ * 13-15 --------- MUST BE 0
+ * 16-23 Higher WML HWML
+ * 24-27 N Total number of samples after
+ * which Pad adding/Swallowing
+ * must be done. It must be odd.
+ * 28 Lower WML Event(LWE) SDMA events reg to check for
+ * LWML event mask
+ * 0: LWE in EVENTS register
+ * 1: LWE in EVENTS2 register
+ * 29 Higher WML Event(HWE) SDMA events reg to check for
+ * HWML event mask
+ * 0: HWE in EVENTS register
+ * 1: HWE in EVENTS2 register
+ * 30 --------- MUST BE 0
+ * 31 CONT 1: Amount of samples to be
+ * transferred is unknown and
+ * script will keep on
+ * transferring samples as long as
+ * both events are detected and
+ * script must be manually stopped
+ * by the application
+ * 0: The amount of samples to be
+ * transferred is equal to the
+ * count field of mode word
+ */
+#define SDMA_WATERMARK_LEVEL_LWML 0xFF
+#define SDMA_WATERMARK_LEVEL_PS BIT(8)
+#define SDMA_WATERMARK_LEVEL_PA BIT(9)
+#define SDMA_WATERMARK_LEVEL_SPDIF BIT(10)
+#define SDMA_WATERMARK_LEVEL_SP BIT(11)
+#define SDMA_WATERMARK_LEVEL_DP BIT(12)
+#define SDMA_WATERMARK_LEVEL_HWML (0xFF << 16)
+#define SDMA_WATERMARK_LEVEL_LWE BIT(28)
+#define SDMA_WATERMARK_LEVEL_HWE BIT(29)
+#define SDMA_WATERMARK_LEVEL_CONT BIT(31)
+
+#define SDMA_DMA_BUSWIDTHS (BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | \
+ BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \
+ BIT(DMA_SLAVE_BUSWIDTH_4_BYTES))
+
+#define SDMA_DMA_DIRECTIONS (BIT(DMA_DEV_TO_MEM) | \
+ BIT(DMA_MEM_TO_DEV) | \
+ BIT(DMA_DEV_TO_DEV))
+
+#define SDMA_WATERMARK_LEVEL_N_FIFOS GENMASK(15, 12)
+#define SDMA_WATERMARK_LEVEL_OFF_FIFOS GENMASK(19, 16)
+#define SDMA_WATERMARK_LEVEL_WORDS_PER_FIFO GENMASK(31, 28)
+#define SDMA_WATERMARK_LEVEL_SW_DONE BIT(23)
+
+#define SDMA_DONE0_CONFIG_DONE_SEL BIT(7)
+#define SDMA_DONE0_CONFIG_DONE_DIS BIT(6)
+
+/*
+ * struct sdma_script_start_addrs - SDMA script start pointers
+ *
+ * start addresses of the different functions in the physical
+ * address space of the SDMA engine.
+ */
+struct sdma_script_start_addrs {
+ s32 ap_2_ap_addr;
+ s32 ap_2_bp_addr;
+ s32 ap_2_ap_fixed_addr;
+ s32 bp_2_ap_addr;
+ s32 loopback_on_dsp_side_addr;
+ s32 mcu_interrupt_only_addr;
+ s32 firi_2_per_addr;
+ s32 firi_2_mcu_addr;
+ s32 per_2_firi_addr;
+ s32 mcu_2_firi_addr;
+ s32 uart_2_per_addr;
+ s32 uart_2_mcu_addr;
+ s32 per_2_app_addr;
+ s32 mcu_2_app_addr;
+ s32 per_2_per_addr;
+ s32 uartsh_2_per_addr;
+ s32 uartsh_2_mcu_addr;
+ s32 per_2_shp_addr;
+ s32 mcu_2_shp_addr;
+ s32 ata_2_mcu_addr;
+ s32 mcu_2_ata_addr;
+ s32 app_2_per_addr;
+ s32 app_2_mcu_addr;
+ s32 shp_2_per_addr;
+ s32 shp_2_mcu_addr;
+ s32 mshc_2_mcu_addr;
+ s32 mcu_2_mshc_addr;
+ s32 spdif_2_mcu_addr;
+ s32 mcu_2_spdif_addr;
+ s32 asrc_2_mcu_addr;
+ s32 ext_mem_2_ipu_addr;
+ s32 descrambler_addr;
+ s32 dptc_dvfs_addr;
+ s32 utra_addr;
+ s32 ram_code_start_addr;
+ /* End of v1 array */
+ s32 mcu_2_ssish_addr;
+ s32 ssish_2_mcu_addr;
+ s32 hdmi_dma_addr;
+ /* End of v2 array */
+ s32 zcanfd_2_mcu_addr;
+ s32 zqspi_2_mcu_addr;
+ s32 mcu_2_ecspi_addr;
+ s32 mcu_2_sai_addr;
+ s32 sai_2_mcu_addr;
+ s32 uart_2_mcu_rom_addr;
+ s32 uartsh_2_mcu_rom_addr;
+ /* End of v3 array */
+ s32 mcu_2_zqspi_addr;
+ /* End of v4 array */
+};
+
+/*
+ * Mode/Count of data node descriptors - IPCv2
+ */
+struct sdma_mode_count {
+#define SDMA_BD_MAX_CNT 0xffff
+ u32 count : 16; /* size of the buffer pointed by this BD */
+ u32 status : 8; /* E,R,I,C,W,D status bits stored here */
+ u32 command : 8; /* command mostly used for channel 0 */
+};
+
+/*
+ * Buffer descriptor
+ */
+struct sdma_buffer_descriptor {
+ struct sdma_mode_count mode;
+ u32 buffer_addr; /* address of the buffer described */
+ u32 ext_buffer_addr; /* extended buffer address */
+} __attribute__ ((packed));
+
+/**
+ * struct sdma_channel_control - Channel control Block
+ *
+ * @current_bd_ptr: current buffer descriptor processed
+ * @base_bd_ptr: first element of buffer descriptor array
+ * @unused: padding. The SDMA engine expects an array of 128 byte
+ * control blocks
+ */
+struct sdma_channel_control {
+ u32 current_bd_ptr;
+ u32 base_bd_ptr;
+ u32 unused[2];
+} __attribute__ ((packed));
+
+/**
+ * struct sdma_state_registers - SDMA context for a channel
+ *
+ * @pc: program counter
+ * @unused1: unused
+ * @t: test bit: status of arithmetic & test instruction
+ * @rpc: return program counter
+ * @unused0: unused
+ * @sf: source fault while loading data
+ * @spc: loop start program counter
+ * @unused2: unused
+ * @df: destination fault while storing data
+ * @epc: loop end program counter
+ * @lm: loop mode
+ */
+struct sdma_state_registers {
+ u32 pc :14;
+ u32 unused1: 1;
+ u32 t : 1;
+ u32 rpc :14;
+ u32 unused0: 1;
+ u32 sf : 1;
+ u32 spc :14;
+ u32 unused2: 1;
+ u32 df : 1;
+ u32 epc :14;
+ u32 lm : 2;
+} __attribute__ ((packed));
+
+/**
+ * struct sdma_context_data - sdma context specific to a channel
+ *
+ * @channel_state: channel state bits
+ * @gReg: general registers
+ * @mda: burst dma destination address register
+ * @msa: burst dma source address register
+ * @ms: burst dma status register
+ * @md: burst dma data register
+ * @pda: peripheral dma destination address register
+ * @psa: peripheral dma source address register
+ * @ps: peripheral dma status register
+ * @pd: peripheral dma data register
+ * @ca: CRC polynomial register
+ * @cs: CRC accumulator register
+ * @dda: dedicated core destination address register
+ * @dsa: dedicated core source address register
+ * @ds: dedicated core status register
+ * @dd: dedicated core data register
+ * @scratch0: 1st word of dedicated ram for context switch
+ * @scratch1: 2nd word of dedicated ram for context switch
+ * @scratch2: 3rd word of dedicated ram for context switch
+ * @scratch3: 4th word of dedicated ram for context switch
+ * @scratch4: 5th word of dedicated ram for context switch
+ * @scratch5: 6th word of dedicated ram for context switch
+ * @scratch6: 7th word of dedicated ram for context switch
+ * @scratch7: 8th word of dedicated ram for context switch
+ */
+struct sdma_context_data {
+ struct sdma_state_registers channel_state;
+ u32 gReg[8];
+ u32 mda;
+ u32 msa;
+ u32 ms;
+ u32 md;
+ u32 pda;
+ u32 psa;
+ u32 ps;
+ u32 pd;
+ u32 ca;
+ u32 cs;
+ u32 dda;
+ u32 dsa;
+ u32 ds;
+ u32 dd;
+ u32 scratch0;
+ u32 scratch1;
+ u32 scratch2;
+ u32 scratch3;
+ u32 scratch4;
+ u32 scratch5;
+ u32 scratch6;
+ u32 scratch7;
+} __attribute__ ((packed));
+
+
+struct sdma_engine;
+
+/**
+ * struct sdma_desc - descriptor structor for one transfer
+ * @vd: descriptor for virt dma
+ * @num_bd: number of descriptors currently handling
+ * @bd_phys: physical address of bd
+ * @buf_tail: ID of the buffer that was processed
+ * @buf_ptail: ID of the previous buffer that was processed
+ * @period_len: period length, used in cyclic.
+ * @chn_real_count: the real count updated from bd->mode.count
+ * @chn_count: the transfer count set
+ * @sdmac: sdma_channel pointer
+ * @bd: pointer of allocate bd
+ */
+struct sdma_desc {
+ struct virt_dma_desc vd;
+ unsigned int num_bd;
+ dma_addr_t bd_phys;
+ unsigned int buf_tail;
+ unsigned int buf_ptail;
+ unsigned int period_len;
+ unsigned int chn_real_count;
+ unsigned int chn_count;
+ struct sdma_channel *sdmac;
+ struct sdma_buffer_descriptor *bd;
+};
+
+/**
+ * struct sdma_channel - housekeeping for a SDMA channel
+ *
+ * @vc: virt_dma base structure
+ * @desc: sdma description including vd and other special member
+ * @sdma: pointer to the SDMA engine for this channel
+ * @channel: the channel number, matches dmaengine chan_id + 1
+ * @direction: transfer type. Needed for setting SDMA script
+ * @slave_config: Slave configuration
+ * @peripheral_type: Peripheral type. Needed for setting SDMA script
+ * @event_id0: aka dma request line
+ * @event_id1: for channels that use 2 events
+ * @word_size: peripheral access size
+ * @pc_from_device: script address for those device_2_memory
+ * @pc_to_device: script address for those memory_2_device
+ * @device_to_device: script address for those device_2_device
+ * @pc_to_pc: script address for those memory_2_memory
+ * @flags: loop mode or not
+ * @per_address: peripheral source or destination address in common case
+ * destination address in p_2_p case
+ * @per_address2: peripheral source address in p_2_p case
+ * @event_mask: event mask used in p_2_p script
+ * @watermark_level: value for gReg[7], some script will extend it from
+ * basic watermark such as p_2_p
+ * @shp_addr: value for gReg[6]
+ * @per_addr: value for gReg[2]
+ * @status: status of dma channel
+ * @context_loaded: ensure context is only loaded once
+ * @data: specific sdma interface structure
+ * @bd_pool: dma_pool for bd
+ * @terminate_worker: used to call back into terminate work function
+ * @terminated: terminated list
+ * @is_ram_script: flag for script in ram
+ * @n_fifos_src: number of source device fifos
+ * @n_fifos_dst: number of destination device fifos
+ * @sw_done: software done flag
+ * @stride_fifos_src: stride for source device FIFOs
+ * @stride_fifos_dst: stride for destination device FIFOs
+ * @words_per_fifo: copy number of words one time for one FIFO
+ */
+struct sdma_channel {
+ struct virt_dma_chan vc;
+ struct sdma_desc *desc;
+ struct sdma_engine *sdma;
+ unsigned int channel;
+ enum dma_transfer_direction direction;
+ struct dma_slave_config slave_config;
+ enum sdma_peripheral_type peripheral_type;
+ unsigned int event_id0;
+ unsigned int event_id1;
+ enum dma_slave_buswidth word_size;
+ unsigned int pc_from_device, pc_to_device;
+ unsigned int device_to_device;
+ unsigned int pc_to_pc;
+ unsigned long flags;
+ dma_addr_t per_address, per_address2;
+ unsigned long event_mask[2];
+ unsigned long watermark_level;
+ u32 shp_addr, per_addr;
+ enum dma_status status;
+ struct imx_dma_data data;
+ struct work_struct terminate_worker;
+ struct list_head terminated;
+ bool is_ram_script;
+ unsigned int n_fifos_src;
+ unsigned int n_fifos_dst;
+ unsigned int stride_fifos_src;
+ unsigned int stride_fifos_dst;
+ unsigned int words_per_fifo;
+ bool sw_done;
+};
+
+#define IMX_DMA_SG_LOOP BIT(0)
+
+#define MAX_DMA_CHANNELS 32
+#define MXC_SDMA_DEFAULT_PRIORITY 1
+#define MXC_SDMA_MIN_PRIORITY 1
+#define MXC_SDMA_MAX_PRIORITY 7
+
+#define SDMA_FIRMWARE_MAGIC 0x414d4453
+
+/**
+ * struct sdma_firmware_header - Layout of the firmware image
+ *
+ * @magic: "SDMA"
+ * @version_major: increased whenever layout of struct
+ * sdma_script_start_addrs changes.
+ * @version_minor: firmware minor version (for binary compatible changes)
+ * @script_addrs_start: offset of struct sdma_script_start_addrs in this image
+ * @num_script_addrs: Number of script addresses in this image
+ * @ram_code_start: offset of SDMA ram image in this firmware image
+ * @ram_code_size: size of SDMA ram image
+ * @script_addrs: Stores the start address of the SDMA scripts
+ * (in SDMA memory space)
+ */
+struct sdma_firmware_header {
+ u32 magic;
+ u32 version_major;
+ u32 version_minor;
+ u32 script_addrs_start;
+ u32 num_script_addrs;
+ u32 ram_code_start;
+ u32 ram_code_size;
+};
+
+struct sdma_driver_data {
+ int chnenbl0;
+ int num_events;
+ struct sdma_script_start_addrs *script_addrs;
+ bool check_ratio;
+ /*
+ * ecspi ERR009165 fixed should be done in sdma script
+ * and it has been fixed in soc from i.mx6ul.
+ * please get more information from the below link:
+ * https://www.nxp.com/docs/en/errata/IMX6DQCE.pdf
+ */
+ bool ecspi_fixed;
+};
+
+struct sdma_engine {
+ struct device *dev;
+ struct sdma_channel channel[MAX_DMA_CHANNELS];
+ struct sdma_channel_control *channel_control;
+ void __iomem *regs;
+ struct sdma_context_data *context;
+ dma_addr_t context_phys;
+ struct dma_device dma_device;
+ struct clk *clk_ipg;
+ struct clk *clk_ahb;
+ spinlock_t channel_0_lock;
+ u32 script_number;
+ struct sdma_script_start_addrs *script_addrs;
+ const struct sdma_driver_data *drvdata;
+ u32 spba_start_addr;
+ u32 spba_end_addr;
+ unsigned int irq;
+ dma_addr_t bd0_phys;
+ struct sdma_buffer_descriptor *bd0;
+ /* clock ratio for AHB:SDMA core. 1:1 is 1, 2:1 is 0*/
+ bool clk_ratio;
+ bool fw_loaded;
+};
+
+static int sdma_config_write(struct dma_chan *chan,
+ struct dma_slave_config *dmaengine_cfg,
+ enum dma_transfer_direction direction);
+
+static struct sdma_driver_data sdma_imx31 = {
+ .chnenbl0 = SDMA_CHNENBL0_IMX31,
+ .num_events = 32,
+};
+
+static struct sdma_script_start_addrs sdma_script_imx25 = {
+ .ap_2_ap_addr = 729,
+ .uart_2_mcu_addr = 904,
+ .per_2_app_addr = 1255,
+ .mcu_2_app_addr = 834,
+ .uartsh_2_mcu_addr = 1120,
+ .per_2_shp_addr = 1329,
+ .mcu_2_shp_addr = 1048,
+ .ata_2_mcu_addr = 1560,
+ .mcu_2_ata_addr = 1479,
+ .app_2_per_addr = 1189,
+ .app_2_mcu_addr = 770,
+ .shp_2_per_addr = 1407,
+ .shp_2_mcu_addr = 979,
+};
+
+static struct sdma_driver_data sdma_imx25 = {
+ .chnenbl0 = SDMA_CHNENBL0_IMX35,
+ .num_events = 48,
+ .script_addrs = &sdma_script_imx25,
+};
+
+static struct sdma_driver_data sdma_imx35 = {
+ .chnenbl0 = SDMA_CHNENBL0_IMX35,
+ .num_events = 48,
+};
+
+static struct sdma_script_start_addrs sdma_script_imx51 = {
+ .ap_2_ap_addr = 642,
+ .uart_2_mcu_addr = 817,
+ .mcu_2_app_addr = 747,
+ .mcu_2_shp_addr = 961,
+ .ata_2_mcu_addr = 1473,
+ .mcu_2_ata_addr = 1392,
+ .app_2_per_addr = 1033,
+ .app_2_mcu_addr = 683,
+ .shp_2_per_addr = 1251,
+ .shp_2_mcu_addr = 892,
+};
+
+static struct sdma_driver_data sdma_imx51 = {
+ .chnenbl0 = SDMA_CHNENBL0_IMX35,
+ .num_events = 48,
+ .script_addrs = &sdma_script_imx51,
+};
+
+static struct sdma_script_start_addrs sdma_script_imx53 = {
+ .ap_2_ap_addr = 642,
+ .app_2_mcu_addr = 683,
+ .mcu_2_app_addr = 747,
+ .uart_2_mcu_addr = 817,
+ .shp_2_mcu_addr = 891,
+ .mcu_2_shp_addr = 960,
+ .uartsh_2_mcu_addr = 1032,
+ .spdif_2_mcu_addr = 1100,
+ .mcu_2_spdif_addr = 1134,
+ .firi_2_mcu_addr = 1193,
+ .mcu_2_firi_addr = 1290,
+};
+
+static struct sdma_driver_data sdma_imx53 = {
+ .chnenbl0 = SDMA_CHNENBL0_IMX35,
+ .num_events = 48,
+ .script_addrs = &sdma_script_imx53,
+};
+
+static struct sdma_script_start_addrs sdma_script_imx6q = {
+ .ap_2_ap_addr = 642,
+ .uart_2_mcu_addr = 817,
+ .mcu_2_app_addr = 747,
+ .per_2_per_addr = 6331,
+ .uartsh_2_mcu_addr = 1032,
+ .mcu_2_shp_addr = 960,
+ .app_2_mcu_addr = 683,
+ .shp_2_mcu_addr = 891,
+ .spdif_2_mcu_addr = 1100,
+ .mcu_2_spdif_addr = 1134,
+};
+
+static struct sdma_driver_data sdma_imx6q = {
+ .chnenbl0 = SDMA_CHNENBL0_IMX35,
+ .num_events = 48,
+ .script_addrs = &sdma_script_imx6q,
+};
+
+static struct sdma_driver_data sdma_imx6ul = {
+ .chnenbl0 = SDMA_CHNENBL0_IMX35,
+ .num_events = 48,
+ .script_addrs = &sdma_script_imx6q,
+ .ecspi_fixed = true,
+};
+
+static struct sdma_script_start_addrs sdma_script_imx7d = {
+ .ap_2_ap_addr = 644,
+ .uart_2_mcu_addr = 819,
+ .mcu_2_app_addr = 749,
+ .uartsh_2_mcu_addr = 1034,
+ .mcu_2_shp_addr = 962,
+ .app_2_mcu_addr = 685,
+ .shp_2_mcu_addr = 893,
+ .spdif_2_mcu_addr = 1102,
+ .mcu_2_spdif_addr = 1136,
+};
+
+static struct sdma_driver_data sdma_imx7d = {
+ .chnenbl0 = SDMA_CHNENBL0_IMX35,
+ .num_events = 48,
+ .script_addrs = &sdma_script_imx7d,
+};
+
+static struct sdma_driver_data sdma_imx8mq = {
+ .chnenbl0 = SDMA_CHNENBL0_IMX35,
+ .num_events = 48,
+ .script_addrs = &sdma_script_imx7d,
+ .check_ratio = 1,
+};
+
+static const struct of_device_id sdma_dt_ids[] = {
+ { .compatible = "fsl,imx6q-sdma", .data = &sdma_imx6q, },
+ { .compatible = "fsl,imx53-sdma", .data = &sdma_imx53, },
+ { .compatible = "fsl,imx51-sdma", .data = &sdma_imx51, },
+ { .compatible = "fsl,imx35-sdma", .data = &sdma_imx35, },
+ { .compatible = "fsl,imx31-sdma", .data = &sdma_imx31, },
+ { .compatible = "fsl,imx25-sdma", .data = &sdma_imx25, },
+ { .compatible = "fsl,imx7d-sdma", .data = &sdma_imx7d, },
+ { .compatible = "fsl,imx6ul-sdma", .data = &sdma_imx6ul, },
+ { .compatible = "fsl,imx8mq-sdma", .data = &sdma_imx8mq, },
+ { /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, sdma_dt_ids);
+
+#define SDMA_H_CONFIG_DSPDMA BIT(12) /* indicates if the DSPDMA is used */
+#define SDMA_H_CONFIG_RTD_PINS BIT(11) /* indicates if Real-Time Debug pins are enabled */
+#define SDMA_H_CONFIG_ACR BIT(4) /* indicates if AHB freq /core freq = 2 or 1 */
+#define SDMA_H_CONFIG_CSM (3) /* indicates which context switch mode is selected*/
+
+static inline u32 chnenbl_ofs(struct sdma_engine *sdma, unsigned int event)
+{
+ u32 chnenbl0 = sdma->drvdata->chnenbl0;
+ return chnenbl0 + event * 4;
+}
+
+static int sdma_config_ownership(struct sdma_channel *sdmac,
+ bool event_override, bool mcu_override, bool dsp_override)
+{
+ struct sdma_engine *sdma = sdmac->sdma;
+ int channel = sdmac->channel;
+ unsigned long evt, mcu, dsp;
+
+ if (event_override && mcu_override && dsp_override)
+ return -EINVAL;
+
+ evt = readl_relaxed(sdma->regs + SDMA_H_EVTOVR);
+ mcu = readl_relaxed(sdma->regs + SDMA_H_HOSTOVR);
+ dsp = readl_relaxed(sdma->regs + SDMA_H_DSPOVR);
+
+ if (dsp_override)
+ __clear_bit(channel, &dsp);
+ else
+ __set_bit(channel, &dsp);
+
+ if (event_override)
+ __clear_bit(channel, &evt);
+ else
+ __set_bit(channel, &evt);
+
+ if (mcu_override)
+ __clear_bit(channel, &mcu);
+ else
+ __set_bit(channel, &mcu);
+
+ writel_relaxed(evt, sdma->regs + SDMA_H_EVTOVR);
+ writel_relaxed(mcu, sdma->regs + SDMA_H_HOSTOVR);
+ writel_relaxed(dsp, sdma->regs + SDMA_H_DSPOVR);
+
+ return 0;
+}
+
+static int is_sdma_channel_enabled(struct sdma_engine *sdma, int channel)
+{
+ return !!(readl(sdma->regs + SDMA_H_STATSTOP) & BIT(channel));
+}
+
+static void sdma_enable_channel(struct sdma_engine *sdma, int channel)
+{
+ writel(BIT(channel), sdma->regs + SDMA_H_START);
+}
+
+/*
+ * sdma_run_channel0 - run a channel and wait till it's done
+ */
+static int sdma_run_channel0(struct sdma_engine *sdma)
+{
+ int ret;
+ u32 reg;
+
+ sdma_enable_channel(sdma, 0);
+
+ ret = readl_relaxed_poll_timeout_atomic(sdma->regs + SDMA_H_STATSTOP,
+ reg, !(reg & 1), 1, 500);
+ if (ret)
+ dev_err(sdma->dev, "Timeout waiting for CH0 ready\n");
+
+ /* Set bits of CONFIG register with dynamic context switching */
+ reg = readl(sdma->regs + SDMA_H_CONFIG);
+ if ((reg & SDMA_H_CONFIG_CSM) == 0) {
+ reg |= SDMA_H_CONFIG_CSM;
+ writel_relaxed(reg, sdma->regs + SDMA_H_CONFIG);
+ }
+
+ return ret;
+}
+
+static int sdma_load_script(struct sdma_engine *sdma, void *buf, int size,
+ u32 address)
+{
+ struct sdma_buffer_descriptor *bd0 = sdma->bd0;
+ void *buf_virt;
+ dma_addr_t buf_phys;
+ int ret;
+ unsigned long flags;
+
+ buf_virt = dma_alloc_coherent(sdma->dev, size, &buf_phys, GFP_KERNEL);
+ if (!buf_virt)
+ return -ENOMEM;
+
+ spin_lock_irqsave(&sdma->channel_0_lock, flags);
+
+ bd0->mode.command = C0_SETPM;
+ bd0->mode.status = BD_DONE | BD_WRAP | BD_EXTD;
+ bd0->mode.count = size / 2;
+ bd0->buffer_addr = buf_phys;
+ bd0->ext_buffer_addr = address;
+
+ memcpy(buf_virt, buf, size);
+
+ ret = sdma_run_channel0(sdma);
+
+ spin_unlock_irqrestore(&sdma->channel_0_lock, flags);
+
+ dma_free_coherent(sdma->dev, size, buf_virt, buf_phys);
+
+ return ret;
+}
+
+static void sdma_event_enable(struct sdma_channel *sdmac, unsigned int event)
+{
+ struct sdma_engine *sdma = sdmac->sdma;
+ int channel = sdmac->channel;
+ unsigned long val;
+ u32 chnenbl = chnenbl_ofs(sdma, event);
+
+ val = readl_relaxed(sdma->regs + chnenbl);
+ __set_bit(channel, &val);
+ writel_relaxed(val, sdma->regs + chnenbl);
+
+ /* Set SDMA_DONEx_CONFIG is sw_done enabled */
+ if (sdmac->sw_done) {
+ val = readl_relaxed(sdma->regs + SDMA_DONE0_CONFIG);
+ val |= SDMA_DONE0_CONFIG_DONE_SEL;
+ val &= ~SDMA_DONE0_CONFIG_DONE_DIS;
+ writel_relaxed(val, sdma->regs + SDMA_DONE0_CONFIG);
+ }
+}
+
+static void sdma_event_disable(struct sdma_channel *sdmac, unsigned int event)
+{
+ struct sdma_engine *sdma = sdmac->sdma;
+ int channel = sdmac->channel;
+ u32 chnenbl = chnenbl_ofs(sdma, event);
+ unsigned long val;
+
+ val = readl_relaxed(sdma->regs + chnenbl);
+ __clear_bit(channel, &val);
+ writel_relaxed(val, sdma->regs + chnenbl);
+}
+
+static struct sdma_desc *to_sdma_desc(struct dma_async_tx_descriptor *t)
+{
+ return container_of(t, struct sdma_desc, vd.tx);
+}
+
+static void sdma_start_desc(struct sdma_channel *sdmac)
+{
+ struct virt_dma_desc *vd = vchan_next_desc(&sdmac->vc);
+ struct sdma_desc *desc;
+ struct sdma_engine *sdma = sdmac->sdma;
+ int channel = sdmac->channel;
+
+ if (!vd) {
+ sdmac->desc = NULL;
+ return;
+ }
+ sdmac->desc = desc = to_sdma_desc(&vd->tx);
+
+ list_del(&vd->node);
+
+ sdma->channel_control[channel].base_bd_ptr = desc->bd_phys;
+ sdma->channel_control[channel].current_bd_ptr = desc->bd_phys;
+ sdma_enable_channel(sdma, sdmac->channel);
+}
+
+static void sdma_update_channel_loop(struct sdma_channel *sdmac)
+{
+ struct sdma_buffer_descriptor *bd;
+ int error = 0;
+ enum dma_status old_status = sdmac->status;
+
+ /*
+ * loop mode. Iterate over descriptors, re-setup them and
+ * call callback function.
+ */
+ while (sdmac->desc) {
+ struct sdma_desc *desc = sdmac->desc;
+
+ bd = &desc->bd[desc->buf_tail];
+
+ if (bd->mode.status & BD_DONE)
+ break;
+
+ if (bd->mode.status & BD_RROR) {
+ bd->mode.status &= ~BD_RROR;
+ sdmac->status = DMA_ERROR;
+ error = -EIO;
+ }
+
+ /*
+ * We use bd->mode.count to calculate the residue, since contains
+ * the number of bytes present in the current buffer descriptor.
+ */
+
+ desc->chn_real_count = bd->mode.count;
+ bd->mode.count = desc->period_len;
+ desc->buf_ptail = desc->buf_tail;
+ desc->buf_tail = (desc->buf_tail + 1) % desc->num_bd;
+
+ /*
+ * The callback is called from the interrupt context in order
+ * to reduce latency and to avoid the risk of altering the
+ * SDMA transaction status by the time the client tasklet is
+ * executed.
+ */
+ spin_unlock(&sdmac->vc.lock);
+ dmaengine_desc_get_callback_invoke(&desc->vd.tx, NULL);
+ spin_lock(&sdmac->vc.lock);
+
+ /* Assign buffer ownership to SDMA */
+ bd->mode.status |= BD_DONE;
+
+ if (error)
+ sdmac->status = old_status;
+ }
+
+ /*
+ * SDMA stops cyclic channel when DMA request triggers a channel and no SDMA
+ * owned buffer is available (i.e. BD_DONE was set too late).
+ */
+ if (sdmac->desc && !is_sdma_channel_enabled(sdmac->sdma, sdmac->channel)) {
+ dev_warn(sdmac->sdma->dev, "restart cyclic channel %d\n", sdmac->channel);
+ sdma_enable_channel(sdmac->sdma, sdmac->channel);
+ }
+}
+
+static void mxc_sdma_handle_channel_normal(struct sdma_channel *data)
+{
+ struct sdma_channel *sdmac = (struct sdma_channel *) data;
+ struct sdma_buffer_descriptor *bd;
+ int i, error = 0;
+
+ sdmac->desc->chn_real_count = 0;
+ /*
+ * non loop mode. Iterate over all descriptors, collect
+ * errors and call callback function
+ */
+ for (i = 0; i < sdmac->desc->num_bd; i++) {
+ bd = &sdmac->desc->bd[i];
+
+ if (bd->mode.status & (BD_DONE | BD_RROR))
+ error = -EIO;
+ sdmac->desc->chn_real_count += bd->mode.count;
+ }
+
+ if (error)
+ sdmac->status = DMA_ERROR;
+ else
+ sdmac->status = DMA_COMPLETE;
+}
+
+static irqreturn_t sdma_int_handler(int irq, void *dev_id)
+{
+ struct sdma_engine *sdma = dev_id;
+ unsigned long stat;
+
+ stat = readl_relaxed(sdma->regs + SDMA_H_INTR);
+ writel_relaxed(stat, sdma->regs + SDMA_H_INTR);
+ /* channel 0 is special and not handled here, see run_channel0() */
+ stat &= ~1;
+
+ while (stat) {
+ int channel = fls(stat) - 1;
+ struct sdma_channel *sdmac = &sdma->channel[channel];
+ struct sdma_desc *desc;
+
+ spin_lock(&sdmac->vc.lock);
+ desc = sdmac->desc;
+ if (desc) {
+ if (sdmac->flags & IMX_DMA_SG_LOOP) {
+ sdma_update_channel_loop(sdmac);
+ } else {
+ mxc_sdma_handle_channel_normal(sdmac);
+ vchan_cookie_complete(&desc->vd);
+ sdma_start_desc(sdmac);
+ }
+ }
+
+ spin_unlock(&sdmac->vc.lock);
+ __clear_bit(channel, &stat);
+ }
+
+ return IRQ_HANDLED;
+}
+
+/*
+ * sets the pc of SDMA script according to the peripheral type
+ */
+static int sdma_get_pc(struct sdma_channel *sdmac,
+ enum sdma_peripheral_type peripheral_type)
+{
+ struct sdma_engine *sdma = sdmac->sdma;
+ int per_2_emi = 0, emi_2_per = 0;
+ /*
+ * These are needed once we start to support transfers between
+ * two peripherals or memory-to-memory transfers
+ */
+ int per_2_per = 0, emi_2_emi = 0;
+
+ sdmac->pc_from_device = 0;
+ sdmac->pc_to_device = 0;
+ sdmac->device_to_device = 0;
+ sdmac->pc_to_pc = 0;
+ sdmac->is_ram_script = false;
+
+ switch (peripheral_type) {
+ case IMX_DMATYPE_MEMORY:
+ emi_2_emi = sdma->script_addrs->ap_2_ap_addr;
+ break;
+ case IMX_DMATYPE_DSP:
+ emi_2_per = sdma->script_addrs->bp_2_ap_addr;
+ per_2_emi = sdma->script_addrs->ap_2_bp_addr;
+ break;
+ case IMX_DMATYPE_FIRI:
+ per_2_emi = sdma->script_addrs->firi_2_mcu_addr;
+ emi_2_per = sdma->script_addrs->mcu_2_firi_addr;
+ break;
+ case IMX_DMATYPE_UART:
+ per_2_emi = sdma->script_addrs->uart_2_mcu_addr;
+ emi_2_per = sdma->script_addrs->mcu_2_app_addr;
+ break;
+ case IMX_DMATYPE_UART_SP:
+ per_2_emi = sdma->script_addrs->uartsh_2_mcu_addr;
+ emi_2_per = sdma->script_addrs->mcu_2_shp_addr;
+ break;
+ case IMX_DMATYPE_ATA:
+ per_2_emi = sdma->script_addrs->ata_2_mcu_addr;
+ emi_2_per = sdma->script_addrs->mcu_2_ata_addr;
+ break;
+ case IMX_DMATYPE_CSPI:
+ per_2_emi = sdma->script_addrs->app_2_mcu_addr;
+
+ /* Use rom script mcu_2_app if ERR009165 fixed */
+ if (sdmac->sdma->drvdata->ecspi_fixed) {
+ emi_2_per = sdma->script_addrs->mcu_2_app_addr;
+ } else {
+ emi_2_per = sdma->script_addrs->mcu_2_ecspi_addr;
+ sdmac->is_ram_script = true;
+ }
+
+ break;
+ case IMX_DMATYPE_EXT:
+ case IMX_DMATYPE_SSI:
+ case IMX_DMATYPE_SAI:
+ per_2_emi = sdma->script_addrs->app_2_mcu_addr;
+ emi_2_per = sdma->script_addrs->mcu_2_app_addr;
+ break;
+ case IMX_DMATYPE_SSI_DUAL:
+ per_2_emi = sdma->script_addrs->ssish_2_mcu_addr;
+ emi_2_per = sdma->script_addrs->mcu_2_ssish_addr;
+ sdmac->is_ram_script = true;
+ break;
+ case IMX_DMATYPE_SSI_SP:
+ case IMX_DMATYPE_MMC:
+ case IMX_DMATYPE_SDHC:
+ case IMX_DMATYPE_CSPI_SP:
+ case IMX_DMATYPE_ESAI:
+ case IMX_DMATYPE_MSHC_SP:
+ per_2_emi = sdma->script_addrs->shp_2_mcu_addr;
+ emi_2_per = sdma->script_addrs->mcu_2_shp_addr;
+ break;
+ case IMX_DMATYPE_ASRC:
+ per_2_emi = sdma->script_addrs->asrc_2_mcu_addr;
+ emi_2_per = sdma->script_addrs->asrc_2_mcu_addr;
+ per_2_per = sdma->script_addrs->per_2_per_addr;
+ sdmac->is_ram_script = true;
+ break;
+ case IMX_DMATYPE_ASRC_SP:
+ per_2_emi = sdma->script_addrs->shp_2_mcu_addr;
+ emi_2_per = sdma->script_addrs->mcu_2_shp_addr;
+ per_2_per = sdma->script_addrs->per_2_per_addr;
+ break;
+ case IMX_DMATYPE_MSHC:
+ per_2_emi = sdma->script_addrs->mshc_2_mcu_addr;
+ emi_2_per = sdma->script_addrs->mcu_2_mshc_addr;
+ break;
+ case IMX_DMATYPE_CCM:
+ per_2_emi = sdma->script_addrs->dptc_dvfs_addr;
+ break;
+ case IMX_DMATYPE_SPDIF:
+ per_2_emi = sdma->script_addrs->spdif_2_mcu_addr;
+ emi_2_per = sdma->script_addrs->mcu_2_spdif_addr;
+ break;
+ case IMX_DMATYPE_IPU_MEMORY:
+ emi_2_per = sdma->script_addrs->ext_mem_2_ipu_addr;
+ break;
+ case IMX_DMATYPE_MULTI_SAI:
+ per_2_emi = sdma->script_addrs->sai_2_mcu_addr;
+ emi_2_per = sdma->script_addrs->mcu_2_sai_addr;
+ break;
+ default:
+ dev_err(sdma->dev, "Unsupported transfer type %d\n",
+ peripheral_type);
+ return -EINVAL;
+ }
+
+ sdmac->pc_from_device = per_2_emi;
+ sdmac->pc_to_device = emi_2_per;
+ sdmac->device_to_device = per_2_per;
+ sdmac->pc_to_pc = emi_2_emi;
+
+ return 0;
+}
+
+static int sdma_load_context(struct sdma_channel *sdmac)
+{
+ struct sdma_engine *sdma = sdmac->sdma;
+ int channel = sdmac->channel;
+ int load_address;
+ struct sdma_context_data *context = sdma->context;
+ struct sdma_buffer_descriptor *bd0 = sdma->bd0;
+ int ret;
+ unsigned long flags;
+
+ if (sdmac->direction == DMA_DEV_TO_MEM)
+ load_address = sdmac->pc_from_device;
+ else if (sdmac->direction == DMA_DEV_TO_DEV)
+ load_address = sdmac->device_to_device;
+ else if (sdmac->direction == DMA_MEM_TO_MEM)
+ load_address = sdmac->pc_to_pc;
+ else
+ load_address = sdmac->pc_to_device;
+
+ if (load_address < 0)
+ return load_address;
+
+ dev_dbg(sdma->dev, "load_address = %d\n", load_address);
+ dev_dbg(sdma->dev, "wml = 0x%08x\n", (u32)sdmac->watermark_level);
+ dev_dbg(sdma->dev, "shp_addr = 0x%08x\n", sdmac->shp_addr);
+ dev_dbg(sdma->dev, "per_addr = 0x%08x\n", sdmac->per_addr);
+ dev_dbg(sdma->dev, "event_mask0 = 0x%08x\n", (u32)sdmac->event_mask[0]);
+ dev_dbg(sdma->dev, "event_mask1 = 0x%08x\n", (u32)sdmac->event_mask[1]);
+
+ spin_lock_irqsave(&sdma->channel_0_lock, flags);
+
+ memset(context, 0, sizeof(*context));
+ context->channel_state.pc = load_address;
+
+ /* Send by context the event mask,base address for peripheral
+ * and watermark level
+ */
+ context->gReg[0] = sdmac->event_mask[1];
+ context->gReg[1] = sdmac->event_mask[0];
+ context->gReg[2] = sdmac->per_addr;
+ context->gReg[6] = sdmac->shp_addr;
+ context->gReg[7] = sdmac->watermark_level;
+
+ bd0->mode.command = C0_SETDM;
+ bd0->mode.status = BD_DONE | BD_WRAP | BD_EXTD;
+ bd0->mode.count = sizeof(*context) / 4;
+ bd0->buffer_addr = sdma->context_phys;
+ bd0->ext_buffer_addr = 2048 + (sizeof(*context) / 4) * channel;
+ ret = sdma_run_channel0(sdma);
+
+ spin_unlock_irqrestore(&sdma->channel_0_lock, flags);
+
+ return ret;
+}
+
+static struct sdma_channel *to_sdma_chan(struct dma_chan *chan)
+{
+ return container_of(chan, struct sdma_channel, vc.chan);
+}
+
+static int sdma_disable_channel(struct dma_chan *chan)
+{
+ struct sdma_channel *sdmac = to_sdma_chan(chan);
+ struct sdma_engine *sdma = sdmac->sdma;
+ int channel = sdmac->channel;
+
+ writel_relaxed(BIT(channel), sdma->regs + SDMA_H_STATSTOP);
+ sdmac->status = DMA_ERROR;
+
+ return 0;
+}
+static void sdma_channel_terminate_work(struct work_struct *work)
+{
+ struct sdma_channel *sdmac = container_of(work, struct sdma_channel,
+ terminate_worker);
+ /*
+ * According to NXP R&D team a delay of one BD SDMA cost time
+ * (maximum is 1ms) should be added after disable of the channel
+ * bit, to ensure SDMA core has really been stopped after SDMA
+ * clients call .device_terminate_all.
+ */
+ usleep_range(1000, 2000);
+
+ vchan_dma_desc_free_list(&sdmac->vc, &sdmac->terminated);
+}
+
+static int sdma_terminate_all(struct dma_chan *chan)
+{
+ struct sdma_channel *sdmac = to_sdma_chan(chan);
+ unsigned long flags;
+
+ spin_lock_irqsave(&sdmac->vc.lock, flags);
+
+ sdma_disable_channel(chan);
+
+ if (sdmac->desc) {
+ vchan_terminate_vdesc(&sdmac->desc->vd);
+ /*
+ * move out current descriptor into terminated list so that
+ * it could be free in sdma_channel_terminate_work alone
+ * later without potential involving next descriptor raised
+ * up before the last descriptor terminated.
+ */
+ vchan_get_all_descriptors(&sdmac->vc, &sdmac->terminated);
+ sdmac->desc = NULL;
+ schedule_work(&sdmac->terminate_worker);
+ }
+
+ spin_unlock_irqrestore(&sdmac->vc.lock, flags);
+
+ return 0;
+}
+
+static void sdma_channel_synchronize(struct dma_chan *chan)
+{
+ struct sdma_channel *sdmac = to_sdma_chan(chan);
+
+ vchan_synchronize(&sdmac->vc);
+
+ flush_work(&sdmac->terminate_worker);
+}
+
+static void sdma_set_watermarklevel_for_p2p(struct sdma_channel *sdmac)
+{
+ struct sdma_engine *sdma = sdmac->sdma;
+
+ int lwml = sdmac->watermark_level & SDMA_WATERMARK_LEVEL_LWML;
+ int hwml = (sdmac->watermark_level & SDMA_WATERMARK_LEVEL_HWML) >> 16;
+
+ set_bit(sdmac->event_id0 % 32, &sdmac->event_mask[1]);
+ set_bit(sdmac->event_id1 % 32, &sdmac->event_mask[0]);
+
+ if (sdmac->event_id0 > 31)
+ sdmac->watermark_level |= SDMA_WATERMARK_LEVEL_LWE;
+
+ if (sdmac->event_id1 > 31)
+ sdmac->watermark_level |= SDMA_WATERMARK_LEVEL_HWE;
+
+ /*
+ * If LWML(src_maxburst) > HWML(dst_maxburst), we need
+ * swap LWML and HWML of INFO(A.3.2.5.1), also need swap
+ * r0(event_mask[1]) and r1(event_mask[0]).
+ */
+ if (lwml > hwml) {
+ sdmac->watermark_level &= ~(SDMA_WATERMARK_LEVEL_LWML |
+ SDMA_WATERMARK_LEVEL_HWML);
+ sdmac->watermark_level |= hwml;
+ sdmac->watermark_level |= lwml << 16;
+ swap(sdmac->event_mask[0], sdmac->event_mask[1]);
+ }
+
+ if (sdmac->per_address2 >= sdma->spba_start_addr &&
+ sdmac->per_address2 <= sdma->spba_end_addr)
+ sdmac->watermark_level |= SDMA_WATERMARK_LEVEL_SP;
+
+ if (sdmac->per_address >= sdma->spba_start_addr &&
+ sdmac->per_address <= sdma->spba_end_addr)
+ sdmac->watermark_level |= SDMA_WATERMARK_LEVEL_DP;
+
+ sdmac->watermark_level |= SDMA_WATERMARK_LEVEL_CONT;
+}
+
+static void sdma_set_watermarklevel_for_sais(struct sdma_channel *sdmac)
+{
+ unsigned int n_fifos;
+ unsigned int stride_fifos;
+ unsigned int words_per_fifo;
+
+ if (sdmac->sw_done)
+ sdmac->watermark_level |= SDMA_WATERMARK_LEVEL_SW_DONE;
+
+ if (sdmac->direction == DMA_DEV_TO_MEM) {
+ n_fifos = sdmac->n_fifos_src;
+ stride_fifos = sdmac->stride_fifos_src;
+ } else {
+ n_fifos = sdmac->n_fifos_dst;
+ stride_fifos = sdmac->stride_fifos_dst;
+ }
+
+ words_per_fifo = sdmac->words_per_fifo;
+
+ sdmac->watermark_level |=
+ FIELD_PREP(SDMA_WATERMARK_LEVEL_N_FIFOS, n_fifos);
+ sdmac->watermark_level |=
+ FIELD_PREP(SDMA_WATERMARK_LEVEL_OFF_FIFOS, stride_fifos);
+ if (words_per_fifo)
+ sdmac->watermark_level |=
+ FIELD_PREP(SDMA_WATERMARK_LEVEL_WORDS_PER_FIFO, (words_per_fifo - 1));
+}
+
+static int sdma_config_channel(struct dma_chan *chan)
+{
+ struct sdma_channel *sdmac = to_sdma_chan(chan);
+ int ret;
+
+ sdma_disable_channel(chan);
+
+ sdmac->event_mask[0] = 0;
+ sdmac->event_mask[1] = 0;
+ sdmac->shp_addr = 0;
+ sdmac->per_addr = 0;
+
+ switch (sdmac->peripheral_type) {
+ case IMX_DMATYPE_DSP:
+ sdma_config_ownership(sdmac, false, true, true);
+ break;
+ case IMX_DMATYPE_MEMORY:
+ sdma_config_ownership(sdmac, false, true, false);
+ break;
+ default:
+ sdma_config_ownership(sdmac, true, true, false);
+ break;
+ }
+
+ ret = sdma_get_pc(sdmac, sdmac->peripheral_type);
+ if (ret)
+ return ret;
+
+ if ((sdmac->peripheral_type != IMX_DMATYPE_MEMORY) &&
+ (sdmac->peripheral_type != IMX_DMATYPE_DSP)) {
+ /* Handle multiple event channels differently */
+ if (sdmac->event_id1) {
+ if (sdmac->peripheral_type == IMX_DMATYPE_ASRC_SP ||
+ sdmac->peripheral_type == IMX_DMATYPE_ASRC)
+ sdma_set_watermarklevel_for_p2p(sdmac);
+ } else {
+ if (sdmac->peripheral_type ==
+ IMX_DMATYPE_MULTI_SAI)
+ sdma_set_watermarklevel_for_sais(sdmac);
+
+ __set_bit(sdmac->event_id0, sdmac->event_mask);
+ }
+
+ /* Address */
+ sdmac->shp_addr = sdmac->per_address;
+ sdmac->per_addr = sdmac->per_address2;
+ } else {
+ sdmac->watermark_level = 0; /* FIXME: M3_BASE_ADDRESS */
+ }
+
+ return 0;
+}
+
+static int sdma_set_channel_priority(struct sdma_channel *sdmac,
+ unsigned int priority)
+{
+ struct sdma_engine *sdma = sdmac->sdma;
+ int channel = sdmac->channel;
+
+ if (priority < MXC_SDMA_MIN_PRIORITY
+ || priority > MXC_SDMA_MAX_PRIORITY) {
+ return -EINVAL;
+ }
+
+ writel_relaxed(priority, sdma->regs + SDMA_CHNPRI_0 + 4 * channel);
+
+ return 0;
+}
+
+static int sdma_request_channel0(struct sdma_engine *sdma)
+{
+ int ret = -EBUSY;
+
+ sdma->bd0 = dma_alloc_coherent(sdma->dev, PAGE_SIZE, &sdma->bd0_phys,
+ GFP_NOWAIT);
+ if (!sdma->bd0) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ sdma->channel_control[0].base_bd_ptr = sdma->bd0_phys;
+ sdma->channel_control[0].current_bd_ptr = sdma->bd0_phys;
+
+ sdma_set_channel_priority(&sdma->channel[0], MXC_SDMA_DEFAULT_PRIORITY);
+ return 0;
+out:
+
+ return ret;
+}
+
+
+static int sdma_alloc_bd(struct sdma_desc *desc)
+{
+ u32 bd_size = desc->num_bd * sizeof(struct sdma_buffer_descriptor);
+ int ret = 0;
+
+ desc->bd = dma_alloc_coherent(desc->sdmac->sdma->dev, bd_size,
+ &desc->bd_phys, GFP_NOWAIT);
+ if (!desc->bd) {
+ ret = -ENOMEM;
+ goto out;
+ }
+out:
+ return ret;
+}
+
+static void sdma_free_bd(struct sdma_desc *desc)
+{
+ u32 bd_size = desc->num_bd * sizeof(struct sdma_buffer_descriptor);
+
+ dma_free_coherent(desc->sdmac->sdma->dev, bd_size, desc->bd,
+ desc->bd_phys);
+}
+
+static void sdma_desc_free(struct virt_dma_desc *vd)
+{
+ struct sdma_desc *desc = container_of(vd, struct sdma_desc, vd);
+
+ sdma_free_bd(desc);
+ kfree(desc);
+}
+
+static int sdma_alloc_chan_resources(struct dma_chan *chan)
+{
+ struct sdma_channel *sdmac = to_sdma_chan(chan);
+ struct imx_dma_data *data = chan->private;
+ struct imx_dma_data mem_data;
+ int prio, ret;
+
+ /*
+ * MEMCPY may never setup chan->private by filter function such as
+ * dmatest, thus create 'struct imx_dma_data mem_data' for this case.
+ * Please note in any other slave case, you have to setup chan->private
+ * with 'struct imx_dma_data' in your own filter function if you want to
+ * request dma channel by dma_request_channel() rather than
+ * dma_request_slave_channel(). Othwise, 'MEMCPY in case?' will appear
+ * to warn you to correct your filter function.
+ */
+ if (!data) {
+ dev_dbg(sdmac->sdma->dev, "MEMCPY in case?\n");
+ mem_data.priority = 2;
+ mem_data.peripheral_type = IMX_DMATYPE_MEMORY;
+ mem_data.dma_request = 0;
+ mem_data.dma_request2 = 0;
+ data = &mem_data;
+
+ ret = sdma_get_pc(sdmac, IMX_DMATYPE_MEMORY);
+ if (ret)
+ return ret;
+ }
+
+ switch (data->priority) {
+ case DMA_PRIO_HIGH:
+ prio = 3;
+ break;
+ case DMA_PRIO_MEDIUM:
+ prio = 2;
+ break;
+ case DMA_PRIO_LOW:
+ default:
+ prio = 1;
+ break;
+ }
+
+ sdmac->peripheral_type = data->peripheral_type;
+ sdmac->event_id0 = data->dma_request;
+ sdmac->event_id1 = data->dma_request2;
+
+ ret = clk_enable(sdmac->sdma->clk_ipg);
+ if (ret)
+ return ret;
+ ret = clk_enable(sdmac->sdma->clk_ahb);
+ if (ret)
+ goto disable_clk_ipg;
+
+ ret = sdma_set_channel_priority(sdmac, prio);
+ if (ret)
+ goto disable_clk_ahb;
+
+ return 0;
+
+disable_clk_ahb:
+ clk_disable(sdmac->sdma->clk_ahb);
+disable_clk_ipg:
+ clk_disable(sdmac->sdma->clk_ipg);
+ return ret;
+}
+
+static void sdma_free_chan_resources(struct dma_chan *chan)
+{
+ struct sdma_channel *sdmac = to_sdma_chan(chan);
+ struct sdma_engine *sdma = sdmac->sdma;
+
+ sdma_terminate_all(chan);
+
+ sdma_channel_synchronize(chan);
+
+ sdma_event_disable(sdmac, sdmac->event_id0);
+ if (sdmac->event_id1)
+ sdma_event_disable(sdmac, sdmac->event_id1);
+
+ sdmac->event_id0 = 0;
+ sdmac->event_id1 = 0;
+
+ sdma_set_channel_priority(sdmac, 0);
+
+ clk_disable(sdma->clk_ipg);
+ clk_disable(sdma->clk_ahb);
+}
+
+static struct sdma_desc *sdma_transfer_init(struct sdma_channel *sdmac,
+ enum dma_transfer_direction direction, u32 bds)
+{
+ struct sdma_desc *desc;
+
+ if (!sdmac->sdma->fw_loaded && sdmac->is_ram_script) {
+ dev_warn_once(sdmac->sdma->dev, "sdma firmware not ready!\n");
+ goto err_out;
+ }
+
+ desc = kzalloc((sizeof(*desc)), GFP_NOWAIT);
+ if (!desc)
+ goto err_out;
+
+ sdmac->status = DMA_IN_PROGRESS;
+ sdmac->direction = direction;
+ sdmac->flags = 0;
+
+ desc->chn_count = 0;
+ desc->chn_real_count = 0;
+ desc->buf_tail = 0;
+ desc->buf_ptail = 0;
+ desc->sdmac = sdmac;
+ desc->num_bd = bds;
+
+ if (sdma_alloc_bd(desc))
+ goto err_desc_out;
+
+ /* No slave_config called in MEMCPY case, so do here */
+ if (direction == DMA_MEM_TO_MEM)
+ sdma_config_ownership(sdmac, false, true, false);
+
+ if (sdma_load_context(sdmac))
+ goto err_bd_out;
+
+ return desc;
+
+err_bd_out:
+ sdma_free_bd(desc);
+err_desc_out:
+ kfree(desc);
+err_out:
+ return NULL;
+}
+
+static struct dma_async_tx_descriptor *sdma_prep_memcpy(
+ struct dma_chan *chan, dma_addr_t dma_dst,
+ dma_addr_t dma_src, size_t len, unsigned long flags)
+{
+ struct sdma_channel *sdmac = to_sdma_chan(chan);
+ struct sdma_engine *sdma = sdmac->sdma;
+ int channel = sdmac->channel;
+ size_t count;
+ int i = 0, param;
+ struct sdma_buffer_descriptor *bd;
+ struct sdma_desc *desc;
+
+ if (!chan || !len)
+ return NULL;
+
+ dev_dbg(sdma->dev, "memcpy: %pad->%pad, len=%zu, channel=%d.\n",
+ &dma_src, &dma_dst, len, channel);
+
+ desc = sdma_transfer_init(sdmac, DMA_MEM_TO_MEM,
+ len / SDMA_BD_MAX_CNT + 1);
+ if (!desc)
+ return NULL;
+
+ do {
+ count = min_t(size_t, len, SDMA_BD_MAX_CNT);
+ bd = &desc->bd[i];
+ bd->buffer_addr = dma_src;
+ bd->ext_buffer_addr = dma_dst;
+ bd->mode.count = count;
+ desc->chn_count += count;
+ bd->mode.command = 0;
+
+ dma_src += count;
+ dma_dst += count;
+ len -= count;
+ i++;
+
+ param = BD_DONE | BD_EXTD | BD_CONT;
+ /* last bd */
+ if (!len) {
+ param |= BD_INTR;
+ param |= BD_LAST;
+ param &= ~BD_CONT;
+ }
+
+ dev_dbg(sdma->dev, "entry %d: count: %zd dma: 0x%x %s%s\n",
+ i, count, bd->buffer_addr,
+ param & BD_WRAP ? "wrap" : "",
+ param & BD_INTR ? " intr" : "");
+
+ bd->mode.status = param;
+ } while (len);
+
+ return vchan_tx_prep(&sdmac->vc, &desc->vd, flags);
+}
+
+static struct dma_async_tx_descriptor *sdma_prep_slave_sg(
+ struct dma_chan *chan, struct scatterlist *sgl,
+ unsigned int sg_len, enum dma_transfer_direction direction,
+ unsigned long flags, void *context)
+{
+ struct sdma_channel *sdmac = to_sdma_chan(chan);
+ struct sdma_engine *sdma = sdmac->sdma;
+ int i, count;
+ int channel = sdmac->channel;
+ struct scatterlist *sg;
+ struct sdma_desc *desc;
+
+ sdma_config_write(chan, &sdmac->slave_config, direction);
+
+ desc = sdma_transfer_init(sdmac, direction, sg_len);
+ if (!desc)
+ goto err_out;
+
+ dev_dbg(sdma->dev, "setting up %d entries for channel %d.\n",
+ sg_len, channel);
+
+ for_each_sg(sgl, sg, sg_len, i) {
+ struct sdma_buffer_descriptor *bd = &desc->bd[i];
+ int param;
+
+ bd->buffer_addr = sg->dma_address;
+
+ count = sg_dma_len(sg);
+
+ if (count > SDMA_BD_MAX_CNT) {
+ dev_err(sdma->dev, "SDMA channel %d: maximum bytes for sg entry exceeded: %d > %d\n",
+ channel, count, SDMA_BD_MAX_CNT);
+ goto err_bd_out;
+ }
+
+ bd->mode.count = count;
+ desc->chn_count += count;
+
+ if (sdmac->word_size > DMA_SLAVE_BUSWIDTH_4_BYTES)
+ goto err_bd_out;
+
+ switch (sdmac->word_size) {
+ case DMA_SLAVE_BUSWIDTH_4_BYTES:
+ bd->mode.command = 0;
+ if (count & 3 || sg->dma_address & 3)
+ goto err_bd_out;
+ break;
+ case DMA_SLAVE_BUSWIDTH_2_BYTES:
+ bd->mode.command = 2;
+ if (count & 1 || sg->dma_address & 1)
+ goto err_bd_out;
+ break;
+ case DMA_SLAVE_BUSWIDTH_1_BYTE:
+ bd->mode.command = 1;
+ break;
+ default:
+ goto err_bd_out;
+ }
+
+ param = BD_DONE | BD_EXTD | BD_CONT;
+
+ if (i + 1 == sg_len) {
+ param |= BD_INTR;
+ param |= BD_LAST;
+ param &= ~BD_CONT;
+ }
+
+ dev_dbg(sdma->dev, "entry %d: count: %d dma: %#llx %s%s\n",
+ i, count, (u64)sg->dma_address,
+ param & BD_WRAP ? "wrap" : "",
+ param & BD_INTR ? " intr" : "");
+
+ bd->mode.status = param;
+ }
+
+ return vchan_tx_prep(&sdmac->vc, &desc->vd, flags);
+err_bd_out:
+ sdma_free_bd(desc);
+ kfree(desc);
+err_out:
+ sdmac->status = DMA_ERROR;
+ return NULL;
+}
+
+static struct dma_async_tx_descriptor *sdma_prep_dma_cyclic(
+ struct dma_chan *chan, dma_addr_t dma_addr, size_t buf_len,
+ size_t period_len, enum dma_transfer_direction direction,
+ unsigned long flags)
+{
+ struct sdma_channel *sdmac = to_sdma_chan(chan);
+ struct sdma_engine *sdma = sdmac->sdma;
+ int num_periods = buf_len / period_len;
+ int channel = sdmac->channel;
+ int i = 0, buf = 0;
+ struct sdma_desc *desc;
+
+ dev_dbg(sdma->dev, "%s channel: %d\n", __func__, channel);
+
+ sdma_config_write(chan, &sdmac->slave_config, direction);
+
+ desc = sdma_transfer_init(sdmac, direction, num_periods);
+ if (!desc)
+ goto err_out;
+
+ desc->period_len = period_len;
+
+ sdmac->flags |= IMX_DMA_SG_LOOP;
+
+ if (period_len > SDMA_BD_MAX_CNT) {
+ dev_err(sdma->dev, "SDMA channel %d: maximum period size exceeded: %zu > %d\n",
+ channel, period_len, SDMA_BD_MAX_CNT);
+ goto err_bd_out;
+ }
+
+ while (buf < buf_len) {
+ struct sdma_buffer_descriptor *bd = &desc->bd[i];
+ int param;
+
+ bd->buffer_addr = dma_addr;
+
+ bd->mode.count = period_len;
+
+ if (sdmac->word_size > DMA_SLAVE_BUSWIDTH_4_BYTES)
+ goto err_bd_out;
+ if (sdmac->word_size == DMA_SLAVE_BUSWIDTH_4_BYTES)
+ bd->mode.command = 0;
+ else
+ bd->mode.command = sdmac->word_size;
+
+ param = BD_DONE | BD_EXTD | BD_CONT | BD_INTR;
+ if (i + 1 == num_periods)
+ param |= BD_WRAP;
+
+ dev_dbg(sdma->dev, "entry %d: count: %zu dma: %#llx %s%s\n",
+ i, period_len, (u64)dma_addr,
+ param & BD_WRAP ? "wrap" : "",
+ param & BD_INTR ? " intr" : "");
+
+ bd->mode.status = param;
+
+ dma_addr += period_len;
+ buf += period_len;
+
+ i++;
+ }
+
+ return vchan_tx_prep(&sdmac->vc, &desc->vd, flags);
+err_bd_out:
+ sdma_free_bd(desc);
+ kfree(desc);
+err_out:
+ sdmac->status = DMA_ERROR;
+ return NULL;
+}
+
+static int sdma_config_write(struct dma_chan *chan,
+ struct dma_slave_config *dmaengine_cfg,
+ enum dma_transfer_direction direction)
+{
+ struct sdma_channel *sdmac = to_sdma_chan(chan);
+
+ if (direction == DMA_DEV_TO_MEM) {
+ sdmac->per_address = dmaengine_cfg->src_addr;
+ sdmac->watermark_level = dmaengine_cfg->src_maxburst *
+ dmaengine_cfg->src_addr_width;
+ sdmac->word_size = dmaengine_cfg->src_addr_width;
+ } else if (direction == DMA_DEV_TO_DEV) {
+ sdmac->per_address2 = dmaengine_cfg->src_addr;
+ sdmac->per_address = dmaengine_cfg->dst_addr;
+ sdmac->watermark_level = dmaengine_cfg->src_maxburst &
+ SDMA_WATERMARK_LEVEL_LWML;
+ sdmac->watermark_level |= (dmaengine_cfg->dst_maxburst << 16) &
+ SDMA_WATERMARK_LEVEL_HWML;
+ sdmac->word_size = dmaengine_cfg->dst_addr_width;
+ } else {
+ sdmac->per_address = dmaengine_cfg->dst_addr;
+ sdmac->watermark_level = dmaengine_cfg->dst_maxburst *
+ dmaengine_cfg->dst_addr_width;
+ sdmac->word_size = dmaengine_cfg->dst_addr_width;
+ }
+ sdmac->direction = direction;
+ return sdma_config_channel(chan);
+}
+
+static int sdma_config(struct dma_chan *chan,
+ struct dma_slave_config *dmaengine_cfg)
+{
+ struct sdma_channel *sdmac = to_sdma_chan(chan);
+ struct sdma_engine *sdma = sdmac->sdma;
+
+ memcpy(&sdmac->slave_config, dmaengine_cfg, sizeof(*dmaengine_cfg));
+
+ if (dmaengine_cfg->peripheral_config) {
+ struct sdma_peripheral_config *sdmacfg = dmaengine_cfg->peripheral_config;
+ if (dmaengine_cfg->peripheral_size != sizeof(struct sdma_peripheral_config)) {
+ dev_err(sdma->dev, "Invalid peripheral size %zu, expected %zu\n",
+ dmaengine_cfg->peripheral_size,
+ sizeof(struct sdma_peripheral_config));
+ return -EINVAL;
+ }
+ sdmac->n_fifos_src = sdmacfg->n_fifos_src;
+ sdmac->n_fifos_dst = sdmacfg->n_fifos_dst;
+ sdmac->stride_fifos_src = sdmacfg->stride_fifos_src;
+ sdmac->stride_fifos_dst = sdmacfg->stride_fifos_dst;
+ sdmac->words_per_fifo = sdmacfg->words_per_fifo;
+ sdmac->sw_done = sdmacfg->sw_done;
+ }
+
+ /* Set ENBLn earlier to make sure dma request triggered after that */
+ if (sdmac->event_id0 >= sdmac->sdma->drvdata->num_events)
+ return -EINVAL;
+ sdma_event_enable(sdmac, sdmac->event_id0);
+
+ if (sdmac->event_id1) {
+ if (sdmac->event_id1 >= sdmac->sdma->drvdata->num_events)
+ return -EINVAL;
+ sdma_event_enable(sdmac, sdmac->event_id1);
+ }
+
+ return 0;
+}
+
+static enum dma_status sdma_tx_status(struct dma_chan *chan,
+ dma_cookie_t cookie,
+ struct dma_tx_state *txstate)
+{
+ struct sdma_channel *sdmac = to_sdma_chan(chan);
+ struct sdma_desc *desc = NULL;
+ u32 residue;
+ struct virt_dma_desc *vd;
+ enum dma_status ret;
+ unsigned long flags;
+
+ ret = dma_cookie_status(chan, cookie, txstate);
+ if (ret == DMA_COMPLETE || !txstate)
+ return ret;
+
+ spin_lock_irqsave(&sdmac->vc.lock, flags);
+
+ vd = vchan_find_desc(&sdmac->vc, cookie);
+ if (vd)
+ desc = to_sdma_desc(&vd->tx);
+ else if (sdmac->desc && sdmac->desc->vd.tx.cookie == cookie)
+ desc = sdmac->desc;
+
+ if (desc) {
+ if (sdmac->flags & IMX_DMA_SG_LOOP)
+ residue = (desc->num_bd - desc->buf_ptail) *
+ desc->period_len - desc->chn_real_count;
+ else
+ residue = desc->chn_count - desc->chn_real_count;
+ } else {
+ residue = 0;
+ }
+
+ spin_unlock_irqrestore(&sdmac->vc.lock, flags);
+
+ dma_set_tx_state(txstate, chan->completed_cookie, chan->cookie,
+ residue);
+
+ return sdmac->status;
+}
+
+static void sdma_issue_pending(struct dma_chan *chan)
+{
+ struct sdma_channel *sdmac = to_sdma_chan(chan);
+ unsigned long flags;
+
+ spin_lock_irqsave(&sdmac->vc.lock, flags);
+ if (vchan_issue_pending(&sdmac->vc) && !sdmac->desc)
+ sdma_start_desc(sdmac);
+ spin_unlock_irqrestore(&sdmac->vc.lock, flags);
+}
+
+#define SDMA_SCRIPT_ADDRS_ARRAY_SIZE_V1 34
+#define SDMA_SCRIPT_ADDRS_ARRAY_SIZE_V2 38
+#define SDMA_SCRIPT_ADDRS_ARRAY_SIZE_V3 45
+#define SDMA_SCRIPT_ADDRS_ARRAY_SIZE_V4 46
+
+static void sdma_add_scripts(struct sdma_engine *sdma,
+ const struct sdma_script_start_addrs *addr)
+{
+ s32 *addr_arr = (u32 *)addr;
+ s32 *saddr_arr = (u32 *)sdma->script_addrs;
+ int i;
+
+ /* use the default firmware in ROM if missing external firmware */
+ if (!sdma->script_number)
+ sdma->script_number = SDMA_SCRIPT_ADDRS_ARRAY_SIZE_V1;
+
+ if (sdma->script_number > sizeof(struct sdma_script_start_addrs)
+ / sizeof(s32)) {
+ dev_err(sdma->dev,
+ "SDMA script number %d not match with firmware.\n",
+ sdma->script_number);
+ return;
+ }
+
+ for (i = 0; i < sdma->script_number; i++)
+ if (addr_arr[i] > 0)
+ saddr_arr[i] = addr_arr[i];
+
+ /*
+ * For compatibility with NXP internal legacy kernel before 4.19 which
+ * is based on uart ram script and mainline kernel based on uart rom
+ * script, both uart ram/rom scripts are present in newer sdma
+ * firmware. Use the rom versions if they are present (V3 or newer).
+ */
+ if (sdma->script_number >= SDMA_SCRIPT_ADDRS_ARRAY_SIZE_V3) {
+ if (addr->uart_2_mcu_rom_addr)
+ sdma->script_addrs->uart_2_mcu_addr = addr->uart_2_mcu_rom_addr;
+ if (addr->uartsh_2_mcu_rom_addr)
+ sdma->script_addrs->uartsh_2_mcu_addr = addr->uartsh_2_mcu_rom_addr;
+ }
+}
+
+static void sdma_load_firmware(const struct firmware *fw, void *context)
+{
+ struct sdma_engine *sdma = context;
+ const struct sdma_firmware_header *header;
+ const struct sdma_script_start_addrs *addr;
+ unsigned short *ram_code;
+
+ if (!fw) {
+ dev_info(sdma->dev, "external firmware not found, using ROM firmware\n");
+ /* In this case we just use the ROM firmware. */
+ return;
+ }
+
+ if (fw->size < sizeof(*header))
+ goto err_firmware;
+
+ header = (struct sdma_firmware_header *)fw->data;
+
+ if (header->magic != SDMA_FIRMWARE_MAGIC)
+ goto err_firmware;
+ if (header->ram_code_start + header->ram_code_size > fw->size)
+ goto err_firmware;
+ switch (header->version_major) {
+ case 1:
+ sdma->script_number = SDMA_SCRIPT_ADDRS_ARRAY_SIZE_V1;
+ break;
+ case 2:
+ sdma->script_number = SDMA_SCRIPT_ADDRS_ARRAY_SIZE_V2;
+ break;
+ case 3:
+ sdma->script_number = SDMA_SCRIPT_ADDRS_ARRAY_SIZE_V3;
+ break;
+ case 4:
+ sdma->script_number = SDMA_SCRIPT_ADDRS_ARRAY_SIZE_V4;
+ break;
+ default:
+ dev_err(sdma->dev, "unknown firmware version\n");
+ goto err_firmware;
+ }
+
+ addr = (void *)header + header->script_addrs_start;
+ ram_code = (void *)header + header->ram_code_start;
+
+ clk_enable(sdma->clk_ipg);
+ clk_enable(sdma->clk_ahb);
+ /* download the RAM image for SDMA */
+ sdma_load_script(sdma, ram_code,
+ header->ram_code_size,
+ addr->ram_code_start_addr);
+ clk_disable(sdma->clk_ipg);
+ clk_disable(sdma->clk_ahb);
+
+ sdma_add_scripts(sdma, addr);
+
+ sdma->fw_loaded = true;
+
+ dev_info(sdma->dev, "loaded firmware %d.%d\n",
+ header->version_major,
+ header->version_minor);
+
+err_firmware:
+ release_firmware(fw);
+}
+
+#define EVENT_REMAP_CELLS 3
+
+static int sdma_event_remap(struct sdma_engine *sdma)
+{
+ struct device_node *np = sdma->dev->of_node;
+ struct device_node *gpr_np = of_parse_phandle(np, "gpr", 0);
+ struct property *event_remap;
+ struct regmap *gpr;
+ char propname[] = "fsl,sdma-event-remap";
+ u32 reg, val, shift, num_map, i;
+ int ret = 0;
+
+ if (IS_ERR(np) || !gpr_np)
+ goto out;
+
+ event_remap = of_find_property(np, propname, NULL);
+ num_map = event_remap ? (event_remap->length / sizeof(u32)) : 0;
+ if (!num_map) {
+ dev_dbg(sdma->dev, "no event needs to be remapped\n");
+ goto out;
+ } else if (num_map % EVENT_REMAP_CELLS) {
+ dev_err(sdma->dev, "the property %s must modulo %d\n",
+ propname, EVENT_REMAP_CELLS);
+ ret = -EINVAL;
+ goto out;
+ }
+
+ gpr = syscon_node_to_regmap(gpr_np);
+ if (IS_ERR(gpr)) {
+ dev_err(sdma->dev, "failed to get gpr regmap\n");
+ ret = PTR_ERR(gpr);
+ goto out;
+ }
+
+ for (i = 0; i < num_map; i += EVENT_REMAP_CELLS) {
+ ret = of_property_read_u32_index(np, propname, i, &reg);
+ if (ret) {
+ dev_err(sdma->dev, "failed to read property %s index %d\n",
+ propname, i);
+ goto out;
+ }
+
+ ret = of_property_read_u32_index(np, propname, i + 1, &shift);
+ if (ret) {
+ dev_err(sdma->dev, "failed to read property %s index %d\n",
+ propname, i + 1);
+ goto out;
+ }
+
+ ret = of_property_read_u32_index(np, propname, i + 2, &val);
+ if (ret) {
+ dev_err(sdma->dev, "failed to read property %s index %d\n",
+ propname, i + 2);
+ goto out;
+ }
+
+ regmap_update_bits(gpr, reg, BIT(shift), val << shift);
+ }
+
+out:
+ if (gpr_np)
+ of_node_put(gpr_np);
+
+ return ret;
+}
+
+static int sdma_get_firmware(struct sdma_engine *sdma,
+ const char *fw_name)
+{
+ int ret;
+
+ ret = request_firmware_nowait(THIS_MODULE,
+ FW_ACTION_UEVENT, fw_name, sdma->dev,
+ GFP_KERNEL, sdma, sdma_load_firmware);
+
+ return ret;
+}
+
+static int sdma_init(struct sdma_engine *sdma)
+{
+ int i, ret;
+ dma_addr_t ccb_phys;
+
+ ret = clk_enable(sdma->clk_ipg);
+ if (ret)
+ return ret;
+ ret = clk_enable(sdma->clk_ahb);
+ if (ret)
+ goto disable_clk_ipg;
+
+ if (sdma->drvdata->check_ratio &&
+ (clk_get_rate(sdma->clk_ahb) == clk_get_rate(sdma->clk_ipg)))
+ sdma->clk_ratio = 1;
+
+ /* Be sure SDMA has not started yet */
+ writel_relaxed(0, sdma->regs + SDMA_H_C0PTR);
+
+ sdma->channel_control = dma_alloc_coherent(sdma->dev,
+ MAX_DMA_CHANNELS * sizeof(struct sdma_channel_control) +
+ sizeof(struct sdma_context_data),
+ &ccb_phys, GFP_KERNEL);
+
+ if (!sdma->channel_control) {
+ ret = -ENOMEM;
+ goto err_dma_alloc;
+ }
+
+ sdma->context = (void *)sdma->channel_control +
+ MAX_DMA_CHANNELS * sizeof(struct sdma_channel_control);
+ sdma->context_phys = ccb_phys +
+ MAX_DMA_CHANNELS * sizeof(struct sdma_channel_control);
+
+ /* disable all channels */
+ for (i = 0; i < sdma->drvdata->num_events; i++)
+ writel_relaxed(0, sdma->regs + chnenbl_ofs(sdma, i));
+
+ /* All channels have priority 0 */
+ for (i = 0; i < MAX_DMA_CHANNELS; i++)
+ writel_relaxed(0, sdma->regs + SDMA_CHNPRI_0 + i * 4);
+
+ ret = sdma_request_channel0(sdma);
+ if (ret)
+ goto err_dma_alloc;
+
+ sdma_config_ownership(&sdma->channel[0], false, true, false);
+
+ /* Set Command Channel (Channel Zero) */
+ writel_relaxed(0x4050, sdma->regs + SDMA_CHN0ADDR);
+
+ /* Set bits of CONFIG register but with static context switching */
+ if (sdma->clk_ratio)
+ writel_relaxed(SDMA_H_CONFIG_ACR, sdma->regs + SDMA_H_CONFIG);
+ else
+ writel_relaxed(0, sdma->regs + SDMA_H_CONFIG);
+
+ writel_relaxed(ccb_phys, sdma->regs + SDMA_H_C0PTR);
+
+ /* Initializes channel's priorities */
+ sdma_set_channel_priority(&sdma->channel[0], 7);
+
+ clk_disable(sdma->clk_ipg);
+ clk_disable(sdma->clk_ahb);
+
+ return 0;
+
+err_dma_alloc:
+ clk_disable(sdma->clk_ahb);
+disable_clk_ipg:
+ clk_disable(sdma->clk_ipg);
+ dev_err(sdma->dev, "initialisation failed with %d\n", ret);
+ return ret;
+}
+
+static bool sdma_filter_fn(struct dma_chan *chan, void *fn_param)
+{
+ struct sdma_channel *sdmac = to_sdma_chan(chan);
+ struct imx_dma_data *data = fn_param;
+
+ if (!imx_dma_is_general_purpose(chan))
+ return false;
+
+ sdmac->data = *data;
+ chan->private = &sdmac->data;
+
+ return true;
+}
+
+static struct dma_chan *sdma_xlate(struct of_phandle_args *dma_spec,
+ struct of_dma *ofdma)
+{
+ struct sdma_engine *sdma = ofdma->of_dma_data;
+ dma_cap_mask_t mask = sdma->dma_device.cap_mask;
+ struct imx_dma_data data;
+
+ if (dma_spec->args_count != 3)
+ return NULL;
+
+ data.dma_request = dma_spec->args[0];
+ data.peripheral_type = dma_spec->args[1];
+ data.priority = dma_spec->args[2];
+ /*
+ * init dma_request2 to zero, which is not used by the dts.
+ * For P2P, dma_request2 is init from dma_request_channel(),
+ * chan->private will point to the imx_dma_data, and in
+ * device_alloc_chan_resources(), imx_dma_data.dma_request2 will
+ * be set to sdmac->event_id1.
+ */
+ data.dma_request2 = 0;
+
+ return __dma_request_channel(&mask, sdma_filter_fn, &data,
+ ofdma->of_node);
+}
+
+static int sdma_probe(struct platform_device *pdev)
+{
+ struct device_node *np = pdev->dev.of_node;
+ struct device_node *spba_bus;
+ const char *fw_name;
+ int ret;
+ int irq;
+ struct resource *iores;
+ struct resource spba_res;
+ int i;
+ struct sdma_engine *sdma;
+ s32 *saddr_arr;
+
+ ret = dma_coerce_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
+ if (ret)
+ return ret;
+
+ sdma = devm_kzalloc(&pdev->dev, sizeof(*sdma), GFP_KERNEL);
+ if (!sdma)
+ return -ENOMEM;
+
+ spin_lock_init(&sdma->channel_0_lock);
+
+ sdma->dev = &pdev->dev;
+ sdma->drvdata = of_device_get_match_data(sdma->dev);
+
+ irq = platform_get_irq(pdev, 0);
+ if (irq < 0)
+ return irq;
+
+ iores = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ sdma->regs = devm_ioremap_resource(&pdev->dev, iores);
+ if (IS_ERR(sdma->regs))
+ return PTR_ERR(sdma->regs);
+
+ sdma->clk_ipg = devm_clk_get(&pdev->dev, "ipg");
+ if (IS_ERR(sdma->clk_ipg))
+ return PTR_ERR(sdma->clk_ipg);
+
+ sdma->clk_ahb = devm_clk_get(&pdev->dev, "ahb");
+ if (IS_ERR(sdma->clk_ahb))
+ return PTR_ERR(sdma->clk_ahb);
+
+ ret = clk_prepare(sdma->clk_ipg);
+ if (ret)
+ return ret;
+
+ ret = clk_prepare(sdma->clk_ahb);
+ if (ret)
+ goto err_clk;
+
+ ret = devm_request_irq(&pdev->dev, irq, sdma_int_handler, 0,
+ dev_name(&pdev->dev), sdma);
+ if (ret)
+ goto err_irq;
+
+ sdma->irq = irq;
+
+ sdma->script_addrs = kzalloc(sizeof(*sdma->script_addrs), GFP_KERNEL);
+ if (!sdma->script_addrs) {
+ ret = -ENOMEM;
+ goto err_irq;
+ }
+
+ /* initially no scripts available */
+ saddr_arr = (s32 *)sdma->script_addrs;
+ for (i = 0; i < sizeof(*sdma->script_addrs) / sizeof(s32); i++)
+ saddr_arr[i] = -EINVAL;
+
+ dma_cap_set(DMA_SLAVE, sdma->dma_device.cap_mask);
+ dma_cap_set(DMA_CYCLIC, sdma->dma_device.cap_mask);
+ dma_cap_set(DMA_MEMCPY, sdma->dma_device.cap_mask);
+
+ INIT_LIST_HEAD(&sdma->dma_device.channels);
+ /* Initialize channel parameters */
+ for (i = 0; i < MAX_DMA_CHANNELS; i++) {
+ struct sdma_channel *sdmac = &sdma->channel[i];
+
+ sdmac->sdma = sdma;
+
+ sdmac->channel = i;
+ sdmac->vc.desc_free = sdma_desc_free;
+ INIT_LIST_HEAD(&sdmac->terminated);
+ INIT_WORK(&sdmac->terminate_worker,
+ sdma_channel_terminate_work);
+ /*
+ * Add the channel to the DMAC list. Do not add channel 0 though
+ * because we need it internally in the SDMA driver. This also means
+ * that channel 0 in dmaengine counting matches sdma channel 1.
+ */
+ if (i)
+ vchan_init(&sdmac->vc, &sdma->dma_device);
+ }
+
+ ret = sdma_init(sdma);
+ if (ret)
+ goto err_init;
+
+ ret = sdma_event_remap(sdma);
+ if (ret)
+ goto err_init;
+
+ if (sdma->drvdata->script_addrs)
+ sdma_add_scripts(sdma, sdma->drvdata->script_addrs);
+
+ sdma->dma_device.dev = &pdev->dev;
+
+ sdma->dma_device.device_alloc_chan_resources = sdma_alloc_chan_resources;
+ sdma->dma_device.device_free_chan_resources = sdma_free_chan_resources;
+ sdma->dma_device.device_tx_status = sdma_tx_status;
+ sdma->dma_device.device_prep_slave_sg = sdma_prep_slave_sg;
+ sdma->dma_device.device_prep_dma_cyclic = sdma_prep_dma_cyclic;
+ sdma->dma_device.device_config = sdma_config;
+ sdma->dma_device.device_terminate_all = sdma_terminate_all;
+ sdma->dma_device.device_synchronize = sdma_channel_synchronize;
+ sdma->dma_device.src_addr_widths = SDMA_DMA_BUSWIDTHS;
+ sdma->dma_device.dst_addr_widths = SDMA_DMA_BUSWIDTHS;
+ sdma->dma_device.directions = SDMA_DMA_DIRECTIONS;
+ sdma->dma_device.residue_granularity = DMA_RESIDUE_GRANULARITY_SEGMENT;
+ sdma->dma_device.device_prep_dma_memcpy = sdma_prep_memcpy;
+ sdma->dma_device.device_issue_pending = sdma_issue_pending;
+ sdma->dma_device.copy_align = 2;
+ dma_set_max_seg_size(sdma->dma_device.dev, SDMA_BD_MAX_CNT);
+
+ platform_set_drvdata(pdev, sdma);
+
+ ret = dma_async_device_register(&sdma->dma_device);
+ if (ret) {
+ dev_err(&pdev->dev, "unable to register\n");
+ goto err_init;
+ }
+
+ if (np) {
+ ret = of_dma_controller_register(np, sdma_xlate, sdma);
+ if (ret) {
+ dev_err(&pdev->dev, "failed to register controller\n");
+ goto err_register;
+ }
+
+ spba_bus = of_find_compatible_node(NULL, NULL, "fsl,spba-bus");
+ ret = of_address_to_resource(spba_bus, 0, &spba_res);
+ if (!ret) {
+ sdma->spba_start_addr = spba_res.start;
+ sdma->spba_end_addr = spba_res.end;
+ }
+ of_node_put(spba_bus);
+ }
+
+ /*
+ * Because that device tree does not encode ROM script address,
+ * the RAM script in firmware is mandatory for device tree
+ * probe, otherwise it fails.
+ */
+ ret = of_property_read_string(np, "fsl,sdma-ram-script-name",
+ &fw_name);
+ if (ret) {
+ dev_warn(&pdev->dev, "failed to get firmware name\n");
+ } else {
+ ret = sdma_get_firmware(sdma, fw_name);
+ if (ret)
+ dev_warn(&pdev->dev, "failed to get firmware from device tree\n");
+ }
+
+ return 0;
+
+err_register:
+ dma_async_device_unregister(&sdma->dma_device);
+err_init:
+ kfree(sdma->script_addrs);
+err_irq:
+ clk_unprepare(sdma->clk_ahb);
+err_clk:
+ clk_unprepare(sdma->clk_ipg);
+ return ret;
+}
+
+static int sdma_remove(struct platform_device *pdev)
+{
+ struct sdma_engine *sdma = platform_get_drvdata(pdev);
+ int i;
+
+ devm_free_irq(&pdev->dev, sdma->irq, sdma);
+ dma_async_device_unregister(&sdma->dma_device);
+ kfree(sdma->script_addrs);
+ clk_unprepare(sdma->clk_ahb);
+ clk_unprepare(sdma->clk_ipg);
+ /* Kill the tasklet */
+ for (i = 0; i < MAX_DMA_CHANNELS; i++) {
+ struct sdma_channel *sdmac = &sdma->channel[i];
+
+ tasklet_kill(&sdmac->vc.task);
+ sdma_free_chan_resources(&sdmac->vc.chan);
+ }
+
+ platform_set_drvdata(pdev, NULL);
+ return 0;
+}
+
+static struct platform_driver sdma_driver = {
+ .driver = {
+ .name = "imx-sdma",
+ .of_match_table = sdma_dt_ids,
+ },
+ .remove = sdma_remove,
+ .probe = sdma_probe,
+};
+
+module_platform_driver(sdma_driver);
+
+MODULE_AUTHOR("Sascha Hauer, Pengutronix <s.hauer@pengutronix.de>");
+MODULE_DESCRIPTION("i.MX SDMA driver");
+#if IS_ENABLED(CONFIG_SOC_IMX6Q)
+MODULE_FIRMWARE("imx/sdma/sdma-imx6q.bin");
+#endif
+#if IS_ENABLED(CONFIG_SOC_IMX7D) || IS_ENABLED(CONFIG_SOC_IMX8M)
+MODULE_FIRMWARE("imx/sdma/sdma-imx7d.bin");
+#endif
+MODULE_LICENSE("GPL");
diff --git a/drivers/dma/ioat/Makefile b/drivers/dma/ioat/Makefile
new file mode 100644
index 000000000..86638a608
--- /dev/null
+++ b/drivers/dma/ioat/Makefile
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
+obj-$(CONFIG_INTEL_IOATDMA) += ioatdma.o
+ioatdma-y := init.o dma.o prep.o dca.o sysfs.o
diff --git a/drivers/dma/ioat/dca.c b/drivers/dma/ioat/dca.c
new file mode 100644
index 000000000..289c59ed7
--- /dev/null
+++ b/drivers/dma/ioat/dca.c
@@ -0,0 +1,331 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Intel I/OAT DMA Linux driver
+ * Copyright(c) 2007 - 2009 Intel Corporation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/dca.h>
+
+/* either a kernel change is needed, or we need something like this in kernel */
+#ifndef CONFIG_SMP
+#include <asm/smp.h>
+#undef cpu_physical_id
+#define cpu_physical_id(cpu) (cpuid_ebx(1) >> 24)
+#endif
+
+#include "dma.h"
+#include "registers.h"
+
+/*
+ * Bit 7 of a tag map entry is the "valid" bit, if it is set then bits 0:6
+ * contain the bit number of the APIC ID to map into the DCA tag. If the valid
+ * bit is not set, then the value must be 0 or 1 and defines the bit in the tag.
+ */
+#define DCA_TAG_MAP_VALID 0x80
+
+#define DCA3_TAG_MAP_BIT_TO_INV 0x80
+#define DCA3_TAG_MAP_BIT_TO_SEL 0x40
+#define DCA3_TAG_MAP_LITERAL_VAL 0x1
+
+#define DCA_TAG_MAP_MASK 0xDF
+
+/* expected tag map bytes for I/OAT ver.2 */
+#define DCA2_TAG_MAP_BYTE0 0x80
+#define DCA2_TAG_MAP_BYTE1 0x0
+#define DCA2_TAG_MAP_BYTE2 0x81
+#define DCA2_TAG_MAP_BYTE3 0x82
+#define DCA2_TAG_MAP_BYTE4 0x82
+
+/*
+ * "Legacy" DCA systems do not implement the DCA register set in the
+ * I/OAT device. Software needs direct support for their tag mappings.
+ */
+
+#define APICID_BIT(x) (DCA_TAG_MAP_VALID | (x))
+#define IOAT_TAG_MAP_LEN 8
+
+/* pack PCI B/D/F into a u16 */
+static inline u16 dcaid_from_pcidev(struct pci_dev *pci)
+{
+ return (pci->bus->number << 8) | pci->devfn;
+}
+
+static int dca_enabled_in_bios(struct pci_dev *pdev)
+{
+ /* CPUID level 9 returns DCA configuration */
+ /* Bit 0 indicates DCA enabled by the BIOS */
+ unsigned long cpuid_level_9;
+ int res;
+
+ cpuid_level_9 = cpuid_eax(9);
+ res = test_bit(0, &cpuid_level_9);
+ if (!res)
+ dev_dbg(&pdev->dev, "DCA is disabled in BIOS\n");
+
+ return res;
+}
+
+int system_has_dca_enabled(struct pci_dev *pdev)
+{
+ if (boot_cpu_has(X86_FEATURE_DCA))
+ return dca_enabled_in_bios(pdev);
+
+ dev_dbg(&pdev->dev, "boot cpu doesn't have X86_FEATURE_DCA\n");
+ return 0;
+}
+
+struct ioat_dca_slot {
+ struct pci_dev *pdev; /* requester device */
+ u16 rid; /* requester id, as used by IOAT */
+};
+
+#define IOAT_DCA_MAX_REQ 6
+#define IOAT3_DCA_MAX_REQ 2
+
+struct ioat_dca_priv {
+ void __iomem *iobase;
+ void __iomem *dca_base;
+ int max_requesters;
+ int requester_count;
+ u8 tag_map[IOAT_TAG_MAP_LEN];
+ struct ioat_dca_slot req_slots[];
+};
+
+static int ioat_dca_dev_managed(struct dca_provider *dca,
+ struct device *dev)
+{
+ struct ioat_dca_priv *ioatdca = dca_priv(dca);
+ struct pci_dev *pdev;
+ int i;
+
+ pdev = to_pci_dev(dev);
+ for (i = 0; i < ioatdca->max_requesters; i++) {
+ if (ioatdca->req_slots[i].pdev == pdev)
+ return 1;
+ }
+ return 0;
+}
+
+static int ioat_dca_add_requester(struct dca_provider *dca, struct device *dev)
+{
+ struct ioat_dca_priv *ioatdca = dca_priv(dca);
+ struct pci_dev *pdev;
+ int i;
+ u16 id;
+ u16 global_req_table;
+
+ /* This implementation only supports PCI-Express */
+ if (!dev_is_pci(dev))
+ return -ENODEV;
+ pdev = to_pci_dev(dev);
+ id = dcaid_from_pcidev(pdev);
+
+ if (ioatdca->requester_count == ioatdca->max_requesters)
+ return -ENODEV;
+
+ for (i = 0; i < ioatdca->max_requesters; i++) {
+ if (ioatdca->req_slots[i].pdev == NULL) {
+ /* found an empty slot */
+ ioatdca->requester_count++;
+ ioatdca->req_slots[i].pdev = pdev;
+ ioatdca->req_slots[i].rid = id;
+ global_req_table =
+ readw(ioatdca->dca_base + IOAT3_DCA_GREQID_OFFSET);
+ writel(id | IOAT_DCA_GREQID_VALID,
+ ioatdca->iobase + global_req_table + (i * 4));
+ return i;
+ }
+ }
+ /* Error, ioatdma->requester_count is out of whack */
+ return -EFAULT;
+}
+
+static int ioat_dca_remove_requester(struct dca_provider *dca,
+ struct device *dev)
+{
+ struct ioat_dca_priv *ioatdca = dca_priv(dca);
+ struct pci_dev *pdev;
+ int i;
+ u16 global_req_table;
+
+ /* This implementation only supports PCI-Express */
+ if (!dev_is_pci(dev))
+ return -ENODEV;
+ pdev = to_pci_dev(dev);
+
+ for (i = 0; i < ioatdca->max_requesters; i++) {
+ if (ioatdca->req_slots[i].pdev == pdev) {
+ global_req_table =
+ readw(ioatdca->dca_base + IOAT3_DCA_GREQID_OFFSET);
+ writel(0, ioatdca->iobase + global_req_table + (i * 4));
+ ioatdca->req_slots[i].pdev = NULL;
+ ioatdca->req_slots[i].rid = 0;
+ ioatdca->requester_count--;
+ return i;
+ }
+ }
+ return -ENODEV;
+}
+
+static u8 ioat_dca_get_tag(struct dca_provider *dca,
+ struct device *dev,
+ int cpu)
+{
+ u8 tag;
+
+ struct ioat_dca_priv *ioatdca = dca_priv(dca);
+ int i, apic_id, bit, value;
+ u8 entry;
+
+ tag = 0;
+ apic_id = cpu_physical_id(cpu);
+
+ for (i = 0; i < IOAT_TAG_MAP_LEN; i++) {
+ entry = ioatdca->tag_map[i];
+ if (entry & DCA3_TAG_MAP_BIT_TO_SEL) {
+ bit = entry &
+ ~(DCA3_TAG_MAP_BIT_TO_SEL | DCA3_TAG_MAP_BIT_TO_INV);
+ value = (apic_id & (1 << bit)) ? 1 : 0;
+ } else if (entry & DCA3_TAG_MAP_BIT_TO_INV) {
+ bit = entry & ~DCA3_TAG_MAP_BIT_TO_INV;
+ value = (apic_id & (1 << bit)) ? 0 : 1;
+ } else {
+ value = (entry & DCA3_TAG_MAP_LITERAL_VAL) ? 1 : 0;
+ }
+ tag |= (value << i);
+ }
+
+ return tag;
+}
+
+static const struct dca_ops ioat_dca_ops = {
+ .add_requester = ioat_dca_add_requester,
+ .remove_requester = ioat_dca_remove_requester,
+ .get_tag = ioat_dca_get_tag,
+ .dev_managed = ioat_dca_dev_managed,
+};
+
+static int ioat_dca_count_dca_slots(void *iobase, u16 dca_offset)
+{
+ int slots = 0;
+ u32 req;
+ u16 global_req_table;
+
+ global_req_table = readw(iobase + dca_offset + IOAT3_DCA_GREQID_OFFSET);
+ if (global_req_table == 0)
+ return 0;
+
+ do {
+ req = readl(iobase + global_req_table + (slots * sizeof(u32)));
+ slots++;
+ } while ((req & IOAT_DCA_GREQID_LASTID) == 0);
+
+ return slots;
+}
+
+static inline int dca3_tag_map_invalid(u8 *tag_map)
+{
+ /*
+ * If the tag map is not programmed by the BIOS the default is:
+ * 0x80 0x80 0x80 0x80 0x80 0x00 0x00 0x00
+ *
+ * This an invalid map and will result in only 2 possible tags
+ * 0x1F and 0x00. 0x00 is an invalid DCA tag so we know that
+ * this entire definition is invalid.
+ */
+ return ((tag_map[0] == DCA_TAG_MAP_VALID) &&
+ (tag_map[1] == DCA_TAG_MAP_VALID) &&
+ (tag_map[2] == DCA_TAG_MAP_VALID) &&
+ (tag_map[3] == DCA_TAG_MAP_VALID) &&
+ (tag_map[4] == DCA_TAG_MAP_VALID));
+}
+
+struct dca_provider *ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase)
+{
+ struct dca_provider *dca;
+ struct ioat_dca_priv *ioatdca;
+ int slots;
+ int i;
+ int err;
+ u16 dca_offset;
+ u16 csi_fsb_control;
+ u16 pcie_control;
+ u8 bit;
+
+ union {
+ u64 full;
+ struct {
+ u32 low;
+ u32 high;
+ };
+ } tag_map;
+
+ if (!system_has_dca_enabled(pdev))
+ return NULL;
+
+ dca_offset = readw(iobase + IOAT_DCAOFFSET_OFFSET);
+ if (dca_offset == 0)
+ return NULL;
+
+ slots = ioat_dca_count_dca_slots(iobase, dca_offset);
+ if (slots == 0)
+ return NULL;
+
+ dca = alloc_dca_provider(&ioat_dca_ops,
+ struct_size(ioatdca, req_slots, slots));
+ if (!dca)
+ return NULL;
+
+ ioatdca = dca_priv(dca);
+ ioatdca->iobase = iobase;
+ ioatdca->dca_base = iobase + dca_offset;
+ ioatdca->max_requesters = slots;
+
+ /* some bios might not know to turn these on */
+ csi_fsb_control = readw(ioatdca->dca_base + IOAT3_CSI_CONTROL_OFFSET);
+ if ((csi_fsb_control & IOAT3_CSI_CONTROL_PREFETCH) == 0) {
+ csi_fsb_control |= IOAT3_CSI_CONTROL_PREFETCH;
+ writew(csi_fsb_control,
+ ioatdca->dca_base + IOAT3_CSI_CONTROL_OFFSET);
+ }
+ pcie_control = readw(ioatdca->dca_base + IOAT3_PCI_CONTROL_OFFSET);
+ if ((pcie_control & IOAT3_PCI_CONTROL_MEMWR) == 0) {
+ pcie_control |= IOAT3_PCI_CONTROL_MEMWR;
+ writew(pcie_control,
+ ioatdca->dca_base + IOAT3_PCI_CONTROL_OFFSET);
+ }
+
+
+ /* TODO version, compatibility and configuration checks */
+
+ /* copy out the APIC to DCA tag map */
+ tag_map.low =
+ readl(ioatdca->dca_base + IOAT3_APICID_TAG_MAP_OFFSET_LOW);
+ tag_map.high =
+ readl(ioatdca->dca_base + IOAT3_APICID_TAG_MAP_OFFSET_HIGH);
+ for (i = 0; i < 8; i++) {
+ bit = tag_map.full >> (8 * i);
+ ioatdca->tag_map[i] = bit & DCA_TAG_MAP_MASK;
+ }
+
+ if (dca3_tag_map_invalid(ioatdca->tag_map)) {
+ add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
+ pr_warn_once("%s %s: APICID_TAG_MAP set incorrectly by BIOS, disabling DCA\n",
+ dev_driver_string(&pdev->dev),
+ dev_name(&pdev->dev));
+ free_dca_provider(dca);
+ return NULL;
+ }
+
+ err = register_dca_provider(dca, &pdev->dev);
+ if (err) {
+ free_dca_provider(dca);
+ return NULL;
+ }
+
+ return dca;
+}
diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c
new file mode 100644
index 000000000..e2070df6c
--- /dev/null
+++ b/drivers/dma/ioat/dma.c
@@ -0,0 +1,1061 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Intel I/OAT DMA Linux driver
+ * Copyright(c) 2004 - 2015 Intel Corporation.
+ */
+
+/*
+ * This driver supports an Intel I/OAT DMA engine, which does asynchronous
+ * copy operations.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/pci.h>
+#include <linux/interrupt.h>
+#include <linux/dmaengine.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/workqueue.h>
+#include <linux/prefetch.h>
+#include <linux/sizes.h>
+#include "dma.h"
+#include "registers.h"
+#include "hw.h"
+
+#include "../dmaengine.h"
+
+static int completion_timeout = 200;
+module_param(completion_timeout, int, 0644);
+MODULE_PARM_DESC(completion_timeout,
+ "set ioat completion timeout [msec] (default 200 [msec])");
+static int idle_timeout = 2000;
+module_param(idle_timeout, int, 0644);
+MODULE_PARM_DESC(idle_timeout,
+ "set ioat idel timeout [msec] (default 2000 [msec])");
+
+#define IDLE_TIMEOUT msecs_to_jiffies(idle_timeout)
+#define COMPLETION_TIMEOUT msecs_to_jiffies(completion_timeout)
+
+static char *chanerr_str[] = {
+ "DMA Transfer Source Address Error",
+ "DMA Transfer Destination Address Error",
+ "Next Descriptor Address Error",
+ "Descriptor Error",
+ "Chan Address Value Error",
+ "CHANCMD Error",
+ "Chipset Uncorrectable Data Integrity Error",
+ "DMA Uncorrectable Data Integrity Error",
+ "Read Data Error",
+ "Write Data Error",
+ "Descriptor Control Error",
+ "Descriptor Transfer Size Error",
+ "Completion Address Error",
+ "Interrupt Configuration Error",
+ "Super extended descriptor Address Error",
+ "Unaffiliated Error",
+ "CRC or XOR P Error",
+ "XOR Q Error",
+ "Descriptor Count Error",
+ "DIF All F detect Error",
+ "Guard Tag verification Error",
+ "Application Tag verification Error",
+ "Reference Tag verification Error",
+ "Bundle Bit Error",
+ "Result DIF All F detect Error",
+ "Result Guard Tag verification Error",
+ "Result Application Tag verification Error",
+ "Result Reference Tag verification Error",
+};
+
+static void ioat_eh(struct ioatdma_chan *ioat_chan);
+
+static void ioat_print_chanerrs(struct ioatdma_chan *ioat_chan, u32 chanerr)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(chanerr_str); i++) {
+ if ((chanerr >> i) & 1) {
+ dev_err(to_dev(ioat_chan), "Err(%d): %s\n",
+ i, chanerr_str[i]);
+ }
+ }
+}
+
+/**
+ * ioat_dma_do_interrupt - handler used for single vector interrupt mode
+ * @irq: interrupt id
+ * @data: interrupt data
+ */
+irqreturn_t ioat_dma_do_interrupt(int irq, void *data)
+{
+ struct ioatdma_device *instance = data;
+ struct ioatdma_chan *ioat_chan;
+ unsigned long attnstatus;
+ int bit;
+ u8 intrctrl;
+
+ intrctrl = readb(instance->reg_base + IOAT_INTRCTRL_OFFSET);
+
+ if (!(intrctrl & IOAT_INTRCTRL_MASTER_INT_EN))
+ return IRQ_NONE;
+
+ if (!(intrctrl & IOAT_INTRCTRL_INT_STATUS)) {
+ writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET);
+ return IRQ_NONE;
+ }
+
+ attnstatus = readl(instance->reg_base + IOAT_ATTNSTATUS_OFFSET);
+ for_each_set_bit(bit, &attnstatus, BITS_PER_LONG) {
+ ioat_chan = ioat_chan_by_index(instance, bit);
+ if (test_bit(IOAT_RUN, &ioat_chan->state))
+ tasklet_schedule(&ioat_chan->cleanup_task);
+ }
+
+ writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET);
+ return IRQ_HANDLED;
+}
+
+/**
+ * ioat_dma_do_interrupt_msix - handler used for vector-per-channel interrupt mode
+ * @irq: interrupt id
+ * @data: interrupt data
+ */
+irqreturn_t ioat_dma_do_interrupt_msix(int irq, void *data)
+{
+ struct ioatdma_chan *ioat_chan = data;
+
+ if (test_bit(IOAT_RUN, &ioat_chan->state))
+ tasklet_schedule(&ioat_chan->cleanup_task);
+
+ return IRQ_HANDLED;
+}
+
+void ioat_stop(struct ioatdma_chan *ioat_chan)
+{
+ struct ioatdma_device *ioat_dma = ioat_chan->ioat_dma;
+ struct pci_dev *pdev = ioat_dma->pdev;
+ int chan_id = chan_num(ioat_chan);
+ struct msix_entry *msix;
+
+ /* 1/ stop irq from firing tasklets
+ * 2/ stop the tasklet from re-arming irqs
+ */
+ clear_bit(IOAT_RUN, &ioat_chan->state);
+
+ /* flush inflight interrupts */
+ switch (ioat_dma->irq_mode) {
+ case IOAT_MSIX:
+ msix = &ioat_dma->msix_entries[chan_id];
+ synchronize_irq(msix->vector);
+ break;
+ case IOAT_MSI:
+ case IOAT_INTX:
+ synchronize_irq(pdev->irq);
+ break;
+ default:
+ break;
+ }
+
+ /* flush inflight timers */
+ del_timer_sync(&ioat_chan->timer);
+
+ /* flush inflight tasklet runs */
+ tasklet_kill(&ioat_chan->cleanup_task);
+
+ /* final cleanup now that everything is quiesced and can't re-arm */
+ ioat_cleanup_event(&ioat_chan->cleanup_task);
+}
+
+static void __ioat_issue_pending(struct ioatdma_chan *ioat_chan)
+{
+ ioat_chan->dmacount += ioat_ring_pending(ioat_chan);
+ ioat_chan->issued = ioat_chan->head;
+ writew(ioat_chan->dmacount,
+ ioat_chan->reg_base + IOAT_CHAN_DMACOUNT_OFFSET);
+ dev_dbg(to_dev(ioat_chan),
+ "%s: head: %#x tail: %#x issued: %#x count: %#x\n",
+ __func__, ioat_chan->head, ioat_chan->tail,
+ ioat_chan->issued, ioat_chan->dmacount);
+}
+
+void ioat_issue_pending(struct dma_chan *c)
+{
+ struct ioatdma_chan *ioat_chan = to_ioat_chan(c);
+
+ if (ioat_ring_pending(ioat_chan)) {
+ spin_lock_bh(&ioat_chan->prep_lock);
+ __ioat_issue_pending(ioat_chan);
+ spin_unlock_bh(&ioat_chan->prep_lock);
+ }
+}
+
+/**
+ * ioat_update_pending - log pending descriptors
+ * @ioat_chan: ioat+ channel
+ *
+ * Check if the number of unsubmitted descriptors has exceeded the
+ * watermark. Called with prep_lock held
+ */
+static void ioat_update_pending(struct ioatdma_chan *ioat_chan)
+{
+ if (ioat_ring_pending(ioat_chan) > ioat_pending_level)
+ __ioat_issue_pending(ioat_chan);
+}
+
+static void __ioat_start_null_desc(struct ioatdma_chan *ioat_chan)
+{
+ struct ioat_ring_ent *desc;
+ struct ioat_dma_descriptor *hw;
+
+ if (ioat_ring_space(ioat_chan) < 1) {
+ dev_err(to_dev(ioat_chan),
+ "Unable to start null desc - ring full\n");
+ return;
+ }
+
+ dev_dbg(to_dev(ioat_chan),
+ "%s: head: %#x tail: %#x issued: %#x\n",
+ __func__, ioat_chan->head, ioat_chan->tail, ioat_chan->issued);
+ desc = ioat_get_ring_ent(ioat_chan, ioat_chan->head);
+
+ hw = desc->hw;
+ hw->ctl = 0;
+ hw->ctl_f.null = 1;
+ hw->ctl_f.int_en = 1;
+ hw->ctl_f.compl_write = 1;
+ /* set size to non-zero value (channel returns error when size is 0) */
+ hw->size = NULL_DESC_BUFFER_SIZE;
+ hw->src_addr = 0;
+ hw->dst_addr = 0;
+ async_tx_ack(&desc->txd);
+ ioat_set_chainaddr(ioat_chan, desc->txd.phys);
+ dump_desc_dbg(ioat_chan, desc);
+ /* make sure descriptors are written before we submit */
+ wmb();
+ ioat_chan->head += 1;
+ __ioat_issue_pending(ioat_chan);
+}
+
+void ioat_start_null_desc(struct ioatdma_chan *ioat_chan)
+{
+ spin_lock_bh(&ioat_chan->prep_lock);
+ if (!test_bit(IOAT_CHAN_DOWN, &ioat_chan->state))
+ __ioat_start_null_desc(ioat_chan);
+ spin_unlock_bh(&ioat_chan->prep_lock);
+}
+
+static void __ioat_restart_chan(struct ioatdma_chan *ioat_chan)
+{
+ /* set the tail to be re-issued */
+ ioat_chan->issued = ioat_chan->tail;
+ ioat_chan->dmacount = 0;
+ mod_timer(&ioat_chan->timer, jiffies + COMPLETION_TIMEOUT);
+
+ dev_dbg(to_dev(ioat_chan),
+ "%s: head: %#x tail: %#x issued: %#x count: %#x\n",
+ __func__, ioat_chan->head, ioat_chan->tail,
+ ioat_chan->issued, ioat_chan->dmacount);
+
+ if (ioat_ring_pending(ioat_chan)) {
+ struct ioat_ring_ent *desc;
+
+ desc = ioat_get_ring_ent(ioat_chan, ioat_chan->tail);
+ ioat_set_chainaddr(ioat_chan, desc->txd.phys);
+ __ioat_issue_pending(ioat_chan);
+ } else
+ __ioat_start_null_desc(ioat_chan);
+}
+
+static int ioat_quiesce(struct ioatdma_chan *ioat_chan, unsigned long tmo)
+{
+ unsigned long end = jiffies + tmo;
+ int err = 0;
+ u32 status;
+
+ status = ioat_chansts(ioat_chan);
+ if (is_ioat_active(status) || is_ioat_idle(status))
+ ioat_suspend(ioat_chan);
+ while (is_ioat_active(status) || is_ioat_idle(status)) {
+ if (tmo && time_after(jiffies, end)) {
+ err = -ETIMEDOUT;
+ break;
+ }
+ status = ioat_chansts(ioat_chan);
+ cpu_relax();
+ }
+
+ return err;
+}
+
+static int ioat_reset_sync(struct ioatdma_chan *ioat_chan, unsigned long tmo)
+{
+ unsigned long end = jiffies + tmo;
+ int err = 0;
+
+ ioat_reset(ioat_chan);
+ while (ioat_reset_pending(ioat_chan)) {
+ if (end && time_after(jiffies, end)) {
+ err = -ETIMEDOUT;
+ break;
+ }
+ cpu_relax();
+ }
+
+ return err;
+}
+
+static dma_cookie_t ioat_tx_submit_unlock(struct dma_async_tx_descriptor *tx)
+ __releases(&ioat_chan->prep_lock)
+{
+ struct dma_chan *c = tx->chan;
+ struct ioatdma_chan *ioat_chan = to_ioat_chan(c);
+ dma_cookie_t cookie;
+
+ cookie = dma_cookie_assign(tx);
+ dev_dbg(to_dev(ioat_chan), "%s: cookie: %d\n", __func__, cookie);
+
+ if (!test_and_set_bit(IOAT_CHAN_ACTIVE, &ioat_chan->state))
+ mod_timer(&ioat_chan->timer, jiffies + COMPLETION_TIMEOUT);
+
+ /* make descriptor updates visible before advancing ioat->head,
+ * this is purposefully not smp_wmb() since we are also
+ * publishing the descriptor updates to a dma device
+ */
+ wmb();
+
+ ioat_chan->head += ioat_chan->produce;
+
+ ioat_update_pending(ioat_chan);
+ spin_unlock_bh(&ioat_chan->prep_lock);
+
+ return cookie;
+}
+
+static struct ioat_ring_ent *
+ioat_alloc_ring_ent(struct dma_chan *chan, int idx, gfp_t flags)
+{
+ struct ioat_dma_descriptor *hw;
+ struct ioat_ring_ent *desc;
+ struct ioatdma_chan *ioat_chan = to_ioat_chan(chan);
+ int chunk;
+ dma_addr_t phys;
+ u8 *pos;
+ off_t offs;
+
+ chunk = idx / IOAT_DESCS_PER_CHUNK;
+ idx &= (IOAT_DESCS_PER_CHUNK - 1);
+ offs = idx * IOAT_DESC_SZ;
+ pos = (u8 *)ioat_chan->descs[chunk].virt + offs;
+ phys = ioat_chan->descs[chunk].hw + offs;
+ hw = (struct ioat_dma_descriptor *)pos;
+ memset(hw, 0, sizeof(*hw));
+
+ desc = kmem_cache_zalloc(ioat_cache, flags);
+ if (!desc)
+ return NULL;
+
+ dma_async_tx_descriptor_init(&desc->txd, chan);
+ desc->txd.tx_submit = ioat_tx_submit_unlock;
+ desc->hw = hw;
+ desc->txd.phys = phys;
+ return desc;
+}
+
+void ioat_free_ring_ent(struct ioat_ring_ent *desc, struct dma_chan *chan)
+{
+ kmem_cache_free(ioat_cache, desc);
+}
+
+struct ioat_ring_ent **
+ioat_alloc_ring(struct dma_chan *c, int order, gfp_t flags)
+{
+ struct ioatdma_chan *ioat_chan = to_ioat_chan(c);
+ struct ioatdma_device *ioat_dma = ioat_chan->ioat_dma;
+ struct ioat_ring_ent **ring;
+ int total_descs = 1 << order;
+ int i, chunks;
+
+ /* allocate the array to hold the software ring */
+ ring = kcalloc(total_descs, sizeof(*ring), flags);
+ if (!ring)
+ return NULL;
+
+ chunks = (total_descs * IOAT_DESC_SZ) / IOAT_CHUNK_SIZE;
+ ioat_chan->desc_chunks = chunks;
+
+ for (i = 0; i < chunks; i++) {
+ struct ioat_descs *descs = &ioat_chan->descs[i];
+
+ descs->virt = dma_alloc_coherent(to_dev(ioat_chan),
+ IOAT_CHUNK_SIZE, &descs->hw, flags);
+ if (!descs->virt) {
+ int idx;
+
+ for (idx = 0; idx < i; idx++) {
+ descs = &ioat_chan->descs[idx];
+ dma_free_coherent(to_dev(ioat_chan),
+ IOAT_CHUNK_SIZE,
+ descs->virt, descs->hw);
+ descs->virt = NULL;
+ descs->hw = 0;
+ }
+
+ ioat_chan->desc_chunks = 0;
+ kfree(ring);
+ return NULL;
+ }
+ }
+
+ for (i = 0; i < total_descs; i++) {
+ ring[i] = ioat_alloc_ring_ent(c, i, flags);
+ if (!ring[i]) {
+ int idx;
+
+ while (i--)
+ ioat_free_ring_ent(ring[i], c);
+
+ for (idx = 0; idx < ioat_chan->desc_chunks; idx++) {
+ dma_free_coherent(to_dev(ioat_chan),
+ IOAT_CHUNK_SIZE,
+ ioat_chan->descs[idx].virt,
+ ioat_chan->descs[idx].hw);
+ ioat_chan->descs[idx].virt = NULL;
+ ioat_chan->descs[idx].hw = 0;
+ }
+
+ ioat_chan->desc_chunks = 0;
+ kfree(ring);
+ return NULL;
+ }
+ set_desc_id(ring[i], i);
+ }
+
+ /* link descs */
+ for (i = 0; i < total_descs-1; i++) {
+ struct ioat_ring_ent *next = ring[i+1];
+ struct ioat_dma_descriptor *hw = ring[i]->hw;
+
+ hw->next = next->txd.phys;
+ }
+ ring[i]->hw->next = ring[0]->txd.phys;
+
+ /* setup descriptor pre-fetching for v3.4 */
+ if (ioat_dma->cap & IOAT_CAP_DPS) {
+ u16 drsctl = IOAT_CHAN_DRSZ_2MB | IOAT_CHAN_DRS_EN;
+
+ if (chunks == 1)
+ drsctl |= IOAT_CHAN_DRS_AUTOWRAP;
+
+ writew(drsctl, ioat_chan->reg_base + IOAT_CHAN_DRSCTL_OFFSET);
+
+ }
+
+ return ring;
+}
+
+/**
+ * ioat_check_space_lock - verify space and grab ring producer lock
+ * @ioat_chan: ioat,3 channel (ring) to operate on
+ * @num_descs: allocation length
+ */
+int ioat_check_space_lock(struct ioatdma_chan *ioat_chan, int num_descs)
+ __acquires(&ioat_chan->prep_lock)
+{
+ spin_lock_bh(&ioat_chan->prep_lock);
+ /* never allow the last descriptor to be consumed, we need at
+ * least one free at all times to allow for on-the-fly ring
+ * resizing.
+ */
+ if (likely(ioat_ring_space(ioat_chan) > num_descs)) {
+ dev_dbg(to_dev(ioat_chan), "%s: num_descs: %d (%x:%x:%x)\n",
+ __func__, num_descs, ioat_chan->head,
+ ioat_chan->tail, ioat_chan->issued);
+ ioat_chan->produce = num_descs;
+ return 0; /* with ioat->prep_lock held */
+ }
+ spin_unlock_bh(&ioat_chan->prep_lock);
+
+ dev_dbg_ratelimited(to_dev(ioat_chan),
+ "%s: ring full! num_descs: %d (%x:%x:%x)\n",
+ __func__, num_descs, ioat_chan->head,
+ ioat_chan->tail, ioat_chan->issued);
+
+ /* progress reclaim in the allocation failure case we may be
+ * called under bh_disabled so we need to trigger the timer
+ * event directly
+ */
+ if (time_is_before_jiffies(ioat_chan->timer.expires)
+ && timer_pending(&ioat_chan->timer)) {
+ mod_timer(&ioat_chan->timer, jiffies + COMPLETION_TIMEOUT);
+ ioat_timer_event(&ioat_chan->timer);
+ }
+
+ return -ENOMEM;
+}
+
+static bool desc_has_ext(struct ioat_ring_ent *desc)
+{
+ struct ioat_dma_descriptor *hw = desc->hw;
+
+ if (hw->ctl_f.op == IOAT_OP_XOR ||
+ hw->ctl_f.op == IOAT_OP_XOR_VAL) {
+ struct ioat_xor_descriptor *xor = desc->xor;
+
+ if (src_cnt_to_sw(xor->ctl_f.src_cnt) > 5)
+ return true;
+ } else if (hw->ctl_f.op == IOAT_OP_PQ ||
+ hw->ctl_f.op == IOAT_OP_PQ_VAL) {
+ struct ioat_pq_descriptor *pq = desc->pq;
+
+ if (src_cnt_to_sw(pq->ctl_f.src_cnt) > 3)
+ return true;
+ }
+
+ return false;
+}
+
+static void
+ioat_free_sed(struct ioatdma_device *ioat_dma, struct ioat_sed_ent *sed)
+{
+ if (!sed)
+ return;
+
+ dma_pool_free(ioat_dma->sed_hw_pool[sed->hw_pool], sed->hw, sed->dma);
+ kmem_cache_free(ioat_sed_cache, sed);
+}
+
+static u64 ioat_get_current_completion(struct ioatdma_chan *ioat_chan)
+{
+ u64 phys_complete;
+ u64 completion;
+
+ completion = *ioat_chan->completion;
+ phys_complete = ioat_chansts_to_addr(completion);
+
+ dev_dbg(to_dev(ioat_chan), "%s: phys_complete: %#llx\n", __func__,
+ (unsigned long long) phys_complete);
+
+ return phys_complete;
+}
+
+static bool ioat_cleanup_preamble(struct ioatdma_chan *ioat_chan,
+ u64 *phys_complete)
+{
+ *phys_complete = ioat_get_current_completion(ioat_chan);
+ if (*phys_complete == ioat_chan->last_completion)
+ return false;
+
+ clear_bit(IOAT_COMPLETION_ACK, &ioat_chan->state);
+ mod_timer(&ioat_chan->timer, jiffies + COMPLETION_TIMEOUT);
+
+ return true;
+}
+
+static void
+desc_get_errstat(struct ioatdma_chan *ioat_chan, struct ioat_ring_ent *desc)
+{
+ struct ioat_dma_descriptor *hw = desc->hw;
+
+ switch (hw->ctl_f.op) {
+ case IOAT_OP_PQ_VAL:
+ case IOAT_OP_PQ_VAL_16S:
+ {
+ struct ioat_pq_descriptor *pq = desc->pq;
+
+ /* check if there's error written */
+ if (!pq->dwbes_f.wbes)
+ return;
+
+ /* need to set a chanerr var for checking to clear later */
+
+ if (pq->dwbes_f.p_val_err)
+ *desc->result |= SUM_CHECK_P_RESULT;
+
+ if (pq->dwbes_f.q_val_err)
+ *desc->result |= SUM_CHECK_Q_RESULT;
+
+ return;
+ }
+ default:
+ return;
+ }
+}
+
+/**
+ * __cleanup - reclaim used descriptors
+ * @ioat_chan: channel (ring) to clean
+ * @phys_complete: zeroed (or not) completion address (from status)
+ */
+static void __cleanup(struct ioatdma_chan *ioat_chan, dma_addr_t phys_complete)
+{
+ struct ioatdma_device *ioat_dma = ioat_chan->ioat_dma;
+ struct ioat_ring_ent *desc;
+ bool seen_current = false;
+ int idx = ioat_chan->tail, i;
+ u16 active;
+
+ dev_dbg(to_dev(ioat_chan), "%s: head: %#x tail: %#x issued: %#x\n",
+ __func__, ioat_chan->head, ioat_chan->tail, ioat_chan->issued);
+
+ /*
+ * At restart of the channel, the completion address and the
+ * channel status will be 0 due to starting a new chain. Since
+ * it's new chain and the first descriptor "fails", there is
+ * nothing to clean up. We do not want to reap the entire submitted
+ * chain due to this 0 address value and then BUG.
+ */
+ if (!phys_complete)
+ return;
+
+ active = ioat_ring_active(ioat_chan);
+ for (i = 0; i < active && !seen_current; i++) {
+ struct dma_async_tx_descriptor *tx;
+
+ prefetch(ioat_get_ring_ent(ioat_chan, idx + i + 1));
+ desc = ioat_get_ring_ent(ioat_chan, idx + i);
+ dump_desc_dbg(ioat_chan, desc);
+
+ /* set err stat if we are using dwbes */
+ if (ioat_dma->cap & IOAT_CAP_DWBES)
+ desc_get_errstat(ioat_chan, desc);
+
+ tx = &desc->txd;
+ if (tx->cookie) {
+ dma_cookie_complete(tx);
+ dma_descriptor_unmap(tx);
+ dmaengine_desc_get_callback_invoke(tx, NULL);
+ tx->callback = NULL;
+ tx->callback_result = NULL;
+ }
+
+ if (tx->phys == phys_complete)
+ seen_current = true;
+
+ /* skip extended descriptors */
+ if (desc_has_ext(desc)) {
+ BUG_ON(i + 1 >= active);
+ i++;
+ }
+
+ /* cleanup super extended descriptors */
+ if (desc->sed) {
+ ioat_free_sed(ioat_dma, desc->sed);
+ desc->sed = NULL;
+ }
+ }
+
+ /* finish all descriptor reads before incrementing tail */
+ smp_mb();
+ ioat_chan->tail = idx + i;
+ /* no active descs have written a completion? */
+ BUG_ON(active && !seen_current);
+ ioat_chan->last_completion = phys_complete;
+
+ if (active - i == 0) {
+ dev_dbg(to_dev(ioat_chan), "%s: cancel completion timeout\n",
+ __func__);
+ mod_timer_pending(&ioat_chan->timer, jiffies + IDLE_TIMEOUT);
+ }
+
+ /* microsecond delay by sysfs variable per pending descriptor */
+ if (ioat_chan->intr_coalesce != ioat_chan->prev_intr_coalesce) {
+ writew(min((ioat_chan->intr_coalesce * (active - i)),
+ IOAT_INTRDELAY_MASK),
+ ioat_chan->ioat_dma->reg_base + IOAT_INTRDELAY_OFFSET);
+ ioat_chan->prev_intr_coalesce = ioat_chan->intr_coalesce;
+ }
+}
+
+static void ioat_cleanup(struct ioatdma_chan *ioat_chan)
+{
+ u64 phys_complete;
+
+ spin_lock_bh(&ioat_chan->cleanup_lock);
+
+ if (ioat_cleanup_preamble(ioat_chan, &phys_complete))
+ __cleanup(ioat_chan, phys_complete);
+
+ if (is_ioat_halted(*ioat_chan->completion)) {
+ u32 chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
+
+ if (chanerr &
+ (IOAT_CHANERR_HANDLE_MASK | IOAT_CHANERR_RECOVER_MASK)) {
+ mod_timer_pending(&ioat_chan->timer, jiffies + IDLE_TIMEOUT);
+ ioat_eh(ioat_chan);
+ }
+ }
+
+ spin_unlock_bh(&ioat_chan->cleanup_lock);
+}
+
+void ioat_cleanup_event(struct tasklet_struct *t)
+{
+ struct ioatdma_chan *ioat_chan = from_tasklet(ioat_chan, t, cleanup_task);
+
+ ioat_cleanup(ioat_chan);
+ if (!test_bit(IOAT_RUN, &ioat_chan->state))
+ return;
+ writew(IOAT_CHANCTRL_RUN, ioat_chan->reg_base + IOAT_CHANCTRL_OFFSET);
+}
+
+static void ioat_restart_channel(struct ioatdma_chan *ioat_chan)
+{
+ u64 phys_complete;
+
+ /* set the completion address register again */
+ writel(lower_32_bits(ioat_chan->completion_dma),
+ ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_LOW);
+ writel(upper_32_bits(ioat_chan->completion_dma),
+ ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH);
+
+ ioat_quiesce(ioat_chan, 0);
+ if (ioat_cleanup_preamble(ioat_chan, &phys_complete))
+ __cleanup(ioat_chan, phys_complete);
+
+ __ioat_restart_chan(ioat_chan);
+}
+
+
+static void ioat_abort_descs(struct ioatdma_chan *ioat_chan)
+{
+ struct ioatdma_device *ioat_dma = ioat_chan->ioat_dma;
+ struct ioat_ring_ent *desc;
+ u16 active;
+ int idx = ioat_chan->tail, i;
+
+ /*
+ * We assume that the failed descriptor has been processed.
+ * Now we are just returning all the remaining submitted
+ * descriptors to abort.
+ */
+ active = ioat_ring_active(ioat_chan);
+
+ /* we skip the failed descriptor that tail points to */
+ for (i = 1; i < active; i++) {
+ struct dma_async_tx_descriptor *tx;
+
+ prefetch(ioat_get_ring_ent(ioat_chan, idx + i + 1));
+ desc = ioat_get_ring_ent(ioat_chan, idx + i);
+
+ tx = &desc->txd;
+ if (tx->cookie) {
+ struct dmaengine_result res;
+
+ dma_cookie_complete(tx);
+ dma_descriptor_unmap(tx);
+ res.result = DMA_TRANS_ABORTED;
+ dmaengine_desc_get_callback_invoke(tx, &res);
+ tx->callback = NULL;
+ tx->callback_result = NULL;
+ }
+
+ /* skip extended descriptors */
+ if (desc_has_ext(desc)) {
+ WARN_ON(i + 1 >= active);
+ i++;
+ }
+
+ /* cleanup super extended descriptors */
+ if (desc->sed) {
+ ioat_free_sed(ioat_dma, desc->sed);
+ desc->sed = NULL;
+ }
+ }
+
+ smp_mb(); /* finish all descriptor reads before incrementing tail */
+ ioat_chan->tail = idx + active;
+
+ desc = ioat_get_ring_ent(ioat_chan, ioat_chan->tail);
+ ioat_chan->last_completion = *ioat_chan->completion = desc->txd.phys;
+}
+
+static void ioat_eh(struct ioatdma_chan *ioat_chan)
+{
+ struct pci_dev *pdev = to_pdev(ioat_chan);
+ struct ioat_dma_descriptor *hw;
+ struct dma_async_tx_descriptor *tx;
+ u64 phys_complete;
+ struct ioat_ring_ent *desc;
+ u32 err_handled = 0;
+ u32 chanerr_int;
+ u32 chanerr;
+ bool abort = false;
+ struct dmaengine_result res;
+
+ /* cleanup so tail points to descriptor that caused the error */
+ if (ioat_cleanup_preamble(ioat_chan, &phys_complete))
+ __cleanup(ioat_chan, phys_complete);
+
+ chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
+ pci_read_config_dword(pdev, IOAT_PCI_CHANERR_INT_OFFSET, &chanerr_int);
+
+ dev_dbg(to_dev(ioat_chan), "%s: error = %x:%x\n",
+ __func__, chanerr, chanerr_int);
+
+ desc = ioat_get_ring_ent(ioat_chan, ioat_chan->tail);
+ hw = desc->hw;
+ dump_desc_dbg(ioat_chan, desc);
+
+ switch (hw->ctl_f.op) {
+ case IOAT_OP_XOR_VAL:
+ if (chanerr & IOAT_CHANERR_XOR_P_OR_CRC_ERR) {
+ *desc->result |= SUM_CHECK_P_RESULT;
+ err_handled |= IOAT_CHANERR_XOR_P_OR_CRC_ERR;
+ }
+ break;
+ case IOAT_OP_PQ_VAL:
+ case IOAT_OP_PQ_VAL_16S:
+ if (chanerr & IOAT_CHANERR_XOR_P_OR_CRC_ERR) {
+ *desc->result |= SUM_CHECK_P_RESULT;
+ err_handled |= IOAT_CHANERR_XOR_P_OR_CRC_ERR;
+ }
+ if (chanerr & IOAT_CHANERR_XOR_Q_ERR) {
+ *desc->result |= SUM_CHECK_Q_RESULT;
+ err_handled |= IOAT_CHANERR_XOR_Q_ERR;
+ }
+ break;
+ }
+
+ if (chanerr & IOAT_CHANERR_RECOVER_MASK) {
+ if (chanerr & IOAT_CHANERR_READ_DATA_ERR) {
+ res.result = DMA_TRANS_READ_FAILED;
+ err_handled |= IOAT_CHANERR_READ_DATA_ERR;
+ } else if (chanerr & IOAT_CHANERR_WRITE_DATA_ERR) {
+ res.result = DMA_TRANS_WRITE_FAILED;
+ err_handled |= IOAT_CHANERR_WRITE_DATA_ERR;
+ }
+
+ abort = true;
+ } else
+ res.result = DMA_TRANS_NOERROR;
+
+ /* fault on unhandled error or spurious halt */
+ if (chanerr ^ err_handled || chanerr == 0) {
+ dev_err(to_dev(ioat_chan), "%s: fatal error (%x:%x)\n",
+ __func__, chanerr, err_handled);
+ dev_err(to_dev(ioat_chan), "Errors handled:\n");
+ ioat_print_chanerrs(ioat_chan, err_handled);
+ dev_err(to_dev(ioat_chan), "Errors not handled:\n");
+ ioat_print_chanerrs(ioat_chan, (chanerr & ~err_handled));
+
+ BUG();
+ }
+
+ /* cleanup the faulty descriptor since we are continuing */
+ tx = &desc->txd;
+ if (tx->cookie) {
+ dma_cookie_complete(tx);
+ dma_descriptor_unmap(tx);
+ dmaengine_desc_get_callback_invoke(tx, &res);
+ tx->callback = NULL;
+ tx->callback_result = NULL;
+ }
+
+ /* mark faulting descriptor as complete */
+ *ioat_chan->completion = desc->txd.phys;
+
+ spin_lock_bh(&ioat_chan->prep_lock);
+ /* we need abort all descriptors */
+ if (abort) {
+ ioat_abort_descs(ioat_chan);
+ /* clean up the channel, we could be in weird state */
+ ioat_reset_hw(ioat_chan);
+ }
+
+ writel(chanerr, ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
+ pci_write_config_dword(pdev, IOAT_PCI_CHANERR_INT_OFFSET, chanerr_int);
+
+ ioat_restart_channel(ioat_chan);
+ spin_unlock_bh(&ioat_chan->prep_lock);
+}
+
+static void check_active(struct ioatdma_chan *ioat_chan)
+{
+ if (ioat_ring_active(ioat_chan)) {
+ mod_timer(&ioat_chan->timer, jiffies + COMPLETION_TIMEOUT);
+ return;
+ }
+
+ if (test_and_clear_bit(IOAT_CHAN_ACTIVE, &ioat_chan->state))
+ mod_timer_pending(&ioat_chan->timer, jiffies + IDLE_TIMEOUT);
+}
+
+static void ioat_reboot_chan(struct ioatdma_chan *ioat_chan)
+{
+ spin_lock_bh(&ioat_chan->prep_lock);
+ set_bit(IOAT_CHAN_DOWN, &ioat_chan->state);
+ spin_unlock_bh(&ioat_chan->prep_lock);
+
+ ioat_abort_descs(ioat_chan);
+ dev_warn(to_dev(ioat_chan), "Reset channel...\n");
+ ioat_reset_hw(ioat_chan);
+ dev_warn(to_dev(ioat_chan), "Restart channel...\n");
+ ioat_restart_channel(ioat_chan);
+
+ spin_lock_bh(&ioat_chan->prep_lock);
+ clear_bit(IOAT_CHAN_DOWN, &ioat_chan->state);
+ spin_unlock_bh(&ioat_chan->prep_lock);
+}
+
+void ioat_timer_event(struct timer_list *t)
+{
+ struct ioatdma_chan *ioat_chan = from_timer(ioat_chan, t, timer);
+ dma_addr_t phys_complete;
+ u64 status;
+
+ status = ioat_chansts(ioat_chan);
+
+ /* when halted due to errors check for channel
+ * programming errors before advancing the completion state
+ */
+ if (is_ioat_halted(status)) {
+ u32 chanerr;
+
+ chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
+ dev_err(to_dev(ioat_chan), "%s: Channel halted (%x)\n",
+ __func__, chanerr);
+ dev_err(to_dev(ioat_chan), "Errors:\n");
+ ioat_print_chanerrs(ioat_chan, chanerr);
+
+ if (test_bit(IOAT_RUN, &ioat_chan->state)) {
+ spin_lock_bh(&ioat_chan->cleanup_lock);
+ ioat_reboot_chan(ioat_chan);
+ spin_unlock_bh(&ioat_chan->cleanup_lock);
+ }
+
+ return;
+ }
+
+ spin_lock_bh(&ioat_chan->cleanup_lock);
+
+ /* handle the no-actives case */
+ if (!ioat_ring_active(ioat_chan)) {
+ spin_lock_bh(&ioat_chan->prep_lock);
+ check_active(ioat_chan);
+ spin_unlock_bh(&ioat_chan->prep_lock);
+ goto unlock_out;
+ }
+
+ /* handle the missed cleanup case */
+ if (ioat_cleanup_preamble(ioat_chan, &phys_complete)) {
+ /* timer restarted in ioat_cleanup_preamble
+ * and IOAT_COMPLETION_ACK cleared
+ */
+ __cleanup(ioat_chan, phys_complete);
+ goto unlock_out;
+ }
+
+ /* if we haven't made progress and we have already
+ * acknowledged a pending completion once, then be more
+ * forceful with a restart
+ */
+ if (test_bit(IOAT_COMPLETION_ACK, &ioat_chan->state)) {
+ u32 chanerr;
+
+ chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
+ dev_err(to_dev(ioat_chan), "CHANSTS: %#Lx CHANERR: %#x\n",
+ status, chanerr);
+ dev_err(to_dev(ioat_chan), "Errors:\n");
+ ioat_print_chanerrs(ioat_chan, chanerr);
+
+ dev_dbg(to_dev(ioat_chan), "Active descriptors: %d\n",
+ ioat_ring_active(ioat_chan));
+
+ ioat_reboot_chan(ioat_chan);
+
+ goto unlock_out;
+ }
+
+ /* handle missed issue pending case */
+ if (ioat_ring_pending(ioat_chan)) {
+ dev_warn(to_dev(ioat_chan),
+ "Completion timeout with pending descriptors\n");
+ spin_lock_bh(&ioat_chan->prep_lock);
+ __ioat_issue_pending(ioat_chan);
+ spin_unlock_bh(&ioat_chan->prep_lock);
+ }
+
+ set_bit(IOAT_COMPLETION_ACK, &ioat_chan->state);
+ mod_timer(&ioat_chan->timer, jiffies + COMPLETION_TIMEOUT);
+unlock_out:
+ spin_unlock_bh(&ioat_chan->cleanup_lock);
+}
+
+enum dma_status
+ioat_tx_status(struct dma_chan *c, dma_cookie_t cookie,
+ struct dma_tx_state *txstate)
+{
+ struct ioatdma_chan *ioat_chan = to_ioat_chan(c);
+ enum dma_status ret;
+
+ ret = dma_cookie_status(c, cookie, txstate);
+ if (ret == DMA_COMPLETE)
+ return ret;
+
+ ioat_cleanup(ioat_chan);
+
+ return dma_cookie_status(c, cookie, txstate);
+}
+
+int ioat_reset_hw(struct ioatdma_chan *ioat_chan)
+{
+ /* throw away whatever the channel was doing and get it
+ * initialized, with ioat3 specific workarounds
+ */
+ struct ioatdma_device *ioat_dma = ioat_chan->ioat_dma;
+ struct pci_dev *pdev = ioat_dma->pdev;
+ u32 chanerr;
+ u16 dev_id;
+ int err;
+
+ ioat_quiesce(ioat_chan, msecs_to_jiffies(100));
+
+ chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
+ writel(chanerr, ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
+
+ if (ioat_dma->version < IOAT_VER_3_3) {
+ /* clear any pending errors */
+ err = pci_read_config_dword(pdev,
+ IOAT_PCI_CHANERR_INT_OFFSET, &chanerr);
+ if (err) {
+ dev_err(&pdev->dev,
+ "channel error register unreachable\n");
+ return err;
+ }
+ pci_write_config_dword(pdev,
+ IOAT_PCI_CHANERR_INT_OFFSET, chanerr);
+
+ /* Clear DMAUNCERRSTS Cfg-Reg Parity Error status bit
+ * (workaround for spurious config parity error after restart)
+ */
+ pci_read_config_word(pdev, IOAT_PCI_DEVICE_ID_OFFSET, &dev_id);
+ if (dev_id == PCI_DEVICE_ID_INTEL_IOAT_TBG0) {
+ pci_write_config_dword(pdev,
+ IOAT_PCI_DMAUNCERRSTS_OFFSET,
+ 0x10);
+ }
+ }
+
+ if (is_bwd_ioat(pdev) && (ioat_dma->irq_mode == IOAT_MSIX)) {
+ ioat_dma->msixtba0 = readq(ioat_dma->reg_base + 0x1000);
+ ioat_dma->msixdata0 = readq(ioat_dma->reg_base + 0x1008);
+ ioat_dma->msixpba = readq(ioat_dma->reg_base + 0x1800);
+ }
+
+
+ err = ioat_reset_sync(ioat_chan, msecs_to_jiffies(200));
+ if (!err) {
+ if (is_bwd_ioat(pdev) && (ioat_dma->irq_mode == IOAT_MSIX)) {
+ writeq(ioat_dma->msixtba0, ioat_dma->reg_base + 0x1000);
+ writeq(ioat_dma->msixdata0, ioat_dma->reg_base + 0x1008);
+ writeq(ioat_dma->msixpba, ioat_dma->reg_base + 0x1800);
+ }
+ }
+
+ if (err)
+ dev_err(&pdev->dev, "Failed to reset: %d\n", err);
+
+ return err;
+}
diff --git a/drivers/dma/ioat/dma.h b/drivers/dma/ioat/dma.h
new file mode 100644
index 000000000..35e06b382
--- /dev/null
+++ b/drivers/dma/ioat/dma.h
@@ -0,0 +1,406 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved.
+ */
+#ifndef IOATDMA_H
+#define IOATDMA_H
+
+#include <linux/dmaengine.h>
+#include <linux/init.h>
+#include <linux/dmapool.h>
+#include <linux/cache.h>
+#include <linux/pci_ids.h>
+#include <linux/circ_buf.h>
+#include <linux/interrupt.h>
+#include "registers.h"
+#include "hw.h"
+
+#define IOAT_DMA_VERSION "5.00"
+
+#define IOAT_DMA_DCA_ANY_CPU ~0
+
+#define to_ioatdma_device(dev) container_of(dev, struct ioatdma_device, dma_dev)
+#define to_dev(ioat_chan) (&(ioat_chan)->ioat_dma->pdev->dev)
+#define to_pdev(ioat_chan) ((ioat_chan)->ioat_dma->pdev)
+
+#define chan_num(ch) ((int)((ch)->reg_base - (ch)->ioat_dma->reg_base) / 0x80)
+
+/* ioat hardware assumes at least two sources for raid operations */
+#define src_cnt_to_sw(x) ((x) + 2)
+#define src_cnt_to_hw(x) ((x) - 2)
+#define ndest_to_sw(x) ((x) + 1)
+#define ndest_to_hw(x) ((x) - 1)
+#define src16_cnt_to_sw(x) ((x) + 9)
+#define src16_cnt_to_hw(x) ((x) - 9)
+
+/*
+ * workaround for IOAT ver.3.0 null descriptor issue
+ * (channel returns error when size is 0)
+ */
+#define NULL_DESC_BUFFER_SIZE 1
+
+enum ioat_irq_mode {
+ IOAT_NOIRQ = 0,
+ IOAT_MSIX,
+ IOAT_MSI,
+ IOAT_INTX
+};
+
+/**
+ * struct ioatdma_device - internal representation of a IOAT device
+ * @pdev: PCI-Express device
+ * @reg_base: MMIO register space base address
+ * @completion_pool: DMA buffers for completion ops
+ * @sed_hw_pool: DMA super descriptor pools
+ * @dma_dev: embedded struct dma_device
+ * @version: version of ioatdma device
+ * @msix_entries: irq handlers
+ * @idx: per channel data
+ * @dca: direct cache access context
+ * @irq_mode: interrupt mode (INTX, MSI, MSIX)
+ * @cap: read DMA capabilities register
+ */
+struct ioatdma_device {
+ struct pci_dev *pdev;
+ void __iomem *reg_base;
+ struct dma_pool *completion_pool;
+#define MAX_SED_POOLS 5
+ struct dma_pool *sed_hw_pool[MAX_SED_POOLS];
+ struct dma_device dma_dev;
+ u8 version;
+#define IOAT_MAX_CHANS 4
+ struct msix_entry msix_entries[IOAT_MAX_CHANS];
+ struct ioatdma_chan *idx[IOAT_MAX_CHANS];
+ struct dca_provider *dca;
+ enum ioat_irq_mode irq_mode;
+ u32 cap;
+
+ /* shadow version for CB3.3 chan reset errata workaround */
+ u64 msixtba0;
+ u64 msixdata0;
+ u32 msixpba;
+};
+
+#define IOAT_MAX_ORDER 16
+#define IOAT_MAX_DESCS (1 << IOAT_MAX_ORDER)
+#define IOAT_CHUNK_SIZE (SZ_512K)
+#define IOAT_DESCS_PER_CHUNK (IOAT_CHUNK_SIZE / IOAT_DESC_SZ)
+
+struct ioat_descs {
+ void *virt;
+ dma_addr_t hw;
+};
+
+struct ioatdma_chan {
+ struct dma_chan dma_chan;
+ void __iomem *reg_base;
+ dma_addr_t last_completion;
+ spinlock_t cleanup_lock;
+ unsigned long state;
+ #define IOAT_CHAN_DOWN 0
+ #define IOAT_COMPLETION_ACK 1
+ #define IOAT_RESET_PENDING 2
+ #define IOAT_KOBJ_INIT_FAIL 3
+ #define IOAT_RUN 5
+ #define IOAT_CHAN_ACTIVE 6
+ struct timer_list timer;
+ #define RESET_DELAY msecs_to_jiffies(100)
+ struct ioatdma_device *ioat_dma;
+ dma_addr_t completion_dma;
+ u64 *completion;
+ struct tasklet_struct cleanup_task;
+ struct kobject kobj;
+
+/* ioat v2 / v3 channel attributes
+ * @xfercap_log; log2 of channel max transfer length (for fast division)
+ * @head: allocated index
+ * @issued: hardware notification point
+ * @tail: cleanup index
+ * @dmacount: identical to 'head' except for occasionally resetting to zero
+ * @alloc_order: log2 of the number of allocated descriptors
+ * @produce: number of descriptors to produce at submit time
+ * @ring: software ring buffer implementation of hardware ring
+ * @prep_lock: serializes descriptor preparation (producers)
+ */
+ size_t xfercap_log;
+ u16 head;
+ u16 issued;
+ u16 tail;
+ u16 dmacount;
+ u16 alloc_order;
+ u16 produce;
+ struct ioat_ring_ent **ring;
+ spinlock_t prep_lock;
+ struct ioat_descs descs[IOAT_MAX_DESCS / IOAT_DESCS_PER_CHUNK];
+ int desc_chunks;
+ int intr_coalesce;
+ int prev_intr_coalesce;
+};
+
+struct ioat_sysfs_entry {
+ struct attribute attr;
+ ssize_t (*show)(struct dma_chan *, char *);
+ ssize_t (*store)(struct dma_chan *, const char *, size_t);
+};
+
+/**
+ * struct ioat_sed_ent - wrapper around super extended hardware descriptor
+ * @hw: hardware SED
+ * @dma: dma address for the SED
+ * @parent: point to the dma descriptor that's the parent
+ * @hw_pool: descriptor pool index
+ */
+struct ioat_sed_ent {
+ struct ioat_sed_raw_descriptor *hw;
+ dma_addr_t dma;
+ struct ioat_ring_ent *parent;
+ unsigned int hw_pool;
+};
+
+/**
+ * struct ioat_ring_ent - wrapper around hardware descriptor
+ * @hw: hardware DMA descriptor (for memcpy)
+ * @xor: hardware xor descriptor
+ * @xor_ex: hardware xor extension descriptor
+ * @pq: hardware pq descriptor
+ * @pq_ex: hardware pq extension descriptor
+ * @pqu: hardware pq update descriptor
+ * @raw: hardware raw (un-typed) descriptor
+ * @txd: the generic software descriptor for all engines
+ * @len: total transaction length for unmap
+ * @result: asynchronous result of validate operations
+ * @id: identifier for debug
+ * @sed: pointer to super extended descriptor sw desc
+ */
+
+struct ioat_ring_ent {
+ union {
+ struct ioat_dma_descriptor *hw;
+ struct ioat_xor_descriptor *xor;
+ struct ioat_xor_ext_descriptor *xor_ex;
+ struct ioat_pq_descriptor *pq;
+ struct ioat_pq_ext_descriptor *pq_ex;
+ struct ioat_pq_update_descriptor *pqu;
+ struct ioat_raw_descriptor *raw;
+ };
+ size_t len;
+ struct dma_async_tx_descriptor txd;
+ enum sum_check_flags *result;
+ #ifdef DEBUG
+ int id;
+ #endif
+ struct ioat_sed_ent *sed;
+};
+
+extern const struct sysfs_ops ioat_sysfs_ops;
+extern struct ioat_sysfs_entry ioat_version_attr;
+extern struct ioat_sysfs_entry ioat_cap_attr;
+extern int ioat_pending_level;
+extern struct kobj_type ioat_ktype;
+extern struct kmem_cache *ioat_cache;
+extern struct kmem_cache *ioat_sed_cache;
+
+static inline struct ioatdma_chan *to_ioat_chan(struct dma_chan *c)
+{
+ return container_of(c, struct ioatdma_chan, dma_chan);
+}
+
+/* wrapper around hardware descriptor format + additional software fields */
+#ifdef DEBUG
+#define set_desc_id(desc, i) ((desc)->id = (i))
+#define desc_id(desc) ((desc)->id)
+#else
+#define set_desc_id(desc, i)
+#define desc_id(desc) (0)
+#endif
+
+static inline void
+__dump_desc_dbg(struct ioatdma_chan *ioat_chan, struct ioat_dma_descriptor *hw,
+ struct dma_async_tx_descriptor *tx, int id)
+{
+ struct device *dev = to_dev(ioat_chan);
+
+ dev_dbg(dev, "desc[%d]: (%#llx->%#llx) cookie: %d flags: %#x"
+ " ctl: %#10.8x (op: %#x int_en: %d compl: %d)\n", id,
+ (unsigned long long) tx->phys,
+ (unsigned long long) hw->next, tx->cookie, tx->flags,
+ hw->ctl, hw->ctl_f.op, hw->ctl_f.int_en, hw->ctl_f.compl_write);
+}
+
+#define dump_desc_dbg(c, d) \
+ ({ if (d) __dump_desc_dbg(c, d->hw, &d->txd, desc_id(d)); 0; })
+
+static inline struct ioatdma_chan *
+ioat_chan_by_index(struct ioatdma_device *ioat_dma, int index)
+{
+ return ioat_dma->idx[index];
+}
+
+static inline u64 ioat_chansts(struct ioatdma_chan *ioat_chan)
+{
+ return readq(ioat_chan->reg_base + IOAT_CHANSTS_OFFSET);
+}
+
+static inline u64 ioat_chansts_to_addr(u64 status)
+{
+ return status & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR;
+}
+
+static inline u32 ioat_chanerr(struct ioatdma_chan *ioat_chan)
+{
+ return readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
+}
+
+static inline void ioat_suspend(struct ioatdma_chan *ioat_chan)
+{
+ u8 ver = ioat_chan->ioat_dma->version;
+
+ writeb(IOAT_CHANCMD_SUSPEND,
+ ioat_chan->reg_base + IOAT_CHANCMD_OFFSET(ver));
+}
+
+static inline void ioat_reset(struct ioatdma_chan *ioat_chan)
+{
+ u8 ver = ioat_chan->ioat_dma->version;
+
+ writeb(IOAT_CHANCMD_RESET,
+ ioat_chan->reg_base + IOAT_CHANCMD_OFFSET(ver));
+}
+
+static inline bool ioat_reset_pending(struct ioatdma_chan *ioat_chan)
+{
+ u8 ver = ioat_chan->ioat_dma->version;
+ u8 cmd;
+
+ cmd = readb(ioat_chan->reg_base + IOAT_CHANCMD_OFFSET(ver));
+ return (cmd & IOAT_CHANCMD_RESET) == IOAT_CHANCMD_RESET;
+}
+
+static inline bool is_ioat_active(unsigned long status)
+{
+ return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_ACTIVE);
+}
+
+static inline bool is_ioat_idle(unsigned long status)
+{
+ return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_DONE);
+}
+
+static inline bool is_ioat_halted(unsigned long status)
+{
+ return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_HALTED);
+}
+
+static inline bool is_ioat_suspended(unsigned long status)
+{
+ return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_SUSPENDED);
+}
+
+/* channel was fatally programmed */
+static inline bool is_ioat_bug(unsigned long err)
+{
+ return !!err;
+}
+
+
+static inline u32 ioat_ring_size(struct ioatdma_chan *ioat_chan)
+{
+ return 1 << ioat_chan->alloc_order;
+}
+
+/* count of descriptors in flight with the engine */
+static inline u16 ioat_ring_active(struct ioatdma_chan *ioat_chan)
+{
+ return CIRC_CNT(ioat_chan->head, ioat_chan->tail,
+ ioat_ring_size(ioat_chan));
+}
+
+/* count of descriptors pending submission to hardware */
+static inline u16 ioat_ring_pending(struct ioatdma_chan *ioat_chan)
+{
+ return CIRC_CNT(ioat_chan->head, ioat_chan->issued,
+ ioat_ring_size(ioat_chan));
+}
+
+static inline u32 ioat_ring_space(struct ioatdma_chan *ioat_chan)
+{
+ return ioat_ring_size(ioat_chan) - ioat_ring_active(ioat_chan);
+}
+
+static inline u16
+ioat_xferlen_to_descs(struct ioatdma_chan *ioat_chan, size_t len)
+{
+ u16 num_descs = len >> ioat_chan->xfercap_log;
+
+ num_descs += !!(len & ((1 << ioat_chan->xfercap_log) - 1));
+ return num_descs;
+}
+
+static inline struct ioat_ring_ent *
+ioat_get_ring_ent(struct ioatdma_chan *ioat_chan, u16 idx)
+{
+ return ioat_chan->ring[idx & (ioat_ring_size(ioat_chan) - 1)];
+}
+
+static inline void
+ioat_set_chainaddr(struct ioatdma_chan *ioat_chan, u64 addr)
+{
+ writel(addr & 0x00000000FFFFFFFF,
+ ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_LOW);
+ writel(addr >> 32,
+ ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_HIGH);
+}
+
+/* IOAT Prep functions */
+struct dma_async_tx_descriptor *
+ioat_dma_prep_memcpy_lock(struct dma_chan *c, dma_addr_t dma_dest,
+ dma_addr_t dma_src, size_t len, unsigned long flags);
+struct dma_async_tx_descriptor *
+ioat_prep_interrupt_lock(struct dma_chan *c, unsigned long flags);
+struct dma_async_tx_descriptor *
+ioat_prep_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src,
+ unsigned int src_cnt, size_t len, unsigned long flags);
+struct dma_async_tx_descriptor *
+ioat_prep_xor_val(struct dma_chan *chan, dma_addr_t *src,
+ unsigned int src_cnt, size_t len,
+ enum sum_check_flags *result, unsigned long flags);
+struct dma_async_tx_descriptor *
+ioat_prep_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src,
+ unsigned int src_cnt, const unsigned char *scf, size_t len,
+ unsigned long flags);
+struct dma_async_tx_descriptor *
+ioat_prep_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src,
+ unsigned int src_cnt, const unsigned char *scf, size_t len,
+ enum sum_check_flags *pqres, unsigned long flags);
+struct dma_async_tx_descriptor *
+ioat_prep_pqxor(struct dma_chan *chan, dma_addr_t dst, dma_addr_t *src,
+ unsigned int src_cnt, size_t len, unsigned long flags);
+struct dma_async_tx_descriptor *
+ioat_prep_pqxor_val(struct dma_chan *chan, dma_addr_t *src,
+ unsigned int src_cnt, size_t len,
+ enum sum_check_flags *result, unsigned long flags);
+
+/* IOAT Operation functions */
+irqreturn_t ioat_dma_do_interrupt(int irq, void *data);
+irqreturn_t ioat_dma_do_interrupt_msix(int irq, void *data);
+struct ioat_ring_ent **
+ioat_alloc_ring(struct dma_chan *c, int order, gfp_t flags);
+void ioat_start_null_desc(struct ioatdma_chan *ioat_chan);
+void ioat_free_ring_ent(struct ioat_ring_ent *desc, struct dma_chan *chan);
+int ioat_reset_hw(struct ioatdma_chan *ioat_chan);
+enum dma_status
+ioat_tx_status(struct dma_chan *c, dma_cookie_t cookie,
+ struct dma_tx_state *txstate);
+void ioat_cleanup_event(struct tasklet_struct *t);
+void ioat_timer_event(struct timer_list *t);
+int ioat_check_space_lock(struct ioatdma_chan *ioat_chan, int num_descs);
+void ioat_issue_pending(struct dma_chan *chan);
+
+/* IOAT Init functions */
+bool is_bwd_ioat(struct pci_dev *pdev);
+struct dca_provider *ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase);
+void ioat_kobject_add(struct ioatdma_device *ioat_dma, struct kobj_type *type);
+void ioat_kobject_del(struct ioatdma_device *ioat_dma);
+int ioat_dma_setup_interrupts(struct ioatdma_device *ioat_dma);
+void ioat_stop(struct ioatdma_chan *ioat_chan);
+#endif /* IOATDMA_H */
diff --git a/drivers/dma/ioat/hw.h b/drivers/dma/ioat/hw.h
new file mode 100644
index 000000000..79e4e4c09
--- /dev/null
+++ b/drivers/dma/ioat/hw.h
@@ -0,0 +1,271 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved.
+ */
+#ifndef _IOAT_HW_H_
+#define _IOAT_HW_H_
+
+/* PCI Configuration Space Values */
+#define IOAT_MMIO_BAR 0
+
+/* CB device ID's */
+#define PCI_DEVICE_ID_INTEL_IOAT_IVB0 0x0e20
+#define PCI_DEVICE_ID_INTEL_IOAT_IVB1 0x0e21
+#define PCI_DEVICE_ID_INTEL_IOAT_IVB2 0x0e22
+#define PCI_DEVICE_ID_INTEL_IOAT_IVB3 0x0e23
+#define PCI_DEVICE_ID_INTEL_IOAT_IVB4 0x0e24
+#define PCI_DEVICE_ID_INTEL_IOAT_IVB5 0x0e25
+#define PCI_DEVICE_ID_INTEL_IOAT_IVB6 0x0e26
+#define PCI_DEVICE_ID_INTEL_IOAT_IVB7 0x0e27
+#define PCI_DEVICE_ID_INTEL_IOAT_IVB8 0x0e2e
+#define PCI_DEVICE_ID_INTEL_IOAT_IVB9 0x0e2f
+
+#define PCI_DEVICE_ID_INTEL_IOAT_HSW0 0x2f20
+#define PCI_DEVICE_ID_INTEL_IOAT_HSW1 0x2f21
+#define PCI_DEVICE_ID_INTEL_IOAT_HSW2 0x2f22
+#define PCI_DEVICE_ID_INTEL_IOAT_HSW3 0x2f23
+#define PCI_DEVICE_ID_INTEL_IOAT_HSW4 0x2f24
+#define PCI_DEVICE_ID_INTEL_IOAT_HSW5 0x2f25
+#define PCI_DEVICE_ID_INTEL_IOAT_HSW6 0x2f26
+#define PCI_DEVICE_ID_INTEL_IOAT_HSW7 0x2f27
+#define PCI_DEVICE_ID_INTEL_IOAT_HSW8 0x2f2e
+#define PCI_DEVICE_ID_INTEL_IOAT_HSW9 0x2f2f
+
+#define PCI_DEVICE_ID_INTEL_IOAT_BWD0 0x0C50
+#define PCI_DEVICE_ID_INTEL_IOAT_BWD1 0x0C51
+#define PCI_DEVICE_ID_INTEL_IOAT_BWD2 0x0C52
+#define PCI_DEVICE_ID_INTEL_IOAT_BWD3 0x0C53
+
+#define PCI_DEVICE_ID_INTEL_IOAT_BDXDE0 0x6f50
+#define PCI_DEVICE_ID_INTEL_IOAT_BDXDE1 0x6f51
+#define PCI_DEVICE_ID_INTEL_IOAT_BDXDE2 0x6f52
+#define PCI_DEVICE_ID_INTEL_IOAT_BDXDE3 0x6f53
+
+#define PCI_DEVICE_ID_INTEL_IOAT_BDX0 0x6f20
+#define PCI_DEVICE_ID_INTEL_IOAT_BDX1 0x6f21
+#define PCI_DEVICE_ID_INTEL_IOAT_BDX2 0x6f22
+#define PCI_DEVICE_ID_INTEL_IOAT_BDX3 0x6f23
+#define PCI_DEVICE_ID_INTEL_IOAT_BDX4 0x6f24
+#define PCI_DEVICE_ID_INTEL_IOAT_BDX5 0x6f25
+#define PCI_DEVICE_ID_INTEL_IOAT_BDX6 0x6f26
+#define PCI_DEVICE_ID_INTEL_IOAT_BDX7 0x6f27
+#define PCI_DEVICE_ID_INTEL_IOAT_BDX8 0x6f2e
+#define PCI_DEVICE_ID_INTEL_IOAT_BDX9 0x6f2f
+
+#define PCI_DEVICE_ID_INTEL_IOAT_SKX 0x2021
+
+#define PCI_DEVICE_ID_INTEL_IOAT_ICX 0x0b00
+
+#define IOAT_VER_1_2 0x12 /* Version 1.2 */
+#define IOAT_VER_2_0 0x20 /* Version 2.0 */
+#define IOAT_VER_3_0 0x30 /* Version 3.0 */
+#define IOAT_VER_3_2 0x32 /* Version 3.2 */
+#define IOAT_VER_3_3 0x33 /* Version 3.3 */
+#define IOAT_VER_3_4 0x34 /* Version 3.4 */
+
+
+int system_has_dca_enabled(struct pci_dev *pdev);
+
+#define IOAT_DESC_SZ 64
+
+struct ioat_dma_descriptor {
+ uint32_t size;
+ union {
+ uint32_t ctl;
+ struct {
+ unsigned int int_en:1;
+ unsigned int src_snoop_dis:1;
+ unsigned int dest_snoop_dis:1;
+ unsigned int compl_write:1;
+ unsigned int fence:1;
+ unsigned int null:1;
+ unsigned int src_brk:1;
+ unsigned int dest_brk:1;
+ unsigned int bundle:1;
+ unsigned int dest_dca:1;
+ unsigned int hint:1;
+ unsigned int rsvd2:13;
+ #define IOAT_OP_COPY 0x00
+ unsigned int op:8;
+ } ctl_f;
+ };
+ uint64_t src_addr;
+ uint64_t dst_addr;
+ uint64_t next;
+ uint64_t rsv1;
+ uint64_t rsv2;
+ /* store some driver data in an unused portion of the descriptor */
+ union {
+ uint64_t user1;
+ uint64_t tx_cnt;
+ };
+ uint64_t user2;
+};
+
+struct ioat_xor_descriptor {
+ uint32_t size;
+ union {
+ uint32_t ctl;
+ struct {
+ unsigned int int_en:1;
+ unsigned int src_snoop_dis:1;
+ unsigned int dest_snoop_dis:1;
+ unsigned int compl_write:1;
+ unsigned int fence:1;
+ unsigned int src_cnt:3;
+ unsigned int bundle:1;
+ unsigned int dest_dca:1;
+ unsigned int hint:1;
+ unsigned int rsvd:13;
+ #define IOAT_OP_XOR 0x87
+ #define IOAT_OP_XOR_VAL 0x88
+ unsigned int op:8;
+ } ctl_f;
+ };
+ uint64_t src_addr;
+ uint64_t dst_addr;
+ uint64_t next;
+ uint64_t src_addr2;
+ uint64_t src_addr3;
+ uint64_t src_addr4;
+ uint64_t src_addr5;
+};
+
+struct ioat_xor_ext_descriptor {
+ uint64_t src_addr6;
+ uint64_t src_addr7;
+ uint64_t src_addr8;
+ uint64_t next;
+ uint64_t rsvd[4];
+};
+
+struct ioat_pq_descriptor {
+ union {
+ uint32_t size;
+ uint32_t dwbes;
+ struct {
+ unsigned int rsvd:25;
+ unsigned int p_val_err:1;
+ unsigned int q_val_err:1;
+ unsigned int rsvd1:4;
+ unsigned int wbes:1;
+ } dwbes_f;
+ };
+ union {
+ uint32_t ctl;
+ struct {
+ unsigned int int_en:1;
+ unsigned int src_snoop_dis:1;
+ unsigned int dest_snoop_dis:1;
+ unsigned int compl_write:1;
+ unsigned int fence:1;
+ unsigned int src_cnt:3;
+ unsigned int bundle:1;
+ unsigned int dest_dca:1;
+ unsigned int hint:1;
+ unsigned int p_disable:1;
+ unsigned int q_disable:1;
+ unsigned int rsvd2:2;
+ unsigned int wb_en:1;
+ unsigned int prl_en:1;
+ unsigned int rsvd3:7;
+ #define IOAT_OP_PQ 0x89
+ #define IOAT_OP_PQ_VAL 0x8a
+ #define IOAT_OP_PQ_16S 0xa0
+ #define IOAT_OP_PQ_VAL_16S 0xa1
+ unsigned int op:8;
+ } ctl_f;
+ };
+ uint64_t src_addr;
+ uint64_t p_addr;
+ uint64_t next;
+ uint64_t src_addr2;
+ union {
+ uint64_t src_addr3;
+ uint64_t sed_addr;
+ };
+ uint8_t coef[8];
+ uint64_t q_addr;
+};
+
+struct ioat_pq_ext_descriptor {
+ uint64_t src_addr4;
+ uint64_t src_addr5;
+ uint64_t src_addr6;
+ uint64_t next;
+ uint64_t src_addr7;
+ uint64_t src_addr8;
+ uint64_t rsvd[2];
+};
+
+struct ioat_pq_update_descriptor {
+ uint32_t size;
+ union {
+ uint32_t ctl;
+ struct {
+ unsigned int int_en:1;
+ unsigned int src_snoop_dis:1;
+ unsigned int dest_snoop_dis:1;
+ unsigned int compl_write:1;
+ unsigned int fence:1;
+ unsigned int src_cnt:3;
+ unsigned int bundle:1;
+ unsigned int dest_dca:1;
+ unsigned int hint:1;
+ unsigned int p_disable:1;
+ unsigned int q_disable:1;
+ unsigned int rsvd:3;
+ unsigned int coef:8;
+ #define IOAT_OP_PQ_UP 0x8b
+ unsigned int op:8;
+ } ctl_f;
+ };
+ uint64_t src_addr;
+ uint64_t p_addr;
+ uint64_t next;
+ uint64_t src_addr2;
+ uint64_t p_src;
+ uint64_t q_src;
+ uint64_t q_addr;
+};
+
+struct ioat_raw_descriptor {
+ uint64_t field[8];
+};
+
+struct ioat_pq16a_descriptor {
+ uint8_t coef[8];
+ uint64_t src_addr3;
+ uint64_t src_addr4;
+ uint64_t src_addr5;
+ uint64_t src_addr6;
+ uint64_t src_addr7;
+ uint64_t src_addr8;
+ uint64_t src_addr9;
+};
+
+struct ioat_pq16b_descriptor {
+ uint64_t src_addr10;
+ uint64_t src_addr11;
+ uint64_t src_addr12;
+ uint64_t src_addr13;
+ uint64_t src_addr14;
+ uint64_t src_addr15;
+ uint64_t src_addr16;
+ uint64_t rsvd;
+};
+
+union ioat_sed_pq_descriptor {
+ struct ioat_pq16a_descriptor a;
+ struct ioat_pq16b_descriptor b;
+};
+
+#define SED_SIZE 64
+
+struct ioat_sed_raw_descriptor {
+ uint64_t a[8];
+ uint64_t b[8];
+ uint64_t c[8];
+};
+
+#endif
diff --git a/drivers/dma/ioat/init.c b/drivers/dma/ioat/init.c
new file mode 100644
index 000000000..5d707ff63
--- /dev/null
+++ b/drivers/dma/ioat/init.c
@@ -0,0 +1,1455 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Intel I/OAT DMA Linux driver
+ * Copyright(c) 2004 - 2015 Intel Corporation.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/pci.h>
+#include <linux/interrupt.h>
+#include <linux/dmaengine.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/workqueue.h>
+#include <linux/prefetch.h>
+#include <linux/dca.h>
+#include <linux/aer.h>
+#include <linux/sizes.h>
+#include "dma.h"
+#include "registers.h"
+#include "hw.h"
+
+#include "../dmaengine.h"
+
+MODULE_VERSION(IOAT_DMA_VERSION);
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_AUTHOR("Intel Corporation");
+
+static const struct pci_device_id ioat_pci_tbl[] = {
+ /* I/OAT v3 platforms */
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG0) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG1) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG2) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG3) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG4) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG5) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG6) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG7) },
+
+ /* I/OAT v3.2 platforms */
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF0) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF1) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF2) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF3) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF4) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF5) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF6) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF7) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF8) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF9) },
+
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB0) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB1) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB2) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB3) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB4) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB5) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB6) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB7) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB8) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB9) },
+
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB0) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB1) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB2) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB3) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB4) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB5) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB6) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB7) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB8) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB9) },
+
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW0) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW1) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW2) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW3) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW4) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW5) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW6) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW7) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW8) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW9) },
+
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDX0) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDX1) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDX2) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDX3) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDX4) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDX5) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDX6) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDX7) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDX8) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDX9) },
+
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SKX) },
+
+ /* I/OAT v3.3 platforms */
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BWD0) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BWD1) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BWD2) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BWD3) },
+
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDXDE0) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDXDE1) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDXDE2) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDXDE3) },
+
+ /* I/OAT v3.4 platforms */
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_ICX) },
+
+ { 0, }
+};
+MODULE_DEVICE_TABLE(pci, ioat_pci_tbl);
+
+static int ioat_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id);
+static void ioat_remove(struct pci_dev *pdev);
+static void
+ioat_init_channel(struct ioatdma_device *ioat_dma,
+ struct ioatdma_chan *ioat_chan, int idx);
+static void ioat_intr_quirk(struct ioatdma_device *ioat_dma);
+static void ioat_enumerate_channels(struct ioatdma_device *ioat_dma);
+static int ioat3_dma_self_test(struct ioatdma_device *ioat_dma);
+
+static int ioat_dca_enabled = 1;
+module_param(ioat_dca_enabled, int, 0644);
+MODULE_PARM_DESC(ioat_dca_enabled, "control support of dca service (default: 1)");
+int ioat_pending_level = 7;
+module_param(ioat_pending_level, int, 0644);
+MODULE_PARM_DESC(ioat_pending_level,
+ "high-water mark for pushing ioat descriptors (default: 7)");
+static char ioat_interrupt_style[32] = "msix";
+module_param_string(ioat_interrupt_style, ioat_interrupt_style,
+ sizeof(ioat_interrupt_style), 0644);
+MODULE_PARM_DESC(ioat_interrupt_style,
+ "set ioat interrupt style: msix (default), msi, intx");
+
+struct kmem_cache *ioat_cache;
+struct kmem_cache *ioat_sed_cache;
+
+static bool is_jf_ioat(struct pci_dev *pdev)
+{
+ switch (pdev->device) {
+ case PCI_DEVICE_ID_INTEL_IOAT_JSF0:
+ case PCI_DEVICE_ID_INTEL_IOAT_JSF1:
+ case PCI_DEVICE_ID_INTEL_IOAT_JSF2:
+ case PCI_DEVICE_ID_INTEL_IOAT_JSF3:
+ case PCI_DEVICE_ID_INTEL_IOAT_JSF4:
+ case PCI_DEVICE_ID_INTEL_IOAT_JSF5:
+ case PCI_DEVICE_ID_INTEL_IOAT_JSF6:
+ case PCI_DEVICE_ID_INTEL_IOAT_JSF7:
+ case PCI_DEVICE_ID_INTEL_IOAT_JSF8:
+ case PCI_DEVICE_ID_INTEL_IOAT_JSF9:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static bool is_snb_ioat(struct pci_dev *pdev)
+{
+ switch (pdev->device) {
+ case PCI_DEVICE_ID_INTEL_IOAT_SNB0:
+ case PCI_DEVICE_ID_INTEL_IOAT_SNB1:
+ case PCI_DEVICE_ID_INTEL_IOAT_SNB2:
+ case PCI_DEVICE_ID_INTEL_IOAT_SNB3:
+ case PCI_DEVICE_ID_INTEL_IOAT_SNB4:
+ case PCI_DEVICE_ID_INTEL_IOAT_SNB5:
+ case PCI_DEVICE_ID_INTEL_IOAT_SNB6:
+ case PCI_DEVICE_ID_INTEL_IOAT_SNB7:
+ case PCI_DEVICE_ID_INTEL_IOAT_SNB8:
+ case PCI_DEVICE_ID_INTEL_IOAT_SNB9:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static bool is_ivb_ioat(struct pci_dev *pdev)
+{
+ switch (pdev->device) {
+ case PCI_DEVICE_ID_INTEL_IOAT_IVB0:
+ case PCI_DEVICE_ID_INTEL_IOAT_IVB1:
+ case PCI_DEVICE_ID_INTEL_IOAT_IVB2:
+ case PCI_DEVICE_ID_INTEL_IOAT_IVB3:
+ case PCI_DEVICE_ID_INTEL_IOAT_IVB4:
+ case PCI_DEVICE_ID_INTEL_IOAT_IVB5:
+ case PCI_DEVICE_ID_INTEL_IOAT_IVB6:
+ case PCI_DEVICE_ID_INTEL_IOAT_IVB7:
+ case PCI_DEVICE_ID_INTEL_IOAT_IVB8:
+ case PCI_DEVICE_ID_INTEL_IOAT_IVB9:
+ return true;
+ default:
+ return false;
+ }
+
+}
+
+static bool is_hsw_ioat(struct pci_dev *pdev)
+{
+ switch (pdev->device) {
+ case PCI_DEVICE_ID_INTEL_IOAT_HSW0:
+ case PCI_DEVICE_ID_INTEL_IOAT_HSW1:
+ case PCI_DEVICE_ID_INTEL_IOAT_HSW2:
+ case PCI_DEVICE_ID_INTEL_IOAT_HSW3:
+ case PCI_DEVICE_ID_INTEL_IOAT_HSW4:
+ case PCI_DEVICE_ID_INTEL_IOAT_HSW5:
+ case PCI_DEVICE_ID_INTEL_IOAT_HSW6:
+ case PCI_DEVICE_ID_INTEL_IOAT_HSW7:
+ case PCI_DEVICE_ID_INTEL_IOAT_HSW8:
+ case PCI_DEVICE_ID_INTEL_IOAT_HSW9:
+ return true;
+ default:
+ return false;
+ }
+
+}
+
+static bool is_bdx_ioat(struct pci_dev *pdev)
+{
+ switch (pdev->device) {
+ case PCI_DEVICE_ID_INTEL_IOAT_BDX0:
+ case PCI_DEVICE_ID_INTEL_IOAT_BDX1:
+ case PCI_DEVICE_ID_INTEL_IOAT_BDX2:
+ case PCI_DEVICE_ID_INTEL_IOAT_BDX3:
+ case PCI_DEVICE_ID_INTEL_IOAT_BDX4:
+ case PCI_DEVICE_ID_INTEL_IOAT_BDX5:
+ case PCI_DEVICE_ID_INTEL_IOAT_BDX6:
+ case PCI_DEVICE_ID_INTEL_IOAT_BDX7:
+ case PCI_DEVICE_ID_INTEL_IOAT_BDX8:
+ case PCI_DEVICE_ID_INTEL_IOAT_BDX9:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static inline bool is_skx_ioat(struct pci_dev *pdev)
+{
+ return (pdev->device == PCI_DEVICE_ID_INTEL_IOAT_SKX) ? true : false;
+}
+
+static bool is_xeon_cb32(struct pci_dev *pdev)
+{
+ return is_jf_ioat(pdev) || is_snb_ioat(pdev) || is_ivb_ioat(pdev) ||
+ is_hsw_ioat(pdev) || is_bdx_ioat(pdev) || is_skx_ioat(pdev);
+}
+
+bool is_bwd_ioat(struct pci_dev *pdev)
+{
+ switch (pdev->device) {
+ case PCI_DEVICE_ID_INTEL_IOAT_BWD0:
+ case PCI_DEVICE_ID_INTEL_IOAT_BWD1:
+ case PCI_DEVICE_ID_INTEL_IOAT_BWD2:
+ case PCI_DEVICE_ID_INTEL_IOAT_BWD3:
+ /* even though not Atom, BDX-DE has same DMA silicon */
+ case PCI_DEVICE_ID_INTEL_IOAT_BDXDE0:
+ case PCI_DEVICE_ID_INTEL_IOAT_BDXDE1:
+ case PCI_DEVICE_ID_INTEL_IOAT_BDXDE2:
+ case PCI_DEVICE_ID_INTEL_IOAT_BDXDE3:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static bool is_bwd_noraid(struct pci_dev *pdev)
+{
+ switch (pdev->device) {
+ case PCI_DEVICE_ID_INTEL_IOAT_BWD2:
+ case PCI_DEVICE_ID_INTEL_IOAT_BWD3:
+ case PCI_DEVICE_ID_INTEL_IOAT_BDXDE0:
+ case PCI_DEVICE_ID_INTEL_IOAT_BDXDE1:
+ case PCI_DEVICE_ID_INTEL_IOAT_BDXDE2:
+ case PCI_DEVICE_ID_INTEL_IOAT_BDXDE3:
+ return true;
+ default:
+ return false;
+ }
+
+}
+
+/*
+ * Perform a IOAT transaction to verify the HW works.
+ */
+#define IOAT_TEST_SIZE 2000
+
+static void ioat_dma_test_callback(void *dma_async_param)
+{
+ struct completion *cmp = dma_async_param;
+
+ complete(cmp);
+}
+
+/**
+ * ioat_dma_self_test - Perform a IOAT transaction to verify the HW works.
+ * @ioat_dma: dma device to be tested
+ */
+static int ioat_dma_self_test(struct ioatdma_device *ioat_dma)
+{
+ int i;
+ u8 *src;
+ u8 *dest;
+ struct dma_device *dma = &ioat_dma->dma_dev;
+ struct device *dev = &ioat_dma->pdev->dev;
+ struct dma_chan *dma_chan;
+ struct dma_async_tx_descriptor *tx;
+ dma_addr_t dma_dest, dma_src;
+ dma_cookie_t cookie;
+ int err = 0;
+ struct completion cmp;
+ unsigned long tmo;
+ unsigned long flags;
+
+ src = kzalloc(IOAT_TEST_SIZE, GFP_KERNEL);
+ if (!src)
+ return -ENOMEM;
+ dest = kzalloc(IOAT_TEST_SIZE, GFP_KERNEL);
+ if (!dest) {
+ kfree(src);
+ return -ENOMEM;
+ }
+
+ /* Fill in src buffer */
+ for (i = 0; i < IOAT_TEST_SIZE; i++)
+ src[i] = (u8)i;
+
+ /* Start copy, using first DMA channel */
+ dma_chan = container_of(dma->channels.next, struct dma_chan,
+ device_node);
+ if (dma->device_alloc_chan_resources(dma_chan) < 1) {
+ dev_err(dev, "selftest cannot allocate chan resource\n");
+ err = -ENODEV;
+ goto out;
+ }
+
+ dma_src = dma_map_single(dev, src, IOAT_TEST_SIZE, DMA_TO_DEVICE);
+ if (dma_mapping_error(dev, dma_src)) {
+ dev_err(dev, "mapping src buffer failed\n");
+ err = -ENOMEM;
+ goto free_resources;
+ }
+ dma_dest = dma_map_single(dev, dest, IOAT_TEST_SIZE, DMA_FROM_DEVICE);
+ if (dma_mapping_error(dev, dma_dest)) {
+ dev_err(dev, "mapping dest buffer failed\n");
+ err = -ENOMEM;
+ goto unmap_src;
+ }
+ flags = DMA_PREP_INTERRUPT;
+ tx = ioat_dma->dma_dev.device_prep_dma_memcpy(dma_chan, dma_dest,
+ dma_src, IOAT_TEST_SIZE,
+ flags);
+ if (!tx) {
+ dev_err(dev, "Self-test prep failed, disabling\n");
+ err = -ENODEV;
+ goto unmap_dma;
+ }
+
+ async_tx_ack(tx);
+ init_completion(&cmp);
+ tx->callback = ioat_dma_test_callback;
+ tx->callback_param = &cmp;
+ cookie = tx->tx_submit(tx);
+ if (cookie < 0) {
+ dev_err(dev, "Self-test setup failed, disabling\n");
+ err = -ENODEV;
+ goto unmap_dma;
+ }
+ dma->device_issue_pending(dma_chan);
+
+ tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
+
+ if (tmo == 0 ||
+ dma->device_tx_status(dma_chan, cookie, NULL)
+ != DMA_COMPLETE) {
+ dev_err(dev, "Self-test copy timed out, disabling\n");
+ err = -ENODEV;
+ goto unmap_dma;
+ }
+ if (memcmp(src, dest, IOAT_TEST_SIZE)) {
+ dev_err(dev, "Self-test copy failed compare, disabling\n");
+ err = -ENODEV;
+ goto unmap_dma;
+ }
+
+unmap_dma:
+ dma_unmap_single(dev, dma_dest, IOAT_TEST_SIZE, DMA_FROM_DEVICE);
+unmap_src:
+ dma_unmap_single(dev, dma_src, IOAT_TEST_SIZE, DMA_TO_DEVICE);
+free_resources:
+ dma->device_free_chan_resources(dma_chan);
+out:
+ kfree(src);
+ kfree(dest);
+ return err;
+}
+
+/**
+ * ioat_dma_setup_interrupts - setup interrupt handler
+ * @ioat_dma: ioat dma device
+ */
+int ioat_dma_setup_interrupts(struct ioatdma_device *ioat_dma)
+{
+ struct ioatdma_chan *ioat_chan;
+ struct pci_dev *pdev = ioat_dma->pdev;
+ struct device *dev = &pdev->dev;
+ struct msix_entry *msix;
+ int i, j, msixcnt;
+ int err = -EINVAL;
+ u8 intrctrl = 0;
+
+ if (!strcmp(ioat_interrupt_style, "msix"))
+ goto msix;
+ if (!strcmp(ioat_interrupt_style, "msi"))
+ goto msi;
+ if (!strcmp(ioat_interrupt_style, "intx"))
+ goto intx;
+ dev_err(dev, "invalid ioat_interrupt_style %s\n", ioat_interrupt_style);
+ goto err_no_irq;
+
+msix:
+ /* The number of MSI-X vectors should equal the number of channels */
+ msixcnt = ioat_dma->dma_dev.chancnt;
+ for (i = 0; i < msixcnt; i++)
+ ioat_dma->msix_entries[i].entry = i;
+
+ err = pci_enable_msix_exact(pdev, ioat_dma->msix_entries, msixcnt);
+ if (err)
+ goto msi;
+
+ for (i = 0; i < msixcnt; i++) {
+ msix = &ioat_dma->msix_entries[i];
+ ioat_chan = ioat_chan_by_index(ioat_dma, i);
+ err = devm_request_irq(dev, msix->vector,
+ ioat_dma_do_interrupt_msix, 0,
+ "ioat-msix", ioat_chan);
+ if (err) {
+ for (j = 0; j < i; j++) {
+ msix = &ioat_dma->msix_entries[j];
+ ioat_chan = ioat_chan_by_index(ioat_dma, j);
+ devm_free_irq(dev, msix->vector, ioat_chan);
+ }
+ goto msi;
+ }
+ }
+ intrctrl |= IOAT_INTRCTRL_MSIX_VECTOR_CONTROL;
+ ioat_dma->irq_mode = IOAT_MSIX;
+ goto done;
+
+msi:
+ err = pci_enable_msi(pdev);
+ if (err)
+ goto intx;
+
+ err = devm_request_irq(dev, pdev->irq, ioat_dma_do_interrupt, 0,
+ "ioat-msi", ioat_dma);
+ if (err) {
+ pci_disable_msi(pdev);
+ goto intx;
+ }
+ ioat_dma->irq_mode = IOAT_MSI;
+ goto done;
+
+intx:
+ err = devm_request_irq(dev, pdev->irq, ioat_dma_do_interrupt,
+ IRQF_SHARED, "ioat-intx", ioat_dma);
+ if (err)
+ goto err_no_irq;
+
+ ioat_dma->irq_mode = IOAT_INTX;
+done:
+ if (is_bwd_ioat(pdev))
+ ioat_intr_quirk(ioat_dma);
+ intrctrl |= IOAT_INTRCTRL_MASTER_INT_EN;
+ writeb(intrctrl, ioat_dma->reg_base + IOAT_INTRCTRL_OFFSET);
+ return 0;
+
+err_no_irq:
+ /* Disable all interrupt generation */
+ writeb(0, ioat_dma->reg_base + IOAT_INTRCTRL_OFFSET);
+ ioat_dma->irq_mode = IOAT_NOIRQ;
+ dev_err(dev, "no usable interrupts\n");
+ return err;
+}
+
+static void ioat_disable_interrupts(struct ioatdma_device *ioat_dma)
+{
+ /* Disable all interrupt generation */
+ writeb(0, ioat_dma->reg_base + IOAT_INTRCTRL_OFFSET);
+}
+
+static int ioat_probe(struct ioatdma_device *ioat_dma)
+{
+ int err = -ENODEV;
+ struct dma_device *dma = &ioat_dma->dma_dev;
+ struct pci_dev *pdev = ioat_dma->pdev;
+ struct device *dev = &pdev->dev;
+
+ ioat_dma->completion_pool = dma_pool_create("completion_pool", dev,
+ sizeof(u64),
+ SMP_CACHE_BYTES,
+ SMP_CACHE_BYTES);
+
+ if (!ioat_dma->completion_pool) {
+ err = -ENOMEM;
+ goto err_out;
+ }
+
+ ioat_enumerate_channels(ioat_dma);
+
+ dma_cap_set(DMA_MEMCPY, dma->cap_mask);
+ dma->dev = &pdev->dev;
+
+ if (!dma->chancnt) {
+ dev_err(dev, "channel enumeration error\n");
+ goto err_setup_interrupts;
+ }
+
+ err = ioat_dma_setup_interrupts(ioat_dma);
+ if (err)
+ goto err_setup_interrupts;
+
+ err = ioat3_dma_self_test(ioat_dma);
+ if (err)
+ goto err_self_test;
+
+ return 0;
+
+err_self_test:
+ ioat_disable_interrupts(ioat_dma);
+err_setup_interrupts:
+ dma_pool_destroy(ioat_dma->completion_pool);
+err_out:
+ return err;
+}
+
+static int ioat_register(struct ioatdma_device *ioat_dma)
+{
+ int err = dma_async_device_register(&ioat_dma->dma_dev);
+
+ if (err) {
+ ioat_disable_interrupts(ioat_dma);
+ dma_pool_destroy(ioat_dma->completion_pool);
+ }
+
+ return err;
+}
+
+static void ioat_dma_remove(struct ioatdma_device *ioat_dma)
+{
+ struct dma_device *dma = &ioat_dma->dma_dev;
+
+ ioat_disable_interrupts(ioat_dma);
+
+ ioat_kobject_del(ioat_dma);
+
+ dma_async_device_unregister(dma);
+}
+
+/**
+ * ioat_enumerate_channels - find and initialize the device's channels
+ * @ioat_dma: the ioat dma device to be enumerated
+ */
+static void ioat_enumerate_channels(struct ioatdma_device *ioat_dma)
+{
+ struct ioatdma_chan *ioat_chan;
+ struct device *dev = &ioat_dma->pdev->dev;
+ struct dma_device *dma = &ioat_dma->dma_dev;
+ u8 xfercap_log;
+ int i;
+
+ INIT_LIST_HEAD(&dma->channels);
+ dma->chancnt = readb(ioat_dma->reg_base + IOAT_CHANCNT_OFFSET);
+ dma->chancnt &= 0x1f; /* bits [4:0] valid */
+ if (dma->chancnt > ARRAY_SIZE(ioat_dma->idx)) {
+ dev_warn(dev, "(%d) exceeds max supported channels (%zu)\n",
+ dma->chancnt, ARRAY_SIZE(ioat_dma->idx));
+ dma->chancnt = ARRAY_SIZE(ioat_dma->idx);
+ }
+ xfercap_log = readb(ioat_dma->reg_base + IOAT_XFERCAP_OFFSET);
+ xfercap_log &= 0x1f; /* bits [4:0] valid */
+ if (xfercap_log == 0)
+ return;
+ dev_dbg(dev, "%s: xfercap = %d\n", __func__, 1 << xfercap_log);
+
+ for (i = 0; i < dma->chancnt; i++) {
+ ioat_chan = kzalloc(sizeof(*ioat_chan), GFP_KERNEL);
+ if (!ioat_chan)
+ break;
+
+ ioat_init_channel(ioat_dma, ioat_chan, i);
+ ioat_chan->xfercap_log = xfercap_log;
+ spin_lock_init(&ioat_chan->prep_lock);
+ if (ioat_reset_hw(ioat_chan)) {
+ i = 0;
+ break;
+ }
+ }
+ dma->chancnt = i;
+}
+
+/**
+ * ioat_free_chan_resources - release all the descriptors
+ * @c: the channel to be cleaned
+ */
+static void ioat_free_chan_resources(struct dma_chan *c)
+{
+ struct ioatdma_chan *ioat_chan = to_ioat_chan(c);
+ struct ioatdma_device *ioat_dma = ioat_chan->ioat_dma;
+ struct ioat_ring_ent *desc;
+ const int total_descs = 1 << ioat_chan->alloc_order;
+ int descs;
+ int i;
+
+ /* Before freeing channel resources first check
+ * if they have been previously allocated for this channel.
+ */
+ if (!ioat_chan->ring)
+ return;
+
+ ioat_stop(ioat_chan);
+
+ if (!test_bit(IOAT_CHAN_DOWN, &ioat_chan->state)) {
+ ioat_reset_hw(ioat_chan);
+
+ /* Put LTR to idle */
+ if (ioat_dma->version >= IOAT_VER_3_4)
+ writeb(IOAT_CHAN_LTR_SWSEL_IDLE,
+ ioat_chan->reg_base +
+ IOAT_CHAN_LTR_SWSEL_OFFSET);
+ }
+
+ spin_lock_bh(&ioat_chan->cleanup_lock);
+ spin_lock_bh(&ioat_chan->prep_lock);
+ descs = ioat_ring_space(ioat_chan);
+ dev_dbg(to_dev(ioat_chan), "freeing %d idle descriptors\n", descs);
+ for (i = 0; i < descs; i++) {
+ desc = ioat_get_ring_ent(ioat_chan, ioat_chan->head + i);
+ ioat_free_ring_ent(desc, c);
+ }
+
+ if (descs < total_descs)
+ dev_err(to_dev(ioat_chan), "Freeing %d in use descriptors!\n",
+ total_descs - descs);
+
+ for (i = 0; i < total_descs - descs; i++) {
+ desc = ioat_get_ring_ent(ioat_chan, ioat_chan->tail + i);
+ dump_desc_dbg(ioat_chan, desc);
+ ioat_free_ring_ent(desc, c);
+ }
+
+ for (i = 0; i < ioat_chan->desc_chunks; i++) {
+ dma_free_coherent(to_dev(ioat_chan), IOAT_CHUNK_SIZE,
+ ioat_chan->descs[i].virt,
+ ioat_chan->descs[i].hw);
+ ioat_chan->descs[i].virt = NULL;
+ ioat_chan->descs[i].hw = 0;
+ }
+ ioat_chan->desc_chunks = 0;
+
+ kfree(ioat_chan->ring);
+ ioat_chan->ring = NULL;
+ ioat_chan->alloc_order = 0;
+ dma_pool_free(ioat_dma->completion_pool, ioat_chan->completion,
+ ioat_chan->completion_dma);
+ spin_unlock_bh(&ioat_chan->prep_lock);
+ spin_unlock_bh(&ioat_chan->cleanup_lock);
+
+ ioat_chan->last_completion = 0;
+ ioat_chan->completion_dma = 0;
+ ioat_chan->dmacount = 0;
+}
+
+/* ioat_alloc_chan_resources - allocate/initialize ioat descriptor ring
+ * @chan: channel to be initialized
+ */
+static int ioat_alloc_chan_resources(struct dma_chan *c)
+{
+ struct ioatdma_chan *ioat_chan = to_ioat_chan(c);
+ struct ioat_ring_ent **ring;
+ u64 status;
+ int order;
+ int i = 0;
+ u32 chanerr;
+
+ /* have we already been set up? */
+ if (ioat_chan->ring)
+ return 1 << ioat_chan->alloc_order;
+
+ /* Setup register to interrupt and write completion status on error */
+ writew(IOAT_CHANCTRL_RUN, ioat_chan->reg_base + IOAT_CHANCTRL_OFFSET);
+
+ /* allocate a completion writeback area */
+ /* doing 2 32bit writes to mmio since 1 64b write doesn't work */
+ ioat_chan->completion =
+ dma_pool_zalloc(ioat_chan->ioat_dma->completion_pool,
+ GFP_NOWAIT, &ioat_chan->completion_dma);
+ if (!ioat_chan->completion)
+ return -ENOMEM;
+
+ writel(((u64)ioat_chan->completion_dma) & 0x00000000FFFFFFFF,
+ ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_LOW);
+ writel(((u64)ioat_chan->completion_dma) >> 32,
+ ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH);
+
+ order = IOAT_MAX_ORDER;
+ ring = ioat_alloc_ring(c, order, GFP_NOWAIT);
+ if (!ring)
+ return -ENOMEM;
+
+ spin_lock_bh(&ioat_chan->cleanup_lock);
+ spin_lock_bh(&ioat_chan->prep_lock);
+ ioat_chan->ring = ring;
+ ioat_chan->head = 0;
+ ioat_chan->issued = 0;
+ ioat_chan->tail = 0;
+ ioat_chan->alloc_order = order;
+ set_bit(IOAT_RUN, &ioat_chan->state);
+ spin_unlock_bh(&ioat_chan->prep_lock);
+ spin_unlock_bh(&ioat_chan->cleanup_lock);
+
+ /* Setting up LTR values for 3.4 or later */
+ if (ioat_chan->ioat_dma->version >= IOAT_VER_3_4) {
+ u32 lat_val;
+
+ lat_val = IOAT_CHAN_LTR_ACTIVE_SNVAL |
+ IOAT_CHAN_LTR_ACTIVE_SNLATSCALE |
+ IOAT_CHAN_LTR_ACTIVE_SNREQMNT;
+ writel(lat_val, ioat_chan->reg_base +
+ IOAT_CHAN_LTR_ACTIVE_OFFSET);
+
+ lat_val = IOAT_CHAN_LTR_IDLE_SNVAL |
+ IOAT_CHAN_LTR_IDLE_SNLATSCALE |
+ IOAT_CHAN_LTR_IDLE_SNREQMNT;
+ writel(lat_val, ioat_chan->reg_base +
+ IOAT_CHAN_LTR_IDLE_OFFSET);
+
+ /* Select to active */
+ writeb(IOAT_CHAN_LTR_SWSEL_ACTIVE,
+ ioat_chan->reg_base +
+ IOAT_CHAN_LTR_SWSEL_OFFSET);
+ }
+
+ ioat_start_null_desc(ioat_chan);
+
+ /* check that we got off the ground */
+ do {
+ udelay(1);
+ status = ioat_chansts(ioat_chan);
+ } while (i++ < 20 && !is_ioat_active(status) && !is_ioat_idle(status));
+
+ if (is_ioat_active(status) || is_ioat_idle(status))
+ return 1 << ioat_chan->alloc_order;
+
+ chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
+
+ dev_WARN(to_dev(ioat_chan),
+ "failed to start channel chanerr: %#x\n", chanerr);
+ ioat_free_chan_resources(c);
+ return -EFAULT;
+}
+
+/* common channel initialization */
+static void
+ioat_init_channel(struct ioatdma_device *ioat_dma,
+ struct ioatdma_chan *ioat_chan, int idx)
+{
+ struct dma_device *dma = &ioat_dma->dma_dev;
+
+ ioat_chan->ioat_dma = ioat_dma;
+ ioat_chan->reg_base = ioat_dma->reg_base + (0x80 * (idx + 1));
+ spin_lock_init(&ioat_chan->cleanup_lock);
+ ioat_chan->dma_chan.device = dma;
+ dma_cookie_init(&ioat_chan->dma_chan);
+ list_add_tail(&ioat_chan->dma_chan.device_node, &dma->channels);
+ ioat_dma->idx[idx] = ioat_chan;
+ timer_setup(&ioat_chan->timer, ioat_timer_event, 0);
+ tasklet_setup(&ioat_chan->cleanup_task, ioat_cleanup_event);
+}
+
+#define IOAT_NUM_SRC_TEST 6 /* must be <= 8 */
+static int ioat_xor_val_self_test(struct ioatdma_device *ioat_dma)
+{
+ int i, src_idx;
+ struct page *dest;
+ struct page *xor_srcs[IOAT_NUM_SRC_TEST];
+ struct page *xor_val_srcs[IOAT_NUM_SRC_TEST + 1];
+ dma_addr_t dma_srcs[IOAT_NUM_SRC_TEST + 1];
+ dma_addr_t dest_dma;
+ struct dma_async_tx_descriptor *tx;
+ struct dma_chan *dma_chan;
+ dma_cookie_t cookie;
+ u8 cmp_byte = 0;
+ u32 cmp_word;
+ u32 xor_val_result;
+ int err = 0;
+ struct completion cmp;
+ unsigned long tmo;
+ struct device *dev = &ioat_dma->pdev->dev;
+ struct dma_device *dma = &ioat_dma->dma_dev;
+ u8 op = 0;
+
+ dev_dbg(dev, "%s\n", __func__);
+
+ if (!dma_has_cap(DMA_XOR, dma->cap_mask))
+ return 0;
+
+ for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++) {
+ xor_srcs[src_idx] = alloc_page(GFP_KERNEL);
+ if (!xor_srcs[src_idx]) {
+ while (src_idx--)
+ __free_page(xor_srcs[src_idx]);
+ return -ENOMEM;
+ }
+ }
+
+ dest = alloc_page(GFP_KERNEL);
+ if (!dest) {
+ while (src_idx--)
+ __free_page(xor_srcs[src_idx]);
+ return -ENOMEM;
+ }
+
+ /* Fill in src buffers */
+ for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++) {
+ u8 *ptr = page_address(xor_srcs[src_idx]);
+
+ for (i = 0; i < PAGE_SIZE; i++)
+ ptr[i] = (1 << src_idx);
+ }
+
+ for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++)
+ cmp_byte ^= (u8) (1 << src_idx);
+
+ cmp_word = (cmp_byte << 24) | (cmp_byte << 16) |
+ (cmp_byte << 8) | cmp_byte;
+
+ memset(page_address(dest), 0, PAGE_SIZE);
+
+ dma_chan = container_of(dma->channels.next, struct dma_chan,
+ device_node);
+ if (dma->device_alloc_chan_resources(dma_chan) < 1) {
+ err = -ENODEV;
+ goto out;
+ }
+
+ /* test xor */
+ op = IOAT_OP_XOR;
+
+ dest_dma = dma_map_page(dev, dest, 0, PAGE_SIZE, DMA_FROM_DEVICE);
+ if (dma_mapping_error(dev, dest_dma)) {
+ err = -ENOMEM;
+ goto free_resources;
+ }
+
+ for (i = 0; i < IOAT_NUM_SRC_TEST; i++) {
+ dma_srcs[i] = dma_map_page(dev, xor_srcs[i], 0, PAGE_SIZE,
+ DMA_TO_DEVICE);
+ if (dma_mapping_error(dev, dma_srcs[i])) {
+ err = -ENOMEM;
+ goto dma_unmap;
+ }
+ }
+ tx = dma->device_prep_dma_xor(dma_chan, dest_dma, dma_srcs,
+ IOAT_NUM_SRC_TEST, PAGE_SIZE,
+ DMA_PREP_INTERRUPT);
+
+ if (!tx) {
+ dev_err(dev, "Self-test xor prep failed\n");
+ err = -ENODEV;
+ goto dma_unmap;
+ }
+
+ async_tx_ack(tx);
+ init_completion(&cmp);
+ tx->callback = ioat_dma_test_callback;
+ tx->callback_param = &cmp;
+ cookie = tx->tx_submit(tx);
+ if (cookie < 0) {
+ dev_err(dev, "Self-test xor setup failed\n");
+ err = -ENODEV;
+ goto dma_unmap;
+ }
+ dma->device_issue_pending(dma_chan);
+
+ tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
+
+ if (tmo == 0 ||
+ dma->device_tx_status(dma_chan, cookie, NULL) != DMA_COMPLETE) {
+ dev_err(dev, "Self-test xor timed out\n");
+ err = -ENODEV;
+ goto dma_unmap;
+ }
+
+ for (i = 0; i < IOAT_NUM_SRC_TEST; i++)
+ dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE, DMA_TO_DEVICE);
+
+ dma_sync_single_for_cpu(dev, dest_dma, PAGE_SIZE, DMA_FROM_DEVICE);
+ for (i = 0; i < (PAGE_SIZE / sizeof(u32)); i++) {
+ u32 *ptr = page_address(dest);
+
+ if (ptr[i] != cmp_word) {
+ dev_err(dev, "Self-test xor failed compare\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+ }
+ dma_sync_single_for_device(dev, dest_dma, PAGE_SIZE, DMA_FROM_DEVICE);
+
+ dma_unmap_page(dev, dest_dma, PAGE_SIZE, DMA_FROM_DEVICE);
+
+ /* skip validate if the capability is not present */
+ if (!dma_has_cap(DMA_XOR_VAL, dma_chan->device->cap_mask))
+ goto free_resources;
+
+ op = IOAT_OP_XOR_VAL;
+
+ /* validate the sources with the destintation page */
+ for (i = 0; i < IOAT_NUM_SRC_TEST; i++)
+ xor_val_srcs[i] = xor_srcs[i];
+ xor_val_srcs[i] = dest;
+
+ xor_val_result = 1;
+
+ for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) {
+ dma_srcs[i] = dma_map_page(dev, xor_val_srcs[i], 0, PAGE_SIZE,
+ DMA_TO_DEVICE);
+ if (dma_mapping_error(dev, dma_srcs[i])) {
+ err = -ENOMEM;
+ goto dma_unmap;
+ }
+ }
+ tx = dma->device_prep_dma_xor_val(dma_chan, dma_srcs,
+ IOAT_NUM_SRC_TEST + 1, PAGE_SIZE,
+ &xor_val_result, DMA_PREP_INTERRUPT);
+ if (!tx) {
+ dev_err(dev, "Self-test zero prep failed\n");
+ err = -ENODEV;
+ goto dma_unmap;
+ }
+
+ async_tx_ack(tx);
+ init_completion(&cmp);
+ tx->callback = ioat_dma_test_callback;
+ tx->callback_param = &cmp;
+ cookie = tx->tx_submit(tx);
+ if (cookie < 0) {
+ dev_err(dev, "Self-test zero setup failed\n");
+ err = -ENODEV;
+ goto dma_unmap;
+ }
+ dma->device_issue_pending(dma_chan);
+
+ tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
+
+ if (tmo == 0 ||
+ dma->device_tx_status(dma_chan, cookie, NULL) != DMA_COMPLETE) {
+ dev_err(dev, "Self-test validate timed out\n");
+ err = -ENODEV;
+ goto dma_unmap;
+ }
+
+ for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++)
+ dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE, DMA_TO_DEVICE);
+
+ if (xor_val_result != 0) {
+ dev_err(dev, "Self-test validate failed compare\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+ memset(page_address(dest), 0, PAGE_SIZE);
+
+ /* test for non-zero parity sum */
+ op = IOAT_OP_XOR_VAL;
+
+ xor_val_result = 0;
+ for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) {
+ dma_srcs[i] = dma_map_page(dev, xor_val_srcs[i], 0, PAGE_SIZE,
+ DMA_TO_DEVICE);
+ if (dma_mapping_error(dev, dma_srcs[i])) {
+ err = -ENOMEM;
+ goto dma_unmap;
+ }
+ }
+ tx = dma->device_prep_dma_xor_val(dma_chan, dma_srcs,
+ IOAT_NUM_SRC_TEST + 1, PAGE_SIZE,
+ &xor_val_result, DMA_PREP_INTERRUPT);
+ if (!tx) {
+ dev_err(dev, "Self-test 2nd zero prep failed\n");
+ err = -ENODEV;
+ goto dma_unmap;
+ }
+
+ async_tx_ack(tx);
+ init_completion(&cmp);
+ tx->callback = ioat_dma_test_callback;
+ tx->callback_param = &cmp;
+ cookie = tx->tx_submit(tx);
+ if (cookie < 0) {
+ dev_err(dev, "Self-test 2nd zero setup failed\n");
+ err = -ENODEV;
+ goto dma_unmap;
+ }
+ dma->device_issue_pending(dma_chan);
+
+ tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
+
+ if (tmo == 0 ||
+ dma->device_tx_status(dma_chan, cookie, NULL) != DMA_COMPLETE) {
+ dev_err(dev, "Self-test 2nd validate timed out\n");
+ err = -ENODEV;
+ goto dma_unmap;
+ }
+
+ if (xor_val_result != SUM_CHECK_P_RESULT) {
+ dev_err(dev, "Self-test validate failed compare\n");
+ err = -ENODEV;
+ goto dma_unmap;
+ }
+
+ for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++)
+ dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE, DMA_TO_DEVICE);
+
+ goto free_resources;
+dma_unmap:
+ if (op == IOAT_OP_XOR) {
+ while (--i >= 0)
+ dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE,
+ DMA_TO_DEVICE);
+ dma_unmap_page(dev, dest_dma, PAGE_SIZE, DMA_FROM_DEVICE);
+ } else if (op == IOAT_OP_XOR_VAL) {
+ while (--i >= 0)
+ dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE,
+ DMA_TO_DEVICE);
+ }
+free_resources:
+ dma->device_free_chan_resources(dma_chan);
+out:
+ src_idx = IOAT_NUM_SRC_TEST;
+ while (src_idx--)
+ __free_page(xor_srcs[src_idx]);
+ __free_page(dest);
+ return err;
+}
+
+static int ioat3_dma_self_test(struct ioatdma_device *ioat_dma)
+{
+ int rc;
+
+ rc = ioat_dma_self_test(ioat_dma);
+ if (rc)
+ return rc;
+
+ rc = ioat_xor_val_self_test(ioat_dma);
+
+ return rc;
+}
+
+static void ioat_intr_quirk(struct ioatdma_device *ioat_dma)
+{
+ struct dma_device *dma;
+ struct dma_chan *c;
+ struct ioatdma_chan *ioat_chan;
+ u32 errmask;
+
+ dma = &ioat_dma->dma_dev;
+
+ /*
+ * if we have descriptor write back error status, we mask the
+ * error interrupts
+ */
+ if (ioat_dma->cap & IOAT_CAP_DWBES) {
+ list_for_each_entry(c, &dma->channels, device_node) {
+ ioat_chan = to_ioat_chan(c);
+ errmask = readl(ioat_chan->reg_base +
+ IOAT_CHANERR_MASK_OFFSET);
+ errmask |= IOAT_CHANERR_XOR_P_OR_CRC_ERR |
+ IOAT_CHANERR_XOR_Q_ERR;
+ writel(errmask, ioat_chan->reg_base +
+ IOAT_CHANERR_MASK_OFFSET);
+ }
+ }
+}
+
+static int ioat3_dma_probe(struct ioatdma_device *ioat_dma, int dca)
+{
+ struct pci_dev *pdev = ioat_dma->pdev;
+ int dca_en = system_has_dca_enabled(pdev);
+ struct dma_device *dma;
+ struct dma_chan *c;
+ struct ioatdma_chan *ioat_chan;
+ int err;
+ u16 val16;
+
+ dma = &ioat_dma->dma_dev;
+ dma->device_prep_dma_memcpy = ioat_dma_prep_memcpy_lock;
+ dma->device_issue_pending = ioat_issue_pending;
+ dma->device_alloc_chan_resources = ioat_alloc_chan_resources;
+ dma->device_free_chan_resources = ioat_free_chan_resources;
+
+ dma_cap_set(DMA_INTERRUPT, dma->cap_mask);
+ dma->device_prep_dma_interrupt = ioat_prep_interrupt_lock;
+
+ ioat_dma->cap = readl(ioat_dma->reg_base + IOAT_DMA_CAP_OFFSET);
+
+ if (is_xeon_cb32(pdev) || is_bwd_noraid(pdev))
+ ioat_dma->cap &=
+ ~(IOAT_CAP_XOR | IOAT_CAP_PQ | IOAT_CAP_RAID16SS);
+
+ /* dca is incompatible with raid operations */
+ if (dca_en && (ioat_dma->cap & (IOAT_CAP_XOR|IOAT_CAP_PQ)))
+ ioat_dma->cap &= ~(IOAT_CAP_XOR|IOAT_CAP_PQ);
+
+ if (ioat_dma->cap & IOAT_CAP_XOR) {
+ dma->max_xor = 8;
+
+ dma_cap_set(DMA_XOR, dma->cap_mask);
+ dma->device_prep_dma_xor = ioat_prep_xor;
+
+ dma_cap_set(DMA_XOR_VAL, dma->cap_mask);
+ dma->device_prep_dma_xor_val = ioat_prep_xor_val;
+ }
+
+ if (ioat_dma->cap & IOAT_CAP_PQ) {
+
+ dma->device_prep_dma_pq = ioat_prep_pq;
+ dma->device_prep_dma_pq_val = ioat_prep_pq_val;
+ dma_cap_set(DMA_PQ, dma->cap_mask);
+ dma_cap_set(DMA_PQ_VAL, dma->cap_mask);
+
+ if (ioat_dma->cap & IOAT_CAP_RAID16SS)
+ dma_set_maxpq(dma, 16, 0);
+ else
+ dma_set_maxpq(dma, 8, 0);
+
+ if (!(ioat_dma->cap & IOAT_CAP_XOR)) {
+ dma->device_prep_dma_xor = ioat_prep_pqxor;
+ dma->device_prep_dma_xor_val = ioat_prep_pqxor_val;
+ dma_cap_set(DMA_XOR, dma->cap_mask);
+ dma_cap_set(DMA_XOR_VAL, dma->cap_mask);
+
+ if (ioat_dma->cap & IOAT_CAP_RAID16SS)
+ dma->max_xor = 16;
+ else
+ dma->max_xor = 8;
+ }
+ }
+
+ dma->device_tx_status = ioat_tx_status;
+
+ /* starting with CB3.3 super extended descriptors are supported */
+ if (ioat_dma->cap & IOAT_CAP_RAID16SS) {
+ char pool_name[14];
+ int i;
+
+ for (i = 0; i < MAX_SED_POOLS; i++) {
+ snprintf(pool_name, 14, "ioat_hw%d_sed", i);
+
+ /* allocate SED DMA pool */
+ ioat_dma->sed_hw_pool[i] = dmam_pool_create(pool_name,
+ &pdev->dev,
+ SED_SIZE * (i + 1), 64, 0);
+ if (!ioat_dma->sed_hw_pool[i])
+ return -ENOMEM;
+
+ }
+ }
+
+ if (!(ioat_dma->cap & (IOAT_CAP_XOR | IOAT_CAP_PQ)))
+ dma_cap_set(DMA_PRIVATE, dma->cap_mask);
+
+ err = ioat_probe(ioat_dma);
+ if (err)
+ return err;
+
+ list_for_each_entry(c, &dma->channels, device_node) {
+ ioat_chan = to_ioat_chan(c);
+ writel(IOAT_DMA_DCA_ANY_CPU,
+ ioat_chan->reg_base + IOAT_DCACTRL_OFFSET);
+ }
+
+ err = ioat_register(ioat_dma);
+ if (err)
+ return err;
+
+ ioat_kobject_add(ioat_dma, &ioat_ktype);
+
+ if (dca)
+ ioat_dma->dca = ioat_dca_init(pdev, ioat_dma->reg_base);
+
+ /* disable relaxed ordering */
+ err = pcie_capability_read_word(pdev, IOAT_DEVCTRL_OFFSET, &val16);
+ if (err)
+ return pcibios_err_to_errno(err);
+
+ /* clear relaxed ordering enable */
+ val16 &= ~IOAT_DEVCTRL_ROE;
+ err = pcie_capability_write_word(pdev, IOAT_DEVCTRL_OFFSET, val16);
+ if (err)
+ return pcibios_err_to_errno(err);
+
+ if (ioat_dma->cap & IOAT_CAP_DPS)
+ writeb(ioat_pending_level + 1,
+ ioat_dma->reg_base + IOAT_PREFETCH_LIMIT_OFFSET);
+
+ return 0;
+}
+
+static void ioat_shutdown(struct pci_dev *pdev)
+{
+ struct ioatdma_device *ioat_dma = pci_get_drvdata(pdev);
+ struct ioatdma_chan *ioat_chan;
+ int i;
+
+ if (!ioat_dma)
+ return;
+
+ for (i = 0; i < IOAT_MAX_CHANS; i++) {
+ ioat_chan = ioat_dma->idx[i];
+ if (!ioat_chan)
+ continue;
+
+ spin_lock_bh(&ioat_chan->prep_lock);
+ set_bit(IOAT_CHAN_DOWN, &ioat_chan->state);
+ spin_unlock_bh(&ioat_chan->prep_lock);
+ /*
+ * Synchronization rule for del_timer_sync():
+ * - The caller must not hold locks which would prevent
+ * completion of the timer's handler.
+ * So prep_lock cannot be held before calling it.
+ */
+ del_timer_sync(&ioat_chan->timer);
+
+ /* this should quiesce then reset */
+ ioat_reset_hw(ioat_chan);
+ }
+
+ ioat_disable_interrupts(ioat_dma);
+}
+
+static void ioat_resume(struct ioatdma_device *ioat_dma)
+{
+ struct ioatdma_chan *ioat_chan;
+ u32 chanerr;
+ int i;
+
+ for (i = 0; i < IOAT_MAX_CHANS; i++) {
+ ioat_chan = ioat_dma->idx[i];
+ if (!ioat_chan)
+ continue;
+
+ spin_lock_bh(&ioat_chan->prep_lock);
+ clear_bit(IOAT_CHAN_DOWN, &ioat_chan->state);
+ spin_unlock_bh(&ioat_chan->prep_lock);
+
+ chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
+ writel(chanerr, ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
+
+ /* no need to reset as shutdown already did that */
+ }
+}
+
+#define DRV_NAME "ioatdma"
+
+static pci_ers_result_t ioat_pcie_error_detected(struct pci_dev *pdev,
+ pci_channel_state_t error)
+{
+ dev_dbg(&pdev->dev, "%s: PCIe AER error %d\n", DRV_NAME, error);
+
+ /* quiesce and block I/O */
+ ioat_shutdown(pdev);
+
+ return PCI_ERS_RESULT_NEED_RESET;
+}
+
+static pci_ers_result_t ioat_pcie_error_slot_reset(struct pci_dev *pdev)
+{
+ pci_ers_result_t result = PCI_ERS_RESULT_RECOVERED;
+
+ dev_dbg(&pdev->dev, "%s post reset handling\n", DRV_NAME);
+
+ if (pci_enable_device_mem(pdev) < 0) {
+ dev_err(&pdev->dev,
+ "Failed to enable PCIe device after reset.\n");
+ result = PCI_ERS_RESULT_DISCONNECT;
+ } else {
+ pci_set_master(pdev);
+ pci_restore_state(pdev);
+ pci_save_state(pdev);
+ pci_wake_from_d3(pdev, false);
+ }
+
+ return result;
+}
+
+static void ioat_pcie_error_resume(struct pci_dev *pdev)
+{
+ struct ioatdma_device *ioat_dma = pci_get_drvdata(pdev);
+
+ dev_dbg(&pdev->dev, "%s: AER handling resuming\n", DRV_NAME);
+
+ /* initialize and bring everything back */
+ ioat_resume(ioat_dma);
+}
+
+static const struct pci_error_handlers ioat_err_handler = {
+ .error_detected = ioat_pcie_error_detected,
+ .slot_reset = ioat_pcie_error_slot_reset,
+ .resume = ioat_pcie_error_resume,
+};
+
+static struct pci_driver ioat_pci_driver = {
+ .name = DRV_NAME,
+ .id_table = ioat_pci_tbl,
+ .probe = ioat_pci_probe,
+ .remove = ioat_remove,
+ .shutdown = ioat_shutdown,
+ .err_handler = &ioat_err_handler,
+};
+
+static void release_ioatdma(struct dma_device *device)
+{
+ struct ioatdma_device *d = to_ioatdma_device(device);
+ int i;
+
+ for (i = 0; i < IOAT_MAX_CHANS; i++)
+ kfree(d->idx[i]);
+
+ dma_pool_destroy(d->completion_pool);
+ kfree(d);
+}
+
+static struct ioatdma_device *
+alloc_ioatdma(struct pci_dev *pdev, void __iomem *iobase)
+{
+ struct ioatdma_device *d = kzalloc(sizeof(*d), GFP_KERNEL);
+
+ if (!d)
+ return NULL;
+ d->pdev = pdev;
+ d->reg_base = iobase;
+ d->dma_dev.device_release = release_ioatdma;
+ return d;
+}
+
+static int ioat_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+ void __iomem * const *iomap;
+ struct device *dev = &pdev->dev;
+ struct ioatdma_device *device;
+ int err;
+
+ err = pcim_enable_device(pdev);
+ if (err)
+ return err;
+
+ err = pcim_iomap_regions(pdev, 1 << IOAT_MMIO_BAR, DRV_NAME);
+ if (err)
+ return err;
+ iomap = pcim_iomap_table(pdev);
+ if (!iomap)
+ return -ENOMEM;
+
+ err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
+ if (err)
+ return err;
+
+ device = alloc_ioatdma(pdev, iomap[IOAT_MMIO_BAR]);
+ if (!device)
+ return -ENOMEM;
+ pci_set_master(pdev);
+ pci_set_drvdata(pdev, device);
+
+ device->version = readb(device->reg_base + IOAT_VER_OFFSET);
+ if (device->version >= IOAT_VER_3_4)
+ ioat_dca_enabled = 0;
+ if (device->version >= IOAT_VER_3_0) {
+ if (is_skx_ioat(pdev))
+ device->version = IOAT_VER_3_2;
+ err = ioat3_dma_probe(device, ioat_dca_enabled);
+
+ if (device->version >= IOAT_VER_3_3)
+ pci_enable_pcie_error_reporting(pdev);
+ } else
+ return -ENODEV;
+
+ if (err) {
+ dev_err(dev, "Intel(R) I/OAT DMA Engine init failed\n");
+ pci_disable_pcie_error_reporting(pdev);
+ return -ENODEV;
+ }
+
+ return 0;
+}
+
+static void ioat_remove(struct pci_dev *pdev)
+{
+ struct ioatdma_device *device = pci_get_drvdata(pdev);
+
+ if (!device)
+ return;
+
+ ioat_shutdown(pdev);
+
+ dev_err(&pdev->dev, "Removing dma and dca services\n");
+ if (device->dca) {
+ unregister_dca_provider(device->dca, &pdev->dev);
+ free_dca_provider(device->dca);
+ device->dca = NULL;
+ }
+
+ pci_disable_pcie_error_reporting(pdev);
+ ioat_dma_remove(device);
+}
+
+static int __init ioat_init_module(void)
+{
+ int err = -ENOMEM;
+
+ pr_info("%s: Intel(R) QuickData Technology Driver %s\n",
+ DRV_NAME, IOAT_DMA_VERSION);
+
+ ioat_cache = kmem_cache_create("ioat", sizeof(struct ioat_ring_ent),
+ 0, SLAB_HWCACHE_ALIGN, NULL);
+ if (!ioat_cache)
+ return -ENOMEM;
+
+ ioat_sed_cache = KMEM_CACHE(ioat_sed_ent, 0);
+ if (!ioat_sed_cache)
+ goto err_ioat_cache;
+
+ err = pci_register_driver(&ioat_pci_driver);
+ if (err)
+ goto err_ioat3_cache;
+
+ return 0;
+
+ err_ioat3_cache:
+ kmem_cache_destroy(ioat_sed_cache);
+
+ err_ioat_cache:
+ kmem_cache_destroy(ioat_cache);
+
+ return err;
+}
+module_init(ioat_init_module);
+
+static void __exit ioat_exit_module(void)
+{
+ pci_unregister_driver(&ioat_pci_driver);
+ kmem_cache_destroy(ioat_cache);
+}
+module_exit(ioat_exit_module);
diff --git a/drivers/dma/ioat/prep.c b/drivers/dma/ioat/prep.c
new file mode 100644
index 000000000..033d9f2b3
--- /dev/null
+++ b/drivers/dma/ioat/prep.c
@@ -0,0 +1,737 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Intel I/OAT DMA Linux driver
+ * Copyright(c) 2004 - 2015 Intel Corporation.
+ */
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/gfp.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/prefetch.h>
+#include "../dmaengine.h"
+#include "registers.h"
+#include "hw.h"
+#include "dma.h"
+
+#define MAX_SCF 256
+
+/* provide a lookup table for setting the source address in the base or
+ * extended descriptor of an xor or pq descriptor
+ */
+static const u8 xor_idx_to_desc = 0xe0;
+static const u8 xor_idx_to_field[] = { 1, 4, 5, 6, 7, 0, 1, 2 };
+static const u8 pq_idx_to_desc = 0xf8;
+static const u8 pq16_idx_to_desc[] = { 0, 0, 1, 1, 1, 1, 1, 1, 1,
+ 2, 2, 2, 2, 2, 2, 2 };
+static const u8 pq_idx_to_field[] = { 1, 4, 5, 0, 1, 2, 4, 5 };
+static const u8 pq16_idx_to_field[] = { 1, 4, 1, 2, 3, 4, 5, 6, 7,
+ 0, 1, 2, 3, 4, 5, 6 };
+
+static void xor_set_src(struct ioat_raw_descriptor *descs[2],
+ dma_addr_t addr, u32 offset, int idx)
+{
+ struct ioat_raw_descriptor *raw = descs[xor_idx_to_desc >> idx & 1];
+
+ raw->field[xor_idx_to_field[idx]] = addr + offset;
+}
+
+static dma_addr_t pq_get_src(struct ioat_raw_descriptor *descs[2], int idx)
+{
+ struct ioat_raw_descriptor *raw = descs[pq_idx_to_desc >> idx & 1];
+
+ return raw->field[pq_idx_to_field[idx]];
+}
+
+static dma_addr_t pq16_get_src(struct ioat_raw_descriptor *desc[3], int idx)
+{
+ struct ioat_raw_descriptor *raw = desc[pq16_idx_to_desc[idx]];
+
+ return raw->field[pq16_idx_to_field[idx]];
+}
+
+static void pq_set_src(struct ioat_raw_descriptor *descs[2],
+ dma_addr_t addr, u32 offset, u8 coef, int idx)
+{
+ struct ioat_pq_descriptor *pq = (struct ioat_pq_descriptor *) descs[0];
+ struct ioat_raw_descriptor *raw = descs[pq_idx_to_desc >> idx & 1];
+
+ raw->field[pq_idx_to_field[idx]] = addr + offset;
+ pq->coef[idx] = coef;
+}
+
+static void pq16_set_src(struct ioat_raw_descriptor *desc[3],
+ dma_addr_t addr, u32 offset, u8 coef, unsigned idx)
+{
+ struct ioat_pq_descriptor *pq = (struct ioat_pq_descriptor *)desc[0];
+ struct ioat_pq16a_descriptor *pq16 =
+ (struct ioat_pq16a_descriptor *)desc[1];
+ struct ioat_raw_descriptor *raw = desc[pq16_idx_to_desc[idx]];
+
+ raw->field[pq16_idx_to_field[idx]] = addr + offset;
+
+ if (idx < 8)
+ pq->coef[idx] = coef;
+ else
+ pq16->coef[idx - 8] = coef;
+}
+
+static struct ioat_sed_ent *
+ioat3_alloc_sed(struct ioatdma_device *ioat_dma, unsigned int hw_pool)
+{
+ struct ioat_sed_ent *sed;
+ gfp_t flags = __GFP_ZERO | GFP_ATOMIC;
+
+ sed = kmem_cache_alloc(ioat_sed_cache, flags);
+ if (!sed)
+ return NULL;
+
+ sed->hw_pool = hw_pool;
+ sed->hw = dma_pool_alloc(ioat_dma->sed_hw_pool[hw_pool],
+ flags, &sed->dma);
+ if (!sed->hw) {
+ kmem_cache_free(ioat_sed_cache, sed);
+ return NULL;
+ }
+
+ return sed;
+}
+
+struct dma_async_tx_descriptor *
+ioat_dma_prep_memcpy_lock(struct dma_chan *c, dma_addr_t dma_dest,
+ dma_addr_t dma_src, size_t len, unsigned long flags)
+{
+ struct ioatdma_chan *ioat_chan = to_ioat_chan(c);
+ struct ioat_dma_descriptor *hw;
+ struct ioat_ring_ent *desc;
+ dma_addr_t dst = dma_dest;
+ dma_addr_t src = dma_src;
+ size_t total_len = len;
+ int num_descs, idx, i;
+
+ if (test_bit(IOAT_CHAN_DOWN, &ioat_chan->state))
+ return NULL;
+
+ num_descs = ioat_xferlen_to_descs(ioat_chan, len);
+ if (likely(num_descs) &&
+ ioat_check_space_lock(ioat_chan, num_descs) == 0)
+ idx = ioat_chan->head;
+ else
+ return NULL;
+ i = 0;
+ do {
+ size_t copy = min_t(size_t, len, 1 << ioat_chan->xfercap_log);
+
+ desc = ioat_get_ring_ent(ioat_chan, idx + i);
+ hw = desc->hw;
+
+ hw->size = copy;
+ hw->ctl = 0;
+ hw->src_addr = src;
+ hw->dst_addr = dst;
+
+ len -= copy;
+ dst += copy;
+ src += copy;
+ dump_desc_dbg(ioat_chan, desc);
+ } while (++i < num_descs);
+
+ desc->txd.flags = flags;
+ desc->len = total_len;
+ hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
+ hw->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
+ hw->ctl_f.compl_write = 1;
+ dump_desc_dbg(ioat_chan, desc);
+ /* we leave the channel locked to ensure in order submission */
+
+ return &desc->txd;
+}
+
+
+static struct dma_async_tx_descriptor *
+__ioat_prep_xor_lock(struct dma_chan *c, enum sum_check_flags *result,
+ dma_addr_t dest, dma_addr_t *src, unsigned int src_cnt,
+ size_t len, unsigned long flags)
+{
+ struct ioatdma_chan *ioat_chan = to_ioat_chan(c);
+ struct ioat_ring_ent *compl_desc;
+ struct ioat_ring_ent *desc;
+ struct ioat_ring_ent *ext;
+ size_t total_len = len;
+ struct ioat_xor_descriptor *xor;
+ struct ioat_xor_ext_descriptor *xor_ex = NULL;
+ struct ioat_dma_descriptor *hw;
+ int num_descs, with_ext, idx, i;
+ u32 offset = 0;
+ u8 op = result ? IOAT_OP_XOR_VAL : IOAT_OP_XOR;
+
+ BUG_ON(src_cnt < 2);
+
+ num_descs = ioat_xferlen_to_descs(ioat_chan, len);
+ /* we need 2x the number of descriptors to cover greater than 5
+ * sources
+ */
+ if (src_cnt > 5) {
+ with_ext = 1;
+ num_descs *= 2;
+ } else
+ with_ext = 0;
+
+ /* completion writes from the raid engine may pass completion
+ * writes from the legacy engine, so we need one extra null
+ * (legacy) descriptor to ensure all completion writes arrive in
+ * order.
+ */
+ if (likely(num_descs) &&
+ ioat_check_space_lock(ioat_chan, num_descs+1) == 0)
+ idx = ioat_chan->head;
+ else
+ return NULL;
+ i = 0;
+ do {
+ struct ioat_raw_descriptor *descs[2];
+ size_t xfer_size = min_t(size_t,
+ len, 1 << ioat_chan->xfercap_log);
+ int s;
+
+ desc = ioat_get_ring_ent(ioat_chan, idx + i);
+ xor = desc->xor;
+
+ /* save a branch by unconditionally retrieving the
+ * extended descriptor xor_set_src() knows to not write
+ * to it in the single descriptor case
+ */
+ ext = ioat_get_ring_ent(ioat_chan, idx + i + 1);
+ xor_ex = ext->xor_ex;
+
+ descs[0] = (struct ioat_raw_descriptor *) xor;
+ descs[1] = (struct ioat_raw_descriptor *) xor_ex;
+ for (s = 0; s < src_cnt; s++)
+ xor_set_src(descs, src[s], offset, s);
+ xor->size = xfer_size;
+ xor->dst_addr = dest + offset;
+ xor->ctl = 0;
+ xor->ctl_f.op = op;
+ xor->ctl_f.src_cnt = src_cnt_to_hw(src_cnt);
+
+ len -= xfer_size;
+ offset += xfer_size;
+ dump_desc_dbg(ioat_chan, desc);
+ } while ((i += 1 + with_ext) < num_descs);
+
+ /* last xor descriptor carries the unmap parameters and fence bit */
+ desc->txd.flags = flags;
+ desc->len = total_len;
+ if (result)
+ desc->result = result;
+ xor->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
+
+ /* completion descriptor carries interrupt bit */
+ compl_desc = ioat_get_ring_ent(ioat_chan, idx + i);
+ compl_desc->txd.flags = flags & DMA_PREP_INTERRUPT;
+ hw = compl_desc->hw;
+ hw->ctl = 0;
+ hw->ctl_f.null = 1;
+ hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
+ hw->ctl_f.compl_write = 1;
+ hw->size = NULL_DESC_BUFFER_SIZE;
+ dump_desc_dbg(ioat_chan, compl_desc);
+
+ /* we leave the channel locked to ensure in order submission */
+ return &compl_desc->txd;
+}
+
+struct dma_async_tx_descriptor *
+ioat_prep_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src,
+ unsigned int src_cnt, size_t len, unsigned long flags)
+{
+ struct ioatdma_chan *ioat_chan = to_ioat_chan(chan);
+
+ if (test_bit(IOAT_CHAN_DOWN, &ioat_chan->state))
+ return NULL;
+
+ return __ioat_prep_xor_lock(chan, NULL, dest, src, src_cnt, len, flags);
+}
+
+struct dma_async_tx_descriptor *
+ioat_prep_xor_val(struct dma_chan *chan, dma_addr_t *src,
+ unsigned int src_cnt, size_t len,
+ enum sum_check_flags *result, unsigned long flags)
+{
+ struct ioatdma_chan *ioat_chan = to_ioat_chan(chan);
+
+ if (test_bit(IOAT_CHAN_DOWN, &ioat_chan->state))
+ return NULL;
+
+ /* the cleanup routine only sets bits on validate failure, it
+ * does not clear bits on validate success... so clear it here
+ */
+ *result = 0;
+
+ return __ioat_prep_xor_lock(chan, result, src[0], &src[1],
+ src_cnt - 1, len, flags);
+}
+
+static void
+dump_pq_desc_dbg(struct ioatdma_chan *ioat_chan, struct ioat_ring_ent *desc,
+ struct ioat_ring_ent *ext)
+{
+ struct device *dev = to_dev(ioat_chan);
+ struct ioat_pq_descriptor *pq = desc->pq;
+ struct ioat_pq_ext_descriptor *pq_ex = ext ? ext->pq_ex : NULL;
+ struct ioat_raw_descriptor *descs[] = { (void *) pq, (void *) pq_ex };
+ int src_cnt = src_cnt_to_sw(pq->ctl_f.src_cnt);
+ int i;
+
+ dev_dbg(dev, "desc[%d]: (%#llx->%#llx) flags: %#x"
+ " sz: %#10.8x ctl: %#x (op: %#x int: %d compl: %d pq: '%s%s'"
+ " src_cnt: %d)\n",
+ desc_id(desc), (unsigned long long) desc->txd.phys,
+ (unsigned long long) (pq_ex ? pq_ex->next : pq->next),
+ desc->txd.flags, pq->size, pq->ctl, pq->ctl_f.op,
+ pq->ctl_f.int_en, pq->ctl_f.compl_write,
+ pq->ctl_f.p_disable ? "" : "p", pq->ctl_f.q_disable ? "" : "q",
+ pq->ctl_f.src_cnt);
+ for (i = 0; i < src_cnt; i++)
+ dev_dbg(dev, "\tsrc[%d]: %#llx coef: %#x\n", i,
+ (unsigned long long) pq_get_src(descs, i), pq->coef[i]);
+ dev_dbg(dev, "\tP: %#llx\n", pq->p_addr);
+ dev_dbg(dev, "\tQ: %#llx\n", pq->q_addr);
+ dev_dbg(dev, "\tNEXT: %#llx\n", pq->next);
+}
+
+static void dump_pq16_desc_dbg(struct ioatdma_chan *ioat_chan,
+ struct ioat_ring_ent *desc)
+{
+ struct device *dev = to_dev(ioat_chan);
+ struct ioat_pq_descriptor *pq = desc->pq;
+ struct ioat_raw_descriptor *descs[] = { (void *)pq,
+ (void *)pq,
+ (void *)pq };
+ int src_cnt = src16_cnt_to_sw(pq->ctl_f.src_cnt);
+ int i;
+
+ if (desc->sed) {
+ descs[1] = (void *)desc->sed->hw;
+ descs[2] = (void *)desc->sed->hw + 64;
+ }
+
+ dev_dbg(dev, "desc[%d]: (%#llx->%#llx) flags: %#x"
+ " sz: %#x ctl: %#x (op: %#x int: %d compl: %d pq: '%s%s'"
+ " src_cnt: %d)\n",
+ desc_id(desc), (unsigned long long) desc->txd.phys,
+ (unsigned long long) pq->next,
+ desc->txd.flags, pq->size, pq->ctl,
+ pq->ctl_f.op, pq->ctl_f.int_en,
+ pq->ctl_f.compl_write,
+ pq->ctl_f.p_disable ? "" : "p", pq->ctl_f.q_disable ? "" : "q",
+ pq->ctl_f.src_cnt);
+ for (i = 0; i < src_cnt; i++) {
+ dev_dbg(dev, "\tsrc[%d]: %#llx coef: %#x\n", i,
+ (unsigned long long) pq16_get_src(descs, i),
+ pq->coef[i]);
+ }
+ dev_dbg(dev, "\tP: %#llx\n", pq->p_addr);
+ dev_dbg(dev, "\tQ: %#llx\n", pq->q_addr);
+}
+
+static struct dma_async_tx_descriptor *
+__ioat_prep_pq_lock(struct dma_chan *c, enum sum_check_flags *result,
+ const dma_addr_t *dst, const dma_addr_t *src,
+ unsigned int src_cnt, const unsigned char *scf,
+ size_t len, unsigned long flags)
+{
+ struct ioatdma_chan *ioat_chan = to_ioat_chan(c);
+ struct ioatdma_device *ioat_dma = ioat_chan->ioat_dma;
+ struct ioat_ring_ent *compl_desc;
+ struct ioat_ring_ent *desc;
+ struct ioat_ring_ent *ext;
+ size_t total_len = len;
+ struct ioat_pq_descriptor *pq;
+ struct ioat_pq_ext_descriptor *pq_ex = NULL;
+ struct ioat_dma_descriptor *hw;
+ u32 offset = 0;
+ u8 op = result ? IOAT_OP_PQ_VAL : IOAT_OP_PQ;
+ int i, s, idx, with_ext, num_descs;
+ int cb32 = (ioat_dma->version < IOAT_VER_3_3) ? 1 : 0;
+
+ dev_dbg(to_dev(ioat_chan), "%s\n", __func__);
+ /* the engine requires at least two sources (we provide
+ * at least 1 implied source in the DMA_PREP_CONTINUE case)
+ */
+ BUG_ON(src_cnt + dmaf_continue(flags) < 2);
+
+ num_descs = ioat_xferlen_to_descs(ioat_chan, len);
+ /* we need 2x the number of descriptors to cover greater than 3
+ * sources (we need 1 extra source in the q-only continuation
+ * case and 3 extra sources in the p+q continuation case.
+ */
+ if (src_cnt + dmaf_p_disabled_continue(flags) > 3 ||
+ (dmaf_continue(flags) && !dmaf_p_disabled_continue(flags))) {
+ with_ext = 1;
+ num_descs *= 2;
+ } else
+ with_ext = 0;
+
+ /* completion writes from the raid engine may pass completion
+ * writes from the legacy engine, so we need one extra null
+ * (legacy) descriptor to ensure all completion writes arrive in
+ * order.
+ */
+ if (likely(num_descs) &&
+ ioat_check_space_lock(ioat_chan, num_descs + cb32) == 0)
+ idx = ioat_chan->head;
+ else
+ return NULL;
+ i = 0;
+ do {
+ struct ioat_raw_descriptor *descs[2];
+ size_t xfer_size = min_t(size_t, len,
+ 1 << ioat_chan->xfercap_log);
+
+ desc = ioat_get_ring_ent(ioat_chan, idx + i);
+ pq = desc->pq;
+
+ /* save a branch by unconditionally retrieving the
+ * extended descriptor pq_set_src() knows to not write
+ * to it in the single descriptor case
+ */
+ ext = ioat_get_ring_ent(ioat_chan, idx + i + with_ext);
+ pq_ex = ext->pq_ex;
+
+ descs[0] = (struct ioat_raw_descriptor *) pq;
+ descs[1] = (struct ioat_raw_descriptor *) pq_ex;
+
+ for (s = 0; s < src_cnt; s++)
+ pq_set_src(descs, src[s], offset, scf[s], s);
+
+ /* see the comment for dma_maxpq in include/linux/dmaengine.h */
+ if (dmaf_p_disabled_continue(flags))
+ pq_set_src(descs, dst[1], offset, 1, s++);
+ else if (dmaf_continue(flags)) {
+ pq_set_src(descs, dst[0], offset, 0, s++);
+ pq_set_src(descs, dst[1], offset, 1, s++);
+ pq_set_src(descs, dst[1], offset, 0, s++);
+ }
+ pq->size = xfer_size;
+ pq->p_addr = dst[0] + offset;
+ pq->q_addr = dst[1] + offset;
+ pq->ctl = 0;
+ pq->ctl_f.op = op;
+ /* we turn on descriptor write back error status */
+ if (ioat_dma->cap & IOAT_CAP_DWBES)
+ pq->ctl_f.wb_en = result ? 1 : 0;
+ pq->ctl_f.src_cnt = src_cnt_to_hw(s);
+ pq->ctl_f.p_disable = !!(flags & DMA_PREP_PQ_DISABLE_P);
+ pq->ctl_f.q_disable = !!(flags & DMA_PREP_PQ_DISABLE_Q);
+
+ len -= xfer_size;
+ offset += xfer_size;
+ } while ((i += 1 + with_ext) < num_descs);
+
+ /* last pq descriptor carries the unmap parameters and fence bit */
+ desc->txd.flags = flags;
+ desc->len = total_len;
+ if (result)
+ desc->result = result;
+ pq->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
+ dump_pq_desc_dbg(ioat_chan, desc, ext);
+
+ if (!cb32) {
+ pq->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
+ pq->ctl_f.compl_write = 1;
+ compl_desc = desc;
+ } else {
+ /* completion descriptor carries interrupt bit */
+ compl_desc = ioat_get_ring_ent(ioat_chan, idx + i);
+ compl_desc->txd.flags = flags & DMA_PREP_INTERRUPT;
+ hw = compl_desc->hw;
+ hw->ctl = 0;
+ hw->ctl_f.null = 1;
+ hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
+ hw->ctl_f.compl_write = 1;
+ hw->size = NULL_DESC_BUFFER_SIZE;
+ dump_desc_dbg(ioat_chan, compl_desc);
+ }
+
+
+ /* we leave the channel locked to ensure in order submission */
+ return &compl_desc->txd;
+}
+
+static struct dma_async_tx_descriptor *
+__ioat_prep_pq16_lock(struct dma_chan *c, enum sum_check_flags *result,
+ const dma_addr_t *dst, const dma_addr_t *src,
+ unsigned int src_cnt, const unsigned char *scf,
+ size_t len, unsigned long flags)
+{
+ struct ioatdma_chan *ioat_chan = to_ioat_chan(c);
+ struct ioatdma_device *ioat_dma = ioat_chan->ioat_dma;
+ struct ioat_ring_ent *desc;
+ size_t total_len = len;
+ struct ioat_pq_descriptor *pq;
+ u32 offset = 0;
+ u8 op;
+ int i, s, idx, num_descs;
+
+ /* this function is only called with 9-16 sources */
+ op = result ? IOAT_OP_PQ_VAL_16S : IOAT_OP_PQ_16S;
+
+ dev_dbg(to_dev(ioat_chan), "%s\n", __func__);
+
+ num_descs = ioat_xferlen_to_descs(ioat_chan, len);
+
+ /*
+ * 16 source pq is only available on cb3.3 and has no completion
+ * write hw bug.
+ */
+ if (num_descs && ioat_check_space_lock(ioat_chan, num_descs) == 0)
+ idx = ioat_chan->head;
+ else
+ return NULL;
+
+ i = 0;
+
+ do {
+ struct ioat_raw_descriptor *descs[4];
+ size_t xfer_size = min_t(size_t, len,
+ 1 << ioat_chan->xfercap_log);
+
+ desc = ioat_get_ring_ent(ioat_chan, idx + i);
+ pq = desc->pq;
+
+ descs[0] = (struct ioat_raw_descriptor *) pq;
+
+ desc->sed = ioat3_alloc_sed(ioat_dma, (src_cnt-2) >> 3);
+ if (!desc->sed) {
+ dev_err(to_dev(ioat_chan),
+ "%s: no free sed entries\n", __func__);
+ return NULL;
+ }
+
+ pq->sed_addr = desc->sed->dma;
+ desc->sed->parent = desc;
+
+ descs[1] = (struct ioat_raw_descriptor *)desc->sed->hw;
+ descs[2] = (void *)descs[1] + 64;
+
+ for (s = 0; s < src_cnt; s++)
+ pq16_set_src(descs, src[s], offset, scf[s], s);
+
+ /* see the comment for dma_maxpq in include/linux/dmaengine.h */
+ if (dmaf_p_disabled_continue(flags))
+ pq16_set_src(descs, dst[1], offset, 1, s++);
+ else if (dmaf_continue(flags)) {
+ pq16_set_src(descs, dst[0], offset, 0, s++);
+ pq16_set_src(descs, dst[1], offset, 1, s++);
+ pq16_set_src(descs, dst[1], offset, 0, s++);
+ }
+
+ pq->size = xfer_size;
+ pq->p_addr = dst[0] + offset;
+ pq->q_addr = dst[1] + offset;
+ pq->ctl = 0;
+ pq->ctl_f.op = op;
+ pq->ctl_f.src_cnt = src16_cnt_to_hw(s);
+ /* we turn on descriptor write back error status */
+ if (ioat_dma->cap & IOAT_CAP_DWBES)
+ pq->ctl_f.wb_en = result ? 1 : 0;
+ pq->ctl_f.p_disable = !!(flags & DMA_PREP_PQ_DISABLE_P);
+ pq->ctl_f.q_disable = !!(flags & DMA_PREP_PQ_DISABLE_Q);
+
+ len -= xfer_size;
+ offset += xfer_size;
+ } while (++i < num_descs);
+
+ /* last pq descriptor carries the unmap parameters and fence bit */
+ desc->txd.flags = flags;
+ desc->len = total_len;
+ if (result)
+ desc->result = result;
+ pq->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
+
+ /* with cb3.3 we should be able to do completion w/o a null desc */
+ pq->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
+ pq->ctl_f.compl_write = 1;
+
+ dump_pq16_desc_dbg(ioat_chan, desc);
+
+ /* we leave the channel locked to ensure in order submission */
+ return &desc->txd;
+}
+
+static int src_cnt_flags(unsigned int src_cnt, unsigned long flags)
+{
+ if (dmaf_p_disabled_continue(flags))
+ return src_cnt + 1;
+ else if (dmaf_continue(flags))
+ return src_cnt + 3;
+ else
+ return src_cnt;
+}
+
+struct dma_async_tx_descriptor *
+ioat_prep_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src,
+ unsigned int src_cnt, const unsigned char *scf, size_t len,
+ unsigned long flags)
+{
+ struct ioatdma_chan *ioat_chan = to_ioat_chan(chan);
+
+ if (test_bit(IOAT_CHAN_DOWN, &ioat_chan->state))
+ return NULL;
+
+ /* specify valid address for disabled result */
+ if (flags & DMA_PREP_PQ_DISABLE_P)
+ dst[0] = dst[1];
+ if (flags & DMA_PREP_PQ_DISABLE_Q)
+ dst[1] = dst[0];
+
+ /* handle the single source multiply case from the raid6
+ * recovery path
+ */
+ if ((flags & DMA_PREP_PQ_DISABLE_P) && src_cnt == 1) {
+ dma_addr_t single_source[2];
+ unsigned char single_source_coef[2];
+
+ BUG_ON(flags & DMA_PREP_PQ_DISABLE_Q);
+ single_source[0] = src[0];
+ single_source[1] = src[0];
+ single_source_coef[0] = scf[0];
+ single_source_coef[1] = 0;
+
+ return src_cnt_flags(src_cnt, flags) > 8 ?
+ __ioat_prep_pq16_lock(chan, NULL, dst, single_source,
+ 2, single_source_coef, len,
+ flags) :
+ __ioat_prep_pq_lock(chan, NULL, dst, single_source, 2,
+ single_source_coef, len, flags);
+
+ } else {
+ return src_cnt_flags(src_cnt, flags) > 8 ?
+ __ioat_prep_pq16_lock(chan, NULL, dst, src, src_cnt,
+ scf, len, flags) :
+ __ioat_prep_pq_lock(chan, NULL, dst, src, src_cnt,
+ scf, len, flags);
+ }
+}
+
+struct dma_async_tx_descriptor *
+ioat_prep_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src,
+ unsigned int src_cnt, const unsigned char *scf, size_t len,
+ enum sum_check_flags *pqres, unsigned long flags)
+{
+ struct ioatdma_chan *ioat_chan = to_ioat_chan(chan);
+
+ if (test_bit(IOAT_CHAN_DOWN, &ioat_chan->state))
+ return NULL;
+
+ /* specify valid address for disabled result */
+ if (flags & DMA_PREP_PQ_DISABLE_P)
+ pq[0] = pq[1];
+ if (flags & DMA_PREP_PQ_DISABLE_Q)
+ pq[1] = pq[0];
+
+ /* the cleanup routine only sets bits on validate failure, it
+ * does not clear bits on validate success... so clear it here
+ */
+ *pqres = 0;
+
+ return src_cnt_flags(src_cnt, flags) > 8 ?
+ __ioat_prep_pq16_lock(chan, pqres, pq, src, src_cnt, scf, len,
+ flags) :
+ __ioat_prep_pq_lock(chan, pqres, pq, src, src_cnt, scf, len,
+ flags);
+}
+
+struct dma_async_tx_descriptor *
+ioat_prep_pqxor(struct dma_chan *chan, dma_addr_t dst, dma_addr_t *src,
+ unsigned int src_cnt, size_t len, unsigned long flags)
+{
+ unsigned char scf[MAX_SCF];
+ dma_addr_t pq[2];
+ struct ioatdma_chan *ioat_chan = to_ioat_chan(chan);
+
+ if (test_bit(IOAT_CHAN_DOWN, &ioat_chan->state))
+ return NULL;
+
+ if (src_cnt > MAX_SCF)
+ return NULL;
+
+ memset(scf, 0, src_cnt);
+ pq[0] = dst;
+ flags |= DMA_PREP_PQ_DISABLE_Q;
+ pq[1] = dst; /* specify valid address for disabled result */
+
+ return src_cnt_flags(src_cnt, flags) > 8 ?
+ __ioat_prep_pq16_lock(chan, NULL, pq, src, src_cnt, scf, len,
+ flags) :
+ __ioat_prep_pq_lock(chan, NULL, pq, src, src_cnt, scf, len,
+ flags);
+}
+
+struct dma_async_tx_descriptor *
+ioat_prep_pqxor_val(struct dma_chan *chan, dma_addr_t *src,
+ unsigned int src_cnt, size_t len,
+ enum sum_check_flags *result, unsigned long flags)
+{
+ unsigned char scf[MAX_SCF];
+ dma_addr_t pq[2];
+ struct ioatdma_chan *ioat_chan = to_ioat_chan(chan);
+
+ if (test_bit(IOAT_CHAN_DOWN, &ioat_chan->state))
+ return NULL;
+
+ if (src_cnt > MAX_SCF)
+ return NULL;
+
+ /* the cleanup routine only sets bits on validate failure, it
+ * does not clear bits on validate success... so clear it here
+ */
+ *result = 0;
+
+ memset(scf, 0, src_cnt);
+ pq[0] = src[0];
+ flags |= DMA_PREP_PQ_DISABLE_Q;
+ pq[1] = pq[0]; /* specify valid address for disabled result */
+
+ return src_cnt_flags(src_cnt, flags) > 8 ?
+ __ioat_prep_pq16_lock(chan, result, pq, &src[1], src_cnt - 1,
+ scf, len, flags) :
+ __ioat_prep_pq_lock(chan, result, pq, &src[1], src_cnt - 1,
+ scf, len, flags);
+}
+
+struct dma_async_tx_descriptor *
+ioat_prep_interrupt_lock(struct dma_chan *c, unsigned long flags)
+{
+ struct ioatdma_chan *ioat_chan = to_ioat_chan(c);
+ struct ioat_ring_ent *desc;
+ struct ioat_dma_descriptor *hw;
+
+ if (test_bit(IOAT_CHAN_DOWN, &ioat_chan->state))
+ return NULL;
+
+ if (ioat_check_space_lock(ioat_chan, 1) == 0)
+ desc = ioat_get_ring_ent(ioat_chan, ioat_chan->head);
+ else
+ return NULL;
+
+ hw = desc->hw;
+ hw->ctl = 0;
+ hw->ctl_f.null = 1;
+ hw->ctl_f.int_en = 1;
+ hw->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
+ hw->ctl_f.compl_write = 1;
+ hw->size = NULL_DESC_BUFFER_SIZE;
+ hw->src_addr = 0;
+ hw->dst_addr = 0;
+
+ desc->txd.flags = flags;
+ desc->len = 1;
+
+ dump_desc_dbg(ioat_chan, desc);
+
+ /* we leave the channel locked to ensure in order submission */
+ return &desc->txd;
+}
+
diff --git a/drivers/dma/ioat/registers.h b/drivers/dma/ioat/registers.h
new file mode 100644
index 000000000..f55a5f92f
--- /dev/null
+++ b/drivers/dma/ioat/registers.h
@@ -0,0 +1,258 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved.
+ */
+#ifndef _IOAT_REGISTERS_H_
+#define _IOAT_REGISTERS_H_
+
+#define IOAT_PCI_DMACTRL_OFFSET 0x48
+#define IOAT_PCI_DMACTRL_DMA_EN 0x00000001
+#define IOAT_PCI_DMACTRL_MSI_EN 0x00000002
+
+#define IOAT_PCI_DEVICE_ID_OFFSET 0x02
+#define IOAT_PCI_DMAUNCERRSTS_OFFSET 0x148
+#define IOAT_PCI_CHANERR_INT_OFFSET 0x180
+#define IOAT_PCI_CHANERRMASK_INT_OFFSET 0x184
+
+/* PCIe config registers */
+
+/* EXPCAPID + N */
+#define IOAT_DEVCTRL_OFFSET 0x8
+/* relaxed ordering enable */
+#define IOAT_DEVCTRL_ROE 0x10
+
+/* MMIO Device Registers */
+#define IOAT_CHANCNT_OFFSET 0x00 /* 8-bit */
+
+#define IOAT_XFERCAP_OFFSET 0x01 /* 8-bit */
+#define IOAT_XFERCAP_4KB 12
+#define IOAT_XFERCAP_8KB 13
+#define IOAT_XFERCAP_16KB 14
+#define IOAT_XFERCAP_32KB 15
+#define IOAT_XFERCAP_32GB 0
+
+#define IOAT_GENCTRL_OFFSET 0x02 /* 8-bit */
+#define IOAT_GENCTRL_DEBUG_EN 0x01
+
+#define IOAT_INTRCTRL_OFFSET 0x03 /* 8-bit */
+#define IOAT_INTRCTRL_MASTER_INT_EN 0x01 /* Master Interrupt Enable */
+#define IOAT_INTRCTRL_INT_STATUS 0x02 /* ATTNSTATUS -or- Channel Int */
+#define IOAT_INTRCTRL_INT 0x04 /* INT_STATUS -and- MASTER_INT_EN */
+#define IOAT_INTRCTRL_MSIX_VECTOR_CONTROL 0x08 /* Enable all MSI-X vectors */
+
+#define IOAT_ATTNSTATUS_OFFSET 0x04 /* Each bit is a channel */
+
+#define IOAT_VER_OFFSET 0x08 /* 8-bit */
+#define IOAT_VER_MAJOR_MASK 0xF0
+#define IOAT_VER_MINOR_MASK 0x0F
+#define GET_IOAT_VER_MAJOR(x) (((x) & IOAT_VER_MAJOR_MASK) >> 4)
+#define GET_IOAT_VER_MINOR(x) ((x) & IOAT_VER_MINOR_MASK)
+
+#define IOAT_PERPORTOFFSET_OFFSET 0x0A /* 16-bit */
+
+#define IOAT_INTRDELAY_OFFSET 0x0C /* 16-bit */
+#define IOAT_INTRDELAY_MASK 0x3FFF /* Interrupt Delay Time */
+#define IOAT_INTRDELAY_COALESE_SUPPORT 0x8000 /* Interrupt Coalescing Supported */
+
+#define IOAT_DEVICE_STATUS_OFFSET 0x0E /* 16-bit */
+#define IOAT_DEVICE_STATUS_DEGRADED_MODE 0x0001
+#define IOAT_DEVICE_MMIO_RESTRICTED 0x0002
+#define IOAT_DEVICE_MEMORY_BYPASS 0x0004
+#define IOAT_DEVICE_ADDRESS_REMAPPING 0x0008
+
+#define IOAT_DMA_CAP_OFFSET 0x10 /* 32-bit */
+#define IOAT_CAP_PAGE_BREAK 0x00000001
+#define IOAT_CAP_CRC 0x00000002
+#define IOAT_CAP_SKIP_MARKER 0x00000004
+#define IOAT_CAP_DCA 0x00000010
+#define IOAT_CAP_CRC_MOVE 0x00000020
+#define IOAT_CAP_FILL_BLOCK 0x00000040
+#define IOAT_CAP_APIC 0x00000080
+#define IOAT_CAP_XOR 0x00000100
+#define IOAT_CAP_PQ 0x00000200
+#define IOAT_CAP_DWBES 0x00002000
+#define IOAT_CAP_RAID16SS 0x00020000
+#define IOAT_CAP_DPS 0x00800000
+
+#define IOAT_PREFETCH_LIMIT_OFFSET 0x4C /* CHWPREFLMT */
+
+#define IOAT_CHANNEL_MMIO_SIZE 0x80 /* Each Channel MMIO space is this size */
+
+/* DMA Channel Registers */
+#define IOAT_CHANCTRL_OFFSET 0x00 /* 16-bit Channel Control Register */
+#define IOAT_CHANCTRL_CHANNEL_PRIORITY_MASK 0xF000
+#define IOAT3_CHANCTRL_COMPL_DCA_EN 0x0200
+#define IOAT_CHANCTRL_CHANNEL_IN_USE 0x0100
+#define IOAT_CHANCTRL_DESCRIPTOR_ADDR_SNOOP_CONTROL 0x0020
+#define IOAT_CHANCTRL_ERR_INT_EN 0x0010
+#define IOAT_CHANCTRL_ANY_ERR_ABORT_EN 0x0008
+#define IOAT_CHANCTRL_ERR_COMPLETION_EN 0x0004
+#define IOAT_CHANCTRL_INT_REARM 0x0001
+#define IOAT_CHANCTRL_RUN (IOAT_CHANCTRL_INT_REARM |\
+ IOAT_CHANCTRL_ERR_INT_EN |\
+ IOAT_CHANCTRL_ERR_COMPLETION_EN |\
+ IOAT_CHANCTRL_ANY_ERR_ABORT_EN)
+
+#define IOAT_DMA_COMP_OFFSET 0x02 /* 16-bit DMA channel compatibility */
+#define IOAT_DMA_COMP_V1 0x0001 /* Compatibility with DMA version 1 */
+#define IOAT_DMA_COMP_V2 0x0002 /* Compatibility with DMA version 2 */
+
+#define IOAT_CHANSTS_OFFSET 0x08 /* 64-bit Channel Status Register */
+#define IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR (~0x3fULL)
+#define IOAT_CHANSTS_SOFT_ERR 0x10ULL
+#define IOAT_CHANSTS_UNAFFILIATED_ERR 0x8ULL
+#define IOAT_CHANSTS_STATUS 0x7ULL
+#define IOAT_CHANSTS_ACTIVE 0x0
+#define IOAT_CHANSTS_DONE 0x1
+#define IOAT_CHANSTS_SUSPENDED 0x2
+#define IOAT_CHANSTS_HALTED 0x3
+
+
+
+#define IOAT_CHAN_DMACOUNT_OFFSET 0x06 /* 16-bit DMA Count register */
+
+#define IOAT_DCACTRL_OFFSET 0x30 /* 32 bit Direct Cache Access Control Register */
+#define IOAT_DCACTRL_CMPL_WRITE_ENABLE 0x10000
+#define IOAT_DCACTRL_TARGET_CPU_MASK 0xFFFF /* APIC ID */
+
+/* CB DCA Memory Space Registers */
+#define IOAT_DCAOFFSET_OFFSET 0x14
+/* CB_BAR + IOAT_DCAOFFSET value */
+#define IOAT_DCA_VER_OFFSET 0x00
+#define IOAT_DCA_VER_MAJOR_MASK 0xF0
+#define IOAT_DCA_VER_MINOR_MASK 0x0F
+
+#define IOAT_DCA_COMP_OFFSET 0x02
+#define IOAT_DCA_COMP_V1 0x1
+
+#define IOAT_FSB_CAPABILITY_OFFSET 0x04
+#define IOAT_FSB_CAPABILITY_PREFETCH 0x1
+
+#define IOAT_PCI_CAPABILITY_OFFSET 0x06
+#define IOAT_PCI_CAPABILITY_MEMWR 0x1
+
+#define IOAT_FSB_CAP_ENABLE_OFFSET 0x08
+#define IOAT_FSB_CAP_ENABLE_PREFETCH 0x1
+
+#define IOAT_PCI_CAP_ENABLE_OFFSET 0x0A
+#define IOAT_PCI_CAP_ENABLE_MEMWR 0x1
+
+#define IOAT_APICID_TAG_MAP_OFFSET 0x0C
+#define IOAT_APICID_TAG_MAP_TAG0 0x0000000F
+#define IOAT_APICID_TAG_MAP_TAG0_SHIFT 0
+#define IOAT_APICID_TAG_MAP_TAG1 0x000000F0
+#define IOAT_APICID_TAG_MAP_TAG1_SHIFT 4
+#define IOAT_APICID_TAG_MAP_TAG2 0x00000F00
+#define IOAT_APICID_TAG_MAP_TAG2_SHIFT 8
+#define IOAT_APICID_TAG_MAP_TAG3 0x0000F000
+#define IOAT_APICID_TAG_MAP_TAG3_SHIFT 12
+#define IOAT_APICID_TAG_MAP_TAG4 0x000F0000
+#define IOAT_APICID_TAG_MAP_TAG4_SHIFT 16
+#define IOAT_APICID_TAG_CB2_VALID 0x8080808080
+
+#define IOAT_DCA_GREQID_OFFSET 0x10
+#define IOAT_DCA_GREQID_SIZE 0x04
+#define IOAT_DCA_GREQID_MASK 0xFFFF
+#define IOAT_DCA_GREQID_IGNOREFUN 0x10000000
+#define IOAT_DCA_GREQID_VALID 0x20000000
+#define IOAT_DCA_GREQID_LASTID 0x80000000
+
+#define IOAT3_CSI_CAPABILITY_OFFSET 0x08
+#define IOAT3_CSI_CAPABILITY_PREFETCH 0x1
+
+#define IOAT3_PCI_CAPABILITY_OFFSET 0x0A
+#define IOAT3_PCI_CAPABILITY_MEMWR 0x1
+
+#define IOAT3_CSI_CONTROL_OFFSET 0x0C
+#define IOAT3_CSI_CONTROL_PREFETCH 0x1
+
+#define IOAT3_PCI_CONTROL_OFFSET 0x0E
+#define IOAT3_PCI_CONTROL_MEMWR 0x1
+
+#define IOAT3_APICID_TAG_MAP_OFFSET 0x10
+#define IOAT3_APICID_TAG_MAP_OFFSET_LOW 0x10
+#define IOAT3_APICID_TAG_MAP_OFFSET_HIGH 0x14
+
+#define IOAT3_DCA_GREQID_OFFSET 0x02
+
+#define IOAT1_CHAINADDR_OFFSET 0x0C /* 64-bit Descriptor Chain Address Register */
+#define IOAT2_CHAINADDR_OFFSET 0x10 /* 64-bit Descriptor Chain Address Register */
+#define IOAT_CHAINADDR_OFFSET(ver) ((ver) < IOAT_VER_2_0 \
+ ? IOAT1_CHAINADDR_OFFSET : IOAT2_CHAINADDR_OFFSET)
+#define IOAT1_CHAINADDR_OFFSET_LOW 0x0C
+#define IOAT2_CHAINADDR_OFFSET_LOW 0x10
+#define IOAT_CHAINADDR_OFFSET_LOW(ver) ((ver) < IOAT_VER_2_0 \
+ ? IOAT1_CHAINADDR_OFFSET_LOW : IOAT2_CHAINADDR_OFFSET_LOW)
+#define IOAT1_CHAINADDR_OFFSET_HIGH 0x10
+#define IOAT2_CHAINADDR_OFFSET_HIGH 0x14
+#define IOAT_CHAINADDR_OFFSET_HIGH(ver) ((ver) < IOAT_VER_2_0 \
+ ? IOAT1_CHAINADDR_OFFSET_HIGH : IOAT2_CHAINADDR_OFFSET_HIGH)
+
+#define IOAT1_CHANCMD_OFFSET 0x14 /* 8-bit DMA Channel Command Register */
+#define IOAT2_CHANCMD_OFFSET 0x04 /* 8-bit DMA Channel Command Register */
+#define IOAT_CHANCMD_OFFSET(ver) ((ver) < IOAT_VER_2_0 \
+ ? IOAT1_CHANCMD_OFFSET : IOAT2_CHANCMD_OFFSET)
+#define IOAT_CHANCMD_RESET 0x20
+#define IOAT_CHANCMD_RESUME 0x10
+#define IOAT_CHANCMD_ABORT 0x08
+#define IOAT_CHANCMD_SUSPEND 0x04
+#define IOAT_CHANCMD_APPEND 0x02
+#define IOAT_CHANCMD_START 0x01
+
+#define IOAT_CHANCMP_OFFSET 0x18 /* 64-bit Channel Completion Address Register */
+#define IOAT_CHANCMP_OFFSET_LOW 0x18
+#define IOAT_CHANCMP_OFFSET_HIGH 0x1C
+
+#define IOAT_CDAR_OFFSET 0x20 /* 64-bit Current Descriptor Address Register */
+#define IOAT_CDAR_OFFSET_LOW 0x20
+#define IOAT_CDAR_OFFSET_HIGH 0x24
+
+#define IOAT_CHANERR_OFFSET 0x28 /* 32-bit Channel Error Register */
+#define IOAT_CHANERR_SRC_ADDR_ERR 0x0001
+#define IOAT_CHANERR_DEST_ADDR_ERR 0x0002
+#define IOAT_CHANERR_NEXT_ADDR_ERR 0x0004
+#define IOAT_CHANERR_NEXT_DESC_ALIGN_ERR 0x0008
+#define IOAT_CHANERR_CHAIN_ADDR_VALUE_ERR 0x0010
+#define IOAT_CHANERR_CHANCMD_ERR 0x0020
+#define IOAT_CHANERR_CHIPSET_UNCORRECTABLE_DATA_INTEGRITY_ERR 0x0040
+#define IOAT_CHANERR_DMA_UNCORRECTABLE_DATA_INTEGRITY_ERR 0x0080
+#define IOAT_CHANERR_READ_DATA_ERR 0x0100
+#define IOAT_CHANERR_WRITE_DATA_ERR 0x0200
+#define IOAT_CHANERR_CONTROL_ERR 0x0400
+#define IOAT_CHANERR_LENGTH_ERR 0x0800
+#define IOAT_CHANERR_COMPLETION_ADDR_ERR 0x1000
+#define IOAT_CHANERR_INT_CONFIGURATION_ERR 0x2000
+#define IOAT_CHANERR_SOFT_ERR 0x4000
+#define IOAT_CHANERR_UNAFFILIATED_ERR 0x8000
+#define IOAT_CHANERR_XOR_P_OR_CRC_ERR 0x10000
+#define IOAT_CHANERR_XOR_Q_ERR 0x20000
+#define IOAT_CHANERR_DESCRIPTOR_COUNT_ERR 0x40000
+
+#define IOAT_CHANERR_HANDLE_MASK (IOAT_CHANERR_XOR_P_OR_CRC_ERR | IOAT_CHANERR_XOR_Q_ERR)
+#define IOAT_CHANERR_RECOVER_MASK (IOAT_CHANERR_READ_DATA_ERR | \
+ IOAT_CHANERR_WRITE_DATA_ERR)
+
+#define IOAT_CHANERR_MASK_OFFSET 0x2C /* 32-bit Channel Error Register */
+
+#define IOAT_CHAN_DRSCTL_OFFSET 0xB6
+#define IOAT_CHAN_DRSZ_4KB 0x0000
+#define IOAT_CHAN_DRSZ_8KB 0x0001
+#define IOAT_CHAN_DRSZ_2MB 0x0009
+#define IOAT_CHAN_DRS_EN 0x0100
+#define IOAT_CHAN_DRS_AUTOWRAP 0x0200
+
+#define IOAT_CHAN_LTR_SWSEL_OFFSET 0xBC
+#define IOAT_CHAN_LTR_SWSEL_ACTIVE 0x0
+#define IOAT_CHAN_LTR_SWSEL_IDLE 0x1
+
+#define IOAT_CHAN_LTR_ACTIVE_OFFSET 0xC0
+#define IOAT_CHAN_LTR_ACTIVE_SNVAL 0x0000 /* 0 us */
+#define IOAT_CHAN_LTR_ACTIVE_SNLATSCALE 0x0800 /* 1us scale */
+#define IOAT_CHAN_LTR_ACTIVE_SNREQMNT 0x8000 /* snoop req enable */
+
+#define IOAT_CHAN_LTR_IDLE_OFFSET 0xC4
+#define IOAT_CHAN_LTR_IDLE_SNVAL 0x0258 /* 600 us */
+#define IOAT_CHAN_LTR_IDLE_SNLATSCALE 0x0800 /* 1us scale */
+#define IOAT_CHAN_LTR_IDLE_SNREQMNT 0x8000 /* snoop req enable */
+
+#endif /* _IOAT_REGISTERS_H_ */
diff --git a/drivers/dma/ioat/sysfs.c b/drivers/dma/ioat/sysfs.c
new file mode 100644
index 000000000..168adf28c
--- /dev/null
+++ b/drivers/dma/ioat/sysfs.c
@@ -0,0 +1,166 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Intel I/OAT DMA Linux driver
+ * Copyright(c) 2004 - 2015 Intel Corporation.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/dmaengine.h>
+#include <linux/pci.h>
+#include "dma.h"
+#include "registers.h"
+#include "hw.h"
+
+#include "../dmaengine.h"
+
+static ssize_t cap_show(struct dma_chan *c, char *page)
+{
+ struct dma_device *dma = c->device;
+
+ return sprintf(page, "copy%s%s%s%s%s\n",
+ dma_has_cap(DMA_PQ, dma->cap_mask) ? " pq" : "",
+ dma_has_cap(DMA_PQ_VAL, dma->cap_mask) ? " pq_val" : "",
+ dma_has_cap(DMA_XOR, dma->cap_mask) ? " xor" : "",
+ dma_has_cap(DMA_XOR_VAL, dma->cap_mask) ? " xor_val" : "",
+ dma_has_cap(DMA_INTERRUPT, dma->cap_mask) ? " intr" : "");
+
+}
+struct ioat_sysfs_entry ioat_cap_attr = __ATTR_RO(cap);
+
+static ssize_t version_show(struct dma_chan *c, char *page)
+{
+ struct dma_device *dma = c->device;
+ struct ioatdma_device *ioat_dma = to_ioatdma_device(dma);
+
+ return sprintf(page, "%d.%d\n",
+ ioat_dma->version >> 4, ioat_dma->version & 0xf);
+}
+struct ioat_sysfs_entry ioat_version_attr = __ATTR_RO(version);
+
+static ssize_t
+ioat_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
+{
+ struct ioat_sysfs_entry *entry;
+ struct ioatdma_chan *ioat_chan;
+
+ entry = container_of(attr, struct ioat_sysfs_entry, attr);
+ ioat_chan = container_of(kobj, struct ioatdma_chan, kobj);
+
+ if (!entry->show)
+ return -EIO;
+ return entry->show(&ioat_chan->dma_chan, page);
+}
+
+static ssize_t
+ioat_attr_store(struct kobject *kobj, struct attribute *attr,
+const char *page, size_t count)
+{
+ struct ioat_sysfs_entry *entry;
+ struct ioatdma_chan *ioat_chan;
+
+ entry = container_of(attr, struct ioat_sysfs_entry, attr);
+ ioat_chan = container_of(kobj, struct ioatdma_chan, kobj);
+
+ if (!entry->store)
+ return -EIO;
+ return entry->store(&ioat_chan->dma_chan, page, count);
+}
+
+const struct sysfs_ops ioat_sysfs_ops = {
+ .show = ioat_attr_show,
+ .store = ioat_attr_store,
+};
+
+void ioat_kobject_add(struct ioatdma_device *ioat_dma, struct kobj_type *type)
+{
+ struct dma_device *dma = &ioat_dma->dma_dev;
+ struct dma_chan *c;
+
+ list_for_each_entry(c, &dma->channels, device_node) {
+ struct ioatdma_chan *ioat_chan = to_ioat_chan(c);
+ struct kobject *parent = &c->dev->device.kobj;
+ int err;
+
+ err = kobject_init_and_add(&ioat_chan->kobj, type,
+ parent, "quickdata");
+ if (err) {
+ dev_warn(to_dev(ioat_chan),
+ "sysfs init error (%d), continuing...\n", err);
+ kobject_put(&ioat_chan->kobj);
+ set_bit(IOAT_KOBJ_INIT_FAIL, &ioat_chan->state);
+ }
+ }
+}
+
+void ioat_kobject_del(struct ioatdma_device *ioat_dma)
+{
+ struct dma_device *dma = &ioat_dma->dma_dev;
+ struct dma_chan *c;
+
+ list_for_each_entry(c, &dma->channels, device_node) {
+ struct ioatdma_chan *ioat_chan = to_ioat_chan(c);
+
+ if (!test_bit(IOAT_KOBJ_INIT_FAIL, &ioat_chan->state)) {
+ kobject_del(&ioat_chan->kobj);
+ kobject_put(&ioat_chan->kobj);
+ }
+ }
+}
+
+static ssize_t ring_size_show(struct dma_chan *c, char *page)
+{
+ struct ioatdma_chan *ioat_chan = to_ioat_chan(c);
+
+ return sprintf(page, "%d\n", (1 << ioat_chan->alloc_order) & ~1);
+}
+static struct ioat_sysfs_entry ring_size_attr = __ATTR_RO(ring_size);
+
+static ssize_t ring_active_show(struct dma_chan *c, char *page)
+{
+ struct ioatdma_chan *ioat_chan = to_ioat_chan(c);
+
+ /* ...taken outside the lock, no need to be precise */
+ return sprintf(page, "%d\n", ioat_ring_active(ioat_chan));
+}
+static struct ioat_sysfs_entry ring_active_attr = __ATTR_RO(ring_active);
+
+static ssize_t intr_coalesce_show(struct dma_chan *c, char *page)
+{
+ struct ioatdma_chan *ioat_chan = to_ioat_chan(c);
+
+ return sprintf(page, "%d\n", ioat_chan->intr_coalesce);
+}
+
+static ssize_t intr_coalesce_store(struct dma_chan *c, const char *page,
+size_t count)
+{
+ int intr_coalesce = 0;
+ struct ioatdma_chan *ioat_chan = to_ioat_chan(c);
+
+ if (sscanf(page, "%du", &intr_coalesce) != -1) {
+ if ((intr_coalesce < 0) ||
+ (intr_coalesce > IOAT_INTRDELAY_MASK))
+ return -EINVAL;
+ ioat_chan->intr_coalesce = intr_coalesce;
+ }
+
+ return count;
+}
+
+static struct ioat_sysfs_entry intr_coalesce_attr = __ATTR_RW(intr_coalesce);
+
+static struct attribute *ioat_attrs[] = {
+ &ring_size_attr.attr,
+ &ring_active_attr.attr,
+ &ioat_cap_attr.attr,
+ &ioat_version_attr.attr,
+ &intr_coalesce_attr.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(ioat);
+
+struct kobj_type ioat_ktype = {
+ .sysfs_ops = &ioat_sysfs_ops,
+ .default_groups = ioat_groups,
+};
diff --git a/drivers/dma/iop-adma.c b/drivers/dma/iop-adma.c
new file mode 100644
index 000000000..310b899d5
--- /dev/null
+++ b/drivers/dma/iop-adma.c
@@ -0,0 +1,1554 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * offload engine driver for the Intel Xscale series of i/o processors
+ * Copyright © 2006, Intel Corporation.
+ */
+
+/*
+ * This driver supports the asynchrounous DMA copy and RAID engines available
+ * on the Intel Xscale(R) family of I/O Processors (IOP 32x, 33x, 134x)
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/spinlock.h>
+#include <linux/interrupt.h>
+#include <linux/platform_device.h>
+#include <linux/prefetch.h>
+#include <linux/memory.h>
+#include <linux/ioport.h>
+#include <linux/raid/pq.h>
+#include <linux/slab.h>
+
+#include "iop-adma.h"
+#include "dmaengine.h"
+
+#define to_iop_adma_chan(chan) container_of(chan, struct iop_adma_chan, common)
+#define to_iop_adma_device(dev) \
+ container_of(dev, struct iop_adma_device, common)
+#define tx_to_iop_adma_slot(tx) \
+ container_of(tx, struct iop_adma_desc_slot, async_tx)
+
+/**
+ * iop_adma_free_slots - flags descriptor slots for reuse
+ * @slot: Slot to free
+ * Caller must hold &iop_chan->lock while calling this function
+ */
+static void iop_adma_free_slots(struct iop_adma_desc_slot *slot)
+{
+ int stride = slot->slots_per_op;
+
+ while (stride--) {
+ slot->slots_per_op = 0;
+ slot = list_entry(slot->slot_node.next,
+ struct iop_adma_desc_slot,
+ slot_node);
+ }
+}
+
+static dma_cookie_t
+iop_adma_run_tx_complete_actions(struct iop_adma_desc_slot *desc,
+ struct iop_adma_chan *iop_chan, dma_cookie_t cookie)
+{
+ struct dma_async_tx_descriptor *tx = &desc->async_tx;
+
+ BUG_ON(tx->cookie < 0);
+ if (tx->cookie > 0) {
+ cookie = tx->cookie;
+ tx->cookie = 0;
+
+ /* call the callback (must not sleep or submit new
+ * operations to this channel)
+ */
+ dmaengine_desc_get_callback_invoke(tx, NULL);
+
+ dma_descriptor_unmap(tx);
+ if (desc->group_head)
+ desc->group_head = NULL;
+ }
+
+ /* run dependent operations */
+ dma_run_dependencies(tx);
+
+ return cookie;
+}
+
+static int
+iop_adma_clean_slot(struct iop_adma_desc_slot *desc,
+ struct iop_adma_chan *iop_chan)
+{
+ /* the client is allowed to attach dependent operations
+ * until 'ack' is set
+ */
+ if (!async_tx_test_ack(&desc->async_tx))
+ return 0;
+
+ /* leave the last descriptor in the chain
+ * so we can append to it
+ */
+ if (desc->chain_node.next == &iop_chan->chain)
+ return 1;
+
+ dev_dbg(iop_chan->device->common.dev,
+ "\tfree slot: %d slots_per_op: %d\n",
+ desc->idx, desc->slots_per_op);
+
+ list_del(&desc->chain_node);
+ iop_adma_free_slots(desc);
+
+ return 0;
+}
+
+static void __iop_adma_slot_cleanup(struct iop_adma_chan *iop_chan)
+{
+ struct iop_adma_desc_slot *iter, *_iter, *grp_start = NULL;
+ dma_cookie_t cookie = 0;
+ u32 current_desc = iop_chan_get_current_descriptor(iop_chan);
+ int busy = iop_chan_is_busy(iop_chan);
+ int seen_current = 0, slot_cnt = 0, slots_per_op = 0;
+
+ dev_dbg(iop_chan->device->common.dev, "%s\n", __func__);
+ /* free completed slots from the chain starting with
+ * the oldest descriptor
+ */
+ list_for_each_entry_safe(iter, _iter, &iop_chan->chain,
+ chain_node) {
+ pr_debug("\tcookie: %d slot: %d busy: %d "
+ "this_desc: %pad next_desc: %#llx ack: %d\n",
+ iter->async_tx.cookie, iter->idx, busy,
+ &iter->async_tx.phys, (u64)iop_desc_get_next_desc(iter),
+ async_tx_test_ack(&iter->async_tx));
+ prefetch(_iter);
+ prefetch(&_iter->async_tx);
+
+ /* do not advance past the current descriptor loaded into the
+ * hardware channel, subsequent descriptors are either in
+ * process or have not been submitted
+ */
+ if (seen_current)
+ break;
+
+ /* stop the search if we reach the current descriptor and the
+ * channel is busy, or if it appears that the current descriptor
+ * needs to be re-read (i.e. has been appended to)
+ */
+ if (iter->async_tx.phys == current_desc) {
+ BUG_ON(seen_current++);
+ if (busy || iop_desc_get_next_desc(iter))
+ break;
+ }
+
+ /* detect the start of a group transaction */
+ if (!slot_cnt && !slots_per_op) {
+ slot_cnt = iter->slot_cnt;
+ slots_per_op = iter->slots_per_op;
+ if (slot_cnt <= slots_per_op) {
+ slot_cnt = 0;
+ slots_per_op = 0;
+ }
+ }
+
+ if (slot_cnt) {
+ pr_debug("\tgroup++\n");
+ if (!grp_start)
+ grp_start = iter;
+ slot_cnt -= slots_per_op;
+ }
+
+ /* all the members of a group are complete */
+ if (slots_per_op != 0 && slot_cnt == 0) {
+ struct iop_adma_desc_slot *grp_iter, *_grp_iter;
+ int end_of_chain = 0;
+ pr_debug("\tgroup end\n");
+
+ /* collect the total results */
+ if (grp_start->xor_check_result) {
+ u32 zero_sum_result = 0;
+ slot_cnt = grp_start->slot_cnt;
+ grp_iter = grp_start;
+
+ list_for_each_entry_from(grp_iter,
+ &iop_chan->chain, chain_node) {
+ zero_sum_result |=
+ iop_desc_get_zero_result(grp_iter);
+ pr_debug("\titer%d result: %d\n",
+ grp_iter->idx, zero_sum_result);
+ slot_cnt -= slots_per_op;
+ if (slot_cnt == 0)
+ break;
+ }
+ pr_debug("\tgrp_start->xor_check_result: %p\n",
+ grp_start->xor_check_result);
+ *grp_start->xor_check_result = zero_sum_result;
+ }
+
+ /* clean up the group */
+ slot_cnt = grp_start->slot_cnt;
+ grp_iter = grp_start;
+ list_for_each_entry_safe_from(grp_iter, _grp_iter,
+ &iop_chan->chain, chain_node) {
+ cookie = iop_adma_run_tx_complete_actions(
+ grp_iter, iop_chan, cookie);
+
+ slot_cnt -= slots_per_op;
+ end_of_chain = iop_adma_clean_slot(grp_iter,
+ iop_chan);
+
+ if (slot_cnt == 0 || end_of_chain)
+ break;
+ }
+
+ /* the group should be complete at this point */
+ BUG_ON(slot_cnt);
+
+ slots_per_op = 0;
+ grp_start = NULL;
+ if (end_of_chain)
+ break;
+ else
+ continue;
+ } else if (slots_per_op) /* wait for group completion */
+ continue;
+
+ /* write back zero sum results (single descriptor case) */
+ if (iter->xor_check_result && iter->async_tx.cookie)
+ *iter->xor_check_result =
+ iop_desc_get_zero_result(iter);
+
+ cookie = iop_adma_run_tx_complete_actions(
+ iter, iop_chan, cookie);
+
+ if (iop_adma_clean_slot(iter, iop_chan))
+ break;
+ }
+
+ if (cookie > 0) {
+ iop_chan->common.completed_cookie = cookie;
+ pr_debug("\tcompleted cookie %d\n", cookie);
+ }
+}
+
+static void
+iop_adma_slot_cleanup(struct iop_adma_chan *iop_chan)
+{
+ spin_lock_bh(&iop_chan->lock);
+ __iop_adma_slot_cleanup(iop_chan);
+ spin_unlock_bh(&iop_chan->lock);
+}
+
+static void iop_adma_tasklet(struct tasklet_struct *t)
+{
+ struct iop_adma_chan *iop_chan = from_tasklet(iop_chan, t,
+ irq_tasklet);
+
+ /* lockdep will flag depedency submissions as potentially
+ * recursive locking, this is not the case as a dependency
+ * submission will never recurse a channels submit routine.
+ * There are checks in async_tx.c to prevent this.
+ */
+ spin_lock_nested(&iop_chan->lock, SINGLE_DEPTH_NESTING);
+ __iop_adma_slot_cleanup(iop_chan);
+ spin_unlock(&iop_chan->lock);
+}
+
+static struct iop_adma_desc_slot *
+iop_adma_alloc_slots(struct iop_adma_chan *iop_chan, int num_slots,
+ int slots_per_op)
+{
+ struct iop_adma_desc_slot *iter, *_iter, *alloc_start = NULL;
+ LIST_HEAD(chain);
+ int slots_found, retry = 0;
+
+ /* start search from the last allocated descrtiptor
+ * if a contiguous allocation can not be found start searching
+ * from the beginning of the list
+ */
+retry:
+ slots_found = 0;
+ if (retry == 0)
+ iter = iop_chan->last_used;
+ else
+ iter = list_entry(&iop_chan->all_slots,
+ struct iop_adma_desc_slot,
+ slot_node);
+
+ list_for_each_entry_safe_continue(
+ iter, _iter, &iop_chan->all_slots, slot_node) {
+ prefetch(_iter);
+ prefetch(&_iter->async_tx);
+ if (iter->slots_per_op) {
+ /* give up after finding the first busy slot
+ * on the second pass through the list
+ */
+ if (retry)
+ break;
+
+ slots_found = 0;
+ continue;
+ }
+
+ /* start the allocation if the slot is correctly aligned */
+ if (!slots_found++) {
+ if (iop_desc_is_aligned(iter, slots_per_op))
+ alloc_start = iter;
+ else {
+ slots_found = 0;
+ continue;
+ }
+ }
+
+ if (slots_found == num_slots) {
+ struct iop_adma_desc_slot *alloc_tail = NULL;
+ struct iop_adma_desc_slot *last_used = NULL;
+ iter = alloc_start;
+ while (num_slots) {
+ int i;
+ dev_dbg(iop_chan->device->common.dev,
+ "allocated slot: %d "
+ "(desc %p phys: %#llx) slots_per_op %d\n",
+ iter->idx, iter->hw_desc,
+ (u64)iter->async_tx.phys, slots_per_op);
+
+ /* pre-ack all but the last descriptor */
+ if (num_slots != slots_per_op)
+ async_tx_ack(&iter->async_tx);
+
+ list_add_tail(&iter->chain_node, &chain);
+ alloc_tail = iter;
+ iter->async_tx.cookie = 0;
+ iter->slot_cnt = num_slots;
+ iter->xor_check_result = NULL;
+ for (i = 0; i < slots_per_op; i++) {
+ iter->slots_per_op = slots_per_op - i;
+ last_used = iter;
+ iter = list_entry(iter->slot_node.next,
+ struct iop_adma_desc_slot,
+ slot_node);
+ }
+ num_slots -= slots_per_op;
+ }
+ alloc_tail->group_head = alloc_start;
+ alloc_tail->async_tx.cookie = -EBUSY;
+ list_splice(&chain, &alloc_tail->tx_list);
+ iop_chan->last_used = last_used;
+ iop_desc_clear_next_desc(alloc_start);
+ iop_desc_clear_next_desc(alloc_tail);
+ return alloc_tail;
+ }
+ }
+ if (!retry++)
+ goto retry;
+
+ /* perform direct reclaim if the allocation fails */
+ __iop_adma_slot_cleanup(iop_chan);
+
+ return NULL;
+}
+
+static void iop_adma_check_threshold(struct iop_adma_chan *iop_chan)
+{
+ dev_dbg(iop_chan->device->common.dev, "pending: %d\n",
+ iop_chan->pending);
+
+ if (iop_chan->pending >= IOP_ADMA_THRESHOLD) {
+ iop_chan->pending = 0;
+ iop_chan_append(iop_chan);
+ }
+}
+
+static dma_cookie_t
+iop_adma_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+ struct iop_adma_desc_slot *sw_desc = tx_to_iop_adma_slot(tx);
+ struct iop_adma_chan *iop_chan = to_iop_adma_chan(tx->chan);
+ struct iop_adma_desc_slot *grp_start, *old_chain_tail;
+ int slot_cnt;
+ dma_cookie_t cookie;
+ dma_addr_t next_dma;
+
+ grp_start = sw_desc->group_head;
+ slot_cnt = grp_start->slot_cnt;
+
+ spin_lock_bh(&iop_chan->lock);
+ cookie = dma_cookie_assign(tx);
+
+ old_chain_tail = list_entry(iop_chan->chain.prev,
+ struct iop_adma_desc_slot, chain_node);
+ list_splice_init(&sw_desc->tx_list,
+ &old_chain_tail->chain_node);
+
+ /* fix up the hardware chain */
+ next_dma = grp_start->async_tx.phys;
+ iop_desc_set_next_desc(old_chain_tail, next_dma);
+ BUG_ON(iop_desc_get_next_desc(old_chain_tail) != next_dma); /* flush */
+
+ /* check for pre-chained descriptors */
+ iop_paranoia(iop_desc_get_next_desc(sw_desc));
+
+ /* increment the pending count by the number of slots
+ * memcpy operations have a 1:1 (slot:operation) relation
+ * other operations are heavier and will pop the threshold
+ * more often.
+ */
+ iop_chan->pending += slot_cnt;
+ iop_adma_check_threshold(iop_chan);
+ spin_unlock_bh(&iop_chan->lock);
+
+ dev_dbg(iop_chan->device->common.dev, "%s cookie: %d slot: %d\n",
+ __func__, sw_desc->async_tx.cookie, sw_desc->idx);
+
+ return cookie;
+}
+
+static void iop_chan_start_null_memcpy(struct iop_adma_chan *iop_chan);
+static void iop_chan_start_null_xor(struct iop_adma_chan *iop_chan);
+
+/**
+ * iop_adma_alloc_chan_resources - returns the number of allocated descriptors
+ * @chan: allocate descriptor resources for this channel
+ *
+ * Note: We keep the slots for 1 operation on iop_chan->chain at all times. To
+ * avoid deadlock, via async_xor, num_descs_in_pool must at a minimum be
+ * greater than 2x the number slots needed to satisfy a device->max_xor
+ * request.
+ * */
+static int iop_adma_alloc_chan_resources(struct dma_chan *chan)
+{
+ char *hw_desc;
+ dma_addr_t dma_desc;
+ int idx;
+ struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
+ struct iop_adma_desc_slot *slot = NULL;
+ int init = iop_chan->slots_allocated ? 0 : 1;
+ struct iop_adma_platform_data *plat_data =
+ dev_get_platdata(&iop_chan->device->pdev->dev);
+ int num_descs_in_pool = plat_data->pool_size/IOP_ADMA_SLOT_SIZE;
+
+ /* Allocate descriptor slots */
+ do {
+ idx = iop_chan->slots_allocated;
+ if (idx == num_descs_in_pool)
+ break;
+
+ slot = kzalloc(sizeof(*slot), GFP_KERNEL);
+ if (!slot) {
+ printk(KERN_INFO "IOP ADMA Channel only initialized"
+ " %d descriptor slots", idx);
+ break;
+ }
+ hw_desc = (char *) iop_chan->device->dma_desc_pool_virt;
+ slot->hw_desc = (void *) &hw_desc[idx * IOP_ADMA_SLOT_SIZE];
+
+ dma_async_tx_descriptor_init(&slot->async_tx, chan);
+ slot->async_tx.tx_submit = iop_adma_tx_submit;
+ INIT_LIST_HEAD(&slot->tx_list);
+ INIT_LIST_HEAD(&slot->chain_node);
+ INIT_LIST_HEAD(&slot->slot_node);
+ dma_desc = iop_chan->device->dma_desc_pool;
+ slot->async_tx.phys = dma_desc + idx * IOP_ADMA_SLOT_SIZE;
+ slot->idx = idx;
+
+ spin_lock_bh(&iop_chan->lock);
+ iop_chan->slots_allocated++;
+ list_add_tail(&slot->slot_node, &iop_chan->all_slots);
+ spin_unlock_bh(&iop_chan->lock);
+ } while (iop_chan->slots_allocated < num_descs_in_pool);
+
+ if (idx && !iop_chan->last_used)
+ iop_chan->last_used = list_entry(iop_chan->all_slots.next,
+ struct iop_adma_desc_slot,
+ slot_node);
+
+ dev_dbg(iop_chan->device->common.dev,
+ "allocated %d descriptor slots last_used: %p\n",
+ iop_chan->slots_allocated, iop_chan->last_used);
+
+ /* initialize the channel and the chain with a null operation */
+ if (init) {
+ if (dma_has_cap(DMA_MEMCPY,
+ iop_chan->device->common.cap_mask))
+ iop_chan_start_null_memcpy(iop_chan);
+ else if (dma_has_cap(DMA_XOR,
+ iop_chan->device->common.cap_mask))
+ iop_chan_start_null_xor(iop_chan);
+ else
+ BUG();
+ }
+
+ return (idx > 0) ? idx : -ENOMEM;
+}
+
+static struct dma_async_tx_descriptor *
+iop_adma_prep_dma_interrupt(struct dma_chan *chan, unsigned long flags)
+{
+ struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
+ struct iop_adma_desc_slot *sw_desc, *grp_start;
+ int slot_cnt, slots_per_op;
+
+ dev_dbg(iop_chan->device->common.dev, "%s\n", __func__);
+
+ spin_lock_bh(&iop_chan->lock);
+ slot_cnt = iop_chan_interrupt_slot_count(&slots_per_op, iop_chan);
+ sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
+ if (sw_desc) {
+ grp_start = sw_desc->group_head;
+ iop_desc_init_interrupt(grp_start, iop_chan);
+ sw_desc->async_tx.flags = flags;
+ }
+ spin_unlock_bh(&iop_chan->lock);
+
+ return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
+static struct dma_async_tx_descriptor *
+iop_adma_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dma_dest,
+ dma_addr_t dma_src, size_t len, unsigned long flags)
+{
+ struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
+ struct iop_adma_desc_slot *sw_desc, *grp_start;
+ int slot_cnt, slots_per_op;
+
+ if (unlikely(!len))
+ return NULL;
+ BUG_ON(len > IOP_ADMA_MAX_BYTE_COUNT);
+
+ dev_dbg(iop_chan->device->common.dev, "%s len: %zu\n",
+ __func__, len);
+
+ spin_lock_bh(&iop_chan->lock);
+ slot_cnt = iop_chan_memcpy_slot_count(len, &slots_per_op);
+ sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
+ if (sw_desc) {
+ grp_start = sw_desc->group_head;
+ iop_desc_init_memcpy(grp_start, flags);
+ iop_desc_set_byte_count(grp_start, iop_chan, len);
+ iop_desc_set_dest_addr(grp_start, iop_chan, dma_dest);
+ iop_desc_set_memcpy_src_addr(grp_start, dma_src);
+ sw_desc->async_tx.flags = flags;
+ }
+ spin_unlock_bh(&iop_chan->lock);
+
+ return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
+static struct dma_async_tx_descriptor *
+iop_adma_prep_dma_xor(struct dma_chan *chan, dma_addr_t dma_dest,
+ dma_addr_t *dma_src, unsigned int src_cnt, size_t len,
+ unsigned long flags)
+{
+ struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
+ struct iop_adma_desc_slot *sw_desc, *grp_start;
+ int slot_cnt, slots_per_op;
+
+ if (unlikely(!len))
+ return NULL;
+ BUG_ON(len > IOP_ADMA_XOR_MAX_BYTE_COUNT);
+
+ dev_dbg(iop_chan->device->common.dev,
+ "%s src_cnt: %d len: %zu flags: %lx\n",
+ __func__, src_cnt, len, flags);
+
+ spin_lock_bh(&iop_chan->lock);
+ slot_cnt = iop_chan_xor_slot_count(len, src_cnt, &slots_per_op);
+ sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
+ if (sw_desc) {
+ grp_start = sw_desc->group_head;
+ iop_desc_init_xor(grp_start, src_cnt, flags);
+ iop_desc_set_byte_count(grp_start, iop_chan, len);
+ iop_desc_set_dest_addr(grp_start, iop_chan, dma_dest);
+ sw_desc->async_tx.flags = flags;
+ while (src_cnt--)
+ iop_desc_set_xor_src_addr(grp_start, src_cnt,
+ dma_src[src_cnt]);
+ }
+ spin_unlock_bh(&iop_chan->lock);
+
+ return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
+static struct dma_async_tx_descriptor *
+iop_adma_prep_dma_xor_val(struct dma_chan *chan, dma_addr_t *dma_src,
+ unsigned int src_cnt, size_t len, u32 *result,
+ unsigned long flags)
+{
+ struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
+ struct iop_adma_desc_slot *sw_desc, *grp_start;
+ int slot_cnt, slots_per_op;
+
+ if (unlikely(!len))
+ return NULL;
+
+ dev_dbg(iop_chan->device->common.dev, "%s src_cnt: %d len: %zu\n",
+ __func__, src_cnt, len);
+
+ spin_lock_bh(&iop_chan->lock);
+ slot_cnt = iop_chan_zero_sum_slot_count(len, src_cnt, &slots_per_op);
+ sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
+ if (sw_desc) {
+ grp_start = sw_desc->group_head;
+ iop_desc_init_zero_sum(grp_start, src_cnt, flags);
+ iop_desc_set_zero_sum_byte_count(grp_start, len);
+ grp_start->xor_check_result = result;
+ pr_debug("\t%s: grp_start->xor_check_result: %p\n",
+ __func__, grp_start->xor_check_result);
+ sw_desc->async_tx.flags = flags;
+ while (src_cnt--)
+ iop_desc_set_zero_sum_src_addr(grp_start, src_cnt,
+ dma_src[src_cnt]);
+ }
+ spin_unlock_bh(&iop_chan->lock);
+
+ return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
+static struct dma_async_tx_descriptor *
+iop_adma_prep_dma_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src,
+ unsigned int src_cnt, const unsigned char *scf, size_t len,
+ unsigned long flags)
+{
+ struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
+ struct iop_adma_desc_slot *sw_desc, *g;
+ int slot_cnt, slots_per_op;
+ int continue_srcs;
+
+ if (unlikely(!len))
+ return NULL;
+ BUG_ON(len > IOP_ADMA_XOR_MAX_BYTE_COUNT);
+
+ dev_dbg(iop_chan->device->common.dev,
+ "%s src_cnt: %d len: %zu flags: %lx\n",
+ __func__, src_cnt, len, flags);
+
+ if (dmaf_p_disabled_continue(flags))
+ continue_srcs = 1+src_cnt;
+ else if (dmaf_continue(flags))
+ continue_srcs = 3+src_cnt;
+ else
+ continue_srcs = 0+src_cnt;
+
+ spin_lock_bh(&iop_chan->lock);
+ slot_cnt = iop_chan_pq_slot_count(len, continue_srcs, &slots_per_op);
+ sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
+ if (sw_desc) {
+ int i;
+
+ g = sw_desc->group_head;
+ iop_desc_set_byte_count(g, iop_chan, len);
+
+ /* even if P is disabled its destination address (bits
+ * [3:0]) must match Q. It is ok if P points to an
+ * invalid address, it won't be written.
+ */
+ if (flags & DMA_PREP_PQ_DISABLE_P)
+ dst[0] = dst[1] & 0x7;
+
+ iop_desc_set_pq_addr(g, dst);
+ sw_desc->async_tx.flags = flags;
+ for (i = 0; i < src_cnt; i++)
+ iop_desc_set_pq_src_addr(g, i, src[i], scf[i]);
+
+ /* if we are continuing a previous operation factor in
+ * the old p and q values, see the comment for dma_maxpq
+ * in include/linux/dmaengine.h
+ */
+ if (dmaf_p_disabled_continue(flags))
+ iop_desc_set_pq_src_addr(g, i++, dst[1], 1);
+ else if (dmaf_continue(flags)) {
+ iop_desc_set_pq_src_addr(g, i++, dst[0], 0);
+ iop_desc_set_pq_src_addr(g, i++, dst[1], 1);
+ iop_desc_set_pq_src_addr(g, i++, dst[1], 0);
+ }
+ iop_desc_init_pq(g, i, flags);
+ }
+ spin_unlock_bh(&iop_chan->lock);
+
+ return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
+static struct dma_async_tx_descriptor *
+iop_adma_prep_dma_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src,
+ unsigned int src_cnt, const unsigned char *scf,
+ size_t len, enum sum_check_flags *pqres,
+ unsigned long flags)
+{
+ struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
+ struct iop_adma_desc_slot *sw_desc, *g;
+ int slot_cnt, slots_per_op;
+
+ if (unlikely(!len))
+ return NULL;
+ BUG_ON(len > IOP_ADMA_XOR_MAX_BYTE_COUNT);
+
+ dev_dbg(iop_chan->device->common.dev, "%s src_cnt: %d len: %zu\n",
+ __func__, src_cnt, len);
+
+ spin_lock_bh(&iop_chan->lock);
+ slot_cnt = iop_chan_pq_zero_sum_slot_count(len, src_cnt + 2, &slots_per_op);
+ sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
+ if (sw_desc) {
+ /* for validate operations p and q are tagged onto the
+ * end of the source list
+ */
+ int pq_idx = src_cnt;
+
+ g = sw_desc->group_head;
+ iop_desc_init_pq_zero_sum(g, src_cnt+2, flags);
+ iop_desc_set_pq_zero_sum_byte_count(g, len);
+ g->pq_check_result = pqres;
+ pr_debug("\t%s: g->pq_check_result: %p\n",
+ __func__, g->pq_check_result);
+ sw_desc->async_tx.flags = flags;
+ while (src_cnt--)
+ iop_desc_set_pq_zero_sum_src_addr(g, src_cnt,
+ src[src_cnt],
+ scf[src_cnt]);
+ iop_desc_set_pq_zero_sum_addr(g, pq_idx, src);
+ }
+ spin_unlock_bh(&iop_chan->lock);
+
+ return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
+static void iop_adma_free_chan_resources(struct dma_chan *chan)
+{
+ struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
+ struct iop_adma_desc_slot *iter, *_iter;
+ int in_use_descs = 0;
+
+ iop_adma_slot_cleanup(iop_chan);
+
+ spin_lock_bh(&iop_chan->lock);
+ list_for_each_entry_safe(iter, _iter, &iop_chan->chain,
+ chain_node) {
+ in_use_descs++;
+ list_del(&iter->chain_node);
+ }
+ list_for_each_entry_safe_reverse(
+ iter, _iter, &iop_chan->all_slots, slot_node) {
+ list_del(&iter->slot_node);
+ kfree(iter);
+ iop_chan->slots_allocated--;
+ }
+ iop_chan->last_used = NULL;
+
+ dev_dbg(iop_chan->device->common.dev, "%s slots_allocated %d\n",
+ __func__, iop_chan->slots_allocated);
+ spin_unlock_bh(&iop_chan->lock);
+
+ /* one is ok since we left it on there on purpose */
+ if (in_use_descs > 1)
+ printk(KERN_ERR "IOP: Freeing %d in use descriptors!\n",
+ in_use_descs - 1);
+}
+
+/**
+ * iop_adma_status - poll the status of an ADMA transaction
+ * @chan: ADMA channel handle
+ * @cookie: ADMA transaction identifier
+ * @txstate: a holder for the current state of the channel or NULL
+ */
+static enum dma_status iop_adma_status(struct dma_chan *chan,
+ dma_cookie_t cookie,
+ struct dma_tx_state *txstate)
+{
+ struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
+ int ret;
+
+ ret = dma_cookie_status(chan, cookie, txstate);
+ if (ret == DMA_COMPLETE)
+ return ret;
+
+ iop_adma_slot_cleanup(iop_chan);
+
+ return dma_cookie_status(chan, cookie, txstate);
+}
+
+static irqreturn_t iop_adma_eot_handler(int irq, void *data)
+{
+ struct iop_adma_chan *chan = data;
+
+ dev_dbg(chan->device->common.dev, "%s\n", __func__);
+
+ tasklet_schedule(&chan->irq_tasklet);
+
+ iop_adma_device_clear_eot_status(chan);
+
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t iop_adma_eoc_handler(int irq, void *data)
+{
+ struct iop_adma_chan *chan = data;
+
+ dev_dbg(chan->device->common.dev, "%s\n", __func__);
+
+ tasklet_schedule(&chan->irq_tasklet);
+
+ iop_adma_device_clear_eoc_status(chan);
+
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t iop_adma_err_handler(int irq, void *data)
+{
+ struct iop_adma_chan *chan = data;
+ unsigned long status = iop_chan_get_status(chan);
+
+ dev_err(chan->device->common.dev,
+ "error ( %s%s%s%s%s%s%s)\n",
+ iop_is_err_int_parity(status, chan) ? "int_parity " : "",
+ iop_is_err_mcu_abort(status, chan) ? "mcu_abort " : "",
+ iop_is_err_int_tabort(status, chan) ? "int_tabort " : "",
+ iop_is_err_int_mabort(status, chan) ? "int_mabort " : "",
+ iop_is_err_pci_tabort(status, chan) ? "pci_tabort " : "",
+ iop_is_err_pci_mabort(status, chan) ? "pci_mabort " : "",
+ iop_is_err_split_tx(status, chan) ? "split_tx " : "");
+
+ iop_adma_device_clear_err_status(chan);
+
+ BUG();
+
+ return IRQ_HANDLED;
+}
+
+static void iop_adma_issue_pending(struct dma_chan *chan)
+{
+ struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
+
+ if (iop_chan->pending) {
+ iop_chan->pending = 0;
+ iop_chan_append(iop_chan);
+ }
+}
+
+/*
+ * Perform a transaction to verify the HW works.
+ */
+#define IOP_ADMA_TEST_SIZE 2000
+
+static int iop_adma_memcpy_self_test(struct iop_adma_device *device)
+{
+ int i;
+ void *src, *dest;
+ dma_addr_t src_dma, dest_dma;
+ struct dma_chan *dma_chan;
+ dma_cookie_t cookie;
+ struct dma_async_tx_descriptor *tx;
+ int err = 0;
+ struct iop_adma_chan *iop_chan;
+
+ dev_dbg(device->common.dev, "%s\n", __func__);
+
+ src = kmalloc(IOP_ADMA_TEST_SIZE, GFP_KERNEL);
+ if (!src)
+ return -ENOMEM;
+ dest = kzalloc(IOP_ADMA_TEST_SIZE, GFP_KERNEL);
+ if (!dest) {
+ kfree(src);
+ return -ENOMEM;
+ }
+
+ /* Fill in src buffer */
+ for (i = 0; i < IOP_ADMA_TEST_SIZE; i++)
+ ((u8 *) src)[i] = (u8)i;
+
+ /* Start copy, using first DMA channel */
+ dma_chan = container_of(device->common.channels.next,
+ struct dma_chan,
+ device_node);
+ if (iop_adma_alloc_chan_resources(dma_chan) < 1) {
+ err = -ENODEV;
+ goto out;
+ }
+
+ dest_dma = dma_map_single(dma_chan->device->dev, dest,
+ IOP_ADMA_TEST_SIZE, DMA_FROM_DEVICE);
+ src_dma = dma_map_single(dma_chan->device->dev, src,
+ IOP_ADMA_TEST_SIZE, DMA_TO_DEVICE);
+ tx = iop_adma_prep_dma_memcpy(dma_chan, dest_dma, src_dma,
+ IOP_ADMA_TEST_SIZE,
+ DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+
+ cookie = iop_adma_tx_submit(tx);
+ iop_adma_issue_pending(dma_chan);
+ msleep(1);
+
+ if (iop_adma_status(dma_chan, cookie, NULL) !=
+ DMA_COMPLETE) {
+ dev_err(dma_chan->device->dev,
+ "Self-test copy timed out, disabling\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+ iop_chan = to_iop_adma_chan(dma_chan);
+ dma_sync_single_for_cpu(&iop_chan->device->pdev->dev, dest_dma,
+ IOP_ADMA_TEST_SIZE, DMA_FROM_DEVICE);
+ if (memcmp(src, dest, IOP_ADMA_TEST_SIZE)) {
+ dev_err(dma_chan->device->dev,
+ "Self-test copy failed compare, disabling\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+free_resources:
+ iop_adma_free_chan_resources(dma_chan);
+out:
+ kfree(src);
+ kfree(dest);
+ return err;
+}
+
+#define IOP_ADMA_NUM_SRC_TEST 4 /* must be <= 15 */
+static int
+iop_adma_xor_val_self_test(struct iop_adma_device *device)
+{
+ int i, src_idx;
+ struct page *dest;
+ struct page *xor_srcs[IOP_ADMA_NUM_SRC_TEST];
+ struct page *zero_sum_srcs[IOP_ADMA_NUM_SRC_TEST + 1];
+ dma_addr_t dma_srcs[IOP_ADMA_NUM_SRC_TEST + 1];
+ dma_addr_t dest_dma;
+ struct dma_async_tx_descriptor *tx;
+ struct dma_chan *dma_chan;
+ dma_cookie_t cookie;
+ u8 cmp_byte = 0;
+ u32 cmp_word;
+ u32 zero_sum_result;
+ int err = 0;
+ struct iop_adma_chan *iop_chan;
+
+ dev_dbg(device->common.dev, "%s\n", __func__);
+
+ for (src_idx = 0; src_idx < IOP_ADMA_NUM_SRC_TEST; src_idx++) {
+ xor_srcs[src_idx] = alloc_page(GFP_KERNEL);
+ if (!xor_srcs[src_idx]) {
+ while (src_idx--)
+ __free_page(xor_srcs[src_idx]);
+ return -ENOMEM;
+ }
+ }
+
+ dest = alloc_page(GFP_KERNEL);
+ if (!dest) {
+ while (src_idx--)
+ __free_page(xor_srcs[src_idx]);
+ return -ENOMEM;
+ }
+
+ /* Fill in src buffers */
+ for (src_idx = 0; src_idx < IOP_ADMA_NUM_SRC_TEST; src_idx++) {
+ u8 *ptr = page_address(xor_srcs[src_idx]);
+ for (i = 0; i < PAGE_SIZE; i++)
+ ptr[i] = (1 << src_idx);
+ }
+
+ for (src_idx = 0; src_idx < IOP_ADMA_NUM_SRC_TEST; src_idx++)
+ cmp_byte ^= (u8) (1 << src_idx);
+
+ cmp_word = (cmp_byte << 24) | (cmp_byte << 16) |
+ (cmp_byte << 8) | cmp_byte;
+
+ memset(page_address(dest), 0, PAGE_SIZE);
+
+ dma_chan = container_of(device->common.channels.next,
+ struct dma_chan,
+ device_node);
+ if (iop_adma_alloc_chan_resources(dma_chan) < 1) {
+ err = -ENODEV;
+ goto out;
+ }
+
+ /* test xor */
+ dest_dma = dma_map_page(dma_chan->device->dev, dest, 0,
+ PAGE_SIZE, DMA_FROM_DEVICE);
+ for (i = 0; i < IOP_ADMA_NUM_SRC_TEST; i++)
+ dma_srcs[i] = dma_map_page(dma_chan->device->dev, xor_srcs[i],
+ 0, PAGE_SIZE, DMA_TO_DEVICE);
+ tx = iop_adma_prep_dma_xor(dma_chan, dest_dma, dma_srcs,
+ IOP_ADMA_NUM_SRC_TEST, PAGE_SIZE,
+ DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+
+ cookie = iop_adma_tx_submit(tx);
+ iop_adma_issue_pending(dma_chan);
+ msleep(8);
+
+ if (iop_adma_status(dma_chan, cookie, NULL) !=
+ DMA_COMPLETE) {
+ dev_err(dma_chan->device->dev,
+ "Self-test xor timed out, disabling\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+ iop_chan = to_iop_adma_chan(dma_chan);
+ dma_sync_single_for_cpu(&iop_chan->device->pdev->dev, dest_dma,
+ PAGE_SIZE, DMA_FROM_DEVICE);
+ for (i = 0; i < (PAGE_SIZE / sizeof(u32)); i++) {
+ u32 *ptr = page_address(dest);
+ if (ptr[i] != cmp_word) {
+ dev_err(dma_chan->device->dev,
+ "Self-test xor failed compare, disabling\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+ }
+ dma_sync_single_for_device(&iop_chan->device->pdev->dev, dest_dma,
+ PAGE_SIZE, DMA_TO_DEVICE);
+
+ /* skip zero sum if the capability is not present */
+ if (!dma_has_cap(DMA_XOR_VAL, dma_chan->device->cap_mask))
+ goto free_resources;
+
+ /* zero sum the sources with the destintation page */
+ for (i = 0; i < IOP_ADMA_NUM_SRC_TEST; i++)
+ zero_sum_srcs[i] = xor_srcs[i];
+ zero_sum_srcs[i] = dest;
+
+ zero_sum_result = 1;
+
+ for (i = 0; i < IOP_ADMA_NUM_SRC_TEST + 1; i++)
+ dma_srcs[i] = dma_map_page(dma_chan->device->dev,
+ zero_sum_srcs[i], 0, PAGE_SIZE,
+ DMA_TO_DEVICE);
+ tx = iop_adma_prep_dma_xor_val(dma_chan, dma_srcs,
+ IOP_ADMA_NUM_SRC_TEST + 1, PAGE_SIZE,
+ &zero_sum_result,
+ DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+
+ cookie = iop_adma_tx_submit(tx);
+ iop_adma_issue_pending(dma_chan);
+ msleep(8);
+
+ if (iop_adma_status(dma_chan, cookie, NULL) != DMA_COMPLETE) {
+ dev_err(dma_chan->device->dev,
+ "Self-test zero sum timed out, disabling\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+ if (zero_sum_result != 0) {
+ dev_err(dma_chan->device->dev,
+ "Self-test zero sum failed compare, disabling\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+ /* test for non-zero parity sum */
+ zero_sum_result = 0;
+ for (i = 0; i < IOP_ADMA_NUM_SRC_TEST + 1; i++)
+ dma_srcs[i] = dma_map_page(dma_chan->device->dev,
+ zero_sum_srcs[i], 0, PAGE_SIZE,
+ DMA_TO_DEVICE);
+ tx = iop_adma_prep_dma_xor_val(dma_chan, dma_srcs,
+ IOP_ADMA_NUM_SRC_TEST + 1, PAGE_SIZE,
+ &zero_sum_result,
+ DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+
+ cookie = iop_adma_tx_submit(tx);
+ iop_adma_issue_pending(dma_chan);
+ msleep(8);
+
+ if (iop_adma_status(dma_chan, cookie, NULL) != DMA_COMPLETE) {
+ dev_err(dma_chan->device->dev,
+ "Self-test non-zero sum timed out, disabling\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+ if (zero_sum_result != 1) {
+ dev_err(dma_chan->device->dev,
+ "Self-test non-zero sum failed compare, disabling\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+free_resources:
+ iop_adma_free_chan_resources(dma_chan);
+out:
+ src_idx = IOP_ADMA_NUM_SRC_TEST;
+ while (src_idx--)
+ __free_page(xor_srcs[src_idx]);
+ __free_page(dest);
+ return err;
+}
+
+#ifdef CONFIG_RAID6_PQ
+static int
+iop_adma_pq_zero_sum_self_test(struct iop_adma_device *device)
+{
+ /* combined sources, software pq results, and extra hw pq results */
+ struct page *pq[IOP_ADMA_NUM_SRC_TEST+2+2];
+ /* ptr to the extra hw pq buffers defined above */
+ struct page **pq_hw = &pq[IOP_ADMA_NUM_SRC_TEST+2];
+ /* address conversion buffers (dma_map / page_address) */
+ void *pq_sw[IOP_ADMA_NUM_SRC_TEST+2];
+ dma_addr_t pq_src[IOP_ADMA_NUM_SRC_TEST+2];
+ dma_addr_t *pq_dest = &pq_src[IOP_ADMA_NUM_SRC_TEST];
+
+ int i;
+ struct dma_async_tx_descriptor *tx;
+ struct dma_chan *dma_chan;
+ dma_cookie_t cookie;
+ u32 zero_sum_result;
+ int err = 0;
+ struct device *dev;
+
+ dev_dbg(device->common.dev, "%s\n", __func__);
+
+ for (i = 0; i < ARRAY_SIZE(pq); i++) {
+ pq[i] = alloc_page(GFP_KERNEL);
+ if (!pq[i]) {
+ while (i--)
+ __free_page(pq[i]);
+ return -ENOMEM;
+ }
+ }
+
+ /* Fill in src buffers */
+ for (i = 0; i < IOP_ADMA_NUM_SRC_TEST; i++) {
+ pq_sw[i] = page_address(pq[i]);
+ memset(pq_sw[i], 0x11111111 * (1<<i), PAGE_SIZE);
+ }
+ pq_sw[i] = page_address(pq[i]);
+ pq_sw[i+1] = page_address(pq[i+1]);
+
+ dma_chan = container_of(device->common.channels.next,
+ struct dma_chan,
+ device_node);
+ if (iop_adma_alloc_chan_resources(dma_chan) < 1) {
+ err = -ENODEV;
+ goto out;
+ }
+
+ dev = dma_chan->device->dev;
+
+ /* initialize the dests */
+ memset(page_address(pq_hw[0]), 0 , PAGE_SIZE);
+ memset(page_address(pq_hw[1]), 0 , PAGE_SIZE);
+
+ /* test pq */
+ pq_dest[0] = dma_map_page(dev, pq_hw[0], 0, PAGE_SIZE, DMA_FROM_DEVICE);
+ pq_dest[1] = dma_map_page(dev, pq_hw[1], 0, PAGE_SIZE, DMA_FROM_DEVICE);
+ for (i = 0; i < IOP_ADMA_NUM_SRC_TEST; i++)
+ pq_src[i] = dma_map_page(dev, pq[i], 0, PAGE_SIZE,
+ DMA_TO_DEVICE);
+
+ tx = iop_adma_prep_dma_pq(dma_chan, pq_dest, pq_src,
+ IOP_ADMA_NUM_SRC_TEST, (u8 *)raid6_gfexp,
+ PAGE_SIZE,
+ DMA_PREP_INTERRUPT |
+ DMA_CTRL_ACK);
+
+ cookie = iop_adma_tx_submit(tx);
+ iop_adma_issue_pending(dma_chan);
+ msleep(8);
+
+ if (iop_adma_status(dma_chan, cookie, NULL) !=
+ DMA_COMPLETE) {
+ dev_err(dev, "Self-test pq timed out, disabling\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+ raid6_call.gen_syndrome(IOP_ADMA_NUM_SRC_TEST+2, PAGE_SIZE, pq_sw);
+
+ if (memcmp(pq_sw[IOP_ADMA_NUM_SRC_TEST],
+ page_address(pq_hw[0]), PAGE_SIZE) != 0) {
+ dev_err(dev, "Self-test p failed compare, disabling\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+ if (memcmp(pq_sw[IOP_ADMA_NUM_SRC_TEST+1],
+ page_address(pq_hw[1]), PAGE_SIZE) != 0) {
+ dev_err(dev, "Self-test q failed compare, disabling\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+ /* test correct zero sum using the software generated pq values */
+ for (i = 0; i < IOP_ADMA_NUM_SRC_TEST + 2; i++)
+ pq_src[i] = dma_map_page(dev, pq[i], 0, PAGE_SIZE,
+ DMA_TO_DEVICE);
+
+ zero_sum_result = ~0;
+ tx = iop_adma_prep_dma_pq_val(dma_chan, &pq_src[IOP_ADMA_NUM_SRC_TEST],
+ pq_src, IOP_ADMA_NUM_SRC_TEST,
+ raid6_gfexp, PAGE_SIZE, &zero_sum_result,
+ DMA_PREP_INTERRUPT|DMA_CTRL_ACK);
+
+ cookie = iop_adma_tx_submit(tx);
+ iop_adma_issue_pending(dma_chan);
+ msleep(8);
+
+ if (iop_adma_status(dma_chan, cookie, NULL) !=
+ DMA_COMPLETE) {
+ dev_err(dev, "Self-test pq-zero-sum timed out, disabling\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+ if (zero_sum_result != 0) {
+ dev_err(dev, "Self-test pq-zero-sum failed to validate: %x\n",
+ zero_sum_result);
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+ /* test incorrect zero sum */
+ i = IOP_ADMA_NUM_SRC_TEST;
+ memset(pq_sw[i] + 100, 0, 100);
+ memset(pq_sw[i+1] + 200, 0, 200);
+ for (i = 0; i < IOP_ADMA_NUM_SRC_TEST + 2; i++)
+ pq_src[i] = dma_map_page(dev, pq[i], 0, PAGE_SIZE,
+ DMA_TO_DEVICE);
+
+ zero_sum_result = 0;
+ tx = iop_adma_prep_dma_pq_val(dma_chan, &pq_src[IOP_ADMA_NUM_SRC_TEST],
+ pq_src, IOP_ADMA_NUM_SRC_TEST,
+ raid6_gfexp, PAGE_SIZE, &zero_sum_result,
+ DMA_PREP_INTERRUPT|DMA_CTRL_ACK);
+
+ cookie = iop_adma_tx_submit(tx);
+ iop_adma_issue_pending(dma_chan);
+ msleep(8);
+
+ if (iop_adma_status(dma_chan, cookie, NULL) !=
+ DMA_COMPLETE) {
+ dev_err(dev, "Self-test !pq-zero-sum timed out, disabling\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+ if (zero_sum_result != (SUM_CHECK_P_RESULT | SUM_CHECK_Q_RESULT)) {
+ dev_err(dev, "Self-test !pq-zero-sum failed to validate: %x\n",
+ zero_sum_result);
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+free_resources:
+ iop_adma_free_chan_resources(dma_chan);
+out:
+ i = ARRAY_SIZE(pq);
+ while (i--)
+ __free_page(pq[i]);
+ return err;
+}
+#endif
+
+static int iop_adma_remove(struct platform_device *dev)
+{
+ struct iop_adma_device *device = platform_get_drvdata(dev);
+ struct dma_chan *chan, *_chan;
+ struct iop_adma_chan *iop_chan;
+ struct iop_adma_platform_data *plat_data = dev_get_platdata(&dev->dev);
+
+ dma_async_device_unregister(&device->common);
+
+ dma_free_coherent(&dev->dev, plat_data->pool_size,
+ device->dma_desc_pool_virt, device->dma_desc_pool);
+
+ list_for_each_entry_safe(chan, _chan, &device->common.channels,
+ device_node) {
+ iop_chan = to_iop_adma_chan(chan);
+ list_del(&chan->device_node);
+ kfree(iop_chan);
+ }
+ kfree(device);
+
+ return 0;
+}
+
+static int iop_adma_probe(struct platform_device *pdev)
+{
+ struct resource *res;
+ int ret = 0, i;
+ struct iop_adma_device *adev;
+ struct iop_adma_chan *iop_chan;
+ struct dma_device *dma_dev;
+ struct iop_adma_platform_data *plat_data = dev_get_platdata(&pdev->dev);
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ if (!res)
+ return -ENODEV;
+
+ if (!devm_request_mem_region(&pdev->dev, res->start,
+ resource_size(res), pdev->name))
+ return -EBUSY;
+
+ adev = kzalloc(sizeof(*adev), GFP_KERNEL);
+ if (!adev)
+ return -ENOMEM;
+ dma_dev = &adev->common;
+
+ /* allocate coherent memory for hardware descriptors
+ * note: writecombine gives slightly better performance, but
+ * requires that we explicitly flush the writes
+ */
+ adev->dma_desc_pool_virt = dma_alloc_wc(&pdev->dev,
+ plat_data->pool_size,
+ &adev->dma_desc_pool,
+ GFP_KERNEL);
+ if (!adev->dma_desc_pool_virt) {
+ ret = -ENOMEM;
+ goto err_free_adev;
+ }
+
+ dev_dbg(&pdev->dev, "%s: allocated descriptor pool virt %p phys %pad\n",
+ __func__, adev->dma_desc_pool_virt, &adev->dma_desc_pool);
+
+ adev->id = plat_data->hw_id;
+
+ /* discover transaction capabilites from the platform data */
+ dma_dev->cap_mask = plat_data->cap_mask;
+
+ adev->pdev = pdev;
+ platform_set_drvdata(pdev, adev);
+
+ INIT_LIST_HEAD(&dma_dev->channels);
+
+ /* set base routines */
+ dma_dev->device_alloc_chan_resources = iop_adma_alloc_chan_resources;
+ dma_dev->device_free_chan_resources = iop_adma_free_chan_resources;
+ dma_dev->device_tx_status = iop_adma_status;
+ dma_dev->device_issue_pending = iop_adma_issue_pending;
+ dma_dev->dev = &pdev->dev;
+
+ /* set prep routines based on capability */
+ if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask))
+ dma_dev->device_prep_dma_memcpy = iop_adma_prep_dma_memcpy;
+ if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) {
+ dma_dev->max_xor = iop_adma_get_max_xor();
+ dma_dev->device_prep_dma_xor = iop_adma_prep_dma_xor;
+ }
+ if (dma_has_cap(DMA_XOR_VAL, dma_dev->cap_mask))
+ dma_dev->device_prep_dma_xor_val =
+ iop_adma_prep_dma_xor_val;
+ if (dma_has_cap(DMA_PQ, dma_dev->cap_mask)) {
+ dma_set_maxpq(dma_dev, iop_adma_get_max_pq(), 0);
+ dma_dev->device_prep_dma_pq = iop_adma_prep_dma_pq;
+ }
+ if (dma_has_cap(DMA_PQ_VAL, dma_dev->cap_mask))
+ dma_dev->device_prep_dma_pq_val =
+ iop_adma_prep_dma_pq_val;
+ if (dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask))
+ dma_dev->device_prep_dma_interrupt =
+ iop_adma_prep_dma_interrupt;
+
+ iop_chan = kzalloc(sizeof(*iop_chan), GFP_KERNEL);
+ if (!iop_chan) {
+ ret = -ENOMEM;
+ goto err_free_dma;
+ }
+ iop_chan->device = adev;
+
+ iop_chan->mmr_base = devm_ioremap(&pdev->dev, res->start,
+ resource_size(res));
+ if (!iop_chan->mmr_base) {
+ ret = -ENOMEM;
+ goto err_free_iop_chan;
+ }
+ tasklet_setup(&iop_chan->irq_tasklet, iop_adma_tasklet);
+
+ /* clear errors before enabling interrupts */
+ iop_adma_device_clear_err_status(iop_chan);
+
+ for (i = 0; i < 3; i++) {
+ static const irq_handler_t handler[] = {
+ iop_adma_eot_handler,
+ iop_adma_eoc_handler,
+ iop_adma_err_handler
+ };
+ int irq = platform_get_irq(pdev, i);
+ if (irq < 0) {
+ ret = -ENXIO;
+ goto err_free_iop_chan;
+ } else {
+ ret = devm_request_irq(&pdev->dev, irq,
+ handler[i], 0, pdev->name, iop_chan);
+ if (ret)
+ goto err_free_iop_chan;
+ }
+ }
+
+ spin_lock_init(&iop_chan->lock);
+ INIT_LIST_HEAD(&iop_chan->chain);
+ INIT_LIST_HEAD(&iop_chan->all_slots);
+ iop_chan->common.device = dma_dev;
+ dma_cookie_init(&iop_chan->common);
+ list_add_tail(&iop_chan->common.device_node, &dma_dev->channels);
+
+ if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask)) {
+ ret = iop_adma_memcpy_self_test(adev);
+ dev_dbg(&pdev->dev, "memcpy self test returned %d\n", ret);
+ if (ret)
+ goto err_free_iop_chan;
+ }
+
+ if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) {
+ ret = iop_adma_xor_val_self_test(adev);
+ dev_dbg(&pdev->dev, "xor self test returned %d\n", ret);
+ if (ret)
+ goto err_free_iop_chan;
+ }
+
+ if (dma_has_cap(DMA_PQ, dma_dev->cap_mask) &&
+ dma_has_cap(DMA_PQ_VAL, dma_dev->cap_mask)) {
+ #ifdef CONFIG_RAID6_PQ
+ ret = iop_adma_pq_zero_sum_self_test(adev);
+ dev_dbg(&pdev->dev, "pq self test returned %d\n", ret);
+ #else
+ /* can not test raid6, so do not publish capability */
+ dma_cap_clear(DMA_PQ, dma_dev->cap_mask);
+ dma_cap_clear(DMA_PQ_VAL, dma_dev->cap_mask);
+ ret = 0;
+ #endif
+ if (ret)
+ goto err_free_iop_chan;
+ }
+
+ dev_info(&pdev->dev, "Intel(R) IOP: ( %s%s%s%s%s%s)\n",
+ dma_has_cap(DMA_PQ, dma_dev->cap_mask) ? "pq " : "",
+ dma_has_cap(DMA_PQ_VAL, dma_dev->cap_mask) ? "pq_val " : "",
+ dma_has_cap(DMA_XOR, dma_dev->cap_mask) ? "xor " : "",
+ dma_has_cap(DMA_XOR_VAL, dma_dev->cap_mask) ? "xor_val " : "",
+ dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask) ? "cpy " : "",
+ dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask) ? "intr " : "");
+
+ dma_async_device_register(dma_dev);
+ goto out;
+
+ err_free_iop_chan:
+ kfree(iop_chan);
+ err_free_dma:
+ dma_free_coherent(&adev->pdev->dev, plat_data->pool_size,
+ adev->dma_desc_pool_virt, adev->dma_desc_pool);
+ err_free_adev:
+ kfree(adev);
+ out:
+ return ret;
+}
+
+static void iop_chan_start_null_memcpy(struct iop_adma_chan *iop_chan)
+{
+ struct iop_adma_desc_slot *sw_desc, *grp_start;
+ dma_cookie_t cookie;
+ int slot_cnt, slots_per_op;
+
+ dev_dbg(iop_chan->device->common.dev, "%s\n", __func__);
+
+ spin_lock_bh(&iop_chan->lock);
+ slot_cnt = iop_chan_memcpy_slot_count(0, &slots_per_op);
+ sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
+ if (sw_desc) {
+ grp_start = sw_desc->group_head;
+
+ list_splice_init(&sw_desc->tx_list, &iop_chan->chain);
+ async_tx_ack(&sw_desc->async_tx);
+ iop_desc_init_memcpy(grp_start, 0);
+ iop_desc_set_byte_count(grp_start, iop_chan, 0);
+ iop_desc_set_dest_addr(grp_start, iop_chan, 0);
+ iop_desc_set_memcpy_src_addr(grp_start, 0);
+
+ cookie = dma_cookie_assign(&sw_desc->async_tx);
+
+ /* initialize the completed cookie to be less than
+ * the most recently used cookie
+ */
+ iop_chan->common.completed_cookie = cookie - 1;
+
+ /* channel should not be busy */
+ BUG_ON(iop_chan_is_busy(iop_chan));
+
+ /* clear any prior error-status bits */
+ iop_adma_device_clear_err_status(iop_chan);
+
+ /* disable operation */
+ iop_chan_disable(iop_chan);
+
+ /* set the descriptor address */
+ iop_chan_set_next_descriptor(iop_chan, sw_desc->async_tx.phys);
+
+ /* 1/ don't add pre-chained descriptors
+ * 2/ dummy read to flush next_desc write
+ */
+ BUG_ON(iop_desc_get_next_desc(sw_desc));
+
+ /* run the descriptor */
+ iop_chan_enable(iop_chan);
+ } else
+ dev_err(iop_chan->device->common.dev,
+ "failed to allocate null descriptor\n");
+ spin_unlock_bh(&iop_chan->lock);
+}
+
+static void iop_chan_start_null_xor(struct iop_adma_chan *iop_chan)
+{
+ struct iop_adma_desc_slot *sw_desc, *grp_start;
+ dma_cookie_t cookie;
+ int slot_cnt, slots_per_op;
+
+ dev_dbg(iop_chan->device->common.dev, "%s\n", __func__);
+
+ spin_lock_bh(&iop_chan->lock);
+ slot_cnt = iop_chan_xor_slot_count(0, 2, &slots_per_op);
+ sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
+ if (sw_desc) {
+ grp_start = sw_desc->group_head;
+ list_splice_init(&sw_desc->tx_list, &iop_chan->chain);
+ async_tx_ack(&sw_desc->async_tx);
+ iop_desc_init_null_xor(grp_start, 2, 0);
+ iop_desc_set_byte_count(grp_start, iop_chan, 0);
+ iop_desc_set_dest_addr(grp_start, iop_chan, 0);
+ iop_desc_set_xor_src_addr(grp_start, 0, 0);
+ iop_desc_set_xor_src_addr(grp_start, 1, 0);
+
+ cookie = dma_cookie_assign(&sw_desc->async_tx);
+
+ /* initialize the completed cookie to be less than
+ * the most recently used cookie
+ */
+ iop_chan->common.completed_cookie = cookie - 1;
+
+ /* channel should not be busy */
+ BUG_ON(iop_chan_is_busy(iop_chan));
+
+ /* clear any prior error-status bits */
+ iop_adma_device_clear_err_status(iop_chan);
+
+ /* disable operation */
+ iop_chan_disable(iop_chan);
+
+ /* set the descriptor address */
+ iop_chan_set_next_descriptor(iop_chan, sw_desc->async_tx.phys);
+
+ /* 1/ don't add pre-chained descriptors
+ * 2/ dummy read to flush next_desc write
+ */
+ BUG_ON(iop_desc_get_next_desc(sw_desc));
+
+ /* run the descriptor */
+ iop_chan_enable(iop_chan);
+ } else
+ dev_err(iop_chan->device->common.dev,
+ "failed to allocate null descriptor\n");
+ spin_unlock_bh(&iop_chan->lock);
+}
+
+static struct platform_driver iop_adma_driver = {
+ .probe = iop_adma_probe,
+ .remove = iop_adma_remove,
+ .driver = {
+ .name = "iop-adma",
+ },
+};
+
+module_platform_driver(iop_adma_driver);
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_DESCRIPTION("IOP ADMA Engine Driver");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:iop-adma");
diff --git a/drivers/dma/iop-adma.h b/drivers/dma/iop-adma.h
new file mode 100644
index 000000000..d44eabb6f
--- /dev/null
+++ b/drivers/dma/iop-adma.h
@@ -0,0 +1,914 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright © 2006, Intel Corporation.
+ */
+#ifndef _ADMA_H
+#define _ADMA_H
+#include <linux/types.h>
+#include <linux/io.h>
+#include <linux/platform_data/dma-iop32x.h>
+
+/* Memory copy units */
+#define DMA_CCR(chan) (chan->mmr_base + 0x0)
+#define DMA_CSR(chan) (chan->mmr_base + 0x4)
+#define DMA_DAR(chan) (chan->mmr_base + 0xc)
+#define DMA_NDAR(chan) (chan->mmr_base + 0x10)
+#define DMA_PADR(chan) (chan->mmr_base + 0x14)
+#define DMA_PUADR(chan) (chan->mmr_base + 0x18)
+#define DMA_LADR(chan) (chan->mmr_base + 0x1c)
+#define DMA_BCR(chan) (chan->mmr_base + 0x20)
+#define DMA_DCR(chan) (chan->mmr_base + 0x24)
+
+/* Application accelerator unit */
+#define AAU_ACR(chan) (chan->mmr_base + 0x0)
+#define AAU_ASR(chan) (chan->mmr_base + 0x4)
+#define AAU_ADAR(chan) (chan->mmr_base + 0x8)
+#define AAU_ANDAR(chan) (chan->mmr_base + 0xc)
+#define AAU_SAR(src, chan) (chan->mmr_base + (0x10 + ((src) << 2)))
+#define AAU_DAR(chan) (chan->mmr_base + 0x20)
+#define AAU_ABCR(chan) (chan->mmr_base + 0x24)
+#define AAU_ADCR(chan) (chan->mmr_base + 0x28)
+#define AAU_SAR_EDCR(src_edc) (chan->mmr_base + (0x02c + ((src_edc-4) << 2)))
+#define AAU_EDCR0_IDX 8
+#define AAU_EDCR1_IDX 17
+#define AAU_EDCR2_IDX 26
+
+struct iop3xx_aau_desc_ctrl {
+ unsigned int int_en:1;
+ unsigned int blk1_cmd_ctrl:3;
+ unsigned int blk2_cmd_ctrl:3;
+ unsigned int blk3_cmd_ctrl:3;
+ unsigned int blk4_cmd_ctrl:3;
+ unsigned int blk5_cmd_ctrl:3;
+ unsigned int blk6_cmd_ctrl:3;
+ unsigned int blk7_cmd_ctrl:3;
+ unsigned int blk8_cmd_ctrl:3;
+ unsigned int blk_ctrl:2;
+ unsigned int dual_xor_en:1;
+ unsigned int tx_complete:1;
+ unsigned int zero_result_err:1;
+ unsigned int zero_result_en:1;
+ unsigned int dest_write_en:1;
+};
+
+struct iop3xx_aau_e_desc_ctrl {
+ unsigned int reserved:1;
+ unsigned int blk1_cmd_ctrl:3;
+ unsigned int blk2_cmd_ctrl:3;
+ unsigned int blk3_cmd_ctrl:3;
+ unsigned int blk4_cmd_ctrl:3;
+ unsigned int blk5_cmd_ctrl:3;
+ unsigned int blk6_cmd_ctrl:3;
+ unsigned int blk7_cmd_ctrl:3;
+ unsigned int blk8_cmd_ctrl:3;
+ unsigned int reserved2:7;
+};
+
+struct iop3xx_dma_desc_ctrl {
+ unsigned int pci_transaction:4;
+ unsigned int int_en:1;
+ unsigned int dac_cycle_en:1;
+ unsigned int mem_to_mem_en:1;
+ unsigned int crc_data_tx_en:1;
+ unsigned int crc_gen_en:1;
+ unsigned int crc_seed_dis:1;
+ unsigned int reserved:21;
+ unsigned int crc_tx_complete:1;
+};
+
+struct iop3xx_desc_dma {
+ u32 next_desc;
+ union {
+ u32 pci_src_addr;
+ u32 pci_dest_addr;
+ u32 src_addr;
+ };
+ union {
+ u32 upper_pci_src_addr;
+ u32 upper_pci_dest_addr;
+ };
+ union {
+ u32 local_pci_src_addr;
+ u32 local_pci_dest_addr;
+ u32 dest_addr;
+ };
+ u32 byte_count;
+ union {
+ u32 desc_ctrl;
+ struct iop3xx_dma_desc_ctrl desc_ctrl_field;
+ };
+ u32 crc_addr;
+};
+
+struct iop3xx_desc_aau {
+ u32 next_desc;
+ u32 src[4];
+ u32 dest_addr;
+ u32 byte_count;
+ union {
+ u32 desc_ctrl;
+ struct iop3xx_aau_desc_ctrl desc_ctrl_field;
+ };
+ union {
+ u32 src_addr;
+ u32 e_desc_ctrl;
+ struct iop3xx_aau_e_desc_ctrl e_desc_ctrl_field;
+ } src_edc[31];
+};
+
+struct iop3xx_aau_gfmr {
+ unsigned int gfmr1:8;
+ unsigned int gfmr2:8;
+ unsigned int gfmr3:8;
+ unsigned int gfmr4:8;
+};
+
+struct iop3xx_desc_pq_xor {
+ u32 next_desc;
+ u32 src[3];
+ union {
+ u32 data_mult1;
+ struct iop3xx_aau_gfmr data_mult1_field;
+ };
+ u32 dest_addr;
+ u32 byte_count;
+ union {
+ u32 desc_ctrl;
+ struct iop3xx_aau_desc_ctrl desc_ctrl_field;
+ };
+ union {
+ u32 src_addr;
+ u32 e_desc_ctrl;
+ struct iop3xx_aau_e_desc_ctrl e_desc_ctrl_field;
+ u32 data_multiplier;
+ struct iop3xx_aau_gfmr data_mult_field;
+ u32 reserved;
+ } src_edc_gfmr[19];
+};
+
+struct iop3xx_desc_dual_xor {
+ u32 next_desc;
+ u32 src0_addr;
+ u32 src1_addr;
+ u32 h_src_addr;
+ u32 d_src_addr;
+ u32 h_dest_addr;
+ u32 byte_count;
+ union {
+ u32 desc_ctrl;
+ struct iop3xx_aau_desc_ctrl desc_ctrl_field;
+ };
+ u32 d_dest_addr;
+};
+
+union iop3xx_desc {
+ struct iop3xx_desc_aau *aau;
+ struct iop3xx_desc_dma *dma;
+ struct iop3xx_desc_pq_xor *pq_xor;
+ struct iop3xx_desc_dual_xor *dual_xor;
+ void *ptr;
+};
+
+/* No support for p+q operations */
+static inline int
+iop_chan_pq_slot_count(size_t len, int src_cnt, int *slots_per_op)
+{
+ BUG();
+ return 0;
+}
+
+static inline void
+iop_desc_init_pq(struct iop_adma_desc_slot *desc, int src_cnt,
+ unsigned long flags)
+{
+ BUG();
+}
+
+static inline void
+iop_desc_set_pq_addr(struct iop_adma_desc_slot *desc, dma_addr_t *addr)
+{
+ BUG();
+}
+
+static inline void
+iop_desc_set_pq_src_addr(struct iop_adma_desc_slot *desc, int src_idx,
+ dma_addr_t addr, unsigned char coef)
+{
+ BUG();
+}
+
+static inline int
+iop_chan_pq_zero_sum_slot_count(size_t len, int src_cnt, int *slots_per_op)
+{
+ BUG();
+ return 0;
+}
+
+static inline void
+iop_desc_init_pq_zero_sum(struct iop_adma_desc_slot *desc, int src_cnt,
+ unsigned long flags)
+{
+ BUG();
+}
+
+static inline void
+iop_desc_set_pq_zero_sum_byte_count(struct iop_adma_desc_slot *desc, u32 len)
+{
+ BUG();
+}
+
+#define iop_desc_set_pq_zero_sum_src_addr iop_desc_set_pq_src_addr
+
+static inline void
+iop_desc_set_pq_zero_sum_addr(struct iop_adma_desc_slot *desc, int pq_idx,
+ dma_addr_t *src)
+{
+ BUG();
+}
+
+static inline int iop_adma_get_max_xor(void)
+{
+ return 32;
+}
+
+static inline int iop_adma_get_max_pq(void)
+{
+ BUG();
+ return 0;
+}
+
+static inline u32 iop_chan_get_current_descriptor(struct iop_adma_chan *chan)
+{
+ int id = chan->device->id;
+
+ switch (id) {
+ case DMA0_ID:
+ case DMA1_ID:
+ return __raw_readl(DMA_DAR(chan));
+ case AAU_ID:
+ return __raw_readl(AAU_ADAR(chan));
+ default:
+ BUG();
+ }
+ return 0;
+}
+
+static inline void iop_chan_set_next_descriptor(struct iop_adma_chan *chan,
+ u32 next_desc_addr)
+{
+ int id = chan->device->id;
+
+ switch (id) {
+ case DMA0_ID:
+ case DMA1_ID:
+ __raw_writel(next_desc_addr, DMA_NDAR(chan));
+ break;
+ case AAU_ID:
+ __raw_writel(next_desc_addr, AAU_ANDAR(chan));
+ break;
+ }
+
+}
+
+#define IOP_ADMA_STATUS_BUSY (1 << 10)
+#define IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT (1024)
+#define IOP_ADMA_XOR_MAX_BYTE_COUNT (16 * 1024 * 1024)
+#define IOP_ADMA_MAX_BYTE_COUNT (16 * 1024 * 1024)
+
+static inline int iop_chan_is_busy(struct iop_adma_chan *chan)
+{
+ u32 status = __raw_readl(DMA_CSR(chan));
+ return (status & IOP_ADMA_STATUS_BUSY) ? 1 : 0;
+}
+
+static inline int iop_desc_is_aligned(struct iop_adma_desc_slot *desc,
+ int num_slots)
+{
+ /* num_slots will only ever be 1, 2, 4, or 8 */
+ return (desc->idx & (num_slots - 1)) ? 0 : 1;
+}
+
+/* to do: support large (i.e. > hw max) buffer sizes */
+static inline int iop_chan_memcpy_slot_count(size_t len, int *slots_per_op)
+{
+ *slots_per_op = 1;
+ return 1;
+}
+
+/* to do: support large (i.e. > hw max) buffer sizes */
+static inline int iop_chan_memset_slot_count(size_t len, int *slots_per_op)
+{
+ *slots_per_op = 1;
+ return 1;
+}
+
+static inline int iop3xx_aau_xor_slot_count(size_t len, int src_cnt,
+ int *slots_per_op)
+{
+ static const char slot_count_table[] = {
+ 1, 1, 1, 1, /* 01 - 04 */
+ 2, 2, 2, 2, /* 05 - 08 */
+ 4, 4, 4, 4, /* 09 - 12 */
+ 4, 4, 4, 4, /* 13 - 16 */
+ 8, 8, 8, 8, /* 17 - 20 */
+ 8, 8, 8, 8, /* 21 - 24 */
+ 8, 8, 8, 8, /* 25 - 28 */
+ 8, 8, 8, 8, /* 29 - 32 */
+ };
+ *slots_per_op = slot_count_table[src_cnt - 1];
+ return *slots_per_op;
+}
+
+static inline int
+iop_chan_interrupt_slot_count(int *slots_per_op, struct iop_adma_chan *chan)
+{
+ switch (chan->device->id) {
+ case DMA0_ID:
+ case DMA1_ID:
+ return iop_chan_memcpy_slot_count(0, slots_per_op);
+ case AAU_ID:
+ return iop3xx_aau_xor_slot_count(0, 2, slots_per_op);
+ default:
+ BUG();
+ }
+ return 0;
+}
+
+static inline int iop_chan_xor_slot_count(size_t len, int src_cnt,
+ int *slots_per_op)
+{
+ int slot_cnt = iop3xx_aau_xor_slot_count(len, src_cnt, slots_per_op);
+
+ if (len <= IOP_ADMA_XOR_MAX_BYTE_COUNT)
+ return slot_cnt;
+
+ len -= IOP_ADMA_XOR_MAX_BYTE_COUNT;
+ while (len > IOP_ADMA_XOR_MAX_BYTE_COUNT) {
+ len -= IOP_ADMA_XOR_MAX_BYTE_COUNT;
+ slot_cnt += *slots_per_op;
+ }
+
+ slot_cnt += *slots_per_op;
+
+ return slot_cnt;
+}
+
+/* zero sum on iop3xx is limited to 1k at a time so it requires multiple
+ * descriptors
+ */
+static inline int iop_chan_zero_sum_slot_count(size_t len, int src_cnt,
+ int *slots_per_op)
+{
+ int slot_cnt = iop3xx_aau_xor_slot_count(len, src_cnt, slots_per_op);
+
+ if (len <= IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT)
+ return slot_cnt;
+
+ len -= IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT;
+ while (len > IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT) {
+ len -= IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT;
+ slot_cnt += *slots_per_op;
+ }
+
+ slot_cnt += *slots_per_op;
+
+ return slot_cnt;
+}
+
+static inline u32 iop_desc_get_byte_count(struct iop_adma_desc_slot *desc,
+ struct iop_adma_chan *chan)
+{
+ union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, };
+
+ switch (chan->device->id) {
+ case DMA0_ID:
+ case DMA1_ID:
+ return hw_desc.dma->byte_count;
+ case AAU_ID:
+ return hw_desc.aau->byte_count;
+ default:
+ BUG();
+ }
+ return 0;
+}
+
+/* translate the src_idx to a descriptor word index */
+static inline int __desc_idx(int src_idx)
+{
+ static const int desc_idx_table[] = { 0, 0, 0, 0,
+ 0, 1, 2, 3,
+ 5, 6, 7, 8,
+ 9, 10, 11, 12,
+ 14, 15, 16, 17,
+ 18, 19, 20, 21,
+ 23, 24, 25, 26,
+ 27, 28, 29, 30,
+ };
+
+ return desc_idx_table[src_idx];
+}
+
+static inline u32 iop_desc_get_src_addr(struct iop_adma_desc_slot *desc,
+ struct iop_adma_chan *chan,
+ int src_idx)
+{
+ union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, };
+
+ switch (chan->device->id) {
+ case DMA0_ID:
+ case DMA1_ID:
+ return hw_desc.dma->src_addr;
+ case AAU_ID:
+ break;
+ default:
+ BUG();
+ }
+
+ if (src_idx < 4)
+ return hw_desc.aau->src[src_idx];
+ else
+ return hw_desc.aau->src_edc[__desc_idx(src_idx)].src_addr;
+}
+
+static inline void iop3xx_aau_desc_set_src_addr(struct iop3xx_desc_aau *hw_desc,
+ int src_idx, dma_addr_t addr)
+{
+ if (src_idx < 4)
+ hw_desc->src[src_idx] = addr;
+ else
+ hw_desc->src_edc[__desc_idx(src_idx)].src_addr = addr;
+}
+
+static inline void
+iop_desc_init_memcpy(struct iop_adma_desc_slot *desc, unsigned long flags)
+{
+ struct iop3xx_desc_dma *hw_desc = desc->hw_desc;
+ union {
+ u32 value;
+ struct iop3xx_dma_desc_ctrl field;
+ } u_desc_ctrl;
+
+ u_desc_ctrl.value = 0;
+ u_desc_ctrl.field.mem_to_mem_en = 1;
+ u_desc_ctrl.field.pci_transaction = 0xe; /* memory read block */
+ u_desc_ctrl.field.int_en = flags & DMA_PREP_INTERRUPT;
+ hw_desc->desc_ctrl = u_desc_ctrl.value;
+ hw_desc->upper_pci_src_addr = 0;
+ hw_desc->crc_addr = 0;
+}
+
+static inline void
+iop_desc_init_memset(struct iop_adma_desc_slot *desc, unsigned long flags)
+{
+ struct iop3xx_desc_aau *hw_desc = desc->hw_desc;
+ union {
+ u32 value;
+ struct iop3xx_aau_desc_ctrl field;
+ } u_desc_ctrl;
+
+ u_desc_ctrl.value = 0;
+ u_desc_ctrl.field.blk1_cmd_ctrl = 0x2; /* memory block fill */
+ u_desc_ctrl.field.dest_write_en = 1;
+ u_desc_ctrl.field.int_en = flags & DMA_PREP_INTERRUPT;
+ hw_desc->desc_ctrl = u_desc_ctrl.value;
+}
+
+static inline u32
+iop3xx_desc_init_xor(struct iop3xx_desc_aau *hw_desc, int src_cnt,
+ unsigned long flags)
+{
+ int i, shift;
+ u32 edcr;
+ union {
+ u32 value;
+ struct iop3xx_aau_desc_ctrl field;
+ } u_desc_ctrl;
+
+ u_desc_ctrl.value = 0;
+ switch (src_cnt) {
+ case 25 ... 32:
+ u_desc_ctrl.field.blk_ctrl = 0x3; /* use EDCR[2:0] */
+ edcr = 0;
+ shift = 1;
+ for (i = 24; i < src_cnt; i++) {
+ edcr |= (1 << shift);
+ shift += 3;
+ }
+ hw_desc->src_edc[AAU_EDCR2_IDX].e_desc_ctrl = edcr;
+ src_cnt = 24;
+ fallthrough;
+ case 17 ... 24:
+ if (!u_desc_ctrl.field.blk_ctrl) {
+ hw_desc->src_edc[AAU_EDCR2_IDX].e_desc_ctrl = 0;
+ u_desc_ctrl.field.blk_ctrl = 0x3; /* use EDCR[2:0] */
+ }
+ edcr = 0;
+ shift = 1;
+ for (i = 16; i < src_cnt; i++) {
+ edcr |= (1 << shift);
+ shift += 3;
+ }
+ hw_desc->src_edc[AAU_EDCR1_IDX].e_desc_ctrl = edcr;
+ src_cnt = 16;
+ fallthrough;
+ case 9 ... 16:
+ if (!u_desc_ctrl.field.blk_ctrl)
+ u_desc_ctrl.field.blk_ctrl = 0x2; /* use EDCR0 */
+ edcr = 0;
+ shift = 1;
+ for (i = 8; i < src_cnt; i++) {
+ edcr |= (1 << shift);
+ shift += 3;
+ }
+ hw_desc->src_edc[AAU_EDCR0_IDX].e_desc_ctrl = edcr;
+ src_cnt = 8;
+ fallthrough;
+ case 2 ... 8:
+ shift = 1;
+ for (i = 0; i < src_cnt; i++) {
+ u_desc_ctrl.value |= (1 << shift);
+ shift += 3;
+ }
+
+ if (!u_desc_ctrl.field.blk_ctrl && src_cnt > 4)
+ u_desc_ctrl.field.blk_ctrl = 0x1; /* use mini-desc */
+ }
+
+ u_desc_ctrl.field.dest_write_en = 1;
+ u_desc_ctrl.field.blk1_cmd_ctrl = 0x7; /* direct fill */
+ u_desc_ctrl.field.int_en = flags & DMA_PREP_INTERRUPT;
+ hw_desc->desc_ctrl = u_desc_ctrl.value;
+
+ return u_desc_ctrl.value;
+}
+
+static inline void
+iop_desc_init_xor(struct iop_adma_desc_slot *desc, int src_cnt,
+ unsigned long flags)
+{
+ iop3xx_desc_init_xor(desc->hw_desc, src_cnt, flags);
+}
+
+/* return the number of operations */
+static inline int
+iop_desc_init_zero_sum(struct iop_adma_desc_slot *desc, int src_cnt,
+ unsigned long flags)
+{
+ int slot_cnt = desc->slot_cnt, slots_per_op = desc->slots_per_op;
+ struct iop3xx_desc_aau *hw_desc, *prev_hw_desc, *iter;
+ union {
+ u32 value;
+ struct iop3xx_aau_desc_ctrl field;
+ } u_desc_ctrl;
+ int i, j;
+
+ hw_desc = desc->hw_desc;
+
+ for (i = 0, j = 0; (slot_cnt -= slots_per_op) >= 0;
+ i += slots_per_op, j++) {
+ iter = iop_hw_desc_slot_idx(hw_desc, i);
+ u_desc_ctrl.value = iop3xx_desc_init_xor(iter, src_cnt, flags);
+ u_desc_ctrl.field.dest_write_en = 0;
+ u_desc_ctrl.field.zero_result_en = 1;
+ u_desc_ctrl.field.int_en = flags & DMA_PREP_INTERRUPT;
+ iter->desc_ctrl = u_desc_ctrl.value;
+
+ /* for the subsequent descriptors preserve the store queue
+ * and chain them together
+ */
+ if (i) {
+ prev_hw_desc =
+ iop_hw_desc_slot_idx(hw_desc, i - slots_per_op);
+ prev_hw_desc->next_desc =
+ (u32) (desc->async_tx.phys + (i << 5));
+ }
+ }
+
+ return j;
+}
+
+static inline void
+iop_desc_init_null_xor(struct iop_adma_desc_slot *desc, int src_cnt,
+ unsigned long flags)
+{
+ struct iop3xx_desc_aau *hw_desc = desc->hw_desc;
+ union {
+ u32 value;
+ struct iop3xx_aau_desc_ctrl field;
+ } u_desc_ctrl;
+
+ u_desc_ctrl.value = 0;
+ switch (src_cnt) {
+ case 25 ... 32:
+ u_desc_ctrl.field.blk_ctrl = 0x3; /* use EDCR[2:0] */
+ hw_desc->src_edc[AAU_EDCR2_IDX].e_desc_ctrl = 0;
+ fallthrough;
+ case 17 ... 24:
+ if (!u_desc_ctrl.field.blk_ctrl) {
+ hw_desc->src_edc[AAU_EDCR2_IDX].e_desc_ctrl = 0;
+ u_desc_ctrl.field.blk_ctrl = 0x3; /* use EDCR[2:0] */
+ }
+ hw_desc->src_edc[AAU_EDCR1_IDX].e_desc_ctrl = 0;
+ fallthrough;
+ case 9 ... 16:
+ if (!u_desc_ctrl.field.blk_ctrl)
+ u_desc_ctrl.field.blk_ctrl = 0x2; /* use EDCR0 */
+ hw_desc->src_edc[AAU_EDCR0_IDX].e_desc_ctrl = 0;
+ fallthrough;
+ case 1 ... 8:
+ if (!u_desc_ctrl.field.blk_ctrl && src_cnt > 4)
+ u_desc_ctrl.field.blk_ctrl = 0x1; /* use mini-desc */
+ }
+
+ u_desc_ctrl.field.dest_write_en = 0;
+ u_desc_ctrl.field.int_en = flags & DMA_PREP_INTERRUPT;
+ hw_desc->desc_ctrl = u_desc_ctrl.value;
+}
+
+static inline void iop_desc_set_byte_count(struct iop_adma_desc_slot *desc,
+ struct iop_adma_chan *chan,
+ u32 byte_count)
+{
+ union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, };
+
+ switch (chan->device->id) {
+ case DMA0_ID:
+ case DMA1_ID:
+ hw_desc.dma->byte_count = byte_count;
+ break;
+ case AAU_ID:
+ hw_desc.aau->byte_count = byte_count;
+ break;
+ default:
+ BUG();
+ }
+}
+
+static inline void
+iop_desc_init_interrupt(struct iop_adma_desc_slot *desc,
+ struct iop_adma_chan *chan)
+{
+ union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, };
+
+ switch (chan->device->id) {
+ case DMA0_ID:
+ case DMA1_ID:
+ iop_desc_init_memcpy(desc, 1);
+ hw_desc.dma->byte_count = 0;
+ hw_desc.dma->dest_addr = 0;
+ hw_desc.dma->src_addr = 0;
+ break;
+ case AAU_ID:
+ iop_desc_init_null_xor(desc, 2, 1);
+ hw_desc.aau->byte_count = 0;
+ hw_desc.aau->dest_addr = 0;
+ hw_desc.aau->src[0] = 0;
+ hw_desc.aau->src[1] = 0;
+ break;
+ default:
+ BUG();
+ }
+}
+
+static inline void
+iop_desc_set_zero_sum_byte_count(struct iop_adma_desc_slot *desc, u32 len)
+{
+ int slots_per_op = desc->slots_per_op;
+ struct iop3xx_desc_aau *hw_desc = desc->hw_desc, *iter;
+ int i = 0;
+
+ if (len <= IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT) {
+ hw_desc->byte_count = len;
+ } else {
+ do {
+ iter = iop_hw_desc_slot_idx(hw_desc, i);
+ iter->byte_count = IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT;
+ len -= IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT;
+ i += slots_per_op;
+ } while (len > IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT);
+
+ iter = iop_hw_desc_slot_idx(hw_desc, i);
+ iter->byte_count = len;
+ }
+}
+
+static inline void iop_desc_set_dest_addr(struct iop_adma_desc_slot *desc,
+ struct iop_adma_chan *chan,
+ dma_addr_t addr)
+{
+ union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, };
+
+ switch (chan->device->id) {
+ case DMA0_ID:
+ case DMA1_ID:
+ hw_desc.dma->dest_addr = addr;
+ break;
+ case AAU_ID:
+ hw_desc.aau->dest_addr = addr;
+ break;
+ default:
+ BUG();
+ }
+}
+
+static inline void iop_desc_set_memcpy_src_addr(struct iop_adma_desc_slot *desc,
+ dma_addr_t addr)
+{
+ struct iop3xx_desc_dma *hw_desc = desc->hw_desc;
+ hw_desc->src_addr = addr;
+}
+
+static inline void
+iop_desc_set_zero_sum_src_addr(struct iop_adma_desc_slot *desc, int src_idx,
+ dma_addr_t addr)
+{
+
+ struct iop3xx_desc_aau *hw_desc = desc->hw_desc, *iter;
+ int slot_cnt = desc->slot_cnt, slots_per_op = desc->slots_per_op;
+ int i;
+
+ for (i = 0; (slot_cnt -= slots_per_op) >= 0;
+ i += slots_per_op, addr += IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT) {
+ iter = iop_hw_desc_slot_idx(hw_desc, i);
+ iop3xx_aau_desc_set_src_addr(iter, src_idx, addr);
+ }
+}
+
+static inline void iop_desc_set_xor_src_addr(struct iop_adma_desc_slot *desc,
+ int src_idx, dma_addr_t addr)
+{
+
+ struct iop3xx_desc_aau *hw_desc = desc->hw_desc, *iter;
+ int slot_cnt = desc->slot_cnt, slots_per_op = desc->slots_per_op;
+ int i;
+
+ for (i = 0; (slot_cnt -= slots_per_op) >= 0;
+ i += slots_per_op, addr += IOP_ADMA_XOR_MAX_BYTE_COUNT) {
+ iter = iop_hw_desc_slot_idx(hw_desc, i);
+ iop3xx_aau_desc_set_src_addr(iter, src_idx, addr);
+ }
+}
+
+static inline void iop_desc_set_next_desc(struct iop_adma_desc_slot *desc,
+ u32 next_desc_addr)
+{
+ /* hw_desc->next_desc is the same location for all channels */
+ union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, };
+
+ iop_paranoia(hw_desc.dma->next_desc);
+ hw_desc.dma->next_desc = next_desc_addr;
+}
+
+static inline u32 iop_desc_get_next_desc(struct iop_adma_desc_slot *desc)
+{
+ /* hw_desc->next_desc is the same location for all channels */
+ union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, };
+ return hw_desc.dma->next_desc;
+}
+
+static inline void iop_desc_clear_next_desc(struct iop_adma_desc_slot *desc)
+{
+ /* hw_desc->next_desc is the same location for all channels */
+ union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, };
+ hw_desc.dma->next_desc = 0;
+}
+
+static inline void iop_desc_set_block_fill_val(struct iop_adma_desc_slot *desc,
+ u32 val)
+{
+ struct iop3xx_desc_aau *hw_desc = desc->hw_desc;
+ hw_desc->src[0] = val;
+}
+
+static inline enum sum_check_flags
+iop_desc_get_zero_result(struct iop_adma_desc_slot *desc)
+{
+ struct iop3xx_desc_aau *hw_desc = desc->hw_desc;
+ struct iop3xx_aau_desc_ctrl desc_ctrl = hw_desc->desc_ctrl_field;
+
+ iop_paranoia(!(desc_ctrl.tx_complete && desc_ctrl.zero_result_en));
+ return desc_ctrl.zero_result_err << SUM_CHECK_P;
+}
+
+static inline void iop_chan_append(struct iop_adma_chan *chan)
+{
+ u32 dma_chan_ctrl;
+
+ dma_chan_ctrl = __raw_readl(DMA_CCR(chan));
+ dma_chan_ctrl |= 0x2;
+ __raw_writel(dma_chan_ctrl, DMA_CCR(chan));
+}
+
+static inline u32 iop_chan_get_status(struct iop_adma_chan *chan)
+{
+ return __raw_readl(DMA_CSR(chan));
+}
+
+static inline void iop_chan_disable(struct iop_adma_chan *chan)
+{
+ u32 dma_chan_ctrl = __raw_readl(DMA_CCR(chan));
+ dma_chan_ctrl &= ~1;
+ __raw_writel(dma_chan_ctrl, DMA_CCR(chan));
+}
+
+static inline void iop_chan_enable(struct iop_adma_chan *chan)
+{
+ u32 dma_chan_ctrl = __raw_readl(DMA_CCR(chan));
+
+ dma_chan_ctrl |= 1;
+ __raw_writel(dma_chan_ctrl, DMA_CCR(chan));
+}
+
+static inline void iop_adma_device_clear_eot_status(struct iop_adma_chan *chan)
+{
+ u32 status = __raw_readl(DMA_CSR(chan));
+ status &= (1 << 9);
+ __raw_writel(status, DMA_CSR(chan));
+}
+
+static inline void iop_adma_device_clear_eoc_status(struct iop_adma_chan *chan)
+{
+ u32 status = __raw_readl(DMA_CSR(chan));
+ status &= (1 << 8);
+ __raw_writel(status, DMA_CSR(chan));
+}
+
+static inline void iop_adma_device_clear_err_status(struct iop_adma_chan *chan)
+{
+ u32 status = __raw_readl(DMA_CSR(chan));
+
+ switch (chan->device->id) {
+ case DMA0_ID:
+ case DMA1_ID:
+ status &= (1 << 5) | (1 << 3) | (1 << 2) | (1 << 1);
+ break;
+ case AAU_ID:
+ status &= (1 << 5);
+ break;
+ default:
+ BUG();
+ }
+
+ __raw_writel(status, DMA_CSR(chan));
+}
+
+static inline int
+iop_is_err_int_parity(unsigned long status, struct iop_adma_chan *chan)
+{
+ return 0;
+}
+
+static inline int
+iop_is_err_mcu_abort(unsigned long status, struct iop_adma_chan *chan)
+{
+ return 0;
+}
+
+static inline int
+iop_is_err_int_tabort(unsigned long status, struct iop_adma_chan *chan)
+{
+ return 0;
+}
+
+static inline int
+iop_is_err_int_mabort(unsigned long status, struct iop_adma_chan *chan)
+{
+ return test_bit(5, &status);
+}
+
+static inline int
+iop_is_err_pci_tabort(unsigned long status, struct iop_adma_chan *chan)
+{
+ switch (chan->device->id) {
+ case DMA0_ID:
+ case DMA1_ID:
+ return test_bit(2, &status);
+ default:
+ return 0;
+ }
+}
+
+static inline int
+iop_is_err_pci_mabort(unsigned long status, struct iop_adma_chan *chan)
+{
+ switch (chan->device->id) {
+ case DMA0_ID:
+ case DMA1_ID:
+ return test_bit(3, &status);
+ default:
+ return 0;
+ }
+}
+
+static inline int
+iop_is_err_split_tx(unsigned long status, struct iop_adma_chan *chan)
+{
+ switch (chan->device->id) {
+ case DMA0_ID:
+ case DMA1_ID:
+ return test_bit(1, &status);
+ default:
+ return 0;
+ }
+}
+#endif /* _ADMA_H */
diff --git a/drivers/dma/ipu/Makefile b/drivers/dma/ipu/Makefile
new file mode 100644
index 000000000..c79ff116d
--- /dev/null
+++ b/drivers/dma/ipu/Makefile
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+obj-y += ipu_irq.o ipu_idmac.o
diff --git a/drivers/dma/ipu/ipu_idmac.c b/drivers/dma/ipu/ipu_idmac.c
new file mode 100644
index 000000000..baab1ca9f
--- /dev/null
+++ b/drivers/dma/ipu/ipu_idmac.c
@@ -0,0 +1,1802 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2008
+ * Guennadi Liakhovetski, DENX Software Engineering, <lg@denx.de>
+ *
+ * Copyright (C) 2005-2007 Freescale Semiconductor, Inc. All Rights Reserved.
+ */
+
+#include <linux/dma-mapping.h>
+#include <linux/init.h>
+#include <linux/platform_device.h>
+#include <linux/err.h>
+#include <linux/spinlock.h>
+#include <linux/delay.h>
+#include <linux/list.h>
+#include <linux/clk.h>
+#include <linux/vmalloc.h>
+#include <linux/string.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/dma/ipu-dma.h>
+
+#include "../dmaengine.h"
+#include "ipu_intern.h"
+
+#define FS_VF_IN_VALID 0x00000002
+#define FS_ENC_IN_VALID 0x00000001
+
+static int ipu_disable_channel(struct idmac *idmac, struct idmac_channel *ichan,
+ bool wait_for_stop);
+
+/*
+ * There can be only one, we could allocate it dynamically, but then we'd have
+ * to add an extra parameter to some functions, and use something as ugly as
+ * struct ipu *ipu = to_ipu(to_idmac(ichan->dma_chan.device));
+ * in the ISR
+ */
+static struct ipu ipu_data;
+
+#define to_ipu(id) container_of(id, struct ipu, idmac)
+
+static u32 __idmac_read_icreg(struct ipu *ipu, unsigned long reg)
+{
+ return __raw_readl(ipu->reg_ic + reg);
+}
+
+#define idmac_read_icreg(ipu, reg) __idmac_read_icreg(ipu, reg - IC_CONF)
+
+static void __idmac_write_icreg(struct ipu *ipu, u32 value, unsigned long reg)
+{
+ __raw_writel(value, ipu->reg_ic + reg);
+}
+
+#define idmac_write_icreg(ipu, v, reg) __idmac_write_icreg(ipu, v, reg - IC_CONF)
+
+static u32 idmac_read_ipureg(struct ipu *ipu, unsigned long reg)
+{
+ return __raw_readl(ipu->reg_ipu + reg);
+}
+
+static void idmac_write_ipureg(struct ipu *ipu, u32 value, unsigned long reg)
+{
+ __raw_writel(value, ipu->reg_ipu + reg);
+}
+
+/*****************************************************************************
+ * IPU / IC common functions
+ */
+static void dump_idmac_reg(struct ipu *ipu)
+{
+ dev_dbg(ipu->dev, "IDMAC_CONF 0x%x, IC_CONF 0x%x, IDMAC_CHA_EN 0x%x, "
+ "IDMAC_CHA_PRI 0x%x, IDMAC_CHA_BUSY 0x%x\n",
+ idmac_read_icreg(ipu, IDMAC_CONF),
+ idmac_read_icreg(ipu, IC_CONF),
+ idmac_read_icreg(ipu, IDMAC_CHA_EN),
+ idmac_read_icreg(ipu, IDMAC_CHA_PRI),
+ idmac_read_icreg(ipu, IDMAC_CHA_BUSY));
+ dev_dbg(ipu->dev, "BUF0_RDY 0x%x, BUF1_RDY 0x%x, CUR_BUF 0x%x, "
+ "DB_MODE 0x%x, TASKS_STAT 0x%x\n",
+ idmac_read_ipureg(ipu, IPU_CHA_BUF0_RDY),
+ idmac_read_ipureg(ipu, IPU_CHA_BUF1_RDY),
+ idmac_read_ipureg(ipu, IPU_CHA_CUR_BUF),
+ idmac_read_ipureg(ipu, IPU_CHA_DB_MODE_SEL),
+ idmac_read_ipureg(ipu, IPU_TASKS_STAT));
+}
+
+static uint32_t bytes_per_pixel(enum pixel_fmt fmt)
+{
+ switch (fmt) {
+ case IPU_PIX_FMT_GENERIC: /* generic data */
+ case IPU_PIX_FMT_RGB332:
+ case IPU_PIX_FMT_YUV420P:
+ case IPU_PIX_FMT_YUV422P:
+ default:
+ return 1;
+ case IPU_PIX_FMT_RGB565:
+ case IPU_PIX_FMT_YUYV:
+ case IPU_PIX_FMT_UYVY:
+ return 2;
+ case IPU_PIX_FMT_BGR24:
+ case IPU_PIX_FMT_RGB24:
+ return 3;
+ case IPU_PIX_FMT_GENERIC_32: /* generic data */
+ case IPU_PIX_FMT_BGR32:
+ case IPU_PIX_FMT_RGB32:
+ case IPU_PIX_FMT_ABGR32:
+ return 4;
+ }
+}
+
+/* Enable direct write to memory by the Camera Sensor Interface */
+static void ipu_ic_enable_task(struct ipu *ipu, enum ipu_channel channel)
+{
+ uint32_t ic_conf, mask;
+
+ switch (channel) {
+ case IDMAC_IC_0:
+ mask = IC_CONF_PRPENC_EN;
+ break;
+ case IDMAC_IC_7:
+ mask = IC_CONF_RWS_EN | IC_CONF_PRPENC_EN;
+ break;
+ default:
+ return;
+ }
+ ic_conf = idmac_read_icreg(ipu, IC_CONF) | mask;
+ idmac_write_icreg(ipu, ic_conf, IC_CONF);
+}
+
+/* Called under spin_lock_irqsave(&ipu_data.lock) */
+static void ipu_ic_disable_task(struct ipu *ipu, enum ipu_channel channel)
+{
+ uint32_t ic_conf, mask;
+
+ switch (channel) {
+ case IDMAC_IC_0:
+ mask = IC_CONF_PRPENC_EN;
+ break;
+ case IDMAC_IC_7:
+ mask = IC_CONF_RWS_EN | IC_CONF_PRPENC_EN;
+ break;
+ default:
+ return;
+ }
+ ic_conf = idmac_read_icreg(ipu, IC_CONF) & ~mask;
+ idmac_write_icreg(ipu, ic_conf, IC_CONF);
+}
+
+static uint32_t ipu_channel_status(struct ipu *ipu, enum ipu_channel channel)
+{
+ uint32_t stat = TASK_STAT_IDLE;
+ uint32_t task_stat_reg = idmac_read_ipureg(ipu, IPU_TASKS_STAT);
+
+ switch (channel) {
+ case IDMAC_IC_7:
+ stat = (task_stat_reg & TSTAT_CSI2MEM_MASK) >>
+ TSTAT_CSI2MEM_OFFSET;
+ break;
+ case IDMAC_IC_0:
+ case IDMAC_SDC_0:
+ case IDMAC_SDC_1:
+ default:
+ break;
+ }
+ return stat;
+}
+
+struct chan_param_mem_planar {
+ /* Word 0 */
+ u32 xv:10;
+ u32 yv:10;
+ u32 xb:12;
+
+ u32 yb:12;
+ u32 res1:2;
+ u32 nsb:1;
+ u32 lnpb:6;
+ u32 ubo_l:11;
+
+ u32 ubo_h:15;
+ u32 vbo_l:17;
+
+ u32 vbo_h:9;
+ u32 res2:3;
+ u32 fw:12;
+ u32 fh_l:8;
+
+ u32 fh_h:4;
+ u32 res3:28;
+
+ /* Word 1 */
+ u32 eba0;
+
+ u32 eba1;
+
+ u32 bpp:3;
+ u32 sl:14;
+ u32 pfs:3;
+ u32 bam:3;
+ u32 res4:2;
+ u32 npb:6;
+ u32 res5:1;
+
+ u32 sat:2;
+ u32 res6:30;
+} __attribute__ ((packed));
+
+struct chan_param_mem_interleaved {
+ /* Word 0 */
+ u32 xv:10;
+ u32 yv:10;
+ u32 xb:12;
+
+ u32 yb:12;
+ u32 sce:1;
+ u32 res1:1;
+ u32 nsb:1;
+ u32 lnpb:6;
+ u32 sx:10;
+ u32 sy_l:1;
+
+ u32 sy_h:9;
+ u32 ns:10;
+ u32 sm:10;
+ u32 sdx_l:3;
+
+ u32 sdx_h:2;
+ u32 sdy:5;
+ u32 sdrx:1;
+ u32 sdry:1;
+ u32 sdr1:1;
+ u32 res2:2;
+ u32 fw:12;
+ u32 fh_l:8;
+
+ u32 fh_h:4;
+ u32 res3:28;
+
+ /* Word 1 */
+ u32 eba0;
+
+ u32 eba1;
+
+ u32 bpp:3;
+ u32 sl:14;
+ u32 pfs:3;
+ u32 bam:3;
+ u32 res4:2;
+ u32 npb:6;
+ u32 res5:1;
+
+ u32 sat:2;
+ u32 scc:1;
+ u32 ofs0:5;
+ u32 ofs1:5;
+ u32 ofs2:5;
+ u32 ofs3:5;
+ u32 wid0:3;
+ u32 wid1:3;
+ u32 wid2:3;
+
+ u32 wid3:3;
+ u32 dec_sel:1;
+ u32 res6:28;
+} __attribute__ ((packed));
+
+union chan_param_mem {
+ struct chan_param_mem_planar pp;
+ struct chan_param_mem_interleaved ip;
+};
+
+static void ipu_ch_param_set_plane_offset(union chan_param_mem *params,
+ u32 u_offset, u32 v_offset)
+{
+ params->pp.ubo_l = u_offset & 0x7ff;
+ params->pp.ubo_h = u_offset >> 11;
+ params->pp.vbo_l = v_offset & 0x1ffff;
+ params->pp.vbo_h = v_offset >> 17;
+}
+
+static void ipu_ch_param_set_size(union chan_param_mem *params,
+ uint32_t pixel_fmt, uint16_t width,
+ uint16_t height, uint16_t stride)
+{
+ u32 u_offset;
+ u32 v_offset;
+
+ params->pp.fw = width - 1;
+ params->pp.fh_l = height - 1;
+ params->pp.fh_h = (height - 1) >> 8;
+ params->pp.sl = stride - 1;
+
+ switch (pixel_fmt) {
+ case IPU_PIX_FMT_GENERIC:
+ /*Represents 8-bit Generic data */
+ params->pp.bpp = 3;
+ params->pp.pfs = 7;
+ params->pp.npb = 31;
+ params->pp.sat = 2; /* SAT = use 32-bit access */
+ break;
+ case IPU_PIX_FMT_GENERIC_32:
+ /*Represents 32-bit Generic data */
+ params->pp.bpp = 0;
+ params->pp.pfs = 7;
+ params->pp.npb = 7;
+ params->pp.sat = 2; /* SAT = use 32-bit access */
+ break;
+ case IPU_PIX_FMT_RGB565:
+ params->ip.bpp = 2;
+ params->ip.pfs = 4;
+ params->ip.npb = 15;
+ params->ip.sat = 2; /* SAT = 32-bit access */
+ params->ip.ofs0 = 0; /* Red bit offset */
+ params->ip.ofs1 = 5; /* Green bit offset */
+ params->ip.ofs2 = 11; /* Blue bit offset */
+ params->ip.ofs3 = 16; /* Alpha bit offset */
+ params->ip.wid0 = 4; /* Red bit width - 1 */
+ params->ip.wid1 = 5; /* Green bit width - 1 */
+ params->ip.wid2 = 4; /* Blue bit width - 1 */
+ break;
+ case IPU_PIX_FMT_BGR24:
+ params->ip.bpp = 1; /* 24 BPP & RGB PFS */
+ params->ip.pfs = 4;
+ params->ip.npb = 7;
+ params->ip.sat = 2; /* SAT = 32-bit access */
+ params->ip.ofs0 = 0; /* Red bit offset */
+ params->ip.ofs1 = 8; /* Green bit offset */
+ params->ip.ofs2 = 16; /* Blue bit offset */
+ params->ip.ofs3 = 24; /* Alpha bit offset */
+ params->ip.wid0 = 7; /* Red bit width - 1 */
+ params->ip.wid1 = 7; /* Green bit width - 1 */
+ params->ip.wid2 = 7; /* Blue bit width - 1 */
+ break;
+ case IPU_PIX_FMT_RGB24:
+ params->ip.bpp = 1; /* 24 BPP & RGB PFS */
+ params->ip.pfs = 4;
+ params->ip.npb = 7;
+ params->ip.sat = 2; /* SAT = 32-bit access */
+ params->ip.ofs0 = 16; /* Red bit offset */
+ params->ip.ofs1 = 8; /* Green bit offset */
+ params->ip.ofs2 = 0; /* Blue bit offset */
+ params->ip.ofs3 = 24; /* Alpha bit offset */
+ params->ip.wid0 = 7; /* Red bit width - 1 */
+ params->ip.wid1 = 7; /* Green bit width - 1 */
+ params->ip.wid2 = 7; /* Blue bit width - 1 */
+ break;
+ case IPU_PIX_FMT_BGRA32:
+ case IPU_PIX_FMT_BGR32:
+ case IPU_PIX_FMT_ABGR32:
+ params->ip.bpp = 0;
+ params->ip.pfs = 4;
+ params->ip.npb = 7;
+ params->ip.sat = 2; /* SAT = 32-bit access */
+ params->ip.ofs0 = 8; /* Red bit offset */
+ params->ip.ofs1 = 16; /* Green bit offset */
+ params->ip.ofs2 = 24; /* Blue bit offset */
+ params->ip.ofs3 = 0; /* Alpha bit offset */
+ params->ip.wid0 = 7; /* Red bit width - 1 */
+ params->ip.wid1 = 7; /* Green bit width - 1 */
+ params->ip.wid2 = 7; /* Blue bit width - 1 */
+ params->ip.wid3 = 7; /* Alpha bit width - 1 */
+ break;
+ case IPU_PIX_FMT_RGBA32:
+ case IPU_PIX_FMT_RGB32:
+ params->ip.bpp = 0;
+ params->ip.pfs = 4;
+ params->ip.npb = 7;
+ params->ip.sat = 2; /* SAT = 32-bit access */
+ params->ip.ofs0 = 24; /* Red bit offset */
+ params->ip.ofs1 = 16; /* Green bit offset */
+ params->ip.ofs2 = 8; /* Blue bit offset */
+ params->ip.ofs3 = 0; /* Alpha bit offset */
+ params->ip.wid0 = 7; /* Red bit width - 1 */
+ params->ip.wid1 = 7; /* Green bit width - 1 */
+ params->ip.wid2 = 7; /* Blue bit width - 1 */
+ params->ip.wid3 = 7; /* Alpha bit width - 1 */
+ break;
+ case IPU_PIX_FMT_UYVY:
+ params->ip.bpp = 2;
+ params->ip.pfs = 6;
+ params->ip.npb = 7;
+ params->ip.sat = 2; /* SAT = 32-bit access */
+ break;
+ case IPU_PIX_FMT_YUV420P2:
+ case IPU_PIX_FMT_YUV420P:
+ params->ip.bpp = 3;
+ params->ip.pfs = 3;
+ params->ip.npb = 7;
+ params->ip.sat = 2; /* SAT = 32-bit access */
+ u_offset = stride * height;
+ v_offset = u_offset + u_offset / 4;
+ ipu_ch_param_set_plane_offset(params, u_offset, v_offset);
+ break;
+ case IPU_PIX_FMT_YVU422P:
+ params->ip.bpp = 3;
+ params->ip.pfs = 2;
+ params->ip.npb = 7;
+ params->ip.sat = 2; /* SAT = 32-bit access */
+ v_offset = stride * height;
+ u_offset = v_offset + v_offset / 2;
+ ipu_ch_param_set_plane_offset(params, u_offset, v_offset);
+ break;
+ case IPU_PIX_FMT_YUV422P:
+ params->ip.bpp = 3;
+ params->ip.pfs = 2;
+ params->ip.npb = 7;
+ params->ip.sat = 2; /* SAT = 32-bit access */
+ u_offset = stride * height;
+ v_offset = u_offset + u_offset / 2;
+ ipu_ch_param_set_plane_offset(params, u_offset, v_offset);
+ break;
+ default:
+ dev_err(ipu_data.dev,
+ "mx3 ipu: unimplemented pixel format %d\n", pixel_fmt);
+ break;
+ }
+
+ params->pp.nsb = 1;
+}
+
+static void ipu_ch_param_set_buffer(union chan_param_mem *params,
+ dma_addr_t buf0, dma_addr_t buf1)
+{
+ params->pp.eba0 = buf0;
+ params->pp.eba1 = buf1;
+}
+
+static void ipu_ch_param_set_rotation(union chan_param_mem *params,
+ enum ipu_rotate_mode rotate)
+{
+ params->pp.bam = rotate;
+}
+
+static void ipu_write_param_mem(uint32_t addr, uint32_t *data,
+ uint32_t num_words)
+{
+ for (; num_words > 0; num_words--) {
+ dev_dbg(ipu_data.dev,
+ "write param mem - addr = 0x%08X, data = 0x%08X\n",
+ addr, *data);
+ idmac_write_ipureg(&ipu_data, addr, IPU_IMA_ADDR);
+ idmac_write_ipureg(&ipu_data, *data++, IPU_IMA_DATA);
+ addr++;
+ if ((addr & 0x7) == 5) {
+ addr &= ~0x7; /* set to word 0 */
+ addr += 8; /* increment to next row */
+ }
+ }
+}
+
+static int calc_resize_coeffs(uint32_t in_size, uint32_t out_size,
+ uint32_t *resize_coeff,
+ uint32_t *downsize_coeff)
+{
+ uint32_t temp_size;
+ uint32_t temp_downsize;
+
+ *resize_coeff = 1 << 13;
+ *downsize_coeff = 1 << 13;
+
+ /* Cannot downsize more than 8:1 */
+ if (out_size << 3 < in_size)
+ return -EINVAL;
+
+ /* compute downsizing coefficient */
+ temp_downsize = 0;
+ temp_size = in_size;
+ while (temp_size >= out_size * 2 && temp_downsize < 2) {
+ temp_size >>= 1;
+ temp_downsize++;
+ }
+ *downsize_coeff = temp_downsize;
+
+ /*
+ * compute resizing coefficient using the following formula:
+ * resize_coeff = M*(SI -1)/(SO - 1)
+ * where M = 2^13, SI - input size, SO - output size
+ */
+ *resize_coeff = (8192L * (temp_size - 1)) / (out_size - 1);
+ if (*resize_coeff >= 16384L) {
+ dev_err(ipu_data.dev, "Warning! Overflow on resize coeff.\n");
+ *resize_coeff = 0x3FFF;
+ }
+
+ dev_dbg(ipu_data.dev, "resizing from %u -> %u pixels, "
+ "downsize=%u, resize=%u.%lu (reg=%u)\n", in_size, out_size,
+ *downsize_coeff, *resize_coeff >= 8192L ? 1 : 0,
+ ((*resize_coeff & 0x1FFF) * 10000L) / 8192L, *resize_coeff);
+
+ return 0;
+}
+
+static enum ipu_color_space format_to_colorspace(enum pixel_fmt fmt)
+{
+ switch (fmt) {
+ case IPU_PIX_FMT_RGB565:
+ case IPU_PIX_FMT_BGR24:
+ case IPU_PIX_FMT_RGB24:
+ case IPU_PIX_FMT_BGR32:
+ case IPU_PIX_FMT_RGB32:
+ return IPU_COLORSPACE_RGB;
+ default:
+ return IPU_COLORSPACE_YCBCR;
+ }
+}
+
+static int ipu_ic_init_prpenc(struct ipu *ipu,
+ union ipu_channel_param *params, bool src_is_csi)
+{
+ uint32_t reg, ic_conf;
+ uint32_t downsize_coeff, resize_coeff;
+ enum ipu_color_space in_fmt, out_fmt;
+
+ /* Setup vertical resizing */
+ calc_resize_coeffs(params->video.in_height,
+ params->video.out_height,
+ &resize_coeff, &downsize_coeff);
+ reg = (downsize_coeff << 30) | (resize_coeff << 16);
+
+ /* Setup horizontal resizing */
+ calc_resize_coeffs(params->video.in_width,
+ params->video.out_width,
+ &resize_coeff, &downsize_coeff);
+ reg |= (downsize_coeff << 14) | resize_coeff;
+
+ /* Setup color space conversion */
+ in_fmt = format_to_colorspace(params->video.in_pixel_fmt);
+ out_fmt = format_to_colorspace(params->video.out_pixel_fmt);
+
+ /*
+ * Colourspace conversion unsupported yet - see _init_csc() in
+ * Freescale sources
+ */
+ if (in_fmt != out_fmt) {
+ dev_err(ipu->dev, "Colourspace conversion unsupported!\n");
+ return -EOPNOTSUPP;
+ }
+
+ idmac_write_icreg(ipu, reg, IC_PRP_ENC_RSC);
+
+ ic_conf = idmac_read_icreg(ipu, IC_CONF);
+
+ if (src_is_csi)
+ ic_conf &= ~IC_CONF_RWS_EN;
+ else
+ ic_conf |= IC_CONF_RWS_EN;
+
+ idmac_write_icreg(ipu, ic_conf, IC_CONF);
+
+ return 0;
+}
+
+static uint32_t dma_param_addr(uint32_t dma_ch)
+{
+ /* Channel Parameter Memory */
+ return 0x10000 | (dma_ch << 4);
+}
+
+static void ipu_channel_set_priority(struct ipu *ipu, enum ipu_channel channel,
+ bool prio)
+{
+ u32 reg = idmac_read_icreg(ipu, IDMAC_CHA_PRI);
+
+ if (prio)
+ reg |= 1UL << channel;
+ else
+ reg &= ~(1UL << channel);
+
+ idmac_write_icreg(ipu, reg, IDMAC_CHA_PRI);
+
+ dump_idmac_reg(ipu);
+}
+
+static uint32_t ipu_channel_conf_mask(enum ipu_channel channel)
+{
+ uint32_t mask;
+
+ switch (channel) {
+ case IDMAC_IC_0:
+ case IDMAC_IC_7:
+ mask = IPU_CONF_CSI_EN | IPU_CONF_IC_EN;
+ break;
+ case IDMAC_SDC_0:
+ case IDMAC_SDC_1:
+ mask = IPU_CONF_SDC_EN | IPU_CONF_DI_EN;
+ break;
+ default:
+ mask = 0;
+ break;
+ }
+
+ return mask;
+}
+
+/**
+ * ipu_enable_channel() - enable an IPU channel.
+ * @idmac: IPU DMAC context.
+ * @ichan: IDMAC channel.
+ * @return: 0 on success or negative error code on failure.
+ */
+static int ipu_enable_channel(struct idmac *idmac, struct idmac_channel *ichan)
+{
+ struct ipu *ipu = to_ipu(idmac);
+ enum ipu_channel channel = ichan->dma_chan.chan_id;
+ uint32_t reg;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ipu->lock, flags);
+
+ /* Reset to buffer 0 */
+ idmac_write_ipureg(ipu, 1UL << channel, IPU_CHA_CUR_BUF);
+ ichan->active_buffer = 0;
+ ichan->status = IPU_CHANNEL_ENABLED;
+
+ switch (channel) {
+ case IDMAC_SDC_0:
+ case IDMAC_SDC_1:
+ case IDMAC_IC_7:
+ ipu_channel_set_priority(ipu, channel, true);
+ break;
+ default:
+ break;
+ }
+
+ reg = idmac_read_icreg(ipu, IDMAC_CHA_EN);
+
+ idmac_write_icreg(ipu, reg | (1UL << channel), IDMAC_CHA_EN);
+
+ ipu_ic_enable_task(ipu, channel);
+
+ spin_unlock_irqrestore(&ipu->lock, flags);
+ return 0;
+}
+
+/**
+ * ipu_init_channel_buffer() - initialize a buffer for logical IPU channel.
+ * @ichan: IDMAC channel.
+ * @pixel_fmt: pixel format of buffer. Pixel format is a FOURCC ASCII code.
+ * @width: width of buffer in pixels.
+ * @height: height of buffer in pixels.
+ * @stride: stride length of buffer in pixels.
+ * @rot_mode: rotation mode of buffer. A rotation setting other than
+ * IPU_ROTATE_VERT_FLIP should only be used for input buffers of
+ * rotation channels.
+ * @phyaddr_0: buffer 0 physical address.
+ * @phyaddr_1: buffer 1 physical address. Setting this to a value other than
+ * NULL enables double buffering mode.
+ * @return: 0 on success or negative error code on failure.
+ */
+static int ipu_init_channel_buffer(struct idmac_channel *ichan,
+ enum pixel_fmt pixel_fmt,
+ uint16_t width, uint16_t height,
+ uint32_t stride,
+ enum ipu_rotate_mode rot_mode,
+ dma_addr_t phyaddr_0, dma_addr_t phyaddr_1)
+{
+ enum ipu_channel channel = ichan->dma_chan.chan_id;
+ struct idmac *idmac = to_idmac(ichan->dma_chan.device);
+ struct ipu *ipu = to_ipu(idmac);
+ union chan_param_mem params = {};
+ unsigned long flags;
+ uint32_t reg;
+ uint32_t stride_bytes;
+
+ stride_bytes = stride * bytes_per_pixel(pixel_fmt);
+
+ if (stride_bytes % 4) {
+ dev_err(ipu->dev,
+ "Stride length must be 32-bit aligned, stride = %d, bytes = %d\n",
+ stride, stride_bytes);
+ return -EINVAL;
+ }
+
+ /* IC channel's stride must be a multiple of 8 pixels */
+ if ((channel <= IDMAC_IC_13) && (stride % 8)) {
+ dev_err(ipu->dev, "Stride must be 8 pixel multiple\n");
+ return -EINVAL;
+ }
+
+ /* Build parameter memory data for DMA channel */
+ ipu_ch_param_set_size(&params, pixel_fmt, width, height, stride_bytes);
+ ipu_ch_param_set_buffer(&params, phyaddr_0, phyaddr_1);
+ ipu_ch_param_set_rotation(&params, rot_mode);
+
+ spin_lock_irqsave(&ipu->lock, flags);
+
+ ipu_write_param_mem(dma_param_addr(channel), (uint32_t *)&params, 10);
+
+ reg = idmac_read_ipureg(ipu, IPU_CHA_DB_MODE_SEL);
+
+ if (phyaddr_1)
+ reg |= 1UL << channel;
+ else
+ reg &= ~(1UL << channel);
+
+ idmac_write_ipureg(ipu, reg, IPU_CHA_DB_MODE_SEL);
+
+ ichan->status = IPU_CHANNEL_READY;
+
+ spin_unlock_irqrestore(&ipu->lock, flags);
+
+ return 0;
+}
+
+/**
+ * ipu_select_buffer() - mark a channel's buffer as ready.
+ * @channel: channel ID.
+ * @buffer_n: buffer number to mark ready.
+ */
+static void ipu_select_buffer(enum ipu_channel channel, int buffer_n)
+{
+ /* No locking - this is a write-one-to-set register, cleared by IPU */
+ if (buffer_n == 0)
+ /* Mark buffer 0 as ready. */
+ idmac_write_ipureg(&ipu_data, 1UL << channel, IPU_CHA_BUF0_RDY);
+ else
+ /* Mark buffer 1 as ready. */
+ idmac_write_ipureg(&ipu_data, 1UL << channel, IPU_CHA_BUF1_RDY);
+}
+
+/**
+ * ipu_update_channel_buffer() - update physical address of a channel buffer.
+ * @ichan: IDMAC channel.
+ * @buffer_n: buffer number to update.
+ * 0 or 1 are the only valid values.
+ * @phyaddr: buffer physical address.
+ */
+/* Called under spin_lock(_irqsave)(&ichan->lock) */
+static void ipu_update_channel_buffer(struct idmac_channel *ichan,
+ int buffer_n, dma_addr_t phyaddr)
+{
+ enum ipu_channel channel = ichan->dma_chan.chan_id;
+ uint32_t reg;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ipu_data.lock, flags);
+
+ if (buffer_n == 0) {
+ reg = idmac_read_ipureg(&ipu_data, IPU_CHA_BUF0_RDY);
+ if (reg & (1UL << channel)) {
+ ipu_ic_disable_task(&ipu_data, channel);
+ ichan->status = IPU_CHANNEL_READY;
+ }
+
+ /* 44.3.3.1.9 - Row Number 1 (WORD1, offset 0) */
+ idmac_write_ipureg(&ipu_data, dma_param_addr(channel) +
+ 0x0008UL, IPU_IMA_ADDR);
+ idmac_write_ipureg(&ipu_data, phyaddr, IPU_IMA_DATA);
+ } else {
+ reg = idmac_read_ipureg(&ipu_data, IPU_CHA_BUF1_RDY);
+ if (reg & (1UL << channel)) {
+ ipu_ic_disable_task(&ipu_data, channel);
+ ichan->status = IPU_CHANNEL_READY;
+ }
+
+ /* Check if double-buffering is already enabled */
+ reg = idmac_read_ipureg(&ipu_data, IPU_CHA_DB_MODE_SEL);
+
+ if (!(reg & (1UL << channel)))
+ idmac_write_ipureg(&ipu_data, reg | (1UL << channel),
+ IPU_CHA_DB_MODE_SEL);
+
+ /* 44.3.3.1.9 - Row Number 1 (WORD1, offset 1) */
+ idmac_write_ipureg(&ipu_data, dma_param_addr(channel) +
+ 0x0009UL, IPU_IMA_ADDR);
+ idmac_write_ipureg(&ipu_data, phyaddr, IPU_IMA_DATA);
+ }
+
+ spin_unlock_irqrestore(&ipu_data.lock, flags);
+}
+
+/* Called under spin_lock_irqsave(&ichan->lock) */
+static int ipu_submit_buffer(struct idmac_channel *ichan,
+ struct idmac_tx_desc *desc, struct scatterlist *sg, int buf_idx)
+{
+ unsigned int chan_id = ichan->dma_chan.chan_id;
+ struct device *dev = &ichan->dma_chan.dev->device;
+
+ if (async_tx_test_ack(&desc->txd))
+ return -EINTR;
+
+ /*
+ * On first invocation this shouldn't be necessary, the call to
+ * ipu_init_channel_buffer() above will set addresses for us, so we
+ * could make it conditional on status >= IPU_CHANNEL_ENABLED, but
+ * doing it again shouldn't hurt either.
+ */
+ ipu_update_channel_buffer(ichan, buf_idx, sg_dma_address(sg));
+
+ ipu_select_buffer(chan_id, buf_idx);
+ dev_dbg(dev, "Updated sg %p on channel 0x%x buffer %d\n",
+ sg, chan_id, buf_idx);
+
+ return 0;
+}
+
+/* Called under spin_lock_irqsave(&ichan->lock) */
+static int ipu_submit_channel_buffers(struct idmac_channel *ichan,
+ struct idmac_tx_desc *desc)
+{
+ struct scatterlist *sg;
+ int i, ret = 0;
+
+ for (i = 0, sg = desc->sg; i < 2 && sg; i++) {
+ if (!ichan->sg[i]) {
+ ichan->sg[i] = sg;
+
+ ret = ipu_submit_buffer(ichan, desc, sg, i);
+ if (ret < 0)
+ return ret;
+
+ sg = sg_next(sg);
+ }
+ }
+
+ return ret;
+}
+
+static dma_cookie_t idmac_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+ struct idmac_tx_desc *desc = to_tx_desc(tx);
+ struct idmac_channel *ichan = to_idmac_chan(tx->chan);
+ struct idmac *idmac = to_idmac(tx->chan->device);
+ struct ipu *ipu = to_ipu(idmac);
+ struct device *dev = &ichan->dma_chan.dev->device;
+ dma_cookie_t cookie;
+ unsigned long flags;
+ int ret;
+
+ /* Sanity check */
+ if (!list_empty(&desc->list)) {
+ /* The descriptor doesn't belong to client */
+ dev_err(dev, "Descriptor %p not prepared!\n", tx);
+ return -EBUSY;
+ }
+
+ mutex_lock(&ichan->chan_mutex);
+
+ async_tx_clear_ack(tx);
+
+ if (ichan->status < IPU_CHANNEL_READY) {
+ struct idmac_video_param *video = &ichan->params.video;
+ /*
+ * Initial buffer assignment - the first two sg-entries from
+ * the descriptor will end up in the IDMAC buffers
+ */
+ dma_addr_t dma_1 = sg_is_last(desc->sg) ? 0 :
+ sg_dma_address(&desc->sg[1]);
+
+ WARN_ON(ichan->sg[0] || ichan->sg[1]);
+
+ cookie = ipu_init_channel_buffer(ichan,
+ video->out_pixel_fmt,
+ video->out_width,
+ video->out_height,
+ video->out_stride,
+ IPU_ROTATE_NONE,
+ sg_dma_address(&desc->sg[0]),
+ dma_1);
+ if (cookie < 0)
+ goto out;
+ }
+
+ dev_dbg(dev, "Submitting sg %p\n", &desc->sg[0]);
+
+ cookie = dma_cookie_assign(tx);
+
+ /* ipu->lock can be taken under ichan->lock, but not v.v. */
+ spin_lock_irqsave(&ichan->lock, flags);
+
+ list_add_tail(&desc->list, &ichan->queue);
+ /* submit_buffers() atomically verifies and fills empty sg slots */
+ ret = ipu_submit_channel_buffers(ichan, desc);
+
+ spin_unlock_irqrestore(&ichan->lock, flags);
+
+ if (ret < 0) {
+ cookie = ret;
+ goto dequeue;
+ }
+
+ if (ichan->status < IPU_CHANNEL_ENABLED) {
+ ret = ipu_enable_channel(idmac, ichan);
+ if (ret < 0) {
+ cookie = ret;
+ goto dequeue;
+ }
+ }
+
+ dump_idmac_reg(ipu);
+
+dequeue:
+ if (cookie < 0) {
+ spin_lock_irqsave(&ichan->lock, flags);
+ list_del_init(&desc->list);
+ spin_unlock_irqrestore(&ichan->lock, flags);
+ tx->cookie = cookie;
+ ichan->dma_chan.cookie = cookie;
+ }
+
+out:
+ mutex_unlock(&ichan->chan_mutex);
+
+ return cookie;
+}
+
+/* Called with ichan->chan_mutex held */
+static int idmac_desc_alloc(struct idmac_channel *ichan, int n)
+{
+ struct idmac_tx_desc *desc =
+ vmalloc(array_size(n, sizeof(struct idmac_tx_desc)));
+ struct idmac *idmac = to_idmac(ichan->dma_chan.device);
+
+ if (!desc)
+ return -ENOMEM;
+
+ /* No interrupts, just disable the tasklet for a moment */
+ tasklet_disable(&to_ipu(idmac)->tasklet);
+
+ ichan->n_tx_desc = n;
+ ichan->desc = desc;
+ INIT_LIST_HEAD(&ichan->queue);
+ INIT_LIST_HEAD(&ichan->free_list);
+
+ while (n--) {
+ struct dma_async_tx_descriptor *txd = &desc->txd;
+
+ memset(txd, 0, sizeof(*txd));
+ dma_async_tx_descriptor_init(txd, &ichan->dma_chan);
+ txd->tx_submit = idmac_tx_submit;
+
+ list_add(&desc->list, &ichan->free_list);
+
+ desc++;
+ }
+
+ tasklet_enable(&to_ipu(idmac)->tasklet);
+
+ return 0;
+}
+
+/**
+ * ipu_init_channel() - initialize an IPU channel.
+ * @idmac: IPU DMAC context.
+ * @ichan: pointer to the channel object.
+ * @return 0 on success or negative error code on failure.
+ */
+static int ipu_init_channel(struct idmac *idmac, struct idmac_channel *ichan)
+{
+ union ipu_channel_param *params = &ichan->params;
+ uint32_t ipu_conf;
+ enum ipu_channel channel = ichan->dma_chan.chan_id;
+ unsigned long flags;
+ uint32_t reg;
+ struct ipu *ipu = to_ipu(idmac);
+ int ret = 0, n_desc = 0;
+
+ dev_dbg(ipu->dev, "init channel = %d\n", channel);
+
+ if (channel != IDMAC_SDC_0 && channel != IDMAC_SDC_1 &&
+ channel != IDMAC_IC_7)
+ return -EINVAL;
+
+ spin_lock_irqsave(&ipu->lock, flags);
+
+ switch (channel) {
+ case IDMAC_IC_7:
+ n_desc = 16;
+ reg = idmac_read_icreg(ipu, IC_CONF);
+ idmac_write_icreg(ipu, reg & ~IC_CONF_CSI_MEM_WR_EN, IC_CONF);
+ break;
+ case IDMAC_IC_0:
+ n_desc = 16;
+ reg = idmac_read_ipureg(ipu, IPU_FS_PROC_FLOW);
+ idmac_write_ipureg(ipu, reg & ~FS_ENC_IN_VALID, IPU_FS_PROC_FLOW);
+ ret = ipu_ic_init_prpenc(ipu, params, true);
+ break;
+ case IDMAC_SDC_0:
+ case IDMAC_SDC_1:
+ n_desc = 4;
+ break;
+ default:
+ break;
+ }
+
+ ipu->channel_init_mask |= 1L << channel;
+
+ /* Enable IPU sub module */
+ ipu_conf = idmac_read_ipureg(ipu, IPU_CONF) |
+ ipu_channel_conf_mask(channel);
+ idmac_write_ipureg(ipu, ipu_conf, IPU_CONF);
+
+ spin_unlock_irqrestore(&ipu->lock, flags);
+
+ if (n_desc && !ichan->desc)
+ ret = idmac_desc_alloc(ichan, n_desc);
+
+ dump_idmac_reg(ipu);
+
+ return ret;
+}
+
+/**
+ * ipu_uninit_channel() - uninitialize an IPU channel.
+ * @idmac: IPU DMAC context.
+ * @ichan: pointer to the channel object.
+ */
+static void ipu_uninit_channel(struct idmac *idmac, struct idmac_channel *ichan)
+{
+ enum ipu_channel channel = ichan->dma_chan.chan_id;
+ unsigned long flags;
+ uint32_t reg;
+ unsigned long chan_mask = 1UL << channel;
+ uint32_t ipu_conf;
+ struct ipu *ipu = to_ipu(idmac);
+
+ spin_lock_irqsave(&ipu->lock, flags);
+
+ if (!(ipu->channel_init_mask & chan_mask)) {
+ dev_err(ipu->dev, "Channel already uninitialized %d\n",
+ channel);
+ spin_unlock_irqrestore(&ipu->lock, flags);
+ return;
+ }
+
+ /* Reset the double buffer */
+ reg = idmac_read_ipureg(ipu, IPU_CHA_DB_MODE_SEL);
+ idmac_write_ipureg(ipu, reg & ~chan_mask, IPU_CHA_DB_MODE_SEL);
+
+ ichan->sec_chan_en = false;
+
+ switch (channel) {
+ case IDMAC_IC_7:
+ reg = idmac_read_icreg(ipu, IC_CONF);
+ idmac_write_icreg(ipu, reg & ~(IC_CONF_RWS_EN | IC_CONF_PRPENC_EN),
+ IC_CONF);
+ break;
+ case IDMAC_IC_0:
+ reg = idmac_read_icreg(ipu, IC_CONF);
+ idmac_write_icreg(ipu, reg & ~(IC_CONF_PRPENC_EN | IC_CONF_PRPENC_CSC1),
+ IC_CONF);
+ break;
+ case IDMAC_SDC_0:
+ case IDMAC_SDC_1:
+ default:
+ break;
+ }
+
+ ipu->channel_init_mask &= ~(1L << channel);
+
+ ipu_conf = idmac_read_ipureg(ipu, IPU_CONF) &
+ ~ipu_channel_conf_mask(channel);
+ idmac_write_ipureg(ipu, ipu_conf, IPU_CONF);
+
+ spin_unlock_irqrestore(&ipu->lock, flags);
+
+ ichan->n_tx_desc = 0;
+ vfree(ichan->desc);
+ ichan->desc = NULL;
+}
+
+/**
+ * ipu_disable_channel() - disable an IPU channel.
+ * @idmac: IPU DMAC context.
+ * @ichan: channel object pointer.
+ * @wait_for_stop: flag to set whether to wait for channel end of frame or
+ * return immediately.
+ * @return: 0 on success or negative error code on failure.
+ */
+static int ipu_disable_channel(struct idmac *idmac, struct idmac_channel *ichan,
+ bool wait_for_stop)
+{
+ enum ipu_channel channel = ichan->dma_chan.chan_id;
+ struct ipu *ipu = to_ipu(idmac);
+ uint32_t reg;
+ unsigned long flags;
+ unsigned long chan_mask = 1UL << channel;
+ unsigned int timeout;
+
+ if (wait_for_stop && channel != IDMAC_SDC_1 && channel != IDMAC_SDC_0) {
+ timeout = 40;
+ /* This waiting always fails. Related to spurious irq problem */
+ while ((idmac_read_icreg(ipu, IDMAC_CHA_BUSY) & chan_mask) ||
+ (ipu_channel_status(ipu, channel) == TASK_STAT_ACTIVE)) {
+ timeout--;
+ msleep(10);
+
+ if (!timeout) {
+ dev_dbg(ipu->dev,
+ "Warning: timeout waiting for channel %u to "
+ "stop: buf0_rdy = 0x%08X, buf1_rdy = 0x%08X, "
+ "busy = 0x%08X, tstat = 0x%08X\n", channel,
+ idmac_read_ipureg(ipu, IPU_CHA_BUF0_RDY),
+ idmac_read_ipureg(ipu, IPU_CHA_BUF1_RDY),
+ idmac_read_icreg(ipu, IDMAC_CHA_BUSY),
+ idmac_read_ipureg(ipu, IPU_TASKS_STAT));
+ break;
+ }
+ }
+ dev_dbg(ipu->dev, "timeout = %d * 10ms\n", 40 - timeout);
+ }
+ /* SDC BG and FG must be disabled before DMA is disabled */
+ if (wait_for_stop && (channel == IDMAC_SDC_0 ||
+ channel == IDMAC_SDC_1)) {
+ for (timeout = 5;
+ timeout && !ipu_irq_status(ichan->eof_irq); timeout--)
+ msleep(5);
+ }
+
+ spin_lock_irqsave(&ipu->lock, flags);
+
+ /* Disable IC task */
+ ipu_ic_disable_task(ipu, channel);
+
+ /* Disable DMA channel(s) */
+ reg = idmac_read_icreg(ipu, IDMAC_CHA_EN);
+ idmac_write_icreg(ipu, reg & ~chan_mask, IDMAC_CHA_EN);
+
+ spin_unlock_irqrestore(&ipu->lock, flags);
+
+ return 0;
+}
+
+static struct scatterlist *idmac_sg_next(struct idmac_channel *ichan,
+ struct idmac_tx_desc **desc, struct scatterlist *sg)
+{
+ struct scatterlist *sgnew = sg ? sg_next(sg) : NULL;
+
+ if (sgnew)
+ /* next sg-element in this list */
+ return sgnew;
+
+ if ((*desc)->list.next == &ichan->queue)
+ /* No more descriptors on the queue */
+ return NULL;
+
+ /* Fetch next descriptor */
+ *desc = list_entry((*desc)->list.next, struct idmac_tx_desc, list);
+ return (*desc)->sg;
+}
+
+/*
+ * We have several possibilities here:
+ * current BUF next BUF
+ *
+ * not last sg next not last sg
+ * not last sg next last sg
+ * last sg first sg from next descriptor
+ * last sg NULL
+ *
+ * Besides, the descriptor queue might be empty or not. We process all these
+ * cases carefully.
+ */
+static irqreturn_t idmac_interrupt(int irq, void *dev_id)
+{
+ struct idmac_channel *ichan = dev_id;
+ struct device *dev = &ichan->dma_chan.dev->device;
+ unsigned int chan_id = ichan->dma_chan.chan_id;
+ struct scatterlist **sg, *sgnext, *sgnew = NULL;
+ /* Next transfer descriptor */
+ struct idmac_tx_desc *desc, *descnew;
+ bool done = false;
+ u32 ready0, ready1, curbuf, err;
+ struct dmaengine_desc_callback cb;
+
+ /* IDMAC has cleared the respective BUFx_RDY bit, we manage the buffer */
+
+ dev_dbg(dev, "IDMAC irq %d, buf %d\n", irq, ichan->active_buffer);
+
+ spin_lock(&ipu_data.lock);
+
+ ready0 = idmac_read_ipureg(&ipu_data, IPU_CHA_BUF0_RDY);
+ ready1 = idmac_read_ipureg(&ipu_data, IPU_CHA_BUF1_RDY);
+ curbuf = idmac_read_ipureg(&ipu_data, IPU_CHA_CUR_BUF);
+ err = idmac_read_ipureg(&ipu_data, IPU_INT_STAT_4);
+
+ if (err & (1 << chan_id)) {
+ idmac_write_ipureg(&ipu_data, 1 << chan_id, IPU_INT_STAT_4);
+ spin_unlock(&ipu_data.lock);
+ /*
+ * Doing this
+ * ichan->sg[0] = ichan->sg[1] = NULL;
+ * you can force channel re-enable on the next tx_submit(), but
+ * this is dirty - think about descriptors with multiple
+ * sg elements.
+ */
+ dev_warn(dev, "NFB4EOF on channel %d, ready %x, %x, cur %x\n",
+ chan_id, ready0, ready1, curbuf);
+ return IRQ_HANDLED;
+ }
+ spin_unlock(&ipu_data.lock);
+
+ /* Other interrupts do not interfere with this channel */
+ spin_lock(&ichan->lock);
+ if (unlikely((ichan->active_buffer && (ready1 >> chan_id) & 1) ||
+ (!ichan->active_buffer && (ready0 >> chan_id) & 1)
+ )) {
+ spin_unlock(&ichan->lock);
+ dev_dbg(dev,
+ "IRQ with active buffer still ready on channel %x, "
+ "active %d, ready %x, %x!\n", chan_id,
+ ichan->active_buffer, ready0, ready1);
+ return IRQ_NONE;
+ }
+
+ if (unlikely(list_empty(&ichan->queue))) {
+ ichan->sg[ichan->active_buffer] = NULL;
+ spin_unlock(&ichan->lock);
+ dev_err(dev,
+ "IRQ without queued buffers on channel %x, active %d, "
+ "ready %x, %x!\n", chan_id,
+ ichan->active_buffer, ready0, ready1);
+ return IRQ_NONE;
+ }
+
+ /*
+ * active_buffer is a software flag, it shows which buffer we are
+ * currently expecting back from the hardware, IDMAC should be
+ * processing the other buffer already
+ */
+ sg = &ichan->sg[ichan->active_buffer];
+ sgnext = ichan->sg[!ichan->active_buffer];
+
+ if (!*sg) {
+ spin_unlock(&ichan->lock);
+ return IRQ_HANDLED;
+ }
+
+ desc = list_entry(ichan->queue.next, struct idmac_tx_desc, list);
+ descnew = desc;
+
+ dev_dbg(dev, "IDMAC irq %d, dma %#llx, next dma %#llx, current %d, curbuf %#x\n",
+ irq, (u64)sg_dma_address(*sg),
+ sgnext ? (u64)sg_dma_address(sgnext) : 0,
+ ichan->active_buffer, curbuf);
+
+ /* Find the descriptor of sgnext */
+ sgnew = idmac_sg_next(ichan, &descnew, *sg);
+ if (sgnext != sgnew)
+ dev_err(dev, "Submitted buffer %p, next buffer %p\n", sgnext, sgnew);
+
+ /*
+ * if sgnext == NULL sg must be the last element in a scatterlist and
+ * queue must be empty
+ */
+ if (unlikely(!sgnext)) {
+ if (!WARN_ON(sg_next(*sg)))
+ dev_dbg(dev, "Underrun on channel %x\n", chan_id);
+ ichan->sg[!ichan->active_buffer] = sgnew;
+
+ if (unlikely(sgnew)) {
+ ipu_submit_buffer(ichan, descnew, sgnew, !ichan->active_buffer);
+ } else {
+ spin_lock(&ipu_data.lock);
+ ipu_ic_disable_task(&ipu_data, chan_id);
+ spin_unlock(&ipu_data.lock);
+ ichan->status = IPU_CHANNEL_READY;
+ /* Continue to check for complete descriptor */
+ }
+ }
+
+ /* Calculate and submit the next sg element */
+ sgnew = idmac_sg_next(ichan, &descnew, sgnew);
+
+ if (unlikely(!sg_next(*sg)) || !sgnext) {
+ /*
+ * Last element in scatterlist done, remove from the queue,
+ * _init for debugging
+ */
+ list_del_init(&desc->list);
+ done = true;
+ }
+
+ *sg = sgnew;
+
+ if (likely(sgnew) &&
+ ipu_submit_buffer(ichan, descnew, sgnew, ichan->active_buffer) < 0) {
+ dmaengine_desc_get_callback(&descnew->txd, &cb);
+
+ list_del_init(&descnew->list);
+ spin_unlock(&ichan->lock);
+
+ dmaengine_desc_callback_invoke(&cb, NULL);
+ spin_lock(&ichan->lock);
+ }
+
+ /* Flip the active buffer - even if update above failed */
+ ichan->active_buffer = !ichan->active_buffer;
+ if (done)
+ dma_cookie_complete(&desc->txd);
+
+ dmaengine_desc_get_callback(&desc->txd, &cb);
+
+ spin_unlock(&ichan->lock);
+
+ if (done && (desc->txd.flags & DMA_PREP_INTERRUPT))
+ dmaengine_desc_callback_invoke(&cb, NULL);
+
+ return IRQ_HANDLED;
+}
+
+static void ipu_gc_tasklet(struct tasklet_struct *t)
+{
+ struct ipu *ipu = from_tasklet(ipu, t, tasklet);
+ int i;
+
+ for (i = 0; i < IPU_CHANNELS_NUM; i++) {
+ struct idmac_channel *ichan = ipu->channel + i;
+ struct idmac_tx_desc *desc;
+ unsigned long flags;
+ struct scatterlist *sg;
+ int j, k;
+
+ for (j = 0; j < ichan->n_tx_desc; j++) {
+ desc = ichan->desc + j;
+ spin_lock_irqsave(&ichan->lock, flags);
+ if (async_tx_test_ack(&desc->txd)) {
+ list_move(&desc->list, &ichan->free_list);
+ for_each_sg(desc->sg, sg, desc->sg_len, k) {
+ if (ichan->sg[0] == sg)
+ ichan->sg[0] = NULL;
+ else if (ichan->sg[1] == sg)
+ ichan->sg[1] = NULL;
+ }
+ async_tx_clear_ack(&desc->txd);
+ }
+ spin_unlock_irqrestore(&ichan->lock, flags);
+ }
+ }
+}
+
+/* Allocate and initialise a transfer descriptor. */
+static struct dma_async_tx_descriptor *idmac_prep_slave_sg(struct dma_chan *chan,
+ struct scatterlist *sgl, unsigned int sg_len,
+ enum dma_transfer_direction direction, unsigned long tx_flags,
+ void *context)
+{
+ struct idmac_channel *ichan = to_idmac_chan(chan);
+ struct idmac_tx_desc *desc = NULL;
+ struct dma_async_tx_descriptor *txd = NULL;
+ unsigned long flags;
+
+ /* We only can handle these three channels so far */
+ if (chan->chan_id != IDMAC_SDC_0 && chan->chan_id != IDMAC_SDC_1 &&
+ chan->chan_id != IDMAC_IC_7)
+ return NULL;
+
+ if (!is_slave_direction(direction)) {
+ dev_err(chan->device->dev, "Invalid DMA direction %d!\n", direction);
+ return NULL;
+ }
+
+ mutex_lock(&ichan->chan_mutex);
+
+ spin_lock_irqsave(&ichan->lock, flags);
+ if (!list_empty(&ichan->free_list)) {
+ desc = list_entry(ichan->free_list.next,
+ struct idmac_tx_desc, list);
+
+ list_del_init(&desc->list);
+
+ desc->sg_len = sg_len;
+ desc->sg = sgl;
+ txd = &desc->txd;
+ txd->flags = tx_flags;
+ }
+ spin_unlock_irqrestore(&ichan->lock, flags);
+
+ mutex_unlock(&ichan->chan_mutex);
+
+ tasklet_schedule(&to_ipu(to_idmac(chan->device))->tasklet);
+
+ return txd;
+}
+
+/* Re-select the current buffer and re-activate the channel */
+static void idmac_issue_pending(struct dma_chan *chan)
+{
+ struct idmac_channel *ichan = to_idmac_chan(chan);
+ struct idmac *idmac = to_idmac(chan->device);
+ struct ipu *ipu = to_ipu(idmac);
+ unsigned long flags;
+
+ /* This is not always needed, but doesn't hurt either */
+ spin_lock_irqsave(&ipu->lock, flags);
+ ipu_select_buffer(chan->chan_id, ichan->active_buffer);
+ spin_unlock_irqrestore(&ipu->lock, flags);
+
+ /*
+ * Might need to perform some parts of initialisation from
+ * ipu_enable_channel(), but not all, we do not want to reset to buffer
+ * 0, don't need to set priority again either, but re-enabling the task
+ * and the channel might be a good idea.
+ */
+}
+
+static int idmac_pause(struct dma_chan *chan)
+{
+ struct idmac_channel *ichan = to_idmac_chan(chan);
+ struct idmac *idmac = to_idmac(chan->device);
+ struct ipu *ipu = to_ipu(idmac);
+ struct list_head *list, *tmp;
+ unsigned long flags;
+
+ mutex_lock(&ichan->chan_mutex);
+
+ spin_lock_irqsave(&ipu->lock, flags);
+ ipu_ic_disable_task(ipu, chan->chan_id);
+
+ /* Return all descriptors into "prepared" state */
+ list_for_each_safe(list, tmp, &ichan->queue)
+ list_del_init(list);
+
+ ichan->sg[0] = NULL;
+ ichan->sg[1] = NULL;
+
+ spin_unlock_irqrestore(&ipu->lock, flags);
+
+ ichan->status = IPU_CHANNEL_INITIALIZED;
+
+ mutex_unlock(&ichan->chan_mutex);
+
+ return 0;
+}
+
+static int __idmac_terminate_all(struct dma_chan *chan)
+{
+ struct idmac_channel *ichan = to_idmac_chan(chan);
+ struct idmac *idmac = to_idmac(chan->device);
+ struct ipu *ipu = to_ipu(idmac);
+ unsigned long flags;
+ int i;
+
+ ipu_disable_channel(idmac, ichan,
+ ichan->status >= IPU_CHANNEL_ENABLED);
+
+ tasklet_disable(&ipu->tasklet);
+
+ /* ichan->queue is modified in ISR, have to spinlock */
+ spin_lock_irqsave(&ichan->lock, flags);
+ list_splice_init(&ichan->queue, &ichan->free_list);
+
+ if (ichan->desc)
+ for (i = 0; i < ichan->n_tx_desc; i++) {
+ struct idmac_tx_desc *desc = ichan->desc + i;
+ if (list_empty(&desc->list))
+ /* Descriptor was prepared, but not submitted */
+ list_add(&desc->list, &ichan->free_list);
+
+ async_tx_clear_ack(&desc->txd);
+ }
+
+ ichan->sg[0] = NULL;
+ ichan->sg[1] = NULL;
+ spin_unlock_irqrestore(&ichan->lock, flags);
+
+ tasklet_enable(&ipu->tasklet);
+
+ ichan->status = IPU_CHANNEL_INITIALIZED;
+
+ return 0;
+}
+
+static int idmac_terminate_all(struct dma_chan *chan)
+{
+ struct idmac_channel *ichan = to_idmac_chan(chan);
+ int ret;
+
+ mutex_lock(&ichan->chan_mutex);
+
+ ret = __idmac_terminate_all(chan);
+
+ mutex_unlock(&ichan->chan_mutex);
+
+ return ret;
+}
+
+#ifdef DEBUG
+static irqreturn_t ic_sof_irq(int irq, void *dev_id)
+{
+ struct idmac_channel *ichan = dev_id;
+ printk(KERN_DEBUG "Got SOF IRQ %d on Channel %d\n",
+ irq, ichan->dma_chan.chan_id);
+ disable_irq_nosync(irq);
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t ic_eof_irq(int irq, void *dev_id)
+{
+ struct idmac_channel *ichan = dev_id;
+ printk(KERN_DEBUG "Got EOF IRQ %d on Channel %d\n",
+ irq, ichan->dma_chan.chan_id);
+ disable_irq_nosync(irq);
+ return IRQ_HANDLED;
+}
+
+static int ic_sof = -EINVAL, ic_eof = -EINVAL;
+#endif
+
+static int idmac_alloc_chan_resources(struct dma_chan *chan)
+{
+ struct idmac_channel *ichan = to_idmac_chan(chan);
+ struct idmac *idmac = to_idmac(chan->device);
+ int ret;
+
+ /* dmaengine.c now guarantees to only offer free channels */
+ BUG_ON(chan->client_count > 1);
+ WARN_ON(ichan->status != IPU_CHANNEL_FREE);
+
+ dma_cookie_init(chan);
+
+ ret = ipu_irq_map(chan->chan_id);
+ if (ret < 0)
+ goto eimap;
+
+ ichan->eof_irq = ret;
+
+ /*
+ * Important to first disable the channel, because maybe someone
+ * used it before us, e.g., the bootloader
+ */
+ ipu_disable_channel(idmac, ichan, true);
+
+ ret = ipu_init_channel(idmac, ichan);
+ if (ret < 0)
+ goto eichan;
+
+ ret = request_irq(ichan->eof_irq, idmac_interrupt, 0,
+ ichan->eof_name, ichan);
+ if (ret < 0)
+ goto erirq;
+
+#ifdef DEBUG
+ if (chan->chan_id == IDMAC_IC_7) {
+ ic_sof = ipu_irq_map(69);
+ if (ic_sof > 0) {
+ ret = request_irq(ic_sof, ic_sof_irq, 0, "IC SOF", ichan);
+ if (ret)
+ dev_err(&chan->dev->device, "request irq failed for IC SOF");
+ }
+ ic_eof = ipu_irq_map(70);
+ if (ic_eof > 0) {
+ ret = request_irq(ic_eof, ic_eof_irq, 0, "IC EOF", ichan);
+ if (ret)
+ dev_err(&chan->dev->device, "request irq failed for IC EOF");
+ }
+ }
+#endif
+
+ ichan->status = IPU_CHANNEL_INITIALIZED;
+
+ dev_dbg(&chan->dev->device, "Found channel 0x%x, irq %d\n",
+ chan->chan_id, ichan->eof_irq);
+
+ return ret;
+
+erirq:
+ ipu_uninit_channel(idmac, ichan);
+eichan:
+ ipu_irq_unmap(chan->chan_id);
+eimap:
+ return ret;
+}
+
+static void idmac_free_chan_resources(struct dma_chan *chan)
+{
+ struct idmac_channel *ichan = to_idmac_chan(chan);
+ struct idmac *idmac = to_idmac(chan->device);
+
+ mutex_lock(&ichan->chan_mutex);
+
+ __idmac_terminate_all(chan);
+
+ if (ichan->status > IPU_CHANNEL_FREE) {
+#ifdef DEBUG
+ if (chan->chan_id == IDMAC_IC_7) {
+ if (ic_sof > 0) {
+ free_irq(ic_sof, ichan);
+ ipu_irq_unmap(69);
+ ic_sof = -EINVAL;
+ }
+ if (ic_eof > 0) {
+ free_irq(ic_eof, ichan);
+ ipu_irq_unmap(70);
+ ic_eof = -EINVAL;
+ }
+ }
+#endif
+ free_irq(ichan->eof_irq, ichan);
+ ipu_irq_unmap(chan->chan_id);
+ }
+
+ ichan->status = IPU_CHANNEL_FREE;
+
+ ipu_uninit_channel(idmac, ichan);
+
+ mutex_unlock(&ichan->chan_mutex);
+
+ tasklet_schedule(&to_ipu(idmac)->tasklet);
+}
+
+static enum dma_status idmac_tx_status(struct dma_chan *chan,
+ dma_cookie_t cookie, struct dma_tx_state *txstate)
+{
+ return dma_cookie_status(chan, cookie, txstate);
+}
+
+static int __init ipu_idmac_init(struct ipu *ipu)
+{
+ struct idmac *idmac = &ipu->idmac;
+ struct dma_device *dma = &idmac->dma;
+ int i;
+
+ dma_cap_set(DMA_SLAVE, dma->cap_mask);
+ dma_cap_set(DMA_PRIVATE, dma->cap_mask);
+
+ /* Compulsory common fields */
+ dma->dev = ipu->dev;
+ dma->device_alloc_chan_resources = idmac_alloc_chan_resources;
+ dma->device_free_chan_resources = idmac_free_chan_resources;
+ dma->device_tx_status = idmac_tx_status;
+ dma->device_issue_pending = idmac_issue_pending;
+
+ /* Compulsory for DMA_SLAVE fields */
+ dma->device_prep_slave_sg = idmac_prep_slave_sg;
+ dma->device_pause = idmac_pause;
+ dma->device_terminate_all = idmac_terminate_all;
+
+ INIT_LIST_HEAD(&dma->channels);
+ for (i = 0; i < IPU_CHANNELS_NUM; i++) {
+ struct idmac_channel *ichan = ipu->channel + i;
+ struct dma_chan *dma_chan = &ichan->dma_chan;
+
+ spin_lock_init(&ichan->lock);
+ mutex_init(&ichan->chan_mutex);
+
+ ichan->status = IPU_CHANNEL_FREE;
+ ichan->sec_chan_en = false;
+ snprintf(ichan->eof_name, sizeof(ichan->eof_name), "IDMAC EOF %d", i);
+
+ dma_chan->device = &idmac->dma;
+ dma_cookie_init(dma_chan);
+ dma_chan->chan_id = i;
+ list_add_tail(&dma_chan->device_node, &dma->channels);
+ }
+
+ idmac_write_icreg(ipu, 0x00000070, IDMAC_CONF);
+
+ return dma_async_device_register(&idmac->dma);
+}
+
+static void ipu_idmac_exit(struct ipu *ipu)
+{
+ int i;
+ struct idmac *idmac = &ipu->idmac;
+
+ for (i = 0; i < IPU_CHANNELS_NUM; i++) {
+ struct idmac_channel *ichan = ipu->channel + i;
+
+ idmac_terminate_all(&ichan->dma_chan);
+ }
+
+ dma_async_device_unregister(&idmac->dma);
+}
+
+/*****************************************************************************
+ * IPU common probe / remove
+ */
+
+static int __init ipu_probe(struct platform_device *pdev)
+{
+ struct resource *mem_ipu, *mem_ic;
+ int ret;
+
+ spin_lock_init(&ipu_data.lock);
+
+ mem_ipu = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ mem_ic = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+ if (!mem_ipu || !mem_ic)
+ return -EINVAL;
+
+ ipu_data.dev = &pdev->dev;
+
+ platform_set_drvdata(pdev, &ipu_data);
+
+ ret = platform_get_irq(pdev, 0);
+ if (ret < 0)
+ goto err_noirq;
+
+ ipu_data.irq_fn = ret;
+ ret = platform_get_irq(pdev, 1);
+ if (ret < 0)
+ goto err_noirq;
+
+ ipu_data.irq_err = ret;
+
+ dev_dbg(&pdev->dev, "fn irq %u, err irq %u\n",
+ ipu_data.irq_fn, ipu_data.irq_err);
+
+ /* Remap IPU common registers */
+ ipu_data.reg_ipu = ioremap(mem_ipu->start, resource_size(mem_ipu));
+ if (!ipu_data.reg_ipu) {
+ ret = -ENOMEM;
+ goto err_ioremap_ipu;
+ }
+
+ /* Remap Image Converter and Image DMA Controller registers */
+ ipu_data.reg_ic = ioremap(mem_ic->start, resource_size(mem_ic));
+ if (!ipu_data.reg_ic) {
+ ret = -ENOMEM;
+ goto err_ioremap_ic;
+ }
+
+ /* Get IPU clock */
+ ipu_data.ipu_clk = clk_get(&pdev->dev, NULL);
+ if (IS_ERR(ipu_data.ipu_clk)) {
+ ret = PTR_ERR(ipu_data.ipu_clk);
+ goto err_clk_get;
+ }
+
+ /* Make sure IPU HSP clock is running */
+ clk_prepare_enable(ipu_data.ipu_clk);
+
+ /* Disable all interrupts */
+ idmac_write_ipureg(&ipu_data, 0, IPU_INT_CTRL_1);
+ idmac_write_ipureg(&ipu_data, 0, IPU_INT_CTRL_2);
+ idmac_write_ipureg(&ipu_data, 0, IPU_INT_CTRL_3);
+ idmac_write_ipureg(&ipu_data, 0, IPU_INT_CTRL_4);
+ idmac_write_ipureg(&ipu_data, 0, IPU_INT_CTRL_5);
+
+ dev_dbg(&pdev->dev, "%s @ 0x%08lx, fn irq %u, err irq %u\n", pdev->name,
+ (unsigned long)mem_ipu->start, ipu_data.irq_fn, ipu_data.irq_err);
+
+ ret = ipu_irq_attach_irq(&ipu_data, pdev);
+ if (ret < 0)
+ goto err_attach_irq;
+
+ /* Initialize DMA engine */
+ ret = ipu_idmac_init(&ipu_data);
+ if (ret < 0)
+ goto err_idmac_init;
+
+ tasklet_setup(&ipu_data.tasklet, ipu_gc_tasklet);
+
+ ipu_data.dev = &pdev->dev;
+
+ dev_dbg(ipu_data.dev, "IPU initialized\n");
+
+ return 0;
+
+err_idmac_init:
+err_attach_irq:
+ ipu_irq_detach_irq(&ipu_data, pdev);
+ clk_disable_unprepare(ipu_data.ipu_clk);
+ clk_put(ipu_data.ipu_clk);
+err_clk_get:
+ iounmap(ipu_data.reg_ic);
+err_ioremap_ic:
+ iounmap(ipu_data.reg_ipu);
+err_ioremap_ipu:
+err_noirq:
+ dev_err(&pdev->dev, "Failed to probe IPU: %d\n", ret);
+ return ret;
+}
+
+static int ipu_remove(struct platform_device *pdev)
+{
+ struct ipu *ipu = platform_get_drvdata(pdev);
+
+ ipu_idmac_exit(ipu);
+ ipu_irq_detach_irq(ipu, pdev);
+ clk_disable_unprepare(ipu->ipu_clk);
+ clk_put(ipu->ipu_clk);
+ iounmap(ipu->reg_ic);
+ iounmap(ipu->reg_ipu);
+ tasklet_kill(&ipu->tasklet);
+
+ return 0;
+}
+
+/*
+ * We need two MEM resources - with IPU-common and Image Converter registers,
+ * including PF_CONF and IDMAC_* registers, and two IRQs - function and error
+ */
+static struct platform_driver ipu_platform_driver = {
+ .driver = {
+ .name = "ipu-core",
+ },
+ .remove = ipu_remove,
+};
+
+static int __init ipu_init(void)
+{
+ return platform_driver_probe(&ipu_platform_driver, ipu_probe);
+}
+subsys_initcall(ipu_init);
+
+MODULE_DESCRIPTION("IPU core driver");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Guennadi Liakhovetski <lg@denx.de>");
+MODULE_ALIAS("platform:ipu-core");
diff --git a/drivers/dma/ipu/ipu_intern.h b/drivers/dma/ipu/ipu_intern.h
new file mode 100644
index 000000000..e7ec1dec3
--- /dev/null
+++ b/drivers/dma/ipu/ipu_intern.h
@@ -0,0 +1,173 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2008
+ * Guennadi Liakhovetski, DENX Software Engineering, <lg@denx.de>
+ *
+ * Copyright (C) 2005-2007 Freescale Semiconductor, Inc. All Rights Reserved.
+ */
+
+#ifndef _IPU_INTERN_H_
+#define _IPU_INTERN_H_
+
+#include <linux/dmaengine.h>
+#include <linux/platform_device.h>
+#include <linux/interrupt.h>
+
+/* IPU Common registers */
+#define IPU_CONF 0x00
+#define IPU_CHA_BUF0_RDY 0x04
+#define IPU_CHA_BUF1_RDY 0x08
+#define IPU_CHA_DB_MODE_SEL 0x0C
+#define IPU_CHA_CUR_BUF 0x10
+#define IPU_FS_PROC_FLOW 0x14
+#define IPU_FS_DISP_FLOW 0x18
+#define IPU_TASKS_STAT 0x1C
+#define IPU_IMA_ADDR 0x20
+#define IPU_IMA_DATA 0x24
+#define IPU_INT_CTRL_1 0x28
+#define IPU_INT_CTRL_2 0x2C
+#define IPU_INT_CTRL_3 0x30
+#define IPU_INT_CTRL_4 0x34
+#define IPU_INT_CTRL_5 0x38
+#define IPU_INT_STAT_1 0x3C
+#define IPU_INT_STAT_2 0x40
+#define IPU_INT_STAT_3 0x44
+#define IPU_INT_STAT_4 0x48
+#define IPU_INT_STAT_5 0x4C
+#define IPU_BRK_CTRL_1 0x50
+#define IPU_BRK_CTRL_2 0x54
+#define IPU_BRK_STAT 0x58
+#define IPU_DIAGB_CTRL 0x5C
+
+/* IPU_CONF Register bits */
+#define IPU_CONF_CSI_EN 0x00000001
+#define IPU_CONF_IC_EN 0x00000002
+#define IPU_CONF_ROT_EN 0x00000004
+#define IPU_CONF_PF_EN 0x00000008
+#define IPU_CONF_SDC_EN 0x00000010
+#define IPU_CONF_ADC_EN 0x00000020
+#define IPU_CONF_DI_EN 0x00000040
+#define IPU_CONF_DU_EN 0x00000080
+#define IPU_CONF_PXL_ENDIAN 0x00000100
+
+/* Image Converter Registers */
+#define IC_CONF 0x88
+#define IC_PRP_ENC_RSC 0x8C
+#define IC_PRP_VF_RSC 0x90
+#define IC_PP_RSC 0x94
+#define IC_CMBP_1 0x98
+#define IC_CMBP_2 0x9C
+#define PF_CONF 0xA0
+#define IDMAC_CONF 0xA4
+#define IDMAC_CHA_EN 0xA8
+#define IDMAC_CHA_PRI 0xAC
+#define IDMAC_CHA_BUSY 0xB0
+
+/* Image Converter Register bits */
+#define IC_CONF_PRPENC_EN 0x00000001
+#define IC_CONF_PRPENC_CSC1 0x00000002
+#define IC_CONF_PRPENC_ROT_EN 0x00000004
+#define IC_CONF_PRPVF_EN 0x00000100
+#define IC_CONF_PRPVF_CSC1 0x00000200
+#define IC_CONF_PRPVF_CSC2 0x00000400
+#define IC_CONF_PRPVF_CMB 0x00000800
+#define IC_CONF_PRPVF_ROT_EN 0x00001000
+#define IC_CONF_PP_EN 0x00010000
+#define IC_CONF_PP_CSC1 0x00020000
+#define IC_CONF_PP_CSC2 0x00040000
+#define IC_CONF_PP_CMB 0x00080000
+#define IC_CONF_PP_ROT_EN 0x00100000
+#define IC_CONF_IC_GLB_LOC_A 0x10000000
+#define IC_CONF_KEY_COLOR_EN 0x20000000
+#define IC_CONF_RWS_EN 0x40000000
+#define IC_CONF_CSI_MEM_WR_EN 0x80000000
+
+#define IDMA_CHAN_INVALID 0x000000FF
+#define IDMA_IC_0 0x00000001
+#define IDMA_IC_1 0x00000002
+#define IDMA_IC_2 0x00000004
+#define IDMA_IC_3 0x00000008
+#define IDMA_IC_4 0x00000010
+#define IDMA_IC_5 0x00000020
+#define IDMA_IC_6 0x00000040
+#define IDMA_IC_7 0x00000080
+#define IDMA_IC_8 0x00000100
+#define IDMA_IC_9 0x00000200
+#define IDMA_IC_10 0x00000400
+#define IDMA_IC_11 0x00000800
+#define IDMA_IC_12 0x00001000
+#define IDMA_IC_13 0x00002000
+#define IDMA_SDC_BG 0x00004000
+#define IDMA_SDC_FG 0x00008000
+#define IDMA_SDC_MASK 0x00010000
+#define IDMA_SDC_PARTIAL 0x00020000
+#define IDMA_ADC_SYS1_WR 0x00040000
+#define IDMA_ADC_SYS2_WR 0x00080000
+#define IDMA_ADC_SYS1_CMD 0x00100000
+#define IDMA_ADC_SYS2_CMD 0x00200000
+#define IDMA_ADC_SYS1_RD 0x00400000
+#define IDMA_ADC_SYS2_RD 0x00800000
+#define IDMA_PF_QP 0x01000000
+#define IDMA_PF_BSP 0x02000000
+#define IDMA_PF_Y_IN 0x04000000
+#define IDMA_PF_U_IN 0x08000000
+#define IDMA_PF_V_IN 0x10000000
+#define IDMA_PF_Y_OUT 0x20000000
+#define IDMA_PF_U_OUT 0x40000000
+#define IDMA_PF_V_OUT 0x80000000
+
+#define TSTAT_PF_H264_PAUSE 0x00000001
+#define TSTAT_CSI2MEM_MASK 0x0000000C
+#define TSTAT_CSI2MEM_OFFSET 2
+#define TSTAT_VF_MASK 0x00000600
+#define TSTAT_VF_OFFSET 9
+#define TSTAT_VF_ROT_MASK 0x000C0000
+#define TSTAT_VF_ROT_OFFSET 18
+#define TSTAT_ENC_MASK 0x00000180
+#define TSTAT_ENC_OFFSET 7
+#define TSTAT_ENC_ROT_MASK 0x00030000
+#define TSTAT_ENC_ROT_OFFSET 16
+#define TSTAT_PP_MASK 0x00001800
+#define TSTAT_PP_OFFSET 11
+#define TSTAT_PP_ROT_MASK 0x00300000
+#define TSTAT_PP_ROT_OFFSET 20
+#define TSTAT_PF_MASK 0x00C00000
+#define TSTAT_PF_OFFSET 22
+#define TSTAT_ADCSYS1_MASK 0x03000000
+#define TSTAT_ADCSYS1_OFFSET 24
+#define TSTAT_ADCSYS2_MASK 0x0C000000
+#define TSTAT_ADCSYS2_OFFSET 26
+
+#define TASK_STAT_IDLE 0
+#define TASK_STAT_ACTIVE 1
+#define TASK_STAT_WAIT4READY 2
+
+struct idmac {
+ struct dma_device dma;
+};
+
+struct ipu {
+ void __iomem *reg_ipu;
+ void __iomem *reg_ic;
+ unsigned int irq_fn; /* IPU Function IRQ to the CPU */
+ unsigned int irq_err; /* IPU Error IRQ to the CPU */
+ unsigned int irq_base; /* Beginning of the IPU IRQ range */
+ unsigned long channel_init_mask;
+ spinlock_t lock;
+ struct clk *ipu_clk;
+ struct device *dev;
+ struct idmac idmac;
+ struct idmac_channel channel[IPU_CHANNELS_NUM];
+ struct tasklet_struct tasklet;
+};
+
+#define to_idmac(d) container_of(d, struct idmac, dma)
+
+extern int ipu_irq_attach_irq(struct ipu *ipu, struct platform_device *dev);
+extern void ipu_irq_detach_irq(struct ipu *ipu, struct platform_device *dev);
+
+extern bool ipu_irq_status(uint32_t irq);
+extern int ipu_irq_map(unsigned int source);
+extern int ipu_irq_unmap(unsigned int source);
+
+#endif
diff --git a/drivers/dma/ipu/ipu_irq.c b/drivers/dma/ipu/ipu_irq.c
new file mode 100644
index 000000000..97d9a6f04
--- /dev/null
+++ b/drivers/dma/ipu/ipu_irq.c
@@ -0,0 +1,367 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2008
+ * Guennadi Liakhovetski, DENX Software Engineering, <lg@denx.de>
+ */
+
+#include <linux/init.h>
+#include <linux/err.h>
+#include <linux/spinlock.h>
+#include <linux/delay.h>
+#include <linux/clk.h>
+#include <linux/irq.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/dma/ipu-dma.h>
+
+#include "ipu_intern.h"
+
+/*
+ * Register read / write - shall be inlined by the compiler
+ */
+static u32 ipu_read_reg(struct ipu *ipu, unsigned long reg)
+{
+ return __raw_readl(ipu->reg_ipu + reg);
+}
+
+static void ipu_write_reg(struct ipu *ipu, u32 value, unsigned long reg)
+{
+ __raw_writel(value, ipu->reg_ipu + reg);
+}
+
+
+/*
+ * IPU IRQ chip driver
+ */
+
+#define IPU_IRQ_NR_FN_BANKS 3
+#define IPU_IRQ_NR_ERR_BANKS 2
+#define IPU_IRQ_NR_BANKS (IPU_IRQ_NR_FN_BANKS + IPU_IRQ_NR_ERR_BANKS)
+
+struct ipu_irq_bank {
+ unsigned int control;
+ unsigned int status;
+ struct ipu *ipu;
+};
+
+static struct ipu_irq_bank irq_bank[IPU_IRQ_NR_BANKS] = {
+ /* 3 groups of functional interrupts */
+ {
+ .control = IPU_INT_CTRL_1,
+ .status = IPU_INT_STAT_1,
+ }, {
+ .control = IPU_INT_CTRL_2,
+ .status = IPU_INT_STAT_2,
+ }, {
+ .control = IPU_INT_CTRL_3,
+ .status = IPU_INT_STAT_3,
+ },
+ /* 2 groups of error interrupts */
+ {
+ .control = IPU_INT_CTRL_4,
+ .status = IPU_INT_STAT_4,
+ }, {
+ .control = IPU_INT_CTRL_5,
+ .status = IPU_INT_STAT_5,
+ },
+};
+
+struct ipu_irq_map {
+ unsigned int irq;
+ int source;
+ struct ipu_irq_bank *bank;
+ struct ipu *ipu;
+};
+
+static struct ipu_irq_map irq_map[CONFIG_MX3_IPU_IRQS];
+/* Protects allocations from the above array of maps */
+static DEFINE_MUTEX(map_lock);
+/* Protects register accesses and individual mappings */
+static DEFINE_RAW_SPINLOCK(bank_lock);
+
+static struct ipu_irq_map *src2map(unsigned int src)
+{
+ int i;
+
+ for (i = 0; i < CONFIG_MX3_IPU_IRQS; i++)
+ if (irq_map[i].source == src)
+ return irq_map + i;
+
+ return NULL;
+}
+
+static void ipu_irq_unmask(struct irq_data *d)
+{
+ struct ipu_irq_map *map = irq_data_get_irq_chip_data(d);
+ struct ipu_irq_bank *bank;
+ uint32_t reg;
+ unsigned long lock_flags;
+
+ raw_spin_lock_irqsave(&bank_lock, lock_flags);
+
+ bank = map->bank;
+ if (!bank) {
+ raw_spin_unlock_irqrestore(&bank_lock, lock_flags);
+ pr_err("IPU: %s(%u) - unmapped!\n", __func__, d->irq);
+ return;
+ }
+
+ reg = ipu_read_reg(bank->ipu, bank->control);
+ reg |= (1UL << (map->source & 31));
+ ipu_write_reg(bank->ipu, reg, bank->control);
+
+ raw_spin_unlock_irqrestore(&bank_lock, lock_flags);
+}
+
+static void ipu_irq_mask(struct irq_data *d)
+{
+ struct ipu_irq_map *map = irq_data_get_irq_chip_data(d);
+ struct ipu_irq_bank *bank;
+ uint32_t reg;
+ unsigned long lock_flags;
+
+ raw_spin_lock_irqsave(&bank_lock, lock_flags);
+
+ bank = map->bank;
+ if (!bank) {
+ raw_spin_unlock_irqrestore(&bank_lock, lock_flags);
+ pr_err("IPU: %s(%u) - unmapped!\n", __func__, d->irq);
+ return;
+ }
+
+ reg = ipu_read_reg(bank->ipu, bank->control);
+ reg &= ~(1UL << (map->source & 31));
+ ipu_write_reg(bank->ipu, reg, bank->control);
+
+ raw_spin_unlock_irqrestore(&bank_lock, lock_flags);
+}
+
+static void ipu_irq_ack(struct irq_data *d)
+{
+ struct ipu_irq_map *map = irq_data_get_irq_chip_data(d);
+ struct ipu_irq_bank *bank;
+ unsigned long lock_flags;
+
+ raw_spin_lock_irqsave(&bank_lock, lock_flags);
+
+ bank = map->bank;
+ if (!bank) {
+ raw_spin_unlock_irqrestore(&bank_lock, lock_flags);
+ pr_err("IPU: %s(%u) - unmapped!\n", __func__, d->irq);
+ return;
+ }
+
+ ipu_write_reg(bank->ipu, 1UL << (map->source & 31), bank->status);
+ raw_spin_unlock_irqrestore(&bank_lock, lock_flags);
+}
+
+/**
+ * ipu_irq_status() - returns the current interrupt status of the specified IRQ.
+ * @irq: interrupt line to get status for.
+ * @return: true if the interrupt is pending/asserted or false if the
+ * interrupt is not pending.
+ */
+bool ipu_irq_status(unsigned int irq)
+{
+ struct ipu_irq_map *map = irq_get_chip_data(irq);
+ struct ipu_irq_bank *bank;
+ unsigned long lock_flags;
+ bool ret;
+
+ raw_spin_lock_irqsave(&bank_lock, lock_flags);
+ bank = map->bank;
+ ret = bank && ipu_read_reg(bank->ipu, bank->status) &
+ (1UL << (map->source & 31));
+ raw_spin_unlock_irqrestore(&bank_lock, lock_flags);
+
+ return ret;
+}
+
+/**
+ * ipu_irq_map() - map an IPU interrupt source to an IRQ number
+ * @source: interrupt source bit position (see below)
+ * @return: mapped IRQ number or negative error code
+ *
+ * The source parameter has to be explained further. On i.MX31 IPU has 137 IRQ
+ * sources, they are broken down in 5 32-bit registers, like 32, 32, 24, 32, 17.
+ * However, the source argument of this function is not the sequence number of
+ * the possible IRQ, but rather its bit position. So, first interrupt in fourth
+ * register has source number 96, and not 88. This makes calculations easier,
+ * and also provides forward compatibility with any future IPU implementations
+ * with any interrupt bit assignments.
+ */
+int ipu_irq_map(unsigned int source)
+{
+ int i, ret = -ENOMEM;
+ struct ipu_irq_map *map;
+
+ might_sleep();
+
+ mutex_lock(&map_lock);
+ map = src2map(source);
+ if (map) {
+ pr_err("IPU: Source %u already mapped to IRQ %u\n", source, map->irq);
+ ret = -EBUSY;
+ goto out;
+ }
+
+ for (i = 0; i < CONFIG_MX3_IPU_IRQS; i++) {
+ if (irq_map[i].source < 0) {
+ unsigned long lock_flags;
+
+ raw_spin_lock_irqsave(&bank_lock, lock_flags);
+ irq_map[i].source = source;
+ irq_map[i].bank = irq_bank + source / 32;
+ raw_spin_unlock_irqrestore(&bank_lock, lock_flags);
+
+ ret = irq_map[i].irq;
+ pr_debug("IPU: mapped source %u to IRQ %u\n",
+ source, ret);
+ break;
+ }
+ }
+out:
+ mutex_unlock(&map_lock);
+
+ if (ret < 0)
+ pr_err("IPU: couldn't map source %u: %d\n", source, ret);
+
+ return ret;
+}
+
+/**
+ * ipu_irq_unmap() - unmap an IPU interrupt source
+ * @source: interrupt source bit position (see ipu_irq_map())
+ * @return: 0 or negative error code
+ */
+int ipu_irq_unmap(unsigned int source)
+{
+ int i, ret = -EINVAL;
+
+ might_sleep();
+
+ mutex_lock(&map_lock);
+ for (i = 0; i < CONFIG_MX3_IPU_IRQS; i++) {
+ if (irq_map[i].source == source) {
+ unsigned long lock_flags;
+
+ pr_debug("IPU: unmapped source %u from IRQ %u\n",
+ source, irq_map[i].irq);
+
+ raw_spin_lock_irqsave(&bank_lock, lock_flags);
+ irq_map[i].source = -EINVAL;
+ irq_map[i].bank = NULL;
+ raw_spin_unlock_irqrestore(&bank_lock, lock_flags);
+
+ ret = 0;
+ break;
+ }
+ }
+ mutex_unlock(&map_lock);
+
+ return ret;
+}
+
+/* Chained IRQ handler for IPU function and error interrupt */
+static void ipu_irq_handler(struct irq_desc *desc)
+{
+ struct ipu *ipu = irq_desc_get_handler_data(desc);
+ u32 status;
+ int i, line;
+
+ for (i = 0; i < IPU_IRQ_NR_BANKS; i++) {
+ struct ipu_irq_bank *bank = irq_bank + i;
+
+ raw_spin_lock(&bank_lock);
+ status = ipu_read_reg(ipu, bank->status);
+ /*
+ * Don't think we have to clear all interrupts here, they will
+ * be acked by ->handle_irq() (handle_level_irq). However, we
+ * might want to clear unhandled interrupts after the loop...
+ */
+ status &= ipu_read_reg(ipu, bank->control);
+ raw_spin_unlock(&bank_lock);
+ while ((line = ffs(status))) {
+ struct ipu_irq_map *map;
+ unsigned int irq;
+
+ line--;
+ status &= ~(1UL << line);
+
+ raw_spin_lock(&bank_lock);
+ map = src2map(32 * i + line);
+ if (!map) {
+ raw_spin_unlock(&bank_lock);
+ pr_err("IPU: Interrupt on unmapped source %u bank %d\n",
+ line, i);
+ continue;
+ }
+ irq = map->irq;
+ raw_spin_unlock(&bank_lock);
+ generic_handle_irq(irq);
+ }
+ }
+}
+
+static struct irq_chip ipu_irq_chip = {
+ .name = "ipu_irq",
+ .irq_ack = ipu_irq_ack,
+ .irq_mask = ipu_irq_mask,
+ .irq_unmask = ipu_irq_unmask,
+};
+
+/* Install the IRQ handler */
+int __init ipu_irq_attach_irq(struct ipu *ipu, struct platform_device *dev)
+{
+ unsigned int irq, i;
+ int irq_base = irq_alloc_descs(-1, 0, CONFIG_MX3_IPU_IRQS,
+ numa_node_id());
+
+ if (irq_base < 0)
+ return irq_base;
+
+ for (i = 0; i < IPU_IRQ_NR_BANKS; i++)
+ irq_bank[i].ipu = ipu;
+
+ for (i = 0; i < CONFIG_MX3_IPU_IRQS; i++) {
+ int ret;
+
+ irq = irq_base + i;
+ ret = irq_set_chip(irq, &ipu_irq_chip);
+ if (ret < 0)
+ return ret;
+ ret = irq_set_chip_data(irq, irq_map + i);
+ if (ret < 0)
+ return ret;
+ irq_map[i].ipu = ipu;
+ irq_map[i].irq = irq;
+ irq_map[i].source = -EINVAL;
+ irq_set_handler(irq, handle_level_irq);
+ irq_clear_status_flags(irq, IRQ_NOREQUEST | IRQ_NOPROBE);
+ }
+
+ irq_set_chained_handler_and_data(ipu->irq_fn, ipu_irq_handler, ipu);
+
+ irq_set_chained_handler_and_data(ipu->irq_err, ipu_irq_handler, ipu);
+
+ ipu->irq_base = irq_base;
+
+ return 0;
+}
+
+void ipu_irq_detach_irq(struct ipu *ipu, struct platform_device *dev)
+{
+ unsigned int irq, irq_base;
+
+ irq_base = ipu->irq_base;
+
+ irq_set_chained_handler_and_data(ipu->irq_fn, NULL, NULL);
+
+ irq_set_chained_handler_and_data(ipu->irq_err, NULL, NULL);
+
+ for (irq = irq_base; irq < irq_base + CONFIG_MX3_IPU_IRQS; irq++) {
+ irq_set_status_flags(irq, IRQ_NOREQUEST);
+ irq_set_chip(irq, NULL);
+ irq_set_chip_data(irq, NULL);
+ }
+}
diff --git a/drivers/dma/k3dma.c b/drivers/dma/k3dma.c
new file mode 100644
index 000000000..ecdaada95
--- /dev/null
+++ b/drivers/dma/k3dma.c
@@ -0,0 +1,1044 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2013 - 2015 Linaro Ltd.
+ * Copyright (c) 2013 HiSilicon Limited.
+ */
+#include <linux/sched.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmapool.h>
+#include <linux/dmaengine.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/of_device.h>
+#include <linux/of.h>
+#include <linux/clk.h>
+#include <linux/of_dma.h>
+
+#include "virt-dma.h"
+
+#define DRIVER_NAME "k3-dma"
+#define DMA_MAX_SIZE 0x1ffc
+#define DMA_CYCLIC_MAX_PERIOD 0x1000
+#define LLI_BLOCK_SIZE (4 * PAGE_SIZE)
+
+#define INT_STAT 0x00
+#define INT_TC1 0x04
+#define INT_TC2 0x08
+#define INT_ERR1 0x0c
+#define INT_ERR2 0x10
+#define INT_TC1_MASK 0x18
+#define INT_TC2_MASK 0x1c
+#define INT_ERR1_MASK 0x20
+#define INT_ERR2_MASK 0x24
+#define INT_TC1_RAW 0x600
+#define INT_TC2_RAW 0x608
+#define INT_ERR1_RAW 0x610
+#define INT_ERR2_RAW 0x618
+#define CH_PRI 0x688
+#define CH_STAT 0x690
+#define CX_CUR_CNT 0x704
+#define CX_LLI 0x800
+#define CX_CNT1 0x80c
+#define CX_CNT0 0x810
+#define CX_SRC 0x814
+#define CX_DST 0x818
+#define CX_CFG 0x81c
+
+#define CX_LLI_CHAIN_EN 0x2
+#define CX_CFG_EN 0x1
+#define CX_CFG_NODEIRQ BIT(1)
+#define CX_CFG_MEM2PER (0x1 << 2)
+#define CX_CFG_PER2MEM (0x2 << 2)
+#define CX_CFG_SRCINCR (0x1 << 31)
+#define CX_CFG_DSTINCR (0x1 << 30)
+
+struct k3_desc_hw {
+ u32 lli;
+ u32 reserved[3];
+ u32 count;
+ u32 saddr;
+ u32 daddr;
+ u32 config;
+} __aligned(32);
+
+struct k3_dma_desc_sw {
+ struct virt_dma_desc vd;
+ dma_addr_t desc_hw_lli;
+ size_t desc_num;
+ size_t size;
+ struct k3_desc_hw *desc_hw;
+};
+
+struct k3_dma_phy;
+
+struct k3_dma_chan {
+ u32 ccfg;
+ struct virt_dma_chan vc;
+ struct k3_dma_phy *phy;
+ struct list_head node;
+ dma_addr_t dev_addr;
+ enum dma_status status;
+ bool cyclic;
+ struct dma_slave_config slave_config;
+};
+
+struct k3_dma_phy {
+ u32 idx;
+ void __iomem *base;
+ struct k3_dma_chan *vchan;
+ struct k3_dma_desc_sw *ds_run;
+ struct k3_dma_desc_sw *ds_done;
+};
+
+struct k3_dma_dev {
+ struct dma_device slave;
+ void __iomem *base;
+ struct tasklet_struct task;
+ spinlock_t lock;
+ struct list_head chan_pending;
+ struct k3_dma_phy *phy;
+ struct k3_dma_chan *chans;
+ struct clk *clk;
+ struct dma_pool *pool;
+ u32 dma_channels;
+ u32 dma_requests;
+ u32 dma_channel_mask;
+ unsigned int irq;
+};
+
+
+#define K3_FLAG_NOCLK BIT(1)
+
+struct k3dma_soc_data {
+ unsigned long flags;
+};
+
+
+#define to_k3_dma(dmadev) container_of(dmadev, struct k3_dma_dev, slave)
+
+static int k3_dma_config_write(struct dma_chan *chan,
+ enum dma_transfer_direction dir,
+ struct dma_slave_config *cfg);
+
+static struct k3_dma_chan *to_k3_chan(struct dma_chan *chan)
+{
+ return container_of(chan, struct k3_dma_chan, vc.chan);
+}
+
+static void k3_dma_pause_dma(struct k3_dma_phy *phy, bool on)
+{
+ u32 val = 0;
+
+ if (on) {
+ val = readl_relaxed(phy->base + CX_CFG);
+ val |= CX_CFG_EN;
+ writel_relaxed(val, phy->base + CX_CFG);
+ } else {
+ val = readl_relaxed(phy->base + CX_CFG);
+ val &= ~CX_CFG_EN;
+ writel_relaxed(val, phy->base + CX_CFG);
+ }
+}
+
+static void k3_dma_terminate_chan(struct k3_dma_phy *phy, struct k3_dma_dev *d)
+{
+ u32 val = 0;
+
+ k3_dma_pause_dma(phy, false);
+
+ val = 0x1 << phy->idx;
+ writel_relaxed(val, d->base + INT_TC1_RAW);
+ writel_relaxed(val, d->base + INT_TC2_RAW);
+ writel_relaxed(val, d->base + INT_ERR1_RAW);
+ writel_relaxed(val, d->base + INT_ERR2_RAW);
+}
+
+static void k3_dma_set_desc(struct k3_dma_phy *phy, struct k3_desc_hw *hw)
+{
+ writel_relaxed(hw->lli, phy->base + CX_LLI);
+ writel_relaxed(hw->count, phy->base + CX_CNT0);
+ writel_relaxed(hw->saddr, phy->base + CX_SRC);
+ writel_relaxed(hw->daddr, phy->base + CX_DST);
+ writel_relaxed(hw->config, phy->base + CX_CFG);
+}
+
+static u32 k3_dma_get_curr_cnt(struct k3_dma_dev *d, struct k3_dma_phy *phy)
+{
+ u32 cnt = 0;
+
+ cnt = readl_relaxed(d->base + CX_CUR_CNT + phy->idx * 0x10);
+ cnt &= 0xffff;
+ return cnt;
+}
+
+static u32 k3_dma_get_curr_lli(struct k3_dma_phy *phy)
+{
+ return readl_relaxed(phy->base + CX_LLI);
+}
+
+static u32 k3_dma_get_chan_stat(struct k3_dma_dev *d)
+{
+ return readl_relaxed(d->base + CH_STAT);
+}
+
+static void k3_dma_enable_dma(struct k3_dma_dev *d, bool on)
+{
+ if (on) {
+ /* set same priority */
+ writel_relaxed(0x0, d->base + CH_PRI);
+
+ /* unmask irq */
+ writel_relaxed(0xffff, d->base + INT_TC1_MASK);
+ writel_relaxed(0xffff, d->base + INT_TC2_MASK);
+ writel_relaxed(0xffff, d->base + INT_ERR1_MASK);
+ writel_relaxed(0xffff, d->base + INT_ERR2_MASK);
+ } else {
+ /* mask irq */
+ writel_relaxed(0x0, d->base + INT_TC1_MASK);
+ writel_relaxed(0x0, d->base + INT_TC2_MASK);
+ writel_relaxed(0x0, d->base + INT_ERR1_MASK);
+ writel_relaxed(0x0, d->base + INT_ERR2_MASK);
+ }
+}
+
+static irqreturn_t k3_dma_int_handler(int irq, void *dev_id)
+{
+ struct k3_dma_dev *d = (struct k3_dma_dev *)dev_id;
+ struct k3_dma_phy *p;
+ struct k3_dma_chan *c;
+ u32 stat = readl_relaxed(d->base + INT_STAT);
+ u32 tc1 = readl_relaxed(d->base + INT_TC1);
+ u32 tc2 = readl_relaxed(d->base + INT_TC2);
+ u32 err1 = readl_relaxed(d->base + INT_ERR1);
+ u32 err2 = readl_relaxed(d->base + INT_ERR2);
+ u32 i, irq_chan = 0;
+
+ while (stat) {
+ i = __ffs(stat);
+ stat &= ~BIT(i);
+ if (likely(tc1 & BIT(i)) || (tc2 & BIT(i))) {
+
+ p = &d->phy[i];
+ c = p->vchan;
+ if (c && (tc1 & BIT(i))) {
+ spin_lock(&c->vc.lock);
+ if (p->ds_run != NULL) {
+ vchan_cookie_complete(&p->ds_run->vd);
+ p->ds_done = p->ds_run;
+ p->ds_run = NULL;
+ }
+ spin_unlock(&c->vc.lock);
+ }
+ if (c && (tc2 & BIT(i))) {
+ spin_lock(&c->vc.lock);
+ if (p->ds_run != NULL)
+ vchan_cyclic_callback(&p->ds_run->vd);
+ spin_unlock(&c->vc.lock);
+ }
+ irq_chan |= BIT(i);
+ }
+ if (unlikely((err1 & BIT(i)) || (err2 & BIT(i))))
+ dev_warn(d->slave.dev, "DMA ERR\n");
+ }
+
+ writel_relaxed(irq_chan, d->base + INT_TC1_RAW);
+ writel_relaxed(irq_chan, d->base + INT_TC2_RAW);
+ writel_relaxed(err1, d->base + INT_ERR1_RAW);
+ writel_relaxed(err2, d->base + INT_ERR2_RAW);
+
+ if (irq_chan)
+ tasklet_schedule(&d->task);
+
+ if (irq_chan || err1 || err2)
+ return IRQ_HANDLED;
+
+ return IRQ_NONE;
+}
+
+static int k3_dma_start_txd(struct k3_dma_chan *c)
+{
+ struct k3_dma_dev *d = to_k3_dma(c->vc.chan.device);
+ struct virt_dma_desc *vd = vchan_next_desc(&c->vc);
+
+ if (!c->phy)
+ return -EAGAIN;
+
+ if (BIT(c->phy->idx) & k3_dma_get_chan_stat(d))
+ return -EAGAIN;
+
+ /* Avoid losing track of ds_run if a transaction is in flight */
+ if (c->phy->ds_run)
+ return -EAGAIN;
+
+ if (vd) {
+ struct k3_dma_desc_sw *ds =
+ container_of(vd, struct k3_dma_desc_sw, vd);
+ /*
+ * fetch and remove request from vc->desc_issued
+ * so vc->desc_issued only contains desc pending
+ */
+ list_del(&ds->vd.node);
+
+ c->phy->ds_run = ds;
+ c->phy->ds_done = NULL;
+ /* start dma */
+ k3_dma_set_desc(c->phy, &ds->desc_hw[0]);
+ return 0;
+ }
+ c->phy->ds_run = NULL;
+ c->phy->ds_done = NULL;
+ return -EAGAIN;
+}
+
+static void k3_dma_tasklet(struct tasklet_struct *t)
+{
+ struct k3_dma_dev *d = from_tasklet(d, t, task);
+ struct k3_dma_phy *p;
+ struct k3_dma_chan *c, *cn;
+ unsigned pch, pch_alloc = 0;
+
+ /* check new dma request of running channel in vc->desc_issued */
+ list_for_each_entry_safe(c, cn, &d->slave.channels, vc.chan.device_node) {
+ spin_lock_irq(&c->vc.lock);
+ p = c->phy;
+ if (p && p->ds_done) {
+ if (k3_dma_start_txd(c)) {
+ /* No current txd associated with this channel */
+ dev_dbg(d->slave.dev, "pchan %u: free\n", p->idx);
+ /* Mark this channel free */
+ c->phy = NULL;
+ p->vchan = NULL;
+ }
+ }
+ spin_unlock_irq(&c->vc.lock);
+ }
+
+ /* check new channel request in d->chan_pending */
+ spin_lock_irq(&d->lock);
+ for (pch = 0; pch < d->dma_channels; pch++) {
+ if (!(d->dma_channel_mask & (1 << pch)))
+ continue;
+
+ p = &d->phy[pch];
+
+ if (p->vchan == NULL && !list_empty(&d->chan_pending)) {
+ c = list_first_entry(&d->chan_pending,
+ struct k3_dma_chan, node);
+ /* remove from d->chan_pending */
+ list_del_init(&c->node);
+ pch_alloc |= 1 << pch;
+ /* Mark this channel allocated */
+ p->vchan = c;
+ c->phy = p;
+ dev_dbg(d->slave.dev, "pchan %u: alloc vchan %p\n", pch, &c->vc);
+ }
+ }
+ spin_unlock_irq(&d->lock);
+
+ for (pch = 0; pch < d->dma_channels; pch++) {
+ if (!(d->dma_channel_mask & (1 << pch)))
+ continue;
+
+ if (pch_alloc & (1 << pch)) {
+ p = &d->phy[pch];
+ c = p->vchan;
+ if (c) {
+ spin_lock_irq(&c->vc.lock);
+ k3_dma_start_txd(c);
+ spin_unlock_irq(&c->vc.lock);
+ }
+ }
+ }
+}
+
+static void k3_dma_free_chan_resources(struct dma_chan *chan)
+{
+ struct k3_dma_chan *c = to_k3_chan(chan);
+ struct k3_dma_dev *d = to_k3_dma(chan->device);
+ unsigned long flags;
+
+ spin_lock_irqsave(&d->lock, flags);
+ list_del_init(&c->node);
+ spin_unlock_irqrestore(&d->lock, flags);
+
+ vchan_free_chan_resources(&c->vc);
+ c->ccfg = 0;
+}
+
+static enum dma_status k3_dma_tx_status(struct dma_chan *chan,
+ dma_cookie_t cookie, struct dma_tx_state *state)
+{
+ struct k3_dma_chan *c = to_k3_chan(chan);
+ struct k3_dma_dev *d = to_k3_dma(chan->device);
+ struct k3_dma_phy *p;
+ struct virt_dma_desc *vd;
+ unsigned long flags;
+ enum dma_status ret;
+ size_t bytes = 0;
+
+ ret = dma_cookie_status(&c->vc.chan, cookie, state);
+ if (ret == DMA_COMPLETE)
+ return ret;
+
+ spin_lock_irqsave(&c->vc.lock, flags);
+ p = c->phy;
+ ret = c->status;
+
+ /*
+ * If the cookie is on our issue queue, then the residue is
+ * its total size.
+ */
+ vd = vchan_find_desc(&c->vc, cookie);
+ if (vd && !c->cyclic) {
+ bytes = container_of(vd, struct k3_dma_desc_sw, vd)->size;
+ } else if ((!p) || (!p->ds_run)) {
+ bytes = 0;
+ } else {
+ struct k3_dma_desc_sw *ds = p->ds_run;
+ u32 clli = 0, index = 0;
+
+ bytes = k3_dma_get_curr_cnt(d, p);
+ clli = k3_dma_get_curr_lli(p);
+ index = ((clli - ds->desc_hw_lli) /
+ sizeof(struct k3_desc_hw)) + 1;
+ for (; index < ds->desc_num; index++) {
+ bytes += ds->desc_hw[index].count;
+ /* end of lli */
+ if (!ds->desc_hw[index].lli)
+ break;
+ }
+ }
+ spin_unlock_irqrestore(&c->vc.lock, flags);
+ dma_set_residue(state, bytes);
+ return ret;
+}
+
+static void k3_dma_issue_pending(struct dma_chan *chan)
+{
+ struct k3_dma_chan *c = to_k3_chan(chan);
+ struct k3_dma_dev *d = to_k3_dma(chan->device);
+ unsigned long flags;
+
+ spin_lock_irqsave(&c->vc.lock, flags);
+ /* add request to vc->desc_issued */
+ if (vchan_issue_pending(&c->vc)) {
+ spin_lock(&d->lock);
+ if (!c->phy) {
+ if (list_empty(&c->node)) {
+ /* if new channel, add chan_pending */
+ list_add_tail(&c->node, &d->chan_pending);
+ /* check in tasklet */
+ tasklet_schedule(&d->task);
+ dev_dbg(d->slave.dev, "vchan %p: issued\n", &c->vc);
+ }
+ }
+ spin_unlock(&d->lock);
+ } else
+ dev_dbg(d->slave.dev, "vchan %p: nothing to issue\n", &c->vc);
+ spin_unlock_irqrestore(&c->vc.lock, flags);
+}
+
+static void k3_dma_fill_desc(struct k3_dma_desc_sw *ds, dma_addr_t dst,
+ dma_addr_t src, size_t len, u32 num, u32 ccfg)
+{
+ if (num != ds->desc_num - 1)
+ ds->desc_hw[num].lli = ds->desc_hw_lli + (num + 1) *
+ sizeof(struct k3_desc_hw);
+
+ ds->desc_hw[num].lli |= CX_LLI_CHAIN_EN;
+ ds->desc_hw[num].count = len;
+ ds->desc_hw[num].saddr = src;
+ ds->desc_hw[num].daddr = dst;
+ ds->desc_hw[num].config = ccfg;
+}
+
+static struct k3_dma_desc_sw *k3_dma_alloc_desc_resource(int num,
+ struct dma_chan *chan)
+{
+ struct k3_dma_chan *c = to_k3_chan(chan);
+ struct k3_dma_desc_sw *ds;
+ struct k3_dma_dev *d = to_k3_dma(chan->device);
+ int lli_limit = LLI_BLOCK_SIZE / sizeof(struct k3_desc_hw);
+
+ if (num > lli_limit) {
+ dev_dbg(chan->device->dev, "vch %p: sg num %d exceed max %d\n",
+ &c->vc, num, lli_limit);
+ return NULL;
+ }
+
+ ds = kzalloc(sizeof(*ds), GFP_NOWAIT);
+ if (!ds)
+ return NULL;
+
+ ds->desc_hw = dma_pool_zalloc(d->pool, GFP_NOWAIT, &ds->desc_hw_lli);
+ if (!ds->desc_hw) {
+ dev_dbg(chan->device->dev, "vch %p: dma alloc fail\n", &c->vc);
+ kfree(ds);
+ return NULL;
+ }
+ ds->desc_num = num;
+ return ds;
+}
+
+static struct dma_async_tx_descriptor *k3_dma_prep_memcpy(
+ struct dma_chan *chan, dma_addr_t dst, dma_addr_t src,
+ size_t len, unsigned long flags)
+{
+ struct k3_dma_chan *c = to_k3_chan(chan);
+ struct k3_dma_desc_sw *ds;
+ size_t copy = 0;
+ int num = 0;
+
+ if (!len)
+ return NULL;
+
+ num = DIV_ROUND_UP(len, DMA_MAX_SIZE);
+
+ ds = k3_dma_alloc_desc_resource(num, chan);
+ if (!ds)
+ return NULL;
+
+ c->cyclic = 0;
+ ds->size = len;
+ num = 0;
+
+ if (!c->ccfg) {
+ /* default is memtomem, without calling device_config */
+ c->ccfg = CX_CFG_SRCINCR | CX_CFG_DSTINCR | CX_CFG_EN;
+ c->ccfg |= (0xf << 20) | (0xf << 24); /* burst = 16 */
+ c->ccfg |= (0x3 << 12) | (0x3 << 16); /* width = 64 bit */
+ }
+
+ do {
+ copy = min_t(size_t, len, DMA_MAX_SIZE);
+ k3_dma_fill_desc(ds, dst, src, copy, num++, c->ccfg);
+
+ src += copy;
+ dst += copy;
+ len -= copy;
+ } while (len);
+
+ ds->desc_hw[num-1].lli = 0; /* end of link */
+ return vchan_tx_prep(&c->vc, &ds->vd, flags);
+}
+
+static struct dma_async_tx_descriptor *k3_dma_prep_slave_sg(
+ struct dma_chan *chan, struct scatterlist *sgl, unsigned int sglen,
+ enum dma_transfer_direction dir, unsigned long flags, void *context)
+{
+ struct k3_dma_chan *c = to_k3_chan(chan);
+ struct k3_dma_desc_sw *ds;
+ size_t len, avail, total = 0;
+ struct scatterlist *sg;
+ dma_addr_t addr, src = 0, dst = 0;
+ int num = sglen, i;
+
+ if (sgl == NULL)
+ return NULL;
+
+ c->cyclic = 0;
+
+ for_each_sg(sgl, sg, sglen, i) {
+ avail = sg_dma_len(sg);
+ if (avail > DMA_MAX_SIZE)
+ num += DIV_ROUND_UP(avail, DMA_MAX_SIZE) - 1;
+ }
+
+ ds = k3_dma_alloc_desc_resource(num, chan);
+ if (!ds)
+ return NULL;
+ num = 0;
+ k3_dma_config_write(chan, dir, &c->slave_config);
+
+ for_each_sg(sgl, sg, sglen, i) {
+ addr = sg_dma_address(sg);
+ avail = sg_dma_len(sg);
+ total += avail;
+
+ do {
+ len = min_t(size_t, avail, DMA_MAX_SIZE);
+
+ if (dir == DMA_MEM_TO_DEV) {
+ src = addr;
+ dst = c->dev_addr;
+ } else if (dir == DMA_DEV_TO_MEM) {
+ src = c->dev_addr;
+ dst = addr;
+ }
+
+ k3_dma_fill_desc(ds, dst, src, len, num++, c->ccfg);
+
+ addr += len;
+ avail -= len;
+ } while (avail);
+ }
+
+ ds->desc_hw[num-1].lli = 0; /* end of link */
+ ds->size = total;
+ return vchan_tx_prep(&c->vc, &ds->vd, flags);
+}
+
+static struct dma_async_tx_descriptor *
+k3_dma_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr,
+ size_t buf_len, size_t period_len,
+ enum dma_transfer_direction dir,
+ unsigned long flags)
+{
+ struct k3_dma_chan *c = to_k3_chan(chan);
+ struct k3_dma_desc_sw *ds;
+ size_t len, avail, total = 0;
+ dma_addr_t addr, src = 0, dst = 0;
+ int num = 1, since = 0;
+ size_t modulo = DMA_CYCLIC_MAX_PERIOD;
+ u32 en_tc2 = 0;
+
+ dev_dbg(chan->device->dev, "%s: buf %pad, dst %pad, buf len %zu, period_len = %zu, dir %d\n",
+ __func__, &buf_addr, &to_k3_chan(chan)->dev_addr,
+ buf_len, period_len, (int)dir);
+
+ avail = buf_len;
+ if (avail > modulo)
+ num += DIV_ROUND_UP(avail, modulo) - 1;
+
+ ds = k3_dma_alloc_desc_resource(num, chan);
+ if (!ds)
+ return NULL;
+
+ c->cyclic = 1;
+ addr = buf_addr;
+ avail = buf_len;
+ total = avail;
+ num = 0;
+ k3_dma_config_write(chan, dir, &c->slave_config);
+
+ if (period_len < modulo)
+ modulo = period_len;
+
+ do {
+ len = min_t(size_t, avail, modulo);
+
+ if (dir == DMA_MEM_TO_DEV) {
+ src = addr;
+ dst = c->dev_addr;
+ } else if (dir == DMA_DEV_TO_MEM) {
+ src = c->dev_addr;
+ dst = addr;
+ }
+ since += len;
+ if (since >= period_len) {
+ /* descriptor asks for TC2 interrupt on completion */
+ en_tc2 = CX_CFG_NODEIRQ;
+ since -= period_len;
+ } else
+ en_tc2 = 0;
+
+ k3_dma_fill_desc(ds, dst, src, len, num++, c->ccfg | en_tc2);
+
+ addr += len;
+ avail -= len;
+ } while (avail);
+
+ /* "Cyclic" == end of link points back to start of link */
+ ds->desc_hw[num - 1].lli |= ds->desc_hw_lli;
+
+ ds->size = total;
+
+ return vchan_tx_prep(&c->vc, &ds->vd, flags);
+}
+
+static int k3_dma_config(struct dma_chan *chan,
+ struct dma_slave_config *cfg)
+{
+ struct k3_dma_chan *c = to_k3_chan(chan);
+
+ memcpy(&c->slave_config, cfg, sizeof(*cfg));
+
+ return 0;
+}
+
+static int k3_dma_config_write(struct dma_chan *chan,
+ enum dma_transfer_direction dir,
+ struct dma_slave_config *cfg)
+{
+ struct k3_dma_chan *c = to_k3_chan(chan);
+ u32 maxburst = 0, val = 0;
+ enum dma_slave_buswidth width = DMA_SLAVE_BUSWIDTH_UNDEFINED;
+
+ if (dir == DMA_DEV_TO_MEM) {
+ c->ccfg = CX_CFG_DSTINCR;
+ c->dev_addr = cfg->src_addr;
+ maxburst = cfg->src_maxburst;
+ width = cfg->src_addr_width;
+ } else if (dir == DMA_MEM_TO_DEV) {
+ c->ccfg = CX_CFG_SRCINCR;
+ c->dev_addr = cfg->dst_addr;
+ maxburst = cfg->dst_maxburst;
+ width = cfg->dst_addr_width;
+ }
+ switch (width) {
+ case DMA_SLAVE_BUSWIDTH_1_BYTE:
+ case DMA_SLAVE_BUSWIDTH_2_BYTES:
+ case DMA_SLAVE_BUSWIDTH_4_BYTES:
+ case DMA_SLAVE_BUSWIDTH_8_BYTES:
+ val = __ffs(width);
+ break;
+ default:
+ val = 3;
+ break;
+ }
+ c->ccfg |= (val << 12) | (val << 16);
+
+ if ((maxburst == 0) || (maxburst > 16))
+ val = 15;
+ else
+ val = maxburst - 1;
+ c->ccfg |= (val << 20) | (val << 24);
+ c->ccfg |= CX_CFG_MEM2PER | CX_CFG_EN;
+
+ /* specific request line */
+ c->ccfg |= c->vc.chan.chan_id << 4;
+
+ return 0;
+}
+
+static void k3_dma_free_desc(struct virt_dma_desc *vd)
+{
+ struct k3_dma_desc_sw *ds =
+ container_of(vd, struct k3_dma_desc_sw, vd);
+ struct k3_dma_dev *d = to_k3_dma(vd->tx.chan->device);
+
+ dma_pool_free(d->pool, ds->desc_hw, ds->desc_hw_lli);
+ kfree(ds);
+}
+
+static int k3_dma_terminate_all(struct dma_chan *chan)
+{
+ struct k3_dma_chan *c = to_k3_chan(chan);
+ struct k3_dma_dev *d = to_k3_dma(chan->device);
+ struct k3_dma_phy *p = c->phy;
+ unsigned long flags;
+ LIST_HEAD(head);
+
+ dev_dbg(d->slave.dev, "vchan %p: terminate all\n", &c->vc);
+
+ /* Prevent this channel being scheduled */
+ spin_lock(&d->lock);
+ list_del_init(&c->node);
+ spin_unlock(&d->lock);
+
+ /* Clear the tx descriptor lists */
+ spin_lock_irqsave(&c->vc.lock, flags);
+ vchan_get_all_descriptors(&c->vc, &head);
+ if (p) {
+ /* vchan is assigned to a pchan - stop the channel */
+ k3_dma_terminate_chan(p, d);
+ c->phy = NULL;
+ p->vchan = NULL;
+ if (p->ds_run) {
+ vchan_terminate_vdesc(&p->ds_run->vd);
+ p->ds_run = NULL;
+ }
+ p->ds_done = NULL;
+ }
+ spin_unlock_irqrestore(&c->vc.lock, flags);
+ vchan_dma_desc_free_list(&c->vc, &head);
+
+ return 0;
+}
+
+static void k3_dma_synchronize(struct dma_chan *chan)
+{
+ struct k3_dma_chan *c = to_k3_chan(chan);
+
+ vchan_synchronize(&c->vc);
+}
+
+static int k3_dma_transfer_pause(struct dma_chan *chan)
+{
+ struct k3_dma_chan *c = to_k3_chan(chan);
+ struct k3_dma_dev *d = to_k3_dma(chan->device);
+ struct k3_dma_phy *p = c->phy;
+
+ dev_dbg(d->slave.dev, "vchan %p: pause\n", &c->vc);
+ if (c->status == DMA_IN_PROGRESS) {
+ c->status = DMA_PAUSED;
+ if (p) {
+ k3_dma_pause_dma(p, false);
+ } else {
+ spin_lock(&d->lock);
+ list_del_init(&c->node);
+ spin_unlock(&d->lock);
+ }
+ }
+
+ return 0;
+}
+
+static int k3_dma_transfer_resume(struct dma_chan *chan)
+{
+ struct k3_dma_chan *c = to_k3_chan(chan);
+ struct k3_dma_dev *d = to_k3_dma(chan->device);
+ struct k3_dma_phy *p = c->phy;
+ unsigned long flags;
+
+ dev_dbg(d->slave.dev, "vchan %p: resume\n", &c->vc);
+ spin_lock_irqsave(&c->vc.lock, flags);
+ if (c->status == DMA_PAUSED) {
+ c->status = DMA_IN_PROGRESS;
+ if (p) {
+ k3_dma_pause_dma(p, true);
+ } else if (!list_empty(&c->vc.desc_issued)) {
+ spin_lock(&d->lock);
+ list_add_tail(&c->node, &d->chan_pending);
+ spin_unlock(&d->lock);
+ }
+ }
+ spin_unlock_irqrestore(&c->vc.lock, flags);
+
+ return 0;
+}
+
+static const struct k3dma_soc_data k3_v1_dma_data = {
+ .flags = 0,
+};
+
+static const struct k3dma_soc_data asp_v1_dma_data = {
+ .flags = K3_FLAG_NOCLK,
+};
+
+static const struct of_device_id k3_pdma_dt_ids[] = {
+ { .compatible = "hisilicon,k3-dma-1.0",
+ .data = &k3_v1_dma_data
+ },
+ { .compatible = "hisilicon,hisi-pcm-asp-dma-1.0",
+ .data = &asp_v1_dma_data
+ },
+ {}
+};
+MODULE_DEVICE_TABLE(of, k3_pdma_dt_ids);
+
+static struct dma_chan *k3_of_dma_simple_xlate(struct of_phandle_args *dma_spec,
+ struct of_dma *ofdma)
+{
+ struct k3_dma_dev *d = ofdma->of_dma_data;
+ unsigned int request = dma_spec->args[0];
+
+ if (request >= d->dma_requests)
+ return NULL;
+
+ return dma_get_slave_channel(&(d->chans[request].vc.chan));
+}
+
+static int k3_dma_probe(struct platform_device *op)
+{
+ const struct k3dma_soc_data *soc_data;
+ struct k3_dma_dev *d;
+ const struct of_device_id *of_id;
+ int i, ret, irq = 0;
+
+ d = devm_kzalloc(&op->dev, sizeof(*d), GFP_KERNEL);
+ if (!d)
+ return -ENOMEM;
+
+ soc_data = device_get_match_data(&op->dev);
+ if (!soc_data)
+ return -EINVAL;
+
+ d->base = devm_platform_ioremap_resource(op, 0);
+ if (IS_ERR(d->base))
+ return PTR_ERR(d->base);
+
+ of_id = of_match_device(k3_pdma_dt_ids, &op->dev);
+ if (of_id) {
+ of_property_read_u32((&op->dev)->of_node,
+ "dma-channels", &d->dma_channels);
+ of_property_read_u32((&op->dev)->of_node,
+ "dma-requests", &d->dma_requests);
+ ret = of_property_read_u32((&op->dev)->of_node,
+ "dma-channel-mask", &d->dma_channel_mask);
+ if (ret) {
+ dev_warn(&op->dev,
+ "dma-channel-mask doesn't exist, considering all as available.\n");
+ d->dma_channel_mask = (u32)~0UL;
+ }
+ }
+
+ if (!(soc_data->flags & K3_FLAG_NOCLK)) {
+ d->clk = devm_clk_get(&op->dev, NULL);
+ if (IS_ERR(d->clk)) {
+ dev_err(&op->dev, "no dma clk\n");
+ return PTR_ERR(d->clk);
+ }
+ }
+
+ irq = platform_get_irq(op, 0);
+ ret = devm_request_irq(&op->dev, irq,
+ k3_dma_int_handler, 0, DRIVER_NAME, d);
+ if (ret)
+ return ret;
+
+ d->irq = irq;
+
+ /* A DMA memory pool for LLIs, align on 32-byte boundary */
+ d->pool = dmam_pool_create(DRIVER_NAME, &op->dev,
+ LLI_BLOCK_SIZE, 32, 0);
+ if (!d->pool)
+ return -ENOMEM;
+
+ /* init phy channel */
+ d->phy = devm_kcalloc(&op->dev,
+ d->dma_channels, sizeof(struct k3_dma_phy), GFP_KERNEL);
+ if (d->phy == NULL)
+ return -ENOMEM;
+
+ for (i = 0; i < d->dma_channels; i++) {
+ struct k3_dma_phy *p;
+
+ if (!(d->dma_channel_mask & BIT(i)))
+ continue;
+
+ p = &d->phy[i];
+ p->idx = i;
+ p->base = d->base + i * 0x40;
+ }
+
+ INIT_LIST_HEAD(&d->slave.channels);
+ dma_cap_set(DMA_SLAVE, d->slave.cap_mask);
+ dma_cap_set(DMA_MEMCPY, d->slave.cap_mask);
+ dma_cap_set(DMA_CYCLIC, d->slave.cap_mask);
+ d->slave.dev = &op->dev;
+ d->slave.device_free_chan_resources = k3_dma_free_chan_resources;
+ d->slave.device_tx_status = k3_dma_tx_status;
+ d->slave.device_prep_dma_memcpy = k3_dma_prep_memcpy;
+ d->slave.device_prep_slave_sg = k3_dma_prep_slave_sg;
+ d->slave.device_prep_dma_cyclic = k3_dma_prep_dma_cyclic;
+ d->slave.device_issue_pending = k3_dma_issue_pending;
+ d->slave.device_config = k3_dma_config;
+ d->slave.device_pause = k3_dma_transfer_pause;
+ d->slave.device_resume = k3_dma_transfer_resume;
+ d->slave.device_terminate_all = k3_dma_terminate_all;
+ d->slave.device_synchronize = k3_dma_synchronize;
+ d->slave.copy_align = DMAENGINE_ALIGN_8_BYTES;
+
+ /* init virtual channel */
+ d->chans = devm_kcalloc(&op->dev,
+ d->dma_requests, sizeof(struct k3_dma_chan), GFP_KERNEL);
+ if (d->chans == NULL)
+ return -ENOMEM;
+
+ for (i = 0; i < d->dma_requests; i++) {
+ struct k3_dma_chan *c = &d->chans[i];
+
+ c->status = DMA_IN_PROGRESS;
+ INIT_LIST_HEAD(&c->node);
+ c->vc.desc_free = k3_dma_free_desc;
+ vchan_init(&c->vc, &d->slave);
+ }
+
+ /* Enable clock before accessing registers */
+ ret = clk_prepare_enable(d->clk);
+ if (ret < 0) {
+ dev_err(&op->dev, "clk_prepare_enable failed: %d\n", ret);
+ return ret;
+ }
+
+ k3_dma_enable_dma(d, true);
+
+ ret = dma_async_device_register(&d->slave);
+ if (ret)
+ goto dma_async_register_fail;
+
+ ret = of_dma_controller_register((&op->dev)->of_node,
+ k3_of_dma_simple_xlate, d);
+ if (ret)
+ goto of_dma_register_fail;
+
+ spin_lock_init(&d->lock);
+ INIT_LIST_HEAD(&d->chan_pending);
+ tasklet_setup(&d->task, k3_dma_tasklet);
+ platform_set_drvdata(op, d);
+ dev_info(&op->dev, "initialized\n");
+
+ return 0;
+
+of_dma_register_fail:
+ dma_async_device_unregister(&d->slave);
+dma_async_register_fail:
+ clk_disable_unprepare(d->clk);
+ return ret;
+}
+
+static int k3_dma_remove(struct platform_device *op)
+{
+ struct k3_dma_chan *c, *cn;
+ struct k3_dma_dev *d = platform_get_drvdata(op);
+
+ dma_async_device_unregister(&d->slave);
+ of_dma_controller_free((&op->dev)->of_node);
+
+ devm_free_irq(&op->dev, d->irq, d);
+
+ list_for_each_entry_safe(c, cn, &d->slave.channels, vc.chan.device_node) {
+ list_del(&c->vc.chan.device_node);
+ tasklet_kill(&c->vc.task);
+ }
+ tasklet_kill(&d->task);
+ clk_disable_unprepare(d->clk);
+ return 0;
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int k3_dma_suspend_dev(struct device *dev)
+{
+ struct k3_dma_dev *d = dev_get_drvdata(dev);
+ u32 stat = 0;
+
+ stat = k3_dma_get_chan_stat(d);
+ if (stat) {
+ dev_warn(d->slave.dev,
+ "chan %d is running fail to suspend\n", stat);
+ return -1;
+ }
+ k3_dma_enable_dma(d, false);
+ clk_disable_unprepare(d->clk);
+ return 0;
+}
+
+static int k3_dma_resume_dev(struct device *dev)
+{
+ struct k3_dma_dev *d = dev_get_drvdata(dev);
+ int ret = 0;
+
+ ret = clk_prepare_enable(d->clk);
+ if (ret < 0) {
+ dev_err(d->slave.dev, "clk_prepare_enable failed: %d\n", ret);
+ return ret;
+ }
+ k3_dma_enable_dma(d, true);
+ return 0;
+}
+#endif
+
+static SIMPLE_DEV_PM_OPS(k3_dma_pmops, k3_dma_suspend_dev, k3_dma_resume_dev);
+
+static struct platform_driver k3_pdma_driver = {
+ .driver = {
+ .name = DRIVER_NAME,
+ .pm = &k3_dma_pmops,
+ .of_match_table = k3_pdma_dt_ids,
+ },
+ .probe = k3_dma_probe,
+ .remove = k3_dma_remove,
+};
+
+module_platform_driver(k3_pdma_driver);
+
+MODULE_DESCRIPTION("HiSilicon k3 DMA Driver");
+MODULE_ALIAS("platform:k3dma");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/lgm/Kconfig b/drivers/dma/lgm/Kconfig
new file mode 100644
index 000000000..9194330ed
--- /dev/null
+++ b/drivers/dma/lgm/Kconfig
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0-only
+config INTEL_LDMA
+ bool "Lightning Mountain centralized DMA controllers"
+ depends on X86 || COMPILE_TEST
+ select DMA_ENGINE
+ select DMA_VIRTUAL_CHANNELS
+ help
+ Enable support for Intel Lightning Mountain SOC DMA controllers.
+ These controllers provide DMA capabilities for a variety of on-chip
+ devices such as HSNAND and GSWIP (Gigabit Switch IP).
diff --git a/drivers/dma/lgm/Makefile b/drivers/dma/lgm/Makefile
new file mode 100644
index 000000000..f318a8eff
--- /dev/null
+++ b/drivers/dma/lgm/Makefile
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_INTEL_LDMA) += lgm-dma.o
diff --git a/drivers/dma/lgm/lgm-dma.c b/drivers/dma/lgm/lgm-dma.c
new file mode 100644
index 000000000..1709d159a
--- /dev/null
+++ b/drivers/dma/lgm/lgm-dma.c
@@ -0,0 +1,1740 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Lightning Mountain centralized DMA controller driver
+ *
+ * Copyright (c) 2016 - 2020 Intel Corporation.
+ */
+
+#include <linux/bitfield.h>
+#include <linux/clk.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmapool.h>
+#include <linux/err.h>
+#include <linux/export.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/iopoll.h>
+#include <linux/of_dma.h>
+#include <linux/of_irq.h>
+#include <linux/platform_device.h>
+#include <linux/reset.h>
+
+#include "../dmaengine.h"
+#include "../virt-dma.h"
+
+#define DRIVER_NAME "lgm-dma"
+
+#define DMA_ID 0x0008
+#define DMA_ID_REV GENMASK(7, 0)
+#define DMA_ID_PNR GENMASK(19, 16)
+#define DMA_ID_CHNR GENMASK(26, 20)
+#define DMA_ID_DW_128B BIT(27)
+#define DMA_ID_AW_36B BIT(28)
+#define DMA_VER32 0x32
+#define DMA_VER31 0x31
+#define DMA_VER22 0x0A
+
+#define DMA_CTRL 0x0010
+#define DMA_CTRL_RST BIT(0)
+#define DMA_CTRL_DSRAM_PATH BIT(1)
+#define DMA_CTRL_DBURST_WR BIT(3)
+#define DMA_CTRL_VLD_DF_ACK BIT(4)
+#define DMA_CTRL_CH_FL BIT(6)
+#define DMA_CTRL_DS_FOD BIT(7)
+#define DMA_CTRL_DRB BIT(8)
+#define DMA_CTRL_ENBE BIT(9)
+#define DMA_CTRL_DESC_TMOUT_CNT_V31 GENMASK(27, 16)
+#define DMA_CTRL_DESC_TMOUT_EN_V31 BIT(30)
+#define DMA_CTRL_PKTARB BIT(31)
+
+#define DMA_CPOLL 0x0014
+#define DMA_CPOLL_CNT GENMASK(15, 4)
+#define DMA_CPOLL_EN BIT(31)
+
+#define DMA_CS 0x0018
+#define DMA_CS_MASK GENMASK(5, 0)
+
+#define DMA_CCTRL 0x001C
+#define DMA_CCTRL_ON BIT(0)
+#define DMA_CCTRL_RST BIT(1)
+#define DMA_CCTRL_CH_POLL_EN BIT(2)
+#define DMA_CCTRL_CH_ABC BIT(3) /* Adaptive Burst Chop */
+#define DMA_CDBA_MSB GENMASK(7, 4)
+#define DMA_CCTRL_DIR_TX BIT(8)
+#define DMA_CCTRL_CLASS GENMASK(11, 9)
+#define DMA_CCTRL_CLASSH GENMASK(19, 18)
+#define DMA_CCTRL_WR_NP_EN BIT(21)
+#define DMA_CCTRL_PDEN BIT(23)
+#define DMA_MAX_CLASS (SZ_32 - 1)
+
+#define DMA_CDBA 0x0020
+#define DMA_CDLEN 0x0024
+#define DMA_CIS 0x0028
+#define DMA_CIE 0x002C
+#define DMA_CI_EOP BIT(1)
+#define DMA_CI_DUR BIT(2)
+#define DMA_CI_DESCPT BIT(3)
+#define DMA_CI_CHOFF BIT(4)
+#define DMA_CI_RDERR BIT(5)
+#define DMA_CI_ALL \
+ (DMA_CI_EOP | DMA_CI_DUR | DMA_CI_DESCPT | DMA_CI_CHOFF | DMA_CI_RDERR)
+
+#define DMA_PS 0x0040
+#define DMA_PCTRL 0x0044
+#define DMA_PCTRL_RXBL16 BIT(0)
+#define DMA_PCTRL_TXBL16 BIT(1)
+#define DMA_PCTRL_RXBL GENMASK(3, 2)
+#define DMA_PCTRL_RXBL_8 3
+#define DMA_PCTRL_TXBL GENMASK(5, 4)
+#define DMA_PCTRL_TXBL_8 3
+#define DMA_PCTRL_PDEN BIT(6)
+#define DMA_PCTRL_RXBL32 BIT(7)
+#define DMA_PCTRL_RXENDI GENMASK(9, 8)
+#define DMA_PCTRL_TXENDI GENMASK(11, 10)
+#define DMA_PCTRL_TXBL32 BIT(15)
+#define DMA_PCTRL_MEM_FLUSH BIT(16)
+
+#define DMA_IRNEN1 0x00E8
+#define DMA_IRNCR1 0x00EC
+#define DMA_IRNEN 0x00F4
+#define DMA_IRNCR 0x00F8
+#define DMA_C_DP_TICK 0x100
+#define DMA_C_DP_TICK_TIKNARB GENMASK(15, 0)
+#define DMA_C_DP_TICK_TIKARB GENMASK(31, 16)
+
+#define DMA_C_HDRM 0x110
+/*
+ * If header mode is set in DMA descriptor,
+ * If bit 30 is disabled, HDR_LEN must be configured according to channel
+ * requirement.
+ * If bit 30 is enabled(checksum with heade mode), HDR_LEN has no need to
+ * be configured. It will enable check sum for switch
+ * If header mode is not set in DMA descriptor,
+ * This register setting doesn't matter
+ */
+#define DMA_C_HDRM_HDR_SUM BIT(30)
+
+#define DMA_C_BOFF 0x120
+#define DMA_C_BOFF_BOF_LEN GENMASK(7, 0)
+#define DMA_C_BOFF_EN BIT(31)
+
+#define DMA_ORRC 0x190
+#define DMA_ORRC_ORRCNT GENMASK(8, 4)
+#define DMA_ORRC_EN BIT(31)
+
+#define DMA_C_ENDIAN 0x200
+#define DMA_C_END_DATAENDI GENMASK(1, 0)
+#define DMA_C_END_DE_EN BIT(7)
+#define DMA_C_END_DESENDI GENMASK(9, 8)
+#define DMA_C_END_DES_EN BIT(16)
+
+/* DMA controller capability */
+#define DMA_ADDR_36BIT BIT(0)
+#define DMA_DATA_128BIT BIT(1)
+#define DMA_CHAN_FLOW_CTL BIT(2)
+#define DMA_DESC_FOD BIT(3)
+#define DMA_DESC_IN_SRAM BIT(4)
+#define DMA_EN_BYTE_EN BIT(5)
+#define DMA_DBURST_WR BIT(6)
+#define DMA_VALID_DESC_FETCH_ACK BIT(7)
+#define DMA_DFT_DRB BIT(8)
+
+#define DMA_ORRC_MAX_CNT (SZ_32 - 1)
+#define DMA_DFT_POLL_CNT SZ_4
+#define DMA_DFT_BURST_V22 SZ_2
+#define DMA_BURSTL_8DW SZ_8
+#define DMA_BURSTL_16DW SZ_16
+#define DMA_BURSTL_32DW SZ_32
+#define DMA_DFT_BURST DMA_BURSTL_16DW
+#define DMA_MAX_DESC_NUM (SZ_8K - 1)
+#define DMA_CHAN_BOFF_MAX (SZ_256 - 1)
+#define DMA_DFT_ENDIAN 0
+
+#define DMA_DFT_DESC_TCNT 50
+#define DMA_HDR_LEN_MAX (SZ_16K - 1)
+
+/* DMA flags */
+#define DMA_TX_CH BIT(0)
+#define DMA_RX_CH BIT(1)
+#define DEVICE_ALLOC_DESC BIT(2)
+#define CHAN_IN_USE BIT(3)
+#define DMA_HW_DESC BIT(4)
+
+/* Descriptor fields */
+#define DESC_DATA_LEN GENMASK(15, 0)
+#define DESC_BYTE_OFF GENMASK(25, 23)
+#define DESC_EOP BIT(28)
+#define DESC_SOP BIT(29)
+#define DESC_C BIT(30)
+#define DESC_OWN BIT(31)
+
+#define DMA_CHAN_RST 1
+#define DMA_MAX_SIZE (BIT(16) - 1)
+#define MAX_LOWER_CHANS 32
+#define MASK_LOWER_CHANS GENMASK(4, 0)
+#define DMA_OWN 1
+#define HIGH_4_BITS GENMASK(3, 0)
+#define DMA_DFT_DESC_NUM 1
+#define DMA_PKT_DROP_DIS 0
+
+enum ldma_chan_on_off {
+ DMA_CH_OFF = 0,
+ DMA_CH_ON = 1,
+};
+
+enum {
+ DMA_TYPE_TX = 0,
+ DMA_TYPE_RX,
+ DMA_TYPE_MCPY,
+};
+
+struct ldma_dev;
+struct ldma_port;
+
+struct ldma_chan {
+ struct virt_dma_chan vchan;
+ struct ldma_port *port; /* back pointer */
+ char name[8]; /* Channel name */
+ int nr; /* Channel id in hardware */
+ u32 flags; /* central way or channel based way */
+ enum ldma_chan_on_off onoff;
+ dma_addr_t desc_phys;
+ void *desc_base; /* Virtual address */
+ u32 desc_cnt; /* Number of descriptors */
+ int rst;
+ u32 hdrm_len;
+ bool hdrm_csum;
+ u32 boff_len;
+ u32 data_endian;
+ u32 desc_endian;
+ bool pden;
+ bool desc_rx_np;
+ bool data_endian_en;
+ bool desc_endian_en;
+ bool abc_en;
+ bool desc_init;
+ struct dma_pool *desc_pool; /* Descriptors pool */
+ u32 desc_num;
+ struct dw2_desc_sw *ds;
+ struct work_struct work;
+ struct dma_slave_config config;
+};
+
+struct ldma_port {
+ struct ldma_dev *ldev; /* back pointer */
+ u32 portid;
+ u32 rxbl;
+ u32 txbl;
+ u32 rxendi;
+ u32 txendi;
+ u32 pkt_drop;
+};
+
+/* Instance specific data */
+struct ldma_inst_data {
+ bool desc_in_sram;
+ bool chan_fc;
+ bool desc_fod; /* Fetch On Demand */
+ bool valid_desc_fetch_ack;
+ u32 orrc; /* Outstanding read count */
+ const char *name;
+ u32 type;
+};
+
+struct ldma_dev {
+ struct device *dev;
+ void __iomem *base;
+ struct reset_control *rst;
+ struct clk *core_clk;
+ struct dma_device dma_dev;
+ u32 ver;
+ int irq;
+ struct ldma_port *ports;
+ struct ldma_chan *chans; /* channel list on this DMA or port */
+ spinlock_t dev_lock; /* Controller register exclusive */
+ u32 chan_nrs;
+ u32 port_nrs;
+ u32 channels_mask;
+ u32 flags;
+ u32 pollcnt;
+ const struct ldma_inst_data *inst;
+ struct workqueue_struct *wq;
+};
+
+struct dw2_desc {
+ u32 field;
+ u32 addr;
+} __packed __aligned(8);
+
+struct dw2_desc_sw {
+ struct virt_dma_desc vdesc;
+ struct ldma_chan *chan;
+ dma_addr_t desc_phys;
+ size_t desc_cnt;
+ size_t size;
+ struct dw2_desc *desc_hw;
+};
+
+static inline void
+ldma_update_bits(struct ldma_dev *d, u32 mask, u32 val, u32 ofs)
+{
+ u32 old_val, new_val;
+
+ old_val = readl(d->base + ofs);
+ new_val = (old_val & ~mask) | (val & mask);
+
+ if (new_val != old_val)
+ writel(new_val, d->base + ofs);
+}
+
+static inline struct ldma_chan *to_ldma_chan(struct dma_chan *chan)
+{
+ return container_of(chan, struct ldma_chan, vchan.chan);
+}
+
+static inline struct ldma_dev *to_ldma_dev(struct dma_device *dma_dev)
+{
+ return container_of(dma_dev, struct ldma_dev, dma_dev);
+}
+
+static inline struct dw2_desc_sw *to_lgm_dma_desc(struct virt_dma_desc *vdesc)
+{
+ return container_of(vdesc, struct dw2_desc_sw, vdesc);
+}
+
+static inline bool ldma_chan_tx(struct ldma_chan *c)
+{
+ return !!(c->flags & DMA_TX_CH);
+}
+
+static inline bool ldma_chan_is_hw_desc(struct ldma_chan *c)
+{
+ return !!(c->flags & DMA_HW_DESC);
+}
+
+static void ldma_dev_reset(struct ldma_dev *d)
+
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&d->dev_lock, flags);
+ ldma_update_bits(d, DMA_CTRL_RST, DMA_CTRL_RST, DMA_CTRL);
+ spin_unlock_irqrestore(&d->dev_lock, flags);
+}
+
+static void ldma_dev_pkt_arb_cfg(struct ldma_dev *d, bool enable)
+{
+ unsigned long flags;
+ u32 mask = DMA_CTRL_PKTARB;
+ u32 val = enable ? DMA_CTRL_PKTARB : 0;
+
+ spin_lock_irqsave(&d->dev_lock, flags);
+ ldma_update_bits(d, mask, val, DMA_CTRL);
+ spin_unlock_irqrestore(&d->dev_lock, flags);
+}
+
+static void ldma_dev_sram_desc_cfg(struct ldma_dev *d, bool enable)
+{
+ unsigned long flags;
+ u32 mask = DMA_CTRL_DSRAM_PATH;
+ u32 val = enable ? DMA_CTRL_DSRAM_PATH : 0;
+
+ spin_lock_irqsave(&d->dev_lock, flags);
+ ldma_update_bits(d, mask, val, DMA_CTRL);
+ spin_unlock_irqrestore(&d->dev_lock, flags);
+}
+
+static void ldma_dev_chan_flow_ctl_cfg(struct ldma_dev *d, bool enable)
+{
+ unsigned long flags;
+ u32 mask, val;
+
+ if (d->inst->type != DMA_TYPE_TX)
+ return;
+
+ mask = DMA_CTRL_CH_FL;
+ val = enable ? DMA_CTRL_CH_FL : 0;
+
+ spin_lock_irqsave(&d->dev_lock, flags);
+ ldma_update_bits(d, mask, val, DMA_CTRL);
+ spin_unlock_irqrestore(&d->dev_lock, flags);
+}
+
+static void ldma_dev_global_polling_enable(struct ldma_dev *d)
+{
+ unsigned long flags;
+ u32 mask = DMA_CPOLL_EN | DMA_CPOLL_CNT;
+ u32 val = DMA_CPOLL_EN;
+
+ val |= FIELD_PREP(DMA_CPOLL_CNT, d->pollcnt);
+
+ spin_lock_irqsave(&d->dev_lock, flags);
+ ldma_update_bits(d, mask, val, DMA_CPOLL);
+ spin_unlock_irqrestore(&d->dev_lock, flags);
+}
+
+static void ldma_dev_desc_fetch_on_demand_cfg(struct ldma_dev *d, bool enable)
+{
+ unsigned long flags;
+ u32 mask, val;
+
+ if (d->inst->type == DMA_TYPE_MCPY)
+ return;
+
+ mask = DMA_CTRL_DS_FOD;
+ val = enable ? DMA_CTRL_DS_FOD : 0;
+
+ spin_lock_irqsave(&d->dev_lock, flags);
+ ldma_update_bits(d, mask, val, DMA_CTRL);
+ spin_unlock_irqrestore(&d->dev_lock, flags);
+}
+
+static void ldma_dev_byte_enable_cfg(struct ldma_dev *d, bool enable)
+{
+ unsigned long flags;
+ u32 mask = DMA_CTRL_ENBE;
+ u32 val = enable ? DMA_CTRL_ENBE : 0;
+
+ spin_lock_irqsave(&d->dev_lock, flags);
+ ldma_update_bits(d, mask, val, DMA_CTRL);
+ spin_unlock_irqrestore(&d->dev_lock, flags);
+}
+
+static void ldma_dev_orrc_cfg(struct ldma_dev *d)
+{
+ unsigned long flags;
+ u32 val = 0;
+ u32 mask;
+
+ if (d->inst->type == DMA_TYPE_RX)
+ return;
+
+ mask = DMA_ORRC_EN | DMA_ORRC_ORRCNT;
+ if (d->inst->orrc > 0 && d->inst->orrc <= DMA_ORRC_MAX_CNT)
+ val = DMA_ORRC_EN | FIELD_PREP(DMA_ORRC_ORRCNT, d->inst->orrc);
+
+ spin_lock_irqsave(&d->dev_lock, flags);
+ ldma_update_bits(d, mask, val, DMA_ORRC);
+ spin_unlock_irqrestore(&d->dev_lock, flags);
+}
+
+static void ldma_dev_df_tout_cfg(struct ldma_dev *d, bool enable, int tcnt)
+{
+ u32 mask = DMA_CTRL_DESC_TMOUT_CNT_V31;
+ unsigned long flags;
+ u32 val;
+
+ if (enable)
+ val = DMA_CTRL_DESC_TMOUT_EN_V31 | FIELD_PREP(DMA_CTRL_DESC_TMOUT_CNT_V31, tcnt);
+ else
+ val = 0;
+
+ spin_lock_irqsave(&d->dev_lock, flags);
+ ldma_update_bits(d, mask, val, DMA_CTRL);
+ spin_unlock_irqrestore(&d->dev_lock, flags);
+}
+
+static void ldma_dev_dburst_wr_cfg(struct ldma_dev *d, bool enable)
+{
+ unsigned long flags;
+ u32 mask, val;
+
+ if (d->inst->type != DMA_TYPE_RX && d->inst->type != DMA_TYPE_MCPY)
+ return;
+
+ mask = DMA_CTRL_DBURST_WR;
+ val = enable ? DMA_CTRL_DBURST_WR : 0;
+
+ spin_lock_irqsave(&d->dev_lock, flags);
+ ldma_update_bits(d, mask, val, DMA_CTRL);
+ spin_unlock_irqrestore(&d->dev_lock, flags);
+}
+
+static void ldma_dev_vld_fetch_ack_cfg(struct ldma_dev *d, bool enable)
+{
+ unsigned long flags;
+ u32 mask, val;
+
+ if (d->inst->type != DMA_TYPE_TX)
+ return;
+
+ mask = DMA_CTRL_VLD_DF_ACK;
+ val = enable ? DMA_CTRL_VLD_DF_ACK : 0;
+
+ spin_lock_irqsave(&d->dev_lock, flags);
+ ldma_update_bits(d, mask, val, DMA_CTRL);
+ spin_unlock_irqrestore(&d->dev_lock, flags);
+}
+
+static void ldma_dev_drb_cfg(struct ldma_dev *d, int enable)
+{
+ unsigned long flags;
+ u32 mask = DMA_CTRL_DRB;
+ u32 val = enable ? DMA_CTRL_DRB : 0;
+
+ spin_lock_irqsave(&d->dev_lock, flags);
+ ldma_update_bits(d, mask, val, DMA_CTRL);
+ spin_unlock_irqrestore(&d->dev_lock, flags);
+}
+
+static int ldma_dev_cfg(struct ldma_dev *d)
+{
+ bool enable;
+
+ ldma_dev_pkt_arb_cfg(d, true);
+ ldma_dev_global_polling_enable(d);
+
+ enable = !!(d->flags & DMA_DFT_DRB);
+ ldma_dev_drb_cfg(d, enable);
+
+ enable = !!(d->flags & DMA_EN_BYTE_EN);
+ ldma_dev_byte_enable_cfg(d, enable);
+
+ enable = !!(d->flags & DMA_CHAN_FLOW_CTL);
+ ldma_dev_chan_flow_ctl_cfg(d, enable);
+
+ enable = !!(d->flags & DMA_DESC_FOD);
+ ldma_dev_desc_fetch_on_demand_cfg(d, enable);
+
+ enable = !!(d->flags & DMA_DESC_IN_SRAM);
+ ldma_dev_sram_desc_cfg(d, enable);
+
+ enable = !!(d->flags & DMA_DBURST_WR);
+ ldma_dev_dburst_wr_cfg(d, enable);
+
+ enable = !!(d->flags & DMA_VALID_DESC_FETCH_ACK);
+ ldma_dev_vld_fetch_ack_cfg(d, enable);
+
+ if (d->ver > DMA_VER22) {
+ ldma_dev_orrc_cfg(d);
+ ldma_dev_df_tout_cfg(d, true, DMA_DFT_DESC_TCNT);
+ }
+
+ dev_dbg(d->dev, "%s Controller 0x%08x configuration done\n",
+ d->inst->name, readl(d->base + DMA_CTRL));
+
+ return 0;
+}
+
+static int ldma_chan_cctrl_cfg(struct ldma_chan *c, u32 val)
+{
+ struct ldma_dev *d = to_ldma_dev(c->vchan.chan.device);
+ u32 class_low, class_high;
+ unsigned long flags;
+ u32 reg;
+
+ spin_lock_irqsave(&d->dev_lock, flags);
+ ldma_update_bits(d, DMA_CS_MASK, c->nr, DMA_CS);
+ reg = readl(d->base + DMA_CCTRL);
+ /* Read from hardware */
+ if (reg & DMA_CCTRL_DIR_TX)
+ c->flags |= DMA_TX_CH;
+ else
+ c->flags |= DMA_RX_CH;
+
+ /* Keep the class value unchanged */
+ class_low = FIELD_GET(DMA_CCTRL_CLASS, reg);
+ class_high = FIELD_GET(DMA_CCTRL_CLASSH, reg);
+ val &= ~DMA_CCTRL_CLASS;
+ val |= FIELD_PREP(DMA_CCTRL_CLASS, class_low);
+ val &= ~DMA_CCTRL_CLASSH;
+ val |= FIELD_PREP(DMA_CCTRL_CLASSH, class_high);
+ writel(val, d->base + DMA_CCTRL);
+ spin_unlock_irqrestore(&d->dev_lock, flags);
+
+ return 0;
+}
+
+static void ldma_chan_irq_init(struct ldma_chan *c)
+{
+ struct ldma_dev *d = to_ldma_dev(c->vchan.chan.device);
+ unsigned long flags;
+ u32 enofs, crofs;
+ u32 cn_bit;
+
+ if (c->nr < MAX_LOWER_CHANS) {
+ enofs = DMA_IRNEN;
+ crofs = DMA_IRNCR;
+ } else {
+ enofs = DMA_IRNEN1;
+ crofs = DMA_IRNCR1;
+ }
+
+ cn_bit = BIT(c->nr & MASK_LOWER_CHANS);
+ spin_lock_irqsave(&d->dev_lock, flags);
+ ldma_update_bits(d, DMA_CS_MASK, c->nr, DMA_CS);
+
+ /* Clear all interrupts and disabled it */
+ writel(0, d->base + DMA_CIE);
+ writel(DMA_CI_ALL, d->base + DMA_CIS);
+
+ ldma_update_bits(d, cn_bit, 0, enofs);
+ writel(cn_bit, d->base + crofs);
+ spin_unlock_irqrestore(&d->dev_lock, flags);
+}
+
+static void ldma_chan_set_class(struct ldma_chan *c, u32 val)
+{
+ struct ldma_dev *d = to_ldma_dev(c->vchan.chan.device);
+ u32 class_val;
+
+ if (d->inst->type == DMA_TYPE_MCPY || val > DMA_MAX_CLASS)
+ return;
+
+ /* 3 bits low */
+ class_val = FIELD_PREP(DMA_CCTRL_CLASS, val & 0x7);
+ /* 2 bits high */
+ class_val |= FIELD_PREP(DMA_CCTRL_CLASSH, (val >> 3) & 0x3);
+
+ ldma_update_bits(d, DMA_CS_MASK, c->nr, DMA_CS);
+ ldma_update_bits(d, DMA_CCTRL_CLASS | DMA_CCTRL_CLASSH, class_val,
+ DMA_CCTRL);
+}
+
+static int ldma_chan_on(struct ldma_chan *c)
+{
+ struct ldma_dev *d = to_ldma_dev(c->vchan.chan.device);
+ unsigned long flags;
+
+ /* If descriptors not configured, not allow to turn on channel */
+ if (WARN_ON(!c->desc_init))
+ return -EINVAL;
+
+ spin_lock_irqsave(&d->dev_lock, flags);
+ ldma_update_bits(d, DMA_CS_MASK, c->nr, DMA_CS);
+ ldma_update_bits(d, DMA_CCTRL_ON, DMA_CCTRL_ON, DMA_CCTRL);
+ spin_unlock_irqrestore(&d->dev_lock, flags);
+
+ c->onoff = DMA_CH_ON;
+
+ return 0;
+}
+
+static int ldma_chan_off(struct ldma_chan *c)
+{
+ struct ldma_dev *d = to_ldma_dev(c->vchan.chan.device);
+ unsigned long flags;
+ u32 val;
+ int ret;
+
+ spin_lock_irqsave(&d->dev_lock, flags);
+ ldma_update_bits(d, DMA_CS_MASK, c->nr, DMA_CS);
+ ldma_update_bits(d, DMA_CCTRL_ON, 0, DMA_CCTRL);
+ spin_unlock_irqrestore(&d->dev_lock, flags);
+
+ ret = readl_poll_timeout_atomic(d->base + DMA_CCTRL, val,
+ !(val & DMA_CCTRL_ON), 0, 10000);
+ if (ret)
+ return ret;
+
+ c->onoff = DMA_CH_OFF;
+
+ return 0;
+}
+
+static void ldma_chan_desc_hw_cfg(struct ldma_chan *c, dma_addr_t desc_base,
+ int desc_num)
+{
+ struct ldma_dev *d = to_ldma_dev(c->vchan.chan.device);
+ unsigned long flags;
+
+ spin_lock_irqsave(&d->dev_lock, flags);
+ ldma_update_bits(d, DMA_CS_MASK, c->nr, DMA_CS);
+ writel(lower_32_bits(desc_base), d->base + DMA_CDBA);
+
+ /* Higher 4 bits of 36 bit addressing */
+ if (IS_ENABLED(CONFIG_64BIT)) {
+ u32 hi = upper_32_bits(desc_base) & HIGH_4_BITS;
+
+ ldma_update_bits(d, DMA_CDBA_MSB,
+ FIELD_PREP(DMA_CDBA_MSB, hi), DMA_CCTRL);
+ }
+ writel(desc_num, d->base + DMA_CDLEN);
+ spin_unlock_irqrestore(&d->dev_lock, flags);
+
+ c->desc_init = true;
+}
+
+static struct dma_async_tx_descriptor *
+ldma_chan_desc_cfg(struct dma_chan *chan, dma_addr_t desc_base, int desc_num)
+{
+ struct ldma_chan *c = to_ldma_chan(chan);
+ struct ldma_dev *d = to_ldma_dev(c->vchan.chan.device);
+ struct dma_async_tx_descriptor *tx;
+ struct dw2_desc_sw *ds;
+
+ if (!desc_num) {
+ dev_err(d->dev, "Channel %d must allocate descriptor first\n",
+ c->nr);
+ return NULL;
+ }
+
+ if (desc_num > DMA_MAX_DESC_NUM) {
+ dev_err(d->dev, "Channel %d descriptor number out of range %d\n",
+ c->nr, desc_num);
+ return NULL;
+ }
+
+ ldma_chan_desc_hw_cfg(c, desc_base, desc_num);
+
+ c->flags |= DMA_HW_DESC;
+ c->desc_cnt = desc_num;
+ c->desc_phys = desc_base;
+
+ ds = kzalloc(sizeof(*ds), GFP_NOWAIT);
+ if (!ds)
+ return NULL;
+
+ tx = &ds->vdesc.tx;
+ dma_async_tx_descriptor_init(tx, chan);
+
+ return tx;
+}
+
+static int ldma_chan_reset(struct ldma_chan *c)
+{
+ struct ldma_dev *d = to_ldma_dev(c->vchan.chan.device);
+ unsigned long flags;
+ u32 val;
+ int ret;
+
+ ret = ldma_chan_off(c);
+ if (ret)
+ return ret;
+
+ spin_lock_irqsave(&d->dev_lock, flags);
+ ldma_update_bits(d, DMA_CS_MASK, c->nr, DMA_CS);
+ ldma_update_bits(d, DMA_CCTRL_RST, DMA_CCTRL_RST, DMA_CCTRL);
+ spin_unlock_irqrestore(&d->dev_lock, flags);
+
+ ret = readl_poll_timeout_atomic(d->base + DMA_CCTRL, val,
+ !(val & DMA_CCTRL_RST), 0, 10000);
+ if (ret)
+ return ret;
+
+ c->rst = 1;
+ c->desc_init = false;
+
+ return 0;
+}
+
+static void ldma_chan_byte_offset_cfg(struct ldma_chan *c, u32 boff_len)
+{
+ struct ldma_dev *d = to_ldma_dev(c->vchan.chan.device);
+ u32 mask = DMA_C_BOFF_EN | DMA_C_BOFF_BOF_LEN;
+ u32 val;
+
+ if (boff_len > 0 && boff_len <= DMA_CHAN_BOFF_MAX)
+ val = FIELD_PREP(DMA_C_BOFF_BOF_LEN, boff_len) | DMA_C_BOFF_EN;
+ else
+ val = 0;
+
+ ldma_update_bits(d, DMA_CS_MASK, c->nr, DMA_CS);
+ ldma_update_bits(d, mask, val, DMA_C_BOFF);
+}
+
+static void ldma_chan_data_endian_cfg(struct ldma_chan *c, bool enable,
+ u32 endian_type)
+{
+ struct ldma_dev *d = to_ldma_dev(c->vchan.chan.device);
+ u32 mask = DMA_C_END_DE_EN | DMA_C_END_DATAENDI;
+ u32 val;
+
+ if (enable)
+ val = DMA_C_END_DE_EN | FIELD_PREP(DMA_C_END_DATAENDI, endian_type);
+ else
+ val = 0;
+
+ ldma_update_bits(d, DMA_CS_MASK, c->nr, DMA_CS);
+ ldma_update_bits(d, mask, val, DMA_C_ENDIAN);
+}
+
+static void ldma_chan_desc_endian_cfg(struct ldma_chan *c, bool enable,
+ u32 endian_type)
+{
+ struct ldma_dev *d = to_ldma_dev(c->vchan.chan.device);
+ u32 mask = DMA_C_END_DES_EN | DMA_C_END_DESENDI;
+ u32 val;
+
+ if (enable)
+ val = DMA_C_END_DES_EN | FIELD_PREP(DMA_C_END_DESENDI, endian_type);
+ else
+ val = 0;
+
+ ldma_update_bits(d, DMA_CS_MASK, c->nr, DMA_CS);
+ ldma_update_bits(d, mask, val, DMA_C_ENDIAN);
+}
+
+static void ldma_chan_hdr_mode_cfg(struct ldma_chan *c, u32 hdr_len, bool csum)
+{
+ struct ldma_dev *d = to_ldma_dev(c->vchan.chan.device);
+ u32 mask, val;
+
+ /* NB, csum disabled, hdr length must be provided */
+ if (!csum && (!hdr_len || hdr_len > DMA_HDR_LEN_MAX))
+ return;
+
+ mask = DMA_C_HDRM_HDR_SUM;
+ val = DMA_C_HDRM_HDR_SUM;
+
+ if (!csum && hdr_len)
+ val = hdr_len;
+
+ ldma_update_bits(d, DMA_CS_MASK, c->nr, DMA_CS);
+ ldma_update_bits(d, mask, val, DMA_C_HDRM);
+}
+
+static void ldma_chan_rxwr_np_cfg(struct ldma_chan *c, bool enable)
+{
+ struct ldma_dev *d = to_ldma_dev(c->vchan.chan.device);
+ u32 mask, val;
+
+ /* Only valid for RX channel */
+ if (ldma_chan_tx(c))
+ return;
+
+ mask = DMA_CCTRL_WR_NP_EN;
+ val = enable ? DMA_CCTRL_WR_NP_EN : 0;
+
+ ldma_update_bits(d, DMA_CS_MASK, c->nr, DMA_CS);
+ ldma_update_bits(d, mask, val, DMA_CCTRL);
+}
+
+static void ldma_chan_abc_cfg(struct ldma_chan *c, bool enable)
+{
+ struct ldma_dev *d = to_ldma_dev(c->vchan.chan.device);
+ u32 mask, val;
+
+ if (d->ver < DMA_VER32 || ldma_chan_tx(c))
+ return;
+
+ mask = DMA_CCTRL_CH_ABC;
+ val = enable ? DMA_CCTRL_CH_ABC : 0;
+
+ ldma_update_bits(d, DMA_CS_MASK, c->nr, DMA_CS);
+ ldma_update_bits(d, mask, val, DMA_CCTRL);
+}
+
+static int ldma_port_cfg(struct ldma_port *p)
+{
+ unsigned long flags;
+ struct ldma_dev *d;
+ u32 reg;
+
+ d = p->ldev;
+ reg = FIELD_PREP(DMA_PCTRL_TXENDI, p->txendi);
+ reg |= FIELD_PREP(DMA_PCTRL_RXENDI, p->rxendi);
+
+ if (d->ver == DMA_VER22) {
+ reg |= FIELD_PREP(DMA_PCTRL_TXBL, p->txbl);
+ reg |= FIELD_PREP(DMA_PCTRL_RXBL, p->rxbl);
+ } else {
+ reg |= FIELD_PREP(DMA_PCTRL_PDEN, p->pkt_drop);
+
+ if (p->txbl == DMA_BURSTL_32DW)
+ reg |= DMA_PCTRL_TXBL32;
+ else if (p->txbl == DMA_BURSTL_16DW)
+ reg |= DMA_PCTRL_TXBL16;
+ else
+ reg |= FIELD_PREP(DMA_PCTRL_TXBL, DMA_PCTRL_TXBL_8);
+
+ if (p->rxbl == DMA_BURSTL_32DW)
+ reg |= DMA_PCTRL_RXBL32;
+ else if (p->rxbl == DMA_BURSTL_16DW)
+ reg |= DMA_PCTRL_RXBL16;
+ else
+ reg |= FIELD_PREP(DMA_PCTRL_RXBL, DMA_PCTRL_RXBL_8);
+ }
+
+ spin_lock_irqsave(&d->dev_lock, flags);
+ writel(p->portid, d->base + DMA_PS);
+ writel(reg, d->base + DMA_PCTRL);
+ spin_unlock_irqrestore(&d->dev_lock, flags);
+
+ reg = readl(d->base + DMA_PCTRL); /* read back */
+ dev_dbg(d->dev, "Port Control 0x%08x configuration done\n", reg);
+
+ return 0;
+}
+
+static int ldma_chan_cfg(struct ldma_chan *c)
+{
+ struct ldma_dev *d = to_ldma_dev(c->vchan.chan.device);
+ unsigned long flags;
+ u32 reg;
+
+ reg = c->pden ? DMA_CCTRL_PDEN : 0;
+ reg |= c->onoff ? DMA_CCTRL_ON : 0;
+ reg |= c->rst ? DMA_CCTRL_RST : 0;
+
+ ldma_chan_cctrl_cfg(c, reg);
+ ldma_chan_irq_init(c);
+
+ if (d->ver <= DMA_VER22)
+ return 0;
+
+ spin_lock_irqsave(&d->dev_lock, flags);
+ ldma_chan_set_class(c, c->nr);
+ ldma_chan_byte_offset_cfg(c, c->boff_len);
+ ldma_chan_data_endian_cfg(c, c->data_endian_en, c->data_endian);
+ ldma_chan_desc_endian_cfg(c, c->desc_endian_en, c->desc_endian);
+ ldma_chan_hdr_mode_cfg(c, c->hdrm_len, c->hdrm_csum);
+ ldma_chan_rxwr_np_cfg(c, c->desc_rx_np);
+ ldma_chan_abc_cfg(c, c->abc_en);
+ spin_unlock_irqrestore(&d->dev_lock, flags);
+
+ if (ldma_chan_is_hw_desc(c))
+ ldma_chan_desc_hw_cfg(c, c->desc_phys, c->desc_cnt);
+
+ return 0;
+}
+
+static void ldma_dev_init(struct ldma_dev *d)
+{
+ unsigned long ch_mask = (unsigned long)d->channels_mask;
+ struct ldma_port *p;
+ struct ldma_chan *c;
+ int i;
+ u32 j;
+
+ spin_lock_init(&d->dev_lock);
+ ldma_dev_reset(d);
+ ldma_dev_cfg(d);
+
+ /* DMA port initialization */
+ for (i = 0; i < d->port_nrs; i++) {
+ p = &d->ports[i];
+ ldma_port_cfg(p);
+ }
+
+ /* DMA channel initialization */
+ for_each_set_bit(j, &ch_mask, d->chan_nrs) {
+ c = &d->chans[j];
+ ldma_chan_cfg(c);
+ }
+}
+
+static int ldma_parse_dt(struct ldma_dev *d)
+{
+ struct fwnode_handle *fwnode = dev_fwnode(d->dev);
+ struct ldma_port *p;
+ int i;
+
+ if (fwnode_property_read_bool(fwnode, "intel,dma-byte-en"))
+ d->flags |= DMA_EN_BYTE_EN;
+
+ if (fwnode_property_read_bool(fwnode, "intel,dma-dburst-wr"))
+ d->flags |= DMA_DBURST_WR;
+
+ if (fwnode_property_read_bool(fwnode, "intel,dma-drb"))
+ d->flags |= DMA_DFT_DRB;
+
+ if (fwnode_property_read_u32(fwnode, "intel,dma-poll-cnt",
+ &d->pollcnt))
+ d->pollcnt = DMA_DFT_POLL_CNT;
+
+ if (d->inst->chan_fc)
+ d->flags |= DMA_CHAN_FLOW_CTL;
+
+ if (d->inst->desc_fod)
+ d->flags |= DMA_DESC_FOD;
+
+ if (d->inst->desc_in_sram)
+ d->flags |= DMA_DESC_IN_SRAM;
+
+ if (d->inst->valid_desc_fetch_ack)
+ d->flags |= DMA_VALID_DESC_FETCH_ACK;
+
+ if (d->ver > DMA_VER22) {
+ if (!d->port_nrs)
+ return -EINVAL;
+
+ for (i = 0; i < d->port_nrs; i++) {
+ p = &d->ports[i];
+ p->rxendi = DMA_DFT_ENDIAN;
+ p->txendi = DMA_DFT_ENDIAN;
+ p->rxbl = DMA_DFT_BURST;
+ p->txbl = DMA_DFT_BURST;
+ p->pkt_drop = DMA_PKT_DROP_DIS;
+ }
+ }
+
+ return 0;
+}
+
+static void dma_free_desc_resource(struct virt_dma_desc *vdesc)
+{
+ struct dw2_desc_sw *ds = to_lgm_dma_desc(vdesc);
+ struct ldma_chan *c = ds->chan;
+
+ dma_pool_free(c->desc_pool, ds->desc_hw, ds->desc_phys);
+ kfree(ds);
+}
+
+static struct dw2_desc_sw *
+dma_alloc_desc_resource(int num, struct ldma_chan *c)
+{
+ struct device *dev = c->vchan.chan.device->dev;
+ struct dw2_desc_sw *ds;
+
+ if (num > c->desc_num) {
+ dev_err(dev, "sg num %d exceed max %d\n", num, c->desc_num);
+ return NULL;
+ }
+
+ ds = kzalloc(sizeof(*ds), GFP_NOWAIT);
+ if (!ds)
+ return NULL;
+
+ ds->chan = c;
+ ds->desc_hw = dma_pool_zalloc(c->desc_pool, GFP_ATOMIC,
+ &ds->desc_phys);
+ if (!ds->desc_hw) {
+ dev_dbg(dev, "out of memory for link descriptor\n");
+ kfree(ds);
+ return NULL;
+ }
+ ds->desc_cnt = num;
+
+ return ds;
+}
+
+static void ldma_chan_irq_en(struct ldma_chan *c)
+{
+ struct ldma_dev *d = to_ldma_dev(c->vchan.chan.device);
+ unsigned long flags;
+
+ spin_lock_irqsave(&d->dev_lock, flags);
+ writel(c->nr, d->base + DMA_CS);
+ writel(DMA_CI_EOP, d->base + DMA_CIE);
+ writel(BIT(c->nr), d->base + DMA_IRNEN);
+ spin_unlock_irqrestore(&d->dev_lock, flags);
+}
+
+static void ldma_issue_pending(struct dma_chan *chan)
+{
+ struct ldma_chan *c = to_ldma_chan(chan);
+ struct ldma_dev *d = to_ldma_dev(c->vchan.chan.device);
+ unsigned long flags;
+
+ if (d->ver == DMA_VER22) {
+ spin_lock_irqsave(&c->vchan.lock, flags);
+ if (vchan_issue_pending(&c->vchan)) {
+ struct virt_dma_desc *vdesc;
+
+ /* Get the next descriptor */
+ vdesc = vchan_next_desc(&c->vchan);
+ if (!vdesc) {
+ c->ds = NULL;
+ spin_unlock_irqrestore(&c->vchan.lock, flags);
+ return;
+ }
+ list_del(&vdesc->node);
+ c->ds = to_lgm_dma_desc(vdesc);
+ ldma_chan_desc_hw_cfg(c, c->ds->desc_phys, c->ds->desc_cnt);
+ ldma_chan_irq_en(c);
+ }
+ spin_unlock_irqrestore(&c->vchan.lock, flags);
+ }
+ ldma_chan_on(c);
+}
+
+static void ldma_synchronize(struct dma_chan *chan)
+{
+ struct ldma_chan *c = to_ldma_chan(chan);
+
+ /*
+ * clear any pending work if any. In that
+ * case the resource needs to be free here.
+ */
+ cancel_work_sync(&c->work);
+ vchan_synchronize(&c->vchan);
+ if (c->ds)
+ dma_free_desc_resource(&c->ds->vdesc);
+}
+
+static int ldma_terminate_all(struct dma_chan *chan)
+{
+ struct ldma_chan *c = to_ldma_chan(chan);
+ unsigned long flags;
+ LIST_HEAD(head);
+
+ spin_lock_irqsave(&c->vchan.lock, flags);
+ vchan_get_all_descriptors(&c->vchan, &head);
+ spin_unlock_irqrestore(&c->vchan.lock, flags);
+ vchan_dma_desc_free_list(&c->vchan, &head);
+
+ return ldma_chan_reset(c);
+}
+
+static int ldma_resume_chan(struct dma_chan *chan)
+{
+ struct ldma_chan *c = to_ldma_chan(chan);
+
+ ldma_chan_on(c);
+
+ return 0;
+}
+
+static int ldma_pause_chan(struct dma_chan *chan)
+{
+ struct ldma_chan *c = to_ldma_chan(chan);
+
+ return ldma_chan_off(c);
+}
+
+static enum dma_status
+ldma_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
+ struct dma_tx_state *txstate)
+{
+ struct ldma_chan *c = to_ldma_chan(chan);
+ struct ldma_dev *d = to_ldma_dev(c->vchan.chan.device);
+ enum dma_status status = DMA_COMPLETE;
+
+ if (d->ver == DMA_VER22)
+ status = dma_cookie_status(chan, cookie, txstate);
+
+ return status;
+}
+
+static void dma_chan_irq(int irq, void *data)
+{
+ struct ldma_chan *c = data;
+ struct ldma_dev *d = to_ldma_dev(c->vchan.chan.device);
+ u32 stat;
+
+ /* Disable channel interrupts */
+ writel(c->nr, d->base + DMA_CS);
+ stat = readl(d->base + DMA_CIS);
+ if (!stat)
+ return;
+
+ writel(readl(d->base + DMA_CIE) & ~DMA_CI_ALL, d->base + DMA_CIE);
+ writel(stat, d->base + DMA_CIS);
+ queue_work(d->wq, &c->work);
+}
+
+static irqreturn_t dma_interrupt(int irq, void *dev_id)
+{
+ struct ldma_dev *d = dev_id;
+ struct ldma_chan *c;
+ unsigned long irncr;
+ u32 cid;
+
+ irncr = readl(d->base + DMA_IRNCR);
+ if (!irncr) {
+ dev_err(d->dev, "dummy interrupt\n");
+ return IRQ_NONE;
+ }
+
+ for_each_set_bit(cid, &irncr, d->chan_nrs) {
+ /* Mask */
+ writel(readl(d->base + DMA_IRNEN) & ~BIT(cid), d->base + DMA_IRNEN);
+ /* Ack */
+ writel(readl(d->base + DMA_IRNCR) | BIT(cid), d->base + DMA_IRNCR);
+
+ c = &d->chans[cid];
+ dma_chan_irq(irq, c);
+ }
+
+ return IRQ_HANDLED;
+}
+
+static void prep_slave_burst_len(struct ldma_chan *c)
+{
+ struct ldma_port *p = c->port;
+ struct dma_slave_config *cfg = &c->config;
+
+ if (cfg->dst_maxburst)
+ cfg->src_maxburst = cfg->dst_maxburst;
+
+ /* TX and RX has the same burst length */
+ p->txbl = ilog2(cfg->src_maxburst);
+ p->rxbl = p->txbl;
+}
+
+static struct dma_async_tx_descriptor *
+ldma_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
+ unsigned int sglen, enum dma_transfer_direction dir,
+ unsigned long flags, void *context)
+{
+ struct ldma_chan *c = to_ldma_chan(chan);
+ struct ldma_dev *d = to_ldma_dev(c->vchan.chan.device);
+ size_t len, avail, total = 0;
+ struct dw2_desc *hw_ds;
+ struct dw2_desc_sw *ds;
+ struct scatterlist *sg;
+ int num = sglen, i;
+ dma_addr_t addr;
+
+ if (!sgl)
+ return NULL;
+
+ if (d->ver > DMA_VER22)
+ return ldma_chan_desc_cfg(chan, sgl->dma_address, sglen);
+
+ for_each_sg(sgl, sg, sglen, i) {
+ avail = sg_dma_len(sg);
+ if (avail > DMA_MAX_SIZE)
+ num += DIV_ROUND_UP(avail, DMA_MAX_SIZE) - 1;
+ }
+
+ ds = dma_alloc_desc_resource(num, c);
+ if (!ds)
+ return NULL;
+
+ c->ds = ds;
+
+ num = 0;
+ /* sop and eop has to be handled nicely */
+ for_each_sg(sgl, sg, sglen, i) {
+ addr = sg_dma_address(sg);
+ avail = sg_dma_len(sg);
+ total += avail;
+
+ do {
+ len = min_t(size_t, avail, DMA_MAX_SIZE);
+
+ hw_ds = &ds->desc_hw[num];
+ switch (sglen) {
+ case 1:
+ hw_ds->field &= ~DESC_SOP;
+ hw_ds->field |= FIELD_PREP(DESC_SOP, 1);
+
+ hw_ds->field &= ~DESC_EOP;
+ hw_ds->field |= FIELD_PREP(DESC_EOP, 1);
+ break;
+ default:
+ if (num == 0) {
+ hw_ds->field &= ~DESC_SOP;
+ hw_ds->field |= FIELD_PREP(DESC_SOP, 1);
+
+ hw_ds->field &= ~DESC_EOP;
+ hw_ds->field |= FIELD_PREP(DESC_EOP, 0);
+ } else if (num == (sglen - 1)) {
+ hw_ds->field &= ~DESC_SOP;
+ hw_ds->field |= FIELD_PREP(DESC_SOP, 0);
+ hw_ds->field &= ~DESC_EOP;
+ hw_ds->field |= FIELD_PREP(DESC_EOP, 1);
+ } else {
+ hw_ds->field &= ~DESC_SOP;
+ hw_ds->field |= FIELD_PREP(DESC_SOP, 0);
+
+ hw_ds->field &= ~DESC_EOP;
+ hw_ds->field |= FIELD_PREP(DESC_EOP, 0);
+ }
+ break;
+ }
+ /* Only 32 bit address supported */
+ hw_ds->addr = (u32)addr;
+
+ hw_ds->field &= ~DESC_DATA_LEN;
+ hw_ds->field |= FIELD_PREP(DESC_DATA_LEN, len);
+
+ hw_ds->field &= ~DESC_C;
+ hw_ds->field |= FIELD_PREP(DESC_C, 0);
+
+ hw_ds->field &= ~DESC_BYTE_OFF;
+ hw_ds->field |= FIELD_PREP(DESC_BYTE_OFF, addr & 0x3);
+
+ /* Ensure data ready before ownership change */
+ wmb();
+ hw_ds->field &= ~DESC_OWN;
+ hw_ds->field |= FIELD_PREP(DESC_OWN, DMA_OWN);
+
+ /* Ensure ownership changed before moving forward */
+ wmb();
+ num++;
+ addr += len;
+ avail -= len;
+ } while (avail);
+ }
+
+ ds->size = total;
+ prep_slave_burst_len(c);
+
+ return vchan_tx_prep(&c->vchan, &ds->vdesc, DMA_CTRL_ACK);
+}
+
+static int
+ldma_slave_config(struct dma_chan *chan, struct dma_slave_config *cfg)
+{
+ struct ldma_chan *c = to_ldma_chan(chan);
+
+ memcpy(&c->config, cfg, sizeof(c->config));
+
+ return 0;
+}
+
+static int ldma_alloc_chan_resources(struct dma_chan *chan)
+{
+ struct ldma_chan *c = to_ldma_chan(chan);
+ struct ldma_dev *d = to_ldma_dev(c->vchan.chan.device);
+ struct device *dev = c->vchan.chan.device->dev;
+ size_t desc_sz;
+
+ if (d->ver > DMA_VER22) {
+ c->flags |= CHAN_IN_USE;
+ return 0;
+ }
+
+ if (c->desc_pool)
+ return c->desc_num;
+
+ desc_sz = c->desc_num * sizeof(struct dw2_desc);
+ c->desc_pool = dma_pool_create(c->name, dev, desc_sz,
+ __alignof__(struct dw2_desc), 0);
+
+ if (!c->desc_pool) {
+ dev_err(dev, "unable to allocate descriptor pool\n");
+ return -ENOMEM;
+ }
+
+ return c->desc_num;
+}
+
+static void ldma_free_chan_resources(struct dma_chan *chan)
+{
+ struct ldma_chan *c = to_ldma_chan(chan);
+ struct ldma_dev *d = to_ldma_dev(c->vchan.chan.device);
+
+ if (d->ver == DMA_VER22) {
+ dma_pool_destroy(c->desc_pool);
+ c->desc_pool = NULL;
+ vchan_free_chan_resources(to_virt_chan(chan));
+ ldma_chan_reset(c);
+ } else {
+ c->flags &= ~CHAN_IN_USE;
+ }
+}
+
+static void dma_work(struct work_struct *work)
+{
+ struct ldma_chan *c = container_of(work, struct ldma_chan, work);
+ struct dma_async_tx_descriptor *tx = &c->ds->vdesc.tx;
+ struct virt_dma_chan *vc = &c->vchan;
+ struct dmaengine_desc_callback cb;
+ struct virt_dma_desc *vd, *_vd;
+ unsigned long flags;
+ LIST_HEAD(head);
+
+ spin_lock_irqsave(&c->vchan.lock, flags);
+ list_splice_tail_init(&vc->desc_completed, &head);
+ spin_unlock_irqrestore(&c->vchan.lock, flags);
+ dmaengine_desc_get_callback(tx, &cb);
+ dma_cookie_complete(tx);
+ dmaengine_desc_callback_invoke(&cb, NULL);
+
+ list_for_each_entry_safe(vd, _vd, &head, node) {
+ dmaengine_desc_get_callback(tx, &cb);
+ dma_cookie_complete(tx);
+ list_del(&vd->node);
+ dmaengine_desc_callback_invoke(&cb, NULL);
+
+ vchan_vdesc_fini(vd);
+ }
+ c->ds = NULL;
+}
+
+static void
+update_burst_len_v22(struct ldma_chan *c, struct ldma_port *p, u32 burst)
+{
+ if (ldma_chan_tx(c))
+ p->txbl = ilog2(burst);
+ else
+ p->rxbl = ilog2(burst);
+}
+
+static void
+update_burst_len_v3X(struct ldma_chan *c, struct ldma_port *p, u32 burst)
+{
+ if (ldma_chan_tx(c))
+ p->txbl = burst;
+ else
+ p->rxbl = burst;
+}
+
+static int
+update_client_configs(struct of_dma *ofdma, struct of_phandle_args *spec)
+{
+ struct ldma_dev *d = ofdma->of_dma_data;
+ u32 chan_id = spec->args[0];
+ u32 port_id = spec->args[1];
+ u32 burst = spec->args[2];
+ struct ldma_port *p;
+ struct ldma_chan *c;
+
+ if (chan_id >= d->chan_nrs || port_id >= d->port_nrs)
+ return 0;
+
+ p = &d->ports[port_id];
+ c = &d->chans[chan_id];
+ c->port = p;
+
+ if (d->ver == DMA_VER22)
+ update_burst_len_v22(c, p, burst);
+ else
+ update_burst_len_v3X(c, p, burst);
+
+ ldma_port_cfg(p);
+
+ return 1;
+}
+
+static struct dma_chan *ldma_xlate(struct of_phandle_args *spec,
+ struct of_dma *ofdma)
+{
+ struct ldma_dev *d = ofdma->of_dma_data;
+ u32 chan_id = spec->args[0];
+ int ret;
+
+ if (!spec->args_count)
+ return NULL;
+
+ /* if args_count is 1 driver use default settings */
+ if (spec->args_count > 1) {
+ ret = update_client_configs(ofdma, spec);
+ if (!ret)
+ return NULL;
+ }
+
+ return dma_get_slave_channel(&d->chans[chan_id].vchan.chan);
+}
+
+static void ldma_dma_init_v22(int i, struct ldma_dev *d)
+{
+ struct ldma_chan *c;
+
+ c = &d->chans[i];
+ c->nr = i; /* Real channel number */
+ c->rst = DMA_CHAN_RST;
+ c->desc_num = DMA_DFT_DESC_NUM;
+ snprintf(c->name, sizeof(c->name), "chan%d", c->nr);
+ INIT_WORK(&c->work, dma_work);
+ c->vchan.desc_free = dma_free_desc_resource;
+ vchan_init(&c->vchan, &d->dma_dev);
+}
+
+static void ldma_dma_init_v3X(int i, struct ldma_dev *d)
+{
+ struct ldma_chan *c;
+
+ c = &d->chans[i];
+ c->data_endian = DMA_DFT_ENDIAN;
+ c->desc_endian = DMA_DFT_ENDIAN;
+ c->data_endian_en = false;
+ c->desc_endian_en = false;
+ c->desc_rx_np = false;
+ c->flags |= DEVICE_ALLOC_DESC;
+ c->onoff = DMA_CH_OFF;
+ c->rst = DMA_CHAN_RST;
+ c->abc_en = true;
+ c->hdrm_csum = false;
+ c->boff_len = 0;
+ c->nr = i;
+ c->vchan.desc_free = dma_free_desc_resource;
+ vchan_init(&c->vchan, &d->dma_dev);
+}
+
+static int ldma_init_v22(struct ldma_dev *d, struct platform_device *pdev)
+{
+ int ret;
+
+ ret = device_property_read_u32(d->dev, "dma-channels", &d->chan_nrs);
+ if (ret < 0) {
+ dev_err(d->dev, "unable to read dma-channels property\n");
+ return ret;
+ }
+
+ d->irq = platform_get_irq(pdev, 0);
+ if (d->irq < 0)
+ return d->irq;
+
+ ret = devm_request_irq(&pdev->dev, d->irq, dma_interrupt, 0,
+ DRIVER_NAME, d);
+ if (ret)
+ return ret;
+
+ d->wq = alloc_ordered_workqueue("dma_wq", WQ_MEM_RECLAIM |
+ WQ_HIGHPRI);
+ if (!d->wq)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static void ldma_clk_disable(void *data)
+{
+ struct ldma_dev *d = data;
+
+ clk_disable_unprepare(d->core_clk);
+ reset_control_assert(d->rst);
+}
+
+static const struct ldma_inst_data dma0 = {
+ .name = "dma0",
+ .chan_fc = false,
+ .desc_fod = false,
+ .desc_in_sram = false,
+ .valid_desc_fetch_ack = false,
+};
+
+static const struct ldma_inst_data dma2tx = {
+ .name = "dma2tx",
+ .type = DMA_TYPE_TX,
+ .orrc = 16,
+ .chan_fc = true,
+ .desc_fod = true,
+ .desc_in_sram = true,
+ .valid_desc_fetch_ack = true,
+};
+
+static const struct ldma_inst_data dma1rx = {
+ .name = "dma1rx",
+ .type = DMA_TYPE_RX,
+ .orrc = 16,
+ .chan_fc = false,
+ .desc_fod = true,
+ .desc_in_sram = true,
+ .valid_desc_fetch_ack = false,
+};
+
+static const struct ldma_inst_data dma1tx = {
+ .name = "dma1tx",
+ .type = DMA_TYPE_TX,
+ .orrc = 16,
+ .chan_fc = true,
+ .desc_fod = true,
+ .desc_in_sram = true,
+ .valid_desc_fetch_ack = true,
+};
+
+static const struct ldma_inst_data dma0tx = {
+ .name = "dma0tx",
+ .type = DMA_TYPE_TX,
+ .orrc = 16,
+ .chan_fc = true,
+ .desc_fod = true,
+ .desc_in_sram = true,
+ .valid_desc_fetch_ack = true,
+};
+
+static const struct ldma_inst_data dma3 = {
+ .name = "dma3",
+ .type = DMA_TYPE_MCPY,
+ .orrc = 16,
+ .chan_fc = false,
+ .desc_fod = false,
+ .desc_in_sram = true,
+ .valid_desc_fetch_ack = false,
+};
+
+static const struct ldma_inst_data toe_dma30 = {
+ .name = "toe_dma30",
+ .type = DMA_TYPE_MCPY,
+ .orrc = 16,
+ .chan_fc = false,
+ .desc_fod = false,
+ .desc_in_sram = true,
+ .valid_desc_fetch_ack = true,
+};
+
+static const struct ldma_inst_data toe_dma31 = {
+ .name = "toe_dma31",
+ .type = DMA_TYPE_MCPY,
+ .orrc = 16,
+ .chan_fc = false,
+ .desc_fod = false,
+ .desc_in_sram = true,
+ .valid_desc_fetch_ack = true,
+};
+
+static const struct of_device_id intel_ldma_match[] = {
+ { .compatible = "intel,lgm-cdma", .data = &dma0},
+ { .compatible = "intel,lgm-dma2tx", .data = &dma2tx},
+ { .compatible = "intel,lgm-dma1rx", .data = &dma1rx},
+ { .compatible = "intel,lgm-dma1tx", .data = &dma1tx},
+ { .compatible = "intel,lgm-dma0tx", .data = &dma0tx},
+ { .compatible = "intel,lgm-dma3", .data = &dma3},
+ { .compatible = "intel,lgm-toe-dma30", .data = &toe_dma30},
+ { .compatible = "intel,lgm-toe-dma31", .data = &toe_dma31},
+ {}
+};
+
+static int intel_ldma_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct dma_device *dma_dev;
+ unsigned long ch_mask;
+ struct ldma_chan *c;
+ struct ldma_port *p;
+ struct ldma_dev *d;
+ u32 id, bitn = 32, j;
+ int i, ret;
+
+ d = devm_kzalloc(dev, sizeof(*d), GFP_KERNEL);
+ if (!d)
+ return -ENOMEM;
+
+ /* Link controller to platform device */
+ d->dev = &pdev->dev;
+
+ d->inst = device_get_match_data(dev);
+ if (!d->inst) {
+ dev_err(dev, "No device match found\n");
+ return -ENODEV;
+ }
+
+ d->base = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(d->base))
+ return PTR_ERR(d->base);
+
+ /* Power up and reset the dma engine, some DMAs always on?? */
+ d->core_clk = devm_clk_get_optional(dev, NULL);
+ if (IS_ERR(d->core_clk))
+ return PTR_ERR(d->core_clk);
+
+ d->rst = devm_reset_control_get_optional(dev, NULL);
+ if (IS_ERR(d->rst))
+ return PTR_ERR(d->rst);
+
+ clk_prepare_enable(d->core_clk);
+ reset_control_deassert(d->rst);
+
+ ret = devm_add_action_or_reset(dev, ldma_clk_disable, d);
+ if (ret) {
+ dev_err(dev, "Failed to devm_add_action_or_reset, %d\n", ret);
+ return ret;
+ }
+
+ id = readl(d->base + DMA_ID);
+ d->chan_nrs = FIELD_GET(DMA_ID_CHNR, id);
+ d->port_nrs = FIELD_GET(DMA_ID_PNR, id);
+ d->ver = FIELD_GET(DMA_ID_REV, id);
+
+ if (id & DMA_ID_AW_36B)
+ d->flags |= DMA_ADDR_36BIT;
+
+ if (IS_ENABLED(CONFIG_64BIT) && (id & DMA_ID_AW_36B))
+ bitn = 36;
+
+ if (id & DMA_ID_DW_128B)
+ d->flags |= DMA_DATA_128BIT;
+
+ ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(bitn));
+ if (ret) {
+ dev_err(dev, "No usable DMA configuration\n");
+ return ret;
+ }
+
+ if (d->ver == DMA_VER22) {
+ ret = ldma_init_v22(d, pdev);
+ if (ret)
+ return ret;
+ }
+
+ ret = device_property_read_u32(dev, "dma-channel-mask", &d->channels_mask);
+ if (ret < 0)
+ d->channels_mask = GENMASK(d->chan_nrs - 1, 0);
+
+ dma_dev = &d->dma_dev;
+
+ dma_cap_zero(dma_dev->cap_mask);
+ dma_cap_set(DMA_SLAVE, dma_dev->cap_mask);
+
+ /* Channel initializations */
+ INIT_LIST_HEAD(&dma_dev->channels);
+
+ /* Port Initializations */
+ d->ports = devm_kcalloc(dev, d->port_nrs, sizeof(*p), GFP_KERNEL);
+ if (!d->ports)
+ return -ENOMEM;
+
+ /* Channels Initializations */
+ d->chans = devm_kcalloc(d->dev, d->chan_nrs, sizeof(*c), GFP_KERNEL);
+ if (!d->chans)
+ return -ENOMEM;
+
+ for (i = 0; i < d->port_nrs; i++) {
+ p = &d->ports[i];
+ p->portid = i;
+ p->ldev = d;
+ }
+
+ dma_dev->dev = &pdev->dev;
+
+ ch_mask = (unsigned long)d->channels_mask;
+ for_each_set_bit(j, &ch_mask, d->chan_nrs) {
+ if (d->ver == DMA_VER22)
+ ldma_dma_init_v22(j, d);
+ else
+ ldma_dma_init_v3X(j, d);
+ }
+
+ ret = ldma_parse_dt(d);
+ if (ret)
+ return ret;
+
+ dma_dev->device_alloc_chan_resources = ldma_alloc_chan_resources;
+ dma_dev->device_free_chan_resources = ldma_free_chan_resources;
+ dma_dev->device_terminate_all = ldma_terminate_all;
+ dma_dev->device_issue_pending = ldma_issue_pending;
+ dma_dev->device_tx_status = ldma_tx_status;
+ dma_dev->device_resume = ldma_resume_chan;
+ dma_dev->device_pause = ldma_pause_chan;
+ dma_dev->device_prep_slave_sg = ldma_prep_slave_sg;
+
+ if (d->ver == DMA_VER22) {
+ dma_dev->device_config = ldma_slave_config;
+ dma_dev->device_synchronize = ldma_synchronize;
+ dma_dev->src_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_4_BYTES);
+ dma_dev->dst_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_4_BYTES);
+ dma_dev->directions = BIT(DMA_MEM_TO_DEV) |
+ BIT(DMA_DEV_TO_MEM);
+ dma_dev->residue_granularity =
+ DMA_RESIDUE_GRANULARITY_DESCRIPTOR;
+ }
+
+ platform_set_drvdata(pdev, d);
+
+ ldma_dev_init(d);
+
+ ret = dma_async_device_register(dma_dev);
+ if (ret) {
+ dev_err(dev, "Failed to register slave DMA engine device\n");
+ return ret;
+ }
+
+ ret = of_dma_controller_register(pdev->dev.of_node, ldma_xlate, d);
+ if (ret) {
+ dev_err(dev, "Failed to register of DMA controller\n");
+ dma_async_device_unregister(dma_dev);
+ return ret;
+ }
+
+ dev_info(dev, "Init done - rev: %x, ports: %d channels: %d\n", d->ver,
+ d->port_nrs, d->chan_nrs);
+
+ return 0;
+}
+
+static struct platform_driver intel_ldma_driver = {
+ .probe = intel_ldma_probe,
+ .driver = {
+ .name = DRIVER_NAME,
+ .of_match_table = intel_ldma_match,
+ },
+};
+
+/*
+ * Perform this driver as device_initcall to make sure initialization happens
+ * before its DMA clients of some are platform specific and also to provide
+ * registered DMA channels and DMA capabilities to clients before their
+ * initialization.
+ */
+static int __init intel_ldma_init(void)
+{
+ return platform_driver_register(&intel_ldma_driver);
+}
+
+device_initcall(intel_ldma_init);
diff --git a/drivers/dma/lpc18xx-dmamux.c b/drivers/dma/lpc18xx-dmamux.c
new file mode 100644
index 000000000..df98cae87
--- /dev/null
+++ b/drivers/dma/lpc18xx-dmamux.c
@@ -0,0 +1,179 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * DMA Router driver for LPC18xx/43xx DMA MUX
+ *
+ * Copyright (C) 2015 Joachim Eastwood <manabian@gmail.com>
+ *
+ * Based on TI DMA Crossbar driver by:
+ * Copyright (C) 2015 Texas Instruments Incorporated - http://www.ti.com
+ * Author: Peter Ujfalusi <peter.ujfalusi@ti.com>
+ */
+
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/mfd/syscon.h>
+#include <linux/of_device.h>
+#include <linux/of_dma.h>
+#include <linux/regmap.h>
+#include <linux/spinlock.h>
+
+/* CREG register offset and macros for mux manipulation */
+#define LPC18XX_CREG_DMAMUX 0x11c
+#define LPC18XX_DMAMUX_VAL(v, n) ((v) << (n * 2))
+#define LPC18XX_DMAMUX_MASK(n) (0x3 << (n * 2))
+#define LPC18XX_DMAMUX_MAX_VAL 0x3
+
+struct lpc18xx_dmamux {
+ u32 value;
+ bool busy;
+};
+
+struct lpc18xx_dmamux_data {
+ struct dma_router dmarouter;
+ struct lpc18xx_dmamux *muxes;
+ u32 dma_master_requests;
+ u32 dma_mux_requests;
+ struct regmap *reg;
+ spinlock_t lock;
+};
+
+static void lpc18xx_dmamux_free(struct device *dev, void *route_data)
+{
+ struct lpc18xx_dmamux_data *dmamux = dev_get_drvdata(dev);
+ struct lpc18xx_dmamux *mux = route_data;
+ unsigned long flags;
+
+ spin_lock_irqsave(&dmamux->lock, flags);
+ mux->busy = false;
+ spin_unlock_irqrestore(&dmamux->lock, flags);
+}
+
+static void *lpc18xx_dmamux_reserve(struct of_phandle_args *dma_spec,
+ struct of_dma *ofdma)
+{
+ struct platform_device *pdev = of_find_device_by_node(ofdma->of_node);
+ struct lpc18xx_dmamux_data *dmamux = platform_get_drvdata(pdev);
+ unsigned long flags;
+ unsigned mux;
+
+ if (dma_spec->args_count != 3) {
+ dev_err(&pdev->dev, "invalid number of dma mux args\n");
+ return ERR_PTR(-EINVAL);
+ }
+
+ mux = dma_spec->args[0];
+ if (mux >= dmamux->dma_master_requests) {
+ dev_err(&pdev->dev, "invalid mux number: %d\n",
+ dma_spec->args[0]);
+ return ERR_PTR(-EINVAL);
+ }
+
+ if (dma_spec->args[1] > LPC18XX_DMAMUX_MAX_VAL) {
+ dev_err(&pdev->dev, "invalid dma mux value: %d\n",
+ dma_spec->args[1]);
+ return ERR_PTR(-EINVAL);
+ }
+
+ /* The of_node_put() will be done in the core for the node */
+ dma_spec->np = of_parse_phandle(ofdma->of_node, "dma-masters", 0);
+ if (!dma_spec->np) {
+ dev_err(&pdev->dev, "can't get dma master\n");
+ return ERR_PTR(-EINVAL);
+ }
+
+ spin_lock_irqsave(&dmamux->lock, flags);
+ if (dmamux->muxes[mux].busy) {
+ spin_unlock_irqrestore(&dmamux->lock, flags);
+ dev_err(&pdev->dev, "dma request %u busy with %u.%u\n",
+ mux, mux, dmamux->muxes[mux].value);
+ of_node_put(dma_spec->np);
+ return ERR_PTR(-EBUSY);
+ }
+
+ dmamux->muxes[mux].busy = true;
+ dmamux->muxes[mux].value = dma_spec->args[1];
+
+ regmap_update_bits(dmamux->reg, LPC18XX_CREG_DMAMUX,
+ LPC18XX_DMAMUX_MASK(mux),
+ LPC18XX_DMAMUX_VAL(dmamux->muxes[mux].value, mux));
+ spin_unlock_irqrestore(&dmamux->lock, flags);
+
+ dma_spec->args[1] = dma_spec->args[2];
+ dma_spec->args_count = 2;
+
+ dev_dbg(&pdev->dev, "mapping dmamux %u.%u to dma request %u\n", mux,
+ dmamux->muxes[mux].value, mux);
+
+ return &dmamux->muxes[mux];
+}
+
+static int lpc18xx_dmamux_probe(struct platform_device *pdev)
+{
+ struct device_node *dma_np, *np = pdev->dev.of_node;
+ struct lpc18xx_dmamux_data *dmamux;
+ int ret;
+
+ dmamux = devm_kzalloc(&pdev->dev, sizeof(*dmamux), GFP_KERNEL);
+ if (!dmamux)
+ return -ENOMEM;
+
+ dmamux->reg = syscon_regmap_lookup_by_compatible("nxp,lpc1850-creg");
+ if (IS_ERR(dmamux->reg)) {
+ dev_err(&pdev->dev, "syscon lookup failed\n");
+ return PTR_ERR(dmamux->reg);
+ }
+
+ ret = of_property_read_u32(np, "dma-requests",
+ &dmamux->dma_mux_requests);
+ if (ret) {
+ dev_err(&pdev->dev, "missing dma-requests property\n");
+ return ret;
+ }
+
+ dma_np = of_parse_phandle(np, "dma-masters", 0);
+ if (!dma_np) {
+ dev_err(&pdev->dev, "can't get dma master\n");
+ return -ENODEV;
+ }
+
+ ret = of_property_read_u32(dma_np, "dma-requests",
+ &dmamux->dma_master_requests);
+ of_node_put(dma_np);
+ if (ret) {
+ dev_err(&pdev->dev, "missing master dma-requests property\n");
+ return ret;
+ }
+
+ dmamux->muxes = devm_kcalloc(&pdev->dev, dmamux->dma_master_requests,
+ sizeof(struct lpc18xx_dmamux),
+ GFP_KERNEL);
+ if (!dmamux->muxes)
+ return -ENOMEM;
+
+ spin_lock_init(&dmamux->lock);
+ platform_set_drvdata(pdev, dmamux);
+ dmamux->dmarouter.dev = &pdev->dev;
+ dmamux->dmarouter.route_free = lpc18xx_dmamux_free;
+
+ return of_dma_router_register(np, lpc18xx_dmamux_reserve,
+ &dmamux->dmarouter);
+}
+
+static const struct of_device_id lpc18xx_dmamux_match[] = {
+ { .compatible = "nxp,lpc1850-dmamux" },
+ {},
+};
+
+static struct platform_driver lpc18xx_dmamux_driver = {
+ .probe = lpc18xx_dmamux_probe,
+ .driver = {
+ .name = "lpc18xx-dmamux",
+ .of_match_table = lpc18xx_dmamux_match,
+ },
+};
+
+static int __init lpc18xx_dmamux_init(void)
+{
+ return platform_driver_register(&lpc18xx_dmamux_driver);
+}
+arch_initcall(lpc18xx_dmamux_init);
diff --git a/drivers/dma/mcf-edma.c b/drivers/dma/mcf-edma.c
new file mode 100644
index 000000000..60d3c5f09
--- /dev/null
+++ b/drivers/dma/mcf-edma.c
@@ -0,0 +1,331 @@
+// SPDX-License-Identifier: GPL-2.0+
+//
+// Copyright (c) 2013-2014 Freescale Semiconductor, Inc
+// Copyright (c) 2017 Sysam, Angelo Dureghello <angelo@sysam.it>
+
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/dmaengine.h>
+#include <linux/platform_device.h>
+#include <linux/platform_data/dma-mcf-edma.h>
+
+#include "fsl-edma-common.h"
+
+#define EDMA_CHANNELS 64
+#define EDMA_MASK_CH(x) ((x) & GENMASK(5, 0))
+
+static irqreturn_t mcf_edma_tx_handler(int irq, void *dev_id)
+{
+ struct fsl_edma_engine *mcf_edma = dev_id;
+ struct edma_regs *regs = &mcf_edma->regs;
+ unsigned int ch;
+ struct fsl_edma_chan *mcf_chan;
+ u64 intmap;
+
+ intmap = ioread32(regs->inth);
+ intmap <<= 32;
+ intmap |= ioread32(regs->intl);
+ if (!intmap)
+ return IRQ_NONE;
+
+ for (ch = 0; ch < mcf_edma->n_chans; ch++) {
+ if (intmap & BIT(ch)) {
+ iowrite8(EDMA_MASK_CH(ch), regs->cint);
+
+ mcf_chan = &mcf_edma->chans[ch];
+
+ spin_lock(&mcf_chan->vchan.lock);
+
+ if (!mcf_chan->edesc) {
+ /* terminate_all called before */
+ spin_unlock(&mcf_chan->vchan.lock);
+ continue;
+ }
+
+ if (!mcf_chan->edesc->iscyclic) {
+ list_del(&mcf_chan->edesc->vdesc.node);
+ vchan_cookie_complete(&mcf_chan->edesc->vdesc);
+ mcf_chan->edesc = NULL;
+ mcf_chan->status = DMA_COMPLETE;
+ mcf_chan->idle = true;
+ } else {
+ vchan_cyclic_callback(&mcf_chan->edesc->vdesc);
+ }
+
+ if (!mcf_chan->edesc)
+ fsl_edma_xfer_desc(mcf_chan);
+
+ spin_unlock(&mcf_chan->vchan.lock);
+ }
+ }
+
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t mcf_edma_err_handler(int irq, void *dev_id)
+{
+ struct fsl_edma_engine *mcf_edma = dev_id;
+ struct edma_regs *regs = &mcf_edma->regs;
+ unsigned int err, ch;
+
+ err = ioread32(regs->errl);
+ if (!err)
+ return IRQ_NONE;
+
+ for (ch = 0; ch < (EDMA_CHANNELS / 2); ch++) {
+ if (err & BIT(ch)) {
+ fsl_edma_disable_request(&mcf_edma->chans[ch]);
+ iowrite8(EDMA_CERR_CERR(ch), regs->cerr);
+ mcf_edma->chans[ch].status = DMA_ERROR;
+ mcf_edma->chans[ch].idle = true;
+ }
+ }
+
+ err = ioread32(regs->errh);
+ if (!err)
+ return IRQ_NONE;
+
+ for (ch = (EDMA_CHANNELS / 2); ch < EDMA_CHANNELS; ch++) {
+ if (err & (BIT(ch - (EDMA_CHANNELS / 2)))) {
+ fsl_edma_disable_request(&mcf_edma->chans[ch]);
+ iowrite8(EDMA_CERR_CERR(ch), regs->cerr);
+ mcf_edma->chans[ch].status = DMA_ERROR;
+ mcf_edma->chans[ch].idle = true;
+ }
+ }
+
+ return IRQ_HANDLED;
+}
+
+static int mcf_edma_irq_init(struct platform_device *pdev,
+ struct fsl_edma_engine *mcf_edma)
+{
+ int ret = 0, i;
+ struct resource *res;
+
+ res = platform_get_resource_byname(pdev,
+ IORESOURCE_IRQ, "edma-tx-00-15");
+ if (!res)
+ return -1;
+
+ for (ret = 0, i = res->start; i <= res->end; ++i)
+ ret |= request_irq(i, mcf_edma_tx_handler, 0, "eDMA", mcf_edma);
+ if (ret)
+ return ret;
+
+ res = platform_get_resource_byname(pdev,
+ IORESOURCE_IRQ, "edma-tx-16-55");
+ if (!res)
+ return -1;
+
+ for (ret = 0, i = res->start; i <= res->end; ++i)
+ ret |= request_irq(i, mcf_edma_tx_handler, 0, "eDMA", mcf_edma);
+ if (ret)
+ return ret;
+
+ ret = platform_get_irq_byname(pdev, "edma-tx-56-63");
+ if (ret != -ENXIO) {
+ ret = request_irq(ret, mcf_edma_tx_handler,
+ 0, "eDMA", mcf_edma);
+ if (ret)
+ return ret;
+ }
+
+ ret = platform_get_irq_byname(pdev, "edma-err");
+ if (ret != -ENXIO) {
+ ret = request_irq(ret, mcf_edma_err_handler,
+ 0, "eDMA", mcf_edma);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static void mcf_edma_irq_free(struct platform_device *pdev,
+ struct fsl_edma_engine *mcf_edma)
+{
+ int irq;
+ struct resource *res;
+
+ res = platform_get_resource_byname(pdev,
+ IORESOURCE_IRQ, "edma-tx-00-15");
+ if (res) {
+ for (irq = res->start; irq <= res->end; irq++)
+ free_irq(irq, mcf_edma);
+ }
+
+ res = platform_get_resource_byname(pdev,
+ IORESOURCE_IRQ, "edma-tx-16-55");
+ if (res) {
+ for (irq = res->start; irq <= res->end; irq++)
+ free_irq(irq, mcf_edma);
+ }
+
+ irq = platform_get_irq_byname(pdev, "edma-tx-56-63");
+ if (irq != -ENXIO)
+ free_irq(irq, mcf_edma);
+
+ irq = platform_get_irq_byname(pdev, "edma-err");
+ if (irq != -ENXIO)
+ free_irq(irq, mcf_edma);
+}
+
+static struct fsl_edma_drvdata mcf_data = {
+ .version = v2,
+ .setup_irq = mcf_edma_irq_init,
+};
+
+static int mcf_edma_probe(struct platform_device *pdev)
+{
+ struct mcf_edma_platform_data *pdata;
+ struct fsl_edma_engine *mcf_edma;
+ struct fsl_edma_chan *mcf_chan;
+ struct edma_regs *regs;
+ struct resource *res;
+ int ret, i, len, chans;
+
+ pdata = dev_get_platdata(&pdev->dev);
+ if (!pdata) {
+ dev_err(&pdev->dev, "no platform data supplied\n");
+ return -EINVAL;
+ }
+
+ if (!pdata->dma_channels) {
+ dev_info(&pdev->dev, "setting default channel number to 64");
+ chans = 64;
+ } else {
+ chans = pdata->dma_channels;
+ }
+
+ len = sizeof(*mcf_edma) + sizeof(*mcf_chan) * chans;
+ mcf_edma = devm_kzalloc(&pdev->dev, len, GFP_KERNEL);
+ if (!mcf_edma)
+ return -ENOMEM;
+
+ mcf_edma->n_chans = chans;
+
+ /* Set up drvdata for ColdFire edma */
+ mcf_edma->drvdata = &mcf_data;
+ mcf_edma->big_endian = 1;
+
+ mutex_init(&mcf_edma->fsl_edma_mutex);
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+
+ mcf_edma->membase = devm_ioremap_resource(&pdev->dev, res);
+ if (IS_ERR(mcf_edma->membase))
+ return PTR_ERR(mcf_edma->membase);
+
+ fsl_edma_setup_regs(mcf_edma);
+ regs = &mcf_edma->regs;
+
+ INIT_LIST_HEAD(&mcf_edma->dma_dev.channels);
+ for (i = 0; i < mcf_edma->n_chans; i++) {
+ struct fsl_edma_chan *mcf_chan = &mcf_edma->chans[i];
+
+ mcf_chan->edma = mcf_edma;
+ mcf_chan->slave_id = i;
+ mcf_chan->idle = true;
+ mcf_chan->dma_dir = DMA_NONE;
+ mcf_chan->vchan.desc_free = fsl_edma_free_desc;
+ vchan_init(&mcf_chan->vchan, &mcf_edma->dma_dev);
+ iowrite32(0x0, &regs->tcd[i].csr);
+ }
+
+ iowrite32(~0, regs->inth);
+ iowrite32(~0, regs->intl);
+
+ ret = mcf_edma->drvdata->setup_irq(pdev, mcf_edma);
+ if (ret)
+ return ret;
+
+ dma_cap_set(DMA_PRIVATE, mcf_edma->dma_dev.cap_mask);
+ dma_cap_set(DMA_SLAVE, mcf_edma->dma_dev.cap_mask);
+ dma_cap_set(DMA_CYCLIC, mcf_edma->dma_dev.cap_mask);
+
+ mcf_edma->dma_dev.dev = &pdev->dev;
+ mcf_edma->dma_dev.device_alloc_chan_resources =
+ fsl_edma_alloc_chan_resources;
+ mcf_edma->dma_dev.device_free_chan_resources =
+ fsl_edma_free_chan_resources;
+ mcf_edma->dma_dev.device_config = fsl_edma_slave_config;
+ mcf_edma->dma_dev.device_prep_dma_cyclic =
+ fsl_edma_prep_dma_cyclic;
+ mcf_edma->dma_dev.device_prep_slave_sg = fsl_edma_prep_slave_sg;
+ mcf_edma->dma_dev.device_tx_status = fsl_edma_tx_status;
+ mcf_edma->dma_dev.device_pause = fsl_edma_pause;
+ mcf_edma->dma_dev.device_resume = fsl_edma_resume;
+ mcf_edma->dma_dev.device_terminate_all = fsl_edma_terminate_all;
+ mcf_edma->dma_dev.device_issue_pending = fsl_edma_issue_pending;
+
+ mcf_edma->dma_dev.src_addr_widths = FSL_EDMA_BUSWIDTHS;
+ mcf_edma->dma_dev.dst_addr_widths = FSL_EDMA_BUSWIDTHS;
+ mcf_edma->dma_dev.directions =
+ BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
+
+ mcf_edma->dma_dev.filter.fn = mcf_edma_filter_fn;
+ mcf_edma->dma_dev.filter.map = pdata->slave_map;
+ mcf_edma->dma_dev.filter.mapcnt = pdata->slavecnt;
+
+ platform_set_drvdata(pdev, mcf_edma);
+
+ ret = dma_async_device_register(&mcf_edma->dma_dev);
+ if (ret) {
+ dev_err(&pdev->dev,
+ "Can't register Freescale eDMA engine. (%d)\n", ret);
+ return ret;
+ }
+
+ /* Enable round robin arbitration */
+ iowrite32(EDMA_CR_ERGA | EDMA_CR_ERCA, regs->cr);
+
+ return 0;
+}
+
+static int mcf_edma_remove(struct platform_device *pdev)
+{
+ struct fsl_edma_engine *mcf_edma = platform_get_drvdata(pdev);
+
+ mcf_edma_irq_free(pdev, mcf_edma);
+ fsl_edma_cleanup_vchan(&mcf_edma->dma_dev);
+ dma_async_device_unregister(&mcf_edma->dma_dev);
+
+ return 0;
+}
+
+static struct platform_driver mcf_edma_driver = {
+ .driver = {
+ .name = "mcf-edma",
+ },
+ .probe = mcf_edma_probe,
+ .remove = mcf_edma_remove,
+};
+
+bool mcf_edma_filter_fn(struct dma_chan *chan, void *param)
+{
+ if (chan->device->dev->driver == &mcf_edma_driver.driver) {
+ struct fsl_edma_chan *mcf_chan = to_fsl_edma_chan(chan);
+
+ return (mcf_chan->slave_id == (uintptr_t)param);
+ }
+
+ return false;
+}
+EXPORT_SYMBOL(mcf_edma_filter_fn);
+
+static int __init mcf_edma_init(void)
+{
+ return platform_driver_register(&mcf_edma_driver);
+}
+subsys_initcall(mcf_edma_init);
+
+static void __exit mcf_edma_exit(void)
+{
+ platform_driver_unregister(&mcf_edma_driver);
+}
+module_exit(mcf_edma_exit);
+
+MODULE_ALIAS("platform:mcf-edma");
+MODULE_DESCRIPTION("Freescale eDMA engine driver, ColdFire family");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/mediatek/Kconfig b/drivers/dma/mediatek/Kconfig
new file mode 100644
index 000000000..7a46a5455
--- /dev/null
+++ b/drivers/dma/mediatek/Kconfig
@@ -0,0 +1,38 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+config MTK_HSDMA
+ tristate "MediaTek High-Speed DMA controller support"
+ depends on ARCH_MEDIATEK || COMPILE_TEST
+ select DMA_ENGINE
+ select DMA_VIRTUAL_CHANNELS
+ help
+ Enable support for High-Speed DMA controller on MediaTek
+ SoCs.
+
+ This controller provides the channels which is dedicated to
+ memory-to-memory transfer to offload from CPU through ring-
+ based descriptor management.
+
+config MTK_CQDMA
+ tristate "MediaTek Command-Queue DMA controller support"
+ depends on ARCH_MEDIATEK || COMPILE_TEST
+ select DMA_ENGINE
+ select DMA_VIRTUAL_CHANNELS
+ select ASYNC_TX_ENABLE_CHANNEL_SWITCH
+ help
+ Enable support for Command-Queue DMA controller on MediaTek
+ SoCs.
+
+ This controller provides the channels which is dedicated to
+ memory-to-memory transfer to offload from CPU.
+
+config MTK_UART_APDMA
+ tristate "MediaTek SoCs APDMA support for UART"
+ depends on OF && SERIAL_8250_MT6577
+ select DMA_ENGINE
+ select DMA_VIRTUAL_CHANNELS
+ help
+ Support for the UART DMA engine found on MediaTek MTK SoCs.
+ When SERIAL_8250_MT6577 is enabled, and if you want to use DMA,
+ you can enable the config. The DMA engine can only be used
+ with MediaTek SoCs.
diff --git a/drivers/dma/mediatek/Makefile b/drivers/dma/mediatek/Makefile
new file mode 100644
index 000000000..5ba39a5ed
--- /dev/null
+++ b/drivers/dma/mediatek/Makefile
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
+obj-$(CONFIG_MTK_UART_APDMA) += mtk-uart-apdma.o
+obj-$(CONFIG_MTK_HSDMA) += mtk-hsdma.o
+obj-$(CONFIG_MTK_CQDMA) += mtk-cqdma.o
diff --git a/drivers/dma/mediatek/mtk-cqdma.c b/drivers/dma/mediatek/mtk-cqdma.c
new file mode 100644
index 000000000..9ae92b894
--- /dev/null
+++ b/drivers/dma/mediatek/mtk-cqdma.c
@@ -0,0 +1,938 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018-2019 MediaTek Inc.
+
+/*
+ * Driver for MediaTek Command-Queue DMA Controller
+ *
+ * Author: Shun-Chih Yu <shun-chih.yu@mediatek.com>
+ *
+ */
+
+#include <linux/bitops.h>
+#include <linux/clk.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/err.h>
+#include <linux/iopoll.h>
+#include <linux/interrupt.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/of_dma.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/refcount.h>
+#include <linux/slab.h>
+
+#include "../virt-dma.h"
+
+#define MTK_CQDMA_USEC_POLL 10
+#define MTK_CQDMA_TIMEOUT_POLL 1000
+#define MTK_CQDMA_DMA_BUSWIDTHS BIT(DMA_SLAVE_BUSWIDTH_4_BYTES)
+#define MTK_CQDMA_ALIGN_SIZE 1
+
+/* The default number of virtual channel */
+#define MTK_CQDMA_NR_VCHANS 32
+
+/* The default number of physical channel */
+#define MTK_CQDMA_NR_PCHANS 3
+
+/* Registers for underlying dma manipulation */
+#define MTK_CQDMA_INT_FLAG 0x0
+#define MTK_CQDMA_INT_EN 0x4
+#define MTK_CQDMA_EN 0x8
+#define MTK_CQDMA_RESET 0xc
+#define MTK_CQDMA_FLUSH 0x14
+#define MTK_CQDMA_SRC 0x1c
+#define MTK_CQDMA_DST 0x20
+#define MTK_CQDMA_LEN1 0x24
+#define MTK_CQDMA_LEN2 0x28
+#define MTK_CQDMA_SRC2 0x60
+#define MTK_CQDMA_DST2 0x64
+
+/* Registers setting */
+#define MTK_CQDMA_EN_BIT BIT(0)
+#define MTK_CQDMA_INT_FLAG_BIT BIT(0)
+#define MTK_CQDMA_INT_EN_BIT BIT(0)
+#define MTK_CQDMA_FLUSH_BIT BIT(0)
+
+#define MTK_CQDMA_WARM_RST_BIT BIT(0)
+#define MTK_CQDMA_HARD_RST_BIT BIT(1)
+
+#define MTK_CQDMA_MAX_LEN GENMASK(27, 0)
+#define MTK_CQDMA_ADDR_LIMIT GENMASK(31, 0)
+#define MTK_CQDMA_ADDR2_SHFIT (32)
+
+/**
+ * struct mtk_cqdma_vdesc - The struct holding info describing virtual
+ * descriptor (CVD)
+ * @vd: An instance for struct virt_dma_desc
+ * @len: The total data size device wants to move
+ * @residue: The remaining data size device will move
+ * @dest: The destination address device wants to move to
+ * @src: The source address device wants to move from
+ * @ch: The pointer to the corresponding dma channel
+ * @node: The lise_head struct to build link-list for VDs
+ * @parent: The pointer to the parent CVD
+ */
+struct mtk_cqdma_vdesc {
+ struct virt_dma_desc vd;
+ size_t len;
+ size_t residue;
+ dma_addr_t dest;
+ dma_addr_t src;
+ struct dma_chan *ch;
+
+ struct list_head node;
+ struct mtk_cqdma_vdesc *parent;
+};
+
+/**
+ * struct mtk_cqdma_pchan - The struct holding info describing physical
+ * channel (PC)
+ * @queue: Queue for the PDs issued to this PC
+ * @base: The mapped register I/O base of this PC
+ * @irq: The IRQ that this PC are using
+ * @refcnt: Track how many VCs are using this PC
+ * @tasklet: Tasklet for this PC
+ * @lock: Lock protect agaisting multiple VCs access PC
+ */
+struct mtk_cqdma_pchan {
+ struct list_head queue;
+ void __iomem *base;
+ u32 irq;
+
+ refcount_t refcnt;
+
+ struct tasklet_struct tasklet;
+
+ /* lock to protect PC */
+ spinlock_t lock;
+};
+
+/**
+ * struct mtk_cqdma_vchan - The struct holding info describing virtual
+ * channel (VC)
+ * @vc: An instance for struct virt_dma_chan
+ * @pc: The pointer to the underlying PC
+ * @issue_completion: The wait for all issued descriptors completited
+ * @issue_synchronize: Bool indicating channel synchronization starts
+ */
+struct mtk_cqdma_vchan {
+ struct virt_dma_chan vc;
+ struct mtk_cqdma_pchan *pc;
+ struct completion issue_completion;
+ bool issue_synchronize;
+};
+
+/**
+ * struct mtk_cqdma_device - The struct holding info describing CQDMA
+ * device
+ * @ddev: An instance for struct dma_device
+ * @clk: The clock that device internal is using
+ * @dma_requests: The number of VCs the device supports to
+ * @dma_channels: The number of PCs the device supports to
+ * @vc: The pointer to all available VCs
+ * @pc: The pointer to all the underlying PCs
+ */
+struct mtk_cqdma_device {
+ struct dma_device ddev;
+ struct clk *clk;
+
+ u32 dma_requests;
+ u32 dma_channels;
+ struct mtk_cqdma_vchan *vc;
+ struct mtk_cqdma_pchan **pc;
+};
+
+static struct mtk_cqdma_device *to_cqdma_dev(struct dma_chan *chan)
+{
+ return container_of(chan->device, struct mtk_cqdma_device, ddev);
+}
+
+static struct mtk_cqdma_vchan *to_cqdma_vchan(struct dma_chan *chan)
+{
+ return container_of(chan, struct mtk_cqdma_vchan, vc.chan);
+}
+
+static struct mtk_cqdma_vdesc *to_cqdma_vdesc(struct virt_dma_desc *vd)
+{
+ return container_of(vd, struct mtk_cqdma_vdesc, vd);
+}
+
+static struct device *cqdma2dev(struct mtk_cqdma_device *cqdma)
+{
+ return cqdma->ddev.dev;
+}
+
+static u32 mtk_dma_read(struct mtk_cqdma_pchan *pc, u32 reg)
+{
+ return readl(pc->base + reg);
+}
+
+static void mtk_dma_write(struct mtk_cqdma_pchan *pc, u32 reg, u32 val)
+{
+ writel_relaxed(val, pc->base + reg);
+}
+
+static void mtk_dma_rmw(struct mtk_cqdma_pchan *pc, u32 reg,
+ u32 mask, u32 set)
+{
+ u32 val;
+
+ val = mtk_dma_read(pc, reg);
+ val &= ~mask;
+ val |= set;
+ mtk_dma_write(pc, reg, val);
+}
+
+static void mtk_dma_set(struct mtk_cqdma_pchan *pc, u32 reg, u32 val)
+{
+ mtk_dma_rmw(pc, reg, 0, val);
+}
+
+static void mtk_dma_clr(struct mtk_cqdma_pchan *pc, u32 reg, u32 val)
+{
+ mtk_dma_rmw(pc, reg, val, 0);
+}
+
+static void mtk_cqdma_vdesc_free(struct virt_dma_desc *vd)
+{
+ kfree(to_cqdma_vdesc(vd));
+}
+
+static int mtk_cqdma_poll_engine_done(struct mtk_cqdma_pchan *pc, bool atomic)
+{
+ u32 status = 0;
+
+ if (!atomic)
+ return readl_poll_timeout(pc->base + MTK_CQDMA_EN,
+ status,
+ !(status & MTK_CQDMA_EN_BIT),
+ MTK_CQDMA_USEC_POLL,
+ MTK_CQDMA_TIMEOUT_POLL);
+
+ return readl_poll_timeout_atomic(pc->base + MTK_CQDMA_EN,
+ status,
+ !(status & MTK_CQDMA_EN_BIT),
+ MTK_CQDMA_USEC_POLL,
+ MTK_CQDMA_TIMEOUT_POLL);
+}
+
+static int mtk_cqdma_hard_reset(struct mtk_cqdma_pchan *pc)
+{
+ mtk_dma_set(pc, MTK_CQDMA_RESET, MTK_CQDMA_HARD_RST_BIT);
+ mtk_dma_clr(pc, MTK_CQDMA_RESET, MTK_CQDMA_HARD_RST_BIT);
+
+ return mtk_cqdma_poll_engine_done(pc, true);
+}
+
+static void mtk_cqdma_start(struct mtk_cqdma_pchan *pc,
+ struct mtk_cqdma_vdesc *cvd)
+{
+ /* wait for the previous transaction done */
+ if (mtk_cqdma_poll_engine_done(pc, true) < 0)
+ dev_err(cqdma2dev(to_cqdma_dev(cvd->ch)), "cqdma wait transaction timeout\n");
+
+ /* warm reset the dma engine for the new transaction */
+ mtk_dma_set(pc, MTK_CQDMA_RESET, MTK_CQDMA_WARM_RST_BIT);
+ if (mtk_cqdma_poll_engine_done(pc, true) < 0)
+ dev_err(cqdma2dev(to_cqdma_dev(cvd->ch)), "cqdma warm reset timeout\n");
+
+ /* setup the source */
+ mtk_dma_set(pc, MTK_CQDMA_SRC, cvd->src & MTK_CQDMA_ADDR_LIMIT);
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+ mtk_dma_set(pc, MTK_CQDMA_SRC2, cvd->src >> MTK_CQDMA_ADDR2_SHFIT);
+#else
+ mtk_dma_set(pc, MTK_CQDMA_SRC2, 0);
+#endif
+
+ /* setup the destination */
+ mtk_dma_set(pc, MTK_CQDMA_DST, cvd->dest & MTK_CQDMA_ADDR_LIMIT);
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+ mtk_dma_set(pc, MTK_CQDMA_DST2, cvd->dest >> MTK_CQDMA_ADDR2_SHFIT);
+#else
+ mtk_dma_set(pc, MTK_CQDMA_DST2, 0);
+#endif
+
+ /* setup the length */
+ mtk_dma_set(pc, MTK_CQDMA_LEN1, cvd->len);
+
+ /* start dma engine */
+ mtk_dma_set(pc, MTK_CQDMA_EN, MTK_CQDMA_EN_BIT);
+}
+
+static void mtk_cqdma_issue_vchan_pending(struct mtk_cqdma_vchan *cvc)
+{
+ struct virt_dma_desc *vd, *vd2;
+ struct mtk_cqdma_pchan *pc = cvc->pc;
+ struct mtk_cqdma_vdesc *cvd;
+ bool trigger_engine = false;
+
+ lockdep_assert_held(&cvc->vc.lock);
+ lockdep_assert_held(&pc->lock);
+
+ list_for_each_entry_safe(vd, vd2, &cvc->vc.desc_issued, node) {
+ /* need to trigger dma engine if PC's queue is empty */
+ if (list_empty(&pc->queue))
+ trigger_engine = true;
+
+ cvd = to_cqdma_vdesc(vd);
+
+ /* add VD into PC's queue */
+ list_add_tail(&cvd->node, &pc->queue);
+
+ /* start the dma engine */
+ if (trigger_engine)
+ mtk_cqdma_start(pc, cvd);
+
+ /* remove VD from list desc_issued */
+ list_del(&vd->node);
+ }
+}
+
+/*
+ * return true if this VC is active,
+ * meaning that there are VDs under processing by the PC
+ */
+static bool mtk_cqdma_is_vchan_active(struct mtk_cqdma_vchan *cvc)
+{
+ struct mtk_cqdma_vdesc *cvd;
+
+ list_for_each_entry(cvd, &cvc->pc->queue, node)
+ if (cvc == to_cqdma_vchan(cvd->ch))
+ return true;
+
+ return false;
+}
+
+/*
+ * return the pointer of the CVD that is just consumed by the PC
+ */
+static struct mtk_cqdma_vdesc
+*mtk_cqdma_consume_work_queue(struct mtk_cqdma_pchan *pc)
+{
+ struct mtk_cqdma_vchan *cvc;
+ struct mtk_cqdma_vdesc *cvd, *ret = NULL;
+
+ /* consume a CVD from PC's queue */
+ cvd = list_first_entry_or_null(&pc->queue,
+ struct mtk_cqdma_vdesc, node);
+ if (unlikely(!cvd || !cvd->parent))
+ return NULL;
+
+ cvc = to_cqdma_vchan(cvd->ch);
+ ret = cvd;
+
+ /* update residue of the parent CVD */
+ cvd->parent->residue -= cvd->len;
+
+ /* delete CVD from PC's queue */
+ list_del(&cvd->node);
+
+ spin_lock(&cvc->vc.lock);
+
+ /* check whether all the child CVDs completed */
+ if (!cvd->parent->residue) {
+ /* add the parent VD into list desc_completed */
+ vchan_cookie_complete(&cvd->parent->vd);
+
+ /* setup completion if this VC is under synchronization */
+ if (cvc->issue_synchronize && !mtk_cqdma_is_vchan_active(cvc)) {
+ complete(&cvc->issue_completion);
+ cvc->issue_synchronize = false;
+ }
+ }
+
+ spin_unlock(&cvc->vc.lock);
+
+ /* start transaction for next CVD in the queue */
+ cvd = list_first_entry_or_null(&pc->queue,
+ struct mtk_cqdma_vdesc, node);
+ if (cvd)
+ mtk_cqdma_start(pc, cvd);
+
+ return ret;
+}
+
+static void mtk_cqdma_tasklet_cb(struct tasklet_struct *t)
+{
+ struct mtk_cqdma_pchan *pc = from_tasklet(pc, t, tasklet);
+ struct mtk_cqdma_vdesc *cvd = NULL;
+ unsigned long flags;
+
+ spin_lock_irqsave(&pc->lock, flags);
+ /* consume the queue */
+ cvd = mtk_cqdma_consume_work_queue(pc);
+ spin_unlock_irqrestore(&pc->lock, flags);
+
+ /* submit the next CVD */
+ if (cvd) {
+ dma_run_dependencies(&cvd->vd.tx);
+
+ /*
+ * free child CVD after completion.
+ * the parent CVD would be freed with desc_free by user.
+ */
+ if (cvd->parent != cvd)
+ kfree(cvd);
+ }
+
+ /* re-enable interrupt before leaving tasklet */
+ enable_irq(pc->irq);
+}
+
+static irqreturn_t mtk_cqdma_irq(int irq, void *devid)
+{
+ struct mtk_cqdma_device *cqdma = devid;
+ irqreturn_t ret = IRQ_NONE;
+ bool schedule_tasklet = false;
+ u32 i;
+
+ /* clear interrupt flags for each PC */
+ for (i = 0; i < cqdma->dma_channels; ++i, schedule_tasklet = false) {
+ spin_lock(&cqdma->pc[i]->lock);
+ if (mtk_dma_read(cqdma->pc[i],
+ MTK_CQDMA_INT_FLAG) & MTK_CQDMA_INT_FLAG_BIT) {
+ /* clear interrupt */
+ mtk_dma_clr(cqdma->pc[i], MTK_CQDMA_INT_FLAG,
+ MTK_CQDMA_INT_FLAG_BIT);
+
+ schedule_tasklet = true;
+ ret = IRQ_HANDLED;
+ }
+ spin_unlock(&cqdma->pc[i]->lock);
+
+ if (schedule_tasklet) {
+ /* disable interrupt */
+ disable_irq_nosync(cqdma->pc[i]->irq);
+
+ /* schedule the tasklet to handle the transactions */
+ tasklet_schedule(&cqdma->pc[i]->tasklet);
+ }
+ }
+
+ return ret;
+}
+
+static struct virt_dma_desc *mtk_cqdma_find_active_desc(struct dma_chan *c,
+ dma_cookie_t cookie)
+{
+ struct mtk_cqdma_vchan *cvc = to_cqdma_vchan(c);
+ struct virt_dma_desc *vd;
+ unsigned long flags;
+
+ spin_lock_irqsave(&cvc->pc->lock, flags);
+ list_for_each_entry(vd, &cvc->pc->queue, node)
+ if (vd->tx.cookie == cookie) {
+ spin_unlock_irqrestore(&cvc->pc->lock, flags);
+ return vd;
+ }
+ spin_unlock_irqrestore(&cvc->pc->lock, flags);
+
+ list_for_each_entry(vd, &cvc->vc.desc_issued, node)
+ if (vd->tx.cookie == cookie)
+ return vd;
+
+ return NULL;
+}
+
+static enum dma_status mtk_cqdma_tx_status(struct dma_chan *c,
+ dma_cookie_t cookie,
+ struct dma_tx_state *txstate)
+{
+ struct mtk_cqdma_vchan *cvc = to_cqdma_vchan(c);
+ struct mtk_cqdma_vdesc *cvd;
+ struct virt_dma_desc *vd;
+ enum dma_status ret;
+ unsigned long flags;
+ size_t bytes = 0;
+
+ ret = dma_cookie_status(c, cookie, txstate);
+ if (ret == DMA_COMPLETE || !txstate)
+ return ret;
+
+ spin_lock_irqsave(&cvc->vc.lock, flags);
+ vd = mtk_cqdma_find_active_desc(c, cookie);
+ spin_unlock_irqrestore(&cvc->vc.lock, flags);
+
+ if (vd) {
+ cvd = to_cqdma_vdesc(vd);
+ bytes = cvd->residue;
+ }
+
+ dma_set_residue(txstate, bytes);
+
+ return ret;
+}
+
+static void mtk_cqdma_issue_pending(struct dma_chan *c)
+{
+ struct mtk_cqdma_vchan *cvc = to_cqdma_vchan(c);
+ unsigned long pc_flags;
+ unsigned long vc_flags;
+
+ /* acquire PC's lock before VS's lock for lock dependency in tasklet */
+ spin_lock_irqsave(&cvc->pc->lock, pc_flags);
+ spin_lock_irqsave(&cvc->vc.lock, vc_flags);
+
+ if (vchan_issue_pending(&cvc->vc))
+ mtk_cqdma_issue_vchan_pending(cvc);
+
+ spin_unlock_irqrestore(&cvc->vc.lock, vc_flags);
+ spin_unlock_irqrestore(&cvc->pc->lock, pc_flags);
+}
+
+static struct dma_async_tx_descriptor *
+mtk_cqdma_prep_dma_memcpy(struct dma_chan *c, dma_addr_t dest,
+ dma_addr_t src, size_t len, unsigned long flags)
+{
+ struct mtk_cqdma_vdesc **cvd;
+ struct dma_async_tx_descriptor *tx = NULL, *prev_tx = NULL;
+ size_t i, tlen, nr_vd;
+
+ /*
+ * In the case that trsanction length is larger than the
+ * DMA engine supports, a single memcpy transaction needs
+ * to be separated into several DMA transactions.
+ * Each DMA transaction would be described by a CVD,
+ * and the first one is referred as the parent CVD,
+ * while the others are child CVDs.
+ * The parent CVD's tx descriptor is the only tx descriptor
+ * returned to the DMA user, and it should not be completed
+ * until all the child CVDs completed.
+ */
+ nr_vd = DIV_ROUND_UP(len, MTK_CQDMA_MAX_LEN);
+ cvd = kcalloc(nr_vd, sizeof(*cvd), GFP_NOWAIT);
+ if (!cvd)
+ return NULL;
+
+ for (i = 0; i < nr_vd; ++i) {
+ cvd[i] = kzalloc(sizeof(*cvd[i]), GFP_NOWAIT);
+ if (!cvd[i]) {
+ for (; i > 0; --i)
+ kfree(cvd[i - 1]);
+ return NULL;
+ }
+
+ /* setup dma channel */
+ cvd[i]->ch = c;
+
+ /* setup sourece, destination, and length */
+ tlen = (len > MTK_CQDMA_MAX_LEN) ? MTK_CQDMA_MAX_LEN : len;
+ cvd[i]->len = tlen;
+ cvd[i]->src = src;
+ cvd[i]->dest = dest;
+
+ /* setup tx descriptor */
+ tx = vchan_tx_prep(to_virt_chan(c), &cvd[i]->vd, flags);
+ tx->next = NULL;
+
+ if (!i) {
+ cvd[0]->residue = len;
+ } else {
+ prev_tx->next = tx;
+ cvd[i]->residue = tlen;
+ }
+
+ cvd[i]->parent = cvd[0];
+
+ /* update the src, dest, len, prev_tx for the next CVD */
+ src += tlen;
+ dest += tlen;
+ len -= tlen;
+ prev_tx = tx;
+ }
+
+ return &cvd[0]->vd.tx;
+}
+
+static void mtk_cqdma_free_inactive_desc(struct dma_chan *c)
+{
+ struct virt_dma_chan *vc = to_virt_chan(c);
+ unsigned long flags;
+ LIST_HEAD(head);
+
+ /*
+ * set desc_allocated, desc_submitted,
+ * and desc_issued as the candicates to be freed
+ */
+ spin_lock_irqsave(&vc->lock, flags);
+ list_splice_tail_init(&vc->desc_allocated, &head);
+ list_splice_tail_init(&vc->desc_submitted, &head);
+ list_splice_tail_init(&vc->desc_issued, &head);
+ spin_unlock_irqrestore(&vc->lock, flags);
+
+ /* free descriptor lists */
+ vchan_dma_desc_free_list(vc, &head);
+}
+
+static void mtk_cqdma_free_active_desc(struct dma_chan *c)
+{
+ struct mtk_cqdma_vchan *cvc = to_cqdma_vchan(c);
+ bool sync_needed = false;
+ unsigned long pc_flags;
+ unsigned long vc_flags;
+
+ /* acquire PC's lock first due to lock dependency in dma ISR */
+ spin_lock_irqsave(&cvc->pc->lock, pc_flags);
+ spin_lock_irqsave(&cvc->vc.lock, vc_flags);
+
+ /* synchronization is required if this VC is active */
+ if (mtk_cqdma_is_vchan_active(cvc)) {
+ cvc->issue_synchronize = true;
+ sync_needed = true;
+ }
+
+ spin_unlock_irqrestore(&cvc->vc.lock, vc_flags);
+ spin_unlock_irqrestore(&cvc->pc->lock, pc_flags);
+
+ /* waiting for the completion of this VC */
+ if (sync_needed)
+ wait_for_completion(&cvc->issue_completion);
+
+ /* free all descriptors in list desc_completed */
+ vchan_synchronize(&cvc->vc);
+
+ WARN_ONCE(!list_empty(&cvc->vc.desc_completed),
+ "Desc pending still in list desc_completed\n");
+}
+
+static int mtk_cqdma_terminate_all(struct dma_chan *c)
+{
+ /* free descriptors not processed yet by hardware */
+ mtk_cqdma_free_inactive_desc(c);
+
+ /* free descriptors being processed by hardware */
+ mtk_cqdma_free_active_desc(c);
+
+ return 0;
+}
+
+static int mtk_cqdma_alloc_chan_resources(struct dma_chan *c)
+{
+ struct mtk_cqdma_device *cqdma = to_cqdma_dev(c);
+ struct mtk_cqdma_vchan *vc = to_cqdma_vchan(c);
+ struct mtk_cqdma_pchan *pc = NULL;
+ u32 i, min_refcnt = U32_MAX, refcnt;
+ unsigned long flags;
+
+ /* allocate PC with the minimun refcount */
+ for (i = 0; i < cqdma->dma_channels; ++i) {
+ refcnt = refcount_read(&cqdma->pc[i]->refcnt);
+ if (refcnt < min_refcnt) {
+ pc = cqdma->pc[i];
+ min_refcnt = refcnt;
+ }
+ }
+
+ if (!pc)
+ return -ENOSPC;
+
+ spin_lock_irqsave(&pc->lock, flags);
+
+ if (!refcount_read(&pc->refcnt)) {
+ /* allocate PC when the refcount is zero */
+ mtk_cqdma_hard_reset(pc);
+
+ /* enable interrupt for this PC */
+ mtk_dma_set(pc, MTK_CQDMA_INT_EN, MTK_CQDMA_INT_EN_BIT);
+
+ /*
+ * refcount_inc would complain increment on 0; use-after-free.
+ * Thus, we need to explicitly set it as 1 initially.
+ */
+ refcount_set(&pc->refcnt, 1);
+ } else {
+ refcount_inc(&pc->refcnt);
+ }
+
+ spin_unlock_irqrestore(&pc->lock, flags);
+
+ vc->pc = pc;
+
+ return 0;
+}
+
+static void mtk_cqdma_free_chan_resources(struct dma_chan *c)
+{
+ struct mtk_cqdma_vchan *cvc = to_cqdma_vchan(c);
+ unsigned long flags;
+
+ /* free all descriptors in all lists on the VC */
+ mtk_cqdma_terminate_all(c);
+
+ spin_lock_irqsave(&cvc->pc->lock, flags);
+
+ /* PC is not freed until there is no VC mapped to it */
+ if (refcount_dec_and_test(&cvc->pc->refcnt)) {
+ /* start the flush operation and stop the engine */
+ mtk_dma_set(cvc->pc, MTK_CQDMA_FLUSH, MTK_CQDMA_FLUSH_BIT);
+
+ /* wait for the completion of flush operation */
+ if (mtk_cqdma_poll_engine_done(cvc->pc, true) < 0)
+ dev_err(cqdma2dev(to_cqdma_dev(c)), "cqdma flush timeout\n");
+
+ /* clear the flush bit and interrupt flag */
+ mtk_dma_clr(cvc->pc, MTK_CQDMA_FLUSH, MTK_CQDMA_FLUSH_BIT);
+ mtk_dma_clr(cvc->pc, MTK_CQDMA_INT_FLAG,
+ MTK_CQDMA_INT_FLAG_BIT);
+
+ /* disable interrupt for this PC */
+ mtk_dma_clr(cvc->pc, MTK_CQDMA_INT_EN, MTK_CQDMA_INT_EN_BIT);
+ }
+
+ spin_unlock_irqrestore(&cvc->pc->lock, flags);
+}
+
+static int mtk_cqdma_hw_init(struct mtk_cqdma_device *cqdma)
+{
+ unsigned long flags;
+ int err;
+ u32 i;
+
+ pm_runtime_enable(cqdma2dev(cqdma));
+ pm_runtime_get_sync(cqdma2dev(cqdma));
+
+ err = clk_prepare_enable(cqdma->clk);
+
+ if (err) {
+ pm_runtime_put_sync(cqdma2dev(cqdma));
+ pm_runtime_disable(cqdma2dev(cqdma));
+ return err;
+ }
+
+ /* reset all PCs */
+ for (i = 0; i < cqdma->dma_channels; ++i) {
+ spin_lock_irqsave(&cqdma->pc[i]->lock, flags);
+ if (mtk_cqdma_hard_reset(cqdma->pc[i]) < 0) {
+ dev_err(cqdma2dev(cqdma), "cqdma hard reset timeout\n");
+ spin_unlock_irqrestore(&cqdma->pc[i]->lock, flags);
+
+ clk_disable_unprepare(cqdma->clk);
+ pm_runtime_put_sync(cqdma2dev(cqdma));
+ pm_runtime_disable(cqdma2dev(cqdma));
+ return -EINVAL;
+ }
+ spin_unlock_irqrestore(&cqdma->pc[i]->lock, flags);
+ }
+
+ return 0;
+}
+
+static void mtk_cqdma_hw_deinit(struct mtk_cqdma_device *cqdma)
+{
+ unsigned long flags;
+ u32 i;
+
+ /* reset all PCs */
+ for (i = 0; i < cqdma->dma_channels; ++i) {
+ spin_lock_irqsave(&cqdma->pc[i]->lock, flags);
+ if (mtk_cqdma_hard_reset(cqdma->pc[i]) < 0)
+ dev_err(cqdma2dev(cqdma), "cqdma hard reset timeout\n");
+ spin_unlock_irqrestore(&cqdma->pc[i]->lock, flags);
+ }
+
+ clk_disable_unprepare(cqdma->clk);
+
+ pm_runtime_put_sync(cqdma2dev(cqdma));
+ pm_runtime_disable(cqdma2dev(cqdma));
+}
+
+static const struct of_device_id mtk_cqdma_match[] = {
+ { .compatible = "mediatek,mt6765-cqdma" },
+ { /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, mtk_cqdma_match);
+
+static int mtk_cqdma_probe(struct platform_device *pdev)
+{
+ struct mtk_cqdma_device *cqdma;
+ struct mtk_cqdma_vchan *vc;
+ struct dma_device *dd;
+ int err;
+ u32 i;
+
+ cqdma = devm_kzalloc(&pdev->dev, sizeof(*cqdma), GFP_KERNEL);
+ if (!cqdma)
+ return -ENOMEM;
+
+ dd = &cqdma->ddev;
+
+ cqdma->clk = devm_clk_get(&pdev->dev, "cqdma");
+ if (IS_ERR(cqdma->clk)) {
+ dev_err(&pdev->dev, "No clock for %s\n",
+ dev_name(&pdev->dev));
+ return PTR_ERR(cqdma->clk);
+ }
+
+ dma_cap_set(DMA_MEMCPY, dd->cap_mask);
+
+ dd->copy_align = MTK_CQDMA_ALIGN_SIZE;
+ dd->device_alloc_chan_resources = mtk_cqdma_alloc_chan_resources;
+ dd->device_free_chan_resources = mtk_cqdma_free_chan_resources;
+ dd->device_tx_status = mtk_cqdma_tx_status;
+ dd->device_issue_pending = mtk_cqdma_issue_pending;
+ dd->device_prep_dma_memcpy = mtk_cqdma_prep_dma_memcpy;
+ dd->device_terminate_all = mtk_cqdma_terminate_all;
+ dd->src_addr_widths = MTK_CQDMA_DMA_BUSWIDTHS;
+ dd->dst_addr_widths = MTK_CQDMA_DMA_BUSWIDTHS;
+ dd->directions = BIT(DMA_MEM_TO_MEM);
+ dd->residue_granularity = DMA_RESIDUE_GRANULARITY_SEGMENT;
+ dd->dev = &pdev->dev;
+ INIT_LIST_HEAD(&dd->channels);
+
+ if (pdev->dev.of_node && of_property_read_u32(pdev->dev.of_node,
+ "dma-requests",
+ &cqdma->dma_requests)) {
+ dev_info(&pdev->dev,
+ "Using %u as missing dma-requests property\n",
+ MTK_CQDMA_NR_VCHANS);
+
+ cqdma->dma_requests = MTK_CQDMA_NR_VCHANS;
+ }
+
+ if (pdev->dev.of_node && of_property_read_u32(pdev->dev.of_node,
+ "dma-channels",
+ &cqdma->dma_channels)) {
+ dev_info(&pdev->dev,
+ "Using %u as missing dma-channels property\n",
+ MTK_CQDMA_NR_PCHANS);
+
+ cqdma->dma_channels = MTK_CQDMA_NR_PCHANS;
+ }
+
+ cqdma->pc = devm_kcalloc(&pdev->dev, cqdma->dma_channels,
+ sizeof(*cqdma->pc), GFP_KERNEL);
+ if (!cqdma->pc)
+ return -ENOMEM;
+
+ /* initialization for PCs */
+ for (i = 0; i < cqdma->dma_channels; ++i) {
+ cqdma->pc[i] = devm_kcalloc(&pdev->dev, 1,
+ sizeof(**cqdma->pc), GFP_KERNEL);
+ if (!cqdma->pc[i])
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&cqdma->pc[i]->queue);
+ spin_lock_init(&cqdma->pc[i]->lock);
+ refcount_set(&cqdma->pc[i]->refcnt, 0);
+ cqdma->pc[i]->base = devm_platform_ioremap_resource(pdev, i);
+ if (IS_ERR(cqdma->pc[i]->base))
+ return PTR_ERR(cqdma->pc[i]->base);
+
+ /* allocate IRQ resource */
+ err = platform_get_irq(pdev, i);
+ if (err < 0)
+ return err;
+ cqdma->pc[i]->irq = err;
+
+ err = devm_request_irq(&pdev->dev, cqdma->pc[i]->irq,
+ mtk_cqdma_irq, 0, dev_name(&pdev->dev),
+ cqdma);
+ if (err) {
+ dev_err(&pdev->dev,
+ "request_irq failed with err %d\n", err);
+ return -EINVAL;
+ }
+ }
+
+ /* allocate resource for VCs */
+ cqdma->vc = devm_kcalloc(&pdev->dev, cqdma->dma_requests,
+ sizeof(*cqdma->vc), GFP_KERNEL);
+ if (!cqdma->vc)
+ return -ENOMEM;
+
+ for (i = 0; i < cqdma->dma_requests; i++) {
+ vc = &cqdma->vc[i];
+ vc->vc.desc_free = mtk_cqdma_vdesc_free;
+ vchan_init(&vc->vc, dd);
+ init_completion(&vc->issue_completion);
+ }
+
+ err = dma_async_device_register(dd);
+ if (err)
+ return err;
+
+ err = of_dma_controller_register(pdev->dev.of_node,
+ of_dma_xlate_by_chan_id, cqdma);
+ if (err) {
+ dev_err(&pdev->dev,
+ "MediaTek CQDMA OF registration failed %d\n", err);
+ goto err_unregister;
+ }
+
+ err = mtk_cqdma_hw_init(cqdma);
+ if (err) {
+ dev_err(&pdev->dev,
+ "MediaTek CQDMA HW initialization failed %d\n", err);
+ goto err_unregister;
+ }
+
+ platform_set_drvdata(pdev, cqdma);
+
+ /* initialize tasklet for each PC */
+ for (i = 0; i < cqdma->dma_channels; ++i)
+ tasklet_setup(&cqdma->pc[i]->tasklet, mtk_cqdma_tasklet_cb);
+
+ dev_info(&pdev->dev, "MediaTek CQDMA driver registered\n");
+
+ return 0;
+
+err_unregister:
+ dma_async_device_unregister(dd);
+
+ return err;
+}
+
+static int mtk_cqdma_remove(struct platform_device *pdev)
+{
+ struct mtk_cqdma_device *cqdma = platform_get_drvdata(pdev);
+ struct mtk_cqdma_vchan *vc;
+ unsigned long flags;
+ int i;
+
+ /* kill VC task */
+ for (i = 0; i < cqdma->dma_requests; i++) {
+ vc = &cqdma->vc[i];
+
+ list_del(&vc->vc.chan.device_node);
+ tasklet_kill(&vc->vc.task);
+ }
+
+ /* disable interrupt */
+ for (i = 0; i < cqdma->dma_channels; i++) {
+ spin_lock_irqsave(&cqdma->pc[i]->lock, flags);
+ mtk_dma_clr(cqdma->pc[i], MTK_CQDMA_INT_EN,
+ MTK_CQDMA_INT_EN_BIT);
+ spin_unlock_irqrestore(&cqdma->pc[i]->lock, flags);
+
+ /* Waits for any pending IRQ handlers to complete */
+ synchronize_irq(cqdma->pc[i]->irq);
+
+ tasklet_kill(&cqdma->pc[i]->tasklet);
+ }
+
+ /* disable hardware */
+ mtk_cqdma_hw_deinit(cqdma);
+
+ dma_async_device_unregister(&cqdma->ddev);
+ of_dma_controller_free(pdev->dev.of_node);
+
+ return 0;
+}
+
+static struct platform_driver mtk_cqdma_driver = {
+ .probe = mtk_cqdma_probe,
+ .remove = mtk_cqdma_remove,
+ .driver = {
+ .name = KBUILD_MODNAME,
+ .of_match_table = mtk_cqdma_match,
+ },
+};
+module_platform_driver(mtk_cqdma_driver);
+
+MODULE_DESCRIPTION("MediaTek CQDMA Controller Driver");
+MODULE_AUTHOR("Shun-Chih Yu <shun-chih.yu@mediatek.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/mediatek/mtk-hsdma.c b/drivers/dma/mediatek/mtk-hsdma.c
new file mode 100644
index 000000000..f7717c44b
--- /dev/null
+++ b/drivers/dma/mediatek/mtk-hsdma.c
@@ -0,0 +1,1056 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2017-2018 MediaTek Inc.
+
+/*
+ * Driver for MediaTek High-Speed DMA Controller
+ *
+ * Author: Sean Wang <sean.wang@mediatek.com>
+ *
+ */
+
+#include <linux/bitops.h>
+#include <linux/clk.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/err.h>
+#include <linux/iopoll.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/of_dma.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/refcount.h>
+#include <linux/slab.h>
+
+#include "../virt-dma.h"
+
+#define MTK_HSDMA_USEC_POLL 20
+#define MTK_HSDMA_TIMEOUT_POLL 200000
+#define MTK_HSDMA_DMA_BUSWIDTHS BIT(DMA_SLAVE_BUSWIDTH_4_BYTES)
+
+/* The default number of virtual channel */
+#define MTK_HSDMA_NR_VCHANS 3
+
+/* Only one physical channel supported */
+#define MTK_HSDMA_NR_MAX_PCHANS 1
+
+/* Macro for physical descriptor (PD) manipulation */
+/* The number of PD which must be 2 of power */
+#define MTK_DMA_SIZE 64
+#define MTK_HSDMA_NEXT_DESP_IDX(x, y) (((x) + 1) & ((y) - 1))
+#define MTK_HSDMA_LAST_DESP_IDX(x, y) (((x) - 1) & ((y) - 1))
+#define MTK_HSDMA_MAX_LEN 0x3f80
+#define MTK_HSDMA_ALIGN_SIZE 4
+#define MTK_HSDMA_PLEN_MASK 0x3fff
+#define MTK_HSDMA_DESC_PLEN(x) (((x) & MTK_HSDMA_PLEN_MASK) << 16)
+#define MTK_HSDMA_DESC_PLEN_GET(x) (((x) >> 16) & MTK_HSDMA_PLEN_MASK)
+
+/* Registers for underlying ring manipulation */
+#define MTK_HSDMA_TX_BASE 0x0
+#define MTK_HSDMA_TX_CNT 0x4
+#define MTK_HSDMA_TX_CPU 0x8
+#define MTK_HSDMA_TX_DMA 0xc
+#define MTK_HSDMA_RX_BASE 0x100
+#define MTK_HSDMA_RX_CNT 0x104
+#define MTK_HSDMA_RX_CPU 0x108
+#define MTK_HSDMA_RX_DMA 0x10c
+
+/* Registers for global setup */
+#define MTK_HSDMA_GLO 0x204
+#define MTK_HSDMA_GLO_MULTI_DMA BIT(10)
+#define MTK_HSDMA_TX_WB_DDONE BIT(6)
+#define MTK_HSDMA_BURST_64BYTES (0x2 << 4)
+#define MTK_HSDMA_GLO_RX_BUSY BIT(3)
+#define MTK_HSDMA_GLO_RX_DMA BIT(2)
+#define MTK_HSDMA_GLO_TX_BUSY BIT(1)
+#define MTK_HSDMA_GLO_TX_DMA BIT(0)
+#define MTK_HSDMA_GLO_DMA (MTK_HSDMA_GLO_TX_DMA | \
+ MTK_HSDMA_GLO_RX_DMA)
+#define MTK_HSDMA_GLO_BUSY (MTK_HSDMA_GLO_RX_BUSY | \
+ MTK_HSDMA_GLO_TX_BUSY)
+#define MTK_HSDMA_GLO_DEFAULT (MTK_HSDMA_GLO_TX_DMA | \
+ MTK_HSDMA_GLO_RX_DMA | \
+ MTK_HSDMA_TX_WB_DDONE | \
+ MTK_HSDMA_BURST_64BYTES | \
+ MTK_HSDMA_GLO_MULTI_DMA)
+
+/* Registers for reset */
+#define MTK_HSDMA_RESET 0x208
+#define MTK_HSDMA_RST_TX BIT(0)
+#define MTK_HSDMA_RST_RX BIT(16)
+
+/* Registers for interrupt control */
+#define MTK_HSDMA_DLYINT 0x20c
+#define MTK_HSDMA_RXDLY_INT_EN BIT(15)
+
+/* Interrupt fires when the pending number's more than the specified */
+#define MTK_HSDMA_RXMAX_PINT(x) (((x) & 0x7f) << 8)
+
+/* Interrupt fires when the pending time's more than the specified in 20 us */
+#define MTK_HSDMA_RXMAX_PTIME(x) ((x) & 0x7f)
+#define MTK_HSDMA_DLYINT_DEFAULT (MTK_HSDMA_RXDLY_INT_EN | \
+ MTK_HSDMA_RXMAX_PINT(20) | \
+ MTK_HSDMA_RXMAX_PTIME(20))
+#define MTK_HSDMA_INT_STATUS 0x220
+#define MTK_HSDMA_INT_ENABLE 0x228
+#define MTK_HSDMA_INT_RXDONE BIT(16)
+
+enum mtk_hsdma_vdesc_flag {
+ MTK_HSDMA_VDESC_FINISHED = 0x01,
+};
+
+#define IS_MTK_HSDMA_VDESC_FINISHED(x) ((x) == MTK_HSDMA_VDESC_FINISHED)
+
+/**
+ * struct mtk_hsdma_pdesc - This is the struct holding info describing physical
+ * descriptor (PD) and its placement must be kept at
+ * 4-bytes alignment in little endian order.
+ * @desc1: | The control pad used to indicate hardware how to
+ * @desc2: | deal with the descriptor such as source and
+ * @desc3: | destination address and data length. The maximum
+ * @desc4: | data length each pdesc can handle is 0x3f80 bytes
+ */
+struct mtk_hsdma_pdesc {
+ __le32 desc1;
+ __le32 desc2;
+ __le32 desc3;
+ __le32 desc4;
+} __packed __aligned(4);
+
+/**
+ * struct mtk_hsdma_vdesc - This is the struct holding info describing virtual
+ * descriptor (VD)
+ * @vd: An instance for struct virt_dma_desc
+ * @len: The total data size device wants to move
+ * @residue: The remaining data size device will move
+ * @dest: The destination address device wants to move to
+ * @src: The source address device wants to move from
+ */
+struct mtk_hsdma_vdesc {
+ struct virt_dma_desc vd;
+ size_t len;
+ size_t residue;
+ dma_addr_t dest;
+ dma_addr_t src;
+};
+
+/**
+ * struct mtk_hsdma_cb - This is the struct holding extra info required for RX
+ * ring to know what relevant VD the PD is being
+ * mapped to.
+ * @vd: Pointer to the relevant VD.
+ * @flag: Flag indicating what action should be taken when VD
+ * is completed.
+ */
+struct mtk_hsdma_cb {
+ struct virt_dma_desc *vd;
+ enum mtk_hsdma_vdesc_flag flag;
+};
+
+/**
+ * struct mtk_hsdma_ring - This struct holds info describing underlying ring
+ * space
+ * @txd: The descriptor TX ring which describes DMA source
+ * information
+ * @rxd: The descriptor RX ring which describes DMA
+ * destination information
+ * @cb: The extra information pointed at by RX ring
+ * @tphys: The physical addr of TX ring
+ * @rphys: The physical addr of RX ring
+ * @cur_tptr: Pointer to the next free descriptor used by the host
+ * @cur_rptr: Pointer to the last done descriptor by the device
+ */
+struct mtk_hsdma_ring {
+ struct mtk_hsdma_pdesc *txd;
+ struct mtk_hsdma_pdesc *rxd;
+ struct mtk_hsdma_cb *cb;
+ dma_addr_t tphys;
+ dma_addr_t rphys;
+ u16 cur_tptr;
+ u16 cur_rptr;
+};
+
+/**
+ * struct mtk_hsdma_pchan - This is the struct holding info describing physical
+ * channel (PC)
+ * @ring: An instance for the underlying ring
+ * @sz_ring: Total size allocated for the ring
+ * @nr_free: Total number of free rooms in the ring. It would
+ * be accessed and updated frequently between IRQ
+ * context and user context to reflect whether ring
+ * can accept requests from VD.
+ */
+struct mtk_hsdma_pchan {
+ struct mtk_hsdma_ring ring;
+ size_t sz_ring;
+ atomic_t nr_free;
+};
+
+/**
+ * struct mtk_hsdma_vchan - This is the struct holding info describing virtual
+ * channel (VC)
+ * @vc: An instance for struct virt_dma_chan
+ * @issue_completion: The wait for all issued descriptors completited
+ * @issue_synchronize: Bool indicating channel synchronization starts
+ * @desc_hw_processing: List those descriptors the hardware is processing,
+ * which is protected by vc.lock
+ */
+struct mtk_hsdma_vchan {
+ struct virt_dma_chan vc;
+ struct completion issue_completion;
+ bool issue_synchronize;
+ struct list_head desc_hw_processing;
+};
+
+/**
+ * struct mtk_hsdma_soc - This is the struct holding differences among SoCs
+ * @ddone: Bit mask for DDONE
+ * @ls0: Bit mask for LS0
+ */
+struct mtk_hsdma_soc {
+ __le32 ddone;
+ __le32 ls0;
+};
+
+/**
+ * struct mtk_hsdma_device - This is the struct holding info describing HSDMA
+ * device
+ * @ddev: An instance for struct dma_device
+ * @base: The mapped register I/O base
+ * @clk: The clock that device internal is using
+ * @irq: The IRQ that device are using
+ * @dma_requests: The number of VCs the device supports to
+ * @vc: The pointer to all available VCs
+ * @pc: The pointer to the underlying PC
+ * @pc_refcnt: Track how many VCs are using the PC
+ * @lock: Lock protect agaisting multiple VCs access PC
+ * @soc: The pointer to area holding differences among
+ * vaious platform
+ */
+struct mtk_hsdma_device {
+ struct dma_device ddev;
+ void __iomem *base;
+ struct clk *clk;
+ u32 irq;
+
+ u32 dma_requests;
+ struct mtk_hsdma_vchan *vc;
+ struct mtk_hsdma_pchan *pc;
+ refcount_t pc_refcnt;
+
+ /* Lock used to protect against multiple VCs access PC */
+ spinlock_t lock;
+
+ const struct mtk_hsdma_soc *soc;
+};
+
+static struct mtk_hsdma_device *to_hsdma_dev(struct dma_chan *chan)
+{
+ return container_of(chan->device, struct mtk_hsdma_device, ddev);
+}
+
+static inline struct mtk_hsdma_vchan *to_hsdma_vchan(struct dma_chan *chan)
+{
+ return container_of(chan, struct mtk_hsdma_vchan, vc.chan);
+}
+
+static struct mtk_hsdma_vdesc *to_hsdma_vdesc(struct virt_dma_desc *vd)
+{
+ return container_of(vd, struct mtk_hsdma_vdesc, vd);
+}
+
+static struct device *hsdma2dev(struct mtk_hsdma_device *hsdma)
+{
+ return hsdma->ddev.dev;
+}
+
+static u32 mtk_dma_read(struct mtk_hsdma_device *hsdma, u32 reg)
+{
+ return readl(hsdma->base + reg);
+}
+
+static void mtk_dma_write(struct mtk_hsdma_device *hsdma, u32 reg, u32 val)
+{
+ writel(val, hsdma->base + reg);
+}
+
+static void mtk_dma_rmw(struct mtk_hsdma_device *hsdma, u32 reg,
+ u32 mask, u32 set)
+{
+ u32 val;
+
+ val = mtk_dma_read(hsdma, reg);
+ val &= ~mask;
+ val |= set;
+ mtk_dma_write(hsdma, reg, val);
+}
+
+static void mtk_dma_set(struct mtk_hsdma_device *hsdma, u32 reg, u32 val)
+{
+ mtk_dma_rmw(hsdma, reg, 0, val);
+}
+
+static void mtk_dma_clr(struct mtk_hsdma_device *hsdma, u32 reg, u32 val)
+{
+ mtk_dma_rmw(hsdma, reg, val, 0);
+}
+
+static void mtk_hsdma_vdesc_free(struct virt_dma_desc *vd)
+{
+ kfree(container_of(vd, struct mtk_hsdma_vdesc, vd));
+}
+
+static int mtk_hsdma_busy_wait(struct mtk_hsdma_device *hsdma)
+{
+ u32 status = 0;
+
+ return readl_poll_timeout(hsdma->base + MTK_HSDMA_GLO, status,
+ !(status & MTK_HSDMA_GLO_BUSY),
+ MTK_HSDMA_USEC_POLL,
+ MTK_HSDMA_TIMEOUT_POLL);
+}
+
+static int mtk_hsdma_alloc_pchan(struct mtk_hsdma_device *hsdma,
+ struct mtk_hsdma_pchan *pc)
+{
+ struct mtk_hsdma_ring *ring = &pc->ring;
+ int err;
+
+ memset(pc, 0, sizeof(*pc));
+
+ /*
+ * Allocate ring space where [0 ... MTK_DMA_SIZE - 1] is for TX ring
+ * and [MTK_DMA_SIZE ... 2 * MTK_DMA_SIZE - 1] is for RX ring.
+ */
+ pc->sz_ring = 2 * MTK_DMA_SIZE * sizeof(*ring->txd);
+ ring->txd = dma_alloc_coherent(hsdma2dev(hsdma), pc->sz_ring,
+ &ring->tphys, GFP_NOWAIT);
+ if (!ring->txd)
+ return -ENOMEM;
+
+ ring->rxd = &ring->txd[MTK_DMA_SIZE];
+ ring->rphys = ring->tphys + MTK_DMA_SIZE * sizeof(*ring->txd);
+ ring->cur_tptr = 0;
+ ring->cur_rptr = MTK_DMA_SIZE - 1;
+
+ ring->cb = kcalloc(MTK_DMA_SIZE, sizeof(*ring->cb), GFP_NOWAIT);
+ if (!ring->cb) {
+ err = -ENOMEM;
+ goto err_free_dma;
+ }
+
+ atomic_set(&pc->nr_free, MTK_DMA_SIZE - 1);
+
+ /* Disable HSDMA and wait for the completion */
+ mtk_dma_clr(hsdma, MTK_HSDMA_GLO, MTK_HSDMA_GLO_DMA);
+ err = mtk_hsdma_busy_wait(hsdma);
+ if (err)
+ goto err_free_cb;
+
+ /* Reset */
+ mtk_dma_set(hsdma, MTK_HSDMA_RESET,
+ MTK_HSDMA_RST_TX | MTK_HSDMA_RST_RX);
+ mtk_dma_clr(hsdma, MTK_HSDMA_RESET,
+ MTK_HSDMA_RST_TX | MTK_HSDMA_RST_RX);
+
+ /* Setup HSDMA initial pointer in the ring */
+ mtk_dma_write(hsdma, MTK_HSDMA_TX_BASE, ring->tphys);
+ mtk_dma_write(hsdma, MTK_HSDMA_TX_CNT, MTK_DMA_SIZE);
+ mtk_dma_write(hsdma, MTK_HSDMA_TX_CPU, ring->cur_tptr);
+ mtk_dma_write(hsdma, MTK_HSDMA_TX_DMA, 0);
+ mtk_dma_write(hsdma, MTK_HSDMA_RX_BASE, ring->rphys);
+ mtk_dma_write(hsdma, MTK_HSDMA_RX_CNT, MTK_DMA_SIZE);
+ mtk_dma_write(hsdma, MTK_HSDMA_RX_CPU, ring->cur_rptr);
+ mtk_dma_write(hsdma, MTK_HSDMA_RX_DMA, 0);
+
+ /* Enable HSDMA */
+ mtk_dma_set(hsdma, MTK_HSDMA_GLO, MTK_HSDMA_GLO_DMA);
+
+ /* Setup delayed interrupt */
+ mtk_dma_write(hsdma, MTK_HSDMA_DLYINT, MTK_HSDMA_DLYINT_DEFAULT);
+
+ /* Enable interrupt */
+ mtk_dma_set(hsdma, MTK_HSDMA_INT_ENABLE, MTK_HSDMA_INT_RXDONE);
+
+ return 0;
+
+err_free_cb:
+ kfree(ring->cb);
+
+err_free_dma:
+ dma_free_coherent(hsdma2dev(hsdma),
+ pc->sz_ring, ring->txd, ring->tphys);
+ return err;
+}
+
+static void mtk_hsdma_free_pchan(struct mtk_hsdma_device *hsdma,
+ struct mtk_hsdma_pchan *pc)
+{
+ struct mtk_hsdma_ring *ring = &pc->ring;
+
+ /* Disable HSDMA and then wait for the completion */
+ mtk_dma_clr(hsdma, MTK_HSDMA_GLO, MTK_HSDMA_GLO_DMA);
+ mtk_hsdma_busy_wait(hsdma);
+
+ /* Reset pointer in the ring */
+ mtk_dma_clr(hsdma, MTK_HSDMA_INT_ENABLE, MTK_HSDMA_INT_RXDONE);
+ mtk_dma_write(hsdma, MTK_HSDMA_TX_BASE, 0);
+ mtk_dma_write(hsdma, MTK_HSDMA_TX_CNT, 0);
+ mtk_dma_write(hsdma, MTK_HSDMA_TX_CPU, 0);
+ mtk_dma_write(hsdma, MTK_HSDMA_RX_BASE, 0);
+ mtk_dma_write(hsdma, MTK_HSDMA_RX_CNT, 0);
+ mtk_dma_write(hsdma, MTK_HSDMA_RX_CPU, MTK_DMA_SIZE - 1);
+
+ kfree(ring->cb);
+
+ dma_free_coherent(hsdma2dev(hsdma),
+ pc->sz_ring, ring->txd, ring->tphys);
+}
+
+static int mtk_hsdma_issue_pending_vdesc(struct mtk_hsdma_device *hsdma,
+ struct mtk_hsdma_pchan *pc,
+ struct mtk_hsdma_vdesc *hvd)
+{
+ struct mtk_hsdma_ring *ring = &pc->ring;
+ struct mtk_hsdma_pdesc *txd, *rxd;
+ u16 reserved, prev, tlen, num_sgs;
+ unsigned long flags;
+
+ /* Protect against PC is accessed by multiple VCs simultaneously */
+ spin_lock_irqsave(&hsdma->lock, flags);
+
+ /*
+ * Reserve rooms, where pc->nr_free is used to track how many free
+ * rooms in the ring being updated in user and IRQ context.
+ */
+ num_sgs = DIV_ROUND_UP(hvd->len, MTK_HSDMA_MAX_LEN);
+ reserved = min_t(u16, num_sgs, atomic_read(&pc->nr_free));
+
+ if (!reserved) {
+ spin_unlock_irqrestore(&hsdma->lock, flags);
+ return -ENOSPC;
+ }
+
+ atomic_sub(reserved, &pc->nr_free);
+
+ while (reserved--) {
+ /* Limit size by PD capability for valid data moving */
+ tlen = (hvd->len > MTK_HSDMA_MAX_LEN) ?
+ MTK_HSDMA_MAX_LEN : hvd->len;
+
+ /*
+ * Setup PDs using the remaining VD info mapped on those
+ * reserved rooms. And since RXD is shared memory between the
+ * host and the device allocated by dma_alloc_coherent call,
+ * the helper macro WRITE_ONCE can ensure the data written to
+ * RAM would really happens.
+ */
+ txd = &ring->txd[ring->cur_tptr];
+ WRITE_ONCE(txd->desc1, hvd->src);
+ WRITE_ONCE(txd->desc2,
+ hsdma->soc->ls0 | MTK_HSDMA_DESC_PLEN(tlen));
+
+ rxd = &ring->rxd[ring->cur_tptr];
+ WRITE_ONCE(rxd->desc1, hvd->dest);
+ WRITE_ONCE(rxd->desc2, MTK_HSDMA_DESC_PLEN(tlen));
+
+ /* Associate VD, the PD belonged to */
+ ring->cb[ring->cur_tptr].vd = &hvd->vd;
+
+ /* Move forward the pointer of TX ring */
+ ring->cur_tptr = MTK_HSDMA_NEXT_DESP_IDX(ring->cur_tptr,
+ MTK_DMA_SIZE);
+
+ /* Update VD with remaining data */
+ hvd->src += tlen;
+ hvd->dest += tlen;
+ hvd->len -= tlen;
+ }
+
+ /*
+ * Tagging flag for the last PD for VD will be responsible for
+ * completing VD.
+ */
+ if (!hvd->len) {
+ prev = MTK_HSDMA_LAST_DESP_IDX(ring->cur_tptr, MTK_DMA_SIZE);
+ ring->cb[prev].flag = MTK_HSDMA_VDESC_FINISHED;
+ }
+
+ /* Ensure all changes indeed done before we're going on */
+ wmb();
+
+ /*
+ * Updating into hardware the pointer of TX ring lets HSDMA to take
+ * action for those pending PDs.
+ */
+ mtk_dma_write(hsdma, MTK_HSDMA_TX_CPU, ring->cur_tptr);
+
+ spin_unlock_irqrestore(&hsdma->lock, flags);
+
+ return 0;
+}
+
+static void mtk_hsdma_issue_vchan_pending(struct mtk_hsdma_device *hsdma,
+ struct mtk_hsdma_vchan *hvc)
+{
+ struct virt_dma_desc *vd, *vd2;
+ int err;
+
+ lockdep_assert_held(&hvc->vc.lock);
+
+ list_for_each_entry_safe(vd, vd2, &hvc->vc.desc_issued, node) {
+ struct mtk_hsdma_vdesc *hvd;
+
+ hvd = to_hsdma_vdesc(vd);
+
+ /* Map VD into PC and all VCs shares a single PC */
+ err = mtk_hsdma_issue_pending_vdesc(hsdma, hsdma->pc, hvd);
+
+ /*
+ * Move VD from desc_issued to desc_hw_processing when entire
+ * VD is fit into available PDs. Otherwise, the uncompleted
+ * VDs would stay in list desc_issued and then restart the
+ * processing as soon as possible once underlying ring space
+ * got freed.
+ */
+ if (err == -ENOSPC || hvd->len > 0)
+ break;
+
+ /*
+ * The extra list desc_hw_processing is used because
+ * hardware can't provide sufficient information allowing us
+ * to know what VDs are still working on the underlying ring.
+ * Through the additional list, it can help us to implement
+ * terminate_all, residue calculation and such thing needed
+ * to know detail descriptor status on the hardware.
+ */
+ list_move_tail(&vd->node, &hvc->desc_hw_processing);
+ }
+}
+
+static void mtk_hsdma_free_rooms_in_ring(struct mtk_hsdma_device *hsdma)
+{
+ struct mtk_hsdma_vchan *hvc;
+ struct mtk_hsdma_pdesc *rxd;
+ struct mtk_hsdma_vdesc *hvd;
+ struct mtk_hsdma_pchan *pc;
+ struct mtk_hsdma_cb *cb;
+ int i = MTK_DMA_SIZE;
+ __le32 desc2;
+ u32 status;
+ u16 next;
+
+ /* Read IRQ status */
+ status = mtk_dma_read(hsdma, MTK_HSDMA_INT_STATUS);
+ if (unlikely(!(status & MTK_HSDMA_INT_RXDONE)))
+ goto rx_done;
+
+ pc = hsdma->pc;
+
+ /*
+ * Using a fail-safe loop with iterations of up to MTK_DMA_SIZE to
+ * reclaim these finished descriptors: The most number of PDs the ISR
+ * can handle at one time shouldn't be more than MTK_DMA_SIZE so we
+ * take it as limited count instead of just using a dangerous infinite
+ * poll.
+ */
+ while (i--) {
+ next = MTK_HSDMA_NEXT_DESP_IDX(pc->ring.cur_rptr,
+ MTK_DMA_SIZE);
+ rxd = &pc->ring.rxd[next];
+
+ /*
+ * If MTK_HSDMA_DESC_DDONE is no specified, that means data
+ * moving for the PD is still under going.
+ */
+ desc2 = READ_ONCE(rxd->desc2);
+ if (!(desc2 & hsdma->soc->ddone))
+ break;
+
+ cb = &pc->ring.cb[next];
+ if (unlikely(!cb->vd)) {
+ dev_err(hsdma2dev(hsdma), "cb->vd cannot be null\n");
+ break;
+ }
+
+ /* Update residue of VD the associated PD belonged to */
+ hvd = to_hsdma_vdesc(cb->vd);
+ hvd->residue -= MTK_HSDMA_DESC_PLEN_GET(rxd->desc2);
+
+ /* Complete VD until the relevant last PD is finished */
+ if (IS_MTK_HSDMA_VDESC_FINISHED(cb->flag)) {
+ hvc = to_hsdma_vchan(cb->vd->tx.chan);
+
+ spin_lock(&hvc->vc.lock);
+
+ /* Remove VD from list desc_hw_processing */
+ list_del(&cb->vd->node);
+
+ /* Add VD into list desc_completed */
+ vchan_cookie_complete(cb->vd);
+
+ if (hvc->issue_synchronize &&
+ list_empty(&hvc->desc_hw_processing)) {
+ complete(&hvc->issue_completion);
+ hvc->issue_synchronize = false;
+ }
+ spin_unlock(&hvc->vc.lock);
+
+ cb->flag = 0;
+ }
+
+ cb->vd = NULL;
+
+ /*
+ * Recycle the RXD with the helper WRITE_ONCE that can ensure
+ * data written into RAM would really happens.
+ */
+ WRITE_ONCE(rxd->desc1, 0);
+ WRITE_ONCE(rxd->desc2, 0);
+ pc->ring.cur_rptr = next;
+
+ /* Release rooms */
+ atomic_inc(&pc->nr_free);
+ }
+
+ /* Ensure all changes indeed done before we're going on */
+ wmb();
+
+ /* Update CPU pointer for those completed PDs */
+ mtk_dma_write(hsdma, MTK_HSDMA_RX_CPU, pc->ring.cur_rptr);
+
+ /*
+ * Acking the pending IRQ allows hardware no longer to keep the used
+ * IRQ line in certain trigger state when software has completed all
+ * the finished physical descriptors.
+ */
+ if (atomic_read(&pc->nr_free) >= MTK_DMA_SIZE - 1)
+ mtk_dma_write(hsdma, MTK_HSDMA_INT_STATUS, status);
+
+ /* ASAP handles pending VDs in all VCs after freeing some rooms */
+ for (i = 0; i < hsdma->dma_requests; i++) {
+ hvc = &hsdma->vc[i];
+ spin_lock(&hvc->vc.lock);
+ mtk_hsdma_issue_vchan_pending(hsdma, hvc);
+ spin_unlock(&hvc->vc.lock);
+ }
+
+rx_done:
+ /* All completed PDs are cleaned up, so enable interrupt again */
+ mtk_dma_set(hsdma, MTK_HSDMA_INT_ENABLE, MTK_HSDMA_INT_RXDONE);
+}
+
+static irqreturn_t mtk_hsdma_irq(int irq, void *devid)
+{
+ struct mtk_hsdma_device *hsdma = devid;
+
+ /*
+ * Disable interrupt until all completed PDs are cleaned up in
+ * mtk_hsdma_free_rooms call.
+ */
+ mtk_dma_clr(hsdma, MTK_HSDMA_INT_ENABLE, MTK_HSDMA_INT_RXDONE);
+
+ mtk_hsdma_free_rooms_in_ring(hsdma);
+
+ return IRQ_HANDLED;
+}
+
+static struct virt_dma_desc *mtk_hsdma_find_active_desc(struct dma_chan *c,
+ dma_cookie_t cookie)
+{
+ struct mtk_hsdma_vchan *hvc = to_hsdma_vchan(c);
+ struct virt_dma_desc *vd;
+
+ list_for_each_entry(vd, &hvc->desc_hw_processing, node)
+ if (vd->tx.cookie == cookie)
+ return vd;
+
+ list_for_each_entry(vd, &hvc->vc.desc_issued, node)
+ if (vd->tx.cookie == cookie)
+ return vd;
+
+ return NULL;
+}
+
+static enum dma_status mtk_hsdma_tx_status(struct dma_chan *c,
+ dma_cookie_t cookie,
+ struct dma_tx_state *txstate)
+{
+ struct mtk_hsdma_vchan *hvc = to_hsdma_vchan(c);
+ struct mtk_hsdma_vdesc *hvd;
+ struct virt_dma_desc *vd;
+ enum dma_status ret;
+ unsigned long flags;
+ size_t bytes = 0;
+
+ ret = dma_cookie_status(c, cookie, txstate);
+ if (ret == DMA_COMPLETE || !txstate)
+ return ret;
+
+ spin_lock_irqsave(&hvc->vc.lock, flags);
+ vd = mtk_hsdma_find_active_desc(c, cookie);
+ spin_unlock_irqrestore(&hvc->vc.lock, flags);
+
+ if (vd) {
+ hvd = to_hsdma_vdesc(vd);
+ bytes = hvd->residue;
+ }
+
+ dma_set_residue(txstate, bytes);
+
+ return ret;
+}
+
+static void mtk_hsdma_issue_pending(struct dma_chan *c)
+{
+ struct mtk_hsdma_device *hsdma = to_hsdma_dev(c);
+ struct mtk_hsdma_vchan *hvc = to_hsdma_vchan(c);
+ unsigned long flags;
+
+ spin_lock_irqsave(&hvc->vc.lock, flags);
+
+ if (vchan_issue_pending(&hvc->vc))
+ mtk_hsdma_issue_vchan_pending(hsdma, hvc);
+
+ spin_unlock_irqrestore(&hvc->vc.lock, flags);
+}
+
+static struct dma_async_tx_descriptor *
+mtk_hsdma_prep_dma_memcpy(struct dma_chan *c, dma_addr_t dest,
+ dma_addr_t src, size_t len, unsigned long flags)
+{
+ struct mtk_hsdma_vdesc *hvd;
+
+ hvd = kzalloc(sizeof(*hvd), GFP_NOWAIT);
+ if (!hvd)
+ return NULL;
+
+ hvd->len = len;
+ hvd->residue = len;
+ hvd->src = src;
+ hvd->dest = dest;
+
+ return vchan_tx_prep(to_virt_chan(c), &hvd->vd, flags);
+}
+
+static int mtk_hsdma_free_inactive_desc(struct dma_chan *c)
+{
+ struct virt_dma_chan *vc = to_virt_chan(c);
+ unsigned long flags;
+ LIST_HEAD(head);
+
+ spin_lock_irqsave(&vc->lock, flags);
+ list_splice_tail_init(&vc->desc_allocated, &head);
+ list_splice_tail_init(&vc->desc_submitted, &head);
+ list_splice_tail_init(&vc->desc_issued, &head);
+ spin_unlock_irqrestore(&vc->lock, flags);
+
+ /* At the point, we don't expect users put descriptor into VC again */
+ vchan_dma_desc_free_list(vc, &head);
+
+ return 0;
+}
+
+static void mtk_hsdma_free_active_desc(struct dma_chan *c)
+{
+ struct mtk_hsdma_vchan *hvc = to_hsdma_vchan(c);
+ bool sync_needed = false;
+
+ /*
+ * Once issue_synchronize is being set, which means once the hardware
+ * consumes all descriptors for the channel in the ring, the
+ * synchronization must be notified immediately it is completed.
+ */
+ spin_lock(&hvc->vc.lock);
+ if (!list_empty(&hvc->desc_hw_processing)) {
+ hvc->issue_synchronize = true;
+ sync_needed = true;
+ }
+ spin_unlock(&hvc->vc.lock);
+
+ if (sync_needed)
+ wait_for_completion(&hvc->issue_completion);
+ /*
+ * At the point, we expect that all remaining descriptors in the ring
+ * for the channel should be all processing done.
+ */
+ WARN_ONCE(!list_empty(&hvc->desc_hw_processing),
+ "Desc pending still in list desc_hw_processing\n");
+
+ /* Free all descriptors in list desc_completed */
+ vchan_synchronize(&hvc->vc);
+
+ WARN_ONCE(!list_empty(&hvc->vc.desc_completed),
+ "Desc pending still in list desc_completed\n");
+}
+
+static int mtk_hsdma_terminate_all(struct dma_chan *c)
+{
+ /*
+ * Free pending descriptors not processed yet by hardware that have
+ * previously been submitted to the channel.
+ */
+ mtk_hsdma_free_inactive_desc(c);
+
+ /*
+ * However, the DMA engine doesn't provide any way to stop these
+ * descriptors being processed currently by hardware. The only way is
+ * to just waiting until these descriptors are all processed completely
+ * through mtk_hsdma_free_active_desc call.
+ */
+ mtk_hsdma_free_active_desc(c);
+
+ return 0;
+}
+
+static int mtk_hsdma_alloc_chan_resources(struct dma_chan *c)
+{
+ struct mtk_hsdma_device *hsdma = to_hsdma_dev(c);
+ int err;
+
+ /*
+ * Since HSDMA has only one PC, the resource for PC is being allocated
+ * when the first VC is being created and the other VCs would run on
+ * the same PC.
+ */
+ if (!refcount_read(&hsdma->pc_refcnt)) {
+ err = mtk_hsdma_alloc_pchan(hsdma, hsdma->pc);
+ if (err)
+ return err;
+ /*
+ * refcount_inc would complain increment on 0; use-after-free.
+ * Thus, we need to explicitly set it as 1 initially.
+ */
+ refcount_set(&hsdma->pc_refcnt, 1);
+ } else {
+ refcount_inc(&hsdma->pc_refcnt);
+ }
+
+ return 0;
+}
+
+static void mtk_hsdma_free_chan_resources(struct dma_chan *c)
+{
+ struct mtk_hsdma_device *hsdma = to_hsdma_dev(c);
+
+ /* Free all descriptors in all lists on the VC */
+ mtk_hsdma_terminate_all(c);
+
+ /* The resource for PC is not freed until all the VCs are destroyed */
+ if (!refcount_dec_and_test(&hsdma->pc_refcnt))
+ return;
+
+ mtk_hsdma_free_pchan(hsdma, hsdma->pc);
+}
+
+static int mtk_hsdma_hw_init(struct mtk_hsdma_device *hsdma)
+{
+ int err;
+
+ pm_runtime_enable(hsdma2dev(hsdma));
+ pm_runtime_get_sync(hsdma2dev(hsdma));
+
+ err = clk_prepare_enable(hsdma->clk);
+ if (err)
+ return err;
+
+ mtk_dma_write(hsdma, MTK_HSDMA_INT_ENABLE, 0);
+ mtk_dma_write(hsdma, MTK_HSDMA_GLO, MTK_HSDMA_GLO_DEFAULT);
+
+ return 0;
+}
+
+static int mtk_hsdma_hw_deinit(struct mtk_hsdma_device *hsdma)
+{
+ mtk_dma_write(hsdma, MTK_HSDMA_GLO, 0);
+
+ clk_disable_unprepare(hsdma->clk);
+
+ pm_runtime_put_sync(hsdma2dev(hsdma));
+ pm_runtime_disable(hsdma2dev(hsdma));
+
+ return 0;
+}
+
+static const struct mtk_hsdma_soc mt7623_soc = {
+ .ddone = BIT(31),
+ .ls0 = BIT(30),
+};
+
+static const struct mtk_hsdma_soc mt7622_soc = {
+ .ddone = BIT(15),
+ .ls0 = BIT(14),
+};
+
+static const struct of_device_id mtk_hsdma_match[] = {
+ { .compatible = "mediatek,mt7623-hsdma", .data = &mt7623_soc},
+ { .compatible = "mediatek,mt7622-hsdma", .data = &mt7622_soc},
+ { /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, mtk_hsdma_match);
+
+static int mtk_hsdma_probe(struct platform_device *pdev)
+{
+ struct mtk_hsdma_device *hsdma;
+ struct mtk_hsdma_vchan *vc;
+ struct dma_device *dd;
+ struct resource *res;
+ int i, err;
+
+ hsdma = devm_kzalloc(&pdev->dev, sizeof(*hsdma), GFP_KERNEL);
+ if (!hsdma)
+ return -ENOMEM;
+
+ dd = &hsdma->ddev;
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ hsdma->base = devm_ioremap_resource(&pdev->dev, res);
+ if (IS_ERR(hsdma->base))
+ return PTR_ERR(hsdma->base);
+
+ hsdma->soc = of_device_get_match_data(&pdev->dev);
+ if (!hsdma->soc) {
+ dev_err(&pdev->dev, "No device match found\n");
+ return -ENODEV;
+ }
+
+ hsdma->clk = devm_clk_get(&pdev->dev, "hsdma");
+ if (IS_ERR(hsdma->clk)) {
+ dev_err(&pdev->dev, "No clock for %s\n",
+ dev_name(&pdev->dev));
+ return PTR_ERR(hsdma->clk);
+ }
+
+ err = platform_get_irq(pdev, 0);
+ if (err < 0)
+ return err;
+ hsdma->irq = err;
+
+ refcount_set(&hsdma->pc_refcnt, 0);
+ spin_lock_init(&hsdma->lock);
+
+ dma_cap_set(DMA_MEMCPY, dd->cap_mask);
+
+ dd->copy_align = MTK_HSDMA_ALIGN_SIZE;
+ dd->device_alloc_chan_resources = mtk_hsdma_alloc_chan_resources;
+ dd->device_free_chan_resources = mtk_hsdma_free_chan_resources;
+ dd->device_tx_status = mtk_hsdma_tx_status;
+ dd->device_issue_pending = mtk_hsdma_issue_pending;
+ dd->device_prep_dma_memcpy = mtk_hsdma_prep_dma_memcpy;
+ dd->device_terminate_all = mtk_hsdma_terminate_all;
+ dd->src_addr_widths = MTK_HSDMA_DMA_BUSWIDTHS;
+ dd->dst_addr_widths = MTK_HSDMA_DMA_BUSWIDTHS;
+ dd->directions = BIT(DMA_MEM_TO_MEM);
+ dd->residue_granularity = DMA_RESIDUE_GRANULARITY_SEGMENT;
+ dd->dev = &pdev->dev;
+ INIT_LIST_HEAD(&dd->channels);
+
+ hsdma->dma_requests = MTK_HSDMA_NR_VCHANS;
+ if (pdev->dev.of_node && of_property_read_u32(pdev->dev.of_node,
+ "dma-requests",
+ &hsdma->dma_requests)) {
+ dev_info(&pdev->dev,
+ "Using %u as missing dma-requests property\n",
+ MTK_HSDMA_NR_VCHANS);
+ }
+
+ hsdma->pc = devm_kcalloc(&pdev->dev, MTK_HSDMA_NR_MAX_PCHANS,
+ sizeof(*hsdma->pc), GFP_KERNEL);
+ if (!hsdma->pc)
+ return -ENOMEM;
+
+ hsdma->vc = devm_kcalloc(&pdev->dev, hsdma->dma_requests,
+ sizeof(*hsdma->vc), GFP_KERNEL);
+ if (!hsdma->vc)
+ return -ENOMEM;
+
+ for (i = 0; i < hsdma->dma_requests; i++) {
+ vc = &hsdma->vc[i];
+ vc->vc.desc_free = mtk_hsdma_vdesc_free;
+ vchan_init(&vc->vc, dd);
+ init_completion(&vc->issue_completion);
+ INIT_LIST_HEAD(&vc->desc_hw_processing);
+ }
+
+ err = dma_async_device_register(dd);
+ if (err)
+ return err;
+
+ err = of_dma_controller_register(pdev->dev.of_node,
+ of_dma_xlate_by_chan_id, hsdma);
+ if (err) {
+ dev_err(&pdev->dev,
+ "MediaTek HSDMA OF registration failed %d\n", err);
+ goto err_unregister;
+ }
+
+ mtk_hsdma_hw_init(hsdma);
+
+ err = devm_request_irq(&pdev->dev, hsdma->irq,
+ mtk_hsdma_irq, 0,
+ dev_name(&pdev->dev), hsdma);
+ if (err) {
+ dev_err(&pdev->dev,
+ "request_irq failed with err %d\n", err);
+ goto err_free;
+ }
+
+ platform_set_drvdata(pdev, hsdma);
+
+ dev_info(&pdev->dev, "MediaTek HSDMA driver registered\n");
+
+ return 0;
+
+err_free:
+ mtk_hsdma_hw_deinit(hsdma);
+ of_dma_controller_free(pdev->dev.of_node);
+err_unregister:
+ dma_async_device_unregister(dd);
+
+ return err;
+}
+
+static int mtk_hsdma_remove(struct platform_device *pdev)
+{
+ struct mtk_hsdma_device *hsdma = platform_get_drvdata(pdev);
+ struct mtk_hsdma_vchan *vc;
+ int i;
+
+ /* Kill VC task */
+ for (i = 0; i < hsdma->dma_requests; i++) {
+ vc = &hsdma->vc[i];
+
+ list_del(&vc->vc.chan.device_node);
+ tasklet_kill(&vc->vc.task);
+ }
+
+ /* Disable DMA interrupt */
+ mtk_dma_write(hsdma, MTK_HSDMA_INT_ENABLE, 0);
+
+ /* Waits for any pending IRQ handlers to complete */
+ synchronize_irq(hsdma->irq);
+
+ /* Disable hardware */
+ mtk_hsdma_hw_deinit(hsdma);
+
+ dma_async_device_unregister(&hsdma->ddev);
+ of_dma_controller_free(pdev->dev.of_node);
+
+ return 0;
+}
+
+static struct platform_driver mtk_hsdma_driver = {
+ .probe = mtk_hsdma_probe,
+ .remove = mtk_hsdma_remove,
+ .driver = {
+ .name = KBUILD_MODNAME,
+ .of_match_table = mtk_hsdma_match,
+ },
+};
+module_platform_driver(mtk_hsdma_driver);
+
+MODULE_DESCRIPTION("MediaTek High-Speed DMA Controller Driver");
+MODULE_AUTHOR("Sean Wang <sean.wang@mediatek.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/mediatek/mtk-uart-apdma.c b/drivers/dma/mediatek/mtk-uart-apdma.c
new file mode 100644
index 000000000..0acf6a92a
--- /dev/null
+++ b/drivers/dma/mediatek/mtk-uart-apdma.c
@@ -0,0 +1,656 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * MediaTek UART APDMA driver.
+ *
+ * Copyright (c) 2019 MediaTek Inc.
+ * Author: Long Cheng <long.cheng@mediatek.com>
+ */
+
+#include <linux/clk.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/iopoll.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/of_dma.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+
+#include "../virt-dma.h"
+
+/* The default number of virtual channel */
+#define MTK_UART_APDMA_NR_VCHANS 8
+
+#define VFF_EN_B BIT(0)
+#define VFF_STOP_B BIT(0)
+#define VFF_FLUSH_B BIT(0)
+#define VFF_4G_EN_B BIT(0)
+/* rx valid size >= vff thre */
+#define VFF_RX_INT_EN_B (BIT(0) | BIT(1))
+/* tx left size >= vff thre */
+#define VFF_TX_INT_EN_B BIT(0)
+#define VFF_WARM_RST_B BIT(0)
+#define VFF_RX_INT_CLR_B (BIT(0) | BIT(1))
+#define VFF_TX_INT_CLR_B 0
+#define VFF_STOP_CLR_B 0
+#define VFF_EN_CLR_B 0
+#define VFF_INT_EN_CLR_B 0
+#define VFF_4G_SUPPORT_CLR_B 0
+
+/*
+ * interrupt trigger level for tx
+ * if threshold is n, no polling is required to start tx.
+ * otherwise need polling VFF_FLUSH.
+ */
+#define VFF_TX_THRE(n) (n)
+/* interrupt trigger level for rx */
+#define VFF_RX_THRE(n) ((n) * 3 / 4)
+
+#define VFF_RING_SIZE 0xffff
+/* invert this bit when wrap ring head again */
+#define VFF_RING_WRAP 0x10000
+
+#define VFF_INT_FLAG 0x00
+#define VFF_INT_EN 0x04
+#define VFF_EN 0x08
+#define VFF_RST 0x0c
+#define VFF_STOP 0x10
+#define VFF_FLUSH 0x14
+#define VFF_ADDR 0x1c
+#define VFF_LEN 0x24
+#define VFF_THRE 0x28
+#define VFF_WPT 0x2c
+#define VFF_RPT 0x30
+/* TX: the buffer size HW can read. RX: the buffer size SW can read. */
+#define VFF_VALID_SIZE 0x3c
+/* TX: the buffer size SW can write. RX: the buffer size HW can write. */
+#define VFF_LEFT_SIZE 0x40
+#define VFF_DEBUG_STATUS 0x50
+#define VFF_4G_SUPPORT 0x54
+
+struct mtk_uart_apdmadev {
+ struct dma_device ddev;
+ struct clk *clk;
+ bool support_33bits;
+ unsigned int dma_requests;
+};
+
+struct mtk_uart_apdma_desc {
+ struct virt_dma_desc vd;
+
+ dma_addr_t addr;
+ unsigned int avail_len;
+};
+
+struct mtk_chan {
+ struct virt_dma_chan vc;
+ struct dma_slave_config cfg;
+ struct mtk_uart_apdma_desc *desc;
+ enum dma_transfer_direction dir;
+
+ void __iomem *base;
+ unsigned int irq;
+
+ unsigned int rx_status;
+};
+
+static inline struct mtk_uart_apdmadev *
+to_mtk_uart_apdma_dev(struct dma_device *d)
+{
+ return container_of(d, struct mtk_uart_apdmadev, ddev);
+}
+
+static inline struct mtk_chan *to_mtk_uart_apdma_chan(struct dma_chan *c)
+{
+ return container_of(c, struct mtk_chan, vc.chan);
+}
+
+static inline struct mtk_uart_apdma_desc *to_mtk_uart_apdma_desc
+ (struct dma_async_tx_descriptor *t)
+{
+ return container_of(t, struct mtk_uart_apdma_desc, vd.tx);
+}
+
+static void mtk_uart_apdma_write(struct mtk_chan *c,
+ unsigned int reg, unsigned int val)
+{
+ writel(val, c->base + reg);
+}
+
+static unsigned int mtk_uart_apdma_read(struct mtk_chan *c, unsigned int reg)
+{
+ return readl(c->base + reg);
+}
+
+static void mtk_uart_apdma_desc_free(struct virt_dma_desc *vd)
+{
+ kfree(container_of(vd, struct mtk_uart_apdma_desc, vd));
+}
+
+static void mtk_uart_apdma_start_tx(struct mtk_chan *c)
+{
+ struct mtk_uart_apdmadev *mtkd =
+ to_mtk_uart_apdma_dev(c->vc.chan.device);
+ struct mtk_uart_apdma_desc *d = c->desc;
+ unsigned int wpt, vff_sz;
+
+ vff_sz = c->cfg.dst_port_window_size;
+ if (!mtk_uart_apdma_read(c, VFF_LEN)) {
+ mtk_uart_apdma_write(c, VFF_ADDR, d->addr);
+ mtk_uart_apdma_write(c, VFF_LEN, vff_sz);
+ mtk_uart_apdma_write(c, VFF_THRE, VFF_TX_THRE(vff_sz));
+ mtk_uart_apdma_write(c, VFF_WPT, 0);
+ mtk_uart_apdma_write(c, VFF_INT_FLAG, VFF_TX_INT_CLR_B);
+
+ if (mtkd->support_33bits)
+ mtk_uart_apdma_write(c, VFF_4G_SUPPORT, VFF_4G_EN_B);
+ }
+
+ mtk_uart_apdma_write(c, VFF_EN, VFF_EN_B);
+ if (mtk_uart_apdma_read(c, VFF_EN) != VFF_EN_B)
+ dev_err(c->vc.chan.device->dev, "Enable TX fail\n");
+
+ if (!mtk_uart_apdma_read(c, VFF_LEFT_SIZE)) {
+ mtk_uart_apdma_write(c, VFF_INT_EN, VFF_TX_INT_EN_B);
+ return;
+ }
+
+ wpt = mtk_uart_apdma_read(c, VFF_WPT);
+
+ wpt += c->desc->avail_len;
+ if ((wpt & VFF_RING_SIZE) == vff_sz)
+ wpt = (wpt & VFF_RING_WRAP) ^ VFF_RING_WRAP;
+
+ /* Let DMA start moving data */
+ mtk_uart_apdma_write(c, VFF_WPT, wpt);
+
+ /* HW auto set to 0 when left size >= threshold */
+ mtk_uart_apdma_write(c, VFF_INT_EN, VFF_TX_INT_EN_B);
+ if (!mtk_uart_apdma_read(c, VFF_FLUSH))
+ mtk_uart_apdma_write(c, VFF_FLUSH, VFF_FLUSH_B);
+}
+
+static void mtk_uart_apdma_start_rx(struct mtk_chan *c)
+{
+ struct mtk_uart_apdmadev *mtkd =
+ to_mtk_uart_apdma_dev(c->vc.chan.device);
+ struct mtk_uart_apdma_desc *d = c->desc;
+ unsigned int vff_sz;
+
+ vff_sz = c->cfg.src_port_window_size;
+ if (!mtk_uart_apdma_read(c, VFF_LEN)) {
+ mtk_uart_apdma_write(c, VFF_ADDR, d->addr);
+ mtk_uart_apdma_write(c, VFF_LEN, vff_sz);
+ mtk_uart_apdma_write(c, VFF_THRE, VFF_RX_THRE(vff_sz));
+ mtk_uart_apdma_write(c, VFF_RPT, 0);
+ mtk_uart_apdma_write(c, VFF_INT_FLAG, VFF_RX_INT_CLR_B);
+
+ if (mtkd->support_33bits)
+ mtk_uart_apdma_write(c, VFF_4G_SUPPORT, VFF_4G_EN_B);
+ }
+
+ mtk_uart_apdma_write(c, VFF_INT_EN, VFF_RX_INT_EN_B);
+ mtk_uart_apdma_write(c, VFF_EN, VFF_EN_B);
+ if (mtk_uart_apdma_read(c, VFF_EN) != VFF_EN_B)
+ dev_err(c->vc.chan.device->dev, "Enable RX fail\n");
+}
+
+static void mtk_uart_apdma_tx_handler(struct mtk_chan *c)
+{
+ mtk_uart_apdma_write(c, VFF_INT_FLAG, VFF_TX_INT_CLR_B);
+ mtk_uart_apdma_write(c, VFF_INT_EN, VFF_INT_EN_CLR_B);
+ mtk_uart_apdma_write(c, VFF_EN, VFF_EN_CLR_B);
+}
+
+static void mtk_uart_apdma_rx_handler(struct mtk_chan *c)
+{
+ struct mtk_uart_apdma_desc *d = c->desc;
+ unsigned int len, wg, rg;
+ int cnt;
+
+ mtk_uart_apdma_write(c, VFF_INT_FLAG, VFF_RX_INT_CLR_B);
+
+ if (!mtk_uart_apdma_read(c, VFF_VALID_SIZE))
+ return;
+
+ mtk_uart_apdma_write(c, VFF_EN, VFF_EN_CLR_B);
+ mtk_uart_apdma_write(c, VFF_INT_EN, VFF_INT_EN_CLR_B);
+
+ len = c->cfg.src_port_window_size;
+ rg = mtk_uart_apdma_read(c, VFF_RPT);
+ wg = mtk_uart_apdma_read(c, VFF_WPT);
+ cnt = (wg & VFF_RING_SIZE) - (rg & VFF_RING_SIZE);
+
+ /*
+ * The buffer is ring buffer. If wrap bit different,
+ * represents the start of the next cycle for WPT
+ */
+ if ((rg ^ wg) & VFF_RING_WRAP)
+ cnt += len;
+
+ c->rx_status = d->avail_len - cnt;
+ mtk_uart_apdma_write(c, VFF_RPT, wg);
+}
+
+static void mtk_uart_apdma_chan_complete_handler(struct mtk_chan *c)
+{
+ struct mtk_uart_apdma_desc *d = c->desc;
+
+ if (d) {
+ list_del(&d->vd.node);
+ vchan_cookie_complete(&d->vd);
+ c->desc = NULL;
+ }
+}
+
+static irqreturn_t mtk_uart_apdma_irq_handler(int irq, void *dev_id)
+{
+ struct dma_chan *chan = (struct dma_chan *)dev_id;
+ struct mtk_chan *c = to_mtk_uart_apdma_chan(chan);
+ unsigned long flags;
+
+ spin_lock_irqsave(&c->vc.lock, flags);
+ if (c->dir == DMA_DEV_TO_MEM)
+ mtk_uart_apdma_rx_handler(c);
+ else if (c->dir == DMA_MEM_TO_DEV)
+ mtk_uart_apdma_tx_handler(c);
+ mtk_uart_apdma_chan_complete_handler(c);
+ spin_unlock_irqrestore(&c->vc.lock, flags);
+
+ return IRQ_HANDLED;
+}
+
+static int mtk_uart_apdma_alloc_chan_resources(struct dma_chan *chan)
+{
+ struct mtk_uart_apdmadev *mtkd = to_mtk_uart_apdma_dev(chan->device);
+ struct mtk_chan *c = to_mtk_uart_apdma_chan(chan);
+ unsigned int status;
+ int ret;
+
+ ret = pm_runtime_resume_and_get(mtkd->ddev.dev);
+ if (ret < 0) {
+ pm_runtime_put_noidle(chan->device->dev);
+ return ret;
+ }
+
+ mtk_uart_apdma_write(c, VFF_ADDR, 0);
+ mtk_uart_apdma_write(c, VFF_THRE, 0);
+ mtk_uart_apdma_write(c, VFF_LEN, 0);
+ mtk_uart_apdma_write(c, VFF_RST, VFF_WARM_RST_B);
+
+ ret = readx_poll_timeout(readl, c->base + VFF_EN,
+ status, !status, 10, 100);
+ if (ret)
+ goto err_pm;
+
+ ret = request_irq(c->irq, mtk_uart_apdma_irq_handler,
+ IRQF_TRIGGER_NONE, KBUILD_MODNAME, chan);
+ if (ret < 0) {
+ dev_err(chan->device->dev, "Can't request dma IRQ\n");
+ ret = -EINVAL;
+ goto err_pm;
+ }
+
+ if (mtkd->support_33bits)
+ mtk_uart_apdma_write(c, VFF_4G_SUPPORT, VFF_4G_SUPPORT_CLR_B);
+
+err_pm:
+ pm_runtime_put_noidle(mtkd->ddev.dev);
+ return ret;
+}
+
+static void mtk_uart_apdma_free_chan_resources(struct dma_chan *chan)
+{
+ struct mtk_uart_apdmadev *mtkd = to_mtk_uart_apdma_dev(chan->device);
+ struct mtk_chan *c = to_mtk_uart_apdma_chan(chan);
+
+ free_irq(c->irq, chan);
+
+ tasklet_kill(&c->vc.task);
+
+ vchan_free_chan_resources(&c->vc);
+
+ pm_runtime_put_sync(mtkd->ddev.dev);
+}
+
+static enum dma_status mtk_uart_apdma_tx_status(struct dma_chan *chan,
+ dma_cookie_t cookie,
+ struct dma_tx_state *txstate)
+{
+ struct mtk_chan *c = to_mtk_uart_apdma_chan(chan);
+ enum dma_status ret;
+
+ ret = dma_cookie_status(chan, cookie, txstate);
+ if (!txstate)
+ return ret;
+
+ dma_set_residue(txstate, c->rx_status);
+
+ return ret;
+}
+
+/*
+ * dmaengine_prep_slave_single will call the function. and sglen is 1.
+ * 8250 uart using one ring buffer, and deal with one sg.
+ */
+static struct dma_async_tx_descriptor *mtk_uart_apdma_prep_slave_sg
+ (struct dma_chan *chan, struct scatterlist *sgl,
+ unsigned int sglen, enum dma_transfer_direction dir,
+ unsigned long tx_flags, void *context)
+{
+ struct mtk_chan *c = to_mtk_uart_apdma_chan(chan);
+ struct mtk_uart_apdma_desc *d;
+
+ if (!is_slave_direction(dir) || sglen != 1)
+ return NULL;
+
+ /* Now allocate and setup the descriptor */
+ d = kzalloc(sizeof(*d), GFP_NOWAIT);
+ if (!d)
+ return NULL;
+
+ d->avail_len = sg_dma_len(sgl);
+ d->addr = sg_dma_address(sgl);
+ c->dir = dir;
+
+ return vchan_tx_prep(&c->vc, &d->vd, tx_flags);
+}
+
+static void mtk_uart_apdma_issue_pending(struct dma_chan *chan)
+{
+ struct mtk_chan *c = to_mtk_uart_apdma_chan(chan);
+ struct virt_dma_desc *vd;
+ unsigned long flags;
+
+ spin_lock_irqsave(&c->vc.lock, flags);
+ if (vchan_issue_pending(&c->vc) && !c->desc) {
+ vd = vchan_next_desc(&c->vc);
+ c->desc = to_mtk_uart_apdma_desc(&vd->tx);
+
+ if (c->dir == DMA_DEV_TO_MEM)
+ mtk_uart_apdma_start_rx(c);
+ else if (c->dir == DMA_MEM_TO_DEV)
+ mtk_uart_apdma_start_tx(c);
+ }
+
+ spin_unlock_irqrestore(&c->vc.lock, flags);
+}
+
+static int mtk_uart_apdma_slave_config(struct dma_chan *chan,
+ struct dma_slave_config *config)
+{
+ struct mtk_chan *c = to_mtk_uart_apdma_chan(chan);
+
+ memcpy(&c->cfg, config, sizeof(*config));
+
+ return 0;
+}
+
+static int mtk_uart_apdma_terminate_all(struct dma_chan *chan)
+{
+ struct mtk_chan *c = to_mtk_uart_apdma_chan(chan);
+ unsigned long flags;
+ unsigned int status;
+ LIST_HEAD(head);
+ int ret;
+
+ mtk_uart_apdma_write(c, VFF_FLUSH, VFF_FLUSH_B);
+
+ ret = readx_poll_timeout(readl, c->base + VFF_FLUSH,
+ status, status != VFF_FLUSH_B, 10, 100);
+ if (ret)
+ dev_err(c->vc.chan.device->dev, "flush: fail, status=0x%x\n",
+ mtk_uart_apdma_read(c, VFF_DEBUG_STATUS));
+
+ /*
+ * Stop need 3 steps.
+ * 1. set stop to 1
+ * 2. wait en to 0
+ * 3. set stop as 0
+ */
+ mtk_uart_apdma_write(c, VFF_STOP, VFF_STOP_B);
+ ret = readx_poll_timeout(readl, c->base + VFF_EN,
+ status, !status, 10, 100);
+ if (ret)
+ dev_err(c->vc.chan.device->dev, "stop: fail, status=0x%x\n",
+ mtk_uart_apdma_read(c, VFF_DEBUG_STATUS));
+
+ mtk_uart_apdma_write(c, VFF_STOP, VFF_STOP_CLR_B);
+ mtk_uart_apdma_write(c, VFF_INT_EN, VFF_INT_EN_CLR_B);
+
+ if (c->dir == DMA_DEV_TO_MEM)
+ mtk_uart_apdma_write(c, VFF_INT_FLAG, VFF_RX_INT_CLR_B);
+ else if (c->dir == DMA_MEM_TO_DEV)
+ mtk_uart_apdma_write(c, VFF_INT_FLAG, VFF_TX_INT_CLR_B);
+
+ synchronize_irq(c->irq);
+
+ spin_lock_irqsave(&c->vc.lock, flags);
+ vchan_get_all_descriptors(&c->vc, &head);
+ spin_unlock_irqrestore(&c->vc.lock, flags);
+
+ vchan_dma_desc_free_list(&c->vc, &head);
+
+ return 0;
+}
+
+static int mtk_uart_apdma_device_pause(struct dma_chan *chan)
+{
+ struct mtk_chan *c = to_mtk_uart_apdma_chan(chan);
+ unsigned long flags;
+
+ spin_lock_irqsave(&c->vc.lock, flags);
+
+ mtk_uart_apdma_write(c, VFF_EN, VFF_EN_CLR_B);
+ mtk_uart_apdma_write(c, VFF_INT_EN, VFF_INT_EN_CLR_B);
+
+ spin_unlock_irqrestore(&c->vc.lock, flags);
+ synchronize_irq(c->irq);
+
+ return 0;
+}
+
+static void mtk_uart_apdma_free(struct mtk_uart_apdmadev *mtkd)
+{
+ while (!list_empty(&mtkd->ddev.channels)) {
+ struct mtk_chan *c = list_first_entry(&mtkd->ddev.channels,
+ struct mtk_chan, vc.chan.device_node);
+
+ list_del(&c->vc.chan.device_node);
+ tasklet_kill(&c->vc.task);
+ }
+}
+
+static const struct of_device_id mtk_uart_apdma_match[] = {
+ { .compatible = "mediatek,mt6577-uart-dma", },
+ { /* sentinel */ },
+};
+MODULE_DEVICE_TABLE(of, mtk_uart_apdma_match);
+
+static int mtk_uart_apdma_probe(struct platform_device *pdev)
+{
+ struct device_node *np = pdev->dev.of_node;
+ struct mtk_uart_apdmadev *mtkd;
+ int bit_mask = 32, rc;
+ struct mtk_chan *c;
+ unsigned int i;
+
+ mtkd = devm_kzalloc(&pdev->dev, sizeof(*mtkd), GFP_KERNEL);
+ if (!mtkd)
+ return -ENOMEM;
+
+ mtkd->clk = devm_clk_get(&pdev->dev, NULL);
+ if (IS_ERR(mtkd->clk)) {
+ dev_err(&pdev->dev, "No clock specified\n");
+ rc = PTR_ERR(mtkd->clk);
+ return rc;
+ }
+
+ if (of_property_read_bool(np, "mediatek,dma-33bits"))
+ mtkd->support_33bits = true;
+
+ if (mtkd->support_33bits)
+ bit_mask = 33;
+
+ rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(bit_mask));
+ if (rc)
+ return rc;
+
+ dma_cap_set(DMA_SLAVE, mtkd->ddev.cap_mask);
+ mtkd->ddev.device_alloc_chan_resources =
+ mtk_uart_apdma_alloc_chan_resources;
+ mtkd->ddev.device_free_chan_resources =
+ mtk_uart_apdma_free_chan_resources;
+ mtkd->ddev.device_tx_status = mtk_uart_apdma_tx_status;
+ mtkd->ddev.device_issue_pending = mtk_uart_apdma_issue_pending;
+ mtkd->ddev.device_prep_slave_sg = mtk_uart_apdma_prep_slave_sg;
+ mtkd->ddev.device_config = mtk_uart_apdma_slave_config;
+ mtkd->ddev.device_pause = mtk_uart_apdma_device_pause;
+ mtkd->ddev.device_terminate_all = mtk_uart_apdma_terminate_all;
+ mtkd->ddev.src_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE);
+ mtkd->ddev.dst_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE);
+ mtkd->ddev.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
+ mtkd->ddev.residue_granularity = DMA_RESIDUE_GRANULARITY_SEGMENT;
+ mtkd->ddev.dev = &pdev->dev;
+ INIT_LIST_HEAD(&mtkd->ddev.channels);
+
+ mtkd->dma_requests = MTK_UART_APDMA_NR_VCHANS;
+ if (of_property_read_u32(np, "dma-requests", &mtkd->dma_requests)) {
+ dev_info(&pdev->dev,
+ "Using %u as missing dma-requests property\n",
+ MTK_UART_APDMA_NR_VCHANS);
+ }
+
+ for (i = 0; i < mtkd->dma_requests; i++) {
+ c = devm_kzalloc(mtkd->ddev.dev, sizeof(*c), GFP_KERNEL);
+ if (!c) {
+ rc = -ENODEV;
+ goto err_no_dma;
+ }
+
+ c->base = devm_platform_ioremap_resource(pdev, i);
+ if (IS_ERR(c->base)) {
+ rc = PTR_ERR(c->base);
+ goto err_no_dma;
+ }
+ c->vc.desc_free = mtk_uart_apdma_desc_free;
+ vchan_init(&c->vc, &mtkd->ddev);
+
+ rc = platform_get_irq(pdev, i);
+ if (rc < 0)
+ goto err_no_dma;
+ c->irq = rc;
+ }
+
+ pm_runtime_enable(&pdev->dev);
+ pm_runtime_set_active(&pdev->dev);
+
+ rc = dma_async_device_register(&mtkd->ddev);
+ if (rc)
+ goto rpm_disable;
+
+ platform_set_drvdata(pdev, mtkd);
+
+ /* Device-tree DMA controller registration */
+ rc = of_dma_controller_register(np, of_dma_xlate_by_chan_id, mtkd);
+ if (rc)
+ goto dma_remove;
+
+ return rc;
+
+dma_remove:
+ dma_async_device_unregister(&mtkd->ddev);
+rpm_disable:
+ pm_runtime_disable(&pdev->dev);
+err_no_dma:
+ mtk_uart_apdma_free(mtkd);
+ return rc;
+}
+
+static int mtk_uart_apdma_remove(struct platform_device *pdev)
+{
+ struct mtk_uart_apdmadev *mtkd = platform_get_drvdata(pdev);
+
+ of_dma_controller_free(pdev->dev.of_node);
+
+ mtk_uart_apdma_free(mtkd);
+
+ dma_async_device_unregister(&mtkd->ddev);
+
+ pm_runtime_disable(&pdev->dev);
+
+ return 0;
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int mtk_uart_apdma_suspend(struct device *dev)
+{
+ struct mtk_uart_apdmadev *mtkd = dev_get_drvdata(dev);
+
+ if (!pm_runtime_suspended(dev))
+ clk_disable_unprepare(mtkd->clk);
+
+ return 0;
+}
+
+static int mtk_uart_apdma_resume(struct device *dev)
+{
+ int ret;
+ struct mtk_uart_apdmadev *mtkd = dev_get_drvdata(dev);
+
+ if (!pm_runtime_suspended(dev)) {
+ ret = clk_prepare_enable(mtkd->clk);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+#endif /* CONFIG_PM_SLEEP */
+
+#ifdef CONFIG_PM
+static int mtk_uart_apdma_runtime_suspend(struct device *dev)
+{
+ struct mtk_uart_apdmadev *mtkd = dev_get_drvdata(dev);
+
+ clk_disable_unprepare(mtkd->clk);
+
+ return 0;
+}
+
+static int mtk_uart_apdma_runtime_resume(struct device *dev)
+{
+ struct mtk_uart_apdmadev *mtkd = dev_get_drvdata(dev);
+
+ return clk_prepare_enable(mtkd->clk);
+}
+#endif /* CONFIG_PM */
+
+static const struct dev_pm_ops mtk_uart_apdma_pm_ops = {
+ SET_SYSTEM_SLEEP_PM_OPS(mtk_uart_apdma_suspend, mtk_uart_apdma_resume)
+ SET_RUNTIME_PM_OPS(mtk_uart_apdma_runtime_suspend,
+ mtk_uart_apdma_runtime_resume, NULL)
+};
+
+static struct platform_driver mtk_uart_apdma_driver = {
+ .probe = mtk_uart_apdma_probe,
+ .remove = mtk_uart_apdma_remove,
+ .driver = {
+ .name = KBUILD_MODNAME,
+ .pm = &mtk_uart_apdma_pm_ops,
+ .of_match_table = of_match_ptr(mtk_uart_apdma_match),
+ },
+};
+
+module_platform_driver(mtk_uart_apdma_driver);
+
+MODULE_DESCRIPTION("MediaTek UART APDMA Controller Driver");
+MODULE_AUTHOR("Long Cheng <long.cheng@mediatek.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/milbeaut-hdmac.c b/drivers/dma/milbeaut-hdmac.c
new file mode 100644
index 000000000..1b0a95892
--- /dev/null
+++ b/drivers/dma/milbeaut-hdmac.c
@@ -0,0 +1,578 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Copyright (C) 2019 Linaro Ltd.
+// Copyright (C) 2019 Socionext Inc.
+
+#include <linux/bits.h>
+#include <linux/clk.h>
+#include <linux/dma-mapping.h>
+#include <linux/interrupt.h>
+#include <linux/iopoll.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/of_dma.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/bitfield.h>
+
+#include "virt-dma.h"
+
+#define MLB_HDMAC_DMACR 0x0 /* global */
+#define MLB_HDMAC_DE BIT(31)
+#define MLB_HDMAC_DS BIT(30)
+#define MLB_HDMAC_PR BIT(28)
+#define MLB_HDMAC_DH GENMASK(27, 24)
+
+#define MLB_HDMAC_CH_STRIDE 0x10
+
+#define MLB_HDMAC_DMACA 0x0 /* channel */
+#define MLB_HDMAC_EB BIT(31)
+#define MLB_HDMAC_PB BIT(30)
+#define MLB_HDMAC_ST BIT(29)
+#define MLB_HDMAC_IS GENMASK(28, 24)
+#define MLB_HDMAC_BT GENMASK(23, 20)
+#define MLB_HDMAC_BC GENMASK(19, 16)
+#define MLB_HDMAC_TC GENMASK(15, 0)
+#define MLB_HDMAC_DMACB 0x4
+#define MLB_HDMAC_TT GENMASK(31, 30)
+#define MLB_HDMAC_MS GENMASK(29, 28)
+#define MLB_HDMAC_TW GENMASK(27, 26)
+#define MLB_HDMAC_FS BIT(25)
+#define MLB_HDMAC_FD BIT(24)
+#define MLB_HDMAC_RC BIT(23)
+#define MLB_HDMAC_RS BIT(22)
+#define MLB_HDMAC_RD BIT(21)
+#define MLB_HDMAC_EI BIT(20)
+#define MLB_HDMAC_CI BIT(19)
+#define HDMAC_PAUSE 0x7
+#define MLB_HDMAC_SS GENMASK(18, 16)
+#define MLB_HDMAC_SP GENMASK(15, 12)
+#define MLB_HDMAC_DP GENMASK(11, 8)
+#define MLB_HDMAC_DMACSA 0x8
+#define MLB_HDMAC_DMACDA 0xc
+
+#define MLB_HDMAC_BUSWIDTHS (BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | \
+ BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \
+ BIT(DMA_SLAVE_BUSWIDTH_4_BYTES))
+
+struct milbeaut_hdmac_desc {
+ struct virt_dma_desc vd;
+ struct scatterlist *sgl;
+ unsigned int sg_len;
+ unsigned int sg_cur;
+ enum dma_transfer_direction dir;
+};
+
+struct milbeaut_hdmac_chan {
+ struct virt_dma_chan vc;
+ struct milbeaut_hdmac_device *mdev;
+ struct milbeaut_hdmac_desc *md;
+ void __iomem *reg_ch_base;
+ unsigned int slave_id;
+ struct dma_slave_config cfg;
+};
+
+struct milbeaut_hdmac_device {
+ struct dma_device ddev;
+ struct clk *clk;
+ void __iomem *reg_base;
+ struct milbeaut_hdmac_chan channels[];
+};
+
+static struct milbeaut_hdmac_chan *
+to_milbeaut_hdmac_chan(struct virt_dma_chan *vc)
+{
+ return container_of(vc, struct milbeaut_hdmac_chan, vc);
+}
+
+static struct milbeaut_hdmac_desc *
+to_milbeaut_hdmac_desc(struct virt_dma_desc *vd)
+{
+ return container_of(vd, struct milbeaut_hdmac_desc, vd);
+}
+
+/* mc->vc.lock must be held by caller */
+static struct milbeaut_hdmac_desc *
+milbeaut_hdmac_next_desc(struct milbeaut_hdmac_chan *mc)
+{
+ struct virt_dma_desc *vd;
+
+ vd = vchan_next_desc(&mc->vc);
+ if (!vd) {
+ mc->md = NULL;
+ return NULL;
+ }
+
+ list_del(&vd->node);
+
+ mc->md = to_milbeaut_hdmac_desc(vd);
+
+ return mc->md;
+}
+
+/* mc->vc.lock must be held by caller */
+static void milbeaut_chan_start(struct milbeaut_hdmac_chan *mc,
+ struct milbeaut_hdmac_desc *md)
+{
+ struct scatterlist *sg;
+ u32 cb, ca, src_addr, dest_addr, len;
+ u32 width, burst;
+
+ sg = &md->sgl[md->sg_cur];
+ len = sg_dma_len(sg);
+
+ cb = MLB_HDMAC_CI | MLB_HDMAC_EI;
+ if (md->dir == DMA_MEM_TO_DEV) {
+ cb |= MLB_HDMAC_FD;
+ width = mc->cfg.dst_addr_width;
+ burst = mc->cfg.dst_maxburst;
+ src_addr = sg_dma_address(sg);
+ dest_addr = mc->cfg.dst_addr;
+ } else {
+ cb |= MLB_HDMAC_FS;
+ width = mc->cfg.src_addr_width;
+ burst = mc->cfg.src_maxburst;
+ src_addr = mc->cfg.src_addr;
+ dest_addr = sg_dma_address(sg);
+ }
+ cb |= FIELD_PREP(MLB_HDMAC_TW, (width >> 1));
+ cb |= FIELD_PREP(MLB_HDMAC_MS, 2);
+
+ writel_relaxed(MLB_HDMAC_DE, mc->mdev->reg_base + MLB_HDMAC_DMACR);
+ writel_relaxed(src_addr, mc->reg_ch_base + MLB_HDMAC_DMACSA);
+ writel_relaxed(dest_addr, mc->reg_ch_base + MLB_HDMAC_DMACDA);
+ writel_relaxed(cb, mc->reg_ch_base + MLB_HDMAC_DMACB);
+
+ ca = FIELD_PREP(MLB_HDMAC_IS, mc->slave_id);
+ if (burst == 16)
+ ca |= FIELD_PREP(MLB_HDMAC_BT, 0xf);
+ else if (burst == 8)
+ ca |= FIELD_PREP(MLB_HDMAC_BT, 0xd);
+ else if (burst == 4)
+ ca |= FIELD_PREP(MLB_HDMAC_BT, 0xb);
+ burst *= width;
+ ca |= FIELD_PREP(MLB_HDMAC_TC, (len / burst - 1));
+ writel_relaxed(ca, mc->reg_ch_base + MLB_HDMAC_DMACA);
+ ca |= MLB_HDMAC_EB;
+ writel_relaxed(ca, mc->reg_ch_base + MLB_HDMAC_DMACA);
+}
+
+/* mc->vc.lock must be held by caller */
+static void milbeaut_hdmac_start(struct milbeaut_hdmac_chan *mc)
+{
+ struct milbeaut_hdmac_desc *md;
+
+ md = milbeaut_hdmac_next_desc(mc);
+ if (md)
+ milbeaut_chan_start(mc, md);
+}
+
+static irqreturn_t milbeaut_hdmac_interrupt(int irq, void *dev_id)
+{
+ struct milbeaut_hdmac_chan *mc = dev_id;
+ struct milbeaut_hdmac_desc *md;
+ u32 val;
+
+ spin_lock(&mc->vc.lock);
+
+ /* Ack and Disable irqs */
+ val = readl_relaxed(mc->reg_ch_base + MLB_HDMAC_DMACB);
+ val &= ~(FIELD_PREP(MLB_HDMAC_SS, HDMAC_PAUSE));
+ writel_relaxed(val, mc->reg_ch_base + MLB_HDMAC_DMACB);
+ val &= ~MLB_HDMAC_EI;
+ val &= ~MLB_HDMAC_CI;
+ writel_relaxed(val, mc->reg_ch_base + MLB_HDMAC_DMACB);
+
+ md = mc->md;
+ if (!md)
+ goto out;
+
+ md->sg_cur++;
+
+ if (md->sg_cur >= md->sg_len) {
+ vchan_cookie_complete(&md->vd);
+ md = milbeaut_hdmac_next_desc(mc);
+ if (!md)
+ goto out;
+ }
+
+ milbeaut_chan_start(mc, md);
+
+out:
+ spin_unlock(&mc->vc.lock);
+ return IRQ_HANDLED;
+}
+
+static void milbeaut_hdmac_free_chan_resources(struct dma_chan *chan)
+{
+ vchan_free_chan_resources(to_virt_chan(chan));
+}
+
+static int
+milbeaut_hdmac_chan_config(struct dma_chan *chan, struct dma_slave_config *cfg)
+{
+ struct virt_dma_chan *vc = to_virt_chan(chan);
+ struct milbeaut_hdmac_chan *mc = to_milbeaut_hdmac_chan(vc);
+
+ spin_lock(&mc->vc.lock);
+ mc->cfg = *cfg;
+ spin_unlock(&mc->vc.lock);
+
+ return 0;
+}
+
+static int milbeaut_hdmac_chan_pause(struct dma_chan *chan)
+{
+ struct virt_dma_chan *vc = to_virt_chan(chan);
+ struct milbeaut_hdmac_chan *mc = to_milbeaut_hdmac_chan(vc);
+ u32 val;
+
+ spin_lock(&mc->vc.lock);
+ val = readl_relaxed(mc->reg_ch_base + MLB_HDMAC_DMACA);
+ val |= MLB_HDMAC_PB;
+ writel_relaxed(val, mc->reg_ch_base + MLB_HDMAC_DMACA);
+ spin_unlock(&mc->vc.lock);
+
+ return 0;
+}
+
+static int milbeaut_hdmac_chan_resume(struct dma_chan *chan)
+{
+ struct virt_dma_chan *vc = to_virt_chan(chan);
+ struct milbeaut_hdmac_chan *mc = to_milbeaut_hdmac_chan(vc);
+ u32 val;
+
+ spin_lock(&mc->vc.lock);
+ val = readl_relaxed(mc->reg_ch_base + MLB_HDMAC_DMACA);
+ val &= ~MLB_HDMAC_PB;
+ writel_relaxed(val, mc->reg_ch_base + MLB_HDMAC_DMACA);
+ spin_unlock(&mc->vc.lock);
+
+ return 0;
+}
+
+static struct dma_async_tx_descriptor *
+milbeaut_hdmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
+ unsigned int sg_len,
+ enum dma_transfer_direction direction,
+ unsigned long flags, void *context)
+{
+ struct virt_dma_chan *vc = to_virt_chan(chan);
+ struct milbeaut_hdmac_desc *md;
+ int i;
+
+ if (!is_slave_direction(direction))
+ return NULL;
+
+ md = kzalloc(sizeof(*md), GFP_NOWAIT);
+ if (!md)
+ return NULL;
+
+ md->sgl = kcalloc(sg_len, sizeof(*sgl), GFP_NOWAIT);
+ if (!md->sgl) {
+ kfree(md);
+ return NULL;
+ }
+
+ for (i = 0; i < sg_len; i++)
+ md->sgl[i] = sgl[i];
+
+ md->sg_len = sg_len;
+ md->dir = direction;
+
+ return vchan_tx_prep(vc, &md->vd, flags);
+}
+
+static int milbeaut_hdmac_terminate_all(struct dma_chan *chan)
+{
+ struct virt_dma_chan *vc = to_virt_chan(chan);
+ struct milbeaut_hdmac_chan *mc = to_milbeaut_hdmac_chan(vc);
+ unsigned long flags;
+ u32 val;
+
+ LIST_HEAD(head);
+
+ spin_lock_irqsave(&vc->lock, flags);
+
+ val = readl_relaxed(mc->reg_ch_base + MLB_HDMAC_DMACA);
+ val &= ~MLB_HDMAC_EB; /* disable the channel */
+ writel_relaxed(val, mc->reg_ch_base + MLB_HDMAC_DMACA);
+
+ if (mc->md) {
+ vchan_terminate_vdesc(&mc->md->vd);
+ mc->md = NULL;
+ }
+
+ vchan_get_all_descriptors(vc, &head);
+
+ spin_unlock_irqrestore(&vc->lock, flags);
+
+ vchan_dma_desc_free_list(vc, &head);
+
+ return 0;
+}
+
+static void milbeaut_hdmac_synchronize(struct dma_chan *chan)
+{
+ vchan_synchronize(to_virt_chan(chan));
+}
+
+static enum dma_status milbeaut_hdmac_tx_status(struct dma_chan *chan,
+ dma_cookie_t cookie,
+ struct dma_tx_state *txstate)
+{
+ struct virt_dma_chan *vc;
+ struct virt_dma_desc *vd;
+ struct milbeaut_hdmac_chan *mc;
+ struct milbeaut_hdmac_desc *md = NULL;
+ enum dma_status stat;
+ unsigned long flags;
+ int i;
+
+ stat = dma_cookie_status(chan, cookie, txstate);
+ /* Return immediately if we do not need to compute the residue. */
+ if (stat == DMA_COMPLETE || !txstate)
+ return stat;
+
+ vc = to_virt_chan(chan);
+
+ spin_lock_irqsave(&vc->lock, flags);
+
+ mc = to_milbeaut_hdmac_chan(vc);
+
+ /* residue from the on-flight chunk */
+ if (mc->md && mc->md->vd.tx.cookie == cookie) {
+ struct scatterlist *sg;
+ u32 done;
+
+ md = mc->md;
+ sg = &md->sgl[md->sg_cur];
+
+ if (md->dir == DMA_DEV_TO_MEM)
+ done = readl_relaxed(mc->reg_ch_base
+ + MLB_HDMAC_DMACDA);
+ else
+ done = readl_relaxed(mc->reg_ch_base
+ + MLB_HDMAC_DMACSA);
+ done -= sg_dma_address(sg);
+
+ txstate->residue = -done;
+ }
+
+ if (!md) {
+ vd = vchan_find_desc(vc, cookie);
+ if (vd)
+ md = to_milbeaut_hdmac_desc(vd);
+ }
+
+ if (md) {
+ /* residue from the queued chunks */
+ for (i = md->sg_cur; i < md->sg_len; i++)
+ txstate->residue += sg_dma_len(&md->sgl[i]);
+ }
+
+ spin_unlock_irqrestore(&vc->lock, flags);
+
+ return stat;
+}
+
+static void milbeaut_hdmac_issue_pending(struct dma_chan *chan)
+{
+ struct virt_dma_chan *vc = to_virt_chan(chan);
+ struct milbeaut_hdmac_chan *mc = to_milbeaut_hdmac_chan(vc);
+ unsigned long flags;
+
+ spin_lock_irqsave(&vc->lock, flags);
+
+ if (vchan_issue_pending(vc) && !mc->md)
+ milbeaut_hdmac_start(mc);
+
+ spin_unlock_irqrestore(&vc->lock, flags);
+}
+
+static void milbeaut_hdmac_desc_free(struct virt_dma_desc *vd)
+{
+ struct milbeaut_hdmac_desc *md = to_milbeaut_hdmac_desc(vd);
+
+ kfree(md->sgl);
+ kfree(md);
+}
+
+static struct dma_chan *
+milbeaut_hdmac_xlate(struct of_phandle_args *dma_spec, struct of_dma *of_dma)
+{
+ struct milbeaut_hdmac_device *mdev = of_dma->of_dma_data;
+ struct milbeaut_hdmac_chan *mc;
+ struct virt_dma_chan *vc;
+ struct dma_chan *chan;
+
+ if (dma_spec->args_count != 1)
+ return NULL;
+
+ chan = dma_get_any_slave_channel(&mdev->ddev);
+ if (!chan)
+ return NULL;
+
+ vc = to_virt_chan(chan);
+ mc = to_milbeaut_hdmac_chan(vc);
+ mc->slave_id = dma_spec->args[0];
+
+ return chan;
+}
+
+static int milbeaut_hdmac_chan_init(struct platform_device *pdev,
+ struct milbeaut_hdmac_device *mdev,
+ int chan_id)
+{
+ struct device *dev = &pdev->dev;
+ struct milbeaut_hdmac_chan *mc = &mdev->channels[chan_id];
+ char *irq_name;
+ int irq, ret;
+
+ irq = platform_get_irq(pdev, chan_id);
+ if (irq < 0)
+ return irq;
+
+ irq_name = devm_kasprintf(dev, GFP_KERNEL, "milbeaut-hdmac-%d",
+ chan_id);
+ if (!irq_name)
+ return -ENOMEM;
+
+ ret = devm_request_irq(dev, irq, milbeaut_hdmac_interrupt,
+ IRQF_SHARED, irq_name, mc);
+ if (ret)
+ return ret;
+
+ mc->mdev = mdev;
+ mc->reg_ch_base = mdev->reg_base + MLB_HDMAC_CH_STRIDE * (chan_id + 1);
+ mc->vc.desc_free = milbeaut_hdmac_desc_free;
+ vchan_init(&mc->vc, &mdev->ddev);
+
+ return 0;
+}
+
+static int milbeaut_hdmac_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct milbeaut_hdmac_device *mdev;
+ struct dma_device *ddev;
+ int nr_chans, ret, i;
+
+ nr_chans = platform_irq_count(pdev);
+ if (nr_chans < 0)
+ return nr_chans;
+
+ ret = dma_set_mask(dev, DMA_BIT_MASK(32));
+ if (ret)
+ return ret;
+
+ mdev = devm_kzalloc(dev, struct_size(mdev, channels, nr_chans),
+ GFP_KERNEL);
+ if (!mdev)
+ return -ENOMEM;
+
+ mdev->reg_base = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(mdev->reg_base))
+ return PTR_ERR(mdev->reg_base);
+
+ mdev->clk = devm_clk_get(dev, NULL);
+ if (IS_ERR(mdev->clk)) {
+ dev_err(dev, "failed to get clock\n");
+ return PTR_ERR(mdev->clk);
+ }
+
+ ret = clk_prepare_enable(mdev->clk);
+ if (ret)
+ return ret;
+
+ ddev = &mdev->ddev;
+ ddev->dev = dev;
+ dma_cap_set(DMA_SLAVE, ddev->cap_mask);
+ dma_cap_set(DMA_PRIVATE, ddev->cap_mask);
+ ddev->src_addr_widths = MLB_HDMAC_BUSWIDTHS;
+ ddev->dst_addr_widths = MLB_HDMAC_BUSWIDTHS;
+ ddev->directions = BIT(DMA_MEM_TO_DEV) | BIT(DMA_DEV_TO_MEM);
+ ddev->device_free_chan_resources = milbeaut_hdmac_free_chan_resources;
+ ddev->device_config = milbeaut_hdmac_chan_config;
+ ddev->device_pause = milbeaut_hdmac_chan_pause;
+ ddev->device_resume = milbeaut_hdmac_chan_resume;
+ ddev->device_prep_slave_sg = milbeaut_hdmac_prep_slave_sg;
+ ddev->device_terminate_all = milbeaut_hdmac_terminate_all;
+ ddev->device_synchronize = milbeaut_hdmac_synchronize;
+ ddev->device_tx_status = milbeaut_hdmac_tx_status;
+ ddev->device_issue_pending = milbeaut_hdmac_issue_pending;
+ INIT_LIST_HEAD(&ddev->channels);
+
+ for (i = 0; i < nr_chans; i++) {
+ ret = milbeaut_hdmac_chan_init(pdev, mdev, i);
+ if (ret)
+ goto disable_clk;
+ }
+
+ ret = dma_async_device_register(ddev);
+ if (ret)
+ goto disable_clk;
+
+ ret = of_dma_controller_register(dev->of_node,
+ milbeaut_hdmac_xlate, mdev);
+ if (ret)
+ goto unregister_dmac;
+
+ platform_set_drvdata(pdev, mdev);
+
+ return 0;
+
+unregister_dmac:
+ dma_async_device_unregister(ddev);
+disable_clk:
+ clk_disable_unprepare(mdev->clk);
+
+ return ret;
+}
+
+static int milbeaut_hdmac_remove(struct platform_device *pdev)
+{
+ struct milbeaut_hdmac_device *mdev = platform_get_drvdata(pdev);
+ struct dma_chan *chan;
+ int ret;
+
+ /*
+ * Before reaching here, almost all descriptors have been freed by the
+ * ->device_free_chan_resources() hook. However, each channel might
+ * be still holding one descriptor that was on-flight at that moment.
+ * Terminate it to make sure this hardware is no longer running. Then,
+ * free the channel resources once again to avoid memory leak.
+ */
+ list_for_each_entry(chan, &mdev->ddev.channels, device_node) {
+ ret = dmaengine_terminate_sync(chan);
+ if (ret)
+ return ret;
+ milbeaut_hdmac_free_chan_resources(chan);
+ }
+
+ of_dma_controller_free(pdev->dev.of_node);
+ dma_async_device_unregister(&mdev->ddev);
+ clk_disable_unprepare(mdev->clk);
+
+ return 0;
+}
+
+static const struct of_device_id milbeaut_hdmac_match[] = {
+ { .compatible = "socionext,milbeaut-m10v-hdmac" },
+ { /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, milbeaut_hdmac_match);
+
+static struct platform_driver milbeaut_hdmac_driver = {
+ .probe = milbeaut_hdmac_probe,
+ .remove = milbeaut_hdmac_remove,
+ .driver = {
+ .name = "milbeaut-m10v-hdmac",
+ .of_match_table = milbeaut_hdmac_match,
+ },
+};
+module_platform_driver(milbeaut_hdmac_driver);
+
+MODULE_DESCRIPTION("Milbeaut HDMAC DmaEngine driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/milbeaut-xdmac.c b/drivers/dma/milbeaut-xdmac.c
new file mode 100644
index 000000000..d29d01e73
--- /dev/null
+++ b/drivers/dma/milbeaut-xdmac.c
@@ -0,0 +1,416 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Copyright (C) 2019 Linaro Ltd.
+// Copyright (C) 2019 Socionext Inc.
+
+#include <linux/bits.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmaengine.h>
+#include <linux/interrupt.h>
+#include <linux/iopoll.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/of_dma.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/bitfield.h>
+
+#include "virt-dma.h"
+
+/* global register */
+#define M10V_XDACS 0x00
+
+/* channel local register */
+#define M10V_XDTBC 0x10
+#define M10V_XDSSA 0x14
+#define M10V_XDDSA 0x18
+#define M10V_XDSAC 0x1C
+#define M10V_XDDAC 0x20
+#define M10V_XDDCC 0x24
+#define M10V_XDDES 0x28
+#define M10V_XDDPC 0x2C
+#define M10V_XDDSD 0x30
+
+#define M10V_XDACS_XE BIT(28)
+
+#define M10V_DEFBS 0x3
+#define M10V_DEFBL 0xf
+
+#define M10V_XDSAC_SBS GENMASK(17, 16)
+#define M10V_XDSAC_SBL GENMASK(11, 8)
+
+#define M10V_XDDAC_DBS GENMASK(17, 16)
+#define M10V_XDDAC_DBL GENMASK(11, 8)
+
+#define M10V_XDDES_CE BIT(28)
+#define M10V_XDDES_SE BIT(24)
+#define M10V_XDDES_SA BIT(15)
+#define M10V_XDDES_TF GENMASK(23, 20)
+#define M10V_XDDES_EI BIT(1)
+#define M10V_XDDES_TI BIT(0)
+
+#define M10V_XDDSD_IS_MASK GENMASK(3, 0)
+#define M10V_XDDSD_IS_NORMAL 0x8
+
+#define MLB_XDMAC_BUSWIDTHS (BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | \
+ BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \
+ BIT(DMA_SLAVE_BUSWIDTH_4_BYTES) | \
+ BIT(DMA_SLAVE_BUSWIDTH_8_BYTES))
+
+struct milbeaut_xdmac_desc {
+ struct virt_dma_desc vd;
+ size_t len;
+ dma_addr_t src;
+ dma_addr_t dst;
+};
+
+struct milbeaut_xdmac_chan {
+ struct virt_dma_chan vc;
+ struct milbeaut_xdmac_desc *md;
+ void __iomem *reg_ch_base;
+};
+
+struct milbeaut_xdmac_device {
+ struct dma_device ddev;
+ void __iomem *reg_base;
+ struct milbeaut_xdmac_chan channels[];
+};
+
+static struct milbeaut_xdmac_chan *
+to_milbeaut_xdmac_chan(struct virt_dma_chan *vc)
+{
+ return container_of(vc, struct milbeaut_xdmac_chan, vc);
+}
+
+static struct milbeaut_xdmac_desc *
+to_milbeaut_xdmac_desc(struct virt_dma_desc *vd)
+{
+ return container_of(vd, struct milbeaut_xdmac_desc, vd);
+}
+
+/* mc->vc.lock must be held by caller */
+static struct milbeaut_xdmac_desc *
+milbeaut_xdmac_next_desc(struct milbeaut_xdmac_chan *mc)
+{
+ struct virt_dma_desc *vd;
+
+ vd = vchan_next_desc(&mc->vc);
+ if (!vd) {
+ mc->md = NULL;
+ return NULL;
+ }
+
+ list_del(&vd->node);
+
+ mc->md = to_milbeaut_xdmac_desc(vd);
+
+ return mc->md;
+}
+
+/* mc->vc.lock must be held by caller */
+static void milbeaut_chan_start(struct milbeaut_xdmac_chan *mc,
+ struct milbeaut_xdmac_desc *md)
+{
+ u32 val;
+
+ /* Setup the channel */
+ val = md->len - 1;
+ writel_relaxed(val, mc->reg_ch_base + M10V_XDTBC);
+
+ val = md->src;
+ writel_relaxed(val, mc->reg_ch_base + M10V_XDSSA);
+
+ val = md->dst;
+ writel_relaxed(val, mc->reg_ch_base + M10V_XDDSA);
+
+ val = readl_relaxed(mc->reg_ch_base + M10V_XDSAC);
+ val &= ~(M10V_XDSAC_SBS | M10V_XDSAC_SBL);
+ val |= FIELD_PREP(M10V_XDSAC_SBS, M10V_DEFBS) |
+ FIELD_PREP(M10V_XDSAC_SBL, M10V_DEFBL);
+ writel_relaxed(val, mc->reg_ch_base + M10V_XDSAC);
+
+ val = readl_relaxed(mc->reg_ch_base + M10V_XDDAC);
+ val &= ~(M10V_XDDAC_DBS | M10V_XDDAC_DBL);
+ val |= FIELD_PREP(M10V_XDDAC_DBS, M10V_DEFBS) |
+ FIELD_PREP(M10V_XDDAC_DBL, M10V_DEFBL);
+ writel_relaxed(val, mc->reg_ch_base + M10V_XDDAC);
+
+ /* Start the channel */
+ val = readl_relaxed(mc->reg_ch_base + M10V_XDDES);
+ val &= ~(M10V_XDDES_CE | M10V_XDDES_SE | M10V_XDDES_TF |
+ M10V_XDDES_EI | M10V_XDDES_TI);
+ val |= FIELD_PREP(M10V_XDDES_CE, 1) | FIELD_PREP(M10V_XDDES_SE, 1) |
+ FIELD_PREP(M10V_XDDES_TF, 1) | FIELD_PREP(M10V_XDDES_EI, 1) |
+ FIELD_PREP(M10V_XDDES_TI, 1);
+ writel_relaxed(val, mc->reg_ch_base + M10V_XDDES);
+}
+
+/* mc->vc.lock must be held by caller */
+static void milbeaut_xdmac_start(struct milbeaut_xdmac_chan *mc)
+{
+ struct milbeaut_xdmac_desc *md;
+
+ md = milbeaut_xdmac_next_desc(mc);
+ if (md)
+ milbeaut_chan_start(mc, md);
+}
+
+static irqreturn_t milbeaut_xdmac_interrupt(int irq, void *dev_id)
+{
+ struct milbeaut_xdmac_chan *mc = dev_id;
+ struct milbeaut_xdmac_desc *md;
+ u32 val;
+
+ spin_lock(&mc->vc.lock);
+
+ /* Ack and Stop */
+ val = FIELD_PREP(M10V_XDDSD_IS_MASK, 0x0);
+ writel_relaxed(val, mc->reg_ch_base + M10V_XDDSD);
+
+ md = mc->md;
+ if (!md)
+ goto out;
+
+ vchan_cookie_complete(&md->vd);
+
+ milbeaut_xdmac_start(mc);
+out:
+ spin_unlock(&mc->vc.lock);
+ return IRQ_HANDLED;
+}
+
+static void milbeaut_xdmac_free_chan_resources(struct dma_chan *chan)
+{
+ vchan_free_chan_resources(to_virt_chan(chan));
+}
+
+static struct dma_async_tx_descriptor *
+milbeaut_xdmac_prep_memcpy(struct dma_chan *chan, dma_addr_t dst,
+ dma_addr_t src, size_t len, unsigned long flags)
+{
+ struct virt_dma_chan *vc = to_virt_chan(chan);
+ struct milbeaut_xdmac_desc *md;
+
+ md = kzalloc(sizeof(*md), GFP_NOWAIT);
+ if (!md)
+ return NULL;
+
+ md->len = len;
+ md->src = src;
+ md->dst = dst;
+
+ return vchan_tx_prep(vc, &md->vd, flags);
+}
+
+static int milbeaut_xdmac_terminate_all(struct dma_chan *chan)
+{
+ struct virt_dma_chan *vc = to_virt_chan(chan);
+ struct milbeaut_xdmac_chan *mc = to_milbeaut_xdmac_chan(vc);
+ unsigned long flags;
+ u32 val;
+
+ LIST_HEAD(head);
+
+ spin_lock_irqsave(&vc->lock, flags);
+
+ /* Halt the channel */
+ val = readl(mc->reg_ch_base + M10V_XDDES);
+ val &= ~M10V_XDDES_CE;
+ val |= FIELD_PREP(M10V_XDDES_CE, 0);
+ writel(val, mc->reg_ch_base + M10V_XDDES);
+
+ if (mc->md) {
+ vchan_terminate_vdesc(&mc->md->vd);
+ mc->md = NULL;
+ }
+
+ vchan_get_all_descriptors(vc, &head);
+
+ spin_unlock_irqrestore(&vc->lock, flags);
+
+ vchan_dma_desc_free_list(vc, &head);
+
+ return 0;
+}
+
+static void milbeaut_xdmac_synchronize(struct dma_chan *chan)
+{
+ vchan_synchronize(to_virt_chan(chan));
+}
+
+static void milbeaut_xdmac_issue_pending(struct dma_chan *chan)
+{
+ struct virt_dma_chan *vc = to_virt_chan(chan);
+ struct milbeaut_xdmac_chan *mc = to_milbeaut_xdmac_chan(vc);
+ unsigned long flags;
+
+ spin_lock_irqsave(&vc->lock, flags);
+
+ if (vchan_issue_pending(vc) && !mc->md)
+ milbeaut_xdmac_start(mc);
+
+ spin_unlock_irqrestore(&vc->lock, flags);
+}
+
+static void milbeaut_xdmac_desc_free(struct virt_dma_desc *vd)
+{
+ kfree(to_milbeaut_xdmac_desc(vd));
+}
+
+static int milbeaut_xdmac_chan_init(struct platform_device *pdev,
+ struct milbeaut_xdmac_device *mdev,
+ int chan_id)
+{
+ struct device *dev = &pdev->dev;
+ struct milbeaut_xdmac_chan *mc = &mdev->channels[chan_id];
+ char *irq_name;
+ int irq, ret;
+
+ irq = platform_get_irq(pdev, chan_id);
+ if (irq < 0)
+ return irq;
+
+ irq_name = devm_kasprintf(dev, GFP_KERNEL, "milbeaut-xdmac-%d",
+ chan_id);
+ if (!irq_name)
+ return -ENOMEM;
+
+ ret = devm_request_irq(dev, irq, milbeaut_xdmac_interrupt,
+ IRQF_SHARED, irq_name, mc);
+ if (ret)
+ return ret;
+
+ mc->reg_ch_base = mdev->reg_base + chan_id * 0x30;
+
+ mc->vc.desc_free = milbeaut_xdmac_desc_free;
+ vchan_init(&mc->vc, &mdev->ddev);
+
+ return 0;
+}
+
+static void enable_xdmac(struct milbeaut_xdmac_device *mdev)
+{
+ unsigned int val;
+
+ val = readl(mdev->reg_base + M10V_XDACS);
+ val |= M10V_XDACS_XE;
+ writel(val, mdev->reg_base + M10V_XDACS);
+}
+
+static void disable_xdmac(struct milbeaut_xdmac_device *mdev)
+{
+ unsigned int val;
+
+ val = readl(mdev->reg_base + M10V_XDACS);
+ val &= ~M10V_XDACS_XE;
+ writel(val, mdev->reg_base + M10V_XDACS);
+}
+
+static int milbeaut_xdmac_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct milbeaut_xdmac_device *mdev;
+ struct dma_device *ddev;
+ int nr_chans, ret, i;
+
+ nr_chans = platform_irq_count(pdev);
+ if (nr_chans < 0)
+ return nr_chans;
+
+ mdev = devm_kzalloc(dev, struct_size(mdev, channels, nr_chans),
+ GFP_KERNEL);
+ if (!mdev)
+ return -ENOMEM;
+
+ mdev->reg_base = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(mdev->reg_base))
+ return PTR_ERR(mdev->reg_base);
+
+ ddev = &mdev->ddev;
+ ddev->dev = dev;
+ dma_cap_set(DMA_MEMCPY, ddev->cap_mask);
+ ddev->src_addr_widths = MLB_XDMAC_BUSWIDTHS;
+ ddev->dst_addr_widths = MLB_XDMAC_BUSWIDTHS;
+ ddev->device_free_chan_resources = milbeaut_xdmac_free_chan_resources;
+ ddev->device_prep_dma_memcpy = milbeaut_xdmac_prep_memcpy;
+ ddev->device_terminate_all = milbeaut_xdmac_terminate_all;
+ ddev->device_synchronize = milbeaut_xdmac_synchronize;
+ ddev->device_tx_status = dma_cookie_status;
+ ddev->device_issue_pending = milbeaut_xdmac_issue_pending;
+ INIT_LIST_HEAD(&ddev->channels);
+
+ for (i = 0; i < nr_chans; i++) {
+ ret = milbeaut_xdmac_chan_init(pdev, mdev, i);
+ if (ret)
+ return ret;
+ }
+
+ enable_xdmac(mdev);
+
+ ret = dma_async_device_register(ddev);
+ if (ret)
+ goto disable_xdmac;
+
+ ret = of_dma_controller_register(dev->of_node,
+ of_dma_simple_xlate, mdev);
+ if (ret)
+ goto unregister_dmac;
+
+ platform_set_drvdata(pdev, mdev);
+
+ return 0;
+
+unregister_dmac:
+ dma_async_device_unregister(ddev);
+disable_xdmac:
+ disable_xdmac(mdev);
+ return ret;
+}
+
+static int milbeaut_xdmac_remove(struct platform_device *pdev)
+{
+ struct milbeaut_xdmac_device *mdev = platform_get_drvdata(pdev);
+ struct dma_chan *chan;
+ int ret;
+
+ /*
+ * Before reaching here, almost all descriptors have been freed by the
+ * ->device_free_chan_resources() hook. However, each channel might
+ * be still holding one descriptor that was on-flight at that moment.
+ * Terminate it to make sure this hardware is no longer running. Then,
+ * free the channel resources once again to avoid memory leak.
+ */
+ list_for_each_entry(chan, &mdev->ddev.channels, device_node) {
+ ret = dmaengine_terminate_sync(chan);
+ if (ret)
+ return ret;
+ milbeaut_xdmac_free_chan_resources(chan);
+ }
+
+ of_dma_controller_free(pdev->dev.of_node);
+ dma_async_device_unregister(&mdev->ddev);
+
+ disable_xdmac(mdev);
+
+ return 0;
+}
+
+static const struct of_device_id milbeaut_xdmac_match[] = {
+ { .compatible = "socionext,milbeaut-m10v-xdmac" },
+ { /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, milbeaut_xdmac_match);
+
+static struct platform_driver milbeaut_xdmac_driver = {
+ .probe = milbeaut_xdmac_probe,
+ .remove = milbeaut_xdmac_remove,
+ .driver = {
+ .name = "milbeaut-m10v-xdmac",
+ .of_match_table = milbeaut_xdmac_match,
+ },
+};
+module_platform_driver(milbeaut_xdmac_driver);
+
+MODULE_DESCRIPTION("Milbeaut XDMAC DmaEngine driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/mmp_pdma.c b/drivers/dma/mmp_pdma.c
new file mode 100644
index 000000000..e8d71b355
--- /dev/null
+++ b/drivers/dma/mmp_pdma.c
@@ -0,0 +1,1153 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2012 Marvell International Ltd.
+ */
+
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/interrupt.h>
+#include <linux/dma-mapping.h>
+#include <linux/slab.h>
+#include <linux/dmaengine.h>
+#include <linux/platform_device.h>
+#include <linux/device.h>
+#include <linux/platform_data/mmp_dma.h>
+#include <linux/dmapool.h>
+#include <linux/of_device.h>
+#include <linux/of_dma.h>
+#include <linux/of.h>
+
+#include "dmaengine.h"
+
+#define DCSR 0x0000
+#define DALGN 0x00a0
+#define DINT 0x00f0
+#define DDADR 0x0200
+#define DSADR(n) (0x0204 + ((n) << 4))
+#define DTADR(n) (0x0208 + ((n) << 4))
+#define DCMD 0x020c
+
+#define DCSR_RUN BIT(31) /* Run Bit (read / write) */
+#define DCSR_NODESC BIT(30) /* No-Descriptor Fetch (read / write) */
+#define DCSR_STOPIRQEN BIT(29) /* Stop Interrupt Enable (read / write) */
+#define DCSR_REQPEND BIT(8) /* Request Pending (read-only) */
+#define DCSR_STOPSTATE BIT(3) /* Stop State (read-only) */
+#define DCSR_ENDINTR BIT(2) /* End Interrupt (read / write) */
+#define DCSR_STARTINTR BIT(1) /* Start Interrupt (read / write) */
+#define DCSR_BUSERR BIT(0) /* Bus Error Interrupt (read / write) */
+
+#define DCSR_EORIRQEN BIT(28) /* End of Receive Interrupt Enable (R/W) */
+#define DCSR_EORJMPEN BIT(27) /* Jump to next descriptor on EOR */
+#define DCSR_EORSTOPEN BIT(26) /* STOP on an EOR */
+#define DCSR_SETCMPST BIT(25) /* Set Descriptor Compare Status */
+#define DCSR_CLRCMPST BIT(24) /* Clear Descriptor Compare Status */
+#define DCSR_CMPST BIT(10) /* The Descriptor Compare Status */
+#define DCSR_EORINTR BIT(9) /* The end of Receive */
+
+#define DRCMR(n) ((((n) < 64) ? 0x0100 : 0x1100) + (((n) & 0x3f) << 2))
+#define DRCMR_MAPVLD BIT(7) /* Map Valid (read / write) */
+#define DRCMR_CHLNUM 0x1f /* mask for Channel Number (read / write) */
+
+#define DDADR_DESCADDR 0xfffffff0 /* Address of next descriptor (mask) */
+#define DDADR_STOP BIT(0) /* Stop (read / write) */
+
+#define DCMD_INCSRCADDR BIT(31) /* Source Address Increment Setting. */
+#define DCMD_INCTRGADDR BIT(30) /* Target Address Increment Setting. */
+#define DCMD_FLOWSRC BIT(29) /* Flow Control by the source. */
+#define DCMD_FLOWTRG BIT(28) /* Flow Control by the target. */
+#define DCMD_STARTIRQEN BIT(22) /* Start Interrupt Enable */
+#define DCMD_ENDIRQEN BIT(21) /* End Interrupt Enable */
+#define DCMD_ENDIAN BIT(18) /* Device Endian-ness. */
+#define DCMD_BURST8 (1 << 16) /* 8 byte burst */
+#define DCMD_BURST16 (2 << 16) /* 16 byte burst */
+#define DCMD_BURST32 (3 << 16) /* 32 byte burst */
+#define DCMD_WIDTH1 (1 << 14) /* 1 byte width */
+#define DCMD_WIDTH2 (2 << 14) /* 2 byte width (HalfWord) */
+#define DCMD_WIDTH4 (3 << 14) /* 4 byte width (Word) */
+#define DCMD_LENGTH 0x01fff /* length mask (max = 8K - 1) */
+
+#define PDMA_MAX_DESC_BYTES DCMD_LENGTH
+
+struct mmp_pdma_desc_hw {
+ u32 ddadr; /* Points to the next descriptor + flags */
+ u32 dsadr; /* DSADR value for the current transfer */
+ u32 dtadr; /* DTADR value for the current transfer */
+ u32 dcmd; /* DCMD value for the current transfer */
+} __aligned(32);
+
+struct mmp_pdma_desc_sw {
+ struct mmp_pdma_desc_hw desc;
+ struct list_head node;
+ struct list_head tx_list;
+ struct dma_async_tx_descriptor async_tx;
+};
+
+struct mmp_pdma_phy;
+
+struct mmp_pdma_chan {
+ struct device *dev;
+ struct dma_chan chan;
+ struct dma_async_tx_descriptor desc;
+ struct mmp_pdma_phy *phy;
+ enum dma_transfer_direction dir;
+ struct dma_slave_config slave_config;
+
+ struct mmp_pdma_desc_sw *cyclic_first; /* first desc_sw if channel
+ * is in cyclic mode */
+
+ /* channel's basic info */
+ struct tasklet_struct tasklet;
+ u32 dcmd;
+ u32 drcmr;
+ u32 dev_addr;
+
+ /* list for desc */
+ spinlock_t desc_lock; /* Descriptor list lock */
+ struct list_head chain_pending; /* Link descriptors queue for pending */
+ struct list_head chain_running; /* Link descriptors queue for running */
+ bool idle; /* channel statue machine */
+ bool byte_align;
+
+ struct dma_pool *desc_pool; /* Descriptors pool */
+};
+
+struct mmp_pdma_phy {
+ int idx;
+ void __iomem *base;
+ struct mmp_pdma_chan *vchan;
+};
+
+struct mmp_pdma_device {
+ int dma_channels;
+ void __iomem *base;
+ struct device *dev;
+ struct dma_device device;
+ struct mmp_pdma_phy *phy;
+ spinlock_t phy_lock; /* protect alloc/free phy channels */
+};
+
+#define tx_to_mmp_pdma_desc(tx) \
+ container_of(tx, struct mmp_pdma_desc_sw, async_tx)
+#define to_mmp_pdma_desc(lh) \
+ container_of(lh, struct mmp_pdma_desc_sw, node)
+#define to_mmp_pdma_chan(dchan) \
+ container_of(dchan, struct mmp_pdma_chan, chan)
+#define to_mmp_pdma_dev(dmadev) \
+ container_of(dmadev, struct mmp_pdma_device, device)
+
+static int mmp_pdma_config_write(struct dma_chan *dchan,
+ struct dma_slave_config *cfg,
+ enum dma_transfer_direction direction);
+
+static void set_desc(struct mmp_pdma_phy *phy, dma_addr_t addr)
+{
+ u32 reg = (phy->idx << 4) + DDADR;
+
+ writel(addr, phy->base + reg);
+}
+
+static void enable_chan(struct mmp_pdma_phy *phy)
+{
+ u32 reg, dalgn;
+
+ if (!phy->vchan)
+ return;
+
+ reg = DRCMR(phy->vchan->drcmr);
+ writel(DRCMR_MAPVLD | phy->idx, phy->base + reg);
+
+ dalgn = readl(phy->base + DALGN);
+ if (phy->vchan->byte_align)
+ dalgn |= 1 << phy->idx;
+ else
+ dalgn &= ~(1 << phy->idx);
+ writel(dalgn, phy->base + DALGN);
+
+ reg = (phy->idx << 2) + DCSR;
+ writel(readl(phy->base + reg) | DCSR_RUN, phy->base + reg);
+}
+
+static void disable_chan(struct mmp_pdma_phy *phy)
+{
+ u32 reg;
+
+ if (!phy)
+ return;
+
+ reg = (phy->idx << 2) + DCSR;
+ writel(readl(phy->base + reg) & ~DCSR_RUN, phy->base + reg);
+}
+
+static int clear_chan_irq(struct mmp_pdma_phy *phy)
+{
+ u32 dcsr;
+ u32 dint = readl(phy->base + DINT);
+ u32 reg = (phy->idx << 2) + DCSR;
+
+ if (!(dint & BIT(phy->idx)))
+ return -EAGAIN;
+
+ /* clear irq */
+ dcsr = readl(phy->base + reg);
+ writel(dcsr, phy->base + reg);
+ if ((dcsr & DCSR_BUSERR) && (phy->vchan))
+ dev_warn(phy->vchan->dev, "DCSR_BUSERR\n");
+
+ return 0;
+}
+
+static irqreturn_t mmp_pdma_chan_handler(int irq, void *dev_id)
+{
+ struct mmp_pdma_phy *phy = dev_id;
+
+ if (clear_chan_irq(phy) != 0)
+ return IRQ_NONE;
+
+ tasklet_schedule(&phy->vchan->tasklet);
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t mmp_pdma_int_handler(int irq, void *dev_id)
+{
+ struct mmp_pdma_device *pdev = dev_id;
+ struct mmp_pdma_phy *phy;
+ u32 dint = readl(pdev->base + DINT);
+ int i, ret;
+ int irq_num = 0;
+
+ while (dint) {
+ i = __ffs(dint);
+ /* only handle interrupts belonging to pdma driver*/
+ if (i >= pdev->dma_channels)
+ break;
+ dint &= (dint - 1);
+ phy = &pdev->phy[i];
+ ret = mmp_pdma_chan_handler(irq, phy);
+ if (ret == IRQ_HANDLED)
+ irq_num++;
+ }
+
+ if (irq_num)
+ return IRQ_HANDLED;
+
+ return IRQ_NONE;
+}
+
+/* lookup free phy channel as descending priority */
+static struct mmp_pdma_phy *lookup_phy(struct mmp_pdma_chan *pchan)
+{
+ int prio, i;
+ struct mmp_pdma_device *pdev = to_mmp_pdma_dev(pchan->chan.device);
+ struct mmp_pdma_phy *phy, *found = NULL;
+ unsigned long flags;
+
+ /*
+ * dma channel priorities
+ * ch 0 - 3, 16 - 19 <--> (0)
+ * ch 4 - 7, 20 - 23 <--> (1)
+ * ch 8 - 11, 24 - 27 <--> (2)
+ * ch 12 - 15, 28 - 31 <--> (3)
+ */
+
+ spin_lock_irqsave(&pdev->phy_lock, flags);
+ for (prio = 0; prio <= ((pdev->dma_channels - 1) & 0xf) >> 2; prio++) {
+ for (i = 0; i < pdev->dma_channels; i++) {
+ if (prio != (i & 0xf) >> 2)
+ continue;
+ phy = &pdev->phy[i];
+ if (!phy->vchan) {
+ phy->vchan = pchan;
+ found = phy;
+ goto out_unlock;
+ }
+ }
+ }
+
+out_unlock:
+ spin_unlock_irqrestore(&pdev->phy_lock, flags);
+ return found;
+}
+
+static void mmp_pdma_free_phy(struct mmp_pdma_chan *pchan)
+{
+ struct mmp_pdma_device *pdev = to_mmp_pdma_dev(pchan->chan.device);
+ unsigned long flags;
+ u32 reg;
+
+ if (!pchan->phy)
+ return;
+
+ /* clear the channel mapping in DRCMR */
+ reg = DRCMR(pchan->drcmr);
+ writel(0, pchan->phy->base + reg);
+
+ spin_lock_irqsave(&pdev->phy_lock, flags);
+ pchan->phy->vchan = NULL;
+ pchan->phy = NULL;
+ spin_unlock_irqrestore(&pdev->phy_lock, flags);
+}
+
+/*
+ * start_pending_queue - transfer any pending transactions
+ * pending list ==> running list
+ */
+static void start_pending_queue(struct mmp_pdma_chan *chan)
+{
+ struct mmp_pdma_desc_sw *desc;
+
+ /* still in running, irq will start the pending list */
+ if (!chan->idle) {
+ dev_dbg(chan->dev, "DMA controller still busy\n");
+ return;
+ }
+
+ if (list_empty(&chan->chain_pending)) {
+ /* chance to re-fetch phy channel with higher prio */
+ mmp_pdma_free_phy(chan);
+ dev_dbg(chan->dev, "no pending list\n");
+ return;
+ }
+
+ if (!chan->phy) {
+ chan->phy = lookup_phy(chan);
+ if (!chan->phy) {
+ dev_dbg(chan->dev, "no free dma channel\n");
+ return;
+ }
+ }
+
+ /*
+ * pending -> running
+ * reintilize pending list
+ */
+ desc = list_first_entry(&chan->chain_pending,
+ struct mmp_pdma_desc_sw, node);
+ list_splice_tail_init(&chan->chain_pending, &chan->chain_running);
+
+ /*
+ * Program the descriptor's address into the DMA controller,
+ * then start the DMA transaction
+ */
+ set_desc(chan->phy, desc->async_tx.phys);
+ enable_chan(chan->phy);
+ chan->idle = false;
+}
+
+
+/* desc->tx_list ==> pending list */
+static dma_cookie_t mmp_pdma_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+ struct mmp_pdma_chan *chan = to_mmp_pdma_chan(tx->chan);
+ struct mmp_pdma_desc_sw *desc = tx_to_mmp_pdma_desc(tx);
+ struct mmp_pdma_desc_sw *child;
+ unsigned long flags;
+ dma_cookie_t cookie = -EBUSY;
+
+ spin_lock_irqsave(&chan->desc_lock, flags);
+
+ list_for_each_entry(child, &desc->tx_list, node) {
+ cookie = dma_cookie_assign(&child->async_tx);
+ }
+
+ /* softly link to pending list - desc->tx_list ==> pending list */
+ list_splice_tail_init(&desc->tx_list, &chan->chain_pending);
+
+ spin_unlock_irqrestore(&chan->desc_lock, flags);
+
+ return cookie;
+}
+
+static struct mmp_pdma_desc_sw *
+mmp_pdma_alloc_descriptor(struct mmp_pdma_chan *chan)
+{
+ struct mmp_pdma_desc_sw *desc;
+ dma_addr_t pdesc;
+
+ desc = dma_pool_zalloc(chan->desc_pool, GFP_ATOMIC, &pdesc);
+ if (!desc) {
+ dev_err(chan->dev, "out of memory for link descriptor\n");
+ return NULL;
+ }
+
+ INIT_LIST_HEAD(&desc->tx_list);
+ dma_async_tx_descriptor_init(&desc->async_tx, &chan->chan);
+ /* each desc has submit */
+ desc->async_tx.tx_submit = mmp_pdma_tx_submit;
+ desc->async_tx.phys = pdesc;
+
+ return desc;
+}
+
+/*
+ * mmp_pdma_alloc_chan_resources - Allocate resources for DMA channel.
+ *
+ * This function will create a dma pool for descriptor allocation.
+ * Request irq only when channel is requested
+ * Return - The number of allocated descriptors.
+ */
+
+static int mmp_pdma_alloc_chan_resources(struct dma_chan *dchan)
+{
+ struct mmp_pdma_chan *chan = to_mmp_pdma_chan(dchan);
+
+ if (chan->desc_pool)
+ return 1;
+
+ chan->desc_pool = dma_pool_create(dev_name(&dchan->dev->device),
+ chan->dev,
+ sizeof(struct mmp_pdma_desc_sw),
+ __alignof__(struct mmp_pdma_desc_sw),
+ 0);
+ if (!chan->desc_pool) {
+ dev_err(chan->dev, "unable to allocate descriptor pool\n");
+ return -ENOMEM;
+ }
+
+ mmp_pdma_free_phy(chan);
+ chan->idle = true;
+ chan->dev_addr = 0;
+ return 1;
+}
+
+static void mmp_pdma_free_desc_list(struct mmp_pdma_chan *chan,
+ struct list_head *list)
+{
+ struct mmp_pdma_desc_sw *desc, *_desc;
+
+ list_for_each_entry_safe(desc, _desc, list, node) {
+ list_del(&desc->node);
+ dma_pool_free(chan->desc_pool, desc, desc->async_tx.phys);
+ }
+}
+
+static void mmp_pdma_free_chan_resources(struct dma_chan *dchan)
+{
+ struct mmp_pdma_chan *chan = to_mmp_pdma_chan(dchan);
+ unsigned long flags;
+
+ spin_lock_irqsave(&chan->desc_lock, flags);
+ mmp_pdma_free_desc_list(chan, &chan->chain_pending);
+ mmp_pdma_free_desc_list(chan, &chan->chain_running);
+ spin_unlock_irqrestore(&chan->desc_lock, flags);
+
+ dma_pool_destroy(chan->desc_pool);
+ chan->desc_pool = NULL;
+ chan->idle = true;
+ chan->dev_addr = 0;
+ mmp_pdma_free_phy(chan);
+ return;
+}
+
+static struct dma_async_tx_descriptor *
+mmp_pdma_prep_memcpy(struct dma_chan *dchan,
+ dma_addr_t dma_dst, dma_addr_t dma_src,
+ size_t len, unsigned long flags)
+{
+ struct mmp_pdma_chan *chan;
+ struct mmp_pdma_desc_sw *first = NULL, *prev = NULL, *new;
+ size_t copy = 0;
+
+ if (!dchan)
+ return NULL;
+
+ if (!len)
+ return NULL;
+
+ chan = to_mmp_pdma_chan(dchan);
+ chan->byte_align = false;
+
+ if (!chan->dir) {
+ chan->dir = DMA_MEM_TO_MEM;
+ chan->dcmd = DCMD_INCTRGADDR | DCMD_INCSRCADDR;
+ chan->dcmd |= DCMD_BURST32;
+ }
+
+ do {
+ /* Allocate the link descriptor from DMA pool */
+ new = mmp_pdma_alloc_descriptor(chan);
+ if (!new) {
+ dev_err(chan->dev, "no memory for desc\n");
+ goto fail;
+ }
+
+ copy = min_t(size_t, len, PDMA_MAX_DESC_BYTES);
+ if (dma_src & 0x7 || dma_dst & 0x7)
+ chan->byte_align = true;
+
+ new->desc.dcmd = chan->dcmd | (DCMD_LENGTH & copy);
+ new->desc.dsadr = dma_src;
+ new->desc.dtadr = dma_dst;
+
+ if (!first)
+ first = new;
+ else
+ prev->desc.ddadr = new->async_tx.phys;
+
+ new->async_tx.cookie = 0;
+ async_tx_ack(&new->async_tx);
+
+ prev = new;
+ len -= copy;
+
+ if (chan->dir == DMA_MEM_TO_DEV) {
+ dma_src += copy;
+ } else if (chan->dir == DMA_DEV_TO_MEM) {
+ dma_dst += copy;
+ } else if (chan->dir == DMA_MEM_TO_MEM) {
+ dma_src += copy;
+ dma_dst += copy;
+ }
+
+ /* Insert the link descriptor to the LD ring */
+ list_add_tail(&new->node, &first->tx_list);
+ } while (len);
+
+ first->async_tx.flags = flags; /* client is in control of this ack */
+ first->async_tx.cookie = -EBUSY;
+
+ /* last desc and fire IRQ */
+ new->desc.ddadr = DDADR_STOP;
+ new->desc.dcmd |= DCMD_ENDIRQEN;
+
+ chan->cyclic_first = NULL;
+
+ return &first->async_tx;
+
+fail:
+ if (first)
+ mmp_pdma_free_desc_list(chan, &first->tx_list);
+ return NULL;
+}
+
+static struct dma_async_tx_descriptor *
+mmp_pdma_prep_slave_sg(struct dma_chan *dchan, struct scatterlist *sgl,
+ unsigned int sg_len, enum dma_transfer_direction dir,
+ unsigned long flags, void *context)
+{
+ struct mmp_pdma_chan *chan = to_mmp_pdma_chan(dchan);
+ struct mmp_pdma_desc_sw *first = NULL, *prev = NULL, *new = NULL;
+ size_t len, avail;
+ struct scatterlist *sg;
+ dma_addr_t addr;
+ int i;
+
+ if ((sgl == NULL) || (sg_len == 0))
+ return NULL;
+
+ chan->byte_align = false;
+
+ mmp_pdma_config_write(dchan, &chan->slave_config, dir);
+
+ for_each_sg(sgl, sg, sg_len, i) {
+ addr = sg_dma_address(sg);
+ avail = sg_dma_len(sgl);
+
+ do {
+ len = min_t(size_t, avail, PDMA_MAX_DESC_BYTES);
+ if (addr & 0x7)
+ chan->byte_align = true;
+
+ /* allocate and populate the descriptor */
+ new = mmp_pdma_alloc_descriptor(chan);
+ if (!new) {
+ dev_err(chan->dev, "no memory for desc\n");
+ goto fail;
+ }
+
+ new->desc.dcmd = chan->dcmd | (DCMD_LENGTH & len);
+ if (dir == DMA_MEM_TO_DEV) {
+ new->desc.dsadr = addr;
+ new->desc.dtadr = chan->dev_addr;
+ } else {
+ new->desc.dsadr = chan->dev_addr;
+ new->desc.dtadr = addr;
+ }
+
+ if (!first)
+ first = new;
+ else
+ prev->desc.ddadr = new->async_tx.phys;
+
+ new->async_tx.cookie = 0;
+ async_tx_ack(&new->async_tx);
+ prev = new;
+
+ /* Insert the link descriptor to the LD ring */
+ list_add_tail(&new->node, &first->tx_list);
+
+ /* update metadata */
+ addr += len;
+ avail -= len;
+ } while (avail);
+ }
+
+ first->async_tx.cookie = -EBUSY;
+ first->async_tx.flags = flags;
+
+ /* last desc and fire IRQ */
+ new->desc.ddadr = DDADR_STOP;
+ new->desc.dcmd |= DCMD_ENDIRQEN;
+
+ chan->dir = dir;
+ chan->cyclic_first = NULL;
+
+ return &first->async_tx;
+
+fail:
+ if (first)
+ mmp_pdma_free_desc_list(chan, &first->tx_list);
+ return NULL;
+}
+
+static struct dma_async_tx_descriptor *
+mmp_pdma_prep_dma_cyclic(struct dma_chan *dchan,
+ dma_addr_t buf_addr, size_t len, size_t period_len,
+ enum dma_transfer_direction direction,
+ unsigned long flags)
+{
+ struct mmp_pdma_chan *chan;
+ struct mmp_pdma_desc_sw *first = NULL, *prev = NULL, *new;
+ dma_addr_t dma_src, dma_dst;
+
+ if (!dchan || !len || !period_len)
+ return NULL;
+
+ /* the buffer length must be a multiple of period_len */
+ if (len % period_len != 0)
+ return NULL;
+
+ if (period_len > PDMA_MAX_DESC_BYTES)
+ return NULL;
+
+ chan = to_mmp_pdma_chan(dchan);
+ mmp_pdma_config_write(dchan, &chan->slave_config, direction);
+
+ switch (direction) {
+ case DMA_MEM_TO_DEV:
+ dma_src = buf_addr;
+ dma_dst = chan->dev_addr;
+ break;
+ case DMA_DEV_TO_MEM:
+ dma_dst = buf_addr;
+ dma_src = chan->dev_addr;
+ break;
+ default:
+ dev_err(chan->dev, "Unsupported direction for cyclic DMA\n");
+ return NULL;
+ }
+
+ chan->dir = direction;
+
+ do {
+ /* Allocate the link descriptor from DMA pool */
+ new = mmp_pdma_alloc_descriptor(chan);
+ if (!new) {
+ dev_err(chan->dev, "no memory for desc\n");
+ goto fail;
+ }
+
+ new->desc.dcmd = (chan->dcmd | DCMD_ENDIRQEN |
+ (DCMD_LENGTH & period_len));
+ new->desc.dsadr = dma_src;
+ new->desc.dtadr = dma_dst;
+
+ if (!first)
+ first = new;
+ else
+ prev->desc.ddadr = new->async_tx.phys;
+
+ new->async_tx.cookie = 0;
+ async_tx_ack(&new->async_tx);
+
+ prev = new;
+ len -= period_len;
+
+ if (chan->dir == DMA_MEM_TO_DEV)
+ dma_src += period_len;
+ else
+ dma_dst += period_len;
+
+ /* Insert the link descriptor to the LD ring */
+ list_add_tail(&new->node, &first->tx_list);
+ } while (len);
+
+ first->async_tx.flags = flags; /* client is in control of this ack */
+ first->async_tx.cookie = -EBUSY;
+
+ /* make the cyclic link */
+ new->desc.ddadr = first->async_tx.phys;
+ chan->cyclic_first = first;
+
+ return &first->async_tx;
+
+fail:
+ if (first)
+ mmp_pdma_free_desc_list(chan, &first->tx_list);
+ return NULL;
+}
+
+static int mmp_pdma_config_write(struct dma_chan *dchan,
+ struct dma_slave_config *cfg,
+ enum dma_transfer_direction direction)
+{
+ struct mmp_pdma_chan *chan = to_mmp_pdma_chan(dchan);
+ u32 maxburst = 0, addr = 0;
+ enum dma_slave_buswidth width = DMA_SLAVE_BUSWIDTH_UNDEFINED;
+
+ if (!dchan)
+ return -EINVAL;
+
+ if (direction == DMA_DEV_TO_MEM) {
+ chan->dcmd = DCMD_INCTRGADDR | DCMD_FLOWSRC;
+ maxburst = cfg->src_maxburst;
+ width = cfg->src_addr_width;
+ addr = cfg->src_addr;
+ } else if (direction == DMA_MEM_TO_DEV) {
+ chan->dcmd = DCMD_INCSRCADDR | DCMD_FLOWTRG;
+ maxburst = cfg->dst_maxburst;
+ width = cfg->dst_addr_width;
+ addr = cfg->dst_addr;
+ }
+
+ if (width == DMA_SLAVE_BUSWIDTH_1_BYTE)
+ chan->dcmd |= DCMD_WIDTH1;
+ else if (width == DMA_SLAVE_BUSWIDTH_2_BYTES)
+ chan->dcmd |= DCMD_WIDTH2;
+ else if (width == DMA_SLAVE_BUSWIDTH_4_BYTES)
+ chan->dcmd |= DCMD_WIDTH4;
+
+ if (maxburst == 8)
+ chan->dcmd |= DCMD_BURST8;
+ else if (maxburst == 16)
+ chan->dcmd |= DCMD_BURST16;
+ else if (maxburst == 32)
+ chan->dcmd |= DCMD_BURST32;
+
+ chan->dir = direction;
+ chan->dev_addr = addr;
+
+ return 0;
+}
+
+static int mmp_pdma_config(struct dma_chan *dchan,
+ struct dma_slave_config *cfg)
+{
+ struct mmp_pdma_chan *chan = to_mmp_pdma_chan(dchan);
+
+ memcpy(&chan->slave_config, cfg, sizeof(*cfg));
+ return 0;
+}
+
+static int mmp_pdma_terminate_all(struct dma_chan *dchan)
+{
+ struct mmp_pdma_chan *chan = to_mmp_pdma_chan(dchan);
+ unsigned long flags;
+
+ if (!dchan)
+ return -EINVAL;
+
+ disable_chan(chan->phy);
+ mmp_pdma_free_phy(chan);
+ spin_lock_irqsave(&chan->desc_lock, flags);
+ mmp_pdma_free_desc_list(chan, &chan->chain_pending);
+ mmp_pdma_free_desc_list(chan, &chan->chain_running);
+ spin_unlock_irqrestore(&chan->desc_lock, flags);
+ chan->idle = true;
+
+ return 0;
+}
+
+static unsigned int mmp_pdma_residue(struct mmp_pdma_chan *chan,
+ dma_cookie_t cookie)
+{
+ struct mmp_pdma_desc_sw *sw;
+ u32 curr, residue = 0;
+ bool passed = false;
+ bool cyclic = chan->cyclic_first != NULL;
+
+ /*
+ * If the channel does not have a phy pointer anymore, it has already
+ * been completed. Therefore, its residue is 0.
+ */
+ if (!chan->phy)
+ return 0;
+
+ if (chan->dir == DMA_DEV_TO_MEM)
+ curr = readl(chan->phy->base + DTADR(chan->phy->idx));
+ else
+ curr = readl(chan->phy->base + DSADR(chan->phy->idx));
+
+ list_for_each_entry(sw, &chan->chain_running, node) {
+ u32 start, end, len;
+
+ if (chan->dir == DMA_DEV_TO_MEM)
+ start = sw->desc.dtadr;
+ else
+ start = sw->desc.dsadr;
+
+ len = sw->desc.dcmd & DCMD_LENGTH;
+ end = start + len;
+
+ /*
+ * 'passed' will be latched once we found the descriptor which
+ * lies inside the boundaries of the curr pointer. All
+ * descriptors that occur in the list _after_ we found that
+ * partially handled descriptor are still to be processed and
+ * are hence added to the residual bytes counter.
+ */
+
+ if (passed) {
+ residue += len;
+ } else if (curr >= start && curr <= end) {
+ residue += end - curr;
+ passed = true;
+ }
+
+ /*
+ * Descriptors that have the ENDIRQEN bit set mark the end of a
+ * transaction chain, and the cookie assigned with it has been
+ * returned previously from mmp_pdma_tx_submit().
+ *
+ * In case we have multiple transactions in the running chain,
+ * and the cookie does not match the one the user asked us
+ * about, reset the state variables and start over.
+ *
+ * This logic does not apply to cyclic transactions, where all
+ * descriptors have the ENDIRQEN bit set, and for which we
+ * can't have multiple transactions on one channel anyway.
+ */
+ if (cyclic || !(sw->desc.dcmd & DCMD_ENDIRQEN))
+ continue;
+
+ if (sw->async_tx.cookie == cookie) {
+ return residue;
+ } else {
+ residue = 0;
+ passed = false;
+ }
+ }
+
+ /* We should only get here in case of cyclic transactions */
+ return residue;
+}
+
+static enum dma_status mmp_pdma_tx_status(struct dma_chan *dchan,
+ dma_cookie_t cookie,
+ struct dma_tx_state *txstate)
+{
+ struct mmp_pdma_chan *chan = to_mmp_pdma_chan(dchan);
+ enum dma_status ret;
+
+ ret = dma_cookie_status(dchan, cookie, txstate);
+ if (likely(ret != DMA_ERROR))
+ dma_set_residue(txstate, mmp_pdma_residue(chan, cookie));
+
+ return ret;
+}
+
+/*
+ * mmp_pdma_issue_pending - Issue the DMA start command
+ * pending list ==> running list
+ */
+static void mmp_pdma_issue_pending(struct dma_chan *dchan)
+{
+ struct mmp_pdma_chan *chan = to_mmp_pdma_chan(dchan);
+ unsigned long flags;
+
+ spin_lock_irqsave(&chan->desc_lock, flags);
+ start_pending_queue(chan);
+ spin_unlock_irqrestore(&chan->desc_lock, flags);
+}
+
+/*
+ * dma_do_tasklet
+ * Do call back
+ * Start pending list
+ */
+static void dma_do_tasklet(struct tasklet_struct *t)
+{
+ struct mmp_pdma_chan *chan = from_tasklet(chan, t, tasklet);
+ struct mmp_pdma_desc_sw *desc, *_desc;
+ LIST_HEAD(chain_cleanup);
+ unsigned long flags;
+ struct dmaengine_desc_callback cb;
+
+ if (chan->cyclic_first) {
+ spin_lock_irqsave(&chan->desc_lock, flags);
+ desc = chan->cyclic_first;
+ dmaengine_desc_get_callback(&desc->async_tx, &cb);
+ spin_unlock_irqrestore(&chan->desc_lock, flags);
+
+ dmaengine_desc_callback_invoke(&cb, NULL);
+
+ return;
+ }
+
+ /* submit pending list; callback for each desc; free desc */
+ spin_lock_irqsave(&chan->desc_lock, flags);
+
+ list_for_each_entry_safe(desc, _desc, &chan->chain_running, node) {
+ /*
+ * move the descriptors to a temporary list so we can drop
+ * the lock during the entire cleanup operation
+ */
+ list_move(&desc->node, &chain_cleanup);
+
+ /*
+ * Look for the first list entry which has the ENDIRQEN flag
+ * set. That is the descriptor we got an interrupt for, so
+ * complete that transaction and its cookie.
+ */
+ if (desc->desc.dcmd & DCMD_ENDIRQEN) {
+ dma_cookie_t cookie = desc->async_tx.cookie;
+ dma_cookie_complete(&desc->async_tx);
+ dev_dbg(chan->dev, "completed_cookie=%d\n", cookie);
+ break;
+ }
+ }
+
+ /*
+ * The hardware is idle and ready for more when the
+ * chain_running list is empty.
+ */
+ chan->idle = list_empty(&chan->chain_running);
+
+ /* Start any pending transactions automatically */
+ start_pending_queue(chan);
+ spin_unlock_irqrestore(&chan->desc_lock, flags);
+
+ /* Run the callback for each descriptor, in order */
+ list_for_each_entry_safe(desc, _desc, &chain_cleanup, node) {
+ struct dma_async_tx_descriptor *txd = &desc->async_tx;
+
+ /* Remove from the list of transactions */
+ list_del(&desc->node);
+ /* Run the link descriptor callback function */
+ dmaengine_desc_get_callback(txd, &cb);
+ dmaengine_desc_callback_invoke(&cb, NULL);
+
+ dma_pool_free(chan->desc_pool, desc, txd->phys);
+ }
+}
+
+static int mmp_pdma_remove(struct platform_device *op)
+{
+ struct mmp_pdma_device *pdev = platform_get_drvdata(op);
+ struct mmp_pdma_phy *phy;
+ int i, irq = 0, irq_num = 0;
+
+ if (op->dev.of_node)
+ of_dma_controller_free(op->dev.of_node);
+
+ for (i = 0; i < pdev->dma_channels; i++) {
+ if (platform_get_irq(op, i) > 0)
+ irq_num++;
+ }
+
+ if (irq_num != pdev->dma_channels) {
+ irq = platform_get_irq(op, 0);
+ devm_free_irq(&op->dev, irq, pdev);
+ } else {
+ for (i = 0; i < pdev->dma_channels; i++) {
+ phy = &pdev->phy[i];
+ irq = platform_get_irq(op, i);
+ devm_free_irq(&op->dev, irq, phy);
+ }
+ }
+
+ dma_async_device_unregister(&pdev->device);
+ return 0;
+}
+
+static int mmp_pdma_chan_init(struct mmp_pdma_device *pdev, int idx, int irq)
+{
+ struct mmp_pdma_phy *phy = &pdev->phy[idx];
+ struct mmp_pdma_chan *chan;
+ int ret;
+
+ chan = devm_kzalloc(pdev->dev, sizeof(*chan), GFP_KERNEL);
+ if (chan == NULL)
+ return -ENOMEM;
+
+ phy->idx = idx;
+ phy->base = pdev->base;
+
+ if (irq) {
+ ret = devm_request_irq(pdev->dev, irq, mmp_pdma_chan_handler,
+ IRQF_SHARED, "pdma", phy);
+ if (ret) {
+ dev_err(pdev->dev, "channel request irq fail!\n");
+ return ret;
+ }
+ }
+
+ spin_lock_init(&chan->desc_lock);
+ chan->dev = pdev->dev;
+ chan->chan.device = &pdev->device;
+ tasklet_setup(&chan->tasklet, dma_do_tasklet);
+ INIT_LIST_HEAD(&chan->chain_pending);
+ INIT_LIST_HEAD(&chan->chain_running);
+
+ /* register virt channel to dma engine */
+ list_add_tail(&chan->chan.device_node, &pdev->device.channels);
+
+ return 0;
+}
+
+static const struct of_device_id mmp_pdma_dt_ids[] = {
+ { .compatible = "marvell,pdma-1.0", },
+ {}
+};
+MODULE_DEVICE_TABLE(of, mmp_pdma_dt_ids);
+
+static struct dma_chan *mmp_pdma_dma_xlate(struct of_phandle_args *dma_spec,
+ struct of_dma *ofdma)
+{
+ struct mmp_pdma_device *d = ofdma->of_dma_data;
+ struct dma_chan *chan;
+
+ chan = dma_get_any_slave_channel(&d->device);
+ if (!chan)
+ return NULL;
+
+ to_mmp_pdma_chan(chan)->drcmr = dma_spec->args[0];
+
+ return chan;
+}
+
+static int mmp_pdma_probe(struct platform_device *op)
+{
+ struct mmp_pdma_device *pdev;
+ const struct of_device_id *of_id;
+ struct mmp_dma_platdata *pdata = dev_get_platdata(&op->dev);
+ struct resource *iores;
+ int i, ret, irq = 0;
+ int dma_channels = 0, irq_num = 0;
+ const enum dma_slave_buswidth widths =
+ DMA_SLAVE_BUSWIDTH_1_BYTE | DMA_SLAVE_BUSWIDTH_2_BYTES |
+ DMA_SLAVE_BUSWIDTH_4_BYTES;
+
+ pdev = devm_kzalloc(&op->dev, sizeof(*pdev), GFP_KERNEL);
+ if (!pdev)
+ return -ENOMEM;
+
+ pdev->dev = &op->dev;
+
+ spin_lock_init(&pdev->phy_lock);
+
+ iores = platform_get_resource(op, IORESOURCE_MEM, 0);
+ pdev->base = devm_ioremap_resource(pdev->dev, iores);
+ if (IS_ERR(pdev->base))
+ return PTR_ERR(pdev->base);
+
+ of_id = of_match_device(mmp_pdma_dt_ids, pdev->dev);
+ if (of_id) {
+ /* Parse new and deprecated dma-channels properties */
+ if (of_property_read_u32(pdev->dev->of_node, "dma-channels",
+ &dma_channels))
+ of_property_read_u32(pdev->dev->of_node, "#dma-channels",
+ &dma_channels);
+ } else if (pdata && pdata->dma_channels) {
+ dma_channels = pdata->dma_channels;
+ } else {
+ dma_channels = 32; /* default 32 channel */
+ }
+ pdev->dma_channels = dma_channels;
+
+ for (i = 0; i < dma_channels; i++) {
+ if (platform_get_irq_optional(op, i) > 0)
+ irq_num++;
+ }
+
+ pdev->phy = devm_kcalloc(pdev->dev, dma_channels, sizeof(*pdev->phy),
+ GFP_KERNEL);
+ if (pdev->phy == NULL)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&pdev->device.channels);
+
+ if (irq_num != dma_channels) {
+ /* all chan share one irq, demux inside */
+ irq = platform_get_irq(op, 0);
+ ret = devm_request_irq(pdev->dev, irq, mmp_pdma_int_handler,
+ IRQF_SHARED, "pdma", pdev);
+ if (ret)
+ return ret;
+ }
+
+ for (i = 0; i < dma_channels; i++) {
+ irq = (irq_num != dma_channels) ? 0 : platform_get_irq(op, i);
+ ret = mmp_pdma_chan_init(pdev, i, irq);
+ if (ret)
+ return ret;
+ }
+
+ dma_cap_set(DMA_SLAVE, pdev->device.cap_mask);
+ dma_cap_set(DMA_MEMCPY, pdev->device.cap_mask);
+ dma_cap_set(DMA_CYCLIC, pdev->device.cap_mask);
+ dma_cap_set(DMA_PRIVATE, pdev->device.cap_mask);
+ pdev->device.dev = &op->dev;
+ pdev->device.device_alloc_chan_resources = mmp_pdma_alloc_chan_resources;
+ pdev->device.device_free_chan_resources = mmp_pdma_free_chan_resources;
+ pdev->device.device_tx_status = mmp_pdma_tx_status;
+ pdev->device.device_prep_dma_memcpy = mmp_pdma_prep_memcpy;
+ pdev->device.device_prep_slave_sg = mmp_pdma_prep_slave_sg;
+ pdev->device.device_prep_dma_cyclic = mmp_pdma_prep_dma_cyclic;
+ pdev->device.device_issue_pending = mmp_pdma_issue_pending;
+ pdev->device.device_config = mmp_pdma_config;
+ pdev->device.device_terminate_all = mmp_pdma_terminate_all;
+ pdev->device.copy_align = DMAENGINE_ALIGN_8_BYTES;
+ pdev->device.src_addr_widths = widths;
+ pdev->device.dst_addr_widths = widths;
+ pdev->device.directions = BIT(DMA_MEM_TO_DEV) | BIT(DMA_DEV_TO_MEM);
+ pdev->device.residue_granularity = DMA_RESIDUE_GRANULARITY_DESCRIPTOR;
+
+ if (pdev->dev->coherent_dma_mask)
+ dma_set_mask(pdev->dev, pdev->dev->coherent_dma_mask);
+ else
+ dma_set_mask(pdev->dev, DMA_BIT_MASK(64));
+
+ ret = dma_async_device_register(&pdev->device);
+ if (ret) {
+ dev_err(pdev->device.dev, "unable to register\n");
+ return ret;
+ }
+
+ if (op->dev.of_node) {
+ /* Device-tree DMA controller registration */
+ ret = of_dma_controller_register(op->dev.of_node,
+ mmp_pdma_dma_xlate, pdev);
+ if (ret < 0) {
+ dev_err(&op->dev, "of_dma_controller_register failed\n");
+ dma_async_device_unregister(&pdev->device);
+ return ret;
+ }
+ }
+
+ platform_set_drvdata(op, pdev);
+ dev_info(pdev->device.dev, "initialized %d channels\n", dma_channels);
+ return 0;
+}
+
+static const struct platform_device_id mmp_pdma_id_table[] = {
+ { "mmp-pdma", },
+ { },
+};
+
+static struct platform_driver mmp_pdma_driver = {
+ .driver = {
+ .name = "mmp-pdma",
+ .of_match_table = mmp_pdma_dt_ids,
+ },
+ .id_table = mmp_pdma_id_table,
+ .probe = mmp_pdma_probe,
+ .remove = mmp_pdma_remove,
+};
+
+module_platform_driver(mmp_pdma_driver);
+
+MODULE_DESCRIPTION("MARVELL MMP Peripheral DMA Driver");
+MODULE_AUTHOR("Marvell International Ltd.");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/mmp_tdma.c b/drivers/dma/mmp_tdma.c
new file mode 100644
index 000000000..a262e0eb4
--- /dev/null
+++ b/drivers/dma/mmp_tdma.c
@@ -0,0 +1,770 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Driver For Marvell Two-channel DMA Engine
+ *
+ * Copyright: Marvell International Ltd.
+ */
+
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/interrupt.h>
+#include <linux/dma-mapping.h>
+#include <linux/slab.h>
+#include <linux/dmaengine.h>
+#include <linux/platform_device.h>
+#include <linux/device.h>
+#include <linux/platform_data/dma-mmp_tdma.h>
+#include <linux/of_device.h>
+#include <linux/of_dma.h>
+
+#include "dmaengine.h"
+
+/*
+ * Two-Channel DMA registers
+ */
+#define TDBCR 0x00 /* Byte Count */
+#define TDSAR 0x10 /* Src Addr */
+#define TDDAR 0x20 /* Dst Addr */
+#define TDNDPR 0x30 /* Next Desc */
+#define TDCR 0x40 /* Control */
+#define TDCP 0x60 /* Priority*/
+#define TDCDPR 0x70 /* Current Desc */
+#define TDIMR 0x80 /* Int Mask */
+#define TDISR 0xa0 /* Int Status */
+
+/* Two-Channel DMA Control Register */
+#define TDCR_SSZ_8_BITS (0x0 << 22) /* Sample Size */
+#define TDCR_SSZ_12_BITS (0x1 << 22)
+#define TDCR_SSZ_16_BITS (0x2 << 22)
+#define TDCR_SSZ_20_BITS (0x3 << 22)
+#define TDCR_SSZ_24_BITS (0x4 << 22)
+#define TDCR_SSZ_32_BITS (0x5 << 22)
+#define TDCR_SSZ_SHIFT (0x1 << 22)
+#define TDCR_SSZ_MASK (0x7 << 22)
+#define TDCR_SSPMOD (0x1 << 21) /* SSP MOD */
+#define TDCR_ABR (0x1 << 20) /* Channel Abort */
+#define TDCR_CDE (0x1 << 17) /* Close Desc Enable */
+#define TDCR_PACKMOD (0x1 << 16) /* Pack Mode (ADMA Only) */
+#define TDCR_CHANACT (0x1 << 14) /* Channel Active */
+#define TDCR_FETCHND (0x1 << 13) /* Fetch Next Desc */
+#define TDCR_CHANEN (0x1 << 12) /* Channel Enable */
+#define TDCR_INTMODE (0x1 << 10) /* Interrupt Mode */
+#define TDCR_CHAINMOD (0x1 << 9) /* Chain Mode */
+#define TDCR_BURSTSZ_MSK (0x7 << 6) /* Burst Size */
+#define TDCR_BURSTSZ_4B (0x0 << 6)
+#define TDCR_BURSTSZ_8B (0x1 << 6)
+#define TDCR_BURSTSZ_16B (0x3 << 6)
+#define TDCR_BURSTSZ_32B (0x6 << 6)
+#define TDCR_BURSTSZ_64B (0x7 << 6)
+#define TDCR_BURSTSZ_SQU_1B (0x5 << 6)
+#define TDCR_BURSTSZ_SQU_2B (0x6 << 6)
+#define TDCR_BURSTSZ_SQU_4B (0x0 << 6)
+#define TDCR_BURSTSZ_SQU_8B (0x1 << 6)
+#define TDCR_BURSTSZ_SQU_16B (0x3 << 6)
+#define TDCR_BURSTSZ_SQU_32B (0x7 << 6)
+#define TDCR_BURSTSZ_128B (0x5 << 6)
+#define TDCR_DSTDIR_MSK (0x3 << 4) /* Dst Direction */
+#define TDCR_DSTDIR_ADDR_HOLD (0x2 << 4) /* Dst Addr Hold */
+#define TDCR_DSTDIR_ADDR_INC (0x0 << 4) /* Dst Addr Increment */
+#define TDCR_SRCDIR_MSK (0x3 << 2) /* Src Direction */
+#define TDCR_SRCDIR_ADDR_HOLD (0x2 << 2) /* Src Addr Hold */
+#define TDCR_SRCDIR_ADDR_INC (0x0 << 2) /* Src Addr Increment */
+#define TDCR_DSTDESCCONT (0x1 << 1)
+#define TDCR_SRCDESTCONT (0x1 << 0)
+
+/* Two-Channel DMA Int Mask Register */
+#define TDIMR_COMP (0x1 << 0)
+
+/* Two-Channel DMA Int Status Register */
+#define TDISR_COMP (0x1 << 0)
+
+/*
+ * Two-Channel DMA Descriptor Struct
+ * NOTE: desc's buf must be aligned to 16 bytes.
+ */
+struct mmp_tdma_desc {
+ u32 byte_cnt;
+ u32 src_addr;
+ u32 dst_addr;
+ u32 nxt_desc;
+};
+
+enum mmp_tdma_type {
+ MMP_AUD_TDMA = 0,
+ PXA910_SQU,
+};
+
+#define TDMA_MAX_XFER_BYTES SZ_64K
+
+struct mmp_tdma_chan {
+ struct device *dev;
+ struct dma_chan chan;
+ struct dma_async_tx_descriptor desc;
+ struct tasklet_struct tasklet;
+
+ struct mmp_tdma_desc *desc_arr;
+ dma_addr_t desc_arr_phys;
+ int desc_num;
+ enum dma_transfer_direction dir;
+ dma_addr_t dev_addr;
+ u32 burst_sz;
+ enum dma_slave_buswidth buswidth;
+ enum dma_status status;
+ struct dma_slave_config slave_config;
+
+ int idx;
+ enum mmp_tdma_type type;
+ int irq;
+ void __iomem *reg_base;
+
+ size_t buf_len;
+ size_t period_len;
+ size_t pos;
+
+ struct gen_pool *pool;
+};
+
+#define TDMA_CHANNEL_NUM 2
+struct mmp_tdma_device {
+ struct device *dev;
+ void __iomem *base;
+ struct dma_device device;
+ struct mmp_tdma_chan *tdmac[TDMA_CHANNEL_NUM];
+};
+
+#define to_mmp_tdma_chan(dchan) container_of(dchan, struct mmp_tdma_chan, chan)
+
+static int mmp_tdma_config_write(struct dma_chan *chan,
+ enum dma_transfer_direction dir,
+ struct dma_slave_config *dmaengine_cfg);
+
+static void mmp_tdma_chan_set_desc(struct mmp_tdma_chan *tdmac, dma_addr_t phys)
+{
+ writel(phys, tdmac->reg_base + TDNDPR);
+ writel(readl(tdmac->reg_base + TDCR) | TDCR_FETCHND,
+ tdmac->reg_base + TDCR);
+}
+
+static void mmp_tdma_enable_irq(struct mmp_tdma_chan *tdmac, bool enable)
+{
+ if (enable)
+ writel(TDIMR_COMP, tdmac->reg_base + TDIMR);
+ else
+ writel(0, tdmac->reg_base + TDIMR);
+}
+
+static void mmp_tdma_enable_chan(struct mmp_tdma_chan *tdmac)
+{
+ /* enable dma chan */
+ writel(readl(tdmac->reg_base + TDCR) | TDCR_CHANEN,
+ tdmac->reg_base + TDCR);
+ tdmac->status = DMA_IN_PROGRESS;
+}
+
+static int mmp_tdma_disable_chan(struct dma_chan *chan)
+{
+ struct mmp_tdma_chan *tdmac = to_mmp_tdma_chan(chan);
+ u32 tdcr;
+
+ tdcr = readl(tdmac->reg_base + TDCR);
+ tdcr |= TDCR_ABR;
+ tdcr &= ~TDCR_CHANEN;
+ writel(tdcr, tdmac->reg_base + TDCR);
+
+ tdmac->status = DMA_COMPLETE;
+
+ return 0;
+}
+
+static int mmp_tdma_resume_chan(struct dma_chan *chan)
+{
+ struct mmp_tdma_chan *tdmac = to_mmp_tdma_chan(chan);
+
+ writel(readl(tdmac->reg_base + TDCR) | TDCR_CHANEN,
+ tdmac->reg_base + TDCR);
+ tdmac->status = DMA_IN_PROGRESS;
+
+ return 0;
+}
+
+static int mmp_tdma_pause_chan(struct dma_chan *chan)
+{
+ struct mmp_tdma_chan *tdmac = to_mmp_tdma_chan(chan);
+
+ writel(readl(tdmac->reg_base + TDCR) & ~TDCR_CHANEN,
+ tdmac->reg_base + TDCR);
+ tdmac->status = DMA_PAUSED;
+
+ return 0;
+}
+
+static int mmp_tdma_config_chan(struct dma_chan *chan)
+{
+ struct mmp_tdma_chan *tdmac = to_mmp_tdma_chan(chan);
+ unsigned int tdcr = 0;
+
+ mmp_tdma_disable_chan(chan);
+
+ if (tdmac->dir == DMA_MEM_TO_DEV)
+ tdcr = TDCR_DSTDIR_ADDR_HOLD | TDCR_SRCDIR_ADDR_INC;
+ else if (tdmac->dir == DMA_DEV_TO_MEM)
+ tdcr = TDCR_SRCDIR_ADDR_HOLD | TDCR_DSTDIR_ADDR_INC;
+
+ if (tdmac->type == MMP_AUD_TDMA) {
+ tdcr |= TDCR_PACKMOD;
+
+ switch (tdmac->burst_sz) {
+ case 4:
+ tdcr |= TDCR_BURSTSZ_4B;
+ break;
+ case 8:
+ tdcr |= TDCR_BURSTSZ_8B;
+ break;
+ case 16:
+ tdcr |= TDCR_BURSTSZ_16B;
+ break;
+ case 32:
+ tdcr |= TDCR_BURSTSZ_32B;
+ break;
+ case 64:
+ tdcr |= TDCR_BURSTSZ_64B;
+ break;
+ case 128:
+ tdcr |= TDCR_BURSTSZ_128B;
+ break;
+ default:
+ dev_err(tdmac->dev, "unknown burst size.\n");
+ return -EINVAL;
+ }
+
+ switch (tdmac->buswidth) {
+ case DMA_SLAVE_BUSWIDTH_1_BYTE:
+ tdcr |= TDCR_SSZ_8_BITS;
+ break;
+ case DMA_SLAVE_BUSWIDTH_2_BYTES:
+ tdcr |= TDCR_SSZ_16_BITS;
+ break;
+ case DMA_SLAVE_BUSWIDTH_4_BYTES:
+ tdcr |= TDCR_SSZ_32_BITS;
+ break;
+ default:
+ dev_err(tdmac->dev, "unknown bus size.\n");
+ return -EINVAL;
+ }
+ } else if (tdmac->type == PXA910_SQU) {
+ tdcr |= TDCR_SSPMOD;
+
+ switch (tdmac->burst_sz) {
+ case 1:
+ tdcr |= TDCR_BURSTSZ_SQU_1B;
+ break;
+ case 2:
+ tdcr |= TDCR_BURSTSZ_SQU_2B;
+ break;
+ case 4:
+ tdcr |= TDCR_BURSTSZ_SQU_4B;
+ break;
+ case 8:
+ tdcr |= TDCR_BURSTSZ_SQU_8B;
+ break;
+ case 16:
+ tdcr |= TDCR_BURSTSZ_SQU_16B;
+ break;
+ case 32:
+ tdcr |= TDCR_BURSTSZ_SQU_32B;
+ break;
+ default:
+ dev_err(tdmac->dev, "unknown burst size.\n");
+ return -EINVAL;
+ }
+ }
+
+ writel(tdcr, tdmac->reg_base + TDCR);
+ return 0;
+}
+
+static int mmp_tdma_clear_chan_irq(struct mmp_tdma_chan *tdmac)
+{
+ u32 reg = readl(tdmac->reg_base + TDISR);
+
+ if (reg & TDISR_COMP) {
+ /* clear irq */
+ reg &= ~TDISR_COMP;
+ writel(reg, tdmac->reg_base + TDISR);
+
+ return 0;
+ }
+ return -EAGAIN;
+}
+
+static size_t mmp_tdma_get_pos(struct mmp_tdma_chan *tdmac)
+{
+ size_t reg;
+
+ if (tdmac->idx == 0) {
+ reg = __raw_readl(tdmac->reg_base + TDSAR);
+ reg -= tdmac->desc_arr[0].src_addr;
+ } else if (tdmac->idx == 1) {
+ reg = __raw_readl(tdmac->reg_base + TDDAR);
+ reg -= tdmac->desc_arr[0].dst_addr;
+ } else
+ return -EINVAL;
+
+ return reg;
+}
+
+static irqreturn_t mmp_tdma_chan_handler(int irq, void *dev_id)
+{
+ struct mmp_tdma_chan *tdmac = dev_id;
+
+ if (mmp_tdma_clear_chan_irq(tdmac) == 0) {
+ tasklet_schedule(&tdmac->tasklet);
+ return IRQ_HANDLED;
+ } else
+ return IRQ_NONE;
+}
+
+static irqreturn_t mmp_tdma_int_handler(int irq, void *dev_id)
+{
+ struct mmp_tdma_device *tdev = dev_id;
+ int i, ret;
+ int irq_num = 0;
+
+ for (i = 0; i < TDMA_CHANNEL_NUM; i++) {
+ struct mmp_tdma_chan *tdmac = tdev->tdmac[i];
+
+ ret = mmp_tdma_chan_handler(irq, tdmac);
+ if (ret == IRQ_HANDLED)
+ irq_num++;
+ }
+
+ if (irq_num)
+ return IRQ_HANDLED;
+ else
+ return IRQ_NONE;
+}
+
+static void dma_do_tasklet(struct tasklet_struct *t)
+{
+ struct mmp_tdma_chan *tdmac = from_tasklet(tdmac, t, tasklet);
+
+ dmaengine_desc_get_callback_invoke(&tdmac->desc, NULL);
+}
+
+static void mmp_tdma_free_descriptor(struct mmp_tdma_chan *tdmac)
+{
+ struct gen_pool *gpool;
+ int size = tdmac->desc_num * sizeof(struct mmp_tdma_desc);
+
+ gpool = tdmac->pool;
+ if (gpool && tdmac->desc_arr)
+ gen_pool_free(gpool, (unsigned long)tdmac->desc_arr,
+ size);
+ tdmac->desc_arr = NULL;
+ if (tdmac->status == DMA_ERROR)
+ tdmac->status = DMA_COMPLETE;
+
+ return;
+}
+
+static dma_cookie_t mmp_tdma_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+ struct mmp_tdma_chan *tdmac = to_mmp_tdma_chan(tx->chan);
+
+ mmp_tdma_chan_set_desc(tdmac, tdmac->desc_arr_phys);
+
+ return 0;
+}
+
+static int mmp_tdma_alloc_chan_resources(struct dma_chan *chan)
+{
+ struct mmp_tdma_chan *tdmac = to_mmp_tdma_chan(chan);
+ int ret;
+
+ dma_async_tx_descriptor_init(&tdmac->desc, chan);
+ tdmac->desc.tx_submit = mmp_tdma_tx_submit;
+
+ if (tdmac->irq) {
+ ret = devm_request_irq(tdmac->dev, tdmac->irq,
+ mmp_tdma_chan_handler, 0, "tdma", tdmac);
+ if (ret)
+ return ret;
+ }
+ return 1;
+}
+
+static void mmp_tdma_free_chan_resources(struct dma_chan *chan)
+{
+ struct mmp_tdma_chan *tdmac = to_mmp_tdma_chan(chan);
+
+ if (tdmac->irq)
+ devm_free_irq(tdmac->dev, tdmac->irq, tdmac);
+ mmp_tdma_free_descriptor(tdmac);
+ return;
+}
+
+static struct mmp_tdma_desc *mmp_tdma_alloc_descriptor(struct mmp_tdma_chan *tdmac)
+{
+ struct gen_pool *gpool;
+ int size = tdmac->desc_num * sizeof(struct mmp_tdma_desc);
+
+ gpool = tdmac->pool;
+ if (!gpool)
+ return NULL;
+
+ tdmac->desc_arr = gen_pool_dma_alloc(gpool, size, &tdmac->desc_arr_phys);
+
+ return tdmac->desc_arr;
+}
+
+static struct dma_async_tx_descriptor *mmp_tdma_prep_dma_cyclic(
+ struct dma_chan *chan, dma_addr_t dma_addr, size_t buf_len,
+ size_t period_len, enum dma_transfer_direction direction,
+ unsigned long flags)
+{
+ struct mmp_tdma_chan *tdmac = to_mmp_tdma_chan(chan);
+ struct mmp_tdma_desc *desc;
+ int num_periods = buf_len / period_len;
+ int i = 0, buf = 0;
+
+ if (!is_slave_direction(direction)) {
+ dev_err(tdmac->dev, "unsupported transfer direction\n");
+ return NULL;
+ }
+
+ if (tdmac->status != DMA_COMPLETE) {
+ dev_err(tdmac->dev, "controller busy");
+ return NULL;
+ }
+
+ if (period_len > TDMA_MAX_XFER_BYTES) {
+ dev_err(tdmac->dev,
+ "maximum period size exceeded: %zu > %d\n",
+ period_len, TDMA_MAX_XFER_BYTES);
+ goto err_out;
+ }
+
+ tdmac->status = DMA_IN_PROGRESS;
+ tdmac->desc_num = num_periods;
+ desc = mmp_tdma_alloc_descriptor(tdmac);
+ if (!desc)
+ goto err_out;
+
+ if (mmp_tdma_config_write(chan, direction, &tdmac->slave_config))
+ goto err_out;
+
+ while (buf < buf_len) {
+ desc = &tdmac->desc_arr[i];
+
+ if (i + 1 == num_periods)
+ desc->nxt_desc = tdmac->desc_arr_phys;
+ else
+ desc->nxt_desc = tdmac->desc_arr_phys +
+ sizeof(*desc) * (i + 1);
+
+ if (direction == DMA_MEM_TO_DEV) {
+ desc->src_addr = dma_addr;
+ desc->dst_addr = tdmac->dev_addr;
+ } else {
+ desc->src_addr = tdmac->dev_addr;
+ desc->dst_addr = dma_addr;
+ }
+ desc->byte_cnt = period_len;
+ dma_addr += period_len;
+ buf += period_len;
+ i++;
+ }
+
+ /* enable interrupt */
+ if (flags & DMA_PREP_INTERRUPT)
+ mmp_tdma_enable_irq(tdmac, true);
+
+ tdmac->buf_len = buf_len;
+ tdmac->period_len = period_len;
+ tdmac->pos = 0;
+
+ return &tdmac->desc;
+
+err_out:
+ tdmac->status = DMA_ERROR;
+ return NULL;
+}
+
+static int mmp_tdma_terminate_all(struct dma_chan *chan)
+{
+ struct mmp_tdma_chan *tdmac = to_mmp_tdma_chan(chan);
+
+ mmp_tdma_disable_chan(chan);
+ /* disable interrupt */
+ mmp_tdma_enable_irq(tdmac, false);
+
+ return 0;
+}
+
+static int mmp_tdma_config(struct dma_chan *chan,
+ struct dma_slave_config *dmaengine_cfg)
+{
+ struct mmp_tdma_chan *tdmac = to_mmp_tdma_chan(chan);
+
+ memcpy(&tdmac->slave_config, dmaengine_cfg, sizeof(*dmaengine_cfg));
+
+ return 0;
+}
+
+static int mmp_tdma_config_write(struct dma_chan *chan,
+ enum dma_transfer_direction dir,
+ struct dma_slave_config *dmaengine_cfg)
+{
+ struct mmp_tdma_chan *tdmac = to_mmp_tdma_chan(chan);
+
+ if (dir == DMA_DEV_TO_MEM) {
+ tdmac->dev_addr = dmaengine_cfg->src_addr;
+ tdmac->burst_sz = dmaengine_cfg->src_maxburst;
+ tdmac->buswidth = dmaengine_cfg->src_addr_width;
+ } else {
+ tdmac->dev_addr = dmaengine_cfg->dst_addr;
+ tdmac->burst_sz = dmaengine_cfg->dst_maxburst;
+ tdmac->buswidth = dmaengine_cfg->dst_addr_width;
+ }
+ tdmac->dir = dir;
+
+ return mmp_tdma_config_chan(chan);
+}
+
+static enum dma_status mmp_tdma_tx_status(struct dma_chan *chan,
+ dma_cookie_t cookie, struct dma_tx_state *txstate)
+{
+ struct mmp_tdma_chan *tdmac = to_mmp_tdma_chan(chan);
+
+ tdmac->pos = mmp_tdma_get_pos(tdmac);
+ dma_set_tx_state(txstate, chan->completed_cookie, chan->cookie,
+ tdmac->buf_len - tdmac->pos);
+
+ return tdmac->status;
+}
+
+static void mmp_tdma_issue_pending(struct dma_chan *chan)
+{
+ struct mmp_tdma_chan *tdmac = to_mmp_tdma_chan(chan);
+
+ mmp_tdma_enable_chan(tdmac);
+}
+
+static int mmp_tdma_remove(struct platform_device *pdev)
+{
+ if (pdev->dev.of_node)
+ of_dma_controller_free(pdev->dev.of_node);
+
+ return 0;
+}
+
+static int mmp_tdma_chan_init(struct mmp_tdma_device *tdev,
+ int idx, int irq,
+ int type, struct gen_pool *pool)
+{
+ struct mmp_tdma_chan *tdmac;
+
+ if (idx >= TDMA_CHANNEL_NUM) {
+ dev_err(tdev->dev, "too many channels for device!\n");
+ return -EINVAL;
+ }
+
+ /* alloc channel */
+ tdmac = devm_kzalloc(tdev->dev, sizeof(*tdmac), GFP_KERNEL);
+ if (!tdmac)
+ return -ENOMEM;
+
+ if (irq)
+ tdmac->irq = irq;
+ tdmac->dev = tdev->dev;
+ tdmac->chan.device = &tdev->device;
+ tdmac->idx = idx;
+ tdmac->type = type;
+ tdmac->reg_base = tdev->base + idx * 4;
+ tdmac->pool = pool;
+ tdmac->status = DMA_COMPLETE;
+ tdev->tdmac[tdmac->idx] = tdmac;
+ tasklet_setup(&tdmac->tasklet, dma_do_tasklet);
+
+ /* add the channel to tdma_chan list */
+ list_add_tail(&tdmac->chan.device_node,
+ &tdev->device.channels);
+ return 0;
+}
+
+struct mmp_tdma_filter_param {
+ unsigned int chan_id;
+};
+
+static bool mmp_tdma_filter_fn(struct dma_chan *chan, void *fn_param)
+{
+ struct mmp_tdma_filter_param *param = fn_param;
+
+ if (chan->chan_id != param->chan_id)
+ return false;
+
+ return true;
+}
+
+static struct dma_chan *mmp_tdma_xlate(struct of_phandle_args *dma_spec,
+ struct of_dma *ofdma)
+{
+ struct mmp_tdma_device *tdev = ofdma->of_dma_data;
+ dma_cap_mask_t mask = tdev->device.cap_mask;
+ struct mmp_tdma_filter_param param;
+
+ if (dma_spec->args_count != 1)
+ return NULL;
+
+ param.chan_id = dma_spec->args[0];
+
+ if (param.chan_id >= TDMA_CHANNEL_NUM)
+ return NULL;
+
+ return __dma_request_channel(&mask, mmp_tdma_filter_fn, &param,
+ ofdma->of_node);
+}
+
+static const struct of_device_id mmp_tdma_dt_ids[] = {
+ { .compatible = "marvell,adma-1.0", .data = (void *)MMP_AUD_TDMA},
+ { .compatible = "marvell,pxa910-squ", .data = (void *)PXA910_SQU},
+ {}
+};
+MODULE_DEVICE_TABLE(of, mmp_tdma_dt_ids);
+
+static int mmp_tdma_probe(struct platform_device *pdev)
+{
+ enum mmp_tdma_type type;
+ const struct of_device_id *of_id;
+ struct mmp_tdma_device *tdev;
+ struct resource *iores;
+ int i, ret;
+ int irq = 0, irq_num = 0;
+ int chan_num = TDMA_CHANNEL_NUM;
+ struct gen_pool *pool = NULL;
+
+ of_id = of_match_device(mmp_tdma_dt_ids, &pdev->dev);
+ if (of_id)
+ type = (enum mmp_tdma_type) of_id->data;
+ else
+ type = platform_get_device_id(pdev)->driver_data;
+
+ /* always have couple channels */
+ tdev = devm_kzalloc(&pdev->dev, sizeof(*tdev), GFP_KERNEL);
+ if (!tdev)
+ return -ENOMEM;
+
+ tdev->dev = &pdev->dev;
+
+ for (i = 0; i < chan_num; i++) {
+ if (platform_get_irq(pdev, i) > 0)
+ irq_num++;
+ }
+
+ iores = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ tdev->base = devm_ioremap_resource(&pdev->dev, iores);
+ if (IS_ERR(tdev->base))
+ return PTR_ERR(tdev->base);
+
+ INIT_LIST_HEAD(&tdev->device.channels);
+
+ if (pdev->dev.of_node)
+ pool = of_gen_pool_get(pdev->dev.of_node, "asram", 0);
+ else
+ pool = sram_get_gpool("asram");
+ if (!pool) {
+ dev_err(&pdev->dev, "asram pool not available\n");
+ return -ENOMEM;
+ }
+
+ if (irq_num != chan_num) {
+ irq = platform_get_irq(pdev, 0);
+ ret = devm_request_irq(&pdev->dev, irq,
+ mmp_tdma_int_handler, IRQF_SHARED, "tdma", tdev);
+ if (ret)
+ return ret;
+ }
+
+ /* initialize channel parameters */
+ for (i = 0; i < chan_num; i++) {
+ irq = (irq_num != chan_num) ? 0 : platform_get_irq(pdev, i);
+ ret = mmp_tdma_chan_init(tdev, i, irq, type, pool);
+ if (ret)
+ return ret;
+ }
+
+ dma_cap_set(DMA_SLAVE, tdev->device.cap_mask);
+ dma_cap_set(DMA_CYCLIC, tdev->device.cap_mask);
+ tdev->device.dev = &pdev->dev;
+ tdev->device.device_alloc_chan_resources =
+ mmp_tdma_alloc_chan_resources;
+ tdev->device.device_free_chan_resources =
+ mmp_tdma_free_chan_resources;
+ tdev->device.device_prep_dma_cyclic = mmp_tdma_prep_dma_cyclic;
+ tdev->device.device_tx_status = mmp_tdma_tx_status;
+ tdev->device.device_issue_pending = mmp_tdma_issue_pending;
+ tdev->device.device_config = mmp_tdma_config;
+ tdev->device.device_pause = mmp_tdma_pause_chan;
+ tdev->device.device_resume = mmp_tdma_resume_chan;
+ tdev->device.device_terminate_all = mmp_tdma_terminate_all;
+ tdev->device.copy_align = DMAENGINE_ALIGN_8_BYTES;
+
+ tdev->device.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
+ if (type == MMP_AUD_TDMA) {
+ tdev->device.max_burst = SZ_128;
+ tdev->device.src_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_4_BYTES);
+ tdev->device.dst_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_4_BYTES);
+ } else if (type == PXA910_SQU) {
+ tdev->device.max_burst = SZ_32;
+ }
+ tdev->device.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
+ tdev->device.descriptor_reuse = true;
+
+ dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
+ platform_set_drvdata(pdev, tdev);
+
+ ret = dmaenginem_async_device_register(&tdev->device);
+ if (ret) {
+ dev_err(tdev->device.dev, "unable to register\n");
+ return ret;
+ }
+
+ if (pdev->dev.of_node) {
+ ret = of_dma_controller_register(pdev->dev.of_node,
+ mmp_tdma_xlate, tdev);
+ if (ret) {
+ dev_err(tdev->device.dev,
+ "failed to register controller\n");
+ return ret;
+ }
+ }
+
+ dev_info(tdev->device.dev, "initialized\n");
+ return 0;
+}
+
+static const struct platform_device_id mmp_tdma_id_table[] = {
+ { "mmp-adma", MMP_AUD_TDMA },
+ { "pxa910-squ", PXA910_SQU },
+ { },
+};
+
+static struct platform_driver mmp_tdma_driver = {
+ .driver = {
+ .name = "mmp-tdma",
+ .of_match_table = mmp_tdma_dt_ids,
+ },
+ .id_table = mmp_tdma_id_table,
+ .probe = mmp_tdma_probe,
+ .remove = mmp_tdma_remove,
+};
+
+module_platform_driver(mmp_tdma_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("MMP Two-Channel DMA Driver");
+MODULE_ALIAS("platform:mmp-tdma");
+MODULE_AUTHOR("Leo Yan <leoy@marvell.com>");
+MODULE_AUTHOR("Zhangfei Gao <zhangfei.gao@marvell.com>");
diff --git a/drivers/dma/moxart-dma.c b/drivers/dma/moxart-dma.c
new file mode 100644
index 000000000..7459382a8
--- /dev/null
+++ b/drivers/dma/moxart-dma.c
@@ -0,0 +1,678 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * MOXA ART SoCs DMA Engine support.
+ *
+ * Copyright (C) 2013 Jonas Jensen
+ *
+ * Jonas Jensen <jonas.jensen@gmail.com>
+ */
+
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/of_dma.h>
+#include <linux/bitops.h>
+
+#include <asm/cacheflush.h>
+
+#include "dmaengine.h"
+#include "virt-dma.h"
+
+#define APB_DMA_MAX_CHANNEL 4
+
+#define REG_OFF_ADDRESS_SOURCE 0
+#define REG_OFF_ADDRESS_DEST 4
+#define REG_OFF_CYCLES 8
+#define REG_OFF_CTRL 12
+#define REG_OFF_CHAN_SIZE 16
+
+#define APB_DMA_ENABLE BIT(0)
+#define APB_DMA_FIN_INT_STS BIT(1)
+#define APB_DMA_FIN_INT_EN BIT(2)
+#define APB_DMA_BURST_MODE BIT(3)
+#define APB_DMA_ERR_INT_STS BIT(4)
+#define APB_DMA_ERR_INT_EN BIT(5)
+
+/*
+ * Unset: APB
+ * Set: AHB
+ */
+#define APB_DMA_SOURCE_SELECT 0x40
+#define APB_DMA_DEST_SELECT 0x80
+
+#define APB_DMA_SOURCE 0x100
+#define APB_DMA_DEST 0x1000
+
+#define APB_DMA_SOURCE_MASK 0x700
+#define APB_DMA_DEST_MASK 0x7000
+
+/*
+ * 000: No increment
+ * 001: +1 (Burst=0), +4 (Burst=1)
+ * 010: +2 (Burst=0), +8 (Burst=1)
+ * 011: +4 (Burst=0), +16 (Burst=1)
+ * 101: -1 (Burst=0), -4 (Burst=1)
+ * 110: -2 (Burst=0), -8 (Burst=1)
+ * 111: -4 (Burst=0), -16 (Burst=1)
+ */
+#define APB_DMA_SOURCE_INC_0 0
+#define APB_DMA_SOURCE_INC_1_4 0x100
+#define APB_DMA_SOURCE_INC_2_8 0x200
+#define APB_DMA_SOURCE_INC_4_16 0x300
+#define APB_DMA_SOURCE_DEC_1_4 0x500
+#define APB_DMA_SOURCE_DEC_2_8 0x600
+#define APB_DMA_SOURCE_DEC_4_16 0x700
+#define APB_DMA_DEST_INC_0 0
+#define APB_DMA_DEST_INC_1_4 0x1000
+#define APB_DMA_DEST_INC_2_8 0x2000
+#define APB_DMA_DEST_INC_4_16 0x3000
+#define APB_DMA_DEST_DEC_1_4 0x5000
+#define APB_DMA_DEST_DEC_2_8 0x6000
+#define APB_DMA_DEST_DEC_4_16 0x7000
+
+/*
+ * Request signal select source/destination address for DMA hardware handshake.
+ *
+ * The request line number is a property of the DMA controller itself,
+ * e.g. MMC must always request channels where dma_slave_config->slave_id is 5.
+ *
+ * 0: No request / Grant signal
+ * 1-15: Request / Grant signal
+ */
+#define APB_DMA_SOURCE_REQ_NO 0x1000000
+#define APB_DMA_SOURCE_REQ_NO_MASK 0xf000000
+#define APB_DMA_DEST_REQ_NO 0x10000
+#define APB_DMA_DEST_REQ_NO_MASK 0xf0000
+
+#define APB_DMA_DATA_WIDTH 0x100000
+#define APB_DMA_DATA_WIDTH_MASK 0x300000
+/*
+ * Data width of transfer:
+ *
+ * 00: Word
+ * 01: Half
+ * 10: Byte
+ */
+#define APB_DMA_DATA_WIDTH_4 0
+#define APB_DMA_DATA_WIDTH_2 0x100000
+#define APB_DMA_DATA_WIDTH_1 0x200000
+
+#define APB_DMA_CYCLES_MASK 0x00ffffff
+
+#define MOXART_DMA_DATA_TYPE_S8 0x00
+#define MOXART_DMA_DATA_TYPE_S16 0x01
+#define MOXART_DMA_DATA_TYPE_S32 0x02
+
+struct moxart_sg {
+ dma_addr_t addr;
+ uint32_t len;
+};
+
+struct moxart_desc {
+ enum dma_transfer_direction dma_dir;
+ dma_addr_t dev_addr;
+ unsigned int sglen;
+ unsigned int dma_cycles;
+ struct virt_dma_desc vd;
+ uint8_t es;
+ struct moxart_sg sg[];
+};
+
+struct moxart_chan {
+ struct virt_dma_chan vc;
+
+ void __iomem *base;
+ struct moxart_desc *desc;
+
+ struct dma_slave_config cfg;
+
+ bool allocated;
+ bool error;
+ int ch_num;
+ unsigned int line_reqno;
+ unsigned int sgidx;
+};
+
+struct moxart_dmadev {
+ struct dma_device dma_slave;
+ struct moxart_chan slave_chans[APB_DMA_MAX_CHANNEL];
+ unsigned int irq;
+};
+
+struct moxart_filter_data {
+ struct moxart_dmadev *mdc;
+ struct of_phandle_args *dma_spec;
+};
+
+static const unsigned int es_bytes[] = {
+ [MOXART_DMA_DATA_TYPE_S8] = 1,
+ [MOXART_DMA_DATA_TYPE_S16] = 2,
+ [MOXART_DMA_DATA_TYPE_S32] = 4,
+};
+
+static struct device *chan2dev(struct dma_chan *chan)
+{
+ return &chan->dev->device;
+}
+
+static inline struct moxart_chan *to_moxart_dma_chan(struct dma_chan *c)
+{
+ return container_of(c, struct moxart_chan, vc.chan);
+}
+
+static inline struct moxart_desc *to_moxart_dma_desc(
+ struct dma_async_tx_descriptor *t)
+{
+ return container_of(t, struct moxart_desc, vd.tx);
+}
+
+static void moxart_dma_desc_free(struct virt_dma_desc *vd)
+{
+ kfree(container_of(vd, struct moxart_desc, vd));
+}
+
+static int moxart_terminate_all(struct dma_chan *chan)
+{
+ struct moxart_chan *ch = to_moxart_dma_chan(chan);
+ unsigned long flags;
+ LIST_HEAD(head);
+ u32 ctrl;
+
+ dev_dbg(chan2dev(chan), "%s: ch=%p\n", __func__, ch);
+
+ spin_lock_irqsave(&ch->vc.lock, flags);
+
+ if (ch->desc) {
+ moxart_dma_desc_free(&ch->desc->vd);
+ ch->desc = NULL;
+ }
+
+ ctrl = readl(ch->base + REG_OFF_CTRL);
+ ctrl &= ~(APB_DMA_ENABLE | APB_DMA_FIN_INT_EN | APB_DMA_ERR_INT_EN);
+ writel(ctrl, ch->base + REG_OFF_CTRL);
+
+ vchan_get_all_descriptors(&ch->vc, &head);
+ spin_unlock_irqrestore(&ch->vc.lock, flags);
+ vchan_dma_desc_free_list(&ch->vc, &head);
+
+ return 0;
+}
+
+static int moxart_slave_config(struct dma_chan *chan,
+ struct dma_slave_config *cfg)
+{
+ struct moxart_chan *ch = to_moxart_dma_chan(chan);
+ u32 ctrl;
+
+ ch->cfg = *cfg;
+
+ ctrl = readl(ch->base + REG_OFF_CTRL);
+ ctrl |= APB_DMA_BURST_MODE;
+ ctrl &= ~(APB_DMA_DEST_MASK | APB_DMA_SOURCE_MASK);
+ ctrl &= ~(APB_DMA_DEST_REQ_NO_MASK | APB_DMA_SOURCE_REQ_NO_MASK);
+
+ switch (ch->cfg.src_addr_width) {
+ case DMA_SLAVE_BUSWIDTH_1_BYTE:
+ ctrl |= APB_DMA_DATA_WIDTH_1;
+ if (ch->cfg.direction != DMA_MEM_TO_DEV)
+ ctrl |= APB_DMA_DEST_INC_1_4;
+ else
+ ctrl |= APB_DMA_SOURCE_INC_1_4;
+ break;
+ case DMA_SLAVE_BUSWIDTH_2_BYTES:
+ ctrl |= APB_DMA_DATA_WIDTH_2;
+ if (ch->cfg.direction != DMA_MEM_TO_DEV)
+ ctrl |= APB_DMA_DEST_INC_2_8;
+ else
+ ctrl |= APB_DMA_SOURCE_INC_2_8;
+ break;
+ case DMA_SLAVE_BUSWIDTH_4_BYTES:
+ ctrl &= ~APB_DMA_DATA_WIDTH;
+ if (ch->cfg.direction != DMA_MEM_TO_DEV)
+ ctrl |= APB_DMA_DEST_INC_4_16;
+ else
+ ctrl |= APB_DMA_SOURCE_INC_4_16;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if (ch->cfg.direction == DMA_MEM_TO_DEV) {
+ ctrl &= ~APB_DMA_DEST_SELECT;
+ ctrl |= APB_DMA_SOURCE_SELECT;
+ ctrl |= (ch->line_reqno << 16 &
+ APB_DMA_DEST_REQ_NO_MASK);
+ } else {
+ ctrl |= APB_DMA_DEST_SELECT;
+ ctrl &= ~APB_DMA_SOURCE_SELECT;
+ ctrl |= (ch->line_reqno << 24 &
+ APB_DMA_SOURCE_REQ_NO_MASK);
+ }
+
+ writel(ctrl, ch->base + REG_OFF_CTRL);
+
+ return 0;
+}
+
+static struct dma_async_tx_descriptor *moxart_prep_slave_sg(
+ struct dma_chan *chan, struct scatterlist *sgl,
+ unsigned int sg_len, enum dma_transfer_direction dir,
+ unsigned long tx_flags, void *context)
+{
+ struct moxart_chan *ch = to_moxart_dma_chan(chan);
+ struct moxart_desc *d;
+ enum dma_slave_buswidth dev_width;
+ dma_addr_t dev_addr;
+ struct scatterlist *sgent;
+ unsigned int es;
+ unsigned int i;
+
+ if (!is_slave_direction(dir)) {
+ dev_err(chan2dev(chan), "%s: invalid DMA direction\n",
+ __func__);
+ return NULL;
+ }
+
+ if (dir == DMA_DEV_TO_MEM) {
+ dev_addr = ch->cfg.src_addr;
+ dev_width = ch->cfg.src_addr_width;
+ } else {
+ dev_addr = ch->cfg.dst_addr;
+ dev_width = ch->cfg.dst_addr_width;
+ }
+
+ switch (dev_width) {
+ case DMA_SLAVE_BUSWIDTH_1_BYTE:
+ es = MOXART_DMA_DATA_TYPE_S8;
+ break;
+ case DMA_SLAVE_BUSWIDTH_2_BYTES:
+ es = MOXART_DMA_DATA_TYPE_S16;
+ break;
+ case DMA_SLAVE_BUSWIDTH_4_BYTES:
+ es = MOXART_DMA_DATA_TYPE_S32;
+ break;
+ default:
+ dev_err(chan2dev(chan), "%s: unsupported data width (%u)\n",
+ __func__, dev_width);
+ return NULL;
+ }
+
+ d = kzalloc(struct_size(d, sg, sg_len), GFP_ATOMIC);
+ if (!d)
+ return NULL;
+
+ d->dma_dir = dir;
+ d->dev_addr = dev_addr;
+ d->es = es;
+
+ for_each_sg(sgl, sgent, sg_len, i) {
+ d->sg[i].addr = sg_dma_address(sgent);
+ d->sg[i].len = sg_dma_len(sgent);
+ }
+
+ d->sglen = sg_len;
+
+ ch->error = 0;
+
+ return vchan_tx_prep(&ch->vc, &d->vd, tx_flags);
+}
+
+static struct dma_chan *moxart_of_xlate(struct of_phandle_args *dma_spec,
+ struct of_dma *ofdma)
+{
+ struct moxart_dmadev *mdc = ofdma->of_dma_data;
+ struct dma_chan *chan;
+ struct moxart_chan *ch;
+
+ chan = dma_get_any_slave_channel(&mdc->dma_slave);
+ if (!chan)
+ return NULL;
+
+ ch = to_moxart_dma_chan(chan);
+ ch->line_reqno = dma_spec->args[0];
+
+ return chan;
+}
+
+static int moxart_alloc_chan_resources(struct dma_chan *chan)
+{
+ struct moxart_chan *ch = to_moxart_dma_chan(chan);
+
+ dev_dbg(chan2dev(chan), "%s: allocating channel #%u\n",
+ __func__, ch->ch_num);
+ ch->allocated = 1;
+
+ return 0;
+}
+
+static void moxart_free_chan_resources(struct dma_chan *chan)
+{
+ struct moxart_chan *ch = to_moxart_dma_chan(chan);
+
+ vchan_free_chan_resources(&ch->vc);
+
+ dev_dbg(chan2dev(chan), "%s: freeing channel #%u\n",
+ __func__, ch->ch_num);
+ ch->allocated = 0;
+}
+
+static void moxart_dma_set_params(struct moxart_chan *ch, dma_addr_t src_addr,
+ dma_addr_t dst_addr)
+{
+ writel(src_addr, ch->base + REG_OFF_ADDRESS_SOURCE);
+ writel(dst_addr, ch->base + REG_OFF_ADDRESS_DEST);
+}
+
+static void moxart_set_transfer_params(struct moxart_chan *ch, unsigned int len)
+{
+ struct moxart_desc *d = ch->desc;
+ unsigned int sglen_div = es_bytes[d->es];
+
+ d->dma_cycles = len >> sglen_div;
+
+ /*
+ * There are 4 cycles on 64 bytes copied, i.e. one cycle copies 16
+ * bytes ( when width is APB_DMAB_DATA_WIDTH_4 ).
+ */
+ writel(d->dma_cycles, ch->base + REG_OFF_CYCLES);
+
+ dev_dbg(chan2dev(&ch->vc.chan), "%s: set %u DMA cycles (len=%u)\n",
+ __func__, d->dma_cycles, len);
+}
+
+static void moxart_start_dma(struct moxart_chan *ch)
+{
+ u32 ctrl;
+
+ ctrl = readl(ch->base + REG_OFF_CTRL);
+ ctrl |= (APB_DMA_ENABLE | APB_DMA_FIN_INT_EN | APB_DMA_ERR_INT_EN);
+ writel(ctrl, ch->base + REG_OFF_CTRL);
+}
+
+static void moxart_dma_start_sg(struct moxart_chan *ch, unsigned int idx)
+{
+ struct moxart_desc *d = ch->desc;
+ struct moxart_sg *sg = ch->desc->sg + idx;
+
+ if (ch->desc->dma_dir == DMA_MEM_TO_DEV)
+ moxart_dma_set_params(ch, sg->addr, d->dev_addr);
+ else if (ch->desc->dma_dir == DMA_DEV_TO_MEM)
+ moxart_dma_set_params(ch, d->dev_addr, sg->addr);
+
+ moxart_set_transfer_params(ch, sg->len);
+
+ moxart_start_dma(ch);
+}
+
+static void moxart_dma_start_desc(struct dma_chan *chan)
+{
+ struct moxart_chan *ch = to_moxart_dma_chan(chan);
+ struct virt_dma_desc *vd;
+
+ vd = vchan_next_desc(&ch->vc);
+
+ if (!vd) {
+ ch->desc = NULL;
+ return;
+ }
+
+ list_del(&vd->node);
+
+ ch->desc = to_moxart_dma_desc(&vd->tx);
+ ch->sgidx = 0;
+
+ moxart_dma_start_sg(ch, 0);
+}
+
+static void moxart_issue_pending(struct dma_chan *chan)
+{
+ struct moxart_chan *ch = to_moxart_dma_chan(chan);
+ unsigned long flags;
+
+ spin_lock_irqsave(&ch->vc.lock, flags);
+ if (vchan_issue_pending(&ch->vc) && !ch->desc)
+ moxart_dma_start_desc(chan);
+ spin_unlock_irqrestore(&ch->vc.lock, flags);
+}
+
+static size_t moxart_dma_desc_size(struct moxart_desc *d,
+ unsigned int completed_sgs)
+{
+ unsigned int i;
+ size_t size;
+
+ for (size = i = completed_sgs; i < d->sglen; i++)
+ size += d->sg[i].len;
+
+ return size;
+}
+
+static size_t moxart_dma_desc_size_in_flight(struct moxart_chan *ch)
+{
+ size_t size;
+ unsigned int completed_cycles, cycles;
+
+ size = moxart_dma_desc_size(ch->desc, ch->sgidx);
+ cycles = readl(ch->base + REG_OFF_CYCLES);
+ completed_cycles = (ch->desc->dma_cycles - cycles);
+ size -= completed_cycles << es_bytes[ch->desc->es];
+
+ dev_dbg(chan2dev(&ch->vc.chan), "%s: size=%zu\n", __func__, size);
+
+ return size;
+}
+
+static enum dma_status moxart_tx_status(struct dma_chan *chan,
+ dma_cookie_t cookie,
+ struct dma_tx_state *txstate)
+{
+ struct moxart_chan *ch = to_moxart_dma_chan(chan);
+ struct virt_dma_desc *vd;
+ struct moxart_desc *d;
+ enum dma_status ret;
+ unsigned long flags;
+
+ /*
+ * dma_cookie_status() assigns initial residue value.
+ */
+ ret = dma_cookie_status(chan, cookie, txstate);
+
+ spin_lock_irqsave(&ch->vc.lock, flags);
+ vd = vchan_find_desc(&ch->vc, cookie);
+ if (vd) {
+ d = to_moxart_dma_desc(&vd->tx);
+ txstate->residue = moxart_dma_desc_size(d, 0);
+ } else if (ch->desc && ch->desc->vd.tx.cookie == cookie) {
+ txstate->residue = moxart_dma_desc_size_in_flight(ch);
+ }
+ spin_unlock_irqrestore(&ch->vc.lock, flags);
+
+ if (ch->error)
+ return DMA_ERROR;
+
+ return ret;
+}
+
+static void moxart_dma_init(struct dma_device *dma, struct device *dev)
+{
+ dma->device_prep_slave_sg = moxart_prep_slave_sg;
+ dma->device_alloc_chan_resources = moxart_alloc_chan_resources;
+ dma->device_free_chan_resources = moxart_free_chan_resources;
+ dma->device_issue_pending = moxart_issue_pending;
+ dma->device_tx_status = moxart_tx_status;
+ dma->device_config = moxart_slave_config;
+ dma->device_terminate_all = moxart_terminate_all;
+ dma->dev = dev;
+
+ INIT_LIST_HEAD(&dma->channels);
+}
+
+static irqreturn_t moxart_dma_interrupt(int irq, void *devid)
+{
+ struct moxart_dmadev *mc = devid;
+ struct moxart_chan *ch = &mc->slave_chans[0];
+ unsigned int i;
+ u32 ctrl;
+
+ dev_dbg(chan2dev(&ch->vc.chan), "%s\n", __func__);
+
+ for (i = 0; i < APB_DMA_MAX_CHANNEL; i++, ch++) {
+ if (!ch->allocated)
+ continue;
+
+ ctrl = readl(ch->base + REG_OFF_CTRL);
+
+ dev_dbg(chan2dev(&ch->vc.chan), "%s: ch=%p ch->base=%p ctrl=%x\n",
+ __func__, ch, ch->base, ctrl);
+
+ if (ctrl & APB_DMA_FIN_INT_STS) {
+ ctrl &= ~APB_DMA_FIN_INT_STS;
+ if (ch->desc) {
+ spin_lock(&ch->vc.lock);
+ if (++ch->sgidx < ch->desc->sglen) {
+ moxart_dma_start_sg(ch, ch->sgidx);
+ } else {
+ vchan_cookie_complete(&ch->desc->vd);
+ moxart_dma_start_desc(&ch->vc.chan);
+ }
+ spin_unlock(&ch->vc.lock);
+ }
+ }
+
+ if (ctrl & APB_DMA_ERR_INT_STS) {
+ ctrl &= ~APB_DMA_ERR_INT_STS;
+ ch->error = 1;
+ }
+
+ writel(ctrl, ch->base + REG_OFF_CTRL);
+ }
+
+ return IRQ_HANDLED;
+}
+
+static int moxart_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct device_node *node = dev->of_node;
+ struct resource *res;
+ void __iomem *dma_base_addr;
+ int ret, i;
+ unsigned int irq;
+ struct moxart_chan *ch;
+ struct moxart_dmadev *mdc;
+
+ mdc = devm_kzalloc(dev, sizeof(*mdc), GFP_KERNEL);
+ if (!mdc)
+ return -ENOMEM;
+
+ irq = irq_of_parse_and_map(node, 0);
+ if (!irq) {
+ dev_err(dev, "no IRQ resource\n");
+ return -EINVAL;
+ }
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ dma_base_addr = devm_ioremap_resource(dev, res);
+ if (IS_ERR(dma_base_addr))
+ return PTR_ERR(dma_base_addr);
+
+ dma_cap_zero(mdc->dma_slave.cap_mask);
+ dma_cap_set(DMA_SLAVE, mdc->dma_slave.cap_mask);
+ dma_cap_set(DMA_PRIVATE, mdc->dma_slave.cap_mask);
+
+ moxart_dma_init(&mdc->dma_slave, dev);
+
+ ch = &mdc->slave_chans[0];
+ for (i = 0; i < APB_DMA_MAX_CHANNEL; i++, ch++) {
+ ch->ch_num = i;
+ ch->base = dma_base_addr + i * REG_OFF_CHAN_SIZE;
+ ch->allocated = 0;
+
+ ch->vc.desc_free = moxart_dma_desc_free;
+ vchan_init(&ch->vc, &mdc->dma_slave);
+
+ dev_dbg(dev, "%s: chs[%d]: ch->ch_num=%u ch->base=%p\n",
+ __func__, i, ch->ch_num, ch->base);
+ }
+
+ platform_set_drvdata(pdev, mdc);
+
+ ret = devm_request_irq(dev, irq, moxart_dma_interrupt, 0,
+ "moxart-dma-engine", mdc);
+ if (ret) {
+ dev_err(dev, "devm_request_irq failed\n");
+ return ret;
+ }
+ mdc->irq = irq;
+
+ ret = dma_async_device_register(&mdc->dma_slave);
+ if (ret) {
+ dev_err(dev, "dma_async_device_register failed\n");
+ return ret;
+ }
+
+ ret = of_dma_controller_register(node, moxart_of_xlate, mdc);
+ if (ret) {
+ dev_err(dev, "of_dma_controller_register failed\n");
+ dma_async_device_unregister(&mdc->dma_slave);
+ return ret;
+ }
+
+ dev_dbg(dev, "%s: IRQ=%u\n", __func__, irq);
+
+ return 0;
+}
+
+static int moxart_remove(struct platform_device *pdev)
+{
+ struct moxart_dmadev *m = platform_get_drvdata(pdev);
+
+ devm_free_irq(&pdev->dev, m->irq, m);
+
+ dma_async_device_unregister(&m->dma_slave);
+
+ if (pdev->dev.of_node)
+ of_dma_controller_free(pdev->dev.of_node);
+
+ return 0;
+}
+
+static const struct of_device_id moxart_dma_match[] = {
+ { .compatible = "moxa,moxart-dma" },
+ { }
+};
+MODULE_DEVICE_TABLE(of, moxart_dma_match);
+
+static struct platform_driver moxart_driver = {
+ .probe = moxart_probe,
+ .remove = moxart_remove,
+ .driver = {
+ .name = "moxart-dma-engine",
+ .of_match_table = moxart_dma_match,
+ },
+};
+
+static int moxart_init(void)
+{
+ return platform_driver_register(&moxart_driver);
+}
+subsys_initcall(moxart_init);
+
+static void __exit moxart_exit(void)
+{
+ platform_driver_unregister(&moxart_driver);
+}
+module_exit(moxart_exit);
+
+MODULE_AUTHOR("Jonas Jensen <jonas.jensen@gmail.com>");
+MODULE_DESCRIPTION("MOXART DMA engine driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/mpc512x_dma.c b/drivers/dma/mpc512x_dma.c
new file mode 100644
index 000000000..4a51fdbf5
--- /dev/null
+++ b/drivers/dma/mpc512x_dma.c
@@ -0,0 +1,1125 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) Freescale Semicondutor, Inc. 2007, 2008.
+ * Copyright (C) Semihalf 2009
+ * Copyright (C) Ilya Yanok, Emcraft Systems 2010
+ * Copyright (C) Alexander Popov, Promcontroller 2014
+ * Copyright (C) Mario Six, Guntermann & Drunck GmbH, 2016
+ *
+ * Written by Piotr Ziecik <kosmo@semihalf.com>. Hardware description
+ * (defines, structures and comments) was taken from MPC5121 DMA driver
+ * written by Hongjun Chen <hong-jun.chen@freescale.com>.
+ *
+ * Approved as OSADL project by a majority of OSADL members and funded
+ * by OSADL membership fees in 2009; for details see www.osadl.org.
+ */
+
+/*
+ * MPC512x and MPC8308 DMA driver. It supports memory to memory data transfers
+ * (tested using dmatest module) and data transfers between memory and
+ * peripheral I/O memory by means of slave scatter/gather with these
+ * limitations:
+ * - chunked transfers (described by s/g lists with more than one item) are
+ * refused as long as proper support for scatter/gather is missing
+ * - transfers on MPC8308 always start from software as this SoC does not have
+ * external request lines for peripheral flow control
+ * - memory <-> I/O memory transfer chunks of sizes of 1, 2, 4, 16 (for
+ * MPC512x), and 32 bytes are supported, and, consequently, source
+ * addresses and destination addresses must be aligned accordingly;
+ * furthermore, for MPC512x SoCs, the transfer size must be aligned on
+ * (chunk size * maxburst)
+ */
+
+#include <linux/module.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/slab.h>
+#include <linux/of_address.h>
+#include <linux/of_device.h>
+#include <linux/of_irq.h>
+#include <linux/of_dma.h>
+#include <linux/of_platform.h>
+
+#include <linux/random.h>
+
+#include "dmaengine.h"
+
+/* Number of DMA Transfer descriptors allocated per channel */
+#define MPC_DMA_DESCRIPTORS 64
+
+/* Macro definitions */
+#define MPC_DMA_TCD_OFFSET 0x1000
+
+/*
+ * Maximum channel counts for individual hardware variants
+ * and the maximum channel count over all supported controllers,
+ * used for data structure size
+ */
+#define MPC8308_DMACHAN_MAX 16
+#define MPC512x_DMACHAN_MAX 64
+#define MPC_DMA_CHANNELS 64
+
+/* Arbitration mode of group and channel */
+#define MPC_DMA_DMACR_EDCG (1 << 31)
+#define MPC_DMA_DMACR_ERGA (1 << 3)
+#define MPC_DMA_DMACR_ERCA (1 << 2)
+
+/* Error codes */
+#define MPC_DMA_DMAES_VLD (1 << 31)
+#define MPC_DMA_DMAES_GPE (1 << 15)
+#define MPC_DMA_DMAES_CPE (1 << 14)
+#define MPC_DMA_DMAES_ERRCHN(err) \
+ (((err) >> 8) & 0x3f)
+#define MPC_DMA_DMAES_SAE (1 << 7)
+#define MPC_DMA_DMAES_SOE (1 << 6)
+#define MPC_DMA_DMAES_DAE (1 << 5)
+#define MPC_DMA_DMAES_DOE (1 << 4)
+#define MPC_DMA_DMAES_NCE (1 << 3)
+#define MPC_DMA_DMAES_SGE (1 << 2)
+#define MPC_DMA_DMAES_SBE (1 << 1)
+#define MPC_DMA_DMAES_DBE (1 << 0)
+
+#define MPC_DMA_DMAGPOR_SNOOP_ENABLE (1 << 6)
+
+#define MPC_DMA_TSIZE_1 0x00
+#define MPC_DMA_TSIZE_2 0x01
+#define MPC_DMA_TSIZE_4 0x02
+#define MPC_DMA_TSIZE_16 0x04
+#define MPC_DMA_TSIZE_32 0x05
+
+/* MPC5121 DMA engine registers */
+struct __attribute__ ((__packed__)) mpc_dma_regs {
+ /* 0x00 */
+ u32 dmacr; /* DMA control register */
+ u32 dmaes; /* DMA error status */
+ /* 0x08 */
+ u32 dmaerqh; /* DMA enable request high(channels 63~32) */
+ u32 dmaerql; /* DMA enable request low(channels 31~0) */
+ u32 dmaeeih; /* DMA enable error interrupt high(ch63~32) */
+ u32 dmaeeil; /* DMA enable error interrupt low(ch31~0) */
+ /* 0x18 */
+ u8 dmaserq; /* DMA set enable request */
+ u8 dmacerq; /* DMA clear enable request */
+ u8 dmaseei; /* DMA set enable error interrupt */
+ u8 dmaceei; /* DMA clear enable error interrupt */
+ /* 0x1c */
+ u8 dmacint; /* DMA clear interrupt request */
+ u8 dmacerr; /* DMA clear error */
+ u8 dmassrt; /* DMA set start bit */
+ u8 dmacdne; /* DMA clear DONE status bit */
+ /* 0x20 */
+ u32 dmainth; /* DMA interrupt request high(ch63~32) */
+ u32 dmaintl; /* DMA interrupt request low(ch31~0) */
+ u32 dmaerrh; /* DMA error high(ch63~32) */
+ u32 dmaerrl; /* DMA error low(ch31~0) */
+ /* 0x30 */
+ u32 dmahrsh; /* DMA hw request status high(ch63~32) */
+ u32 dmahrsl; /* DMA hardware request status low(ch31~0) */
+ union {
+ u32 dmaihsa; /* DMA interrupt high select AXE(ch63~32) */
+ u32 dmagpor; /* (General purpose register on MPC8308) */
+ };
+ u32 dmailsa; /* DMA interrupt low select AXE(ch31~0) */
+ /* 0x40 ~ 0xff */
+ u32 reserve0[48]; /* Reserved */
+ /* 0x100 */
+ u8 dchpri[MPC_DMA_CHANNELS];
+ /* DMA channels(0~63) priority */
+};
+
+struct __attribute__ ((__packed__)) mpc_dma_tcd {
+ /* 0x00 */
+ u32 saddr; /* Source address */
+
+ u32 smod:5; /* Source address modulo */
+ u32 ssize:3; /* Source data transfer size */
+ u32 dmod:5; /* Destination address modulo */
+ u32 dsize:3; /* Destination data transfer size */
+ u32 soff:16; /* Signed source address offset */
+
+ /* 0x08 */
+ u32 nbytes; /* Inner "minor" byte count */
+ u32 slast; /* Last source address adjustment */
+ u32 daddr; /* Destination address */
+
+ /* 0x14 */
+ u32 citer_elink:1; /* Enable channel-to-channel linking on
+ * minor loop complete
+ */
+ u32 citer_linkch:6; /* Link channel for minor loop complete */
+ u32 citer:9; /* Current "major" iteration count */
+ u32 doff:16; /* Signed destination address offset */
+
+ /* 0x18 */
+ u32 dlast_sga; /* Last Destination address adjustment/scatter
+ * gather address
+ */
+
+ /* 0x1c */
+ u32 biter_elink:1; /* Enable channel-to-channel linking on major
+ * loop complete
+ */
+ u32 biter_linkch:6;
+ u32 biter:9; /* Beginning "major" iteration count */
+ u32 bwc:2; /* Bandwidth control */
+ u32 major_linkch:6; /* Link channel number */
+ u32 done:1; /* Channel done */
+ u32 active:1; /* Channel active */
+ u32 major_elink:1; /* Enable channel-to-channel linking on major
+ * loop complete
+ */
+ u32 e_sg:1; /* Enable scatter/gather processing */
+ u32 d_req:1; /* Disable request */
+ u32 int_half:1; /* Enable an interrupt when major counter is
+ * half complete
+ */
+ u32 int_maj:1; /* Enable an interrupt when major iteration
+ * count completes
+ */
+ u32 start:1; /* Channel start */
+};
+
+struct mpc_dma_desc {
+ struct dma_async_tx_descriptor desc;
+ struct mpc_dma_tcd *tcd;
+ dma_addr_t tcd_paddr;
+ int error;
+ struct list_head node;
+ int will_access_peripheral;
+};
+
+struct mpc_dma_chan {
+ struct dma_chan chan;
+ struct list_head free;
+ struct list_head prepared;
+ struct list_head queued;
+ struct list_head active;
+ struct list_head completed;
+ struct mpc_dma_tcd *tcd;
+ dma_addr_t tcd_paddr;
+
+ /* Settings for access to peripheral FIFO */
+ dma_addr_t src_per_paddr;
+ u32 src_tcd_nunits;
+ u8 swidth;
+ dma_addr_t dst_per_paddr;
+ u32 dst_tcd_nunits;
+ u8 dwidth;
+
+ /* Lock for this structure */
+ spinlock_t lock;
+};
+
+struct mpc_dma {
+ struct dma_device dma;
+ struct tasklet_struct tasklet;
+ struct mpc_dma_chan channels[MPC_DMA_CHANNELS];
+ struct mpc_dma_regs __iomem *regs;
+ struct mpc_dma_tcd __iomem *tcd;
+ int irq;
+ int irq2;
+ uint error_status;
+ int is_mpc8308;
+
+ /* Lock for error_status field in this structure */
+ spinlock_t error_status_lock;
+};
+
+#define DRV_NAME "mpc512x_dma"
+
+/* Convert struct dma_chan to struct mpc_dma_chan */
+static inline struct mpc_dma_chan *dma_chan_to_mpc_dma_chan(struct dma_chan *c)
+{
+ return container_of(c, struct mpc_dma_chan, chan);
+}
+
+/* Convert struct dma_chan to struct mpc_dma */
+static inline struct mpc_dma *dma_chan_to_mpc_dma(struct dma_chan *c)
+{
+ struct mpc_dma_chan *mchan = dma_chan_to_mpc_dma_chan(c);
+
+ return container_of(mchan, struct mpc_dma, channels[c->chan_id]);
+}
+
+/*
+ * Execute all queued DMA descriptors.
+ *
+ * Following requirements must be met while calling mpc_dma_execute():
+ * a) mchan->lock is acquired,
+ * b) mchan->active list is empty,
+ * c) mchan->queued list contains at least one entry.
+ */
+static void mpc_dma_execute(struct mpc_dma_chan *mchan)
+{
+ struct mpc_dma *mdma = dma_chan_to_mpc_dma(&mchan->chan);
+ struct mpc_dma_desc *first = NULL;
+ struct mpc_dma_desc *prev = NULL;
+ struct mpc_dma_desc *mdesc;
+ int cid = mchan->chan.chan_id;
+
+ while (!list_empty(&mchan->queued)) {
+ mdesc = list_first_entry(&mchan->queued,
+ struct mpc_dma_desc, node);
+ /*
+ * Grab either several mem-to-mem transfer descriptors
+ * or one peripheral transfer descriptor,
+ * don't mix mem-to-mem and peripheral transfer descriptors
+ * within the same 'active' list.
+ */
+ if (mdesc->will_access_peripheral) {
+ if (list_empty(&mchan->active))
+ list_move_tail(&mdesc->node, &mchan->active);
+ break;
+ } else {
+ list_move_tail(&mdesc->node, &mchan->active);
+ }
+ }
+
+ /* Chain descriptors into one transaction */
+ list_for_each_entry(mdesc, &mchan->active, node) {
+ if (!first)
+ first = mdesc;
+
+ if (!prev) {
+ prev = mdesc;
+ continue;
+ }
+
+ prev->tcd->dlast_sga = mdesc->tcd_paddr;
+ prev->tcd->e_sg = 1;
+ mdesc->tcd->start = 1;
+
+ prev = mdesc;
+ }
+
+ prev->tcd->int_maj = 1;
+
+ /* Send first descriptor in chain into hardware */
+ memcpy_toio(&mdma->tcd[cid], first->tcd, sizeof(struct mpc_dma_tcd));
+
+ if (first != prev)
+ mdma->tcd[cid].e_sg = 1;
+
+ if (mdma->is_mpc8308) {
+ /* MPC8308, no request lines, software initiated start */
+ out_8(&mdma->regs->dmassrt, cid);
+ } else if (first->will_access_peripheral) {
+ /* Peripherals involved, start by external request signal */
+ out_8(&mdma->regs->dmaserq, cid);
+ } else {
+ /* Memory to memory transfer, software initiated start */
+ out_8(&mdma->regs->dmassrt, cid);
+ }
+}
+
+/* Handle interrupt on one half of DMA controller (32 channels) */
+static void mpc_dma_irq_process(struct mpc_dma *mdma, u32 is, u32 es, int off)
+{
+ struct mpc_dma_chan *mchan;
+ struct mpc_dma_desc *mdesc;
+ u32 status = is | es;
+ int ch;
+
+ while ((ch = fls(status) - 1) >= 0) {
+ status &= ~(1 << ch);
+ mchan = &mdma->channels[ch + off];
+
+ spin_lock(&mchan->lock);
+
+ out_8(&mdma->regs->dmacint, ch + off);
+ out_8(&mdma->regs->dmacerr, ch + off);
+
+ /* Check error status */
+ if (es & (1 << ch))
+ list_for_each_entry(mdesc, &mchan->active, node)
+ mdesc->error = -EIO;
+
+ /* Execute queued descriptors */
+ list_splice_tail_init(&mchan->active, &mchan->completed);
+ if (!list_empty(&mchan->queued))
+ mpc_dma_execute(mchan);
+
+ spin_unlock(&mchan->lock);
+ }
+}
+
+/* Interrupt handler */
+static irqreturn_t mpc_dma_irq(int irq, void *data)
+{
+ struct mpc_dma *mdma = data;
+ uint es;
+
+ /* Save error status register */
+ es = in_be32(&mdma->regs->dmaes);
+ spin_lock(&mdma->error_status_lock);
+ if ((es & MPC_DMA_DMAES_VLD) && mdma->error_status == 0)
+ mdma->error_status = es;
+ spin_unlock(&mdma->error_status_lock);
+
+ /* Handle interrupt on each channel */
+ if (mdma->dma.chancnt > 32) {
+ mpc_dma_irq_process(mdma, in_be32(&mdma->regs->dmainth),
+ in_be32(&mdma->regs->dmaerrh), 32);
+ }
+ mpc_dma_irq_process(mdma, in_be32(&mdma->regs->dmaintl),
+ in_be32(&mdma->regs->dmaerrl), 0);
+
+ /* Schedule tasklet */
+ tasklet_schedule(&mdma->tasklet);
+
+ return IRQ_HANDLED;
+}
+
+/* process completed descriptors */
+static void mpc_dma_process_completed(struct mpc_dma *mdma)
+{
+ dma_cookie_t last_cookie = 0;
+ struct mpc_dma_chan *mchan;
+ struct mpc_dma_desc *mdesc;
+ struct dma_async_tx_descriptor *desc;
+ unsigned long flags;
+ LIST_HEAD(list);
+ int i;
+
+ for (i = 0; i < mdma->dma.chancnt; i++) {
+ mchan = &mdma->channels[i];
+
+ /* Get all completed descriptors */
+ spin_lock_irqsave(&mchan->lock, flags);
+ if (!list_empty(&mchan->completed))
+ list_splice_tail_init(&mchan->completed, &list);
+ spin_unlock_irqrestore(&mchan->lock, flags);
+
+ if (list_empty(&list))
+ continue;
+
+ /* Execute callbacks and run dependencies */
+ list_for_each_entry(mdesc, &list, node) {
+ desc = &mdesc->desc;
+
+ dmaengine_desc_get_callback_invoke(desc, NULL);
+
+ last_cookie = desc->cookie;
+ dma_run_dependencies(desc);
+ }
+
+ /* Free descriptors */
+ spin_lock_irqsave(&mchan->lock, flags);
+ list_splice_tail_init(&list, &mchan->free);
+ mchan->chan.completed_cookie = last_cookie;
+ spin_unlock_irqrestore(&mchan->lock, flags);
+ }
+}
+
+/* DMA Tasklet */
+static void mpc_dma_tasklet(struct tasklet_struct *t)
+{
+ struct mpc_dma *mdma = from_tasklet(mdma, t, tasklet);
+ unsigned long flags;
+ uint es;
+
+ spin_lock_irqsave(&mdma->error_status_lock, flags);
+ es = mdma->error_status;
+ mdma->error_status = 0;
+ spin_unlock_irqrestore(&mdma->error_status_lock, flags);
+
+ /* Print nice error report */
+ if (es) {
+ dev_err(mdma->dma.dev,
+ "Hardware reported following error(s) on channel %u:\n",
+ MPC_DMA_DMAES_ERRCHN(es));
+
+ if (es & MPC_DMA_DMAES_GPE)
+ dev_err(mdma->dma.dev, "- Group Priority Error\n");
+ if (es & MPC_DMA_DMAES_CPE)
+ dev_err(mdma->dma.dev, "- Channel Priority Error\n");
+ if (es & MPC_DMA_DMAES_SAE)
+ dev_err(mdma->dma.dev, "- Source Address Error\n");
+ if (es & MPC_DMA_DMAES_SOE)
+ dev_err(mdma->dma.dev, "- Source Offset Configuration Error\n");
+ if (es & MPC_DMA_DMAES_DAE)
+ dev_err(mdma->dma.dev, "- Destination Address Error\n");
+ if (es & MPC_DMA_DMAES_DOE)
+ dev_err(mdma->dma.dev, "- Destination Offset Configuration Error\n");
+ if (es & MPC_DMA_DMAES_NCE)
+ dev_err(mdma->dma.dev, "- NBytes/Citter Configuration Error\n");
+ if (es & MPC_DMA_DMAES_SGE)
+ dev_err(mdma->dma.dev, "- Scatter/Gather Configuration Error\n");
+ if (es & MPC_DMA_DMAES_SBE)
+ dev_err(mdma->dma.dev, "- Source Bus Error\n");
+ if (es & MPC_DMA_DMAES_DBE)
+ dev_err(mdma->dma.dev, "- Destination Bus Error\n");
+ }
+
+ mpc_dma_process_completed(mdma);
+}
+
+/* Submit descriptor to hardware */
+static dma_cookie_t mpc_dma_tx_submit(struct dma_async_tx_descriptor *txd)
+{
+ struct mpc_dma_chan *mchan = dma_chan_to_mpc_dma_chan(txd->chan);
+ struct mpc_dma_desc *mdesc;
+ unsigned long flags;
+ dma_cookie_t cookie;
+
+ mdesc = container_of(txd, struct mpc_dma_desc, desc);
+
+ spin_lock_irqsave(&mchan->lock, flags);
+
+ /* Move descriptor to queue */
+ list_move_tail(&mdesc->node, &mchan->queued);
+
+ /* If channel is idle, execute all queued descriptors */
+ if (list_empty(&mchan->active))
+ mpc_dma_execute(mchan);
+
+ /* Update cookie */
+ cookie = dma_cookie_assign(txd);
+ spin_unlock_irqrestore(&mchan->lock, flags);
+
+ return cookie;
+}
+
+/* Alloc channel resources */
+static int mpc_dma_alloc_chan_resources(struct dma_chan *chan)
+{
+ struct mpc_dma *mdma = dma_chan_to_mpc_dma(chan);
+ struct mpc_dma_chan *mchan = dma_chan_to_mpc_dma_chan(chan);
+ struct mpc_dma_desc *mdesc;
+ struct mpc_dma_tcd *tcd;
+ dma_addr_t tcd_paddr;
+ unsigned long flags;
+ LIST_HEAD(descs);
+ int i;
+
+ /* Alloc DMA memory for Transfer Control Descriptors */
+ tcd = dma_alloc_coherent(mdma->dma.dev,
+ MPC_DMA_DESCRIPTORS * sizeof(struct mpc_dma_tcd),
+ &tcd_paddr, GFP_KERNEL);
+ if (!tcd)
+ return -ENOMEM;
+
+ /* Alloc descriptors for this channel */
+ for (i = 0; i < MPC_DMA_DESCRIPTORS; i++) {
+ mdesc = kzalloc(sizeof(struct mpc_dma_desc), GFP_KERNEL);
+ if (!mdesc) {
+ dev_notice(mdma->dma.dev,
+ "Memory allocation error. Allocated only %u descriptors\n", i);
+ break;
+ }
+
+ dma_async_tx_descriptor_init(&mdesc->desc, chan);
+ mdesc->desc.flags = DMA_CTRL_ACK;
+ mdesc->desc.tx_submit = mpc_dma_tx_submit;
+
+ mdesc->tcd = &tcd[i];
+ mdesc->tcd_paddr = tcd_paddr + (i * sizeof(struct mpc_dma_tcd));
+
+ list_add_tail(&mdesc->node, &descs);
+ }
+
+ /* Return error only if no descriptors were allocated */
+ if (i == 0) {
+ dma_free_coherent(mdma->dma.dev,
+ MPC_DMA_DESCRIPTORS * sizeof(struct mpc_dma_tcd),
+ tcd, tcd_paddr);
+ return -ENOMEM;
+ }
+
+ spin_lock_irqsave(&mchan->lock, flags);
+ mchan->tcd = tcd;
+ mchan->tcd_paddr = tcd_paddr;
+ list_splice_tail_init(&descs, &mchan->free);
+ spin_unlock_irqrestore(&mchan->lock, flags);
+
+ /* Enable Error Interrupt */
+ out_8(&mdma->regs->dmaseei, chan->chan_id);
+
+ return 0;
+}
+
+/* Free channel resources */
+static void mpc_dma_free_chan_resources(struct dma_chan *chan)
+{
+ struct mpc_dma *mdma = dma_chan_to_mpc_dma(chan);
+ struct mpc_dma_chan *mchan = dma_chan_to_mpc_dma_chan(chan);
+ struct mpc_dma_desc *mdesc, *tmp;
+ struct mpc_dma_tcd *tcd;
+ dma_addr_t tcd_paddr;
+ unsigned long flags;
+ LIST_HEAD(descs);
+
+ spin_lock_irqsave(&mchan->lock, flags);
+
+ /* Channel must be idle */
+ BUG_ON(!list_empty(&mchan->prepared));
+ BUG_ON(!list_empty(&mchan->queued));
+ BUG_ON(!list_empty(&mchan->active));
+ BUG_ON(!list_empty(&mchan->completed));
+
+ /* Move data */
+ list_splice_tail_init(&mchan->free, &descs);
+ tcd = mchan->tcd;
+ tcd_paddr = mchan->tcd_paddr;
+
+ spin_unlock_irqrestore(&mchan->lock, flags);
+
+ /* Free DMA memory used by descriptors */
+ dma_free_coherent(mdma->dma.dev,
+ MPC_DMA_DESCRIPTORS * sizeof(struct mpc_dma_tcd),
+ tcd, tcd_paddr);
+
+ /* Free descriptors */
+ list_for_each_entry_safe(mdesc, tmp, &descs, node)
+ kfree(mdesc);
+
+ /* Disable Error Interrupt */
+ out_8(&mdma->regs->dmaceei, chan->chan_id);
+}
+
+/* Send all pending descriptor to hardware */
+static void mpc_dma_issue_pending(struct dma_chan *chan)
+{
+ /*
+ * We are posting descriptors to the hardware as soon as
+ * they are ready, so this function does nothing.
+ */
+}
+
+/* Check request completion status */
+static enum dma_status
+mpc_dma_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
+ struct dma_tx_state *txstate)
+{
+ return dma_cookie_status(chan, cookie, txstate);
+}
+
+/* Prepare descriptor for memory to memory copy */
+static struct dma_async_tx_descriptor *
+mpc_dma_prep_memcpy(struct dma_chan *chan, dma_addr_t dst, dma_addr_t src,
+ size_t len, unsigned long flags)
+{
+ struct mpc_dma *mdma = dma_chan_to_mpc_dma(chan);
+ struct mpc_dma_chan *mchan = dma_chan_to_mpc_dma_chan(chan);
+ struct mpc_dma_desc *mdesc = NULL;
+ struct mpc_dma_tcd *tcd;
+ unsigned long iflags;
+
+ /* Get free descriptor */
+ spin_lock_irqsave(&mchan->lock, iflags);
+ if (!list_empty(&mchan->free)) {
+ mdesc = list_first_entry(&mchan->free, struct mpc_dma_desc,
+ node);
+ list_del(&mdesc->node);
+ }
+ spin_unlock_irqrestore(&mchan->lock, iflags);
+
+ if (!mdesc) {
+ /* try to free completed descriptors */
+ mpc_dma_process_completed(mdma);
+ return NULL;
+ }
+
+ mdesc->error = 0;
+ mdesc->will_access_peripheral = 0;
+ tcd = mdesc->tcd;
+
+ /* Prepare Transfer Control Descriptor for this transaction */
+ memset(tcd, 0, sizeof(struct mpc_dma_tcd));
+
+ if (IS_ALIGNED(src | dst | len, 32)) {
+ tcd->ssize = MPC_DMA_TSIZE_32;
+ tcd->dsize = MPC_DMA_TSIZE_32;
+ tcd->soff = 32;
+ tcd->doff = 32;
+ } else if (!mdma->is_mpc8308 && IS_ALIGNED(src | dst | len, 16)) {
+ /* MPC8308 doesn't support 16 byte transfers */
+ tcd->ssize = MPC_DMA_TSIZE_16;
+ tcd->dsize = MPC_DMA_TSIZE_16;
+ tcd->soff = 16;
+ tcd->doff = 16;
+ } else if (IS_ALIGNED(src | dst | len, 4)) {
+ tcd->ssize = MPC_DMA_TSIZE_4;
+ tcd->dsize = MPC_DMA_TSIZE_4;
+ tcd->soff = 4;
+ tcd->doff = 4;
+ } else if (IS_ALIGNED(src | dst | len, 2)) {
+ tcd->ssize = MPC_DMA_TSIZE_2;
+ tcd->dsize = MPC_DMA_TSIZE_2;
+ tcd->soff = 2;
+ tcd->doff = 2;
+ } else {
+ tcd->ssize = MPC_DMA_TSIZE_1;
+ tcd->dsize = MPC_DMA_TSIZE_1;
+ tcd->soff = 1;
+ tcd->doff = 1;
+ }
+
+ tcd->saddr = src;
+ tcd->daddr = dst;
+ tcd->nbytes = len;
+ tcd->biter = 1;
+ tcd->citer = 1;
+
+ /* Place descriptor in prepared list */
+ spin_lock_irqsave(&mchan->lock, iflags);
+ list_add_tail(&mdesc->node, &mchan->prepared);
+ spin_unlock_irqrestore(&mchan->lock, iflags);
+
+ return &mdesc->desc;
+}
+
+inline u8 buswidth_to_dmatsize(u8 buswidth)
+{
+ u8 res;
+
+ for (res = 0; buswidth > 1; buswidth /= 2)
+ res++;
+ return res;
+}
+
+static struct dma_async_tx_descriptor *
+mpc_dma_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
+ unsigned int sg_len, enum dma_transfer_direction direction,
+ unsigned long flags, void *context)
+{
+ struct mpc_dma *mdma = dma_chan_to_mpc_dma(chan);
+ struct mpc_dma_chan *mchan = dma_chan_to_mpc_dma_chan(chan);
+ struct mpc_dma_desc *mdesc = NULL;
+ dma_addr_t per_paddr;
+ u32 tcd_nunits;
+ struct mpc_dma_tcd *tcd;
+ unsigned long iflags;
+ struct scatterlist *sg;
+ size_t len;
+ int iter, i;
+
+ /* Currently there is no proper support for scatter/gather */
+ if (sg_len != 1)
+ return NULL;
+
+ if (!is_slave_direction(direction))
+ return NULL;
+
+ for_each_sg(sgl, sg, sg_len, i) {
+ spin_lock_irqsave(&mchan->lock, iflags);
+
+ mdesc = list_first_entry(&mchan->free,
+ struct mpc_dma_desc, node);
+ if (!mdesc) {
+ spin_unlock_irqrestore(&mchan->lock, iflags);
+ /* Try to free completed descriptors */
+ mpc_dma_process_completed(mdma);
+ return NULL;
+ }
+
+ list_del(&mdesc->node);
+
+ if (direction == DMA_DEV_TO_MEM) {
+ per_paddr = mchan->src_per_paddr;
+ tcd_nunits = mchan->src_tcd_nunits;
+ } else {
+ per_paddr = mchan->dst_per_paddr;
+ tcd_nunits = mchan->dst_tcd_nunits;
+ }
+
+ spin_unlock_irqrestore(&mchan->lock, iflags);
+
+ if (per_paddr == 0 || tcd_nunits == 0)
+ goto err_prep;
+
+ mdesc->error = 0;
+ mdesc->will_access_peripheral = 1;
+
+ /* Prepare Transfer Control Descriptor for this transaction */
+ tcd = mdesc->tcd;
+
+ memset(tcd, 0, sizeof(struct mpc_dma_tcd));
+
+ if (direction == DMA_DEV_TO_MEM) {
+ tcd->saddr = per_paddr;
+ tcd->daddr = sg_dma_address(sg);
+
+ if (!IS_ALIGNED(sg_dma_address(sg), mchan->dwidth))
+ goto err_prep;
+
+ tcd->soff = 0;
+ tcd->doff = mchan->dwidth;
+ } else {
+ tcd->saddr = sg_dma_address(sg);
+ tcd->daddr = per_paddr;
+
+ if (!IS_ALIGNED(sg_dma_address(sg), mchan->swidth))
+ goto err_prep;
+
+ tcd->soff = mchan->swidth;
+ tcd->doff = 0;
+ }
+
+ tcd->ssize = buswidth_to_dmatsize(mchan->swidth);
+ tcd->dsize = buswidth_to_dmatsize(mchan->dwidth);
+
+ if (mdma->is_mpc8308) {
+ tcd->nbytes = sg_dma_len(sg);
+ if (!IS_ALIGNED(tcd->nbytes, mchan->swidth))
+ goto err_prep;
+
+ /* No major loops for MPC8303 */
+ tcd->biter = 1;
+ tcd->citer = 1;
+ } else {
+ len = sg_dma_len(sg);
+ tcd->nbytes = tcd_nunits * tcd->ssize;
+ if (!IS_ALIGNED(len, tcd->nbytes))
+ goto err_prep;
+
+ iter = len / tcd->nbytes;
+ if (iter >= 1 << 15) {
+ /* len is too big */
+ goto err_prep;
+ }
+ /* citer_linkch contains the high bits of iter */
+ tcd->biter = iter & 0x1ff;
+ tcd->biter_linkch = iter >> 9;
+ tcd->citer = tcd->biter;
+ tcd->citer_linkch = tcd->biter_linkch;
+ }
+
+ tcd->e_sg = 0;
+ tcd->d_req = 1;
+
+ /* Place descriptor in prepared list */
+ spin_lock_irqsave(&mchan->lock, iflags);
+ list_add_tail(&mdesc->node, &mchan->prepared);
+ spin_unlock_irqrestore(&mchan->lock, iflags);
+ }
+
+ return &mdesc->desc;
+
+err_prep:
+ /* Put the descriptor back */
+ spin_lock_irqsave(&mchan->lock, iflags);
+ list_add_tail(&mdesc->node, &mchan->free);
+ spin_unlock_irqrestore(&mchan->lock, iflags);
+
+ return NULL;
+}
+
+inline bool is_buswidth_valid(u8 buswidth, bool is_mpc8308)
+{
+ switch (buswidth) {
+ case 16:
+ if (is_mpc8308)
+ return false;
+ break;
+ case 1:
+ case 2:
+ case 4:
+ case 32:
+ break;
+ default:
+ return false;
+ }
+
+ return true;
+}
+
+static int mpc_dma_device_config(struct dma_chan *chan,
+ struct dma_slave_config *cfg)
+{
+ struct mpc_dma_chan *mchan = dma_chan_to_mpc_dma_chan(chan);
+ struct mpc_dma *mdma = dma_chan_to_mpc_dma(&mchan->chan);
+ unsigned long flags;
+
+ /*
+ * Software constraints:
+ * - only transfers between a peripheral device and memory are
+ * supported
+ * - transfer chunk sizes of 1, 2, 4, 16 (for MPC512x), and 32 bytes
+ * are supported, and, consequently, source addresses and
+ * destination addresses; must be aligned accordingly; furthermore,
+ * for MPC512x SoCs, the transfer size must be aligned on (chunk
+ * size * maxburst)
+ * - during the transfer, the RAM address is incremented by the size
+ * of transfer chunk
+ * - the peripheral port's address is constant during the transfer.
+ */
+
+ if (!IS_ALIGNED(cfg->src_addr, cfg->src_addr_width) ||
+ !IS_ALIGNED(cfg->dst_addr, cfg->dst_addr_width)) {
+ return -EINVAL;
+ }
+
+ if (!is_buswidth_valid(cfg->src_addr_width, mdma->is_mpc8308) ||
+ !is_buswidth_valid(cfg->dst_addr_width, mdma->is_mpc8308))
+ return -EINVAL;
+
+ spin_lock_irqsave(&mchan->lock, flags);
+
+ mchan->src_per_paddr = cfg->src_addr;
+ mchan->src_tcd_nunits = cfg->src_maxburst;
+ mchan->swidth = cfg->src_addr_width;
+ mchan->dst_per_paddr = cfg->dst_addr;
+ mchan->dst_tcd_nunits = cfg->dst_maxburst;
+ mchan->dwidth = cfg->dst_addr_width;
+
+ /* Apply defaults */
+ if (mchan->src_tcd_nunits == 0)
+ mchan->src_tcd_nunits = 1;
+ if (mchan->dst_tcd_nunits == 0)
+ mchan->dst_tcd_nunits = 1;
+
+ spin_unlock_irqrestore(&mchan->lock, flags);
+
+ return 0;
+}
+
+static int mpc_dma_device_terminate_all(struct dma_chan *chan)
+{
+ struct mpc_dma_chan *mchan = dma_chan_to_mpc_dma_chan(chan);
+ struct mpc_dma *mdma = dma_chan_to_mpc_dma(chan);
+ unsigned long flags;
+
+ /* Disable channel requests */
+ spin_lock_irqsave(&mchan->lock, flags);
+
+ out_8(&mdma->regs->dmacerq, chan->chan_id);
+ list_splice_tail_init(&mchan->prepared, &mchan->free);
+ list_splice_tail_init(&mchan->queued, &mchan->free);
+ list_splice_tail_init(&mchan->active, &mchan->free);
+
+ spin_unlock_irqrestore(&mchan->lock, flags);
+
+ return 0;
+}
+
+static int mpc_dma_probe(struct platform_device *op)
+{
+ struct device_node *dn = op->dev.of_node;
+ struct device *dev = &op->dev;
+ struct dma_device *dma;
+ struct mpc_dma *mdma;
+ struct mpc_dma_chan *mchan;
+ struct resource res;
+ ulong regs_start, regs_size;
+ int retval, i;
+ u8 chancnt;
+
+ mdma = devm_kzalloc(dev, sizeof(struct mpc_dma), GFP_KERNEL);
+ if (!mdma) {
+ retval = -ENOMEM;
+ goto err;
+ }
+
+ mdma->irq = irq_of_parse_and_map(dn, 0);
+ if (!mdma->irq) {
+ dev_err(dev, "Error mapping IRQ!\n");
+ retval = -EINVAL;
+ goto err;
+ }
+
+ if (of_device_is_compatible(dn, "fsl,mpc8308-dma")) {
+ mdma->is_mpc8308 = 1;
+ mdma->irq2 = irq_of_parse_and_map(dn, 1);
+ if (!mdma->irq2) {
+ dev_err(dev, "Error mapping IRQ!\n");
+ retval = -EINVAL;
+ goto err_dispose1;
+ }
+ }
+
+ retval = of_address_to_resource(dn, 0, &res);
+ if (retval) {
+ dev_err(dev, "Error parsing memory region!\n");
+ goto err_dispose2;
+ }
+
+ regs_start = res.start;
+ regs_size = resource_size(&res);
+
+ if (!devm_request_mem_region(dev, regs_start, regs_size, DRV_NAME)) {
+ dev_err(dev, "Error requesting memory region!\n");
+ retval = -EBUSY;
+ goto err_dispose2;
+ }
+
+ mdma->regs = devm_ioremap(dev, regs_start, regs_size);
+ if (!mdma->regs) {
+ dev_err(dev, "Error mapping memory region!\n");
+ retval = -ENOMEM;
+ goto err_dispose2;
+ }
+
+ mdma->tcd = (struct mpc_dma_tcd *)((u8 *)(mdma->regs)
+ + MPC_DMA_TCD_OFFSET);
+
+ retval = request_irq(mdma->irq, &mpc_dma_irq, 0, DRV_NAME, mdma);
+ if (retval) {
+ dev_err(dev, "Error requesting IRQ!\n");
+ retval = -EINVAL;
+ goto err_dispose2;
+ }
+
+ if (mdma->is_mpc8308) {
+ retval = request_irq(mdma->irq2, &mpc_dma_irq, 0,
+ DRV_NAME, mdma);
+ if (retval) {
+ dev_err(dev, "Error requesting IRQ2!\n");
+ retval = -EINVAL;
+ goto err_free1;
+ }
+ }
+
+ spin_lock_init(&mdma->error_status_lock);
+
+ dma = &mdma->dma;
+ dma->dev = dev;
+ dma->device_alloc_chan_resources = mpc_dma_alloc_chan_resources;
+ dma->device_free_chan_resources = mpc_dma_free_chan_resources;
+ dma->device_issue_pending = mpc_dma_issue_pending;
+ dma->device_tx_status = mpc_dma_tx_status;
+ dma->device_prep_dma_memcpy = mpc_dma_prep_memcpy;
+ dma->device_prep_slave_sg = mpc_dma_prep_slave_sg;
+ dma->device_config = mpc_dma_device_config;
+ dma->device_terminate_all = mpc_dma_device_terminate_all;
+
+ INIT_LIST_HEAD(&dma->channels);
+ dma_cap_set(DMA_MEMCPY, dma->cap_mask);
+ dma_cap_set(DMA_SLAVE, dma->cap_mask);
+
+ if (mdma->is_mpc8308)
+ chancnt = MPC8308_DMACHAN_MAX;
+ else
+ chancnt = MPC512x_DMACHAN_MAX;
+
+ for (i = 0; i < chancnt; i++) {
+ mchan = &mdma->channels[i];
+
+ mchan->chan.device = dma;
+ dma_cookie_init(&mchan->chan);
+
+ INIT_LIST_HEAD(&mchan->free);
+ INIT_LIST_HEAD(&mchan->prepared);
+ INIT_LIST_HEAD(&mchan->queued);
+ INIT_LIST_HEAD(&mchan->active);
+ INIT_LIST_HEAD(&mchan->completed);
+
+ spin_lock_init(&mchan->lock);
+ list_add_tail(&mchan->chan.device_node, &dma->channels);
+ }
+
+ tasklet_setup(&mdma->tasklet, mpc_dma_tasklet);
+
+ /*
+ * Configure DMA Engine:
+ * - Dynamic clock,
+ * - Round-robin group arbitration,
+ * - Round-robin channel arbitration.
+ */
+ if (mdma->is_mpc8308) {
+ /* MPC8308 has 16 channels and lacks some registers */
+ out_be32(&mdma->regs->dmacr, MPC_DMA_DMACR_ERCA);
+
+ /* enable snooping */
+ out_be32(&mdma->regs->dmagpor, MPC_DMA_DMAGPOR_SNOOP_ENABLE);
+ /* Disable error interrupts */
+ out_be32(&mdma->regs->dmaeeil, 0);
+
+ /* Clear interrupts status */
+ out_be32(&mdma->regs->dmaintl, 0xFFFF);
+ out_be32(&mdma->regs->dmaerrl, 0xFFFF);
+ } else {
+ out_be32(&mdma->regs->dmacr, MPC_DMA_DMACR_EDCG |
+ MPC_DMA_DMACR_ERGA |
+ MPC_DMA_DMACR_ERCA);
+
+ /* Disable hardware DMA requests */
+ out_be32(&mdma->regs->dmaerqh, 0);
+ out_be32(&mdma->regs->dmaerql, 0);
+
+ /* Disable error interrupts */
+ out_be32(&mdma->regs->dmaeeih, 0);
+ out_be32(&mdma->regs->dmaeeil, 0);
+
+ /* Clear interrupts status */
+ out_be32(&mdma->regs->dmainth, 0xFFFFFFFF);
+ out_be32(&mdma->regs->dmaintl, 0xFFFFFFFF);
+ out_be32(&mdma->regs->dmaerrh, 0xFFFFFFFF);
+ out_be32(&mdma->regs->dmaerrl, 0xFFFFFFFF);
+
+ /* Route interrupts to IPIC */
+ out_be32(&mdma->regs->dmaihsa, 0);
+ out_be32(&mdma->regs->dmailsa, 0);
+ }
+
+ /* Register DMA engine */
+ dev_set_drvdata(dev, mdma);
+ retval = dma_async_device_register(dma);
+ if (retval)
+ goto err_free2;
+
+ /* Register with OF helpers for DMA lookups (nonfatal) */
+ if (dev->of_node) {
+ retval = of_dma_controller_register(dev->of_node,
+ of_dma_xlate_by_chan_id, mdma);
+ if (retval)
+ dev_warn(dev, "Could not register for OF lookup\n");
+ }
+
+ return 0;
+
+err_free2:
+ if (mdma->is_mpc8308)
+ free_irq(mdma->irq2, mdma);
+err_free1:
+ free_irq(mdma->irq, mdma);
+err_dispose2:
+ if (mdma->is_mpc8308)
+ irq_dispose_mapping(mdma->irq2);
+err_dispose1:
+ irq_dispose_mapping(mdma->irq);
+err:
+ return retval;
+}
+
+static int mpc_dma_remove(struct platform_device *op)
+{
+ struct device *dev = &op->dev;
+ struct mpc_dma *mdma = dev_get_drvdata(dev);
+
+ if (dev->of_node)
+ of_dma_controller_free(dev->of_node);
+ dma_async_device_unregister(&mdma->dma);
+ if (mdma->is_mpc8308) {
+ free_irq(mdma->irq2, mdma);
+ irq_dispose_mapping(mdma->irq2);
+ }
+ free_irq(mdma->irq, mdma);
+ irq_dispose_mapping(mdma->irq);
+ tasklet_kill(&mdma->tasklet);
+
+ return 0;
+}
+
+static const struct of_device_id mpc_dma_match[] = {
+ { .compatible = "fsl,mpc5121-dma", },
+ { .compatible = "fsl,mpc8308-dma", },
+ {},
+};
+MODULE_DEVICE_TABLE(of, mpc_dma_match);
+
+static struct platform_driver mpc_dma_driver = {
+ .probe = mpc_dma_probe,
+ .remove = mpc_dma_remove,
+ .driver = {
+ .name = DRV_NAME,
+ .of_match_table = mpc_dma_match,
+ },
+};
+
+module_platform_driver(mpc_dma_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Piotr Ziecik <kosmo@semihalf.com>");
diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c
new file mode 100644
index 000000000..23b232b57
--- /dev/null
+++ b/drivers/dma/mv_xor.c
@@ -0,0 +1,1468 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * offload engine driver for the Marvell XOR engine
+ * Copyright (C) 2007, 2008, Marvell International Ltd.
+ */
+
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/spinlock.h>
+#include <linux/interrupt.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/memory.h>
+#include <linux/clk.h>
+#include <linux/of.h>
+#include <linux/of_irq.h>
+#include <linux/irqdomain.h>
+#include <linux/cpumask.h>
+#include <linux/platform_data/dma-mv_xor.h>
+
+#include "dmaengine.h"
+#include "mv_xor.h"
+
+enum mv_xor_type {
+ XOR_ORION,
+ XOR_ARMADA_38X,
+ XOR_ARMADA_37XX,
+};
+
+enum mv_xor_mode {
+ XOR_MODE_IN_REG,
+ XOR_MODE_IN_DESC,
+};
+
+static void mv_xor_issue_pending(struct dma_chan *chan);
+
+#define to_mv_xor_chan(chan) \
+ container_of(chan, struct mv_xor_chan, dmachan)
+
+#define to_mv_xor_slot(tx) \
+ container_of(tx, struct mv_xor_desc_slot, async_tx)
+
+#define mv_chan_to_devp(chan) \
+ ((chan)->dmadev.dev)
+
+static void mv_desc_init(struct mv_xor_desc_slot *desc,
+ dma_addr_t addr, u32 byte_count,
+ enum dma_ctrl_flags flags)
+{
+ struct mv_xor_desc *hw_desc = desc->hw_desc;
+
+ hw_desc->status = XOR_DESC_DMA_OWNED;
+ hw_desc->phy_next_desc = 0;
+ /* Enable end-of-descriptor interrupts only for DMA_PREP_INTERRUPT */
+ hw_desc->desc_command = (flags & DMA_PREP_INTERRUPT) ?
+ XOR_DESC_EOD_INT_EN : 0;
+ hw_desc->phy_dest_addr = addr;
+ hw_desc->byte_count = byte_count;
+}
+
+static void mv_desc_set_mode(struct mv_xor_desc_slot *desc)
+{
+ struct mv_xor_desc *hw_desc = desc->hw_desc;
+
+ switch (desc->type) {
+ case DMA_XOR:
+ case DMA_INTERRUPT:
+ hw_desc->desc_command |= XOR_DESC_OPERATION_XOR;
+ break;
+ case DMA_MEMCPY:
+ hw_desc->desc_command |= XOR_DESC_OPERATION_MEMCPY;
+ break;
+ default:
+ BUG();
+ return;
+ }
+}
+
+static void mv_desc_set_next_desc(struct mv_xor_desc_slot *desc,
+ u32 next_desc_addr)
+{
+ struct mv_xor_desc *hw_desc = desc->hw_desc;
+ BUG_ON(hw_desc->phy_next_desc);
+ hw_desc->phy_next_desc = next_desc_addr;
+}
+
+static void mv_desc_set_src_addr(struct mv_xor_desc_slot *desc,
+ int index, dma_addr_t addr)
+{
+ struct mv_xor_desc *hw_desc = desc->hw_desc;
+ hw_desc->phy_src_addr[mv_phy_src_idx(index)] = addr;
+ if (desc->type == DMA_XOR)
+ hw_desc->desc_command |= (1 << index);
+}
+
+static u32 mv_chan_get_current_desc(struct mv_xor_chan *chan)
+{
+ return readl_relaxed(XOR_CURR_DESC(chan));
+}
+
+static void mv_chan_set_next_descriptor(struct mv_xor_chan *chan,
+ u32 next_desc_addr)
+{
+ writel_relaxed(next_desc_addr, XOR_NEXT_DESC(chan));
+}
+
+static void mv_chan_unmask_interrupts(struct mv_xor_chan *chan)
+{
+ u32 val = readl_relaxed(XOR_INTR_MASK(chan));
+ val |= XOR_INTR_MASK_VALUE << (chan->idx * 16);
+ writel_relaxed(val, XOR_INTR_MASK(chan));
+}
+
+static u32 mv_chan_get_intr_cause(struct mv_xor_chan *chan)
+{
+ u32 intr_cause = readl_relaxed(XOR_INTR_CAUSE(chan));
+ intr_cause = (intr_cause >> (chan->idx * 16)) & 0xFFFF;
+ return intr_cause;
+}
+
+static void mv_chan_clear_eoc_cause(struct mv_xor_chan *chan)
+{
+ u32 val;
+
+ val = XOR_INT_END_OF_DESC | XOR_INT_END_OF_CHAIN | XOR_INT_STOPPED;
+ val = ~(val << (chan->idx * 16));
+ dev_dbg(mv_chan_to_devp(chan), "%s, val 0x%08x\n", __func__, val);
+ writel_relaxed(val, XOR_INTR_CAUSE(chan));
+}
+
+static void mv_chan_clear_err_status(struct mv_xor_chan *chan)
+{
+ u32 val = 0xFFFF0000 >> (chan->idx * 16);
+ writel_relaxed(val, XOR_INTR_CAUSE(chan));
+}
+
+static void mv_chan_set_mode(struct mv_xor_chan *chan,
+ u32 op_mode)
+{
+ u32 config = readl_relaxed(XOR_CONFIG(chan));
+
+ config &= ~0x7;
+ config |= op_mode;
+
+#if defined(__BIG_ENDIAN)
+ config |= XOR_DESCRIPTOR_SWAP;
+#else
+ config &= ~XOR_DESCRIPTOR_SWAP;
+#endif
+
+ writel_relaxed(config, XOR_CONFIG(chan));
+}
+
+static void mv_chan_activate(struct mv_xor_chan *chan)
+{
+ dev_dbg(mv_chan_to_devp(chan), " activate chan.\n");
+
+ /* writel ensures all descriptors are flushed before activation */
+ writel(BIT(0), XOR_ACTIVATION(chan));
+}
+
+static char mv_chan_is_busy(struct mv_xor_chan *chan)
+{
+ u32 state = readl_relaxed(XOR_ACTIVATION(chan));
+
+ state = (state >> 4) & 0x3;
+
+ return (state == 1) ? 1 : 0;
+}
+
+/*
+ * mv_chan_start_new_chain - program the engine to operate on new
+ * chain headed by sw_desc
+ * Caller must hold &mv_chan->lock while calling this function
+ */
+static void mv_chan_start_new_chain(struct mv_xor_chan *mv_chan,
+ struct mv_xor_desc_slot *sw_desc)
+{
+ dev_dbg(mv_chan_to_devp(mv_chan), "%s %d: sw_desc %p\n",
+ __func__, __LINE__, sw_desc);
+
+ /* set the hardware chain */
+ mv_chan_set_next_descriptor(mv_chan, sw_desc->async_tx.phys);
+
+ mv_chan->pending++;
+ mv_xor_issue_pending(&mv_chan->dmachan);
+}
+
+static dma_cookie_t
+mv_desc_run_tx_complete_actions(struct mv_xor_desc_slot *desc,
+ struct mv_xor_chan *mv_chan,
+ dma_cookie_t cookie)
+{
+ BUG_ON(desc->async_tx.cookie < 0);
+
+ if (desc->async_tx.cookie > 0) {
+ cookie = desc->async_tx.cookie;
+
+ dma_descriptor_unmap(&desc->async_tx);
+ /* call the callback (must not sleep or submit new
+ * operations to this channel)
+ */
+ dmaengine_desc_get_callback_invoke(&desc->async_tx, NULL);
+ }
+
+ /* run dependent operations */
+ dma_run_dependencies(&desc->async_tx);
+
+ return cookie;
+}
+
+static int
+mv_chan_clean_completed_slots(struct mv_xor_chan *mv_chan)
+{
+ struct mv_xor_desc_slot *iter, *_iter;
+
+ dev_dbg(mv_chan_to_devp(mv_chan), "%s %d\n", __func__, __LINE__);
+ list_for_each_entry_safe(iter, _iter, &mv_chan->completed_slots,
+ node) {
+
+ if (async_tx_test_ack(&iter->async_tx)) {
+ list_move_tail(&iter->node, &mv_chan->free_slots);
+ if (!list_empty(&iter->sg_tx_list)) {
+ list_splice_tail_init(&iter->sg_tx_list,
+ &mv_chan->free_slots);
+ }
+ }
+ }
+ return 0;
+}
+
+static int
+mv_desc_clean_slot(struct mv_xor_desc_slot *desc,
+ struct mv_xor_chan *mv_chan)
+{
+ dev_dbg(mv_chan_to_devp(mv_chan), "%s %d: desc %p flags %d\n",
+ __func__, __LINE__, desc, desc->async_tx.flags);
+
+ /* the client is allowed to attach dependent operations
+ * until 'ack' is set
+ */
+ if (!async_tx_test_ack(&desc->async_tx)) {
+ /* move this slot to the completed_slots */
+ list_move_tail(&desc->node, &mv_chan->completed_slots);
+ if (!list_empty(&desc->sg_tx_list)) {
+ list_splice_tail_init(&desc->sg_tx_list,
+ &mv_chan->completed_slots);
+ }
+ } else {
+ list_move_tail(&desc->node, &mv_chan->free_slots);
+ if (!list_empty(&desc->sg_tx_list)) {
+ list_splice_tail_init(&desc->sg_tx_list,
+ &mv_chan->free_slots);
+ }
+ }
+
+ return 0;
+}
+
+/* This function must be called with the mv_xor_chan spinlock held */
+static void mv_chan_slot_cleanup(struct mv_xor_chan *mv_chan)
+{
+ struct mv_xor_desc_slot *iter, *_iter;
+ dma_cookie_t cookie = 0;
+ int busy = mv_chan_is_busy(mv_chan);
+ u32 current_desc = mv_chan_get_current_desc(mv_chan);
+ int current_cleaned = 0;
+ struct mv_xor_desc *hw_desc;
+
+ dev_dbg(mv_chan_to_devp(mv_chan), "%s %d\n", __func__, __LINE__);
+ dev_dbg(mv_chan_to_devp(mv_chan), "current_desc %x\n", current_desc);
+ mv_chan_clean_completed_slots(mv_chan);
+
+ /* free completed slots from the chain starting with
+ * the oldest descriptor
+ */
+
+ list_for_each_entry_safe(iter, _iter, &mv_chan->chain,
+ node) {
+
+ /* clean finished descriptors */
+ hw_desc = iter->hw_desc;
+ if (hw_desc->status & XOR_DESC_SUCCESS) {
+ cookie = mv_desc_run_tx_complete_actions(iter, mv_chan,
+ cookie);
+
+ /* done processing desc, clean slot */
+ mv_desc_clean_slot(iter, mv_chan);
+
+ /* break if we did cleaned the current */
+ if (iter->async_tx.phys == current_desc) {
+ current_cleaned = 1;
+ break;
+ }
+ } else {
+ if (iter->async_tx.phys == current_desc) {
+ current_cleaned = 0;
+ break;
+ }
+ }
+ }
+
+ if ((busy == 0) && !list_empty(&mv_chan->chain)) {
+ if (current_cleaned) {
+ /*
+ * current descriptor cleaned and removed, run
+ * from list head
+ */
+ iter = list_entry(mv_chan->chain.next,
+ struct mv_xor_desc_slot,
+ node);
+ mv_chan_start_new_chain(mv_chan, iter);
+ } else {
+ if (!list_is_last(&iter->node, &mv_chan->chain)) {
+ /*
+ * descriptors are still waiting after
+ * current, trigger them
+ */
+ iter = list_entry(iter->node.next,
+ struct mv_xor_desc_slot,
+ node);
+ mv_chan_start_new_chain(mv_chan, iter);
+ } else {
+ /*
+ * some descriptors are still waiting
+ * to be cleaned
+ */
+ tasklet_schedule(&mv_chan->irq_tasklet);
+ }
+ }
+ }
+
+ if (cookie > 0)
+ mv_chan->dmachan.completed_cookie = cookie;
+}
+
+static void mv_xor_tasklet(struct tasklet_struct *t)
+{
+ struct mv_xor_chan *chan = from_tasklet(chan, t, irq_tasklet);
+
+ spin_lock(&chan->lock);
+ mv_chan_slot_cleanup(chan);
+ spin_unlock(&chan->lock);
+}
+
+static struct mv_xor_desc_slot *
+mv_chan_alloc_slot(struct mv_xor_chan *mv_chan)
+{
+ struct mv_xor_desc_slot *iter;
+
+ spin_lock_bh(&mv_chan->lock);
+
+ if (!list_empty(&mv_chan->free_slots)) {
+ iter = list_first_entry(&mv_chan->free_slots,
+ struct mv_xor_desc_slot,
+ node);
+
+ list_move_tail(&iter->node, &mv_chan->allocated_slots);
+
+ spin_unlock_bh(&mv_chan->lock);
+
+ /* pre-ack descriptor */
+ async_tx_ack(&iter->async_tx);
+ iter->async_tx.cookie = -EBUSY;
+
+ return iter;
+
+ }
+
+ spin_unlock_bh(&mv_chan->lock);
+
+ /* try to free some slots if the allocation fails */
+ tasklet_schedule(&mv_chan->irq_tasklet);
+
+ return NULL;
+}
+
+/************************ DMA engine API functions ****************************/
+static dma_cookie_t
+mv_xor_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+ struct mv_xor_desc_slot *sw_desc = to_mv_xor_slot(tx);
+ struct mv_xor_chan *mv_chan = to_mv_xor_chan(tx->chan);
+ struct mv_xor_desc_slot *old_chain_tail;
+ dma_cookie_t cookie;
+ int new_hw_chain = 1;
+
+ dev_dbg(mv_chan_to_devp(mv_chan),
+ "%s sw_desc %p: async_tx %p\n",
+ __func__, sw_desc, &sw_desc->async_tx);
+
+ spin_lock_bh(&mv_chan->lock);
+ cookie = dma_cookie_assign(tx);
+
+ if (list_empty(&mv_chan->chain))
+ list_move_tail(&sw_desc->node, &mv_chan->chain);
+ else {
+ new_hw_chain = 0;
+
+ old_chain_tail = list_entry(mv_chan->chain.prev,
+ struct mv_xor_desc_slot,
+ node);
+ list_move_tail(&sw_desc->node, &mv_chan->chain);
+
+ dev_dbg(mv_chan_to_devp(mv_chan), "Append to last desc %pa\n",
+ &old_chain_tail->async_tx.phys);
+
+ /* fix up the hardware chain */
+ mv_desc_set_next_desc(old_chain_tail, sw_desc->async_tx.phys);
+
+ /* if the channel is not busy */
+ if (!mv_chan_is_busy(mv_chan)) {
+ u32 current_desc = mv_chan_get_current_desc(mv_chan);
+ /*
+ * and the curren desc is the end of the chain before
+ * the append, then we need to start the channel
+ */
+ if (current_desc == old_chain_tail->async_tx.phys)
+ new_hw_chain = 1;
+ }
+ }
+
+ if (new_hw_chain)
+ mv_chan_start_new_chain(mv_chan, sw_desc);
+
+ spin_unlock_bh(&mv_chan->lock);
+
+ return cookie;
+}
+
+/* returns the number of allocated descriptors */
+static int mv_xor_alloc_chan_resources(struct dma_chan *chan)
+{
+ void *virt_desc;
+ dma_addr_t dma_desc;
+ int idx;
+ struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
+ struct mv_xor_desc_slot *slot = NULL;
+ int num_descs_in_pool = MV_XOR_POOL_SIZE/MV_XOR_SLOT_SIZE;
+
+ /* Allocate descriptor slots */
+ idx = mv_chan->slots_allocated;
+ while (idx < num_descs_in_pool) {
+ slot = kzalloc(sizeof(*slot), GFP_KERNEL);
+ if (!slot) {
+ dev_info(mv_chan_to_devp(mv_chan),
+ "channel only initialized %d descriptor slots",
+ idx);
+ break;
+ }
+ virt_desc = mv_chan->dma_desc_pool_virt;
+ slot->hw_desc = virt_desc + idx * MV_XOR_SLOT_SIZE;
+
+ dma_async_tx_descriptor_init(&slot->async_tx, chan);
+ slot->async_tx.tx_submit = mv_xor_tx_submit;
+ INIT_LIST_HEAD(&slot->node);
+ INIT_LIST_HEAD(&slot->sg_tx_list);
+ dma_desc = mv_chan->dma_desc_pool;
+ slot->async_tx.phys = dma_desc + idx * MV_XOR_SLOT_SIZE;
+ slot->idx = idx++;
+
+ spin_lock_bh(&mv_chan->lock);
+ mv_chan->slots_allocated = idx;
+ list_add_tail(&slot->node, &mv_chan->free_slots);
+ spin_unlock_bh(&mv_chan->lock);
+ }
+
+ dev_dbg(mv_chan_to_devp(mv_chan),
+ "allocated %d descriptor slots\n",
+ mv_chan->slots_allocated);
+
+ return mv_chan->slots_allocated ? : -ENOMEM;
+}
+
+/*
+ * Check if source or destination is an PCIe/IO address (non-SDRAM) and add
+ * a new MBus window if necessary. Use a cache for these check so that
+ * the MMIO mapped registers don't have to be accessed for this check
+ * to speed up this process.
+ */
+static int mv_xor_add_io_win(struct mv_xor_chan *mv_chan, u32 addr)
+{
+ struct mv_xor_device *xordev = mv_chan->xordev;
+ void __iomem *base = mv_chan->mmr_high_base;
+ u32 win_enable;
+ u32 size;
+ u8 target, attr;
+ int ret;
+ int i;
+
+ /* Nothing needs to get done for the Armada 3700 */
+ if (xordev->xor_type == XOR_ARMADA_37XX)
+ return 0;
+
+ /*
+ * Loop over the cached windows to check, if the requested area
+ * is already mapped. If this the case, nothing needs to be done
+ * and we can return.
+ */
+ for (i = 0; i < WINDOW_COUNT; i++) {
+ if (addr >= xordev->win_start[i] &&
+ addr <= xordev->win_end[i]) {
+ /* Window is already mapped */
+ return 0;
+ }
+ }
+
+ /*
+ * The window is not mapped, so we need to create the new mapping
+ */
+
+ /* If no IO window is found that addr has to be located in SDRAM */
+ ret = mvebu_mbus_get_io_win_info(addr, &size, &target, &attr);
+ if (ret < 0)
+ return 0;
+
+ /*
+ * Mask the base addr 'addr' according to 'size' read back from the
+ * MBus window. Otherwise we might end up with an address located
+ * somewhere in the middle of this area here.
+ */
+ size -= 1;
+ addr &= ~size;
+
+ /*
+ * Reading one of both enabled register is enough, as they are always
+ * programmed to the identical values
+ */
+ win_enable = readl(base + WINDOW_BAR_ENABLE(0));
+
+ /* Set 'i' to the first free window to write the new values to */
+ i = ffs(~win_enable) - 1;
+ if (i >= WINDOW_COUNT)
+ return -ENOMEM;
+
+ writel((addr & 0xffff0000) | (attr << 8) | target,
+ base + WINDOW_BASE(i));
+ writel(size & 0xffff0000, base + WINDOW_SIZE(i));
+
+ /* Fill the caching variables for later use */
+ xordev->win_start[i] = addr;
+ xordev->win_end[i] = addr + size;
+
+ win_enable |= (1 << i);
+ win_enable |= 3 << (16 + (2 * i));
+ writel(win_enable, base + WINDOW_BAR_ENABLE(0));
+ writel(win_enable, base + WINDOW_BAR_ENABLE(1));
+
+ return 0;
+}
+
+static struct dma_async_tx_descriptor *
+mv_xor_prep_dma_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src,
+ unsigned int src_cnt, size_t len, unsigned long flags)
+{
+ struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
+ struct mv_xor_desc_slot *sw_desc;
+ int ret;
+
+ if (unlikely(len < MV_XOR_MIN_BYTE_COUNT))
+ return NULL;
+
+ BUG_ON(len > MV_XOR_MAX_BYTE_COUNT);
+
+ dev_dbg(mv_chan_to_devp(mv_chan),
+ "%s src_cnt: %d len: %zu dest %pad flags: %ld\n",
+ __func__, src_cnt, len, &dest, flags);
+
+ /* Check if a new window needs to get added for 'dest' */
+ ret = mv_xor_add_io_win(mv_chan, dest);
+ if (ret)
+ return NULL;
+
+ sw_desc = mv_chan_alloc_slot(mv_chan);
+ if (sw_desc) {
+ sw_desc->type = DMA_XOR;
+ sw_desc->async_tx.flags = flags;
+ mv_desc_init(sw_desc, dest, len, flags);
+ if (mv_chan->op_in_desc == XOR_MODE_IN_DESC)
+ mv_desc_set_mode(sw_desc);
+ while (src_cnt--) {
+ /* Check if a new window needs to get added for 'src' */
+ ret = mv_xor_add_io_win(mv_chan, src[src_cnt]);
+ if (ret)
+ return NULL;
+ mv_desc_set_src_addr(sw_desc, src_cnt, src[src_cnt]);
+ }
+ }
+
+ dev_dbg(mv_chan_to_devp(mv_chan),
+ "%s sw_desc %p async_tx %p \n",
+ __func__, sw_desc, &sw_desc->async_tx);
+ return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
+static struct dma_async_tx_descriptor *
+mv_xor_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
+ size_t len, unsigned long flags)
+{
+ /*
+ * A MEMCPY operation is identical to an XOR operation with only
+ * a single source address.
+ */
+ return mv_xor_prep_dma_xor(chan, dest, &src, 1, len, flags);
+}
+
+static struct dma_async_tx_descriptor *
+mv_xor_prep_dma_interrupt(struct dma_chan *chan, unsigned long flags)
+{
+ struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
+ dma_addr_t src, dest;
+ size_t len;
+
+ src = mv_chan->dummy_src_addr;
+ dest = mv_chan->dummy_dst_addr;
+ len = MV_XOR_MIN_BYTE_COUNT;
+
+ /*
+ * We implement the DMA_INTERRUPT operation as a minimum sized
+ * XOR operation with a single dummy source address.
+ */
+ return mv_xor_prep_dma_xor(chan, dest, &src, 1, len, flags);
+}
+
+static void mv_xor_free_chan_resources(struct dma_chan *chan)
+{
+ struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
+ struct mv_xor_desc_slot *iter, *_iter;
+ int in_use_descs = 0;
+
+ spin_lock_bh(&mv_chan->lock);
+
+ mv_chan_slot_cleanup(mv_chan);
+
+ list_for_each_entry_safe(iter, _iter, &mv_chan->chain,
+ node) {
+ in_use_descs++;
+ list_move_tail(&iter->node, &mv_chan->free_slots);
+ }
+ list_for_each_entry_safe(iter, _iter, &mv_chan->completed_slots,
+ node) {
+ in_use_descs++;
+ list_move_tail(&iter->node, &mv_chan->free_slots);
+ }
+ list_for_each_entry_safe(iter, _iter, &mv_chan->allocated_slots,
+ node) {
+ in_use_descs++;
+ list_move_tail(&iter->node, &mv_chan->free_slots);
+ }
+ list_for_each_entry_safe_reverse(
+ iter, _iter, &mv_chan->free_slots, node) {
+ list_del(&iter->node);
+ kfree(iter);
+ mv_chan->slots_allocated--;
+ }
+
+ dev_dbg(mv_chan_to_devp(mv_chan), "%s slots_allocated %d\n",
+ __func__, mv_chan->slots_allocated);
+ spin_unlock_bh(&mv_chan->lock);
+
+ if (in_use_descs)
+ dev_err(mv_chan_to_devp(mv_chan),
+ "freeing %d in use descriptors!\n", in_use_descs);
+}
+
+/**
+ * mv_xor_status - poll the status of an XOR transaction
+ * @chan: XOR channel handle
+ * @cookie: XOR transaction identifier
+ * @txstate: XOR transactions state holder (or NULL)
+ */
+static enum dma_status mv_xor_status(struct dma_chan *chan,
+ dma_cookie_t cookie,
+ struct dma_tx_state *txstate)
+{
+ struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
+ enum dma_status ret;
+
+ ret = dma_cookie_status(chan, cookie, txstate);
+ if (ret == DMA_COMPLETE)
+ return ret;
+
+ spin_lock_bh(&mv_chan->lock);
+ mv_chan_slot_cleanup(mv_chan);
+ spin_unlock_bh(&mv_chan->lock);
+
+ return dma_cookie_status(chan, cookie, txstate);
+}
+
+static void mv_chan_dump_regs(struct mv_xor_chan *chan)
+{
+ u32 val;
+
+ val = readl_relaxed(XOR_CONFIG(chan));
+ dev_err(mv_chan_to_devp(chan), "config 0x%08x\n", val);
+
+ val = readl_relaxed(XOR_ACTIVATION(chan));
+ dev_err(mv_chan_to_devp(chan), "activation 0x%08x\n", val);
+
+ val = readl_relaxed(XOR_INTR_CAUSE(chan));
+ dev_err(mv_chan_to_devp(chan), "intr cause 0x%08x\n", val);
+
+ val = readl_relaxed(XOR_INTR_MASK(chan));
+ dev_err(mv_chan_to_devp(chan), "intr mask 0x%08x\n", val);
+
+ val = readl_relaxed(XOR_ERROR_CAUSE(chan));
+ dev_err(mv_chan_to_devp(chan), "error cause 0x%08x\n", val);
+
+ val = readl_relaxed(XOR_ERROR_ADDR(chan));
+ dev_err(mv_chan_to_devp(chan), "error addr 0x%08x\n", val);
+}
+
+static void mv_chan_err_interrupt_handler(struct mv_xor_chan *chan,
+ u32 intr_cause)
+{
+ if (intr_cause & XOR_INT_ERR_DECODE) {
+ dev_dbg(mv_chan_to_devp(chan), "ignoring address decode error\n");
+ return;
+ }
+
+ dev_err(mv_chan_to_devp(chan), "error on chan %d. intr cause 0x%08x\n",
+ chan->idx, intr_cause);
+
+ mv_chan_dump_regs(chan);
+ WARN_ON(1);
+}
+
+static irqreturn_t mv_xor_interrupt_handler(int irq, void *data)
+{
+ struct mv_xor_chan *chan = data;
+ u32 intr_cause = mv_chan_get_intr_cause(chan);
+
+ dev_dbg(mv_chan_to_devp(chan), "intr cause %x\n", intr_cause);
+
+ if (intr_cause & XOR_INTR_ERRORS)
+ mv_chan_err_interrupt_handler(chan, intr_cause);
+
+ tasklet_schedule(&chan->irq_tasklet);
+
+ mv_chan_clear_eoc_cause(chan);
+
+ return IRQ_HANDLED;
+}
+
+static void mv_xor_issue_pending(struct dma_chan *chan)
+{
+ struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
+
+ if (mv_chan->pending >= MV_XOR_THRESHOLD) {
+ mv_chan->pending = 0;
+ mv_chan_activate(mv_chan);
+ }
+}
+
+/*
+ * Perform a transaction to verify the HW works.
+ */
+
+static int mv_chan_memcpy_self_test(struct mv_xor_chan *mv_chan)
+{
+ int i, ret;
+ void *src, *dest;
+ dma_addr_t src_dma, dest_dma;
+ struct dma_chan *dma_chan;
+ dma_cookie_t cookie;
+ struct dma_async_tx_descriptor *tx;
+ struct dmaengine_unmap_data *unmap;
+ int err = 0;
+
+ src = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!src)
+ return -ENOMEM;
+
+ dest = kzalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!dest) {
+ kfree(src);
+ return -ENOMEM;
+ }
+
+ /* Fill in src buffer */
+ for (i = 0; i < PAGE_SIZE; i++)
+ ((u8 *) src)[i] = (u8)i;
+
+ dma_chan = &mv_chan->dmachan;
+ if (mv_xor_alloc_chan_resources(dma_chan) < 1) {
+ err = -ENODEV;
+ goto out;
+ }
+
+ unmap = dmaengine_get_unmap_data(dma_chan->device->dev, 2, GFP_KERNEL);
+ if (!unmap) {
+ err = -ENOMEM;
+ goto free_resources;
+ }
+
+ src_dma = dma_map_page(dma_chan->device->dev, virt_to_page(src),
+ offset_in_page(src), PAGE_SIZE,
+ DMA_TO_DEVICE);
+ unmap->addr[0] = src_dma;
+
+ ret = dma_mapping_error(dma_chan->device->dev, src_dma);
+ if (ret) {
+ err = -ENOMEM;
+ goto free_resources;
+ }
+ unmap->to_cnt = 1;
+
+ dest_dma = dma_map_page(dma_chan->device->dev, virt_to_page(dest),
+ offset_in_page(dest), PAGE_SIZE,
+ DMA_FROM_DEVICE);
+ unmap->addr[1] = dest_dma;
+
+ ret = dma_mapping_error(dma_chan->device->dev, dest_dma);
+ if (ret) {
+ err = -ENOMEM;
+ goto free_resources;
+ }
+ unmap->from_cnt = 1;
+ unmap->len = PAGE_SIZE;
+
+ tx = mv_xor_prep_dma_memcpy(dma_chan, dest_dma, src_dma,
+ PAGE_SIZE, 0);
+ if (!tx) {
+ dev_err(dma_chan->device->dev,
+ "Self-test cannot prepare operation, disabling\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+ cookie = mv_xor_tx_submit(tx);
+ if (dma_submit_error(cookie)) {
+ dev_err(dma_chan->device->dev,
+ "Self-test submit error, disabling\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+ mv_xor_issue_pending(dma_chan);
+ async_tx_ack(tx);
+ msleep(1);
+
+ if (mv_xor_status(dma_chan, cookie, NULL) !=
+ DMA_COMPLETE) {
+ dev_err(dma_chan->device->dev,
+ "Self-test copy timed out, disabling\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+ dma_sync_single_for_cpu(dma_chan->device->dev, dest_dma,
+ PAGE_SIZE, DMA_FROM_DEVICE);
+ if (memcmp(src, dest, PAGE_SIZE)) {
+ dev_err(dma_chan->device->dev,
+ "Self-test copy failed compare, disabling\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+free_resources:
+ dmaengine_unmap_put(unmap);
+ mv_xor_free_chan_resources(dma_chan);
+out:
+ kfree(src);
+ kfree(dest);
+ return err;
+}
+
+#define MV_XOR_NUM_SRC_TEST 4 /* must be <= 15 */
+static int
+mv_chan_xor_self_test(struct mv_xor_chan *mv_chan)
+{
+ int i, src_idx, ret;
+ struct page *dest;
+ struct page *xor_srcs[MV_XOR_NUM_SRC_TEST];
+ dma_addr_t dma_srcs[MV_XOR_NUM_SRC_TEST];
+ dma_addr_t dest_dma;
+ struct dma_async_tx_descriptor *tx;
+ struct dmaengine_unmap_data *unmap;
+ struct dma_chan *dma_chan;
+ dma_cookie_t cookie;
+ u8 cmp_byte = 0;
+ u32 cmp_word;
+ int err = 0;
+ int src_count = MV_XOR_NUM_SRC_TEST;
+
+ for (src_idx = 0; src_idx < src_count; src_idx++) {
+ xor_srcs[src_idx] = alloc_page(GFP_KERNEL);
+ if (!xor_srcs[src_idx]) {
+ while (src_idx--)
+ __free_page(xor_srcs[src_idx]);
+ return -ENOMEM;
+ }
+ }
+
+ dest = alloc_page(GFP_KERNEL);
+ if (!dest) {
+ while (src_idx--)
+ __free_page(xor_srcs[src_idx]);
+ return -ENOMEM;
+ }
+
+ /* Fill in src buffers */
+ for (src_idx = 0; src_idx < src_count; src_idx++) {
+ u8 *ptr = page_address(xor_srcs[src_idx]);
+ for (i = 0; i < PAGE_SIZE; i++)
+ ptr[i] = (1 << src_idx);
+ }
+
+ for (src_idx = 0; src_idx < src_count; src_idx++)
+ cmp_byte ^= (u8) (1 << src_idx);
+
+ cmp_word = (cmp_byte << 24) | (cmp_byte << 16) |
+ (cmp_byte << 8) | cmp_byte;
+
+ memset(page_address(dest), 0, PAGE_SIZE);
+
+ dma_chan = &mv_chan->dmachan;
+ if (mv_xor_alloc_chan_resources(dma_chan) < 1) {
+ err = -ENODEV;
+ goto out;
+ }
+
+ unmap = dmaengine_get_unmap_data(dma_chan->device->dev, src_count + 1,
+ GFP_KERNEL);
+ if (!unmap) {
+ err = -ENOMEM;
+ goto free_resources;
+ }
+
+ /* test xor */
+ for (i = 0; i < src_count; i++) {
+ unmap->addr[i] = dma_map_page(dma_chan->device->dev, xor_srcs[i],
+ 0, PAGE_SIZE, DMA_TO_DEVICE);
+ dma_srcs[i] = unmap->addr[i];
+ ret = dma_mapping_error(dma_chan->device->dev, unmap->addr[i]);
+ if (ret) {
+ err = -ENOMEM;
+ goto free_resources;
+ }
+ unmap->to_cnt++;
+ }
+
+ unmap->addr[src_count] = dma_map_page(dma_chan->device->dev, dest, 0, PAGE_SIZE,
+ DMA_FROM_DEVICE);
+ dest_dma = unmap->addr[src_count];
+ ret = dma_mapping_error(dma_chan->device->dev, unmap->addr[src_count]);
+ if (ret) {
+ err = -ENOMEM;
+ goto free_resources;
+ }
+ unmap->from_cnt = 1;
+ unmap->len = PAGE_SIZE;
+
+ tx = mv_xor_prep_dma_xor(dma_chan, dest_dma, dma_srcs,
+ src_count, PAGE_SIZE, 0);
+ if (!tx) {
+ dev_err(dma_chan->device->dev,
+ "Self-test cannot prepare operation, disabling\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+ cookie = mv_xor_tx_submit(tx);
+ if (dma_submit_error(cookie)) {
+ dev_err(dma_chan->device->dev,
+ "Self-test submit error, disabling\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+ mv_xor_issue_pending(dma_chan);
+ async_tx_ack(tx);
+ msleep(8);
+
+ if (mv_xor_status(dma_chan, cookie, NULL) !=
+ DMA_COMPLETE) {
+ dev_err(dma_chan->device->dev,
+ "Self-test xor timed out, disabling\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+ dma_sync_single_for_cpu(dma_chan->device->dev, dest_dma,
+ PAGE_SIZE, DMA_FROM_DEVICE);
+ for (i = 0; i < (PAGE_SIZE / sizeof(u32)); i++) {
+ u32 *ptr = page_address(dest);
+ if (ptr[i] != cmp_word) {
+ dev_err(dma_chan->device->dev,
+ "Self-test xor failed compare, disabling. index %d, data %x, expected %x\n",
+ i, ptr[i], cmp_word);
+ err = -ENODEV;
+ goto free_resources;
+ }
+ }
+
+free_resources:
+ dmaengine_unmap_put(unmap);
+ mv_xor_free_chan_resources(dma_chan);
+out:
+ src_idx = src_count;
+ while (src_idx--)
+ __free_page(xor_srcs[src_idx]);
+ __free_page(dest);
+ return err;
+}
+
+static int mv_xor_channel_remove(struct mv_xor_chan *mv_chan)
+{
+ struct dma_chan *chan, *_chan;
+ struct device *dev = mv_chan->dmadev.dev;
+
+ dma_async_device_unregister(&mv_chan->dmadev);
+
+ dma_free_coherent(dev, MV_XOR_POOL_SIZE,
+ mv_chan->dma_desc_pool_virt, mv_chan->dma_desc_pool);
+ dma_unmap_single(dev, mv_chan->dummy_src_addr,
+ MV_XOR_MIN_BYTE_COUNT, DMA_FROM_DEVICE);
+ dma_unmap_single(dev, mv_chan->dummy_dst_addr,
+ MV_XOR_MIN_BYTE_COUNT, DMA_TO_DEVICE);
+
+ list_for_each_entry_safe(chan, _chan, &mv_chan->dmadev.channels,
+ device_node) {
+ list_del(&chan->device_node);
+ }
+
+ free_irq(mv_chan->irq, mv_chan);
+
+ return 0;
+}
+
+static struct mv_xor_chan *
+mv_xor_channel_add(struct mv_xor_device *xordev,
+ struct platform_device *pdev,
+ int idx, dma_cap_mask_t cap_mask, int irq)
+{
+ int ret = 0;
+ struct mv_xor_chan *mv_chan;
+ struct dma_device *dma_dev;
+
+ mv_chan = devm_kzalloc(&pdev->dev, sizeof(*mv_chan), GFP_KERNEL);
+ if (!mv_chan)
+ return ERR_PTR(-ENOMEM);
+
+ mv_chan->idx = idx;
+ mv_chan->irq = irq;
+ if (xordev->xor_type == XOR_ORION)
+ mv_chan->op_in_desc = XOR_MODE_IN_REG;
+ else
+ mv_chan->op_in_desc = XOR_MODE_IN_DESC;
+
+ dma_dev = &mv_chan->dmadev;
+ dma_dev->dev = &pdev->dev;
+ mv_chan->xordev = xordev;
+
+ /*
+ * These source and destination dummy buffers are used to implement
+ * a DMA_INTERRUPT operation as a minimum-sized XOR operation.
+ * Hence, we only need to map the buffers at initialization-time.
+ */
+ mv_chan->dummy_src_addr = dma_map_single(dma_dev->dev,
+ mv_chan->dummy_src, MV_XOR_MIN_BYTE_COUNT, DMA_FROM_DEVICE);
+ mv_chan->dummy_dst_addr = dma_map_single(dma_dev->dev,
+ mv_chan->dummy_dst, MV_XOR_MIN_BYTE_COUNT, DMA_TO_DEVICE);
+
+ /* allocate coherent memory for hardware descriptors
+ * note: writecombine gives slightly better performance, but
+ * requires that we explicitly flush the writes
+ */
+ mv_chan->dma_desc_pool_virt =
+ dma_alloc_wc(&pdev->dev, MV_XOR_POOL_SIZE, &mv_chan->dma_desc_pool,
+ GFP_KERNEL);
+ if (!mv_chan->dma_desc_pool_virt)
+ return ERR_PTR(-ENOMEM);
+
+ /* discover transaction capabilites from the platform data */
+ dma_dev->cap_mask = cap_mask;
+
+ INIT_LIST_HEAD(&dma_dev->channels);
+
+ /* set base routines */
+ dma_dev->device_alloc_chan_resources = mv_xor_alloc_chan_resources;
+ dma_dev->device_free_chan_resources = mv_xor_free_chan_resources;
+ dma_dev->device_tx_status = mv_xor_status;
+ dma_dev->device_issue_pending = mv_xor_issue_pending;
+
+ /* set prep routines based on capability */
+ if (dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask))
+ dma_dev->device_prep_dma_interrupt = mv_xor_prep_dma_interrupt;
+ if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask))
+ dma_dev->device_prep_dma_memcpy = mv_xor_prep_dma_memcpy;
+ if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) {
+ dma_dev->max_xor = 8;
+ dma_dev->device_prep_dma_xor = mv_xor_prep_dma_xor;
+ }
+
+ mv_chan->mmr_base = xordev->xor_base;
+ mv_chan->mmr_high_base = xordev->xor_high_base;
+ tasklet_setup(&mv_chan->irq_tasklet, mv_xor_tasklet);
+
+ /* clear errors before enabling interrupts */
+ mv_chan_clear_err_status(mv_chan);
+
+ ret = request_irq(mv_chan->irq, mv_xor_interrupt_handler,
+ 0, dev_name(&pdev->dev), mv_chan);
+ if (ret)
+ goto err_free_dma;
+
+ mv_chan_unmask_interrupts(mv_chan);
+
+ if (mv_chan->op_in_desc == XOR_MODE_IN_DESC)
+ mv_chan_set_mode(mv_chan, XOR_OPERATION_MODE_IN_DESC);
+ else
+ mv_chan_set_mode(mv_chan, XOR_OPERATION_MODE_XOR);
+
+ spin_lock_init(&mv_chan->lock);
+ INIT_LIST_HEAD(&mv_chan->chain);
+ INIT_LIST_HEAD(&mv_chan->completed_slots);
+ INIT_LIST_HEAD(&mv_chan->free_slots);
+ INIT_LIST_HEAD(&mv_chan->allocated_slots);
+ mv_chan->dmachan.device = dma_dev;
+ dma_cookie_init(&mv_chan->dmachan);
+
+ list_add_tail(&mv_chan->dmachan.device_node, &dma_dev->channels);
+
+ if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask)) {
+ ret = mv_chan_memcpy_self_test(mv_chan);
+ dev_dbg(&pdev->dev, "memcpy self test returned %d\n", ret);
+ if (ret)
+ goto err_free_irq;
+ }
+
+ if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) {
+ ret = mv_chan_xor_self_test(mv_chan);
+ dev_dbg(&pdev->dev, "xor self test returned %d\n", ret);
+ if (ret)
+ goto err_free_irq;
+ }
+
+ dev_info(&pdev->dev, "Marvell XOR (%s): ( %s%s%s)\n",
+ mv_chan->op_in_desc ? "Descriptor Mode" : "Registers Mode",
+ dma_has_cap(DMA_XOR, dma_dev->cap_mask) ? "xor " : "",
+ dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask) ? "cpy " : "",
+ dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask) ? "intr " : "");
+
+ ret = dma_async_device_register(dma_dev);
+ if (ret)
+ goto err_free_irq;
+
+ return mv_chan;
+
+err_free_irq:
+ free_irq(mv_chan->irq, mv_chan);
+err_free_dma:
+ dma_free_coherent(&pdev->dev, MV_XOR_POOL_SIZE,
+ mv_chan->dma_desc_pool_virt, mv_chan->dma_desc_pool);
+ return ERR_PTR(ret);
+}
+
+static void
+mv_xor_conf_mbus_windows(struct mv_xor_device *xordev,
+ const struct mbus_dram_target_info *dram)
+{
+ void __iomem *base = xordev->xor_high_base;
+ u32 win_enable = 0;
+ int i;
+
+ for (i = 0; i < 8; i++) {
+ writel(0, base + WINDOW_BASE(i));
+ writel(0, base + WINDOW_SIZE(i));
+ if (i < 4)
+ writel(0, base + WINDOW_REMAP_HIGH(i));
+ }
+
+ for (i = 0; i < dram->num_cs; i++) {
+ const struct mbus_dram_window *cs = dram->cs + i;
+
+ writel((cs->base & 0xffff0000) |
+ (cs->mbus_attr << 8) |
+ dram->mbus_dram_target_id, base + WINDOW_BASE(i));
+ writel((cs->size - 1) & 0xffff0000, base + WINDOW_SIZE(i));
+
+ /* Fill the caching variables for later use */
+ xordev->win_start[i] = cs->base;
+ xordev->win_end[i] = cs->base + cs->size - 1;
+
+ win_enable |= (1 << i);
+ win_enable |= 3 << (16 + (2 * i));
+ }
+
+ writel(win_enable, base + WINDOW_BAR_ENABLE(0));
+ writel(win_enable, base + WINDOW_BAR_ENABLE(1));
+ writel(0, base + WINDOW_OVERRIDE_CTRL(0));
+ writel(0, base + WINDOW_OVERRIDE_CTRL(1));
+}
+
+static void
+mv_xor_conf_mbus_windows_a3700(struct mv_xor_device *xordev)
+{
+ void __iomem *base = xordev->xor_high_base;
+ u32 win_enable = 0;
+ int i;
+
+ for (i = 0; i < 8; i++) {
+ writel(0, base + WINDOW_BASE(i));
+ writel(0, base + WINDOW_SIZE(i));
+ if (i < 4)
+ writel(0, base + WINDOW_REMAP_HIGH(i));
+ }
+ /*
+ * For Armada3700 open default 4GB Mbus window. The dram
+ * related configuration are done at AXIS level.
+ */
+ writel(0xffff0000, base + WINDOW_SIZE(0));
+ win_enable |= 1;
+ win_enable |= 3 << 16;
+
+ writel(win_enable, base + WINDOW_BAR_ENABLE(0));
+ writel(win_enable, base + WINDOW_BAR_ENABLE(1));
+ writel(0, base + WINDOW_OVERRIDE_CTRL(0));
+ writel(0, base + WINDOW_OVERRIDE_CTRL(1));
+}
+
+/*
+ * Since this XOR driver is basically used only for RAID5, we don't
+ * need to care about synchronizing ->suspend with DMA activity,
+ * because the DMA engine will naturally be quiet due to the block
+ * devices being suspended.
+ */
+static int mv_xor_suspend(struct platform_device *pdev, pm_message_t state)
+{
+ struct mv_xor_device *xordev = platform_get_drvdata(pdev);
+ int i;
+
+ for (i = 0; i < MV_XOR_MAX_CHANNELS; i++) {
+ struct mv_xor_chan *mv_chan = xordev->channels[i];
+
+ if (!mv_chan)
+ continue;
+
+ mv_chan->saved_config_reg =
+ readl_relaxed(XOR_CONFIG(mv_chan));
+ mv_chan->saved_int_mask_reg =
+ readl_relaxed(XOR_INTR_MASK(mv_chan));
+ }
+
+ return 0;
+}
+
+static int mv_xor_resume(struct platform_device *dev)
+{
+ struct mv_xor_device *xordev = platform_get_drvdata(dev);
+ const struct mbus_dram_target_info *dram;
+ int i;
+
+ for (i = 0; i < MV_XOR_MAX_CHANNELS; i++) {
+ struct mv_xor_chan *mv_chan = xordev->channels[i];
+
+ if (!mv_chan)
+ continue;
+
+ writel_relaxed(mv_chan->saved_config_reg,
+ XOR_CONFIG(mv_chan));
+ writel_relaxed(mv_chan->saved_int_mask_reg,
+ XOR_INTR_MASK(mv_chan));
+ }
+
+ if (xordev->xor_type == XOR_ARMADA_37XX) {
+ mv_xor_conf_mbus_windows_a3700(xordev);
+ return 0;
+ }
+
+ dram = mv_mbus_dram_info();
+ if (dram)
+ mv_xor_conf_mbus_windows(xordev, dram);
+
+ return 0;
+}
+
+static const struct of_device_id mv_xor_dt_ids[] = {
+ { .compatible = "marvell,orion-xor", .data = (void *)XOR_ORION },
+ { .compatible = "marvell,armada-380-xor", .data = (void *)XOR_ARMADA_38X },
+ { .compatible = "marvell,armada-3700-xor", .data = (void *)XOR_ARMADA_37XX },
+ {},
+};
+
+static unsigned int mv_xor_engine_count;
+
+static int mv_xor_probe(struct platform_device *pdev)
+{
+ const struct mbus_dram_target_info *dram;
+ struct mv_xor_device *xordev;
+ struct mv_xor_platform_data *pdata = dev_get_platdata(&pdev->dev);
+ struct resource *res;
+ unsigned int max_engines, max_channels;
+ int i, ret;
+
+ dev_notice(&pdev->dev, "Marvell shared XOR driver\n");
+
+ xordev = devm_kzalloc(&pdev->dev, sizeof(*xordev), GFP_KERNEL);
+ if (!xordev)
+ return -ENOMEM;
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ if (!res)
+ return -ENODEV;
+
+ xordev->xor_base = devm_ioremap(&pdev->dev, res->start,
+ resource_size(res));
+ if (!xordev->xor_base)
+ return -EBUSY;
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+ if (!res)
+ return -ENODEV;
+
+ xordev->xor_high_base = devm_ioremap(&pdev->dev, res->start,
+ resource_size(res));
+ if (!xordev->xor_high_base)
+ return -EBUSY;
+
+ platform_set_drvdata(pdev, xordev);
+
+
+ /*
+ * We need to know which type of XOR device we use before
+ * setting up. In non-dt case it can only be the legacy one.
+ */
+ xordev->xor_type = XOR_ORION;
+ if (pdev->dev.of_node) {
+ const struct of_device_id *of_id =
+ of_match_device(mv_xor_dt_ids,
+ &pdev->dev);
+
+ xordev->xor_type = (uintptr_t)of_id->data;
+ }
+
+ /*
+ * (Re-)program MBUS remapping windows if we are asked to.
+ */
+ if (xordev->xor_type == XOR_ARMADA_37XX) {
+ mv_xor_conf_mbus_windows_a3700(xordev);
+ } else {
+ dram = mv_mbus_dram_info();
+ if (dram)
+ mv_xor_conf_mbus_windows(xordev, dram);
+ }
+
+ /* Not all platforms can gate the clock, so it is not
+ * an error if the clock does not exists.
+ */
+ xordev->clk = clk_get(&pdev->dev, NULL);
+ if (!IS_ERR(xordev->clk))
+ clk_prepare_enable(xordev->clk);
+
+ /*
+ * We don't want to have more than one channel per CPU in
+ * order for async_tx to perform well. So we limit the number
+ * of engines and channels so that we take into account this
+ * constraint. Note that we also want to use channels from
+ * separate engines when possible. For dual-CPU Armada 3700
+ * SoC with single XOR engine allow using its both channels.
+ */
+ max_engines = num_present_cpus();
+ if (xordev->xor_type == XOR_ARMADA_37XX)
+ max_channels = num_present_cpus();
+ else
+ max_channels = min_t(unsigned int,
+ MV_XOR_MAX_CHANNELS,
+ DIV_ROUND_UP(num_present_cpus(), 2));
+
+ if (mv_xor_engine_count >= max_engines)
+ return 0;
+
+ if (pdev->dev.of_node) {
+ struct device_node *np;
+ int i = 0;
+
+ for_each_child_of_node(pdev->dev.of_node, np) {
+ struct mv_xor_chan *chan;
+ dma_cap_mask_t cap_mask;
+ int irq;
+
+ if (i >= max_channels)
+ continue;
+
+ dma_cap_zero(cap_mask);
+ dma_cap_set(DMA_MEMCPY, cap_mask);
+ dma_cap_set(DMA_XOR, cap_mask);
+ dma_cap_set(DMA_INTERRUPT, cap_mask);
+
+ irq = irq_of_parse_and_map(np, 0);
+ if (!irq) {
+ ret = -ENODEV;
+ goto err_channel_add;
+ }
+
+ chan = mv_xor_channel_add(xordev, pdev, i,
+ cap_mask, irq);
+ if (IS_ERR(chan)) {
+ ret = PTR_ERR(chan);
+ irq_dispose_mapping(irq);
+ goto err_channel_add;
+ }
+
+ xordev->channels[i] = chan;
+ i++;
+ }
+ } else if (pdata && pdata->channels) {
+ for (i = 0; i < max_channels; i++) {
+ struct mv_xor_channel_data *cd;
+ struct mv_xor_chan *chan;
+ int irq;
+
+ cd = &pdata->channels[i];
+ irq = platform_get_irq(pdev, i);
+ if (irq < 0) {
+ ret = irq;
+ goto err_channel_add;
+ }
+
+ chan = mv_xor_channel_add(xordev, pdev, i,
+ cd->cap_mask, irq);
+ if (IS_ERR(chan)) {
+ ret = PTR_ERR(chan);
+ goto err_channel_add;
+ }
+
+ xordev->channels[i] = chan;
+ }
+ }
+
+ return 0;
+
+err_channel_add:
+ for (i = 0; i < MV_XOR_MAX_CHANNELS; i++)
+ if (xordev->channels[i]) {
+ mv_xor_channel_remove(xordev->channels[i]);
+ if (pdev->dev.of_node)
+ irq_dispose_mapping(xordev->channels[i]->irq);
+ }
+
+ if (!IS_ERR(xordev->clk)) {
+ clk_disable_unprepare(xordev->clk);
+ clk_put(xordev->clk);
+ }
+
+ return ret;
+}
+
+static struct platform_driver mv_xor_driver = {
+ .probe = mv_xor_probe,
+ .suspend = mv_xor_suspend,
+ .resume = mv_xor_resume,
+ .driver = {
+ .name = MV_XOR_NAME,
+ .of_match_table = mv_xor_dt_ids,
+ },
+};
+
+builtin_platform_driver(mv_xor_driver);
+
+/*
+MODULE_AUTHOR("Saeed Bishara <saeed@marvell.com>");
+MODULE_DESCRIPTION("DMA engine driver for Marvell's XOR engine");
+MODULE_LICENSE("GPL");
+*/
diff --git a/drivers/dma/mv_xor.h b/drivers/dma/mv_xor.h
new file mode 100644
index 000000000..d86086b05
--- /dev/null
+++ b/drivers/dma/mv_xor.h
@@ -0,0 +1,193 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2007, 2008, Marvell International Ltd.
+ */
+
+#ifndef MV_XOR_H
+#define MV_XOR_H
+
+#include <linux/types.h>
+#include <linux/io.h>
+#include <linux/dmaengine.h>
+#include <linux/interrupt.h>
+
+#define MV_XOR_POOL_SIZE (MV_XOR_SLOT_SIZE * 3072)
+#define MV_XOR_SLOT_SIZE 64
+#define MV_XOR_THRESHOLD 1
+#define MV_XOR_MAX_CHANNELS 2
+
+#define MV_XOR_MIN_BYTE_COUNT SZ_128
+#define MV_XOR_MAX_BYTE_COUNT (SZ_16M - 1)
+
+/* Values for the XOR_CONFIG register */
+#define XOR_OPERATION_MODE_XOR 0
+#define XOR_OPERATION_MODE_MEMCPY 2
+#define XOR_OPERATION_MODE_IN_DESC 7
+#define XOR_DESCRIPTOR_SWAP BIT(14)
+#define XOR_DESC_SUCCESS 0x40000000
+
+#define XOR_DESC_OPERATION_XOR (0 << 24)
+#define XOR_DESC_OPERATION_CRC32C (1 << 24)
+#define XOR_DESC_OPERATION_MEMCPY (2 << 24)
+
+#define XOR_DESC_DMA_OWNED BIT(31)
+#define XOR_DESC_EOD_INT_EN BIT(31)
+
+#define XOR_CURR_DESC(chan) (chan->mmr_high_base + 0x10 + (chan->idx * 4))
+#define XOR_NEXT_DESC(chan) (chan->mmr_high_base + 0x00 + (chan->idx * 4))
+#define XOR_BYTE_COUNT(chan) (chan->mmr_high_base + 0x20 + (chan->idx * 4))
+#define XOR_DEST_POINTER(chan) (chan->mmr_high_base + 0xB0 + (chan->idx * 4))
+#define XOR_BLOCK_SIZE(chan) (chan->mmr_high_base + 0xC0 + (chan->idx * 4))
+#define XOR_INIT_VALUE_LOW(chan) (chan->mmr_high_base + 0xE0)
+#define XOR_INIT_VALUE_HIGH(chan) (chan->mmr_high_base + 0xE4)
+
+#define XOR_CONFIG(chan) (chan->mmr_base + 0x10 + (chan->idx * 4))
+#define XOR_ACTIVATION(chan) (chan->mmr_base + 0x20 + (chan->idx * 4))
+#define XOR_INTR_CAUSE(chan) (chan->mmr_base + 0x30)
+#define XOR_INTR_MASK(chan) (chan->mmr_base + 0x40)
+#define XOR_ERROR_CAUSE(chan) (chan->mmr_base + 0x50)
+#define XOR_ERROR_ADDR(chan) (chan->mmr_base + 0x60)
+
+#define XOR_INT_END_OF_DESC BIT(0)
+#define XOR_INT_END_OF_CHAIN BIT(1)
+#define XOR_INT_STOPPED BIT(2)
+#define XOR_INT_PAUSED BIT(3)
+#define XOR_INT_ERR_DECODE BIT(4)
+#define XOR_INT_ERR_RDPROT BIT(5)
+#define XOR_INT_ERR_WRPROT BIT(6)
+#define XOR_INT_ERR_OWN BIT(7)
+#define XOR_INT_ERR_PAR BIT(8)
+#define XOR_INT_ERR_MBUS BIT(9)
+
+#define XOR_INTR_ERRORS (XOR_INT_ERR_DECODE | XOR_INT_ERR_RDPROT | \
+ XOR_INT_ERR_WRPROT | XOR_INT_ERR_OWN | \
+ XOR_INT_ERR_PAR | XOR_INT_ERR_MBUS)
+
+#define XOR_INTR_MASK_VALUE (XOR_INT_END_OF_DESC | XOR_INT_END_OF_CHAIN | \
+ XOR_INT_STOPPED | XOR_INTR_ERRORS)
+
+#define WINDOW_BASE(w) (0x50 + ((w) << 2))
+#define WINDOW_SIZE(w) (0x70 + ((w) << 2))
+#define WINDOW_REMAP_HIGH(w) (0x90 + ((w) << 2))
+#define WINDOW_BAR_ENABLE(chan) (0x40 + ((chan) << 2))
+#define WINDOW_OVERRIDE_CTRL(chan) (0xA0 + ((chan) << 2))
+
+#define WINDOW_COUNT 8
+
+struct mv_xor_device {
+ void __iomem *xor_base;
+ void __iomem *xor_high_base;
+ struct clk *clk;
+ struct mv_xor_chan *channels[MV_XOR_MAX_CHANNELS];
+ int xor_type;
+
+ u32 win_start[WINDOW_COUNT];
+ u32 win_end[WINDOW_COUNT];
+};
+
+/**
+ * struct mv_xor_chan - internal representation of a XOR channel
+ * @pending: allows batching of hardware operations
+ * @lock: serializes enqueue/dequeue operations to the descriptors pool
+ * @mmr_base: memory mapped register base
+ * @idx: the index of the xor channel
+ * @chain: device chain view of the descriptors
+ * @free_slots: free slots usable by the channel
+ * @allocated_slots: slots allocated by the driver
+ * @completed_slots: slots completed by HW but still need to be acked
+ * @device: parent device
+ * @common: common dmaengine channel object members
+ * @slots_allocated: records the actual size of the descriptor slot pool
+ * @irq_tasklet: bottom half where mv_xor_slot_cleanup runs
+ * @op_in_desc: new mode of driver, each op is writen to descriptor.
+ */
+struct mv_xor_chan {
+ int pending;
+ spinlock_t lock; /* protects the descriptor slot pool */
+ void __iomem *mmr_base;
+ void __iomem *mmr_high_base;
+ unsigned int idx;
+ int irq;
+ struct list_head chain;
+ struct list_head free_slots;
+ struct list_head allocated_slots;
+ struct list_head completed_slots;
+ dma_addr_t dma_desc_pool;
+ void *dma_desc_pool_virt;
+ size_t pool_size;
+ struct dma_device dmadev;
+ struct dma_chan dmachan;
+ int slots_allocated;
+ struct tasklet_struct irq_tasklet;
+ int op_in_desc;
+ char dummy_src[MV_XOR_MIN_BYTE_COUNT];
+ char dummy_dst[MV_XOR_MIN_BYTE_COUNT];
+ dma_addr_t dummy_src_addr, dummy_dst_addr;
+ u32 saved_config_reg, saved_int_mask_reg;
+
+ struct mv_xor_device *xordev;
+};
+
+/**
+ * struct mv_xor_desc_slot - software descriptor
+ * @node: node on the mv_xor_chan lists
+ * @hw_desc: virtual address of the hardware descriptor chain
+ * @phys: hardware address of the hardware descriptor chain
+ * @slot_used: slot in use or not
+ * @idx: pool index
+ * @tx_list: list of slots that make up a multi-descriptor transaction
+ * @async_tx: support for the async_tx api
+ */
+struct mv_xor_desc_slot {
+ struct list_head node;
+ struct list_head sg_tx_list;
+ enum dma_transaction_type type;
+ void *hw_desc;
+ u16 idx;
+ struct dma_async_tx_descriptor async_tx;
+};
+
+/*
+ * This structure describes XOR descriptor size 64bytes. The
+ * mv_phy_src_idx() macro must be used when indexing the values of the
+ * phy_src_addr[] array. This is due to the fact that the 'descriptor
+ * swap' feature, used on big endian systems, swaps descriptors data
+ * within blocks of 8 bytes. So two consecutive values of the
+ * phy_src_addr[] array are actually swapped in big-endian, which
+ * explains the different mv_phy_src_idx() implementation.
+ */
+#if defined(__LITTLE_ENDIAN)
+struct mv_xor_desc {
+ u32 status; /* descriptor execution status */
+ u32 crc32_result; /* result of CRC-32 calculation */
+ u32 desc_command; /* type of operation to be carried out */
+ u32 phy_next_desc; /* next descriptor address pointer */
+ u32 byte_count; /* size of src/dst blocks in bytes */
+ u32 phy_dest_addr; /* destination block address */
+ u32 phy_src_addr[8]; /* source block addresses */
+ u32 reserved0;
+ u32 reserved1;
+};
+#define mv_phy_src_idx(src_idx) (src_idx)
+#else
+struct mv_xor_desc {
+ u32 crc32_result; /* result of CRC-32 calculation */
+ u32 status; /* descriptor execution status */
+ u32 phy_next_desc; /* next descriptor address pointer */
+ u32 desc_command; /* type of operation to be carried out */
+ u32 phy_dest_addr; /* destination block address */
+ u32 byte_count; /* size of src/dst blocks in bytes */
+ u32 phy_src_addr[8]; /* source block addresses */
+ u32 reserved1;
+ u32 reserved0;
+};
+#define mv_phy_src_idx(src_idx) (src_idx ^ 1)
+#endif
+
+#define to_mv_sw_desc(addr_hw_desc) \
+ container_of(addr_hw_desc, struct mv_xor_desc_slot, hw_desc)
+
+#define mv_hw_desc_slot_idx(hw_desc, idx) \
+ ((void *)(((unsigned long)hw_desc) + ((idx) << 5)))
+
+#endif
diff --git a/drivers/dma/mv_xor_v2.c b/drivers/dma/mv_xor_v2.c
new file mode 100644
index 000000000..d086ff182
--- /dev/null
+++ b/drivers/dma/mv_xor_v2.c
@@ -0,0 +1,923 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2015-2016 Marvell International Ltd.
+
+ */
+
+#include <linux/clk.h>
+#include <linux/dma-mapping.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/msi.h>
+#include <linux/of.h>
+#include <linux/of_irq.h>
+#include <linux/platform_device.h>
+#include <linux/spinlock.h>
+
+#include "dmaengine.h"
+
+/* DMA Engine Registers */
+#define MV_XOR_V2_DMA_DESQ_BALR_OFF 0x000
+#define MV_XOR_V2_DMA_DESQ_BAHR_OFF 0x004
+#define MV_XOR_V2_DMA_DESQ_SIZE_OFF 0x008
+#define MV_XOR_V2_DMA_DESQ_DONE_OFF 0x00C
+#define MV_XOR_V2_DMA_DESQ_DONE_PENDING_MASK 0x7FFF
+#define MV_XOR_V2_DMA_DESQ_DONE_PENDING_SHIFT 0
+#define MV_XOR_V2_DMA_DESQ_DONE_READ_PTR_MASK 0x1FFF
+#define MV_XOR_V2_DMA_DESQ_DONE_READ_PTR_SHIFT 16
+#define MV_XOR_V2_DMA_DESQ_ARATTR_OFF 0x010
+#define MV_XOR_V2_DMA_DESQ_ATTR_CACHE_MASK 0x3F3F
+#define MV_XOR_V2_DMA_DESQ_ATTR_OUTER_SHAREABLE 0x202
+#define MV_XOR_V2_DMA_DESQ_ATTR_CACHEABLE 0x3C3C
+#define MV_XOR_V2_DMA_IMSG_CDAT_OFF 0x014
+#define MV_XOR_V2_DMA_IMSG_THRD_OFF 0x018
+#define MV_XOR_V2_DMA_IMSG_THRD_MASK 0x7FFF
+#define MV_XOR_V2_DMA_IMSG_TIMER_EN BIT(18)
+#define MV_XOR_V2_DMA_DESQ_AWATTR_OFF 0x01C
+ /* Same flags as MV_XOR_V2_DMA_DESQ_ARATTR_OFF */
+#define MV_XOR_V2_DMA_DESQ_ALLOC_OFF 0x04C
+#define MV_XOR_V2_DMA_DESQ_ALLOC_WRPTR_MASK 0xFFFF
+#define MV_XOR_V2_DMA_DESQ_ALLOC_WRPTR_SHIFT 16
+#define MV_XOR_V2_DMA_IMSG_BALR_OFF 0x050
+#define MV_XOR_V2_DMA_IMSG_BAHR_OFF 0x054
+#define MV_XOR_V2_DMA_DESQ_CTRL_OFF 0x100
+#define MV_XOR_V2_DMA_DESQ_CTRL_32B 1
+#define MV_XOR_V2_DMA_DESQ_CTRL_128B 7
+#define MV_XOR_V2_DMA_DESQ_STOP_OFF 0x800
+#define MV_XOR_V2_DMA_DESQ_DEALLOC_OFF 0x804
+#define MV_XOR_V2_DMA_DESQ_ADD_OFF 0x808
+#define MV_XOR_V2_DMA_IMSG_TMOT 0x810
+#define MV_XOR_V2_DMA_IMSG_TIMER_THRD_MASK 0x1FFF
+
+/* XOR Global registers */
+#define MV_XOR_V2_GLOB_BW_CTRL 0x4
+#define MV_XOR_V2_GLOB_BW_CTRL_NUM_OSTD_RD_SHIFT 0
+#define MV_XOR_V2_GLOB_BW_CTRL_NUM_OSTD_RD_VAL 64
+#define MV_XOR_V2_GLOB_BW_CTRL_NUM_OSTD_WR_SHIFT 8
+#define MV_XOR_V2_GLOB_BW_CTRL_NUM_OSTD_WR_VAL 8
+#define MV_XOR_V2_GLOB_BW_CTRL_RD_BURST_LEN_SHIFT 12
+#define MV_XOR_V2_GLOB_BW_CTRL_RD_BURST_LEN_VAL 4
+#define MV_XOR_V2_GLOB_BW_CTRL_WR_BURST_LEN_SHIFT 16
+#define MV_XOR_V2_GLOB_BW_CTRL_WR_BURST_LEN_VAL 4
+#define MV_XOR_V2_GLOB_PAUSE 0x014
+#define MV_XOR_V2_GLOB_PAUSE_AXI_TIME_DIS_VAL 0x8
+#define MV_XOR_V2_GLOB_SYS_INT_CAUSE 0x200
+#define MV_XOR_V2_GLOB_SYS_INT_MASK 0x204
+#define MV_XOR_V2_GLOB_MEM_INT_CAUSE 0x220
+#define MV_XOR_V2_GLOB_MEM_INT_MASK 0x224
+
+#define MV_XOR_V2_MIN_DESC_SIZE 32
+#define MV_XOR_V2_EXT_DESC_SIZE 128
+
+#define MV_XOR_V2_DESC_RESERVED_SIZE 12
+#define MV_XOR_V2_DESC_BUFF_D_ADDR_SIZE 12
+
+#define MV_XOR_V2_CMD_LINE_NUM_MAX_D_BUF 8
+
+/*
+ * Descriptors queue size. With 32 bytes descriptors, up to 2^14
+ * descriptors are allowed, with 128 bytes descriptors, up to 2^12
+ * descriptors are allowed. This driver uses 128 bytes descriptors,
+ * but experimentation has shown that a set of 1024 descriptors is
+ * sufficient to reach a good level of performance.
+ */
+#define MV_XOR_V2_DESC_NUM 1024
+
+/*
+ * Threshold values for descriptors and timeout, determined by
+ * experimentation as giving a good level of performance.
+ */
+#define MV_XOR_V2_DONE_IMSG_THRD 0x14
+#define MV_XOR_V2_TIMER_THRD 0xB0
+
+/**
+ * struct mv_xor_v2_descriptor - DMA HW descriptor
+ * @desc_id: used by S/W and is not affected by H/W.
+ * @flags: error and status flags
+ * @crc32_result: CRC32 calculation result
+ * @desc_ctrl: operation mode and control flags
+ * @buff_size: amount of bytes to be processed
+ * @fill_pattern_src_addr: Fill-Pattern or Source-Address and
+ * AW-Attributes
+ * @data_buff_addr: Source (and might be RAID6 destination)
+ * addresses of data buffers in RAID5 and RAID6
+ * @reserved: reserved
+ */
+struct mv_xor_v2_descriptor {
+ u16 desc_id;
+ u16 flags;
+ u32 crc32_result;
+ u32 desc_ctrl;
+
+ /* Definitions for desc_ctrl */
+#define DESC_NUM_ACTIVE_D_BUF_SHIFT 22
+#define DESC_OP_MODE_SHIFT 28
+#define DESC_OP_MODE_NOP 0 /* Idle operation */
+#define DESC_OP_MODE_MEMCPY 1 /* Pure-DMA operation */
+#define DESC_OP_MODE_MEMSET 2 /* Mem-Fill operation */
+#define DESC_OP_MODE_MEMINIT 3 /* Mem-Init operation */
+#define DESC_OP_MODE_MEM_COMPARE 4 /* Mem-Compare operation */
+#define DESC_OP_MODE_CRC32 5 /* CRC32 calculation */
+#define DESC_OP_MODE_XOR 6 /* RAID5 (XOR) operation */
+#define DESC_OP_MODE_RAID6 7 /* RAID6 P&Q-generation */
+#define DESC_OP_MODE_RAID6_REC 8 /* RAID6 Recovery */
+#define DESC_Q_BUFFER_ENABLE BIT(16)
+#define DESC_P_BUFFER_ENABLE BIT(17)
+#define DESC_IOD BIT(27)
+
+ u32 buff_size;
+ u32 fill_pattern_src_addr[4];
+ u32 data_buff_addr[MV_XOR_V2_DESC_BUFF_D_ADDR_SIZE];
+ u32 reserved[MV_XOR_V2_DESC_RESERVED_SIZE];
+};
+
+/**
+ * struct mv_xor_v2_device - implements a xor device
+ * @lock: lock for the engine
+ * @clk: reference to the 'core' clock
+ * @reg_clk: reference to the 'reg' clock
+ * @dma_base: memory mapped DMA register base
+ * @glob_base: memory mapped global register base
+ * @irq_tasklet: tasklet used for IRQ handling call-backs
+ * @free_sw_desc: linked list of free SW descriptors
+ * @dmadev: dma device
+ * @dmachan: dma channel
+ * @hw_desq: HW descriptors queue
+ * @hw_desq_virt: virtual address of DESCQ
+ * @sw_desq: SW descriptors queue
+ * @desc_size: HW descriptor size
+ * @npendings: number of pending descriptors (for which tx_submit has
+ * @hw_queue_idx: HW queue index
+ * @irq: The Linux interrupt number
+ * been called, but not yet issue_pending)
+ */
+struct mv_xor_v2_device {
+ spinlock_t lock;
+ void __iomem *dma_base;
+ void __iomem *glob_base;
+ struct clk *clk;
+ struct clk *reg_clk;
+ struct tasklet_struct irq_tasklet;
+ struct list_head free_sw_desc;
+ struct dma_device dmadev;
+ struct dma_chan dmachan;
+ dma_addr_t hw_desq;
+ struct mv_xor_v2_descriptor *hw_desq_virt;
+ struct mv_xor_v2_sw_desc *sw_desq;
+ int desc_size;
+ unsigned int npendings;
+ unsigned int hw_queue_idx;
+ unsigned int irq;
+};
+
+/**
+ * struct mv_xor_v2_sw_desc - implements a xor SW descriptor
+ * @idx: descriptor index
+ * @async_tx: support for the async_tx api
+ * @hw_desc: assosiated HW descriptor
+ * @free_list: node of the free SW descriprots list
+*/
+struct mv_xor_v2_sw_desc {
+ int idx;
+ struct dma_async_tx_descriptor async_tx;
+ struct mv_xor_v2_descriptor hw_desc;
+ struct list_head free_list;
+};
+
+/*
+ * Fill the data buffers to a HW descriptor
+ */
+static void mv_xor_v2_set_data_buffers(struct mv_xor_v2_device *xor_dev,
+ struct mv_xor_v2_descriptor *desc,
+ dma_addr_t src, int index)
+{
+ int arr_index = ((index >> 1) * 3);
+
+ /*
+ * Fill the buffer's addresses to the descriptor.
+ *
+ * The format of the buffers address for 2 sequential buffers
+ * X and X + 1:
+ *
+ * First word: Buffer-DX-Address-Low[31:0]
+ * Second word: Buffer-DX+1-Address-Low[31:0]
+ * Third word: DX+1-Buffer-Address-High[47:32] [31:16]
+ * DX-Buffer-Address-High[47:32] [15:0]
+ */
+ if ((index & 0x1) == 0) {
+ desc->data_buff_addr[arr_index] = lower_32_bits(src);
+
+ desc->data_buff_addr[arr_index + 2] &= ~0xFFFF;
+ desc->data_buff_addr[arr_index + 2] |=
+ upper_32_bits(src) & 0xFFFF;
+ } else {
+ desc->data_buff_addr[arr_index + 1] =
+ lower_32_bits(src);
+
+ desc->data_buff_addr[arr_index + 2] &= ~0xFFFF0000;
+ desc->data_buff_addr[arr_index + 2] |=
+ (upper_32_bits(src) & 0xFFFF) << 16;
+ }
+}
+
+/*
+ * notify the engine of new descriptors, and update the available index.
+ */
+static void mv_xor_v2_add_desc_to_desq(struct mv_xor_v2_device *xor_dev,
+ int num_of_desc)
+{
+ /* write the number of new descriptors in the DESQ. */
+ writel(num_of_desc, xor_dev->dma_base + MV_XOR_V2_DMA_DESQ_ADD_OFF);
+}
+
+/*
+ * free HW descriptors
+ */
+static void mv_xor_v2_free_desc_from_desq(struct mv_xor_v2_device *xor_dev,
+ int num_of_desc)
+{
+ /* write the number of new descriptors in the DESQ. */
+ writel(num_of_desc, xor_dev->dma_base + MV_XOR_V2_DMA_DESQ_DEALLOC_OFF);
+}
+
+/*
+ * Set descriptor size
+ * Return the HW descriptor size in bytes
+ */
+static int mv_xor_v2_set_desc_size(struct mv_xor_v2_device *xor_dev)
+{
+ writel(MV_XOR_V2_DMA_DESQ_CTRL_128B,
+ xor_dev->dma_base + MV_XOR_V2_DMA_DESQ_CTRL_OFF);
+
+ return MV_XOR_V2_EXT_DESC_SIZE;
+}
+
+/*
+ * Set the IMSG threshold
+ */
+static inline
+void mv_xor_v2_enable_imsg_thrd(struct mv_xor_v2_device *xor_dev)
+{
+ u32 reg;
+
+ /* Configure threshold of number of descriptors, and enable timer */
+ reg = readl(xor_dev->dma_base + MV_XOR_V2_DMA_IMSG_THRD_OFF);
+ reg &= ~MV_XOR_V2_DMA_IMSG_THRD_MASK;
+ reg |= MV_XOR_V2_DONE_IMSG_THRD;
+ reg |= MV_XOR_V2_DMA_IMSG_TIMER_EN;
+ writel(reg, xor_dev->dma_base + MV_XOR_V2_DMA_IMSG_THRD_OFF);
+
+ /* Configure Timer Threshold */
+ reg = readl(xor_dev->dma_base + MV_XOR_V2_DMA_IMSG_TMOT);
+ reg &= ~MV_XOR_V2_DMA_IMSG_TIMER_THRD_MASK;
+ reg |= MV_XOR_V2_TIMER_THRD;
+ writel(reg, xor_dev->dma_base + MV_XOR_V2_DMA_IMSG_TMOT);
+}
+
+static irqreturn_t mv_xor_v2_interrupt_handler(int irq, void *data)
+{
+ struct mv_xor_v2_device *xor_dev = data;
+ unsigned int ndescs;
+ u32 reg;
+
+ reg = readl(xor_dev->dma_base + MV_XOR_V2_DMA_DESQ_DONE_OFF);
+
+ ndescs = ((reg >> MV_XOR_V2_DMA_DESQ_DONE_PENDING_SHIFT) &
+ MV_XOR_V2_DMA_DESQ_DONE_PENDING_MASK);
+
+ /* No descriptors to process */
+ if (!ndescs)
+ return IRQ_NONE;
+
+ /* schedule a tasklet to handle descriptors callbacks */
+ tasklet_schedule(&xor_dev->irq_tasklet);
+
+ return IRQ_HANDLED;
+}
+
+/*
+ * submit a descriptor to the DMA engine
+ */
+static dma_cookie_t
+mv_xor_v2_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+ void *dest_hw_desc;
+ dma_cookie_t cookie;
+ struct mv_xor_v2_sw_desc *sw_desc =
+ container_of(tx, struct mv_xor_v2_sw_desc, async_tx);
+ struct mv_xor_v2_device *xor_dev =
+ container_of(tx->chan, struct mv_xor_v2_device, dmachan);
+
+ dev_dbg(xor_dev->dmadev.dev,
+ "%s sw_desc %p: async_tx %p\n",
+ __func__, sw_desc, &sw_desc->async_tx);
+
+ /* assign cookie */
+ spin_lock_bh(&xor_dev->lock);
+ cookie = dma_cookie_assign(tx);
+
+ /* copy the HW descriptor from the SW descriptor to the DESQ */
+ dest_hw_desc = xor_dev->hw_desq_virt + xor_dev->hw_queue_idx;
+
+ memcpy(dest_hw_desc, &sw_desc->hw_desc, xor_dev->desc_size);
+
+ xor_dev->npendings++;
+ xor_dev->hw_queue_idx++;
+ if (xor_dev->hw_queue_idx >= MV_XOR_V2_DESC_NUM)
+ xor_dev->hw_queue_idx = 0;
+
+ spin_unlock_bh(&xor_dev->lock);
+
+ return cookie;
+}
+
+/*
+ * Prepare a SW descriptor
+ */
+static struct mv_xor_v2_sw_desc *
+mv_xor_v2_prep_sw_desc(struct mv_xor_v2_device *xor_dev)
+{
+ struct mv_xor_v2_sw_desc *sw_desc;
+ bool found = false;
+
+ /* Lock the channel */
+ spin_lock_bh(&xor_dev->lock);
+
+ if (list_empty(&xor_dev->free_sw_desc)) {
+ spin_unlock_bh(&xor_dev->lock);
+ /* schedule tasklet to free some descriptors */
+ tasklet_schedule(&xor_dev->irq_tasklet);
+ return NULL;
+ }
+
+ list_for_each_entry(sw_desc, &xor_dev->free_sw_desc, free_list) {
+ if (async_tx_test_ack(&sw_desc->async_tx)) {
+ found = true;
+ break;
+ }
+ }
+
+ if (!found) {
+ spin_unlock_bh(&xor_dev->lock);
+ return NULL;
+ }
+
+ list_del(&sw_desc->free_list);
+
+ /* Release the channel */
+ spin_unlock_bh(&xor_dev->lock);
+
+ return sw_desc;
+}
+
+/*
+ * Prepare a HW descriptor for a memcpy operation
+ */
+static struct dma_async_tx_descriptor *
+mv_xor_v2_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest,
+ dma_addr_t src, size_t len, unsigned long flags)
+{
+ struct mv_xor_v2_sw_desc *sw_desc;
+ struct mv_xor_v2_descriptor *hw_descriptor;
+ struct mv_xor_v2_device *xor_dev;
+
+ xor_dev = container_of(chan, struct mv_xor_v2_device, dmachan);
+
+ dev_dbg(xor_dev->dmadev.dev,
+ "%s len: %zu src %pad dest %pad flags: %ld\n",
+ __func__, len, &src, &dest, flags);
+
+ sw_desc = mv_xor_v2_prep_sw_desc(xor_dev);
+ if (!sw_desc)
+ return NULL;
+
+ sw_desc->async_tx.flags = flags;
+
+ /* set the HW descriptor */
+ hw_descriptor = &sw_desc->hw_desc;
+
+ /* save the SW descriptor ID to restore when operation is done */
+ hw_descriptor->desc_id = sw_desc->idx;
+
+ /* Set the MEMCPY control word */
+ hw_descriptor->desc_ctrl =
+ DESC_OP_MODE_MEMCPY << DESC_OP_MODE_SHIFT;
+
+ if (flags & DMA_PREP_INTERRUPT)
+ hw_descriptor->desc_ctrl |= DESC_IOD;
+
+ /* Set source address */
+ hw_descriptor->fill_pattern_src_addr[0] = lower_32_bits(src);
+ hw_descriptor->fill_pattern_src_addr[1] =
+ upper_32_bits(src) & 0xFFFF;
+
+ /* Set Destination address */
+ hw_descriptor->fill_pattern_src_addr[2] = lower_32_bits(dest);
+ hw_descriptor->fill_pattern_src_addr[3] =
+ upper_32_bits(dest) & 0xFFFF;
+
+ /* Set buffers size */
+ hw_descriptor->buff_size = len;
+
+ /* return the async tx descriptor */
+ return &sw_desc->async_tx;
+}
+
+/*
+ * Prepare a HW descriptor for a XOR operation
+ */
+static struct dma_async_tx_descriptor *
+mv_xor_v2_prep_dma_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src,
+ unsigned int src_cnt, size_t len, unsigned long flags)
+{
+ struct mv_xor_v2_sw_desc *sw_desc;
+ struct mv_xor_v2_descriptor *hw_descriptor;
+ struct mv_xor_v2_device *xor_dev =
+ container_of(chan, struct mv_xor_v2_device, dmachan);
+ int i;
+
+ if (src_cnt > MV_XOR_V2_CMD_LINE_NUM_MAX_D_BUF || src_cnt < 1)
+ return NULL;
+
+ dev_dbg(xor_dev->dmadev.dev,
+ "%s src_cnt: %d len: %zu dest %pad flags: %ld\n",
+ __func__, src_cnt, len, &dest, flags);
+
+ sw_desc = mv_xor_v2_prep_sw_desc(xor_dev);
+ if (!sw_desc)
+ return NULL;
+
+ sw_desc->async_tx.flags = flags;
+
+ /* set the HW descriptor */
+ hw_descriptor = &sw_desc->hw_desc;
+
+ /* save the SW descriptor ID to restore when operation is done */
+ hw_descriptor->desc_id = sw_desc->idx;
+
+ /* Set the XOR control word */
+ hw_descriptor->desc_ctrl =
+ DESC_OP_MODE_XOR << DESC_OP_MODE_SHIFT;
+ hw_descriptor->desc_ctrl |= DESC_P_BUFFER_ENABLE;
+
+ if (flags & DMA_PREP_INTERRUPT)
+ hw_descriptor->desc_ctrl |= DESC_IOD;
+
+ /* Set the data buffers */
+ for (i = 0; i < src_cnt; i++)
+ mv_xor_v2_set_data_buffers(xor_dev, hw_descriptor, src[i], i);
+
+ hw_descriptor->desc_ctrl |=
+ src_cnt << DESC_NUM_ACTIVE_D_BUF_SHIFT;
+
+ /* Set Destination address */
+ hw_descriptor->fill_pattern_src_addr[2] = lower_32_bits(dest);
+ hw_descriptor->fill_pattern_src_addr[3] =
+ upper_32_bits(dest) & 0xFFFF;
+
+ /* Set buffers size */
+ hw_descriptor->buff_size = len;
+
+ /* return the async tx descriptor */
+ return &sw_desc->async_tx;
+}
+
+/*
+ * Prepare a HW descriptor for interrupt operation.
+ */
+static struct dma_async_tx_descriptor *
+mv_xor_v2_prep_dma_interrupt(struct dma_chan *chan, unsigned long flags)
+{
+ struct mv_xor_v2_sw_desc *sw_desc;
+ struct mv_xor_v2_descriptor *hw_descriptor;
+ struct mv_xor_v2_device *xor_dev =
+ container_of(chan, struct mv_xor_v2_device, dmachan);
+
+ sw_desc = mv_xor_v2_prep_sw_desc(xor_dev);
+ if (!sw_desc)
+ return NULL;
+
+ /* set the HW descriptor */
+ hw_descriptor = &sw_desc->hw_desc;
+
+ /* save the SW descriptor ID to restore when operation is done */
+ hw_descriptor->desc_id = sw_desc->idx;
+
+ /* Set the INTERRUPT control word */
+ hw_descriptor->desc_ctrl =
+ DESC_OP_MODE_NOP << DESC_OP_MODE_SHIFT;
+ hw_descriptor->desc_ctrl |= DESC_IOD;
+
+ /* return the async tx descriptor */
+ return &sw_desc->async_tx;
+}
+
+/*
+ * push pending transactions to hardware
+ */
+static void mv_xor_v2_issue_pending(struct dma_chan *chan)
+{
+ struct mv_xor_v2_device *xor_dev =
+ container_of(chan, struct mv_xor_v2_device, dmachan);
+
+ spin_lock_bh(&xor_dev->lock);
+
+ /*
+ * update the engine with the number of descriptors to
+ * process
+ */
+ mv_xor_v2_add_desc_to_desq(xor_dev, xor_dev->npendings);
+ xor_dev->npendings = 0;
+
+ spin_unlock_bh(&xor_dev->lock);
+}
+
+static inline
+int mv_xor_v2_get_pending_params(struct mv_xor_v2_device *xor_dev,
+ int *pending_ptr)
+{
+ u32 reg;
+
+ reg = readl(xor_dev->dma_base + MV_XOR_V2_DMA_DESQ_DONE_OFF);
+
+ /* get the next pending descriptor index */
+ *pending_ptr = ((reg >> MV_XOR_V2_DMA_DESQ_DONE_READ_PTR_SHIFT) &
+ MV_XOR_V2_DMA_DESQ_DONE_READ_PTR_MASK);
+
+ /* get the number of descriptors pending handle */
+ return ((reg >> MV_XOR_V2_DMA_DESQ_DONE_PENDING_SHIFT) &
+ MV_XOR_V2_DMA_DESQ_DONE_PENDING_MASK);
+}
+
+/*
+ * handle the descriptors after HW process
+ */
+static void mv_xor_v2_tasklet(struct tasklet_struct *t)
+{
+ struct mv_xor_v2_device *xor_dev = from_tasklet(xor_dev, t,
+ irq_tasklet);
+ int pending_ptr, num_of_pending, i;
+ struct mv_xor_v2_sw_desc *next_pending_sw_desc = NULL;
+
+ dev_dbg(xor_dev->dmadev.dev, "%s %d\n", __func__, __LINE__);
+
+ /* get the pending descriptors parameters */
+ num_of_pending = mv_xor_v2_get_pending_params(xor_dev, &pending_ptr);
+
+ /* loop over free descriptors */
+ for (i = 0; i < num_of_pending; i++) {
+ struct mv_xor_v2_descriptor *next_pending_hw_desc =
+ xor_dev->hw_desq_virt + pending_ptr;
+
+ /* get the SW descriptor related to the HW descriptor */
+ next_pending_sw_desc =
+ &xor_dev->sw_desq[next_pending_hw_desc->desc_id];
+
+ /* call the callback */
+ if (next_pending_sw_desc->async_tx.cookie > 0) {
+ /*
+ * update the channel's completed cookie - no
+ * lock is required the IMSG threshold provide
+ * the locking
+ */
+ dma_cookie_complete(&next_pending_sw_desc->async_tx);
+
+ dma_descriptor_unmap(&next_pending_sw_desc->async_tx);
+ dmaengine_desc_get_callback_invoke(
+ &next_pending_sw_desc->async_tx, NULL);
+ }
+
+ dma_run_dependencies(&next_pending_sw_desc->async_tx);
+
+ /* Lock the channel */
+ spin_lock(&xor_dev->lock);
+
+ /* add the SW descriptor to the free descriptors list */
+ list_add(&next_pending_sw_desc->free_list,
+ &xor_dev->free_sw_desc);
+
+ /* Release the channel */
+ spin_unlock(&xor_dev->lock);
+
+ /* increment the next descriptor */
+ pending_ptr++;
+ if (pending_ptr >= MV_XOR_V2_DESC_NUM)
+ pending_ptr = 0;
+ }
+
+ if (num_of_pending != 0) {
+ /* free the descriptores */
+ mv_xor_v2_free_desc_from_desq(xor_dev, num_of_pending);
+ }
+}
+
+/*
+ * Set DMA Interrupt-message (IMSG) parameters
+ */
+static void mv_xor_v2_set_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
+{
+ struct mv_xor_v2_device *xor_dev = dev_get_drvdata(desc->dev);
+
+ writel(msg->address_lo,
+ xor_dev->dma_base + MV_XOR_V2_DMA_IMSG_BALR_OFF);
+ writel(msg->address_hi & 0xFFFF,
+ xor_dev->dma_base + MV_XOR_V2_DMA_IMSG_BAHR_OFF);
+ writel(msg->data,
+ xor_dev->dma_base + MV_XOR_V2_DMA_IMSG_CDAT_OFF);
+}
+
+static int mv_xor_v2_descq_init(struct mv_xor_v2_device *xor_dev)
+{
+ u32 reg;
+
+ /* write the DESQ size to the DMA engine */
+ writel(MV_XOR_V2_DESC_NUM,
+ xor_dev->dma_base + MV_XOR_V2_DMA_DESQ_SIZE_OFF);
+
+ /* write the DESQ address to the DMA enngine*/
+ writel(lower_32_bits(xor_dev->hw_desq),
+ xor_dev->dma_base + MV_XOR_V2_DMA_DESQ_BALR_OFF);
+ writel(upper_32_bits(xor_dev->hw_desq),
+ xor_dev->dma_base + MV_XOR_V2_DMA_DESQ_BAHR_OFF);
+
+ /*
+ * This is a temporary solution, until we activate the
+ * SMMU. Set the attributes for reading & writing data buffers
+ * & descriptors to:
+ *
+ * - OuterShareable - Snoops will be performed on CPU caches
+ * - Enable cacheable - Bufferable, Modifiable, Other Allocate
+ * and Allocate
+ */
+ reg = readl(xor_dev->dma_base + MV_XOR_V2_DMA_DESQ_ARATTR_OFF);
+ reg &= ~MV_XOR_V2_DMA_DESQ_ATTR_CACHE_MASK;
+ reg |= MV_XOR_V2_DMA_DESQ_ATTR_OUTER_SHAREABLE |
+ MV_XOR_V2_DMA_DESQ_ATTR_CACHEABLE;
+ writel(reg, xor_dev->dma_base + MV_XOR_V2_DMA_DESQ_ARATTR_OFF);
+
+ reg = readl(xor_dev->dma_base + MV_XOR_V2_DMA_DESQ_AWATTR_OFF);
+ reg &= ~MV_XOR_V2_DMA_DESQ_ATTR_CACHE_MASK;
+ reg |= MV_XOR_V2_DMA_DESQ_ATTR_OUTER_SHAREABLE |
+ MV_XOR_V2_DMA_DESQ_ATTR_CACHEABLE;
+ writel(reg, xor_dev->dma_base + MV_XOR_V2_DMA_DESQ_AWATTR_OFF);
+
+ /* BW CTRL - set values to optimize the XOR performance:
+ *
+ * - Set WrBurstLen & RdBurstLen - the unit will issue
+ * maximum of 256B write/read transactions.
+ * - Limit the number of outstanding write & read data
+ * (OBB/IBB) requests to the maximal value.
+ */
+ reg = ((MV_XOR_V2_GLOB_BW_CTRL_NUM_OSTD_RD_VAL <<
+ MV_XOR_V2_GLOB_BW_CTRL_NUM_OSTD_RD_SHIFT) |
+ (MV_XOR_V2_GLOB_BW_CTRL_NUM_OSTD_WR_VAL <<
+ MV_XOR_V2_GLOB_BW_CTRL_NUM_OSTD_WR_SHIFT) |
+ (MV_XOR_V2_GLOB_BW_CTRL_RD_BURST_LEN_VAL <<
+ MV_XOR_V2_GLOB_BW_CTRL_RD_BURST_LEN_SHIFT) |
+ (MV_XOR_V2_GLOB_BW_CTRL_WR_BURST_LEN_VAL <<
+ MV_XOR_V2_GLOB_BW_CTRL_WR_BURST_LEN_SHIFT));
+ writel(reg, xor_dev->glob_base + MV_XOR_V2_GLOB_BW_CTRL);
+
+ /* Disable the AXI timer feature */
+ reg = readl(xor_dev->glob_base + MV_XOR_V2_GLOB_PAUSE);
+ reg |= MV_XOR_V2_GLOB_PAUSE_AXI_TIME_DIS_VAL;
+ writel(reg, xor_dev->glob_base + MV_XOR_V2_GLOB_PAUSE);
+
+ /* enable the DMA engine */
+ writel(0, xor_dev->dma_base + MV_XOR_V2_DMA_DESQ_STOP_OFF);
+
+ return 0;
+}
+
+static int mv_xor_v2_suspend(struct platform_device *dev, pm_message_t state)
+{
+ struct mv_xor_v2_device *xor_dev = platform_get_drvdata(dev);
+
+ /* Set this bit to disable to stop the XOR unit. */
+ writel(0x1, xor_dev->dma_base + MV_XOR_V2_DMA_DESQ_STOP_OFF);
+
+ return 0;
+}
+
+static int mv_xor_v2_resume(struct platform_device *dev)
+{
+ struct mv_xor_v2_device *xor_dev = platform_get_drvdata(dev);
+
+ mv_xor_v2_set_desc_size(xor_dev);
+ mv_xor_v2_enable_imsg_thrd(xor_dev);
+ mv_xor_v2_descq_init(xor_dev);
+
+ return 0;
+}
+
+static int mv_xor_v2_probe(struct platform_device *pdev)
+{
+ struct mv_xor_v2_device *xor_dev;
+ struct resource *res;
+ int i, ret = 0;
+ struct dma_device *dma_dev;
+ struct mv_xor_v2_sw_desc *sw_desc;
+
+ BUILD_BUG_ON(sizeof(struct mv_xor_v2_descriptor) !=
+ MV_XOR_V2_EXT_DESC_SIZE);
+
+ xor_dev = devm_kzalloc(&pdev->dev, sizeof(*xor_dev), GFP_KERNEL);
+ if (!xor_dev)
+ return -ENOMEM;
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ xor_dev->dma_base = devm_ioremap_resource(&pdev->dev, res);
+ if (IS_ERR(xor_dev->dma_base))
+ return PTR_ERR(xor_dev->dma_base);
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+ xor_dev->glob_base = devm_ioremap_resource(&pdev->dev, res);
+ if (IS_ERR(xor_dev->glob_base))
+ return PTR_ERR(xor_dev->glob_base);
+
+ platform_set_drvdata(pdev, xor_dev);
+
+ ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(40));
+ if (ret)
+ return ret;
+
+ xor_dev->reg_clk = devm_clk_get(&pdev->dev, "reg");
+ if (PTR_ERR(xor_dev->reg_clk) != -ENOENT) {
+ if (!IS_ERR(xor_dev->reg_clk)) {
+ ret = clk_prepare_enable(xor_dev->reg_clk);
+ if (ret)
+ return ret;
+ } else {
+ return PTR_ERR(xor_dev->reg_clk);
+ }
+ }
+
+ xor_dev->clk = devm_clk_get(&pdev->dev, NULL);
+ if (PTR_ERR(xor_dev->clk) == -EPROBE_DEFER) {
+ ret = -EPROBE_DEFER;
+ goto disable_reg_clk;
+ }
+ if (!IS_ERR(xor_dev->clk)) {
+ ret = clk_prepare_enable(xor_dev->clk);
+ if (ret)
+ goto disable_reg_clk;
+ }
+
+ ret = platform_msi_domain_alloc_irqs(&pdev->dev, 1,
+ mv_xor_v2_set_msi_msg);
+ if (ret)
+ goto disable_clk;
+
+ xor_dev->irq = msi_get_virq(&pdev->dev, 0);
+
+ ret = devm_request_irq(&pdev->dev, xor_dev->irq,
+ mv_xor_v2_interrupt_handler, 0,
+ dev_name(&pdev->dev), xor_dev);
+ if (ret)
+ goto free_msi_irqs;
+
+ tasklet_setup(&xor_dev->irq_tasklet, mv_xor_v2_tasklet);
+
+ xor_dev->desc_size = mv_xor_v2_set_desc_size(xor_dev);
+
+ dma_cookie_init(&xor_dev->dmachan);
+
+ /*
+ * allocate coherent memory for hardware descriptors
+ * note: writecombine gives slightly better performance, but
+ * requires that we explicitly flush the writes
+ */
+ xor_dev->hw_desq_virt =
+ dma_alloc_coherent(&pdev->dev,
+ xor_dev->desc_size * MV_XOR_V2_DESC_NUM,
+ &xor_dev->hw_desq, GFP_KERNEL);
+ if (!xor_dev->hw_desq_virt) {
+ ret = -ENOMEM;
+ goto free_msi_irqs;
+ }
+
+ /* alloc memory for the SW descriptors */
+ xor_dev->sw_desq = devm_kcalloc(&pdev->dev,
+ MV_XOR_V2_DESC_NUM, sizeof(*sw_desc),
+ GFP_KERNEL);
+ if (!xor_dev->sw_desq) {
+ ret = -ENOMEM;
+ goto free_hw_desq;
+ }
+
+ spin_lock_init(&xor_dev->lock);
+
+ /* init the free SW descriptors list */
+ INIT_LIST_HEAD(&xor_dev->free_sw_desc);
+
+ /* add all SW descriptors to the free list */
+ for (i = 0; i < MV_XOR_V2_DESC_NUM; i++) {
+ struct mv_xor_v2_sw_desc *sw_desc =
+ xor_dev->sw_desq + i;
+ sw_desc->idx = i;
+ dma_async_tx_descriptor_init(&sw_desc->async_tx,
+ &xor_dev->dmachan);
+ sw_desc->async_tx.tx_submit = mv_xor_v2_tx_submit;
+ async_tx_ack(&sw_desc->async_tx);
+
+ list_add(&sw_desc->free_list,
+ &xor_dev->free_sw_desc);
+ }
+
+ dma_dev = &xor_dev->dmadev;
+
+ /* set DMA capabilities */
+ dma_cap_zero(dma_dev->cap_mask);
+ dma_cap_set(DMA_MEMCPY, dma_dev->cap_mask);
+ dma_cap_set(DMA_XOR, dma_dev->cap_mask);
+ dma_cap_set(DMA_INTERRUPT, dma_dev->cap_mask);
+
+ /* init dma link list */
+ INIT_LIST_HEAD(&dma_dev->channels);
+
+ /* set base routines */
+ dma_dev->device_tx_status = dma_cookie_status;
+ dma_dev->device_issue_pending = mv_xor_v2_issue_pending;
+ dma_dev->dev = &pdev->dev;
+
+ dma_dev->device_prep_dma_memcpy = mv_xor_v2_prep_dma_memcpy;
+ dma_dev->device_prep_dma_interrupt = mv_xor_v2_prep_dma_interrupt;
+ dma_dev->max_xor = 8;
+ dma_dev->device_prep_dma_xor = mv_xor_v2_prep_dma_xor;
+
+ xor_dev->dmachan.device = dma_dev;
+
+ list_add_tail(&xor_dev->dmachan.device_node,
+ &dma_dev->channels);
+
+ mv_xor_v2_enable_imsg_thrd(xor_dev);
+
+ mv_xor_v2_descq_init(xor_dev);
+
+ ret = dma_async_device_register(dma_dev);
+ if (ret)
+ goto free_hw_desq;
+
+ dev_notice(&pdev->dev, "Marvell Version 2 XOR driver\n");
+
+ return 0;
+
+free_hw_desq:
+ dma_free_coherent(&pdev->dev,
+ xor_dev->desc_size * MV_XOR_V2_DESC_NUM,
+ xor_dev->hw_desq_virt, xor_dev->hw_desq);
+free_msi_irqs:
+ platform_msi_domain_free_irqs(&pdev->dev);
+disable_clk:
+ clk_disable_unprepare(xor_dev->clk);
+disable_reg_clk:
+ clk_disable_unprepare(xor_dev->reg_clk);
+ return ret;
+}
+
+static int mv_xor_v2_remove(struct platform_device *pdev)
+{
+ struct mv_xor_v2_device *xor_dev = platform_get_drvdata(pdev);
+
+ dma_async_device_unregister(&xor_dev->dmadev);
+
+ dma_free_coherent(&pdev->dev,
+ xor_dev->desc_size * MV_XOR_V2_DESC_NUM,
+ xor_dev->hw_desq_virt, xor_dev->hw_desq);
+
+ devm_free_irq(&pdev->dev, xor_dev->irq, xor_dev);
+
+ platform_msi_domain_free_irqs(&pdev->dev);
+
+ tasklet_kill(&xor_dev->irq_tasklet);
+
+ clk_disable_unprepare(xor_dev->clk);
+ clk_disable_unprepare(xor_dev->reg_clk);
+
+ return 0;
+}
+
+#ifdef CONFIG_OF
+static const struct of_device_id mv_xor_v2_dt_ids[] = {
+ { .compatible = "marvell,xor-v2", },
+ {},
+};
+MODULE_DEVICE_TABLE(of, mv_xor_v2_dt_ids);
+#endif
+
+static struct platform_driver mv_xor_v2_driver = {
+ .probe = mv_xor_v2_probe,
+ .suspend = mv_xor_v2_suspend,
+ .resume = mv_xor_v2_resume,
+ .remove = mv_xor_v2_remove,
+ .driver = {
+ .name = "mv_xor_v2",
+ .of_match_table = of_match_ptr(mv_xor_v2_dt_ids),
+ },
+};
+
+module_platform_driver(mv_xor_v2_driver);
+
+MODULE_DESCRIPTION("DMA engine driver for Marvell's Version 2 of XOR engine");
+MODULE_LICENSE("GPL");
diff --git a/drivers/dma/mxs-dma.c b/drivers/dma/mxs-dma.c
new file mode 100644
index 000000000..dc147cc24
--- /dev/null
+++ b/drivers/dma/mxs-dma.c
@@ -0,0 +1,845 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Copyright 2011 Freescale Semiconductor, Inc. All Rights Reserved.
+//
+// Refer to drivers/dma/imx-sdma.c
+
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/clk.h>
+#include <linux/wait.h>
+#include <linux/sched.h>
+#include <linux/semaphore.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/slab.h>
+#include <linux/platform_device.h>
+#include <linux/dmaengine.h>
+#include <linux/delay.h>
+#include <linux/module.h>
+#include <linux/stmp_device.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/of_dma.h>
+#include <linux/list.h>
+#include <linux/dma/mxs-dma.h>
+
+#include <asm/irq.h>
+
+#include "dmaengine.h"
+
+/*
+ * NOTE: The term "PIO" throughout the mxs-dma implementation means
+ * PIO mode of mxs apbh-dma and apbx-dma. With this working mode,
+ * dma can program the controller registers of peripheral devices.
+ */
+
+#define dma_is_apbh(mxs_dma) ((mxs_dma)->type == MXS_DMA_APBH)
+#define apbh_is_old(mxs_dma) ((mxs_dma)->dev_id == IMX23_DMA)
+
+#define HW_APBHX_CTRL0 0x000
+#define BM_APBH_CTRL0_APB_BURST8_EN (1 << 29)
+#define BM_APBH_CTRL0_APB_BURST_EN (1 << 28)
+#define BP_APBH_CTRL0_RESET_CHANNEL 16
+#define HW_APBHX_CTRL1 0x010
+#define HW_APBHX_CTRL2 0x020
+#define HW_APBHX_CHANNEL_CTRL 0x030
+#define BP_APBHX_CHANNEL_CTRL_RESET_CHANNEL 16
+/*
+ * The offset of NXTCMDAR register is different per both dma type and version,
+ * while stride for each channel is all the same 0x70.
+ */
+#define HW_APBHX_CHn_NXTCMDAR(d, n) \
+ (((dma_is_apbh(d) && apbh_is_old(d)) ? 0x050 : 0x110) + (n) * 0x70)
+#define HW_APBHX_CHn_SEMA(d, n) \
+ (((dma_is_apbh(d) && apbh_is_old(d)) ? 0x080 : 0x140) + (n) * 0x70)
+#define HW_APBHX_CHn_BAR(d, n) \
+ (((dma_is_apbh(d) && apbh_is_old(d)) ? 0x070 : 0x130) + (n) * 0x70)
+#define HW_APBX_CHn_DEBUG1(d, n) (0x150 + (n) * 0x70)
+
+/*
+ * ccw bits definitions
+ *
+ * COMMAND: 0..1 (2)
+ * CHAIN: 2 (1)
+ * IRQ: 3 (1)
+ * NAND_LOCK: 4 (1) - not implemented
+ * NAND_WAIT4READY: 5 (1) - not implemented
+ * DEC_SEM: 6 (1)
+ * WAIT4END: 7 (1)
+ * HALT_ON_TERMINATE: 8 (1)
+ * TERMINATE_FLUSH: 9 (1)
+ * RESERVED: 10..11 (2)
+ * PIO_NUM: 12..15 (4)
+ */
+#define BP_CCW_COMMAND 0
+#define BM_CCW_COMMAND (3 << 0)
+#define CCW_CHAIN (1 << 2)
+#define CCW_IRQ (1 << 3)
+#define CCW_WAIT4RDY (1 << 5)
+#define CCW_DEC_SEM (1 << 6)
+#define CCW_WAIT4END (1 << 7)
+#define CCW_HALT_ON_TERM (1 << 8)
+#define CCW_TERM_FLUSH (1 << 9)
+#define BP_CCW_PIO_NUM 12
+#define BM_CCW_PIO_NUM (0xf << 12)
+
+#define BF_CCW(value, field) (((value) << BP_CCW_##field) & BM_CCW_##field)
+
+#define MXS_DMA_CMD_NO_XFER 0
+#define MXS_DMA_CMD_WRITE 1
+#define MXS_DMA_CMD_READ 2
+#define MXS_DMA_CMD_DMA_SENSE 3 /* not implemented */
+
+struct mxs_dma_ccw {
+ u32 next;
+ u16 bits;
+ u16 xfer_bytes;
+#define MAX_XFER_BYTES 0xff00
+ u32 bufaddr;
+#define MXS_PIO_WORDS 16
+ u32 pio_words[MXS_PIO_WORDS];
+};
+
+#define CCW_BLOCK_SIZE (4 * PAGE_SIZE)
+#define NUM_CCW (int)(CCW_BLOCK_SIZE / sizeof(struct mxs_dma_ccw))
+
+struct mxs_dma_chan {
+ struct mxs_dma_engine *mxs_dma;
+ struct dma_chan chan;
+ struct dma_async_tx_descriptor desc;
+ struct tasklet_struct tasklet;
+ unsigned int chan_irq;
+ struct mxs_dma_ccw *ccw;
+ dma_addr_t ccw_phys;
+ int desc_count;
+ enum dma_status status;
+ unsigned int flags;
+ bool reset;
+#define MXS_DMA_SG_LOOP (1 << 0)
+#define MXS_DMA_USE_SEMAPHORE (1 << 1)
+};
+
+#define MXS_DMA_CHANNELS 16
+#define MXS_DMA_CHANNELS_MASK 0xffff
+
+enum mxs_dma_devtype {
+ MXS_DMA_APBH,
+ MXS_DMA_APBX,
+};
+
+enum mxs_dma_id {
+ IMX23_DMA,
+ IMX28_DMA,
+};
+
+struct mxs_dma_engine {
+ enum mxs_dma_id dev_id;
+ enum mxs_dma_devtype type;
+ void __iomem *base;
+ struct clk *clk;
+ struct dma_device dma_device;
+ struct mxs_dma_chan mxs_chans[MXS_DMA_CHANNELS];
+ struct platform_device *pdev;
+ unsigned int nr_channels;
+};
+
+struct mxs_dma_type {
+ enum mxs_dma_id id;
+ enum mxs_dma_devtype type;
+};
+
+static struct mxs_dma_type mxs_dma_types[] = {
+ {
+ .id = IMX23_DMA,
+ .type = MXS_DMA_APBH,
+ }, {
+ .id = IMX23_DMA,
+ .type = MXS_DMA_APBX,
+ }, {
+ .id = IMX28_DMA,
+ .type = MXS_DMA_APBH,
+ }, {
+ .id = IMX28_DMA,
+ .type = MXS_DMA_APBX,
+ }
+};
+
+static const struct of_device_id mxs_dma_dt_ids[] = {
+ { .compatible = "fsl,imx23-dma-apbh", .data = &mxs_dma_types[0], },
+ { .compatible = "fsl,imx23-dma-apbx", .data = &mxs_dma_types[1], },
+ { .compatible = "fsl,imx28-dma-apbh", .data = &mxs_dma_types[2], },
+ { .compatible = "fsl,imx28-dma-apbx", .data = &mxs_dma_types[3], },
+ { /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, mxs_dma_dt_ids);
+
+static struct mxs_dma_chan *to_mxs_dma_chan(struct dma_chan *chan)
+{
+ return container_of(chan, struct mxs_dma_chan, chan);
+}
+
+static void mxs_dma_reset_chan(struct dma_chan *chan)
+{
+ struct mxs_dma_chan *mxs_chan = to_mxs_dma_chan(chan);
+ struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma;
+ int chan_id = mxs_chan->chan.chan_id;
+
+ /*
+ * mxs dma channel resets can cause a channel stall. To recover from a
+ * channel stall, we have to reset the whole DMA engine. To avoid this,
+ * we use cyclic DMA with semaphores, that are enhanced in
+ * mxs_dma_int_handler. To reset the channel, we can simply stop writing
+ * into the semaphore counter.
+ */
+ if (mxs_chan->flags & MXS_DMA_USE_SEMAPHORE &&
+ mxs_chan->flags & MXS_DMA_SG_LOOP) {
+ mxs_chan->reset = true;
+ } else if (dma_is_apbh(mxs_dma) && apbh_is_old(mxs_dma)) {
+ writel(1 << (chan_id + BP_APBH_CTRL0_RESET_CHANNEL),
+ mxs_dma->base + HW_APBHX_CTRL0 + STMP_OFFSET_REG_SET);
+ } else {
+ unsigned long elapsed = 0;
+ const unsigned long max_wait = 50000; /* 50ms */
+ void __iomem *reg_dbg1 = mxs_dma->base +
+ HW_APBX_CHn_DEBUG1(mxs_dma, chan_id);
+
+ /*
+ * On i.MX28 APBX, the DMA channel can stop working if we reset
+ * the channel while it is in READ_FLUSH (0x08) state.
+ * We wait here until we leave the state. Then we trigger the
+ * reset. Waiting a maximum of 50ms, the kernel shouldn't crash
+ * because of this.
+ */
+ while ((readl(reg_dbg1) & 0xf) == 0x8 && elapsed < max_wait) {
+ udelay(100);
+ elapsed += 100;
+ }
+
+ if (elapsed >= max_wait)
+ dev_err(&mxs_chan->mxs_dma->pdev->dev,
+ "Failed waiting for the DMA channel %d to leave state READ_FLUSH, trying to reset channel in READ_FLUSH state now\n",
+ chan_id);
+
+ writel(1 << (chan_id + BP_APBHX_CHANNEL_CTRL_RESET_CHANNEL),
+ mxs_dma->base + HW_APBHX_CHANNEL_CTRL + STMP_OFFSET_REG_SET);
+ }
+
+ mxs_chan->status = DMA_COMPLETE;
+}
+
+static void mxs_dma_enable_chan(struct dma_chan *chan)
+{
+ struct mxs_dma_chan *mxs_chan = to_mxs_dma_chan(chan);
+ struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma;
+ int chan_id = mxs_chan->chan.chan_id;
+
+ /* set cmd_addr up */
+ writel(mxs_chan->ccw_phys,
+ mxs_dma->base + HW_APBHX_CHn_NXTCMDAR(mxs_dma, chan_id));
+
+ /* write 1 to SEMA to kick off the channel */
+ if (mxs_chan->flags & MXS_DMA_USE_SEMAPHORE &&
+ mxs_chan->flags & MXS_DMA_SG_LOOP) {
+ /* A cyclic DMA consists of at least 2 segments, so initialize
+ * the semaphore with 2 so we have enough time to add 1 to the
+ * semaphore if we need to */
+ writel(2, mxs_dma->base + HW_APBHX_CHn_SEMA(mxs_dma, chan_id));
+ } else {
+ writel(1, mxs_dma->base + HW_APBHX_CHn_SEMA(mxs_dma, chan_id));
+ }
+ mxs_chan->reset = false;
+}
+
+static void mxs_dma_disable_chan(struct dma_chan *chan)
+{
+ struct mxs_dma_chan *mxs_chan = to_mxs_dma_chan(chan);
+
+ mxs_chan->status = DMA_COMPLETE;
+}
+
+static int mxs_dma_pause_chan(struct dma_chan *chan)
+{
+ struct mxs_dma_chan *mxs_chan = to_mxs_dma_chan(chan);
+ struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma;
+ int chan_id = mxs_chan->chan.chan_id;
+
+ /* freeze the channel */
+ if (dma_is_apbh(mxs_dma) && apbh_is_old(mxs_dma))
+ writel(1 << chan_id,
+ mxs_dma->base + HW_APBHX_CTRL0 + STMP_OFFSET_REG_SET);
+ else
+ writel(1 << chan_id,
+ mxs_dma->base + HW_APBHX_CHANNEL_CTRL + STMP_OFFSET_REG_SET);
+
+ mxs_chan->status = DMA_PAUSED;
+ return 0;
+}
+
+static int mxs_dma_resume_chan(struct dma_chan *chan)
+{
+ struct mxs_dma_chan *mxs_chan = to_mxs_dma_chan(chan);
+ struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma;
+ int chan_id = mxs_chan->chan.chan_id;
+
+ /* unfreeze the channel */
+ if (dma_is_apbh(mxs_dma) && apbh_is_old(mxs_dma))
+ writel(1 << chan_id,
+ mxs_dma->base + HW_APBHX_CTRL0 + STMP_OFFSET_REG_CLR);
+ else
+ writel(1 << chan_id,
+ mxs_dma->base + HW_APBHX_CHANNEL_CTRL + STMP_OFFSET_REG_CLR);
+
+ mxs_chan->status = DMA_IN_PROGRESS;
+ return 0;
+}
+
+static dma_cookie_t mxs_dma_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+ return dma_cookie_assign(tx);
+}
+
+static void mxs_dma_tasklet(struct tasklet_struct *t)
+{
+ struct mxs_dma_chan *mxs_chan = from_tasklet(mxs_chan, t, tasklet);
+
+ dmaengine_desc_get_callback_invoke(&mxs_chan->desc, NULL);
+}
+
+static int mxs_dma_irq_to_chan(struct mxs_dma_engine *mxs_dma, int irq)
+{
+ int i;
+
+ for (i = 0; i != mxs_dma->nr_channels; ++i)
+ if (mxs_dma->mxs_chans[i].chan_irq == irq)
+ return i;
+
+ return -EINVAL;
+}
+
+static irqreturn_t mxs_dma_int_handler(int irq, void *dev_id)
+{
+ struct mxs_dma_engine *mxs_dma = dev_id;
+ struct mxs_dma_chan *mxs_chan;
+ u32 completed;
+ u32 err;
+ int chan = mxs_dma_irq_to_chan(mxs_dma, irq);
+
+ if (chan < 0)
+ return IRQ_NONE;
+
+ /* completion status */
+ completed = readl(mxs_dma->base + HW_APBHX_CTRL1);
+ completed = (completed >> chan) & 0x1;
+
+ /* Clear interrupt */
+ writel((1 << chan),
+ mxs_dma->base + HW_APBHX_CTRL1 + STMP_OFFSET_REG_CLR);
+
+ /* error status */
+ err = readl(mxs_dma->base + HW_APBHX_CTRL2);
+ err &= (1 << (MXS_DMA_CHANNELS + chan)) | (1 << chan);
+
+ /*
+ * error status bit is in the upper 16 bits, error irq bit in the lower
+ * 16 bits. We transform it into a simpler error code:
+ * err: 0x00 = no error, 0x01 = TERMINATION, 0x02 = BUS_ERROR
+ */
+ err = (err >> (MXS_DMA_CHANNELS + chan)) + (err >> chan);
+
+ /* Clear error irq */
+ writel((1 << chan),
+ mxs_dma->base + HW_APBHX_CTRL2 + STMP_OFFSET_REG_CLR);
+
+ /*
+ * When both completion and error of termination bits set at the
+ * same time, we do not take it as an error. IOW, it only becomes
+ * an error we need to handle here in case of either it's a bus
+ * error or a termination error with no completion. 0x01 is termination
+ * error, so we can subtract err & completed to get the real error case.
+ */
+ err -= err & completed;
+
+ mxs_chan = &mxs_dma->mxs_chans[chan];
+
+ if (err) {
+ dev_dbg(mxs_dma->dma_device.dev,
+ "%s: error in channel %d\n", __func__,
+ chan);
+ mxs_chan->status = DMA_ERROR;
+ mxs_dma_reset_chan(&mxs_chan->chan);
+ } else if (mxs_chan->status != DMA_COMPLETE) {
+ if (mxs_chan->flags & MXS_DMA_SG_LOOP) {
+ mxs_chan->status = DMA_IN_PROGRESS;
+ if (mxs_chan->flags & MXS_DMA_USE_SEMAPHORE)
+ writel(1, mxs_dma->base +
+ HW_APBHX_CHn_SEMA(mxs_dma, chan));
+ } else {
+ mxs_chan->status = DMA_COMPLETE;
+ }
+ }
+
+ if (mxs_chan->status == DMA_COMPLETE) {
+ if (mxs_chan->reset)
+ return IRQ_HANDLED;
+ dma_cookie_complete(&mxs_chan->desc);
+ }
+
+ /* schedule tasklet on this channel */
+ tasklet_schedule(&mxs_chan->tasklet);
+
+ return IRQ_HANDLED;
+}
+
+static int mxs_dma_alloc_chan_resources(struct dma_chan *chan)
+{
+ struct mxs_dma_chan *mxs_chan = to_mxs_dma_chan(chan);
+ struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma;
+ int ret;
+
+ mxs_chan->ccw = dma_alloc_coherent(mxs_dma->dma_device.dev,
+ CCW_BLOCK_SIZE,
+ &mxs_chan->ccw_phys, GFP_KERNEL);
+ if (!mxs_chan->ccw) {
+ ret = -ENOMEM;
+ goto err_alloc;
+ }
+
+ ret = request_irq(mxs_chan->chan_irq, mxs_dma_int_handler,
+ 0, "mxs-dma", mxs_dma);
+ if (ret)
+ goto err_irq;
+
+ ret = clk_prepare_enable(mxs_dma->clk);
+ if (ret)
+ goto err_clk;
+
+ mxs_dma_reset_chan(chan);
+
+ dma_async_tx_descriptor_init(&mxs_chan->desc, chan);
+ mxs_chan->desc.tx_submit = mxs_dma_tx_submit;
+
+ /* the descriptor is ready */
+ async_tx_ack(&mxs_chan->desc);
+
+ return 0;
+
+err_clk:
+ free_irq(mxs_chan->chan_irq, mxs_dma);
+err_irq:
+ dma_free_coherent(mxs_dma->dma_device.dev, CCW_BLOCK_SIZE,
+ mxs_chan->ccw, mxs_chan->ccw_phys);
+err_alloc:
+ return ret;
+}
+
+static void mxs_dma_free_chan_resources(struct dma_chan *chan)
+{
+ struct mxs_dma_chan *mxs_chan = to_mxs_dma_chan(chan);
+ struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma;
+
+ mxs_dma_disable_chan(chan);
+
+ free_irq(mxs_chan->chan_irq, mxs_dma);
+
+ dma_free_coherent(mxs_dma->dma_device.dev, CCW_BLOCK_SIZE,
+ mxs_chan->ccw, mxs_chan->ccw_phys);
+
+ clk_disable_unprepare(mxs_dma->clk);
+}
+
+/*
+ * How to use the flags for ->device_prep_slave_sg() :
+ * [1] If there is only one DMA command in the DMA chain, the code should be:
+ * ......
+ * ->device_prep_slave_sg(DMA_CTRL_ACK);
+ * ......
+ * [2] If there are two DMA commands in the DMA chain, the code should be
+ * ......
+ * ->device_prep_slave_sg(0);
+ * ......
+ * ->device_prep_slave_sg(DMA_CTRL_ACK);
+ * ......
+ * [3] If there are more than two DMA commands in the DMA chain, the code
+ * should be:
+ * ......
+ * ->device_prep_slave_sg(0); // First
+ * ......
+ * ->device_prep_slave_sg(DMA_CTRL_ACK]);
+ * ......
+ * ->device_prep_slave_sg(DMA_CTRL_ACK); // Last
+ * ......
+ */
+static struct dma_async_tx_descriptor *mxs_dma_prep_slave_sg(
+ struct dma_chan *chan, struct scatterlist *sgl,
+ unsigned int sg_len, enum dma_transfer_direction direction,
+ unsigned long flags, void *context)
+{
+ struct mxs_dma_chan *mxs_chan = to_mxs_dma_chan(chan);
+ struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma;
+ struct mxs_dma_ccw *ccw;
+ struct scatterlist *sg;
+ u32 i, j;
+ u32 *pio;
+ int idx = 0;
+
+ if (mxs_chan->status == DMA_IN_PROGRESS)
+ idx = mxs_chan->desc_count;
+
+ if (sg_len + idx > NUM_CCW) {
+ dev_err(mxs_dma->dma_device.dev,
+ "maximum number of sg exceeded: %d > %d\n",
+ sg_len, NUM_CCW);
+ goto err_out;
+ }
+
+ mxs_chan->status = DMA_IN_PROGRESS;
+ mxs_chan->flags = 0;
+
+ /*
+ * If the sg is prepared with append flag set, the sg
+ * will be appended to the last prepared sg.
+ */
+ if (idx) {
+ BUG_ON(idx < 1);
+ ccw = &mxs_chan->ccw[idx - 1];
+ ccw->next = mxs_chan->ccw_phys + sizeof(*ccw) * idx;
+ ccw->bits |= CCW_CHAIN;
+ ccw->bits &= ~CCW_IRQ;
+ ccw->bits &= ~CCW_DEC_SEM;
+ } else {
+ idx = 0;
+ }
+
+ if (direction == DMA_TRANS_NONE) {
+ ccw = &mxs_chan->ccw[idx++];
+ pio = (u32 *) sgl;
+
+ for (j = 0; j < sg_len;)
+ ccw->pio_words[j++] = *pio++;
+
+ ccw->bits = 0;
+ ccw->bits |= CCW_IRQ;
+ ccw->bits |= CCW_DEC_SEM;
+ if (flags & MXS_DMA_CTRL_WAIT4END)
+ ccw->bits |= CCW_WAIT4END;
+ ccw->bits |= CCW_HALT_ON_TERM;
+ ccw->bits |= CCW_TERM_FLUSH;
+ ccw->bits |= BF_CCW(sg_len, PIO_NUM);
+ ccw->bits |= BF_CCW(MXS_DMA_CMD_NO_XFER, COMMAND);
+ if (flags & MXS_DMA_CTRL_WAIT4RDY)
+ ccw->bits |= CCW_WAIT4RDY;
+ } else {
+ for_each_sg(sgl, sg, sg_len, i) {
+ if (sg_dma_len(sg) > MAX_XFER_BYTES) {
+ dev_err(mxs_dma->dma_device.dev, "maximum bytes for sg entry exceeded: %d > %d\n",
+ sg_dma_len(sg), MAX_XFER_BYTES);
+ goto err_out;
+ }
+
+ ccw = &mxs_chan->ccw[idx++];
+
+ ccw->next = mxs_chan->ccw_phys + sizeof(*ccw) * idx;
+ ccw->bufaddr = sg->dma_address;
+ ccw->xfer_bytes = sg_dma_len(sg);
+
+ ccw->bits = 0;
+ ccw->bits |= CCW_CHAIN;
+ ccw->bits |= CCW_HALT_ON_TERM;
+ ccw->bits |= CCW_TERM_FLUSH;
+ ccw->bits |= BF_CCW(direction == DMA_DEV_TO_MEM ?
+ MXS_DMA_CMD_WRITE : MXS_DMA_CMD_READ,
+ COMMAND);
+
+ if (i + 1 == sg_len) {
+ ccw->bits &= ~CCW_CHAIN;
+ ccw->bits |= CCW_IRQ;
+ ccw->bits |= CCW_DEC_SEM;
+ if (flags & MXS_DMA_CTRL_WAIT4END)
+ ccw->bits |= CCW_WAIT4END;
+ }
+ }
+ }
+ mxs_chan->desc_count = idx;
+
+ return &mxs_chan->desc;
+
+err_out:
+ mxs_chan->status = DMA_ERROR;
+ return NULL;
+}
+
+static struct dma_async_tx_descriptor *mxs_dma_prep_dma_cyclic(
+ struct dma_chan *chan, dma_addr_t dma_addr, size_t buf_len,
+ size_t period_len, enum dma_transfer_direction direction,
+ unsigned long flags)
+{
+ struct mxs_dma_chan *mxs_chan = to_mxs_dma_chan(chan);
+ struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma;
+ u32 num_periods = buf_len / period_len;
+ u32 i = 0, buf = 0;
+
+ if (mxs_chan->status == DMA_IN_PROGRESS)
+ return NULL;
+
+ mxs_chan->status = DMA_IN_PROGRESS;
+ mxs_chan->flags |= MXS_DMA_SG_LOOP;
+ mxs_chan->flags |= MXS_DMA_USE_SEMAPHORE;
+
+ if (num_periods > NUM_CCW) {
+ dev_err(mxs_dma->dma_device.dev,
+ "maximum number of sg exceeded: %d > %d\n",
+ num_periods, NUM_CCW);
+ goto err_out;
+ }
+
+ if (period_len > MAX_XFER_BYTES) {
+ dev_err(mxs_dma->dma_device.dev,
+ "maximum period size exceeded: %zu > %d\n",
+ period_len, MAX_XFER_BYTES);
+ goto err_out;
+ }
+
+ while (buf < buf_len) {
+ struct mxs_dma_ccw *ccw = &mxs_chan->ccw[i];
+
+ if (i + 1 == num_periods)
+ ccw->next = mxs_chan->ccw_phys;
+ else
+ ccw->next = mxs_chan->ccw_phys + sizeof(*ccw) * (i + 1);
+
+ ccw->bufaddr = dma_addr;
+ ccw->xfer_bytes = period_len;
+
+ ccw->bits = 0;
+ ccw->bits |= CCW_CHAIN;
+ ccw->bits |= CCW_IRQ;
+ ccw->bits |= CCW_HALT_ON_TERM;
+ ccw->bits |= CCW_TERM_FLUSH;
+ ccw->bits |= CCW_DEC_SEM;
+ ccw->bits |= BF_CCW(direction == DMA_DEV_TO_MEM ?
+ MXS_DMA_CMD_WRITE : MXS_DMA_CMD_READ, COMMAND);
+
+ dma_addr += period_len;
+ buf += period_len;
+
+ i++;
+ }
+ mxs_chan->desc_count = i;
+
+ return &mxs_chan->desc;
+
+err_out:
+ mxs_chan->status = DMA_ERROR;
+ return NULL;
+}
+
+static int mxs_dma_terminate_all(struct dma_chan *chan)
+{
+ mxs_dma_reset_chan(chan);
+ mxs_dma_disable_chan(chan);
+
+ return 0;
+}
+
+static enum dma_status mxs_dma_tx_status(struct dma_chan *chan,
+ dma_cookie_t cookie, struct dma_tx_state *txstate)
+{
+ struct mxs_dma_chan *mxs_chan = to_mxs_dma_chan(chan);
+ struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma;
+ u32 residue = 0;
+
+ if (mxs_chan->status == DMA_IN_PROGRESS &&
+ mxs_chan->flags & MXS_DMA_SG_LOOP) {
+ struct mxs_dma_ccw *last_ccw;
+ u32 bar;
+
+ last_ccw = &mxs_chan->ccw[mxs_chan->desc_count - 1];
+ residue = last_ccw->xfer_bytes + last_ccw->bufaddr;
+
+ bar = readl(mxs_dma->base +
+ HW_APBHX_CHn_BAR(mxs_dma, chan->chan_id));
+ residue -= bar;
+ }
+
+ dma_set_tx_state(txstate, chan->completed_cookie, chan->cookie,
+ residue);
+
+ return mxs_chan->status;
+}
+
+static int mxs_dma_init(struct mxs_dma_engine *mxs_dma)
+{
+ int ret;
+
+ ret = clk_prepare_enable(mxs_dma->clk);
+ if (ret)
+ return ret;
+
+ ret = stmp_reset_block(mxs_dma->base);
+ if (ret)
+ goto err_out;
+
+ /* enable apbh burst */
+ if (dma_is_apbh(mxs_dma)) {
+ writel(BM_APBH_CTRL0_APB_BURST_EN,
+ mxs_dma->base + HW_APBHX_CTRL0 + STMP_OFFSET_REG_SET);
+ writel(BM_APBH_CTRL0_APB_BURST8_EN,
+ mxs_dma->base + HW_APBHX_CTRL0 + STMP_OFFSET_REG_SET);
+ }
+
+ /* enable irq for all the channels */
+ writel(MXS_DMA_CHANNELS_MASK << MXS_DMA_CHANNELS,
+ mxs_dma->base + HW_APBHX_CTRL1 + STMP_OFFSET_REG_SET);
+
+err_out:
+ clk_disable_unprepare(mxs_dma->clk);
+ return ret;
+}
+
+struct mxs_dma_filter_param {
+ unsigned int chan_id;
+};
+
+static bool mxs_dma_filter_fn(struct dma_chan *chan, void *fn_param)
+{
+ struct mxs_dma_filter_param *param = fn_param;
+ struct mxs_dma_chan *mxs_chan = to_mxs_dma_chan(chan);
+ struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma;
+ int chan_irq;
+
+ if (chan->chan_id != param->chan_id)
+ return false;
+
+ chan_irq = platform_get_irq(mxs_dma->pdev, param->chan_id);
+ if (chan_irq < 0)
+ return false;
+
+ mxs_chan->chan_irq = chan_irq;
+
+ return true;
+}
+
+static struct dma_chan *mxs_dma_xlate(struct of_phandle_args *dma_spec,
+ struct of_dma *ofdma)
+{
+ struct mxs_dma_engine *mxs_dma = ofdma->of_dma_data;
+ dma_cap_mask_t mask = mxs_dma->dma_device.cap_mask;
+ struct mxs_dma_filter_param param;
+
+ if (dma_spec->args_count != 1)
+ return NULL;
+
+ param.chan_id = dma_spec->args[0];
+
+ if (param.chan_id >= mxs_dma->nr_channels)
+ return NULL;
+
+ return __dma_request_channel(&mask, mxs_dma_filter_fn, &param,
+ ofdma->of_node);
+}
+
+static int mxs_dma_probe(struct platform_device *pdev)
+{
+ struct device_node *np = pdev->dev.of_node;
+ const struct mxs_dma_type *dma_type;
+ struct mxs_dma_engine *mxs_dma;
+ struct resource *iores;
+ int ret, i;
+
+ mxs_dma = devm_kzalloc(&pdev->dev, sizeof(*mxs_dma), GFP_KERNEL);
+ if (!mxs_dma)
+ return -ENOMEM;
+
+ ret = of_property_read_u32(np, "dma-channels", &mxs_dma->nr_channels);
+ if (ret) {
+ dev_err(&pdev->dev, "failed to read dma-channels\n");
+ return ret;
+ }
+
+ dma_type = (struct mxs_dma_type *)of_device_get_match_data(&pdev->dev);
+ mxs_dma->type = dma_type->type;
+ mxs_dma->dev_id = dma_type->id;
+
+ iores = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ mxs_dma->base = devm_ioremap_resource(&pdev->dev, iores);
+ if (IS_ERR(mxs_dma->base))
+ return PTR_ERR(mxs_dma->base);
+
+ mxs_dma->clk = devm_clk_get(&pdev->dev, NULL);
+ if (IS_ERR(mxs_dma->clk))
+ return PTR_ERR(mxs_dma->clk);
+
+ dma_cap_set(DMA_SLAVE, mxs_dma->dma_device.cap_mask);
+ dma_cap_set(DMA_CYCLIC, mxs_dma->dma_device.cap_mask);
+
+ INIT_LIST_HEAD(&mxs_dma->dma_device.channels);
+
+ /* Initialize channel parameters */
+ for (i = 0; i < MXS_DMA_CHANNELS; i++) {
+ struct mxs_dma_chan *mxs_chan = &mxs_dma->mxs_chans[i];
+
+ mxs_chan->mxs_dma = mxs_dma;
+ mxs_chan->chan.device = &mxs_dma->dma_device;
+ dma_cookie_init(&mxs_chan->chan);
+
+ tasklet_setup(&mxs_chan->tasklet, mxs_dma_tasklet);
+
+
+ /* Add the channel to mxs_chan list */
+ list_add_tail(&mxs_chan->chan.device_node,
+ &mxs_dma->dma_device.channels);
+ }
+
+ ret = mxs_dma_init(mxs_dma);
+ if (ret)
+ return ret;
+
+ mxs_dma->pdev = pdev;
+ mxs_dma->dma_device.dev = &pdev->dev;
+
+ /* mxs_dma gets 65535 bytes maximum sg size */
+ dma_set_max_seg_size(mxs_dma->dma_device.dev, MAX_XFER_BYTES);
+
+ mxs_dma->dma_device.device_alloc_chan_resources = mxs_dma_alloc_chan_resources;
+ mxs_dma->dma_device.device_free_chan_resources = mxs_dma_free_chan_resources;
+ mxs_dma->dma_device.device_tx_status = mxs_dma_tx_status;
+ mxs_dma->dma_device.device_prep_slave_sg = mxs_dma_prep_slave_sg;
+ mxs_dma->dma_device.device_prep_dma_cyclic = mxs_dma_prep_dma_cyclic;
+ mxs_dma->dma_device.device_pause = mxs_dma_pause_chan;
+ mxs_dma->dma_device.device_resume = mxs_dma_resume_chan;
+ mxs_dma->dma_device.device_terminate_all = mxs_dma_terminate_all;
+ mxs_dma->dma_device.src_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_4_BYTES);
+ mxs_dma->dma_device.dst_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_4_BYTES);
+ mxs_dma->dma_device.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
+ mxs_dma->dma_device.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
+ mxs_dma->dma_device.device_issue_pending = mxs_dma_enable_chan;
+
+ ret = dmaenginem_async_device_register(&mxs_dma->dma_device);
+ if (ret) {
+ dev_err(mxs_dma->dma_device.dev, "unable to register\n");
+ return ret;
+ }
+
+ ret = of_dma_controller_register(np, mxs_dma_xlate, mxs_dma);
+ if (ret) {
+ dev_err(mxs_dma->dma_device.dev,
+ "failed to register controller\n");
+ }
+
+ dev_info(mxs_dma->dma_device.dev, "initialized\n");
+
+ return 0;
+}
+
+static struct platform_driver mxs_dma_driver = {
+ .driver = {
+ .name = "mxs-dma",
+ .of_match_table = mxs_dma_dt_ids,
+ },
+ .probe = mxs_dma_probe,
+};
+
+builtin_platform_driver(mxs_dma_driver);
diff --git a/drivers/dma/nbpfaxi.c b/drivers/dma/nbpfaxi.c
new file mode 100644
index 000000000..a7063e9cd
--- /dev/null
+++ b/drivers/dma/nbpfaxi.c
@@ -0,0 +1,1530 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2013-2014 Renesas Electronics Europe Ltd.
+ * Author: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
+ */
+
+#include <linux/bitmap.h>
+#include <linux/bitops.h>
+#include <linux/clk.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmaengine.h>
+#include <linux/err.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/log2.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/of_dma.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+#include <dt-bindings/dma/nbpfaxi.h>
+
+#include "dmaengine.h"
+
+#define NBPF_REG_CHAN_OFFSET 0
+#define NBPF_REG_CHAN_SIZE 0x40
+
+/* Channel Current Transaction Byte register */
+#define NBPF_CHAN_CUR_TR_BYTE 0x20
+
+/* Channel Status register */
+#define NBPF_CHAN_STAT 0x24
+#define NBPF_CHAN_STAT_EN 1
+#define NBPF_CHAN_STAT_TACT 4
+#define NBPF_CHAN_STAT_ERR 0x10
+#define NBPF_CHAN_STAT_END 0x20
+#define NBPF_CHAN_STAT_TC 0x40
+#define NBPF_CHAN_STAT_DER 0x400
+
+/* Channel Control register */
+#define NBPF_CHAN_CTRL 0x28
+#define NBPF_CHAN_CTRL_SETEN 1
+#define NBPF_CHAN_CTRL_CLREN 2
+#define NBPF_CHAN_CTRL_STG 4
+#define NBPF_CHAN_CTRL_SWRST 8
+#define NBPF_CHAN_CTRL_CLRRQ 0x10
+#define NBPF_CHAN_CTRL_CLREND 0x20
+#define NBPF_CHAN_CTRL_CLRTC 0x40
+#define NBPF_CHAN_CTRL_SETSUS 0x100
+#define NBPF_CHAN_CTRL_CLRSUS 0x200
+
+/* Channel Configuration register */
+#define NBPF_CHAN_CFG 0x2c
+#define NBPF_CHAN_CFG_SEL 7 /* terminal SELect: 0..7 */
+#define NBPF_CHAN_CFG_REQD 8 /* REQuest Direction: DMAREQ is 0: input, 1: output */
+#define NBPF_CHAN_CFG_LOEN 0x10 /* LOw ENable: low DMA request line is: 0: inactive, 1: active */
+#define NBPF_CHAN_CFG_HIEN 0x20 /* HIgh ENable: high DMA request line is: 0: inactive, 1: active */
+#define NBPF_CHAN_CFG_LVL 0x40 /* LeVeL: DMA request line is sensed as 0: edge, 1: level */
+#define NBPF_CHAN_CFG_AM 0x700 /* ACK Mode: 0: Pulse mode, 1: Level mode, b'1x: Bus Cycle */
+#define NBPF_CHAN_CFG_SDS 0xf000 /* Source Data Size: 0: 8 bits,... , 7: 1024 bits */
+#define NBPF_CHAN_CFG_DDS 0xf0000 /* Destination Data Size: as above */
+#define NBPF_CHAN_CFG_SAD 0x100000 /* Source ADdress counting: 0: increment, 1: fixed */
+#define NBPF_CHAN_CFG_DAD 0x200000 /* Destination ADdress counting: 0: increment, 1: fixed */
+#define NBPF_CHAN_CFG_TM 0x400000 /* Transfer Mode: 0: single, 1: block TM */
+#define NBPF_CHAN_CFG_DEM 0x1000000 /* DMAEND interrupt Mask */
+#define NBPF_CHAN_CFG_TCM 0x2000000 /* DMATCO interrupt Mask */
+#define NBPF_CHAN_CFG_SBE 0x8000000 /* Sweep Buffer Enable */
+#define NBPF_CHAN_CFG_RSEL 0x10000000 /* RM: Register Set sELect */
+#define NBPF_CHAN_CFG_RSW 0x20000000 /* RM: Register Select sWitch */
+#define NBPF_CHAN_CFG_REN 0x40000000 /* RM: Register Set Enable */
+#define NBPF_CHAN_CFG_DMS 0x80000000 /* 0: register mode (RM), 1: link mode (LM) */
+
+#define NBPF_CHAN_NXLA 0x38
+#define NBPF_CHAN_CRLA 0x3c
+
+/* Link Header field */
+#define NBPF_HEADER_LV 1
+#define NBPF_HEADER_LE 2
+#define NBPF_HEADER_WBD 4
+#define NBPF_HEADER_DIM 8
+
+#define NBPF_CTRL 0x300
+#define NBPF_CTRL_PR 1 /* 0: fixed priority, 1: round robin */
+#define NBPF_CTRL_LVINT 2 /* DMAEND and DMAERR signalling: 0: pulse, 1: level */
+
+#define NBPF_DSTAT_ER 0x314
+#define NBPF_DSTAT_END 0x318
+
+#define NBPF_DMA_BUSWIDTHS \
+ (BIT(DMA_SLAVE_BUSWIDTH_UNDEFINED) | \
+ BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | \
+ BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \
+ BIT(DMA_SLAVE_BUSWIDTH_4_BYTES) | \
+ BIT(DMA_SLAVE_BUSWIDTH_8_BYTES))
+
+struct nbpf_config {
+ int num_channels;
+ int buffer_size;
+};
+
+/*
+ * We've got 3 types of objects, used to describe DMA transfers:
+ * 1. high-level descriptor, containing a struct dma_async_tx_descriptor object
+ * in it, used to communicate with the user
+ * 2. hardware DMA link descriptors, that we pass to DMAC for DMA transfer
+ * queuing, these must be DMAable, using either the streaming DMA API or
+ * allocated from coherent memory - one per SG segment
+ * 3. one per SG segment descriptors, used to manage HW link descriptors from
+ * (2). They do not have to be DMAable. They can either be (a) allocated
+ * together with link descriptors as mixed (DMA / CPU) objects, or (b)
+ * separately. Even if allocated separately it would be best to link them
+ * to link descriptors once during channel resource allocation and always
+ * use them as a single object.
+ * Therefore for both cases (a) and (b) at run-time objects (2) and (3) shall be
+ * treated as a single SG segment descriptor.
+ */
+
+struct nbpf_link_reg {
+ u32 header;
+ u32 src_addr;
+ u32 dst_addr;
+ u32 transaction_size;
+ u32 config;
+ u32 interval;
+ u32 extension;
+ u32 next;
+} __packed;
+
+struct nbpf_device;
+struct nbpf_channel;
+struct nbpf_desc;
+
+struct nbpf_link_desc {
+ struct nbpf_link_reg *hwdesc;
+ dma_addr_t hwdesc_dma_addr;
+ struct nbpf_desc *desc;
+ struct list_head node;
+};
+
+/**
+ * struct nbpf_desc - DMA transfer descriptor
+ * @async_tx: dmaengine object
+ * @user_wait: waiting for a user ack
+ * @length: total transfer length
+ * @chan: associated DMAC channel
+ * @sg: list of hardware descriptors, represented by struct nbpf_link_desc
+ * @node: member in channel descriptor lists
+ */
+struct nbpf_desc {
+ struct dma_async_tx_descriptor async_tx;
+ bool user_wait;
+ size_t length;
+ struct nbpf_channel *chan;
+ struct list_head sg;
+ struct list_head node;
+};
+
+/* Take a wild guess: allocate 4 segments per descriptor */
+#define NBPF_SEGMENTS_PER_DESC 4
+#define NBPF_DESCS_PER_PAGE ((PAGE_SIZE - sizeof(struct list_head)) / \
+ (sizeof(struct nbpf_desc) + \
+ NBPF_SEGMENTS_PER_DESC * \
+ (sizeof(struct nbpf_link_desc) + sizeof(struct nbpf_link_reg))))
+#define NBPF_SEGMENTS_PER_PAGE (NBPF_SEGMENTS_PER_DESC * NBPF_DESCS_PER_PAGE)
+
+struct nbpf_desc_page {
+ struct list_head node;
+ struct nbpf_desc desc[NBPF_DESCS_PER_PAGE];
+ struct nbpf_link_desc ldesc[NBPF_SEGMENTS_PER_PAGE];
+ struct nbpf_link_reg hwdesc[NBPF_SEGMENTS_PER_PAGE];
+};
+
+/**
+ * struct nbpf_channel - one DMAC channel
+ * @dma_chan: standard dmaengine channel object
+ * @tasklet: channel specific tasklet used for callbacks
+ * @base: register address base
+ * @nbpf: DMAC
+ * @name: IRQ name
+ * @irq: IRQ number
+ * @slave_src_addr: source address for slave DMA
+ * @slave_src_width: source slave data size in bytes
+ * @slave_src_burst: maximum source slave burst size in bytes
+ * @slave_dst_addr: destination address for slave DMA
+ * @slave_dst_width: destination slave data size in bytes
+ * @slave_dst_burst: maximum destination slave burst size in bytes
+ * @terminal: DMA terminal, assigned to this channel
+ * @dmarq_cfg: DMA request line configuration - high / low, edge / level for NBPF_CHAN_CFG
+ * @flags: configuration flags from DT
+ * @lock: protect descriptor lists
+ * @free_links: list of free link descriptors
+ * @free: list of free descriptors
+ * @queued: list of queued descriptors
+ * @active: list of descriptors, scheduled for processing
+ * @done: list of completed descriptors, waiting post-processing
+ * @desc_page: list of additionally allocated descriptor pages - if any
+ * @running: linked descriptor of running transaction
+ * @paused: are translations on this channel paused?
+ */
+struct nbpf_channel {
+ struct dma_chan dma_chan;
+ struct tasklet_struct tasklet;
+ void __iomem *base;
+ struct nbpf_device *nbpf;
+ char name[16];
+ int irq;
+ dma_addr_t slave_src_addr;
+ size_t slave_src_width;
+ size_t slave_src_burst;
+ dma_addr_t slave_dst_addr;
+ size_t slave_dst_width;
+ size_t slave_dst_burst;
+ unsigned int terminal;
+ u32 dmarq_cfg;
+ unsigned long flags;
+ spinlock_t lock;
+ struct list_head free_links;
+ struct list_head free;
+ struct list_head queued;
+ struct list_head active;
+ struct list_head done;
+ struct list_head desc_page;
+ struct nbpf_desc *running;
+ bool paused;
+};
+
+struct nbpf_device {
+ struct dma_device dma_dev;
+ void __iomem *base;
+ u32 max_burst_mem_read;
+ u32 max_burst_mem_write;
+ struct clk *clk;
+ const struct nbpf_config *config;
+ unsigned int eirq;
+ struct nbpf_channel chan[];
+};
+
+enum nbpf_model {
+ NBPF1B4,
+ NBPF1B8,
+ NBPF1B16,
+ NBPF4B4,
+ NBPF4B8,
+ NBPF4B16,
+ NBPF8B4,
+ NBPF8B8,
+ NBPF8B16,
+};
+
+static struct nbpf_config nbpf_cfg[] = {
+ [NBPF1B4] = {
+ .num_channels = 1,
+ .buffer_size = 4,
+ },
+ [NBPF1B8] = {
+ .num_channels = 1,
+ .buffer_size = 8,
+ },
+ [NBPF1B16] = {
+ .num_channels = 1,
+ .buffer_size = 16,
+ },
+ [NBPF4B4] = {
+ .num_channels = 4,
+ .buffer_size = 4,
+ },
+ [NBPF4B8] = {
+ .num_channels = 4,
+ .buffer_size = 8,
+ },
+ [NBPF4B16] = {
+ .num_channels = 4,
+ .buffer_size = 16,
+ },
+ [NBPF8B4] = {
+ .num_channels = 8,
+ .buffer_size = 4,
+ },
+ [NBPF8B8] = {
+ .num_channels = 8,
+ .buffer_size = 8,
+ },
+ [NBPF8B16] = {
+ .num_channels = 8,
+ .buffer_size = 16,
+ },
+};
+
+#define nbpf_to_chan(d) container_of(d, struct nbpf_channel, dma_chan)
+
+/*
+ * dmaengine drivers seem to have a lot in common and instead of sharing more
+ * code, they reimplement those common algorithms independently. In this driver
+ * we try to separate the hardware-specific part from the (largely) generic
+ * part. This improves code readability and makes it possible in the future to
+ * reuse the generic code in form of a helper library. That generic code should
+ * be suitable for various DMA controllers, using transfer descriptors in RAM
+ * and pushing one SG list at a time to the DMA controller.
+ */
+
+/* Hardware-specific part */
+
+static inline u32 nbpf_chan_read(struct nbpf_channel *chan,
+ unsigned int offset)
+{
+ u32 data = ioread32(chan->base + offset);
+ dev_dbg(chan->dma_chan.device->dev, "%s(0x%p + 0x%x) = 0x%x\n",
+ __func__, chan->base, offset, data);
+ return data;
+}
+
+static inline void nbpf_chan_write(struct nbpf_channel *chan,
+ unsigned int offset, u32 data)
+{
+ iowrite32(data, chan->base + offset);
+ dev_dbg(chan->dma_chan.device->dev, "%s(0x%p + 0x%x) = 0x%x\n",
+ __func__, chan->base, offset, data);
+}
+
+static inline u32 nbpf_read(struct nbpf_device *nbpf,
+ unsigned int offset)
+{
+ u32 data = ioread32(nbpf->base + offset);
+ dev_dbg(nbpf->dma_dev.dev, "%s(0x%p + 0x%x) = 0x%x\n",
+ __func__, nbpf->base, offset, data);
+ return data;
+}
+
+static inline void nbpf_write(struct nbpf_device *nbpf,
+ unsigned int offset, u32 data)
+{
+ iowrite32(data, nbpf->base + offset);
+ dev_dbg(nbpf->dma_dev.dev, "%s(0x%p + 0x%x) = 0x%x\n",
+ __func__, nbpf->base, offset, data);
+}
+
+static void nbpf_chan_halt(struct nbpf_channel *chan)
+{
+ nbpf_chan_write(chan, NBPF_CHAN_CTRL, NBPF_CHAN_CTRL_CLREN);
+}
+
+static bool nbpf_status_get(struct nbpf_channel *chan)
+{
+ u32 status = nbpf_read(chan->nbpf, NBPF_DSTAT_END);
+
+ return status & BIT(chan - chan->nbpf->chan);
+}
+
+static void nbpf_status_ack(struct nbpf_channel *chan)
+{
+ nbpf_chan_write(chan, NBPF_CHAN_CTRL, NBPF_CHAN_CTRL_CLREND);
+}
+
+static u32 nbpf_error_get(struct nbpf_device *nbpf)
+{
+ return nbpf_read(nbpf, NBPF_DSTAT_ER);
+}
+
+static struct nbpf_channel *nbpf_error_get_channel(struct nbpf_device *nbpf, u32 error)
+{
+ return nbpf->chan + __ffs(error);
+}
+
+static void nbpf_error_clear(struct nbpf_channel *chan)
+{
+ u32 status;
+ int i;
+
+ /* Stop the channel, make sure DMA has been aborted */
+ nbpf_chan_halt(chan);
+
+ for (i = 1000; i; i--) {
+ status = nbpf_chan_read(chan, NBPF_CHAN_STAT);
+ if (!(status & NBPF_CHAN_STAT_TACT))
+ break;
+ cpu_relax();
+ }
+
+ if (!i)
+ dev_err(chan->dma_chan.device->dev,
+ "%s(): abort timeout, channel status 0x%x\n", __func__, status);
+
+ nbpf_chan_write(chan, NBPF_CHAN_CTRL, NBPF_CHAN_CTRL_SWRST);
+}
+
+static int nbpf_start(struct nbpf_desc *desc)
+{
+ struct nbpf_channel *chan = desc->chan;
+ struct nbpf_link_desc *ldesc = list_first_entry(&desc->sg, struct nbpf_link_desc, node);
+
+ nbpf_chan_write(chan, NBPF_CHAN_NXLA, (u32)ldesc->hwdesc_dma_addr);
+ nbpf_chan_write(chan, NBPF_CHAN_CTRL, NBPF_CHAN_CTRL_SETEN | NBPF_CHAN_CTRL_CLRSUS);
+ chan->paused = false;
+
+ /* Software trigger MEMCPY - only MEMCPY uses the block mode */
+ if (ldesc->hwdesc->config & NBPF_CHAN_CFG_TM)
+ nbpf_chan_write(chan, NBPF_CHAN_CTRL, NBPF_CHAN_CTRL_STG);
+
+ dev_dbg(chan->nbpf->dma_dev.dev, "%s(): next 0x%x, cur 0x%x\n", __func__,
+ nbpf_chan_read(chan, NBPF_CHAN_NXLA), nbpf_chan_read(chan, NBPF_CHAN_CRLA));
+
+ return 0;
+}
+
+static void nbpf_chan_prepare(struct nbpf_channel *chan)
+{
+ chan->dmarq_cfg = (chan->flags & NBPF_SLAVE_RQ_HIGH ? NBPF_CHAN_CFG_HIEN : 0) |
+ (chan->flags & NBPF_SLAVE_RQ_LOW ? NBPF_CHAN_CFG_LOEN : 0) |
+ (chan->flags & NBPF_SLAVE_RQ_LEVEL ?
+ NBPF_CHAN_CFG_LVL | (NBPF_CHAN_CFG_AM & 0x200) : 0) |
+ chan->terminal;
+}
+
+static void nbpf_chan_prepare_default(struct nbpf_channel *chan)
+{
+ /* Don't output DMAACK */
+ chan->dmarq_cfg = NBPF_CHAN_CFG_AM & 0x400;
+ chan->terminal = 0;
+ chan->flags = 0;
+}
+
+static void nbpf_chan_configure(struct nbpf_channel *chan)
+{
+ /*
+ * We assume, that only the link mode and DMA request line configuration
+ * have to be set in the configuration register manually. Dynamic
+ * per-transfer configuration will be loaded from transfer descriptors.
+ */
+ nbpf_chan_write(chan, NBPF_CHAN_CFG, NBPF_CHAN_CFG_DMS | chan->dmarq_cfg);
+}
+
+static u32 nbpf_xfer_ds(struct nbpf_device *nbpf, size_t size,
+ enum dma_transfer_direction direction)
+{
+ int max_burst = nbpf->config->buffer_size * 8;
+
+ if (nbpf->max_burst_mem_read || nbpf->max_burst_mem_write) {
+ switch (direction) {
+ case DMA_MEM_TO_MEM:
+ max_burst = min_not_zero(nbpf->max_burst_mem_read,
+ nbpf->max_burst_mem_write);
+ break;
+ case DMA_MEM_TO_DEV:
+ if (nbpf->max_burst_mem_read)
+ max_burst = nbpf->max_burst_mem_read;
+ break;
+ case DMA_DEV_TO_MEM:
+ if (nbpf->max_burst_mem_write)
+ max_burst = nbpf->max_burst_mem_write;
+ break;
+ case DMA_DEV_TO_DEV:
+ default:
+ break;
+ }
+ }
+
+ /* Maximum supported bursts depend on the buffer size */
+ return min_t(int, __ffs(size), ilog2(max_burst));
+}
+
+static size_t nbpf_xfer_size(struct nbpf_device *nbpf,
+ enum dma_slave_buswidth width, u32 burst)
+{
+ size_t size;
+
+ if (!burst)
+ burst = 1;
+
+ switch (width) {
+ case DMA_SLAVE_BUSWIDTH_8_BYTES:
+ size = 8 * burst;
+ break;
+
+ case DMA_SLAVE_BUSWIDTH_4_BYTES:
+ size = 4 * burst;
+ break;
+
+ case DMA_SLAVE_BUSWIDTH_2_BYTES:
+ size = 2 * burst;
+ break;
+
+ default:
+ pr_warn("%s(): invalid bus width %u\n", __func__, width);
+ fallthrough;
+ case DMA_SLAVE_BUSWIDTH_1_BYTE:
+ size = burst;
+ }
+
+ return nbpf_xfer_ds(nbpf, size, DMA_TRANS_NONE);
+}
+
+/*
+ * We need a way to recognise slaves, whose data is sent "raw" over the bus,
+ * i.e. it isn't known in advance how many bytes will be received. Therefore
+ * the slave driver has to provide a "large enough" buffer and either read the
+ * buffer, when it is full, or detect, that some data has arrived, then wait for
+ * a timeout, if no more data arrives - receive what's already there. We want to
+ * handle such slaves in a special way to allow an optimised mode for other
+ * users, for whom the amount of data is known in advance. So far there's no way
+ * to recognise such slaves. We use a data-width check to distinguish between
+ * the SD host and the PL011 UART.
+ */
+
+static int nbpf_prep_one(struct nbpf_link_desc *ldesc,
+ enum dma_transfer_direction direction,
+ dma_addr_t src, dma_addr_t dst, size_t size, bool last)
+{
+ struct nbpf_link_reg *hwdesc = ldesc->hwdesc;
+ struct nbpf_desc *desc = ldesc->desc;
+ struct nbpf_channel *chan = desc->chan;
+ struct device *dev = chan->dma_chan.device->dev;
+ size_t mem_xfer, slave_xfer;
+ bool can_burst;
+
+ hwdesc->header = NBPF_HEADER_WBD | NBPF_HEADER_LV |
+ (last ? NBPF_HEADER_LE : 0);
+
+ hwdesc->src_addr = src;
+ hwdesc->dst_addr = dst;
+ hwdesc->transaction_size = size;
+
+ /*
+ * set config: SAD, DAD, DDS, SDS, etc.
+ * Note on transfer sizes: the DMAC can perform unaligned DMA transfers,
+ * but it is important to have transaction size a multiple of both
+ * receiver and transmitter transfer sizes. It is also possible to use
+ * different RAM and device transfer sizes, and it does work well with
+ * some devices, e.g. with V08R07S01E SD host controllers, which can use
+ * 128 byte transfers. But this doesn't work with other devices,
+ * especially when the transaction size is unknown. This is the case,
+ * e.g. with serial drivers like amba-pl011.c. For reception it sets up
+ * the transaction size of 4K and if fewer bytes are received, it
+ * pauses DMA and reads out data received via DMA as well as those left
+ * in the Rx FIFO. For this to work with the RAM side using burst
+ * transfers we enable the SBE bit and terminate the transfer in our
+ * .device_pause handler.
+ */
+ mem_xfer = nbpf_xfer_ds(chan->nbpf, size, direction);
+
+ switch (direction) {
+ case DMA_DEV_TO_MEM:
+ can_burst = chan->slave_src_width >= 3;
+ slave_xfer = min(mem_xfer, can_burst ?
+ chan->slave_src_burst : chan->slave_src_width);
+ /*
+ * Is the slave narrower than 64 bits, i.e. isn't using the full
+ * bus width and cannot use bursts?
+ */
+ if (mem_xfer > chan->slave_src_burst && !can_burst)
+ mem_xfer = chan->slave_src_burst;
+ /* Device-to-RAM DMA is unreliable without REQD set */
+ hwdesc->config = NBPF_CHAN_CFG_SAD | (NBPF_CHAN_CFG_DDS & (mem_xfer << 16)) |
+ (NBPF_CHAN_CFG_SDS & (slave_xfer << 12)) | NBPF_CHAN_CFG_REQD |
+ NBPF_CHAN_CFG_SBE;
+ break;
+
+ case DMA_MEM_TO_DEV:
+ slave_xfer = min(mem_xfer, chan->slave_dst_width >= 3 ?
+ chan->slave_dst_burst : chan->slave_dst_width);
+ hwdesc->config = NBPF_CHAN_CFG_DAD | (NBPF_CHAN_CFG_SDS & (mem_xfer << 12)) |
+ (NBPF_CHAN_CFG_DDS & (slave_xfer << 16)) | NBPF_CHAN_CFG_REQD;
+ break;
+
+ case DMA_MEM_TO_MEM:
+ hwdesc->config = NBPF_CHAN_CFG_TCM | NBPF_CHAN_CFG_TM |
+ (NBPF_CHAN_CFG_SDS & (mem_xfer << 12)) |
+ (NBPF_CHAN_CFG_DDS & (mem_xfer << 16));
+ break;
+
+ default:
+ return -EINVAL;
+ }
+
+ hwdesc->config |= chan->dmarq_cfg | (last ? 0 : NBPF_CHAN_CFG_DEM) |
+ NBPF_CHAN_CFG_DMS;
+
+ dev_dbg(dev, "%s(): desc @ %pad: hdr 0x%x, cfg 0x%x, %zu @ %pad -> %pad\n",
+ __func__, &ldesc->hwdesc_dma_addr, hwdesc->header,
+ hwdesc->config, size, &src, &dst);
+
+ dma_sync_single_for_device(dev, ldesc->hwdesc_dma_addr, sizeof(*hwdesc),
+ DMA_TO_DEVICE);
+
+ return 0;
+}
+
+static size_t nbpf_bytes_left(struct nbpf_channel *chan)
+{
+ return nbpf_chan_read(chan, NBPF_CHAN_CUR_TR_BYTE);
+}
+
+static void nbpf_configure(struct nbpf_device *nbpf)
+{
+ nbpf_write(nbpf, NBPF_CTRL, NBPF_CTRL_LVINT);
+}
+
+/* Generic part */
+
+/* DMA ENGINE functions */
+static void nbpf_issue_pending(struct dma_chan *dchan)
+{
+ struct nbpf_channel *chan = nbpf_to_chan(dchan);
+ unsigned long flags;
+
+ dev_dbg(dchan->device->dev, "Entry %s()\n", __func__);
+
+ spin_lock_irqsave(&chan->lock, flags);
+ if (list_empty(&chan->queued))
+ goto unlock;
+
+ list_splice_tail_init(&chan->queued, &chan->active);
+
+ if (!chan->running) {
+ struct nbpf_desc *desc = list_first_entry(&chan->active,
+ struct nbpf_desc, node);
+ if (!nbpf_start(desc))
+ chan->running = desc;
+ }
+
+unlock:
+ spin_unlock_irqrestore(&chan->lock, flags);
+}
+
+static enum dma_status nbpf_tx_status(struct dma_chan *dchan,
+ dma_cookie_t cookie, struct dma_tx_state *state)
+{
+ struct nbpf_channel *chan = nbpf_to_chan(dchan);
+ enum dma_status status = dma_cookie_status(dchan, cookie, state);
+
+ if (state) {
+ dma_cookie_t running;
+ unsigned long flags;
+
+ spin_lock_irqsave(&chan->lock, flags);
+ running = chan->running ? chan->running->async_tx.cookie : -EINVAL;
+
+ if (cookie == running) {
+ state->residue = nbpf_bytes_left(chan);
+ dev_dbg(dchan->device->dev, "%s(): residue %u\n", __func__,
+ state->residue);
+ } else if (status == DMA_IN_PROGRESS) {
+ struct nbpf_desc *desc;
+ bool found = false;
+
+ list_for_each_entry(desc, &chan->active, node)
+ if (desc->async_tx.cookie == cookie) {
+ found = true;
+ break;
+ }
+
+ if (!found)
+ list_for_each_entry(desc, &chan->queued, node)
+ if (desc->async_tx.cookie == cookie) {
+ found = true;
+ break;
+
+ }
+
+ state->residue = found ? desc->length : 0;
+ }
+
+ spin_unlock_irqrestore(&chan->lock, flags);
+ }
+
+ if (chan->paused)
+ status = DMA_PAUSED;
+
+ return status;
+}
+
+static dma_cookie_t nbpf_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+ struct nbpf_desc *desc = container_of(tx, struct nbpf_desc, async_tx);
+ struct nbpf_channel *chan = desc->chan;
+ unsigned long flags;
+ dma_cookie_t cookie;
+
+ spin_lock_irqsave(&chan->lock, flags);
+ cookie = dma_cookie_assign(tx);
+ list_add_tail(&desc->node, &chan->queued);
+ spin_unlock_irqrestore(&chan->lock, flags);
+
+ dev_dbg(chan->dma_chan.device->dev, "Entry %s(%d)\n", __func__, cookie);
+
+ return cookie;
+}
+
+static int nbpf_desc_page_alloc(struct nbpf_channel *chan)
+{
+ struct dma_chan *dchan = &chan->dma_chan;
+ struct nbpf_desc_page *dpage = (void *)get_zeroed_page(GFP_KERNEL | GFP_DMA);
+ struct nbpf_link_desc *ldesc;
+ struct nbpf_link_reg *hwdesc;
+ struct nbpf_desc *desc;
+ LIST_HEAD(head);
+ LIST_HEAD(lhead);
+ int i;
+ struct device *dev = dchan->device->dev;
+
+ if (!dpage)
+ return -ENOMEM;
+
+ dev_dbg(dev, "%s(): alloc %lu descriptors, %lu segments, total alloc %zu\n",
+ __func__, NBPF_DESCS_PER_PAGE, NBPF_SEGMENTS_PER_PAGE, sizeof(*dpage));
+
+ for (i = 0, ldesc = dpage->ldesc, hwdesc = dpage->hwdesc;
+ i < ARRAY_SIZE(dpage->ldesc);
+ i++, ldesc++, hwdesc++) {
+ ldesc->hwdesc = hwdesc;
+ list_add_tail(&ldesc->node, &lhead);
+ ldesc->hwdesc_dma_addr = dma_map_single(dchan->device->dev,
+ hwdesc, sizeof(*hwdesc), DMA_TO_DEVICE);
+
+ dev_dbg(dev, "%s(): mapped 0x%p to %pad\n", __func__,
+ hwdesc, &ldesc->hwdesc_dma_addr);
+ }
+
+ for (i = 0, desc = dpage->desc;
+ i < ARRAY_SIZE(dpage->desc);
+ i++, desc++) {
+ dma_async_tx_descriptor_init(&desc->async_tx, dchan);
+ desc->async_tx.tx_submit = nbpf_tx_submit;
+ desc->chan = chan;
+ INIT_LIST_HEAD(&desc->sg);
+ list_add_tail(&desc->node, &head);
+ }
+
+ /*
+ * This function cannot be called from interrupt context, so, no need to
+ * save flags
+ */
+ spin_lock_irq(&chan->lock);
+ list_splice_tail(&lhead, &chan->free_links);
+ list_splice_tail(&head, &chan->free);
+ list_add(&dpage->node, &chan->desc_page);
+ spin_unlock_irq(&chan->lock);
+
+ return ARRAY_SIZE(dpage->desc);
+}
+
+static void nbpf_desc_put(struct nbpf_desc *desc)
+{
+ struct nbpf_channel *chan = desc->chan;
+ struct nbpf_link_desc *ldesc, *tmp;
+ unsigned long flags;
+
+ spin_lock_irqsave(&chan->lock, flags);
+ list_for_each_entry_safe(ldesc, tmp, &desc->sg, node)
+ list_move(&ldesc->node, &chan->free_links);
+
+ list_add(&desc->node, &chan->free);
+ spin_unlock_irqrestore(&chan->lock, flags);
+}
+
+static void nbpf_scan_acked(struct nbpf_channel *chan)
+{
+ struct nbpf_desc *desc, *tmp;
+ unsigned long flags;
+ LIST_HEAD(head);
+
+ spin_lock_irqsave(&chan->lock, flags);
+ list_for_each_entry_safe(desc, tmp, &chan->done, node)
+ if (async_tx_test_ack(&desc->async_tx) && desc->user_wait) {
+ list_move(&desc->node, &head);
+ desc->user_wait = false;
+ }
+ spin_unlock_irqrestore(&chan->lock, flags);
+
+ list_for_each_entry_safe(desc, tmp, &head, node) {
+ list_del(&desc->node);
+ nbpf_desc_put(desc);
+ }
+}
+
+/*
+ * We have to allocate descriptors with the channel lock dropped. This means,
+ * before we re-acquire the lock buffers can be taken already, so we have to
+ * re-check after re-acquiring the lock and possibly retry, if buffers are gone
+ * again.
+ */
+static struct nbpf_desc *nbpf_desc_get(struct nbpf_channel *chan, size_t len)
+{
+ struct nbpf_desc *desc = NULL;
+ struct nbpf_link_desc *ldesc, *prev = NULL;
+
+ nbpf_scan_acked(chan);
+
+ spin_lock_irq(&chan->lock);
+
+ do {
+ int i = 0, ret;
+
+ if (list_empty(&chan->free)) {
+ /* No more free descriptors */
+ spin_unlock_irq(&chan->lock);
+ ret = nbpf_desc_page_alloc(chan);
+ if (ret < 0)
+ return NULL;
+ spin_lock_irq(&chan->lock);
+ continue;
+ }
+ desc = list_first_entry(&chan->free, struct nbpf_desc, node);
+ list_del(&desc->node);
+
+ do {
+ if (list_empty(&chan->free_links)) {
+ /* No more free link descriptors */
+ spin_unlock_irq(&chan->lock);
+ ret = nbpf_desc_page_alloc(chan);
+ if (ret < 0) {
+ nbpf_desc_put(desc);
+ return NULL;
+ }
+ spin_lock_irq(&chan->lock);
+ continue;
+ }
+
+ ldesc = list_first_entry(&chan->free_links,
+ struct nbpf_link_desc, node);
+ ldesc->desc = desc;
+ if (prev)
+ prev->hwdesc->next = (u32)ldesc->hwdesc_dma_addr;
+
+ prev = ldesc;
+ list_move_tail(&ldesc->node, &desc->sg);
+
+ i++;
+ } while (i < len);
+ } while (!desc);
+
+ prev->hwdesc->next = 0;
+
+ spin_unlock_irq(&chan->lock);
+
+ return desc;
+}
+
+static void nbpf_chan_idle(struct nbpf_channel *chan)
+{
+ struct nbpf_desc *desc, *tmp;
+ unsigned long flags;
+ LIST_HEAD(head);
+
+ spin_lock_irqsave(&chan->lock, flags);
+
+ list_splice_init(&chan->done, &head);
+ list_splice_init(&chan->active, &head);
+ list_splice_init(&chan->queued, &head);
+
+ chan->running = NULL;
+
+ spin_unlock_irqrestore(&chan->lock, flags);
+
+ list_for_each_entry_safe(desc, tmp, &head, node) {
+ dev_dbg(chan->nbpf->dma_dev.dev, "%s(): force-free desc %p cookie %d\n",
+ __func__, desc, desc->async_tx.cookie);
+ list_del(&desc->node);
+ nbpf_desc_put(desc);
+ }
+}
+
+static int nbpf_pause(struct dma_chan *dchan)
+{
+ struct nbpf_channel *chan = nbpf_to_chan(dchan);
+
+ dev_dbg(dchan->device->dev, "Entry %s\n", __func__);
+
+ chan->paused = true;
+ nbpf_chan_write(chan, NBPF_CHAN_CTRL, NBPF_CHAN_CTRL_SETSUS);
+ /* See comment in nbpf_prep_one() */
+ nbpf_chan_write(chan, NBPF_CHAN_CTRL, NBPF_CHAN_CTRL_CLREN);
+
+ return 0;
+}
+
+static int nbpf_terminate_all(struct dma_chan *dchan)
+{
+ struct nbpf_channel *chan = nbpf_to_chan(dchan);
+
+ dev_dbg(dchan->device->dev, "Entry %s\n", __func__);
+ dev_dbg(dchan->device->dev, "Terminating\n");
+
+ nbpf_chan_halt(chan);
+ nbpf_chan_idle(chan);
+
+ return 0;
+}
+
+static int nbpf_config(struct dma_chan *dchan,
+ struct dma_slave_config *config)
+{
+ struct nbpf_channel *chan = nbpf_to_chan(dchan);
+
+ dev_dbg(dchan->device->dev, "Entry %s\n", __func__);
+
+ /*
+ * We could check config->slave_id to match chan->terminal here,
+ * but with DT they would be coming from the same source, so
+ * such a check would be superflous
+ */
+
+ chan->slave_dst_addr = config->dst_addr;
+ chan->slave_dst_width = nbpf_xfer_size(chan->nbpf,
+ config->dst_addr_width, 1);
+ chan->slave_dst_burst = nbpf_xfer_size(chan->nbpf,
+ config->dst_addr_width,
+ config->dst_maxburst);
+ chan->slave_src_addr = config->src_addr;
+ chan->slave_src_width = nbpf_xfer_size(chan->nbpf,
+ config->src_addr_width, 1);
+ chan->slave_src_burst = nbpf_xfer_size(chan->nbpf,
+ config->src_addr_width,
+ config->src_maxburst);
+
+ return 0;
+}
+
+static struct dma_async_tx_descriptor *nbpf_prep_sg(struct nbpf_channel *chan,
+ struct scatterlist *src_sg, struct scatterlist *dst_sg,
+ size_t len, enum dma_transfer_direction direction,
+ unsigned long flags)
+{
+ struct nbpf_link_desc *ldesc;
+ struct scatterlist *mem_sg;
+ struct nbpf_desc *desc;
+ bool inc_src, inc_dst;
+ size_t data_len = 0;
+ int i = 0;
+
+ switch (direction) {
+ case DMA_DEV_TO_MEM:
+ mem_sg = dst_sg;
+ inc_src = false;
+ inc_dst = true;
+ break;
+
+ case DMA_MEM_TO_DEV:
+ mem_sg = src_sg;
+ inc_src = true;
+ inc_dst = false;
+ break;
+
+ default:
+ case DMA_MEM_TO_MEM:
+ mem_sg = src_sg;
+ inc_src = true;
+ inc_dst = true;
+ }
+
+ desc = nbpf_desc_get(chan, len);
+ if (!desc)
+ return NULL;
+
+ desc->async_tx.flags = flags;
+ desc->async_tx.cookie = -EBUSY;
+ desc->user_wait = false;
+
+ /*
+ * This is a private descriptor list, and we own the descriptor. No need
+ * to lock.
+ */
+ list_for_each_entry(ldesc, &desc->sg, node) {
+ int ret = nbpf_prep_one(ldesc, direction,
+ sg_dma_address(src_sg),
+ sg_dma_address(dst_sg),
+ sg_dma_len(mem_sg),
+ i == len - 1);
+ if (ret < 0) {
+ nbpf_desc_put(desc);
+ return NULL;
+ }
+ data_len += sg_dma_len(mem_sg);
+ if (inc_src)
+ src_sg = sg_next(src_sg);
+ if (inc_dst)
+ dst_sg = sg_next(dst_sg);
+ mem_sg = direction == DMA_DEV_TO_MEM ? dst_sg : src_sg;
+ i++;
+ }
+
+ desc->length = data_len;
+
+ /* The user has to return the descriptor to us ASAP via .tx_submit() */
+ return &desc->async_tx;
+}
+
+static struct dma_async_tx_descriptor *nbpf_prep_memcpy(
+ struct dma_chan *dchan, dma_addr_t dst, dma_addr_t src,
+ size_t len, unsigned long flags)
+{
+ struct nbpf_channel *chan = nbpf_to_chan(dchan);
+ struct scatterlist dst_sg;
+ struct scatterlist src_sg;
+
+ sg_init_table(&dst_sg, 1);
+ sg_init_table(&src_sg, 1);
+
+ sg_dma_address(&dst_sg) = dst;
+ sg_dma_address(&src_sg) = src;
+
+ sg_dma_len(&dst_sg) = len;
+ sg_dma_len(&src_sg) = len;
+
+ dev_dbg(dchan->device->dev, "%s(): %zu @ %pad -> %pad\n",
+ __func__, len, &src, &dst);
+
+ return nbpf_prep_sg(chan, &src_sg, &dst_sg, 1,
+ DMA_MEM_TO_MEM, flags);
+}
+
+static struct dma_async_tx_descriptor *nbpf_prep_slave_sg(
+ struct dma_chan *dchan, struct scatterlist *sgl, unsigned int sg_len,
+ enum dma_transfer_direction direction, unsigned long flags, void *context)
+{
+ struct nbpf_channel *chan = nbpf_to_chan(dchan);
+ struct scatterlist slave_sg;
+
+ dev_dbg(dchan->device->dev, "Entry %s()\n", __func__);
+
+ sg_init_table(&slave_sg, 1);
+
+ switch (direction) {
+ case DMA_MEM_TO_DEV:
+ sg_dma_address(&slave_sg) = chan->slave_dst_addr;
+ return nbpf_prep_sg(chan, sgl, &slave_sg, sg_len,
+ direction, flags);
+
+ case DMA_DEV_TO_MEM:
+ sg_dma_address(&slave_sg) = chan->slave_src_addr;
+ return nbpf_prep_sg(chan, &slave_sg, sgl, sg_len,
+ direction, flags);
+
+ default:
+ return NULL;
+ }
+}
+
+static int nbpf_alloc_chan_resources(struct dma_chan *dchan)
+{
+ struct nbpf_channel *chan = nbpf_to_chan(dchan);
+ int ret;
+
+ INIT_LIST_HEAD(&chan->free);
+ INIT_LIST_HEAD(&chan->free_links);
+ INIT_LIST_HEAD(&chan->queued);
+ INIT_LIST_HEAD(&chan->active);
+ INIT_LIST_HEAD(&chan->done);
+
+ ret = nbpf_desc_page_alloc(chan);
+ if (ret < 0)
+ return ret;
+
+ dev_dbg(dchan->device->dev, "Entry %s(): terminal %u\n", __func__,
+ chan->terminal);
+
+ nbpf_chan_configure(chan);
+
+ return ret;
+}
+
+static void nbpf_free_chan_resources(struct dma_chan *dchan)
+{
+ struct nbpf_channel *chan = nbpf_to_chan(dchan);
+ struct nbpf_desc_page *dpage, *tmp;
+
+ dev_dbg(dchan->device->dev, "Entry %s()\n", __func__);
+
+ nbpf_chan_halt(chan);
+ nbpf_chan_idle(chan);
+ /* Clean up for if a channel is re-used for MEMCPY after slave DMA */
+ nbpf_chan_prepare_default(chan);
+
+ list_for_each_entry_safe(dpage, tmp, &chan->desc_page, node) {
+ struct nbpf_link_desc *ldesc;
+ int i;
+ list_del(&dpage->node);
+ for (i = 0, ldesc = dpage->ldesc;
+ i < ARRAY_SIZE(dpage->ldesc);
+ i++, ldesc++)
+ dma_unmap_single(dchan->device->dev, ldesc->hwdesc_dma_addr,
+ sizeof(*ldesc->hwdesc), DMA_TO_DEVICE);
+ free_page((unsigned long)dpage);
+ }
+}
+
+static struct dma_chan *nbpf_of_xlate(struct of_phandle_args *dma_spec,
+ struct of_dma *ofdma)
+{
+ struct nbpf_device *nbpf = ofdma->of_dma_data;
+ struct dma_chan *dchan;
+ struct nbpf_channel *chan;
+
+ if (dma_spec->args_count != 2)
+ return NULL;
+
+ dchan = dma_get_any_slave_channel(&nbpf->dma_dev);
+ if (!dchan)
+ return NULL;
+
+ dev_dbg(dchan->device->dev, "Entry %s(%pOFn)\n", __func__,
+ dma_spec->np);
+
+ chan = nbpf_to_chan(dchan);
+
+ chan->terminal = dma_spec->args[0];
+ chan->flags = dma_spec->args[1];
+
+ nbpf_chan_prepare(chan);
+ nbpf_chan_configure(chan);
+
+ return dchan;
+}
+
+static void nbpf_chan_tasklet(struct tasklet_struct *t)
+{
+ struct nbpf_channel *chan = from_tasklet(chan, t, tasklet);
+ struct nbpf_desc *desc, *tmp;
+ struct dmaengine_desc_callback cb;
+
+ while (!list_empty(&chan->done)) {
+ bool found = false, must_put, recycling = false;
+
+ spin_lock_irq(&chan->lock);
+
+ list_for_each_entry_safe(desc, tmp, &chan->done, node) {
+ if (!desc->user_wait) {
+ /* Newly completed descriptor, have to process */
+ found = true;
+ break;
+ } else if (async_tx_test_ack(&desc->async_tx)) {
+ /*
+ * This descriptor was waiting for a user ACK,
+ * it can be recycled now.
+ */
+ list_del(&desc->node);
+ spin_unlock_irq(&chan->lock);
+ nbpf_desc_put(desc);
+ recycling = true;
+ break;
+ }
+ }
+
+ if (recycling)
+ continue;
+
+ if (!found) {
+ /* This can happen if TERMINATE_ALL has been called */
+ spin_unlock_irq(&chan->lock);
+ break;
+ }
+
+ dma_cookie_complete(&desc->async_tx);
+
+ /*
+ * With released lock we cannot dereference desc, maybe it's
+ * still on the "done" list
+ */
+ if (async_tx_test_ack(&desc->async_tx)) {
+ list_del(&desc->node);
+ must_put = true;
+ } else {
+ desc->user_wait = true;
+ must_put = false;
+ }
+
+ dmaengine_desc_get_callback(&desc->async_tx, &cb);
+
+ /* ack and callback completed descriptor */
+ spin_unlock_irq(&chan->lock);
+
+ dmaengine_desc_callback_invoke(&cb, NULL);
+
+ if (must_put)
+ nbpf_desc_put(desc);
+ }
+}
+
+static irqreturn_t nbpf_chan_irq(int irq, void *dev)
+{
+ struct nbpf_channel *chan = dev;
+ bool done = nbpf_status_get(chan);
+ struct nbpf_desc *desc;
+ irqreturn_t ret;
+ bool bh = false;
+
+ if (!done)
+ return IRQ_NONE;
+
+ nbpf_status_ack(chan);
+
+ dev_dbg(&chan->dma_chan.dev->device, "%s()\n", __func__);
+
+ spin_lock(&chan->lock);
+ desc = chan->running;
+ if (WARN_ON(!desc)) {
+ ret = IRQ_NONE;
+ goto unlock;
+ } else {
+ ret = IRQ_HANDLED;
+ bh = true;
+ }
+
+ list_move_tail(&desc->node, &chan->done);
+ chan->running = NULL;
+
+ if (!list_empty(&chan->active)) {
+ desc = list_first_entry(&chan->active,
+ struct nbpf_desc, node);
+ if (!nbpf_start(desc))
+ chan->running = desc;
+ }
+
+unlock:
+ spin_unlock(&chan->lock);
+
+ if (bh)
+ tasklet_schedule(&chan->tasklet);
+
+ return ret;
+}
+
+static irqreturn_t nbpf_err_irq(int irq, void *dev)
+{
+ struct nbpf_device *nbpf = dev;
+ u32 error = nbpf_error_get(nbpf);
+
+ dev_warn(nbpf->dma_dev.dev, "DMA error IRQ %u\n", irq);
+
+ if (!error)
+ return IRQ_NONE;
+
+ do {
+ struct nbpf_channel *chan = nbpf_error_get_channel(nbpf, error);
+ /* On error: abort all queued transfers, no callback */
+ nbpf_error_clear(chan);
+ nbpf_chan_idle(chan);
+ error = nbpf_error_get(nbpf);
+ } while (error);
+
+ return IRQ_HANDLED;
+}
+
+static int nbpf_chan_probe(struct nbpf_device *nbpf, int n)
+{
+ struct dma_device *dma_dev = &nbpf->dma_dev;
+ struct nbpf_channel *chan = nbpf->chan + n;
+ int ret;
+
+ chan->nbpf = nbpf;
+ chan->base = nbpf->base + NBPF_REG_CHAN_OFFSET + NBPF_REG_CHAN_SIZE * n;
+ INIT_LIST_HEAD(&chan->desc_page);
+ spin_lock_init(&chan->lock);
+ chan->dma_chan.device = dma_dev;
+ dma_cookie_init(&chan->dma_chan);
+ nbpf_chan_prepare_default(chan);
+
+ dev_dbg(dma_dev->dev, "%s(): channel %d: -> %p\n", __func__, n, chan->base);
+
+ snprintf(chan->name, sizeof(chan->name), "nbpf %d", n);
+
+ tasklet_setup(&chan->tasklet, nbpf_chan_tasklet);
+ ret = devm_request_irq(dma_dev->dev, chan->irq,
+ nbpf_chan_irq, IRQF_SHARED,
+ chan->name, chan);
+ if (ret < 0)
+ return ret;
+
+ /* Add the channel to DMA device channel list */
+ list_add_tail(&chan->dma_chan.device_node,
+ &dma_dev->channels);
+
+ return 0;
+}
+
+static const struct of_device_id nbpf_match[] = {
+ {.compatible = "renesas,nbpfaxi64dmac1b4", .data = &nbpf_cfg[NBPF1B4]},
+ {.compatible = "renesas,nbpfaxi64dmac1b8", .data = &nbpf_cfg[NBPF1B8]},
+ {.compatible = "renesas,nbpfaxi64dmac1b16", .data = &nbpf_cfg[NBPF1B16]},
+ {.compatible = "renesas,nbpfaxi64dmac4b4", .data = &nbpf_cfg[NBPF4B4]},
+ {.compatible = "renesas,nbpfaxi64dmac4b8", .data = &nbpf_cfg[NBPF4B8]},
+ {.compatible = "renesas,nbpfaxi64dmac4b16", .data = &nbpf_cfg[NBPF4B16]},
+ {.compatible = "renesas,nbpfaxi64dmac8b4", .data = &nbpf_cfg[NBPF8B4]},
+ {.compatible = "renesas,nbpfaxi64dmac8b8", .data = &nbpf_cfg[NBPF8B8]},
+ {.compatible = "renesas,nbpfaxi64dmac8b16", .data = &nbpf_cfg[NBPF8B16]},
+ {}
+};
+MODULE_DEVICE_TABLE(of, nbpf_match);
+
+static int nbpf_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct device_node *np = dev->of_node;
+ struct nbpf_device *nbpf;
+ struct dma_device *dma_dev;
+ struct resource *iomem;
+ const struct nbpf_config *cfg;
+ int num_channels;
+ int ret, irq, eirq, i;
+ int irqbuf[9] /* maximum 8 channels + error IRQ */;
+ unsigned int irqs = 0;
+
+ BUILD_BUG_ON(sizeof(struct nbpf_desc_page) > PAGE_SIZE);
+
+ /* DT only */
+ if (!np)
+ return -ENODEV;
+
+ cfg = of_device_get_match_data(dev);
+ num_channels = cfg->num_channels;
+
+ nbpf = devm_kzalloc(dev, struct_size(nbpf, chan, num_channels),
+ GFP_KERNEL);
+ if (!nbpf)
+ return -ENOMEM;
+
+ dma_dev = &nbpf->dma_dev;
+ dma_dev->dev = dev;
+
+ iomem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ nbpf->base = devm_ioremap_resource(dev, iomem);
+ if (IS_ERR(nbpf->base))
+ return PTR_ERR(nbpf->base);
+
+ nbpf->clk = devm_clk_get(dev, NULL);
+ if (IS_ERR(nbpf->clk))
+ return PTR_ERR(nbpf->clk);
+
+ of_property_read_u32(np, "max-burst-mem-read",
+ &nbpf->max_burst_mem_read);
+ of_property_read_u32(np, "max-burst-mem-write",
+ &nbpf->max_burst_mem_write);
+
+ nbpf->config = cfg;
+
+ for (i = 0; irqs < ARRAY_SIZE(irqbuf); i++) {
+ irq = platform_get_irq_optional(pdev, i);
+ if (irq < 0 && irq != -ENXIO)
+ return irq;
+ if (irq > 0)
+ irqbuf[irqs++] = irq;
+ }
+
+ /*
+ * 3 IRQ resource schemes are supported:
+ * 1. 1 shared IRQ for error and all channels
+ * 2. 2 IRQs: one for error and one shared for all channels
+ * 3. 1 IRQ for error and an own IRQ for each channel
+ */
+ if (irqs != 1 && irqs != 2 && irqs != num_channels + 1)
+ return -ENXIO;
+
+ if (irqs == 1) {
+ eirq = irqbuf[0];
+
+ for (i = 0; i <= num_channels; i++)
+ nbpf->chan[i].irq = irqbuf[0];
+ } else {
+ eirq = platform_get_irq_byname(pdev, "error");
+ if (eirq < 0)
+ return eirq;
+
+ if (irqs == num_channels + 1) {
+ struct nbpf_channel *chan;
+
+ for (i = 0, chan = nbpf->chan; i <= num_channels;
+ i++, chan++) {
+ /* Skip the error IRQ */
+ if (irqbuf[i] == eirq)
+ i++;
+ chan->irq = irqbuf[i];
+ }
+
+ if (chan != nbpf->chan + num_channels)
+ return -EINVAL;
+ } else {
+ /* 2 IRQs and more than one channel */
+ if (irqbuf[0] == eirq)
+ irq = irqbuf[1];
+ else
+ irq = irqbuf[0];
+
+ for (i = 0; i <= num_channels; i++)
+ nbpf->chan[i].irq = irq;
+ }
+ }
+
+ ret = devm_request_irq(dev, eirq, nbpf_err_irq,
+ IRQF_SHARED, "dma error", nbpf);
+ if (ret < 0)
+ return ret;
+ nbpf->eirq = eirq;
+
+ INIT_LIST_HEAD(&dma_dev->channels);
+
+ /* Create DMA Channel */
+ for (i = 0; i < num_channels; i++) {
+ ret = nbpf_chan_probe(nbpf, i);
+ if (ret < 0)
+ return ret;
+ }
+
+ dma_cap_set(DMA_MEMCPY, dma_dev->cap_mask);
+ dma_cap_set(DMA_SLAVE, dma_dev->cap_mask);
+ dma_cap_set(DMA_PRIVATE, dma_dev->cap_mask);
+
+ /* Common and MEMCPY operations */
+ dma_dev->device_alloc_chan_resources
+ = nbpf_alloc_chan_resources;
+ dma_dev->device_free_chan_resources = nbpf_free_chan_resources;
+ dma_dev->device_prep_dma_memcpy = nbpf_prep_memcpy;
+ dma_dev->device_tx_status = nbpf_tx_status;
+ dma_dev->device_issue_pending = nbpf_issue_pending;
+
+ /*
+ * If we drop support for unaligned MEMCPY buffer addresses and / or
+ * lengths by setting
+ * dma_dev->copy_align = 4;
+ * then we can set transfer length to 4 bytes in nbpf_prep_one() for
+ * DMA_MEM_TO_MEM
+ */
+
+ /* Compulsory for DMA_SLAVE fields */
+ dma_dev->device_prep_slave_sg = nbpf_prep_slave_sg;
+ dma_dev->device_config = nbpf_config;
+ dma_dev->device_pause = nbpf_pause;
+ dma_dev->device_terminate_all = nbpf_terminate_all;
+
+ dma_dev->src_addr_widths = NBPF_DMA_BUSWIDTHS;
+ dma_dev->dst_addr_widths = NBPF_DMA_BUSWIDTHS;
+ dma_dev->directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
+
+ platform_set_drvdata(pdev, nbpf);
+
+ ret = clk_prepare_enable(nbpf->clk);
+ if (ret < 0)
+ return ret;
+
+ nbpf_configure(nbpf);
+
+ ret = dma_async_device_register(dma_dev);
+ if (ret < 0)
+ goto e_clk_off;
+
+ ret = of_dma_controller_register(np, nbpf_of_xlate, nbpf);
+ if (ret < 0)
+ goto e_dma_dev_unreg;
+
+ return 0;
+
+e_dma_dev_unreg:
+ dma_async_device_unregister(dma_dev);
+e_clk_off:
+ clk_disable_unprepare(nbpf->clk);
+
+ return ret;
+}
+
+static int nbpf_remove(struct platform_device *pdev)
+{
+ struct nbpf_device *nbpf = platform_get_drvdata(pdev);
+ int i;
+
+ devm_free_irq(&pdev->dev, nbpf->eirq, nbpf);
+
+ for (i = 0; i < nbpf->config->num_channels; i++) {
+ struct nbpf_channel *chan = nbpf->chan + i;
+
+ devm_free_irq(&pdev->dev, chan->irq, chan);
+
+ tasklet_kill(&chan->tasklet);
+ }
+
+ of_dma_controller_free(pdev->dev.of_node);
+ dma_async_device_unregister(&nbpf->dma_dev);
+ clk_disable_unprepare(nbpf->clk);
+
+ return 0;
+}
+
+static const struct platform_device_id nbpf_ids[] = {
+ {"nbpfaxi64dmac1b4", (kernel_ulong_t)&nbpf_cfg[NBPF1B4]},
+ {"nbpfaxi64dmac1b8", (kernel_ulong_t)&nbpf_cfg[NBPF1B8]},
+ {"nbpfaxi64dmac1b16", (kernel_ulong_t)&nbpf_cfg[NBPF1B16]},
+ {"nbpfaxi64dmac4b4", (kernel_ulong_t)&nbpf_cfg[NBPF4B4]},
+ {"nbpfaxi64dmac4b8", (kernel_ulong_t)&nbpf_cfg[NBPF4B8]},
+ {"nbpfaxi64dmac4b16", (kernel_ulong_t)&nbpf_cfg[NBPF4B16]},
+ {"nbpfaxi64dmac8b4", (kernel_ulong_t)&nbpf_cfg[NBPF8B4]},
+ {"nbpfaxi64dmac8b8", (kernel_ulong_t)&nbpf_cfg[NBPF8B8]},
+ {"nbpfaxi64dmac8b16", (kernel_ulong_t)&nbpf_cfg[NBPF8B16]},
+ {},
+};
+MODULE_DEVICE_TABLE(platform, nbpf_ids);
+
+#ifdef CONFIG_PM
+static int nbpf_runtime_suspend(struct device *dev)
+{
+ struct nbpf_device *nbpf = dev_get_drvdata(dev);
+ clk_disable_unprepare(nbpf->clk);
+ return 0;
+}
+
+static int nbpf_runtime_resume(struct device *dev)
+{
+ struct nbpf_device *nbpf = dev_get_drvdata(dev);
+ return clk_prepare_enable(nbpf->clk);
+}
+#endif
+
+static const struct dev_pm_ops nbpf_pm_ops = {
+ SET_RUNTIME_PM_OPS(nbpf_runtime_suspend, nbpf_runtime_resume, NULL)
+};
+
+static struct platform_driver nbpf_driver = {
+ .driver = {
+ .name = "dma-nbpf",
+ .of_match_table = nbpf_match,
+ .pm = &nbpf_pm_ops,
+ },
+ .id_table = nbpf_ids,
+ .probe = nbpf_probe,
+ .remove = nbpf_remove,
+};
+
+module_platform_driver(nbpf_driver);
+
+MODULE_AUTHOR("Guennadi Liakhovetski <g.liakhovetski@gmx.de>");
+MODULE_DESCRIPTION("dmaengine driver for NBPFAXI64* DMACs");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/of-dma.c b/drivers/dma/of-dma.c
new file mode 100644
index 000000000..ac61ecda2
--- /dev/null
+++ b/drivers/dma/of-dma.c
@@ -0,0 +1,371 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Device tree helpers for DMA request / controller
+ *
+ * Based on of_gpio.c
+ *
+ * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/
+ */
+
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+#include <linux/of.h>
+#include <linux/of_dma.h>
+
+#include "dmaengine.h"
+
+static LIST_HEAD(of_dma_list);
+static DEFINE_MUTEX(of_dma_lock);
+
+/**
+ * of_dma_find_controller - Get a DMA controller in DT DMA helpers list
+ * @dma_spec: pointer to DMA specifier as found in the device tree
+ *
+ * Finds a DMA controller with matching device node and number for dma cells
+ * in a list of registered DMA controllers. If a match is found a valid pointer
+ * to the DMA data stored is retuned. A NULL pointer is returned if no match is
+ * found.
+ */
+static struct of_dma *of_dma_find_controller(struct of_phandle_args *dma_spec)
+{
+ struct of_dma *ofdma;
+
+ list_for_each_entry(ofdma, &of_dma_list, of_dma_controllers)
+ if (ofdma->of_node == dma_spec->np)
+ return ofdma;
+
+ pr_debug("%s: can't find DMA controller %pOF\n", __func__,
+ dma_spec->np);
+
+ return NULL;
+}
+
+/**
+ * of_dma_router_xlate - translation function for router devices
+ * @dma_spec: pointer to DMA specifier as found in the device tree
+ * @ofdma: pointer to DMA controller data (router information)
+ *
+ * The function creates new dma_spec to be passed to the router driver's
+ * of_dma_route_allocate() function to prepare a dma_spec which will be used
+ * to request channel from the real DMA controller.
+ */
+static struct dma_chan *of_dma_router_xlate(struct of_phandle_args *dma_spec,
+ struct of_dma *ofdma)
+{
+ struct dma_chan *chan;
+ struct of_dma *ofdma_target;
+ struct of_phandle_args dma_spec_target;
+ void *route_data;
+
+ /* translate the request for the real DMA controller */
+ memcpy(&dma_spec_target, dma_spec, sizeof(dma_spec_target));
+ route_data = ofdma->of_dma_route_allocate(&dma_spec_target, ofdma);
+ if (IS_ERR(route_data))
+ return NULL;
+
+ ofdma_target = of_dma_find_controller(&dma_spec_target);
+ if (!ofdma_target) {
+ ofdma->dma_router->route_free(ofdma->dma_router->dev,
+ route_data);
+ chan = ERR_PTR(-EPROBE_DEFER);
+ goto err;
+ }
+
+ chan = ofdma_target->of_dma_xlate(&dma_spec_target, ofdma_target);
+ if (IS_ERR_OR_NULL(chan)) {
+ ofdma->dma_router->route_free(ofdma->dma_router->dev,
+ route_data);
+ } else {
+ int ret = 0;
+
+ chan->router = ofdma->dma_router;
+ chan->route_data = route_data;
+
+ if (chan->device->device_router_config)
+ ret = chan->device->device_router_config(chan);
+
+ if (ret) {
+ dma_release_channel(chan);
+ chan = ERR_PTR(ret);
+ }
+ }
+
+err:
+ /*
+ * Need to put the node back since the ofdma->of_dma_route_allocate
+ * has taken it for generating the new, translated dma_spec
+ */
+ of_node_put(dma_spec_target.np);
+ return chan;
+}
+
+/**
+ * of_dma_controller_register - Register a DMA controller to DT DMA helpers
+ * @np: device node of DMA controller
+ * @of_dma_xlate: translation function which converts a phandle
+ * arguments list into a dma_chan structure
+ * @data: pointer to controller specific data to be used by
+ * translation function
+ *
+ * Returns 0 on success or appropriate errno value on error.
+ *
+ * Allocated memory should be freed with appropriate of_dma_controller_free()
+ * call.
+ */
+int of_dma_controller_register(struct device_node *np,
+ struct dma_chan *(*of_dma_xlate)
+ (struct of_phandle_args *, struct of_dma *),
+ void *data)
+{
+ struct of_dma *ofdma;
+
+ if (!np || !of_dma_xlate) {
+ pr_err("%s: not enough information provided\n", __func__);
+ return -EINVAL;
+ }
+
+ ofdma = kzalloc(sizeof(*ofdma), GFP_KERNEL);
+ if (!ofdma)
+ return -ENOMEM;
+
+ ofdma->of_node = np;
+ ofdma->of_dma_xlate = of_dma_xlate;
+ ofdma->of_dma_data = data;
+
+ /* Now queue of_dma controller structure in list */
+ mutex_lock(&of_dma_lock);
+ list_add_tail(&ofdma->of_dma_controllers, &of_dma_list);
+ mutex_unlock(&of_dma_lock);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(of_dma_controller_register);
+
+/**
+ * of_dma_controller_free - Remove a DMA controller from DT DMA helpers list
+ * @np: device node of DMA controller
+ *
+ * Memory allocated by of_dma_controller_register() is freed here.
+ */
+void of_dma_controller_free(struct device_node *np)
+{
+ struct of_dma *ofdma;
+
+ mutex_lock(&of_dma_lock);
+
+ list_for_each_entry(ofdma, &of_dma_list, of_dma_controllers)
+ if (ofdma->of_node == np) {
+ list_del(&ofdma->of_dma_controllers);
+ kfree(ofdma);
+ break;
+ }
+
+ mutex_unlock(&of_dma_lock);
+}
+EXPORT_SYMBOL_GPL(of_dma_controller_free);
+
+/**
+ * of_dma_router_register - Register a DMA router to DT DMA helpers as a
+ * controller
+ * @np: device node of DMA router
+ * @of_dma_route_allocate: setup function for the router which need to
+ * modify the dma_spec for the DMA controller to
+ * use and to set up the requested route.
+ * @dma_router: pointer to dma_router structure to be used when
+ * the route need to be free up.
+ *
+ * Returns 0 on success or appropriate errno value on error.
+ *
+ * Allocated memory should be freed with appropriate of_dma_controller_free()
+ * call.
+ */
+int of_dma_router_register(struct device_node *np,
+ void *(*of_dma_route_allocate)
+ (struct of_phandle_args *, struct of_dma *),
+ struct dma_router *dma_router)
+{
+ struct of_dma *ofdma;
+
+ if (!np || !of_dma_route_allocate || !dma_router) {
+ pr_err("%s: not enough information provided\n", __func__);
+ return -EINVAL;
+ }
+
+ ofdma = kzalloc(sizeof(*ofdma), GFP_KERNEL);
+ if (!ofdma)
+ return -ENOMEM;
+
+ ofdma->of_node = np;
+ ofdma->of_dma_xlate = of_dma_router_xlate;
+ ofdma->of_dma_route_allocate = of_dma_route_allocate;
+ ofdma->dma_router = dma_router;
+
+ /* Now queue of_dma controller structure in list */
+ mutex_lock(&of_dma_lock);
+ list_add_tail(&ofdma->of_dma_controllers, &of_dma_list);
+ mutex_unlock(&of_dma_lock);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(of_dma_router_register);
+
+/**
+ * of_dma_match_channel - Check if a DMA specifier matches name
+ * @np: device node to look for DMA channels
+ * @name: channel name to be matched
+ * @index: index of DMA specifier in list of DMA specifiers
+ * @dma_spec: pointer to DMA specifier as found in the device tree
+ *
+ * Check if the DMA specifier pointed to by the index in a list of DMA
+ * specifiers, matches the name provided. Returns 0 if the name matches and
+ * a valid pointer to the DMA specifier is found. Otherwise returns -ENODEV.
+ */
+static int of_dma_match_channel(struct device_node *np, const char *name,
+ int index, struct of_phandle_args *dma_spec)
+{
+ const char *s;
+
+ if (of_property_read_string_index(np, "dma-names", index, &s))
+ return -ENODEV;
+
+ if (strcmp(name, s))
+ return -ENODEV;
+
+ if (of_parse_phandle_with_args(np, "dmas", "#dma-cells", index,
+ dma_spec))
+ return -ENODEV;
+
+ return 0;
+}
+
+/**
+ * of_dma_request_slave_channel - Get the DMA slave channel
+ * @np: device node to get DMA request from
+ * @name: name of desired channel
+ *
+ * Returns pointer to appropriate DMA channel on success or an error pointer.
+ */
+struct dma_chan *of_dma_request_slave_channel(struct device_node *np,
+ const char *name)
+{
+ struct of_phandle_args dma_spec;
+ struct of_dma *ofdma;
+ struct dma_chan *chan;
+ int count, i, start;
+ int ret_no_channel = -ENODEV;
+ static atomic_t last_index;
+
+ if (!np || !name) {
+ pr_err("%s: not enough information provided\n", __func__);
+ return ERR_PTR(-ENODEV);
+ }
+
+ /* Silently fail if there is not even the "dmas" property */
+ if (!of_find_property(np, "dmas", NULL))
+ return ERR_PTR(-ENODEV);
+
+ count = of_property_count_strings(np, "dma-names");
+ if (count < 0) {
+ pr_err("%s: dma-names property of node '%pOF' missing or empty\n",
+ __func__, np);
+ return ERR_PTR(-ENODEV);
+ }
+
+ /*
+ * approximate an average distribution across multiple
+ * entries with the same name
+ */
+ start = atomic_inc_return(&last_index);
+ for (i = 0; i < count; i++) {
+ if (of_dma_match_channel(np, name,
+ (i + start) % count,
+ &dma_spec))
+ continue;
+
+ mutex_lock(&of_dma_lock);
+ ofdma = of_dma_find_controller(&dma_spec);
+
+ if (ofdma) {
+ chan = ofdma->of_dma_xlate(&dma_spec, ofdma);
+ } else {
+ ret_no_channel = -EPROBE_DEFER;
+ chan = NULL;
+ }
+
+ mutex_unlock(&of_dma_lock);
+
+ of_node_put(dma_spec.np);
+
+ if (chan)
+ return chan;
+ }
+
+ return ERR_PTR(ret_no_channel);
+}
+EXPORT_SYMBOL_GPL(of_dma_request_slave_channel);
+
+/**
+ * of_dma_simple_xlate - Simple DMA engine translation function
+ * @dma_spec: pointer to DMA specifier as found in the device tree
+ * @ofdma: pointer to DMA controller data
+ *
+ * A simple translation function for devices that use a 32-bit value for the
+ * filter_param when calling the DMA engine dma_request_channel() function.
+ * Note that this translation function requires that #dma-cells is equal to 1
+ * and the argument of the dma specifier is the 32-bit filter_param. Returns
+ * pointer to appropriate dma channel on success or NULL on error.
+ */
+struct dma_chan *of_dma_simple_xlate(struct of_phandle_args *dma_spec,
+ struct of_dma *ofdma)
+{
+ int count = dma_spec->args_count;
+ struct of_dma_filter_info *info = ofdma->of_dma_data;
+
+ if (!info || !info->filter_fn)
+ return NULL;
+
+ if (count != 1)
+ return NULL;
+
+ return __dma_request_channel(&info->dma_cap, info->filter_fn,
+ &dma_spec->args[0], dma_spec->np);
+}
+EXPORT_SYMBOL_GPL(of_dma_simple_xlate);
+
+/**
+ * of_dma_xlate_by_chan_id - Translate dt property to DMA channel by channel id
+ * @dma_spec: pointer to DMA specifier as found in the device tree
+ * @ofdma: pointer to DMA controller data
+ *
+ * This function can be used as the of xlate callback for DMA driver which wants
+ * to match the channel based on the channel id. When using this xlate function
+ * the #dma-cells propety of the DMA controller dt node needs to be set to 1.
+ * The data parameter of of_dma_controller_register must be a pointer to the
+ * dma_device struct the function should match upon.
+ *
+ * Returns pointer to appropriate dma channel on success or NULL on error.
+ */
+struct dma_chan *of_dma_xlate_by_chan_id(struct of_phandle_args *dma_spec,
+ struct of_dma *ofdma)
+{
+ struct dma_device *dev = ofdma->of_dma_data;
+ struct dma_chan *chan, *candidate = NULL;
+
+ if (!dev || dma_spec->args_count != 1)
+ return NULL;
+
+ list_for_each_entry(chan, &dev->channels, device_node)
+ if (chan->chan_id == dma_spec->args[0]) {
+ candidate = chan;
+ break;
+ }
+
+ if (!candidate)
+ return NULL;
+
+ return dma_get_slave_channel(candidate);
+}
+EXPORT_SYMBOL_GPL(of_dma_xlate_by_chan_id);
diff --git a/drivers/dma/owl-dma.c b/drivers/dma/owl-dma.c
new file mode 100644
index 000000000..b6e0ac831
--- /dev/null
+++ b/drivers/dma/owl-dma.c
@@ -0,0 +1,1278 @@
+// SPDX-License-Identifier: GPL-2.0+
+//
+// Actions Semi Owl SoCs DMA driver
+//
+// Copyright (c) 2014 Actions Semi Inc.
+// Author: David Liu <liuwei@actions-semi.com>
+//
+// Copyright (c) 2018 Linaro Ltd.
+// Author: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
+
+#include <linux/bitops.h>
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmapool.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/of_dma.h>
+#include <linux/slab.h>
+#include "virt-dma.h"
+
+#define OWL_DMA_FRAME_MAX_LENGTH 0xfffff
+
+/* Global DMA Controller Registers */
+#define OWL_DMA_IRQ_PD0 0x00
+#define OWL_DMA_IRQ_PD1 0x04
+#define OWL_DMA_IRQ_PD2 0x08
+#define OWL_DMA_IRQ_PD3 0x0C
+#define OWL_DMA_IRQ_EN0 0x10
+#define OWL_DMA_IRQ_EN1 0x14
+#define OWL_DMA_IRQ_EN2 0x18
+#define OWL_DMA_IRQ_EN3 0x1C
+#define OWL_DMA_SECURE_ACCESS_CTL 0x20
+#define OWL_DMA_NIC_QOS 0x24
+#define OWL_DMA_DBGSEL 0x28
+#define OWL_DMA_IDLE_STAT 0x2C
+
+/* Channel Registers */
+#define OWL_DMA_CHAN_BASE(i) (0x100 + (i) * 0x100)
+#define OWL_DMAX_MODE 0x00
+#define OWL_DMAX_SOURCE 0x04
+#define OWL_DMAX_DESTINATION 0x08
+#define OWL_DMAX_FRAME_LEN 0x0C
+#define OWL_DMAX_FRAME_CNT 0x10
+#define OWL_DMAX_REMAIN_FRAME_CNT 0x14
+#define OWL_DMAX_REMAIN_CNT 0x18
+#define OWL_DMAX_SOURCE_STRIDE 0x1C
+#define OWL_DMAX_DESTINATION_STRIDE 0x20
+#define OWL_DMAX_START 0x24
+#define OWL_DMAX_PAUSE 0x28
+#define OWL_DMAX_CHAINED_CTL 0x2C
+#define OWL_DMAX_CONSTANT 0x30
+#define OWL_DMAX_LINKLIST_CTL 0x34
+#define OWL_DMAX_NEXT_DESCRIPTOR 0x38
+#define OWL_DMAX_CURRENT_DESCRIPTOR_NUM 0x3C
+#define OWL_DMAX_INT_CTL 0x40
+#define OWL_DMAX_INT_STATUS 0x44
+#define OWL_DMAX_CURRENT_SOURCE_POINTER 0x48
+#define OWL_DMAX_CURRENT_DESTINATION_POINTER 0x4C
+
+/* OWL_DMAX_MODE Bits */
+#define OWL_DMA_MODE_TS(x) (((x) & GENMASK(5, 0)) << 0)
+#define OWL_DMA_MODE_ST(x) (((x) & GENMASK(1, 0)) << 8)
+#define OWL_DMA_MODE_ST_DEV OWL_DMA_MODE_ST(0)
+#define OWL_DMA_MODE_ST_DCU OWL_DMA_MODE_ST(2)
+#define OWL_DMA_MODE_ST_SRAM OWL_DMA_MODE_ST(3)
+#define OWL_DMA_MODE_DT(x) (((x) & GENMASK(1, 0)) << 10)
+#define OWL_DMA_MODE_DT_DEV OWL_DMA_MODE_DT(0)
+#define OWL_DMA_MODE_DT_DCU OWL_DMA_MODE_DT(2)
+#define OWL_DMA_MODE_DT_SRAM OWL_DMA_MODE_DT(3)
+#define OWL_DMA_MODE_SAM(x) (((x) & GENMASK(1, 0)) << 16)
+#define OWL_DMA_MODE_SAM_CONST OWL_DMA_MODE_SAM(0)
+#define OWL_DMA_MODE_SAM_INC OWL_DMA_MODE_SAM(1)
+#define OWL_DMA_MODE_SAM_STRIDE OWL_DMA_MODE_SAM(2)
+#define OWL_DMA_MODE_DAM(x) (((x) & GENMASK(1, 0)) << 18)
+#define OWL_DMA_MODE_DAM_CONST OWL_DMA_MODE_DAM(0)
+#define OWL_DMA_MODE_DAM_INC OWL_DMA_MODE_DAM(1)
+#define OWL_DMA_MODE_DAM_STRIDE OWL_DMA_MODE_DAM(2)
+#define OWL_DMA_MODE_PW(x) (((x) & GENMASK(2, 0)) << 20)
+#define OWL_DMA_MODE_CB BIT(23)
+#define OWL_DMA_MODE_NDDBW(x) (((x) & 0x1) << 28)
+#define OWL_DMA_MODE_NDDBW_32BIT OWL_DMA_MODE_NDDBW(0)
+#define OWL_DMA_MODE_NDDBW_8BIT OWL_DMA_MODE_NDDBW(1)
+#define OWL_DMA_MODE_CFE BIT(29)
+#define OWL_DMA_MODE_LME BIT(30)
+#define OWL_DMA_MODE_CME BIT(31)
+
+/* OWL_DMAX_LINKLIST_CTL Bits */
+#define OWL_DMA_LLC_SAV(x) (((x) & GENMASK(1, 0)) << 8)
+#define OWL_DMA_LLC_SAV_INC OWL_DMA_LLC_SAV(0)
+#define OWL_DMA_LLC_SAV_LOAD_NEXT OWL_DMA_LLC_SAV(1)
+#define OWL_DMA_LLC_SAV_LOAD_PREV OWL_DMA_LLC_SAV(2)
+#define OWL_DMA_LLC_DAV(x) (((x) & GENMASK(1, 0)) << 10)
+#define OWL_DMA_LLC_DAV_INC OWL_DMA_LLC_DAV(0)
+#define OWL_DMA_LLC_DAV_LOAD_NEXT OWL_DMA_LLC_DAV(1)
+#define OWL_DMA_LLC_DAV_LOAD_PREV OWL_DMA_LLC_DAV(2)
+#define OWL_DMA_LLC_SUSPEND BIT(16)
+
+/* OWL_DMAX_INT_CTL Bits */
+#define OWL_DMA_INTCTL_BLOCK BIT(0)
+#define OWL_DMA_INTCTL_SUPER_BLOCK BIT(1)
+#define OWL_DMA_INTCTL_FRAME BIT(2)
+#define OWL_DMA_INTCTL_HALF_FRAME BIT(3)
+#define OWL_DMA_INTCTL_LAST_FRAME BIT(4)
+
+/* OWL_DMAX_INT_STATUS Bits */
+#define OWL_DMA_INTSTAT_BLOCK BIT(0)
+#define OWL_DMA_INTSTAT_SUPER_BLOCK BIT(1)
+#define OWL_DMA_INTSTAT_FRAME BIT(2)
+#define OWL_DMA_INTSTAT_HALF_FRAME BIT(3)
+#define OWL_DMA_INTSTAT_LAST_FRAME BIT(4)
+
+/* Pack shift and newshift in a single word */
+#define BIT_FIELD(val, width, shift, newshift) \
+ ((((val) >> (shift)) & ((BIT(width)) - 1)) << (newshift))
+
+/* Frame count value is fixed as 1 */
+#define FCNT_VAL 0x1
+
+/**
+ * enum owl_dmadesc_offsets - Describe DMA descriptor, hardware link
+ * list for dma transfer
+ * @OWL_DMADESC_NEXT_LLI: physical address of the next link list
+ * @OWL_DMADESC_SADDR: source physical address
+ * @OWL_DMADESC_DADDR: destination physical address
+ * @OWL_DMADESC_FLEN: frame length
+ * @OWL_DMADESC_SRC_STRIDE: source stride
+ * @OWL_DMADESC_DST_STRIDE: destination stride
+ * @OWL_DMADESC_CTRLA: dma_mode and linklist ctrl config
+ * @OWL_DMADESC_CTRLB: interrupt config
+ * @OWL_DMADESC_CONST_NUM: data for constant fill
+ * @OWL_DMADESC_SIZE: max size of this enum
+ */
+enum owl_dmadesc_offsets {
+ OWL_DMADESC_NEXT_LLI = 0,
+ OWL_DMADESC_SADDR,
+ OWL_DMADESC_DADDR,
+ OWL_DMADESC_FLEN,
+ OWL_DMADESC_SRC_STRIDE,
+ OWL_DMADESC_DST_STRIDE,
+ OWL_DMADESC_CTRLA,
+ OWL_DMADESC_CTRLB,
+ OWL_DMADESC_CONST_NUM,
+ OWL_DMADESC_SIZE
+};
+
+enum owl_dma_id {
+ S900_DMA,
+ S700_DMA,
+};
+
+/**
+ * struct owl_dma_lli - Link list for dma transfer
+ * @hw: hardware link list
+ * @phys: physical address of hardware link list
+ * @node: node for txd's lli_list
+ */
+struct owl_dma_lli {
+ u32 hw[OWL_DMADESC_SIZE];
+ dma_addr_t phys;
+ struct list_head node;
+};
+
+/**
+ * struct owl_dma_txd - Wrapper for struct dma_async_tx_descriptor
+ * @vd: virtual DMA descriptor
+ * @lli_list: link list of lli nodes
+ * @cyclic: flag to indicate cyclic transfers
+ */
+struct owl_dma_txd {
+ struct virt_dma_desc vd;
+ struct list_head lli_list;
+ bool cyclic;
+};
+
+/**
+ * struct owl_dma_pchan - Holder for the physical channels
+ * @id: physical index to this channel
+ * @base: virtual memory base for the dma channel
+ * @vchan: the virtual channel currently being served by this physical channel
+ */
+struct owl_dma_pchan {
+ u32 id;
+ void __iomem *base;
+ struct owl_dma_vchan *vchan;
+};
+
+/**
+ * struct owl_dma_vchan - Wrapper for DMA ENGINE channel
+ * @vc: wrapped virtual channel
+ * @pchan: the physical channel utilized by this channel
+ * @txd: active transaction on this channel
+ * @cfg: slave configuration for this channel
+ * @drq: physical DMA request ID for this channel
+ */
+struct owl_dma_vchan {
+ struct virt_dma_chan vc;
+ struct owl_dma_pchan *pchan;
+ struct owl_dma_txd *txd;
+ struct dma_slave_config cfg;
+ u8 drq;
+};
+
+/**
+ * struct owl_dma - Holder for the Owl DMA controller
+ * @dma: dma engine for this instance
+ * @base: virtual memory base for the DMA controller
+ * @clk: clock for the DMA controller
+ * @lock: a lock to use when change DMA controller global register
+ * @lli_pool: a pool for the LLI descriptors
+ * @irq: interrupt ID for the DMA controller
+ * @nr_pchans: the number of physical channels
+ * @pchans: array of data for the physical channels
+ * @nr_vchans: the number of physical channels
+ * @vchans: array of data for the physical channels
+ * @devid: device id based on OWL SoC
+ */
+struct owl_dma {
+ struct dma_device dma;
+ void __iomem *base;
+ struct clk *clk;
+ spinlock_t lock;
+ struct dma_pool *lli_pool;
+ int irq;
+
+ unsigned int nr_pchans;
+ struct owl_dma_pchan *pchans;
+
+ unsigned int nr_vchans;
+ struct owl_dma_vchan *vchans;
+ enum owl_dma_id devid;
+};
+
+static void pchan_update(struct owl_dma_pchan *pchan, u32 reg,
+ u32 val, bool state)
+{
+ u32 regval;
+
+ regval = readl(pchan->base + reg);
+
+ if (state)
+ regval |= val;
+ else
+ regval &= ~val;
+
+ writel(val, pchan->base + reg);
+}
+
+static void pchan_writel(struct owl_dma_pchan *pchan, u32 reg, u32 data)
+{
+ writel(data, pchan->base + reg);
+}
+
+static u32 pchan_readl(struct owl_dma_pchan *pchan, u32 reg)
+{
+ return readl(pchan->base + reg);
+}
+
+static void dma_update(struct owl_dma *od, u32 reg, u32 val, bool state)
+{
+ u32 regval;
+
+ regval = readl(od->base + reg);
+
+ if (state)
+ regval |= val;
+ else
+ regval &= ~val;
+
+ writel(val, od->base + reg);
+}
+
+static void dma_writel(struct owl_dma *od, u32 reg, u32 data)
+{
+ writel(data, od->base + reg);
+}
+
+static u32 dma_readl(struct owl_dma *od, u32 reg)
+{
+ return readl(od->base + reg);
+}
+
+static inline struct owl_dma *to_owl_dma(struct dma_device *dd)
+{
+ return container_of(dd, struct owl_dma, dma);
+}
+
+static struct device *chan2dev(struct dma_chan *chan)
+{
+ return &chan->dev->device;
+}
+
+static inline struct owl_dma_vchan *to_owl_vchan(struct dma_chan *chan)
+{
+ return container_of(chan, struct owl_dma_vchan, vc.chan);
+}
+
+static inline struct owl_dma_txd *to_owl_txd(struct dma_async_tx_descriptor *tx)
+{
+ return container_of(tx, struct owl_dma_txd, vd.tx);
+}
+
+static inline u32 llc_hw_ctrla(u32 mode, u32 llc_ctl)
+{
+ u32 ctl;
+
+ ctl = BIT_FIELD(mode, 4, 28, 28) |
+ BIT_FIELD(mode, 8, 16, 20) |
+ BIT_FIELD(mode, 4, 8, 16) |
+ BIT_FIELD(mode, 6, 0, 10) |
+ BIT_FIELD(llc_ctl, 2, 10, 8) |
+ BIT_FIELD(llc_ctl, 2, 8, 6);
+
+ return ctl;
+}
+
+static inline u32 llc_hw_ctrlb(u32 int_ctl)
+{
+ u32 ctl;
+
+ /*
+ * Irrespective of the SoC, ctrlb value starts filling from
+ * bit 18.
+ */
+ ctl = BIT_FIELD(int_ctl, 7, 0, 18);
+
+ return ctl;
+}
+
+static u32 llc_hw_flen(struct owl_dma_lli *lli)
+{
+ return lli->hw[OWL_DMADESC_FLEN] & GENMASK(19, 0);
+}
+
+static void owl_dma_free_lli(struct owl_dma *od,
+ struct owl_dma_lli *lli)
+{
+ list_del(&lli->node);
+ dma_pool_free(od->lli_pool, lli, lli->phys);
+}
+
+static struct owl_dma_lli *owl_dma_alloc_lli(struct owl_dma *od)
+{
+ struct owl_dma_lli *lli;
+ dma_addr_t phys;
+
+ lli = dma_pool_alloc(od->lli_pool, GFP_NOWAIT, &phys);
+ if (!lli)
+ return NULL;
+
+ INIT_LIST_HEAD(&lli->node);
+ lli->phys = phys;
+
+ return lli;
+}
+
+static struct owl_dma_lli *owl_dma_add_lli(struct owl_dma_txd *txd,
+ struct owl_dma_lli *prev,
+ struct owl_dma_lli *next,
+ bool is_cyclic)
+{
+ if (!is_cyclic)
+ list_add_tail(&next->node, &txd->lli_list);
+
+ if (prev) {
+ prev->hw[OWL_DMADESC_NEXT_LLI] = next->phys;
+ prev->hw[OWL_DMADESC_CTRLA] |=
+ llc_hw_ctrla(OWL_DMA_MODE_LME, 0);
+ }
+
+ return next;
+}
+
+static inline int owl_dma_cfg_lli(struct owl_dma_vchan *vchan,
+ struct owl_dma_lli *lli,
+ dma_addr_t src, dma_addr_t dst,
+ u32 len, enum dma_transfer_direction dir,
+ struct dma_slave_config *sconfig,
+ bool is_cyclic)
+{
+ struct owl_dma *od = to_owl_dma(vchan->vc.chan.device);
+ u32 mode, ctrlb;
+
+ mode = OWL_DMA_MODE_PW(0);
+
+ switch (dir) {
+ case DMA_MEM_TO_MEM:
+ mode |= OWL_DMA_MODE_TS(0) | OWL_DMA_MODE_ST_DCU |
+ OWL_DMA_MODE_DT_DCU | OWL_DMA_MODE_SAM_INC |
+ OWL_DMA_MODE_DAM_INC;
+
+ break;
+ case DMA_MEM_TO_DEV:
+ mode |= OWL_DMA_MODE_TS(vchan->drq)
+ | OWL_DMA_MODE_ST_DCU | OWL_DMA_MODE_DT_DEV
+ | OWL_DMA_MODE_SAM_INC | OWL_DMA_MODE_DAM_CONST;
+
+ /*
+ * Hardware only supports 32bit and 8bit buswidth. Since the
+ * default is 32bit, select 8bit only when requested.
+ */
+ if (sconfig->dst_addr_width == DMA_SLAVE_BUSWIDTH_1_BYTE)
+ mode |= OWL_DMA_MODE_NDDBW_8BIT;
+
+ break;
+ case DMA_DEV_TO_MEM:
+ mode |= OWL_DMA_MODE_TS(vchan->drq)
+ | OWL_DMA_MODE_ST_DEV | OWL_DMA_MODE_DT_DCU
+ | OWL_DMA_MODE_SAM_CONST | OWL_DMA_MODE_DAM_INC;
+
+ /*
+ * Hardware only supports 32bit and 8bit buswidth. Since the
+ * default is 32bit, select 8bit only when requested.
+ */
+ if (sconfig->src_addr_width == DMA_SLAVE_BUSWIDTH_1_BYTE)
+ mode |= OWL_DMA_MODE_NDDBW_8BIT;
+
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ lli->hw[OWL_DMADESC_CTRLA] = llc_hw_ctrla(mode,
+ OWL_DMA_LLC_SAV_LOAD_NEXT |
+ OWL_DMA_LLC_DAV_LOAD_NEXT);
+
+ if (is_cyclic)
+ ctrlb = llc_hw_ctrlb(OWL_DMA_INTCTL_BLOCK);
+ else
+ ctrlb = llc_hw_ctrlb(OWL_DMA_INTCTL_SUPER_BLOCK);
+
+ lli->hw[OWL_DMADESC_NEXT_LLI] = 0; /* One link list by default */
+ lli->hw[OWL_DMADESC_SADDR] = src;
+ lli->hw[OWL_DMADESC_DADDR] = dst;
+ lli->hw[OWL_DMADESC_SRC_STRIDE] = 0;
+ lli->hw[OWL_DMADESC_DST_STRIDE] = 0;
+
+ if (od->devid == S700_DMA) {
+ /* Max frame length is 1MB */
+ lli->hw[OWL_DMADESC_FLEN] = len;
+ /*
+ * On S700, word starts from offset 0x1C is shared between
+ * frame count and ctrlb, where first 12 bits are for frame
+ * count and rest of 20 bits are for ctrlb.
+ */
+ lli->hw[OWL_DMADESC_CTRLB] = FCNT_VAL | ctrlb;
+ } else {
+ /*
+ * On S900, word starts from offset 0xC is shared between
+ * frame length (max frame length is 1MB) and frame count,
+ * where first 20 bits are for frame length and rest of
+ * 12 bits are for frame count.
+ */
+ lli->hw[OWL_DMADESC_FLEN] = len | FCNT_VAL << 20;
+ lli->hw[OWL_DMADESC_CTRLB] = ctrlb;
+ }
+
+ return 0;
+}
+
+static struct owl_dma_pchan *owl_dma_get_pchan(struct owl_dma *od,
+ struct owl_dma_vchan *vchan)
+{
+ struct owl_dma_pchan *pchan = NULL;
+ unsigned long flags;
+ int i;
+
+ for (i = 0; i < od->nr_pchans; i++) {
+ pchan = &od->pchans[i];
+
+ spin_lock_irqsave(&od->lock, flags);
+ if (!pchan->vchan) {
+ pchan->vchan = vchan;
+ spin_unlock_irqrestore(&od->lock, flags);
+ break;
+ }
+
+ spin_unlock_irqrestore(&od->lock, flags);
+ }
+
+ return pchan;
+}
+
+static int owl_dma_pchan_busy(struct owl_dma *od, struct owl_dma_pchan *pchan)
+{
+ unsigned int val;
+
+ val = dma_readl(od, OWL_DMA_IDLE_STAT);
+
+ return !(val & (1 << pchan->id));
+}
+
+static void owl_dma_terminate_pchan(struct owl_dma *od,
+ struct owl_dma_pchan *pchan)
+{
+ unsigned long flags;
+ u32 irq_pd;
+
+ pchan_writel(pchan, OWL_DMAX_START, 0);
+ pchan_update(pchan, OWL_DMAX_INT_STATUS, 0xff, false);
+
+ spin_lock_irqsave(&od->lock, flags);
+ dma_update(od, OWL_DMA_IRQ_EN0, (1 << pchan->id), false);
+
+ irq_pd = dma_readl(od, OWL_DMA_IRQ_PD0);
+ if (irq_pd & (1 << pchan->id)) {
+ dev_warn(od->dma.dev,
+ "terminating pchan %d that still has pending irq\n",
+ pchan->id);
+ dma_writel(od, OWL_DMA_IRQ_PD0, (1 << pchan->id));
+ }
+
+ pchan->vchan = NULL;
+
+ spin_unlock_irqrestore(&od->lock, flags);
+}
+
+static void owl_dma_pause_pchan(struct owl_dma_pchan *pchan)
+{
+ pchan_writel(pchan, 1, OWL_DMAX_PAUSE);
+}
+
+static void owl_dma_resume_pchan(struct owl_dma_pchan *pchan)
+{
+ pchan_writel(pchan, 0, OWL_DMAX_PAUSE);
+}
+
+static int owl_dma_start_next_txd(struct owl_dma_vchan *vchan)
+{
+ struct owl_dma *od = to_owl_dma(vchan->vc.chan.device);
+ struct virt_dma_desc *vd = vchan_next_desc(&vchan->vc);
+ struct owl_dma_pchan *pchan = vchan->pchan;
+ struct owl_dma_txd *txd = to_owl_txd(&vd->tx);
+ struct owl_dma_lli *lli;
+ unsigned long flags;
+ u32 int_ctl;
+
+ list_del(&vd->node);
+
+ vchan->txd = txd;
+
+ /* Wait for channel inactive */
+ while (owl_dma_pchan_busy(od, pchan))
+ cpu_relax();
+
+ lli = list_first_entry(&txd->lli_list,
+ struct owl_dma_lli, node);
+
+ if (txd->cyclic)
+ int_ctl = OWL_DMA_INTCTL_BLOCK;
+ else
+ int_ctl = OWL_DMA_INTCTL_SUPER_BLOCK;
+
+ pchan_writel(pchan, OWL_DMAX_MODE, OWL_DMA_MODE_LME);
+ pchan_writel(pchan, OWL_DMAX_LINKLIST_CTL,
+ OWL_DMA_LLC_SAV_LOAD_NEXT | OWL_DMA_LLC_DAV_LOAD_NEXT);
+ pchan_writel(pchan, OWL_DMAX_NEXT_DESCRIPTOR, lli->phys);
+ pchan_writel(pchan, OWL_DMAX_INT_CTL, int_ctl);
+
+ /* Clear IRQ status for this pchan */
+ pchan_update(pchan, OWL_DMAX_INT_STATUS, 0xff, false);
+
+ spin_lock_irqsave(&od->lock, flags);
+
+ dma_update(od, OWL_DMA_IRQ_EN0, (1 << pchan->id), true);
+
+ spin_unlock_irqrestore(&od->lock, flags);
+
+ dev_dbg(chan2dev(&vchan->vc.chan), "starting pchan %d\n", pchan->id);
+
+ /* Start DMA transfer for this pchan */
+ pchan_writel(pchan, OWL_DMAX_START, 0x1);
+
+ return 0;
+}
+
+static void owl_dma_phy_free(struct owl_dma *od, struct owl_dma_vchan *vchan)
+{
+ /* Ensure that the physical channel is stopped */
+ owl_dma_terminate_pchan(od, vchan->pchan);
+
+ vchan->pchan = NULL;
+}
+
+static irqreturn_t owl_dma_interrupt(int irq, void *dev_id)
+{
+ struct owl_dma *od = dev_id;
+ struct owl_dma_vchan *vchan;
+ struct owl_dma_pchan *pchan;
+ unsigned long pending;
+ int i;
+ unsigned int global_irq_pending, chan_irq_pending;
+
+ spin_lock(&od->lock);
+
+ pending = dma_readl(od, OWL_DMA_IRQ_PD0);
+
+ /* Clear IRQ status for each pchan */
+ for_each_set_bit(i, &pending, od->nr_pchans) {
+ pchan = &od->pchans[i];
+ pchan_update(pchan, OWL_DMAX_INT_STATUS, 0xff, false);
+ }
+
+ /* Clear pending IRQ */
+ dma_writel(od, OWL_DMA_IRQ_PD0, pending);
+
+ /* Check missed pending IRQ */
+ for (i = 0; i < od->nr_pchans; i++) {
+ pchan = &od->pchans[i];
+ chan_irq_pending = pchan_readl(pchan, OWL_DMAX_INT_CTL) &
+ pchan_readl(pchan, OWL_DMAX_INT_STATUS);
+
+ /* Dummy read to ensure OWL_DMA_IRQ_PD0 value is updated */
+ dma_readl(od, OWL_DMA_IRQ_PD0);
+
+ global_irq_pending = dma_readl(od, OWL_DMA_IRQ_PD0);
+
+ if (chan_irq_pending && !(global_irq_pending & BIT(i))) {
+ dev_dbg(od->dma.dev,
+ "global and channel IRQ pending match err\n");
+
+ /* Clear IRQ status for this pchan */
+ pchan_update(pchan, OWL_DMAX_INT_STATUS,
+ 0xff, false);
+
+ /* Update global IRQ pending */
+ pending |= BIT(i);
+ }
+ }
+
+ spin_unlock(&od->lock);
+
+ for_each_set_bit(i, &pending, od->nr_pchans) {
+ struct owl_dma_txd *txd;
+
+ pchan = &od->pchans[i];
+
+ vchan = pchan->vchan;
+ if (!vchan) {
+ dev_warn(od->dma.dev, "no vchan attached on pchan %d\n",
+ pchan->id);
+ continue;
+ }
+
+ spin_lock(&vchan->vc.lock);
+
+ txd = vchan->txd;
+ if (txd) {
+ vchan->txd = NULL;
+
+ vchan_cookie_complete(&txd->vd);
+
+ /*
+ * Start the next descriptor (if any),
+ * otherwise free this channel.
+ */
+ if (vchan_next_desc(&vchan->vc))
+ owl_dma_start_next_txd(vchan);
+ else
+ owl_dma_phy_free(od, vchan);
+ }
+
+ spin_unlock(&vchan->vc.lock);
+ }
+
+ return IRQ_HANDLED;
+}
+
+static void owl_dma_free_txd(struct owl_dma *od, struct owl_dma_txd *txd)
+{
+ struct owl_dma_lli *lli, *_lli;
+
+ if (unlikely(!txd))
+ return;
+
+ list_for_each_entry_safe(lli, _lli, &txd->lli_list, node)
+ owl_dma_free_lli(od, lli);
+
+ kfree(txd);
+}
+
+static void owl_dma_desc_free(struct virt_dma_desc *vd)
+{
+ struct owl_dma *od = to_owl_dma(vd->tx.chan->device);
+ struct owl_dma_txd *txd = to_owl_txd(&vd->tx);
+
+ owl_dma_free_txd(od, txd);
+}
+
+static int owl_dma_terminate_all(struct dma_chan *chan)
+{
+ struct owl_dma *od = to_owl_dma(chan->device);
+ struct owl_dma_vchan *vchan = to_owl_vchan(chan);
+ unsigned long flags;
+ LIST_HEAD(head);
+
+ spin_lock_irqsave(&vchan->vc.lock, flags);
+
+ if (vchan->pchan)
+ owl_dma_phy_free(od, vchan);
+
+ if (vchan->txd) {
+ owl_dma_desc_free(&vchan->txd->vd);
+ vchan->txd = NULL;
+ }
+
+ vchan_get_all_descriptors(&vchan->vc, &head);
+
+ spin_unlock_irqrestore(&vchan->vc.lock, flags);
+
+ vchan_dma_desc_free_list(&vchan->vc, &head);
+
+ return 0;
+}
+
+static int owl_dma_config(struct dma_chan *chan,
+ struct dma_slave_config *config)
+{
+ struct owl_dma_vchan *vchan = to_owl_vchan(chan);
+
+ /* Reject definitely invalid configurations */
+ if (config->src_addr_width == DMA_SLAVE_BUSWIDTH_8_BYTES ||
+ config->dst_addr_width == DMA_SLAVE_BUSWIDTH_8_BYTES)
+ return -EINVAL;
+
+ memcpy(&vchan->cfg, config, sizeof(struct dma_slave_config));
+
+ return 0;
+}
+
+static int owl_dma_pause(struct dma_chan *chan)
+{
+ struct owl_dma_vchan *vchan = to_owl_vchan(chan);
+ unsigned long flags;
+
+ spin_lock_irqsave(&vchan->vc.lock, flags);
+
+ owl_dma_pause_pchan(vchan->pchan);
+
+ spin_unlock_irqrestore(&vchan->vc.lock, flags);
+
+ return 0;
+}
+
+static int owl_dma_resume(struct dma_chan *chan)
+{
+ struct owl_dma_vchan *vchan = to_owl_vchan(chan);
+ unsigned long flags;
+
+ if (!vchan->pchan && !vchan->txd)
+ return 0;
+
+ dev_dbg(chan2dev(chan), "vchan %p: resume\n", &vchan->vc);
+
+ spin_lock_irqsave(&vchan->vc.lock, flags);
+
+ owl_dma_resume_pchan(vchan->pchan);
+
+ spin_unlock_irqrestore(&vchan->vc.lock, flags);
+
+ return 0;
+}
+
+static u32 owl_dma_getbytes_chan(struct owl_dma_vchan *vchan)
+{
+ struct owl_dma_pchan *pchan;
+ struct owl_dma_txd *txd;
+ struct owl_dma_lli *lli;
+ unsigned int next_lli_phy;
+ size_t bytes;
+
+ pchan = vchan->pchan;
+ txd = vchan->txd;
+
+ if (!pchan || !txd)
+ return 0;
+
+ /* Get remain count of current node in link list */
+ bytes = pchan_readl(pchan, OWL_DMAX_REMAIN_CNT);
+
+ /* Loop through the preceding nodes to get total remaining bytes */
+ if (pchan_readl(pchan, OWL_DMAX_MODE) & OWL_DMA_MODE_LME) {
+ next_lli_phy = pchan_readl(pchan, OWL_DMAX_NEXT_DESCRIPTOR);
+ list_for_each_entry(lli, &txd->lli_list, node) {
+ /* Start from the next active node */
+ if (lli->phys == next_lli_phy) {
+ list_for_each_entry(lli, &txd->lli_list, node)
+ bytes += llc_hw_flen(lli);
+ break;
+ }
+ }
+ }
+
+ return bytes;
+}
+
+static enum dma_status owl_dma_tx_status(struct dma_chan *chan,
+ dma_cookie_t cookie,
+ struct dma_tx_state *state)
+{
+ struct owl_dma_vchan *vchan = to_owl_vchan(chan);
+ struct owl_dma_lli *lli;
+ struct virt_dma_desc *vd;
+ struct owl_dma_txd *txd;
+ enum dma_status ret;
+ unsigned long flags;
+ size_t bytes = 0;
+
+ ret = dma_cookie_status(chan, cookie, state);
+ if (ret == DMA_COMPLETE || !state)
+ return ret;
+
+ spin_lock_irqsave(&vchan->vc.lock, flags);
+
+ vd = vchan_find_desc(&vchan->vc, cookie);
+ if (vd) {
+ txd = to_owl_txd(&vd->tx);
+ list_for_each_entry(lli, &txd->lli_list, node)
+ bytes += llc_hw_flen(lli);
+ } else {
+ bytes = owl_dma_getbytes_chan(vchan);
+ }
+
+ spin_unlock_irqrestore(&vchan->vc.lock, flags);
+
+ dma_set_residue(state, bytes);
+
+ return ret;
+}
+
+static void owl_dma_phy_alloc_and_start(struct owl_dma_vchan *vchan)
+{
+ struct owl_dma *od = to_owl_dma(vchan->vc.chan.device);
+ struct owl_dma_pchan *pchan;
+
+ pchan = owl_dma_get_pchan(od, vchan);
+ if (!pchan)
+ return;
+
+ dev_dbg(od->dma.dev, "allocated pchan %d\n", pchan->id);
+
+ vchan->pchan = pchan;
+ owl_dma_start_next_txd(vchan);
+}
+
+static void owl_dma_issue_pending(struct dma_chan *chan)
+{
+ struct owl_dma_vchan *vchan = to_owl_vchan(chan);
+ unsigned long flags;
+
+ spin_lock_irqsave(&vchan->vc.lock, flags);
+ if (vchan_issue_pending(&vchan->vc)) {
+ if (!vchan->pchan)
+ owl_dma_phy_alloc_and_start(vchan);
+ }
+ spin_unlock_irqrestore(&vchan->vc.lock, flags);
+}
+
+static struct dma_async_tx_descriptor
+ *owl_dma_prep_memcpy(struct dma_chan *chan,
+ dma_addr_t dst, dma_addr_t src,
+ size_t len, unsigned long flags)
+{
+ struct owl_dma *od = to_owl_dma(chan->device);
+ struct owl_dma_vchan *vchan = to_owl_vchan(chan);
+ struct owl_dma_txd *txd;
+ struct owl_dma_lli *lli, *prev = NULL;
+ size_t offset, bytes;
+ int ret;
+
+ if (!len)
+ return NULL;
+
+ txd = kzalloc(sizeof(*txd), GFP_NOWAIT);
+ if (!txd)
+ return NULL;
+
+ INIT_LIST_HEAD(&txd->lli_list);
+
+ /* Process the transfer as frame by frame */
+ for (offset = 0; offset < len; offset += bytes) {
+ lli = owl_dma_alloc_lli(od);
+ if (!lli) {
+ dev_warn(chan2dev(chan), "failed to allocate lli\n");
+ goto err_txd_free;
+ }
+
+ bytes = min_t(size_t, (len - offset), OWL_DMA_FRAME_MAX_LENGTH);
+
+ ret = owl_dma_cfg_lli(vchan, lli, src + offset, dst + offset,
+ bytes, DMA_MEM_TO_MEM,
+ &vchan->cfg, txd->cyclic);
+ if (ret) {
+ dev_warn(chan2dev(chan), "failed to config lli\n");
+ goto err_txd_free;
+ }
+
+ prev = owl_dma_add_lli(txd, prev, lli, false);
+ }
+
+ return vchan_tx_prep(&vchan->vc, &txd->vd, flags);
+
+err_txd_free:
+ owl_dma_free_txd(od, txd);
+ return NULL;
+}
+
+static struct dma_async_tx_descriptor
+ *owl_dma_prep_slave_sg(struct dma_chan *chan,
+ struct scatterlist *sgl,
+ unsigned int sg_len,
+ enum dma_transfer_direction dir,
+ unsigned long flags, void *context)
+{
+ struct owl_dma *od = to_owl_dma(chan->device);
+ struct owl_dma_vchan *vchan = to_owl_vchan(chan);
+ struct dma_slave_config *sconfig = &vchan->cfg;
+ struct owl_dma_txd *txd;
+ struct owl_dma_lli *lli, *prev = NULL;
+ struct scatterlist *sg;
+ dma_addr_t addr, src = 0, dst = 0;
+ size_t len;
+ int ret, i;
+
+ txd = kzalloc(sizeof(*txd), GFP_NOWAIT);
+ if (!txd)
+ return NULL;
+
+ INIT_LIST_HEAD(&txd->lli_list);
+
+ for_each_sg(sgl, sg, sg_len, i) {
+ addr = sg_dma_address(sg);
+ len = sg_dma_len(sg);
+
+ if (len > OWL_DMA_FRAME_MAX_LENGTH) {
+ dev_err(od->dma.dev,
+ "frame length exceeds max supported length");
+ goto err_txd_free;
+ }
+
+ lli = owl_dma_alloc_lli(od);
+ if (!lli) {
+ dev_err(chan2dev(chan), "failed to allocate lli");
+ goto err_txd_free;
+ }
+
+ if (dir == DMA_MEM_TO_DEV) {
+ src = addr;
+ dst = sconfig->dst_addr;
+ } else {
+ src = sconfig->src_addr;
+ dst = addr;
+ }
+
+ ret = owl_dma_cfg_lli(vchan, lli, src, dst, len, dir, sconfig,
+ txd->cyclic);
+ if (ret) {
+ dev_warn(chan2dev(chan), "failed to config lli");
+ goto err_txd_free;
+ }
+
+ prev = owl_dma_add_lli(txd, prev, lli, false);
+ }
+
+ return vchan_tx_prep(&vchan->vc, &txd->vd, flags);
+
+err_txd_free:
+ owl_dma_free_txd(od, txd);
+
+ return NULL;
+}
+
+static struct dma_async_tx_descriptor
+ *owl_prep_dma_cyclic(struct dma_chan *chan,
+ dma_addr_t buf_addr, size_t buf_len,
+ size_t period_len,
+ enum dma_transfer_direction dir,
+ unsigned long flags)
+{
+ struct owl_dma *od = to_owl_dma(chan->device);
+ struct owl_dma_vchan *vchan = to_owl_vchan(chan);
+ struct dma_slave_config *sconfig = &vchan->cfg;
+ struct owl_dma_txd *txd;
+ struct owl_dma_lli *lli, *prev = NULL, *first = NULL;
+ dma_addr_t src = 0, dst = 0;
+ unsigned int periods = buf_len / period_len;
+ int ret, i;
+
+ txd = kzalloc(sizeof(*txd), GFP_NOWAIT);
+ if (!txd)
+ return NULL;
+
+ INIT_LIST_HEAD(&txd->lli_list);
+ txd->cyclic = true;
+
+ for (i = 0; i < periods; i++) {
+ lli = owl_dma_alloc_lli(od);
+ if (!lli) {
+ dev_warn(chan2dev(chan), "failed to allocate lli");
+ goto err_txd_free;
+ }
+
+ if (dir == DMA_MEM_TO_DEV) {
+ src = buf_addr + (period_len * i);
+ dst = sconfig->dst_addr;
+ } else if (dir == DMA_DEV_TO_MEM) {
+ src = sconfig->src_addr;
+ dst = buf_addr + (period_len * i);
+ }
+
+ ret = owl_dma_cfg_lli(vchan, lli, src, dst, period_len,
+ dir, sconfig, txd->cyclic);
+ if (ret) {
+ dev_warn(chan2dev(chan), "failed to config lli");
+ goto err_txd_free;
+ }
+
+ if (!first)
+ first = lli;
+
+ prev = owl_dma_add_lli(txd, prev, lli, false);
+ }
+
+ /* close the cyclic list */
+ owl_dma_add_lli(txd, prev, first, true);
+
+ return vchan_tx_prep(&vchan->vc, &txd->vd, flags);
+
+err_txd_free:
+ owl_dma_free_txd(od, txd);
+
+ return NULL;
+}
+
+static void owl_dma_free_chan_resources(struct dma_chan *chan)
+{
+ struct owl_dma_vchan *vchan = to_owl_vchan(chan);
+
+ /* Ensure all queued descriptors are freed */
+ vchan_free_chan_resources(&vchan->vc);
+}
+
+static inline void owl_dma_free(struct owl_dma *od)
+{
+ struct owl_dma_vchan *vchan = NULL;
+ struct owl_dma_vchan *next;
+
+ list_for_each_entry_safe(vchan,
+ next, &od->dma.channels, vc.chan.device_node) {
+ list_del(&vchan->vc.chan.device_node);
+ tasklet_kill(&vchan->vc.task);
+ }
+}
+
+static struct dma_chan *owl_dma_of_xlate(struct of_phandle_args *dma_spec,
+ struct of_dma *ofdma)
+{
+ struct owl_dma *od = ofdma->of_dma_data;
+ struct owl_dma_vchan *vchan;
+ struct dma_chan *chan;
+ u8 drq = dma_spec->args[0];
+
+ if (drq > od->nr_vchans)
+ return NULL;
+
+ chan = dma_get_any_slave_channel(&od->dma);
+ if (!chan)
+ return NULL;
+
+ vchan = to_owl_vchan(chan);
+ vchan->drq = drq;
+
+ return chan;
+}
+
+static const struct of_device_id owl_dma_match[] = {
+ { .compatible = "actions,s500-dma", .data = (void *)S900_DMA,},
+ { .compatible = "actions,s700-dma", .data = (void *)S700_DMA,},
+ { .compatible = "actions,s900-dma", .data = (void *)S900_DMA,},
+ { /* sentinel */ },
+};
+MODULE_DEVICE_TABLE(of, owl_dma_match);
+
+static int owl_dma_probe(struct platform_device *pdev)
+{
+ struct device_node *np = pdev->dev.of_node;
+ struct owl_dma *od;
+ int ret, i, nr_channels, nr_requests;
+
+ od = devm_kzalloc(&pdev->dev, sizeof(*od), GFP_KERNEL);
+ if (!od)
+ return -ENOMEM;
+
+ od->base = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(od->base))
+ return PTR_ERR(od->base);
+
+ ret = of_property_read_u32(np, "dma-channels", &nr_channels);
+ if (ret) {
+ dev_err(&pdev->dev, "can't get dma-channels\n");
+ return ret;
+ }
+
+ ret = of_property_read_u32(np, "dma-requests", &nr_requests);
+ if (ret) {
+ dev_err(&pdev->dev, "can't get dma-requests\n");
+ return ret;
+ }
+
+ dev_info(&pdev->dev, "dma-channels %d, dma-requests %d\n",
+ nr_channels, nr_requests);
+
+ od->devid = (enum owl_dma_id)of_device_get_match_data(&pdev->dev);
+
+ od->nr_pchans = nr_channels;
+ od->nr_vchans = nr_requests;
+
+ pdev->dev.coherent_dma_mask = DMA_BIT_MASK(32);
+
+ platform_set_drvdata(pdev, od);
+ spin_lock_init(&od->lock);
+
+ dma_cap_set(DMA_MEMCPY, od->dma.cap_mask);
+ dma_cap_set(DMA_SLAVE, od->dma.cap_mask);
+ dma_cap_set(DMA_CYCLIC, od->dma.cap_mask);
+
+ od->dma.dev = &pdev->dev;
+ od->dma.device_free_chan_resources = owl_dma_free_chan_resources;
+ od->dma.device_tx_status = owl_dma_tx_status;
+ od->dma.device_issue_pending = owl_dma_issue_pending;
+ od->dma.device_prep_dma_memcpy = owl_dma_prep_memcpy;
+ od->dma.device_prep_slave_sg = owl_dma_prep_slave_sg;
+ od->dma.device_prep_dma_cyclic = owl_prep_dma_cyclic;
+ od->dma.device_config = owl_dma_config;
+ od->dma.device_pause = owl_dma_pause;
+ od->dma.device_resume = owl_dma_resume;
+ od->dma.device_terminate_all = owl_dma_terminate_all;
+ od->dma.src_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_4_BYTES);
+ od->dma.dst_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_4_BYTES);
+ od->dma.directions = BIT(DMA_MEM_TO_MEM);
+ od->dma.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
+
+ INIT_LIST_HEAD(&od->dma.channels);
+
+ od->clk = devm_clk_get(&pdev->dev, NULL);
+ if (IS_ERR(od->clk)) {
+ dev_err(&pdev->dev, "unable to get clock\n");
+ return PTR_ERR(od->clk);
+ }
+
+ /*
+ * Eventhough the DMA controller is capable of generating 4
+ * IRQ's for DMA priority feature, we only use 1 IRQ for
+ * simplification.
+ */
+ od->irq = platform_get_irq(pdev, 0);
+ ret = devm_request_irq(&pdev->dev, od->irq, owl_dma_interrupt, 0,
+ dev_name(&pdev->dev), od);
+ if (ret) {
+ dev_err(&pdev->dev, "unable to request IRQ\n");
+ return ret;
+ }
+
+ /* Init physical channel */
+ od->pchans = devm_kcalloc(&pdev->dev, od->nr_pchans,
+ sizeof(struct owl_dma_pchan), GFP_KERNEL);
+ if (!od->pchans)
+ return -ENOMEM;
+
+ for (i = 0; i < od->nr_pchans; i++) {
+ struct owl_dma_pchan *pchan = &od->pchans[i];
+
+ pchan->id = i;
+ pchan->base = od->base + OWL_DMA_CHAN_BASE(i);
+ }
+
+ /* Init virtual channel */
+ od->vchans = devm_kcalloc(&pdev->dev, od->nr_vchans,
+ sizeof(struct owl_dma_vchan), GFP_KERNEL);
+ if (!od->vchans)
+ return -ENOMEM;
+
+ for (i = 0; i < od->nr_vchans; i++) {
+ struct owl_dma_vchan *vchan = &od->vchans[i];
+
+ vchan->vc.desc_free = owl_dma_desc_free;
+ vchan_init(&vchan->vc, &od->dma);
+ }
+
+ /* Create a pool of consistent memory blocks for hardware descriptors */
+ od->lli_pool = dma_pool_create(dev_name(od->dma.dev), od->dma.dev,
+ sizeof(struct owl_dma_lli),
+ __alignof__(struct owl_dma_lli),
+ 0);
+ if (!od->lli_pool) {
+ dev_err(&pdev->dev, "unable to allocate DMA descriptor pool\n");
+ return -ENOMEM;
+ }
+
+ clk_prepare_enable(od->clk);
+
+ ret = dma_async_device_register(&od->dma);
+ if (ret) {
+ dev_err(&pdev->dev, "failed to register DMA engine device\n");
+ goto err_pool_free;
+ }
+
+ /* Device-tree DMA controller registration */
+ ret = of_dma_controller_register(pdev->dev.of_node,
+ owl_dma_of_xlate, od);
+ if (ret) {
+ dev_err(&pdev->dev, "of_dma_controller_register failed\n");
+ goto err_dma_unregister;
+ }
+
+ return 0;
+
+err_dma_unregister:
+ dma_async_device_unregister(&od->dma);
+err_pool_free:
+ clk_disable_unprepare(od->clk);
+ dma_pool_destroy(od->lli_pool);
+
+ return ret;
+}
+
+static int owl_dma_remove(struct platform_device *pdev)
+{
+ struct owl_dma *od = platform_get_drvdata(pdev);
+
+ of_dma_controller_free(pdev->dev.of_node);
+ dma_async_device_unregister(&od->dma);
+
+ /* Mask all interrupts for this execution environment */
+ dma_writel(od, OWL_DMA_IRQ_EN0, 0x0);
+
+ /* Make sure we won't have any further interrupts */
+ devm_free_irq(od->dma.dev, od->irq, od);
+
+ owl_dma_free(od);
+
+ clk_disable_unprepare(od->clk);
+ dma_pool_destroy(od->lli_pool);
+
+ return 0;
+}
+
+static struct platform_driver owl_dma_driver = {
+ .probe = owl_dma_probe,
+ .remove = owl_dma_remove,
+ .driver = {
+ .name = "dma-owl",
+ .of_match_table = of_match_ptr(owl_dma_match),
+ },
+};
+
+static int owl_dma_init(void)
+{
+ return platform_driver_register(&owl_dma_driver);
+}
+subsys_initcall(owl_dma_init);
+
+static void __exit owl_dma_exit(void)
+{
+ platform_driver_unregister(&owl_dma_driver);
+}
+module_exit(owl_dma_exit);
+
+MODULE_AUTHOR("David Liu <liuwei@actions-semi.com>");
+MODULE_AUTHOR("Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>");
+MODULE_DESCRIPTION("Actions Semi Owl SoCs DMA driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/dma/pch_dma.c b/drivers/dma/pch_dma.c
new file mode 100644
index 000000000..c359decc0
--- /dev/null
+++ b/drivers/dma/pch_dma.c
@@ -0,0 +1,995 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Topcliff PCH DMA controller driver
+ * Copyright (c) 2010 Intel Corporation
+ * Copyright (C) 2011 LAPIS Semiconductor Co., Ltd.
+ */
+
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/init.h>
+#include <linux/pci.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/pch_dma.h>
+
+#include "dmaengine.h"
+
+#define DRV_NAME "pch-dma"
+
+#define DMA_CTL0_DISABLE 0x0
+#define DMA_CTL0_SG 0x1
+#define DMA_CTL0_ONESHOT 0x2
+#define DMA_CTL0_MODE_MASK_BITS 0x3
+#define DMA_CTL0_DIR_SHIFT_BITS 2
+#define DMA_CTL0_BITS_PER_CH 4
+
+#define DMA_CTL2_START_SHIFT_BITS 8
+#define DMA_CTL2_IRQ_ENABLE_MASK ((1UL << DMA_CTL2_START_SHIFT_BITS) - 1)
+
+#define DMA_STATUS_IDLE 0x0
+#define DMA_STATUS_DESC_READ 0x1
+#define DMA_STATUS_WAIT 0x2
+#define DMA_STATUS_ACCESS 0x3
+#define DMA_STATUS_BITS_PER_CH 2
+#define DMA_STATUS_MASK_BITS 0x3
+#define DMA_STATUS_SHIFT_BITS 16
+#define DMA_STATUS_IRQ(x) (0x1 << (x))
+#define DMA_STATUS0_ERR(x) (0x1 << ((x) + 8))
+#define DMA_STATUS2_ERR(x) (0x1 << (x))
+
+#define DMA_DESC_WIDTH_SHIFT_BITS 12
+#define DMA_DESC_WIDTH_1_BYTE (0x3 << DMA_DESC_WIDTH_SHIFT_BITS)
+#define DMA_DESC_WIDTH_2_BYTES (0x2 << DMA_DESC_WIDTH_SHIFT_BITS)
+#define DMA_DESC_WIDTH_4_BYTES (0x0 << DMA_DESC_WIDTH_SHIFT_BITS)
+#define DMA_DESC_MAX_COUNT_1_BYTE 0x3FF
+#define DMA_DESC_MAX_COUNT_2_BYTES 0x3FF
+#define DMA_DESC_MAX_COUNT_4_BYTES 0x7FF
+#define DMA_DESC_END_WITHOUT_IRQ 0x0
+#define DMA_DESC_END_WITH_IRQ 0x1
+#define DMA_DESC_FOLLOW_WITHOUT_IRQ 0x2
+#define DMA_DESC_FOLLOW_WITH_IRQ 0x3
+
+#define MAX_CHAN_NR 12
+
+#define DMA_MASK_CTL0_MODE 0x33333333
+#define DMA_MASK_CTL2_MODE 0x00003333
+
+static unsigned int init_nr_desc_per_channel = 64;
+module_param(init_nr_desc_per_channel, uint, 0644);
+MODULE_PARM_DESC(init_nr_desc_per_channel,
+ "initial descriptors per channel (default: 64)");
+
+struct pch_dma_desc_regs {
+ u32 dev_addr;
+ u32 mem_addr;
+ u32 size;
+ u32 next;
+};
+
+struct pch_dma_regs {
+ u32 dma_ctl0;
+ u32 dma_ctl1;
+ u32 dma_ctl2;
+ u32 dma_ctl3;
+ u32 dma_sts0;
+ u32 dma_sts1;
+ u32 dma_sts2;
+ u32 reserved3;
+ struct pch_dma_desc_regs desc[MAX_CHAN_NR];
+};
+
+struct pch_dma_desc {
+ struct pch_dma_desc_regs regs;
+ struct dma_async_tx_descriptor txd;
+ struct list_head desc_node;
+ struct list_head tx_list;
+};
+
+struct pch_dma_chan {
+ struct dma_chan chan;
+ void __iomem *membase;
+ enum dma_transfer_direction dir;
+ struct tasklet_struct tasklet;
+ unsigned long err_status;
+
+ spinlock_t lock;
+
+ struct list_head active_list;
+ struct list_head queue;
+ struct list_head free_list;
+ unsigned int descs_allocated;
+};
+
+#define PDC_DEV_ADDR 0x00
+#define PDC_MEM_ADDR 0x04
+#define PDC_SIZE 0x08
+#define PDC_NEXT 0x0C
+
+#define channel_readl(pdc, name) \
+ readl((pdc)->membase + PDC_##name)
+#define channel_writel(pdc, name, val) \
+ writel((val), (pdc)->membase + PDC_##name)
+
+struct pch_dma {
+ struct dma_device dma;
+ void __iomem *membase;
+ struct dma_pool *pool;
+ struct pch_dma_regs regs;
+ struct pch_dma_desc_regs ch_regs[MAX_CHAN_NR];
+ struct pch_dma_chan channels[MAX_CHAN_NR];
+};
+
+#define PCH_DMA_CTL0 0x00
+#define PCH_DMA_CTL1 0x04
+#define PCH_DMA_CTL2 0x08
+#define PCH_DMA_CTL3 0x0C
+#define PCH_DMA_STS0 0x10
+#define PCH_DMA_STS1 0x14
+#define PCH_DMA_STS2 0x18
+
+#define dma_readl(pd, name) \
+ readl((pd)->membase + PCH_DMA_##name)
+#define dma_writel(pd, name, val) \
+ writel((val), (pd)->membase + PCH_DMA_##name)
+
+static inline
+struct pch_dma_desc *to_pd_desc(struct dma_async_tx_descriptor *txd)
+{
+ return container_of(txd, struct pch_dma_desc, txd);
+}
+
+static inline struct pch_dma_chan *to_pd_chan(struct dma_chan *chan)
+{
+ return container_of(chan, struct pch_dma_chan, chan);
+}
+
+static inline struct pch_dma *to_pd(struct dma_device *ddev)
+{
+ return container_of(ddev, struct pch_dma, dma);
+}
+
+static inline struct device *chan2dev(struct dma_chan *chan)
+{
+ return &chan->dev->device;
+}
+
+static inline struct device *chan2parent(struct dma_chan *chan)
+{
+ return chan->dev->device.parent;
+}
+
+static inline
+struct pch_dma_desc *pdc_first_active(struct pch_dma_chan *pd_chan)
+{
+ return list_first_entry(&pd_chan->active_list,
+ struct pch_dma_desc, desc_node);
+}
+
+static inline
+struct pch_dma_desc *pdc_first_queued(struct pch_dma_chan *pd_chan)
+{
+ return list_first_entry(&pd_chan->queue,
+ struct pch_dma_desc, desc_node);
+}
+
+static void pdc_enable_irq(struct dma_chan *chan, int enable)
+{
+ struct pch_dma *pd = to_pd(chan->device);
+ u32 val;
+ int pos;
+
+ if (chan->chan_id < 8)
+ pos = chan->chan_id;
+ else
+ pos = chan->chan_id + 8;
+
+ val = dma_readl(pd, CTL2);
+
+ if (enable)
+ val |= 0x1 << pos;
+ else
+ val &= ~(0x1 << pos);
+
+ dma_writel(pd, CTL2, val);
+
+ dev_dbg(chan2dev(chan), "pdc_enable_irq: chan %d -> %x\n",
+ chan->chan_id, val);
+}
+
+static void pdc_set_dir(struct dma_chan *chan)
+{
+ struct pch_dma_chan *pd_chan = to_pd_chan(chan);
+ struct pch_dma *pd = to_pd(chan->device);
+ u32 val;
+ u32 mask_mode;
+ u32 mask_ctl;
+
+ if (chan->chan_id < 8) {
+ val = dma_readl(pd, CTL0);
+
+ mask_mode = DMA_CTL0_MODE_MASK_BITS <<
+ (DMA_CTL0_BITS_PER_CH * chan->chan_id);
+ mask_ctl = DMA_MASK_CTL0_MODE & ~(DMA_CTL0_MODE_MASK_BITS <<
+ (DMA_CTL0_BITS_PER_CH * chan->chan_id));
+ val &= mask_mode;
+ if (pd_chan->dir == DMA_MEM_TO_DEV)
+ val |= 0x1 << (DMA_CTL0_BITS_PER_CH * chan->chan_id +
+ DMA_CTL0_DIR_SHIFT_BITS);
+ else
+ val &= ~(0x1 << (DMA_CTL0_BITS_PER_CH * chan->chan_id +
+ DMA_CTL0_DIR_SHIFT_BITS));
+
+ val |= mask_ctl;
+ dma_writel(pd, CTL0, val);
+ } else {
+ int ch = chan->chan_id - 8; /* ch8-->0 ch9-->1 ... ch11->3 */
+ val = dma_readl(pd, CTL3);
+
+ mask_mode = DMA_CTL0_MODE_MASK_BITS <<
+ (DMA_CTL0_BITS_PER_CH * ch);
+ mask_ctl = DMA_MASK_CTL2_MODE & ~(DMA_CTL0_MODE_MASK_BITS <<
+ (DMA_CTL0_BITS_PER_CH * ch));
+ val &= mask_mode;
+ if (pd_chan->dir == DMA_MEM_TO_DEV)
+ val |= 0x1 << (DMA_CTL0_BITS_PER_CH * ch +
+ DMA_CTL0_DIR_SHIFT_BITS);
+ else
+ val &= ~(0x1 << (DMA_CTL0_BITS_PER_CH * ch +
+ DMA_CTL0_DIR_SHIFT_BITS));
+ val |= mask_ctl;
+ dma_writel(pd, CTL3, val);
+ }
+
+ dev_dbg(chan2dev(chan), "pdc_set_dir: chan %d -> %x\n",
+ chan->chan_id, val);
+}
+
+static void pdc_set_mode(struct dma_chan *chan, u32 mode)
+{
+ struct pch_dma *pd = to_pd(chan->device);
+ u32 val;
+ u32 mask_ctl;
+ u32 mask_dir;
+
+ if (chan->chan_id < 8) {
+ mask_ctl = DMA_MASK_CTL0_MODE & ~(DMA_CTL0_MODE_MASK_BITS <<
+ (DMA_CTL0_BITS_PER_CH * chan->chan_id));
+ mask_dir = 1 << (DMA_CTL0_BITS_PER_CH * chan->chan_id +\
+ DMA_CTL0_DIR_SHIFT_BITS);
+ val = dma_readl(pd, CTL0);
+ val &= mask_dir;
+ val |= mode << (DMA_CTL0_BITS_PER_CH * chan->chan_id);
+ val |= mask_ctl;
+ dma_writel(pd, CTL0, val);
+ } else {
+ int ch = chan->chan_id - 8; /* ch8-->0 ch9-->1 ... ch11->3 */
+ mask_ctl = DMA_MASK_CTL2_MODE & ~(DMA_CTL0_MODE_MASK_BITS <<
+ (DMA_CTL0_BITS_PER_CH * ch));
+ mask_dir = 1 << (DMA_CTL0_BITS_PER_CH * ch +\
+ DMA_CTL0_DIR_SHIFT_BITS);
+ val = dma_readl(pd, CTL3);
+ val &= mask_dir;
+ val |= mode << (DMA_CTL0_BITS_PER_CH * ch);
+ val |= mask_ctl;
+ dma_writel(pd, CTL3, val);
+ }
+
+ dev_dbg(chan2dev(chan), "pdc_set_mode: chan %d -> %x\n",
+ chan->chan_id, val);
+}
+
+static u32 pdc_get_status0(struct pch_dma_chan *pd_chan)
+{
+ struct pch_dma *pd = to_pd(pd_chan->chan.device);
+ u32 val;
+
+ val = dma_readl(pd, STS0);
+ return DMA_STATUS_MASK_BITS & (val >> (DMA_STATUS_SHIFT_BITS +
+ DMA_STATUS_BITS_PER_CH * pd_chan->chan.chan_id));
+}
+
+static u32 pdc_get_status2(struct pch_dma_chan *pd_chan)
+{
+ struct pch_dma *pd = to_pd(pd_chan->chan.device);
+ u32 val;
+
+ val = dma_readl(pd, STS2);
+ return DMA_STATUS_MASK_BITS & (val >> (DMA_STATUS_SHIFT_BITS +
+ DMA_STATUS_BITS_PER_CH * (pd_chan->chan.chan_id - 8)));
+}
+
+static bool pdc_is_idle(struct pch_dma_chan *pd_chan)
+{
+ u32 sts;
+
+ if (pd_chan->chan.chan_id < 8)
+ sts = pdc_get_status0(pd_chan);
+ else
+ sts = pdc_get_status2(pd_chan);
+
+
+ if (sts == DMA_STATUS_IDLE)
+ return true;
+ else
+ return false;
+}
+
+static void pdc_dostart(struct pch_dma_chan *pd_chan, struct pch_dma_desc* desc)
+{
+ if (!pdc_is_idle(pd_chan)) {
+ dev_err(chan2dev(&pd_chan->chan),
+ "BUG: Attempt to start non-idle channel\n");
+ return;
+ }
+
+ dev_dbg(chan2dev(&pd_chan->chan), "chan %d -> dev_addr: %x\n",
+ pd_chan->chan.chan_id, desc->regs.dev_addr);
+ dev_dbg(chan2dev(&pd_chan->chan), "chan %d -> mem_addr: %x\n",
+ pd_chan->chan.chan_id, desc->regs.mem_addr);
+ dev_dbg(chan2dev(&pd_chan->chan), "chan %d -> size: %x\n",
+ pd_chan->chan.chan_id, desc->regs.size);
+ dev_dbg(chan2dev(&pd_chan->chan), "chan %d -> next: %x\n",
+ pd_chan->chan.chan_id, desc->regs.next);
+
+ if (list_empty(&desc->tx_list)) {
+ channel_writel(pd_chan, DEV_ADDR, desc->regs.dev_addr);
+ channel_writel(pd_chan, MEM_ADDR, desc->regs.mem_addr);
+ channel_writel(pd_chan, SIZE, desc->regs.size);
+ channel_writel(pd_chan, NEXT, desc->regs.next);
+ pdc_set_mode(&pd_chan->chan, DMA_CTL0_ONESHOT);
+ } else {
+ channel_writel(pd_chan, NEXT, desc->txd.phys);
+ pdc_set_mode(&pd_chan->chan, DMA_CTL0_SG);
+ }
+}
+
+static void pdc_chain_complete(struct pch_dma_chan *pd_chan,
+ struct pch_dma_desc *desc)
+{
+ struct dma_async_tx_descriptor *txd = &desc->txd;
+ struct dmaengine_desc_callback cb;
+
+ dmaengine_desc_get_callback(txd, &cb);
+ list_splice_init(&desc->tx_list, &pd_chan->free_list);
+ list_move(&desc->desc_node, &pd_chan->free_list);
+
+ dmaengine_desc_callback_invoke(&cb, NULL);
+}
+
+static void pdc_complete_all(struct pch_dma_chan *pd_chan)
+{
+ struct pch_dma_desc *desc, *_d;
+ LIST_HEAD(list);
+
+ BUG_ON(!pdc_is_idle(pd_chan));
+
+ if (!list_empty(&pd_chan->queue))
+ pdc_dostart(pd_chan, pdc_first_queued(pd_chan));
+
+ list_splice_init(&pd_chan->active_list, &list);
+ list_splice_init(&pd_chan->queue, &pd_chan->active_list);
+
+ list_for_each_entry_safe(desc, _d, &list, desc_node)
+ pdc_chain_complete(pd_chan, desc);
+}
+
+static void pdc_handle_error(struct pch_dma_chan *pd_chan)
+{
+ struct pch_dma_desc *bad_desc;
+
+ bad_desc = pdc_first_active(pd_chan);
+ list_del(&bad_desc->desc_node);
+
+ list_splice_init(&pd_chan->queue, pd_chan->active_list.prev);
+
+ if (!list_empty(&pd_chan->active_list))
+ pdc_dostart(pd_chan, pdc_first_active(pd_chan));
+
+ dev_crit(chan2dev(&pd_chan->chan), "Bad descriptor submitted\n");
+ dev_crit(chan2dev(&pd_chan->chan), "descriptor cookie: %d\n",
+ bad_desc->txd.cookie);
+
+ pdc_chain_complete(pd_chan, bad_desc);
+}
+
+static void pdc_advance_work(struct pch_dma_chan *pd_chan)
+{
+ if (list_empty(&pd_chan->active_list) ||
+ list_is_singular(&pd_chan->active_list)) {
+ pdc_complete_all(pd_chan);
+ } else {
+ pdc_chain_complete(pd_chan, pdc_first_active(pd_chan));
+ pdc_dostart(pd_chan, pdc_first_active(pd_chan));
+ }
+}
+
+static dma_cookie_t pd_tx_submit(struct dma_async_tx_descriptor *txd)
+{
+ struct pch_dma_desc *desc = to_pd_desc(txd);
+ struct pch_dma_chan *pd_chan = to_pd_chan(txd->chan);
+
+ spin_lock(&pd_chan->lock);
+
+ if (list_empty(&pd_chan->active_list)) {
+ list_add_tail(&desc->desc_node, &pd_chan->active_list);
+ pdc_dostart(pd_chan, desc);
+ } else {
+ list_add_tail(&desc->desc_node, &pd_chan->queue);
+ }
+
+ spin_unlock(&pd_chan->lock);
+ return 0;
+}
+
+static struct pch_dma_desc *pdc_alloc_desc(struct dma_chan *chan, gfp_t flags)
+{
+ struct pch_dma_desc *desc = NULL;
+ struct pch_dma *pd = to_pd(chan->device);
+ dma_addr_t addr;
+
+ desc = dma_pool_zalloc(pd->pool, flags, &addr);
+ if (desc) {
+ INIT_LIST_HEAD(&desc->tx_list);
+ dma_async_tx_descriptor_init(&desc->txd, chan);
+ desc->txd.tx_submit = pd_tx_submit;
+ desc->txd.flags = DMA_CTRL_ACK;
+ desc->txd.phys = addr;
+ }
+
+ return desc;
+}
+
+static struct pch_dma_desc *pdc_desc_get(struct pch_dma_chan *pd_chan)
+{
+ struct pch_dma_desc *desc, *_d;
+ struct pch_dma_desc *ret = NULL;
+ int i = 0;
+
+ spin_lock(&pd_chan->lock);
+ list_for_each_entry_safe(desc, _d, &pd_chan->free_list, desc_node) {
+ i++;
+ if (async_tx_test_ack(&desc->txd)) {
+ list_del(&desc->desc_node);
+ ret = desc;
+ break;
+ }
+ dev_dbg(chan2dev(&pd_chan->chan), "desc %p not ACKed\n", desc);
+ }
+ spin_unlock(&pd_chan->lock);
+ dev_dbg(chan2dev(&pd_chan->chan), "scanned %d descriptors\n", i);
+
+ if (!ret) {
+ ret = pdc_alloc_desc(&pd_chan->chan, GFP_ATOMIC);
+ if (ret) {
+ spin_lock(&pd_chan->lock);
+ pd_chan->descs_allocated++;
+ spin_unlock(&pd_chan->lock);
+ } else {
+ dev_err(chan2dev(&pd_chan->chan),
+ "failed to alloc desc\n");
+ }
+ }
+
+ return ret;
+}
+
+static void pdc_desc_put(struct pch_dma_chan *pd_chan,
+ struct pch_dma_desc *desc)
+{
+ if (desc) {
+ spin_lock(&pd_chan->lock);
+ list_splice_init(&desc->tx_list, &pd_chan->free_list);
+ list_add(&desc->desc_node, &pd_chan->free_list);
+ spin_unlock(&pd_chan->lock);
+ }
+}
+
+static int pd_alloc_chan_resources(struct dma_chan *chan)
+{
+ struct pch_dma_chan *pd_chan = to_pd_chan(chan);
+ struct pch_dma_desc *desc;
+ LIST_HEAD(tmp_list);
+ int i;
+
+ if (!pdc_is_idle(pd_chan)) {
+ dev_dbg(chan2dev(chan), "DMA channel not idle ?\n");
+ return -EIO;
+ }
+
+ if (!list_empty(&pd_chan->free_list))
+ return pd_chan->descs_allocated;
+
+ for (i = 0; i < init_nr_desc_per_channel; i++) {
+ desc = pdc_alloc_desc(chan, GFP_KERNEL);
+
+ if (!desc) {
+ dev_warn(chan2dev(chan),
+ "Only allocated %d initial descriptors\n", i);
+ break;
+ }
+
+ list_add_tail(&desc->desc_node, &tmp_list);
+ }
+
+ spin_lock_irq(&pd_chan->lock);
+ list_splice(&tmp_list, &pd_chan->free_list);
+ pd_chan->descs_allocated = i;
+ dma_cookie_init(chan);
+ spin_unlock_irq(&pd_chan->lock);
+
+ pdc_enable_irq(chan, 1);
+
+ return pd_chan->descs_allocated;
+}
+
+static void pd_free_chan_resources(struct dma_chan *chan)
+{
+ struct pch_dma_chan *pd_chan = to_pd_chan(chan);
+ struct pch_dma *pd = to_pd(chan->device);
+ struct pch_dma_desc *desc, *_d;
+ LIST_HEAD(tmp_list);
+
+ BUG_ON(!pdc_is_idle(pd_chan));
+ BUG_ON(!list_empty(&pd_chan->active_list));
+ BUG_ON(!list_empty(&pd_chan->queue));
+
+ spin_lock_irq(&pd_chan->lock);
+ list_splice_init(&pd_chan->free_list, &tmp_list);
+ pd_chan->descs_allocated = 0;
+ spin_unlock_irq(&pd_chan->lock);
+
+ list_for_each_entry_safe(desc, _d, &tmp_list, desc_node)
+ dma_pool_free(pd->pool, desc, desc->txd.phys);
+
+ pdc_enable_irq(chan, 0);
+}
+
+static enum dma_status pd_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
+ struct dma_tx_state *txstate)
+{
+ return dma_cookie_status(chan, cookie, txstate);
+}
+
+static void pd_issue_pending(struct dma_chan *chan)
+{
+ struct pch_dma_chan *pd_chan = to_pd_chan(chan);
+
+ if (pdc_is_idle(pd_chan)) {
+ spin_lock(&pd_chan->lock);
+ pdc_advance_work(pd_chan);
+ spin_unlock(&pd_chan->lock);
+ }
+}
+
+static struct dma_async_tx_descriptor *pd_prep_slave_sg(struct dma_chan *chan,
+ struct scatterlist *sgl, unsigned int sg_len,
+ enum dma_transfer_direction direction, unsigned long flags,
+ void *context)
+{
+ struct pch_dma_chan *pd_chan = to_pd_chan(chan);
+ struct pch_dma_slave *pd_slave = chan->private;
+ struct pch_dma_desc *first = NULL;
+ struct pch_dma_desc *prev = NULL;
+ struct pch_dma_desc *desc = NULL;
+ struct scatterlist *sg;
+ dma_addr_t reg;
+ int i;
+
+ if (unlikely(!sg_len)) {
+ dev_info(chan2dev(chan), "prep_slave_sg: length is zero!\n");
+ return NULL;
+ }
+
+ if (direction == DMA_DEV_TO_MEM)
+ reg = pd_slave->rx_reg;
+ else if (direction == DMA_MEM_TO_DEV)
+ reg = pd_slave->tx_reg;
+ else
+ return NULL;
+
+ pd_chan->dir = direction;
+ pdc_set_dir(chan);
+
+ for_each_sg(sgl, sg, sg_len, i) {
+ desc = pdc_desc_get(pd_chan);
+
+ if (!desc)
+ goto err_desc_get;
+
+ desc->regs.dev_addr = reg;
+ desc->regs.mem_addr = sg_dma_address(sg);
+ desc->regs.size = sg_dma_len(sg);
+ desc->regs.next = DMA_DESC_FOLLOW_WITHOUT_IRQ;
+
+ switch (pd_slave->width) {
+ case PCH_DMA_WIDTH_1_BYTE:
+ if (desc->regs.size > DMA_DESC_MAX_COUNT_1_BYTE)
+ goto err_desc_get;
+ desc->regs.size |= DMA_DESC_WIDTH_1_BYTE;
+ break;
+ case PCH_DMA_WIDTH_2_BYTES:
+ if (desc->regs.size > DMA_DESC_MAX_COUNT_2_BYTES)
+ goto err_desc_get;
+ desc->regs.size |= DMA_DESC_WIDTH_2_BYTES;
+ break;
+ case PCH_DMA_WIDTH_4_BYTES:
+ if (desc->regs.size > DMA_DESC_MAX_COUNT_4_BYTES)
+ goto err_desc_get;
+ desc->regs.size |= DMA_DESC_WIDTH_4_BYTES;
+ break;
+ default:
+ goto err_desc_get;
+ }
+
+ if (!first) {
+ first = desc;
+ } else {
+ prev->regs.next |= desc->txd.phys;
+ list_add_tail(&desc->desc_node, &first->tx_list);
+ }
+
+ prev = desc;
+ }
+
+ if (flags & DMA_PREP_INTERRUPT)
+ desc->regs.next = DMA_DESC_END_WITH_IRQ;
+ else
+ desc->regs.next = DMA_DESC_END_WITHOUT_IRQ;
+
+ first->txd.cookie = -EBUSY;
+ desc->txd.flags = flags;
+
+ return &first->txd;
+
+err_desc_get:
+ dev_err(chan2dev(chan), "failed to get desc or wrong parameters\n");
+ pdc_desc_put(pd_chan, first);
+ return NULL;
+}
+
+static int pd_device_terminate_all(struct dma_chan *chan)
+{
+ struct pch_dma_chan *pd_chan = to_pd_chan(chan);
+ struct pch_dma_desc *desc, *_d;
+ LIST_HEAD(list);
+
+ spin_lock_irq(&pd_chan->lock);
+
+ pdc_set_mode(&pd_chan->chan, DMA_CTL0_DISABLE);
+
+ list_splice_init(&pd_chan->active_list, &list);
+ list_splice_init(&pd_chan->queue, &list);
+
+ list_for_each_entry_safe(desc, _d, &list, desc_node)
+ pdc_chain_complete(pd_chan, desc);
+
+ spin_unlock_irq(&pd_chan->lock);
+
+ return 0;
+}
+
+static void pdc_tasklet(struct tasklet_struct *t)
+{
+ struct pch_dma_chan *pd_chan = from_tasklet(pd_chan, t, tasklet);
+ unsigned long flags;
+
+ if (!pdc_is_idle(pd_chan)) {
+ dev_err(chan2dev(&pd_chan->chan),
+ "BUG: handle non-idle channel in tasklet\n");
+ return;
+ }
+
+ spin_lock_irqsave(&pd_chan->lock, flags);
+ if (test_and_clear_bit(0, &pd_chan->err_status))
+ pdc_handle_error(pd_chan);
+ else
+ pdc_advance_work(pd_chan);
+ spin_unlock_irqrestore(&pd_chan->lock, flags);
+}
+
+static irqreturn_t pd_irq(int irq, void *devid)
+{
+ struct pch_dma *pd = (struct pch_dma *)devid;
+ struct pch_dma_chan *pd_chan;
+ u32 sts0;
+ u32 sts2;
+ int i;
+ int ret0 = IRQ_NONE;
+ int ret2 = IRQ_NONE;
+
+ sts0 = dma_readl(pd, STS0);
+ sts2 = dma_readl(pd, STS2);
+
+ dev_dbg(pd->dma.dev, "pd_irq sts0: %x\n", sts0);
+
+ for (i = 0; i < pd->dma.chancnt; i++) {
+ pd_chan = &pd->channels[i];
+
+ if (i < 8) {
+ if (sts0 & DMA_STATUS_IRQ(i)) {
+ if (sts0 & DMA_STATUS0_ERR(i))
+ set_bit(0, &pd_chan->err_status);
+
+ tasklet_schedule(&pd_chan->tasklet);
+ ret0 = IRQ_HANDLED;
+ }
+ } else {
+ if (sts2 & DMA_STATUS_IRQ(i - 8)) {
+ if (sts2 & DMA_STATUS2_ERR(i))
+ set_bit(0, &pd_chan->err_status);
+
+ tasklet_schedule(&pd_chan->tasklet);
+ ret2 = IRQ_HANDLED;
+ }
+ }
+ }
+
+ /* clear interrupt bits in status register */
+ if (ret0)
+ dma_writel(pd, STS0, sts0);
+ if (ret2)
+ dma_writel(pd, STS2, sts2);
+
+ return ret0 | ret2;
+}
+
+static void __maybe_unused pch_dma_save_regs(struct pch_dma *pd)
+{
+ struct pch_dma_chan *pd_chan;
+ struct dma_chan *chan, *_c;
+ int i = 0;
+
+ pd->regs.dma_ctl0 = dma_readl(pd, CTL0);
+ pd->regs.dma_ctl1 = dma_readl(pd, CTL1);
+ pd->regs.dma_ctl2 = dma_readl(pd, CTL2);
+ pd->regs.dma_ctl3 = dma_readl(pd, CTL3);
+
+ list_for_each_entry_safe(chan, _c, &pd->dma.channels, device_node) {
+ pd_chan = to_pd_chan(chan);
+
+ pd->ch_regs[i].dev_addr = channel_readl(pd_chan, DEV_ADDR);
+ pd->ch_regs[i].mem_addr = channel_readl(pd_chan, MEM_ADDR);
+ pd->ch_regs[i].size = channel_readl(pd_chan, SIZE);
+ pd->ch_regs[i].next = channel_readl(pd_chan, NEXT);
+
+ i++;
+ }
+}
+
+static void __maybe_unused pch_dma_restore_regs(struct pch_dma *pd)
+{
+ struct pch_dma_chan *pd_chan;
+ struct dma_chan *chan, *_c;
+ int i = 0;
+
+ dma_writel(pd, CTL0, pd->regs.dma_ctl0);
+ dma_writel(pd, CTL1, pd->regs.dma_ctl1);
+ dma_writel(pd, CTL2, pd->regs.dma_ctl2);
+ dma_writel(pd, CTL3, pd->regs.dma_ctl3);
+
+ list_for_each_entry_safe(chan, _c, &pd->dma.channels, device_node) {
+ pd_chan = to_pd_chan(chan);
+
+ channel_writel(pd_chan, DEV_ADDR, pd->ch_regs[i].dev_addr);
+ channel_writel(pd_chan, MEM_ADDR, pd->ch_regs[i].mem_addr);
+ channel_writel(pd_chan, SIZE, pd->ch_regs[i].size);
+ channel_writel(pd_chan, NEXT, pd->ch_regs[i].next);
+
+ i++;
+ }
+}
+
+static int __maybe_unused pch_dma_suspend(struct device *dev)
+{
+ struct pch_dma *pd = dev_get_drvdata(dev);
+
+ if (pd)
+ pch_dma_save_regs(pd);
+
+ return 0;
+}
+
+static int __maybe_unused pch_dma_resume(struct device *dev)
+{
+ struct pch_dma *pd = dev_get_drvdata(dev);
+
+ if (pd)
+ pch_dma_restore_regs(pd);
+
+ return 0;
+}
+
+static int pch_dma_probe(struct pci_dev *pdev,
+ const struct pci_device_id *id)
+{
+ struct pch_dma *pd;
+ struct pch_dma_regs *regs;
+ unsigned int nr_channels;
+ int err;
+ int i;
+
+ nr_channels = id->driver_data;
+ pd = kzalloc(sizeof(*pd), GFP_KERNEL);
+ if (!pd)
+ return -ENOMEM;
+
+ pci_set_drvdata(pdev, pd);
+
+ err = pci_enable_device(pdev);
+ if (err) {
+ dev_err(&pdev->dev, "Cannot enable PCI device\n");
+ goto err_free_mem;
+ }
+
+ if (!(pci_resource_flags(pdev, 1) & IORESOURCE_MEM)) {
+ dev_err(&pdev->dev, "Cannot find proper base address\n");
+ err = -ENODEV;
+ goto err_disable_pdev;
+ }
+
+ err = pci_request_regions(pdev, DRV_NAME);
+ if (err) {
+ dev_err(&pdev->dev, "Cannot obtain PCI resources\n");
+ goto err_disable_pdev;
+ }
+
+ err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
+ if (err) {
+ dev_err(&pdev->dev, "Cannot set proper DMA config\n");
+ goto err_free_res;
+ }
+
+ regs = pd->membase = pci_iomap(pdev, 1, 0);
+ if (!pd->membase) {
+ dev_err(&pdev->dev, "Cannot map MMIO registers\n");
+ err = -ENOMEM;
+ goto err_free_res;
+ }
+
+ pci_set_master(pdev);
+ pd->dma.dev = &pdev->dev;
+
+ err = request_irq(pdev->irq, pd_irq, IRQF_SHARED, DRV_NAME, pd);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to request IRQ\n");
+ goto err_iounmap;
+ }
+
+ pd->pool = dma_pool_create("pch_dma_desc_pool", &pdev->dev,
+ sizeof(struct pch_dma_desc), 4, 0);
+ if (!pd->pool) {
+ dev_err(&pdev->dev, "Failed to alloc DMA descriptors\n");
+ err = -ENOMEM;
+ goto err_free_irq;
+ }
+
+
+ INIT_LIST_HEAD(&pd->dma.channels);
+
+ for (i = 0; i < nr_channels; i++) {
+ struct pch_dma_chan *pd_chan = &pd->channels[i];
+
+ pd_chan->chan.device = &pd->dma;
+ dma_cookie_init(&pd_chan->chan);
+
+ pd_chan->membase = &regs->desc[i];
+
+ spin_lock_init(&pd_chan->lock);
+
+ INIT_LIST_HEAD(&pd_chan->active_list);
+ INIT_LIST_HEAD(&pd_chan->queue);
+ INIT_LIST_HEAD(&pd_chan->free_list);
+
+ tasklet_setup(&pd_chan->tasklet, pdc_tasklet);
+ list_add_tail(&pd_chan->chan.device_node, &pd->dma.channels);
+ }
+
+ dma_cap_zero(pd->dma.cap_mask);
+ dma_cap_set(DMA_PRIVATE, pd->dma.cap_mask);
+ dma_cap_set(DMA_SLAVE, pd->dma.cap_mask);
+
+ pd->dma.device_alloc_chan_resources = pd_alloc_chan_resources;
+ pd->dma.device_free_chan_resources = pd_free_chan_resources;
+ pd->dma.device_tx_status = pd_tx_status;
+ pd->dma.device_issue_pending = pd_issue_pending;
+ pd->dma.device_prep_slave_sg = pd_prep_slave_sg;
+ pd->dma.device_terminate_all = pd_device_terminate_all;
+
+ err = dma_async_device_register(&pd->dma);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to register DMA device\n");
+ goto err_free_pool;
+ }
+
+ return 0;
+
+err_free_pool:
+ dma_pool_destroy(pd->pool);
+err_free_irq:
+ free_irq(pdev->irq, pd);
+err_iounmap:
+ pci_iounmap(pdev, pd->membase);
+err_free_res:
+ pci_release_regions(pdev);
+err_disable_pdev:
+ pci_disable_device(pdev);
+err_free_mem:
+ kfree(pd);
+ return err;
+}
+
+static void pch_dma_remove(struct pci_dev *pdev)
+{
+ struct pch_dma *pd = pci_get_drvdata(pdev);
+ struct pch_dma_chan *pd_chan;
+ struct dma_chan *chan, *_c;
+
+ if (pd) {
+ dma_async_device_unregister(&pd->dma);
+
+ free_irq(pdev->irq, pd);
+
+ list_for_each_entry_safe(chan, _c, &pd->dma.channels,
+ device_node) {
+ pd_chan = to_pd_chan(chan);
+
+ tasklet_kill(&pd_chan->tasklet);
+ }
+
+ dma_pool_destroy(pd->pool);
+ pci_iounmap(pdev, pd->membase);
+ pci_release_regions(pdev);
+ pci_disable_device(pdev);
+ kfree(pd);
+ }
+}
+
+/* PCI Device ID of DMA device */
+#define PCI_DEVICE_ID_EG20T_PCH_DMA_8CH 0x8810
+#define PCI_DEVICE_ID_EG20T_PCH_DMA_4CH 0x8815
+#define PCI_DEVICE_ID_ML7213_DMA1_8CH 0x8026
+#define PCI_DEVICE_ID_ML7213_DMA2_8CH 0x802B
+#define PCI_DEVICE_ID_ML7213_DMA3_4CH 0x8034
+#define PCI_DEVICE_ID_ML7213_DMA4_12CH 0x8032
+#define PCI_DEVICE_ID_ML7223_DMA1_4CH 0x800B
+#define PCI_DEVICE_ID_ML7223_DMA2_4CH 0x800E
+#define PCI_DEVICE_ID_ML7223_DMA3_4CH 0x8017
+#define PCI_DEVICE_ID_ML7223_DMA4_4CH 0x803B
+#define PCI_DEVICE_ID_ML7831_DMA1_8CH 0x8810
+#define PCI_DEVICE_ID_ML7831_DMA2_4CH 0x8815
+
+static const struct pci_device_id pch_dma_id_table[] = {
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_EG20T_PCH_DMA_8CH), 8 },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_EG20T_PCH_DMA_4CH), 4 },
+ { PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ML7213_DMA1_8CH), 8}, /* UART Video */
+ { PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ML7213_DMA2_8CH), 8}, /* PCMIF SPI */
+ { PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ML7213_DMA3_4CH), 4}, /* FPGA */
+ { PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ML7213_DMA4_12CH), 12}, /* I2S */
+ { PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ML7223_DMA1_4CH), 4}, /* UART */
+ { PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ML7223_DMA2_4CH), 4}, /* Video SPI */
+ { PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ML7223_DMA3_4CH), 4}, /* Security */
+ { PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ML7223_DMA4_4CH), 4}, /* FPGA */
+ { PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ML7831_DMA1_8CH), 8}, /* UART */
+ { PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ML7831_DMA2_4CH), 4}, /* SPI */
+ { 0, },
+};
+
+static SIMPLE_DEV_PM_OPS(pch_dma_pm_ops, pch_dma_suspend, pch_dma_resume);
+
+static struct pci_driver pch_dma_driver = {
+ .name = DRV_NAME,
+ .id_table = pch_dma_id_table,
+ .probe = pch_dma_probe,
+ .remove = pch_dma_remove,
+ .driver.pm = &pch_dma_pm_ops,
+};
+
+module_pci_driver(pch_dma_driver);
+
+MODULE_DESCRIPTION("Intel EG20T PCH / LAPIS Semicon ML7213/ML7223/ML7831 IOH "
+ "DMA controller driver");
+MODULE_AUTHOR("Yong Wang <yong.y.wang@intel.com>");
+MODULE_LICENSE("GPL v2");
+MODULE_DEVICE_TABLE(pci, pch_dma_id_table);
diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c
new file mode 100644
index 000000000..3cf0b3838
--- /dev/null
+++ b/drivers/dma/pl330.c
@@ -0,0 +1,3277 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd.
+ * http://www.samsung.com
+ *
+ * Copyright (C) 2010 Samsung Electronics Co. Ltd.
+ * Jaswinder Singh <jassi.brar@samsung.com>
+ */
+
+#include <linux/debugfs.h>
+#include <linux/kernel.h>
+#include <linux/io.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmaengine.h>
+#include <linux/amba/bus.h>
+#include <linux/scatterlist.h>
+#include <linux/of.h>
+#include <linux/of_dma.h>
+#include <linux/err.h>
+#include <linux/pm_runtime.h>
+#include <linux/bug.h>
+#include <linux/reset.h>
+
+#include "dmaengine.h"
+#define PL330_MAX_CHAN 8
+#define PL330_MAX_IRQS 32
+#define PL330_MAX_PERI 32
+#define PL330_MAX_BURST 16
+
+#define PL330_QUIRK_BROKEN_NO_FLUSHP BIT(0)
+#define PL330_QUIRK_PERIPH_BURST BIT(1)
+
+enum pl330_cachectrl {
+ CCTRL0, /* Noncacheable and nonbufferable */
+ CCTRL1, /* Bufferable only */
+ CCTRL2, /* Cacheable, but do not allocate */
+ CCTRL3, /* Cacheable and bufferable, but do not allocate */
+ INVALID1, /* AWCACHE = 0x1000 */
+ INVALID2,
+ CCTRL6, /* Cacheable write-through, allocate on writes only */
+ CCTRL7, /* Cacheable write-back, allocate on writes only */
+};
+
+enum pl330_byteswap {
+ SWAP_NO,
+ SWAP_2,
+ SWAP_4,
+ SWAP_8,
+ SWAP_16,
+};
+
+/* Register and Bit field Definitions */
+#define DS 0x0
+#define DS_ST_STOP 0x0
+#define DS_ST_EXEC 0x1
+#define DS_ST_CMISS 0x2
+#define DS_ST_UPDTPC 0x3
+#define DS_ST_WFE 0x4
+#define DS_ST_ATBRR 0x5
+#define DS_ST_QBUSY 0x6
+#define DS_ST_WFP 0x7
+#define DS_ST_KILL 0x8
+#define DS_ST_CMPLT 0x9
+#define DS_ST_FLTCMP 0xe
+#define DS_ST_FAULT 0xf
+
+#define DPC 0x4
+#define INTEN 0x20
+#define ES 0x24
+#define INTSTATUS 0x28
+#define INTCLR 0x2c
+#define FSM 0x30
+#define FSC 0x34
+#define FTM 0x38
+
+#define _FTC 0x40
+#define FTC(n) (_FTC + (n)*0x4)
+
+#define _CS 0x100
+#define CS(n) (_CS + (n)*0x8)
+#define CS_CNS (1 << 21)
+
+#define _CPC 0x104
+#define CPC(n) (_CPC + (n)*0x8)
+
+#define _SA 0x400
+#define SA(n) (_SA + (n)*0x20)
+
+#define _DA 0x404
+#define DA(n) (_DA + (n)*0x20)
+
+#define _CC 0x408
+#define CC(n) (_CC + (n)*0x20)
+
+#define CC_SRCINC (1 << 0)
+#define CC_DSTINC (1 << 14)
+#define CC_SRCPRI (1 << 8)
+#define CC_DSTPRI (1 << 22)
+#define CC_SRCNS (1 << 9)
+#define CC_DSTNS (1 << 23)
+#define CC_SRCIA (1 << 10)
+#define CC_DSTIA (1 << 24)
+#define CC_SRCBRSTLEN_SHFT 4
+#define CC_DSTBRSTLEN_SHFT 18
+#define CC_SRCBRSTSIZE_SHFT 1
+#define CC_DSTBRSTSIZE_SHFT 15
+#define CC_SRCCCTRL_SHFT 11
+#define CC_SRCCCTRL_MASK 0x7
+#define CC_DSTCCTRL_SHFT 25
+#define CC_DRCCCTRL_MASK 0x7
+#define CC_SWAP_SHFT 28
+
+#define _LC0 0x40c
+#define LC0(n) (_LC0 + (n)*0x20)
+
+#define _LC1 0x410
+#define LC1(n) (_LC1 + (n)*0x20)
+
+#define DBGSTATUS 0xd00
+#define DBG_BUSY (1 << 0)
+
+#define DBGCMD 0xd04
+#define DBGINST0 0xd08
+#define DBGINST1 0xd0c
+
+#define CR0 0xe00
+#define CR1 0xe04
+#define CR2 0xe08
+#define CR3 0xe0c
+#define CR4 0xe10
+#define CRD 0xe14
+
+#define PERIPH_ID 0xfe0
+#define PERIPH_REV_SHIFT 20
+#define PERIPH_REV_MASK 0xf
+#define PERIPH_REV_R0P0 0
+#define PERIPH_REV_R1P0 1
+#define PERIPH_REV_R1P1 2
+
+#define CR0_PERIPH_REQ_SET (1 << 0)
+#define CR0_BOOT_EN_SET (1 << 1)
+#define CR0_BOOT_MAN_NS (1 << 2)
+#define CR0_NUM_CHANS_SHIFT 4
+#define CR0_NUM_CHANS_MASK 0x7
+#define CR0_NUM_PERIPH_SHIFT 12
+#define CR0_NUM_PERIPH_MASK 0x1f
+#define CR0_NUM_EVENTS_SHIFT 17
+#define CR0_NUM_EVENTS_MASK 0x1f
+
+#define CR1_ICACHE_LEN_SHIFT 0
+#define CR1_ICACHE_LEN_MASK 0x7
+#define CR1_NUM_ICACHELINES_SHIFT 4
+#define CR1_NUM_ICACHELINES_MASK 0xf
+
+#define CRD_DATA_WIDTH_SHIFT 0
+#define CRD_DATA_WIDTH_MASK 0x7
+#define CRD_WR_CAP_SHIFT 4
+#define CRD_WR_CAP_MASK 0x7
+#define CRD_WR_Q_DEP_SHIFT 8
+#define CRD_WR_Q_DEP_MASK 0xf
+#define CRD_RD_CAP_SHIFT 12
+#define CRD_RD_CAP_MASK 0x7
+#define CRD_RD_Q_DEP_SHIFT 16
+#define CRD_RD_Q_DEP_MASK 0xf
+#define CRD_DATA_BUFF_SHIFT 20
+#define CRD_DATA_BUFF_MASK 0x3ff
+
+#define PART 0x330
+#define DESIGNER 0x41
+#define REVISION 0x0
+#define INTEG_CFG 0x0
+#define PERIPH_ID_VAL ((PART << 0) | (DESIGNER << 12))
+
+#define PL330_STATE_STOPPED (1 << 0)
+#define PL330_STATE_EXECUTING (1 << 1)
+#define PL330_STATE_WFE (1 << 2)
+#define PL330_STATE_FAULTING (1 << 3)
+#define PL330_STATE_COMPLETING (1 << 4)
+#define PL330_STATE_WFP (1 << 5)
+#define PL330_STATE_KILLING (1 << 6)
+#define PL330_STATE_FAULT_COMPLETING (1 << 7)
+#define PL330_STATE_CACHEMISS (1 << 8)
+#define PL330_STATE_UPDTPC (1 << 9)
+#define PL330_STATE_ATBARRIER (1 << 10)
+#define PL330_STATE_QUEUEBUSY (1 << 11)
+#define PL330_STATE_INVALID (1 << 15)
+
+#define PL330_STABLE_STATES (PL330_STATE_STOPPED | PL330_STATE_EXECUTING \
+ | PL330_STATE_WFE | PL330_STATE_FAULTING)
+
+#define CMD_DMAADDH 0x54
+#define CMD_DMAEND 0x00
+#define CMD_DMAFLUSHP 0x35
+#define CMD_DMAGO 0xa0
+#define CMD_DMALD 0x04
+#define CMD_DMALDP 0x25
+#define CMD_DMALP 0x20
+#define CMD_DMALPEND 0x28
+#define CMD_DMAKILL 0x01
+#define CMD_DMAMOV 0xbc
+#define CMD_DMANOP 0x18
+#define CMD_DMARMB 0x12
+#define CMD_DMASEV 0x34
+#define CMD_DMAST 0x08
+#define CMD_DMASTP 0x29
+#define CMD_DMASTZ 0x0c
+#define CMD_DMAWFE 0x36
+#define CMD_DMAWFP 0x30
+#define CMD_DMAWMB 0x13
+
+#define SZ_DMAADDH 3
+#define SZ_DMAEND 1
+#define SZ_DMAFLUSHP 2
+#define SZ_DMALD 1
+#define SZ_DMALDP 2
+#define SZ_DMALP 2
+#define SZ_DMALPEND 2
+#define SZ_DMAKILL 1
+#define SZ_DMAMOV 6
+#define SZ_DMANOP 1
+#define SZ_DMARMB 1
+#define SZ_DMASEV 2
+#define SZ_DMAST 1
+#define SZ_DMASTP 2
+#define SZ_DMASTZ 1
+#define SZ_DMAWFE 2
+#define SZ_DMAWFP 2
+#define SZ_DMAWMB 1
+#define SZ_DMAGO 6
+
+#define BRST_LEN(ccr) ((((ccr) >> CC_SRCBRSTLEN_SHFT) & 0xf) + 1)
+#define BRST_SIZE(ccr) (1 << (((ccr) >> CC_SRCBRSTSIZE_SHFT) & 0x7))
+
+#define BYTE_TO_BURST(b, ccr) ((b) / BRST_SIZE(ccr) / BRST_LEN(ccr))
+#define BURST_TO_BYTE(c, ccr) ((c) * BRST_SIZE(ccr) * BRST_LEN(ccr))
+
+/*
+ * With 256 bytes, we can do more than 2.5MB and 5MB xfers per req
+ * at 1byte/burst for P<->M and M<->M respectively.
+ * For typical scenario, at 1word/burst, 10MB and 20MB xfers per req
+ * should be enough for P<->M and M<->M respectively.
+ */
+#define MCODE_BUFF_PER_REQ 256
+
+/* Use this _only_ to wait on transient states */
+#define UNTIL(t, s) while (!(_state(t) & (s))) cpu_relax();
+
+#ifdef PL330_DEBUG_MCGEN
+static unsigned cmd_line;
+#define PL330_DBGCMD_DUMP(off, x...) do { \
+ printk("%x:", cmd_line); \
+ printk(KERN_CONT x); \
+ cmd_line += off; \
+ } while (0)
+#define PL330_DBGMC_START(addr) (cmd_line = addr)
+#else
+#define PL330_DBGCMD_DUMP(off, x...) do {} while (0)
+#define PL330_DBGMC_START(addr) do {} while (0)
+#endif
+
+/* The number of default descriptors */
+
+#define NR_DEFAULT_DESC 16
+
+/* Delay for runtime PM autosuspend, ms */
+#define PL330_AUTOSUSPEND_DELAY 20
+
+/* Populated by the PL330 core driver for DMA API driver's info */
+struct pl330_config {
+ u32 periph_id;
+#define DMAC_MODE_NS (1 << 0)
+ unsigned int mode;
+ unsigned int data_bus_width:10; /* In number of bits */
+ unsigned int data_buf_dep:11;
+ unsigned int num_chan:4;
+ unsigned int num_peri:6;
+ u32 peri_ns;
+ unsigned int num_events:6;
+ u32 irq_ns;
+};
+
+/*
+ * Request Configuration.
+ * The PL330 core does not modify this and uses the last
+ * working configuration if the request doesn't provide any.
+ *
+ * The Client may want to provide this info only for the
+ * first request and a request with new settings.
+ */
+struct pl330_reqcfg {
+ /* Address Incrementing */
+ unsigned dst_inc:1;
+ unsigned src_inc:1;
+
+ /*
+ * For now, the SRC & DST protection levels
+ * and burst size/length are assumed same.
+ */
+ bool nonsecure;
+ bool privileged;
+ bool insnaccess;
+ unsigned brst_len:5;
+ unsigned brst_size:3; /* in power of 2 */
+
+ enum pl330_cachectrl dcctl;
+ enum pl330_cachectrl scctl;
+ enum pl330_byteswap swap;
+ struct pl330_config *pcfg;
+};
+
+/*
+ * One cycle of DMAC operation.
+ * There may be more than one xfer in a request.
+ */
+struct pl330_xfer {
+ u32 src_addr;
+ u32 dst_addr;
+ /* Size to xfer */
+ u32 bytes;
+};
+
+/* The xfer callbacks are made with one of these arguments. */
+enum pl330_op_err {
+ /* The all xfers in the request were success. */
+ PL330_ERR_NONE,
+ /* If req aborted due to global error. */
+ PL330_ERR_ABORT,
+ /* If req failed due to problem with Channel. */
+ PL330_ERR_FAIL,
+};
+
+enum dmamov_dst {
+ SAR = 0,
+ CCR,
+ DAR,
+};
+
+enum pl330_dst {
+ SRC = 0,
+ DST,
+};
+
+enum pl330_cond {
+ SINGLE,
+ BURST,
+ ALWAYS,
+};
+
+struct dma_pl330_desc;
+
+struct _pl330_req {
+ u32 mc_bus;
+ void *mc_cpu;
+ struct dma_pl330_desc *desc;
+};
+
+/* ToBeDone for tasklet */
+struct _pl330_tbd {
+ bool reset_dmac;
+ bool reset_mngr;
+ u8 reset_chan;
+};
+
+/* A DMAC Thread */
+struct pl330_thread {
+ u8 id;
+ int ev;
+ /* If the channel is not yet acquired by any client */
+ bool free;
+ /* Parent DMAC */
+ struct pl330_dmac *dmac;
+ /* Only two at a time */
+ struct _pl330_req req[2];
+ /* Index of the last enqueued request */
+ unsigned lstenq;
+ /* Index of the last submitted request or -1 if the DMA is stopped */
+ int req_running;
+};
+
+enum pl330_dmac_state {
+ UNINIT,
+ INIT,
+ DYING,
+};
+
+enum desc_status {
+ /* In the DMAC pool */
+ FREE,
+ /*
+ * Allocated to some channel during prep_xxx
+ * Also may be sitting on the work_list.
+ */
+ PREP,
+ /*
+ * Sitting on the work_list and already submitted
+ * to the PL330 core. Not more than two descriptors
+ * of a channel can be BUSY at any time.
+ */
+ BUSY,
+ /*
+ * Pause was called while descriptor was BUSY. Due to hardware
+ * limitations, only termination is possible for descriptors
+ * that have been paused.
+ */
+ PAUSED,
+ /*
+ * Sitting on the channel work_list but xfer done
+ * by PL330 core
+ */
+ DONE,
+};
+
+struct dma_pl330_chan {
+ /* Schedule desc completion */
+ struct tasklet_struct task;
+
+ /* DMA-Engine Channel */
+ struct dma_chan chan;
+
+ /* List of submitted descriptors */
+ struct list_head submitted_list;
+ /* List of issued descriptors */
+ struct list_head work_list;
+ /* List of completed descriptors */
+ struct list_head completed_list;
+
+ /* Pointer to the DMAC that manages this channel,
+ * NULL if the channel is available to be acquired.
+ * As the parent, this DMAC also provides descriptors
+ * to the channel.
+ */
+ struct pl330_dmac *dmac;
+
+ /* To protect channel manipulation */
+ spinlock_t lock;
+
+ /*
+ * Hardware channel thread of PL330 DMAC. NULL if the channel is
+ * available.
+ */
+ struct pl330_thread *thread;
+
+ /* For D-to-M and M-to-D channels */
+ int burst_sz; /* the peripheral fifo width */
+ int burst_len; /* the number of burst */
+ phys_addr_t fifo_addr;
+ /* DMA-mapped view of the FIFO; may differ if an IOMMU is present */
+ dma_addr_t fifo_dma;
+ enum dma_data_direction dir;
+ struct dma_slave_config slave_config;
+
+ /* for cyclic capability */
+ bool cyclic;
+
+ /* for runtime pm tracking */
+ bool active;
+};
+
+struct pl330_dmac {
+ /* DMA-Engine Device */
+ struct dma_device ddma;
+
+ /* Pool of descriptors available for the DMAC's channels */
+ struct list_head desc_pool;
+ /* To protect desc_pool manipulation */
+ spinlock_t pool_lock;
+
+ /* Size of MicroCode buffers for each channel. */
+ unsigned mcbufsz;
+ /* ioremap'ed address of PL330 registers. */
+ void __iomem *base;
+ /* Populated by the PL330 core driver during pl330_add */
+ struct pl330_config pcfg;
+
+ spinlock_t lock;
+ /* Maximum possible events/irqs */
+ int events[32];
+ /* BUS address of MicroCode buffer */
+ dma_addr_t mcode_bus;
+ /* CPU address of MicroCode buffer */
+ void *mcode_cpu;
+ /* List of all Channel threads */
+ struct pl330_thread *channels;
+ /* Pointer to the MANAGER thread */
+ struct pl330_thread *manager;
+ /* To handle bad news in interrupt */
+ struct tasklet_struct tasks;
+ struct _pl330_tbd dmac_tbd;
+ /* State of DMAC operation */
+ enum pl330_dmac_state state;
+ /* Holds list of reqs with due callbacks */
+ struct list_head req_done;
+
+ /* Peripheral channels connected to this DMAC */
+ unsigned int num_peripherals;
+ struct dma_pl330_chan *peripherals; /* keep at end */
+ int quirks;
+
+ struct reset_control *rstc;
+ struct reset_control *rstc_ocp;
+};
+
+static struct pl330_of_quirks {
+ char *quirk;
+ int id;
+} of_quirks[] = {
+ {
+ .quirk = "arm,pl330-broken-no-flushp",
+ .id = PL330_QUIRK_BROKEN_NO_FLUSHP,
+ },
+ {
+ .quirk = "arm,pl330-periph-burst",
+ .id = PL330_QUIRK_PERIPH_BURST,
+ }
+};
+
+struct dma_pl330_desc {
+ /* To attach to a queue as child */
+ struct list_head node;
+
+ /* Descriptor for the DMA Engine API */
+ struct dma_async_tx_descriptor txd;
+
+ /* Xfer for PL330 core */
+ struct pl330_xfer px;
+
+ struct pl330_reqcfg rqcfg;
+
+ enum desc_status status;
+
+ int bytes_requested;
+ bool last;
+
+ /* The channel which currently holds this desc */
+ struct dma_pl330_chan *pchan;
+
+ enum dma_transfer_direction rqtype;
+ /* Index of peripheral for the xfer. */
+ unsigned peri:5;
+ /* Hook to attach to DMAC's list of reqs with due callback */
+ struct list_head rqd;
+};
+
+struct _xfer_spec {
+ u32 ccr;
+ struct dma_pl330_desc *desc;
+};
+
+static int pl330_config_write(struct dma_chan *chan,
+ struct dma_slave_config *slave_config,
+ enum dma_transfer_direction direction);
+
+static inline bool _queue_full(struct pl330_thread *thrd)
+{
+ return thrd->req[0].desc != NULL && thrd->req[1].desc != NULL;
+}
+
+static inline bool is_manager(struct pl330_thread *thrd)
+{
+ return thrd->dmac->manager == thrd;
+}
+
+/* If manager of the thread is in Non-Secure mode */
+static inline bool _manager_ns(struct pl330_thread *thrd)
+{
+ return (thrd->dmac->pcfg.mode & DMAC_MODE_NS) ? true : false;
+}
+
+static inline u32 get_revision(u32 periph_id)
+{
+ return (periph_id >> PERIPH_REV_SHIFT) & PERIPH_REV_MASK;
+}
+
+static inline u32 _emit_END(unsigned dry_run, u8 buf[])
+{
+ if (dry_run)
+ return SZ_DMAEND;
+
+ buf[0] = CMD_DMAEND;
+
+ PL330_DBGCMD_DUMP(SZ_DMAEND, "\tDMAEND\n");
+
+ return SZ_DMAEND;
+}
+
+static inline u32 _emit_FLUSHP(unsigned dry_run, u8 buf[], u8 peri)
+{
+ if (dry_run)
+ return SZ_DMAFLUSHP;
+
+ buf[0] = CMD_DMAFLUSHP;
+
+ peri &= 0x1f;
+ peri <<= 3;
+ buf[1] = peri;
+
+ PL330_DBGCMD_DUMP(SZ_DMAFLUSHP, "\tDMAFLUSHP %u\n", peri >> 3);
+
+ return SZ_DMAFLUSHP;
+}
+
+static inline u32 _emit_LD(unsigned dry_run, u8 buf[], enum pl330_cond cond)
+{
+ if (dry_run)
+ return SZ_DMALD;
+
+ buf[0] = CMD_DMALD;
+
+ if (cond == SINGLE)
+ buf[0] |= (0 << 1) | (1 << 0);
+ else if (cond == BURST)
+ buf[0] |= (1 << 1) | (1 << 0);
+
+ PL330_DBGCMD_DUMP(SZ_DMALD, "\tDMALD%c\n",
+ cond == SINGLE ? 'S' : (cond == BURST ? 'B' : 'A'));
+
+ return SZ_DMALD;
+}
+
+static inline u32 _emit_LDP(unsigned dry_run, u8 buf[],
+ enum pl330_cond cond, u8 peri)
+{
+ if (dry_run)
+ return SZ_DMALDP;
+
+ buf[0] = CMD_DMALDP;
+
+ if (cond == BURST)
+ buf[0] |= (1 << 1);
+
+ peri &= 0x1f;
+ peri <<= 3;
+ buf[1] = peri;
+
+ PL330_DBGCMD_DUMP(SZ_DMALDP, "\tDMALDP%c %u\n",
+ cond == SINGLE ? 'S' : 'B', peri >> 3);
+
+ return SZ_DMALDP;
+}
+
+static inline u32 _emit_LP(unsigned dry_run, u8 buf[],
+ unsigned loop, u8 cnt)
+{
+ if (dry_run)
+ return SZ_DMALP;
+
+ buf[0] = CMD_DMALP;
+
+ if (loop)
+ buf[0] |= (1 << 1);
+
+ cnt--; /* DMAC increments by 1 internally */
+ buf[1] = cnt;
+
+ PL330_DBGCMD_DUMP(SZ_DMALP, "\tDMALP_%c %u\n", loop ? '1' : '0', cnt);
+
+ return SZ_DMALP;
+}
+
+struct _arg_LPEND {
+ enum pl330_cond cond;
+ bool forever;
+ unsigned loop;
+ u8 bjump;
+};
+
+static inline u32 _emit_LPEND(unsigned dry_run, u8 buf[],
+ const struct _arg_LPEND *arg)
+{
+ enum pl330_cond cond = arg->cond;
+ bool forever = arg->forever;
+ unsigned loop = arg->loop;
+ u8 bjump = arg->bjump;
+
+ if (dry_run)
+ return SZ_DMALPEND;
+
+ buf[0] = CMD_DMALPEND;
+
+ if (loop)
+ buf[0] |= (1 << 2);
+
+ if (!forever)
+ buf[0] |= (1 << 4);
+
+ if (cond == SINGLE)
+ buf[0] |= (0 << 1) | (1 << 0);
+ else if (cond == BURST)
+ buf[0] |= (1 << 1) | (1 << 0);
+
+ buf[1] = bjump;
+
+ PL330_DBGCMD_DUMP(SZ_DMALPEND, "\tDMALP%s%c_%c bjmpto_%x\n",
+ forever ? "FE" : "END",
+ cond == SINGLE ? 'S' : (cond == BURST ? 'B' : 'A'),
+ loop ? '1' : '0',
+ bjump);
+
+ return SZ_DMALPEND;
+}
+
+static inline u32 _emit_KILL(unsigned dry_run, u8 buf[])
+{
+ if (dry_run)
+ return SZ_DMAKILL;
+
+ buf[0] = CMD_DMAKILL;
+
+ return SZ_DMAKILL;
+}
+
+static inline u32 _emit_MOV(unsigned dry_run, u8 buf[],
+ enum dmamov_dst dst, u32 val)
+{
+ if (dry_run)
+ return SZ_DMAMOV;
+
+ buf[0] = CMD_DMAMOV;
+ buf[1] = dst;
+ buf[2] = val;
+ buf[3] = val >> 8;
+ buf[4] = val >> 16;
+ buf[5] = val >> 24;
+
+ PL330_DBGCMD_DUMP(SZ_DMAMOV, "\tDMAMOV %s 0x%x\n",
+ dst == SAR ? "SAR" : (dst == DAR ? "DAR" : "CCR"), val);
+
+ return SZ_DMAMOV;
+}
+
+static inline u32 _emit_RMB(unsigned dry_run, u8 buf[])
+{
+ if (dry_run)
+ return SZ_DMARMB;
+
+ buf[0] = CMD_DMARMB;
+
+ PL330_DBGCMD_DUMP(SZ_DMARMB, "\tDMARMB\n");
+
+ return SZ_DMARMB;
+}
+
+static inline u32 _emit_SEV(unsigned dry_run, u8 buf[], u8 ev)
+{
+ if (dry_run)
+ return SZ_DMASEV;
+
+ buf[0] = CMD_DMASEV;
+
+ ev &= 0x1f;
+ ev <<= 3;
+ buf[1] = ev;
+
+ PL330_DBGCMD_DUMP(SZ_DMASEV, "\tDMASEV %u\n", ev >> 3);
+
+ return SZ_DMASEV;
+}
+
+static inline u32 _emit_ST(unsigned dry_run, u8 buf[], enum pl330_cond cond)
+{
+ if (dry_run)
+ return SZ_DMAST;
+
+ buf[0] = CMD_DMAST;
+
+ if (cond == SINGLE)
+ buf[0] |= (0 << 1) | (1 << 0);
+ else if (cond == BURST)
+ buf[0] |= (1 << 1) | (1 << 0);
+
+ PL330_DBGCMD_DUMP(SZ_DMAST, "\tDMAST%c\n",
+ cond == SINGLE ? 'S' : (cond == BURST ? 'B' : 'A'));
+
+ return SZ_DMAST;
+}
+
+static inline u32 _emit_STP(unsigned dry_run, u8 buf[],
+ enum pl330_cond cond, u8 peri)
+{
+ if (dry_run)
+ return SZ_DMASTP;
+
+ buf[0] = CMD_DMASTP;
+
+ if (cond == BURST)
+ buf[0] |= (1 << 1);
+
+ peri &= 0x1f;
+ peri <<= 3;
+ buf[1] = peri;
+
+ PL330_DBGCMD_DUMP(SZ_DMASTP, "\tDMASTP%c %u\n",
+ cond == SINGLE ? 'S' : 'B', peri >> 3);
+
+ return SZ_DMASTP;
+}
+
+static inline u32 _emit_WFP(unsigned dry_run, u8 buf[],
+ enum pl330_cond cond, u8 peri)
+{
+ if (dry_run)
+ return SZ_DMAWFP;
+
+ buf[0] = CMD_DMAWFP;
+
+ if (cond == SINGLE)
+ buf[0] |= (0 << 1) | (0 << 0);
+ else if (cond == BURST)
+ buf[0] |= (1 << 1) | (0 << 0);
+ else
+ buf[0] |= (0 << 1) | (1 << 0);
+
+ peri &= 0x1f;
+ peri <<= 3;
+ buf[1] = peri;
+
+ PL330_DBGCMD_DUMP(SZ_DMAWFP, "\tDMAWFP%c %u\n",
+ cond == SINGLE ? 'S' : (cond == BURST ? 'B' : 'P'), peri >> 3);
+
+ return SZ_DMAWFP;
+}
+
+static inline u32 _emit_WMB(unsigned dry_run, u8 buf[])
+{
+ if (dry_run)
+ return SZ_DMAWMB;
+
+ buf[0] = CMD_DMAWMB;
+
+ PL330_DBGCMD_DUMP(SZ_DMAWMB, "\tDMAWMB\n");
+
+ return SZ_DMAWMB;
+}
+
+struct _arg_GO {
+ u8 chan;
+ u32 addr;
+ unsigned ns;
+};
+
+static inline u32 _emit_GO(unsigned dry_run, u8 buf[],
+ const struct _arg_GO *arg)
+{
+ u8 chan = arg->chan;
+ u32 addr = arg->addr;
+ unsigned ns = arg->ns;
+
+ if (dry_run)
+ return SZ_DMAGO;
+
+ buf[0] = CMD_DMAGO;
+ buf[0] |= (ns << 1);
+ buf[1] = chan & 0x7;
+ buf[2] = addr;
+ buf[3] = addr >> 8;
+ buf[4] = addr >> 16;
+ buf[5] = addr >> 24;
+
+ return SZ_DMAGO;
+}
+
+#define msecs_to_loops(t) (loops_per_jiffy / 1000 * HZ * t)
+
+/* Returns Time-Out */
+static bool _until_dmac_idle(struct pl330_thread *thrd)
+{
+ void __iomem *regs = thrd->dmac->base;
+ unsigned long loops = msecs_to_loops(5);
+
+ do {
+ /* Until Manager is Idle */
+ if (!(readl(regs + DBGSTATUS) & DBG_BUSY))
+ break;
+
+ cpu_relax();
+ } while (--loops);
+
+ if (!loops)
+ return true;
+
+ return false;
+}
+
+static inline void _execute_DBGINSN(struct pl330_thread *thrd,
+ u8 insn[], bool as_manager)
+{
+ void __iomem *regs = thrd->dmac->base;
+ u32 val;
+
+ /* If timed out due to halted state-machine */
+ if (_until_dmac_idle(thrd)) {
+ dev_err(thrd->dmac->ddma.dev, "DMAC halted!\n");
+ return;
+ }
+
+ val = (insn[0] << 16) | (insn[1] << 24);
+ if (!as_manager) {
+ val |= (1 << 0);
+ val |= (thrd->id << 8); /* Channel Number */
+ }
+ writel(val, regs + DBGINST0);
+
+ val = le32_to_cpu(*((__le32 *)&insn[2]));
+ writel(val, regs + DBGINST1);
+
+ /* Get going */
+ writel(0, regs + DBGCMD);
+}
+
+static inline u32 _state(struct pl330_thread *thrd)
+{
+ void __iomem *regs = thrd->dmac->base;
+ u32 val;
+
+ if (is_manager(thrd))
+ val = readl(regs + DS) & 0xf;
+ else
+ val = readl(regs + CS(thrd->id)) & 0xf;
+
+ switch (val) {
+ case DS_ST_STOP:
+ return PL330_STATE_STOPPED;
+ case DS_ST_EXEC:
+ return PL330_STATE_EXECUTING;
+ case DS_ST_CMISS:
+ return PL330_STATE_CACHEMISS;
+ case DS_ST_UPDTPC:
+ return PL330_STATE_UPDTPC;
+ case DS_ST_WFE:
+ return PL330_STATE_WFE;
+ case DS_ST_FAULT:
+ return PL330_STATE_FAULTING;
+ case DS_ST_ATBRR:
+ if (is_manager(thrd))
+ return PL330_STATE_INVALID;
+ else
+ return PL330_STATE_ATBARRIER;
+ case DS_ST_QBUSY:
+ if (is_manager(thrd))
+ return PL330_STATE_INVALID;
+ else
+ return PL330_STATE_QUEUEBUSY;
+ case DS_ST_WFP:
+ if (is_manager(thrd))
+ return PL330_STATE_INVALID;
+ else
+ return PL330_STATE_WFP;
+ case DS_ST_KILL:
+ if (is_manager(thrd))
+ return PL330_STATE_INVALID;
+ else
+ return PL330_STATE_KILLING;
+ case DS_ST_CMPLT:
+ if (is_manager(thrd))
+ return PL330_STATE_INVALID;
+ else
+ return PL330_STATE_COMPLETING;
+ case DS_ST_FLTCMP:
+ if (is_manager(thrd))
+ return PL330_STATE_INVALID;
+ else
+ return PL330_STATE_FAULT_COMPLETING;
+ default:
+ return PL330_STATE_INVALID;
+ }
+}
+
+static void _stop(struct pl330_thread *thrd)
+{
+ void __iomem *regs = thrd->dmac->base;
+ u8 insn[6] = {0, 0, 0, 0, 0, 0};
+ u32 inten = readl(regs + INTEN);
+
+ if (_state(thrd) == PL330_STATE_FAULT_COMPLETING)
+ UNTIL(thrd, PL330_STATE_FAULTING | PL330_STATE_KILLING);
+
+ /* Return if nothing needs to be done */
+ if (_state(thrd) == PL330_STATE_COMPLETING
+ || _state(thrd) == PL330_STATE_KILLING
+ || _state(thrd) == PL330_STATE_STOPPED)
+ return;
+
+ _emit_KILL(0, insn);
+
+ _execute_DBGINSN(thrd, insn, is_manager(thrd));
+
+ /* clear the event */
+ if (inten & (1 << thrd->ev))
+ writel(1 << thrd->ev, regs + INTCLR);
+ /* Stop generating interrupts for SEV */
+ writel(inten & ~(1 << thrd->ev), regs + INTEN);
+}
+
+/* Start doing req 'idx' of thread 'thrd' */
+static bool _trigger(struct pl330_thread *thrd)
+{
+ void __iomem *regs = thrd->dmac->base;
+ struct _pl330_req *req;
+ struct dma_pl330_desc *desc;
+ struct _arg_GO go;
+ unsigned ns;
+ u8 insn[6] = {0, 0, 0, 0, 0, 0};
+ int idx;
+
+ /* Return if already ACTIVE */
+ if (_state(thrd) != PL330_STATE_STOPPED)
+ return true;
+
+ idx = 1 - thrd->lstenq;
+ if (thrd->req[idx].desc != NULL) {
+ req = &thrd->req[idx];
+ } else {
+ idx = thrd->lstenq;
+ if (thrd->req[idx].desc != NULL)
+ req = &thrd->req[idx];
+ else
+ req = NULL;
+ }
+
+ /* Return if no request */
+ if (!req)
+ return true;
+
+ /* Return if req is running */
+ if (idx == thrd->req_running)
+ return true;
+
+ desc = req->desc;
+
+ ns = desc->rqcfg.nonsecure ? 1 : 0;
+
+ /* See 'Abort Sources' point-4 at Page 2-25 */
+ if (_manager_ns(thrd) && !ns)
+ dev_info(thrd->dmac->ddma.dev, "%s:%d Recipe for ABORT!\n",
+ __func__, __LINE__);
+
+ go.chan = thrd->id;
+ go.addr = req->mc_bus;
+ go.ns = ns;
+ _emit_GO(0, insn, &go);
+
+ /* Set to generate interrupts for SEV */
+ writel(readl(regs + INTEN) | (1 << thrd->ev), regs + INTEN);
+
+ /* Only manager can execute GO */
+ _execute_DBGINSN(thrd, insn, true);
+
+ thrd->req_running = idx;
+
+ return true;
+}
+
+static bool pl330_start_thread(struct pl330_thread *thrd)
+{
+ switch (_state(thrd)) {
+ case PL330_STATE_FAULT_COMPLETING:
+ UNTIL(thrd, PL330_STATE_FAULTING | PL330_STATE_KILLING);
+
+ if (_state(thrd) == PL330_STATE_KILLING)
+ UNTIL(thrd, PL330_STATE_STOPPED)
+ fallthrough;
+
+ case PL330_STATE_FAULTING:
+ _stop(thrd);
+ fallthrough;
+
+ case PL330_STATE_KILLING:
+ case PL330_STATE_COMPLETING:
+ UNTIL(thrd, PL330_STATE_STOPPED)
+ fallthrough;
+
+ case PL330_STATE_STOPPED:
+ return _trigger(thrd);
+
+ case PL330_STATE_WFP:
+ case PL330_STATE_QUEUEBUSY:
+ case PL330_STATE_ATBARRIER:
+ case PL330_STATE_UPDTPC:
+ case PL330_STATE_CACHEMISS:
+ case PL330_STATE_EXECUTING:
+ return true;
+
+ case PL330_STATE_WFE: /* For RESUME, nothing yet */
+ default:
+ return false;
+ }
+}
+
+static inline int _ldst_memtomem(unsigned dry_run, u8 buf[],
+ const struct _xfer_spec *pxs, int cyc)
+{
+ int off = 0;
+ struct pl330_config *pcfg = pxs->desc->rqcfg.pcfg;
+
+ /* check lock-up free version */
+ if (get_revision(pcfg->periph_id) >= PERIPH_REV_R1P0) {
+ while (cyc--) {
+ off += _emit_LD(dry_run, &buf[off], ALWAYS);
+ off += _emit_ST(dry_run, &buf[off], ALWAYS);
+ }
+ } else {
+ while (cyc--) {
+ off += _emit_LD(dry_run, &buf[off], ALWAYS);
+ off += _emit_RMB(dry_run, &buf[off]);
+ off += _emit_ST(dry_run, &buf[off], ALWAYS);
+ off += _emit_WMB(dry_run, &buf[off]);
+ }
+ }
+
+ return off;
+}
+
+static u32 _emit_load(unsigned int dry_run, u8 buf[],
+ enum pl330_cond cond, enum dma_transfer_direction direction,
+ u8 peri)
+{
+ int off = 0;
+
+ switch (direction) {
+ case DMA_MEM_TO_MEM:
+ case DMA_MEM_TO_DEV:
+ off += _emit_LD(dry_run, &buf[off], cond);
+ break;
+
+ case DMA_DEV_TO_MEM:
+ if (cond == ALWAYS) {
+ off += _emit_LDP(dry_run, &buf[off], SINGLE,
+ peri);
+ off += _emit_LDP(dry_run, &buf[off], BURST,
+ peri);
+ } else {
+ off += _emit_LDP(dry_run, &buf[off], cond,
+ peri);
+ }
+ break;
+
+ default:
+ /* this code should be unreachable */
+ WARN_ON(1);
+ break;
+ }
+
+ return off;
+}
+
+static inline u32 _emit_store(unsigned int dry_run, u8 buf[],
+ enum pl330_cond cond, enum dma_transfer_direction direction,
+ u8 peri)
+{
+ int off = 0;
+
+ switch (direction) {
+ case DMA_MEM_TO_MEM:
+ case DMA_DEV_TO_MEM:
+ off += _emit_ST(dry_run, &buf[off], cond);
+ break;
+
+ case DMA_MEM_TO_DEV:
+ if (cond == ALWAYS) {
+ off += _emit_STP(dry_run, &buf[off], SINGLE,
+ peri);
+ off += _emit_STP(dry_run, &buf[off], BURST,
+ peri);
+ } else {
+ off += _emit_STP(dry_run, &buf[off], cond,
+ peri);
+ }
+ break;
+
+ default:
+ /* this code should be unreachable */
+ WARN_ON(1);
+ break;
+ }
+
+ return off;
+}
+
+static inline int _ldst_peripheral(struct pl330_dmac *pl330,
+ unsigned dry_run, u8 buf[],
+ const struct _xfer_spec *pxs, int cyc,
+ enum pl330_cond cond)
+{
+ int off = 0;
+
+ /*
+ * do FLUSHP at beginning to clear any stale dma requests before the
+ * first WFP.
+ */
+ if (!(pl330->quirks & PL330_QUIRK_BROKEN_NO_FLUSHP))
+ off += _emit_FLUSHP(dry_run, &buf[off], pxs->desc->peri);
+ while (cyc--) {
+ off += _emit_WFP(dry_run, &buf[off], cond, pxs->desc->peri);
+ off += _emit_load(dry_run, &buf[off], cond, pxs->desc->rqtype,
+ pxs->desc->peri);
+ off += _emit_store(dry_run, &buf[off], cond, pxs->desc->rqtype,
+ pxs->desc->peri);
+ }
+
+ return off;
+}
+
+static int _bursts(struct pl330_dmac *pl330, unsigned dry_run, u8 buf[],
+ const struct _xfer_spec *pxs, int cyc)
+{
+ int off = 0;
+ enum pl330_cond cond = BRST_LEN(pxs->ccr) > 1 ? BURST : SINGLE;
+
+ if (pl330->quirks & PL330_QUIRK_PERIPH_BURST)
+ cond = BURST;
+
+ switch (pxs->desc->rqtype) {
+ case DMA_MEM_TO_DEV:
+ case DMA_DEV_TO_MEM:
+ off += _ldst_peripheral(pl330, dry_run, &buf[off], pxs, cyc,
+ cond);
+ break;
+
+ case DMA_MEM_TO_MEM:
+ off += _ldst_memtomem(dry_run, &buf[off], pxs, cyc);
+ break;
+
+ default:
+ /* this code should be unreachable */
+ WARN_ON(1);
+ break;
+ }
+
+ return off;
+}
+
+/*
+ * only the unaligned burst transfers have the dregs.
+ * so, still transfer dregs with a reduced size burst
+ * for mem-to-mem, mem-to-dev or dev-to-mem.
+ */
+static int _dregs(struct pl330_dmac *pl330, unsigned int dry_run, u8 buf[],
+ const struct _xfer_spec *pxs, int transfer_length)
+{
+ int off = 0;
+ int dregs_ccr;
+
+ if (transfer_length == 0)
+ return off;
+
+ /*
+ * dregs_len = (total bytes - BURST_TO_BYTE(bursts, ccr)) /
+ * BRST_SIZE(ccr)
+ * the dregs len must be smaller than burst len,
+ * so, for higher efficiency, we can modify CCR
+ * to use a reduced size burst len for the dregs.
+ */
+ dregs_ccr = pxs->ccr;
+ dregs_ccr &= ~((0xf << CC_SRCBRSTLEN_SHFT) |
+ (0xf << CC_DSTBRSTLEN_SHFT));
+ dregs_ccr |= (((transfer_length - 1) & 0xf) <<
+ CC_SRCBRSTLEN_SHFT);
+ dregs_ccr |= (((transfer_length - 1) & 0xf) <<
+ CC_DSTBRSTLEN_SHFT);
+
+ switch (pxs->desc->rqtype) {
+ case DMA_MEM_TO_DEV:
+ case DMA_DEV_TO_MEM:
+ off += _emit_MOV(dry_run, &buf[off], CCR, dregs_ccr);
+ off += _ldst_peripheral(pl330, dry_run, &buf[off], pxs, 1,
+ BURST);
+ break;
+
+ case DMA_MEM_TO_MEM:
+ off += _emit_MOV(dry_run, &buf[off], CCR, dregs_ccr);
+ off += _ldst_memtomem(dry_run, &buf[off], pxs, 1);
+ break;
+
+ default:
+ /* this code should be unreachable */
+ WARN_ON(1);
+ break;
+ }
+
+ return off;
+}
+
+/* Returns bytes consumed and updates bursts */
+static inline int _loop(struct pl330_dmac *pl330, unsigned dry_run, u8 buf[],
+ unsigned long *bursts, const struct _xfer_spec *pxs)
+{
+ int cyc, cycmax, szlp, szlpend, szbrst, off;
+ unsigned lcnt0, lcnt1, ljmp0, ljmp1;
+ struct _arg_LPEND lpend;
+
+ if (*bursts == 1)
+ return _bursts(pl330, dry_run, buf, pxs, 1);
+
+ /* Max iterations possible in DMALP is 256 */
+ if (*bursts >= 256*256) {
+ lcnt1 = 256;
+ lcnt0 = 256;
+ cyc = *bursts / lcnt1 / lcnt0;
+ } else if (*bursts > 256) {
+ lcnt1 = 256;
+ lcnt0 = *bursts / lcnt1;
+ cyc = 1;
+ } else {
+ lcnt1 = *bursts;
+ lcnt0 = 0;
+ cyc = 1;
+ }
+
+ szlp = _emit_LP(1, buf, 0, 0);
+ szbrst = _bursts(pl330, 1, buf, pxs, 1);
+
+ lpend.cond = ALWAYS;
+ lpend.forever = false;
+ lpend.loop = 0;
+ lpend.bjump = 0;
+ szlpend = _emit_LPEND(1, buf, &lpend);
+
+ if (lcnt0) {
+ szlp *= 2;
+ szlpend *= 2;
+ }
+
+ /*
+ * Max bursts that we can unroll due to limit on the
+ * size of backward jump that can be encoded in DMALPEND
+ * which is 8-bits and hence 255
+ */
+ cycmax = (255 - (szlp + szlpend)) / szbrst;
+
+ cyc = (cycmax < cyc) ? cycmax : cyc;
+
+ off = 0;
+
+ if (lcnt0) {
+ off += _emit_LP(dry_run, &buf[off], 0, lcnt0);
+ ljmp0 = off;
+ }
+
+ off += _emit_LP(dry_run, &buf[off], 1, lcnt1);
+ ljmp1 = off;
+
+ off += _bursts(pl330, dry_run, &buf[off], pxs, cyc);
+
+ lpend.cond = ALWAYS;
+ lpend.forever = false;
+ lpend.loop = 1;
+ lpend.bjump = off - ljmp1;
+ off += _emit_LPEND(dry_run, &buf[off], &lpend);
+
+ if (lcnt0) {
+ lpend.cond = ALWAYS;
+ lpend.forever = false;
+ lpend.loop = 0;
+ lpend.bjump = off - ljmp0;
+ off += _emit_LPEND(dry_run, &buf[off], &lpend);
+ }
+
+ *bursts = lcnt1 * cyc;
+ if (lcnt0)
+ *bursts *= lcnt0;
+
+ return off;
+}
+
+static inline int _setup_loops(struct pl330_dmac *pl330,
+ unsigned dry_run, u8 buf[],
+ const struct _xfer_spec *pxs)
+{
+ struct pl330_xfer *x = &pxs->desc->px;
+ u32 ccr = pxs->ccr;
+ unsigned long c, bursts = BYTE_TO_BURST(x->bytes, ccr);
+ int num_dregs = (x->bytes - BURST_TO_BYTE(bursts, ccr)) /
+ BRST_SIZE(ccr);
+ int off = 0;
+
+ while (bursts) {
+ c = bursts;
+ off += _loop(pl330, dry_run, &buf[off], &c, pxs);
+ bursts -= c;
+ }
+ off += _dregs(pl330, dry_run, &buf[off], pxs, num_dregs);
+
+ return off;
+}
+
+static inline int _setup_xfer(struct pl330_dmac *pl330,
+ unsigned dry_run, u8 buf[],
+ const struct _xfer_spec *pxs)
+{
+ struct pl330_xfer *x = &pxs->desc->px;
+ int off = 0;
+
+ /* DMAMOV SAR, x->src_addr */
+ off += _emit_MOV(dry_run, &buf[off], SAR, x->src_addr);
+ /* DMAMOV DAR, x->dst_addr */
+ off += _emit_MOV(dry_run, &buf[off], DAR, x->dst_addr);
+
+ /* Setup Loop(s) */
+ off += _setup_loops(pl330, dry_run, &buf[off], pxs);
+
+ return off;
+}
+
+/*
+ * A req is a sequence of one or more xfer units.
+ * Returns the number of bytes taken to setup the MC for the req.
+ */
+static int _setup_req(struct pl330_dmac *pl330, unsigned dry_run,
+ struct pl330_thread *thrd, unsigned index,
+ struct _xfer_spec *pxs)
+{
+ struct _pl330_req *req = &thrd->req[index];
+ u8 *buf = req->mc_cpu;
+ int off = 0;
+
+ PL330_DBGMC_START(req->mc_bus);
+
+ /* DMAMOV CCR, ccr */
+ off += _emit_MOV(dry_run, &buf[off], CCR, pxs->ccr);
+
+ off += _setup_xfer(pl330, dry_run, &buf[off], pxs);
+
+ /* DMASEV peripheral/event */
+ off += _emit_SEV(dry_run, &buf[off], thrd->ev);
+ /* DMAEND */
+ off += _emit_END(dry_run, &buf[off]);
+
+ return off;
+}
+
+static inline u32 _prepare_ccr(const struct pl330_reqcfg *rqc)
+{
+ u32 ccr = 0;
+
+ if (rqc->src_inc)
+ ccr |= CC_SRCINC;
+
+ if (rqc->dst_inc)
+ ccr |= CC_DSTINC;
+
+ /* We set same protection levels for Src and DST for now */
+ if (rqc->privileged)
+ ccr |= CC_SRCPRI | CC_DSTPRI;
+ if (rqc->nonsecure)
+ ccr |= CC_SRCNS | CC_DSTNS;
+ if (rqc->insnaccess)
+ ccr |= CC_SRCIA | CC_DSTIA;
+
+ ccr |= (((rqc->brst_len - 1) & 0xf) << CC_SRCBRSTLEN_SHFT);
+ ccr |= (((rqc->brst_len - 1) & 0xf) << CC_DSTBRSTLEN_SHFT);
+
+ ccr |= (rqc->brst_size << CC_SRCBRSTSIZE_SHFT);
+ ccr |= (rqc->brst_size << CC_DSTBRSTSIZE_SHFT);
+
+ ccr |= (rqc->scctl << CC_SRCCCTRL_SHFT);
+ ccr |= (rqc->dcctl << CC_DSTCCTRL_SHFT);
+
+ ccr |= (rqc->swap << CC_SWAP_SHFT);
+
+ return ccr;
+}
+
+/*
+ * Submit a list of xfers after which the client wants notification.
+ * Client is not notified after each xfer unit, just once after all
+ * xfer units are done or some error occurs.
+ */
+static int pl330_submit_req(struct pl330_thread *thrd,
+ struct dma_pl330_desc *desc)
+{
+ struct pl330_dmac *pl330 = thrd->dmac;
+ struct _xfer_spec xs;
+ unsigned long flags;
+ unsigned idx;
+ u32 ccr;
+ int ret = 0;
+
+ switch (desc->rqtype) {
+ case DMA_MEM_TO_DEV:
+ break;
+
+ case DMA_DEV_TO_MEM:
+ break;
+
+ case DMA_MEM_TO_MEM:
+ break;
+
+ default:
+ return -ENOTSUPP;
+ }
+
+ if (pl330->state == DYING
+ || pl330->dmac_tbd.reset_chan & (1 << thrd->id)) {
+ dev_info(thrd->dmac->ddma.dev, "%s:%d\n",
+ __func__, __LINE__);
+ return -EAGAIN;
+ }
+
+ /* If request for non-existing peripheral */
+ if (desc->rqtype != DMA_MEM_TO_MEM &&
+ desc->peri >= pl330->pcfg.num_peri) {
+ dev_info(thrd->dmac->ddma.dev,
+ "%s:%d Invalid peripheral(%u)!\n",
+ __func__, __LINE__, desc->peri);
+ return -EINVAL;
+ }
+
+ spin_lock_irqsave(&pl330->lock, flags);
+
+ if (_queue_full(thrd)) {
+ ret = -EAGAIN;
+ goto xfer_exit;
+ }
+
+ /* Prefer Secure Channel */
+ if (!_manager_ns(thrd))
+ desc->rqcfg.nonsecure = 0;
+ else
+ desc->rqcfg.nonsecure = 1;
+
+ ccr = _prepare_ccr(&desc->rqcfg);
+
+ idx = thrd->req[0].desc == NULL ? 0 : 1;
+
+ xs.ccr = ccr;
+ xs.desc = desc;
+
+ /* First dry run to check if req is acceptable */
+ ret = _setup_req(pl330, 1, thrd, idx, &xs);
+
+ if (ret > pl330->mcbufsz / 2) {
+ dev_info(pl330->ddma.dev, "%s:%d Try increasing mcbufsz (%i/%i)\n",
+ __func__, __LINE__, ret, pl330->mcbufsz / 2);
+ ret = -ENOMEM;
+ goto xfer_exit;
+ }
+
+ /* Hook the request */
+ thrd->lstenq = idx;
+ thrd->req[idx].desc = desc;
+ _setup_req(pl330, 0, thrd, idx, &xs);
+
+ ret = 0;
+
+xfer_exit:
+ spin_unlock_irqrestore(&pl330->lock, flags);
+
+ return ret;
+}
+
+static void dma_pl330_rqcb(struct dma_pl330_desc *desc, enum pl330_op_err err)
+{
+ struct dma_pl330_chan *pch;
+ unsigned long flags;
+
+ if (!desc)
+ return;
+
+ pch = desc->pchan;
+
+ /* If desc aborted */
+ if (!pch)
+ return;
+
+ spin_lock_irqsave(&pch->lock, flags);
+
+ desc->status = DONE;
+
+ spin_unlock_irqrestore(&pch->lock, flags);
+
+ tasklet_schedule(&pch->task);
+}
+
+static void pl330_dotask(struct tasklet_struct *t)
+{
+ struct pl330_dmac *pl330 = from_tasklet(pl330, t, tasks);
+ unsigned long flags;
+ int i;
+
+ spin_lock_irqsave(&pl330->lock, flags);
+
+ /* The DMAC itself gone nuts */
+ if (pl330->dmac_tbd.reset_dmac) {
+ pl330->state = DYING;
+ /* Reset the manager too */
+ pl330->dmac_tbd.reset_mngr = true;
+ /* Clear the reset flag */
+ pl330->dmac_tbd.reset_dmac = false;
+ }
+
+ if (pl330->dmac_tbd.reset_mngr) {
+ _stop(pl330->manager);
+ /* Reset all channels */
+ pl330->dmac_tbd.reset_chan = (1 << pl330->pcfg.num_chan) - 1;
+ /* Clear the reset flag */
+ pl330->dmac_tbd.reset_mngr = false;
+ }
+
+ for (i = 0; i < pl330->pcfg.num_chan; i++) {
+
+ if (pl330->dmac_tbd.reset_chan & (1 << i)) {
+ struct pl330_thread *thrd = &pl330->channels[i];
+ void __iomem *regs = pl330->base;
+ enum pl330_op_err err;
+
+ _stop(thrd);
+
+ if (readl(regs + FSC) & (1 << thrd->id))
+ err = PL330_ERR_FAIL;
+ else
+ err = PL330_ERR_ABORT;
+
+ spin_unlock_irqrestore(&pl330->lock, flags);
+ dma_pl330_rqcb(thrd->req[1 - thrd->lstenq].desc, err);
+ dma_pl330_rqcb(thrd->req[thrd->lstenq].desc, err);
+ spin_lock_irqsave(&pl330->lock, flags);
+
+ thrd->req[0].desc = NULL;
+ thrd->req[1].desc = NULL;
+ thrd->req_running = -1;
+
+ /* Clear the reset flag */
+ pl330->dmac_tbd.reset_chan &= ~(1 << i);
+ }
+ }
+
+ spin_unlock_irqrestore(&pl330->lock, flags);
+
+ return;
+}
+
+/* Returns 1 if state was updated, 0 otherwise */
+static int pl330_update(struct pl330_dmac *pl330)
+{
+ struct dma_pl330_desc *descdone;
+ unsigned long flags;
+ void __iomem *regs;
+ u32 val;
+ int id, ev, ret = 0;
+
+ regs = pl330->base;
+
+ spin_lock_irqsave(&pl330->lock, flags);
+
+ val = readl(regs + FSM) & 0x1;
+ if (val)
+ pl330->dmac_tbd.reset_mngr = true;
+ else
+ pl330->dmac_tbd.reset_mngr = false;
+
+ val = readl(regs + FSC) & ((1 << pl330->pcfg.num_chan) - 1);
+ pl330->dmac_tbd.reset_chan |= val;
+ if (val) {
+ int i = 0;
+ while (i < pl330->pcfg.num_chan) {
+ if (val & (1 << i)) {
+ dev_info(pl330->ddma.dev,
+ "Reset Channel-%d\t CS-%x FTC-%x\n",
+ i, readl(regs + CS(i)),
+ readl(regs + FTC(i)));
+ _stop(&pl330->channels[i]);
+ }
+ i++;
+ }
+ }
+
+ /* Check which event happened i.e, thread notified */
+ val = readl(regs + ES);
+ if (pl330->pcfg.num_events < 32
+ && val & ~((1 << pl330->pcfg.num_events) - 1)) {
+ pl330->dmac_tbd.reset_dmac = true;
+ dev_err(pl330->ddma.dev, "%s:%d Unexpected!\n", __func__,
+ __LINE__);
+ ret = 1;
+ goto updt_exit;
+ }
+
+ for (ev = 0; ev < pl330->pcfg.num_events; ev++) {
+ if (val & (1 << ev)) { /* Event occurred */
+ struct pl330_thread *thrd;
+ u32 inten = readl(regs + INTEN);
+ int active;
+
+ /* Clear the event */
+ if (inten & (1 << ev))
+ writel(1 << ev, regs + INTCLR);
+
+ ret = 1;
+
+ id = pl330->events[ev];
+
+ thrd = &pl330->channels[id];
+
+ active = thrd->req_running;
+ if (active == -1) /* Aborted */
+ continue;
+
+ /* Detach the req */
+ descdone = thrd->req[active].desc;
+ thrd->req[active].desc = NULL;
+
+ thrd->req_running = -1;
+
+ /* Get going again ASAP */
+ pl330_start_thread(thrd);
+
+ /* For now, just make a list of callbacks to be done */
+ list_add_tail(&descdone->rqd, &pl330->req_done);
+ }
+ }
+
+ /* Now that we are in no hurry, do the callbacks */
+ while (!list_empty(&pl330->req_done)) {
+ descdone = list_first_entry(&pl330->req_done,
+ struct dma_pl330_desc, rqd);
+ list_del(&descdone->rqd);
+ spin_unlock_irqrestore(&pl330->lock, flags);
+ dma_pl330_rqcb(descdone, PL330_ERR_NONE);
+ spin_lock_irqsave(&pl330->lock, flags);
+ }
+
+updt_exit:
+ spin_unlock_irqrestore(&pl330->lock, flags);
+
+ if (pl330->dmac_tbd.reset_dmac
+ || pl330->dmac_tbd.reset_mngr
+ || pl330->dmac_tbd.reset_chan) {
+ ret = 1;
+ tasklet_schedule(&pl330->tasks);
+ }
+
+ return ret;
+}
+
+/* Reserve an event */
+static inline int _alloc_event(struct pl330_thread *thrd)
+{
+ struct pl330_dmac *pl330 = thrd->dmac;
+ int ev;
+
+ for (ev = 0; ev < pl330->pcfg.num_events; ev++)
+ if (pl330->events[ev] == -1) {
+ pl330->events[ev] = thrd->id;
+ return ev;
+ }
+
+ return -1;
+}
+
+static bool _chan_ns(const struct pl330_dmac *pl330, int i)
+{
+ return pl330->pcfg.irq_ns & (1 << i);
+}
+
+/* Upon success, returns IdentityToken for the
+ * allocated channel, NULL otherwise.
+ */
+static struct pl330_thread *pl330_request_channel(struct pl330_dmac *pl330)
+{
+ struct pl330_thread *thrd = NULL;
+ int chans, i;
+
+ if (pl330->state == DYING)
+ return NULL;
+
+ chans = pl330->pcfg.num_chan;
+
+ for (i = 0; i < chans; i++) {
+ thrd = &pl330->channels[i];
+ if ((thrd->free) && (!_manager_ns(thrd) ||
+ _chan_ns(pl330, i))) {
+ thrd->ev = _alloc_event(thrd);
+ if (thrd->ev >= 0) {
+ thrd->free = false;
+ thrd->lstenq = 1;
+ thrd->req[0].desc = NULL;
+ thrd->req[1].desc = NULL;
+ thrd->req_running = -1;
+ break;
+ }
+ }
+ thrd = NULL;
+ }
+
+ return thrd;
+}
+
+/* Release an event */
+static inline void _free_event(struct pl330_thread *thrd, int ev)
+{
+ struct pl330_dmac *pl330 = thrd->dmac;
+
+ /* If the event is valid and was held by the thread */
+ if (ev >= 0 && ev < pl330->pcfg.num_events
+ && pl330->events[ev] == thrd->id)
+ pl330->events[ev] = -1;
+}
+
+static void pl330_release_channel(struct pl330_thread *thrd)
+{
+ if (!thrd || thrd->free)
+ return;
+
+ _stop(thrd);
+
+ dma_pl330_rqcb(thrd->req[1 - thrd->lstenq].desc, PL330_ERR_ABORT);
+ dma_pl330_rqcb(thrd->req[thrd->lstenq].desc, PL330_ERR_ABORT);
+
+ _free_event(thrd, thrd->ev);
+ thrd->free = true;
+}
+
+/* Initialize the structure for PL330 configuration, that can be used
+ * by the client driver the make best use of the DMAC
+ */
+static void read_dmac_config(struct pl330_dmac *pl330)
+{
+ void __iomem *regs = pl330->base;
+ u32 val;
+
+ val = readl(regs + CRD) >> CRD_DATA_WIDTH_SHIFT;
+ val &= CRD_DATA_WIDTH_MASK;
+ pl330->pcfg.data_bus_width = 8 * (1 << val);
+
+ val = readl(regs + CRD) >> CRD_DATA_BUFF_SHIFT;
+ val &= CRD_DATA_BUFF_MASK;
+ pl330->pcfg.data_buf_dep = val + 1;
+
+ val = readl(regs + CR0) >> CR0_NUM_CHANS_SHIFT;
+ val &= CR0_NUM_CHANS_MASK;
+ val += 1;
+ pl330->pcfg.num_chan = val;
+
+ val = readl(regs + CR0);
+ if (val & CR0_PERIPH_REQ_SET) {
+ val = (val >> CR0_NUM_PERIPH_SHIFT) & CR0_NUM_PERIPH_MASK;
+ val += 1;
+ pl330->pcfg.num_peri = val;
+ pl330->pcfg.peri_ns = readl(regs + CR4);
+ } else {
+ pl330->pcfg.num_peri = 0;
+ }
+
+ val = readl(regs + CR0);
+ if (val & CR0_BOOT_MAN_NS)
+ pl330->pcfg.mode |= DMAC_MODE_NS;
+ else
+ pl330->pcfg.mode &= ~DMAC_MODE_NS;
+
+ val = readl(regs + CR0) >> CR0_NUM_EVENTS_SHIFT;
+ val &= CR0_NUM_EVENTS_MASK;
+ val += 1;
+ pl330->pcfg.num_events = val;
+
+ pl330->pcfg.irq_ns = readl(regs + CR3);
+}
+
+static inline void _reset_thread(struct pl330_thread *thrd)
+{
+ struct pl330_dmac *pl330 = thrd->dmac;
+
+ thrd->req[0].mc_cpu = pl330->mcode_cpu
+ + (thrd->id * pl330->mcbufsz);
+ thrd->req[0].mc_bus = pl330->mcode_bus
+ + (thrd->id * pl330->mcbufsz);
+ thrd->req[0].desc = NULL;
+
+ thrd->req[1].mc_cpu = thrd->req[0].mc_cpu
+ + pl330->mcbufsz / 2;
+ thrd->req[1].mc_bus = thrd->req[0].mc_bus
+ + pl330->mcbufsz / 2;
+ thrd->req[1].desc = NULL;
+
+ thrd->req_running = -1;
+}
+
+static int dmac_alloc_threads(struct pl330_dmac *pl330)
+{
+ int chans = pl330->pcfg.num_chan;
+ struct pl330_thread *thrd;
+ int i;
+
+ /* Allocate 1 Manager and 'chans' Channel threads */
+ pl330->channels = kcalloc(1 + chans, sizeof(*thrd),
+ GFP_KERNEL);
+ if (!pl330->channels)
+ return -ENOMEM;
+
+ /* Init Channel threads */
+ for (i = 0; i < chans; i++) {
+ thrd = &pl330->channels[i];
+ thrd->id = i;
+ thrd->dmac = pl330;
+ _reset_thread(thrd);
+ thrd->free = true;
+ }
+
+ /* MANAGER is indexed at the end */
+ thrd = &pl330->channels[chans];
+ thrd->id = chans;
+ thrd->dmac = pl330;
+ thrd->free = false;
+ pl330->manager = thrd;
+
+ return 0;
+}
+
+static int dmac_alloc_resources(struct pl330_dmac *pl330)
+{
+ int chans = pl330->pcfg.num_chan;
+ int ret;
+
+ /*
+ * Alloc MicroCode buffer for 'chans' Channel threads.
+ * A channel's buffer offset is (Channel_Id * MCODE_BUFF_PERCHAN)
+ */
+ pl330->mcode_cpu = dma_alloc_attrs(pl330->ddma.dev,
+ chans * pl330->mcbufsz,
+ &pl330->mcode_bus, GFP_KERNEL,
+ DMA_ATTR_PRIVILEGED);
+ if (!pl330->mcode_cpu) {
+ dev_err(pl330->ddma.dev, "%s:%d Can't allocate memory!\n",
+ __func__, __LINE__);
+ return -ENOMEM;
+ }
+
+ ret = dmac_alloc_threads(pl330);
+ if (ret) {
+ dev_err(pl330->ddma.dev, "%s:%d Can't to create channels for DMAC!\n",
+ __func__, __LINE__);
+ dma_free_attrs(pl330->ddma.dev,
+ chans * pl330->mcbufsz,
+ pl330->mcode_cpu, pl330->mcode_bus,
+ DMA_ATTR_PRIVILEGED);
+ return ret;
+ }
+
+ return 0;
+}
+
+static int pl330_add(struct pl330_dmac *pl330)
+{
+ int i, ret;
+
+ /* Check if we can handle this DMAC */
+ if ((pl330->pcfg.periph_id & 0xfffff) != PERIPH_ID_VAL) {
+ dev_err(pl330->ddma.dev, "PERIPH_ID 0x%x !\n",
+ pl330->pcfg.periph_id);
+ return -EINVAL;
+ }
+
+ /* Read the configuration of the DMAC */
+ read_dmac_config(pl330);
+
+ if (pl330->pcfg.num_events == 0) {
+ dev_err(pl330->ddma.dev, "%s:%d Can't work without events!\n",
+ __func__, __LINE__);
+ return -EINVAL;
+ }
+
+ spin_lock_init(&pl330->lock);
+
+ INIT_LIST_HEAD(&pl330->req_done);
+
+ /* Use default MC buffer size if not provided */
+ if (!pl330->mcbufsz)
+ pl330->mcbufsz = MCODE_BUFF_PER_REQ * 2;
+
+ /* Mark all events as free */
+ for (i = 0; i < pl330->pcfg.num_events; i++)
+ pl330->events[i] = -1;
+
+ /* Allocate resources needed by the DMAC */
+ ret = dmac_alloc_resources(pl330);
+ if (ret) {
+ dev_err(pl330->ddma.dev, "Unable to create channels for DMAC\n");
+ return ret;
+ }
+
+ tasklet_setup(&pl330->tasks, pl330_dotask);
+
+ pl330->state = INIT;
+
+ return 0;
+}
+
+static int dmac_free_threads(struct pl330_dmac *pl330)
+{
+ struct pl330_thread *thrd;
+ int i;
+
+ /* Release Channel threads */
+ for (i = 0; i < pl330->pcfg.num_chan; i++) {
+ thrd = &pl330->channels[i];
+ pl330_release_channel(thrd);
+ }
+
+ /* Free memory */
+ kfree(pl330->channels);
+
+ return 0;
+}
+
+static void pl330_del(struct pl330_dmac *pl330)
+{
+ pl330->state = UNINIT;
+
+ tasklet_kill(&pl330->tasks);
+
+ /* Free DMAC resources */
+ dmac_free_threads(pl330);
+
+ dma_free_attrs(pl330->ddma.dev,
+ pl330->pcfg.num_chan * pl330->mcbufsz, pl330->mcode_cpu,
+ pl330->mcode_bus, DMA_ATTR_PRIVILEGED);
+}
+
+/* forward declaration */
+static struct amba_driver pl330_driver;
+
+static inline struct dma_pl330_chan *
+to_pchan(struct dma_chan *ch)
+{
+ if (!ch)
+ return NULL;
+
+ return container_of(ch, struct dma_pl330_chan, chan);
+}
+
+static inline struct dma_pl330_desc *
+to_desc(struct dma_async_tx_descriptor *tx)
+{
+ return container_of(tx, struct dma_pl330_desc, txd);
+}
+
+static inline void fill_queue(struct dma_pl330_chan *pch)
+{
+ struct dma_pl330_desc *desc;
+ int ret;
+
+ list_for_each_entry(desc, &pch->work_list, node) {
+
+ /* If already submitted */
+ if (desc->status == BUSY || desc->status == PAUSED)
+ continue;
+
+ ret = pl330_submit_req(pch->thread, desc);
+ if (!ret) {
+ desc->status = BUSY;
+ } else if (ret == -EAGAIN) {
+ /* QFull or DMAC Dying */
+ break;
+ } else {
+ /* Unacceptable request */
+ desc->status = DONE;
+ dev_err(pch->dmac->ddma.dev, "%s:%d Bad Desc(%d)\n",
+ __func__, __LINE__, desc->txd.cookie);
+ tasklet_schedule(&pch->task);
+ }
+ }
+}
+
+static void pl330_tasklet(struct tasklet_struct *t)
+{
+ struct dma_pl330_chan *pch = from_tasklet(pch, t, task);
+ struct dma_pl330_desc *desc, *_dt;
+ unsigned long flags;
+ bool power_down = false;
+
+ spin_lock_irqsave(&pch->lock, flags);
+
+ /* Pick up ripe tomatoes */
+ list_for_each_entry_safe(desc, _dt, &pch->work_list, node)
+ if (desc->status == DONE) {
+ if (!pch->cyclic)
+ dma_cookie_complete(&desc->txd);
+ list_move_tail(&desc->node, &pch->completed_list);
+ }
+
+ /* Try to submit a req imm. next to the last completed cookie */
+ fill_queue(pch);
+
+ if (list_empty(&pch->work_list)) {
+ spin_lock(&pch->thread->dmac->lock);
+ _stop(pch->thread);
+ spin_unlock(&pch->thread->dmac->lock);
+ power_down = true;
+ pch->active = false;
+ } else {
+ /* Make sure the PL330 Channel thread is active */
+ spin_lock(&pch->thread->dmac->lock);
+ pl330_start_thread(pch->thread);
+ spin_unlock(&pch->thread->dmac->lock);
+ }
+
+ while (!list_empty(&pch->completed_list)) {
+ struct dmaengine_desc_callback cb;
+
+ desc = list_first_entry(&pch->completed_list,
+ struct dma_pl330_desc, node);
+
+ dmaengine_desc_get_callback(&desc->txd, &cb);
+
+ if (pch->cyclic) {
+ desc->status = PREP;
+ list_move_tail(&desc->node, &pch->work_list);
+ if (power_down) {
+ pch->active = true;
+ spin_lock(&pch->thread->dmac->lock);
+ pl330_start_thread(pch->thread);
+ spin_unlock(&pch->thread->dmac->lock);
+ power_down = false;
+ }
+ } else {
+ desc->status = FREE;
+ list_move_tail(&desc->node, &pch->dmac->desc_pool);
+ }
+
+ dma_descriptor_unmap(&desc->txd);
+
+ if (dmaengine_desc_callback_valid(&cb)) {
+ spin_unlock_irqrestore(&pch->lock, flags);
+ dmaengine_desc_callback_invoke(&cb, NULL);
+ spin_lock_irqsave(&pch->lock, flags);
+ }
+ }
+ spin_unlock_irqrestore(&pch->lock, flags);
+
+ /* If work list empty, power down */
+ if (power_down) {
+ pm_runtime_mark_last_busy(pch->dmac->ddma.dev);
+ pm_runtime_put_autosuspend(pch->dmac->ddma.dev);
+ }
+}
+
+static struct dma_chan *of_dma_pl330_xlate(struct of_phandle_args *dma_spec,
+ struct of_dma *ofdma)
+{
+ int count = dma_spec->args_count;
+ struct pl330_dmac *pl330 = ofdma->of_dma_data;
+ unsigned int chan_id;
+
+ if (!pl330)
+ return NULL;
+
+ if (count != 1)
+ return NULL;
+
+ chan_id = dma_spec->args[0];
+ if (chan_id >= pl330->num_peripherals)
+ return NULL;
+
+ return dma_get_slave_channel(&pl330->peripherals[chan_id].chan);
+}
+
+static int pl330_alloc_chan_resources(struct dma_chan *chan)
+{
+ struct dma_pl330_chan *pch = to_pchan(chan);
+ struct pl330_dmac *pl330 = pch->dmac;
+ unsigned long flags;
+
+ spin_lock_irqsave(&pl330->lock, flags);
+
+ dma_cookie_init(chan);
+ pch->cyclic = false;
+
+ pch->thread = pl330_request_channel(pl330);
+ if (!pch->thread) {
+ spin_unlock_irqrestore(&pl330->lock, flags);
+ return -ENOMEM;
+ }
+
+ tasklet_setup(&pch->task, pl330_tasklet);
+
+ spin_unlock_irqrestore(&pl330->lock, flags);
+
+ return 1;
+}
+
+/*
+ * We need the data direction between the DMAC (the dma-mapping "device") and
+ * the FIFO (the dmaengine "dev"), from the FIFO's point of view. Confusing!
+ */
+static enum dma_data_direction
+pl330_dma_slave_map_dir(enum dma_transfer_direction dir)
+{
+ switch (dir) {
+ case DMA_MEM_TO_DEV:
+ return DMA_FROM_DEVICE;
+ case DMA_DEV_TO_MEM:
+ return DMA_TO_DEVICE;
+ case DMA_DEV_TO_DEV:
+ return DMA_BIDIRECTIONAL;
+ default:
+ return DMA_NONE;
+ }
+}
+
+static void pl330_unprep_slave_fifo(struct dma_pl330_chan *pch)
+{
+ if (pch->dir != DMA_NONE)
+ dma_unmap_resource(pch->chan.device->dev, pch->fifo_dma,
+ 1 << pch->burst_sz, pch->dir, 0);
+ pch->dir = DMA_NONE;
+}
+
+
+static bool pl330_prep_slave_fifo(struct dma_pl330_chan *pch,
+ enum dma_transfer_direction dir)
+{
+ struct device *dev = pch->chan.device->dev;
+ enum dma_data_direction dma_dir = pl330_dma_slave_map_dir(dir);
+
+ /* Already mapped for this config? */
+ if (pch->dir == dma_dir)
+ return true;
+
+ pl330_unprep_slave_fifo(pch);
+ pch->fifo_dma = dma_map_resource(dev, pch->fifo_addr,
+ 1 << pch->burst_sz, dma_dir, 0);
+ if (dma_mapping_error(dev, pch->fifo_dma))
+ return false;
+
+ pch->dir = dma_dir;
+ return true;
+}
+
+static int fixup_burst_len(int max_burst_len, int quirks)
+{
+ if (max_burst_len > PL330_MAX_BURST)
+ return PL330_MAX_BURST;
+ else if (max_burst_len < 1)
+ return 1;
+ else
+ return max_burst_len;
+}
+
+static int pl330_config_write(struct dma_chan *chan,
+ struct dma_slave_config *slave_config,
+ enum dma_transfer_direction direction)
+{
+ struct dma_pl330_chan *pch = to_pchan(chan);
+
+ pl330_unprep_slave_fifo(pch);
+ if (direction == DMA_MEM_TO_DEV) {
+ if (slave_config->dst_addr)
+ pch->fifo_addr = slave_config->dst_addr;
+ if (slave_config->dst_addr_width)
+ pch->burst_sz = __ffs(slave_config->dst_addr_width);
+ pch->burst_len = fixup_burst_len(slave_config->dst_maxburst,
+ pch->dmac->quirks);
+ } else if (direction == DMA_DEV_TO_MEM) {
+ if (slave_config->src_addr)
+ pch->fifo_addr = slave_config->src_addr;
+ if (slave_config->src_addr_width)
+ pch->burst_sz = __ffs(slave_config->src_addr_width);
+ pch->burst_len = fixup_burst_len(slave_config->src_maxburst,
+ pch->dmac->quirks);
+ }
+
+ return 0;
+}
+
+static int pl330_config(struct dma_chan *chan,
+ struct dma_slave_config *slave_config)
+{
+ struct dma_pl330_chan *pch = to_pchan(chan);
+
+ memcpy(&pch->slave_config, slave_config, sizeof(*slave_config));
+
+ return 0;
+}
+
+static int pl330_terminate_all(struct dma_chan *chan)
+{
+ struct dma_pl330_chan *pch = to_pchan(chan);
+ struct dma_pl330_desc *desc;
+ unsigned long flags;
+ struct pl330_dmac *pl330 = pch->dmac;
+ bool power_down = false;
+
+ pm_runtime_get_sync(pl330->ddma.dev);
+ spin_lock_irqsave(&pch->lock, flags);
+
+ spin_lock(&pl330->lock);
+ _stop(pch->thread);
+ pch->thread->req[0].desc = NULL;
+ pch->thread->req[1].desc = NULL;
+ pch->thread->req_running = -1;
+ spin_unlock(&pl330->lock);
+
+ power_down = pch->active;
+ pch->active = false;
+
+ /* Mark all desc done */
+ list_for_each_entry(desc, &pch->submitted_list, node) {
+ desc->status = FREE;
+ dma_cookie_complete(&desc->txd);
+ }
+
+ list_for_each_entry(desc, &pch->work_list , node) {
+ desc->status = FREE;
+ dma_cookie_complete(&desc->txd);
+ }
+
+ list_splice_tail_init(&pch->submitted_list, &pl330->desc_pool);
+ list_splice_tail_init(&pch->work_list, &pl330->desc_pool);
+ list_splice_tail_init(&pch->completed_list, &pl330->desc_pool);
+ spin_unlock_irqrestore(&pch->lock, flags);
+ pm_runtime_mark_last_busy(pl330->ddma.dev);
+ if (power_down)
+ pm_runtime_put_autosuspend(pl330->ddma.dev);
+ pm_runtime_put_autosuspend(pl330->ddma.dev);
+
+ return 0;
+}
+
+/*
+ * We don't support DMA_RESUME command because of hardware
+ * limitations, so after pausing the channel we cannot restore
+ * it to active state. We have to terminate channel and setup
+ * DMA transfer again. This pause feature was implemented to
+ * allow safely read residue before channel termination.
+ */
+static int pl330_pause(struct dma_chan *chan)
+{
+ struct dma_pl330_chan *pch = to_pchan(chan);
+ struct pl330_dmac *pl330 = pch->dmac;
+ struct dma_pl330_desc *desc;
+ unsigned long flags;
+
+ pm_runtime_get_sync(pl330->ddma.dev);
+ spin_lock_irqsave(&pch->lock, flags);
+
+ spin_lock(&pl330->lock);
+ _stop(pch->thread);
+ spin_unlock(&pl330->lock);
+
+ list_for_each_entry(desc, &pch->work_list, node) {
+ if (desc->status == BUSY)
+ desc->status = PAUSED;
+ }
+ spin_unlock_irqrestore(&pch->lock, flags);
+ pm_runtime_mark_last_busy(pl330->ddma.dev);
+ pm_runtime_put_autosuspend(pl330->ddma.dev);
+
+ return 0;
+}
+
+static void pl330_free_chan_resources(struct dma_chan *chan)
+{
+ struct dma_pl330_chan *pch = to_pchan(chan);
+ struct pl330_dmac *pl330 = pch->dmac;
+ unsigned long flags;
+
+ tasklet_kill(&pch->task);
+
+ pm_runtime_get_sync(pch->dmac->ddma.dev);
+ spin_lock_irqsave(&pl330->lock, flags);
+
+ pl330_release_channel(pch->thread);
+ pch->thread = NULL;
+
+ if (pch->cyclic)
+ list_splice_tail_init(&pch->work_list, &pch->dmac->desc_pool);
+
+ spin_unlock_irqrestore(&pl330->lock, flags);
+ pm_runtime_mark_last_busy(pch->dmac->ddma.dev);
+ pm_runtime_put_autosuspend(pch->dmac->ddma.dev);
+ pl330_unprep_slave_fifo(pch);
+}
+
+static int pl330_get_current_xferred_count(struct dma_pl330_chan *pch,
+ struct dma_pl330_desc *desc)
+{
+ struct pl330_thread *thrd = pch->thread;
+ struct pl330_dmac *pl330 = pch->dmac;
+ void __iomem *regs = thrd->dmac->base;
+ u32 val, addr;
+
+ pm_runtime_get_sync(pl330->ddma.dev);
+ val = addr = 0;
+ if (desc->rqcfg.src_inc) {
+ val = readl(regs + SA(thrd->id));
+ addr = desc->px.src_addr;
+ } else {
+ val = readl(regs + DA(thrd->id));
+ addr = desc->px.dst_addr;
+ }
+ pm_runtime_mark_last_busy(pch->dmac->ddma.dev);
+ pm_runtime_put_autosuspend(pl330->ddma.dev);
+
+ /* If DMAMOV hasn't finished yet, SAR/DAR can be zero */
+ if (!val)
+ return 0;
+
+ return val - addr;
+}
+
+static enum dma_status
+pl330_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
+ struct dma_tx_state *txstate)
+{
+ enum dma_status ret;
+ unsigned long flags;
+ struct dma_pl330_desc *desc, *running = NULL, *last_enq = NULL;
+ struct dma_pl330_chan *pch = to_pchan(chan);
+ unsigned int transferred, residual = 0;
+
+ ret = dma_cookie_status(chan, cookie, txstate);
+
+ if (!txstate)
+ return ret;
+
+ if (ret == DMA_COMPLETE)
+ goto out;
+
+ spin_lock_irqsave(&pch->lock, flags);
+ spin_lock(&pch->thread->dmac->lock);
+
+ if (pch->thread->req_running != -1)
+ running = pch->thread->req[pch->thread->req_running].desc;
+
+ last_enq = pch->thread->req[pch->thread->lstenq].desc;
+
+ /* Check in pending list */
+ list_for_each_entry(desc, &pch->work_list, node) {
+ if (desc->status == DONE)
+ transferred = desc->bytes_requested;
+ else if (running && desc == running)
+ transferred =
+ pl330_get_current_xferred_count(pch, desc);
+ else if (desc->status == BUSY || desc->status == PAUSED)
+ /*
+ * Busy but not running means either just enqueued,
+ * or finished and not yet marked done
+ */
+ if (desc == last_enq)
+ transferred = 0;
+ else
+ transferred = desc->bytes_requested;
+ else
+ transferred = 0;
+ residual += desc->bytes_requested - transferred;
+ if (desc->txd.cookie == cookie) {
+ switch (desc->status) {
+ case DONE:
+ ret = DMA_COMPLETE;
+ break;
+ case PAUSED:
+ ret = DMA_PAUSED;
+ break;
+ case PREP:
+ case BUSY:
+ ret = DMA_IN_PROGRESS;
+ break;
+ default:
+ WARN_ON(1);
+ }
+ break;
+ }
+ if (desc->last)
+ residual = 0;
+ }
+ spin_unlock(&pch->thread->dmac->lock);
+ spin_unlock_irqrestore(&pch->lock, flags);
+
+out:
+ dma_set_residue(txstate, residual);
+
+ return ret;
+}
+
+static void pl330_issue_pending(struct dma_chan *chan)
+{
+ struct dma_pl330_chan *pch = to_pchan(chan);
+ unsigned long flags;
+
+ spin_lock_irqsave(&pch->lock, flags);
+ if (list_empty(&pch->work_list)) {
+ /*
+ * Warn on nothing pending. Empty submitted_list may
+ * break our pm_runtime usage counter as it is
+ * updated on work_list emptiness status.
+ */
+ WARN_ON(list_empty(&pch->submitted_list));
+ pch->active = true;
+ pm_runtime_get_sync(pch->dmac->ddma.dev);
+ }
+ list_splice_tail_init(&pch->submitted_list, &pch->work_list);
+ spin_unlock_irqrestore(&pch->lock, flags);
+
+ pl330_tasklet(&pch->task);
+}
+
+/*
+ * We returned the last one of the circular list of descriptor(s)
+ * from prep_xxx, so the argument to submit corresponds to the last
+ * descriptor of the list.
+ */
+static dma_cookie_t pl330_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+ struct dma_pl330_desc *desc, *last = to_desc(tx);
+ struct dma_pl330_chan *pch = to_pchan(tx->chan);
+ dma_cookie_t cookie;
+ unsigned long flags;
+
+ spin_lock_irqsave(&pch->lock, flags);
+
+ /* Assign cookies to all nodes */
+ while (!list_empty(&last->node)) {
+ desc = list_entry(last->node.next, struct dma_pl330_desc, node);
+ if (pch->cyclic) {
+ desc->txd.callback = last->txd.callback;
+ desc->txd.callback_param = last->txd.callback_param;
+ }
+ desc->last = false;
+
+ dma_cookie_assign(&desc->txd);
+
+ list_move_tail(&desc->node, &pch->submitted_list);
+ }
+
+ last->last = true;
+ cookie = dma_cookie_assign(&last->txd);
+ list_add_tail(&last->node, &pch->submitted_list);
+ spin_unlock_irqrestore(&pch->lock, flags);
+
+ return cookie;
+}
+
+static inline void _init_desc(struct dma_pl330_desc *desc)
+{
+ desc->rqcfg.swap = SWAP_NO;
+ desc->rqcfg.scctl = CCTRL0;
+ desc->rqcfg.dcctl = CCTRL0;
+ desc->txd.tx_submit = pl330_tx_submit;
+
+ INIT_LIST_HEAD(&desc->node);
+}
+
+/* Returns the number of descriptors added to the DMAC pool */
+static int add_desc(struct list_head *pool, spinlock_t *lock,
+ gfp_t flg, int count)
+{
+ struct dma_pl330_desc *desc;
+ unsigned long flags;
+ int i;
+
+ desc = kcalloc(count, sizeof(*desc), flg);
+ if (!desc)
+ return 0;
+
+ spin_lock_irqsave(lock, flags);
+
+ for (i = 0; i < count; i++) {
+ _init_desc(&desc[i]);
+ list_add_tail(&desc[i].node, pool);
+ }
+
+ spin_unlock_irqrestore(lock, flags);
+
+ return count;
+}
+
+static struct dma_pl330_desc *pluck_desc(struct list_head *pool,
+ spinlock_t *lock)
+{
+ struct dma_pl330_desc *desc = NULL;
+ unsigned long flags;
+
+ spin_lock_irqsave(lock, flags);
+
+ if (!list_empty(pool)) {
+ desc = list_entry(pool->next,
+ struct dma_pl330_desc, node);
+
+ list_del_init(&desc->node);
+
+ desc->status = PREP;
+ desc->txd.callback = NULL;
+ }
+
+ spin_unlock_irqrestore(lock, flags);
+
+ return desc;
+}
+
+static struct dma_pl330_desc *pl330_get_desc(struct dma_pl330_chan *pch)
+{
+ struct pl330_dmac *pl330 = pch->dmac;
+ u8 *peri_id = pch->chan.private;
+ struct dma_pl330_desc *desc;
+
+ /* Pluck one desc from the pool of DMAC */
+ desc = pluck_desc(&pl330->desc_pool, &pl330->pool_lock);
+
+ /* If the DMAC pool is empty, alloc new */
+ if (!desc) {
+ static DEFINE_SPINLOCK(lock);
+ LIST_HEAD(pool);
+
+ if (!add_desc(&pool, &lock, GFP_ATOMIC, 1))
+ return NULL;
+
+ desc = pluck_desc(&pool, &lock);
+ WARN_ON(!desc || !list_empty(&pool));
+ }
+
+ /* Initialize the descriptor */
+ desc->pchan = pch;
+ desc->txd.cookie = 0;
+ async_tx_ack(&desc->txd);
+
+ desc->peri = peri_id ? pch->chan.chan_id : 0;
+ desc->rqcfg.pcfg = &pch->dmac->pcfg;
+
+ dma_async_tx_descriptor_init(&desc->txd, &pch->chan);
+
+ return desc;
+}
+
+static inline void fill_px(struct pl330_xfer *px,
+ dma_addr_t dst, dma_addr_t src, size_t len)
+{
+ px->bytes = len;
+ px->dst_addr = dst;
+ px->src_addr = src;
+}
+
+static struct dma_pl330_desc *
+__pl330_prep_dma_memcpy(struct dma_pl330_chan *pch, dma_addr_t dst,
+ dma_addr_t src, size_t len)
+{
+ struct dma_pl330_desc *desc = pl330_get_desc(pch);
+
+ if (!desc) {
+ dev_err(pch->dmac->ddma.dev, "%s:%d Unable to fetch desc\n",
+ __func__, __LINE__);
+ return NULL;
+ }
+
+ /*
+ * Ideally we should lookout for reqs bigger than
+ * those that can be programmed with 256 bytes of
+ * MC buffer, but considering a req size is seldom
+ * going to be word-unaligned and more than 200MB,
+ * we take it easy.
+ * Also, should the limit is reached we'd rather
+ * have the platform increase MC buffer size than
+ * complicating this API driver.
+ */
+ fill_px(&desc->px, dst, src, len);
+
+ return desc;
+}
+
+/* Call after fixing burst size */
+static inline int get_burst_len(struct dma_pl330_desc *desc, size_t len)
+{
+ struct dma_pl330_chan *pch = desc->pchan;
+ struct pl330_dmac *pl330 = pch->dmac;
+ int burst_len;
+
+ burst_len = pl330->pcfg.data_bus_width / 8;
+ burst_len *= pl330->pcfg.data_buf_dep / pl330->pcfg.num_chan;
+ burst_len >>= desc->rqcfg.brst_size;
+
+ /* src/dst_burst_len can't be more than 16 */
+ if (burst_len > PL330_MAX_BURST)
+ burst_len = PL330_MAX_BURST;
+
+ return burst_len;
+}
+
+static struct dma_async_tx_descriptor *pl330_prep_dma_cyclic(
+ struct dma_chan *chan, dma_addr_t dma_addr, size_t len,
+ size_t period_len, enum dma_transfer_direction direction,
+ unsigned long flags)
+{
+ struct dma_pl330_desc *desc = NULL, *first = NULL;
+ struct dma_pl330_chan *pch = to_pchan(chan);
+ struct pl330_dmac *pl330 = pch->dmac;
+ unsigned int i;
+ dma_addr_t dst;
+ dma_addr_t src;
+
+ if (len % period_len != 0)
+ return NULL;
+
+ if (!is_slave_direction(direction)) {
+ dev_err(pch->dmac->ddma.dev, "%s:%d Invalid dma direction\n",
+ __func__, __LINE__);
+ return NULL;
+ }
+
+ pl330_config_write(chan, &pch->slave_config, direction);
+
+ if (!pl330_prep_slave_fifo(pch, direction))
+ return NULL;
+
+ for (i = 0; i < len / period_len; i++) {
+ desc = pl330_get_desc(pch);
+ if (!desc) {
+ unsigned long iflags;
+
+ dev_err(pch->dmac->ddma.dev, "%s:%d Unable to fetch desc\n",
+ __func__, __LINE__);
+
+ if (!first)
+ return NULL;
+
+ spin_lock_irqsave(&pl330->pool_lock, iflags);
+
+ while (!list_empty(&first->node)) {
+ desc = list_entry(first->node.next,
+ struct dma_pl330_desc, node);
+ list_move_tail(&desc->node, &pl330->desc_pool);
+ }
+
+ list_move_tail(&first->node, &pl330->desc_pool);
+
+ spin_unlock_irqrestore(&pl330->pool_lock, iflags);
+
+ return NULL;
+ }
+
+ switch (direction) {
+ case DMA_MEM_TO_DEV:
+ desc->rqcfg.src_inc = 1;
+ desc->rqcfg.dst_inc = 0;
+ src = dma_addr;
+ dst = pch->fifo_dma;
+ break;
+ case DMA_DEV_TO_MEM:
+ desc->rqcfg.src_inc = 0;
+ desc->rqcfg.dst_inc = 1;
+ src = pch->fifo_dma;
+ dst = dma_addr;
+ break;
+ default:
+ break;
+ }
+
+ desc->rqtype = direction;
+ desc->rqcfg.brst_size = pch->burst_sz;
+ desc->rqcfg.brst_len = pch->burst_len;
+ desc->bytes_requested = period_len;
+ fill_px(&desc->px, dst, src, period_len);
+
+ if (!first)
+ first = desc;
+ else
+ list_add_tail(&desc->node, &first->node);
+
+ dma_addr += period_len;
+ }
+
+ if (!desc)
+ return NULL;
+
+ pch->cyclic = true;
+
+ return &desc->txd;
+}
+
+static struct dma_async_tx_descriptor *
+pl330_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dst,
+ dma_addr_t src, size_t len, unsigned long flags)
+{
+ struct dma_pl330_desc *desc;
+ struct dma_pl330_chan *pch = to_pchan(chan);
+ struct pl330_dmac *pl330;
+ int burst;
+
+ if (unlikely(!pch || !len))
+ return NULL;
+
+ pl330 = pch->dmac;
+
+ desc = __pl330_prep_dma_memcpy(pch, dst, src, len);
+ if (!desc)
+ return NULL;
+
+ desc->rqcfg.src_inc = 1;
+ desc->rqcfg.dst_inc = 1;
+ desc->rqtype = DMA_MEM_TO_MEM;
+
+ /* Select max possible burst size */
+ burst = pl330->pcfg.data_bus_width / 8;
+
+ /*
+ * Make sure we use a burst size that aligns with all the memcpy
+ * parameters because our DMA programming algorithm doesn't cope with
+ * transfers which straddle an entry in the DMA device's MFIFO.
+ */
+ while ((src | dst | len) & (burst - 1))
+ burst /= 2;
+
+ desc->rqcfg.brst_size = 0;
+ while (burst != (1 << desc->rqcfg.brst_size))
+ desc->rqcfg.brst_size++;
+
+ desc->rqcfg.brst_len = get_burst_len(desc, len);
+ /*
+ * If burst size is smaller than bus width then make sure we only
+ * transfer one at a time to avoid a burst stradling an MFIFO entry.
+ */
+ if (burst * 8 < pl330->pcfg.data_bus_width)
+ desc->rqcfg.brst_len = 1;
+
+ desc->bytes_requested = len;
+
+ return &desc->txd;
+}
+
+static void __pl330_giveback_desc(struct pl330_dmac *pl330,
+ struct dma_pl330_desc *first)
+{
+ unsigned long flags;
+ struct dma_pl330_desc *desc;
+
+ if (!first)
+ return;
+
+ spin_lock_irqsave(&pl330->pool_lock, flags);
+
+ while (!list_empty(&first->node)) {
+ desc = list_entry(first->node.next,
+ struct dma_pl330_desc, node);
+ list_move_tail(&desc->node, &pl330->desc_pool);
+ }
+
+ list_move_tail(&first->node, &pl330->desc_pool);
+
+ spin_unlock_irqrestore(&pl330->pool_lock, flags);
+}
+
+static struct dma_async_tx_descriptor *
+pl330_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
+ unsigned int sg_len, enum dma_transfer_direction direction,
+ unsigned long flg, void *context)
+{
+ struct dma_pl330_desc *first, *desc = NULL;
+ struct dma_pl330_chan *pch = to_pchan(chan);
+ struct scatterlist *sg;
+ int i;
+
+ if (unlikely(!pch || !sgl || !sg_len))
+ return NULL;
+
+ pl330_config_write(chan, &pch->slave_config, direction);
+
+ if (!pl330_prep_slave_fifo(pch, direction))
+ return NULL;
+
+ first = NULL;
+
+ for_each_sg(sgl, sg, sg_len, i) {
+
+ desc = pl330_get_desc(pch);
+ if (!desc) {
+ struct pl330_dmac *pl330 = pch->dmac;
+
+ dev_err(pch->dmac->ddma.dev,
+ "%s:%d Unable to fetch desc\n",
+ __func__, __LINE__);
+ __pl330_giveback_desc(pl330, first);
+
+ return NULL;
+ }
+
+ if (!first)
+ first = desc;
+ else
+ list_add_tail(&desc->node, &first->node);
+
+ if (direction == DMA_MEM_TO_DEV) {
+ desc->rqcfg.src_inc = 1;
+ desc->rqcfg.dst_inc = 0;
+ fill_px(&desc->px, pch->fifo_dma, sg_dma_address(sg),
+ sg_dma_len(sg));
+ } else {
+ desc->rqcfg.src_inc = 0;
+ desc->rqcfg.dst_inc = 1;
+ fill_px(&desc->px, sg_dma_address(sg), pch->fifo_dma,
+ sg_dma_len(sg));
+ }
+
+ desc->rqcfg.brst_size = pch->burst_sz;
+ desc->rqcfg.brst_len = pch->burst_len;
+ desc->rqtype = direction;
+ desc->bytes_requested = sg_dma_len(sg);
+ }
+
+ /* Return the last desc in the chain */
+ return &desc->txd;
+}
+
+static irqreturn_t pl330_irq_handler(int irq, void *data)
+{
+ if (pl330_update(data))
+ return IRQ_HANDLED;
+ else
+ return IRQ_NONE;
+}
+
+#define PL330_DMA_BUSWIDTHS \
+ BIT(DMA_SLAVE_BUSWIDTH_UNDEFINED) | \
+ BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | \
+ BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \
+ BIT(DMA_SLAVE_BUSWIDTH_4_BYTES) | \
+ BIT(DMA_SLAVE_BUSWIDTH_8_BYTES)
+
+#ifdef CONFIG_DEBUG_FS
+static int pl330_debugfs_show(struct seq_file *s, void *data)
+{
+ struct pl330_dmac *pl330 = s->private;
+ int chans, pchs, ch, pr;
+
+ chans = pl330->pcfg.num_chan;
+ pchs = pl330->num_peripherals;
+
+ seq_puts(s, "PL330 physical channels:\n");
+ seq_puts(s, "THREAD:\t\tCHANNEL:\n");
+ seq_puts(s, "--------\t-----\n");
+ for (ch = 0; ch < chans; ch++) {
+ struct pl330_thread *thrd = &pl330->channels[ch];
+ int found = -1;
+
+ for (pr = 0; pr < pchs; pr++) {
+ struct dma_pl330_chan *pch = &pl330->peripherals[pr];
+
+ if (!pch->thread || thrd->id != pch->thread->id)
+ continue;
+
+ found = pr;
+ }
+
+ seq_printf(s, "%d\t\t", thrd->id);
+ if (found == -1)
+ seq_puts(s, "--\n");
+ else
+ seq_printf(s, "%d\n", found);
+ }
+
+ return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(pl330_debugfs);
+
+static inline void init_pl330_debugfs(struct pl330_dmac *pl330)
+{
+ debugfs_create_file(dev_name(pl330->ddma.dev),
+ S_IFREG | 0444, NULL, pl330,
+ &pl330_debugfs_fops);
+}
+#else
+static inline void init_pl330_debugfs(struct pl330_dmac *pl330)
+{
+}
+#endif
+
+/*
+ * Runtime PM callbacks are provided by amba/bus.c driver.
+ *
+ * It is assumed here that IRQ safe runtime PM is chosen in probe and amba
+ * bus driver will only disable/enable the clock in runtime PM callbacks.
+ */
+static int __maybe_unused pl330_suspend(struct device *dev)
+{
+ struct amba_device *pcdev = to_amba_device(dev);
+
+ pm_runtime_force_suspend(dev);
+ clk_unprepare(pcdev->pclk);
+
+ return 0;
+}
+
+static int __maybe_unused pl330_resume(struct device *dev)
+{
+ struct amba_device *pcdev = to_amba_device(dev);
+ int ret;
+
+ ret = clk_prepare(pcdev->pclk);
+ if (ret)
+ return ret;
+
+ pm_runtime_force_resume(dev);
+
+ return ret;
+}
+
+static const struct dev_pm_ops pl330_pm = {
+ SET_LATE_SYSTEM_SLEEP_PM_OPS(pl330_suspend, pl330_resume)
+};
+
+static int
+pl330_probe(struct amba_device *adev, const struct amba_id *id)
+{
+ struct pl330_config *pcfg;
+ struct pl330_dmac *pl330;
+ struct dma_pl330_chan *pch, *_p;
+ struct dma_device *pd;
+ struct resource *res;
+ int i, ret, irq;
+ int num_chan;
+ struct device_node *np = adev->dev.of_node;
+
+ ret = dma_set_mask_and_coherent(&adev->dev, DMA_BIT_MASK(32));
+ if (ret)
+ return ret;
+
+ /* Allocate a new DMAC and its Channels */
+ pl330 = devm_kzalloc(&adev->dev, sizeof(*pl330), GFP_KERNEL);
+ if (!pl330)
+ return -ENOMEM;
+
+ pd = &pl330->ddma;
+ pd->dev = &adev->dev;
+
+ pl330->mcbufsz = 0;
+
+ /* get quirk */
+ for (i = 0; i < ARRAY_SIZE(of_quirks); i++)
+ if (of_property_read_bool(np, of_quirks[i].quirk))
+ pl330->quirks |= of_quirks[i].id;
+
+ res = &adev->res;
+ pl330->base = devm_ioremap_resource(&adev->dev, res);
+ if (IS_ERR(pl330->base))
+ return PTR_ERR(pl330->base);
+
+ amba_set_drvdata(adev, pl330);
+
+ pl330->rstc = devm_reset_control_get_optional(&adev->dev, "dma");
+ if (IS_ERR(pl330->rstc)) {
+ return dev_err_probe(&adev->dev, PTR_ERR(pl330->rstc), "Failed to get reset!\n");
+ } else {
+ ret = reset_control_deassert(pl330->rstc);
+ if (ret) {
+ dev_err(&adev->dev, "Couldn't deassert the device from reset!\n");
+ return ret;
+ }
+ }
+
+ pl330->rstc_ocp = devm_reset_control_get_optional(&adev->dev, "dma-ocp");
+ if (IS_ERR(pl330->rstc_ocp)) {
+ return dev_err_probe(&adev->dev, PTR_ERR(pl330->rstc_ocp),
+ "Failed to get OCP reset!\n");
+ } else {
+ ret = reset_control_deassert(pl330->rstc_ocp);
+ if (ret) {
+ dev_err(&adev->dev, "Couldn't deassert the device from OCP reset!\n");
+ return ret;
+ }
+ }
+
+ for (i = 0; i < AMBA_NR_IRQS; i++) {
+ irq = adev->irq[i];
+ if (irq) {
+ ret = devm_request_irq(&adev->dev, irq,
+ pl330_irq_handler, 0,
+ dev_name(&adev->dev), pl330);
+ if (ret)
+ return ret;
+ } else {
+ break;
+ }
+ }
+
+ pcfg = &pl330->pcfg;
+
+ pcfg->periph_id = adev->periphid;
+ ret = pl330_add(pl330);
+ if (ret)
+ return ret;
+
+ INIT_LIST_HEAD(&pl330->desc_pool);
+ spin_lock_init(&pl330->pool_lock);
+
+ /* Create a descriptor pool of default size */
+ if (!add_desc(&pl330->desc_pool, &pl330->pool_lock,
+ GFP_KERNEL, NR_DEFAULT_DESC))
+ dev_warn(&adev->dev, "unable to allocate desc\n");
+
+ INIT_LIST_HEAD(&pd->channels);
+
+ /* Initialize channel parameters */
+ num_chan = max_t(int, pcfg->num_peri, pcfg->num_chan);
+
+ pl330->num_peripherals = num_chan;
+
+ pl330->peripherals = kcalloc(num_chan, sizeof(*pch), GFP_KERNEL);
+ if (!pl330->peripherals) {
+ ret = -ENOMEM;
+ goto probe_err2;
+ }
+
+ for (i = 0; i < num_chan; i++) {
+ pch = &pl330->peripherals[i];
+
+ pch->chan.private = adev->dev.of_node;
+ INIT_LIST_HEAD(&pch->submitted_list);
+ INIT_LIST_HEAD(&pch->work_list);
+ INIT_LIST_HEAD(&pch->completed_list);
+ spin_lock_init(&pch->lock);
+ pch->thread = NULL;
+ pch->chan.device = pd;
+ pch->dmac = pl330;
+ pch->dir = DMA_NONE;
+
+ /* Add the channel to the DMAC list */
+ list_add_tail(&pch->chan.device_node, &pd->channels);
+ }
+
+ dma_cap_set(DMA_MEMCPY, pd->cap_mask);
+ if (pcfg->num_peri) {
+ dma_cap_set(DMA_SLAVE, pd->cap_mask);
+ dma_cap_set(DMA_CYCLIC, pd->cap_mask);
+ dma_cap_set(DMA_PRIVATE, pd->cap_mask);
+ }
+
+ pd->device_alloc_chan_resources = pl330_alloc_chan_resources;
+ pd->device_free_chan_resources = pl330_free_chan_resources;
+ pd->device_prep_dma_memcpy = pl330_prep_dma_memcpy;
+ pd->device_prep_dma_cyclic = pl330_prep_dma_cyclic;
+ pd->device_tx_status = pl330_tx_status;
+ pd->device_prep_slave_sg = pl330_prep_slave_sg;
+ pd->device_config = pl330_config;
+ pd->device_pause = pl330_pause;
+ pd->device_terminate_all = pl330_terminate_all;
+ pd->device_issue_pending = pl330_issue_pending;
+ pd->src_addr_widths = PL330_DMA_BUSWIDTHS;
+ pd->dst_addr_widths = PL330_DMA_BUSWIDTHS;
+ pd->directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
+ pd->residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
+ pd->max_burst = PL330_MAX_BURST;
+
+ ret = dma_async_device_register(pd);
+ if (ret) {
+ dev_err(&adev->dev, "unable to register DMAC\n");
+ goto probe_err3;
+ }
+
+ if (adev->dev.of_node) {
+ ret = of_dma_controller_register(adev->dev.of_node,
+ of_dma_pl330_xlate, pl330);
+ if (ret) {
+ dev_err(&adev->dev,
+ "unable to register DMA to the generic DT DMA helpers\n");
+ }
+ }
+
+ /*
+ * This is the limit for transfers with a buswidth of 1, larger
+ * buswidths will have larger limits.
+ */
+ ret = dma_set_max_seg_size(&adev->dev, 1900800);
+ if (ret)
+ dev_err(&adev->dev, "unable to set the seg size\n");
+
+
+ init_pl330_debugfs(pl330);
+ dev_info(&adev->dev,
+ "Loaded driver for PL330 DMAC-%x\n", adev->periphid);
+ dev_info(&adev->dev,
+ "\tDBUFF-%ux%ubytes Num_Chans-%u Num_Peri-%u Num_Events-%u\n",
+ pcfg->data_buf_dep, pcfg->data_bus_width / 8, pcfg->num_chan,
+ pcfg->num_peri, pcfg->num_events);
+
+ pm_runtime_irq_safe(&adev->dev);
+ pm_runtime_use_autosuspend(&adev->dev);
+ pm_runtime_set_autosuspend_delay(&adev->dev, PL330_AUTOSUSPEND_DELAY);
+ pm_runtime_mark_last_busy(&adev->dev);
+ pm_runtime_put_autosuspend(&adev->dev);
+
+ return 0;
+probe_err3:
+ /* Idle the DMAC */
+ list_for_each_entry_safe(pch, _p, &pl330->ddma.channels,
+ chan.device_node) {
+
+ /* Remove the channel */
+ list_del(&pch->chan.device_node);
+
+ /* Flush the channel */
+ if (pch->thread) {
+ pl330_terminate_all(&pch->chan);
+ pl330_free_chan_resources(&pch->chan);
+ }
+ }
+probe_err2:
+ pl330_del(pl330);
+
+ if (pl330->rstc_ocp)
+ reset_control_assert(pl330->rstc_ocp);
+
+ if (pl330->rstc)
+ reset_control_assert(pl330->rstc);
+ return ret;
+}
+
+static void pl330_remove(struct amba_device *adev)
+{
+ struct pl330_dmac *pl330 = amba_get_drvdata(adev);
+ struct dma_pl330_chan *pch, *_p;
+ int i, irq;
+
+ pm_runtime_get_noresume(pl330->ddma.dev);
+
+ if (adev->dev.of_node)
+ of_dma_controller_free(adev->dev.of_node);
+
+ for (i = 0; i < AMBA_NR_IRQS; i++) {
+ irq = adev->irq[i];
+ if (irq)
+ devm_free_irq(&adev->dev, irq, pl330);
+ }
+
+ dma_async_device_unregister(&pl330->ddma);
+
+ /* Idle the DMAC */
+ list_for_each_entry_safe(pch, _p, &pl330->ddma.channels,
+ chan.device_node) {
+
+ /* Remove the channel */
+ list_del(&pch->chan.device_node);
+
+ /* Flush the channel */
+ if (pch->thread) {
+ pl330_terminate_all(&pch->chan);
+ pl330_free_chan_resources(&pch->chan);
+ }
+ }
+
+ pl330_del(pl330);
+
+ if (pl330->rstc_ocp)
+ reset_control_assert(pl330->rstc_ocp);
+
+ if (pl330->rstc)
+ reset_control_assert(pl330->rstc);
+}
+
+static const struct amba_id pl330_ids[] = {
+ {
+ .id = 0x00041330,
+ .mask = 0x000fffff,
+ },
+ { 0, 0 },
+};
+
+MODULE_DEVICE_TABLE(amba, pl330_ids);
+
+static struct amba_driver pl330_driver = {
+ .drv = {
+ .owner = THIS_MODULE,
+ .name = "dma-pl330",
+ .pm = &pl330_pm,
+ },
+ .id_table = pl330_ids,
+ .probe = pl330_probe,
+ .remove = pl330_remove,
+};
+
+module_amba_driver(pl330_driver);
+
+MODULE_AUTHOR("Jaswinder Singh <jassisinghbrar@gmail.com>");
+MODULE_DESCRIPTION("API Driver for PL330 DMAC");
+MODULE_LICENSE("GPL");
diff --git a/drivers/dma/plx_dma.c b/drivers/dma/plx_dma.c
new file mode 100644
index 000000000..12725fa16
--- /dev/null
+++ b/drivers/dma/plx_dma.c
@@ -0,0 +1,636 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Microsemi Switchtec(tm) PCIe Management Driver
+ * Copyright (c) 2019, Logan Gunthorpe <logang@deltatee.com>
+ * Copyright (c) 2019, GigaIO Networks, Inc
+ */
+
+#include "dmaengine.h"
+
+#include <linux/circ_buf.h>
+#include <linux/dmaengine.h>
+#include <linux/kref.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+
+MODULE_DESCRIPTION("PLX ExpressLane PEX PCI Switch DMA Engine");
+MODULE_VERSION("0.1");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Logan Gunthorpe");
+
+#define PLX_REG_DESC_RING_ADDR 0x214
+#define PLX_REG_DESC_RING_ADDR_HI 0x218
+#define PLX_REG_DESC_RING_NEXT_ADDR 0x21C
+#define PLX_REG_DESC_RING_COUNT 0x220
+#define PLX_REG_DESC_RING_LAST_ADDR 0x224
+#define PLX_REG_DESC_RING_LAST_SIZE 0x228
+#define PLX_REG_PREF_LIMIT 0x234
+#define PLX_REG_CTRL 0x238
+#define PLX_REG_CTRL2 0x23A
+#define PLX_REG_INTR_CTRL 0x23C
+#define PLX_REG_INTR_STATUS 0x23E
+
+#define PLX_REG_PREF_LIMIT_PREF_FOUR 8
+
+#define PLX_REG_CTRL_GRACEFUL_PAUSE BIT(0)
+#define PLX_REG_CTRL_ABORT BIT(1)
+#define PLX_REG_CTRL_WRITE_BACK_EN BIT(2)
+#define PLX_REG_CTRL_START BIT(3)
+#define PLX_REG_CTRL_RING_STOP_MODE BIT(4)
+#define PLX_REG_CTRL_DESC_MODE_BLOCK (0 << 5)
+#define PLX_REG_CTRL_DESC_MODE_ON_CHIP (1 << 5)
+#define PLX_REG_CTRL_DESC_MODE_OFF_CHIP (2 << 5)
+#define PLX_REG_CTRL_DESC_INVALID BIT(8)
+#define PLX_REG_CTRL_GRACEFUL_PAUSE_DONE BIT(9)
+#define PLX_REG_CTRL_ABORT_DONE BIT(10)
+#define PLX_REG_CTRL_IMM_PAUSE_DONE BIT(12)
+#define PLX_REG_CTRL_IN_PROGRESS BIT(30)
+
+#define PLX_REG_CTRL_RESET_VAL (PLX_REG_CTRL_DESC_INVALID | \
+ PLX_REG_CTRL_GRACEFUL_PAUSE_DONE | \
+ PLX_REG_CTRL_ABORT_DONE | \
+ PLX_REG_CTRL_IMM_PAUSE_DONE)
+
+#define PLX_REG_CTRL_START_VAL (PLX_REG_CTRL_WRITE_BACK_EN | \
+ PLX_REG_CTRL_DESC_MODE_OFF_CHIP | \
+ PLX_REG_CTRL_START | \
+ PLX_REG_CTRL_RESET_VAL)
+
+#define PLX_REG_CTRL2_MAX_TXFR_SIZE_64B 0
+#define PLX_REG_CTRL2_MAX_TXFR_SIZE_128B 1
+#define PLX_REG_CTRL2_MAX_TXFR_SIZE_256B 2
+#define PLX_REG_CTRL2_MAX_TXFR_SIZE_512B 3
+#define PLX_REG_CTRL2_MAX_TXFR_SIZE_1KB 4
+#define PLX_REG_CTRL2_MAX_TXFR_SIZE_2KB 5
+#define PLX_REG_CTRL2_MAX_TXFR_SIZE_4B 7
+
+#define PLX_REG_INTR_CRTL_ERROR_EN BIT(0)
+#define PLX_REG_INTR_CRTL_INV_DESC_EN BIT(1)
+#define PLX_REG_INTR_CRTL_ABORT_DONE_EN BIT(3)
+#define PLX_REG_INTR_CRTL_PAUSE_DONE_EN BIT(4)
+#define PLX_REG_INTR_CRTL_IMM_PAUSE_DONE_EN BIT(5)
+
+#define PLX_REG_INTR_STATUS_ERROR BIT(0)
+#define PLX_REG_INTR_STATUS_INV_DESC BIT(1)
+#define PLX_REG_INTR_STATUS_DESC_DONE BIT(2)
+#define PLX_REG_INTR_CRTL_ABORT_DONE BIT(3)
+
+struct plx_dma_hw_std_desc {
+ __le32 flags_and_size;
+ __le16 dst_addr_hi;
+ __le16 src_addr_hi;
+ __le32 dst_addr_lo;
+ __le32 src_addr_lo;
+};
+
+#define PLX_DESC_SIZE_MASK 0x7ffffff
+#define PLX_DESC_FLAG_VALID BIT(31)
+#define PLX_DESC_FLAG_INT_WHEN_DONE BIT(30)
+
+#define PLX_DESC_WB_SUCCESS BIT(30)
+#define PLX_DESC_WB_RD_FAIL BIT(29)
+#define PLX_DESC_WB_WR_FAIL BIT(28)
+
+#define PLX_DMA_RING_COUNT 2048
+
+struct plx_dma_desc {
+ struct dma_async_tx_descriptor txd;
+ struct plx_dma_hw_std_desc *hw;
+ u32 orig_size;
+};
+
+struct plx_dma_dev {
+ struct dma_device dma_dev;
+ struct dma_chan dma_chan;
+ struct pci_dev __rcu *pdev;
+ void __iomem *bar;
+ struct tasklet_struct desc_task;
+
+ spinlock_t ring_lock;
+ bool ring_active;
+ int head;
+ int tail;
+ struct plx_dma_hw_std_desc *hw_ring;
+ dma_addr_t hw_ring_dma;
+ struct plx_dma_desc **desc_ring;
+};
+
+static struct plx_dma_dev *chan_to_plx_dma_dev(struct dma_chan *c)
+{
+ return container_of(c, struct plx_dma_dev, dma_chan);
+}
+
+static struct plx_dma_desc *to_plx_desc(struct dma_async_tx_descriptor *txd)
+{
+ return container_of(txd, struct plx_dma_desc, txd);
+}
+
+static struct plx_dma_desc *plx_dma_get_desc(struct plx_dma_dev *plxdev, int i)
+{
+ return plxdev->desc_ring[i & (PLX_DMA_RING_COUNT - 1)];
+}
+
+static void plx_dma_process_desc(struct plx_dma_dev *plxdev)
+{
+ struct dmaengine_result res;
+ struct plx_dma_desc *desc;
+ u32 flags;
+
+ spin_lock(&plxdev->ring_lock);
+
+ while (plxdev->tail != plxdev->head) {
+ desc = plx_dma_get_desc(plxdev, plxdev->tail);
+
+ flags = le32_to_cpu(READ_ONCE(desc->hw->flags_and_size));
+
+ if (flags & PLX_DESC_FLAG_VALID)
+ break;
+
+ res.residue = desc->orig_size - (flags & PLX_DESC_SIZE_MASK);
+
+ if (flags & PLX_DESC_WB_SUCCESS)
+ res.result = DMA_TRANS_NOERROR;
+ else if (flags & PLX_DESC_WB_WR_FAIL)
+ res.result = DMA_TRANS_WRITE_FAILED;
+ else
+ res.result = DMA_TRANS_READ_FAILED;
+
+ dma_cookie_complete(&desc->txd);
+ dma_descriptor_unmap(&desc->txd);
+ dmaengine_desc_get_callback_invoke(&desc->txd, &res);
+ desc->txd.callback = NULL;
+ desc->txd.callback_result = NULL;
+
+ plxdev->tail++;
+ }
+
+ spin_unlock(&plxdev->ring_lock);
+}
+
+static void plx_dma_abort_desc(struct plx_dma_dev *plxdev)
+{
+ struct dmaengine_result res;
+ struct plx_dma_desc *desc;
+
+ plx_dma_process_desc(plxdev);
+
+ spin_lock_bh(&plxdev->ring_lock);
+
+ while (plxdev->tail != plxdev->head) {
+ desc = plx_dma_get_desc(plxdev, plxdev->tail);
+
+ res.residue = desc->orig_size;
+ res.result = DMA_TRANS_ABORTED;
+
+ dma_cookie_complete(&desc->txd);
+ dma_descriptor_unmap(&desc->txd);
+ dmaengine_desc_get_callback_invoke(&desc->txd, &res);
+ desc->txd.callback = NULL;
+ desc->txd.callback_result = NULL;
+
+ plxdev->tail++;
+ }
+
+ spin_unlock_bh(&plxdev->ring_lock);
+}
+
+static void __plx_dma_stop(struct plx_dma_dev *plxdev)
+{
+ unsigned long timeout = jiffies + msecs_to_jiffies(1000);
+ u32 val;
+
+ val = readl(plxdev->bar + PLX_REG_CTRL);
+ if (!(val & ~PLX_REG_CTRL_GRACEFUL_PAUSE))
+ return;
+
+ writel(PLX_REG_CTRL_RESET_VAL | PLX_REG_CTRL_GRACEFUL_PAUSE,
+ plxdev->bar + PLX_REG_CTRL);
+
+ while (!time_after(jiffies, timeout)) {
+ val = readl(plxdev->bar + PLX_REG_CTRL);
+ if (val & PLX_REG_CTRL_GRACEFUL_PAUSE_DONE)
+ break;
+
+ cpu_relax();
+ }
+
+ if (!(val & PLX_REG_CTRL_GRACEFUL_PAUSE_DONE))
+ dev_err(plxdev->dma_dev.dev,
+ "Timeout waiting for graceful pause!\n");
+
+ writel(PLX_REG_CTRL_RESET_VAL | PLX_REG_CTRL_GRACEFUL_PAUSE,
+ plxdev->bar + PLX_REG_CTRL);
+
+ writel(0, plxdev->bar + PLX_REG_DESC_RING_COUNT);
+ writel(0, plxdev->bar + PLX_REG_DESC_RING_ADDR);
+ writel(0, plxdev->bar + PLX_REG_DESC_RING_ADDR_HI);
+ writel(0, plxdev->bar + PLX_REG_DESC_RING_NEXT_ADDR);
+}
+
+static void plx_dma_stop(struct plx_dma_dev *plxdev)
+{
+ rcu_read_lock();
+ if (!rcu_dereference(plxdev->pdev)) {
+ rcu_read_unlock();
+ return;
+ }
+
+ __plx_dma_stop(plxdev);
+
+ rcu_read_unlock();
+}
+
+static void plx_dma_desc_task(struct tasklet_struct *t)
+{
+ struct plx_dma_dev *plxdev = from_tasklet(plxdev, t, desc_task);
+
+ plx_dma_process_desc(plxdev);
+}
+
+static struct dma_async_tx_descriptor *plx_dma_prep_memcpy(struct dma_chan *c,
+ dma_addr_t dma_dst, dma_addr_t dma_src, size_t len,
+ unsigned long flags)
+ __acquires(plxdev->ring_lock)
+{
+ struct plx_dma_dev *plxdev = chan_to_plx_dma_dev(c);
+ struct plx_dma_desc *plxdesc;
+
+ spin_lock_bh(&plxdev->ring_lock);
+ if (!plxdev->ring_active)
+ goto err_unlock;
+
+ if (!CIRC_SPACE(plxdev->head, plxdev->tail, PLX_DMA_RING_COUNT))
+ goto err_unlock;
+
+ if (len > PLX_DESC_SIZE_MASK)
+ goto err_unlock;
+
+ plxdesc = plx_dma_get_desc(plxdev, plxdev->head);
+ plxdev->head++;
+
+ plxdesc->hw->dst_addr_lo = cpu_to_le32(lower_32_bits(dma_dst));
+ plxdesc->hw->dst_addr_hi = cpu_to_le16(upper_32_bits(dma_dst));
+ plxdesc->hw->src_addr_lo = cpu_to_le32(lower_32_bits(dma_src));
+ plxdesc->hw->src_addr_hi = cpu_to_le16(upper_32_bits(dma_src));
+
+ plxdesc->orig_size = len;
+
+ if (flags & DMA_PREP_INTERRUPT)
+ len |= PLX_DESC_FLAG_INT_WHEN_DONE;
+
+ plxdesc->hw->flags_and_size = cpu_to_le32(len);
+ plxdesc->txd.flags = flags;
+
+ /* return with the lock held, it will be released in tx_submit */
+
+ return &plxdesc->txd;
+
+err_unlock:
+ /*
+ * Keep sparse happy by restoring an even lock count on
+ * this lock.
+ */
+ __acquire(plxdev->ring_lock);
+
+ spin_unlock_bh(&plxdev->ring_lock);
+ return NULL;
+}
+
+static dma_cookie_t plx_dma_tx_submit(struct dma_async_tx_descriptor *desc)
+ __releases(plxdev->ring_lock)
+{
+ struct plx_dma_dev *plxdev = chan_to_plx_dma_dev(desc->chan);
+ struct plx_dma_desc *plxdesc = to_plx_desc(desc);
+ dma_cookie_t cookie;
+
+ cookie = dma_cookie_assign(desc);
+
+ /*
+ * Ensure the descriptor updates are visible to the dma device
+ * before setting the valid bit.
+ */
+ wmb();
+
+ plxdesc->hw->flags_and_size |= cpu_to_le32(PLX_DESC_FLAG_VALID);
+
+ spin_unlock_bh(&plxdev->ring_lock);
+
+ return cookie;
+}
+
+static enum dma_status plx_dma_tx_status(struct dma_chan *chan,
+ dma_cookie_t cookie, struct dma_tx_state *txstate)
+{
+ struct plx_dma_dev *plxdev = chan_to_plx_dma_dev(chan);
+ enum dma_status ret;
+
+ ret = dma_cookie_status(chan, cookie, txstate);
+ if (ret == DMA_COMPLETE)
+ return ret;
+
+ plx_dma_process_desc(plxdev);
+
+ return dma_cookie_status(chan, cookie, txstate);
+}
+
+static void plx_dma_issue_pending(struct dma_chan *chan)
+{
+ struct plx_dma_dev *plxdev = chan_to_plx_dma_dev(chan);
+
+ rcu_read_lock();
+ if (!rcu_dereference(plxdev->pdev)) {
+ rcu_read_unlock();
+ return;
+ }
+
+ /*
+ * Ensure the valid bits are visible before starting the
+ * DMA engine.
+ */
+ wmb();
+
+ writew(PLX_REG_CTRL_START_VAL, plxdev->bar + PLX_REG_CTRL);
+
+ rcu_read_unlock();
+}
+
+static irqreturn_t plx_dma_isr(int irq, void *devid)
+{
+ struct plx_dma_dev *plxdev = devid;
+ u32 status;
+
+ status = readw(plxdev->bar + PLX_REG_INTR_STATUS);
+
+ if (!status)
+ return IRQ_NONE;
+
+ if (status & PLX_REG_INTR_STATUS_DESC_DONE && plxdev->ring_active)
+ tasklet_schedule(&plxdev->desc_task);
+
+ writew(status, plxdev->bar + PLX_REG_INTR_STATUS);
+
+ return IRQ_HANDLED;
+}
+
+static int plx_dma_alloc_desc(struct plx_dma_dev *plxdev)
+{
+ struct plx_dma_desc *desc;
+ int i;
+
+ plxdev->desc_ring = kcalloc(PLX_DMA_RING_COUNT,
+ sizeof(*plxdev->desc_ring), GFP_KERNEL);
+ if (!plxdev->desc_ring)
+ return -ENOMEM;
+
+ for (i = 0; i < PLX_DMA_RING_COUNT; i++) {
+ desc = kzalloc(sizeof(*desc), GFP_KERNEL);
+ if (!desc)
+ goto free_and_exit;
+
+ dma_async_tx_descriptor_init(&desc->txd, &plxdev->dma_chan);
+ desc->txd.tx_submit = plx_dma_tx_submit;
+ desc->hw = &plxdev->hw_ring[i];
+
+ plxdev->desc_ring[i] = desc;
+ }
+
+ return 0;
+
+free_and_exit:
+ for (i = 0; i < PLX_DMA_RING_COUNT; i++)
+ kfree(plxdev->desc_ring[i]);
+ kfree(plxdev->desc_ring);
+ return -ENOMEM;
+}
+
+static int plx_dma_alloc_chan_resources(struct dma_chan *chan)
+{
+ struct plx_dma_dev *plxdev = chan_to_plx_dma_dev(chan);
+ size_t ring_sz = PLX_DMA_RING_COUNT * sizeof(*plxdev->hw_ring);
+ int rc;
+
+ plxdev->head = plxdev->tail = 0;
+ plxdev->hw_ring = dma_alloc_coherent(plxdev->dma_dev.dev, ring_sz,
+ &plxdev->hw_ring_dma, GFP_KERNEL);
+ if (!plxdev->hw_ring)
+ return -ENOMEM;
+
+ rc = plx_dma_alloc_desc(plxdev);
+ if (rc)
+ goto out_free_hw_ring;
+
+ rcu_read_lock();
+ if (!rcu_dereference(plxdev->pdev)) {
+ rcu_read_unlock();
+ rc = -ENODEV;
+ goto out_free_hw_ring;
+ }
+
+ writel(PLX_REG_CTRL_RESET_VAL, plxdev->bar + PLX_REG_CTRL);
+ writel(lower_32_bits(plxdev->hw_ring_dma),
+ plxdev->bar + PLX_REG_DESC_RING_ADDR);
+ writel(upper_32_bits(plxdev->hw_ring_dma),
+ plxdev->bar + PLX_REG_DESC_RING_ADDR_HI);
+ writel(lower_32_bits(plxdev->hw_ring_dma),
+ plxdev->bar + PLX_REG_DESC_RING_NEXT_ADDR);
+ writel(PLX_DMA_RING_COUNT, plxdev->bar + PLX_REG_DESC_RING_COUNT);
+ writel(PLX_REG_PREF_LIMIT_PREF_FOUR, plxdev->bar + PLX_REG_PREF_LIMIT);
+
+ plxdev->ring_active = true;
+
+ rcu_read_unlock();
+
+ return PLX_DMA_RING_COUNT;
+
+out_free_hw_ring:
+ dma_free_coherent(plxdev->dma_dev.dev, ring_sz, plxdev->hw_ring,
+ plxdev->hw_ring_dma);
+ return rc;
+}
+
+static void plx_dma_free_chan_resources(struct dma_chan *chan)
+{
+ struct plx_dma_dev *plxdev = chan_to_plx_dma_dev(chan);
+ size_t ring_sz = PLX_DMA_RING_COUNT * sizeof(*plxdev->hw_ring);
+ struct pci_dev *pdev;
+ int irq = -1;
+ int i;
+
+ spin_lock_bh(&plxdev->ring_lock);
+ plxdev->ring_active = false;
+ spin_unlock_bh(&plxdev->ring_lock);
+
+ plx_dma_stop(plxdev);
+
+ rcu_read_lock();
+ pdev = rcu_dereference(plxdev->pdev);
+ if (pdev)
+ irq = pci_irq_vector(pdev, 0);
+ rcu_read_unlock();
+
+ if (irq > 0)
+ synchronize_irq(irq);
+
+ tasklet_kill(&plxdev->desc_task);
+
+ plx_dma_abort_desc(plxdev);
+
+ for (i = 0; i < PLX_DMA_RING_COUNT; i++)
+ kfree(plxdev->desc_ring[i]);
+
+ kfree(plxdev->desc_ring);
+ dma_free_coherent(plxdev->dma_dev.dev, ring_sz, plxdev->hw_ring,
+ plxdev->hw_ring_dma);
+
+}
+
+static void plx_dma_release(struct dma_device *dma_dev)
+{
+ struct plx_dma_dev *plxdev =
+ container_of(dma_dev, struct plx_dma_dev, dma_dev);
+
+ put_device(dma_dev->dev);
+ kfree(plxdev);
+}
+
+static int plx_dma_create(struct pci_dev *pdev)
+{
+ struct plx_dma_dev *plxdev;
+ struct dma_device *dma;
+ struct dma_chan *chan;
+ int rc;
+
+ plxdev = kzalloc(sizeof(*plxdev), GFP_KERNEL);
+ if (!plxdev)
+ return -ENOMEM;
+
+ rc = request_irq(pci_irq_vector(pdev, 0), plx_dma_isr, 0,
+ KBUILD_MODNAME, plxdev);
+ if (rc)
+ goto free_plx;
+
+ spin_lock_init(&plxdev->ring_lock);
+ tasklet_setup(&plxdev->desc_task, plx_dma_desc_task);
+
+ RCU_INIT_POINTER(plxdev->pdev, pdev);
+ plxdev->bar = pcim_iomap_table(pdev)[0];
+
+ dma = &plxdev->dma_dev;
+ dma->chancnt = 1;
+ INIT_LIST_HEAD(&dma->channels);
+ dma_cap_set(DMA_MEMCPY, dma->cap_mask);
+ dma->copy_align = DMAENGINE_ALIGN_1_BYTE;
+ dma->dev = get_device(&pdev->dev);
+
+ dma->device_alloc_chan_resources = plx_dma_alloc_chan_resources;
+ dma->device_free_chan_resources = plx_dma_free_chan_resources;
+ dma->device_prep_dma_memcpy = plx_dma_prep_memcpy;
+ dma->device_issue_pending = plx_dma_issue_pending;
+ dma->device_tx_status = plx_dma_tx_status;
+ dma->device_release = plx_dma_release;
+
+ chan = &plxdev->dma_chan;
+ chan->device = dma;
+ dma_cookie_init(chan);
+ list_add_tail(&chan->device_node, &dma->channels);
+
+ rc = dma_async_device_register(dma);
+ if (rc) {
+ pci_err(pdev, "Failed to register dma device: %d\n", rc);
+ goto put_device;
+ }
+
+ pci_set_drvdata(pdev, plxdev);
+
+ return 0;
+
+put_device:
+ put_device(&pdev->dev);
+ free_irq(pci_irq_vector(pdev, 0), plxdev);
+free_plx:
+ kfree(plxdev);
+
+ return rc;
+}
+
+static int plx_dma_probe(struct pci_dev *pdev,
+ const struct pci_device_id *id)
+{
+ int rc;
+
+ rc = pcim_enable_device(pdev);
+ if (rc)
+ return rc;
+
+ rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(48));
+ if (rc)
+ rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
+ if (rc)
+ return rc;
+
+ rc = pcim_iomap_regions(pdev, 1, KBUILD_MODNAME);
+ if (rc)
+ return rc;
+
+ rc = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_ALL_TYPES);
+ if (rc <= 0)
+ return rc;
+
+ pci_set_master(pdev);
+
+ rc = plx_dma_create(pdev);
+ if (rc)
+ goto err_free_irq_vectors;
+
+ pci_info(pdev, "PLX DMA Channel Registered\n");
+
+ return 0;
+
+err_free_irq_vectors:
+ pci_free_irq_vectors(pdev);
+ return rc;
+}
+
+static void plx_dma_remove(struct pci_dev *pdev)
+{
+ struct plx_dma_dev *plxdev = pci_get_drvdata(pdev);
+
+ free_irq(pci_irq_vector(pdev, 0), plxdev);
+
+ rcu_assign_pointer(plxdev->pdev, NULL);
+ synchronize_rcu();
+
+ spin_lock_bh(&plxdev->ring_lock);
+ plxdev->ring_active = false;
+ spin_unlock_bh(&plxdev->ring_lock);
+
+ __plx_dma_stop(plxdev);
+ plx_dma_abort_desc(plxdev);
+
+ plxdev->bar = NULL;
+ dma_async_device_unregister(&plxdev->dma_dev);
+
+ pci_free_irq_vectors(pdev);
+}
+
+static const struct pci_device_id plx_dma_pci_tbl[] = {
+ {
+ .vendor = PCI_VENDOR_ID_PLX,
+ .device = 0x87D0,
+ .subvendor = PCI_ANY_ID,
+ .subdevice = PCI_ANY_ID,
+ .class = PCI_CLASS_SYSTEM_OTHER << 8,
+ .class_mask = 0xFFFFFFFF,
+ },
+ {0}
+};
+MODULE_DEVICE_TABLE(pci, plx_dma_pci_tbl);
+
+static struct pci_driver plx_dma_pci_driver = {
+ .name = KBUILD_MODNAME,
+ .id_table = plx_dma_pci_tbl,
+ .probe = plx_dma_probe,
+ .remove = plx_dma_remove,
+};
+module_pci_driver(plx_dma_pci_driver);
diff --git a/drivers/dma/ppc4xx/Makefile b/drivers/dma/ppc4xx/Makefile
new file mode 100644
index 000000000..69c2cfac9
--- /dev/null
+++ b/drivers/dma/ppc4xx/Makefile
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+obj-$(CONFIG_AMCC_PPC440SPE_ADMA) += adma.o
diff --git a/drivers/dma/ppc4xx/adma.c b/drivers/dma/ppc4xx/adma.c
new file mode 100644
index 000000000..6b5e91f26
--- /dev/null
+++ b/drivers/dma/ppc4xx/adma.c
@@ -0,0 +1,4629 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2006-2009 DENX Software Engineering.
+ *
+ * Author: Yuri Tikhonov <yur@emcraft.com>
+ *
+ * Further porting to arch/powerpc by
+ * Anatolij Gustschin <agust@denx.de>
+ */
+
+/*
+ * This driver supports the asynchrounous DMA copy and RAID engines available
+ * on the AMCC PPC440SPe Processors.
+ * Based on the Intel Xscale(R) family of I/O Processors (IOP 32x, 33x, 134x)
+ * ADMA driver written by D.Williams.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/async_tx.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/spinlock.h>
+#include <linux/interrupt.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/proc_fs.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/of_platform.h>
+#include <asm/dcr.h>
+#include <asm/dcr-regs.h>
+#include "adma.h"
+#include "../dmaengine.h"
+
+enum ppc_adma_init_code {
+ PPC_ADMA_INIT_OK = 0,
+ PPC_ADMA_INIT_MEMRES,
+ PPC_ADMA_INIT_MEMREG,
+ PPC_ADMA_INIT_ALLOC,
+ PPC_ADMA_INIT_COHERENT,
+ PPC_ADMA_INIT_CHANNEL,
+ PPC_ADMA_INIT_IRQ1,
+ PPC_ADMA_INIT_IRQ2,
+ PPC_ADMA_INIT_REGISTER
+};
+
+static char *ppc_adma_errors[] = {
+ [PPC_ADMA_INIT_OK] = "ok",
+ [PPC_ADMA_INIT_MEMRES] = "failed to get memory resource",
+ [PPC_ADMA_INIT_MEMREG] = "failed to request memory region",
+ [PPC_ADMA_INIT_ALLOC] = "failed to allocate memory for adev "
+ "structure",
+ [PPC_ADMA_INIT_COHERENT] = "failed to allocate coherent memory for "
+ "hardware descriptors",
+ [PPC_ADMA_INIT_CHANNEL] = "failed to allocate memory for channel",
+ [PPC_ADMA_INIT_IRQ1] = "failed to request first irq",
+ [PPC_ADMA_INIT_IRQ2] = "failed to request second irq",
+ [PPC_ADMA_INIT_REGISTER] = "failed to register dma async device",
+};
+
+static enum ppc_adma_init_code
+ppc440spe_adma_devices[PPC440SPE_ADMA_ENGINES_NUM];
+
+struct ppc_dma_chan_ref {
+ struct dma_chan *chan;
+ struct list_head node;
+};
+
+/* The list of channels exported by ppc440spe ADMA */
+static struct list_head
+ppc440spe_adma_chan_list = LIST_HEAD_INIT(ppc440spe_adma_chan_list);
+
+/* This flag is set when want to refetch the xor chain in the interrupt
+ * handler
+ */
+static u32 do_xor_refetch;
+
+/* Pointer to DMA0, DMA1 CP/CS FIFO */
+static void *ppc440spe_dma_fifo_buf;
+
+/* Pointers to last submitted to DMA0, DMA1 CDBs */
+static struct ppc440spe_adma_desc_slot *chan_last_sub[3];
+static struct ppc440spe_adma_desc_slot *chan_first_cdb[3];
+
+/* Pointer to last linked and submitted xor CB */
+static struct ppc440spe_adma_desc_slot *xor_last_linked;
+static struct ppc440spe_adma_desc_slot *xor_last_submit;
+
+/* This array is used in data-check operations for storing a pattern */
+static char ppc440spe_qword[16];
+
+static atomic_t ppc440spe_adma_err_irq_ref;
+static dcr_host_t ppc440spe_mq_dcr_host;
+static unsigned int ppc440spe_mq_dcr_len;
+
+/* Since RXOR operations use the common register (MQ0_CF2H) for setting-up
+ * the block size in transactions, then we do not allow to activate more than
+ * only one RXOR transactions simultaneously. So use this var to store
+ * the information about is RXOR currently active (PPC440SPE_RXOR_RUN bit is
+ * set) or not (PPC440SPE_RXOR_RUN is clear).
+ */
+static unsigned long ppc440spe_rxor_state;
+
+/* These are used in enable & check routines
+ */
+static u32 ppc440spe_r6_enabled;
+static struct ppc440spe_adma_chan *ppc440spe_r6_tchan;
+static struct completion ppc440spe_r6_test_comp;
+
+static int ppc440spe_adma_dma2rxor_prep_src(
+ struct ppc440spe_adma_desc_slot *desc,
+ struct ppc440spe_rxor *cursor, int index,
+ int src_cnt, u32 addr);
+static void ppc440spe_adma_dma2rxor_set_src(
+ struct ppc440spe_adma_desc_slot *desc,
+ int index, dma_addr_t addr);
+static void ppc440spe_adma_dma2rxor_set_mult(
+ struct ppc440spe_adma_desc_slot *desc,
+ int index, u8 mult);
+
+#ifdef ADMA_LL_DEBUG
+#define ADMA_LL_DBG(x) ({ if (1) x; 0; })
+#else
+#define ADMA_LL_DBG(x) ({ if (0) x; 0; })
+#endif
+
+static void print_cb(struct ppc440spe_adma_chan *chan, void *block)
+{
+ struct dma_cdb *cdb;
+ struct xor_cb *cb;
+ int i;
+
+ switch (chan->device->id) {
+ case 0:
+ case 1:
+ cdb = block;
+
+ pr_debug("CDB at %p [%d]:\n"
+ "\t attr 0x%02x opc 0x%02x cnt 0x%08x\n"
+ "\t sg1u 0x%08x sg1l 0x%08x\n"
+ "\t sg2u 0x%08x sg2l 0x%08x\n"
+ "\t sg3u 0x%08x sg3l 0x%08x\n",
+ cdb, chan->device->id,
+ cdb->attr, cdb->opc, le32_to_cpu(cdb->cnt),
+ le32_to_cpu(cdb->sg1u), le32_to_cpu(cdb->sg1l),
+ le32_to_cpu(cdb->sg2u), le32_to_cpu(cdb->sg2l),
+ le32_to_cpu(cdb->sg3u), le32_to_cpu(cdb->sg3l)
+ );
+ break;
+ case 2:
+ cb = block;
+
+ pr_debug("CB at %p [%d]:\n"
+ "\t cbc 0x%08x cbbc 0x%08x cbs 0x%08x\n"
+ "\t cbtah 0x%08x cbtal 0x%08x\n"
+ "\t cblah 0x%08x cblal 0x%08x\n",
+ cb, chan->device->id,
+ cb->cbc, cb->cbbc, cb->cbs,
+ cb->cbtah, cb->cbtal,
+ cb->cblah, cb->cblal);
+ for (i = 0; i < 16; i++) {
+ if (i && !cb->ops[i].h && !cb->ops[i].l)
+ continue;
+ pr_debug("\t ops[%2d]: h 0x%08x l 0x%08x\n",
+ i, cb->ops[i].h, cb->ops[i].l);
+ }
+ break;
+ }
+}
+
+static void print_cb_list(struct ppc440spe_adma_chan *chan,
+ struct ppc440spe_adma_desc_slot *iter)
+{
+ for (; iter; iter = iter->hw_next)
+ print_cb(chan, iter->hw_desc);
+}
+
+static void prep_dma_xor_dbg(int id, dma_addr_t dst, dma_addr_t *src,
+ unsigned int src_cnt)
+{
+ int i;
+
+ pr_debug("\n%s(%d):\nsrc: ", __func__, id);
+ for (i = 0; i < src_cnt; i++)
+ pr_debug("\t0x%016llx ", src[i]);
+ pr_debug("dst:\n\t0x%016llx\n", dst);
+}
+
+static void prep_dma_pq_dbg(int id, dma_addr_t *dst, dma_addr_t *src,
+ unsigned int src_cnt)
+{
+ int i;
+
+ pr_debug("\n%s(%d):\nsrc: ", __func__, id);
+ for (i = 0; i < src_cnt; i++)
+ pr_debug("\t0x%016llx ", src[i]);
+ pr_debug("dst: ");
+ for (i = 0; i < 2; i++)
+ pr_debug("\t0x%016llx ", dst[i]);
+}
+
+static void prep_dma_pqzero_sum_dbg(int id, dma_addr_t *src,
+ unsigned int src_cnt,
+ const unsigned char *scf)
+{
+ int i;
+
+ pr_debug("\n%s(%d):\nsrc(coef): ", __func__, id);
+ if (scf) {
+ for (i = 0; i < src_cnt; i++)
+ pr_debug("\t0x%016llx(0x%02x) ", src[i], scf[i]);
+ } else {
+ for (i = 0; i < src_cnt; i++)
+ pr_debug("\t0x%016llx(no) ", src[i]);
+ }
+
+ pr_debug("dst: ");
+ for (i = 0; i < 2; i++)
+ pr_debug("\t0x%016llx ", src[src_cnt + i]);
+}
+
+/******************************************************************************
+ * Command (Descriptor) Blocks low-level routines
+ ******************************************************************************/
+/**
+ * ppc440spe_desc_init_interrupt - initialize the descriptor for INTERRUPT
+ * pseudo operation
+ */
+static void ppc440spe_desc_init_interrupt(struct ppc440spe_adma_desc_slot *desc,
+ struct ppc440spe_adma_chan *chan)
+{
+ struct xor_cb *p;
+
+ switch (chan->device->id) {
+ case PPC440SPE_XOR_ID:
+ p = desc->hw_desc;
+ memset(desc->hw_desc, 0, sizeof(struct xor_cb));
+ /* NOP with Command Block Complete Enable */
+ p->cbc = XOR_CBCR_CBCE_BIT;
+ break;
+ case PPC440SPE_DMA0_ID:
+ case PPC440SPE_DMA1_ID:
+ memset(desc->hw_desc, 0, sizeof(struct dma_cdb));
+ /* NOP with interrupt */
+ set_bit(PPC440SPE_DESC_INT, &desc->flags);
+ break;
+ default:
+ printk(KERN_ERR "Unsupported id %d in %s\n", chan->device->id,
+ __func__);
+ break;
+ }
+}
+
+/**
+ * ppc440spe_desc_init_null_xor - initialize the descriptor for NULL XOR
+ * pseudo operation
+ */
+static void ppc440spe_desc_init_null_xor(struct ppc440spe_adma_desc_slot *desc)
+{
+ memset(desc->hw_desc, 0, sizeof(struct xor_cb));
+ desc->hw_next = NULL;
+ desc->src_cnt = 0;
+ desc->dst_cnt = 1;
+}
+
+/**
+ * ppc440spe_desc_init_xor - initialize the descriptor for XOR operation
+ */
+static void ppc440spe_desc_init_xor(struct ppc440spe_adma_desc_slot *desc,
+ int src_cnt, unsigned long flags)
+{
+ struct xor_cb *hw_desc = desc->hw_desc;
+
+ memset(desc->hw_desc, 0, sizeof(struct xor_cb));
+ desc->hw_next = NULL;
+ desc->src_cnt = src_cnt;
+ desc->dst_cnt = 1;
+
+ hw_desc->cbc = XOR_CBCR_TGT_BIT | src_cnt;
+ if (flags & DMA_PREP_INTERRUPT)
+ /* Enable interrupt on completion */
+ hw_desc->cbc |= XOR_CBCR_CBCE_BIT;
+}
+
+/**
+ * ppc440spe_desc_init_dma2pq - initialize the descriptor for PQ
+ * operation in DMA2 controller
+ */
+static void ppc440spe_desc_init_dma2pq(struct ppc440spe_adma_desc_slot *desc,
+ int dst_cnt, int src_cnt, unsigned long flags)
+{
+ struct xor_cb *hw_desc = desc->hw_desc;
+
+ memset(desc->hw_desc, 0, sizeof(struct xor_cb));
+ desc->hw_next = NULL;
+ desc->src_cnt = src_cnt;
+ desc->dst_cnt = dst_cnt;
+ memset(desc->reverse_flags, 0, sizeof(desc->reverse_flags));
+ desc->descs_per_op = 0;
+
+ hw_desc->cbc = XOR_CBCR_TGT_BIT;
+ if (flags & DMA_PREP_INTERRUPT)
+ /* Enable interrupt on completion */
+ hw_desc->cbc |= XOR_CBCR_CBCE_BIT;
+}
+
+#define DMA_CTRL_FLAGS_LAST DMA_PREP_FENCE
+#define DMA_PREP_ZERO_P (DMA_CTRL_FLAGS_LAST << 1)
+#define DMA_PREP_ZERO_Q (DMA_PREP_ZERO_P << 1)
+
+/**
+ * ppc440spe_desc_init_dma01pq - initialize the descriptors for PQ operation
+ * with DMA0/1
+ */
+static void ppc440spe_desc_init_dma01pq(struct ppc440spe_adma_desc_slot *desc,
+ int dst_cnt, int src_cnt, unsigned long flags,
+ unsigned long op)
+{
+ struct dma_cdb *hw_desc;
+ struct ppc440spe_adma_desc_slot *iter;
+ u8 dopc;
+
+ /* Common initialization of a PQ descriptors chain */
+ set_bits(op, &desc->flags);
+ desc->src_cnt = src_cnt;
+ desc->dst_cnt = dst_cnt;
+
+ /* WXOR MULTICAST if both P and Q are being computed
+ * MV_SG1_SG2 if Q only
+ */
+ dopc = (desc->dst_cnt == DMA_DEST_MAX_NUM) ?
+ DMA_CDB_OPC_MULTICAST : DMA_CDB_OPC_MV_SG1_SG2;
+
+ list_for_each_entry(iter, &desc->group_list, chain_node) {
+ hw_desc = iter->hw_desc;
+ memset(iter->hw_desc, 0, sizeof(struct dma_cdb));
+
+ if (likely(!list_is_last(&iter->chain_node,
+ &desc->group_list))) {
+ /* set 'next' pointer */
+ iter->hw_next = list_entry(iter->chain_node.next,
+ struct ppc440spe_adma_desc_slot, chain_node);
+ clear_bit(PPC440SPE_DESC_INT, &iter->flags);
+ } else {
+ /* this is the last descriptor.
+ * this slot will be pasted from ADMA level
+ * each time it wants to configure parameters
+ * of the transaction (src, dst, ...)
+ */
+ iter->hw_next = NULL;
+ if (flags & DMA_PREP_INTERRUPT)
+ set_bit(PPC440SPE_DESC_INT, &iter->flags);
+ else
+ clear_bit(PPC440SPE_DESC_INT, &iter->flags);
+ }
+ }
+
+ /* Set OPS depending on WXOR/RXOR type of operation */
+ if (!test_bit(PPC440SPE_DESC_RXOR, &desc->flags)) {
+ /* This is a WXOR only chain:
+ * - first descriptors are for zeroing destinations
+ * if PPC440SPE_ZERO_P/Q set;
+ * - descriptors remained are for GF-XOR operations.
+ */
+ iter = list_first_entry(&desc->group_list,
+ struct ppc440spe_adma_desc_slot,
+ chain_node);
+
+ if (test_bit(PPC440SPE_ZERO_P, &desc->flags)) {
+ hw_desc = iter->hw_desc;
+ hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2;
+ iter = list_first_entry(&iter->chain_node,
+ struct ppc440spe_adma_desc_slot,
+ chain_node);
+ }
+
+ if (test_bit(PPC440SPE_ZERO_Q, &desc->flags)) {
+ hw_desc = iter->hw_desc;
+ hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2;
+ iter = list_first_entry(&iter->chain_node,
+ struct ppc440spe_adma_desc_slot,
+ chain_node);
+ }
+
+ list_for_each_entry_from(iter, &desc->group_list, chain_node) {
+ hw_desc = iter->hw_desc;
+ hw_desc->opc = dopc;
+ }
+ } else {
+ /* This is either RXOR-only or mixed RXOR/WXOR */
+
+ /* The first 1 or 2 slots in chain are always RXOR,
+ * if need to calculate P & Q, then there are two
+ * RXOR slots; if only P or only Q, then there is one
+ */
+ iter = list_first_entry(&desc->group_list,
+ struct ppc440spe_adma_desc_slot,
+ chain_node);
+ hw_desc = iter->hw_desc;
+ hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2;
+
+ if (desc->dst_cnt == DMA_DEST_MAX_NUM) {
+ iter = list_first_entry(&iter->chain_node,
+ struct ppc440spe_adma_desc_slot,
+ chain_node);
+ hw_desc = iter->hw_desc;
+ hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2;
+ }
+
+ /* The remaining descs (if any) are WXORs */
+ if (test_bit(PPC440SPE_DESC_WXOR, &desc->flags)) {
+ iter = list_first_entry(&iter->chain_node,
+ struct ppc440spe_adma_desc_slot,
+ chain_node);
+ list_for_each_entry_from(iter, &desc->group_list,
+ chain_node) {
+ hw_desc = iter->hw_desc;
+ hw_desc->opc = dopc;
+ }
+ }
+ }
+}
+
+/**
+ * ppc440spe_desc_init_dma01pqzero_sum - initialize the descriptor
+ * for PQ_ZERO_SUM operation
+ */
+static void ppc440spe_desc_init_dma01pqzero_sum(
+ struct ppc440spe_adma_desc_slot *desc,
+ int dst_cnt, int src_cnt)
+{
+ struct dma_cdb *hw_desc;
+ struct ppc440spe_adma_desc_slot *iter;
+ int i = 0;
+ u8 dopc = (dst_cnt == 2) ? DMA_CDB_OPC_MULTICAST :
+ DMA_CDB_OPC_MV_SG1_SG2;
+ /*
+ * Initialize starting from 2nd or 3rd descriptor dependent
+ * on dst_cnt. First one or two slots are for cloning P
+ * and/or Q to chan->pdest and/or chan->qdest as we have
+ * to preserve original P/Q.
+ */
+ iter = list_first_entry(&desc->group_list,
+ struct ppc440spe_adma_desc_slot, chain_node);
+ iter = list_entry(iter->chain_node.next,
+ struct ppc440spe_adma_desc_slot, chain_node);
+
+ if (dst_cnt > 1) {
+ iter = list_entry(iter->chain_node.next,
+ struct ppc440spe_adma_desc_slot, chain_node);
+ }
+ /* initialize each source descriptor in chain */
+ list_for_each_entry_from(iter, &desc->group_list, chain_node) {
+ hw_desc = iter->hw_desc;
+ memset(iter->hw_desc, 0, sizeof(struct dma_cdb));
+ iter->src_cnt = 0;
+ iter->dst_cnt = 0;
+
+ /* This is a ZERO_SUM operation:
+ * - <src_cnt> descriptors starting from 2nd or 3rd
+ * descriptor are for GF-XOR operations;
+ * - remaining <dst_cnt> descriptors are for checking the result
+ */
+ if (i++ < src_cnt)
+ /* MV_SG1_SG2 if only Q is being verified
+ * MULTICAST if both P and Q are being verified
+ */
+ hw_desc->opc = dopc;
+ else
+ /* DMA_CDB_OPC_DCHECK128 operation */
+ hw_desc->opc = DMA_CDB_OPC_DCHECK128;
+
+ if (likely(!list_is_last(&iter->chain_node,
+ &desc->group_list))) {
+ /* set 'next' pointer */
+ iter->hw_next = list_entry(iter->chain_node.next,
+ struct ppc440spe_adma_desc_slot,
+ chain_node);
+ } else {
+ /* this is the last descriptor.
+ * this slot will be pasted from ADMA level
+ * each time it wants to configure parameters
+ * of the transaction (src, dst, ...)
+ */
+ iter->hw_next = NULL;
+ /* always enable interrupt generation since we get
+ * the status of pqzero from the handler
+ */
+ set_bit(PPC440SPE_DESC_INT, &iter->flags);
+ }
+ }
+ desc->src_cnt = src_cnt;
+ desc->dst_cnt = dst_cnt;
+}
+
+/**
+ * ppc440spe_desc_init_memcpy - initialize the descriptor for MEMCPY operation
+ */
+static void ppc440spe_desc_init_memcpy(struct ppc440spe_adma_desc_slot *desc,
+ unsigned long flags)
+{
+ struct dma_cdb *hw_desc = desc->hw_desc;
+
+ memset(desc->hw_desc, 0, sizeof(struct dma_cdb));
+ desc->hw_next = NULL;
+ desc->src_cnt = 1;
+ desc->dst_cnt = 1;
+
+ if (flags & DMA_PREP_INTERRUPT)
+ set_bit(PPC440SPE_DESC_INT, &desc->flags);
+ else
+ clear_bit(PPC440SPE_DESC_INT, &desc->flags);
+
+ hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2;
+}
+
+/**
+ * ppc440spe_desc_set_src_addr - set source address into the descriptor
+ */
+static void ppc440spe_desc_set_src_addr(struct ppc440spe_adma_desc_slot *desc,
+ struct ppc440spe_adma_chan *chan,
+ int src_idx, dma_addr_t addrh,
+ dma_addr_t addrl)
+{
+ struct dma_cdb *dma_hw_desc;
+ struct xor_cb *xor_hw_desc;
+ phys_addr_t addr64, tmplow, tmphi;
+
+ switch (chan->device->id) {
+ case PPC440SPE_DMA0_ID:
+ case PPC440SPE_DMA1_ID:
+ if (!addrh) {
+ addr64 = addrl;
+ tmphi = (addr64 >> 32);
+ tmplow = (addr64 & 0xFFFFFFFF);
+ } else {
+ tmphi = addrh;
+ tmplow = addrl;
+ }
+ dma_hw_desc = desc->hw_desc;
+ dma_hw_desc->sg1l = cpu_to_le32((u32)tmplow);
+ dma_hw_desc->sg1u |= cpu_to_le32((u32)tmphi);
+ break;
+ case PPC440SPE_XOR_ID:
+ xor_hw_desc = desc->hw_desc;
+ xor_hw_desc->ops[src_idx].l = addrl;
+ xor_hw_desc->ops[src_idx].h |= addrh;
+ break;
+ }
+}
+
+/**
+ * ppc440spe_desc_set_src_mult - set source address mult into the descriptor
+ */
+static void ppc440spe_desc_set_src_mult(struct ppc440spe_adma_desc_slot *desc,
+ struct ppc440spe_adma_chan *chan, u32 mult_index,
+ int sg_index, unsigned char mult_value)
+{
+ struct dma_cdb *dma_hw_desc;
+ u32 *psgu;
+
+ switch (chan->device->id) {
+ case PPC440SPE_DMA0_ID:
+ case PPC440SPE_DMA1_ID:
+ dma_hw_desc = desc->hw_desc;
+
+ switch (sg_index) {
+ /* for RXOR operations set multiplier
+ * into source cued address
+ */
+ case DMA_CDB_SG_SRC:
+ psgu = &dma_hw_desc->sg1u;
+ break;
+ /* for WXOR operations set multiplier
+ * into destination cued address(es)
+ */
+ case DMA_CDB_SG_DST1:
+ psgu = &dma_hw_desc->sg2u;
+ break;
+ case DMA_CDB_SG_DST2:
+ psgu = &dma_hw_desc->sg3u;
+ break;
+ default:
+ BUG();
+ }
+
+ *psgu |= cpu_to_le32(mult_value << mult_index);
+ break;
+ case PPC440SPE_XOR_ID:
+ break;
+ default:
+ BUG();
+ }
+}
+
+/**
+ * ppc440spe_desc_set_dest_addr - set destination address into the descriptor
+ */
+static void ppc440spe_desc_set_dest_addr(struct ppc440spe_adma_desc_slot *desc,
+ struct ppc440spe_adma_chan *chan,
+ dma_addr_t addrh, dma_addr_t addrl,
+ u32 dst_idx)
+{
+ struct dma_cdb *dma_hw_desc;
+ struct xor_cb *xor_hw_desc;
+ phys_addr_t addr64, tmphi, tmplow;
+ u32 *psgu, *psgl;
+
+ switch (chan->device->id) {
+ case PPC440SPE_DMA0_ID:
+ case PPC440SPE_DMA1_ID:
+ if (!addrh) {
+ addr64 = addrl;
+ tmphi = (addr64 >> 32);
+ tmplow = (addr64 & 0xFFFFFFFF);
+ } else {
+ tmphi = addrh;
+ tmplow = addrl;
+ }
+ dma_hw_desc = desc->hw_desc;
+
+ psgu = dst_idx ? &dma_hw_desc->sg3u : &dma_hw_desc->sg2u;
+ psgl = dst_idx ? &dma_hw_desc->sg3l : &dma_hw_desc->sg2l;
+
+ *psgl = cpu_to_le32((u32)tmplow);
+ *psgu |= cpu_to_le32((u32)tmphi);
+ break;
+ case PPC440SPE_XOR_ID:
+ xor_hw_desc = desc->hw_desc;
+ xor_hw_desc->cbtal = addrl;
+ xor_hw_desc->cbtah |= addrh;
+ break;
+ }
+}
+
+/**
+ * ppc440spe_desc_set_byte_count - set number of data bytes involved
+ * into the operation
+ */
+static void ppc440spe_desc_set_byte_count(struct ppc440spe_adma_desc_slot *desc,
+ struct ppc440spe_adma_chan *chan,
+ u32 byte_count)
+{
+ struct dma_cdb *dma_hw_desc;
+ struct xor_cb *xor_hw_desc;
+
+ switch (chan->device->id) {
+ case PPC440SPE_DMA0_ID:
+ case PPC440SPE_DMA1_ID:
+ dma_hw_desc = desc->hw_desc;
+ dma_hw_desc->cnt = cpu_to_le32(byte_count);
+ break;
+ case PPC440SPE_XOR_ID:
+ xor_hw_desc = desc->hw_desc;
+ xor_hw_desc->cbbc = byte_count;
+ break;
+ }
+}
+
+/**
+ * ppc440spe_desc_set_rxor_block_size - set RXOR block size
+ */
+static inline void ppc440spe_desc_set_rxor_block_size(u32 byte_count)
+{
+ /* assume that byte_count is aligned on the 512-boundary;
+ * thus write it directly to the register (bits 23:31 are
+ * reserved there).
+ */
+ dcr_write(ppc440spe_mq_dcr_host, DCRN_MQ0_CF2H, byte_count);
+}
+
+/**
+ * ppc440spe_desc_set_dcheck - set CHECK pattern
+ */
+static void ppc440spe_desc_set_dcheck(struct ppc440spe_adma_desc_slot *desc,
+ struct ppc440spe_adma_chan *chan, u8 *qword)
+{
+ struct dma_cdb *dma_hw_desc;
+
+ switch (chan->device->id) {
+ case PPC440SPE_DMA0_ID:
+ case PPC440SPE_DMA1_ID:
+ dma_hw_desc = desc->hw_desc;
+ iowrite32(qword[0], &dma_hw_desc->sg3l);
+ iowrite32(qword[4], &dma_hw_desc->sg3u);
+ iowrite32(qword[8], &dma_hw_desc->sg2l);
+ iowrite32(qword[12], &dma_hw_desc->sg2u);
+ break;
+ default:
+ BUG();
+ }
+}
+
+/**
+ * ppc440spe_xor_set_link - set link address in xor CB
+ */
+static void ppc440spe_xor_set_link(struct ppc440spe_adma_desc_slot *prev_desc,
+ struct ppc440spe_adma_desc_slot *next_desc)
+{
+ struct xor_cb *xor_hw_desc = prev_desc->hw_desc;
+
+ if (unlikely(!next_desc || !(next_desc->phys))) {
+ printk(KERN_ERR "%s: next_desc=0x%p; next_desc->phys=0x%llx\n",
+ __func__, next_desc,
+ next_desc ? next_desc->phys : 0);
+ BUG();
+ }
+
+ xor_hw_desc->cbs = 0;
+ xor_hw_desc->cblal = next_desc->phys;
+ xor_hw_desc->cblah = 0;
+ xor_hw_desc->cbc |= XOR_CBCR_LNK_BIT;
+}
+
+/**
+ * ppc440spe_desc_set_link - set the address of descriptor following this
+ * descriptor in chain
+ */
+static void ppc440spe_desc_set_link(struct ppc440spe_adma_chan *chan,
+ struct ppc440spe_adma_desc_slot *prev_desc,
+ struct ppc440spe_adma_desc_slot *next_desc)
+{
+ unsigned long flags;
+ struct ppc440spe_adma_desc_slot *tail = next_desc;
+
+ if (unlikely(!prev_desc || !next_desc ||
+ (prev_desc->hw_next && prev_desc->hw_next != next_desc))) {
+ /* If previous next is overwritten something is wrong.
+ * though we may refetch from append to initiate list
+ * processing; in this case - it's ok.
+ */
+ printk(KERN_ERR "%s: prev_desc=0x%p; next_desc=0x%p; "
+ "prev->hw_next=0x%p\n", __func__, prev_desc,
+ next_desc, prev_desc ? prev_desc->hw_next : 0);
+ BUG();
+ }
+
+ local_irq_save(flags);
+
+ /* do s/w chaining both for DMA and XOR descriptors */
+ prev_desc->hw_next = next_desc;
+
+ switch (chan->device->id) {
+ case PPC440SPE_DMA0_ID:
+ case PPC440SPE_DMA1_ID:
+ break;
+ case PPC440SPE_XOR_ID:
+ /* bind descriptor to the chain */
+ while (tail->hw_next)
+ tail = tail->hw_next;
+ xor_last_linked = tail;
+
+ if (prev_desc == xor_last_submit)
+ /* do not link to the last submitted CB */
+ break;
+ ppc440spe_xor_set_link(prev_desc, next_desc);
+ break;
+ }
+
+ local_irq_restore(flags);
+}
+
+/**
+ * ppc440spe_desc_get_link - get the address of the descriptor that
+ * follows this one
+ */
+static inline u32 ppc440spe_desc_get_link(struct ppc440spe_adma_desc_slot *desc,
+ struct ppc440spe_adma_chan *chan)
+{
+ if (!desc->hw_next)
+ return 0;
+
+ return desc->hw_next->phys;
+}
+
+/**
+ * ppc440spe_desc_is_aligned - check alignment
+ */
+static inline int ppc440spe_desc_is_aligned(
+ struct ppc440spe_adma_desc_slot *desc, int num_slots)
+{
+ return (desc->idx & (num_slots - 1)) ? 0 : 1;
+}
+
+/**
+ * ppc440spe_chan_xor_slot_count - get the number of slots necessary for
+ * XOR operation
+ */
+static int ppc440spe_chan_xor_slot_count(size_t len, int src_cnt,
+ int *slots_per_op)
+{
+ int slot_cnt;
+
+ /* each XOR descriptor provides up to 16 source operands */
+ slot_cnt = *slots_per_op = (src_cnt + XOR_MAX_OPS - 1)/XOR_MAX_OPS;
+
+ if (likely(len <= PPC440SPE_ADMA_XOR_MAX_BYTE_COUNT))
+ return slot_cnt;
+
+ printk(KERN_ERR "%s: len %d > max %d !!\n",
+ __func__, len, PPC440SPE_ADMA_XOR_MAX_BYTE_COUNT);
+ BUG();
+ return slot_cnt;
+}
+
+/**
+ * ppc440spe_dma2_pq_slot_count - get the number of slots necessary for
+ * DMA2 PQ operation
+ */
+static int ppc440spe_dma2_pq_slot_count(dma_addr_t *srcs,
+ int src_cnt, size_t len)
+{
+ signed long long order = 0;
+ int state = 0;
+ int addr_count = 0;
+ int i;
+ for (i = 1; i < src_cnt; i++) {
+ dma_addr_t cur_addr = srcs[i];
+ dma_addr_t old_addr = srcs[i-1];
+ switch (state) {
+ case 0:
+ if (cur_addr == old_addr + len) {
+ /* direct RXOR */
+ order = 1;
+ state = 1;
+ if (i == src_cnt-1)
+ addr_count++;
+ } else if (old_addr == cur_addr + len) {
+ /* reverse RXOR */
+ order = -1;
+ state = 1;
+ if (i == src_cnt-1)
+ addr_count++;
+ } else {
+ state = 3;
+ }
+ break;
+ case 1:
+ if (i == src_cnt-2 || (order == -1
+ && cur_addr != old_addr - len)) {
+ order = 0;
+ state = 0;
+ addr_count++;
+ } else if (cur_addr == old_addr + len*order) {
+ state = 2;
+ if (i == src_cnt-1)
+ addr_count++;
+ } else if (cur_addr == old_addr + 2*len) {
+ state = 2;
+ if (i == src_cnt-1)
+ addr_count++;
+ } else if (cur_addr == old_addr + 3*len) {
+ state = 2;
+ if (i == src_cnt-1)
+ addr_count++;
+ } else {
+ order = 0;
+ state = 0;
+ addr_count++;
+ }
+ break;
+ case 2:
+ order = 0;
+ state = 0;
+ addr_count++;
+ break;
+ }
+ if (state == 3)
+ break;
+ }
+ if (src_cnt <= 1 || (state != 1 && state != 2)) {
+ pr_err("%s: src_cnt=%d, state=%d, addr_count=%d, order=%lld\n",
+ __func__, src_cnt, state, addr_count, order);
+ for (i = 0; i < src_cnt; i++)
+ pr_err("\t[%d] 0x%llx \n", i, srcs[i]);
+ BUG();
+ }
+
+ return (addr_count + XOR_MAX_OPS - 1) / XOR_MAX_OPS;
+}
+
+
+/******************************************************************************
+ * ADMA channel low-level routines
+ ******************************************************************************/
+
+static u32
+ppc440spe_chan_get_current_descriptor(struct ppc440spe_adma_chan *chan);
+static void ppc440spe_chan_append(struct ppc440spe_adma_chan *chan);
+
+/**
+ * ppc440spe_adma_device_clear_eot_status - interrupt ack to XOR or DMA engine
+ */
+static void ppc440spe_adma_device_clear_eot_status(
+ struct ppc440spe_adma_chan *chan)
+{
+ struct dma_regs *dma_reg;
+ struct xor_regs *xor_reg;
+ u8 *p = chan->device->dma_desc_pool_virt;
+ struct dma_cdb *cdb;
+ u32 rv, i;
+
+ switch (chan->device->id) {
+ case PPC440SPE_DMA0_ID:
+ case PPC440SPE_DMA1_ID:
+ /* read FIFO to ack */
+ dma_reg = chan->device->dma_reg;
+ while ((rv = ioread32(&dma_reg->csfpl))) {
+ i = rv & DMA_CDB_ADDR_MSK;
+ cdb = (struct dma_cdb *)&p[i -
+ (u32)chan->device->dma_desc_pool];
+
+ /* Clear opcode to ack. This is necessary for
+ * ZeroSum operations only
+ */
+ cdb->opc = 0;
+
+ if (test_bit(PPC440SPE_RXOR_RUN,
+ &ppc440spe_rxor_state)) {
+ /* probably this is a completed RXOR op,
+ * get pointer to CDB using the fact that
+ * physical and virtual addresses of CDB
+ * in pools have the same offsets
+ */
+ if (le32_to_cpu(cdb->sg1u) &
+ DMA_CUED_XOR_BASE) {
+ /* this is a RXOR */
+ clear_bit(PPC440SPE_RXOR_RUN,
+ &ppc440spe_rxor_state);
+ }
+ }
+
+ if (rv & DMA_CDB_STATUS_MSK) {
+ /* ZeroSum check failed
+ */
+ struct ppc440spe_adma_desc_slot *iter;
+ dma_addr_t phys = rv & ~DMA_CDB_MSK;
+
+ /*
+ * Update the status of corresponding
+ * descriptor.
+ */
+ list_for_each_entry(iter, &chan->chain,
+ chain_node) {
+ if (iter->phys == phys)
+ break;
+ }
+ /*
+ * if cannot find the corresponding
+ * slot it's a bug
+ */
+ BUG_ON(&iter->chain_node == &chan->chain);
+
+ if (iter->xor_check_result) {
+ if (test_bit(PPC440SPE_DESC_PCHECK,
+ &iter->flags)) {
+ *iter->xor_check_result |=
+ SUM_CHECK_P_RESULT;
+ } else
+ if (test_bit(PPC440SPE_DESC_QCHECK,
+ &iter->flags)) {
+ *iter->xor_check_result |=
+ SUM_CHECK_Q_RESULT;
+ } else
+ BUG();
+ }
+ }
+ }
+
+ rv = ioread32(&dma_reg->dsts);
+ if (rv) {
+ pr_err("DMA%d err status: 0x%x\n",
+ chan->device->id, rv);
+ /* write back to clear */
+ iowrite32(rv, &dma_reg->dsts);
+ }
+ break;
+ case PPC440SPE_XOR_ID:
+ /* reset status bits to ack */
+ xor_reg = chan->device->xor_reg;
+ rv = ioread32be(&xor_reg->sr);
+ iowrite32be(rv, &xor_reg->sr);
+
+ if (rv & (XOR_IE_ICBIE_BIT|XOR_IE_ICIE_BIT|XOR_IE_RPTIE_BIT)) {
+ if (rv & XOR_IE_RPTIE_BIT) {
+ /* Read PLB Timeout Error.
+ * Try to resubmit the CB
+ */
+ u32 val = ioread32be(&xor_reg->ccbalr);
+
+ iowrite32be(val, &xor_reg->cblalr);
+
+ val = ioread32be(&xor_reg->crsr);
+ iowrite32be(val | XOR_CRSR_XAE_BIT,
+ &xor_reg->crsr);
+ } else
+ pr_err("XOR ERR 0x%x status\n", rv);
+ break;
+ }
+
+ /* if the XORcore is idle, but there are unprocessed CBs
+ * then refetch the s/w chain here
+ */
+ if (!(ioread32be(&xor_reg->sr) & XOR_SR_XCP_BIT) &&
+ do_xor_refetch)
+ ppc440spe_chan_append(chan);
+ break;
+ }
+}
+
+/**
+ * ppc440spe_chan_is_busy - get the channel status
+ */
+static int ppc440spe_chan_is_busy(struct ppc440spe_adma_chan *chan)
+{
+ struct dma_regs *dma_reg;
+ struct xor_regs *xor_reg;
+ int busy = 0;
+
+ switch (chan->device->id) {
+ case PPC440SPE_DMA0_ID:
+ case PPC440SPE_DMA1_ID:
+ dma_reg = chan->device->dma_reg;
+ /* if command FIFO's head and tail pointers are equal and
+ * status tail is the same as command, then channel is free
+ */
+ if (ioread16(&dma_reg->cpfhp) != ioread16(&dma_reg->cpftp) ||
+ ioread16(&dma_reg->cpftp) != ioread16(&dma_reg->csftp))
+ busy = 1;
+ break;
+ case PPC440SPE_XOR_ID:
+ /* use the special status bit for the XORcore
+ */
+ xor_reg = chan->device->xor_reg;
+ busy = (ioread32be(&xor_reg->sr) & XOR_SR_XCP_BIT) ? 1 : 0;
+ break;
+ }
+
+ return busy;
+}
+
+/**
+ * ppc440spe_chan_set_first_xor_descriptor - init XORcore chain
+ */
+static void ppc440spe_chan_set_first_xor_descriptor(
+ struct ppc440spe_adma_chan *chan,
+ struct ppc440spe_adma_desc_slot *next_desc)
+{
+ struct xor_regs *xor_reg = chan->device->xor_reg;
+
+ if (ioread32be(&xor_reg->sr) & XOR_SR_XCP_BIT)
+ printk(KERN_INFO "%s: Warn: XORcore is running "
+ "when try to set the first CDB!\n",
+ __func__);
+
+ xor_last_submit = xor_last_linked = next_desc;
+
+ iowrite32be(XOR_CRSR_64BA_BIT, &xor_reg->crsr);
+
+ iowrite32be(next_desc->phys, &xor_reg->cblalr);
+ iowrite32be(0, &xor_reg->cblahr);
+ iowrite32be(ioread32be(&xor_reg->cbcr) | XOR_CBCR_LNK_BIT,
+ &xor_reg->cbcr);
+
+ chan->hw_chain_inited = 1;
+}
+
+/**
+ * ppc440spe_dma_put_desc - put DMA0,1 descriptor to FIFO.
+ * called with irqs disabled
+ */
+static void ppc440spe_dma_put_desc(struct ppc440spe_adma_chan *chan,
+ struct ppc440spe_adma_desc_slot *desc)
+{
+ u32 pcdb;
+ struct dma_regs *dma_reg = chan->device->dma_reg;
+
+ pcdb = desc->phys;
+ if (!test_bit(PPC440SPE_DESC_INT, &desc->flags))
+ pcdb |= DMA_CDB_NO_INT;
+
+ chan_last_sub[chan->device->id] = desc;
+
+ ADMA_LL_DBG(print_cb(chan, desc->hw_desc));
+
+ iowrite32(pcdb, &dma_reg->cpfpl);
+}
+
+/**
+ * ppc440spe_chan_append - update the h/w chain in the channel
+ */
+static void ppc440spe_chan_append(struct ppc440spe_adma_chan *chan)
+{
+ struct xor_regs *xor_reg;
+ struct ppc440spe_adma_desc_slot *iter;
+ struct xor_cb *xcb;
+ u32 cur_desc;
+ unsigned long flags;
+
+ local_irq_save(flags);
+
+ switch (chan->device->id) {
+ case PPC440SPE_DMA0_ID:
+ case PPC440SPE_DMA1_ID:
+ cur_desc = ppc440spe_chan_get_current_descriptor(chan);
+
+ if (likely(cur_desc)) {
+ iter = chan_last_sub[chan->device->id];
+ BUG_ON(!iter);
+ } else {
+ /* first peer */
+ iter = chan_first_cdb[chan->device->id];
+ BUG_ON(!iter);
+ ppc440spe_dma_put_desc(chan, iter);
+ chan->hw_chain_inited = 1;
+ }
+
+ /* is there something new to append */
+ if (!iter->hw_next)
+ break;
+
+ /* flush descriptors from the s/w queue to fifo */
+ list_for_each_entry_continue(iter, &chan->chain, chain_node) {
+ ppc440spe_dma_put_desc(chan, iter);
+ if (!iter->hw_next)
+ break;
+ }
+ break;
+ case PPC440SPE_XOR_ID:
+ /* update h/w links and refetch */
+ if (!xor_last_submit->hw_next)
+ break;
+
+ xor_reg = chan->device->xor_reg;
+ /* the last linked CDB has to generate an interrupt
+ * that we'd be able to append the next lists to h/w
+ * regardless of the XOR engine state at the moment of
+ * appending of these next lists
+ */
+ xcb = xor_last_linked->hw_desc;
+ xcb->cbc |= XOR_CBCR_CBCE_BIT;
+
+ if (!(ioread32be(&xor_reg->sr) & XOR_SR_XCP_BIT)) {
+ /* XORcore is idle. Refetch now */
+ do_xor_refetch = 0;
+ ppc440spe_xor_set_link(xor_last_submit,
+ xor_last_submit->hw_next);
+
+ ADMA_LL_DBG(print_cb_list(chan,
+ xor_last_submit->hw_next));
+
+ xor_last_submit = xor_last_linked;
+ iowrite32be(ioread32be(&xor_reg->crsr) |
+ XOR_CRSR_RCBE_BIT | XOR_CRSR_64BA_BIT,
+ &xor_reg->crsr);
+ } else {
+ /* XORcore is running. Refetch later in the handler */
+ do_xor_refetch = 1;
+ }
+
+ break;
+ }
+
+ local_irq_restore(flags);
+}
+
+/**
+ * ppc440spe_chan_get_current_descriptor - get the currently executed descriptor
+ */
+static u32
+ppc440spe_chan_get_current_descriptor(struct ppc440spe_adma_chan *chan)
+{
+ struct dma_regs *dma_reg;
+ struct xor_regs *xor_reg;
+
+ if (unlikely(!chan->hw_chain_inited))
+ /* h/w descriptor chain is not initialized yet */
+ return 0;
+
+ switch (chan->device->id) {
+ case PPC440SPE_DMA0_ID:
+ case PPC440SPE_DMA1_ID:
+ dma_reg = chan->device->dma_reg;
+ return ioread32(&dma_reg->acpl) & (~DMA_CDB_MSK);
+ case PPC440SPE_XOR_ID:
+ xor_reg = chan->device->xor_reg;
+ return ioread32be(&xor_reg->ccbalr);
+ }
+ return 0;
+}
+
+/**
+ * ppc440spe_chan_run - enable the channel
+ */
+static void ppc440spe_chan_run(struct ppc440spe_adma_chan *chan)
+{
+ struct xor_regs *xor_reg;
+
+ switch (chan->device->id) {
+ case PPC440SPE_DMA0_ID:
+ case PPC440SPE_DMA1_ID:
+ /* DMAs are always enabled, do nothing */
+ break;
+ case PPC440SPE_XOR_ID:
+ /* drain write buffer */
+ xor_reg = chan->device->xor_reg;
+
+ /* fetch descriptor pointed to in <link> */
+ iowrite32be(XOR_CRSR_64BA_BIT | XOR_CRSR_XAE_BIT,
+ &xor_reg->crsr);
+ break;
+ }
+}
+
+/******************************************************************************
+ * ADMA device level
+ ******************************************************************************/
+
+static void ppc440spe_chan_start_null_xor(struct ppc440spe_adma_chan *chan);
+static int ppc440spe_adma_alloc_chan_resources(struct dma_chan *chan);
+
+static dma_cookie_t
+ppc440spe_adma_tx_submit(struct dma_async_tx_descriptor *tx);
+
+static void ppc440spe_adma_set_dest(struct ppc440spe_adma_desc_slot *tx,
+ dma_addr_t addr, int index);
+static void
+ppc440spe_adma_memcpy_xor_set_src(struct ppc440spe_adma_desc_slot *tx,
+ dma_addr_t addr, int index);
+
+static void
+ppc440spe_adma_pq_set_dest(struct ppc440spe_adma_desc_slot *tx,
+ dma_addr_t *paddr, unsigned long flags);
+static void
+ppc440spe_adma_pq_set_src(struct ppc440spe_adma_desc_slot *tx,
+ dma_addr_t addr, int index);
+static void
+ppc440spe_adma_pq_set_src_mult(struct ppc440spe_adma_desc_slot *tx,
+ unsigned char mult, int index, int dst_pos);
+static void
+ppc440spe_adma_pqzero_sum_set_dest(struct ppc440spe_adma_desc_slot *tx,
+ dma_addr_t paddr, dma_addr_t qaddr);
+
+static struct page *ppc440spe_rxor_srcs[32];
+
+/**
+ * ppc440spe_can_rxor - check if the operands may be processed with RXOR
+ */
+static int ppc440spe_can_rxor(struct page **srcs, int src_cnt, size_t len)
+{
+ int i, order = 0, state = 0;
+ int idx = 0;
+
+ if (unlikely(!(src_cnt > 1)))
+ return 0;
+
+ BUG_ON(src_cnt > ARRAY_SIZE(ppc440spe_rxor_srcs));
+
+ /* Skip holes in the source list before checking */
+ for (i = 0; i < src_cnt; i++) {
+ if (!srcs[i])
+ continue;
+ ppc440spe_rxor_srcs[idx++] = srcs[i];
+ }
+ src_cnt = idx;
+
+ for (i = 1; i < src_cnt; i++) {
+ char *cur_addr = page_address(ppc440spe_rxor_srcs[i]);
+ char *old_addr = page_address(ppc440spe_rxor_srcs[i - 1]);
+
+ switch (state) {
+ case 0:
+ if (cur_addr == old_addr + len) {
+ /* direct RXOR */
+ order = 1;
+ state = 1;
+ } else if (old_addr == cur_addr + len) {
+ /* reverse RXOR */
+ order = -1;
+ state = 1;
+ } else
+ goto out;
+ break;
+ case 1:
+ if ((i == src_cnt - 2) ||
+ (order == -1 && cur_addr != old_addr - len)) {
+ order = 0;
+ state = 0;
+ } else if ((cur_addr == old_addr + len * order) ||
+ (cur_addr == old_addr + 2 * len) ||
+ (cur_addr == old_addr + 3 * len)) {
+ state = 2;
+ } else {
+ order = 0;
+ state = 0;
+ }
+ break;
+ case 2:
+ order = 0;
+ state = 0;
+ break;
+ }
+ }
+
+out:
+ if (state == 1 || state == 2)
+ return 1;
+
+ return 0;
+}
+
+/**
+ * ppc440spe_adma_device_estimate - estimate the efficiency of processing
+ * the operation given on this channel. It's assumed that 'chan' is
+ * capable to process 'cap' type of operation.
+ * @chan: channel to use
+ * @cap: type of transaction
+ * @dst_lst: array of destination pointers
+ * @dst_cnt: number of destination operands
+ * @src_lst: array of source pointers
+ * @src_cnt: number of source operands
+ * @src_sz: size of each source operand
+ */
+static int ppc440spe_adma_estimate(struct dma_chan *chan,
+ enum dma_transaction_type cap, struct page **dst_lst, int dst_cnt,
+ struct page **src_lst, int src_cnt, size_t src_sz)
+{
+ int ef = 1;
+
+ if (cap == DMA_PQ || cap == DMA_PQ_VAL) {
+ /* If RAID-6 capabilities were not activated don't try
+ * to use them
+ */
+ if (unlikely(!ppc440spe_r6_enabled))
+ return -1;
+ }
+ /* In the current implementation of ppc440spe ADMA driver it
+ * makes sense to pick out only pq case, because it may be
+ * processed:
+ * (1) either using Biskup method on DMA2;
+ * (2) or on DMA0/1.
+ * Thus we give a favour to (1) if the sources are suitable;
+ * else let it be processed on one of the DMA0/1 engines.
+ * In the sum_product case where destination is also the
+ * source process it on DMA0/1 only.
+ */
+ if (cap == DMA_PQ && chan->chan_id == PPC440SPE_XOR_ID) {
+
+ if (dst_cnt == 1 && src_cnt == 2 && dst_lst[0] == src_lst[1])
+ ef = 0; /* sum_product case, process on DMA0/1 */
+ else if (ppc440spe_can_rxor(src_lst, src_cnt, src_sz))
+ ef = 3; /* override (DMA0/1 + idle) */
+ else
+ ef = 0; /* can't process on DMA2 if !rxor */
+ }
+
+ /* channel idleness increases the priority */
+ if (likely(ef) &&
+ !ppc440spe_chan_is_busy(to_ppc440spe_adma_chan(chan)))
+ ef++;
+
+ return ef;
+}
+
+struct dma_chan *
+ppc440spe_async_tx_find_best_channel(enum dma_transaction_type cap,
+ struct page **dst_lst, int dst_cnt, struct page **src_lst,
+ int src_cnt, size_t src_sz)
+{
+ struct dma_chan *best_chan = NULL;
+ struct ppc_dma_chan_ref *ref;
+ int best_rank = -1;
+
+ if (unlikely(!src_sz))
+ return NULL;
+ if (src_sz > PAGE_SIZE) {
+ /*
+ * should a user of the api ever pass > PAGE_SIZE requests
+ * we sort out cases where temporary page-sized buffers
+ * are used.
+ */
+ switch (cap) {
+ case DMA_PQ:
+ if (src_cnt == 1 && dst_lst[1] == src_lst[0])
+ return NULL;
+ if (src_cnt == 2 && dst_lst[1] == src_lst[1])
+ return NULL;
+ break;
+ case DMA_PQ_VAL:
+ case DMA_XOR_VAL:
+ return NULL;
+ default:
+ break;
+ }
+ }
+
+ list_for_each_entry(ref, &ppc440spe_adma_chan_list, node) {
+ if (dma_has_cap(cap, ref->chan->device->cap_mask)) {
+ int rank;
+
+ rank = ppc440spe_adma_estimate(ref->chan, cap, dst_lst,
+ dst_cnt, src_lst, src_cnt, src_sz);
+ if (rank > best_rank) {
+ best_rank = rank;
+ best_chan = ref->chan;
+ }
+ }
+ }
+
+ return best_chan;
+}
+EXPORT_SYMBOL_GPL(ppc440spe_async_tx_find_best_channel);
+
+/**
+ * ppc440spe_get_group_entry - get group entry with index idx
+ * @tdesc: is the last allocated slot in the group.
+ */
+static struct ppc440spe_adma_desc_slot *
+ppc440spe_get_group_entry(struct ppc440spe_adma_desc_slot *tdesc, u32 entry_idx)
+{
+ struct ppc440spe_adma_desc_slot *iter = tdesc->group_head;
+ int i = 0;
+
+ if (entry_idx < 0 || entry_idx >= (tdesc->src_cnt + tdesc->dst_cnt)) {
+ printk("%s: entry_idx %d, src_cnt %d, dst_cnt %d\n",
+ __func__, entry_idx, tdesc->src_cnt, tdesc->dst_cnt);
+ BUG();
+ }
+
+ list_for_each_entry(iter, &tdesc->group_list, chain_node) {
+ if (i++ == entry_idx)
+ break;
+ }
+ return iter;
+}
+
+/**
+ * ppc440spe_adma_free_slots - flags descriptor slots for reuse
+ * @slot: Slot to free
+ * Caller must hold &ppc440spe_chan->lock while calling this function
+ */
+static void ppc440spe_adma_free_slots(struct ppc440spe_adma_desc_slot *slot,
+ struct ppc440spe_adma_chan *chan)
+{
+ int stride = slot->slots_per_op;
+
+ while (stride--) {
+ slot->slots_per_op = 0;
+ slot = list_entry(slot->slot_node.next,
+ struct ppc440spe_adma_desc_slot,
+ slot_node);
+ }
+}
+
+/**
+ * ppc440spe_adma_run_tx_complete_actions - call functions to be called
+ * upon completion
+ */
+static dma_cookie_t ppc440spe_adma_run_tx_complete_actions(
+ struct ppc440spe_adma_desc_slot *desc,
+ struct ppc440spe_adma_chan *chan,
+ dma_cookie_t cookie)
+{
+ BUG_ON(desc->async_tx.cookie < 0);
+ if (desc->async_tx.cookie > 0) {
+ cookie = desc->async_tx.cookie;
+ desc->async_tx.cookie = 0;
+
+ dma_descriptor_unmap(&desc->async_tx);
+ /* call the callback (must not sleep or submit new
+ * operations to this channel)
+ */
+ dmaengine_desc_get_callback_invoke(&desc->async_tx, NULL);
+ }
+
+ /* run dependent operations */
+ dma_run_dependencies(&desc->async_tx);
+
+ return cookie;
+}
+
+/**
+ * ppc440spe_adma_clean_slot - clean up CDB slot (if ack is set)
+ */
+static int ppc440spe_adma_clean_slot(struct ppc440spe_adma_desc_slot *desc,
+ struct ppc440spe_adma_chan *chan)
+{
+ /* the client is allowed to attach dependent operations
+ * until 'ack' is set
+ */
+ if (!async_tx_test_ack(&desc->async_tx))
+ return 0;
+
+ /* leave the last descriptor in the chain
+ * so we can append to it
+ */
+ if (list_is_last(&desc->chain_node, &chan->chain) ||
+ desc->phys == ppc440spe_chan_get_current_descriptor(chan))
+ return 1;
+
+ if (chan->device->id != PPC440SPE_XOR_ID) {
+ /* our DMA interrupt handler clears opc field of
+ * each processed descriptor. For all types of
+ * operations except for ZeroSum we do not actually
+ * need ack from the interrupt handler. ZeroSum is a
+ * special case since the result of this operation
+ * is available from the handler only, so if we see
+ * such type of descriptor (which is unprocessed yet)
+ * then leave it in chain.
+ */
+ struct dma_cdb *cdb = desc->hw_desc;
+ if (cdb->opc == DMA_CDB_OPC_DCHECK128)
+ return 1;
+ }
+
+ dev_dbg(chan->device->common.dev, "\tfree slot %llx: %d stride: %d\n",
+ desc->phys, desc->idx, desc->slots_per_op);
+
+ list_del(&desc->chain_node);
+ ppc440spe_adma_free_slots(desc, chan);
+ return 0;
+}
+
+/**
+ * __ppc440spe_adma_slot_cleanup - this is the common clean-up routine
+ * which runs through the channel CDBs list until reach the descriptor
+ * currently processed. When routine determines that all CDBs of group
+ * are completed then corresponding callbacks (if any) are called and slots
+ * are freed.
+ */
+static void __ppc440spe_adma_slot_cleanup(struct ppc440spe_adma_chan *chan)
+{
+ struct ppc440spe_adma_desc_slot *iter, *_iter, *group_start = NULL;
+ dma_cookie_t cookie = 0;
+ u32 current_desc = ppc440spe_chan_get_current_descriptor(chan);
+ int busy = ppc440spe_chan_is_busy(chan);
+ int seen_current = 0, slot_cnt = 0, slots_per_op = 0;
+
+ dev_dbg(chan->device->common.dev, "ppc440spe adma%d: %s\n",
+ chan->device->id, __func__);
+
+ if (!current_desc) {
+ /* There were no transactions yet, so
+ * nothing to clean
+ */
+ return;
+ }
+
+ /* free completed slots from the chain starting with
+ * the oldest descriptor
+ */
+ list_for_each_entry_safe(iter, _iter, &chan->chain,
+ chain_node) {
+ dev_dbg(chan->device->common.dev, "\tcookie: %d slot: %d "
+ "busy: %d this_desc: %#llx next_desc: %#x "
+ "cur: %#x ack: %d\n",
+ iter->async_tx.cookie, iter->idx, busy, iter->phys,
+ ppc440spe_desc_get_link(iter, chan), current_desc,
+ async_tx_test_ack(&iter->async_tx));
+ prefetch(_iter);
+ prefetch(&_iter->async_tx);
+
+ /* do not advance past the current descriptor loaded into the
+ * hardware channel,subsequent descriptors are either in process
+ * or have not been submitted
+ */
+ if (seen_current)
+ break;
+
+ /* stop the search if we reach the current descriptor and the
+ * channel is busy, or if it appears that the current descriptor
+ * needs to be re-read (i.e. has been appended to)
+ */
+ if (iter->phys == current_desc) {
+ BUG_ON(seen_current++);
+ if (busy || ppc440spe_desc_get_link(iter, chan)) {
+ /* not all descriptors of the group have
+ * been completed; exit.
+ */
+ break;
+ }
+ }
+
+ /* detect the start of a group transaction */
+ if (!slot_cnt && !slots_per_op) {
+ slot_cnt = iter->slot_cnt;
+ slots_per_op = iter->slots_per_op;
+ if (slot_cnt <= slots_per_op) {
+ slot_cnt = 0;
+ slots_per_op = 0;
+ }
+ }
+
+ if (slot_cnt) {
+ if (!group_start)
+ group_start = iter;
+ slot_cnt -= slots_per_op;
+ }
+
+ /* all the members of a group are complete */
+ if (slots_per_op != 0 && slot_cnt == 0) {
+ struct ppc440spe_adma_desc_slot *grp_iter, *_grp_iter;
+ int end_of_chain = 0;
+
+ /* clean up the group */
+ slot_cnt = group_start->slot_cnt;
+ grp_iter = group_start;
+ list_for_each_entry_safe_from(grp_iter, _grp_iter,
+ &chan->chain, chain_node) {
+
+ cookie = ppc440spe_adma_run_tx_complete_actions(
+ grp_iter, chan, cookie);
+
+ slot_cnt -= slots_per_op;
+ end_of_chain = ppc440spe_adma_clean_slot(
+ grp_iter, chan);
+ if (end_of_chain && slot_cnt) {
+ /* Should wait for ZeroSum completion */
+ if (cookie > 0)
+ chan->common.completed_cookie = cookie;
+ return;
+ }
+
+ if (slot_cnt == 0 || end_of_chain)
+ break;
+ }
+
+ /* the group should be complete at this point */
+ BUG_ON(slot_cnt);
+
+ slots_per_op = 0;
+ group_start = NULL;
+ if (end_of_chain)
+ break;
+ else
+ continue;
+ } else if (slots_per_op) /* wait for group completion */
+ continue;
+
+ cookie = ppc440spe_adma_run_tx_complete_actions(iter, chan,
+ cookie);
+
+ if (ppc440spe_adma_clean_slot(iter, chan))
+ break;
+ }
+
+ BUG_ON(!seen_current);
+
+ if (cookie > 0) {
+ chan->common.completed_cookie = cookie;
+ pr_debug("\tcompleted cookie %d\n", cookie);
+ }
+
+}
+
+/**
+ * ppc440spe_adma_tasklet - clean up watch-dog initiator
+ */
+static void ppc440spe_adma_tasklet(struct tasklet_struct *t)
+{
+ struct ppc440spe_adma_chan *chan = from_tasklet(chan, t, irq_tasklet);
+
+ spin_lock_nested(&chan->lock, SINGLE_DEPTH_NESTING);
+ __ppc440spe_adma_slot_cleanup(chan);
+ spin_unlock(&chan->lock);
+}
+
+/**
+ * ppc440spe_adma_slot_cleanup - clean up scheduled initiator
+ */
+static void ppc440spe_adma_slot_cleanup(struct ppc440spe_adma_chan *chan)
+{
+ spin_lock_bh(&chan->lock);
+ __ppc440spe_adma_slot_cleanup(chan);
+ spin_unlock_bh(&chan->lock);
+}
+
+/**
+ * ppc440spe_adma_alloc_slots - allocate free slots (if any)
+ */
+static struct ppc440spe_adma_desc_slot *ppc440spe_adma_alloc_slots(
+ struct ppc440spe_adma_chan *chan, int num_slots,
+ int slots_per_op)
+{
+ struct ppc440spe_adma_desc_slot *iter = NULL, *_iter;
+ struct ppc440spe_adma_desc_slot *alloc_start = NULL;
+ int slots_found, retry = 0;
+ LIST_HEAD(chain);
+
+
+ BUG_ON(!num_slots || !slots_per_op);
+ /* start search from the last allocated descrtiptor
+ * if a contiguous allocation can not be found start searching
+ * from the beginning of the list
+ */
+retry:
+ slots_found = 0;
+ if (retry == 0)
+ iter = chan->last_used;
+ else
+ iter = list_entry(&chan->all_slots,
+ struct ppc440spe_adma_desc_slot,
+ slot_node);
+ list_for_each_entry_safe_continue(iter, _iter, &chan->all_slots,
+ slot_node) {
+ prefetch(_iter);
+ prefetch(&_iter->async_tx);
+ if (iter->slots_per_op) {
+ slots_found = 0;
+ continue;
+ }
+
+ /* start the allocation if the slot is correctly aligned */
+ if (!slots_found++)
+ alloc_start = iter;
+
+ if (slots_found == num_slots) {
+ struct ppc440spe_adma_desc_slot *alloc_tail = NULL;
+ struct ppc440spe_adma_desc_slot *last_used = NULL;
+
+ iter = alloc_start;
+ while (num_slots) {
+ int i;
+ /* pre-ack all but the last descriptor */
+ if (num_slots != slots_per_op)
+ async_tx_ack(&iter->async_tx);
+
+ list_add_tail(&iter->chain_node, &chain);
+ alloc_tail = iter;
+ iter->async_tx.cookie = 0;
+ iter->hw_next = NULL;
+ iter->flags = 0;
+ iter->slot_cnt = num_slots;
+ iter->xor_check_result = NULL;
+ for (i = 0; i < slots_per_op; i++) {
+ iter->slots_per_op = slots_per_op - i;
+ last_used = iter;
+ iter = list_entry(iter->slot_node.next,
+ struct ppc440spe_adma_desc_slot,
+ slot_node);
+ }
+ num_slots -= slots_per_op;
+ }
+ alloc_tail->group_head = alloc_start;
+ alloc_tail->async_tx.cookie = -EBUSY;
+ list_splice(&chain, &alloc_tail->group_list);
+ chan->last_used = last_used;
+ return alloc_tail;
+ }
+ }
+ if (!retry++)
+ goto retry;
+
+ /* try to free some slots if the allocation fails */
+ tasklet_schedule(&chan->irq_tasklet);
+ return NULL;
+}
+
+/**
+ * ppc440spe_adma_alloc_chan_resources - allocate pools for CDB slots
+ */
+static int ppc440spe_adma_alloc_chan_resources(struct dma_chan *chan)
+{
+ struct ppc440spe_adma_chan *ppc440spe_chan;
+ struct ppc440spe_adma_desc_slot *slot = NULL;
+ char *hw_desc;
+ int i, db_sz;
+ int init;
+
+ ppc440spe_chan = to_ppc440spe_adma_chan(chan);
+ init = ppc440spe_chan->slots_allocated ? 0 : 1;
+ chan->chan_id = ppc440spe_chan->device->id;
+
+ /* Allocate descriptor slots */
+ i = ppc440spe_chan->slots_allocated;
+ if (ppc440spe_chan->device->id != PPC440SPE_XOR_ID)
+ db_sz = sizeof(struct dma_cdb);
+ else
+ db_sz = sizeof(struct xor_cb);
+
+ for (; i < (ppc440spe_chan->device->pool_size / db_sz); i++) {
+ slot = kzalloc(sizeof(struct ppc440spe_adma_desc_slot),
+ GFP_KERNEL);
+ if (!slot) {
+ printk(KERN_INFO "SPE ADMA Channel only initialized"
+ " %d descriptor slots", i--);
+ break;
+ }
+
+ hw_desc = (char *) ppc440spe_chan->device->dma_desc_pool_virt;
+ slot->hw_desc = (void *) &hw_desc[i * db_sz];
+ dma_async_tx_descriptor_init(&slot->async_tx, chan);
+ slot->async_tx.tx_submit = ppc440spe_adma_tx_submit;
+ INIT_LIST_HEAD(&slot->chain_node);
+ INIT_LIST_HEAD(&slot->slot_node);
+ INIT_LIST_HEAD(&slot->group_list);
+ slot->phys = ppc440spe_chan->device->dma_desc_pool + i * db_sz;
+ slot->idx = i;
+
+ spin_lock_bh(&ppc440spe_chan->lock);
+ ppc440spe_chan->slots_allocated++;
+ list_add_tail(&slot->slot_node, &ppc440spe_chan->all_slots);
+ spin_unlock_bh(&ppc440spe_chan->lock);
+ }
+
+ if (i && !ppc440spe_chan->last_used) {
+ ppc440spe_chan->last_used =
+ list_entry(ppc440spe_chan->all_slots.next,
+ struct ppc440spe_adma_desc_slot,
+ slot_node);
+ }
+
+ dev_dbg(ppc440spe_chan->device->common.dev,
+ "ppc440spe adma%d: allocated %d descriptor slots\n",
+ ppc440spe_chan->device->id, i);
+
+ /* initialize the channel and the chain with a null operation */
+ if (init) {
+ switch (ppc440spe_chan->device->id) {
+ case PPC440SPE_DMA0_ID:
+ case PPC440SPE_DMA1_ID:
+ ppc440spe_chan->hw_chain_inited = 0;
+ /* Use WXOR for self-testing */
+ if (!ppc440spe_r6_tchan)
+ ppc440spe_r6_tchan = ppc440spe_chan;
+ break;
+ case PPC440SPE_XOR_ID:
+ ppc440spe_chan_start_null_xor(ppc440spe_chan);
+ break;
+ default:
+ BUG();
+ }
+ ppc440spe_chan->needs_unmap = 1;
+ }
+
+ return (i > 0) ? i : -ENOMEM;
+}
+
+/**
+ * ppc440spe_rxor_set_region_data -
+ */
+static void ppc440spe_rxor_set_region(struct ppc440spe_adma_desc_slot *desc,
+ u8 xor_arg_no, u32 mask)
+{
+ struct xor_cb *xcb = desc->hw_desc;
+
+ xcb->ops[xor_arg_no].h |= mask;
+}
+
+/**
+ * ppc440spe_rxor_set_src -
+ */
+static void ppc440spe_rxor_set_src(struct ppc440spe_adma_desc_slot *desc,
+ u8 xor_arg_no, dma_addr_t addr)
+{
+ struct xor_cb *xcb = desc->hw_desc;
+
+ xcb->ops[xor_arg_no].h |= DMA_CUED_XOR_BASE;
+ xcb->ops[xor_arg_no].l = addr;
+}
+
+/**
+ * ppc440spe_rxor_set_mult -
+ */
+static void ppc440spe_rxor_set_mult(struct ppc440spe_adma_desc_slot *desc,
+ u8 xor_arg_no, u8 idx, u8 mult)
+{
+ struct xor_cb *xcb = desc->hw_desc;
+
+ xcb->ops[xor_arg_no].h |= mult << (DMA_CUED_MULT1_OFF + idx * 8);
+}
+
+/**
+ * ppc440spe_adma_check_threshold - append CDBs to h/w chain if threshold
+ * has been achieved
+ */
+static void ppc440spe_adma_check_threshold(struct ppc440spe_adma_chan *chan)
+{
+ dev_dbg(chan->device->common.dev, "ppc440spe adma%d: pending: %d\n",
+ chan->device->id, chan->pending);
+
+ if (chan->pending >= PPC440SPE_ADMA_THRESHOLD) {
+ chan->pending = 0;
+ ppc440spe_chan_append(chan);
+ }
+}
+
+/**
+ * ppc440spe_adma_tx_submit - submit new descriptor group to the channel
+ * (it's not necessary that descriptors will be submitted to the h/w
+ * chains too right now)
+ */
+static dma_cookie_t ppc440spe_adma_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+ struct ppc440spe_adma_desc_slot *sw_desc;
+ struct ppc440spe_adma_chan *chan = to_ppc440spe_adma_chan(tx->chan);
+ struct ppc440spe_adma_desc_slot *group_start, *old_chain_tail;
+ int slot_cnt;
+ int slots_per_op;
+ dma_cookie_t cookie;
+
+ sw_desc = tx_to_ppc440spe_adma_slot(tx);
+
+ group_start = sw_desc->group_head;
+ slot_cnt = group_start->slot_cnt;
+ slots_per_op = group_start->slots_per_op;
+
+ spin_lock_bh(&chan->lock);
+ cookie = dma_cookie_assign(tx);
+
+ if (unlikely(list_empty(&chan->chain))) {
+ /* first peer */
+ list_splice_init(&sw_desc->group_list, &chan->chain);
+ chan_first_cdb[chan->device->id] = group_start;
+ } else {
+ /* isn't first peer, bind CDBs to chain */
+ old_chain_tail = list_entry(chan->chain.prev,
+ struct ppc440spe_adma_desc_slot,
+ chain_node);
+ list_splice_init(&sw_desc->group_list,
+ &old_chain_tail->chain_node);
+ /* fix up the hardware chain */
+ ppc440spe_desc_set_link(chan, old_chain_tail, group_start);
+ }
+
+ /* increment the pending count by the number of operations */
+ chan->pending += slot_cnt / slots_per_op;
+ ppc440spe_adma_check_threshold(chan);
+ spin_unlock_bh(&chan->lock);
+
+ dev_dbg(chan->device->common.dev,
+ "ppc440spe adma%d: %s cookie: %d slot: %d tx %p\n",
+ chan->device->id, __func__,
+ sw_desc->async_tx.cookie, sw_desc->idx, sw_desc);
+
+ return cookie;
+}
+
+/**
+ * ppc440spe_adma_prep_dma_interrupt - prepare CDB for a pseudo DMA operation
+ */
+static struct dma_async_tx_descriptor *ppc440spe_adma_prep_dma_interrupt(
+ struct dma_chan *chan, unsigned long flags)
+{
+ struct ppc440spe_adma_chan *ppc440spe_chan;
+ struct ppc440spe_adma_desc_slot *sw_desc, *group_start;
+ int slot_cnt, slots_per_op;
+
+ ppc440spe_chan = to_ppc440spe_adma_chan(chan);
+
+ dev_dbg(ppc440spe_chan->device->common.dev,
+ "ppc440spe adma%d: %s\n", ppc440spe_chan->device->id,
+ __func__);
+
+ spin_lock_bh(&ppc440spe_chan->lock);
+ slot_cnt = slots_per_op = 1;
+ sw_desc = ppc440spe_adma_alloc_slots(ppc440spe_chan, slot_cnt,
+ slots_per_op);
+ if (sw_desc) {
+ group_start = sw_desc->group_head;
+ ppc440spe_desc_init_interrupt(group_start, ppc440spe_chan);
+ group_start->unmap_len = 0;
+ sw_desc->async_tx.flags = flags;
+ }
+ spin_unlock_bh(&ppc440spe_chan->lock);
+
+ return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
+/**
+ * ppc440spe_adma_prep_dma_memcpy - prepare CDB for a MEMCPY operation
+ */
+static struct dma_async_tx_descriptor *ppc440spe_adma_prep_dma_memcpy(
+ struct dma_chan *chan, dma_addr_t dma_dest,
+ dma_addr_t dma_src, size_t len, unsigned long flags)
+{
+ struct ppc440spe_adma_chan *ppc440spe_chan;
+ struct ppc440spe_adma_desc_slot *sw_desc, *group_start;
+ int slot_cnt, slots_per_op;
+
+ ppc440spe_chan = to_ppc440spe_adma_chan(chan);
+
+ if (unlikely(!len))
+ return NULL;
+
+ BUG_ON(len > PPC440SPE_ADMA_DMA_MAX_BYTE_COUNT);
+
+ spin_lock_bh(&ppc440spe_chan->lock);
+
+ dev_dbg(ppc440spe_chan->device->common.dev,
+ "ppc440spe adma%d: %s len: %u int_en %d\n",
+ ppc440spe_chan->device->id, __func__, len,
+ flags & DMA_PREP_INTERRUPT ? 1 : 0);
+ slot_cnt = slots_per_op = 1;
+ sw_desc = ppc440spe_adma_alloc_slots(ppc440spe_chan, slot_cnt,
+ slots_per_op);
+ if (sw_desc) {
+ group_start = sw_desc->group_head;
+ ppc440spe_desc_init_memcpy(group_start, flags);
+ ppc440spe_adma_set_dest(group_start, dma_dest, 0);
+ ppc440spe_adma_memcpy_xor_set_src(group_start, dma_src, 0);
+ ppc440spe_desc_set_byte_count(group_start, ppc440spe_chan, len);
+ sw_desc->unmap_len = len;
+ sw_desc->async_tx.flags = flags;
+ }
+ spin_unlock_bh(&ppc440spe_chan->lock);
+
+ return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
+/**
+ * ppc440spe_adma_prep_dma_xor - prepare CDB for a XOR operation
+ */
+static struct dma_async_tx_descriptor *ppc440spe_adma_prep_dma_xor(
+ struct dma_chan *chan, dma_addr_t dma_dest,
+ dma_addr_t *dma_src, u32 src_cnt, size_t len,
+ unsigned long flags)
+{
+ struct ppc440spe_adma_chan *ppc440spe_chan;
+ struct ppc440spe_adma_desc_slot *sw_desc, *group_start;
+ int slot_cnt, slots_per_op;
+
+ ppc440spe_chan = to_ppc440spe_adma_chan(chan);
+
+ ADMA_LL_DBG(prep_dma_xor_dbg(ppc440spe_chan->device->id,
+ dma_dest, dma_src, src_cnt));
+ if (unlikely(!len))
+ return NULL;
+ BUG_ON(len > PPC440SPE_ADMA_XOR_MAX_BYTE_COUNT);
+
+ dev_dbg(ppc440spe_chan->device->common.dev,
+ "ppc440spe adma%d: %s src_cnt: %d len: %u int_en: %d\n",
+ ppc440spe_chan->device->id, __func__, src_cnt, len,
+ flags & DMA_PREP_INTERRUPT ? 1 : 0);
+
+ spin_lock_bh(&ppc440spe_chan->lock);
+ slot_cnt = ppc440spe_chan_xor_slot_count(len, src_cnt, &slots_per_op);
+ sw_desc = ppc440spe_adma_alloc_slots(ppc440spe_chan, slot_cnt,
+ slots_per_op);
+ if (sw_desc) {
+ group_start = sw_desc->group_head;
+ ppc440spe_desc_init_xor(group_start, src_cnt, flags);
+ ppc440spe_adma_set_dest(group_start, dma_dest, 0);
+ while (src_cnt--)
+ ppc440spe_adma_memcpy_xor_set_src(group_start,
+ dma_src[src_cnt], src_cnt);
+ ppc440spe_desc_set_byte_count(group_start, ppc440spe_chan, len);
+ sw_desc->unmap_len = len;
+ sw_desc->async_tx.flags = flags;
+ }
+ spin_unlock_bh(&ppc440spe_chan->lock);
+
+ return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
+static inline void
+ppc440spe_desc_set_xor_src_cnt(struct ppc440spe_adma_desc_slot *desc,
+ int src_cnt);
+static void ppc440spe_init_rxor_cursor(struct ppc440spe_rxor *cursor);
+
+/**
+ * ppc440spe_adma_init_dma2rxor_slot -
+ */
+static void ppc440spe_adma_init_dma2rxor_slot(
+ struct ppc440spe_adma_desc_slot *desc,
+ dma_addr_t *src, int src_cnt)
+{
+ int i;
+
+ /* initialize CDB */
+ for (i = 0; i < src_cnt; i++) {
+ ppc440spe_adma_dma2rxor_prep_src(desc, &desc->rxor_cursor, i,
+ desc->src_cnt, (u32)src[i]);
+ }
+}
+
+/**
+ * ppc440spe_dma01_prep_mult -
+ * for Q operation where destination is also the source
+ */
+static struct ppc440spe_adma_desc_slot *ppc440spe_dma01_prep_mult(
+ struct ppc440spe_adma_chan *ppc440spe_chan,
+ dma_addr_t *dst, int dst_cnt, dma_addr_t *src, int src_cnt,
+ const unsigned char *scf, size_t len, unsigned long flags)
+{
+ struct ppc440spe_adma_desc_slot *sw_desc = NULL;
+ unsigned long op = 0;
+ int slot_cnt;
+
+ set_bit(PPC440SPE_DESC_WXOR, &op);
+ slot_cnt = 2;
+
+ spin_lock_bh(&ppc440spe_chan->lock);
+
+ /* use WXOR, each descriptor occupies one slot */
+ sw_desc = ppc440spe_adma_alloc_slots(ppc440spe_chan, slot_cnt, 1);
+ if (sw_desc) {
+ struct ppc440spe_adma_chan *chan;
+ struct ppc440spe_adma_desc_slot *iter;
+ struct dma_cdb *hw_desc;
+
+ chan = to_ppc440spe_adma_chan(sw_desc->async_tx.chan);
+ set_bits(op, &sw_desc->flags);
+ sw_desc->src_cnt = src_cnt;
+ sw_desc->dst_cnt = dst_cnt;
+ /* First descriptor, zero data in the destination and copy it
+ * to q page using MULTICAST transfer.
+ */
+ iter = list_first_entry(&sw_desc->group_list,
+ struct ppc440spe_adma_desc_slot,
+ chain_node);
+ memset(iter->hw_desc, 0, sizeof(struct dma_cdb));
+ /* set 'next' pointer */
+ iter->hw_next = list_entry(iter->chain_node.next,
+ struct ppc440spe_adma_desc_slot,
+ chain_node);
+ clear_bit(PPC440SPE_DESC_INT, &iter->flags);
+ hw_desc = iter->hw_desc;
+ hw_desc->opc = DMA_CDB_OPC_MULTICAST;
+
+ ppc440spe_desc_set_dest_addr(iter, chan,
+ DMA_CUED_XOR_BASE, dst[0], 0);
+ ppc440spe_desc_set_dest_addr(iter, chan, 0, dst[1], 1);
+ ppc440spe_desc_set_src_addr(iter, chan, 0, DMA_CUED_XOR_HB,
+ src[0]);
+ ppc440spe_desc_set_byte_count(iter, ppc440spe_chan, len);
+ iter->unmap_len = len;
+
+ /*
+ * Second descriptor, multiply data from the q page
+ * and store the result in real destination.
+ */
+ iter = list_first_entry(&iter->chain_node,
+ struct ppc440spe_adma_desc_slot,
+ chain_node);
+ memset(iter->hw_desc, 0, sizeof(struct dma_cdb));
+ iter->hw_next = NULL;
+ if (flags & DMA_PREP_INTERRUPT)
+ set_bit(PPC440SPE_DESC_INT, &iter->flags);
+ else
+ clear_bit(PPC440SPE_DESC_INT, &iter->flags);
+
+ hw_desc = iter->hw_desc;
+ hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2;
+ ppc440spe_desc_set_src_addr(iter, chan, 0,
+ DMA_CUED_XOR_HB, dst[1]);
+ ppc440spe_desc_set_dest_addr(iter, chan,
+ DMA_CUED_XOR_BASE, dst[0], 0);
+
+ ppc440spe_desc_set_src_mult(iter, chan, DMA_CUED_MULT1_OFF,
+ DMA_CDB_SG_DST1, scf[0]);
+ ppc440spe_desc_set_byte_count(iter, ppc440spe_chan, len);
+ iter->unmap_len = len;
+ sw_desc->async_tx.flags = flags;
+ }
+
+ spin_unlock_bh(&ppc440spe_chan->lock);
+
+ return sw_desc;
+}
+
+/**
+ * ppc440spe_dma01_prep_sum_product -
+ * Dx = A*(P+Pxy) + B*(Q+Qxy) operation where destination is also
+ * the source.
+ */
+static struct ppc440spe_adma_desc_slot *ppc440spe_dma01_prep_sum_product(
+ struct ppc440spe_adma_chan *ppc440spe_chan,
+ dma_addr_t *dst, dma_addr_t *src, int src_cnt,
+ const unsigned char *scf, size_t len, unsigned long flags)
+{
+ struct ppc440spe_adma_desc_slot *sw_desc = NULL;
+ unsigned long op = 0;
+ int slot_cnt;
+
+ set_bit(PPC440SPE_DESC_WXOR, &op);
+ slot_cnt = 3;
+
+ spin_lock_bh(&ppc440spe_chan->lock);
+
+ /* WXOR, each descriptor occupies one slot */
+ sw_desc = ppc440spe_adma_alloc_slots(ppc440spe_chan, slot_cnt, 1);
+ if (sw_desc) {
+ struct ppc440spe_adma_chan *chan;
+ struct ppc440spe_adma_desc_slot *iter;
+ struct dma_cdb *hw_desc;
+
+ chan = to_ppc440spe_adma_chan(sw_desc->async_tx.chan);
+ set_bits(op, &sw_desc->flags);
+ sw_desc->src_cnt = src_cnt;
+ sw_desc->dst_cnt = 1;
+ /* 1st descriptor, src[1] data to q page and zero destination */
+ iter = list_first_entry(&sw_desc->group_list,
+ struct ppc440spe_adma_desc_slot,
+ chain_node);
+ memset(iter->hw_desc, 0, sizeof(struct dma_cdb));
+ iter->hw_next = list_entry(iter->chain_node.next,
+ struct ppc440spe_adma_desc_slot,
+ chain_node);
+ clear_bit(PPC440SPE_DESC_INT, &iter->flags);
+ hw_desc = iter->hw_desc;
+ hw_desc->opc = DMA_CDB_OPC_MULTICAST;
+
+ ppc440spe_desc_set_dest_addr(iter, chan, DMA_CUED_XOR_BASE,
+ *dst, 0);
+ ppc440spe_desc_set_dest_addr(iter, chan, 0,
+ ppc440spe_chan->qdest, 1);
+ ppc440spe_desc_set_src_addr(iter, chan, 0, DMA_CUED_XOR_HB,
+ src[1]);
+ ppc440spe_desc_set_byte_count(iter, ppc440spe_chan, len);
+ iter->unmap_len = len;
+
+ /* 2nd descriptor, multiply src[1] data and store the
+ * result in destination */
+ iter = list_first_entry(&iter->chain_node,
+ struct ppc440spe_adma_desc_slot,
+ chain_node);
+ memset(iter->hw_desc, 0, sizeof(struct dma_cdb));
+ /* set 'next' pointer */
+ iter->hw_next = list_entry(iter->chain_node.next,
+ struct ppc440spe_adma_desc_slot,
+ chain_node);
+ if (flags & DMA_PREP_INTERRUPT)
+ set_bit(PPC440SPE_DESC_INT, &iter->flags);
+ else
+ clear_bit(PPC440SPE_DESC_INT, &iter->flags);
+
+ hw_desc = iter->hw_desc;
+ hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2;
+ ppc440spe_desc_set_src_addr(iter, chan, 0, DMA_CUED_XOR_HB,
+ ppc440spe_chan->qdest);
+ ppc440spe_desc_set_dest_addr(iter, chan, DMA_CUED_XOR_BASE,
+ *dst, 0);
+ ppc440spe_desc_set_src_mult(iter, chan, DMA_CUED_MULT1_OFF,
+ DMA_CDB_SG_DST1, scf[1]);
+ ppc440spe_desc_set_byte_count(iter, ppc440spe_chan, len);
+ iter->unmap_len = len;
+
+ /*
+ * 3rd descriptor, multiply src[0] data and xor it
+ * with destination
+ */
+ iter = list_first_entry(&iter->chain_node,
+ struct ppc440spe_adma_desc_slot,
+ chain_node);
+ memset(iter->hw_desc, 0, sizeof(struct dma_cdb));
+ iter->hw_next = NULL;
+ if (flags & DMA_PREP_INTERRUPT)
+ set_bit(PPC440SPE_DESC_INT, &iter->flags);
+ else
+ clear_bit(PPC440SPE_DESC_INT, &iter->flags);
+
+ hw_desc = iter->hw_desc;
+ hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2;
+ ppc440spe_desc_set_src_addr(iter, chan, 0, DMA_CUED_XOR_HB,
+ src[0]);
+ ppc440spe_desc_set_dest_addr(iter, chan, DMA_CUED_XOR_BASE,
+ *dst, 0);
+ ppc440spe_desc_set_src_mult(iter, chan, DMA_CUED_MULT1_OFF,
+ DMA_CDB_SG_DST1, scf[0]);
+ ppc440spe_desc_set_byte_count(iter, ppc440spe_chan, len);
+ iter->unmap_len = len;
+ sw_desc->async_tx.flags = flags;
+ }
+
+ spin_unlock_bh(&ppc440spe_chan->lock);
+
+ return sw_desc;
+}
+
+static struct ppc440spe_adma_desc_slot *ppc440spe_dma01_prep_pq(
+ struct ppc440spe_adma_chan *ppc440spe_chan,
+ dma_addr_t *dst, int dst_cnt, dma_addr_t *src, int src_cnt,
+ const unsigned char *scf, size_t len, unsigned long flags)
+{
+ int slot_cnt;
+ struct ppc440spe_adma_desc_slot *sw_desc = NULL, *iter;
+ unsigned long op = 0;
+ unsigned char mult = 1;
+
+ pr_debug("%s: dst_cnt %d, src_cnt %d, len %d\n",
+ __func__, dst_cnt, src_cnt, len);
+ /* select operations WXOR/RXOR depending on the
+ * source addresses of operators and the number
+ * of destinations (RXOR support only Q-parity calculations)
+ */
+ set_bit(PPC440SPE_DESC_WXOR, &op);
+ if (!test_and_set_bit(PPC440SPE_RXOR_RUN, &ppc440spe_rxor_state)) {
+ /* no active RXOR;
+ * do RXOR if:
+ * - there are more than 1 source,
+ * - len is aligned on 512-byte boundary,
+ * - source addresses fit to one of 4 possible regions.
+ */
+ if (src_cnt > 1 &&
+ !(len & MQ0_CF2H_RXOR_BS_MASK) &&
+ (src[0] + len) == src[1]) {
+ /* may do RXOR R1 R2 */
+ set_bit(PPC440SPE_DESC_RXOR, &op);
+ if (src_cnt != 2) {
+ /* may try to enhance region of RXOR */
+ if ((src[1] + len) == src[2]) {
+ /* do RXOR R1 R2 R3 */
+ set_bit(PPC440SPE_DESC_RXOR123,
+ &op);
+ } else if ((src[1] + len * 2) == src[2]) {
+ /* do RXOR R1 R2 R4 */
+ set_bit(PPC440SPE_DESC_RXOR124, &op);
+ } else if ((src[1] + len * 3) == src[2]) {
+ /* do RXOR R1 R2 R5 */
+ set_bit(PPC440SPE_DESC_RXOR125,
+ &op);
+ } else {
+ /* do RXOR R1 R2 */
+ set_bit(PPC440SPE_DESC_RXOR12,
+ &op);
+ }
+ } else {
+ /* do RXOR R1 R2 */
+ set_bit(PPC440SPE_DESC_RXOR12, &op);
+ }
+ }
+
+ if (!test_bit(PPC440SPE_DESC_RXOR, &op)) {
+ /* can not do this operation with RXOR */
+ clear_bit(PPC440SPE_RXOR_RUN,
+ &ppc440spe_rxor_state);
+ } else {
+ /* can do; set block size right now */
+ ppc440spe_desc_set_rxor_block_size(len);
+ }
+ }
+
+ /* Number of necessary slots depends on operation type selected */
+ if (!test_bit(PPC440SPE_DESC_RXOR, &op)) {
+ /* This is a WXOR only chain. Need descriptors for each
+ * source to GF-XOR them with WXOR, and need descriptors
+ * for each destination to zero them with WXOR
+ */
+ slot_cnt = src_cnt;
+
+ if (flags & DMA_PREP_ZERO_P) {
+ slot_cnt++;
+ set_bit(PPC440SPE_ZERO_P, &op);
+ }
+ if (flags & DMA_PREP_ZERO_Q) {
+ slot_cnt++;
+ set_bit(PPC440SPE_ZERO_Q, &op);
+ }
+ } else {
+ /* Need 1/2 descriptor for RXOR operation, and
+ * need (src_cnt - (2 or 3)) for WXOR of sources
+ * remained (if any)
+ */
+ slot_cnt = dst_cnt;
+
+ if (flags & DMA_PREP_ZERO_P)
+ set_bit(PPC440SPE_ZERO_P, &op);
+ if (flags & DMA_PREP_ZERO_Q)
+ set_bit(PPC440SPE_ZERO_Q, &op);
+
+ if (test_bit(PPC440SPE_DESC_RXOR12, &op))
+ slot_cnt += src_cnt - 2;
+ else
+ slot_cnt += src_cnt - 3;
+
+ /* Thus we have either RXOR only chain or
+ * mixed RXOR/WXOR
+ */
+ if (slot_cnt == dst_cnt)
+ /* RXOR only chain */
+ clear_bit(PPC440SPE_DESC_WXOR, &op);
+ }
+
+ spin_lock_bh(&ppc440spe_chan->lock);
+ /* for both RXOR/WXOR each descriptor occupies one slot */
+ sw_desc = ppc440spe_adma_alloc_slots(ppc440spe_chan, slot_cnt, 1);
+ if (sw_desc) {
+ ppc440spe_desc_init_dma01pq(sw_desc, dst_cnt, src_cnt,
+ flags, op);
+
+ /* setup dst/src/mult */
+ pr_debug("%s: set dst descriptor 0, 1: 0x%016llx, 0x%016llx\n",
+ __func__, dst[0], dst[1]);
+ ppc440spe_adma_pq_set_dest(sw_desc, dst, flags);
+ while (src_cnt--) {
+ ppc440spe_adma_pq_set_src(sw_desc, src[src_cnt],
+ src_cnt);
+
+ /* NOTE: "Multi = 0 is equivalent to = 1" as it
+ * stated in 440SPSPe_RAID6_Addendum_UM_1_17.pdf
+ * doesn't work for RXOR with DMA0/1! Instead, multi=0
+ * leads to zeroing source data after RXOR.
+ * So, for P case set-up mult=1 explicitly.
+ */
+ if (!(flags & DMA_PREP_PQ_DISABLE_Q))
+ mult = scf[src_cnt];
+ ppc440spe_adma_pq_set_src_mult(sw_desc,
+ mult, src_cnt, dst_cnt - 1);
+ }
+
+ /* Setup byte count foreach slot just allocated */
+ sw_desc->async_tx.flags = flags;
+ list_for_each_entry(iter, &sw_desc->group_list,
+ chain_node) {
+ ppc440spe_desc_set_byte_count(iter,
+ ppc440spe_chan, len);
+ iter->unmap_len = len;
+ }
+ }
+ spin_unlock_bh(&ppc440spe_chan->lock);
+
+ return sw_desc;
+}
+
+static struct ppc440spe_adma_desc_slot *ppc440spe_dma2_prep_pq(
+ struct ppc440spe_adma_chan *ppc440spe_chan,
+ dma_addr_t *dst, int dst_cnt, dma_addr_t *src, int src_cnt,
+ const unsigned char *scf, size_t len, unsigned long flags)
+{
+ int slot_cnt, descs_per_op;
+ struct ppc440spe_adma_desc_slot *sw_desc = NULL, *iter;
+ unsigned long op = 0;
+ unsigned char mult = 1;
+
+ BUG_ON(!dst_cnt);
+ /*pr_debug("%s: dst_cnt %d, src_cnt %d, len %d\n",
+ __func__, dst_cnt, src_cnt, len);*/
+
+ spin_lock_bh(&ppc440spe_chan->lock);
+ descs_per_op = ppc440spe_dma2_pq_slot_count(src, src_cnt, len);
+ if (descs_per_op < 0) {
+ spin_unlock_bh(&ppc440spe_chan->lock);
+ return NULL;
+ }
+
+ /* depending on number of sources we have 1 or 2 RXOR chains */
+ slot_cnt = descs_per_op * dst_cnt;
+
+ sw_desc = ppc440spe_adma_alloc_slots(ppc440spe_chan, slot_cnt, 1);
+ if (sw_desc) {
+ op = slot_cnt;
+ sw_desc->async_tx.flags = flags;
+ list_for_each_entry(iter, &sw_desc->group_list, chain_node) {
+ ppc440spe_desc_init_dma2pq(iter, dst_cnt, src_cnt,
+ --op ? 0 : flags);
+ ppc440spe_desc_set_byte_count(iter, ppc440spe_chan,
+ len);
+ iter->unmap_len = len;
+
+ ppc440spe_init_rxor_cursor(&(iter->rxor_cursor));
+ iter->rxor_cursor.len = len;
+ iter->descs_per_op = descs_per_op;
+ }
+ op = 0;
+ list_for_each_entry(iter, &sw_desc->group_list, chain_node) {
+ op++;
+ if (op % descs_per_op == 0)
+ ppc440spe_adma_init_dma2rxor_slot(iter, src,
+ src_cnt);
+ if (likely(!list_is_last(&iter->chain_node,
+ &sw_desc->group_list))) {
+ /* set 'next' pointer */
+ iter->hw_next =
+ list_entry(iter->chain_node.next,
+ struct ppc440spe_adma_desc_slot,
+ chain_node);
+ ppc440spe_xor_set_link(iter, iter->hw_next);
+ } else {
+ /* this is the last descriptor. */
+ iter->hw_next = NULL;
+ }
+ }
+
+ /* fixup head descriptor */
+ sw_desc->dst_cnt = dst_cnt;
+ if (flags & DMA_PREP_ZERO_P)
+ set_bit(PPC440SPE_ZERO_P, &sw_desc->flags);
+ if (flags & DMA_PREP_ZERO_Q)
+ set_bit(PPC440SPE_ZERO_Q, &sw_desc->flags);
+
+ /* setup dst/src/mult */
+ ppc440spe_adma_pq_set_dest(sw_desc, dst, flags);
+
+ while (src_cnt--) {
+ /* handle descriptors (if dst_cnt == 2) inside
+ * the ppc440spe_adma_pq_set_srcxxx() functions
+ */
+ ppc440spe_adma_pq_set_src(sw_desc, src[src_cnt],
+ src_cnt);
+ if (!(flags & DMA_PREP_PQ_DISABLE_Q))
+ mult = scf[src_cnt];
+ ppc440spe_adma_pq_set_src_mult(sw_desc,
+ mult, src_cnt, dst_cnt - 1);
+ }
+ }
+ spin_unlock_bh(&ppc440spe_chan->lock);
+ ppc440spe_desc_set_rxor_block_size(len);
+ return sw_desc;
+}
+
+/**
+ * ppc440spe_adma_prep_dma_pq - prepare CDB (group) for a GF-XOR operation
+ */
+static struct dma_async_tx_descriptor *ppc440spe_adma_prep_dma_pq(
+ struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src,
+ unsigned int src_cnt, const unsigned char *scf,
+ size_t len, unsigned long flags)
+{
+ struct ppc440spe_adma_chan *ppc440spe_chan;
+ struct ppc440spe_adma_desc_slot *sw_desc = NULL;
+ int dst_cnt = 0;
+
+ ppc440spe_chan = to_ppc440spe_adma_chan(chan);
+
+ ADMA_LL_DBG(prep_dma_pq_dbg(ppc440spe_chan->device->id,
+ dst, src, src_cnt));
+ BUG_ON(!len);
+ BUG_ON(len > PPC440SPE_ADMA_XOR_MAX_BYTE_COUNT);
+ BUG_ON(!src_cnt);
+
+ if (src_cnt == 1 && dst[1] == src[0]) {
+ dma_addr_t dest[2];
+
+ /* dst[1] is real destination (Q) */
+ dest[0] = dst[1];
+ /* this is the page to multicast source data to */
+ dest[1] = ppc440spe_chan->qdest;
+ sw_desc = ppc440spe_dma01_prep_mult(ppc440spe_chan,
+ dest, 2, src, src_cnt, scf, len, flags);
+ return sw_desc ? &sw_desc->async_tx : NULL;
+ }
+
+ if (src_cnt == 2 && dst[1] == src[1]) {
+ sw_desc = ppc440spe_dma01_prep_sum_product(ppc440spe_chan,
+ &dst[1], src, 2, scf, len, flags);
+ return sw_desc ? &sw_desc->async_tx : NULL;
+ }
+
+ if (!(flags & DMA_PREP_PQ_DISABLE_P)) {
+ BUG_ON(!dst[0]);
+ dst_cnt++;
+ flags |= DMA_PREP_ZERO_P;
+ }
+
+ if (!(flags & DMA_PREP_PQ_DISABLE_Q)) {
+ BUG_ON(!dst[1]);
+ dst_cnt++;
+ flags |= DMA_PREP_ZERO_Q;
+ }
+
+ BUG_ON(!dst_cnt);
+
+ dev_dbg(ppc440spe_chan->device->common.dev,
+ "ppc440spe adma%d: %s src_cnt: %d len: %u int_en: %d\n",
+ ppc440spe_chan->device->id, __func__, src_cnt, len,
+ flags & DMA_PREP_INTERRUPT ? 1 : 0);
+
+ switch (ppc440spe_chan->device->id) {
+ case PPC440SPE_DMA0_ID:
+ case PPC440SPE_DMA1_ID:
+ sw_desc = ppc440spe_dma01_prep_pq(ppc440spe_chan,
+ dst, dst_cnt, src, src_cnt, scf,
+ len, flags);
+ break;
+
+ case PPC440SPE_XOR_ID:
+ sw_desc = ppc440spe_dma2_prep_pq(ppc440spe_chan,
+ dst, dst_cnt, src, src_cnt, scf,
+ len, flags);
+ break;
+ }
+
+ return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
+/**
+ * ppc440spe_adma_prep_dma_pqzero_sum - prepare CDB group for
+ * a PQ_ZERO_SUM operation
+ */
+static struct dma_async_tx_descriptor *ppc440spe_adma_prep_dma_pqzero_sum(
+ struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src,
+ unsigned int src_cnt, const unsigned char *scf, size_t len,
+ enum sum_check_flags *pqres, unsigned long flags)
+{
+ struct ppc440spe_adma_chan *ppc440spe_chan;
+ struct ppc440spe_adma_desc_slot *sw_desc, *iter;
+ dma_addr_t pdest, qdest;
+ int slot_cnt, slots_per_op, idst, dst_cnt;
+
+ ppc440spe_chan = to_ppc440spe_adma_chan(chan);
+
+ if (flags & DMA_PREP_PQ_DISABLE_P)
+ pdest = 0;
+ else
+ pdest = pq[0];
+
+ if (flags & DMA_PREP_PQ_DISABLE_Q)
+ qdest = 0;
+ else
+ qdest = pq[1];
+
+ ADMA_LL_DBG(prep_dma_pqzero_sum_dbg(ppc440spe_chan->device->id,
+ src, src_cnt, scf));
+
+ /* Always use WXOR for P/Q calculations (two destinations).
+ * Need 1 or 2 extra slots to verify results are zero.
+ */
+ idst = dst_cnt = (pdest && qdest) ? 2 : 1;
+
+ /* One additional slot per destination to clone P/Q
+ * before calculation (we have to preserve destinations).
+ */
+ slot_cnt = src_cnt + dst_cnt * 2;
+ slots_per_op = 1;
+
+ spin_lock_bh(&ppc440spe_chan->lock);
+ sw_desc = ppc440spe_adma_alloc_slots(ppc440spe_chan, slot_cnt,
+ slots_per_op);
+ if (sw_desc) {
+ ppc440spe_desc_init_dma01pqzero_sum(sw_desc, dst_cnt, src_cnt);
+
+ /* Setup byte count for each slot just allocated */
+ sw_desc->async_tx.flags = flags;
+ list_for_each_entry(iter, &sw_desc->group_list, chain_node) {
+ ppc440spe_desc_set_byte_count(iter, ppc440spe_chan,
+ len);
+ iter->unmap_len = len;
+ }
+
+ if (pdest) {
+ struct dma_cdb *hw_desc;
+ struct ppc440spe_adma_chan *chan;
+
+ iter = sw_desc->group_head;
+ chan = to_ppc440spe_adma_chan(iter->async_tx.chan);
+ memset(iter->hw_desc, 0, sizeof(struct dma_cdb));
+ iter->hw_next = list_entry(iter->chain_node.next,
+ struct ppc440spe_adma_desc_slot,
+ chain_node);
+ hw_desc = iter->hw_desc;
+ hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2;
+ iter->src_cnt = 0;
+ iter->dst_cnt = 0;
+ ppc440spe_desc_set_dest_addr(iter, chan, 0,
+ ppc440spe_chan->pdest, 0);
+ ppc440spe_desc_set_src_addr(iter, chan, 0, 0, pdest);
+ ppc440spe_desc_set_byte_count(iter, ppc440spe_chan,
+ len);
+ iter->unmap_len = 0;
+ /* override pdest to preserve original P */
+ pdest = ppc440spe_chan->pdest;
+ }
+ if (qdest) {
+ struct dma_cdb *hw_desc;
+ struct ppc440spe_adma_chan *chan;
+
+ iter = list_first_entry(&sw_desc->group_list,
+ struct ppc440spe_adma_desc_slot,
+ chain_node);
+ chan = to_ppc440spe_adma_chan(iter->async_tx.chan);
+
+ if (pdest) {
+ iter = list_entry(iter->chain_node.next,
+ struct ppc440spe_adma_desc_slot,
+ chain_node);
+ }
+
+ memset(iter->hw_desc, 0, sizeof(struct dma_cdb));
+ iter->hw_next = list_entry(iter->chain_node.next,
+ struct ppc440spe_adma_desc_slot,
+ chain_node);
+ hw_desc = iter->hw_desc;
+ hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2;
+ iter->src_cnt = 0;
+ iter->dst_cnt = 0;
+ ppc440spe_desc_set_dest_addr(iter, chan, 0,
+ ppc440spe_chan->qdest, 0);
+ ppc440spe_desc_set_src_addr(iter, chan, 0, 0, qdest);
+ ppc440spe_desc_set_byte_count(iter, ppc440spe_chan,
+ len);
+ iter->unmap_len = 0;
+ /* override qdest to preserve original Q */
+ qdest = ppc440spe_chan->qdest;
+ }
+
+ /* Setup destinations for P/Q ops */
+ ppc440spe_adma_pqzero_sum_set_dest(sw_desc, pdest, qdest);
+
+ /* Setup zero QWORDs into DCHECK CDBs */
+ idst = dst_cnt;
+ list_for_each_entry_reverse(iter, &sw_desc->group_list,
+ chain_node) {
+ /*
+ * The last CDB corresponds to Q-parity check,
+ * the one before last CDB corresponds
+ * P-parity check
+ */
+ if (idst == DMA_DEST_MAX_NUM) {
+ if (idst == dst_cnt) {
+ set_bit(PPC440SPE_DESC_QCHECK,
+ &iter->flags);
+ } else {
+ set_bit(PPC440SPE_DESC_PCHECK,
+ &iter->flags);
+ }
+ } else {
+ if (qdest) {
+ set_bit(PPC440SPE_DESC_QCHECK,
+ &iter->flags);
+ } else {
+ set_bit(PPC440SPE_DESC_PCHECK,
+ &iter->flags);
+ }
+ }
+ iter->xor_check_result = pqres;
+
+ /*
+ * set it to zero, if check fail then result will
+ * be updated
+ */
+ *iter->xor_check_result = 0;
+ ppc440spe_desc_set_dcheck(iter, ppc440spe_chan,
+ ppc440spe_qword);
+
+ if (!(--dst_cnt))
+ break;
+ }
+
+ /* Setup sources and mults for P/Q ops */
+ list_for_each_entry_continue_reverse(iter, &sw_desc->group_list,
+ chain_node) {
+ struct ppc440spe_adma_chan *chan;
+ u32 mult_dst;
+
+ chan = to_ppc440spe_adma_chan(iter->async_tx.chan);
+ ppc440spe_desc_set_src_addr(iter, chan, 0,
+ DMA_CUED_XOR_HB,
+ src[src_cnt - 1]);
+ if (qdest) {
+ mult_dst = (dst_cnt - 1) ? DMA_CDB_SG_DST2 :
+ DMA_CDB_SG_DST1;
+ ppc440spe_desc_set_src_mult(iter, chan,
+ DMA_CUED_MULT1_OFF,
+ mult_dst,
+ scf[src_cnt - 1]);
+ }
+ if (!(--src_cnt))
+ break;
+ }
+ }
+ spin_unlock_bh(&ppc440spe_chan->lock);
+ return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
+/**
+ * ppc440spe_adma_prep_dma_xor_zero_sum - prepare CDB group for
+ * XOR ZERO_SUM operation
+ */
+static struct dma_async_tx_descriptor *ppc440spe_adma_prep_dma_xor_zero_sum(
+ struct dma_chan *chan, dma_addr_t *src, unsigned int src_cnt,
+ size_t len, enum sum_check_flags *result, unsigned long flags)
+{
+ struct dma_async_tx_descriptor *tx;
+ dma_addr_t pq[2];
+
+ /* validate P, disable Q */
+ pq[0] = src[0];
+ pq[1] = 0;
+ flags |= DMA_PREP_PQ_DISABLE_Q;
+
+ tx = ppc440spe_adma_prep_dma_pqzero_sum(chan, pq, &src[1],
+ src_cnt - 1, 0, len,
+ result, flags);
+ return tx;
+}
+
+/**
+ * ppc440spe_adma_set_dest - set destination address into descriptor
+ */
+static void ppc440spe_adma_set_dest(struct ppc440spe_adma_desc_slot *sw_desc,
+ dma_addr_t addr, int index)
+{
+ struct ppc440spe_adma_chan *chan;
+
+ BUG_ON(index >= sw_desc->dst_cnt);
+
+ chan = to_ppc440spe_adma_chan(sw_desc->async_tx.chan);
+
+ switch (chan->device->id) {
+ case PPC440SPE_DMA0_ID:
+ case PPC440SPE_DMA1_ID:
+ /* to do: support transfers lengths >
+ * PPC440SPE_ADMA_DMA/XOR_MAX_BYTE_COUNT
+ */
+ ppc440spe_desc_set_dest_addr(sw_desc->group_head,
+ chan, 0, addr, index);
+ break;
+ case PPC440SPE_XOR_ID:
+ sw_desc = ppc440spe_get_group_entry(sw_desc, index);
+ ppc440spe_desc_set_dest_addr(sw_desc,
+ chan, 0, addr, index);
+ break;
+ }
+}
+
+static void ppc440spe_adma_pq_zero_op(struct ppc440spe_adma_desc_slot *iter,
+ struct ppc440spe_adma_chan *chan, dma_addr_t addr)
+{
+ /* To clear destinations update the descriptor
+ * (P or Q depending on index) as follows:
+ * addr is destination (0 corresponds to SG2):
+ */
+ ppc440spe_desc_set_dest_addr(iter, chan, DMA_CUED_XOR_BASE, addr, 0);
+
+ /* ... and the addr is source: */
+ ppc440spe_desc_set_src_addr(iter, chan, 0, DMA_CUED_XOR_HB, addr);
+
+ /* addr is always SG2 then the mult is always DST1 */
+ ppc440spe_desc_set_src_mult(iter, chan, DMA_CUED_MULT1_OFF,
+ DMA_CDB_SG_DST1, 1);
+}
+
+/**
+ * ppc440spe_adma_pq_set_dest - set destination address into descriptor
+ * for the PQXOR operation
+ */
+static void ppc440spe_adma_pq_set_dest(struct ppc440spe_adma_desc_slot *sw_desc,
+ dma_addr_t *addrs, unsigned long flags)
+{
+ struct ppc440spe_adma_desc_slot *iter;
+ struct ppc440spe_adma_chan *chan;
+ dma_addr_t paddr, qaddr;
+ dma_addr_t addr = 0, ppath, qpath;
+ int index = 0, i;
+
+ chan = to_ppc440spe_adma_chan(sw_desc->async_tx.chan);
+
+ if (flags & DMA_PREP_PQ_DISABLE_P)
+ paddr = 0;
+ else
+ paddr = addrs[0];
+
+ if (flags & DMA_PREP_PQ_DISABLE_Q)
+ qaddr = 0;
+ else
+ qaddr = addrs[1];
+
+ if (!paddr || !qaddr)
+ addr = paddr ? paddr : qaddr;
+
+ switch (chan->device->id) {
+ case PPC440SPE_DMA0_ID:
+ case PPC440SPE_DMA1_ID:
+ /* walk through the WXOR source list and set P/Q-destinations
+ * for each slot:
+ */
+ if (!test_bit(PPC440SPE_DESC_RXOR, &sw_desc->flags)) {
+ /* This is WXOR-only chain; may have 1/2 zero descs */
+ if (test_bit(PPC440SPE_ZERO_P, &sw_desc->flags))
+ index++;
+ if (test_bit(PPC440SPE_ZERO_Q, &sw_desc->flags))
+ index++;
+
+ iter = ppc440spe_get_group_entry(sw_desc, index);
+ if (addr) {
+ /* one destination */
+ list_for_each_entry_from(iter,
+ &sw_desc->group_list, chain_node)
+ ppc440spe_desc_set_dest_addr(iter, chan,
+ DMA_CUED_XOR_BASE, addr, 0);
+ } else {
+ /* two destinations */
+ list_for_each_entry_from(iter,
+ &sw_desc->group_list, chain_node) {
+ ppc440spe_desc_set_dest_addr(iter, chan,
+ DMA_CUED_XOR_BASE, paddr, 0);
+ ppc440spe_desc_set_dest_addr(iter, chan,
+ DMA_CUED_XOR_BASE, qaddr, 1);
+ }
+ }
+
+ if (index) {
+ /* To clear destinations update the descriptor
+ * (1st,2nd, or both depending on flags)
+ */
+ index = 0;
+ if (test_bit(PPC440SPE_ZERO_P,
+ &sw_desc->flags)) {
+ iter = ppc440spe_get_group_entry(
+ sw_desc, index++);
+ ppc440spe_adma_pq_zero_op(iter, chan,
+ paddr);
+ }
+
+ if (test_bit(PPC440SPE_ZERO_Q,
+ &sw_desc->flags)) {
+ iter = ppc440spe_get_group_entry(
+ sw_desc, index++);
+ ppc440spe_adma_pq_zero_op(iter, chan,
+ qaddr);
+ }
+
+ return;
+ }
+ } else {
+ /* This is RXOR-only or RXOR/WXOR mixed chain */
+
+ /* If we want to include destination into calculations,
+ * then make dest addresses cued with mult=1 (XOR).
+ */
+ ppath = test_bit(PPC440SPE_ZERO_P, &sw_desc->flags) ?
+ DMA_CUED_XOR_HB :
+ DMA_CUED_XOR_BASE |
+ (1 << DMA_CUED_MULT1_OFF);
+ qpath = test_bit(PPC440SPE_ZERO_Q, &sw_desc->flags) ?
+ DMA_CUED_XOR_HB :
+ DMA_CUED_XOR_BASE |
+ (1 << DMA_CUED_MULT1_OFF);
+
+ /* Setup destination(s) in RXOR slot(s) */
+ iter = ppc440spe_get_group_entry(sw_desc, index++);
+ ppc440spe_desc_set_dest_addr(iter, chan,
+ paddr ? ppath : qpath,
+ paddr ? paddr : qaddr, 0);
+ if (!addr) {
+ /* two destinations */
+ iter = ppc440spe_get_group_entry(sw_desc,
+ index++);
+ ppc440spe_desc_set_dest_addr(iter, chan,
+ qpath, qaddr, 0);
+ }
+
+ if (test_bit(PPC440SPE_DESC_WXOR, &sw_desc->flags)) {
+ /* Setup destination(s) in remaining WXOR
+ * slots
+ */
+ iter = ppc440spe_get_group_entry(sw_desc,
+ index);
+ if (addr) {
+ /* one destination */
+ list_for_each_entry_from(iter,
+ &sw_desc->group_list,
+ chain_node)
+ ppc440spe_desc_set_dest_addr(
+ iter, chan,
+ DMA_CUED_XOR_BASE,
+ addr, 0);
+
+ } else {
+ /* two destinations */
+ list_for_each_entry_from(iter,
+ &sw_desc->group_list,
+ chain_node) {
+ ppc440spe_desc_set_dest_addr(
+ iter, chan,
+ DMA_CUED_XOR_BASE,
+ paddr, 0);
+ ppc440spe_desc_set_dest_addr(
+ iter, chan,
+ DMA_CUED_XOR_BASE,
+ qaddr, 1);
+ }
+ }
+ }
+
+ }
+ break;
+
+ case PPC440SPE_XOR_ID:
+ /* DMA2 descriptors have only 1 destination, so there are
+ * two chains - one for each dest.
+ * If we want to include destination into calculations,
+ * then make dest addresses cued with mult=1 (XOR).
+ */
+ ppath = test_bit(PPC440SPE_ZERO_P, &sw_desc->flags) ?
+ DMA_CUED_XOR_HB :
+ DMA_CUED_XOR_BASE |
+ (1 << DMA_CUED_MULT1_OFF);
+
+ qpath = test_bit(PPC440SPE_ZERO_Q, &sw_desc->flags) ?
+ DMA_CUED_XOR_HB :
+ DMA_CUED_XOR_BASE |
+ (1 << DMA_CUED_MULT1_OFF);
+
+ iter = ppc440spe_get_group_entry(sw_desc, 0);
+ for (i = 0; i < sw_desc->descs_per_op; i++) {
+ ppc440spe_desc_set_dest_addr(iter, chan,
+ paddr ? ppath : qpath,
+ paddr ? paddr : qaddr, 0);
+ iter = list_entry(iter->chain_node.next,
+ struct ppc440spe_adma_desc_slot,
+ chain_node);
+ }
+
+ if (!addr) {
+ /* Two destinations; setup Q here */
+ iter = ppc440spe_get_group_entry(sw_desc,
+ sw_desc->descs_per_op);
+ for (i = 0; i < sw_desc->descs_per_op; i++) {
+ ppc440spe_desc_set_dest_addr(iter,
+ chan, qpath, qaddr, 0);
+ iter = list_entry(iter->chain_node.next,
+ struct ppc440spe_adma_desc_slot,
+ chain_node);
+ }
+ }
+
+ break;
+ }
+}
+
+/**
+ * ppc440spe_adma_pq_zero_sum_set_dest - set destination address into descriptor
+ * for the PQ_ZERO_SUM operation
+ */
+static void ppc440spe_adma_pqzero_sum_set_dest(
+ struct ppc440spe_adma_desc_slot *sw_desc,
+ dma_addr_t paddr, dma_addr_t qaddr)
+{
+ struct ppc440spe_adma_desc_slot *iter, *end;
+ struct ppc440spe_adma_chan *chan;
+ dma_addr_t addr = 0;
+ int idx;
+
+ chan = to_ppc440spe_adma_chan(sw_desc->async_tx.chan);
+
+ /* walk through the WXOR source list and set P/Q-destinations
+ * for each slot
+ */
+ idx = (paddr && qaddr) ? 2 : 1;
+ /* set end */
+ list_for_each_entry_reverse(end, &sw_desc->group_list,
+ chain_node) {
+ if (!(--idx))
+ break;
+ }
+ /* set start */
+ idx = (paddr && qaddr) ? 2 : 1;
+ iter = ppc440spe_get_group_entry(sw_desc, idx);
+
+ if (paddr && qaddr) {
+ /* two destinations */
+ list_for_each_entry_from(iter, &sw_desc->group_list,
+ chain_node) {
+ if (unlikely(iter == end))
+ break;
+ ppc440spe_desc_set_dest_addr(iter, chan,
+ DMA_CUED_XOR_BASE, paddr, 0);
+ ppc440spe_desc_set_dest_addr(iter, chan,
+ DMA_CUED_XOR_BASE, qaddr, 1);
+ }
+ } else {
+ /* one destination */
+ addr = paddr ? paddr : qaddr;
+ list_for_each_entry_from(iter, &sw_desc->group_list,
+ chain_node) {
+ if (unlikely(iter == end))
+ break;
+ ppc440spe_desc_set_dest_addr(iter, chan,
+ DMA_CUED_XOR_BASE, addr, 0);
+ }
+ }
+
+ /* The remaining descriptors are DATACHECK. These have no need in
+ * destination. Actually, these destinations are used there
+ * as sources for check operation. So, set addr as source.
+ */
+ ppc440spe_desc_set_src_addr(end, chan, 0, 0, addr ? addr : paddr);
+
+ if (!addr) {
+ end = list_entry(end->chain_node.next,
+ struct ppc440spe_adma_desc_slot, chain_node);
+ ppc440spe_desc_set_src_addr(end, chan, 0, 0, qaddr);
+ }
+}
+
+/**
+ * ppc440spe_desc_set_xor_src_cnt - set source count into descriptor
+ */
+static inline void ppc440spe_desc_set_xor_src_cnt(
+ struct ppc440spe_adma_desc_slot *desc,
+ int src_cnt)
+{
+ struct xor_cb *hw_desc = desc->hw_desc;
+
+ hw_desc->cbc &= ~XOR_CDCR_OAC_MSK;
+ hw_desc->cbc |= src_cnt;
+}
+
+/**
+ * ppc440spe_adma_pq_set_src - set source address into descriptor
+ */
+static void ppc440spe_adma_pq_set_src(struct ppc440spe_adma_desc_slot *sw_desc,
+ dma_addr_t addr, int index)
+{
+ struct ppc440spe_adma_chan *chan;
+ dma_addr_t haddr = 0;
+ struct ppc440spe_adma_desc_slot *iter = NULL;
+
+ chan = to_ppc440spe_adma_chan(sw_desc->async_tx.chan);
+
+ switch (chan->device->id) {
+ case PPC440SPE_DMA0_ID:
+ case PPC440SPE_DMA1_ID:
+ /* DMA0,1 may do: WXOR, RXOR, RXOR+WXORs chain
+ */
+ if (test_bit(PPC440SPE_DESC_RXOR, &sw_desc->flags)) {
+ /* RXOR-only or RXOR/WXOR operation */
+ int iskip = test_bit(PPC440SPE_DESC_RXOR12,
+ &sw_desc->flags) ? 2 : 3;
+
+ if (index == 0) {
+ /* 1st slot (RXOR) */
+ /* setup sources region (R1-2-3, R1-2-4,
+ * or R1-2-5)
+ */
+ if (test_bit(PPC440SPE_DESC_RXOR12,
+ &sw_desc->flags))
+ haddr = DMA_RXOR12 <<
+ DMA_CUED_REGION_OFF;
+ else if (test_bit(PPC440SPE_DESC_RXOR123,
+ &sw_desc->flags))
+ haddr = DMA_RXOR123 <<
+ DMA_CUED_REGION_OFF;
+ else if (test_bit(PPC440SPE_DESC_RXOR124,
+ &sw_desc->flags))
+ haddr = DMA_RXOR124 <<
+ DMA_CUED_REGION_OFF;
+ else if (test_bit(PPC440SPE_DESC_RXOR125,
+ &sw_desc->flags))
+ haddr = DMA_RXOR125 <<
+ DMA_CUED_REGION_OFF;
+ else
+ BUG();
+ haddr |= DMA_CUED_XOR_BASE;
+ iter = ppc440spe_get_group_entry(sw_desc, 0);
+ } else if (index < iskip) {
+ /* 1st slot (RXOR)
+ * shall actually set source address only once
+ * instead of first <iskip>
+ */
+ iter = NULL;
+ } else {
+ /* 2nd/3d and next slots (WXOR);
+ * skip first slot with RXOR
+ */
+ haddr = DMA_CUED_XOR_HB;
+ iter = ppc440spe_get_group_entry(sw_desc,
+ index - iskip + sw_desc->dst_cnt);
+ }
+ } else {
+ int znum = 0;
+
+ /* WXOR-only operation; skip first slots with
+ * zeroing destinations
+ */
+ if (test_bit(PPC440SPE_ZERO_P, &sw_desc->flags))
+ znum++;
+ if (test_bit(PPC440SPE_ZERO_Q, &sw_desc->flags))
+ znum++;
+
+ haddr = DMA_CUED_XOR_HB;
+ iter = ppc440spe_get_group_entry(sw_desc,
+ index + znum);
+ }
+
+ if (likely(iter)) {
+ ppc440spe_desc_set_src_addr(iter, chan, 0, haddr, addr);
+
+ if (!index &&
+ test_bit(PPC440SPE_DESC_RXOR, &sw_desc->flags) &&
+ sw_desc->dst_cnt == 2) {
+ /* if we have two destinations for RXOR, then
+ * setup source in the second descr too
+ */
+ iter = ppc440spe_get_group_entry(sw_desc, 1);
+ ppc440spe_desc_set_src_addr(iter, chan, 0,
+ haddr, addr);
+ }
+ }
+ break;
+
+ case PPC440SPE_XOR_ID:
+ /* DMA2 may do Biskup */
+ iter = sw_desc->group_head;
+ if (iter->dst_cnt == 2) {
+ /* both P & Q calculations required; set P src here */
+ ppc440spe_adma_dma2rxor_set_src(iter, index, addr);
+
+ /* this is for Q */
+ iter = ppc440spe_get_group_entry(sw_desc,
+ sw_desc->descs_per_op);
+ }
+ ppc440spe_adma_dma2rxor_set_src(iter, index, addr);
+ break;
+ }
+}
+
+/**
+ * ppc440spe_adma_memcpy_xor_set_src - set source address into descriptor
+ */
+static void ppc440spe_adma_memcpy_xor_set_src(
+ struct ppc440spe_adma_desc_slot *sw_desc,
+ dma_addr_t addr, int index)
+{
+ struct ppc440spe_adma_chan *chan;
+
+ chan = to_ppc440spe_adma_chan(sw_desc->async_tx.chan);
+ sw_desc = sw_desc->group_head;
+
+ if (likely(sw_desc))
+ ppc440spe_desc_set_src_addr(sw_desc, chan, index, 0, addr);
+}
+
+/**
+ * ppc440spe_adma_dma2rxor_inc_addr -
+ */
+static void ppc440spe_adma_dma2rxor_inc_addr(
+ struct ppc440spe_adma_desc_slot *desc,
+ struct ppc440spe_rxor *cursor, int index, int src_cnt)
+{
+ cursor->addr_count++;
+ if (index == src_cnt - 1) {
+ ppc440spe_desc_set_xor_src_cnt(desc, cursor->addr_count);
+ } else if (cursor->addr_count == XOR_MAX_OPS) {
+ ppc440spe_desc_set_xor_src_cnt(desc, cursor->addr_count);
+ cursor->addr_count = 0;
+ cursor->desc_count++;
+ }
+}
+
+/**
+ * ppc440spe_adma_dma2rxor_prep_src - setup RXOR types in DMA2 CDB
+ */
+static int ppc440spe_adma_dma2rxor_prep_src(
+ struct ppc440spe_adma_desc_slot *hdesc,
+ struct ppc440spe_rxor *cursor, int index,
+ int src_cnt, u32 addr)
+{
+ u32 sign;
+ struct ppc440spe_adma_desc_slot *desc = hdesc;
+ int i;
+
+ for (i = 0; i < cursor->desc_count; i++) {
+ desc = list_entry(hdesc->chain_node.next,
+ struct ppc440spe_adma_desc_slot,
+ chain_node);
+ }
+
+ switch (cursor->state) {
+ case 0:
+ if (addr == cursor->addrl + cursor->len) {
+ /* direct RXOR */
+ cursor->state = 1;
+ cursor->xor_count++;
+ if (index == src_cnt-1) {
+ ppc440spe_rxor_set_region(desc,
+ cursor->addr_count,
+ DMA_RXOR12 << DMA_CUED_REGION_OFF);
+ ppc440spe_adma_dma2rxor_inc_addr(
+ desc, cursor, index, src_cnt);
+ }
+ } else if (cursor->addrl == addr + cursor->len) {
+ /* reverse RXOR */
+ cursor->state = 1;
+ cursor->xor_count++;
+ set_bit(cursor->addr_count, &desc->reverse_flags[0]);
+ if (index == src_cnt-1) {
+ ppc440spe_rxor_set_region(desc,
+ cursor->addr_count,
+ DMA_RXOR12 << DMA_CUED_REGION_OFF);
+ ppc440spe_adma_dma2rxor_inc_addr(
+ desc, cursor, index, src_cnt);
+ }
+ } else {
+ printk(KERN_ERR "Cannot build "
+ "DMA2 RXOR command block.\n");
+ BUG();
+ }
+ break;
+ case 1:
+ sign = test_bit(cursor->addr_count,
+ desc->reverse_flags)
+ ? -1 : 1;
+ if (index == src_cnt-2 || (sign == -1
+ && addr != cursor->addrl - 2*cursor->len)) {
+ cursor->state = 0;
+ cursor->xor_count = 1;
+ cursor->addrl = addr;
+ ppc440spe_rxor_set_region(desc,
+ cursor->addr_count,
+ DMA_RXOR12 << DMA_CUED_REGION_OFF);
+ ppc440spe_adma_dma2rxor_inc_addr(
+ desc, cursor, index, src_cnt);
+ } else if (addr == cursor->addrl + 2*sign*cursor->len) {
+ cursor->state = 2;
+ cursor->xor_count = 0;
+ ppc440spe_rxor_set_region(desc,
+ cursor->addr_count,
+ DMA_RXOR123 << DMA_CUED_REGION_OFF);
+ if (index == src_cnt-1) {
+ ppc440spe_adma_dma2rxor_inc_addr(
+ desc, cursor, index, src_cnt);
+ }
+ } else if (addr == cursor->addrl + 3*cursor->len) {
+ cursor->state = 2;
+ cursor->xor_count = 0;
+ ppc440spe_rxor_set_region(desc,
+ cursor->addr_count,
+ DMA_RXOR124 << DMA_CUED_REGION_OFF);
+ if (index == src_cnt-1) {
+ ppc440spe_adma_dma2rxor_inc_addr(
+ desc, cursor, index, src_cnt);
+ }
+ } else if (addr == cursor->addrl + 4*cursor->len) {
+ cursor->state = 2;
+ cursor->xor_count = 0;
+ ppc440spe_rxor_set_region(desc,
+ cursor->addr_count,
+ DMA_RXOR125 << DMA_CUED_REGION_OFF);
+ if (index == src_cnt-1) {
+ ppc440spe_adma_dma2rxor_inc_addr(
+ desc, cursor, index, src_cnt);
+ }
+ } else {
+ cursor->state = 0;
+ cursor->xor_count = 1;
+ cursor->addrl = addr;
+ ppc440spe_rxor_set_region(desc,
+ cursor->addr_count,
+ DMA_RXOR12 << DMA_CUED_REGION_OFF);
+ ppc440spe_adma_dma2rxor_inc_addr(
+ desc, cursor, index, src_cnt);
+ }
+ break;
+ case 2:
+ cursor->state = 0;
+ cursor->addrl = addr;
+ cursor->xor_count++;
+ if (index) {
+ ppc440spe_adma_dma2rxor_inc_addr(
+ desc, cursor, index, src_cnt);
+ }
+ break;
+ }
+
+ return 0;
+}
+
+/**
+ * ppc440spe_adma_dma2rxor_set_src - set RXOR source address; it's assumed that
+ * ppc440spe_adma_dma2rxor_prep_src() has already done prior this call
+ */
+static void ppc440spe_adma_dma2rxor_set_src(
+ struct ppc440spe_adma_desc_slot *desc,
+ int index, dma_addr_t addr)
+{
+ struct xor_cb *xcb = desc->hw_desc;
+ int k = 0, op = 0, lop = 0;
+
+ /* get the RXOR operand which corresponds to index addr */
+ while (op <= index) {
+ lop = op;
+ if (k == XOR_MAX_OPS) {
+ k = 0;
+ desc = list_entry(desc->chain_node.next,
+ struct ppc440spe_adma_desc_slot, chain_node);
+ xcb = desc->hw_desc;
+
+ }
+ if ((xcb->ops[k++].h & (DMA_RXOR12 << DMA_CUED_REGION_OFF)) ==
+ (DMA_RXOR12 << DMA_CUED_REGION_OFF))
+ op += 2;
+ else
+ op += 3;
+ }
+
+ BUG_ON(k < 1);
+
+ if (test_bit(k-1, desc->reverse_flags)) {
+ /* reverse operand order; put last op in RXOR group */
+ if (index == op - 1)
+ ppc440spe_rxor_set_src(desc, k - 1, addr);
+ } else {
+ /* direct operand order; put first op in RXOR group */
+ if (index == lop)
+ ppc440spe_rxor_set_src(desc, k - 1, addr);
+ }
+}
+
+/**
+ * ppc440spe_adma_dma2rxor_set_mult - set RXOR multipliers; it's assumed that
+ * ppc440spe_adma_dma2rxor_prep_src() has already done prior this call
+ */
+static void ppc440spe_adma_dma2rxor_set_mult(
+ struct ppc440spe_adma_desc_slot *desc,
+ int index, u8 mult)
+{
+ struct xor_cb *xcb = desc->hw_desc;
+ int k = 0, op = 0, lop = 0;
+
+ /* get the RXOR operand which corresponds to index mult */
+ while (op <= index) {
+ lop = op;
+ if (k == XOR_MAX_OPS) {
+ k = 0;
+ desc = list_entry(desc->chain_node.next,
+ struct ppc440spe_adma_desc_slot,
+ chain_node);
+ xcb = desc->hw_desc;
+
+ }
+ if ((xcb->ops[k++].h & (DMA_RXOR12 << DMA_CUED_REGION_OFF)) ==
+ (DMA_RXOR12 << DMA_CUED_REGION_OFF))
+ op += 2;
+ else
+ op += 3;
+ }
+
+ BUG_ON(k < 1);
+ if (test_bit(k-1, desc->reverse_flags)) {
+ /* reverse order */
+ ppc440spe_rxor_set_mult(desc, k - 1, op - index - 1, mult);
+ } else {
+ /* direct order */
+ ppc440spe_rxor_set_mult(desc, k - 1, index - lop, mult);
+ }
+}
+
+/**
+ * ppc440spe_init_rxor_cursor -
+ */
+static void ppc440spe_init_rxor_cursor(struct ppc440spe_rxor *cursor)
+{
+ memset(cursor, 0, sizeof(struct ppc440spe_rxor));
+ cursor->state = 2;
+}
+
+/**
+ * ppc440spe_adma_pq_set_src_mult - set multiplication coefficient into
+ * descriptor for the PQXOR operation
+ */
+static void ppc440spe_adma_pq_set_src_mult(
+ struct ppc440spe_adma_desc_slot *sw_desc,
+ unsigned char mult, int index, int dst_pos)
+{
+ struct ppc440spe_adma_chan *chan;
+ u32 mult_idx, mult_dst;
+ struct ppc440spe_adma_desc_slot *iter = NULL, *iter1 = NULL;
+
+ chan = to_ppc440spe_adma_chan(sw_desc->async_tx.chan);
+
+ switch (chan->device->id) {
+ case PPC440SPE_DMA0_ID:
+ case PPC440SPE_DMA1_ID:
+ if (test_bit(PPC440SPE_DESC_RXOR, &sw_desc->flags)) {
+ int region = test_bit(PPC440SPE_DESC_RXOR12,
+ &sw_desc->flags) ? 2 : 3;
+
+ if (index < region) {
+ /* RXOR multipliers */
+ iter = ppc440spe_get_group_entry(sw_desc,
+ sw_desc->dst_cnt - 1);
+ if (sw_desc->dst_cnt == 2)
+ iter1 = ppc440spe_get_group_entry(
+ sw_desc, 0);
+
+ mult_idx = DMA_CUED_MULT1_OFF + (index << 3);
+ mult_dst = DMA_CDB_SG_SRC;
+ } else {
+ /* WXOR multiplier */
+ iter = ppc440spe_get_group_entry(sw_desc,
+ index - region +
+ sw_desc->dst_cnt);
+ mult_idx = DMA_CUED_MULT1_OFF;
+ mult_dst = dst_pos ? DMA_CDB_SG_DST2 :
+ DMA_CDB_SG_DST1;
+ }
+ } else {
+ int znum = 0;
+
+ /* WXOR-only;
+ * skip first slots with destinations (if ZERO_DST has
+ * place)
+ */
+ if (test_bit(PPC440SPE_ZERO_P, &sw_desc->flags))
+ znum++;
+ if (test_bit(PPC440SPE_ZERO_Q, &sw_desc->flags))
+ znum++;
+
+ iter = ppc440spe_get_group_entry(sw_desc, index + znum);
+ mult_idx = DMA_CUED_MULT1_OFF;
+ mult_dst = dst_pos ? DMA_CDB_SG_DST2 : DMA_CDB_SG_DST1;
+ }
+
+ if (likely(iter)) {
+ ppc440spe_desc_set_src_mult(iter, chan,
+ mult_idx, mult_dst, mult);
+
+ if (unlikely(iter1)) {
+ /* if we have two destinations for RXOR, then
+ * we've just set Q mult. Set-up P now.
+ */
+ ppc440spe_desc_set_src_mult(iter1, chan,
+ mult_idx, mult_dst, 1);
+ }
+
+ }
+ break;
+
+ case PPC440SPE_XOR_ID:
+ iter = sw_desc->group_head;
+ if (sw_desc->dst_cnt == 2) {
+ /* both P & Q calculations required; set P mult here */
+ ppc440spe_adma_dma2rxor_set_mult(iter, index, 1);
+
+ /* and then set Q mult */
+ iter = ppc440spe_get_group_entry(sw_desc,
+ sw_desc->descs_per_op);
+ }
+ ppc440spe_adma_dma2rxor_set_mult(iter, index, mult);
+ break;
+ }
+}
+
+/**
+ * ppc440spe_adma_free_chan_resources - free the resources allocated
+ */
+static void ppc440spe_adma_free_chan_resources(struct dma_chan *chan)
+{
+ struct ppc440spe_adma_chan *ppc440spe_chan;
+ struct ppc440spe_adma_desc_slot *iter, *_iter;
+ int in_use_descs = 0;
+
+ ppc440spe_chan = to_ppc440spe_adma_chan(chan);
+ ppc440spe_adma_slot_cleanup(ppc440spe_chan);
+
+ spin_lock_bh(&ppc440spe_chan->lock);
+ list_for_each_entry_safe(iter, _iter, &ppc440spe_chan->chain,
+ chain_node) {
+ in_use_descs++;
+ list_del(&iter->chain_node);
+ }
+ list_for_each_entry_safe_reverse(iter, _iter,
+ &ppc440spe_chan->all_slots, slot_node) {
+ list_del(&iter->slot_node);
+ kfree(iter);
+ ppc440spe_chan->slots_allocated--;
+ }
+ ppc440spe_chan->last_used = NULL;
+
+ dev_dbg(ppc440spe_chan->device->common.dev,
+ "ppc440spe adma%d %s slots_allocated %d\n",
+ ppc440spe_chan->device->id,
+ __func__, ppc440spe_chan->slots_allocated);
+ spin_unlock_bh(&ppc440spe_chan->lock);
+
+ /* one is ok since we left it on there on purpose */
+ if (in_use_descs > 1)
+ printk(KERN_ERR "SPE: Freeing %d in use descriptors!\n",
+ in_use_descs - 1);
+}
+
+/**
+ * ppc440spe_adma_tx_status - poll the status of an ADMA transaction
+ * @chan: ADMA channel handle
+ * @cookie: ADMA transaction identifier
+ * @txstate: a holder for the current state of the channel
+ */
+static enum dma_status ppc440spe_adma_tx_status(struct dma_chan *chan,
+ dma_cookie_t cookie, struct dma_tx_state *txstate)
+{
+ struct ppc440spe_adma_chan *ppc440spe_chan;
+ enum dma_status ret;
+
+ ppc440spe_chan = to_ppc440spe_adma_chan(chan);
+ ret = dma_cookie_status(chan, cookie, txstate);
+ if (ret == DMA_COMPLETE)
+ return ret;
+
+ ppc440spe_adma_slot_cleanup(ppc440spe_chan);
+
+ return dma_cookie_status(chan, cookie, txstate);
+}
+
+/**
+ * ppc440spe_adma_eot_handler - end of transfer interrupt handler
+ */
+static irqreturn_t ppc440spe_adma_eot_handler(int irq, void *data)
+{
+ struct ppc440spe_adma_chan *chan = data;
+
+ dev_dbg(chan->device->common.dev,
+ "ppc440spe adma%d: %s\n", chan->device->id, __func__);
+
+ tasklet_schedule(&chan->irq_tasklet);
+ ppc440spe_adma_device_clear_eot_status(chan);
+
+ return IRQ_HANDLED;
+}
+
+/**
+ * ppc440spe_adma_err_handler - DMA error interrupt handler;
+ * do the same things as a eot handler
+ */
+static irqreturn_t ppc440spe_adma_err_handler(int irq, void *data)
+{
+ struct ppc440spe_adma_chan *chan = data;
+
+ dev_dbg(chan->device->common.dev,
+ "ppc440spe adma%d: %s\n", chan->device->id, __func__);
+
+ tasklet_schedule(&chan->irq_tasklet);
+ ppc440spe_adma_device_clear_eot_status(chan);
+
+ return IRQ_HANDLED;
+}
+
+/**
+ * ppc440spe_test_callback - called when test operation has been done
+ */
+static void ppc440spe_test_callback(void *unused)
+{
+ complete(&ppc440spe_r6_test_comp);
+}
+
+/**
+ * ppc440spe_adma_issue_pending - flush all pending descriptors to h/w
+ */
+static void ppc440spe_adma_issue_pending(struct dma_chan *chan)
+{
+ struct ppc440spe_adma_chan *ppc440spe_chan;
+
+ ppc440spe_chan = to_ppc440spe_adma_chan(chan);
+ dev_dbg(ppc440spe_chan->device->common.dev,
+ "ppc440spe adma%d: %s %d \n", ppc440spe_chan->device->id,
+ __func__, ppc440spe_chan->pending);
+
+ if (ppc440spe_chan->pending) {
+ ppc440spe_chan->pending = 0;
+ ppc440spe_chan_append(ppc440spe_chan);
+ }
+}
+
+/**
+ * ppc440spe_chan_start_null_xor - initiate the first XOR operation (DMA engines
+ * use FIFOs (as opposite to chains used in XOR) so this is a XOR
+ * specific operation)
+ */
+static void ppc440spe_chan_start_null_xor(struct ppc440spe_adma_chan *chan)
+{
+ struct ppc440spe_adma_desc_slot *sw_desc, *group_start;
+ dma_cookie_t cookie;
+ int slot_cnt, slots_per_op;
+
+ dev_dbg(chan->device->common.dev,
+ "ppc440spe adma%d: %s\n", chan->device->id, __func__);
+
+ spin_lock_bh(&chan->lock);
+ slot_cnt = ppc440spe_chan_xor_slot_count(0, 2, &slots_per_op);
+ sw_desc = ppc440spe_adma_alloc_slots(chan, slot_cnt, slots_per_op);
+ if (sw_desc) {
+ group_start = sw_desc->group_head;
+ list_splice_init(&sw_desc->group_list, &chan->chain);
+ async_tx_ack(&sw_desc->async_tx);
+ ppc440spe_desc_init_null_xor(group_start);
+
+ cookie = dma_cookie_assign(&sw_desc->async_tx);
+
+ /* initialize the completed cookie to be less than
+ * the most recently used cookie
+ */
+ chan->common.completed_cookie = cookie - 1;
+
+ /* channel should not be busy */
+ BUG_ON(ppc440spe_chan_is_busy(chan));
+
+ /* set the descriptor address */
+ ppc440spe_chan_set_first_xor_descriptor(chan, sw_desc);
+
+ /* run the descriptor */
+ ppc440spe_chan_run(chan);
+ } else
+ printk(KERN_ERR "ppc440spe adma%d"
+ " failed to allocate null descriptor\n",
+ chan->device->id);
+ spin_unlock_bh(&chan->lock);
+}
+
+/**
+ * ppc440spe_test_raid6 - test are RAID-6 capabilities enabled successfully.
+ * For this we just perform one WXOR operation with the same source
+ * and destination addresses, the GF-multiplier is 1; so if RAID-6
+ * capabilities are enabled then we'll get src/dst filled with zero.
+ */
+static int ppc440spe_test_raid6(struct ppc440spe_adma_chan *chan)
+{
+ struct ppc440spe_adma_desc_slot *sw_desc, *iter;
+ struct page *pg;
+ char *a;
+ dma_addr_t dma_addr, addrs[2];
+ unsigned long op = 0;
+ int rval = 0;
+
+ set_bit(PPC440SPE_DESC_WXOR, &op);
+
+ pg = alloc_page(GFP_KERNEL);
+ if (!pg)
+ return -ENOMEM;
+
+ spin_lock_bh(&chan->lock);
+ sw_desc = ppc440spe_adma_alloc_slots(chan, 1, 1);
+ if (sw_desc) {
+ /* 1 src, 1 dsr, int_ena, WXOR */
+ ppc440spe_desc_init_dma01pq(sw_desc, 1, 1, 1, op);
+ list_for_each_entry(iter, &sw_desc->group_list, chain_node) {
+ ppc440spe_desc_set_byte_count(iter, chan, PAGE_SIZE);
+ iter->unmap_len = PAGE_SIZE;
+ }
+ } else {
+ rval = -EFAULT;
+ spin_unlock_bh(&chan->lock);
+ goto exit;
+ }
+ spin_unlock_bh(&chan->lock);
+
+ /* Fill the test page with ones */
+ memset(page_address(pg), 0xFF, PAGE_SIZE);
+ dma_addr = dma_map_page(chan->device->dev, pg, 0,
+ PAGE_SIZE, DMA_BIDIRECTIONAL);
+
+ /* Setup addresses */
+ ppc440spe_adma_pq_set_src(sw_desc, dma_addr, 0);
+ ppc440spe_adma_pq_set_src_mult(sw_desc, 1, 0, 0);
+ addrs[0] = dma_addr;
+ addrs[1] = 0;
+ ppc440spe_adma_pq_set_dest(sw_desc, addrs, DMA_PREP_PQ_DISABLE_Q);
+
+ async_tx_ack(&sw_desc->async_tx);
+ sw_desc->async_tx.callback = ppc440spe_test_callback;
+ sw_desc->async_tx.callback_param = NULL;
+
+ init_completion(&ppc440spe_r6_test_comp);
+
+ ppc440spe_adma_tx_submit(&sw_desc->async_tx);
+ ppc440spe_adma_issue_pending(&chan->common);
+
+ wait_for_completion(&ppc440spe_r6_test_comp);
+
+ /* Now check if the test page is zeroed */
+ a = page_address(pg);
+ if ((*(u32 *)a) == 0 && memcmp(a, a+4, PAGE_SIZE-4) == 0) {
+ /* page is zero - RAID-6 enabled */
+ rval = 0;
+ } else {
+ /* RAID-6 was not enabled */
+ rval = -EINVAL;
+ }
+exit:
+ __free_page(pg);
+ return rval;
+}
+
+static void ppc440spe_adma_init_capabilities(struct ppc440spe_adma_device *adev)
+{
+ switch (adev->id) {
+ case PPC440SPE_DMA0_ID:
+ case PPC440SPE_DMA1_ID:
+ dma_cap_set(DMA_MEMCPY, adev->common.cap_mask);
+ dma_cap_set(DMA_INTERRUPT, adev->common.cap_mask);
+ dma_cap_set(DMA_PQ, adev->common.cap_mask);
+ dma_cap_set(DMA_PQ_VAL, adev->common.cap_mask);
+ dma_cap_set(DMA_XOR_VAL, adev->common.cap_mask);
+ break;
+ case PPC440SPE_XOR_ID:
+ dma_cap_set(DMA_XOR, adev->common.cap_mask);
+ dma_cap_set(DMA_PQ, adev->common.cap_mask);
+ dma_cap_set(DMA_INTERRUPT, adev->common.cap_mask);
+ adev->common.cap_mask = adev->common.cap_mask;
+ break;
+ }
+
+ /* Set base routines */
+ adev->common.device_alloc_chan_resources =
+ ppc440spe_adma_alloc_chan_resources;
+ adev->common.device_free_chan_resources =
+ ppc440spe_adma_free_chan_resources;
+ adev->common.device_tx_status = ppc440spe_adma_tx_status;
+ adev->common.device_issue_pending = ppc440spe_adma_issue_pending;
+
+ /* Set prep routines based on capability */
+ if (dma_has_cap(DMA_MEMCPY, adev->common.cap_mask)) {
+ adev->common.device_prep_dma_memcpy =
+ ppc440spe_adma_prep_dma_memcpy;
+ }
+ if (dma_has_cap(DMA_XOR, adev->common.cap_mask)) {
+ adev->common.max_xor = XOR_MAX_OPS;
+ adev->common.device_prep_dma_xor =
+ ppc440spe_adma_prep_dma_xor;
+ }
+ if (dma_has_cap(DMA_PQ, adev->common.cap_mask)) {
+ switch (adev->id) {
+ case PPC440SPE_DMA0_ID:
+ dma_set_maxpq(&adev->common,
+ DMA0_FIFO_SIZE / sizeof(struct dma_cdb), 0);
+ break;
+ case PPC440SPE_DMA1_ID:
+ dma_set_maxpq(&adev->common,
+ DMA1_FIFO_SIZE / sizeof(struct dma_cdb), 0);
+ break;
+ case PPC440SPE_XOR_ID:
+ adev->common.max_pq = XOR_MAX_OPS * 3;
+ break;
+ }
+ adev->common.device_prep_dma_pq =
+ ppc440spe_adma_prep_dma_pq;
+ }
+ if (dma_has_cap(DMA_PQ_VAL, adev->common.cap_mask)) {
+ switch (adev->id) {
+ case PPC440SPE_DMA0_ID:
+ adev->common.max_pq = DMA0_FIFO_SIZE /
+ sizeof(struct dma_cdb);
+ break;
+ case PPC440SPE_DMA1_ID:
+ adev->common.max_pq = DMA1_FIFO_SIZE /
+ sizeof(struct dma_cdb);
+ break;
+ }
+ adev->common.device_prep_dma_pq_val =
+ ppc440spe_adma_prep_dma_pqzero_sum;
+ }
+ if (dma_has_cap(DMA_XOR_VAL, adev->common.cap_mask)) {
+ switch (adev->id) {
+ case PPC440SPE_DMA0_ID:
+ adev->common.max_xor = DMA0_FIFO_SIZE /
+ sizeof(struct dma_cdb);
+ break;
+ case PPC440SPE_DMA1_ID:
+ adev->common.max_xor = DMA1_FIFO_SIZE /
+ sizeof(struct dma_cdb);
+ break;
+ }
+ adev->common.device_prep_dma_xor_val =
+ ppc440spe_adma_prep_dma_xor_zero_sum;
+ }
+ if (dma_has_cap(DMA_INTERRUPT, adev->common.cap_mask)) {
+ adev->common.device_prep_dma_interrupt =
+ ppc440spe_adma_prep_dma_interrupt;
+ }
+ pr_info("%s: AMCC(R) PPC440SP(E) ADMA Engine: "
+ "( %s%s%s%s%s%s)\n",
+ dev_name(adev->dev),
+ dma_has_cap(DMA_PQ, adev->common.cap_mask) ? "pq " : "",
+ dma_has_cap(DMA_PQ_VAL, adev->common.cap_mask) ? "pq_val " : "",
+ dma_has_cap(DMA_XOR, adev->common.cap_mask) ? "xor " : "",
+ dma_has_cap(DMA_XOR_VAL, adev->common.cap_mask) ? "xor_val " : "",
+ dma_has_cap(DMA_MEMCPY, adev->common.cap_mask) ? "memcpy " : "",
+ dma_has_cap(DMA_INTERRUPT, adev->common.cap_mask) ? "intr " : "");
+}
+
+static int ppc440spe_adma_setup_irqs(struct ppc440spe_adma_device *adev,
+ struct ppc440spe_adma_chan *chan,
+ int *initcode)
+{
+ struct platform_device *ofdev;
+ struct device_node *np;
+ int ret;
+
+ ofdev = container_of(adev->dev, struct platform_device, dev);
+ np = ofdev->dev.of_node;
+ if (adev->id != PPC440SPE_XOR_ID) {
+ adev->err_irq = irq_of_parse_and_map(np, 1);
+ if (!adev->err_irq) {
+ dev_warn(adev->dev, "no err irq resource?\n");
+ *initcode = PPC_ADMA_INIT_IRQ2;
+ adev->err_irq = -ENXIO;
+ } else
+ atomic_inc(&ppc440spe_adma_err_irq_ref);
+ } else {
+ adev->err_irq = -ENXIO;
+ }
+
+ adev->irq = irq_of_parse_and_map(np, 0);
+ if (!adev->irq) {
+ dev_err(adev->dev, "no irq resource\n");
+ *initcode = PPC_ADMA_INIT_IRQ1;
+ ret = -ENXIO;
+ goto err_irq_map;
+ }
+ dev_dbg(adev->dev, "irq %d, err irq %d\n",
+ adev->irq, adev->err_irq);
+
+ ret = request_irq(adev->irq, ppc440spe_adma_eot_handler,
+ 0, dev_driver_string(adev->dev), chan);
+ if (ret) {
+ dev_err(adev->dev, "can't request irq %d\n",
+ adev->irq);
+ *initcode = PPC_ADMA_INIT_IRQ1;
+ ret = -EIO;
+ goto err_req1;
+ }
+
+ /* only DMA engines have a separate error IRQ
+ * so it's Ok if err_irq < 0 in XOR engine case.
+ */
+ if (adev->err_irq > 0) {
+ /* both DMA engines share common error IRQ */
+ ret = request_irq(adev->err_irq,
+ ppc440spe_adma_err_handler,
+ IRQF_SHARED,
+ dev_driver_string(adev->dev),
+ chan);
+ if (ret) {
+ dev_err(adev->dev, "can't request irq %d\n",
+ adev->err_irq);
+ *initcode = PPC_ADMA_INIT_IRQ2;
+ ret = -EIO;
+ goto err_req2;
+ }
+ }
+
+ if (adev->id == PPC440SPE_XOR_ID) {
+ /* enable XOR engine interrupts */
+ iowrite32be(XOR_IE_CBCIE_BIT | XOR_IE_ICBIE_BIT |
+ XOR_IE_ICIE_BIT | XOR_IE_RPTIE_BIT,
+ &adev->xor_reg->ier);
+ } else {
+ u32 mask, enable;
+
+ np = of_find_compatible_node(NULL, NULL, "ibm,i2o-440spe");
+ if (!np) {
+ pr_err("%s: can't find I2O device tree node\n",
+ __func__);
+ ret = -ENODEV;
+ goto err_req2;
+ }
+ adev->i2o_reg = of_iomap(np, 0);
+ if (!adev->i2o_reg) {
+ pr_err("%s: failed to map I2O registers\n", __func__);
+ of_node_put(np);
+ ret = -EINVAL;
+ goto err_req2;
+ }
+ of_node_put(np);
+ /* Unmask 'CS FIFO Attention' interrupts and
+ * enable generating interrupts on errors
+ */
+ enable = (adev->id == PPC440SPE_DMA0_ID) ?
+ ~(I2O_IOPIM_P0SNE | I2O_IOPIM_P0EM) :
+ ~(I2O_IOPIM_P1SNE | I2O_IOPIM_P1EM);
+ mask = ioread32(&adev->i2o_reg->iopim) & enable;
+ iowrite32(mask, &adev->i2o_reg->iopim);
+ }
+ return 0;
+
+err_req2:
+ free_irq(adev->irq, chan);
+err_req1:
+ irq_dispose_mapping(adev->irq);
+err_irq_map:
+ if (adev->err_irq > 0) {
+ if (atomic_dec_and_test(&ppc440spe_adma_err_irq_ref))
+ irq_dispose_mapping(adev->err_irq);
+ }
+ return ret;
+}
+
+static void ppc440spe_adma_release_irqs(struct ppc440spe_adma_device *adev,
+ struct ppc440spe_adma_chan *chan)
+{
+ u32 mask, disable;
+
+ if (adev->id == PPC440SPE_XOR_ID) {
+ /* disable XOR engine interrupts */
+ mask = ioread32be(&adev->xor_reg->ier);
+ mask &= ~(XOR_IE_CBCIE_BIT | XOR_IE_ICBIE_BIT |
+ XOR_IE_ICIE_BIT | XOR_IE_RPTIE_BIT);
+ iowrite32be(mask, &adev->xor_reg->ier);
+ } else {
+ /* disable DMAx engine interrupts */
+ disable = (adev->id == PPC440SPE_DMA0_ID) ?
+ (I2O_IOPIM_P0SNE | I2O_IOPIM_P0EM) :
+ (I2O_IOPIM_P1SNE | I2O_IOPIM_P1EM);
+ mask = ioread32(&adev->i2o_reg->iopim) | disable;
+ iowrite32(mask, &adev->i2o_reg->iopim);
+ }
+ free_irq(adev->irq, chan);
+ irq_dispose_mapping(adev->irq);
+ if (adev->err_irq > 0) {
+ free_irq(adev->err_irq, chan);
+ if (atomic_dec_and_test(&ppc440spe_adma_err_irq_ref)) {
+ irq_dispose_mapping(adev->err_irq);
+ iounmap(adev->i2o_reg);
+ }
+ }
+}
+
+/**
+ * ppc440spe_adma_probe - probe the asynch device
+ */
+static int ppc440spe_adma_probe(struct platform_device *ofdev)
+{
+ struct device_node *np = ofdev->dev.of_node;
+ struct resource res;
+ struct ppc440spe_adma_device *adev;
+ struct ppc440spe_adma_chan *chan;
+ struct ppc_dma_chan_ref *ref, *_ref;
+ int ret = 0, initcode = PPC_ADMA_INIT_OK;
+ const u32 *idx;
+ int len;
+ void *regs;
+ u32 id, pool_size;
+
+ if (of_device_is_compatible(np, "amcc,xor-accelerator")) {
+ id = PPC440SPE_XOR_ID;
+ /* As far as the XOR engine is concerned, it does not
+ * use FIFOs but uses linked list. So there is no dependency
+ * between pool size to allocate and the engine configuration.
+ */
+ pool_size = PAGE_SIZE << 1;
+ } else {
+ /* it is DMA0 or DMA1 */
+ idx = of_get_property(np, "cell-index", &len);
+ if (!idx || (len != sizeof(u32))) {
+ dev_err(&ofdev->dev, "Device node %pOF has missing "
+ "or invalid cell-index property\n",
+ np);
+ return -EINVAL;
+ }
+ id = *idx;
+ /* DMA0,1 engines use FIFO to maintain CDBs, so we
+ * should allocate the pool accordingly to size of this
+ * FIFO. Thus, the pool size depends on the FIFO depth:
+ * how much CDBs pointers the FIFO may contain then so
+ * much CDBs we should provide in the pool.
+ * That is
+ * CDB size = 32B;
+ * CDBs number = (DMA0_FIFO_SIZE >> 3);
+ * Pool size = CDBs number * CDB size =
+ * = (DMA0_FIFO_SIZE >> 3) << 5 = DMA0_FIFO_SIZE << 2.
+ */
+ pool_size = (id == PPC440SPE_DMA0_ID) ?
+ DMA0_FIFO_SIZE : DMA1_FIFO_SIZE;
+ pool_size <<= 2;
+ }
+
+ if (of_address_to_resource(np, 0, &res)) {
+ dev_err(&ofdev->dev, "failed to get memory resource\n");
+ initcode = PPC_ADMA_INIT_MEMRES;
+ ret = -ENODEV;
+ goto out;
+ }
+
+ if (!request_mem_region(res.start, resource_size(&res),
+ dev_driver_string(&ofdev->dev))) {
+ dev_err(&ofdev->dev, "failed to request memory region %pR\n",
+ &res);
+ initcode = PPC_ADMA_INIT_MEMREG;
+ ret = -EBUSY;
+ goto out;
+ }
+
+ /* create a device */
+ adev = kzalloc(sizeof(*adev), GFP_KERNEL);
+ if (!adev) {
+ initcode = PPC_ADMA_INIT_ALLOC;
+ ret = -ENOMEM;
+ goto err_adev_alloc;
+ }
+
+ adev->id = id;
+ adev->pool_size = pool_size;
+ /* allocate coherent memory for hardware descriptors */
+ adev->dma_desc_pool_virt = dma_alloc_coherent(&ofdev->dev,
+ adev->pool_size, &adev->dma_desc_pool,
+ GFP_KERNEL);
+ if (adev->dma_desc_pool_virt == NULL) {
+ dev_err(&ofdev->dev, "failed to allocate %d bytes of coherent "
+ "memory for hardware descriptors\n",
+ adev->pool_size);
+ initcode = PPC_ADMA_INIT_COHERENT;
+ ret = -ENOMEM;
+ goto err_dma_alloc;
+ }
+ dev_dbg(&ofdev->dev, "allocated descriptor pool virt 0x%p phys 0x%llx\n",
+ adev->dma_desc_pool_virt, (u64)adev->dma_desc_pool);
+
+ regs = ioremap(res.start, resource_size(&res));
+ if (!regs) {
+ dev_err(&ofdev->dev, "failed to ioremap regs!\n");
+ ret = -ENOMEM;
+ goto err_regs_alloc;
+ }
+
+ if (adev->id == PPC440SPE_XOR_ID) {
+ adev->xor_reg = regs;
+ /* Reset XOR */
+ iowrite32be(XOR_CRSR_XASR_BIT, &adev->xor_reg->crsr);
+ iowrite32be(XOR_CRSR_64BA_BIT, &adev->xor_reg->crrr);
+ } else {
+ size_t fifo_size = (adev->id == PPC440SPE_DMA0_ID) ?
+ DMA0_FIFO_SIZE : DMA1_FIFO_SIZE;
+ adev->dma_reg = regs;
+ /* DMAx_FIFO_SIZE is defined in bytes,
+ * <fsiz> - is defined in number of CDB pointers (8byte).
+ * DMA FIFO Length = CSlength + CPlength, where
+ * CSlength = CPlength = (fsiz + 1) * 8.
+ */
+ iowrite32(DMA_FIFO_ENABLE | ((fifo_size >> 3) - 2),
+ &adev->dma_reg->fsiz);
+ /* Configure DMA engine */
+ iowrite32(DMA_CFG_DXEPR_HP | DMA_CFG_DFMPP_HP | DMA_CFG_FALGN,
+ &adev->dma_reg->cfg);
+ /* Clear Status */
+ iowrite32(~0, &adev->dma_reg->dsts);
+ }
+
+ adev->dev = &ofdev->dev;
+ adev->common.dev = &ofdev->dev;
+ INIT_LIST_HEAD(&adev->common.channels);
+ platform_set_drvdata(ofdev, adev);
+
+ /* create a channel */
+ chan = kzalloc(sizeof(*chan), GFP_KERNEL);
+ if (!chan) {
+ initcode = PPC_ADMA_INIT_CHANNEL;
+ ret = -ENOMEM;
+ goto err_chan_alloc;
+ }
+
+ spin_lock_init(&chan->lock);
+ INIT_LIST_HEAD(&chan->chain);
+ INIT_LIST_HEAD(&chan->all_slots);
+ chan->device = adev;
+ chan->common.device = &adev->common;
+ dma_cookie_init(&chan->common);
+ list_add_tail(&chan->common.device_node, &adev->common.channels);
+ tasklet_setup(&chan->irq_tasklet, ppc440spe_adma_tasklet);
+
+ /* allocate and map helper pages for async validation or
+ * async_mult/async_sum_product operations on DMA0/1.
+ */
+ if (adev->id != PPC440SPE_XOR_ID) {
+ chan->pdest_page = alloc_page(GFP_KERNEL);
+ chan->qdest_page = alloc_page(GFP_KERNEL);
+ if (!chan->pdest_page ||
+ !chan->qdest_page) {
+ if (chan->pdest_page)
+ __free_page(chan->pdest_page);
+ if (chan->qdest_page)
+ __free_page(chan->qdest_page);
+ ret = -ENOMEM;
+ goto err_page_alloc;
+ }
+ chan->pdest = dma_map_page(&ofdev->dev, chan->pdest_page, 0,
+ PAGE_SIZE, DMA_BIDIRECTIONAL);
+ chan->qdest = dma_map_page(&ofdev->dev, chan->qdest_page, 0,
+ PAGE_SIZE, DMA_BIDIRECTIONAL);
+ }
+
+ ref = kmalloc(sizeof(*ref), GFP_KERNEL);
+ if (ref) {
+ ref->chan = &chan->common;
+ INIT_LIST_HEAD(&ref->node);
+ list_add_tail(&ref->node, &ppc440spe_adma_chan_list);
+ } else {
+ dev_err(&ofdev->dev, "failed to allocate channel reference!\n");
+ ret = -ENOMEM;
+ goto err_ref_alloc;
+ }
+
+ ret = ppc440spe_adma_setup_irqs(adev, chan, &initcode);
+ if (ret)
+ goto err_irq;
+
+ ppc440spe_adma_init_capabilities(adev);
+
+ ret = dma_async_device_register(&adev->common);
+ if (ret) {
+ initcode = PPC_ADMA_INIT_REGISTER;
+ dev_err(&ofdev->dev, "failed to register dma device\n");
+ goto err_dev_reg;
+ }
+
+ goto out;
+
+err_dev_reg:
+ ppc440spe_adma_release_irqs(adev, chan);
+err_irq:
+ list_for_each_entry_safe(ref, _ref, &ppc440spe_adma_chan_list, node) {
+ if (chan == to_ppc440spe_adma_chan(ref->chan)) {
+ list_del(&ref->node);
+ kfree(ref);
+ }
+ }
+err_ref_alloc:
+ if (adev->id != PPC440SPE_XOR_ID) {
+ dma_unmap_page(&ofdev->dev, chan->pdest,
+ PAGE_SIZE, DMA_BIDIRECTIONAL);
+ dma_unmap_page(&ofdev->dev, chan->qdest,
+ PAGE_SIZE, DMA_BIDIRECTIONAL);
+ __free_page(chan->pdest_page);
+ __free_page(chan->qdest_page);
+ }
+err_page_alloc:
+ kfree(chan);
+err_chan_alloc:
+ if (adev->id == PPC440SPE_XOR_ID)
+ iounmap(adev->xor_reg);
+ else
+ iounmap(adev->dma_reg);
+err_regs_alloc:
+ dma_free_coherent(adev->dev, adev->pool_size,
+ adev->dma_desc_pool_virt,
+ adev->dma_desc_pool);
+err_dma_alloc:
+ kfree(adev);
+err_adev_alloc:
+ release_mem_region(res.start, resource_size(&res));
+out:
+ if (id < PPC440SPE_ADMA_ENGINES_NUM)
+ ppc440spe_adma_devices[id] = initcode;
+
+ return ret;
+}
+
+/**
+ * ppc440spe_adma_remove - remove the asynch device
+ */
+static int ppc440spe_adma_remove(struct platform_device *ofdev)
+{
+ struct ppc440spe_adma_device *adev = platform_get_drvdata(ofdev);
+ struct device_node *np = ofdev->dev.of_node;
+ struct resource res;
+ struct dma_chan *chan, *_chan;
+ struct ppc_dma_chan_ref *ref, *_ref;
+ struct ppc440spe_adma_chan *ppc440spe_chan;
+
+ if (adev->id < PPC440SPE_ADMA_ENGINES_NUM)
+ ppc440spe_adma_devices[adev->id] = -1;
+
+ dma_async_device_unregister(&adev->common);
+
+ list_for_each_entry_safe(chan, _chan, &adev->common.channels,
+ device_node) {
+ ppc440spe_chan = to_ppc440spe_adma_chan(chan);
+ ppc440spe_adma_release_irqs(adev, ppc440spe_chan);
+ tasklet_kill(&ppc440spe_chan->irq_tasklet);
+ if (adev->id != PPC440SPE_XOR_ID) {
+ dma_unmap_page(&ofdev->dev, ppc440spe_chan->pdest,
+ PAGE_SIZE, DMA_BIDIRECTIONAL);
+ dma_unmap_page(&ofdev->dev, ppc440spe_chan->qdest,
+ PAGE_SIZE, DMA_BIDIRECTIONAL);
+ __free_page(ppc440spe_chan->pdest_page);
+ __free_page(ppc440spe_chan->qdest_page);
+ }
+ list_for_each_entry_safe(ref, _ref, &ppc440spe_adma_chan_list,
+ node) {
+ if (ppc440spe_chan ==
+ to_ppc440spe_adma_chan(ref->chan)) {
+ list_del(&ref->node);
+ kfree(ref);
+ }
+ }
+ list_del(&chan->device_node);
+ kfree(ppc440spe_chan);
+ }
+
+ dma_free_coherent(adev->dev, adev->pool_size,
+ adev->dma_desc_pool_virt, adev->dma_desc_pool);
+ if (adev->id == PPC440SPE_XOR_ID)
+ iounmap(adev->xor_reg);
+ else
+ iounmap(adev->dma_reg);
+ of_address_to_resource(np, 0, &res);
+ release_mem_region(res.start, resource_size(&res));
+ kfree(adev);
+ return 0;
+}
+
+/*
+ * /sys driver interface to enable h/w RAID-6 capabilities
+ * Files created in e.g. /sys/devices/plb.0/400100100.dma0/driver/
+ * directory are "devices", "enable" and "poly".
+ * "devices" shows available engines.
+ * "enable" is used to enable RAID-6 capabilities or to check
+ * whether these has been activated.
+ * "poly" allows setting/checking used polynomial (for PPC440SPe only).
+ */
+
+static ssize_t devices_show(struct device_driver *dev, char *buf)
+{
+ ssize_t size = 0;
+ int i;
+
+ for (i = 0; i < PPC440SPE_ADMA_ENGINES_NUM; i++) {
+ if (ppc440spe_adma_devices[i] == -1)
+ continue;
+ size += scnprintf(buf + size, PAGE_SIZE - size,
+ "PPC440SP(E)-ADMA.%d: %s\n", i,
+ ppc_adma_errors[ppc440spe_adma_devices[i]]);
+ }
+ return size;
+}
+static DRIVER_ATTR_RO(devices);
+
+static ssize_t enable_show(struct device_driver *dev, char *buf)
+{
+ return snprintf(buf, PAGE_SIZE,
+ "PPC440SP(e) RAID-6 capabilities are %sABLED.\n",
+ ppc440spe_r6_enabled ? "EN" : "DIS");
+}
+
+static ssize_t enable_store(struct device_driver *dev, const char *buf,
+ size_t count)
+{
+ unsigned long val;
+ int err;
+
+ if (!count || count > 11)
+ return -EINVAL;
+
+ if (!ppc440spe_r6_tchan)
+ return -EFAULT;
+
+ /* Write a key */
+ err = kstrtoul(buf, 16, &val);
+ if (err)
+ return err;
+
+ dcr_write(ppc440spe_mq_dcr_host, DCRN_MQ0_XORBA, val);
+ isync();
+
+ /* Verify whether it really works now */
+ if (ppc440spe_test_raid6(ppc440spe_r6_tchan) == 0) {
+ pr_info("PPC440SP(e) RAID-6 has been activated "
+ "successfully\n");
+ ppc440spe_r6_enabled = 1;
+ } else {
+ pr_info("PPC440SP(e) RAID-6 hasn't been activated!"
+ " Error key ?\n");
+ ppc440spe_r6_enabled = 0;
+ }
+ return count;
+}
+static DRIVER_ATTR_RW(enable);
+
+static ssize_t poly_show(struct device_driver *dev, char *buf)
+{
+ ssize_t size = 0;
+ u32 reg;
+
+#ifdef CONFIG_440SP
+ /* 440SP has fixed polynomial */
+ reg = 0x4d;
+#else
+ reg = dcr_read(ppc440spe_mq_dcr_host, DCRN_MQ0_CFBHL);
+ reg >>= MQ0_CFBHL_POLY;
+ reg &= 0xFF;
+#endif
+
+ size = snprintf(buf, PAGE_SIZE, "PPC440SP(e) RAID-6 driver "
+ "uses 0x1%02x polynomial.\n", reg);
+ return size;
+}
+
+static ssize_t poly_store(struct device_driver *dev, const char *buf,
+ size_t count)
+{
+ unsigned long reg, val;
+ int err;
+#ifdef CONFIG_440SP
+ /* 440SP uses default 0x14D polynomial only */
+ return -EINVAL;
+#endif
+
+ if (!count || count > 6)
+ return -EINVAL;
+
+ /* e.g., 0x14D or 0x11D */
+ err = kstrtoul(buf, 16, &val);
+ if (err)
+ return err;
+
+ if (val & ~0x1FF)
+ return -EINVAL;
+
+ val &= 0xFF;
+ reg = dcr_read(ppc440spe_mq_dcr_host, DCRN_MQ0_CFBHL);
+ reg &= ~(0xFF << MQ0_CFBHL_POLY);
+ reg |= val << MQ0_CFBHL_POLY;
+ dcr_write(ppc440spe_mq_dcr_host, DCRN_MQ0_CFBHL, reg);
+
+ return count;
+}
+static DRIVER_ATTR_RW(poly);
+
+/*
+ * Common initialisation for RAID engines; allocate memory for
+ * DMAx FIFOs, perform configuration common for all DMA engines.
+ * Further DMA engine specific configuration is done at probe time.
+ */
+static int ppc440spe_configure_raid_devices(void)
+{
+ struct device_node *np;
+ struct resource i2o_res;
+ struct i2o_regs __iomem *i2o_reg;
+ dcr_host_t i2o_dcr_host;
+ unsigned int dcr_base, dcr_len;
+ int i, ret;
+
+ np = of_find_compatible_node(NULL, NULL, "ibm,i2o-440spe");
+ if (!np) {
+ pr_err("%s: can't find I2O device tree node\n",
+ __func__);
+ return -ENODEV;
+ }
+
+ if (of_address_to_resource(np, 0, &i2o_res)) {
+ of_node_put(np);
+ return -EINVAL;
+ }
+
+ i2o_reg = of_iomap(np, 0);
+ if (!i2o_reg) {
+ pr_err("%s: failed to map I2O registers\n", __func__);
+ of_node_put(np);
+ return -EINVAL;
+ }
+
+ /* Get I2O DCRs base */
+ dcr_base = dcr_resource_start(np, 0);
+ dcr_len = dcr_resource_len(np, 0);
+ if (!dcr_base && !dcr_len) {
+ pr_err("%pOF: can't get DCR registers base/len!\n", np);
+ of_node_put(np);
+ iounmap(i2o_reg);
+ return -ENODEV;
+ }
+
+ i2o_dcr_host = dcr_map(np, dcr_base, dcr_len);
+ if (!DCR_MAP_OK(i2o_dcr_host)) {
+ pr_err("%pOF: failed to map DCRs!\n", np);
+ of_node_put(np);
+ iounmap(i2o_reg);
+ return -ENODEV;
+ }
+ of_node_put(np);
+
+ /* Provide memory regions for DMA's FIFOs: I2O, DMA0 and DMA1 share
+ * the base address of FIFO memory space.
+ * Actually we need twice more physical memory than programmed in the
+ * <fsiz> register (because there are two FIFOs for each DMA: CP and CS)
+ */
+ ppc440spe_dma_fifo_buf = kmalloc((DMA0_FIFO_SIZE + DMA1_FIFO_SIZE) << 1,
+ GFP_KERNEL);
+ if (!ppc440spe_dma_fifo_buf) {
+ pr_err("%s: DMA FIFO buffer allocation failed.\n", __func__);
+ iounmap(i2o_reg);
+ dcr_unmap(i2o_dcr_host, dcr_len);
+ return -ENOMEM;
+ }
+
+ /*
+ * Configure h/w
+ */
+ /* Reset I2O/DMA */
+ mtdcri(SDR0, DCRN_SDR0_SRST, DCRN_SDR0_SRST_I2ODMA);
+ mtdcri(SDR0, DCRN_SDR0_SRST, 0);
+
+ /* Setup the base address of mmaped registers */
+ dcr_write(i2o_dcr_host, DCRN_I2O0_IBAH, (u32)(i2o_res.start >> 32));
+ dcr_write(i2o_dcr_host, DCRN_I2O0_IBAL, (u32)(i2o_res.start) |
+ I2O_REG_ENABLE);
+ dcr_unmap(i2o_dcr_host, dcr_len);
+
+ /* Setup FIFO memory space base address */
+ iowrite32(0, &i2o_reg->ifbah);
+ iowrite32(((u32)__pa(ppc440spe_dma_fifo_buf)), &i2o_reg->ifbal);
+
+ /* set zero FIFO size for I2O, so the whole
+ * ppc440spe_dma_fifo_buf is used by DMAs.
+ * DMAx_FIFOs will be configured while probe.
+ */
+ iowrite32(0, &i2o_reg->ifsiz);
+ iounmap(i2o_reg);
+
+ /* To prepare WXOR/RXOR functionality we need access to
+ * Memory Queue Module DCRs (finally it will be enabled
+ * via /sys interface of the ppc440spe ADMA driver).
+ */
+ np = of_find_compatible_node(NULL, NULL, "ibm,mq-440spe");
+ if (!np) {
+ pr_err("%s: can't find MQ device tree node\n",
+ __func__);
+ ret = -ENODEV;
+ goto out_free;
+ }
+
+ /* Get MQ DCRs base */
+ dcr_base = dcr_resource_start(np, 0);
+ dcr_len = dcr_resource_len(np, 0);
+ if (!dcr_base && !dcr_len) {
+ pr_err("%pOF: can't get DCR registers base/len!\n", np);
+ ret = -ENODEV;
+ goto out_mq;
+ }
+
+ ppc440spe_mq_dcr_host = dcr_map(np, dcr_base, dcr_len);
+ if (!DCR_MAP_OK(ppc440spe_mq_dcr_host)) {
+ pr_err("%pOF: failed to map DCRs!\n", np);
+ ret = -ENODEV;
+ goto out_mq;
+ }
+ of_node_put(np);
+ ppc440spe_mq_dcr_len = dcr_len;
+
+ /* Set HB alias */
+ dcr_write(ppc440spe_mq_dcr_host, DCRN_MQ0_BAUH, DMA_CUED_XOR_HB);
+
+ /* Set:
+ * - LL transaction passing limit to 1;
+ * - Memory controller cycle limit to 1;
+ * - Galois Polynomial to 0x14d (default)
+ */
+ dcr_write(ppc440spe_mq_dcr_host, DCRN_MQ0_CFBHL,
+ (1 << MQ0_CFBHL_TPLM) | (1 << MQ0_CFBHL_HBCL) |
+ (PPC440SPE_DEFAULT_POLY << MQ0_CFBHL_POLY));
+
+ atomic_set(&ppc440spe_adma_err_irq_ref, 0);
+ for (i = 0; i < PPC440SPE_ADMA_ENGINES_NUM; i++)
+ ppc440spe_adma_devices[i] = -1;
+
+ return 0;
+
+out_mq:
+ of_node_put(np);
+out_free:
+ kfree(ppc440spe_dma_fifo_buf);
+ return ret;
+}
+
+static const struct of_device_id ppc440spe_adma_of_match[] = {
+ { .compatible = "ibm,dma-440spe", },
+ { .compatible = "amcc,xor-accelerator", },
+ {},
+};
+MODULE_DEVICE_TABLE(of, ppc440spe_adma_of_match);
+
+static struct platform_driver ppc440spe_adma_driver = {
+ .probe = ppc440spe_adma_probe,
+ .remove = ppc440spe_adma_remove,
+ .driver = {
+ .name = "PPC440SP(E)-ADMA",
+ .of_match_table = ppc440spe_adma_of_match,
+ },
+};
+
+static __init int ppc440spe_adma_init(void)
+{
+ int ret;
+
+ ret = ppc440spe_configure_raid_devices();
+ if (ret)
+ return ret;
+
+ ret = platform_driver_register(&ppc440spe_adma_driver);
+ if (ret) {
+ pr_err("%s: failed to register platform driver\n",
+ __func__);
+ goto out_reg;
+ }
+
+ /* Initialization status */
+ ret = driver_create_file(&ppc440spe_adma_driver.driver,
+ &driver_attr_devices);
+ if (ret)
+ goto out_dev;
+
+ /* RAID-6 h/w enable entry */
+ ret = driver_create_file(&ppc440spe_adma_driver.driver,
+ &driver_attr_enable);
+ if (ret)
+ goto out_en;
+
+ /* GF polynomial to use */
+ ret = driver_create_file(&ppc440spe_adma_driver.driver,
+ &driver_attr_poly);
+ if (!ret)
+ return ret;
+
+ driver_remove_file(&ppc440spe_adma_driver.driver,
+ &driver_attr_enable);
+out_en:
+ driver_remove_file(&ppc440spe_adma_driver.driver,
+ &driver_attr_devices);
+out_dev:
+ /* User will not be able to enable h/w RAID-6 */
+ pr_err("%s: failed to create RAID-6 driver interface\n",
+ __func__);
+ platform_driver_unregister(&ppc440spe_adma_driver);
+out_reg:
+ dcr_unmap(ppc440spe_mq_dcr_host, ppc440spe_mq_dcr_len);
+ kfree(ppc440spe_dma_fifo_buf);
+ return ret;
+}
+
+static void __exit ppc440spe_adma_exit(void)
+{
+ driver_remove_file(&ppc440spe_adma_driver.driver,
+ &driver_attr_poly);
+ driver_remove_file(&ppc440spe_adma_driver.driver,
+ &driver_attr_enable);
+ driver_remove_file(&ppc440spe_adma_driver.driver,
+ &driver_attr_devices);
+ platform_driver_unregister(&ppc440spe_adma_driver);
+ dcr_unmap(ppc440spe_mq_dcr_host, ppc440spe_mq_dcr_len);
+ kfree(ppc440spe_dma_fifo_buf);
+}
+
+arch_initcall(ppc440spe_adma_init);
+module_exit(ppc440spe_adma_exit);
+
+MODULE_AUTHOR("Yuri Tikhonov <yur@emcraft.com>");
+MODULE_DESCRIPTION("PPC440SPE ADMA Engine Driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/dma/ppc4xx/adma.h b/drivers/dma/ppc4xx/adma.h
new file mode 100644
index 000000000..f8a5d7c1f
--- /dev/null
+++ b/drivers/dma/ppc4xx/adma.h
@@ -0,0 +1,190 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * 2006-2009 (C) DENX Software Engineering.
+ *
+ * Author: Yuri Tikhonov <yur@emcraft.com>
+ */
+
+#ifndef _PPC440SPE_ADMA_H
+#define _PPC440SPE_ADMA_H
+
+#include <linux/types.h>
+#include "dma.h"
+#include "xor.h"
+
+#define to_ppc440spe_adma_chan(chan) \
+ container_of(chan, struct ppc440spe_adma_chan, common)
+#define to_ppc440spe_adma_device(dev) \
+ container_of(dev, struct ppc440spe_adma_device, common)
+#define tx_to_ppc440spe_adma_slot(tx) \
+ container_of(tx, struct ppc440spe_adma_desc_slot, async_tx)
+
+/* Default polynomial (for 440SP is only available) */
+#define PPC440SPE_DEFAULT_POLY 0x4d
+
+#define PPC440SPE_ADMA_ENGINES_NUM (XOR_ENGINES_NUM + DMA_ENGINES_NUM)
+
+#define PPC440SPE_ADMA_WATCHDOG_MSEC 3
+#define PPC440SPE_ADMA_THRESHOLD 1
+
+#define PPC440SPE_DMA0_ID 0
+#define PPC440SPE_DMA1_ID 1
+#define PPC440SPE_XOR_ID 2
+
+#define PPC440SPE_ADMA_DMA_MAX_BYTE_COUNT 0xFFFFFFUL
+/* this is the XOR_CBBCR width */
+#define PPC440SPE_ADMA_XOR_MAX_BYTE_COUNT (1 << 31)
+#define PPC440SPE_ADMA_ZERO_SUM_MAX_BYTE_COUNT PPC440SPE_ADMA_XOR_MAX_BYTE_COUNT
+
+#define PPC440SPE_RXOR_RUN 0
+
+#define MQ0_CF2H_RXOR_BS_MASK 0x1FF
+
+#undef ADMA_LL_DEBUG
+
+/**
+ * struct ppc440spe_adma_device - internal representation of an ADMA device
+ * @dev: device
+ * @dma_reg: base for DMAx register access
+ * @xor_reg: base for XOR register access
+ * @i2o_reg: base for I2O register access
+ * @id: HW ADMA Device selector
+ * @dma_desc_pool_virt: base of DMA descriptor region (CPU address)
+ * @dma_desc_pool: base of DMA descriptor region (DMA address)
+ * @pool_size: size of the pool
+ * @irq: DMAx or XOR irq number
+ * @err_irq: DMAx error irq number
+ * @common: embedded struct dma_device
+ */
+struct ppc440spe_adma_device {
+ struct device *dev;
+ struct dma_regs __iomem *dma_reg;
+ struct xor_regs __iomem *xor_reg;
+ struct i2o_regs __iomem *i2o_reg;
+ int id;
+ void *dma_desc_pool_virt;
+ dma_addr_t dma_desc_pool;
+ size_t pool_size;
+ int irq;
+ int err_irq;
+ struct dma_device common;
+};
+
+/**
+ * struct ppc440spe_adma_chan - internal representation of an ADMA channel
+ * @lock: serializes enqueue/dequeue operations to the slot pool
+ * @device: parent device
+ * @chain: device chain view of the descriptors
+ * @common: common dmaengine channel object members
+ * @all_slots: complete domain of slots usable by the channel
+ * @pending: allows batching of hardware operations
+ * @slots_allocated: records the actual size of the descriptor slot pool
+ * @hw_chain_inited: h/w descriptor chain initialization flag
+ * @irq_tasklet: bottom half where ppc440spe_adma_slot_cleanup runs
+ * @needs_unmap: if buffers should not be unmapped upon final processing
+ * @pdest_page: P destination page for async validate operation
+ * @qdest_page: Q destination page for async validate operation
+ * @pdest: P dma addr for async validate operation
+ * @qdest: Q dma addr for async validate operation
+ */
+struct ppc440spe_adma_chan {
+ spinlock_t lock;
+ struct ppc440spe_adma_device *device;
+ struct list_head chain;
+ struct dma_chan common;
+ struct list_head all_slots;
+ struct ppc440spe_adma_desc_slot *last_used;
+ int pending;
+ int slots_allocated;
+ int hw_chain_inited;
+ struct tasklet_struct irq_tasklet;
+ u8 needs_unmap;
+ struct page *pdest_page;
+ struct page *qdest_page;
+ dma_addr_t pdest;
+ dma_addr_t qdest;
+};
+
+struct ppc440spe_rxor {
+ u32 addrl;
+ u32 addrh;
+ int len;
+ int xor_count;
+ int addr_count;
+ int desc_count;
+ int state;
+};
+
+/**
+ * struct ppc440spe_adma_desc_slot - PPC440SPE-ADMA software descriptor
+ * @phys: hardware address of the hardware descriptor chain
+ * @group_head: first operation in a transaction
+ * @hw_next: pointer to the next descriptor in chain
+ * @async_tx: support for the async_tx api
+ * @slot_node: node on the iop_adma_chan.all_slots list
+ * @chain_node: node on the op_adma_chan.chain list
+ * @group_list: list of slots that make up a multi-descriptor transaction
+ * for example transfer lengths larger than the supported hw max
+ * @unmap_len: transaction bytecount
+ * @hw_desc: virtual address of the hardware descriptor chain
+ * @stride: currently chained or not
+ * @idx: pool index
+ * @slot_cnt: total slots used in an transaction (group of operations)
+ * @src_cnt: number of sources set in this descriptor
+ * @dst_cnt: number of destinations set in the descriptor
+ * @slots_per_op: number of slots per operation
+ * @descs_per_op: number of slot per P/Q operation see comment
+ * for ppc440spe_prep_dma_pqxor function
+ * @flags: desc state/type
+ * @reverse_flags: 1 if a corresponding rxor address uses reversed address order
+ * @xor_check_result: result of zero sum
+ * @crc32_result: result crc calculation
+ */
+struct ppc440spe_adma_desc_slot {
+ dma_addr_t phys;
+ struct ppc440spe_adma_desc_slot *group_head;
+ struct ppc440spe_adma_desc_slot *hw_next;
+ struct dma_async_tx_descriptor async_tx;
+ struct list_head slot_node;
+ struct list_head chain_node; /* node in channel ops list */
+ struct list_head group_list; /* list */
+ unsigned int unmap_len;
+ void *hw_desc;
+ u16 stride;
+ u16 idx;
+ u16 slot_cnt;
+ u8 src_cnt;
+ u8 dst_cnt;
+ u8 slots_per_op;
+ u8 descs_per_op;
+ unsigned long flags;
+ unsigned long reverse_flags[8];
+
+#define PPC440SPE_DESC_INT 0 /* generate interrupt on complete */
+#define PPC440SPE_ZERO_P 1 /* clear P destionaion */
+#define PPC440SPE_ZERO_Q 2 /* clear Q destination */
+#define PPC440SPE_COHERENT 3 /* src/dst are coherent */
+
+#define PPC440SPE_DESC_WXOR 4 /* WXORs are in chain */
+#define PPC440SPE_DESC_RXOR 5 /* RXOR is in chain */
+
+#define PPC440SPE_DESC_RXOR123 8 /* CDB for RXOR123 operation */
+#define PPC440SPE_DESC_RXOR124 9 /* CDB for RXOR124 operation */
+#define PPC440SPE_DESC_RXOR125 10 /* CDB for RXOR125 operation */
+#define PPC440SPE_DESC_RXOR12 11 /* CDB for RXOR12 operation */
+#define PPC440SPE_DESC_RXOR_REV 12 /* CDB has srcs in reversed order */
+
+#define PPC440SPE_DESC_PCHECK 13
+#define PPC440SPE_DESC_QCHECK 14
+
+#define PPC440SPE_DESC_RXOR_MSK 0x3
+
+ struct ppc440spe_rxor rxor_cursor;
+
+ union {
+ u32 *xor_check_result;
+ u32 *crc32_result;
+ };
+};
+
+#endif /* _PPC440SPE_ADMA_H */
diff --git a/drivers/dma/ppc4xx/dma.h b/drivers/dma/ppc4xx/dma.h
new file mode 100644
index 000000000..1ff4be23d
--- /dev/null
+++ b/drivers/dma/ppc4xx/dma.h
@@ -0,0 +1,220 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * 440SPe's DMA engines support header file
+ *
+ * 2006-2009 (C) DENX Software Engineering.
+ *
+ * Author: Yuri Tikhonov <yur@emcraft.com>
+ */
+
+#ifndef _PPC440SPE_DMA_H
+#define _PPC440SPE_DMA_H
+
+#include <linux/types.h>
+
+/* Number of elements in the array with statical CDBs */
+#define MAX_STAT_DMA_CDBS 16
+/* Number of DMA engines available on the contoller */
+#define DMA_ENGINES_NUM 2
+
+/* Maximum h/w supported number of destinations */
+#define DMA_DEST_MAX_NUM 2
+
+/* FIFO's params */
+#define DMA0_FIFO_SIZE 0x1000
+#define DMA1_FIFO_SIZE 0x1000
+#define DMA_FIFO_ENABLE (1<<12)
+
+/* DMA Configuration Register. Data Transfer Engine PLB Priority: */
+#define DMA_CFG_DXEPR_LP (0<<26)
+#define DMA_CFG_DXEPR_HP (3<<26)
+#define DMA_CFG_DXEPR_HHP (2<<26)
+#define DMA_CFG_DXEPR_HHHP (1<<26)
+
+/* DMA Configuration Register. DMA FIFO Manager PLB Priority: */
+#define DMA_CFG_DFMPP_LP (0<<23)
+#define DMA_CFG_DFMPP_HP (3<<23)
+#define DMA_CFG_DFMPP_HHP (2<<23)
+#define DMA_CFG_DFMPP_HHHP (1<<23)
+
+/* DMA Configuration Register. Force 64-byte Alignment */
+#define DMA_CFG_FALGN (1 << 19)
+
+/*UIC0:*/
+#define D0CPF_INT (1<<12)
+#define D0CSF_INT (1<<11)
+#define D1CPF_INT (1<<10)
+#define D1CSF_INT (1<<9)
+/*UIC1:*/
+#define DMAE_INT (1<<9)
+
+/* I2O IOP Interrupt Mask Register */
+#define I2O_IOPIM_P0SNE (1<<3)
+#define I2O_IOPIM_P0EM (1<<5)
+#define I2O_IOPIM_P1SNE (1<<6)
+#define I2O_IOPIM_P1EM (1<<8)
+
+/* DMA CDB fields */
+#define DMA_CDB_MSK (0xF)
+#define DMA_CDB_64B_ADDR (1<<2)
+#define DMA_CDB_NO_INT (1<<3)
+#define DMA_CDB_STATUS_MSK (0x3)
+#define DMA_CDB_ADDR_MSK (0xFFFFFFF0)
+
+/* DMA CDB OpCodes */
+#define DMA_CDB_OPC_NO_OP (0x00)
+#define DMA_CDB_OPC_MV_SG1_SG2 (0x01)
+#define DMA_CDB_OPC_MULTICAST (0x05)
+#define DMA_CDB_OPC_DFILL128 (0x24)
+#define DMA_CDB_OPC_DCHECK128 (0x23)
+
+#define DMA_CUED_XOR_BASE (0x10000000)
+#define DMA_CUED_XOR_HB (0x00000008)
+
+#ifdef CONFIG_440SP
+#define DMA_CUED_MULT1_OFF 0
+#define DMA_CUED_MULT2_OFF 8
+#define DMA_CUED_MULT3_OFF 16
+#define DMA_CUED_REGION_OFF 24
+#define DMA_CUED_XOR_WIN_MSK (0xFC000000)
+#else
+#define DMA_CUED_MULT1_OFF 2
+#define DMA_CUED_MULT2_OFF 10
+#define DMA_CUED_MULT3_OFF 18
+#define DMA_CUED_REGION_OFF 26
+#define DMA_CUED_XOR_WIN_MSK (0xF0000000)
+#endif
+
+#define DMA_CUED_REGION_MSK 0x3
+#define DMA_RXOR123 0x0
+#define DMA_RXOR124 0x1
+#define DMA_RXOR125 0x2
+#define DMA_RXOR12 0x3
+
+/* S/G addresses */
+#define DMA_CDB_SG_SRC 1
+#define DMA_CDB_SG_DST1 2
+#define DMA_CDB_SG_DST2 3
+
+/*
+ * DMAx engines Command Descriptor Block Type
+ */
+struct dma_cdb {
+ /*
+ * Basic CDB structure (Table 20-17, p.499, 440spe_um_1_22.pdf)
+ */
+ u8 pad0[2]; /* reserved */
+ u8 attr; /* attributes */
+ u8 opc; /* opcode */
+ u32 sg1u; /* upper SG1 address */
+ u32 sg1l; /* lower SG1 address */
+ u32 cnt; /* SG count, 3B used */
+ u32 sg2u; /* upper SG2 address */
+ u32 sg2l; /* lower SG2 address */
+ u32 sg3u; /* upper SG3 address */
+ u32 sg3l; /* lower SG3 address */
+};
+
+/*
+ * DMAx hardware registers (p.515 in 440SPe UM 1.22)
+ */
+struct dma_regs {
+ u32 cpfpl;
+ u32 cpfph;
+ u32 csfpl;
+ u32 csfph;
+ u32 dsts;
+ u32 cfg;
+ u8 pad0[0x8];
+ u16 cpfhp;
+ u16 cpftp;
+ u16 csfhp;
+ u16 csftp;
+ u8 pad1[0x8];
+ u32 acpl;
+ u32 acph;
+ u32 s1bpl;
+ u32 s1bph;
+ u32 s2bpl;
+ u32 s2bph;
+ u32 s3bpl;
+ u32 s3bph;
+ u8 pad2[0x10];
+ u32 earl;
+ u32 earh;
+ u8 pad3[0x8];
+ u32 seat;
+ u32 sead;
+ u32 op;
+ u32 fsiz;
+};
+
+/*
+ * I2O hardware registers (p.528 in 440SPe UM 1.22)
+ */
+struct i2o_regs {
+ u32 ists;
+ u32 iseat;
+ u32 isead;
+ u8 pad0[0x14];
+ u32 idbel;
+ u8 pad1[0xc];
+ u32 ihis;
+ u32 ihim;
+ u8 pad2[0x8];
+ u32 ihiq;
+ u32 ihoq;
+ u8 pad3[0x8];
+ u32 iopis;
+ u32 iopim;
+ u32 iopiq;
+ u8 iopoq;
+ u8 pad4[3];
+ u16 iiflh;
+ u16 iiflt;
+ u16 iiplh;
+ u16 iiplt;
+ u16 ioflh;
+ u16 ioflt;
+ u16 ioplh;
+ u16 ioplt;
+ u32 iidc;
+ u32 ictl;
+ u32 ifcpp;
+ u8 pad5[0x4];
+ u16 mfac0;
+ u16 mfac1;
+ u16 mfac2;
+ u16 mfac3;
+ u16 mfac4;
+ u16 mfac5;
+ u16 mfac6;
+ u16 mfac7;
+ u16 ifcfh;
+ u16 ifcht;
+ u8 pad6[0x4];
+ u32 iifmc;
+ u32 iodb;
+ u32 iodbc;
+ u32 ifbal;
+ u32 ifbah;
+ u32 ifsiz;
+ u32 ispd0;
+ u32 ispd1;
+ u32 ispd2;
+ u32 ispd3;
+ u32 ihipl;
+ u32 ihiph;
+ u32 ihopl;
+ u32 ihoph;
+ u32 iiipl;
+ u32 iiiph;
+ u32 iiopl;
+ u32 iioph;
+ u32 ifcpl;
+ u32 ifcph;
+ u8 pad7[0x8];
+ u32 iopt;
+};
+
+#endif /* _PPC440SPE_DMA_H */
diff --git a/drivers/dma/ppc4xx/xor.h b/drivers/dma/ppc4xx/xor.h
new file mode 100644
index 000000000..da1230df2
--- /dev/null
+++ b/drivers/dma/ppc4xx/xor.h
@@ -0,0 +1,107 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * 440SPe's XOR engines support header file
+ *
+ * 2006-2009 (C) DENX Software Engineering.
+ *
+ * Author: Yuri Tikhonov <yur@emcraft.com>
+ */
+
+#ifndef _PPC440SPE_XOR_H
+#define _PPC440SPE_XOR_H
+
+#include <linux/types.h>
+
+/* Number of XOR engines available on the contoller */
+#define XOR_ENGINES_NUM 1
+
+/* Number of operands supported in the h/w */
+#define XOR_MAX_OPS 16
+
+/*
+ * XOR Command Block Control Register bits
+ */
+#define XOR_CBCR_LNK_BIT (1<<31) /* link present */
+#define XOR_CBCR_TGT_BIT (1<<30) /* target present */
+#define XOR_CBCR_CBCE_BIT (1<<29) /* command block compete enable */
+#define XOR_CBCR_RNZE_BIT (1<<28) /* result not zero enable */
+#define XOR_CBCR_XNOR_BIT (1<<15) /* XOR/XNOR */
+#define XOR_CDCR_OAC_MSK (0x7F) /* operand address count */
+
+/*
+ * XORCore Status Register bits
+ */
+#define XOR_SR_XCP_BIT (1<<31) /* core processing */
+#define XOR_SR_ICB_BIT (1<<17) /* invalid CB */
+#define XOR_SR_IC_BIT (1<<16) /* invalid command */
+#define XOR_SR_IPE_BIT (1<<15) /* internal parity error */
+#define XOR_SR_RNZ_BIT (1<<2) /* result not Zero */
+#define XOR_SR_CBC_BIT (1<<1) /* CB complete */
+#define XOR_SR_CBLC_BIT (1<<0) /* CB list complete */
+
+/*
+ * XORCore Control Set and Reset Register bits
+ */
+#define XOR_CRSR_XASR_BIT (1<<31) /* soft reset */
+#define XOR_CRSR_XAE_BIT (1<<30) /* enable */
+#define XOR_CRSR_RCBE_BIT (1<<29) /* refetch CB enable */
+#define XOR_CRSR_PAUS_BIT (1<<28) /* pause */
+#define XOR_CRSR_64BA_BIT (1<<27) /* 64/32 CB format */
+#define XOR_CRSR_CLP_BIT (1<<25) /* continue list processing */
+
+/*
+ * XORCore Interrupt Enable Register
+ */
+#define XOR_IE_ICBIE_BIT (1<<17) /* Invalid Command Block IRQ Enable */
+#define XOR_IE_ICIE_BIT (1<<16) /* Invalid Command IRQ Enable */
+#define XOR_IE_RPTIE_BIT (1<<14) /* Read PLB Timeout Error IRQ Enable */
+#define XOR_IE_CBCIE_BIT (1<<1) /* CB complete interrupt enable */
+#define XOR_IE_CBLCI_BIT (1<<0) /* CB list complete interrupt enable */
+
+/*
+ * XOR Accelerator engine Command Block Type
+ */
+struct xor_cb {
+ /*
+ * Basic 64-bit format XOR CB (Table 19-1, p.463, 440spe_um_1_22.pdf)
+ */
+ u32 cbc; /* control */
+ u32 cbbc; /* byte count */
+ u32 cbs; /* status */
+ u8 pad0[4]; /* reserved */
+ u32 cbtah; /* target address high */
+ u32 cbtal; /* target address low */
+ u32 cblah; /* link address high */
+ u32 cblal; /* link address low */
+ struct {
+ u32 h;
+ u32 l;
+ } __attribute__ ((packed)) ops[16];
+} __attribute__ ((packed));
+
+/*
+ * XOR hardware registers Table 19-3, UM 1.22
+ */
+struct xor_regs {
+ u32 op_ar[16][2]; /* operand address[0]-high,[1]-low registers */
+ u8 pad0[352]; /* reserved */
+ u32 cbcr; /* CB control register */
+ u32 cbbcr; /* CB byte count register */
+ u32 cbsr; /* CB status register */
+ u8 pad1[4]; /* reserved */
+ u32 cbtahr; /* operand target address high register */
+ u32 cbtalr; /* operand target address low register */
+ u32 cblahr; /* CB link address high register */
+ u32 cblalr; /* CB link address low register */
+ u32 crsr; /* control set register */
+ u32 crrr; /* control reset register */
+ u32 ccbahr; /* current CB address high register */
+ u32 ccbalr; /* current CB address low register */
+ u32 plbr; /* PLB configuration register */
+ u32 ier; /* interrupt enable register */
+ u32 pecr; /* parity error count register */
+ u32 sr; /* status register */
+ u32 revidr; /* revision ID register */
+};
+
+#endif /* _PPC440SPE_XOR_H */
diff --git a/drivers/dma/ptdma/Kconfig b/drivers/dma/ptdma/Kconfig
new file mode 100644
index 000000000..b430edd70
--- /dev/null
+++ b/drivers/dma/ptdma/Kconfig
@@ -0,0 +1,13 @@
+# SPDX-License-Identifier: GPL-2.0-only
+config AMD_PTDMA
+ tristate "AMD PassThru DMA Engine"
+ depends on X86_64 && PCI
+ select DMA_ENGINE
+ select DMA_VIRTUAL_CHANNELS
+ help
+ Enable support for the AMD PTDMA controller. This controller
+ provides DMA capabilities to perform high bandwidth memory to
+ memory and IO copy operations. It performs DMA transfer through
+ queue-based descriptor management. This DMA controller is intended
+ to be used with AMD Non-Transparent Bridge devices and not for
+ general purpose peripheral DMA.
diff --git a/drivers/dma/ptdma/Makefile b/drivers/dma/ptdma/Makefile
new file mode 100644
index 000000000..ce5410268
--- /dev/null
+++ b/drivers/dma/ptdma/Makefile
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# AMD Passthru DMA driver
+#
+
+obj-$(CONFIG_AMD_PTDMA) += ptdma.o
+
+ptdma-objs := ptdma-dev.o ptdma-dmaengine.o ptdma-debugfs.o
+
+ptdma-$(CONFIG_PCI) += ptdma-pci.o
diff --git a/drivers/dma/ptdma/ptdma-debugfs.c b/drivers/dma/ptdma/ptdma-debugfs.c
new file mode 100644
index 000000000..c8307d304
--- /dev/null
+++ b/drivers/dma/ptdma/ptdma-debugfs.c
@@ -0,0 +1,106 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * AMD Passthrough DMA device driver
+ * -- Based on the CCP driver
+ *
+ * Copyright (C) 2016,2021 Advanced Micro Devices, Inc.
+ *
+ * Author: Sanjay R Mehta <sanju.mehta@amd.com>
+ * Author: Gary R Hook <gary.hook@amd.com>
+ */
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+#include "ptdma.h"
+
+/* DebugFS helpers */
+#define RI_VERSION_NUM 0x0000003F
+
+#define RI_NUM_VQM 0x00078000
+#define RI_NVQM_SHIFT 15
+
+static int pt_debugfs_info_show(struct seq_file *s, void *p)
+{
+ struct pt_device *pt = s->private;
+ unsigned int regval;
+
+ seq_printf(s, "Device name: %s\n", dev_name(pt->dev));
+ seq_printf(s, " # Queues: %d\n", 1);
+ seq_printf(s, " # Cmds: %d\n", pt->cmd_count);
+
+ regval = ioread32(pt->io_regs + CMD_PT_VERSION);
+
+ seq_printf(s, " Version: %d\n", regval & RI_VERSION_NUM);
+ seq_puts(s, " Engines:");
+ seq_puts(s, "\n");
+ seq_printf(s, " Queues: %d\n", (regval & RI_NUM_VQM) >> RI_NVQM_SHIFT);
+
+ return 0;
+}
+
+/*
+ * Return a formatted buffer containing the current
+ * statistics of queue for PTDMA
+ */
+static int pt_debugfs_stats_show(struct seq_file *s, void *p)
+{
+ struct pt_device *pt = s->private;
+
+ seq_printf(s, "Total Interrupts Handled: %ld\n", pt->total_interrupts);
+
+ return 0;
+}
+
+static int pt_debugfs_queue_show(struct seq_file *s, void *p)
+{
+ struct pt_cmd_queue *cmd_q = s->private;
+ unsigned int regval;
+
+ if (!cmd_q)
+ return 0;
+
+ seq_printf(s, " Pass-Thru: %ld\n", cmd_q->total_pt_ops);
+
+ regval = ioread32(cmd_q->reg_control + 0x000C);
+
+ seq_puts(s, " Enabled Interrupts:");
+ if (regval & INT_EMPTY_QUEUE)
+ seq_puts(s, " EMPTY");
+ if (regval & INT_QUEUE_STOPPED)
+ seq_puts(s, " STOPPED");
+ if (regval & INT_ERROR)
+ seq_puts(s, " ERROR");
+ if (regval & INT_COMPLETION)
+ seq_puts(s, " COMPLETION");
+ seq_puts(s, "\n");
+
+ return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(pt_debugfs_info);
+DEFINE_SHOW_ATTRIBUTE(pt_debugfs_queue);
+DEFINE_SHOW_ATTRIBUTE(pt_debugfs_stats);
+
+void ptdma_debugfs_setup(struct pt_device *pt)
+{
+ struct pt_cmd_queue *cmd_q;
+ struct dentry *debugfs_q_instance;
+
+ if (!debugfs_initialized())
+ return;
+
+ debugfs_create_file("info", 0400, pt->dma_dev.dbg_dev_root, pt,
+ &pt_debugfs_info_fops);
+
+ debugfs_create_file("stats", 0400, pt->dma_dev.dbg_dev_root, pt,
+ &pt_debugfs_stats_fops);
+
+ cmd_q = &pt->cmd_q;
+
+ debugfs_q_instance =
+ debugfs_create_dir("q", pt->dma_dev.dbg_dev_root);
+
+ debugfs_create_file("stats", 0400, debugfs_q_instance, cmd_q,
+ &pt_debugfs_queue_fops);
+}
diff --git a/drivers/dma/ptdma/ptdma-dev.c b/drivers/dma/ptdma/ptdma-dev.c
new file mode 100644
index 000000000..a2bf13ff1
--- /dev/null
+++ b/drivers/dma/ptdma/ptdma-dev.c
@@ -0,0 +1,309 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * AMD Passthru DMA device driver
+ * -- Based on the CCP driver
+ *
+ * Copyright (C) 2016,2021 Advanced Micro Devices, Inc.
+ *
+ * Author: Sanjay R Mehta <sanju.mehta@amd.com>
+ * Author: Gary R Hook <gary.hook@amd.com>
+ */
+
+#include <linux/bitfield.h>
+#include <linux/dma-mapping.h>
+#include <linux/debugfs.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+
+#include "ptdma.h"
+
+/* Human-readable error strings */
+static char *pt_error_codes[] = {
+ "",
+ "ERR 01: ILLEGAL_ENGINE",
+ "ERR 03: ILLEGAL_FUNCTION_TYPE",
+ "ERR 04: ILLEGAL_FUNCTION_MODE",
+ "ERR 06: ILLEGAL_FUNCTION_SIZE",
+ "ERR 08: ILLEGAL_FUNCTION_RSVD",
+ "ERR 09: ILLEGAL_BUFFER_LENGTH",
+ "ERR 10: VLSB_FAULT",
+ "ERR 11: ILLEGAL_MEM_ADDR",
+ "ERR 12: ILLEGAL_MEM_SEL",
+ "ERR 13: ILLEGAL_CONTEXT_ID",
+ "ERR 15: 0xF Reserved",
+ "ERR 18: CMD_TIMEOUT",
+ "ERR 19: IDMA0_AXI_SLVERR",
+ "ERR 20: IDMA0_AXI_DECERR",
+ "ERR 21: 0x15 Reserved",
+ "ERR 22: IDMA1_AXI_SLAVE_FAULT",
+ "ERR 23: IDMA1_AIXI_DECERR",
+ "ERR 24: 0x18 Reserved",
+ "ERR 27: 0x1B Reserved",
+ "ERR 38: ODMA0_AXI_SLVERR",
+ "ERR 39: ODMA0_AXI_DECERR",
+ "ERR 40: 0x28 Reserved",
+ "ERR 41: ODMA1_AXI_SLVERR",
+ "ERR 42: ODMA1_AXI_DECERR",
+ "ERR 43: LSB_PARITY_ERR",
+};
+
+static void pt_log_error(struct pt_device *d, int e)
+{
+ dev_err(d->dev, "PTDMA error: %s (0x%x)\n", pt_error_codes[e], e);
+}
+
+void pt_start_queue(struct pt_cmd_queue *cmd_q)
+{
+ /* Turn on the run bit */
+ iowrite32(cmd_q->qcontrol | CMD_Q_RUN, cmd_q->reg_control);
+}
+
+void pt_stop_queue(struct pt_cmd_queue *cmd_q)
+{
+ /* Turn off the run bit */
+ iowrite32(cmd_q->qcontrol & ~CMD_Q_RUN, cmd_q->reg_control);
+}
+
+static int pt_core_execute_cmd(struct ptdma_desc *desc, struct pt_cmd_queue *cmd_q)
+{
+ bool soc = FIELD_GET(DWORD0_SOC, desc->dw0);
+ u8 *q_desc = (u8 *)&cmd_q->qbase[cmd_q->qidx];
+ u32 tail;
+ unsigned long flags;
+
+ if (soc) {
+ desc->dw0 |= FIELD_PREP(DWORD0_IOC, desc->dw0);
+ desc->dw0 &= ~DWORD0_SOC;
+ }
+ spin_lock_irqsave(&cmd_q->q_lock, flags);
+
+ /* Copy 32-byte command descriptor to hw queue. */
+ memcpy(q_desc, desc, 32);
+ cmd_q->qidx = (cmd_q->qidx + 1) % CMD_Q_LEN;
+
+ /* The data used by this command must be flushed to memory */
+ wmb();
+
+ /* Write the new tail address back to the queue register */
+ tail = lower_32_bits(cmd_q->qdma_tail + cmd_q->qidx * Q_DESC_SIZE);
+ iowrite32(tail, cmd_q->reg_control + 0x0004);
+
+ /* Turn the queue back on using our cached control register */
+ pt_start_queue(cmd_q);
+ spin_unlock_irqrestore(&cmd_q->q_lock, flags);
+
+ return 0;
+}
+
+int pt_core_perform_passthru(struct pt_cmd_queue *cmd_q,
+ struct pt_passthru_engine *pt_engine)
+{
+ struct ptdma_desc desc;
+ struct pt_device *pt = container_of(cmd_q, struct pt_device, cmd_q);
+
+ cmd_q->cmd_error = 0;
+ cmd_q->total_pt_ops++;
+ memset(&desc, 0, sizeof(desc));
+ desc.dw0 = CMD_DESC_DW0_VAL;
+ desc.length = pt_engine->src_len;
+ desc.src_lo = lower_32_bits(pt_engine->src_dma);
+ desc.dw3.src_hi = upper_32_bits(pt_engine->src_dma);
+ desc.dst_lo = lower_32_bits(pt_engine->dst_dma);
+ desc.dw5.dst_hi = upper_32_bits(pt_engine->dst_dma);
+
+ if (cmd_q->int_en)
+ pt_core_enable_queue_interrupts(pt);
+ else
+ pt_core_disable_queue_interrupts(pt);
+
+ return pt_core_execute_cmd(&desc, cmd_q);
+}
+
+static void pt_do_cmd_complete(unsigned long data)
+{
+ struct pt_tasklet_data *tdata = (struct pt_tasklet_data *)data;
+ struct pt_cmd *cmd = tdata->cmd;
+ struct pt_cmd_queue *cmd_q = &cmd->pt->cmd_q;
+ u32 tail;
+
+ if (cmd_q->cmd_error) {
+ /*
+ * Log the error and flush the queue by
+ * moving the head pointer
+ */
+ tail = lower_32_bits(cmd_q->qdma_tail + cmd_q->qidx * Q_DESC_SIZE);
+ pt_log_error(cmd_q->pt, cmd_q->cmd_error);
+ iowrite32(tail, cmd_q->reg_control + 0x0008);
+ }
+
+ cmd->pt_cmd_callback(cmd->data, cmd->ret);
+}
+
+void pt_check_status_trans(struct pt_device *pt, struct pt_cmd_queue *cmd_q)
+{
+ u32 status;
+
+ status = ioread32(cmd_q->reg_control + 0x0010);
+ if (status) {
+ cmd_q->int_status = status;
+ cmd_q->q_status = ioread32(cmd_q->reg_control + 0x0100);
+ cmd_q->q_int_status = ioread32(cmd_q->reg_control + 0x0104);
+
+ /* On error, only save the first error value */
+ if ((status & INT_ERROR) && !cmd_q->cmd_error)
+ cmd_q->cmd_error = CMD_Q_ERROR(cmd_q->q_status);
+
+ /* Acknowledge the completion */
+ iowrite32(status, cmd_q->reg_control + 0x0010);
+ pt_do_cmd_complete((ulong)&pt->tdata);
+ }
+}
+
+static irqreturn_t pt_core_irq_handler(int irq, void *data)
+{
+ struct pt_device *pt = data;
+ struct pt_cmd_queue *cmd_q = &pt->cmd_q;
+
+ pt_core_disable_queue_interrupts(pt);
+ pt->total_interrupts++;
+ pt_check_status_trans(pt, cmd_q);
+ pt_core_enable_queue_interrupts(pt);
+ return IRQ_HANDLED;
+}
+
+int pt_core_init(struct pt_device *pt)
+{
+ char dma_pool_name[MAX_DMAPOOL_NAME_LEN];
+ struct pt_cmd_queue *cmd_q = &pt->cmd_q;
+ u32 dma_addr_lo, dma_addr_hi;
+ struct device *dev = pt->dev;
+ struct dma_pool *dma_pool;
+ int ret;
+
+ /* Allocate a dma pool for the queue */
+ snprintf(dma_pool_name, sizeof(dma_pool_name), "%s_q", dev_name(pt->dev));
+
+ dma_pool = dma_pool_create(dma_pool_name, dev,
+ PT_DMAPOOL_MAX_SIZE,
+ PT_DMAPOOL_ALIGN, 0);
+ if (!dma_pool)
+ return -ENOMEM;
+
+ /* ptdma core initialisation */
+ iowrite32(CMD_CONFIG_VHB_EN, pt->io_regs + CMD_CONFIG_OFFSET);
+ iowrite32(CMD_QUEUE_PRIO, pt->io_regs + CMD_QUEUE_PRIO_OFFSET);
+ iowrite32(CMD_TIMEOUT_DISABLE, pt->io_regs + CMD_TIMEOUT_OFFSET);
+ iowrite32(CMD_CLK_GATE_CONFIG, pt->io_regs + CMD_CLK_GATE_CTL_OFFSET);
+ iowrite32(CMD_CONFIG_REQID, pt->io_regs + CMD_REQID_CONFIG_OFFSET);
+
+ cmd_q->pt = pt;
+ cmd_q->dma_pool = dma_pool;
+ spin_lock_init(&cmd_q->q_lock);
+
+ /* Page alignment satisfies our needs for N <= 128 */
+ cmd_q->qsize = Q_SIZE(Q_DESC_SIZE);
+ cmd_q->qbase = dma_alloc_coherent(dev, cmd_q->qsize,
+ &cmd_q->qbase_dma,
+ GFP_KERNEL);
+ if (!cmd_q->qbase) {
+ dev_err(dev, "unable to allocate command queue\n");
+ ret = -ENOMEM;
+ goto e_destroy_pool;
+ }
+
+ cmd_q->qidx = 0;
+
+ /* Preset some register values */
+ cmd_q->reg_control = pt->io_regs + CMD_Q_STATUS_INCR;
+
+ /* Turn off the queues and disable interrupts until ready */
+ pt_core_disable_queue_interrupts(pt);
+
+ cmd_q->qcontrol = 0; /* Start with nothing */
+ iowrite32(cmd_q->qcontrol, cmd_q->reg_control);
+
+ ioread32(cmd_q->reg_control + 0x0104);
+ ioread32(cmd_q->reg_control + 0x0100);
+
+ /* Clear the interrupt status */
+ iowrite32(SUPPORTED_INTERRUPTS, cmd_q->reg_control + 0x0010);
+
+ /* Request an irq */
+ ret = request_irq(pt->pt_irq, pt_core_irq_handler, 0, dev_name(pt->dev), pt);
+ if (ret) {
+ dev_err(dev, "unable to allocate an IRQ\n");
+ goto e_free_dma;
+ }
+
+ /* Update the device registers with queue information. */
+ cmd_q->qcontrol &= ~CMD_Q_SIZE;
+ cmd_q->qcontrol |= FIELD_PREP(CMD_Q_SIZE, QUEUE_SIZE_VAL);
+
+ cmd_q->qdma_tail = cmd_q->qbase_dma;
+ dma_addr_lo = lower_32_bits(cmd_q->qdma_tail);
+ iowrite32((u32)dma_addr_lo, cmd_q->reg_control + 0x0004);
+ iowrite32((u32)dma_addr_lo, cmd_q->reg_control + 0x0008);
+
+ dma_addr_hi = upper_32_bits(cmd_q->qdma_tail);
+ cmd_q->qcontrol |= (dma_addr_hi << 16);
+ iowrite32(cmd_q->qcontrol, cmd_q->reg_control);
+
+ pt_core_enable_queue_interrupts(pt);
+
+ /* Register the DMA engine support */
+ ret = pt_dmaengine_register(pt);
+ if (ret)
+ goto e_free_irq;
+
+ /* Set up debugfs entries */
+ ptdma_debugfs_setup(pt);
+
+ return 0;
+
+e_free_irq:
+ free_irq(pt->pt_irq, pt);
+
+e_free_dma:
+ dma_free_coherent(dev, cmd_q->qsize, cmd_q->qbase, cmd_q->qbase_dma);
+
+e_destroy_pool:
+ dma_pool_destroy(pt->cmd_q.dma_pool);
+
+ return ret;
+}
+
+void pt_core_destroy(struct pt_device *pt)
+{
+ struct device *dev = pt->dev;
+ struct pt_cmd_queue *cmd_q = &pt->cmd_q;
+ struct pt_cmd *cmd;
+
+ /* Unregister the DMA engine */
+ pt_dmaengine_unregister(pt);
+
+ /* Disable and clear interrupts */
+ pt_core_disable_queue_interrupts(pt);
+
+ /* Turn off the run bit */
+ pt_stop_queue(cmd_q);
+
+ /* Clear the interrupt status */
+ iowrite32(SUPPORTED_INTERRUPTS, cmd_q->reg_control + 0x0010);
+ ioread32(cmd_q->reg_control + 0x0104);
+ ioread32(cmd_q->reg_control + 0x0100);
+
+ free_irq(pt->pt_irq, pt);
+
+ dma_free_coherent(dev, cmd_q->qsize, cmd_q->qbase,
+ cmd_q->qbase_dma);
+
+ /* Flush the cmd queue */
+ while (!list_empty(&pt->cmd)) {
+ /* Invoke the callback directly with an error code */
+ cmd = list_first_entry(&pt->cmd, struct pt_cmd, entry);
+ list_del(&cmd->entry);
+ cmd->pt_cmd_callback(cmd->data, -ENODEV);
+ }
+}
diff --git a/drivers/dma/ptdma/ptdma-dmaengine.c b/drivers/dma/ptdma/ptdma-dmaengine.c
new file mode 100644
index 000000000..1aa65e5de
--- /dev/null
+++ b/drivers/dma/ptdma/ptdma-dmaengine.c
@@ -0,0 +1,413 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * AMD Passthrough DMA device driver
+ * -- Based on the CCP driver
+ *
+ * Copyright (C) 2016,2021 Advanced Micro Devices, Inc.
+ *
+ * Author: Sanjay R Mehta <sanju.mehta@amd.com>
+ * Author: Gary R Hook <gary.hook@amd.com>
+ */
+
+#include "ptdma.h"
+#include "../dmaengine.h"
+#include "../virt-dma.h"
+
+static inline struct pt_dma_chan *to_pt_chan(struct dma_chan *dma_chan)
+{
+ return container_of(dma_chan, struct pt_dma_chan, vc.chan);
+}
+
+static inline struct pt_dma_desc *to_pt_desc(struct virt_dma_desc *vd)
+{
+ return container_of(vd, struct pt_dma_desc, vd);
+}
+
+static void pt_free_chan_resources(struct dma_chan *dma_chan)
+{
+ struct pt_dma_chan *chan = to_pt_chan(dma_chan);
+
+ vchan_free_chan_resources(&chan->vc);
+}
+
+static void pt_synchronize(struct dma_chan *dma_chan)
+{
+ struct pt_dma_chan *chan = to_pt_chan(dma_chan);
+
+ vchan_synchronize(&chan->vc);
+}
+
+static void pt_do_cleanup(struct virt_dma_desc *vd)
+{
+ struct pt_dma_desc *desc = to_pt_desc(vd);
+ struct pt_device *pt = desc->pt;
+
+ kmem_cache_free(pt->dma_desc_cache, desc);
+}
+
+static int pt_dma_start_desc(struct pt_dma_desc *desc)
+{
+ struct pt_passthru_engine *pt_engine;
+ struct pt_device *pt;
+ struct pt_cmd *pt_cmd;
+ struct pt_cmd_queue *cmd_q;
+
+ desc->issued_to_hw = 1;
+
+ pt_cmd = &desc->pt_cmd;
+ pt = pt_cmd->pt;
+ cmd_q = &pt->cmd_q;
+ pt_engine = &pt_cmd->passthru;
+
+ pt->tdata.cmd = pt_cmd;
+
+ /* Execute the command */
+ pt_cmd->ret = pt_core_perform_passthru(cmd_q, pt_engine);
+
+ return 0;
+}
+
+static struct pt_dma_desc *pt_next_dma_desc(struct pt_dma_chan *chan)
+{
+ /* Get the next DMA descriptor on the active list */
+ struct virt_dma_desc *vd = vchan_next_desc(&chan->vc);
+
+ return vd ? to_pt_desc(vd) : NULL;
+}
+
+static struct pt_dma_desc *pt_handle_active_desc(struct pt_dma_chan *chan,
+ struct pt_dma_desc *desc)
+{
+ struct dma_async_tx_descriptor *tx_desc;
+ struct virt_dma_desc *vd;
+ unsigned long flags;
+
+ /* Loop over descriptors until one is found with commands */
+ do {
+ if (desc) {
+ if (!desc->issued_to_hw) {
+ /* No errors, keep going */
+ if (desc->status != DMA_ERROR)
+ return desc;
+ }
+
+ tx_desc = &desc->vd.tx;
+ vd = &desc->vd;
+ } else {
+ tx_desc = NULL;
+ }
+
+ spin_lock_irqsave(&chan->vc.lock, flags);
+
+ if (desc) {
+ if (desc->status != DMA_COMPLETE) {
+ if (desc->status != DMA_ERROR)
+ desc->status = DMA_COMPLETE;
+
+ dma_cookie_complete(tx_desc);
+ dma_descriptor_unmap(tx_desc);
+ list_del(&desc->vd.node);
+ } else {
+ /* Don't handle it twice */
+ tx_desc = NULL;
+ }
+ }
+
+ desc = pt_next_dma_desc(chan);
+
+ spin_unlock_irqrestore(&chan->vc.lock, flags);
+
+ if (tx_desc) {
+ dmaengine_desc_get_callback_invoke(tx_desc, NULL);
+ dma_run_dependencies(tx_desc);
+ vchan_vdesc_fini(vd);
+ }
+ } while (desc);
+
+ return NULL;
+}
+
+static void pt_cmd_callback(void *data, int err)
+{
+ struct pt_dma_desc *desc = data;
+ struct dma_chan *dma_chan;
+ struct pt_dma_chan *chan;
+ int ret;
+
+ if (err == -EINPROGRESS)
+ return;
+
+ dma_chan = desc->vd.tx.chan;
+ chan = to_pt_chan(dma_chan);
+
+ if (err)
+ desc->status = DMA_ERROR;
+
+ while (true) {
+ /* Check for DMA descriptor completion */
+ desc = pt_handle_active_desc(chan, desc);
+
+ /* Don't submit cmd if no descriptor or DMA is paused */
+ if (!desc)
+ break;
+
+ ret = pt_dma_start_desc(desc);
+ if (!ret)
+ break;
+
+ desc->status = DMA_ERROR;
+ }
+}
+
+static struct pt_dma_desc *pt_alloc_dma_desc(struct pt_dma_chan *chan,
+ unsigned long flags)
+{
+ struct pt_dma_desc *desc;
+
+ desc = kmem_cache_zalloc(chan->pt->dma_desc_cache, GFP_NOWAIT);
+ if (!desc)
+ return NULL;
+
+ vchan_tx_prep(&chan->vc, &desc->vd, flags);
+
+ desc->pt = chan->pt;
+ desc->pt->cmd_q.int_en = !!(flags & DMA_PREP_INTERRUPT);
+ desc->issued_to_hw = 0;
+ desc->status = DMA_IN_PROGRESS;
+
+ return desc;
+}
+
+static struct pt_dma_desc *pt_create_desc(struct dma_chan *dma_chan,
+ dma_addr_t dst,
+ dma_addr_t src,
+ unsigned int len,
+ unsigned long flags)
+{
+ struct pt_dma_chan *chan = to_pt_chan(dma_chan);
+ struct pt_passthru_engine *pt_engine;
+ struct pt_dma_desc *desc;
+ struct pt_cmd *pt_cmd;
+
+ desc = pt_alloc_dma_desc(chan, flags);
+ if (!desc)
+ return NULL;
+
+ pt_cmd = &desc->pt_cmd;
+ pt_cmd->pt = chan->pt;
+ pt_engine = &pt_cmd->passthru;
+ pt_cmd->engine = PT_ENGINE_PASSTHRU;
+ pt_engine->src_dma = src;
+ pt_engine->dst_dma = dst;
+ pt_engine->src_len = len;
+ pt_cmd->pt_cmd_callback = pt_cmd_callback;
+ pt_cmd->data = desc;
+
+ desc->len = len;
+
+ return desc;
+}
+
+static struct dma_async_tx_descriptor *
+pt_prep_dma_memcpy(struct dma_chan *dma_chan, dma_addr_t dst,
+ dma_addr_t src, size_t len, unsigned long flags)
+{
+ struct pt_dma_desc *desc;
+
+ desc = pt_create_desc(dma_chan, dst, src, len, flags);
+ if (!desc)
+ return NULL;
+
+ return &desc->vd.tx;
+}
+
+static struct dma_async_tx_descriptor *
+pt_prep_dma_interrupt(struct dma_chan *dma_chan, unsigned long flags)
+{
+ struct pt_dma_chan *chan = to_pt_chan(dma_chan);
+ struct pt_dma_desc *desc;
+
+ desc = pt_alloc_dma_desc(chan, flags);
+ if (!desc)
+ return NULL;
+
+ return &desc->vd.tx;
+}
+
+static void pt_issue_pending(struct dma_chan *dma_chan)
+{
+ struct pt_dma_chan *chan = to_pt_chan(dma_chan);
+ struct pt_dma_desc *desc;
+ unsigned long flags;
+ bool engine_is_idle = true;
+
+ spin_lock_irqsave(&chan->vc.lock, flags);
+
+ desc = pt_next_dma_desc(chan);
+ if (desc)
+ engine_is_idle = false;
+
+ vchan_issue_pending(&chan->vc);
+
+ desc = pt_next_dma_desc(chan);
+
+ spin_unlock_irqrestore(&chan->vc.lock, flags);
+
+ /* If there was nothing active, start processing */
+ if (engine_is_idle && desc)
+ pt_cmd_callback(desc, 0);
+}
+
+static enum dma_status
+pt_tx_status(struct dma_chan *c, dma_cookie_t cookie,
+ struct dma_tx_state *txstate)
+{
+ struct pt_device *pt = to_pt_chan(c)->pt;
+ struct pt_cmd_queue *cmd_q = &pt->cmd_q;
+
+ pt_check_status_trans(pt, cmd_q);
+ return dma_cookie_status(c, cookie, txstate);
+}
+
+static int pt_pause(struct dma_chan *dma_chan)
+{
+ struct pt_dma_chan *chan = to_pt_chan(dma_chan);
+ unsigned long flags;
+
+ spin_lock_irqsave(&chan->vc.lock, flags);
+ pt_stop_queue(&chan->pt->cmd_q);
+ spin_unlock_irqrestore(&chan->vc.lock, flags);
+
+ return 0;
+}
+
+static int pt_resume(struct dma_chan *dma_chan)
+{
+ struct pt_dma_chan *chan = to_pt_chan(dma_chan);
+ struct pt_dma_desc *desc = NULL;
+ unsigned long flags;
+
+ spin_lock_irqsave(&chan->vc.lock, flags);
+ pt_start_queue(&chan->pt->cmd_q);
+ desc = pt_next_dma_desc(chan);
+ spin_unlock_irqrestore(&chan->vc.lock, flags);
+
+ /* If there was something active, re-start */
+ if (desc)
+ pt_cmd_callback(desc, 0);
+
+ return 0;
+}
+
+static int pt_terminate_all(struct dma_chan *dma_chan)
+{
+ struct pt_dma_chan *chan = to_pt_chan(dma_chan);
+ unsigned long flags;
+ struct pt_cmd_queue *cmd_q = &chan->pt->cmd_q;
+ LIST_HEAD(head);
+
+ iowrite32(SUPPORTED_INTERRUPTS, cmd_q->reg_control + 0x0010);
+ spin_lock_irqsave(&chan->vc.lock, flags);
+ vchan_get_all_descriptors(&chan->vc, &head);
+ spin_unlock_irqrestore(&chan->vc.lock, flags);
+
+ vchan_dma_desc_free_list(&chan->vc, &head);
+ vchan_free_chan_resources(&chan->vc);
+
+ return 0;
+}
+
+int pt_dmaengine_register(struct pt_device *pt)
+{
+ struct pt_dma_chan *chan;
+ struct dma_device *dma_dev = &pt->dma_dev;
+ char *cmd_cache_name;
+ char *desc_cache_name;
+ int ret;
+
+ pt->pt_dma_chan = devm_kzalloc(pt->dev, sizeof(*pt->pt_dma_chan),
+ GFP_KERNEL);
+ if (!pt->pt_dma_chan)
+ return -ENOMEM;
+
+ cmd_cache_name = devm_kasprintf(pt->dev, GFP_KERNEL,
+ "%s-dmaengine-cmd-cache",
+ dev_name(pt->dev));
+ if (!cmd_cache_name)
+ return -ENOMEM;
+
+ desc_cache_name = devm_kasprintf(pt->dev, GFP_KERNEL,
+ "%s-dmaengine-desc-cache",
+ dev_name(pt->dev));
+ if (!desc_cache_name) {
+ ret = -ENOMEM;
+ goto err_cache;
+ }
+
+ pt->dma_desc_cache = kmem_cache_create(desc_cache_name,
+ sizeof(struct pt_dma_desc), 0,
+ SLAB_HWCACHE_ALIGN, NULL);
+ if (!pt->dma_desc_cache) {
+ ret = -ENOMEM;
+ goto err_cache;
+ }
+
+ dma_dev->dev = pt->dev;
+ dma_dev->src_addr_widths = DMA_SLAVE_BUSWIDTH_64_BYTES;
+ dma_dev->dst_addr_widths = DMA_SLAVE_BUSWIDTH_64_BYTES;
+ dma_dev->directions = DMA_MEM_TO_MEM;
+ dma_dev->residue_granularity = DMA_RESIDUE_GRANULARITY_DESCRIPTOR;
+ dma_cap_set(DMA_MEMCPY, dma_dev->cap_mask);
+ dma_cap_set(DMA_INTERRUPT, dma_dev->cap_mask);
+
+ /*
+ * PTDMA is intended to be used with the AMD NTB devices, hence
+ * marking it as DMA_PRIVATE.
+ */
+ dma_cap_set(DMA_PRIVATE, dma_dev->cap_mask);
+
+ INIT_LIST_HEAD(&dma_dev->channels);
+
+ chan = pt->pt_dma_chan;
+ chan->pt = pt;
+
+ /* Set base and prep routines */
+ dma_dev->device_free_chan_resources = pt_free_chan_resources;
+ dma_dev->device_prep_dma_memcpy = pt_prep_dma_memcpy;
+ dma_dev->device_prep_dma_interrupt = pt_prep_dma_interrupt;
+ dma_dev->device_issue_pending = pt_issue_pending;
+ dma_dev->device_tx_status = pt_tx_status;
+ dma_dev->device_pause = pt_pause;
+ dma_dev->device_resume = pt_resume;
+ dma_dev->device_terminate_all = pt_terminate_all;
+ dma_dev->device_synchronize = pt_synchronize;
+
+ chan->vc.desc_free = pt_do_cleanup;
+ vchan_init(&chan->vc, dma_dev);
+
+ dma_set_mask_and_coherent(pt->dev, DMA_BIT_MASK(64));
+
+ ret = dma_async_device_register(dma_dev);
+ if (ret)
+ goto err_reg;
+
+ return 0;
+
+err_reg:
+ kmem_cache_destroy(pt->dma_desc_cache);
+
+err_cache:
+ kmem_cache_destroy(pt->dma_cmd_cache);
+
+ return ret;
+}
+
+void pt_dmaengine_unregister(struct pt_device *pt)
+{
+ struct dma_device *dma_dev = &pt->dma_dev;
+
+ dma_async_device_unregister(dma_dev);
+
+ kmem_cache_destroy(pt->dma_desc_cache);
+ kmem_cache_destroy(pt->dma_cmd_cache);
+}
diff --git a/drivers/dma/ptdma/ptdma-pci.c b/drivers/dma/ptdma/ptdma-pci.c
new file mode 100644
index 000000000..22739ff0c
--- /dev/null
+++ b/drivers/dma/ptdma/ptdma-pci.c
@@ -0,0 +1,243 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * AMD Passthru DMA device driver
+ * -- Based on the CCP driver
+ *
+ * Copyright (C) 2016,2021 Advanced Micro Devices, Inc.
+ *
+ * Author: Sanjay R Mehta <sanju.mehta@amd.com>
+ * Author: Tom Lendacky <thomas.lendacky@amd.com>
+ * Author: Gary R Hook <gary.hook@amd.com>
+ */
+
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/kthread.h>
+#include <linux/module.h>
+#include <linux/pci_ids.h>
+#include <linux/pci.h>
+#include <linux/spinlock.h>
+
+#include "ptdma.h"
+
+struct pt_msix {
+ int msix_count;
+ struct msix_entry msix_entry;
+};
+
+/*
+ * pt_alloc_struct - allocate and initialize the pt_device struct
+ *
+ * @dev: device struct of the PTDMA
+ */
+static struct pt_device *pt_alloc_struct(struct device *dev)
+{
+ struct pt_device *pt;
+
+ pt = devm_kzalloc(dev, sizeof(*pt), GFP_KERNEL);
+
+ if (!pt)
+ return NULL;
+ pt->dev = dev;
+
+ INIT_LIST_HEAD(&pt->cmd);
+
+ return pt;
+}
+
+static int pt_get_msix_irqs(struct pt_device *pt)
+{
+ struct pt_msix *pt_msix = pt->pt_msix;
+ struct device *dev = pt->dev;
+ struct pci_dev *pdev = to_pci_dev(dev);
+ int ret;
+
+ pt_msix->msix_entry.entry = 0;
+
+ ret = pci_enable_msix_range(pdev, &pt_msix->msix_entry, 1, 1);
+ if (ret < 0)
+ return ret;
+
+ pt_msix->msix_count = ret;
+
+ pt->pt_irq = pt_msix->msix_entry.vector;
+
+ return 0;
+}
+
+static int pt_get_msi_irq(struct pt_device *pt)
+{
+ struct device *dev = pt->dev;
+ struct pci_dev *pdev = to_pci_dev(dev);
+ int ret;
+
+ ret = pci_enable_msi(pdev);
+ if (ret)
+ return ret;
+
+ pt->pt_irq = pdev->irq;
+
+ return 0;
+}
+
+static int pt_get_irqs(struct pt_device *pt)
+{
+ struct device *dev = pt->dev;
+ int ret;
+
+ ret = pt_get_msix_irqs(pt);
+ if (!ret)
+ return 0;
+
+ /* Couldn't get MSI-X vectors, try MSI */
+ dev_err(dev, "could not enable MSI-X (%d), trying MSI\n", ret);
+ ret = pt_get_msi_irq(pt);
+ if (!ret)
+ return 0;
+
+ /* Couldn't get MSI interrupt */
+ dev_err(dev, "could not enable MSI (%d)\n", ret);
+
+ return ret;
+}
+
+static void pt_free_irqs(struct pt_device *pt)
+{
+ struct pt_msix *pt_msix = pt->pt_msix;
+ struct device *dev = pt->dev;
+ struct pci_dev *pdev = to_pci_dev(dev);
+
+ if (pt_msix->msix_count)
+ pci_disable_msix(pdev);
+ else if (pt->pt_irq)
+ pci_disable_msi(pdev);
+
+ pt->pt_irq = 0;
+}
+
+static int pt_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+ struct pt_device *pt;
+ struct pt_msix *pt_msix;
+ struct device *dev = &pdev->dev;
+ void __iomem * const *iomap_table;
+ int bar_mask;
+ int ret = -ENOMEM;
+
+ pt = pt_alloc_struct(dev);
+ if (!pt)
+ goto e_err;
+
+ pt_msix = devm_kzalloc(dev, sizeof(*pt_msix), GFP_KERNEL);
+ if (!pt_msix)
+ goto e_err;
+
+ pt->pt_msix = pt_msix;
+ pt->dev_vdata = (struct pt_dev_vdata *)id->driver_data;
+ if (!pt->dev_vdata) {
+ ret = -ENODEV;
+ dev_err(dev, "missing driver data\n");
+ goto e_err;
+ }
+
+ ret = pcim_enable_device(pdev);
+ if (ret) {
+ dev_err(dev, "pcim_enable_device failed (%d)\n", ret);
+ goto e_err;
+ }
+
+ bar_mask = pci_select_bars(pdev, IORESOURCE_MEM);
+ ret = pcim_iomap_regions(pdev, bar_mask, "ptdma");
+ if (ret) {
+ dev_err(dev, "pcim_iomap_regions failed (%d)\n", ret);
+ goto e_err;
+ }
+
+ iomap_table = pcim_iomap_table(pdev);
+ if (!iomap_table) {
+ dev_err(dev, "pcim_iomap_table failed\n");
+ ret = -ENOMEM;
+ goto e_err;
+ }
+
+ pt->io_regs = iomap_table[pt->dev_vdata->bar];
+ if (!pt->io_regs) {
+ dev_err(dev, "ioremap failed\n");
+ ret = -ENOMEM;
+ goto e_err;
+ }
+
+ ret = pt_get_irqs(pt);
+ if (ret)
+ goto e_err;
+
+ pci_set_master(pdev);
+
+ ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(48));
+ if (ret) {
+ ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32));
+ if (ret) {
+ dev_err(dev, "dma_set_mask_and_coherent failed (%d)\n",
+ ret);
+ goto e_err;
+ }
+ }
+
+ dev_set_drvdata(dev, pt);
+
+ if (pt->dev_vdata)
+ ret = pt_core_init(pt);
+
+ if (ret)
+ goto e_err;
+
+ return 0;
+
+e_err:
+ dev_err(dev, "initialization failed ret = %d\n", ret);
+
+ return ret;
+}
+
+static void pt_pci_remove(struct pci_dev *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct pt_device *pt = dev_get_drvdata(dev);
+
+ if (!pt)
+ return;
+
+ if (pt->dev_vdata)
+ pt_core_destroy(pt);
+
+ pt_free_irqs(pt);
+}
+
+static const struct pt_dev_vdata dev_vdata[] = {
+ {
+ .bar = 2,
+ },
+};
+
+static const struct pci_device_id pt_pci_table[] = {
+ { PCI_VDEVICE(AMD, 0x1498), (kernel_ulong_t)&dev_vdata[0] },
+ /* Last entry must be zero */
+ { 0, }
+};
+MODULE_DEVICE_TABLE(pci, pt_pci_table);
+
+static struct pci_driver pt_pci_driver = {
+ .name = "ptdma",
+ .id_table = pt_pci_table,
+ .probe = pt_pci_probe,
+ .remove = pt_pci_remove,
+};
+
+module_pci_driver(pt_pci_driver);
+
+MODULE_AUTHOR("Sanjay R Mehta <sanju.mehta@amd.com>");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("AMD PassThru DMA driver");
diff --git a/drivers/dma/ptdma/ptdma.h b/drivers/dma/ptdma/ptdma.h
new file mode 100644
index 000000000..21b4bf895
--- /dev/null
+++ b/drivers/dma/ptdma/ptdma.h
@@ -0,0 +1,337 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * AMD Passthru DMA device driver
+ * -- Based on the CCP driver
+ *
+ * Copyright (C) 2016,2021 Advanced Micro Devices, Inc.
+ *
+ * Author: Sanjay R Mehta <sanju.mehta@amd.com>
+ * Author: Tom Lendacky <thomas.lendacky@amd.com>
+ * Author: Gary R Hook <gary.hook@amd.com>
+ */
+
+#ifndef __PT_DEV_H__
+#define __PT_DEV_H__
+
+#include <linux/device.h>
+#include <linux/dmaengine.h>
+#include <linux/pci.h>
+#include <linux/spinlock.h>
+#include <linux/mutex.h>
+#include <linux/list.h>
+#include <linux/wait.h>
+#include <linux/dmapool.h>
+
+#include "../virt-dma.h"
+
+#define MAX_PT_NAME_LEN 16
+#define MAX_DMAPOOL_NAME_LEN 32
+
+#define MAX_HW_QUEUES 1
+#define MAX_CMD_QLEN 100
+
+#define PT_ENGINE_PASSTHRU 5
+
+/* Register Mappings */
+#define IRQ_MASK_REG 0x040
+#define IRQ_STATUS_REG 0x200
+
+#define CMD_Q_ERROR(__qs) ((__qs) & 0x0000003f)
+
+#define CMD_QUEUE_PRIO_OFFSET 0x00
+#define CMD_REQID_CONFIG_OFFSET 0x04
+#define CMD_TIMEOUT_OFFSET 0x08
+#define CMD_PT_VERSION 0x10
+
+#define CMD_Q_CONTROL_BASE 0x0000
+#define CMD_Q_TAIL_LO_BASE 0x0004
+#define CMD_Q_HEAD_LO_BASE 0x0008
+#define CMD_Q_INT_ENABLE_BASE 0x000C
+#define CMD_Q_INTERRUPT_STATUS_BASE 0x0010
+
+#define CMD_Q_STATUS_BASE 0x0100
+#define CMD_Q_INT_STATUS_BASE 0x0104
+#define CMD_Q_DMA_STATUS_BASE 0x0108
+#define CMD_Q_DMA_READ_STATUS_BASE 0x010C
+#define CMD_Q_DMA_WRITE_STATUS_BASE 0x0110
+#define CMD_Q_ABORT_BASE 0x0114
+#define CMD_Q_AX_CACHE_BASE 0x0118
+
+#define CMD_CONFIG_OFFSET 0x1120
+#define CMD_CLK_GATE_CTL_OFFSET 0x6004
+
+#define CMD_DESC_DW0_VAL 0x500012
+
+/* Address offset for virtual queue registers */
+#define CMD_Q_STATUS_INCR 0x1000
+
+/* Bit masks */
+#define CMD_CONFIG_REQID 0
+#define CMD_TIMEOUT_DISABLE 0
+#define CMD_CLK_DYN_GATING_DIS 0
+#define CMD_CLK_SW_GATE_MODE 0
+#define CMD_CLK_GATE_CTL 0
+#define CMD_QUEUE_PRIO GENMASK(2, 1)
+#define CMD_CONFIG_VHB_EN BIT(0)
+#define CMD_CLK_DYN_GATING_EN BIT(0)
+#define CMD_CLK_HW_GATE_MODE BIT(0)
+#define CMD_CLK_GATE_ON_DELAY BIT(12)
+#define CMD_CLK_GATE_OFF_DELAY BIT(12)
+
+#define CMD_CLK_GATE_CONFIG (CMD_CLK_GATE_CTL | \
+ CMD_CLK_HW_GATE_MODE | \
+ CMD_CLK_GATE_ON_DELAY | \
+ CMD_CLK_DYN_GATING_EN | \
+ CMD_CLK_GATE_OFF_DELAY)
+
+#define CMD_Q_LEN 32
+#define CMD_Q_RUN BIT(0)
+#define CMD_Q_HALT BIT(1)
+#define CMD_Q_MEM_LOCATION BIT(2)
+#define CMD_Q_SIZE_MASK GENMASK(4, 0)
+#define CMD_Q_SIZE GENMASK(7, 3)
+#define CMD_Q_SHIFT GENMASK(1, 0)
+#define QUEUE_SIZE_VAL ((ffs(CMD_Q_LEN) - 2) & \
+ CMD_Q_SIZE_MASK)
+#define Q_PTR_MASK (2 << (QUEUE_SIZE_VAL + 5) - 1)
+#define Q_DESC_SIZE sizeof(struct ptdma_desc)
+#define Q_SIZE(n) (CMD_Q_LEN * (n))
+
+#define INT_COMPLETION BIT(0)
+#define INT_ERROR BIT(1)
+#define INT_QUEUE_STOPPED BIT(2)
+#define INT_EMPTY_QUEUE BIT(3)
+#define SUPPORTED_INTERRUPTS (INT_COMPLETION | INT_ERROR)
+
+/****** Local Storage Block ******/
+#define LSB_START 0
+#define LSB_END 127
+#define LSB_COUNT (LSB_END - LSB_START + 1)
+
+#define PT_DMAPOOL_MAX_SIZE 64
+#define PT_DMAPOOL_ALIGN BIT(5)
+
+#define PT_PASSTHRU_BLOCKSIZE 512
+
+struct pt_device;
+
+struct pt_tasklet_data {
+ struct completion completion;
+ struct pt_cmd *cmd;
+};
+
+/*
+ * struct pt_passthru_engine - pass-through operation
+ * without performing DMA mapping
+ * @mask: mask to be applied to data
+ * @mask_len: length in bytes of mask
+ * @src_dma: data to be used for this operation
+ * @dst_dma: data produced by this operation
+ * @src_len: length in bytes of data used for this operation
+ *
+ * Variables required to be set when calling pt_enqueue_cmd():
+ * - bit_mod, byte_swap, src, dst, src_len
+ * - mask, mask_len if bit_mod is not PT_PASSTHRU_BITWISE_NOOP
+ */
+struct pt_passthru_engine {
+ dma_addr_t mask;
+ u32 mask_len; /* In bytes */
+
+ dma_addr_t src_dma, dst_dma;
+ u64 src_len; /* In bytes */
+};
+
+/*
+ * struct pt_cmd - PTDMA operation request
+ * @entry: list element
+ * @work: work element used for callbacks
+ * @pt: PT device to be run on
+ * @ret: operation return code
+ * @flags: cmd processing flags
+ * @engine: PTDMA operation to perform (passthru)
+ * @engine_error: PT engine return code
+ * @passthru: engine specific structures, refer to specific engine struct below
+ * @callback: operation completion callback function
+ * @data: parameter value to be supplied to the callback function
+ *
+ * Variables required to be set when calling pt_enqueue_cmd():
+ * - engine, callback
+ * - See the operation structures below for what is required for each
+ * operation.
+ */
+struct pt_cmd {
+ struct list_head entry;
+ struct work_struct work;
+ struct pt_device *pt;
+ int ret;
+ u32 engine;
+ u32 engine_error;
+ struct pt_passthru_engine passthru;
+ /* Completion callback support */
+ void (*pt_cmd_callback)(void *data, int err);
+ void *data;
+};
+
+struct pt_dma_desc {
+ struct virt_dma_desc vd;
+ struct pt_device *pt;
+ enum dma_status status;
+ size_t len;
+ bool issued_to_hw;
+ struct pt_cmd pt_cmd;
+};
+
+struct pt_dma_chan {
+ struct virt_dma_chan vc;
+ struct pt_device *pt;
+};
+
+struct pt_cmd_queue {
+ struct pt_device *pt;
+
+ /* Queue dma pool */
+ struct dma_pool *dma_pool;
+
+ /* Queue base address (not neccessarily aligned)*/
+ struct ptdma_desc *qbase;
+
+ /* Aligned queue start address (per requirement) */
+ spinlock_t q_lock ____cacheline_aligned;
+ unsigned int qidx;
+
+ unsigned int qsize;
+ dma_addr_t qbase_dma;
+ dma_addr_t qdma_tail;
+
+ unsigned int active;
+ unsigned int suspended;
+
+ /* Interrupt flag */
+ bool int_en;
+
+ /* Register addresses for queue */
+ void __iomem *reg_control;
+ u32 qcontrol; /* Cached control register */
+
+ /* Status values from job */
+ u32 int_status;
+ u32 q_status;
+ u32 q_int_status;
+ u32 cmd_error;
+ /* Queue Statistics */
+ unsigned long total_pt_ops;
+} ____cacheline_aligned;
+
+struct pt_device {
+ struct list_head entry;
+
+ unsigned int ord;
+ char name[MAX_PT_NAME_LEN];
+
+ struct device *dev;
+
+ /* Bus specific device information */
+ struct pt_msix *pt_msix;
+
+ struct pt_dev_vdata *dev_vdata;
+
+ unsigned int pt_irq;
+
+ /* I/O area used for device communication */
+ void __iomem *io_regs;
+
+ spinlock_t cmd_lock ____cacheline_aligned;
+ unsigned int cmd_count;
+ struct list_head cmd;
+
+ /*
+ * The command queue. This represent the queue available on the
+ * PTDMA that are available for processing cmds
+ */
+ struct pt_cmd_queue cmd_q;
+
+ /* Support for the DMA Engine capabilities */
+ struct dma_device dma_dev;
+ struct pt_dma_chan *pt_dma_chan;
+ struct kmem_cache *dma_cmd_cache;
+ struct kmem_cache *dma_desc_cache;
+
+ wait_queue_head_t lsb_queue;
+
+ /* Device Statistics */
+ unsigned long total_interrupts;
+
+ struct pt_tasklet_data tdata;
+};
+
+/*
+ * descriptor for PTDMA commands
+ * 8 32-bit words:
+ * word 0: function; engine; control bits
+ * word 1: length of source data
+ * word 2: low 32 bits of source pointer
+ * word 3: upper 16 bits of source pointer; source memory type
+ * word 4: low 32 bits of destination pointer
+ * word 5: upper 16 bits of destination pointer; destination memory type
+ * word 6: reserved 32 bits
+ * word 7: reserved 32 bits
+ */
+
+#define DWORD0_SOC BIT(0)
+#define DWORD0_IOC BIT(1)
+
+struct dword3 {
+ unsigned int src_hi:16;
+ unsigned int src_mem:2;
+ unsigned int lsb_cxt_id:8;
+ unsigned int rsvd1:5;
+ unsigned int fixed:1;
+};
+
+struct dword5 {
+ unsigned int dst_hi:16;
+ unsigned int dst_mem:2;
+ unsigned int rsvd1:13;
+ unsigned int fixed:1;
+};
+
+struct ptdma_desc {
+ u32 dw0;
+ u32 length;
+ u32 src_lo;
+ struct dword3 dw3;
+ u32 dst_lo;
+ struct dword5 dw5;
+ __le32 rsvd1;
+ __le32 rsvd2;
+};
+
+/* Structure to hold PT device data */
+struct pt_dev_vdata {
+ const unsigned int bar;
+};
+
+int pt_dmaengine_register(struct pt_device *pt);
+void pt_dmaengine_unregister(struct pt_device *pt);
+
+void ptdma_debugfs_setup(struct pt_device *pt);
+int pt_core_init(struct pt_device *pt);
+void pt_core_destroy(struct pt_device *pt);
+
+int pt_core_perform_passthru(struct pt_cmd_queue *cmd_q,
+ struct pt_passthru_engine *pt_engine);
+
+void pt_check_status_trans(struct pt_device *pt, struct pt_cmd_queue *cmd_q);
+void pt_start_queue(struct pt_cmd_queue *cmd_q);
+void pt_stop_queue(struct pt_cmd_queue *cmd_q);
+
+static inline void pt_core_disable_queue_interrupts(struct pt_device *pt)
+{
+ iowrite32(0, pt->cmd_q.reg_control + 0x000C);
+}
+
+static inline void pt_core_enable_queue_interrupts(struct pt_device *pt)
+{
+ iowrite32(SUPPORTED_INTERRUPTS, pt->cmd_q.reg_control + 0x000C);
+}
+#endif
diff --git a/drivers/dma/pxa_dma.c b/drivers/dma/pxa_dma.c
new file mode 100644
index 000000000..04c1f2ee8
--- /dev/null
+++ b/drivers/dma/pxa_dma.c
@@ -0,0 +1,1469 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2015 Robert Jarzmik <robert.jarzmik@free.fr>
+ */
+
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/interrupt.h>
+#include <linux/dma-mapping.h>
+#include <linux/slab.h>
+#include <linux/dmaengine.h>
+#include <linux/platform_device.h>
+#include <linux/device.h>
+#include <linux/platform_data/mmp_dma.h>
+#include <linux/dmapool.h>
+#include <linux/of_device.h>
+#include <linux/of_dma.h>
+#include <linux/of.h>
+#include <linux/wait.h>
+#include <linux/dma/pxa-dma.h>
+
+#include "dmaengine.h"
+#include "virt-dma.h"
+
+#define DCSR(n) (0x0000 + ((n) << 2))
+#define DALGN(n) 0x00a0
+#define DINT 0x00f0
+#define DDADR(n) (0x0200 + ((n) << 4))
+#define DSADR(n) (0x0204 + ((n) << 4))
+#define DTADR(n) (0x0208 + ((n) << 4))
+#define DCMD(n) (0x020c + ((n) << 4))
+
+#define PXA_DCSR_RUN BIT(31) /* Run Bit (read / write) */
+#define PXA_DCSR_NODESC BIT(30) /* No-Descriptor Fetch (read / write) */
+#define PXA_DCSR_STOPIRQEN BIT(29) /* Stop Interrupt Enable (R/W) */
+#define PXA_DCSR_REQPEND BIT(8) /* Request Pending (read-only) */
+#define PXA_DCSR_STOPSTATE BIT(3) /* Stop State (read-only) */
+#define PXA_DCSR_ENDINTR BIT(2) /* End Interrupt (read / write) */
+#define PXA_DCSR_STARTINTR BIT(1) /* Start Interrupt (read / write) */
+#define PXA_DCSR_BUSERR BIT(0) /* Bus Error Interrupt (read / write) */
+
+#define PXA_DCSR_EORIRQEN BIT(28) /* End of Receive IRQ Enable (R/W) */
+#define PXA_DCSR_EORJMPEN BIT(27) /* Jump to next descriptor on EOR */
+#define PXA_DCSR_EORSTOPEN BIT(26) /* STOP on an EOR */
+#define PXA_DCSR_SETCMPST BIT(25) /* Set Descriptor Compare Status */
+#define PXA_DCSR_CLRCMPST BIT(24) /* Clear Descriptor Compare Status */
+#define PXA_DCSR_CMPST BIT(10) /* The Descriptor Compare Status */
+#define PXA_DCSR_EORINTR BIT(9) /* The end of Receive */
+
+#define DRCMR_MAPVLD BIT(7) /* Map Valid (read / write) */
+#define DRCMR_CHLNUM 0x1f /* mask for Channel Number (read / write) */
+
+#define DDADR_DESCADDR 0xfffffff0 /* Address of next descriptor (mask) */
+#define DDADR_STOP BIT(0) /* Stop (read / write) */
+
+#define PXA_DCMD_INCSRCADDR BIT(31) /* Source Address Increment Setting. */
+#define PXA_DCMD_INCTRGADDR BIT(30) /* Target Address Increment Setting. */
+#define PXA_DCMD_FLOWSRC BIT(29) /* Flow Control by the source. */
+#define PXA_DCMD_FLOWTRG BIT(28) /* Flow Control by the target. */
+#define PXA_DCMD_STARTIRQEN BIT(22) /* Start Interrupt Enable */
+#define PXA_DCMD_ENDIRQEN BIT(21) /* End Interrupt Enable */
+#define PXA_DCMD_ENDIAN BIT(18) /* Device Endian-ness. */
+#define PXA_DCMD_BURST8 (1 << 16) /* 8 byte burst */
+#define PXA_DCMD_BURST16 (2 << 16) /* 16 byte burst */
+#define PXA_DCMD_BURST32 (3 << 16) /* 32 byte burst */
+#define PXA_DCMD_WIDTH1 (1 << 14) /* 1 byte width */
+#define PXA_DCMD_WIDTH2 (2 << 14) /* 2 byte width (HalfWord) */
+#define PXA_DCMD_WIDTH4 (3 << 14) /* 4 byte width (Word) */
+#define PXA_DCMD_LENGTH 0x01fff /* length mask (max = 8K - 1) */
+
+#define PDMA_ALIGNMENT 3
+#define PDMA_MAX_DESC_BYTES (PXA_DCMD_LENGTH & ~((1 << PDMA_ALIGNMENT) - 1))
+
+struct pxad_desc_hw {
+ u32 ddadr; /* Points to the next descriptor + flags */
+ u32 dsadr; /* DSADR value for the current transfer */
+ u32 dtadr; /* DTADR value for the current transfer */
+ u32 dcmd; /* DCMD value for the current transfer */
+} __aligned(16);
+
+struct pxad_desc_sw {
+ struct virt_dma_desc vd; /* Virtual descriptor */
+ int nb_desc; /* Number of hw. descriptors */
+ size_t len; /* Number of bytes xfered */
+ dma_addr_t first; /* First descriptor's addr */
+
+ /* At least one descriptor has an src/dst address not multiple of 8 */
+ bool misaligned;
+ bool cyclic;
+ struct dma_pool *desc_pool; /* Channel's used allocator */
+
+ struct pxad_desc_hw *hw_desc[]; /* DMA coherent descriptors */
+};
+
+struct pxad_phy {
+ int idx;
+ void __iomem *base;
+ struct pxad_chan *vchan;
+};
+
+struct pxad_chan {
+ struct virt_dma_chan vc; /* Virtual channel */
+ u32 drcmr; /* Requestor of the channel */
+ enum pxad_chan_prio prio; /* Required priority of phy */
+ /*
+ * At least one desc_sw in submitted or issued transfers on this channel
+ * has one address such as: addr % 8 != 0. This implies the DALGN
+ * setting on the phy.
+ */
+ bool misaligned;
+ struct dma_slave_config cfg; /* Runtime config */
+
+ /* protected by vc->lock */
+ struct pxad_phy *phy;
+ struct dma_pool *desc_pool; /* Descriptors pool */
+ dma_cookie_t bus_error;
+
+ wait_queue_head_t wq_state;
+};
+
+struct pxad_device {
+ struct dma_device slave;
+ int nr_chans;
+ int nr_requestors;
+ void __iomem *base;
+ struct pxad_phy *phys;
+ spinlock_t phy_lock; /* Phy association */
+#ifdef CONFIG_DEBUG_FS
+ struct dentry *dbgfs_root;
+ struct dentry **dbgfs_chan;
+#endif
+};
+
+#define tx_to_pxad_desc(tx) \
+ container_of(tx, struct pxad_desc_sw, async_tx)
+#define to_pxad_chan(dchan) \
+ container_of(dchan, struct pxad_chan, vc.chan)
+#define to_pxad_dev(dmadev) \
+ container_of(dmadev, struct pxad_device, slave)
+#define to_pxad_sw_desc(_vd) \
+ container_of((_vd), struct pxad_desc_sw, vd)
+
+#define _phy_readl_relaxed(phy, _reg) \
+ readl_relaxed((phy)->base + _reg((phy)->idx))
+#define phy_readl_relaxed(phy, _reg) \
+ ({ \
+ u32 _v; \
+ _v = readl_relaxed((phy)->base + _reg((phy)->idx)); \
+ dev_vdbg(&phy->vchan->vc.chan.dev->device, \
+ "%s(): readl(%s): 0x%08x\n", __func__, #_reg, \
+ _v); \
+ _v; \
+ })
+#define phy_writel(phy, val, _reg) \
+ do { \
+ writel((val), (phy)->base + _reg((phy)->idx)); \
+ dev_vdbg(&phy->vchan->vc.chan.dev->device, \
+ "%s(): writel(0x%08x, %s)\n", \
+ __func__, (u32)(val), #_reg); \
+ } while (0)
+#define phy_writel_relaxed(phy, val, _reg) \
+ do { \
+ writel_relaxed((val), (phy)->base + _reg((phy)->idx)); \
+ dev_vdbg(&phy->vchan->vc.chan.dev->device, \
+ "%s(): writel_relaxed(0x%08x, %s)\n", \
+ __func__, (u32)(val), #_reg); \
+ } while (0)
+
+static unsigned int pxad_drcmr(unsigned int line)
+{
+ if (line < 64)
+ return 0x100 + line * 4;
+ return 0x1000 + line * 4;
+}
+
+static bool pxad_filter_fn(struct dma_chan *chan, void *param);
+
+/*
+ * Debug fs
+ */
+#ifdef CONFIG_DEBUG_FS
+#include <linux/debugfs.h>
+#include <linux/uaccess.h>
+#include <linux/seq_file.h>
+
+static int requester_chan_show(struct seq_file *s, void *p)
+{
+ struct pxad_phy *phy = s->private;
+ int i;
+ u32 drcmr;
+
+ seq_printf(s, "DMA channel %d requester :\n", phy->idx);
+ for (i = 0; i < 70; i++) {
+ drcmr = readl_relaxed(phy->base + pxad_drcmr(i));
+ if ((drcmr & DRCMR_CHLNUM) == phy->idx)
+ seq_printf(s, "\tRequester %d (MAPVLD=%d)\n", i,
+ !!(drcmr & DRCMR_MAPVLD));
+ }
+ return 0;
+}
+
+static inline int dbg_burst_from_dcmd(u32 dcmd)
+{
+ int burst = (dcmd >> 16) & 0x3;
+
+ return burst ? 4 << burst : 0;
+}
+
+static int is_phys_valid(unsigned long addr)
+{
+ return pfn_valid(__phys_to_pfn(addr));
+}
+
+#define PXA_DCSR_STR(flag) (dcsr & PXA_DCSR_##flag ? #flag" " : "")
+#define PXA_DCMD_STR(flag) (dcmd & PXA_DCMD_##flag ? #flag" " : "")
+
+static int descriptors_show(struct seq_file *s, void *p)
+{
+ struct pxad_phy *phy = s->private;
+ int i, max_show = 20, burst, width;
+ u32 dcmd;
+ unsigned long phys_desc, ddadr;
+ struct pxad_desc_hw *desc;
+
+ phys_desc = ddadr = _phy_readl_relaxed(phy, DDADR);
+
+ seq_printf(s, "DMA channel %d descriptors :\n", phy->idx);
+ seq_printf(s, "[%03d] First descriptor unknown\n", 0);
+ for (i = 1; i < max_show && is_phys_valid(phys_desc); i++) {
+ desc = phys_to_virt(phys_desc);
+ dcmd = desc->dcmd;
+ burst = dbg_burst_from_dcmd(dcmd);
+ width = (1 << ((dcmd >> 14) & 0x3)) >> 1;
+
+ seq_printf(s, "[%03d] Desc at %08lx(virt %p)\n",
+ i, phys_desc, desc);
+ seq_printf(s, "\tDDADR = %08x\n", desc->ddadr);
+ seq_printf(s, "\tDSADR = %08x\n", desc->dsadr);
+ seq_printf(s, "\tDTADR = %08x\n", desc->dtadr);
+ seq_printf(s, "\tDCMD = %08x (%s%s%s%s%s%s%sburst=%d width=%d len=%d)\n",
+ dcmd,
+ PXA_DCMD_STR(INCSRCADDR), PXA_DCMD_STR(INCTRGADDR),
+ PXA_DCMD_STR(FLOWSRC), PXA_DCMD_STR(FLOWTRG),
+ PXA_DCMD_STR(STARTIRQEN), PXA_DCMD_STR(ENDIRQEN),
+ PXA_DCMD_STR(ENDIAN), burst, width,
+ dcmd & PXA_DCMD_LENGTH);
+ phys_desc = desc->ddadr;
+ }
+ if (i == max_show)
+ seq_printf(s, "[%03d] Desc at %08lx ... max display reached\n",
+ i, phys_desc);
+ else
+ seq_printf(s, "[%03d] Desc at %08lx is %s\n",
+ i, phys_desc, phys_desc == DDADR_STOP ?
+ "DDADR_STOP" : "invalid");
+
+ return 0;
+}
+
+static int chan_state_show(struct seq_file *s, void *p)
+{
+ struct pxad_phy *phy = s->private;
+ u32 dcsr, dcmd;
+ int burst, width;
+ static const char * const str_prio[] = {
+ "high", "normal", "low", "invalid"
+ };
+
+ dcsr = _phy_readl_relaxed(phy, DCSR);
+ dcmd = _phy_readl_relaxed(phy, DCMD);
+ burst = dbg_burst_from_dcmd(dcmd);
+ width = (1 << ((dcmd >> 14) & 0x3)) >> 1;
+
+ seq_printf(s, "DMA channel %d\n", phy->idx);
+ seq_printf(s, "\tPriority : %s\n",
+ str_prio[(phy->idx & 0xf) / 4]);
+ seq_printf(s, "\tUnaligned transfer bit: %s\n",
+ _phy_readl_relaxed(phy, DALGN) & BIT(phy->idx) ?
+ "yes" : "no");
+ seq_printf(s, "\tDCSR = %08x (%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s)\n",
+ dcsr, PXA_DCSR_STR(RUN), PXA_DCSR_STR(NODESC),
+ PXA_DCSR_STR(STOPIRQEN), PXA_DCSR_STR(EORIRQEN),
+ PXA_DCSR_STR(EORJMPEN), PXA_DCSR_STR(EORSTOPEN),
+ PXA_DCSR_STR(SETCMPST), PXA_DCSR_STR(CLRCMPST),
+ PXA_DCSR_STR(CMPST), PXA_DCSR_STR(EORINTR),
+ PXA_DCSR_STR(REQPEND), PXA_DCSR_STR(STOPSTATE),
+ PXA_DCSR_STR(ENDINTR), PXA_DCSR_STR(STARTINTR),
+ PXA_DCSR_STR(BUSERR));
+
+ seq_printf(s, "\tDCMD = %08x (%s%s%s%s%s%s%sburst=%d width=%d len=%d)\n",
+ dcmd,
+ PXA_DCMD_STR(INCSRCADDR), PXA_DCMD_STR(INCTRGADDR),
+ PXA_DCMD_STR(FLOWSRC), PXA_DCMD_STR(FLOWTRG),
+ PXA_DCMD_STR(STARTIRQEN), PXA_DCMD_STR(ENDIRQEN),
+ PXA_DCMD_STR(ENDIAN), burst, width, dcmd & PXA_DCMD_LENGTH);
+ seq_printf(s, "\tDSADR = %08x\n", _phy_readl_relaxed(phy, DSADR));
+ seq_printf(s, "\tDTADR = %08x\n", _phy_readl_relaxed(phy, DTADR));
+ seq_printf(s, "\tDDADR = %08x\n", _phy_readl_relaxed(phy, DDADR));
+
+ return 0;
+}
+
+static int state_show(struct seq_file *s, void *p)
+{
+ struct pxad_device *pdev = s->private;
+
+ /* basic device status */
+ seq_puts(s, "DMA engine status\n");
+ seq_printf(s, "\tChannel number: %d\n", pdev->nr_chans);
+
+ return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(state);
+DEFINE_SHOW_ATTRIBUTE(chan_state);
+DEFINE_SHOW_ATTRIBUTE(descriptors);
+DEFINE_SHOW_ATTRIBUTE(requester_chan);
+
+static struct dentry *pxad_dbg_alloc_chan(struct pxad_device *pdev,
+ int ch, struct dentry *chandir)
+{
+ char chan_name[11];
+ struct dentry *chan;
+ void *dt;
+
+ scnprintf(chan_name, sizeof(chan_name), "%d", ch);
+ chan = debugfs_create_dir(chan_name, chandir);
+ dt = (void *)&pdev->phys[ch];
+
+ debugfs_create_file("state", 0400, chan, dt, &chan_state_fops);
+ debugfs_create_file("descriptors", 0400, chan, dt, &descriptors_fops);
+ debugfs_create_file("requesters", 0400, chan, dt, &requester_chan_fops);
+
+ return chan;
+}
+
+static void pxad_init_debugfs(struct pxad_device *pdev)
+{
+ int i;
+ struct dentry *chandir;
+
+ pdev->dbgfs_chan =
+ kmalloc_array(pdev->nr_chans, sizeof(struct dentry *),
+ GFP_KERNEL);
+ if (!pdev->dbgfs_chan)
+ return;
+
+ pdev->dbgfs_root = debugfs_create_dir(dev_name(pdev->slave.dev), NULL);
+
+ debugfs_create_file("state", 0400, pdev->dbgfs_root, pdev, &state_fops);
+
+ chandir = debugfs_create_dir("channels", pdev->dbgfs_root);
+
+ for (i = 0; i < pdev->nr_chans; i++)
+ pdev->dbgfs_chan[i] = pxad_dbg_alloc_chan(pdev, i, chandir);
+}
+
+static void pxad_cleanup_debugfs(struct pxad_device *pdev)
+{
+ debugfs_remove_recursive(pdev->dbgfs_root);
+}
+#else
+static inline void pxad_init_debugfs(struct pxad_device *pdev) {}
+static inline void pxad_cleanup_debugfs(struct pxad_device *pdev) {}
+#endif
+
+static struct pxad_phy *lookup_phy(struct pxad_chan *pchan)
+{
+ int prio, i;
+ struct pxad_device *pdev = to_pxad_dev(pchan->vc.chan.device);
+ struct pxad_phy *phy, *found = NULL;
+ unsigned long flags;
+
+ /*
+ * dma channel priorities
+ * ch 0 - 3, 16 - 19 <--> (0)
+ * ch 4 - 7, 20 - 23 <--> (1)
+ * ch 8 - 11, 24 - 27 <--> (2)
+ * ch 12 - 15, 28 - 31 <--> (3)
+ */
+
+ spin_lock_irqsave(&pdev->phy_lock, flags);
+ for (prio = pchan->prio; prio >= PXAD_PRIO_HIGHEST; prio--) {
+ for (i = 0; i < pdev->nr_chans; i++) {
+ if (prio != (i & 0xf) >> 2)
+ continue;
+ phy = &pdev->phys[i];
+ if (!phy->vchan) {
+ phy->vchan = pchan;
+ found = phy;
+ goto out_unlock;
+ }
+ }
+ }
+
+out_unlock:
+ spin_unlock_irqrestore(&pdev->phy_lock, flags);
+ dev_dbg(&pchan->vc.chan.dev->device,
+ "%s(): phy=%p(%d)\n", __func__, found,
+ found ? found->idx : -1);
+
+ return found;
+}
+
+static void pxad_free_phy(struct pxad_chan *chan)
+{
+ struct pxad_device *pdev = to_pxad_dev(chan->vc.chan.device);
+ unsigned long flags;
+ u32 reg;
+
+ dev_dbg(&chan->vc.chan.dev->device,
+ "%s(): freeing\n", __func__);
+ if (!chan->phy)
+ return;
+
+ /* clear the channel mapping in DRCMR */
+ if (chan->drcmr <= pdev->nr_requestors) {
+ reg = pxad_drcmr(chan->drcmr);
+ writel_relaxed(0, chan->phy->base + reg);
+ }
+
+ spin_lock_irqsave(&pdev->phy_lock, flags);
+ chan->phy->vchan = NULL;
+ chan->phy = NULL;
+ spin_unlock_irqrestore(&pdev->phy_lock, flags);
+}
+
+static bool is_chan_running(struct pxad_chan *chan)
+{
+ u32 dcsr;
+ struct pxad_phy *phy = chan->phy;
+
+ if (!phy)
+ return false;
+ dcsr = phy_readl_relaxed(phy, DCSR);
+ return dcsr & PXA_DCSR_RUN;
+}
+
+static bool is_running_chan_misaligned(struct pxad_chan *chan)
+{
+ u32 dalgn;
+
+ BUG_ON(!chan->phy);
+ dalgn = phy_readl_relaxed(chan->phy, DALGN);
+ return dalgn & (BIT(chan->phy->idx));
+}
+
+static void phy_enable(struct pxad_phy *phy, bool misaligned)
+{
+ struct pxad_device *pdev;
+ u32 reg, dalgn;
+
+ if (!phy->vchan)
+ return;
+
+ dev_dbg(&phy->vchan->vc.chan.dev->device,
+ "%s(); phy=%p(%d) misaligned=%d\n", __func__,
+ phy, phy->idx, misaligned);
+
+ pdev = to_pxad_dev(phy->vchan->vc.chan.device);
+ if (phy->vchan->drcmr <= pdev->nr_requestors) {
+ reg = pxad_drcmr(phy->vchan->drcmr);
+ writel_relaxed(DRCMR_MAPVLD | phy->idx, phy->base + reg);
+ }
+
+ dalgn = phy_readl_relaxed(phy, DALGN);
+ if (misaligned)
+ dalgn |= BIT(phy->idx);
+ else
+ dalgn &= ~BIT(phy->idx);
+ phy_writel_relaxed(phy, dalgn, DALGN);
+
+ phy_writel(phy, PXA_DCSR_STOPIRQEN | PXA_DCSR_ENDINTR |
+ PXA_DCSR_BUSERR | PXA_DCSR_RUN, DCSR);
+}
+
+static void phy_disable(struct pxad_phy *phy)
+{
+ u32 dcsr;
+
+ if (!phy)
+ return;
+
+ dcsr = phy_readl_relaxed(phy, DCSR);
+ dev_dbg(&phy->vchan->vc.chan.dev->device,
+ "%s(): phy=%p(%d)\n", __func__, phy, phy->idx);
+ phy_writel(phy, dcsr & ~PXA_DCSR_RUN & ~PXA_DCSR_STOPIRQEN, DCSR);
+}
+
+static void pxad_launch_chan(struct pxad_chan *chan,
+ struct pxad_desc_sw *desc)
+{
+ dev_dbg(&chan->vc.chan.dev->device,
+ "%s(): desc=%p\n", __func__, desc);
+ if (!chan->phy) {
+ chan->phy = lookup_phy(chan);
+ if (!chan->phy) {
+ dev_dbg(&chan->vc.chan.dev->device,
+ "%s(): no free dma channel\n", __func__);
+ return;
+ }
+ }
+ chan->bus_error = 0;
+
+ /*
+ * Program the descriptor's address into the DMA controller,
+ * then start the DMA transaction
+ */
+ phy_writel(chan->phy, desc->first, DDADR);
+ phy_enable(chan->phy, chan->misaligned);
+ wake_up(&chan->wq_state);
+}
+
+static void set_updater_desc(struct pxad_desc_sw *sw_desc,
+ unsigned long flags)
+{
+ struct pxad_desc_hw *updater =
+ sw_desc->hw_desc[sw_desc->nb_desc - 1];
+ dma_addr_t dma = sw_desc->hw_desc[sw_desc->nb_desc - 2]->ddadr;
+
+ updater->ddadr = DDADR_STOP;
+ updater->dsadr = dma;
+ updater->dtadr = dma + 8;
+ updater->dcmd = PXA_DCMD_WIDTH4 | PXA_DCMD_BURST32 |
+ (PXA_DCMD_LENGTH & sizeof(u32));
+ if (flags & DMA_PREP_INTERRUPT)
+ updater->dcmd |= PXA_DCMD_ENDIRQEN;
+ if (sw_desc->cyclic)
+ sw_desc->hw_desc[sw_desc->nb_desc - 2]->ddadr = sw_desc->first;
+}
+
+static bool is_desc_completed(struct virt_dma_desc *vd)
+{
+ struct pxad_desc_sw *sw_desc = to_pxad_sw_desc(vd);
+ struct pxad_desc_hw *updater =
+ sw_desc->hw_desc[sw_desc->nb_desc - 1];
+
+ return updater->dtadr != (updater->dsadr + 8);
+}
+
+static void pxad_desc_chain(struct virt_dma_desc *vd1,
+ struct virt_dma_desc *vd2)
+{
+ struct pxad_desc_sw *desc1 = to_pxad_sw_desc(vd1);
+ struct pxad_desc_sw *desc2 = to_pxad_sw_desc(vd2);
+ dma_addr_t dma_to_chain;
+
+ dma_to_chain = desc2->first;
+ desc1->hw_desc[desc1->nb_desc - 1]->ddadr = dma_to_chain;
+}
+
+static bool pxad_try_hotchain(struct virt_dma_chan *vc,
+ struct virt_dma_desc *vd)
+{
+ struct virt_dma_desc *vd_last_issued = NULL;
+ struct pxad_chan *chan = to_pxad_chan(&vc->chan);
+
+ /*
+ * Attempt to hot chain the tx if the phy is still running. This is
+ * considered successful only if either the channel is still running
+ * after the chaining, or if the chained transfer is completed after
+ * having been hot chained.
+ * A change of alignment is not allowed, and forbids hotchaining.
+ */
+ if (is_chan_running(chan)) {
+ BUG_ON(list_empty(&vc->desc_issued));
+
+ if (!is_running_chan_misaligned(chan) &&
+ to_pxad_sw_desc(vd)->misaligned)
+ return false;
+
+ vd_last_issued = list_entry(vc->desc_issued.prev,
+ struct virt_dma_desc, node);
+ pxad_desc_chain(vd_last_issued, vd);
+ if (is_chan_running(chan) || is_desc_completed(vd))
+ return true;
+ }
+
+ return false;
+}
+
+static unsigned int clear_chan_irq(struct pxad_phy *phy)
+{
+ u32 dcsr;
+ u32 dint = readl(phy->base + DINT);
+
+ if (!(dint & BIT(phy->idx)))
+ return PXA_DCSR_RUN;
+
+ /* clear irq */
+ dcsr = phy_readl_relaxed(phy, DCSR);
+ phy_writel(phy, dcsr, DCSR);
+ if ((dcsr & PXA_DCSR_BUSERR) && (phy->vchan))
+ dev_warn(&phy->vchan->vc.chan.dev->device,
+ "%s(chan=%p): PXA_DCSR_BUSERR\n",
+ __func__, &phy->vchan);
+
+ return dcsr & ~PXA_DCSR_RUN;
+}
+
+static irqreturn_t pxad_chan_handler(int irq, void *dev_id)
+{
+ struct pxad_phy *phy = dev_id;
+ struct pxad_chan *chan = phy->vchan;
+ struct virt_dma_desc *vd, *tmp;
+ unsigned int dcsr;
+ bool vd_completed;
+ dma_cookie_t last_started = 0;
+
+ BUG_ON(!chan);
+
+ dcsr = clear_chan_irq(phy);
+ if (dcsr & PXA_DCSR_RUN)
+ return IRQ_NONE;
+
+ spin_lock(&chan->vc.lock);
+ list_for_each_entry_safe(vd, tmp, &chan->vc.desc_issued, node) {
+ vd_completed = is_desc_completed(vd);
+ dev_dbg(&chan->vc.chan.dev->device,
+ "%s(): checking txd %p[%x]: completed=%d dcsr=0x%x\n",
+ __func__, vd, vd->tx.cookie, vd_completed,
+ dcsr);
+ last_started = vd->tx.cookie;
+ if (to_pxad_sw_desc(vd)->cyclic) {
+ vchan_cyclic_callback(vd);
+ break;
+ }
+ if (vd_completed) {
+ list_del(&vd->node);
+ vchan_cookie_complete(vd);
+ } else {
+ break;
+ }
+ }
+
+ if (dcsr & PXA_DCSR_BUSERR) {
+ chan->bus_error = last_started;
+ phy_disable(phy);
+ }
+
+ if (!chan->bus_error && dcsr & PXA_DCSR_STOPSTATE) {
+ dev_dbg(&chan->vc.chan.dev->device,
+ "%s(): channel stopped, submitted_empty=%d issued_empty=%d",
+ __func__,
+ list_empty(&chan->vc.desc_submitted),
+ list_empty(&chan->vc.desc_issued));
+ phy_writel_relaxed(phy, dcsr & ~PXA_DCSR_STOPIRQEN, DCSR);
+
+ if (list_empty(&chan->vc.desc_issued)) {
+ chan->misaligned =
+ !list_empty(&chan->vc.desc_submitted);
+ } else {
+ vd = list_first_entry(&chan->vc.desc_issued,
+ struct virt_dma_desc, node);
+ pxad_launch_chan(chan, to_pxad_sw_desc(vd));
+ }
+ }
+ spin_unlock(&chan->vc.lock);
+ wake_up(&chan->wq_state);
+
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t pxad_int_handler(int irq, void *dev_id)
+{
+ struct pxad_device *pdev = dev_id;
+ struct pxad_phy *phy;
+ u32 dint = readl(pdev->base + DINT);
+ int i, ret = IRQ_NONE;
+
+ while (dint) {
+ i = __ffs(dint);
+ dint &= (dint - 1);
+ phy = &pdev->phys[i];
+ if (pxad_chan_handler(irq, phy) == IRQ_HANDLED)
+ ret = IRQ_HANDLED;
+ }
+
+ return ret;
+}
+
+static int pxad_alloc_chan_resources(struct dma_chan *dchan)
+{
+ struct pxad_chan *chan = to_pxad_chan(dchan);
+ struct pxad_device *pdev = to_pxad_dev(chan->vc.chan.device);
+
+ if (chan->desc_pool)
+ return 1;
+
+ chan->desc_pool = dma_pool_create(dma_chan_name(dchan),
+ pdev->slave.dev,
+ sizeof(struct pxad_desc_hw),
+ __alignof__(struct pxad_desc_hw),
+ 0);
+ if (!chan->desc_pool) {
+ dev_err(&chan->vc.chan.dev->device,
+ "%s(): unable to allocate descriptor pool\n",
+ __func__);
+ return -ENOMEM;
+ }
+
+ return 1;
+}
+
+static void pxad_free_chan_resources(struct dma_chan *dchan)
+{
+ struct pxad_chan *chan = to_pxad_chan(dchan);
+
+ vchan_free_chan_resources(&chan->vc);
+ dma_pool_destroy(chan->desc_pool);
+ chan->desc_pool = NULL;
+
+ chan->drcmr = U32_MAX;
+ chan->prio = PXAD_PRIO_LOWEST;
+}
+
+static void pxad_free_desc(struct virt_dma_desc *vd)
+{
+ int i;
+ dma_addr_t dma;
+ struct pxad_desc_sw *sw_desc = to_pxad_sw_desc(vd);
+
+ for (i = sw_desc->nb_desc - 1; i >= 0; i--) {
+ if (i > 0)
+ dma = sw_desc->hw_desc[i - 1]->ddadr;
+ else
+ dma = sw_desc->first;
+ dma_pool_free(sw_desc->desc_pool,
+ sw_desc->hw_desc[i], dma);
+ }
+ sw_desc->nb_desc = 0;
+ kfree(sw_desc);
+}
+
+static struct pxad_desc_sw *
+pxad_alloc_desc(struct pxad_chan *chan, unsigned int nb_hw_desc)
+{
+ struct pxad_desc_sw *sw_desc;
+ dma_addr_t dma;
+ int i;
+
+ sw_desc = kzalloc(struct_size(sw_desc, hw_desc, nb_hw_desc),
+ GFP_NOWAIT);
+ if (!sw_desc)
+ return NULL;
+ sw_desc->desc_pool = chan->desc_pool;
+
+ for (i = 0; i < nb_hw_desc; i++) {
+ sw_desc->hw_desc[i] = dma_pool_alloc(sw_desc->desc_pool,
+ GFP_NOWAIT, &dma);
+ if (!sw_desc->hw_desc[i]) {
+ dev_err(&chan->vc.chan.dev->device,
+ "%s(): Couldn't allocate the %dth hw_desc from dma_pool %p\n",
+ __func__, i, sw_desc->desc_pool);
+ goto err;
+ }
+
+ if (i == 0)
+ sw_desc->first = dma;
+ else
+ sw_desc->hw_desc[i - 1]->ddadr = dma;
+ sw_desc->nb_desc++;
+ }
+
+ return sw_desc;
+err:
+ pxad_free_desc(&sw_desc->vd);
+ return NULL;
+}
+
+static dma_cookie_t pxad_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+ struct virt_dma_chan *vc = to_virt_chan(tx->chan);
+ struct pxad_chan *chan = to_pxad_chan(&vc->chan);
+ struct virt_dma_desc *vd_chained = NULL,
+ *vd = container_of(tx, struct virt_dma_desc, tx);
+ dma_cookie_t cookie;
+ unsigned long flags;
+
+ set_updater_desc(to_pxad_sw_desc(vd), tx->flags);
+
+ spin_lock_irqsave(&vc->lock, flags);
+ cookie = dma_cookie_assign(tx);
+
+ if (list_empty(&vc->desc_submitted) && pxad_try_hotchain(vc, vd)) {
+ list_move_tail(&vd->node, &vc->desc_issued);
+ dev_dbg(&chan->vc.chan.dev->device,
+ "%s(): txd %p[%x]: submitted (hot linked)\n",
+ __func__, vd, cookie);
+ goto out;
+ }
+
+ /*
+ * Fallback to placing the tx in the submitted queue
+ */
+ if (!list_empty(&vc->desc_submitted)) {
+ vd_chained = list_entry(vc->desc_submitted.prev,
+ struct virt_dma_desc, node);
+ /*
+ * Only chain the descriptors if no new misalignment is
+ * introduced. If a new misalignment is chained, let the channel
+ * stop, and be relaunched in misalign mode from the irq
+ * handler.
+ */
+ if (chan->misaligned || !to_pxad_sw_desc(vd)->misaligned)
+ pxad_desc_chain(vd_chained, vd);
+ else
+ vd_chained = NULL;
+ }
+ dev_dbg(&chan->vc.chan.dev->device,
+ "%s(): txd %p[%x]: submitted (%s linked)\n",
+ __func__, vd, cookie, vd_chained ? "cold" : "not");
+ list_move_tail(&vd->node, &vc->desc_submitted);
+ chan->misaligned |= to_pxad_sw_desc(vd)->misaligned;
+
+out:
+ spin_unlock_irqrestore(&vc->lock, flags);
+ return cookie;
+}
+
+static void pxad_issue_pending(struct dma_chan *dchan)
+{
+ struct pxad_chan *chan = to_pxad_chan(dchan);
+ struct virt_dma_desc *vd_first;
+ unsigned long flags;
+
+ spin_lock_irqsave(&chan->vc.lock, flags);
+ if (list_empty(&chan->vc.desc_submitted))
+ goto out;
+
+ vd_first = list_first_entry(&chan->vc.desc_submitted,
+ struct virt_dma_desc, node);
+ dev_dbg(&chan->vc.chan.dev->device,
+ "%s(): txd %p[%x]", __func__, vd_first, vd_first->tx.cookie);
+
+ vchan_issue_pending(&chan->vc);
+ if (!pxad_try_hotchain(&chan->vc, vd_first))
+ pxad_launch_chan(chan, to_pxad_sw_desc(vd_first));
+out:
+ spin_unlock_irqrestore(&chan->vc.lock, flags);
+}
+
+static inline struct dma_async_tx_descriptor *
+pxad_tx_prep(struct virt_dma_chan *vc, struct virt_dma_desc *vd,
+ unsigned long tx_flags)
+{
+ struct dma_async_tx_descriptor *tx;
+ struct pxad_chan *chan = container_of(vc, struct pxad_chan, vc);
+
+ INIT_LIST_HEAD(&vd->node);
+ tx = vchan_tx_prep(vc, vd, tx_flags);
+ tx->tx_submit = pxad_tx_submit;
+ dev_dbg(&chan->vc.chan.dev->device,
+ "%s(): vc=%p txd=%p[%x] flags=0x%lx\n", __func__,
+ vc, vd, vd->tx.cookie,
+ tx_flags);
+
+ return tx;
+}
+
+static void pxad_get_config(struct pxad_chan *chan,
+ enum dma_transfer_direction dir,
+ u32 *dcmd, u32 *dev_src, u32 *dev_dst)
+{
+ u32 maxburst = 0, dev_addr = 0;
+ enum dma_slave_buswidth width = DMA_SLAVE_BUSWIDTH_UNDEFINED;
+ struct pxad_device *pdev = to_pxad_dev(chan->vc.chan.device);
+
+ *dcmd = 0;
+ if (dir == DMA_DEV_TO_MEM) {
+ maxburst = chan->cfg.src_maxburst;
+ width = chan->cfg.src_addr_width;
+ dev_addr = chan->cfg.src_addr;
+ *dev_src = dev_addr;
+ *dcmd |= PXA_DCMD_INCTRGADDR;
+ if (chan->drcmr <= pdev->nr_requestors)
+ *dcmd |= PXA_DCMD_FLOWSRC;
+ }
+ if (dir == DMA_MEM_TO_DEV) {
+ maxburst = chan->cfg.dst_maxburst;
+ width = chan->cfg.dst_addr_width;
+ dev_addr = chan->cfg.dst_addr;
+ *dev_dst = dev_addr;
+ *dcmd |= PXA_DCMD_INCSRCADDR;
+ if (chan->drcmr <= pdev->nr_requestors)
+ *dcmd |= PXA_DCMD_FLOWTRG;
+ }
+ if (dir == DMA_MEM_TO_MEM)
+ *dcmd |= PXA_DCMD_BURST32 | PXA_DCMD_INCTRGADDR |
+ PXA_DCMD_INCSRCADDR;
+
+ dev_dbg(&chan->vc.chan.dev->device,
+ "%s(): dev_addr=0x%x maxburst=%d width=%d dir=%d\n",
+ __func__, dev_addr, maxburst, width, dir);
+
+ if (width == DMA_SLAVE_BUSWIDTH_1_BYTE)
+ *dcmd |= PXA_DCMD_WIDTH1;
+ else if (width == DMA_SLAVE_BUSWIDTH_2_BYTES)
+ *dcmd |= PXA_DCMD_WIDTH2;
+ else if (width == DMA_SLAVE_BUSWIDTH_4_BYTES)
+ *dcmd |= PXA_DCMD_WIDTH4;
+
+ if (maxburst == 8)
+ *dcmd |= PXA_DCMD_BURST8;
+ else if (maxburst == 16)
+ *dcmd |= PXA_DCMD_BURST16;
+ else if (maxburst == 32)
+ *dcmd |= PXA_DCMD_BURST32;
+}
+
+static struct dma_async_tx_descriptor *
+pxad_prep_memcpy(struct dma_chan *dchan,
+ dma_addr_t dma_dst, dma_addr_t dma_src,
+ size_t len, unsigned long flags)
+{
+ struct pxad_chan *chan = to_pxad_chan(dchan);
+ struct pxad_desc_sw *sw_desc;
+ struct pxad_desc_hw *hw_desc;
+ u32 dcmd;
+ unsigned int i, nb_desc = 0;
+ size_t copy;
+
+ if (!dchan || !len)
+ return NULL;
+
+ dev_dbg(&chan->vc.chan.dev->device,
+ "%s(): dma_dst=0x%lx dma_src=0x%lx len=%zu flags=%lx\n",
+ __func__, (unsigned long)dma_dst, (unsigned long)dma_src,
+ len, flags);
+ pxad_get_config(chan, DMA_MEM_TO_MEM, &dcmd, NULL, NULL);
+
+ nb_desc = DIV_ROUND_UP(len, PDMA_MAX_DESC_BYTES);
+ sw_desc = pxad_alloc_desc(chan, nb_desc + 1);
+ if (!sw_desc)
+ return NULL;
+ sw_desc->len = len;
+
+ if (!IS_ALIGNED(dma_src, 1 << PDMA_ALIGNMENT) ||
+ !IS_ALIGNED(dma_dst, 1 << PDMA_ALIGNMENT))
+ sw_desc->misaligned = true;
+
+ i = 0;
+ do {
+ hw_desc = sw_desc->hw_desc[i++];
+ copy = min_t(size_t, len, PDMA_MAX_DESC_BYTES);
+ hw_desc->dcmd = dcmd | (PXA_DCMD_LENGTH & copy);
+ hw_desc->dsadr = dma_src;
+ hw_desc->dtadr = dma_dst;
+ len -= copy;
+ dma_src += copy;
+ dma_dst += copy;
+ } while (len);
+ set_updater_desc(sw_desc, flags);
+
+ return pxad_tx_prep(&chan->vc, &sw_desc->vd, flags);
+}
+
+static struct dma_async_tx_descriptor *
+pxad_prep_slave_sg(struct dma_chan *dchan, struct scatterlist *sgl,
+ unsigned int sg_len, enum dma_transfer_direction dir,
+ unsigned long flags, void *context)
+{
+ struct pxad_chan *chan = to_pxad_chan(dchan);
+ struct pxad_desc_sw *sw_desc;
+ size_t len, avail;
+ struct scatterlist *sg;
+ dma_addr_t dma;
+ u32 dcmd, dsadr = 0, dtadr = 0;
+ unsigned int nb_desc = 0, i, j = 0;
+
+ if ((sgl == NULL) || (sg_len == 0))
+ return NULL;
+
+ pxad_get_config(chan, dir, &dcmd, &dsadr, &dtadr);
+ dev_dbg(&chan->vc.chan.dev->device,
+ "%s(): dir=%d flags=%lx\n", __func__, dir, flags);
+
+ for_each_sg(sgl, sg, sg_len, i)
+ nb_desc += DIV_ROUND_UP(sg_dma_len(sg), PDMA_MAX_DESC_BYTES);
+ sw_desc = pxad_alloc_desc(chan, nb_desc + 1);
+ if (!sw_desc)
+ return NULL;
+
+ for_each_sg(sgl, sg, sg_len, i) {
+ dma = sg_dma_address(sg);
+ avail = sg_dma_len(sg);
+ sw_desc->len += avail;
+
+ do {
+ len = min_t(size_t, avail, PDMA_MAX_DESC_BYTES);
+ if (dma & 0x7)
+ sw_desc->misaligned = true;
+
+ sw_desc->hw_desc[j]->dcmd =
+ dcmd | (PXA_DCMD_LENGTH & len);
+ sw_desc->hw_desc[j]->dsadr = dsadr ? dsadr : dma;
+ sw_desc->hw_desc[j++]->dtadr = dtadr ? dtadr : dma;
+
+ dma += len;
+ avail -= len;
+ } while (avail);
+ }
+ set_updater_desc(sw_desc, flags);
+
+ return pxad_tx_prep(&chan->vc, &sw_desc->vd, flags);
+}
+
+static struct dma_async_tx_descriptor *
+pxad_prep_dma_cyclic(struct dma_chan *dchan,
+ dma_addr_t buf_addr, size_t len, size_t period_len,
+ enum dma_transfer_direction dir, unsigned long flags)
+{
+ struct pxad_chan *chan = to_pxad_chan(dchan);
+ struct pxad_desc_sw *sw_desc;
+ struct pxad_desc_hw **phw_desc;
+ dma_addr_t dma;
+ u32 dcmd, dsadr = 0, dtadr = 0;
+ unsigned int nb_desc = 0;
+
+ if (!dchan || !len || !period_len)
+ return NULL;
+ if ((dir != DMA_DEV_TO_MEM) && (dir != DMA_MEM_TO_DEV)) {
+ dev_err(&chan->vc.chan.dev->device,
+ "Unsupported direction for cyclic DMA\n");
+ return NULL;
+ }
+ /* the buffer length must be a multiple of period_len */
+ if (len % period_len != 0 || period_len > PDMA_MAX_DESC_BYTES ||
+ !IS_ALIGNED(period_len, 1 << PDMA_ALIGNMENT))
+ return NULL;
+
+ pxad_get_config(chan, dir, &dcmd, &dsadr, &dtadr);
+ dcmd |= PXA_DCMD_ENDIRQEN | (PXA_DCMD_LENGTH & period_len);
+ dev_dbg(&chan->vc.chan.dev->device,
+ "%s(): buf_addr=0x%lx len=%zu period=%zu dir=%d flags=%lx\n",
+ __func__, (unsigned long)buf_addr, len, period_len, dir, flags);
+
+ nb_desc = DIV_ROUND_UP(period_len, PDMA_MAX_DESC_BYTES);
+ nb_desc *= DIV_ROUND_UP(len, period_len);
+ sw_desc = pxad_alloc_desc(chan, nb_desc + 1);
+ if (!sw_desc)
+ return NULL;
+ sw_desc->cyclic = true;
+ sw_desc->len = len;
+
+ phw_desc = sw_desc->hw_desc;
+ dma = buf_addr;
+ do {
+ phw_desc[0]->dsadr = dsadr ? dsadr : dma;
+ phw_desc[0]->dtadr = dtadr ? dtadr : dma;
+ phw_desc[0]->dcmd = dcmd;
+ phw_desc++;
+ dma += period_len;
+ len -= period_len;
+ } while (len);
+ set_updater_desc(sw_desc, flags);
+
+ return pxad_tx_prep(&chan->vc, &sw_desc->vd, flags);
+}
+
+static int pxad_config(struct dma_chan *dchan,
+ struct dma_slave_config *cfg)
+{
+ struct pxad_chan *chan = to_pxad_chan(dchan);
+
+ if (!dchan)
+ return -EINVAL;
+
+ chan->cfg = *cfg;
+ return 0;
+}
+
+static int pxad_terminate_all(struct dma_chan *dchan)
+{
+ struct pxad_chan *chan = to_pxad_chan(dchan);
+ struct pxad_device *pdev = to_pxad_dev(chan->vc.chan.device);
+ struct virt_dma_desc *vd = NULL;
+ unsigned long flags;
+ struct pxad_phy *phy;
+ LIST_HEAD(head);
+
+ dev_dbg(&chan->vc.chan.dev->device,
+ "%s(): vchan %p: terminate all\n", __func__, &chan->vc);
+
+ spin_lock_irqsave(&chan->vc.lock, flags);
+ vchan_get_all_descriptors(&chan->vc, &head);
+
+ list_for_each_entry(vd, &head, node) {
+ dev_dbg(&chan->vc.chan.dev->device,
+ "%s(): cancelling txd %p[%x] (completed=%d)", __func__,
+ vd, vd->tx.cookie, is_desc_completed(vd));
+ }
+
+ phy = chan->phy;
+ if (phy) {
+ phy_disable(chan->phy);
+ pxad_free_phy(chan);
+ chan->phy = NULL;
+ spin_lock(&pdev->phy_lock);
+ phy->vchan = NULL;
+ spin_unlock(&pdev->phy_lock);
+ }
+ spin_unlock_irqrestore(&chan->vc.lock, flags);
+ vchan_dma_desc_free_list(&chan->vc, &head);
+
+ return 0;
+}
+
+static unsigned int pxad_residue(struct pxad_chan *chan,
+ dma_cookie_t cookie)
+{
+ struct virt_dma_desc *vd = NULL;
+ struct pxad_desc_sw *sw_desc = NULL;
+ struct pxad_desc_hw *hw_desc = NULL;
+ u32 curr, start, len, end, residue = 0;
+ unsigned long flags;
+ bool passed = false;
+ int i;
+
+ /*
+ * If the channel does not have a phy pointer anymore, it has already
+ * been completed. Therefore, its residue is 0.
+ */
+ if (!chan->phy)
+ return 0;
+
+ spin_lock_irqsave(&chan->vc.lock, flags);
+
+ vd = vchan_find_desc(&chan->vc, cookie);
+ if (!vd)
+ goto out;
+
+ sw_desc = to_pxad_sw_desc(vd);
+ if (sw_desc->hw_desc[0]->dcmd & PXA_DCMD_INCSRCADDR)
+ curr = phy_readl_relaxed(chan->phy, DSADR);
+ else
+ curr = phy_readl_relaxed(chan->phy, DTADR);
+
+ /*
+ * curr has to be actually read before checking descriptor
+ * completion, so that a curr inside a status updater
+ * descriptor implies the following test returns true, and
+ * preventing reordering of curr load and the test.
+ */
+ rmb();
+ if (is_desc_completed(vd))
+ goto out;
+
+ for (i = 0; i < sw_desc->nb_desc - 1; i++) {
+ hw_desc = sw_desc->hw_desc[i];
+ if (sw_desc->hw_desc[0]->dcmd & PXA_DCMD_INCSRCADDR)
+ start = hw_desc->dsadr;
+ else
+ start = hw_desc->dtadr;
+ len = hw_desc->dcmd & PXA_DCMD_LENGTH;
+ end = start + len;
+
+ /*
+ * 'passed' will be latched once we found the descriptor
+ * which lies inside the boundaries of the curr
+ * pointer. All descriptors that occur in the list
+ * _after_ we found that partially handled descriptor
+ * are still to be processed and are hence added to the
+ * residual bytes counter.
+ */
+
+ if (passed) {
+ residue += len;
+ } else if (curr >= start && curr <= end) {
+ residue += end - curr;
+ passed = true;
+ }
+ }
+ if (!passed)
+ residue = sw_desc->len;
+
+out:
+ spin_unlock_irqrestore(&chan->vc.lock, flags);
+ dev_dbg(&chan->vc.chan.dev->device,
+ "%s(): txd %p[%x] sw_desc=%p: %d\n",
+ __func__, vd, cookie, sw_desc, residue);
+ return residue;
+}
+
+static enum dma_status pxad_tx_status(struct dma_chan *dchan,
+ dma_cookie_t cookie,
+ struct dma_tx_state *txstate)
+{
+ struct pxad_chan *chan = to_pxad_chan(dchan);
+ enum dma_status ret;
+
+ if (cookie == chan->bus_error)
+ return DMA_ERROR;
+
+ ret = dma_cookie_status(dchan, cookie, txstate);
+ if (likely(txstate && (ret != DMA_ERROR)))
+ dma_set_residue(txstate, pxad_residue(chan, cookie));
+
+ return ret;
+}
+
+static void pxad_synchronize(struct dma_chan *dchan)
+{
+ struct pxad_chan *chan = to_pxad_chan(dchan);
+
+ wait_event(chan->wq_state, !is_chan_running(chan));
+ vchan_synchronize(&chan->vc);
+}
+
+static void pxad_free_channels(struct dma_device *dmadev)
+{
+ struct pxad_chan *c, *cn;
+
+ list_for_each_entry_safe(c, cn, &dmadev->channels,
+ vc.chan.device_node) {
+ list_del(&c->vc.chan.device_node);
+ tasklet_kill(&c->vc.task);
+ }
+}
+
+static int pxad_remove(struct platform_device *op)
+{
+ struct pxad_device *pdev = platform_get_drvdata(op);
+
+ pxad_cleanup_debugfs(pdev);
+ pxad_free_channels(&pdev->slave);
+ return 0;
+}
+
+static int pxad_init_phys(struct platform_device *op,
+ struct pxad_device *pdev,
+ unsigned int nb_phy_chans)
+{
+ int irq0, irq, nr_irq = 0, i, ret;
+ struct pxad_phy *phy;
+
+ irq0 = platform_get_irq(op, 0);
+ if (irq0 < 0)
+ return irq0;
+
+ pdev->phys = devm_kcalloc(&op->dev, nb_phy_chans,
+ sizeof(pdev->phys[0]), GFP_KERNEL);
+ if (!pdev->phys)
+ return -ENOMEM;
+
+ for (i = 0; i < nb_phy_chans; i++)
+ if (platform_get_irq_optional(op, i) > 0)
+ nr_irq++;
+
+ for (i = 0; i < nb_phy_chans; i++) {
+ phy = &pdev->phys[i];
+ phy->base = pdev->base;
+ phy->idx = i;
+ irq = platform_get_irq_optional(op, i);
+ if ((nr_irq > 1) && (irq > 0))
+ ret = devm_request_irq(&op->dev, irq,
+ pxad_chan_handler,
+ IRQF_SHARED, "pxa-dma", phy);
+ if ((nr_irq == 1) && (i == 0))
+ ret = devm_request_irq(&op->dev, irq0,
+ pxad_int_handler,
+ IRQF_SHARED, "pxa-dma", pdev);
+ if (ret) {
+ dev_err(pdev->slave.dev,
+ "%s(): can't request irq %d:%d\n", __func__,
+ irq, ret);
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+static const struct of_device_id pxad_dt_ids[] = {
+ { .compatible = "marvell,pdma-1.0", },
+ {}
+};
+MODULE_DEVICE_TABLE(of, pxad_dt_ids);
+
+static struct dma_chan *pxad_dma_xlate(struct of_phandle_args *dma_spec,
+ struct of_dma *ofdma)
+{
+ struct pxad_device *d = ofdma->of_dma_data;
+ struct dma_chan *chan;
+
+ chan = dma_get_any_slave_channel(&d->slave);
+ if (!chan)
+ return NULL;
+
+ to_pxad_chan(chan)->drcmr = dma_spec->args[0];
+ to_pxad_chan(chan)->prio = dma_spec->args[1];
+
+ return chan;
+}
+
+static int pxad_init_dmadev(struct platform_device *op,
+ struct pxad_device *pdev,
+ unsigned int nr_phy_chans,
+ unsigned int nr_requestors)
+{
+ int ret;
+ unsigned int i;
+ struct pxad_chan *c;
+
+ pdev->nr_chans = nr_phy_chans;
+ pdev->nr_requestors = nr_requestors;
+ INIT_LIST_HEAD(&pdev->slave.channels);
+ pdev->slave.device_alloc_chan_resources = pxad_alloc_chan_resources;
+ pdev->slave.device_free_chan_resources = pxad_free_chan_resources;
+ pdev->slave.device_tx_status = pxad_tx_status;
+ pdev->slave.device_issue_pending = pxad_issue_pending;
+ pdev->slave.device_config = pxad_config;
+ pdev->slave.device_synchronize = pxad_synchronize;
+ pdev->slave.device_terminate_all = pxad_terminate_all;
+
+ if (op->dev.coherent_dma_mask)
+ dma_set_mask(&op->dev, op->dev.coherent_dma_mask);
+ else
+ dma_set_mask(&op->dev, DMA_BIT_MASK(32));
+
+ ret = pxad_init_phys(op, pdev, nr_phy_chans);
+ if (ret)
+ return ret;
+
+ for (i = 0; i < nr_phy_chans; i++) {
+ c = devm_kzalloc(&op->dev, sizeof(*c), GFP_KERNEL);
+ if (!c)
+ return -ENOMEM;
+
+ c->drcmr = U32_MAX;
+ c->prio = PXAD_PRIO_LOWEST;
+ c->vc.desc_free = pxad_free_desc;
+ vchan_init(&c->vc, &pdev->slave);
+ init_waitqueue_head(&c->wq_state);
+ }
+
+ return dmaenginem_async_device_register(&pdev->slave);
+}
+
+static int pxad_probe(struct platform_device *op)
+{
+ struct pxad_device *pdev;
+ const struct of_device_id *of_id;
+ const struct dma_slave_map *slave_map = NULL;
+ struct mmp_dma_platdata *pdata = dev_get_platdata(&op->dev);
+ struct resource *iores;
+ int ret, dma_channels = 0, nb_requestors = 0, slave_map_cnt = 0;
+ const enum dma_slave_buswidth widths =
+ DMA_SLAVE_BUSWIDTH_1_BYTE | DMA_SLAVE_BUSWIDTH_2_BYTES |
+ DMA_SLAVE_BUSWIDTH_4_BYTES;
+
+ pdev = devm_kzalloc(&op->dev, sizeof(*pdev), GFP_KERNEL);
+ if (!pdev)
+ return -ENOMEM;
+
+ spin_lock_init(&pdev->phy_lock);
+
+ iores = platform_get_resource(op, IORESOURCE_MEM, 0);
+ pdev->base = devm_ioremap_resource(&op->dev, iores);
+ if (IS_ERR(pdev->base))
+ return PTR_ERR(pdev->base);
+
+ of_id = of_match_device(pxad_dt_ids, &op->dev);
+ if (of_id) {
+ /* Parse new and deprecated dma-channels properties */
+ if (of_property_read_u32(op->dev.of_node, "dma-channels",
+ &dma_channels))
+ of_property_read_u32(op->dev.of_node, "#dma-channels",
+ &dma_channels);
+ /* Parse new and deprecated dma-requests properties */
+ ret = of_property_read_u32(op->dev.of_node, "dma-requests",
+ &nb_requestors);
+ if (ret)
+ ret = of_property_read_u32(op->dev.of_node, "#dma-requests",
+ &nb_requestors);
+ if (ret) {
+ dev_warn(pdev->slave.dev,
+ "#dma-requests set to default 32 as missing in OF: %d",
+ ret);
+ nb_requestors = 32;
+ }
+ } else if (pdata && pdata->dma_channels) {
+ dma_channels = pdata->dma_channels;
+ nb_requestors = pdata->nb_requestors;
+ slave_map = pdata->slave_map;
+ slave_map_cnt = pdata->slave_map_cnt;
+ } else {
+ dma_channels = 32; /* default 32 channel */
+ }
+
+ dma_cap_set(DMA_SLAVE, pdev->slave.cap_mask);
+ dma_cap_set(DMA_MEMCPY, pdev->slave.cap_mask);
+ dma_cap_set(DMA_CYCLIC, pdev->slave.cap_mask);
+ dma_cap_set(DMA_PRIVATE, pdev->slave.cap_mask);
+ pdev->slave.device_prep_dma_memcpy = pxad_prep_memcpy;
+ pdev->slave.device_prep_slave_sg = pxad_prep_slave_sg;
+ pdev->slave.device_prep_dma_cyclic = pxad_prep_dma_cyclic;
+ pdev->slave.filter.map = slave_map;
+ pdev->slave.filter.mapcnt = slave_map_cnt;
+ pdev->slave.filter.fn = pxad_filter_fn;
+
+ pdev->slave.copy_align = PDMA_ALIGNMENT;
+ pdev->slave.src_addr_widths = widths;
+ pdev->slave.dst_addr_widths = widths;
+ pdev->slave.directions = BIT(DMA_MEM_TO_DEV) | BIT(DMA_DEV_TO_MEM);
+ pdev->slave.residue_granularity = DMA_RESIDUE_GRANULARITY_DESCRIPTOR;
+ pdev->slave.descriptor_reuse = true;
+
+ pdev->slave.dev = &op->dev;
+ ret = pxad_init_dmadev(op, pdev, dma_channels, nb_requestors);
+ if (ret) {
+ dev_err(pdev->slave.dev, "unable to register\n");
+ return ret;
+ }
+
+ if (op->dev.of_node) {
+ /* Device-tree DMA controller registration */
+ ret = of_dma_controller_register(op->dev.of_node,
+ pxad_dma_xlate, pdev);
+ if (ret < 0) {
+ dev_err(pdev->slave.dev,
+ "of_dma_controller_register failed\n");
+ return ret;
+ }
+ }
+
+ platform_set_drvdata(op, pdev);
+ pxad_init_debugfs(pdev);
+ dev_info(pdev->slave.dev, "initialized %d channels on %d requestors\n",
+ dma_channels, nb_requestors);
+ return 0;
+}
+
+static const struct platform_device_id pxad_id_table[] = {
+ { "pxa-dma", },
+ { },
+};
+
+static struct platform_driver pxad_driver = {
+ .driver = {
+ .name = "pxa-dma",
+ .of_match_table = pxad_dt_ids,
+ },
+ .id_table = pxad_id_table,
+ .probe = pxad_probe,
+ .remove = pxad_remove,
+};
+
+static bool pxad_filter_fn(struct dma_chan *chan, void *param)
+{
+ struct pxad_chan *c = to_pxad_chan(chan);
+ struct pxad_param *p = param;
+
+ if (chan->device->dev->driver != &pxad_driver.driver)
+ return false;
+
+ c->drcmr = p->drcmr;
+ c->prio = p->prio;
+
+ return true;
+}
+
+module_platform_driver(pxad_driver);
+
+MODULE_DESCRIPTION("Marvell PXA Peripheral DMA Driver");
+MODULE_AUTHOR("Robert Jarzmik <robert.jarzmik@free.fr>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/qcom/Kconfig b/drivers/dma/qcom/Kconfig
new file mode 100644
index 000000000..3f926a653
--- /dev/null
+++ b/drivers/dma/qcom/Kconfig
@@ -0,0 +1,54 @@
+# SPDX-License-Identifier: GPL-2.0-only
+config QCOM_ADM
+ tristate "Qualcomm ADM support"
+ depends on (ARCH_QCOM || COMPILE_TEST) && !PHYS_ADDR_T_64BIT
+ select DMA_ENGINE
+ select DMA_VIRTUAL_CHANNELS
+ help
+ Enable support for the Qualcomm Application Data Mover (ADM) DMA
+ controller, as present on MSM8x60, APQ8064, and IPQ8064 devices.
+ This controller provides DMA capabilities for both general purpose
+ and on-chip peripheral devices.
+
+config QCOM_BAM_DMA
+ tristate "QCOM BAM DMA support"
+ depends on ARCH_QCOM || (COMPILE_TEST && OF && ARM)
+ select DMA_ENGINE
+ select DMA_VIRTUAL_CHANNELS
+ help
+ Enable support for the QCOM BAM DMA controller. This controller
+ provides DMA capabilities for a variety of on-chip devices.
+
+config QCOM_GPI_DMA
+ tristate "Qualcomm Technologies GPI DMA support"
+ depends on ARCH_QCOM
+ select DMA_ENGINE
+ select DMA_VIRTUAL_CHANNELS
+ help
+ Enable support for the QCOM GPI DMA controller. This controller
+ provides DMA capabilities for a variety of peripheral buses such
+ as I2C, UART, and SPI. By using GPI dmaengine driver, bus drivers
+ can use a standardize interface that is protocol independent to
+ transfer data between DDR and peripheral.
+
+config QCOM_HIDMA_MGMT
+ tristate "Qualcomm Technologies HIDMA Management support"
+ depends on HAS_IOMEM
+ select DMA_ENGINE
+ help
+ Enable support for the Qualcomm Technologies HIDMA Management.
+ Each DMA device requires one management interface driver
+ for basic initialization before QCOM_HIDMA channel driver can
+ start managing the channels. In a virtualized environment,
+ the guest OS would run QCOM_HIDMA channel driver and the
+ host would run the QCOM_HIDMA_MGMT management driver.
+
+config QCOM_HIDMA
+ tristate "Qualcomm Technologies HIDMA Channel support"
+ select DMA_ENGINE
+ help
+ Enable support for the Qualcomm Technologies HIDMA controller.
+ The HIDMA controller supports optimized buffer copies
+ (user to kernel, kernel to kernel, etc.). It only supports
+ memcpy interface. The core is not intended for general
+ purpose slave DMA.
diff --git a/drivers/dma/qcom/Makefile b/drivers/dma/qcom/Makefile
new file mode 100644
index 000000000..50f1e7014
--- /dev/null
+++ b/drivers/dma/qcom/Makefile
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_QCOM_ADM) += qcom_adm.o
+obj-$(CONFIG_QCOM_BAM_DMA) += bam_dma.o
+obj-$(CONFIG_QCOM_GPI_DMA) += gpi.o
+obj-$(CONFIG_QCOM_HIDMA_MGMT) += hdma_mgmt.o
+hdma_mgmt-objs := hidma_mgmt.o hidma_mgmt_sys.o
+obj-$(CONFIG_QCOM_HIDMA) += hdma.o
+hdma-objs := hidma_ll.o hidma.o hidma_dbg.o
diff --git a/drivers/dma/qcom/bam_dma.c b/drivers/dma/qcom/bam_dma.c
new file mode 100644
index 000000000..2ff787df5
--- /dev/null
+++ b/drivers/dma/qcom/bam_dma.c
@@ -0,0 +1,1492 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2013-2014, The Linux Foundation. All rights reserved.
+ */
+/*
+ * QCOM BAM DMA engine driver
+ *
+ * QCOM BAM DMA blocks are distributed amongst a number of the on-chip
+ * peripherals on the MSM 8x74. The configuration of the channels are dependent
+ * on the way they are hard wired to that specific peripheral. The peripheral
+ * device tree entries specify the configuration of each channel.
+ *
+ * The DMA controller requires the use of external memory for storage of the
+ * hardware descriptors for each channel. The descriptor FIFO is accessed as a
+ * circular buffer and operations are managed according to the offset within the
+ * FIFO. After pipe/channel reset, all of the pipe registers and internal state
+ * are back to defaults.
+ *
+ * During DMA operations, we write descriptors to the FIFO, being careful to
+ * handle wrapping and then write the last FIFO offset to that channel's
+ * P_EVNT_REG register to kick off the transaction. The P_SW_OFSTS register
+ * indicates the current FIFO offset that is being processed, so there is some
+ * indication of where the hardware is currently working.
+ */
+
+#include <linux/kernel.h>
+#include <linux/io.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/dma-mapping.h>
+#include <linux/scatterlist.h>
+#include <linux/device.h>
+#include <linux/platform_device.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/of_dma.h>
+#include <linux/circ_buf.h>
+#include <linux/clk.h>
+#include <linux/dmaengine.h>
+#include <linux/pm_runtime.h>
+
+#include "../dmaengine.h"
+#include "../virt-dma.h"
+
+struct bam_desc_hw {
+ __le32 addr; /* Buffer physical address */
+ __le16 size; /* Buffer size in bytes */
+ __le16 flags;
+};
+
+#define BAM_DMA_AUTOSUSPEND_DELAY 100
+
+#define DESC_FLAG_INT BIT(15)
+#define DESC_FLAG_EOT BIT(14)
+#define DESC_FLAG_EOB BIT(13)
+#define DESC_FLAG_NWD BIT(12)
+#define DESC_FLAG_CMD BIT(11)
+
+struct bam_async_desc {
+ struct virt_dma_desc vd;
+
+ u32 num_desc;
+ u32 xfer_len;
+
+ /* transaction flags, EOT|EOB|NWD */
+ u16 flags;
+
+ struct bam_desc_hw *curr_desc;
+
+ /* list node for the desc in the bam_chan list of descriptors */
+ struct list_head desc_node;
+ enum dma_transfer_direction dir;
+ size_t length;
+ struct bam_desc_hw desc[];
+};
+
+enum bam_reg {
+ BAM_CTRL,
+ BAM_REVISION,
+ BAM_NUM_PIPES,
+ BAM_DESC_CNT_TRSHLD,
+ BAM_IRQ_SRCS,
+ BAM_IRQ_SRCS_MSK,
+ BAM_IRQ_SRCS_UNMASKED,
+ BAM_IRQ_STTS,
+ BAM_IRQ_CLR,
+ BAM_IRQ_EN,
+ BAM_CNFG_BITS,
+ BAM_IRQ_SRCS_EE,
+ BAM_IRQ_SRCS_MSK_EE,
+ BAM_P_CTRL,
+ BAM_P_RST,
+ BAM_P_HALT,
+ BAM_P_IRQ_STTS,
+ BAM_P_IRQ_CLR,
+ BAM_P_IRQ_EN,
+ BAM_P_EVNT_DEST_ADDR,
+ BAM_P_EVNT_REG,
+ BAM_P_SW_OFSTS,
+ BAM_P_DATA_FIFO_ADDR,
+ BAM_P_DESC_FIFO_ADDR,
+ BAM_P_EVNT_GEN_TRSHLD,
+ BAM_P_FIFO_SIZES,
+};
+
+struct reg_offset_data {
+ u32 base_offset;
+ unsigned int pipe_mult, evnt_mult, ee_mult;
+};
+
+static const struct reg_offset_data bam_v1_3_reg_info[] = {
+ [BAM_CTRL] = { 0x0F80, 0x00, 0x00, 0x00 },
+ [BAM_REVISION] = { 0x0F84, 0x00, 0x00, 0x00 },
+ [BAM_NUM_PIPES] = { 0x0FBC, 0x00, 0x00, 0x00 },
+ [BAM_DESC_CNT_TRSHLD] = { 0x0F88, 0x00, 0x00, 0x00 },
+ [BAM_IRQ_SRCS] = { 0x0F8C, 0x00, 0x00, 0x00 },
+ [BAM_IRQ_SRCS_MSK] = { 0x0F90, 0x00, 0x00, 0x00 },
+ [BAM_IRQ_SRCS_UNMASKED] = { 0x0FB0, 0x00, 0x00, 0x00 },
+ [BAM_IRQ_STTS] = { 0x0F94, 0x00, 0x00, 0x00 },
+ [BAM_IRQ_CLR] = { 0x0F98, 0x00, 0x00, 0x00 },
+ [BAM_IRQ_EN] = { 0x0F9C, 0x00, 0x00, 0x00 },
+ [BAM_CNFG_BITS] = { 0x0FFC, 0x00, 0x00, 0x00 },
+ [BAM_IRQ_SRCS_EE] = { 0x1800, 0x00, 0x00, 0x80 },
+ [BAM_IRQ_SRCS_MSK_EE] = { 0x1804, 0x00, 0x00, 0x80 },
+ [BAM_P_CTRL] = { 0x0000, 0x80, 0x00, 0x00 },
+ [BAM_P_RST] = { 0x0004, 0x80, 0x00, 0x00 },
+ [BAM_P_HALT] = { 0x0008, 0x80, 0x00, 0x00 },
+ [BAM_P_IRQ_STTS] = { 0x0010, 0x80, 0x00, 0x00 },
+ [BAM_P_IRQ_CLR] = { 0x0014, 0x80, 0x00, 0x00 },
+ [BAM_P_IRQ_EN] = { 0x0018, 0x80, 0x00, 0x00 },
+ [BAM_P_EVNT_DEST_ADDR] = { 0x102C, 0x00, 0x40, 0x00 },
+ [BAM_P_EVNT_REG] = { 0x1018, 0x00, 0x40, 0x00 },
+ [BAM_P_SW_OFSTS] = { 0x1000, 0x00, 0x40, 0x00 },
+ [BAM_P_DATA_FIFO_ADDR] = { 0x1024, 0x00, 0x40, 0x00 },
+ [BAM_P_DESC_FIFO_ADDR] = { 0x101C, 0x00, 0x40, 0x00 },
+ [BAM_P_EVNT_GEN_TRSHLD] = { 0x1028, 0x00, 0x40, 0x00 },
+ [BAM_P_FIFO_SIZES] = { 0x1020, 0x00, 0x40, 0x00 },
+};
+
+static const struct reg_offset_data bam_v1_4_reg_info[] = {
+ [BAM_CTRL] = { 0x0000, 0x00, 0x00, 0x00 },
+ [BAM_REVISION] = { 0x0004, 0x00, 0x00, 0x00 },
+ [BAM_NUM_PIPES] = { 0x003C, 0x00, 0x00, 0x00 },
+ [BAM_DESC_CNT_TRSHLD] = { 0x0008, 0x00, 0x00, 0x00 },
+ [BAM_IRQ_SRCS] = { 0x000C, 0x00, 0x00, 0x00 },
+ [BAM_IRQ_SRCS_MSK] = { 0x0010, 0x00, 0x00, 0x00 },
+ [BAM_IRQ_SRCS_UNMASKED] = { 0x0030, 0x00, 0x00, 0x00 },
+ [BAM_IRQ_STTS] = { 0x0014, 0x00, 0x00, 0x00 },
+ [BAM_IRQ_CLR] = { 0x0018, 0x00, 0x00, 0x00 },
+ [BAM_IRQ_EN] = { 0x001C, 0x00, 0x00, 0x00 },
+ [BAM_CNFG_BITS] = { 0x007C, 0x00, 0x00, 0x00 },
+ [BAM_IRQ_SRCS_EE] = { 0x0800, 0x00, 0x00, 0x80 },
+ [BAM_IRQ_SRCS_MSK_EE] = { 0x0804, 0x00, 0x00, 0x80 },
+ [BAM_P_CTRL] = { 0x1000, 0x1000, 0x00, 0x00 },
+ [BAM_P_RST] = { 0x1004, 0x1000, 0x00, 0x00 },
+ [BAM_P_HALT] = { 0x1008, 0x1000, 0x00, 0x00 },
+ [BAM_P_IRQ_STTS] = { 0x1010, 0x1000, 0x00, 0x00 },
+ [BAM_P_IRQ_CLR] = { 0x1014, 0x1000, 0x00, 0x00 },
+ [BAM_P_IRQ_EN] = { 0x1018, 0x1000, 0x00, 0x00 },
+ [BAM_P_EVNT_DEST_ADDR] = { 0x182C, 0x00, 0x1000, 0x00 },
+ [BAM_P_EVNT_REG] = { 0x1818, 0x00, 0x1000, 0x00 },
+ [BAM_P_SW_OFSTS] = { 0x1800, 0x00, 0x1000, 0x00 },
+ [BAM_P_DATA_FIFO_ADDR] = { 0x1824, 0x00, 0x1000, 0x00 },
+ [BAM_P_DESC_FIFO_ADDR] = { 0x181C, 0x00, 0x1000, 0x00 },
+ [BAM_P_EVNT_GEN_TRSHLD] = { 0x1828, 0x00, 0x1000, 0x00 },
+ [BAM_P_FIFO_SIZES] = { 0x1820, 0x00, 0x1000, 0x00 },
+};
+
+static const struct reg_offset_data bam_v1_7_reg_info[] = {
+ [BAM_CTRL] = { 0x00000, 0x00, 0x00, 0x00 },
+ [BAM_REVISION] = { 0x01000, 0x00, 0x00, 0x00 },
+ [BAM_NUM_PIPES] = { 0x01008, 0x00, 0x00, 0x00 },
+ [BAM_DESC_CNT_TRSHLD] = { 0x00008, 0x00, 0x00, 0x00 },
+ [BAM_IRQ_SRCS] = { 0x03010, 0x00, 0x00, 0x00 },
+ [BAM_IRQ_SRCS_MSK] = { 0x03014, 0x00, 0x00, 0x00 },
+ [BAM_IRQ_SRCS_UNMASKED] = { 0x03018, 0x00, 0x00, 0x00 },
+ [BAM_IRQ_STTS] = { 0x00014, 0x00, 0x00, 0x00 },
+ [BAM_IRQ_CLR] = { 0x00018, 0x00, 0x00, 0x00 },
+ [BAM_IRQ_EN] = { 0x0001C, 0x00, 0x00, 0x00 },
+ [BAM_CNFG_BITS] = { 0x0007C, 0x00, 0x00, 0x00 },
+ [BAM_IRQ_SRCS_EE] = { 0x03000, 0x00, 0x00, 0x1000 },
+ [BAM_IRQ_SRCS_MSK_EE] = { 0x03004, 0x00, 0x00, 0x1000 },
+ [BAM_P_CTRL] = { 0x13000, 0x1000, 0x00, 0x00 },
+ [BAM_P_RST] = { 0x13004, 0x1000, 0x00, 0x00 },
+ [BAM_P_HALT] = { 0x13008, 0x1000, 0x00, 0x00 },
+ [BAM_P_IRQ_STTS] = { 0x13010, 0x1000, 0x00, 0x00 },
+ [BAM_P_IRQ_CLR] = { 0x13014, 0x1000, 0x00, 0x00 },
+ [BAM_P_IRQ_EN] = { 0x13018, 0x1000, 0x00, 0x00 },
+ [BAM_P_EVNT_DEST_ADDR] = { 0x1382C, 0x00, 0x1000, 0x00 },
+ [BAM_P_EVNT_REG] = { 0x13818, 0x00, 0x1000, 0x00 },
+ [BAM_P_SW_OFSTS] = { 0x13800, 0x00, 0x1000, 0x00 },
+ [BAM_P_DATA_FIFO_ADDR] = { 0x13824, 0x00, 0x1000, 0x00 },
+ [BAM_P_DESC_FIFO_ADDR] = { 0x1381C, 0x00, 0x1000, 0x00 },
+ [BAM_P_EVNT_GEN_TRSHLD] = { 0x13828, 0x00, 0x1000, 0x00 },
+ [BAM_P_FIFO_SIZES] = { 0x13820, 0x00, 0x1000, 0x00 },
+};
+
+/* BAM CTRL */
+#define BAM_SW_RST BIT(0)
+#define BAM_EN BIT(1)
+#define BAM_EN_ACCUM BIT(4)
+#define BAM_TESTBUS_SEL_SHIFT 5
+#define BAM_TESTBUS_SEL_MASK 0x3F
+#define BAM_DESC_CACHE_SEL_SHIFT 13
+#define BAM_DESC_CACHE_SEL_MASK 0x3
+#define BAM_CACHED_DESC_STORE BIT(15)
+#define IBC_DISABLE BIT(16)
+
+/* BAM REVISION */
+#define REVISION_SHIFT 0
+#define REVISION_MASK 0xFF
+#define NUM_EES_SHIFT 8
+#define NUM_EES_MASK 0xF
+#define CE_BUFFER_SIZE BIT(13)
+#define AXI_ACTIVE BIT(14)
+#define USE_VMIDMT BIT(15)
+#define SECURED BIT(16)
+#define BAM_HAS_NO_BYPASS BIT(17)
+#define HIGH_FREQUENCY_BAM BIT(18)
+#define INACTIV_TMRS_EXST BIT(19)
+#define NUM_INACTIV_TMRS BIT(20)
+#define DESC_CACHE_DEPTH_SHIFT 21
+#define DESC_CACHE_DEPTH_1 (0 << DESC_CACHE_DEPTH_SHIFT)
+#define DESC_CACHE_DEPTH_2 (1 << DESC_CACHE_DEPTH_SHIFT)
+#define DESC_CACHE_DEPTH_3 (2 << DESC_CACHE_DEPTH_SHIFT)
+#define DESC_CACHE_DEPTH_4 (3 << DESC_CACHE_DEPTH_SHIFT)
+#define CMD_DESC_EN BIT(23)
+#define INACTIV_TMR_BASE_SHIFT 24
+#define INACTIV_TMR_BASE_MASK 0xFF
+
+/* BAM NUM PIPES */
+#define BAM_NUM_PIPES_SHIFT 0
+#define BAM_NUM_PIPES_MASK 0xFF
+#define PERIPH_NON_PIPE_GRP_SHIFT 16
+#define PERIPH_NON_PIP_GRP_MASK 0xFF
+#define BAM_NON_PIPE_GRP_SHIFT 24
+#define BAM_NON_PIPE_GRP_MASK 0xFF
+
+/* BAM CNFG BITS */
+#define BAM_PIPE_CNFG BIT(2)
+#define BAM_FULL_PIPE BIT(11)
+#define BAM_NO_EXT_P_RST BIT(12)
+#define BAM_IBC_DISABLE BIT(13)
+#define BAM_SB_CLK_REQ BIT(14)
+#define BAM_PSM_CSW_REQ BIT(15)
+#define BAM_PSM_P_RES BIT(16)
+#define BAM_AU_P_RES BIT(17)
+#define BAM_SI_P_RES BIT(18)
+#define BAM_WB_P_RES BIT(19)
+#define BAM_WB_BLK_CSW BIT(20)
+#define BAM_WB_CSW_ACK_IDL BIT(21)
+#define BAM_WB_RETR_SVPNT BIT(22)
+#define BAM_WB_DSC_AVL_P_RST BIT(23)
+#define BAM_REG_P_EN BIT(24)
+#define BAM_PSM_P_HD_DATA BIT(25)
+#define BAM_AU_ACCUMED BIT(26)
+#define BAM_CMD_ENABLE BIT(27)
+
+#define BAM_CNFG_BITS_DEFAULT (BAM_PIPE_CNFG | \
+ BAM_NO_EXT_P_RST | \
+ BAM_IBC_DISABLE | \
+ BAM_SB_CLK_REQ | \
+ BAM_PSM_CSW_REQ | \
+ BAM_PSM_P_RES | \
+ BAM_AU_P_RES | \
+ BAM_SI_P_RES | \
+ BAM_WB_P_RES | \
+ BAM_WB_BLK_CSW | \
+ BAM_WB_CSW_ACK_IDL | \
+ BAM_WB_RETR_SVPNT | \
+ BAM_WB_DSC_AVL_P_RST | \
+ BAM_REG_P_EN | \
+ BAM_PSM_P_HD_DATA | \
+ BAM_AU_ACCUMED | \
+ BAM_CMD_ENABLE)
+
+/* PIPE CTRL */
+#define P_EN BIT(1)
+#define P_DIRECTION BIT(3)
+#define P_SYS_STRM BIT(4)
+#define P_SYS_MODE BIT(5)
+#define P_AUTO_EOB BIT(6)
+#define P_AUTO_EOB_SEL_SHIFT 7
+#define P_AUTO_EOB_SEL_512 (0 << P_AUTO_EOB_SEL_SHIFT)
+#define P_AUTO_EOB_SEL_256 (1 << P_AUTO_EOB_SEL_SHIFT)
+#define P_AUTO_EOB_SEL_128 (2 << P_AUTO_EOB_SEL_SHIFT)
+#define P_AUTO_EOB_SEL_64 (3 << P_AUTO_EOB_SEL_SHIFT)
+#define P_PREFETCH_LIMIT_SHIFT 9
+#define P_PREFETCH_LIMIT_32 (0 << P_PREFETCH_LIMIT_SHIFT)
+#define P_PREFETCH_LIMIT_16 (1 << P_PREFETCH_LIMIT_SHIFT)
+#define P_PREFETCH_LIMIT_4 (2 << P_PREFETCH_LIMIT_SHIFT)
+#define P_WRITE_NWD BIT(11)
+#define P_LOCK_GROUP_SHIFT 16
+#define P_LOCK_GROUP_MASK 0x1F
+
+/* BAM_DESC_CNT_TRSHLD */
+#define CNT_TRSHLD 0xffff
+#define DEFAULT_CNT_THRSHLD 0x4
+
+/* BAM_IRQ_SRCS */
+#define BAM_IRQ BIT(31)
+#define P_IRQ 0x7fffffff
+
+/* BAM_IRQ_SRCS_MSK */
+#define BAM_IRQ_MSK BAM_IRQ
+#define P_IRQ_MSK P_IRQ
+
+/* BAM_IRQ_STTS */
+#define BAM_TIMER_IRQ BIT(4)
+#define BAM_EMPTY_IRQ BIT(3)
+#define BAM_ERROR_IRQ BIT(2)
+#define BAM_HRESP_ERR_IRQ BIT(1)
+
+/* BAM_IRQ_CLR */
+#define BAM_TIMER_CLR BIT(4)
+#define BAM_EMPTY_CLR BIT(3)
+#define BAM_ERROR_CLR BIT(2)
+#define BAM_HRESP_ERR_CLR BIT(1)
+
+/* BAM_IRQ_EN */
+#define BAM_TIMER_EN BIT(4)
+#define BAM_EMPTY_EN BIT(3)
+#define BAM_ERROR_EN BIT(2)
+#define BAM_HRESP_ERR_EN BIT(1)
+
+/* BAM_P_IRQ_EN */
+#define P_PRCSD_DESC_EN BIT(0)
+#define P_TIMER_EN BIT(1)
+#define P_WAKE_EN BIT(2)
+#define P_OUT_OF_DESC_EN BIT(3)
+#define P_ERR_EN BIT(4)
+#define P_TRNSFR_END_EN BIT(5)
+#define P_DEFAULT_IRQS_EN (P_PRCSD_DESC_EN | P_ERR_EN | P_TRNSFR_END_EN)
+
+/* BAM_P_SW_OFSTS */
+#define P_SW_OFSTS_MASK 0xffff
+
+#define BAM_DESC_FIFO_SIZE SZ_32K
+#define MAX_DESCRIPTORS (BAM_DESC_FIFO_SIZE / sizeof(struct bam_desc_hw) - 1)
+#define BAM_FIFO_SIZE (SZ_32K - 8)
+#define IS_BUSY(chan) (CIRC_SPACE(bchan->tail, bchan->head,\
+ MAX_DESCRIPTORS + 1) == 0)
+
+struct bam_chan {
+ struct virt_dma_chan vc;
+
+ struct bam_device *bdev;
+
+ /* configuration from device tree */
+ u32 id;
+
+ /* runtime configuration */
+ struct dma_slave_config slave;
+
+ /* fifo storage */
+ struct bam_desc_hw *fifo_virt;
+ dma_addr_t fifo_phys;
+
+ /* fifo markers */
+ unsigned short head; /* start of active descriptor entries */
+ unsigned short tail; /* end of active descriptor entries */
+
+ unsigned int initialized; /* is the channel hw initialized? */
+ unsigned int paused; /* is the channel paused? */
+ unsigned int reconfigure; /* new slave config? */
+ /* list of descriptors currently processed */
+ struct list_head desc_list;
+
+ struct list_head node;
+};
+
+static inline struct bam_chan *to_bam_chan(struct dma_chan *common)
+{
+ return container_of(common, struct bam_chan, vc.chan);
+}
+
+struct bam_device {
+ void __iomem *regs;
+ struct device *dev;
+ struct dma_device common;
+ struct bam_chan *channels;
+ u32 num_channels;
+ u32 num_ees;
+
+ /* execution environment ID, from DT */
+ u32 ee;
+ bool controlled_remotely;
+ bool powered_remotely;
+ u32 active_channels;
+
+ const struct reg_offset_data *layout;
+
+ struct clk *bamclk;
+ int irq;
+
+ /* dma start transaction tasklet */
+ struct tasklet_struct task;
+};
+
+/**
+ * bam_addr - returns BAM register address
+ * @bdev: bam device
+ * @pipe: pipe instance (ignored when register doesn't have multiple instances)
+ * @reg: register enum
+ */
+static inline void __iomem *bam_addr(struct bam_device *bdev, u32 pipe,
+ enum bam_reg reg)
+{
+ const struct reg_offset_data r = bdev->layout[reg];
+
+ return bdev->regs + r.base_offset +
+ r.pipe_mult * pipe +
+ r.evnt_mult * pipe +
+ r.ee_mult * bdev->ee;
+}
+
+/**
+ * bam_reset() - reset and initialize BAM registers
+ * @bdev: bam device
+ */
+static void bam_reset(struct bam_device *bdev)
+{
+ u32 val;
+
+ /* s/w reset bam */
+ /* after reset all pipes are disabled and idle */
+ val = readl_relaxed(bam_addr(bdev, 0, BAM_CTRL));
+ val |= BAM_SW_RST;
+ writel_relaxed(val, bam_addr(bdev, 0, BAM_CTRL));
+ val &= ~BAM_SW_RST;
+ writel_relaxed(val, bam_addr(bdev, 0, BAM_CTRL));
+
+ /* make sure previous stores are visible before enabling BAM */
+ wmb();
+
+ /* enable bam */
+ val |= BAM_EN;
+ writel_relaxed(val, bam_addr(bdev, 0, BAM_CTRL));
+
+ /* set descriptor threshhold, start with 4 bytes */
+ writel_relaxed(DEFAULT_CNT_THRSHLD,
+ bam_addr(bdev, 0, BAM_DESC_CNT_TRSHLD));
+
+ /* Enable default set of h/w workarounds, ie all except BAM_FULL_PIPE */
+ writel_relaxed(BAM_CNFG_BITS_DEFAULT, bam_addr(bdev, 0, BAM_CNFG_BITS));
+
+ /* enable irqs for errors */
+ writel_relaxed(BAM_ERROR_EN | BAM_HRESP_ERR_EN,
+ bam_addr(bdev, 0, BAM_IRQ_EN));
+
+ /* unmask global bam interrupt */
+ writel_relaxed(BAM_IRQ_MSK, bam_addr(bdev, 0, BAM_IRQ_SRCS_MSK_EE));
+}
+
+/**
+ * bam_reset_channel - Reset individual BAM DMA channel
+ * @bchan: bam channel
+ *
+ * This function resets a specific BAM channel
+ */
+static void bam_reset_channel(struct bam_chan *bchan)
+{
+ struct bam_device *bdev = bchan->bdev;
+
+ lockdep_assert_held(&bchan->vc.lock);
+
+ /* reset channel */
+ writel_relaxed(1, bam_addr(bdev, bchan->id, BAM_P_RST));
+ writel_relaxed(0, bam_addr(bdev, bchan->id, BAM_P_RST));
+
+ /* don't allow cpu to reorder BAM register accesses done after this */
+ wmb();
+
+ /* make sure hw is initialized when channel is used the first time */
+ bchan->initialized = 0;
+}
+
+/**
+ * bam_chan_init_hw - Initialize channel hardware
+ * @bchan: bam channel
+ * @dir: DMA transfer direction
+ *
+ * This function resets and initializes the BAM channel
+ */
+static void bam_chan_init_hw(struct bam_chan *bchan,
+ enum dma_transfer_direction dir)
+{
+ struct bam_device *bdev = bchan->bdev;
+ u32 val;
+
+ /* Reset the channel to clear internal state of the FIFO */
+ bam_reset_channel(bchan);
+
+ /*
+ * write out 8 byte aligned address. We have enough space for this
+ * because we allocated 1 more descriptor (8 bytes) than we can use
+ */
+ writel_relaxed(ALIGN(bchan->fifo_phys, sizeof(struct bam_desc_hw)),
+ bam_addr(bdev, bchan->id, BAM_P_DESC_FIFO_ADDR));
+ writel_relaxed(BAM_FIFO_SIZE,
+ bam_addr(bdev, bchan->id, BAM_P_FIFO_SIZES));
+
+ /* enable the per pipe interrupts, enable EOT, ERR, and INT irqs */
+ writel_relaxed(P_DEFAULT_IRQS_EN,
+ bam_addr(bdev, bchan->id, BAM_P_IRQ_EN));
+
+ /* unmask the specific pipe and EE combo */
+ val = readl_relaxed(bam_addr(bdev, 0, BAM_IRQ_SRCS_MSK_EE));
+ val |= BIT(bchan->id);
+ writel_relaxed(val, bam_addr(bdev, 0, BAM_IRQ_SRCS_MSK_EE));
+
+ /* don't allow cpu to reorder the channel enable done below */
+ wmb();
+
+ /* set fixed direction and mode, then enable channel */
+ val = P_EN | P_SYS_MODE;
+ if (dir == DMA_DEV_TO_MEM)
+ val |= P_DIRECTION;
+
+ writel_relaxed(val, bam_addr(bdev, bchan->id, BAM_P_CTRL));
+
+ bchan->initialized = 1;
+
+ /* init FIFO pointers */
+ bchan->head = 0;
+ bchan->tail = 0;
+}
+
+/**
+ * bam_alloc_chan - Allocate channel resources for DMA channel.
+ * @chan: specified channel
+ *
+ * This function allocates the FIFO descriptor memory
+ */
+static int bam_alloc_chan(struct dma_chan *chan)
+{
+ struct bam_chan *bchan = to_bam_chan(chan);
+ struct bam_device *bdev = bchan->bdev;
+
+ if (bchan->fifo_virt)
+ return 0;
+
+ /* allocate FIFO descriptor space, but only if necessary */
+ bchan->fifo_virt = dma_alloc_wc(bdev->dev, BAM_DESC_FIFO_SIZE,
+ &bchan->fifo_phys, GFP_KERNEL);
+
+ if (!bchan->fifo_virt) {
+ dev_err(bdev->dev, "Failed to allocate desc fifo\n");
+ return -ENOMEM;
+ }
+
+ if (bdev->active_channels++ == 0 && bdev->powered_remotely)
+ bam_reset(bdev);
+
+ return 0;
+}
+
+/**
+ * bam_free_chan - Frees dma resources associated with specific channel
+ * @chan: specified channel
+ *
+ * Free the allocated fifo descriptor memory and channel resources
+ *
+ */
+static void bam_free_chan(struct dma_chan *chan)
+{
+ struct bam_chan *bchan = to_bam_chan(chan);
+ struct bam_device *bdev = bchan->bdev;
+ u32 val;
+ unsigned long flags;
+ int ret;
+
+ ret = pm_runtime_get_sync(bdev->dev);
+ if (ret < 0)
+ return;
+
+ vchan_free_chan_resources(to_virt_chan(chan));
+
+ if (!list_empty(&bchan->desc_list)) {
+ dev_err(bchan->bdev->dev, "Cannot free busy channel\n");
+ goto err;
+ }
+
+ spin_lock_irqsave(&bchan->vc.lock, flags);
+ bam_reset_channel(bchan);
+ spin_unlock_irqrestore(&bchan->vc.lock, flags);
+
+ dma_free_wc(bdev->dev, BAM_DESC_FIFO_SIZE, bchan->fifo_virt,
+ bchan->fifo_phys);
+ bchan->fifo_virt = NULL;
+
+ /* mask irq for pipe/channel */
+ val = readl_relaxed(bam_addr(bdev, 0, BAM_IRQ_SRCS_MSK_EE));
+ val &= ~BIT(bchan->id);
+ writel_relaxed(val, bam_addr(bdev, 0, BAM_IRQ_SRCS_MSK_EE));
+
+ /* disable irq */
+ writel_relaxed(0, bam_addr(bdev, bchan->id, BAM_P_IRQ_EN));
+
+ if (--bdev->active_channels == 0 && bdev->powered_remotely) {
+ /* s/w reset bam */
+ val = readl_relaxed(bam_addr(bdev, 0, BAM_CTRL));
+ val |= BAM_SW_RST;
+ writel_relaxed(val, bam_addr(bdev, 0, BAM_CTRL));
+ }
+
+err:
+ pm_runtime_mark_last_busy(bdev->dev);
+ pm_runtime_put_autosuspend(bdev->dev);
+}
+
+/**
+ * bam_slave_config - set slave configuration for channel
+ * @chan: dma channel
+ * @cfg: slave configuration
+ *
+ * Sets slave configuration for channel
+ *
+ */
+static int bam_slave_config(struct dma_chan *chan,
+ struct dma_slave_config *cfg)
+{
+ struct bam_chan *bchan = to_bam_chan(chan);
+ unsigned long flag;
+
+ spin_lock_irqsave(&bchan->vc.lock, flag);
+ memcpy(&bchan->slave, cfg, sizeof(*cfg));
+ bchan->reconfigure = 1;
+ spin_unlock_irqrestore(&bchan->vc.lock, flag);
+
+ return 0;
+}
+
+/**
+ * bam_prep_slave_sg - Prep slave sg transaction
+ *
+ * @chan: dma channel
+ * @sgl: scatter gather list
+ * @sg_len: length of sg
+ * @direction: DMA transfer direction
+ * @flags: DMA flags
+ * @context: transfer context (unused)
+ */
+static struct dma_async_tx_descriptor *bam_prep_slave_sg(struct dma_chan *chan,
+ struct scatterlist *sgl, unsigned int sg_len,
+ enum dma_transfer_direction direction, unsigned long flags,
+ void *context)
+{
+ struct bam_chan *bchan = to_bam_chan(chan);
+ struct bam_device *bdev = bchan->bdev;
+ struct bam_async_desc *async_desc;
+ struct scatterlist *sg;
+ u32 i;
+ struct bam_desc_hw *desc;
+ unsigned int num_alloc = 0;
+
+
+ if (!is_slave_direction(direction)) {
+ dev_err(bdev->dev, "invalid dma direction\n");
+ return NULL;
+ }
+
+ /* calculate number of required entries */
+ for_each_sg(sgl, sg, sg_len, i)
+ num_alloc += DIV_ROUND_UP(sg_dma_len(sg), BAM_FIFO_SIZE);
+
+ /* allocate enough room to accomodate the number of entries */
+ async_desc = kzalloc(struct_size(async_desc, desc, num_alloc),
+ GFP_NOWAIT);
+
+ if (!async_desc)
+ return NULL;
+
+ if (flags & DMA_PREP_FENCE)
+ async_desc->flags |= DESC_FLAG_NWD;
+
+ if (flags & DMA_PREP_INTERRUPT)
+ async_desc->flags |= DESC_FLAG_EOT;
+
+ async_desc->num_desc = num_alloc;
+ async_desc->curr_desc = async_desc->desc;
+ async_desc->dir = direction;
+
+ /* fill in temporary descriptors */
+ desc = async_desc->desc;
+ for_each_sg(sgl, sg, sg_len, i) {
+ unsigned int remainder = sg_dma_len(sg);
+ unsigned int curr_offset = 0;
+
+ do {
+ if (flags & DMA_PREP_CMD)
+ desc->flags |= cpu_to_le16(DESC_FLAG_CMD);
+
+ desc->addr = cpu_to_le32(sg_dma_address(sg) +
+ curr_offset);
+
+ if (remainder > BAM_FIFO_SIZE) {
+ desc->size = cpu_to_le16(BAM_FIFO_SIZE);
+ remainder -= BAM_FIFO_SIZE;
+ curr_offset += BAM_FIFO_SIZE;
+ } else {
+ desc->size = cpu_to_le16(remainder);
+ remainder = 0;
+ }
+
+ async_desc->length += le16_to_cpu(desc->size);
+ desc++;
+ } while (remainder > 0);
+ }
+
+ return vchan_tx_prep(&bchan->vc, &async_desc->vd, flags);
+}
+
+/**
+ * bam_dma_terminate_all - terminate all transactions on a channel
+ * @chan: bam dma channel
+ *
+ * Dequeues and frees all transactions
+ * No callbacks are done
+ *
+ */
+static int bam_dma_terminate_all(struct dma_chan *chan)
+{
+ struct bam_chan *bchan = to_bam_chan(chan);
+ struct bam_async_desc *async_desc, *tmp;
+ unsigned long flag;
+ LIST_HEAD(head);
+
+ /* remove all transactions, including active transaction */
+ spin_lock_irqsave(&bchan->vc.lock, flag);
+ /*
+ * If we have transactions queued, then some might be committed to the
+ * hardware in the desc fifo. The only way to reset the desc fifo is
+ * to do a hardware reset (either by pipe or the entire block).
+ * bam_chan_init_hw() will trigger a pipe reset, and also reinit the
+ * pipe. If the pipe is left disabled (default state after pipe reset)
+ * and is accessed by a connected hardware engine, a fatal error in
+ * the BAM will occur. There is a small window where this could happen
+ * with bam_chan_init_hw(), but it is assumed that the caller has
+ * stopped activity on any attached hardware engine. Make sure to do
+ * this first so that the BAM hardware doesn't cause memory corruption
+ * by accessing freed resources.
+ */
+ if (!list_empty(&bchan->desc_list)) {
+ async_desc = list_first_entry(&bchan->desc_list,
+ struct bam_async_desc, desc_node);
+ bam_chan_init_hw(bchan, async_desc->dir);
+ }
+
+ list_for_each_entry_safe(async_desc, tmp,
+ &bchan->desc_list, desc_node) {
+ list_add(&async_desc->vd.node, &bchan->vc.desc_issued);
+ list_del(&async_desc->desc_node);
+ }
+
+ vchan_get_all_descriptors(&bchan->vc, &head);
+ spin_unlock_irqrestore(&bchan->vc.lock, flag);
+
+ vchan_dma_desc_free_list(&bchan->vc, &head);
+
+ return 0;
+}
+
+/**
+ * bam_pause - Pause DMA channel
+ * @chan: dma channel
+ *
+ */
+static int bam_pause(struct dma_chan *chan)
+{
+ struct bam_chan *bchan = to_bam_chan(chan);
+ struct bam_device *bdev = bchan->bdev;
+ unsigned long flag;
+ int ret;
+
+ ret = pm_runtime_get_sync(bdev->dev);
+ if (ret < 0)
+ return ret;
+
+ spin_lock_irqsave(&bchan->vc.lock, flag);
+ writel_relaxed(1, bam_addr(bdev, bchan->id, BAM_P_HALT));
+ bchan->paused = 1;
+ spin_unlock_irqrestore(&bchan->vc.lock, flag);
+ pm_runtime_mark_last_busy(bdev->dev);
+ pm_runtime_put_autosuspend(bdev->dev);
+
+ return 0;
+}
+
+/**
+ * bam_resume - Resume DMA channel operations
+ * @chan: dma channel
+ *
+ */
+static int bam_resume(struct dma_chan *chan)
+{
+ struct bam_chan *bchan = to_bam_chan(chan);
+ struct bam_device *bdev = bchan->bdev;
+ unsigned long flag;
+ int ret;
+
+ ret = pm_runtime_get_sync(bdev->dev);
+ if (ret < 0)
+ return ret;
+
+ spin_lock_irqsave(&bchan->vc.lock, flag);
+ writel_relaxed(0, bam_addr(bdev, bchan->id, BAM_P_HALT));
+ bchan->paused = 0;
+ spin_unlock_irqrestore(&bchan->vc.lock, flag);
+ pm_runtime_mark_last_busy(bdev->dev);
+ pm_runtime_put_autosuspend(bdev->dev);
+
+ return 0;
+}
+
+/**
+ * process_channel_irqs - processes the channel interrupts
+ * @bdev: bam controller
+ *
+ * This function processes the channel interrupts
+ *
+ */
+static u32 process_channel_irqs(struct bam_device *bdev)
+{
+ u32 i, srcs, pipe_stts, offset, avail;
+ unsigned long flags;
+ struct bam_async_desc *async_desc, *tmp;
+
+ srcs = readl_relaxed(bam_addr(bdev, 0, BAM_IRQ_SRCS_EE));
+
+ /* return early if no pipe/channel interrupts are present */
+ if (!(srcs & P_IRQ))
+ return srcs;
+
+ for (i = 0; i < bdev->num_channels; i++) {
+ struct bam_chan *bchan = &bdev->channels[i];
+
+ if (!(srcs & BIT(i)))
+ continue;
+
+ /* clear pipe irq */
+ pipe_stts = readl_relaxed(bam_addr(bdev, i, BAM_P_IRQ_STTS));
+
+ writel_relaxed(pipe_stts, bam_addr(bdev, i, BAM_P_IRQ_CLR));
+
+ spin_lock_irqsave(&bchan->vc.lock, flags);
+
+ offset = readl_relaxed(bam_addr(bdev, i, BAM_P_SW_OFSTS)) &
+ P_SW_OFSTS_MASK;
+ offset /= sizeof(struct bam_desc_hw);
+
+ /* Number of bytes available to read */
+ avail = CIRC_CNT(offset, bchan->head, MAX_DESCRIPTORS + 1);
+
+ if (offset < bchan->head)
+ avail--;
+
+ list_for_each_entry_safe(async_desc, tmp,
+ &bchan->desc_list, desc_node) {
+ /* Not enough data to read */
+ if (avail < async_desc->xfer_len)
+ break;
+
+ /* manage FIFO */
+ bchan->head += async_desc->xfer_len;
+ bchan->head %= MAX_DESCRIPTORS;
+
+ async_desc->num_desc -= async_desc->xfer_len;
+ async_desc->curr_desc += async_desc->xfer_len;
+ avail -= async_desc->xfer_len;
+
+ /*
+ * if complete, process cookie. Otherwise
+ * push back to front of desc_issued so that
+ * it gets restarted by the tasklet
+ */
+ if (!async_desc->num_desc) {
+ vchan_cookie_complete(&async_desc->vd);
+ } else {
+ list_add(&async_desc->vd.node,
+ &bchan->vc.desc_issued);
+ }
+ list_del(&async_desc->desc_node);
+ }
+
+ spin_unlock_irqrestore(&bchan->vc.lock, flags);
+ }
+
+ return srcs;
+}
+
+/**
+ * bam_dma_irq - irq handler for bam controller
+ * @irq: IRQ of interrupt
+ * @data: callback data
+ *
+ * IRQ handler for the bam controller
+ */
+static irqreturn_t bam_dma_irq(int irq, void *data)
+{
+ struct bam_device *bdev = data;
+ u32 clr_mask = 0, srcs = 0;
+ int ret;
+
+ srcs |= process_channel_irqs(bdev);
+
+ /* kick off tasklet to start next dma transfer */
+ if (srcs & P_IRQ)
+ tasklet_schedule(&bdev->task);
+
+ ret = pm_runtime_get_sync(bdev->dev);
+ if (ret < 0)
+ return IRQ_NONE;
+
+ if (srcs & BAM_IRQ) {
+ clr_mask = readl_relaxed(bam_addr(bdev, 0, BAM_IRQ_STTS));
+
+ /*
+ * don't allow reorder of the various accesses to the BAM
+ * registers
+ */
+ mb();
+
+ writel_relaxed(clr_mask, bam_addr(bdev, 0, BAM_IRQ_CLR));
+ }
+
+ pm_runtime_mark_last_busy(bdev->dev);
+ pm_runtime_put_autosuspend(bdev->dev);
+
+ return IRQ_HANDLED;
+}
+
+/**
+ * bam_tx_status - returns status of transaction
+ * @chan: dma channel
+ * @cookie: transaction cookie
+ * @txstate: DMA transaction state
+ *
+ * Return status of dma transaction
+ */
+static enum dma_status bam_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
+ struct dma_tx_state *txstate)
+{
+ struct bam_chan *bchan = to_bam_chan(chan);
+ struct bam_async_desc *async_desc;
+ struct virt_dma_desc *vd;
+ int ret;
+ size_t residue = 0;
+ unsigned int i;
+ unsigned long flags;
+
+ ret = dma_cookie_status(chan, cookie, txstate);
+ if (ret == DMA_COMPLETE)
+ return ret;
+
+ if (!txstate)
+ return bchan->paused ? DMA_PAUSED : ret;
+
+ spin_lock_irqsave(&bchan->vc.lock, flags);
+ vd = vchan_find_desc(&bchan->vc, cookie);
+ if (vd) {
+ residue = container_of(vd, struct bam_async_desc, vd)->length;
+ } else {
+ list_for_each_entry(async_desc, &bchan->desc_list, desc_node) {
+ if (async_desc->vd.tx.cookie != cookie)
+ continue;
+
+ for (i = 0; i < async_desc->num_desc; i++)
+ residue += le16_to_cpu(
+ async_desc->curr_desc[i].size);
+ }
+ }
+
+ spin_unlock_irqrestore(&bchan->vc.lock, flags);
+
+ dma_set_residue(txstate, residue);
+
+ if (ret == DMA_IN_PROGRESS && bchan->paused)
+ ret = DMA_PAUSED;
+
+ return ret;
+}
+
+/**
+ * bam_apply_new_config
+ * @bchan: bam dma channel
+ * @dir: DMA direction
+ */
+static void bam_apply_new_config(struct bam_chan *bchan,
+ enum dma_transfer_direction dir)
+{
+ struct bam_device *bdev = bchan->bdev;
+ u32 maxburst;
+
+ if (!bdev->controlled_remotely) {
+ if (dir == DMA_DEV_TO_MEM)
+ maxburst = bchan->slave.src_maxburst;
+ else
+ maxburst = bchan->slave.dst_maxburst;
+
+ writel_relaxed(maxburst,
+ bam_addr(bdev, 0, BAM_DESC_CNT_TRSHLD));
+ }
+
+ bchan->reconfigure = 0;
+}
+
+/**
+ * bam_start_dma - start next transaction
+ * @bchan: bam dma channel
+ */
+static void bam_start_dma(struct bam_chan *bchan)
+{
+ struct virt_dma_desc *vd = vchan_next_desc(&bchan->vc);
+ struct bam_device *bdev = bchan->bdev;
+ struct bam_async_desc *async_desc = NULL;
+ struct bam_desc_hw *desc;
+ struct bam_desc_hw *fifo = PTR_ALIGN(bchan->fifo_virt,
+ sizeof(struct bam_desc_hw));
+ int ret;
+ unsigned int avail;
+ struct dmaengine_desc_callback cb;
+
+ lockdep_assert_held(&bchan->vc.lock);
+
+ if (!vd)
+ return;
+
+ ret = pm_runtime_get_sync(bdev->dev);
+ if (ret < 0)
+ return;
+
+ while (vd && !IS_BUSY(bchan)) {
+ list_del(&vd->node);
+
+ async_desc = container_of(vd, struct bam_async_desc, vd);
+
+ /* on first use, initialize the channel hardware */
+ if (!bchan->initialized)
+ bam_chan_init_hw(bchan, async_desc->dir);
+
+ /* apply new slave config changes, if necessary */
+ if (bchan->reconfigure)
+ bam_apply_new_config(bchan, async_desc->dir);
+
+ desc = async_desc->curr_desc;
+ avail = CIRC_SPACE(bchan->tail, bchan->head,
+ MAX_DESCRIPTORS + 1);
+
+ if (async_desc->num_desc > avail)
+ async_desc->xfer_len = avail;
+ else
+ async_desc->xfer_len = async_desc->num_desc;
+
+ /* set any special flags on the last descriptor */
+ if (async_desc->num_desc == async_desc->xfer_len)
+ desc[async_desc->xfer_len - 1].flags |=
+ cpu_to_le16(async_desc->flags);
+
+ vd = vchan_next_desc(&bchan->vc);
+
+ dmaengine_desc_get_callback(&async_desc->vd.tx, &cb);
+
+ /*
+ * An interrupt is generated at this desc, if
+ * - FIFO is FULL.
+ * - No more descriptors to add.
+ * - If a callback completion was requested for this DESC,
+ * In this case, BAM will deliver the completion callback
+ * for this desc and continue processing the next desc.
+ */
+ if (((avail <= async_desc->xfer_len) || !vd ||
+ dmaengine_desc_callback_valid(&cb)) &&
+ !(async_desc->flags & DESC_FLAG_EOT))
+ desc[async_desc->xfer_len - 1].flags |=
+ cpu_to_le16(DESC_FLAG_INT);
+
+ if (bchan->tail + async_desc->xfer_len > MAX_DESCRIPTORS) {
+ u32 partial = MAX_DESCRIPTORS - bchan->tail;
+
+ memcpy(&fifo[bchan->tail], desc,
+ partial * sizeof(struct bam_desc_hw));
+ memcpy(fifo, &desc[partial],
+ (async_desc->xfer_len - partial) *
+ sizeof(struct bam_desc_hw));
+ } else {
+ memcpy(&fifo[bchan->tail], desc,
+ async_desc->xfer_len *
+ sizeof(struct bam_desc_hw));
+ }
+
+ bchan->tail += async_desc->xfer_len;
+ bchan->tail %= MAX_DESCRIPTORS;
+ list_add_tail(&async_desc->desc_node, &bchan->desc_list);
+ }
+
+ /* ensure descriptor writes and dma start not reordered */
+ wmb();
+ writel_relaxed(bchan->tail * sizeof(struct bam_desc_hw),
+ bam_addr(bdev, bchan->id, BAM_P_EVNT_REG));
+
+ pm_runtime_mark_last_busy(bdev->dev);
+ pm_runtime_put_autosuspend(bdev->dev);
+}
+
+/**
+ * dma_tasklet - DMA IRQ tasklet
+ * @t: tasklet argument (bam controller structure)
+ *
+ * Sets up next DMA operation and then processes all completed transactions
+ */
+static void dma_tasklet(struct tasklet_struct *t)
+{
+ struct bam_device *bdev = from_tasklet(bdev, t, task);
+ struct bam_chan *bchan;
+ unsigned long flags;
+ unsigned int i;
+
+ /* go through the channels and kick off transactions */
+ for (i = 0; i < bdev->num_channels; i++) {
+ bchan = &bdev->channels[i];
+ spin_lock_irqsave(&bchan->vc.lock, flags);
+
+ if (!list_empty(&bchan->vc.desc_issued) && !IS_BUSY(bchan))
+ bam_start_dma(bchan);
+ spin_unlock_irqrestore(&bchan->vc.lock, flags);
+ }
+
+}
+
+/**
+ * bam_issue_pending - starts pending transactions
+ * @chan: dma channel
+ *
+ * Calls tasklet directly which in turn starts any pending transactions
+ */
+static void bam_issue_pending(struct dma_chan *chan)
+{
+ struct bam_chan *bchan = to_bam_chan(chan);
+ unsigned long flags;
+
+ spin_lock_irqsave(&bchan->vc.lock, flags);
+
+ /* if work pending and idle, start a transaction */
+ if (vchan_issue_pending(&bchan->vc) && !IS_BUSY(bchan))
+ bam_start_dma(bchan);
+
+ spin_unlock_irqrestore(&bchan->vc.lock, flags);
+}
+
+/**
+ * bam_dma_free_desc - free descriptor memory
+ * @vd: virtual descriptor
+ *
+ */
+static void bam_dma_free_desc(struct virt_dma_desc *vd)
+{
+ struct bam_async_desc *async_desc = container_of(vd,
+ struct bam_async_desc, vd);
+
+ kfree(async_desc);
+}
+
+static struct dma_chan *bam_dma_xlate(struct of_phandle_args *dma_spec,
+ struct of_dma *of)
+{
+ struct bam_device *bdev = container_of(of->of_dma_data,
+ struct bam_device, common);
+ unsigned int request;
+
+ if (dma_spec->args_count != 1)
+ return NULL;
+
+ request = dma_spec->args[0];
+ if (request >= bdev->num_channels)
+ return NULL;
+
+ return dma_get_slave_channel(&(bdev->channels[request].vc.chan));
+}
+
+/**
+ * bam_init
+ * @bdev: bam device
+ *
+ * Initialization helper for global bam registers
+ */
+static int bam_init(struct bam_device *bdev)
+{
+ u32 val;
+
+ /* read revision and configuration information */
+ if (!bdev->num_ees) {
+ val = readl_relaxed(bam_addr(bdev, 0, BAM_REVISION));
+ bdev->num_ees = (val >> NUM_EES_SHIFT) & NUM_EES_MASK;
+ }
+
+ /* check that configured EE is within range */
+ if (bdev->ee >= bdev->num_ees)
+ return -EINVAL;
+
+ if (!bdev->num_channels) {
+ val = readl_relaxed(bam_addr(bdev, 0, BAM_NUM_PIPES));
+ bdev->num_channels = val & BAM_NUM_PIPES_MASK;
+ }
+
+ /* Reset BAM now if fully controlled locally */
+ if (!bdev->controlled_remotely && !bdev->powered_remotely)
+ bam_reset(bdev);
+
+ return 0;
+}
+
+static void bam_channel_init(struct bam_device *bdev, struct bam_chan *bchan,
+ u32 index)
+{
+ bchan->id = index;
+ bchan->bdev = bdev;
+
+ vchan_init(&bchan->vc, &bdev->common);
+ bchan->vc.desc_free = bam_dma_free_desc;
+ INIT_LIST_HEAD(&bchan->desc_list);
+}
+
+static const struct of_device_id bam_of_match[] = {
+ { .compatible = "qcom,bam-v1.3.0", .data = &bam_v1_3_reg_info },
+ { .compatible = "qcom,bam-v1.4.0", .data = &bam_v1_4_reg_info },
+ { .compatible = "qcom,bam-v1.7.0", .data = &bam_v1_7_reg_info },
+ {}
+};
+
+MODULE_DEVICE_TABLE(of, bam_of_match);
+
+static int bam_dma_probe(struct platform_device *pdev)
+{
+ struct bam_device *bdev;
+ const struct of_device_id *match;
+ struct resource *iores;
+ int ret, i;
+
+ bdev = devm_kzalloc(&pdev->dev, sizeof(*bdev), GFP_KERNEL);
+ if (!bdev)
+ return -ENOMEM;
+
+ bdev->dev = &pdev->dev;
+
+ match = of_match_node(bam_of_match, pdev->dev.of_node);
+ if (!match) {
+ dev_err(&pdev->dev, "Unsupported BAM module\n");
+ return -ENODEV;
+ }
+
+ bdev->layout = match->data;
+
+ iores = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ bdev->regs = devm_ioremap_resource(&pdev->dev, iores);
+ if (IS_ERR(bdev->regs))
+ return PTR_ERR(bdev->regs);
+
+ bdev->irq = platform_get_irq(pdev, 0);
+ if (bdev->irq < 0)
+ return bdev->irq;
+
+ ret = of_property_read_u32(pdev->dev.of_node, "qcom,ee", &bdev->ee);
+ if (ret) {
+ dev_err(bdev->dev, "Execution environment unspecified\n");
+ return ret;
+ }
+
+ bdev->controlled_remotely = of_property_read_bool(pdev->dev.of_node,
+ "qcom,controlled-remotely");
+ bdev->powered_remotely = of_property_read_bool(pdev->dev.of_node,
+ "qcom,powered-remotely");
+
+ if (bdev->controlled_remotely || bdev->powered_remotely) {
+ ret = of_property_read_u32(pdev->dev.of_node, "num-channels",
+ &bdev->num_channels);
+ if (ret)
+ dev_err(bdev->dev, "num-channels unspecified in dt\n");
+
+ ret = of_property_read_u32(pdev->dev.of_node, "qcom,num-ees",
+ &bdev->num_ees);
+ if (ret)
+ dev_err(bdev->dev, "num-ees unspecified in dt\n");
+ }
+
+ if (bdev->controlled_remotely || bdev->powered_remotely)
+ bdev->bamclk = devm_clk_get_optional(bdev->dev, "bam_clk");
+ else
+ bdev->bamclk = devm_clk_get(bdev->dev, "bam_clk");
+
+ if (IS_ERR(bdev->bamclk))
+ return PTR_ERR(bdev->bamclk);
+
+ ret = clk_prepare_enable(bdev->bamclk);
+ if (ret) {
+ dev_err(bdev->dev, "failed to prepare/enable clock\n");
+ return ret;
+ }
+
+ ret = bam_init(bdev);
+ if (ret)
+ goto err_disable_clk;
+
+ tasklet_setup(&bdev->task, dma_tasklet);
+
+ bdev->channels = devm_kcalloc(bdev->dev, bdev->num_channels,
+ sizeof(*bdev->channels), GFP_KERNEL);
+
+ if (!bdev->channels) {
+ ret = -ENOMEM;
+ goto err_tasklet_kill;
+ }
+
+ /* allocate and initialize channels */
+ INIT_LIST_HEAD(&bdev->common.channels);
+
+ for (i = 0; i < bdev->num_channels; i++)
+ bam_channel_init(bdev, &bdev->channels[i], i);
+
+ ret = devm_request_irq(bdev->dev, bdev->irq, bam_dma_irq,
+ IRQF_TRIGGER_HIGH, "bam_dma", bdev);
+ if (ret)
+ goto err_bam_channel_exit;
+
+ /* set max dma segment size */
+ bdev->common.dev = bdev->dev;
+ ret = dma_set_max_seg_size(bdev->common.dev, BAM_FIFO_SIZE);
+ if (ret) {
+ dev_err(bdev->dev, "cannot set maximum segment size\n");
+ goto err_bam_channel_exit;
+ }
+
+ platform_set_drvdata(pdev, bdev);
+
+ /* set capabilities */
+ dma_cap_zero(bdev->common.cap_mask);
+ dma_cap_set(DMA_SLAVE, bdev->common.cap_mask);
+
+ /* initialize dmaengine apis */
+ bdev->common.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
+ bdev->common.residue_granularity = DMA_RESIDUE_GRANULARITY_SEGMENT;
+ bdev->common.src_addr_widths = DMA_SLAVE_BUSWIDTH_4_BYTES;
+ bdev->common.dst_addr_widths = DMA_SLAVE_BUSWIDTH_4_BYTES;
+ bdev->common.device_alloc_chan_resources = bam_alloc_chan;
+ bdev->common.device_free_chan_resources = bam_free_chan;
+ bdev->common.device_prep_slave_sg = bam_prep_slave_sg;
+ bdev->common.device_config = bam_slave_config;
+ bdev->common.device_pause = bam_pause;
+ bdev->common.device_resume = bam_resume;
+ bdev->common.device_terminate_all = bam_dma_terminate_all;
+ bdev->common.device_issue_pending = bam_issue_pending;
+ bdev->common.device_tx_status = bam_tx_status;
+ bdev->common.dev = bdev->dev;
+
+ ret = dma_async_device_register(&bdev->common);
+ if (ret) {
+ dev_err(bdev->dev, "failed to register dma async device\n");
+ goto err_bam_channel_exit;
+ }
+
+ ret = of_dma_controller_register(pdev->dev.of_node, bam_dma_xlate,
+ &bdev->common);
+ if (ret)
+ goto err_unregister_dma;
+
+ pm_runtime_irq_safe(&pdev->dev);
+ pm_runtime_set_autosuspend_delay(&pdev->dev, BAM_DMA_AUTOSUSPEND_DELAY);
+ pm_runtime_use_autosuspend(&pdev->dev);
+ pm_runtime_mark_last_busy(&pdev->dev);
+ pm_runtime_set_active(&pdev->dev);
+ pm_runtime_enable(&pdev->dev);
+
+ return 0;
+
+err_unregister_dma:
+ dma_async_device_unregister(&bdev->common);
+err_bam_channel_exit:
+ for (i = 0; i < bdev->num_channels; i++)
+ tasklet_kill(&bdev->channels[i].vc.task);
+err_tasklet_kill:
+ tasklet_kill(&bdev->task);
+err_disable_clk:
+ clk_disable_unprepare(bdev->bamclk);
+
+ return ret;
+}
+
+static int bam_dma_remove(struct platform_device *pdev)
+{
+ struct bam_device *bdev = platform_get_drvdata(pdev);
+ u32 i;
+
+ pm_runtime_force_suspend(&pdev->dev);
+
+ of_dma_controller_free(pdev->dev.of_node);
+ dma_async_device_unregister(&bdev->common);
+
+ /* mask all interrupts for this execution environment */
+ writel_relaxed(0, bam_addr(bdev, 0, BAM_IRQ_SRCS_MSK_EE));
+
+ devm_free_irq(bdev->dev, bdev->irq, bdev);
+
+ for (i = 0; i < bdev->num_channels; i++) {
+ bam_dma_terminate_all(&bdev->channels[i].vc.chan);
+ tasklet_kill(&bdev->channels[i].vc.task);
+
+ if (!bdev->channels[i].fifo_virt)
+ continue;
+
+ dma_free_wc(bdev->dev, BAM_DESC_FIFO_SIZE,
+ bdev->channels[i].fifo_virt,
+ bdev->channels[i].fifo_phys);
+ }
+
+ tasklet_kill(&bdev->task);
+
+ clk_disable_unprepare(bdev->bamclk);
+
+ return 0;
+}
+
+static int __maybe_unused bam_dma_runtime_suspend(struct device *dev)
+{
+ struct bam_device *bdev = dev_get_drvdata(dev);
+
+ clk_disable(bdev->bamclk);
+
+ return 0;
+}
+
+static int __maybe_unused bam_dma_runtime_resume(struct device *dev)
+{
+ struct bam_device *bdev = dev_get_drvdata(dev);
+ int ret;
+
+ ret = clk_enable(bdev->bamclk);
+ if (ret < 0) {
+ dev_err(dev, "clk_enable failed: %d\n", ret);
+ return ret;
+ }
+
+ return 0;
+}
+
+static int __maybe_unused bam_dma_suspend(struct device *dev)
+{
+ struct bam_device *bdev = dev_get_drvdata(dev);
+
+ pm_runtime_force_suspend(dev);
+ clk_unprepare(bdev->bamclk);
+
+ return 0;
+}
+
+static int __maybe_unused bam_dma_resume(struct device *dev)
+{
+ struct bam_device *bdev = dev_get_drvdata(dev);
+ int ret;
+
+ ret = clk_prepare(bdev->bamclk);
+ if (ret)
+ return ret;
+
+ pm_runtime_force_resume(dev);
+
+ return 0;
+}
+
+static const struct dev_pm_ops bam_dma_pm_ops = {
+ SET_LATE_SYSTEM_SLEEP_PM_OPS(bam_dma_suspend, bam_dma_resume)
+ SET_RUNTIME_PM_OPS(bam_dma_runtime_suspend, bam_dma_runtime_resume,
+ NULL)
+};
+
+static struct platform_driver bam_dma_driver = {
+ .probe = bam_dma_probe,
+ .remove = bam_dma_remove,
+ .driver = {
+ .name = "bam-dma-engine",
+ .pm = &bam_dma_pm_ops,
+ .of_match_table = bam_of_match,
+ },
+};
+
+module_platform_driver(bam_dma_driver);
+
+MODULE_AUTHOR("Andy Gross <agross@codeaurora.org>");
+MODULE_DESCRIPTION("QCOM BAM DMA engine driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/qcom/gpi.c b/drivers/dma/qcom/gpi.c
new file mode 100644
index 000000000..db6d0dc30
--- /dev/null
+++ b/drivers/dma/qcom/gpi.c
@@ -0,0 +1,2315 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2017-2020, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2020, Linaro Limited
+ */
+
+#include <dt-bindings/dma/qcom-gpi.h>
+#include <linux/bitfield.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmaengine.h>
+#include <linux/module.h>
+#include <linux/of_dma.h>
+#include <linux/platform_device.h>
+#include <linux/dma/qcom-gpi-dma.h>
+#include <linux/scatterlist.h>
+#include <linux/slab.h>
+#include "../dmaengine.h"
+#include "../virt-dma.h"
+
+#define TRE_TYPE_DMA 0x10
+#define TRE_TYPE_GO 0x20
+#define TRE_TYPE_CONFIG0 0x22
+
+/* TRE flags */
+#define TRE_FLAGS_CHAIN BIT(0)
+#define TRE_FLAGS_IEOB BIT(8)
+#define TRE_FLAGS_IEOT BIT(9)
+#define TRE_FLAGS_BEI BIT(10)
+#define TRE_FLAGS_LINK BIT(11)
+#define TRE_FLAGS_TYPE GENMASK(23, 16)
+
+/* SPI CONFIG0 WD0 */
+#define TRE_SPI_C0_WORD_SZ GENMASK(4, 0)
+#define TRE_SPI_C0_LOOPBACK BIT(8)
+#define TRE_SPI_C0_CS BIT(11)
+#define TRE_SPI_C0_CPHA BIT(12)
+#define TRE_SPI_C0_CPOL BIT(13)
+#define TRE_SPI_C0_TX_PACK BIT(24)
+#define TRE_SPI_C0_RX_PACK BIT(25)
+
+/* CONFIG0 WD2 */
+#define TRE_C0_CLK_DIV GENMASK(11, 0)
+#define TRE_C0_CLK_SRC GENMASK(19, 16)
+
+/* SPI GO WD0 */
+#define TRE_SPI_GO_CMD GENMASK(4, 0)
+#define TRE_SPI_GO_CS GENMASK(10, 8)
+#define TRE_SPI_GO_FRAG BIT(26)
+
+/* GO WD2 */
+#define TRE_RX_LEN GENMASK(23, 0)
+
+/* I2C Config0 WD0 */
+#define TRE_I2C_C0_TLOW GENMASK(7, 0)
+#define TRE_I2C_C0_THIGH GENMASK(15, 8)
+#define TRE_I2C_C0_TCYL GENMASK(23, 16)
+#define TRE_I2C_C0_TX_PACK BIT(24)
+#define TRE_I2C_C0_RX_PACK BIT(25)
+
+/* I2C GO WD0 */
+#define TRE_I2C_GO_CMD GENMASK(4, 0)
+#define TRE_I2C_GO_ADDR GENMASK(14, 8)
+#define TRE_I2C_GO_STRETCH BIT(26)
+
+/* DMA TRE */
+#define TRE_DMA_LEN GENMASK(23, 0)
+
+/* Register offsets from gpi-top */
+#define GPII_n_CH_k_CNTXT_0_OFFS(n, k) (0x20000 + (0x4000 * (n)) + (0x80 * (k)))
+#define GPII_n_CH_k_CNTXT_0_EL_SIZE GENMASK(31, 24)
+#define GPII_n_CH_k_CNTXT_0_CHSTATE GENMASK(23, 20)
+#define GPII_n_CH_k_CNTXT_0_ERIDX GENMASK(18, 14)
+#define GPII_n_CH_k_CNTXT_0_DIR BIT(3)
+#define GPII_n_CH_k_CNTXT_0_PROTO GENMASK(2, 0)
+
+#define GPII_n_CH_k_CNTXT_0(el_size, erindex, dir, chtype_proto) \
+ (FIELD_PREP(GPII_n_CH_k_CNTXT_0_EL_SIZE, el_size) | \
+ FIELD_PREP(GPII_n_CH_k_CNTXT_0_ERIDX, erindex) | \
+ FIELD_PREP(GPII_n_CH_k_CNTXT_0_DIR, dir) | \
+ FIELD_PREP(GPII_n_CH_k_CNTXT_0_PROTO, chtype_proto))
+
+#define GPI_CHTYPE_DIR_IN (0)
+#define GPI_CHTYPE_DIR_OUT (1)
+
+#define GPI_CHTYPE_PROTO_GPI (0x2)
+
+#define GPII_n_CH_k_DOORBELL_0_OFFS(n, k) (0x22000 + (0x4000 * (n)) + (0x8 * (k)))
+#define GPII_n_CH_CMD_OFFS(n) (0x23008 + (0x4000 * (n)))
+#define GPII_n_CH_CMD_OPCODE GENMASK(31, 24)
+#define GPII_n_CH_CMD_CHID GENMASK(7, 0)
+#define GPII_n_CH_CMD(opcode, chid) \
+ (FIELD_PREP(GPII_n_CH_CMD_OPCODE, opcode) | \
+ FIELD_PREP(GPII_n_CH_CMD_CHID, chid))
+
+#define GPII_n_CH_CMD_ALLOCATE (0)
+#define GPII_n_CH_CMD_START (1)
+#define GPII_n_CH_CMD_STOP (2)
+#define GPII_n_CH_CMD_RESET (9)
+#define GPII_n_CH_CMD_DE_ALLOC (10)
+#define GPII_n_CH_CMD_UART_SW_STALE (32)
+#define GPII_n_CH_CMD_UART_RFR_READY (33)
+#define GPII_n_CH_CMD_UART_RFR_NOT_READY (34)
+
+/* EV Context Array */
+#define GPII_n_EV_CH_k_CNTXT_0_OFFS(n, k) (0x21000 + (0x4000 * (n)) + (0x80 * (k)))
+#define GPII_n_EV_k_CNTXT_0_EL_SIZE GENMASK(31, 24)
+#define GPII_n_EV_k_CNTXT_0_CHSTATE GENMASK(23, 20)
+#define GPII_n_EV_k_CNTXT_0_INTYPE BIT(16)
+#define GPII_n_EV_k_CNTXT_0_CHTYPE GENMASK(3, 0)
+
+#define GPII_n_EV_k_CNTXT_0(el_size, inttype, chtype) \
+ (FIELD_PREP(GPII_n_EV_k_CNTXT_0_EL_SIZE, el_size) | \
+ FIELD_PREP(GPII_n_EV_k_CNTXT_0_INTYPE, inttype) | \
+ FIELD_PREP(GPII_n_EV_k_CNTXT_0_CHTYPE, chtype))
+
+#define GPI_INTTYPE_IRQ (1)
+#define GPI_CHTYPE_GPI_EV (0x2)
+
+enum CNTXT_OFFS {
+ CNTXT_0_CONFIG = 0x0,
+ CNTXT_1_R_LENGTH = 0x4,
+ CNTXT_2_RING_BASE_LSB = 0x8,
+ CNTXT_3_RING_BASE_MSB = 0xC,
+ CNTXT_4_RING_RP_LSB = 0x10,
+ CNTXT_5_RING_RP_MSB = 0x14,
+ CNTXT_6_RING_WP_LSB = 0x18,
+ CNTXT_7_RING_WP_MSB = 0x1C,
+ CNTXT_8_RING_INT_MOD = 0x20,
+ CNTXT_9_RING_INTVEC = 0x24,
+ CNTXT_10_RING_MSI_LSB = 0x28,
+ CNTXT_11_RING_MSI_MSB = 0x2C,
+ CNTXT_12_RING_RP_UPDATE_LSB = 0x30,
+ CNTXT_13_RING_RP_UPDATE_MSB = 0x34,
+};
+
+#define GPII_n_EV_CH_k_DOORBELL_0_OFFS(n, k) (0x22100 + (0x4000 * (n)) + (0x8 * (k)))
+#define GPII_n_EV_CH_CMD_OFFS(n) (0x23010 + (0x4000 * (n)))
+#define GPII_n_EV_CMD_OPCODE GENMASK(31, 24)
+#define GPII_n_EV_CMD_CHID GENMASK(7, 0)
+#define GPII_n_EV_CMD(opcode, chid) \
+ (FIELD_PREP(GPII_n_EV_CMD_OPCODE, opcode) | \
+ FIELD_PREP(GPII_n_EV_CMD_CHID, chid))
+
+#define GPII_n_EV_CH_CMD_ALLOCATE (0x00)
+#define GPII_n_EV_CH_CMD_RESET (0x09)
+#define GPII_n_EV_CH_CMD_DE_ALLOC (0x0A)
+
+#define GPII_n_CNTXT_TYPE_IRQ_OFFS(n) (0x23080 + (0x4000 * (n)))
+
+/* mask type register */
+#define GPII_n_CNTXT_TYPE_IRQ_MSK_OFFS(n) (0x23088 + (0x4000 * (n)))
+#define GPII_n_CNTXT_TYPE_IRQ_MSK_BMSK GENMASK(6, 0)
+#define GPII_n_CNTXT_TYPE_IRQ_MSK_GENERAL BIT(6)
+#define GPII_n_CNTXT_TYPE_IRQ_MSK_IEOB BIT(3)
+#define GPII_n_CNTXT_TYPE_IRQ_MSK_GLOB BIT(2)
+#define GPII_n_CNTXT_TYPE_IRQ_MSK_EV_CTRL BIT(1)
+#define GPII_n_CNTXT_TYPE_IRQ_MSK_CH_CTRL BIT(0)
+
+#define GPII_n_CNTXT_SRC_GPII_CH_IRQ_OFFS(n) (0x23090 + (0x4000 * (n)))
+#define GPII_n_CNTXT_SRC_EV_CH_IRQ_OFFS(n) (0x23094 + (0x4000 * (n)))
+
+/* Mask channel control interrupt register */
+#define GPII_n_CNTXT_SRC_CH_IRQ_MSK_OFFS(n) (0x23098 + (0x4000 * (n)))
+#define GPII_n_CNTXT_SRC_CH_IRQ_MSK_BMSK GENMASK(1, 0)
+
+/* Mask event control interrupt register */
+#define GPII_n_CNTXT_SRC_EV_CH_IRQ_MSK_OFFS(n) (0x2309C + (0x4000 * (n)))
+#define GPII_n_CNTXT_SRC_EV_CH_IRQ_MSK_BMSK BIT(0)
+
+#define GPII_n_CNTXT_SRC_CH_IRQ_CLR_OFFS(n) (0x230A0 + (0x4000 * (n)))
+#define GPII_n_CNTXT_SRC_EV_CH_IRQ_CLR_OFFS(n) (0x230A4 + (0x4000 * (n)))
+
+/* Mask event interrupt register */
+#define GPII_n_CNTXT_SRC_IEOB_IRQ_MSK_OFFS(n) (0x230B8 + (0x4000 * (n)))
+#define GPII_n_CNTXT_SRC_IEOB_IRQ_MSK_BMSK BIT(0)
+
+#define GPII_n_CNTXT_SRC_IEOB_IRQ_CLR_OFFS(n) (0x230C0 + (0x4000 * (n)))
+#define GPII_n_CNTXT_GLOB_IRQ_STTS_OFFS(n) (0x23100 + (0x4000 * (n)))
+#define GPI_GLOB_IRQ_ERROR_INT_MSK BIT(0)
+
+/* GPII specific Global - Enable bit register */
+#define GPII_n_CNTXT_GLOB_IRQ_EN_OFFS(n) (0x23108 + (0x4000 * (n)))
+#define GPII_n_CNTXT_GLOB_IRQ_CLR_OFFS(n) (0x23110 + (0x4000 * (n)))
+#define GPII_n_CNTXT_GPII_IRQ_STTS_OFFS(n) (0x23118 + (0x4000 * (n)))
+
+/* GPII general interrupt - Enable bit register */
+#define GPII_n_CNTXT_GPII_IRQ_EN_OFFS(n) (0x23120 + (0x4000 * (n)))
+#define GPII_n_CNTXT_GPII_IRQ_EN_BMSK GENMASK(3, 0)
+
+#define GPII_n_CNTXT_GPII_IRQ_CLR_OFFS(n) (0x23128 + (0x4000 * (n)))
+
+/* GPII Interrupt Type register */
+#define GPII_n_CNTXT_INTSET_OFFS(n) (0x23180 + (0x4000 * (n)))
+#define GPII_n_CNTXT_INTSET_BMSK BIT(0)
+
+#define GPII_n_CNTXT_MSI_BASE_LSB_OFFS(n) (0x23188 + (0x4000 * (n)))
+#define GPII_n_CNTXT_MSI_BASE_MSB_OFFS(n) (0x2318C + (0x4000 * (n)))
+#define GPII_n_CNTXT_SCRATCH_0_OFFS(n) (0x23400 + (0x4000 * (n)))
+#define GPII_n_CNTXT_SCRATCH_1_OFFS(n) (0x23404 + (0x4000 * (n)))
+
+#define GPII_n_ERROR_LOG_OFFS(n) (0x23200 + (0x4000 * (n)))
+
+/* QOS Registers */
+#define GPII_n_CH_k_QOS_OFFS(n, k) (0x2005C + (0x4000 * (n)) + (0x80 * (k)))
+
+/* Scratch registers */
+#define GPII_n_CH_k_SCRATCH_0_OFFS(n, k) (0x20060 + (0x4000 * (n)) + (0x80 * (k)))
+#define GPII_n_CH_k_SCRATCH_0_SEID GENMASK(2, 0)
+#define GPII_n_CH_k_SCRATCH_0_PROTO GENMASK(7, 4)
+#define GPII_n_CH_k_SCRATCH_0_PAIR GENMASK(20, 16)
+#define GPII_n_CH_k_SCRATCH_0(pair, proto, seid) \
+ (FIELD_PREP(GPII_n_CH_k_SCRATCH_0_PAIR, pair) | \
+ FIELD_PREP(GPII_n_CH_k_SCRATCH_0_PROTO, proto) | \
+ FIELD_PREP(GPII_n_CH_k_SCRATCH_0_SEID, seid))
+#define GPII_n_CH_k_SCRATCH_1_OFFS(n, k) (0x20064 + (0x4000 * (n)) + (0x80 * (k)))
+#define GPII_n_CH_k_SCRATCH_2_OFFS(n, k) (0x20068 + (0x4000 * (n)) + (0x80 * (k)))
+#define GPII_n_CH_k_SCRATCH_3_OFFS(n, k) (0x2006C + (0x4000 * (n)) + (0x80 * (k)))
+
+struct __packed gpi_tre {
+ u32 dword[4];
+};
+
+enum msm_gpi_tce_code {
+ MSM_GPI_TCE_SUCCESS = 1,
+ MSM_GPI_TCE_EOT = 2,
+ MSM_GPI_TCE_EOB = 4,
+ MSM_GPI_TCE_UNEXP_ERR = 16,
+};
+
+#define CMD_TIMEOUT_MS (250)
+
+#define MAX_CHANNELS_PER_GPII (2)
+#define GPI_TX_CHAN (0)
+#define GPI_RX_CHAN (1)
+#define STATE_IGNORE (U32_MAX)
+#define EV_FACTOR (2)
+#define REQ_OF_DMA_ARGS (5) /* # of arguments required from client */
+#define CHAN_TRES 64
+
+struct __packed xfer_compl_event {
+ u64 ptr;
+ u32 length:24;
+ u8 code;
+ u16 status;
+ u8 type;
+ u8 chid;
+};
+
+struct __packed immediate_data_event {
+ u8 data_bytes[8];
+ u8 length:4;
+ u8 resvd:4;
+ u16 tre_index;
+ u8 code;
+ u16 status;
+ u8 type;
+ u8 chid;
+};
+
+struct __packed qup_notif_event {
+ u32 status;
+ u32 time;
+ u32 count:24;
+ u8 resvd;
+ u16 resvd1;
+ u8 type;
+ u8 chid;
+};
+
+struct __packed gpi_ere {
+ u32 dword[4];
+};
+
+enum GPI_EV_TYPE {
+ XFER_COMPLETE_EV_TYPE = 0x22,
+ IMMEDIATE_DATA_EV_TYPE = 0x30,
+ QUP_NOTIF_EV_TYPE = 0x31,
+ STALE_EV_TYPE = 0xFF,
+};
+
+union __packed gpi_event {
+ struct __packed xfer_compl_event xfer_compl_event;
+ struct __packed immediate_data_event immediate_data_event;
+ struct __packed qup_notif_event qup_notif_event;
+ struct __packed gpi_ere gpi_ere;
+};
+
+enum gpii_irq_settings {
+ DEFAULT_IRQ_SETTINGS,
+ MASK_IEOB_SETTINGS,
+};
+
+enum gpi_ev_state {
+ DEFAULT_EV_CH_STATE = 0,
+ EV_STATE_NOT_ALLOCATED = DEFAULT_EV_CH_STATE,
+ EV_STATE_ALLOCATED,
+ MAX_EV_STATES
+};
+
+static const char *const gpi_ev_state_str[MAX_EV_STATES] = {
+ [EV_STATE_NOT_ALLOCATED] = "NOT ALLOCATED",
+ [EV_STATE_ALLOCATED] = "ALLOCATED",
+};
+
+#define TO_GPI_EV_STATE_STR(_state) (((_state) >= MAX_EV_STATES) ? \
+ "INVALID" : gpi_ev_state_str[(_state)])
+
+enum gpi_ch_state {
+ DEFAULT_CH_STATE = 0x0,
+ CH_STATE_NOT_ALLOCATED = DEFAULT_CH_STATE,
+ CH_STATE_ALLOCATED = 0x1,
+ CH_STATE_STARTED = 0x2,
+ CH_STATE_STOPPED = 0x3,
+ CH_STATE_STOP_IN_PROC = 0x4,
+ CH_STATE_ERROR = 0xf,
+ MAX_CH_STATES
+};
+
+enum gpi_cmd {
+ GPI_CH_CMD_BEGIN,
+ GPI_CH_CMD_ALLOCATE = GPI_CH_CMD_BEGIN,
+ GPI_CH_CMD_START,
+ GPI_CH_CMD_STOP,
+ GPI_CH_CMD_RESET,
+ GPI_CH_CMD_DE_ALLOC,
+ GPI_CH_CMD_UART_SW_STALE,
+ GPI_CH_CMD_UART_RFR_READY,
+ GPI_CH_CMD_UART_RFR_NOT_READY,
+ GPI_CH_CMD_END = GPI_CH_CMD_UART_RFR_NOT_READY,
+ GPI_EV_CMD_BEGIN,
+ GPI_EV_CMD_ALLOCATE = GPI_EV_CMD_BEGIN,
+ GPI_EV_CMD_RESET,
+ GPI_EV_CMD_DEALLOC,
+ GPI_EV_CMD_END = GPI_EV_CMD_DEALLOC,
+ GPI_MAX_CMD,
+};
+
+#define IS_CHAN_CMD(_cmd) ((_cmd) <= GPI_CH_CMD_END)
+
+static const char *const gpi_cmd_str[GPI_MAX_CMD] = {
+ [GPI_CH_CMD_ALLOCATE] = "CH ALLOCATE",
+ [GPI_CH_CMD_START] = "CH START",
+ [GPI_CH_CMD_STOP] = "CH STOP",
+ [GPI_CH_CMD_RESET] = "CH_RESET",
+ [GPI_CH_CMD_DE_ALLOC] = "DE ALLOC",
+ [GPI_CH_CMD_UART_SW_STALE] = "UART SW STALE",
+ [GPI_CH_CMD_UART_RFR_READY] = "UART RFR READY",
+ [GPI_CH_CMD_UART_RFR_NOT_READY] = "UART RFR NOT READY",
+ [GPI_EV_CMD_ALLOCATE] = "EV ALLOCATE",
+ [GPI_EV_CMD_RESET] = "EV RESET",
+ [GPI_EV_CMD_DEALLOC] = "EV DEALLOC",
+};
+
+#define TO_GPI_CMD_STR(_cmd) (((_cmd) >= GPI_MAX_CMD) ? "INVALID" : \
+ gpi_cmd_str[(_cmd)])
+
+/*
+ * @DISABLE_STATE: no register access allowed
+ * @CONFIG_STATE: client has configured the channel
+ * @PREP_HARDWARE: register access is allowed
+ * however, no processing EVENTS
+ * @ACTIVE_STATE: channels are fully operational
+ * @PREPARE_TERMINATE: graceful termination of channels
+ * register access is allowed
+ * @PAUSE_STATE: channels are active, but not processing any events
+ */
+enum gpi_pm_state {
+ DISABLE_STATE,
+ CONFIG_STATE,
+ PREPARE_HARDWARE,
+ ACTIVE_STATE,
+ PREPARE_TERMINATE,
+ PAUSE_STATE,
+ MAX_PM_STATE
+};
+
+#define REG_ACCESS_VALID(_pm_state) ((_pm_state) >= PREPARE_HARDWARE)
+
+static const char *const gpi_pm_state_str[MAX_PM_STATE] = {
+ [DISABLE_STATE] = "DISABLE",
+ [CONFIG_STATE] = "CONFIG",
+ [PREPARE_HARDWARE] = "PREPARE HARDWARE",
+ [ACTIVE_STATE] = "ACTIVE",
+ [PREPARE_TERMINATE] = "PREPARE TERMINATE",
+ [PAUSE_STATE] = "PAUSE",
+};
+
+#define TO_GPI_PM_STR(_state) (((_state) >= MAX_PM_STATE) ? \
+ "INVALID" : gpi_pm_state_str[(_state)])
+
+static const struct {
+ enum gpi_cmd gpi_cmd;
+ u32 opcode;
+ u32 state;
+} gpi_cmd_info[GPI_MAX_CMD] = {
+ {
+ GPI_CH_CMD_ALLOCATE,
+ GPII_n_CH_CMD_ALLOCATE,
+ CH_STATE_ALLOCATED,
+ },
+ {
+ GPI_CH_CMD_START,
+ GPII_n_CH_CMD_START,
+ CH_STATE_STARTED,
+ },
+ {
+ GPI_CH_CMD_STOP,
+ GPII_n_CH_CMD_STOP,
+ CH_STATE_STOPPED,
+ },
+ {
+ GPI_CH_CMD_RESET,
+ GPII_n_CH_CMD_RESET,
+ CH_STATE_ALLOCATED,
+ },
+ {
+ GPI_CH_CMD_DE_ALLOC,
+ GPII_n_CH_CMD_DE_ALLOC,
+ CH_STATE_NOT_ALLOCATED,
+ },
+ {
+ GPI_CH_CMD_UART_SW_STALE,
+ GPII_n_CH_CMD_UART_SW_STALE,
+ STATE_IGNORE,
+ },
+ {
+ GPI_CH_CMD_UART_RFR_READY,
+ GPII_n_CH_CMD_UART_RFR_READY,
+ STATE_IGNORE,
+ },
+ {
+ GPI_CH_CMD_UART_RFR_NOT_READY,
+ GPII_n_CH_CMD_UART_RFR_NOT_READY,
+ STATE_IGNORE,
+ },
+ {
+ GPI_EV_CMD_ALLOCATE,
+ GPII_n_EV_CH_CMD_ALLOCATE,
+ EV_STATE_ALLOCATED,
+ },
+ {
+ GPI_EV_CMD_RESET,
+ GPII_n_EV_CH_CMD_RESET,
+ EV_STATE_ALLOCATED,
+ },
+ {
+ GPI_EV_CMD_DEALLOC,
+ GPII_n_EV_CH_CMD_DE_ALLOC,
+ EV_STATE_NOT_ALLOCATED,
+ },
+};
+
+struct gpi_ring {
+ void *pre_aligned;
+ size_t alloc_size;
+ phys_addr_t phys_addr;
+ dma_addr_t dma_handle;
+ void *base;
+ void *wp;
+ void *rp;
+ u32 len;
+ u32 el_size;
+ u32 elements;
+ bool configured;
+};
+
+struct gpi_dev {
+ struct dma_device dma_device;
+ struct device *dev;
+ struct resource *res;
+ void __iomem *regs;
+ void __iomem *ee_base; /*ee register base address*/
+ u32 max_gpii; /* maximum # of gpii instances available per gpi block */
+ u32 gpii_mask; /* gpii instances available for apps */
+ u32 ev_factor; /* ev ring length factor */
+ struct gpii *gpiis;
+};
+
+struct reg_info {
+ char *name;
+ u32 offset;
+ u32 val;
+};
+
+struct gchan {
+ struct virt_dma_chan vc;
+ u32 chid;
+ u32 seid;
+ u32 protocol;
+ struct gpii *gpii;
+ enum gpi_ch_state ch_state;
+ enum gpi_pm_state pm_state;
+ void __iomem *ch_cntxt_base_reg;
+ void __iomem *ch_cntxt_db_reg;
+ void __iomem *ch_cmd_reg;
+ u32 dir;
+ struct gpi_ring ch_ring;
+ void *config;
+};
+
+struct gpii {
+ u32 gpii_id;
+ struct gchan gchan[MAX_CHANNELS_PER_GPII];
+ struct gpi_dev *gpi_dev;
+ int irq;
+ void __iomem *regs; /* points to gpi top */
+ void __iomem *ev_cntxt_base_reg;
+ void __iomem *ev_cntxt_db_reg;
+ void __iomem *ev_ring_rp_lsb_reg;
+ void __iomem *ev_cmd_reg;
+ void __iomem *ieob_clr_reg;
+ struct mutex ctrl_lock;
+ enum gpi_ev_state ev_state;
+ bool configured_irq;
+ enum gpi_pm_state pm_state;
+ rwlock_t pm_lock;
+ struct gpi_ring ev_ring;
+ struct tasklet_struct ev_task; /* event processing tasklet */
+ struct completion cmd_completion;
+ enum gpi_cmd gpi_cmd;
+ u32 cntxt_type_irq_msk;
+ bool ieob_set;
+};
+
+#define MAX_TRE 3
+
+struct gpi_desc {
+ struct virt_dma_desc vd;
+ size_t len;
+ void *db; /* DB register to program */
+ struct gchan *gchan;
+ struct gpi_tre tre[MAX_TRE];
+ u32 num_tre;
+};
+
+static const u32 GPII_CHAN_DIR[MAX_CHANNELS_PER_GPII] = {
+ GPI_CHTYPE_DIR_OUT, GPI_CHTYPE_DIR_IN
+};
+
+static irqreturn_t gpi_handle_irq(int irq, void *data);
+static void gpi_ring_recycle_ev_element(struct gpi_ring *ring);
+static int gpi_ring_add_element(struct gpi_ring *ring, void **wp);
+static void gpi_process_events(struct gpii *gpii);
+
+static inline struct gchan *to_gchan(struct dma_chan *dma_chan)
+{
+ return container_of(dma_chan, struct gchan, vc.chan);
+}
+
+static inline struct gpi_desc *to_gpi_desc(struct virt_dma_desc *vd)
+{
+ return container_of(vd, struct gpi_desc, vd);
+}
+
+static inline phys_addr_t to_physical(const struct gpi_ring *const ring,
+ void *addr)
+{
+ return ring->phys_addr + (addr - ring->base);
+}
+
+static inline void *to_virtual(const struct gpi_ring *const ring, phys_addr_t addr)
+{
+ return ring->base + (addr - ring->phys_addr);
+}
+
+static inline u32 gpi_read_reg(struct gpii *gpii, void __iomem *addr)
+{
+ return readl_relaxed(addr);
+}
+
+static inline void gpi_write_reg(struct gpii *gpii, void __iomem *addr, u32 val)
+{
+ writel_relaxed(val, addr);
+}
+
+/* gpi_write_reg_field - write to specific bit field */
+static inline void gpi_write_reg_field(struct gpii *gpii, void __iomem *addr,
+ u32 mask, u32 shift, u32 val)
+{
+ u32 tmp = gpi_read_reg(gpii, addr);
+
+ tmp &= ~mask;
+ val = tmp | ((val << shift) & mask);
+ gpi_write_reg(gpii, addr, val);
+}
+
+static __always_inline void
+gpi_update_reg(struct gpii *gpii, u32 offset, u32 mask, u32 val)
+{
+ void __iomem *addr = gpii->regs + offset;
+ u32 tmp = gpi_read_reg(gpii, addr);
+
+ tmp &= ~mask;
+ tmp |= u32_encode_bits(val, mask);
+
+ gpi_write_reg(gpii, addr, tmp);
+}
+
+static void gpi_disable_interrupts(struct gpii *gpii)
+{
+ gpi_update_reg(gpii, GPII_n_CNTXT_TYPE_IRQ_MSK_OFFS(gpii->gpii_id),
+ GPII_n_CNTXT_TYPE_IRQ_MSK_BMSK, 0);
+ gpi_update_reg(gpii, GPII_n_CNTXT_SRC_IEOB_IRQ_MSK_OFFS(gpii->gpii_id),
+ GPII_n_CNTXT_SRC_IEOB_IRQ_MSK_BMSK, 0);
+ gpi_update_reg(gpii, GPII_n_CNTXT_SRC_CH_IRQ_MSK_OFFS(gpii->gpii_id),
+ GPII_n_CNTXT_SRC_CH_IRQ_MSK_BMSK, 0);
+ gpi_update_reg(gpii, GPII_n_CNTXT_SRC_EV_CH_IRQ_MSK_OFFS(gpii->gpii_id),
+ GPII_n_CNTXT_SRC_EV_CH_IRQ_MSK_BMSK, 0);
+ gpi_update_reg(gpii, GPII_n_CNTXT_GLOB_IRQ_EN_OFFS(gpii->gpii_id),
+ GPII_n_CNTXT_GPII_IRQ_EN_BMSK, 0);
+ gpi_update_reg(gpii, GPII_n_CNTXT_GPII_IRQ_EN_OFFS(gpii->gpii_id),
+ GPII_n_CNTXT_GPII_IRQ_EN_BMSK, 0);
+ gpi_update_reg(gpii, GPII_n_CNTXT_INTSET_OFFS(gpii->gpii_id),
+ GPII_n_CNTXT_INTSET_BMSK, 0);
+
+ gpii->cntxt_type_irq_msk = 0;
+ devm_free_irq(gpii->gpi_dev->dev, gpii->irq, gpii);
+ gpii->configured_irq = false;
+}
+
+/* configure and enable interrupts */
+static int gpi_config_interrupts(struct gpii *gpii, enum gpii_irq_settings settings, bool mask)
+{
+ const u32 enable = (GPII_n_CNTXT_TYPE_IRQ_MSK_GENERAL |
+ GPII_n_CNTXT_TYPE_IRQ_MSK_IEOB |
+ GPII_n_CNTXT_TYPE_IRQ_MSK_GLOB |
+ GPII_n_CNTXT_TYPE_IRQ_MSK_EV_CTRL |
+ GPII_n_CNTXT_TYPE_IRQ_MSK_CH_CTRL);
+ int ret;
+
+ if (!gpii->configured_irq) {
+ ret = devm_request_irq(gpii->gpi_dev->dev, gpii->irq,
+ gpi_handle_irq, IRQF_TRIGGER_HIGH,
+ "gpi-dma", gpii);
+ if (ret < 0) {
+ dev_err(gpii->gpi_dev->dev, "error request irq:%d ret:%d\n",
+ gpii->irq, ret);
+ return ret;
+ }
+ }
+
+ if (settings == MASK_IEOB_SETTINGS) {
+ /*
+ * GPII only uses one EV ring per gpii so we can globally
+ * enable/disable IEOB interrupt
+ */
+ if (mask)
+ gpii->cntxt_type_irq_msk |= GPII_n_CNTXT_TYPE_IRQ_MSK_IEOB;
+ else
+ gpii->cntxt_type_irq_msk &= ~(GPII_n_CNTXT_TYPE_IRQ_MSK_IEOB);
+ gpi_update_reg(gpii, GPII_n_CNTXT_TYPE_IRQ_MSK_OFFS(gpii->gpii_id),
+ GPII_n_CNTXT_TYPE_IRQ_MSK_BMSK, gpii->cntxt_type_irq_msk);
+ } else {
+ gpi_update_reg(gpii, GPII_n_CNTXT_TYPE_IRQ_MSK_OFFS(gpii->gpii_id),
+ GPII_n_CNTXT_TYPE_IRQ_MSK_BMSK, enable);
+ gpi_update_reg(gpii, GPII_n_CNTXT_SRC_IEOB_IRQ_MSK_OFFS(gpii->gpii_id),
+ GPII_n_CNTXT_SRC_IEOB_IRQ_MSK_BMSK,
+ GPII_n_CNTXT_SRC_IEOB_IRQ_MSK_BMSK);
+ gpi_update_reg(gpii, GPII_n_CNTXT_SRC_CH_IRQ_MSK_OFFS(gpii->gpii_id),
+ GPII_n_CNTXT_SRC_CH_IRQ_MSK_BMSK,
+ GPII_n_CNTXT_SRC_CH_IRQ_MSK_BMSK);
+ gpi_update_reg(gpii, GPII_n_CNTXT_SRC_EV_CH_IRQ_MSK_OFFS(gpii->gpii_id),
+ GPII_n_CNTXT_SRC_EV_CH_IRQ_MSK_BMSK,
+ GPII_n_CNTXT_SRC_EV_CH_IRQ_MSK_BMSK);
+ gpi_update_reg(gpii, GPII_n_CNTXT_GLOB_IRQ_EN_OFFS(gpii->gpii_id),
+ GPII_n_CNTXT_GPII_IRQ_EN_BMSK,
+ GPII_n_CNTXT_GPII_IRQ_EN_BMSK);
+ gpi_update_reg(gpii, GPII_n_CNTXT_GPII_IRQ_EN_OFFS(gpii->gpii_id),
+ GPII_n_CNTXT_GPII_IRQ_EN_BMSK, GPII_n_CNTXT_GPII_IRQ_EN_BMSK);
+ gpi_update_reg(gpii, GPII_n_CNTXT_MSI_BASE_LSB_OFFS(gpii->gpii_id), U32_MAX, 0);
+ gpi_update_reg(gpii, GPII_n_CNTXT_MSI_BASE_MSB_OFFS(gpii->gpii_id), U32_MAX, 0);
+ gpi_update_reg(gpii, GPII_n_CNTXT_SCRATCH_0_OFFS(gpii->gpii_id), U32_MAX, 0);
+ gpi_update_reg(gpii, GPII_n_CNTXT_SCRATCH_1_OFFS(gpii->gpii_id), U32_MAX, 0);
+ gpi_update_reg(gpii, GPII_n_CNTXT_INTSET_OFFS(gpii->gpii_id),
+ GPII_n_CNTXT_INTSET_BMSK, 1);
+ gpi_update_reg(gpii, GPII_n_ERROR_LOG_OFFS(gpii->gpii_id), U32_MAX, 0);
+
+ gpii->cntxt_type_irq_msk = enable;
+ }
+
+ gpii->configured_irq = true;
+ return 0;
+}
+
+/* Sends gpii event or channel command */
+static int gpi_send_cmd(struct gpii *gpii, struct gchan *gchan,
+ enum gpi_cmd gpi_cmd)
+{
+ u32 chid = MAX_CHANNELS_PER_GPII;
+ unsigned long timeout;
+ void __iomem *cmd_reg;
+ u32 cmd;
+
+ if (gpi_cmd >= GPI_MAX_CMD)
+ return -EINVAL;
+ if (IS_CHAN_CMD(gpi_cmd))
+ chid = gchan->chid;
+
+ dev_dbg(gpii->gpi_dev->dev,
+ "sending cmd: %s:%u\n", TO_GPI_CMD_STR(gpi_cmd), chid);
+
+ /* send opcode and wait for completion */
+ reinit_completion(&gpii->cmd_completion);
+ gpii->gpi_cmd = gpi_cmd;
+
+ cmd_reg = IS_CHAN_CMD(gpi_cmd) ? gchan->ch_cmd_reg : gpii->ev_cmd_reg;
+ cmd = IS_CHAN_CMD(gpi_cmd) ? GPII_n_CH_CMD(gpi_cmd_info[gpi_cmd].opcode, chid) :
+ GPII_n_EV_CMD(gpi_cmd_info[gpi_cmd].opcode, 0);
+ gpi_write_reg(gpii, cmd_reg, cmd);
+ timeout = wait_for_completion_timeout(&gpii->cmd_completion,
+ msecs_to_jiffies(CMD_TIMEOUT_MS));
+ if (!timeout) {
+ dev_err(gpii->gpi_dev->dev, "cmd: %s completion timeout:%u\n",
+ TO_GPI_CMD_STR(gpi_cmd), chid);
+ return -EIO;
+ }
+
+ /* confirm new ch state is correct , if the cmd is a state change cmd */
+ if (gpi_cmd_info[gpi_cmd].state == STATE_IGNORE)
+ return 0;
+
+ if (IS_CHAN_CMD(gpi_cmd) && gchan->ch_state == gpi_cmd_info[gpi_cmd].state)
+ return 0;
+
+ if (!IS_CHAN_CMD(gpi_cmd) && gpii->ev_state == gpi_cmd_info[gpi_cmd].state)
+ return 0;
+
+ return -EIO;
+}
+
+/* program transfer ring DB register */
+static inline void gpi_write_ch_db(struct gchan *gchan,
+ struct gpi_ring *ring, void *wp)
+{
+ struct gpii *gpii = gchan->gpii;
+ phys_addr_t p_wp;
+
+ p_wp = to_physical(ring, wp);
+ gpi_write_reg(gpii, gchan->ch_cntxt_db_reg, p_wp);
+}
+
+/* program event ring DB register */
+static inline void gpi_write_ev_db(struct gpii *gpii,
+ struct gpi_ring *ring, void *wp)
+{
+ phys_addr_t p_wp;
+
+ p_wp = ring->phys_addr + (wp - ring->base);
+ gpi_write_reg(gpii, gpii->ev_cntxt_db_reg, p_wp);
+}
+
+/* process transfer completion interrupt */
+static void gpi_process_ieob(struct gpii *gpii)
+{
+ gpi_write_reg(gpii, gpii->ieob_clr_reg, BIT(0));
+
+ gpi_config_interrupts(gpii, MASK_IEOB_SETTINGS, 0);
+ tasklet_hi_schedule(&gpii->ev_task);
+}
+
+/* process channel control interrupt */
+static void gpi_process_ch_ctrl_irq(struct gpii *gpii)
+{
+ u32 gpii_id = gpii->gpii_id;
+ u32 offset = GPII_n_CNTXT_SRC_GPII_CH_IRQ_OFFS(gpii_id);
+ u32 ch_irq = gpi_read_reg(gpii, gpii->regs + offset);
+ struct gchan *gchan;
+ u32 chid, state;
+
+ /* clear the status */
+ offset = GPII_n_CNTXT_SRC_CH_IRQ_CLR_OFFS(gpii_id);
+ gpi_write_reg(gpii, gpii->regs + offset, (u32)ch_irq);
+
+ for (chid = 0; chid < MAX_CHANNELS_PER_GPII; chid++) {
+ if (!(BIT(chid) & ch_irq))
+ continue;
+
+ gchan = &gpii->gchan[chid];
+ state = gpi_read_reg(gpii, gchan->ch_cntxt_base_reg +
+ CNTXT_0_CONFIG);
+ state = FIELD_GET(GPII_n_CH_k_CNTXT_0_CHSTATE, state);
+
+ /*
+ * CH_CMD_DEALLOC cmd always successful. However cmd does
+ * not change hardware status. So overwriting software state
+ * to default state.
+ */
+ if (gpii->gpi_cmd == GPI_CH_CMD_DE_ALLOC)
+ state = DEFAULT_CH_STATE;
+ gchan->ch_state = state;
+
+ /*
+ * Triggering complete all if ch_state is not a stop in process.
+ * Stop in process is a transition state and we will wait for
+ * stop interrupt before notifying.
+ */
+ if (gchan->ch_state != CH_STATE_STOP_IN_PROC)
+ complete_all(&gpii->cmd_completion);
+ }
+}
+
+/* processing gpi general error interrupts */
+static void gpi_process_gen_err_irq(struct gpii *gpii)
+{
+ u32 gpii_id = gpii->gpii_id;
+ u32 offset = GPII_n_CNTXT_GPII_IRQ_STTS_OFFS(gpii_id);
+ u32 irq_stts = gpi_read_reg(gpii, gpii->regs + offset);
+
+ /* clear the status */
+ dev_dbg(gpii->gpi_dev->dev, "irq_stts:0x%x\n", irq_stts);
+
+ /* Clear the register */
+ offset = GPII_n_CNTXT_GPII_IRQ_CLR_OFFS(gpii_id);
+ gpi_write_reg(gpii, gpii->regs + offset, irq_stts);
+}
+
+/* processing gpi level error interrupts */
+static void gpi_process_glob_err_irq(struct gpii *gpii)
+{
+ u32 gpii_id = gpii->gpii_id;
+ u32 offset = GPII_n_CNTXT_GLOB_IRQ_STTS_OFFS(gpii_id);
+ u32 irq_stts = gpi_read_reg(gpii, gpii->regs + offset);
+
+ offset = GPII_n_CNTXT_GLOB_IRQ_CLR_OFFS(gpii_id);
+ gpi_write_reg(gpii, gpii->regs + offset, irq_stts);
+
+ /* only error interrupt should be set */
+ if (irq_stts & ~GPI_GLOB_IRQ_ERROR_INT_MSK) {
+ dev_err(gpii->gpi_dev->dev, "invalid error status:0x%x\n", irq_stts);
+ return;
+ }
+
+ offset = GPII_n_ERROR_LOG_OFFS(gpii_id);
+ gpi_write_reg(gpii, gpii->regs + offset, 0);
+}
+
+/* gpii interrupt handler */
+static irqreturn_t gpi_handle_irq(int irq, void *data)
+{
+ struct gpii *gpii = data;
+ u32 gpii_id = gpii->gpii_id;
+ u32 type, offset;
+ unsigned long flags;
+
+ read_lock_irqsave(&gpii->pm_lock, flags);
+
+ /*
+ * States are out of sync to receive interrupt
+ * while software state is in DISABLE state, bailing out.
+ */
+ if (!REG_ACCESS_VALID(gpii->pm_state)) {
+ dev_err(gpii->gpi_dev->dev, "receive interrupt while in %s state\n",
+ TO_GPI_PM_STR(gpii->pm_state));
+ goto exit_irq;
+ }
+
+ offset = GPII_n_CNTXT_TYPE_IRQ_OFFS(gpii->gpii_id);
+ type = gpi_read_reg(gpii, gpii->regs + offset);
+
+ do {
+ /* global gpii error */
+ if (type & GPII_n_CNTXT_TYPE_IRQ_MSK_GLOB) {
+ gpi_process_glob_err_irq(gpii);
+ type &= ~(GPII_n_CNTXT_TYPE_IRQ_MSK_GLOB);
+ }
+
+ /* transfer complete interrupt */
+ if (type & GPII_n_CNTXT_TYPE_IRQ_MSK_IEOB) {
+ gpi_process_ieob(gpii);
+ type &= ~GPII_n_CNTXT_TYPE_IRQ_MSK_IEOB;
+ }
+
+ /* event control irq */
+ if (type & GPII_n_CNTXT_TYPE_IRQ_MSK_EV_CTRL) {
+ u32 ev_state;
+ u32 ev_ch_irq;
+
+ dev_dbg(gpii->gpi_dev->dev,
+ "processing EV CTRL interrupt\n");
+ offset = GPII_n_CNTXT_SRC_EV_CH_IRQ_OFFS(gpii_id);
+ ev_ch_irq = gpi_read_reg(gpii, gpii->regs + offset);
+
+ offset = GPII_n_CNTXT_SRC_EV_CH_IRQ_CLR_OFFS
+ (gpii_id);
+ gpi_write_reg(gpii, gpii->regs + offset, ev_ch_irq);
+ ev_state = gpi_read_reg(gpii, gpii->ev_cntxt_base_reg +
+ CNTXT_0_CONFIG);
+ ev_state = FIELD_GET(GPII_n_EV_k_CNTXT_0_CHSTATE, ev_state);
+
+ /*
+ * CMD EV_CMD_DEALLOC is always successful. However
+ * cmd does not change hardware status. So overwriting
+ * software state to default state.
+ */
+ if (gpii->gpi_cmd == GPI_EV_CMD_DEALLOC)
+ ev_state = DEFAULT_EV_CH_STATE;
+
+ gpii->ev_state = ev_state;
+ dev_dbg(gpii->gpi_dev->dev, "setting EV state to %s\n",
+ TO_GPI_EV_STATE_STR(gpii->ev_state));
+ complete_all(&gpii->cmd_completion);
+ type &= ~(GPII_n_CNTXT_TYPE_IRQ_MSK_EV_CTRL);
+ }
+
+ /* channel control irq */
+ if (type & GPII_n_CNTXT_TYPE_IRQ_MSK_CH_CTRL) {
+ dev_dbg(gpii->gpi_dev->dev, "process CH CTRL interrupts\n");
+ gpi_process_ch_ctrl_irq(gpii);
+ type &= ~(GPII_n_CNTXT_TYPE_IRQ_MSK_CH_CTRL);
+ }
+
+ if (type) {
+ dev_err(gpii->gpi_dev->dev, "Unhandled interrupt status:0x%x\n", type);
+ gpi_process_gen_err_irq(gpii);
+ goto exit_irq;
+ }
+
+ offset = GPII_n_CNTXT_TYPE_IRQ_OFFS(gpii->gpii_id);
+ type = gpi_read_reg(gpii, gpii->regs + offset);
+ } while (type);
+
+exit_irq:
+ read_unlock_irqrestore(&gpii->pm_lock, flags);
+
+ return IRQ_HANDLED;
+}
+
+/* process DMA Immediate completion data events */
+static void gpi_process_imed_data_event(struct gchan *gchan,
+ struct immediate_data_event *imed_event)
+{
+ struct gpii *gpii = gchan->gpii;
+ struct gpi_ring *ch_ring = &gchan->ch_ring;
+ void *tre = ch_ring->base + (ch_ring->el_size * imed_event->tre_index);
+ struct dmaengine_result result;
+ struct gpi_desc *gpi_desc;
+ struct virt_dma_desc *vd;
+ unsigned long flags;
+ u32 chid;
+
+ /*
+ * If channel not active don't process event
+ */
+ if (gchan->pm_state != ACTIVE_STATE) {
+ dev_err(gpii->gpi_dev->dev, "skipping processing event because ch @ %s state\n",
+ TO_GPI_PM_STR(gchan->pm_state));
+ return;
+ }
+
+ spin_lock_irqsave(&gchan->vc.lock, flags);
+ vd = vchan_next_desc(&gchan->vc);
+ if (!vd) {
+ struct gpi_ere *gpi_ere;
+ struct gpi_tre *gpi_tre;
+
+ spin_unlock_irqrestore(&gchan->vc.lock, flags);
+ dev_dbg(gpii->gpi_dev->dev, "event without a pending descriptor!\n");
+ gpi_ere = (struct gpi_ere *)imed_event;
+ dev_dbg(gpii->gpi_dev->dev,
+ "Event: %08x %08x %08x %08x\n",
+ gpi_ere->dword[0], gpi_ere->dword[1],
+ gpi_ere->dword[2], gpi_ere->dword[3]);
+ gpi_tre = tre;
+ dev_dbg(gpii->gpi_dev->dev,
+ "Pending TRE: %08x %08x %08x %08x\n",
+ gpi_tre->dword[0], gpi_tre->dword[1],
+ gpi_tre->dword[2], gpi_tre->dword[3]);
+ return;
+ }
+ gpi_desc = to_gpi_desc(vd);
+ spin_unlock_irqrestore(&gchan->vc.lock, flags);
+
+ /*
+ * RP pointed by Event is to last TRE processed,
+ * we need to update ring rp to tre + 1
+ */
+ tre += ch_ring->el_size;
+ if (tre >= (ch_ring->base + ch_ring->len))
+ tre = ch_ring->base;
+ ch_ring->rp = tre;
+
+ /* make sure rp updates are immediately visible to all cores */
+ smp_wmb();
+
+ chid = imed_event->chid;
+ if (imed_event->code == MSM_GPI_TCE_EOT && gpii->ieob_set) {
+ if (chid == GPI_RX_CHAN)
+ goto gpi_free_desc;
+ else
+ return;
+ }
+
+ if (imed_event->code == MSM_GPI_TCE_UNEXP_ERR)
+ result.result = DMA_TRANS_ABORTED;
+ else
+ result.result = DMA_TRANS_NOERROR;
+ result.residue = gpi_desc->len - imed_event->length;
+
+ dma_cookie_complete(&vd->tx);
+ dmaengine_desc_get_callback_invoke(&vd->tx, &result);
+
+gpi_free_desc:
+ spin_lock_irqsave(&gchan->vc.lock, flags);
+ list_del(&vd->node);
+ spin_unlock_irqrestore(&gchan->vc.lock, flags);
+ kfree(gpi_desc);
+ gpi_desc = NULL;
+}
+
+/* processing transfer completion events */
+static void gpi_process_xfer_compl_event(struct gchan *gchan,
+ struct xfer_compl_event *compl_event)
+{
+ struct gpii *gpii = gchan->gpii;
+ struct gpi_ring *ch_ring = &gchan->ch_ring;
+ void *ev_rp = to_virtual(ch_ring, compl_event->ptr);
+ struct virt_dma_desc *vd;
+ struct gpi_desc *gpi_desc;
+ struct dmaengine_result result;
+ unsigned long flags;
+ u32 chid;
+
+ /* only process events on active channel */
+ if (unlikely(gchan->pm_state != ACTIVE_STATE)) {
+ dev_err(gpii->gpi_dev->dev, "skipping processing event because ch @ %s state\n",
+ TO_GPI_PM_STR(gchan->pm_state));
+ return;
+ }
+
+ spin_lock_irqsave(&gchan->vc.lock, flags);
+ vd = vchan_next_desc(&gchan->vc);
+ if (!vd) {
+ struct gpi_ere *gpi_ere;
+
+ spin_unlock_irqrestore(&gchan->vc.lock, flags);
+ dev_err(gpii->gpi_dev->dev, "Event without a pending descriptor!\n");
+ gpi_ere = (struct gpi_ere *)compl_event;
+ dev_err(gpii->gpi_dev->dev,
+ "Event: %08x %08x %08x %08x\n",
+ gpi_ere->dword[0], gpi_ere->dword[1],
+ gpi_ere->dword[2], gpi_ere->dword[3]);
+ return;
+ }
+
+ gpi_desc = to_gpi_desc(vd);
+ spin_unlock_irqrestore(&gchan->vc.lock, flags);
+
+ /*
+ * RP pointed by Event is to last TRE processed,
+ * we need to update ring rp to ev_rp + 1
+ */
+ ev_rp += ch_ring->el_size;
+ if (ev_rp >= (ch_ring->base + ch_ring->len))
+ ev_rp = ch_ring->base;
+ ch_ring->rp = ev_rp;
+
+ /* update must be visible to other cores */
+ smp_wmb();
+
+ chid = compl_event->chid;
+ if (compl_event->code == MSM_GPI_TCE_EOT && gpii->ieob_set) {
+ if (chid == GPI_RX_CHAN)
+ goto gpi_free_desc;
+ else
+ return;
+ }
+
+ if (compl_event->code == MSM_GPI_TCE_UNEXP_ERR) {
+ dev_err(gpii->gpi_dev->dev, "Error in Transaction\n");
+ result.result = DMA_TRANS_ABORTED;
+ } else {
+ dev_dbg(gpii->gpi_dev->dev, "Transaction Success\n");
+ result.result = DMA_TRANS_NOERROR;
+ }
+ result.residue = gpi_desc->len - compl_event->length;
+ dev_dbg(gpii->gpi_dev->dev, "Residue %d\n", result.residue);
+
+ dma_cookie_complete(&vd->tx);
+ dmaengine_desc_get_callback_invoke(&vd->tx, &result);
+
+gpi_free_desc:
+ spin_lock_irqsave(&gchan->vc.lock, flags);
+ list_del(&vd->node);
+ spin_unlock_irqrestore(&gchan->vc.lock, flags);
+ kfree(gpi_desc);
+ gpi_desc = NULL;
+}
+
+/* process all events */
+static void gpi_process_events(struct gpii *gpii)
+{
+ struct gpi_ring *ev_ring = &gpii->ev_ring;
+ phys_addr_t cntxt_rp;
+ void *rp;
+ union gpi_event *gpi_event;
+ struct gchan *gchan;
+ u32 chid, type;
+
+ cntxt_rp = gpi_read_reg(gpii, gpii->ev_ring_rp_lsb_reg);
+ rp = to_virtual(ev_ring, cntxt_rp);
+
+ do {
+ while (rp != ev_ring->rp) {
+ gpi_event = ev_ring->rp;
+ chid = gpi_event->xfer_compl_event.chid;
+ type = gpi_event->xfer_compl_event.type;
+
+ dev_dbg(gpii->gpi_dev->dev,
+ "Event: CHID:%u, type:%x %08x %08x %08x %08x\n",
+ chid, type, gpi_event->gpi_ere.dword[0],
+ gpi_event->gpi_ere.dword[1], gpi_event->gpi_ere.dword[2],
+ gpi_event->gpi_ere.dword[3]);
+
+ switch (type) {
+ case XFER_COMPLETE_EV_TYPE:
+ gchan = &gpii->gchan[chid];
+ gpi_process_xfer_compl_event(gchan,
+ &gpi_event->xfer_compl_event);
+ break;
+ case STALE_EV_TYPE:
+ dev_dbg(gpii->gpi_dev->dev, "stale event, not processing\n");
+ break;
+ case IMMEDIATE_DATA_EV_TYPE:
+ gchan = &gpii->gchan[chid];
+ gpi_process_imed_data_event(gchan,
+ &gpi_event->immediate_data_event);
+ break;
+ case QUP_NOTIF_EV_TYPE:
+ dev_dbg(gpii->gpi_dev->dev, "QUP_NOTIF_EV_TYPE\n");
+ break;
+ default:
+ dev_dbg(gpii->gpi_dev->dev,
+ "not supported event type:0x%x\n", type);
+ }
+ gpi_ring_recycle_ev_element(ev_ring);
+ }
+ gpi_write_ev_db(gpii, ev_ring, ev_ring->wp);
+
+ /* clear pending IEOB events */
+ gpi_write_reg(gpii, gpii->ieob_clr_reg, BIT(0));
+
+ cntxt_rp = gpi_read_reg(gpii, gpii->ev_ring_rp_lsb_reg);
+ rp = to_virtual(ev_ring, cntxt_rp);
+
+ } while (rp != ev_ring->rp);
+}
+
+/* processing events using tasklet */
+static void gpi_ev_tasklet(unsigned long data)
+{
+ struct gpii *gpii = (struct gpii *)data;
+
+ read_lock(&gpii->pm_lock);
+ if (!REG_ACCESS_VALID(gpii->pm_state)) {
+ read_unlock(&gpii->pm_lock);
+ dev_err(gpii->gpi_dev->dev, "not processing any events, pm_state:%s\n",
+ TO_GPI_PM_STR(gpii->pm_state));
+ return;
+ }
+
+ /* process the events */
+ gpi_process_events(gpii);
+
+ /* enable IEOB, switching back to interrupts */
+ gpi_config_interrupts(gpii, MASK_IEOB_SETTINGS, 1);
+ read_unlock(&gpii->pm_lock);
+}
+
+/* marks all pending events for the channel as stale */
+static void gpi_mark_stale_events(struct gchan *gchan)
+{
+ struct gpii *gpii = gchan->gpii;
+ struct gpi_ring *ev_ring = &gpii->ev_ring;
+ u32 cntxt_rp, local_rp;
+ void *ev_rp;
+
+ cntxt_rp = gpi_read_reg(gpii, gpii->ev_ring_rp_lsb_reg);
+
+ ev_rp = ev_ring->rp;
+ local_rp = (u32)to_physical(ev_ring, ev_rp);
+ while (local_rp != cntxt_rp) {
+ union gpi_event *gpi_event = ev_rp;
+ u32 chid = gpi_event->xfer_compl_event.chid;
+
+ if (chid == gchan->chid)
+ gpi_event->xfer_compl_event.type = STALE_EV_TYPE;
+ ev_rp += ev_ring->el_size;
+ if (ev_rp >= (ev_ring->base + ev_ring->len))
+ ev_rp = ev_ring->base;
+ cntxt_rp = gpi_read_reg(gpii, gpii->ev_ring_rp_lsb_reg);
+ local_rp = (u32)to_physical(ev_ring, ev_rp);
+ }
+}
+
+/* reset sw state and issue channel reset or de-alloc */
+static int gpi_reset_chan(struct gchan *gchan, enum gpi_cmd gpi_cmd)
+{
+ struct gpii *gpii = gchan->gpii;
+ struct gpi_ring *ch_ring = &gchan->ch_ring;
+ unsigned long flags;
+ LIST_HEAD(list);
+ int ret;
+
+ ret = gpi_send_cmd(gpii, gchan, gpi_cmd);
+ if (ret) {
+ dev_err(gpii->gpi_dev->dev, "Error with cmd:%s ret:%d\n",
+ TO_GPI_CMD_STR(gpi_cmd), ret);
+ return ret;
+ }
+
+ /* initialize the local ring ptrs */
+ ch_ring->rp = ch_ring->base;
+ ch_ring->wp = ch_ring->base;
+
+ /* visible to other cores */
+ smp_wmb();
+
+ /* check event ring for any stale events */
+ write_lock_irq(&gpii->pm_lock);
+ gpi_mark_stale_events(gchan);
+
+ /* remove all async descriptors */
+ spin_lock_irqsave(&gchan->vc.lock, flags);
+ vchan_get_all_descriptors(&gchan->vc, &list);
+ spin_unlock_irqrestore(&gchan->vc.lock, flags);
+ write_unlock_irq(&gpii->pm_lock);
+ vchan_dma_desc_free_list(&gchan->vc, &list);
+
+ return 0;
+}
+
+static int gpi_start_chan(struct gchan *gchan)
+{
+ struct gpii *gpii = gchan->gpii;
+ int ret;
+
+ ret = gpi_send_cmd(gpii, gchan, GPI_CH_CMD_START);
+ if (ret) {
+ dev_err(gpii->gpi_dev->dev, "Error with cmd:%s ret:%d\n",
+ TO_GPI_CMD_STR(GPI_CH_CMD_START), ret);
+ return ret;
+ }
+
+ /* gpii CH is active now */
+ write_lock_irq(&gpii->pm_lock);
+ gchan->pm_state = ACTIVE_STATE;
+ write_unlock_irq(&gpii->pm_lock);
+
+ return 0;
+}
+
+static int gpi_stop_chan(struct gchan *gchan)
+{
+ struct gpii *gpii = gchan->gpii;
+ int ret;
+
+ ret = gpi_send_cmd(gpii, gchan, GPI_CH_CMD_STOP);
+ if (ret) {
+ dev_err(gpii->gpi_dev->dev, "Error with cmd:%s ret:%d\n",
+ TO_GPI_CMD_STR(GPI_CH_CMD_STOP), ret);
+ return ret;
+ }
+
+ return 0;
+}
+
+/* allocate and configure the transfer channel */
+static int gpi_alloc_chan(struct gchan *chan, bool send_alloc_cmd)
+{
+ struct gpii *gpii = chan->gpii;
+ struct gpi_ring *ring = &chan->ch_ring;
+ int ret;
+ u32 id = gpii->gpii_id;
+ u32 chid = chan->chid;
+ u32 pair_chid = !chid;
+
+ if (send_alloc_cmd) {
+ ret = gpi_send_cmd(gpii, chan, GPI_CH_CMD_ALLOCATE);
+ if (ret) {
+ dev_err(gpii->gpi_dev->dev, "Error with cmd:%s ret:%d\n",
+ TO_GPI_CMD_STR(GPI_CH_CMD_ALLOCATE), ret);
+ return ret;
+ }
+ }
+
+ gpi_write_reg(gpii, chan->ch_cntxt_base_reg + CNTXT_0_CONFIG,
+ GPII_n_CH_k_CNTXT_0(ring->el_size, 0, chan->dir, GPI_CHTYPE_PROTO_GPI));
+ gpi_write_reg(gpii, chan->ch_cntxt_base_reg + CNTXT_1_R_LENGTH, ring->len);
+ gpi_write_reg(gpii, chan->ch_cntxt_base_reg + CNTXT_2_RING_BASE_LSB, ring->phys_addr);
+ gpi_write_reg(gpii, chan->ch_cntxt_base_reg + CNTXT_3_RING_BASE_MSB,
+ upper_32_bits(ring->phys_addr));
+ gpi_write_reg(gpii, chan->ch_cntxt_db_reg + CNTXT_5_RING_RP_MSB - CNTXT_4_RING_RP_LSB,
+ upper_32_bits(ring->phys_addr));
+ gpi_write_reg(gpii, gpii->regs + GPII_n_CH_k_SCRATCH_0_OFFS(id, chid),
+ GPII_n_CH_k_SCRATCH_0(pair_chid, chan->protocol, chan->seid));
+ gpi_write_reg(gpii, gpii->regs + GPII_n_CH_k_SCRATCH_1_OFFS(id, chid), 0);
+ gpi_write_reg(gpii, gpii->regs + GPII_n_CH_k_SCRATCH_2_OFFS(id, chid), 0);
+ gpi_write_reg(gpii, gpii->regs + GPII_n_CH_k_SCRATCH_3_OFFS(id, chid), 0);
+ gpi_write_reg(gpii, gpii->regs + GPII_n_CH_k_QOS_OFFS(id, chid), 1);
+
+ /* flush all the writes */
+ wmb();
+ return 0;
+}
+
+/* allocate and configure event ring */
+static int gpi_alloc_ev_chan(struct gpii *gpii)
+{
+ struct gpi_ring *ring = &gpii->ev_ring;
+ void __iomem *base = gpii->ev_cntxt_base_reg;
+ int ret;
+
+ ret = gpi_send_cmd(gpii, NULL, GPI_EV_CMD_ALLOCATE);
+ if (ret) {
+ dev_err(gpii->gpi_dev->dev, "error with cmd:%s ret:%d\n",
+ TO_GPI_CMD_STR(GPI_EV_CMD_ALLOCATE), ret);
+ return ret;
+ }
+
+ /* program event context */
+ gpi_write_reg(gpii, base + CNTXT_0_CONFIG,
+ GPII_n_EV_k_CNTXT_0(ring->el_size, GPI_INTTYPE_IRQ, GPI_CHTYPE_GPI_EV));
+ gpi_write_reg(gpii, base + CNTXT_1_R_LENGTH, ring->len);
+ gpi_write_reg(gpii, base + CNTXT_2_RING_BASE_LSB, lower_32_bits(ring->phys_addr));
+ gpi_write_reg(gpii, base + CNTXT_3_RING_BASE_MSB, upper_32_bits(ring->phys_addr));
+ gpi_write_reg(gpii, gpii->ev_cntxt_db_reg + CNTXT_5_RING_RP_MSB - CNTXT_4_RING_RP_LSB,
+ upper_32_bits(ring->phys_addr));
+ gpi_write_reg(gpii, base + CNTXT_8_RING_INT_MOD, 0);
+ gpi_write_reg(gpii, base + CNTXT_10_RING_MSI_LSB, 0);
+ gpi_write_reg(gpii, base + CNTXT_11_RING_MSI_MSB, 0);
+ gpi_write_reg(gpii, base + CNTXT_8_RING_INT_MOD, 0);
+ gpi_write_reg(gpii, base + CNTXT_12_RING_RP_UPDATE_LSB, 0);
+ gpi_write_reg(gpii, base + CNTXT_13_RING_RP_UPDATE_MSB, 0);
+
+ /* add events to ring */
+ ring->wp = (ring->base + ring->len - ring->el_size);
+
+ /* flush all the writes */
+ wmb();
+
+ /* gpii is active now */
+ write_lock_irq(&gpii->pm_lock);
+ gpii->pm_state = ACTIVE_STATE;
+ write_unlock_irq(&gpii->pm_lock);
+ gpi_write_ev_db(gpii, ring, ring->wp);
+
+ return 0;
+}
+
+/* calculate # of ERE/TRE available to queue */
+static int gpi_ring_num_elements_avail(const struct gpi_ring * const ring)
+{
+ int elements = 0;
+
+ if (ring->wp < ring->rp) {
+ elements = ((ring->rp - ring->wp) / ring->el_size) - 1;
+ } else {
+ elements = (ring->rp - ring->base) / ring->el_size;
+ elements += ((ring->base + ring->len - ring->wp) / ring->el_size) - 1;
+ }
+
+ return elements;
+}
+
+static int gpi_ring_add_element(struct gpi_ring *ring, void **wp)
+{
+ if (gpi_ring_num_elements_avail(ring) <= 0)
+ return -ENOMEM;
+
+ *wp = ring->wp;
+ ring->wp += ring->el_size;
+ if (ring->wp >= (ring->base + ring->len))
+ ring->wp = ring->base;
+
+ /* visible to other cores */
+ smp_wmb();
+
+ return 0;
+}
+
+static void gpi_ring_recycle_ev_element(struct gpi_ring *ring)
+{
+ /* Update the WP */
+ ring->wp += ring->el_size;
+ if (ring->wp >= (ring->base + ring->len))
+ ring->wp = ring->base;
+
+ /* Update the RP */
+ ring->rp += ring->el_size;
+ if (ring->rp >= (ring->base + ring->len))
+ ring->rp = ring->base;
+
+ /* visible to other cores */
+ smp_wmb();
+}
+
+static void gpi_free_ring(struct gpi_ring *ring,
+ struct gpii *gpii)
+{
+ dma_free_coherent(gpii->gpi_dev->dev, ring->alloc_size,
+ ring->pre_aligned, ring->dma_handle);
+ memset(ring, 0, sizeof(*ring));
+}
+
+/* allocate memory for transfer and event rings */
+static int gpi_alloc_ring(struct gpi_ring *ring, u32 elements,
+ u32 el_size, struct gpii *gpii)
+{
+ u64 len = elements * el_size;
+ int bit;
+
+ /* ring len must be power of 2 */
+ bit = find_last_bit((unsigned long *)&len, 32);
+ if (((1 << bit) - 1) & len)
+ bit++;
+ len = 1 << bit;
+ ring->alloc_size = (len + (len - 1));
+ dev_dbg(gpii->gpi_dev->dev,
+ "#el:%u el_size:%u len:%u actual_len:%llu alloc_size:%zu\n",
+ elements, el_size, (elements * el_size), len,
+ ring->alloc_size);
+
+ ring->pre_aligned = dma_alloc_coherent(gpii->gpi_dev->dev,
+ ring->alloc_size,
+ &ring->dma_handle, GFP_KERNEL);
+ if (!ring->pre_aligned) {
+ dev_err(gpii->gpi_dev->dev, "could not alloc size:%zu mem for ring\n",
+ ring->alloc_size);
+ return -ENOMEM;
+ }
+
+ /* align the physical mem */
+ ring->phys_addr = (ring->dma_handle + (len - 1)) & ~(len - 1);
+ ring->base = ring->pre_aligned + (ring->phys_addr - ring->dma_handle);
+ ring->rp = ring->base;
+ ring->wp = ring->base;
+ ring->len = len;
+ ring->el_size = el_size;
+ ring->elements = ring->len / ring->el_size;
+ memset(ring->base, 0, ring->len);
+ ring->configured = true;
+
+ /* update to other cores */
+ smp_wmb();
+
+ dev_dbg(gpii->gpi_dev->dev,
+ "phy_pre:%pad phy_alig:%pa len:%u el_size:%u elements:%u\n",
+ &ring->dma_handle, &ring->phys_addr, ring->len,
+ ring->el_size, ring->elements);
+
+ return 0;
+}
+
+/* copy tre into transfer ring */
+static void gpi_queue_xfer(struct gpii *gpii, struct gchan *gchan,
+ struct gpi_tre *gpi_tre, void **wp)
+{
+ struct gpi_tre *ch_tre;
+ int ret;
+
+ /* get next tre location we can copy */
+ ret = gpi_ring_add_element(&gchan->ch_ring, (void **)&ch_tre);
+ if (unlikely(ret)) {
+ dev_err(gpii->gpi_dev->dev, "Error adding ring element to xfer ring\n");
+ return;
+ }
+
+ /* copy the tre info */
+ memcpy(ch_tre, gpi_tre, sizeof(*ch_tre));
+ *wp = ch_tre;
+}
+
+/* reset and restart transfer channel */
+static int gpi_terminate_all(struct dma_chan *chan)
+{
+ struct gchan *gchan = to_gchan(chan);
+ struct gpii *gpii = gchan->gpii;
+ int schid, echid, i;
+ int ret = 0;
+
+ mutex_lock(&gpii->ctrl_lock);
+
+ /*
+ * treat both channels as a group if its protocol is not UART
+ * STOP, RESET, or START needs to be in lockstep
+ */
+ schid = (gchan->protocol == QCOM_GPI_UART) ? gchan->chid : 0;
+ echid = (gchan->protocol == QCOM_GPI_UART) ? schid + 1 : MAX_CHANNELS_PER_GPII;
+
+ /* stop the channel */
+ for (i = schid; i < echid; i++) {
+ gchan = &gpii->gchan[i];
+
+ /* disable ch state so no more TRE processing */
+ write_lock_irq(&gpii->pm_lock);
+ gchan->pm_state = PREPARE_TERMINATE;
+ write_unlock_irq(&gpii->pm_lock);
+
+ /* send command to Stop the channel */
+ ret = gpi_stop_chan(gchan);
+ }
+
+ /* reset the channels (clears any pending tre) */
+ for (i = schid; i < echid; i++) {
+ gchan = &gpii->gchan[i];
+
+ ret = gpi_reset_chan(gchan, GPI_CH_CMD_RESET);
+ if (ret) {
+ dev_err(gpii->gpi_dev->dev, "Error resetting channel ret:%d\n", ret);
+ goto terminate_exit;
+ }
+
+ /* reprogram channel CNTXT */
+ ret = gpi_alloc_chan(gchan, false);
+ if (ret) {
+ dev_err(gpii->gpi_dev->dev, "Error alloc_channel ret:%d\n", ret);
+ goto terminate_exit;
+ }
+ }
+
+ /* restart the channels */
+ for (i = schid; i < echid; i++) {
+ gchan = &gpii->gchan[i];
+
+ ret = gpi_start_chan(gchan);
+ if (ret) {
+ dev_err(gpii->gpi_dev->dev, "Error Starting Channel ret:%d\n", ret);
+ goto terminate_exit;
+ }
+ }
+
+terminate_exit:
+ mutex_unlock(&gpii->ctrl_lock);
+ return ret;
+}
+
+/* pause dma transfer for all channels */
+static int gpi_pause(struct dma_chan *chan)
+{
+ struct gchan *gchan = to_gchan(chan);
+ struct gpii *gpii = gchan->gpii;
+ int i, ret;
+
+ mutex_lock(&gpii->ctrl_lock);
+
+ /*
+ * pause/resume are per gpii not per channel, so
+ * client needs to call pause only once
+ */
+ if (gpii->pm_state == PAUSE_STATE) {
+ dev_dbg(gpii->gpi_dev->dev, "channel is already paused\n");
+ mutex_unlock(&gpii->ctrl_lock);
+ return 0;
+ }
+
+ /* send stop command to stop the channels */
+ for (i = 0; i < MAX_CHANNELS_PER_GPII; i++) {
+ ret = gpi_stop_chan(&gpii->gchan[i]);
+ if (ret) {
+ mutex_unlock(&gpii->ctrl_lock);
+ return ret;
+ }
+ }
+
+ disable_irq(gpii->irq);
+
+ /* Wait for threads to complete out */
+ tasklet_kill(&gpii->ev_task);
+
+ write_lock_irq(&gpii->pm_lock);
+ gpii->pm_state = PAUSE_STATE;
+ write_unlock_irq(&gpii->pm_lock);
+ mutex_unlock(&gpii->ctrl_lock);
+
+ return 0;
+}
+
+/* resume dma transfer */
+static int gpi_resume(struct dma_chan *chan)
+{
+ struct gchan *gchan = to_gchan(chan);
+ struct gpii *gpii = gchan->gpii;
+ int i, ret;
+
+ mutex_lock(&gpii->ctrl_lock);
+ if (gpii->pm_state == ACTIVE_STATE) {
+ dev_dbg(gpii->gpi_dev->dev, "channel is already active\n");
+ mutex_unlock(&gpii->ctrl_lock);
+ return 0;
+ }
+
+ enable_irq(gpii->irq);
+
+ /* send start command to start the channels */
+ for (i = 0; i < MAX_CHANNELS_PER_GPII; i++) {
+ ret = gpi_send_cmd(gpii, &gpii->gchan[i], GPI_CH_CMD_START);
+ if (ret) {
+ dev_err(gpii->gpi_dev->dev, "Error starting chan, ret:%d\n", ret);
+ mutex_unlock(&gpii->ctrl_lock);
+ return ret;
+ }
+ }
+
+ write_lock_irq(&gpii->pm_lock);
+ gpii->pm_state = ACTIVE_STATE;
+ write_unlock_irq(&gpii->pm_lock);
+ mutex_unlock(&gpii->ctrl_lock);
+
+ return 0;
+}
+
+static void gpi_desc_free(struct virt_dma_desc *vd)
+{
+ struct gpi_desc *gpi_desc = to_gpi_desc(vd);
+
+ kfree(gpi_desc);
+ gpi_desc = NULL;
+}
+
+static int
+gpi_peripheral_config(struct dma_chan *chan, struct dma_slave_config *config)
+{
+ struct gchan *gchan = to_gchan(chan);
+
+ if (!config->peripheral_config)
+ return -EINVAL;
+
+ gchan->config = krealloc(gchan->config, config->peripheral_size, GFP_NOWAIT);
+ if (!gchan->config)
+ return -ENOMEM;
+
+ memcpy(gchan->config, config->peripheral_config, config->peripheral_size);
+
+ return 0;
+}
+
+static int gpi_create_i2c_tre(struct gchan *chan, struct gpi_desc *desc,
+ struct scatterlist *sgl, enum dma_transfer_direction direction)
+{
+ struct gpi_i2c_config *i2c = chan->config;
+ struct device *dev = chan->gpii->gpi_dev->dev;
+ unsigned int tre_idx = 0;
+ dma_addr_t address;
+ struct gpi_tre *tre;
+ unsigned int i;
+
+ /* first create config tre if applicable */
+ if (i2c->set_config) {
+ tre = &desc->tre[tre_idx];
+ tre_idx++;
+
+ tre->dword[0] = u32_encode_bits(i2c->low_count, TRE_I2C_C0_TLOW);
+ tre->dword[0] |= u32_encode_bits(i2c->high_count, TRE_I2C_C0_THIGH);
+ tre->dword[0] |= u32_encode_bits(i2c->cycle_count, TRE_I2C_C0_TCYL);
+ tre->dword[0] |= u32_encode_bits(i2c->pack_enable, TRE_I2C_C0_TX_PACK);
+ tre->dword[0] |= u32_encode_bits(i2c->pack_enable, TRE_I2C_C0_RX_PACK);
+
+ tre->dword[1] = 0;
+
+ tre->dword[2] = u32_encode_bits(i2c->clk_div, TRE_C0_CLK_DIV);
+
+ tre->dword[3] = u32_encode_bits(TRE_TYPE_CONFIG0, TRE_FLAGS_TYPE);
+ tre->dword[3] |= u32_encode_bits(1, TRE_FLAGS_CHAIN);
+ }
+
+ /* create the GO tre for Tx */
+ if (i2c->op == I2C_WRITE) {
+ tre = &desc->tre[tre_idx];
+ tre_idx++;
+
+ if (i2c->multi_msg)
+ tre->dword[0] = u32_encode_bits(I2C_READ, TRE_I2C_GO_CMD);
+ else
+ tre->dword[0] = u32_encode_bits(i2c->op, TRE_I2C_GO_CMD);
+
+ tre->dword[0] |= u32_encode_bits(i2c->addr, TRE_I2C_GO_ADDR);
+ tre->dword[0] |= u32_encode_bits(i2c->stretch, TRE_I2C_GO_STRETCH);
+
+ tre->dword[1] = 0;
+ tre->dword[2] = u32_encode_bits(i2c->rx_len, TRE_RX_LEN);
+
+ tre->dword[3] = u32_encode_bits(TRE_TYPE_GO, TRE_FLAGS_TYPE);
+
+ if (i2c->multi_msg)
+ tre->dword[3] |= u32_encode_bits(1, TRE_FLAGS_LINK);
+ else
+ tre->dword[3] |= u32_encode_bits(1, TRE_FLAGS_CHAIN);
+ }
+
+ if (i2c->op == I2C_READ || i2c->multi_msg == false) {
+ /* create the DMA TRE */
+ tre = &desc->tre[tre_idx];
+ tre_idx++;
+
+ address = sg_dma_address(sgl);
+ tre->dword[0] = lower_32_bits(address);
+ tre->dword[1] = upper_32_bits(address);
+
+ tre->dword[2] = u32_encode_bits(sg_dma_len(sgl), TRE_DMA_LEN);
+
+ tre->dword[3] = u32_encode_bits(TRE_TYPE_DMA, TRE_FLAGS_TYPE);
+ tre->dword[3] |= u32_encode_bits(1, TRE_FLAGS_IEOT);
+ }
+
+ for (i = 0; i < tre_idx; i++)
+ dev_dbg(dev, "TRE:%d %x:%x:%x:%x\n", i, desc->tre[i].dword[0],
+ desc->tre[i].dword[1], desc->tre[i].dword[2], desc->tre[i].dword[3]);
+
+ return tre_idx;
+}
+
+static int gpi_create_spi_tre(struct gchan *chan, struct gpi_desc *desc,
+ struct scatterlist *sgl, enum dma_transfer_direction direction)
+{
+ struct gpi_spi_config *spi = chan->config;
+ struct device *dev = chan->gpii->gpi_dev->dev;
+ unsigned int tre_idx = 0;
+ dma_addr_t address;
+ struct gpi_tre *tre;
+ unsigned int i;
+
+ /* first create config tre if applicable */
+ if (direction == DMA_MEM_TO_DEV && spi->set_config) {
+ tre = &desc->tre[tre_idx];
+ tre_idx++;
+
+ tre->dword[0] = u32_encode_bits(spi->word_len, TRE_SPI_C0_WORD_SZ);
+ tre->dword[0] |= u32_encode_bits(spi->loopback_en, TRE_SPI_C0_LOOPBACK);
+ tre->dword[0] |= u32_encode_bits(spi->clock_pol_high, TRE_SPI_C0_CPOL);
+ tre->dword[0] |= u32_encode_bits(spi->data_pol_high, TRE_SPI_C0_CPHA);
+ tre->dword[0] |= u32_encode_bits(spi->pack_en, TRE_SPI_C0_TX_PACK);
+ tre->dword[0] |= u32_encode_bits(spi->pack_en, TRE_SPI_C0_RX_PACK);
+
+ tre->dword[1] = 0;
+
+ tre->dword[2] = u32_encode_bits(spi->clk_div, TRE_C0_CLK_DIV);
+ tre->dword[2] |= u32_encode_bits(spi->clk_src, TRE_C0_CLK_SRC);
+
+ tre->dword[3] = u32_encode_bits(TRE_TYPE_CONFIG0, TRE_FLAGS_TYPE);
+ tre->dword[3] |= u32_encode_bits(1, TRE_FLAGS_CHAIN);
+ }
+
+ /* create the GO tre for Tx */
+ if (direction == DMA_MEM_TO_DEV) {
+ tre = &desc->tre[tre_idx];
+ tre_idx++;
+
+ tre->dword[0] = u32_encode_bits(spi->fragmentation, TRE_SPI_GO_FRAG);
+ tre->dword[0] |= u32_encode_bits(spi->cs, TRE_SPI_GO_CS);
+ tre->dword[0] |= u32_encode_bits(spi->cmd, TRE_SPI_GO_CMD);
+
+ tre->dword[1] = 0;
+
+ tre->dword[2] = u32_encode_bits(spi->rx_len, TRE_RX_LEN);
+
+ tre->dword[3] = u32_encode_bits(TRE_TYPE_GO, TRE_FLAGS_TYPE);
+ if (spi->cmd == SPI_RX) {
+ tre->dword[3] |= u32_encode_bits(1, TRE_FLAGS_IEOB);
+ tre->dword[3] |= u32_encode_bits(1, TRE_FLAGS_LINK);
+ } else if (spi->cmd == SPI_TX) {
+ tre->dword[3] |= u32_encode_bits(1, TRE_FLAGS_CHAIN);
+ } else { /* SPI_DUPLEX */
+ tre->dword[3] |= u32_encode_bits(1, TRE_FLAGS_CHAIN);
+ tre->dword[3] |= u32_encode_bits(1, TRE_FLAGS_LINK);
+ }
+ }
+
+ /* create the dma tre */
+ tre = &desc->tre[tre_idx];
+ tre_idx++;
+
+ address = sg_dma_address(sgl);
+ tre->dword[0] = lower_32_bits(address);
+ tre->dword[1] = upper_32_bits(address);
+
+ tre->dword[2] = u32_encode_bits(sg_dma_len(sgl), TRE_DMA_LEN);
+
+ tre->dword[3] = u32_encode_bits(TRE_TYPE_DMA, TRE_FLAGS_TYPE);
+ if (direction == DMA_MEM_TO_DEV)
+ tre->dword[3] |= u32_encode_bits(1, TRE_FLAGS_IEOT);
+
+ for (i = 0; i < tre_idx; i++)
+ dev_dbg(dev, "TRE:%d %x:%x:%x:%x\n", i, desc->tre[i].dword[0],
+ desc->tre[i].dword[1], desc->tre[i].dword[2], desc->tre[i].dword[3]);
+
+ return tre_idx;
+}
+
+/* copy tre into transfer ring */
+static struct dma_async_tx_descriptor *
+gpi_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
+ unsigned int sg_len, enum dma_transfer_direction direction,
+ unsigned long flags, void *context)
+{
+ struct gchan *gchan = to_gchan(chan);
+ struct gpii *gpii = gchan->gpii;
+ struct device *dev = gpii->gpi_dev->dev;
+ struct gpi_ring *ch_ring = &gchan->ch_ring;
+ struct gpi_desc *gpi_desc;
+ u32 nr, nr_tre = 0;
+ u8 set_config;
+ int i;
+
+ gpii->ieob_set = false;
+ if (!is_slave_direction(direction)) {
+ dev_err(gpii->gpi_dev->dev, "invalid dma direction: %d\n", direction);
+ return NULL;
+ }
+
+ if (sg_len > 1) {
+ dev_err(dev, "Multi sg sent, we support only one atm: %d\n", sg_len);
+ return NULL;
+ }
+
+ nr_tre = 3;
+ set_config = *(u32 *)gchan->config;
+ if (!set_config)
+ nr_tre = 2;
+ if (direction == DMA_DEV_TO_MEM) /* rx */
+ nr_tre = 1;
+
+ /* calculate # of elements required & available */
+ nr = gpi_ring_num_elements_avail(ch_ring);
+ if (nr < nr_tre) {
+ dev_err(dev, "not enough space in ring, avail:%u required:%u\n", nr, nr_tre);
+ return NULL;
+ }
+
+ gpi_desc = kzalloc(sizeof(*gpi_desc), GFP_NOWAIT);
+ if (!gpi_desc)
+ return NULL;
+
+ /* create TREs for xfer */
+ if (gchan->protocol == QCOM_GPI_SPI) {
+ i = gpi_create_spi_tre(gchan, gpi_desc, sgl, direction);
+ } else if (gchan->protocol == QCOM_GPI_I2C) {
+ i = gpi_create_i2c_tre(gchan, gpi_desc, sgl, direction);
+ } else {
+ dev_err(dev, "invalid peripheral: %d\n", gchan->protocol);
+ kfree(gpi_desc);
+ return NULL;
+ }
+
+ /* set up the descriptor */
+ gpi_desc->gchan = gchan;
+ gpi_desc->len = sg_dma_len(sgl);
+ gpi_desc->num_tre = i;
+
+ return vchan_tx_prep(&gchan->vc, &gpi_desc->vd, flags);
+}
+
+/* rings transfer ring db to being transfer */
+static void gpi_issue_pending(struct dma_chan *chan)
+{
+ struct gchan *gchan = to_gchan(chan);
+ struct gpii *gpii = gchan->gpii;
+ unsigned long flags, pm_lock_flags;
+ struct virt_dma_desc *vd = NULL;
+ struct gpi_desc *gpi_desc;
+ struct gpi_ring *ch_ring = &gchan->ch_ring;
+ void *tre, *wp = NULL;
+ int i;
+
+ read_lock_irqsave(&gpii->pm_lock, pm_lock_flags);
+
+ /* move all submitted discriptors to issued list */
+ spin_lock_irqsave(&gchan->vc.lock, flags);
+ if (vchan_issue_pending(&gchan->vc))
+ vd = list_last_entry(&gchan->vc.desc_issued,
+ struct virt_dma_desc, node);
+ spin_unlock_irqrestore(&gchan->vc.lock, flags);
+
+ /* nothing to do list is empty */
+ if (!vd) {
+ read_unlock_irqrestore(&gpii->pm_lock, pm_lock_flags);
+ return;
+ }
+
+ gpi_desc = to_gpi_desc(vd);
+ for (i = 0; i < gpi_desc->num_tre; i++) {
+ tre = &gpi_desc->tre[i];
+ gpi_queue_xfer(gpii, gchan, tre, &wp);
+ }
+
+ gpi_desc->db = ch_ring->wp;
+ gpi_write_ch_db(gchan, &gchan->ch_ring, gpi_desc->db);
+ read_unlock_irqrestore(&gpii->pm_lock, pm_lock_flags);
+}
+
+static int gpi_ch_init(struct gchan *gchan)
+{
+ struct gpii *gpii = gchan->gpii;
+ const int ev_factor = gpii->gpi_dev->ev_factor;
+ u32 elements;
+ int i = 0, ret = 0;
+
+ gchan->pm_state = CONFIG_STATE;
+
+ /* check if both channels are configured before continue */
+ for (i = 0; i < MAX_CHANNELS_PER_GPII; i++)
+ if (gpii->gchan[i].pm_state != CONFIG_STATE)
+ goto exit_gpi_init;
+
+ /* protocol must be same for both channels */
+ if (gpii->gchan[0].protocol != gpii->gchan[1].protocol) {
+ dev_err(gpii->gpi_dev->dev, "protocol did not match protocol %u != %u\n",
+ gpii->gchan[0].protocol, gpii->gchan[1].protocol);
+ ret = -EINVAL;
+ goto exit_gpi_init;
+ }
+
+ /* allocate memory for event ring */
+ elements = CHAN_TRES << ev_factor;
+ ret = gpi_alloc_ring(&gpii->ev_ring, elements,
+ sizeof(union gpi_event), gpii);
+ if (ret)
+ goto exit_gpi_init;
+
+ /* configure interrupts */
+ write_lock_irq(&gpii->pm_lock);
+ gpii->pm_state = PREPARE_HARDWARE;
+ write_unlock_irq(&gpii->pm_lock);
+ ret = gpi_config_interrupts(gpii, DEFAULT_IRQ_SETTINGS, 0);
+ if (ret) {
+ dev_err(gpii->gpi_dev->dev, "error config. interrupts, ret:%d\n", ret);
+ goto error_config_int;
+ }
+
+ /* allocate event rings */
+ ret = gpi_alloc_ev_chan(gpii);
+ if (ret) {
+ dev_err(gpii->gpi_dev->dev, "error alloc_ev_chan:%d\n", ret);
+ goto error_alloc_ev_ring;
+ }
+
+ /* Allocate all channels */
+ for (i = 0; i < MAX_CHANNELS_PER_GPII; i++) {
+ ret = gpi_alloc_chan(&gpii->gchan[i], true);
+ if (ret) {
+ dev_err(gpii->gpi_dev->dev, "Error allocating chan:%d\n", ret);
+ goto error_alloc_chan;
+ }
+ }
+
+ /* start channels */
+ for (i = 0; i < MAX_CHANNELS_PER_GPII; i++) {
+ ret = gpi_start_chan(&gpii->gchan[i]);
+ if (ret) {
+ dev_err(gpii->gpi_dev->dev, "Error start chan:%d\n", ret);
+ goto error_start_chan;
+ }
+ }
+ return ret;
+
+error_start_chan:
+ for (i = i - 1; i >= 0; i--) {
+ gpi_stop_chan(&gpii->gchan[i]);
+ gpi_send_cmd(gpii, gchan, GPI_CH_CMD_RESET);
+ }
+ i = 2;
+error_alloc_chan:
+ for (i = i - 1; i >= 0; i--)
+ gpi_reset_chan(gchan, GPI_CH_CMD_DE_ALLOC);
+error_alloc_ev_ring:
+ gpi_disable_interrupts(gpii);
+error_config_int:
+ gpi_free_ring(&gpii->ev_ring, gpii);
+exit_gpi_init:
+ return ret;
+}
+
+/* release all channel resources */
+static void gpi_free_chan_resources(struct dma_chan *chan)
+{
+ struct gchan *gchan = to_gchan(chan);
+ struct gpii *gpii = gchan->gpii;
+ enum gpi_pm_state cur_state;
+ int ret, i;
+
+ mutex_lock(&gpii->ctrl_lock);
+
+ cur_state = gchan->pm_state;
+
+ /* disable ch state so no more TRE processing for this channel */
+ write_lock_irq(&gpii->pm_lock);
+ gchan->pm_state = PREPARE_TERMINATE;
+ write_unlock_irq(&gpii->pm_lock);
+
+ /* attempt to do graceful hardware shutdown */
+ if (cur_state == ACTIVE_STATE) {
+ gpi_stop_chan(gchan);
+
+ ret = gpi_send_cmd(gpii, gchan, GPI_CH_CMD_RESET);
+ if (ret)
+ dev_err(gpii->gpi_dev->dev, "error resetting channel:%d\n", ret);
+
+ gpi_reset_chan(gchan, GPI_CH_CMD_DE_ALLOC);
+ }
+
+ /* free all allocated memory */
+ gpi_free_ring(&gchan->ch_ring, gpii);
+ vchan_free_chan_resources(&gchan->vc);
+ kfree(gchan->config);
+
+ write_lock_irq(&gpii->pm_lock);
+ gchan->pm_state = DISABLE_STATE;
+ write_unlock_irq(&gpii->pm_lock);
+
+ /* if other rings are still active exit */
+ for (i = 0; i < MAX_CHANNELS_PER_GPII; i++)
+ if (gpii->gchan[i].ch_ring.configured)
+ goto exit_free;
+
+ /* deallocate EV Ring */
+ cur_state = gpii->pm_state;
+ write_lock_irq(&gpii->pm_lock);
+ gpii->pm_state = PREPARE_TERMINATE;
+ write_unlock_irq(&gpii->pm_lock);
+
+ /* wait for threads to complete out */
+ tasklet_kill(&gpii->ev_task);
+
+ /* send command to de allocate event ring */
+ if (cur_state == ACTIVE_STATE)
+ gpi_send_cmd(gpii, NULL, GPI_EV_CMD_DEALLOC);
+
+ gpi_free_ring(&gpii->ev_ring, gpii);
+
+ /* disable interrupts */
+ if (cur_state == ACTIVE_STATE)
+ gpi_disable_interrupts(gpii);
+
+ /* set final state to disable */
+ write_lock_irq(&gpii->pm_lock);
+ gpii->pm_state = DISABLE_STATE;
+ write_unlock_irq(&gpii->pm_lock);
+
+exit_free:
+ mutex_unlock(&gpii->ctrl_lock);
+}
+
+/* allocate channel resources */
+static int gpi_alloc_chan_resources(struct dma_chan *chan)
+{
+ struct gchan *gchan = to_gchan(chan);
+ struct gpii *gpii = gchan->gpii;
+ int ret;
+
+ mutex_lock(&gpii->ctrl_lock);
+
+ /* allocate memory for transfer ring */
+ ret = gpi_alloc_ring(&gchan->ch_ring, CHAN_TRES,
+ sizeof(struct gpi_tre), gpii);
+ if (ret)
+ goto xfer_alloc_err;
+
+ ret = gpi_ch_init(gchan);
+
+ mutex_unlock(&gpii->ctrl_lock);
+
+ return ret;
+xfer_alloc_err:
+ mutex_unlock(&gpii->ctrl_lock);
+
+ return ret;
+}
+
+static int gpi_find_avail_gpii(struct gpi_dev *gpi_dev, u32 seid)
+{
+ struct gchan *tx_chan, *rx_chan;
+ unsigned int gpii;
+
+ /* check if same seid is already configured for another chid */
+ for (gpii = 0; gpii < gpi_dev->max_gpii; gpii++) {
+ if (!((1 << gpii) & gpi_dev->gpii_mask))
+ continue;
+
+ tx_chan = &gpi_dev->gpiis[gpii].gchan[GPI_TX_CHAN];
+ rx_chan = &gpi_dev->gpiis[gpii].gchan[GPI_RX_CHAN];
+
+ if (rx_chan->vc.chan.client_count && rx_chan->seid == seid)
+ return gpii;
+ if (tx_chan->vc.chan.client_count && tx_chan->seid == seid)
+ return gpii;
+ }
+
+ /* no channels configured with same seid, return next avail gpii */
+ for (gpii = 0; gpii < gpi_dev->max_gpii; gpii++) {
+ if (!((1 << gpii) & gpi_dev->gpii_mask))
+ continue;
+
+ tx_chan = &gpi_dev->gpiis[gpii].gchan[GPI_TX_CHAN];
+ rx_chan = &gpi_dev->gpiis[gpii].gchan[GPI_RX_CHAN];
+
+ /* check if gpii is configured */
+ if (tx_chan->vc.chan.client_count ||
+ rx_chan->vc.chan.client_count)
+ continue;
+
+ /* found a free gpii */
+ return gpii;
+ }
+
+ /* no gpii instance available to use */
+ return -EIO;
+}
+
+/* gpi_of_dma_xlate: open client requested channel */
+static struct dma_chan *gpi_of_dma_xlate(struct of_phandle_args *args,
+ struct of_dma *of_dma)
+{
+ struct gpi_dev *gpi_dev = (struct gpi_dev *)of_dma->of_dma_data;
+ u32 seid, chid;
+ int gpii;
+ struct gchan *gchan;
+
+ if (args->args_count < 3) {
+ dev_err(gpi_dev->dev, "gpii require minimum 2 args, client passed:%d args\n",
+ args->args_count);
+ return NULL;
+ }
+
+ chid = args->args[0];
+ if (chid >= MAX_CHANNELS_PER_GPII) {
+ dev_err(gpi_dev->dev, "gpii channel:%d not valid\n", chid);
+ return NULL;
+ }
+
+ seid = args->args[1];
+
+ /* find next available gpii to use */
+ gpii = gpi_find_avail_gpii(gpi_dev, seid);
+ if (gpii < 0) {
+ dev_err(gpi_dev->dev, "no available gpii instances\n");
+ return NULL;
+ }
+
+ gchan = &gpi_dev->gpiis[gpii].gchan[chid];
+ if (gchan->vc.chan.client_count) {
+ dev_err(gpi_dev->dev, "gpii:%d chid:%d seid:%d already configured\n",
+ gpii, chid, gchan->seid);
+ return NULL;
+ }
+
+ gchan->seid = seid;
+ gchan->protocol = args->args[2];
+
+ return dma_get_slave_channel(&gchan->vc.chan);
+}
+
+static int gpi_probe(struct platform_device *pdev)
+{
+ struct gpi_dev *gpi_dev;
+ unsigned int i;
+ u32 ee_offset;
+ int ret;
+
+ gpi_dev = devm_kzalloc(&pdev->dev, sizeof(*gpi_dev), GFP_KERNEL);
+ if (!gpi_dev)
+ return -ENOMEM;
+
+ gpi_dev->dev = &pdev->dev;
+ gpi_dev->res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ gpi_dev->regs = devm_ioremap_resource(gpi_dev->dev, gpi_dev->res);
+ if (IS_ERR(gpi_dev->regs))
+ return PTR_ERR(gpi_dev->regs);
+ gpi_dev->ee_base = gpi_dev->regs;
+
+ ret = of_property_read_u32(gpi_dev->dev->of_node, "dma-channels",
+ &gpi_dev->max_gpii);
+ if (ret) {
+ dev_err(gpi_dev->dev, "missing 'max-no-gpii' DT node\n");
+ return ret;
+ }
+
+ ret = of_property_read_u32(gpi_dev->dev->of_node, "dma-channel-mask",
+ &gpi_dev->gpii_mask);
+ if (ret) {
+ dev_err(gpi_dev->dev, "missing 'gpii-mask' DT node\n");
+ return ret;
+ }
+
+ ee_offset = (uintptr_t)device_get_match_data(gpi_dev->dev);
+ gpi_dev->ee_base = gpi_dev->ee_base - ee_offset;
+
+ gpi_dev->ev_factor = EV_FACTOR;
+
+ ret = dma_set_mask(gpi_dev->dev, DMA_BIT_MASK(64));
+ if (ret) {
+ dev_err(gpi_dev->dev, "Error setting dma_mask to 64, ret:%d\n", ret);
+ return ret;
+ }
+
+ gpi_dev->gpiis = devm_kzalloc(gpi_dev->dev, sizeof(*gpi_dev->gpiis) *
+ gpi_dev->max_gpii, GFP_KERNEL);
+ if (!gpi_dev->gpiis)
+ return -ENOMEM;
+
+ /* setup all the supported gpii */
+ INIT_LIST_HEAD(&gpi_dev->dma_device.channels);
+ for (i = 0; i < gpi_dev->max_gpii; i++) {
+ struct gpii *gpii = &gpi_dev->gpiis[i];
+ int chan;
+
+ if (!((1 << i) & gpi_dev->gpii_mask))
+ continue;
+
+ /* set up ev cntxt register map */
+ gpii->ev_cntxt_base_reg = gpi_dev->ee_base + GPII_n_EV_CH_k_CNTXT_0_OFFS(i, 0);
+ gpii->ev_cntxt_db_reg = gpi_dev->ee_base + GPII_n_EV_CH_k_DOORBELL_0_OFFS(i, 0);
+ gpii->ev_ring_rp_lsb_reg = gpii->ev_cntxt_base_reg + CNTXT_4_RING_RP_LSB;
+ gpii->ev_cmd_reg = gpi_dev->ee_base + GPII_n_EV_CH_CMD_OFFS(i);
+ gpii->ieob_clr_reg = gpi_dev->ee_base + GPII_n_CNTXT_SRC_IEOB_IRQ_CLR_OFFS(i);
+
+ /* set up irq */
+ ret = platform_get_irq(pdev, i);
+ if (ret < 0)
+ return ret;
+ gpii->irq = ret;
+
+ /* set up channel specific register info */
+ for (chan = 0; chan < MAX_CHANNELS_PER_GPII; chan++) {
+ struct gchan *gchan = &gpii->gchan[chan];
+
+ /* set up ch cntxt register map */
+ gchan->ch_cntxt_base_reg = gpi_dev->ee_base +
+ GPII_n_CH_k_CNTXT_0_OFFS(i, chan);
+ gchan->ch_cntxt_db_reg = gpi_dev->ee_base +
+ GPII_n_CH_k_DOORBELL_0_OFFS(i, chan);
+ gchan->ch_cmd_reg = gpi_dev->ee_base + GPII_n_CH_CMD_OFFS(i);
+
+ /* vchan setup */
+ vchan_init(&gchan->vc, &gpi_dev->dma_device);
+ gchan->vc.desc_free = gpi_desc_free;
+ gchan->chid = chan;
+ gchan->gpii = gpii;
+ gchan->dir = GPII_CHAN_DIR[chan];
+ }
+ mutex_init(&gpii->ctrl_lock);
+ rwlock_init(&gpii->pm_lock);
+ tasklet_init(&gpii->ev_task, gpi_ev_tasklet,
+ (unsigned long)gpii);
+ init_completion(&gpii->cmd_completion);
+ gpii->gpii_id = i;
+ gpii->regs = gpi_dev->ee_base;
+ gpii->gpi_dev = gpi_dev;
+ }
+
+ platform_set_drvdata(pdev, gpi_dev);
+
+ /* clear and Set capabilities */
+ dma_cap_zero(gpi_dev->dma_device.cap_mask);
+ dma_cap_set(DMA_SLAVE, gpi_dev->dma_device.cap_mask);
+
+ /* configure dmaengine apis */
+ gpi_dev->dma_device.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
+ gpi_dev->dma_device.residue_granularity = DMA_RESIDUE_GRANULARITY_DESCRIPTOR;
+ gpi_dev->dma_device.src_addr_widths = DMA_SLAVE_BUSWIDTH_8_BYTES;
+ gpi_dev->dma_device.dst_addr_widths = DMA_SLAVE_BUSWIDTH_8_BYTES;
+ gpi_dev->dma_device.device_alloc_chan_resources = gpi_alloc_chan_resources;
+ gpi_dev->dma_device.device_free_chan_resources = gpi_free_chan_resources;
+ gpi_dev->dma_device.device_tx_status = dma_cookie_status;
+ gpi_dev->dma_device.device_issue_pending = gpi_issue_pending;
+ gpi_dev->dma_device.device_prep_slave_sg = gpi_prep_slave_sg;
+ gpi_dev->dma_device.device_config = gpi_peripheral_config;
+ gpi_dev->dma_device.device_terminate_all = gpi_terminate_all;
+ gpi_dev->dma_device.dev = gpi_dev->dev;
+ gpi_dev->dma_device.device_pause = gpi_pause;
+ gpi_dev->dma_device.device_resume = gpi_resume;
+
+ /* register with dmaengine framework */
+ ret = dma_async_device_register(&gpi_dev->dma_device);
+ if (ret) {
+ dev_err(gpi_dev->dev, "async_device_register failed ret:%d", ret);
+ return ret;
+ }
+
+ ret = of_dma_controller_register(gpi_dev->dev->of_node,
+ gpi_of_dma_xlate, gpi_dev);
+ if (ret) {
+ dev_err(gpi_dev->dev, "of_dma_controller_reg failed ret:%d", ret);
+ return ret;
+ }
+
+ return ret;
+}
+
+static const struct of_device_id gpi_of_match[] = {
+ { .compatible = "qcom,sc7280-gpi-dma", .data = (void *)0x10000 },
+ { .compatible = "qcom,sdm845-gpi-dma", .data = (void *)0x0 },
+ { .compatible = "qcom,sm6350-gpi-dma", .data = (void *)0x10000 },
+ { .compatible = "qcom,sm8150-gpi-dma", .data = (void *)0x0 },
+ { .compatible = "qcom,sm8250-gpi-dma", .data = (void *)0x0 },
+ { .compatible = "qcom,sm8350-gpi-dma", .data = (void *)0x10000 },
+ { .compatible = "qcom,sm8450-gpi-dma", .data = (void *)0x10000 },
+ { },
+};
+MODULE_DEVICE_TABLE(of, gpi_of_match);
+
+static struct platform_driver gpi_driver = {
+ .probe = gpi_probe,
+ .driver = {
+ .name = KBUILD_MODNAME,
+ .of_match_table = gpi_of_match,
+ },
+};
+
+static int __init gpi_init(void)
+{
+ return platform_driver_register(&gpi_driver);
+}
+subsys_initcall(gpi_init)
+
+MODULE_DESCRIPTION("QCOM GPI DMA engine driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/qcom/hidma.c b/drivers/dma/qcom/hidma.c
new file mode 100644
index 000000000..210f1a9eb
--- /dev/null
+++ b/drivers/dma/qcom/hidma.c
@@ -0,0 +1,976 @@
+/*
+ * Qualcomm Technologies HIDMA DMA engine interface
+ *
+ * Copyright (c) 2015-2017, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+/*
+ * Copyright (C) Freescale Semicondutor, Inc. 2007, 2008.
+ * Copyright (C) Semihalf 2009
+ * Copyright (C) Ilya Yanok, Emcraft Systems 2010
+ * Copyright (C) Alexander Popov, Promcontroller 2014
+ *
+ * Written by Piotr Ziecik <kosmo@semihalf.com>. Hardware description
+ * (defines, structures and comments) was taken from MPC5121 DMA driver
+ * written by Hongjun Chen <hong-jun.chen@freescale.com>.
+ *
+ * Approved as OSADL project by a majority of OSADL members and funded
+ * by OSADL membership fees in 2009; for details see www.osadl.org.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * The full GNU General Public License is included in this distribution in the
+ * file called COPYING.
+ */
+
+/* Linux Foundation elects GPLv2 license only. */
+
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/of_dma.h>
+#include <linux/of_device.h>
+#include <linux/property.h>
+#include <linux/delay.h>
+#include <linux/acpi.h>
+#include <linux/irq.h>
+#include <linux/atomic.h>
+#include <linux/pm_runtime.h>
+#include <linux/msi.h>
+
+#include "../dmaengine.h"
+#include "hidma.h"
+
+/*
+ * Default idle time is 2 seconds. This parameter can
+ * be overridden by changing the following
+ * /sys/bus/platform/devices/QCOM8061:<xy>/power/autosuspend_delay_ms
+ * during kernel boot.
+ */
+#define HIDMA_AUTOSUSPEND_TIMEOUT 2000
+#define HIDMA_ERR_INFO_SW 0xFF
+#define HIDMA_ERR_CODE_UNEXPECTED_TERMINATE 0x0
+#define HIDMA_NR_DEFAULT_DESC 10
+#define HIDMA_MSI_INTS 11
+
+static inline struct hidma_dev *to_hidma_dev(struct dma_device *dmadev)
+{
+ return container_of(dmadev, struct hidma_dev, ddev);
+}
+
+static inline
+struct hidma_dev *to_hidma_dev_from_lldev(struct hidma_lldev **_lldevp)
+{
+ return container_of(_lldevp, struct hidma_dev, lldev);
+}
+
+static inline struct hidma_chan *to_hidma_chan(struct dma_chan *dmach)
+{
+ return container_of(dmach, struct hidma_chan, chan);
+}
+
+static void hidma_free(struct hidma_dev *dmadev)
+{
+ INIT_LIST_HEAD(&dmadev->ddev.channels);
+}
+
+static unsigned int nr_desc_prm;
+module_param(nr_desc_prm, uint, 0644);
+MODULE_PARM_DESC(nr_desc_prm, "number of descriptors (default: 0)");
+
+enum hidma_cap {
+ HIDMA_MSI_CAP = 1,
+ HIDMA_IDENTITY_CAP,
+};
+
+/* process completed descriptors */
+static void hidma_process_completed(struct hidma_chan *mchan)
+{
+ struct dma_device *ddev = mchan->chan.device;
+ struct hidma_dev *mdma = to_hidma_dev(ddev);
+ struct dma_async_tx_descriptor *desc;
+ dma_cookie_t last_cookie;
+ struct hidma_desc *mdesc;
+ struct hidma_desc *next;
+ unsigned long irqflags;
+ struct list_head list;
+
+ INIT_LIST_HEAD(&list);
+
+ /* Get all completed descriptors */
+ spin_lock_irqsave(&mchan->lock, irqflags);
+ list_splice_tail_init(&mchan->completed, &list);
+ spin_unlock_irqrestore(&mchan->lock, irqflags);
+
+ /* Execute callbacks and run dependencies */
+ list_for_each_entry_safe(mdesc, next, &list, node) {
+ enum dma_status llstat;
+ struct dmaengine_desc_callback cb;
+ struct dmaengine_result result;
+
+ desc = &mdesc->desc;
+ last_cookie = desc->cookie;
+
+ llstat = hidma_ll_status(mdma->lldev, mdesc->tre_ch);
+
+ spin_lock_irqsave(&mchan->lock, irqflags);
+ if (llstat == DMA_COMPLETE) {
+ mchan->last_success = last_cookie;
+ result.result = DMA_TRANS_NOERROR;
+ } else {
+ result.result = DMA_TRANS_ABORTED;
+ }
+
+ dma_cookie_complete(desc);
+ spin_unlock_irqrestore(&mchan->lock, irqflags);
+
+ dmaengine_desc_get_callback(desc, &cb);
+
+ dma_run_dependencies(desc);
+
+ spin_lock_irqsave(&mchan->lock, irqflags);
+ list_move(&mdesc->node, &mchan->free);
+ spin_unlock_irqrestore(&mchan->lock, irqflags);
+
+ dmaengine_desc_callback_invoke(&cb, &result);
+ }
+}
+
+/*
+ * Called once for each submitted descriptor.
+ * PM is locked once for each descriptor that is currently
+ * in execution.
+ */
+static void hidma_callback(void *data)
+{
+ struct hidma_desc *mdesc = data;
+ struct hidma_chan *mchan = to_hidma_chan(mdesc->desc.chan);
+ struct dma_device *ddev = mchan->chan.device;
+ struct hidma_dev *dmadev = to_hidma_dev(ddev);
+ unsigned long irqflags;
+ bool queued = false;
+
+ spin_lock_irqsave(&mchan->lock, irqflags);
+ if (mdesc->node.next) {
+ /* Delete from the active list, add to completed list */
+ list_move_tail(&mdesc->node, &mchan->completed);
+ queued = true;
+
+ /* calculate the next running descriptor */
+ mchan->running = list_first_entry(&mchan->active,
+ struct hidma_desc, node);
+ }
+ spin_unlock_irqrestore(&mchan->lock, irqflags);
+
+ hidma_process_completed(mchan);
+
+ if (queued) {
+ pm_runtime_mark_last_busy(dmadev->ddev.dev);
+ pm_runtime_put_autosuspend(dmadev->ddev.dev);
+ }
+}
+
+static int hidma_chan_init(struct hidma_dev *dmadev, u32 dma_sig)
+{
+ struct hidma_chan *mchan;
+ struct dma_device *ddev;
+
+ mchan = devm_kzalloc(dmadev->ddev.dev, sizeof(*mchan), GFP_KERNEL);
+ if (!mchan)
+ return -ENOMEM;
+
+ ddev = &dmadev->ddev;
+ mchan->dma_sig = dma_sig;
+ mchan->dmadev = dmadev;
+ mchan->chan.device = ddev;
+ dma_cookie_init(&mchan->chan);
+
+ INIT_LIST_HEAD(&mchan->free);
+ INIT_LIST_HEAD(&mchan->prepared);
+ INIT_LIST_HEAD(&mchan->active);
+ INIT_LIST_HEAD(&mchan->completed);
+ INIT_LIST_HEAD(&mchan->queued);
+
+ spin_lock_init(&mchan->lock);
+ list_add_tail(&mchan->chan.device_node, &ddev->channels);
+ dmadev->ddev.chancnt++;
+ return 0;
+}
+
+static void hidma_issue_task(struct tasklet_struct *t)
+{
+ struct hidma_dev *dmadev = from_tasklet(dmadev, t, task);
+
+ pm_runtime_get_sync(dmadev->ddev.dev);
+ hidma_ll_start(dmadev->lldev);
+}
+
+static void hidma_issue_pending(struct dma_chan *dmach)
+{
+ struct hidma_chan *mchan = to_hidma_chan(dmach);
+ struct hidma_dev *dmadev = mchan->dmadev;
+ unsigned long flags;
+ struct hidma_desc *qdesc, *next;
+ int status;
+
+ spin_lock_irqsave(&mchan->lock, flags);
+ list_for_each_entry_safe(qdesc, next, &mchan->queued, node) {
+ hidma_ll_queue_request(dmadev->lldev, qdesc->tre_ch);
+ list_move_tail(&qdesc->node, &mchan->active);
+ }
+
+ if (!mchan->running) {
+ struct hidma_desc *desc = list_first_entry(&mchan->active,
+ struct hidma_desc,
+ node);
+ mchan->running = desc;
+ }
+ spin_unlock_irqrestore(&mchan->lock, flags);
+
+ /* PM will be released in hidma_callback function. */
+ status = pm_runtime_get(dmadev->ddev.dev);
+ if (status < 0)
+ tasklet_schedule(&dmadev->task);
+ else
+ hidma_ll_start(dmadev->lldev);
+}
+
+static inline bool hidma_txn_is_success(dma_cookie_t cookie,
+ dma_cookie_t last_success, dma_cookie_t last_used)
+{
+ if (last_success <= last_used) {
+ if ((cookie <= last_success) || (cookie > last_used))
+ return true;
+ } else {
+ if ((cookie <= last_success) && (cookie > last_used))
+ return true;
+ }
+ return false;
+}
+
+static enum dma_status hidma_tx_status(struct dma_chan *dmach,
+ dma_cookie_t cookie,
+ struct dma_tx_state *txstate)
+{
+ struct hidma_chan *mchan = to_hidma_chan(dmach);
+ enum dma_status ret;
+
+ ret = dma_cookie_status(dmach, cookie, txstate);
+ if (ret == DMA_COMPLETE) {
+ bool is_success;
+
+ is_success = hidma_txn_is_success(cookie, mchan->last_success,
+ dmach->cookie);
+ return is_success ? ret : DMA_ERROR;
+ }
+
+ if (mchan->paused && (ret == DMA_IN_PROGRESS)) {
+ unsigned long flags;
+ dma_cookie_t runcookie;
+
+ spin_lock_irqsave(&mchan->lock, flags);
+ if (mchan->running)
+ runcookie = mchan->running->desc.cookie;
+ else
+ runcookie = -EINVAL;
+
+ if (runcookie == cookie)
+ ret = DMA_PAUSED;
+
+ spin_unlock_irqrestore(&mchan->lock, flags);
+ }
+
+ return ret;
+}
+
+/*
+ * Submit descriptor to hardware.
+ * Lock the PM for each descriptor we are sending.
+ */
+static dma_cookie_t hidma_tx_submit(struct dma_async_tx_descriptor *txd)
+{
+ struct hidma_chan *mchan = to_hidma_chan(txd->chan);
+ struct hidma_dev *dmadev = mchan->dmadev;
+ struct hidma_desc *mdesc;
+ unsigned long irqflags;
+ dma_cookie_t cookie;
+
+ pm_runtime_get_sync(dmadev->ddev.dev);
+ if (!hidma_ll_isenabled(dmadev->lldev)) {
+ pm_runtime_mark_last_busy(dmadev->ddev.dev);
+ pm_runtime_put_autosuspend(dmadev->ddev.dev);
+ return -ENODEV;
+ }
+ pm_runtime_mark_last_busy(dmadev->ddev.dev);
+ pm_runtime_put_autosuspend(dmadev->ddev.dev);
+
+ mdesc = container_of(txd, struct hidma_desc, desc);
+ spin_lock_irqsave(&mchan->lock, irqflags);
+
+ /* Move descriptor to queued */
+ list_move_tail(&mdesc->node, &mchan->queued);
+
+ /* Update cookie */
+ cookie = dma_cookie_assign(txd);
+
+ spin_unlock_irqrestore(&mchan->lock, irqflags);
+
+ return cookie;
+}
+
+static int hidma_alloc_chan_resources(struct dma_chan *dmach)
+{
+ struct hidma_chan *mchan = to_hidma_chan(dmach);
+ struct hidma_dev *dmadev = mchan->dmadev;
+ struct hidma_desc *mdesc, *tmp;
+ unsigned long irqflags;
+ LIST_HEAD(descs);
+ unsigned int i;
+ int rc = 0;
+
+ if (mchan->allocated)
+ return 0;
+
+ /* Alloc descriptors for this channel */
+ for (i = 0; i < dmadev->nr_descriptors; i++) {
+ mdesc = kzalloc(sizeof(struct hidma_desc), GFP_NOWAIT);
+ if (!mdesc) {
+ rc = -ENOMEM;
+ break;
+ }
+ dma_async_tx_descriptor_init(&mdesc->desc, dmach);
+ mdesc->desc.tx_submit = hidma_tx_submit;
+
+ rc = hidma_ll_request(dmadev->lldev, mchan->dma_sig,
+ "DMA engine", hidma_callback, mdesc,
+ &mdesc->tre_ch);
+ if (rc) {
+ dev_err(dmach->device->dev,
+ "channel alloc failed at %u\n", i);
+ kfree(mdesc);
+ break;
+ }
+ list_add_tail(&mdesc->node, &descs);
+ }
+
+ if (rc) {
+ /* return the allocated descriptors */
+ list_for_each_entry_safe(mdesc, tmp, &descs, node) {
+ hidma_ll_free(dmadev->lldev, mdesc->tre_ch);
+ kfree(mdesc);
+ }
+ return rc;
+ }
+
+ spin_lock_irqsave(&mchan->lock, irqflags);
+ list_splice_tail_init(&descs, &mchan->free);
+ mchan->allocated = true;
+ spin_unlock_irqrestore(&mchan->lock, irqflags);
+ return 1;
+}
+
+static struct dma_async_tx_descriptor *
+hidma_prep_dma_memcpy(struct dma_chan *dmach, dma_addr_t dest, dma_addr_t src,
+ size_t len, unsigned long flags)
+{
+ struct hidma_chan *mchan = to_hidma_chan(dmach);
+ struct hidma_desc *mdesc = NULL;
+ struct hidma_dev *mdma = mchan->dmadev;
+ unsigned long irqflags;
+
+ /* Get free descriptor */
+ spin_lock_irqsave(&mchan->lock, irqflags);
+ if (!list_empty(&mchan->free)) {
+ mdesc = list_first_entry(&mchan->free, struct hidma_desc, node);
+ list_del(&mdesc->node);
+ }
+ spin_unlock_irqrestore(&mchan->lock, irqflags);
+
+ if (!mdesc)
+ return NULL;
+
+ mdesc->desc.flags = flags;
+ hidma_ll_set_transfer_params(mdma->lldev, mdesc->tre_ch,
+ src, dest, len, flags,
+ HIDMA_TRE_MEMCPY);
+
+ /* Place descriptor in prepared list */
+ spin_lock_irqsave(&mchan->lock, irqflags);
+ list_add_tail(&mdesc->node, &mchan->prepared);
+ spin_unlock_irqrestore(&mchan->lock, irqflags);
+
+ return &mdesc->desc;
+}
+
+static struct dma_async_tx_descriptor *
+hidma_prep_dma_memset(struct dma_chan *dmach, dma_addr_t dest, int value,
+ size_t len, unsigned long flags)
+{
+ struct hidma_chan *mchan = to_hidma_chan(dmach);
+ struct hidma_desc *mdesc = NULL;
+ struct hidma_dev *mdma = mchan->dmadev;
+ unsigned long irqflags;
+ u64 byte_pattern, fill_pattern;
+
+ /* Get free descriptor */
+ spin_lock_irqsave(&mchan->lock, irqflags);
+ if (!list_empty(&mchan->free)) {
+ mdesc = list_first_entry(&mchan->free, struct hidma_desc, node);
+ list_del(&mdesc->node);
+ }
+ spin_unlock_irqrestore(&mchan->lock, irqflags);
+
+ if (!mdesc)
+ return NULL;
+
+ byte_pattern = (char)value;
+ fill_pattern = (byte_pattern << 56) |
+ (byte_pattern << 48) |
+ (byte_pattern << 40) |
+ (byte_pattern << 32) |
+ (byte_pattern << 24) |
+ (byte_pattern << 16) |
+ (byte_pattern << 8) |
+ byte_pattern;
+
+ mdesc->desc.flags = flags;
+ hidma_ll_set_transfer_params(mdma->lldev, mdesc->tre_ch,
+ fill_pattern, dest, len, flags,
+ HIDMA_TRE_MEMSET);
+
+ /* Place descriptor in prepared list */
+ spin_lock_irqsave(&mchan->lock, irqflags);
+ list_add_tail(&mdesc->node, &mchan->prepared);
+ spin_unlock_irqrestore(&mchan->lock, irqflags);
+
+ return &mdesc->desc;
+}
+
+static int hidma_terminate_channel(struct dma_chan *chan)
+{
+ struct hidma_chan *mchan = to_hidma_chan(chan);
+ struct hidma_dev *dmadev = to_hidma_dev(mchan->chan.device);
+ struct hidma_desc *tmp, *mdesc;
+ unsigned long irqflags;
+ LIST_HEAD(list);
+ int rc;
+
+ pm_runtime_get_sync(dmadev->ddev.dev);
+ /* give completed requests a chance to finish */
+ hidma_process_completed(mchan);
+
+ spin_lock_irqsave(&mchan->lock, irqflags);
+ mchan->last_success = 0;
+ list_splice_init(&mchan->active, &list);
+ list_splice_init(&mchan->prepared, &list);
+ list_splice_init(&mchan->completed, &list);
+ list_splice_init(&mchan->queued, &list);
+ spin_unlock_irqrestore(&mchan->lock, irqflags);
+
+ /* this suspends the existing transfer */
+ rc = hidma_ll_disable(dmadev->lldev);
+ if (rc) {
+ dev_err(dmadev->ddev.dev, "channel did not pause\n");
+ goto out;
+ }
+
+ /* return all user requests */
+ list_for_each_entry_safe(mdesc, tmp, &list, node) {
+ struct dma_async_tx_descriptor *txd = &mdesc->desc;
+
+ dma_descriptor_unmap(txd);
+ dmaengine_desc_get_callback_invoke(txd, NULL);
+ dma_run_dependencies(txd);
+
+ /* move myself to free_list */
+ list_move(&mdesc->node, &mchan->free);
+ }
+
+ rc = hidma_ll_enable(dmadev->lldev);
+out:
+ pm_runtime_mark_last_busy(dmadev->ddev.dev);
+ pm_runtime_put_autosuspend(dmadev->ddev.dev);
+ return rc;
+}
+
+static int hidma_terminate_all(struct dma_chan *chan)
+{
+ struct hidma_chan *mchan = to_hidma_chan(chan);
+ struct hidma_dev *dmadev = to_hidma_dev(mchan->chan.device);
+ int rc;
+
+ rc = hidma_terminate_channel(chan);
+ if (rc)
+ return rc;
+
+ /* reinitialize the hardware */
+ pm_runtime_get_sync(dmadev->ddev.dev);
+ rc = hidma_ll_setup(dmadev->lldev);
+ pm_runtime_mark_last_busy(dmadev->ddev.dev);
+ pm_runtime_put_autosuspend(dmadev->ddev.dev);
+ return rc;
+}
+
+static void hidma_free_chan_resources(struct dma_chan *dmach)
+{
+ struct hidma_chan *mchan = to_hidma_chan(dmach);
+ struct hidma_dev *mdma = mchan->dmadev;
+ struct hidma_desc *mdesc, *tmp;
+ unsigned long irqflags;
+ LIST_HEAD(descs);
+
+ /* terminate running transactions and free descriptors */
+ hidma_terminate_channel(dmach);
+
+ spin_lock_irqsave(&mchan->lock, irqflags);
+
+ /* Move data */
+ list_splice_tail_init(&mchan->free, &descs);
+
+ /* Free descriptors */
+ list_for_each_entry_safe(mdesc, tmp, &descs, node) {
+ hidma_ll_free(mdma->lldev, mdesc->tre_ch);
+ list_del(&mdesc->node);
+ kfree(mdesc);
+ }
+
+ mchan->allocated = false;
+ spin_unlock_irqrestore(&mchan->lock, irqflags);
+}
+
+static int hidma_pause(struct dma_chan *chan)
+{
+ struct hidma_chan *mchan;
+ struct hidma_dev *dmadev;
+
+ mchan = to_hidma_chan(chan);
+ dmadev = to_hidma_dev(mchan->chan.device);
+ if (!mchan->paused) {
+ pm_runtime_get_sync(dmadev->ddev.dev);
+ if (hidma_ll_disable(dmadev->lldev))
+ dev_warn(dmadev->ddev.dev, "channel did not stop\n");
+ mchan->paused = true;
+ pm_runtime_mark_last_busy(dmadev->ddev.dev);
+ pm_runtime_put_autosuspend(dmadev->ddev.dev);
+ }
+ return 0;
+}
+
+static int hidma_resume(struct dma_chan *chan)
+{
+ struct hidma_chan *mchan;
+ struct hidma_dev *dmadev;
+ int rc = 0;
+
+ mchan = to_hidma_chan(chan);
+ dmadev = to_hidma_dev(mchan->chan.device);
+ if (mchan->paused) {
+ pm_runtime_get_sync(dmadev->ddev.dev);
+ rc = hidma_ll_enable(dmadev->lldev);
+ if (!rc)
+ mchan->paused = false;
+ else
+ dev_err(dmadev->ddev.dev,
+ "failed to resume the channel");
+ pm_runtime_mark_last_busy(dmadev->ddev.dev);
+ pm_runtime_put_autosuspend(dmadev->ddev.dev);
+ }
+ return rc;
+}
+
+static irqreturn_t hidma_chirq_handler(int chirq, void *arg)
+{
+ struct hidma_lldev *lldev = arg;
+
+ /*
+ * All interrupts are request driven.
+ * HW doesn't send an interrupt by itself.
+ */
+ return hidma_ll_inthandler(chirq, lldev);
+}
+
+#ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN
+static irqreturn_t hidma_chirq_handler_msi(int chirq, void *arg)
+{
+ struct hidma_lldev **lldevp = arg;
+ struct hidma_dev *dmadev = to_hidma_dev_from_lldev(lldevp);
+
+ return hidma_ll_inthandler_msi(chirq, *lldevp,
+ 1 << (chirq - dmadev->msi_virqbase));
+}
+#endif
+
+static ssize_t hidma_show_values(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct hidma_dev *mdev = dev_get_drvdata(dev);
+
+ buf[0] = 0;
+
+ if (strcmp(attr->attr.name, "chid") == 0)
+ sprintf(buf, "%d\n", mdev->chidx);
+
+ return strlen(buf);
+}
+
+static inline void hidma_sysfs_uninit(struct hidma_dev *dev)
+{
+ device_remove_file(dev->ddev.dev, dev->chid_attrs);
+}
+
+static struct device_attribute*
+hidma_create_sysfs_entry(struct hidma_dev *dev, char *name, int mode)
+{
+ struct device_attribute *attrs;
+ char *name_copy;
+
+ attrs = devm_kmalloc(dev->ddev.dev, sizeof(struct device_attribute),
+ GFP_KERNEL);
+ if (!attrs)
+ return NULL;
+
+ name_copy = devm_kstrdup(dev->ddev.dev, name, GFP_KERNEL);
+ if (!name_copy)
+ return NULL;
+
+ attrs->attr.name = name_copy;
+ attrs->attr.mode = mode;
+ attrs->show = hidma_show_values;
+ sysfs_attr_init(&attrs->attr);
+
+ return attrs;
+}
+
+static int hidma_sysfs_init(struct hidma_dev *dev)
+{
+ dev->chid_attrs = hidma_create_sysfs_entry(dev, "chid", S_IRUGO);
+ if (!dev->chid_attrs)
+ return -ENOMEM;
+
+ return device_create_file(dev->ddev.dev, dev->chid_attrs);
+}
+
+#ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN
+static void hidma_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
+{
+ struct device *dev = msi_desc_to_dev(desc);
+ struct hidma_dev *dmadev = dev_get_drvdata(dev);
+
+ if (!desc->msi_index) {
+ writel(msg->address_lo, dmadev->dev_evca + 0x118);
+ writel(msg->address_hi, dmadev->dev_evca + 0x11C);
+ writel(msg->data, dmadev->dev_evca + 0x120);
+ }
+}
+#endif
+
+static void hidma_free_msis(struct hidma_dev *dmadev)
+{
+#ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN
+ struct device *dev = dmadev->ddev.dev;
+ int i, virq;
+
+ for (i = 0; i < HIDMA_MSI_INTS; i++) {
+ virq = msi_get_virq(dev, i);
+ if (virq)
+ devm_free_irq(dev, virq, &dmadev->lldev);
+ }
+
+ platform_msi_domain_free_irqs(dev);
+#endif
+}
+
+static int hidma_request_msi(struct hidma_dev *dmadev,
+ struct platform_device *pdev)
+{
+#ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN
+ int rc, i, virq;
+
+ rc = platform_msi_domain_alloc_irqs(&pdev->dev, HIDMA_MSI_INTS,
+ hidma_write_msi_msg);
+ if (rc)
+ return rc;
+
+ for (i = 0; i < HIDMA_MSI_INTS; i++) {
+ virq = msi_get_virq(&pdev->dev, i);
+ rc = devm_request_irq(&pdev->dev, virq,
+ hidma_chirq_handler_msi,
+ 0, "qcom-hidma-msi",
+ &dmadev->lldev);
+ if (rc)
+ break;
+ if (!i)
+ dmadev->msi_virqbase = virq;
+ }
+
+ if (rc) {
+ /* free allocated MSI interrupts above */
+ for (--i; i >= 0; i--) {
+ virq = msi_get_virq(&pdev->dev, i);
+ devm_free_irq(&pdev->dev, virq, &dmadev->lldev);
+ }
+ dev_warn(&pdev->dev,
+ "failed to request MSI irq, falling back to wired IRQ\n");
+ } else {
+ /* Add callback to free MSIs on teardown */
+ hidma_ll_setup_irq(dmadev->lldev, true);
+ }
+ return rc;
+#else
+ return -EINVAL;
+#endif
+}
+
+static bool hidma_test_capability(struct device *dev, enum hidma_cap test_cap)
+{
+ enum hidma_cap cap;
+
+ cap = (enum hidma_cap) device_get_match_data(dev);
+ return cap ? ((cap & test_cap) > 0) : 0;
+}
+
+static int hidma_probe(struct platform_device *pdev)
+{
+ struct hidma_dev *dmadev;
+ struct resource *trca_resource;
+ struct resource *evca_resource;
+ int chirq;
+ void __iomem *evca;
+ void __iomem *trca;
+ int rc;
+ bool msi;
+
+ pm_runtime_set_autosuspend_delay(&pdev->dev, HIDMA_AUTOSUSPEND_TIMEOUT);
+ pm_runtime_use_autosuspend(&pdev->dev);
+ pm_runtime_set_active(&pdev->dev);
+ pm_runtime_enable(&pdev->dev);
+
+ trca_resource = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ trca = devm_ioremap_resource(&pdev->dev, trca_resource);
+ if (IS_ERR(trca)) {
+ rc = -ENOMEM;
+ goto bailout;
+ }
+
+ evca_resource = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+ evca = devm_ioremap_resource(&pdev->dev, evca_resource);
+ if (IS_ERR(evca)) {
+ rc = -ENOMEM;
+ goto bailout;
+ }
+
+ /*
+ * This driver only handles the channel IRQs.
+ * Common IRQ is handled by the management driver.
+ */
+ chirq = platform_get_irq(pdev, 0);
+ if (chirq < 0) {
+ rc = -ENODEV;
+ goto bailout;
+ }
+
+ dmadev = devm_kzalloc(&pdev->dev, sizeof(*dmadev), GFP_KERNEL);
+ if (!dmadev) {
+ rc = -ENOMEM;
+ goto bailout;
+ }
+
+ INIT_LIST_HEAD(&dmadev->ddev.channels);
+ spin_lock_init(&dmadev->lock);
+ dmadev->ddev.dev = &pdev->dev;
+ pm_runtime_get_sync(dmadev->ddev.dev);
+
+ dma_cap_set(DMA_MEMCPY, dmadev->ddev.cap_mask);
+ dma_cap_set(DMA_MEMSET, dmadev->ddev.cap_mask);
+ if (WARN_ON(!pdev->dev.dma_mask)) {
+ rc = -ENXIO;
+ goto dmafree;
+ }
+
+ dmadev->dev_evca = evca;
+ dmadev->evca_resource = evca_resource;
+ dmadev->dev_trca = trca;
+ dmadev->trca_resource = trca_resource;
+ dmadev->ddev.device_prep_dma_memcpy = hidma_prep_dma_memcpy;
+ dmadev->ddev.device_prep_dma_memset = hidma_prep_dma_memset;
+ dmadev->ddev.device_alloc_chan_resources = hidma_alloc_chan_resources;
+ dmadev->ddev.device_free_chan_resources = hidma_free_chan_resources;
+ dmadev->ddev.device_tx_status = hidma_tx_status;
+ dmadev->ddev.device_issue_pending = hidma_issue_pending;
+ dmadev->ddev.device_pause = hidma_pause;
+ dmadev->ddev.device_resume = hidma_resume;
+ dmadev->ddev.device_terminate_all = hidma_terminate_all;
+ dmadev->ddev.copy_align = 8;
+
+ /*
+ * Determine the MSI capability of the platform. Old HW doesn't
+ * support MSI.
+ */
+ msi = hidma_test_capability(&pdev->dev, HIDMA_MSI_CAP);
+ device_property_read_u32(&pdev->dev, "desc-count",
+ &dmadev->nr_descriptors);
+
+ if (nr_desc_prm) {
+ dev_info(&pdev->dev, "overriding number of descriptors as %d\n",
+ nr_desc_prm);
+ dmadev->nr_descriptors = nr_desc_prm;
+ }
+
+ if (!dmadev->nr_descriptors)
+ dmadev->nr_descriptors = HIDMA_NR_DEFAULT_DESC;
+
+ if (hidma_test_capability(&pdev->dev, HIDMA_IDENTITY_CAP))
+ dmadev->chidx = readl(dmadev->dev_trca + 0x40);
+ else
+ dmadev->chidx = readl(dmadev->dev_trca + 0x28);
+
+ /* Set DMA mask to 64 bits. */
+ rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
+ if (rc) {
+ dev_warn(&pdev->dev, "unable to set coherent mask to 64");
+ goto dmafree;
+ }
+
+ dmadev->lldev = hidma_ll_init(dmadev->ddev.dev,
+ dmadev->nr_descriptors, dmadev->dev_trca,
+ dmadev->dev_evca, dmadev->chidx);
+ if (!dmadev->lldev) {
+ rc = -EPROBE_DEFER;
+ goto dmafree;
+ }
+
+ platform_set_drvdata(pdev, dmadev);
+ if (msi)
+ rc = hidma_request_msi(dmadev, pdev);
+
+ if (!msi || rc) {
+ hidma_ll_setup_irq(dmadev->lldev, false);
+ rc = devm_request_irq(&pdev->dev, chirq, hidma_chirq_handler,
+ 0, "qcom-hidma", dmadev->lldev);
+ if (rc)
+ goto uninit;
+ }
+
+ INIT_LIST_HEAD(&dmadev->ddev.channels);
+ rc = hidma_chan_init(dmadev, 0);
+ if (rc)
+ goto uninit;
+
+ rc = dma_async_device_register(&dmadev->ddev);
+ if (rc)
+ goto uninit;
+
+ dmadev->irq = chirq;
+ tasklet_setup(&dmadev->task, hidma_issue_task);
+ hidma_debug_init(dmadev);
+ hidma_sysfs_init(dmadev);
+ dev_info(&pdev->dev, "HI-DMA engine driver registration complete\n");
+ pm_runtime_mark_last_busy(dmadev->ddev.dev);
+ pm_runtime_put_autosuspend(dmadev->ddev.dev);
+ return 0;
+
+uninit:
+ if (msi)
+ hidma_free_msis(dmadev);
+
+ hidma_ll_uninit(dmadev->lldev);
+dmafree:
+ if (dmadev)
+ hidma_free(dmadev);
+bailout:
+ pm_runtime_put_sync(&pdev->dev);
+ pm_runtime_disable(&pdev->dev);
+ return rc;
+}
+
+static void hidma_shutdown(struct platform_device *pdev)
+{
+ struct hidma_dev *dmadev = platform_get_drvdata(pdev);
+
+ dev_info(dmadev->ddev.dev, "HI-DMA engine shutdown\n");
+
+ pm_runtime_get_sync(dmadev->ddev.dev);
+ if (hidma_ll_disable(dmadev->lldev))
+ dev_warn(dmadev->ddev.dev, "channel did not stop\n");
+ pm_runtime_mark_last_busy(dmadev->ddev.dev);
+ pm_runtime_put_autosuspend(dmadev->ddev.dev);
+
+}
+
+static int hidma_remove(struct platform_device *pdev)
+{
+ struct hidma_dev *dmadev = platform_get_drvdata(pdev);
+
+ pm_runtime_get_sync(dmadev->ddev.dev);
+ dma_async_device_unregister(&dmadev->ddev);
+ if (!dmadev->lldev->msi_support)
+ devm_free_irq(dmadev->ddev.dev, dmadev->irq, dmadev->lldev);
+ else
+ hidma_free_msis(dmadev);
+
+ tasklet_kill(&dmadev->task);
+ hidma_sysfs_uninit(dmadev);
+ hidma_debug_uninit(dmadev);
+ hidma_ll_uninit(dmadev->lldev);
+ hidma_free(dmadev);
+
+ dev_info(&pdev->dev, "HI-DMA engine removed\n");
+ pm_runtime_put_sync_suspend(&pdev->dev);
+ pm_runtime_disable(&pdev->dev);
+
+ return 0;
+}
+
+#if IS_ENABLED(CONFIG_ACPI)
+static const struct acpi_device_id hidma_acpi_ids[] = {
+ {"QCOM8061"},
+ {"QCOM8062", HIDMA_MSI_CAP},
+ {"QCOM8063", (HIDMA_MSI_CAP | HIDMA_IDENTITY_CAP)},
+ {},
+};
+MODULE_DEVICE_TABLE(acpi, hidma_acpi_ids);
+#endif
+
+static const struct of_device_id hidma_match[] = {
+ {.compatible = "qcom,hidma-1.0",},
+ {.compatible = "qcom,hidma-1.1", .data = (void *)(HIDMA_MSI_CAP),},
+ {.compatible = "qcom,hidma-1.2",
+ .data = (void *)(HIDMA_MSI_CAP | HIDMA_IDENTITY_CAP),},
+ {},
+};
+MODULE_DEVICE_TABLE(of, hidma_match);
+
+static struct platform_driver hidma_driver = {
+ .probe = hidma_probe,
+ .remove = hidma_remove,
+ .shutdown = hidma_shutdown,
+ .driver = {
+ .name = "hidma",
+ .of_match_table = hidma_match,
+ .acpi_match_table = ACPI_PTR(hidma_acpi_ids),
+ },
+};
+
+module_platform_driver(hidma_driver);
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/qcom/hidma.h b/drivers/dma/qcom/hidma.h
new file mode 100644
index 000000000..f21246674
--- /dev/null
+++ b/drivers/dma/qcom/hidma.h
@@ -0,0 +1,160 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Qualcomm Technologies HIDMA data structures
+ *
+ * Copyright (c) 2014-2016, The Linux Foundation. All rights reserved.
+ */
+
+#ifndef QCOM_HIDMA_H
+#define QCOM_HIDMA_H
+
+#include <linux/kfifo.h>
+#include <linux/interrupt.h>
+#include <linux/dmaengine.h>
+
+#define HIDMA_TRE_SIZE 32 /* each TRE is 32 bytes */
+#define HIDMA_TRE_CFG_IDX 0
+#define HIDMA_TRE_LEN_IDX 1
+#define HIDMA_TRE_SRC_LOW_IDX 2
+#define HIDMA_TRE_SRC_HI_IDX 3
+#define HIDMA_TRE_DEST_LOW_IDX 4
+#define HIDMA_TRE_DEST_HI_IDX 5
+
+enum tre_type {
+ HIDMA_TRE_MEMCPY = 3,
+ HIDMA_TRE_MEMSET = 4,
+};
+
+struct hidma_tre {
+ atomic_t allocated; /* if this channel is allocated */
+ bool queued; /* flag whether this is pending */
+ u16 status; /* status */
+ u32 idx; /* index of the tre */
+ u32 dma_sig; /* signature of the tre */
+ const char *dev_name; /* name of the device */
+ void (*callback)(void *data); /* requester callback */
+ void *data; /* Data associated with this channel*/
+ struct hidma_lldev *lldev; /* lldma device pointer */
+ u32 tre_local[HIDMA_TRE_SIZE / sizeof(u32) + 1]; /* TRE local copy */
+ u32 tre_index; /* the offset where this was written*/
+ u32 int_flags; /* interrupt flags */
+ u8 err_info; /* error record in this transfer */
+ u8 err_code; /* completion code */
+};
+
+struct hidma_lldev {
+ bool msi_support; /* flag indicating MSI support */
+ bool initialized; /* initialized flag */
+ u8 trch_state; /* trch_state of the device */
+ u8 evch_state; /* evch_state of the device */
+ u8 chidx; /* channel index in the core */
+ u32 nr_tres; /* max number of configs */
+ spinlock_t lock; /* reentrancy */
+ struct hidma_tre *trepool; /* trepool of user configs */
+ struct device *dev; /* device */
+ void __iomem *trca; /* Transfer Channel address */
+ void __iomem *evca; /* Event Channel address */
+ struct hidma_tre
+ **pending_tre_list; /* Pointers to pending TREs */
+ atomic_t pending_tre_count; /* Number of TREs pending */
+
+ void *tre_ring; /* TRE ring */
+ dma_addr_t tre_dma; /* TRE ring to be shared with HW */
+ u32 tre_ring_size; /* Byte size of the ring */
+ u32 tre_processed_off; /* last processed TRE */
+
+ void *evre_ring; /* EVRE ring */
+ dma_addr_t evre_dma; /* EVRE ring to be shared with HW */
+ u32 evre_ring_size; /* Byte size of the ring */
+ u32 evre_processed_off; /* last processed EVRE */
+
+ u32 tre_write_offset; /* TRE write location */
+ struct tasklet_struct task; /* task delivering notifications */
+ DECLARE_KFIFO_PTR(handoff_fifo,
+ struct hidma_tre *); /* pending TREs FIFO */
+};
+
+struct hidma_desc {
+ struct dma_async_tx_descriptor desc;
+ /* link list node for this channel*/
+ struct list_head node;
+ u32 tre_ch;
+};
+
+struct hidma_chan {
+ bool paused;
+ bool allocated;
+ char dbg_name[16];
+ u32 dma_sig;
+ dma_cookie_t last_success;
+
+ /*
+ * active descriptor on this channel
+ * It is used by the DMA complete notification to
+ * locate the descriptor that initiated the transfer.
+ */
+ struct hidma_dev *dmadev;
+ struct hidma_desc *running;
+
+ struct dma_chan chan;
+ struct list_head free;
+ struct list_head prepared;
+ struct list_head queued;
+ struct list_head active;
+ struct list_head completed;
+
+ /* Lock for this structure */
+ spinlock_t lock;
+};
+
+struct hidma_dev {
+ int irq;
+ int chidx;
+ u32 nr_descriptors;
+ int msi_virqbase;
+
+ struct hidma_lldev *lldev;
+ void __iomem *dev_trca;
+ struct resource *trca_resource;
+ void __iomem *dev_evca;
+ struct resource *evca_resource;
+
+ /* used to protect the pending channel list*/
+ spinlock_t lock;
+ struct dma_device ddev;
+
+ struct dentry *debugfs;
+
+ /* sysfs entry for the channel id */
+ struct device_attribute *chid_attrs;
+
+ /* Task delivering issue_pending */
+ struct tasklet_struct task;
+};
+
+int hidma_ll_request(struct hidma_lldev *llhndl, u32 dev_id,
+ const char *dev_name,
+ void (*callback)(void *data), void *data, u32 *tre_ch);
+
+void hidma_ll_free(struct hidma_lldev *llhndl, u32 tre_ch);
+enum dma_status hidma_ll_status(struct hidma_lldev *llhndl, u32 tre_ch);
+bool hidma_ll_isenabled(struct hidma_lldev *llhndl);
+void hidma_ll_queue_request(struct hidma_lldev *llhndl, u32 tre_ch);
+void hidma_ll_start(struct hidma_lldev *llhndl);
+int hidma_ll_disable(struct hidma_lldev *lldev);
+int hidma_ll_enable(struct hidma_lldev *llhndl);
+void hidma_ll_set_transfer_params(struct hidma_lldev *llhndl, u32 tre_ch,
+ dma_addr_t src, dma_addr_t dest, u32 len, u32 flags, u32 txntype);
+void hidma_ll_setup_irq(struct hidma_lldev *lldev, bool msi);
+int hidma_ll_setup(struct hidma_lldev *lldev);
+struct hidma_lldev *hidma_ll_init(struct device *dev, u32 max_channels,
+ void __iomem *trca, void __iomem *evca,
+ u8 chidx);
+int hidma_ll_uninit(struct hidma_lldev *llhndl);
+irqreturn_t hidma_ll_inthandler(int irq, void *arg);
+irqreturn_t hidma_ll_inthandler_msi(int irq, void *arg, int cause);
+void hidma_cleanup_pending_tre(struct hidma_lldev *llhndl, u8 err_info,
+ u8 err_code);
+void hidma_debug_init(struct hidma_dev *dmadev);
+void hidma_debug_uninit(struct hidma_dev *dmadev);
+#endif
diff --git a/drivers/dma/qcom/hidma_dbg.c b/drivers/dma/qcom/hidma_dbg.c
new file mode 100644
index 000000000..ce87c7937
--- /dev/null
+++ b/drivers/dma/qcom/hidma_dbg.c
@@ -0,0 +1,165 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Qualcomm Technologies HIDMA debug file
+ *
+ * Copyright (c) 2015-2016, The Linux Foundation. All rights reserved.
+ */
+
+#include <linux/debugfs.h>
+#include <linux/device.h>
+#include <linux/list.h>
+#include <linux/pm_runtime.h>
+
+#include "hidma.h"
+
+static void hidma_ll_chstats(struct seq_file *s, void *llhndl, u32 tre_ch)
+{
+ struct hidma_lldev *lldev = llhndl;
+ struct hidma_tre *tre;
+ u32 length;
+ dma_addr_t src_start;
+ dma_addr_t dest_start;
+ u32 *tre_local;
+
+ if (tre_ch >= lldev->nr_tres) {
+ dev_err(lldev->dev, "invalid TRE number in chstats:%d", tre_ch);
+ return;
+ }
+ tre = &lldev->trepool[tre_ch];
+ seq_printf(s, "------Channel %d -----\n", tre_ch);
+ seq_printf(s, "allocated=%d\n", atomic_read(&tre->allocated));
+ seq_printf(s, "queued = 0x%x\n", tre->queued);
+ seq_printf(s, "err_info = 0x%x\n", tre->err_info);
+ seq_printf(s, "err_code = 0x%x\n", tre->err_code);
+ seq_printf(s, "status = 0x%x\n", tre->status);
+ seq_printf(s, "idx = 0x%x\n", tre->idx);
+ seq_printf(s, "dma_sig = 0x%x\n", tre->dma_sig);
+ seq_printf(s, "dev_name=%s\n", tre->dev_name);
+ seq_printf(s, "callback=%p\n", tre->callback);
+ seq_printf(s, "data=%p\n", tre->data);
+ seq_printf(s, "tre_index = 0x%x\n", tre->tre_index);
+
+ tre_local = &tre->tre_local[0];
+ src_start = tre_local[HIDMA_TRE_SRC_LOW_IDX];
+ src_start = ((u64) (tre_local[HIDMA_TRE_SRC_HI_IDX]) << 32) + src_start;
+ dest_start = tre_local[HIDMA_TRE_DEST_LOW_IDX];
+ dest_start += ((u64) (tre_local[HIDMA_TRE_DEST_HI_IDX]) << 32);
+ length = tre_local[HIDMA_TRE_LEN_IDX];
+
+ seq_printf(s, "src=%pap\n", &src_start);
+ seq_printf(s, "dest=%pap\n", &dest_start);
+ seq_printf(s, "length = 0x%x\n", length);
+}
+
+static void hidma_ll_devstats(struct seq_file *s, void *llhndl)
+{
+ struct hidma_lldev *lldev = llhndl;
+
+ seq_puts(s, "------Device -----\n");
+ seq_printf(s, "lldev init = 0x%x\n", lldev->initialized);
+ seq_printf(s, "trch_state = 0x%x\n", lldev->trch_state);
+ seq_printf(s, "evch_state = 0x%x\n", lldev->evch_state);
+ seq_printf(s, "chidx = 0x%x\n", lldev->chidx);
+ seq_printf(s, "nr_tres = 0x%x\n", lldev->nr_tres);
+ seq_printf(s, "trca=%p\n", lldev->trca);
+ seq_printf(s, "tre_ring=%p\n", lldev->tre_ring);
+ seq_printf(s, "tre_ring_handle=%pap\n", &lldev->tre_dma);
+ seq_printf(s, "tre_ring_size = 0x%x\n", lldev->tre_ring_size);
+ seq_printf(s, "tre_processed_off = 0x%x\n", lldev->tre_processed_off);
+ seq_printf(s, "pending_tre_count=%d\n",
+ atomic_read(&lldev->pending_tre_count));
+ seq_printf(s, "evca=%p\n", lldev->evca);
+ seq_printf(s, "evre_ring=%p\n", lldev->evre_ring);
+ seq_printf(s, "evre_ring_handle=%pap\n", &lldev->evre_dma);
+ seq_printf(s, "evre_ring_size = 0x%x\n", lldev->evre_ring_size);
+ seq_printf(s, "evre_processed_off = 0x%x\n", lldev->evre_processed_off);
+ seq_printf(s, "tre_write_offset = 0x%x\n", lldev->tre_write_offset);
+}
+
+/*
+ * hidma_chan_show: display HIDMA channel statistics
+ *
+ * Display the statistics for the current HIDMA virtual channel device.
+ */
+static int hidma_chan_show(struct seq_file *s, void *unused)
+{
+ struct hidma_chan *mchan = s->private;
+ struct hidma_desc *mdesc;
+ struct hidma_dev *dmadev = mchan->dmadev;
+
+ pm_runtime_get_sync(dmadev->ddev.dev);
+ seq_printf(s, "paused=%u\n", mchan->paused);
+ seq_printf(s, "dma_sig=%u\n", mchan->dma_sig);
+ seq_puts(s, "prepared\n");
+ list_for_each_entry(mdesc, &mchan->prepared, node)
+ hidma_ll_chstats(s, mchan->dmadev->lldev, mdesc->tre_ch);
+
+ seq_puts(s, "active\n");
+ list_for_each_entry(mdesc, &mchan->active, node)
+ hidma_ll_chstats(s, mchan->dmadev->lldev, mdesc->tre_ch);
+
+ seq_puts(s, "completed\n");
+ list_for_each_entry(mdesc, &mchan->completed, node)
+ hidma_ll_chstats(s, mchan->dmadev->lldev, mdesc->tre_ch);
+
+ hidma_ll_devstats(s, mchan->dmadev->lldev);
+ pm_runtime_mark_last_busy(dmadev->ddev.dev);
+ pm_runtime_put_autosuspend(dmadev->ddev.dev);
+ return 0;
+}
+
+/*
+ * hidma_dma_show: display HIDMA device info
+ *
+ * Display the info for the current HIDMA device.
+ */
+static int hidma_dma_show(struct seq_file *s, void *unused)
+{
+ struct hidma_dev *dmadev = s->private;
+ resource_size_t sz;
+
+ seq_printf(s, "nr_descriptors=%d\n", dmadev->nr_descriptors);
+ seq_printf(s, "dev_trca=%p\n", &dmadev->dev_trca);
+ seq_printf(s, "dev_trca_phys=%pa\n", &dmadev->trca_resource->start);
+ sz = resource_size(dmadev->trca_resource);
+ seq_printf(s, "dev_trca_size=%pa\n", &sz);
+ seq_printf(s, "dev_evca=%p\n", &dmadev->dev_evca);
+ seq_printf(s, "dev_evca_phys=%pa\n", &dmadev->evca_resource->start);
+ sz = resource_size(dmadev->evca_resource);
+ seq_printf(s, "dev_evca_size=%pa\n", &sz);
+ return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(hidma_chan);
+DEFINE_SHOW_ATTRIBUTE(hidma_dma);
+
+void hidma_debug_uninit(struct hidma_dev *dmadev)
+{
+ debugfs_remove_recursive(dmadev->debugfs);
+}
+
+void hidma_debug_init(struct hidma_dev *dmadev)
+{
+ int chidx = 0;
+ struct list_head *position = NULL;
+ struct dentry *dir;
+
+ dmadev->debugfs = debugfs_create_dir(dev_name(dmadev->ddev.dev), NULL);
+
+ /* walk through the virtual channel list */
+ list_for_each(position, &dmadev->ddev.channels) {
+ struct hidma_chan *chan;
+
+ chan = list_entry(position, struct hidma_chan,
+ chan.device_node);
+ sprintf(chan->dbg_name, "chan%d", chidx);
+ dir = debugfs_create_dir(chan->dbg_name,
+ dmadev->debugfs);
+ debugfs_create_file("stats", S_IRUGO, dir, chan,
+ &hidma_chan_fops);
+ chidx++;
+ }
+
+ debugfs_create_file("stats", S_IRUGO, dmadev->debugfs, dmadev,
+ &hidma_dma_fops);
+}
diff --git a/drivers/dma/qcom/hidma_ll.c b/drivers/dma/qcom/hidma_ll.c
new file mode 100644
index 000000000..53244e0e3
--- /dev/null
+++ b/drivers/dma/qcom/hidma_ll.c
@@ -0,0 +1,855 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Qualcomm Technologies HIDMA DMA engine low level code
+ *
+ * Copyright (c) 2015-2016, The Linux Foundation. All rights reserved.
+ */
+
+#include <linux/dmaengine.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/mm.h>
+#include <linux/highmem.h>
+#include <linux/dma-mapping.h>
+#include <linux/delay.h>
+#include <linux/atomic.h>
+#include <linux/iopoll.h>
+#include <linux/kfifo.h>
+#include <linux/bitops.h>
+
+#include "hidma.h"
+
+#define HIDMA_EVRE_SIZE 16 /* each EVRE is 16 bytes */
+
+#define HIDMA_TRCA_CTRLSTS_REG 0x000
+#define HIDMA_TRCA_RING_LOW_REG 0x008
+#define HIDMA_TRCA_RING_HIGH_REG 0x00C
+#define HIDMA_TRCA_RING_LEN_REG 0x010
+#define HIDMA_TRCA_DOORBELL_REG 0x400
+
+#define HIDMA_EVCA_CTRLSTS_REG 0x000
+#define HIDMA_EVCA_INTCTRL_REG 0x004
+#define HIDMA_EVCA_RING_LOW_REG 0x008
+#define HIDMA_EVCA_RING_HIGH_REG 0x00C
+#define HIDMA_EVCA_RING_LEN_REG 0x010
+#define HIDMA_EVCA_WRITE_PTR_REG 0x020
+#define HIDMA_EVCA_DOORBELL_REG 0x400
+
+#define HIDMA_EVCA_IRQ_STAT_REG 0x100
+#define HIDMA_EVCA_IRQ_CLR_REG 0x108
+#define HIDMA_EVCA_IRQ_EN_REG 0x110
+
+#define HIDMA_EVRE_CFG_IDX 0
+
+#define HIDMA_EVRE_ERRINFO_BIT_POS 24
+#define HIDMA_EVRE_CODE_BIT_POS 28
+
+#define HIDMA_EVRE_ERRINFO_MASK GENMASK(3, 0)
+#define HIDMA_EVRE_CODE_MASK GENMASK(3, 0)
+
+#define HIDMA_CH_CONTROL_MASK GENMASK(7, 0)
+#define HIDMA_CH_STATE_MASK GENMASK(7, 0)
+#define HIDMA_CH_STATE_BIT_POS 0x8
+
+#define HIDMA_IRQ_EV_CH_EOB_IRQ_BIT_POS 0
+#define HIDMA_IRQ_EV_CH_WR_RESP_BIT_POS 1
+#define HIDMA_IRQ_TR_CH_TRE_RD_RSP_ER_BIT_POS 9
+#define HIDMA_IRQ_TR_CH_DATA_RD_ER_BIT_POS 10
+#define HIDMA_IRQ_TR_CH_DATA_WR_ER_BIT_POS 11
+#define HIDMA_IRQ_TR_CH_INVALID_TRE_BIT_POS 14
+
+#define ENABLE_IRQS (BIT(HIDMA_IRQ_EV_CH_EOB_IRQ_BIT_POS) | \
+ BIT(HIDMA_IRQ_EV_CH_WR_RESP_BIT_POS) | \
+ BIT(HIDMA_IRQ_TR_CH_TRE_RD_RSP_ER_BIT_POS) | \
+ BIT(HIDMA_IRQ_TR_CH_DATA_RD_ER_BIT_POS) | \
+ BIT(HIDMA_IRQ_TR_CH_DATA_WR_ER_BIT_POS) | \
+ BIT(HIDMA_IRQ_TR_CH_INVALID_TRE_BIT_POS))
+
+#define HIDMA_INCREMENT_ITERATOR(iter, size, ring_size) \
+do { \
+ iter += size; \
+ if (iter >= ring_size) \
+ iter -= ring_size; \
+} while (0)
+
+#define HIDMA_CH_STATE(val) \
+ ((val >> HIDMA_CH_STATE_BIT_POS) & HIDMA_CH_STATE_MASK)
+
+#define HIDMA_ERR_INT_MASK \
+ (BIT(HIDMA_IRQ_TR_CH_INVALID_TRE_BIT_POS) | \
+ BIT(HIDMA_IRQ_TR_CH_TRE_RD_RSP_ER_BIT_POS) | \
+ BIT(HIDMA_IRQ_EV_CH_WR_RESP_BIT_POS) | \
+ BIT(HIDMA_IRQ_TR_CH_DATA_RD_ER_BIT_POS) | \
+ BIT(HIDMA_IRQ_TR_CH_DATA_WR_ER_BIT_POS))
+
+enum ch_command {
+ HIDMA_CH_DISABLE = 0,
+ HIDMA_CH_ENABLE = 1,
+ HIDMA_CH_SUSPEND = 2,
+ HIDMA_CH_RESET = 9,
+};
+
+enum ch_state {
+ HIDMA_CH_DISABLED = 0,
+ HIDMA_CH_ENABLED = 1,
+ HIDMA_CH_RUNNING = 2,
+ HIDMA_CH_SUSPENDED = 3,
+ HIDMA_CH_STOPPED = 4,
+};
+
+enum err_code {
+ HIDMA_EVRE_STATUS_COMPLETE = 1,
+ HIDMA_EVRE_STATUS_ERROR = 4,
+};
+
+static int hidma_is_chan_enabled(int state)
+{
+ switch (state) {
+ case HIDMA_CH_ENABLED:
+ case HIDMA_CH_RUNNING:
+ return true;
+ default:
+ return false;
+ }
+}
+
+void hidma_ll_free(struct hidma_lldev *lldev, u32 tre_ch)
+{
+ struct hidma_tre *tre;
+
+ if (tre_ch >= lldev->nr_tres) {
+ dev_err(lldev->dev, "invalid TRE number in free:%d", tre_ch);
+ return;
+ }
+
+ tre = &lldev->trepool[tre_ch];
+ if (atomic_read(&tre->allocated) != true) {
+ dev_err(lldev->dev, "trying to free an unused TRE:%d", tre_ch);
+ return;
+ }
+
+ atomic_set(&tre->allocated, 0);
+}
+
+int hidma_ll_request(struct hidma_lldev *lldev, u32 sig, const char *dev_name,
+ void (*callback)(void *data), void *data, u32 *tre_ch)
+{
+ unsigned int i;
+ struct hidma_tre *tre;
+ u32 *tre_local;
+
+ if (!tre_ch || !lldev)
+ return -EINVAL;
+
+ /* need to have at least one empty spot in the queue */
+ for (i = 0; i < lldev->nr_tres - 1; i++) {
+ if (atomic_add_unless(&lldev->trepool[i].allocated, 1, 1))
+ break;
+ }
+
+ if (i == (lldev->nr_tres - 1))
+ return -ENOMEM;
+
+ tre = &lldev->trepool[i];
+ tre->dma_sig = sig;
+ tre->dev_name = dev_name;
+ tre->callback = callback;
+ tre->data = data;
+ tre->idx = i;
+ tre->status = 0;
+ tre->queued = 0;
+ tre->err_code = 0;
+ tre->err_info = 0;
+ tre->lldev = lldev;
+ tre_local = &tre->tre_local[0];
+ tre_local[HIDMA_TRE_CFG_IDX] = (lldev->chidx & 0xFF) << 8;
+ tre_local[HIDMA_TRE_CFG_IDX] |= BIT(16); /* set IEOB */
+ *tre_ch = i;
+ if (callback)
+ callback(data);
+ return 0;
+}
+
+/*
+ * Multiple TREs may be queued and waiting in the pending queue.
+ */
+static void hidma_ll_tre_complete(struct tasklet_struct *t)
+{
+ struct hidma_lldev *lldev = from_tasklet(lldev, t, task);
+ struct hidma_tre *tre;
+
+ while (kfifo_out(&lldev->handoff_fifo, &tre, 1)) {
+ /* call the user if it has been read by the hardware */
+ if (tre->callback)
+ tre->callback(tre->data);
+ }
+}
+
+static int hidma_post_completed(struct hidma_lldev *lldev, u8 err_info,
+ u8 err_code)
+{
+ struct hidma_tre *tre;
+ unsigned long flags;
+ u32 tre_iterator;
+
+ spin_lock_irqsave(&lldev->lock, flags);
+
+ tre_iterator = lldev->tre_processed_off;
+ tre = lldev->pending_tre_list[tre_iterator / HIDMA_TRE_SIZE];
+ if (!tre) {
+ spin_unlock_irqrestore(&lldev->lock, flags);
+ dev_warn(lldev->dev, "tre_index [%d] and tre out of sync\n",
+ tre_iterator / HIDMA_TRE_SIZE);
+ return -EINVAL;
+ }
+ lldev->pending_tre_list[tre->tre_index] = NULL;
+
+ /*
+ * Keep track of pending TREs that SW is expecting to receive
+ * from HW. We got one now. Decrement our counter.
+ */
+ if (atomic_dec_return(&lldev->pending_tre_count) < 0) {
+ dev_warn(lldev->dev, "tre count mismatch on completion");
+ atomic_set(&lldev->pending_tre_count, 0);
+ }
+
+ HIDMA_INCREMENT_ITERATOR(tre_iterator, HIDMA_TRE_SIZE,
+ lldev->tre_ring_size);
+ lldev->tre_processed_off = tre_iterator;
+ spin_unlock_irqrestore(&lldev->lock, flags);
+
+ tre->err_info = err_info;
+ tre->err_code = err_code;
+ tre->queued = 0;
+
+ kfifo_put(&lldev->handoff_fifo, tre);
+ tasklet_schedule(&lldev->task);
+
+ return 0;
+}
+
+/*
+ * Called to handle the interrupt for the channel.
+ * Return a positive number if TRE or EVRE were consumed on this run.
+ * Return a positive number if there are pending TREs or EVREs.
+ * Return 0 if there is nothing to consume or no pending TREs/EVREs found.
+ */
+static int hidma_handle_tre_completion(struct hidma_lldev *lldev)
+{
+ u32 evre_ring_size = lldev->evre_ring_size;
+ u32 err_info, err_code, evre_write_off;
+ u32 evre_iterator;
+ u32 num_completed = 0;
+
+ evre_write_off = readl_relaxed(lldev->evca + HIDMA_EVCA_WRITE_PTR_REG);
+ evre_iterator = lldev->evre_processed_off;
+
+ if ((evre_write_off > evre_ring_size) ||
+ (evre_write_off % HIDMA_EVRE_SIZE)) {
+ dev_err(lldev->dev, "HW reports invalid EVRE write offset\n");
+ return 0;
+ }
+
+ /*
+ * By the time control reaches here the number of EVREs and TREs
+ * may not match. Only consume the ones that hardware told us.
+ */
+ while ((evre_iterator != evre_write_off)) {
+ u32 *current_evre = lldev->evre_ring + evre_iterator;
+ u32 cfg;
+
+ cfg = current_evre[HIDMA_EVRE_CFG_IDX];
+ err_info = cfg >> HIDMA_EVRE_ERRINFO_BIT_POS;
+ err_info &= HIDMA_EVRE_ERRINFO_MASK;
+ err_code =
+ (cfg >> HIDMA_EVRE_CODE_BIT_POS) & HIDMA_EVRE_CODE_MASK;
+
+ if (hidma_post_completed(lldev, err_info, err_code))
+ break;
+
+ HIDMA_INCREMENT_ITERATOR(evre_iterator, HIDMA_EVRE_SIZE,
+ evre_ring_size);
+
+ /*
+ * Read the new event descriptor written by the HW.
+ * As we are processing the delivered events, other events
+ * get queued to the SW for processing.
+ */
+ evre_write_off =
+ readl_relaxed(lldev->evca + HIDMA_EVCA_WRITE_PTR_REG);
+ num_completed++;
+
+ /*
+ * An error interrupt might have arrived while we are processing
+ * the completed interrupt.
+ */
+ if (!hidma_ll_isenabled(lldev))
+ break;
+ }
+
+ if (num_completed) {
+ u32 evre_read_off = (lldev->evre_processed_off +
+ HIDMA_EVRE_SIZE * num_completed);
+ evre_read_off = evre_read_off % evre_ring_size;
+ writel(evre_read_off, lldev->evca + HIDMA_EVCA_DOORBELL_REG);
+
+ /* record the last processed tre offset */
+ lldev->evre_processed_off = evre_read_off;
+ }
+
+ return num_completed;
+}
+
+void hidma_cleanup_pending_tre(struct hidma_lldev *lldev, u8 err_info,
+ u8 err_code)
+{
+ while (atomic_read(&lldev->pending_tre_count)) {
+ if (hidma_post_completed(lldev, err_info, err_code))
+ break;
+ }
+}
+
+static int hidma_ll_reset(struct hidma_lldev *lldev)
+{
+ u32 val;
+ int ret;
+
+ val = readl(lldev->trca + HIDMA_TRCA_CTRLSTS_REG);
+ val &= ~(HIDMA_CH_CONTROL_MASK << 16);
+ val |= HIDMA_CH_RESET << 16;
+ writel(val, lldev->trca + HIDMA_TRCA_CTRLSTS_REG);
+
+ /*
+ * Delay 10ms after reset to allow DMA logic to quiesce.
+ * Do a polled read up to 1ms and 10ms maximum.
+ */
+ ret = readl_poll_timeout(lldev->trca + HIDMA_TRCA_CTRLSTS_REG, val,
+ HIDMA_CH_STATE(val) == HIDMA_CH_DISABLED,
+ 1000, 10000);
+ if (ret) {
+ dev_err(lldev->dev, "transfer channel did not reset\n");
+ return ret;
+ }
+
+ val = readl(lldev->evca + HIDMA_EVCA_CTRLSTS_REG);
+ val &= ~(HIDMA_CH_CONTROL_MASK << 16);
+ val |= HIDMA_CH_RESET << 16;
+ writel(val, lldev->evca + HIDMA_EVCA_CTRLSTS_REG);
+
+ /*
+ * Delay 10ms after reset to allow DMA logic to quiesce.
+ * Do a polled read up to 1ms and 10ms maximum.
+ */
+ ret = readl_poll_timeout(lldev->evca + HIDMA_EVCA_CTRLSTS_REG, val,
+ HIDMA_CH_STATE(val) == HIDMA_CH_DISABLED,
+ 1000, 10000);
+ if (ret)
+ return ret;
+
+ lldev->trch_state = HIDMA_CH_DISABLED;
+ lldev->evch_state = HIDMA_CH_DISABLED;
+ return 0;
+}
+
+/*
+ * The interrupt handler for HIDMA will try to consume as many pending
+ * EVRE from the event queue as possible. Each EVRE has an associated
+ * TRE that holds the user interface parameters. EVRE reports the
+ * result of the transaction. Hardware guarantees ordering between EVREs
+ * and TREs. We use last processed offset to figure out which TRE is
+ * associated with which EVRE. If two TREs are consumed by HW, the EVREs
+ * are in order in the event ring.
+ *
+ * This handler will do a one pass for consuming EVREs. Other EVREs may
+ * be delivered while we are working. It will try to consume incoming
+ * EVREs one more time and return.
+ *
+ * For unprocessed EVREs, hardware will trigger another interrupt until
+ * all the interrupt bits are cleared.
+ *
+ * Hardware guarantees that by the time interrupt is observed, all data
+ * transactions in flight are delivered to their respective places and
+ * are visible to the CPU.
+ *
+ * On demand paging for IOMMU is only supported for PCIe via PRI
+ * (Page Request Interface) not for HIDMA. All other hardware instances
+ * including HIDMA work on pinned DMA addresses.
+ *
+ * HIDMA is not aware of IOMMU presence since it follows the DMA API. All
+ * IOMMU latency will be built into the data movement time. By the time
+ * interrupt happens, IOMMU lookups + data movement has already taken place.
+ *
+ * While the first read in a typical PCI endpoint ISR flushes all outstanding
+ * requests traditionally to the destination, this concept does not apply
+ * here for this HW.
+ */
+static void hidma_ll_int_handler_internal(struct hidma_lldev *lldev, int cause)
+{
+ unsigned long irqflags;
+
+ if (cause & HIDMA_ERR_INT_MASK) {
+ dev_err(lldev->dev, "error 0x%x, disabling...\n",
+ cause);
+
+ /* Clear out pending interrupts */
+ writel(cause, lldev->evca + HIDMA_EVCA_IRQ_CLR_REG);
+
+ /* No further submissions. */
+ hidma_ll_disable(lldev);
+
+ /* Driver completes the txn and intimates the client.*/
+ hidma_cleanup_pending_tre(lldev, 0xFF,
+ HIDMA_EVRE_STATUS_ERROR);
+
+ return;
+ }
+
+ spin_lock_irqsave(&lldev->lock, irqflags);
+ writel_relaxed(cause, lldev->evca + HIDMA_EVCA_IRQ_CLR_REG);
+ spin_unlock_irqrestore(&lldev->lock, irqflags);
+
+ /*
+ * Fine tuned for this HW...
+ *
+ * This ISR has been designed for this particular hardware. Relaxed
+ * read and write accessors are used for performance reasons due to
+ * interrupt delivery guarantees. Do not copy this code blindly and
+ * expect that to work.
+ *
+ * Try to consume as many EVREs as possible.
+ */
+ hidma_handle_tre_completion(lldev);
+}
+
+irqreturn_t hidma_ll_inthandler(int chirq, void *arg)
+{
+ struct hidma_lldev *lldev = arg;
+ u32 status;
+ u32 enable;
+ u32 cause;
+
+ status = readl_relaxed(lldev->evca + HIDMA_EVCA_IRQ_STAT_REG);
+ enable = readl_relaxed(lldev->evca + HIDMA_EVCA_IRQ_EN_REG);
+ cause = status & enable;
+
+ while (cause) {
+ hidma_ll_int_handler_internal(lldev, cause);
+
+ /*
+ * Another interrupt might have arrived while we are
+ * processing this one. Read the new cause.
+ */
+ status = readl_relaxed(lldev->evca + HIDMA_EVCA_IRQ_STAT_REG);
+ enable = readl_relaxed(lldev->evca + HIDMA_EVCA_IRQ_EN_REG);
+ cause = status & enable;
+ }
+
+ return IRQ_HANDLED;
+}
+
+irqreturn_t hidma_ll_inthandler_msi(int chirq, void *arg, int cause)
+{
+ struct hidma_lldev *lldev = arg;
+
+ hidma_ll_int_handler_internal(lldev, cause);
+ return IRQ_HANDLED;
+}
+
+int hidma_ll_enable(struct hidma_lldev *lldev)
+{
+ u32 val;
+ int ret;
+
+ val = readl(lldev->evca + HIDMA_EVCA_CTRLSTS_REG);
+ val &= ~(HIDMA_CH_CONTROL_MASK << 16);
+ val |= HIDMA_CH_ENABLE << 16;
+ writel(val, lldev->evca + HIDMA_EVCA_CTRLSTS_REG);
+
+ ret = readl_poll_timeout(lldev->evca + HIDMA_EVCA_CTRLSTS_REG, val,
+ hidma_is_chan_enabled(HIDMA_CH_STATE(val)),
+ 1000, 10000);
+ if (ret) {
+ dev_err(lldev->dev, "event channel did not get enabled\n");
+ return ret;
+ }
+
+ val = readl(lldev->trca + HIDMA_TRCA_CTRLSTS_REG);
+ val &= ~(HIDMA_CH_CONTROL_MASK << 16);
+ val |= HIDMA_CH_ENABLE << 16;
+ writel(val, lldev->trca + HIDMA_TRCA_CTRLSTS_REG);
+
+ ret = readl_poll_timeout(lldev->trca + HIDMA_TRCA_CTRLSTS_REG, val,
+ hidma_is_chan_enabled(HIDMA_CH_STATE(val)),
+ 1000, 10000);
+ if (ret) {
+ dev_err(lldev->dev, "transfer channel did not get enabled\n");
+ return ret;
+ }
+
+ lldev->trch_state = HIDMA_CH_ENABLED;
+ lldev->evch_state = HIDMA_CH_ENABLED;
+
+ /* enable irqs */
+ writel(ENABLE_IRQS, lldev->evca + HIDMA_EVCA_IRQ_EN_REG);
+
+ return 0;
+}
+
+void hidma_ll_start(struct hidma_lldev *lldev)
+{
+ unsigned long irqflags;
+
+ spin_lock_irqsave(&lldev->lock, irqflags);
+ writel(lldev->tre_write_offset, lldev->trca + HIDMA_TRCA_DOORBELL_REG);
+ spin_unlock_irqrestore(&lldev->lock, irqflags);
+}
+
+bool hidma_ll_isenabled(struct hidma_lldev *lldev)
+{
+ u32 val;
+
+ val = readl(lldev->trca + HIDMA_TRCA_CTRLSTS_REG);
+ lldev->trch_state = HIDMA_CH_STATE(val);
+ val = readl(lldev->evca + HIDMA_EVCA_CTRLSTS_REG);
+ lldev->evch_state = HIDMA_CH_STATE(val);
+
+ /* both channels have to be enabled before calling this function */
+ if (hidma_is_chan_enabled(lldev->trch_state) &&
+ hidma_is_chan_enabled(lldev->evch_state))
+ return true;
+
+ return false;
+}
+
+void hidma_ll_queue_request(struct hidma_lldev *lldev, u32 tre_ch)
+{
+ struct hidma_tre *tre;
+ unsigned long flags;
+
+ tre = &lldev->trepool[tre_ch];
+
+ /* copy the TRE into its location in the TRE ring */
+ spin_lock_irqsave(&lldev->lock, flags);
+ tre->tre_index = lldev->tre_write_offset / HIDMA_TRE_SIZE;
+ lldev->pending_tre_list[tre->tre_index] = tre;
+ memcpy(lldev->tre_ring + lldev->tre_write_offset,
+ &tre->tre_local[0], HIDMA_TRE_SIZE);
+ tre->err_code = 0;
+ tre->err_info = 0;
+ tre->queued = 1;
+ atomic_inc(&lldev->pending_tre_count);
+ lldev->tre_write_offset = (lldev->tre_write_offset + HIDMA_TRE_SIZE)
+ % lldev->tre_ring_size;
+ spin_unlock_irqrestore(&lldev->lock, flags);
+}
+
+/*
+ * Note that even though we stop this channel if there is a pending transaction
+ * in flight it will complete and follow the callback. This request will
+ * prevent further requests to be made.
+ */
+int hidma_ll_disable(struct hidma_lldev *lldev)
+{
+ u32 val;
+ int ret;
+
+ /* The channel needs to be in working state */
+ if (!hidma_ll_isenabled(lldev))
+ return 0;
+
+ val = readl(lldev->trca + HIDMA_TRCA_CTRLSTS_REG);
+ val &= ~(HIDMA_CH_CONTROL_MASK << 16);
+ val |= HIDMA_CH_SUSPEND << 16;
+ writel(val, lldev->trca + HIDMA_TRCA_CTRLSTS_REG);
+
+ /*
+ * Start the wait right after the suspend is confirmed.
+ * Do a polled read up to 1ms and 10ms maximum.
+ */
+ ret = readl_poll_timeout(lldev->trca + HIDMA_TRCA_CTRLSTS_REG, val,
+ HIDMA_CH_STATE(val) == HIDMA_CH_SUSPENDED,
+ 1000, 10000);
+ if (ret)
+ return ret;
+
+ val = readl(lldev->evca + HIDMA_EVCA_CTRLSTS_REG);
+ val &= ~(HIDMA_CH_CONTROL_MASK << 16);
+ val |= HIDMA_CH_SUSPEND << 16;
+ writel(val, lldev->evca + HIDMA_EVCA_CTRLSTS_REG);
+
+ /*
+ * Start the wait right after the suspend is confirmed
+ * Delay up to 10ms after reset to allow DMA logic to quiesce.
+ */
+ ret = readl_poll_timeout(lldev->evca + HIDMA_EVCA_CTRLSTS_REG, val,
+ HIDMA_CH_STATE(val) == HIDMA_CH_SUSPENDED,
+ 1000, 10000);
+ if (ret)
+ return ret;
+
+ lldev->trch_state = HIDMA_CH_SUSPENDED;
+ lldev->evch_state = HIDMA_CH_SUSPENDED;
+
+ /* disable interrupts */
+ writel(0, lldev->evca + HIDMA_EVCA_IRQ_EN_REG);
+ return 0;
+}
+
+void hidma_ll_set_transfer_params(struct hidma_lldev *lldev, u32 tre_ch,
+ dma_addr_t src, dma_addr_t dest, u32 len,
+ u32 flags, u32 txntype)
+{
+ struct hidma_tre *tre;
+ u32 *tre_local;
+
+ if (tre_ch >= lldev->nr_tres) {
+ dev_err(lldev->dev, "invalid TRE number in transfer params:%d",
+ tre_ch);
+ return;
+ }
+
+ tre = &lldev->trepool[tre_ch];
+ if (atomic_read(&tre->allocated) != true) {
+ dev_err(lldev->dev, "trying to set params on an unused TRE:%d",
+ tre_ch);
+ return;
+ }
+
+ tre_local = &tre->tre_local[0];
+ tre_local[HIDMA_TRE_CFG_IDX] &= ~GENMASK(7, 0);
+ tre_local[HIDMA_TRE_CFG_IDX] |= txntype;
+ tre_local[HIDMA_TRE_LEN_IDX] = len;
+ tre_local[HIDMA_TRE_SRC_LOW_IDX] = lower_32_bits(src);
+ tre_local[HIDMA_TRE_SRC_HI_IDX] = upper_32_bits(src);
+ tre_local[HIDMA_TRE_DEST_LOW_IDX] = lower_32_bits(dest);
+ tre_local[HIDMA_TRE_DEST_HI_IDX] = upper_32_bits(dest);
+ tre->int_flags = flags;
+}
+
+/*
+ * Called during initialization and after an error condition
+ * to restore hardware state.
+ */
+int hidma_ll_setup(struct hidma_lldev *lldev)
+{
+ int rc;
+ u64 addr;
+ u32 val;
+ u32 nr_tres = lldev->nr_tres;
+
+ atomic_set(&lldev->pending_tre_count, 0);
+ lldev->tre_processed_off = 0;
+ lldev->evre_processed_off = 0;
+ lldev->tre_write_offset = 0;
+
+ /* disable interrupts */
+ writel(0, lldev->evca + HIDMA_EVCA_IRQ_EN_REG);
+
+ /* clear all pending interrupts */
+ val = readl(lldev->evca + HIDMA_EVCA_IRQ_STAT_REG);
+ writel(val, lldev->evca + HIDMA_EVCA_IRQ_CLR_REG);
+
+ rc = hidma_ll_reset(lldev);
+ if (rc)
+ return rc;
+
+ /*
+ * Clear all pending interrupts again.
+ * Otherwise, we observe reset complete interrupts.
+ */
+ val = readl(lldev->evca + HIDMA_EVCA_IRQ_STAT_REG);
+ writel(val, lldev->evca + HIDMA_EVCA_IRQ_CLR_REG);
+
+ /* disable interrupts again after reset */
+ writel(0, lldev->evca + HIDMA_EVCA_IRQ_EN_REG);
+
+ addr = lldev->tre_dma;
+ writel(lower_32_bits(addr), lldev->trca + HIDMA_TRCA_RING_LOW_REG);
+ writel(upper_32_bits(addr), lldev->trca + HIDMA_TRCA_RING_HIGH_REG);
+ writel(lldev->tre_ring_size, lldev->trca + HIDMA_TRCA_RING_LEN_REG);
+
+ addr = lldev->evre_dma;
+ writel(lower_32_bits(addr), lldev->evca + HIDMA_EVCA_RING_LOW_REG);
+ writel(upper_32_bits(addr), lldev->evca + HIDMA_EVCA_RING_HIGH_REG);
+ writel(HIDMA_EVRE_SIZE * nr_tres,
+ lldev->evca + HIDMA_EVCA_RING_LEN_REG);
+
+ /* configure interrupts */
+ hidma_ll_setup_irq(lldev, lldev->msi_support);
+
+ rc = hidma_ll_enable(lldev);
+ if (rc)
+ return rc;
+
+ return rc;
+}
+
+void hidma_ll_setup_irq(struct hidma_lldev *lldev, bool msi)
+{
+ u32 val;
+
+ lldev->msi_support = msi;
+
+ /* disable interrupts again after reset */
+ writel(0, lldev->evca + HIDMA_EVCA_IRQ_CLR_REG);
+ writel(0, lldev->evca + HIDMA_EVCA_IRQ_EN_REG);
+
+ /* support IRQ by default */
+ val = readl(lldev->evca + HIDMA_EVCA_INTCTRL_REG);
+ val &= ~0xF;
+ if (!lldev->msi_support)
+ val = val | 0x1;
+ writel(val, lldev->evca + HIDMA_EVCA_INTCTRL_REG);
+
+ /* clear all pending interrupts and enable them */
+ writel(ENABLE_IRQS, lldev->evca + HIDMA_EVCA_IRQ_CLR_REG);
+ writel(ENABLE_IRQS, lldev->evca + HIDMA_EVCA_IRQ_EN_REG);
+}
+
+struct hidma_lldev *hidma_ll_init(struct device *dev, u32 nr_tres,
+ void __iomem *trca, void __iomem *evca,
+ u8 chidx)
+{
+ u32 required_bytes;
+ struct hidma_lldev *lldev;
+ int rc;
+ size_t sz;
+
+ if (!trca || !evca || !dev || !nr_tres)
+ return NULL;
+
+ /* need at least four TREs */
+ if (nr_tres < 4)
+ return NULL;
+
+ /* need an extra space */
+ nr_tres += 1;
+
+ lldev = devm_kzalloc(dev, sizeof(struct hidma_lldev), GFP_KERNEL);
+ if (!lldev)
+ return NULL;
+
+ lldev->evca = evca;
+ lldev->trca = trca;
+ lldev->dev = dev;
+ sz = sizeof(struct hidma_tre);
+ lldev->trepool = devm_kcalloc(lldev->dev, nr_tres, sz, GFP_KERNEL);
+ if (!lldev->trepool)
+ return NULL;
+
+ required_bytes = sizeof(lldev->pending_tre_list[0]);
+ lldev->pending_tre_list = devm_kcalloc(dev, nr_tres, required_bytes,
+ GFP_KERNEL);
+ if (!lldev->pending_tre_list)
+ return NULL;
+
+ sz = (HIDMA_TRE_SIZE + 1) * nr_tres;
+ lldev->tre_ring = dmam_alloc_coherent(dev, sz, &lldev->tre_dma,
+ GFP_KERNEL);
+ if (!lldev->tre_ring)
+ return NULL;
+
+ lldev->tre_ring_size = HIDMA_TRE_SIZE * nr_tres;
+ lldev->nr_tres = nr_tres;
+
+ /* the TRE ring has to be TRE_SIZE aligned */
+ if (!IS_ALIGNED(lldev->tre_dma, HIDMA_TRE_SIZE)) {
+ u8 tre_ring_shift;
+
+ tre_ring_shift = lldev->tre_dma % HIDMA_TRE_SIZE;
+ tre_ring_shift = HIDMA_TRE_SIZE - tre_ring_shift;
+ lldev->tre_dma += tre_ring_shift;
+ lldev->tre_ring += tre_ring_shift;
+ }
+
+ sz = (HIDMA_EVRE_SIZE + 1) * nr_tres;
+ lldev->evre_ring = dmam_alloc_coherent(dev, sz, &lldev->evre_dma,
+ GFP_KERNEL);
+ if (!lldev->evre_ring)
+ return NULL;
+
+ lldev->evre_ring_size = HIDMA_EVRE_SIZE * nr_tres;
+
+ /* the EVRE ring has to be EVRE_SIZE aligned */
+ if (!IS_ALIGNED(lldev->evre_dma, HIDMA_EVRE_SIZE)) {
+ u8 evre_ring_shift;
+
+ evre_ring_shift = lldev->evre_dma % HIDMA_EVRE_SIZE;
+ evre_ring_shift = HIDMA_EVRE_SIZE - evre_ring_shift;
+ lldev->evre_dma += evre_ring_shift;
+ lldev->evre_ring += evre_ring_shift;
+ }
+ lldev->nr_tres = nr_tres;
+ lldev->chidx = chidx;
+
+ sz = nr_tres * sizeof(struct hidma_tre *);
+ rc = kfifo_alloc(&lldev->handoff_fifo, sz, GFP_KERNEL);
+ if (rc)
+ return NULL;
+
+ rc = hidma_ll_setup(lldev);
+ if (rc)
+ return NULL;
+
+ spin_lock_init(&lldev->lock);
+ tasklet_setup(&lldev->task, hidma_ll_tre_complete);
+ lldev->initialized = 1;
+ writel(ENABLE_IRQS, lldev->evca + HIDMA_EVCA_IRQ_EN_REG);
+ return lldev;
+}
+
+int hidma_ll_uninit(struct hidma_lldev *lldev)
+{
+ u32 required_bytes;
+ int rc = 0;
+ u32 val;
+
+ if (!lldev)
+ return -ENODEV;
+
+ if (!lldev->initialized)
+ return 0;
+
+ lldev->initialized = 0;
+
+ required_bytes = sizeof(struct hidma_tre) * lldev->nr_tres;
+ tasklet_kill(&lldev->task);
+ memset(lldev->trepool, 0, required_bytes);
+ lldev->trepool = NULL;
+ atomic_set(&lldev->pending_tre_count, 0);
+ lldev->tre_write_offset = 0;
+
+ rc = hidma_ll_reset(lldev);
+
+ /*
+ * Clear all pending interrupts again.
+ * Otherwise, we observe reset complete interrupts.
+ */
+ val = readl(lldev->evca + HIDMA_EVCA_IRQ_STAT_REG);
+ writel(val, lldev->evca + HIDMA_EVCA_IRQ_CLR_REG);
+ writel(0, lldev->evca + HIDMA_EVCA_IRQ_EN_REG);
+ return rc;
+}
+
+enum dma_status hidma_ll_status(struct hidma_lldev *lldev, u32 tre_ch)
+{
+ enum dma_status ret = DMA_ERROR;
+ struct hidma_tre *tre;
+ unsigned long flags;
+ u8 err_code;
+
+ spin_lock_irqsave(&lldev->lock, flags);
+
+ tre = &lldev->trepool[tre_ch];
+ err_code = tre->err_code;
+
+ if (err_code & HIDMA_EVRE_STATUS_COMPLETE)
+ ret = DMA_COMPLETE;
+ else if (err_code & HIDMA_EVRE_STATUS_ERROR)
+ ret = DMA_ERROR;
+ else
+ ret = DMA_IN_PROGRESS;
+ spin_unlock_irqrestore(&lldev->lock, flags);
+
+ return ret;
+}
diff --git a/drivers/dma/qcom/hidma_mgmt.c b/drivers/dma/qcom/hidma_mgmt.c
new file mode 100644
index 000000000..62026607f
--- /dev/null
+++ b/drivers/dma/qcom/hidma_mgmt.c
@@ -0,0 +1,440 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Qualcomm Technologies HIDMA DMA engine Management interface
+ *
+ * Copyright (c) 2015-2017, The Linux Foundation. All rights reserved.
+ */
+
+#include <linux/dmaengine.h>
+#include <linux/acpi.h>
+#include <linux/of.h>
+#include <linux/property.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/of_platform.h>
+#include <linux/module.h>
+#include <linux/uaccess.h>
+#include <linux/slab.h>
+#include <linux/pm_runtime.h>
+#include <linux/bitops.h>
+#include <linux/dma-mapping.h>
+
+#include "hidma_mgmt.h"
+
+#define HIDMA_QOS_N_OFFSET 0x700
+#define HIDMA_CFG_OFFSET 0x400
+#define HIDMA_MAX_BUS_REQ_LEN_OFFSET 0x41C
+#define HIDMA_MAX_XACTIONS_OFFSET 0x420
+#define HIDMA_HW_VERSION_OFFSET 0x424
+#define HIDMA_CHRESET_TIMEOUT_OFFSET 0x418
+
+#define HIDMA_MAX_WR_XACTIONS_MASK GENMASK(4, 0)
+#define HIDMA_MAX_RD_XACTIONS_MASK GENMASK(4, 0)
+#define HIDMA_WEIGHT_MASK GENMASK(6, 0)
+#define HIDMA_MAX_BUS_REQ_LEN_MASK GENMASK(15, 0)
+#define HIDMA_CHRESET_TIMEOUT_MASK GENMASK(19, 0)
+
+#define HIDMA_MAX_WR_XACTIONS_BIT_POS 16
+#define HIDMA_MAX_BUS_WR_REQ_BIT_POS 16
+#define HIDMA_WRR_BIT_POS 8
+#define HIDMA_PRIORITY_BIT_POS 15
+
+#define HIDMA_AUTOSUSPEND_TIMEOUT 2000
+#define HIDMA_MAX_CHANNEL_WEIGHT 15
+
+static unsigned int max_write_request;
+module_param(max_write_request, uint, 0644);
+MODULE_PARM_DESC(max_write_request,
+ "maximum write burst (default: ACPI/DT value)");
+
+static unsigned int max_read_request;
+module_param(max_read_request, uint, 0644);
+MODULE_PARM_DESC(max_read_request,
+ "maximum read burst (default: ACPI/DT value)");
+
+static unsigned int max_wr_xactions;
+module_param(max_wr_xactions, uint, 0644);
+MODULE_PARM_DESC(max_wr_xactions,
+ "maximum number of write transactions (default: ACPI/DT value)");
+
+static unsigned int max_rd_xactions;
+module_param(max_rd_xactions, uint, 0644);
+MODULE_PARM_DESC(max_rd_xactions,
+ "maximum number of read transactions (default: ACPI/DT value)");
+
+int hidma_mgmt_setup(struct hidma_mgmt_dev *mgmtdev)
+{
+ unsigned int i;
+ u32 val;
+
+ if (!is_power_of_2(mgmtdev->max_write_request) ||
+ (mgmtdev->max_write_request < 128) ||
+ (mgmtdev->max_write_request > 1024)) {
+ dev_err(&mgmtdev->pdev->dev, "invalid write request %d\n",
+ mgmtdev->max_write_request);
+ return -EINVAL;
+ }
+
+ if (!is_power_of_2(mgmtdev->max_read_request) ||
+ (mgmtdev->max_read_request < 128) ||
+ (mgmtdev->max_read_request > 1024)) {
+ dev_err(&mgmtdev->pdev->dev, "invalid read request %d\n",
+ mgmtdev->max_read_request);
+ return -EINVAL;
+ }
+
+ if (mgmtdev->max_wr_xactions > HIDMA_MAX_WR_XACTIONS_MASK) {
+ dev_err(&mgmtdev->pdev->dev,
+ "max_wr_xactions cannot be bigger than %ld\n",
+ HIDMA_MAX_WR_XACTIONS_MASK);
+ return -EINVAL;
+ }
+
+ if (mgmtdev->max_rd_xactions > HIDMA_MAX_RD_XACTIONS_MASK) {
+ dev_err(&mgmtdev->pdev->dev,
+ "max_rd_xactions cannot be bigger than %ld\n",
+ HIDMA_MAX_RD_XACTIONS_MASK);
+ return -EINVAL;
+ }
+
+ for (i = 0; i < mgmtdev->dma_channels; i++) {
+ if (mgmtdev->priority[i] > 1) {
+ dev_err(&mgmtdev->pdev->dev,
+ "priority can be 0 or 1\n");
+ return -EINVAL;
+ }
+
+ if (mgmtdev->weight[i] > HIDMA_MAX_CHANNEL_WEIGHT) {
+ dev_err(&mgmtdev->pdev->dev,
+ "max value of weight can be %d.\n",
+ HIDMA_MAX_CHANNEL_WEIGHT);
+ return -EINVAL;
+ }
+
+ /* weight needs to be at least one */
+ if (mgmtdev->weight[i] == 0)
+ mgmtdev->weight[i] = 1;
+ }
+
+ pm_runtime_get_sync(&mgmtdev->pdev->dev);
+ val = readl(mgmtdev->virtaddr + HIDMA_MAX_BUS_REQ_LEN_OFFSET);
+ val &= ~(HIDMA_MAX_BUS_REQ_LEN_MASK << HIDMA_MAX_BUS_WR_REQ_BIT_POS);
+ val |= mgmtdev->max_write_request << HIDMA_MAX_BUS_WR_REQ_BIT_POS;
+ val &= ~HIDMA_MAX_BUS_REQ_LEN_MASK;
+ val |= mgmtdev->max_read_request;
+ writel(val, mgmtdev->virtaddr + HIDMA_MAX_BUS_REQ_LEN_OFFSET);
+
+ val = readl(mgmtdev->virtaddr + HIDMA_MAX_XACTIONS_OFFSET);
+ val &= ~(HIDMA_MAX_WR_XACTIONS_MASK << HIDMA_MAX_WR_XACTIONS_BIT_POS);
+ val |= mgmtdev->max_wr_xactions << HIDMA_MAX_WR_XACTIONS_BIT_POS;
+ val &= ~HIDMA_MAX_RD_XACTIONS_MASK;
+ val |= mgmtdev->max_rd_xactions;
+ writel(val, mgmtdev->virtaddr + HIDMA_MAX_XACTIONS_OFFSET);
+
+ mgmtdev->hw_version =
+ readl(mgmtdev->virtaddr + HIDMA_HW_VERSION_OFFSET);
+ mgmtdev->hw_version_major = (mgmtdev->hw_version >> 28) & 0xF;
+ mgmtdev->hw_version_minor = (mgmtdev->hw_version >> 16) & 0xF;
+
+ for (i = 0; i < mgmtdev->dma_channels; i++) {
+ u32 weight = mgmtdev->weight[i];
+ u32 priority = mgmtdev->priority[i];
+
+ val = readl(mgmtdev->virtaddr + HIDMA_QOS_N_OFFSET + (4 * i));
+ val &= ~(1 << HIDMA_PRIORITY_BIT_POS);
+ val |= (priority & 0x1) << HIDMA_PRIORITY_BIT_POS;
+ val &= ~(HIDMA_WEIGHT_MASK << HIDMA_WRR_BIT_POS);
+ val |= (weight & HIDMA_WEIGHT_MASK) << HIDMA_WRR_BIT_POS;
+ writel(val, mgmtdev->virtaddr + HIDMA_QOS_N_OFFSET + (4 * i));
+ }
+
+ val = readl(mgmtdev->virtaddr + HIDMA_CHRESET_TIMEOUT_OFFSET);
+ val &= ~HIDMA_CHRESET_TIMEOUT_MASK;
+ val |= mgmtdev->chreset_timeout_cycles & HIDMA_CHRESET_TIMEOUT_MASK;
+ writel(val, mgmtdev->virtaddr + HIDMA_CHRESET_TIMEOUT_OFFSET);
+
+ pm_runtime_mark_last_busy(&mgmtdev->pdev->dev);
+ pm_runtime_put_autosuspend(&mgmtdev->pdev->dev);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(hidma_mgmt_setup);
+
+static int hidma_mgmt_probe(struct platform_device *pdev)
+{
+ struct hidma_mgmt_dev *mgmtdev;
+ struct resource *res;
+ void __iomem *virtaddr;
+ int irq;
+ int rc;
+ u32 val;
+
+ pm_runtime_set_autosuspend_delay(&pdev->dev, HIDMA_AUTOSUSPEND_TIMEOUT);
+ pm_runtime_use_autosuspend(&pdev->dev);
+ pm_runtime_set_active(&pdev->dev);
+ pm_runtime_enable(&pdev->dev);
+ pm_runtime_get_sync(&pdev->dev);
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ virtaddr = devm_ioremap_resource(&pdev->dev, res);
+ if (IS_ERR(virtaddr)) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ irq = platform_get_irq(pdev, 0);
+ if (irq < 0) {
+ rc = irq;
+ goto out;
+ }
+
+ mgmtdev = devm_kzalloc(&pdev->dev, sizeof(*mgmtdev), GFP_KERNEL);
+ if (!mgmtdev) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ mgmtdev->pdev = pdev;
+ mgmtdev->addrsize = resource_size(res);
+ mgmtdev->virtaddr = virtaddr;
+
+ rc = device_property_read_u32(&pdev->dev, "dma-channels",
+ &mgmtdev->dma_channels);
+ if (rc) {
+ dev_err(&pdev->dev, "number of channels missing\n");
+ goto out;
+ }
+
+ rc = device_property_read_u32(&pdev->dev,
+ "channel-reset-timeout-cycles",
+ &mgmtdev->chreset_timeout_cycles);
+ if (rc) {
+ dev_err(&pdev->dev, "channel reset timeout missing\n");
+ goto out;
+ }
+
+ rc = device_property_read_u32(&pdev->dev, "max-write-burst-bytes",
+ &mgmtdev->max_write_request);
+ if (rc) {
+ dev_err(&pdev->dev, "max-write-burst-bytes missing\n");
+ goto out;
+ }
+
+ if (max_write_request &&
+ (max_write_request != mgmtdev->max_write_request)) {
+ dev_info(&pdev->dev, "overriding max-write-burst-bytes: %d\n",
+ max_write_request);
+ mgmtdev->max_write_request = max_write_request;
+ } else
+ max_write_request = mgmtdev->max_write_request;
+
+ rc = device_property_read_u32(&pdev->dev, "max-read-burst-bytes",
+ &mgmtdev->max_read_request);
+ if (rc) {
+ dev_err(&pdev->dev, "max-read-burst-bytes missing\n");
+ goto out;
+ }
+ if (max_read_request &&
+ (max_read_request != mgmtdev->max_read_request)) {
+ dev_info(&pdev->dev, "overriding max-read-burst-bytes: %d\n",
+ max_read_request);
+ mgmtdev->max_read_request = max_read_request;
+ } else
+ max_read_request = mgmtdev->max_read_request;
+
+ rc = device_property_read_u32(&pdev->dev, "max-write-transactions",
+ &mgmtdev->max_wr_xactions);
+ if (rc) {
+ dev_err(&pdev->dev, "max-write-transactions missing\n");
+ goto out;
+ }
+ if (max_wr_xactions &&
+ (max_wr_xactions != mgmtdev->max_wr_xactions)) {
+ dev_info(&pdev->dev, "overriding max-write-transactions: %d\n",
+ max_wr_xactions);
+ mgmtdev->max_wr_xactions = max_wr_xactions;
+ } else
+ max_wr_xactions = mgmtdev->max_wr_xactions;
+
+ rc = device_property_read_u32(&pdev->dev, "max-read-transactions",
+ &mgmtdev->max_rd_xactions);
+ if (rc) {
+ dev_err(&pdev->dev, "max-read-transactions missing\n");
+ goto out;
+ }
+ if (max_rd_xactions &&
+ (max_rd_xactions != mgmtdev->max_rd_xactions)) {
+ dev_info(&pdev->dev, "overriding max-read-transactions: %d\n",
+ max_rd_xactions);
+ mgmtdev->max_rd_xactions = max_rd_xactions;
+ } else
+ max_rd_xactions = mgmtdev->max_rd_xactions;
+
+ mgmtdev->priority = devm_kcalloc(&pdev->dev,
+ mgmtdev->dma_channels,
+ sizeof(*mgmtdev->priority),
+ GFP_KERNEL);
+ if (!mgmtdev->priority) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ mgmtdev->weight = devm_kcalloc(&pdev->dev,
+ mgmtdev->dma_channels,
+ sizeof(*mgmtdev->weight), GFP_KERNEL);
+ if (!mgmtdev->weight) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ rc = hidma_mgmt_setup(mgmtdev);
+ if (rc) {
+ dev_err(&pdev->dev, "setup failed\n");
+ goto out;
+ }
+
+ /* start the HW */
+ val = readl(mgmtdev->virtaddr + HIDMA_CFG_OFFSET);
+ val |= 1;
+ writel(val, mgmtdev->virtaddr + HIDMA_CFG_OFFSET);
+
+ rc = hidma_mgmt_init_sys(mgmtdev);
+ if (rc) {
+ dev_err(&pdev->dev, "sysfs setup failed\n");
+ goto out;
+ }
+
+ dev_info(&pdev->dev,
+ "HW rev: %d.%d @ %pa with %d physical channels\n",
+ mgmtdev->hw_version_major, mgmtdev->hw_version_minor,
+ &res->start, mgmtdev->dma_channels);
+
+ platform_set_drvdata(pdev, mgmtdev);
+ pm_runtime_mark_last_busy(&pdev->dev);
+ pm_runtime_put_autosuspend(&pdev->dev);
+ return 0;
+out:
+ pm_runtime_put_sync_suspend(&pdev->dev);
+ pm_runtime_disable(&pdev->dev);
+ return rc;
+}
+
+#if IS_ENABLED(CONFIG_ACPI)
+static const struct acpi_device_id hidma_mgmt_acpi_ids[] = {
+ {"QCOM8060"},
+ {},
+};
+MODULE_DEVICE_TABLE(acpi, hidma_mgmt_acpi_ids);
+#endif
+
+static const struct of_device_id hidma_mgmt_match[] = {
+ {.compatible = "qcom,hidma-mgmt-1.0",},
+ {},
+};
+MODULE_DEVICE_TABLE(of, hidma_mgmt_match);
+
+static struct platform_driver hidma_mgmt_driver = {
+ .probe = hidma_mgmt_probe,
+ .driver = {
+ .name = "hidma-mgmt",
+ .of_match_table = hidma_mgmt_match,
+ .acpi_match_table = ACPI_PTR(hidma_mgmt_acpi_ids),
+ },
+};
+
+#if defined(CONFIG_OF) && defined(CONFIG_OF_IRQ)
+static int object_counter;
+
+static int __init hidma_mgmt_of_populate_channels(struct device_node *np)
+{
+ struct platform_device *pdev_parent = of_find_device_by_node(np);
+ struct platform_device_info pdevinfo;
+ struct device_node *child;
+ struct resource *res;
+ int ret = 0;
+
+ /* allocate a resource array */
+ res = kcalloc(3, sizeof(*res), GFP_KERNEL);
+ if (!res)
+ return -ENOMEM;
+
+ for_each_available_child_of_node(np, child) {
+ struct platform_device *new_pdev;
+
+ ret = of_address_to_resource(child, 0, &res[0]);
+ if (!ret)
+ goto out;
+
+ ret = of_address_to_resource(child, 1, &res[1]);
+ if (!ret)
+ goto out;
+
+ ret = of_irq_to_resource(child, 0, &res[2]);
+ if (ret <= 0)
+ goto out;
+
+ memset(&pdevinfo, 0, sizeof(pdevinfo));
+ pdevinfo.fwnode = &child->fwnode;
+ pdevinfo.parent = pdev_parent ? &pdev_parent->dev : NULL;
+ pdevinfo.name = child->name;
+ pdevinfo.id = object_counter++;
+ pdevinfo.res = res;
+ pdevinfo.num_res = 3;
+ pdevinfo.data = NULL;
+ pdevinfo.size_data = 0;
+ pdevinfo.dma_mask = DMA_BIT_MASK(64);
+ new_pdev = platform_device_register_full(&pdevinfo);
+ if (IS_ERR(new_pdev)) {
+ ret = PTR_ERR(new_pdev);
+ goto out;
+ }
+ new_pdev->dev.of_node = child;
+ of_dma_configure(&new_pdev->dev, child, true);
+ /*
+ * It is assumed that calling of_msi_configure is safe on
+ * platforms with or without MSI support.
+ */
+ of_msi_configure(&new_pdev->dev, child);
+ }
+
+ kfree(res);
+
+ return ret;
+
+out:
+ of_node_put(child);
+ kfree(res);
+
+ return ret;
+}
+#endif
+
+static int __init hidma_mgmt_init(void)
+{
+#if defined(CONFIG_OF) && defined(CONFIG_OF_IRQ)
+ struct device_node *child;
+
+ for_each_matching_node(child, hidma_mgmt_match) {
+ /* device tree based firmware here */
+ hidma_mgmt_of_populate_channels(child);
+ }
+#endif
+ /*
+ * We do not check for return value here, as it is assumed that
+ * platform_driver_register must not fail. The reason for this is that
+ * the (potential) hidma_mgmt_of_populate_channels calls above are not
+ * cleaned up if it does fail, and to do this work is quite
+ * complicated. In particular, various calls of of_address_to_resource,
+ * of_irq_to_resource, platform_device_register_full, of_dma_configure,
+ * and of_msi_configure which then call other functions and so on, must
+ * be cleaned up - this is not a trivial exercise.
+ *
+ * Currently, this module is not intended to be unloaded, and there is
+ * no module_exit function defined which does the needed cleanup. For
+ * this reason, we have to assume success here.
+ */
+ platform_driver_register(&hidma_mgmt_driver);
+
+ return 0;
+}
+module_init(hidma_mgmt_init);
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/qcom/hidma_mgmt.h b/drivers/dma/qcom/hidma_mgmt.h
new file mode 100644
index 000000000..30e809598
--- /dev/null
+++ b/drivers/dma/qcom/hidma_mgmt.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Qualcomm Technologies HIDMA Management common header
+ *
+ * Copyright (c) 2015, The Linux Foundation. All rights reserved.
+ */
+
+struct hidma_mgmt_dev {
+ u8 hw_version_major;
+ u8 hw_version_minor;
+
+ u32 max_wr_xactions;
+ u32 max_rd_xactions;
+ u32 max_write_request;
+ u32 max_read_request;
+ u32 dma_channels;
+ u32 chreset_timeout_cycles;
+ u32 hw_version;
+ u32 *priority;
+ u32 *weight;
+
+ /* Hardware device constants */
+ void __iomem *virtaddr;
+ resource_size_t addrsize;
+
+ struct kobject **chroots;
+ struct platform_device *pdev;
+};
+
+int hidma_mgmt_init_sys(struct hidma_mgmt_dev *dev);
+int hidma_mgmt_setup(struct hidma_mgmt_dev *mgmtdev);
diff --git a/drivers/dma/qcom/hidma_mgmt_sys.c b/drivers/dma/qcom/hidma_mgmt_sys.c
new file mode 100644
index 000000000..930eae0a6
--- /dev/null
+++ b/drivers/dma/qcom/hidma_mgmt_sys.c
@@ -0,0 +1,285 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Qualcomm Technologies HIDMA Management SYS interface
+ *
+ * Copyright (c) 2015, The Linux Foundation. All rights reserved.
+ */
+
+#include <linux/sysfs.h>
+#include <linux/platform_device.h>
+
+#include "hidma_mgmt.h"
+
+struct hidma_chan_attr {
+ struct hidma_mgmt_dev *mdev;
+ int index;
+ struct kobj_attribute attr;
+};
+
+struct hidma_mgmt_fileinfo {
+ char *name;
+ int mode;
+ int (*get)(struct hidma_mgmt_dev *mdev);
+ int (*set)(struct hidma_mgmt_dev *mdev, u64 val);
+};
+
+#define IMPLEMENT_GETSET(name) \
+static int get_##name(struct hidma_mgmt_dev *mdev) \
+{ \
+ return mdev->name; \
+} \
+static int set_##name(struct hidma_mgmt_dev *mdev, u64 val) \
+{ \
+ u64 tmp; \
+ int rc; \
+ \
+ tmp = mdev->name; \
+ mdev->name = val; \
+ rc = hidma_mgmt_setup(mdev); \
+ if (rc) \
+ mdev->name = tmp; \
+ return rc; \
+}
+
+#define DECLARE_ATTRIBUTE(name, mode) \
+ {#name, mode, get_##name, set_##name}
+
+IMPLEMENT_GETSET(hw_version_major)
+IMPLEMENT_GETSET(hw_version_minor)
+IMPLEMENT_GETSET(max_wr_xactions)
+IMPLEMENT_GETSET(max_rd_xactions)
+IMPLEMENT_GETSET(max_write_request)
+IMPLEMENT_GETSET(max_read_request)
+IMPLEMENT_GETSET(dma_channels)
+IMPLEMENT_GETSET(chreset_timeout_cycles)
+
+static int set_priority(struct hidma_mgmt_dev *mdev, unsigned int i, u64 val)
+{
+ u64 tmp;
+ int rc;
+
+ if (i >= mdev->dma_channels)
+ return -EINVAL;
+
+ tmp = mdev->priority[i];
+ mdev->priority[i] = val;
+ rc = hidma_mgmt_setup(mdev);
+ if (rc)
+ mdev->priority[i] = tmp;
+ return rc;
+}
+
+static int set_weight(struct hidma_mgmt_dev *mdev, unsigned int i, u64 val)
+{
+ u64 tmp;
+ int rc;
+
+ if (i >= mdev->dma_channels)
+ return -EINVAL;
+
+ tmp = mdev->weight[i];
+ mdev->weight[i] = val;
+ rc = hidma_mgmt_setup(mdev);
+ if (rc)
+ mdev->weight[i] = tmp;
+ return rc;
+}
+
+static struct hidma_mgmt_fileinfo hidma_mgmt_files[] = {
+ DECLARE_ATTRIBUTE(hw_version_major, S_IRUGO),
+ DECLARE_ATTRIBUTE(hw_version_minor, S_IRUGO),
+ DECLARE_ATTRIBUTE(dma_channels, S_IRUGO),
+ DECLARE_ATTRIBUTE(chreset_timeout_cycles, S_IRUGO),
+ DECLARE_ATTRIBUTE(max_wr_xactions, S_IRUGO),
+ DECLARE_ATTRIBUTE(max_rd_xactions, S_IRUGO),
+ DECLARE_ATTRIBUTE(max_write_request, S_IRUGO),
+ DECLARE_ATTRIBUTE(max_read_request, S_IRUGO),
+};
+
+static ssize_t show_values(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct hidma_mgmt_dev *mdev = dev_get_drvdata(dev);
+ unsigned int i;
+
+ buf[0] = 0;
+
+ for (i = 0; i < ARRAY_SIZE(hidma_mgmt_files); i++) {
+ if (strcmp(attr->attr.name, hidma_mgmt_files[i].name) == 0) {
+ sprintf(buf, "%d\n", hidma_mgmt_files[i].get(mdev));
+ break;
+ }
+ }
+ return strlen(buf);
+}
+
+static ssize_t set_values(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct hidma_mgmt_dev *mdev = dev_get_drvdata(dev);
+ unsigned long tmp;
+ unsigned int i;
+ int rc;
+
+ rc = kstrtoul(buf, 0, &tmp);
+ if (rc)
+ return rc;
+
+ for (i = 0; i < ARRAY_SIZE(hidma_mgmt_files); i++) {
+ if (strcmp(attr->attr.name, hidma_mgmt_files[i].name) == 0) {
+ rc = hidma_mgmt_files[i].set(mdev, tmp);
+ if (rc)
+ return rc;
+
+ break;
+ }
+ }
+ return count;
+}
+
+static ssize_t show_values_channel(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct hidma_chan_attr *chattr;
+ struct hidma_mgmt_dev *mdev;
+
+ buf[0] = 0;
+ chattr = container_of(attr, struct hidma_chan_attr, attr);
+ mdev = chattr->mdev;
+ if (strcmp(attr->attr.name, "priority") == 0)
+ sprintf(buf, "%d\n", mdev->priority[chattr->index]);
+ else if (strcmp(attr->attr.name, "weight") == 0)
+ sprintf(buf, "%d\n", mdev->weight[chattr->index]);
+
+ return strlen(buf);
+}
+
+static ssize_t set_values_channel(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf,
+ size_t count)
+{
+ struct hidma_chan_attr *chattr;
+ struct hidma_mgmt_dev *mdev;
+ unsigned long tmp;
+ int rc;
+
+ chattr = container_of(attr, struct hidma_chan_attr, attr);
+ mdev = chattr->mdev;
+
+ rc = kstrtoul(buf, 0, &tmp);
+ if (rc)
+ return rc;
+
+ if (strcmp(attr->attr.name, "priority") == 0) {
+ rc = set_priority(mdev, chattr->index, tmp);
+ if (rc)
+ return rc;
+ } else if (strcmp(attr->attr.name, "weight") == 0) {
+ rc = set_weight(mdev, chattr->index, tmp);
+ if (rc)
+ return rc;
+ }
+ return count;
+}
+
+static int create_sysfs_entry(struct hidma_mgmt_dev *dev, char *name, int mode)
+{
+ struct device_attribute *attrs;
+ char *name_copy;
+
+ attrs = devm_kmalloc(&dev->pdev->dev,
+ sizeof(struct device_attribute), GFP_KERNEL);
+ if (!attrs)
+ return -ENOMEM;
+
+ name_copy = devm_kstrdup(&dev->pdev->dev, name, GFP_KERNEL);
+ if (!name_copy)
+ return -ENOMEM;
+
+ attrs->attr.name = name_copy;
+ attrs->attr.mode = mode;
+ attrs->show = show_values;
+ attrs->store = set_values;
+ sysfs_attr_init(&attrs->attr);
+
+ return device_create_file(&dev->pdev->dev, attrs);
+}
+
+static int create_sysfs_entry_channel(struct hidma_mgmt_dev *mdev, char *name,
+ int mode, int index,
+ struct kobject *parent)
+{
+ struct hidma_chan_attr *chattr;
+ char *name_copy;
+
+ chattr = devm_kmalloc(&mdev->pdev->dev, sizeof(*chattr), GFP_KERNEL);
+ if (!chattr)
+ return -ENOMEM;
+
+ name_copy = devm_kstrdup(&mdev->pdev->dev, name, GFP_KERNEL);
+ if (!name_copy)
+ return -ENOMEM;
+
+ chattr->mdev = mdev;
+ chattr->index = index;
+ chattr->attr.attr.name = name_copy;
+ chattr->attr.attr.mode = mode;
+ chattr->attr.show = show_values_channel;
+ chattr->attr.store = set_values_channel;
+ sysfs_attr_init(&chattr->attr.attr);
+
+ return sysfs_create_file(parent, &chattr->attr.attr);
+}
+
+int hidma_mgmt_init_sys(struct hidma_mgmt_dev *mdev)
+{
+ unsigned int i;
+ int rc;
+ int required;
+ struct kobject *chanops;
+
+ required = sizeof(*mdev->chroots) * mdev->dma_channels;
+ mdev->chroots = devm_kmalloc(&mdev->pdev->dev, required, GFP_KERNEL);
+ if (!mdev->chroots)
+ return -ENOMEM;
+
+ chanops = kobject_create_and_add("chanops", &mdev->pdev->dev.kobj);
+ if (!chanops)
+ return -ENOMEM;
+
+ /* create each channel directory here */
+ for (i = 0; i < mdev->dma_channels; i++) {
+ char name[20];
+
+ snprintf(name, sizeof(name), "chan%d", i);
+ mdev->chroots[i] = kobject_create_and_add(name, chanops);
+ if (!mdev->chroots[i])
+ return -ENOMEM;
+ }
+
+ /* populate common parameters */
+ for (i = 0; i < ARRAY_SIZE(hidma_mgmt_files); i++) {
+ rc = create_sysfs_entry(mdev, hidma_mgmt_files[i].name,
+ hidma_mgmt_files[i].mode);
+ if (rc)
+ return rc;
+ }
+
+ /* populate parameters that are per channel */
+ for (i = 0; i < mdev->dma_channels; i++) {
+ rc = create_sysfs_entry_channel(mdev, "priority",
+ (S_IRUGO | S_IWUGO), i,
+ mdev->chroots[i]);
+ if (rc)
+ return rc;
+
+ rc = create_sysfs_entry_channel(mdev, "weight",
+ (S_IRUGO | S_IWUGO), i,
+ mdev->chroots[i]);
+ if (rc)
+ return rc;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(hidma_mgmt_init_sys);
diff --git a/drivers/dma/qcom/qcom_adm.c b/drivers/dma/qcom/qcom_adm.c
new file mode 100644
index 000000000..d56caf168
--- /dev/null
+++ b/drivers/dma/qcom/qcom_adm.c
@@ -0,0 +1,953 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2013-2015, The Linux Foundation. All rights reserved.
+ */
+
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/dma/qcom_adm.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/of_dma.h>
+#include <linux/platform_device.h>
+#include <linux/reset.h>
+#include <linux/scatterlist.h>
+#include <linux/slab.h>
+
+#include "../dmaengine.h"
+#include "../virt-dma.h"
+
+/* ADM registers - calculated from channel number and security domain */
+#define ADM_CHAN_MULTI 0x4
+#define ADM_CI_MULTI 0x4
+#define ADM_CRCI_MULTI 0x4
+#define ADM_EE_MULTI 0x800
+#define ADM_CHAN_OFFS(chan) (ADM_CHAN_MULTI * (chan))
+#define ADM_EE_OFFS(ee) (ADM_EE_MULTI * (ee))
+#define ADM_CHAN_EE_OFFS(chan, ee) (ADM_CHAN_OFFS(chan) + ADM_EE_OFFS(ee))
+#define ADM_CHAN_OFFS(chan) (ADM_CHAN_MULTI * (chan))
+#define ADM_CI_OFFS(ci) (ADM_CHAN_OFF(ci))
+#define ADM_CH_CMD_PTR(chan, ee) (ADM_CHAN_EE_OFFS(chan, ee))
+#define ADM_CH_RSLT(chan, ee) (0x40 + ADM_CHAN_EE_OFFS(chan, ee))
+#define ADM_CH_FLUSH_STATE0(chan, ee) (0x80 + ADM_CHAN_EE_OFFS(chan, ee))
+#define ADM_CH_STATUS_SD(chan, ee) (0x200 + ADM_CHAN_EE_OFFS(chan, ee))
+#define ADM_CH_CONF(chan) (0x240 + ADM_CHAN_OFFS(chan))
+#define ADM_CH_RSLT_CONF(chan, ee) (0x300 + ADM_CHAN_EE_OFFS(chan, ee))
+#define ADM_SEC_DOMAIN_IRQ_STATUS(ee) (0x380 + ADM_EE_OFFS(ee))
+#define ADM_CI_CONF(ci) (0x390 + (ci) * ADM_CI_MULTI)
+#define ADM_GP_CTL 0x3d8
+#define ADM_CRCI_CTL(crci, ee) (0x400 + (crci) * ADM_CRCI_MULTI + \
+ ADM_EE_OFFS(ee))
+
+/* channel status */
+#define ADM_CH_STATUS_VALID BIT(1)
+
+/* channel result */
+#define ADM_CH_RSLT_VALID BIT(31)
+#define ADM_CH_RSLT_ERR BIT(3)
+#define ADM_CH_RSLT_FLUSH BIT(2)
+#define ADM_CH_RSLT_TPD BIT(1)
+
+/* channel conf */
+#define ADM_CH_CONF_SHADOW_EN BIT(12)
+#define ADM_CH_CONF_MPU_DISABLE BIT(11)
+#define ADM_CH_CONF_PERM_MPU_CONF BIT(9)
+#define ADM_CH_CONF_FORCE_RSLT_EN BIT(7)
+#define ADM_CH_CONF_SEC_DOMAIN(ee) ((((ee) & 0x3) << 4) | (((ee) & 0x4) << 11))
+
+/* channel result conf */
+#define ADM_CH_RSLT_CONF_FLUSH_EN BIT(1)
+#define ADM_CH_RSLT_CONF_IRQ_EN BIT(0)
+
+/* CRCI CTL */
+#define ADM_CRCI_CTL_MUX_SEL BIT(18)
+#define ADM_CRCI_CTL_RST BIT(17)
+
+/* CI configuration */
+#define ADM_CI_RANGE_END(x) ((x) << 24)
+#define ADM_CI_RANGE_START(x) ((x) << 16)
+#define ADM_CI_BURST_4_WORDS BIT(2)
+#define ADM_CI_BURST_8_WORDS BIT(3)
+
+/* GP CTL */
+#define ADM_GP_CTL_LP_EN BIT(12)
+#define ADM_GP_CTL_LP_CNT(x) ((x) << 8)
+
+/* Command pointer list entry */
+#define ADM_CPLE_LP BIT(31)
+#define ADM_CPLE_CMD_PTR_LIST BIT(29)
+
+/* Command list entry */
+#define ADM_CMD_LC BIT(31)
+#define ADM_CMD_DST_CRCI(n) (((n) & 0xf) << 7)
+#define ADM_CMD_SRC_CRCI(n) (((n) & 0xf) << 3)
+
+#define ADM_CMD_TYPE_SINGLE 0x0
+#define ADM_CMD_TYPE_BOX 0x3
+
+#define ADM_CRCI_MUX_SEL BIT(4)
+#define ADM_DESC_ALIGN 8
+#define ADM_MAX_XFER (SZ_64K - 1)
+#define ADM_MAX_ROWS (SZ_64K - 1)
+#define ADM_MAX_CHANNELS 16
+
+struct adm_desc_hw_box {
+ u32 cmd;
+ u32 src_addr;
+ u32 dst_addr;
+ u32 row_len;
+ u32 num_rows;
+ u32 row_offset;
+};
+
+struct adm_desc_hw_single {
+ u32 cmd;
+ u32 src_addr;
+ u32 dst_addr;
+ u32 len;
+};
+
+struct adm_async_desc {
+ struct virt_dma_desc vd;
+ struct adm_device *adev;
+
+ size_t length;
+ enum dma_transfer_direction dir;
+ dma_addr_t dma_addr;
+ size_t dma_len;
+
+ void *cpl;
+ dma_addr_t cp_addr;
+ u32 crci;
+ u32 mux;
+ u32 blk_size;
+};
+
+struct adm_chan {
+ struct virt_dma_chan vc;
+ struct adm_device *adev;
+
+ /* parsed from DT */
+ u32 id; /* channel id */
+
+ struct adm_async_desc *curr_txd;
+ struct dma_slave_config slave;
+ u32 crci;
+ u32 mux;
+ struct list_head node;
+
+ int error;
+ int initialized;
+};
+
+static inline struct adm_chan *to_adm_chan(struct dma_chan *common)
+{
+ return container_of(common, struct adm_chan, vc.chan);
+}
+
+struct adm_device {
+ void __iomem *regs;
+ struct device *dev;
+ struct dma_device common;
+ struct device_dma_parameters dma_parms;
+ struct adm_chan *channels;
+
+ u32 ee;
+
+ struct clk *core_clk;
+ struct clk *iface_clk;
+
+ struct reset_control *clk_reset;
+ struct reset_control *c0_reset;
+ struct reset_control *c1_reset;
+ struct reset_control *c2_reset;
+ int irq;
+};
+
+/**
+ * adm_free_chan - Frees dma resources associated with the specific channel
+ *
+ * @chan: dma channel
+ *
+ * Free all allocated descriptors associated with this channel
+ */
+static void adm_free_chan(struct dma_chan *chan)
+{
+ /* free all queued descriptors */
+ vchan_free_chan_resources(to_virt_chan(chan));
+}
+
+/**
+ * adm_get_blksize - Get block size from burst value
+ *
+ * @burst: Burst size of transaction
+ */
+static int adm_get_blksize(unsigned int burst)
+{
+ int ret;
+
+ switch (burst) {
+ case 16:
+ case 32:
+ case 64:
+ case 128:
+ ret = ffs(burst >> 4) - 1;
+ break;
+ case 192:
+ ret = 4;
+ break;
+ case 256:
+ ret = 5;
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ return ret;
+}
+
+/**
+ * adm_process_fc_descriptors - Process descriptors for flow controlled xfers
+ *
+ * @achan: ADM channel
+ * @desc: Descriptor memory pointer
+ * @sg: Scatterlist entry
+ * @crci: CRCI value
+ * @burst: Burst size of transaction
+ * @direction: DMA transfer direction
+ */
+static void *adm_process_fc_descriptors(struct adm_chan *achan, void *desc,
+ struct scatterlist *sg, u32 crci,
+ u32 burst,
+ enum dma_transfer_direction direction)
+{
+ struct adm_desc_hw_box *box_desc = NULL;
+ struct adm_desc_hw_single *single_desc;
+ u32 remainder = sg_dma_len(sg);
+ u32 rows, row_offset, crci_cmd;
+ u32 mem_addr = sg_dma_address(sg);
+ u32 *incr_addr = &mem_addr;
+ u32 *src, *dst;
+
+ if (direction == DMA_DEV_TO_MEM) {
+ crci_cmd = ADM_CMD_SRC_CRCI(crci);
+ row_offset = burst;
+ src = &achan->slave.src_addr;
+ dst = &mem_addr;
+ } else {
+ crci_cmd = ADM_CMD_DST_CRCI(crci);
+ row_offset = burst << 16;
+ src = &mem_addr;
+ dst = &achan->slave.dst_addr;
+ }
+
+ while (remainder >= burst) {
+ box_desc = desc;
+ box_desc->cmd = ADM_CMD_TYPE_BOX | crci_cmd;
+ box_desc->row_offset = row_offset;
+ box_desc->src_addr = *src;
+ box_desc->dst_addr = *dst;
+
+ rows = remainder / burst;
+ rows = min_t(u32, rows, ADM_MAX_ROWS);
+ box_desc->num_rows = rows << 16 | rows;
+ box_desc->row_len = burst << 16 | burst;
+
+ *incr_addr += burst * rows;
+ remainder -= burst * rows;
+ desc += sizeof(*box_desc);
+ }
+
+ /* if leftover bytes, do one single descriptor */
+ if (remainder) {
+ single_desc = desc;
+ single_desc->cmd = ADM_CMD_TYPE_SINGLE | crci_cmd;
+ single_desc->len = remainder;
+ single_desc->src_addr = *src;
+ single_desc->dst_addr = *dst;
+ desc += sizeof(*single_desc);
+
+ if (sg_is_last(sg))
+ single_desc->cmd |= ADM_CMD_LC;
+ } else {
+ if (box_desc && sg_is_last(sg))
+ box_desc->cmd |= ADM_CMD_LC;
+ }
+
+ return desc;
+}
+
+/**
+ * adm_process_non_fc_descriptors - Process descriptors for non-fc xfers
+ *
+ * @achan: ADM channel
+ * @desc: Descriptor memory pointer
+ * @sg: Scatterlist entry
+ * @direction: DMA transfer direction
+ */
+static void *adm_process_non_fc_descriptors(struct adm_chan *achan, void *desc,
+ struct scatterlist *sg,
+ enum dma_transfer_direction direction)
+{
+ struct adm_desc_hw_single *single_desc;
+ u32 remainder = sg_dma_len(sg);
+ u32 mem_addr = sg_dma_address(sg);
+ u32 *incr_addr = &mem_addr;
+ u32 *src, *dst;
+
+ if (direction == DMA_DEV_TO_MEM) {
+ src = &achan->slave.src_addr;
+ dst = &mem_addr;
+ } else {
+ src = &mem_addr;
+ dst = &achan->slave.dst_addr;
+ }
+
+ do {
+ single_desc = desc;
+ single_desc->cmd = ADM_CMD_TYPE_SINGLE;
+ single_desc->src_addr = *src;
+ single_desc->dst_addr = *dst;
+ single_desc->len = (remainder > ADM_MAX_XFER) ?
+ ADM_MAX_XFER : remainder;
+
+ remainder -= single_desc->len;
+ *incr_addr += single_desc->len;
+ desc += sizeof(*single_desc);
+ } while (remainder);
+
+ /* set last command if this is the end of the whole transaction */
+ if (sg_is_last(sg))
+ single_desc->cmd |= ADM_CMD_LC;
+
+ return desc;
+}
+
+/**
+ * adm_prep_slave_sg - Prep slave sg transaction
+ *
+ * @chan: dma channel
+ * @sgl: scatter gather list
+ * @sg_len: length of sg
+ * @direction: DMA transfer direction
+ * @flags: DMA flags
+ * @context: transfer context (unused)
+ */
+static struct dma_async_tx_descriptor *adm_prep_slave_sg(struct dma_chan *chan,
+ struct scatterlist *sgl,
+ unsigned int sg_len,
+ enum dma_transfer_direction direction,
+ unsigned long flags,
+ void *context)
+{
+ struct adm_chan *achan = to_adm_chan(chan);
+ struct adm_device *adev = achan->adev;
+ struct adm_async_desc *async_desc;
+ struct scatterlist *sg;
+ dma_addr_t cple_addr;
+ u32 i, burst;
+ u32 single_count = 0, box_count = 0, crci = 0;
+ void *desc;
+ u32 *cple;
+ int blk_size = 0;
+
+ if (!is_slave_direction(direction)) {
+ dev_err(adev->dev, "invalid dma direction\n");
+ return NULL;
+ }
+
+ /*
+ * get burst value from slave configuration
+ */
+ burst = (direction == DMA_MEM_TO_DEV) ?
+ achan->slave.dst_maxburst :
+ achan->slave.src_maxburst;
+
+ /* if using flow control, validate burst and crci values */
+ if (achan->slave.device_fc) {
+ blk_size = adm_get_blksize(burst);
+ if (blk_size < 0) {
+ dev_err(adev->dev, "invalid burst value: %d\n",
+ burst);
+ return NULL;
+ }
+
+ crci = achan->crci & 0xf;
+ if (!crci || achan->crci > 0x1f) {
+ dev_err(adev->dev, "invalid crci value\n");
+ return NULL;
+ }
+ }
+
+ /* iterate through sgs and compute allocation size of structures */
+ for_each_sg(sgl, sg, sg_len, i) {
+ if (achan->slave.device_fc) {
+ box_count += DIV_ROUND_UP(sg_dma_len(sg) / burst,
+ ADM_MAX_ROWS);
+ if (sg_dma_len(sg) % burst)
+ single_count++;
+ } else {
+ single_count += DIV_ROUND_UP(sg_dma_len(sg),
+ ADM_MAX_XFER);
+ }
+ }
+
+ async_desc = kzalloc(sizeof(*async_desc), GFP_NOWAIT);
+ if (!async_desc) {
+ dev_err(adev->dev, "not enough memory for async_desc struct\n");
+ return NULL;
+ }
+
+ async_desc->mux = achan->mux ? ADM_CRCI_CTL_MUX_SEL : 0;
+ async_desc->crci = crci;
+ async_desc->blk_size = blk_size;
+ async_desc->dma_len = single_count * sizeof(struct adm_desc_hw_single) +
+ box_count * sizeof(struct adm_desc_hw_box) +
+ sizeof(*cple) + 2 * ADM_DESC_ALIGN;
+
+ async_desc->cpl = kzalloc(async_desc->dma_len, GFP_NOWAIT);
+ if (!async_desc->cpl) {
+ dev_err(adev->dev, "not enough memory for cpl struct\n");
+ goto free;
+ }
+
+ async_desc->adev = adev;
+
+ /* both command list entry and descriptors must be 8 byte aligned */
+ cple = PTR_ALIGN(async_desc->cpl, ADM_DESC_ALIGN);
+ desc = PTR_ALIGN(cple + 1, ADM_DESC_ALIGN);
+
+ for_each_sg(sgl, sg, sg_len, i) {
+ async_desc->length += sg_dma_len(sg);
+
+ if (achan->slave.device_fc)
+ desc = adm_process_fc_descriptors(achan, desc, sg, crci,
+ burst, direction);
+ else
+ desc = adm_process_non_fc_descriptors(achan, desc, sg,
+ direction);
+ }
+
+ async_desc->dma_addr = dma_map_single(adev->dev, async_desc->cpl,
+ async_desc->dma_len,
+ DMA_TO_DEVICE);
+ if (dma_mapping_error(adev->dev, async_desc->dma_addr)) {
+ dev_err(adev->dev, "dma mapping error for cpl\n");
+ goto free;
+ }
+
+ cple_addr = async_desc->dma_addr + ((void *)cple - async_desc->cpl);
+
+ /* init cmd list */
+ dma_sync_single_for_cpu(adev->dev, cple_addr, sizeof(*cple),
+ DMA_TO_DEVICE);
+ *cple = ADM_CPLE_LP;
+ *cple |= (async_desc->dma_addr + ADM_DESC_ALIGN) >> 3;
+ dma_sync_single_for_device(adev->dev, cple_addr, sizeof(*cple),
+ DMA_TO_DEVICE);
+
+ return vchan_tx_prep(&achan->vc, &async_desc->vd, flags);
+
+free:
+ kfree(async_desc);
+ return NULL;
+}
+
+/**
+ * adm_terminate_all - terminate all transactions on a channel
+ * @chan: dma channel
+ *
+ * Dequeues and frees all transactions, aborts current transaction
+ * No callbacks are done
+ *
+ */
+static int adm_terminate_all(struct dma_chan *chan)
+{
+ struct adm_chan *achan = to_adm_chan(chan);
+ struct adm_device *adev = achan->adev;
+ unsigned long flags;
+ LIST_HEAD(head);
+
+ spin_lock_irqsave(&achan->vc.lock, flags);
+ vchan_get_all_descriptors(&achan->vc, &head);
+
+ /* send flush command to terminate current transaction */
+ writel_relaxed(0x0,
+ adev->regs + ADM_CH_FLUSH_STATE0(achan->id, adev->ee));
+
+ spin_unlock_irqrestore(&achan->vc.lock, flags);
+
+ vchan_dma_desc_free_list(&achan->vc, &head);
+
+ return 0;
+}
+
+static int adm_slave_config(struct dma_chan *chan, struct dma_slave_config *cfg)
+{
+ struct adm_chan *achan = to_adm_chan(chan);
+ struct qcom_adm_peripheral_config *config = cfg->peripheral_config;
+ unsigned long flag;
+
+ spin_lock_irqsave(&achan->vc.lock, flag);
+ memcpy(&achan->slave, cfg, sizeof(struct dma_slave_config));
+ if (cfg->peripheral_size == sizeof(*config))
+ achan->crci = config->crci;
+ spin_unlock_irqrestore(&achan->vc.lock, flag);
+
+ return 0;
+}
+
+/**
+ * adm_start_dma - start next transaction
+ * @achan: ADM dma channel
+ */
+static void adm_start_dma(struct adm_chan *achan)
+{
+ struct virt_dma_desc *vd = vchan_next_desc(&achan->vc);
+ struct adm_device *adev = achan->adev;
+ struct adm_async_desc *async_desc;
+
+ lockdep_assert_held(&achan->vc.lock);
+
+ if (!vd)
+ return;
+
+ list_del(&vd->node);
+
+ /* write next command list out to the CMD FIFO */
+ async_desc = container_of(vd, struct adm_async_desc, vd);
+ achan->curr_txd = async_desc;
+
+ /* reset channel error */
+ achan->error = 0;
+
+ if (!achan->initialized) {
+ /* enable interrupts */
+ writel(ADM_CH_CONF_SHADOW_EN |
+ ADM_CH_CONF_PERM_MPU_CONF |
+ ADM_CH_CONF_MPU_DISABLE |
+ ADM_CH_CONF_SEC_DOMAIN(adev->ee),
+ adev->regs + ADM_CH_CONF(achan->id));
+
+ writel(ADM_CH_RSLT_CONF_IRQ_EN | ADM_CH_RSLT_CONF_FLUSH_EN,
+ adev->regs + ADM_CH_RSLT_CONF(achan->id, adev->ee));
+
+ achan->initialized = 1;
+ }
+
+ /* set the crci block size if this transaction requires CRCI */
+ if (async_desc->crci) {
+ writel(async_desc->mux | async_desc->blk_size,
+ adev->regs + ADM_CRCI_CTL(async_desc->crci, adev->ee));
+ }
+
+ /* make sure IRQ enable doesn't get reordered */
+ wmb();
+
+ /* write next command list out to the CMD FIFO */
+ writel(ALIGN(async_desc->dma_addr, ADM_DESC_ALIGN) >> 3,
+ adev->regs + ADM_CH_CMD_PTR(achan->id, adev->ee));
+}
+
+/**
+ * adm_dma_irq - irq handler for ADM controller
+ * @irq: IRQ of interrupt
+ * @data: callback data
+ *
+ * IRQ handler for the bam controller
+ */
+static irqreturn_t adm_dma_irq(int irq, void *data)
+{
+ struct adm_device *adev = data;
+ u32 srcs, i;
+ struct adm_async_desc *async_desc;
+ unsigned long flags;
+
+ srcs = readl_relaxed(adev->regs +
+ ADM_SEC_DOMAIN_IRQ_STATUS(adev->ee));
+
+ for (i = 0; i < ADM_MAX_CHANNELS; i++) {
+ struct adm_chan *achan = &adev->channels[i];
+ u32 status, result;
+
+ if (srcs & BIT(i)) {
+ status = readl_relaxed(adev->regs +
+ ADM_CH_STATUS_SD(i, adev->ee));
+
+ /* if no result present, skip */
+ if (!(status & ADM_CH_STATUS_VALID))
+ continue;
+
+ result = readl_relaxed(adev->regs +
+ ADM_CH_RSLT(i, adev->ee));
+
+ /* no valid results, skip */
+ if (!(result & ADM_CH_RSLT_VALID))
+ continue;
+
+ /* flag error if transaction was flushed or failed */
+ if (result & (ADM_CH_RSLT_ERR | ADM_CH_RSLT_FLUSH))
+ achan->error = 1;
+
+ spin_lock_irqsave(&achan->vc.lock, flags);
+ async_desc = achan->curr_txd;
+
+ achan->curr_txd = NULL;
+
+ if (async_desc) {
+ vchan_cookie_complete(&async_desc->vd);
+
+ /* kick off next DMA */
+ adm_start_dma(achan);
+ }
+
+ spin_unlock_irqrestore(&achan->vc.lock, flags);
+ }
+ }
+
+ return IRQ_HANDLED;
+}
+
+/**
+ * adm_tx_status - returns status of transaction
+ * @chan: dma channel
+ * @cookie: transaction cookie
+ * @txstate: DMA transaction state
+ *
+ * Return status of dma transaction
+ */
+static enum dma_status adm_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
+ struct dma_tx_state *txstate)
+{
+ struct adm_chan *achan = to_adm_chan(chan);
+ struct virt_dma_desc *vd;
+ enum dma_status ret;
+ unsigned long flags;
+ size_t residue = 0;
+
+ ret = dma_cookie_status(chan, cookie, txstate);
+ if (ret == DMA_COMPLETE || !txstate)
+ return ret;
+
+ spin_lock_irqsave(&achan->vc.lock, flags);
+
+ vd = vchan_find_desc(&achan->vc, cookie);
+ if (vd)
+ residue = container_of(vd, struct adm_async_desc, vd)->length;
+
+ spin_unlock_irqrestore(&achan->vc.lock, flags);
+
+ /*
+ * residue is either the full length if it is in the issued list, or 0
+ * if it is in progress. We have no reliable way of determining
+ * anything inbetween
+ */
+ dma_set_residue(txstate, residue);
+
+ if (achan->error)
+ return DMA_ERROR;
+
+ return ret;
+}
+
+/**
+ * adm_issue_pending - starts pending transactions
+ * @chan: dma channel
+ *
+ * Issues all pending transactions and starts DMA
+ */
+static void adm_issue_pending(struct dma_chan *chan)
+{
+ struct adm_chan *achan = to_adm_chan(chan);
+ unsigned long flags;
+
+ spin_lock_irqsave(&achan->vc.lock, flags);
+
+ if (vchan_issue_pending(&achan->vc) && !achan->curr_txd)
+ adm_start_dma(achan);
+ spin_unlock_irqrestore(&achan->vc.lock, flags);
+}
+
+/**
+ * adm_dma_free_desc - free descriptor memory
+ * @vd: virtual descriptor
+ *
+ */
+static void adm_dma_free_desc(struct virt_dma_desc *vd)
+{
+ struct adm_async_desc *async_desc = container_of(vd,
+ struct adm_async_desc, vd);
+
+ dma_unmap_single(async_desc->adev->dev, async_desc->dma_addr,
+ async_desc->dma_len, DMA_TO_DEVICE);
+ kfree(async_desc->cpl);
+ kfree(async_desc);
+}
+
+static void adm_channel_init(struct adm_device *adev, struct adm_chan *achan,
+ u32 index)
+{
+ achan->id = index;
+ achan->adev = adev;
+
+ vchan_init(&achan->vc, &adev->common);
+ achan->vc.desc_free = adm_dma_free_desc;
+}
+
+/**
+ * adm_dma_xlate
+ * @dma_spec: pointer to DMA specifier as found in the device tree
+ * @ofdma: pointer to DMA controller data
+ *
+ * This can use either 1-cell or 2-cell formats, the first cell
+ * identifies the slave device, while the optional second cell
+ * contains the crci value.
+ *
+ * Returns pointer to appropriate dma channel on success or NULL on error.
+ */
+static struct dma_chan *adm_dma_xlate(struct of_phandle_args *dma_spec,
+ struct of_dma *ofdma)
+{
+ struct dma_device *dev = ofdma->of_dma_data;
+ struct dma_chan *chan, *candidate = NULL;
+ struct adm_chan *achan;
+
+ if (!dev || dma_spec->args_count > 2)
+ return NULL;
+
+ list_for_each_entry(chan, &dev->channels, device_node)
+ if (chan->chan_id == dma_spec->args[0]) {
+ candidate = chan;
+ break;
+ }
+
+ if (!candidate)
+ return NULL;
+
+ achan = to_adm_chan(candidate);
+ if (dma_spec->args_count == 2)
+ achan->crci = dma_spec->args[1];
+ else
+ achan->crci = 0;
+
+ return dma_get_slave_channel(candidate);
+}
+
+static int adm_dma_probe(struct platform_device *pdev)
+{
+ struct adm_device *adev;
+ int ret;
+ u32 i;
+
+ adev = devm_kzalloc(&pdev->dev, sizeof(*adev), GFP_KERNEL);
+ if (!adev)
+ return -ENOMEM;
+
+ adev->dev = &pdev->dev;
+
+ adev->regs = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(adev->regs))
+ return PTR_ERR(adev->regs);
+
+ adev->irq = platform_get_irq(pdev, 0);
+ if (adev->irq < 0)
+ return adev->irq;
+
+ ret = of_property_read_u32(pdev->dev.of_node, "qcom,ee", &adev->ee);
+ if (ret) {
+ dev_err(adev->dev, "Execution environment unspecified\n");
+ return ret;
+ }
+
+ adev->core_clk = devm_clk_get(adev->dev, "core");
+ if (IS_ERR(adev->core_clk))
+ return PTR_ERR(adev->core_clk);
+
+ adev->iface_clk = devm_clk_get(adev->dev, "iface");
+ if (IS_ERR(adev->iface_clk))
+ return PTR_ERR(adev->iface_clk);
+
+ adev->clk_reset = devm_reset_control_get_exclusive(&pdev->dev, "clk");
+ if (IS_ERR(adev->clk_reset)) {
+ dev_err(adev->dev, "failed to get ADM0 reset\n");
+ return PTR_ERR(adev->clk_reset);
+ }
+
+ adev->c0_reset = devm_reset_control_get_exclusive(&pdev->dev, "c0");
+ if (IS_ERR(adev->c0_reset)) {
+ dev_err(adev->dev, "failed to get ADM0 C0 reset\n");
+ return PTR_ERR(adev->c0_reset);
+ }
+
+ adev->c1_reset = devm_reset_control_get_exclusive(&pdev->dev, "c1");
+ if (IS_ERR(adev->c1_reset)) {
+ dev_err(adev->dev, "failed to get ADM0 C1 reset\n");
+ return PTR_ERR(adev->c1_reset);
+ }
+
+ adev->c2_reset = devm_reset_control_get_exclusive(&pdev->dev, "c2");
+ if (IS_ERR(adev->c2_reset)) {
+ dev_err(adev->dev, "failed to get ADM0 C2 reset\n");
+ return PTR_ERR(adev->c2_reset);
+ }
+
+ ret = clk_prepare_enable(adev->core_clk);
+ if (ret) {
+ dev_err(adev->dev, "failed to prepare/enable core clock\n");
+ return ret;
+ }
+
+ ret = clk_prepare_enable(adev->iface_clk);
+ if (ret) {
+ dev_err(adev->dev, "failed to prepare/enable iface clock\n");
+ goto err_disable_core_clk;
+ }
+
+ reset_control_assert(adev->clk_reset);
+ reset_control_assert(adev->c0_reset);
+ reset_control_assert(adev->c1_reset);
+ reset_control_assert(adev->c2_reset);
+
+ udelay(2);
+
+ reset_control_deassert(adev->clk_reset);
+ reset_control_deassert(adev->c0_reset);
+ reset_control_deassert(adev->c1_reset);
+ reset_control_deassert(adev->c2_reset);
+
+ adev->channels = devm_kcalloc(adev->dev, ADM_MAX_CHANNELS,
+ sizeof(*adev->channels), GFP_KERNEL);
+
+ if (!adev->channels) {
+ ret = -ENOMEM;
+ goto err_disable_clks;
+ }
+
+ /* allocate and initialize channels */
+ INIT_LIST_HEAD(&adev->common.channels);
+
+ for (i = 0; i < ADM_MAX_CHANNELS; i++)
+ adm_channel_init(adev, &adev->channels[i], i);
+
+ /* reset CRCIs */
+ for (i = 0; i < 16; i++)
+ writel(ADM_CRCI_CTL_RST, adev->regs +
+ ADM_CRCI_CTL(i, adev->ee));
+
+ /* configure client interfaces */
+ writel(ADM_CI_RANGE_START(0x40) | ADM_CI_RANGE_END(0xb0) |
+ ADM_CI_BURST_8_WORDS, adev->regs + ADM_CI_CONF(0));
+ writel(ADM_CI_RANGE_START(0x2a) | ADM_CI_RANGE_END(0x2c) |
+ ADM_CI_BURST_8_WORDS, adev->regs + ADM_CI_CONF(1));
+ writel(ADM_CI_RANGE_START(0x12) | ADM_CI_RANGE_END(0x28) |
+ ADM_CI_BURST_8_WORDS, adev->regs + ADM_CI_CONF(2));
+ writel(ADM_GP_CTL_LP_EN | ADM_GP_CTL_LP_CNT(0xf),
+ adev->regs + ADM_GP_CTL);
+
+ ret = devm_request_irq(adev->dev, adev->irq, adm_dma_irq,
+ 0, "adm_dma", adev);
+ if (ret)
+ goto err_disable_clks;
+
+ platform_set_drvdata(pdev, adev);
+
+ adev->common.dev = adev->dev;
+ adev->common.dev->dma_parms = &adev->dma_parms;
+
+ /* set capabilities */
+ dma_cap_zero(adev->common.cap_mask);
+ dma_cap_set(DMA_SLAVE, adev->common.cap_mask);
+ dma_cap_set(DMA_PRIVATE, adev->common.cap_mask);
+
+ /* initialize dmaengine apis */
+ adev->common.directions = BIT(DMA_DEV_TO_MEM | DMA_MEM_TO_DEV);
+ adev->common.residue_granularity = DMA_RESIDUE_GRANULARITY_DESCRIPTOR;
+ adev->common.src_addr_widths = DMA_SLAVE_BUSWIDTH_4_BYTES;
+ adev->common.dst_addr_widths = DMA_SLAVE_BUSWIDTH_4_BYTES;
+ adev->common.device_free_chan_resources = adm_free_chan;
+ adev->common.device_prep_slave_sg = adm_prep_slave_sg;
+ adev->common.device_issue_pending = adm_issue_pending;
+ adev->common.device_tx_status = adm_tx_status;
+ adev->common.device_terminate_all = adm_terminate_all;
+ adev->common.device_config = adm_slave_config;
+
+ ret = dma_async_device_register(&adev->common);
+ if (ret) {
+ dev_err(adev->dev, "failed to register dma async device\n");
+ goto err_disable_clks;
+ }
+
+ ret = of_dma_controller_register(pdev->dev.of_node, adm_dma_xlate,
+ &adev->common);
+ if (ret)
+ goto err_unregister_dma;
+
+ return 0;
+
+err_unregister_dma:
+ dma_async_device_unregister(&adev->common);
+err_disable_clks:
+ clk_disable_unprepare(adev->iface_clk);
+err_disable_core_clk:
+ clk_disable_unprepare(adev->core_clk);
+
+ return ret;
+}
+
+static int adm_dma_remove(struct platform_device *pdev)
+{
+ struct adm_device *adev = platform_get_drvdata(pdev);
+ struct adm_chan *achan;
+ u32 i;
+
+ of_dma_controller_free(pdev->dev.of_node);
+ dma_async_device_unregister(&adev->common);
+
+ for (i = 0; i < ADM_MAX_CHANNELS; i++) {
+ achan = &adev->channels[i];
+
+ /* mask IRQs for this channel/EE pair */
+ writel(0, adev->regs + ADM_CH_RSLT_CONF(achan->id, adev->ee));
+
+ tasklet_kill(&adev->channels[i].vc.task);
+ adm_terminate_all(&adev->channels[i].vc.chan);
+ }
+
+ devm_free_irq(adev->dev, adev->irq, adev);
+
+ clk_disable_unprepare(adev->core_clk);
+ clk_disable_unprepare(adev->iface_clk);
+
+ return 0;
+}
+
+static const struct of_device_id adm_of_match[] = {
+ { .compatible = "qcom,adm", },
+ {}
+};
+MODULE_DEVICE_TABLE(of, adm_of_match);
+
+static struct platform_driver adm_dma_driver = {
+ .probe = adm_dma_probe,
+ .remove = adm_dma_remove,
+ .driver = {
+ .name = "adm-dma-engine",
+ .of_match_table = adm_of_match,
+ },
+};
+
+module_platform_driver(adm_dma_driver);
+
+MODULE_AUTHOR("Andy Gross <agross@codeaurora.org>");
+MODULE_DESCRIPTION("QCOM ADM DMA engine driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/s3c24xx-dma.c b/drivers/dma/s3c24xx-dma.c
new file mode 100644
index 000000000..a09eeb545
--- /dev/null
+++ b/drivers/dma/s3c24xx-dma.c
@@ -0,0 +1,1428 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * S3C24XX DMA handling
+ *
+ * Copyright (c) 2013 Heiko Stuebner <heiko@sntech.de>
+ *
+ * based on amba-pl08x.c
+ *
+ * Copyright (c) 2006 ARM Ltd.
+ * Copyright (c) 2010 ST-Ericsson SA
+ *
+ * Author: Peter Pearse <peter.pearse@arm.com>
+ * Author: Linus Walleij <linus.walleij@stericsson.com>
+ *
+ * The DMA controllers in S3C24XX SoCs have a varying number of DMA signals
+ * that can be routed to any of the 4 to 8 hardware-channels.
+ *
+ * Therefore on these DMA controllers the number of channels
+ * and the number of incoming DMA signals are two totally different things.
+ * It is usually not possible to theoretically handle all physical signals,
+ * so a multiplexing scheme with possible denial of use is necessary.
+ *
+ * Open items:
+ * - bursts
+ */
+
+#include <linux/platform_device.h>
+#include <linux/types.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/interrupt.h>
+#include <linux/clk.h>
+#include <linux/module.h>
+#include <linux/mod_devicetable.h>
+#include <linux/slab.h>
+#include <linux/platform_data/dma-s3c24xx.h>
+
+#include "dmaengine.h"
+#include "virt-dma.h"
+
+#define MAX_DMA_CHANNELS 8
+
+#define S3C24XX_DISRC 0x00
+#define S3C24XX_DISRCC 0x04
+#define S3C24XX_DISRCC_INC_INCREMENT 0
+#define S3C24XX_DISRCC_INC_FIXED BIT(0)
+#define S3C24XX_DISRCC_LOC_AHB 0
+#define S3C24XX_DISRCC_LOC_APB BIT(1)
+
+#define S3C24XX_DIDST 0x08
+#define S3C24XX_DIDSTC 0x0c
+#define S3C24XX_DIDSTC_INC_INCREMENT 0
+#define S3C24XX_DIDSTC_INC_FIXED BIT(0)
+#define S3C24XX_DIDSTC_LOC_AHB 0
+#define S3C24XX_DIDSTC_LOC_APB BIT(1)
+#define S3C24XX_DIDSTC_INT_TC0 0
+#define S3C24XX_DIDSTC_INT_RELOAD BIT(2)
+
+#define S3C24XX_DCON 0x10
+
+#define S3C24XX_DCON_TC_MASK 0xfffff
+#define S3C24XX_DCON_DSZ_BYTE (0 << 20)
+#define S3C24XX_DCON_DSZ_HALFWORD (1 << 20)
+#define S3C24XX_DCON_DSZ_WORD (2 << 20)
+#define S3C24XX_DCON_DSZ_MASK (3 << 20)
+#define S3C24XX_DCON_DSZ_SHIFT 20
+#define S3C24XX_DCON_AUTORELOAD 0
+#define S3C24XX_DCON_NORELOAD BIT(22)
+#define S3C24XX_DCON_HWTRIG BIT(23)
+#define S3C24XX_DCON_HWSRC_SHIFT 24
+#define S3C24XX_DCON_SERV_SINGLE 0
+#define S3C24XX_DCON_SERV_WHOLE BIT(27)
+#define S3C24XX_DCON_TSZ_UNIT 0
+#define S3C24XX_DCON_TSZ_BURST4 BIT(28)
+#define S3C24XX_DCON_INT BIT(29)
+#define S3C24XX_DCON_SYNC_PCLK 0
+#define S3C24XX_DCON_SYNC_HCLK BIT(30)
+#define S3C24XX_DCON_DEMAND 0
+#define S3C24XX_DCON_HANDSHAKE BIT(31)
+
+#define S3C24XX_DSTAT 0x14
+#define S3C24XX_DSTAT_STAT_BUSY BIT(20)
+#define S3C24XX_DSTAT_CURRTC_MASK 0xfffff
+
+#define S3C24XX_DMASKTRIG 0x20
+#define S3C24XX_DMASKTRIG_SWTRIG BIT(0)
+#define S3C24XX_DMASKTRIG_ON BIT(1)
+#define S3C24XX_DMASKTRIG_STOP BIT(2)
+
+#define S3C24XX_DMAREQSEL 0x24
+#define S3C24XX_DMAREQSEL_HW BIT(0)
+
+/*
+ * S3C2410, S3C2440 and S3C2442 SoCs cannot select any physical channel
+ * for a DMA source. Instead only specific channels are valid.
+ * All of these SoCs have 4 physical channels and the number of request
+ * source bits is 3. Additionally we also need 1 bit to mark the channel
+ * as valid.
+ * Therefore we separate the chansel element of the channel data into 4
+ * parts of 4 bits each, to hold the information if the channel is valid
+ * and the hw request source to use.
+ *
+ * Example:
+ * SDI is valid on channels 0, 2 and 3 - with varying hw request sources.
+ * For it the chansel field would look like
+ *
+ * ((BIT(3) | 1) << 3 * 4) | // channel 3, with request source 1
+ * ((BIT(3) | 2) << 2 * 4) | // channel 2, with request source 2
+ * ((BIT(3) | 2) << 0 * 4) // channel 0, with request source 2
+ */
+#define S3C24XX_CHANSEL_WIDTH 4
+#define S3C24XX_CHANSEL_VALID BIT(3)
+#define S3C24XX_CHANSEL_REQ_MASK 7
+
+/*
+ * struct soc_data - vendor-specific config parameters for individual SoCs
+ * @stride: spacing between the registers of each channel
+ * @has_reqsel: does the controller use the newer requestselection mechanism
+ * @has_clocks: are controllable dma-clocks present
+ */
+struct soc_data {
+ int stride;
+ bool has_reqsel;
+ bool has_clocks;
+};
+
+/*
+ * enum s3c24xx_dma_chan_state - holds the virtual channel states
+ * @S3C24XX_DMA_CHAN_IDLE: the channel is idle
+ * @S3C24XX_DMA_CHAN_RUNNING: the channel has allocated a physical transport
+ * channel and is running a transfer on it
+ * @S3C24XX_DMA_CHAN_WAITING: the channel is waiting for a physical transport
+ * channel to become available (only pertains to memcpy channels)
+ */
+enum s3c24xx_dma_chan_state {
+ S3C24XX_DMA_CHAN_IDLE,
+ S3C24XX_DMA_CHAN_RUNNING,
+ S3C24XX_DMA_CHAN_WAITING,
+};
+
+/*
+ * struct s3c24xx_sg - structure containing data per sg
+ * @src_addr: src address of sg
+ * @dst_addr: dst address of sg
+ * @len: transfer len in bytes
+ * @node: node for txd's dsg_list
+ */
+struct s3c24xx_sg {
+ dma_addr_t src_addr;
+ dma_addr_t dst_addr;
+ size_t len;
+ struct list_head node;
+};
+
+/*
+ * struct s3c24xx_txd - wrapper for struct dma_async_tx_descriptor
+ * @vd: virtual DMA descriptor
+ * @dsg_list: list of children sg's
+ * @at: sg currently being transfered
+ * @width: transfer width
+ * @disrcc: value for source control register
+ * @didstc: value for destination control register
+ * @dcon: base value for dcon register
+ * @cyclic: indicate cyclic transfer
+ */
+struct s3c24xx_txd {
+ struct virt_dma_desc vd;
+ struct list_head dsg_list;
+ struct list_head *at;
+ u8 width;
+ u32 disrcc;
+ u32 didstc;
+ u32 dcon;
+ bool cyclic;
+};
+
+struct s3c24xx_dma_chan;
+
+/*
+ * struct s3c24xx_dma_phy - holder for the physical channels
+ * @id: physical index to this channel
+ * @valid: does the channel have all required elements
+ * @base: virtual memory base (remapped) for the this channel
+ * @irq: interrupt for this channel
+ * @clk: clock for this channel
+ * @lock: a lock to use when altering an instance of this struct
+ * @serving: virtual channel currently being served by this physicalchannel
+ * @host: a pointer to the host (internal use)
+ */
+struct s3c24xx_dma_phy {
+ unsigned int id;
+ bool valid;
+ void __iomem *base;
+ int irq;
+ struct clk *clk;
+ spinlock_t lock;
+ struct s3c24xx_dma_chan *serving;
+ struct s3c24xx_dma_engine *host;
+};
+
+/*
+ * struct s3c24xx_dma_chan - this structure wraps a DMA ENGINE channel
+ * @id: the id of the channel
+ * @name: name of the channel
+ * @vc: wrapped virtual channel
+ * @phy: the physical channel utilized by this channel, if there is one
+ * @runtime_addr: address for RX/TX according to the runtime config
+ * @at: active transaction on this channel
+ * @lock: a lock for this channel data
+ * @host: a pointer to the host (internal use)
+ * @state: whether the channel is idle, running etc
+ * @slave: whether this channel is a device (slave) or for memcpy
+ */
+struct s3c24xx_dma_chan {
+ int id;
+ const char *name;
+ struct virt_dma_chan vc;
+ struct s3c24xx_dma_phy *phy;
+ struct dma_slave_config cfg;
+ struct s3c24xx_txd *at;
+ struct s3c24xx_dma_engine *host;
+ enum s3c24xx_dma_chan_state state;
+ bool slave;
+};
+
+/*
+ * struct s3c24xx_dma_engine - the local state holder for the S3C24XX
+ * @pdev: the corresponding platform device
+ * @pdata: platform data passed in from the platform/machine
+ * @base: virtual memory base (remapped)
+ * @slave: slave engine for this instance
+ * @memcpy: memcpy engine for this instance
+ * @phy_chans: array of data for the physical channels
+ */
+struct s3c24xx_dma_engine {
+ struct platform_device *pdev;
+ const struct s3c24xx_dma_platdata *pdata;
+ struct soc_data *sdata;
+ void __iomem *base;
+ struct dma_device slave;
+ struct dma_device memcpy;
+ struct s3c24xx_dma_phy *phy_chans;
+};
+
+/*
+ * Physical channel handling
+ */
+
+/*
+ * Check whether a certain channel is busy or not.
+ */
+static int s3c24xx_dma_phy_busy(struct s3c24xx_dma_phy *phy)
+{
+ unsigned int val = readl(phy->base + S3C24XX_DSTAT);
+ return val & S3C24XX_DSTAT_STAT_BUSY;
+}
+
+static bool s3c24xx_dma_phy_valid(struct s3c24xx_dma_chan *s3cchan,
+ struct s3c24xx_dma_phy *phy)
+{
+ struct s3c24xx_dma_engine *s3cdma = s3cchan->host;
+ const struct s3c24xx_dma_platdata *pdata = s3cdma->pdata;
+ struct s3c24xx_dma_channel *cdata = &pdata->channels[s3cchan->id];
+ int phyvalid;
+
+ /* every phy is valid for memcopy channels */
+ if (!s3cchan->slave)
+ return true;
+
+ /* On newer variants all phys can be used for all virtual channels */
+ if (s3cdma->sdata->has_reqsel)
+ return true;
+
+ phyvalid = (cdata->chansel >> (phy->id * S3C24XX_CHANSEL_WIDTH));
+ return (phyvalid & S3C24XX_CHANSEL_VALID) ? true : false;
+}
+
+/*
+ * Allocate a physical channel for a virtual channel
+ *
+ * Try to locate a physical channel to be used for this transfer. If all
+ * are taken return NULL and the requester will have to cope by using
+ * some fallback PIO mode or retrying later.
+ */
+static
+struct s3c24xx_dma_phy *s3c24xx_dma_get_phy(struct s3c24xx_dma_chan *s3cchan)
+{
+ struct s3c24xx_dma_engine *s3cdma = s3cchan->host;
+ struct s3c24xx_dma_phy *phy = NULL;
+ unsigned long flags;
+ int i;
+ int ret;
+
+ for (i = 0; i < s3cdma->pdata->num_phy_channels; i++) {
+ phy = &s3cdma->phy_chans[i];
+
+ if (!phy->valid)
+ continue;
+
+ if (!s3c24xx_dma_phy_valid(s3cchan, phy))
+ continue;
+
+ spin_lock_irqsave(&phy->lock, flags);
+
+ if (!phy->serving) {
+ phy->serving = s3cchan;
+ spin_unlock_irqrestore(&phy->lock, flags);
+ break;
+ }
+
+ spin_unlock_irqrestore(&phy->lock, flags);
+ }
+
+ /* No physical channel available, cope with it */
+ if (i == s3cdma->pdata->num_phy_channels) {
+ dev_warn(&s3cdma->pdev->dev, "no phy channel available\n");
+ return NULL;
+ }
+
+ /* start the phy clock */
+ if (s3cdma->sdata->has_clocks) {
+ ret = clk_enable(phy->clk);
+ if (ret) {
+ dev_err(&s3cdma->pdev->dev, "could not enable clock for channel %d, err %d\n",
+ phy->id, ret);
+ phy->serving = NULL;
+ return NULL;
+ }
+ }
+
+ return phy;
+}
+
+/*
+ * Mark the physical channel as free.
+ *
+ * This drops the link between the physical and virtual channel.
+ */
+static inline void s3c24xx_dma_put_phy(struct s3c24xx_dma_phy *phy)
+{
+ struct s3c24xx_dma_engine *s3cdma = phy->host;
+
+ if (s3cdma->sdata->has_clocks)
+ clk_disable(phy->clk);
+
+ phy->serving = NULL;
+}
+
+/*
+ * Stops the channel by writing the stop bit.
+ * This should not be used for an on-going transfer, but as a method of
+ * shutting down a channel (eg, when it's no longer used) or terminating a
+ * transfer.
+ */
+static void s3c24xx_dma_terminate_phy(struct s3c24xx_dma_phy *phy)
+{
+ writel(S3C24XX_DMASKTRIG_STOP, phy->base + S3C24XX_DMASKTRIG);
+}
+
+/*
+ * Virtual channel handling
+ */
+
+static inline
+struct s3c24xx_dma_chan *to_s3c24xx_dma_chan(struct dma_chan *chan)
+{
+ return container_of(chan, struct s3c24xx_dma_chan, vc.chan);
+}
+
+static u32 s3c24xx_dma_getbytes_chan(struct s3c24xx_dma_chan *s3cchan)
+{
+ struct s3c24xx_dma_phy *phy = s3cchan->phy;
+ struct s3c24xx_txd *txd = s3cchan->at;
+ u32 tc = readl(phy->base + S3C24XX_DSTAT) & S3C24XX_DSTAT_CURRTC_MASK;
+
+ return tc * txd->width;
+}
+
+static int s3c24xx_dma_set_runtime_config(struct dma_chan *chan,
+ struct dma_slave_config *config)
+{
+ struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(chan);
+ unsigned long flags;
+ int ret = 0;
+
+ /* Reject definitely invalid configurations */
+ if (config->src_addr_width == DMA_SLAVE_BUSWIDTH_8_BYTES ||
+ config->dst_addr_width == DMA_SLAVE_BUSWIDTH_8_BYTES)
+ return -EINVAL;
+
+ spin_lock_irqsave(&s3cchan->vc.lock, flags);
+
+ if (!s3cchan->slave) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ s3cchan->cfg = *config;
+
+out:
+ spin_unlock_irqrestore(&s3cchan->vc.lock, flags);
+ return ret;
+}
+
+/*
+ * Transfer handling
+ */
+
+static inline
+struct s3c24xx_txd *to_s3c24xx_txd(struct dma_async_tx_descriptor *tx)
+{
+ return container_of(tx, struct s3c24xx_txd, vd.tx);
+}
+
+static struct s3c24xx_txd *s3c24xx_dma_get_txd(void)
+{
+ struct s3c24xx_txd *txd = kzalloc(sizeof(*txd), GFP_NOWAIT);
+
+ if (txd) {
+ INIT_LIST_HEAD(&txd->dsg_list);
+ txd->dcon = S3C24XX_DCON_INT | S3C24XX_DCON_NORELOAD;
+ }
+
+ return txd;
+}
+
+static void s3c24xx_dma_free_txd(struct s3c24xx_txd *txd)
+{
+ struct s3c24xx_sg *dsg, *_dsg;
+
+ list_for_each_entry_safe(dsg, _dsg, &txd->dsg_list, node) {
+ list_del(&dsg->node);
+ kfree(dsg);
+ }
+
+ kfree(txd);
+}
+
+static void s3c24xx_dma_start_next_sg(struct s3c24xx_dma_chan *s3cchan,
+ struct s3c24xx_txd *txd)
+{
+ struct s3c24xx_dma_engine *s3cdma = s3cchan->host;
+ struct s3c24xx_dma_phy *phy = s3cchan->phy;
+ const struct s3c24xx_dma_platdata *pdata = s3cdma->pdata;
+ struct s3c24xx_sg *dsg = list_entry(txd->at, struct s3c24xx_sg, node);
+ u32 dcon = txd->dcon;
+ u32 val;
+
+ /* transfer-size and -count from len and width */
+ switch (txd->width) {
+ case 1:
+ dcon |= S3C24XX_DCON_DSZ_BYTE | dsg->len;
+ break;
+ case 2:
+ dcon |= S3C24XX_DCON_DSZ_HALFWORD | (dsg->len / 2);
+ break;
+ case 4:
+ dcon |= S3C24XX_DCON_DSZ_WORD | (dsg->len / 4);
+ break;
+ }
+
+ if (s3cchan->slave) {
+ struct s3c24xx_dma_channel *cdata =
+ &pdata->channels[s3cchan->id];
+
+ if (s3cdma->sdata->has_reqsel) {
+ writel_relaxed((cdata->chansel << 1) |
+ S3C24XX_DMAREQSEL_HW,
+ phy->base + S3C24XX_DMAREQSEL);
+ } else {
+ int csel = cdata->chansel >> (phy->id *
+ S3C24XX_CHANSEL_WIDTH);
+
+ csel &= S3C24XX_CHANSEL_REQ_MASK;
+ dcon |= csel << S3C24XX_DCON_HWSRC_SHIFT;
+ dcon |= S3C24XX_DCON_HWTRIG;
+ }
+ } else {
+ if (s3cdma->sdata->has_reqsel)
+ writel_relaxed(0, phy->base + S3C24XX_DMAREQSEL);
+ }
+
+ writel_relaxed(dsg->src_addr, phy->base + S3C24XX_DISRC);
+ writel_relaxed(txd->disrcc, phy->base + S3C24XX_DISRCC);
+ writel_relaxed(dsg->dst_addr, phy->base + S3C24XX_DIDST);
+ writel_relaxed(txd->didstc, phy->base + S3C24XX_DIDSTC);
+ writel_relaxed(dcon, phy->base + S3C24XX_DCON);
+
+ val = readl_relaxed(phy->base + S3C24XX_DMASKTRIG);
+ val &= ~S3C24XX_DMASKTRIG_STOP;
+ val |= S3C24XX_DMASKTRIG_ON;
+
+ /* trigger the dma operation for memcpy transfers */
+ if (!s3cchan->slave)
+ val |= S3C24XX_DMASKTRIG_SWTRIG;
+
+ writel(val, phy->base + S3C24XX_DMASKTRIG);
+}
+
+/*
+ * Set the initial DMA register values and start first sg.
+ */
+static void s3c24xx_dma_start_next_txd(struct s3c24xx_dma_chan *s3cchan)
+{
+ struct s3c24xx_dma_phy *phy = s3cchan->phy;
+ struct virt_dma_desc *vd = vchan_next_desc(&s3cchan->vc);
+ struct s3c24xx_txd *txd = to_s3c24xx_txd(&vd->tx);
+
+ list_del(&txd->vd.node);
+
+ s3cchan->at = txd;
+
+ /* Wait for channel inactive */
+ while (s3c24xx_dma_phy_busy(phy))
+ cpu_relax();
+
+ /* point to the first element of the sg list */
+ txd->at = txd->dsg_list.next;
+ s3c24xx_dma_start_next_sg(s3cchan, txd);
+}
+
+/*
+ * Try to allocate a physical channel. When successful, assign it to
+ * this virtual channel, and initiate the next descriptor. The
+ * virtual channel lock must be held at this point.
+ */
+static void s3c24xx_dma_phy_alloc_and_start(struct s3c24xx_dma_chan *s3cchan)
+{
+ struct s3c24xx_dma_engine *s3cdma = s3cchan->host;
+ struct s3c24xx_dma_phy *phy;
+
+ phy = s3c24xx_dma_get_phy(s3cchan);
+ if (!phy) {
+ dev_dbg(&s3cdma->pdev->dev, "no physical channel available for xfer on %s\n",
+ s3cchan->name);
+ s3cchan->state = S3C24XX_DMA_CHAN_WAITING;
+ return;
+ }
+
+ dev_dbg(&s3cdma->pdev->dev, "allocated physical channel %d for xfer on %s\n",
+ phy->id, s3cchan->name);
+
+ s3cchan->phy = phy;
+ s3cchan->state = S3C24XX_DMA_CHAN_RUNNING;
+
+ s3c24xx_dma_start_next_txd(s3cchan);
+}
+
+static void s3c24xx_dma_phy_reassign_start(struct s3c24xx_dma_phy *phy,
+ struct s3c24xx_dma_chan *s3cchan)
+{
+ struct s3c24xx_dma_engine *s3cdma = s3cchan->host;
+
+ dev_dbg(&s3cdma->pdev->dev, "reassigned physical channel %d for xfer on %s\n",
+ phy->id, s3cchan->name);
+
+ /*
+ * We do this without taking the lock; we're really only concerned
+ * about whether this pointer is NULL or not, and we're guaranteed
+ * that this will only be called when it _already_ is non-NULL.
+ */
+ phy->serving = s3cchan;
+ s3cchan->phy = phy;
+ s3cchan->state = S3C24XX_DMA_CHAN_RUNNING;
+ s3c24xx_dma_start_next_txd(s3cchan);
+}
+
+/*
+ * Free a physical DMA channel, potentially reallocating it to another
+ * virtual channel if we have any pending.
+ */
+static void s3c24xx_dma_phy_free(struct s3c24xx_dma_chan *s3cchan)
+{
+ struct s3c24xx_dma_engine *s3cdma = s3cchan->host;
+ struct s3c24xx_dma_chan *p, *next;
+
+retry:
+ next = NULL;
+
+ /* Find a waiting virtual channel for the next transfer. */
+ list_for_each_entry(p, &s3cdma->memcpy.channels, vc.chan.device_node)
+ if (p->state == S3C24XX_DMA_CHAN_WAITING) {
+ next = p;
+ break;
+ }
+
+ if (!next) {
+ list_for_each_entry(p, &s3cdma->slave.channels,
+ vc.chan.device_node)
+ if (p->state == S3C24XX_DMA_CHAN_WAITING &&
+ s3c24xx_dma_phy_valid(p, s3cchan->phy)) {
+ next = p;
+ break;
+ }
+ }
+
+ /* Ensure that the physical channel is stopped */
+ s3c24xx_dma_terminate_phy(s3cchan->phy);
+
+ if (next) {
+ bool success;
+
+ /*
+ * Eww. We know this isn't going to deadlock
+ * but lockdep probably doesn't.
+ */
+ spin_lock(&next->vc.lock);
+ /* Re-check the state now that we have the lock */
+ success = next->state == S3C24XX_DMA_CHAN_WAITING;
+ if (success)
+ s3c24xx_dma_phy_reassign_start(s3cchan->phy, next);
+ spin_unlock(&next->vc.lock);
+
+ /* If the state changed, try to find another channel */
+ if (!success)
+ goto retry;
+ } else {
+ /* No more jobs, so free up the physical channel */
+ s3c24xx_dma_put_phy(s3cchan->phy);
+ }
+
+ s3cchan->phy = NULL;
+ s3cchan->state = S3C24XX_DMA_CHAN_IDLE;
+}
+
+static void s3c24xx_dma_desc_free(struct virt_dma_desc *vd)
+{
+ struct s3c24xx_txd *txd = to_s3c24xx_txd(&vd->tx);
+ struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(vd->tx.chan);
+
+ if (!s3cchan->slave)
+ dma_descriptor_unmap(&vd->tx);
+
+ s3c24xx_dma_free_txd(txd);
+}
+
+static irqreturn_t s3c24xx_dma_irq(int irq, void *data)
+{
+ struct s3c24xx_dma_phy *phy = data;
+ struct s3c24xx_dma_chan *s3cchan = phy->serving;
+ struct s3c24xx_txd *txd;
+
+ dev_dbg(&phy->host->pdev->dev, "interrupt on channel %d\n", phy->id);
+
+ /*
+ * Interrupts happen to notify the completion of a transfer and the
+ * channel should have moved into its stop state already on its own.
+ * Therefore interrupts on channels not bound to a virtual channel
+ * should never happen. Nevertheless send a terminate command to the
+ * channel if the unlikely case happens.
+ */
+ if (unlikely(!s3cchan)) {
+ dev_err(&phy->host->pdev->dev, "interrupt on unused channel %d\n",
+ phy->id);
+
+ s3c24xx_dma_terminate_phy(phy);
+
+ return IRQ_HANDLED;
+ }
+
+ spin_lock(&s3cchan->vc.lock);
+ txd = s3cchan->at;
+ if (txd) {
+ /* when more sg's are in this txd, start the next one */
+ if (!list_is_last(txd->at, &txd->dsg_list)) {
+ txd->at = txd->at->next;
+ if (txd->cyclic)
+ vchan_cyclic_callback(&txd->vd);
+ s3c24xx_dma_start_next_sg(s3cchan, txd);
+ } else if (!txd->cyclic) {
+ s3cchan->at = NULL;
+ vchan_cookie_complete(&txd->vd);
+
+ /*
+ * And start the next descriptor (if any),
+ * otherwise free this channel.
+ */
+ if (vchan_next_desc(&s3cchan->vc))
+ s3c24xx_dma_start_next_txd(s3cchan);
+ else
+ s3c24xx_dma_phy_free(s3cchan);
+ } else {
+ vchan_cyclic_callback(&txd->vd);
+
+ /* Cyclic: reset at beginning */
+ txd->at = txd->dsg_list.next;
+ s3c24xx_dma_start_next_sg(s3cchan, txd);
+ }
+ }
+ spin_unlock(&s3cchan->vc.lock);
+
+ return IRQ_HANDLED;
+}
+
+/*
+ * The DMA ENGINE API
+ */
+
+static int s3c24xx_dma_terminate_all(struct dma_chan *chan)
+{
+ struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(chan);
+ struct s3c24xx_dma_engine *s3cdma = s3cchan->host;
+ LIST_HEAD(head);
+ unsigned long flags;
+ int ret;
+
+ spin_lock_irqsave(&s3cchan->vc.lock, flags);
+
+ if (!s3cchan->phy && !s3cchan->at) {
+ dev_err(&s3cdma->pdev->dev, "trying to terminate already stopped channel %d\n",
+ s3cchan->id);
+ ret = -EINVAL;
+ goto unlock;
+ }
+
+ s3cchan->state = S3C24XX_DMA_CHAN_IDLE;
+
+ /* Mark physical channel as free */
+ if (s3cchan->phy)
+ s3c24xx_dma_phy_free(s3cchan);
+
+ /* Dequeue current job */
+ if (s3cchan->at) {
+ vchan_terminate_vdesc(&s3cchan->at->vd);
+ s3cchan->at = NULL;
+ }
+
+ /* Dequeue jobs not yet fired as well */
+
+ vchan_get_all_descriptors(&s3cchan->vc, &head);
+
+ spin_unlock_irqrestore(&s3cchan->vc.lock, flags);
+
+ vchan_dma_desc_free_list(&s3cchan->vc, &head);
+
+ return 0;
+
+unlock:
+ spin_unlock_irqrestore(&s3cchan->vc.lock, flags);
+
+ return ret;
+}
+
+static void s3c24xx_dma_synchronize(struct dma_chan *chan)
+{
+ struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(chan);
+
+ vchan_synchronize(&s3cchan->vc);
+}
+
+static void s3c24xx_dma_free_chan_resources(struct dma_chan *chan)
+{
+ /* Ensure all queued descriptors are freed */
+ vchan_free_chan_resources(to_virt_chan(chan));
+}
+
+static enum dma_status s3c24xx_dma_tx_status(struct dma_chan *chan,
+ dma_cookie_t cookie, struct dma_tx_state *txstate)
+{
+ struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(chan);
+ struct s3c24xx_txd *txd;
+ struct s3c24xx_sg *dsg;
+ struct virt_dma_desc *vd;
+ unsigned long flags;
+ enum dma_status ret;
+ size_t bytes = 0;
+
+ spin_lock_irqsave(&s3cchan->vc.lock, flags);
+ ret = dma_cookie_status(chan, cookie, txstate);
+
+ /*
+ * There's no point calculating the residue if there's
+ * no txstate to store the value.
+ */
+ if (ret == DMA_COMPLETE || !txstate) {
+ spin_unlock_irqrestore(&s3cchan->vc.lock, flags);
+ return ret;
+ }
+
+ vd = vchan_find_desc(&s3cchan->vc, cookie);
+ if (vd) {
+ /* On the issued list, so hasn't been processed yet */
+ txd = to_s3c24xx_txd(&vd->tx);
+
+ list_for_each_entry(dsg, &txd->dsg_list, node)
+ bytes += dsg->len;
+ } else {
+ /*
+ * Currently running, so sum over the pending sg's and
+ * the currently active one.
+ */
+ txd = s3cchan->at;
+
+ dsg = list_entry(txd->at, struct s3c24xx_sg, node);
+ list_for_each_entry_from(dsg, &txd->dsg_list, node)
+ bytes += dsg->len;
+
+ bytes += s3c24xx_dma_getbytes_chan(s3cchan);
+ }
+ spin_unlock_irqrestore(&s3cchan->vc.lock, flags);
+
+ /*
+ * This cookie not complete yet
+ * Get number of bytes left in the active transactions and queue
+ */
+ dma_set_residue(txstate, bytes);
+
+ /* Whether waiting or running, we're in progress */
+ return ret;
+}
+
+/*
+ * Initialize a descriptor to be used by memcpy submit
+ */
+static struct dma_async_tx_descriptor *s3c24xx_dma_prep_memcpy(
+ struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
+ size_t len, unsigned long flags)
+{
+ struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(chan);
+ struct s3c24xx_dma_engine *s3cdma = s3cchan->host;
+ struct s3c24xx_txd *txd;
+ struct s3c24xx_sg *dsg;
+ int src_mod, dest_mod;
+
+ dev_dbg(&s3cdma->pdev->dev, "prepare memcpy of %zu bytes from %s\n",
+ len, s3cchan->name);
+
+ if ((len & S3C24XX_DCON_TC_MASK) != len) {
+ dev_err(&s3cdma->pdev->dev, "memcpy size %zu to large\n", len);
+ return NULL;
+ }
+
+ txd = s3c24xx_dma_get_txd();
+ if (!txd)
+ return NULL;
+
+ dsg = kzalloc(sizeof(*dsg), GFP_NOWAIT);
+ if (!dsg) {
+ s3c24xx_dma_free_txd(txd);
+ return NULL;
+ }
+ list_add_tail(&dsg->node, &txd->dsg_list);
+
+ dsg->src_addr = src;
+ dsg->dst_addr = dest;
+ dsg->len = len;
+
+ /*
+ * Determine a suitable transfer width.
+ * The DMA controller cannot fetch/store information which is not
+ * naturally aligned on the bus, i.e., a 4 byte fetch must start at
+ * an address divisible by 4 - more generally addr % width must be 0.
+ */
+ src_mod = src % 4;
+ dest_mod = dest % 4;
+ switch (len % 4) {
+ case 0:
+ txd->width = (src_mod == 0 && dest_mod == 0) ? 4 : 1;
+ break;
+ case 2:
+ txd->width = ((src_mod == 2 || src_mod == 0) &&
+ (dest_mod == 2 || dest_mod == 0)) ? 2 : 1;
+ break;
+ default:
+ txd->width = 1;
+ break;
+ }
+
+ txd->disrcc = S3C24XX_DISRCC_LOC_AHB | S3C24XX_DISRCC_INC_INCREMENT;
+ txd->didstc = S3C24XX_DIDSTC_LOC_AHB | S3C24XX_DIDSTC_INC_INCREMENT;
+ txd->dcon |= S3C24XX_DCON_DEMAND | S3C24XX_DCON_SYNC_HCLK |
+ S3C24XX_DCON_SERV_WHOLE;
+
+ return vchan_tx_prep(&s3cchan->vc, &txd->vd, flags);
+}
+
+static struct dma_async_tx_descriptor *s3c24xx_dma_prep_dma_cyclic(
+ struct dma_chan *chan, dma_addr_t addr, size_t size, size_t period,
+ enum dma_transfer_direction direction, unsigned long flags)
+{
+ struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(chan);
+ struct s3c24xx_dma_engine *s3cdma = s3cchan->host;
+ const struct s3c24xx_dma_platdata *pdata = s3cdma->pdata;
+ struct s3c24xx_dma_channel *cdata = &pdata->channels[s3cchan->id];
+ struct s3c24xx_txd *txd;
+ struct s3c24xx_sg *dsg;
+ unsigned sg_len;
+ dma_addr_t slave_addr;
+ u32 hwcfg = 0;
+ int i;
+
+ dev_dbg(&s3cdma->pdev->dev,
+ "prepare cyclic transaction of %zu bytes with period %zu from %s\n",
+ size, period, s3cchan->name);
+
+ if (!is_slave_direction(direction)) {
+ dev_err(&s3cdma->pdev->dev,
+ "direction %d unsupported\n", direction);
+ return NULL;
+ }
+
+ txd = s3c24xx_dma_get_txd();
+ if (!txd)
+ return NULL;
+
+ txd->cyclic = 1;
+
+ if (cdata->handshake)
+ txd->dcon |= S3C24XX_DCON_HANDSHAKE;
+
+ switch (cdata->bus) {
+ case S3C24XX_DMA_APB:
+ txd->dcon |= S3C24XX_DCON_SYNC_PCLK;
+ hwcfg |= S3C24XX_DISRCC_LOC_APB;
+ break;
+ case S3C24XX_DMA_AHB:
+ txd->dcon |= S3C24XX_DCON_SYNC_HCLK;
+ hwcfg |= S3C24XX_DISRCC_LOC_AHB;
+ break;
+ }
+
+ /*
+ * Always assume our peripheral desintation is a fixed
+ * address in memory.
+ */
+ hwcfg |= S3C24XX_DISRCC_INC_FIXED;
+
+ /*
+ * Individual dma operations are requested by the slave,
+ * so serve only single atomic operations (S3C24XX_DCON_SERV_SINGLE).
+ */
+ txd->dcon |= S3C24XX_DCON_SERV_SINGLE;
+
+ if (direction == DMA_MEM_TO_DEV) {
+ txd->disrcc = S3C24XX_DISRCC_LOC_AHB |
+ S3C24XX_DISRCC_INC_INCREMENT;
+ txd->didstc = hwcfg;
+ slave_addr = s3cchan->cfg.dst_addr;
+ txd->width = s3cchan->cfg.dst_addr_width;
+ } else {
+ txd->disrcc = hwcfg;
+ txd->didstc = S3C24XX_DIDSTC_LOC_AHB |
+ S3C24XX_DIDSTC_INC_INCREMENT;
+ slave_addr = s3cchan->cfg.src_addr;
+ txd->width = s3cchan->cfg.src_addr_width;
+ }
+
+ sg_len = size / period;
+
+ for (i = 0; i < sg_len; i++) {
+ dsg = kzalloc(sizeof(*dsg), GFP_NOWAIT);
+ if (!dsg) {
+ s3c24xx_dma_free_txd(txd);
+ return NULL;
+ }
+ list_add_tail(&dsg->node, &txd->dsg_list);
+
+ dsg->len = period;
+ /* Check last period length */
+ if (i == sg_len - 1)
+ dsg->len = size - period * i;
+ if (direction == DMA_MEM_TO_DEV) {
+ dsg->src_addr = addr + period * i;
+ dsg->dst_addr = slave_addr;
+ } else { /* DMA_DEV_TO_MEM */
+ dsg->src_addr = slave_addr;
+ dsg->dst_addr = addr + period * i;
+ }
+ }
+
+ return vchan_tx_prep(&s3cchan->vc, &txd->vd, flags);
+}
+
+static struct dma_async_tx_descriptor *s3c24xx_dma_prep_slave_sg(
+ struct dma_chan *chan, struct scatterlist *sgl,
+ unsigned int sg_len, enum dma_transfer_direction direction,
+ unsigned long flags, void *context)
+{
+ struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(chan);
+ struct s3c24xx_dma_engine *s3cdma = s3cchan->host;
+ const struct s3c24xx_dma_platdata *pdata = s3cdma->pdata;
+ struct s3c24xx_dma_channel *cdata = &pdata->channels[s3cchan->id];
+ struct s3c24xx_txd *txd;
+ struct s3c24xx_sg *dsg;
+ struct scatterlist *sg;
+ dma_addr_t slave_addr;
+ u32 hwcfg = 0;
+ int tmp;
+
+ dev_dbg(&s3cdma->pdev->dev, "prepare transaction of %d bytes from %s\n",
+ sg_dma_len(sgl), s3cchan->name);
+
+ txd = s3c24xx_dma_get_txd();
+ if (!txd)
+ return NULL;
+
+ if (cdata->handshake)
+ txd->dcon |= S3C24XX_DCON_HANDSHAKE;
+
+ switch (cdata->bus) {
+ case S3C24XX_DMA_APB:
+ txd->dcon |= S3C24XX_DCON_SYNC_PCLK;
+ hwcfg |= S3C24XX_DISRCC_LOC_APB;
+ break;
+ case S3C24XX_DMA_AHB:
+ txd->dcon |= S3C24XX_DCON_SYNC_HCLK;
+ hwcfg |= S3C24XX_DISRCC_LOC_AHB;
+ break;
+ }
+
+ /*
+ * Always assume our peripheral desintation is a fixed
+ * address in memory.
+ */
+ hwcfg |= S3C24XX_DISRCC_INC_FIXED;
+
+ /*
+ * Individual dma operations are requested by the slave,
+ * so serve only single atomic operations (S3C24XX_DCON_SERV_SINGLE).
+ */
+ txd->dcon |= S3C24XX_DCON_SERV_SINGLE;
+
+ if (direction == DMA_MEM_TO_DEV) {
+ txd->disrcc = S3C24XX_DISRCC_LOC_AHB |
+ S3C24XX_DISRCC_INC_INCREMENT;
+ txd->didstc = hwcfg;
+ slave_addr = s3cchan->cfg.dst_addr;
+ txd->width = s3cchan->cfg.dst_addr_width;
+ } else if (direction == DMA_DEV_TO_MEM) {
+ txd->disrcc = hwcfg;
+ txd->didstc = S3C24XX_DIDSTC_LOC_AHB |
+ S3C24XX_DIDSTC_INC_INCREMENT;
+ slave_addr = s3cchan->cfg.src_addr;
+ txd->width = s3cchan->cfg.src_addr_width;
+ } else {
+ s3c24xx_dma_free_txd(txd);
+ dev_err(&s3cdma->pdev->dev,
+ "direction %d unsupported\n", direction);
+ return NULL;
+ }
+
+ for_each_sg(sgl, sg, sg_len, tmp) {
+ dsg = kzalloc(sizeof(*dsg), GFP_NOWAIT);
+ if (!dsg) {
+ s3c24xx_dma_free_txd(txd);
+ return NULL;
+ }
+ list_add_tail(&dsg->node, &txd->dsg_list);
+
+ dsg->len = sg_dma_len(sg);
+ if (direction == DMA_MEM_TO_DEV) {
+ dsg->src_addr = sg_dma_address(sg);
+ dsg->dst_addr = slave_addr;
+ } else { /* DMA_DEV_TO_MEM */
+ dsg->src_addr = slave_addr;
+ dsg->dst_addr = sg_dma_address(sg);
+ }
+ }
+
+ return vchan_tx_prep(&s3cchan->vc, &txd->vd, flags);
+}
+
+/*
+ * Slave transactions callback to the slave device to allow
+ * synchronization of slave DMA signals with the DMAC enable
+ */
+static void s3c24xx_dma_issue_pending(struct dma_chan *chan)
+{
+ struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(chan);
+ unsigned long flags;
+
+ spin_lock_irqsave(&s3cchan->vc.lock, flags);
+ if (vchan_issue_pending(&s3cchan->vc)) {
+ if (!s3cchan->phy && s3cchan->state != S3C24XX_DMA_CHAN_WAITING)
+ s3c24xx_dma_phy_alloc_and_start(s3cchan);
+ }
+ spin_unlock_irqrestore(&s3cchan->vc.lock, flags);
+}
+
+/*
+ * Bringup and teardown
+ */
+
+/*
+ * Initialise the DMAC memcpy/slave channels.
+ * Make a local wrapper to hold required data
+ */
+static int s3c24xx_dma_init_virtual_channels(struct s3c24xx_dma_engine *s3cdma,
+ struct dma_device *dmadev, unsigned int channels, bool slave)
+{
+ struct s3c24xx_dma_chan *chan;
+ int i;
+
+ INIT_LIST_HEAD(&dmadev->channels);
+
+ /*
+ * Register as many memcpy as we have physical channels,
+ * we won't always be able to use all but the code will have
+ * to cope with that situation.
+ */
+ for (i = 0; i < channels; i++) {
+ chan = devm_kzalloc(dmadev->dev, sizeof(*chan), GFP_KERNEL);
+ if (!chan)
+ return -ENOMEM;
+
+ chan->id = i;
+ chan->host = s3cdma;
+ chan->state = S3C24XX_DMA_CHAN_IDLE;
+
+ if (slave) {
+ chan->slave = true;
+ chan->name = kasprintf(GFP_KERNEL, "slave%d", i);
+ if (!chan->name)
+ return -ENOMEM;
+ } else {
+ chan->name = kasprintf(GFP_KERNEL, "memcpy%d", i);
+ if (!chan->name)
+ return -ENOMEM;
+ }
+ dev_dbg(dmadev->dev,
+ "initialize virtual channel \"%s\"\n",
+ chan->name);
+
+ chan->vc.desc_free = s3c24xx_dma_desc_free;
+ vchan_init(&chan->vc, dmadev);
+ }
+ dev_info(dmadev->dev, "initialized %d virtual %s channels\n",
+ i, slave ? "slave" : "memcpy");
+ return i;
+}
+
+static void s3c24xx_dma_free_virtual_channels(struct dma_device *dmadev)
+{
+ struct s3c24xx_dma_chan *chan = NULL;
+ struct s3c24xx_dma_chan *next;
+
+ list_for_each_entry_safe(chan,
+ next, &dmadev->channels, vc.chan.device_node) {
+ list_del(&chan->vc.chan.device_node);
+ tasklet_kill(&chan->vc.task);
+ }
+}
+
+/* s3c2410, s3c2440 and s3c2442 have a 0x40 stride without separate clocks */
+static struct soc_data soc_s3c2410 = {
+ .stride = 0x40,
+ .has_reqsel = false,
+ .has_clocks = false,
+};
+
+/* s3c2412 and s3c2413 have a 0x40 stride and dmareqsel mechanism */
+static struct soc_data soc_s3c2412 = {
+ .stride = 0x40,
+ .has_reqsel = true,
+ .has_clocks = true,
+};
+
+/* s3c2443 and following have a 0x100 stride and dmareqsel mechanism */
+static struct soc_data soc_s3c2443 = {
+ .stride = 0x100,
+ .has_reqsel = true,
+ .has_clocks = true,
+};
+
+static const struct platform_device_id s3c24xx_dma_driver_ids[] = {
+ {
+ .name = "s3c2410-dma",
+ .driver_data = (kernel_ulong_t)&soc_s3c2410,
+ }, {
+ .name = "s3c2412-dma",
+ .driver_data = (kernel_ulong_t)&soc_s3c2412,
+ }, {
+ .name = "s3c2443-dma",
+ .driver_data = (kernel_ulong_t)&soc_s3c2443,
+ },
+ { },
+};
+
+static struct soc_data *s3c24xx_dma_get_soc_data(struct platform_device *pdev)
+{
+ return (struct soc_data *)
+ platform_get_device_id(pdev)->driver_data;
+}
+
+static int s3c24xx_dma_probe(struct platform_device *pdev)
+{
+ const struct s3c24xx_dma_platdata *pdata = dev_get_platdata(&pdev->dev);
+ struct s3c24xx_dma_engine *s3cdma;
+ struct soc_data *sdata;
+ struct resource *res;
+ int ret;
+ int i;
+
+ if (!pdata) {
+ dev_err(&pdev->dev, "platform data missing\n");
+ return -ENODEV;
+ }
+
+ /* Basic sanity check */
+ if (pdata->num_phy_channels > MAX_DMA_CHANNELS) {
+ dev_err(&pdev->dev, "too many dma channels %d, max %d\n",
+ pdata->num_phy_channels, MAX_DMA_CHANNELS);
+ return -EINVAL;
+ }
+
+ sdata = s3c24xx_dma_get_soc_data(pdev);
+ if (!sdata)
+ return -EINVAL;
+
+ s3cdma = devm_kzalloc(&pdev->dev, sizeof(*s3cdma), GFP_KERNEL);
+ if (!s3cdma)
+ return -ENOMEM;
+
+ s3cdma->pdev = pdev;
+ s3cdma->pdata = pdata;
+ s3cdma->sdata = sdata;
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ s3cdma->base = devm_ioremap_resource(&pdev->dev, res);
+ if (IS_ERR(s3cdma->base))
+ return PTR_ERR(s3cdma->base);
+
+ s3cdma->phy_chans = devm_kcalloc(&pdev->dev,
+ pdata->num_phy_channels,
+ sizeof(struct s3c24xx_dma_phy),
+ GFP_KERNEL);
+ if (!s3cdma->phy_chans)
+ return -ENOMEM;
+
+ /* acquire irqs and clocks for all physical channels */
+ for (i = 0; i < pdata->num_phy_channels; i++) {
+ struct s3c24xx_dma_phy *phy = &s3cdma->phy_chans[i];
+ char clk_name[6];
+
+ phy->id = i;
+ phy->base = s3cdma->base + (i * sdata->stride);
+ phy->host = s3cdma;
+
+ phy->irq = platform_get_irq(pdev, i);
+ if (phy->irq < 0)
+ continue;
+
+ ret = devm_request_irq(&pdev->dev, phy->irq, s3c24xx_dma_irq,
+ 0, pdev->name, phy);
+ if (ret) {
+ dev_err(&pdev->dev, "Unable to request irq for channel %d, error %d\n",
+ i, ret);
+ continue;
+ }
+
+ if (sdata->has_clocks) {
+ sprintf(clk_name, "dma.%d", i);
+ phy->clk = devm_clk_get(&pdev->dev, clk_name);
+ if (IS_ERR(phy->clk) && sdata->has_clocks) {
+ dev_err(&pdev->dev, "unable to acquire clock for channel %d, error %lu\n",
+ i, PTR_ERR(phy->clk));
+ continue;
+ }
+
+ ret = clk_prepare(phy->clk);
+ if (ret) {
+ dev_err(&pdev->dev, "clock for phy %d failed, error %d\n",
+ i, ret);
+ continue;
+ }
+ }
+
+ spin_lock_init(&phy->lock);
+ phy->valid = true;
+
+ dev_dbg(&pdev->dev, "physical channel %d is %s\n",
+ i, s3c24xx_dma_phy_busy(phy) ? "BUSY" : "FREE");
+ }
+
+ /* Initialize memcpy engine */
+ dma_cap_set(DMA_MEMCPY, s3cdma->memcpy.cap_mask);
+ dma_cap_set(DMA_PRIVATE, s3cdma->memcpy.cap_mask);
+ s3cdma->memcpy.dev = &pdev->dev;
+ s3cdma->memcpy.device_free_chan_resources =
+ s3c24xx_dma_free_chan_resources;
+ s3cdma->memcpy.device_prep_dma_memcpy = s3c24xx_dma_prep_memcpy;
+ s3cdma->memcpy.device_tx_status = s3c24xx_dma_tx_status;
+ s3cdma->memcpy.device_issue_pending = s3c24xx_dma_issue_pending;
+ s3cdma->memcpy.device_config = s3c24xx_dma_set_runtime_config;
+ s3cdma->memcpy.device_terminate_all = s3c24xx_dma_terminate_all;
+ s3cdma->memcpy.device_synchronize = s3c24xx_dma_synchronize;
+
+ /* Initialize slave engine for SoC internal dedicated peripherals */
+ dma_cap_set(DMA_SLAVE, s3cdma->slave.cap_mask);
+ dma_cap_set(DMA_CYCLIC, s3cdma->slave.cap_mask);
+ dma_cap_set(DMA_PRIVATE, s3cdma->slave.cap_mask);
+ s3cdma->slave.dev = &pdev->dev;
+ s3cdma->slave.device_free_chan_resources =
+ s3c24xx_dma_free_chan_resources;
+ s3cdma->slave.device_tx_status = s3c24xx_dma_tx_status;
+ s3cdma->slave.device_issue_pending = s3c24xx_dma_issue_pending;
+ s3cdma->slave.device_prep_slave_sg = s3c24xx_dma_prep_slave_sg;
+ s3cdma->slave.device_prep_dma_cyclic = s3c24xx_dma_prep_dma_cyclic;
+ s3cdma->slave.device_config = s3c24xx_dma_set_runtime_config;
+ s3cdma->slave.device_terminate_all = s3c24xx_dma_terminate_all;
+ s3cdma->slave.device_synchronize = s3c24xx_dma_synchronize;
+ s3cdma->slave.filter.map = pdata->slave_map;
+ s3cdma->slave.filter.mapcnt = pdata->slavecnt;
+ s3cdma->slave.filter.fn = s3c24xx_dma_filter;
+
+ /* Register as many memcpy channels as there are physical channels */
+ ret = s3c24xx_dma_init_virtual_channels(s3cdma, &s3cdma->memcpy,
+ pdata->num_phy_channels, false);
+ if (ret <= 0) {
+ dev_warn(&pdev->dev,
+ "%s failed to enumerate memcpy channels - %d\n",
+ __func__, ret);
+ goto err_memcpy;
+ }
+
+ /* Register slave channels */
+ ret = s3c24xx_dma_init_virtual_channels(s3cdma, &s3cdma->slave,
+ pdata->num_channels, true);
+ if (ret <= 0) {
+ dev_warn(&pdev->dev,
+ "%s failed to enumerate slave channels - %d\n",
+ __func__, ret);
+ goto err_slave;
+ }
+
+ ret = dma_async_device_register(&s3cdma->memcpy);
+ if (ret) {
+ dev_warn(&pdev->dev,
+ "%s failed to register memcpy as an async device - %d\n",
+ __func__, ret);
+ goto err_memcpy_reg;
+ }
+
+ ret = dma_async_device_register(&s3cdma->slave);
+ if (ret) {
+ dev_warn(&pdev->dev,
+ "%s failed to register slave as an async device - %d\n",
+ __func__, ret);
+ goto err_slave_reg;
+ }
+
+ platform_set_drvdata(pdev, s3cdma);
+ dev_info(&pdev->dev, "Loaded dma driver with %d physical channels\n",
+ pdata->num_phy_channels);
+
+ return 0;
+
+err_slave_reg:
+ dma_async_device_unregister(&s3cdma->memcpy);
+err_memcpy_reg:
+ s3c24xx_dma_free_virtual_channels(&s3cdma->slave);
+err_slave:
+ s3c24xx_dma_free_virtual_channels(&s3cdma->memcpy);
+err_memcpy:
+ if (sdata->has_clocks)
+ for (i = 0; i < pdata->num_phy_channels; i++) {
+ struct s3c24xx_dma_phy *phy = &s3cdma->phy_chans[i];
+ if (phy->valid)
+ clk_unprepare(phy->clk);
+ }
+
+ return ret;
+}
+
+static void s3c24xx_dma_free_irq(struct platform_device *pdev,
+ struct s3c24xx_dma_engine *s3cdma)
+{
+ int i;
+
+ for (i = 0; i < s3cdma->pdata->num_phy_channels; i++) {
+ struct s3c24xx_dma_phy *phy = &s3cdma->phy_chans[i];
+
+ devm_free_irq(&pdev->dev, phy->irq, phy);
+ }
+}
+
+static int s3c24xx_dma_remove(struct platform_device *pdev)
+{
+ const struct s3c24xx_dma_platdata *pdata = dev_get_platdata(&pdev->dev);
+ struct s3c24xx_dma_engine *s3cdma = platform_get_drvdata(pdev);
+ struct soc_data *sdata = s3c24xx_dma_get_soc_data(pdev);
+ int i;
+
+ dma_async_device_unregister(&s3cdma->slave);
+ dma_async_device_unregister(&s3cdma->memcpy);
+
+ s3c24xx_dma_free_irq(pdev, s3cdma);
+
+ s3c24xx_dma_free_virtual_channels(&s3cdma->slave);
+ s3c24xx_dma_free_virtual_channels(&s3cdma->memcpy);
+
+ if (sdata->has_clocks)
+ for (i = 0; i < pdata->num_phy_channels; i++) {
+ struct s3c24xx_dma_phy *phy = &s3cdma->phy_chans[i];
+ if (phy->valid)
+ clk_unprepare(phy->clk);
+ }
+
+ return 0;
+}
+
+static struct platform_driver s3c24xx_dma_driver = {
+ .driver = {
+ .name = "s3c24xx-dma",
+ },
+ .id_table = s3c24xx_dma_driver_ids,
+ .probe = s3c24xx_dma_probe,
+ .remove = s3c24xx_dma_remove,
+};
+
+module_platform_driver(s3c24xx_dma_driver);
+
+bool s3c24xx_dma_filter(struct dma_chan *chan, void *param)
+{
+ struct s3c24xx_dma_chan *s3cchan;
+
+ if (chan->device->dev->driver != &s3c24xx_dma_driver.driver)
+ return false;
+
+ s3cchan = to_s3c24xx_dma_chan(chan);
+
+ return s3cchan->id == (uintptr_t)param;
+}
+EXPORT_SYMBOL(s3c24xx_dma_filter);
+
+MODULE_DESCRIPTION("S3C24XX DMA Driver");
+MODULE_AUTHOR("Heiko Stuebner");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/sa11x0-dma.c b/drivers/dma/sa11x0-dma.c
new file mode 100644
index 000000000..a29c13cae
--- /dev/null
+++ b/drivers/dma/sa11x0-dma.c
@@ -0,0 +1,1102 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * SA11x0 DMAengine support
+ *
+ * Copyright (C) 2012 Russell King
+ * Derived in part from arch/arm/mach-sa1100/dma.c,
+ * Copyright (C) 2000, 2001 by Nicolas Pitre
+ */
+#include <linux/sched.h>
+#include <linux/device.h>
+#include <linux/dmaengine.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+
+#include "virt-dma.h"
+
+#define NR_PHY_CHAN 6
+#define DMA_ALIGN 3
+#define DMA_MAX_SIZE 0x1fff
+#define DMA_CHUNK_SIZE 0x1000
+
+#define DMA_DDAR 0x00
+#define DMA_DCSR_S 0x04
+#define DMA_DCSR_C 0x08
+#define DMA_DCSR_R 0x0c
+#define DMA_DBSA 0x10
+#define DMA_DBTA 0x14
+#define DMA_DBSB 0x18
+#define DMA_DBTB 0x1c
+#define DMA_SIZE 0x20
+
+#define DCSR_RUN (1 << 0)
+#define DCSR_IE (1 << 1)
+#define DCSR_ERROR (1 << 2)
+#define DCSR_DONEA (1 << 3)
+#define DCSR_STRTA (1 << 4)
+#define DCSR_DONEB (1 << 5)
+#define DCSR_STRTB (1 << 6)
+#define DCSR_BIU (1 << 7)
+
+#define DDAR_RW (1 << 0) /* 0 = W, 1 = R */
+#define DDAR_E (1 << 1) /* 0 = LE, 1 = BE */
+#define DDAR_BS (1 << 2) /* 0 = BS4, 1 = BS8 */
+#define DDAR_DW (1 << 3) /* 0 = 8b, 1 = 16b */
+#define DDAR_Ser0UDCTr (0x0 << 4)
+#define DDAR_Ser0UDCRc (0x1 << 4)
+#define DDAR_Ser1SDLCTr (0x2 << 4)
+#define DDAR_Ser1SDLCRc (0x3 << 4)
+#define DDAR_Ser1UARTTr (0x4 << 4)
+#define DDAR_Ser1UARTRc (0x5 << 4)
+#define DDAR_Ser2ICPTr (0x6 << 4)
+#define DDAR_Ser2ICPRc (0x7 << 4)
+#define DDAR_Ser3UARTTr (0x8 << 4)
+#define DDAR_Ser3UARTRc (0x9 << 4)
+#define DDAR_Ser4MCP0Tr (0xa << 4)
+#define DDAR_Ser4MCP0Rc (0xb << 4)
+#define DDAR_Ser4MCP1Tr (0xc << 4)
+#define DDAR_Ser4MCP1Rc (0xd << 4)
+#define DDAR_Ser4SSPTr (0xe << 4)
+#define DDAR_Ser4SSPRc (0xf << 4)
+
+struct sa11x0_dma_sg {
+ u32 addr;
+ u32 len;
+};
+
+struct sa11x0_dma_desc {
+ struct virt_dma_desc vd;
+
+ u32 ddar;
+ size_t size;
+ unsigned period;
+ bool cyclic;
+
+ unsigned sglen;
+ struct sa11x0_dma_sg sg[];
+};
+
+struct sa11x0_dma_phy;
+
+struct sa11x0_dma_chan {
+ struct virt_dma_chan vc;
+
+ /* protected by c->vc.lock */
+ struct sa11x0_dma_phy *phy;
+ enum dma_status status;
+
+ /* protected by d->lock */
+ struct list_head node;
+
+ u32 ddar;
+ const char *name;
+};
+
+struct sa11x0_dma_phy {
+ void __iomem *base;
+ struct sa11x0_dma_dev *dev;
+ unsigned num;
+
+ struct sa11x0_dma_chan *vchan;
+
+ /* Protected by c->vc.lock */
+ unsigned sg_load;
+ struct sa11x0_dma_desc *txd_load;
+ unsigned sg_done;
+ struct sa11x0_dma_desc *txd_done;
+ u32 dbs[2];
+ u32 dbt[2];
+ u32 dcsr;
+};
+
+struct sa11x0_dma_dev {
+ struct dma_device slave;
+ void __iomem *base;
+ spinlock_t lock;
+ struct tasklet_struct task;
+ struct list_head chan_pending;
+ struct sa11x0_dma_phy phy[NR_PHY_CHAN];
+};
+
+static struct sa11x0_dma_chan *to_sa11x0_dma_chan(struct dma_chan *chan)
+{
+ return container_of(chan, struct sa11x0_dma_chan, vc.chan);
+}
+
+static struct sa11x0_dma_dev *to_sa11x0_dma(struct dma_device *dmadev)
+{
+ return container_of(dmadev, struct sa11x0_dma_dev, slave);
+}
+
+static struct sa11x0_dma_desc *sa11x0_dma_next_desc(struct sa11x0_dma_chan *c)
+{
+ struct virt_dma_desc *vd = vchan_next_desc(&c->vc);
+
+ return vd ? container_of(vd, struct sa11x0_dma_desc, vd) : NULL;
+}
+
+static void sa11x0_dma_free_desc(struct virt_dma_desc *vd)
+{
+ kfree(container_of(vd, struct sa11x0_dma_desc, vd));
+}
+
+static void sa11x0_dma_start_desc(struct sa11x0_dma_phy *p, struct sa11x0_dma_desc *txd)
+{
+ list_del(&txd->vd.node);
+ p->txd_load = txd;
+ p->sg_load = 0;
+
+ dev_vdbg(p->dev->slave.dev, "pchan %u: txd %p[%x]: starting: DDAR:%x\n",
+ p->num, &txd->vd, txd->vd.tx.cookie, txd->ddar);
+}
+
+static void noinline sa11x0_dma_start_sg(struct sa11x0_dma_phy *p,
+ struct sa11x0_dma_chan *c)
+{
+ struct sa11x0_dma_desc *txd = p->txd_load;
+ struct sa11x0_dma_sg *sg;
+ void __iomem *base = p->base;
+ unsigned dbsx, dbtx;
+ u32 dcsr;
+
+ if (!txd)
+ return;
+
+ dcsr = readl_relaxed(base + DMA_DCSR_R);
+
+ /* Don't try to load the next transfer if both buffers are started */
+ if ((dcsr & (DCSR_STRTA | DCSR_STRTB)) == (DCSR_STRTA | DCSR_STRTB))
+ return;
+
+ if (p->sg_load == txd->sglen) {
+ if (!txd->cyclic) {
+ struct sa11x0_dma_desc *txn = sa11x0_dma_next_desc(c);
+
+ /*
+ * We have reached the end of the current descriptor.
+ * Peek at the next descriptor, and if compatible with
+ * the current, start processing it.
+ */
+ if (txn && txn->ddar == txd->ddar) {
+ txd = txn;
+ sa11x0_dma_start_desc(p, txn);
+ } else {
+ p->txd_load = NULL;
+ return;
+ }
+ } else {
+ /* Cyclic: reset back to beginning */
+ p->sg_load = 0;
+ }
+ }
+
+ sg = &txd->sg[p->sg_load++];
+
+ /* Select buffer to load according to channel status */
+ if (((dcsr & (DCSR_BIU | DCSR_STRTB)) == (DCSR_BIU | DCSR_STRTB)) ||
+ ((dcsr & (DCSR_BIU | DCSR_STRTA)) == 0)) {
+ dbsx = DMA_DBSA;
+ dbtx = DMA_DBTA;
+ dcsr = DCSR_STRTA | DCSR_IE | DCSR_RUN;
+ } else {
+ dbsx = DMA_DBSB;
+ dbtx = DMA_DBTB;
+ dcsr = DCSR_STRTB | DCSR_IE | DCSR_RUN;
+ }
+
+ writel_relaxed(sg->addr, base + dbsx);
+ writel_relaxed(sg->len, base + dbtx);
+ writel(dcsr, base + DMA_DCSR_S);
+
+ dev_dbg(p->dev->slave.dev, "pchan %u: load: DCSR:%02x DBS%c:%08x DBT%c:%08x\n",
+ p->num, dcsr,
+ 'A' + (dbsx == DMA_DBSB), sg->addr,
+ 'A' + (dbtx == DMA_DBTB), sg->len);
+}
+
+static void noinline sa11x0_dma_complete(struct sa11x0_dma_phy *p,
+ struct sa11x0_dma_chan *c)
+{
+ struct sa11x0_dma_desc *txd = p->txd_done;
+
+ if (++p->sg_done == txd->sglen) {
+ if (!txd->cyclic) {
+ vchan_cookie_complete(&txd->vd);
+
+ p->sg_done = 0;
+ p->txd_done = p->txd_load;
+
+ if (!p->txd_done)
+ tasklet_schedule(&p->dev->task);
+ } else {
+ if ((p->sg_done % txd->period) == 0)
+ vchan_cyclic_callback(&txd->vd);
+
+ /* Cyclic: reset back to beginning */
+ p->sg_done = 0;
+ }
+ }
+
+ sa11x0_dma_start_sg(p, c);
+}
+
+static irqreturn_t sa11x0_dma_irq(int irq, void *dev_id)
+{
+ struct sa11x0_dma_phy *p = dev_id;
+ struct sa11x0_dma_dev *d = p->dev;
+ struct sa11x0_dma_chan *c;
+ u32 dcsr;
+
+ dcsr = readl_relaxed(p->base + DMA_DCSR_R);
+ if (!(dcsr & (DCSR_ERROR | DCSR_DONEA | DCSR_DONEB)))
+ return IRQ_NONE;
+
+ /* Clear reported status bits */
+ writel_relaxed(dcsr & (DCSR_ERROR | DCSR_DONEA | DCSR_DONEB),
+ p->base + DMA_DCSR_C);
+
+ dev_dbg(d->slave.dev, "pchan %u: irq: DCSR:%02x\n", p->num, dcsr);
+
+ if (dcsr & DCSR_ERROR) {
+ dev_err(d->slave.dev, "pchan %u: error. DCSR:%02x DDAR:%08x DBSA:%08x DBTA:%08x DBSB:%08x DBTB:%08x\n",
+ p->num, dcsr,
+ readl_relaxed(p->base + DMA_DDAR),
+ readl_relaxed(p->base + DMA_DBSA),
+ readl_relaxed(p->base + DMA_DBTA),
+ readl_relaxed(p->base + DMA_DBSB),
+ readl_relaxed(p->base + DMA_DBTB));
+ }
+
+ c = p->vchan;
+ if (c) {
+ unsigned long flags;
+
+ spin_lock_irqsave(&c->vc.lock, flags);
+ /*
+ * Now that we're holding the lock, check that the vchan
+ * really is associated with this pchan before touching the
+ * hardware. This should always succeed, because we won't
+ * change p->vchan or c->phy while the channel is actively
+ * transferring.
+ */
+ if (c->phy == p) {
+ if (dcsr & DCSR_DONEA)
+ sa11x0_dma_complete(p, c);
+ if (dcsr & DCSR_DONEB)
+ sa11x0_dma_complete(p, c);
+ }
+ spin_unlock_irqrestore(&c->vc.lock, flags);
+ }
+
+ return IRQ_HANDLED;
+}
+
+static void sa11x0_dma_start_txd(struct sa11x0_dma_chan *c)
+{
+ struct sa11x0_dma_desc *txd = sa11x0_dma_next_desc(c);
+
+ /* If the issued list is empty, we have no further txds to process */
+ if (txd) {
+ struct sa11x0_dma_phy *p = c->phy;
+
+ sa11x0_dma_start_desc(p, txd);
+ p->txd_done = txd;
+ p->sg_done = 0;
+
+ /* The channel should not have any transfers started */
+ WARN_ON(readl_relaxed(p->base + DMA_DCSR_R) &
+ (DCSR_STRTA | DCSR_STRTB));
+
+ /* Clear the run and start bits before changing DDAR */
+ writel_relaxed(DCSR_RUN | DCSR_STRTA | DCSR_STRTB,
+ p->base + DMA_DCSR_C);
+ writel_relaxed(txd->ddar, p->base + DMA_DDAR);
+
+ /* Try to start both buffers */
+ sa11x0_dma_start_sg(p, c);
+ sa11x0_dma_start_sg(p, c);
+ }
+}
+
+static void sa11x0_dma_tasklet(struct tasklet_struct *t)
+{
+ struct sa11x0_dma_dev *d = from_tasklet(d, t, task);
+ struct sa11x0_dma_phy *p;
+ struct sa11x0_dma_chan *c;
+ unsigned pch, pch_alloc = 0;
+
+ dev_dbg(d->slave.dev, "tasklet enter\n");
+
+ list_for_each_entry(c, &d->slave.channels, vc.chan.device_node) {
+ spin_lock_irq(&c->vc.lock);
+ p = c->phy;
+ if (p && !p->txd_done) {
+ sa11x0_dma_start_txd(c);
+ if (!p->txd_done) {
+ /* No current txd associated with this channel */
+ dev_dbg(d->slave.dev, "pchan %u: free\n", p->num);
+
+ /* Mark this channel free */
+ c->phy = NULL;
+ p->vchan = NULL;
+ }
+ }
+ spin_unlock_irq(&c->vc.lock);
+ }
+
+ spin_lock_irq(&d->lock);
+ for (pch = 0; pch < NR_PHY_CHAN; pch++) {
+ p = &d->phy[pch];
+
+ if (p->vchan == NULL && !list_empty(&d->chan_pending)) {
+ c = list_first_entry(&d->chan_pending,
+ struct sa11x0_dma_chan, node);
+ list_del_init(&c->node);
+
+ pch_alloc |= 1 << pch;
+
+ /* Mark this channel allocated */
+ p->vchan = c;
+
+ dev_dbg(d->slave.dev, "pchan %u: alloc vchan %p\n", pch, &c->vc);
+ }
+ }
+ spin_unlock_irq(&d->lock);
+
+ for (pch = 0; pch < NR_PHY_CHAN; pch++) {
+ if (pch_alloc & (1 << pch)) {
+ p = &d->phy[pch];
+ c = p->vchan;
+
+ spin_lock_irq(&c->vc.lock);
+ c->phy = p;
+
+ sa11x0_dma_start_txd(c);
+ spin_unlock_irq(&c->vc.lock);
+ }
+ }
+
+ dev_dbg(d->slave.dev, "tasklet exit\n");
+}
+
+
+static void sa11x0_dma_free_chan_resources(struct dma_chan *chan)
+{
+ struct sa11x0_dma_chan *c = to_sa11x0_dma_chan(chan);
+ struct sa11x0_dma_dev *d = to_sa11x0_dma(chan->device);
+ unsigned long flags;
+
+ spin_lock_irqsave(&d->lock, flags);
+ list_del_init(&c->node);
+ spin_unlock_irqrestore(&d->lock, flags);
+
+ vchan_free_chan_resources(&c->vc);
+}
+
+static dma_addr_t sa11x0_dma_pos(struct sa11x0_dma_phy *p)
+{
+ unsigned reg;
+ u32 dcsr;
+
+ dcsr = readl_relaxed(p->base + DMA_DCSR_R);
+
+ if ((dcsr & (DCSR_BIU | DCSR_STRTA)) == DCSR_STRTA ||
+ (dcsr & (DCSR_BIU | DCSR_STRTB)) == DCSR_BIU)
+ reg = DMA_DBSA;
+ else
+ reg = DMA_DBSB;
+
+ return readl_relaxed(p->base + reg);
+}
+
+static enum dma_status sa11x0_dma_tx_status(struct dma_chan *chan,
+ dma_cookie_t cookie, struct dma_tx_state *state)
+{
+ struct sa11x0_dma_chan *c = to_sa11x0_dma_chan(chan);
+ struct sa11x0_dma_dev *d = to_sa11x0_dma(chan->device);
+ struct sa11x0_dma_phy *p;
+ struct virt_dma_desc *vd;
+ unsigned long flags;
+ enum dma_status ret;
+
+ ret = dma_cookie_status(&c->vc.chan, cookie, state);
+ if (ret == DMA_COMPLETE)
+ return ret;
+
+ if (!state)
+ return c->status;
+
+ spin_lock_irqsave(&c->vc.lock, flags);
+ p = c->phy;
+
+ /*
+ * If the cookie is on our issue queue, then the residue is
+ * its total size.
+ */
+ vd = vchan_find_desc(&c->vc, cookie);
+ if (vd) {
+ state->residue = container_of(vd, struct sa11x0_dma_desc, vd)->size;
+ } else if (!p) {
+ state->residue = 0;
+ } else {
+ struct sa11x0_dma_desc *txd;
+ size_t bytes = 0;
+
+ if (p->txd_done && p->txd_done->vd.tx.cookie == cookie)
+ txd = p->txd_done;
+ else if (p->txd_load && p->txd_load->vd.tx.cookie == cookie)
+ txd = p->txd_load;
+ else
+ txd = NULL;
+
+ ret = c->status;
+ if (txd) {
+ dma_addr_t addr = sa11x0_dma_pos(p);
+ unsigned i;
+
+ dev_vdbg(d->slave.dev, "tx_status: addr:%pad\n", &addr);
+
+ for (i = 0; i < txd->sglen; i++) {
+ dev_vdbg(d->slave.dev, "tx_status: [%u] %x+%x\n",
+ i, txd->sg[i].addr, txd->sg[i].len);
+ if (addr >= txd->sg[i].addr &&
+ addr < txd->sg[i].addr + txd->sg[i].len) {
+ unsigned len;
+
+ len = txd->sg[i].len -
+ (addr - txd->sg[i].addr);
+ dev_vdbg(d->slave.dev, "tx_status: [%u] +%x\n",
+ i, len);
+ bytes += len;
+ i++;
+ break;
+ }
+ }
+ for (; i < txd->sglen; i++) {
+ dev_vdbg(d->slave.dev, "tx_status: [%u] %x+%x ++\n",
+ i, txd->sg[i].addr, txd->sg[i].len);
+ bytes += txd->sg[i].len;
+ }
+ }
+ state->residue = bytes;
+ }
+ spin_unlock_irqrestore(&c->vc.lock, flags);
+
+ dev_vdbg(d->slave.dev, "tx_status: bytes 0x%x\n", state->residue);
+
+ return ret;
+}
+
+/*
+ * Move pending txds to the issued list, and re-init pending list.
+ * If not already pending, add this channel to the list of pending
+ * channels and trigger the tasklet to run.
+ */
+static void sa11x0_dma_issue_pending(struct dma_chan *chan)
+{
+ struct sa11x0_dma_chan *c = to_sa11x0_dma_chan(chan);
+ struct sa11x0_dma_dev *d = to_sa11x0_dma(chan->device);
+ unsigned long flags;
+
+ spin_lock_irqsave(&c->vc.lock, flags);
+ if (vchan_issue_pending(&c->vc)) {
+ if (!c->phy) {
+ spin_lock(&d->lock);
+ if (list_empty(&c->node)) {
+ list_add_tail(&c->node, &d->chan_pending);
+ tasklet_schedule(&d->task);
+ dev_dbg(d->slave.dev, "vchan %p: issued\n", &c->vc);
+ }
+ spin_unlock(&d->lock);
+ }
+ } else
+ dev_dbg(d->slave.dev, "vchan %p: nothing to issue\n", &c->vc);
+ spin_unlock_irqrestore(&c->vc.lock, flags);
+}
+
+static struct dma_async_tx_descriptor *sa11x0_dma_prep_slave_sg(
+ struct dma_chan *chan, struct scatterlist *sg, unsigned int sglen,
+ enum dma_transfer_direction dir, unsigned long flags, void *context)
+{
+ struct sa11x0_dma_chan *c = to_sa11x0_dma_chan(chan);
+ struct sa11x0_dma_desc *txd;
+ struct scatterlist *sgent;
+ unsigned i, j = sglen;
+ size_t size = 0;
+
+ /* SA11x0 channels can only operate in their native direction */
+ if (dir != (c->ddar & DDAR_RW ? DMA_DEV_TO_MEM : DMA_MEM_TO_DEV)) {
+ dev_err(chan->device->dev, "vchan %p: bad DMA direction: DDAR:%08x dir:%u\n",
+ &c->vc, c->ddar, dir);
+ return NULL;
+ }
+
+ /* Do not allow zero-sized txds */
+ if (sglen == 0)
+ return NULL;
+
+ for_each_sg(sg, sgent, sglen, i) {
+ dma_addr_t addr = sg_dma_address(sgent);
+ unsigned int len = sg_dma_len(sgent);
+
+ if (len > DMA_MAX_SIZE)
+ j += DIV_ROUND_UP(len, DMA_MAX_SIZE & ~DMA_ALIGN) - 1;
+ if (addr & DMA_ALIGN) {
+ dev_dbg(chan->device->dev, "vchan %p: bad buffer alignment: %pad\n",
+ &c->vc, &addr);
+ return NULL;
+ }
+ }
+
+ txd = kzalloc(struct_size(txd, sg, j), GFP_ATOMIC);
+ if (!txd) {
+ dev_dbg(chan->device->dev, "vchan %p: kzalloc failed\n", &c->vc);
+ return NULL;
+ }
+
+ j = 0;
+ for_each_sg(sg, sgent, sglen, i) {
+ dma_addr_t addr = sg_dma_address(sgent);
+ unsigned len = sg_dma_len(sgent);
+
+ size += len;
+
+ do {
+ unsigned tlen = len;
+
+ /*
+ * Check whether the transfer will fit. If not, try
+ * to split the transfer up such that we end up with
+ * equal chunks - but make sure that we preserve the
+ * alignment. This avoids small segments.
+ */
+ if (tlen > DMA_MAX_SIZE) {
+ unsigned mult = DIV_ROUND_UP(tlen,
+ DMA_MAX_SIZE & ~DMA_ALIGN);
+
+ tlen = (tlen / mult) & ~DMA_ALIGN;
+ }
+
+ txd->sg[j].addr = addr;
+ txd->sg[j].len = tlen;
+
+ addr += tlen;
+ len -= tlen;
+ j++;
+ } while (len);
+ }
+
+ txd->ddar = c->ddar;
+ txd->size = size;
+ txd->sglen = j;
+
+ dev_dbg(chan->device->dev, "vchan %p: txd %p: size %zu nr %u\n",
+ &c->vc, &txd->vd, txd->size, txd->sglen);
+
+ return vchan_tx_prep(&c->vc, &txd->vd, flags);
+}
+
+static struct dma_async_tx_descriptor *sa11x0_dma_prep_dma_cyclic(
+ struct dma_chan *chan, dma_addr_t addr, size_t size, size_t period,
+ enum dma_transfer_direction dir, unsigned long flags)
+{
+ struct sa11x0_dma_chan *c = to_sa11x0_dma_chan(chan);
+ struct sa11x0_dma_desc *txd;
+ unsigned i, j, k, sglen, sgperiod;
+
+ /* SA11x0 channels can only operate in their native direction */
+ if (dir != (c->ddar & DDAR_RW ? DMA_DEV_TO_MEM : DMA_MEM_TO_DEV)) {
+ dev_err(chan->device->dev, "vchan %p: bad DMA direction: DDAR:%08x dir:%u\n",
+ &c->vc, c->ddar, dir);
+ return NULL;
+ }
+
+ sgperiod = DIV_ROUND_UP(period, DMA_MAX_SIZE & ~DMA_ALIGN);
+ sglen = size * sgperiod / period;
+
+ /* Do not allow zero-sized txds */
+ if (sglen == 0)
+ return NULL;
+
+ txd = kzalloc(struct_size(txd, sg, sglen), GFP_ATOMIC);
+ if (!txd) {
+ dev_dbg(chan->device->dev, "vchan %p: kzalloc failed\n", &c->vc);
+ return NULL;
+ }
+
+ for (i = k = 0; i < size / period; i++) {
+ size_t tlen, len = period;
+
+ for (j = 0; j < sgperiod; j++, k++) {
+ tlen = len;
+
+ if (tlen > DMA_MAX_SIZE) {
+ unsigned mult = DIV_ROUND_UP(tlen, DMA_MAX_SIZE & ~DMA_ALIGN);
+ tlen = (tlen / mult) & ~DMA_ALIGN;
+ }
+
+ txd->sg[k].addr = addr;
+ txd->sg[k].len = tlen;
+ addr += tlen;
+ len -= tlen;
+ }
+
+ WARN_ON(len != 0);
+ }
+
+ WARN_ON(k != sglen);
+
+ txd->ddar = c->ddar;
+ txd->size = size;
+ txd->sglen = sglen;
+ txd->cyclic = 1;
+ txd->period = sgperiod;
+
+ return vchan_tx_prep(&c->vc, &txd->vd, DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+}
+
+static int sa11x0_dma_device_config(struct dma_chan *chan,
+ struct dma_slave_config *cfg)
+{
+ struct sa11x0_dma_chan *c = to_sa11x0_dma_chan(chan);
+ u32 ddar = c->ddar & ((0xf << 4) | DDAR_RW);
+ dma_addr_t addr;
+ enum dma_slave_buswidth width;
+ u32 maxburst;
+
+ if (ddar & DDAR_RW) {
+ addr = cfg->src_addr;
+ width = cfg->src_addr_width;
+ maxburst = cfg->src_maxburst;
+ } else {
+ addr = cfg->dst_addr;
+ width = cfg->dst_addr_width;
+ maxburst = cfg->dst_maxburst;
+ }
+
+ if ((width != DMA_SLAVE_BUSWIDTH_1_BYTE &&
+ width != DMA_SLAVE_BUSWIDTH_2_BYTES) ||
+ (maxburst != 4 && maxburst != 8))
+ return -EINVAL;
+
+ if (width == DMA_SLAVE_BUSWIDTH_2_BYTES)
+ ddar |= DDAR_DW;
+ if (maxburst == 8)
+ ddar |= DDAR_BS;
+
+ dev_dbg(c->vc.chan.device->dev, "vchan %p: dma_slave_config addr %pad width %u burst %u\n",
+ &c->vc, &addr, width, maxburst);
+
+ c->ddar = ddar | (addr & 0xf0000000) | (addr & 0x003ffffc) << 6;
+
+ return 0;
+}
+
+static int sa11x0_dma_device_pause(struct dma_chan *chan)
+{
+ struct sa11x0_dma_chan *c = to_sa11x0_dma_chan(chan);
+ struct sa11x0_dma_dev *d = to_sa11x0_dma(chan->device);
+ struct sa11x0_dma_phy *p;
+ unsigned long flags;
+
+ dev_dbg(d->slave.dev, "vchan %p: pause\n", &c->vc);
+ spin_lock_irqsave(&c->vc.lock, flags);
+ if (c->status == DMA_IN_PROGRESS) {
+ c->status = DMA_PAUSED;
+
+ p = c->phy;
+ if (p) {
+ writel(DCSR_RUN | DCSR_IE, p->base + DMA_DCSR_C);
+ } else {
+ spin_lock(&d->lock);
+ list_del_init(&c->node);
+ spin_unlock(&d->lock);
+ }
+ }
+ spin_unlock_irqrestore(&c->vc.lock, flags);
+
+ return 0;
+}
+
+static int sa11x0_dma_device_resume(struct dma_chan *chan)
+{
+ struct sa11x0_dma_chan *c = to_sa11x0_dma_chan(chan);
+ struct sa11x0_dma_dev *d = to_sa11x0_dma(chan->device);
+ struct sa11x0_dma_phy *p;
+ unsigned long flags;
+
+ dev_dbg(d->slave.dev, "vchan %p: resume\n", &c->vc);
+ spin_lock_irqsave(&c->vc.lock, flags);
+ if (c->status == DMA_PAUSED) {
+ c->status = DMA_IN_PROGRESS;
+
+ p = c->phy;
+ if (p) {
+ writel(DCSR_RUN | DCSR_IE, p->base + DMA_DCSR_S);
+ } else if (!list_empty(&c->vc.desc_issued)) {
+ spin_lock(&d->lock);
+ list_add_tail(&c->node, &d->chan_pending);
+ spin_unlock(&d->lock);
+ }
+ }
+ spin_unlock_irqrestore(&c->vc.lock, flags);
+
+ return 0;
+}
+
+static int sa11x0_dma_device_terminate_all(struct dma_chan *chan)
+{
+ struct sa11x0_dma_chan *c = to_sa11x0_dma_chan(chan);
+ struct sa11x0_dma_dev *d = to_sa11x0_dma(chan->device);
+ struct sa11x0_dma_phy *p;
+ LIST_HEAD(head);
+ unsigned long flags;
+
+ dev_dbg(d->slave.dev, "vchan %p: terminate all\n", &c->vc);
+ /* Clear the tx descriptor lists */
+ spin_lock_irqsave(&c->vc.lock, flags);
+ vchan_get_all_descriptors(&c->vc, &head);
+
+ p = c->phy;
+ if (p) {
+ dev_dbg(d->slave.dev, "pchan %u: terminating\n", p->num);
+ /* vchan is assigned to a pchan - stop the channel */
+ writel(DCSR_RUN | DCSR_IE |
+ DCSR_STRTA | DCSR_DONEA |
+ DCSR_STRTB | DCSR_DONEB,
+ p->base + DMA_DCSR_C);
+
+ if (p->txd_load) {
+ if (p->txd_load != p->txd_done)
+ list_add_tail(&p->txd_load->vd.node, &head);
+ p->txd_load = NULL;
+ }
+ if (p->txd_done) {
+ list_add_tail(&p->txd_done->vd.node, &head);
+ p->txd_done = NULL;
+ }
+ c->phy = NULL;
+ spin_lock(&d->lock);
+ p->vchan = NULL;
+ spin_unlock(&d->lock);
+ tasklet_schedule(&d->task);
+ }
+ spin_unlock_irqrestore(&c->vc.lock, flags);
+ vchan_dma_desc_free_list(&c->vc, &head);
+
+ return 0;
+}
+
+struct sa11x0_dma_channel_desc {
+ u32 ddar;
+ const char *name;
+};
+
+#define CD(d1, d2) { .ddar = DDAR_##d1 | d2, .name = #d1 }
+static const struct sa11x0_dma_channel_desc chan_desc[] = {
+ CD(Ser0UDCTr, 0),
+ CD(Ser0UDCRc, DDAR_RW),
+ CD(Ser1SDLCTr, 0),
+ CD(Ser1SDLCRc, DDAR_RW),
+ CD(Ser1UARTTr, 0),
+ CD(Ser1UARTRc, DDAR_RW),
+ CD(Ser2ICPTr, 0),
+ CD(Ser2ICPRc, DDAR_RW),
+ CD(Ser3UARTTr, 0),
+ CD(Ser3UARTRc, DDAR_RW),
+ CD(Ser4MCP0Tr, 0),
+ CD(Ser4MCP0Rc, DDAR_RW),
+ CD(Ser4MCP1Tr, 0),
+ CD(Ser4MCP1Rc, DDAR_RW),
+ CD(Ser4SSPTr, 0),
+ CD(Ser4SSPRc, DDAR_RW),
+};
+
+static const struct dma_slave_map sa11x0_dma_map[] = {
+ { "sa11x0-ir", "tx", "Ser2ICPTr" },
+ { "sa11x0-ir", "rx", "Ser2ICPRc" },
+ { "sa11x0-ssp", "tx", "Ser4SSPTr" },
+ { "sa11x0-ssp", "rx", "Ser4SSPRc" },
+};
+
+static bool sa11x0_dma_filter_fn(struct dma_chan *chan, void *param)
+{
+ struct sa11x0_dma_chan *c = to_sa11x0_dma_chan(chan);
+ const char *p = param;
+
+ return !strcmp(c->name, p);
+}
+
+static int sa11x0_dma_init_dmadev(struct dma_device *dmadev,
+ struct device *dev)
+{
+ unsigned i;
+
+ INIT_LIST_HEAD(&dmadev->channels);
+ dmadev->dev = dev;
+ dmadev->device_free_chan_resources = sa11x0_dma_free_chan_resources;
+ dmadev->device_config = sa11x0_dma_device_config;
+ dmadev->device_pause = sa11x0_dma_device_pause;
+ dmadev->device_resume = sa11x0_dma_device_resume;
+ dmadev->device_terminate_all = sa11x0_dma_device_terminate_all;
+ dmadev->device_tx_status = sa11x0_dma_tx_status;
+ dmadev->device_issue_pending = sa11x0_dma_issue_pending;
+
+ for (i = 0; i < ARRAY_SIZE(chan_desc); i++) {
+ struct sa11x0_dma_chan *c;
+
+ c = kzalloc(sizeof(*c), GFP_KERNEL);
+ if (!c) {
+ dev_err(dev, "no memory for channel %u\n", i);
+ return -ENOMEM;
+ }
+
+ c->status = DMA_IN_PROGRESS;
+ c->ddar = chan_desc[i].ddar;
+ c->name = chan_desc[i].name;
+ INIT_LIST_HEAD(&c->node);
+
+ c->vc.desc_free = sa11x0_dma_free_desc;
+ vchan_init(&c->vc, dmadev);
+ }
+
+ return dma_async_device_register(dmadev);
+}
+
+static int sa11x0_dma_request_irq(struct platform_device *pdev, int nr,
+ void *data)
+{
+ int irq = platform_get_irq(pdev, nr);
+
+ if (irq <= 0)
+ return -ENXIO;
+
+ return request_irq(irq, sa11x0_dma_irq, 0, dev_name(&pdev->dev), data);
+}
+
+static void sa11x0_dma_free_irq(struct platform_device *pdev, int nr,
+ void *data)
+{
+ int irq = platform_get_irq(pdev, nr);
+ if (irq > 0)
+ free_irq(irq, data);
+}
+
+static void sa11x0_dma_free_channels(struct dma_device *dmadev)
+{
+ struct sa11x0_dma_chan *c, *cn;
+
+ list_for_each_entry_safe(c, cn, &dmadev->channels, vc.chan.device_node) {
+ list_del(&c->vc.chan.device_node);
+ tasklet_kill(&c->vc.task);
+ kfree(c);
+ }
+}
+
+static int sa11x0_dma_probe(struct platform_device *pdev)
+{
+ struct sa11x0_dma_dev *d;
+ struct resource *res;
+ unsigned i;
+ int ret;
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ if (!res)
+ return -ENXIO;
+
+ d = kzalloc(sizeof(*d), GFP_KERNEL);
+ if (!d) {
+ ret = -ENOMEM;
+ goto err_alloc;
+ }
+
+ spin_lock_init(&d->lock);
+ INIT_LIST_HEAD(&d->chan_pending);
+
+ d->slave.filter.fn = sa11x0_dma_filter_fn;
+ d->slave.filter.mapcnt = ARRAY_SIZE(sa11x0_dma_map);
+ d->slave.filter.map = sa11x0_dma_map;
+
+ d->base = ioremap(res->start, resource_size(res));
+ if (!d->base) {
+ ret = -ENOMEM;
+ goto err_ioremap;
+ }
+
+ tasklet_setup(&d->task, sa11x0_dma_tasklet);
+
+ for (i = 0; i < NR_PHY_CHAN; i++) {
+ struct sa11x0_dma_phy *p = &d->phy[i];
+
+ p->dev = d;
+ p->num = i;
+ p->base = d->base + i * DMA_SIZE;
+ writel_relaxed(DCSR_RUN | DCSR_IE | DCSR_ERROR |
+ DCSR_DONEA | DCSR_STRTA | DCSR_DONEB | DCSR_STRTB,
+ p->base + DMA_DCSR_C);
+ writel_relaxed(0, p->base + DMA_DDAR);
+
+ ret = sa11x0_dma_request_irq(pdev, i, p);
+ if (ret) {
+ while (i) {
+ i--;
+ sa11x0_dma_free_irq(pdev, i, &d->phy[i]);
+ }
+ goto err_irq;
+ }
+ }
+
+ dma_cap_set(DMA_SLAVE, d->slave.cap_mask);
+ dma_cap_set(DMA_CYCLIC, d->slave.cap_mask);
+ d->slave.device_prep_slave_sg = sa11x0_dma_prep_slave_sg;
+ d->slave.device_prep_dma_cyclic = sa11x0_dma_prep_dma_cyclic;
+ d->slave.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
+ d->slave.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
+ d->slave.src_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) |
+ BIT(DMA_SLAVE_BUSWIDTH_2_BYTES);
+ d->slave.dst_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) |
+ BIT(DMA_SLAVE_BUSWIDTH_2_BYTES);
+ ret = sa11x0_dma_init_dmadev(&d->slave, &pdev->dev);
+ if (ret) {
+ dev_warn(d->slave.dev, "failed to register slave async device: %d\n",
+ ret);
+ goto err_slave_reg;
+ }
+
+ platform_set_drvdata(pdev, d);
+ return 0;
+
+ err_slave_reg:
+ sa11x0_dma_free_channels(&d->slave);
+ for (i = 0; i < NR_PHY_CHAN; i++)
+ sa11x0_dma_free_irq(pdev, i, &d->phy[i]);
+ err_irq:
+ tasklet_kill(&d->task);
+ iounmap(d->base);
+ err_ioremap:
+ kfree(d);
+ err_alloc:
+ return ret;
+}
+
+static int sa11x0_dma_remove(struct platform_device *pdev)
+{
+ struct sa11x0_dma_dev *d = platform_get_drvdata(pdev);
+ unsigned pch;
+
+ dma_async_device_unregister(&d->slave);
+
+ sa11x0_dma_free_channels(&d->slave);
+ for (pch = 0; pch < NR_PHY_CHAN; pch++)
+ sa11x0_dma_free_irq(pdev, pch, &d->phy[pch]);
+ tasklet_kill(&d->task);
+ iounmap(d->base);
+ kfree(d);
+
+ return 0;
+}
+
+static __maybe_unused int sa11x0_dma_suspend(struct device *dev)
+{
+ struct sa11x0_dma_dev *d = dev_get_drvdata(dev);
+ unsigned pch;
+
+ for (pch = 0; pch < NR_PHY_CHAN; pch++) {
+ struct sa11x0_dma_phy *p = &d->phy[pch];
+ u32 dcsr, saved_dcsr;
+
+ dcsr = saved_dcsr = readl_relaxed(p->base + DMA_DCSR_R);
+ if (dcsr & DCSR_RUN) {
+ writel(DCSR_RUN | DCSR_IE, p->base + DMA_DCSR_C);
+ dcsr = readl_relaxed(p->base + DMA_DCSR_R);
+ }
+
+ saved_dcsr &= DCSR_RUN | DCSR_IE;
+ if (dcsr & DCSR_BIU) {
+ p->dbs[0] = readl_relaxed(p->base + DMA_DBSB);
+ p->dbt[0] = readl_relaxed(p->base + DMA_DBTB);
+ p->dbs[1] = readl_relaxed(p->base + DMA_DBSA);
+ p->dbt[1] = readl_relaxed(p->base + DMA_DBTA);
+ saved_dcsr |= (dcsr & DCSR_STRTA ? DCSR_STRTB : 0) |
+ (dcsr & DCSR_STRTB ? DCSR_STRTA : 0);
+ } else {
+ p->dbs[0] = readl_relaxed(p->base + DMA_DBSA);
+ p->dbt[0] = readl_relaxed(p->base + DMA_DBTA);
+ p->dbs[1] = readl_relaxed(p->base + DMA_DBSB);
+ p->dbt[1] = readl_relaxed(p->base + DMA_DBTB);
+ saved_dcsr |= dcsr & (DCSR_STRTA | DCSR_STRTB);
+ }
+ p->dcsr = saved_dcsr;
+
+ writel(DCSR_STRTA | DCSR_STRTB, p->base + DMA_DCSR_C);
+ }
+
+ return 0;
+}
+
+static __maybe_unused int sa11x0_dma_resume(struct device *dev)
+{
+ struct sa11x0_dma_dev *d = dev_get_drvdata(dev);
+ unsigned pch;
+
+ for (pch = 0; pch < NR_PHY_CHAN; pch++) {
+ struct sa11x0_dma_phy *p = &d->phy[pch];
+ struct sa11x0_dma_desc *txd = NULL;
+ u32 dcsr = readl_relaxed(p->base + DMA_DCSR_R);
+
+ WARN_ON(dcsr & (DCSR_BIU | DCSR_STRTA | DCSR_STRTB | DCSR_RUN));
+
+ if (p->txd_done)
+ txd = p->txd_done;
+ else if (p->txd_load)
+ txd = p->txd_load;
+
+ if (!txd)
+ continue;
+
+ writel_relaxed(txd->ddar, p->base + DMA_DDAR);
+
+ writel_relaxed(p->dbs[0], p->base + DMA_DBSA);
+ writel_relaxed(p->dbt[0], p->base + DMA_DBTA);
+ writel_relaxed(p->dbs[1], p->base + DMA_DBSB);
+ writel_relaxed(p->dbt[1], p->base + DMA_DBTB);
+ writel_relaxed(p->dcsr, p->base + DMA_DCSR_S);
+ }
+
+ return 0;
+}
+
+static const struct dev_pm_ops sa11x0_dma_pm_ops = {
+ SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(sa11x0_dma_suspend, sa11x0_dma_resume)
+};
+
+static struct platform_driver sa11x0_dma_driver = {
+ .driver = {
+ .name = "sa11x0-dma",
+ .pm = &sa11x0_dma_pm_ops,
+ },
+ .probe = sa11x0_dma_probe,
+ .remove = sa11x0_dma_remove,
+};
+
+static int __init sa11x0_dma_init(void)
+{
+ return platform_driver_register(&sa11x0_dma_driver);
+}
+subsys_initcall(sa11x0_dma_init);
+
+static void __exit sa11x0_dma_exit(void)
+{
+ platform_driver_unregister(&sa11x0_dma_driver);
+}
+module_exit(sa11x0_dma_exit);
+
+MODULE_AUTHOR("Russell King");
+MODULE_DESCRIPTION("SA-11x0 DMA driver");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("platform:sa11x0-dma");
diff --git a/drivers/dma/sf-pdma/Kconfig b/drivers/dma/sf-pdma/Kconfig
new file mode 100644
index 000000000..ba46a0a15
--- /dev/null
+++ b/drivers/dma/sf-pdma/Kconfig
@@ -0,0 +1,7 @@
+config SF_PDMA
+ tristate "Sifive PDMA controller driver"
+ depends on HAS_IOMEM
+ select DMA_ENGINE
+ select DMA_VIRTUAL_CHANNELS
+ help
+ Support the SiFive PDMA controller.
diff --git a/drivers/dma/sf-pdma/Makefile b/drivers/dma/sf-pdma/Makefile
new file mode 100644
index 000000000..764552ab8
--- /dev/null
+++ b/drivers/dma/sf-pdma/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_SF_PDMA) += sf-pdma.o
diff --git a/drivers/dma/sf-pdma/sf-pdma.c b/drivers/dma/sf-pdma/sf-pdma.c
new file mode 100644
index 000000000..e578ad556
--- /dev/null
+++ b/drivers/dma/sf-pdma/sf-pdma.c
@@ -0,0 +1,625 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * SiFive FU540 Platform DMA driver
+ * Copyright (C) 2019 SiFive
+ *
+ * Based partially on:
+ * - drivers/dma/fsl-edma.c
+ * - drivers/dma/dw-edma/
+ * - drivers/dma/pxa-dma.c
+ *
+ * See the following sources for further documentation:
+ * - Chapter 12 "Platform DMA Engine (PDMA)" of
+ * SiFive FU540-C000 v1.0
+ * https://static.dev.sifive.com/FU540-C000-v1.0.pdf
+ */
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/platform_device.h>
+#include <linux/mod_devicetable.h>
+#include <linux/dma-mapping.h>
+#include <linux/of.h>
+#include <linux/slab.h>
+
+#include "sf-pdma.h"
+
+#ifndef readq
+static inline unsigned long long readq(void __iomem *addr)
+{
+ return readl(addr) | (((unsigned long long)readl(addr + 4)) << 32LL);
+}
+#endif
+
+#ifndef writeq
+static inline void writeq(unsigned long long v, void __iomem *addr)
+{
+ writel(lower_32_bits(v), addr);
+ writel(upper_32_bits(v), addr + 4);
+}
+#endif
+
+static inline struct sf_pdma_chan *to_sf_pdma_chan(struct dma_chan *dchan)
+{
+ return container_of(dchan, struct sf_pdma_chan, vchan.chan);
+}
+
+static inline struct sf_pdma_desc *to_sf_pdma_desc(struct virt_dma_desc *vd)
+{
+ return container_of(vd, struct sf_pdma_desc, vdesc);
+}
+
+static struct sf_pdma_desc *sf_pdma_alloc_desc(struct sf_pdma_chan *chan)
+{
+ struct sf_pdma_desc *desc;
+
+ desc = kzalloc(sizeof(*desc), GFP_NOWAIT);
+ if (!desc)
+ return NULL;
+
+ desc->chan = chan;
+
+ return desc;
+}
+
+static void sf_pdma_fill_desc(struct sf_pdma_desc *desc,
+ u64 dst, u64 src, u64 size)
+{
+ desc->xfer_type = PDMA_FULL_SPEED;
+ desc->xfer_size = size;
+ desc->dst_addr = dst;
+ desc->src_addr = src;
+}
+
+static void sf_pdma_disclaim_chan(struct sf_pdma_chan *chan)
+{
+ struct pdma_regs *regs = &chan->regs;
+
+ writel(PDMA_CLEAR_CTRL, regs->ctrl);
+}
+
+static struct dma_async_tx_descriptor *
+sf_pdma_prep_dma_memcpy(struct dma_chan *dchan, dma_addr_t dest, dma_addr_t src,
+ size_t len, unsigned long flags)
+{
+ struct sf_pdma_chan *chan = to_sf_pdma_chan(dchan);
+ struct sf_pdma_desc *desc;
+ unsigned long iflags;
+
+ if (chan && (!len || !dest || !src)) {
+ dev_err(chan->pdma->dma_dev.dev,
+ "Please check dma len, dest, src!\n");
+ return NULL;
+ }
+
+ desc = sf_pdma_alloc_desc(chan);
+ if (!desc)
+ return NULL;
+
+ desc->dirn = DMA_MEM_TO_MEM;
+ desc->async_tx = vchan_tx_prep(&chan->vchan, &desc->vdesc, flags);
+
+ spin_lock_irqsave(&chan->vchan.lock, iflags);
+ sf_pdma_fill_desc(desc, dest, src, len);
+ spin_unlock_irqrestore(&chan->vchan.lock, iflags);
+
+ return desc->async_tx;
+}
+
+static int sf_pdma_slave_config(struct dma_chan *dchan,
+ struct dma_slave_config *cfg)
+{
+ struct sf_pdma_chan *chan = to_sf_pdma_chan(dchan);
+
+ memcpy(&chan->cfg, cfg, sizeof(*cfg));
+
+ return 0;
+}
+
+static int sf_pdma_alloc_chan_resources(struct dma_chan *dchan)
+{
+ struct sf_pdma_chan *chan = to_sf_pdma_chan(dchan);
+ struct pdma_regs *regs = &chan->regs;
+
+ dma_cookie_init(dchan);
+ writel(PDMA_CLAIM_MASK, regs->ctrl);
+
+ return 0;
+}
+
+static void sf_pdma_disable_request(struct sf_pdma_chan *chan)
+{
+ struct pdma_regs *regs = &chan->regs;
+
+ writel(readl(regs->ctrl) & ~PDMA_RUN_MASK, regs->ctrl);
+}
+
+static void sf_pdma_free_chan_resources(struct dma_chan *dchan)
+{
+ struct sf_pdma_chan *chan = to_sf_pdma_chan(dchan);
+ unsigned long flags;
+ LIST_HEAD(head);
+
+ spin_lock_irqsave(&chan->vchan.lock, flags);
+ sf_pdma_disable_request(chan);
+ kfree(chan->desc);
+ chan->desc = NULL;
+ vchan_get_all_descriptors(&chan->vchan, &head);
+ sf_pdma_disclaim_chan(chan);
+ spin_unlock_irqrestore(&chan->vchan.lock, flags);
+ vchan_dma_desc_free_list(&chan->vchan, &head);
+}
+
+static size_t sf_pdma_desc_residue(struct sf_pdma_chan *chan,
+ dma_cookie_t cookie)
+{
+ struct virt_dma_desc *vd = NULL;
+ struct pdma_regs *regs = &chan->regs;
+ unsigned long flags;
+ u64 residue = 0;
+ struct sf_pdma_desc *desc;
+ struct dma_async_tx_descriptor *tx = NULL;
+
+ spin_lock_irqsave(&chan->vchan.lock, flags);
+
+ list_for_each_entry(vd, &chan->vchan.desc_submitted, node)
+ if (vd->tx.cookie == cookie)
+ tx = &vd->tx;
+
+ if (!tx)
+ goto out;
+
+ if (cookie == tx->chan->completed_cookie)
+ goto out;
+
+ if (cookie == tx->cookie) {
+ residue = readq(regs->residue);
+ } else {
+ vd = vchan_find_desc(&chan->vchan, cookie);
+ if (!vd)
+ goto out;
+
+ desc = to_sf_pdma_desc(vd);
+ residue = desc->xfer_size;
+ }
+
+out:
+ spin_unlock_irqrestore(&chan->vchan.lock, flags);
+ return residue;
+}
+
+static enum dma_status
+sf_pdma_tx_status(struct dma_chan *dchan,
+ dma_cookie_t cookie,
+ struct dma_tx_state *txstate)
+{
+ struct sf_pdma_chan *chan = to_sf_pdma_chan(dchan);
+ enum dma_status status;
+
+ status = dma_cookie_status(dchan, cookie, txstate);
+
+ if (txstate && status != DMA_ERROR)
+ dma_set_residue(txstate, sf_pdma_desc_residue(chan, cookie));
+
+ return status;
+}
+
+static int sf_pdma_terminate_all(struct dma_chan *dchan)
+{
+ struct sf_pdma_chan *chan = to_sf_pdma_chan(dchan);
+ unsigned long flags;
+ LIST_HEAD(head);
+
+ spin_lock_irqsave(&chan->vchan.lock, flags);
+ sf_pdma_disable_request(chan);
+ kfree(chan->desc);
+ chan->desc = NULL;
+ chan->xfer_err = false;
+ vchan_get_all_descriptors(&chan->vchan, &head);
+ spin_unlock_irqrestore(&chan->vchan.lock, flags);
+ vchan_dma_desc_free_list(&chan->vchan, &head);
+
+ return 0;
+}
+
+static void sf_pdma_enable_request(struct sf_pdma_chan *chan)
+{
+ struct pdma_regs *regs = &chan->regs;
+ u32 v;
+
+ v = PDMA_CLAIM_MASK |
+ PDMA_ENABLE_DONE_INT_MASK |
+ PDMA_ENABLE_ERR_INT_MASK |
+ PDMA_RUN_MASK;
+
+ writel(v, regs->ctrl);
+}
+
+static struct sf_pdma_desc *sf_pdma_get_first_pending_desc(struct sf_pdma_chan *chan)
+{
+ struct virt_dma_chan *vchan = &chan->vchan;
+ struct virt_dma_desc *vdesc;
+
+ if (list_empty(&vchan->desc_issued))
+ return NULL;
+
+ vdesc = list_first_entry(&vchan->desc_issued, struct virt_dma_desc, node);
+
+ return container_of(vdesc, struct sf_pdma_desc, vdesc);
+}
+
+static void sf_pdma_xfer_desc(struct sf_pdma_chan *chan)
+{
+ struct sf_pdma_desc *desc = chan->desc;
+ struct pdma_regs *regs = &chan->regs;
+
+ if (!desc) {
+ dev_err(chan->pdma->dma_dev.dev, "NULL desc.\n");
+ return;
+ }
+
+ writel(desc->xfer_type, regs->xfer_type);
+ writeq(desc->xfer_size, regs->xfer_size);
+ writeq(desc->dst_addr, regs->dst_addr);
+ writeq(desc->src_addr, regs->src_addr);
+
+ chan->desc = desc;
+ chan->status = DMA_IN_PROGRESS;
+ sf_pdma_enable_request(chan);
+}
+
+static void sf_pdma_issue_pending(struct dma_chan *dchan)
+{
+ struct sf_pdma_chan *chan = to_sf_pdma_chan(dchan);
+ unsigned long flags;
+
+ spin_lock_irqsave(&chan->vchan.lock, flags);
+
+ if (!chan->desc && vchan_issue_pending(&chan->vchan)) {
+ /* vchan_issue_pending has made a check that desc in not NULL */
+ chan->desc = sf_pdma_get_first_pending_desc(chan);
+ sf_pdma_xfer_desc(chan);
+ }
+
+ spin_unlock_irqrestore(&chan->vchan.lock, flags);
+}
+
+static void sf_pdma_free_desc(struct virt_dma_desc *vdesc)
+{
+ struct sf_pdma_desc *desc;
+
+ desc = to_sf_pdma_desc(vdesc);
+ kfree(desc);
+}
+
+static void sf_pdma_donebh_tasklet(struct tasklet_struct *t)
+{
+ struct sf_pdma_chan *chan = from_tasklet(chan, t, done_tasklet);
+ unsigned long flags;
+
+ spin_lock_irqsave(&chan->lock, flags);
+ if (chan->xfer_err) {
+ chan->retries = MAX_RETRY;
+ chan->status = DMA_COMPLETE;
+ chan->xfer_err = false;
+ }
+ spin_unlock_irqrestore(&chan->lock, flags);
+
+ spin_lock_irqsave(&chan->vchan.lock, flags);
+ list_del(&chan->desc->vdesc.node);
+ vchan_cookie_complete(&chan->desc->vdesc);
+
+ chan->desc = sf_pdma_get_first_pending_desc(chan);
+ if (chan->desc)
+ sf_pdma_xfer_desc(chan);
+
+ spin_unlock_irqrestore(&chan->vchan.lock, flags);
+}
+
+static void sf_pdma_errbh_tasklet(struct tasklet_struct *t)
+{
+ struct sf_pdma_chan *chan = from_tasklet(chan, t, err_tasklet);
+ struct sf_pdma_desc *desc = chan->desc;
+ unsigned long flags;
+
+ spin_lock_irqsave(&chan->lock, flags);
+ if (chan->retries <= 0) {
+ /* fail to recover */
+ spin_unlock_irqrestore(&chan->lock, flags);
+ dmaengine_desc_get_callback_invoke(desc->async_tx, NULL);
+ } else {
+ /* retry */
+ chan->retries--;
+ chan->xfer_err = true;
+ chan->status = DMA_ERROR;
+
+ sf_pdma_enable_request(chan);
+ spin_unlock_irqrestore(&chan->lock, flags);
+ }
+}
+
+static irqreturn_t sf_pdma_done_isr(int irq, void *dev_id)
+{
+ struct sf_pdma_chan *chan = dev_id;
+ struct pdma_regs *regs = &chan->regs;
+ u64 residue;
+
+ spin_lock(&chan->vchan.lock);
+ writel((readl(regs->ctrl)) & ~PDMA_DONE_STATUS_MASK, regs->ctrl);
+ residue = readq(regs->residue);
+
+ if (!residue) {
+ tasklet_hi_schedule(&chan->done_tasklet);
+ } else {
+ /* submit next trascatioin if possible */
+ struct sf_pdma_desc *desc = chan->desc;
+
+ desc->src_addr += desc->xfer_size - residue;
+ desc->dst_addr += desc->xfer_size - residue;
+ desc->xfer_size = residue;
+
+ sf_pdma_xfer_desc(chan);
+ }
+
+ spin_unlock(&chan->vchan.lock);
+
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t sf_pdma_err_isr(int irq, void *dev_id)
+{
+ struct sf_pdma_chan *chan = dev_id;
+ struct pdma_regs *regs = &chan->regs;
+
+ spin_lock(&chan->lock);
+ writel((readl(regs->ctrl)) & ~PDMA_ERR_STATUS_MASK, regs->ctrl);
+ spin_unlock(&chan->lock);
+
+ tasklet_schedule(&chan->err_tasklet);
+
+ return IRQ_HANDLED;
+}
+
+/**
+ * sf_pdma_irq_init() - Init PDMA IRQ Handlers
+ * @pdev: pointer of platform_device
+ * @pdma: pointer of PDMA engine. Caller should check NULL
+ *
+ * Initialize DONE and ERROR interrupt handler for 4 channels. Caller should
+ * make sure the pointer passed in are non-NULL. This function should be called
+ * only one time during the device probe.
+ *
+ * Context: Any context.
+ *
+ * Return:
+ * * 0 - OK to init all IRQ handlers
+ * * -EINVAL - Fail to request IRQ
+ */
+static int sf_pdma_irq_init(struct platform_device *pdev, struct sf_pdma *pdma)
+{
+ int irq, r, i;
+ struct sf_pdma_chan *chan;
+
+ for (i = 0; i < pdma->n_chans; i++) {
+ chan = &pdma->chans[i];
+
+ irq = platform_get_irq(pdev, i * 2);
+ if (irq < 0)
+ return -EINVAL;
+
+ r = devm_request_irq(&pdev->dev, irq, sf_pdma_done_isr, 0,
+ dev_name(&pdev->dev), (void *)chan);
+ if (r) {
+ dev_err(&pdev->dev, "Fail to attach done ISR: %d\n", r);
+ return -EINVAL;
+ }
+
+ chan->txirq = irq;
+
+ irq = platform_get_irq(pdev, (i * 2) + 1);
+ if (irq < 0)
+ return -EINVAL;
+
+ r = devm_request_irq(&pdev->dev, irq, sf_pdma_err_isr, 0,
+ dev_name(&pdev->dev), (void *)chan);
+ if (r) {
+ dev_err(&pdev->dev, "Fail to attach err ISR: %d\n", r);
+ return -EINVAL;
+ }
+
+ chan->errirq = irq;
+ }
+
+ return 0;
+}
+
+/**
+ * sf_pdma_setup_chans() - Init settings of each channel
+ * @pdma: pointer of PDMA engine. Caller should check NULL
+ *
+ * Initialize all data structure and register base. Caller should make sure
+ * the pointer passed in are non-NULL. This function should be called only
+ * one time during the device probe.
+ *
+ * Context: Any context.
+ *
+ * Return: none
+ */
+static void sf_pdma_setup_chans(struct sf_pdma *pdma)
+{
+ int i;
+ struct sf_pdma_chan *chan;
+
+ INIT_LIST_HEAD(&pdma->dma_dev.channels);
+
+ for (i = 0; i < pdma->n_chans; i++) {
+ chan = &pdma->chans[i];
+
+ chan->regs.ctrl =
+ SF_PDMA_REG_BASE(i) + PDMA_CTRL;
+ chan->regs.xfer_type =
+ SF_PDMA_REG_BASE(i) + PDMA_XFER_TYPE;
+ chan->regs.xfer_size =
+ SF_PDMA_REG_BASE(i) + PDMA_XFER_SIZE;
+ chan->regs.dst_addr =
+ SF_PDMA_REG_BASE(i) + PDMA_DST_ADDR;
+ chan->regs.src_addr =
+ SF_PDMA_REG_BASE(i) + PDMA_SRC_ADDR;
+ chan->regs.act_type =
+ SF_PDMA_REG_BASE(i) + PDMA_ACT_TYPE;
+ chan->regs.residue =
+ SF_PDMA_REG_BASE(i) + PDMA_REMAINING_BYTE;
+ chan->regs.cur_dst_addr =
+ SF_PDMA_REG_BASE(i) + PDMA_CUR_DST_ADDR;
+ chan->regs.cur_src_addr =
+ SF_PDMA_REG_BASE(i) + PDMA_CUR_SRC_ADDR;
+
+ chan->pdma = pdma;
+ chan->pm_state = RUNNING;
+ chan->slave_id = i;
+ chan->xfer_err = false;
+ spin_lock_init(&chan->lock);
+
+ chan->vchan.desc_free = sf_pdma_free_desc;
+ vchan_init(&chan->vchan, &pdma->dma_dev);
+
+ writel(PDMA_CLEAR_CTRL, chan->regs.ctrl);
+
+ tasklet_setup(&chan->done_tasklet, sf_pdma_donebh_tasklet);
+ tasklet_setup(&chan->err_tasklet, sf_pdma_errbh_tasklet);
+ }
+}
+
+static int sf_pdma_probe(struct platform_device *pdev)
+{
+ struct sf_pdma *pdma;
+ struct resource *res;
+ int ret, n_chans;
+ const enum dma_slave_buswidth widths =
+ DMA_SLAVE_BUSWIDTH_1_BYTE | DMA_SLAVE_BUSWIDTH_2_BYTES |
+ DMA_SLAVE_BUSWIDTH_4_BYTES | DMA_SLAVE_BUSWIDTH_8_BYTES |
+ DMA_SLAVE_BUSWIDTH_16_BYTES | DMA_SLAVE_BUSWIDTH_32_BYTES |
+ DMA_SLAVE_BUSWIDTH_64_BYTES;
+
+ ret = of_property_read_u32(pdev->dev.of_node, "dma-channels", &n_chans);
+ if (ret) {
+ /* backwards-compatibility for no dma-channels property */
+ dev_dbg(&pdev->dev, "set number of channels to default value: 4\n");
+ n_chans = PDMA_MAX_NR_CH;
+ } else if (n_chans > PDMA_MAX_NR_CH) {
+ dev_err(&pdev->dev, "the number of channels exceeds the maximum\n");
+ return -EINVAL;
+ }
+
+ pdma = devm_kzalloc(&pdev->dev, struct_size(pdma, chans, n_chans),
+ GFP_KERNEL);
+ if (!pdma)
+ return -ENOMEM;
+
+ pdma->n_chans = n_chans;
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ pdma->membase = devm_ioremap_resource(&pdev->dev, res);
+ if (IS_ERR(pdma->membase))
+ return PTR_ERR(pdma->membase);
+
+ ret = sf_pdma_irq_init(pdev, pdma);
+ if (ret)
+ return ret;
+
+ sf_pdma_setup_chans(pdma);
+
+ pdma->dma_dev.dev = &pdev->dev;
+
+ /* Setup capability */
+ dma_cap_set(DMA_MEMCPY, pdma->dma_dev.cap_mask);
+ pdma->dma_dev.copy_align = 2;
+ pdma->dma_dev.src_addr_widths = widths;
+ pdma->dma_dev.dst_addr_widths = widths;
+ pdma->dma_dev.directions = BIT(DMA_MEM_TO_MEM);
+ pdma->dma_dev.residue_granularity = DMA_RESIDUE_GRANULARITY_DESCRIPTOR;
+ pdma->dma_dev.descriptor_reuse = true;
+
+ /* Setup DMA APIs */
+ pdma->dma_dev.device_alloc_chan_resources =
+ sf_pdma_alloc_chan_resources;
+ pdma->dma_dev.device_free_chan_resources =
+ sf_pdma_free_chan_resources;
+ pdma->dma_dev.device_tx_status = sf_pdma_tx_status;
+ pdma->dma_dev.device_prep_dma_memcpy = sf_pdma_prep_dma_memcpy;
+ pdma->dma_dev.device_config = sf_pdma_slave_config;
+ pdma->dma_dev.device_terminate_all = sf_pdma_terminate_all;
+ pdma->dma_dev.device_issue_pending = sf_pdma_issue_pending;
+
+ platform_set_drvdata(pdev, pdma);
+
+ ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
+ if (ret)
+ dev_warn(&pdev->dev,
+ "Failed to set DMA mask. Fall back to default.\n");
+
+ ret = dma_async_device_register(&pdma->dma_dev);
+ if (ret) {
+ dev_err(&pdev->dev,
+ "Can't register SiFive Platform DMA. (%d)\n", ret);
+ return ret;
+ }
+
+ return 0;
+}
+
+static int sf_pdma_remove(struct platform_device *pdev)
+{
+ struct sf_pdma *pdma = platform_get_drvdata(pdev);
+ struct sf_pdma_chan *ch;
+ int i;
+
+ for (i = 0; i < pdma->n_chans; i++) {
+ ch = &pdma->chans[i];
+
+ devm_free_irq(&pdev->dev, ch->txirq, ch);
+ devm_free_irq(&pdev->dev, ch->errirq, ch);
+ list_del(&ch->vchan.chan.device_node);
+ tasklet_kill(&ch->vchan.task);
+ tasklet_kill(&ch->done_tasklet);
+ tasklet_kill(&ch->err_tasklet);
+ }
+
+ dma_async_device_unregister(&pdma->dma_dev);
+
+ return 0;
+}
+
+static const struct of_device_id sf_pdma_dt_ids[] = {
+ { .compatible = "sifive,fu540-c000-pdma" },
+ { .compatible = "sifive,pdma0" },
+ {},
+};
+MODULE_DEVICE_TABLE(of, sf_pdma_dt_ids);
+
+static struct platform_driver sf_pdma_driver = {
+ .probe = sf_pdma_probe,
+ .remove = sf_pdma_remove,
+ .driver = {
+ .name = "sf-pdma",
+ .of_match_table = sf_pdma_dt_ids,
+ },
+};
+
+static int __init sf_pdma_init(void)
+{
+ return platform_driver_register(&sf_pdma_driver);
+}
+
+static void __exit sf_pdma_exit(void)
+{
+ platform_driver_unregister(&sf_pdma_driver);
+}
+
+/* do early init */
+subsys_initcall(sf_pdma_init);
+module_exit(sf_pdma_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("SiFive Platform DMA driver");
+MODULE_AUTHOR("Green Wan <green.wan@sifive.com>");
diff --git a/drivers/dma/sf-pdma/sf-pdma.h b/drivers/dma/sf-pdma/sf-pdma.h
new file mode 100644
index 000000000..5c398a83b
--- /dev/null
+++ b/drivers/dma/sf-pdma/sf-pdma.h
@@ -0,0 +1,119 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * SiFive FU540 Platform DMA driver
+ * Copyright (C) 2019 SiFive
+ *
+ * Based partially on:
+ * - drivers/dma/fsl-edma.c
+ * - drivers/dma/dw-edma/
+ * - drivers/dma/pxa-dma.c
+ *
+ * See the following sources for further documentation:
+ * - Chapter 12 "Platform DMA Engine (PDMA)" of
+ * SiFive FU540-C000 v1.0
+ * https://static.dev.sifive.com/FU540-C000-v1.0.pdf
+ */
+#ifndef _SF_PDMA_H
+#define _SF_PDMA_H
+
+#include <linux/dmaengine.h>
+#include <linux/dma-direction.h>
+
+#include "../dmaengine.h"
+#include "../virt-dma.h"
+
+#define PDMA_MAX_NR_CH 4
+
+#define PDMA_BASE_ADDR 0x3000000
+#define PDMA_CHAN_OFFSET 0x1000
+
+/* Register Offset */
+#define PDMA_CTRL 0x000
+#define PDMA_XFER_TYPE 0x004
+#define PDMA_XFER_SIZE 0x008
+#define PDMA_DST_ADDR 0x010
+#define PDMA_SRC_ADDR 0x018
+#define PDMA_ACT_TYPE 0x104 /* Read-only */
+#define PDMA_REMAINING_BYTE 0x108 /* Read-only */
+#define PDMA_CUR_DST_ADDR 0x110 /* Read-only*/
+#define PDMA_CUR_SRC_ADDR 0x118 /* Read-only*/
+
+/* CTRL */
+#define PDMA_CLEAR_CTRL 0x0
+#define PDMA_CLAIM_MASK GENMASK(0, 0)
+#define PDMA_RUN_MASK GENMASK(1, 1)
+#define PDMA_ENABLE_DONE_INT_MASK GENMASK(14, 14)
+#define PDMA_ENABLE_ERR_INT_MASK GENMASK(15, 15)
+#define PDMA_DONE_STATUS_MASK GENMASK(30, 30)
+#define PDMA_ERR_STATUS_MASK GENMASK(31, 31)
+
+/* Transfer Type */
+#define PDMA_FULL_SPEED 0xFF000008
+
+/* Error Recovery */
+#define MAX_RETRY 1
+
+#define SF_PDMA_REG_BASE(ch) (pdma->membase + (PDMA_CHAN_OFFSET * (ch)))
+
+struct pdma_regs {
+ /* read-write regs */
+ void __iomem *ctrl; /* 4 bytes */
+
+ void __iomem *xfer_type; /* 4 bytes */
+ void __iomem *xfer_size; /* 8 bytes */
+ void __iomem *dst_addr; /* 8 bytes */
+ void __iomem *src_addr; /* 8 bytes */
+
+ /* read-only */
+ void __iomem *act_type; /* 4 bytes */
+ void __iomem *residue; /* 8 bytes */
+ void __iomem *cur_dst_addr; /* 8 bytes */
+ void __iomem *cur_src_addr; /* 8 bytes */
+};
+
+struct sf_pdma_desc {
+ u32 xfer_type;
+ u64 xfer_size;
+ u64 dst_addr;
+ u64 src_addr;
+ struct virt_dma_desc vdesc;
+ struct sf_pdma_chan *chan;
+ enum dma_transfer_direction dirn;
+ struct dma_async_tx_descriptor *async_tx;
+};
+
+enum sf_pdma_pm_state {
+ RUNNING = 0,
+ SUSPENDED,
+};
+
+struct sf_pdma_chan {
+ struct virt_dma_chan vchan;
+ enum dma_status status;
+ enum sf_pdma_pm_state pm_state;
+ u32 slave_id;
+ struct sf_pdma *pdma;
+ struct sf_pdma_desc *desc;
+ struct dma_slave_config cfg;
+ u32 attr;
+ dma_addr_t dma_dev_addr;
+ u32 dma_dev_size;
+ struct tasklet_struct done_tasklet;
+ struct tasklet_struct err_tasklet;
+ struct pdma_regs regs;
+ spinlock_t lock; /* protect chan data */
+ bool xfer_err;
+ int txirq;
+ int errirq;
+ int retries;
+};
+
+struct sf_pdma {
+ struct dma_device dma_dev;
+ void __iomem *membase;
+ void __iomem *mappedbase;
+ u32 n_chans;
+ struct sf_pdma_chan chans[];
+};
+
+#endif /* _SF_PDMA_H */
diff --git a/drivers/dma/sh/Kconfig b/drivers/dma/sh/Kconfig
new file mode 100644
index 000000000..c0b2997ab
--- /dev/null
+++ b/drivers/dma/sh/Kconfig
@@ -0,0 +1,58 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# DMA engine configuration for sh
+#
+
+config RENESAS_DMA
+ bool
+ select DMA_ENGINE
+
+#
+# DMA Engine Helpers
+#
+
+config SH_DMAE_BASE
+ bool "Renesas SuperH DMA Engine support"
+ depends on SUPERH || COMPILE_TEST
+ depends on !SUPERH || SH_DMA
+ depends on !SH_DMA_API
+ default y
+ select RENESAS_DMA
+ help
+ Enable support for the Renesas SuperH DMA controllers.
+
+#
+# DMA Controllers
+#
+
+config SH_DMAE
+ tristate "Renesas SuperH DMAC support"
+ depends on SH_DMAE_BASE
+ help
+ Enable support for the Renesas SuperH DMA controllers.
+
+config RCAR_DMAC
+ tristate "Renesas R-Car Gen{2,3} and RZ/G{1,2} DMA Controller"
+ depends on ARCH_RENESAS || COMPILE_TEST
+ select RENESAS_DMA
+ help
+ This driver supports the general purpose DMA controller found in the
+ Renesas R-Car Gen{2,3} and RZ/G{1,2} SoCs.
+
+config RENESAS_USB_DMAC
+ tristate "Renesas USB-DMA Controller"
+ depends on ARCH_RENESAS || COMPILE_TEST
+ select RENESAS_DMA
+ select DMA_VIRTUAL_CHANNELS
+ help
+ This driver supports the USB-DMA controller found in the Renesas
+ SoCs.
+
+config RZ_DMAC
+ tristate "Renesas RZ/{G2L,V2L} DMA Controller"
+ depends on ARCH_RZG2L || COMPILE_TEST
+ select RENESAS_DMA
+ select DMA_VIRTUAL_CHANNELS
+ help
+ This driver supports the general purpose DMA controller found in the
+ Renesas RZ/{G2L,V2L} SoC variants.
diff --git a/drivers/dma/sh/Makefile b/drivers/dma/sh/Makefile
new file mode 100644
index 000000000..360ab6d25
--- /dev/null
+++ b/drivers/dma/sh/Makefile
@@ -0,0 +1,18 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# DMA Engine Helpers
+#
+
+obj-$(CONFIG_SH_DMAE_BASE) += shdma-base.o
+
+#
+# DMA Controllers
+#
+
+shdma-y := shdmac.o
+shdma-objs := $(shdma-y)
+obj-$(CONFIG_SH_DMAE) += shdma.o
+
+obj-$(CONFIG_RCAR_DMAC) += rcar-dmac.o
+obj-$(CONFIG_RENESAS_USB_DMAC) += usb-dmac.o
+obj-$(CONFIG_RZ_DMAC) += rz-dmac.o
diff --git a/drivers/dma/sh/rcar-dmac.c b/drivers/dma/sh/rcar-dmac.c
new file mode 100644
index 000000000..641d689d1
--- /dev/null
+++ b/drivers/dma/sh/rcar-dmac.c
@@ -0,0 +1,2052 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Renesas R-Car Gen2/Gen3 DMA Controller Driver
+ *
+ * Copyright (C) 2014-2019 Renesas Electronics Inc.
+ *
+ * Author: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
+ */
+
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmaengine.h>
+#include <linux/interrupt.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/of.h>
+#include <linux/of_dma.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+
+#include "../dmaengine.h"
+
+/*
+ * struct rcar_dmac_xfer_chunk - Descriptor for a hardware transfer
+ * @node: entry in the parent's chunks list
+ * @src_addr: device source address
+ * @dst_addr: device destination address
+ * @size: transfer size in bytes
+ */
+struct rcar_dmac_xfer_chunk {
+ struct list_head node;
+
+ dma_addr_t src_addr;
+ dma_addr_t dst_addr;
+ u32 size;
+};
+
+/*
+ * struct rcar_dmac_hw_desc - Hardware descriptor for a transfer chunk
+ * @sar: value of the SAR register (source address)
+ * @dar: value of the DAR register (destination address)
+ * @tcr: value of the TCR register (transfer count)
+ */
+struct rcar_dmac_hw_desc {
+ u32 sar;
+ u32 dar;
+ u32 tcr;
+ u32 reserved;
+} __attribute__((__packed__));
+
+/*
+ * struct rcar_dmac_desc - R-Car Gen2 DMA Transfer Descriptor
+ * @async_tx: base DMA asynchronous transaction descriptor
+ * @direction: direction of the DMA transfer
+ * @xfer_shift: log2 of the transfer size
+ * @chcr: value of the channel configuration register for this transfer
+ * @node: entry in the channel's descriptors lists
+ * @chunks: list of transfer chunks for this transfer
+ * @running: the transfer chunk being currently processed
+ * @nchunks: number of transfer chunks for this transfer
+ * @hwdescs.use: whether the transfer descriptor uses hardware descriptors
+ * @hwdescs.mem: hardware descriptors memory for the transfer
+ * @hwdescs.dma: device address of the hardware descriptors memory
+ * @hwdescs.size: size of the hardware descriptors in bytes
+ * @size: transfer size in bytes
+ * @cyclic: when set indicates that the DMA transfer is cyclic
+ */
+struct rcar_dmac_desc {
+ struct dma_async_tx_descriptor async_tx;
+ enum dma_transfer_direction direction;
+ unsigned int xfer_shift;
+ u32 chcr;
+
+ struct list_head node;
+ struct list_head chunks;
+ struct rcar_dmac_xfer_chunk *running;
+ unsigned int nchunks;
+
+ struct {
+ bool use;
+ struct rcar_dmac_hw_desc *mem;
+ dma_addr_t dma;
+ size_t size;
+ } hwdescs;
+
+ unsigned int size;
+ bool cyclic;
+};
+
+#define to_rcar_dmac_desc(d) container_of(d, struct rcar_dmac_desc, async_tx)
+
+/*
+ * struct rcar_dmac_desc_page - One page worth of descriptors
+ * @node: entry in the channel's pages list
+ * @descs: array of DMA descriptors
+ * @chunks: array of transfer chunk descriptors
+ */
+struct rcar_dmac_desc_page {
+ struct list_head node;
+
+ union {
+ DECLARE_FLEX_ARRAY(struct rcar_dmac_desc, descs);
+ DECLARE_FLEX_ARRAY(struct rcar_dmac_xfer_chunk, chunks);
+ };
+};
+
+#define RCAR_DMAC_DESCS_PER_PAGE \
+ ((PAGE_SIZE - offsetof(struct rcar_dmac_desc_page, descs)) / \
+ sizeof(struct rcar_dmac_desc))
+#define RCAR_DMAC_XFER_CHUNKS_PER_PAGE \
+ ((PAGE_SIZE - offsetof(struct rcar_dmac_desc_page, chunks)) / \
+ sizeof(struct rcar_dmac_xfer_chunk))
+
+/*
+ * struct rcar_dmac_chan_slave - Slave configuration
+ * @slave_addr: slave memory address
+ * @xfer_size: size (in bytes) of hardware transfers
+ */
+struct rcar_dmac_chan_slave {
+ phys_addr_t slave_addr;
+ unsigned int xfer_size;
+};
+
+/*
+ * struct rcar_dmac_chan_map - Map of slave device phys to dma address
+ * @addr: slave dma address
+ * @dir: direction of mapping
+ * @slave: slave configuration that is mapped
+ */
+struct rcar_dmac_chan_map {
+ dma_addr_t addr;
+ enum dma_data_direction dir;
+ struct rcar_dmac_chan_slave slave;
+};
+
+/*
+ * struct rcar_dmac_chan - R-Car Gen2 DMA Controller Channel
+ * @chan: base DMA channel object
+ * @iomem: channel I/O memory base
+ * @index: index of this channel in the controller
+ * @irq: channel IRQ
+ * @src: slave memory address and size on the source side
+ * @dst: slave memory address and size on the destination side
+ * @mid_rid: hardware MID/RID for the DMA client using this channel
+ * @lock: protects the channel CHCR register and the desc members
+ * @desc.free: list of free descriptors
+ * @desc.pending: list of pending descriptors (submitted with tx_submit)
+ * @desc.active: list of active descriptors (activated with issue_pending)
+ * @desc.done: list of completed descriptors
+ * @desc.wait: list of descriptors waiting for an ack
+ * @desc.running: the descriptor being processed (a member of the active list)
+ * @desc.chunks_free: list of free transfer chunk descriptors
+ * @desc.pages: list of pages used by allocated descriptors
+ */
+struct rcar_dmac_chan {
+ struct dma_chan chan;
+ void __iomem *iomem;
+ unsigned int index;
+ int irq;
+
+ struct rcar_dmac_chan_slave src;
+ struct rcar_dmac_chan_slave dst;
+ struct rcar_dmac_chan_map map;
+ int mid_rid;
+
+ spinlock_t lock;
+
+ struct {
+ struct list_head free;
+ struct list_head pending;
+ struct list_head active;
+ struct list_head done;
+ struct list_head wait;
+ struct rcar_dmac_desc *running;
+
+ struct list_head chunks_free;
+
+ struct list_head pages;
+ } desc;
+};
+
+#define to_rcar_dmac_chan(c) container_of(c, struct rcar_dmac_chan, chan)
+
+/*
+ * struct rcar_dmac - R-Car Gen2 DMA Controller
+ * @engine: base DMA engine object
+ * @dev: the hardware device
+ * @dmac_base: remapped base register block
+ * @chan_base: remapped channel register block (optional)
+ * @n_channels: number of available channels
+ * @channels: array of DMAC channels
+ * @channels_mask: bitfield of which DMA channels are managed by this driver
+ * @modules: bitmask of client modules in use
+ */
+struct rcar_dmac {
+ struct dma_device engine;
+ struct device *dev;
+ void __iomem *dmac_base;
+ void __iomem *chan_base;
+
+ unsigned int n_channels;
+ struct rcar_dmac_chan *channels;
+ u32 channels_mask;
+
+ DECLARE_BITMAP(modules, 256);
+};
+
+#define to_rcar_dmac(d) container_of(d, struct rcar_dmac, engine)
+
+#define for_each_rcar_dmac_chan(i, dmac, chan) \
+ for (i = 0, chan = &(dmac)->channels[0]; i < (dmac)->n_channels; i++, chan++) \
+ if (!((dmac)->channels_mask & BIT(i))) continue; else
+
+/*
+ * struct rcar_dmac_of_data - This driver's OF data
+ * @chan_offset_base: DMAC channels base offset
+ * @chan_offset_stride: DMAC channels offset stride
+ */
+struct rcar_dmac_of_data {
+ u32 chan_offset_base;
+ u32 chan_offset_stride;
+};
+
+/* -----------------------------------------------------------------------------
+ * Registers
+ */
+
+#define RCAR_DMAISTA 0x0020
+#define RCAR_DMASEC 0x0030
+#define RCAR_DMAOR 0x0060
+#define RCAR_DMAOR_PRI_FIXED (0 << 8)
+#define RCAR_DMAOR_PRI_ROUND_ROBIN (3 << 8)
+#define RCAR_DMAOR_AE (1 << 2)
+#define RCAR_DMAOR_DME (1 << 0)
+#define RCAR_DMACHCLR 0x0080 /* Not on R-Car Gen4 */
+#define RCAR_DMADPSEC 0x00a0
+
+#define RCAR_DMASAR 0x0000
+#define RCAR_DMADAR 0x0004
+#define RCAR_DMATCR 0x0008
+#define RCAR_DMATCR_MASK 0x00ffffff
+#define RCAR_DMATSR 0x0028
+#define RCAR_DMACHCR 0x000c
+#define RCAR_DMACHCR_CAE (1 << 31)
+#define RCAR_DMACHCR_CAIE (1 << 30)
+#define RCAR_DMACHCR_DPM_DISABLED (0 << 28)
+#define RCAR_DMACHCR_DPM_ENABLED (1 << 28)
+#define RCAR_DMACHCR_DPM_REPEAT (2 << 28)
+#define RCAR_DMACHCR_DPM_INFINITE (3 << 28)
+#define RCAR_DMACHCR_RPT_SAR (1 << 27)
+#define RCAR_DMACHCR_RPT_DAR (1 << 26)
+#define RCAR_DMACHCR_RPT_TCR (1 << 25)
+#define RCAR_DMACHCR_DPB (1 << 22)
+#define RCAR_DMACHCR_DSE (1 << 19)
+#define RCAR_DMACHCR_DSIE (1 << 18)
+#define RCAR_DMACHCR_TS_1B ((0 << 20) | (0 << 3))
+#define RCAR_DMACHCR_TS_2B ((0 << 20) | (1 << 3))
+#define RCAR_DMACHCR_TS_4B ((0 << 20) | (2 << 3))
+#define RCAR_DMACHCR_TS_16B ((0 << 20) | (3 << 3))
+#define RCAR_DMACHCR_TS_32B ((1 << 20) | (0 << 3))
+#define RCAR_DMACHCR_TS_64B ((1 << 20) | (1 << 3))
+#define RCAR_DMACHCR_TS_8B ((1 << 20) | (3 << 3))
+#define RCAR_DMACHCR_DM_FIXED (0 << 14)
+#define RCAR_DMACHCR_DM_INC (1 << 14)
+#define RCAR_DMACHCR_DM_DEC (2 << 14)
+#define RCAR_DMACHCR_SM_FIXED (0 << 12)
+#define RCAR_DMACHCR_SM_INC (1 << 12)
+#define RCAR_DMACHCR_SM_DEC (2 << 12)
+#define RCAR_DMACHCR_RS_AUTO (4 << 8)
+#define RCAR_DMACHCR_RS_DMARS (8 << 8)
+#define RCAR_DMACHCR_IE (1 << 2)
+#define RCAR_DMACHCR_TE (1 << 1)
+#define RCAR_DMACHCR_DE (1 << 0)
+#define RCAR_DMATCRB 0x0018
+#define RCAR_DMATSRB 0x0038
+#define RCAR_DMACHCRB 0x001c
+#define RCAR_DMACHCRB_DCNT(n) ((n) << 24)
+#define RCAR_DMACHCRB_DPTR_MASK (0xff << 16)
+#define RCAR_DMACHCRB_DPTR_SHIFT 16
+#define RCAR_DMACHCRB_DRST (1 << 15)
+#define RCAR_DMACHCRB_DTS (1 << 8)
+#define RCAR_DMACHCRB_SLM_NORMAL (0 << 4)
+#define RCAR_DMACHCRB_SLM_CLK(n) ((8 | (n)) << 4)
+#define RCAR_DMACHCRB_PRI(n) ((n) << 0)
+#define RCAR_DMARS 0x0040
+#define RCAR_DMABUFCR 0x0048
+#define RCAR_DMABUFCR_MBU(n) ((n) << 16)
+#define RCAR_DMABUFCR_ULB(n) ((n) << 0)
+#define RCAR_DMADPBASE 0x0050
+#define RCAR_DMADPBASE_MASK 0xfffffff0
+#define RCAR_DMADPBASE_SEL (1 << 0)
+#define RCAR_DMADPCR 0x0054
+#define RCAR_DMADPCR_DIPT(n) ((n) << 24)
+#define RCAR_DMAFIXSAR 0x0010
+#define RCAR_DMAFIXDAR 0x0014
+#define RCAR_DMAFIXDPBASE 0x0060
+
+/* For R-Car Gen4 */
+#define RCAR_GEN4_DMACHCLR 0x0100
+
+/* Hardcode the MEMCPY transfer size to 4 bytes. */
+#define RCAR_DMAC_MEMCPY_XFER_SIZE 4
+
+/* -----------------------------------------------------------------------------
+ * Device access
+ */
+
+static void rcar_dmac_write(struct rcar_dmac *dmac, u32 reg, u32 data)
+{
+ if (reg == RCAR_DMAOR)
+ writew(data, dmac->dmac_base + reg);
+ else
+ writel(data, dmac->dmac_base + reg);
+}
+
+static u32 rcar_dmac_read(struct rcar_dmac *dmac, u32 reg)
+{
+ if (reg == RCAR_DMAOR)
+ return readw(dmac->dmac_base + reg);
+ else
+ return readl(dmac->dmac_base + reg);
+}
+
+static u32 rcar_dmac_chan_read(struct rcar_dmac_chan *chan, u32 reg)
+{
+ if (reg == RCAR_DMARS)
+ return readw(chan->iomem + reg);
+ else
+ return readl(chan->iomem + reg);
+}
+
+static void rcar_dmac_chan_write(struct rcar_dmac_chan *chan, u32 reg, u32 data)
+{
+ if (reg == RCAR_DMARS)
+ writew(data, chan->iomem + reg);
+ else
+ writel(data, chan->iomem + reg);
+}
+
+static void rcar_dmac_chan_clear(struct rcar_dmac *dmac,
+ struct rcar_dmac_chan *chan)
+{
+ if (dmac->chan_base)
+ rcar_dmac_chan_write(chan, RCAR_GEN4_DMACHCLR, 1);
+ else
+ rcar_dmac_write(dmac, RCAR_DMACHCLR, BIT(chan->index));
+}
+
+static void rcar_dmac_chan_clear_all(struct rcar_dmac *dmac)
+{
+ struct rcar_dmac_chan *chan;
+ unsigned int i;
+
+ if (dmac->chan_base) {
+ for_each_rcar_dmac_chan(i, dmac, chan)
+ rcar_dmac_chan_write(chan, RCAR_GEN4_DMACHCLR, 1);
+ } else {
+ rcar_dmac_write(dmac, RCAR_DMACHCLR, dmac->channels_mask);
+ }
+}
+
+/* -----------------------------------------------------------------------------
+ * Initialization and configuration
+ */
+
+static bool rcar_dmac_chan_is_busy(struct rcar_dmac_chan *chan)
+{
+ u32 chcr = rcar_dmac_chan_read(chan, RCAR_DMACHCR);
+
+ return !!(chcr & (RCAR_DMACHCR_DE | RCAR_DMACHCR_TE));
+}
+
+static void rcar_dmac_chan_start_xfer(struct rcar_dmac_chan *chan)
+{
+ struct rcar_dmac_desc *desc = chan->desc.running;
+ u32 chcr = desc->chcr;
+
+ WARN_ON_ONCE(rcar_dmac_chan_is_busy(chan));
+
+ if (chan->mid_rid >= 0)
+ rcar_dmac_chan_write(chan, RCAR_DMARS, chan->mid_rid);
+
+ if (desc->hwdescs.use) {
+ struct rcar_dmac_xfer_chunk *chunk =
+ list_first_entry(&desc->chunks,
+ struct rcar_dmac_xfer_chunk, node);
+
+ dev_dbg(chan->chan.device->dev,
+ "chan%u: queue desc %p: %u@%pad\n",
+ chan->index, desc, desc->nchunks, &desc->hwdescs.dma);
+
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+ rcar_dmac_chan_write(chan, RCAR_DMAFIXSAR,
+ chunk->src_addr >> 32);
+ rcar_dmac_chan_write(chan, RCAR_DMAFIXDAR,
+ chunk->dst_addr >> 32);
+ rcar_dmac_chan_write(chan, RCAR_DMAFIXDPBASE,
+ desc->hwdescs.dma >> 32);
+#endif
+ rcar_dmac_chan_write(chan, RCAR_DMADPBASE,
+ (desc->hwdescs.dma & 0xfffffff0) |
+ RCAR_DMADPBASE_SEL);
+ rcar_dmac_chan_write(chan, RCAR_DMACHCRB,
+ RCAR_DMACHCRB_DCNT(desc->nchunks - 1) |
+ RCAR_DMACHCRB_DRST);
+
+ /*
+ * Errata: When descriptor memory is accessed through an IOMMU
+ * the DMADAR register isn't initialized automatically from the
+ * first descriptor at beginning of transfer by the DMAC like it
+ * should. Initialize it manually with the destination address
+ * of the first chunk.
+ */
+ rcar_dmac_chan_write(chan, RCAR_DMADAR,
+ chunk->dst_addr & 0xffffffff);
+
+ /*
+ * Program the descriptor stage interrupt to occur after the end
+ * of the first stage.
+ */
+ rcar_dmac_chan_write(chan, RCAR_DMADPCR, RCAR_DMADPCR_DIPT(1));
+
+ chcr |= RCAR_DMACHCR_RPT_SAR | RCAR_DMACHCR_RPT_DAR
+ | RCAR_DMACHCR_RPT_TCR | RCAR_DMACHCR_DPB;
+
+ /*
+ * If the descriptor isn't cyclic enable normal descriptor mode
+ * and the transfer completion interrupt.
+ */
+ if (!desc->cyclic)
+ chcr |= RCAR_DMACHCR_DPM_ENABLED | RCAR_DMACHCR_IE;
+ /*
+ * If the descriptor is cyclic and has a callback enable the
+ * descriptor stage interrupt in infinite repeat mode.
+ */
+ else if (desc->async_tx.callback)
+ chcr |= RCAR_DMACHCR_DPM_INFINITE | RCAR_DMACHCR_DSIE;
+ /*
+ * Otherwise just select infinite repeat mode without any
+ * interrupt.
+ */
+ else
+ chcr |= RCAR_DMACHCR_DPM_INFINITE;
+ } else {
+ struct rcar_dmac_xfer_chunk *chunk = desc->running;
+
+ dev_dbg(chan->chan.device->dev,
+ "chan%u: queue chunk %p: %u@%pad -> %pad\n",
+ chan->index, chunk, chunk->size, &chunk->src_addr,
+ &chunk->dst_addr);
+
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+ rcar_dmac_chan_write(chan, RCAR_DMAFIXSAR,
+ chunk->src_addr >> 32);
+ rcar_dmac_chan_write(chan, RCAR_DMAFIXDAR,
+ chunk->dst_addr >> 32);
+#endif
+ rcar_dmac_chan_write(chan, RCAR_DMASAR,
+ chunk->src_addr & 0xffffffff);
+ rcar_dmac_chan_write(chan, RCAR_DMADAR,
+ chunk->dst_addr & 0xffffffff);
+ rcar_dmac_chan_write(chan, RCAR_DMATCR,
+ chunk->size >> desc->xfer_shift);
+
+ chcr |= RCAR_DMACHCR_DPM_DISABLED | RCAR_DMACHCR_IE;
+ }
+
+ rcar_dmac_chan_write(chan, RCAR_DMACHCR,
+ chcr | RCAR_DMACHCR_DE | RCAR_DMACHCR_CAIE);
+}
+
+static int rcar_dmac_init(struct rcar_dmac *dmac)
+{
+ u16 dmaor;
+
+ /* Clear all channels and enable the DMAC globally. */
+ rcar_dmac_chan_clear_all(dmac);
+ rcar_dmac_write(dmac, RCAR_DMAOR,
+ RCAR_DMAOR_PRI_FIXED | RCAR_DMAOR_DME);
+
+ dmaor = rcar_dmac_read(dmac, RCAR_DMAOR);
+ if ((dmaor & (RCAR_DMAOR_AE | RCAR_DMAOR_DME)) != RCAR_DMAOR_DME) {
+ dev_warn(dmac->dev, "DMAOR initialization failed.\n");
+ return -EIO;
+ }
+
+ return 0;
+}
+
+/* -----------------------------------------------------------------------------
+ * Descriptors submission
+ */
+
+static dma_cookie_t rcar_dmac_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+ struct rcar_dmac_chan *chan = to_rcar_dmac_chan(tx->chan);
+ struct rcar_dmac_desc *desc = to_rcar_dmac_desc(tx);
+ unsigned long flags;
+ dma_cookie_t cookie;
+
+ spin_lock_irqsave(&chan->lock, flags);
+
+ cookie = dma_cookie_assign(tx);
+
+ dev_dbg(chan->chan.device->dev, "chan%u: submit #%d@%p\n",
+ chan->index, tx->cookie, desc);
+
+ list_add_tail(&desc->node, &chan->desc.pending);
+ desc->running = list_first_entry(&desc->chunks,
+ struct rcar_dmac_xfer_chunk, node);
+
+ spin_unlock_irqrestore(&chan->lock, flags);
+
+ return cookie;
+}
+
+/* -----------------------------------------------------------------------------
+ * Descriptors allocation and free
+ */
+
+/*
+ * rcar_dmac_desc_alloc - Allocate a page worth of DMA descriptors
+ * @chan: the DMA channel
+ * @gfp: allocation flags
+ */
+static int rcar_dmac_desc_alloc(struct rcar_dmac_chan *chan, gfp_t gfp)
+{
+ struct rcar_dmac_desc_page *page;
+ unsigned long flags;
+ LIST_HEAD(list);
+ unsigned int i;
+
+ page = (void *)get_zeroed_page(gfp);
+ if (!page)
+ return -ENOMEM;
+
+ for (i = 0; i < RCAR_DMAC_DESCS_PER_PAGE; ++i) {
+ struct rcar_dmac_desc *desc = &page->descs[i];
+
+ dma_async_tx_descriptor_init(&desc->async_tx, &chan->chan);
+ desc->async_tx.tx_submit = rcar_dmac_tx_submit;
+ INIT_LIST_HEAD(&desc->chunks);
+
+ list_add_tail(&desc->node, &list);
+ }
+
+ spin_lock_irqsave(&chan->lock, flags);
+ list_splice_tail(&list, &chan->desc.free);
+ list_add_tail(&page->node, &chan->desc.pages);
+ spin_unlock_irqrestore(&chan->lock, flags);
+
+ return 0;
+}
+
+/*
+ * rcar_dmac_desc_put - Release a DMA transfer descriptor
+ * @chan: the DMA channel
+ * @desc: the descriptor
+ *
+ * Put the descriptor and its transfer chunk descriptors back in the channel's
+ * free descriptors lists. The descriptor's chunks list will be reinitialized to
+ * an empty list as a result.
+ *
+ * The descriptor must have been removed from the channel's lists before calling
+ * this function.
+ */
+static void rcar_dmac_desc_put(struct rcar_dmac_chan *chan,
+ struct rcar_dmac_desc *desc)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&chan->lock, flags);
+ list_splice_tail_init(&desc->chunks, &chan->desc.chunks_free);
+ list_add(&desc->node, &chan->desc.free);
+ spin_unlock_irqrestore(&chan->lock, flags);
+}
+
+static void rcar_dmac_desc_recycle_acked(struct rcar_dmac_chan *chan)
+{
+ struct rcar_dmac_desc *desc, *_desc;
+ unsigned long flags;
+ LIST_HEAD(list);
+
+ /*
+ * We have to temporarily move all descriptors from the wait list to a
+ * local list as iterating over the wait list, even with
+ * list_for_each_entry_safe, isn't safe if we release the channel lock
+ * around the rcar_dmac_desc_put() call.
+ */
+ spin_lock_irqsave(&chan->lock, flags);
+ list_splice_init(&chan->desc.wait, &list);
+ spin_unlock_irqrestore(&chan->lock, flags);
+
+ list_for_each_entry_safe(desc, _desc, &list, node) {
+ if (async_tx_test_ack(&desc->async_tx)) {
+ list_del(&desc->node);
+ rcar_dmac_desc_put(chan, desc);
+ }
+ }
+
+ if (list_empty(&list))
+ return;
+
+ /* Put the remaining descriptors back in the wait list. */
+ spin_lock_irqsave(&chan->lock, flags);
+ list_splice(&list, &chan->desc.wait);
+ spin_unlock_irqrestore(&chan->lock, flags);
+}
+
+/*
+ * rcar_dmac_desc_get - Allocate a descriptor for a DMA transfer
+ * @chan: the DMA channel
+ *
+ * Locking: This function must be called in a non-atomic context.
+ *
+ * Return: A pointer to the allocated descriptor or NULL if no descriptor can
+ * be allocated.
+ */
+static struct rcar_dmac_desc *rcar_dmac_desc_get(struct rcar_dmac_chan *chan)
+{
+ struct rcar_dmac_desc *desc;
+ unsigned long flags;
+ int ret;
+
+ /* Recycle acked descriptors before attempting allocation. */
+ rcar_dmac_desc_recycle_acked(chan);
+
+ spin_lock_irqsave(&chan->lock, flags);
+
+ while (list_empty(&chan->desc.free)) {
+ /*
+ * No free descriptors, allocate a page worth of them and try
+ * again, as someone else could race us to get the newly
+ * allocated descriptors. If the allocation fails return an
+ * error.
+ */
+ spin_unlock_irqrestore(&chan->lock, flags);
+ ret = rcar_dmac_desc_alloc(chan, GFP_NOWAIT);
+ if (ret < 0)
+ return NULL;
+ spin_lock_irqsave(&chan->lock, flags);
+ }
+
+ desc = list_first_entry(&chan->desc.free, struct rcar_dmac_desc, node);
+ list_del(&desc->node);
+
+ spin_unlock_irqrestore(&chan->lock, flags);
+
+ return desc;
+}
+
+/*
+ * rcar_dmac_xfer_chunk_alloc - Allocate a page worth of transfer chunks
+ * @chan: the DMA channel
+ * @gfp: allocation flags
+ */
+static int rcar_dmac_xfer_chunk_alloc(struct rcar_dmac_chan *chan, gfp_t gfp)
+{
+ struct rcar_dmac_desc_page *page;
+ unsigned long flags;
+ LIST_HEAD(list);
+ unsigned int i;
+
+ page = (void *)get_zeroed_page(gfp);
+ if (!page)
+ return -ENOMEM;
+
+ for (i = 0; i < RCAR_DMAC_XFER_CHUNKS_PER_PAGE; ++i) {
+ struct rcar_dmac_xfer_chunk *chunk = &page->chunks[i];
+
+ list_add_tail(&chunk->node, &list);
+ }
+
+ spin_lock_irqsave(&chan->lock, flags);
+ list_splice_tail(&list, &chan->desc.chunks_free);
+ list_add_tail(&page->node, &chan->desc.pages);
+ spin_unlock_irqrestore(&chan->lock, flags);
+
+ return 0;
+}
+
+/*
+ * rcar_dmac_xfer_chunk_get - Allocate a transfer chunk for a DMA transfer
+ * @chan: the DMA channel
+ *
+ * Locking: This function must be called in a non-atomic context.
+ *
+ * Return: A pointer to the allocated transfer chunk descriptor or NULL if no
+ * descriptor can be allocated.
+ */
+static struct rcar_dmac_xfer_chunk *
+rcar_dmac_xfer_chunk_get(struct rcar_dmac_chan *chan)
+{
+ struct rcar_dmac_xfer_chunk *chunk;
+ unsigned long flags;
+ int ret;
+
+ spin_lock_irqsave(&chan->lock, flags);
+
+ while (list_empty(&chan->desc.chunks_free)) {
+ /*
+ * No free descriptors, allocate a page worth of them and try
+ * again, as someone else could race us to get the newly
+ * allocated descriptors. If the allocation fails return an
+ * error.
+ */
+ spin_unlock_irqrestore(&chan->lock, flags);
+ ret = rcar_dmac_xfer_chunk_alloc(chan, GFP_NOWAIT);
+ if (ret < 0)
+ return NULL;
+ spin_lock_irqsave(&chan->lock, flags);
+ }
+
+ chunk = list_first_entry(&chan->desc.chunks_free,
+ struct rcar_dmac_xfer_chunk, node);
+ list_del(&chunk->node);
+
+ spin_unlock_irqrestore(&chan->lock, flags);
+
+ return chunk;
+}
+
+static void rcar_dmac_realloc_hwdesc(struct rcar_dmac_chan *chan,
+ struct rcar_dmac_desc *desc, size_t size)
+{
+ /*
+ * dma_alloc_coherent() allocates memory in page size increments. To
+ * avoid reallocating the hardware descriptors when the allocated size
+ * wouldn't change align the requested size to a multiple of the page
+ * size.
+ */
+ size = PAGE_ALIGN(size);
+
+ if (desc->hwdescs.size == size)
+ return;
+
+ if (desc->hwdescs.mem) {
+ dma_free_coherent(chan->chan.device->dev, desc->hwdescs.size,
+ desc->hwdescs.mem, desc->hwdescs.dma);
+ desc->hwdescs.mem = NULL;
+ desc->hwdescs.size = 0;
+ }
+
+ if (!size)
+ return;
+
+ desc->hwdescs.mem = dma_alloc_coherent(chan->chan.device->dev, size,
+ &desc->hwdescs.dma, GFP_NOWAIT);
+ if (!desc->hwdescs.mem)
+ return;
+
+ desc->hwdescs.size = size;
+}
+
+static int rcar_dmac_fill_hwdesc(struct rcar_dmac_chan *chan,
+ struct rcar_dmac_desc *desc)
+{
+ struct rcar_dmac_xfer_chunk *chunk;
+ struct rcar_dmac_hw_desc *hwdesc;
+
+ rcar_dmac_realloc_hwdesc(chan, desc, desc->nchunks * sizeof(*hwdesc));
+
+ hwdesc = desc->hwdescs.mem;
+ if (!hwdesc)
+ return -ENOMEM;
+
+ list_for_each_entry(chunk, &desc->chunks, node) {
+ hwdesc->sar = chunk->src_addr;
+ hwdesc->dar = chunk->dst_addr;
+ hwdesc->tcr = chunk->size >> desc->xfer_shift;
+ hwdesc++;
+ }
+
+ return 0;
+}
+
+/* -----------------------------------------------------------------------------
+ * Stop and reset
+ */
+static void rcar_dmac_chcr_de_barrier(struct rcar_dmac_chan *chan)
+{
+ u32 chcr;
+ unsigned int i;
+
+ /*
+ * Ensure that the setting of the DE bit is actually 0 after
+ * clearing it.
+ */
+ for (i = 0; i < 1024; i++) {
+ chcr = rcar_dmac_chan_read(chan, RCAR_DMACHCR);
+ if (!(chcr & RCAR_DMACHCR_DE))
+ return;
+ udelay(1);
+ }
+
+ dev_err(chan->chan.device->dev, "CHCR DE check error\n");
+}
+
+static void rcar_dmac_clear_chcr_de(struct rcar_dmac_chan *chan)
+{
+ u32 chcr = rcar_dmac_chan_read(chan, RCAR_DMACHCR);
+
+ /* set DE=0 and flush remaining data */
+ rcar_dmac_chan_write(chan, RCAR_DMACHCR, (chcr & ~RCAR_DMACHCR_DE));
+
+ /* make sure all remaining data was flushed */
+ rcar_dmac_chcr_de_barrier(chan);
+}
+
+static void rcar_dmac_chan_halt(struct rcar_dmac_chan *chan)
+{
+ u32 chcr = rcar_dmac_chan_read(chan, RCAR_DMACHCR);
+
+ chcr &= ~(RCAR_DMACHCR_DSE | RCAR_DMACHCR_DSIE | RCAR_DMACHCR_IE |
+ RCAR_DMACHCR_TE | RCAR_DMACHCR_DE |
+ RCAR_DMACHCR_CAE | RCAR_DMACHCR_CAIE);
+ rcar_dmac_chan_write(chan, RCAR_DMACHCR, chcr);
+ rcar_dmac_chcr_de_barrier(chan);
+}
+
+static void rcar_dmac_chan_reinit(struct rcar_dmac_chan *chan)
+{
+ struct rcar_dmac_desc *desc, *_desc;
+ unsigned long flags;
+ LIST_HEAD(descs);
+
+ spin_lock_irqsave(&chan->lock, flags);
+
+ /* Move all non-free descriptors to the local lists. */
+ list_splice_init(&chan->desc.pending, &descs);
+ list_splice_init(&chan->desc.active, &descs);
+ list_splice_init(&chan->desc.done, &descs);
+ list_splice_init(&chan->desc.wait, &descs);
+
+ chan->desc.running = NULL;
+
+ spin_unlock_irqrestore(&chan->lock, flags);
+
+ list_for_each_entry_safe(desc, _desc, &descs, node) {
+ list_del(&desc->node);
+ rcar_dmac_desc_put(chan, desc);
+ }
+}
+
+static void rcar_dmac_stop_all_chan(struct rcar_dmac *dmac)
+{
+ struct rcar_dmac_chan *chan;
+ unsigned int i;
+
+ /* Stop all channels. */
+ for_each_rcar_dmac_chan(i, dmac, chan) {
+ /* Stop and reinitialize the channel. */
+ spin_lock_irq(&chan->lock);
+ rcar_dmac_chan_halt(chan);
+ spin_unlock_irq(&chan->lock);
+ }
+}
+
+static int rcar_dmac_chan_pause(struct dma_chan *chan)
+{
+ unsigned long flags;
+ struct rcar_dmac_chan *rchan = to_rcar_dmac_chan(chan);
+
+ spin_lock_irqsave(&rchan->lock, flags);
+ rcar_dmac_clear_chcr_de(rchan);
+ spin_unlock_irqrestore(&rchan->lock, flags);
+
+ return 0;
+}
+
+/* -----------------------------------------------------------------------------
+ * Descriptors preparation
+ */
+
+static void rcar_dmac_chan_configure_desc(struct rcar_dmac_chan *chan,
+ struct rcar_dmac_desc *desc)
+{
+ static const u32 chcr_ts[] = {
+ RCAR_DMACHCR_TS_1B, RCAR_DMACHCR_TS_2B,
+ RCAR_DMACHCR_TS_4B, RCAR_DMACHCR_TS_8B,
+ RCAR_DMACHCR_TS_16B, RCAR_DMACHCR_TS_32B,
+ RCAR_DMACHCR_TS_64B,
+ };
+
+ unsigned int xfer_size;
+ u32 chcr;
+
+ switch (desc->direction) {
+ case DMA_DEV_TO_MEM:
+ chcr = RCAR_DMACHCR_DM_INC | RCAR_DMACHCR_SM_FIXED
+ | RCAR_DMACHCR_RS_DMARS;
+ xfer_size = chan->src.xfer_size;
+ break;
+
+ case DMA_MEM_TO_DEV:
+ chcr = RCAR_DMACHCR_DM_FIXED | RCAR_DMACHCR_SM_INC
+ | RCAR_DMACHCR_RS_DMARS;
+ xfer_size = chan->dst.xfer_size;
+ break;
+
+ case DMA_MEM_TO_MEM:
+ default:
+ chcr = RCAR_DMACHCR_DM_INC | RCAR_DMACHCR_SM_INC
+ | RCAR_DMACHCR_RS_AUTO;
+ xfer_size = RCAR_DMAC_MEMCPY_XFER_SIZE;
+ break;
+ }
+
+ desc->xfer_shift = ilog2(xfer_size);
+ desc->chcr = chcr | chcr_ts[desc->xfer_shift];
+}
+
+/*
+ * rcar_dmac_chan_prep_sg - prepare transfer descriptors from an SG list
+ *
+ * Common routine for public (MEMCPY) and slave DMA. The MEMCPY case is also
+ * converted to scatter-gather to guarantee consistent locking and a correct
+ * list manipulation. For slave DMA direction carries the usual meaning, and,
+ * logically, the SG list is RAM and the addr variable contains slave address,
+ * e.g., the FIFO I/O register. For MEMCPY direction equals DMA_MEM_TO_MEM
+ * and the SG list contains only one element and points at the source buffer.
+ */
+static struct dma_async_tx_descriptor *
+rcar_dmac_chan_prep_sg(struct rcar_dmac_chan *chan, struct scatterlist *sgl,
+ unsigned int sg_len, dma_addr_t dev_addr,
+ enum dma_transfer_direction dir, unsigned long dma_flags,
+ bool cyclic)
+{
+ struct rcar_dmac_xfer_chunk *chunk;
+ struct rcar_dmac_desc *desc;
+ struct scatterlist *sg;
+ unsigned int nchunks = 0;
+ unsigned int max_chunk_size;
+ unsigned int full_size = 0;
+ bool cross_boundary = false;
+ unsigned int i;
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+ u32 high_dev_addr;
+ u32 high_mem_addr;
+#endif
+
+ desc = rcar_dmac_desc_get(chan);
+ if (!desc)
+ return NULL;
+
+ desc->async_tx.flags = dma_flags;
+ desc->async_tx.cookie = -EBUSY;
+
+ desc->cyclic = cyclic;
+ desc->direction = dir;
+
+ rcar_dmac_chan_configure_desc(chan, desc);
+
+ max_chunk_size = RCAR_DMATCR_MASK << desc->xfer_shift;
+
+ /*
+ * Allocate and fill the transfer chunk descriptors. We own the only
+ * reference to the DMA descriptor, there's no need for locking.
+ */
+ for_each_sg(sgl, sg, sg_len, i) {
+ dma_addr_t mem_addr = sg_dma_address(sg);
+ unsigned int len = sg_dma_len(sg);
+
+ full_size += len;
+
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+ if (i == 0) {
+ high_dev_addr = dev_addr >> 32;
+ high_mem_addr = mem_addr >> 32;
+ }
+
+ if ((dev_addr >> 32 != high_dev_addr) ||
+ (mem_addr >> 32 != high_mem_addr))
+ cross_boundary = true;
+#endif
+ while (len) {
+ unsigned int size = min(len, max_chunk_size);
+
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+ /*
+ * Prevent individual transfers from crossing 4GB
+ * boundaries.
+ */
+ if (dev_addr >> 32 != (dev_addr + size - 1) >> 32) {
+ size = ALIGN(dev_addr, 1ULL << 32) - dev_addr;
+ cross_boundary = true;
+ }
+ if (mem_addr >> 32 != (mem_addr + size - 1) >> 32) {
+ size = ALIGN(mem_addr, 1ULL << 32) - mem_addr;
+ cross_boundary = true;
+ }
+#endif
+
+ chunk = rcar_dmac_xfer_chunk_get(chan);
+ if (!chunk) {
+ rcar_dmac_desc_put(chan, desc);
+ return NULL;
+ }
+
+ if (dir == DMA_DEV_TO_MEM) {
+ chunk->src_addr = dev_addr;
+ chunk->dst_addr = mem_addr;
+ } else {
+ chunk->src_addr = mem_addr;
+ chunk->dst_addr = dev_addr;
+ }
+
+ chunk->size = size;
+
+ dev_dbg(chan->chan.device->dev,
+ "chan%u: chunk %p/%p sgl %u@%p, %u/%u %pad -> %pad\n",
+ chan->index, chunk, desc, i, sg, size, len,
+ &chunk->src_addr, &chunk->dst_addr);
+
+ mem_addr += size;
+ if (dir == DMA_MEM_TO_MEM)
+ dev_addr += size;
+
+ len -= size;
+
+ list_add_tail(&chunk->node, &desc->chunks);
+ nchunks++;
+ }
+ }
+
+ desc->nchunks = nchunks;
+ desc->size = full_size;
+
+ /*
+ * Use hardware descriptor lists if possible when more than one chunk
+ * needs to be transferred (otherwise they don't make much sense).
+ *
+ * Source/Destination address should be located in same 4GiB region
+ * in the 40bit address space when it uses Hardware descriptor,
+ * and cross_boundary is checking it.
+ */
+ desc->hwdescs.use = !cross_boundary && nchunks > 1;
+ if (desc->hwdescs.use) {
+ if (rcar_dmac_fill_hwdesc(chan, desc) < 0)
+ desc->hwdescs.use = false;
+ }
+
+ return &desc->async_tx;
+}
+
+/* -----------------------------------------------------------------------------
+ * DMA engine operations
+ */
+
+static int rcar_dmac_alloc_chan_resources(struct dma_chan *chan)
+{
+ struct rcar_dmac_chan *rchan = to_rcar_dmac_chan(chan);
+ int ret;
+
+ INIT_LIST_HEAD(&rchan->desc.chunks_free);
+ INIT_LIST_HEAD(&rchan->desc.pages);
+
+ /* Preallocate descriptors. */
+ ret = rcar_dmac_xfer_chunk_alloc(rchan, GFP_KERNEL);
+ if (ret < 0)
+ return -ENOMEM;
+
+ ret = rcar_dmac_desc_alloc(rchan, GFP_KERNEL);
+ if (ret < 0)
+ return -ENOMEM;
+
+ return pm_runtime_get_sync(chan->device->dev);
+}
+
+static void rcar_dmac_free_chan_resources(struct dma_chan *chan)
+{
+ struct rcar_dmac_chan *rchan = to_rcar_dmac_chan(chan);
+ struct rcar_dmac *dmac = to_rcar_dmac(chan->device);
+ struct rcar_dmac_chan_map *map = &rchan->map;
+ struct rcar_dmac_desc_page *page, *_page;
+ struct rcar_dmac_desc *desc;
+ LIST_HEAD(list);
+
+ /* Protect against ISR */
+ spin_lock_irq(&rchan->lock);
+ rcar_dmac_chan_halt(rchan);
+ spin_unlock_irq(&rchan->lock);
+
+ /*
+ * Now no new interrupts will occur, but one might already be
+ * running. Wait for it to finish before freeing resources.
+ */
+ synchronize_irq(rchan->irq);
+
+ if (rchan->mid_rid >= 0) {
+ /* The caller is holding dma_list_mutex */
+ clear_bit(rchan->mid_rid, dmac->modules);
+ rchan->mid_rid = -EINVAL;
+ }
+
+ list_splice_init(&rchan->desc.free, &list);
+ list_splice_init(&rchan->desc.pending, &list);
+ list_splice_init(&rchan->desc.active, &list);
+ list_splice_init(&rchan->desc.done, &list);
+ list_splice_init(&rchan->desc.wait, &list);
+
+ rchan->desc.running = NULL;
+
+ list_for_each_entry(desc, &list, node)
+ rcar_dmac_realloc_hwdesc(rchan, desc, 0);
+
+ list_for_each_entry_safe(page, _page, &rchan->desc.pages, node) {
+ list_del(&page->node);
+ free_page((unsigned long)page);
+ }
+
+ /* Remove slave mapping if present. */
+ if (map->slave.xfer_size) {
+ dma_unmap_resource(chan->device->dev, map->addr,
+ map->slave.xfer_size, map->dir, 0);
+ map->slave.xfer_size = 0;
+ }
+
+ pm_runtime_put(chan->device->dev);
+}
+
+static struct dma_async_tx_descriptor *
+rcar_dmac_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dma_dest,
+ dma_addr_t dma_src, size_t len, unsigned long flags)
+{
+ struct rcar_dmac_chan *rchan = to_rcar_dmac_chan(chan);
+ struct scatterlist sgl;
+
+ if (!len)
+ return NULL;
+
+ sg_init_table(&sgl, 1);
+ sg_set_page(&sgl, pfn_to_page(PFN_DOWN(dma_src)), len,
+ offset_in_page(dma_src));
+ sg_dma_address(&sgl) = dma_src;
+ sg_dma_len(&sgl) = len;
+
+ return rcar_dmac_chan_prep_sg(rchan, &sgl, 1, dma_dest,
+ DMA_MEM_TO_MEM, flags, false);
+}
+
+static int rcar_dmac_map_slave_addr(struct dma_chan *chan,
+ enum dma_transfer_direction dir)
+{
+ struct rcar_dmac_chan *rchan = to_rcar_dmac_chan(chan);
+ struct rcar_dmac_chan_map *map = &rchan->map;
+ phys_addr_t dev_addr;
+ size_t dev_size;
+ enum dma_data_direction dev_dir;
+
+ if (dir == DMA_DEV_TO_MEM) {
+ dev_addr = rchan->src.slave_addr;
+ dev_size = rchan->src.xfer_size;
+ dev_dir = DMA_TO_DEVICE;
+ } else {
+ dev_addr = rchan->dst.slave_addr;
+ dev_size = rchan->dst.xfer_size;
+ dev_dir = DMA_FROM_DEVICE;
+ }
+
+ /* Reuse current map if possible. */
+ if (dev_addr == map->slave.slave_addr &&
+ dev_size == map->slave.xfer_size &&
+ dev_dir == map->dir)
+ return 0;
+
+ /* Remove old mapping if present. */
+ if (map->slave.xfer_size)
+ dma_unmap_resource(chan->device->dev, map->addr,
+ map->slave.xfer_size, map->dir, 0);
+ map->slave.xfer_size = 0;
+
+ /* Create new slave address map. */
+ map->addr = dma_map_resource(chan->device->dev, dev_addr, dev_size,
+ dev_dir, 0);
+
+ if (dma_mapping_error(chan->device->dev, map->addr)) {
+ dev_err(chan->device->dev,
+ "chan%u: failed to map %zx@%pap", rchan->index,
+ dev_size, &dev_addr);
+ return -EIO;
+ }
+
+ dev_dbg(chan->device->dev, "chan%u: map %zx@%pap to %pad dir: %s\n",
+ rchan->index, dev_size, &dev_addr, &map->addr,
+ dev_dir == DMA_TO_DEVICE ? "DMA_TO_DEVICE" : "DMA_FROM_DEVICE");
+
+ map->slave.slave_addr = dev_addr;
+ map->slave.xfer_size = dev_size;
+ map->dir = dev_dir;
+
+ return 0;
+}
+
+static struct dma_async_tx_descriptor *
+rcar_dmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
+ unsigned int sg_len, enum dma_transfer_direction dir,
+ unsigned long flags, void *context)
+{
+ struct rcar_dmac_chan *rchan = to_rcar_dmac_chan(chan);
+
+ /* Someone calling slave DMA on a generic channel? */
+ if (rchan->mid_rid < 0 || !sg_len || !sg_dma_len(sgl)) {
+ dev_warn(chan->device->dev,
+ "%s: bad parameter: len=%d, id=%d\n",
+ __func__, sg_len, rchan->mid_rid);
+ return NULL;
+ }
+
+ if (rcar_dmac_map_slave_addr(chan, dir))
+ return NULL;
+
+ return rcar_dmac_chan_prep_sg(rchan, sgl, sg_len, rchan->map.addr,
+ dir, flags, false);
+}
+
+#define RCAR_DMAC_MAX_SG_LEN 32
+
+static struct dma_async_tx_descriptor *
+rcar_dmac_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr,
+ size_t buf_len, size_t period_len,
+ enum dma_transfer_direction dir, unsigned long flags)
+{
+ struct rcar_dmac_chan *rchan = to_rcar_dmac_chan(chan);
+ struct dma_async_tx_descriptor *desc;
+ struct scatterlist *sgl;
+ unsigned int sg_len;
+ unsigned int i;
+
+ /* Someone calling slave DMA on a generic channel? */
+ if (rchan->mid_rid < 0 || buf_len < period_len) {
+ dev_warn(chan->device->dev,
+ "%s: bad parameter: buf_len=%zu, period_len=%zu, id=%d\n",
+ __func__, buf_len, period_len, rchan->mid_rid);
+ return NULL;
+ }
+
+ if (rcar_dmac_map_slave_addr(chan, dir))
+ return NULL;
+
+ sg_len = buf_len / period_len;
+ if (sg_len > RCAR_DMAC_MAX_SG_LEN) {
+ dev_err(chan->device->dev,
+ "chan%u: sg length %d exceeds limit %d",
+ rchan->index, sg_len, RCAR_DMAC_MAX_SG_LEN);
+ return NULL;
+ }
+
+ /*
+ * Allocate the sg list dynamically as it would consume too much stack
+ * space.
+ */
+ sgl = kmalloc_array(sg_len, sizeof(*sgl), GFP_NOWAIT);
+ if (!sgl)
+ return NULL;
+
+ sg_init_table(sgl, sg_len);
+
+ for (i = 0; i < sg_len; ++i) {
+ dma_addr_t src = buf_addr + (period_len * i);
+
+ sg_set_page(&sgl[i], pfn_to_page(PFN_DOWN(src)), period_len,
+ offset_in_page(src));
+ sg_dma_address(&sgl[i]) = src;
+ sg_dma_len(&sgl[i]) = period_len;
+ }
+
+ desc = rcar_dmac_chan_prep_sg(rchan, sgl, sg_len, rchan->map.addr,
+ dir, flags, true);
+
+ kfree(sgl);
+ return desc;
+}
+
+static int rcar_dmac_device_config(struct dma_chan *chan,
+ struct dma_slave_config *cfg)
+{
+ struct rcar_dmac_chan *rchan = to_rcar_dmac_chan(chan);
+
+ /*
+ * We could lock this, but you shouldn't be configuring the
+ * channel, while using it...
+ */
+ rchan->src.slave_addr = cfg->src_addr;
+ rchan->dst.slave_addr = cfg->dst_addr;
+ rchan->src.xfer_size = cfg->src_addr_width;
+ rchan->dst.xfer_size = cfg->dst_addr_width;
+
+ return 0;
+}
+
+static int rcar_dmac_chan_terminate_all(struct dma_chan *chan)
+{
+ struct rcar_dmac_chan *rchan = to_rcar_dmac_chan(chan);
+ unsigned long flags;
+
+ spin_lock_irqsave(&rchan->lock, flags);
+ rcar_dmac_chan_halt(rchan);
+ spin_unlock_irqrestore(&rchan->lock, flags);
+
+ /*
+ * FIXME: No new interrupt can occur now, but the IRQ thread might still
+ * be running.
+ */
+
+ rcar_dmac_chan_reinit(rchan);
+
+ return 0;
+}
+
+static unsigned int rcar_dmac_chan_get_residue(struct rcar_dmac_chan *chan,
+ dma_cookie_t cookie)
+{
+ struct rcar_dmac_desc *desc = chan->desc.running;
+ struct rcar_dmac_xfer_chunk *running = NULL;
+ struct rcar_dmac_xfer_chunk *chunk;
+ enum dma_status status;
+ unsigned int residue = 0;
+ unsigned int dptr = 0;
+ unsigned int chcrb;
+ unsigned int tcrb;
+ unsigned int i;
+
+ if (!desc)
+ return 0;
+
+ /*
+ * If the cookie corresponds to a descriptor that has been completed
+ * there is no residue. The same check has already been performed by the
+ * caller but without holding the channel lock, so the descriptor could
+ * now be complete.
+ */
+ status = dma_cookie_status(&chan->chan, cookie, NULL);
+ if (status == DMA_COMPLETE)
+ return 0;
+
+ /*
+ * If the cookie doesn't correspond to the currently running transfer
+ * then the descriptor hasn't been processed yet, and the residue is
+ * equal to the full descriptor size.
+ * Also, a client driver is possible to call this function before
+ * rcar_dmac_isr_channel_thread() runs. In this case, the "desc.running"
+ * will be the next descriptor, and the done list will appear. So, if
+ * the argument cookie matches the done list's cookie, we can assume
+ * the residue is zero.
+ */
+ if (cookie != desc->async_tx.cookie) {
+ list_for_each_entry(desc, &chan->desc.done, node) {
+ if (cookie == desc->async_tx.cookie)
+ return 0;
+ }
+ list_for_each_entry(desc, &chan->desc.pending, node) {
+ if (cookie == desc->async_tx.cookie)
+ return desc->size;
+ }
+ list_for_each_entry(desc, &chan->desc.active, node) {
+ if (cookie == desc->async_tx.cookie)
+ return desc->size;
+ }
+
+ /*
+ * No descriptor found for the cookie, there's thus no residue.
+ * This shouldn't happen if the calling driver passes a correct
+ * cookie value.
+ */
+ WARN(1, "No descriptor for cookie!");
+ return 0;
+ }
+
+ /*
+ * We need to read two registers.
+ * Make sure the control register does not skip to next chunk
+ * while reading the counter.
+ * Trying it 3 times should be enough: Initial read, retry, retry
+ * for the paranoid.
+ */
+ for (i = 0; i < 3; i++) {
+ chcrb = rcar_dmac_chan_read(chan, RCAR_DMACHCRB) &
+ RCAR_DMACHCRB_DPTR_MASK;
+ tcrb = rcar_dmac_chan_read(chan, RCAR_DMATCRB);
+ /* Still the same? */
+ if (chcrb == (rcar_dmac_chan_read(chan, RCAR_DMACHCRB) &
+ RCAR_DMACHCRB_DPTR_MASK))
+ break;
+ }
+ WARN_ONCE(i >= 3, "residue might be not continuous!");
+
+ /*
+ * In descriptor mode the descriptor running pointer is not maintained
+ * by the interrupt handler, find the running descriptor from the
+ * descriptor pointer field in the CHCRB register. In non-descriptor
+ * mode just use the running descriptor pointer.
+ */
+ if (desc->hwdescs.use) {
+ dptr = chcrb >> RCAR_DMACHCRB_DPTR_SHIFT;
+ if (dptr == 0)
+ dptr = desc->nchunks;
+ dptr--;
+ WARN_ON(dptr >= desc->nchunks);
+ } else {
+ running = desc->running;
+ }
+
+ /* Compute the size of all chunks still to be transferred. */
+ list_for_each_entry_reverse(chunk, &desc->chunks, node) {
+ if (chunk == running || ++dptr == desc->nchunks)
+ break;
+
+ residue += chunk->size;
+ }
+
+ /* Add the residue for the current chunk. */
+ residue += tcrb << desc->xfer_shift;
+
+ return residue;
+}
+
+static enum dma_status rcar_dmac_tx_status(struct dma_chan *chan,
+ dma_cookie_t cookie,
+ struct dma_tx_state *txstate)
+{
+ struct rcar_dmac_chan *rchan = to_rcar_dmac_chan(chan);
+ enum dma_status status;
+ unsigned long flags;
+ unsigned int residue;
+ bool cyclic;
+
+ status = dma_cookie_status(chan, cookie, txstate);
+ if (status == DMA_COMPLETE || !txstate)
+ return status;
+
+ spin_lock_irqsave(&rchan->lock, flags);
+ residue = rcar_dmac_chan_get_residue(rchan, cookie);
+ cyclic = rchan->desc.running ? rchan->desc.running->cyclic : false;
+ spin_unlock_irqrestore(&rchan->lock, flags);
+
+ /* if there's no residue, the cookie is complete */
+ if (!residue && !cyclic)
+ return DMA_COMPLETE;
+
+ dma_set_residue(txstate, residue);
+
+ return status;
+}
+
+static void rcar_dmac_issue_pending(struct dma_chan *chan)
+{
+ struct rcar_dmac_chan *rchan = to_rcar_dmac_chan(chan);
+ unsigned long flags;
+
+ spin_lock_irqsave(&rchan->lock, flags);
+
+ if (list_empty(&rchan->desc.pending))
+ goto done;
+
+ /* Append the pending list to the active list. */
+ list_splice_tail_init(&rchan->desc.pending, &rchan->desc.active);
+
+ /*
+ * If no transfer is running pick the first descriptor from the active
+ * list and start the transfer.
+ */
+ if (!rchan->desc.running) {
+ struct rcar_dmac_desc *desc;
+
+ desc = list_first_entry(&rchan->desc.active,
+ struct rcar_dmac_desc, node);
+ rchan->desc.running = desc;
+
+ rcar_dmac_chan_start_xfer(rchan);
+ }
+
+done:
+ spin_unlock_irqrestore(&rchan->lock, flags);
+}
+
+static void rcar_dmac_device_synchronize(struct dma_chan *chan)
+{
+ struct rcar_dmac_chan *rchan = to_rcar_dmac_chan(chan);
+
+ synchronize_irq(rchan->irq);
+}
+
+/* -----------------------------------------------------------------------------
+ * IRQ handling
+ */
+
+static irqreturn_t rcar_dmac_isr_desc_stage_end(struct rcar_dmac_chan *chan)
+{
+ struct rcar_dmac_desc *desc = chan->desc.running;
+ unsigned int stage;
+
+ if (WARN_ON(!desc || !desc->cyclic)) {
+ /*
+ * This should never happen, there should always be a running
+ * cyclic descriptor when a descriptor stage end interrupt is
+ * triggered. Warn and return.
+ */
+ return IRQ_NONE;
+ }
+
+ /* Program the interrupt pointer to the next stage. */
+ stage = (rcar_dmac_chan_read(chan, RCAR_DMACHCRB) &
+ RCAR_DMACHCRB_DPTR_MASK) >> RCAR_DMACHCRB_DPTR_SHIFT;
+ rcar_dmac_chan_write(chan, RCAR_DMADPCR, RCAR_DMADPCR_DIPT(stage));
+
+ return IRQ_WAKE_THREAD;
+}
+
+static irqreturn_t rcar_dmac_isr_transfer_end(struct rcar_dmac_chan *chan)
+{
+ struct rcar_dmac_desc *desc = chan->desc.running;
+ irqreturn_t ret = IRQ_WAKE_THREAD;
+
+ if (WARN_ON_ONCE(!desc)) {
+ /*
+ * This should never happen, there should always be a running
+ * descriptor when a transfer end interrupt is triggered. Warn
+ * and return.
+ */
+ return IRQ_NONE;
+ }
+
+ /*
+ * The transfer end interrupt isn't generated for each chunk when using
+ * descriptor mode. Only update the running chunk pointer in
+ * non-descriptor mode.
+ */
+ if (!desc->hwdescs.use) {
+ /*
+ * If we haven't completed the last transfer chunk simply move
+ * to the next one. Only wake the IRQ thread if the transfer is
+ * cyclic.
+ */
+ if (!list_is_last(&desc->running->node, &desc->chunks)) {
+ desc->running = list_next_entry(desc->running, node);
+ if (!desc->cyclic)
+ ret = IRQ_HANDLED;
+ goto done;
+ }
+
+ /*
+ * We've completed the last transfer chunk. If the transfer is
+ * cyclic, move back to the first one.
+ */
+ if (desc->cyclic) {
+ desc->running =
+ list_first_entry(&desc->chunks,
+ struct rcar_dmac_xfer_chunk,
+ node);
+ goto done;
+ }
+ }
+
+ /* The descriptor is complete, move it to the done list. */
+ list_move_tail(&desc->node, &chan->desc.done);
+
+ /* Queue the next descriptor, if any. */
+ if (!list_empty(&chan->desc.active))
+ chan->desc.running = list_first_entry(&chan->desc.active,
+ struct rcar_dmac_desc,
+ node);
+ else
+ chan->desc.running = NULL;
+
+done:
+ if (chan->desc.running)
+ rcar_dmac_chan_start_xfer(chan);
+
+ return ret;
+}
+
+static irqreturn_t rcar_dmac_isr_channel(int irq, void *dev)
+{
+ u32 mask = RCAR_DMACHCR_DSE | RCAR_DMACHCR_TE;
+ struct rcar_dmac_chan *chan = dev;
+ irqreturn_t ret = IRQ_NONE;
+ bool reinit = false;
+ u32 chcr;
+
+ spin_lock(&chan->lock);
+
+ chcr = rcar_dmac_chan_read(chan, RCAR_DMACHCR);
+ if (chcr & RCAR_DMACHCR_CAE) {
+ struct rcar_dmac *dmac = to_rcar_dmac(chan->chan.device);
+
+ /*
+ * We don't need to call rcar_dmac_chan_halt()
+ * because channel is already stopped in error case.
+ * We need to clear register and check DE bit as recovery.
+ */
+ rcar_dmac_chan_clear(dmac, chan);
+ rcar_dmac_chcr_de_barrier(chan);
+ reinit = true;
+ goto spin_lock_end;
+ }
+
+ if (chcr & RCAR_DMACHCR_TE)
+ mask |= RCAR_DMACHCR_DE;
+ rcar_dmac_chan_write(chan, RCAR_DMACHCR, chcr & ~mask);
+ if (mask & RCAR_DMACHCR_DE)
+ rcar_dmac_chcr_de_barrier(chan);
+
+ if (chcr & RCAR_DMACHCR_DSE)
+ ret |= rcar_dmac_isr_desc_stage_end(chan);
+
+ if (chcr & RCAR_DMACHCR_TE)
+ ret |= rcar_dmac_isr_transfer_end(chan);
+
+spin_lock_end:
+ spin_unlock(&chan->lock);
+
+ if (reinit) {
+ dev_err(chan->chan.device->dev, "Channel Address Error\n");
+
+ rcar_dmac_chan_reinit(chan);
+ ret = IRQ_HANDLED;
+ }
+
+ return ret;
+}
+
+static irqreturn_t rcar_dmac_isr_channel_thread(int irq, void *dev)
+{
+ struct rcar_dmac_chan *chan = dev;
+ struct rcar_dmac_desc *desc;
+ struct dmaengine_desc_callback cb;
+
+ spin_lock_irq(&chan->lock);
+
+ /* For cyclic transfers notify the user after every chunk. */
+ if (chan->desc.running && chan->desc.running->cyclic) {
+ desc = chan->desc.running;
+ dmaengine_desc_get_callback(&desc->async_tx, &cb);
+
+ if (dmaengine_desc_callback_valid(&cb)) {
+ spin_unlock_irq(&chan->lock);
+ dmaengine_desc_callback_invoke(&cb, NULL);
+ spin_lock_irq(&chan->lock);
+ }
+ }
+
+ /*
+ * Call the callback function for all descriptors on the done list and
+ * move them to the ack wait list.
+ */
+ while (!list_empty(&chan->desc.done)) {
+ desc = list_first_entry(&chan->desc.done, struct rcar_dmac_desc,
+ node);
+ dma_cookie_complete(&desc->async_tx);
+ list_del(&desc->node);
+
+ dmaengine_desc_get_callback(&desc->async_tx, &cb);
+ if (dmaengine_desc_callback_valid(&cb)) {
+ spin_unlock_irq(&chan->lock);
+ /*
+ * We own the only reference to this descriptor, we can
+ * safely dereference it without holding the channel
+ * lock.
+ */
+ dmaengine_desc_callback_invoke(&cb, NULL);
+ spin_lock_irq(&chan->lock);
+ }
+
+ list_add_tail(&desc->node, &chan->desc.wait);
+ }
+
+ spin_unlock_irq(&chan->lock);
+
+ /* Recycle all acked descriptors. */
+ rcar_dmac_desc_recycle_acked(chan);
+
+ return IRQ_HANDLED;
+}
+
+/* -----------------------------------------------------------------------------
+ * OF xlate and channel filter
+ */
+
+static bool rcar_dmac_chan_filter(struct dma_chan *chan, void *arg)
+{
+ struct rcar_dmac *dmac = to_rcar_dmac(chan->device);
+ struct of_phandle_args *dma_spec = arg;
+
+ /*
+ * FIXME: Using a filter on OF platforms is a nonsense. The OF xlate
+ * function knows from which device it wants to allocate a channel from,
+ * and would be perfectly capable of selecting the channel it wants.
+ * Forcing it to call dma_request_channel() and iterate through all
+ * channels from all controllers is just pointless.
+ */
+ if (chan->device->device_config != rcar_dmac_device_config)
+ return false;
+
+ return !test_and_set_bit(dma_spec->args[0], dmac->modules);
+}
+
+static struct dma_chan *rcar_dmac_of_xlate(struct of_phandle_args *dma_spec,
+ struct of_dma *ofdma)
+{
+ struct rcar_dmac_chan *rchan;
+ struct dma_chan *chan;
+ dma_cap_mask_t mask;
+
+ if (dma_spec->args_count != 1)
+ return NULL;
+
+ /* Only slave DMA channels can be allocated via DT */
+ dma_cap_zero(mask);
+ dma_cap_set(DMA_SLAVE, mask);
+
+ chan = __dma_request_channel(&mask, rcar_dmac_chan_filter, dma_spec,
+ ofdma->of_node);
+ if (!chan)
+ return NULL;
+
+ rchan = to_rcar_dmac_chan(chan);
+ rchan->mid_rid = dma_spec->args[0];
+
+ return chan;
+}
+
+/* -----------------------------------------------------------------------------
+ * Power management
+ */
+
+#ifdef CONFIG_PM
+static int rcar_dmac_runtime_suspend(struct device *dev)
+{
+ return 0;
+}
+
+static int rcar_dmac_runtime_resume(struct device *dev)
+{
+ struct rcar_dmac *dmac = dev_get_drvdata(dev);
+
+ return rcar_dmac_init(dmac);
+}
+#endif
+
+static const struct dev_pm_ops rcar_dmac_pm = {
+ /*
+ * TODO for system sleep/resume:
+ * - Wait for the current transfer to complete and stop the device,
+ * - Resume transfers, if any.
+ */
+ SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend,
+ pm_runtime_force_resume)
+ SET_RUNTIME_PM_OPS(rcar_dmac_runtime_suspend, rcar_dmac_runtime_resume,
+ NULL)
+};
+
+/* -----------------------------------------------------------------------------
+ * Probe and remove
+ */
+
+static int rcar_dmac_chan_probe(struct rcar_dmac *dmac,
+ struct rcar_dmac_chan *rchan)
+{
+ struct platform_device *pdev = to_platform_device(dmac->dev);
+ struct dma_chan *chan = &rchan->chan;
+ char pdev_irqname[5];
+ char *irqname;
+ int ret;
+
+ rchan->mid_rid = -EINVAL;
+
+ spin_lock_init(&rchan->lock);
+
+ INIT_LIST_HEAD(&rchan->desc.free);
+ INIT_LIST_HEAD(&rchan->desc.pending);
+ INIT_LIST_HEAD(&rchan->desc.active);
+ INIT_LIST_HEAD(&rchan->desc.done);
+ INIT_LIST_HEAD(&rchan->desc.wait);
+
+ /* Request the channel interrupt. */
+ sprintf(pdev_irqname, "ch%u", rchan->index);
+ rchan->irq = platform_get_irq_byname(pdev, pdev_irqname);
+ if (rchan->irq < 0)
+ return -ENODEV;
+
+ irqname = devm_kasprintf(dmac->dev, GFP_KERNEL, "%s:%u",
+ dev_name(dmac->dev), rchan->index);
+ if (!irqname)
+ return -ENOMEM;
+
+ /*
+ * Initialize the DMA engine channel and add it to the DMA engine
+ * channels list.
+ */
+ chan->device = &dmac->engine;
+ dma_cookie_init(chan);
+
+ list_add_tail(&chan->device_node, &dmac->engine.channels);
+
+ ret = devm_request_threaded_irq(dmac->dev, rchan->irq,
+ rcar_dmac_isr_channel,
+ rcar_dmac_isr_channel_thread, 0,
+ irqname, rchan);
+ if (ret) {
+ dev_err(dmac->dev, "failed to request IRQ %u (%d)\n",
+ rchan->irq, ret);
+ return ret;
+ }
+
+ return 0;
+}
+
+#define RCAR_DMAC_MAX_CHANNELS 32
+
+static int rcar_dmac_parse_of(struct device *dev, struct rcar_dmac *dmac)
+{
+ struct device_node *np = dev->of_node;
+ int ret;
+
+ ret = of_property_read_u32(np, "dma-channels", &dmac->n_channels);
+ if (ret < 0) {
+ dev_err(dev, "unable to read dma-channels property\n");
+ return ret;
+ }
+
+ /* The hardware and driver don't support more than 32 bits in CHCLR */
+ if (dmac->n_channels <= 0 ||
+ dmac->n_channels >= RCAR_DMAC_MAX_CHANNELS) {
+ dev_err(dev, "invalid number of channels %u\n",
+ dmac->n_channels);
+ return -EINVAL;
+ }
+
+ /*
+ * If the driver is unable to read dma-channel-mask property,
+ * the driver assumes that it can use all channels.
+ */
+ dmac->channels_mask = GENMASK(dmac->n_channels - 1, 0);
+ of_property_read_u32(np, "dma-channel-mask", &dmac->channels_mask);
+
+ /* If the property has out-of-channel mask, this driver clears it */
+ dmac->channels_mask &= GENMASK(dmac->n_channels - 1, 0);
+
+ return 0;
+}
+
+static int rcar_dmac_probe(struct platform_device *pdev)
+{
+ const enum dma_slave_buswidth widths = DMA_SLAVE_BUSWIDTH_1_BYTE |
+ DMA_SLAVE_BUSWIDTH_2_BYTES | DMA_SLAVE_BUSWIDTH_4_BYTES |
+ DMA_SLAVE_BUSWIDTH_8_BYTES | DMA_SLAVE_BUSWIDTH_16_BYTES |
+ DMA_SLAVE_BUSWIDTH_32_BYTES | DMA_SLAVE_BUSWIDTH_64_BYTES;
+ const struct rcar_dmac_of_data *data;
+ struct rcar_dmac_chan *chan;
+ struct dma_device *engine;
+ void __iomem *chan_base;
+ struct rcar_dmac *dmac;
+ unsigned int i;
+ int ret;
+
+ data = of_device_get_match_data(&pdev->dev);
+ if (!data)
+ return -EINVAL;
+
+ dmac = devm_kzalloc(&pdev->dev, sizeof(*dmac), GFP_KERNEL);
+ if (!dmac)
+ return -ENOMEM;
+
+ dmac->dev = &pdev->dev;
+ platform_set_drvdata(pdev, dmac);
+ ret = dma_set_max_seg_size(dmac->dev, RCAR_DMATCR_MASK);
+ if (ret)
+ return ret;
+
+ ret = dma_set_mask_and_coherent(dmac->dev, DMA_BIT_MASK(40));
+ if (ret)
+ return ret;
+
+ ret = rcar_dmac_parse_of(&pdev->dev, dmac);
+ if (ret < 0)
+ return ret;
+
+ /*
+ * A still unconfirmed hardware bug prevents the IPMMU microTLB 0 to be
+ * flushed correctly, resulting in memory corruption. DMAC 0 channel 0
+ * is connected to microTLB 0 on currently supported platforms, so we
+ * can't use it with the IPMMU. As the IOMMU API operates at the device
+ * level we can't disable it selectively, so ignore channel 0 for now if
+ * the device is part of an IOMMU group.
+ */
+ if (device_iommu_mapped(&pdev->dev))
+ dmac->channels_mask &= ~BIT(0);
+
+ dmac->channels = devm_kcalloc(&pdev->dev, dmac->n_channels,
+ sizeof(*dmac->channels), GFP_KERNEL);
+ if (!dmac->channels)
+ return -ENOMEM;
+
+ /* Request resources. */
+ dmac->dmac_base = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(dmac->dmac_base))
+ return PTR_ERR(dmac->dmac_base);
+
+ if (!data->chan_offset_base) {
+ dmac->chan_base = devm_platform_ioremap_resource(pdev, 1);
+ if (IS_ERR(dmac->chan_base))
+ return PTR_ERR(dmac->chan_base);
+
+ chan_base = dmac->chan_base;
+ } else {
+ chan_base = dmac->dmac_base + data->chan_offset_base;
+ }
+
+ for_each_rcar_dmac_chan(i, dmac, chan) {
+ chan->index = i;
+ chan->iomem = chan_base + i * data->chan_offset_stride;
+ }
+
+ /* Enable runtime PM and initialize the device. */
+ pm_runtime_enable(&pdev->dev);
+ ret = pm_runtime_resume_and_get(&pdev->dev);
+ if (ret < 0) {
+ dev_err(&pdev->dev, "runtime PM get sync failed (%d)\n", ret);
+ goto err_pm_disable;
+ }
+
+ ret = rcar_dmac_init(dmac);
+ pm_runtime_put(&pdev->dev);
+
+ if (ret) {
+ dev_err(&pdev->dev, "failed to reset device\n");
+ goto err_pm_disable;
+ }
+
+ /* Initialize engine */
+ engine = &dmac->engine;
+
+ dma_cap_set(DMA_MEMCPY, engine->cap_mask);
+ dma_cap_set(DMA_SLAVE, engine->cap_mask);
+
+ engine->dev = &pdev->dev;
+ engine->copy_align = ilog2(RCAR_DMAC_MEMCPY_XFER_SIZE);
+
+ engine->src_addr_widths = widths;
+ engine->dst_addr_widths = widths;
+ engine->directions = BIT(DMA_MEM_TO_DEV) | BIT(DMA_DEV_TO_MEM);
+ engine->residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
+
+ engine->device_alloc_chan_resources = rcar_dmac_alloc_chan_resources;
+ engine->device_free_chan_resources = rcar_dmac_free_chan_resources;
+ engine->device_prep_dma_memcpy = rcar_dmac_prep_dma_memcpy;
+ engine->device_prep_slave_sg = rcar_dmac_prep_slave_sg;
+ engine->device_prep_dma_cyclic = rcar_dmac_prep_dma_cyclic;
+ engine->device_config = rcar_dmac_device_config;
+ engine->device_pause = rcar_dmac_chan_pause;
+ engine->device_terminate_all = rcar_dmac_chan_terminate_all;
+ engine->device_tx_status = rcar_dmac_tx_status;
+ engine->device_issue_pending = rcar_dmac_issue_pending;
+ engine->device_synchronize = rcar_dmac_device_synchronize;
+
+ INIT_LIST_HEAD(&engine->channels);
+
+ for_each_rcar_dmac_chan(i, dmac, chan) {
+ ret = rcar_dmac_chan_probe(dmac, chan);
+ if (ret < 0)
+ goto err_pm_disable;
+ }
+
+ /* Register the DMAC as a DMA provider for DT. */
+ ret = of_dma_controller_register(pdev->dev.of_node, rcar_dmac_of_xlate,
+ NULL);
+ if (ret < 0)
+ goto err_pm_disable;
+
+ /*
+ * Register the DMA engine device.
+ *
+ * Default transfer size of 32 bytes requires 32-byte alignment.
+ */
+ ret = dma_async_device_register(engine);
+ if (ret < 0)
+ goto err_dma_free;
+
+ return 0;
+
+err_dma_free:
+ of_dma_controller_free(pdev->dev.of_node);
+err_pm_disable:
+ pm_runtime_disable(&pdev->dev);
+ return ret;
+}
+
+static int rcar_dmac_remove(struct platform_device *pdev)
+{
+ struct rcar_dmac *dmac = platform_get_drvdata(pdev);
+
+ of_dma_controller_free(pdev->dev.of_node);
+ dma_async_device_unregister(&dmac->engine);
+
+ pm_runtime_disable(&pdev->dev);
+
+ return 0;
+}
+
+static void rcar_dmac_shutdown(struct platform_device *pdev)
+{
+ struct rcar_dmac *dmac = platform_get_drvdata(pdev);
+
+ rcar_dmac_stop_all_chan(dmac);
+}
+
+static const struct rcar_dmac_of_data rcar_dmac_data = {
+ .chan_offset_base = 0x8000,
+ .chan_offset_stride = 0x80,
+};
+
+static const struct rcar_dmac_of_data rcar_gen4_dmac_data = {
+ .chan_offset_base = 0x0,
+ .chan_offset_stride = 0x1000,
+};
+
+static const struct of_device_id rcar_dmac_of_ids[] = {
+ {
+ .compatible = "renesas,rcar-dmac",
+ .data = &rcar_dmac_data,
+ }, {
+ .compatible = "renesas,rcar-gen4-dmac",
+ .data = &rcar_gen4_dmac_data,
+ }, {
+ .compatible = "renesas,dmac-r8a779a0",
+ .data = &rcar_gen4_dmac_data,
+ },
+ { /* Sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, rcar_dmac_of_ids);
+
+static struct platform_driver rcar_dmac_driver = {
+ .driver = {
+ .pm = &rcar_dmac_pm,
+ .name = "rcar-dmac",
+ .of_match_table = rcar_dmac_of_ids,
+ },
+ .probe = rcar_dmac_probe,
+ .remove = rcar_dmac_remove,
+ .shutdown = rcar_dmac_shutdown,
+};
+
+module_platform_driver(rcar_dmac_driver);
+
+MODULE_DESCRIPTION("R-Car Gen2 DMA Controller Driver");
+MODULE_AUTHOR("Laurent Pinchart <laurent.pinchart@ideasonboard.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/sh/rz-dmac.c b/drivers/dma/sh/rz-dmac.c
new file mode 100644
index 000000000..2967141f4
--- /dev/null
+++ b/drivers/dma/sh/rz-dmac.c
@@ -0,0 +1,1003 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Renesas RZ/G2L DMA Controller Driver
+ *
+ * Based on imx-dma.c
+ *
+ * Copyright (C) 2021 Renesas Electronics Corp.
+ * Copyright 2010 Sascha Hauer, Pengutronix <s.hauer@pengutronix.de>
+ * Copyright 2012 Javier Martin, Vista Silicon <javier.martin@vista-silicon.com>
+ */
+
+#include <linux/bitfield.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmaengine.h>
+#include <linux/interrupt.h>
+#include <linux/iopoll.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_dma.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+
+#include "../dmaengine.h"
+#include "../virt-dma.h"
+
+enum rz_dmac_prep_type {
+ RZ_DMAC_DESC_MEMCPY,
+ RZ_DMAC_DESC_SLAVE_SG,
+};
+
+struct rz_lmdesc {
+ u32 header;
+ u32 sa;
+ u32 da;
+ u32 tb;
+ u32 chcfg;
+ u32 chitvl;
+ u32 chext;
+ u32 nxla;
+};
+
+struct rz_dmac_desc {
+ struct virt_dma_desc vd;
+ dma_addr_t src;
+ dma_addr_t dest;
+ size_t len;
+ struct list_head node;
+ enum dma_transfer_direction direction;
+ enum rz_dmac_prep_type type;
+ /* For slave sg */
+ struct scatterlist *sg;
+ unsigned int sgcount;
+};
+
+#define to_rz_dmac_desc(d) container_of(d, struct rz_dmac_desc, vd)
+
+struct rz_dmac_chan {
+ struct virt_dma_chan vc;
+ void __iomem *ch_base;
+ void __iomem *ch_cmn_base;
+ unsigned int index;
+ int irq;
+ struct rz_dmac_desc *desc;
+ int descs_allocated;
+
+ enum dma_slave_buswidth src_word_size;
+ enum dma_slave_buswidth dst_word_size;
+ dma_addr_t src_per_address;
+ dma_addr_t dst_per_address;
+
+ u32 chcfg;
+ u32 chctrl;
+ int mid_rid;
+
+ struct list_head ld_free;
+ struct list_head ld_queue;
+ struct list_head ld_active;
+
+ struct {
+ struct rz_lmdesc *base;
+ struct rz_lmdesc *head;
+ struct rz_lmdesc *tail;
+ dma_addr_t base_dma;
+ } lmdesc;
+};
+
+#define to_rz_dmac_chan(c) container_of(c, struct rz_dmac_chan, vc.chan)
+
+struct rz_dmac {
+ struct dma_device engine;
+ struct device *dev;
+ void __iomem *base;
+ void __iomem *ext_base;
+
+ unsigned int n_channels;
+ struct rz_dmac_chan *channels;
+
+ DECLARE_BITMAP(modules, 1024);
+};
+
+#define to_rz_dmac(d) container_of(d, struct rz_dmac, engine)
+
+/*
+ * -----------------------------------------------------------------------------
+ * Registers
+ */
+
+#define CHSTAT 0x0024
+#define CHCTRL 0x0028
+#define CHCFG 0x002c
+#define NXLA 0x0038
+
+#define DCTRL 0x0000
+
+#define EACH_CHANNEL_OFFSET 0x0040
+#define CHANNEL_0_7_OFFSET 0x0000
+#define CHANNEL_0_7_COMMON_BASE 0x0300
+#define CHANNEL_8_15_OFFSET 0x0400
+#define CHANNEL_8_15_COMMON_BASE 0x0700
+
+#define CHSTAT_ER BIT(4)
+#define CHSTAT_EN BIT(0)
+
+#define CHCTRL_CLRINTMSK BIT(17)
+#define CHCTRL_CLRSUS BIT(9)
+#define CHCTRL_CLRTC BIT(6)
+#define CHCTRL_CLREND BIT(5)
+#define CHCTRL_CLRRQ BIT(4)
+#define CHCTRL_SWRST BIT(3)
+#define CHCTRL_STG BIT(2)
+#define CHCTRL_CLREN BIT(1)
+#define CHCTRL_SETEN BIT(0)
+#define CHCTRL_DEFAULT (CHCTRL_CLRINTMSK | CHCTRL_CLRSUS | \
+ CHCTRL_CLRTC | CHCTRL_CLREND | \
+ CHCTRL_CLRRQ | CHCTRL_SWRST | \
+ CHCTRL_CLREN)
+
+#define CHCFG_DMS BIT(31)
+#define CHCFG_DEM BIT(24)
+#define CHCFG_DAD BIT(21)
+#define CHCFG_SAD BIT(20)
+#define CHCFG_REQD BIT(3)
+#define CHCFG_SEL(bits) ((bits) & 0x07)
+#define CHCFG_MEM_COPY (0x80400008)
+#define CHCFG_FILL_DDS_MASK GENMASK(19, 16)
+#define CHCFG_FILL_SDS_MASK GENMASK(15, 12)
+#define CHCFG_FILL_TM(a) (((a) & BIT(5)) << 22)
+#define CHCFG_FILL_AM(a) (((a) & GENMASK(4, 2)) << 6)
+#define CHCFG_FILL_LVL(a) (((a) & BIT(1)) << 5)
+#define CHCFG_FILL_HIEN(a) (((a) & BIT(0)) << 5)
+
+#define MID_RID_MASK GENMASK(9, 0)
+#define CHCFG_MASK GENMASK(15, 10)
+#define CHCFG_DS_INVALID 0xFF
+#define DCTRL_LVINT BIT(1)
+#define DCTRL_PR BIT(0)
+#define DCTRL_DEFAULT (DCTRL_LVINT | DCTRL_PR)
+
+/* LINK MODE DESCRIPTOR */
+#define HEADER_LV BIT(0)
+
+#define RZ_DMAC_MAX_CHAN_DESCRIPTORS 16
+#define RZ_DMAC_MAX_CHANNELS 16
+#define DMAC_NR_LMDESC 64
+
+/*
+ * -----------------------------------------------------------------------------
+ * Device access
+ */
+
+static void rz_dmac_writel(struct rz_dmac *dmac, unsigned int val,
+ unsigned int offset)
+{
+ writel(val, dmac->base + offset);
+}
+
+static void rz_dmac_ext_writel(struct rz_dmac *dmac, unsigned int val,
+ unsigned int offset)
+{
+ writel(val, dmac->ext_base + offset);
+}
+
+static u32 rz_dmac_ext_readl(struct rz_dmac *dmac, unsigned int offset)
+{
+ return readl(dmac->ext_base + offset);
+}
+
+static void rz_dmac_ch_writel(struct rz_dmac_chan *channel, unsigned int val,
+ unsigned int offset, int which)
+{
+ if (which)
+ writel(val, channel->ch_base + offset);
+ else
+ writel(val, channel->ch_cmn_base + offset);
+}
+
+static u32 rz_dmac_ch_readl(struct rz_dmac_chan *channel,
+ unsigned int offset, int which)
+{
+ if (which)
+ return readl(channel->ch_base + offset);
+ else
+ return readl(channel->ch_cmn_base + offset);
+}
+
+/*
+ * -----------------------------------------------------------------------------
+ * Initialization
+ */
+
+static void rz_lmdesc_setup(struct rz_dmac_chan *channel,
+ struct rz_lmdesc *lmdesc)
+{
+ u32 nxla;
+
+ channel->lmdesc.base = lmdesc;
+ channel->lmdesc.head = lmdesc;
+ channel->lmdesc.tail = lmdesc;
+ nxla = channel->lmdesc.base_dma;
+ while (lmdesc < (channel->lmdesc.base + (DMAC_NR_LMDESC - 1))) {
+ lmdesc->header = 0;
+ nxla += sizeof(*lmdesc);
+ lmdesc->nxla = nxla;
+ lmdesc++;
+ }
+
+ lmdesc->header = 0;
+ lmdesc->nxla = channel->lmdesc.base_dma;
+}
+
+/*
+ * -----------------------------------------------------------------------------
+ * Descriptors preparation
+ */
+
+static void rz_dmac_lmdesc_recycle(struct rz_dmac_chan *channel)
+{
+ struct rz_lmdesc *lmdesc = channel->lmdesc.head;
+
+ while (!(lmdesc->header & HEADER_LV)) {
+ lmdesc->header = 0;
+ lmdesc++;
+ if (lmdesc >= (channel->lmdesc.base + DMAC_NR_LMDESC))
+ lmdesc = channel->lmdesc.base;
+ }
+ channel->lmdesc.head = lmdesc;
+}
+
+static void rz_dmac_enable_hw(struct rz_dmac_chan *channel)
+{
+ struct dma_chan *chan = &channel->vc.chan;
+ struct rz_dmac *dmac = to_rz_dmac(chan->device);
+ unsigned long flags;
+ u32 nxla;
+ u32 chctrl;
+ u32 chstat;
+
+ dev_dbg(dmac->dev, "%s channel %d\n", __func__, channel->index);
+
+ local_irq_save(flags);
+
+ rz_dmac_lmdesc_recycle(channel);
+
+ nxla = channel->lmdesc.base_dma +
+ (sizeof(struct rz_lmdesc) * (channel->lmdesc.head -
+ channel->lmdesc.base));
+
+ chstat = rz_dmac_ch_readl(channel, CHSTAT, 1);
+ if (!(chstat & CHSTAT_EN)) {
+ chctrl = (channel->chctrl | CHCTRL_SETEN);
+ rz_dmac_ch_writel(channel, nxla, NXLA, 1);
+ rz_dmac_ch_writel(channel, channel->chcfg, CHCFG, 1);
+ rz_dmac_ch_writel(channel, CHCTRL_SWRST, CHCTRL, 1);
+ rz_dmac_ch_writel(channel, chctrl, CHCTRL, 1);
+ }
+
+ local_irq_restore(flags);
+}
+
+static void rz_dmac_disable_hw(struct rz_dmac_chan *channel)
+{
+ struct dma_chan *chan = &channel->vc.chan;
+ struct rz_dmac *dmac = to_rz_dmac(chan->device);
+ unsigned long flags;
+
+ dev_dbg(dmac->dev, "%s channel %d\n", __func__, channel->index);
+
+ local_irq_save(flags);
+ rz_dmac_ch_writel(channel, CHCTRL_DEFAULT, CHCTRL, 1);
+ local_irq_restore(flags);
+}
+
+static void rz_dmac_set_dmars_register(struct rz_dmac *dmac, int nr, u32 dmars)
+{
+ u32 dmars_offset = (nr / 2) * 4;
+ u32 shift = (nr % 2) * 16;
+ u32 dmars32;
+
+ dmars32 = rz_dmac_ext_readl(dmac, dmars_offset);
+ dmars32 &= ~(0xffff << shift);
+ dmars32 |= dmars << shift;
+
+ rz_dmac_ext_writel(dmac, dmars32, dmars_offset);
+}
+
+static void rz_dmac_prepare_desc_for_memcpy(struct rz_dmac_chan *channel)
+{
+ struct dma_chan *chan = &channel->vc.chan;
+ struct rz_dmac *dmac = to_rz_dmac(chan->device);
+ struct rz_lmdesc *lmdesc = channel->lmdesc.tail;
+ struct rz_dmac_desc *d = channel->desc;
+ u32 chcfg = CHCFG_MEM_COPY;
+
+ /* prepare descriptor */
+ lmdesc->sa = d->src;
+ lmdesc->da = d->dest;
+ lmdesc->tb = d->len;
+ lmdesc->chcfg = chcfg;
+ lmdesc->chitvl = 0;
+ lmdesc->chext = 0;
+ lmdesc->header = HEADER_LV;
+
+ rz_dmac_set_dmars_register(dmac, channel->index, 0);
+
+ channel->chcfg = chcfg;
+ channel->chctrl = CHCTRL_STG | CHCTRL_SETEN;
+}
+
+static void rz_dmac_prepare_descs_for_slave_sg(struct rz_dmac_chan *channel)
+{
+ struct dma_chan *chan = &channel->vc.chan;
+ struct rz_dmac *dmac = to_rz_dmac(chan->device);
+ struct rz_dmac_desc *d = channel->desc;
+ struct scatterlist *sg, *sgl = d->sg;
+ struct rz_lmdesc *lmdesc;
+ unsigned int i, sg_len = d->sgcount;
+
+ channel->chcfg |= CHCFG_SEL(channel->index) | CHCFG_DEM | CHCFG_DMS;
+
+ if (d->direction == DMA_DEV_TO_MEM) {
+ channel->chcfg |= CHCFG_SAD;
+ channel->chcfg &= ~CHCFG_REQD;
+ } else {
+ channel->chcfg |= CHCFG_DAD | CHCFG_REQD;
+ }
+
+ lmdesc = channel->lmdesc.tail;
+
+ for (i = 0, sg = sgl; i < sg_len; i++, sg = sg_next(sg)) {
+ if (d->direction == DMA_DEV_TO_MEM) {
+ lmdesc->sa = channel->src_per_address;
+ lmdesc->da = sg_dma_address(sg);
+ } else {
+ lmdesc->sa = sg_dma_address(sg);
+ lmdesc->da = channel->dst_per_address;
+ }
+
+ lmdesc->tb = sg_dma_len(sg);
+ lmdesc->chitvl = 0;
+ lmdesc->chext = 0;
+ if (i == (sg_len - 1)) {
+ lmdesc->chcfg = (channel->chcfg & ~CHCFG_DEM);
+ lmdesc->header = HEADER_LV;
+ } else {
+ lmdesc->chcfg = channel->chcfg;
+ lmdesc->header = HEADER_LV;
+ }
+ if (++lmdesc >= (channel->lmdesc.base + DMAC_NR_LMDESC))
+ lmdesc = channel->lmdesc.base;
+ }
+
+ channel->lmdesc.tail = lmdesc;
+
+ rz_dmac_set_dmars_register(dmac, channel->index, channel->mid_rid);
+ channel->chctrl = CHCTRL_SETEN;
+}
+
+static int rz_dmac_xfer_desc(struct rz_dmac_chan *chan)
+{
+ struct rz_dmac_desc *d = chan->desc;
+ struct virt_dma_desc *vd;
+
+ vd = vchan_next_desc(&chan->vc);
+ if (!vd)
+ return 0;
+
+ list_del(&vd->node);
+
+ switch (d->type) {
+ case RZ_DMAC_DESC_MEMCPY:
+ rz_dmac_prepare_desc_for_memcpy(chan);
+ break;
+
+ case RZ_DMAC_DESC_SLAVE_SG:
+ rz_dmac_prepare_descs_for_slave_sg(chan);
+ break;
+
+ default:
+ return -EINVAL;
+ }
+
+ rz_dmac_enable_hw(chan);
+
+ return 0;
+}
+
+/*
+ * -----------------------------------------------------------------------------
+ * DMA engine operations
+ */
+
+static int rz_dmac_alloc_chan_resources(struct dma_chan *chan)
+{
+ struct rz_dmac_chan *channel = to_rz_dmac_chan(chan);
+
+ while (channel->descs_allocated < RZ_DMAC_MAX_CHAN_DESCRIPTORS) {
+ struct rz_dmac_desc *desc;
+
+ desc = kzalloc(sizeof(*desc), GFP_KERNEL);
+ if (!desc)
+ break;
+
+ list_add_tail(&desc->node, &channel->ld_free);
+ channel->descs_allocated++;
+ }
+
+ if (!channel->descs_allocated)
+ return -ENOMEM;
+
+ return channel->descs_allocated;
+}
+
+static void rz_dmac_free_chan_resources(struct dma_chan *chan)
+{
+ struct rz_dmac_chan *channel = to_rz_dmac_chan(chan);
+ struct rz_dmac *dmac = to_rz_dmac(chan->device);
+ struct rz_lmdesc *lmdesc = channel->lmdesc.base;
+ struct rz_dmac_desc *desc, *_desc;
+ unsigned long flags;
+ unsigned int i;
+
+ spin_lock_irqsave(&channel->vc.lock, flags);
+
+ for (i = 0; i < DMAC_NR_LMDESC; i++)
+ lmdesc[i].header = 0;
+
+ rz_dmac_disable_hw(channel);
+ list_splice_tail_init(&channel->ld_active, &channel->ld_free);
+ list_splice_tail_init(&channel->ld_queue, &channel->ld_free);
+
+ if (channel->mid_rid >= 0) {
+ clear_bit(channel->mid_rid, dmac->modules);
+ channel->mid_rid = -EINVAL;
+ }
+
+ spin_unlock_irqrestore(&channel->vc.lock, flags);
+
+ list_for_each_entry_safe(desc, _desc, &channel->ld_free, node) {
+ kfree(desc);
+ channel->descs_allocated--;
+ }
+
+ INIT_LIST_HEAD(&channel->ld_free);
+ vchan_free_chan_resources(&channel->vc);
+}
+
+static struct dma_async_tx_descriptor *
+rz_dmac_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
+ size_t len, unsigned long flags)
+{
+ struct rz_dmac_chan *channel = to_rz_dmac_chan(chan);
+ struct rz_dmac *dmac = to_rz_dmac(chan->device);
+ struct rz_dmac_desc *desc;
+
+ dev_dbg(dmac->dev, "%s channel: %d src=0x%pad dst=0x%pad len=%zu\n",
+ __func__, channel->index, &src, &dest, len);
+
+ if (list_empty(&channel->ld_free))
+ return NULL;
+
+ desc = list_first_entry(&channel->ld_free, struct rz_dmac_desc, node);
+
+ desc->type = RZ_DMAC_DESC_MEMCPY;
+ desc->src = src;
+ desc->dest = dest;
+ desc->len = len;
+ desc->direction = DMA_MEM_TO_MEM;
+
+ list_move_tail(channel->ld_free.next, &channel->ld_queue);
+ return vchan_tx_prep(&channel->vc, &desc->vd, flags);
+}
+
+static struct dma_async_tx_descriptor *
+rz_dmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
+ unsigned int sg_len,
+ enum dma_transfer_direction direction,
+ unsigned long flags, void *context)
+{
+ struct rz_dmac_chan *channel = to_rz_dmac_chan(chan);
+ struct rz_dmac_desc *desc;
+ struct scatterlist *sg;
+ int dma_length = 0;
+ int i = 0;
+
+ if (list_empty(&channel->ld_free))
+ return NULL;
+
+ desc = list_first_entry(&channel->ld_free, struct rz_dmac_desc, node);
+
+ for_each_sg(sgl, sg, sg_len, i) {
+ dma_length += sg_dma_len(sg);
+ }
+
+ desc->type = RZ_DMAC_DESC_SLAVE_SG;
+ desc->sg = sgl;
+ desc->sgcount = sg_len;
+ desc->len = dma_length;
+ desc->direction = direction;
+
+ if (direction == DMA_DEV_TO_MEM)
+ desc->src = channel->src_per_address;
+ else
+ desc->dest = channel->dst_per_address;
+
+ list_move_tail(channel->ld_free.next, &channel->ld_queue);
+ return vchan_tx_prep(&channel->vc, &desc->vd, flags);
+}
+
+static int rz_dmac_terminate_all(struct dma_chan *chan)
+{
+ struct rz_dmac_chan *channel = to_rz_dmac_chan(chan);
+ unsigned long flags;
+ LIST_HEAD(head);
+
+ rz_dmac_disable_hw(channel);
+ spin_lock_irqsave(&channel->vc.lock, flags);
+ list_splice_tail_init(&channel->ld_active, &channel->ld_free);
+ list_splice_tail_init(&channel->ld_queue, &channel->ld_free);
+ spin_unlock_irqrestore(&channel->vc.lock, flags);
+ vchan_get_all_descriptors(&channel->vc, &head);
+ vchan_dma_desc_free_list(&channel->vc, &head);
+
+ return 0;
+}
+
+static void rz_dmac_issue_pending(struct dma_chan *chan)
+{
+ struct rz_dmac_chan *channel = to_rz_dmac_chan(chan);
+ struct rz_dmac *dmac = to_rz_dmac(chan->device);
+ struct rz_dmac_desc *desc;
+ unsigned long flags;
+
+ spin_lock_irqsave(&channel->vc.lock, flags);
+
+ if (!list_empty(&channel->ld_queue)) {
+ desc = list_first_entry(&channel->ld_queue,
+ struct rz_dmac_desc, node);
+ channel->desc = desc;
+ if (vchan_issue_pending(&channel->vc)) {
+ if (rz_dmac_xfer_desc(channel) < 0)
+ dev_warn(dmac->dev, "ch: %d couldn't issue DMA xfer\n",
+ channel->index);
+ else
+ list_move_tail(channel->ld_queue.next,
+ &channel->ld_active);
+ }
+ }
+
+ spin_unlock_irqrestore(&channel->vc.lock, flags);
+}
+
+static u8 rz_dmac_ds_to_val_mapping(enum dma_slave_buswidth ds)
+{
+ u8 i;
+ static const enum dma_slave_buswidth ds_lut[] = {
+ DMA_SLAVE_BUSWIDTH_1_BYTE,
+ DMA_SLAVE_BUSWIDTH_2_BYTES,
+ DMA_SLAVE_BUSWIDTH_4_BYTES,
+ DMA_SLAVE_BUSWIDTH_8_BYTES,
+ DMA_SLAVE_BUSWIDTH_16_BYTES,
+ DMA_SLAVE_BUSWIDTH_32_BYTES,
+ DMA_SLAVE_BUSWIDTH_64_BYTES,
+ DMA_SLAVE_BUSWIDTH_128_BYTES,
+ };
+
+ for (i = 0; i < ARRAY_SIZE(ds_lut); i++) {
+ if (ds_lut[i] == ds)
+ return i;
+ }
+
+ return CHCFG_DS_INVALID;
+}
+
+static int rz_dmac_config(struct dma_chan *chan,
+ struct dma_slave_config *config)
+{
+ struct rz_dmac_chan *channel = to_rz_dmac_chan(chan);
+ u32 val;
+
+ channel->src_per_address = config->src_addr;
+ channel->src_word_size = config->src_addr_width;
+ channel->dst_per_address = config->dst_addr;
+ channel->dst_word_size = config->dst_addr_width;
+
+ val = rz_dmac_ds_to_val_mapping(config->dst_addr_width);
+ if (val == CHCFG_DS_INVALID)
+ return -EINVAL;
+
+ channel->chcfg &= ~CHCFG_FILL_DDS_MASK;
+ channel->chcfg |= FIELD_PREP(CHCFG_FILL_DDS_MASK, val);
+
+ val = rz_dmac_ds_to_val_mapping(config->src_addr_width);
+ if (val == CHCFG_DS_INVALID)
+ return -EINVAL;
+
+ channel->chcfg &= ~CHCFG_FILL_SDS_MASK;
+ channel->chcfg |= FIELD_PREP(CHCFG_FILL_SDS_MASK, val);
+
+ return 0;
+}
+
+static void rz_dmac_virt_desc_free(struct virt_dma_desc *vd)
+{
+ /*
+ * Place holder
+ * Descriptor allocation is done during alloc_chan_resources and
+ * get freed during free_chan_resources.
+ * list is used to manage the descriptors and avoid any memory
+ * allocation/free during DMA read/write.
+ */
+}
+
+static void rz_dmac_device_synchronize(struct dma_chan *chan)
+{
+ struct rz_dmac_chan *channel = to_rz_dmac_chan(chan);
+ struct rz_dmac *dmac = to_rz_dmac(chan->device);
+ u32 chstat;
+ int ret;
+
+ ret = read_poll_timeout(rz_dmac_ch_readl, chstat, !(chstat & CHSTAT_EN),
+ 100, 100000, false, channel, CHSTAT, 1);
+ if (ret < 0)
+ dev_warn(dmac->dev, "DMA Timeout");
+
+ rz_dmac_set_dmars_register(dmac, channel->index, 0);
+}
+
+/*
+ * -----------------------------------------------------------------------------
+ * IRQ handling
+ */
+
+static void rz_dmac_irq_handle_channel(struct rz_dmac_chan *channel)
+{
+ struct dma_chan *chan = &channel->vc.chan;
+ struct rz_dmac *dmac = to_rz_dmac(chan->device);
+ u32 chstat, chctrl;
+
+ chstat = rz_dmac_ch_readl(channel, CHSTAT, 1);
+ if (chstat & CHSTAT_ER) {
+ dev_err(dmac->dev, "DMAC err CHSTAT_%d = %08X\n",
+ channel->index, chstat);
+ rz_dmac_ch_writel(channel, CHCTRL_DEFAULT, CHCTRL, 1);
+ goto done;
+ }
+
+ chctrl = rz_dmac_ch_readl(channel, CHCTRL, 1);
+ rz_dmac_ch_writel(channel, chctrl | CHCTRL_CLREND, CHCTRL, 1);
+done:
+ return;
+}
+
+static irqreturn_t rz_dmac_irq_handler(int irq, void *dev_id)
+{
+ struct rz_dmac_chan *channel = dev_id;
+
+ if (channel) {
+ rz_dmac_irq_handle_channel(channel);
+ return IRQ_WAKE_THREAD;
+ }
+ /* handle DMAERR irq */
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t rz_dmac_irq_handler_thread(int irq, void *dev_id)
+{
+ struct rz_dmac_chan *channel = dev_id;
+ struct rz_dmac_desc *desc = NULL;
+ unsigned long flags;
+
+ spin_lock_irqsave(&channel->vc.lock, flags);
+
+ if (list_empty(&channel->ld_active)) {
+ /* Someone might have called terminate all */
+ goto out;
+ }
+
+ desc = list_first_entry(&channel->ld_active, struct rz_dmac_desc, node);
+ vchan_cookie_complete(&desc->vd);
+ list_move_tail(channel->ld_active.next, &channel->ld_free);
+ if (!list_empty(&channel->ld_queue)) {
+ desc = list_first_entry(&channel->ld_queue, struct rz_dmac_desc,
+ node);
+ channel->desc = desc;
+ if (rz_dmac_xfer_desc(channel) == 0)
+ list_move_tail(channel->ld_queue.next, &channel->ld_active);
+ }
+out:
+ spin_unlock_irqrestore(&channel->vc.lock, flags);
+
+ return IRQ_HANDLED;
+}
+
+/*
+ * -----------------------------------------------------------------------------
+ * OF xlate and channel filter
+ */
+
+static bool rz_dmac_chan_filter(struct dma_chan *chan, void *arg)
+{
+ struct rz_dmac_chan *channel = to_rz_dmac_chan(chan);
+ struct rz_dmac *dmac = to_rz_dmac(chan->device);
+ struct of_phandle_args *dma_spec = arg;
+ u32 ch_cfg;
+
+ channel->mid_rid = dma_spec->args[0] & MID_RID_MASK;
+ ch_cfg = (dma_spec->args[0] & CHCFG_MASK) >> 10;
+ channel->chcfg = CHCFG_FILL_TM(ch_cfg) | CHCFG_FILL_AM(ch_cfg) |
+ CHCFG_FILL_LVL(ch_cfg) | CHCFG_FILL_HIEN(ch_cfg);
+
+ return !test_and_set_bit(channel->mid_rid, dmac->modules);
+}
+
+static struct dma_chan *rz_dmac_of_xlate(struct of_phandle_args *dma_spec,
+ struct of_dma *ofdma)
+{
+ dma_cap_mask_t mask;
+
+ if (dma_spec->args_count != 1)
+ return NULL;
+
+ /* Only slave DMA channels can be allocated via DT */
+ dma_cap_zero(mask);
+ dma_cap_set(DMA_SLAVE, mask);
+
+ return dma_request_channel(mask, rz_dmac_chan_filter, dma_spec);
+}
+
+/*
+ * -----------------------------------------------------------------------------
+ * Probe and remove
+ */
+
+static int rz_dmac_chan_probe(struct rz_dmac *dmac,
+ struct rz_dmac_chan *channel,
+ unsigned int index)
+{
+ struct platform_device *pdev = to_platform_device(dmac->dev);
+ struct rz_lmdesc *lmdesc;
+ char pdev_irqname[5];
+ char *irqname;
+ int ret;
+
+ channel->index = index;
+ channel->mid_rid = -EINVAL;
+
+ /* Request the channel interrupt. */
+ sprintf(pdev_irqname, "ch%u", index);
+ channel->irq = platform_get_irq_byname(pdev, pdev_irqname);
+ if (channel->irq < 0)
+ return channel->irq;
+
+ irqname = devm_kasprintf(dmac->dev, GFP_KERNEL, "%s:%u",
+ dev_name(dmac->dev), index);
+ if (!irqname)
+ return -ENOMEM;
+
+ ret = devm_request_threaded_irq(dmac->dev, channel->irq,
+ rz_dmac_irq_handler,
+ rz_dmac_irq_handler_thread, 0,
+ irqname, channel);
+ if (ret) {
+ dev_err(dmac->dev, "failed to request IRQ %u (%d)\n",
+ channel->irq, ret);
+ return ret;
+ }
+
+ /* Set io base address for each channel */
+ if (index < 8) {
+ channel->ch_base = dmac->base + CHANNEL_0_7_OFFSET +
+ EACH_CHANNEL_OFFSET * index;
+ channel->ch_cmn_base = dmac->base + CHANNEL_0_7_COMMON_BASE;
+ } else {
+ channel->ch_base = dmac->base + CHANNEL_8_15_OFFSET +
+ EACH_CHANNEL_OFFSET * (index - 8);
+ channel->ch_cmn_base = dmac->base + CHANNEL_8_15_COMMON_BASE;
+ }
+
+ /* Allocate descriptors */
+ lmdesc = dma_alloc_coherent(&pdev->dev,
+ sizeof(struct rz_lmdesc) * DMAC_NR_LMDESC,
+ &channel->lmdesc.base_dma, GFP_KERNEL);
+ if (!lmdesc) {
+ dev_err(&pdev->dev, "Can't allocate memory (lmdesc)\n");
+ return -ENOMEM;
+ }
+ rz_lmdesc_setup(channel, lmdesc);
+
+ /* Initialize register for each channel */
+ rz_dmac_ch_writel(channel, CHCTRL_DEFAULT, CHCTRL, 1);
+
+ channel->vc.desc_free = rz_dmac_virt_desc_free;
+ vchan_init(&channel->vc, &dmac->engine);
+ INIT_LIST_HEAD(&channel->ld_queue);
+ INIT_LIST_HEAD(&channel->ld_free);
+ INIT_LIST_HEAD(&channel->ld_active);
+
+ return 0;
+}
+
+static int rz_dmac_parse_of(struct device *dev, struct rz_dmac *dmac)
+{
+ struct device_node *np = dev->of_node;
+ int ret;
+
+ ret = of_property_read_u32(np, "dma-channels", &dmac->n_channels);
+ if (ret < 0) {
+ dev_err(dev, "unable to read dma-channels property\n");
+ return ret;
+ }
+
+ if (!dmac->n_channels || dmac->n_channels > RZ_DMAC_MAX_CHANNELS) {
+ dev_err(dev, "invalid number of channels %u\n", dmac->n_channels);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int rz_dmac_probe(struct platform_device *pdev)
+{
+ const char *irqname = "error";
+ struct dma_device *engine;
+ struct rz_dmac *dmac;
+ int channel_num;
+ unsigned int i;
+ int ret;
+ int irq;
+
+ dmac = devm_kzalloc(&pdev->dev, sizeof(*dmac), GFP_KERNEL);
+ if (!dmac)
+ return -ENOMEM;
+
+ dmac->dev = &pdev->dev;
+ platform_set_drvdata(pdev, dmac);
+
+ ret = rz_dmac_parse_of(&pdev->dev, dmac);
+ if (ret < 0)
+ return ret;
+
+ dmac->channels = devm_kcalloc(&pdev->dev, dmac->n_channels,
+ sizeof(*dmac->channels), GFP_KERNEL);
+ if (!dmac->channels)
+ return -ENOMEM;
+
+ /* Request resources */
+ dmac->base = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(dmac->base))
+ return PTR_ERR(dmac->base);
+
+ dmac->ext_base = devm_platform_ioremap_resource(pdev, 1);
+ if (IS_ERR(dmac->ext_base))
+ return PTR_ERR(dmac->ext_base);
+
+ /* Register interrupt handler for error */
+ irq = platform_get_irq_byname(pdev, irqname);
+ if (irq < 0)
+ return irq;
+
+ ret = devm_request_irq(&pdev->dev, irq, rz_dmac_irq_handler, 0,
+ irqname, NULL);
+ if (ret) {
+ dev_err(&pdev->dev, "failed to request IRQ %u (%d)\n",
+ irq, ret);
+ return ret;
+ }
+
+ /* Initialize the channels. */
+ INIT_LIST_HEAD(&dmac->engine.channels);
+
+ pm_runtime_enable(&pdev->dev);
+ ret = pm_runtime_resume_and_get(&pdev->dev);
+ if (ret < 0) {
+ dev_err(&pdev->dev, "pm_runtime_resume_and_get failed\n");
+ goto err_pm_disable;
+ }
+
+ for (i = 0; i < dmac->n_channels; i++) {
+ ret = rz_dmac_chan_probe(dmac, &dmac->channels[i], i);
+ if (ret < 0)
+ goto err;
+ }
+
+ /* Register the DMAC as a DMA provider for DT. */
+ ret = of_dma_controller_register(pdev->dev.of_node, rz_dmac_of_xlate,
+ NULL);
+ if (ret < 0)
+ goto err;
+
+ /* Register the DMA engine device. */
+ engine = &dmac->engine;
+ dma_cap_set(DMA_SLAVE, engine->cap_mask);
+ dma_cap_set(DMA_MEMCPY, engine->cap_mask);
+ rz_dmac_writel(dmac, DCTRL_DEFAULT, CHANNEL_0_7_COMMON_BASE + DCTRL);
+ rz_dmac_writel(dmac, DCTRL_DEFAULT, CHANNEL_8_15_COMMON_BASE + DCTRL);
+
+ engine->dev = &pdev->dev;
+
+ engine->device_alloc_chan_resources = rz_dmac_alloc_chan_resources;
+ engine->device_free_chan_resources = rz_dmac_free_chan_resources;
+ engine->device_tx_status = dma_cookie_status;
+ engine->device_prep_slave_sg = rz_dmac_prep_slave_sg;
+ engine->device_prep_dma_memcpy = rz_dmac_prep_dma_memcpy;
+ engine->device_config = rz_dmac_config;
+ engine->device_terminate_all = rz_dmac_terminate_all;
+ engine->device_issue_pending = rz_dmac_issue_pending;
+ engine->device_synchronize = rz_dmac_device_synchronize;
+
+ engine->copy_align = DMAENGINE_ALIGN_1_BYTE;
+ dma_set_max_seg_size(engine->dev, U32_MAX);
+
+ ret = dma_async_device_register(engine);
+ if (ret < 0) {
+ dev_err(&pdev->dev, "unable to register\n");
+ goto dma_register_err;
+ }
+ return 0;
+
+dma_register_err:
+ of_dma_controller_free(pdev->dev.of_node);
+err:
+ channel_num = i ? i - 1 : 0;
+ for (i = 0; i < channel_num; i++) {
+ struct rz_dmac_chan *channel = &dmac->channels[i];
+
+ dma_free_coherent(&pdev->dev,
+ sizeof(struct rz_lmdesc) * DMAC_NR_LMDESC,
+ channel->lmdesc.base,
+ channel->lmdesc.base_dma);
+ }
+
+ pm_runtime_put(&pdev->dev);
+err_pm_disable:
+ pm_runtime_disable(&pdev->dev);
+
+ return ret;
+}
+
+static int rz_dmac_remove(struct platform_device *pdev)
+{
+ struct rz_dmac *dmac = platform_get_drvdata(pdev);
+ unsigned int i;
+
+ for (i = 0; i < dmac->n_channels; i++) {
+ struct rz_dmac_chan *channel = &dmac->channels[i];
+
+ dma_free_coherent(&pdev->dev,
+ sizeof(struct rz_lmdesc) * DMAC_NR_LMDESC,
+ channel->lmdesc.base,
+ channel->lmdesc.base_dma);
+ }
+ of_dma_controller_free(pdev->dev.of_node);
+ dma_async_device_unregister(&dmac->engine);
+ pm_runtime_put(&pdev->dev);
+ pm_runtime_disable(&pdev->dev);
+
+ return 0;
+}
+
+static const struct of_device_id of_rz_dmac_match[] = {
+ { .compatible = "renesas,rz-dmac", },
+ { /* Sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, of_rz_dmac_match);
+
+static struct platform_driver rz_dmac_driver = {
+ .driver = {
+ .name = "rz-dmac",
+ .of_match_table = of_rz_dmac_match,
+ },
+ .probe = rz_dmac_probe,
+ .remove = rz_dmac_remove,
+};
+
+module_platform_driver(rz_dmac_driver);
+
+MODULE_DESCRIPTION("Renesas RZ/G2L DMA Controller Driver");
+MODULE_AUTHOR("Biju Das <biju.das.jz@bp.renesas.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/sh/shdma-arm.h b/drivers/dma/sh/shdma-arm.h
new file mode 100644
index 000000000..7459f9a13
--- /dev/null
+++ b/drivers/dma/sh/shdma-arm.h
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Renesas SuperH DMA Engine support
+ *
+ * Copyright (C) 2013 Renesas Electronics, Inc.
+ */
+
+#ifndef SHDMA_ARM_H
+#define SHDMA_ARM_H
+
+#include "shdma.h"
+
+/* Transmit sizes and respective CHCR register values */
+enum {
+ XMIT_SZ_8BIT = 0,
+ XMIT_SZ_16BIT = 1,
+ XMIT_SZ_32BIT = 2,
+ XMIT_SZ_64BIT = 7,
+ XMIT_SZ_128BIT = 3,
+ XMIT_SZ_256BIT = 4,
+ XMIT_SZ_512BIT = 5,
+};
+
+/* log2(size / 8) - used to calculate number of transfers */
+#define SH_DMAE_TS_SHIFT { \
+ [XMIT_SZ_8BIT] = 0, \
+ [XMIT_SZ_16BIT] = 1, \
+ [XMIT_SZ_32BIT] = 2, \
+ [XMIT_SZ_64BIT] = 3, \
+ [XMIT_SZ_128BIT] = 4, \
+ [XMIT_SZ_256BIT] = 5, \
+ [XMIT_SZ_512BIT] = 6, \
+}
+
+#define TS_LOW_BIT 0x3 /* --xx */
+#define TS_HI_BIT 0xc /* xx-- */
+
+#define TS_LOW_SHIFT (3)
+#define TS_HI_SHIFT (20 - 2) /* 2 bits for shifted low TS */
+
+#define TS_INDEX2VAL(i) \
+ ((((i) & TS_LOW_BIT) << TS_LOW_SHIFT) |\
+ (((i) & TS_HI_BIT) << TS_HI_SHIFT))
+
+#define CHCR_TX(xmit_sz) (DM_FIX | SM_INC | RS_ERS | TS_INDEX2VAL((xmit_sz)))
+#define CHCR_RX(xmit_sz) (DM_INC | SM_FIX | RS_ERS | TS_INDEX2VAL((xmit_sz)))
+
+#endif
diff --git a/drivers/dma/sh/shdma-base.c b/drivers/dma/sh/shdma-base.c
new file mode 100644
index 000000000..158e5e7de
--- /dev/null
+++ b/drivers/dma/sh/shdma-base.c
@@ -0,0 +1,1052 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Dmaengine driver base library for DMA controllers, found on SH-based SoCs
+ *
+ * extracted from shdma.c
+ *
+ * Copyright (C) 2011-2012 Guennadi Liakhovetski <g.liakhovetski@gmx.de>
+ * Copyright (C) 2009 Nobuhiro Iwamatsu <iwamatsu.nobuhiro@renesas.com>
+ * Copyright (C) 2009 Renesas Solutions, Inc. All rights reserved.
+ * Copyright (C) 2007 Freescale Semiconductor, Inc. All rights reserved.
+ */
+
+#include <linux/delay.h>
+#include <linux/shdma-base.h>
+#include <linux/dmaengine.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/pm_runtime.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+
+#include "../dmaengine.h"
+
+/* DMA descriptor control */
+enum shdma_desc_status {
+ DESC_IDLE,
+ DESC_PREPARED,
+ DESC_SUBMITTED,
+ DESC_COMPLETED, /* completed, have to call callback */
+ DESC_WAITING, /* callback called, waiting for ack / re-submit */
+};
+
+#define NR_DESCS_PER_CHANNEL 32
+
+#define to_shdma_chan(c) container_of(c, struct shdma_chan, dma_chan)
+#define to_shdma_dev(d) container_of(d, struct shdma_dev, dma_dev)
+
+/*
+ * For slave DMA we assume, that there is a finite number of DMA slaves in the
+ * system, and that each such slave can only use a finite number of channels.
+ * We use slave channel IDs to make sure, that no such slave channel ID is
+ * allocated more than once.
+ */
+static unsigned int slave_num = 256;
+module_param(slave_num, uint, 0444);
+
+/* A bitmask with slave_num bits */
+static unsigned long *shdma_slave_used;
+
+/* Called under spin_lock_irq(&schan->chan_lock") */
+static void shdma_chan_xfer_ld_queue(struct shdma_chan *schan)
+{
+ struct shdma_dev *sdev = to_shdma_dev(schan->dma_chan.device);
+ const struct shdma_ops *ops = sdev->ops;
+ struct shdma_desc *sdesc;
+
+ /* DMA work check */
+ if (ops->channel_busy(schan))
+ return;
+
+ /* Find the first not transferred descriptor */
+ list_for_each_entry(sdesc, &schan->ld_queue, node)
+ if (sdesc->mark == DESC_SUBMITTED) {
+ ops->start_xfer(schan, sdesc);
+ break;
+ }
+}
+
+static dma_cookie_t shdma_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+ struct shdma_desc *chunk, *c, *desc =
+ container_of(tx, struct shdma_desc, async_tx);
+ struct shdma_chan *schan = to_shdma_chan(tx->chan);
+ dma_async_tx_callback callback = tx->callback;
+ dma_cookie_t cookie;
+ bool power_up;
+
+ spin_lock_irq(&schan->chan_lock);
+
+ power_up = list_empty(&schan->ld_queue);
+
+ cookie = dma_cookie_assign(tx);
+
+ /* Mark all chunks of this descriptor as submitted, move to the queue */
+ list_for_each_entry_safe(chunk, c, desc->node.prev, node) {
+ /*
+ * All chunks are on the global ld_free, so, we have to find
+ * the end of the chain ourselves
+ */
+ if (chunk != desc && (chunk->mark == DESC_IDLE ||
+ chunk->async_tx.cookie > 0 ||
+ chunk->async_tx.cookie == -EBUSY ||
+ &chunk->node == &schan->ld_free))
+ break;
+ chunk->mark = DESC_SUBMITTED;
+ if (chunk->chunks == 1) {
+ chunk->async_tx.callback = callback;
+ chunk->async_tx.callback_param = tx->callback_param;
+ } else {
+ /* Callback goes to the last chunk */
+ chunk->async_tx.callback = NULL;
+ }
+ chunk->cookie = cookie;
+ list_move_tail(&chunk->node, &schan->ld_queue);
+
+ dev_dbg(schan->dev, "submit #%d@%p on %d\n",
+ tx->cookie, &chunk->async_tx, schan->id);
+ }
+
+ if (power_up) {
+ int ret;
+ schan->pm_state = SHDMA_PM_BUSY;
+
+ ret = pm_runtime_get(schan->dev);
+
+ spin_unlock_irq(&schan->chan_lock);
+ if (ret < 0)
+ dev_err(schan->dev, "%s(): GET = %d\n", __func__, ret);
+
+ pm_runtime_barrier(schan->dev);
+
+ spin_lock_irq(&schan->chan_lock);
+
+ /* Have we been reset, while waiting? */
+ if (schan->pm_state != SHDMA_PM_ESTABLISHED) {
+ struct shdma_dev *sdev =
+ to_shdma_dev(schan->dma_chan.device);
+ const struct shdma_ops *ops = sdev->ops;
+ dev_dbg(schan->dev, "Bring up channel %d\n",
+ schan->id);
+ /*
+ * TODO: .xfer_setup() might fail on some platforms.
+ * Make it int then, on error remove chunks from the
+ * queue again
+ */
+ ops->setup_xfer(schan, schan->slave_id);
+
+ if (schan->pm_state == SHDMA_PM_PENDING)
+ shdma_chan_xfer_ld_queue(schan);
+ schan->pm_state = SHDMA_PM_ESTABLISHED;
+ }
+ } else {
+ /*
+ * Tell .device_issue_pending() not to run the queue, interrupts
+ * will do it anyway
+ */
+ schan->pm_state = SHDMA_PM_PENDING;
+ }
+
+ spin_unlock_irq(&schan->chan_lock);
+
+ return cookie;
+}
+
+/* Called with desc_lock held */
+static struct shdma_desc *shdma_get_desc(struct shdma_chan *schan)
+{
+ struct shdma_desc *sdesc;
+
+ list_for_each_entry(sdesc, &schan->ld_free, node)
+ if (sdesc->mark != DESC_PREPARED) {
+ BUG_ON(sdesc->mark != DESC_IDLE);
+ list_del(&sdesc->node);
+ return sdesc;
+ }
+
+ return NULL;
+}
+
+static int shdma_setup_slave(struct shdma_chan *schan, dma_addr_t slave_addr)
+{
+ struct shdma_dev *sdev = to_shdma_dev(schan->dma_chan.device);
+ const struct shdma_ops *ops = sdev->ops;
+ int ret, match;
+
+ if (schan->dev->of_node) {
+ match = schan->hw_req;
+ ret = ops->set_slave(schan, match, slave_addr, true);
+ if (ret < 0)
+ return ret;
+ } else {
+ match = schan->real_slave_id;
+ }
+
+ if (schan->real_slave_id < 0 || schan->real_slave_id >= slave_num)
+ return -EINVAL;
+
+ if (test_and_set_bit(schan->real_slave_id, shdma_slave_used))
+ return -EBUSY;
+
+ ret = ops->set_slave(schan, match, slave_addr, false);
+ if (ret < 0) {
+ clear_bit(schan->real_slave_id, shdma_slave_used);
+ return ret;
+ }
+
+ schan->slave_id = schan->real_slave_id;
+
+ return 0;
+}
+
+static int shdma_alloc_chan_resources(struct dma_chan *chan)
+{
+ struct shdma_chan *schan = to_shdma_chan(chan);
+ struct shdma_dev *sdev = to_shdma_dev(schan->dma_chan.device);
+ const struct shdma_ops *ops = sdev->ops;
+ struct shdma_desc *desc;
+ struct shdma_slave *slave = chan->private;
+ int ret, i;
+
+ /*
+ * This relies on the guarantee from dmaengine that alloc_chan_resources
+ * never runs concurrently with itself or free_chan_resources.
+ */
+ if (slave) {
+ /* Legacy mode: .private is set in filter */
+ schan->real_slave_id = slave->slave_id;
+ ret = shdma_setup_slave(schan, 0);
+ if (ret < 0)
+ goto esetslave;
+ } else {
+ /* Normal mode: real_slave_id was set by filter */
+ schan->slave_id = -EINVAL;
+ }
+
+ schan->desc = kcalloc(NR_DESCS_PER_CHANNEL,
+ sdev->desc_size, GFP_KERNEL);
+ if (!schan->desc) {
+ ret = -ENOMEM;
+ goto edescalloc;
+ }
+ schan->desc_num = NR_DESCS_PER_CHANNEL;
+
+ for (i = 0; i < NR_DESCS_PER_CHANNEL; i++) {
+ desc = ops->embedded_desc(schan->desc, i);
+ dma_async_tx_descriptor_init(&desc->async_tx,
+ &schan->dma_chan);
+ desc->async_tx.tx_submit = shdma_tx_submit;
+ desc->mark = DESC_IDLE;
+
+ list_add(&desc->node, &schan->ld_free);
+ }
+
+ return NR_DESCS_PER_CHANNEL;
+
+edescalloc:
+ if (slave)
+esetslave:
+ clear_bit(slave->slave_id, shdma_slave_used);
+ chan->private = NULL;
+ return ret;
+}
+
+/*
+ * This is the standard shdma filter function to be used as a replacement to the
+ * "old" method, using the .private pointer.
+ * You always have to pass a valid slave id as the argument, old drivers that
+ * pass ERR_PTR(-EINVAL) as a filter parameter and set it up in dma_slave_config
+ * need to be updated so we can remove the slave_id field from dma_slave_config.
+ * parameter. If this filter is used, the slave driver, after calling
+ * dma_request_channel(), will also have to call dmaengine_slave_config() with
+ * .direction, and either .src_addr or .dst_addr set.
+ *
+ * NOTE: this filter doesn't support multiple DMAC drivers with the DMA_SLAVE
+ * capability! If this becomes a requirement, hardware glue drivers, using this
+ * services would have to provide their own filters, which first would check
+ * the device driver, similar to how other DMAC drivers, e.g., sa11x0-dma.c, do
+ * this, and only then, in case of a match, call this common filter.
+ * NOTE 2: This filter function is also used in the DT case by shdma_of_xlate().
+ * In that case the MID-RID value is used for slave channel filtering and is
+ * passed to this function in the "arg" parameter.
+ */
+bool shdma_chan_filter(struct dma_chan *chan, void *arg)
+{
+ struct shdma_chan *schan;
+ struct shdma_dev *sdev;
+ int slave_id = (long)arg;
+ int ret;
+
+ /* Only support channels handled by this driver. */
+ if (chan->device->device_alloc_chan_resources !=
+ shdma_alloc_chan_resources)
+ return false;
+
+ schan = to_shdma_chan(chan);
+ sdev = to_shdma_dev(chan->device);
+
+ /*
+ * For DT, the schan->slave_id field is generated by the
+ * set_slave function from the slave ID that is passed in
+ * from xlate. For the non-DT case, the slave ID is
+ * directly passed into the filter function by the driver
+ */
+ if (schan->dev->of_node) {
+ ret = sdev->ops->set_slave(schan, slave_id, 0, true);
+ if (ret < 0)
+ return false;
+
+ schan->real_slave_id = schan->slave_id;
+ return true;
+ }
+
+ if (slave_id < 0) {
+ /* No slave requested - arbitrary channel */
+ dev_warn(sdev->dma_dev.dev, "invalid slave ID passed to dma_request_slave\n");
+ return true;
+ }
+
+ if (slave_id >= slave_num)
+ return false;
+
+ ret = sdev->ops->set_slave(schan, slave_id, 0, true);
+ if (ret < 0)
+ return false;
+
+ schan->real_slave_id = slave_id;
+
+ return true;
+}
+EXPORT_SYMBOL(shdma_chan_filter);
+
+static dma_async_tx_callback __ld_cleanup(struct shdma_chan *schan, bool all)
+{
+ struct shdma_desc *desc, *_desc;
+ /* Is the "exposed" head of a chain acked? */
+ bool head_acked = false;
+ dma_cookie_t cookie = 0;
+ dma_async_tx_callback callback = NULL;
+ struct dmaengine_desc_callback cb;
+ unsigned long flags;
+ LIST_HEAD(cyclic_list);
+
+ memset(&cb, 0, sizeof(cb));
+ spin_lock_irqsave(&schan->chan_lock, flags);
+ list_for_each_entry_safe(desc, _desc, &schan->ld_queue, node) {
+ struct dma_async_tx_descriptor *tx = &desc->async_tx;
+
+ BUG_ON(tx->cookie > 0 && tx->cookie != desc->cookie);
+ BUG_ON(desc->mark != DESC_SUBMITTED &&
+ desc->mark != DESC_COMPLETED &&
+ desc->mark != DESC_WAITING);
+
+ /*
+ * queue is ordered, and we use this loop to (1) clean up all
+ * completed descriptors, and to (2) update descriptor flags of
+ * any chunks in a (partially) completed chain
+ */
+ if (!all && desc->mark == DESC_SUBMITTED &&
+ desc->cookie != cookie)
+ break;
+
+ if (tx->cookie > 0)
+ cookie = tx->cookie;
+
+ if (desc->mark == DESC_COMPLETED && desc->chunks == 1) {
+ if (schan->dma_chan.completed_cookie != desc->cookie - 1)
+ dev_dbg(schan->dev,
+ "Completing cookie %d, expected %d\n",
+ desc->cookie,
+ schan->dma_chan.completed_cookie + 1);
+ schan->dma_chan.completed_cookie = desc->cookie;
+ }
+
+ /* Call callback on the last chunk */
+ if (desc->mark == DESC_COMPLETED && tx->callback) {
+ desc->mark = DESC_WAITING;
+ dmaengine_desc_get_callback(tx, &cb);
+ callback = tx->callback;
+ dev_dbg(schan->dev, "descriptor #%d@%p on %d callback\n",
+ tx->cookie, tx, schan->id);
+ BUG_ON(desc->chunks != 1);
+ break;
+ }
+
+ if (tx->cookie > 0 || tx->cookie == -EBUSY) {
+ if (desc->mark == DESC_COMPLETED) {
+ BUG_ON(tx->cookie < 0);
+ desc->mark = DESC_WAITING;
+ }
+ head_acked = async_tx_test_ack(tx);
+ } else {
+ switch (desc->mark) {
+ case DESC_COMPLETED:
+ desc->mark = DESC_WAITING;
+ fallthrough;
+ case DESC_WAITING:
+ if (head_acked)
+ async_tx_ack(&desc->async_tx);
+ }
+ }
+
+ dev_dbg(schan->dev, "descriptor %p #%d completed.\n",
+ tx, tx->cookie);
+
+ if (((desc->mark == DESC_COMPLETED ||
+ desc->mark == DESC_WAITING) &&
+ async_tx_test_ack(&desc->async_tx)) || all) {
+
+ if (all || !desc->cyclic) {
+ /* Remove from ld_queue list */
+ desc->mark = DESC_IDLE;
+ list_move(&desc->node, &schan->ld_free);
+ } else {
+ /* reuse as cyclic */
+ desc->mark = DESC_SUBMITTED;
+ list_move_tail(&desc->node, &cyclic_list);
+ }
+
+ if (list_empty(&schan->ld_queue)) {
+ dev_dbg(schan->dev, "Bring down channel %d\n", schan->id);
+ pm_runtime_put(schan->dev);
+ schan->pm_state = SHDMA_PM_ESTABLISHED;
+ } else if (schan->pm_state == SHDMA_PM_PENDING) {
+ shdma_chan_xfer_ld_queue(schan);
+ }
+ }
+ }
+
+ if (all && !callback)
+ /*
+ * Terminating and the loop completed normally: forgive
+ * uncompleted cookies
+ */
+ schan->dma_chan.completed_cookie = schan->dma_chan.cookie;
+
+ list_splice_tail(&cyclic_list, &schan->ld_queue);
+
+ spin_unlock_irqrestore(&schan->chan_lock, flags);
+
+ dmaengine_desc_callback_invoke(&cb, NULL);
+
+ return callback;
+}
+
+/*
+ * shdma_chan_ld_cleanup - Clean up link descriptors
+ *
+ * Clean up the ld_queue of DMA channel.
+ */
+static void shdma_chan_ld_cleanup(struct shdma_chan *schan, bool all)
+{
+ while (__ld_cleanup(schan, all))
+ ;
+}
+
+/*
+ * shdma_free_chan_resources - Free all resources of the channel.
+ */
+static void shdma_free_chan_resources(struct dma_chan *chan)
+{
+ struct shdma_chan *schan = to_shdma_chan(chan);
+ struct shdma_dev *sdev = to_shdma_dev(chan->device);
+ const struct shdma_ops *ops = sdev->ops;
+ LIST_HEAD(list);
+
+ /* Protect against ISR */
+ spin_lock_irq(&schan->chan_lock);
+ ops->halt_channel(schan);
+ spin_unlock_irq(&schan->chan_lock);
+
+ /* Now no new interrupts will occur */
+
+ /* Prepared and not submitted descriptors can still be on the queue */
+ if (!list_empty(&schan->ld_queue))
+ shdma_chan_ld_cleanup(schan, true);
+
+ if (schan->slave_id >= 0) {
+ /* The caller is holding dma_list_mutex */
+ clear_bit(schan->slave_id, shdma_slave_used);
+ chan->private = NULL;
+ }
+
+ schan->real_slave_id = 0;
+
+ spin_lock_irq(&schan->chan_lock);
+
+ list_splice_init(&schan->ld_free, &list);
+ schan->desc_num = 0;
+
+ spin_unlock_irq(&schan->chan_lock);
+
+ kfree(schan->desc);
+}
+
+/**
+ * shdma_add_desc - get, set up and return one transfer descriptor
+ * @schan: DMA channel
+ * @flags: DMA transfer flags
+ * @dst: destination DMA address, incremented when direction equals
+ * DMA_DEV_TO_MEM or DMA_MEM_TO_MEM
+ * @src: source DMA address, incremented when direction equals
+ * DMA_MEM_TO_DEV or DMA_MEM_TO_MEM
+ * @len: DMA transfer length
+ * @first: if NULL, set to the current descriptor and cookie set to -EBUSY
+ * @direction: needed for slave DMA to decide which address to keep constant,
+ * equals DMA_MEM_TO_MEM for MEMCPY
+ * Returns 0 or an error
+ * Locks: called with desc_lock held
+ */
+static struct shdma_desc *shdma_add_desc(struct shdma_chan *schan,
+ unsigned long flags, dma_addr_t *dst, dma_addr_t *src, size_t *len,
+ struct shdma_desc **first, enum dma_transfer_direction direction)
+{
+ struct shdma_dev *sdev = to_shdma_dev(schan->dma_chan.device);
+ const struct shdma_ops *ops = sdev->ops;
+ struct shdma_desc *new;
+ size_t copy_size = *len;
+
+ if (!copy_size)
+ return NULL;
+
+ /* Allocate the link descriptor from the free list */
+ new = shdma_get_desc(schan);
+ if (!new) {
+ dev_err(schan->dev, "No free link descriptor available\n");
+ return NULL;
+ }
+
+ ops->desc_setup(schan, new, *src, *dst, &copy_size);
+
+ if (!*first) {
+ /* First desc */
+ new->async_tx.cookie = -EBUSY;
+ *first = new;
+ } else {
+ /* Other desc - invisible to the user */
+ new->async_tx.cookie = -EINVAL;
+ }
+
+ dev_dbg(schan->dev,
+ "chaining (%zu/%zu)@%pad -> %pad with %p, cookie %d\n",
+ copy_size, *len, src, dst, &new->async_tx,
+ new->async_tx.cookie);
+
+ new->mark = DESC_PREPARED;
+ new->async_tx.flags = flags;
+ new->direction = direction;
+ new->partial = 0;
+
+ *len -= copy_size;
+ if (direction == DMA_MEM_TO_MEM || direction == DMA_MEM_TO_DEV)
+ *src += copy_size;
+ if (direction == DMA_MEM_TO_MEM || direction == DMA_DEV_TO_MEM)
+ *dst += copy_size;
+
+ return new;
+}
+
+/*
+ * shdma_prep_sg - prepare transfer descriptors from an SG list
+ *
+ * Common routine for public (MEMCPY) and slave DMA. The MEMCPY case is also
+ * converted to scatter-gather to guarantee consistent locking and a correct
+ * list manipulation. For slave DMA direction carries the usual meaning, and,
+ * logically, the SG list is RAM and the addr variable contains slave address,
+ * e.g., the FIFO I/O register. For MEMCPY direction equals DMA_MEM_TO_MEM
+ * and the SG list contains only one element and points at the source buffer.
+ */
+static struct dma_async_tx_descriptor *shdma_prep_sg(struct shdma_chan *schan,
+ struct scatterlist *sgl, unsigned int sg_len, dma_addr_t *addr,
+ enum dma_transfer_direction direction, unsigned long flags, bool cyclic)
+{
+ struct scatterlist *sg;
+ struct shdma_desc *first = NULL, *new = NULL /* compiler... */;
+ LIST_HEAD(tx_list);
+ int chunks = 0;
+ unsigned long irq_flags;
+ int i;
+
+ for_each_sg(sgl, sg, sg_len, i)
+ chunks += DIV_ROUND_UP(sg_dma_len(sg), schan->max_xfer_len);
+
+ /* Have to lock the whole loop to protect against concurrent release */
+ spin_lock_irqsave(&schan->chan_lock, irq_flags);
+
+ /*
+ * Chaining:
+ * first descriptor is what user is dealing with in all API calls, its
+ * cookie is at first set to -EBUSY, at tx-submit to a positive
+ * number
+ * if more than one chunk is needed further chunks have cookie = -EINVAL
+ * the last chunk, if not equal to the first, has cookie = -ENOSPC
+ * all chunks are linked onto the tx_list head with their .node heads
+ * only during this function, then they are immediately spliced
+ * back onto the free list in form of a chain
+ */
+ for_each_sg(sgl, sg, sg_len, i) {
+ dma_addr_t sg_addr = sg_dma_address(sg);
+ size_t len = sg_dma_len(sg);
+
+ if (!len)
+ goto err_get_desc;
+
+ do {
+ dev_dbg(schan->dev, "Add SG #%d@%p[%zu], dma %pad\n",
+ i, sg, len, &sg_addr);
+
+ if (direction == DMA_DEV_TO_MEM)
+ new = shdma_add_desc(schan, flags,
+ &sg_addr, addr, &len, &first,
+ direction);
+ else
+ new = shdma_add_desc(schan, flags,
+ addr, &sg_addr, &len, &first,
+ direction);
+ if (!new)
+ goto err_get_desc;
+
+ new->cyclic = cyclic;
+ if (cyclic)
+ new->chunks = 1;
+ else
+ new->chunks = chunks--;
+ list_add_tail(&new->node, &tx_list);
+ } while (len);
+ }
+
+ if (new != first)
+ new->async_tx.cookie = -ENOSPC;
+
+ /* Put them back on the free list, so, they don't get lost */
+ list_splice_tail(&tx_list, &schan->ld_free);
+
+ spin_unlock_irqrestore(&schan->chan_lock, irq_flags);
+
+ return &first->async_tx;
+
+err_get_desc:
+ list_for_each_entry(new, &tx_list, node)
+ new->mark = DESC_IDLE;
+ list_splice(&tx_list, &schan->ld_free);
+
+ spin_unlock_irqrestore(&schan->chan_lock, irq_flags);
+
+ return NULL;
+}
+
+static struct dma_async_tx_descriptor *shdma_prep_memcpy(
+ struct dma_chan *chan, dma_addr_t dma_dest, dma_addr_t dma_src,
+ size_t len, unsigned long flags)
+{
+ struct shdma_chan *schan = to_shdma_chan(chan);
+ struct scatterlist sg;
+
+ if (!chan || !len)
+ return NULL;
+
+ BUG_ON(!schan->desc_num);
+
+ sg_init_table(&sg, 1);
+ sg_set_page(&sg, pfn_to_page(PFN_DOWN(dma_src)), len,
+ offset_in_page(dma_src));
+ sg_dma_address(&sg) = dma_src;
+ sg_dma_len(&sg) = len;
+
+ return shdma_prep_sg(schan, &sg, 1, &dma_dest, DMA_MEM_TO_MEM,
+ flags, false);
+}
+
+static struct dma_async_tx_descriptor *shdma_prep_slave_sg(
+ struct dma_chan *chan, struct scatterlist *sgl, unsigned int sg_len,
+ enum dma_transfer_direction direction, unsigned long flags, void *context)
+{
+ struct shdma_chan *schan = to_shdma_chan(chan);
+ struct shdma_dev *sdev = to_shdma_dev(schan->dma_chan.device);
+ const struct shdma_ops *ops = sdev->ops;
+ int slave_id = schan->slave_id;
+ dma_addr_t slave_addr;
+
+ if (!chan)
+ return NULL;
+
+ BUG_ON(!schan->desc_num);
+
+ /* Someone calling slave DMA on a generic channel? */
+ if (slave_id < 0 || !sg_len) {
+ dev_warn(schan->dev, "%s: bad parameter: len=%d, id=%d\n",
+ __func__, sg_len, slave_id);
+ return NULL;
+ }
+
+ slave_addr = ops->slave_addr(schan);
+
+ return shdma_prep_sg(schan, sgl, sg_len, &slave_addr,
+ direction, flags, false);
+}
+
+#define SHDMA_MAX_SG_LEN 32
+
+static struct dma_async_tx_descriptor *shdma_prep_dma_cyclic(
+ struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
+ size_t period_len, enum dma_transfer_direction direction,
+ unsigned long flags)
+{
+ struct shdma_chan *schan = to_shdma_chan(chan);
+ struct shdma_dev *sdev = to_shdma_dev(schan->dma_chan.device);
+ struct dma_async_tx_descriptor *desc;
+ const struct shdma_ops *ops = sdev->ops;
+ unsigned int sg_len = buf_len / period_len;
+ int slave_id = schan->slave_id;
+ dma_addr_t slave_addr;
+ struct scatterlist *sgl;
+ int i;
+
+ if (!chan)
+ return NULL;
+
+ BUG_ON(!schan->desc_num);
+
+ if (sg_len > SHDMA_MAX_SG_LEN) {
+ dev_err(schan->dev, "sg length %d exceeds limit %d",
+ sg_len, SHDMA_MAX_SG_LEN);
+ return NULL;
+ }
+
+ /* Someone calling slave DMA on a generic channel? */
+ if (slave_id < 0 || (buf_len < period_len)) {
+ dev_warn(schan->dev,
+ "%s: bad parameter: buf_len=%zu, period_len=%zu, id=%d\n",
+ __func__, buf_len, period_len, slave_id);
+ return NULL;
+ }
+
+ slave_addr = ops->slave_addr(schan);
+
+ /*
+ * Allocate the sg list dynamically as it would consumer too much stack
+ * space.
+ */
+ sgl = kmalloc_array(sg_len, sizeof(*sgl), GFP_KERNEL);
+ if (!sgl)
+ return NULL;
+
+ sg_init_table(sgl, sg_len);
+
+ for (i = 0; i < sg_len; i++) {
+ dma_addr_t src = buf_addr + (period_len * i);
+
+ sg_set_page(&sgl[i], pfn_to_page(PFN_DOWN(src)), period_len,
+ offset_in_page(src));
+ sg_dma_address(&sgl[i]) = src;
+ sg_dma_len(&sgl[i]) = period_len;
+ }
+
+ desc = shdma_prep_sg(schan, sgl, sg_len, &slave_addr,
+ direction, flags, true);
+
+ kfree(sgl);
+ return desc;
+}
+
+static int shdma_terminate_all(struct dma_chan *chan)
+{
+ struct shdma_chan *schan = to_shdma_chan(chan);
+ struct shdma_dev *sdev = to_shdma_dev(chan->device);
+ const struct shdma_ops *ops = sdev->ops;
+ unsigned long flags;
+
+ spin_lock_irqsave(&schan->chan_lock, flags);
+ ops->halt_channel(schan);
+
+ if (ops->get_partial && !list_empty(&schan->ld_queue)) {
+ /* Record partial transfer */
+ struct shdma_desc *desc = list_first_entry(&schan->ld_queue,
+ struct shdma_desc, node);
+ desc->partial = ops->get_partial(schan, desc);
+ }
+
+ spin_unlock_irqrestore(&schan->chan_lock, flags);
+
+ shdma_chan_ld_cleanup(schan, true);
+
+ return 0;
+}
+
+static int shdma_config(struct dma_chan *chan,
+ struct dma_slave_config *config)
+{
+ struct shdma_chan *schan = to_shdma_chan(chan);
+
+ /*
+ * So far only .slave_id is used, but the slave drivers are
+ * encouraged to also set a transfer direction and an address.
+ */
+ if (!config)
+ return -EINVAL;
+
+ /*
+ * We could lock this, but you shouldn't be configuring the
+ * channel, while using it...
+ */
+ return shdma_setup_slave(schan,
+ config->direction == DMA_DEV_TO_MEM ?
+ config->src_addr : config->dst_addr);
+}
+
+static void shdma_issue_pending(struct dma_chan *chan)
+{
+ struct shdma_chan *schan = to_shdma_chan(chan);
+
+ spin_lock_irq(&schan->chan_lock);
+ if (schan->pm_state == SHDMA_PM_ESTABLISHED)
+ shdma_chan_xfer_ld_queue(schan);
+ else
+ schan->pm_state = SHDMA_PM_PENDING;
+ spin_unlock_irq(&schan->chan_lock);
+}
+
+static enum dma_status shdma_tx_status(struct dma_chan *chan,
+ dma_cookie_t cookie,
+ struct dma_tx_state *txstate)
+{
+ struct shdma_chan *schan = to_shdma_chan(chan);
+ enum dma_status status;
+ unsigned long flags;
+
+ shdma_chan_ld_cleanup(schan, false);
+
+ spin_lock_irqsave(&schan->chan_lock, flags);
+
+ status = dma_cookie_status(chan, cookie, txstate);
+
+ /*
+ * If we don't find cookie on the queue, it has been aborted and we have
+ * to report error
+ */
+ if (status != DMA_COMPLETE) {
+ struct shdma_desc *sdesc;
+ status = DMA_ERROR;
+ list_for_each_entry(sdesc, &schan->ld_queue, node)
+ if (sdesc->cookie == cookie) {
+ status = DMA_IN_PROGRESS;
+ break;
+ }
+ }
+
+ spin_unlock_irqrestore(&schan->chan_lock, flags);
+
+ return status;
+}
+
+/* Called from error IRQ or NMI */
+bool shdma_reset(struct shdma_dev *sdev)
+{
+ const struct shdma_ops *ops = sdev->ops;
+ struct shdma_chan *schan;
+ unsigned int handled = 0;
+ int i;
+
+ /* Reset all channels */
+ shdma_for_each_chan(schan, sdev, i) {
+ struct shdma_desc *sdesc;
+ LIST_HEAD(dl);
+
+ if (!schan)
+ continue;
+
+ spin_lock(&schan->chan_lock);
+
+ /* Stop the channel */
+ ops->halt_channel(schan);
+
+ list_splice_init(&schan->ld_queue, &dl);
+
+ if (!list_empty(&dl)) {
+ dev_dbg(schan->dev, "Bring down channel %d\n", schan->id);
+ pm_runtime_put(schan->dev);
+ }
+ schan->pm_state = SHDMA_PM_ESTABLISHED;
+
+ spin_unlock(&schan->chan_lock);
+
+ /* Complete all */
+ list_for_each_entry(sdesc, &dl, node) {
+ struct dma_async_tx_descriptor *tx = &sdesc->async_tx;
+
+ sdesc->mark = DESC_IDLE;
+ dmaengine_desc_get_callback_invoke(tx, NULL);
+ }
+
+ spin_lock(&schan->chan_lock);
+ list_splice(&dl, &schan->ld_free);
+ spin_unlock(&schan->chan_lock);
+
+ handled++;
+ }
+
+ return !!handled;
+}
+EXPORT_SYMBOL(shdma_reset);
+
+static irqreturn_t chan_irq(int irq, void *dev)
+{
+ struct shdma_chan *schan = dev;
+ const struct shdma_ops *ops =
+ to_shdma_dev(schan->dma_chan.device)->ops;
+ irqreturn_t ret;
+
+ spin_lock(&schan->chan_lock);
+
+ ret = ops->chan_irq(schan, irq) ? IRQ_WAKE_THREAD : IRQ_NONE;
+
+ spin_unlock(&schan->chan_lock);
+
+ return ret;
+}
+
+static irqreturn_t chan_irqt(int irq, void *dev)
+{
+ struct shdma_chan *schan = dev;
+ const struct shdma_ops *ops =
+ to_shdma_dev(schan->dma_chan.device)->ops;
+ struct shdma_desc *sdesc;
+
+ spin_lock_irq(&schan->chan_lock);
+ list_for_each_entry(sdesc, &schan->ld_queue, node) {
+ if (sdesc->mark == DESC_SUBMITTED &&
+ ops->desc_completed(schan, sdesc)) {
+ dev_dbg(schan->dev, "done #%d@%p\n",
+ sdesc->async_tx.cookie, &sdesc->async_tx);
+ sdesc->mark = DESC_COMPLETED;
+ break;
+ }
+ }
+ /* Next desc */
+ shdma_chan_xfer_ld_queue(schan);
+ spin_unlock_irq(&schan->chan_lock);
+
+ shdma_chan_ld_cleanup(schan, false);
+
+ return IRQ_HANDLED;
+}
+
+int shdma_request_irq(struct shdma_chan *schan, int irq,
+ unsigned long flags, const char *name)
+{
+ int ret = devm_request_threaded_irq(schan->dev, irq, chan_irq,
+ chan_irqt, flags, name, schan);
+
+ schan->irq = ret < 0 ? ret : irq;
+
+ return ret;
+}
+EXPORT_SYMBOL(shdma_request_irq);
+
+void shdma_chan_probe(struct shdma_dev *sdev,
+ struct shdma_chan *schan, int id)
+{
+ schan->pm_state = SHDMA_PM_ESTABLISHED;
+
+ /* reference struct dma_device */
+ schan->dma_chan.device = &sdev->dma_dev;
+ dma_cookie_init(&schan->dma_chan);
+
+ schan->dev = sdev->dma_dev.dev;
+ schan->id = id;
+
+ if (!schan->max_xfer_len)
+ schan->max_xfer_len = PAGE_SIZE;
+
+ spin_lock_init(&schan->chan_lock);
+
+ /* Init descripter manage list */
+ INIT_LIST_HEAD(&schan->ld_queue);
+ INIT_LIST_HEAD(&schan->ld_free);
+
+ /* Add the channel to DMA device channel list */
+ list_add_tail(&schan->dma_chan.device_node,
+ &sdev->dma_dev.channels);
+ sdev->schan[id] = schan;
+}
+EXPORT_SYMBOL(shdma_chan_probe);
+
+void shdma_chan_remove(struct shdma_chan *schan)
+{
+ list_del(&schan->dma_chan.device_node);
+}
+EXPORT_SYMBOL(shdma_chan_remove);
+
+int shdma_init(struct device *dev, struct shdma_dev *sdev,
+ int chan_num)
+{
+ struct dma_device *dma_dev = &sdev->dma_dev;
+
+ /*
+ * Require all call-backs for now, they can trivially be made optional
+ * later as required
+ */
+ if (!sdev->ops ||
+ !sdev->desc_size ||
+ !sdev->ops->embedded_desc ||
+ !sdev->ops->start_xfer ||
+ !sdev->ops->setup_xfer ||
+ !sdev->ops->set_slave ||
+ !sdev->ops->desc_setup ||
+ !sdev->ops->slave_addr ||
+ !sdev->ops->channel_busy ||
+ !sdev->ops->halt_channel ||
+ !sdev->ops->desc_completed)
+ return -EINVAL;
+
+ sdev->schan = kcalloc(chan_num, sizeof(*sdev->schan), GFP_KERNEL);
+ if (!sdev->schan)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&dma_dev->channels);
+
+ /* Common and MEMCPY operations */
+ dma_dev->device_alloc_chan_resources
+ = shdma_alloc_chan_resources;
+ dma_dev->device_free_chan_resources = shdma_free_chan_resources;
+ dma_dev->device_prep_dma_memcpy = shdma_prep_memcpy;
+ dma_dev->device_tx_status = shdma_tx_status;
+ dma_dev->device_issue_pending = shdma_issue_pending;
+
+ /* Compulsory for DMA_SLAVE fields */
+ dma_dev->device_prep_slave_sg = shdma_prep_slave_sg;
+ dma_dev->device_prep_dma_cyclic = shdma_prep_dma_cyclic;
+ dma_dev->device_config = shdma_config;
+ dma_dev->device_terminate_all = shdma_terminate_all;
+
+ dma_dev->dev = dev;
+
+ return 0;
+}
+EXPORT_SYMBOL(shdma_init);
+
+void shdma_cleanup(struct shdma_dev *sdev)
+{
+ kfree(sdev->schan);
+}
+EXPORT_SYMBOL(shdma_cleanup);
+
+static int __init shdma_enter(void)
+{
+ shdma_slave_used = bitmap_zalloc(slave_num, GFP_KERNEL);
+ if (!shdma_slave_used)
+ return -ENOMEM;
+ return 0;
+}
+module_init(shdma_enter);
+
+static void __exit shdma_exit(void)
+{
+ bitmap_free(shdma_slave_used);
+}
+module_exit(shdma_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("SH-DMA driver base library");
+MODULE_AUTHOR("Guennadi Liakhovetski <g.liakhovetski@gmx.de>");
diff --git a/drivers/dma/sh/shdma.h b/drivers/dma/sh/shdma.h
new file mode 100644
index 000000000..9c121a4b3
--- /dev/null
+++ b/drivers/dma/sh/shdma.h
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Renesas SuperH DMA Engine support
+ *
+ * Copyright (C) 2009 Nobuhiro Iwamatsu <iwamatsu.nobuhiro@renesas.com>
+ * Copyright (C) 2009 Renesas Solutions, Inc. All rights reserved.
+ *
+ */
+#ifndef __DMA_SHDMA_H
+#define __DMA_SHDMA_H
+
+#include <linux/sh_dma.h>
+#include <linux/shdma-base.h>
+#include <linux/dmaengine.h>
+#include <linux/interrupt.h>
+#include <linux/list.h>
+
+#define SH_DMAE_MAX_CHANNELS 20
+#define SH_DMAE_TCR_MAX 0x00FFFFFF /* 16MB */
+
+struct device;
+
+struct sh_dmae_chan {
+ struct shdma_chan shdma_chan;
+ const struct sh_dmae_slave_config *config; /* Slave DMA configuration */
+ int xmit_shift; /* log_2(bytes_per_xfer) */
+ void __iomem *base;
+ char dev_id[16]; /* unique name per DMAC of channel */
+ int pm_error;
+ dma_addr_t slave_addr;
+};
+
+struct sh_dmae_device {
+ struct shdma_dev shdma_dev;
+ struct sh_dmae_chan *chan[SH_DMAE_MAX_CHANNELS];
+ const struct sh_dmae_pdata *pdata;
+ struct list_head node;
+ void __iomem *chan_reg;
+ void __iomem *dmars;
+ unsigned int chcr_offset;
+ u32 chcr_ie_bit;
+};
+
+struct sh_dmae_regs {
+ u32 sar; /* SAR / source address */
+ u32 dar; /* DAR / destination address */
+ u32 tcr; /* TCR / transfer count */
+};
+
+struct sh_dmae_desc {
+ struct sh_dmae_regs hw;
+ struct shdma_desc shdma_desc;
+};
+
+#define to_sh_chan(chan) container_of(chan, struct sh_dmae_chan, shdma_chan)
+#define to_sh_desc(lh) container_of(lh, struct sh_desc, node)
+#define tx_to_sh_desc(tx) container_of(tx, struct sh_desc, async_tx)
+#define to_sh_dev(chan) container_of(chan->shdma_chan.dma_chan.device,\
+ struct sh_dmae_device, shdma_dev.dma_dev)
+
+#endif /* __DMA_SHDMA_H */
diff --git a/drivers/dma/sh/shdmac.c b/drivers/dma/sh/shdmac.c
new file mode 100644
index 000000000..5aafe548c
--- /dev/null
+++ b/drivers/dma/sh/shdmac.c
@@ -0,0 +1,938 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Renesas SuperH DMA Engine support
+ *
+ * base is drivers/dma/flsdma.c
+ *
+ * Copyright (C) 2011-2012 Guennadi Liakhovetski <g.liakhovetski@gmx.de>
+ * Copyright (C) 2009 Nobuhiro Iwamatsu <iwamatsu.nobuhiro@renesas.com>
+ * Copyright (C) 2009 Renesas Solutions, Inc. All rights reserved.
+ * Copyright (C) 2007 Freescale Semiconductor, Inc. All rights reserved.
+ *
+ * - DMA of SuperH does not have Hardware DMA chain mode.
+ * - MAX DMA size is 16MB.
+ *
+ */
+
+#include <linux/delay.h>
+#include <linux/dmaengine.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/kdebug.h>
+#include <linux/module.h>
+#include <linux/notifier.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/rculist.h>
+#include <linux/sh_dma.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+
+#include "../dmaengine.h"
+#include "shdma.h"
+
+/* DMA registers */
+#define SAR 0x00 /* Source Address Register */
+#define DAR 0x04 /* Destination Address Register */
+#define TCR 0x08 /* Transfer Count Register */
+#define CHCR 0x0C /* Channel Control Register */
+#define DMAOR 0x40 /* DMA Operation Register */
+
+#define TEND 0x18 /* USB-DMAC */
+
+#define SH_DMAE_DRV_NAME "sh-dma-engine"
+
+/* Default MEMCPY transfer size = 2^2 = 4 bytes */
+#define LOG2_DEFAULT_XFER_SIZE 2
+#define SH_DMA_SLAVE_NUMBER 256
+#define SH_DMA_TCR_MAX (16 * 1024 * 1024 - 1)
+
+/*
+ * Used for write-side mutual exclusion for the global device list,
+ * read-side synchronization by way of RCU, and per-controller data.
+ */
+static DEFINE_SPINLOCK(sh_dmae_lock);
+static LIST_HEAD(sh_dmae_devices);
+
+/*
+ * Different DMAC implementations provide different ways to clear DMA channels:
+ * (1) none - no CHCLR registers are available
+ * (2) one CHCLR register per channel - 0 has to be written to it to clear
+ * channel buffers
+ * (3) one CHCLR per several channels - 1 has to be written to the bit,
+ * corresponding to the specific channel to reset it
+ */
+static void channel_clear(struct sh_dmae_chan *sh_dc)
+{
+ struct sh_dmae_device *shdev = to_sh_dev(sh_dc);
+ const struct sh_dmae_channel *chan_pdata = shdev->pdata->channel +
+ sh_dc->shdma_chan.id;
+ u32 val = shdev->pdata->chclr_bitwise ? 1 << chan_pdata->chclr_bit : 0;
+
+ __raw_writel(val, shdev->chan_reg + chan_pdata->chclr_offset);
+}
+
+static void sh_dmae_writel(struct sh_dmae_chan *sh_dc, u32 data, u32 reg)
+{
+ __raw_writel(data, sh_dc->base + reg);
+}
+
+static u32 sh_dmae_readl(struct sh_dmae_chan *sh_dc, u32 reg)
+{
+ return __raw_readl(sh_dc->base + reg);
+}
+
+static u16 dmaor_read(struct sh_dmae_device *shdev)
+{
+ void __iomem *addr = shdev->chan_reg + DMAOR;
+
+ if (shdev->pdata->dmaor_is_32bit)
+ return __raw_readl(addr);
+ else
+ return __raw_readw(addr);
+}
+
+static void dmaor_write(struct sh_dmae_device *shdev, u16 data)
+{
+ void __iomem *addr = shdev->chan_reg + DMAOR;
+
+ if (shdev->pdata->dmaor_is_32bit)
+ __raw_writel(data, addr);
+ else
+ __raw_writew(data, addr);
+}
+
+static void chcr_write(struct sh_dmae_chan *sh_dc, u32 data)
+{
+ struct sh_dmae_device *shdev = to_sh_dev(sh_dc);
+
+ __raw_writel(data, sh_dc->base + shdev->chcr_offset);
+}
+
+static u32 chcr_read(struct sh_dmae_chan *sh_dc)
+{
+ struct sh_dmae_device *shdev = to_sh_dev(sh_dc);
+
+ return __raw_readl(sh_dc->base + shdev->chcr_offset);
+}
+
+/*
+ * Reset DMA controller
+ *
+ * SH7780 has two DMAOR register
+ */
+static void sh_dmae_ctl_stop(struct sh_dmae_device *shdev)
+{
+ unsigned short dmaor;
+ unsigned long flags;
+
+ spin_lock_irqsave(&sh_dmae_lock, flags);
+
+ dmaor = dmaor_read(shdev);
+ dmaor_write(shdev, dmaor & ~(DMAOR_NMIF | DMAOR_AE | DMAOR_DME));
+
+ spin_unlock_irqrestore(&sh_dmae_lock, flags);
+}
+
+static int sh_dmae_rst(struct sh_dmae_device *shdev)
+{
+ unsigned short dmaor;
+ unsigned long flags;
+
+ spin_lock_irqsave(&sh_dmae_lock, flags);
+
+ dmaor = dmaor_read(shdev) & ~(DMAOR_NMIF | DMAOR_AE | DMAOR_DME);
+
+ if (shdev->pdata->chclr_present) {
+ int i;
+ for (i = 0; i < shdev->pdata->channel_num; i++) {
+ struct sh_dmae_chan *sh_chan = shdev->chan[i];
+ if (sh_chan)
+ channel_clear(sh_chan);
+ }
+ }
+
+ dmaor_write(shdev, dmaor | shdev->pdata->dmaor_init);
+
+ dmaor = dmaor_read(shdev);
+
+ spin_unlock_irqrestore(&sh_dmae_lock, flags);
+
+ if (dmaor & (DMAOR_AE | DMAOR_NMIF)) {
+ dev_warn(shdev->shdma_dev.dma_dev.dev, "Can't initialize DMAOR.\n");
+ return -EIO;
+ }
+ if (shdev->pdata->dmaor_init & ~dmaor)
+ dev_warn(shdev->shdma_dev.dma_dev.dev,
+ "DMAOR=0x%x hasn't latched the initial value 0x%x.\n",
+ dmaor, shdev->pdata->dmaor_init);
+ return 0;
+}
+
+static bool dmae_is_busy(struct sh_dmae_chan *sh_chan)
+{
+ u32 chcr = chcr_read(sh_chan);
+
+ if ((chcr & (CHCR_DE | CHCR_TE)) == CHCR_DE)
+ return true; /* working */
+
+ return false; /* waiting */
+}
+
+static unsigned int calc_xmit_shift(struct sh_dmae_chan *sh_chan, u32 chcr)
+{
+ struct sh_dmae_device *shdev = to_sh_dev(sh_chan);
+ const struct sh_dmae_pdata *pdata = shdev->pdata;
+ int cnt = ((chcr & pdata->ts_low_mask) >> pdata->ts_low_shift) |
+ ((chcr & pdata->ts_high_mask) >> pdata->ts_high_shift);
+
+ if (cnt >= pdata->ts_shift_num)
+ cnt = 0;
+
+ return pdata->ts_shift[cnt];
+}
+
+static u32 log2size_to_chcr(struct sh_dmae_chan *sh_chan, int l2size)
+{
+ struct sh_dmae_device *shdev = to_sh_dev(sh_chan);
+ const struct sh_dmae_pdata *pdata = shdev->pdata;
+ int i;
+
+ for (i = 0; i < pdata->ts_shift_num; i++)
+ if (pdata->ts_shift[i] == l2size)
+ break;
+
+ if (i == pdata->ts_shift_num)
+ i = 0;
+
+ return ((i << pdata->ts_low_shift) & pdata->ts_low_mask) |
+ ((i << pdata->ts_high_shift) & pdata->ts_high_mask);
+}
+
+static void dmae_set_reg(struct sh_dmae_chan *sh_chan, struct sh_dmae_regs *hw)
+{
+ sh_dmae_writel(sh_chan, hw->sar, SAR);
+ sh_dmae_writel(sh_chan, hw->dar, DAR);
+ sh_dmae_writel(sh_chan, hw->tcr >> sh_chan->xmit_shift, TCR);
+}
+
+static void dmae_start(struct sh_dmae_chan *sh_chan)
+{
+ struct sh_dmae_device *shdev = to_sh_dev(sh_chan);
+ u32 chcr = chcr_read(sh_chan);
+
+ if (shdev->pdata->needs_tend_set)
+ sh_dmae_writel(sh_chan, 0xFFFFFFFF, TEND);
+
+ chcr |= CHCR_DE | shdev->chcr_ie_bit;
+ chcr_write(sh_chan, chcr & ~CHCR_TE);
+}
+
+static void dmae_init(struct sh_dmae_chan *sh_chan)
+{
+ /*
+ * Default configuration for dual address memory-memory transfer.
+ */
+ u32 chcr = DM_INC | SM_INC | RS_AUTO | log2size_to_chcr(sh_chan,
+ LOG2_DEFAULT_XFER_SIZE);
+ sh_chan->xmit_shift = calc_xmit_shift(sh_chan, chcr);
+ chcr_write(sh_chan, chcr);
+}
+
+static int dmae_set_chcr(struct sh_dmae_chan *sh_chan, u32 val)
+{
+ /* If DMA is active, cannot set CHCR. TODO: remove this superfluous check */
+ if (dmae_is_busy(sh_chan))
+ return -EBUSY;
+
+ sh_chan->xmit_shift = calc_xmit_shift(sh_chan, val);
+ chcr_write(sh_chan, val);
+
+ return 0;
+}
+
+static int dmae_set_dmars(struct sh_dmae_chan *sh_chan, u16 val)
+{
+ struct sh_dmae_device *shdev = to_sh_dev(sh_chan);
+ const struct sh_dmae_pdata *pdata = shdev->pdata;
+ const struct sh_dmae_channel *chan_pdata = &pdata->channel[sh_chan->shdma_chan.id];
+ void __iomem *addr = shdev->dmars;
+ unsigned int shift = chan_pdata->dmars_bit;
+
+ if (dmae_is_busy(sh_chan))
+ return -EBUSY;
+
+ if (pdata->no_dmars)
+ return 0;
+
+ /* in the case of a missing DMARS resource use first memory window */
+ if (!addr)
+ addr = shdev->chan_reg;
+ addr += chan_pdata->dmars;
+
+ __raw_writew((__raw_readw(addr) & (0xff00 >> shift)) | (val << shift),
+ addr);
+
+ return 0;
+}
+
+static void sh_dmae_start_xfer(struct shdma_chan *schan,
+ struct shdma_desc *sdesc)
+{
+ struct sh_dmae_chan *sh_chan = container_of(schan, struct sh_dmae_chan,
+ shdma_chan);
+ struct sh_dmae_desc *sh_desc = container_of(sdesc,
+ struct sh_dmae_desc, shdma_desc);
+ dev_dbg(sh_chan->shdma_chan.dev, "Queue #%d to %d: %u@%x -> %x\n",
+ sdesc->async_tx.cookie, sh_chan->shdma_chan.id,
+ sh_desc->hw.tcr, sh_desc->hw.sar, sh_desc->hw.dar);
+ /* Get the ld start address from ld_queue */
+ dmae_set_reg(sh_chan, &sh_desc->hw);
+ dmae_start(sh_chan);
+}
+
+static bool sh_dmae_channel_busy(struct shdma_chan *schan)
+{
+ struct sh_dmae_chan *sh_chan = container_of(schan, struct sh_dmae_chan,
+ shdma_chan);
+ return dmae_is_busy(sh_chan);
+}
+
+static void sh_dmae_setup_xfer(struct shdma_chan *schan,
+ int slave_id)
+{
+ struct sh_dmae_chan *sh_chan = container_of(schan, struct sh_dmae_chan,
+ shdma_chan);
+
+ if (slave_id >= 0) {
+ const struct sh_dmae_slave_config *cfg =
+ sh_chan->config;
+
+ dmae_set_dmars(sh_chan, cfg->mid_rid);
+ dmae_set_chcr(sh_chan, cfg->chcr);
+ } else {
+ dmae_init(sh_chan);
+ }
+}
+
+/*
+ * Find a slave channel configuration from the contoller list by either a slave
+ * ID in the non-DT case, or by a MID/RID value in the DT case
+ */
+static const struct sh_dmae_slave_config *dmae_find_slave(
+ struct sh_dmae_chan *sh_chan, int match)
+{
+ struct sh_dmae_device *shdev = to_sh_dev(sh_chan);
+ const struct sh_dmae_pdata *pdata = shdev->pdata;
+ const struct sh_dmae_slave_config *cfg;
+ int i;
+
+ if (!sh_chan->shdma_chan.dev->of_node) {
+ if (match >= SH_DMA_SLAVE_NUMBER)
+ return NULL;
+
+ for (i = 0, cfg = pdata->slave; i < pdata->slave_num; i++, cfg++)
+ if (cfg->slave_id == match)
+ return cfg;
+ } else {
+ for (i = 0, cfg = pdata->slave; i < pdata->slave_num; i++, cfg++)
+ if (cfg->mid_rid == match) {
+ sh_chan->shdma_chan.slave_id = i;
+ return cfg;
+ }
+ }
+
+ return NULL;
+}
+
+static int sh_dmae_set_slave(struct shdma_chan *schan,
+ int slave_id, dma_addr_t slave_addr, bool try)
+{
+ struct sh_dmae_chan *sh_chan = container_of(schan, struct sh_dmae_chan,
+ shdma_chan);
+ const struct sh_dmae_slave_config *cfg = dmae_find_slave(sh_chan, slave_id);
+ if (!cfg)
+ return -ENXIO;
+
+ if (!try) {
+ sh_chan->config = cfg;
+ sh_chan->slave_addr = slave_addr ? : cfg->addr;
+ }
+
+ return 0;
+}
+
+static void dmae_halt(struct sh_dmae_chan *sh_chan)
+{
+ struct sh_dmae_device *shdev = to_sh_dev(sh_chan);
+ u32 chcr = chcr_read(sh_chan);
+
+ chcr &= ~(CHCR_DE | CHCR_TE | shdev->chcr_ie_bit);
+ chcr_write(sh_chan, chcr);
+}
+
+static int sh_dmae_desc_setup(struct shdma_chan *schan,
+ struct shdma_desc *sdesc,
+ dma_addr_t src, dma_addr_t dst, size_t *len)
+{
+ struct sh_dmae_desc *sh_desc = container_of(sdesc,
+ struct sh_dmae_desc, shdma_desc);
+
+ if (*len > schan->max_xfer_len)
+ *len = schan->max_xfer_len;
+
+ sh_desc->hw.sar = src;
+ sh_desc->hw.dar = dst;
+ sh_desc->hw.tcr = *len;
+
+ return 0;
+}
+
+static void sh_dmae_halt(struct shdma_chan *schan)
+{
+ struct sh_dmae_chan *sh_chan = container_of(schan, struct sh_dmae_chan,
+ shdma_chan);
+ dmae_halt(sh_chan);
+}
+
+static bool sh_dmae_chan_irq(struct shdma_chan *schan, int irq)
+{
+ struct sh_dmae_chan *sh_chan = container_of(schan, struct sh_dmae_chan,
+ shdma_chan);
+
+ if (!(chcr_read(sh_chan) & CHCR_TE))
+ return false;
+
+ /* DMA stop */
+ dmae_halt(sh_chan);
+
+ return true;
+}
+
+static size_t sh_dmae_get_partial(struct shdma_chan *schan,
+ struct shdma_desc *sdesc)
+{
+ struct sh_dmae_chan *sh_chan = container_of(schan, struct sh_dmae_chan,
+ shdma_chan);
+ struct sh_dmae_desc *sh_desc = container_of(sdesc,
+ struct sh_dmae_desc, shdma_desc);
+ return sh_desc->hw.tcr -
+ (sh_dmae_readl(sh_chan, TCR) << sh_chan->xmit_shift);
+}
+
+/* Called from error IRQ or NMI */
+static bool sh_dmae_reset(struct sh_dmae_device *shdev)
+{
+ bool ret;
+
+ /* halt the dma controller */
+ sh_dmae_ctl_stop(shdev);
+
+ /* We cannot detect, which channel caused the error, have to reset all */
+ ret = shdma_reset(&shdev->shdma_dev);
+
+ sh_dmae_rst(shdev);
+
+ return ret;
+}
+
+static irqreturn_t sh_dmae_err(int irq, void *data)
+{
+ struct sh_dmae_device *shdev = data;
+
+ if (!(dmaor_read(shdev) & DMAOR_AE))
+ return IRQ_NONE;
+
+ sh_dmae_reset(shdev);
+ return IRQ_HANDLED;
+}
+
+static bool sh_dmae_desc_completed(struct shdma_chan *schan,
+ struct shdma_desc *sdesc)
+{
+ struct sh_dmae_chan *sh_chan = container_of(schan,
+ struct sh_dmae_chan, shdma_chan);
+ struct sh_dmae_desc *sh_desc = container_of(sdesc,
+ struct sh_dmae_desc, shdma_desc);
+ u32 sar_buf = sh_dmae_readl(sh_chan, SAR);
+ u32 dar_buf = sh_dmae_readl(sh_chan, DAR);
+
+ return (sdesc->direction == DMA_DEV_TO_MEM &&
+ (sh_desc->hw.dar + sh_desc->hw.tcr) == dar_buf) ||
+ (sdesc->direction != DMA_DEV_TO_MEM &&
+ (sh_desc->hw.sar + sh_desc->hw.tcr) == sar_buf);
+}
+
+static bool sh_dmae_nmi_notify(struct sh_dmae_device *shdev)
+{
+ /* Fast path out if NMIF is not asserted for this controller */
+ if ((dmaor_read(shdev) & DMAOR_NMIF) == 0)
+ return false;
+
+ return sh_dmae_reset(shdev);
+}
+
+static int sh_dmae_nmi_handler(struct notifier_block *self,
+ unsigned long cmd, void *data)
+{
+ struct sh_dmae_device *shdev;
+ int ret = NOTIFY_DONE;
+ bool triggered;
+
+ /*
+ * Only concern ourselves with NMI events.
+ *
+ * Normally we would check the die chain value, but as this needs
+ * to be architecture independent, check for NMI context instead.
+ */
+ if (!in_nmi())
+ return NOTIFY_DONE;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(shdev, &sh_dmae_devices, node) {
+ /*
+ * Only stop if one of the controllers has NMIF asserted,
+ * we do not want to interfere with regular address error
+ * handling or NMI events that don't concern the DMACs.
+ */
+ triggered = sh_dmae_nmi_notify(shdev);
+ if (triggered == true)
+ ret = NOTIFY_OK;
+ }
+ rcu_read_unlock();
+
+ return ret;
+}
+
+static struct notifier_block sh_dmae_nmi_notifier __read_mostly = {
+ .notifier_call = sh_dmae_nmi_handler,
+
+ /* Run before NMI debug handler and KGDB */
+ .priority = 1,
+};
+
+static int sh_dmae_chan_probe(struct sh_dmae_device *shdev, int id,
+ int irq, unsigned long flags)
+{
+ const struct sh_dmae_channel *chan_pdata = &shdev->pdata->channel[id];
+ struct shdma_dev *sdev = &shdev->shdma_dev;
+ struct platform_device *pdev = to_platform_device(sdev->dma_dev.dev);
+ struct sh_dmae_chan *sh_chan;
+ struct shdma_chan *schan;
+ int err;
+
+ sh_chan = devm_kzalloc(sdev->dma_dev.dev, sizeof(struct sh_dmae_chan),
+ GFP_KERNEL);
+ if (!sh_chan)
+ return -ENOMEM;
+
+ schan = &sh_chan->shdma_chan;
+ schan->max_xfer_len = SH_DMA_TCR_MAX + 1;
+
+ shdma_chan_probe(sdev, schan, id);
+
+ sh_chan->base = shdev->chan_reg + chan_pdata->offset;
+
+ /* set up channel irq */
+ if (pdev->id >= 0)
+ snprintf(sh_chan->dev_id, sizeof(sh_chan->dev_id),
+ "sh-dmae%d.%d", pdev->id, id);
+ else
+ snprintf(sh_chan->dev_id, sizeof(sh_chan->dev_id),
+ "sh-dma%d", id);
+
+ err = shdma_request_irq(schan, irq, flags, sh_chan->dev_id);
+ if (err) {
+ dev_err(sdev->dma_dev.dev,
+ "DMA channel %d request_irq error %d\n",
+ id, err);
+ goto err_no_irq;
+ }
+
+ shdev->chan[id] = sh_chan;
+ return 0;
+
+err_no_irq:
+ /* remove from dmaengine device node */
+ shdma_chan_remove(schan);
+ return err;
+}
+
+static void sh_dmae_chan_remove(struct sh_dmae_device *shdev)
+{
+ struct shdma_chan *schan;
+ int i;
+
+ shdma_for_each_chan(schan, &shdev->shdma_dev, i) {
+ BUG_ON(!schan);
+
+ shdma_chan_remove(schan);
+ }
+}
+
+#ifdef CONFIG_PM
+static int sh_dmae_runtime_suspend(struct device *dev)
+{
+ struct sh_dmae_device *shdev = dev_get_drvdata(dev);
+
+ sh_dmae_ctl_stop(shdev);
+ return 0;
+}
+
+static int sh_dmae_runtime_resume(struct device *dev)
+{
+ struct sh_dmae_device *shdev = dev_get_drvdata(dev);
+
+ return sh_dmae_rst(shdev);
+}
+#endif
+
+#ifdef CONFIG_PM_SLEEP
+static int sh_dmae_suspend(struct device *dev)
+{
+ struct sh_dmae_device *shdev = dev_get_drvdata(dev);
+
+ sh_dmae_ctl_stop(shdev);
+ return 0;
+}
+
+static int sh_dmae_resume(struct device *dev)
+{
+ struct sh_dmae_device *shdev = dev_get_drvdata(dev);
+ int i, ret;
+
+ ret = sh_dmae_rst(shdev);
+ if (ret < 0)
+ dev_err(dev, "Failed to reset!\n");
+
+ for (i = 0; i < shdev->pdata->channel_num; i++) {
+ struct sh_dmae_chan *sh_chan = shdev->chan[i];
+
+ if (!sh_chan->shdma_chan.desc_num)
+ continue;
+
+ if (sh_chan->shdma_chan.slave_id >= 0) {
+ const struct sh_dmae_slave_config *cfg = sh_chan->config;
+ dmae_set_dmars(sh_chan, cfg->mid_rid);
+ dmae_set_chcr(sh_chan, cfg->chcr);
+ } else {
+ dmae_init(sh_chan);
+ }
+ }
+
+ return 0;
+}
+#endif
+
+static const struct dev_pm_ops sh_dmae_pm = {
+ SET_SYSTEM_SLEEP_PM_OPS(sh_dmae_suspend, sh_dmae_resume)
+ SET_RUNTIME_PM_OPS(sh_dmae_runtime_suspend, sh_dmae_runtime_resume,
+ NULL)
+};
+
+static dma_addr_t sh_dmae_slave_addr(struct shdma_chan *schan)
+{
+ struct sh_dmae_chan *sh_chan = container_of(schan,
+ struct sh_dmae_chan, shdma_chan);
+
+ /*
+ * Implicit BUG_ON(!sh_chan->config)
+ * This is an exclusive slave DMA operation, may only be called after a
+ * successful slave configuration.
+ */
+ return sh_chan->slave_addr;
+}
+
+static struct shdma_desc *sh_dmae_embedded_desc(void *buf, int i)
+{
+ return &((struct sh_dmae_desc *)buf)[i].shdma_desc;
+}
+
+static const struct shdma_ops sh_dmae_shdma_ops = {
+ .desc_completed = sh_dmae_desc_completed,
+ .halt_channel = sh_dmae_halt,
+ .channel_busy = sh_dmae_channel_busy,
+ .slave_addr = sh_dmae_slave_addr,
+ .desc_setup = sh_dmae_desc_setup,
+ .set_slave = sh_dmae_set_slave,
+ .setup_xfer = sh_dmae_setup_xfer,
+ .start_xfer = sh_dmae_start_xfer,
+ .embedded_desc = sh_dmae_embedded_desc,
+ .chan_irq = sh_dmae_chan_irq,
+ .get_partial = sh_dmae_get_partial,
+};
+
+static int sh_dmae_probe(struct platform_device *pdev)
+{
+ const enum dma_slave_buswidth widths =
+ DMA_SLAVE_BUSWIDTH_1_BYTE | DMA_SLAVE_BUSWIDTH_2_BYTES |
+ DMA_SLAVE_BUSWIDTH_4_BYTES | DMA_SLAVE_BUSWIDTH_8_BYTES |
+ DMA_SLAVE_BUSWIDTH_16_BYTES | DMA_SLAVE_BUSWIDTH_32_BYTES;
+ const struct sh_dmae_pdata *pdata;
+ unsigned long chan_flag[SH_DMAE_MAX_CHANNELS] = {};
+ int chan_irq[SH_DMAE_MAX_CHANNELS];
+ unsigned long irqflags = 0;
+ int err, errirq, i, irq_cnt = 0, irqres = 0, irq_cap = 0;
+ struct sh_dmae_device *shdev;
+ struct dma_device *dma_dev;
+ struct resource *chan, *dmars, *errirq_res, *chanirq_res;
+
+ if (pdev->dev.of_node)
+ pdata = of_device_get_match_data(&pdev->dev);
+ else
+ pdata = dev_get_platdata(&pdev->dev);
+
+ /* get platform data */
+ if (!pdata || !pdata->channel_num)
+ return -ENODEV;
+
+ chan = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ /* DMARS area is optional */
+ dmars = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+ /*
+ * IRQ resources:
+ * 1. there always must be at least one IRQ IO-resource. On SH4 it is
+ * the error IRQ, in which case it is the only IRQ in this resource:
+ * start == end. If it is the only IRQ resource, all channels also
+ * use the same IRQ.
+ * 2. DMA channel IRQ resources can be specified one per resource or in
+ * ranges (start != end)
+ * 3. iff all events (channels and, optionally, error) on this
+ * controller use the same IRQ, only one IRQ resource can be
+ * specified, otherwise there must be one IRQ per channel, even if
+ * some of them are equal
+ * 4. if all IRQs on this controller are equal or if some specific IRQs
+ * specify IORESOURCE_IRQ_SHAREABLE in their resources, they will be
+ * requested with the IRQF_SHARED flag
+ */
+ errirq_res = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
+ if (!chan || !errirq_res)
+ return -ENODEV;
+
+ shdev = devm_kzalloc(&pdev->dev, sizeof(struct sh_dmae_device),
+ GFP_KERNEL);
+ if (!shdev)
+ return -ENOMEM;
+
+ dma_dev = &shdev->shdma_dev.dma_dev;
+
+ shdev->chan_reg = devm_ioremap_resource(&pdev->dev, chan);
+ if (IS_ERR(shdev->chan_reg))
+ return PTR_ERR(shdev->chan_reg);
+ if (dmars) {
+ shdev->dmars = devm_ioremap_resource(&pdev->dev, dmars);
+ if (IS_ERR(shdev->dmars))
+ return PTR_ERR(shdev->dmars);
+ }
+
+ dma_dev->src_addr_widths = widths;
+ dma_dev->dst_addr_widths = widths;
+ dma_dev->directions = BIT(DMA_MEM_TO_DEV) | BIT(DMA_DEV_TO_MEM);
+ dma_dev->residue_granularity = DMA_RESIDUE_GRANULARITY_DESCRIPTOR;
+
+ if (!pdata->slave_only)
+ dma_cap_set(DMA_MEMCPY, dma_dev->cap_mask);
+ if (pdata->slave && pdata->slave_num)
+ dma_cap_set(DMA_SLAVE, dma_dev->cap_mask);
+
+ /* Default transfer size of 32 bytes requires 32-byte alignment */
+ dma_dev->copy_align = LOG2_DEFAULT_XFER_SIZE;
+
+ shdev->shdma_dev.ops = &sh_dmae_shdma_ops;
+ shdev->shdma_dev.desc_size = sizeof(struct sh_dmae_desc);
+ err = shdma_init(&pdev->dev, &shdev->shdma_dev,
+ pdata->channel_num);
+ if (err < 0)
+ goto eshdma;
+
+ /* platform data */
+ shdev->pdata = pdata;
+
+ if (pdata->chcr_offset)
+ shdev->chcr_offset = pdata->chcr_offset;
+ else
+ shdev->chcr_offset = CHCR;
+
+ if (pdata->chcr_ie_bit)
+ shdev->chcr_ie_bit = pdata->chcr_ie_bit;
+ else
+ shdev->chcr_ie_bit = CHCR_IE;
+
+ platform_set_drvdata(pdev, shdev);
+
+ pm_runtime_enable(&pdev->dev);
+ err = pm_runtime_get_sync(&pdev->dev);
+ if (err < 0)
+ dev_err(&pdev->dev, "%s(): GET = %d\n", __func__, err);
+
+ spin_lock_irq(&sh_dmae_lock);
+ list_add_tail_rcu(&shdev->node, &sh_dmae_devices);
+ spin_unlock_irq(&sh_dmae_lock);
+
+ /* reset dma controller - only needed as a test */
+ err = sh_dmae_rst(shdev);
+ if (err)
+ goto rst_err;
+
+ if (IS_ENABLED(CONFIG_CPU_SH4) || IS_ENABLED(CONFIG_ARCH_RENESAS)) {
+ chanirq_res = platform_get_resource(pdev, IORESOURCE_IRQ, 1);
+
+ if (!chanirq_res)
+ chanirq_res = errirq_res;
+ else
+ irqres++;
+
+ if (chanirq_res == errirq_res ||
+ (errirq_res->flags & IORESOURCE_BITS) == IORESOURCE_IRQ_SHAREABLE)
+ irqflags = IRQF_SHARED;
+
+ errirq = errirq_res->start;
+
+ err = devm_request_irq(&pdev->dev, errirq, sh_dmae_err,
+ irqflags, "DMAC Address Error", shdev);
+ if (err) {
+ dev_err(&pdev->dev,
+ "DMA failed requesting irq #%d, error %d\n",
+ errirq, err);
+ goto eirq_err;
+ }
+ } else {
+ chanirq_res = errirq_res;
+ }
+
+ if (chanirq_res->start == chanirq_res->end &&
+ !platform_get_resource(pdev, IORESOURCE_IRQ, 1)) {
+ /* Special case - all multiplexed */
+ for (; irq_cnt < pdata->channel_num; irq_cnt++) {
+ if (irq_cnt < SH_DMAE_MAX_CHANNELS) {
+ chan_irq[irq_cnt] = chanirq_res->start;
+ chan_flag[irq_cnt] = IRQF_SHARED;
+ } else {
+ irq_cap = 1;
+ break;
+ }
+ }
+ } else {
+ do {
+ for (i = chanirq_res->start; i <= chanirq_res->end; i++) {
+ if (irq_cnt >= SH_DMAE_MAX_CHANNELS) {
+ irq_cap = 1;
+ break;
+ }
+
+ if ((errirq_res->flags & IORESOURCE_BITS) ==
+ IORESOURCE_IRQ_SHAREABLE)
+ chan_flag[irq_cnt] = IRQF_SHARED;
+ else
+ chan_flag[irq_cnt] = 0;
+ dev_dbg(&pdev->dev,
+ "Found IRQ %d for channel %d\n",
+ i, irq_cnt);
+ chan_irq[irq_cnt++] = i;
+ }
+
+ if (irq_cnt >= SH_DMAE_MAX_CHANNELS)
+ break;
+
+ chanirq_res = platform_get_resource(pdev,
+ IORESOURCE_IRQ, ++irqres);
+ } while (irq_cnt < pdata->channel_num && chanirq_res);
+ }
+
+ /* Create DMA Channel */
+ for (i = 0; i < irq_cnt; i++) {
+ err = sh_dmae_chan_probe(shdev, i, chan_irq[i], chan_flag[i]);
+ if (err)
+ goto chan_probe_err;
+ }
+
+ if (irq_cap)
+ dev_notice(&pdev->dev, "Attempting to register %d DMA "
+ "channels when a maximum of %d are supported.\n",
+ pdata->channel_num, SH_DMAE_MAX_CHANNELS);
+
+ pm_runtime_put(&pdev->dev);
+
+ err = dma_async_device_register(&shdev->shdma_dev.dma_dev);
+ if (err < 0)
+ goto edmadevreg;
+
+ return err;
+
+edmadevreg:
+ pm_runtime_get(&pdev->dev);
+
+chan_probe_err:
+ sh_dmae_chan_remove(shdev);
+
+eirq_err:
+rst_err:
+ spin_lock_irq(&sh_dmae_lock);
+ list_del_rcu(&shdev->node);
+ spin_unlock_irq(&sh_dmae_lock);
+
+ pm_runtime_put(&pdev->dev);
+ pm_runtime_disable(&pdev->dev);
+
+ shdma_cleanup(&shdev->shdma_dev);
+eshdma:
+ synchronize_rcu();
+
+ return err;
+}
+
+static int sh_dmae_remove(struct platform_device *pdev)
+{
+ struct sh_dmae_device *shdev = platform_get_drvdata(pdev);
+ struct dma_device *dma_dev = &shdev->shdma_dev.dma_dev;
+
+ dma_async_device_unregister(dma_dev);
+
+ spin_lock_irq(&sh_dmae_lock);
+ list_del_rcu(&shdev->node);
+ spin_unlock_irq(&sh_dmae_lock);
+
+ pm_runtime_disable(&pdev->dev);
+
+ sh_dmae_chan_remove(shdev);
+ shdma_cleanup(&shdev->shdma_dev);
+
+ synchronize_rcu();
+
+ return 0;
+}
+
+static struct platform_driver sh_dmae_driver = {
+ .driver = {
+ .pm = &sh_dmae_pm,
+ .name = SH_DMAE_DRV_NAME,
+ },
+ .remove = sh_dmae_remove,
+};
+
+static int __init sh_dmae_init(void)
+{
+ /* Wire up NMI handling */
+ int err = register_die_notifier(&sh_dmae_nmi_notifier);
+ if (err)
+ return err;
+
+ return platform_driver_probe(&sh_dmae_driver, sh_dmae_probe);
+}
+module_init(sh_dmae_init);
+
+static void __exit sh_dmae_exit(void)
+{
+ platform_driver_unregister(&sh_dmae_driver);
+
+ unregister_die_notifier(&sh_dmae_nmi_notifier);
+}
+module_exit(sh_dmae_exit);
+
+MODULE_AUTHOR("Nobuhiro Iwamatsu <iwamatsu.nobuhiro@renesas.com>");
+MODULE_DESCRIPTION("Renesas SH DMA Engine driver");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:" SH_DMAE_DRV_NAME);
diff --git a/drivers/dma/sh/usb-dmac.c b/drivers/dma/sh/usb-dmac.c
new file mode 100644
index 000000000..5edaeb89d
--- /dev/null
+++ b/drivers/dma/sh/usb-dmac.c
@@ -0,0 +1,914 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Renesas USB DMA Controller Driver
+ *
+ * Copyright (C) 2015 Renesas Electronics Corporation
+ *
+ * based on rcar-dmac.c
+ * Copyright (C) 2014 Renesas Electronics Inc.
+ * Author: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
+ */
+
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmaengine.h>
+#include <linux/interrupt.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_dma.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+
+#include "../dmaengine.h"
+#include "../virt-dma.h"
+
+/*
+ * struct usb_dmac_sg - Descriptor for a hardware transfer
+ * @mem_addr: memory address
+ * @size: transfer size in bytes
+ */
+struct usb_dmac_sg {
+ dma_addr_t mem_addr;
+ u32 size;
+};
+
+/*
+ * struct usb_dmac_desc - USB DMA Transfer Descriptor
+ * @vd: base virtual channel DMA transaction descriptor
+ * @direction: direction of the DMA transfer
+ * @sg_allocated_len: length of allocated sg
+ * @sg_len: length of sg
+ * @sg_index: index of sg
+ * @residue: residue after the DMAC completed a transfer
+ * @node: node for desc_got and desc_freed
+ * @done_cookie: cookie after the DMAC completed a transfer
+ * @sg: information for the transfer
+ */
+struct usb_dmac_desc {
+ struct virt_dma_desc vd;
+ enum dma_transfer_direction direction;
+ unsigned int sg_allocated_len;
+ unsigned int sg_len;
+ unsigned int sg_index;
+ u32 residue;
+ struct list_head node;
+ dma_cookie_t done_cookie;
+ struct usb_dmac_sg sg[];
+};
+
+#define to_usb_dmac_desc(vd) container_of(vd, struct usb_dmac_desc, vd)
+
+/*
+ * struct usb_dmac_chan - USB DMA Controller Channel
+ * @vc: base virtual DMA channel object
+ * @iomem: channel I/O memory base
+ * @index: index of this channel in the controller
+ * @irq: irq number of this channel
+ * @desc: the current descriptor
+ * @descs_allocated: number of descriptors allocated
+ * @desc_got: got descriptors
+ * @desc_freed: freed descriptors after the DMAC completed a transfer
+ */
+struct usb_dmac_chan {
+ struct virt_dma_chan vc;
+ void __iomem *iomem;
+ unsigned int index;
+ int irq;
+ struct usb_dmac_desc *desc;
+ int descs_allocated;
+ struct list_head desc_got;
+ struct list_head desc_freed;
+};
+
+#define to_usb_dmac_chan(c) container_of(c, struct usb_dmac_chan, vc.chan)
+
+/*
+ * struct usb_dmac - USB DMA Controller
+ * @engine: base DMA engine object
+ * @dev: the hardware device
+ * @iomem: remapped I/O memory base
+ * @n_channels: number of available channels
+ * @channels: array of DMAC channels
+ */
+struct usb_dmac {
+ struct dma_device engine;
+ struct device *dev;
+ void __iomem *iomem;
+
+ unsigned int n_channels;
+ struct usb_dmac_chan *channels;
+};
+
+#define to_usb_dmac(d) container_of(d, struct usb_dmac, engine)
+
+/* -----------------------------------------------------------------------------
+ * Registers
+ */
+
+#define USB_DMAC_CHAN_OFFSET(i) (0x20 + 0x20 * (i))
+
+#define USB_DMASWR 0x0008
+#define USB_DMASWR_SWR (1 << 0)
+#define USB_DMAOR 0x0060
+#define USB_DMAOR_AE (1 << 1)
+#define USB_DMAOR_DME (1 << 0)
+
+#define USB_DMASAR 0x0000
+#define USB_DMADAR 0x0004
+#define USB_DMATCR 0x0008
+#define USB_DMATCR_MASK 0x00ffffff
+#define USB_DMACHCR 0x0014
+#define USB_DMACHCR_FTE (1 << 24)
+#define USB_DMACHCR_NULLE (1 << 16)
+#define USB_DMACHCR_NULL (1 << 12)
+#define USB_DMACHCR_TS_8B ((0 << 7) | (0 << 6))
+#define USB_DMACHCR_TS_16B ((0 << 7) | (1 << 6))
+#define USB_DMACHCR_TS_32B ((1 << 7) | (0 << 6))
+#define USB_DMACHCR_IE (1 << 5)
+#define USB_DMACHCR_SP (1 << 2)
+#define USB_DMACHCR_TE (1 << 1)
+#define USB_DMACHCR_DE (1 << 0)
+#define USB_DMATEND 0x0018
+
+/* Hardcode the xfer_shift to 5 (32bytes) */
+#define USB_DMAC_XFER_SHIFT 5
+#define USB_DMAC_XFER_SIZE (1 << USB_DMAC_XFER_SHIFT)
+#define USB_DMAC_CHCR_TS USB_DMACHCR_TS_32B
+#define USB_DMAC_SLAVE_BUSWIDTH DMA_SLAVE_BUSWIDTH_32_BYTES
+
+/* for descriptors */
+#define USB_DMAC_INITIAL_NR_DESC 16
+#define USB_DMAC_INITIAL_NR_SG 8
+
+/* -----------------------------------------------------------------------------
+ * Device access
+ */
+
+static void usb_dmac_write(struct usb_dmac *dmac, u32 reg, u32 data)
+{
+ writel(data, dmac->iomem + reg);
+}
+
+static u32 usb_dmac_read(struct usb_dmac *dmac, u32 reg)
+{
+ return readl(dmac->iomem + reg);
+}
+
+static u32 usb_dmac_chan_read(struct usb_dmac_chan *chan, u32 reg)
+{
+ return readl(chan->iomem + reg);
+}
+
+static void usb_dmac_chan_write(struct usb_dmac_chan *chan, u32 reg, u32 data)
+{
+ writel(data, chan->iomem + reg);
+}
+
+/* -----------------------------------------------------------------------------
+ * Initialization and configuration
+ */
+
+static bool usb_dmac_chan_is_busy(struct usb_dmac_chan *chan)
+{
+ u32 chcr = usb_dmac_chan_read(chan, USB_DMACHCR);
+
+ return (chcr & (USB_DMACHCR_DE | USB_DMACHCR_TE)) == USB_DMACHCR_DE;
+}
+
+static u32 usb_dmac_calc_tend(u32 size)
+{
+ /*
+ * Please refer to the Figure "Example of Final Transaction Valid
+ * Data Transfer Enable (EDTEN) Setting" in the data sheet.
+ */
+ return 0xffffffff << (32 - (size % USB_DMAC_XFER_SIZE ? :
+ USB_DMAC_XFER_SIZE));
+}
+
+/* This function is already held by vc.lock */
+static void usb_dmac_chan_start_sg(struct usb_dmac_chan *chan,
+ unsigned int index)
+{
+ struct usb_dmac_desc *desc = chan->desc;
+ struct usb_dmac_sg *sg = desc->sg + index;
+ dma_addr_t src_addr = 0, dst_addr = 0;
+
+ WARN_ON_ONCE(usb_dmac_chan_is_busy(chan));
+
+ if (desc->direction == DMA_DEV_TO_MEM)
+ dst_addr = sg->mem_addr;
+ else
+ src_addr = sg->mem_addr;
+
+ dev_dbg(chan->vc.chan.device->dev,
+ "chan%u: queue sg %p: %u@%pad -> %pad\n",
+ chan->index, sg, sg->size, &src_addr, &dst_addr);
+
+ usb_dmac_chan_write(chan, USB_DMASAR, src_addr & 0xffffffff);
+ usb_dmac_chan_write(chan, USB_DMADAR, dst_addr & 0xffffffff);
+ usb_dmac_chan_write(chan, USB_DMATCR,
+ DIV_ROUND_UP(sg->size, USB_DMAC_XFER_SIZE));
+ usb_dmac_chan_write(chan, USB_DMATEND, usb_dmac_calc_tend(sg->size));
+
+ usb_dmac_chan_write(chan, USB_DMACHCR, USB_DMAC_CHCR_TS |
+ USB_DMACHCR_NULLE | USB_DMACHCR_IE | USB_DMACHCR_DE);
+}
+
+/* This function is already held by vc.lock */
+static void usb_dmac_chan_start_desc(struct usb_dmac_chan *chan)
+{
+ struct virt_dma_desc *vd;
+
+ vd = vchan_next_desc(&chan->vc);
+ if (!vd) {
+ chan->desc = NULL;
+ return;
+ }
+
+ /*
+ * Remove this request from vc->desc_issued. Otherwise, this driver
+ * will get the previous value from vchan_next_desc() after a transfer
+ * was completed.
+ */
+ list_del(&vd->node);
+
+ chan->desc = to_usb_dmac_desc(vd);
+ chan->desc->sg_index = 0;
+ usb_dmac_chan_start_sg(chan, 0);
+}
+
+static int usb_dmac_init(struct usb_dmac *dmac)
+{
+ u16 dmaor;
+
+ /* Clear all channels and enable the DMAC globally. */
+ usb_dmac_write(dmac, USB_DMAOR, USB_DMAOR_DME);
+
+ dmaor = usb_dmac_read(dmac, USB_DMAOR);
+ if ((dmaor & (USB_DMAOR_AE | USB_DMAOR_DME)) != USB_DMAOR_DME) {
+ dev_warn(dmac->dev, "DMAOR initialization failed.\n");
+ return -EIO;
+ }
+
+ return 0;
+}
+
+/* -----------------------------------------------------------------------------
+ * Descriptors allocation and free
+ */
+static int usb_dmac_desc_alloc(struct usb_dmac_chan *chan, unsigned int sg_len,
+ gfp_t gfp)
+{
+ struct usb_dmac_desc *desc;
+ unsigned long flags;
+
+ desc = kzalloc(struct_size(desc, sg, sg_len), gfp);
+ if (!desc)
+ return -ENOMEM;
+
+ desc->sg_allocated_len = sg_len;
+ INIT_LIST_HEAD(&desc->node);
+
+ spin_lock_irqsave(&chan->vc.lock, flags);
+ list_add_tail(&desc->node, &chan->desc_freed);
+ spin_unlock_irqrestore(&chan->vc.lock, flags);
+
+ return 0;
+}
+
+static void usb_dmac_desc_free(struct usb_dmac_chan *chan)
+{
+ struct usb_dmac_desc *desc, *_desc;
+ LIST_HEAD(list);
+
+ list_splice_init(&chan->desc_freed, &list);
+ list_splice_init(&chan->desc_got, &list);
+
+ list_for_each_entry_safe(desc, _desc, &list, node) {
+ list_del(&desc->node);
+ kfree(desc);
+ }
+ chan->descs_allocated = 0;
+}
+
+static struct usb_dmac_desc *usb_dmac_desc_get(struct usb_dmac_chan *chan,
+ unsigned int sg_len, gfp_t gfp)
+{
+ struct usb_dmac_desc *desc = NULL;
+ unsigned long flags;
+
+ /* Get a freed descritpor */
+ spin_lock_irqsave(&chan->vc.lock, flags);
+ list_for_each_entry(desc, &chan->desc_freed, node) {
+ if (sg_len <= desc->sg_allocated_len) {
+ list_move_tail(&desc->node, &chan->desc_got);
+ spin_unlock_irqrestore(&chan->vc.lock, flags);
+ return desc;
+ }
+ }
+ spin_unlock_irqrestore(&chan->vc.lock, flags);
+
+ /* Allocate a new descriptor */
+ if (!usb_dmac_desc_alloc(chan, sg_len, gfp)) {
+ /* If allocated the desc, it was added to tail of the list */
+ spin_lock_irqsave(&chan->vc.lock, flags);
+ desc = list_last_entry(&chan->desc_freed, struct usb_dmac_desc,
+ node);
+ list_move_tail(&desc->node, &chan->desc_got);
+ spin_unlock_irqrestore(&chan->vc.lock, flags);
+ return desc;
+ }
+
+ return NULL;
+}
+
+static void usb_dmac_desc_put(struct usb_dmac_chan *chan,
+ struct usb_dmac_desc *desc)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&chan->vc.lock, flags);
+ list_move_tail(&desc->node, &chan->desc_freed);
+ spin_unlock_irqrestore(&chan->vc.lock, flags);
+}
+
+/* -----------------------------------------------------------------------------
+ * Stop and reset
+ */
+
+static void usb_dmac_soft_reset(struct usb_dmac_chan *uchan)
+{
+ struct dma_chan *chan = &uchan->vc.chan;
+ struct usb_dmac *dmac = to_usb_dmac(chan->device);
+ int i;
+
+ /* Don't issue soft reset if any one of channels is busy */
+ for (i = 0; i < dmac->n_channels; ++i) {
+ if (usb_dmac_chan_is_busy(uchan))
+ return;
+ }
+
+ usb_dmac_write(dmac, USB_DMAOR, 0);
+ usb_dmac_write(dmac, USB_DMASWR, USB_DMASWR_SWR);
+ udelay(100);
+ usb_dmac_write(dmac, USB_DMASWR, 0);
+ usb_dmac_write(dmac, USB_DMAOR, 1);
+}
+
+static void usb_dmac_chan_halt(struct usb_dmac_chan *chan)
+{
+ u32 chcr = usb_dmac_chan_read(chan, USB_DMACHCR);
+
+ chcr &= ~(USB_DMACHCR_IE | USB_DMACHCR_TE | USB_DMACHCR_DE);
+ usb_dmac_chan_write(chan, USB_DMACHCR, chcr);
+
+ usb_dmac_soft_reset(chan);
+}
+
+static void usb_dmac_stop(struct usb_dmac *dmac)
+{
+ usb_dmac_write(dmac, USB_DMAOR, 0);
+}
+
+/* -----------------------------------------------------------------------------
+ * DMA engine operations
+ */
+
+static int usb_dmac_alloc_chan_resources(struct dma_chan *chan)
+{
+ struct usb_dmac_chan *uchan = to_usb_dmac_chan(chan);
+ int ret;
+
+ while (uchan->descs_allocated < USB_DMAC_INITIAL_NR_DESC) {
+ ret = usb_dmac_desc_alloc(uchan, USB_DMAC_INITIAL_NR_SG,
+ GFP_KERNEL);
+ if (ret < 0) {
+ usb_dmac_desc_free(uchan);
+ return ret;
+ }
+ uchan->descs_allocated++;
+ }
+
+ return pm_runtime_get_sync(chan->device->dev);
+}
+
+static void usb_dmac_free_chan_resources(struct dma_chan *chan)
+{
+ struct usb_dmac_chan *uchan = to_usb_dmac_chan(chan);
+ unsigned long flags;
+
+ /* Protect against ISR */
+ spin_lock_irqsave(&uchan->vc.lock, flags);
+ usb_dmac_chan_halt(uchan);
+ spin_unlock_irqrestore(&uchan->vc.lock, flags);
+
+ usb_dmac_desc_free(uchan);
+ vchan_free_chan_resources(&uchan->vc);
+
+ pm_runtime_put(chan->device->dev);
+}
+
+static struct dma_async_tx_descriptor *
+usb_dmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
+ unsigned int sg_len, enum dma_transfer_direction dir,
+ unsigned long dma_flags, void *context)
+{
+ struct usb_dmac_chan *uchan = to_usb_dmac_chan(chan);
+ struct usb_dmac_desc *desc;
+ struct scatterlist *sg;
+ int i;
+
+ if (!sg_len) {
+ dev_warn(chan->device->dev,
+ "%s: bad parameter: len=%d\n", __func__, sg_len);
+ return NULL;
+ }
+
+ desc = usb_dmac_desc_get(uchan, sg_len, GFP_NOWAIT);
+ if (!desc)
+ return NULL;
+
+ desc->direction = dir;
+ desc->sg_len = sg_len;
+ for_each_sg(sgl, sg, sg_len, i) {
+ desc->sg[i].mem_addr = sg_dma_address(sg);
+ desc->sg[i].size = sg_dma_len(sg);
+ }
+
+ return vchan_tx_prep(&uchan->vc, &desc->vd, dma_flags);
+}
+
+static int usb_dmac_chan_terminate_all(struct dma_chan *chan)
+{
+ struct usb_dmac_chan *uchan = to_usb_dmac_chan(chan);
+ struct usb_dmac_desc *desc, *_desc;
+ unsigned long flags;
+ LIST_HEAD(head);
+ LIST_HEAD(list);
+
+ spin_lock_irqsave(&uchan->vc.lock, flags);
+ usb_dmac_chan_halt(uchan);
+ vchan_get_all_descriptors(&uchan->vc, &head);
+ if (uchan->desc)
+ uchan->desc = NULL;
+ list_splice_init(&uchan->desc_got, &list);
+ list_for_each_entry_safe(desc, _desc, &list, node)
+ list_move_tail(&desc->node, &uchan->desc_freed);
+ spin_unlock_irqrestore(&uchan->vc.lock, flags);
+ vchan_dma_desc_free_list(&uchan->vc, &head);
+
+ return 0;
+}
+
+static unsigned int usb_dmac_get_current_residue(struct usb_dmac_chan *chan,
+ struct usb_dmac_desc *desc,
+ unsigned int sg_index)
+{
+ struct usb_dmac_sg *sg = desc->sg + sg_index;
+ u32 mem_addr = sg->mem_addr & 0xffffffff;
+ unsigned int residue = sg->size;
+
+ /*
+ * We cannot use USB_DMATCR to calculate residue because USB_DMATCR
+ * has unsuited value to calculate.
+ */
+ if (desc->direction == DMA_DEV_TO_MEM)
+ residue -= usb_dmac_chan_read(chan, USB_DMADAR) - mem_addr;
+ else
+ residue -= usb_dmac_chan_read(chan, USB_DMASAR) - mem_addr;
+
+ return residue;
+}
+
+static u32 usb_dmac_chan_get_residue_if_complete(struct usb_dmac_chan *chan,
+ dma_cookie_t cookie)
+{
+ struct usb_dmac_desc *desc;
+ u32 residue = 0;
+
+ list_for_each_entry_reverse(desc, &chan->desc_freed, node) {
+ if (desc->done_cookie == cookie) {
+ residue = desc->residue;
+ break;
+ }
+ }
+
+ return residue;
+}
+
+static u32 usb_dmac_chan_get_residue(struct usb_dmac_chan *chan,
+ dma_cookie_t cookie)
+{
+ u32 residue = 0;
+ struct virt_dma_desc *vd;
+ struct usb_dmac_desc *desc = chan->desc;
+ int i;
+
+ if (!desc) {
+ vd = vchan_find_desc(&chan->vc, cookie);
+ if (!vd)
+ return 0;
+ desc = to_usb_dmac_desc(vd);
+ }
+
+ /* Compute the size of all usb_dmac_sg still to be transferred */
+ for (i = desc->sg_index + 1; i < desc->sg_len; i++)
+ residue += desc->sg[i].size;
+
+ /* Add the residue for the current sg */
+ residue += usb_dmac_get_current_residue(chan, desc, desc->sg_index);
+
+ return residue;
+}
+
+static enum dma_status usb_dmac_tx_status(struct dma_chan *chan,
+ dma_cookie_t cookie,
+ struct dma_tx_state *txstate)
+{
+ struct usb_dmac_chan *uchan = to_usb_dmac_chan(chan);
+ enum dma_status status;
+ unsigned int residue = 0;
+ unsigned long flags;
+
+ status = dma_cookie_status(chan, cookie, txstate);
+ /* a client driver will get residue after DMA_COMPLETE */
+ if (!txstate)
+ return status;
+
+ spin_lock_irqsave(&uchan->vc.lock, flags);
+ if (status == DMA_COMPLETE)
+ residue = usb_dmac_chan_get_residue_if_complete(uchan, cookie);
+ else
+ residue = usb_dmac_chan_get_residue(uchan, cookie);
+ spin_unlock_irqrestore(&uchan->vc.lock, flags);
+
+ dma_set_residue(txstate, residue);
+
+ return status;
+}
+
+static void usb_dmac_issue_pending(struct dma_chan *chan)
+{
+ struct usb_dmac_chan *uchan = to_usb_dmac_chan(chan);
+ unsigned long flags;
+
+ spin_lock_irqsave(&uchan->vc.lock, flags);
+ if (vchan_issue_pending(&uchan->vc) && !uchan->desc)
+ usb_dmac_chan_start_desc(uchan);
+ spin_unlock_irqrestore(&uchan->vc.lock, flags);
+}
+
+static void usb_dmac_virt_desc_free(struct virt_dma_desc *vd)
+{
+ struct usb_dmac_desc *desc = to_usb_dmac_desc(vd);
+ struct usb_dmac_chan *chan = to_usb_dmac_chan(vd->tx.chan);
+
+ usb_dmac_desc_put(chan, desc);
+}
+
+/* -----------------------------------------------------------------------------
+ * IRQ handling
+ */
+
+static void usb_dmac_isr_transfer_end(struct usb_dmac_chan *chan)
+{
+ struct usb_dmac_desc *desc = chan->desc;
+
+ BUG_ON(!desc);
+
+ if (++desc->sg_index < desc->sg_len) {
+ usb_dmac_chan_start_sg(chan, desc->sg_index);
+ } else {
+ desc->residue = usb_dmac_get_current_residue(chan, desc,
+ desc->sg_index - 1);
+ desc->done_cookie = desc->vd.tx.cookie;
+ desc->vd.tx_result.result = DMA_TRANS_NOERROR;
+ desc->vd.tx_result.residue = desc->residue;
+ vchan_cookie_complete(&desc->vd);
+
+ /* Restart the next transfer if this driver has a next desc */
+ usb_dmac_chan_start_desc(chan);
+ }
+}
+
+static irqreturn_t usb_dmac_isr_channel(int irq, void *dev)
+{
+ struct usb_dmac_chan *chan = dev;
+ irqreturn_t ret = IRQ_NONE;
+ u32 mask = 0;
+ u32 chcr;
+ bool xfer_end = false;
+
+ spin_lock(&chan->vc.lock);
+
+ chcr = usb_dmac_chan_read(chan, USB_DMACHCR);
+ if (chcr & (USB_DMACHCR_TE | USB_DMACHCR_SP)) {
+ mask |= USB_DMACHCR_DE | USB_DMACHCR_TE | USB_DMACHCR_SP;
+ if (chcr & USB_DMACHCR_DE)
+ xfer_end = true;
+ ret |= IRQ_HANDLED;
+ }
+ if (chcr & USB_DMACHCR_NULL) {
+ /* An interruption of TE will happen after we set FTE */
+ mask |= USB_DMACHCR_NULL;
+ chcr |= USB_DMACHCR_FTE;
+ ret |= IRQ_HANDLED;
+ }
+ if (mask)
+ usb_dmac_chan_write(chan, USB_DMACHCR, chcr & ~mask);
+
+ if (xfer_end)
+ usb_dmac_isr_transfer_end(chan);
+
+ spin_unlock(&chan->vc.lock);
+
+ return ret;
+}
+
+/* -----------------------------------------------------------------------------
+ * OF xlate and channel filter
+ */
+
+static bool usb_dmac_chan_filter(struct dma_chan *chan, void *arg)
+{
+ struct usb_dmac_chan *uchan = to_usb_dmac_chan(chan);
+ struct of_phandle_args *dma_spec = arg;
+
+ /* USB-DMAC should be used with fixed usb controller's FIFO */
+ if (uchan->index != dma_spec->args[0])
+ return false;
+
+ return true;
+}
+
+static struct dma_chan *usb_dmac_of_xlate(struct of_phandle_args *dma_spec,
+ struct of_dma *ofdma)
+{
+ struct dma_chan *chan;
+ dma_cap_mask_t mask;
+
+ if (dma_spec->args_count != 1)
+ return NULL;
+
+ /* Only slave DMA channels can be allocated via DT */
+ dma_cap_zero(mask);
+ dma_cap_set(DMA_SLAVE, mask);
+
+ chan = __dma_request_channel(&mask, usb_dmac_chan_filter, dma_spec,
+ ofdma->of_node);
+ if (!chan)
+ return NULL;
+
+ return chan;
+}
+
+/* -----------------------------------------------------------------------------
+ * Power management
+ */
+
+#ifdef CONFIG_PM
+static int usb_dmac_runtime_suspend(struct device *dev)
+{
+ struct usb_dmac *dmac = dev_get_drvdata(dev);
+ int i;
+
+ for (i = 0; i < dmac->n_channels; ++i) {
+ if (!dmac->channels[i].iomem)
+ break;
+ usb_dmac_chan_halt(&dmac->channels[i]);
+ }
+
+ return 0;
+}
+
+static int usb_dmac_runtime_resume(struct device *dev)
+{
+ struct usb_dmac *dmac = dev_get_drvdata(dev);
+
+ return usb_dmac_init(dmac);
+}
+#endif /* CONFIG_PM */
+
+static const struct dev_pm_ops usb_dmac_pm = {
+ SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend,
+ pm_runtime_force_resume)
+ SET_RUNTIME_PM_OPS(usb_dmac_runtime_suspend, usb_dmac_runtime_resume,
+ NULL)
+};
+
+/* -----------------------------------------------------------------------------
+ * Probe and remove
+ */
+
+static int usb_dmac_chan_probe(struct usb_dmac *dmac,
+ struct usb_dmac_chan *uchan,
+ unsigned int index)
+{
+ struct platform_device *pdev = to_platform_device(dmac->dev);
+ char pdev_irqname[5];
+ char *irqname;
+ int ret;
+
+ uchan->index = index;
+ uchan->iomem = dmac->iomem + USB_DMAC_CHAN_OFFSET(index);
+
+ /* Request the channel interrupt. */
+ sprintf(pdev_irqname, "ch%u", index);
+ uchan->irq = platform_get_irq_byname(pdev, pdev_irqname);
+ if (uchan->irq < 0)
+ return -ENODEV;
+
+ irqname = devm_kasprintf(dmac->dev, GFP_KERNEL, "%s:%u",
+ dev_name(dmac->dev), index);
+ if (!irqname)
+ return -ENOMEM;
+
+ ret = devm_request_irq(dmac->dev, uchan->irq, usb_dmac_isr_channel,
+ IRQF_SHARED, irqname, uchan);
+ if (ret) {
+ dev_err(dmac->dev, "failed to request IRQ %u (%d)\n",
+ uchan->irq, ret);
+ return ret;
+ }
+
+ uchan->vc.desc_free = usb_dmac_virt_desc_free;
+ vchan_init(&uchan->vc, &dmac->engine);
+ INIT_LIST_HEAD(&uchan->desc_freed);
+ INIT_LIST_HEAD(&uchan->desc_got);
+
+ return 0;
+}
+
+static int usb_dmac_parse_of(struct device *dev, struct usb_dmac *dmac)
+{
+ struct device_node *np = dev->of_node;
+ int ret;
+
+ ret = of_property_read_u32(np, "dma-channels", &dmac->n_channels);
+ if (ret < 0) {
+ dev_err(dev, "unable to read dma-channels property\n");
+ return ret;
+ }
+
+ if (dmac->n_channels <= 0 || dmac->n_channels >= 100) {
+ dev_err(dev, "invalid number of channels %u\n",
+ dmac->n_channels);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int usb_dmac_probe(struct platform_device *pdev)
+{
+ const enum dma_slave_buswidth widths = USB_DMAC_SLAVE_BUSWIDTH;
+ struct dma_device *engine;
+ struct usb_dmac *dmac;
+ struct resource *mem;
+ unsigned int i;
+ int ret;
+
+ dmac = devm_kzalloc(&pdev->dev, sizeof(*dmac), GFP_KERNEL);
+ if (!dmac)
+ return -ENOMEM;
+
+ dmac->dev = &pdev->dev;
+ platform_set_drvdata(pdev, dmac);
+
+ ret = usb_dmac_parse_of(&pdev->dev, dmac);
+ if (ret < 0)
+ return ret;
+
+ dmac->channels = devm_kcalloc(&pdev->dev, dmac->n_channels,
+ sizeof(*dmac->channels), GFP_KERNEL);
+ if (!dmac->channels)
+ return -ENOMEM;
+
+ /* Request resources. */
+ mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ dmac->iomem = devm_ioremap_resource(&pdev->dev, mem);
+ if (IS_ERR(dmac->iomem))
+ return PTR_ERR(dmac->iomem);
+
+ /* Enable runtime PM and initialize the device. */
+ pm_runtime_enable(&pdev->dev);
+ ret = pm_runtime_get_sync(&pdev->dev);
+ if (ret < 0) {
+ dev_err(&pdev->dev, "runtime PM get sync failed (%d)\n", ret);
+ goto error_pm;
+ }
+
+ ret = usb_dmac_init(dmac);
+
+ if (ret) {
+ dev_err(&pdev->dev, "failed to reset device\n");
+ goto error;
+ }
+
+ /* Initialize the channels. */
+ INIT_LIST_HEAD(&dmac->engine.channels);
+
+ for (i = 0; i < dmac->n_channels; ++i) {
+ ret = usb_dmac_chan_probe(dmac, &dmac->channels[i], i);
+ if (ret < 0)
+ goto error;
+ }
+
+ /* Register the DMAC as a DMA provider for DT. */
+ ret = of_dma_controller_register(pdev->dev.of_node, usb_dmac_of_xlate,
+ NULL);
+ if (ret < 0)
+ goto error;
+
+ /*
+ * Register the DMA engine device.
+ *
+ * Default transfer size of 32 bytes requires 32-byte alignment.
+ */
+ engine = &dmac->engine;
+ dma_cap_set(DMA_SLAVE, engine->cap_mask);
+
+ engine->dev = &pdev->dev;
+
+ engine->src_addr_widths = widths;
+ engine->dst_addr_widths = widths;
+ engine->directions = BIT(DMA_MEM_TO_DEV) | BIT(DMA_DEV_TO_MEM);
+ engine->residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
+
+ engine->device_alloc_chan_resources = usb_dmac_alloc_chan_resources;
+ engine->device_free_chan_resources = usb_dmac_free_chan_resources;
+ engine->device_prep_slave_sg = usb_dmac_prep_slave_sg;
+ engine->device_terminate_all = usb_dmac_chan_terminate_all;
+ engine->device_tx_status = usb_dmac_tx_status;
+ engine->device_issue_pending = usb_dmac_issue_pending;
+
+ ret = dma_async_device_register(engine);
+ if (ret < 0)
+ goto error;
+
+ pm_runtime_put(&pdev->dev);
+ return 0;
+
+error:
+ of_dma_controller_free(pdev->dev.of_node);
+error_pm:
+ pm_runtime_put(&pdev->dev);
+ pm_runtime_disable(&pdev->dev);
+ return ret;
+}
+
+static void usb_dmac_chan_remove(struct usb_dmac *dmac,
+ struct usb_dmac_chan *uchan)
+{
+ usb_dmac_chan_halt(uchan);
+ devm_free_irq(dmac->dev, uchan->irq, uchan);
+}
+
+static int usb_dmac_remove(struct platform_device *pdev)
+{
+ struct usb_dmac *dmac = platform_get_drvdata(pdev);
+ int i;
+
+ for (i = 0; i < dmac->n_channels; ++i)
+ usb_dmac_chan_remove(dmac, &dmac->channels[i]);
+ of_dma_controller_free(pdev->dev.of_node);
+ dma_async_device_unregister(&dmac->engine);
+
+ pm_runtime_disable(&pdev->dev);
+
+ return 0;
+}
+
+static void usb_dmac_shutdown(struct platform_device *pdev)
+{
+ struct usb_dmac *dmac = platform_get_drvdata(pdev);
+
+ usb_dmac_stop(dmac);
+}
+
+static const struct of_device_id usb_dmac_of_ids[] = {
+ { .compatible = "renesas,usb-dmac", },
+ { /* Sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, usb_dmac_of_ids);
+
+static struct platform_driver usb_dmac_driver = {
+ .driver = {
+ .pm = &usb_dmac_pm,
+ .name = "usb-dmac",
+ .of_match_table = usb_dmac_of_ids,
+ },
+ .probe = usb_dmac_probe,
+ .remove = usb_dmac_remove,
+ .shutdown = usb_dmac_shutdown,
+};
+
+module_platform_driver(usb_dmac_driver);
+
+MODULE_DESCRIPTION("Renesas USB DMA Controller Driver");
+MODULE_AUTHOR("Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/sprd-dma.c b/drivers/dma/sprd-dma.c
new file mode 100644
index 000000000..474d3ba8e
--- /dev/null
+++ b/drivers/dma/sprd-dma.c
@@ -0,0 +1,1309 @@
+/*
+ * Copyright (C) 2017 Spreadtrum Communications Inc.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ */
+
+#include <linux/clk.h>
+#include <linux/dma-mapping.h>
+#include <linux/dma/sprd-dma.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_dma.h>
+#include <linux/of_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/slab.h>
+
+#include "virt-dma.h"
+
+#define SPRD_DMA_CHN_REG_OFFSET 0x1000
+#define SPRD_DMA_CHN_REG_LENGTH 0x40
+#define SPRD_DMA_MEMCPY_MIN_SIZE 64
+
+/* DMA global registers definition */
+#define SPRD_DMA_GLB_PAUSE 0x0
+#define SPRD_DMA_GLB_FRAG_WAIT 0x4
+#define SPRD_DMA_GLB_REQ_PEND0_EN 0x8
+#define SPRD_DMA_GLB_REQ_PEND1_EN 0xc
+#define SPRD_DMA_GLB_INT_RAW_STS 0x10
+#define SPRD_DMA_GLB_INT_MSK_STS 0x14
+#define SPRD_DMA_GLB_REQ_STS 0x18
+#define SPRD_DMA_GLB_CHN_EN_STS 0x1c
+#define SPRD_DMA_GLB_DEBUG_STS 0x20
+#define SPRD_DMA_GLB_ARB_SEL_STS 0x24
+#define SPRD_DMA_GLB_2STAGE_GRP1 0x28
+#define SPRD_DMA_GLB_2STAGE_GRP2 0x2c
+#define SPRD_DMA_GLB_REQ_UID(uid) (0x4 * ((uid) - 1))
+#define SPRD_DMA_GLB_REQ_UID_OFFSET 0x2000
+
+/* DMA channel registers definition */
+#define SPRD_DMA_CHN_PAUSE 0x0
+#define SPRD_DMA_CHN_REQ 0x4
+#define SPRD_DMA_CHN_CFG 0x8
+#define SPRD_DMA_CHN_INTC 0xc
+#define SPRD_DMA_CHN_SRC_ADDR 0x10
+#define SPRD_DMA_CHN_DES_ADDR 0x14
+#define SPRD_DMA_CHN_FRG_LEN 0x18
+#define SPRD_DMA_CHN_BLK_LEN 0x1c
+#define SPRD_DMA_CHN_TRSC_LEN 0x20
+#define SPRD_DMA_CHN_TRSF_STEP 0x24
+#define SPRD_DMA_CHN_WARP_PTR 0x28
+#define SPRD_DMA_CHN_WARP_TO 0x2c
+#define SPRD_DMA_CHN_LLIST_PTR 0x30
+#define SPRD_DMA_CHN_FRAG_STEP 0x34
+#define SPRD_DMA_CHN_SRC_BLK_STEP 0x38
+#define SPRD_DMA_CHN_DES_BLK_STEP 0x3c
+
+/* SPRD_DMA_GLB_2STAGE_GRP register definition */
+#define SPRD_DMA_GLB_2STAGE_EN BIT(24)
+#define SPRD_DMA_GLB_CHN_INT_MASK GENMASK(23, 20)
+#define SPRD_DMA_GLB_DEST_INT BIT(22)
+#define SPRD_DMA_GLB_SRC_INT BIT(20)
+#define SPRD_DMA_GLB_LIST_DONE_TRG BIT(19)
+#define SPRD_DMA_GLB_TRANS_DONE_TRG BIT(18)
+#define SPRD_DMA_GLB_BLOCK_DONE_TRG BIT(17)
+#define SPRD_DMA_GLB_FRAG_DONE_TRG BIT(16)
+#define SPRD_DMA_GLB_TRG_OFFSET 16
+#define SPRD_DMA_GLB_DEST_CHN_MASK GENMASK(13, 8)
+#define SPRD_DMA_GLB_DEST_CHN_OFFSET 8
+#define SPRD_DMA_GLB_SRC_CHN_MASK GENMASK(5, 0)
+
+/* SPRD_DMA_CHN_INTC register definition */
+#define SPRD_DMA_INT_MASK GENMASK(4, 0)
+#define SPRD_DMA_INT_CLR_OFFSET 24
+#define SPRD_DMA_FRAG_INT_EN BIT(0)
+#define SPRD_DMA_BLK_INT_EN BIT(1)
+#define SPRD_DMA_TRANS_INT_EN BIT(2)
+#define SPRD_DMA_LIST_INT_EN BIT(3)
+#define SPRD_DMA_CFG_ERR_INT_EN BIT(4)
+
+/* SPRD_DMA_CHN_CFG register definition */
+#define SPRD_DMA_CHN_EN BIT(0)
+#define SPRD_DMA_LINKLIST_EN BIT(4)
+#define SPRD_DMA_WAIT_BDONE_OFFSET 24
+#define SPRD_DMA_DONOT_WAIT_BDONE 1
+
+/* SPRD_DMA_CHN_REQ register definition */
+#define SPRD_DMA_REQ_EN BIT(0)
+
+/* SPRD_DMA_CHN_PAUSE register definition */
+#define SPRD_DMA_PAUSE_EN BIT(0)
+#define SPRD_DMA_PAUSE_STS BIT(2)
+#define SPRD_DMA_PAUSE_CNT 0x2000
+
+/* DMA_CHN_WARP_* register definition */
+#define SPRD_DMA_HIGH_ADDR_MASK GENMASK(31, 28)
+#define SPRD_DMA_LOW_ADDR_MASK GENMASK(31, 0)
+#define SPRD_DMA_WRAP_ADDR_MASK GENMASK(27, 0)
+#define SPRD_DMA_HIGH_ADDR_OFFSET 4
+
+/* SPRD_DMA_CHN_INTC register definition */
+#define SPRD_DMA_FRAG_INT_STS BIT(16)
+#define SPRD_DMA_BLK_INT_STS BIT(17)
+#define SPRD_DMA_TRSC_INT_STS BIT(18)
+#define SPRD_DMA_LIST_INT_STS BIT(19)
+#define SPRD_DMA_CFGERR_INT_STS BIT(20)
+#define SPRD_DMA_CHN_INT_STS \
+ (SPRD_DMA_FRAG_INT_STS | SPRD_DMA_BLK_INT_STS | \
+ SPRD_DMA_TRSC_INT_STS | SPRD_DMA_LIST_INT_STS | \
+ SPRD_DMA_CFGERR_INT_STS)
+
+/* SPRD_DMA_CHN_FRG_LEN register definition */
+#define SPRD_DMA_SRC_DATAWIDTH_OFFSET 30
+#define SPRD_DMA_DES_DATAWIDTH_OFFSET 28
+#define SPRD_DMA_SWT_MODE_OFFSET 26
+#define SPRD_DMA_REQ_MODE_OFFSET 24
+#define SPRD_DMA_REQ_MODE_MASK GENMASK(1, 0)
+#define SPRD_DMA_WRAP_SEL_DEST BIT(23)
+#define SPRD_DMA_WRAP_EN BIT(22)
+#define SPRD_DMA_FIX_SEL_OFFSET 21
+#define SPRD_DMA_FIX_EN_OFFSET 20
+#define SPRD_DMA_LLIST_END BIT(19)
+#define SPRD_DMA_FRG_LEN_MASK GENMASK(16, 0)
+
+/* SPRD_DMA_CHN_BLK_LEN register definition */
+#define SPRD_DMA_BLK_LEN_MASK GENMASK(16, 0)
+
+/* SPRD_DMA_CHN_TRSC_LEN register definition */
+#define SPRD_DMA_TRSC_LEN_MASK GENMASK(27, 0)
+
+/* SPRD_DMA_CHN_TRSF_STEP register definition */
+#define SPRD_DMA_DEST_TRSF_STEP_OFFSET 16
+#define SPRD_DMA_SRC_TRSF_STEP_OFFSET 0
+#define SPRD_DMA_TRSF_STEP_MASK GENMASK(15, 0)
+
+/* SPRD DMA_SRC_BLK_STEP register definition */
+#define SPRD_DMA_LLIST_HIGH_MASK GENMASK(31, 28)
+#define SPRD_DMA_LLIST_HIGH_SHIFT 28
+
+/* define DMA channel mode & trigger mode mask */
+#define SPRD_DMA_CHN_MODE_MASK GENMASK(7, 0)
+#define SPRD_DMA_TRG_MODE_MASK GENMASK(7, 0)
+#define SPRD_DMA_INT_TYPE_MASK GENMASK(7, 0)
+
+/* define the DMA transfer step type */
+#define SPRD_DMA_NONE_STEP 0
+#define SPRD_DMA_BYTE_STEP 1
+#define SPRD_DMA_SHORT_STEP 2
+#define SPRD_DMA_WORD_STEP 4
+#define SPRD_DMA_DWORD_STEP 8
+
+#define SPRD_DMA_SOFTWARE_UID 0
+
+/* dma data width values */
+enum sprd_dma_datawidth {
+ SPRD_DMA_DATAWIDTH_1_BYTE,
+ SPRD_DMA_DATAWIDTH_2_BYTES,
+ SPRD_DMA_DATAWIDTH_4_BYTES,
+ SPRD_DMA_DATAWIDTH_8_BYTES,
+};
+
+/* dma channel hardware configuration */
+struct sprd_dma_chn_hw {
+ u32 pause;
+ u32 req;
+ u32 cfg;
+ u32 intc;
+ u32 src_addr;
+ u32 des_addr;
+ u32 frg_len;
+ u32 blk_len;
+ u32 trsc_len;
+ u32 trsf_step;
+ u32 wrap_ptr;
+ u32 wrap_to;
+ u32 llist_ptr;
+ u32 frg_step;
+ u32 src_blk_step;
+ u32 des_blk_step;
+};
+
+/* dma request description */
+struct sprd_dma_desc {
+ struct virt_dma_desc vd;
+ struct sprd_dma_chn_hw chn_hw;
+ enum dma_transfer_direction dir;
+};
+
+/* dma channel description */
+struct sprd_dma_chn {
+ struct virt_dma_chan vc;
+ void __iomem *chn_base;
+ struct sprd_dma_linklist linklist;
+ struct dma_slave_config slave_cfg;
+ u32 chn_num;
+ u32 dev_id;
+ enum sprd_dma_chn_mode chn_mode;
+ enum sprd_dma_trg_mode trg_mode;
+ enum sprd_dma_int_type int_type;
+ struct sprd_dma_desc *cur_desc;
+};
+
+/* SPRD dma device */
+struct sprd_dma_dev {
+ struct dma_device dma_dev;
+ void __iomem *glb_base;
+ struct clk *clk;
+ struct clk *ashb_clk;
+ int irq;
+ u32 total_chns;
+ struct sprd_dma_chn channels[];
+};
+
+static void sprd_dma_free_desc(struct virt_dma_desc *vd);
+static bool sprd_dma_filter_fn(struct dma_chan *chan, void *param);
+static struct of_dma_filter_info sprd_dma_info = {
+ .filter_fn = sprd_dma_filter_fn,
+};
+
+static inline struct sprd_dma_chn *to_sprd_dma_chan(struct dma_chan *c)
+{
+ return container_of(c, struct sprd_dma_chn, vc.chan);
+}
+
+static inline struct sprd_dma_dev *to_sprd_dma_dev(struct dma_chan *c)
+{
+ struct sprd_dma_chn *schan = to_sprd_dma_chan(c);
+
+ return container_of(schan, struct sprd_dma_dev, channels[c->chan_id]);
+}
+
+static inline struct sprd_dma_desc *to_sprd_dma_desc(struct virt_dma_desc *vd)
+{
+ return container_of(vd, struct sprd_dma_desc, vd);
+}
+
+static void sprd_dma_glb_update(struct sprd_dma_dev *sdev, u32 reg,
+ u32 mask, u32 val)
+{
+ u32 orig = readl(sdev->glb_base + reg);
+ u32 tmp;
+
+ tmp = (orig & ~mask) | val;
+ writel(tmp, sdev->glb_base + reg);
+}
+
+static void sprd_dma_chn_update(struct sprd_dma_chn *schan, u32 reg,
+ u32 mask, u32 val)
+{
+ u32 orig = readl(schan->chn_base + reg);
+ u32 tmp;
+
+ tmp = (orig & ~mask) | val;
+ writel(tmp, schan->chn_base + reg);
+}
+
+static int sprd_dma_enable(struct sprd_dma_dev *sdev)
+{
+ int ret;
+
+ ret = clk_prepare_enable(sdev->clk);
+ if (ret)
+ return ret;
+
+ /*
+ * The ashb_clk is optional and only for AGCP DMA controller, so we
+ * need add one condition to check if the ashb_clk need enable.
+ */
+ if (!IS_ERR(sdev->ashb_clk))
+ ret = clk_prepare_enable(sdev->ashb_clk);
+
+ return ret;
+}
+
+static void sprd_dma_disable(struct sprd_dma_dev *sdev)
+{
+ clk_disable_unprepare(sdev->clk);
+
+ /*
+ * Need to check if we need disable the optional ashb_clk for AGCP DMA.
+ */
+ if (!IS_ERR(sdev->ashb_clk))
+ clk_disable_unprepare(sdev->ashb_clk);
+}
+
+static void sprd_dma_set_uid(struct sprd_dma_chn *schan)
+{
+ struct sprd_dma_dev *sdev = to_sprd_dma_dev(&schan->vc.chan);
+ u32 dev_id = schan->dev_id;
+
+ if (dev_id != SPRD_DMA_SOFTWARE_UID) {
+ u32 uid_offset = SPRD_DMA_GLB_REQ_UID_OFFSET +
+ SPRD_DMA_GLB_REQ_UID(dev_id);
+
+ writel(schan->chn_num + 1, sdev->glb_base + uid_offset);
+ }
+}
+
+static void sprd_dma_unset_uid(struct sprd_dma_chn *schan)
+{
+ struct sprd_dma_dev *sdev = to_sprd_dma_dev(&schan->vc.chan);
+ u32 dev_id = schan->dev_id;
+
+ if (dev_id != SPRD_DMA_SOFTWARE_UID) {
+ u32 uid_offset = SPRD_DMA_GLB_REQ_UID_OFFSET +
+ SPRD_DMA_GLB_REQ_UID(dev_id);
+
+ writel(0, sdev->glb_base + uid_offset);
+ }
+}
+
+static void sprd_dma_clear_int(struct sprd_dma_chn *schan)
+{
+ sprd_dma_chn_update(schan, SPRD_DMA_CHN_INTC,
+ SPRD_DMA_INT_MASK << SPRD_DMA_INT_CLR_OFFSET,
+ SPRD_DMA_INT_MASK << SPRD_DMA_INT_CLR_OFFSET);
+}
+
+static void sprd_dma_enable_chn(struct sprd_dma_chn *schan)
+{
+ sprd_dma_chn_update(schan, SPRD_DMA_CHN_CFG, SPRD_DMA_CHN_EN,
+ SPRD_DMA_CHN_EN);
+}
+
+static void sprd_dma_disable_chn(struct sprd_dma_chn *schan)
+{
+ sprd_dma_chn_update(schan, SPRD_DMA_CHN_CFG, SPRD_DMA_CHN_EN, 0);
+}
+
+static void sprd_dma_soft_request(struct sprd_dma_chn *schan)
+{
+ sprd_dma_chn_update(schan, SPRD_DMA_CHN_REQ, SPRD_DMA_REQ_EN,
+ SPRD_DMA_REQ_EN);
+}
+
+static void sprd_dma_pause_resume(struct sprd_dma_chn *schan, bool enable)
+{
+ struct sprd_dma_dev *sdev = to_sprd_dma_dev(&schan->vc.chan);
+ u32 pause, timeout = SPRD_DMA_PAUSE_CNT;
+
+ if (enable) {
+ sprd_dma_chn_update(schan, SPRD_DMA_CHN_PAUSE,
+ SPRD_DMA_PAUSE_EN, SPRD_DMA_PAUSE_EN);
+
+ do {
+ pause = readl(schan->chn_base + SPRD_DMA_CHN_PAUSE);
+ if (pause & SPRD_DMA_PAUSE_STS)
+ break;
+
+ cpu_relax();
+ } while (--timeout > 0);
+
+ if (!timeout)
+ dev_warn(sdev->dma_dev.dev,
+ "pause dma controller timeout\n");
+ } else {
+ sprd_dma_chn_update(schan, SPRD_DMA_CHN_PAUSE,
+ SPRD_DMA_PAUSE_EN, 0);
+ }
+}
+
+static void sprd_dma_stop_and_disable(struct sprd_dma_chn *schan)
+{
+ u32 cfg = readl(schan->chn_base + SPRD_DMA_CHN_CFG);
+
+ if (!(cfg & SPRD_DMA_CHN_EN))
+ return;
+
+ sprd_dma_pause_resume(schan, true);
+ sprd_dma_disable_chn(schan);
+}
+
+static unsigned long sprd_dma_get_src_addr(struct sprd_dma_chn *schan)
+{
+ unsigned long addr, addr_high;
+
+ addr = readl(schan->chn_base + SPRD_DMA_CHN_SRC_ADDR);
+ addr_high = readl(schan->chn_base + SPRD_DMA_CHN_WARP_PTR) &
+ SPRD_DMA_HIGH_ADDR_MASK;
+
+ return addr | (addr_high << SPRD_DMA_HIGH_ADDR_OFFSET);
+}
+
+static unsigned long sprd_dma_get_dst_addr(struct sprd_dma_chn *schan)
+{
+ unsigned long addr, addr_high;
+
+ addr = readl(schan->chn_base + SPRD_DMA_CHN_DES_ADDR);
+ addr_high = readl(schan->chn_base + SPRD_DMA_CHN_WARP_TO) &
+ SPRD_DMA_HIGH_ADDR_MASK;
+
+ return addr | (addr_high << SPRD_DMA_HIGH_ADDR_OFFSET);
+}
+
+static enum sprd_dma_int_type sprd_dma_get_int_type(struct sprd_dma_chn *schan)
+{
+ struct sprd_dma_dev *sdev = to_sprd_dma_dev(&schan->vc.chan);
+ u32 intc_sts = readl(schan->chn_base + SPRD_DMA_CHN_INTC) &
+ SPRD_DMA_CHN_INT_STS;
+
+ switch (intc_sts) {
+ case SPRD_DMA_CFGERR_INT_STS:
+ return SPRD_DMA_CFGERR_INT;
+
+ case SPRD_DMA_LIST_INT_STS:
+ return SPRD_DMA_LIST_INT;
+
+ case SPRD_DMA_TRSC_INT_STS:
+ return SPRD_DMA_TRANS_INT;
+
+ case SPRD_DMA_BLK_INT_STS:
+ return SPRD_DMA_BLK_INT;
+
+ case SPRD_DMA_FRAG_INT_STS:
+ return SPRD_DMA_FRAG_INT;
+
+ default:
+ dev_warn(sdev->dma_dev.dev, "incorrect dma interrupt type\n");
+ return SPRD_DMA_NO_INT;
+ }
+}
+
+static enum sprd_dma_req_mode sprd_dma_get_req_type(struct sprd_dma_chn *schan)
+{
+ u32 frag_reg = readl(schan->chn_base + SPRD_DMA_CHN_FRG_LEN);
+
+ return (frag_reg >> SPRD_DMA_REQ_MODE_OFFSET) & SPRD_DMA_REQ_MODE_MASK;
+}
+
+static int sprd_dma_set_2stage_config(struct sprd_dma_chn *schan)
+{
+ struct sprd_dma_dev *sdev = to_sprd_dma_dev(&schan->vc.chan);
+ u32 val, chn = schan->chn_num + 1;
+
+ switch (schan->chn_mode) {
+ case SPRD_DMA_SRC_CHN0:
+ val = chn & SPRD_DMA_GLB_SRC_CHN_MASK;
+ val |= BIT(schan->trg_mode - 1) << SPRD_DMA_GLB_TRG_OFFSET;
+ val |= SPRD_DMA_GLB_2STAGE_EN;
+ if (schan->int_type != SPRD_DMA_NO_INT)
+ val |= SPRD_DMA_GLB_SRC_INT;
+
+ sprd_dma_glb_update(sdev, SPRD_DMA_GLB_2STAGE_GRP1, val, val);
+ break;
+
+ case SPRD_DMA_SRC_CHN1:
+ val = chn & SPRD_DMA_GLB_SRC_CHN_MASK;
+ val |= BIT(schan->trg_mode - 1) << SPRD_DMA_GLB_TRG_OFFSET;
+ val |= SPRD_DMA_GLB_2STAGE_EN;
+ if (schan->int_type != SPRD_DMA_NO_INT)
+ val |= SPRD_DMA_GLB_SRC_INT;
+
+ sprd_dma_glb_update(sdev, SPRD_DMA_GLB_2STAGE_GRP2, val, val);
+ break;
+
+ case SPRD_DMA_DST_CHN0:
+ val = (chn << SPRD_DMA_GLB_DEST_CHN_OFFSET) &
+ SPRD_DMA_GLB_DEST_CHN_MASK;
+ val |= SPRD_DMA_GLB_2STAGE_EN;
+ if (schan->int_type != SPRD_DMA_NO_INT)
+ val |= SPRD_DMA_GLB_DEST_INT;
+
+ sprd_dma_glb_update(sdev, SPRD_DMA_GLB_2STAGE_GRP1, val, val);
+ break;
+
+ case SPRD_DMA_DST_CHN1:
+ val = (chn << SPRD_DMA_GLB_DEST_CHN_OFFSET) &
+ SPRD_DMA_GLB_DEST_CHN_MASK;
+ val |= SPRD_DMA_GLB_2STAGE_EN;
+ if (schan->int_type != SPRD_DMA_NO_INT)
+ val |= SPRD_DMA_GLB_DEST_INT;
+
+ sprd_dma_glb_update(sdev, SPRD_DMA_GLB_2STAGE_GRP2, val, val);
+ break;
+
+ default:
+ dev_err(sdev->dma_dev.dev, "invalid channel mode setting %d\n",
+ schan->chn_mode);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static void sprd_dma_set_pending(struct sprd_dma_chn *schan, bool enable)
+{
+ struct sprd_dma_dev *sdev = to_sprd_dma_dev(&schan->vc.chan);
+ u32 reg, val, req_id;
+
+ if (schan->dev_id == SPRD_DMA_SOFTWARE_UID)
+ return;
+
+ /* The DMA request id always starts from 0. */
+ req_id = schan->dev_id - 1;
+
+ if (req_id < 32) {
+ reg = SPRD_DMA_GLB_REQ_PEND0_EN;
+ val = BIT(req_id);
+ } else {
+ reg = SPRD_DMA_GLB_REQ_PEND1_EN;
+ val = BIT(req_id - 32);
+ }
+
+ sprd_dma_glb_update(sdev, reg, val, enable ? val : 0);
+}
+
+static void sprd_dma_set_chn_config(struct sprd_dma_chn *schan,
+ struct sprd_dma_desc *sdesc)
+{
+ struct sprd_dma_chn_hw *cfg = &sdesc->chn_hw;
+
+ writel(cfg->pause, schan->chn_base + SPRD_DMA_CHN_PAUSE);
+ writel(cfg->cfg, schan->chn_base + SPRD_DMA_CHN_CFG);
+ writel(cfg->intc, schan->chn_base + SPRD_DMA_CHN_INTC);
+ writel(cfg->src_addr, schan->chn_base + SPRD_DMA_CHN_SRC_ADDR);
+ writel(cfg->des_addr, schan->chn_base + SPRD_DMA_CHN_DES_ADDR);
+ writel(cfg->frg_len, schan->chn_base + SPRD_DMA_CHN_FRG_LEN);
+ writel(cfg->blk_len, schan->chn_base + SPRD_DMA_CHN_BLK_LEN);
+ writel(cfg->trsc_len, schan->chn_base + SPRD_DMA_CHN_TRSC_LEN);
+ writel(cfg->trsf_step, schan->chn_base + SPRD_DMA_CHN_TRSF_STEP);
+ writel(cfg->wrap_ptr, schan->chn_base + SPRD_DMA_CHN_WARP_PTR);
+ writel(cfg->wrap_to, schan->chn_base + SPRD_DMA_CHN_WARP_TO);
+ writel(cfg->llist_ptr, schan->chn_base + SPRD_DMA_CHN_LLIST_PTR);
+ writel(cfg->frg_step, schan->chn_base + SPRD_DMA_CHN_FRAG_STEP);
+ writel(cfg->src_blk_step, schan->chn_base + SPRD_DMA_CHN_SRC_BLK_STEP);
+ writel(cfg->des_blk_step, schan->chn_base + SPRD_DMA_CHN_DES_BLK_STEP);
+ writel(cfg->req, schan->chn_base + SPRD_DMA_CHN_REQ);
+}
+
+static void sprd_dma_start(struct sprd_dma_chn *schan)
+{
+ struct virt_dma_desc *vd = vchan_next_desc(&schan->vc);
+
+ if (!vd)
+ return;
+
+ list_del(&vd->node);
+ schan->cur_desc = to_sprd_dma_desc(vd);
+
+ /*
+ * Set 2-stage configuration if the channel starts one 2-stage
+ * transfer.
+ */
+ if (schan->chn_mode && sprd_dma_set_2stage_config(schan))
+ return;
+
+ /*
+ * Copy the DMA configuration from DMA descriptor to this hardware
+ * channel.
+ */
+ sprd_dma_set_chn_config(schan, schan->cur_desc);
+ sprd_dma_set_uid(schan);
+ sprd_dma_set_pending(schan, true);
+ sprd_dma_enable_chn(schan);
+
+ if (schan->dev_id == SPRD_DMA_SOFTWARE_UID &&
+ schan->chn_mode != SPRD_DMA_DST_CHN0 &&
+ schan->chn_mode != SPRD_DMA_DST_CHN1)
+ sprd_dma_soft_request(schan);
+}
+
+static void sprd_dma_stop(struct sprd_dma_chn *schan)
+{
+ sprd_dma_stop_and_disable(schan);
+ sprd_dma_set_pending(schan, false);
+ sprd_dma_unset_uid(schan);
+ sprd_dma_clear_int(schan);
+ schan->cur_desc = NULL;
+}
+
+static bool sprd_dma_check_trans_done(struct sprd_dma_desc *sdesc,
+ enum sprd_dma_int_type int_type,
+ enum sprd_dma_req_mode req_mode)
+{
+ if (int_type == SPRD_DMA_NO_INT)
+ return false;
+
+ if (int_type >= req_mode + 1)
+ return true;
+ else
+ return false;
+}
+
+static irqreturn_t dma_irq_handle(int irq, void *dev_id)
+{
+ struct sprd_dma_dev *sdev = (struct sprd_dma_dev *)dev_id;
+ u32 irq_status = readl(sdev->glb_base + SPRD_DMA_GLB_INT_MSK_STS);
+ struct sprd_dma_chn *schan;
+ struct sprd_dma_desc *sdesc;
+ enum sprd_dma_req_mode req_type;
+ enum sprd_dma_int_type int_type;
+ bool trans_done = false, cyclic = false;
+ u32 i;
+
+ while (irq_status) {
+ i = __ffs(irq_status);
+ irq_status &= (irq_status - 1);
+ schan = &sdev->channels[i];
+
+ spin_lock(&schan->vc.lock);
+
+ sdesc = schan->cur_desc;
+ if (!sdesc) {
+ spin_unlock(&schan->vc.lock);
+ return IRQ_HANDLED;
+ }
+
+ int_type = sprd_dma_get_int_type(schan);
+ req_type = sprd_dma_get_req_type(schan);
+ sprd_dma_clear_int(schan);
+
+ /* cyclic mode schedule callback */
+ cyclic = schan->linklist.phy_addr ? true : false;
+ if (cyclic == true) {
+ vchan_cyclic_callback(&sdesc->vd);
+ } else {
+ /* Check if the dma request descriptor is done. */
+ trans_done = sprd_dma_check_trans_done(sdesc, int_type,
+ req_type);
+ if (trans_done == true) {
+ vchan_cookie_complete(&sdesc->vd);
+ schan->cur_desc = NULL;
+ sprd_dma_start(schan);
+ }
+ }
+ spin_unlock(&schan->vc.lock);
+ }
+
+ return IRQ_HANDLED;
+}
+
+static int sprd_dma_alloc_chan_resources(struct dma_chan *chan)
+{
+ return pm_runtime_get_sync(chan->device->dev);
+}
+
+static void sprd_dma_free_chan_resources(struct dma_chan *chan)
+{
+ struct sprd_dma_chn *schan = to_sprd_dma_chan(chan);
+ struct virt_dma_desc *cur_vd = NULL;
+ unsigned long flags;
+
+ spin_lock_irqsave(&schan->vc.lock, flags);
+ if (schan->cur_desc)
+ cur_vd = &schan->cur_desc->vd;
+
+ sprd_dma_stop(schan);
+ spin_unlock_irqrestore(&schan->vc.lock, flags);
+
+ if (cur_vd)
+ sprd_dma_free_desc(cur_vd);
+
+ vchan_free_chan_resources(&schan->vc);
+ pm_runtime_put(chan->device->dev);
+}
+
+static enum dma_status sprd_dma_tx_status(struct dma_chan *chan,
+ dma_cookie_t cookie,
+ struct dma_tx_state *txstate)
+{
+ struct sprd_dma_chn *schan = to_sprd_dma_chan(chan);
+ struct virt_dma_desc *vd;
+ unsigned long flags;
+ enum dma_status ret;
+ u32 pos;
+
+ ret = dma_cookie_status(chan, cookie, txstate);
+ if (ret == DMA_COMPLETE || !txstate)
+ return ret;
+
+ spin_lock_irqsave(&schan->vc.lock, flags);
+ vd = vchan_find_desc(&schan->vc, cookie);
+ if (vd) {
+ struct sprd_dma_desc *sdesc = to_sprd_dma_desc(vd);
+ struct sprd_dma_chn_hw *hw = &sdesc->chn_hw;
+
+ if (hw->trsc_len > 0)
+ pos = hw->trsc_len;
+ else if (hw->blk_len > 0)
+ pos = hw->blk_len;
+ else if (hw->frg_len > 0)
+ pos = hw->frg_len;
+ else
+ pos = 0;
+ } else if (schan->cur_desc && schan->cur_desc->vd.tx.cookie == cookie) {
+ struct sprd_dma_desc *sdesc = schan->cur_desc;
+
+ if (sdesc->dir == DMA_DEV_TO_MEM)
+ pos = sprd_dma_get_dst_addr(schan);
+ else
+ pos = sprd_dma_get_src_addr(schan);
+ } else {
+ pos = 0;
+ }
+ spin_unlock_irqrestore(&schan->vc.lock, flags);
+
+ dma_set_residue(txstate, pos);
+ return ret;
+}
+
+static void sprd_dma_issue_pending(struct dma_chan *chan)
+{
+ struct sprd_dma_chn *schan = to_sprd_dma_chan(chan);
+ unsigned long flags;
+
+ spin_lock_irqsave(&schan->vc.lock, flags);
+ if (vchan_issue_pending(&schan->vc) && !schan->cur_desc)
+ sprd_dma_start(schan);
+ spin_unlock_irqrestore(&schan->vc.lock, flags);
+}
+
+static int sprd_dma_get_datawidth(enum dma_slave_buswidth buswidth)
+{
+ switch (buswidth) {
+ case DMA_SLAVE_BUSWIDTH_1_BYTE:
+ case DMA_SLAVE_BUSWIDTH_2_BYTES:
+ case DMA_SLAVE_BUSWIDTH_4_BYTES:
+ case DMA_SLAVE_BUSWIDTH_8_BYTES:
+ return ffs(buswidth) - 1;
+
+ default:
+ return -EINVAL;
+ }
+}
+
+static int sprd_dma_get_step(enum dma_slave_buswidth buswidth)
+{
+ switch (buswidth) {
+ case DMA_SLAVE_BUSWIDTH_1_BYTE:
+ case DMA_SLAVE_BUSWIDTH_2_BYTES:
+ case DMA_SLAVE_BUSWIDTH_4_BYTES:
+ case DMA_SLAVE_BUSWIDTH_8_BYTES:
+ return buswidth;
+
+ default:
+ return -EINVAL;
+ }
+}
+
+static int sprd_dma_fill_desc(struct dma_chan *chan,
+ struct sprd_dma_chn_hw *hw,
+ unsigned int sglen, int sg_index,
+ dma_addr_t src, dma_addr_t dst, u32 len,
+ enum dma_transfer_direction dir,
+ unsigned long flags,
+ struct dma_slave_config *slave_cfg)
+{
+ struct sprd_dma_dev *sdev = to_sprd_dma_dev(chan);
+ struct sprd_dma_chn *schan = to_sprd_dma_chan(chan);
+ enum sprd_dma_chn_mode chn_mode = schan->chn_mode;
+ u32 req_mode = (flags >> SPRD_DMA_REQ_SHIFT) & SPRD_DMA_REQ_MODE_MASK;
+ u32 int_mode = flags & SPRD_DMA_INT_MASK;
+ int src_datawidth, dst_datawidth, src_step, dst_step;
+ u32 temp, fix_mode = 0, fix_en = 0;
+ phys_addr_t llist_ptr;
+
+ if (dir == DMA_MEM_TO_DEV) {
+ src_step = sprd_dma_get_step(slave_cfg->src_addr_width);
+ if (src_step < 0) {
+ dev_err(sdev->dma_dev.dev, "invalid source step\n");
+ return src_step;
+ }
+
+ /*
+ * For 2-stage transfer, destination channel step can not be 0,
+ * since destination device is AON IRAM.
+ */
+ if (chn_mode == SPRD_DMA_DST_CHN0 ||
+ chn_mode == SPRD_DMA_DST_CHN1)
+ dst_step = src_step;
+ else
+ dst_step = SPRD_DMA_NONE_STEP;
+ } else {
+ dst_step = sprd_dma_get_step(slave_cfg->dst_addr_width);
+ if (dst_step < 0) {
+ dev_err(sdev->dma_dev.dev, "invalid destination step\n");
+ return dst_step;
+ }
+ src_step = SPRD_DMA_NONE_STEP;
+ }
+
+ src_datawidth = sprd_dma_get_datawidth(slave_cfg->src_addr_width);
+ if (src_datawidth < 0) {
+ dev_err(sdev->dma_dev.dev, "invalid source datawidth\n");
+ return src_datawidth;
+ }
+
+ dst_datawidth = sprd_dma_get_datawidth(slave_cfg->dst_addr_width);
+ if (dst_datawidth < 0) {
+ dev_err(sdev->dma_dev.dev, "invalid destination datawidth\n");
+ return dst_datawidth;
+ }
+
+ hw->cfg = SPRD_DMA_DONOT_WAIT_BDONE << SPRD_DMA_WAIT_BDONE_OFFSET;
+
+ /*
+ * wrap_ptr and wrap_to will save the high 4 bits source address and
+ * destination address.
+ */
+ hw->wrap_ptr = (src >> SPRD_DMA_HIGH_ADDR_OFFSET) & SPRD_DMA_HIGH_ADDR_MASK;
+ hw->wrap_to = (dst >> SPRD_DMA_HIGH_ADDR_OFFSET) & SPRD_DMA_HIGH_ADDR_MASK;
+ hw->src_addr = src & SPRD_DMA_LOW_ADDR_MASK;
+ hw->des_addr = dst & SPRD_DMA_LOW_ADDR_MASK;
+
+ /*
+ * If the src step and dst step both are 0 or both are not 0, that means
+ * we can not enable the fix mode. If one is 0 and another one is not,
+ * we can enable the fix mode.
+ */
+ if ((src_step != 0 && dst_step != 0) || (src_step | dst_step) == 0) {
+ fix_en = 0;
+ } else {
+ fix_en = 1;
+ if (src_step)
+ fix_mode = 1;
+ else
+ fix_mode = 0;
+ }
+
+ hw->intc = int_mode | SPRD_DMA_CFG_ERR_INT_EN;
+
+ temp = src_datawidth << SPRD_DMA_SRC_DATAWIDTH_OFFSET;
+ temp |= dst_datawidth << SPRD_DMA_DES_DATAWIDTH_OFFSET;
+ temp |= req_mode << SPRD_DMA_REQ_MODE_OFFSET;
+ temp |= fix_mode << SPRD_DMA_FIX_SEL_OFFSET;
+ temp |= fix_en << SPRD_DMA_FIX_EN_OFFSET;
+ temp |= schan->linklist.wrap_addr ?
+ SPRD_DMA_WRAP_EN | SPRD_DMA_WRAP_SEL_DEST : 0;
+ temp |= slave_cfg->src_maxburst & SPRD_DMA_FRG_LEN_MASK;
+ hw->frg_len = temp;
+
+ hw->blk_len = slave_cfg->src_maxburst & SPRD_DMA_BLK_LEN_MASK;
+ hw->trsc_len = len & SPRD_DMA_TRSC_LEN_MASK;
+
+ temp = (dst_step & SPRD_DMA_TRSF_STEP_MASK) << SPRD_DMA_DEST_TRSF_STEP_OFFSET;
+ temp |= (src_step & SPRD_DMA_TRSF_STEP_MASK) << SPRD_DMA_SRC_TRSF_STEP_OFFSET;
+ hw->trsf_step = temp;
+
+ /* link-list configuration */
+ if (schan->linklist.phy_addr) {
+ hw->cfg |= SPRD_DMA_LINKLIST_EN;
+
+ /* link-list index */
+ temp = sglen ? (sg_index + 1) % sglen : 0;
+
+ /* Next link-list configuration's physical address offset */
+ temp = temp * sizeof(*hw) + SPRD_DMA_CHN_SRC_ADDR;
+ /*
+ * Set the link-list pointer point to next link-list
+ * configuration's physical address.
+ */
+ llist_ptr = schan->linklist.phy_addr + temp;
+ hw->llist_ptr = lower_32_bits(llist_ptr);
+ hw->src_blk_step = (upper_32_bits(llist_ptr) << SPRD_DMA_LLIST_HIGH_SHIFT) &
+ SPRD_DMA_LLIST_HIGH_MASK;
+
+ if (schan->linklist.wrap_addr) {
+ hw->wrap_ptr |= schan->linklist.wrap_addr &
+ SPRD_DMA_WRAP_ADDR_MASK;
+ hw->wrap_to |= dst & SPRD_DMA_WRAP_ADDR_MASK;
+ }
+ } else {
+ hw->llist_ptr = 0;
+ hw->src_blk_step = 0;
+ }
+
+ hw->frg_step = 0;
+ hw->des_blk_step = 0;
+ return 0;
+}
+
+static int sprd_dma_fill_linklist_desc(struct dma_chan *chan,
+ unsigned int sglen, int sg_index,
+ dma_addr_t src, dma_addr_t dst, u32 len,
+ enum dma_transfer_direction dir,
+ unsigned long flags,
+ struct dma_slave_config *slave_cfg)
+{
+ struct sprd_dma_chn *schan = to_sprd_dma_chan(chan);
+ struct sprd_dma_chn_hw *hw;
+
+ if (!schan->linklist.virt_addr)
+ return -EINVAL;
+
+ hw = (struct sprd_dma_chn_hw *)(schan->linklist.virt_addr +
+ sg_index * sizeof(*hw));
+
+ return sprd_dma_fill_desc(chan, hw, sglen, sg_index, src, dst, len,
+ dir, flags, slave_cfg);
+}
+
+static struct dma_async_tx_descriptor *
+sprd_dma_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
+ size_t len, unsigned long flags)
+{
+ struct sprd_dma_chn *schan = to_sprd_dma_chan(chan);
+ struct sprd_dma_desc *sdesc;
+ struct sprd_dma_chn_hw *hw;
+ enum sprd_dma_datawidth datawidth;
+ u32 step, temp;
+
+ sdesc = kzalloc(sizeof(*sdesc), GFP_NOWAIT);
+ if (!sdesc)
+ return NULL;
+
+ hw = &sdesc->chn_hw;
+
+ hw->cfg = SPRD_DMA_DONOT_WAIT_BDONE << SPRD_DMA_WAIT_BDONE_OFFSET;
+ hw->intc = SPRD_DMA_TRANS_INT | SPRD_DMA_CFG_ERR_INT_EN;
+ hw->src_addr = src & SPRD_DMA_LOW_ADDR_MASK;
+ hw->des_addr = dest & SPRD_DMA_LOW_ADDR_MASK;
+ hw->wrap_ptr = (src >> SPRD_DMA_HIGH_ADDR_OFFSET) &
+ SPRD_DMA_HIGH_ADDR_MASK;
+ hw->wrap_to = (dest >> SPRD_DMA_HIGH_ADDR_OFFSET) &
+ SPRD_DMA_HIGH_ADDR_MASK;
+
+ if (IS_ALIGNED(len, 8)) {
+ datawidth = SPRD_DMA_DATAWIDTH_8_BYTES;
+ step = SPRD_DMA_DWORD_STEP;
+ } else if (IS_ALIGNED(len, 4)) {
+ datawidth = SPRD_DMA_DATAWIDTH_4_BYTES;
+ step = SPRD_DMA_WORD_STEP;
+ } else if (IS_ALIGNED(len, 2)) {
+ datawidth = SPRD_DMA_DATAWIDTH_2_BYTES;
+ step = SPRD_DMA_SHORT_STEP;
+ } else {
+ datawidth = SPRD_DMA_DATAWIDTH_1_BYTE;
+ step = SPRD_DMA_BYTE_STEP;
+ }
+
+ temp = datawidth << SPRD_DMA_SRC_DATAWIDTH_OFFSET;
+ temp |= datawidth << SPRD_DMA_DES_DATAWIDTH_OFFSET;
+ temp |= SPRD_DMA_TRANS_REQ << SPRD_DMA_REQ_MODE_OFFSET;
+ temp |= len & SPRD_DMA_FRG_LEN_MASK;
+ hw->frg_len = temp;
+
+ hw->blk_len = len & SPRD_DMA_BLK_LEN_MASK;
+ hw->trsc_len = len & SPRD_DMA_TRSC_LEN_MASK;
+
+ temp = (step & SPRD_DMA_TRSF_STEP_MASK) << SPRD_DMA_DEST_TRSF_STEP_OFFSET;
+ temp |= (step & SPRD_DMA_TRSF_STEP_MASK) << SPRD_DMA_SRC_TRSF_STEP_OFFSET;
+ hw->trsf_step = temp;
+
+ return vchan_tx_prep(&schan->vc, &sdesc->vd, flags);
+}
+
+static struct dma_async_tx_descriptor *
+sprd_dma_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
+ unsigned int sglen, enum dma_transfer_direction dir,
+ unsigned long flags, void *context)
+{
+ struct sprd_dma_chn *schan = to_sprd_dma_chan(chan);
+ struct dma_slave_config *slave_cfg = &schan->slave_cfg;
+ dma_addr_t src = 0, dst = 0;
+ dma_addr_t start_src = 0, start_dst = 0;
+ struct sprd_dma_desc *sdesc;
+ struct scatterlist *sg;
+ u32 len = 0;
+ int ret, i;
+
+ if (!is_slave_direction(dir))
+ return NULL;
+
+ if (context) {
+ struct sprd_dma_linklist *ll_cfg =
+ (struct sprd_dma_linklist *)context;
+
+ schan->linklist.phy_addr = ll_cfg->phy_addr;
+ schan->linklist.virt_addr = ll_cfg->virt_addr;
+ schan->linklist.wrap_addr = ll_cfg->wrap_addr;
+ } else {
+ schan->linklist.phy_addr = 0;
+ schan->linklist.virt_addr = 0;
+ schan->linklist.wrap_addr = 0;
+ }
+
+ /*
+ * Set channel mode, interrupt mode and trigger mode for 2-stage
+ * transfer.
+ */
+ schan->chn_mode =
+ (flags >> SPRD_DMA_CHN_MODE_SHIFT) & SPRD_DMA_CHN_MODE_MASK;
+ schan->trg_mode =
+ (flags >> SPRD_DMA_TRG_MODE_SHIFT) & SPRD_DMA_TRG_MODE_MASK;
+ schan->int_type = flags & SPRD_DMA_INT_TYPE_MASK;
+
+ sdesc = kzalloc(sizeof(*sdesc), GFP_NOWAIT);
+ if (!sdesc)
+ return NULL;
+
+ sdesc->dir = dir;
+
+ for_each_sg(sgl, sg, sglen, i) {
+ len = sg_dma_len(sg);
+
+ if (dir == DMA_MEM_TO_DEV) {
+ src = sg_dma_address(sg);
+ dst = slave_cfg->dst_addr;
+ } else {
+ src = slave_cfg->src_addr;
+ dst = sg_dma_address(sg);
+ }
+
+ if (!i) {
+ start_src = src;
+ start_dst = dst;
+ }
+
+ /*
+ * The link-list mode needs at least 2 link-list
+ * configurations. If there is only one sg, it doesn't
+ * need to fill the link-list configuration.
+ */
+ if (sglen < 2)
+ break;
+
+ ret = sprd_dma_fill_linklist_desc(chan, sglen, i, src, dst, len,
+ dir, flags, slave_cfg);
+ if (ret) {
+ kfree(sdesc);
+ return NULL;
+ }
+ }
+
+ ret = sprd_dma_fill_desc(chan, &sdesc->chn_hw, 0, 0, start_src,
+ start_dst, len, dir, flags, slave_cfg);
+ if (ret) {
+ kfree(sdesc);
+ return NULL;
+ }
+
+ return vchan_tx_prep(&schan->vc, &sdesc->vd, flags);
+}
+
+static int sprd_dma_slave_config(struct dma_chan *chan,
+ struct dma_slave_config *config)
+{
+ struct sprd_dma_chn *schan = to_sprd_dma_chan(chan);
+ struct dma_slave_config *slave_cfg = &schan->slave_cfg;
+
+ memcpy(slave_cfg, config, sizeof(*config));
+ return 0;
+}
+
+static int sprd_dma_pause(struct dma_chan *chan)
+{
+ struct sprd_dma_chn *schan = to_sprd_dma_chan(chan);
+ unsigned long flags;
+
+ spin_lock_irqsave(&schan->vc.lock, flags);
+ sprd_dma_pause_resume(schan, true);
+ spin_unlock_irqrestore(&schan->vc.lock, flags);
+
+ return 0;
+}
+
+static int sprd_dma_resume(struct dma_chan *chan)
+{
+ struct sprd_dma_chn *schan = to_sprd_dma_chan(chan);
+ unsigned long flags;
+
+ spin_lock_irqsave(&schan->vc.lock, flags);
+ sprd_dma_pause_resume(schan, false);
+ spin_unlock_irqrestore(&schan->vc.lock, flags);
+
+ return 0;
+}
+
+static int sprd_dma_terminate_all(struct dma_chan *chan)
+{
+ struct sprd_dma_chn *schan = to_sprd_dma_chan(chan);
+ struct virt_dma_desc *cur_vd = NULL;
+ unsigned long flags;
+ LIST_HEAD(head);
+
+ spin_lock_irqsave(&schan->vc.lock, flags);
+ if (schan->cur_desc)
+ cur_vd = &schan->cur_desc->vd;
+
+ sprd_dma_stop(schan);
+
+ vchan_get_all_descriptors(&schan->vc, &head);
+ spin_unlock_irqrestore(&schan->vc.lock, flags);
+
+ if (cur_vd)
+ sprd_dma_free_desc(cur_vd);
+
+ vchan_dma_desc_free_list(&schan->vc, &head);
+ return 0;
+}
+
+static void sprd_dma_free_desc(struct virt_dma_desc *vd)
+{
+ struct sprd_dma_desc *sdesc = to_sprd_dma_desc(vd);
+
+ kfree(sdesc);
+}
+
+static bool sprd_dma_filter_fn(struct dma_chan *chan, void *param)
+{
+ struct sprd_dma_chn *schan = to_sprd_dma_chan(chan);
+ u32 slave_id = *(u32 *)param;
+
+ schan->dev_id = slave_id;
+ return true;
+}
+
+static int sprd_dma_probe(struct platform_device *pdev)
+{
+ struct device_node *np = pdev->dev.of_node;
+ struct sprd_dma_dev *sdev;
+ struct sprd_dma_chn *dma_chn;
+ u32 chn_count;
+ int ret, i;
+
+ /* Parse new and deprecated dma-channels properties */
+ ret = device_property_read_u32(&pdev->dev, "dma-channels", &chn_count);
+ if (ret)
+ ret = device_property_read_u32(&pdev->dev, "#dma-channels",
+ &chn_count);
+ if (ret) {
+ dev_err(&pdev->dev, "get dma channels count failed\n");
+ return ret;
+ }
+
+ sdev = devm_kzalloc(&pdev->dev,
+ struct_size(sdev, channels, chn_count),
+ GFP_KERNEL);
+ if (!sdev)
+ return -ENOMEM;
+
+ sdev->clk = devm_clk_get(&pdev->dev, "enable");
+ if (IS_ERR(sdev->clk)) {
+ dev_err(&pdev->dev, "get enable clock failed\n");
+ return PTR_ERR(sdev->clk);
+ }
+
+ /* ashb clock is optional for AGCP DMA */
+ sdev->ashb_clk = devm_clk_get(&pdev->dev, "ashb_eb");
+ if (IS_ERR(sdev->ashb_clk))
+ dev_warn(&pdev->dev, "no optional ashb eb clock\n");
+
+ /*
+ * We have three DMA controllers: AP DMA, AON DMA and AGCP DMA. For AGCP
+ * DMA controller, it can or do not request the irq, which will save
+ * system power without resuming system by DMA interrupts if AGCP DMA
+ * does not request the irq. Thus the DMA interrupts property should
+ * be optional.
+ */
+ sdev->irq = platform_get_irq(pdev, 0);
+ if (sdev->irq > 0) {
+ ret = devm_request_irq(&pdev->dev, sdev->irq, dma_irq_handle,
+ 0, "sprd_dma", (void *)sdev);
+ if (ret < 0) {
+ dev_err(&pdev->dev, "request dma irq failed\n");
+ return ret;
+ }
+ } else {
+ dev_warn(&pdev->dev, "no interrupts for the dma controller\n");
+ }
+
+ sdev->glb_base = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(sdev->glb_base))
+ return PTR_ERR(sdev->glb_base);
+
+ dma_cap_set(DMA_MEMCPY, sdev->dma_dev.cap_mask);
+ sdev->total_chns = chn_count;
+ sdev->dma_dev.chancnt = chn_count;
+ INIT_LIST_HEAD(&sdev->dma_dev.channels);
+ INIT_LIST_HEAD(&sdev->dma_dev.global_node);
+ sdev->dma_dev.dev = &pdev->dev;
+ sdev->dma_dev.device_alloc_chan_resources = sprd_dma_alloc_chan_resources;
+ sdev->dma_dev.device_free_chan_resources = sprd_dma_free_chan_resources;
+ sdev->dma_dev.device_tx_status = sprd_dma_tx_status;
+ sdev->dma_dev.device_issue_pending = sprd_dma_issue_pending;
+ sdev->dma_dev.device_prep_dma_memcpy = sprd_dma_prep_dma_memcpy;
+ sdev->dma_dev.device_prep_slave_sg = sprd_dma_prep_slave_sg;
+ sdev->dma_dev.device_config = sprd_dma_slave_config;
+ sdev->dma_dev.device_pause = sprd_dma_pause;
+ sdev->dma_dev.device_resume = sprd_dma_resume;
+ sdev->dma_dev.device_terminate_all = sprd_dma_terminate_all;
+
+ for (i = 0; i < chn_count; i++) {
+ dma_chn = &sdev->channels[i];
+ dma_chn->chn_num = i;
+ dma_chn->cur_desc = NULL;
+ /* get each channel's registers base address. */
+ dma_chn->chn_base = sdev->glb_base + SPRD_DMA_CHN_REG_OFFSET +
+ SPRD_DMA_CHN_REG_LENGTH * i;
+
+ dma_chn->vc.desc_free = sprd_dma_free_desc;
+ vchan_init(&dma_chn->vc, &sdev->dma_dev);
+ }
+
+ platform_set_drvdata(pdev, sdev);
+ ret = sprd_dma_enable(sdev);
+ if (ret)
+ return ret;
+
+ pm_runtime_set_active(&pdev->dev);
+ pm_runtime_enable(&pdev->dev);
+
+ ret = pm_runtime_get_sync(&pdev->dev);
+ if (ret < 0)
+ goto err_rpm;
+
+ ret = dma_async_device_register(&sdev->dma_dev);
+ if (ret < 0) {
+ dev_err(&pdev->dev, "register dma device failed:%d\n", ret);
+ goto err_register;
+ }
+
+ sprd_dma_info.dma_cap = sdev->dma_dev.cap_mask;
+ ret = of_dma_controller_register(np, of_dma_simple_xlate,
+ &sprd_dma_info);
+ if (ret)
+ goto err_of_register;
+
+ pm_runtime_put(&pdev->dev);
+ return 0;
+
+err_of_register:
+ dma_async_device_unregister(&sdev->dma_dev);
+err_register:
+ pm_runtime_put_noidle(&pdev->dev);
+ pm_runtime_disable(&pdev->dev);
+err_rpm:
+ sprd_dma_disable(sdev);
+ return ret;
+}
+
+static int sprd_dma_remove(struct platform_device *pdev)
+{
+ struct sprd_dma_dev *sdev = platform_get_drvdata(pdev);
+ struct sprd_dma_chn *c, *cn;
+
+ pm_runtime_get_sync(&pdev->dev);
+
+ /* explicitly free the irq */
+ if (sdev->irq > 0)
+ devm_free_irq(&pdev->dev, sdev->irq, sdev);
+
+ list_for_each_entry_safe(c, cn, &sdev->dma_dev.channels,
+ vc.chan.device_node) {
+ list_del(&c->vc.chan.device_node);
+ tasklet_kill(&c->vc.task);
+ }
+
+ of_dma_controller_free(pdev->dev.of_node);
+ dma_async_device_unregister(&sdev->dma_dev);
+ sprd_dma_disable(sdev);
+
+ pm_runtime_put_noidle(&pdev->dev);
+ pm_runtime_disable(&pdev->dev);
+ return 0;
+}
+
+static const struct of_device_id sprd_dma_match[] = {
+ { .compatible = "sprd,sc9860-dma", },
+ {},
+};
+MODULE_DEVICE_TABLE(of, sprd_dma_match);
+
+static int __maybe_unused sprd_dma_runtime_suspend(struct device *dev)
+{
+ struct sprd_dma_dev *sdev = dev_get_drvdata(dev);
+
+ sprd_dma_disable(sdev);
+ return 0;
+}
+
+static int __maybe_unused sprd_dma_runtime_resume(struct device *dev)
+{
+ struct sprd_dma_dev *sdev = dev_get_drvdata(dev);
+ int ret;
+
+ ret = sprd_dma_enable(sdev);
+ if (ret)
+ dev_err(sdev->dma_dev.dev, "enable dma failed\n");
+
+ return ret;
+}
+
+static const struct dev_pm_ops sprd_dma_pm_ops = {
+ SET_RUNTIME_PM_OPS(sprd_dma_runtime_suspend,
+ sprd_dma_runtime_resume,
+ NULL)
+};
+
+static struct platform_driver sprd_dma_driver = {
+ .probe = sprd_dma_probe,
+ .remove = sprd_dma_remove,
+ .driver = {
+ .name = "sprd-dma",
+ .of_match_table = sprd_dma_match,
+ .pm = &sprd_dma_pm_ops,
+ },
+};
+module_platform_driver(sprd_dma_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("DMA driver for Spreadtrum");
+MODULE_AUTHOR("Baolin Wang <baolin.wang@spreadtrum.com>");
+MODULE_AUTHOR("Eric Long <eric.long@spreadtrum.com>");
+MODULE_ALIAS("platform:sprd-dma");
diff --git a/drivers/dma/st_fdma.c b/drivers/dma/st_fdma.c
new file mode 100644
index 000000000..d95c42187
--- /dev/null
+++ b/drivers/dma/st_fdma.c
@@ -0,0 +1,877 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * DMA driver for STMicroelectronics STi FDMA controller
+ *
+ * Copyright (C) 2014 STMicroelectronics
+ *
+ * Author: Ludovic Barre <Ludovic.barre@st.com>
+ * Peter Griffin <peter.griffin@linaro.org>
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/of_dma.h>
+#include <linux/platform_device.h>
+#include <linux/interrupt.h>
+#include <linux/remoteproc.h>
+#include <linux/slab.h>
+
+#include "st_fdma.h"
+
+static inline struct st_fdma_chan *to_st_fdma_chan(struct dma_chan *c)
+{
+ return container_of(c, struct st_fdma_chan, vchan.chan);
+}
+
+static struct st_fdma_desc *to_st_fdma_desc(struct virt_dma_desc *vd)
+{
+ return container_of(vd, struct st_fdma_desc, vdesc);
+}
+
+static int st_fdma_dreq_get(struct st_fdma_chan *fchan)
+{
+ struct st_fdma_dev *fdev = fchan->fdev;
+ u32 req_line_cfg = fchan->cfg.req_line;
+ u32 dreq_line;
+ int try = 0;
+
+ /*
+ * dreq_mask is shared for n channels of fdma, so all accesses must be
+ * atomic. if the dreq_mask is changed between ffz and set_bit,
+ * we retry
+ */
+ do {
+ if (fdev->dreq_mask == ~0L) {
+ dev_err(fdev->dev, "No req lines available\n");
+ return -EINVAL;
+ }
+
+ if (try || req_line_cfg >= ST_FDMA_NR_DREQS) {
+ dev_err(fdev->dev, "Invalid or used req line\n");
+ return -EINVAL;
+ } else {
+ dreq_line = req_line_cfg;
+ }
+
+ try++;
+ } while (test_and_set_bit(dreq_line, &fdev->dreq_mask));
+
+ dev_dbg(fdev->dev, "get dreq_line:%d mask:%#lx\n",
+ dreq_line, fdev->dreq_mask);
+
+ return dreq_line;
+}
+
+static void st_fdma_dreq_put(struct st_fdma_chan *fchan)
+{
+ struct st_fdma_dev *fdev = fchan->fdev;
+
+ dev_dbg(fdev->dev, "put dreq_line:%#x\n", fchan->dreq_line);
+ clear_bit(fchan->dreq_line, &fdev->dreq_mask);
+}
+
+static void st_fdma_xfer_desc(struct st_fdma_chan *fchan)
+{
+ struct virt_dma_desc *vdesc;
+ unsigned long nbytes, ch_cmd, cmd;
+
+ vdesc = vchan_next_desc(&fchan->vchan);
+ if (!vdesc)
+ return;
+
+ fchan->fdesc = to_st_fdma_desc(vdesc);
+ nbytes = fchan->fdesc->node[0].desc->nbytes;
+ cmd = FDMA_CMD_START(fchan->vchan.chan.chan_id);
+ ch_cmd = fchan->fdesc->node[0].pdesc | FDMA_CH_CMD_STA_START;
+
+ /* start the channel for the descriptor */
+ fnode_write(fchan, nbytes, FDMA_CNTN_OFST);
+ fchan_write(fchan, ch_cmd, FDMA_CH_CMD_OFST);
+ writel(cmd,
+ fchan->fdev->slim_rproc->peri + FDMA_CMD_SET_OFST);
+
+ dev_dbg(fchan->fdev->dev, "start chan:%d\n", fchan->vchan.chan.chan_id);
+}
+
+static void st_fdma_ch_sta_update(struct st_fdma_chan *fchan,
+ unsigned long int_sta)
+{
+ unsigned long ch_sta, ch_err;
+ int ch_id = fchan->vchan.chan.chan_id;
+ struct st_fdma_dev *fdev = fchan->fdev;
+
+ ch_sta = fchan_read(fchan, FDMA_CH_CMD_OFST);
+ ch_err = ch_sta & FDMA_CH_CMD_ERR_MASK;
+ ch_sta &= FDMA_CH_CMD_STA_MASK;
+
+ if (int_sta & FDMA_INT_STA_ERR) {
+ dev_warn(fdev->dev, "chan:%d, error:%ld\n", ch_id, ch_err);
+ fchan->status = DMA_ERROR;
+ return;
+ }
+
+ switch (ch_sta) {
+ case FDMA_CH_CMD_STA_PAUSED:
+ fchan->status = DMA_PAUSED;
+ break;
+
+ case FDMA_CH_CMD_STA_RUNNING:
+ fchan->status = DMA_IN_PROGRESS;
+ break;
+ }
+}
+
+static irqreturn_t st_fdma_irq_handler(int irq, void *dev_id)
+{
+ struct st_fdma_dev *fdev = dev_id;
+ irqreturn_t ret = IRQ_NONE;
+ struct st_fdma_chan *fchan = &fdev->chans[0];
+ unsigned long int_sta, clr;
+
+ int_sta = fdma_read(fdev, FDMA_INT_STA_OFST);
+ clr = int_sta;
+
+ for (; int_sta != 0 ; int_sta >>= 2, fchan++) {
+ if (!(int_sta & (FDMA_INT_STA_CH | FDMA_INT_STA_ERR)))
+ continue;
+
+ spin_lock(&fchan->vchan.lock);
+ st_fdma_ch_sta_update(fchan, int_sta);
+
+ if (fchan->fdesc) {
+ if (!fchan->fdesc->iscyclic) {
+ list_del(&fchan->fdesc->vdesc.node);
+ vchan_cookie_complete(&fchan->fdesc->vdesc);
+ fchan->fdesc = NULL;
+ fchan->status = DMA_COMPLETE;
+ } else {
+ vchan_cyclic_callback(&fchan->fdesc->vdesc);
+ }
+
+ /* Start the next descriptor (if available) */
+ if (!fchan->fdesc)
+ st_fdma_xfer_desc(fchan);
+ }
+
+ spin_unlock(&fchan->vchan.lock);
+ ret = IRQ_HANDLED;
+ }
+
+ fdma_write(fdev, clr, FDMA_INT_CLR_OFST);
+
+ return ret;
+}
+
+static struct dma_chan *st_fdma_of_xlate(struct of_phandle_args *dma_spec,
+ struct of_dma *ofdma)
+{
+ struct st_fdma_dev *fdev = ofdma->of_dma_data;
+ struct dma_chan *chan;
+ struct st_fdma_chan *fchan;
+ int ret;
+
+ if (dma_spec->args_count < 1)
+ return ERR_PTR(-EINVAL);
+
+ if (fdev->dma_device.dev->of_node != dma_spec->np)
+ return ERR_PTR(-EINVAL);
+
+ ret = rproc_boot(fdev->slim_rproc->rproc);
+ if (ret == -ENOENT)
+ return ERR_PTR(-EPROBE_DEFER);
+ else if (ret)
+ return ERR_PTR(ret);
+
+ chan = dma_get_any_slave_channel(&fdev->dma_device);
+ if (!chan)
+ goto err_chan;
+
+ fchan = to_st_fdma_chan(chan);
+
+ fchan->cfg.of_node = dma_spec->np;
+ fchan->cfg.req_line = dma_spec->args[0];
+ fchan->cfg.req_ctrl = 0;
+ fchan->cfg.type = ST_FDMA_TYPE_FREE_RUN;
+
+ if (dma_spec->args_count > 1)
+ fchan->cfg.req_ctrl = dma_spec->args[1]
+ & FDMA_REQ_CTRL_CFG_MASK;
+
+ if (dma_spec->args_count > 2)
+ fchan->cfg.type = dma_spec->args[2];
+
+ if (fchan->cfg.type == ST_FDMA_TYPE_FREE_RUN) {
+ fchan->dreq_line = 0;
+ } else {
+ fchan->dreq_line = st_fdma_dreq_get(fchan);
+ if (IS_ERR_VALUE(fchan->dreq_line)) {
+ chan = ERR_PTR(fchan->dreq_line);
+ goto err_chan;
+ }
+ }
+
+ dev_dbg(fdev->dev, "xlate req_line:%d type:%d req_ctrl:%#lx\n",
+ fchan->cfg.req_line, fchan->cfg.type, fchan->cfg.req_ctrl);
+
+ return chan;
+
+err_chan:
+ rproc_shutdown(fdev->slim_rproc->rproc);
+ return chan;
+
+}
+
+static void st_fdma_free_desc(struct virt_dma_desc *vdesc)
+{
+ struct st_fdma_desc *fdesc;
+ int i;
+
+ fdesc = to_st_fdma_desc(vdesc);
+ for (i = 0; i < fdesc->n_nodes; i++)
+ dma_pool_free(fdesc->fchan->node_pool, fdesc->node[i].desc,
+ fdesc->node[i].pdesc);
+ kfree(fdesc);
+}
+
+static struct st_fdma_desc *st_fdma_alloc_desc(struct st_fdma_chan *fchan,
+ int sg_len)
+{
+ struct st_fdma_desc *fdesc;
+ int i;
+
+ fdesc = kzalloc(struct_size(fdesc, node, sg_len), GFP_NOWAIT);
+ if (!fdesc)
+ return NULL;
+
+ fdesc->fchan = fchan;
+ fdesc->n_nodes = sg_len;
+ for (i = 0; i < sg_len; i++) {
+ fdesc->node[i].desc = dma_pool_alloc(fchan->node_pool,
+ GFP_NOWAIT, &fdesc->node[i].pdesc);
+ if (!fdesc->node[i].desc)
+ goto err;
+ }
+ return fdesc;
+
+err:
+ while (--i >= 0)
+ dma_pool_free(fchan->node_pool, fdesc->node[i].desc,
+ fdesc->node[i].pdesc);
+ kfree(fdesc);
+ return NULL;
+}
+
+static int st_fdma_alloc_chan_res(struct dma_chan *chan)
+{
+ struct st_fdma_chan *fchan = to_st_fdma_chan(chan);
+
+ /* Create the dma pool for descriptor allocation */
+ fchan->node_pool = dma_pool_create(dev_name(&chan->dev->device),
+ fchan->fdev->dev,
+ sizeof(struct st_fdma_hw_node),
+ __alignof__(struct st_fdma_hw_node),
+ 0);
+
+ if (!fchan->node_pool) {
+ dev_err(fchan->fdev->dev, "unable to allocate desc pool\n");
+ return -ENOMEM;
+ }
+
+ dev_dbg(fchan->fdev->dev, "alloc ch_id:%d type:%d\n",
+ fchan->vchan.chan.chan_id, fchan->cfg.type);
+
+ return 0;
+}
+
+static void st_fdma_free_chan_res(struct dma_chan *chan)
+{
+ struct st_fdma_chan *fchan = to_st_fdma_chan(chan);
+ struct rproc *rproc = fchan->fdev->slim_rproc->rproc;
+ unsigned long flags;
+
+ dev_dbg(fchan->fdev->dev, "%s: freeing chan:%d\n",
+ __func__, fchan->vchan.chan.chan_id);
+
+ if (fchan->cfg.type != ST_FDMA_TYPE_FREE_RUN)
+ st_fdma_dreq_put(fchan);
+
+ spin_lock_irqsave(&fchan->vchan.lock, flags);
+ fchan->fdesc = NULL;
+ spin_unlock_irqrestore(&fchan->vchan.lock, flags);
+
+ dma_pool_destroy(fchan->node_pool);
+ fchan->node_pool = NULL;
+ memset(&fchan->cfg, 0, sizeof(struct st_fdma_cfg));
+
+ rproc_shutdown(rproc);
+}
+
+static struct dma_async_tx_descriptor *st_fdma_prep_dma_memcpy(
+ struct dma_chan *chan, dma_addr_t dst, dma_addr_t src,
+ size_t len, unsigned long flags)
+{
+ struct st_fdma_chan *fchan;
+ struct st_fdma_desc *fdesc;
+ struct st_fdma_hw_node *hw_node;
+
+ if (!len)
+ return NULL;
+
+ fchan = to_st_fdma_chan(chan);
+
+ /* We only require a single descriptor */
+ fdesc = st_fdma_alloc_desc(fchan, 1);
+ if (!fdesc) {
+ dev_err(fchan->fdev->dev, "no memory for desc\n");
+ return NULL;
+ }
+
+ hw_node = fdesc->node[0].desc;
+ hw_node->next = 0;
+ hw_node->control = FDMA_NODE_CTRL_REQ_MAP_FREE_RUN;
+ hw_node->control |= FDMA_NODE_CTRL_SRC_INCR;
+ hw_node->control |= FDMA_NODE_CTRL_DST_INCR;
+ hw_node->control |= FDMA_NODE_CTRL_INT_EON;
+ hw_node->nbytes = len;
+ hw_node->saddr = src;
+ hw_node->daddr = dst;
+ hw_node->generic.length = len;
+ hw_node->generic.sstride = 0;
+ hw_node->generic.dstride = 0;
+
+ return vchan_tx_prep(&fchan->vchan, &fdesc->vdesc, flags);
+}
+
+static int config_reqctrl(struct st_fdma_chan *fchan,
+ enum dma_transfer_direction direction)
+{
+ u32 maxburst = 0, addr = 0;
+ enum dma_slave_buswidth width;
+ int ch_id = fchan->vchan.chan.chan_id;
+ struct st_fdma_dev *fdev = fchan->fdev;
+
+ switch (direction) {
+
+ case DMA_DEV_TO_MEM:
+ fchan->cfg.req_ctrl &= ~FDMA_REQ_CTRL_WNR;
+ maxburst = fchan->scfg.src_maxburst;
+ width = fchan->scfg.src_addr_width;
+ addr = fchan->scfg.src_addr;
+ break;
+
+ case DMA_MEM_TO_DEV:
+ fchan->cfg.req_ctrl |= FDMA_REQ_CTRL_WNR;
+ maxburst = fchan->scfg.dst_maxburst;
+ width = fchan->scfg.dst_addr_width;
+ addr = fchan->scfg.dst_addr;
+ break;
+
+ default:
+ return -EINVAL;
+ }
+
+ fchan->cfg.req_ctrl &= ~FDMA_REQ_CTRL_OPCODE_MASK;
+
+ switch (width) {
+
+ case DMA_SLAVE_BUSWIDTH_1_BYTE:
+ fchan->cfg.req_ctrl |= FDMA_REQ_CTRL_OPCODE_LD_ST1;
+ break;
+
+ case DMA_SLAVE_BUSWIDTH_2_BYTES:
+ fchan->cfg.req_ctrl |= FDMA_REQ_CTRL_OPCODE_LD_ST2;
+ break;
+
+ case DMA_SLAVE_BUSWIDTH_4_BYTES:
+ fchan->cfg.req_ctrl |= FDMA_REQ_CTRL_OPCODE_LD_ST4;
+ break;
+
+ case DMA_SLAVE_BUSWIDTH_8_BYTES:
+ fchan->cfg.req_ctrl |= FDMA_REQ_CTRL_OPCODE_LD_ST8;
+ break;
+
+ default:
+ return -EINVAL;
+ }
+
+ fchan->cfg.req_ctrl &= ~FDMA_REQ_CTRL_NUM_OPS_MASK;
+ fchan->cfg.req_ctrl |= FDMA_REQ_CTRL_NUM_OPS(maxburst-1);
+ dreq_write(fchan, fchan->cfg.req_ctrl, FDMA_REQ_CTRL_OFST);
+
+ fchan->cfg.dev_addr = addr;
+ fchan->cfg.dir = direction;
+
+ dev_dbg(fdev->dev, "chan:%d config_reqctrl:%#x req_ctrl:%#lx\n",
+ ch_id, addr, fchan->cfg.req_ctrl);
+
+ return 0;
+}
+
+static void fill_hw_node(struct st_fdma_hw_node *hw_node,
+ struct st_fdma_chan *fchan,
+ enum dma_transfer_direction direction)
+{
+ if (direction == DMA_MEM_TO_DEV) {
+ hw_node->control |= FDMA_NODE_CTRL_SRC_INCR;
+ hw_node->control |= FDMA_NODE_CTRL_DST_STATIC;
+ hw_node->daddr = fchan->cfg.dev_addr;
+ } else {
+ hw_node->control |= FDMA_NODE_CTRL_SRC_STATIC;
+ hw_node->control |= FDMA_NODE_CTRL_DST_INCR;
+ hw_node->saddr = fchan->cfg.dev_addr;
+ }
+
+ hw_node->generic.sstride = 0;
+ hw_node->generic.dstride = 0;
+}
+
+static inline struct st_fdma_chan *st_fdma_prep_common(struct dma_chan *chan,
+ size_t len, enum dma_transfer_direction direction)
+{
+ struct st_fdma_chan *fchan;
+
+ if (!chan || !len)
+ return NULL;
+
+ fchan = to_st_fdma_chan(chan);
+
+ if (!is_slave_direction(direction)) {
+ dev_err(fchan->fdev->dev, "bad direction?\n");
+ return NULL;
+ }
+
+ return fchan;
+}
+
+static struct dma_async_tx_descriptor *st_fdma_prep_dma_cyclic(
+ struct dma_chan *chan, dma_addr_t buf_addr, size_t len,
+ size_t period_len, enum dma_transfer_direction direction,
+ unsigned long flags)
+{
+ struct st_fdma_chan *fchan;
+ struct st_fdma_desc *fdesc;
+ int sg_len, i;
+
+ fchan = st_fdma_prep_common(chan, len, direction);
+ if (!fchan)
+ return NULL;
+
+ if (!period_len)
+ return NULL;
+
+ if (config_reqctrl(fchan, direction)) {
+ dev_err(fchan->fdev->dev, "bad width or direction\n");
+ return NULL;
+ }
+
+ /* the buffer length must be a multiple of period_len */
+ if (len % period_len != 0) {
+ dev_err(fchan->fdev->dev, "len is not multiple of period\n");
+ return NULL;
+ }
+
+ sg_len = len / period_len;
+ fdesc = st_fdma_alloc_desc(fchan, sg_len);
+ if (!fdesc) {
+ dev_err(fchan->fdev->dev, "no memory for desc\n");
+ return NULL;
+ }
+
+ fdesc->iscyclic = true;
+
+ for (i = 0; i < sg_len; i++) {
+ struct st_fdma_hw_node *hw_node = fdesc->node[i].desc;
+
+ hw_node->next = fdesc->node[(i + 1) % sg_len].pdesc;
+
+ hw_node->control =
+ FDMA_NODE_CTRL_REQ_MAP_DREQ(fchan->dreq_line);
+ hw_node->control |= FDMA_NODE_CTRL_INT_EON;
+
+ fill_hw_node(hw_node, fchan, direction);
+
+ if (direction == DMA_MEM_TO_DEV)
+ hw_node->saddr = buf_addr + (i * period_len);
+ else
+ hw_node->daddr = buf_addr + (i * period_len);
+
+ hw_node->nbytes = period_len;
+ hw_node->generic.length = period_len;
+ }
+
+ return vchan_tx_prep(&fchan->vchan, &fdesc->vdesc, flags);
+}
+
+static struct dma_async_tx_descriptor *st_fdma_prep_slave_sg(
+ struct dma_chan *chan, struct scatterlist *sgl,
+ unsigned int sg_len, enum dma_transfer_direction direction,
+ unsigned long flags, void *context)
+{
+ struct st_fdma_chan *fchan;
+ struct st_fdma_desc *fdesc;
+ struct st_fdma_hw_node *hw_node;
+ struct scatterlist *sg;
+ int i;
+
+ fchan = st_fdma_prep_common(chan, sg_len, direction);
+ if (!fchan)
+ return NULL;
+
+ if (!sgl)
+ return NULL;
+
+ fdesc = st_fdma_alloc_desc(fchan, sg_len);
+ if (!fdesc) {
+ dev_err(fchan->fdev->dev, "no memory for desc\n");
+ return NULL;
+ }
+
+ fdesc->iscyclic = false;
+
+ for_each_sg(sgl, sg, sg_len, i) {
+ hw_node = fdesc->node[i].desc;
+
+ hw_node->next = fdesc->node[(i + 1) % sg_len].pdesc;
+ hw_node->control = FDMA_NODE_CTRL_REQ_MAP_DREQ(fchan->dreq_line);
+
+ fill_hw_node(hw_node, fchan, direction);
+
+ if (direction == DMA_MEM_TO_DEV)
+ hw_node->saddr = sg_dma_address(sg);
+ else
+ hw_node->daddr = sg_dma_address(sg);
+
+ hw_node->nbytes = sg_dma_len(sg);
+ hw_node->generic.length = sg_dma_len(sg);
+ }
+
+ /* interrupt at end of last node */
+ hw_node->control |= FDMA_NODE_CTRL_INT_EON;
+
+ return vchan_tx_prep(&fchan->vchan, &fdesc->vdesc, flags);
+}
+
+static size_t st_fdma_desc_residue(struct st_fdma_chan *fchan,
+ struct virt_dma_desc *vdesc,
+ bool in_progress)
+{
+ struct st_fdma_desc *fdesc = fchan->fdesc;
+ size_t residue = 0;
+ dma_addr_t cur_addr = 0;
+ int i;
+
+ if (in_progress) {
+ cur_addr = fchan_read(fchan, FDMA_CH_CMD_OFST);
+ cur_addr &= FDMA_CH_CMD_DATA_MASK;
+ }
+
+ for (i = fchan->fdesc->n_nodes - 1 ; i >= 0; i--) {
+ if (cur_addr == fdesc->node[i].pdesc) {
+ residue += fnode_read(fchan, FDMA_CNTN_OFST);
+ break;
+ }
+ residue += fdesc->node[i].desc->nbytes;
+ }
+
+ return residue;
+}
+
+static enum dma_status st_fdma_tx_status(struct dma_chan *chan,
+ dma_cookie_t cookie,
+ struct dma_tx_state *txstate)
+{
+ struct st_fdma_chan *fchan = to_st_fdma_chan(chan);
+ struct virt_dma_desc *vd;
+ enum dma_status ret;
+ unsigned long flags;
+
+ ret = dma_cookie_status(chan, cookie, txstate);
+ if (ret == DMA_COMPLETE || !txstate)
+ return ret;
+
+ spin_lock_irqsave(&fchan->vchan.lock, flags);
+ vd = vchan_find_desc(&fchan->vchan, cookie);
+ if (fchan->fdesc && cookie == fchan->fdesc->vdesc.tx.cookie)
+ txstate->residue = st_fdma_desc_residue(fchan, vd, true);
+ else if (vd)
+ txstate->residue = st_fdma_desc_residue(fchan, vd, false);
+ else
+ txstate->residue = 0;
+
+ spin_unlock_irqrestore(&fchan->vchan.lock, flags);
+
+ return ret;
+}
+
+static void st_fdma_issue_pending(struct dma_chan *chan)
+{
+ struct st_fdma_chan *fchan = to_st_fdma_chan(chan);
+ unsigned long flags;
+
+ spin_lock_irqsave(&fchan->vchan.lock, flags);
+
+ if (vchan_issue_pending(&fchan->vchan) && !fchan->fdesc)
+ st_fdma_xfer_desc(fchan);
+
+ spin_unlock_irqrestore(&fchan->vchan.lock, flags);
+}
+
+static int st_fdma_pause(struct dma_chan *chan)
+{
+ unsigned long flags;
+ struct st_fdma_chan *fchan = to_st_fdma_chan(chan);
+ int ch_id = fchan->vchan.chan.chan_id;
+ unsigned long cmd = FDMA_CMD_PAUSE(ch_id);
+
+ dev_dbg(fchan->fdev->dev, "pause chan:%d\n", ch_id);
+
+ spin_lock_irqsave(&fchan->vchan.lock, flags);
+ if (fchan->fdesc)
+ fdma_write(fchan->fdev, cmd, FDMA_CMD_SET_OFST);
+ spin_unlock_irqrestore(&fchan->vchan.lock, flags);
+
+ return 0;
+}
+
+static int st_fdma_resume(struct dma_chan *chan)
+{
+ unsigned long flags;
+ unsigned long val;
+ struct st_fdma_chan *fchan = to_st_fdma_chan(chan);
+ int ch_id = fchan->vchan.chan.chan_id;
+
+ dev_dbg(fchan->fdev->dev, "resume chan:%d\n", ch_id);
+
+ spin_lock_irqsave(&fchan->vchan.lock, flags);
+ if (fchan->fdesc) {
+ val = fchan_read(fchan, FDMA_CH_CMD_OFST);
+ val &= FDMA_CH_CMD_DATA_MASK;
+ fchan_write(fchan, val, FDMA_CH_CMD_OFST);
+ }
+ spin_unlock_irqrestore(&fchan->vchan.lock, flags);
+
+ return 0;
+}
+
+static int st_fdma_terminate_all(struct dma_chan *chan)
+{
+ unsigned long flags;
+ LIST_HEAD(head);
+ struct st_fdma_chan *fchan = to_st_fdma_chan(chan);
+ int ch_id = fchan->vchan.chan.chan_id;
+ unsigned long cmd = FDMA_CMD_PAUSE(ch_id);
+
+ dev_dbg(fchan->fdev->dev, "terminate chan:%d\n", ch_id);
+
+ spin_lock_irqsave(&fchan->vchan.lock, flags);
+ fdma_write(fchan->fdev, cmd, FDMA_CMD_SET_OFST);
+ fchan->fdesc = NULL;
+ vchan_get_all_descriptors(&fchan->vchan, &head);
+ spin_unlock_irqrestore(&fchan->vchan.lock, flags);
+ vchan_dma_desc_free_list(&fchan->vchan, &head);
+
+ return 0;
+}
+
+static int st_fdma_slave_config(struct dma_chan *chan,
+ struct dma_slave_config *slave_cfg)
+{
+ struct st_fdma_chan *fchan = to_st_fdma_chan(chan);
+
+ memcpy(&fchan->scfg, slave_cfg, sizeof(fchan->scfg));
+ return 0;
+}
+
+static const struct st_fdma_driverdata fdma_mpe31_stih407_11 = {
+ .name = "STiH407",
+ .id = 0,
+};
+
+static const struct st_fdma_driverdata fdma_mpe31_stih407_12 = {
+ .name = "STiH407",
+ .id = 1,
+};
+
+static const struct st_fdma_driverdata fdma_mpe31_stih407_13 = {
+ .name = "STiH407",
+ .id = 2,
+};
+
+static const struct of_device_id st_fdma_match[] = {
+ { .compatible = "st,stih407-fdma-mpe31-11"
+ , .data = &fdma_mpe31_stih407_11 },
+ { .compatible = "st,stih407-fdma-mpe31-12"
+ , .data = &fdma_mpe31_stih407_12 },
+ { .compatible = "st,stih407-fdma-mpe31-13"
+ , .data = &fdma_mpe31_stih407_13 },
+ {},
+};
+MODULE_DEVICE_TABLE(of, st_fdma_match);
+
+static int st_fdma_parse_dt(struct platform_device *pdev,
+ const struct st_fdma_driverdata *drvdata,
+ struct st_fdma_dev *fdev)
+{
+ snprintf(fdev->fw_name, FW_NAME_SIZE, "fdma_%s_%d.elf",
+ drvdata->name, drvdata->id);
+
+ return of_property_read_u32(pdev->dev.of_node, "dma-channels",
+ &fdev->nr_channels);
+}
+#define FDMA_DMA_BUSWIDTHS (BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | \
+ BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \
+ BIT(DMA_SLAVE_BUSWIDTH_3_BYTES) | \
+ BIT(DMA_SLAVE_BUSWIDTH_4_BYTES))
+
+static void st_fdma_free(struct st_fdma_dev *fdev)
+{
+ struct st_fdma_chan *fchan;
+ int i;
+
+ for (i = 0; i < fdev->nr_channels; i++) {
+ fchan = &fdev->chans[i];
+ list_del(&fchan->vchan.chan.device_node);
+ tasklet_kill(&fchan->vchan.task);
+ }
+}
+
+static int st_fdma_probe(struct platform_device *pdev)
+{
+ struct st_fdma_dev *fdev;
+ const struct of_device_id *match;
+ struct device_node *np = pdev->dev.of_node;
+ const struct st_fdma_driverdata *drvdata;
+ int ret, i;
+
+ match = of_match_device((st_fdma_match), &pdev->dev);
+ if (!match || !match->data) {
+ dev_err(&pdev->dev, "No device match found\n");
+ return -ENODEV;
+ }
+
+ drvdata = match->data;
+
+ fdev = devm_kzalloc(&pdev->dev, sizeof(*fdev), GFP_KERNEL);
+ if (!fdev)
+ return -ENOMEM;
+
+ ret = st_fdma_parse_dt(pdev, drvdata, fdev);
+ if (ret) {
+ dev_err(&pdev->dev, "unable to find platform data\n");
+ goto err;
+ }
+
+ fdev->chans = devm_kcalloc(&pdev->dev, fdev->nr_channels,
+ sizeof(struct st_fdma_chan), GFP_KERNEL);
+ if (!fdev->chans)
+ return -ENOMEM;
+
+ fdev->dev = &pdev->dev;
+ fdev->drvdata = drvdata;
+ platform_set_drvdata(pdev, fdev);
+
+ fdev->irq = platform_get_irq(pdev, 0);
+ if (fdev->irq < 0)
+ return -EINVAL;
+
+ ret = devm_request_irq(&pdev->dev, fdev->irq, st_fdma_irq_handler, 0,
+ dev_name(&pdev->dev), fdev);
+ if (ret) {
+ dev_err(&pdev->dev, "Failed to request irq (%d)\n", ret);
+ goto err;
+ }
+
+ fdev->slim_rproc = st_slim_rproc_alloc(pdev, fdev->fw_name);
+ if (IS_ERR(fdev->slim_rproc)) {
+ ret = PTR_ERR(fdev->slim_rproc);
+ dev_err(&pdev->dev, "slim_rproc_alloc failed (%d)\n", ret);
+ goto err;
+ }
+
+ /* Initialise list of FDMA channels */
+ INIT_LIST_HEAD(&fdev->dma_device.channels);
+ for (i = 0; i < fdev->nr_channels; i++) {
+ struct st_fdma_chan *fchan = &fdev->chans[i];
+
+ fchan->fdev = fdev;
+ fchan->vchan.desc_free = st_fdma_free_desc;
+ vchan_init(&fchan->vchan, &fdev->dma_device);
+ }
+
+ /* Initialise the FDMA dreq (reserve 0 & 31 for FDMA use) */
+ fdev->dreq_mask = BIT(0) | BIT(31);
+
+ dma_cap_set(DMA_SLAVE, fdev->dma_device.cap_mask);
+ dma_cap_set(DMA_CYCLIC, fdev->dma_device.cap_mask);
+ dma_cap_set(DMA_MEMCPY, fdev->dma_device.cap_mask);
+
+ fdev->dma_device.dev = &pdev->dev;
+ fdev->dma_device.device_alloc_chan_resources = st_fdma_alloc_chan_res;
+ fdev->dma_device.device_free_chan_resources = st_fdma_free_chan_res;
+ fdev->dma_device.device_prep_dma_cyclic = st_fdma_prep_dma_cyclic;
+ fdev->dma_device.device_prep_slave_sg = st_fdma_prep_slave_sg;
+ fdev->dma_device.device_prep_dma_memcpy = st_fdma_prep_dma_memcpy;
+ fdev->dma_device.device_tx_status = st_fdma_tx_status;
+ fdev->dma_device.device_issue_pending = st_fdma_issue_pending;
+ fdev->dma_device.device_terminate_all = st_fdma_terminate_all;
+ fdev->dma_device.device_config = st_fdma_slave_config;
+ fdev->dma_device.device_pause = st_fdma_pause;
+ fdev->dma_device.device_resume = st_fdma_resume;
+
+ fdev->dma_device.src_addr_widths = FDMA_DMA_BUSWIDTHS;
+ fdev->dma_device.dst_addr_widths = FDMA_DMA_BUSWIDTHS;
+ fdev->dma_device.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
+ fdev->dma_device.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
+
+ ret = dmaenginem_async_device_register(&fdev->dma_device);
+ if (ret) {
+ dev_err(&pdev->dev,
+ "Failed to register DMA device (%d)\n", ret);
+ goto err_rproc;
+ }
+
+ ret = of_dma_controller_register(np, st_fdma_of_xlate, fdev);
+ if (ret) {
+ dev_err(&pdev->dev,
+ "Failed to register controller (%d)\n", ret);
+ goto err_rproc;
+ }
+
+ dev_info(&pdev->dev, "ST FDMA engine driver, irq:%d\n", fdev->irq);
+
+ return 0;
+
+err_rproc:
+ st_fdma_free(fdev);
+ st_slim_rproc_put(fdev->slim_rproc);
+err:
+ return ret;
+}
+
+static int st_fdma_remove(struct platform_device *pdev)
+{
+ struct st_fdma_dev *fdev = platform_get_drvdata(pdev);
+
+ devm_free_irq(&pdev->dev, fdev->irq, fdev);
+ st_slim_rproc_put(fdev->slim_rproc);
+ of_dma_controller_free(pdev->dev.of_node);
+
+ return 0;
+}
+
+static struct platform_driver st_fdma_platform_driver = {
+ .driver = {
+ .name = DRIVER_NAME,
+ .of_match_table = st_fdma_match,
+ },
+ .probe = st_fdma_probe,
+ .remove = st_fdma_remove,
+};
+module_platform_driver(st_fdma_platform_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("STMicroelectronics FDMA engine driver");
+MODULE_AUTHOR("Ludovic.barre <Ludovic.barre@st.com>");
+MODULE_AUTHOR("Peter Griffin <peter.griffin@linaro.org>");
+MODULE_ALIAS("platform:" DRIVER_NAME);
diff --git a/drivers/dma/st_fdma.h b/drivers/dma/st_fdma.h
new file mode 100644
index 000000000..fa15b97a3
--- /dev/null
+++ b/drivers/dma/st_fdma.h
@@ -0,0 +1,245 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * DMA driver header for STMicroelectronics STi FDMA controller
+ *
+ * Copyright (C) 2014 STMicroelectronics
+ *
+ * Author: Ludovic Barre <Ludovic.barre@st.com>
+ */
+#ifndef __DMA_ST_FDMA_H
+#define __DMA_ST_FDMA_H
+
+#include <linux/dmaengine.h>
+#include <linux/dmapool.h>
+#include <linux/io.h>
+#include <linux/remoteproc/st_slim_rproc.h>
+#include "virt-dma.h"
+
+#define ST_FDMA_NR_DREQS 32
+#define FW_NAME_SIZE 30
+#define DRIVER_NAME "st-fdma"
+
+/**
+ * struct st_fdma_generic_node - Free running/paced generic node
+ *
+ * @length: Length in bytes of a line in a 2D mem to mem
+ * @sstride: Stride, in bytes, between source lines in a 2D data move
+ * @dstride: Stride, in bytes, between destination lines in a 2D data move
+ */
+struct st_fdma_generic_node {
+ u32 length;
+ u32 sstride;
+ u32 dstride;
+};
+
+/**
+ * struct st_fdma_hw_node - Node structure used by fdma hw
+ *
+ * @next: Pointer to next node
+ * @control: Transfer Control Parameters
+ * @nbytes: Number of Bytes to read
+ * @saddr: Source address
+ * @daddr: Destination address
+ *
+ * @generic: generic node for free running/paced transfert type
+ * 2 others transfert type are possible, but not yet implemented
+ *
+ * The NODE structures must be aligned to a 32 byte boundary
+ */
+struct st_fdma_hw_node {
+ u32 next;
+ u32 control;
+ u32 nbytes;
+ u32 saddr;
+ u32 daddr;
+ union {
+ struct st_fdma_generic_node generic;
+ };
+} __aligned(32);
+
+/*
+ * node control parameters
+ */
+#define FDMA_NODE_CTRL_REQ_MAP_MASK GENMASK(4, 0)
+#define FDMA_NODE_CTRL_REQ_MAP_FREE_RUN 0x0
+#define FDMA_NODE_CTRL_REQ_MAP_DREQ(n) ((n)&FDMA_NODE_CTRL_REQ_MAP_MASK)
+#define FDMA_NODE_CTRL_REQ_MAP_EXT FDMA_NODE_CTRL_REQ_MAP_MASK
+#define FDMA_NODE_CTRL_SRC_MASK GENMASK(6, 5)
+#define FDMA_NODE_CTRL_SRC_STATIC BIT(5)
+#define FDMA_NODE_CTRL_SRC_INCR BIT(6)
+#define FDMA_NODE_CTRL_DST_MASK GENMASK(8, 7)
+#define FDMA_NODE_CTRL_DST_STATIC BIT(7)
+#define FDMA_NODE_CTRL_DST_INCR BIT(8)
+#define FDMA_NODE_CTRL_SECURE BIT(15)
+#define FDMA_NODE_CTRL_PAUSE_EON BIT(30)
+#define FDMA_NODE_CTRL_INT_EON BIT(31)
+
+/**
+ * struct st_fdma_sw_node - descriptor structure for link list
+ *
+ * @pdesc: Physical address of desc
+ * @node: link used for putting this into a channel queue
+ */
+struct st_fdma_sw_node {
+ dma_addr_t pdesc;
+ struct st_fdma_hw_node *desc;
+};
+
+#define NAME_SZ 10
+
+struct st_fdma_driverdata {
+ u32 id;
+ char name[NAME_SZ];
+};
+
+struct st_fdma_desc {
+ struct virt_dma_desc vdesc;
+ struct st_fdma_chan *fchan;
+ bool iscyclic;
+ unsigned int n_nodes;
+ struct st_fdma_sw_node node[];
+};
+
+enum st_fdma_type {
+ ST_FDMA_TYPE_FREE_RUN,
+ ST_FDMA_TYPE_PACED,
+};
+
+struct st_fdma_cfg {
+ struct device_node *of_node;
+ enum st_fdma_type type;
+ dma_addr_t dev_addr;
+ enum dma_transfer_direction dir;
+ int req_line; /* request line */
+ long req_ctrl; /* Request control */
+};
+
+struct st_fdma_chan {
+ struct st_fdma_dev *fdev;
+ struct dma_pool *node_pool;
+ struct dma_slave_config scfg;
+ struct st_fdma_cfg cfg;
+
+ int dreq_line;
+
+ struct virt_dma_chan vchan;
+ struct st_fdma_desc *fdesc;
+ enum dma_status status;
+};
+
+struct st_fdma_dev {
+ struct device *dev;
+ const struct st_fdma_driverdata *drvdata;
+ struct dma_device dma_device;
+
+ struct st_slim_rproc *slim_rproc;
+
+ int irq;
+
+ struct st_fdma_chan *chans;
+
+ spinlock_t dreq_lock;
+ unsigned long dreq_mask;
+
+ u32 nr_channels;
+ char fw_name[FW_NAME_SIZE];
+};
+
+/* Peripheral Registers*/
+
+#define FDMA_CMD_STA_OFST 0xFC0
+#define FDMA_CMD_SET_OFST 0xFC4
+#define FDMA_CMD_CLR_OFST 0xFC8
+#define FDMA_CMD_MASK_OFST 0xFCC
+#define FDMA_CMD_START(ch) (0x1 << (ch << 1))
+#define FDMA_CMD_PAUSE(ch) (0x2 << (ch << 1))
+#define FDMA_CMD_FLUSH(ch) (0x3 << (ch << 1))
+
+#define FDMA_INT_STA_OFST 0xFD0
+#define FDMA_INT_STA_CH 0x1
+#define FDMA_INT_STA_ERR 0x2
+
+#define FDMA_INT_SET_OFST 0xFD4
+#define FDMA_INT_CLR_OFST 0xFD8
+#define FDMA_INT_MASK_OFST 0xFDC
+
+#define fdma_read(fdev, name) \
+ readl((fdev)->slim_rproc->peri + name)
+
+#define fdma_write(fdev, val, name) \
+ writel((val), (fdev)->slim_rproc->peri + name)
+
+/* fchan interface (dmem) */
+#define FDMA_CH_CMD_OFST 0x200
+#define FDMA_CH_CMD_STA_MASK GENMASK(1, 0)
+#define FDMA_CH_CMD_STA_IDLE (0x0)
+#define FDMA_CH_CMD_STA_START (0x1)
+#define FDMA_CH_CMD_STA_RUNNING (0x2)
+#define FDMA_CH_CMD_STA_PAUSED (0x3)
+#define FDMA_CH_CMD_ERR_MASK GENMASK(4, 2)
+#define FDMA_CH_CMD_ERR_INT (0x0 << 2)
+#define FDMA_CH_CMD_ERR_NAND (0x1 << 2)
+#define FDMA_CH_CMD_ERR_MCHI (0x2 << 2)
+#define FDMA_CH_CMD_DATA_MASK GENMASK(31, 5)
+#define fchan_read(fchan, name) \
+ readl((fchan)->fdev->slim_rproc->mem[ST_SLIM_DMEM].cpu_addr \
+ + (fchan)->vchan.chan.chan_id * 0x4 \
+ + name)
+
+#define fchan_write(fchan, val, name) \
+ writel((val), (fchan)->fdev->slim_rproc->mem[ST_SLIM_DMEM].cpu_addr \
+ + (fchan)->vchan.chan.chan_id * 0x4 \
+ + name)
+
+/* req interface */
+#define FDMA_REQ_CTRL_OFST 0x240
+#define dreq_write(fchan, val, name) \
+ writel((val), (fchan)->fdev->slim_rproc->mem[ST_SLIM_DMEM].cpu_addr \
+ + fchan->dreq_line * 0x04 \
+ + name)
+/* node interface */
+#define FDMA_NODE_SZ 128
+#define FDMA_PTRN_OFST 0x800
+#define FDMA_CNTN_OFST 0x808
+#define FDMA_SADDRN_OFST 0x80c
+#define FDMA_DADDRN_OFST 0x810
+#define fnode_read(fchan, name) \
+ readl((fchan)->fdev->slim_rproc->mem[ST_SLIM_DMEM].cpu_addr \
+ + (fchan)->vchan.chan.chan_id * FDMA_NODE_SZ \
+ + name)
+
+#define fnode_write(fchan, val, name) \
+ writel((val), (fchan)->fdev->slim_rproc->mem[ST_SLIM_DMEM].cpu_addr \
+ + (fchan)->vchan.chan.chan_id * FDMA_NODE_SZ \
+ + name)
+
+/*
+ * request control bits
+ */
+#define FDMA_REQ_CTRL_NUM_OPS_MASK GENMASK(31, 24)
+#define FDMA_REQ_CTRL_NUM_OPS(n) (FDMA_REQ_CTRL_NUM_OPS_MASK & \
+ ((n) << 24))
+#define FDMA_REQ_CTRL_INITIATOR_MASK BIT(22)
+#define FDMA_REQ_CTRL_INIT0 (0x0 << 22)
+#define FDMA_REQ_CTRL_INIT1 (0x1 << 22)
+#define FDMA_REQ_CTRL_INC_ADDR_ON BIT(21)
+#define FDMA_REQ_CTRL_DATA_SWAP_ON BIT(17)
+#define FDMA_REQ_CTRL_WNR BIT(14)
+#define FDMA_REQ_CTRL_OPCODE_MASK GENMASK(7, 4)
+#define FDMA_REQ_CTRL_OPCODE_LD_ST1 (0x0 << 4)
+#define FDMA_REQ_CTRL_OPCODE_LD_ST2 (0x1 << 4)
+#define FDMA_REQ_CTRL_OPCODE_LD_ST4 (0x2 << 4)
+#define FDMA_REQ_CTRL_OPCODE_LD_ST8 (0x3 << 4)
+#define FDMA_REQ_CTRL_OPCODE_LD_ST16 (0x4 << 4)
+#define FDMA_REQ_CTRL_OPCODE_LD_ST32 (0x5 << 4)
+#define FDMA_REQ_CTRL_OPCODE_LD_ST64 (0x6 << 4)
+#define FDMA_REQ_CTRL_HOLDOFF_MASK GENMASK(2, 0)
+#define FDMA_REQ_CTRL_HOLDOFF(n) ((n) & FDMA_REQ_CTRL_HOLDOFF_MASK)
+
+/* bits used by client to configure request control */
+#define FDMA_REQ_CTRL_CFG_MASK (FDMA_REQ_CTRL_HOLDOFF_MASK | \
+ FDMA_REQ_CTRL_DATA_SWAP_ON | \
+ FDMA_REQ_CTRL_INC_ADDR_ON | \
+ FDMA_REQ_CTRL_INITIATOR_MASK)
+
+#endif /* __DMA_ST_FDMA_H */
diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c
new file mode 100644
index 000000000..594b016e7
--- /dev/null
+++ b/drivers/dma/ste_dma40.c
@@ -0,0 +1,3729 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) Ericsson AB 2007-2008
+ * Copyright (C) ST-Ericsson SA 2008-2010
+ * Author: Per Forlin <per.forlin@stericsson.com> for ST-Ericsson
+ * Author: Jonas Aaberg <jonas.aberg@stericsson.com> for ST-Ericsson
+ */
+
+#include <linux/dma-mapping.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/export.h>
+#include <linux/dmaengine.h>
+#include <linux/platform_device.h>
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/log2.h>
+#include <linux/pm.h>
+#include <linux/pm_runtime.h>
+#include <linux/err.h>
+#include <linux/of.h>
+#include <linux/of_dma.h>
+#include <linux/amba/bus.h>
+#include <linux/regulator/consumer.h>
+#include <linux/platform_data/dma-ste-dma40.h>
+
+#include "dmaengine.h"
+#include "ste_dma40_ll.h"
+
+#define D40_NAME "dma40"
+
+#define D40_PHY_CHAN -1
+
+/* For masking out/in 2 bit channel positions */
+#define D40_CHAN_POS(chan) (2 * (chan / 2))
+#define D40_CHAN_POS_MASK(chan) (0x3 << D40_CHAN_POS(chan))
+
+/* Maximum iterations taken before giving up suspending a channel */
+#define D40_SUSPEND_MAX_IT 500
+
+/* Milliseconds */
+#define DMA40_AUTOSUSPEND_DELAY 100
+
+/* Hardware requirement on LCLA alignment */
+#define LCLA_ALIGNMENT 0x40000
+
+/* Max number of links per event group */
+#define D40_LCLA_LINK_PER_EVENT_GRP 128
+#define D40_LCLA_END D40_LCLA_LINK_PER_EVENT_GRP
+
+/* Max number of logical channels per physical channel */
+#define D40_MAX_LOG_CHAN_PER_PHY 32
+
+/* Attempts before giving up to trying to get pages that are aligned */
+#define MAX_LCLA_ALLOC_ATTEMPTS 256
+
+/* Bit markings for allocation map */
+#define D40_ALLOC_FREE BIT(31)
+#define D40_ALLOC_PHY BIT(30)
+#define D40_ALLOC_LOG_FREE 0
+
+#define D40_MEMCPY_MAX_CHANS 8
+
+/* Reserved event lines for memcpy only. */
+#define DB8500_DMA_MEMCPY_EV_0 51
+#define DB8500_DMA_MEMCPY_EV_1 56
+#define DB8500_DMA_MEMCPY_EV_2 57
+#define DB8500_DMA_MEMCPY_EV_3 58
+#define DB8500_DMA_MEMCPY_EV_4 59
+#define DB8500_DMA_MEMCPY_EV_5 60
+
+static int dma40_memcpy_channels[] = {
+ DB8500_DMA_MEMCPY_EV_0,
+ DB8500_DMA_MEMCPY_EV_1,
+ DB8500_DMA_MEMCPY_EV_2,
+ DB8500_DMA_MEMCPY_EV_3,
+ DB8500_DMA_MEMCPY_EV_4,
+ DB8500_DMA_MEMCPY_EV_5,
+};
+
+/* Default configuration for physical memcpy */
+static const struct stedma40_chan_cfg dma40_memcpy_conf_phy = {
+ .mode = STEDMA40_MODE_PHYSICAL,
+ .dir = DMA_MEM_TO_MEM,
+
+ .src_info.data_width = DMA_SLAVE_BUSWIDTH_1_BYTE,
+ .src_info.psize = STEDMA40_PSIZE_PHY_1,
+ .src_info.flow_ctrl = STEDMA40_NO_FLOW_CTRL,
+
+ .dst_info.data_width = DMA_SLAVE_BUSWIDTH_1_BYTE,
+ .dst_info.psize = STEDMA40_PSIZE_PHY_1,
+ .dst_info.flow_ctrl = STEDMA40_NO_FLOW_CTRL,
+};
+
+/* Default configuration for logical memcpy */
+static const struct stedma40_chan_cfg dma40_memcpy_conf_log = {
+ .mode = STEDMA40_MODE_LOGICAL,
+ .dir = DMA_MEM_TO_MEM,
+
+ .src_info.data_width = DMA_SLAVE_BUSWIDTH_1_BYTE,
+ .src_info.psize = STEDMA40_PSIZE_LOG_1,
+ .src_info.flow_ctrl = STEDMA40_NO_FLOW_CTRL,
+
+ .dst_info.data_width = DMA_SLAVE_BUSWIDTH_1_BYTE,
+ .dst_info.psize = STEDMA40_PSIZE_LOG_1,
+ .dst_info.flow_ctrl = STEDMA40_NO_FLOW_CTRL,
+};
+
+/**
+ * enum 40_command - The different commands and/or statuses.
+ *
+ * @D40_DMA_STOP: DMA channel command STOP or status STOPPED,
+ * @D40_DMA_RUN: The DMA channel is RUNNING of the command RUN.
+ * @D40_DMA_SUSPEND_REQ: Request the DMA to SUSPEND as soon as possible.
+ * @D40_DMA_SUSPENDED: The DMA channel is SUSPENDED.
+ */
+enum d40_command {
+ D40_DMA_STOP = 0,
+ D40_DMA_RUN = 1,
+ D40_DMA_SUSPEND_REQ = 2,
+ D40_DMA_SUSPENDED = 3
+};
+
+/*
+ * enum d40_events - The different Event Enables for the event lines.
+ *
+ * @D40_DEACTIVATE_EVENTLINE: De-activate Event line, stopping the logical chan.
+ * @D40_ACTIVATE_EVENTLINE: Activate the Event line, to start a logical chan.
+ * @D40_SUSPEND_REQ_EVENTLINE: Requesting for suspending a event line.
+ * @D40_ROUND_EVENTLINE: Status check for event line.
+ */
+
+enum d40_events {
+ D40_DEACTIVATE_EVENTLINE = 0,
+ D40_ACTIVATE_EVENTLINE = 1,
+ D40_SUSPEND_REQ_EVENTLINE = 2,
+ D40_ROUND_EVENTLINE = 3
+};
+
+/*
+ * These are the registers that has to be saved and later restored
+ * when the DMA hw is powered off.
+ * TODO: Add save/restore of D40_DREG_GCC on dma40 v3 or later, if that works.
+ */
+static __maybe_unused u32 d40_backup_regs[] = {
+ D40_DREG_LCPA,
+ D40_DREG_LCLA,
+ D40_DREG_PRMSE,
+ D40_DREG_PRMSO,
+ D40_DREG_PRMOE,
+ D40_DREG_PRMOO,
+};
+
+#define BACKUP_REGS_SZ ARRAY_SIZE(d40_backup_regs)
+
+/*
+ * since 9540 and 8540 has the same HW revision
+ * use v4a for 9540 or ealier
+ * use v4b for 8540 or later
+ * HW revision:
+ * DB8500ed has revision 0
+ * DB8500v1 has revision 2
+ * DB8500v2 has revision 3
+ * AP9540v1 has revision 4
+ * DB8540v1 has revision 4
+ * TODO: Check if all these registers have to be saved/restored on dma40 v4a
+ */
+static u32 d40_backup_regs_v4a[] = {
+ D40_DREG_PSEG1,
+ D40_DREG_PSEG2,
+ D40_DREG_PSEG3,
+ D40_DREG_PSEG4,
+ D40_DREG_PCEG1,
+ D40_DREG_PCEG2,
+ D40_DREG_PCEG3,
+ D40_DREG_PCEG4,
+ D40_DREG_RSEG1,
+ D40_DREG_RSEG2,
+ D40_DREG_RSEG3,
+ D40_DREG_RSEG4,
+ D40_DREG_RCEG1,
+ D40_DREG_RCEG2,
+ D40_DREG_RCEG3,
+ D40_DREG_RCEG4,
+};
+
+#define BACKUP_REGS_SZ_V4A ARRAY_SIZE(d40_backup_regs_v4a)
+
+static u32 d40_backup_regs_v4b[] = {
+ D40_DREG_CPSEG1,
+ D40_DREG_CPSEG2,
+ D40_DREG_CPSEG3,
+ D40_DREG_CPSEG4,
+ D40_DREG_CPSEG5,
+ D40_DREG_CPCEG1,
+ D40_DREG_CPCEG2,
+ D40_DREG_CPCEG3,
+ D40_DREG_CPCEG4,
+ D40_DREG_CPCEG5,
+ D40_DREG_CRSEG1,
+ D40_DREG_CRSEG2,
+ D40_DREG_CRSEG3,
+ D40_DREG_CRSEG4,
+ D40_DREG_CRSEG5,
+ D40_DREG_CRCEG1,
+ D40_DREG_CRCEG2,
+ D40_DREG_CRCEG3,
+ D40_DREG_CRCEG4,
+ D40_DREG_CRCEG5,
+};
+
+#define BACKUP_REGS_SZ_V4B ARRAY_SIZE(d40_backup_regs_v4b)
+
+static __maybe_unused u32 d40_backup_regs_chan[] = {
+ D40_CHAN_REG_SSCFG,
+ D40_CHAN_REG_SSELT,
+ D40_CHAN_REG_SSPTR,
+ D40_CHAN_REG_SSLNK,
+ D40_CHAN_REG_SDCFG,
+ D40_CHAN_REG_SDELT,
+ D40_CHAN_REG_SDPTR,
+ D40_CHAN_REG_SDLNK,
+};
+
+#define BACKUP_REGS_SZ_MAX ((BACKUP_REGS_SZ_V4A > BACKUP_REGS_SZ_V4B) ? \
+ BACKUP_REGS_SZ_V4A : BACKUP_REGS_SZ_V4B)
+
+/**
+ * struct d40_interrupt_lookup - lookup table for interrupt handler
+ *
+ * @src: Interrupt mask register.
+ * @clr: Interrupt clear register.
+ * @is_error: true if this is an error interrupt.
+ * @offset: start delta in the lookup_log_chans in d40_base. If equals to
+ * D40_PHY_CHAN, the lookup_phy_chans shall be used instead.
+ */
+struct d40_interrupt_lookup {
+ u32 src;
+ u32 clr;
+ bool is_error;
+ int offset;
+};
+
+
+static struct d40_interrupt_lookup il_v4a[] = {
+ {D40_DREG_LCTIS0, D40_DREG_LCICR0, false, 0},
+ {D40_DREG_LCTIS1, D40_DREG_LCICR1, false, 32},
+ {D40_DREG_LCTIS2, D40_DREG_LCICR2, false, 64},
+ {D40_DREG_LCTIS3, D40_DREG_LCICR3, false, 96},
+ {D40_DREG_LCEIS0, D40_DREG_LCICR0, true, 0},
+ {D40_DREG_LCEIS1, D40_DREG_LCICR1, true, 32},
+ {D40_DREG_LCEIS2, D40_DREG_LCICR2, true, 64},
+ {D40_DREG_LCEIS3, D40_DREG_LCICR3, true, 96},
+ {D40_DREG_PCTIS, D40_DREG_PCICR, false, D40_PHY_CHAN},
+ {D40_DREG_PCEIS, D40_DREG_PCICR, true, D40_PHY_CHAN},
+};
+
+static struct d40_interrupt_lookup il_v4b[] = {
+ {D40_DREG_CLCTIS1, D40_DREG_CLCICR1, false, 0},
+ {D40_DREG_CLCTIS2, D40_DREG_CLCICR2, false, 32},
+ {D40_DREG_CLCTIS3, D40_DREG_CLCICR3, false, 64},
+ {D40_DREG_CLCTIS4, D40_DREG_CLCICR4, false, 96},
+ {D40_DREG_CLCTIS5, D40_DREG_CLCICR5, false, 128},
+ {D40_DREG_CLCEIS1, D40_DREG_CLCICR1, true, 0},
+ {D40_DREG_CLCEIS2, D40_DREG_CLCICR2, true, 32},
+ {D40_DREG_CLCEIS3, D40_DREG_CLCICR3, true, 64},
+ {D40_DREG_CLCEIS4, D40_DREG_CLCICR4, true, 96},
+ {D40_DREG_CLCEIS5, D40_DREG_CLCICR5, true, 128},
+ {D40_DREG_CPCTIS, D40_DREG_CPCICR, false, D40_PHY_CHAN},
+ {D40_DREG_CPCEIS, D40_DREG_CPCICR, true, D40_PHY_CHAN},
+};
+
+/**
+ * struct d40_reg_val - simple lookup struct
+ *
+ * @reg: The register.
+ * @val: The value that belongs to the register in reg.
+ */
+struct d40_reg_val {
+ unsigned int reg;
+ unsigned int val;
+};
+
+static __initdata struct d40_reg_val dma_init_reg_v4a[] = {
+ /* Clock every part of the DMA block from start */
+ { .reg = D40_DREG_GCC, .val = D40_DREG_GCC_ENABLE_ALL},
+
+ /* Interrupts on all logical channels */
+ { .reg = D40_DREG_LCMIS0, .val = 0xFFFFFFFF},
+ { .reg = D40_DREG_LCMIS1, .val = 0xFFFFFFFF},
+ { .reg = D40_DREG_LCMIS2, .val = 0xFFFFFFFF},
+ { .reg = D40_DREG_LCMIS3, .val = 0xFFFFFFFF},
+ { .reg = D40_DREG_LCICR0, .val = 0xFFFFFFFF},
+ { .reg = D40_DREG_LCICR1, .val = 0xFFFFFFFF},
+ { .reg = D40_DREG_LCICR2, .val = 0xFFFFFFFF},
+ { .reg = D40_DREG_LCICR3, .val = 0xFFFFFFFF},
+ { .reg = D40_DREG_LCTIS0, .val = 0xFFFFFFFF},
+ { .reg = D40_DREG_LCTIS1, .val = 0xFFFFFFFF},
+ { .reg = D40_DREG_LCTIS2, .val = 0xFFFFFFFF},
+ { .reg = D40_DREG_LCTIS3, .val = 0xFFFFFFFF}
+};
+static __initdata struct d40_reg_val dma_init_reg_v4b[] = {
+ /* Clock every part of the DMA block from start */
+ { .reg = D40_DREG_GCC, .val = D40_DREG_GCC_ENABLE_ALL},
+
+ /* Interrupts on all logical channels */
+ { .reg = D40_DREG_CLCMIS1, .val = 0xFFFFFFFF},
+ { .reg = D40_DREG_CLCMIS2, .val = 0xFFFFFFFF},
+ { .reg = D40_DREG_CLCMIS3, .val = 0xFFFFFFFF},
+ { .reg = D40_DREG_CLCMIS4, .val = 0xFFFFFFFF},
+ { .reg = D40_DREG_CLCMIS5, .val = 0xFFFFFFFF},
+ { .reg = D40_DREG_CLCICR1, .val = 0xFFFFFFFF},
+ { .reg = D40_DREG_CLCICR2, .val = 0xFFFFFFFF},
+ { .reg = D40_DREG_CLCICR3, .val = 0xFFFFFFFF},
+ { .reg = D40_DREG_CLCICR4, .val = 0xFFFFFFFF},
+ { .reg = D40_DREG_CLCICR5, .val = 0xFFFFFFFF},
+ { .reg = D40_DREG_CLCTIS1, .val = 0xFFFFFFFF},
+ { .reg = D40_DREG_CLCTIS2, .val = 0xFFFFFFFF},
+ { .reg = D40_DREG_CLCTIS3, .val = 0xFFFFFFFF},
+ { .reg = D40_DREG_CLCTIS4, .val = 0xFFFFFFFF},
+ { .reg = D40_DREG_CLCTIS5, .val = 0xFFFFFFFF}
+};
+
+/**
+ * struct d40_lli_pool - Structure for keeping LLIs in memory
+ *
+ * @base: Pointer to memory area when the pre_alloc_lli's are not large
+ * enough, IE bigger than the most common case, 1 dst and 1 src. NULL if
+ * pre_alloc_lli is used.
+ * @dma_addr: DMA address, if mapped
+ * @size: The size in bytes of the memory at base or the size of pre_alloc_lli.
+ * @pre_alloc_lli: Pre allocated area for the most common case of transfers,
+ * one buffer to one buffer.
+ */
+struct d40_lli_pool {
+ void *base;
+ int size;
+ dma_addr_t dma_addr;
+ /* Space for dst and src, plus an extra for padding */
+ u8 pre_alloc_lli[3 * sizeof(struct d40_phy_lli)];
+};
+
+/**
+ * struct d40_desc - A descriptor is one DMA job.
+ *
+ * @lli_phy: LLI settings for physical channel. Both src and dst=
+ * points into the lli_pool, to base if lli_len > 1 or to pre_alloc_lli if
+ * lli_len equals one.
+ * @lli_log: Same as above but for logical channels.
+ * @lli_pool: The pool with two entries pre-allocated.
+ * @lli_len: Number of llis of current descriptor.
+ * @lli_current: Number of transferred llis.
+ * @lcla_alloc: Number of LCLA entries allocated.
+ * @txd: DMA engine struct. Used for among other things for communication
+ * during a transfer.
+ * @node: List entry.
+ * @is_in_client_list: true if the client owns this descriptor.
+ * @cyclic: true if this is a cyclic job
+ *
+ * This descriptor is used for both logical and physical transfers.
+ */
+struct d40_desc {
+ /* LLI physical */
+ struct d40_phy_lli_bidir lli_phy;
+ /* LLI logical */
+ struct d40_log_lli_bidir lli_log;
+
+ struct d40_lli_pool lli_pool;
+ int lli_len;
+ int lli_current;
+ int lcla_alloc;
+
+ struct dma_async_tx_descriptor txd;
+ struct list_head node;
+
+ bool is_in_client_list;
+ bool cyclic;
+};
+
+/**
+ * struct d40_lcla_pool - LCLA pool settings and data.
+ *
+ * @base: The virtual address of LCLA. 18 bit aligned.
+ * @dma_addr: DMA address, if mapped
+ * @base_unaligned: The orignal kmalloc pointer, if kmalloc is used.
+ * This pointer is only there for clean-up on error.
+ * @pages: The number of pages needed for all physical channels.
+ * Only used later for clean-up on error
+ * @lock: Lock to protect the content in this struct.
+ * @alloc_map: big map over which LCLA entry is own by which job.
+ */
+struct d40_lcla_pool {
+ void *base;
+ dma_addr_t dma_addr;
+ void *base_unaligned;
+ int pages;
+ spinlock_t lock;
+ struct d40_desc **alloc_map;
+};
+
+/**
+ * struct d40_phy_res - struct for handling eventlines mapped to physical
+ * channels.
+ *
+ * @lock: A lock protection this entity.
+ * @reserved: True if used by secure world or otherwise.
+ * @num: The physical channel number of this entity.
+ * @allocated_src: Bit mapped to show which src event line's are mapped to
+ * this physical channel. Can also be free or physically allocated.
+ * @allocated_dst: Same as for src but is dst.
+ * allocated_dst and allocated_src uses the D40_ALLOC* defines as well as
+ * event line number.
+ * @use_soft_lli: To mark if the linked lists of channel are managed by SW.
+ */
+struct d40_phy_res {
+ spinlock_t lock;
+ bool reserved;
+ int num;
+ u32 allocated_src;
+ u32 allocated_dst;
+ bool use_soft_lli;
+};
+
+struct d40_base;
+
+/**
+ * struct d40_chan - Struct that describes a channel.
+ *
+ * @lock: A spinlock to protect this struct.
+ * @log_num: The logical number, if any of this channel.
+ * @pending_tx: The number of pending transfers. Used between interrupt handler
+ * and tasklet.
+ * @busy: Set to true when transfer is ongoing on this channel.
+ * @phy_chan: Pointer to physical channel which this instance runs on. If this
+ * point is NULL, then the channel is not allocated.
+ * @chan: DMA engine handle.
+ * @tasklet: Tasklet that gets scheduled from interrupt context to complete a
+ * transfer and call client callback.
+ * @client: Cliented owned descriptor list.
+ * @pending_queue: Submitted jobs, to be issued by issue_pending()
+ * @active: Active descriptor.
+ * @done: Completed jobs
+ * @queue: Queued jobs.
+ * @prepare_queue: Prepared jobs.
+ * @dma_cfg: The client configuration of this dma channel.
+ * @slave_config: DMA slave configuration.
+ * @configured: whether the dma_cfg configuration is valid
+ * @base: Pointer to the device instance struct.
+ * @src_def_cfg: Default cfg register setting for src.
+ * @dst_def_cfg: Default cfg register setting for dst.
+ * @log_def: Default logical channel settings.
+ * @lcpa: Pointer to dst and src lcpa settings.
+ * @runtime_addr: runtime configured address.
+ * @runtime_direction: runtime configured direction.
+ *
+ * This struct can either "be" a logical or a physical channel.
+ */
+struct d40_chan {
+ spinlock_t lock;
+ int log_num;
+ int pending_tx;
+ bool busy;
+ struct d40_phy_res *phy_chan;
+ struct dma_chan chan;
+ struct tasklet_struct tasklet;
+ struct list_head client;
+ struct list_head pending_queue;
+ struct list_head active;
+ struct list_head done;
+ struct list_head queue;
+ struct list_head prepare_queue;
+ struct stedma40_chan_cfg dma_cfg;
+ struct dma_slave_config slave_config;
+ bool configured;
+ struct d40_base *base;
+ /* Default register configurations */
+ u32 src_def_cfg;
+ u32 dst_def_cfg;
+ struct d40_def_lcsp log_def;
+ struct d40_log_lli_full *lcpa;
+ /* Runtime reconfiguration */
+ dma_addr_t runtime_addr;
+ enum dma_transfer_direction runtime_direction;
+};
+
+/**
+ * struct d40_gen_dmac - generic values to represent u8500/u8540 DMA
+ * controller
+ *
+ * @backup: the pointer to the registers address array for backup
+ * @backup_size: the size of the registers address array for backup
+ * @realtime_en: the realtime enable register
+ * @realtime_clear: the realtime clear register
+ * @high_prio_en: the high priority enable register
+ * @high_prio_clear: the high priority clear register
+ * @interrupt_en: the interrupt enable register
+ * @interrupt_clear: the interrupt clear register
+ * @il: the pointer to struct d40_interrupt_lookup
+ * @il_size: the size of d40_interrupt_lookup array
+ * @init_reg: the pointer to the struct d40_reg_val
+ * @init_reg_size: the size of d40_reg_val array
+ */
+struct d40_gen_dmac {
+ u32 *backup;
+ u32 backup_size;
+ u32 realtime_en;
+ u32 realtime_clear;
+ u32 high_prio_en;
+ u32 high_prio_clear;
+ u32 interrupt_en;
+ u32 interrupt_clear;
+ struct d40_interrupt_lookup *il;
+ u32 il_size;
+ struct d40_reg_val *init_reg;
+ u32 init_reg_size;
+};
+
+/**
+ * struct d40_base - The big global struct, one for each probe'd instance.
+ *
+ * @interrupt_lock: Lock used to make sure one interrupt is handle a time.
+ * @execmd_lock: Lock for execute command usage since several channels share
+ * the same physical register.
+ * @dev: The device structure.
+ * @virtbase: The virtual base address of the DMA's register.
+ * @rev: silicon revision detected.
+ * @clk: Pointer to the DMA clock structure.
+ * @phy_start: Physical memory start of the DMA registers.
+ * @phy_size: Size of the DMA register map.
+ * @irq: The IRQ number.
+ * @num_memcpy_chans: The number of channels used for memcpy (mem-to-mem
+ * transfers).
+ * @num_phy_chans: The number of physical channels. Read from HW. This
+ * is the number of available channels for this driver, not counting "Secure
+ * mode" allocated physical channels.
+ * @num_log_chans: The number of logical channels. Calculated from
+ * num_phy_chans.
+ * @dma_both: dma_device channels that can do both memcpy and slave transfers.
+ * @dma_slave: dma_device channels that can do only do slave transfers.
+ * @dma_memcpy: dma_device channels that can do only do memcpy transfers.
+ * @phy_chans: Room for all possible physical channels in system.
+ * @log_chans: Room for all possible logical channels in system.
+ * @lookup_log_chans: Used to map interrupt number to logical channel. Points
+ * to log_chans entries.
+ * @lookup_phy_chans: Used to map interrupt number to physical channel. Points
+ * to phy_chans entries.
+ * @plat_data: Pointer to provided platform_data which is the driver
+ * configuration.
+ * @lcpa_regulator: Pointer to hold the regulator for the esram bank for lcla.
+ * @phy_res: Vector containing all physical channels.
+ * @lcla_pool: lcla pool settings and data.
+ * @lcpa_base: The virtual mapped address of LCPA.
+ * @phy_lcpa: The physical address of the LCPA.
+ * @lcpa_size: The size of the LCPA area.
+ * @desc_slab: cache for descriptors.
+ * @reg_val_backup: Here the values of some hardware registers are stored
+ * before the DMA is powered off. They are restored when the power is back on.
+ * @reg_val_backup_v4: Backup of registers that only exits on dma40 v3 and
+ * later
+ * @reg_val_backup_chan: Backup data for standard channel parameter registers.
+ * @regs_interrupt: Scratch space for registers during interrupt.
+ * @gcc_pwr_off_mask: Mask to maintain the channels that can be turned off.
+ * @gen_dmac: the struct for generic registers values to represent u8500/8540
+ * DMA controller
+ */
+struct d40_base {
+ spinlock_t interrupt_lock;
+ spinlock_t execmd_lock;
+ struct device *dev;
+ void __iomem *virtbase;
+ u8 rev:4;
+ struct clk *clk;
+ phys_addr_t phy_start;
+ resource_size_t phy_size;
+ int irq;
+ int num_memcpy_chans;
+ int num_phy_chans;
+ int num_log_chans;
+ struct dma_device dma_both;
+ struct dma_device dma_slave;
+ struct dma_device dma_memcpy;
+ struct d40_chan *phy_chans;
+ struct d40_chan *log_chans;
+ struct d40_chan **lookup_log_chans;
+ struct d40_chan **lookup_phy_chans;
+ struct stedma40_platform_data *plat_data;
+ struct regulator *lcpa_regulator;
+ /* Physical half channels */
+ struct d40_phy_res *phy_res;
+ struct d40_lcla_pool lcla_pool;
+ void *lcpa_base;
+ dma_addr_t phy_lcpa;
+ resource_size_t lcpa_size;
+ struct kmem_cache *desc_slab;
+ u32 reg_val_backup[BACKUP_REGS_SZ];
+ u32 reg_val_backup_v4[BACKUP_REGS_SZ_MAX];
+ u32 *reg_val_backup_chan;
+ u32 *regs_interrupt;
+ u16 gcc_pwr_off_mask;
+ struct d40_gen_dmac gen_dmac;
+};
+
+static struct device *chan2dev(struct d40_chan *d40c)
+{
+ return &d40c->chan.dev->device;
+}
+
+static bool chan_is_physical(struct d40_chan *chan)
+{
+ return chan->log_num == D40_PHY_CHAN;
+}
+
+static bool chan_is_logical(struct d40_chan *chan)
+{
+ return !chan_is_physical(chan);
+}
+
+static void __iomem *chan_base(struct d40_chan *chan)
+{
+ return chan->base->virtbase + D40_DREG_PCBASE +
+ chan->phy_chan->num * D40_DREG_PCDELTA;
+}
+
+#define d40_err(dev, format, arg...) \
+ dev_err(dev, "[%s] " format, __func__, ## arg)
+
+#define chan_err(d40c, format, arg...) \
+ d40_err(chan2dev(d40c), format, ## arg)
+
+static int d40_set_runtime_config_write(struct dma_chan *chan,
+ struct dma_slave_config *config,
+ enum dma_transfer_direction direction);
+
+static int d40_pool_lli_alloc(struct d40_chan *d40c, struct d40_desc *d40d,
+ int lli_len)
+{
+ bool is_log = chan_is_logical(d40c);
+ u32 align;
+ void *base;
+
+ if (is_log)
+ align = sizeof(struct d40_log_lli);
+ else
+ align = sizeof(struct d40_phy_lli);
+
+ if (lli_len == 1) {
+ base = d40d->lli_pool.pre_alloc_lli;
+ d40d->lli_pool.size = sizeof(d40d->lli_pool.pre_alloc_lli);
+ d40d->lli_pool.base = NULL;
+ } else {
+ d40d->lli_pool.size = lli_len * 2 * align;
+
+ base = kmalloc(d40d->lli_pool.size + align, GFP_NOWAIT);
+ d40d->lli_pool.base = base;
+
+ if (d40d->lli_pool.base == NULL)
+ return -ENOMEM;
+ }
+
+ if (is_log) {
+ d40d->lli_log.src = PTR_ALIGN(base, align);
+ d40d->lli_log.dst = d40d->lli_log.src + lli_len;
+
+ d40d->lli_pool.dma_addr = 0;
+ } else {
+ d40d->lli_phy.src = PTR_ALIGN(base, align);
+ d40d->lli_phy.dst = d40d->lli_phy.src + lli_len;
+
+ d40d->lli_pool.dma_addr = dma_map_single(d40c->base->dev,
+ d40d->lli_phy.src,
+ d40d->lli_pool.size,
+ DMA_TO_DEVICE);
+
+ if (dma_mapping_error(d40c->base->dev,
+ d40d->lli_pool.dma_addr)) {
+ kfree(d40d->lli_pool.base);
+ d40d->lli_pool.base = NULL;
+ d40d->lli_pool.dma_addr = 0;
+ return -ENOMEM;
+ }
+ }
+
+ return 0;
+}
+
+static void d40_pool_lli_free(struct d40_chan *d40c, struct d40_desc *d40d)
+{
+ if (d40d->lli_pool.dma_addr)
+ dma_unmap_single(d40c->base->dev, d40d->lli_pool.dma_addr,
+ d40d->lli_pool.size, DMA_TO_DEVICE);
+
+ kfree(d40d->lli_pool.base);
+ d40d->lli_pool.base = NULL;
+ d40d->lli_pool.size = 0;
+ d40d->lli_log.src = NULL;
+ d40d->lli_log.dst = NULL;
+ d40d->lli_phy.src = NULL;
+ d40d->lli_phy.dst = NULL;
+}
+
+static int d40_lcla_alloc_one(struct d40_chan *d40c,
+ struct d40_desc *d40d)
+{
+ unsigned long flags;
+ int i;
+ int ret = -EINVAL;
+
+ spin_lock_irqsave(&d40c->base->lcla_pool.lock, flags);
+
+ /*
+ * Allocate both src and dst at the same time, therefore the half
+ * start on 1 since 0 can't be used since zero is used as end marker.
+ */
+ for (i = 1 ; i < D40_LCLA_LINK_PER_EVENT_GRP / 2; i++) {
+ int idx = d40c->phy_chan->num * D40_LCLA_LINK_PER_EVENT_GRP + i;
+
+ if (!d40c->base->lcla_pool.alloc_map[idx]) {
+ d40c->base->lcla_pool.alloc_map[idx] = d40d;
+ d40d->lcla_alloc++;
+ ret = i;
+ break;
+ }
+ }
+
+ spin_unlock_irqrestore(&d40c->base->lcla_pool.lock, flags);
+
+ return ret;
+}
+
+static int d40_lcla_free_all(struct d40_chan *d40c,
+ struct d40_desc *d40d)
+{
+ unsigned long flags;
+ int i;
+ int ret = -EINVAL;
+
+ if (chan_is_physical(d40c))
+ return 0;
+
+ spin_lock_irqsave(&d40c->base->lcla_pool.lock, flags);
+
+ for (i = 1 ; i < D40_LCLA_LINK_PER_EVENT_GRP / 2; i++) {
+ int idx = d40c->phy_chan->num * D40_LCLA_LINK_PER_EVENT_GRP + i;
+
+ if (d40c->base->lcla_pool.alloc_map[idx] == d40d) {
+ d40c->base->lcla_pool.alloc_map[idx] = NULL;
+ d40d->lcla_alloc--;
+ if (d40d->lcla_alloc == 0) {
+ ret = 0;
+ break;
+ }
+ }
+ }
+
+ spin_unlock_irqrestore(&d40c->base->lcla_pool.lock, flags);
+
+ return ret;
+
+}
+
+static void d40_desc_remove(struct d40_desc *d40d)
+{
+ list_del(&d40d->node);
+}
+
+static struct d40_desc *d40_desc_get(struct d40_chan *d40c)
+{
+ struct d40_desc *desc = NULL;
+
+ if (!list_empty(&d40c->client)) {
+ struct d40_desc *d;
+ struct d40_desc *_d;
+
+ list_for_each_entry_safe(d, _d, &d40c->client, node) {
+ if (async_tx_test_ack(&d->txd)) {
+ d40_desc_remove(d);
+ desc = d;
+ memset(desc, 0, sizeof(*desc));
+ break;
+ }
+ }
+ }
+
+ if (!desc)
+ desc = kmem_cache_zalloc(d40c->base->desc_slab, GFP_NOWAIT);
+
+ if (desc)
+ INIT_LIST_HEAD(&desc->node);
+
+ return desc;
+}
+
+static void d40_desc_free(struct d40_chan *d40c, struct d40_desc *d40d)
+{
+
+ d40_pool_lli_free(d40c, d40d);
+ d40_lcla_free_all(d40c, d40d);
+ kmem_cache_free(d40c->base->desc_slab, d40d);
+}
+
+static void d40_desc_submit(struct d40_chan *d40c, struct d40_desc *desc)
+{
+ list_add_tail(&desc->node, &d40c->active);
+}
+
+static void d40_phy_lli_load(struct d40_chan *chan, struct d40_desc *desc)
+{
+ struct d40_phy_lli *lli_dst = desc->lli_phy.dst;
+ struct d40_phy_lli *lli_src = desc->lli_phy.src;
+ void __iomem *base = chan_base(chan);
+
+ writel(lli_src->reg_cfg, base + D40_CHAN_REG_SSCFG);
+ writel(lli_src->reg_elt, base + D40_CHAN_REG_SSELT);
+ writel(lli_src->reg_ptr, base + D40_CHAN_REG_SSPTR);
+ writel(lli_src->reg_lnk, base + D40_CHAN_REG_SSLNK);
+
+ writel(lli_dst->reg_cfg, base + D40_CHAN_REG_SDCFG);
+ writel(lli_dst->reg_elt, base + D40_CHAN_REG_SDELT);
+ writel(lli_dst->reg_ptr, base + D40_CHAN_REG_SDPTR);
+ writel(lli_dst->reg_lnk, base + D40_CHAN_REG_SDLNK);
+}
+
+static void d40_desc_done(struct d40_chan *d40c, struct d40_desc *desc)
+{
+ list_add_tail(&desc->node, &d40c->done);
+}
+
+static void d40_log_lli_to_lcxa(struct d40_chan *chan, struct d40_desc *desc)
+{
+ struct d40_lcla_pool *pool = &chan->base->lcla_pool;
+ struct d40_log_lli_bidir *lli = &desc->lli_log;
+ int lli_current = desc->lli_current;
+ int lli_len = desc->lli_len;
+ bool cyclic = desc->cyclic;
+ int curr_lcla = -EINVAL;
+ int first_lcla = 0;
+ bool use_esram_lcla = chan->base->plat_data->use_esram_lcla;
+ bool linkback;
+
+ /*
+ * We may have partially running cyclic transfers, in case we did't get
+ * enough LCLA entries.
+ */
+ linkback = cyclic && lli_current == 0;
+
+ /*
+ * For linkback, we need one LCLA even with only one link, because we
+ * can't link back to the one in LCPA space
+ */
+ if (linkback || (lli_len - lli_current > 1)) {
+ /*
+ * If the channel is expected to use only soft_lli don't
+ * allocate a lcla. This is to avoid a HW issue that exists
+ * in some controller during a peripheral to memory transfer
+ * that uses linked lists.
+ */
+ if (!(chan->phy_chan->use_soft_lli &&
+ chan->dma_cfg.dir == DMA_DEV_TO_MEM))
+ curr_lcla = d40_lcla_alloc_one(chan, desc);
+
+ first_lcla = curr_lcla;
+ }
+
+ /*
+ * For linkback, we normally load the LCPA in the loop since we need to
+ * link it to the second LCLA and not the first. However, if we
+ * couldn't even get a first LCLA, then we have to run in LCPA and
+ * reload manually.
+ */
+ if (!linkback || curr_lcla == -EINVAL) {
+ unsigned int flags = 0;
+
+ if (curr_lcla == -EINVAL)
+ flags |= LLI_TERM_INT;
+
+ d40_log_lli_lcpa_write(chan->lcpa,
+ &lli->dst[lli_current],
+ &lli->src[lli_current],
+ curr_lcla,
+ flags);
+ lli_current++;
+ }
+
+ if (curr_lcla < 0)
+ goto set_current;
+
+ for (; lli_current < lli_len; lli_current++) {
+ unsigned int lcla_offset = chan->phy_chan->num * 1024 +
+ 8 * curr_lcla * 2;
+ struct d40_log_lli *lcla = pool->base + lcla_offset;
+ unsigned int flags = 0;
+ int next_lcla;
+
+ if (lli_current + 1 < lli_len)
+ next_lcla = d40_lcla_alloc_one(chan, desc);
+ else
+ next_lcla = linkback ? first_lcla : -EINVAL;
+
+ if (cyclic || next_lcla == -EINVAL)
+ flags |= LLI_TERM_INT;
+
+ if (linkback && curr_lcla == first_lcla) {
+ /* First link goes in both LCPA and LCLA */
+ d40_log_lli_lcpa_write(chan->lcpa,
+ &lli->dst[lli_current],
+ &lli->src[lli_current],
+ next_lcla, flags);
+ }
+
+ /*
+ * One unused LCLA in the cyclic case if the very first
+ * next_lcla fails...
+ */
+ d40_log_lli_lcla_write(lcla,
+ &lli->dst[lli_current],
+ &lli->src[lli_current],
+ next_lcla, flags);
+
+ /*
+ * Cache maintenance is not needed if lcla is
+ * mapped in esram
+ */
+ if (!use_esram_lcla) {
+ dma_sync_single_range_for_device(chan->base->dev,
+ pool->dma_addr, lcla_offset,
+ 2 * sizeof(struct d40_log_lli),
+ DMA_TO_DEVICE);
+ }
+ curr_lcla = next_lcla;
+
+ if (curr_lcla == -EINVAL || curr_lcla == first_lcla) {
+ lli_current++;
+ break;
+ }
+ }
+ set_current:
+ desc->lli_current = lli_current;
+}
+
+static void d40_desc_load(struct d40_chan *d40c, struct d40_desc *d40d)
+{
+ if (chan_is_physical(d40c)) {
+ d40_phy_lli_load(d40c, d40d);
+ d40d->lli_current = d40d->lli_len;
+ } else
+ d40_log_lli_to_lcxa(d40c, d40d);
+}
+
+static struct d40_desc *d40_first_active_get(struct d40_chan *d40c)
+{
+ return list_first_entry_or_null(&d40c->active, struct d40_desc, node);
+}
+
+/* remove desc from current queue and add it to the pending_queue */
+static void d40_desc_queue(struct d40_chan *d40c, struct d40_desc *desc)
+{
+ d40_desc_remove(desc);
+ desc->is_in_client_list = false;
+ list_add_tail(&desc->node, &d40c->pending_queue);
+}
+
+static struct d40_desc *d40_first_pending(struct d40_chan *d40c)
+{
+ return list_first_entry_or_null(&d40c->pending_queue, struct d40_desc,
+ node);
+}
+
+static struct d40_desc *d40_first_queued(struct d40_chan *d40c)
+{
+ return list_first_entry_or_null(&d40c->queue, struct d40_desc, node);
+}
+
+static struct d40_desc *d40_first_done(struct d40_chan *d40c)
+{
+ return list_first_entry_or_null(&d40c->done, struct d40_desc, node);
+}
+
+static int d40_psize_2_burst_size(bool is_log, int psize)
+{
+ if (is_log) {
+ if (psize == STEDMA40_PSIZE_LOG_1)
+ return 1;
+ } else {
+ if (psize == STEDMA40_PSIZE_PHY_1)
+ return 1;
+ }
+
+ return 2 << psize;
+}
+
+/*
+ * The dma only supports transmitting packages up to
+ * STEDMA40_MAX_SEG_SIZE * data_width, where data_width is stored in Bytes.
+ *
+ * Calculate the total number of dma elements required to send the entire sg list.
+ */
+static int d40_size_2_dmalen(int size, u32 data_width1, u32 data_width2)
+{
+ int dmalen;
+ u32 max_w = max(data_width1, data_width2);
+ u32 min_w = min(data_width1, data_width2);
+ u32 seg_max = ALIGN(STEDMA40_MAX_SEG_SIZE * min_w, max_w);
+
+ if (seg_max > STEDMA40_MAX_SEG_SIZE)
+ seg_max -= max_w;
+
+ if (!IS_ALIGNED(size, max_w))
+ return -EINVAL;
+
+ if (size <= seg_max)
+ dmalen = 1;
+ else {
+ dmalen = size / seg_max;
+ if (dmalen * seg_max < size)
+ dmalen++;
+ }
+ return dmalen;
+}
+
+static int d40_sg_2_dmalen(struct scatterlist *sgl, int sg_len,
+ u32 data_width1, u32 data_width2)
+{
+ struct scatterlist *sg;
+ int i;
+ int len = 0;
+ int ret;
+
+ for_each_sg(sgl, sg, sg_len, i) {
+ ret = d40_size_2_dmalen(sg_dma_len(sg),
+ data_width1, data_width2);
+ if (ret < 0)
+ return ret;
+ len += ret;
+ }
+ return len;
+}
+
+static int __d40_execute_command_phy(struct d40_chan *d40c,
+ enum d40_command command)
+{
+ u32 status;
+ int i;
+ void __iomem *active_reg;
+ int ret = 0;
+ unsigned long flags;
+ u32 wmask;
+
+ if (command == D40_DMA_STOP) {
+ ret = __d40_execute_command_phy(d40c, D40_DMA_SUSPEND_REQ);
+ if (ret)
+ return ret;
+ }
+
+ spin_lock_irqsave(&d40c->base->execmd_lock, flags);
+
+ if (d40c->phy_chan->num % 2 == 0)
+ active_reg = d40c->base->virtbase + D40_DREG_ACTIVE;
+ else
+ active_reg = d40c->base->virtbase + D40_DREG_ACTIVO;
+
+ if (command == D40_DMA_SUSPEND_REQ) {
+ status = (readl(active_reg) &
+ D40_CHAN_POS_MASK(d40c->phy_chan->num)) >>
+ D40_CHAN_POS(d40c->phy_chan->num);
+
+ if (status == D40_DMA_SUSPENDED || status == D40_DMA_STOP)
+ goto unlock;
+ }
+
+ wmask = 0xffffffff & ~(D40_CHAN_POS_MASK(d40c->phy_chan->num));
+ writel(wmask | (command << D40_CHAN_POS(d40c->phy_chan->num)),
+ active_reg);
+
+ if (command == D40_DMA_SUSPEND_REQ) {
+
+ for (i = 0 ; i < D40_SUSPEND_MAX_IT; i++) {
+ status = (readl(active_reg) &
+ D40_CHAN_POS_MASK(d40c->phy_chan->num)) >>
+ D40_CHAN_POS(d40c->phy_chan->num);
+
+ cpu_relax();
+ /*
+ * Reduce the number of bus accesses while
+ * waiting for the DMA to suspend.
+ */
+ udelay(3);
+
+ if (status == D40_DMA_STOP ||
+ status == D40_DMA_SUSPENDED)
+ break;
+ }
+
+ if (i == D40_SUSPEND_MAX_IT) {
+ chan_err(d40c,
+ "unable to suspend the chl %d (log: %d) status %x\n",
+ d40c->phy_chan->num, d40c->log_num,
+ status);
+ dump_stack();
+ ret = -EBUSY;
+ }
+
+ }
+ unlock:
+ spin_unlock_irqrestore(&d40c->base->execmd_lock, flags);
+ return ret;
+}
+
+static void d40_term_all(struct d40_chan *d40c)
+{
+ struct d40_desc *d40d;
+ struct d40_desc *_d;
+
+ /* Release completed descriptors */
+ while ((d40d = d40_first_done(d40c))) {
+ d40_desc_remove(d40d);
+ d40_desc_free(d40c, d40d);
+ }
+
+ /* Release active descriptors */
+ while ((d40d = d40_first_active_get(d40c))) {
+ d40_desc_remove(d40d);
+ d40_desc_free(d40c, d40d);
+ }
+
+ /* Release queued descriptors waiting for transfer */
+ while ((d40d = d40_first_queued(d40c))) {
+ d40_desc_remove(d40d);
+ d40_desc_free(d40c, d40d);
+ }
+
+ /* Release pending descriptors */
+ while ((d40d = d40_first_pending(d40c))) {
+ d40_desc_remove(d40d);
+ d40_desc_free(d40c, d40d);
+ }
+
+ /* Release client owned descriptors */
+ if (!list_empty(&d40c->client))
+ list_for_each_entry_safe(d40d, _d, &d40c->client, node) {
+ d40_desc_remove(d40d);
+ d40_desc_free(d40c, d40d);
+ }
+
+ /* Release descriptors in prepare queue */
+ if (!list_empty(&d40c->prepare_queue))
+ list_for_each_entry_safe(d40d, _d,
+ &d40c->prepare_queue, node) {
+ d40_desc_remove(d40d);
+ d40_desc_free(d40c, d40d);
+ }
+
+ d40c->pending_tx = 0;
+}
+
+static void __d40_config_set_event(struct d40_chan *d40c,
+ enum d40_events event_type, u32 event,
+ int reg)
+{
+ void __iomem *addr = chan_base(d40c) + reg;
+ int tries;
+ u32 status;
+
+ switch (event_type) {
+
+ case D40_DEACTIVATE_EVENTLINE:
+
+ writel((D40_DEACTIVATE_EVENTLINE << D40_EVENTLINE_POS(event))
+ | ~D40_EVENTLINE_MASK(event), addr);
+ break;
+
+ case D40_SUSPEND_REQ_EVENTLINE:
+ status = (readl(addr) & D40_EVENTLINE_MASK(event)) >>
+ D40_EVENTLINE_POS(event);
+
+ if (status == D40_DEACTIVATE_EVENTLINE ||
+ status == D40_SUSPEND_REQ_EVENTLINE)
+ break;
+
+ writel((D40_SUSPEND_REQ_EVENTLINE << D40_EVENTLINE_POS(event))
+ | ~D40_EVENTLINE_MASK(event), addr);
+
+ for (tries = 0 ; tries < D40_SUSPEND_MAX_IT; tries++) {
+
+ status = (readl(addr) & D40_EVENTLINE_MASK(event)) >>
+ D40_EVENTLINE_POS(event);
+
+ cpu_relax();
+ /*
+ * Reduce the number of bus accesses while
+ * waiting for the DMA to suspend.
+ */
+ udelay(3);
+
+ if (status == D40_DEACTIVATE_EVENTLINE)
+ break;
+ }
+
+ if (tries == D40_SUSPEND_MAX_IT) {
+ chan_err(d40c,
+ "unable to stop the event_line chl %d (log: %d)"
+ "status %x\n", d40c->phy_chan->num,
+ d40c->log_num, status);
+ }
+ break;
+
+ case D40_ACTIVATE_EVENTLINE:
+ /*
+ * The hardware sometimes doesn't register the enable when src and dst
+ * event lines are active on the same logical channel. Retry to ensure
+ * it does. Usually only one retry is sufficient.
+ */
+ tries = 100;
+ while (--tries) {
+ writel((D40_ACTIVATE_EVENTLINE <<
+ D40_EVENTLINE_POS(event)) |
+ ~D40_EVENTLINE_MASK(event), addr);
+
+ if (readl(addr) & D40_EVENTLINE_MASK(event))
+ break;
+ }
+
+ if (tries != 99)
+ dev_dbg(chan2dev(d40c),
+ "[%s] workaround enable S%cLNK (%d tries)\n",
+ __func__, reg == D40_CHAN_REG_SSLNK ? 'S' : 'D',
+ 100 - tries);
+
+ WARN_ON(!tries);
+ break;
+
+ case D40_ROUND_EVENTLINE:
+ BUG();
+ break;
+
+ }
+}
+
+static void d40_config_set_event(struct d40_chan *d40c,
+ enum d40_events event_type)
+{
+ u32 event = D40_TYPE_TO_EVENT(d40c->dma_cfg.dev_type);
+
+ /* Enable event line connected to device (or memcpy) */
+ if ((d40c->dma_cfg.dir == DMA_DEV_TO_MEM) ||
+ (d40c->dma_cfg.dir == DMA_DEV_TO_DEV))
+ __d40_config_set_event(d40c, event_type, event,
+ D40_CHAN_REG_SSLNK);
+
+ if (d40c->dma_cfg.dir != DMA_DEV_TO_MEM)
+ __d40_config_set_event(d40c, event_type, event,
+ D40_CHAN_REG_SDLNK);
+}
+
+static u32 d40_chan_has_events(struct d40_chan *d40c)
+{
+ void __iomem *chanbase = chan_base(d40c);
+ u32 val;
+
+ val = readl(chanbase + D40_CHAN_REG_SSLNK);
+ val |= readl(chanbase + D40_CHAN_REG_SDLNK);
+
+ return val;
+}
+
+static int
+__d40_execute_command_log(struct d40_chan *d40c, enum d40_command command)
+{
+ unsigned long flags;
+ int ret = 0;
+ u32 active_status;
+ void __iomem *active_reg;
+
+ if (d40c->phy_chan->num % 2 == 0)
+ active_reg = d40c->base->virtbase + D40_DREG_ACTIVE;
+ else
+ active_reg = d40c->base->virtbase + D40_DREG_ACTIVO;
+
+
+ spin_lock_irqsave(&d40c->phy_chan->lock, flags);
+
+ switch (command) {
+ case D40_DMA_STOP:
+ case D40_DMA_SUSPEND_REQ:
+
+ active_status = (readl(active_reg) &
+ D40_CHAN_POS_MASK(d40c->phy_chan->num)) >>
+ D40_CHAN_POS(d40c->phy_chan->num);
+
+ if (active_status == D40_DMA_RUN)
+ d40_config_set_event(d40c, D40_SUSPEND_REQ_EVENTLINE);
+ else
+ d40_config_set_event(d40c, D40_DEACTIVATE_EVENTLINE);
+
+ if (!d40_chan_has_events(d40c) && (command == D40_DMA_STOP))
+ ret = __d40_execute_command_phy(d40c, command);
+
+ break;
+
+ case D40_DMA_RUN:
+
+ d40_config_set_event(d40c, D40_ACTIVATE_EVENTLINE);
+ ret = __d40_execute_command_phy(d40c, command);
+ break;
+
+ case D40_DMA_SUSPENDED:
+ BUG();
+ break;
+ }
+
+ spin_unlock_irqrestore(&d40c->phy_chan->lock, flags);
+ return ret;
+}
+
+static int d40_channel_execute_command(struct d40_chan *d40c,
+ enum d40_command command)
+{
+ if (chan_is_logical(d40c))
+ return __d40_execute_command_log(d40c, command);
+ else
+ return __d40_execute_command_phy(d40c, command);
+}
+
+static u32 d40_get_prmo(struct d40_chan *d40c)
+{
+ static const unsigned int phy_map[] = {
+ [STEDMA40_PCHAN_BASIC_MODE]
+ = D40_DREG_PRMO_PCHAN_BASIC,
+ [STEDMA40_PCHAN_MODULO_MODE]
+ = D40_DREG_PRMO_PCHAN_MODULO,
+ [STEDMA40_PCHAN_DOUBLE_DST_MODE]
+ = D40_DREG_PRMO_PCHAN_DOUBLE_DST,
+ };
+ static const unsigned int log_map[] = {
+ [STEDMA40_LCHAN_SRC_PHY_DST_LOG]
+ = D40_DREG_PRMO_LCHAN_SRC_PHY_DST_LOG,
+ [STEDMA40_LCHAN_SRC_LOG_DST_PHY]
+ = D40_DREG_PRMO_LCHAN_SRC_LOG_DST_PHY,
+ [STEDMA40_LCHAN_SRC_LOG_DST_LOG]
+ = D40_DREG_PRMO_LCHAN_SRC_LOG_DST_LOG,
+ };
+
+ if (chan_is_physical(d40c))
+ return phy_map[d40c->dma_cfg.mode_opt];
+ else
+ return log_map[d40c->dma_cfg.mode_opt];
+}
+
+static void d40_config_write(struct d40_chan *d40c)
+{
+ u32 addr_base;
+ u32 var;
+
+ /* Odd addresses are even addresses + 4 */
+ addr_base = (d40c->phy_chan->num % 2) * 4;
+ /* Setup channel mode to logical or physical */
+ var = ((u32)(chan_is_logical(d40c)) + 1) <<
+ D40_CHAN_POS(d40c->phy_chan->num);
+ writel(var, d40c->base->virtbase + D40_DREG_PRMSE + addr_base);
+
+ /* Setup operational mode option register */
+ var = d40_get_prmo(d40c) << D40_CHAN_POS(d40c->phy_chan->num);
+
+ writel(var, d40c->base->virtbase + D40_DREG_PRMOE + addr_base);
+
+ if (chan_is_logical(d40c)) {
+ int lidx = (d40c->phy_chan->num << D40_SREG_ELEM_LOG_LIDX_POS)
+ & D40_SREG_ELEM_LOG_LIDX_MASK;
+ void __iomem *chanbase = chan_base(d40c);
+
+ /* Set default config for CFG reg */
+ writel(d40c->src_def_cfg, chanbase + D40_CHAN_REG_SSCFG);
+ writel(d40c->dst_def_cfg, chanbase + D40_CHAN_REG_SDCFG);
+
+ /* Set LIDX for lcla */
+ writel(lidx, chanbase + D40_CHAN_REG_SSELT);
+ writel(lidx, chanbase + D40_CHAN_REG_SDELT);
+
+ /* Clear LNK which will be used by d40_chan_has_events() */
+ writel(0, chanbase + D40_CHAN_REG_SSLNK);
+ writel(0, chanbase + D40_CHAN_REG_SDLNK);
+ }
+}
+
+static u32 d40_residue(struct d40_chan *d40c)
+{
+ u32 num_elt;
+
+ if (chan_is_logical(d40c))
+ num_elt = (readl(&d40c->lcpa->lcsp2) & D40_MEM_LCSP2_ECNT_MASK)
+ >> D40_MEM_LCSP2_ECNT_POS;
+ else {
+ u32 val = readl(chan_base(d40c) + D40_CHAN_REG_SDELT);
+ num_elt = (val & D40_SREG_ELEM_PHY_ECNT_MASK)
+ >> D40_SREG_ELEM_PHY_ECNT_POS;
+ }
+
+ return num_elt * d40c->dma_cfg.dst_info.data_width;
+}
+
+static bool d40_tx_is_linked(struct d40_chan *d40c)
+{
+ bool is_link;
+
+ if (chan_is_logical(d40c))
+ is_link = readl(&d40c->lcpa->lcsp3) & D40_MEM_LCSP3_DLOS_MASK;
+ else
+ is_link = readl(chan_base(d40c) + D40_CHAN_REG_SDLNK)
+ & D40_SREG_LNK_PHYS_LNK_MASK;
+
+ return is_link;
+}
+
+static int d40_pause(struct dma_chan *chan)
+{
+ struct d40_chan *d40c = container_of(chan, struct d40_chan, chan);
+ int res = 0;
+ unsigned long flags;
+
+ if (d40c->phy_chan == NULL) {
+ chan_err(d40c, "Channel is not allocated!\n");
+ return -EINVAL;
+ }
+
+ if (!d40c->busy)
+ return 0;
+
+ spin_lock_irqsave(&d40c->lock, flags);
+ pm_runtime_get_sync(d40c->base->dev);
+
+ res = d40_channel_execute_command(d40c, D40_DMA_SUSPEND_REQ);
+
+ pm_runtime_mark_last_busy(d40c->base->dev);
+ pm_runtime_put_autosuspend(d40c->base->dev);
+ spin_unlock_irqrestore(&d40c->lock, flags);
+ return res;
+}
+
+static int d40_resume(struct dma_chan *chan)
+{
+ struct d40_chan *d40c = container_of(chan, struct d40_chan, chan);
+ int res = 0;
+ unsigned long flags;
+
+ if (d40c->phy_chan == NULL) {
+ chan_err(d40c, "Channel is not allocated!\n");
+ return -EINVAL;
+ }
+
+ if (!d40c->busy)
+ return 0;
+
+ spin_lock_irqsave(&d40c->lock, flags);
+ pm_runtime_get_sync(d40c->base->dev);
+
+ /* If bytes left to transfer or linked tx resume job */
+ if (d40_residue(d40c) || d40_tx_is_linked(d40c))
+ res = d40_channel_execute_command(d40c, D40_DMA_RUN);
+
+ pm_runtime_mark_last_busy(d40c->base->dev);
+ pm_runtime_put_autosuspend(d40c->base->dev);
+ spin_unlock_irqrestore(&d40c->lock, flags);
+ return res;
+}
+
+static dma_cookie_t d40_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+ struct d40_chan *d40c = container_of(tx->chan,
+ struct d40_chan,
+ chan);
+ struct d40_desc *d40d = container_of(tx, struct d40_desc, txd);
+ unsigned long flags;
+ dma_cookie_t cookie;
+
+ spin_lock_irqsave(&d40c->lock, flags);
+ cookie = dma_cookie_assign(tx);
+ d40_desc_queue(d40c, d40d);
+ spin_unlock_irqrestore(&d40c->lock, flags);
+
+ return cookie;
+}
+
+static int d40_start(struct d40_chan *d40c)
+{
+ return d40_channel_execute_command(d40c, D40_DMA_RUN);
+}
+
+static struct d40_desc *d40_queue_start(struct d40_chan *d40c)
+{
+ struct d40_desc *d40d;
+ int err;
+
+ /* Start queued jobs, if any */
+ d40d = d40_first_queued(d40c);
+
+ if (d40d != NULL) {
+ if (!d40c->busy) {
+ d40c->busy = true;
+ pm_runtime_get_sync(d40c->base->dev);
+ }
+
+ /* Remove from queue */
+ d40_desc_remove(d40d);
+
+ /* Add to active queue */
+ d40_desc_submit(d40c, d40d);
+
+ /* Initiate DMA job */
+ d40_desc_load(d40c, d40d);
+
+ /* Start dma job */
+ err = d40_start(d40c);
+
+ if (err)
+ return NULL;
+ }
+
+ return d40d;
+}
+
+/* called from interrupt context */
+static void dma_tc_handle(struct d40_chan *d40c)
+{
+ struct d40_desc *d40d;
+
+ /* Get first active entry from list */
+ d40d = d40_first_active_get(d40c);
+
+ if (d40d == NULL)
+ return;
+
+ if (d40d->cyclic) {
+ /*
+ * If this was a paritially loaded list, we need to reloaded
+ * it, and only when the list is completed. We need to check
+ * for done because the interrupt will hit for every link, and
+ * not just the last one.
+ */
+ if (d40d->lli_current < d40d->lli_len
+ && !d40_tx_is_linked(d40c)
+ && !d40_residue(d40c)) {
+ d40_lcla_free_all(d40c, d40d);
+ d40_desc_load(d40c, d40d);
+ (void) d40_start(d40c);
+
+ if (d40d->lli_current == d40d->lli_len)
+ d40d->lli_current = 0;
+ }
+ } else {
+ d40_lcla_free_all(d40c, d40d);
+
+ if (d40d->lli_current < d40d->lli_len) {
+ d40_desc_load(d40c, d40d);
+ /* Start dma job */
+ (void) d40_start(d40c);
+ return;
+ }
+
+ if (d40_queue_start(d40c) == NULL) {
+ d40c->busy = false;
+
+ pm_runtime_mark_last_busy(d40c->base->dev);
+ pm_runtime_put_autosuspend(d40c->base->dev);
+ }
+
+ d40_desc_remove(d40d);
+ d40_desc_done(d40c, d40d);
+ }
+
+ d40c->pending_tx++;
+ tasklet_schedule(&d40c->tasklet);
+
+}
+
+static void dma_tasklet(struct tasklet_struct *t)
+{
+ struct d40_chan *d40c = from_tasklet(d40c, t, tasklet);
+ struct d40_desc *d40d;
+ unsigned long flags;
+ bool callback_active;
+ struct dmaengine_desc_callback cb;
+
+ spin_lock_irqsave(&d40c->lock, flags);
+
+ /* Get first entry from the done list */
+ d40d = d40_first_done(d40c);
+ if (d40d == NULL) {
+ /* Check if we have reached here for cyclic job */
+ d40d = d40_first_active_get(d40c);
+ if (d40d == NULL || !d40d->cyclic)
+ goto check_pending_tx;
+ }
+
+ if (!d40d->cyclic)
+ dma_cookie_complete(&d40d->txd);
+
+ /*
+ * If terminating a channel pending_tx is set to zero.
+ * This prevents any finished active jobs to return to the client.
+ */
+ if (d40c->pending_tx == 0) {
+ spin_unlock_irqrestore(&d40c->lock, flags);
+ return;
+ }
+
+ /* Callback to client */
+ callback_active = !!(d40d->txd.flags & DMA_PREP_INTERRUPT);
+ dmaengine_desc_get_callback(&d40d->txd, &cb);
+
+ if (!d40d->cyclic) {
+ if (async_tx_test_ack(&d40d->txd)) {
+ d40_desc_remove(d40d);
+ d40_desc_free(d40c, d40d);
+ } else if (!d40d->is_in_client_list) {
+ d40_desc_remove(d40d);
+ d40_lcla_free_all(d40c, d40d);
+ list_add_tail(&d40d->node, &d40c->client);
+ d40d->is_in_client_list = true;
+ }
+ }
+
+ d40c->pending_tx--;
+
+ if (d40c->pending_tx)
+ tasklet_schedule(&d40c->tasklet);
+
+ spin_unlock_irqrestore(&d40c->lock, flags);
+
+ if (callback_active)
+ dmaengine_desc_callback_invoke(&cb, NULL);
+
+ return;
+ check_pending_tx:
+ /* Rescue manouver if receiving double interrupts */
+ if (d40c->pending_tx > 0)
+ d40c->pending_tx--;
+ spin_unlock_irqrestore(&d40c->lock, flags);
+}
+
+static irqreturn_t d40_handle_interrupt(int irq, void *data)
+{
+ int i;
+ u32 idx;
+ u32 row;
+ long chan = -1;
+ struct d40_chan *d40c;
+ struct d40_base *base = data;
+ u32 *regs = base->regs_interrupt;
+ struct d40_interrupt_lookup *il = base->gen_dmac.il;
+ u32 il_size = base->gen_dmac.il_size;
+
+ spin_lock(&base->interrupt_lock);
+
+ /* Read interrupt status of both logical and physical channels */
+ for (i = 0; i < il_size; i++)
+ regs[i] = readl(base->virtbase + il[i].src);
+
+ for (;;) {
+
+ chan = find_next_bit((unsigned long *)regs,
+ BITS_PER_LONG * il_size, chan + 1);
+
+ /* No more set bits found? */
+ if (chan == BITS_PER_LONG * il_size)
+ break;
+
+ row = chan / BITS_PER_LONG;
+ idx = chan & (BITS_PER_LONG - 1);
+
+ if (il[row].offset == D40_PHY_CHAN)
+ d40c = base->lookup_phy_chans[idx];
+ else
+ d40c = base->lookup_log_chans[il[row].offset + idx];
+
+ if (!d40c) {
+ /*
+ * No error because this can happen if something else
+ * in the system is using the channel.
+ */
+ continue;
+ }
+
+ /* ACK interrupt */
+ writel(BIT(idx), base->virtbase + il[row].clr);
+
+ spin_lock(&d40c->lock);
+
+ if (!il[row].is_error)
+ dma_tc_handle(d40c);
+ else
+ d40_err(base->dev, "IRQ chan: %ld offset %d idx %d\n",
+ chan, il[row].offset, idx);
+
+ spin_unlock(&d40c->lock);
+ }
+
+ spin_unlock(&base->interrupt_lock);
+
+ return IRQ_HANDLED;
+}
+
+static int d40_validate_conf(struct d40_chan *d40c,
+ struct stedma40_chan_cfg *conf)
+{
+ int res = 0;
+ bool is_log = conf->mode == STEDMA40_MODE_LOGICAL;
+
+ if (!conf->dir) {
+ chan_err(d40c, "Invalid direction.\n");
+ res = -EINVAL;
+ }
+
+ if ((is_log && conf->dev_type > d40c->base->num_log_chans) ||
+ (!is_log && conf->dev_type > d40c->base->num_phy_chans) ||
+ (conf->dev_type < 0)) {
+ chan_err(d40c, "Invalid device type (%d)\n", conf->dev_type);
+ res = -EINVAL;
+ }
+
+ if (conf->dir == DMA_DEV_TO_DEV) {
+ /*
+ * DMAC HW supports it. Will be added to this driver,
+ * in case any dma client requires it.
+ */
+ chan_err(d40c, "periph to periph not supported\n");
+ res = -EINVAL;
+ }
+
+ if (d40_psize_2_burst_size(is_log, conf->src_info.psize) *
+ conf->src_info.data_width !=
+ d40_psize_2_burst_size(is_log, conf->dst_info.psize) *
+ conf->dst_info.data_width) {
+ /*
+ * The DMAC hardware only supports
+ * src (burst x width) == dst (burst x width)
+ */
+
+ chan_err(d40c, "src (burst x width) != dst (burst x width)\n");
+ res = -EINVAL;
+ }
+
+ return res;
+}
+
+static bool d40_alloc_mask_set(struct d40_phy_res *phy,
+ bool is_src, int log_event_line, bool is_log,
+ bool *first_user)
+{
+ unsigned long flags;
+ spin_lock_irqsave(&phy->lock, flags);
+
+ *first_user = ((phy->allocated_src | phy->allocated_dst)
+ == D40_ALLOC_FREE);
+
+ if (!is_log) {
+ /* Physical interrupts are masked per physical full channel */
+ if (phy->allocated_src == D40_ALLOC_FREE &&
+ phy->allocated_dst == D40_ALLOC_FREE) {
+ phy->allocated_dst = D40_ALLOC_PHY;
+ phy->allocated_src = D40_ALLOC_PHY;
+ goto found_unlock;
+ } else
+ goto not_found_unlock;
+ }
+
+ /* Logical channel */
+ if (is_src) {
+ if (phy->allocated_src == D40_ALLOC_PHY)
+ goto not_found_unlock;
+
+ if (phy->allocated_src == D40_ALLOC_FREE)
+ phy->allocated_src = D40_ALLOC_LOG_FREE;
+
+ if (!(phy->allocated_src & BIT(log_event_line))) {
+ phy->allocated_src |= BIT(log_event_line);
+ goto found_unlock;
+ } else
+ goto not_found_unlock;
+ } else {
+ if (phy->allocated_dst == D40_ALLOC_PHY)
+ goto not_found_unlock;
+
+ if (phy->allocated_dst == D40_ALLOC_FREE)
+ phy->allocated_dst = D40_ALLOC_LOG_FREE;
+
+ if (!(phy->allocated_dst & BIT(log_event_line))) {
+ phy->allocated_dst |= BIT(log_event_line);
+ goto found_unlock;
+ }
+ }
+ not_found_unlock:
+ spin_unlock_irqrestore(&phy->lock, flags);
+ return false;
+ found_unlock:
+ spin_unlock_irqrestore(&phy->lock, flags);
+ return true;
+}
+
+static bool d40_alloc_mask_free(struct d40_phy_res *phy, bool is_src,
+ int log_event_line)
+{
+ unsigned long flags;
+ bool is_free = false;
+
+ spin_lock_irqsave(&phy->lock, flags);
+ if (!log_event_line) {
+ phy->allocated_dst = D40_ALLOC_FREE;
+ phy->allocated_src = D40_ALLOC_FREE;
+ is_free = true;
+ goto unlock;
+ }
+
+ /* Logical channel */
+ if (is_src) {
+ phy->allocated_src &= ~BIT(log_event_line);
+ if (phy->allocated_src == D40_ALLOC_LOG_FREE)
+ phy->allocated_src = D40_ALLOC_FREE;
+ } else {
+ phy->allocated_dst &= ~BIT(log_event_line);
+ if (phy->allocated_dst == D40_ALLOC_LOG_FREE)
+ phy->allocated_dst = D40_ALLOC_FREE;
+ }
+
+ is_free = ((phy->allocated_src | phy->allocated_dst) ==
+ D40_ALLOC_FREE);
+ unlock:
+ spin_unlock_irqrestore(&phy->lock, flags);
+
+ return is_free;
+}
+
+static int d40_allocate_channel(struct d40_chan *d40c, bool *first_phy_user)
+{
+ int dev_type = d40c->dma_cfg.dev_type;
+ int event_group;
+ int event_line;
+ struct d40_phy_res *phys;
+ int i;
+ int j;
+ int log_num;
+ int num_phy_chans;
+ bool is_src;
+ bool is_log = d40c->dma_cfg.mode == STEDMA40_MODE_LOGICAL;
+
+ phys = d40c->base->phy_res;
+ num_phy_chans = d40c->base->num_phy_chans;
+
+ if (d40c->dma_cfg.dir == DMA_DEV_TO_MEM) {
+ log_num = 2 * dev_type;
+ is_src = true;
+ } else if (d40c->dma_cfg.dir == DMA_MEM_TO_DEV ||
+ d40c->dma_cfg.dir == DMA_MEM_TO_MEM) {
+ /* dst event lines are used for logical memcpy */
+ log_num = 2 * dev_type + 1;
+ is_src = false;
+ } else
+ return -EINVAL;
+
+ event_group = D40_TYPE_TO_GROUP(dev_type);
+ event_line = D40_TYPE_TO_EVENT(dev_type);
+
+ if (!is_log) {
+ if (d40c->dma_cfg.dir == DMA_MEM_TO_MEM) {
+ /* Find physical half channel */
+ if (d40c->dma_cfg.use_fixed_channel) {
+ i = d40c->dma_cfg.phy_channel;
+ if (d40_alloc_mask_set(&phys[i], is_src,
+ 0, is_log,
+ first_phy_user))
+ goto found_phy;
+ } else {
+ for (i = 0; i < num_phy_chans; i++) {
+ if (d40_alloc_mask_set(&phys[i], is_src,
+ 0, is_log,
+ first_phy_user))
+ goto found_phy;
+ }
+ }
+ } else
+ for (j = 0; j < d40c->base->num_phy_chans; j += 8) {
+ int phy_num = j + event_group * 2;
+ for (i = phy_num; i < phy_num + 2; i++) {
+ if (d40_alloc_mask_set(&phys[i],
+ is_src,
+ 0,
+ is_log,
+ first_phy_user))
+ goto found_phy;
+ }
+ }
+ return -EINVAL;
+found_phy:
+ d40c->phy_chan = &phys[i];
+ d40c->log_num = D40_PHY_CHAN;
+ goto out;
+ }
+ if (dev_type == -1)
+ return -EINVAL;
+
+ /* Find logical channel */
+ for (j = 0; j < d40c->base->num_phy_chans; j += 8) {
+ int phy_num = j + event_group * 2;
+
+ if (d40c->dma_cfg.use_fixed_channel) {
+ i = d40c->dma_cfg.phy_channel;
+
+ if ((i != phy_num) && (i != phy_num + 1)) {
+ dev_err(chan2dev(d40c),
+ "invalid fixed phy channel %d\n", i);
+ return -EINVAL;
+ }
+
+ if (d40_alloc_mask_set(&phys[i], is_src, event_line,
+ is_log, first_phy_user))
+ goto found_log;
+
+ dev_err(chan2dev(d40c),
+ "could not allocate fixed phy channel %d\n", i);
+ return -EINVAL;
+ }
+
+ /*
+ * Spread logical channels across all available physical rather
+ * than pack every logical channel at the first available phy
+ * channels.
+ */
+ if (is_src) {
+ for (i = phy_num; i < phy_num + 2; i++) {
+ if (d40_alloc_mask_set(&phys[i], is_src,
+ event_line, is_log,
+ first_phy_user))
+ goto found_log;
+ }
+ } else {
+ for (i = phy_num + 1; i >= phy_num; i--) {
+ if (d40_alloc_mask_set(&phys[i], is_src,
+ event_line, is_log,
+ first_phy_user))
+ goto found_log;
+ }
+ }
+ }
+ return -EINVAL;
+
+found_log:
+ d40c->phy_chan = &phys[i];
+ d40c->log_num = log_num;
+out:
+
+ if (is_log)
+ d40c->base->lookup_log_chans[d40c->log_num] = d40c;
+ else
+ d40c->base->lookup_phy_chans[d40c->phy_chan->num] = d40c;
+
+ return 0;
+
+}
+
+static int d40_config_memcpy(struct d40_chan *d40c)
+{
+ dma_cap_mask_t cap = d40c->chan.device->cap_mask;
+
+ if (dma_has_cap(DMA_MEMCPY, cap) && !dma_has_cap(DMA_SLAVE, cap)) {
+ d40c->dma_cfg = dma40_memcpy_conf_log;
+ d40c->dma_cfg.dev_type = dma40_memcpy_channels[d40c->chan.chan_id];
+
+ d40_log_cfg(&d40c->dma_cfg,
+ &d40c->log_def.lcsp1, &d40c->log_def.lcsp3);
+
+ } else if (dma_has_cap(DMA_MEMCPY, cap) &&
+ dma_has_cap(DMA_SLAVE, cap)) {
+ d40c->dma_cfg = dma40_memcpy_conf_phy;
+
+ /* Generate interrupt at end of transfer or relink. */
+ d40c->dst_def_cfg |= BIT(D40_SREG_CFG_TIM_POS);
+
+ /* Generate interrupt on error. */
+ d40c->src_def_cfg |= BIT(D40_SREG_CFG_EIM_POS);
+ d40c->dst_def_cfg |= BIT(D40_SREG_CFG_EIM_POS);
+
+ } else {
+ chan_err(d40c, "No memcpy\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int d40_free_dma(struct d40_chan *d40c)
+{
+
+ int res = 0;
+ u32 event = D40_TYPE_TO_EVENT(d40c->dma_cfg.dev_type);
+ struct d40_phy_res *phy = d40c->phy_chan;
+ bool is_src;
+
+ /* Terminate all queued and active transfers */
+ d40_term_all(d40c);
+
+ if (phy == NULL) {
+ chan_err(d40c, "phy == null\n");
+ return -EINVAL;
+ }
+
+ if (phy->allocated_src == D40_ALLOC_FREE &&
+ phy->allocated_dst == D40_ALLOC_FREE) {
+ chan_err(d40c, "channel already free\n");
+ return -EINVAL;
+ }
+
+ if (d40c->dma_cfg.dir == DMA_MEM_TO_DEV ||
+ d40c->dma_cfg.dir == DMA_MEM_TO_MEM)
+ is_src = false;
+ else if (d40c->dma_cfg.dir == DMA_DEV_TO_MEM)
+ is_src = true;
+ else {
+ chan_err(d40c, "Unknown direction\n");
+ return -EINVAL;
+ }
+
+ pm_runtime_get_sync(d40c->base->dev);
+ res = d40_channel_execute_command(d40c, D40_DMA_STOP);
+ if (res) {
+ chan_err(d40c, "stop failed\n");
+ goto mark_last_busy;
+ }
+
+ d40_alloc_mask_free(phy, is_src, chan_is_logical(d40c) ? event : 0);
+
+ if (chan_is_logical(d40c))
+ d40c->base->lookup_log_chans[d40c->log_num] = NULL;
+ else
+ d40c->base->lookup_phy_chans[phy->num] = NULL;
+
+ if (d40c->busy) {
+ pm_runtime_mark_last_busy(d40c->base->dev);
+ pm_runtime_put_autosuspend(d40c->base->dev);
+ }
+
+ d40c->busy = false;
+ d40c->phy_chan = NULL;
+ d40c->configured = false;
+ mark_last_busy:
+ pm_runtime_mark_last_busy(d40c->base->dev);
+ pm_runtime_put_autosuspend(d40c->base->dev);
+ return res;
+}
+
+static bool d40_is_paused(struct d40_chan *d40c)
+{
+ void __iomem *chanbase = chan_base(d40c);
+ bool is_paused = false;
+ unsigned long flags;
+ void __iomem *active_reg;
+ u32 status;
+ u32 event = D40_TYPE_TO_EVENT(d40c->dma_cfg.dev_type);
+
+ spin_lock_irqsave(&d40c->lock, flags);
+
+ if (chan_is_physical(d40c)) {
+ if (d40c->phy_chan->num % 2 == 0)
+ active_reg = d40c->base->virtbase + D40_DREG_ACTIVE;
+ else
+ active_reg = d40c->base->virtbase + D40_DREG_ACTIVO;
+
+ status = (readl(active_reg) &
+ D40_CHAN_POS_MASK(d40c->phy_chan->num)) >>
+ D40_CHAN_POS(d40c->phy_chan->num);
+ if (status == D40_DMA_SUSPENDED || status == D40_DMA_STOP)
+ is_paused = true;
+ goto unlock;
+ }
+
+ if (d40c->dma_cfg.dir == DMA_MEM_TO_DEV ||
+ d40c->dma_cfg.dir == DMA_MEM_TO_MEM) {
+ status = readl(chanbase + D40_CHAN_REG_SDLNK);
+ } else if (d40c->dma_cfg.dir == DMA_DEV_TO_MEM) {
+ status = readl(chanbase + D40_CHAN_REG_SSLNK);
+ } else {
+ chan_err(d40c, "Unknown direction\n");
+ goto unlock;
+ }
+
+ status = (status & D40_EVENTLINE_MASK(event)) >>
+ D40_EVENTLINE_POS(event);
+
+ if (status != D40_DMA_RUN)
+ is_paused = true;
+ unlock:
+ spin_unlock_irqrestore(&d40c->lock, flags);
+ return is_paused;
+
+}
+
+static u32 stedma40_residue(struct dma_chan *chan)
+{
+ struct d40_chan *d40c =
+ container_of(chan, struct d40_chan, chan);
+ u32 bytes_left;
+ unsigned long flags;
+
+ spin_lock_irqsave(&d40c->lock, flags);
+ bytes_left = d40_residue(d40c);
+ spin_unlock_irqrestore(&d40c->lock, flags);
+
+ return bytes_left;
+}
+
+static int
+d40_prep_sg_log(struct d40_chan *chan, struct d40_desc *desc,
+ struct scatterlist *sg_src, struct scatterlist *sg_dst,
+ unsigned int sg_len, dma_addr_t src_dev_addr,
+ dma_addr_t dst_dev_addr)
+{
+ struct stedma40_chan_cfg *cfg = &chan->dma_cfg;
+ struct stedma40_half_channel_info *src_info = &cfg->src_info;
+ struct stedma40_half_channel_info *dst_info = &cfg->dst_info;
+ int ret;
+
+ ret = d40_log_sg_to_lli(sg_src, sg_len,
+ src_dev_addr,
+ desc->lli_log.src,
+ chan->log_def.lcsp1,
+ src_info->data_width,
+ dst_info->data_width);
+
+ ret = d40_log_sg_to_lli(sg_dst, sg_len,
+ dst_dev_addr,
+ desc->lli_log.dst,
+ chan->log_def.lcsp3,
+ dst_info->data_width,
+ src_info->data_width);
+
+ return ret < 0 ? ret : 0;
+}
+
+static int
+d40_prep_sg_phy(struct d40_chan *chan, struct d40_desc *desc,
+ struct scatterlist *sg_src, struct scatterlist *sg_dst,
+ unsigned int sg_len, dma_addr_t src_dev_addr,
+ dma_addr_t dst_dev_addr)
+{
+ struct stedma40_chan_cfg *cfg = &chan->dma_cfg;
+ struct stedma40_half_channel_info *src_info = &cfg->src_info;
+ struct stedma40_half_channel_info *dst_info = &cfg->dst_info;
+ unsigned long flags = 0;
+ int ret;
+
+ if (desc->cyclic)
+ flags |= LLI_CYCLIC | LLI_TERM_INT;
+
+ ret = d40_phy_sg_to_lli(sg_src, sg_len, src_dev_addr,
+ desc->lli_phy.src,
+ virt_to_phys(desc->lli_phy.src),
+ chan->src_def_cfg,
+ src_info, dst_info, flags);
+
+ ret = d40_phy_sg_to_lli(sg_dst, sg_len, dst_dev_addr,
+ desc->lli_phy.dst,
+ virt_to_phys(desc->lli_phy.dst),
+ chan->dst_def_cfg,
+ dst_info, src_info, flags);
+
+ dma_sync_single_for_device(chan->base->dev, desc->lli_pool.dma_addr,
+ desc->lli_pool.size, DMA_TO_DEVICE);
+
+ return ret < 0 ? ret : 0;
+}
+
+static struct d40_desc *
+d40_prep_desc(struct d40_chan *chan, struct scatterlist *sg,
+ unsigned int sg_len, unsigned long dma_flags)
+{
+ struct stedma40_chan_cfg *cfg;
+ struct d40_desc *desc;
+ int ret;
+
+ desc = d40_desc_get(chan);
+ if (!desc)
+ return NULL;
+
+ cfg = &chan->dma_cfg;
+ desc->lli_len = d40_sg_2_dmalen(sg, sg_len, cfg->src_info.data_width,
+ cfg->dst_info.data_width);
+ if (desc->lli_len < 0) {
+ chan_err(chan, "Unaligned size\n");
+ goto free_desc;
+ }
+
+ ret = d40_pool_lli_alloc(chan, desc, desc->lli_len);
+ if (ret < 0) {
+ chan_err(chan, "Could not allocate lli\n");
+ goto free_desc;
+ }
+
+ desc->lli_current = 0;
+ desc->txd.flags = dma_flags;
+ desc->txd.tx_submit = d40_tx_submit;
+
+ dma_async_tx_descriptor_init(&desc->txd, &chan->chan);
+
+ return desc;
+ free_desc:
+ d40_desc_free(chan, desc);
+ return NULL;
+}
+
+static struct dma_async_tx_descriptor *
+d40_prep_sg(struct dma_chan *dchan, struct scatterlist *sg_src,
+ struct scatterlist *sg_dst, unsigned int sg_len,
+ enum dma_transfer_direction direction, unsigned long dma_flags)
+{
+ struct d40_chan *chan = container_of(dchan, struct d40_chan, chan);
+ dma_addr_t src_dev_addr;
+ dma_addr_t dst_dev_addr;
+ struct d40_desc *desc;
+ unsigned long flags;
+ int ret;
+
+ if (!chan->phy_chan) {
+ chan_err(chan, "Cannot prepare unallocated channel\n");
+ return NULL;
+ }
+
+ d40_set_runtime_config_write(dchan, &chan->slave_config, direction);
+
+ spin_lock_irqsave(&chan->lock, flags);
+
+ desc = d40_prep_desc(chan, sg_src, sg_len, dma_flags);
+ if (desc == NULL)
+ goto unlock;
+
+ if (sg_next(&sg_src[sg_len - 1]) == sg_src)
+ desc->cyclic = true;
+
+ src_dev_addr = 0;
+ dst_dev_addr = 0;
+ if (direction == DMA_DEV_TO_MEM)
+ src_dev_addr = chan->runtime_addr;
+ else if (direction == DMA_MEM_TO_DEV)
+ dst_dev_addr = chan->runtime_addr;
+
+ if (chan_is_logical(chan))
+ ret = d40_prep_sg_log(chan, desc, sg_src, sg_dst,
+ sg_len, src_dev_addr, dst_dev_addr);
+ else
+ ret = d40_prep_sg_phy(chan, desc, sg_src, sg_dst,
+ sg_len, src_dev_addr, dst_dev_addr);
+
+ if (ret) {
+ chan_err(chan, "Failed to prepare %s sg job: %d\n",
+ chan_is_logical(chan) ? "log" : "phy", ret);
+ goto free_desc;
+ }
+
+ /*
+ * add descriptor to the prepare queue in order to be able
+ * to free them later in terminate_all
+ */
+ list_add_tail(&desc->node, &chan->prepare_queue);
+
+ spin_unlock_irqrestore(&chan->lock, flags);
+
+ return &desc->txd;
+ free_desc:
+ d40_desc_free(chan, desc);
+ unlock:
+ spin_unlock_irqrestore(&chan->lock, flags);
+ return NULL;
+}
+
+bool stedma40_filter(struct dma_chan *chan, void *data)
+{
+ struct stedma40_chan_cfg *info = data;
+ struct d40_chan *d40c =
+ container_of(chan, struct d40_chan, chan);
+ int err;
+
+ if (data) {
+ err = d40_validate_conf(d40c, info);
+ if (!err)
+ d40c->dma_cfg = *info;
+ } else
+ err = d40_config_memcpy(d40c);
+
+ if (!err)
+ d40c->configured = true;
+
+ return err == 0;
+}
+EXPORT_SYMBOL(stedma40_filter);
+
+static void __d40_set_prio_rt(struct d40_chan *d40c, int dev_type, bool src)
+{
+ bool realtime = d40c->dma_cfg.realtime;
+ bool highprio = d40c->dma_cfg.high_priority;
+ u32 rtreg;
+ u32 event = D40_TYPE_TO_EVENT(dev_type);
+ u32 group = D40_TYPE_TO_GROUP(dev_type);
+ u32 bit = BIT(event);
+ u32 prioreg;
+ struct d40_gen_dmac *dmac = &d40c->base->gen_dmac;
+
+ rtreg = realtime ? dmac->realtime_en : dmac->realtime_clear;
+ /*
+ * Due to a hardware bug, in some cases a logical channel triggered by
+ * a high priority destination event line can generate extra packet
+ * transactions.
+ *
+ * The workaround is to not set the high priority level for the
+ * destination event lines that trigger logical channels.
+ */
+ if (!src && chan_is_logical(d40c))
+ highprio = false;
+
+ prioreg = highprio ? dmac->high_prio_en : dmac->high_prio_clear;
+
+ /* Destination event lines are stored in the upper halfword */
+ if (!src)
+ bit <<= 16;
+
+ writel(bit, d40c->base->virtbase + prioreg + group * 4);
+ writel(bit, d40c->base->virtbase + rtreg + group * 4);
+}
+
+static void d40_set_prio_realtime(struct d40_chan *d40c)
+{
+ if (d40c->base->rev < 3)
+ return;
+
+ if ((d40c->dma_cfg.dir == DMA_DEV_TO_MEM) ||
+ (d40c->dma_cfg.dir == DMA_DEV_TO_DEV))
+ __d40_set_prio_rt(d40c, d40c->dma_cfg.dev_type, true);
+
+ if ((d40c->dma_cfg.dir == DMA_MEM_TO_DEV) ||
+ (d40c->dma_cfg.dir == DMA_DEV_TO_DEV))
+ __d40_set_prio_rt(d40c, d40c->dma_cfg.dev_type, false);
+}
+
+#define D40_DT_FLAGS_MODE(flags) ((flags >> 0) & 0x1)
+#define D40_DT_FLAGS_DIR(flags) ((flags >> 1) & 0x1)
+#define D40_DT_FLAGS_BIG_ENDIAN(flags) ((flags >> 2) & 0x1)
+#define D40_DT_FLAGS_FIXED_CHAN(flags) ((flags >> 3) & 0x1)
+#define D40_DT_FLAGS_HIGH_PRIO(flags) ((flags >> 4) & 0x1)
+
+static struct dma_chan *d40_xlate(struct of_phandle_args *dma_spec,
+ struct of_dma *ofdma)
+{
+ struct stedma40_chan_cfg cfg;
+ dma_cap_mask_t cap;
+ u32 flags;
+
+ memset(&cfg, 0, sizeof(struct stedma40_chan_cfg));
+
+ dma_cap_zero(cap);
+ dma_cap_set(DMA_SLAVE, cap);
+
+ cfg.dev_type = dma_spec->args[0];
+ flags = dma_spec->args[2];
+
+ switch (D40_DT_FLAGS_MODE(flags)) {
+ case 0: cfg.mode = STEDMA40_MODE_LOGICAL; break;
+ case 1: cfg.mode = STEDMA40_MODE_PHYSICAL; break;
+ }
+
+ switch (D40_DT_FLAGS_DIR(flags)) {
+ case 0:
+ cfg.dir = DMA_MEM_TO_DEV;
+ cfg.dst_info.big_endian = D40_DT_FLAGS_BIG_ENDIAN(flags);
+ break;
+ case 1:
+ cfg.dir = DMA_DEV_TO_MEM;
+ cfg.src_info.big_endian = D40_DT_FLAGS_BIG_ENDIAN(flags);
+ break;
+ }
+
+ if (D40_DT_FLAGS_FIXED_CHAN(flags)) {
+ cfg.phy_channel = dma_spec->args[1];
+ cfg.use_fixed_channel = true;
+ }
+
+ if (D40_DT_FLAGS_HIGH_PRIO(flags))
+ cfg.high_priority = true;
+
+ return dma_request_channel(cap, stedma40_filter, &cfg);
+}
+
+/* DMA ENGINE functions */
+static int d40_alloc_chan_resources(struct dma_chan *chan)
+{
+ int err;
+ unsigned long flags;
+ struct d40_chan *d40c =
+ container_of(chan, struct d40_chan, chan);
+ bool is_free_phy;
+ spin_lock_irqsave(&d40c->lock, flags);
+
+ dma_cookie_init(chan);
+
+ /* If no dma configuration is set use default configuration (memcpy) */
+ if (!d40c->configured) {
+ err = d40_config_memcpy(d40c);
+ if (err) {
+ chan_err(d40c, "Failed to configure memcpy channel\n");
+ goto mark_last_busy;
+ }
+ }
+
+ err = d40_allocate_channel(d40c, &is_free_phy);
+ if (err) {
+ chan_err(d40c, "Failed to allocate channel\n");
+ d40c->configured = false;
+ goto mark_last_busy;
+ }
+
+ pm_runtime_get_sync(d40c->base->dev);
+
+ d40_set_prio_realtime(d40c);
+
+ if (chan_is_logical(d40c)) {
+ if (d40c->dma_cfg.dir == DMA_DEV_TO_MEM)
+ d40c->lcpa = d40c->base->lcpa_base +
+ d40c->dma_cfg.dev_type * D40_LCPA_CHAN_SIZE;
+ else
+ d40c->lcpa = d40c->base->lcpa_base +
+ d40c->dma_cfg.dev_type *
+ D40_LCPA_CHAN_SIZE + D40_LCPA_CHAN_DST_DELTA;
+
+ /* Unmask the Global Interrupt Mask. */
+ d40c->src_def_cfg |= BIT(D40_SREG_CFG_LOG_GIM_POS);
+ d40c->dst_def_cfg |= BIT(D40_SREG_CFG_LOG_GIM_POS);
+ }
+
+ dev_dbg(chan2dev(d40c), "allocated %s channel (phy %d%s)\n",
+ chan_is_logical(d40c) ? "logical" : "physical",
+ d40c->phy_chan->num,
+ d40c->dma_cfg.use_fixed_channel ? ", fixed" : "");
+
+
+ /*
+ * Only write channel configuration to the DMA if the physical
+ * resource is free. In case of multiple logical channels
+ * on the same physical resource, only the first write is necessary.
+ */
+ if (is_free_phy)
+ d40_config_write(d40c);
+ mark_last_busy:
+ pm_runtime_mark_last_busy(d40c->base->dev);
+ pm_runtime_put_autosuspend(d40c->base->dev);
+ spin_unlock_irqrestore(&d40c->lock, flags);
+ return err;
+}
+
+static void d40_free_chan_resources(struct dma_chan *chan)
+{
+ struct d40_chan *d40c =
+ container_of(chan, struct d40_chan, chan);
+ int err;
+ unsigned long flags;
+
+ if (d40c->phy_chan == NULL) {
+ chan_err(d40c, "Cannot free unallocated channel\n");
+ return;
+ }
+
+ spin_lock_irqsave(&d40c->lock, flags);
+
+ err = d40_free_dma(d40c);
+
+ if (err)
+ chan_err(d40c, "Failed to free channel\n");
+ spin_unlock_irqrestore(&d40c->lock, flags);
+}
+
+static struct dma_async_tx_descriptor *d40_prep_memcpy(struct dma_chan *chan,
+ dma_addr_t dst,
+ dma_addr_t src,
+ size_t size,
+ unsigned long dma_flags)
+{
+ struct scatterlist dst_sg;
+ struct scatterlist src_sg;
+
+ sg_init_table(&dst_sg, 1);
+ sg_init_table(&src_sg, 1);
+
+ sg_dma_address(&dst_sg) = dst;
+ sg_dma_address(&src_sg) = src;
+
+ sg_dma_len(&dst_sg) = size;
+ sg_dma_len(&src_sg) = size;
+
+ return d40_prep_sg(chan, &src_sg, &dst_sg, 1,
+ DMA_MEM_TO_MEM, dma_flags);
+}
+
+static struct dma_async_tx_descriptor *
+d40_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
+ unsigned int sg_len, enum dma_transfer_direction direction,
+ unsigned long dma_flags, void *context)
+{
+ if (!is_slave_direction(direction))
+ return NULL;
+
+ return d40_prep_sg(chan, sgl, sgl, sg_len, direction, dma_flags);
+}
+
+static struct dma_async_tx_descriptor *
+dma40_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t dma_addr,
+ size_t buf_len, size_t period_len,
+ enum dma_transfer_direction direction, unsigned long flags)
+{
+ unsigned int periods = buf_len / period_len;
+ struct dma_async_tx_descriptor *txd;
+ struct scatterlist *sg;
+ int i;
+
+ sg = kcalloc(periods + 1, sizeof(struct scatterlist), GFP_NOWAIT);
+ if (!sg)
+ return NULL;
+
+ for (i = 0; i < periods; i++) {
+ sg_dma_address(&sg[i]) = dma_addr;
+ sg_dma_len(&sg[i]) = period_len;
+ dma_addr += period_len;
+ }
+
+ sg_chain(sg, periods + 1, sg);
+
+ txd = d40_prep_sg(chan, sg, sg, periods, direction,
+ DMA_PREP_INTERRUPT);
+
+ kfree(sg);
+
+ return txd;
+}
+
+static enum dma_status d40_tx_status(struct dma_chan *chan,
+ dma_cookie_t cookie,
+ struct dma_tx_state *txstate)
+{
+ struct d40_chan *d40c = container_of(chan, struct d40_chan, chan);
+ enum dma_status ret;
+
+ if (d40c->phy_chan == NULL) {
+ chan_err(d40c, "Cannot read status of unallocated channel\n");
+ return -EINVAL;
+ }
+
+ ret = dma_cookie_status(chan, cookie, txstate);
+ if (ret != DMA_COMPLETE && txstate)
+ dma_set_residue(txstate, stedma40_residue(chan));
+
+ if (d40_is_paused(d40c))
+ ret = DMA_PAUSED;
+
+ return ret;
+}
+
+static void d40_issue_pending(struct dma_chan *chan)
+{
+ struct d40_chan *d40c = container_of(chan, struct d40_chan, chan);
+ unsigned long flags;
+
+ if (d40c->phy_chan == NULL) {
+ chan_err(d40c, "Channel is not allocated!\n");
+ return;
+ }
+
+ spin_lock_irqsave(&d40c->lock, flags);
+
+ list_splice_tail_init(&d40c->pending_queue, &d40c->queue);
+
+ /* Busy means that queued jobs are already being processed */
+ if (!d40c->busy)
+ (void) d40_queue_start(d40c);
+
+ spin_unlock_irqrestore(&d40c->lock, flags);
+}
+
+static int d40_terminate_all(struct dma_chan *chan)
+{
+ unsigned long flags;
+ struct d40_chan *d40c = container_of(chan, struct d40_chan, chan);
+ int ret;
+
+ if (d40c->phy_chan == NULL) {
+ chan_err(d40c, "Channel is not allocated!\n");
+ return -EINVAL;
+ }
+
+ spin_lock_irqsave(&d40c->lock, flags);
+
+ pm_runtime_get_sync(d40c->base->dev);
+ ret = d40_channel_execute_command(d40c, D40_DMA_STOP);
+ if (ret)
+ chan_err(d40c, "Failed to stop channel\n");
+
+ d40_term_all(d40c);
+ pm_runtime_mark_last_busy(d40c->base->dev);
+ pm_runtime_put_autosuspend(d40c->base->dev);
+ if (d40c->busy) {
+ pm_runtime_mark_last_busy(d40c->base->dev);
+ pm_runtime_put_autosuspend(d40c->base->dev);
+ }
+ d40c->busy = false;
+
+ spin_unlock_irqrestore(&d40c->lock, flags);
+ return 0;
+}
+
+static int
+dma40_config_to_halfchannel(struct d40_chan *d40c,
+ struct stedma40_half_channel_info *info,
+ u32 maxburst)
+{
+ int psize;
+
+ if (chan_is_logical(d40c)) {
+ if (maxburst >= 16)
+ psize = STEDMA40_PSIZE_LOG_16;
+ else if (maxburst >= 8)
+ psize = STEDMA40_PSIZE_LOG_8;
+ else if (maxburst >= 4)
+ psize = STEDMA40_PSIZE_LOG_4;
+ else
+ psize = STEDMA40_PSIZE_LOG_1;
+ } else {
+ if (maxburst >= 16)
+ psize = STEDMA40_PSIZE_PHY_16;
+ else if (maxburst >= 8)
+ psize = STEDMA40_PSIZE_PHY_8;
+ else if (maxburst >= 4)
+ psize = STEDMA40_PSIZE_PHY_4;
+ else
+ psize = STEDMA40_PSIZE_PHY_1;
+ }
+
+ info->psize = psize;
+ info->flow_ctrl = STEDMA40_NO_FLOW_CTRL;
+
+ return 0;
+}
+
+static int d40_set_runtime_config(struct dma_chan *chan,
+ struct dma_slave_config *config)
+{
+ struct d40_chan *d40c = container_of(chan, struct d40_chan, chan);
+
+ memcpy(&d40c->slave_config, config, sizeof(*config));
+
+ return 0;
+}
+
+/* Runtime reconfiguration extension */
+static int d40_set_runtime_config_write(struct dma_chan *chan,
+ struct dma_slave_config *config,
+ enum dma_transfer_direction direction)
+{
+ struct d40_chan *d40c = container_of(chan, struct d40_chan, chan);
+ struct stedma40_chan_cfg *cfg = &d40c->dma_cfg;
+ enum dma_slave_buswidth src_addr_width, dst_addr_width;
+ dma_addr_t config_addr;
+ u32 src_maxburst, dst_maxburst;
+ int ret;
+
+ if (d40c->phy_chan == NULL) {
+ chan_err(d40c, "Channel is not allocated!\n");
+ return -EINVAL;
+ }
+
+ src_addr_width = config->src_addr_width;
+ src_maxburst = config->src_maxburst;
+ dst_addr_width = config->dst_addr_width;
+ dst_maxburst = config->dst_maxburst;
+
+ if (direction == DMA_DEV_TO_MEM) {
+ config_addr = config->src_addr;
+
+ if (cfg->dir != DMA_DEV_TO_MEM)
+ dev_dbg(d40c->base->dev,
+ "channel was not configured for peripheral "
+ "to memory transfer (%d) overriding\n",
+ cfg->dir);
+ cfg->dir = DMA_DEV_TO_MEM;
+
+ /* Configure the memory side */
+ if (dst_addr_width == DMA_SLAVE_BUSWIDTH_UNDEFINED)
+ dst_addr_width = src_addr_width;
+ if (dst_maxburst == 0)
+ dst_maxburst = src_maxburst;
+
+ } else if (direction == DMA_MEM_TO_DEV) {
+ config_addr = config->dst_addr;
+
+ if (cfg->dir != DMA_MEM_TO_DEV)
+ dev_dbg(d40c->base->dev,
+ "channel was not configured for memory "
+ "to peripheral transfer (%d) overriding\n",
+ cfg->dir);
+ cfg->dir = DMA_MEM_TO_DEV;
+
+ /* Configure the memory side */
+ if (src_addr_width == DMA_SLAVE_BUSWIDTH_UNDEFINED)
+ src_addr_width = dst_addr_width;
+ if (src_maxburst == 0)
+ src_maxburst = dst_maxburst;
+ } else {
+ dev_err(d40c->base->dev,
+ "unrecognized channel direction %d\n",
+ direction);
+ return -EINVAL;
+ }
+
+ if (config_addr <= 0) {
+ dev_err(d40c->base->dev, "no address supplied\n");
+ return -EINVAL;
+ }
+
+ if (src_maxburst * src_addr_width != dst_maxburst * dst_addr_width) {
+ dev_err(d40c->base->dev,
+ "src/dst width/maxburst mismatch: %d*%d != %d*%d\n",
+ src_maxburst,
+ src_addr_width,
+ dst_maxburst,
+ dst_addr_width);
+ return -EINVAL;
+ }
+
+ if (src_maxburst > 16) {
+ src_maxburst = 16;
+ dst_maxburst = src_maxburst * src_addr_width / dst_addr_width;
+ } else if (dst_maxburst > 16) {
+ dst_maxburst = 16;
+ src_maxburst = dst_maxburst * dst_addr_width / src_addr_width;
+ }
+
+ /* Only valid widths are; 1, 2, 4 and 8. */
+ if (src_addr_width <= DMA_SLAVE_BUSWIDTH_UNDEFINED ||
+ src_addr_width > DMA_SLAVE_BUSWIDTH_8_BYTES ||
+ dst_addr_width <= DMA_SLAVE_BUSWIDTH_UNDEFINED ||
+ dst_addr_width > DMA_SLAVE_BUSWIDTH_8_BYTES ||
+ !is_power_of_2(src_addr_width) ||
+ !is_power_of_2(dst_addr_width))
+ return -EINVAL;
+
+ cfg->src_info.data_width = src_addr_width;
+ cfg->dst_info.data_width = dst_addr_width;
+
+ ret = dma40_config_to_halfchannel(d40c, &cfg->src_info,
+ src_maxburst);
+ if (ret)
+ return ret;
+
+ ret = dma40_config_to_halfchannel(d40c, &cfg->dst_info,
+ dst_maxburst);
+ if (ret)
+ return ret;
+
+ /* Fill in register values */
+ if (chan_is_logical(d40c))
+ d40_log_cfg(cfg, &d40c->log_def.lcsp1, &d40c->log_def.lcsp3);
+ else
+ d40_phy_cfg(cfg, &d40c->src_def_cfg, &d40c->dst_def_cfg);
+
+ /* These settings will take precedence later */
+ d40c->runtime_addr = config_addr;
+ d40c->runtime_direction = direction;
+ dev_dbg(d40c->base->dev,
+ "configured channel %s for %s, data width %d/%d, "
+ "maxburst %d/%d elements, LE, no flow control\n",
+ dma_chan_name(chan),
+ (direction == DMA_DEV_TO_MEM) ? "RX" : "TX",
+ src_addr_width, dst_addr_width,
+ src_maxburst, dst_maxburst);
+
+ return 0;
+}
+
+/* Initialization functions */
+
+static void __init d40_chan_init(struct d40_base *base, struct dma_device *dma,
+ struct d40_chan *chans, int offset,
+ int num_chans)
+{
+ int i = 0;
+ struct d40_chan *d40c;
+
+ INIT_LIST_HEAD(&dma->channels);
+
+ for (i = offset; i < offset + num_chans; i++) {
+ d40c = &chans[i];
+ d40c->base = base;
+ d40c->chan.device = dma;
+
+ spin_lock_init(&d40c->lock);
+
+ d40c->log_num = D40_PHY_CHAN;
+
+ INIT_LIST_HEAD(&d40c->done);
+ INIT_LIST_HEAD(&d40c->active);
+ INIT_LIST_HEAD(&d40c->queue);
+ INIT_LIST_HEAD(&d40c->pending_queue);
+ INIT_LIST_HEAD(&d40c->client);
+ INIT_LIST_HEAD(&d40c->prepare_queue);
+
+ tasklet_setup(&d40c->tasklet, dma_tasklet);
+
+ list_add_tail(&d40c->chan.device_node,
+ &dma->channels);
+ }
+}
+
+static void d40_ops_init(struct d40_base *base, struct dma_device *dev)
+{
+ if (dma_has_cap(DMA_SLAVE, dev->cap_mask)) {
+ dev->device_prep_slave_sg = d40_prep_slave_sg;
+ dev->directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
+ }
+
+ if (dma_has_cap(DMA_MEMCPY, dev->cap_mask)) {
+ dev->device_prep_dma_memcpy = d40_prep_memcpy;
+ dev->directions = BIT(DMA_MEM_TO_MEM);
+ /*
+ * This controller can only access address at even
+ * 32bit boundaries, i.e. 2^2
+ */
+ dev->copy_align = DMAENGINE_ALIGN_4_BYTES;
+ }
+
+ if (dma_has_cap(DMA_CYCLIC, dev->cap_mask))
+ dev->device_prep_dma_cyclic = dma40_prep_dma_cyclic;
+
+ dev->device_alloc_chan_resources = d40_alloc_chan_resources;
+ dev->device_free_chan_resources = d40_free_chan_resources;
+ dev->device_issue_pending = d40_issue_pending;
+ dev->device_tx_status = d40_tx_status;
+ dev->device_config = d40_set_runtime_config;
+ dev->device_pause = d40_pause;
+ dev->device_resume = d40_resume;
+ dev->device_terminate_all = d40_terminate_all;
+ dev->residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
+ dev->dev = base->dev;
+}
+
+static int __init d40_dmaengine_init(struct d40_base *base,
+ int num_reserved_chans)
+{
+ int err ;
+
+ d40_chan_init(base, &base->dma_slave, base->log_chans,
+ 0, base->num_log_chans);
+
+ dma_cap_zero(base->dma_slave.cap_mask);
+ dma_cap_set(DMA_SLAVE, base->dma_slave.cap_mask);
+ dma_cap_set(DMA_CYCLIC, base->dma_slave.cap_mask);
+
+ d40_ops_init(base, &base->dma_slave);
+
+ err = dmaenginem_async_device_register(&base->dma_slave);
+
+ if (err) {
+ d40_err(base->dev, "Failed to register slave channels\n");
+ goto exit;
+ }
+
+ d40_chan_init(base, &base->dma_memcpy, base->log_chans,
+ base->num_log_chans, base->num_memcpy_chans);
+
+ dma_cap_zero(base->dma_memcpy.cap_mask);
+ dma_cap_set(DMA_MEMCPY, base->dma_memcpy.cap_mask);
+
+ d40_ops_init(base, &base->dma_memcpy);
+
+ err = dmaenginem_async_device_register(&base->dma_memcpy);
+
+ if (err) {
+ d40_err(base->dev,
+ "Failed to register memcpy only channels\n");
+ goto exit;
+ }
+
+ d40_chan_init(base, &base->dma_both, base->phy_chans,
+ 0, num_reserved_chans);
+
+ dma_cap_zero(base->dma_both.cap_mask);
+ dma_cap_set(DMA_SLAVE, base->dma_both.cap_mask);
+ dma_cap_set(DMA_MEMCPY, base->dma_both.cap_mask);
+ dma_cap_set(DMA_CYCLIC, base->dma_slave.cap_mask);
+
+ d40_ops_init(base, &base->dma_both);
+ err = dmaenginem_async_device_register(&base->dma_both);
+
+ if (err) {
+ d40_err(base->dev,
+ "Failed to register logical and physical capable channels\n");
+ goto exit;
+ }
+ return 0;
+ exit:
+ return err;
+}
+
+/* Suspend resume functionality */
+#ifdef CONFIG_PM_SLEEP
+static int dma40_suspend(struct device *dev)
+{
+ struct d40_base *base = dev_get_drvdata(dev);
+ int ret;
+
+ ret = pm_runtime_force_suspend(dev);
+ if (ret)
+ return ret;
+
+ if (base->lcpa_regulator)
+ ret = regulator_disable(base->lcpa_regulator);
+ return ret;
+}
+
+static int dma40_resume(struct device *dev)
+{
+ struct d40_base *base = dev_get_drvdata(dev);
+ int ret = 0;
+
+ if (base->lcpa_regulator) {
+ ret = regulator_enable(base->lcpa_regulator);
+ if (ret)
+ return ret;
+ }
+
+ return pm_runtime_force_resume(dev);
+}
+#endif
+
+#ifdef CONFIG_PM
+static void dma40_backup(void __iomem *baseaddr, u32 *backup,
+ u32 *regaddr, int num, bool save)
+{
+ int i;
+
+ for (i = 0; i < num; i++) {
+ void __iomem *addr = baseaddr + regaddr[i];
+
+ if (save)
+ backup[i] = readl_relaxed(addr);
+ else
+ writel_relaxed(backup[i], addr);
+ }
+}
+
+static void d40_save_restore_registers(struct d40_base *base, bool save)
+{
+ int i;
+
+ /* Save/Restore channel specific registers */
+ for (i = 0; i < base->num_phy_chans; i++) {
+ void __iomem *addr;
+ int idx;
+
+ if (base->phy_res[i].reserved)
+ continue;
+
+ addr = base->virtbase + D40_DREG_PCBASE + i * D40_DREG_PCDELTA;
+ idx = i * ARRAY_SIZE(d40_backup_regs_chan);
+
+ dma40_backup(addr, &base->reg_val_backup_chan[idx],
+ d40_backup_regs_chan,
+ ARRAY_SIZE(d40_backup_regs_chan),
+ save);
+ }
+
+ /* Save/Restore global registers */
+ dma40_backup(base->virtbase, base->reg_val_backup,
+ d40_backup_regs, ARRAY_SIZE(d40_backup_regs),
+ save);
+
+ /* Save/Restore registers only existing on dma40 v3 and later */
+ if (base->gen_dmac.backup)
+ dma40_backup(base->virtbase, base->reg_val_backup_v4,
+ base->gen_dmac.backup,
+ base->gen_dmac.backup_size,
+ save);
+}
+
+static int dma40_runtime_suspend(struct device *dev)
+{
+ struct d40_base *base = dev_get_drvdata(dev);
+
+ d40_save_restore_registers(base, true);
+
+ /* Don't disable/enable clocks for v1 due to HW bugs */
+ if (base->rev != 1)
+ writel_relaxed(base->gcc_pwr_off_mask,
+ base->virtbase + D40_DREG_GCC);
+
+ return 0;
+}
+
+static int dma40_runtime_resume(struct device *dev)
+{
+ struct d40_base *base = dev_get_drvdata(dev);
+
+ d40_save_restore_registers(base, false);
+
+ writel_relaxed(D40_DREG_GCC_ENABLE_ALL,
+ base->virtbase + D40_DREG_GCC);
+ return 0;
+}
+#endif
+
+static const struct dev_pm_ops dma40_pm_ops = {
+ SET_LATE_SYSTEM_SLEEP_PM_OPS(dma40_suspend, dma40_resume)
+ SET_RUNTIME_PM_OPS(dma40_runtime_suspend,
+ dma40_runtime_resume,
+ NULL)
+};
+
+/* Initialization functions. */
+
+static int __init d40_phy_res_init(struct d40_base *base)
+{
+ int i;
+ int num_phy_chans_avail = 0;
+ u32 val[2];
+ int odd_even_bit = -2;
+ int gcc = D40_DREG_GCC_ENA;
+
+ val[0] = readl(base->virtbase + D40_DREG_PRSME);
+ val[1] = readl(base->virtbase + D40_DREG_PRSMO);
+
+ for (i = 0; i < base->num_phy_chans; i++) {
+ base->phy_res[i].num = i;
+ odd_even_bit += 2 * ((i % 2) == 0);
+ if (((val[i % 2] >> odd_even_bit) & 3) == 1) {
+ /* Mark security only channels as occupied */
+ base->phy_res[i].allocated_src = D40_ALLOC_PHY;
+ base->phy_res[i].allocated_dst = D40_ALLOC_PHY;
+ base->phy_res[i].reserved = true;
+ gcc |= D40_DREG_GCC_EVTGRP_ENA(D40_PHYS_TO_GROUP(i),
+ D40_DREG_GCC_SRC);
+ gcc |= D40_DREG_GCC_EVTGRP_ENA(D40_PHYS_TO_GROUP(i),
+ D40_DREG_GCC_DST);
+
+
+ } else {
+ base->phy_res[i].allocated_src = D40_ALLOC_FREE;
+ base->phy_res[i].allocated_dst = D40_ALLOC_FREE;
+ base->phy_res[i].reserved = false;
+ num_phy_chans_avail++;
+ }
+ spin_lock_init(&base->phy_res[i].lock);
+ }
+
+ /* Mark disabled channels as occupied */
+ for (i = 0; base->plat_data->disabled_channels[i] != -1; i++) {
+ int chan = base->plat_data->disabled_channels[i];
+
+ base->phy_res[chan].allocated_src = D40_ALLOC_PHY;
+ base->phy_res[chan].allocated_dst = D40_ALLOC_PHY;
+ base->phy_res[chan].reserved = true;
+ gcc |= D40_DREG_GCC_EVTGRP_ENA(D40_PHYS_TO_GROUP(chan),
+ D40_DREG_GCC_SRC);
+ gcc |= D40_DREG_GCC_EVTGRP_ENA(D40_PHYS_TO_GROUP(chan),
+ D40_DREG_GCC_DST);
+ num_phy_chans_avail--;
+ }
+
+ /* Mark soft_lli channels */
+ for (i = 0; i < base->plat_data->num_of_soft_lli_chans; i++) {
+ int chan = base->plat_data->soft_lli_chans[i];
+
+ base->phy_res[chan].use_soft_lli = true;
+ }
+
+ dev_info(base->dev, "%d of %d physical DMA channels available\n",
+ num_phy_chans_avail, base->num_phy_chans);
+
+ /* Verify settings extended vs standard */
+ val[0] = readl(base->virtbase + D40_DREG_PRTYP);
+
+ for (i = 0; i < base->num_phy_chans; i++) {
+
+ if (base->phy_res[i].allocated_src == D40_ALLOC_FREE &&
+ (val[0] & 0x3) != 1)
+ dev_info(base->dev,
+ "[%s] INFO: channel %d is misconfigured (%d)\n",
+ __func__, i, val[0] & 0x3);
+
+ val[0] = val[0] >> 2;
+ }
+
+ /*
+ * To keep things simple, Enable all clocks initially.
+ * The clocks will get managed later post channel allocation.
+ * The clocks for the event lines on which reserved channels exists
+ * are not managed here.
+ */
+ writel(D40_DREG_GCC_ENABLE_ALL, base->virtbase + D40_DREG_GCC);
+ base->gcc_pwr_off_mask = gcc;
+
+ return num_phy_chans_avail;
+}
+
+static struct d40_base * __init d40_hw_detect_init(struct platform_device *pdev)
+{
+ struct stedma40_platform_data *plat_data = dev_get_platdata(&pdev->dev);
+ struct clk *clk;
+ void __iomem *virtbase;
+ struct resource *res;
+ struct d40_base *base;
+ int num_log_chans;
+ int num_phy_chans;
+ int num_memcpy_chans;
+ int clk_ret = -EINVAL;
+ int i;
+ u32 pid;
+ u32 cid;
+ u8 rev;
+
+ clk = clk_get(&pdev->dev, NULL);
+ if (IS_ERR(clk)) {
+ d40_err(&pdev->dev, "No matching clock found\n");
+ goto check_prepare_enabled;
+ }
+
+ clk_ret = clk_prepare_enable(clk);
+ if (clk_ret) {
+ d40_err(&pdev->dev, "Failed to prepare/enable clock\n");
+ goto disable_unprepare;
+ }
+
+ /* Get IO for DMAC base address */
+ res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "base");
+ if (!res)
+ goto disable_unprepare;
+
+ if (request_mem_region(res->start, resource_size(res),
+ D40_NAME " I/O base") == NULL)
+ goto release_region;
+
+ virtbase = ioremap(res->start, resource_size(res));
+ if (!virtbase)
+ goto release_region;
+
+ /* This is just a regular AMBA PrimeCell ID actually */
+ for (pid = 0, i = 0; i < 4; i++)
+ pid |= (readl(virtbase + resource_size(res) - 0x20 + 4 * i)
+ & 255) << (i * 8);
+ for (cid = 0, i = 0; i < 4; i++)
+ cid |= (readl(virtbase + resource_size(res) - 0x10 + 4 * i)
+ & 255) << (i * 8);
+
+ if (cid != AMBA_CID) {
+ d40_err(&pdev->dev, "Unknown hardware! No PrimeCell ID\n");
+ goto unmap_io;
+ }
+ if (AMBA_MANF_BITS(pid) != AMBA_VENDOR_ST) {
+ d40_err(&pdev->dev, "Unknown designer! Got %x wanted %x\n",
+ AMBA_MANF_BITS(pid),
+ AMBA_VENDOR_ST);
+ goto unmap_io;
+ }
+ /*
+ * HW revision:
+ * DB8500ed has revision 0
+ * ? has revision 1
+ * DB8500v1 has revision 2
+ * DB8500v2 has revision 3
+ * AP9540v1 has revision 4
+ * DB8540v1 has revision 4
+ */
+ rev = AMBA_REV_BITS(pid);
+ if (rev < 2) {
+ d40_err(&pdev->dev, "hardware revision: %d is not supported", rev);
+ goto unmap_io;
+ }
+
+ /* The number of physical channels on this HW */
+ if (plat_data->num_of_phy_chans)
+ num_phy_chans = plat_data->num_of_phy_chans;
+ else
+ num_phy_chans = 4 * (readl(virtbase + D40_DREG_ICFG) & 0x7) + 4;
+
+ /* The number of channels used for memcpy */
+ if (plat_data->num_of_memcpy_chans)
+ num_memcpy_chans = plat_data->num_of_memcpy_chans;
+ else
+ num_memcpy_chans = ARRAY_SIZE(dma40_memcpy_channels);
+
+ num_log_chans = num_phy_chans * D40_MAX_LOG_CHAN_PER_PHY;
+
+ dev_info(&pdev->dev,
+ "hardware rev: %d @ %pa with %d physical and %d logical channels\n",
+ rev, &res->start, num_phy_chans, num_log_chans);
+
+ base = kzalloc(ALIGN(sizeof(struct d40_base), 4) +
+ (num_phy_chans + num_log_chans + num_memcpy_chans) *
+ sizeof(struct d40_chan), GFP_KERNEL);
+
+ if (base == NULL)
+ goto unmap_io;
+
+ base->rev = rev;
+ base->clk = clk;
+ base->num_memcpy_chans = num_memcpy_chans;
+ base->num_phy_chans = num_phy_chans;
+ base->num_log_chans = num_log_chans;
+ base->phy_start = res->start;
+ base->phy_size = resource_size(res);
+ base->virtbase = virtbase;
+ base->plat_data = plat_data;
+ base->dev = &pdev->dev;
+ base->phy_chans = ((void *)base) + ALIGN(sizeof(struct d40_base), 4);
+ base->log_chans = &base->phy_chans[num_phy_chans];
+
+ if (base->plat_data->num_of_phy_chans == 14) {
+ base->gen_dmac.backup = d40_backup_regs_v4b;
+ base->gen_dmac.backup_size = BACKUP_REGS_SZ_V4B;
+ base->gen_dmac.interrupt_en = D40_DREG_CPCMIS;
+ base->gen_dmac.interrupt_clear = D40_DREG_CPCICR;
+ base->gen_dmac.realtime_en = D40_DREG_CRSEG1;
+ base->gen_dmac.realtime_clear = D40_DREG_CRCEG1;
+ base->gen_dmac.high_prio_en = D40_DREG_CPSEG1;
+ base->gen_dmac.high_prio_clear = D40_DREG_CPCEG1;
+ base->gen_dmac.il = il_v4b;
+ base->gen_dmac.il_size = ARRAY_SIZE(il_v4b);
+ base->gen_dmac.init_reg = dma_init_reg_v4b;
+ base->gen_dmac.init_reg_size = ARRAY_SIZE(dma_init_reg_v4b);
+ } else {
+ if (base->rev >= 3) {
+ base->gen_dmac.backup = d40_backup_regs_v4a;
+ base->gen_dmac.backup_size = BACKUP_REGS_SZ_V4A;
+ }
+ base->gen_dmac.interrupt_en = D40_DREG_PCMIS;
+ base->gen_dmac.interrupt_clear = D40_DREG_PCICR;
+ base->gen_dmac.realtime_en = D40_DREG_RSEG1;
+ base->gen_dmac.realtime_clear = D40_DREG_RCEG1;
+ base->gen_dmac.high_prio_en = D40_DREG_PSEG1;
+ base->gen_dmac.high_prio_clear = D40_DREG_PCEG1;
+ base->gen_dmac.il = il_v4a;
+ base->gen_dmac.il_size = ARRAY_SIZE(il_v4a);
+ base->gen_dmac.init_reg = dma_init_reg_v4a;
+ base->gen_dmac.init_reg_size = ARRAY_SIZE(dma_init_reg_v4a);
+ }
+
+ base->phy_res = kcalloc(num_phy_chans,
+ sizeof(*base->phy_res),
+ GFP_KERNEL);
+ if (!base->phy_res)
+ goto free_base;
+
+ base->lookup_phy_chans = kcalloc(num_phy_chans,
+ sizeof(*base->lookup_phy_chans),
+ GFP_KERNEL);
+ if (!base->lookup_phy_chans)
+ goto free_phy_res;
+
+ base->lookup_log_chans = kcalloc(num_log_chans,
+ sizeof(*base->lookup_log_chans),
+ GFP_KERNEL);
+ if (!base->lookup_log_chans)
+ goto free_phy_chans;
+
+ base->reg_val_backup_chan = kmalloc_array(base->num_phy_chans,
+ sizeof(d40_backup_regs_chan),
+ GFP_KERNEL);
+ if (!base->reg_val_backup_chan)
+ goto free_log_chans;
+
+ base->lcla_pool.alloc_map = kcalloc(num_phy_chans
+ * D40_LCLA_LINK_PER_EVENT_GRP,
+ sizeof(*base->lcla_pool.alloc_map),
+ GFP_KERNEL);
+ if (!base->lcla_pool.alloc_map)
+ goto free_backup_chan;
+
+ base->regs_interrupt = kmalloc_array(base->gen_dmac.il_size,
+ sizeof(*base->regs_interrupt),
+ GFP_KERNEL);
+ if (!base->regs_interrupt)
+ goto free_map;
+
+ base->desc_slab = kmem_cache_create(D40_NAME, sizeof(struct d40_desc),
+ 0, SLAB_HWCACHE_ALIGN,
+ NULL);
+ if (base->desc_slab == NULL)
+ goto free_regs;
+
+
+ return base;
+ free_regs:
+ kfree(base->regs_interrupt);
+ free_map:
+ kfree(base->lcla_pool.alloc_map);
+ free_backup_chan:
+ kfree(base->reg_val_backup_chan);
+ free_log_chans:
+ kfree(base->lookup_log_chans);
+ free_phy_chans:
+ kfree(base->lookup_phy_chans);
+ free_phy_res:
+ kfree(base->phy_res);
+ free_base:
+ kfree(base);
+ unmap_io:
+ iounmap(virtbase);
+ release_region:
+ release_mem_region(res->start, resource_size(res));
+ check_prepare_enabled:
+ if (!clk_ret)
+ disable_unprepare:
+ clk_disable_unprepare(clk);
+ if (!IS_ERR(clk))
+ clk_put(clk);
+ return NULL;
+}
+
+static void __init d40_hw_init(struct d40_base *base)
+{
+
+ int i;
+ u32 prmseo[2] = {0, 0};
+ u32 activeo[2] = {0xFFFFFFFF, 0xFFFFFFFF};
+ u32 pcmis = 0;
+ u32 pcicr = 0;
+ struct d40_reg_val *dma_init_reg = base->gen_dmac.init_reg;
+ u32 reg_size = base->gen_dmac.init_reg_size;
+
+ for (i = 0; i < reg_size; i++)
+ writel(dma_init_reg[i].val,
+ base->virtbase + dma_init_reg[i].reg);
+
+ /* Configure all our dma channels to default settings */
+ for (i = 0; i < base->num_phy_chans; i++) {
+
+ activeo[i % 2] = activeo[i % 2] << 2;
+
+ if (base->phy_res[base->num_phy_chans - i - 1].allocated_src
+ == D40_ALLOC_PHY) {
+ activeo[i % 2] |= 3;
+ continue;
+ }
+
+ /* Enable interrupt # */
+ pcmis = (pcmis << 1) | 1;
+
+ /* Clear interrupt # */
+ pcicr = (pcicr << 1) | 1;
+
+ /* Set channel to physical mode */
+ prmseo[i % 2] = prmseo[i % 2] << 2;
+ prmseo[i % 2] |= 1;
+
+ }
+
+ writel(prmseo[1], base->virtbase + D40_DREG_PRMSE);
+ writel(prmseo[0], base->virtbase + D40_DREG_PRMSO);
+ writel(activeo[1], base->virtbase + D40_DREG_ACTIVE);
+ writel(activeo[0], base->virtbase + D40_DREG_ACTIVO);
+
+ /* Write which interrupt to enable */
+ writel(pcmis, base->virtbase + base->gen_dmac.interrupt_en);
+
+ /* Write which interrupt to clear */
+ writel(pcicr, base->virtbase + base->gen_dmac.interrupt_clear);
+
+ /* These are __initdata and cannot be accessed after init */
+ base->gen_dmac.init_reg = NULL;
+ base->gen_dmac.init_reg_size = 0;
+}
+
+static int __init d40_lcla_allocate(struct d40_base *base)
+{
+ struct d40_lcla_pool *pool = &base->lcla_pool;
+ unsigned long *page_list;
+ int i, j;
+ int ret;
+
+ /*
+ * This is somewhat ugly. We need 8192 bytes that are 18 bit aligned,
+ * To full fill this hardware requirement without wasting 256 kb
+ * we allocate pages until we get an aligned one.
+ */
+ page_list = kmalloc_array(MAX_LCLA_ALLOC_ATTEMPTS,
+ sizeof(*page_list),
+ GFP_KERNEL);
+ if (!page_list)
+ return -ENOMEM;
+
+ /* Calculating how many pages that are required */
+ base->lcla_pool.pages = SZ_1K * base->num_phy_chans / PAGE_SIZE;
+
+ for (i = 0; i < MAX_LCLA_ALLOC_ATTEMPTS; i++) {
+ page_list[i] = __get_free_pages(GFP_KERNEL,
+ base->lcla_pool.pages);
+ if (!page_list[i]) {
+
+ d40_err(base->dev, "Failed to allocate %d pages.\n",
+ base->lcla_pool.pages);
+ ret = -ENOMEM;
+
+ for (j = 0; j < i; j++)
+ free_pages(page_list[j], base->lcla_pool.pages);
+ goto free_page_list;
+ }
+
+ if ((virt_to_phys((void *)page_list[i]) &
+ (LCLA_ALIGNMENT - 1)) == 0)
+ break;
+ }
+
+ for (j = 0; j < i; j++)
+ free_pages(page_list[j], base->lcla_pool.pages);
+
+ if (i < MAX_LCLA_ALLOC_ATTEMPTS) {
+ base->lcla_pool.base = (void *)page_list[i];
+ } else {
+ /*
+ * After many attempts and no succees with finding the correct
+ * alignment, try with allocating a big buffer.
+ */
+ dev_warn(base->dev,
+ "[%s] Failed to get %d pages @ 18 bit align.\n",
+ __func__, base->lcla_pool.pages);
+ base->lcla_pool.base_unaligned = kmalloc(SZ_1K *
+ base->num_phy_chans +
+ LCLA_ALIGNMENT,
+ GFP_KERNEL);
+ if (!base->lcla_pool.base_unaligned) {
+ ret = -ENOMEM;
+ goto free_page_list;
+ }
+
+ base->lcla_pool.base = PTR_ALIGN(base->lcla_pool.base_unaligned,
+ LCLA_ALIGNMENT);
+ }
+
+ pool->dma_addr = dma_map_single(base->dev, pool->base,
+ SZ_1K * base->num_phy_chans,
+ DMA_TO_DEVICE);
+ if (dma_mapping_error(base->dev, pool->dma_addr)) {
+ pool->dma_addr = 0;
+ ret = -ENOMEM;
+ goto free_page_list;
+ }
+
+ writel(virt_to_phys(base->lcla_pool.base),
+ base->virtbase + D40_DREG_LCLA);
+ ret = 0;
+ free_page_list:
+ kfree(page_list);
+ return ret;
+}
+
+static int __init d40_of_probe(struct platform_device *pdev,
+ struct device_node *np)
+{
+ struct stedma40_platform_data *pdata;
+ int num_phy = 0, num_memcpy = 0, num_disabled = 0;
+ const __be32 *list;
+
+ pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
+ if (!pdata)
+ return -ENOMEM;
+
+ /* If absent this value will be obtained from h/w. */
+ of_property_read_u32(np, "dma-channels", &num_phy);
+ if (num_phy > 0)
+ pdata->num_of_phy_chans = num_phy;
+
+ list = of_get_property(np, "memcpy-channels", &num_memcpy);
+ num_memcpy /= sizeof(*list);
+
+ if (num_memcpy > D40_MEMCPY_MAX_CHANS || num_memcpy <= 0) {
+ d40_err(&pdev->dev,
+ "Invalid number of memcpy channels specified (%d)\n",
+ num_memcpy);
+ return -EINVAL;
+ }
+ pdata->num_of_memcpy_chans = num_memcpy;
+
+ of_property_read_u32_array(np, "memcpy-channels",
+ dma40_memcpy_channels,
+ num_memcpy);
+
+ list = of_get_property(np, "disabled-channels", &num_disabled);
+ num_disabled /= sizeof(*list);
+
+ if (num_disabled >= STEDMA40_MAX_PHYS || num_disabled < 0) {
+ d40_err(&pdev->dev,
+ "Invalid number of disabled channels specified (%d)\n",
+ num_disabled);
+ return -EINVAL;
+ }
+
+ of_property_read_u32_array(np, "disabled-channels",
+ pdata->disabled_channels,
+ num_disabled);
+ pdata->disabled_channels[num_disabled] = -1;
+
+ pdev->dev.platform_data = pdata;
+
+ return 0;
+}
+
+static int __init d40_probe(struct platform_device *pdev)
+{
+ struct stedma40_platform_data *plat_data = dev_get_platdata(&pdev->dev);
+ struct device_node *np = pdev->dev.of_node;
+ int ret = -ENOENT;
+ struct d40_base *base;
+ struct resource *res;
+ int num_reserved_chans;
+ u32 val;
+
+ if (!plat_data) {
+ if (np) {
+ if (d40_of_probe(pdev, np)) {
+ ret = -ENOMEM;
+ goto report_failure;
+ }
+ } else {
+ d40_err(&pdev->dev, "No pdata or Device Tree provided\n");
+ goto report_failure;
+ }
+ }
+
+ base = d40_hw_detect_init(pdev);
+ if (!base)
+ goto report_failure;
+
+ num_reserved_chans = d40_phy_res_init(base);
+
+ platform_set_drvdata(pdev, base);
+
+ spin_lock_init(&base->interrupt_lock);
+ spin_lock_init(&base->execmd_lock);
+
+ /* Get IO for logical channel parameter address */
+ res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "lcpa");
+ if (!res) {
+ ret = -ENOENT;
+ d40_err(&pdev->dev, "No \"lcpa\" memory resource\n");
+ goto destroy_cache;
+ }
+ base->lcpa_size = resource_size(res);
+ base->phy_lcpa = res->start;
+
+ if (request_mem_region(res->start, resource_size(res),
+ D40_NAME " I/O lcpa") == NULL) {
+ ret = -EBUSY;
+ d40_err(&pdev->dev, "Failed to request LCPA region %pR\n", res);
+ goto destroy_cache;
+ }
+
+ /* We make use of ESRAM memory for this. */
+ val = readl(base->virtbase + D40_DREG_LCPA);
+ if (res->start != val && val != 0) {
+ dev_warn(&pdev->dev,
+ "[%s] Mismatch LCPA dma 0x%x, def %pa\n",
+ __func__, val, &res->start);
+ } else
+ writel(res->start, base->virtbase + D40_DREG_LCPA);
+
+ base->lcpa_base = ioremap(res->start, resource_size(res));
+ if (!base->lcpa_base) {
+ ret = -ENOMEM;
+ d40_err(&pdev->dev, "Failed to ioremap LCPA region\n");
+ goto destroy_cache;
+ }
+ /* If lcla has to be located in ESRAM we don't need to allocate */
+ if (base->plat_data->use_esram_lcla) {
+ res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
+ "lcla_esram");
+ if (!res) {
+ ret = -ENOENT;
+ d40_err(&pdev->dev,
+ "No \"lcla_esram\" memory resource\n");
+ goto destroy_cache;
+ }
+ base->lcla_pool.base = ioremap(res->start,
+ resource_size(res));
+ if (!base->lcla_pool.base) {
+ ret = -ENOMEM;
+ d40_err(&pdev->dev, "Failed to ioremap LCLA region\n");
+ goto destroy_cache;
+ }
+ writel(res->start, base->virtbase + D40_DREG_LCLA);
+
+ } else {
+ ret = d40_lcla_allocate(base);
+ if (ret) {
+ d40_err(&pdev->dev, "Failed to allocate LCLA area\n");
+ goto destroy_cache;
+ }
+ }
+
+ spin_lock_init(&base->lcla_pool.lock);
+
+ base->irq = platform_get_irq(pdev, 0);
+ if (base->irq < 0) {
+ ret = base->irq;
+ goto destroy_cache;
+ }
+
+ ret = request_irq(base->irq, d40_handle_interrupt, 0, D40_NAME, base);
+ if (ret) {
+ d40_err(&pdev->dev, "No IRQ defined\n");
+ goto destroy_cache;
+ }
+
+ if (base->plat_data->use_esram_lcla) {
+
+ base->lcpa_regulator = regulator_get(base->dev, "lcla_esram");
+ if (IS_ERR(base->lcpa_regulator)) {
+ d40_err(&pdev->dev, "Failed to get lcpa_regulator\n");
+ ret = PTR_ERR(base->lcpa_regulator);
+ base->lcpa_regulator = NULL;
+ goto destroy_cache;
+ }
+
+ ret = regulator_enable(base->lcpa_regulator);
+ if (ret) {
+ d40_err(&pdev->dev,
+ "Failed to enable lcpa_regulator\n");
+ regulator_put(base->lcpa_regulator);
+ base->lcpa_regulator = NULL;
+ goto destroy_cache;
+ }
+ }
+
+ writel_relaxed(D40_DREG_GCC_ENABLE_ALL, base->virtbase + D40_DREG_GCC);
+
+ pm_runtime_irq_safe(base->dev);
+ pm_runtime_set_autosuspend_delay(base->dev, DMA40_AUTOSUSPEND_DELAY);
+ pm_runtime_use_autosuspend(base->dev);
+ pm_runtime_mark_last_busy(base->dev);
+ pm_runtime_set_active(base->dev);
+ pm_runtime_enable(base->dev);
+
+ ret = d40_dmaengine_init(base, num_reserved_chans);
+ if (ret)
+ goto destroy_cache;
+
+ ret = dma_set_max_seg_size(base->dev, STEDMA40_MAX_SEG_SIZE);
+ if (ret) {
+ d40_err(&pdev->dev, "Failed to set dma max seg size\n");
+ goto destroy_cache;
+ }
+
+ d40_hw_init(base);
+
+ if (np) {
+ ret = of_dma_controller_register(np, d40_xlate, NULL);
+ if (ret)
+ dev_err(&pdev->dev,
+ "could not register of_dma_controller\n");
+ }
+
+ dev_info(base->dev, "initialized\n");
+ return 0;
+ destroy_cache:
+ kmem_cache_destroy(base->desc_slab);
+ if (base->virtbase)
+ iounmap(base->virtbase);
+
+ if (base->lcla_pool.base && base->plat_data->use_esram_lcla) {
+ iounmap(base->lcla_pool.base);
+ base->lcla_pool.base = NULL;
+ }
+
+ if (base->lcla_pool.dma_addr)
+ dma_unmap_single(base->dev, base->lcla_pool.dma_addr,
+ SZ_1K * base->num_phy_chans,
+ DMA_TO_DEVICE);
+
+ if (!base->lcla_pool.base_unaligned && base->lcla_pool.base)
+ free_pages((unsigned long)base->lcla_pool.base,
+ base->lcla_pool.pages);
+
+ kfree(base->lcla_pool.base_unaligned);
+
+ if (base->lcpa_base)
+ iounmap(base->lcpa_base);
+
+ if (base->phy_lcpa)
+ release_mem_region(base->phy_lcpa,
+ base->lcpa_size);
+ if (base->phy_start)
+ release_mem_region(base->phy_start,
+ base->phy_size);
+ if (base->clk) {
+ clk_disable_unprepare(base->clk);
+ clk_put(base->clk);
+ }
+
+ if (base->lcpa_regulator) {
+ regulator_disable(base->lcpa_regulator);
+ regulator_put(base->lcpa_regulator);
+ }
+ pm_runtime_disable(base->dev);
+
+ kfree(base->lcla_pool.alloc_map);
+ kfree(base->lookup_log_chans);
+ kfree(base->lookup_phy_chans);
+ kfree(base->phy_res);
+ kfree(base);
+ report_failure:
+ d40_err(&pdev->dev, "probe failed\n");
+ return ret;
+}
+
+static const struct of_device_id d40_match[] = {
+ { .compatible = "stericsson,dma40", },
+ {}
+};
+
+static struct platform_driver d40_driver = {
+ .driver = {
+ .name = D40_NAME,
+ .pm = &dma40_pm_ops,
+ .of_match_table = d40_match,
+ },
+};
+
+static int __init stedma40_init(void)
+{
+ return platform_driver_probe(&d40_driver, d40_probe);
+}
+subsys_initcall(stedma40_init);
diff --git a/drivers/dma/ste_dma40_ll.c b/drivers/dma/ste_dma40_ll.c
new file mode 100644
index 000000000..b5287c661
--- /dev/null
+++ b/drivers/dma/ste_dma40_ll.c
@@ -0,0 +1,448 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) ST-Ericsson SA 2007-2010
+ * Author: Per Forlin <per.forlin@stericsson.com> for ST-Ericsson
+ * Author: Jonas Aaberg <jonas.aberg@stericsson.com> for ST-Ericsson
+ */
+
+#include <linux/kernel.h>
+#include <linux/platform_data/dma-ste-dma40.h>
+
+#include "ste_dma40_ll.h"
+
+static u8 d40_width_to_bits(enum dma_slave_buswidth width)
+{
+ if (width == DMA_SLAVE_BUSWIDTH_1_BYTE)
+ return STEDMA40_ESIZE_8_BIT;
+ else if (width == DMA_SLAVE_BUSWIDTH_2_BYTES)
+ return STEDMA40_ESIZE_16_BIT;
+ else if (width == DMA_SLAVE_BUSWIDTH_8_BYTES)
+ return STEDMA40_ESIZE_64_BIT;
+ else
+ return STEDMA40_ESIZE_32_BIT;
+}
+
+/* Sets up proper LCSP1 and LCSP3 register for a logical channel */
+void d40_log_cfg(struct stedma40_chan_cfg *cfg,
+ u32 *lcsp1, u32 *lcsp3)
+{
+ u32 l3 = 0; /* dst */
+ u32 l1 = 0; /* src */
+
+ /* src is mem? -> increase address pos */
+ if (cfg->dir == DMA_MEM_TO_DEV ||
+ cfg->dir == DMA_MEM_TO_MEM)
+ l1 |= BIT(D40_MEM_LCSP1_SCFG_INCR_POS);
+
+ /* dst is mem? -> increase address pos */
+ if (cfg->dir == DMA_DEV_TO_MEM ||
+ cfg->dir == DMA_MEM_TO_MEM)
+ l3 |= BIT(D40_MEM_LCSP3_DCFG_INCR_POS);
+
+ /* src is hw? -> master port 1 */
+ if (cfg->dir == DMA_DEV_TO_MEM ||
+ cfg->dir == DMA_DEV_TO_DEV)
+ l1 |= BIT(D40_MEM_LCSP1_SCFG_MST_POS);
+
+ /* dst is hw? -> master port 1 */
+ if (cfg->dir == DMA_MEM_TO_DEV ||
+ cfg->dir == DMA_DEV_TO_DEV)
+ l3 |= BIT(D40_MEM_LCSP3_DCFG_MST_POS);
+
+ l3 |= BIT(D40_MEM_LCSP3_DCFG_EIM_POS);
+ l3 |= cfg->dst_info.psize << D40_MEM_LCSP3_DCFG_PSIZE_POS;
+ l3 |= d40_width_to_bits(cfg->dst_info.data_width)
+ << D40_MEM_LCSP3_DCFG_ESIZE_POS;
+
+ l1 |= BIT(D40_MEM_LCSP1_SCFG_EIM_POS);
+ l1 |= cfg->src_info.psize << D40_MEM_LCSP1_SCFG_PSIZE_POS;
+ l1 |= d40_width_to_bits(cfg->src_info.data_width)
+ << D40_MEM_LCSP1_SCFG_ESIZE_POS;
+
+ *lcsp1 = l1;
+ *lcsp3 = l3;
+
+}
+
+void d40_phy_cfg(struct stedma40_chan_cfg *cfg, u32 *src_cfg, u32 *dst_cfg)
+{
+ u32 src = 0;
+ u32 dst = 0;
+
+ if ((cfg->dir == DMA_DEV_TO_MEM) ||
+ (cfg->dir == DMA_DEV_TO_DEV)) {
+ /* Set master port to 1 */
+ src |= BIT(D40_SREG_CFG_MST_POS);
+ src |= D40_TYPE_TO_EVENT(cfg->dev_type);
+
+ if (cfg->src_info.flow_ctrl == STEDMA40_NO_FLOW_CTRL)
+ src |= BIT(D40_SREG_CFG_PHY_TM_POS);
+ else
+ src |= 3 << D40_SREG_CFG_PHY_TM_POS;
+ }
+ if ((cfg->dir == DMA_MEM_TO_DEV) ||
+ (cfg->dir == DMA_DEV_TO_DEV)) {
+ /* Set master port to 1 */
+ dst |= BIT(D40_SREG_CFG_MST_POS);
+ dst |= D40_TYPE_TO_EVENT(cfg->dev_type);
+
+ if (cfg->dst_info.flow_ctrl == STEDMA40_NO_FLOW_CTRL)
+ dst |= BIT(D40_SREG_CFG_PHY_TM_POS);
+ else
+ dst |= 3 << D40_SREG_CFG_PHY_TM_POS;
+ }
+ /* Interrupt on end of transfer for destination */
+ dst |= BIT(D40_SREG_CFG_TIM_POS);
+
+ /* Generate interrupt on error */
+ src |= BIT(D40_SREG_CFG_EIM_POS);
+ dst |= BIT(D40_SREG_CFG_EIM_POS);
+
+ /* PSIZE */
+ if (cfg->src_info.psize != STEDMA40_PSIZE_PHY_1) {
+ src |= BIT(D40_SREG_CFG_PHY_PEN_POS);
+ src |= cfg->src_info.psize << D40_SREG_CFG_PSIZE_POS;
+ }
+ if (cfg->dst_info.psize != STEDMA40_PSIZE_PHY_1) {
+ dst |= BIT(D40_SREG_CFG_PHY_PEN_POS);
+ dst |= cfg->dst_info.psize << D40_SREG_CFG_PSIZE_POS;
+ }
+
+ /* Element size */
+ src |= d40_width_to_bits(cfg->src_info.data_width)
+ << D40_SREG_CFG_ESIZE_POS;
+ dst |= d40_width_to_bits(cfg->dst_info.data_width)
+ << D40_SREG_CFG_ESIZE_POS;
+
+ /* Set the priority bit to high for the physical channel */
+ if (cfg->high_priority) {
+ src |= BIT(D40_SREG_CFG_PRI_POS);
+ dst |= BIT(D40_SREG_CFG_PRI_POS);
+ }
+
+ if (cfg->src_info.big_endian)
+ src |= BIT(D40_SREG_CFG_LBE_POS);
+ if (cfg->dst_info.big_endian)
+ dst |= BIT(D40_SREG_CFG_LBE_POS);
+
+ *src_cfg = src;
+ *dst_cfg = dst;
+}
+
+static int d40_phy_fill_lli(struct d40_phy_lli *lli,
+ dma_addr_t data,
+ u32 data_size,
+ dma_addr_t next_lli,
+ u32 reg_cfg,
+ struct stedma40_half_channel_info *info,
+ unsigned int flags)
+{
+ bool addr_inc = flags & LLI_ADDR_INC;
+ bool term_int = flags & LLI_TERM_INT;
+ unsigned int data_width = info->data_width;
+ int psize = info->psize;
+ int num_elems;
+
+ if (psize == STEDMA40_PSIZE_PHY_1)
+ num_elems = 1;
+ else
+ num_elems = 2 << psize;
+
+ /* Must be aligned */
+ if (!IS_ALIGNED(data, data_width))
+ return -EINVAL;
+
+ /* Transfer size can't be smaller than (num_elms * elem_size) */
+ if (data_size < num_elems * data_width)
+ return -EINVAL;
+
+ /* The number of elements. IE now many chunks */
+ lli->reg_elt = (data_size / data_width) << D40_SREG_ELEM_PHY_ECNT_POS;
+
+ /*
+ * Distance to next element sized entry.
+ * Usually the size of the element unless you want gaps.
+ */
+ if (addr_inc)
+ lli->reg_elt |= data_width << D40_SREG_ELEM_PHY_EIDX_POS;
+
+ /* Where the data is */
+ lli->reg_ptr = data;
+ lli->reg_cfg = reg_cfg;
+
+ /* If this scatter list entry is the last one, no next link */
+ if (next_lli == 0)
+ lli->reg_lnk = BIT(D40_SREG_LNK_PHY_TCP_POS);
+ else
+ lli->reg_lnk = next_lli;
+
+ /* Set/clear interrupt generation on this link item.*/
+ if (term_int)
+ lli->reg_cfg |= BIT(D40_SREG_CFG_TIM_POS);
+ else
+ lli->reg_cfg &= ~BIT(D40_SREG_CFG_TIM_POS);
+
+ /*
+ * Post link - D40_SREG_LNK_PHY_PRE_POS = 0
+ * Relink happens after transfer completion.
+ */
+
+ return 0;
+}
+
+static int d40_seg_size(int size, int data_width1, int data_width2)
+{
+ u32 max_w = max(data_width1, data_width2);
+ u32 min_w = min(data_width1, data_width2);
+ u32 seg_max = ALIGN(STEDMA40_MAX_SEG_SIZE * min_w, max_w);
+
+ if (seg_max > STEDMA40_MAX_SEG_SIZE)
+ seg_max -= max_w;
+
+ if (size <= seg_max)
+ return size;
+
+ if (size <= 2 * seg_max)
+ return ALIGN(size / 2, max_w);
+
+ return seg_max;
+}
+
+static struct d40_phy_lli *
+d40_phy_buf_to_lli(struct d40_phy_lli *lli, dma_addr_t addr, u32 size,
+ dma_addr_t lli_phys, dma_addr_t first_phys, u32 reg_cfg,
+ struct stedma40_half_channel_info *info,
+ struct stedma40_half_channel_info *otherinfo,
+ unsigned long flags)
+{
+ bool lastlink = flags & LLI_LAST_LINK;
+ bool addr_inc = flags & LLI_ADDR_INC;
+ bool term_int = flags & LLI_TERM_INT;
+ bool cyclic = flags & LLI_CYCLIC;
+ int err;
+ dma_addr_t next = lli_phys;
+ int size_rest = size;
+ int size_seg = 0;
+
+ /*
+ * This piece may be split up based on d40_seg_size(); we only want the
+ * term int on the last part.
+ */
+ if (term_int)
+ flags &= ~LLI_TERM_INT;
+
+ do {
+ size_seg = d40_seg_size(size_rest, info->data_width,
+ otherinfo->data_width);
+ size_rest -= size_seg;
+
+ if (size_rest == 0 && term_int)
+ flags |= LLI_TERM_INT;
+
+ if (size_rest == 0 && lastlink)
+ next = cyclic ? first_phys : 0;
+ else
+ next = ALIGN(next + sizeof(struct d40_phy_lli),
+ D40_LLI_ALIGN);
+
+ err = d40_phy_fill_lli(lli, addr, size_seg, next,
+ reg_cfg, info, flags);
+
+ if (err)
+ goto err;
+
+ lli++;
+ if (addr_inc)
+ addr += size_seg;
+ } while (size_rest);
+
+ return lli;
+
+err:
+ return NULL;
+}
+
+int d40_phy_sg_to_lli(struct scatterlist *sg,
+ int sg_len,
+ dma_addr_t target,
+ struct d40_phy_lli *lli_sg,
+ dma_addr_t lli_phys,
+ u32 reg_cfg,
+ struct stedma40_half_channel_info *info,
+ struct stedma40_half_channel_info *otherinfo,
+ unsigned long flags)
+{
+ int total_size = 0;
+ int i;
+ struct scatterlist *current_sg = sg;
+ struct d40_phy_lli *lli = lli_sg;
+ dma_addr_t l_phys = lli_phys;
+
+ if (!target)
+ flags |= LLI_ADDR_INC;
+
+ for_each_sg(sg, current_sg, sg_len, i) {
+ dma_addr_t sg_addr = sg_dma_address(current_sg);
+ unsigned int len = sg_dma_len(current_sg);
+ dma_addr_t dst = target ?: sg_addr;
+
+ total_size += sg_dma_len(current_sg);
+
+ if (i == sg_len - 1)
+ flags |= LLI_TERM_INT | LLI_LAST_LINK;
+
+ l_phys = ALIGN(lli_phys + (lli - lli_sg) *
+ sizeof(struct d40_phy_lli), D40_LLI_ALIGN);
+
+ lli = d40_phy_buf_to_lli(lli, dst, len, l_phys, lli_phys,
+ reg_cfg, info, otherinfo, flags);
+
+ if (lli == NULL)
+ return -EINVAL;
+ }
+
+ return total_size;
+}
+
+
+/* DMA logical lli operations */
+
+static void d40_log_lli_link(struct d40_log_lli *lli_dst,
+ struct d40_log_lli *lli_src,
+ int next, unsigned int flags)
+{
+ bool interrupt = flags & LLI_TERM_INT;
+ u32 slos = 0;
+ u32 dlos = 0;
+
+ if (next != -EINVAL) {
+ slos = next * 2;
+ dlos = next * 2 + 1;
+ }
+
+ if (interrupt) {
+ lli_dst->lcsp13 |= D40_MEM_LCSP1_SCFG_TIM_MASK;
+ lli_dst->lcsp13 |= D40_MEM_LCSP3_DTCP_MASK;
+ }
+
+ lli_src->lcsp13 = (lli_src->lcsp13 & ~D40_MEM_LCSP1_SLOS_MASK) |
+ (slos << D40_MEM_LCSP1_SLOS_POS);
+
+ lli_dst->lcsp13 = (lli_dst->lcsp13 & ~D40_MEM_LCSP1_SLOS_MASK) |
+ (dlos << D40_MEM_LCSP1_SLOS_POS);
+}
+
+void d40_log_lli_lcpa_write(struct d40_log_lli_full *lcpa,
+ struct d40_log_lli *lli_dst,
+ struct d40_log_lli *lli_src,
+ int next, unsigned int flags)
+{
+ d40_log_lli_link(lli_dst, lli_src, next, flags);
+
+ writel_relaxed(lli_src->lcsp02, &lcpa[0].lcsp0);
+ writel_relaxed(lli_src->lcsp13, &lcpa[0].lcsp1);
+ writel_relaxed(lli_dst->lcsp02, &lcpa[0].lcsp2);
+ writel_relaxed(lli_dst->lcsp13, &lcpa[0].lcsp3);
+}
+
+void d40_log_lli_lcla_write(struct d40_log_lli *lcla,
+ struct d40_log_lli *lli_dst,
+ struct d40_log_lli *lli_src,
+ int next, unsigned int flags)
+{
+ d40_log_lli_link(lli_dst, lli_src, next, flags);
+
+ writel_relaxed(lli_src->lcsp02, &lcla[0].lcsp02);
+ writel_relaxed(lli_src->lcsp13, &lcla[0].lcsp13);
+ writel_relaxed(lli_dst->lcsp02, &lcla[1].lcsp02);
+ writel_relaxed(lli_dst->lcsp13, &lcla[1].lcsp13);
+}
+
+static void d40_log_fill_lli(struct d40_log_lli *lli,
+ dma_addr_t data, u32 data_size,
+ u32 reg_cfg,
+ u32 data_width,
+ unsigned int flags)
+{
+ bool addr_inc = flags & LLI_ADDR_INC;
+
+ lli->lcsp13 = reg_cfg;
+
+ /* The number of elements to transfer */
+ lli->lcsp02 = ((data_size / data_width) <<
+ D40_MEM_LCSP0_ECNT_POS) & D40_MEM_LCSP0_ECNT_MASK;
+
+ BUG_ON((data_size / data_width) > STEDMA40_MAX_SEG_SIZE);
+
+ /* 16 LSBs address of the current element */
+ lli->lcsp02 |= data & D40_MEM_LCSP0_SPTR_MASK;
+ /* 16 MSBs address of the current element */
+ lli->lcsp13 |= data & D40_MEM_LCSP1_SPTR_MASK;
+
+ if (addr_inc)
+ lli->lcsp13 |= D40_MEM_LCSP1_SCFG_INCR_MASK;
+
+}
+
+static struct d40_log_lli *d40_log_buf_to_lli(struct d40_log_lli *lli_sg,
+ dma_addr_t addr,
+ int size,
+ u32 lcsp13, /* src or dst*/
+ u32 data_width1,
+ u32 data_width2,
+ unsigned int flags)
+{
+ bool addr_inc = flags & LLI_ADDR_INC;
+ struct d40_log_lli *lli = lli_sg;
+ int size_rest = size;
+ int size_seg = 0;
+
+ do {
+ size_seg = d40_seg_size(size_rest, data_width1, data_width2);
+ size_rest -= size_seg;
+
+ d40_log_fill_lli(lli,
+ addr,
+ size_seg,
+ lcsp13, data_width1,
+ flags);
+ if (addr_inc)
+ addr += size_seg;
+ lli++;
+ } while (size_rest);
+
+ return lli;
+}
+
+int d40_log_sg_to_lli(struct scatterlist *sg,
+ int sg_len,
+ dma_addr_t dev_addr,
+ struct d40_log_lli *lli_sg,
+ u32 lcsp13, /* src or dst*/
+ u32 data_width1, u32 data_width2)
+{
+ int total_size = 0;
+ struct scatterlist *current_sg = sg;
+ int i;
+ struct d40_log_lli *lli = lli_sg;
+ unsigned long flags = 0;
+
+ if (!dev_addr)
+ flags |= LLI_ADDR_INC;
+
+ for_each_sg(sg, current_sg, sg_len, i) {
+ dma_addr_t sg_addr = sg_dma_address(current_sg);
+ unsigned int len = sg_dma_len(current_sg);
+ dma_addr_t addr = dev_addr ?: sg_addr;
+
+ total_size += sg_dma_len(current_sg);
+
+ lli = d40_log_buf_to_lli(lli, addr, len,
+ lcsp13,
+ data_width1,
+ data_width2,
+ flags);
+ }
+
+ return total_size;
+}
diff --git a/drivers/dma/ste_dma40_ll.h b/drivers/dma/ste_dma40_ll.h
new file mode 100644
index 000000000..c504e855e
--- /dev/null
+++ b/drivers/dma/ste_dma40_ll.h
@@ -0,0 +1,470 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) ST-Ericsson SA 2007-2010
+ * Author: Per Friden <per.friden@stericsson.com> for ST-Ericsson SA
+ * Author: Jonas Aaberg <jonas.aberg@stericsson.com> for ST-Ericsson SA
+ */
+#ifndef STE_DMA40_LL_H
+#define STE_DMA40_LL_H
+
+#define D40_DREG_PCBASE 0x400
+#define D40_DREG_PCDELTA (8 * 4)
+#define D40_LLI_ALIGN 16 /* LLI alignment must be 16 bytes. */
+
+#define D40_LCPA_CHAN_SIZE 32
+#define D40_LCPA_CHAN_DST_DELTA 16
+
+#define D40_TYPE_TO_GROUP(type) (type / 16)
+#define D40_TYPE_TO_EVENT(type) (type % 16)
+#define D40_GROUP_SIZE 8
+#define D40_PHYS_TO_GROUP(phys) ((phys & (D40_GROUP_SIZE - 1)) / 2)
+
+/* Most bits of the CFG register are the same in log as in phy mode */
+#define D40_SREG_CFG_MST_POS 15
+#define D40_SREG_CFG_TIM_POS 14
+#define D40_SREG_CFG_EIM_POS 13
+#define D40_SREG_CFG_LOG_INCR_POS 12
+#define D40_SREG_CFG_PHY_PEN_POS 12
+#define D40_SREG_CFG_PSIZE_POS 10
+#define D40_SREG_CFG_ESIZE_POS 8
+#define D40_SREG_CFG_PRI_POS 7
+#define D40_SREG_CFG_LBE_POS 6
+#define D40_SREG_CFG_LOG_GIM_POS 5
+#define D40_SREG_CFG_LOG_MFU_POS 4
+#define D40_SREG_CFG_PHY_TM_POS 4
+#define D40_SREG_CFG_PHY_EVTL_POS 0
+
+
+/* Standard channel parameters - basic mode (element register) */
+#define D40_SREG_ELEM_PHY_ECNT_POS 16
+#define D40_SREG_ELEM_PHY_EIDX_POS 0
+
+#define D40_SREG_ELEM_PHY_ECNT_MASK (0xFFFF << D40_SREG_ELEM_PHY_ECNT_POS)
+
+/* Standard channel parameters - basic mode (Link register) */
+#define D40_SREG_LNK_PHY_TCP_POS 0
+#define D40_SREG_LNK_PHY_LMP_POS 1
+#define D40_SREG_LNK_PHY_PRE_POS 2
+/*
+ * Source destination link address. Contains the
+ * 29-bit byte word aligned address of the reload area.
+ */
+#define D40_SREG_LNK_PHYS_LNK_MASK 0xFFFFFFF8UL
+
+/* Standard basic channel logical mode */
+
+/* Element register */
+#define D40_SREG_ELEM_LOG_ECNT_POS 16
+#define D40_SREG_ELEM_LOG_LIDX_POS 8
+#define D40_SREG_ELEM_LOG_LOS_POS 1
+#define D40_SREG_ELEM_LOG_TCP_POS 0
+
+#define D40_SREG_ELEM_LOG_LIDX_MASK (0xFF << D40_SREG_ELEM_LOG_LIDX_POS)
+
+/* Link register */
+#define D40_EVENTLINE_POS(i) (2 * i)
+#define D40_EVENTLINE_MASK(i) (0x3 << D40_EVENTLINE_POS(i))
+
+/* Standard basic channel logical params in memory */
+
+/* LCSP0 */
+#define D40_MEM_LCSP0_ECNT_POS 16
+#define D40_MEM_LCSP0_SPTR_POS 0
+
+#define D40_MEM_LCSP0_ECNT_MASK (0xFFFF << D40_MEM_LCSP0_ECNT_POS)
+#define D40_MEM_LCSP0_SPTR_MASK (0xFFFF << D40_MEM_LCSP0_SPTR_POS)
+
+/* LCSP1 */
+#define D40_MEM_LCSP1_SPTR_POS 16
+#define D40_MEM_LCSP1_SCFG_MST_POS 15
+#define D40_MEM_LCSP1_SCFG_TIM_POS 14
+#define D40_MEM_LCSP1_SCFG_EIM_POS 13
+#define D40_MEM_LCSP1_SCFG_INCR_POS 12
+#define D40_MEM_LCSP1_SCFG_PSIZE_POS 10
+#define D40_MEM_LCSP1_SCFG_ESIZE_POS 8
+#define D40_MEM_LCSP1_SLOS_POS 1
+#define D40_MEM_LCSP1_STCP_POS 0
+
+#define D40_MEM_LCSP1_SPTR_MASK (0xFFFF << D40_MEM_LCSP1_SPTR_POS)
+#define D40_MEM_LCSP1_SCFG_TIM_MASK (0x1 << D40_MEM_LCSP1_SCFG_TIM_POS)
+#define D40_MEM_LCSP1_SCFG_INCR_MASK (0x1 << D40_MEM_LCSP1_SCFG_INCR_POS)
+#define D40_MEM_LCSP1_SCFG_PSIZE_MASK (0x3 << D40_MEM_LCSP1_SCFG_PSIZE_POS)
+#define D40_MEM_LCSP1_SLOS_MASK (0x7F << D40_MEM_LCSP1_SLOS_POS)
+#define D40_MEM_LCSP1_STCP_MASK (0x1 << D40_MEM_LCSP1_STCP_POS)
+
+/* LCSP2 */
+#define D40_MEM_LCSP2_ECNT_POS 16
+
+#define D40_MEM_LCSP2_ECNT_MASK (0xFFFF << D40_MEM_LCSP2_ECNT_POS)
+
+/* LCSP3 */
+#define D40_MEM_LCSP3_DCFG_MST_POS 15
+#define D40_MEM_LCSP3_DCFG_TIM_POS 14
+#define D40_MEM_LCSP3_DCFG_EIM_POS 13
+#define D40_MEM_LCSP3_DCFG_INCR_POS 12
+#define D40_MEM_LCSP3_DCFG_PSIZE_POS 10
+#define D40_MEM_LCSP3_DCFG_ESIZE_POS 8
+#define D40_MEM_LCSP3_DLOS_POS 1
+#define D40_MEM_LCSP3_DTCP_POS 0
+
+#define D40_MEM_LCSP3_DLOS_MASK (0x7F << D40_MEM_LCSP3_DLOS_POS)
+#define D40_MEM_LCSP3_DTCP_MASK (0x1 << D40_MEM_LCSP3_DTCP_POS)
+
+
+/* Standard channel parameter register offsets */
+#define D40_CHAN_REG_SSCFG 0x00
+#define D40_CHAN_REG_SSELT 0x04
+#define D40_CHAN_REG_SSPTR 0x08
+#define D40_CHAN_REG_SSLNK 0x0C
+#define D40_CHAN_REG_SDCFG 0x10
+#define D40_CHAN_REG_SDELT 0x14
+#define D40_CHAN_REG_SDPTR 0x18
+#define D40_CHAN_REG_SDLNK 0x1C
+
+/* DMA Register Offsets */
+#define D40_DREG_GCC 0x000
+#define D40_DREG_GCC_ENA 0x1
+/* This assumes that there are only 4 event groups */
+#define D40_DREG_GCC_ENABLE_ALL 0x3ff01
+#define D40_DREG_GCC_EVTGRP_POS 8
+#define D40_DREG_GCC_SRC 0
+#define D40_DREG_GCC_DST 1
+#define D40_DREG_GCC_EVTGRP_ENA(x, y) \
+ (1 << (D40_DREG_GCC_EVTGRP_POS + 2 * x + y))
+
+#define D40_DREG_PRTYP 0x004
+#define D40_DREG_PRSME 0x008
+#define D40_DREG_PRSMO 0x00C
+#define D40_DREG_PRMSE 0x010
+#define D40_DREG_PRMSO 0x014
+#define D40_DREG_PRMOE 0x018
+#define D40_DREG_PRMOO 0x01C
+#define D40_DREG_PRMO_PCHAN_BASIC 0x1
+#define D40_DREG_PRMO_PCHAN_MODULO 0x2
+#define D40_DREG_PRMO_PCHAN_DOUBLE_DST 0x3
+#define D40_DREG_PRMO_LCHAN_SRC_PHY_DST_LOG 0x1
+#define D40_DREG_PRMO_LCHAN_SRC_LOG_DST_PHY 0x2
+#define D40_DREG_PRMO_LCHAN_SRC_LOG_DST_LOG 0x3
+
+#define D40_DREG_LCPA 0x020
+#define D40_DREG_LCLA 0x024
+
+#define D40_DREG_SSEG1 0x030
+#define D40_DREG_SSEG2 0x034
+#define D40_DREG_SSEG3 0x038
+#define D40_DREG_SSEG4 0x03C
+
+#define D40_DREG_SCEG1 0x040
+#define D40_DREG_SCEG2 0x044
+#define D40_DREG_SCEG3 0x048
+#define D40_DREG_SCEG4 0x04C
+
+#define D40_DREG_ACTIVE 0x050
+#define D40_DREG_ACTIVO 0x054
+#define D40_DREG_CIDMOD 0x058
+#define D40_DREG_TCIDV 0x05C
+#define D40_DREG_PCMIS 0x060
+#define D40_DREG_PCICR 0x064
+#define D40_DREG_PCTIS 0x068
+#define D40_DREG_PCEIS 0x06C
+
+#define D40_DREG_SPCMIS 0x070
+#define D40_DREG_SPCICR 0x074
+#define D40_DREG_SPCTIS 0x078
+#define D40_DREG_SPCEIS 0x07C
+
+#define D40_DREG_LCMIS0 0x080
+#define D40_DREG_LCMIS1 0x084
+#define D40_DREG_LCMIS2 0x088
+#define D40_DREG_LCMIS3 0x08C
+#define D40_DREG_LCICR0 0x090
+#define D40_DREG_LCICR1 0x094
+#define D40_DREG_LCICR2 0x098
+#define D40_DREG_LCICR3 0x09C
+#define D40_DREG_LCTIS0 0x0A0
+#define D40_DREG_LCTIS1 0x0A4
+#define D40_DREG_LCTIS2 0x0A8
+#define D40_DREG_LCTIS3 0x0AC
+#define D40_DREG_LCEIS0 0x0B0
+#define D40_DREG_LCEIS1 0x0B4
+#define D40_DREG_LCEIS2 0x0B8
+#define D40_DREG_LCEIS3 0x0BC
+
+#define D40_DREG_SLCMIS1 0x0C0
+#define D40_DREG_SLCMIS2 0x0C4
+#define D40_DREG_SLCMIS3 0x0C8
+#define D40_DREG_SLCMIS4 0x0CC
+
+#define D40_DREG_SLCICR1 0x0D0
+#define D40_DREG_SLCICR2 0x0D4
+#define D40_DREG_SLCICR3 0x0D8
+#define D40_DREG_SLCICR4 0x0DC
+
+#define D40_DREG_SLCTIS1 0x0E0
+#define D40_DREG_SLCTIS2 0x0E4
+#define D40_DREG_SLCTIS3 0x0E8
+#define D40_DREG_SLCTIS4 0x0EC
+
+#define D40_DREG_SLCEIS1 0x0F0
+#define D40_DREG_SLCEIS2 0x0F4
+#define D40_DREG_SLCEIS3 0x0F8
+#define D40_DREG_SLCEIS4 0x0FC
+
+#define D40_DREG_FSESS1 0x100
+#define D40_DREG_FSESS2 0x104
+
+#define D40_DREG_FSEBS1 0x108
+#define D40_DREG_FSEBS2 0x10C
+
+#define D40_DREG_PSEG1 0x110
+#define D40_DREG_PSEG2 0x114
+#define D40_DREG_PSEG3 0x118
+#define D40_DREG_PSEG4 0x11C
+#define D40_DREG_PCEG1 0x120
+#define D40_DREG_PCEG2 0x124
+#define D40_DREG_PCEG3 0x128
+#define D40_DREG_PCEG4 0x12C
+#define D40_DREG_RSEG1 0x130
+#define D40_DREG_RSEG2 0x134
+#define D40_DREG_RSEG3 0x138
+#define D40_DREG_RSEG4 0x13C
+#define D40_DREG_RCEG1 0x140
+#define D40_DREG_RCEG2 0x144
+#define D40_DREG_RCEG3 0x148
+#define D40_DREG_RCEG4 0x14C
+
+#define D40_DREG_PREFOT 0x15C
+#define D40_DREG_EXTCFG 0x160
+
+#define D40_DREG_CPSEG1 0x200
+#define D40_DREG_CPSEG2 0x204
+#define D40_DREG_CPSEG3 0x208
+#define D40_DREG_CPSEG4 0x20C
+#define D40_DREG_CPSEG5 0x210
+
+#define D40_DREG_CPCEG1 0x220
+#define D40_DREG_CPCEG2 0x224
+#define D40_DREG_CPCEG3 0x228
+#define D40_DREG_CPCEG4 0x22C
+#define D40_DREG_CPCEG5 0x230
+
+#define D40_DREG_CRSEG1 0x240
+#define D40_DREG_CRSEG2 0x244
+#define D40_DREG_CRSEG3 0x248
+#define D40_DREG_CRSEG4 0x24C
+#define D40_DREG_CRSEG5 0x250
+
+#define D40_DREG_CRCEG1 0x260
+#define D40_DREG_CRCEG2 0x264
+#define D40_DREG_CRCEG3 0x268
+#define D40_DREG_CRCEG4 0x26C
+#define D40_DREG_CRCEG5 0x270
+
+#define D40_DREG_CFSESS1 0x280
+#define D40_DREG_CFSESS2 0x284
+#define D40_DREG_CFSESS3 0x288
+
+#define D40_DREG_CFSEBS1 0x290
+#define D40_DREG_CFSEBS2 0x294
+#define D40_DREG_CFSEBS3 0x298
+
+#define D40_DREG_CLCMIS1 0x300
+#define D40_DREG_CLCMIS2 0x304
+#define D40_DREG_CLCMIS3 0x308
+#define D40_DREG_CLCMIS4 0x30C
+#define D40_DREG_CLCMIS5 0x310
+
+#define D40_DREG_CLCICR1 0x320
+#define D40_DREG_CLCICR2 0x324
+#define D40_DREG_CLCICR3 0x328
+#define D40_DREG_CLCICR4 0x32C
+#define D40_DREG_CLCICR5 0x330
+
+#define D40_DREG_CLCTIS1 0x340
+#define D40_DREG_CLCTIS2 0x344
+#define D40_DREG_CLCTIS3 0x348
+#define D40_DREG_CLCTIS4 0x34C
+#define D40_DREG_CLCTIS5 0x350
+
+#define D40_DREG_CLCEIS1 0x360
+#define D40_DREG_CLCEIS2 0x364
+#define D40_DREG_CLCEIS3 0x368
+#define D40_DREG_CLCEIS4 0x36C
+#define D40_DREG_CLCEIS5 0x370
+
+#define D40_DREG_CPCMIS 0x380
+#define D40_DREG_CPCICR 0x384
+#define D40_DREG_CPCTIS 0x388
+#define D40_DREG_CPCEIS 0x38C
+
+#define D40_DREG_SCCIDA1 0xE80
+#define D40_DREG_SCCIDA2 0xE90
+#define D40_DREG_SCCIDA3 0xEA0
+#define D40_DREG_SCCIDA4 0xEB0
+#define D40_DREG_SCCIDA5 0xEC0
+
+#define D40_DREG_SCCIDB1 0xE84
+#define D40_DREG_SCCIDB2 0xE94
+#define D40_DREG_SCCIDB3 0xEA4
+#define D40_DREG_SCCIDB4 0xEB4
+#define D40_DREG_SCCIDB5 0xEC4
+
+#define D40_DREG_PRSCCIDA 0xF80
+#define D40_DREG_PRSCCIDB 0xF84
+
+#define D40_DREG_STFU 0xFC8
+#define D40_DREG_ICFG 0xFCC
+#define D40_DREG_PERIPHID0 0xFE0
+#define D40_DREG_PERIPHID1 0xFE4
+#define D40_DREG_PERIPHID2 0xFE8
+#define D40_DREG_PERIPHID3 0xFEC
+#define D40_DREG_CELLID0 0xFF0
+#define D40_DREG_CELLID1 0xFF4
+#define D40_DREG_CELLID2 0xFF8
+#define D40_DREG_CELLID3 0xFFC
+
+/* LLI related structures */
+
+/**
+ * struct d40_phy_lli - The basic configuration register for each physical
+ * channel.
+ *
+ * @reg_cfg: The configuration register.
+ * @reg_elt: The element register.
+ * @reg_ptr: The pointer register.
+ * @reg_lnk: The link register.
+ *
+ * These registers are set up for both physical and logical transfers
+ * Note that the bit in each register means differently in logical and
+ * physical(standard) mode.
+ *
+ * This struct must be 16 bytes aligned, and only contain physical registers
+ * since it will be directly accessed by the DMA.
+ */
+struct d40_phy_lli {
+ u32 reg_cfg;
+ u32 reg_elt;
+ u32 reg_ptr;
+ u32 reg_lnk;
+};
+
+/**
+ * struct d40_phy_lli_bidir - struct for a transfer.
+ *
+ * @src: Register settings for src channel.
+ * @dst: Register settings for dst channel.
+ *
+ * All DMA transfers have a source and a destination.
+ */
+
+struct d40_phy_lli_bidir {
+ struct d40_phy_lli *src;
+ struct d40_phy_lli *dst;
+};
+
+
+/**
+ * struct d40_log_lli - logical lli configuration
+ *
+ * @lcsp02: Either maps to register lcsp0 if src or lcsp2 if dst.
+ * @lcsp13: Either maps to register lcsp1 if src or lcsp3 if dst.
+ *
+ * This struct must be 8 bytes aligned since it will be accessed directy by
+ * the DMA. Never add any none hw mapped registers to this struct.
+ */
+
+struct d40_log_lli {
+ u32 lcsp02;
+ u32 lcsp13;
+};
+
+/**
+ * struct d40_log_lli_bidir - For both src and dst
+ *
+ * @src: pointer to src lli configuration.
+ * @dst: pointer to dst lli configuration.
+ *
+ * You always have a src and a dst when doing DMA transfers.
+ */
+
+struct d40_log_lli_bidir {
+ struct d40_log_lli *src;
+ struct d40_log_lli *dst;
+};
+
+/**
+ * struct d40_log_lli_full - LCPA layout
+ *
+ * @lcsp0: Logical Channel Standard Param 0 - Src.
+ * @lcsp1: Logical Channel Standard Param 1 - Src.
+ * @lcsp2: Logical Channel Standard Param 2 - Dst.
+ * @lcsp3: Logical Channel Standard Param 3 - Dst.
+ *
+ * This struct maps to LCPA physical memory layout. Must map to
+ * the hw.
+ */
+struct d40_log_lli_full {
+ u32 lcsp0;
+ u32 lcsp1;
+ u32 lcsp2;
+ u32 lcsp3;
+};
+
+/**
+ * struct d40_def_lcsp - Default LCSP1 and LCSP3 settings
+ *
+ * @lcsp3: The default configuration for dst.
+ * @lcsp1: The default configuration for src.
+ */
+struct d40_def_lcsp {
+ u32 lcsp3;
+ u32 lcsp1;
+};
+
+/* Physical channels */
+
+enum d40_lli_flags {
+ LLI_ADDR_INC = 1 << 0,
+ LLI_TERM_INT = 1 << 1,
+ LLI_CYCLIC = 1 << 2,
+ LLI_LAST_LINK = 1 << 3,
+};
+
+void d40_phy_cfg(struct stedma40_chan_cfg *cfg,
+ u32 *src_cfg,
+ u32 *dst_cfg);
+
+void d40_log_cfg(struct stedma40_chan_cfg *cfg,
+ u32 *lcsp1,
+ u32 *lcsp2);
+
+int d40_phy_sg_to_lli(struct scatterlist *sg,
+ int sg_len,
+ dma_addr_t target,
+ struct d40_phy_lli *lli,
+ dma_addr_t lli_phys,
+ u32 reg_cfg,
+ struct stedma40_half_channel_info *info,
+ struct stedma40_half_channel_info *otherinfo,
+ unsigned long flags);
+
+/* Logical channels */
+
+int d40_log_sg_to_lli(struct scatterlist *sg,
+ int sg_len,
+ dma_addr_t dev_addr,
+ struct d40_log_lli *lli_sg,
+ u32 lcsp13, /* src or dst*/
+ u32 data_width1, u32 data_width2);
+
+void d40_log_lli_lcpa_write(struct d40_log_lli_full *lcpa,
+ struct d40_log_lli *lli_dst,
+ struct d40_log_lli *lli_src,
+ int next, unsigned int flags);
+
+void d40_log_lli_lcla_write(struct d40_log_lli *lcla,
+ struct d40_log_lli *lli_dst,
+ struct d40_log_lli *lli_src,
+ int next, unsigned int flags);
+
+#endif /* STE_DMA40_LLI_H */
diff --git a/drivers/dma/stm32-dma.c b/drivers/dma/stm32-dma.c
new file mode 100644
index 000000000..7abcd7f28
--- /dev/null
+++ b/drivers/dma/stm32-dma.c
@@ -0,0 +1,1794 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Driver for STM32 DMA controller
+ *
+ * Inspired by dma-jz4740.c and tegra20-apb-dma.c
+ *
+ * Copyright (C) M'boumba Cedric Madianga 2015
+ * Author: M'boumba Cedric Madianga <cedric.madianga@gmail.com>
+ * Pierre-Yves Mordret <pierre-yves.mordret@st.com>
+ */
+
+#include <linux/bitfield.h>
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/iopoll.h>
+#include <linux/jiffies.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/of_dma.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/reset.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+
+#include "virt-dma.h"
+
+#define STM32_DMA_LISR 0x0000 /* DMA Low Int Status Reg */
+#define STM32_DMA_HISR 0x0004 /* DMA High Int Status Reg */
+#define STM32_DMA_ISR(n) (((n) & 4) ? STM32_DMA_HISR : STM32_DMA_LISR)
+#define STM32_DMA_LIFCR 0x0008 /* DMA Low Int Flag Clear Reg */
+#define STM32_DMA_HIFCR 0x000c /* DMA High Int Flag Clear Reg */
+#define STM32_DMA_IFCR(n) (((n) & 4) ? STM32_DMA_HIFCR : STM32_DMA_LIFCR)
+#define STM32_DMA_TCI BIT(5) /* Transfer Complete Interrupt */
+#define STM32_DMA_HTI BIT(4) /* Half Transfer Interrupt */
+#define STM32_DMA_TEI BIT(3) /* Transfer Error Interrupt */
+#define STM32_DMA_DMEI BIT(2) /* Direct Mode Error Interrupt */
+#define STM32_DMA_FEI BIT(0) /* FIFO Error Interrupt */
+#define STM32_DMA_MASKI (STM32_DMA_TCI \
+ | STM32_DMA_TEI \
+ | STM32_DMA_DMEI \
+ | STM32_DMA_FEI)
+/*
+ * If (chan->id % 4) is 2 or 3, left shift the mask by 16 bits;
+ * if (ch % 4) is 1 or 3, additionally left shift the mask by 6 bits.
+ */
+#define STM32_DMA_FLAGS_SHIFT(n) ({ typeof(n) (_n) = (n); \
+ (((_n) & 2) << 3) | (((_n) & 1) * 6); })
+
+/* DMA Stream x Configuration Register */
+#define STM32_DMA_SCR(x) (0x0010 + 0x18 * (x)) /* x = 0..7 */
+#define STM32_DMA_SCR_REQ_MASK GENMASK(27, 25)
+#define STM32_DMA_SCR_MBURST_MASK GENMASK(24, 23)
+#define STM32_DMA_SCR_PBURST_MASK GENMASK(22, 21)
+#define STM32_DMA_SCR_PL_MASK GENMASK(17, 16)
+#define STM32_DMA_SCR_MSIZE_MASK GENMASK(14, 13)
+#define STM32_DMA_SCR_PSIZE_MASK GENMASK(12, 11)
+#define STM32_DMA_SCR_DIR_MASK GENMASK(7, 6)
+#define STM32_DMA_SCR_TRBUFF BIT(20) /* Bufferable transfer for USART/UART */
+#define STM32_DMA_SCR_CT BIT(19) /* Target in double buffer */
+#define STM32_DMA_SCR_DBM BIT(18) /* Double Buffer Mode */
+#define STM32_DMA_SCR_PINCOS BIT(15) /* Peripheral inc offset size */
+#define STM32_DMA_SCR_MINC BIT(10) /* Memory increment mode */
+#define STM32_DMA_SCR_PINC BIT(9) /* Peripheral increment mode */
+#define STM32_DMA_SCR_CIRC BIT(8) /* Circular mode */
+#define STM32_DMA_SCR_PFCTRL BIT(5) /* Peripheral Flow Controller */
+#define STM32_DMA_SCR_TCIE BIT(4) /* Transfer Complete Int Enable
+ */
+#define STM32_DMA_SCR_TEIE BIT(2) /* Transfer Error Int Enable */
+#define STM32_DMA_SCR_DMEIE BIT(1) /* Direct Mode Err Int Enable */
+#define STM32_DMA_SCR_EN BIT(0) /* Stream Enable */
+#define STM32_DMA_SCR_CFG_MASK (STM32_DMA_SCR_PINC \
+ | STM32_DMA_SCR_MINC \
+ | STM32_DMA_SCR_PINCOS \
+ | STM32_DMA_SCR_PL_MASK)
+#define STM32_DMA_SCR_IRQ_MASK (STM32_DMA_SCR_TCIE \
+ | STM32_DMA_SCR_TEIE \
+ | STM32_DMA_SCR_DMEIE)
+
+/* DMA Stream x number of data register */
+#define STM32_DMA_SNDTR(x) (0x0014 + 0x18 * (x))
+
+/* DMA stream peripheral address register */
+#define STM32_DMA_SPAR(x) (0x0018 + 0x18 * (x))
+
+/* DMA stream x memory 0 address register */
+#define STM32_DMA_SM0AR(x) (0x001c + 0x18 * (x))
+
+/* DMA stream x memory 1 address register */
+#define STM32_DMA_SM1AR(x) (0x0020 + 0x18 * (x))
+
+/* DMA stream x FIFO control register */
+#define STM32_DMA_SFCR(x) (0x0024 + 0x18 * (x))
+#define STM32_DMA_SFCR_FTH_MASK GENMASK(1, 0)
+#define STM32_DMA_SFCR_FEIE BIT(7) /* FIFO error interrupt enable */
+#define STM32_DMA_SFCR_DMDIS BIT(2) /* Direct mode disable */
+#define STM32_DMA_SFCR_MASK (STM32_DMA_SFCR_FEIE \
+ | STM32_DMA_SFCR_DMDIS)
+
+/* DMA direction */
+#define STM32_DMA_DEV_TO_MEM 0x00
+#define STM32_DMA_MEM_TO_DEV 0x01
+#define STM32_DMA_MEM_TO_MEM 0x02
+
+/* DMA priority level */
+#define STM32_DMA_PRIORITY_LOW 0x00
+#define STM32_DMA_PRIORITY_MEDIUM 0x01
+#define STM32_DMA_PRIORITY_HIGH 0x02
+#define STM32_DMA_PRIORITY_VERY_HIGH 0x03
+
+/* DMA FIFO threshold selection */
+#define STM32_DMA_FIFO_THRESHOLD_1QUARTERFULL 0x00
+#define STM32_DMA_FIFO_THRESHOLD_HALFFULL 0x01
+#define STM32_DMA_FIFO_THRESHOLD_3QUARTERSFULL 0x02
+#define STM32_DMA_FIFO_THRESHOLD_FULL 0x03
+#define STM32_DMA_FIFO_THRESHOLD_NONE 0x04
+
+#define STM32_DMA_MAX_DATA_ITEMS 0xffff
+/*
+ * Valid transfer starts from @0 to @0xFFFE leading to unaligned scatter
+ * gather at boundary. Thus it's safer to round down this value on FIFO
+ * size (16 Bytes)
+ */
+#define STM32_DMA_ALIGNED_MAX_DATA_ITEMS \
+ ALIGN_DOWN(STM32_DMA_MAX_DATA_ITEMS, 16)
+#define STM32_DMA_MAX_CHANNELS 0x08
+#define STM32_DMA_MAX_REQUEST_ID 0x08
+#define STM32_DMA_MAX_DATA_PARAM 0x03
+#define STM32_DMA_FIFO_SIZE 16 /* FIFO is 16 bytes */
+#define STM32_DMA_MIN_BURST 4
+#define STM32_DMA_MAX_BURST 16
+
+/* DMA Features */
+#define STM32_DMA_THRESHOLD_FTR_MASK GENMASK(1, 0)
+#define STM32_DMA_DIRECT_MODE_MASK BIT(2)
+#define STM32_DMA_ALT_ACK_MODE_MASK BIT(4)
+#define STM32_DMA_MDMA_STREAM_ID_MASK GENMASK(19, 16)
+
+enum stm32_dma_width {
+ STM32_DMA_BYTE,
+ STM32_DMA_HALF_WORD,
+ STM32_DMA_WORD,
+};
+
+enum stm32_dma_burst_size {
+ STM32_DMA_BURST_SINGLE,
+ STM32_DMA_BURST_INCR4,
+ STM32_DMA_BURST_INCR8,
+ STM32_DMA_BURST_INCR16,
+};
+
+/**
+ * struct stm32_dma_cfg - STM32 DMA custom configuration
+ * @channel_id: channel ID
+ * @request_line: DMA request
+ * @stream_config: 32bit mask specifying the DMA channel configuration
+ * @features: 32bit mask specifying the DMA Feature list
+ */
+struct stm32_dma_cfg {
+ u32 channel_id;
+ u32 request_line;
+ u32 stream_config;
+ u32 features;
+};
+
+struct stm32_dma_chan_reg {
+ u32 dma_lisr;
+ u32 dma_hisr;
+ u32 dma_lifcr;
+ u32 dma_hifcr;
+ u32 dma_scr;
+ u32 dma_sndtr;
+ u32 dma_spar;
+ u32 dma_sm0ar;
+ u32 dma_sm1ar;
+ u32 dma_sfcr;
+};
+
+struct stm32_dma_sg_req {
+ u32 len;
+ struct stm32_dma_chan_reg chan_reg;
+};
+
+struct stm32_dma_desc {
+ struct virt_dma_desc vdesc;
+ bool cyclic;
+ u32 num_sgs;
+ struct stm32_dma_sg_req sg_req[];
+};
+
+/**
+ * struct stm32_dma_mdma_config - STM32 DMA MDMA configuration
+ * @stream_id: DMA request to trigger STM32 MDMA transfer
+ * @ifcr: DMA interrupt flag clear register address,
+ * used by STM32 MDMA to clear DMA Transfer Complete flag
+ * @tcf: DMA Transfer Complete flag
+ */
+struct stm32_dma_mdma_config {
+ u32 stream_id;
+ u32 ifcr;
+ u32 tcf;
+};
+
+struct stm32_dma_chan {
+ struct virt_dma_chan vchan;
+ bool config_init;
+ bool busy;
+ u32 id;
+ u32 irq;
+ struct stm32_dma_desc *desc;
+ u32 next_sg;
+ struct dma_slave_config dma_sconfig;
+ struct stm32_dma_chan_reg chan_reg;
+ u32 threshold;
+ u32 mem_burst;
+ u32 mem_width;
+ enum dma_status status;
+ bool trig_mdma;
+ struct stm32_dma_mdma_config mdma_config;
+};
+
+struct stm32_dma_device {
+ struct dma_device ddev;
+ void __iomem *base;
+ struct clk *clk;
+ bool mem2mem;
+ struct stm32_dma_chan chan[STM32_DMA_MAX_CHANNELS];
+};
+
+static struct stm32_dma_device *stm32_dma_get_dev(struct stm32_dma_chan *chan)
+{
+ return container_of(chan->vchan.chan.device, struct stm32_dma_device,
+ ddev);
+}
+
+static struct stm32_dma_chan *to_stm32_dma_chan(struct dma_chan *c)
+{
+ return container_of(c, struct stm32_dma_chan, vchan.chan);
+}
+
+static struct stm32_dma_desc *to_stm32_dma_desc(struct virt_dma_desc *vdesc)
+{
+ return container_of(vdesc, struct stm32_dma_desc, vdesc);
+}
+
+static struct device *chan2dev(struct stm32_dma_chan *chan)
+{
+ return &chan->vchan.chan.dev->device;
+}
+
+static u32 stm32_dma_read(struct stm32_dma_device *dmadev, u32 reg)
+{
+ return readl_relaxed(dmadev->base + reg);
+}
+
+static void stm32_dma_write(struct stm32_dma_device *dmadev, u32 reg, u32 val)
+{
+ writel_relaxed(val, dmadev->base + reg);
+}
+
+static int stm32_dma_get_width(struct stm32_dma_chan *chan,
+ enum dma_slave_buswidth width)
+{
+ switch (width) {
+ case DMA_SLAVE_BUSWIDTH_1_BYTE:
+ return STM32_DMA_BYTE;
+ case DMA_SLAVE_BUSWIDTH_2_BYTES:
+ return STM32_DMA_HALF_WORD;
+ case DMA_SLAVE_BUSWIDTH_4_BYTES:
+ return STM32_DMA_WORD;
+ default:
+ dev_err(chan2dev(chan), "Dma bus width not supported\n");
+ return -EINVAL;
+ }
+}
+
+static enum dma_slave_buswidth stm32_dma_get_max_width(u32 buf_len,
+ dma_addr_t buf_addr,
+ u32 threshold)
+{
+ enum dma_slave_buswidth max_width;
+
+ if (threshold == STM32_DMA_FIFO_THRESHOLD_FULL)
+ max_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+ else
+ max_width = DMA_SLAVE_BUSWIDTH_2_BYTES;
+
+ while ((buf_len < max_width || buf_len % max_width) &&
+ max_width > DMA_SLAVE_BUSWIDTH_1_BYTE)
+ max_width = max_width >> 1;
+
+ if (buf_addr & (max_width - 1))
+ max_width = DMA_SLAVE_BUSWIDTH_1_BYTE;
+
+ return max_width;
+}
+
+static bool stm32_dma_fifo_threshold_is_allowed(u32 burst, u32 threshold,
+ enum dma_slave_buswidth width)
+{
+ u32 remaining;
+
+ if (threshold == STM32_DMA_FIFO_THRESHOLD_NONE)
+ return false;
+
+ if (width != DMA_SLAVE_BUSWIDTH_UNDEFINED) {
+ if (burst != 0) {
+ /*
+ * If number of beats fit in several whole bursts
+ * this configuration is allowed.
+ */
+ remaining = ((STM32_DMA_FIFO_SIZE / width) *
+ (threshold + 1) / 4) % burst;
+
+ if (remaining == 0)
+ return true;
+ } else {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+static bool stm32_dma_is_burst_possible(u32 buf_len, u32 threshold)
+{
+ /* If FIFO direct mode, burst is not possible */
+ if (threshold == STM32_DMA_FIFO_THRESHOLD_NONE)
+ return false;
+
+ /*
+ * Buffer or period length has to be aligned on FIFO depth.
+ * Otherwise bytes may be stuck within FIFO at buffer or period
+ * length.
+ */
+ return ((buf_len % ((threshold + 1) * 4)) == 0);
+}
+
+static u32 stm32_dma_get_best_burst(u32 buf_len, u32 max_burst, u32 threshold,
+ enum dma_slave_buswidth width)
+{
+ u32 best_burst = max_burst;
+
+ if (best_burst == 1 || !stm32_dma_is_burst_possible(buf_len, threshold))
+ return 0;
+
+ while ((buf_len < best_burst * width && best_burst > 1) ||
+ !stm32_dma_fifo_threshold_is_allowed(best_burst, threshold,
+ width)) {
+ if (best_burst > STM32_DMA_MIN_BURST)
+ best_burst = best_burst >> 1;
+ else
+ best_burst = 0;
+ }
+
+ return best_burst;
+}
+
+static int stm32_dma_get_burst(struct stm32_dma_chan *chan, u32 maxburst)
+{
+ switch (maxburst) {
+ case 0:
+ case 1:
+ return STM32_DMA_BURST_SINGLE;
+ case 4:
+ return STM32_DMA_BURST_INCR4;
+ case 8:
+ return STM32_DMA_BURST_INCR8;
+ case 16:
+ return STM32_DMA_BURST_INCR16;
+ default:
+ dev_err(chan2dev(chan), "Dma burst size not supported\n");
+ return -EINVAL;
+ }
+}
+
+static void stm32_dma_set_fifo_config(struct stm32_dma_chan *chan,
+ u32 src_burst, u32 dst_burst)
+{
+ chan->chan_reg.dma_sfcr &= ~STM32_DMA_SFCR_MASK;
+ chan->chan_reg.dma_scr &= ~STM32_DMA_SCR_DMEIE;
+
+ if (!src_burst && !dst_burst) {
+ /* Using direct mode */
+ chan->chan_reg.dma_scr |= STM32_DMA_SCR_DMEIE;
+ } else {
+ /* Using FIFO mode */
+ chan->chan_reg.dma_sfcr |= STM32_DMA_SFCR_MASK;
+ }
+}
+
+static int stm32_dma_slave_config(struct dma_chan *c,
+ struct dma_slave_config *config)
+{
+ struct stm32_dma_chan *chan = to_stm32_dma_chan(c);
+
+ memcpy(&chan->dma_sconfig, config, sizeof(*config));
+
+ /* Check if user is requesting DMA to trigger STM32 MDMA */
+ if (config->peripheral_size) {
+ config->peripheral_config = &chan->mdma_config;
+ config->peripheral_size = sizeof(chan->mdma_config);
+ chan->trig_mdma = true;
+ }
+
+ chan->config_init = true;
+
+ return 0;
+}
+
+static u32 stm32_dma_irq_status(struct stm32_dma_chan *chan)
+{
+ struct stm32_dma_device *dmadev = stm32_dma_get_dev(chan);
+ u32 flags, dma_isr;
+
+ /*
+ * Read "flags" from DMA_xISR register corresponding to the selected
+ * DMA channel at the correct bit offset inside that register.
+ */
+
+ dma_isr = stm32_dma_read(dmadev, STM32_DMA_ISR(chan->id));
+ flags = dma_isr >> STM32_DMA_FLAGS_SHIFT(chan->id);
+
+ return flags & STM32_DMA_MASKI;
+}
+
+static void stm32_dma_irq_clear(struct stm32_dma_chan *chan, u32 flags)
+{
+ struct stm32_dma_device *dmadev = stm32_dma_get_dev(chan);
+ u32 dma_ifcr;
+
+ /*
+ * Write "flags" to the DMA_xIFCR register corresponding to the selected
+ * DMA channel at the correct bit offset inside that register.
+ */
+ flags &= STM32_DMA_MASKI;
+ dma_ifcr = flags << STM32_DMA_FLAGS_SHIFT(chan->id);
+
+ stm32_dma_write(dmadev, STM32_DMA_IFCR(chan->id), dma_ifcr);
+}
+
+static int stm32_dma_disable_chan(struct stm32_dma_chan *chan)
+{
+ struct stm32_dma_device *dmadev = stm32_dma_get_dev(chan);
+ u32 dma_scr, id, reg;
+
+ id = chan->id;
+ reg = STM32_DMA_SCR(id);
+ dma_scr = stm32_dma_read(dmadev, reg);
+
+ if (dma_scr & STM32_DMA_SCR_EN) {
+ dma_scr &= ~STM32_DMA_SCR_EN;
+ stm32_dma_write(dmadev, reg, dma_scr);
+
+ return readl_relaxed_poll_timeout_atomic(dmadev->base + reg,
+ dma_scr, !(dma_scr & STM32_DMA_SCR_EN),
+ 10, 1000000);
+ }
+
+ return 0;
+}
+
+static void stm32_dma_stop(struct stm32_dma_chan *chan)
+{
+ struct stm32_dma_device *dmadev = stm32_dma_get_dev(chan);
+ u32 dma_scr, dma_sfcr, status;
+ int ret;
+
+ /* Disable interrupts */
+ dma_scr = stm32_dma_read(dmadev, STM32_DMA_SCR(chan->id));
+ dma_scr &= ~STM32_DMA_SCR_IRQ_MASK;
+ stm32_dma_write(dmadev, STM32_DMA_SCR(chan->id), dma_scr);
+ dma_sfcr = stm32_dma_read(dmadev, STM32_DMA_SFCR(chan->id));
+ dma_sfcr &= ~STM32_DMA_SFCR_FEIE;
+ stm32_dma_write(dmadev, STM32_DMA_SFCR(chan->id), dma_sfcr);
+
+ /* Disable DMA */
+ ret = stm32_dma_disable_chan(chan);
+ if (ret < 0)
+ return;
+
+ /* Clear interrupt status if it is there */
+ status = stm32_dma_irq_status(chan);
+ if (status) {
+ dev_dbg(chan2dev(chan), "%s(): clearing interrupt: 0x%08x\n",
+ __func__, status);
+ stm32_dma_irq_clear(chan, status);
+ }
+
+ chan->busy = false;
+ chan->status = DMA_COMPLETE;
+}
+
+static int stm32_dma_terminate_all(struct dma_chan *c)
+{
+ struct stm32_dma_chan *chan = to_stm32_dma_chan(c);
+ unsigned long flags;
+ LIST_HEAD(head);
+
+ spin_lock_irqsave(&chan->vchan.lock, flags);
+
+ if (chan->desc) {
+ dma_cookie_complete(&chan->desc->vdesc.tx);
+ vchan_terminate_vdesc(&chan->desc->vdesc);
+ if (chan->busy)
+ stm32_dma_stop(chan);
+ chan->desc = NULL;
+ }
+
+ vchan_get_all_descriptors(&chan->vchan, &head);
+ spin_unlock_irqrestore(&chan->vchan.lock, flags);
+ vchan_dma_desc_free_list(&chan->vchan, &head);
+
+ return 0;
+}
+
+static void stm32_dma_synchronize(struct dma_chan *c)
+{
+ struct stm32_dma_chan *chan = to_stm32_dma_chan(c);
+
+ vchan_synchronize(&chan->vchan);
+}
+
+static void stm32_dma_dump_reg(struct stm32_dma_chan *chan)
+{
+ struct stm32_dma_device *dmadev = stm32_dma_get_dev(chan);
+ u32 scr = stm32_dma_read(dmadev, STM32_DMA_SCR(chan->id));
+ u32 ndtr = stm32_dma_read(dmadev, STM32_DMA_SNDTR(chan->id));
+ u32 spar = stm32_dma_read(dmadev, STM32_DMA_SPAR(chan->id));
+ u32 sm0ar = stm32_dma_read(dmadev, STM32_DMA_SM0AR(chan->id));
+ u32 sm1ar = stm32_dma_read(dmadev, STM32_DMA_SM1AR(chan->id));
+ u32 sfcr = stm32_dma_read(dmadev, STM32_DMA_SFCR(chan->id));
+
+ dev_dbg(chan2dev(chan), "SCR: 0x%08x\n", scr);
+ dev_dbg(chan2dev(chan), "NDTR: 0x%08x\n", ndtr);
+ dev_dbg(chan2dev(chan), "SPAR: 0x%08x\n", spar);
+ dev_dbg(chan2dev(chan), "SM0AR: 0x%08x\n", sm0ar);
+ dev_dbg(chan2dev(chan), "SM1AR: 0x%08x\n", sm1ar);
+ dev_dbg(chan2dev(chan), "SFCR: 0x%08x\n", sfcr);
+}
+
+static void stm32_dma_sg_inc(struct stm32_dma_chan *chan)
+{
+ chan->next_sg++;
+ if (chan->desc->cyclic && (chan->next_sg == chan->desc->num_sgs))
+ chan->next_sg = 0;
+}
+
+static void stm32_dma_configure_next_sg(struct stm32_dma_chan *chan);
+
+static void stm32_dma_start_transfer(struct stm32_dma_chan *chan)
+{
+ struct stm32_dma_device *dmadev = stm32_dma_get_dev(chan);
+ struct virt_dma_desc *vdesc;
+ struct stm32_dma_sg_req *sg_req;
+ struct stm32_dma_chan_reg *reg;
+ u32 status;
+ int ret;
+
+ ret = stm32_dma_disable_chan(chan);
+ if (ret < 0)
+ return;
+
+ if (!chan->desc) {
+ vdesc = vchan_next_desc(&chan->vchan);
+ if (!vdesc)
+ return;
+
+ list_del(&vdesc->node);
+
+ chan->desc = to_stm32_dma_desc(vdesc);
+ chan->next_sg = 0;
+ }
+
+ if (chan->next_sg == chan->desc->num_sgs)
+ chan->next_sg = 0;
+
+ sg_req = &chan->desc->sg_req[chan->next_sg];
+ reg = &sg_req->chan_reg;
+
+ /* When DMA triggers STM32 MDMA, DMA Transfer Complete is managed by STM32 MDMA */
+ if (chan->trig_mdma && chan->dma_sconfig.direction != DMA_MEM_TO_DEV)
+ reg->dma_scr &= ~STM32_DMA_SCR_TCIE;
+
+ reg->dma_scr &= ~STM32_DMA_SCR_EN;
+ stm32_dma_write(dmadev, STM32_DMA_SCR(chan->id), reg->dma_scr);
+ stm32_dma_write(dmadev, STM32_DMA_SPAR(chan->id), reg->dma_spar);
+ stm32_dma_write(dmadev, STM32_DMA_SM0AR(chan->id), reg->dma_sm0ar);
+ stm32_dma_write(dmadev, STM32_DMA_SFCR(chan->id), reg->dma_sfcr);
+ stm32_dma_write(dmadev, STM32_DMA_SM1AR(chan->id), reg->dma_sm1ar);
+ stm32_dma_write(dmadev, STM32_DMA_SNDTR(chan->id), reg->dma_sndtr);
+
+ stm32_dma_sg_inc(chan);
+
+ /* Clear interrupt status if it is there */
+ status = stm32_dma_irq_status(chan);
+ if (status)
+ stm32_dma_irq_clear(chan, status);
+
+ if (chan->desc->cyclic)
+ stm32_dma_configure_next_sg(chan);
+
+ stm32_dma_dump_reg(chan);
+
+ /* Start DMA */
+ chan->busy = true;
+ chan->status = DMA_IN_PROGRESS;
+ reg->dma_scr |= STM32_DMA_SCR_EN;
+ stm32_dma_write(dmadev, STM32_DMA_SCR(chan->id), reg->dma_scr);
+
+ dev_dbg(chan2dev(chan), "vchan %pK: started\n", &chan->vchan);
+}
+
+static void stm32_dma_configure_next_sg(struct stm32_dma_chan *chan)
+{
+ struct stm32_dma_device *dmadev = stm32_dma_get_dev(chan);
+ struct stm32_dma_sg_req *sg_req;
+ u32 dma_scr, dma_sm0ar, dma_sm1ar, id;
+
+ id = chan->id;
+ dma_scr = stm32_dma_read(dmadev, STM32_DMA_SCR(id));
+
+ sg_req = &chan->desc->sg_req[chan->next_sg];
+
+ if (dma_scr & STM32_DMA_SCR_CT) {
+ dma_sm0ar = sg_req->chan_reg.dma_sm0ar;
+ stm32_dma_write(dmadev, STM32_DMA_SM0AR(id), dma_sm0ar);
+ dev_dbg(chan2dev(chan), "CT=1 <=> SM0AR: 0x%08x\n",
+ stm32_dma_read(dmadev, STM32_DMA_SM0AR(id)));
+ } else {
+ dma_sm1ar = sg_req->chan_reg.dma_sm1ar;
+ stm32_dma_write(dmadev, STM32_DMA_SM1AR(id), dma_sm1ar);
+ dev_dbg(chan2dev(chan), "CT=0 <=> SM1AR: 0x%08x\n",
+ stm32_dma_read(dmadev, STM32_DMA_SM1AR(id)));
+ }
+}
+
+static void stm32_dma_handle_chan_paused(struct stm32_dma_chan *chan)
+{
+ struct stm32_dma_device *dmadev = stm32_dma_get_dev(chan);
+ u32 dma_scr;
+
+ /*
+ * Read and store current remaining data items and peripheral/memory addresses to be
+ * updated on resume
+ */
+ dma_scr = stm32_dma_read(dmadev, STM32_DMA_SCR(chan->id));
+ /*
+ * Transfer can be paused while between a previous resume and reconfiguration on transfer
+ * complete. If transfer is cyclic and CIRC and DBM have been deactivated for resume, need
+ * to set it here in SCR backup to ensure a good reconfiguration on transfer complete.
+ */
+ if (chan->desc && chan->desc->cyclic) {
+ if (chan->desc->num_sgs == 1)
+ dma_scr |= STM32_DMA_SCR_CIRC;
+ else
+ dma_scr |= STM32_DMA_SCR_DBM;
+ }
+ chan->chan_reg.dma_scr = dma_scr;
+
+ /*
+ * Need to temporarily deactivate CIRC/DBM until next Transfer Complete interrupt, otherwise
+ * on resume NDTR autoreload value will be wrong (lower than the initial period length)
+ */
+ if (chan->desc && chan->desc->cyclic) {
+ dma_scr &= ~(STM32_DMA_SCR_DBM | STM32_DMA_SCR_CIRC);
+ stm32_dma_write(dmadev, STM32_DMA_SCR(chan->id), dma_scr);
+ }
+
+ chan->chan_reg.dma_sndtr = stm32_dma_read(dmadev, STM32_DMA_SNDTR(chan->id));
+
+ chan->status = DMA_PAUSED;
+
+ dev_dbg(chan2dev(chan), "vchan %pK: paused\n", &chan->vchan);
+}
+
+static void stm32_dma_post_resume_reconfigure(struct stm32_dma_chan *chan)
+{
+ struct stm32_dma_device *dmadev = stm32_dma_get_dev(chan);
+ struct stm32_dma_sg_req *sg_req;
+ u32 dma_scr, status, id;
+
+ id = chan->id;
+ dma_scr = stm32_dma_read(dmadev, STM32_DMA_SCR(id));
+
+ /* Clear interrupt status if it is there */
+ status = stm32_dma_irq_status(chan);
+ if (status)
+ stm32_dma_irq_clear(chan, status);
+
+ if (!chan->next_sg)
+ sg_req = &chan->desc->sg_req[chan->desc->num_sgs - 1];
+ else
+ sg_req = &chan->desc->sg_req[chan->next_sg - 1];
+
+ /* Reconfigure NDTR with the initial value */
+ stm32_dma_write(dmadev, STM32_DMA_SNDTR(chan->id), sg_req->chan_reg.dma_sndtr);
+
+ /* Restore SPAR */
+ stm32_dma_write(dmadev, STM32_DMA_SPAR(id), sg_req->chan_reg.dma_spar);
+
+ /* Restore SM0AR/SM1AR whatever DBM/CT as they may have been modified */
+ stm32_dma_write(dmadev, STM32_DMA_SM0AR(id), sg_req->chan_reg.dma_sm0ar);
+ stm32_dma_write(dmadev, STM32_DMA_SM1AR(id), sg_req->chan_reg.dma_sm1ar);
+
+ /* Reactivate CIRC/DBM if needed */
+ if (chan->chan_reg.dma_scr & STM32_DMA_SCR_DBM) {
+ dma_scr |= STM32_DMA_SCR_DBM;
+ /* Restore CT */
+ if (chan->chan_reg.dma_scr & STM32_DMA_SCR_CT)
+ dma_scr &= ~STM32_DMA_SCR_CT;
+ else
+ dma_scr |= STM32_DMA_SCR_CT;
+ } else if (chan->chan_reg.dma_scr & STM32_DMA_SCR_CIRC) {
+ dma_scr |= STM32_DMA_SCR_CIRC;
+ }
+ stm32_dma_write(dmadev, STM32_DMA_SCR(chan->id), dma_scr);
+
+ stm32_dma_configure_next_sg(chan);
+
+ stm32_dma_dump_reg(chan);
+
+ dma_scr |= STM32_DMA_SCR_EN;
+ stm32_dma_write(dmadev, STM32_DMA_SCR(chan->id), dma_scr);
+
+ dev_dbg(chan2dev(chan), "vchan %pK: reconfigured after pause/resume\n", &chan->vchan);
+}
+
+static void stm32_dma_handle_chan_done(struct stm32_dma_chan *chan, u32 scr)
+{
+ if (!chan->desc)
+ return;
+
+ if (chan->desc->cyclic) {
+ vchan_cyclic_callback(&chan->desc->vdesc);
+ if (chan->trig_mdma)
+ return;
+ stm32_dma_sg_inc(chan);
+ /* cyclic while CIRC/DBM disable => post resume reconfiguration needed */
+ if (!(scr & (STM32_DMA_SCR_CIRC | STM32_DMA_SCR_DBM)))
+ stm32_dma_post_resume_reconfigure(chan);
+ else if (scr & STM32_DMA_SCR_DBM)
+ stm32_dma_configure_next_sg(chan);
+ } else {
+ chan->busy = false;
+ chan->status = DMA_COMPLETE;
+ if (chan->next_sg == chan->desc->num_sgs) {
+ vchan_cookie_complete(&chan->desc->vdesc);
+ chan->desc = NULL;
+ }
+ stm32_dma_start_transfer(chan);
+ }
+}
+
+static irqreturn_t stm32_dma_chan_irq(int irq, void *devid)
+{
+ struct stm32_dma_chan *chan = devid;
+ struct stm32_dma_device *dmadev = stm32_dma_get_dev(chan);
+ u32 status, scr, sfcr;
+
+ spin_lock(&chan->vchan.lock);
+
+ status = stm32_dma_irq_status(chan);
+ scr = stm32_dma_read(dmadev, STM32_DMA_SCR(chan->id));
+ sfcr = stm32_dma_read(dmadev, STM32_DMA_SFCR(chan->id));
+
+ if (status & STM32_DMA_FEI) {
+ stm32_dma_irq_clear(chan, STM32_DMA_FEI);
+ status &= ~STM32_DMA_FEI;
+ if (sfcr & STM32_DMA_SFCR_FEIE) {
+ if (!(scr & STM32_DMA_SCR_EN) &&
+ !(status & STM32_DMA_TCI))
+ dev_err(chan2dev(chan), "FIFO Error\n");
+ else
+ dev_dbg(chan2dev(chan), "FIFO over/underrun\n");
+ }
+ }
+ if (status & STM32_DMA_DMEI) {
+ stm32_dma_irq_clear(chan, STM32_DMA_DMEI);
+ status &= ~STM32_DMA_DMEI;
+ if (sfcr & STM32_DMA_SCR_DMEIE)
+ dev_dbg(chan2dev(chan), "Direct mode overrun\n");
+ }
+
+ if (status & STM32_DMA_TCI) {
+ stm32_dma_irq_clear(chan, STM32_DMA_TCI);
+ if (scr & STM32_DMA_SCR_TCIE) {
+ if (chan->status != DMA_PAUSED)
+ stm32_dma_handle_chan_done(chan, scr);
+ }
+ status &= ~STM32_DMA_TCI;
+ }
+
+ if (status & STM32_DMA_HTI) {
+ stm32_dma_irq_clear(chan, STM32_DMA_HTI);
+ status &= ~STM32_DMA_HTI;
+ }
+
+ if (status) {
+ stm32_dma_irq_clear(chan, status);
+ dev_err(chan2dev(chan), "DMA error: status=0x%08x\n", status);
+ if (!(scr & STM32_DMA_SCR_EN))
+ dev_err(chan2dev(chan), "chan disabled by HW\n");
+ }
+
+ spin_unlock(&chan->vchan.lock);
+
+ return IRQ_HANDLED;
+}
+
+static void stm32_dma_issue_pending(struct dma_chan *c)
+{
+ struct stm32_dma_chan *chan = to_stm32_dma_chan(c);
+ unsigned long flags;
+
+ spin_lock_irqsave(&chan->vchan.lock, flags);
+ if (vchan_issue_pending(&chan->vchan) && !chan->desc && !chan->busy) {
+ dev_dbg(chan2dev(chan), "vchan %pK: issued\n", &chan->vchan);
+ stm32_dma_start_transfer(chan);
+
+ }
+ spin_unlock_irqrestore(&chan->vchan.lock, flags);
+}
+
+static int stm32_dma_pause(struct dma_chan *c)
+{
+ struct stm32_dma_chan *chan = to_stm32_dma_chan(c);
+ unsigned long flags;
+ int ret;
+
+ if (chan->status != DMA_IN_PROGRESS)
+ return -EPERM;
+
+ spin_lock_irqsave(&chan->vchan.lock, flags);
+
+ ret = stm32_dma_disable_chan(chan);
+ if (!ret)
+ stm32_dma_handle_chan_paused(chan);
+
+ spin_unlock_irqrestore(&chan->vchan.lock, flags);
+
+ return ret;
+}
+
+static int stm32_dma_resume(struct dma_chan *c)
+{
+ struct stm32_dma_chan *chan = to_stm32_dma_chan(c);
+ struct stm32_dma_device *dmadev = stm32_dma_get_dev(chan);
+ struct stm32_dma_chan_reg chan_reg = chan->chan_reg;
+ u32 id = chan->id, scr, ndtr, offset, spar, sm0ar, sm1ar;
+ struct stm32_dma_sg_req *sg_req;
+ unsigned long flags;
+
+ if (chan->status != DMA_PAUSED)
+ return -EPERM;
+
+ scr = stm32_dma_read(dmadev, STM32_DMA_SCR(id));
+ if (WARN_ON(scr & STM32_DMA_SCR_EN))
+ return -EPERM;
+
+ spin_lock_irqsave(&chan->vchan.lock, flags);
+
+ /* sg_reg[prev_sg] contains original ndtr, sm0ar and sm1ar before pausing the transfer */
+ if (!chan->next_sg)
+ sg_req = &chan->desc->sg_req[chan->desc->num_sgs - 1];
+ else
+ sg_req = &chan->desc->sg_req[chan->next_sg - 1];
+
+ ndtr = sg_req->chan_reg.dma_sndtr;
+ offset = (ndtr - chan_reg.dma_sndtr);
+ offset <<= FIELD_GET(STM32_DMA_SCR_PSIZE_MASK, chan_reg.dma_scr);
+ spar = sg_req->chan_reg.dma_spar;
+ sm0ar = sg_req->chan_reg.dma_sm0ar;
+ sm1ar = sg_req->chan_reg.dma_sm1ar;
+
+ /*
+ * The peripheral and/or memory addresses have to be updated in order to adjust the
+ * address pointers. Need to check increment.
+ */
+ if (chan_reg.dma_scr & STM32_DMA_SCR_PINC)
+ stm32_dma_write(dmadev, STM32_DMA_SPAR(id), spar + offset);
+ else
+ stm32_dma_write(dmadev, STM32_DMA_SPAR(id), spar);
+
+ if (!(chan_reg.dma_scr & STM32_DMA_SCR_MINC))
+ offset = 0;
+
+ /*
+ * In case of DBM, the current target could be SM1AR.
+ * Need to temporarily deactivate CIRC/DBM to finish the current transfer, so
+ * SM0AR becomes the current target and must be updated with SM1AR + offset if CT=1.
+ */
+ if ((chan_reg.dma_scr & STM32_DMA_SCR_DBM) && (chan_reg.dma_scr & STM32_DMA_SCR_CT))
+ stm32_dma_write(dmadev, STM32_DMA_SM1AR(id), sm1ar + offset);
+ else
+ stm32_dma_write(dmadev, STM32_DMA_SM0AR(id), sm0ar + offset);
+
+ /* NDTR must be restored otherwise internal HW counter won't be correctly reset */
+ stm32_dma_write(dmadev, STM32_DMA_SNDTR(id), chan_reg.dma_sndtr);
+
+ /*
+ * Need to temporarily deactivate CIRC/DBM until next Transfer Complete interrupt,
+ * otherwise NDTR autoreload value will be wrong (lower than the initial period length)
+ */
+ if (chan_reg.dma_scr & (STM32_DMA_SCR_CIRC | STM32_DMA_SCR_DBM))
+ chan_reg.dma_scr &= ~(STM32_DMA_SCR_CIRC | STM32_DMA_SCR_DBM);
+
+ if (chan_reg.dma_scr & STM32_DMA_SCR_DBM)
+ stm32_dma_configure_next_sg(chan);
+
+ stm32_dma_dump_reg(chan);
+
+ /* The stream may then be re-enabled to restart transfer from the point it was stopped */
+ chan->status = DMA_IN_PROGRESS;
+ chan_reg.dma_scr |= STM32_DMA_SCR_EN;
+ stm32_dma_write(dmadev, STM32_DMA_SCR(id), chan_reg.dma_scr);
+
+ spin_unlock_irqrestore(&chan->vchan.lock, flags);
+
+ dev_dbg(chan2dev(chan), "vchan %pK: resumed\n", &chan->vchan);
+
+ return 0;
+}
+
+static int stm32_dma_set_xfer_param(struct stm32_dma_chan *chan,
+ enum dma_transfer_direction direction,
+ enum dma_slave_buswidth *buswidth,
+ u32 buf_len, dma_addr_t buf_addr)
+{
+ enum dma_slave_buswidth src_addr_width, dst_addr_width;
+ int src_bus_width, dst_bus_width;
+ int src_burst_size, dst_burst_size;
+ u32 src_maxburst, dst_maxburst, src_best_burst, dst_best_burst;
+ u32 dma_scr, fifoth;
+
+ src_addr_width = chan->dma_sconfig.src_addr_width;
+ dst_addr_width = chan->dma_sconfig.dst_addr_width;
+ src_maxburst = chan->dma_sconfig.src_maxburst;
+ dst_maxburst = chan->dma_sconfig.dst_maxburst;
+ fifoth = chan->threshold;
+
+ switch (direction) {
+ case DMA_MEM_TO_DEV:
+ /* Set device data size */
+ dst_bus_width = stm32_dma_get_width(chan, dst_addr_width);
+ if (dst_bus_width < 0)
+ return dst_bus_width;
+
+ /* Set device burst size */
+ dst_best_burst = stm32_dma_get_best_burst(buf_len,
+ dst_maxburst,
+ fifoth,
+ dst_addr_width);
+
+ dst_burst_size = stm32_dma_get_burst(chan, dst_best_burst);
+ if (dst_burst_size < 0)
+ return dst_burst_size;
+
+ /* Set memory data size */
+ src_addr_width = stm32_dma_get_max_width(buf_len, buf_addr,
+ fifoth);
+ chan->mem_width = src_addr_width;
+ src_bus_width = stm32_dma_get_width(chan, src_addr_width);
+ if (src_bus_width < 0)
+ return src_bus_width;
+
+ /*
+ * Set memory burst size - burst not possible if address is not aligned on
+ * the address boundary equal to the size of the transfer
+ */
+ if (buf_addr & (buf_len - 1))
+ src_maxburst = 1;
+ else
+ src_maxburst = STM32_DMA_MAX_BURST;
+ src_best_burst = stm32_dma_get_best_burst(buf_len,
+ src_maxburst,
+ fifoth,
+ src_addr_width);
+ src_burst_size = stm32_dma_get_burst(chan, src_best_burst);
+ if (src_burst_size < 0)
+ return src_burst_size;
+
+ dma_scr = FIELD_PREP(STM32_DMA_SCR_DIR_MASK, STM32_DMA_MEM_TO_DEV) |
+ FIELD_PREP(STM32_DMA_SCR_PSIZE_MASK, dst_bus_width) |
+ FIELD_PREP(STM32_DMA_SCR_MSIZE_MASK, src_bus_width) |
+ FIELD_PREP(STM32_DMA_SCR_PBURST_MASK, dst_burst_size) |
+ FIELD_PREP(STM32_DMA_SCR_MBURST_MASK, src_burst_size);
+
+ /* Set FIFO threshold */
+ chan->chan_reg.dma_sfcr &= ~STM32_DMA_SFCR_FTH_MASK;
+ if (fifoth != STM32_DMA_FIFO_THRESHOLD_NONE)
+ chan->chan_reg.dma_sfcr |= FIELD_PREP(STM32_DMA_SFCR_FTH_MASK, fifoth);
+
+ /* Set peripheral address */
+ chan->chan_reg.dma_spar = chan->dma_sconfig.dst_addr;
+ *buswidth = dst_addr_width;
+ break;
+
+ case DMA_DEV_TO_MEM:
+ /* Set device data size */
+ src_bus_width = stm32_dma_get_width(chan, src_addr_width);
+ if (src_bus_width < 0)
+ return src_bus_width;
+
+ /* Set device burst size */
+ src_best_burst = stm32_dma_get_best_burst(buf_len,
+ src_maxburst,
+ fifoth,
+ src_addr_width);
+ chan->mem_burst = src_best_burst;
+ src_burst_size = stm32_dma_get_burst(chan, src_best_burst);
+ if (src_burst_size < 0)
+ return src_burst_size;
+
+ /* Set memory data size */
+ dst_addr_width = stm32_dma_get_max_width(buf_len, buf_addr,
+ fifoth);
+ chan->mem_width = dst_addr_width;
+ dst_bus_width = stm32_dma_get_width(chan, dst_addr_width);
+ if (dst_bus_width < 0)
+ return dst_bus_width;
+
+ /*
+ * Set memory burst size - burst not possible if address is not aligned on
+ * the address boundary equal to the size of the transfer
+ */
+ if (buf_addr & (buf_len - 1))
+ dst_maxburst = 1;
+ else
+ dst_maxburst = STM32_DMA_MAX_BURST;
+ dst_best_burst = stm32_dma_get_best_burst(buf_len,
+ dst_maxburst,
+ fifoth,
+ dst_addr_width);
+ chan->mem_burst = dst_best_burst;
+ dst_burst_size = stm32_dma_get_burst(chan, dst_best_burst);
+ if (dst_burst_size < 0)
+ return dst_burst_size;
+
+ dma_scr = FIELD_PREP(STM32_DMA_SCR_DIR_MASK, STM32_DMA_DEV_TO_MEM) |
+ FIELD_PREP(STM32_DMA_SCR_PSIZE_MASK, src_bus_width) |
+ FIELD_PREP(STM32_DMA_SCR_MSIZE_MASK, dst_bus_width) |
+ FIELD_PREP(STM32_DMA_SCR_PBURST_MASK, src_burst_size) |
+ FIELD_PREP(STM32_DMA_SCR_MBURST_MASK, dst_burst_size);
+
+ /* Set FIFO threshold */
+ chan->chan_reg.dma_sfcr &= ~STM32_DMA_SFCR_FTH_MASK;
+ if (fifoth != STM32_DMA_FIFO_THRESHOLD_NONE)
+ chan->chan_reg.dma_sfcr |= FIELD_PREP(STM32_DMA_SFCR_FTH_MASK, fifoth);
+
+ /* Set peripheral address */
+ chan->chan_reg.dma_spar = chan->dma_sconfig.src_addr;
+ *buswidth = chan->dma_sconfig.src_addr_width;
+ break;
+
+ default:
+ dev_err(chan2dev(chan), "Dma direction is not supported\n");
+ return -EINVAL;
+ }
+
+ stm32_dma_set_fifo_config(chan, src_best_burst, dst_best_burst);
+
+ /* Set DMA control register */
+ chan->chan_reg.dma_scr &= ~(STM32_DMA_SCR_DIR_MASK |
+ STM32_DMA_SCR_PSIZE_MASK | STM32_DMA_SCR_MSIZE_MASK |
+ STM32_DMA_SCR_PBURST_MASK | STM32_DMA_SCR_MBURST_MASK);
+ chan->chan_reg.dma_scr |= dma_scr;
+
+ return 0;
+}
+
+static void stm32_dma_clear_reg(struct stm32_dma_chan_reg *regs)
+{
+ memset(regs, 0, sizeof(struct stm32_dma_chan_reg));
+}
+
+static struct dma_async_tx_descriptor *stm32_dma_prep_slave_sg(
+ struct dma_chan *c, struct scatterlist *sgl,
+ u32 sg_len, enum dma_transfer_direction direction,
+ unsigned long flags, void *context)
+{
+ struct stm32_dma_chan *chan = to_stm32_dma_chan(c);
+ struct stm32_dma_desc *desc;
+ struct scatterlist *sg;
+ enum dma_slave_buswidth buswidth;
+ u32 nb_data_items;
+ int i, ret;
+
+ if (!chan->config_init) {
+ dev_err(chan2dev(chan), "dma channel is not configured\n");
+ return NULL;
+ }
+
+ if (sg_len < 1) {
+ dev_err(chan2dev(chan), "Invalid segment length %d\n", sg_len);
+ return NULL;
+ }
+
+ desc = kzalloc(struct_size(desc, sg_req, sg_len), GFP_NOWAIT);
+ if (!desc)
+ return NULL;
+
+ /* Set peripheral flow controller */
+ if (chan->dma_sconfig.device_fc)
+ chan->chan_reg.dma_scr |= STM32_DMA_SCR_PFCTRL;
+ else
+ chan->chan_reg.dma_scr &= ~STM32_DMA_SCR_PFCTRL;
+
+ /* Activate Double Buffer Mode if DMA triggers STM32 MDMA and more than 1 sg */
+ if (chan->trig_mdma && sg_len > 1) {
+ chan->chan_reg.dma_scr |= STM32_DMA_SCR_DBM;
+ chan->chan_reg.dma_scr &= ~STM32_DMA_SCR_CT;
+ }
+
+ for_each_sg(sgl, sg, sg_len, i) {
+ ret = stm32_dma_set_xfer_param(chan, direction, &buswidth,
+ sg_dma_len(sg),
+ sg_dma_address(sg));
+ if (ret < 0)
+ goto err;
+
+ desc->sg_req[i].len = sg_dma_len(sg);
+
+ nb_data_items = desc->sg_req[i].len / buswidth;
+ if (nb_data_items > STM32_DMA_ALIGNED_MAX_DATA_ITEMS) {
+ dev_err(chan2dev(chan), "nb items not supported\n");
+ goto err;
+ }
+
+ stm32_dma_clear_reg(&desc->sg_req[i].chan_reg);
+ desc->sg_req[i].chan_reg.dma_scr = chan->chan_reg.dma_scr;
+ desc->sg_req[i].chan_reg.dma_sfcr = chan->chan_reg.dma_sfcr;
+ desc->sg_req[i].chan_reg.dma_spar = chan->chan_reg.dma_spar;
+ desc->sg_req[i].chan_reg.dma_sm0ar = sg_dma_address(sg);
+ desc->sg_req[i].chan_reg.dma_sm1ar = sg_dma_address(sg);
+ if (chan->trig_mdma)
+ desc->sg_req[i].chan_reg.dma_sm1ar += sg_dma_len(sg);
+ desc->sg_req[i].chan_reg.dma_sndtr = nb_data_items;
+ }
+
+ desc->num_sgs = sg_len;
+ desc->cyclic = false;
+
+ return vchan_tx_prep(&chan->vchan, &desc->vdesc, flags);
+
+err:
+ kfree(desc);
+ return NULL;
+}
+
+static struct dma_async_tx_descriptor *stm32_dma_prep_dma_cyclic(
+ struct dma_chan *c, dma_addr_t buf_addr, size_t buf_len,
+ size_t period_len, enum dma_transfer_direction direction,
+ unsigned long flags)
+{
+ struct stm32_dma_chan *chan = to_stm32_dma_chan(c);
+ struct stm32_dma_desc *desc;
+ enum dma_slave_buswidth buswidth;
+ u32 num_periods, nb_data_items;
+ int i, ret;
+
+ if (!buf_len || !period_len) {
+ dev_err(chan2dev(chan), "Invalid buffer/period len\n");
+ return NULL;
+ }
+
+ if (!chan->config_init) {
+ dev_err(chan2dev(chan), "dma channel is not configured\n");
+ return NULL;
+ }
+
+ if (buf_len % period_len) {
+ dev_err(chan2dev(chan), "buf_len not multiple of period_len\n");
+ return NULL;
+ }
+
+ /*
+ * We allow to take more number of requests till DMA is
+ * not started. The driver will loop over all requests.
+ * Once DMA is started then new requests can be queued only after
+ * terminating the DMA.
+ */
+ if (chan->busy) {
+ dev_err(chan2dev(chan), "Request not allowed when dma busy\n");
+ return NULL;
+ }
+
+ ret = stm32_dma_set_xfer_param(chan, direction, &buswidth, period_len,
+ buf_addr);
+ if (ret < 0)
+ return NULL;
+
+ nb_data_items = period_len / buswidth;
+ if (nb_data_items > STM32_DMA_ALIGNED_MAX_DATA_ITEMS) {
+ dev_err(chan2dev(chan), "number of items not supported\n");
+ return NULL;
+ }
+
+ /* Enable Circular mode or double buffer mode */
+ if (buf_len == period_len) {
+ chan->chan_reg.dma_scr |= STM32_DMA_SCR_CIRC;
+ } else {
+ chan->chan_reg.dma_scr |= STM32_DMA_SCR_DBM;
+ chan->chan_reg.dma_scr &= ~STM32_DMA_SCR_CT;
+ }
+
+ /* Clear periph ctrl if client set it */
+ chan->chan_reg.dma_scr &= ~STM32_DMA_SCR_PFCTRL;
+
+ num_periods = buf_len / period_len;
+
+ desc = kzalloc(struct_size(desc, sg_req, num_periods), GFP_NOWAIT);
+ if (!desc)
+ return NULL;
+
+ for (i = 0; i < num_periods; i++) {
+ desc->sg_req[i].len = period_len;
+
+ stm32_dma_clear_reg(&desc->sg_req[i].chan_reg);
+ desc->sg_req[i].chan_reg.dma_scr = chan->chan_reg.dma_scr;
+ desc->sg_req[i].chan_reg.dma_sfcr = chan->chan_reg.dma_sfcr;
+ desc->sg_req[i].chan_reg.dma_spar = chan->chan_reg.dma_spar;
+ desc->sg_req[i].chan_reg.dma_sm0ar = buf_addr;
+ desc->sg_req[i].chan_reg.dma_sm1ar = buf_addr;
+ if (chan->trig_mdma)
+ desc->sg_req[i].chan_reg.dma_sm1ar += period_len;
+ desc->sg_req[i].chan_reg.dma_sndtr = nb_data_items;
+ if (!chan->trig_mdma)
+ buf_addr += period_len;
+ }
+
+ desc->num_sgs = num_periods;
+ desc->cyclic = true;
+
+ return vchan_tx_prep(&chan->vchan, &desc->vdesc, flags);
+}
+
+static struct dma_async_tx_descriptor *stm32_dma_prep_dma_memcpy(
+ struct dma_chan *c, dma_addr_t dest,
+ dma_addr_t src, size_t len, unsigned long flags)
+{
+ struct stm32_dma_chan *chan = to_stm32_dma_chan(c);
+ enum dma_slave_buswidth max_width;
+ struct stm32_dma_desc *desc;
+ size_t xfer_count, offset;
+ u32 num_sgs, best_burst, threshold;
+ int dma_burst, i;
+
+ num_sgs = DIV_ROUND_UP(len, STM32_DMA_ALIGNED_MAX_DATA_ITEMS);
+ desc = kzalloc(struct_size(desc, sg_req, num_sgs), GFP_NOWAIT);
+ if (!desc)
+ return NULL;
+
+ threshold = chan->threshold;
+
+ for (offset = 0, i = 0; offset < len; offset += xfer_count, i++) {
+ xfer_count = min_t(size_t, len - offset,
+ STM32_DMA_ALIGNED_MAX_DATA_ITEMS);
+
+ /* Compute best burst size */
+ max_width = DMA_SLAVE_BUSWIDTH_1_BYTE;
+ best_burst = stm32_dma_get_best_burst(len, STM32_DMA_MAX_BURST,
+ threshold, max_width);
+ dma_burst = stm32_dma_get_burst(chan, best_burst);
+ if (dma_burst < 0) {
+ kfree(desc);
+ return NULL;
+ }
+
+ stm32_dma_clear_reg(&desc->sg_req[i].chan_reg);
+ desc->sg_req[i].chan_reg.dma_scr =
+ FIELD_PREP(STM32_DMA_SCR_DIR_MASK, STM32_DMA_MEM_TO_MEM) |
+ FIELD_PREP(STM32_DMA_SCR_PBURST_MASK, dma_burst) |
+ FIELD_PREP(STM32_DMA_SCR_MBURST_MASK, dma_burst) |
+ STM32_DMA_SCR_MINC |
+ STM32_DMA_SCR_PINC |
+ STM32_DMA_SCR_TCIE |
+ STM32_DMA_SCR_TEIE;
+ desc->sg_req[i].chan_reg.dma_sfcr |= STM32_DMA_SFCR_MASK;
+ desc->sg_req[i].chan_reg.dma_sfcr |= FIELD_PREP(STM32_DMA_SFCR_FTH_MASK, threshold);
+ desc->sg_req[i].chan_reg.dma_spar = src + offset;
+ desc->sg_req[i].chan_reg.dma_sm0ar = dest + offset;
+ desc->sg_req[i].chan_reg.dma_sndtr = xfer_count;
+ desc->sg_req[i].len = xfer_count;
+ }
+
+ desc->num_sgs = num_sgs;
+ desc->cyclic = false;
+
+ return vchan_tx_prep(&chan->vchan, &desc->vdesc, flags);
+}
+
+static u32 stm32_dma_get_remaining_bytes(struct stm32_dma_chan *chan)
+{
+ u32 dma_scr, width, ndtr;
+ struct stm32_dma_device *dmadev = stm32_dma_get_dev(chan);
+
+ dma_scr = stm32_dma_read(dmadev, STM32_DMA_SCR(chan->id));
+ width = FIELD_GET(STM32_DMA_SCR_PSIZE_MASK, dma_scr);
+ ndtr = stm32_dma_read(dmadev, STM32_DMA_SNDTR(chan->id));
+
+ return ndtr << width;
+}
+
+/**
+ * stm32_dma_is_current_sg - check that expected sg_req is currently transferred
+ * @chan: dma channel
+ *
+ * This function called when IRQ are disable, checks that the hardware has not
+ * switched on the next transfer in double buffer mode. The test is done by
+ * comparing the next_sg memory address with the hardware related register
+ * (based on CT bit value).
+ *
+ * Returns true if expected current transfer is still running or double
+ * buffer mode is not activated.
+ */
+static bool stm32_dma_is_current_sg(struct stm32_dma_chan *chan)
+{
+ struct stm32_dma_device *dmadev = stm32_dma_get_dev(chan);
+ struct stm32_dma_sg_req *sg_req;
+ u32 dma_scr, dma_smar, id, period_len;
+
+ id = chan->id;
+ dma_scr = stm32_dma_read(dmadev, STM32_DMA_SCR(id));
+
+ /* In cyclic CIRC but not DBM, CT is not used */
+ if (!(dma_scr & STM32_DMA_SCR_DBM))
+ return true;
+
+ sg_req = &chan->desc->sg_req[chan->next_sg];
+ period_len = sg_req->len;
+
+ /* DBM - take care of a previous pause/resume not yet post reconfigured */
+ if (dma_scr & STM32_DMA_SCR_CT) {
+ dma_smar = stm32_dma_read(dmadev, STM32_DMA_SM0AR(id));
+ /*
+ * If transfer has been pause/resumed,
+ * SM0AR is in the range of [SM0AR:SM0AR+period_len]
+ */
+ return (dma_smar >= sg_req->chan_reg.dma_sm0ar &&
+ dma_smar < sg_req->chan_reg.dma_sm0ar + period_len);
+ }
+
+ dma_smar = stm32_dma_read(dmadev, STM32_DMA_SM1AR(id));
+ /*
+ * If transfer has been pause/resumed,
+ * SM1AR is in the range of [SM1AR:SM1AR+period_len]
+ */
+ return (dma_smar >= sg_req->chan_reg.dma_sm1ar &&
+ dma_smar < sg_req->chan_reg.dma_sm1ar + period_len);
+}
+
+static size_t stm32_dma_desc_residue(struct stm32_dma_chan *chan,
+ struct stm32_dma_desc *desc,
+ u32 next_sg)
+{
+ u32 modulo, burst_size;
+ u32 residue;
+ u32 n_sg = next_sg;
+ struct stm32_dma_sg_req *sg_req = &chan->desc->sg_req[chan->next_sg];
+ int i;
+
+ /*
+ * Calculate the residue means compute the descriptors
+ * information:
+ * - the sg_req currently transferred
+ * - the Hardware remaining position in this sg (NDTR bits field).
+ *
+ * A race condition may occur if DMA is running in cyclic or double
+ * buffer mode, since the DMA register are automatically reloaded at end
+ * of period transfer. The hardware may have switched to the next
+ * transfer (CT bit updated) just before the position (SxNDTR reg) is
+ * read.
+ * In this case the SxNDTR reg could (or not) correspond to the new
+ * transfer position, and not the expected one.
+ * The strategy implemented in the stm32 driver is to:
+ * - read the SxNDTR register
+ * - crosscheck that hardware is still in current transfer.
+ * In case of switch, we can assume that the DMA is at the beginning of
+ * the next transfer. So we approximate the residue in consequence, by
+ * pointing on the beginning of next transfer.
+ *
+ * This race condition doesn't apply for none cyclic mode, as double
+ * buffer is not used. In such situation registers are updated by the
+ * software.
+ */
+
+ residue = stm32_dma_get_remaining_bytes(chan);
+
+ if ((chan->desc->cyclic || chan->trig_mdma) && !stm32_dma_is_current_sg(chan)) {
+ n_sg++;
+ if (n_sg == chan->desc->num_sgs)
+ n_sg = 0;
+ if (!chan->trig_mdma)
+ residue = sg_req->len;
+ }
+
+ /*
+ * In cyclic mode, for the last period, residue = remaining bytes
+ * from NDTR,
+ * else for all other periods in cyclic mode, and in sg mode,
+ * residue = remaining bytes from NDTR + remaining
+ * periods/sg to be transferred
+ */
+ if ((!chan->desc->cyclic && !chan->trig_mdma) || n_sg != 0)
+ for (i = n_sg; i < desc->num_sgs; i++)
+ residue += desc->sg_req[i].len;
+
+ if (!chan->mem_burst)
+ return residue;
+
+ burst_size = chan->mem_burst * chan->mem_width;
+ modulo = residue % burst_size;
+ if (modulo)
+ residue = residue - modulo + burst_size;
+
+ return residue;
+}
+
+static enum dma_status stm32_dma_tx_status(struct dma_chan *c,
+ dma_cookie_t cookie,
+ struct dma_tx_state *state)
+{
+ struct stm32_dma_chan *chan = to_stm32_dma_chan(c);
+ struct virt_dma_desc *vdesc;
+ enum dma_status status;
+ unsigned long flags;
+ u32 residue = 0;
+
+ status = dma_cookie_status(c, cookie, state);
+ if (status == DMA_COMPLETE)
+ return status;
+
+ status = chan->status;
+
+ if (!state)
+ return status;
+
+ spin_lock_irqsave(&chan->vchan.lock, flags);
+ vdesc = vchan_find_desc(&chan->vchan, cookie);
+ if (chan->desc && cookie == chan->desc->vdesc.tx.cookie)
+ residue = stm32_dma_desc_residue(chan, chan->desc,
+ chan->next_sg);
+ else if (vdesc)
+ residue = stm32_dma_desc_residue(chan,
+ to_stm32_dma_desc(vdesc), 0);
+ dma_set_residue(state, residue);
+
+ spin_unlock_irqrestore(&chan->vchan.lock, flags);
+
+ return status;
+}
+
+static int stm32_dma_alloc_chan_resources(struct dma_chan *c)
+{
+ struct stm32_dma_chan *chan = to_stm32_dma_chan(c);
+ struct stm32_dma_device *dmadev = stm32_dma_get_dev(chan);
+ int ret;
+
+ chan->config_init = false;
+
+ ret = pm_runtime_resume_and_get(dmadev->ddev.dev);
+ if (ret < 0)
+ return ret;
+
+ ret = stm32_dma_disable_chan(chan);
+ if (ret < 0)
+ pm_runtime_put(dmadev->ddev.dev);
+
+ return ret;
+}
+
+static void stm32_dma_free_chan_resources(struct dma_chan *c)
+{
+ struct stm32_dma_chan *chan = to_stm32_dma_chan(c);
+ struct stm32_dma_device *dmadev = stm32_dma_get_dev(chan);
+ unsigned long flags;
+
+ dev_dbg(chan2dev(chan), "Freeing channel %d\n", chan->id);
+
+ if (chan->busy) {
+ spin_lock_irqsave(&chan->vchan.lock, flags);
+ stm32_dma_stop(chan);
+ chan->desc = NULL;
+ spin_unlock_irqrestore(&chan->vchan.lock, flags);
+ }
+
+ pm_runtime_put(dmadev->ddev.dev);
+
+ vchan_free_chan_resources(to_virt_chan(c));
+ stm32_dma_clear_reg(&chan->chan_reg);
+ chan->threshold = 0;
+}
+
+static void stm32_dma_desc_free(struct virt_dma_desc *vdesc)
+{
+ kfree(container_of(vdesc, struct stm32_dma_desc, vdesc));
+}
+
+static void stm32_dma_set_config(struct stm32_dma_chan *chan,
+ struct stm32_dma_cfg *cfg)
+{
+ stm32_dma_clear_reg(&chan->chan_reg);
+
+ chan->chan_reg.dma_scr = cfg->stream_config & STM32_DMA_SCR_CFG_MASK;
+ chan->chan_reg.dma_scr |= FIELD_PREP(STM32_DMA_SCR_REQ_MASK, cfg->request_line);
+
+ /* Enable Interrupts */
+ chan->chan_reg.dma_scr |= STM32_DMA_SCR_TEIE | STM32_DMA_SCR_TCIE;
+
+ chan->threshold = FIELD_GET(STM32_DMA_THRESHOLD_FTR_MASK, cfg->features);
+ if (FIELD_GET(STM32_DMA_DIRECT_MODE_MASK, cfg->features))
+ chan->threshold = STM32_DMA_FIFO_THRESHOLD_NONE;
+ if (FIELD_GET(STM32_DMA_ALT_ACK_MODE_MASK, cfg->features))
+ chan->chan_reg.dma_scr |= STM32_DMA_SCR_TRBUFF;
+ chan->mdma_config.stream_id = FIELD_GET(STM32_DMA_MDMA_STREAM_ID_MASK, cfg->features);
+}
+
+static struct dma_chan *stm32_dma_of_xlate(struct of_phandle_args *dma_spec,
+ struct of_dma *ofdma)
+{
+ struct stm32_dma_device *dmadev = ofdma->of_dma_data;
+ struct device *dev = dmadev->ddev.dev;
+ struct stm32_dma_cfg cfg;
+ struct stm32_dma_chan *chan;
+ struct dma_chan *c;
+
+ if (dma_spec->args_count < 4) {
+ dev_err(dev, "Bad number of cells\n");
+ return NULL;
+ }
+
+ cfg.channel_id = dma_spec->args[0];
+ cfg.request_line = dma_spec->args[1];
+ cfg.stream_config = dma_spec->args[2];
+ cfg.features = dma_spec->args[3];
+
+ if (cfg.channel_id >= STM32_DMA_MAX_CHANNELS ||
+ cfg.request_line >= STM32_DMA_MAX_REQUEST_ID) {
+ dev_err(dev, "Bad channel and/or request id\n");
+ return NULL;
+ }
+
+ chan = &dmadev->chan[cfg.channel_id];
+
+ c = dma_get_slave_channel(&chan->vchan.chan);
+ if (!c) {
+ dev_err(dev, "No more channels available\n");
+ return NULL;
+ }
+
+ stm32_dma_set_config(chan, &cfg);
+
+ return c;
+}
+
+static const struct of_device_id stm32_dma_of_match[] = {
+ { .compatible = "st,stm32-dma", },
+ { /* sentinel */ },
+};
+MODULE_DEVICE_TABLE(of, stm32_dma_of_match);
+
+static int stm32_dma_probe(struct platform_device *pdev)
+{
+ struct stm32_dma_chan *chan;
+ struct stm32_dma_device *dmadev;
+ struct dma_device *dd;
+ const struct of_device_id *match;
+ struct resource *res;
+ struct reset_control *rst;
+ int i, ret;
+
+ match = of_match_device(stm32_dma_of_match, &pdev->dev);
+ if (!match) {
+ dev_err(&pdev->dev, "Error: No device match found\n");
+ return -ENODEV;
+ }
+
+ dmadev = devm_kzalloc(&pdev->dev, sizeof(*dmadev), GFP_KERNEL);
+ if (!dmadev)
+ return -ENOMEM;
+
+ dd = &dmadev->ddev;
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ dmadev->base = devm_ioremap_resource(&pdev->dev, res);
+ if (IS_ERR(dmadev->base))
+ return PTR_ERR(dmadev->base);
+
+ dmadev->clk = devm_clk_get(&pdev->dev, NULL);
+ if (IS_ERR(dmadev->clk))
+ return dev_err_probe(&pdev->dev, PTR_ERR(dmadev->clk), "Can't get clock\n");
+
+ ret = clk_prepare_enable(dmadev->clk);
+ if (ret < 0) {
+ dev_err(&pdev->dev, "clk_prep_enable error: %d\n", ret);
+ return ret;
+ }
+
+ dmadev->mem2mem = of_property_read_bool(pdev->dev.of_node,
+ "st,mem2mem");
+
+ rst = devm_reset_control_get(&pdev->dev, NULL);
+ if (IS_ERR(rst)) {
+ ret = PTR_ERR(rst);
+ if (ret == -EPROBE_DEFER)
+ goto clk_free;
+ } else {
+ reset_control_assert(rst);
+ udelay(2);
+ reset_control_deassert(rst);
+ }
+
+ dma_set_max_seg_size(&pdev->dev, STM32_DMA_ALIGNED_MAX_DATA_ITEMS);
+
+ dma_cap_set(DMA_SLAVE, dd->cap_mask);
+ dma_cap_set(DMA_PRIVATE, dd->cap_mask);
+ dma_cap_set(DMA_CYCLIC, dd->cap_mask);
+ dd->device_alloc_chan_resources = stm32_dma_alloc_chan_resources;
+ dd->device_free_chan_resources = stm32_dma_free_chan_resources;
+ dd->device_tx_status = stm32_dma_tx_status;
+ dd->device_issue_pending = stm32_dma_issue_pending;
+ dd->device_prep_slave_sg = stm32_dma_prep_slave_sg;
+ dd->device_prep_dma_cyclic = stm32_dma_prep_dma_cyclic;
+ dd->device_config = stm32_dma_slave_config;
+ dd->device_pause = stm32_dma_pause;
+ dd->device_resume = stm32_dma_resume;
+ dd->device_terminate_all = stm32_dma_terminate_all;
+ dd->device_synchronize = stm32_dma_synchronize;
+ dd->src_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) |
+ BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) |
+ BIT(DMA_SLAVE_BUSWIDTH_4_BYTES);
+ dd->dst_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) |
+ BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) |
+ BIT(DMA_SLAVE_BUSWIDTH_4_BYTES);
+ dd->directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
+ dd->residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
+ dd->copy_align = DMAENGINE_ALIGN_32_BYTES;
+ dd->max_burst = STM32_DMA_MAX_BURST;
+ dd->max_sg_burst = STM32_DMA_ALIGNED_MAX_DATA_ITEMS;
+ dd->descriptor_reuse = true;
+ dd->dev = &pdev->dev;
+ INIT_LIST_HEAD(&dd->channels);
+
+ if (dmadev->mem2mem) {
+ dma_cap_set(DMA_MEMCPY, dd->cap_mask);
+ dd->device_prep_dma_memcpy = stm32_dma_prep_dma_memcpy;
+ dd->directions |= BIT(DMA_MEM_TO_MEM);
+ }
+
+ for (i = 0; i < STM32_DMA_MAX_CHANNELS; i++) {
+ chan = &dmadev->chan[i];
+ chan->id = i;
+ chan->vchan.desc_free = stm32_dma_desc_free;
+ vchan_init(&chan->vchan, dd);
+
+ chan->mdma_config.ifcr = res->start;
+ chan->mdma_config.ifcr += STM32_DMA_IFCR(chan->id);
+
+ chan->mdma_config.tcf = STM32_DMA_TCI;
+ chan->mdma_config.tcf <<= STM32_DMA_FLAGS_SHIFT(chan->id);
+ }
+
+ ret = dma_async_device_register(dd);
+ if (ret)
+ goto clk_free;
+
+ for (i = 0; i < STM32_DMA_MAX_CHANNELS; i++) {
+ chan = &dmadev->chan[i];
+ ret = platform_get_irq(pdev, i);
+ if (ret < 0)
+ goto err_unregister;
+ chan->irq = ret;
+
+ ret = devm_request_irq(&pdev->dev, chan->irq,
+ stm32_dma_chan_irq, 0,
+ dev_name(chan2dev(chan)), chan);
+ if (ret) {
+ dev_err(&pdev->dev,
+ "request_irq failed with err %d channel %d\n",
+ ret, i);
+ goto err_unregister;
+ }
+ }
+
+ ret = of_dma_controller_register(pdev->dev.of_node,
+ stm32_dma_of_xlate, dmadev);
+ if (ret < 0) {
+ dev_err(&pdev->dev,
+ "STM32 DMA DMA OF registration failed %d\n", ret);
+ goto err_unregister;
+ }
+
+ platform_set_drvdata(pdev, dmadev);
+
+ pm_runtime_set_active(&pdev->dev);
+ pm_runtime_enable(&pdev->dev);
+ pm_runtime_get_noresume(&pdev->dev);
+ pm_runtime_put(&pdev->dev);
+
+ dev_info(&pdev->dev, "STM32 DMA driver registered\n");
+
+ return 0;
+
+err_unregister:
+ dma_async_device_unregister(dd);
+clk_free:
+ clk_disable_unprepare(dmadev->clk);
+
+ return ret;
+}
+
+#ifdef CONFIG_PM
+static int stm32_dma_runtime_suspend(struct device *dev)
+{
+ struct stm32_dma_device *dmadev = dev_get_drvdata(dev);
+
+ clk_disable_unprepare(dmadev->clk);
+
+ return 0;
+}
+
+static int stm32_dma_runtime_resume(struct device *dev)
+{
+ struct stm32_dma_device *dmadev = dev_get_drvdata(dev);
+ int ret;
+
+ ret = clk_prepare_enable(dmadev->clk);
+ if (ret) {
+ dev_err(dev, "failed to prepare_enable clock\n");
+ return ret;
+ }
+
+ return 0;
+}
+#endif
+
+#ifdef CONFIG_PM_SLEEP
+static int stm32_dma_pm_suspend(struct device *dev)
+{
+ struct stm32_dma_device *dmadev = dev_get_drvdata(dev);
+ int id, ret, scr;
+
+ ret = pm_runtime_resume_and_get(dev);
+ if (ret < 0)
+ return ret;
+
+ for (id = 0; id < STM32_DMA_MAX_CHANNELS; id++) {
+ scr = stm32_dma_read(dmadev, STM32_DMA_SCR(id));
+ if (scr & STM32_DMA_SCR_EN) {
+ dev_warn(dev, "Suspend is prevented by Chan %i\n", id);
+ return -EBUSY;
+ }
+ }
+
+ pm_runtime_put_sync(dev);
+
+ pm_runtime_force_suspend(dev);
+
+ return 0;
+}
+
+static int stm32_dma_pm_resume(struct device *dev)
+{
+ return pm_runtime_force_resume(dev);
+}
+#endif
+
+static const struct dev_pm_ops stm32_dma_pm_ops = {
+ SET_SYSTEM_SLEEP_PM_OPS(stm32_dma_pm_suspend, stm32_dma_pm_resume)
+ SET_RUNTIME_PM_OPS(stm32_dma_runtime_suspend,
+ stm32_dma_runtime_resume, NULL)
+};
+
+static struct platform_driver stm32_dma_driver = {
+ .driver = {
+ .name = "stm32-dma",
+ .of_match_table = stm32_dma_of_match,
+ .pm = &stm32_dma_pm_ops,
+ },
+ .probe = stm32_dma_probe,
+};
+
+static int __init stm32_dma_init(void)
+{
+ return platform_driver_register(&stm32_dma_driver);
+}
+subsys_initcall(stm32_dma_init);
diff --git a/drivers/dma/stm32-dmamux.c b/drivers/dma/stm32-dmamux.c
new file mode 100644
index 000000000..ee3cbbf51
--- /dev/null
+++ b/drivers/dma/stm32-dmamux.c
@@ -0,0 +1,403 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ *
+ * Copyright (C) STMicroelectronics SA 2017
+ * Author(s): M'boumba Cedric Madianga <cedric.madianga@gmail.com>
+ * Pierre-Yves Mordret <pierre-yves.mordret@st.com>
+ *
+ * DMA Router driver for STM32 DMA MUX
+ *
+ * Based on TI DMA Crossbar driver
+ */
+
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/of_dma.h>
+#include <linux/pm_runtime.h>
+#include <linux/reset.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+
+#define STM32_DMAMUX_CCR(x) (0x4 * (x))
+#define STM32_DMAMUX_MAX_DMA_REQUESTS 32
+#define STM32_DMAMUX_MAX_REQUESTS 255
+
+struct stm32_dmamux {
+ u32 master;
+ u32 request;
+ u32 chan_id;
+};
+
+struct stm32_dmamux_data {
+ struct dma_router dmarouter;
+ struct clk *clk;
+ void __iomem *iomem;
+ u32 dma_requests; /* Number of DMA requests connected to DMAMUX */
+ u32 dmamux_requests; /* Number of DMA requests routed toward DMAs */
+ spinlock_t lock; /* Protects register access */
+ DECLARE_BITMAP(dma_inuse, STM32_DMAMUX_MAX_DMA_REQUESTS); /* Used DMA channel */
+ u32 ccr[STM32_DMAMUX_MAX_DMA_REQUESTS]; /* Used to backup CCR register
+ * in suspend
+ */
+ u32 dma_reqs[]; /* Number of DMA Request per DMA masters.
+ * [0] holds number of DMA Masters.
+ * To be kept at very end of this structure
+ */
+};
+
+static inline u32 stm32_dmamux_read(void __iomem *iomem, u32 reg)
+{
+ return readl_relaxed(iomem + reg);
+}
+
+static inline void stm32_dmamux_write(void __iomem *iomem, u32 reg, u32 val)
+{
+ writel_relaxed(val, iomem + reg);
+}
+
+static void stm32_dmamux_free(struct device *dev, void *route_data)
+{
+ struct stm32_dmamux_data *dmamux = dev_get_drvdata(dev);
+ struct stm32_dmamux *mux = route_data;
+ unsigned long flags;
+
+ /* Clear dma request */
+ spin_lock_irqsave(&dmamux->lock, flags);
+
+ stm32_dmamux_write(dmamux->iomem, STM32_DMAMUX_CCR(mux->chan_id), 0);
+ clear_bit(mux->chan_id, dmamux->dma_inuse);
+
+ pm_runtime_put_sync(dev);
+
+ spin_unlock_irqrestore(&dmamux->lock, flags);
+
+ dev_dbg(dev, "Unmapping DMAMUX(%u) to DMA%u(%u)\n",
+ mux->request, mux->master, mux->chan_id);
+
+ kfree(mux);
+}
+
+static void *stm32_dmamux_route_allocate(struct of_phandle_args *dma_spec,
+ struct of_dma *ofdma)
+{
+ struct platform_device *pdev = of_find_device_by_node(ofdma->of_node);
+ struct stm32_dmamux_data *dmamux = platform_get_drvdata(pdev);
+ struct stm32_dmamux *mux;
+ u32 i, min, max;
+ int ret;
+ unsigned long flags;
+
+ if (dma_spec->args_count != 3) {
+ dev_err(&pdev->dev, "invalid number of dma mux args\n");
+ return ERR_PTR(-EINVAL);
+ }
+
+ if (dma_spec->args[0] > dmamux->dmamux_requests) {
+ dev_err(&pdev->dev, "invalid mux request number: %d\n",
+ dma_spec->args[0]);
+ return ERR_PTR(-EINVAL);
+ }
+
+ mux = kzalloc(sizeof(*mux), GFP_KERNEL);
+ if (!mux)
+ return ERR_PTR(-ENOMEM);
+
+ spin_lock_irqsave(&dmamux->lock, flags);
+ mux->chan_id = find_first_zero_bit(dmamux->dma_inuse,
+ dmamux->dma_requests);
+
+ if (mux->chan_id == dmamux->dma_requests) {
+ spin_unlock_irqrestore(&dmamux->lock, flags);
+ dev_err(&pdev->dev, "Run out of free DMA requests\n");
+ ret = -ENOMEM;
+ goto error_chan_id;
+ }
+ set_bit(mux->chan_id, dmamux->dma_inuse);
+ spin_unlock_irqrestore(&dmamux->lock, flags);
+
+ /* Look for DMA Master */
+ for (i = 1, min = 0, max = dmamux->dma_reqs[i];
+ i <= dmamux->dma_reqs[0];
+ min += dmamux->dma_reqs[i], max += dmamux->dma_reqs[++i])
+ if (mux->chan_id < max)
+ break;
+ mux->master = i - 1;
+
+ /* The of_node_put() will be done in of_dma_router_xlate function */
+ dma_spec->np = of_parse_phandle(ofdma->of_node, "dma-masters", i - 1);
+ if (!dma_spec->np) {
+ dev_err(&pdev->dev, "can't get dma master\n");
+ ret = -EINVAL;
+ goto error;
+ }
+
+ /* Set dma request */
+ spin_lock_irqsave(&dmamux->lock, flags);
+ ret = pm_runtime_resume_and_get(&pdev->dev);
+ if (ret < 0) {
+ spin_unlock_irqrestore(&dmamux->lock, flags);
+ goto error;
+ }
+ spin_unlock_irqrestore(&dmamux->lock, flags);
+
+ mux->request = dma_spec->args[0];
+
+ /* craft DMA spec */
+ dma_spec->args[3] = dma_spec->args[2] | mux->chan_id << 16;
+ dma_spec->args[2] = dma_spec->args[1];
+ dma_spec->args[1] = 0;
+ dma_spec->args[0] = mux->chan_id - min;
+ dma_spec->args_count = 4;
+
+ stm32_dmamux_write(dmamux->iomem, STM32_DMAMUX_CCR(mux->chan_id),
+ mux->request);
+ dev_dbg(&pdev->dev, "Mapping DMAMUX(%u) to DMA%u(%u)\n",
+ mux->request, mux->master, mux->chan_id);
+
+ return mux;
+
+error:
+ clear_bit(mux->chan_id, dmamux->dma_inuse);
+
+error_chan_id:
+ kfree(mux);
+ return ERR_PTR(ret);
+}
+
+static const struct of_device_id stm32_stm32dma_master_match[] __maybe_unused = {
+ { .compatible = "st,stm32-dma", },
+ {},
+};
+
+static int stm32_dmamux_probe(struct platform_device *pdev)
+{
+ struct device_node *node = pdev->dev.of_node;
+ const struct of_device_id *match;
+ struct device_node *dma_node;
+ struct stm32_dmamux_data *stm32_dmamux;
+ struct resource *res;
+ void __iomem *iomem;
+ struct reset_control *rst;
+ int i, count, ret;
+ u32 dma_req;
+
+ if (!node)
+ return -ENODEV;
+
+ count = device_property_count_u32(&pdev->dev, "dma-masters");
+ if (count < 0) {
+ dev_err(&pdev->dev, "Can't get DMA master(s) node\n");
+ return -ENODEV;
+ }
+
+ stm32_dmamux = devm_kzalloc(&pdev->dev, sizeof(*stm32_dmamux) +
+ sizeof(u32) * (count + 1), GFP_KERNEL);
+ if (!stm32_dmamux)
+ return -ENOMEM;
+
+ dma_req = 0;
+ for (i = 1; i <= count; i++) {
+ dma_node = of_parse_phandle(node, "dma-masters", i - 1);
+
+ match = of_match_node(stm32_stm32dma_master_match, dma_node);
+ if (!match) {
+ dev_err(&pdev->dev, "DMA master is not supported\n");
+ of_node_put(dma_node);
+ return -EINVAL;
+ }
+
+ if (of_property_read_u32(dma_node, "dma-requests",
+ &stm32_dmamux->dma_reqs[i])) {
+ dev_info(&pdev->dev,
+ "Missing MUX output information, using %u.\n",
+ STM32_DMAMUX_MAX_DMA_REQUESTS);
+ stm32_dmamux->dma_reqs[i] =
+ STM32_DMAMUX_MAX_DMA_REQUESTS;
+ }
+ dma_req += stm32_dmamux->dma_reqs[i];
+ of_node_put(dma_node);
+ }
+
+ if (dma_req > STM32_DMAMUX_MAX_DMA_REQUESTS) {
+ dev_err(&pdev->dev, "Too many DMA Master Requests to manage\n");
+ return -ENODEV;
+ }
+
+ stm32_dmamux->dma_requests = dma_req;
+ stm32_dmamux->dma_reqs[0] = count;
+
+ if (device_property_read_u32(&pdev->dev, "dma-requests",
+ &stm32_dmamux->dmamux_requests)) {
+ stm32_dmamux->dmamux_requests = STM32_DMAMUX_MAX_REQUESTS;
+ dev_warn(&pdev->dev, "DMAMUX defaulting on %u requests\n",
+ stm32_dmamux->dmamux_requests);
+ }
+ pm_runtime_get_noresume(&pdev->dev);
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ iomem = devm_ioremap_resource(&pdev->dev, res);
+ if (IS_ERR(iomem))
+ return PTR_ERR(iomem);
+
+ spin_lock_init(&stm32_dmamux->lock);
+
+ stm32_dmamux->clk = devm_clk_get(&pdev->dev, NULL);
+ if (IS_ERR(stm32_dmamux->clk))
+ return dev_err_probe(&pdev->dev, PTR_ERR(stm32_dmamux->clk),
+ "Missing clock controller\n");
+
+ ret = clk_prepare_enable(stm32_dmamux->clk);
+ if (ret < 0) {
+ dev_err(&pdev->dev, "clk_prep_enable error: %d\n", ret);
+ return ret;
+ }
+
+ rst = devm_reset_control_get(&pdev->dev, NULL);
+ if (IS_ERR(rst)) {
+ ret = PTR_ERR(rst);
+ if (ret == -EPROBE_DEFER)
+ goto err_clk;
+ } else if (count > 1) { /* Don't reset if there is only one dma-master */
+ reset_control_assert(rst);
+ udelay(2);
+ reset_control_deassert(rst);
+ }
+
+ stm32_dmamux->iomem = iomem;
+ stm32_dmamux->dmarouter.dev = &pdev->dev;
+ stm32_dmamux->dmarouter.route_free = stm32_dmamux_free;
+
+ platform_set_drvdata(pdev, stm32_dmamux);
+ pm_runtime_set_active(&pdev->dev);
+ pm_runtime_enable(&pdev->dev);
+
+ pm_runtime_get_noresume(&pdev->dev);
+
+ /* Reset the dmamux */
+ for (i = 0; i < stm32_dmamux->dma_requests; i++)
+ stm32_dmamux_write(stm32_dmamux->iomem, STM32_DMAMUX_CCR(i), 0);
+
+ pm_runtime_put(&pdev->dev);
+
+ ret = of_dma_router_register(node, stm32_dmamux_route_allocate,
+ &stm32_dmamux->dmarouter);
+ if (ret)
+ goto pm_disable;
+
+ return 0;
+
+pm_disable:
+ pm_runtime_disable(&pdev->dev);
+err_clk:
+ clk_disable_unprepare(stm32_dmamux->clk);
+
+ return ret;
+}
+
+#ifdef CONFIG_PM
+static int stm32_dmamux_runtime_suspend(struct device *dev)
+{
+ struct platform_device *pdev = to_platform_device(dev);
+ struct stm32_dmamux_data *stm32_dmamux = platform_get_drvdata(pdev);
+
+ clk_disable_unprepare(stm32_dmamux->clk);
+
+ return 0;
+}
+
+static int stm32_dmamux_runtime_resume(struct device *dev)
+{
+ struct platform_device *pdev = to_platform_device(dev);
+ struct stm32_dmamux_data *stm32_dmamux = platform_get_drvdata(pdev);
+ int ret;
+
+ ret = clk_prepare_enable(stm32_dmamux->clk);
+ if (ret) {
+ dev_err(&pdev->dev, "failed to prepare_enable clock\n");
+ return ret;
+ }
+
+ return 0;
+}
+#endif
+
+#ifdef CONFIG_PM_SLEEP
+static int stm32_dmamux_suspend(struct device *dev)
+{
+ struct platform_device *pdev = to_platform_device(dev);
+ struct stm32_dmamux_data *stm32_dmamux = platform_get_drvdata(pdev);
+ int i, ret;
+
+ ret = pm_runtime_resume_and_get(dev);
+ if (ret < 0)
+ return ret;
+
+ for (i = 0; i < stm32_dmamux->dma_requests; i++)
+ stm32_dmamux->ccr[i] = stm32_dmamux_read(stm32_dmamux->iomem,
+ STM32_DMAMUX_CCR(i));
+
+ pm_runtime_put_sync(dev);
+
+ pm_runtime_force_suspend(dev);
+
+ return 0;
+}
+
+static int stm32_dmamux_resume(struct device *dev)
+{
+ struct platform_device *pdev = to_platform_device(dev);
+ struct stm32_dmamux_data *stm32_dmamux = platform_get_drvdata(pdev);
+ int i, ret;
+
+ ret = pm_runtime_force_resume(dev);
+ if (ret < 0)
+ return ret;
+
+ ret = pm_runtime_resume_and_get(dev);
+ if (ret < 0)
+ return ret;
+
+ for (i = 0; i < stm32_dmamux->dma_requests; i++)
+ stm32_dmamux_write(stm32_dmamux->iomem, STM32_DMAMUX_CCR(i),
+ stm32_dmamux->ccr[i]);
+
+ pm_runtime_put_sync(dev);
+
+ return 0;
+}
+#endif
+
+static const struct dev_pm_ops stm32_dmamux_pm_ops = {
+ SET_SYSTEM_SLEEP_PM_OPS(stm32_dmamux_suspend, stm32_dmamux_resume)
+ SET_RUNTIME_PM_OPS(stm32_dmamux_runtime_suspend,
+ stm32_dmamux_runtime_resume, NULL)
+};
+
+static const struct of_device_id stm32_dmamux_match[] = {
+ { .compatible = "st,stm32h7-dmamux" },
+ {},
+};
+
+static struct platform_driver stm32_dmamux_driver = {
+ .probe = stm32_dmamux_probe,
+ .driver = {
+ .name = "stm32-dmamux",
+ .of_match_table = stm32_dmamux_match,
+ .pm = &stm32_dmamux_pm_ops,
+ },
+};
+
+static int __init stm32_dmamux_init(void)
+{
+ return platform_driver_register(&stm32_dmamux_driver);
+}
+arch_initcall(stm32_dmamux_init);
+
+MODULE_DESCRIPTION("DMA Router driver for STM32 DMA MUX");
+MODULE_AUTHOR("M'boumba Cedric Madianga <cedric.madianga@gmail.com>");
+MODULE_AUTHOR("Pierre-Yves Mordret <pierre-yves.mordret@st.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/stm32-mdma.c b/drivers/dma/stm32-mdma.c
new file mode 100644
index 000000000..65ef1f5ca
--- /dev/null
+++ b/drivers/dma/stm32-mdma.c
@@ -0,0 +1,1834 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ *
+ * Copyright (C) STMicroelectronics SA 2017
+ * Author(s): M'boumba Cedric Madianga <cedric.madianga@gmail.com>
+ * Pierre-Yves Mordret <pierre-yves.mordret@st.com>
+ *
+ * Driver for STM32 MDMA controller
+ *
+ * Inspired by stm32-dma.c and dma-jz4780.c
+ */
+
+#include <linux/bitfield.h>
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmapool.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/iopoll.h>
+#include <linux/jiffies.h>
+#include <linux/list.h>
+#include <linux/log2.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/of_dma.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/reset.h>
+#include <linux/slab.h>
+
+#include "virt-dma.h"
+
+#define STM32_MDMA_GISR0 0x0000 /* MDMA Int Status Reg 1 */
+
+/* MDMA Channel x interrupt/status register */
+#define STM32_MDMA_CISR(x) (0x40 + 0x40 * (x)) /* x = 0..62 */
+#define STM32_MDMA_CISR_CRQA BIT(16)
+#define STM32_MDMA_CISR_TCIF BIT(4)
+#define STM32_MDMA_CISR_BTIF BIT(3)
+#define STM32_MDMA_CISR_BRTIF BIT(2)
+#define STM32_MDMA_CISR_CTCIF BIT(1)
+#define STM32_MDMA_CISR_TEIF BIT(0)
+
+/* MDMA Channel x interrupt flag clear register */
+#define STM32_MDMA_CIFCR(x) (0x44 + 0x40 * (x))
+#define STM32_MDMA_CIFCR_CLTCIF BIT(4)
+#define STM32_MDMA_CIFCR_CBTIF BIT(3)
+#define STM32_MDMA_CIFCR_CBRTIF BIT(2)
+#define STM32_MDMA_CIFCR_CCTCIF BIT(1)
+#define STM32_MDMA_CIFCR_CTEIF BIT(0)
+#define STM32_MDMA_CIFCR_CLEAR_ALL (STM32_MDMA_CIFCR_CLTCIF \
+ | STM32_MDMA_CIFCR_CBTIF \
+ | STM32_MDMA_CIFCR_CBRTIF \
+ | STM32_MDMA_CIFCR_CCTCIF \
+ | STM32_MDMA_CIFCR_CTEIF)
+
+/* MDMA Channel x error status register */
+#define STM32_MDMA_CESR(x) (0x48 + 0x40 * (x))
+#define STM32_MDMA_CESR_BSE BIT(11)
+#define STM32_MDMA_CESR_ASR BIT(10)
+#define STM32_MDMA_CESR_TEMD BIT(9)
+#define STM32_MDMA_CESR_TELD BIT(8)
+#define STM32_MDMA_CESR_TED BIT(7)
+#define STM32_MDMA_CESR_TEA_MASK GENMASK(6, 0)
+
+/* MDMA Channel x control register */
+#define STM32_MDMA_CCR(x) (0x4C + 0x40 * (x))
+#define STM32_MDMA_CCR_SWRQ BIT(16)
+#define STM32_MDMA_CCR_WEX BIT(14)
+#define STM32_MDMA_CCR_HEX BIT(13)
+#define STM32_MDMA_CCR_BEX BIT(12)
+#define STM32_MDMA_CCR_SM BIT(8)
+#define STM32_MDMA_CCR_PL_MASK GENMASK(7, 6)
+#define STM32_MDMA_CCR_PL(n) FIELD_PREP(STM32_MDMA_CCR_PL_MASK, (n))
+#define STM32_MDMA_CCR_TCIE BIT(5)
+#define STM32_MDMA_CCR_BTIE BIT(4)
+#define STM32_MDMA_CCR_BRTIE BIT(3)
+#define STM32_MDMA_CCR_CTCIE BIT(2)
+#define STM32_MDMA_CCR_TEIE BIT(1)
+#define STM32_MDMA_CCR_EN BIT(0)
+#define STM32_MDMA_CCR_IRQ_MASK (STM32_MDMA_CCR_TCIE \
+ | STM32_MDMA_CCR_BTIE \
+ | STM32_MDMA_CCR_BRTIE \
+ | STM32_MDMA_CCR_CTCIE \
+ | STM32_MDMA_CCR_TEIE)
+
+/* MDMA Channel x transfer configuration register */
+#define STM32_MDMA_CTCR(x) (0x50 + 0x40 * (x))
+#define STM32_MDMA_CTCR_BWM BIT(31)
+#define STM32_MDMA_CTCR_SWRM BIT(30)
+#define STM32_MDMA_CTCR_TRGM_MSK GENMASK(29, 28)
+#define STM32_MDMA_CTCR_TRGM(n) FIELD_PREP(STM32_MDMA_CTCR_TRGM_MSK, (n))
+#define STM32_MDMA_CTCR_TRGM_GET(n) FIELD_GET(STM32_MDMA_CTCR_TRGM_MSK, (n))
+#define STM32_MDMA_CTCR_PAM_MASK GENMASK(27, 26)
+#define STM32_MDMA_CTCR_PAM(n) FIELD_PREP(STM32_MDMA_CTCR_PAM_MASK, (n))
+#define STM32_MDMA_CTCR_PKE BIT(25)
+#define STM32_MDMA_CTCR_TLEN_MSK GENMASK(24, 18)
+#define STM32_MDMA_CTCR_TLEN(n) FIELD_PREP(STM32_MDMA_CTCR_TLEN_MSK, (n))
+#define STM32_MDMA_CTCR_TLEN_GET(n) FIELD_GET(STM32_MDMA_CTCR_TLEN_MSK, (n))
+#define STM32_MDMA_CTCR_LEN2_MSK GENMASK(25, 18)
+#define STM32_MDMA_CTCR_LEN2(n) FIELD_PREP(STM32_MDMA_CTCR_LEN2_MSK, (n))
+#define STM32_MDMA_CTCR_LEN2_GET(n) FIELD_GET(STM32_MDMA_CTCR_LEN2_MSK, (n))
+#define STM32_MDMA_CTCR_DBURST_MASK GENMASK(17, 15)
+#define STM32_MDMA_CTCR_DBURST(n) FIELD_PREP(STM32_MDMA_CTCR_DBURST_MASK, (n))
+#define STM32_MDMA_CTCR_SBURST_MASK GENMASK(14, 12)
+#define STM32_MDMA_CTCR_SBURST(n) FIELD_PREP(STM32_MDMA_CTCR_SBURST_MASK, (n))
+#define STM32_MDMA_CTCR_DINCOS_MASK GENMASK(11, 10)
+#define STM32_MDMA_CTCR_DINCOS(n) FIELD_PREP(STM32_MDMA_CTCR_DINCOS_MASK, (n))
+#define STM32_MDMA_CTCR_SINCOS_MASK GENMASK(9, 8)
+#define STM32_MDMA_CTCR_SINCOS(n) FIELD_PREP(STM32_MDMA_CTCR_SINCOS_MASK, (n))
+#define STM32_MDMA_CTCR_DSIZE_MASK GENMASK(7, 6)
+#define STM32_MDMA_CTCR_DSIZE(n) FIELD_PREP(STM32_MDMA_CTCR_DSIZE_MASK, (n))
+#define STM32_MDMA_CTCR_SSIZE_MASK GENMASK(5, 4)
+#define STM32_MDMA_CTCR_SSIZE(n) FIELD_PREP(STM32_MDMA_CTCR_SSIZE_MASK, (n))
+#define STM32_MDMA_CTCR_DINC_MASK GENMASK(3, 2)
+#define STM32_MDMA_CTCR_DINC(n) FIELD_PREP(STM32_MDMA_CTCR_DINC_MASK, (n))
+#define STM32_MDMA_CTCR_SINC_MASK GENMASK(1, 0)
+#define STM32_MDMA_CTCR_SINC(n) FIELD_PREP(STM32_MDMA_CTCR_SINC_MASK, (n))
+#define STM32_MDMA_CTCR_CFG_MASK (STM32_MDMA_CTCR_SINC_MASK \
+ | STM32_MDMA_CTCR_DINC_MASK \
+ | STM32_MDMA_CTCR_SINCOS_MASK \
+ | STM32_MDMA_CTCR_DINCOS_MASK \
+ | STM32_MDMA_CTCR_LEN2_MSK \
+ | STM32_MDMA_CTCR_TRGM_MSK)
+
+/* MDMA Channel x block number of data register */
+#define STM32_MDMA_CBNDTR(x) (0x54 + 0x40 * (x))
+#define STM32_MDMA_CBNDTR_BRC_MK GENMASK(31, 20)
+#define STM32_MDMA_CBNDTR_BRC(n) FIELD_PREP(STM32_MDMA_CBNDTR_BRC_MK, (n))
+#define STM32_MDMA_CBNDTR_BRC_GET(n) FIELD_GET(STM32_MDMA_CBNDTR_BRC_MK, (n))
+
+#define STM32_MDMA_CBNDTR_BRDUM BIT(19)
+#define STM32_MDMA_CBNDTR_BRSUM BIT(18)
+#define STM32_MDMA_CBNDTR_BNDT_MASK GENMASK(16, 0)
+#define STM32_MDMA_CBNDTR_BNDT(n) FIELD_PREP(STM32_MDMA_CBNDTR_BNDT_MASK, (n))
+
+/* MDMA Channel x source address register */
+#define STM32_MDMA_CSAR(x) (0x58 + 0x40 * (x))
+
+/* MDMA Channel x destination address register */
+#define STM32_MDMA_CDAR(x) (0x5C + 0x40 * (x))
+
+/* MDMA Channel x block repeat address update register */
+#define STM32_MDMA_CBRUR(x) (0x60 + 0x40 * (x))
+#define STM32_MDMA_CBRUR_DUV_MASK GENMASK(31, 16)
+#define STM32_MDMA_CBRUR_DUV(n) FIELD_PREP(STM32_MDMA_CBRUR_DUV_MASK, (n))
+#define STM32_MDMA_CBRUR_SUV_MASK GENMASK(15, 0)
+#define STM32_MDMA_CBRUR_SUV(n) FIELD_PREP(STM32_MDMA_CBRUR_SUV_MASK, (n))
+
+/* MDMA Channel x link address register */
+#define STM32_MDMA_CLAR(x) (0x64 + 0x40 * (x))
+
+/* MDMA Channel x trigger and bus selection register */
+#define STM32_MDMA_CTBR(x) (0x68 + 0x40 * (x))
+#define STM32_MDMA_CTBR_DBUS BIT(17)
+#define STM32_MDMA_CTBR_SBUS BIT(16)
+#define STM32_MDMA_CTBR_TSEL_MASK GENMASK(5, 0)
+#define STM32_MDMA_CTBR_TSEL(n) FIELD_PREP(STM32_MDMA_CTBR_TSEL_MASK, (n))
+
+/* MDMA Channel x mask address register */
+#define STM32_MDMA_CMAR(x) (0x70 + 0x40 * (x))
+
+/* MDMA Channel x mask data register */
+#define STM32_MDMA_CMDR(x) (0x74 + 0x40 * (x))
+
+#define STM32_MDMA_MAX_BUF_LEN 128
+#define STM32_MDMA_MAX_BLOCK_LEN 65536
+#define STM32_MDMA_MAX_CHANNELS 32
+#define STM32_MDMA_MAX_REQUESTS 256
+#define STM32_MDMA_MAX_BURST 128
+#define STM32_MDMA_VERY_HIGH_PRIORITY 0x3
+
+enum stm32_mdma_trigger_mode {
+ STM32_MDMA_BUFFER,
+ STM32_MDMA_BLOCK,
+ STM32_MDMA_BLOCK_REP,
+ STM32_MDMA_LINKED_LIST,
+};
+
+enum stm32_mdma_width {
+ STM32_MDMA_BYTE,
+ STM32_MDMA_HALF_WORD,
+ STM32_MDMA_WORD,
+ STM32_MDMA_DOUBLE_WORD,
+};
+
+enum stm32_mdma_inc_mode {
+ STM32_MDMA_FIXED = 0,
+ STM32_MDMA_INC = 2,
+ STM32_MDMA_DEC = 3,
+};
+
+struct stm32_mdma_chan_config {
+ u32 request;
+ u32 priority_level;
+ u32 transfer_config;
+ u32 mask_addr;
+ u32 mask_data;
+ bool m2m_hw; /* True when MDMA is triggered by STM32 DMA */
+};
+
+struct stm32_mdma_hwdesc {
+ u32 ctcr;
+ u32 cbndtr;
+ u32 csar;
+ u32 cdar;
+ u32 cbrur;
+ u32 clar;
+ u32 ctbr;
+ u32 dummy;
+ u32 cmar;
+ u32 cmdr;
+} __aligned(64);
+
+struct stm32_mdma_desc_node {
+ struct stm32_mdma_hwdesc *hwdesc;
+ dma_addr_t hwdesc_phys;
+};
+
+struct stm32_mdma_desc {
+ struct virt_dma_desc vdesc;
+ u32 ccr;
+ bool cyclic;
+ u32 count;
+ struct stm32_mdma_desc_node node[];
+};
+
+struct stm32_mdma_dma_config {
+ u32 request; /* STM32 DMA channel stream id, triggering MDMA */
+ u32 cmar; /* STM32 DMA interrupt flag clear register address */
+ u32 cmdr; /* STM32 DMA Transfer Complete flag */
+};
+
+struct stm32_mdma_chan {
+ struct virt_dma_chan vchan;
+ struct dma_pool *desc_pool;
+ u32 id;
+ struct stm32_mdma_desc *desc;
+ u32 curr_hwdesc;
+ struct dma_slave_config dma_config;
+ struct stm32_mdma_chan_config chan_config;
+ bool busy;
+ u32 mem_burst;
+ u32 mem_width;
+};
+
+struct stm32_mdma_device {
+ struct dma_device ddev;
+ void __iomem *base;
+ struct clk *clk;
+ int irq;
+ u32 nr_channels;
+ u32 nr_requests;
+ u32 nr_ahb_addr_masks;
+ u32 chan_reserved;
+ struct stm32_mdma_chan chan[STM32_MDMA_MAX_CHANNELS];
+ u32 ahb_addr_masks[];
+};
+
+static struct stm32_mdma_device *stm32_mdma_get_dev(
+ struct stm32_mdma_chan *chan)
+{
+ return container_of(chan->vchan.chan.device, struct stm32_mdma_device,
+ ddev);
+}
+
+static struct stm32_mdma_chan *to_stm32_mdma_chan(struct dma_chan *c)
+{
+ return container_of(c, struct stm32_mdma_chan, vchan.chan);
+}
+
+static struct stm32_mdma_desc *to_stm32_mdma_desc(struct virt_dma_desc *vdesc)
+{
+ return container_of(vdesc, struct stm32_mdma_desc, vdesc);
+}
+
+static struct device *chan2dev(struct stm32_mdma_chan *chan)
+{
+ return &chan->vchan.chan.dev->device;
+}
+
+static struct device *mdma2dev(struct stm32_mdma_device *mdma_dev)
+{
+ return mdma_dev->ddev.dev;
+}
+
+static u32 stm32_mdma_read(struct stm32_mdma_device *dmadev, u32 reg)
+{
+ return readl_relaxed(dmadev->base + reg);
+}
+
+static void stm32_mdma_write(struct stm32_mdma_device *dmadev, u32 reg, u32 val)
+{
+ writel_relaxed(val, dmadev->base + reg);
+}
+
+static void stm32_mdma_set_bits(struct stm32_mdma_device *dmadev, u32 reg,
+ u32 mask)
+{
+ void __iomem *addr = dmadev->base + reg;
+
+ writel_relaxed(readl_relaxed(addr) | mask, addr);
+}
+
+static void stm32_mdma_clr_bits(struct stm32_mdma_device *dmadev, u32 reg,
+ u32 mask)
+{
+ void __iomem *addr = dmadev->base + reg;
+
+ writel_relaxed(readl_relaxed(addr) & ~mask, addr);
+}
+
+static struct stm32_mdma_desc *stm32_mdma_alloc_desc(
+ struct stm32_mdma_chan *chan, u32 count)
+{
+ struct stm32_mdma_desc *desc;
+ int i;
+
+ desc = kzalloc(struct_size(desc, node, count), GFP_NOWAIT);
+ if (!desc)
+ return NULL;
+
+ for (i = 0; i < count; i++) {
+ desc->node[i].hwdesc =
+ dma_pool_alloc(chan->desc_pool, GFP_NOWAIT,
+ &desc->node[i].hwdesc_phys);
+ if (!desc->node[i].hwdesc)
+ goto err;
+ }
+
+ desc->count = count;
+
+ return desc;
+
+err:
+ dev_err(chan2dev(chan), "Failed to allocate descriptor\n");
+ while (--i >= 0)
+ dma_pool_free(chan->desc_pool, desc->node[i].hwdesc,
+ desc->node[i].hwdesc_phys);
+ kfree(desc);
+ return NULL;
+}
+
+static void stm32_mdma_desc_free(struct virt_dma_desc *vdesc)
+{
+ struct stm32_mdma_desc *desc = to_stm32_mdma_desc(vdesc);
+ struct stm32_mdma_chan *chan = to_stm32_mdma_chan(vdesc->tx.chan);
+ int i;
+
+ for (i = 0; i < desc->count; i++)
+ dma_pool_free(chan->desc_pool, desc->node[i].hwdesc,
+ desc->node[i].hwdesc_phys);
+ kfree(desc);
+}
+
+static int stm32_mdma_get_width(struct stm32_mdma_chan *chan,
+ enum dma_slave_buswidth width)
+{
+ switch (width) {
+ case DMA_SLAVE_BUSWIDTH_1_BYTE:
+ case DMA_SLAVE_BUSWIDTH_2_BYTES:
+ case DMA_SLAVE_BUSWIDTH_4_BYTES:
+ case DMA_SLAVE_BUSWIDTH_8_BYTES:
+ return ffs(width) - 1;
+ default:
+ dev_err(chan2dev(chan), "Dma bus width %i not supported\n",
+ width);
+ return -EINVAL;
+ }
+}
+
+static enum dma_slave_buswidth stm32_mdma_get_max_width(dma_addr_t addr,
+ u32 buf_len, u32 tlen)
+{
+ enum dma_slave_buswidth max_width = DMA_SLAVE_BUSWIDTH_8_BYTES;
+
+ for (max_width = DMA_SLAVE_BUSWIDTH_8_BYTES;
+ max_width > DMA_SLAVE_BUSWIDTH_1_BYTE;
+ max_width >>= 1) {
+ /*
+ * Address and buffer length both have to be aligned on
+ * bus width
+ */
+ if ((((buf_len | addr) & (max_width - 1)) == 0) &&
+ tlen >= max_width)
+ break;
+ }
+
+ return max_width;
+}
+
+static u32 stm32_mdma_get_best_burst(u32 buf_len, u32 tlen, u32 max_burst,
+ enum dma_slave_buswidth width)
+{
+ u32 best_burst;
+
+ best_burst = min((u32)1 << __ffs(tlen | buf_len),
+ max_burst * width) / width;
+
+ return (best_burst > 0) ? best_burst : 1;
+}
+
+static int stm32_mdma_disable_chan(struct stm32_mdma_chan *chan)
+{
+ struct stm32_mdma_device *dmadev = stm32_mdma_get_dev(chan);
+ u32 ccr, cisr, id, reg;
+ int ret;
+
+ id = chan->id;
+ reg = STM32_MDMA_CCR(id);
+
+ /* Disable interrupts */
+ stm32_mdma_clr_bits(dmadev, reg, STM32_MDMA_CCR_IRQ_MASK);
+
+ ccr = stm32_mdma_read(dmadev, reg);
+ if (ccr & STM32_MDMA_CCR_EN) {
+ stm32_mdma_clr_bits(dmadev, reg, STM32_MDMA_CCR_EN);
+
+ /* Ensure that any ongoing transfer has been completed */
+ ret = readl_relaxed_poll_timeout_atomic(
+ dmadev->base + STM32_MDMA_CISR(id), cisr,
+ (cisr & STM32_MDMA_CISR_CTCIF), 10, 1000);
+ if (ret) {
+ dev_err(chan2dev(chan), "%s: timeout!\n", __func__);
+ return -EBUSY;
+ }
+ }
+
+ return 0;
+}
+
+static void stm32_mdma_stop(struct stm32_mdma_chan *chan)
+{
+ struct stm32_mdma_device *dmadev = stm32_mdma_get_dev(chan);
+ u32 status;
+ int ret;
+
+ /* Disable DMA */
+ ret = stm32_mdma_disable_chan(chan);
+ if (ret < 0)
+ return;
+
+ /* Clear interrupt status if it is there */
+ status = stm32_mdma_read(dmadev, STM32_MDMA_CISR(chan->id));
+ if (status) {
+ dev_dbg(chan2dev(chan), "%s(): clearing interrupt: 0x%08x\n",
+ __func__, status);
+ stm32_mdma_set_bits(dmadev, STM32_MDMA_CIFCR(chan->id), status);
+ }
+
+ chan->busy = false;
+}
+
+static void stm32_mdma_set_bus(struct stm32_mdma_device *dmadev, u32 *ctbr,
+ u32 ctbr_mask, u32 src_addr)
+{
+ u32 mask;
+ int i;
+
+ /* Check if memory device is on AHB or AXI */
+ *ctbr &= ~ctbr_mask;
+ mask = src_addr & 0xF0000000;
+ for (i = 0; i < dmadev->nr_ahb_addr_masks; i++) {
+ if (mask == dmadev->ahb_addr_masks[i]) {
+ *ctbr |= ctbr_mask;
+ break;
+ }
+ }
+}
+
+static int stm32_mdma_set_xfer_param(struct stm32_mdma_chan *chan,
+ enum dma_transfer_direction direction,
+ u32 *mdma_ccr, u32 *mdma_ctcr,
+ u32 *mdma_ctbr, dma_addr_t addr,
+ u32 buf_len)
+{
+ struct stm32_mdma_device *dmadev = stm32_mdma_get_dev(chan);
+ struct stm32_mdma_chan_config *chan_config = &chan->chan_config;
+ enum dma_slave_buswidth src_addr_width, dst_addr_width;
+ phys_addr_t src_addr, dst_addr;
+ int src_bus_width, dst_bus_width;
+ u32 src_maxburst, dst_maxburst, src_best_burst, dst_best_burst;
+ u32 ccr, ctcr, ctbr, tlen;
+
+ src_addr_width = chan->dma_config.src_addr_width;
+ dst_addr_width = chan->dma_config.dst_addr_width;
+ src_maxburst = chan->dma_config.src_maxburst;
+ dst_maxburst = chan->dma_config.dst_maxburst;
+
+ ccr = stm32_mdma_read(dmadev, STM32_MDMA_CCR(chan->id)) & ~STM32_MDMA_CCR_EN;
+ ctcr = stm32_mdma_read(dmadev, STM32_MDMA_CTCR(chan->id));
+ ctbr = stm32_mdma_read(dmadev, STM32_MDMA_CTBR(chan->id));
+
+ /* Enable HW request mode */
+ ctcr &= ~STM32_MDMA_CTCR_SWRM;
+
+ /* Set DINC, SINC, DINCOS, SINCOS, TRGM and TLEN retrieve from DT */
+ ctcr &= ~STM32_MDMA_CTCR_CFG_MASK;
+ ctcr |= chan_config->transfer_config & STM32_MDMA_CTCR_CFG_MASK;
+
+ /*
+ * For buffer transfer length (TLEN) we have to set
+ * the number of bytes - 1 in CTCR register
+ */
+ tlen = STM32_MDMA_CTCR_LEN2_GET(ctcr);
+ ctcr &= ~STM32_MDMA_CTCR_LEN2_MSK;
+ ctcr |= STM32_MDMA_CTCR_TLEN((tlen - 1));
+
+ /* Disable Pack Enable */
+ ctcr &= ~STM32_MDMA_CTCR_PKE;
+
+ /* Check burst size constraints */
+ if (src_maxburst * src_addr_width > STM32_MDMA_MAX_BURST ||
+ dst_maxburst * dst_addr_width > STM32_MDMA_MAX_BURST) {
+ dev_err(chan2dev(chan),
+ "burst size * bus width higher than %d bytes\n",
+ STM32_MDMA_MAX_BURST);
+ return -EINVAL;
+ }
+
+ if ((!is_power_of_2(src_maxburst) && src_maxburst > 0) ||
+ (!is_power_of_2(dst_maxburst) && dst_maxburst > 0)) {
+ dev_err(chan2dev(chan), "burst size must be a power of 2\n");
+ return -EINVAL;
+ }
+
+ /*
+ * Configure channel control:
+ * - Clear SW request as in this case this is a HW one
+ * - Clear WEX, HEX and BEX bits
+ * - Set priority level
+ */
+ ccr &= ~(STM32_MDMA_CCR_SWRQ | STM32_MDMA_CCR_WEX | STM32_MDMA_CCR_HEX |
+ STM32_MDMA_CCR_BEX | STM32_MDMA_CCR_PL_MASK);
+ ccr |= STM32_MDMA_CCR_PL(chan_config->priority_level);
+
+ /* Configure Trigger selection */
+ ctbr &= ~STM32_MDMA_CTBR_TSEL_MASK;
+ ctbr |= STM32_MDMA_CTBR_TSEL(chan_config->request);
+
+ switch (direction) {
+ case DMA_MEM_TO_DEV:
+ dst_addr = chan->dma_config.dst_addr;
+
+ /* Set device data size */
+ if (chan_config->m2m_hw)
+ dst_addr_width = stm32_mdma_get_max_width(dst_addr, buf_len,
+ STM32_MDMA_MAX_BUF_LEN);
+ dst_bus_width = stm32_mdma_get_width(chan, dst_addr_width);
+ if (dst_bus_width < 0)
+ return dst_bus_width;
+ ctcr &= ~STM32_MDMA_CTCR_DSIZE_MASK;
+ ctcr |= STM32_MDMA_CTCR_DSIZE(dst_bus_width);
+ if (chan_config->m2m_hw) {
+ ctcr &= ~STM32_MDMA_CTCR_DINCOS_MASK;
+ ctcr |= STM32_MDMA_CTCR_DINCOS(dst_bus_width);
+ }
+
+ /* Set device burst value */
+ if (chan_config->m2m_hw)
+ dst_maxburst = STM32_MDMA_MAX_BUF_LEN / dst_addr_width;
+
+ dst_best_burst = stm32_mdma_get_best_burst(buf_len, tlen,
+ dst_maxburst,
+ dst_addr_width);
+ chan->mem_burst = dst_best_burst;
+ ctcr &= ~STM32_MDMA_CTCR_DBURST_MASK;
+ ctcr |= STM32_MDMA_CTCR_DBURST((ilog2(dst_best_burst)));
+
+ /* Set memory data size */
+ src_addr_width = stm32_mdma_get_max_width(addr, buf_len, tlen);
+ chan->mem_width = src_addr_width;
+ src_bus_width = stm32_mdma_get_width(chan, src_addr_width);
+ if (src_bus_width < 0)
+ return src_bus_width;
+ ctcr &= ~STM32_MDMA_CTCR_SSIZE_MASK |
+ STM32_MDMA_CTCR_SINCOS_MASK;
+ ctcr |= STM32_MDMA_CTCR_SSIZE(src_bus_width) |
+ STM32_MDMA_CTCR_SINCOS(src_bus_width);
+
+ /* Set memory burst value */
+ src_maxburst = STM32_MDMA_MAX_BUF_LEN / src_addr_width;
+ src_best_burst = stm32_mdma_get_best_burst(buf_len, tlen,
+ src_maxburst,
+ src_addr_width);
+ chan->mem_burst = src_best_burst;
+ ctcr &= ~STM32_MDMA_CTCR_SBURST_MASK;
+ ctcr |= STM32_MDMA_CTCR_SBURST((ilog2(src_best_burst)));
+
+ /* Select bus */
+ stm32_mdma_set_bus(dmadev, &ctbr, STM32_MDMA_CTBR_DBUS,
+ dst_addr);
+
+ if (dst_bus_width != src_bus_width)
+ ctcr |= STM32_MDMA_CTCR_PKE;
+
+ /* Set destination address */
+ stm32_mdma_write(dmadev, STM32_MDMA_CDAR(chan->id), dst_addr);
+ break;
+
+ case DMA_DEV_TO_MEM:
+ src_addr = chan->dma_config.src_addr;
+
+ /* Set device data size */
+ if (chan_config->m2m_hw)
+ src_addr_width = stm32_mdma_get_max_width(src_addr, buf_len,
+ STM32_MDMA_MAX_BUF_LEN);
+
+ src_bus_width = stm32_mdma_get_width(chan, src_addr_width);
+ if (src_bus_width < 0)
+ return src_bus_width;
+ ctcr &= ~STM32_MDMA_CTCR_SSIZE_MASK;
+ ctcr |= STM32_MDMA_CTCR_SSIZE(src_bus_width);
+ if (chan_config->m2m_hw) {
+ ctcr &= ~STM32_MDMA_CTCR_SINCOS_MASK;
+ ctcr |= STM32_MDMA_CTCR_SINCOS(src_bus_width);
+ }
+
+ /* Set device burst value */
+ if (chan_config->m2m_hw)
+ src_maxburst = STM32_MDMA_MAX_BUF_LEN / src_addr_width;
+
+ src_best_burst = stm32_mdma_get_best_burst(buf_len, tlen,
+ src_maxburst,
+ src_addr_width);
+ ctcr &= ~STM32_MDMA_CTCR_SBURST_MASK;
+ ctcr |= STM32_MDMA_CTCR_SBURST((ilog2(src_best_burst)));
+
+ /* Set memory data size */
+ dst_addr_width = stm32_mdma_get_max_width(addr, buf_len, tlen);
+ chan->mem_width = dst_addr_width;
+ dst_bus_width = stm32_mdma_get_width(chan, dst_addr_width);
+ if (dst_bus_width < 0)
+ return dst_bus_width;
+ ctcr &= ~(STM32_MDMA_CTCR_DSIZE_MASK |
+ STM32_MDMA_CTCR_DINCOS_MASK);
+ ctcr |= STM32_MDMA_CTCR_DSIZE(dst_bus_width) |
+ STM32_MDMA_CTCR_DINCOS(dst_bus_width);
+
+ /* Set memory burst value */
+ dst_maxburst = STM32_MDMA_MAX_BUF_LEN / dst_addr_width;
+ dst_best_burst = stm32_mdma_get_best_burst(buf_len, tlen,
+ dst_maxburst,
+ dst_addr_width);
+ ctcr &= ~STM32_MDMA_CTCR_DBURST_MASK;
+ ctcr |= STM32_MDMA_CTCR_DBURST((ilog2(dst_best_burst)));
+
+ /* Select bus */
+ stm32_mdma_set_bus(dmadev, &ctbr, STM32_MDMA_CTBR_SBUS,
+ src_addr);
+
+ if (dst_bus_width != src_bus_width)
+ ctcr |= STM32_MDMA_CTCR_PKE;
+
+ /* Set source address */
+ stm32_mdma_write(dmadev, STM32_MDMA_CSAR(chan->id), src_addr);
+ break;
+
+ default:
+ dev_err(chan2dev(chan), "Dma direction is not supported\n");
+ return -EINVAL;
+ }
+
+ *mdma_ccr = ccr;
+ *mdma_ctcr = ctcr;
+ *mdma_ctbr = ctbr;
+
+ return 0;
+}
+
+static void stm32_mdma_dump_hwdesc(struct stm32_mdma_chan *chan,
+ struct stm32_mdma_desc_node *node)
+{
+ dev_dbg(chan2dev(chan), "hwdesc: %pad\n", &node->hwdesc_phys);
+ dev_dbg(chan2dev(chan), "CTCR: 0x%08x\n", node->hwdesc->ctcr);
+ dev_dbg(chan2dev(chan), "CBNDTR: 0x%08x\n", node->hwdesc->cbndtr);
+ dev_dbg(chan2dev(chan), "CSAR: 0x%08x\n", node->hwdesc->csar);
+ dev_dbg(chan2dev(chan), "CDAR: 0x%08x\n", node->hwdesc->cdar);
+ dev_dbg(chan2dev(chan), "CBRUR: 0x%08x\n", node->hwdesc->cbrur);
+ dev_dbg(chan2dev(chan), "CLAR: 0x%08x\n", node->hwdesc->clar);
+ dev_dbg(chan2dev(chan), "CTBR: 0x%08x\n", node->hwdesc->ctbr);
+ dev_dbg(chan2dev(chan), "CMAR: 0x%08x\n", node->hwdesc->cmar);
+ dev_dbg(chan2dev(chan), "CMDR: 0x%08x\n\n", node->hwdesc->cmdr);
+}
+
+static void stm32_mdma_setup_hwdesc(struct stm32_mdma_chan *chan,
+ struct stm32_mdma_desc *desc,
+ enum dma_transfer_direction dir, u32 count,
+ dma_addr_t src_addr, dma_addr_t dst_addr,
+ u32 len, u32 ctcr, u32 ctbr, bool is_last,
+ bool is_first, bool is_cyclic)
+{
+ struct stm32_mdma_chan_config *config = &chan->chan_config;
+ struct stm32_mdma_hwdesc *hwdesc;
+ u32 next = count + 1;
+
+ hwdesc = desc->node[count].hwdesc;
+ hwdesc->ctcr = ctcr;
+ hwdesc->cbndtr &= ~(STM32_MDMA_CBNDTR_BRC_MK |
+ STM32_MDMA_CBNDTR_BRDUM |
+ STM32_MDMA_CBNDTR_BRSUM |
+ STM32_MDMA_CBNDTR_BNDT_MASK);
+ hwdesc->cbndtr |= STM32_MDMA_CBNDTR_BNDT(len);
+ hwdesc->csar = src_addr;
+ hwdesc->cdar = dst_addr;
+ hwdesc->cbrur = 0;
+ hwdesc->ctbr = ctbr;
+ hwdesc->cmar = config->mask_addr;
+ hwdesc->cmdr = config->mask_data;
+
+ if (is_last) {
+ if (is_cyclic)
+ hwdesc->clar = desc->node[0].hwdesc_phys;
+ else
+ hwdesc->clar = 0;
+ } else {
+ hwdesc->clar = desc->node[next].hwdesc_phys;
+ }
+
+ stm32_mdma_dump_hwdesc(chan, &desc->node[count]);
+}
+
+static int stm32_mdma_setup_xfer(struct stm32_mdma_chan *chan,
+ struct stm32_mdma_desc *desc,
+ struct scatterlist *sgl, u32 sg_len,
+ enum dma_transfer_direction direction)
+{
+ struct stm32_mdma_device *dmadev = stm32_mdma_get_dev(chan);
+ struct dma_slave_config *dma_config = &chan->dma_config;
+ struct stm32_mdma_chan_config *chan_config = &chan->chan_config;
+ struct scatterlist *sg;
+ dma_addr_t src_addr, dst_addr;
+ u32 m2m_hw_period, ccr, ctcr, ctbr;
+ int i, ret = 0;
+
+ if (chan_config->m2m_hw)
+ m2m_hw_period = sg_dma_len(sgl);
+
+ for_each_sg(sgl, sg, sg_len, i) {
+ if (sg_dma_len(sg) > STM32_MDMA_MAX_BLOCK_LEN) {
+ dev_err(chan2dev(chan), "Invalid block len\n");
+ return -EINVAL;
+ }
+
+ if (direction == DMA_MEM_TO_DEV) {
+ src_addr = sg_dma_address(sg);
+ dst_addr = dma_config->dst_addr;
+ if (chan_config->m2m_hw && (i & 1))
+ dst_addr += m2m_hw_period;
+ ret = stm32_mdma_set_xfer_param(chan, direction, &ccr,
+ &ctcr, &ctbr, src_addr,
+ sg_dma_len(sg));
+ stm32_mdma_set_bus(dmadev, &ctbr, STM32_MDMA_CTBR_SBUS,
+ src_addr);
+ } else {
+ src_addr = dma_config->src_addr;
+ if (chan_config->m2m_hw && (i & 1))
+ src_addr += m2m_hw_period;
+ dst_addr = sg_dma_address(sg);
+ ret = stm32_mdma_set_xfer_param(chan, direction, &ccr,
+ &ctcr, &ctbr, dst_addr,
+ sg_dma_len(sg));
+ stm32_mdma_set_bus(dmadev, &ctbr, STM32_MDMA_CTBR_DBUS,
+ dst_addr);
+ }
+
+ if (ret < 0)
+ return ret;
+
+ stm32_mdma_setup_hwdesc(chan, desc, direction, i, src_addr,
+ dst_addr, sg_dma_len(sg), ctcr, ctbr,
+ i == sg_len - 1, i == 0, false);
+ }
+
+ /* Enable interrupts */
+ ccr &= ~STM32_MDMA_CCR_IRQ_MASK;
+ ccr |= STM32_MDMA_CCR_TEIE | STM32_MDMA_CCR_CTCIE;
+ desc->ccr = ccr;
+
+ return 0;
+}
+
+static struct dma_async_tx_descriptor *
+stm32_mdma_prep_slave_sg(struct dma_chan *c, struct scatterlist *sgl,
+ u32 sg_len, enum dma_transfer_direction direction,
+ unsigned long flags, void *context)
+{
+ struct stm32_mdma_chan *chan = to_stm32_mdma_chan(c);
+ struct stm32_mdma_chan_config *chan_config = &chan->chan_config;
+ struct stm32_mdma_desc *desc;
+ int i, ret;
+
+ /*
+ * Once DMA is in setup cyclic mode the channel we cannot assign this
+ * channel anymore. The DMA channel needs to be aborted or terminated
+ * for allowing another request.
+ */
+ if (chan->desc && chan->desc->cyclic) {
+ dev_err(chan2dev(chan),
+ "Request not allowed when dma in cyclic mode\n");
+ return NULL;
+ }
+
+ desc = stm32_mdma_alloc_desc(chan, sg_len);
+ if (!desc)
+ return NULL;
+
+ ret = stm32_mdma_setup_xfer(chan, desc, sgl, sg_len, direction);
+ if (ret < 0)
+ goto xfer_setup_err;
+
+ /*
+ * In case of M2M HW transfer triggered by STM32 DMA, we do not have to clear the
+ * transfer complete flag by hardware in order to let the CPU rearm the STM32 DMA
+ * with the next sg element and update some data in dmaengine framework.
+ */
+ if (chan_config->m2m_hw && direction == DMA_MEM_TO_DEV) {
+ struct stm32_mdma_hwdesc *hwdesc;
+
+ for (i = 0; i < sg_len; i++) {
+ hwdesc = desc->node[i].hwdesc;
+ hwdesc->cmar = 0;
+ hwdesc->cmdr = 0;
+ }
+ }
+
+ desc->cyclic = false;
+
+ return vchan_tx_prep(&chan->vchan, &desc->vdesc, flags);
+
+xfer_setup_err:
+ for (i = 0; i < desc->count; i++)
+ dma_pool_free(chan->desc_pool, desc->node[i].hwdesc,
+ desc->node[i].hwdesc_phys);
+ kfree(desc);
+ return NULL;
+}
+
+static struct dma_async_tx_descriptor *
+stm32_mdma_prep_dma_cyclic(struct dma_chan *c, dma_addr_t buf_addr,
+ size_t buf_len, size_t period_len,
+ enum dma_transfer_direction direction,
+ unsigned long flags)
+{
+ struct stm32_mdma_chan *chan = to_stm32_mdma_chan(c);
+ struct stm32_mdma_device *dmadev = stm32_mdma_get_dev(chan);
+ struct dma_slave_config *dma_config = &chan->dma_config;
+ struct stm32_mdma_chan_config *chan_config = &chan->chan_config;
+ struct stm32_mdma_desc *desc;
+ dma_addr_t src_addr, dst_addr;
+ u32 ccr, ctcr, ctbr, count;
+ int i, ret;
+
+ /*
+ * Once DMA is in setup cyclic mode the channel we cannot assign this
+ * channel anymore. The DMA channel needs to be aborted or terminated
+ * for allowing another request.
+ */
+ if (chan->desc && chan->desc->cyclic) {
+ dev_err(chan2dev(chan),
+ "Request not allowed when dma in cyclic mode\n");
+ return NULL;
+ }
+
+ if (!buf_len || !period_len || period_len > STM32_MDMA_MAX_BLOCK_LEN) {
+ dev_err(chan2dev(chan), "Invalid buffer/period len\n");
+ return NULL;
+ }
+
+ if (buf_len % period_len) {
+ dev_err(chan2dev(chan), "buf_len not multiple of period_len\n");
+ return NULL;
+ }
+
+ count = buf_len / period_len;
+
+ desc = stm32_mdma_alloc_desc(chan, count);
+ if (!desc)
+ return NULL;
+
+ /* Select bus */
+ if (direction == DMA_MEM_TO_DEV) {
+ src_addr = buf_addr;
+ ret = stm32_mdma_set_xfer_param(chan, direction, &ccr, &ctcr,
+ &ctbr, src_addr, period_len);
+ stm32_mdma_set_bus(dmadev, &ctbr, STM32_MDMA_CTBR_SBUS,
+ src_addr);
+ } else {
+ dst_addr = buf_addr;
+ ret = stm32_mdma_set_xfer_param(chan, direction, &ccr, &ctcr,
+ &ctbr, dst_addr, period_len);
+ stm32_mdma_set_bus(dmadev, &ctbr, STM32_MDMA_CTBR_DBUS,
+ dst_addr);
+ }
+
+ if (ret < 0)
+ goto xfer_setup_err;
+
+ /* Enable interrupts */
+ ccr &= ~STM32_MDMA_CCR_IRQ_MASK;
+ ccr |= STM32_MDMA_CCR_TEIE | STM32_MDMA_CCR_CTCIE | STM32_MDMA_CCR_BTIE;
+ desc->ccr = ccr;
+
+ /* Configure hwdesc list */
+ for (i = 0; i < count; i++) {
+ if (direction == DMA_MEM_TO_DEV) {
+ src_addr = buf_addr + i * period_len;
+ dst_addr = dma_config->dst_addr;
+ if (chan_config->m2m_hw && (i & 1))
+ dst_addr += period_len;
+ } else {
+ src_addr = dma_config->src_addr;
+ if (chan_config->m2m_hw && (i & 1))
+ src_addr += period_len;
+ dst_addr = buf_addr + i * period_len;
+ }
+
+ stm32_mdma_setup_hwdesc(chan, desc, direction, i, src_addr,
+ dst_addr, period_len, ctcr, ctbr,
+ i == count - 1, i == 0, true);
+ }
+
+ desc->cyclic = true;
+
+ return vchan_tx_prep(&chan->vchan, &desc->vdesc, flags);
+
+xfer_setup_err:
+ for (i = 0; i < desc->count; i++)
+ dma_pool_free(chan->desc_pool, desc->node[i].hwdesc,
+ desc->node[i].hwdesc_phys);
+ kfree(desc);
+ return NULL;
+}
+
+static struct dma_async_tx_descriptor *
+stm32_mdma_prep_dma_memcpy(struct dma_chan *c, dma_addr_t dest, dma_addr_t src,
+ size_t len, unsigned long flags)
+{
+ struct stm32_mdma_chan *chan = to_stm32_mdma_chan(c);
+ struct stm32_mdma_device *dmadev = stm32_mdma_get_dev(chan);
+ enum dma_slave_buswidth max_width;
+ struct stm32_mdma_desc *desc;
+ struct stm32_mdma_hwdesc *hwdesc;
+ u32 ccr, ctcr, ctbr, cbndtr, count, max_burst, mdma_burst;
+ u32 best_burst, tlen;
+ size_t xfer_count, offset;
+ int src_bus_width, dst_bus_width;
+ int i;
+
+ /*
+ * Once DMA is in setup cyclic mode the channel we cannot assign this
+ * channel anymore. The DMA channel needs to be aborted or terminated
+ * to allow another request
+ */
+ if (chan->desc && chan->desc->cyclic) {
+ dev_err(chan2dev(chan),
+ "Request not allowed when dma in cyclic mode\n");
+ return NULL;
+ }
+
+ count = DIV_ROUND_UP(len, STM32_MDMA_MAX_BLOCK_LEN);
+ desc = stm32_mdma_alloc_desc(chan, count);
+ if (!desc)
+ return NULL;
+
+ ccr = stm32_mdma_read(dmadev, STM32_MDMA_CCR(chan->id)) & ~STM32_MDMA_CCR_EN;
+ ctcr = stm32_mdma_read(dmadev, STM32_MDMA_CTCR(chan->id));
+ ctbr = stm32_mdma_read(dmadev, STM32_MDMA_CTBR(chan->id));
+ cbndtr = stm32_mdma_read(dmadev, STM32_MDMA_CBNDTR(chan->id));
+
+ /* Enable sw req, some interrupts and clear other bits */
+ ccr &= ~(STM32_MDMA_CCR_WEX | STM32_MDMA_CCR_HEX |
+ STM32_MDMA_CCR_BEX | STM32_MDMA_CCR_PL_MASK |
+ STM32_MDMA_CCR_IRQ_MASK);
+ ccr |= STM32_MDMA_CCR_TEIE;
+
+ /* Enable SW request mode, dest/src inc and clear other bits */
+ ctcr &= ~(STM32_MDMA_CTCR_BWM | STM32_MDMA_CTCR_TRGM_MSK |
+ STM32_MDMA_CTCR_PAM_MASK | STM32_MDMA_CTCR_PKE |
+ STM32_MDMA_CTCR_TLEN_MSK | STM32_MDMA_CTCR_DBURST_MASK |
+ STM32_MDMA_CTCR_SBURST_MASK | STM32_MDMA_CTCR_DINCOS_MASK |
+ STM32_MDMA_CTCR_SINCOS_MASK | STM32_MDMA_CTCR_DSIZE_MASK |
+ STM32_MDMA_CTCR_SSIZE_MASK | STM32_MDMA_CTCR_DINC_MASK |
+ STM32_MDMA_CTCR_SINC_MASK);
+ ctcr |= STM32_MDMA_CTCR_SWRM | STM32_MDMA_CTCR_SINC(STM32_MDMA_INC) |
+ STM32_MDMA_CTCR_DINC(STM32_MDMA_INC);
+
+ /* Reset HW request */
+ ctbr &= ~STM32_MDMA_CTBR_TSEL_MASK;
+
+ /* Select bus */
+ stm32_mdma_set_bus(dmadev, &ctbr, STM32_MDMA_CTBR_SBUS, src);
+ stm32_mdma_set_bus(dmadev, &ctbr, STM32_MDMA_CTBR_DBUS, dest);
+
+ /* Clear CBNDTR registers */
+ cbndtr &= ~(STM32_MDMA_CBNDTR_BRC_MK | STM32_MDMA_CBNDTR_BRDUM |
+ STM32_MDMA_CBNDTR_BRSUM | STM32_MDMA_CBNDTR_BNDT_MASK);
+
+ if (len <= STM32_MDMA_MAX_BLOCK_LEN) {
+ cbndtr |= STM32_MDMA_CBNDTR_BNDT(len);
+ if (len <= STM32_MDMA_MAX_BUF_LEN) {
+ /* Setup a buffer transfer */
+ ccr |= STM32_MDMA_CCR_TCIE | STM32_MDMA_CCR_CTCIE;
+ ctcr |= STM32_MDMA_CTCR_TRGM(STM32_MDMA_BUFFER);
+ } else {
+ /* Setup a block transfer */
+ ccr |= STM32_MDMA_CCR_BTIE | STM32_MDMA_CCR_CTCIE;
+ ctcr |= STM32_MDMA_CTCR_TRGM(STM32_MDMA_BLOCK);
+ }
+
+ tlen = STM32_MDMA_MAX_BUF_LEN;
+ ctcr |= STM32_MDMA_CTCR_TLEN((tlen - 1));
+
+ /* Set source best burst size */
+ max_width = stm32_mdma_get_max_width(src, len, tlen);
+ src_bus_width = stm32_mdma_get_width(chan, max_width);
+
+ max_burst = tlen / max_width;
+ best_burst = stm32_mdma_get_best_burst(len, tlen, max_burst,
+ max_width);
+ mdma_burst = ilog2(best_burst);
+
+ ctcr |= STM32_MDMA_CTCR_SBURST(mdma_burst) |
+ STM32_MDMA_CTCR_SSIZE(src_bus_width) |
+ STM32_MDMA_CTCR_SINCOS(src_bus_width);
+
+ /* Set destination best burst size */
+ max_width = stm32_mdma_get_max_width(dest, len, tlen);
+ dst_bus_width = stm32_mdma_get_width(chan, max_width);
+
+ max_burst = tlen / max_width;
+ best_burst = stm32_mdma_get_best_burst(len, tlen, max_burst,
+ max_width);
+ mdma_burst = ilog2(best_burst);
+
+ ctcr |= STM32_MDMA_CTCR_DBURST(mdma_burst) |
+ STM32_MDMA_CTCR_DSIZE(dst_bus_width) |
+ STM32_MDMA_CTCR_DINCOS(dst_bus_width);
+
+ if (dst_bus_width != src_bus_width)
+ ctcr |= STM32_MDMA_CTCR_PKE;
+
+ /* Prepare hardware descriptor */
+ hwdesc = desc->node[0].hwdesc;
+ hwdesc->ctcr = ctcr;
+ hwdesc->cbndtr = cbndtr;
+ hwdesc->csar = src;
+ hwdesc->cdar = dest;
+ hwdesc->cbrur = 0;
+ hwdesc->clar = 0;
+ hwdesc->ctbr = ctbr;
+ hwdesc->cmar = 0;
+ hwdesc->cmdr = 0;
+
+ stm32_mdma_dump_hwdesc(chan, &desc->node[0]);
+ } else {
+ /* Setup a LLI transfer */
+ ctcr |= STM32_MDMA_CTCR_TRGM(STM32_MDMA_LINKED_LIST) |
+ STM32_MDMA_CTCR_TLEN((STM32_MDMA_MAX_BUF_LEN - 1));
+ ccr |= STM32_MDMA_CCR_BTIE | STM32_MDMA_CCR_CTCIE;
+ tlen = STM32_MDMA_MAX_BUF_LEN;
+
+ for (i = 0, offset = 0; offset < len;
+ i++, offset += xfer_count) {
+ xfer_count = min_t(size_t, len - offset,
+ STM32_MDMA_MAX_BLOCK_LEN);
+
+ /* Set source best burst size */
+ max_width = stm32_mdma_get_max_width(src, len, tlen);
+ src_bus_width = stm32_mdma_get_width(chan, max_width);
+
+ max_burst = tlen / max_width;
+ best_burst = stm32_mdma_get_best_burst(len, tlen,
+ max_burst,
+ max_width);
+ mdma_burst = ilog2(best_burst);
+
+ ctcr |= STM32_MDMA_CTCR_SBURST(mdma_burst) |
+ STM32_MDMA_CTCR_SSIZE(src_bus_width) |
+ STM32_MDMA_CTCR_SINCOS(src_bus_width);
+
+ /* Set destination best burst size */
+ max_width = stm32_mdma_get_max_width(dest, len, tlen);
+ dst_bus_width = stm32_mdma_get_width(chan, max_width);
+
+ max_burst = tlen / max_width;
+ best_burst = stm32_mdma_get_best_burst(len, tlen,
+ max_burst,
+ max_width);
+ mdma_burst = ilog2(best_burst);
+
+ ctcr |= STM32_MDMA_CTCR_DBURST(mdma_burst) |
+ STM32_MDMA_CTCR_DSIZE(dst_bus_width) |
+ STM32_MDMA_CTCR_DINCOS(dst_bus_width);
+
+ if (dst_bus_width != src_bus_width)
+ ctcr |= STM32_MDMA_CTCR_PKE;
+
+ /* Prepare hardware descriptor */
+ stm32_mdma_setup_hwdesc(chan, desc, DMA_MEM_TO_MEM, i,
+ src + offset, dest + offset,
+ xfer_count, ctcr, ctbr,
+ i == count - 1, i == 0, false);
+ }
+ }
+
+ desc->ccr = ccr;
+
+ desc->cyclic = false;
+
+ return vchan_tx_prep(&chan->vchan, &desc->vdesc, flags);
+}
+
+static void stm32_mdma_dump_reg(struct stm32_mdma_chan *chan)
+{
+ struct stm32_mdma_device *dmadev = stm32_mdma_get_dev(chan);
+
+ dev_dbg(chan2dev(chan), "CCR: 0x%08x\n",
+ stm32_mdma_read(dmadev, STM32_MDMA_CCR(chan->id)));
+ dev_dbg(chan2dev(chan), "CTCR: 0x%08x\n",
+ stm32_mdma_read(dmadev, STM32_MDMA_CTCR(chan->id)));
+ dev_dbg(chan2dev(chan), "CBNDTR: 0x%08x\n",
+ stm32_mdma_read(dmadev, STM32_MDMA_CBNDTR(chan->id)));
+ dev_dbg(chan2dev(chan), "CSAR: 0x%08x\n",
+ stm32_mdma_read(dmadev, STM32_MDMA_CSAR(chan->id)));
+ dev_dbg(chan2dev(chan), "CDAR: 0x%08x\n",
+ stm32_mdma_read(dmadev, STM32_MDMA_CDAR(chan->id)));
+ dev_dbg(chan2dev(chan), "CBRUR: 0x%08x\n",
+ stm32_mdma_read(dmadev, STM32_MDMA_CBRUR(chan->id)));
+ dev_dbg(chan2dev(chan), "CLAR: 0x%08x\n",
+ stm32_mdma_read(dmadev, STM32_MDMA_CLAR(chan->id)));
+ dev_dbg(chan2dev(chan), "CTBR: 0x%08x\n",
+ stm32_mdma_read(dmadev, STM32_MDMA_CTBR(chan->id)));
+ dev_dbg(chan2dev(chan), "CMAR: 0x%08x\n",
+ stm32_mdma_read(dmadev, STM32_MDMA_CMAR(chan->id)));
+ dev_dbg(chan2dev(chan), "CMDR: 0x%08x\n",
+ stm32_mdma_read(dmadev, STM32_MDMA_CMDR(chan->id)));
+}
+
+static void stm32_mdma_start_transfer(struct stm32_mdma_chan *chan)
+{
+ struct stm32_mdma_device *dmadev = stm32_mdma_get_dev(chan);
+ struct virt_dma_desc *vdesc;
+ struct stm32_mdma_hwdesc *hwdesc;
+ u32 id = chan->id;
+ u32 status, reg;
+
+ vdesc = vchan_next_desc(&chan->vchan);
+ if (!vdesc) {
+ chan->desc = NULL;
+ return;
+ }
+
+ list_del(&vdesc->node);
+
+ chan->desc = to_stm32_mdma_desc(vdesc);
+ hwdesc = chan->desc->node[0].hwdesc;
+ chan->curr_hwdesc = 0;
+
+ stm32_mdma_write(dmadev, STM32_MDMA_CCR(id), chan->desc->ccr);
+ stm32_mdma_write(dmadev, STM32_MDMA_CTCR(id), hwdesc->ctcr);
+ stm32_mdma_write(dmadev, STM32_MDMA_CBNDTR(id), hwdesc->cbndtr);
+ stm32_mdma_write(dmadev, STM32_MDMA_CSAR(id), hwdesc->csar);
+ stm32_mdma_write(dmadev, STM32_MDMA_CDAR(id), hwdesc->cdar);
+ stm32_mdma_write(dmadev, STM32_MDMA_CBRUR(id), hwdesc->cbrur);
+ stm32_mdma_write(dmadev, STM32_MDMA_CLAR(id), hwdesc->clar);
+ stm32_mdma_write(dmadev, STM32_MDMA_CTBR(id), hwdesc->ctbr);
+ stm32_mdma_write(dmadev, STM32_MDMA_CMAR(id), hwdesc->cmar);
+ stm32_mdma_write(dmadev, STM32_MDMA_CMDR(id), hwdesc->cmdr);
+
+ /* Clear interrupt status if it is there */
+ status = stm32_mdma_read(dmadev, STM32_MDMA_CISR(id));
+ if (status)
+ stm32_mdma_set_bits(dmadev, STM32_MDMA_CIFCR(id), status);
+
+ stm32_mdma_dump_reg(chan);
+
+ /* Start DMA */
+ stm32_mdma_set_bits(dmadev, STM32_MDMA_CCR(id), STM32_MDMA_CCR_EN);
+
+ /* Set SW request in case of MEM2MEM transfer */
+ if (hwdesc->ctcr & STM32_MDMA_CTCR_SWRM) {
+ reg = STM32_MDMA_CCR(id);
+ stm32_mdma_set_bits(dmadev, reg, STM32_MDMA_CCR_SWRQ);
+ }
+
+ chan->busy = true;
+
+ dev_dbg(chan2dev(chan), "vchan %pK: started\n", &chan->vchan);
+}
+
+static void stm32_mdma_issue_pending(struct dma_chan *c)
+{
+ struct stm32_mdma_chan *chan = to_stm32_mdma_chan(c);
+ unsigned long flags;
+
+ spin_lock_irqsave(&chan->vchan.lock, flags);
+
+ if (!vchan_issue_pending(&chan->vchan))
+ goto end;
+
+ dev_dbg(chan2dev(chan), "vchan %pK: issued\n", &chan->vchan);
+
+ if (!chan->desc && !chan->busy)
+ stm32_mdma_start_transfer(chan);
+
+end:
+ spin_unlock_irqrestore(&chan->vchan.lock, flags);
+}
+
+static int stm32_mdma_pause(struct dma_chan *c)
+{
+ struct stm32_mdma_chan *chan = to_stm32_mdma_chan(c);
+ unsigned long flags;
+ int ret;
+
+ spin_lock_irqsave(&chan->vchan.lock, flags);
+ ret = stm32_mdma_disable_chan(chan);
+ spin_unlock_irqrestore(&chan->vchan.lock, flags);
+
+ if (!ret)
+ dev_dbg(chan2dev(chan), "vchan %pK: pause\n", &chan->vchan);
+
+ return ret;
+}
+
+static int stm32_mdma_resume(struct dma_chan *c)
+{
+ struct stm32_mdma_chan *chan = to_stm32_mdma_chan(c);
+ struct stm32_mdma_device *dmadev = stm32_mdma_get_dev(chan);
+ struct stm32_mdma_hwdesc *hwdesc;
+ unsigned long flags;
+ u32 status, reg;
+
+ /* Transfer can be terminated */
+ if (!chan->desc || (stm32_mdma_read(dmadev, STM32_MDMA_CCR(chan->id)) & STM32_MDMA_CCR_EN))
+ return -EPERM;
+
+ hwdesc = chan->desc->node[chan->curr_hwdesc].hwdesc;
+
+ spin_lock_irqsave(&chan->vchan.lock, flags);
+
+ /* Re-configure control register */
+ stm32_mdma_write(dmadev, STM32_MDMA_CCR(chan->id), chan->desc->ccr);
+
+ /* Clear interrupt status if it is there */
+ status = stm32_mdma_read(dmadev, STM32_MDMA_CISR(chan->id));
+ if (status)
+ stm32_mdma_set_bits(dmadev, STM32_MDMA_CIFCR(chan->id), status);
+
+ stm32_mdma_dump_reg(chan);
+
+ /* Re-start DMA */
+ reg = STM32_MDMA_CCR(chan->id);
+ stm32_mdma_set_bits(dmadev, reg, STM32_MDMA_CCR_EN);
+
+ /* Set SW request in case of MEM2MEM transfer */
+ if (hwdesc->ctcr & STM32_MDMA_CTCR_SWRM)
+ stm32_mdma_set_bits(dmadev, reg, STM32_MDMA_CCR_SWRQ);
+
+ spin_unlock_irqrestore(&chan->vchan.lock, flags);
+
+ dev_dbg(chan2dev(chan), "vchan %pK: resume\n", &chan->vchan);
+
+ return 0;
+}
+
+static int stm32_mdma_terminate_all(struct dma_chan *c)
+{
+ struct stm32_mdma_chan *chan = to_stm32_mdma_chan(c);
+ unsigned long flags;
+ LIST_HEAD(head);
+
+ spin_lock_irqsave(&chan->vchan.lock, flags);
+ if (chan->desc) {
+ vchan_terminate_vdesc(&chan->desc->vdesc);
+ if (chan->busy)
+ stm32_mdma_stop(chan);
+ chan->desc = NULL;
+ }
+ vchan_get_all_descriptors(&chan->vchan, &head);
+ spin_unlock_irqrestore(&chan->vchan.lock, flags);
+
+ vchan_dma_desc_free_list(&chan->vchan, &head);
+
+ return 0;
+}
+
+static void stm32_mdma_synchronize(struct dma_chan *c)
+{
+ struct stm32_mdma_chan *chan = to_stm32_mdma_chan(c);
+
+ vchan_synchronize(&chan->vchan);
+}
+
+static int stm32_mdma_slave_config(struct dma_chan *c,
+ struct dma_slave_config *config)
+{
+ struct stm32_mdma_chan *chan = to_stm32_mdma_chan(c);
+
+ memcpy(&chan->dma_config, config, sizeof(*config));
+
+ /* Check if user is requesting STM32 DMA to trigger MDMA */
+ if (config->peripheral_size) {
+ struct stm32_mdma_dma_config *mdma_config;
+
+ mdma_config = (struct stm32_mdma_dma_config *)chan->dma_config.peripheral_config;
+ chan->chan_config.request = mdma_config->request;
+ chan->chan_config.mask_addr = mdma_config->cmar;
+ chan->chan_config.mask_data = mdma_config->cmdr;
+ chan->chan_config.m2m_hw = true;
+ }
+
+ return 0;
+}
+
+static size_t stm32_mdma_desc_residue(struct stm32_mdma_chan *chan,
+ struct stm32_mdma_desc *desc,
+ u32 curr_hwdesc,
+ struct dma_tx_state *state)
+{
+ struct stm32_mdma_device *dmadev = stm32_mdma_get_dev(chan);
+ struct stm32_mdma_hwdesc *hwdesc;
+ u32 cisr, clar, cbndtr, residue, modulo, burst_size;
+ int i;
+
+ cisr = stm32_mdma_read(dmadev, STM32_MDMA_CISR(chan->id));
+
+ residue = 0;
+ /* Get the next hw descriptor to process from current transfer */
+ clar = stm32_mdma_read(dmadev, STM32_MDMA_CLAR(chan->id));
+ for (i = desc->count - 1; i >= 0; i--) {
+ hwdesc = desc->node[i].hwdesc;
+
+ if (hwdesc->clar == clar)
+ break;/* Current transfer found, stop cumulating */
+
+ /* Cumulate residue of unprocessed hw descriptors */
+ residue += STM32_MDMA_CBNDTR_BNDT(hwdesc->cbndtr);
+ }
+ cbndtr = stm32_mdma_read(dmadev, STM32_MDMA_CBNDTR(chan->id));
+ residue += cbndtr & STM32_MDMA_CBNDTR_BNDT_MASK;
+
+ state->in_flight_bytes = 0;
+ if (chan->chan_config.m2m_hw && (cisr & STM32_MDMA_CISR_CRQA))
+ state->in_flight_bytes = cbndtr & STM32_MDMA_CBNDTR_BNDT_MASK;
+
+ if (!chan->mem_burst)
+ return residue;
+
+ burst_size = chan->mem_burst * chan->mem_width;
+ modulo = residue % burst_size;
+ if (modulo)
+ residue = residue - modulo + burst_size;
+
+ return residue;
+}
+
+static enum dma_status stm32_mdma_tx_status(struct dma_chan *c,
+ dma_cookie_t cookie,
+ struct dma_tx_state *state)
+{
+ struct stm32_mdma_chan *chan = to_stm32_mdma_chan(c);
+ struct virt_dma_desc *vdesc;
+ enum dma_status status;
+ unsigned long flags;
+ u32 residue = 0;
+
+ status = dma_cookie_status(c, cookie, state);
+ if ((status == DMA_COMPLETE) || (!state))
+ return status;
+
+ spin_lock_irqsave(&chan->vchan.lock, flags);
+
+ vdesc = vchan_find_desc(&chan->vchan, cookie);
+ if (chan->desc && cookie == chan->desc->vdesc.tx.cookie)
+ residue = stm32_mdma_desc_residue(chan, chan->desc, chan->curr_hwdesc, state);
+ else if (vdesc)
+ residue = stm32_mdma_desc_residue(chan, to_stm32_mdma_desc(vdesc), 0, state);
+
+ dma_set_residue(state, residue);
+
+ spin_unlock_irqrestore(&chan->vchan.lock, flags);
+
+ return status;
+}
+
+static void stm32_mdma_xfer_end(struct stm32_mdma_chan *chan)
+{
+ vchan_cookie_complete(&chan->desc->vdesc);
+ chan->desc = NULL;
+ chan->busy = false;
+
+ /* Start the next transfer if this driver has a next desc */
+ stm32_mdma_start_transfer(chan);
+}
+
+static irqreturn_t stm32_mdma_irq_handler(int irq, void *devid)
+{
+ struct stm32_mdma_device *dmadev = devid;
+ struct stm32_mdma_chan *chan;
+ u32 reg, id, ccr, ien, status;
+
+ /* Find out which channel generates the interrupt */
+ status = readl_relaxed(dmadev->base + STM32_MDMA_GISR0);
+ if (!status) {
+ dev_dbg(mdma2dev(dmadev), "spurious it\n");
+ return IRQ_NONE;
+ }
+ id = __ffs(status);
+ chan = &dmadev->chan[id];
+
+ /* Handle interrupt for the channel */
+ spin_lock(&chan->vchan.lock);
+ status = stm32_mdma_read(dmadev, STM32_MDMA_CISR(id));
+ /* Mask Channel ReQuest Active bit which can be set in case of MEM2MEM */
+ status &= ~STM32_MDMA_CISR_CRQA;
+ ccr = stm32_mdma_read(dmadev, STM32_MDMA_CCR(id));
+ ien = (ccr & STM32_MDMA_CCR_IRQ_MASK) >> 1;
+
+ if (!(status & ien)) {
+ spin_unlock(&chan->vchan.lock);
+ if (chan->busy)
+ dev_warn(chan2dev(chan),
+ "spurious it (status=0x%04x, ien=0x%04x)\n", status, ien);
+ else
+ dev_dbg(chan2dev(chan),
+ "spurious it (status=0x%04x, ien=0x%04x)\n", status, ien);
+ return IRQ_NONE;
+ }
+
+ reg = STM32_MDMA_CIFCR(id);
+
+ if (status & STM32_MDMA_CISR_TEIF) {
+ dev_err(chan2dev(chan), "Transfer Err: stat=0x%08x\n",
+ readl_relaxed(dmadev->base + STM32_MDMA_CESR(id)));
+ stm32_mdma_set_bits(dmadev, reg, STM32_MDMA_CIFCR_CTEIF);
+ status &= ~STM32_MDMA_CISR_TEIF;
+ }
+
+ if (status & STM32_MDMA_CISR_CTCIF) {
+ stm32_mdma_set_bits(dmadev, reg, STM32_MDMA_CIFCR_CCTCIF);
+ status &= ~STM32_MDMA_CISR_CTCIF;
+ stm32_mdma_xfer_end(chan);
+ }
+
+ if (status & STM32_MDMA_CISR_BRTIF) {
+ stm32_mdma_set_bits(dmadev, reg, STM32_MDMA_CIFCR_CBRTIF);
+ status &= ~STM32_MDMA_CISR_BRTIF;
+ }
+
+ if (status & STM32_MDMA_CISR_BTIF) {
+ stm32_mdma_set_bits(dmadev, reg, STM32_MDMA_CIFCR_CBTIF);
+ status &= ~STM32_MDMA_CISR_BTIF;
+ chan->curr_hwdesc++;
+ if (chan->desc && chan->desc->cyclic) {
+ if (chan->curr_hwdesc == chan->desc->count)
+ chan->curr_hwdesc = 0;
+ vchan_cyclic_callback(&chan->desc->vdesc);
+ }
+ }
+
+ if (status & STM32_MDMA_CISR_TCIF) {
+ stm32_mdma_set_bits(dmadev, reg, STM32_MDMA_CIFCR_CLTCIF);
+ status &= ~STM32_MDMA_CISR_TCIF;
+ }
+
+ if (status) {
+ stm32_mdma_set_bits(dmadev, reg, status);
+ dev_err(chan2dev(chan), "DMA error: status=0x%08x\n", status);
+ if (!(ccr & STM32_MDMA_CCR_EN))
+ dev_err(chan2dev(chan), "chan disabled by HW\n");
+ }
+
+ spin_unlock(&chan->vchan.lock);
+
+ return IRQ_HANDLED;
+}
+
+static int stm32_mdma_alloc_chan_resources(struct dma_chan *c)
+{
+ struct stm32_mdma_chan *chan = to_stm32_mdma_chan(c);
+ struct stm32_mdma_device *dmadev = stm32_mdma_get_dev(chan);
+ int ret;
+
+ chan->desc_pool = dmam_pool_create(dev_name(&c->dev->device),
+ c->device->dev,
+ sizeof(struct stm32_mdma_hwdesc),
+ __alignof__(struct stm32_mdma_hwdesc),
+ 0);
+ if (!chan->desc_pool) {
+ dev_err(chan2dev(chan), "failed to allocate descriptor pool\n");
+ return -ENOMEM;
+ }
+
+ ret = pm_runtime_resume_and_get(dmadev->ddev.dev);
+ if (ret < 0)
+ return ret;
+
+ ret = stm32_mdma_disable_chan(chan);
+ if (ret < 0)
+ pm_runtime_put(dmadev->ddev.dev);
+
+ return ret;
+}
+
+static void stm32_mdma_free_chan_resources(struct dma_chan *c)
+{
+ struct stm32_mdma_chan *chan = to_stm32_mdma_chan(c);
+ struct stm32_mdma_device *dmadev = stm32_mdma_get_dev(chan);
+ unsigned long flags;
+
+ dev_dbg(chan2dev(chan), "Freeing channel %d\n", chan->id);
+
+ if (chan->busy) {
+ spin_lock_irqsave(&chan->vchan.lock, flags);
+ stm32_mdma_stop(chan);
+ chan->desc = NULL;
+ spin_unlock_irqrestore(&chan->vchan.lock, flags);
+ }
+
+ pm_runtime_put(dmadev->ddev.dev);
+ vchan_free_chan_resources(to_virt_chan(c));
+ dmam_pool_destroy(chan->desc_pool);
+ chan->desc_pool = NULL;
+}
+
+static bool stm32_mdma_filter_fn(struct dma_chan *c, void *fn_param)
+{
+ struct stm32_mdma_chan *chan = to_stm32_mdma_chan(c);
+ struct stm32_mdma_device *dmadev = stm32_mdma_get_dev(chan);
+
+ /* Check if chan is marked Secure */
+ if (dmadev->chan_reserved & BIT(chan->id))
+ return false;
+
+ return true;
+}
+
+static struct dma_chan *stm32_mdma_of_xlate(struct of_phandle_args *dma_spec,
+ struct of_dma *ofdma)
+{
+ struct stm32_mdma_device *dmadev = ofdma->of_dma_data;
+ dma_cap_mask_t mask = dmadev->ddev.cap_mask;
+ struct stm32_mdma_chan *chan;
+ struct dma_chan *c;
+ struct stm32_mdma_chan_config config;
+
+ if (dma_spec->args_count < 5) {
+ dev_err(mdma2dev(dmadev), "Bad number of args\n");
+ return NULL;
+ }
+
+ memset(&config, 0, sizeof(config));
+ config.request = dma_spec->args[0];
+ config.priority_level = dma_spec->args[1];
+ config.transfer_config = dma_spec->args[2];
+ config.mask_addr = dma_spec->args[3];
+ config.mask_data = dma_spec->args[4];
+
+ if (config.request >= dmadev->nr_requests) {
+ dev_err(mdma2dev(dmadev), "Bad request line\n");
+ return NULL;
+ }
+
+ if (config.priority_level > STM32_MDMA_VERY_HIGH_PRIORITY) {
+ dev_err(mdma2dev(dmadev), "Priority level not supported\n");
+ return NULL;
+ }
+
+ c = __dma_request_channel(&mask, stm32_mdma_filter_fn, &config, ofdma->of_node);
+ if (!c) {
+ dev_err(mdma2dev(dmadev), "No more channels available\n");
+ return NULL;
+ }
+
+ chan = to_stm32_mdma_chan(c);
+ chan->chan_config = config;
+
+ return c;
+}
+
+static const struct of_device_id stm32_mdma_of_match[] = {
+ { .compatible = "st,stm32h7-mdma", },
+ { /* sentinel */ },
+};
+MODULE_DEVICE_TABLE(of, stm32_mdma_of_match);
+
+static int stm32_mdma_probe(struct platform_device *pdev)
+{
+ struct stm32_mdma_chan *chan;
+ struct stm32_mdma_device *dmadev;
+ struct dma_device *dd;
+ struct device_node *of_node;
+ struct resource *res;
+ struct reset_control *rst;
+ u32 nr_channels, nr_requests;
+ int i, count, ret;
+
+ of_node = pdev->dev.of_node;
+ if (!of_node)
+ return -ENODEV;
+
+ ret = device_property_read_u32(&pdev->dev, "dma-channels",
+ &nr_channels);
+ if (ret) {
+ nr_channels = STM32_MDMA_MAX_CHANNELS;
+ dev_warn(&pdev->dev, "MDMA defaulting on %i channels\n",
+ nr_channels);
+ }
+
+ ret = device_property_read_u32(&pdev->dev, "dma-requests",
+ &nr_requests);
+ if (ret) {
+ nr_requests = STM32_MDMA_MAX_REQUESTS;
+ dev_warn(&pdev->dev, "MDMA defaulting on %i request lines\n",
+ nr_requests);
+ }
+
+ count = device_property_count_u32(&pdev->dev, "st,ahb-addr-masks");
+ if (count < 0)
+ count = 0;
+
+ dmadev = devm_kzalloc(&pdev->dev,
+ struct_size(dmadev, ahb_addr_masks, count),
+ GFP_KERNEL);
+ if (!dmadev)
+ return -ENOMEM;
+
+ dmadev->nr_channels = nr_channels;
+ dmadev->nr_requests = nr_requests;
+ device_property_read_u32_array(&pdev->dev, "st,ahb-addr-masks",
+ dmadev->ahb_addr_masks,
+ count);
+ dmadev->nr_ahb_addr_masks = count;
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ dmadev->base = devm_ioremap_resource(&pdev->dev, res);
+ if (IS_ERR(dmadev->base))
+ return PTR_ERR(dmadev->base);
+
+ dmadev->clk = devm_clk_get(&pdev->dev, NULL);
+ if (IS_ERR(dmadev->clk))
+ return dev_err_probe(&pdev->dev, PTR_ERR(dmadev->clk),
+ "Missing clock controller\n");
+
+ ret = clk_prepare_enable(dmadev->clk);
+ if (ret < 0) {
+ dev_err(&pdev->dev, "clk_prep_enable error: %d\n", ret);
+ return ret;
+ }
+
+ rst = devm_reset_control_get(&pdev->dev, NULL);
+ if (IS_ERR(rst)) {
+ ret = PTR_ERR(rst);
+ if (ret == -EPROBE_DEFER)
+ goto err_clk;
+ } else {
+ reset_control_assert(rst);
+ udelay(2);
+ reset_control_deassert(rst);
+ }
+
+ dd = &dmadev->ddev;
+ dma_cap_set(DMA_SLAVE, dd->cap_mask);
+ dma_cap_set(DMA_PRIVATE, dd->cap_mask);
+ dma_cap_set(DMA_CYCLIC, dd->cap_mask);
+ dma_cap_set(DMA_MEMCPY, dd->cap_mask);
+ dd->device_alloc_chan_resources = stm32_mdma_alloc_chan_resources;
+ dd->device_free_chan_resources = stm32_mdma_free_chan_resources;
+ dd->device_tx_status = stm32_mdma_tx_status;
+ dd->device_issue_pending = stm32_mdma_issue_pending;
+ dd->device_prep_slave_sg = stm32_mdma_prep_slave_sg;
+ dd->device_prep_dma_cyclic = stm32_mdma_prep_dma_cyclic;
+ dd->device_prep_dma_memcpy = stm32_mdma_prep_dma_memcpy;
+ dd->device_config = stm32_mdma_slave_config;
+ dd->device_pause = stm32_mdma_pause;
+ dd->device_resume = stm32_mdma_resume;
+ dd->device_terminate_all = stm32_mdma_terminate_all;
+ dd->device_synchronize = stm32_mdma_synchronize;
+ dd->descriptor_reuse = true;
+
+ dd->src_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) |
+ BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) |
+ BIT(DMA_SLAVE_BUSWIDTH_4_BYTES) |
+ BIT(DMA_SLAVE_BUSWIDTH_8_BYTES);
+ dd->dst_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) |
+ BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) |
+ BIT(DMA_SLAVE_BUSWIDTH_4_BYTES) |
+ BIT(DMA_SLAVE_BUSWIDTH_8_BYTES);
+ dd->directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV) |
+ BIT(DMA_MEM_TO_MEM);
+ dd->residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
+ dd->max_burst = STM32_MDMA_MAX_BURST;
+ dd->dev = &pdev->dev;
+ INIT_LIST_HEAD(&dd->channels);
+
+ for (i = 0; i < dmadev->nr_channels; i++) {
+ chan = &dmadev->chan[i];
+ chan->id = i;
+
+ if (stm32_mdma_read(dmadev, STM32_MDMA_CCR(i)) & STM32_MDMA_CCR_SM)
+ dmadev->chan_reserved |= BIT(i);
+
+ chan->vchan.desc_free = stm32_mdma_desc_free;
+ vchan_init(&chan->vchan, dd);
+ }
+
+ dmadev->irq = platform_get_irq(pdev, 0);
+ if (dmadev->irq < 0) {
+ ret = dmadev->irq;
+ goto err_clk;
+ }
+
+ ret = devm_request_irq(&pdev->dev, dmadev->irq, stm32_mdma_irq_handler,
+ 0, dev_name(&pdev->dev), dmadev);
+ if (ret) {
+ dev_err(&pdev->dev, "failed to request IRQ\n");
+ goto err_clk;
+ }
+
+ ret = dmaenginem_async_device_register(dd);
+ if (ret)
+ goto err_clk;
+
+ ret = of_dma_controller_register(of_node, stm32_mdma_of_xlate, dmadev);
+ if (ret < 0) {
+ dev_err(&pdev->dev,
+ "STM32 MDMA DMA OF registration failed %d\n", ret);
+ goto err_clk;
+ }
+
+ platform_set_drvdata(pdev, dmadev);
+ pm_runtime_set_active(&pdev->dev);
+ pm_runtime_enable(&pdev->dev);
+ pm_runtime_get_noresume(&pdev->dev);
+ pm_runtime_put(&pdev->dev);
+
+ dev_info(&pdev->dev, "STM32 MDMA driver registered\n");
+
+ return 0;
+
+err_clk:
+ clk_disable_unprepare(dmadev->clk);
+
+ return ret;
+}
+
+#ifdef CONFIG_PM
+static int stm32_mdma_runtime_suspend(struct device *dev)
+{
+ struct stm32_mdma_device *dmadev = dev_get_drvdata(dev);
+
+ clk_disable_unprepare(dmadev->clk);
+
+ return 0;
+}
+
+static int stm32_mdma_runtime_resume(struct device *dev)
+{
+ struct stm32_mdma_device *dmadev = dev_get_drvdata(dev);
+ int ret;
+
+ ret = clk_prepare_enable(dmadev->clk);
+ if (ret) {
+ dev_err(dev, "failed to prepare_enable clock\n");
+ return ret;
+ }
+
+ return 0;
+}
+#endif
+
+#ifdef CONFIG_PM_SLEEP
+static int stm32_mdma_pm_suspend(struct device *dev)
+{
+ struct stm32_mdma_device *dmadev = dev_get_drvdata(dev);
+ u32 ccr, id;
+ int ret;
+
+ ret = pm_runtime_resume_and_get(dev);
+ if (ret < 0)
+ return ret;
+
+ for (id = 0; id < dmadev->nr_channels; id++) {
+ ccr = stm32_mdma_read(dmadev, STM32_MDMA_CCR(id));
+ if (ccr & STM32_MDMA_CCR_EN) {
+ dev_warn(dev, "Suspend is prevented by Chan %i\n", id);
+ return -EBUSY;
+ }
+ }
+
+ pm_runtime_put_sync(dev);
+
+ pm_runtime_force_suspend(dev);
+
+ return 0;
+}
+
+static int stm32_mdma_pm_resume(struct device *dev)
+{
+ return pm_runtime_force_resume(dev);
+}
+#endif
+
+static const struct dev_pm_ops stm32_mdma_pm_ops = {
+ SET_SYSTEM_SLEEP_PM_OPS(stm32_mdma_pm_suspend, stm32_mdma_pm_resume)
+ SET_RUNTIME_PM_OPS(stm32_mdma_runtime_suspend,
+ stm32_mdma_runtime_resume, NULL)
+};
+
+static struct platform_driver stm32_mdma_driver = {
+ .probe = stm32_mdma_probe,
+ .driver = {
+ .name = "stm32-mdma",
+ .of_match_table = stm32_mdma_of_match,
+ .pm = &stm32_mdma_pm_ops,
+ },
+};
+
+static int __init stm32_mdma_init(void)
+{
+ return platform_driver_register(&stm32_mdma_driver);
+}
+
+subsys_initcall(stm32_mdma_init);
+
+MODULE_DESCRIPTION("Driver for STM32 MDMA controller");
+MODULE_AUTHOR("M'boumba Cedric Madianga <cedric.madianga@gmail.com>");
+MODULE_AUTHOR("Pierre-Yves Mordret <pierre-yves.mordret@st.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/sun4i-dma.c b/drivers/dma/sun4i-dma.c
new file mode 100644
index 000000000..f291b1b4d
--- /dev/null
+++ b/drivers/dma/sun4i-dma.c
@@ -0,0 +1,1310 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2014 Emilio López
+ * Emilio López <emilio@elopez.com.ar>
+ */
+
+#include <linux/bitmap.h>
+#include <linux/bitops.h>
+#include <linux/clk.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmaengine.h>
+#include <linux/dmapool.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/of_dma.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+
+#include "virt-dma.h"
+
+/** Common macros to normal and dedicated DMA registers **/
+
+#define SUN4I_DMA_CFG_LOADING BIT(31)
+#define SUN4I_DMA_CFG_DST_DATA_WIDTH(width) ((width) << 25)
+#define SUN4I_DMA_CFG_DST_BURST_LENGTH(len) ((len) << 23)
+#define SUN4I_DMA_CFG_DST_ADDR_MODE(mode) ((mode) << 21)
+#define SUN4I_DMA_CFG_DST_DRQ_TYPE(type) ((type) << 16)
+#define SUN4I_DMA_CFG_SRC_DATA_WIDTH(width) ((width) << 9)
+#define SUN4I_DMA_CFG_SRC_BURST_LENGTH(len) ((len) << 7)
+#define SUN4I_DMA_CFG_SRC_ADDR_MODE(mode) ((mode) << 5)
+#define SUN4I_DMA_CFG_SRC_DRQ_TYPE(type) (type)
+
+/** Normal DMA register values **/
+
+/* Normal DMA source/destination data request type values */
+#define SUN4I_NDMA_DRQ_TYPE_SDRAM 0x16
+#define SUN4I_NDMA_DRQ_TYPE_LIMIT (0x1F + 1)
+
+/** Normal DMA register layout **/
+
+/* Dedicated DMA source/destination address mode values */
+#define SUN4I_NDMA_ADDR_MODE_LINEAR 0
+#define SUN4I_NDMA_ADDR_MODE_IO 1
+
+/* Normal DMA configuration register layout */
+#define SUN4I_NDMA_CFG_CONT_MODE BIT(30)
+#define SUN4I_NDMA_CFG_WAIT_STATE(n) ((n) << 27)
+#define SUN4I_NDMA_CFG_DST_NON_SECURE BIT(22)
+#define SUN4I_NDMA_CFG_BYTE_COUNT_MODE_REMAIN BIT(15)
+#define SUN4I_NDMA_CFG_SRC_NON_SECURE BIT(6)
+
+/** Dedicated DMA register values **/
+
+/* Dedicated DMA source/destination address mode values */
+#define SUN4I_DDMA_ADDR_MODE_LINEAR 0
+#define SUN4I_DDMA_ADDR_MODE_IO 1
+#define SUN4I_DDMA_ADDR_MODE_HORIZONTAL_PAGE 2
+#define SUN4I_DDMA_ADDR_MODE_VERTICAL_PAGE 3
+
+/* Dedicated DMA source/destination data request type values */
+#define SUN4I_DDMA_DRQ_TYPE_SDRAM 0x1
+#define SUN4I_DDMA_DRQ_TYPE_LIMIT (0x1F + 1)
+
+/** Dedicated DMA register layout **/
+
+/* Dedicated DMA configuration register layout */
+#define SUN4I_DDMA_CFG_BUSY BIT(30)
+#define SUN4I_DDMA_CFG_CONT_MODE BIT(29)
+#define SUN4I_DDMA_CFG_DST_NON_SECURE BIT(28)
+#define SUN4I_DDMA_CFG_BYTE_COUNT_MODE_REMAIN BIT(15)
+#define SUN4I_DDMA_CFG_SRC_NON_SECURE BIT(12)
+
+/* Dedicated DMA parameter register layout */
+#define SUN4I_DDMA_PARA_DST_DATA_BLK_SIZE(n) (((n) - 1) << 24)
+#define SUN4I_DDMA_PARA_DST_WAIT_CYCLES(n) (((n) - 1) << 16)
+#define SUN4I_DDMA_PARA_SRC_DATA_BLK_SIZE(n) (((n) - 1) << 8)
+#define SUN4I_DDMA_PARA_SRC_WAIT_CYCLES(n) (((n) - 1) << 0)
+
+/** DMA register offsets **/
+
+/* General register offsets */
+#define SUN4I_DMA_IRQ_ENABLE_REG 0x0
+#define SUN4I_DMA_IRQ_PENDING_STATUS_REG 0x4
+
+/* Normal DMA register offsets */
+#define SUN4I_NDMA_CHANNEL_REG_BASE(n) (0x100 + (n) * 0x20)
+#define SUN4I_NDMA_CFG_REG 0x0
+#define SUN4I_NDMA_SRC_ADDR_REG 0x4
+#define SUN4I_NDMA_DST_ADDR_REG 0x8
+#define SUN4I_NDMA_BYTE_COUNT_REG 0xC
+
+/* Dedicated DMA register offsets */
+#define SUN4I_DDMA_CHANNEL_REG_BASE(n) (0x300 + (n) * 0x20)
+#define SUN4I_DDMA_CFG_REG 0x0
+#define SUN4I_DDMA_SRC_ADDR_REG 0x4
+#define SUN4I_DDMA_DST_ADDR_REG 0x8
+#define SUN4I_DDMA_BYTE_COUNT_REG 0xC
+#define SUN4I_DDMA_PARA_REG 0x18
+
+/** DMA Driver **/
+
+/*
+ * Normal DMA has 8 channels, and Dedicated DMA has another 8, so
+ * that's 16 channels. As for endpoints, there's 29 and 21
+ * respectively. Given that the Normal DMA endpoints (other than
+ * SDRAM) can be used as tx/rx, we need 78 vchans in total
+ */
+#define SUN4I_NDMA_NR_MAX_CHANNELS 8
+#define SUN4I_DDMA_NR_MAX_CHANNELS 8
+#define SUN4I_DMA_NR_MAX_CHANNELS \
+ (SUN4I_NDMA_NR_MAX_CHANNELS + SUN4I_DDMA_NR_MAX_CHANNELS)
+#define SUN4I_NDMA_NR_MAX_VCHANS (29 * 2 - 1)
+#define SUN4I_DDMA_NR_MAX_VCHANS 21
+#define SUN4I_DMA_NR_MAX_VCHANS \
+ (SUN4I_NDMA_NR_MAX_VCHANS + SUN4I_DDMA_NR_MAX_VCHANS)
+
+/* This set of SUN4I_DDMA timing parameters were found experimentally while
+ * working with the SPI driver and seem to make it behave correctly */
+#define SUN4I_DDMA_MAGIC_SPI_PARAMETERS \
+ (SUN4I_DDMA_PARA_DST_DATA_BLK_SIZE(1) | \
+ SUN4I_DDMA_PARA_SRC_DATA_BLK_SIZE(1) | \
+ SUN4I_DDMA_PARA_DST_WAIT_CYCLES(2) | \
+ SUN4I_DDMA_PARA_SRC_WAIT_CYCLES(2))
+
+/*
+ * Normal DMA supports individual transfers (segments) up to 128k.
+ * Dedicated DMA supports transfers up to 16M. We can only report
+ * one size limit, so we have to use the smaller value.
+ */
+#define SUN4I_NDMA_MAX_SEG_SIZE SZ_128K
+#define SUN4I_DDMA_MAX_SEG_SIZE SZ_16M
+#define SUN4I_DMA_MAX_SEG_SIZE SUN4I_NDMA_MAX_SEG_SIZE
+
+struct sun4i_dma_pchan {
+ /* Register base of channel */
+ void __iomem *base;
+ /* vchan currently being serviced */
+ struct sun4i_dma_vchan *vchan;
+ /* Is this a dedicated pchan? */
+ int is_dedicated;
+};
+
+struct sun4i_dma_vchan {
+ struct virt_dma_chan vc;
+ struct dma_slave_config cfg;
+ struct sun4i_dma_pchan *pchan;
+ struct sun4i_dma_promise *processing;
+ struct sun4i_dma_contract *contract;
+ u8 endpoint;
+ int is_dedicated;
+};
+
+struct sun4i_dma_promise {
+ u32 cfg;
+ u32 para;
+ dma_addr_t src;
+ dma_addr_t dst;
+ size_t len;
+ struct list_head list;
+};
+
+/* A contract is a set of promises */
+struct sun4i_dma_contract {
+ struct virt_dma_desc vd;
+ struct list_head demands;
+ struct list_head completed_demands;
+ bool is_cyclic : 1;
+ bool use_half_int : 1;
+};
+
+struct sun4i_dma_dev {
+ DECLARE_BITMAP(pchans_used, SUN4I_DMA_NR_MAX_CHANNELS);
+ struct dma_device slave;
+ struct sun4i_dma_pchan *pchans;
+ struct sun4i_dma_vchan *vchans;
+ void __iomem *base;
+ struct clk *clk;
+ int irq;
+ spinlock_t lock;
+};
+
+static struct sun4i_dma_dev *to_sun4i_dma_dev(struct dma_device *dev)
+{
+ return container_of(dev, struct sun4i_dma_dev, slave);
+}
+
+static struct sun4i_dma_vchan *to_sun4i_dma_vchan(struct dma_chan *chan)
+{
+ return container_of(chan, struct sun4i_dma_vchan, vc.chan);
+}
+
+static struct sun4i_dma_contract *to_sun4i_dma_contract(struct virt_dma_desc *vd)
+{
+ return container_of(vd, struct sun4i_dma_contract, vd);
+}
+
+static struct device *chan2dev(struct dma_chan *chan)
+{
+ return &chan->dev->device;
+}
+
+static int convert_burst(u32 maxburst)
+{
+ if (maxburst > 8)
+ return -EINVAL;
+
+ /* 1 -> 0, 4 -> 1, 8 -> 2 */
+ return (maxburst >> 2);
+}
+
+static int convert_buswidth(enum dma_slave_buswidth addr_width)
+{
+ if (addr_width > DMA_SLAVE_BUSWIDTH_4_BYTES)
+ return -EINVAL;
+
+ /* 8 (1 byte) -> 0, 16 (2 bytes) -> 1, 32 (4 bytes) -> 2 */
+ return (addr_width >> 1);
+}
+
+static void sun4i_dma_free_chan_resources(struct dma_chan *chan)
+{
+ struct sun4i_dma_vchan *vchan = to_sun4i_dma_vchan(chan);
+
+ vchan_free_chan_resources(&vchan->vc);
+}
+
+static struct sun4i_dma_pchan *find_and_use_pchan(struct sun4i_dma_dev *priv,
+ struct sun4i_dma_vchan *vchan)
+{
+ struct sun4i_dma_pchan *pchan = NULL, *pchans = priv->pchans;
+ unsigned long flags;
+ int i, max;
+
+ /*
+ * pchans 0-SUN4I_NDMA_NR_MAX_CHANNELS are normal, and
+ * SUN4I_NDMA_NR_MAX_CHANNELS+ are dedicated ones
+ */
+ if (vchan->is_dedicated) {
+ i = SUN4I_NDMA_NR_MAX_CHANNELS;
+ max = SUN4I_DMA_NR_MAX_CHANNELS;
+ } else {
+ i = 0;
+ max = SUN4I_NDMA_NR_MAX_CHANNELS;
+ }
+
+ spin_lock_irqsave(&priv->lock, flags);
+ for_each_clear_bit_from(i, priv->pchans_used, max) {
+ pchan = &pchans[i];
+ pchan->vchan = vchan;
+ set_bit(i, priv->pchans_used);
+ break;
+ }
+ spin_unlock_irqrestore(&priv->lock, flags);
+
+ return pchan;
+}
+
+static void release_pchan(struct sun4i_dma_dev *priv,
+ struct sun4i_dma_pchan *pchan)
+{
+ unsigned long flags;
+ int nr = pchan - priv->pchans;
+
+ spin_lock_irqsave(&priv->lock, flags);
+
+ pchan->vchan = NULL;
+ clear_bit(nr, priv->pchans_used);
+
+ spin_unlock_irqrestore(&priv->lock, flags);
+}
+
+static void configure_pchan(struct sun4i_dma_pchan *pchan,
+ struct sun4i_dma_promise *d)
+{
+ /*
+ * Configure addresses and misc parameters depending on type
+ * SUN4I_DDMA has an extra field with timing parameters
+ */
+ if (pchan->is_dedicated) {
+ writel_relaxed(d->src, pchan->base + SUN4I_DDMA_SRC_ADDR_REG);
+ writel_relaxed(d->dst, pchan->base + SUN4I_DDMA_DST_ADDR_REG);
+ writel_relaxed(d->len, pchan->base + SUN4I_DDMA_BYTE_COUNT_REG);
+ writel_relaxed(d->para, pchan->base + SUN4I_DDMA_PARA_REG);
+ writel_relaxed(d->cfg, pchan->base + SUN4I_DDMA_CFG_REG);
+ } else {
+ writel_relaxed(d->src, pchan->base + SUN4I_NDMA_SRC_ADDR_REG);
+ writel_relaxed(d->dst, pchan->base + SUN4I_NDMA_DST_ADDR_REG);
+ writel_relaxed(d->len, pchan->base + SUN4I_NDMA_BYTE_COUNT_REG);
+ writel_relaxed(d->cfg, pchan->base + SUN4I_NDMA_CFG_REG);
+ }
+}
+
+static void set_pchan_interrupt(struct sun4i_dma_dev *priv,
+ struct sun4i_dma_pchan *pchan,
+ int half, int end)
+{
+ u32 reg;
+ int pchan_number = pchan - priv->pchans;
+ unsigned long flags;
+
+ spin_lock_irqsave(&priv->lock, flags);
+
+ reg = readl_relaxed(priv->base + SUN4I_DMA_IRQ_ENABLE_REG);
+
+ if (half)
+ reg |= BIT(pchan_number * 2);
+ else
+ reg &= ~BIT(pchan_number * 2);
+
+ if (end)
+ reg |= BIT(pchan_number * 2 + 1);
+ else
+ reg &= ~BIT(pchan_number * 2 + 1);
+
+ writel_relaxed(reg, priv->base + SUN4I_DMA_IRQ_ENABLE_REG);
+
+ spin_unlock_irqrestore(&priv->lock, flags);
+}
+
+/*
+ * Execute pending operations on a vchan
+ *
+ * When given a vchan, this function will try to acquire a suitable
+ * pchan and, if successful, will configure it to fulfill a promise
+ * from the next pending contract.
+ *
+ * This function must be called with &vchan->vc.lock held.
+ */
+static int __execute_vchan_pending(struct sun4i_dma_dev *priv,
+ struct sun4i_dma_vchan *vchan)
+{
+ struct sun4i_dma_promise *promise = NULL;
+ struct sun4i_dma_contract *contract = NULL;
+ struct sun4i_dma_pchan *pchan;
+ struct virt_dma_desc *vd;
+ int ret;
+
+ lockdep_assert_held(&vchan->vc.lock);
+
+ /* We need a pchan to do anything, so secure one if available */
+ pchan = find_and_use_pchan(priv, vchan);
+ if (!pchan)
+ return -EBUSY;
+
+ /*
+ * Channel endpoints must not be repeated, so if this vchan
+ * has already submitted some work, we can't do anything else
+ */
+ if (vchan->processing) {
+ dev_dbg(chan2dev(&vchan->vc.chan),
+ "processing something to this endpoint already\n");
+ ret = -EBUSY;
+ goto release_pchan;
+ }
+
+ do {
+ /* Figure out which contract we're working with today */
+ vd = vchan_next_desc(&vchan->vc);
+ if (!vd) {
+ dev_dbg(chan2dev(&vchan->vc.chan),
+ "No pending contract found");
+ ret = 0;
+ goto release_pchan;
+ }
+
+ contract = to_sun4i_dma_contract(vd);
+ if (list_empty(&contract->demands)) {
+ /* The contract has been completed so mark it as such */
+ list_del(&contract->vd.node);
+ vchan_cookie_complete(&contract->vd);
+ dev_dbg(chan2dev(&vchan->vc.chan),
+ "Empty contract found and marked complete");
+ }
+ } while (list_empty(&contract->demands));
+
+ /* Now find out what we need to do */
+ promise = list_first_entry(&contract->demands,
+ struct sun4i_dma_promise, list);
+ vchan->processing = promise;
+
+ /* ... and make it reality */
+ if (promise) {
+ vchan->contract = contract;
+ vchan->pchan = pchan;
+ set_pchan_interrupt(priv, pchan, contract->use_half_int, 1);
+ configure_pchan(pchan, promise);
+ }
+
+ return 0;
+
+release_pchan:
+ release_pchan(priv, pchan);
+ return ret;
+}
+
+static int sanitize_config(struct dma_slave_config *sconfig,
+ enum dma_transfer_direction direction)
+{
+ switch (direction) {
+ case DMA_MEM_TO_DEV:
+ if ((sconfig->dst_addr_width == DMA_SLAVE_BUSWIDTH_UNDEFINED) ||
+ !sconfig->dst_maxburst)
+ return -EINVAL;
+
+ if (sconfig->src_addr_width == DMA_SLAVE_BUSWIDTH_UNDEFINED)
+ sconfig->src_addr_width = sconfig->dst_addr_width;
+
+ if (!sconfig->src_maxburst)
+ sconfig->src_maxburst = sconfig->dst_maxburst;
+
+ break;
+
+ case DMA_DEV_TO_MEM:
+ if ((sconfig->src_addr_width == DMA_SLAVE_BUSWIDTH_UNDEFINED) ||
+ !sconfig->src_maxburst)
+ return -EINVAL;
+
+ if (sconfig->dst_addr_width == DMA_SLAVE_BUSWIDTH_UNDEFINED)
+ sconfig->dst_addr_width = sconfig->src_addr_width;
+
+ if (!sconfig->dst_maxburst)
+ sconfig->dst_maxburst = sconfig->src_maxburst;
+
+ break;
+ default:
+ return 0;
+ }
+
+ return 0;
+}
+
+/*
+ * Generate a promise, to be used in a normal DMA contract.
+ *
+ * A NDMA promise contains all the information required to program the
+ * normal part of the DMA Engine and get data copied. A non-executed
+ * promise will live in the demands list on a contract. Once it has been
+ * completed, it will be moved to the completed demands list for later freeing.
+ * All linked promises will be freed when the corresponding contract is freed
+ */
+static struct sun4i_dma_promise *
+generate_ndma_promise(struct dma_chan *chan, dma_addr_t src, dma_addr_t dest,
+ size_t len, struct dma_slave_config *sconfig,
+ enum dma_transfer_direction direction)
+{
+ struct sun4i_dma_promise *promise;
+ int ret;
+
+ ret = sanitize_config(sconfig, direction);
+ if (ret)
+ return NULL;
+
+ promise = kzalloc(sizeof(*promise), GFP_NOWAIT);
+ if (!promise)
+ return NULL;
+
+ promise->src = src;
+ promise->dst = dest;
+ promise->len = len;
+ promise->cfg = SUN4I_DMA_CFG_LOADING |
+ SUN4I_NDMA_CFG_BYTE_COUNT_MODE_REMAIN;
+
+ dev_dbg(chan2dev(chan),
+ "src burst %d, dst burst %d, src buswidth %d, dst buswidth %d",
+ sconfig->src_maxburst, sconfig->dst_maxburst,
+ sconfig->src_addr_width, sconfig->dst_addr_width);
+
+ /* Source burst */
+ ret = convert_burst(sconfig->src_maxburst);
+ if (ret < 0)
+ goto fail;
+ promise->cfg |= SUN4I_DMA_CFG_SRC_BURST_LENGTH(ret);
+
+ /* Destination burst */
+ ret = convert_burst(sconfig->dst_maxburst);
+ if (ret < 0)
+ goto fail;
+ promise->cfg |= SUN4I_DMA_CFG_DST_BURST_LENGTH(ret);
+
+ /* Source bus width */
+ ret = convert_buswidth(sconfig->src_addr_width);
+ if (ret < 0)
+ goto fail;
+ promise->cfg |= SUN4I_DMA_CFG_SRC_DATA_WIDTH(ret);
+
+ /* Destination bus width */
+ ret = convert_buswidth(sconfig->dst_addr_width);
+ if (ret < 0)
+ goto fail;
+ promise->cfg |= SUN4I_DMA_CFG_DST_DATA_WIDTH(ret);
+
+ return promise;
+
+fail:
+ kfree(promise);
+ return NULL;
+}
+
+/*
+ * Generate a promise, to be used in a dedicated DMA contract.
+ *
+ * A DDMA promise contains all the information required to program the
+ * Dedicated part of the DMA Engine and get data copied. A non-executed
+ * promise will live in the demands list on a contract. Once it has been
+ * completed, it will be moved to the completed demands list for later freeing.
+ * All linked promises will be freed when the corresponding contract is freed
+ */
+static struct sun4i_dma_promise *
+generate_ddma_promise(struct dma_chan *chan, dma_addr_t src, dma_addr_t dest,
+ size_t len, struct dma_slave_config *sconfig)
+{
+ struct sun4i_dma_promise *promise;
+ int ret;
+
+ promise = kzalloc(sizeof(*promise), GFP_NOWAIT);
+ if (!promise)
+ return NULL;
+
+ promise->src = src;
+ promise->dst = dest;
+ promise->len = len;
+ promise->cfg = SUN4I_DMA_CFG_LOADING |
+ SUN4I_DDMA_CFG_BYTE_COUNT_MODE_REMAIN;
+
+ /* Source burst */
+ ret = convert_burst(sconfig->src_maxburst);
+ if (ret < 0)
+ goto fail;
+ promise->cfg |= SUN4I_DMA_CFG_SRC_BURST_LENGTH(ret);
+
+ /* Destination burst */
+ ret = convert_burst(sconfig->dst_maxburst);
+ if (ret < 0)
+ goto fail;
+ promise->cfg |= SUN4I_DMA_CFG_DST_BURST_LENGTH(ret);
+
+ /* Source bus width */
+ ret = convert_buswidth(sconfig->src_addr_width);
+ if (ret < 0)
+ goto fail;
+ promise->cfg |= SUN4I_DMA_CFG_SRC_DATA_WIDTH(ret);
+
+ /* Destination bus width */
+ ret = convert_buswidth(sconfig->dst_addr_width);
+ if (ret < 0)
+ goto fail;
+ promise->cfg |= SUN4I_DMA_CFG_DST_DATA_WIDTH(ret);
+
+ return promise;
+
+fail:
+ kfree(promise);
+ return NULL;
+}
+
+/*
+ * Generate a contract
+ *
+ * Contracts function as DMA descriptors. As our hardware does not support
+ * linked lists, we need to implement SG via software. We use a contract
+ * to hold all the pieces of the request and process them serially one
+ * after another. Each piece is represented as a promise.
+ */
+static struct sun4i_dma_contract *generate_dma_contract(void)
+{
+ struct sun4i_dma_contract *contract;
+
+ contract = kzalloc(sizeof(*contract), GFP_NOWAIT);
+ if (!contract)
+ return NULL;
+
+ INIT_LIST_HEAD(&contract->demands);
+ INIT_LIST_HEAD(&contract->completed_demands);
+
+ return contract;
+}
+
+/*
+ * Get next promise on a cyclic transfer
+ *
+ * Cyclic contracts contain a series of promises which are executed on a
+ * loop. This function returns the next promise from a cyclic contract,
+ * so it can be programmed into the hardware.
+ */
+static struct sun4i_dma_promise *
+get_next_cyclic_promise(struct sun4i_dma_contract *contract)
+{
+ struct sun4i_dma_promise *promise;
+
+ promise = list_first_entry_or_null(&contract->demands,
+ struct sun4i_dma_promise, list);
+ if (!promise) {
+ list_splice_init(&contract->completed_demands,
+ &contract->demands);
+ promise = list_first_entry(&contract->demands,
+ struct sun4i_dma_promise, list);
+ }
+
+ return promise;
+}
+
+/*
+ * Free a contract and all its associated promises
+ */
+static void sun4i_dma_free_contract(struct virt_dma_desc *vd)
+{
+ struct sun4i_dma_contract *contract = to_sun4i_dma_contract(vd);
+ struct sun4i_dma_promise *promise, *tmp;
+
+ /* Free all the demands and completed demands */
+ list_for_each_entry_safe(promise, tmp, &contract->demands, list)
+ kfree(promise);
+
+ list_for_each_entry_safe(promise, tmp, &contract->completed_demands, list)
+ kfree(promise);
+
+ kfree(contract);
+}
+
+static struct dma_async_tx_descriptor *
+sun4i_dma_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest,
+ dma_addr_t src, size_t len, unsigned long flags)
+{
+ struct sun4i_dma_vchan *vchan = to_sun4i_dma_vchan(chan);
+ struct dma_slave_config *sconfig = &vchan->cfg;
+ struct sun4i_dma_promise *promise;
+ struct sun4i_dma_contract *contract;
+
+ contract = generate_dma_contract();
+ if (!contract)
+ return NULL;
+
+ /*
+ * We can only do the copy to bus aligned addresses, so
+ * choose the best one so we get decent performance. We also
+ * maximize the burst size for this same reason.
+ */
+ sconfig->src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+ sconfig->dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+ sconfig->src_maxburst = 8;
+ sconfig->dst_maxburst = 8;
+
+ if (vchan->is_dedicated)
+ promise = generate_ddma_promise(chan, src, dest, len, sconfig);
+ else
+ promise = generate_ndma_promise(chan, src, dest, len, sconfig,
+ DMA_MEM_TO_MEM);
+
+ if (!promise) {
+ kfree(contract);
+ return NULL;
+ }
+
+ /* Configure memcpy mode */
+ if (vchan->is_dedicated) {
+ promise->cfg |= SUN4I_DMA_CFG_SRC_DRQ_TYPE(SUN4I_DDMA_DRQ_TYPE_SDRAM) |
+ SUN4I_DMA_CFG_DST_DRQ_TYPE(SUN4I_DDMA_DRQ_TYPE_SDRAM);
+ } else {
+ promise->cfg |= SUN4I_DMA_CFG_SRC_DRQ_TYPE(SUN4I_NDMA_DRQ_TYPE_SDRAM) |
+ SUN4I_DMA_CFG_DST_DRQ_TYPE(SUN4I_NDMA_DRQ_TYPE_SDRAM);
+ }
+
+ /* Fill the contract with our only promise */
+ list_add_tail(&promise->list, &contract->demands);
+
+ /* And add it to the vchan */
+ return vchan_tx_prep(&vchan->vc, &contract->vd, flags);
+}
+
+static struct dma_async_tx_descriptor *
+sun4i_dma_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf, size_t len,
+ size_t period_len, enum dma_transfer_direction dir,
+ unsigned long flags)
+{
+ struct sun4i_dma_vchan *vchan = to_sun4i_dma_vchan(chan);
+ struct dma_slave_config *sconfig = &vchan->cfg;
+ struct sun4i_dma_promise *promise;
+ struct sun4i_dma_contract *contract;
+ dma_addr_t src, dest;
+ u32 endpoints;
+ int nr_periods, offset, plength, i;
+ u8 ram_type, io_mode, linear_mode;
+
+ if (!is_slave_direction(dir)) {
+ dev_err(chan2dev(chan), "Invalid DMA direction\n");
+ return NULL;
+ }
+
+ contract = generate_dma_contract();
+ if (!contract)
+ return NULL;
+
+ contract->is_cyclic = 1;
+
+ if (vchan->is_dedicated) {
+ io_mode = SUN4I_DDMA_ADDR_MODE_IO;
+ linear_mode = SUN4I_DDMA_ADDR_MODE_LINEAR;
+ ram_type = SUN4I_DDMA_DRQ_TYPE_SDRAM;
+ } else {
+ io_mode = SUN4I_NDMA_ADDR_MODE_IO;
+ linear_mode = SUN4I_NDMA_ADDR_MODE_LINEAR;
+ ram_type = SUN4I_NDMA_DRQ_TYPE_SDRAM;
+ }
+
+ if (dir == DMA_MEM_TO_DEV) {
+ src = buf;
+ dest = sconfig->dst_addr;
+ endpoints = SUN4I_DMA_CFG_DST_DRQ_TYPE(vchan->endpoint) |
+ SUN4I_DMA_CFG_DST_ADDR_MODE(io_mode) |
+ SUN4I_DMA_CFG_SRC_DRQ_TYPE(ram_type) |
+ SUN4I_DMA_CFG_SRC_ADDR_MODE(linear_mode);
+ } else {
+ src = sconfig->src_addr;
+ dest = buf;
+ endpoints = SUN4I_DMA_CFG_DST_DRQ_TYPE(ram_type) |
+ SUN4I_DMA_CFG_DST_ADDR_MODE(linear_mode) |
+ SUN4I_DMA_CFG_SRC_DRQ_TYPE(vchan->endpoint) |
+ SUN4I_DMA_CFG_SRC_ADDR_MODE(io_mode);
+ }
+
+ /*
+ * We will be using half done interrupts to make two periods
+ * out of a promise, so we need to program the DMA engine less
+ * often
+ */
+
+ /*
+ * The engine can interrupt on half-transfer, so we can use
+ * this feature to program the engine half as often as if we
+ * didn't use it (keep in mind the hardware doesn't support
+ * linked lists).
+ *
+ * Say you have a set of periods (| marks the start/end, I for
+ * interrupt, P for programming the engine to do a new
+ * transfer), the easy but slow way would be to do
+ *
+ * |---|---|---|---| (periods / promises)
+ * P I,P I,P I,P I
+ *
+ * Using half transfer interrupts you can do
+ *
+ * |-------|-------| (promises as configured on hw)
+ * |---|---|---|---| (periods)
+ * P I I,P I I
+ *
+ * Which requires half the engine programming for the same
+ * functionality.
+ *
+ * This only works if two periods fit in a single promise. That will
+ * always be the case for dedicated DMA, where the hardware has a much
+ * larger maximum transfer size than advertised to clients.
+ */
+ if (vchan->is_dedicated || period_len <= SUN4I_NDMA_MAX_SEG_SIZE / 2) {
+ period_len *= 2;
+ contract->use_half_int = 1;
+ }
+
+ nr_periods = DIV_ROUND_UP(len, period_len);
+ for (i = 0; i < nr_periods; i++) {
+ /* Calculate the offset in the buffer and the length needed */
+ offset = i * period_len;
+ plength = min((len - offset), period_len);
+ if (dir == DMA_MEM_TO_DEV)
+ src = buf + offset;
+ else
+ dest = buf + offset;
+
+ /* Make the promise */
+ if (vchan->is_dedicated)
+ promise = generate_ddma_promise(chan, src, dest,
+ plength, sconfig);
+ else
+ promise = generate_ndma_promise(chan, src, dest,
+ plength, sconfig, dir);
+
+ if (!promise) {
+ /* TODO: should we free everything? */
+ return NULL;
+ }
+ promise->cfg |= endpoints;
+
+ /* Then add it to the contract */
+ list_add_tail(&promise->list, &contract->demands);
+ }
+
+ /* And add it to the vchan */
+ return vchan_tx_prep(&vchan->vc, &contract->vd, flags);
+}
+
+static struct dma_async_tx_descriptor *
+sun4i_dma_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
+ unsigned int sg_len, enum dma_transfer_direction dir,
+ unsigned long flags, void *context)
+{
+ struct sun4i_dma_vchan *vchan = to_sun4i_dma_vchan(chan);
+ struct dma_slave_config *sconfig = &vchan->cfg;
+ struct sun4i_dma_promise *promise;
+ struct sun4i_dma_contract *contract;
+ u8 ram_type, io_mode, linear_mode;
+ struct scatterlist *sg;
+ dma_addr_t srcaddr, dstaddr;
+ u32 endpoints, para;
+ int i;
+
+ if (!sgl)
+ return NULL;
+
+ if (!is_slave_direction(dir)) {
+ dev_err(chan2dev(chan), "Invalid DMA direction\n");
+ return NULL;
+ }
+
+ contract = generate_dma_contract();
+ if (!contract)
+ return NULL;
+
+ if (vchan->is_dedicated) {
+ io_mode = SUN4I_DDMA_ADDR_MODE_IO;
+ linear_mode = SUN4I_DDMA_ADDR_MODE_LINEAR;
+ ram_type = SUN4I_DDMA_DRQ_TYPE_SDRAM;
+ } else {
+ io_mode = SUN4I_NDMA_ADDR_MODE_IO;
+ linear_mode = SUN4I_NDMA_ADDR_MODE_LINEAR;
+ ram_type = SUN4I_NDMA_DRQ_TYPE_SDRAM;
+ }
+
+ if (dir == DMA_MEM_TO_DEV)
+ endpoints = SUN4I_DMA_CFG_DST_DRQ_TYPE(vchan->endpoint) |
+ SUN4I_DMA_CFG_DST_ADDR_MODE(io_mode) |
+ SUN4I_DMA_CFG_SRC_DRQ_TYPE(ram_type) |
+ SUN4I_DMA_CFG_SRC_ADDR_MODE(linear_mode);
+ else
+ endpoints = SUN4I_DMA_CFG_DST_DRQ_TYPE(ram_type) |
+ SUN4I_DMA_CFG_DST_ADDR_MODE(linear_mode) |
+ SUN4I_DMA_CFG_SRC_DRQ_TYPE(vchan->endpoint) |
+ SUN4I_DMA_CFG_SRC_ADDR_MODE(io_mode);
+
+ for_each_sg(sgl, sg, sg_len, i) {
+ /* Figure out addresses */
+ if (dir == DMA_MEM_TO_DEV) {
+ srcaddr = sg_dma_address(sg);
+ dstaddr = sconfig->dst_addr;
+ } else {
+ srcaddr = sconfig->src_addr;
+ dstaddr = sg_dma_address(sg);
+ }
+
+ /*
+ * These are the magic DMA engine timings that keep SPI going.
+ * I haven't seen any interface on DMAEngine to configure
+ * timings, and so far they seem to work for everything we
+ * support, so I've kept them here. I don't know if other
+ * devices need different timings because, as usual, we only
+ * have the "para" bitfield meanings, but no comment on what
+ * the values should be when doing a certain operation :|
+ */
+ para = SUN4I_DDMA_MAGIC_SPI_PARAMETERS;
+
+ /* And make a suitable promise */
+ if (vchan->is_dedicated)
+ promise = generate_ddma_promise(chan, srcaddr, dstaddr,
+ sg_dma_len(sg),
+ sconfig);
+ else
+ promise = generate_ndma_promise(chan, srcaddr, dstaddr,
+ sg_dma_len(sg),
+ sconfig, dir);
+
+ if (!promise)
+ return NULL; /* TODO: should we free everything? */
+
+ promise->cfg |= endpoints;
+ promise->para = para;
+
+ /* Then add it to the contract */
+ list_add_tail(&promise->list, &contract->demands);
+ }
+
+ /*
+ * Once we've got all the promises ready, add the contract
+ * to the pending list on the vchan
+ */
+ return vchan_tx_prep(&vchan->vc, &contract->vd, flags);
+}
+
+static int sun4i_dma_terminate_all(struct dma_chan *chan)
+{
+ struct sun4i_dma_dev *priv = to_sun4i_dma_dev(chan->device);
+ struct sun4i_dma_vchan *vchan = to_sun4i_dma_vchan(chan);
+ struct sun4i_dma_pchan *pchan = vchan->pchan;
+ LIST_HEAD(head);
+ unsigned long flags;
+
+ spin_lock_irqsave(&vchan->vc.lock, flags);
+ vchan_get_all_descriptors(&vchan->vc, &head);
+ spin_unlock_irqrestore(&vchan->vc.lock, flags);
+
+ /*
+ * Clearing the configuration register will halt the pchan. Interrupts
+ * may still trigger, so don't forget to disable them.
+ */
+ if (pchan) {
+ if (pchan->is_dedicated)
+ writel(0, pchan->base + SUN4I_DDMA_CFG_REG);
+ else
+ writel(0, pchan->base + SUN4I_NDMA_CFG_REG);
+ set_pchan_interrupt(priv, pchan, 0, 0);
+ release_pchan(priv, pchan);
+ }
+
+ spin_lock_irqsave(&vchan->vc.lock, flags);
+ /* Clear these so the vchan is usable again */
+ vchan->processing = NULL;
+ vchan->pchan = NULL;
+ spin_unlock_irqrestore(&vchan->vc.lock, flags);
+
+ vchan_dma_desc_free_list(&vchan->vc, &head);
+
+ return 0;
+}
+
+static int sun4i_dma_config(struct dma_chan *chan,
+ struct dma_slave_config *config)
+{
+ struct sun4i_dma_vchan *vchan = to_sun4i_dma_vchan(chan);
+
+ memcpy(&vchan->cfg, config, sizeof(*config));
+
+ return 0;
+}
+
+static struct dma_chan *sun4i_dma_of_xlate(struct of_phandle_args *dma_spec,
+ struct of_dma *ofdma)
+{
+ struct sun4i_dma_dev *priv = ofdma->of_dma_data;
+ struct sun4i_dma_vchan *vchan;
+ struct dma_chan *chan;
+ u8 is_dedicated = dma_spec->args[0];
+ u8 endpoint = dma_spec->args[1];
+
+ /* Check if type is Normal or Dedicated */
+ if (is_dedicated != 0 && is_dedicated != 1)
+ return NULL;
+
+ /* Make sure the endpoint looks sane */
+ if ((is_dedicated && endpoint >= SUN4I_DDMA_DRQ_TYPE_LIMIT) ||
+ (!is_dedicated && endpoint >= SUN4I_NDMA_DRQ_TYPE_LIMIT))
+ return NULL;
+
+ chan = dma_get_any_slave_channel(&priv->slave);
+ if (!chan)
+ return NULL;
+
+ /* Assign the endpoint to the vchan */
+ vchan = to_sun4i_dma_vchan(chan);
+ vchan->is_dedicated = is_dedicated;
+ vchan->endpoint = endpoint;
+
+ return chan;
+}
+
+static enum dma_status sun4i_dma_tx_status(struct dma_chan *chan,
+ dma_cookie_t cookie,
+ struct dma_tx_state *state)
+{
+ struct sun4i_dma_vchan *vchan = to_sun4i_dma_vchan(chan);
+ struct sun4i_dma_pchan *pchan = vchan->pchan;
+ struct sun4i_dma_contract *contract;
+ struct sun4i_dma_promise *promise;
+ struct virt_dma_desc *vd;
+ unsigned long flags;
+ enum dma_status ret;
+ size_t bytes = 0;
+
+ ret = dma_cookie_status(chan, cookie, state);
+ if (!state || (ret == DMA_COMPLETE))
+ return ret;
+
+ spin_lock_irqsave(&vchan->vc.lock, flags);
+ vd = vchan_find_desc(&vchan->vc, cookie);
+ if (!vd)
+ goto exit;
+ contract = to_sun4i_dma_contract(vd);
+
+ list_for_each_entry(promise, &contract->demands, list)
+ bytes += promise->len;
+
+ /*
+ * The hardware is configured to return the remaining byte
+ * quantity. If possible, replace the first listed element's
+ * full size with the actual remaining amount
+ */
+ promise = list_first_entry_or_null(&contract->demands,
+ struct sun4i_dma_promise, list);
+ if (promise && pchan) {
+ bytes -= promise->len;
+ if (pchan->is_dedicated)
+ bytes += readl(pchan->base + SUN4I_DDMA_BYTE_COUNT_REG);
+ else
+ bytes += readl(pchan->base + SUN4I_NDMA_BYTE_COUNT_REG);
+ }
+
+exit:
+
+ dma_set_residue(state, bytes);
+ spin_unlock_irqrestore(&vchan->vc.lock, flags);
+
+ return ret;
+}
+
+static void sun4i_dma_issue_pending(struct dma_chan *chan)
+{
+ struct sun4i_dma_dev *priv = to_sun4i_dma_dev(chan->device);
+ struct sun4i_dma_vchan *vchan = to_sun4i_dma_vchan(chan);
+ unsigned long flags;
+
+ spin_lock_irqsave(&vchan->vc.lock, flags);
+
+ /*
+ * If there are pending transactions for this vchan, push one of
+ * them into the engine to get the ball rolling.
+ */
+ if (vchan_issue_pending(&vchan->vc))
+ __execute_vchan_pending(priv, vchan);
+
+ spin_unlock_irqrestore(&vchan->vc.lock, flags);
+}
+
+static irqreturn_t sun4i_dma_interrupt(int irq, void *dev_id)
+{
+ struct sun4i_dma_dev *priv = dev_id;
+ struct sun4i_dma_pchan *pchans = priv->pchans, *pchan;
+ struct sun4i_dma_vchan *vchan;
+ struct sun4i_dma_contract *contract;
+ struct sun4i_dma_promise *promise;
+ unsigned long pendirq, irqs, disableirqs;
+ int bit, i, free_room, allow_mitigation = 1;
+
+ pendirq = readl_relaxed(priv->base + SUN4I_DMA_IRQ_PENDING_STATUS_REG);
+
+handle_pending:
+
+ disableirqs = 0;
+ free_room = 0;
+
+ for_each_set_bit(bit, &pendirq, 32) {
+ pchan = &pchans[bit >> 1];
+ vchan = pchan->vchan;
+ if (!vchan) /* a terminated channel may still interrupt */
+ continue;
+ contract = vchan->contract;
+
+ /*
+ * Disable the IRQ and free the pchan if it's an end
+ * interrupt (odd bit)
+ */
+ if (bit & 1) {
+ spin_lock(&vchan->vc.lock);
+
+ /*
+ * Move the promise into the completed list now that
+ * we're done with it
+ */
+ list_move_tail(&vchan->processing->list,
+ &contract->completed_demands);
+
+ /*
+ * Cyclic DMA transfers are special:
+ * - There's always something we can dispatch
+ * - We need to run the callback
+ * - Latency is very important, as this is used by audio
+ * We therefore just cycle through the list and dispatch
+ * whatever we have here, reusing the pchan. There's
+ * no need to run the thread after this.
+ *
+ * For non-cyclic transfers we need to look around,
+ * so we can program some more work, or notify the
+ * client that their transfers have been completed.
+ */
+ if (contract->is_cyclic) {
+ promise = get_next_cyclic_promise(contract);
+ vchan->processing = promise;
+ configure_pchan(pchan, promise);
+ vchan_cyclic_callback(&contract->vd);
+ } else {
+ vchan->processing = NULL;
+ vchan->pchan = NULL;
+
+ free_room = 1;
+ disableirqs |= BIT(bit);
+ release_pchan(priv, pchan);
+ }
+
+ spin_unlock(&vchan->vc.lock);
+ } else {
+ /* Half done interrupt */
+ if (contract->is_cyclic)
+ vchan_cyclic_callback(&contract->vd);
+ else
+ disableirqs |= BIT(bit);
+ }
+ }
+
+ /* Disable the IRQs for events we handled */
+ spin_lock(&priv->lock);
+ irqs = readl_relaxed(priv->base + SUN4I_DMA_IRQ_ENABLE_REG);
+ writel_relaxed(irqs & ~disableirqs,
+ priv->base + SUN4I_DMA_IRQ_ENABLE_REG);
+ spin_unlock(&priv->lock);
+
+ /* Writing 1 to the pending field will clear the pending interrupt */
+ writel_relaxed(pendirq, priv->base + SUN4I_DMA_IRQ_PENDING_STATUS_REG);
+
+ /*
+ * If a pchan was freed, we may be able to schedule something else,
+ * so have a look around
+ */
+ if (free_room) {
+ for (i = 0; i < SUN4I_DMA_NR_MAX_VCHANS; i++) {
+ vchan = &priv->vchans[i];
+ spin_lock(&vchan->vc.lock);
+ __execute_vchan_pending(priv, vchan);
+ spin_unlock(&vchan->vc.lock);
+ }
+ }
+
+ /*
+ * Handle newer interrupts if some showed up, but only do it once
+ * to avoid a too long a loop
+ */
+ if (allow_mitigation) {
+ pendirq = readl_relaxed(priv->base +
+ SUN4I_DMA_IRQ_PENDING_STATUS_REG);
+ if (pendirq) {
+ allow_mitigation = 0;
+ goto handle_pending;
+ }
+ }
+
+ return IRQ_HANDLED;
+}
+
+static int sun4i_dma_probe(struct platform_device *pdev)
+{
+ struct sun4i_dma_dev *priv;
+ struct resource *res;
+ int i, j, ret;
+
+ priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return -ENOMEM;
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ priv->base = devm_ioremap_resource(&pdev->dev, res);
+ if (IS_ERR(priv->base))
+ return PTR_ERR(priv->base);
+
+ priv->irq = platform_get_irq(pdev, 0);
+ if (priv->irq < 0)
+ return priv->irq;
+
+ priv->clk = devm_clk_get(&pdev->dev, NULL);
+ if (IS_ERR(priv->clk)) {
+ dev_err(&pdev->dev, "No clock specified\n");
+ return PTR_ERR(priv->clk);
+ }
+
+ platform_set_drvdata(pdev, priv);
+ spin_lock_init(&priv->lock);
+
+ dma_set_max_seg_size(&pdev->dev, SUN4I_DMA_MAX_SEG_SIZE);
+
+ dma_cap_zero(priv->slave.cap_mask);
+ dma_cap_set(DMA_PRIVATE, priv->slave.cap_mask);
+ dma_cap_set(DMA_MEMCPY, priv->slave.cap_mask);
+ dma_cap_set(DMA_CYCLIC, priv->slave.cap_mask);
+ dma_cap_set(DMA_SLAVE, priv->slave.cap_mask);
+
+ INIT_LIST_HEAD(&priv->slave.channels);
+ priv->slave.device_free_chan_resources = sun4i_dma_free_chan_resources;
+ priv->slave.device_tx_status = sun4i_dma_tx_status;
+ priv->slave.device_issue_pending = sun4i_dma_issue_pending;
+ priv->slave.device_prep_slave_sg = sun4i_dma_prep_slave_sg;
+ priv->slave.device_prep_dma_memcpy = sun4i_dma_prep_dma_memcpy;
+ priv->slave.device_prep_dma_cyclic = sun4i_dma_prep_dma_cyclic;
+ priv->slave.device_config = sun4i_dma_config;
+ priv->slave.device_terminate_all = sun4i_dma_terminate_all;
+ priv->slave.copy_align = 2;
+ priv->slave.src_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) |
+ BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) |
+ BIT(DMA_SLAVE_BUSWIDTH_4_BYTES);
+ priv->slave.dst_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) |
+ BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) |
+ BIT(DMA_SLAVE_BUSWIDTH_4_BYTES);
+ priv->slave.directions = BIT(DMA_DEV_TO_MEM) |
+ BIT(DMA_MEM_TO_DEV);
+ priv->slave.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
+
+ priv->slave.dev = &pdev->dev;
+
+ priv->pchans = devm_kcalloc(&pdev->dev, SUN4I_DMA_NR_MAX_CHANNELS,
+ sizeof(struct sun4i_dma_pchan), GFP_KERNEL);
+ priv->vchans = devm_kcalloc(&pdev->dev, SUN4I_DMA_NR_MAX_VCHANS,
+ sizeof(struct sun4i_dma_vchan), GFP_KERNEL);
+ if (!priv->vchans || !priv->pchans)
+ return -ENOMEM;
+
+ /*
+ * [0..SUN4I_NDMA_NR_MAX_CHANNELS) are normal pchans, and
+ * [SUN4I_NDMA_NR_MAX_CHANNELS..SUN4I_DMA_NR_MAX_CHANNELS) are
+ * dedicated ones
+ */
+ for (i = 0; i < SUN4I_NDMA_NR_MAX_CHANNELS; i++)
+ priv->pchans[i].base = priv->base +
+ SUN4I_NDMA_CHANNEL_REG_BASE(i);
+
+ for (j = 0; i < SUN4I_DMA_NR_MAX_CHANNELS; i++, j++) {
+ priv->pchans[i].base = priv->base +
+ SUN4I_DDMA_CHANNEL_REG_BASE(j);
+ priv->pchans[i].is_dedicated = 1;
+ }
+
+ for (i = 0; i < SUN4I_DMA_NR_MAX_VCHANS; i++) {
+ struct sun4i_dma_vchan *vchan = &priv->vchans[i];
+
+ spin_lock_init(&vchan->vc.lock);
+ vchan->vc.desc_free = sun4i_dma_free_contract;
+ vchan_init(&vchan->vc, &priv->slave);
+ }
+
+ ret = clk_prepare_enable(priv->clk);
+ if (ret) {
+ dev_err(&pdev->dev, "Couldn't enable the clock\n");
+ return ret;
+ }
+
+ /*
+ * Make sure the IRQs are all disabled and accounted for. The bootloader
+ * likes to leave these dirty
+ */
+ writel(0, priv->base + SUN4I_DMA_IRQ_ENABLE_REG);
+ writel(0xFFFFFFFF, priv->base + SUN4I_DMA_IRQ_PENDING_STATUS_REG);
+
+ ret = devm_request_irq(&pdev->dev, priv->irq, sun4i_dma_interrupt,
+ 0, dev_name(&pdev->dev), priv);
+ if (ret) {
+ dev_err(&pdev->dev, "Cannot request IRQ\n");
+ goto err_clk_disable;
+ }
+
+ ret = dma_async_device_register(&priv->slave);
+ if (ret) {
+ dev_warn(&pdev->dev, "Failed to register DMA engine device\n");
+ goto err_clk_disable;
+ }
+
+ ret = of_dma_controller_register(pdev->dev.of_node, sun4i_dma_of_xlate,
+ priv);
+ if (ret) {
+ dev_err(&pdev->dev, "of_dma_controller_register failed\n");
+ goto err_dma_unregister;
+ }
+
+ dev_dbg(&pdev->dev, "Successfully probed SUN4I_DMA\n");
+
+ return 0;
+
+err_dma_unregister:
+ dma_async_device_unregister(&priv->slave);
+err_clk_disable:
+ clk_disable_unprepare(priv->clk);
+ return ret;
+}
+
+static int sun4i_dma_remove(struct platform_device *pdev)
+{
+ struct sun4i_dma_dev *priv = platform_get_drvdata(pdev);
+
+ /* Disable IRQ so no more work is scheduled */
+ disable_irq(priv->irq);
+
+ of_dma_controller_free(pdev->dev.of_node);
+ dma_async_device_unregister(&priv->slave);
+
+ clk_disable_unprepare(priv->clk);
+
+ return 0;
+}
+
+static const struct of_device_id sun4i_dma_match[] = {
+ { .compatible = "allwinner,sun4i-a10-dma" },
+ { /* sentinel */ },
+};
+MODULE_DEVICE_TABLE(of, sun4i_dma_match);
+
+static struct platform_driver sun4i_dma_driver = {
+ .probe = sun4i_dma_probe,
+ .remove = sun4i_dma_remove,
+ .driver = {
+ .name = "sun4i-dma",
+ .of_match_table = sun4i_dma_match,
+ },
+};
+
+module_platform_driver(sun4i_dma_driver);
+
+MODULE_DESCRIPTION("Allwinner A10 Dedicated DMA Controller Driver");
+MODULE_AUTHOR("Emilio López <emilio@elopez.com.ar>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/dma/sun6i-dma.c b/drivers/dma/sun6i-dma.c
new file mode 100644
index 000000000..b7557f437
--- /dev/null
+++ b/drivers/dma/sun6i-dma.c
@@ -0,0 +1,1503 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2013-2014 Allwinner Tech Co., Ltd
+ * Author: Sugar <shuge@allwinnertech.com>
+ *
+ * Copyright (C) 2014 Maxime Ripard
+ * Maxime Ripard <maxime.ripard@free-electrons.com>
+ */
+
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/dmaengine.h>
+#include <linux/dmapool.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/of_dma.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/reset.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+
+#include "virt-dma.h"
+
+/*
+ * Common registers
+ */
+#define DMA_IRQ_EN(x) ((x) * 0x04)
+#define DMA_IRQ_HALF BIT(0)
+#define DMA_IRQ_PKG BIT(1)
+#define DMA_IRQ_QUEUE BIT(2)
+
+#define DMA_IRQ_CHAN_NR 8
+#define DMA_IRQ_CHAN_WIDTH 4
+
+
+#define DMA_IRQ_STAT(x) ((x) * 0x04 + 0x10)
+
+#define DMA_STAT 0x30
+
+/* Offset between DMA_IRQ_EN and DMA_IRQ_STAT limits number of channels */
+#define DMA_MAX_CHANNELS (DMA_IRQ_CHAN_NR * 0x10 / 4)
+
+/*
+ * sun8i specific registers
+ */
+#define SUN8I_DMA_GATE 0x20
+#define SUN8I_DMA_GATE_ENABLE 0x4
+
+#define SUNXI_H3_SECURE_REG 0x20
+#define SUNXI_H3_DMA_GATE 0x28
+#define SUNXI_H3_DMA_GATE_ENABLE 0x4
+/*
+ * Channels specific registers
+ */
+#define DMA_CHAN_ENABLE 0x00
+#define DMA_CHAN_ENABLE_START BIT(0)
+#define DMA_CHAN_ENABLE_STOP 0
+
+#define DMA_CHAN_PAUSE 0x04
+#define DMA_CHAN_PAUSE_PAUSE BIT(1)
+#define DMA_CHAN_PAUSE_RESUME 0
+
+#define DMA_CHAN_LLI_ADDR 0x08
+
+#define DMA_CHAN_CUR_CFG 0x0c
+#define DMA_CHAN_MAX_DRQ_A31 0x1f
+#define DMA_CHAN_MAX_DRQ_H6 0x3f
+#define DMA_CHAN_CFG_SRC_DRQ_A31(x) ((x) & DMA_CHAN_MAX_DRQ_A31)
+#define DMA_CHAN_CFG_SRC_DRQ_H6(x) ((x) & DMA_CHAN_MAX_DRQ_H6)
+#define DMA_CHAN_CFG_SRC_MODE_A31(x) (((x) & 0x1) << 5)
+#define DMA_CHAN_CFG_SRC_MODE_H6(x) (((x) & 0x1) << 8)
+#define DMA_CHAN_CFG_SRC_BURST_A31(x) (((x) & 0x3) << 7)
+#define DMA_CHAN_CFG_SRC_BURST_H3(x) (((x) & 0x3) << 6)
+#define DMA_CHAN_CFG_SRC_WIDTH(x) (((x) & 0x3) << 9)
+
+#define DMA_CHAN_CFG_DST_DRQ_A31(x) (DMA_CHAN_CFG_SRC_DRQ_A31(x) << 16)
+#define DMA_CHAN_CFG_DST_DRQ_H6(x) (DMA_CHAN_CFG_SRC_DRQ_H6(x) << 16)
+#define DMA_CHAN_CFG_DST_MODE_A31(x) (DMA_CHAN_CFG_SRC_MODE_A31(x) << 16)
+#define DMA_CHAN_CFG_DST_MODE_H6(x) (DMA_CHAN_CFG_SRC_MODE_H6(x) << 16)
+#define DMA_CHAN_CFG_DST_BURST_A31(x) (DMA_CHAN_CFG_SRC_BURST_A31(x) << 16)
+#define DMA_CHAN_CFG_DST_BURST_H3(x) (DMA_CHAN_CFG_SRC_BURST_H3(x) << 16)
+#define DMA_CHAN_CFG_DST_WIDTH(x) (DMA_CHAN_CFG_SRC_WIDTH(x) << 16)
+
+#define DMA_CHAN_CUR_SRC 0x10
+
+#define DMA_CHAN_CUR_DST 0x14
+
+#define DMA_CHAN_CUR_CNT 0x18
+
+#define DMA_CHAN_CUR_PARA 0x1c
+
+/*
+ * LLI address mangling
+ *
+ * The LLI link physical address is also mangled, but we avoid dealing
+ * with that by allocating LLIs from the DMA32 zone.
+ */
+#define SRC_HIGH_ADDR(x) (((x) & 0x3U) << 16)
+#define DST_HIGH_ADDR(x) (((x) & 0x3U) << 18)
+
+/*
+ * Various hardware related defines
+ */
+#define LLI_LAST_ITEM 0xfffff800
+#define NORMAL_WAIT 8
+#define DRQ_SDRAM 1
+#define LINEAR_MODE 0
+#define IO_MODE 1
+
+/* forward declaration */
+struct sun6i_dma_dev;
+
+/*
+ * Hardware channels / ports representation
+ *
+ * The hardware is used in several SoCs, with differing numbers
+ * of channels and endpoints. This structure ties those numbers
+ * to a certain compatible string.
+ */
+struct sun6i_dma_config {
+ u32 nr_max_channels;
+ u32 nr_max_requests;
+ u32 nr_max_vchans;
+ /*
+ * In the datasheets/user manuals of newer Allwinner SoCs, a special
+ * bit (bit 2 at register 0x20) is present.
+ * It's named "DMA MCLK interface circuit auto gating bit" in the
+ * documents, and the footnote of this register says that this bit
+ * should be set up when initializing the DMA controller.
+ * Allwinner A23/A33 user manuals do not have this bit documented,
+ * however these SoCs really have and need this bit, as seen in the
+ * BSP kernel source code.
+ */
+ void (*clock_autogate_enable)(struct sun6i_dma_dev *);
+ void (*set_burst_length)(u32 *p_cfg, s8 src_burst, s8 dst_burst);
+ void (*set_drq)(u32 *p_cfg, s8 src_drq, s8 dst_drq);
+ void (*set_mode)(u32 *p_cfg, s8 src_mode, s8 dst_mode);
+ u32 src_burst_lengths;
+ u32 dst_burst_lengths;
+ u32 src_addr_widths;
+ u32 dst_addr_widths;
+ bool has_high_addr;
+ bool has_mbus_clk;
+};
+
+/*
+ * Hardware representation of the LLI
+ *
+ * The hardware will be fed the physical address of this structure,
+ * and read its content in order to start the transfer.
+ */
+struct sun6i_dma_lli {
+ u32 cfg;
+ u32 src;
+ u32 dst;
+ u32 len;
+ u32 para;
+ u32 p_lli_next;
+
+ /*
+ * This field is not used by the DMA controller, but will be
+ * used by the CPU to go through the list (mostly for dumping
+ * or freeing it).
+ */
+ struct sun6i_dma_lli *v_lli_next;
+};
+
+
+struct sun6i_desc {
+ struct virt_dma_desc vd;
+ dma_addr_t p_lli;
+ struct sun6i_dma_lli *v_lli;
+};
+
+struct sun6i_pchan {
+ u32 idx;
+ void __iomem *base;
+ struct sun6i_vchan *vchan;
+ struct sun6i_desc *desc;
+ struct sun6i_desc *done;
+};
+
+struct sun6i_vchan {
+ struct virt_dma_chan vc;
+ struct list_head node;
+ struct dma_slave_config cfg;
+ struct sun6i_pchan *phy;
+ u8 port;
+ u8 irq_type;
+ bool cyclic;
+};
+
+struct sun6i_dma_dev {
+ struct dma_device slave;
+ void __iomem *base;
+ struct clk *clk;
+ struct clk *clk_mbus;
+ int irq;
+ spinlock_t lock;
+ struct reset_control *rstc;
+ struct tasklet_struct task;
+ atomic_t tasklet_shutdown;
+ struct list_head pending;
+ struct dma_pool *pool;
+ struct sun6i_pchan *pchans;
+ struct sun6i_vchan *vchans;
+ const struct sun6i_dma_config *cfg;
+ u32 num_pchans;
+ u32 num_vchans;
+ u32 max_request;
+};
+
+static struct device *chan2dev(struct dma_chan *chan)
+{
+ return &chan->dev->device;
+}
+
+static inline struct sun6i_dma_dev *to_sun6i_dma_dev(struct dma_device *d)
+{
+ return container_of(d, struct sun6i_dma_dev, slave);
+}
+
+static inline struct sun6i_vchan *to_sun6i_vchan(struct dma_chan *chan)
+{
+ return container_of(chan, struct sun6i_vchan, vc.chan);
+}
+
+static inline struct sun6i_desc *
+to_sun6i_desc(struct dma_async_tx_descriptor *tx)
+{
+ return container_of(tx, struct sun6i_desc, vd.tx);
+}
+
+static inline void sun6i_dma_dump_com_regs(struct sun6i_dma_dev *sdev)
+{
+ dev_dbg(sdev->slave.dev, "Common register:\n"
+ "\tmask0(%04x): 0x%08x\n"
+ "\tmask1(%04x): 0x%08x\n"
+ "\tpend0(%04x): 0x%08x\n"
+ "\tpend1(%04x): 0x%08x\n"
+ "\tstats(%04x): 0x%08x\n",
+ DMA_IRQ_EN(0), readl(sdev->base + DMA_IRQ_EN(0)),
+ DMA_IRQ_EN(1), readl(sdev->base + DMA_IRQ_EN(1)),
+ DMA_IRQ_STAT(0), readl(sdev->base + DMA_IRQ_STAT(0)),
+ DMA_IRQ_STAT(1), readl(sdev->base + DMA_IRQ_STAT(1)),
+ DMA_STAT, readl(sdev->base + DMA_STAT));
+}
+
+static inline void sun6i_dma_dump_chan_regs(struct sun6i_dma_dev *sdev,
+ struct sun6i_pchan *pchan)
+{
+ dev_dbg(sdev->slave.dev, "Chan %d reg:\n"
+ "\t___en(%04x): \t0x%08x\n"
+ "\tpause(%04x): \t0x%08x\n"
+ "\tstart(%04x): \t0x%08x\n"
+ "\t__cfg(%04x): \t0x%08x\n"
+ "\t__src(%04x): \t0x%08x\n"
+ "\t__dst(%04x): \t0x%08x\n"
+ "\tcount(%04x): \t0x%08x\n"
+ "\t_para(%04x): \t0x%08x\n\n",
+ pchan->idx,
+ DMA_CHAN_ENABLE,
+ readl(pchan->base + DMA_CHAN_ENABLE),
+ DMA_CHAN_PAUSE,
+ readl(pchan->base + DMA_CHAN_PAUSE),
+ DMA_CHAN_LLI_ADDR,
+ readl(pchan->base + DMA_CHAN_LLI_ADDR),
+ DMA_CHAN_CUR_CFG,
+ readl(pchan->base + DMA_CHAN_CUR_CFG),
+ DMA_CHAN_CUR_SRC,
+ readl(pchan->base + DMA_CHAN_CUR_SRC),
+ DMA_CHAN_CUR_DST,
+ readl(pchan->base + DMA_CHAN_CUR_DST),
+ DMA_CHAN_CUR_CNT,
+ readl(pchan->base + DMA_CHAN_CUR_CNT),
+ DMA_CHAN_CUR_PARA,
+ readl(pchan->base + DMA_CHAN_CUR_PARA));
+}
+
+static inline s8 convert_burst(u32 maxburst)
+{
+ switch (maxburst) {
+ case 1:
+ return 0;
+ case 4:
+ return 1;
+ case 8:
+ return 2;
+ case 16:
+ return 3;
+ default:
+ return -EINVAL;
+ }
+}
+
+static inline s8 convert_buswidth(enum dma_slave_buswidth addr_width)
+{
+ return ilog2(addr_width);
+}
+
+static void sun6i_enable_clock_autogate_a23(struct sun6i_dma_dev *sdev)
+{
+ writel(SUN8I_DMA_GATE_ENABLE, sdev->base + SUN8I_DMA_GATE);
+}
+
+static void sun6i_enable_clock_autogate_h3(struct sun6i_dma_dev *sdev)
+{
+ writel(SUNXI_H3_DMA_GATE_ENABLE, sdev->base + SUNXI_H3_DMA_GATE);
+}
+
+static void sun6i_set_burst_length_a31(u32 *p_cfg, s8 src_burst, s8 dst_burst)
+{
+ *p_cfg |= DMA_CHAN_CFG_SRC_BURST_A31(src_burst) |
+ DMA_CHAN_CFG_DST_BURST_A31(dst_burst);
+}
+
+static void sun6i_set_burst_length_h3(u32 *p_cfg, s8 src_burst, s8 dst_burst)
+{
+ *p_cfg |= DMA_CHAN_CFG_SRC_BURST_H3(src_burst) |
+ DMA_CHAN_CFG_DST_BURST_H3(dst_burst);
+}
+
+static void sun6i_set_drq_a31(u32 *p_cfg, s8 src_drq, s8 dst_drq)
+{
+ *p_cfg |= DMA_CHAN_CFG_SRC_DRQ_A31(src_drq) |
+ DMA_CHAN_CFG_DST_DRQ_A31(dst_drq);
+}
+
+static void sun6i_set_drq_h6(u32 *p_cfg, s8 src_drq, s8 dst_drq)
+{
+ *p_cfg |= DMA_CHAN_CFG_SRC_DRQ_H6(src_drq) |
+ DMA_CHAN_CFG_DST_DRQ_H6(dst_drq);
+}
+
+static void sun6i_set_mode_a31(u32 *p_cfg, s8 src_mode, s8 dst_mode)
+{
+ *p_cfg |= DMA_CHAN_CFG_SRC_MODE_A31(src_mode) |
+ DMA_CHAN_CFG_DST_MODE_A31(dst_mode);
+}
+
+static void sun6i_set_mode_h6(u32 *p_cfg, s8 src_mode, s8 dst_mode)
+{
+ *p_cfg |= DMA_CHAN_CFG_SRC_MODE_H6(src_mode) |
+ DMA_CHAN_CFG_DST_MODE_H6(dst_mode);
+}
+
+static size_t sun6i_get_chan_size(struct sun6i_pchan *pchan)
+{
+ struct sun6i_desc *txd = pchan->desc;
+ struct sun6i_dma_lli *lli;
+ size_t bytes;
+ dma_addr_t pos;
+
+ pos = readl(pchan->base + DMA_CHAN_LLI_ADDR);
+ bytes = readl(pchan->base + DMA_CHAN_CUR_CNT);
+
+ if (pos == LLI_LAST_ITEM)
+ return bytes;
+
+ for (lli = txd->v_lli; lli; lli = lli->v_lli_next) {
+ if (lli->p_lli_next == pos) {
+ for (lli = lli->v_lli_next; lli; lli = lli->v_lli_next)
+ bytes += lli->len;
+ break;
+ }
+ }
+
+ return bytes;
+}
+
+static void *sun6i_dma_lli_add(struct sun6i_dma_lli *prev,
+ struct sun6i_dma_lli *next,
+ dma_addr_t next_phy,
+ struct sun6i_desc *txd)
+{
+ if ((!prev && !txd) || !next)
+ return NULL;
+
+ if (!prev) {
+ txd->p_lli = next_phy;
+ txd->v_lli = next;
+ } else {
+ prev->p_lli_next = next_phy;
+ prev->v_lli_next = next;
+ }
+
+ next->p_lli_next = LLI_LAST_ITEM;
+ next->v_lli_next = NULL;
+
+ return next;
+}
+
+static inline void sun6i_dma_dump_lli(struct sun6i_vchan *vchan,
+ struct sun6i_dma_lli *v_lli,
+ dma_addr_t p_lli)
+{
+ dev_dbg(chan2dev(&vchan->vc.chan),
+ "\n\tdesc:\tp - %pad v - 0x%p\n"
+ "\t\tc - 0x%08x s - 0x%08x d - 0x%08x\n"
+ "\t\tl - 0x%08x p - 0x%08x n - 0x%08x\n",
+ &p_lli, v_lli,
+ v_lli->cfg, v_lli->src, v_lli->dst,
+ v_lli->len, v_lli->para, v_lli->p_lli_next);
+}
+
+static void sun6i_dma_free_desc(struct virt_dma_desc *vd)
+{
+ struct sun6i_desc *txd = to_sun6i_desc(&vd->tx);
+ struct sun6i_dma_dev *sdev = to_sun6i_dma_dev(vd->tx.chan->device);
+ struct sun6i_dma_lli *v_lli, *v_next;
+ dma_addr_t p_lli, p_next;
+
+ if (unlikely(!txd))
+ return;
+
+ p_lli = txd->p_lli;
+ v_lli = txd->v_lli;
+
+ while (v_lli) {
+ v_next = v_lli->v_lli_next;
+ p_next = v_lli->p_lli_next;
+
+ dma_pool_free(sdev->pool, v_lli, p_lli);
+
+ v_lli = v_next;
+ p_lli = p_next;
+ }
+
+ kfree(txd);
+}
+
+static int sun6i_dma_start_desc(struct sun6i_vchan *vchan)
+{
+ struct sun6i_dma_dev *sdev = to_sun6i_dma_dev(vchan->vc.chan.device);
+ struct virt_dma_desc *desc = vchan_next_desc(&vchan->vc);
+ struct sun6i_pchan *pchan = vchan->phy;
+ u32 irq_val, irq_reg, irq_offset;
+
+ if (!pchan)
+ return -EAGAIN;
+
+ if (!desc) {
+ pchan->desc = NULL;
+ pchan->done = NULL;
+ return -EAGAIN;
+ }
+
+ list_del(&desc->node);
+
+ pchan->desc = to_sun6i_desc(&desc->tx);
+ pchan->done = NULL;
+
+ sun6i_dma_dump_lli(vchan, pchan->desc->v_lli, pchan->desc->p_lli);
+
+ irq_reg = pchan->idx / DMA_IRQ_CHAN_NR;
+ irq_offset = pchan->idx % DMA_IRQ_CHAN_NR;
+
+ vchan->irq_type = vchan->cyclic ? DMA_IRQ_PKG : DMA_IRQ_QUEUE;
+
+ irq_val = readl(sdev->base + DMA_IRQ_EN(irq_reg));
+ irq_val &= ~((DMA_IRQ_HALF | DMA_IRQ_PKG | DMA_IRQ_QUEUE) <<
+ (irq_offset * DMA_IRQ_CHAN_WIDTH));
+ irq_val |= vchan->irq_type << (irq_offset * DMA_IRQ_CHAN_WIDTH);
+ writel(irq_val, sdev->base + DMA_IRQ_EN(irq_reg));
+
+ writel(pchan->desc->p_lli, pchan->base + DMA_CHAN_LLI_ADDR);
+ writel(DMA_CHAN_ENABLE_START, pchan->base + DMA_CHAN_ENABLE);
+
+ sun6i_dma_dump_com_regs(sdev);
+ sun6i_dma_dump_chan_regs(sdev, pchan);
+
+ return 0;
+}
+
+static void sun6i_dma_tasklet(struct tasklet_struct *t)
+{
+ struct sun6i_dma_dev *sdev = from_tasklet(sdev, t, task);
+ struct sun6i_vchan *vchan;
+ struct sun6i_pchan *pchan;
+ unsigned int pchan_alloc = 0;
+ unsigned int pchan_idx;
+
+ list_for_each_entry(vchan, &sdev->slave.channels, vc.chan.device_node) {
+ spin_lock_irq(&vchan->vc.lock);
+
+ pchan = vchan->phy;
+
+ if (pchan && pchan->done) {
+ if (sun6i_dma_start_desc(vchan)) {
+ /*
+ * No current txd associated with this channel
+ */
+ dev_dbg(sdev->slave.dev, "pchan %u: free\n",
+ pchan->idx);
+
+ /* Mark this channel free */
+ vchan->phy = NULL;
+ pchan->vchan = NULL;
+ }
+ }
+ spin_unlock_irq(&vchan->vc.lock);
+ }
+
+ spin_lock_irq(&sdev->lock);
+ for (pchan_idx = 0; pchan_idx < sdev->num_pchans; pchan_idx++) {
+ pchan = &sdev->pchans[pchan_idx];
+
+ if (pchan->vchan || list_empty(&sdev->pending))
+ continue;
+
+ vchan = list_first_entry(&sdev->pending,
+ struct sun6i_vchan, node);
+
+ /* Remove from pending channels */
+ list_del_init(&vchan->node);
+ pchan_alloc |= BIT(pchan_idx);
+
+ /* Mark this channel allocated */
+ pchan->vchan = vchan;
+ vchan->phy = pchan;
+ dev_dbg(sdev->slave.dev, "pchan %u: alloc vchan %p\n",
+ pchan->idx, &vchan->vc);
+ }
+ spin_unlock_irq(&sdev->lock);
+
+ for (pchan_idx = 0; pchan_idx < sdev->num_pchans; pchan_idx++) {
+ if (!(pchan_alloc & BIT(pchan_idx)))
+ continue;
+
+ pchan = sdev->pchans + pchan_idx;
+ vchan = pchan->vchan;
+ if (vchan) {
+ spin_lock_irq(&vchan->vc.lock);
+ sun6i_dma_start_desc(vchan);
+ spin_unlock_irq(&vchan->vc.lock);
+ }
+ }
+}
+
+static irqreturn_t sun6i_dma_interrupt(int irq, void *dev_id)
+{
+ struct sun6i_dma_dev *sdev = dev_id;
+ struct sun6i_vchan *vchan;
+ struct sun6i_pchan *pchan;
+ int i, j, ret = IRQ_NONE;
+ u32 status;
+
+ for (i = 0; i < sdev->num_pchans / DMA_IRQ_CHAN_NR; i++) {
+ status = readl(sdev->base + DMA_IRQ_STAT(i));
+ if (!status)
+ continue;
+
+ dev_dbg(sdev->slave.dev, "DMA irq status %s: 0x%x\n",
+ i ? "high" : "low", status);
+
+ writel(status, sdev->base + DMA_IRQ_STAT(i));
+
+ for (j = 0; (j < DMA_IRQ_CHAN_NR) && status; j++) {
+ pchan = sdev->pchans + j;
+ vchan = pchan->vchan;
+ if (vchan && (status & vchan->irq_type)) {
+ if (vchan->cyclic) {
+ vchan_cyclic_callback(&pchan->desc->vd);
+ } else {
+ spin_lock(&vchan->vc.lock);
+ vchan_cookie_complete(&pchan->desc->vd);
+ pchan->done = pchan->desc;
+ spin_unlock(&vchan->vc.lock);
+ }
+ }
+
+ status = status >> DMA_IRQ_CHAN_WIDTH;
+ }
+
+ if (!atomic_read(&sdev->tasklet_shutdown))
+ tasklet_schedule(&sdev->task);
+ ret = IRQ_HANDLED;
+ }
+
+ return ret;
+}
+
+static int set_config(struct sun6i_dma_dev *sdev,
+ struct dma_slave_config *sconfig,
+ enum dma_transfer_direction direction,
+ u32 *p_cfg)
+{
+ enum dma_slave_buswidth src_addr_width, dst_addr_width;
+ u32 src_maxburst, dst_maxburst;
+ s8 src_width, dst_width, src_burst, dst_burst;
+
+ src_addr_width = sconfig->src_addr_width;
+ dst_addr_width = sconfig->dst_addr_width;
+ src_maxburst = sconfig->src_maxburst;
+ dst_maxburst = sconfig->dst_maxburst;
+
+ switch (direction) {
+ case DMA_MEM_TO_DEV:
+ if (src_addr_width == DMA_SLAVE_BUSWIDTH_UNDEFINED)
+ src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+ src_maxburst = src_maxburst ? src_maxburst : 8;
+ break;
+ case DMA_DEV_TO_MEM:
+ if (dst_addr_width == DMA_SLAVE_BUSWIDTH_UNDEFINED)
+ dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+ dst_maxburst = dst_maxburst ? dst_maxburst : 8;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if (!(BIT(src_addr_width) & sdev->slave.src_addr_widths))
+ return -EINVAL;
+ if (!(BIT(dst_addr_width) & sdev->slave.dst_addr_widths))
+ return -EINVAL;
+ if (!(BIT(src_maxburst) & sdev->cfg->src_burst_lengths))
+ return -EINVAL;
+ if (!(BIT(dst_maxburst) & sdev->cfg->dst_burst_lengths))
+ return -EINVAL;
+
+ src_width = convert_buswidth(src_addr_width);
+ dst_width = convert_buswidth(dst_addr_width);
+ dst_burst = convert_burst(dst_maxburst);
+ src_burst = convert_burst(src_maxburst);
+
+ *p_cfg = DMA_CHAN_CFG_SRC_WIDTH(src_width) |
+ DMA_CHAN_CFG_DST_WIDTH(dst_width);
+
+ sdev->cfg->set_burst_length(p_cfg, src_burst, dst_burst);
+
+ return 0;
+}
+
+static inline void sun6i_dma_set_addr(struct sun6i_dma_dev *sdev,
+ struct sun6i_dma_lli *v_lli,
+ dma_addr_t src, dma_addr_t dst)
+{
+ v_lli->src = lower_32_bits(src);
+ v_lli->dst = lower_32_bits(dst);
+
+ if (sdev->cfg->has_high_addr)
+ v_lli->para |= SRC_HIGH_ADDR(upper_32_bits(src)) |
+ DST_HIGH_ADDR(upper_32_bits(dst));
+}
+
+static struct dma_async_tx_descriptor *sun6i_dma_prep_dma_memcpy(
+ struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
+ size_t len, unsigned long flags)
+{
+ struct sun6i_dma_dev *sdev = to_sun6i_dma_dev(chan->device);
+ struct sun6i_vchan *vchan = to_sun6i_vchan(chan);
+ struct sun6i_dma_lli *v_lli;
+ struct sun6i_desc *txd;
+ dma_addr_t p_lli;
+ s8 burst, width;
+
+ dev_dbg(chan2dev(chan),
+ "%s; chan: %d, dest: %pad, src: %pad, len: %zu. flags: 0x%08lx\n",
+ __func__, vchan->vc.chan.chan_id, &dest, &src, len, flags);
+
+ if (!len)
+ return NULL;
+
+ txd = kzalloc(sizeof(*txd), GFP_NOWAIT);
+ if (!txd)
+ return NULL;
+
+ v_lli = dma_pool_alloc(sdev->pool, GFP_DMA32 | GFP_NOWAIT, &p_lli);
+ if (!v_lli) {
+ dev_err(sdev->slave.dev, "Failed to alloc lli memory\n");
+ goto err_txd_free;
+ }
+
+ v_lli->len = len;
+ v_lli->para = NORMAL_WAIT;
+ sun6i_dma_set_addr(sdev, v_lli, src, dest);
+
+ burst = convert_burst(8);
+ width = convert_buswidth(DMA_SLAVE_BUSWIDTH_4_BYTES);
+ v_lli->cfg = DMA_CHAN_CFG_SRC_WIDTH(width) |
+ DMA_CHAN_CFG_DST_WIDTH(width);
+
+ sdev->cfg->set_burst_length(&v_lli->cfg, burst, burst);
+ sdev->cfg->set_drq(&v_lli->cfg, DRQ_SDRAM, DRQ_SDRAM);
+ sdev->cfg->set_mode(&v_lli->cfg, LINEAR_MODE, LINEAR_MODE);
+
+ sun6i_dma_lli_add(NULL, v_lli, p_lli, txd);
+
+ sun6i_dma_dump_lli(vchan, v_lli, p_lli);
+
+ return vchan_tx_prep(&vchan->vc, &txd->vd, flags);
+
+err_txd_free:
+ kfree(txd);
+ return NULL;
+}
+
+static struct dma_async_tx_descriptor *sun6i_dma_prep_slave_sg(
+ struct dma_chan *chan, struct scatterlist *sgl,
+ unsigned int sg_len, enum dma_transfer_direction dir,
+ unsigned long flags, void *context)
+{
+ struct sun6i_dma_dev *sdev = to_sun6i_dma_dev(chan->device);
+ struct sun6i_vchan *vchan = to_sun6i_vchan(chan);
+ struct dma_slave_config *sconfig = &vchan->cfg;
+ struct sun6i_dma_lli *v_lli, *prev = NULL;
+ struct sun6i_desc *txd;
+ struct scatterlist *sg;
+ dma_addr_t p_lli;
+ u32 lli_cfg;
+ int i, ret;
+
+ if (!sgl)
+ return NULL;
+
+ ret = set_config(sdev, sconfig, dir, &lli_cfg);
+ if (ret) {
+ dev_err(chan2dev(chan), "Invalid DMA configuration\n");
+ return NULL;
+ }
+
+ txd = kzalloc(sizeof(*txd), GFP_NOWAIT);
+ if (!txd)
+ return NULL;
+
+ for_each_sg(sgl, sg, sg_len, i) {
+ v_lli = dma_pool_alloc(sdev->pool, GFP_DMA32 | GFP_NOWAIT, &p_lli);
+ if (!v_lli)
+ goto err_lli_free;
+
+ v_lli->len = sg_dma_len(sg);
+ v_lli->para = NORMAL_WAIT;
+
+ if (dir == DMA_MEM_TO_DEV) {
+ sun6i_dma_set_addr(sdev, v_lli,
+ sg_dma_address(sg),
+ sconfig->dst_addr);
+ v_lli->cfg = lli_cfg;
+ sdev->cfg->set_drq(&v_lli->cfg, DRQ_SDRAM, vchan->port);
+ sdev->cfg->set_mode(&v_lli->cfg, LINEAR_MODE, IO_MODE);
+
+ dev_dbg(chan2dev(chan),
+ "%s; chan: %d, dest: %pad, src: %pad, len: %u. flags: 0x%08lx\n",
+ __func__, vchan->vc.chan.chan_id,
+ &sconfig->dst_addr, &sg_dma_address(sg),
+ sg_dma_len(sg), flags);
+
+ } else {
+ sun6i_dma_set_addr(sdev, v_lli,
+ sconfig->src_addr,
+ sg_dma_address(sg));
+ v_lli->cfg = lli_cfg;
+ sdev->cfg->set_drq(&v_lli->cfg, vchan->port, DRQ_SDRAM);
+ sdev->cfg->set_mode(&v_lli->cfg, IO_MODE, LINEAR_MODE);
+
+ dev_dbg(chan2dev(chan),
+ "%s; chan: %d, dest: %pad, src: %pad, len: %u. flags: 0x%08lx\n",
+ __func__, vchan->vc.chan.chan_id,
+ &sg_dma_address(sg), &sconfig->src_addr,
+ sg_dma_len(sg), flags);
+ }
+
+ prev = sun6i_dma_lli_add(prev, v_lli, p_lli, txd);
+ }
+
+ dev_dbg(chan2dev(chan), "First: %pad\n", &txd->p_lli);
+ for (p_lli = txd->p_lli, v_lli = txd->v_lli; v_lli;
+ p_lli = v_lli->p_lli_next, v_lli = v_lli->v_lli_next)
+ sun6i_dma_dump_lli(vchan, v_lli, p_lli);
+
+ return vchan_tx_prep(&vchan->vc, &txd->vd, flags);
+
+err_lli_free:
+ for (p_lli = txd->p_lli, v_lli = txd->v_lli; v_lli;
+ p_lli = v_lli->p_lli_next, v_lli = v_lli->v_lli_next)
+ dma_pool_free(sdev->pool, v_lli, p_lli);
+ kfree(txd);
+ return NULL;
+}
+
+static struct dma_async_tx_descriptor *sun6i_dma_prep_dma_cyclic(
+ struct dma_chan *chan,
+ dma_addr_t buf_addr,
+ size_t buf_len,
+ size_t period_len,
+ enum dma_transfer_direction dir,
+ unsigned long flags)
+{
+ struct sun6i_dma_dev *sdev = to_sun6i_dma_dev(chan->device);
+ struct sun6i_vchan *vchan = to_sun6i_vchan(chan);
+ struct dma_slave_config *sconfig = &vchan->cfg;
+ struct sun6i_dma_lli *v_lli, *prev = NULL;
+ struct sun6i_desc *txd;
+ dma_addr_t p_lli;
+ u32 lli_cfg;
+ unsigned int i, periods = buf_len / period_len;
+ int ret;
+
+ ret = set_config(sdev, sconfig, dir, &lli_cfg);
+ if (ret) {
+ dev_err(chan2dev(chan), "Invalid DMA configuration\n");
+ return NULL;
+ }
+
+ txd = kzalloc(sizeof(*txd), GFP_NOWAIT);
+ if (!txd)
+ return NULL;
+
+ for (i = 0; i < periods; i++) {
+ v_lli = dma_pool_alloc(sdev->pool, GFP_DMA32 | GFP_NOWAIT, &p_lli);
+ if (!v_lli) {
+ dev_err(sdev->slave.dev, "Failed to alloc lli memory\n");
+ goto err_lli_free;
+ }
+
+ v_lli->len = period_len;
+ v_lli->para = NORMAL_WAIT;
+
+ if (dir == DMA_MEM_TO_DEV) {
+ sun6i_dma_set_addr(sdev, v_lli,
+ buf_addr + period_len * i,
+ sconfig->dst_addr);
+ v_lli->cfg = lli_cfg;
+ sdev->cfg->set_drq(&v_lli->cfg, DRQ_SDRAM, vchan->port);
+ sdev->cfg->set_mode(&v_lli->cfg, LINEAR_MODE, IO_MODE);
+ } else {
+ sun6i_dma_set_addr(sdev, v_lli,
+ sconfig->src_addr,
+ buf_addr + period_len * i);
+ v_lli->cfg = lli_cfg;
+ sdev->cfg->set_drq(&v_lli->cfg, vchan->port, DRQ_SDRAM);
+ sdev->cfg->set_mode(&v_lli->cfg, IO_MODE, LINEAR_MODE);
+ }
+
+ prev = sun6i_dma_lli_add(prev, v_lli, p_lli, txd);
+ }
+
+ prev->p_lli_next = txd->p_lli; /* cyclic list */
+
+ vchan->cyclic = true;
+
+ return vchan_tx_prep(&vchan->vc, &txd->vd, flags);
+
+err_lli_free:
+ for (p_lli = txd->p_lli, v_lli = txd->v_lli; v_lli;
+ p_lli = v_lli->p_lli_next, v_lli = v_lli->v_lli_next)
+ dma_pool_free(sdev->pool, v_lli, p_lli);
+ kfree(txd);
+ return NULL;
+}
+
+static int sun6i_dma_config(struct dma_chan *chan,
+ struct dma_slave_config *config)
+{
+ struct sun6i_vchan *vchan = to_sun6i_vchan(chan);
+
+ memcpy(&vchan->cfg, config, sizeof(*config));
+
+ return 0;
+}
+
+static int sun6i_dma_pause(struct dma_chan *chan)
+{
+ struct sun6i_dma_dev *sdev = to_sun6i_dma_dev(chan->device);
+ struct sun6i_vchan *vchan = to_sun6i_vchan(chan);
+ struct sun6i_pchan *pchan = vchan->phy;
+
+ dev_dbg(chan2dev(chan), "vchan %p: pause\n", &vchan->vc);
+
+ if (pchan) {
+ writel(DMA_CHAN_PAUSE_PAUSE,
+ pchan->base + DMA_CHAN_PAUSE);
+ } else {
+ spin_lock(&sdev->lock);
+ list_del_init(&vchan->node);
+ spin_unlock(&sdev->lock);
+ }
+
+ return 0;
+}
+
+static int sun6i_dma_resume(struct dma_chan *chan)
+{
+ struct sun6i_dma_dev *sdev = to_sun6i_dma_dev(chan->device);
+ struct sun6i_vchan *vchan = to_sun6i_vchan(chan);
+ struct sun6i_pchan *pchan = vchan->phy;
+ unsigned long flags;
+
+ dev_dbg(chan2dev(chan), "vchan %p: resume\n", &vchan->vc);
+
+ spin_lock_irqsave(&vchan->vc.lock, flags);
+
+ if (pchan) {
+ writel(DMA_CHAN_PAUSE_RESUME,
+ pchan->base + DMA_CHAN_PAUSE);
+ } else if (!list_empty(&vchan->vc.desc_issued)) {
+ spin_lock(&sdev->lock);
+ list_add_tail(&vchan->node, &sdev->pending);
+ spin_unlock(&sdev->lock);
+ }
+
+ spin_unlock_irqrestore(&vchan->vc.lock, flags);
+
+ return 0;
+}
+
+static int sun6i_dma_terminate_all(struct dma_chan *chan)
+{
+ struct sun6i_dma_dev *sdev = to_sun6i_dma_dev(chan->device);
+ struct sun6i_vchan *vchan = to_sun6i_vchan(chan);
+ struct sun6i_pchan *pchan = vchan->phy;
+ unsigned long flags;
+ LIST_HEAD(head);
+
+ spin_lock(&sdev->lock);
+ list_del_init(&vchan->node);
+ spin_unlock(&sdev->lock);
+
+ spin_lock_irqsave(&vchan->vc.lock, flags);
+
+ if (vchan->cyclic) {
+ vchan->cyclic = false;
+ if (pchan && pchan->desc) {
+ struct virt_dma_desc *vd = &pchan->desc->vd;
+ struct virt_dma_chan *vc = &vchan->vc;
+
+ list_add_tail(&vd->node, &vc->desc_completed);
+ }
+ }
+
+ vchan_get_all_descriptors(&vchan->vc, &head);
+
+ if (pchan) {
+ writel(DMA_CHAN_ENABLE_STOP, pchan->base + DMA_CHAN_ENABLE);
+ writel(DMA_CHAN_PAUSE_RESUME, pchan->base + DMA_CHAN_PAUSE);
+
+ vchan->phy = NULL;
+ pchan->vchan = NULL;
+ pchan->desc = NULL;
+ pchan->done = NULL;
+ }
+
+ spin_unlock_irqrestore(&vchan->vc.lock, flags);
+
+ vchan_dma_desc_free_list(&vchan->vc, &head);
+
+ return 0;
+}
+
+static enum dma_status sun6i_dma_tx_status(struct dma_chan *chan,
+ dma_cookie_t cookie,
+ struct dma_tx_state *state)
+{
+ struct sun6i_vchan *vchan = to_sun6i_vchan(chan);
+ struct sun6i_pchan *pchan = vchan->phy;
+ struct sun6i_dma_lli *lli;
+ struct virt_dma_desc *vd;
+ struct sun6i_desc *txd;
+ enum dma_status ret;
+ unsigned long flags;
+ size_t bytes = 0;
+
+ ret = dma_cookie_status(chan, cookie, state);
+ if (ret == DMA_COMPLETE || !state)
+ return ret;
+
+ spin_lock_irqsave(&vchan->vc.lock, flags);
+
+ vd = vchan_find_desc(&vchan->vc, cookie);
+ txd = to_sun6i_desc(&vd->tx);
+
+ if (vd) {
+ for (lli = txd->v_lli; lli != NULL; lli = lli->v_lli_next)
+ bytes += lli->len;
+ } else if (!pchan || !pchan->desc) {
+ bytes = 0;
+ } else {
+ bytes = sun6i_get_chan_size(pchan);
+ }
+
+ spin_unlock_irqrestore(&vchan->vc.lock, flags);
+
+ dma_set_residue(state, bytes);
+
+ return ret;
+}
+
+static void sun6i_dma_issue_pending(struct dma_chan *chan)
+{
+ struct sun6i_dma_dev *sdev = to_sun6i_dma_dev(chan->device);
+ struct sun6i_vchan *vchan = to_sun6i_vchan(chan);
+ unsigned long flags;
+
+ spin_lock_irqsave(&vchan->vc.lock, flags);
+
+ if (vchan_issue_pending(&vchan->vc)) {
+ spin_lock(&sdev->lock);
+
+ if (!vchan->phy && list_empty(&vchan->node)) {
+ list_add_tail(&vchan->node, &sdev->pending);
+ tasklet_schedule(&sdev->task);
+ dev_dbg(chan2dev(chan), "vchan %p: issued\n",
+ &vchan->vc);
+ }
+
+ spin_unlock(&sdev->lock);
+ } else {
+ dev_dbg(chan2dev(chan), "vchan %p: nothing to issue\n",
+ &vchan->vc);
+ }
+
+ spin_unlock_irqrestore(&vchan->vc.lock, flags);
+}
+
+static void sun6i_dma_free_chan_resources(struct dma_chan *chan)
+{
+ struct sun6i_dma_dev *sdev = to_sun6i_dma_dev(chan->device);
+ struct sun6i_vchan *vchan = to_sun6i_vchan(chan);
+ unsigned long flags;
+
+ spin_lock_irqsave(&sdev->lock, flags);
+ list_del_init(&vchan->node);
+ spin_unlock_irqrestore(&sdev->lock, flags);
+
+ vchan_free_chan_resources(&vchan->vc);
+}
+
+static struct dma_chan *sun6i_dma_of_xlate(struct of_phandle_args *dma_spec,
+ struct of_dma *ofdma)
+{
+ struct sun6i_dma_dev *sdev = ofdma->of_dma_data;
+ struct sun6i_vchan *vchan;
+ struct dma_chan *chan;
+ u8 port = dma_spec->args[0];
+
+ if (port > sdev->max_request)
+ return NULL;
+
+ chan = dma_get_any_slave_channel(&sdev->slave);
+ if (!chan)
+ return NULL;
+
+ vchan = to_sun6i_vchan(chan);
+ vchan->port = port;
+
+ return chan;
+}
+
+static inline void sun6i_kill_tasklet(struct sun6i_dma_dev *sdev)
+{
+ /* Disable all interrupts from DMA */
+ writel(0, sdev->base + DMA_IRQ_EN(0));
+ writel(0, sdev->base + DMA_IRQ_EN(1));
+
+ /* Prevent spurious interrupts from scheduling the tasklet */
+ atomic_inc(&sdev->tasklet_shutdown);
+
+ /* Make sure we won't have any further interrupts */
+ devm_free_irq(sdev->slave.dev, sdev->irq, sdev);
+
+ /* Actually prevent the tasklet from being scheduled */
+ tasklet_kill(&sdev->task);
+}
+
+static inline void sun6i_dma_free(struct sun6i_dma_dev *sdev)
+{
+ int i;
+
+ for (i = 0; i < sdev->num_vchans; i++) {
+ struct sun6i_vchan *vchan = &sdev->vchans[i];
+
+ list_del(&vchan->vc.chan.device_node);
+ tasklet_kill(&vchan->vc.task);
+ }
+}
+
+/*
+ * For A31:
+ *
+ * There's 16 physical channels that can work in parallel.
+ *
+ * However we have 30 different endpoints for our requests.
+ *
+ * Since the channels are able to handle only an unidirectional
+ * transfer, we need to allocate more virtual channels so that
+ * everyone can grab one channel.
+ *
+ * Some devices can't work in both direction (mostly because it
+ * wouldn't make sense), so we have a bit fewer virtual channels than
+ * 2 channels per endpoints.
+ */
+
+static struct sun6i_dma_config sun6i_a31_dma_cfg = {
+ .nr_max_channels = 16,
+ .nr_max_requests = 30,
+ .nr_max_vchans = 53,
+ .set_burst_length = sun6i_set_burst_length_a31,
+ .set_drq = sun6i_set_drq_a31,
+ .set_mode = sun6i_set_mode_a31,
+ .src_burst_lengths = BIT(1) | BIT(8),
+ .dst_burst_lengths = BIT(1) | BIT(8),
+ .src_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) |
+ BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) |
+ BIT(DMA_SLAVE_BUSWIDTH_4_BYTES),
+ .dst_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) |
+ BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) |
+ BIT(DMA_SLAVE_BUSWIDTH_4_BYTES),
+};
+
+/*
+ * The A23 only has 8 physical channels, a maximum DRQ port id of 24,
+ * and a total of 37 usable source and destination endpoints.
+ */
+
+static struct sun6i_dma_config sun8i_a23_dma_cfg = {
+ .nr_max_channels = 8,
+ .nr_max_requests = 24,
+ .nr_max_vchans = 37,
+ .clock_autogate_enable = sun6i_enable_clock_autogate_a23,
+ .set_burst_length = sun6i_set_burst_length_a31,
+ .set_drq = sun6i_set_drq_a31,
+ .set_mode = sun6i_set_mode_a31,
+ .src_burst_lengths = BIT(1) | BIT(8),
+ .dst_burst_lengths = BIT(1) | BIT(8),
+ .src_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) |
+ BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) |
+ BIT(DMA_SLAVE_BUSWIDTH_4_BYTES),
+ .dst_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) |
+ BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) |
+ BIT(DMA_SLAVE_BUSWIDTH_4_BYTES),
+};
+
+static struct sun6i_dma_config sun8i_a83t_dma_cfg = {
+ .nr_max_channels = 8,
+ .nr_max_requests = 28,
+ .nr_max_vchans = 39,
+ .clock_autogate_enable = sun6i_enable_clock_autogate_a23,
+ .set_burst_length = sun6i_set_burst_length_a31,
+ .set_drq = sun6i_set_drq_a31,
+ .set_mode = sun6i_set_mode_a31,
+ .src_burst_lengths = BIT(1) | BIT(8),
+ .dst_burst_lengths = BIT(1) | BIT(8),
+ .src_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) |
+ BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) |
+ BIT(DMA_SLAVE_BUSWIDTH_4_BYTES),
+ .dst_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) |
+ BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) |
+ BIT(DMA_SLAVE_BUSWIDTH_4_BYTES),
+};
+
+/*
+ * The H3 has 12 physical channels, a maximum DRQ port id of 27,
+ * and a total of 34 usable source and destination endpoints.
+ * It also supports additional burst lengths and bus widths,
+ * and the burst length fields have different offsets.
+ */
+
+static struct sun6i_dma_config sun8i_h3_dma_cfg = {
+ .nr_max_channels = 12,
+ .nr_max_requests = 27,
+ .nr_max_vchans = 34,
+ .clock_autogate_enable = sun6i_enable_clock_autogate_h3,
+ .set_burst_length = sun6i_set_burst_length_h3,
+ .set_drq = sun6i_set_drq_a31,
+ .set_mode = sun6i_set_mode_a31,
+ .src_burst_lengths = BIT(1) | BIT(4) | BIT(8) | BIT(16),
+ .dst_burst_lengths = BIT(1) | BIT(4) | BIT(8) | BIT(16),
+ .src_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) |
+ BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) |
+ BIT(DMA_SLAVE_BUSWIDTH_4_BYTES) |
+ BIT(DMA_SLAVE_BUSWIDTH_8_BYTES),
+ .dst_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) |
+ BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) |
+ BIT(DMA_SLAVE_BUSWIDTH_4_BYTES) |
+ BIT(DMA_SLAVE_BUSWIDTH_8_BYTES),
+};
+
+/*
+ * The A64 binding uses the number of dma channels from the
+ * device tree node.
+ */
+static struct sun6i_dma_config sun50i_a64_dma_cfg = {
+ .clock_autogate_enable = sun6i_enable_clock_autogate_h3,
+ .set_burst_length = sun6i_set_burst_length_h3,
+ .set_drq = sun6i_set_drq_a31,
+ .set_mode = sun6i_set_mode_a31,
+ .src_burst_lengths = BIT(1) | BIT(4) | BIT(8) | BIT(16),
+ .dst_burst_lengths = BIT(1) | BIT(4) | BIT(8) | BIT(16),
+ .src_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) |
+ BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) |
+ BIT(DMA_SLAVE_BUSWIDTH_4_BYTES) |
+ BIT(DMA_SLAVE_BUSWIDTH_8_BYTES),
+ .dst_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) |
+ BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) |
+ BIT(DMA_SLAVE_BUSWIDTH_4_BYTES) |
+ BIT(DMA_SLAVE_BUSWIDTH_8_BYTES),
+};
+
+/*
+ * The A100 binding uses the number of dma channels from the
+ * device tree node.
+ */
+static struct sun6i_dma_config sun50i_a100_dma_cfg = {
+ .clock_autogate_enable = sun6i_enable_clock_autogate_h3,
+ .set_burst_length = sun6i_set_burst_length_h3,
+ .set_drq = sun6i_set_drq_h6,
+ .set_mode = sun6i_set_mode_h6,
+ .src_burst_lengths = BIT(1) | BIT(4) | BIT(8) | BIT(16),
+ .dst_burst_lengths = BIT(1) | BIT(4) | BIT(8) | BIT(16),
+ .src_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) |
+ BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) |
+ BIT(DMA_SLAVE_BUSWIDTH_4_BYTES) |
+ BIT(DMA_SLAVE_BUSWIDTH_8_BYTES),
+ .dst_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) |
+ BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) |
+ BIT(DMA_SLAVE_BUSWIDTH_4_BYTES) |
+ BIT(DMA_SLAVE_BUSWIDTH_8_BYTES),
+ .has_high_addr = true,
+ .has_mbus_clk = true,
+};
+
+/*
+ * The H6 binding uses the number of dma channels from the
+ * device tree node.
+ */
+static struct sun6i_dma_config sun50i_h6_dma_cfg = {
+ .clock_autogate_enable = sun6i_enable_clock_autogate_h3,
+ .set_burst_length = sun6i_set_burst_length_h3,
+ .set_drq = sun6i_set_drq_h6,
+ .set_mode = sun6i_set_mode_h6,
+ .src_burst_lengths = BIT(1) | BIT(4) | BIT(8) | BIT(16),
+ .dst_burst_lengths = BIT(1) | BIT(4) | BIT(8) | BIT(16),
+ .src_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) |
+ BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) |
+ BIT(DMA_SLAVE_BUSWIDTH_4_BYTES) |
+ BIT(DMA_SLAVE_BUSWIDTH_8_BYTES),
+ .dst_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) |
+ BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) |
+ BIT(DMA_SLAVE_BUSWIDTH_4_BYTES) |
+ BIT(DMA_SLAVE_BUSWIDTH_8_BYTES),
+ .has_mbus_clk = true,
+};
+
+/*
+ * The V3s have only 8 physical channels, a maximum DRQ port id of 23,
+ * and a total of 24 usable source and destination endpoints.
+ */
+
+static struct sun6i_dma_config sun8i_v3s_dma_cfg = {
+ .nr_max_channels = 8,
+ .nr_max_requests = 23,
+ .nr_max_vchans = 24,
+ .clock_autogate_enable = sun6i_enable_clock_autogate_a23,
+ .set_burst_length = sun6i_set_burst_length_a31,
+ .set_drq = sun6i_set_drq_a31,
+ .set_mode = sun6i_set_mode_a31,
+ .src_burst_lengths = BIT(1) | BIT(8),
+ .dst_burst_lengths = BIT(1) | BIT(8),
+ .src_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) |
+ BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) |
+ BIT(DMA_SLAVE_BUSWIDTH_4_BYTES),
+ .dst_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) |
+ BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) |
+ BIT(DMA_SLAVE_BUSWIDTH_4_BYTES),
+};
+
+static const struct of_device_id sun6i_dma_match[] = {
+ { .compatible = "allwinner,sun6i-a31-dma", .data = &sun6i_a31_dma_cfg },
+ { .compatible = "allwinner,sun8i-a23-dma", .data = &sun8i_a23_dma_cfg },
+ { .compatible = "allwinner,sun8i-a83t-dma", .data = &sun8i_a83t_dma_cfg },
+ { .compatible = "allwinner,sun8i-h3-dma", .data = &sun8i_h3_dma_cfg },
+ { .compatible = "allwinner,sun8i-v3s-dma", .data = &sun8i_v3s_dma_cfg },
+ { .compatible = "allwinner,sun20i-d1-dma", .data = &sun50i_a100_dma_cfg },
+ { .compatible = "allwinner,sun50i-a64-dma", .data = &sun50i_a64_dma_cfg },
+ { .compatible = "allwinner,sun50i-a100-dma", .data = &sun50i_a100_dma_cfg },
+ { .compatible = "allwinner,sun50i-h6-dma", .data = &sun50i_h6_dma_cfg },
+ { /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, sun6i_dma_match);
+
+static int sun6i_dma_probe(struct platform_device *pdev)
+{
+ struct device_node *np = pdev->dev.of_node;
+ struct sun6i_dma_dev *sdc;
+ struct resource *res;
+ int ret, i;
+
+ sdc = devm_kzalloc(&pdev->dev, sizeof(*sdc), GFP_KERNEL);
+ if (!sdc)
+ return -ENOMEM;
+
+ sdc->cfg = of_device_get_match_data(&pdev->dev);
+ if (!sdc->cfg)
+ return -ENODEV;
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ sdc->base = devm_ioremap_resource(&pdev->dev, res);
+ if (IS_ERR(sdc->base))
+ return PTR_ERR(sdc->base);
+
+ sdc->irq = platform_get_irq(pdev, 0);
+ if (sdc->irq < 0)
+ return sdc->irq;
+
+ sdc->clk = devm_clk_get(&pdev->dev, NULL);
+ if (IS_ERR(sdc->clk)) {
+ dev_err(&pdev->dev, "No clock specified\n");
+ return PTR_ERR(sdc->clk);
+ }
+
+ if (sdc->cfg->has_mbus_clk) {
+ sdc->clk_mbus = devm_clk_get(&pdev->dev, "mbus");
+ if (IS_ERR(sdc->clk_mbus)) {
+ dev_err(&pdev->dev, "No mbus clock specified\n");
+ return PTR_ERR(sdc->clk_mbus);
+ }
+ }
+
+ sdc->rstc = devm_reset_control_get(&pdev->dev, NULL);
+ if (IS_ERR(sdc->rstc)) {
+ dev_err(&pdev->dev, "No reset controller specified\n");
+ return PTR_ERR(sdc->rstc);
+ }
+
+ sdc->pool = dmam_pool_create(dev_name(&pdev->dev), &pdev->dev,
+ sizeof(struct sun6i_dma_lli), 4, 0);
+ if (!sdc->pool) {
+ dev_err(&pdev->dev, "No memory for descriptors dma pool\n");
+ return -ENOMEM;
+ }
+
+ platform_set_drvdata(pdev, sdc);
+ INIT_LIST_HEAD(&sdc->pending);
+ spin_lock_init(&sdc->lock);
+
+ dma_cap_set(DMA_PRIVATE, sdc->slave.cap_mask);
+ dma_cap_set(DMA_MEMCPY, sdc->slave.cap_mask);
+ dma_cap_set(DMA_SLAVE, sdc->slave.cap_mask);
+ dma_cap_set(DMA_CYCLIC, sdc->slave.cap_mask);
+
+ INIT_LIST_HEAD(&sdc->slave.channels);
+ sdc->slave.device_free_chan_resources = sun6i_dma_free_chan_resources;
+ sdc->slave.device_tx_status = sun6i_dma_tx_status;
+ sdc->slave.device_issue_pending = sun6i_dma_issue_pending;
+ sdc->slave.device_prep_slave_sg = sun6i_dma_prep_slave_sg;
+ sdc->slave.device_prep_dma_memcpy = sun6i_dma_prep_dma_memcpy;
+ sdc->slave.device_prep_dma_cyclic = sun6i_dma_prep_dma_cyclic;
+ sdc->slave.copy_align = DMAENGINE_ALIGN_4_BYTES;
+ sdc->slave.device_config = sun6i_dma_config;
+ sdc->slave.device_pause = sun6i_dma_pause;
+ sdc->slave.device_resume = sun6i_dma_resume;
+ sdc->slave.device_terminate_all = sun6i_dma_terminate_all;
+ sdc->slave.src_addr_widths = sdc->cfg->src_addr_widths;
+ sdc->slave.dst_addr_widths = sdc->cfg->dst_addr_widths;
+ sdc->slave.directions = BIT(DMA_DEV_TO_MEM) |
+ BIT(DMA_MEM_TO_DEV);
+ sdc->slave.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
+ sdc->slave.dev = &pdev->dev;
+
+ sdc->num_pchans = sdc->cfg->nr_max_channels;
+ sdc->num_vchans = sdc->cfg->nr_max_vchans;
+ sdc->max_request = sdc->cfg->nr_max_requests;
+
+ ret = of_property_read_u32(np, "dma-channels", &sdc->num_pchans);
+ if (ret && !sdc->num_pchans) {
+ dev_err(&pdev->dev, "Can't get dma-channels.\n");
+ return ret;
+ }
+
+ ret = of_property_read_u32(np, "dma-requests", &sdc->max_request);
+ if (ret && !sdc->max_request) {
+ dev_info(&pdev->dev, "Missing dma-requests, using %u.\n",
+ DMA_CHAN_MAX_DRQ_A31);
+ sdc->max_request = DMA_CHAN_MAX_DRQ_A31;
+ }
+
+ /*
+ * If the number of vchans is not specified, derive it from the
+ * highest port number, at most one channel per port and direction.
+ */
+ if (!sdc->num_vchans)
+ sdc->num_vchans = 2 * (sdc->max_request + 1);
+
+ sdc->pchans = devm_kcalloc(&pdev->dev, sdc->num_pchans,
+ sizeof(struct sun6i_pchan), GFP_KERNEL);
+ if (!sdc->pchans)
+ return -ENOMEM;
+
+ sdc->vchans = devm_kcalloc(&pdev->dev, sdc->num_vchans,
+ sizeof(struct sun6i_vchan), GFP_KERNEL);
+ if (!sdc->vchans)
+ return -ENOMEM;
+
+ tasklet_setup(&sdc->task, sun6i_dma_tasklet);
+
+ for (i = 0; i < sdc->num_pchans; i++) {
+ struct sun6i_pchan *pchan = &sdc->pchans[i];
+
+ pchan->idx = i;
+ pchan->base = sdc->base + 0x100 + i * 0x40;
+ }
+
+ for (i = 0; i < sdc->num_vchans; i++) {
+ struct sun6i_vchan *vchan = &sdc->vchans[i];
+
+ INIT_LIST_HEAD(&vchan->node);
+ vchan->vc.desc_free = sun6i_dma_free_desc;
+ vchan_init(&vchan->vc, &sdc->slave);
+ }
+
+ ret = reset_control_deassert(sdc->rstc);
+ if (ret) {
+ dev_err(&pdev->dev, "Couldn't deassert the device from reset\n");
+ goto err_chan_free;
+ }
+
+ ret = clk_prepare_enable(sdc->clk);
+ if (ret) {
+ dev_err(&pdev->dev, "Couldn't enable the clock\n");
+ goto err_reset_assert;
+ }
+
+ if (sdc->cfg->has_mbus_clk) {
+ ret = clk_prepare_enable(sdc->clk_mbus);
+ if (ret) {
+ dev_err(&pdev->dev, "Couldn't enable mbus clock\n");
+ goto err_clk_disable;
+ }
+ }
+
+ ret = devm_request_irq(&pdev->dev, sdc->irq, sun6i_dma_interrupt, 0,
+ dev_name(&pdev->dev), sdc);
+ if (ret) {
+ dev_err(&pdev->dev, "Cannot request IRQ\n");
+ goto err_mbus_clk_disable;
+ }
+
+ ret = dma_async_device_register(&sdc->slave);
+ if (ret) {
+ dev_warn(&pdev->dev, "Failed to register DMA engine device\n");
+ goto err_irq_disable;
+ }
+
+ ret = of_dma_controller_register(pdev->dev.of_node, sun6i_dma_of_xlate,
+ sdc);
+ if (ret) {
+ dev_err(&pdev->dev, "of_dma_controller_register failed\n");
+ goto err_dma_unregister;
+ }
+
+ if (sdc->cfg->clock_autogate_enable)
+ sdc->cfg->clock_autogate_enable(sdc);
+
+ return 0;
+
+err_dma_unregister:
+ dma_async_device_unregister(&sdc->slave);
+err_irq_disable:
+ sun6i_kill_tasklet(sdc);
+err_mbus_clk_disable:
+ clk_disable_unprepare(sdc->clk_mbus);
+err_clk_disable:
+ clk_disable_unprepare(sdc->clk);
+err_reset_assert:
+ reset_control_assert(sdc->rstc);
+err_chan_free:
+ sun6i_dma_free(sdc);
+ return ret;
+}
+
+static int sun6i_dma_remove(struct platform_device *pdev)
+{
+ struct sun6i_dma_dev *sdc = platform_get_drvdata(pdev);
+
+ of_dma_controller_free(pdev->dev.of_node);
+ dma_async_device_unregister(&sdc->slave);
+
+ sun6i_kill_tasklet(sdc);
+
+ clk_disable_unprepare(sdc->clk_mbus);
+ clk_disable_unprepare(sdc->clk);
+ reset_control_assert(sdc->rstc);
+
+ sun6i_dma_free(sdc);
+
+ return 0;
+}
+
+static struct platform_driver sun6i_dma_driver = {
+ .probe = sun6i_dma_probe,
+ .remove = sun6i_dma_remove,
+ .driver = {
+ .name = "sun6i-dma",
+ .of_match_table = sun6i_dma_match,
+ },
+};
+module_platform_driver(sun6i_dma_driver);
+
+MODULE_DESCRIPTION("Allwinner A31 DMA Controller Driver");
+MODULE_AUTHOR("Sugar <shuge@allwinnertech.com>");
+MODULE_AUTHOR("Maxime Ripard <maxime.ripard@free-electrons.com>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/dma/tegra186-gpc-dma.c b/drivers/dma/tegra186-gpc-dma.c
new file mode 100644
index 000000000..75af3488a
--- /dev/null
+++ b/drivers/dma/tegra186-gpc-dma.c
@@ -0,0 +1,1521 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * DMA driver for NVIDIA Tegra GPC DMA controller.
+ *
+ * Copyright (c) 2014-2022, NVIDIA CORPORATION. All rights reserved.
+ */
+
+#include <linux/bitfield.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/interrupt.h>
+#include <linux/iommu.h>
+#include <linux/iopoll.h>
+#include <linux/minmax.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/of_dma.h>
+#include <linux/platform_device.h>
+#include <linux/reset.h>
+#include <linux/slab.h>
+#include <dt-bindings/memory/tegra186-mc.h>
+#include "virt-dma.h"
+
+/* CSR register */
+#define TEGRA_GPCDMA_CHAN_CSR 0x00
+#define TEGRA_GPCDMA_CSR_ENB BIT(31)
+#define TEGRA_GPCDMA_CSR_IE_EOC BIT(30)
+#define TEGRA_GPCDMA_CSR_ONCE BIT(27)
+
+#define TEGRA_GPCDMA_CSR_FC_MODE GENMASK(25, 24)
+#define TEGRA_GPCDMA_CSR_FC_MODE_NO_MMIO \
+ FIELD_PREP(TEGRA_GPCDMA_CSR_FC_MODE, 0)
+#define TEGRA_GPCDMA_CSR_FC_MODE_ONE_MMIO \
+ FIELD_PREP(TEGRA_GPCDMA_CSR_FC_MODE, 1)
+#define TEGRA_GPCDMA_CSR_FC_MODE_TWO_MMIO \
+ FIELD_PREP(TEGRA_GPCDMA_CSR_FC_MODE, 2)
+#define TEGRA_GPCDMA_CSR_FC_MODE_FOUR_MMIO \
+ FIELD_PREP(TEGRA_GPCDMA_CSR_FC_MODE, 3)
+
+#define TEGRA_GPCDMA_CSR_DMA GENMASK(23, 21)
+#define TEGRA_GPCDMA_CSR_DMA_IO2MEM_NO_FC \
+ FIELD_PREP(TEGRA_GPCDMA_CSR_DMA, 0)
+#define TEGRA_GPCDMA_CSR_DMA_IO2MEM_FC \
+ FIELD_PREP(TEGRA_GPCDMA_CSR_DMA, 1)
+#define TEGRA_GPCDMA_CSR_DMA_MEM2IO_NO_FC \
+ FIELD_PREP(TEGRA_GPCDMA_CSR_DMA, 2)
+#define TEGRA_GPCDMA_CSR_DMA_MEM2IO_FC \
+ FIELD_PREP(TEGRA_GPCDMA_CSR_DMA, 3)
+#define TEGRA_GPCDMA_CSR_DMA_MEM2MEM \
+ FIELD_PREP(TEGRA_GPCDMA_CSR_DMA, 4)
+#define TEGRA_GPCDMA_CSR_DMA_FIXED_PAT \
+ FIELD_PREP(TEGRA_GPCDMA_CSR_DMA, 6)
+
+#define TEGRA_GPCDMA_CSR_REQ_SEL_MASK GENMASK(20, 16)
+#define TEGRA_GPCDMA_CSR_REQ_SEL_UNUSED \
+ FIELD_PREP(TEGRA_GPCDMA_CSR_REQ_SEL_MASK, 4)
+#define TEGRA_GPCDMA_CSR_IRQ_MASK BIT(15)
+#define TEGRA_GPCDMA_CSR_WEIGHT GENMASK(13, 10)
+
+/* STATUS register */
+#define TEGRA_GPCDMA_CHAN_STATUS 0x004
+#define TEGRA_GPCDMA_STATUS_BUSY BIT(31)
+#define TEGRA_GPCDMA_STATUS_ISE_EOC BIT(30)
+#define TEGRA_GPCDMA_STATUS_PING_PONG BIT(28)
+#define TEGRA_GPCDMA_STATUS_DMA_ACTIVITY BIT(27)
+#define TEGRA_GPCDMA_STATUS_CHANNEL_PAUSE BIT(26)
+#define TEGRA_GPCDMA_STATUS_CHANNEL_RX BIT(25)
+#define TEGRA_GPCDMA_STATUS_CHANNEL_TX BIT(24)
+#define TEGRA_GPCDMA_STATUS_IRQ_INTR_STA BIT(23)
+#define TEGRA_GPCDMA_STATUS_IRQ_STA BIT(21)
+#define TEGRA_GPCDMA_STATUS_IRQ_TRIG_STA BIT(20)
+
+#define TEGRA_GPCDMA_CHAN_CSRE 0x008
+#define TEGRA_GPCDMA_CHAN_CSRE_PAUSE BIT(31)
+
+/* Source address */
+#define TEGRA_GPCDMA_CHAN_SRC_PTR 0x00C
+
+/* Destination address */
+#define TEGRA_GPCDMA_CHAN_DST_PTR 0x010
+
+/* High address pointer */
+#define TEGRA_GPCDMA_CHAN_HIGH_ADDR_PTR 0x014
+#define TEGRA_GPCDMA_HIGH_ADDR_SRC_PTR GENMASK(7, 0)
+#define TEGRA_GPCDMA_HIGH_ADDR_DST_PTR GENMASK(23, 16)
+
+/* MC sequence register */
+#define TEGRA_GPCDMA_CHAN_MCSEQ 0x18
+#define TEGRA_GPCDMA_MCSEQ_DATA_SWAP BIT(31)
+#define TEGRA_GPCDMA_MCSEQ_REQ_COUNT GENMASK(30, 25)
+#define TEGRA_GPCDMA_MCSEQ_BURST GENMASK(24, 23)
+#define TEGRA_GPCDMA_MCSEQ_BURST_2 \
+ FIELD_PREP(TEGRA_GPCDMA_MCSEQ_BURST, 0)
+#define TEGRA_GPCDMA_MCSEQ_BURST_16 \
+ FIELD_PREP(TEGRA_GPCDMA_MCSEQ_BURST, 3)
+#define TEGRA_GPCDMA_MCSEQ_WRAP1 GENMASK(22, 20)
+#define TEGRA_GPCDMA_MCSEQ_WRAP0 GENMASK(19, 17)
+#define TEGRA_GPCDMA_MCSEQ_WRAP_NONE 0
+
+#define TEGRA_GPCDMA_MCSEQ_STREAM_ID1_MASK GENMASK(13, 7)
+#define TEGRA_GPCDMA_MCSEQ_STREAM_ID0_MASK GENMASK(6, 0)
+
+/* MMIO sequence register */
+#define TEGRA_GPCDMA_CHAN_MMIOSEQ 0x01c
+#define TEGRA_GPCDMA_MMIOSEQ_DBL_BUF BIT(31)
+#define TEGRA_GPCDMA_MMIOSEQ_BUS_WIDTH GENMASK(30, 28)
+#define TEGRA_GPCDMA_MMIOSEQ_BUS_WIDTH_8 \
+ FIELD_PREP(TEGRA_GPCDMA_MMIOSEQ_BUS_WIDTH, 0)
+#define TEGRA_GPCDMA_MMIOSEQ_BUS_WIDTH_16 \
+ FIELD_PREP(TEGRA_GPCDMA_MMIOSEQ_BUS_WIDTH, 1)
+#define TEGRA_GPCDMA_MMIOSEQ_BUS_WIDTH_32 \
+ FIELD_PREP(TEGRA_GPCDMA_MMIOSEQ_BUS_WIDTH, 2)
+#define TEGRA_GPCDMA_MMIOSEQ_DATA_SWAP BIT(27)
+#define TEGRA_GPCDMA_MMIOSEQ_BURST_SHIFT 23
+#define TEGRA_GPCDMA_MMIOSEQ_BURST_MIN 2U
+#define TEGRA_GPCDMA_MMIOSEQ_BURST_MAX 32U
+#define TEGRA_GPCDMA_MMIOSEQ_BURST(bs) \
+ (GENMASK((fls(bs) - 2), 0) << TEGRA_GPCDMA_MMIOSEQ_BURST_SHIFT)
+#define TEGRA_GPCDMA_MMIOSEQ_MASTER_ID GENMASK(22, 19)
+#define TEGRA_GPCDMA_MMIOSEQ_WRAP_WORD GENMASK(18, 16)
+#define TEGRA_GPCDMA_MMIOSEQ_MMIO_PROT GENMASK(8, 7)
+
+/* Channel WCOUNT */
+#define TEGRA_GPCDMA_CHAN_WCOUNT 0x20
+
+/* Transfer count */
+#define TEGRA_GPCDMA_CHAN_XFER_COUNT 0x24
+
+/* DMA byte count status */
+#define TEGRA_GPCDMA_CHAN_DMA_BYTE_STATUS 0x28
+
+/* Error Status Register */
+#define TEGRA_GPCDMA_CHAN_ERR_STATUS 0x30
+#define TEGRA_GPCDMA_CHAN_ERR_TYPE_SHIFT 8
+#define TEGRA_GPCDMA_CHAN_ERR_TYPE_MASK 0xF
+#define TEGRA_GPCDMA_CHAN_ERR_TYPE(err) ( \
+ ((err) >> TEGRA_GPCDMA_CHAN_ERR_TYPE_SHIFT) & \
+ TEGRA_GPCDMA_CHAN_ERR_TYPE_MASK)
+#define TEGRA_DMA_BM_FIFO_FULL_ERR 0xF
+#define TEGRA_DMA_PERIPH_FIFO_FULL_ERR 0xE
+#define TEGRA_DMA_PERIPH_ID_ERR 0xD
+#define TEGRA_DMA_STREAM_ID_ERR 0xC
+#define TEGRA_DMA_MC_SLAVE_ERR 0xB
+#define TEGRA_DMA_MMIO_SLAVE_ERR 0xA
+
+/* Fixed Pattern */
+#define TEGRA_GPCDMA_CHAN_FIXED_PATTERN 0x34
+
+#define TEGRA_GPCDMA_CHAN_TZ 0x38
+#define TEGRA_GPCDMA_CHAN_TZ_MMIO_PROT_1 BIT(0)
+#define TEGRA_GPCDMA_CHAN_TZ_MC_PROT_1 BIT(1)
+
+#define TEGRA_GPCDMA_CHAN_SPARE 0x3c
+#define TEGRA_GPCDMA_CHAN_SPARE_EN_LEGACY_FC BIT(16)
+
+/*
+ * If any burst is in flight and DMA paused then this is the time to complete
+ * on-flight burst and update DMA status register.
+ */
+#define TEGRA_GPCDMA_BURST_COMPLETE_TIME 10
+#define TEGRA_GPCDMA_BURST_COMPLETION_TIMEOUT 5000 /* 5 msec */
+
+/* Channel base address offset from GPCDMA base address */
+#define TEGRA_GPCDMA_CHANNEL_BASE_ADD_OFFSET 0x20000
+
+struct tegra_dma;
+struct tegra_dma_channel;
+
+/*
+ * tegra_dma_chip_data Tegra chip specific DMA data
+ * @nr_channels: Number of channels available in the controller.
+ * @channel_reg_size: Channel register size.
+ * @max_dma_count: Maximum DMA transfer count supported by DMA controller.
+ * @hw_support_pause: DMA HW engine support pause of the channel.
+ */
+struct tegra_dma_chip_data {
+ bool hw_support_pause;
+ unsigned int nr_channels;
+ unsigned int channel_reg_size;
+ unsigned int max_dma_count;
+ int (*terminate)(struct tegra_dma_channel *tdc);
+};
+
+/* DMA channel registers */
+struct tegra_dma_channel_regs {
+ u32 csr;
+ u32 src_ptr;
+ u32 dst_ptr;
+ u32 high_addr_ptr;
+ u32 mc_seq;
+ u32 mmio_seq;
+ u32 wcount;
+ u32 fixed_pattern;
+};
+
+/*
+ * tegra_dma_sg_req: DMA request details to configure hardware. This
+ * contains the details for one transfer to configure DMA hw.
+ * The client's request for data transfer can be broken into multiple
+ * sub-transfer as per requester details and hw support. This sub transfer
+ * get added as an array in Tegra DMA desc which manages the transfer details.
+ */
+struct tegra_dma_sg_req {
+ unsigned int len;
+ struct tegra_dma_channel_regs ch_regs;
+};
+
+/*
+ * tegra_dma_desc: Tegra DMA descriptors which uses virt_dma_desc to
+ * manage client request and keep track of transfer status, callbacks
+ * and request counts etc.
+ */
+struct tegra_dma_desc {
+ bool cyclic;
+ unsigned int bytes_req;
+ unsigned int bytes_xfer;
+ unsigned int sg_idx;
+ unsigned int sg_count;
+ struct virt_dma_desc vd;
+ struct tegra_dma_channel *tdc;
+ struct tegra_dma_sg_req sg_req[];
+};
+
+/*
+ * tegra_dma_channel: Channel specific information
+ */
+struct tegra_dma_channel {
+ bool config_init;
+ char name[30];
+ enum dma_transfer_direction sid_dir;
+ int id;
+ int irq;
+ int slave_id;
+ struct tegra_dma *tdma;
+ struct virt_dma_chan vc;
+ struct tegra_dma_desc *dma_desc;
+ struct dma_slave_config dma_sconfig;
+ unsigned int stream_id;
+ unsigned long chan_base_offset;
+};
+
+/*
+ * tegra_dma: Tegra DMA specific information
+ */
+struct tegra_dma {
+ const struct tegra_dma_chip_data *chip_data;
+ unsigned long sid_m2d_reserved;
+ unsigned long sid_d2m_reserved;
+ void __iomem *base_addr;
+ struct device *dev;
+ struct dma_device dma_dev;
+ struct reset_control *rst;
+ struct tegra_dma_channel channels[];
+};
+
+static inline void tdc_write(struct tegra_dma_channel *tdc,
+ u32 reg, u32 val)
+{
+ writel_relaxed(val, tdc->tdma->base_addr + tdc->chan_base_offset + reg);
+}
+
+static inline u32 tdc_read(struct tegra_dma_channel *tdc, u32 reg)
+{
+ return readl_relaxed(tdc->tdma->base_addr + tdc->chan_base_offset + reg);
+}
+
+static inline struct tegra_dma_channel *to_tegra_dma_chan(struct dma_chan *dc)
+{
+ return container_of(dc, struct tegra_dma_channel, vc.chan);
+}
+
+static inline struct tegra_dma_desc *vd_to_tegra_dma_desc(struct virt_dma_desc *vd)
+{
+ return container_of(vd, struct tegra_dma_desc, vd);
+}
+
+static inline struct device *tdc2dev(struct tegra_dma_channel *tdc)
+{
+ return tdc->vc.chan.device->dev;
+}
+
+static void tegra_dma_dump_chan_regs(struct tegra_dma_channel *tdc)
+{
+ dev_dbg(tdc2dev(tdc), "DMA Channel %d name %s register dump:\n",
+ tdc->id, tdc->name);
+ dev_dbg(tdc2dev(tdc), "CSR %x STA %x CSRE %x SRC %x DST %x\n",
+ tdc_read(tdc, TEGRA_GPCDMA_CHAN_CSR),
+ tdc_read(tdc, TEGRA_GPCDMA_CHAN_STATUS),
+ tdc_read(tdc, TEGRA_GPCDMA_CHAN_CSRE),
+ tdc_read(tdc, TEGRA_GPCDMA_CHAN_SRC_PTR),
+ tdc_read(tdc, TEGRA_GPCDMA_CHAN_DST_PTR)
+ );
+ dev_dbg(tdc2dev(tdc), "MCSEQ %x IOSEQ %x WCNT %x XFER %x BSTA %x\n",
+ tdc_read(tdc, TEGRA_GPCDMA_CHAN_MCSEQ),
+ tdc_read(tdc, TEGRA_GPCDMA_CHAN_MMIOSEQ),
+ tdc_read(tdc, TEGRA_GPCDMA_CHAN_WCOUNT),
+ tdc_read(tdc, TEGRA_GPCDMA_CHAN_XFER_COUNT),
+ tdc_read(tdc, TEGRA_GPCDMA_CHAN_DMA_BYTE_STATUS)
+ );
+ dev_dbg(tdc2dev(tdc), "DMA ERR_STA %x\n",
+ tdc_read(tdc, TEGRA_GPCDMA_CHAN_ERR_STATUS));
+}
+
+static int tegra_dma_sid_reserve(struct tegra_dma_channel *tdc,
+ enum dma_transfer_direction direction)
+{
+ struct tegra_dma *tdma = tdc->tdma;
+ int sid = tdc->slave_id;
+
+ if (!is_slave_direction(direction))
+ return 0;
+
+ switch (direction) {
+ case DMA_MEM_TO_DEV:
+ if (test_and_set_bit(sid, &tdma->sid_m2d_reserved)) {
+ dev_err(tdma->dev, "slave id already in use\n");
+ return -EINVAL;
+ }
+ break;
+ case DMA_DEV_TO_MEM:
+ if (test_and_set_bit(sid, &tdma->sid_d2m_reserved)) {
+ dev_err(tdma->dev, "slave id already in use\n");
+ return -EINVAL;
+ }
+ break;
+ default:
+ break;
+ }
+
+ tdc->sid_dir = direction;
+
+ return 0;
+}
+
+static void tegra_dma_sid_free(struct tegra_dma_channel *tdc)
+{
+ struct tegra_dma *tdma = tdc->tdma;
+ int sid = tdc->slave_id;
+
+ switch (tdc->sid_dir) {
+ case DMA_MEM_TO_DEV:
+ clear_bit(sid, &tdma->sid_m2d_reserved);
+ break;
+ case DMA_DEV_TO_MEM:
+ clear_bit(sid, &tdma->sid_d2m_reserved);
+ break;
+ default:
+ break;
+ }
+
+ tdc->sid_dir = DMA_TRANS_NONE;
+}
+
+static void tegra_dma_desc_free(struct virt_dma_desc *vd)
+{
+ kfree(container_of(vd, struct tegra_dma_desc, vd));
+}
+
+static int tegra_dma_slave_config(struct dma_chan *dc,
+ struct dma_slave_config *sconfig)
+{
+ struct tegra_dma_channel *tdc = to_tegra_dma_chan(dc);
+
+ memcpy(&tdc->dma_sconfig, sconfig, sizeof(*sconfig));
+ tdc->config_init = true;
+
+ return 0;
+}
+
+static int tegra_dma_pause(struct tegra_dma_channel *tdc)
+{
+ int ret;
+ u32 val;
+
+ val = tdc_read(tdc, TEGRA_GPCDMA_CHAN_CSRE);
+ val |= TEGRA_GPCDMA_CHAN_CSRE_PAUSE;
+ tdc_write(tdc, TEGRA_GPCDMA_CHAN_CSRE, val);
+
+ /* Wait until busy bit is de-asserted */
+ ret = readl_relaxed_poll_timeout_atomic(tdc->tdma->base_addr +
+ tdc->chan_base_offset + TEGRA_GPCDMA_CHAN_STATUS,
+ val,
+ !(val & TEGRA_GPCDMA_STATUS_BUSY),
+ TEGRA_GPCDMA_BURST_COMPLETE_TIME,
+ TEGRA_GPCDMA_BURST_COMPLETION_TIMEOUT);
+
+ if (ret) {
+ dev_err(tdc2dev(tdc), "DMA pause timed out\n");
+ tegra_dma_dump_chan_regs(tdc);
+ }
+
+ return ret;
+}
+
+static int tegra_dma_device_pause(struct dma_chan *dc)
+{
+ struct tegra_dma_channel *tdc = to_tegra_dma_chan(dc);
+ unsigned long flags;
+ int ret;
+
+ if (!tdc->tdma->chip_data->hw_support_pause)
+ return -ENOSYS;
+
+ spin_lock_irqsave(&tdc->vc.lock, flags);
+ ret = tegra_dma_pause(tdc);
+ spin_unlock_irqrestore(&tdc->vc.lock, flags);
+
+ return ret;
+}
+
+static void tegra_dma_resume(struct tegra_dma_channel *tdc)
+{
+ u32 val;
+
+ val = tdc_read(tdc, TEGRA_GPCDMA_CHAN_CSRE);
+ val &= ~TEGRA_GPCDMA_CHAN_CSRE_PAUSE;
+ tdc_write(tdc, TEGRA_GPCDMA_CHAN_CSRE, val);
+}
+
+static int tegra_dma_device_resume(struct dma_chan *dc)
+{
+ struct tegra_dma_channel *tdc = to_tegra_dma_chan(dc);
+ unsigned long flags;
+
+ if (!tdc->tdma->chip_data->hw_support_pause)
+ return -ENOSYS;
+
+ spin_lock_irqsave(&tdc->vc.lock, flags);
+ tegra_dma_resume(tdc);
+ spin_unlock_irqrestore(&tdc->vc.lock, flags);
+
+ return 0;
+}
+
+static inline int tegra_dma_pause_noerr(struct tegra_dma_channel *tdc)
+{
+ /* Return 0 irrespective of PAUSE status.
+ * This is useful to recover channels that can exit out of flush
+ * state when the channel is disabled.
+ */
+
+ tegra_dma_pause(tdc);
+ return 0;
+}
+
+static void tegra_dma_disable(struct tegra_dma_channel *tdc)
+{
+ u32 csr, status;
+
+ csr = tdc_read(tdc, TEGRA_GPCDMA_CHAN_CSR);
+
+ /* Disable interrupts */
+ csr &= ~TEGRA_GPCDMA_CSR_IE_EOC;
+
+ /* Disable DMA */
+ csr &= ~TEGRA_GPCDMA_CSR_ENB;
+ tdc_write(tdc, TEGRA_GPCDMA_CHAN_CSR, csr);
+
+ /* Clear interrupt status if it is there */
+ status = tdc_read(tdc, TEGRA_GPCDMA_CHAN_STATUS);
+ if (status & TEGRA_GPCDMA_STATUS_ISE_EOC) {
+ dev_dbg(tdc2dev(tdc), "%s():clearing interrupt\n", __func__);
+ tdc_write(tdc, TEGRA_GPCDMA_CHAN_STATUS, status);
+ }
+}
+
+static void tegra_dma_configure_next_sg(struct tegra_dma_channel *tdc)
+{
+ struct tegra_dma_desc *dma_desc = tdc->dma_desc;
+ struct tegra_dma_channel_regs *ch_regs;
+ int ret;
+ u32 val;
+
+ dma_desc->sg_idx++;
+
+ /* Reset the sg index for cyclic transfers */
+ if (dma_desc->sg_idx == dma_desc->sg_count)
+ dma_desc->sg_idx = 0;
+
+ /* Configure next transfer immediately after DMA is busy */
+ ret = readl_relaxed_poll_timeout_atomic(tdc->tdma->base_addr +
+ tdc->chan_base_offset + TEGRA_GPCDMA_CHAN_STATUS,
+ val,
+ (val & TEGRA_GPCDMA_STATUS_BUSY), 0,
+ TEGRA_GPCDMA_BURST_COMPLETION_TIMEOUT);
+ if (ret)
+ return;
+
+ ch_regs = &dma_desc->sg_req[dma_desc->sg_idx].ch_regs;
+
+ tdc_write(tdc, TEGRA_GPCDMA_CHAN_WCOUNT, ch_regs->wcount);
+ tdc_write(tdc, TEGRA_GPCDMA_CHAN_SRC_PTR, ch_regs->src_ptr);
+ tdc_write(tdc, TEGRA_GPCDMA_CHAN_DST_PTR, ch_regs->dst_ptr);
+ tdc_write(tdc, TEGRA_GPCDMA_CHAN_HIGH_ADDR_PTR, ch_regs->high_addr_ptr);
+
+ /* Start DMA */
+ tdc_write(tdc, TEGRA_GPCDMA_CHAN_CSR,
+ ch_regs->csr | TEGRA_GPCDMA_CSR_ENB);
+}
+
+static void tegra_dma_start(struct tegra_dma_channel *tdc)
+{
+ struct tegra_dma_desc *dma_desc = tdc->dma_desc;
+ struct tegra_dma_channel_regs *ch_regs;
+ struct virt_dma_desc *vdesc;
+
+ if (!dma_desc) {
+ vdesc = vchan_next_desc(&tdc->vc);
+ if (!vdesc)
+ return;
+
+ dma_desc = vd_to_tegra_dma_desc(vdesc);
+ list_del(&vdesc->node);
+ dma_desc->tdc = tdc;
+ tdc->dma_desc = dma_desc;
+
+ tegra_dma_resume(tdc);
+ }
+
+ ch_regs = &dma_desc->sg_req[dma_desc->sg_idx].ch_regs;
+
+ tdc_write(tdc, TEGRA_GPCDMA_CHAN_WCOUNT, ch_regs->wcount);
+ tdc_write(tdc, TEGRA_GPCDMA_CHAN_CSR, 0);
+ tdc_write(tdc, TEGRA_GPCDMA_CHAN_SRC_PTR, ch_regs->src_ptr);
+ tdc_write(tdc, TEGRA_GPCDMA_CHAN_DST_PTR, ch_regs->dst_ptr);
+ tdc_write(tdc, TEGRA_GPCDMA_CHAN_HIGH_ADDR_PTR, ch_regs->high_addr_ptr);
+ tdc_write(tdc, TEGRA_GPCDMA_CHAN_FIXED_PATTERN, ch_regs->fixed_pattern);
+ tdc_write(tdc, TEGRA_GPCDMA_CHAN_MMIOSEQ, ch_regs->mmio_seq);
+ tdc_write(tdc, TEGRA_GPCDMA_CHAN_MCSEQ, ch_regs->mc_seq);
+ tdc_write(tdc, TEGRA_GPCDMA_CHAN_CSR, ch_regs->csr);
+
+ /* Start DMA */
+ tdc_write(tdc, TEGRA_GPCDMA_CHAN_CSR,
+ ch_regs->csr | TEGRA_GPCDMA_CSR_ENB);
+}
+
+static void tegra_dma_xfer_complete(struct tegra_dma_channel *tdc)
+{
+ vchan_cookie_complete(&tdc->dma_desc->vd);
+
+ tegra_dma_sid_free(tdc);
+ tdc->dma_desc = NULL;
+}
+
+static void tegra_dma_chan_decode_error(struct tegra_dma_channel *tdc,
+ unsigned int err_status)
+{
+ switch (TEGRA_GPCDMA_CHAN_ERR_TYPE(err_status)) {
+ case TEGRA_DMA_BM_FIFO_FULL_ERR:
+ dev_err(tdc->tdma->dev,
+ "GPCDMA CH%d bm fifo full\n", tdc->id);
+ break;
+
+ case TEGRA_DMA_PERIPH_FIFO_FULL_ERR:
+ dev_err(tdc->tdma->dev,
+ "GPCDMA CH%d peripheral fifo full\n", tdc->id);
+ break;
+
+ case TEGRA_DMA_PERIPH_ID_ERR:
+ dev_err(tdc->tdma->dev,
+ "GPCDMA CH%d illegal peripheral id\n", tdc->id);
+ break;
+
+ case TEGRA_DMA_STREAM_ID_ERR:
+ dev_err(tdc->tdma->dev,
+ "GPCDMA CH%d illegal stream id\n", tdc->id);
+ break;
+
+ case TEGRA_DMA_MC_SLAVE_ERR:
+ dev_err(tdc->tdma->dev,
+ "GPCDMA CH%d mc slave error\n", tdc->id);
+ break;
+
+ case TEGRA_DMA_MMIO_SLAVE_ERR:
+ dev_err(tdc->tdma->dev,
+ "GPCDMA CH%d mmio slave error\n", tdc->id);
+ break;
+
+ default:
+ dev_err(tdc->tdma->dev,
+ "GPCDMA CH%d security violation %x\n", tdc->id,
+ err_status);
+ }
+}
+
+static irqreturn_t tegra_dma_isr(int irq, void *dev_id)
+{
+ struct tegra_dma_channel *tdc = dev_id;
+ struct tegra_dma_desc *dma_desc = tdc->dma_desc;
+ struct tegra_dma_sg_req *sg_req;
+ u32 status;
+
+ /* Check channel error status register */
+ status = tdc_read(tdc, TEGRA_GPCDMA_CHAN_ERR_STATUS);
+ if (status) {
+ tegra_dma_chan_decode_error(tdc, status);
+ tegra_dma_dump_chan_regs(tdc);
+ tdc_write(tdc, TEGRA_GPCDMA_CHAN_ERR_STATUS, 0xFFFFFFFF);
+ }
+
+ spin_lock(&tdc->vc.lock);
+ status = tdc_read(tdc, TEGRA_GPCDMA_CHAN_STATUS);
+ if (!(status & TEGRA_GPCDMA_STATUS_ISE_EOC))
+ goto irq_done;
+
+ tdc_write(tdc, TEGRA_GPCDMA_CHAN_STATUS,
+ TEGRA_GPCDMA_STATUS_ISE_EOC);
+
+ if (!dma_desc)
+ goto irq_done;
+
+ sg_req = dma_desc->sg_req;
+ dma_desc->bytes_xfer += sg_req[dma_desc->sg_idx].len;
+
+ if (dma_desc->cyclic) {
+ vchan_cyclic_callback(&dma_desc->vd);
+ tegra_dma_configure_next_sg(tdc);
+ } else {
+ dma_desc->sg_idx++;
+ if (dma_desc->sg_idx == dma_desc->sg_count)
+ tegra_dma_xfer_complete(tdc);
+ else
+ tegra_dma_start(tdc);
+ }
+
+irq_done:
+ spin_unlock(&tdc->vc.lock);
+ return IRQ_HANDLED;
+}
+
+static void tegra_dma_issue_pending(struct dma_chan *dc)
+{
+ struct tegra_dma_channel *tdc = to_tegra_dma_chan(dc);
+ unsigned long flags;
+
+ if (tdc->dma_desc)
+ return;
+
+ spin_lock_irqsave(&tdc->vc.lock, flags);
+ if (vchan_issue_pending(&tdc->vc))
+ tegra_dma_start(tdc);
+
+ /*
+ * For cyclic DMA transfers, program the second
+ * transfer parameters as soon as the first DMA
+ * transfer is started inorder for the DMA
+ * controller to trigger the second transfer
+ * with the correct parameters.
+ */
+ if (tdc->dma_desc && tdc->dma_desc->cyclic)
+ tegra_dma_configure_next_sg(tdc);
+
+ spin_unlock_irqrestore(&tdc->vc.lock, flags);
+}
+
+static int tegra_dma_stop_client(struct tegra_dma_channel *tdc)
+{
+ int ret;
+ u32 status, csr;
+
+ /*
+ * Change the client associated with the DMA channel
+ * to stop DMA engine from starting any more bursts for
+ * the given client and wait for in flight bursts to complete
+ */
+ csr = tdc_read(tdc, TEGRA_GPCDMA_CHAN_CSR);
+ csr &= ~(TEGRA_GPCDMA_CSR_REQ_SEL_MASK);
+ csr |= TEGRA_GPCDMA_CSR_REQ_SEL_UNUSED;
+ tdc_write(tdc, TEGRA_GPCDMA_CHAN_CSR, csr);
+
+ /* Wait for in flight data transfer to finish */
+ udelay(TEGRA_GPCDMA_BURST_COMPLETE_TIME);
+
+ /* If TX/RX path is still active wait till it becomes
+ * inactive
+ */
+
+ ret = readl_relaxed_poll_timeout_atomic(tdc->tdma->base_addr +
+ tdc->chan_base_offset +
+ TEGRA_GPCDMA_CHAN_STATUS,
+ status,
+ !(status & (TEGRA_GPCDMA_STATUS_CHANNEL_TX |
+ TEGRA_GPCDMA_STATUS_CHANNEL_RX)),
+ 5,
+ TEGRA_GPCDMA_BURST_COMPLETION_TIMEOUT);
+ if (ret) {
+ dev_err(tdc2dev(tdc), "Timeout waiting for DMA burst completion!\n");
+ tegra_dma_dump_chan_regs(tdc);
+ }
+
+ return ret;
+}
+
+static int tegra_dma_terminate_all(struct dma_chan *dc)
+{
+ struct tegra_dma_channel *tdc = to_tegra_dma_chan(dc);
+ unsigned long flags;
+ LIST_HEAD(head);
+ int err;
+
+ spin_lock_irqsave(&tdc->vc.lock, flags);
+
+ if (tdc->dma_desc) {
+ err = tdc->tdma->chip_data->terminate(tdc);
+ if (err) {
+ spin_unlock_irqrestore(&tdc->vc.lock, flags);
+ return err;
+ }
+
+ vchan_terminate_vdesc(&tdc->dma_desc->vd);
+ tegra_dma_disable(tdc);
+ tdc->dma_desc = NULL;
+ }
+
+ tegra_dma_sid_free(tdc);
+ vchan_get_all_descriptors(&tdc->vc, &head);
+ spin_unlock_irqrestore(&tdc->vc.lock, flags);
+
+ vchan_dma_desc_free_list(&tdc->vc, &head);
+
+ return 0;
+}
+
+static int tegra_dma_get_residual(struct tegra_dma_channel *tdc)
+{
+ struct tegra_dma_desc *dma_desc = tdc->dma_desc;
+ struct tegra_dma_sg_req *sg_req = dma_desc->sg_req;
+ unsigned int bytes_xfer, residual;
+ u32 wcount = 0, status;
+
+ wcount = tdc_read(tdc, TEGRA_GPCDMA_CHAN_XFER_COUNT);
+
+ /*
+ * Set wcount = 0 if EOC bit is set. The transfer would have
+ * already completed and the CHAN_XFER_COUNT could have updated
+ * for the next transfer, specifically in case of cyclic transfers.
+ */
+ status = tdc_read(tdc, TEGRA_GPCDMA_CHAN_STATUS);
+ if (status & TEGRA_GPCDMA_STATUS_ISE_EOC)
+ wcount = 0;
+
+ bytes_xfer = dma_desc->bytes_xfer +
+ sg_req[dma_desc->sg_idx].len - (wcount * 4);
+
+ residual = dma_desc->bytes_req - (bytes_xfer % dma_desc->bytes_req);
+
+ return residual;
+}
+
+static enum dma_status tegra_dma_tx_status(struct dma_chan *dc,
+ dma_cookie_t cookie,
+ struct dma_tx_state *txstate)
+{
+ struct tegra_dma_channel *tdc = to_tegra_dma_chan(dc);
+ struct tegra_dma_desc *dma_desc;
+ struct virt_dma_desc *vd;
+ unsigned int residual;
+ unsigned long flags;
+ enum dma_status ret;
+
+ ret = dma_cookie_status(dc, cookie, txstate);
+ if (ret == DMA_COMPLETE)
+ return ret;
+
+ spin_lock_irqsave(&tdc->vc.lock, flags);
+ vd = vchan_find_desc(&tdc->vc, cookie);
+ if (vd) {
+ dma_desc = vd_to_tegra_dma_desc(vd);
+ residual = dma_desc->bytes_req;
+ dma_set_residue(txstate, residual);
+ } else if (tdc->dma_desc && tdc->dma_desc->vd.tx.cookie == cookie) {
+ residual = tegra_dma_get_residual(tdc);
+ dma_set_residue(txstate, residual);
+ } else {
+ dev_err(tdc2dev(tdc), "cookie %d is not found\n", cookie);
+ }
+ spin_unlock_irqrestore(&tdc->vc.lock, flags);
+
+ return ret;
+}
+
+static inline int get_bus_width(struct tegra_dma_channel *tdc,
+ enum dma_slave_buswidth slave_bw)
+{
+ switch (slave_bw) {
+ case DMA_SLAVE_BUSWIDTH_1_BYTE:
+ return TEGRA_GPCDMA_MMIOSEQ_BUS_WIDTH_8;
+ case DMA_SLAVE_BUSWIDTH_2_BYTES:
+ return TEGRA_GPCDMA_MMIOSEQ_BUS_WIDTH_16;
+ case DMA_SLAVE_BUSWIDTH_4_BYTES:
+ return TEGRA_GPCDMA_MMIOSEQ_BUS_WIDTH_32;
+ default:
+ dev_err(tdc2dev(tdc), "given slave bus width is not supported\n");
+ return -EINVAL;
+ }
+}
+
+static unsigned int get_burst_size(struct tegra_dma_channel *tdc,
+ u32 burst_size, enum dma_slave_buswidth slave_bw,
+ int len)
+{
+ unsigned int burst_mmio_width, burst_byte;
+
+ /*
+ * burst_size from client is in terms of the bus_width.
+ * convert that into words.
+ * If burst_size is not specified from client, then use
+ * len to calculate the optimum burst size
+ */
+ burst_byte = burst_size ? burst_size * slave_bw : len;
+ burst_mmio_width = burst_byte / 4;
+
+ if (burst_mmio_width < TEGRA_GPCDMA_MMIOSEQ_BURST_MIN)
+ return 0;
+
+ burst_mmio_width = min(burst_mmio_width, TEGRA_GPCDMA_MMIOSEQ_BURST_MAX);
+
+ return TEGRA_GPCDMA_MMIOSEQ_BURST(burst_mmio_width);
+}
+
+static int get_transfer_param(struct tegra_dma_channel *tdc,
+ enum dma_transfer_direction direction,
+ u32 *apb_addr,
+ u32 *mmio_seq,
+ u32 *csr,
+ unsigned int *burst_size,
+ enum dma_slave_buswidth *slave_bw)
+{
+ switch (direction) {
+ case DMA_MEM_TO_DEV:
+ *apb_addr = tdc->dma_sconfig.dst_addr;
+ *mmio_seq = get_bus_width(tdc, tdc->dma_sconfig.dst_addr_width);
+ *burst_size = tdc->dma_sconfig.dst_maxburst;
+ *slave_bw = tdc->dma_sconfig.dst_addr_width;
+ *csr = TEGRA_GPCDMA_CSR_DMA_MEM2IO_FC;
+ return 0;
+ case DMA_DEV_TO_MEM:
+ *apb_addr = tdc->dma_sconfig.src_addr;
+ *mmio_seq = get_bus_width(tdc, tdc->dma_sconfig.src_addr_width);
+ *burst_size = tdc->dma_sconfig.src_maxburst;
+ *slave_bw = tdc->dma_sconfig.src_addr_width;
+ *csr = TEGRA_GPCDMA_CSR_DMA_IO2MEM_FC;
+ return 0;
+ default:
+ dev_err(tdc2dev(tdc), "DMA direction is not supported\n");
+ }
+
+ return -EINVAL;
+}
+
+static struct dma_async_tx_descriptor *
+tegra_dma_prep_dma_memset(struct dma_chan *dc, dma_addr_t dest, int value,
+ size_t len, unsigned long flags)
+{
+ struct tegra_dma_channel *tdc = to_tegra_dma_chan(dc);
+ unsigned int max_dma_count = tdc->tdma->chip_data->max_dma_count;
+ struct tegra_dma_sg_req *sg_req;
+ struct tegra_dma_desc *dma_desc;
+ u32 csr, mc_seq;
+
+ if ((len & 3) || (dest & 3) || len > max_dma_count) {
+ dev_err(tdc2dev(tdc),
+ "DMA length/memory address is not supported\n");
+ return NULL;
+ }
+
+ /* Set DMA mode to fixed pattern */
+ csr = TEGRA_GPCDMA_CSR_DMA_FIXED_PAT;
+ /* Enable once or continuous mode */
+ csr |= TEGRA_GPCDMA_CSR_ONCE;
+ /* Enable IRQ mask */
+ csr |= TEGRA_GPCDMA_CSR_IRQ_MASK;
+ /* Enable the DMA interrupt */
+ if (flags & DMA_PREP_INTERRUPT)
+ csr |= TEGRA_GPCDMA_CSR_IE_EOC;
+ /* Configure default priority weight for the channel */
+ csr |= FIELD_PREP(TEGRA_GPCDMA_CSR_WEIGHT, 1);
+
+ mc_seq = tdc_read(tdc, TEGRA_GPCDMA_CHAN_MCSEQ);
+ /* retain stream-id and clean rest */
+ mc_seq &= TEGRA_GPCDMA_MCSEQ_STREAM_ID0_MASK;
+
+ /* Set the address wrapping */
+ mc_seq |= FIELD_PREP(TEGRA_GPCDMA_MCSEQ_WRAP0,
+ TEGRA_GPCDMA_MCSEQ_WRAP_NONE);
+ mc_seq |= FIELD_PREP(TEGRA_GPCDMA_MCSEQ_WRAP1,
+ TEGRA_GPCDMA_MCSEQ_WRAP_NONE);
+
+ /* Program outstanding MC requests */
+ mc_seq |= FIELD_PREP(TEGRA_GPCDMA_MCSEQ_REQ_COUNT, 1);
+ /* Set burst size */
+ mc_seq |= TEGRA_GPCDMA_MCSEQ_BURST_16;
+
+ dma_desc = kzalloc(struct_size(dma_desc, sg_req, 1), GFP_NOWAIT);
+ if (!dma_desc)
+ return NULL;
+
+ dma_desc->bytes_req = len;
+ dma_desc->sg_count = 1;
+ sg_req = dma_desc->sg_req;
+
+ sg_req[0].ch_regs.src_ptr = 0;
+ sg_req[0].ch_regs.dst_ptr = dest;
+ sg_req[0].ch_regs.high_addr_ptr =
+ FIELD_PREP(TEGRA_GPCDMA_HIGH_ADDR_DST_PTR, (dest >> 32));
+ sg_req[0].ch_regs.fixed_pattern = value;
+ /* Word count reg takes value as (N +1) words */
+ sg_req[0].ch_regs.wcount = ((len - 4) >> 2);
+ sg_req[0].ch_regs.csr = csr;
+ sg_req[0].ch_regs.mmio_seq = 0;
+ sg_req[0].ch_regs.mc_seq = mc_seq;
+ sg_req[0].len = len;
+
+ dma_desc->cyclic = false;
+ return vchan_tx_prep(&tdc->vc, &dma_desc->vd, flags);
+}
+
+static struct dma_async_tx_descriptor *
+tegra_dma_prep_dma_memcpy(struct dma_chan *dc, dma_addr_t dest,
+ dma_addr_t src, size_t len, unsigned long flags)
+{
+ struct tegra_dma_channel *tdc = to_tegra_dma_chan(dc);
+ struct tegra_dma_sg_req *sg_req;
+ struct tegra_dma_desc *dma_desc;
+ unsigned int max_dma_count;
+ u32 csr, mc_seq;
+
+ max_dma_count = tdc->tdma->chip_data->max_dma_count;
+ if ((len & 3) || (src & 3) || (dest & 3) || len > max_dma_count) {
+ dev_err(tdc2dev(tdc),
+ "DMA length/memory address is not supported\n");
+ return NULL;
+ }
+
+ /* Set DMA mode to memory to memory transfer */
+ csr = TEGRA_GPCDMA_CSR_DMA_MEM2MEM;
+ /* Enable once or continuous mode */
+ csr |= TEGRA_GPCDMA_CSR_ONCE;
+ /* Enable IRQ mask */
+ csr |= TEGRA_GPCDMA_CSR_IRQ_MASK;
+ /* Enable the DMA interrupt */
+ if (flags & DMA_PREP_INTERRUPT)
+ csr |= TEGRA_GPCDMA_CSR_IE_EOC;
+ /* Configure default priority weight for the channel */
+ csr |= FIELD_PREP(TEGRA_GPCDMA_CSR_WEIGHT, 1);
+
+ mc_seq = tdc_read(tdc, TEGRA_GPCDMA_CHAN_MCSEQ);
+ /* retain stream-id and clean rest */
+ mc_seq &= (TEGRA_GPCDMA_MCSEQ_STREAM_ID0_MASK) |
+ (TEGRA_GPCDMA_MCSEQ_STREAM_ID1_MASK);
+
+ /* Set the address wrapping */
+ mc_seq |= FIELD_PREP(TEGRA_GPCDMA_MCSEQ_WRAP0,
+ TEGRA_GPCDMA_MCSEQ_WRAP_NONE);
+ mc_seq |= FIELD_PREP(TEGRA_GPCDMA_MCSEQ_WRAP1,
+ TEGRA_GPCDMA_MCSEQ_WRAP_NONE);
+
+ /* Program outstanding MC requests */
+ mc_seq |= FIELD_PREP(TEGRA_GPCDMA_MCSEQ_REQ_COUNT, 1);
+ /* Set burst size */
+ mc_seq |= TEGRA_GPCDMA_MCSEQ_BURST_16;
+
+ dma_desc = kzalloc(struct_size(dma_desc, sg_req, 1), GFP_NOWAIT);
+ if (!dma_desc)
+ return NULL;
+
+ dma_desc->bytes_req = len;
+ dma_desc->sg_count = 1;
+ sg_req = dma_desc->sg_req;
+
+ sg_req[0].ch_regs.src_ptr = src;
+ sg_req[0].ch_regs.dst_ptr = dest;
+ sg_req[0].ch_regs.high_addr_ptr =
+ FIELD_PREP(TEGRA_GPCDMA_HIGH_ADDR_SRC_PTR, (src >> 32));
+ sg_req[0].ch_regs.high_addr_ptr |=
+ FIELD_PREP(TEGRA_GPCDMA_HIGH_ADDR_DST_PTR, (dest >> 32));
+ /* Word count reg takes value as (N +1) words */
+ sg_req[0].ch_regs.wcount = ((len - 4) >> 2);
+ sg_req[0].ch_regs.csr = csr;
+ sg_req[0].ch_regs.mmio_seq = 0;
+ sg_req[0].ch_regs.mc_seq = mc_seq;
+ sg_req[0].len = len;
+
+ dma_desc->cyclic = false;
+ return vchan_tx_prep(&tdc->vc, &dma_desc->vd, flags);
+}
+
+static struct dma_async_tx_descriptor *
+tegra_dma_prep_slave_sg(struct dma_chan *dc, struct scatterlist *sgl,
+ unsigned int sg_len, enum dma_transfer_direction direction,
+ unsigned long flags, void *context)
+{
+ struct tegra_dma_channel *tdc = to_tegra_dma_chan(dc);
+ unsigned int max_dma_count = tdc->tdma->chip_data->max_dma_count;
+ enum dma_slave_buswidth slave_bw = DMA_SLAVE_BUSWIDTH_UNDEFINED;
+ u32 csr, mc_seq, apb_ptr = 0, mmio_seq = 0;
+ struct tegra_dma_sg_req *sg_req;
+ struct tegra_dma_desc *dma_desc;
+ struct scatterlist *sg;
+ u32 burst_size;
+ unsigned int i;
+ int ret;
+
+ if (!tdc->config_init) {
+ dev_err(tdc2dev(tdc), "DMA channel is not configured\n");
+ return NULL;
+ }
+ if (sg_len < 1) {
+ dev_err(tdc2dev(tdc), "Invalid segment length %d\n", sg_len);
+ return NULL;
+ }
+
+ ret = tegra_dma_sid_reserve(tdc, direction);
+ if (ret)
+ return NULL;
+
+ ret = get_transfer_param(tdc, direction, &apb_ptr, &mmio_seq, &csr,
+ &burst_size, &slave_bw);
+ if (ret < 0)
+ return NULL;
+
+ /* Enable once or continuous mode */
+ csr |= TEGRA_GPCDMA_CSR_ONCE;
+ /* Program the slave id in requestor select */
+ csr |= FIELD_PREP(TEGRA_GPCDMA_CSR_REQ_SEL_MASK, tdc->slave_id);
+ /* Enable IRQ mask */
+ csr |= TEGRA_GPCDMA_CSR_IRQ_MASK;
+ /* Configure default priority weight for the channel*/
+ csr |= FIELD_PREP(TEGRA_GPCDMA_CSR_WEIGHT, 1);
+
+ /* Enable the DMA interrupt */
+ if (flags & DMA_PREP_INTERRUPT)
+ csr |= TEGRA_GPCDMA_CSR_IE_EOC;
+
+ mc_seq = tdc_read(tdc, TEGRA_GPCDMA_CHAN_MCSEQ);
+ /* retain stream-id and clean rest */
+ mc_seq &= TEGRA_GPCDMA_MCSEQ_STREAM_ID0_MASK;
+
+ /* Set the address wrapping on both MC and MMIO side */
+
+ mc_seq |= FIELD_PREP(TEGRA_GPCDMA_MCSEQ_WRAP0,
+ TEGRA_GPCDMA_MCSEQ_WRAP_NONE);
+ mc_seq |= FIELD_PREP(TEGRA_GPCDMA_MCSEQ_WRAP1,
+ TEGRA_GPCDMA_MCSEQ_WRAP_NONE);
+ mmio_seq |= FIELD_PREP(TEGRA_GPCDMA_MMIOSEQ_WRAP_WORD, 1);
+
+ /* Program 2 MC outstanding requests by default. */
+ mc_seq |= FIELD_PREP(TEGRA_GPCDMA_MCSEQ_REQ_COUNT, 1);
+
+ /* Setting MC burst size depending on MMIO burst size */
+ if (burst_size == 64)
+ mc_seq |= TEGRA_GPCDMA_MCSEQ_BURST_16;
+ else
+ mc_seq |= TEGRA_GPCDMA_MCSEQ_BURST_2;
+
+ dma_desc = kzalloc(struct_size(dma_desc, sg_req, sg_len), GFP_NOWAIT);
+ if (!dma_desc)
+ return NULL;
+
+ dma_desc->sg_count = sg_len;
+ sg_req = dma_desc->sg_req;
+
+ /* Make transfer requests */
+ for_each_sg(sgl, sg, sg_len, i) {
+ u32 len;
+ dma_addr_t mem;
+
+ mem = sg_dma_address(sg);
+ len = sg_dma_len(sg);
+
+ if ((len & 3) || (mem & 3) || len > max_dma_count) {
+ dev_err(tdc2dev(tdc),
+ "DMA length/memory address is not supported\n");
+ kfree(dma_desc);
+ return NULL;
+ }
+
+ mmio_seq |= get_burst_size(tdc, burst_size, slave_bw, len);
+ dma_desc->bytes_req += len;
+
+ if (direction == DMA_MEM_TO_DEV) {
+ sg_req[i].ch_regs.src_ptr = mem;
+ sg_req[i].ch_regs.dst_ptr = apb_ptr;
+ sg_req[i].ch_regs.high_addr_ptr =
+ FIELD_PREP(TEGRA_GPCDMA_HIGH_ADDR_SRC_PTR, (mem >> 32));
+ } else if (direction == DMA_DEV_TO_MEM) {
+ sg_req[i].ch_regs.src_ptr = apb_ptr;
+ sg_req[i].ch_regs.dst_ptr = mem;
+ sg_req[i].ch_regs.high_addr_ptr =
+ FIELD_PREP(TEGRA_GPCDMA_HIGH_ADDR_DST_PTR, (mem >> 32));
+ }
+
+ /*
+ * Word count register takes input in words. Writing a value
+ * of N into word count register means a req of (N+1) words.
+ */
+ sg_req[i].ch_regs.wcount = ((len - 4) >> 2);
+ sg_req[i].ch_regs.csr = csr;
+ sg_req[i].ch_regs.mmio_seq = mmio_seq;
+ sg_req[i].ch_regs.mc_seq = mc_seq;
+ sg_req[i].len = len;
+ }
+
+ dma_desc->cyclic = false;
+ return vchan_tx_prep(&tdc->vc, &dma_desc->vd, flags);
+}
+
+static struct dma_async_tx_descriptor *
+tegra_dma_prep_dma_cyclic(struct dma_chan *dc, dma_addr_t buf_addr, size_t buf_len,
+ size_t period_len, enum dma_transfer_direction direction,
+ unsigned long flags)
+{
+ enum dma_slave_buswidth slave_bw = DMA_SLAVE_BUSWIDTH_UNDEFINED;
+ u32 csr, mc_seq, apb_ptr = 0, mmio_seq = 0, burst_size;
+ unsigned int max_dma_count, len, period_count, i;
+ struct tegra_dma_channel *tdc = to_tegra_dma_chan(dc);
+ struct tegra_dma_desc *dma_desc;
+ struct tegra_dma_sg_req *sg_req;
+ dma_addr_t mem = buf_addr;
+ int ret;
+
+ if (!buf_len || !period_len) {
+ dev_err(tdc2dev(tdc), "Invalid buffer/period len\n");
+ return NULL;
+ }
+
+ if (!tdc->config_init) {
+ dev_err(tdc2dev(tdc), "DMA slave is not configured\n");
+ return NULL;
+ }
+
+ ret = tegra_dma_sid_reserve(tdc, direction);
+ if (ret)
+ return NULL;
+
+ /*
+ * We only support cycle transfer when buf_len is multiple of
+ * period_len.
+ */
+ if (buf_len % period_len) {
+ dev_err(tdc2dev(tdc), "buf_len is not multiple of period_len\n");
+ return NULL;
+ }
+
+ len = period_len;
+ max_dma_count = tdc->tdma->chip_data->max_dma_count;
+ if ((len & 3) || (buf_addr & 3) || len > max_dma_count) {
+ dev_err(tdc2dev(tdc), "Req len/mem address is not correct\n");
+ return NULL;
+ }
+
+ ret = get_transfer_param(tdc, direction, &apb_ptr, &mmio_seq, &csr,
+ &burst_size, &slave_bw);
+ if (ret < 0)
+ return NULL;
+
+ /* Enable once or continuous mode */
+ csr &= ~TEGRA_GPCDMA_CSR_ONCE;
+ /* Program the slave id in requestor select */
+ csr |= FIELD_PREP(TEGRA_GPCDMA_CSR_REQ_SEL_MASK, tdc->slave_id);
+ /* Enable IRQ mask */
+ csr |= TEGRA_GPCDMA_CSR_IRQ_MASK;
+ /* Configure default priority weight for the channel*/
+ csr |= FIELD_PREP(TEGRA_GPCDMA_CSR_WEIGHT, 1);
+
+ /* Enable the DMA interrupt */
+ if (flags & DMA_PREP_INTERRUPT)
+ csr |= TEGRA_GPCDMA_CSR_IE_EOC;
+
+ mmio_seq |= FIELD_PREP(TEGRA_GPCDMA_MMIOSEQ_WRAP_WORD, 1);
+
+ mc_seq = tdc_read(tdc, TEGRA_GPCDMA_CHAN_MCSEQ);
+ /* retain stream-id and clean rest */
+ mc_seq &= TEGRA_GPCDMA_MCSEQ_STREAM_ID0_MASK;
+
+ /* Set the address wrapping on both MC and MMIO side */
+ mc_seq |= FIELD_PREP(TEGRA_GPCDMA_MCSEQ_WRAP0,
+ TEGRA_GPCDMA_MCSEQ_WRAP_NONE);
+ mc_seq |= FIELD_PREP(TEGRA_GPCDMA_MCSEQ_WRAP1,
+ TEGRA_GPCDMA_MCSEQ_WRAP_NONE);
+
+ /* Program 2 MC outstanding requests by default. */
+ mc_seq |= FIELD_PREP(TEGRA_GPCDMA_MCSEQ_REQ_COUNT, 1);
+ /* Setting MC burst size depending on MMIO burst size */
+ if (burst_size == 64)
+ mc_seq |= TEGRA_GPCDMA_MCSEQ_BURST_16;
+ else
+ mc_seq |= TEGRA_GPCDMA_MCSEQ_BURST_2;
+
+ period_count = buf_len / period_len;
+ dma_desc = kzalloc(struct_size(dma_desc, sg_req, period_count),
+ GFP_NOWAIT);
+ if (!dma_desc)
+ return NULL;
+
+ dma_desc->bytes_req = buf_len;
+ dma_desc->sg_count = period_count;
+ sg_req = dma_desc->sg_req;
+
+ /* Split transfer equal to period size */
+ for (i = 0; i < period_count; i++) {
+ mmio_seq |= get_burst_size(tdc, burst_size, slave_bw, len);
+ if (direction == DMA_MEM_TO_DEV) {
+ sg_req[i].ch_regs.src_ptr = mem;
+ sg_req[i].ch_regs.dst_ptr = apb_ptr;
+ sg_req[i].ch_regs.high_addr_ptr =
+ FIELD_PREP(TEGRA_GPCDMA_HIGH_ADDR_SRC_PTR, (mem >> 32));
+ } else if (direction == DMA_DEV_TO_MEM) {
+ sg_req[i].ch_regs.src_ptr = apb_ptr;
+ sg_req[i].ch_regs.dst_ptr = mem;
+ sg_req[i].ch_regs.high_addr_ptr =
+ FIELD_PREP(TEGRA_GPCDMA_HIGH_ADDR_DST_PTR, (mem >> 32));
+ }
+ /*
+ * Word count register takes input in words. Writing a value
+ * of N into word count register means a req of (N+1) words.
+ */
+ sg_req[i].ch_regs.wcount = ((len - 4) >> 2);
+ sg_req[i].ch_regs.csr = csr;
+ sg_req[i].ch_regs.mmio_seq = mmio_seq;
+ sg_req[i].ch_regs.mc_seq = mc_seq;
+ sg_req[i].len = len;
+
+ mem += len;
+ }
+
+ dma_desc->cyclic = true;
+
+ return vchan_tx_prep(&tdc->vc, &dma_desc->vd, flags);
+}
+
+static int tegra_dma_alloc_chan_resources(struct dma_chan *dc)
+{
+ struct tegra_dma_channel *tdc = to_tegra_dma_chan(dc);
+ int ret;
+
+ ret = request_irq(tdc->irq, tegra_dma_isr, 0, tdc->name, tdc);
+ if (ret) {
+ dev_err(tdc2dev(tdc), "request_irq failed for %s\n", tdc->name);
+ return ret;
+ }
+
+ dma_cookie_init(&tdc->vc.chan);
+ tdc->config_init = false;
+ return 0;
+}
+
+static void tegra_dma_chan_synchronize(struct dma_chan *dc)
+{
+ struct tegra_dma_channel *tdc = to_tegra_dma_chan(dc);
+
+ synchronize_irq(tdc->irq);
+ vchan_synchronize(&tdc->vc);
+}
+
+static void tegra_dma_free_chan_resources(struct dma_chan *dc)
+{
+ struct tegra_dma_channel *tdc = to_tegra_dma_chan(dc);
+
+ dev_dbg(tdc2dev(tdc), "Freeing channel %d\n", tdc->id);
+
+ tegra_dma_terminate_all(dc);
+ synchronize_irq(tdc->irq);
+
+ tasklet_kill(&tdc->vc.task);
+ tdc->config_init = false;
+ tdc->slave_id = -1;
+ tdc->sid_dir = DMA_TRANS_NONE;
+ free_irq(tdc->irq, tdc);
+
+ vchan_free_chan_resources(&tdc->vc);
+}
+
+static struct dma_chan *tegra_dma_of_xlate(struct of_phandle_args *dma_spec,
+ struct of_dma *ofdma)
+{
+ struct tegra_dma *tdma = ofdma->of_dma_data;
+ struct tegra_dma_channel *tdc;
+ struct dma_chan *chan;
+
+ chan = dma_get_any_slave_channel(&tdma->dma_dev);
+ if (!chan)
+ return NULL;
+
+ tdc = to_tegra_dma_chan(chan);
+ tdc->slave_id = dma_spec->args[0];
+
+ return chan;
+}
+
+static const struct tegra_dma_chip_data tegra186_dma_chip_data = {
+ .nr_channels = 31,
+ .channel_reg_size = SZ_64K,
+ .max_dma_count = SZ_1G,
+ .hw_support_pause = false,
+ .terminate = tegra_dma_stop_client,
+};
+
+static const struct tegra_dma_chip_data tegra194_dma_chip_data = {
+ .nr_channels = 31,
+ .channel_reg_size = SZ_64K,
+ .max_dma_count = SZ_1G,
+ .hw_support_pause = true,
+ .terminate = tegra_dma_pause,
+};
+
+static const struct tegra_dma_chip_data tegra234_dma_chip_data = {
+ .nr_channels = 31,
+ .channel_reg_size = SZ_64K,
+ .max_dma_count = SZ_1G,
+ .hw_support_pause = true,
+ .terminate = tegra_dma_pause_noerr,
+};
+
+static const struct of_device_id tegra_dma_of_match[] = {
+ {
+ .compatible = "nvidia,tegra186-gpcdma",
+ .data = &tegra186_dma_chip_data,
+ }, {
+ .compatible = "nvidia,tegra194-gpcdma",
+ .data = &tegra194_dma_chip_data,
+ }, {
+ .compatible = "nvidia,tegra234-gpcdma",
+ .data = &tegra234_dma_chip_data,
+ }, {
+ },
+};
+MODULE_DEVICE_TABLE(of, tegra_dma_of_match);
+
+static int tegra_dma_program_sid(struct tegra_dma_channel *tdc, int stream_id)
+{
+ unsigned int reg_val = tdc_read(tdc, TEGRA_GPCDMA_CHAN_MCSEQ);
+
+ reg_val &= ~(TEGRA_GPCDMA_MCSEQ_STREAM_ID0_MASK);
+ reg_val &= ~(TEGRA_GPCDMA_MCSEQ_STREAM_ID1_MASK);
+
+ reg_val |= FIELD_PREP(TEGRA_GPCDMA_MCSEQ_STREAM_ID0_MASK, stream_id);
+ reg_val |= FIELD_PREP(TEGRA_GPCDMA_MCSEQ_STREAM_ID1_MASK, stream_id);
+
+ tdc_write(tdc, TEGRA_GPCDMA_CHAN_MCSEQ, reg_val);
+ return 0;
+}
+
+static int tegra_dma_probe(struct platform_device *pdev)
+{
+ const struct tegra_dma_chip_data *cdata = NULL;
+ struct iommu_fwspec *iommu_spec;
+ unsigned int stream_id, i;
+ struct tegra_dma *tdma;
+ int ret;
+
+ cdata = of_device_get_match_data(&pdev->dev);
+
+ tdma = devm_kzalloc(&pdev->dev,
+ struct_size(tdma, channels, cdata->nr_channels),
+ GFP_KERNEL);
+ if (!tdma)
+ return -ENOMEM;
+
+ tdma->dev = &pdev->dev;
+ tdma->chip_data = cdata;
+ platform_set_drvdata(pdev, tdma);
+
+ tdma->base_addr = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(tdma->base_addr))
+ return PTR_ERR(tdma->base_addr);
+
+ tdma->rst = devm_reset_control_get_exclusive(&pdev->dev, "gpcdma");
+ if (IS_ERR(tdma->rst)) {
+ return dev_err_probe(&pdev->dev, PTR_ERR(tdma->rst),
+ "Missing controller reset\n");
+ }
+ reset_control_reset(tdma->rst);
+
+ tdma->dma_dev.dev = &pdev->dev;
+
+ iommu_spec = dev_iommu_fwspec_get(&pdev->dev);
+ if (!iommu_spec) {
+ dev_err(&pdev->dev, "Missing iommu stream-id\n");
+ return -EINVAL;
+ }
+ stream_id = iommu_spec->ids[0] & 0xffff;
+
+ INIT_LIST_HEAD(&tdma->dma_dev.channels);
+ for (i = 0; i < cdata->nr_channels; i++) {
+ struct tegra_dma_channel *tdc = &tdma->channels[i];
+
+ tdc->irq = platform_get_irq(pdev, i);
+ if (tdc->irq < 0)
+ return tdc->irq;
+
+ tdc->chan_base_offset = TEGRA_GPCDMA_CHANNEL_BASE_ADD_OFFSET +
+ i * cdata->channel_reg_size;
+ snprintf(tdc->name, sizeof(tdc->name), "gpcdma.%d", i);
+ tdc->tdma = tdma;
+ tdc->id = i;
+ tdc->slave_id = -1;
+
+ vchan_init(&tdc->vc, &tdma->dma_dev);
+ tdc->vc.desc_free = tegra_dma_desc_free;
+
+ /* program stream-id for this channel */
+ tegra_dma_program_sid(tdc, stream_id);
+ tdc->stream_id = stream_id;
+ }
+
+ dma_cap_set(DMA_SLAVE, tdma->dma_dev.cap_mask);
+ dma_cap_set(DMA_PRIVATE, tdma->dma_dev.cap_mask);
+ dma_cap_set(DMA_MEMCPY, tdma->dma_dev.cap_mask);
+ dma_cap_set(DMA_MEMSET, tdma->dma_dev.cap_mask);
+ dma_cap_set(DMA_CYCLIC, tdma->dma_dev.cap_mask);
+
+ /*
+ * Only word aligned transfers are supported. Set the copy
+ * alignment shift.
+ */
+ tdma->dma_dev.copy_align = 2;
+ tdma->dma_dev.fill_align = 2;
+ tdma->dma_dev.device_alloc_chan_resources =
+ tegra_dma_alloc_chan_resources;
+ tdma->dma_dev.device_free_chan_resources =
+ tegra_dma_free_chan_resources;
+ tdma->dma_dev.device_prep_slave_sg = tegra_dma_prep_slave_sg;
+ tdma->dma_dev.device_prep_dma_memcpy = tegra_dma_prep_dma_memcpy;
+ tdma->dma_dev.device_prep_dma_memset = tegra_dma_prep_dma_memset;
+ tdma->dma_dev.device_prep_dma_cyclic = tegra_dma_prep_dma_cyclic;
+ tdma->dma_dev.device_config = tegra_dma_slave_config;
+ tdma->dma_dev.device_terminate_all = tegra_dma_terminate_all;
+ tdma->dma_dev.device_tx_status = tegra_dma_tx_status;
+ tdma->dma_dev.device_issue_pending = tegra_dma_issue_pending;
+ tdma->dma_dev.device_pause = tegra_dma_device_pause;
+ tdma->dma_dev.device_resume = tegra_dma_device_resume;
+ tdma->dma_dev.device_synchronize = tegra_dma_chan_synchronize;
+ tdma->dma_dev.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
+
+ ret = dma_async_device_register(&tdma->dma_dev);
+ if (ret < 0) {
+ dev_err_probe(&pdev->dev, ret,
+ "GPC DMA driver registration failed\n");
+ return ret;
+ }
+
+ ret = of_dma_controller_register(pdev->dev.of_node,
+ tegra_dma_of_xlate, tdma);
+ if (ret < 0) {
+ dev_err_probe(&pdev->dev, ret,
+ "GPC DMA OF registration failed\n");
+
+ dma_async_device_unregister(&tdma->dma_dev);
+ return ret;
+ }
+
+ dev_info(&pdev->dev, "GPC DMA driver register %d channels\n",
+ cdata->nr_channels);
+
+ return 0;
+}
+
+static int tegra_dma_remove(struct platform_device *pdev)
+{
+ struct tegra_dma *tdma = platform_get_drvdata(pdev);
+
+ of_dma_controller_free(pdev->dev.of_node);
+ dma_async_device_unregister(&tdma->dma_dev);
+
+ return 0;
+}
+
+static int __maybe_unused tegra_dma_pm_suspend(struct device *dev)
+{
+ struct tegra_dma *tdma = dev_get_drvdata(dev);
+ unsigned int i;
+
+ for (i = 0; i < tdma->chip_data->nr_channels; i++) {
+ struct tegra_dma_channel *tdc = &tdma->channels[i];
+
+ if (tdc->dma_desc) {
+ dev_err(tdma->dev, "channel %u busy\n", i);
+ return -EBUSY;
+ }
+ }
+
+ return 0;
+}
+
+static int __maybe_unused tegra_dma_pm_resume(struct device *dev)
+{
+ struct tegra_dma *tdma = dev_get_drvdata(dev);
+ unsigned int i;
+
+ reset_control_reset(tdma->rst);
+
+ for (i = 0; i < tdma->chip_data->nr_channels; i++) {
+ struct tegra_dma_channel *tdc = &tdma->channels[i];
+
+ tegra_dma_program_sid(tdc, tdc->stream_id);
+ }
+
+ return 0;
+}
+
+static const struct dev_pm_ops tegra_dma_dev_pm_ops = {
+ SET_SYSTEM_SLEEP_PM_OPS(tegra_dma_pm_suspend, tegra_dma_pm_resume)
+};
+
+static struct platform_driver tegra_dma_driver = {
+ .driver = {
+ .name = "tegra-gpcdma",
+ .pm = &tegra_dma_dev_pm_ops,
+ .of_match_table = tegra_dma_of_match,
+ },
+ .probe = tegra_dma_probe,
+ .remove = tegra_dma_remove,
+};
+
+module_platform_driver(tegra_dma_driver);
+
+MODULE_DESCRIPTION("NVIDIA Tegra GPC DMA Controller driver");
+MODULE_AUTHOR("Pavan Kunapuli <pkunapuli@nvidia.com>");
+MODULE_AUTHOR("Rajesh Gumasta <rgumasta@nvidia.com>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/dma/tegra20-apb-dma.c b/drivers/dma/tegra20-apb-dma.c
new file mode 100644
index 000000000..eaafcbe4c
--- /dev/null
+++ b/drivers/dma/tegra20-apb-dma.c
@@ -0,0 +1,1693 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * DMA driver for Nvidia's Tegra20 APB DMA controller.
+ *
+ * Copyright (c) 2012-2013, NVIDIA CORPORATION. All rights reserved.
+ */
+
+#include <linux/bitops.h>
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/of_dma.h>
+#include <linux/platform_device.h>
+#include <linux/pm.h>
+#include <linux/pm_runtime.h>
+#include <linux/reset.h>
+#include <linux/slab.h>
+#include <linux/wait.h>
+
+#include "dmaengine.h"
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/tegra_apb_dma.h>
+
+#define TEGRA_APBDMA_GENERAL 0x0
+#define TEGRA_APBDMA_GENERAL_ENABLE BIT(31)
+
+#define TEGRA_APBDMA_CONTROL 0x010
+#define TEGRA_APBDMA_IRQ_MASK 0x01c
+#define TEGRA_APBDMA_IRQ_MASK_SET 0x020
+
+/* CSR register */
+#define TEGRA_APBDMA_CHAN_CSR 0x00
+#define TEGRA_APBDMA_CSR_ENB BIT(31)
+#define TEGRA_APBDMA_CSR_IE_EOC BIT(30)
+#define TEGRA_APBDMA_CSR_HOLD BIT(29)
+#define TEGRA_APBDMA_CSR_DIR BIT(28)
+#define TEGRA_APBDMA_CSR_ONCE BIT(27)
+#define TEGRA_APBDMA_CSR_FLOW BIT(21)
+#define TEGRA_APBDMA_CSR_REQ_SEL_SHIFT 16
+#define TEGRA_APBDMA_CSR_REQ_SEL_MASK 0x1F
+#define TEGRA_APBDMA_CSR_WCOUNT_MASK 0xFFFC
+
+/* STATUS register */
+#define TEGRA_APBDMA_CHAN_STATUS 0x004
+#define TEGRA_APBDMA_STATUS_BUSY BIT(31)
+#define TEGRA_APBDMA_STATUS_ISE_EOC BIT(30)
+#define TEGRA_APBDMA_STATUS_HALT BIT(29)
+#define TEGRA_APBDMA_STATUS_PING_PONG BIT(28)
+#define TEGRA_APBDMA_STATUS_COUNT_SHIFT 2
+#define TEGRA_APBDMA_STATUS_COUNT_MASK 0xFFFC
+
+#define TEGRA_APBDMA_CHAN_CSRE 0x00C
+#define TEGRA_APBDMA_CHAN_CSRE_PAUSE BIT(31)
+
+/* AHB memory address */
+#define TEGRA_APBDMA_CHAN_AHBPTR 0x010
+
+/* AHB sequence register */
+#define TEGRA_APBDMA_CHAN_AHBSEQ 0x14
+#define TEGRA_APBDMA_AHBSEQ_INTR_ENB BIT(31)
+#define TEGRA_APBDMA_AHBSEQ_BUS_WIDTH_8 (0 << 28)
+#define TEGRA_APBDMA_AHBSEQ_BUS_WIDTH_16 (1 << 28)
+#define TEGRA_APBDMA_AHBSEQ_BUS_WIDTH_32 (2 << 28)
+#define TEGRA_APBDMA_AHBSEQ_BUS_WIDTH_64 (3 << 28)
+#define TEGRA_APBDMA_AHBSEQ_BUS_WIDTH_128 (4 << 28)
+#define TEGRA_APBDMA_AHBSEQ_DATA_SWAP BIT(27)
+#define TEGRA_APBDMA_AHBSEQ_BURST_1 (4 << 24)
+#define TEGRA_APBDMA_AHBSEQ_BURST_4 (5 << 24)
+#define TEGRA_APBDMA_AHBSEQ_BURST_8 (6 << 24)
+#define TEGRA_APBDMA_AHBSEQ_DBL_BUF BIT(19)
+#define TEGRA_APBDMA_AHBSEQ_WRAP_SHIFT 16
+#define TEGRA_APBDMA_AHBSEQ_WRAP_NONE 0
+
+/* APB address */
+#define TEGRA_APBDMA_CHAN_APBPTR 0x018
+
+/* APB sequence register */
+#define TEGRA_APBDMA_CHAN_APBSEQ 0x01c
+#define TEGRA_APBDMA_APBSEQ_BUS_WIDTH_8 (0 << 28)
+#define TEGRA_APBDMA_APBSEQ_BUS_WIDTH_16 (1 << 28)
+#define TEGRA_APBDMA_APBSEQ_BUS_WIDTH_32 (2 << 28)
+#define TEGRA_APBDMA_APBSEQ_BUS_WIDTH_64 (3 << 28)
+#define TEGRA_APBDMA_APBSEQ_BUS_WIDTH_128 (4 << 28)
+#define TEGRA_APBDMA_APBSEQ_DATA_SWAP BIT(27)
+#define TEGRA_APBDMA_APBSEQ_WRAP_WORD_1 (1 << 16)
+
+/* Tegra148 specific registers */
+#define TEGRA_APBDMA_CHAN_WCOUNT 0x20
+
+#define TEGRA_APBDMA_CHAN_WORD_TRANSFER 0x24
+
+/*
+ * If any burst is in flight and DMA paused then this is the time to complete
+ * on-flight burst and update DMA status register.
+ */
+#define TEGRA_APBDMA_BURST_COMPLETE_TIME 20
+
+/* Channel base address offset from APBDMA base address */
+#define TEGRA_APBDMA_CHANNEL_BASE_ADD_OFFSET 0x1000
+
+#define TEGRA_APBDMA_SLAVE_ID_INVALID (TEGRA_APBDMA_CSR_REQ_SEL_MASK + 1)
+
+struct tegra_dma;
+
+/*
+ * tegra_dma_chip_data Tegra chip specific DMA data
+ * @nr_channels: Number of channels available in the controller.
+ * @channel_reg_size: Channel register size/stride.
+ * @max_dma_count: Maximum DMA transfer count supported by DMA controller.
+ * @support_channel_pause: Support channel wise pause of dma.
+ * @support_separate_wcount_reg: Support separate word count register.
+ */
+struct tegra_dma_chip_data {
+ unsigned int nr_channels;
+ unsigned int channel_reg_size;
+ unsigned int max_dma_count;
+ bool support_channel_pause;
+ bool support_separate_wcount_reg;
+};
+
+/* DMA channel registers */
+struct tegra_dma_channel_regs {
+ u32 csr;
+ u32 ahb_ptr;
+ u32 apb_ptr;
+ u32 ahb_seq;
+ u32 apb_seq;
+ u32 wcount;
+};
+
+/*
+ * tegra_dma_sg_req: DMA request details to configure hardware. This
+ * contains the details for one transfer to configure DMA hw.
+ * The client's request for data transfer can be broken into multiple
+ * sub-transfer as per requester details and hw support.
+ * This sub transfer get added in the list of transfer and point to Tegra
+ * DMA descriptor which manages the transfer details.
+ */
+struct tegra_dma_sg_req {
+ struct tegra_dma_channel_regs ch_regs;
+ unsigned int req_len;
+ bool configured;
+ bool last_sg;
+ struct list_head node;
+ struct tegra_dma_desc *dma_desc;
+ unsigned int words_xferred;
+};
+
+/*
+ * tegra_dma_desc: Tegra DMA descriptors which manages the client requests.
+ * This descriptor keep track of transfer status, callbacks and request
+ * counts etc.
+ */
+struct tegra_dma_desc {
+ struct dma_async_tx_descriptor txd;
+ unsigned int bytes_requested;
+ unsigned int bytes_transferred;
+ enum dma_status dma_status;
+ struct list_head node;
+ struct list_head tx_list;
+ struct list_head cb_node;
+ unsigned int cb_count;
+};
+
+struct tegra_dma_channel;
+
+typedef void (*dma_isr_handler)(struct tegra_dma_channel *tdc,
+ bool to_terminate);
+
+/* tegra_dma_channel: Channel specific information */
+struct tegra_dma_channel {
+ struct dma_chan dma_chan;
+ char name[12];
+ bool config_init;
+ unsigned int id;
+ void __iomem *chan_addr;
+ spinlock_t lock;
+ bool busy;
+ struct tegra_dma *tdma;
+ bool cyclic;
+
+ /* Different lists for managing the requests */
+ struct list_head free_sg_req;
+ struct list_head pending_sg_req;
+ struct list_head free_dma_desc;
+ struct list_head cb_desc;
+
+ /* ISR handler and tasklet for bottom half of isr handling */
+ dma_isr_handler isr_handler;
+ struct tasklet_struct tasklet;
+
+ /* Channel-slave specific configuration */
+ unsigned int slave_id;
+ struct dma_slave_config dma_sconfig;
+ struct tegra_dma_channel_regs channel_reg;
+
+ struct wait_queue_head wq;
+};
+
+/* tegra_dma: Tegra DMA specific information */
+struct tegra_dma {
+ struct dma_device dma_dev;
+ struct device *dev;
+ struct clk *dma_clk;
+ struct reset_control *rst;
+ spinlock_t global_lock;
+ void __iomem *base_addr;
+ const struct tegra_dma_chip_data *chip_data;
+
+ /*
+ * Counter for managing global pausing of the DMA controller.
+ * Only applicable for devices that don't support individual
+ * channel pausing.
+ */
+ u32 global_pause_count;
+
+ /* Last member of the structure */
+ struct tegra_dma_channel channels[];
+};
+
+static inline void tdma_write(struct tegra_dma *tdma, u32 reg, u32 val)
+{
+ writel(val, tdma->base_addr + reg);
+}
+
+static inline u32 tdma_read(struct tegra_dma *tdma, u32 reg)
+{
+ return readl(tdma->base_addr + reg);
+}
+
+static inline void tdc_write(struct tegra_dma_channel *tdc,
+ u32 reg, u32 val)
+{
+ writel(val, tdc->chan_addr + reg);
+}
+
+static inline u32 tdc_read(struct tegra_dma_channel *tdc, u32 reg)
+{
+ return readl(tdc->chan_addr + reg);
+}
+
+static inline struct tegra_dma_channel *to_tegra_dma_chan(struct dma_chan *dc)
+{
+ return container_of(dc, struct tegra_dma_channel, dma_chan);
+}
+
+static inline struct tegra_dma_desc *
+txd_to_tegra_dma_desc(struct dma_async_tx_descriptor *td)
+{
+ return container_of(td, struct tegra_dma_desc, txd);
+}
+
+static inline struct device *tdc2dev(struct tegra_dma_channel *tdc)
+{
+ return &tdc->dma_chan.dev->device;
+}
+
+static dma_cookie_t tegra_dma_tx_submit(struct dma_async_tx_descriptor *tx);
+
+/* Get DMA desc from free list, if not there then allocate it. */
+static struct tegra_dma_desc *tegra_dma_desc_get(struct tegra_dma_channel *tdc)
+{
+ struct tegra_dma_desc *dma_desc;
+ unsigned long flags;
+
+ spin_lock_irqsave(&tdc->lock, flags);
+
+ /* Do not allocate if desc are waiting for ack */
+ list_for_each_entry(dma_desc, &tdc->free_dma_desc, node) {
+ if (async_tx_test_ack(&dma_desc->txd) && !dma_desc->cb_count) {
+ list_del(&dma_desc->node);
+ spin_unlock_irqrestore(&tdc->lock, flags);
+ dma_desc->txd.flags = 0;
+ return dma_desc;
+ }
+ }
+
+ spin_unlock_irqrestore(&tdc->lock, flags);
+
+ /* Allocate DMA desc */
+ dma_desc = kzalloc(sizeof(*dma_desc), GFP_NOWAIT);
+ if (!dma_desc)
+ return NULL;
+
+ dma_async_tx_descriptor_init(&dma_desc->txd, &tdc->dma_chan);
+ dma_desc->txd.tx_submit = tegra_dma_tx_submit;
+ dma_desc->txd.flags = 0;
+
+ return dma_desc;
+}
+
+static void tegra_dma_desc_put(struct tegra_dma_channel *tdc,
+ struct tegra_dma_desc *dma_desc)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&tdc->lock, flags);
+ if (!list_empty(&dma_desc->tx_list))
+ list_splice_init(&dma_desc->tx_list, &tdc->free_sg_req);
+ list_add_tail(&dma_desc->node, &tdc->free_dma_desc);
+ spin_unlock_irqrestore(&tdc->lock, flags);
+}
+
+static struct tegra_dma_sg_req *
+tegra_dma_sg_req_get(struct tegra_dma_channel *tdc)
+{
+ struct tegra_dma_sg_req *sg_req;
+ unsigned long flags;
+
+ spin_lock_irqsave(&tdc->lock, flags);
+ if (!list_empty(&tdc->free_sg_req)) {
+ sg_req = list_first_entry(&tdc->free_sg_req, typeof(*sg_req),
+ node);
+ list_del(&sg_req->node);
+ spin_unlock_irqrestore(&tdc->lock, flags);
+ return sg_req;
+ }
+ spin_unlock_irqrestore(&tdc->lock, flags);
+
+ sg_req = kzalloc(sizeof(*sg_req), GFP_NOWAIT);
+
+ return sg_req;
+}
+
+static int tegra_dma_slave_config(struct dma_chan *dc,
+ struct dma_slave_config *sconfig)
+{
+ struct tegra_dma_channel *tdc = to_tegra_dma_chan(dc);
+
+ if (!list_empty(&tdc->pending_sg_req)) {
+ dev_err(tdc2dev(tdc), "Configuration not allowed\n");
+ return -EBUSY;
+ }
+
+ memcpy(&tdc->dma_sconfig, sconfig, sizeof(*sconfig));
+ tdc->config_init = true;
+
+ return 0;
+}
+
+static void tegra_dma_global_pause(struct tegra_dma_channel *tdc,
+ bool wait_for_burst_complete)
+{
+ struct tegra_dma *tdma = tdc->tdma;
+
+ spin_lock(&tdma->global_lock);
+
+ if (tdc->tdma->global_pause_count == 0) {
+ tdma_write(tdma, TEGRA_APBDMA_GENERAL, 0);
+ if (wait_for_burst_complete)
+ udelay(TEGRA_APBDMA_BURST_COMPLETE_TIME);
+ }
+
+ tdc->tdma->global_pause_count++;
+
+ spin_unlock(&tdma->global_lock);
+}
+
+static void tegra_dma_global_resume(struct tegra_dma_channel *tdc)
+{
+ struct tegra_dma *tdma = tdc->tdma;
+
+ spin_lock(&tdma->global_lock);
+
+ if (WARN_ON(tdc->tdma->global_pause_count == 0))
+ goto out;
+
+ if (--tdc->tdma->global_pause_count == 0)
+ tdma_write(tdma, TEGRA_APBDMA_GENERAL,
+ TEGRA_APBDMA_GENERAL_ENABLE);
+
+out:
+ spin_unlock(&tdma->global_lock);
+}
+
+static void tegra_dma_pause(struct tegra_dma_channel *tdc,
+ bool wait_for_burst_complete)
+{
+ struct tegra_dma *tdma = tdc->tdma;
+
+ if (tdma->chip_data->support_channel_pause) {
+ tdc_write(tdc, TEGRA_APBDMA_CHAN_CSRE,
+ TEGRA_APBDMA_CHAN_CSRE_PAUSE);
+ if (wait_for_burst_complete)
+ udelay(TEGRA_APBDMA_BURST_COMPLETE_TIME);
+ } else {
+ tegra_dma_global_pause(tdc, wait_for_burst_complete);
+ }
+}
+
+static void tegra_dma_resume(struct tegra_dma_channel *tdc)
+{
+ struct tegra_dma *tdma = tdc->tdma;
+
+ if (tdma->chip_data->support_channel_pause)
+ tdc_write(tdc, TEGRA_APBDMA_CHAN_CSRE, 0);
+ else
+ tegra_dma_global_resume(tdc);
+}
+
+static void tegra_dma_stop(struct tegra_dma_channel *tdc)
+{
+ u32 csr, status;
+
+ /* Disable interrupts */
+ csr = tdc_read(tdc, TEGRA_APBDMA_CHAN_CSR);
+ csr &= ~TEGRA_APBDMA_CSR_IE_EOC;
+ tdc_write(tdc, TEGRA_APBDMA_CHAN_CSR, csr);
+
+ /* Disable DMA */
+ csr &= ~TEGRA_APBDMA_CSR_ENB;
+ tdc_write(tdc, TEGRA_APBDMA_CHAN_CSR, csr);
+
+ /* Clear interrupt status if it is there */
+ status = tdc_read(tdc, TEGRA_APBDMA_CHAN_STATUS);
+ if (status & TEGRA_APBDMA_STATUS_ISE_EOC) {
+ dev_dbg(tdc2dev(tdc), "%s():clearing interrupt\n", __func__);
+ tdc_write(tdc, TEGRA_APBDMA_CHAN_STATUS, status);
+ }
+ tdc->busy = false;
+}
+
+static void tegra_dma_start(struct tegra_dma_channel *tdc,
+ struct tegra_dma_sg_req *sg_req)
+{
+ struct tegra_dma_channel_regs *ch_regs = &sg_req->ch_regs;
+
+ tdc_write(tdc, TEGRA_APBDMA_CHAN_CSR, ch_regs->csr);
+ tdc_write(tdc, TEGRA_APBDMA_CHAN_APBSEQ, ch_regs->apb_seq);
+ tdc_write(tdc, TEGRA_APBDMA_CHAN_APBPTR, ch_regs->apb_ptr);
+ tdc_write(tdc, TEGRA_APBDMA_CHAN_AHBSEQ, ch_regs->ahb_seq);
+ tdc_write(tdc, TEGRA_APBDMA_CHAN_AHBPTR, ch_regs->ahb_ptr);
+ if (tdc->tdma->chip_data->support_separate_wcount_reg)
+ tdc_write(tdc, TEGRA_APBDMA_CHAN_WCOUNT, ch_regs->wcount);
+
+ /* Start DMA */
+ tdc_write(tdc, TEGRA_APBDMA_CHAN_CSR,
+ ch_regs->csr | TEGRA_APBDMA_CSR_ENB);
+}
+
+static void tegra_dma_configure_for_next(struct tegra_dma_channel *tdc,
+ struct tegra_dma_sg_req *nsg_req)
+{
+ unsigned long status;
+
+ /*
+ * The DMA controller reloads the new configuration for next transfer
+ * after last burst of current transfer completes.
+ * If there is no IEC status then this makes sure that last burst
+ * has not be completed. There may be case that last burst is on
+ * flight and so it can complete but because DMA is paused, it
+ * will not generates interrupt as well as not reload the new
+ * configuration.
+ * If there is already IEC status then interrupt handler need to
+ * load new configuration.
+ */
+ tegra_dma_pause(tdc, false);
+ status = tdc_read(tdc, TEGRA_APBDMA_CHAN_STATUS);
+
+ /*
+ * If interrupt is pending then do nothing as the ISR will handle
+ * the programing for new request.
+ */
+ if (status & TEGRA_APBDMA_STATUS_ISE_EOC) {
+ dev_err(tdc2dev(tdc),
+ "Skipping new configuration as interrupt is pending\n");
+ tegra_dma_resume(tdc);
+ return;
+ }
+
+ /* Safe to program new configuration */
+ tdc_write(tdc, TEGRA_APBDMA_CHAN_APBPTR, nsg_req->ch_regs.apb_ptr);
+ tdc_write(tdc, TEGRA_APBDMA_CHAN_AHBPTR, nsg_req->ch_regs.ahb_ptr);
+ if (tdc->tdma->chip_data->support_separate_wcount_reg)
+ tdc_write(tdc, TEGRA_APBDMA_CHAN_WCOUNT,
+ nsg_req->ch_regs.wcount);
+ tdc_write(tdc, TEGRA_APBDMA_CHAN_CSR,
+ nsg_req->ch_regs.csr | TEGRA_APBDMA_CSR_ENB);
+ nsg_req->configured = true;
+ nsg_req->words_xferred = 0;
+
+ tegra_dma_resume(tdc);
+}
+
+static void tdc_start_head_req(struct tegra_dma_channel *tdc)
+{
+ struct tegra_dma_sg_req *sg_req;
+
+ sg_req = list_first_entry(&tdc->pending_sg_req, typeof(*sg_req), node);
+ tegra_dma_start(tdc, sg_req);
+ sg_req->configured = true;
+ sg_req->words_xferred = 0;
+ tdc->busy = true;
+}
+
+static void tdc_configure_next_head_desc(struct tegra_dma_channel *tdc)
+{
+ struct tegra_dma_sg_req *hsgreq, *hnsgreq;
+
+ hsgreq = list_first_entry(&tdc->pending_sg_req, typeof(*hsgreq), node);
+ if (!list_is_last(&hsgreq->node, &tdc->pending_sg_req)) {
+ hnsgreq = list_first_entry(&hsgreq->node, typeof(*hnsgreq),
+ node);
+ tegra_dma_configure_for_next(tdc, hnsgreq);
+ }
+}
+
+static inline unsigned int
+get_current_xferred_count(struct tegra_dma_channel *tdc,
+ struct tegra_dma_sg_req *sg_req,
+ unsigned long status)
+{
+ return sg_req->req_len - (status & TEGRA_APBDMA_STATUS_COUNT_MASK) - 4;
+}
+
+static void tegra_dma_abort_all(struct tegra_dma_channel *tdc)
+{
+ struct tegra_dma_desc *dma_desc;
+ struct tegra_dma_sg_req *sgreq;
+
+ while (!list_empty(&tdc->pending_sg_req)) {
+ sgreq = list_first_entry(&tdc->pending_sg_req, typeof(*sgreq),
+ node);
+ list_move_tail(&sgreq->node, &tdc->free_sg_req);
+ if (sgreq->last_sg) {
+ dma_desc = sgreq->dma_desc;
+ dma_desc->dma_status = DMA_ERROR;
+ list_add_tail(&dma_desc->node, &tdc->free_dma_desc);
+
+ /* Add in cb list if it is not there. */
+ if (!dma_desc->cb_count)
+ list_add_tail(&dma_desc->cb_node,
+ &tdc->cb_desc);
+ dma_desc->cb_count++;
+ }
+ }
+ tdc->isr_handler = NULL;
+}
+
+static bool handle_continuous_head_request(struct tegra_dma_channel *tdc,
+ bool to_terminate)
+{
+ struct tegra_dma_sg_req *hsgreq;
+
+ /*
+ * Check that head req on list should be in flight.
+ * If it is not in flight then abort transfer as
+ * looping of transfer can not continue.
+ */
+ hsgreq = list_first_entry(&tdc->pending_sg_req, typeof(*hsgreq), node);
+ if (!hsgreq->configured) {
+ tegra_dma_stop(tdc);
+ pm_runtime_put(tdc->tdma->dev);
+ dev_err(tdc2dev(tdc), "DMA transfer underflow, aborting DMA\n");
+ tegra_dma_abort_all(tdc);
+ return false;
+ }
+
+ /* Configure next request */
+ if (!to_terminate)
+ tdc_configure_next_head_desc(tdc);
+
+ return true;
+}
+
+static void handle_once_dma_done(struct tegra_dma_channel *tdc,
+ bool to_terminate)
+{
+ struct tegra_dma_desc *dma_desc;
+ struct tegra_dma_sg_req *sgreq;
+
+ tdc->busy = false;
+ sgreq = list_first_entry(&tdc->pending_sg_req, typeof(*sgreq), node);
+ dma_desc = sgreq->dma_desc;
+ dma_desc->bytes_transferred += sgreq->req_len;
+
+ list_del(&sgreq->node);
+ if (sgreq->last_sg) {
+ dma_desc->dma_status = DMA_COMPLETE;
+ dma_cookie_complete(&dma_desc->txd);
+ if (!dma_desc->cb_count)
+ list_add_tail(&dma_desc->cb_node, &tdc->cb_desc);
+ dma_desc->cb_count++;
+ list_add_tail(&dma_desc->node, &tdc->free_dma_desc);
+ }
+ list_add_tail(&sgreq->node, &tdc->free_sg_req);
+
+ /* Do not start DMA if it is going to be terminate */
+ if (to_terminate)
+ return;
+
+ if (list_empty(&tdc->pending_sg_req)) {
+ pm_runtime_put(tdc->tdma->dev);
+ return;
+ }
+
+ tdc_start_head_req(tdc);
+}
+
+static void handle_cont_sngl_cycle_dma_done(struct tegra_dma_channel *tdc,
+ bool to_terminate)
+{
+ struct tegra_dma_desc *dma_desc;
+ struct tegra_dma_sg_req *sgreq;
+ bool st;
+
+ sgreq = list_first_entry(&tdc->pending_sg_req, typeof(*sgreq), node);
+ dma_desc = sgreq->dma_desc;
+ /* if we dma for long enough the transfer count will wrap */
+ dma_desc->bytes_transferred =
+ (dma_desc->bytes_transferred + sgreq->req_len) %
+ dma_desc->bytes_requested;
+
+ /* Callback need to be call */
+ if (!dma_desc->cb_count)
+ list_add_tail(&dma_desc->cb_node, &tdc->cb_desc);
+ dma_desc->cb_count++;
+
+ sgreq->words_xferred = 0;
+
+ /* If not last req then put at end of pending list */
+ if (!list_is_last(&sgreq->node, &tdc->pending_sg_req)) {
+ list_move_tail(&sgreq->node, &tdc->pending_sg_req);
+ sgreq->configured = false;
+ st = handle_continuous_head_request(tdc, to_terminate);
+ if (!st)
+ dma_desc->dma_status = DMA_ERROR;
+ }
+}
+
+static void tegra_dma_tasklet(struct tasklet_struct *t)
+{
+ struct tegra_dma_channel *tdc = from_tasklet(tdc, t, tasklet);
+ struct dmaengine_desc_callback cb;
+ struct tegra_dma_desc *dma_desc;
+ unsigned int cb_count;
+ unsigned long flags;
+
+ spin_lock_irqsave(&tdc->lock, flags);
+ while (!list_empty(&tdc->cb_desc)) {
+ dma_desc = list_first_entry(&tdc->cb_desc, typeof(*dma_desc),
+ cb_node);
+ list_del(&dma_desc->cb_node);
+ dmaengine_desc_get_callback(&dma_desc->txd, &cb);
+ cb_count = dma_desc->cb_count;
+ dma_desc->cb_count = 0;
+ trace_tegra_dma_complete_cb(&tdc->dma_chan, cb_count,
+ cb.callback);
+ spin_unlock_irqrestore(&tdc->lock, flags);
+ while (cb_count--)
+ dmaengine_desc_callback_invoke(&cb, NULL);
+ spin_lock_irqsave(&tdc->lock, flags);
+ }
+ spin_unlock_irqrestore(&tdc->lock, flags);
+}
+
+static irqreturn_t tegra_dma_isr(int irq, void *dev_id)
+{
+ struct tegra_dma_channel *tdc = dev_id;
+ u32 status;
+
+ spin_lock(&tdc->lock);
+
+ trace_tegra_dma_isr(&tdc->dma_chan, irq);
+ status = tdc_read(tdc, TEGRA_APBDMA_CHAN_STATUS);
+ if (status & TEGRA_APBDMA_STATUS_ISE_EOC) {
+ tdc_write(tdc, TEGRA_APBDMA_CHAN_STATUS, status);
+ tdc->isr_handler(tdc, false);
+ tasklet_schedule(&tdc->tasklet);
+ wake_up_all(&tdc->wq);
+ spin_unlock(&tdc->lock);
+ return IRQ_HANDLED;
+ }
+
+ spin_unlock(&tdc->lock);
+ dev_info(tdc2dev(tdc), "Interrupt already served status 0x%08x\n",
+ status);
+
+ return IRQ_NONE;
+}
+
+static dma_cookie_t tegra_dma_tx_submit(struct dma_async_tx_descriptor *txd)
+{
+ struct tegra_dma_desc *dma_desc = txd_to_tegra_dma_desc(txd);
+ struct tegra_dma_channel *tdc = to_tegra_dma_chan(txd->chan);
+ unsigned long flags;
+ dma_cookie_t cookie;
+
+ spin_lock_irqsave(&tdc->lock, flags);
+ dma_desc->dma_status = DMA_IN_PROGRESS;
+ cookie = dma_cookie_assign(&dma_desc->txd);
+ list_splice_tail_init(&dma_desc->tx_list, &tdc->pending_sg_req);
+ spin_unlock_irqrestore(&tdc->lock, flags);
+
+ return cookie;
+}
+
+static void tegra_dma_issue_pending(struct dma_chan *dc)
+{
+ struct tegra_dma_channel *tdc = to_tegra_dma_chan(dc);
+ unsigned long flags;
+ int err;
+
+ spin_lock_irqsave(&tdc->lock, flags);
+ if (list_empty(&tdc->pending_sg_req)) {
+ dev_err(tdc2dev(tdc), "No DMA request\n");
+ goto end;
+ }
+ if (!tdc->busy) {
+ err = pm_runtime_resume_and_get(tdc->tdma->dev);
+ if (err < 0) {
+ dev_err(tdc2dev(tdc), "Failed to enable DMA\n");
+ goto end;
+ }
+
+ tdc_start_head_req(tdc);
+
+ /* Continuous single mode: Configure next req */
+ if (tdc->cyclic) {
+ /*
+ * Wait for 1 burst time for configure DMA for
+ * next transfer.
+ */
+ udelay(TEGRA_APBDMA_BURST_COMPLETE_TIME);
+ tdc_configure_next_head_desc(tdc);
+ }
+ }
+end:
+ spin_unlock_irqrestore(&tdc->lock, flags);
+}
+
+static int tegra_dma_terminate_all(struct dma_chan *dc)
+{
+ struct tegra_dma_channel *tdc = to_tegra_dma_chan(dc);
+ struct tegra_dma_desc *dma_desc;
+ struct tegra_dma_sg_req *sgreq;
+ unsigned long flags;
+ u32 status, wcount;
+ bool was_busy;
+
+ spin_lock_irqsave(&tdc->lock, flags);
+
+ if (!tdc->busy)
+ goto skip_dma_stop;
+
+ /* Pause DMA before checking the queue status */
+ tegra_dma_pause(tdc, true);
+
+ status = tdc_read(tdc, TEGRA_APBDMA_CHAN_STATUS);
+ if (status & TEGRA_APBDMA_STATUS_ISE_EOC) {
+ dev_dbg(tdc2dev(tdc), "%s():handling isr\n", __func__);
+ tdc->isr_handler(tdc, true);
+ status = tdc_read(tdc, TEGRA_APBDMA_CHAN_STATUS);
+ }
+ if (tdc->tdma->chip_data->support_separate_wcount_reg)
+ wcount = tdc_read(tdc, TEGRA_APBDMA_CHAN_WORD_TRANSFER);
+ else
+ wcount = status;
+
+ was_busy = tdc->busy;
+ tegra_dma_stop(tdc);
+
+ if (!list_empty(&tdc->pending_sg_req) && was_busy) {
+ sgreq = list_first_entry(&tdc->pending_sg_req, typeof(*sgreq),
+ node);
+ sgreq->dma_desc->bytes_transferred +=
+ get_current_xferred_count(tdc, sgreq, wcount);
+ }
+ tegra_dma_resume(tdc);
+
+ pm_runtime_put(tdc->tdma->dev);
+ wake_up_all(&tdc->wq);
+
+skip_dma_stop:
+ tegra_dma_abort_all(tdc);
+
+ while (!list_empty(&tdc->cb_desc)) {
+ dma_desc = list_first_entry(&tdc->cb_desc, typeof(*dma_desc),
+ cb_node);
+ list_del(&dma_desc->cb_node);
+ dma_desc->cb_count = 0;
+ }
+ spin_unlock_irqrestore(&tdc->lock, flags);
+
+ return 0;
+}
+
+static bool tegra_dma_eoc_interrupt_deasserted(struct tegra_dma_channel *tdc)
+{
+ unsigned long flags;
+ u32 status;
+
+ spin_lock_irqsave(&tdc->lock, flags);
+ status = tdc_read(tdc, TEGRA_APBDMA_CHAN_STATUS);
+ spin_unlock_irqrestore(&tdc->lock, flags);
+
+ return !(status & TEGRA_APBDMA_STATUS_ISE_EOC);
+}
+
+static void tegra_dma_synchronize(struct dma_chan *dc)
+{
+ struct tegra_dma_channel *tdc = to_tegra_dma_chan(dc);
+ int err;
+
+ err = pm_runtime_resume_and_get(tdc->tdma->dev);
+ if (err < 0) {
+ dev_err(tdc2dev(tdc), "Failed to synchronize DMA: %d\n", err);
+ return;
+ }
+
+ /*
+ * CPU, which handles interrupt, could be busy in
+ * uninterruptible state, in this case sibling CPU
+ * should wait until interrupt is handled.
+ */
+ wait_event(tdc->wq, tegra_dma_eoc_interrupt_deasserted(tdc));
+
+ tasklet_kill(&tdc->tasklet);
+
+ pm_runtime_put(tdc->tdma->dev);
+}
+
+static unsigned int tegra_dma_sg_bytes_xferred(struct tegra_dma_channel *tdc,
+ struct tegra_dma_sg_req *sg_req)
+{
+ u32 status, wcount = 0;
+
+ if (!list_is_first(&sg_req->node, &tdc->pending_sg_req))
+ return 0;
+
+ if (tdc->tdma->chip_data->support_separate_wcount_reg)
+ wcount = tdc_read(tdc, TEGRA_APBDMA_CHAN_WORD_TRANSFER);
+
+ status = tdc_read(tdc, TEGRA_APBDMA_CHAN_STATUS);
+
+ if (!tdc->tdma->chip_data->support_separate_wcount_reg)
+ wcount = status;
+
+ if (status & TEGRA_APBDMA_STATUS_ISE_EOC)
+ return sg_req->req_len;
+
+ wcount = get_current_xferred_count(tdc, sg_req, wcount);
+
+ if (!wcount) {
+ /*
+ * If wcount wasn't ever polled for this SG before, then
+ * simply assume that transfer hasn't started yet.
+ *
+ * Otherwise it's the end of the transfer.
+ *
+ * The alternative would be to poll the status register
+ * until EOC bit is set or wcount goes UP. That's so
+ * because EOC bit is getting set only after the last
+ * burst's completion and counter is less than the actual
+ * transfer size by 4 bytes. The counter value wraps around
+ * in a cyclic mode before EOC is set(!), so we can't easily
+ * distinguish start of transfer from its end.
+ */
+ if (sg_req->words_xferred)
+ wcount = sg_req->req_len - 4;
+
+ } else if (wcount < sg_req->words_xferred) {
+ /*
+ * This case will never happen for a non-cyclic transfer.
+ *
+ * For a cyclic transfer, although it is possible for the
+ * next transfer to have already started (resetting the word
+ * count), this case should still not happen because we should
+ * have detected that the EOC bit is set and hence the transfer
+ * was completed.
+ */
+ WARN_ON_ONCE(1);
+
+ wcount = sg_req->req_len - 4;
+ } else {
+ sg_req->words_xferred = wcount;
+ }
+
+ return wcount;
+}
+
+static enum dma_status tegra_dma_tx_status(struct dma_chan *dc,
+ dma_cookie_t cookie,
+ struct dma_tx_state *txstate)
+{
+ struct tegra_dma_channel *tdc = to_tegra_dma_chan(dc);
+ struct tegra_dma_desc *dma_desc;
+ struct tegra_dma_sg_req *sg_req;
+ enum dma_status ret;
+ unsigned long flags;
+ unsigned int residual;
+ unsigned int bytes = 0;
+
+ ret = dma_cookie_status(dc, cookie, txstate);
+ if (ret == DMA_COMPLETE)
+ return ret;
+
+ spin_lock_irqsave(&tdc->lock, flags);
+
+ /* Check on wait_ack desc status */
+ list_for_each_entry(dma_desc, &tdc->free_dma_desc, node) {
+ if (dma_desc->txd.cookie == cookie) {
+ ret = dma_desc->dma_status;
+ goto found;
+ }
+ }
+
+ /* Check in pending list */
+ list_for_each_entry(sg_req, &tdc->pending_sg_req, node) {
+ dma_desc = sg_req->dma_desc;
+ if (dma_desc->txd.cookie == cookie) {
+ bytes = tegra_dma_sg_bytes_xferred(tdc, sg_req);
+ ret = dma_desc->dma_status;
+ goto found;
+ }
+ }
+
+ dev_dbg(tdc2dev(tdc), "cookie %d not found\n", cookie);
+ dma_desc = NULL;
+
+found:
+ if (dma_desc && txstate) {
+ residual = dma_desc->bytes_requested -
+ ((dma_desc->bytes_transferred + bytes) %
+ dma_desc->bytes_requested);
+ dma_set_residue(txstate, residual);
+ }
+
+ trace_tegra_dma_tx_status(&tdc->dma_chan, cookie, txstate);
+ spin_unlock_irqrestore(&tdc->lock, flags);
+
+ return ret;
+}
+
+static inline unsigned int get_bus_width(struct tegra_dma_channel *tdc,
+ enum dma_slave_buswidth slave_bw)
+{
+ switch (slave_bw) {
+ case DMA_SLAVE_BUSWIDTH_1_BYTE:
+ return TEGRA_APBDMA_APBSEQ_BUS_WIDTH_8;
+ case DMA_SLAVE_BUSWIDTH_2_BYTES:
+ return TEGRA_APBDMA_APBSEQ_BUS_WIDTH_16;
+ case DMA_SLAVE_BUSWIDTH_4_BYTES:
+ return TEGRA_APBDMA_APBSEQ_BUS_WIDTH_32;
+ case DMA_SLAVE_BUSWIDTH_8_BYTES:
+ return TEGRA_APBDMA_APBSEQ_BUS_WIDTH_64;
+ default:
+ dev_warn(tdc2dev(tdc),
+ "slave bw is not supported, using 32bits\n");
+ return TEGRA_APBDMA_APBSEQ_BUS_WIDTH_32;
+ }
+}
+
+static inline unsigned int get_burst_size(struct tegra_dma_channel *tdc,
+ u32 burst_size,
+ enum dma_slave_buswidth slave_bw,
+ u32 len)
+{
+ unsigned int burst_byte, burst_ahb_width;
+
+ /*
+ * burst_size from client is in terms of the bus_width.
+ * convert them into AHB memory width which is 4 byte.
+ */
+ burst_byte = burst_size * slave_bw;
+ burst_ahb_width = burst_byte / 4;
+
+ /* If burst size is 0 then calculate the burst size based on length */
+ if (!burst_ahb_width) {
+ if (len & 0xF)
+ return TEGRA_APBDMA_AHBSEQ_BURST_1;
+ else if ((len >> 4) & 0x1)
+ return TEGRA_APBDMA_AHBSEQ_BURST_4;
+ else
+ return TEGRA_APBDMA_AHBSEQ_BURST_8;
+ }
+ if (burst_ahb_width < 4)
+ return TEGRA_APBDMA_AHBSEQ_BURST_1;
+ else if (burst_ahb_width < 8)
+ return TEGRA_APBDMA_AHBSEQ_BURST_4;
+ else
+ return TEGRA_APBDMA_AHBSEQ_BURST_8;
+}
+
+static int get_transfer_param(struct tegra_dma_channel *tdc,
+ enum dma_transfer_direction direction,
+ u32 *apb_addr,
+ u32 *apb_seq,
+ u32 *csr,
+ unsigned int *burst_size,
+ enum dma_slave_buswidth *slave_bw)
+{
+ switch (direction) {
+ case DMA_MEM_TO_DEV:
+ *apb_addr = tdc->dma_sconfig.dst_addr;
+ *apb_seq = get_bus_width(tdc, tdc->dma_sconfig.dst_addr_width);
+ *burst_size = tdc->dma_sconfig.dst_maxburst;
+ *slave_bw = tdc->dma_sconfig.dst_addr_width;
+ *csr = TEGRA_APBDMA_CSR_DIR;
+ return 0;
+
+ case DMA_DEV_TO_MEM:
+ *apb_addr = tdc->dma_sconfig.src_addr;
+ *apb_seq = get_bus_width(tdc, tdc->dma_sconfig.src_addr_width);
+ *burst_size = tdc->dma_sconfig.src_maxburst;
+ *slave_bw = tdc->dma_sconfig.src_addr_width;
+ *csr = 0;
+ return 0;
+
+ default:
+ dev_err(tdc2dev(tdc), "DMA direction is not supported\n");
+ break;
+ }
+
+ return -EINVAL;
+}
+
+static void tegra_dma_prep_wcount(struct tegra_dma_channel *tdc,
+ struct tegra_dma_channel_regs *ch_regs,
+ u32 len)
+{
+ u32 len_field = (len - 4) & 0xFFFC;
+
+ if (tdc->tdma->chip_data->support_separate_wcount_reg)
+ ch_regs->wcount = len_field;
+ else
+ ch_regs->csr |= len_field;
+}
+
+static struct dma_async_tx_descriptor *
+tegra_dma_prep_slave_sg(struct dma_chan *dc,
+ struct scatterlist *sgl,
+ unsigned int sg_len,
+ enum dma_transfer_direction direction,
+ unsigned long flags,
+ void *context)
+{
+ struct tegra_dma_channel *tdc = to_tegra_dma_chan(dc);
+ struct tegra_dma_sg_req *sg_req = NULL;
+ u32 csr, ahb_seq, apb_ptr, apb_seq;
+ enum dma_slave_buswidth slave_bw;
+ struct tegra_dma_desc *dma_desc;
+ struct list_head req_list;
+ struct scatterlist *sg;
+ unsigned int burst_size;
+ unsigned int i;
+
+ if (!tdc->config_init) {
+ dev_err(tdc2dev(tdc), "DMA channel is not configured\n");
+ return NULL;
+ }
+ if (sg_len < 1) {
+ dev_err(tdc2dev(tdc), "Invalid segment length %d\n", sg_len);
+ return NULL;
+ }
+
+ if (get_transfer_param(tdc, direction, &apb_ptr, &apb_seq, &csr,
+ &burst_size, &slave_bw) < 0)
+ return NULL;
+
+ INIT_LIST_HEAD(&req_list);
+
+ ahb_seq = TEGRA_APBDMA_AHBSEQ_INTR_ENB;
+ ahb_seq |= TEGRA_APBDMA_AHBSEQ_WRAP_NONE <<
+ TEGRA_APBDMA_AHBSEQ_WRAP_SHIFT;
+ ahb_seq |= TEGRA_APBDMA_AHBSEQ_BUS_WIDTH_32;
+
+ csr |= TEGRA_APBDMA_CSR_ONCE;
+
+ if (tdc->slave_id != TEGRA_APBDMA_SLAVE_ID_INVALID) {
+ csr |= TEGRA_APBDMA_CSR_FLOW;
+ csr |= tdc->slave_id << TEGRA_APBDMA_CSR_REQ_SEL_SHIFT;
+ }
+
+ if (flags & DMA_PREP_INTERRUPT) {
+ csr |= TEGRA_APBDMA_CSR_IE_EOC;
+ } else {
+ WARN_ON_ONCE(1);
+ return NULL;
+ }
+
+ apb_seq |= TEGRA_APBDMA_APBSEQ_WRAP_WORD_1;
+
+ dma_desc = tegra_dma_desc_get(tdc);
+ if (!dma_desc) {
+ dev_err(tdc2dev(tdc), "DMA descriptors not available\n");
+ return NULL;
+ }
+ INIT_LIST_HEAD(&dma_desc->tx_list);
+ INIT_LIST_HEAD(&dma_desc->cb_node);
+ dma_desc->cb_count = 0;
+ dma_desc->bytes_requested = 0;
+ dma_desc->bytes_transferred = 0;
+ dma_desc->dma_status = DMA_IN_PROGRESS;
+
+ /* Make transfer requests */
+ for_each_sg(sgl, sg, sg_len, i) {
+ u32 len, mem;
+
+ mem = sg_dma_address(sg);
+ len = sg_dma_len(sg);
+
+ if ((len & 3) || (mem & 3) ||
+ len > tdc->tdma->chip_data->max_dma_count) {
+ dev_err(tdc2dev(tdc),
+ "DMA length/memory address is not supported\n");
+ tegra_dma_desc_put(tdc, dma_desc);
+ return NULL;
+ }
+
+ sg_req = tegra_dma_sg_req_get(tdc);
+ if (!sg_req) {
+ dev_err(tdc2dev(tdc), "DMA sg-req not available\n");
+ tegra_dma_desc_put(tdc, dma_desc);
+ return NULL;
+ }
+
+ ahb_seq |= get_burst_size(tdc, burst_size, slave_bw, len);
+ dma_desc->bytes_requested += len;
+
+ sg_req->ch_regs.apb_ptr = apb_ptr;
+ sg_req->ch_regs.ahb_ptr = mem;
+ sg_req->ch_regs.csr = csr;
+ tegra_dma_prep_wcount(tdc, &sg_req->ch_regs, len);
+ sg_req->ch_regs.apb_seq = apb_seq;
+ sg_req->ch_regs.ahb_seq = ahb_seq;
+ sg_req->configured = false;
+ sg_req->last_sg = false;
+ sg_req->dma_desc = dma_desc;
+ sg_req->req_len = len;
+
+ list_add_tail(&sg_req->node, &dma_desc->tx_list);
+ }
+ sg_req->last_sg = true;
+ if (flags & DMA_CTRL_ACK)
+ dma_desc->txd.flags = DMA_CTRL_ACK;
+
+ /*
+ * Make sure that mode should not be conflicting with currently
+ * configured mode.
+ */
+ if (!tdc->isr_handler) {
+ tdc->isr_handler = handle_once_dma_done;
+ tdc->cyclic = false;
+ } else {
+ if (tdc->cyclic) {
+ dev_err(tdc2dev(tdc), "DMA configured in cyclic mode\n");
+ tegra_dma_desc_put(tdc, dma_desc);
+ return NULL;
+ }
+ }
+
+ return &dma_desc->txd;
+}
+
+static struct dma_async_tx_descriptor *
+tegra_dma_prep_dma_cyclic(struct dma_chan *dc, dma_addr_t buf_addr,
+ size_t buf_len,
+ size_t period_len,
+ enum dma_transfer_direction direction,
+ unsigned long flags)
+{
+ struct tegra_dma_channel *tdc = to_tegra_dma_chan(dc);
+ struct tegra_dma_sg_req *sg_req = NULL;
+ u32 csr, ahb_seq, apb_ptr, apb_seq;
+ enum dma_slave_buswidth slave_bw;
+ struct tegra_dma_desc *dma_desc;
+ dma_addr_t mem = buf_addr;
+ unsigned int burst_size;
+ size_t len, remain_len;
+
+ if (!buf_len || !period_len) {
+ dev_err(tdc2dev(tdc), "Invalid buffer/period len\n");
+ return NULL;
+ }
+
+ if (!tdc->config_init) {
+ dev_err(tdc2dev(tdc), "DMA slave is not configured\n");
+ return NULL;
+ }
+
+ /*
+ * We allow to take more number of requests till DMA is
+ * not started. The driver will loop over all requests.
+ * Once DMA is started then new requests can be queued only after
+ * terminating the DMA.
+ */
+ if (tdc->busy) {
+ dev_err(tdc2dev(tdc), "Request not allowed when DMA running\n");
+ return NULL;
+ }
+
+ /*
+ * We only support cycle transfer when buf_len is multiple of
+ * period_len.
+ */
+ if (buf_len % period_len) {
+ dev_err(tdc2dev(tdc), "buf_len is not multiple of period_len\n");
+ return NULL;
+ }
+
+ len = period_len;
+ if ((len & 3) || (buf_addr & 3) ||
+ len > tdc->tdma->chip_data->max_dma_count) {
+ dev_err(tdc2dev(tdc), "Req len/mem address is not correct\n");
+ return NULL;
+ }
+
+ if (get_transfer_param(tdc, direction, &apb_ptr, &apb_seq, &csr,
+ &burst_size, &slave_bw) < 0)
+ return NULL;
+
+ ahb_seq = TEGRA_APBDMA_AHBSEQ_INTR_ENB;
+ ahb_seq |= TEGRA_APBDMA_AHBSEQ_WRAP_NONE <<
+ TEGRA_APBDMA_AHBSEQ_WRAP_SHIFT;
+ ahb_seq |= TEGRA_APBDMA_AHBSEQ_BUS_WIDTH_32;
+
+ if (tdc->slave_id != TEGRA_APBDMA_SLAVE_ID_INVALID) {
+ csr |= TEGRA_APBDMA_CSR_FLOW;
+ csr |= tdc->slave_id << TEGRA_APBDMA_CSR_REQ_SEL_SHIFT;
+ }
+
+ if (flags & DMA_PREP_INTERRUPT) {
+ csr |= TEGRA_APBDMA_CSR_IE_EOC;
+ } else {
+ WARN_ON_ONCE(1);
+ return NULL;
+ }
+
+ apb_seq |= TEGRA_APBDMA_APBSEQ_WRAP_WORD_1;
+
+ dma_desc = tegra_dma_desc_get(tdc);
+ if (!dma_desc) {
+ dev_err(tdc2dev(tdc), "not enough descriptors available\n");
+ return NULL;
+ }
+
+ INIT_LIST_HEAD(&dma_desc->tx_list);
+ INIT_LIST_HEAD(&dma_desc->cb_node);
+ dma_desc->cb_count = 0;
+
+ dma_desc->bytes_transferred = 0;
+ dma_desc->bytes_requested = buf_len;
+ remain_len = buf_len;
+
+ /* Split transfer equal to period size */
+ while (remain_len) {
+ sg_req = tegra_dma_sg_req_get(tdc);
+ if (!sg_req) {
+ dev_err(tdc2dev(tdc), "DMA sg-req not available\n");
+ tegra_dma_desc_put(tdc, dma_desc);
+ return NULL;
+ }
+
+ ahb_seq |= get_burst_size(tdc, burst_size, slave_bw, len);
+ sg_req->ch_regs.apb_ptr = apb_ptr;
+ sg_req->ch_regs.ahb_ptr = mem;
+ sg_req->ch_regs.csr = csr;
+ tegra_dma_prep_wcount(tdc, &sg_req->ch_regs, len);
+ sg_req->ch_regs.apb_seq = apb_seq;
+ sg_req->ch_regs.ahb_seq = ahb_seq;
+ sg_req->configured = false;
+ sg_req->last_sg = false;
+ sg_req->dma_desc = dma_desc;
+ sg_req->req_len = len;
+
+ list_add_tail(&sg_req->node, &dma_desc->tx_list);
+ remain_len -= len;
+ mem += len;
+ }
+ sg_req->last_sg = true;
+ if (flags & DMA_CTRL_ACK)
+ dma_desc->txd.flags = DMA_CTRL_ACK;
+
+ /*
+ * Make sure that mode should not be conflicting with currently
+ * configured mode.
+ */
+ if (!tdc->isr_handler) {
+ tdc->isr_handler = handle_cont_sngl_cycle_dma_done;
+ tdc->cyclic = true;
+ } else {
+ if (!tdc->cyclic) {
+ dev_err(tdc2dev(tdc), "DMA configuration conflict\n");
+ tegra_dma_desc_put(tdc, dma_desc);
+ return NULL;
+ }
+ }
+
+ return &dma_desc->txd;
+}
+
+static int tegra_dma_alloc_chan_resources(struct dma_chan *dc)
+{
+ struct tegra_dma_channel *tdc = to_tegra_dma_chan(dc);
+
+ dma_cookie_init(&tdc->dma_chan);
+
+ return 0;
+}
+
+static void tegra_dma_free_chan_resources(struct dma_chan *dc)
+{
+ struct tegra_dma_channel *tdc = to_tegra_dma_chan(dc);
+ struct tegra_dma_desc *dma_desc;
+ struct tegra_dma_sg_req *sg_req;
+ struct list_head dma_desc_list;
+ struct list_head sg_req_list;
+
+ INIT_LIST_HEAD(&dma_desc_list);
+ INIT_LIST_HEAD(&sg_req_list);
+
+ dev_dbg(tdc2dev(tdc), "Freeing channel %d\n", tdc->id);
+
+ tegra_dma_terminate_all(dc);
+ tasklet_kill(&tdc->tasklet);
+
+ list_splice_init(&tdc->pending_sg_req, &sg_req_list);
+ list_splice_init(&tdc->free_sg_req, &sg_req_list);
+ list_splice_init(&tdc->free_dma_desc, &dma_desc_list);
+ INIT_LIST_HEAD(&tdc->cb_desc);
+ tdc->config_init = false;
+ tdc->isr_handler = NULL;
+
+ while (!list_empty(&dma_desc_list)) {
+ dma_desc = list_first_entry(&dma_desc_list, typeof(*dma_desc),
+ node);
+ list_del(&dma_desc->node);
+ kfree(dma_desc);
+ }
+
+ while (!list_empty(&sg_req_list)) {
+ sg_req = list_first_entry(&sg_req_list, typeof(*sg_req), node);
+ list_del(&sg_req->node);
+ kfree(sg_req);
+ }
+
+ tdc->slave_id = TEGRA_APBDMA_SLAVE_ID_INVALID;
+}
+
+static struct dma_chan *tegra_dma_of_xlate(struct of_phandle_args *dma_spec,
+ struct of_dma *ofdma)
+{
+ struct tegra_dma *tdma = ofdma->of_dma_data;
+ struct tegra_dma_channel *tdc;
+ struct dma_chan *chan;
+
+ if (dma_spec->args[0] > TEGRA_APBDMA_CSR_REQ_SEL_MASK) {
+ dev_err(tdma->dev, "Invalid slave id: %d\n", dma_spec->args[0]);
+ return NULL;
+ }
+
+ chan = dma_get_any_slave_channel(&tdma->dma_dev);
+ if (!chan)
+ return NULL;
+
+ tdc = to_tegra_dma_chan(chan);
+ tdc->slave_id = dma_spec->args[0];
+
+ return chan;
+}
+
+/* Tegra20 specific DMA controller information */
+static const struct tegra_dma_chip_data tegra20_dma_chip_data = {
+ .nr_channels = 16,
+ .channel_reg_size = 0x20,
+ .max_dma_count = 1024UL * 64,
+ .support_channel_pause = false,
+ .support_separate_wcount_reg = false,
+};
+
+/* Tegra30 specific DMA controller information */
+static const struct tegra_dma_chip_data tegra30_dma_chip_data = {
+ .nr_channels = 32,
+ .channel_reg_size = 0x20,
+ .max_dma_count = 1024UL * 64,
+ .support_channel_pause = false,
+ .support_separate_wcount_reg = false,
+};
+
+/* Tegra114 specific DMA controller information */
+static const struct tegra_dma_chip_data tegra114_dma_chip_data = {
+ .nr_channels = 32,
+ .channel_reg_size = 0x20,
+ .max_dma_count = 1024UL * 64,
+ .support_channel_pause = true,
+ .support_separate_wcount_reg = false,
+};
+
+/* Tegra148 specific DMA controller information */
+static const struct tegra_dma_chip_data tegra148_dma_chip_data = {
+ .nr_channels = 32,
+ .channel_reg_size = 0x40,
+ .max_dma_count = 1024UL * 64,
+ .support_channel_pause = true,
+ .support_separate_wcount_reg = true,
+};
+
+static int tegra_dma_init_hw(struct tegra_dma *tdma)
+{
+ int err;
+
+ err = reset_control_assert(tdma->rst);
+ if (err) {
+ dev_err(tdma->dev, "failed to assert reset: %d\n", err);
+ return err;
+ }
+
+ err = clk_enable(tdma->dma_clk);
+ if (err) {
+ dev_err(tdma->dev, "failed to enable clk: %d\n", err);
+ return err;
+ }
+
+ /* reset DMA controller */
+ udelay(2);
+ reset_control_deassert(tdma->rst);
+
+ /* enable global DMA registers */
+ tdma_write(tdma, TEGRA_APBDMA_GENERAL, TEGRA_APBDMA_GENERAL_ENABLE);
+ tdma_write(tdma, TEGRA_APBDMA_CONTROL, 0);
+ tdma_write(tdma, TEGRA_APBDMA_IRQ_MASK_SET, 0xFFFFFFFF);
+
+ clk_disable(tdma->dma_clk);
+
+ return 0;
+}
+
+static int tegra_dma_probe(struct platform_device *pdev)
+{
+ const struct tegra_dma_chip_data *cdata;
+ struct tegra_dma *tdma;
+ unsigned int i;
+ size_t size;
+ int ret;
+
+ cdata = of_device_get_match_data(&pdev->dev);
+ size = struct_size(tdma, channels, cdata->nr_channels);
+
+ tdma = devm_kzalloc(&pdev->dev, size, GFP_KERNEL);
+ if (!tdma)
+ return -ENOMEM;
+
+ tdma->dev = &pdev->dev;
+ tdma->chip_data = cdata;
+ platform_set_drvdata(pdev, tdma);
+
+ tdma->base_addr = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(tdma->base_addr))
+ return PTR_ERR(tdma->base_addr);
+
+ tdma->dma_clk = devm_clk_get(&pdev->dev, NULL);
+ if (IS_ERR(tdma->dma_clk)) {
+ dev_err(&pdev->dev, "Error: Missing controller clock\n");
+ return PTR_ERR(tdma->dma_clk);
+ }
+
+ tdma->rst = devm_reset_control_get(&pdev->dev, "dma");
+ if (IS_ERR(tdma->rst)) {
+ dev_err(&pdev->dev, "Error: Missing reset\n");
+ return PTR_ERR(tdma->rst);
+ }
+
+ spin_lock_init(&tdma->global_lock);
+
+ ret = clk_prepare(tdma->dma_clk);
+ if (ret)
+ return ret;
+
+ ret = tegra_dma_init_hw(tdma);
+ if (ret)
+ goto err_clk_unprepare;
+
+ pm_runtime_irq_safe(&pdev->dev);
+ pm_runtime_enable(&pdev->dev);
+
+ INIT_LIST_HEAD(&tdma->dma_dev.channels);
+ for (i = 0; i < cdata->nr_channels; i++) {
+ struct tegra_dma_channel *tdc = &tdma->channels[i];
+ int irq;
+
+ tdc->chan_addr = tdma->base_addr +
+ TEGRA_APBDMA_CHANNEL_BASE_ADD_OFFSET +
+ (i * cdata->channel_reg_size);
+
+ irq = platform_get_irq(pdev, i);
+ if (irq < 0) {
+ ret = irq;
+ goto err_pm_disable;
+ }
+
+ snprintf(tdc->name, sizeof(tdc->name), "apbdma.%d", i);
+ ret = devm_request_irq(&pdev->dev, irq, tegra_dma_isr, 0,
+ tdc->name, tdc);
+ if (ret) {
+ dev_err(&pdev->dev,
+ "request_irq failed with err %d channel %d\n",
+ ret, i);
+ goto err_pm_disable;
+ }
+
+ tdc->dma_chan.device = &tdma->dma_dev;
+ dma_cookie_init(&tdc->dma_chan);
+ list_add_tail(&tdc->dma_chan.device_node,
+ &tdma->dma_dev.channels);
+ tdc->tdma = tdma;
+ tdc->id = i;
+ tdc->slave_id = TEGRA_APBDMA_SLAVE_ID_INVALID;
+
+ tasklet_setup(&tdc->tasklet, tegra_dma_tasklet);
+ spin_lock_init(&tdc->lock);
+ init_waitqueue_head(&tdc->wq);
+
+ INIT_LIST_HEAD(&tdc->pending_sg_req);
+ INIT_LIST_HEAD(&tdc->free_sg_req);
+ INIT_LIST_HEAD(&tdc->free_dma_desc);
+ INIT_LIST_HEAD(&tdc->cb_desc);
+ }
+
+ dma_cap_set(DMA_SLAVE, tdma->dma_dev.cap_mask);
+ dma_cap_set(DMA_PRIVATE, tdma->dma_dev.cap_mask);
+ dma_cap_set(DMA_CYCLIC, tdma->dma_dev.cap_mask);
+
+ tdma->global_pause_count = 0;
+ tdma->dma_dev.dev = &pdev->dev;
+ tdma->dma_dev.device_alloc_chan_resources =
+ tegra_dma_alloc_chan_resources;
+ tdma->dma_dev.device_free_chan_resources =
+ tegra_dma_free_chan_resources;
+ tdma->dma_dev.device_prep_slave_sg = tegra_dma_prep_slave_sg;
+ tdma->dma_dev.device_prep_dma_cyclic = tegra_dma_prep_dma_cyclic;
+ tdma->dma_dev.src_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) |
+ BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) |
+ BIT(DMA_SLAVE_BUSWIDTH_4_BYTES) |
+ BIT(DMA_SLAVE_BUSWIDTH_8_BYTES);
+ tdma->dma_dev.dst_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) |
+ BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) |
+ BIT(DMA_SLAVE_BUSWIDTH_4_BYTES) |
+ BIT(DMA_SLAVE_BUSWIDTH_8_BYTES);
+ tdma->dma_dev.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
+ tdma->dma_dev.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
+ tdma->dma_dev.device_config = tegra_dma_slave_config;
+ tdma->dma_dev.device_terminate_all = tegra_dma_terminate_all;
+ tdma->dma_dev.device_synchronize = tegra_dma_synchronize;
+ tdma->dma_dev.device_tx_status = tegra_dma_tx_status;
+ tdma->dma_dev.device_issue_pending = tegra_dma_issue_pending;
+
+ ret = dma_async_device_register(&tdma->dma_dev);
+ if (ret < 0) {
+ dev_err(&pdev->dev,
+ "Tegra20 APB DMA driver registration failed %d\n", ret);
+ goto err_pm_disable;
+ }
+
+ ret = of_dma_controller_register(pdev->dev.of_node,
+ tegra_dma_of_xlate, tdma);
+ if (ret < 0) {
+ dev_err(&pdev->dev,
+ "Tegra20 APB DMA OF registration failed %d\n", ret);
+ goto err_unregister_dma_dev;
+ }
+
+ dev_info(&pdev->dev, "Tegra20 APB DMA driver registered %u channels\n",
+ cdata->nr_channels);
+
+ return 0;
+
+err_unregister_dma_dev:
+ dma_async_device_unregister(&tdma->dma_dev);
+
+err_pm_disable:
+ pm_runtime_disable(&pdev->dev);
+
+err_clk_unprepare:
+ clk_unprepare(tdma->dma_clk);
+
+ return ret;
+}
+
+static int tegra_dma_remove(struct platform_device *pdev)
+{
+ struct tegra_dma *tdma = platform_get_drvdata(pdev);
+
+ of_dma_controller_free(pdev->dev.of_node);
+ dma_async_device_unregister(&tdma->dma_dev);
+ pm_runtime_disable(&pdev->dev);
+ clk_unprepare(tdma->dma_clk);
+
+ return 0;
+}
+
+static int __maybe_unused tegra_dma_runtime_suspend(struct device *dev)
+{
+ struct tegra_dma *tdma = dev_get_drvdata(dev);
+
+ clk_disable(tdma->dma_clk);
+
+ return 0;
+}
+
+static int __maybe_unused tegra_dma_runtime_resume(struct device *dev)
+{
+ struct tegra_dma *tdma = dev_get_drvdata(dev);
+
+ return clk_enable(tdma->dma_clk);
+}
+
+static int __maybe_unused tegra_dma_dev_suspend(struct device *dev)
+{
+ struct tegra_dma *tdma = dev_get_drvdata(dev);
+ unsigned long flags;
+ unsigned int i;
+ bool busy;
+
+ for (i = 0; i < tdma->chip_data->nr_channels; i++) {
+ struct tegra_dma_channel *tdc = &tdma->channels[i];
+
+ tasklet_kill(&tdc->tasklet);
+
+ spin_lock_irqsave(&tdc->lock, flags);
+ busy = tdc->busy;
+ spin_unlock_irqrestore(&tdc->lock, flags);
+
+ if (busy) {
+ dev_err(tdma->dev, "channel %u busy\n", i);
+ return -EBUSY;
+ }
+ }
+
+ return pm_runtime_force_suspend(dev);
+}
+
+static int __maybe_unused tegra_dma_dev_resume(struct device *dev)
+{
+ struct tegra_dma *tdma = dev_get_drvdata(dev);
+ int err;
+
+ err = tegra_dma_init_hw(tdma);
+ if (err)
+ return err;
+
+ return pm_runtime_force_resume(dev);
+}
+
+static const struct dev_pm_ops tegra_dma_dev_pm_ops = {
+ SET_RUNTIME_PM_OPS(tegra_dma_runtime_suspend, tegra_dma_runtime_resume,
+ NULL)
+ SET_SYSTEM_SLEEP_PM_OPS(tegra_dma_dev_suspend, tegra_dma_dev_resume)
+};
+
+static const struct of_device_id tegra_dma_of_match[] = {
+ {
+ .compatible = "nvidia,tegra148-apbdma",
+ .data = &tegra148_dma_chip_data,
+ }, {
+ .compatible = "nvidia,tegra114-apbdma",
+ .data = &tegra114_dma_chip_data,
+ }, {
+ .compatible = "nvidia,tegra30-apbdma",
+ .data = &tegra30_dma_chip_data,
+ }, {
+ .compatible = "nvidia,tegra20-apbdma",
+ .data = &tegra20_dma_chip_data,
+ }, {
+ },
+};
+MODULE_DEVICE_TABLE(of, tegra_dma_of_match);
+
+static struct platform_driver tegra_dmac_driver = {
+ .driver = {
+ .name = "tegra-apbdma",
+ .pm = &tegra_dma_dev_pm_ops,
+ .of_match_table = tegra_dma_of_match,
+ },
+ .probe = tegra_dma_probe,
+ .remove = tegra_dma_remove,
+};
+
+module_platform_driver(tegra_dmac_driver);
+
+MODULE_DESCRIPTION("NVIDIA Tegra APB DMA Controller driver");
+MODULE_AUTHOR("Laxman Dewangan <ldewangan@nvidia.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/tegra210-adma.c b/drivers/dma/tegra210-adma.c
new file mode 100644
index 000000000..79da93cc7
--- /dev/null
+++ b/drivers/dma/tegra210-adma.c
@@ -0,0 +1,992 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * ADMA driver for Nvidia's Tegra210 ADMA controller.
+ *
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ */
+
+#include <linux/clk.h>
+#include <linux/iopoll.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/of_dma.h>
+#include <linux/of_irq.h>
+#include <linux/pm_runtime.h>
+#include <linux/slab.h>
+
+#include "virt-dma.h"
+
+#define ADMA_CH_CMD 0x00
+#define ADMA_CH_STATUS 0x0c
+#define ADMA_CH_STATUS_XFER_EN BIT(0)
+#define ADMA_CH_STATUS_XFER_PAUSED BIT(1)
+
+#define ADMA_CH_INT_STATUS 0x10
+#define ADMA_CH_INT_STATUS_XFER_DONE BIT(0)
+
+#define ADMA_CH_INT_CLEAR 0x1c
+#define ADMA_CH_CTRL 0x24
+#define ADMA_CH_CTRL_DIR(val) (((val) & 0xf) << 12)
+#define ADMA_CH_CTRL_DIR_AHUB2MEM 2
+#define ADMA_CH_CTRL_DIR_MEM2AHUB 4
+#define ADMA_CH_CTRL_MODE_CONTINUOUS (2 << 8)
+#define ADMA_CH_CTRL_FLOWCTRL_EN BIT(1)
+#define ADMA_CH_CTRL_XFER_PAUSE_SHIFT 0
+
+#define ADMA_CH_CONFIG 0x28
+#define ADMA_CH_CONFIG_SRC_BUF(val) (((val) & 0x7) << 28)
+#define ADMA_CH_CONFIG_TRG_BUF(val) (((val) & 0x7) << 24)
+#define ADMA_CH_CONFIG_BURST_SIZE_SHIFT 20
+#define ADMA_CH_CONFIG_MAX_BURST_SIZE 16
+#define ADMA_CH_CONFIG_WEIGHT_FOR_WRR(val) ((val) & 0xf)
+#define ADMA_CH_CONFIG_MAX_BUFS 8
+#define TEGRA186_ADMA_CH_CONFIG_OUTSTANDING_REQS(reqs) (reqs << 4)
+
+#define ADMA_CH_FIFO_CTRL 0x2c
+#define ADMA_CH_TX_FIFO_SIZE_SHIFT 8
+#define ADMA_CH_RX_FIFO_SIZE_SHIFT 0
+
+#define ADMA_CH_LOWER_SRC_ADDR 0x34
+#define ADMA_CH_LOWER_TRG_ADDR 0x3c
+#define ADMA_CH_TC 0x44
+#define ADMA_CH_TC_COUNT_MASK 0x3ffffffc
+
+#define ADMA_CH_XFER_STATUS 0x54
+#define ADMA_CH_XFER_STATUS_COUNT_MASK 0xffff
+
+#define ADMA_GLOBAL_CMD 0x00
+#define ADMA_GLOBAL_SOFT_RESET 0x04
+
+#define TEGRA_ADMA_BURST_COMPLETE_TIME 20
+
+#define ADMA_CH_REG_FIELD_VAL(val, mask, shift) (((val) & mask) << shift)
+
+struct tegra_adma;
+
+/*
+ * struct tegra_adma_chip_data - Tegra chip specific data
+ * @adma_get_burst_config: Function callback used to set DMA burst size.
+ * @global_reg_offset: Register offset of DMA global register.
+ * @global_int_clear: Register offset of DMA global interrupt clear.
+ * @ch_req_tx_shift: Register offset for AHUB transmit channel select.
+ * @ch_req_rx_shift: Register offset for AHUB receive channel select.
+ * @ch_base_offset: Register offset of DMA channel registers.
+ * @ch_fifo_ctrl: Default value for channel FIFO CTRL register.
+ * @ch_req_mask: Mask for Tx or Rx channel select.
+ * @ch_req_max: Maximum number of Tx or Rx channels available.
+ * @ch_reg_size: Size of DMA channel register space.
+ * @nr_channels: Number of DMA channels available.
+ * @ch_fifo_size_mask: Mask for FIFO size field.
+ * @sreq_index_offset: Slave channel index offset.
+ * @has_outstanding_reqs: If DMA channel can have outstanding requests.
+ */
+struct tegra_adma_chip_data {
+ unsigned int (*adma_get_burst_config)(unsigned int burst_size);
+ unsigned int global_reg_offset;
+ unsigned int global_int_clear;
+ unsigned int ch_req_tx_shift;
+ unsigned int ch_req_rx_shift;
+ unsigned int ch_base_offset;
+ unsigned int ch_fifo_ctrl;
+ unsigned int ch_req_mask;
+ unsigned int ch_req_max;
+ unsigned int ch_reg_size;
+ unsigned int nr_channels;
+ unsigned int ch_fifo_size_mask;
+ unsigned int sreq_index_offset;
+ bool has_outstanding_reqs;
+};
+
+/*
+ * struct tegra_adma_chan_regs - Tegra ADMA channel registers
+ */
+struct tegra_adma_chan_regs {
+ unsigned int ctrl;
+ unsigned int config;
+ unsigned int src_addr;
+ unsigned int trg_addr;
+ unsigned int fifo_ctrl;
+ unsigned int cmd;
+ unsigned int tc;
+};
+
+/*
+ * struct tegra_adma_desc - Tegra ADMA descriptor to manage transfer requests.
+ */
+struct tegra_adma_desc {
+ struct virt_dma_desc vd;
+ struct tegra_adma_chan_regs ch_regs;
+ size_t buf_len;
+ size_t period_len;
+ size_t num_periods;
+};
+
+/*
+ * struct tegra_adma_chan - Tegra ADMA channel information
+ */
+struct tegra_adma_chan {
+ struct virt_dma_chan vc;
+ struct tegra_adma_desc *desc;
+ struct tegra_adma *tdma;
+ int irq;
+ void __iomem *chan_addr;
+
+ /* Slave channel configuration info */
+ struct dma_slave_config sconfig;
+ enum dma_transfer_direction sreq_dir;
+ unsigned int sreq_index;
+ bool sreq_reserved;
+ struct tegra_adma_chan_regs ch_regs;
+
+ /* Transfer count and position info */
+ unsigned int tx_buf_count;
+ unsigned int tx_buf_pos;
+};
+
+/*
+ * struct tegra_adma - Tegra ADMA controller information
+ */
+struct tegra_adma {
+ struct dma_device dma_dev;
+ struct device *dev;
+ void __iomem *base_addr;
+ struct clk *ahub_clk;
+ unsigned int nr_channels;
+ unsigned long rx_requests_reserved;
+ unsigned long tx_requests_reserved;
+
+ /* Used to store global command register state when suspending */
+ unsigned int global_cmd;
+
+ const struct tegra_adma_chip_data *cdata;
+
+ /* Last member of the structure */
+ struct tegra_adma_chan channels[];
+};
+
+static inline void tdma_write(struct tegra_adma *tdma, u32 reg, u32 val)
+{
+ writel(val, tdma->base_addr + tdma->cdata->global_reg_offset + reg);
+}
+
+static inline u32 tdma_read(struct tegra_adma *tdma, u32 reg)
+{
+ return readl(tdma->base_addr + tdma->cdata->global_reg_offset + reg);
+}
+
+static inline void tdma_ch_write(struct tegra_adma_chan *tdc, u32 reg, u32 val)
+{
+ writel(val, tdc->chan_addr + reg);
+}
+
+static inline u32 tdma_ch_read(struct tegra_adma_chan *tdc, u32 reg)
+{
+ return readl(tdc->chan_addr + reg);
+}
+
+static inline struct tegra_adma_chan *to_tegra_adma_chan(struct dma_chan *dc)
+{
+ return container_of(dc, struct tegra_adma_chan, vc.chan);
+}
+
+static inline struct tegra_adma_desc *to_tegra_adma_desc(
+ struct dma_async_tx_descriptor *td)
+{
+ return container_of(td, struct tegra_adma_desc, vd.tx);
+}
+
+static inline struct device *tdc2dev(struct tegra_adma_chan *tdc)
+{
+ return tdc->tdma->dev;
+}
+
+static void tegra_adma_desc_free(struct virt_dma_desc *vd)
+{
+ kfree(container_of(vd, struct tegra_adma_desc, vd));
+}
+
+static int tegra_adma_slave_config(struct dma_chan *dc,
+ struct dma_slave_config *sconfig)
+{
+ struct tegra_adma_chan *tdc = to_tegra_adma_chan(dc);
+
+ memcpy(&tdc->sconfig, sconfig, sizeof(*sconfig));
+
+ return 0;
+}
+
+static int tegra_adma_init(struct tegra_adma *tdma)
+{
+ u32 status;
+ int ret;
+
+ /* Clear any interrupts */
+ tdma_write(tdma, tdma->cdata->ch_base_offset + tdma->cdata->global_int_clear, 0x1);
+
+ /* Assert soft reset */
+ tdma_write(tdma, ADMA_GLOBAL_SOFT_RESET, 0x1);
+
+ /* Wait for reset to clear */
+ ret = readx_poll_timeout(readl,
+ tdma->base_addr +
+ tdma->cdata->global_reg_offset +
+ ADMA_GLOBAL_SOFT_RESET,
+ status, status == 0, 20, 10000);
+ if (ret)
+ return ret;
+
+ /* Enable global ADMA registers */
+ tdma_write(tdma, ADMA_GLOBAL_CMD, 1);
+
+ return 0;
+}
+
+static int tegra_adma_request_alloc(struct tegra_adma_chan *tdc,
+ enum dma_transfer_direction direction)
+{
+ struct tegra_adma *tdma = tdc->tdma;
+ unsigned int sreq_index = tdc->sreq_index;
+
+ if (tdc->sreq_reserved)
+ return tdc->sreq_dir == direction ? 0 : -EINVAL;
+
+ if (sreq_index > tdma->cdata->ch_req_max) {
+ dev_err(tdma->dev, "invalid DMA request\n");
+ return -EINVAL;
+ }
+
+ switch (direction) {
+ case DMA_MEM_TO_DEV:
+ if (test_and_set_bit(sreq_index, &tdma->tx_requests_reserved)) {
+ dev_err(tdma->dev, "DMA request reserved\n");
+ return -EINVAL;
+ }
+ break;
+
+ case DMA_DEV_TO_MEM:
+ if (test_and_set_bit(sreq_index, &tdma->rx_requests_reserved)) {
+ dev_err(tdma->dev, "DMA request reserved\n");
+ return -EINVAL;
+ }
+ break;
+
+ default:
+ dev_WARN(tdma->dev, "channel %s has invalid transfer type\n",
+ dma_chan_name(&tdc->vc.chan));
+ return -EINVAL;
+ }
+
+ tdc->sreq_dir = direction;
+ tdc->sreq_reserved = true;
+
+ return 0;
+}
+
+static void tegra_adma_request_free(struct tegra_adma_chan *tdc)
+{
+ struct tegra_adma *tdma = tdc->tdma;
+
+ if (!tdc->sreq_reserved)
+ return;
+
+ switch (tdc->sreq_dir) {
+ case DMA_MEM_TO_DEV:
+ clear_bit(tdc->sreq_index, &tdma->tx_requests_reserved);
+ break;
+
+ case DMA_DEV_TO_MEM:
+ clear_bit(tdc->sreq_index, &tdma->rx_requests_reserved);
+ break;
+
+ default:
+ dev_WARN(tdma->dev, "channel %s has invalid transfer type\n",
+ dma_chan_name(&tdc->vc.chan));
+ return;
+ }
+
+ tdc->sreq_reserved = false;
+}
+
+static u32 tegra_adma_irq_status(struct tegra_adma_chan *tdc)
+{
+ u32 status = tdma_ch_read(tdc, ADMA_CH_INT_STATUS);
+
+ return status & ADMA_CH_INT_STATUS_XFER_DONE;
+}
+
+static u32 tegra_adma_irq_clear(struct tegra_adma_chan *tdc)
+{
+ u32 status = tegra_adma_irq_status(tdc);
+
+ if (status)
+ tdma_ch_write(tdc, ADMA_CH_INT_CLEAR, status);
+
+ return status;
+}
+
+static void tegra_adma_stop(struct tegra_adma_chan *tdc)
+{
+ unsigned int status;
+
+ /* Disable ADMA */
+ tdma_ch_write(tdc, ADMA_CH_CMD, 0);
+
+ /* Clear interrupt status */
+ tegra_adma_irq_clear(tdc);
+
+ if (readx_poll_timeout_atomic(readl, tdc->chan_addr + ADMA_CH_STATUS,
+ status, !(status & ADMA_CH_STATUS_XFER_EN),
+ 20, 10000)) {
+ dev_err(tdc2dev(tdc), "unable to stop DMA channel\n");
+ return;
+ }
+
+ kfree(tdc->desc);
+ tdc->desc = NULL;
+}
+
+static void tegra_adma_start(struct tegra_adma_chan *tdc)
+{
+ struct virt_dma_desc *vd = vchan_next_desc(&tdc->vc);
+ struct tegra_adma_chan_regs *ch_regs;
+ struct tegra_adma_desc *desc;
+
+ if (!vd)
+ return;
+
+ list_del(&vd->node);
+
+ desc = to_tegra_adma_desc(&vd->tx);
+
+ if (!desc) {
+ dev_warn(tdc2dev(tdc), "unable to start DMA, no descriptor\n");
+ return;
+ }
+
+ ch_regs = &desc->ch_regs;
+
+ tdc->tx_buf_pos = 0;
+ tdc->tx_buf_count = 0;
+ tdma_ch_write(tdc, ADMA_CH_TC, ch_regs->tc);
+ tdma_ch_write(tdc, ADMA_CH_CTRL, ch_regs->ctrl);
+ tdma_ch_write(tdc, ADMA_CH_LOWER_SRC_ADDR, ch_regs->src_addr);
+ tdma_ch_write(tdc, ADMA_CH_LOWER_TRG_ADDR, ch_regs->trg_addr);
+ tdma_ch_write(tdc, ADMA_CH_FIFO_CTRL, ch_regs->fifo_ctrl);
+ tdma_ch_write(tdc, ADMA_CH_CONFIG, ch_regs->config);
+
+ /* Start ADMA */
+ tdma_ch_write(tdc, ADMA_CH_CMD, 1);
+
+ tdc->desc = desc;
+}
+
+static unsigned int tegra_adma_get_residue(struct tegra_adma_chan *tdc)
+{
+ struct tegra_adma_desc *desc = tdc->desc;
+ unsigned int max = ADMA_CH_XFER_STATUS_COUNT_MASK + 1;
+ unsigned int pos = tdma_ch_read(tdc, ADMA_CH_XFER_STATUS);
+ unsigned int periods_remaining;
+
+ /*
+ * Handle wrap around of buffer count register
+ */
+ if (pos < tdc->tx_buf_pos)
+ tdc->tx_buf_count += pos + (max - tdc->tx_buf_pos);
+ else
+ tdc->tx_buf_count += pos - tdc->tx_buf_pos;
+
+ periods_remaining = tdc->tx_buf_count % desc->num_periods;
+ tdc->tx_buf_pos = pos;
+
+ return desc->buf_len - (periods_remaining * desc->period_len);
+}
+
+static irqreturn_t tegra_adma_isr(int irq, void *dev_id)
+{
+ struct tegra_adma_chan *tdc = dev_id;
+ unsigned long status;
+
+ spin_lock(&tdc->vc.lock);
+
+ status = tegra_adma_irq_clear(tdc);
+ if (status == 0 || !tdc->desc) {
+ spin_unlock(&tdc->vc.lock);
+ return IRQ_NONE;
+ }
+
+ vchan_cyclic_callback(&tdc->desc->vd);
+
+ spin_unlock(&tdc->vc.lock);
+
+ return IRQ_HANDLED;
+}
+
+static void tegra_adma_issue_pending(struct dma_chan *dc)
+{
+ struct tegra_adma_chan *tdc = to_tegra_adma_chan(dc);
+ unsigned long flags;
+
+ spin_lock_irqsave(&tdc->vc.lock, flags);
+
+ if (vchan_issue_pending(&tdc->vc)) {
+ if (!tdc->desc)
+ tegra_adma_start(tdc);
+ }
+
+ spin_unlock_irqrestore(&tdc->vc.lock, flags);
+}
+
+static bool tegra_adma_is_paused(struct tegra_adma_chan *tdc)
+{
+ u32 csts;
+
+ csts = tdma_ch_read(tdc, ADMA_CH_STATUS);
+ csts &= ADMA_CH_STATUS_XFER_PAUSED;
+
+ return csts ? true : false;
+}
+
+static int tegra_adma_pause(struct dma_chan *dc)
+{
+ struct tegra_adma_chan *tdc = to_tegra_adma_chan(dc);
+ struct tegra_adma_desc *desc = tdc->desc;
+ struct tegra_adma_chan_regs *ch_regs = &desc->ch_regs;
+ int dcnt = 10;
+
+ ch_regs->ctrl = tdma_ch_read(tdc, ADMA_CH_CTRL);
+ ch_regs->ctrl |= (1 << ADMA_CH_CTRL_XFER_PAUSE_SHIFT);
+ tdma_ch_write(tdc, ADMA_CH_CTRL, ch_regs->ctrl);
+
+ while (dcnt-- && !tegra_adma_is_paused(tdc))
+ udelay(TEGRA_ADMA_BURST_COMPLETE_TIME);
+
+ if (dcnt < 0) {
+ dev_err(tdc2dev(tdc), "unable to pause DMA channel\n");
+ return -EBUSY;
+ }
+
+ return 0;
+}
+
+static int tegra_adma_resume(struct dma_chan *dc)
+{
+ struct tegra_adma_chan *tdc = to_tegra_adma_chan(dc);
+ struct tegra_adma_desc *desc = tdc->desc;
+ struct tegra_adma_chan_regs *ch_regs = &desc->ch_regs;
+
+ ch_regs->ctrl = tdma_ch_read(tdc, ADMA_CH_CTRL);
+ ch_regs->ctrl &= ~(1 << ADMA_CH_CTRL_XFER_PAUSE_SHIFT);
+ tdma_ch_write(tdc, ADMA_CH_CTRL, ch_regs->ctrl);
+
+ return 0;
+}
+
+static int tegra_adma_terminate_all(struct dma_chan *dc)
+{
+ struct tegra_adma_chan *tdc = to_tegra_adma_chan(dc);
+ unsigned long flags;
+ LIST_HEAD(head);
+
+ spin_lock_irqsave(&tdc->vc.lock, flags);
+
+ if (tdc->desc)
+ tegra_adma_stop(tdc);
+
+ tegra_adma_request_free(tdc);
+ vchan_get_all_descriptors(&tdc->vc, &head);
+ spin_unlock_irqrestore(&tdc->vc.lock, flags);
+ vchan_dma_desc_free_list(&tdc->vc, &head);
+
+ return 0;
+}
+
+static enum dma_status tegra_adma_tx_status(struct dma_chan *dc,
+ dma_cookie_t cookie,
+ struct dma_tx_state *txstate)
+{
+ struct tegra_adma_chan *tdc = to_tegra_adma_chan(dc);
+ struct tegra_adma_desc *desc;
+ struct virt_dma_desc *vd;
+ enum dma_status ret;
+ unsigned long flags;
+ unsigned int residual;
+
+ ret = dma_cookie_status(dc, cookie, txstate);
+ if (ret == DMA_COMPLETE || !txstate)
+ return ret;
+
+ spin_lock_irqsave(&tdc->vc.lock, flags);
+
+ vd = vchan_find_desc(&tdc->vc, cookie);
+ if (vd) {
+ desc = to_tegra_adma_desc(&vd->tx);
+ residual = desc->ch_regs.tc;
+ } else if (tdc->desc && tdc->desc->vd.tx.cookie == cookie) {
+ residual = tegra_adma_get_residue(tdc);
+ } else {
+ residual = 0;
+ }
+
+ spin_unlock_irqrestore(&tdc->vc.lock, flags);
+
+ dma_set_residue(txstate, residual);
+
+ return ret;
+}
+
+static unsigned int tegra210_adma_get_burst_config(unsigned int burst_size)
+{
+ if (!burst_size || burst_size > ADMA_CH_CONFIG_MAX_BURST_SIZE)
+ burst_size = ADMA_CH_CONFIG_MAX_BURST_SIZE;
+
+ return fls(burst_size) << ADMA_CH_CONFIG_BURST_SIZE_SHIFT;
+}
+
+static unsigned int tegra186_adma_get_burst_config(unsigned int burst_size)
+{
+ if (!burst_size || burst_size > ADMA_CH_CONFIG_MAX_BURST_SIZE)
+ burst_size = ADMA_CH_CONFIG_MAX_BURST_SIZE;
+
+ return (burst_size - 1) << ADMA_CH_CONFIG_BURST_SIZE_SHIFT;
+}
+
+static int tegra_adma_set_xfer_params(struct tegra_adma_chan *tdc,
+ struct tegra_adma_desc *desc,
+ dma_addr_t buf_addr,
+ enum dma_transfer_direction direction)
+{
+ struct tegra_adma_chan_regs *ch_regs = &desc->ch_regs;
+ const struct tegra_adma_chip_data *cdata = tdc->tdma->cdata;
+ unsigned int burst_size, adma_dir, fifo_size_shift;
+
+ if (desc->num_periods > ADMA_CH_CONFIG_MAX_BUFS)
+ return -EINVAL;
+
+ switch (direction) {
+ case DMA_MEM_TO_DEV:
+ fifo_size_shift = ADMA_CH_TX_FIFO_SIZE_SHIFT;
+ adma_dir = ADMA_CH_CTRL_DIR_MEM2AHUB;
+ burst_size = tdc->sconfig.dst_maxburst;
+ ch_regs->config = ADMA_CH_CONFIG_SRC_BUF(desc->num_periods - 1);
+ ch_regs->ctrl = ADMA_CH_REG_FIELD_VAL(tdc->sreq_index,
+ cdata->ch_req_mask,
+ cdata->ch_req_tx_shift);
+ ch_regs->src_addr = buf_addr;
+ break;
+
+ case DMA_DEV_TO_MEM:
+ fifo_size_shift = ADMA_CH_RX_FIFO_SIZE_SHIFT;
+ adma_dir = ADMA_CH_CTRL_DIR_AHUB2MEM;
+ burst_size = tdc->sconfig.src_maxburst;
+ ch_regs->config = ADMA_CH_CONFIG_TRG_BUF(desc->num_periods - 1);
+ ch_regs->ctrl = ADMA_CH_REG_FIELD_VAL(tdc->sreq_index,
+ cdata->ch_req_mask,
+ cdata->ch_req_rx_shift);
+ ch_regs->trg_addr = buf_addr;
+ break;
+
+ default:
+ dev_err(tdc2dev(tdc), "DMA direction is not supported\n");
+ return -EINVAL;
+ }
+
+ ch_regs->ctrl |= ADMA_CH_CTRL_DIR(adma_dir) |
+ ADMA_CH_CTRL_MODE_CONTINUOUS |
+ ADMA_CH_CTRL_FLOWCTRL_EN;
+ ch_regs->config |= cdata->adma_get_burst_config(burst_size);
+ ch_regs->config |= ADMA_CH_CONFIG_WEIGHT_FOR_WRR(1);
+ if (cdata->has_outstanding_reqs)
+ ch_regs->config |= TEGRA186_ADMA_CH_CONFIG_OUTSTANDING_REQS(8);
+
+ /*
+ * 'sreq_index' represents the current ADMAIF channel number and as per
+ * HW recommendation its FIFO size should match with the corresponding
+ * ADMA channel.
+ *
+ * ADMA FIFO size is set as per below (based on default ADMAIF channel
+ * FIFO sizes):
+ * fifo_size = 0x2 (sreq_index > sreq_index_offset)
+ * fifo_size = 0x3 (sreq_index <= sreq_index_offset)
+ *
+ */
+ if (tdc->sreq_index > cdata->sreq_index_offset)
+ ch_regs->fifo_ctrl =
+ ADMA_CH_REG_FIELD_VAL(2, cdata->ch_fifo_size_mask,
+ fifo_size_shift);
+ else
+ ch_regs->fifo_ctrl =
+ ADMA_CH_REG_FIELD_VAL(3, cdata->ch_fifo_size_mask,
+ fifo_size_shift);
+
+ ch_regs->tc = desc->period_len & ADMA_CH_TC_COUNT_MASK;
+
+ return tegra_adma_request_alloc(tdc, direction);
+}
+
+static struct dma_async_tx_descriptor *tegra_adma_prep_dma_cyclic(
+ struct dma_chan *dc, dma_addr_t buf_addr, size_t buf_len,
+ size_t period_len, enum dma_transfer_direction direction,
+ unsigned long flags)
+{
+ struct tegra_adma_chan *tdc = to_tegra_adma_chan(dc);
+ struct tegra_adma_desc *desc = NULL;
+
+ if (!buf_len || !period_len || period_len > ADMA_CH_TC_COUNT_MASK) {
+ dev_err(tdc2dev(tdc), "invalid buffer/period len\n");
+ return NULL;
+ }
+
+ if (buf_len % period_len) {
+ dev_err(tdc2dev(tdc), "buf_len not a multiple of period_len\n");
+ return NULL;
+ }
+
+ if (!IS_ALIGNED(buf_addr, 4)) {
+ dev_err(tdc2dev(tdc), "invalid buffer alignment\n");
+ return NULL;
+ }
+
+ desc = kzalloc(sizeof(*desc), GFP_NOWAIT);
+ if (!desc)
+ return NULL;
+
+ desc->buf_len = buf_len;
+ desc->period_len = period_len;
+ desc->num_periods = buf_len / period_len;
+
+ if (tegra_adma_set_xfer_params(tdc, desc, buf_addr, direction)) {
+ kfree(desc);
+ return NULL;
+ }
+
+ return vchan_tx_prep(&tdc->vc, &desc->vd, flags);
+}
+
+static int tegra_adma_alloc_chan_resources(struct dma_chan *dc)
+{
+ struct tegra_adma_chan *tdc = to_tegra_adma_chan(dc);
+ int ret;
+
+ ret = request_irq(tdc->irq, tegra_adma_isr, 0, dma_chan_name(dc), tdc);
+ if (ret) {
+ dev_err(tdc2dev(tdc), "failed to get interrupt for %s\n",
+ dma_chan_name(dc));
+ return ret;
+ }
+
+ ret = pm_runtime_resume_and_get(tdc2dev(tdc));
+ if (ret < 0) {
+ free_irq(tdc->irq, tdc);
+ return ret;
+ }
+
+ dma_cookie_init(&tdc->vc.chan);
+
+ return 0;
+}
+
+static void tegra_adma_free_chan_resources(struct dma_chan *dc)
+{
+ struct tegra_adma_chan *tdc = to_tegra_adma_chan(dc);
+
+ tegra_adma_terminate_all(dc);
+ vchan_free_chan_resources(&tdc->vc);
+ tasklet_kill(&tdc->vc.task);
+ free_irq(tdc->irq, tdc);
+ pm_runtime_put(tdc2dev(tdc));
+
+ tdc->sreq_index = 0;
+ tdc->sreq_dir = DMA_TRANS_NONE;
+}
+
+static struct dma_chan *tegra_dma_of_xlate(struct of_phandle_args *dma_spec,
+ struct of_dma *ofdma)
+{
+ struct tegra_adma *tdma = ofdma->of_dma_data;
+ struct tegra_adma_chan *tdc;
+ struct dma_chan *chan;
+ unsigned int sreq_index;
+
+ if (dma_spec->args_count != 1)
+ return NULL;
+
+ sreq_index = dma_spec->args[0];
+
+ if (sreq_index == 0) {
+ dev_err(tdma->dev, "DMA request must not be 0\n");
+ return NULL;
+ }
+
+ chan = dma_get_any_slave_channel(&tdma->dma_dev);
+ if (!chan)
+ return NULL;
+
+ tdc = to_tegra_adma_chan(chan);
+ tdc->sreq_index = sreq_index;
+
+ return chan;
+}
+
+static int __maybe_unused tegra_adma_runtime_suspend(struct device *dev)
+{
+ struct tegra_adma *tdma = dev_get_drvdata(dev);
+ struct tegra_adma_chan_regs *ch_reg;
+ struct tegra_adma_chan *tdc;
+ int i;
+
+ tdma->global_cmd = tdma_read(tdma, ADMA_GLOBAL_CMD);
+ if (!tdma->global_cmd)
+ goto clk_disable;
+
+ for (i = 0; i < tdma->nr_channels; i++) {
+ tdc = &tdma->channels[i];
+ ch_reg = &tdc->ch_regs;
+ ch_reg->cmd = tdma_ch_read(tdc, ADMA_CH_CMD);
+ /* skip if channel is not active */
+ if (!ch_reg->cmd)
+ continue;
+ ch_reg->tc = tdma_ch_read(tdc, ADMA_CH_TC);
+ ch_reg->src_addr = tdma_ch_read(tdc, ADMA_CH_LOWER_SRC_ADDR);
+ ch_reg->trg_addr = tdma_ch_read(tdc, ADMA_CH_LOWER_TRG_ADDR);
+ ch_reg->ctrl = tdma_ch_read(tdc, ADMA_CH_CTRL);
+ ch_reg->fifo_ctrl = tdma_ch_read(tdc, ADMA_CH_FIFO_CTRL);
+ ch_reg->config = tdma_ch_read(tdc, ADMA_CH_CONFIG);
+ }
+
+clk_disable:
+ clk_disable_unprepare(tdma->ahub_clk);
+
+ return 0;
+}
+
+static int __maybe_unused tegra_adma_runtime_resume(struct device *dev)
+{
+ struct tegra_adma *tdma = dev_get_drvdata(dev);
+ struct tegra_adma_chan_regs *ch_reg;
+ struct tegra_adma_chan *tdc;
+ int ret, i;
+
+ ret = clk_prepare_enable(tdma->ahub_clk);
+ if (ret) {
+ dev_err(dev, "ahub clk_enable failed: %d\n", ret);
+ return ret;
+ }
+ tdma_write(tdma, ADMA_GLOBAL_CMD, tdma->global_cmd);
+
+ if (!tdma->global_cmd)
+ return 0;
+
+ for (i = 0; i < tdma->nr_channels; i++) {
+ tdc = &tdma->channels[i];
+ ch_reg = &tdc->ch_regs;
+ /* skip if channel was not active earlier */
+ if (!ch_reg->cmd)
+ continue;
+ tdma_ch_write(tdc, ADMA_CH_TC, ch_reg->tc);
+ tdma_ch_write(tdc, ADMA_CH_LOWER_SRC_ADDR, ch_reg->src_addr);
+ tdma_ch_write(tdc, ADMA_CH_LOWER_TRG_ADDR, ch_reg->trg_addr);
+ tdma_ch_write(tdc, ADMA_CH_CTRL, ch_reg->ctrl);
+ tdma_ch_write(tdc, ADMA_CH_FIFO_CTRL, ch_reg->fifo_ctrl);
+ tdma_ch_write(tdc, ADMA_CH_CONFIG, ch_reg->config);
+ tdma_ch_write(tdc, ADMA_CH_CMD, ch_reg->cmd);
+ }
+
+ return 0;
+}
+
+static const struct tegra_adma_chip_data tegra210_chip_data = {
+ .adma_get_burst_config = tegra210_adma_get_burst_config,
+ .global_reg_offset = 0xc00,
+ .global_int_clear = 0x20,
+ .ch_req_tx_shift = 28,
+ .ch_req_rx_shift = 24,
+ .ch_base_offset = 0,
+ .ch_req_mask = 0xf,
+ .ch_req_max = 10,
+ .ch_reg_size = 0x80,
+ .nr_channels = 22,
+ .ch_fifo_size_mask = 0xf,
+ .sreq_index_offset = 2,
+ .has_outstanding_reqs = false,
+};
+
+static const struct tegra_adma_chip_data tegra186_chip_data = {
+ .adma_get_burst_config = tegra186_adma_get_burst_config,
+ .global_reg_offset = 0,
+ .global_int_clear = 0x402c,
+ .ch_req_tx_shift = 27,
+ .ch_req_rx_shift = 22,
+ .ch_base_offset = 0x10000,
+ .ch_req_mask = 0x1f,
+ .ch_req_max = 20,
+ .ch_reg_size = 0x100,
+ .nr_channels = 32,
+ .ch_fifo_size_mask = 0x1f,
+ .sreq_index_offset = 4,
+ .has_outstanding_reqs = true,
+};
+
+static const struct of_device_id tegra_adma_of_match[] = {
+ { .compatible = "nvidia,tegra210-adma", .data = &tegra210_chip_data },
+ { .compatible = "nvidia,tegra186-adma", .data = &tegra186_chip_data },
+ { },
+};
+MODULE_DEVICE_TABLE(of, tegra_adma_of_match);
+
+static int tegra_adma_probe(struct platform_device *pdev)
+{
+ const struct tegra_adma_chip_data *cdata;
+ struct tegra_adma *tdma;
+ struct resource *res;
+ int ret, i;
+
+ cdata = of_device_get_match_data(&pdev->dev);
+ if (!cdata) {
+ dev_err(&pdev->dev, "device match data not found\n");
+ return -ENODEV;
+ }
+
+ tdma = devm_kzalloc(&pdev->dev,
+ struct_size(tdma, channels, cdata->nr_channels),
+ GFP_KERNEL);
+ if (!tdma)
+ return -ENOMEM;
+
+ tdma->dev = &pdev->dev;
+ tdma->cdata = cdata;
+ tdma->nr_channels = cdata->nr_channels;
+ platform_set_drvdata(pdev, tdma);
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ tdma->base_addr = devm_ioremap_resource(&pdev->dev, res);
+ if (IS_ERR(tdma->base_addr))
+ return PTR_ERR(tdma->base_addr);
+
+ tdma->ahub_clk = devm_clk_get(&pdev->dev, "d_audio");
+ if (IS_ERR(tdma->ahub_clk)) {
+ dev_err(&pdev->dev, "Error: Missing ahub controller clock\n");
+ return PTR_ERR(tdma->ahub_clk);
+ }
+
+ INIT_LIST_HEAD(&tdma->dma_dev.channels);
+ for (i = 0; i < tdma->nr_channels; i++) {
+ struct tegra_adma_chan *tdc = &tdma->channels[i];
+
+ tdc->chan_addr = tdma->base_addr + cdata->ch_base_offset
+ + (cdata->ch_reg_size * i);
+
+ tdc->irq = of_irq_get(pdev->dev.of_node, i);
+ if (tdc->irq <= 0) {
+ ret = tdc->irq ?: -ENXIO;
+ goto irq_dispose;
+ }
+
+ vchan_init(&tdc->vc, &tdma->dma_dev);
+ tdc->vc.desc_free = tegra_adma_desc_free;
+ tdc->tdma = tdma;
+ }
+
+ pm_runtime_enable(&pdev->dev);
+
+ ret = pm_runtime_resume_and_get(&pdev->dev);
+ if (ret < 0)
+ goto rpm_disable;
+
+ ret = tegra_adma_init(tdma);
+ if (ret)
+ goto rpm_put;
+
+ dma_cap_set(DMA_SLAVE, tdma->dma_dev.cap_mask);
+ dma_cap_set(DMA_PRIVATE, tdma->dma_dev.cap_mask);
+ dma_cap_set(DMA_CYCLIC, tdma->dma_dev.cap_mask);
+
+ tdma->dma_dev.dev = &pdev->dev;
+ tdma->dma_dev.device_alloc_chan_resources =
+ tegra_adma_alloc_chan_resources;
+ tdma->dma_dev.device_free_chan_resources =
+ tegra_adma_free_chan_resources;
+ tdma->dma_dev.device_issue_pending = tegra_adma_issue_pending;
+ tdma->dma_dev.device_prep_dma_cyclic = tegra_adma_prep_dma_cyclic;
+ tdma->dma_dev.device_config = tegra_adma_slave_config;
+ tdma->dma_dev.device_tx_status = tegra_adma_tx_status;
+ tdma->dma_dev.device_terminate_all = tegra_adma_terminate_all;
+ tdma->dma_dev.src_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_4_BYTES);
+ tdma->dma_dev.dst_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_4_BYTES);
+ tdma->dma_dev.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
+ tdma->dma_dev.residue_granularity = DMA_RESIDUE_GRANULARITY_SEGMENT;
+ tdma->dma_dev.device_pause = tegra_adma_pause;
+ tdma->dma_dev.device_resume = tegra_adma_resume;
+
+ ret = dma_async_device_register(&tdma->dma_dev);
+ if (ret < 0) {
+ dev_err(&pdev->dev, "ADMA registration failed: %d\n", ret);
+ goto rpm_put;
+ }
+
+ ret = of_dma_controller_register(pdev->dev.of_node,
+ tegra_dma_of_xlate, tdma);
+ if (ret < 0) {
+ dev_err(&pdev->dev, "ADMA OF registration failed %d\n", ret);
+ goto dma_remove;
+ }
+
+ pm_runtime_put(&pdev->dev);
+
+ dev_info(&pdev->dev, "Tegra210 ADMA driver registered %d channels\n",
+ tdma->nr_channels);
+
+ return 0;
+
+dma_remove:
+ dma_async_device_unregister(&tdma->dma_dev);
+rpm_put:
+ pm_runtime_put_sync(&pdev->dev);
+rpm_disable:
+ pm_runtime_disable(&pdev->dev);
+irq_dispose:
+ while (--i >= 0)
+ irq_dispose_mapping(tdma->channels[i].irq);
+
+ return ret;
+}
+
+static int tegra_adma_remove(struct platform_device *pdev)
+{
+ struct tegra_adma *tdma = platform_get_drvdata(pdev);
+ int i;
+
+ of_dma_controller_free(pdev->dev.of_node);
+ dma_async_device_unregister(&tdma->dma_dev);
+
+ for (i = 0; i < tdma->nr_channels; ++i)
+ irq_dispose_mapping(tdma->channels[i].irq);
+
+ pm_runtime_disable(&pdev->dev);
+
+ return 0;
+}
+
+static const struct dev_pm_ops tegra_adma_dev_pm_ops = {
+ SET_RUNTIME_PM_OPS(tegra_adma_runtime_suspend,
+ tegra_adma_runtime_resume, NULL)
+ SET_LATE_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend,
+ pm_runtime_force_resume)
+};
+
+static struct platform_driver tegra_admac_driver = {
+ .driver = {
+ .name = "tegra-adma",
+ .pm = &tegra_adma_dev_pm_ops,
+ .of_match_table = tegra_adma_of_match,
+ },
+ .probe = tegra_adma_probe,
+ .remove = tegra_adma_remove,
+};
+
+module_platform_driver(tegra_admac_driver);
+
+MODULE_ALIAS("platform:tegra210-adma");
+MODULE_DESCRIPTION("NVIDIA Tegra ADMA driver");
+MODULE_AUTHOR("Dara Ramesh <dramesh@nvidia.com>");
+MODULE_AUTHOR("Jon Hunter <jonathanh@nvidia.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/ti/Kconfig b/drivers/dma/ti/Kconfig
new file mode 100644
index 000000000..79618fac1
--- /dev/null
+++ b/drivers/dma/ti/Kconfig
@@ -0,0 +1,62 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Texas Instruments DMA drivers
+#
+
+config TI_CPPI41
+ tristate "Texas Instruments CPPI 4.1 DMA support"
+ depends on (ARCH_OMAP || ARCH_DAVINCI_DA8XX)
+ select DMA_ENGINE
+ help
+ The Communications Port Programming Interface (CPPI) 4.1 DMA engine
+ is currently used by the USB driver on AM335x and DA8xx platforms.
+
+config TI_EDMA
+ tristate "Texas Instruments EDMA support"
+ depends on ARCH_DAVINCI || ARCH_OMAP || ARCH_KEYSTONE || COMPILE_TEST
+ select DMA_ENGINE
+ select DMA_VIRTUAL_CHANNELS
+ select TI_DMA_CROSSBAR if (ARCH_OMAP || COMPILE_TEST)
+ default y
+ help
+ Enable support for the TI EDMA (Enhanced DMA) controller. This DMA
+ engine is found on TI DaVinci, AM33xx, AM43xx, DRA7xx and Keystone 2
+ parts.
+
+config DMA_OMAP
+ tristate "Texas Instruments sDMA (omap-dma) support"
+ depends on ARCH_OMAP || COMPILE_TEST
+ select DMA_ENGINE
+ select DMA_VIRTUAL_CHANNELS
+ select TI_DMA_CROSSBAR if (SOC_DRA7XX || COMPILE_TEST)
+ default y
+ help
+ Enable support for the TI sDMA (System DMA or DMA4) controller. This
+ DMA engine is found on OMAP and DRA7xx parts.
+
+config TI_K3_UDMA
+ bool "Texas Instruments UDMA support"
+ depends on ARCH_K3
+ depends on TI_SCI_PROTOCOL
+ depends on TI_SCI_INTA_IRQCHIP
+ select DMA_ENGINE
+ select DMA_VIRTUAL_CHANNELS
+ select TI_K3_RINGACC
+ select TI_K3_PSIL
+ help
+ Enable support for the TI UDMA (Unified DMA) controller. This
+ DMA engine is used in AM65x and j721e.
+
+config TI_K3_UDMA_GLUE_LAYER
+ bool "Texas Instruments UDMA Glue layer for non DMAengine users"
+ depends on ARCH_K3
+ depends on TI_K3_UDMA
+ help
+ Say y here to support the K3 NAVSS DMA glue interface
+ If unsure, say N.
+
+config TI_K3_PSIL
+ bool
+
+config TI_DMA_CROSSBAR
+ bool
diff --git a/drivers/dma/ti/Makefile b/drivers/dma/ti/Makefile
new file mode 100644
index 000000000..d3a303f0d
--- /dev/null
+++ b/drivers/dma/ti/Makefile
@@ -0,0 +1,14 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_TI_CPPI41) += cppi41.o
+obj-$(CONFIG_TI_EDMA) += edma.o
+obj-$(CONFIG_DMA_OMAP) += omap-dma.o
+obj-$(CONFIG_TI_K3_UDMA) += k3-udma.o
+obj-$(CONFIG_TI_K3_UDMA_GLUE_LAYER) += k3-udma-glue.o
+obj-$(CONFIG_TI_K3_PSIL) += k3-psil.o \
+ k3-psil-am654.o \
+ k3-psil-j721e.o \
+ k3-psil-j7200.o \
+ k3-psil-am64.o \
+ k3-psil-j721s2.o \
+ k3-psil-am62.o
+obj-$(CONFIG_TI_DMA_CROSSBAR) += dma-crossbar.o
diff --git a/drivers/dma/ti/cppi41.c b/drivers/dma/ti/cppi41.c
new file mode 100644
index 000000000..695915dba
--- /dev/null
+++ b/drivers/dma/ti/cppi41.c
@@ -0,0 +1,1261 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/delay.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/platform_device.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/slab.h>
+#include <linux/of_dma.h>
+#include <linux/of_irq.h>
+#include <linux/dmapool.h>
+#include <linux/interrupt.h>
+#include <linux/of_address.h>
+#include <linux/pm_runtime.h>
+#include "../dmaengine.h"
+
+#define DESC_TYPE 27
+#define DESC_TYPE_HOST 0x10
+#define DESC_TYPE_TEARD 0x13
+
+#define TD_DESC_IS_RX (1 << 16)
+#define TD_DESC_DMA_NUM 10
+
+#define DESC_LENGTH_BITS_NUM 21
+
+#define DESC_TYPE_USB (5 << 26)
+#define DESC_PD_COMPLETE (1 << 31)
+
+/* DMA engine */
+#define DMA_TDFDQ 4
+#define DMA_TXGCR(x) (0x800 + (x) * 0x20)
+#define DMA_RXGCR(x) (0x808 + (x) * 0x20)
+#define RXHPCRA0 4
+
+#define GCR_CHAN_ENABLE (1 << 31)
+#define GCR_TEARDOWN (1 << 30)
+#define GCR_STARV_RETRY (1 << 24)
+#define GCR_DESC_TYPE_HOST (1 << 14)
+
+/* DMA scheduler */
+#define DMA_SCHED_CTRL 0
+#define DMA_SCHED_CTRL_EN (1 << 31)
+#define DMA_SCHED_WORD(x) ((x) * 4 + 0x800)
+
+#define SCHED_ENTRY0_CHAN(x) ((x) << 0)
+#define SCHED_ENTRY0_IS_RX (1 << 7)
+
+#define SCHED_ENTRY1_CHAN(x) ((x) << 8)
+#define SCHED_ENTRY1_IS_RX (1 << 15)
+
+#define SCHED_ENTRY2_CHAN(x) ((x) << 16)
+#define SCHED_ENTRY2_IS_RX (1 << 23)
+
+#define SCHED_ENTRY3_CHAN(x) ((x) << 24)
+#define SCHED_ENTRY3_IS_RX (1 << 31)
+
+/* Queue manager */
+/* 4 KiB of memory for descriptors, 2 for each endpoint */
+#define ALLOC_DECS_NUM 128
+#define DESCS_AREAS 1
+#define TOTAL_DESCS_NUM (ALLOC_DECS_NUM * DESCS_AREAS)
+#define QMGR_SCRATCH_SIZE (TOTAL_DESCS_NUM * 4)
+
+#define QMGR_LRAM0_BASE 0x80
+#define QMGR_LRAM_SIZE 0x84
+#define QMGR_LRAM1_BASE 0x88
+#define QMGR_MEMBASE(x) (0x1000 + (x) * 0x10)
+#define QMGR_MEMCTRL(x) (0x1004 + (x) * 0x10)
+#define QMGR_MEMCTRL_IDX_SH 16
+#define QMGR_MEMCTRL_DESC_SH 8
+
+#define QMGR_PEND(x) (0x90 + (x) * 4)
+
+#define QMGR_PENDING_SLOT_Q(x) (x / 32)
+#define QMGR_PENDING_BIT_Q(x) (x % 32)
+
+#define QMGR_QUEUE_A(n) (0x2000 + (n) * 0x10)
+#define QMGR_QUEUE_B(n) (0x2004 + (n) * 0x10)
+#define QMGR_QUEUE_C(n) (0x2008 + (n) * 0x10)
+#define QMGR_QUEUE_D(n) (0x200c + (n) * 0x10)
+
+/* Packet Descriptor */
+#define PD2_ZERO_LENGTH (1 << 19)
+
+struct cppi41_channel {
+ struct dma_chan chan;
+ struct dma_async_tx_descriptor txd;
+ struct cppi41_dd *cdd;
+ struct cppi41_desc *desc;
+ dma_addr_t desc_phys;
+ void __iomem *gcr_reg;
+ int is_tx;
+ u32 residue;
+
+ unsigned int q_num;
+ unsigned int q_comp_num;
+ unsigned int port_num;
+
+ unsigned td_retry;
+ unsigned td_queued:1;
+ unsigned td_seen:1;
+ unsigned td_desc_seen:1;
+
+ struct list_head node; /* Node for pending list */
+};
+
+struct cppi41_desc {
+ u32 pd0;
+ u32 pd1;
+ u32 pd2;
+ u32 pd3;
+ u32 pd4;
+ u32 pd5;
+ u32 pd6;
+ u32 pd7;
+} __aligned(32);
+
+struct chan_queues {
+ u16 submit;
+ u16 complete;
+};
+
+struct cppi41_dd {
+ struct dma_device ddev;
+
+ void *qmgr_scratch;
+ dma_addr_t scratch_phys;
+
+ struct cppi41_desc *cd;
+ dma_addr_t descs_phys;
+ u32 first_td_desc;
+ struct cppi41_channel *chan_busy[ALLOC_DECS_NUM];
+
+ void __iomem *ctrl_mem;
+ void __iomem *sched_mem;
+ void __iomem *qmgr_mem;
+ unsigned int irq;
+ const struct chan_queues *queues_rx;
+ const struct chan_queues *queues_tx;
+ struct chan_queues td_queue;
+ u16 first_completion_queue;
+ u16 qmgr_num_pend;
+ u32 n_chans;
+ u8 platform;
+
+ struct list_head pending; /* Pending queued transfers */
+ spinlock_t lock; /* Lock for pending list */
+
+ /* context for suspend/resume */
+ unsigned int dma_tdfdq;
+
+ bool is_suspended;
+};
+
+static struct chan_queues am335x_usb_queues_tx[] = {
+ /* USB0 ENDP 1 */
+ [ 0] = { .submit = 32, .complete = 93},
+ [ 1] = { .submit = 34, .complete = 94},
+ [ 2] = { .submit = 36, .complete = 95},
+ [ 3] = { .submit = 38, .complete = 96},
+ [ 4] = { .submit = 40, .complete = 97},
+ [ 5] = { .submit = 42, .complete = 98},
+ [ 6] = { .submit = 44, .complete = 99},
+ [ 7] = { .submit = 46, .complete = 100},
+ [ 8] = { .submit = 48, .complete = 101},
+ [ 9] = { .submit = 50, .complete = 102},
+ [10] = { .submit = 52, .complete = 103},
+ [11] = { .submit = 54, .complete = 104},
+ [12] = { .submit = 56, .complete = 105},
+ [13] = { .submit = 58, .complete = 106},
+ [14] = { .submit = 60, .complete = 107},
+
+ /* USB1 ENDP1 */
+ [15] = { .submit = 62, .complete = 125},
+ [16] = { .submit = 64, .complete = 126},
+ [17] = { .submit = 66, .complete = 127},
+ [18] = { .submit = 68, .complete = 128},
+ [19] = { .submit = 70, .complete = 129},
+ [20] = { .submit = 72, .complete = 130},
+ [21] = { .submit = 74, .complete = 131},
+ [22] = { .submit = 76, .complete = 132},
+ [23] = { .submit = 78, .complete = 133},
+ [24] = { .submit = 80, .complete = 134},
+ [25] = { .submit = 82, .complete = 135},
+ [26] = { .submit = 84, .complete = 136},
+ [27] = { .submit = 86, .complete = 137},
+ [28] = { .submit = 88, .complete = 138},
+ [29] = { .submit = 90, .complete = 139},
+};
+
+static const struct chan_queues am335x_usb_queues_rx[] = {
+ /* USB0 ENDP 1 */
+ [ 0] = { .submit = 1, .complete = 109},
+ [ 1] = { .submit = 2, .complete = 110},
+ [ 2] = { .submit = 3, .complete = 111},
+ [ 3] = { .submit = 4, .complete = 112},
+ [ 4] = { .submit = 5, .complete = 113},
+ [ 5] = { .submit = 6, .complete = 114},
+ [ 6] = { .submit = 7, .complete = 115},
+ [ 7] = { .submit = 8, .complete = 116},
+ [ 8] = { .submit = 9, .complete = 117},
+ [ 9] = { .submit = 10, .complete = 118},
+ [10] = { .submit = 11, .complete = 119},
+ [11] = { .submit = 12, .complete = 120},
+ [12] = { .submit = 13, .complete = 121},
+ [13] = { .submit = 14, .complete = 122},
+ [14] = { .submit = 15, .complete = 123},
+
+ /* USB1 ENDP 1 */
+ [15] = { .submit = 16, .complete = 141},
+ [16] = { .submit = 17, .complete = 142},
+ [17] = { .submit = 18, .complete = 143},
+ [18] = { .submit = 19, .complete = 144},
+ [19] = { .submit = 20, .complete = 145},
+ [20] = { .submit = 21, .complete = 146},
+ [21] = { .submit = 22, .complete = 147},
+ [22] = { .submit = 23, .complete = 148},
+ [23] = { .submit = 24, .complete = 149},
+ [24] = { .submit = 25, .complete = 150},
+ [25] = { .submit = 26, .complete = 151},
+ [26] = { .submit = 27, .complete = 152},
+ [27] = { .submit = 28, .complete = 153},
+ [28] = { .submit = 29, .complete = 154},
+ [29] = { .submit = 30, .complete = 155},
+};
+
+static const struct chan_queues da8xx_usb_queues_tx[] = {
+ [0] = { .submit = 16, .complete = 24},
+ [1] = { .submit = 18, .complete = 24},
+ [2] = { .submit = 20, .complete = 24},
+ [3] = { .submit = 22, .complete = 24},
+};
+
+static const struct chan_queues da8xx_usb_queues_rx[] = {
+ [0] = { .submit = 1, .complete = 26},
+ [1] = { .submit = 3, .complete = 26},
+ [2] = { .submit = 5, .complete = 26},
+ [3] = { .submit = 7, .complete = 26},
+};
+
+struct cppi_glue_infos {
+ const struct chan_queues *queues_rx;
+ const struct chan_queues *queues_tx;
+ struct chan_queues td_queue;
+ u16 first_completion_queue;
+ u16 qmgr_num_pend;
+};
+
+static struct cppi41_channel *to_cpp41_chan(struct dma_chan *c)
+{
+ return container_of(c, struct cppi41_channel, chan);
+}
+
+static struct cppi41_channel *desc_to_chan(struct cppi41_dd *cdd, u32 desc)
+{
+ struct cppi41_channel *c;
+ u32 descs_size;
+ u32 desc_num;
+
+ descs_size = sizeof(struct cppi41_desc) * ALLOC_DECS_NUM;
+
+ if (!((desc >= cdd->descs_phys) &&
+ (desc < (cdd->descs_phys + descs_size)))) {
+ return NULL;
+ }
+
+ desc_num = (desc - cdd->descs_phys) / sizeof(struct cppi41_desc);
+ BUG_ON(desc_num >= ALLOC_DECS_NUM);
+ c = cdd->chan_busy[desc_num];
+ cdd->chan_busy[desc_num] = NULL;
+
+ /* Usecount for chan_busy[], paired with push_desc_queue() */
+ pm_runtime_put(cdd->ddev.dev);
+
+ return c;
+}
+
+static void cppi_writel(u32 val, void *__iomem *mem)
+{
+ __raw_writel(val, mem);
+}
+
+static u32 cppi_readl(void *__iomem *mem)
+{
+ return __raw_readl(mem);
+}
+
+static u32 pd_trans_len(u32 val)
+{
+ return val & ((1 << (DESC_LENGTH_BITS_NUM + 1)) - 1);
+}
+
+static u32 cppi41_pop_desc(struct cppi41_dd *cdd, unsigned queue_num)
+{
+ u32 desc;
+
+ desc = cppi_readl(cdd->qmgr_mem + QMGR_QUEUE_D(queue_num));
+ desc &= ~0x1f;
+ return desc;
+}
+
+static irqreturn_t cppi41_irq(int irq, void *data)
+{
+ struct cppi41_dd *cdd = data;
+ u16 first_completion_queue = cdd->first_completion_queue;
+ u16 qmgr_num_pend = cdd->qmgr_num_pend;
+ struct cppi41_channel *c;
+ int i;
+
+ for (i = QMGR_PENDING_SLOT_Q(first_completion_queue); i < qmgr_num_pend;
+ i++) {
+ u32 val;
+ u32 q_num;
+
+ val = cppi_readl(cdd->qmgr_mem + QMGR_PEND(i));
+ if (i == QMGR_PENDING_SLOT_Q(first_completion_queue) && val) {
+ u32 mask;
+ /* set corresponding bit for completion Q 93 */
+ mask = 1 << QMGR_PENDING_BIT_Q(first_completion_queue);
+ /* not set all bits for queues less than Q 93 */
+ mask--;
+ /* now invert and keep only Q 93+ set */
+ val &= ~mask;
+ }
+
+ if (val)
+ __iormb();
+
+ while (val) {
+ u32 desc, len;
+
+ /*
+ * This should never trigger, see the comments in
+ * push_desc_queue()
+ */
+ WARN_ON(cdd->is_suspended);
+
+ q_num = __fls(val);
+ val &= ~(1 << q_num);
+ q_num += 32 * i;
+ desc = cppi41_pop_desc(cdd, q_num);
+ c = desc_to_chan(cdd, desc);
+ if (WARN_ON(!c)) {
+ pr_err("%s() q %d desc %08x\n", __func__,
+ q_num, desc);
+ continue;
+ }
+
+ if (c->desc->pd2 & PD2_ZERO_LENGTH)
+ len = 0;
+ else
+ len = pd_trans_len(c->desc->pd0);
+
+ c->residue = pd_trans_len(c->desc->pd6) - len;
+ dma_cookie_complete(&c->txd);
+ dmaengine_desc_get_callback_invoke(&c->txd, NULL);
+ }
+ }
+ return IRQ_HANDLED;
+}
+
+static dma_cookie_t cppi41_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+ dma_cookie_t cookie;
+
+ cookie = dma_cookie_assign(tx);
+
+ return cookie;
+}
+
+static int cppi41_dma_alloc_chan_resources(struct dma_chan *chan)
+{
+ struct cppi41_channel *c = to_cpp41_chan(chan);
+ struct cppi41_dd *cdd = c->cdd;
+ int error;
+
+ error = pm_runtime_get_sync(cdd->ddev.dev);
+ if (error < 0) {
+ dev_err(cdd->ddev.dev, "%s pm runtime get: %i\n",
+ __func__, error);
+ pm_runtime_put_noidle(cdd->ddev.dev);
+
+ return error;
+ }
+
+ dma_cookie_init(chan);
+ dma_async_tx_descriptor_init(&c->txd, chan);
+ c->txd.tx_submit = cppi41_tx_submit;
+
+ if (!c->is_tx)
+ cppi_writel(c->q_num, c->gcr_reg + RXHPCRA0);
+
+ pm_runtime_mark_last_busy(cdd->ddev.dev);
+ pm_runtime_put_autosuspend(cdd->ddev.dev);
+
+ return 0;
+}
+
+static void cppi41_dma_free_chan_resources(struct dma_chan *chan)
+{
+ struct cppi41_channel *c = to_cpp41_chan(chan);
+ struct cppi41_dd *cdd = c->cdd;
+ int error;
+
+ error = pm_runtime_get_sync(cdd->ddev.dev);
+ if (error < 0) {
+ pm_runtime_put_noidle(cdd->ddev.dev);
+
+ return;
+ }
+
+ WARN_ON(!list_empty(&cdd->pending));
+
+ pm_runtime_mark_last_busy(cdd->ddev.dev);
+ pm_runtime_put_autosuspend(cdd->ddev.dev);
+}
+
+static enum dma_status cppi41_dma_tx_status(struct dma_chan *chan,
+ dma_cookie_t cookie, struct dma_tx_state *txstate)
+{
+ struct cppi41_channel *c = to_cpp41_chan(chan);
+ enum dma_status ret;
+
+ ret = dma_cookie_status(chan, cookie, txstate);
+
+ dma_set_residue(txstate, c->residue);
+
+ return ret;
+}
+
+static void push_desc_queue(struct cppi41_channel *c)
+{
+ struct cppi41_dd *cdd = c->cdd;
+ u32 desc_num;
+ u32 desc_phys;
+ u32 reg;
+
+ c->residue = 0;
+
+ reg = GCR_CHAN_ENABLE;
+ if (!c->is_tx) {
+ reg |= GCR_STARV_RETRY;
+ reg |= GCR_DESC_TYPE_HOST;
+ reg |= c->q_comp_num;
+ }
+
+ cppi_writel(reg, c->gcr_reg);
+
+ /*
+ * We don't use writel() but __raw_writel() so we have to make sure
+ * that the DMA descriptor in coherent memory made to the main memory
+ * before starting the dma engine.
+ */
+ __iowmb();
+
+ /*
+ * DMA transfers can take at least 200ms to complete with USB mass
+ * storage connected. To prevent autosuspend timeouts, we must use
+ * pm_runtime_get/put() when chan_busy[] is modified. This will get
+ * cleared in desc_to_chan() or cppi41_stop_chan() depending on the
+ * outcome of the transfer.
+ */
+ pm_runtime_get(cdd->ddev.dev);
+
+ desc_phys = lower_32_bits(c->desc_phys);
+ desc_num = (desc_phys - cdd->descs_phys) / sizeof(struct cppi41_desc);
+ WARN_ON(cdd->chan_busy[desc_num]);
+ cdd->chan_busy[desc_num] = c;
+
+ reg = (sizeof(struct cppi41_desc) - 24) / 4;
+ reg |= desc_phys;
+ cppi_writel(reg, cdd->qmgr_mem + QMGR_QUEUE_D(c->q_num));
+}
+
+/*
+ * Caller must hold cdd->lock to prevent push_desc_queue()
+ * getting called out of order. We have both cppi41_dma_issue_pending()
+ * and cppi41_runtime_resume() call this function.
+ */
+static void cppi41_run_queue(struct cppi41_dd *cdd)
+{
+ struct cppi41_channel *c, *_c;
+
+ list_for_each_entry_safe(c, _c, &cdd->pending, node) {
+ push_desc_queue(c);
+ list_del(&c->node);
+ }
+}
+
+static void cppi41_dma_issue_pending(struct dma_chan *chan)
+{
+ struct cppi41_channel *c = to_cpp41_chan(chan);
+ struct cppi41_dd *cdd = c->cdd;
+ unsigned long flags;
+ int error;
+
+ error = pm_runtime_get(cdd->ddev.dev);
+ if ((error != -EINPROGRESS) && error < 0) {
+ pm_runtime_put_noidle(cdd->ddev.dev);
+ dev_err(cdd->ddev.dev, "Failed to pm_runtime_get: %i\n",
+ error);
+
+ return;
+ }
+
+ spin_lock_irqsave(&cdd->lock, flags);
+ list_add_tail(&c->node, &cdd->pending);
+ if (!cdd->is_suspended)
+ cppi41_run_queue(cdd);
+ spin_unlock_irqrestore(&cdd->lock, flags);
+
+ pm_runtime_mark_last_busy(cdd->ddev.dev);
+ pm_runtime_put_autosuspend(cdd->ddev.dev);
+}
+
+static u32 get_host_pd0(u32 length)
+{
+ u32 reg;
+
+ reg = DESC_TYPE_HOST << DESC_TYPE;
+ reg |= length;
+
+ return reg;
+}
+
+static u32 get_host_pd1(struct cppi41_channel *c)
+{
+ u32 reg;
+
+ reg = 0;
+
+ return reg;
+}
+
+static u32 get_host_pd2(struct cppi41_channel *c)
+{
+ u32 reg;
+
+ reg = DESC_TYPE_USB;
+ reg |= c->q_comp_num;
+
+ return reg;
+}
+
+static u32 get_host_pd3(u32 length)
+{
+ u32 reg;
+
+ /* PD3 = packet size */
+ reg = length;
+
+ return reg;
+}
+
+static u32 get_host_pd6(u32 length)
+{
+ u32 reg;
+
+ /* PD6 buffer size */
+ reg = DESC_PD_COMPLETE;
+ reg |= length;
+
+ return reg;
+}
+
+static u32 get_host_pd4_or_7(u32 addr)
+{
+ u32 reg;
+
+ reg = addr;
+
+ return reg;
+}
+
+static u32 get_host_pd5(void)
+{
+ u32 reg;
+
+ reg = 0;
+
+ return reg;
+}
+
+static struct dma_async_tx_descriptor *cppi41_dma_prep_slave_sg(
+ struct dma_chan *chan, struct scatterlist *sgl, unsigned sg_len,
+ enum dma_transfer_direction dir, unsigned long tx_flags, void *context)
+{
+ struct cppi41_channel *c = to_cpp41_chan(chan);
+ struct dma_async_tx_descriptor *txd = NULL;
+ struct cppi41_dd *cdd = c->cdd;
+ struct cppi41_desc *d;
+ struct scatterlist *sg;
+ unsigned int i;
+ int error;
+
+ error = pm_runtime_get(cdd->ddev.dev);
+ if (error < 0) {
+ pm_runtime_put_noidle(cdd->ddev.dev);
+
+ return NULL;
+ }
+
+ if (cdd->is_suspended)
+ goto err_out_not_ready;
+
+ d = c->desc;
+ for_each_sg(sgl, sg, sg_len, i) {
+ u32 addr;
+ u32 len;
+
+ /* We need to use more than one desc once musb supports sg */
+ addr = lower_32_bits(sg_dma_address(sg));
+ len = sg_dma_len(sg);
+
+ d->pd0 = get_host_pd0(len);
+ d->pd1 = get_host_pd1(c);
+ d->pd2 = get_host_pd2(c);
+ d->pd3 = get_host_pd3(len);
+ d->pd4 = get_host_pd4_or_7(addr);
+ d->pd5 = get_host_pd5();
+ d->pd6 = get_host_pd6(len);
+ d->pd7 = get_host_pd4_or_7(addr);
+
+ d++;
+ }
+
+ txd = &c->txd;
+
+err_out_not_ready:
+ pm_runtime_mark_last_busy(cdd->ddev.dev);
+ pm_runtime_put_autosuspend(cdd->ddev.dev);
+
+ return txd;
+}
+
+static void cppi41_compute_td_desc(struct cppi41_desc *d)
+{
+ d->pd0 = DESC_TYPE_TEARD << DESC_TYPE;
+}
+
+static int cppi41_tear_down_chan(struct cppi41_channel *c)
+{
+ struct dmaengine_result abort_result;
+ struct cppi41_dd *cdd = c->cdd;
+ struct cppi41_desc *td;
+ u32 reg;
+ u32 desc_phys;
+ u32 td_desc_phys;
+
+ td = cdd->cd;
+ td += cdd->first_td_desc;
+
+ td_desc_phys = cdd->descs_phys;
+ td_desc_phys += cdd->first_td_desc * sizeof(struct cppi41_desc);
+
+ if (!c->td_queued) {
+ cppi41_compute_td_desc(td);
+ __iowmb();
+
+ reg = (sizeof(struct cppi41_desc) - 24) / 4;
+ reg |= td_desc_phys;
+ cppi_writel(reg, cdd->qmgr_mem +
+ QMGR_QUEUE_D(cdd->td_queue.submit));
+
+ reg = GCR_CHAN_ENABLE;
+ if (!c->is_tx) {
+ reg |= GCR_STARV_RETRY;
+ reg |= GCR_DESC_TYPE_HOST;
+ reg |= cdd->td_queue.complete;
+ }
+ reg |= GCR_TEARDOWN;
+ cppi_writel(reg, c->gcr_reg);
+ c->td_queued = 1;
+ c->td_retry = 500;
+ }
+
+ if (!c->td_seen || !c->td_desc_seen) {
+
+ desc_phys = cppi41_pop_desc(cdd, cdd->td_queue.complete);
+ if (!desc_phys && c->is_tx)
+ desc_phys = cppi41_pop_desc(cdd, c->q_comp_num);
+
+ if (desc_phys == c->desc_phys) {
+ c->td_desc_seen = 1;
+
+ } else if (desc_phys == td_desc_phys) {
+ u32 pd0;
+
+ __iormb();
+ pd0 = td->pd0;
+ WARN_ON((pd0 >> DESC_TYPE) != DESC_TYPE_TEARD);
+ WARN_ON(!c->is_tx && !(pd0 & TD_DESC_IS_RX));
+ WARN_ON((pd0 & 0x1f) != c->port_num);
+ c->td_seen = 1;
+ } else if (desc_phys) {
+ WARN_ON_ONCE(1);
+ }
+ }
+ c->td_retry--;
+ /*
+ * If the TX descriptor / channel is in use, the caller needs to poke
+ * his TD bit multiple times. After that he hardware releases the
+ * transfer descriptor followed by TD descriptor. Waiting seems not to
+ * cause any difference.
+ * RX seems to be thrown out right away. However once the TearDown
+ * descriptor gets through we are done. If we have seen the transfer
+ * descriptor before the TD we fetch it from enqueue, it has to be
+ * there waiting for us.
+ */
+ if (!c->td_seen && c->td_retry) {
+ udelay(1);
+ return -EAGAIN;
+ }
+ WARN_ON(!c->td_retry);
+
+ if (!c->td_desc_seen) {
+ desc_phys = cppi41_pop_desc(cdd, c->q_num);
+ if (!desc_phys)
+ desc_phys = cppi41_pop_desc(cdd, c->q_comp_num);
+ WARN_ON(!desc_phys);
+ }
+
+ c->td_queued = 0;
+ c->td_seen = 0;
+ c->td_desc_seen = 0;
+ cppi_writel(0, c->gcr_reg);
+
+ /* Invoke the callback to do the necessary clean-up */
+ abort_result.result = DMA_TRANS_ABORTED;
+ dma_cookie_complete(&c->txd);
+ dmaengine_desc_get_callback_invoke(&c->txd, &abort_result);
+
+ return 0;
+}
+
+static int cppi41_stop_chan(struct dma_chan *chan)
+{
+ struct cppi41_channel *c = to_cpp41_chan(chan);
+ struct cppi41_dd *cdd = c->cdd;
+ u32 desc_num;
+ u32 desc_phys;
+ int ret;
+
+ desc_phys = lower_32_bits(c->desc_phys);
+ desc_num = (desc_phys - cdd->descs_phys) / sizeof(struct cppi41_desc);
+ if (!cdd->chan_busy[desc_num]) {
+ struct cppi41_channel *cc, *_ct;
+
+ /*
+ * channels might still be in the pending list if
+ * cppi41_dma_issue_pending() is called after
+ * cppi41_runtime_suspend() is called
+ */
+ list_for_each_entry_safe(cc, _ct, &cdd->pending, node) {
+ if (cc != c)
+ continue;
+ list_del(&cc->node);
+ break;
+ }
+ return 0;
+ }
+
+ ret = cppi41_tear_down_chan(c);
+ if (ret)
+ return ret;
+
+ WARN_ON(!cdd->chan_busy[desc_num]);
+ cdd->chan_busy[desc_num] = NULL;
+
+ /* Usecount for chan_busy[], paired with push_desc_queue() */
+ pm_runtime_put(cdd->ddev.dev);
+
+ return 0;
+}
+
+static int cppi41_add_chans(struct device *dev, struct cppi41_dd *cdd)
+{
+ struct cppi41_channel *cchan, *chans;
+ int i;
+ u32 n_chans = cdd->n_chans;
+
+ /*
+ * The channels can only be used as TX or as RX. So we add twice
+ * that much dma channels because USB can only do RX or TX.
+ */
+ n_chans *= 2;
+
+ chans = devm_kcalloc(dev, n_chans, sizeof(*chans), GFP_KERNEL);
+ if (!chans)
+ return -ENOMEM;
+
+ for (i = 0; i < n_chans; i++) {
+ cchan = &chans[i];
+
+ cchan->cdd = cdd;
+ if (i & 1) {
+ cchan->gcr_reg = cdd->ctrl_mem + DMA_TXGCR(i >> 1);
+ cchan->is_tx = 1;
+ } else {
+ cchan->gcr_reg = cdd->ctrl_mem + DMA_RXGCR(i >> 1);
+ cchan->is_tx = 0;
+ }
+ cchan->port_num = i >> 1;
+ cchan->desc = &cdd->cd[i];
+ cchan->desc_phys = cdd->descs_phys;
+ cchan->desc_phys += i * sizeof(struct cppi41_desc);
+ cchan->chan.device = &cdd->ddev;
+ list_add_tail(&cchan->chan.device_node, &cdd->ddev.channels);
+ }
+ cdd->first_td_desc = n_chans;
+
+ return 0;
+}
+
+static void purge_descs(struct device *dev, struct cppi41_dd *cdd)
+{
+ unsigned int mem_decs;
+ int i;
+
+ mem_decs = ALLOC_DECS_NUM * sizeof(struct cppi41_desc);
+
+ for (i = 0; i < DESCS_AREAS; i++) {
+
+ cppi_writel(0, cdd->qmgr_mem + QMGR_MEMBASE(i));
+ cppi_writel(0, cdd->qmgr_mem + QMGR_MEMCTRL(i));
+
+ dma_free_coherent(dev, mem_decs, cdd->cd,
+ cdd->descs_phys);
+ }
+}
+
+static void disable_sched(struct cppi41_dd *cdd)
+{
+ cppi_writel(0, cdd->sched_mem + DMA_SCHED_CTRL);
+}
+
+static void deinit_cppi41(struct device *dev, struct cppi41_dd *cdd)
+{
+ disable_sched(cdd);
+
+ purge_descs(dev, cdd);
+
+ cppi_writel(0, cdd->qmgr_mem + QMGR_LRAM0_BASE);
+ cppi_writel(0, cdd->qmgr_mem + QMGR_LRAM0_BASE);
+ dma_free_coherent(dev, QMGR_SCRATCH_SIZE, cdd->qmgr_scratch,
+ cdd->scratch_phys);
+}
+
+static int init_descs(struct device *dev, struct cppi41_dd *cdd)
+{
+ unsigned int desc_size;
+ unsigned int mem_decs;
+ int i;
+ u32 reg;
+ u32 idx;
+
+ BUILD_BUG_ON(sizeof(struct cppi41_desc) &
+ (sizeof(struct cppi41_desc) - 1));
+ BUILD_BUG_ON(sizeof(struct cppi41_desc) < 32);
+ BUILD_BUG_ON(ALLOC_DECS_NUM < 32);
+
+ desc_size = sizeof(struct cppi41_desc);
+ mem_decs = ALLOC_DECS_NUM * desc_size;
+
+ idx = 0;
+ for (i = 0; i < DESCS_AREAS; i++) {
+
+ reg = idx << QMGR_MEMCTRL_IDX_SH;
+ reg |= (ilog2(desc_size) - 5) << QMGR_MEMCTRL_DESC_SH;
+ reg |= ilog2(ALLOC_DECS_NUM) - 5;
+
+ BUILD_BUG_ON(DESCS_AREAS != 1);
+ cdd->cd = dma_alloc_coherent(dev, mem_decs,
+ &cdd->descs_phys, GFP_KERNEL);
+ if (!cdd->cd)
+ return -ENOMEM;
+
+ cppi_writel(cdd->descs_phys, cdd->qmgr_mem + QMGR_MEMBASE(i));
+ cppi_writel(reg, cdd->qmgr_mem + QMGR_MEMCTRL(i));
+
+ idx += ALLOC_DECS_NUM;
+ }
+ return 0;
+}
+
+static void init_sched(struct cppi41_dd *cdd)
+{
+ unsigned ch;
+ unsigned word;
+ u32 reg;
+
+ word = 0;
+ cppi_writel(0, cdd->sched_mem + DMA_SCHED_CTRL);
+ for (ch = 0; ch < cdd->n_chans; ch += 2) {
+
+ reg = SCHED_ENTRY0_CHAN(ch);
+ reg |= SCHED_ENTRY1_CHAN(ch) | SCHED_ENTRY1_IS_RX;
+
+ reg |= SCHED_ENTRY2_CHAN(ch + 1);
+ reg |= SCHED_ENTRY3_CHAN(ch + 1) | SCHED_ENTRY3_IS_RX;
+ cppi_writel(reg, cdd->sched_mem + DMA_SCHED_WORD(word));
+ word++;
+ }
+ reg = cdd->n_chans * 2 - 1;
+ reg |= DMA_SCHED_CTRL_EN;
+ cppi_writel(reg, cdd->sched_mem + DMA_SCHED_CTRL);
+}
+
+static int init_cppi41(struct device *dev, struct cppi41_dd *cdd)
+{
+ int ret;
+
+ BUILD_BUG_ON(QMGR_SCRATCH_SIZE > ((1 << 14) - 1));
+ cdd->qmgr_scratch = dma_alloc_coherent(dev, QMGR_SCRATCH_SIZE,
+ &cdd->scratch_phys, GFP_KERNEL);
+ if (!cdd->qmgr_scratch)
+ return -ENOMEM;
+
+ cppi_writel(cdd->scratch_phys, cdd->qmgr_mem + QMGR_LRAM0_BASE);
+ cppi_writel(TOTAL_DESCS_NUM, cdd->qmgr_mem + QMGR_LRAM_SIZE);
+ cppi_writel(0, cdd->qmgr_mem + QMGR_LRAM1_BASE);
+
+ ret = init_descs(dev, cdd);
+ if (ret)
+ goto err_td;
+
+ cppi_writel(cdd->td_queue.submit, cdd->ctrl_mem + DMA_TDFDQ);
+ init_sched(cdd);
+
+ return 0;
+err_td:
+ deinit_cppi41(dev, cdd);
+ return ret;
+}
+
+static struct platform_driver cpp41_dma_driver;
+/*
+ * The param format is:
+ * X Y
+ * X: Port
+ * Y: 0 = RX else TX
+ */
+#define INFO_PORT 0
+#define INFO_IS_TX 1
+
+static bool cpp41_dma_filter_fn(struct dma_chan *chan, void *param)
+{
+ struct cppi41_channel *cchan;
+ struct cppi41_dd *cdd;
+ const struct chan_queues *queues;
+ u32 *num = param;
+
+ if (chan->device->dev->driver != &cpp41_dma_driver.driver)
+ return false;
+
+ cchan = to_cpp41_chan(chan);
+
+ if (cchan->port_num != num[INFO_PORT])
+ return false;
+
+ if (cchan->is_tx && !num[INFO_IS_TX])
+ return false;
+ cdd = cchan->cdd;
+ if (cchan->is_tx)
+ queues = cdd->queues_tx;
+ else
+ queues = cdd->queues_rx;
+
+ BUILD_BUG_ON(ARRAY_SIZE(am335x_usb_queues_rx) !=
+ ARRAY_SIZE(am335x_usb_queues_tx));
+ if (WARN_ON(cchan->port_num >= ARRAY_SIZE(am335x_usb_queues_rx)))
+ return false;
+
+ cchan->q_num = queues[cchan->port_num].submit;
+ cchan->q_comp_num = queues[cchan->port_num].complete;
+ return true;
+}
+
+static struct of_dma_filter_info cpp41_dma_info = {
+ .filter_fn = cpp41_dma_filter_fn,
+};
+
+static struct dma_chan *cppi41_dma_xlate(struct of_phandle_args *dma_spec,
+ struct of_dma *ofdma)
+{
+ int count = dma_spec->args_count;
+ struct of_dma_filter_info *info = ofdma->of_dma_data;
+
+ if (!info || !info->filter_fn)
+ return NULL;
+
+ if (count != 2)
+ return NULL;
+
+ return dma_request_channel(info->dma_cap, info->filter_fn,
+ &dma_spec->args[0]);
+}
+
+static const struct cppi_glue_infos am335x_usb_infos = {
+ .queues_rx = am335x_usb_queues_rx,
+ .queues_tx = am335x_usb_queues_tx,
+ .td_queue = { .submit = 31, .complete = 0 },
+ .first_completion_queue = 93,
+ .qmgr_num_pend = 5,
+};
+
+static const struct cppi_glue_infos da8xx_usb_infos = {
+ .queues_rx = da8xx_usb_queues_rx,
+ .queues_tx = da8xx_usb_queues_tx,
+ .td_queue = { .submit = 31, .complete = 0 },
+ .first_completion_queue = 24,
+ .qmgr_num_pend = 2,
+};
+
+static const struct of_device_id cppi41_dma_ids[] = {
+ { .compatible = "ti,am3359-cppi41", .data = &am335x_usb_infos},
+ { .compatible = "ti,da830-cppi41", .data = &da8xx_usb_infos},
+ {},
+};
+MODULE_DEVICE_TABLE(of, cppi41_dma_ids);
+
+static const struct cppi_glue_infos *get_glue_info(struct device *dev)
+{
+ const struct of_device_id *of_id;
+
+ of_id = of_match_node(cppi41_dma_ids, dev->of_node);
+ if (!of_id)
+ return NULL;
+ return of_id->data;
+}
+
+#define CPPI41_DMA_BUSWIDTHS (BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | \
+ BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \
+ BIT(DMA_SLAVE_BUSWIDTH_3_BYTES) | \
+ BIT(DMA_SLAVE_BUSWIDTH_4_BYTES))
+
+static int cppi41_dma_probe(struct platform_device *pdev)
+{
+ struct cppi41_dd *cdd;
+ struct device *dev = &pdev->dev;
+ const struct cppi_glue_infos *glue_info;
+ struct resource *mem;
+ int index;
+ int irq;
+ int ret;
+
+ glue_info = get_glue_info(dev);
+ if (!glue_info)
+ return -EINVAL;
+
+ cdd = devm_kzalloc(&pdev->dev, sizeof(*cdd), GFP_KERNEL);
+ if (!cdd)
+ return -ENOMEM;
+
+ dma_cap_set(DMA_SLAVE, cdd->ddev.cap_mask);
+ cdd->ddev.device_alloc_chan_resources = cppi41_dma_alloc_chan_resources;
+ cdd->ddev.device_free_chan_resources = cppi41_dma_free_chan_resources;
+ cdd->ddev.device_tx_status = cppi41_dma_tx_status;
+ cdd->ddev.device_issue_pending = cppi41_dma_issue_pending;
+ cdd->ddev.device_prep_slave_sg = cppi41_dma_prep_slave_sg;
+ cdd->ddev.device_terminate_all = cppi41_stop_chan;
+ cdd->ddev.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
+ cdd->ddev.src_addr_widths = CPPI41_DMA_BUSWIDTHS;
+ cdd->ddev.dst_addr_widths = CPPI41_DMA_BUSWIDTHS;
+ cdd->ddev.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
+ cdd->ddev.dev = dev;
+ INIT_LIST_HEAD(&cdd->ddev.channels);
+ cpp41_dma_info.dma_cap = cdd->ddev.cap_mask;
+
+ index = of_property_match_string(dev->of_node,
+ "reg-names", "controller");
+ if (index < 0)
+ return index;
+
+ mem = platform_get_resource(pdev, IORESOURCE_MEM, index);
+ cdd->ctrl_mem = devm_ioremap_resource(dev, mem);
+ if (IS_ERR(cdd->ctrl_mem))
+ return PTR_ERR(cdd->ctrl_mem);
+
+ mem = platform_get_resource(pdev, IORESOURCE_MEM, index + 1);
+ cdd->sched_mem = devm_ioremap_resource(dev, mem);
+ if (IS_ERR(cdd->sched_mem))
+ return PTR_ERR(cdd->sched_mem);
+
+ mem = platform_get_resource(pdev, IORESOURCE_MEM, index + 2);
+ cdd->qmgr_mem = devm_ioremap_resource(dev, mem);
+ if (IS_ERR(cdd->qmgr_mem))
+ return PTR_ERR(cdd->qmgr_mem);
+
+ spin_lock_init(&cdd->lock);
+ INIT_LIST_HEAD(&cdd->pending);
+
+ platform_set_drvdata(pdev, cdd);
+
+ pm_runtime_enable(dev);
+ pm_runtime_set_autosuspend_delay(dev, 100);
+ pm_runtime_use_autosuspend(dev);
+ ret = pm_runtime_get_sync(dev);
+ if (ret < 0)
+ goto err_get_sync;
+
+ cdd->queues_rx = glue_info->queues_rx;
+ cdd->queues_tx = glue_info->queues_tx;
+ cdd->td_queue = glue_info->td_queue;
+ cdd->qmgr_num_pend = glue_info->qmgr_num_pend;
+ cdd->first_completion_queue = glue_info->first_completion_queue;
+
+ /* Parse new and deprecated dma-channels properties */
+ ret = of_property_read_u32(dev->of_node,
+ "dma-channels", &cdd->n_chans);
+ if (ret)
+ ret = of_property_read_u32(dev->of_node,
+ "#dma-channels", &cdd->n_chans);
+ if (ret)
+ goto err_get_n_chans;
+
+ ret = init_cppi41(dev, cdd);
+ if (ret)
+ goto err_init_cppi;
+
+ ret = cppi41_add_chans(dev, cdd);
+ if (ret)
+ goto err_chans;
+
+ irq = irq_of_parse_and_map(dev->of_node, 0);
+ if (!irq) {
+ ret = -EINVAL;
+ goto err_chans;
+ }
+
+ ret = devm_request_irq(&pdev->dev, irq, cppi41_irq, IRQF_SHARED,
+ dev_name(dev), cdd);
+ if (ret)
+ goto err_chans;
+ cdd->irq = irq;
+
+ ret = dma_async_device_register(&cdd->ddev);
+ if (ret)
+ goto err_chans;
+
+ ret = of_dma_controller_register(dev->of_node,
+ cppi41_dma_xlate, &cpp41_dma_info);
+ if (ret)
+ goto err_of;
+
+ pm_runtime_mark_last_busy(dev);
+ pm_runtime_put_autosuspend(dev);
+
+ return 0;
+err_of:
+ dma_async_device_unregister(&cdd->ddev);
+err_chans:
+ deinit_cppi41(dev, cdd);
+err_init_cppi:
+ pm_runtime_dont_use_autosuspend(dev);
+err_get_n_chans:
+err_get_sync:
+ pm_runtime_put_sync(dev);
+ pm_runtime_disable(dev);
+ return ret;
+}
+
+static int cppi41_dma_remove(struct platform_device *pdev)
+{
+ struct cppi41_dd *cdd = platform_get_drvdata(pdev);
+ int error;
+
+ error = pm_runtime_get_sync(&pdev->dev);
+ if (error < 0)
+ dev_err(&pdev->dev, "%s could not pm_runtime_get: %i\n",
+ __func__, error);
+ of_dma_controller_free(pdev->dev.of_node);
+ dma_async_device_unregister(&cdd->ddev);
+
+ devm_free_irq(&pdev->dev, cdd->irq, cdd);
+ deinit_cppi41(&pdev->dev, cdd);
+ pm_runtime_dont_use_autosuspend(&pdev->dev);
+ pm_runtime_put_sync(&pdev->dev);
+ pm_runtime_disable(&pdev->dev);
+ return 0;
+}
+
+static int __maybe_unused cppi41_suspend(struct device *dev)
+{
+ struct cppi41_dd *cdd = dev_get_drvdata(dev);
+
+ cdd->dma_tdfdq = cppi_readl(cdd->ctrl_mem + DMA_TDFDQ);
+ disable_sched(cdd);
+
+ return 0;
+}
+
+static int __maybe_unused cppi41_resume(struct device *dev)
+{
+ struct cppi41_dd *cdd = dev_get_drvdata(dev);
+ struct cppi41_channel *c;
+ int i;
+
+ for (i = 0; i < DESCS_AREAS; i++)
+ cppi_writel(cdd->descs_phys, cdd->qmgr_mem + QMGR_MEMBASE(i));
+
+ list_for_each_entry(c, &cdd->ddev.channels, chan.device_node)
+ if (!c->is_tx)
+ cppi_writel(c->q_num, c->gcr_reg + RXHPCRA0);
+
+ init_sched(cdd);
+
+ cppi_writel(cdd->dma_tdfdq, cdd->ctrl_mem + DMA_TDFDQ);
+ cppi_writel(cdd->scratch_phys, cdd->qmgr_mem + QMGR_LRAM0_BASE);
+ cppi_writel(QMGR_SCRATCH_SIZE, cdd->qmgr_mem + QMGR_LRAM_SIZE);
+ cppi_writel(0, cdd->qmgr_mem + QMGR_LRAM1_BASE);
+
+ return 0;
+}
+
+static int __maybe_unused cppi41_runtime_suspend(struct device *dev)
+{
+ struct cppi41_dd *cdd = dev_get_drvdata(dev);
+ unsigned long flags;
+
+ spin_lock_irqsave(&cdd->lock, flags);
+ cdd->is_suspended = true;
+ WARN_ON(!list_empty(&cdd->pending));
+ spin_unlock_irqrestore(&cdd->lock, flags);
+
+ return 0;
+}
+
+static int __maybe_unused cppi41_runtime_resume(struct device *dev)
+{
+ struct cppi41_dd *cdd = dev_get_drvdata(dev);
+ unsigned long flags;
+
+ spin_lock_irqsave(&cdd->lock, flags);
+ cdd->is_suspended = false;
+ cppi41_run_queue(cdd);
+ spin_unlock_irqrestore(&cdd->lock, flags);
+
+ return 0;
+}
+
+static const struct dev_pm_ops cppi41_pm_ops = {
+ SET_LATE_SYSTEM_SLEEP_PM_OPS(cppi41_suspend, cppi41_resume)
+ SET_RUNTIME_PM_OPS(cppi41_runtime_suspend,
+ cppi41_runtime_resume,
+ NULL)
+};
+
+static struct platform_driver cpp41_dma_driver = {
+ .probe = cppi41_dma_probe,
+ .remove = cppi41_dma_remove,
+ .driver = {
+ .name = "cppi41-dma-engine",
+ .pm = &cppi41_pm_ops,
+ .of_match_table = of_match_ptr(cppi41_dma_ids),
+ },
+};
+
+module_platform_driver(cpp41_dma_driver);
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Sebastian Andrzej Siewior <bigeasy@linutronix.de>");
diff --git a/drivers/dma/ti/dma-crossbar.c b/drivers/dma/ti/dma-crossbar.c
new file mode 100644
index 000000000..f744ddbbb
--- /dev/null
+++ b/drivers/dma/ti/dma-crossbar.c
@@ -0,0 +1,477 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2015 Texas Instruments Incorporated - http://www.ti.com
+ * Author: Peter Ujfalusi <peter.ujfalusi@ti.com>
+ */
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/io.h>
+#include <linux/of_address.h>
+#include <linux/of_device.h>
+#include <linux/of_dma.h>
+
+#define TI_XBAR_DRA7 0
+#define TI_XBAR_AM335X 1
+static const u32 ti_xbar_type[] = {
+ [TI_XBAR_DRA7] = TI_XBAR_DRA7,
+ [TI_XBAR_AM335X] = TI_XBAR_AM335X,
+};
+
+static const struct of_device_id ti_dma_xbar_match[] = {
+ {
+ .compatible = "ti,dra7-dma-crossbar",
+ .data = &ti_xbar_type[TI_XBAR_DRA7],
+ },
+ {
+ .compatible = "ti,am335x-edma-crossbar",
+ .data = &ti_xbar_type[TI_XBAR_AM335X],
+ },
+ {},
+};
+
+/* Crossbar on AM335x/AM437x family */
+#define TI_AM335X_XBAR_LINES 64
+
+struct ti_am335x_xbar_data {
+ void __iomem *iomem;
+
+ struct dma_router dmarouter;
+
+ u32 xbar_events; /* maximum number of events to select in xbar */
+ u32 dma_requests; /* number of DMA requests on eDMA */
+};
+
+struct ti_am335x_xbar_map {
+ u16 dma_line;
+ u8 mux_val;
+};
+
+static inline void ti_am335x_xbar_write(void __iomem *iomem, int event, u8 val)
+{
+ /*
+ * TPCC_EVT_MUX_60_63 register layout is different than the
+ * rest, in the sense, that event 63 is mapped to lowest byte
+ * and event 60 is mapped to highest, handle it separately.
+ */
+ if (event >= 60 && event <= 63)
+ writeb_relaxed(val, iomem + (63 - event % 4));
+ else
+ writeb_relaxed(val, iomem + event);
+}
+
+static void ti_am335x_xbar_free(struct device *dev, void *route_data)
+{
+ struct ti_am335x_xbar_data *xbar = dev_get_drvdata(dev);
+ struct ti_am335x_xbar_map *map = route_data;
+
+ dev_dbg(dev, "Unmapping XBAR event %u on channel %u\n",
+ map->mux_val, map->dma_line);
+
+ ti_am335x_xbar_write(xbar->iomem, map->dma_line, 0);
+ kfree(map);
+}
+
+static void *ti_am335x_xbar_route_allocate(struct of_phandle_args *dma_spec,
+ struct of_dma *ofdma)
+{
+ struct platform_device *pdev = of_find_device_by_node(ofdma->of_node);
+ struct ti_am335x_xbar_data *xbar = platform_get_drvdata(pdev);
+ struct ti_am335x_xbar_map *map;
+
+ if (dma_spec->args_count != 3)
+ return ERR_PTR(-EINVAL);
+
+ if (dma_spec->args[2] >= xbar->xbar_events) {
+ dev_err(&pdev->dev, "Invalid XBAR event number: %d\n",
+ dma_spec->args[2]);
+ return ERR_PTR(-EINVAL);
+ }
+
+ if (dma_spec->args[0] >= xbar->dma_requests) {
+ dev_err(&pdev->dev, "Invalid DMA request line number: %d\n",
+ dma_spec->args[0]);
+ return ERR_PTR(-EINVAL);
+ }
+
+ /* The of_node_put() will be done in the core for the node */
+ dma_spec->np = of_parse_phandle(ofdma->of_node, "dma-masters", 0);
+ if (!dma_spec->np) {
+ dev_err(&pdev->dev, "Can't get DMA master\n");
+ return ERR_PTR(-EINVAL);
+ }
+
+ map = kzalloc(sizeof(*map), GFP_KERNEL);
+ if (!map) {
+ of_node_put(dma_spec->np);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ map->dma_line = (u16)dma_spec->args[0];
+ map->mux_val = (u8)dma_spec->args[2];
+
+ dma_spec->args[2] = 0;
+ dma_spec->args_count = 2;
+
+ dev_dbg(&pdev->dev, "Mapping XBAR event%u to DMA%u\n",
+ map->mux_val, map->dma_line);
+
+ ti_am335x_xbar_write(xbar->iomem, map->dma_line, map->mux_val);
+
+ return map;
+}
+
+static const struct of_device_id ti_am335x_master_match[] __maybe_unused = {
+ { .compatible = "ti,edma3-tpcc", },
+ {},
+};
+
+static int ti_am335x_xbar_probe(struct platform_device *pdev)
+{
+ struct device_node *node = pdev->dev.of_node;
+ const struct of_device_id *match;
+ struct device_node *dma_node;
+ struct ti_am335x_xbar_data *xbar;
+ void __iomem *iomem;
+ int i, ret;
+
+ if (!node)
+ return -ENODEV;
+
+ xbar = devm_kzalloc(&pdev->dev, sizeof(*xbar), GFP_KERNEL);
+ if (!xbar)
+ return -ENOMEM;
+
+ dma_node = of_parse_phandle(node, "dma-masters", 0);
+ if (!dma_node) {
+ dev_err(&pdev->dev, "Can't get DMA master node\n");
+ return -ENODEV;
+ }
+
+ match = of_match_node(ti_am335x_master_match, dma_node);
+ if (!match) {
+ dev_err(&pdev->dev, "DMA master is not supported\n");
+ of_node_put(dma_node);
+ return -EINVAL;
+ }
+
+ if (of_property_read_u32(dma_node, "dma-requests",
+ &xbar->dma_requests)) {
+ dev_info(&pdev->dev,
+ "Missing XBAR output information, using %u.\n",
+ TI_AM335X_XBAR_LINES);
+ xbar->dma_requests = TI_AM335X_XBAR_LINES;
+ }
+ of_node_put(dma_node);
+
+ if (of_property_read_u32(node, "dma-requests", &xbar->xbar_events)) {
+ dev_info(&pdev->dev,
+ "Missing XBAR input information, using %u.\n",
+ TI_AM335X_XBAR_LINES);
+ xbar->xbar_events = TI_AM335X_XBAR_LINES;
+ }
+
+ iomem = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(iomem))
+ return PTR_ERR(iomem);
+
+ xbar->iomem = iomem;
+
+ xbar->dmarouter.dev = &pdev->dev;
+ xbar->dmarouter.route_free = ti_am335x_xbar_free;
+
+ platform_set_drvdata(pdev, xbar);
+
+ /* Reset the crossbar */
+ for (i = 0; i < xbar->dma_requests; i++)
+ ti_am335x_xbar_write(xbar->iomem, i, 0);
+
+ ret = of_dma_router_register(node, ti_am335x_xbar_route_allocate,
+ &xbar->dmarouter);
+
+ return ret;
+}
+
+/* Crossbar on DRA7xx family */
+#define TI_DRA7_XBAR_OUTPUTS 127
+#define TI_DRA7_XBAR_INPUTS 256
+
+struct ti_dra7_xbar_data {
+ void __iomem *iomem;
+
+ struct dma_router dmarouter;
+ struct mutex mutex;
+ unsigned long *dma_inuse;
+
+ u16 safe_val; /* Value to rest the crossbar lines */
+ u32 xbar_requests; /* number of DMA requests connected to XBAR */
+ u32 dma_requests; /* number of DMA requests forwarded to DMA */
+ u32 dma_offset;
+};
+
+struct ti_dra7_xbar_map {
+ u16 xbar_in;
+ int xbar_out;
+};
+
+static inline void ti_dra7_xbar_write(void __iomem *iomem, int xbar, u16 val)
+{
+ writew_relaxed(val, iomem + (xbar * 2));
+}
+
+static void ti_dra7_xbar_free(struct device *dev, void *route_data)
+{
+ struct ti_dra7_xbar_data *xbar = dev_get_drvdata(dev);
+ struct ti_dra7_xbar_map *map = route_data;
+
+ dev_dbg(dev, "Unmapping XBAR%u (was routed to %d)\n",
+ map->xbar_in, map->xbar_out);
+
+ ti_dra7_xbar_write(xbar->iomem, map->xbar_out, xbar->safe_val);
+ mutex_lock(&xbar->mutex);
+ clear_bit(map->xbar_out, xbar->dma_inuse);
+ mutex_unlock(&xbar->mutex);
+ kfree(map);
+}
+
+static void *ti_dra7_xbar_route_allocate(struct of_phandle_args *dma_spec,
+ struct of_dma *ofdma)
+{
+ struct platform_device *pdev = of_find_device_by_node(ofdma->of_node);
+ struct ti_dra7_xbar_data *xbar = platform_get_drvdata(pdev);
+ struct ti_dra7_xbar_map *map;
+
+ if (dma_spec->args[0] >= xbar->xbar_requests) {
+ dev_err(&pdev->dev, "Invalid XBAR request number: %d\n",
+ dma_spec->args[0]);
+ put_device(&pdev->dev);
+ return ERR_PTR(-EINVAL);
+ }
+
+ /* The of_node_put() will be done in the core for the node */
+ dma_spec->np = of_parse_phandle(ofdma->of_node, "dma-masters", 0);
+ if (!dma_spec->np) {
+ dev_err(&pdev->dev, "Can't get DMA master\n");
+ put_device(&pdev->dev);
+ return ERR_PTR(-EINVAL);
+ }
+
+ map = kzalloc(sizeof(*map), GFP_KERNEL);
+ if (!map) {
+ of_node_put(dma_spec->np);
+ put_device(&pdev->dev);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ mutex_lock(&xbar->mutex);
+ map->xbar_out = find_first_zero_bit(xbar->dma_inuse,
+ xbar->dma_requests);
+ if (map->xbar_out == xbar->dma_requests) {
+ mutex_unlock(&xbar->mutex);
+ dev_err(&pdev->dev, "Run out of free DMA requests\n");
+ kfree(map);
+ of_node_put(dma_spec->np);
+ put_device(&pdev->dev);
+ return ERR_PTR(-ENOMEM);
+ }
+ set_bit(map->xbar_out, xbar->dma_inuse);
+ mutex_unlock(&xbar->mutex);
+
+ map->xbar_in = (u16)dma_spec->args[0];
+
+ dma_spec->args[0] = map->xbar_out + xbar->dma_offset;
+
+ dev_dbg(&pdev->dev, "Mapping XBAR%u to DMA%d\n",
+ map->xbar_in, map->xbar_out);
+
+ ti_dra7_xbar_write(xbar->iomem, map->xbar_out, map->xbar_in);
+
+ return map;
+}
+
+#define TI_XBAR_EDMA_OFFSET 0
+#define TI_XBAR_SDMA_OFFSET 1
+static const u32 ti_dma_offset[] = {
+ [TI_XBAR_EDMA_OFFSET] = 0,
+ [TI_XBAR_SDMA_OFFSET] = 1,
+};
+
+static const struct of_device_id ti_dra7_master_match[] __maybe_unused = {
+ {
+ .compatible = "ti,omap4430-sdma",
+ .data = &ti_dma_offset[TI_XBAR_SDMA_OFFSET],
+ },
+ {
+ .compatible = "ti,edma3",
+ .data = &ti_dma_offset[TI_XBAR_EDMA_OFFSET],
+ },
+ {
+ .compatible = "ti,edma3-tpcc",
+ .data = &ti_dma_offset[TI_XBAR_EDMA_OFFSET],
+ },
+ {},
+};
+
+static inline void ti_dra7_xbar_reserve(int offset, int len, unsigned long *p)
+{
+ for (; len > 0; len--)
+ set_bit(offset + (len - 1), p);
+}
+
+static int ti_dra7_xbar_probe(struct platform_device *pdev)
+{
+ struct device_node *node = pdev->dev.of_node;
+ const struct of_device_id *match;
+ struct device_node *dma_node;
+ struct ti_dra7_xbar_data *xbar;
+ struct property *prop;
+ u32 safe_val;
+ int sz;
+ void __iomem *iomem;
+ int i, ret;
+
+ if (!node)
+ return -ENODEV;
+
+ xbar = devm_kzalloc(&pdev->dev, sizeof(*xbar), GFP_KERNEL);
+ if (!xbar)
+ return -ENOMEM;
+
+ dma_node = of_parse_phandle(node, "dma-masters", 0);
+ if (!dma_node) {
+ dev_err(&pdev->dev, "Can't get DMA master node\n");
+ return -ENODEV;
+ }
+
+ match = of_match_node(ti_dra7_master_match, dma_node);
+ if (!match) {
+ dev_err(&pdev->dev, "DMA master is not supported\n");
+ of_node_put(dma_node);
+ return -EINVAL;
+ }
+
+ if (of_property_read_u32(dma_node, "dma-requests",
+ &xbar->dma_requests)) {
+ dev_info(&pdev->dev,
+ "Missing XBAR output information, using %u.\n",
+ TI_DRA7_XBAR_OUTPUTS);
+ xbar->dma_requests = TI_DRA7_XBAR_OUTPUTS;
+ }
+ of_node_put(dma_node);
+
+ xbar->dma_inuse = devm_kcalloc(&pdev->dev,
+ BITS_TO_LONGS(xbar->dma_requests),
+ sizeof(unsigned long), GFP_KERNEL);
+ if (!xbar->dma_inuse)
+ return -ENOMEM;
+
+ if (of_property_read_u32(node, "dma-requests", &xbar->xbar_requests)) {
+ dev_info(&pdev->dev,
+ "Missing XBAR input information, using %u.\n",
+ TI_DRA7_XBAR_INPUTS);
+ xbar->xbar_requests = TI_DRA7_XBAR_INPUTS;
+ }
+
+ if (!of_property_read_u32(node, "ti,dma-safe-map", &safe_val))
+ xbar->safe_val = (u16)safe_val;
+
+
+ prop = of_find_property(node, "ti,reserved-dma-request-ranges", &sz);
+ if (prop) {
+ const char pname[] = "ti,reserved-dma-request-ranges";
+ u32 (*rsv_events)[2];
+ size_t nelm = sz / sizeof(*rsv_events);
+ int i;
+
+ if (!nelm)
+ return -EINVAL;
+
+ rsv_events = kcalloc(nelm, sizeof(*rsv_events), GFP_KERNEL);
+ if (!rsv_events)
+ return -ENOMEM;
+
+ ret = of_property_read_u32_array(node, pname, (u32 *)rsv_events,
+ nelm * 2);
+ if (ret) {
+ kfree(rsv_events);
+ return ret;
+ }
+
+ for (i = 0; i < nelm; i++) {
+ ti_dra7_xbar_reserve(rsv_events[i][0], rsv_events[i][1],
+ xbar->dma_inuse);
+ }
+ kfree(rsv_events);
+ }
+
+ iomem = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(iomem))
+ return PTR_ERR(iomem);
+
+ xbar->iomem = iomem;
+
+ xbar->dmarouter.dev = &pdev->dev;
+ xbar->dmarouter.route_free = ti_dra7_xbar_free;
+ xbar->dma_offset = *(u32 *)match->data;
+
+ mutex_init(&xbar->mutex);
+ platform_set_drvdata(pdev, xbar);
+
+ /* Reset the crossbar */
+ for (i = 0; i < xbar->dma_requests; i++) {
+ if (!test_bit(i, xbar->dma_inuse))
+ ti_dra7_xbar_write(xbar->iomem, i, xbar->safe_val);
+ }
+
+ ret = of_dma_router_register(node, ti_dra7_xbar_route_allocate,
+ &xbar->dmarouter);
+ if (ret) {
+ /* Restore the defaults for the crossbar */
+ for (i = 0; i < xbar->dma_requests; i++) {
+ if (!test_bit(i, xbar->dma_inuse))
+ ti_dra7_xbar_write(xbar->iomem, i, i);
+ }
+ }
+
+ return ret;
+}
+
+static int ti_dma_xbar_probe(struct platform_device *pdev)
+{
+ const struct of_device_id *match;
+ int ret;
+
+ match = of_match_node(ti_dma_xbar_match, pdev->dev.of_node);
+ if (unlikely(!match))
+ return -EINVAL;
+
+ switch (*(u32 *)match->data) {
+ case TI_XBAR_DRA7:
+ ret = ti_dra7_xbar_probe(pdev);
+ break;
+ case TI_XBAR_AM335X:
+ ret = ti_am335x_xbar_probe(pdev);
+ break;
+ default:
+ dev_err(&pdev->dev, "Unsupported crossbar\n");
+ ret = -ENODEV;
+ break;
+ }
+
+ return ret;
+}
+
+static struct platform_driver ti_dma_xbar_driver = {
+ .driver = {
+ .name = "ti-dma-crossbar",
+ .of_match_table = ti_dma_xbar_match,
+ },
+ .probe = ti_dma_xbar_probe,
+};
+
+static int omap_dmaxbar_init(void)
+{
+ return platform_driver_register(&ti_dma_xbar_driver);
+}
+arch_initcall(omap_dmaxbar_init);
diff --git a/drivers/dma/ti/edma.c b/drivers/dma/ti/edma.c
new file mode 100644
index 000000000..7ec6e5d72
--- /dev/null
+++ b/drivers/dma/ti/edma.c
@@ -0,0 +1,2699 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * TI EDMA DMA engine driver
+ *
+ * Copyright 2012 Texas Instruments
+ */
+
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/bitmap.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/of.h>
+#include <linux/of_dma.h>
+#include <linux/of_irq.h>
+#include <linux/of_address.h>
+#include <linux/of_device.h>
+#include <linux/pm_runtime.h>
+
+#include <linux/platform_data/edma.h>
+
+#include "../dmaengine.h"
+#include "../virt-dma.h"
+
+/* Offsets matching "struct edmacc_param" */
+#define PARM_OPT 0x00
+#define PARM_SRC 0x04
+#define PARM_A_B_CNT 0x08
+#define PARM_DST 0x0c
+#define PARM_SRC_DST_BIDX 0x10
+#define PARM_LINK_BCNTRLD 0x14
+#define PARM_SRC_DST_CIDX 0x18
+#define PARM_CCNT 0x1c
+
+#define PARM_SIZE 0x20
+
+/* Offsets for EDMA CC global channel registers and their shadows */
+#define SH_ER 0x00 /* 64 bits */
+#define SH_ECR 0x08 /* 64 bits */
+#define SH_ESR 0x10 /* 64 bits */
+#define SH_CER 0x18 /* 64 bits */
+#define SH_EER 0x20 /* 64 bits */
+#define SH_EECR 0x28 /* 64 bits */
+#define SH_EESR 0x30 /* 64 bits */
+#define SH_SER 0x38 /* 64 bits */
+#define SH_SECR 0x40 /* 64 bits */
+#define SH_IER 0x50 /* 64 bits */
+#define SH_IECR 0x58 /* 64 bits */
+#define SH_IESR 0x60 /* 64 bits */
+#define SH_IPR 0x68 /* 64 bits */
+#define SH_ICR 0x70 /* 64 bits */
+#define SH_IEVAL 0x78
+#define SH_QER 0x80
+#define SH_QEER 0x84
+#define SH_QEECR 0x88
+#define SH_QEESR 0x8c
+#define SH_QSER 0x90
+#define SH_QSECR 0x94
+#define SH_SIZE 0x200
+
+/* Offsets for EDMA CC global registers */
+#define EDMA_REV 0x0000
+#define EDMA_CCCFG 0x0004
+#define EDMA_QCHMAP 0x0200 /* 8 registers */
+#define EDMA_DMAQNUM 0x0240 /* 8 registers (4 on OMAP-L1xx) */
+#define EDMA_QDMAQNUM 0x0260
+#define EDMA_QUETCMAP 0x0280
+#define EDMA_QUEPRI 0x0284
+#define EDMA_EMR 0x0300 /* 64 bits */
+#define EDMA_EMCR 0x0308 /* 64 bits */
+#define EDMA_QEMR 0x0310
+#define EDMA_QEMCR 0x0314
+#define EDMA_CCERR 0x0318
+#define EDMA_CCERRCLR 0x031c
+#define EDMA_EEVAL 0x0320
+#define EDMA_DRAE 0x0340 /* 4 x 64 bits*/
+#define EDMA_QRAE 0x0380 /* 4 registers */
+#define EDMA_QUEEVTENTRY 0x0400 /* 2 x 16 registers */
+#define EDMA_QSTAT 0x0600 /* 2 registers */
+#define EDMA_QWMTHRA 0x0620
+#define EDMA_QWMTHRB 0x0624
+#define EDMA_CCSTAT 0x0640
+
+#define EDMA_M 0x1000 /* global channel registers */
+#define EDMA_ECR 0x1008
+#define EDMA_ECRH 0x100C
+#define EDMA_SHADOW0 0x2000 /* 4 shadow regions */
+#define EDMA_PARM 0x4000 /* PaRAM entries */
+
+#define PARM_OFFSET(param_no) (EDMA_PARM + ((param_no) << 5))
+
+#define EDMA_DCHMAP 0x0100 /* 64 registers */
+
+/* CCCFG register */
+#define GET_NUM_DMACH(x) (x & 0x7) /* bits 0-2 */
+#define GET_NUM_QDMACH(x) ((x & 0x70) >> 4) /* bits 4-6 */
+#define GET_NUM_PAENTRY(x) ((x & 0x7000) >> 12) /* bits 12-14 */
+#define GET_NUM_EVQUE(x) ((x & 0x70000) >> 16) /* bits 16-18 */
+#define GET_NUM_REGN(x) ((x & 0x300000) >> 20) /* bits 20-21 */
+#define CHMAP_EXIST BIT(24)
+
+/* CCSTAT register */
+#define EDMA_CCSTAT_ACTV BIT(4)
+
+/*
+ * Max of 20 segments per channel to conserve PaRAM slots
+ * Also note that MAX_NR_SG should be at least the no.of periods
+ * that are required for ASoC, otherwise DMA prep calls will
+ * fail. Today davinci-pcm is the only user of this driver and
+ * requires at least 17 slots, so we setup the default to 20.
+ */
+#define MAX_NR_SG 20
+#define EDMA_MAX_SLOTS MAX_NR_SG
+#define EDMA_DESCRIPTORS 16
+
+#define EDMA_CHANNEL_ANY -1 /* for edma_alloc_channel() */
+#define EDMA_SLOT_ANY -1 /* for edma_alloc_slot() */
+#define EDMA_CONT_PARAMS_ANY 1001
+#define EDMA_CONT_PARAMS_FIXED_EXACT 1002
+#define EDMA_CONT_PARAMS_FIXED_NOT_EXACT 1003
+
+/*
+ * 64bit array registers are split into two 32bit registers:
+ * reg0: channel/event 0-31
+ * reg1: channel/event 32-63
+ *
+ * bit 5 in the channel number tells the array index (0/1)
+ * bit 0-4 (0x1f) is the bit offset within the register
+ */
+#define EDMA_REG_ARRAY_INDEX(channel) ((channel) >> 5)
+#define EDMA_CHANNEL_BIT(channel) (BIT((channel) & 0x1f))
+
+/* PaRAM slots are laid out like this */
+struct edmacc_param {
+ u32 opt;
+ u32 src;
+ u32 a_b_cnt;
+ u32 dst;
+ u32 src_dst_bidx;
+ u32 link_bcntrld;
+ u32 src_dst_cidx;
+ u32 ccnt;
+} __packed;
+
+/* fields in edmacc_param.opt */
+#define SAM BIT(0)
+#define DAM BIT(1)
+#define SYNCDIM BIT(2)
+#define STATIC BIT(3)
+#define EDMA_FWID (0x07 << 8)
+#define TCCMODE BIT(11)
+#define EDMA_TCC(t) ((t) << 12)
+#define TCINTEN BIT(20)
+#define ITCINTEN BIT(21)
+#define TCCHEN BIT(22)
+#define ITCCHEN BIT(23)
+
+struct edma_pset {
+ u32 len;
+ dma_addr_t addr;
+ struct edmacc_param param;
+};
+
+struct edma_desc {
+ struct virt_dma_desc vdesc;
+ struct list_head node;
+ enum dma_transfer_direction direction;
+ int cyclic;
+ bool polled;
+ int absync;
+ int pset_nr;
+ struct edma_chan *echan;
+ int processed;
+
+ /*
+ * The following 4 elements are used for residue accounting.
+ *
+ * - processed_stat: the number of SG elements we have traversed
+ * so far to cover accounting. This is updated directly to processed
+ * during edma_callback and is always <= processed, because processed
+ * refers to the number of pending transfer (programmed to EDMA
+ * controller), where as processed_stat tracks number of transfers
+ * accounted for so far.
+ *
+ * - residue: The amount of bytes we have left to transfer for this desc
+ *
+ * - residue_stat: The residue in bytes of data we have covered
+ * so far for accounting. This is updated directly to residue
+ * during callbacks to keep it current.
+ *
+ * - sg_len: Tracks the length of the current intermediate transfer,
+ * this is required to update the residue during intermediate transfer
+ * completion callback.
+ */
+ int processed_stat;
+ u32 sg_len;
+ u32 residue;
+ u32 residue_stat;
+
+ struct edma_pset pset[];
+};
+
+struct edma_cc;
+
+struct edma_tc {
+ struct device_node *node;
+ u16 id;
+};
+
+struct edma_chan {
+ struct virt_dma_chan vchan;
+ struct list_head node;
+ struct edma_desc *edesc;
+ struct edma_cc *ecc;
+ struct edma_tc *tc;
+ int ch_num;
+ bool alloced;
+ bool hw_triggered;
+ int slot[EDMA_MAX_SLOTS];
+ int missed;
+ struct dma_slave_config cfg;
+};
+
+struct edma_cc {
+ struct device *dev;
+ struct edma_soc_info *info;
+ void __iomem *base;
+ int id;
+ bool legacy_mode;
+
+ /* eDMA3 resource information */
+ unsigned num_channels;
+ unsigned num_qchannels;
+ unsigned num_region;
+ unsigned num_slots;
+ unsigned num_tc;
+ bool chmap_exist;
+ enum dma_event_q default_queue;
+
+ unsigned int ccint;
+ unsigned int ccerrint;
+
+ /*
+ * The slot_inuse bit for each PaRAM slot is clear unless the slot is
+ * in use by Linux or if it is allocated to be used by DSP.
+ */
+ unsigned long *slot_inuse;
+
+ /*
+ * For tracking reserved channels used by DSP.
+ * If the bit is cleared, the channel is allocated to be used by DSP
+ * and Linux must not touch it.
+ */
+ unsigned long *channels_mask;
+
+ struct dma_device dma_slave;
+ struct dma_device *dma_memcpy;
+ struct edma_chan *slave_chans;
+ struct edma_tc *tc_list;
+ int dummy_slot;
+};
+
+/* dummy param set used to (re)initialize parameter RAM slots */
+static const struct edmacc_param dummy_paramset = {
+ .link_bcntrld = 0xffff,
+ .ccnt = 1,
+};
+
+#define EDMA_BINDING_LEGACY 0
+#define EDMA_BINDING_TPCC 1
+static const u32 edma_binding_type[] = {
+ [EDMA_BINDING_LEGACY] = EDMA_BINDING_LEGACY,
+ [EDMA_BINDING_TPCC] = EDMA_BINDING_TPCC,
+};
+
+static const struct of_device_id edma_of_ids[] = {
+ {
+ .compatible = "ti,edma3",
+ .data = &edma_binding_type[EDMA_BINDING_LEGACY],
+ },
+ {
+ .compatible = "ti,edma3-tpcc",
+ .data = &edma_binding_type[EDMA_BINDING_TPCC],
+ },
+ {}
+};
+MODULE_DEVICE_TABLE(of, edma_of_ids);
+
+static const struct of_device_id edma_tptc_of_ids[] = {
+ { .compatible = "ti,edma3-tptc", },
+ {}
+};
+MODULE_DEVICE_TABLE(of, edma_tptc_of_ids);
+
+static inline unsigned int edma_read(struct edma_cc *ecc, int offset)
+{
+ return (unsigned int)__raw_readl(ecc->base + offset);
+}
+
+static inline void edma_write(struct edma_cc *ecc, int offset, int val)
+{
+ __raw_writel(val, ecc->base + offset);
+}
+
+static inline void edma_modify(struct edma_cc *ecc, int offset, unsigned and,
+ unsigned or)
+{
+ unsigned val = edma_read(ecc, offset);
+
+ val &= and;
+ val |= or;
+ edma_write(ecc, offset, val);
+}
+
+static inline void edma_and(struct edma_cc *ecc, int offset, unsigned and)
+{
+ unsigned val = edma_read(ecc, offset);
+
+ val &= and;
+ edma_write(ecc, offset, val);
+}
+
+static inline void edma_or(struct edma_cc *ecc, int offset, unsigned or)
+{
+ unsigned val = edma_read(ecc, offset);
+
+ val |= or;
+ edma_write(ecc, offset, val);
+}
+
+static inline unsigned int edma_read_array(struct edma_cc *ecc, int offset,
+ int i)
+{
+ return edma_read(ecc, offset + (i << 2));
+}
+
+static inline void edma_write_array(struct edma_cc *ecc, int offset, int i,
+ unsigned val)
+{
+ edma_write(ecc, offset + (i << 2), val);
+}
+
+static inline void edma_modify_array(struct edma_cc *ecc, int offset, int i,
+ unsigned and, unsigned or)
+{
+ edma_modify(ecc, offset + (i << 2), and, or);
+}
+
+static inline void edma_or_array2(struct edma_cc *ecc, int offset, int i, int j,
+ unsigned or)
+{
+ edma_or(ecc, offset + ((i * 2 + j) << 2), or);
+}
+
+static inline void edma_write_array2(struct edma_cc *ecc, int offset, int i,
+ int j, unsigned val)
+{
+ edma_write(ecc, offset + ((i * 2 + j) << 2), val);
+}
+
+static inline unsigned int edma_shadow0_read_array(struct edma_cc *ecc,
+ int offset, int i)
+{
+ return edma_read(ecc, EDMA_SHADOW0 + offset + (i << 2));
+}
+
+static inline void edma_shadow0_write(struct edma_cc *ecc, int offset,
+ unsigned val)
+{
+ edma_write(ecc, EDMA_SHADOW0 + offset, val);
+}
+
+static inline void edma_shadow0_write_array(struct edma_cc *ecc, int offset,
+ int i, unsigned val)
+{
+ edma_write(ecc, EDMA_SHADOW0 + offset + (i << 2), val);
+}
+
+static inline void edma_param_modify(struct edma_cc *ecc, int offset,
+ int param_no, unsigned and, unsigned or)
+{
+ edma_modify(ecc, EDMA_PARM + offset + (param_no << 5), and, or);
+}
+
+static void edma_assign_priority_to_queue(struct edma_cc *ecc, int queue_no,
+ int priority)
+{
+ int bit = queue_no * 4;
+
+ edma_modify(ecc, EDMA_QUEPRI, ~(0x7 << bit), ((priority & 0x7) << bit));
+}
+
+static void edma_set_chmap(struct edma_chan *echan, int slot)
+{
+ struct edma_cc *ecc = echan->ecc;
+ int channel = EDMA_CHAN_SLOT(echan->ch_num);
+
+ if (ecc->chmap_exist) {
+ slot = EDMA_CHAN_SLOT(slot);
+ edma_write_array(ecc, EDMA_DCHMAP, channel, (slot << 5));
+ }
+}
+
+static void edma_setup_interrupt(struct edma_chan *echan, bool enable)
+{
+ struct edma_cc *ecc = echan->ecc;
+ int channel = EDMA_CHAN_SLOT(echan->ch_num);
+ int idx = EDMA_REG_ARRAY_INDEX(channel);
+ int ch_bit = EDMA_CHANNEL_BIT(channel);
+
+ if (enable) {
+ edma_shadow0_write_array(ecc, SH_ICR, idx, ch_bit);
+ edma_shadow0_write_array(ecc, SH_IESR, idx, ch_bit);
+ } else {
+ edma_shadow0_write_array(ecc, SH_IECR, idx, ch_bit);
+ }
+}
+
+/*
+ * paRAM slot management functions
+ */
+static void edma_write_slot(struct edma_cc *ecc, unsigned slot,
+ const struct edmacc_param *param)
+{
+ slot = EDMA_CHAN_SLOT(slot);
+ if (slot >= ecc->num_slots)
+ return;
+ memcpy_toio(ecc->base + PARM_OFFSET(slot), param, PARM_SIZE);
+}
+
+static int edma_read_slot(struct edma_cc *ecc, unsigned slot,
+ struct edmacc_param *param)
+{
+ slot = EDMA_CHAN_SLOT(slot);
+ if (slot >= ecc->num_slots)
+ return -EINVAL;
+ memcpy_fromio(param, ecc->base + PARM_OFFSET(slot), PARM_SIZE);
+
+ return 0;
+}
+
+/**
+ * edma_alloc_slot - allocate DMA parameter RAM
+ * @ecc: pointer to edma_cc struct
+ * @slot: specific slot to allocate; negative for "any unused slot"
+ *
+ * This allocates a parameter RAM slot, initializing it to hold a
+ * dummy transfer. Slots allocated using this routine have not been
+ * mapped to a hardware DMA channel, and will normally be used by
+ * linking to them from a slot associated with a DMA channel.
+ *
+ * Normal use is to pass EDMA_SLOT_ANY as the @slot, but specific
+ * slots may be allocated on behalf of DSP firmware.
+ *
+ * Returns the number of the slot, else negative errno.
+ */
+static int edma_alloc_slot(struct edma_cc *ecc, int slot)
+{
+ if (slot >= 0) {
+ slot = EDMA_CHAN_SLOT(slot);
+ /* Requesting entry paRAM slot for a HW triggered channel. */
+ if (ecc->chmap_exist && slot < ecc->num_channels)
+ slot = EDMA_SLOT_ANY;
+ }
+
+ if (slot < 0) {
+ if (ecc->chmap_exist)
+ slot = 0;
+ else
+ slot = ecc->num_channels;
+ for (;;) {
+ slot = find_next_zero_bit(ecc->slot_inuse,
+ ecc->num_slots,
+ slot);
+ if (slot == ecc->num_slots)
+ return -ENOMEM;
+ if (!test_and_set_bit(slot, ecc->slot_inuse))
+ break;
+ }
+ } else if (slot >= ecc->num_slots) {
+ return -EINVAL;
+ } else if (test_and_set_bit(slot, ecc->slot_inuse)) {
+ return -EBUSY;
+ }
+
+ edma_write_slot(ecc, slot, &dummy_paramset);
+
+ return EDMA_CTLR_CHAN(ecc->id, slot);
+}
+
+static void edma_free_slot(struct edma_cc *ecc, unsigned slot)
+{
+ slot = EDMA_CHAN_SLOT(slot);
+ if (slot >= ecc->num_slots)
+ return;
+
+ edma_write_slot(ecc, slot, &dummy_paramset);
+ clear_bit(slot, ecc->slot_inuse);
+}
+
+/**
+ * edma_link - link one parameter RAM slot to another
+ * @ecc: pointer to edma_cc struct
+ * @from: parameter RAM slot originating the link
+ * @to: parameter RAM slot which is the link target
+ *
+ * The originating slot should not be part of any active DMA transfer.
+ */
+static void edma_link(struct edma_cc *ecc, unsigned from, unsigned to)
+{
+ if (unlikely(EDMA_CTLR(from) != EDMA_CTLR(to)))
+ dev_warn(ecc->dev, "Ignoring eDMA instance for linking\n");
+
+ from = EDMA_CHAN_SLOT(from);
+ to = EDMA_CHAN_SLOT(to);
+ if (from >= ecc->num_slots || to >= ecc->num_slots)
+ return;
+
+ edma_param_modify(ecc, PARM_LINK_BCNTRLD, from, 0xffff0000,
+ PARM_OFFSET(to));
+}
+
+/**
+ * edma_get_position - returns the current transfer point
+ * @ecc: pointer to edma_cc struct
+ * @slot: parameter RAM slot being examined
+ * @dst: true selects the dest position, false the source
+ *
+ * Returns the position of the current active slot
+ */
+static dma_addr_t edma_get_position(struct edma_cc *ecc, unsigned slot,
+ bool dst)
+{
+ u32 offs;
+
+ slot = EDMA_CHAN_SLOT(slot);
+ offs = PARM_OFFSET(slot);
+ offs += dst ? PARM_DST : PARM_SRC;
+
+ return edma_read(ecc, offs);
+}
+
+/*
+ * Channels with event associations will be triggered by their hardware
+ * events, and channels without such associations will be triggered by
+ * software. (At this writing there is no interface for using software
+ * triggers except with channels that don't support hardware triggers.)
+ */
+static void edma_start(struct edma_chan *echan)
+{
+ struct edma_cc *ecc = echan->ecc;
+ int channel = EDMA_CHAN_SLOT(echan->ch_num);
+ int idx = EDMA_REG_ARRAY_INDEX(channel);
+ int ch_bit = EDMA_CHANNEL_BIT(channel);
+
+ if (!echan->hw_triggered) {
+ /* EDMA channels without event association */
+ dev_dbg(ecc->dev, "ESR%d %08x\n", idx,
+ edma_shadow0_read_array(ecc, SH_ESR, idx));
+ edma_shadow0_write_array(ecc, SH_ESR, idx, ch_bit);
+ } else {
+ /* EDMA channel with event association */
+ dev_dbg(ecc->dev, "ER%d %08x\n", idx,
+ edma_shadow0_read_array(ecc, SH_ER, idx));
+ /* Clear any pending event or error */
+ edma_write_array(ecc, EDMA_ECR, idx, ch_bit);
+ edma_write_array(ecc, EDMA_EMCR, idx, ch_bit);
+ /* Clear any SER */
+ edma_shadow0_write_array(ecc, SH_SECR, idx, ch_bit);
+ edma_shadow0_write_array(ecc, SH_EESR, idx, ch_bit);
+ dev_dbg(ecc->dev, "EER%d %08x\n", idx,
+ edma_shadow0_read_array(ecc, SH_EER, idx));
+ }
+}
+
+static void edma_stop(struct edma_chan *echan)
+{
+ struct edma_cc *ecc = echan->ecc;
+ int channel = EDMA_CHAN_SLOT(echan->ch_num);
+ int idx = EDMA_REG_ARRAY_INDEX(channel);
+ int ch_bit = EDMA_CHANNEL_BIT(channel);
+
+ edma_shadow0_write_array(ecc, SH_EECR, idx, ch_bit);
+ edma_shadow0_write_array(ecc, SH_ECR, idx, ch_bit);
+ edma_shadow0_write_array(ecc, SH_SECR, idx, ch_bit);
+ edma_write_array(ecc, EDMA_EMCR, idx, ch_bit);
+
+ /* clear possibly pending completion interrupt */
+ edma_shadow0_write_array(ecc, SH_ICR, idx, ch_bit);
+
+ dev_dbg(ecc->dev, "EER%d %08x\n", idx,
+ edma_shadow0_read_array(ecc, SH_EER, idx));
+
+ /* REVISIT: consider guarding against inappropriate event
+ * chaining by overwriting with dummy_paramset.
+ */
+}
+
+/*
+ * Temporarily disable EDMA hardware events on the specified channel,
+ * preventing them from triggering new transfers
+ */
+static void edma_pause(struct edma_chan *echan)
+{
+ int channel = EDMA_CHAN_SLOT(echan->ch_num);
+
+ edma_shadow0_write_array(echan->ecc, SH_EECR,
+ EDMA_REG_ARRAY_INDEX(channel),
+ EDMA_CHANNEL_BIT(channel));
+}
+
+/* Re-enable EDMA hardware events on the specified channel. */
+static void edma_resume(struct edma_chan *echan)
+{
+ int channel = EDMA_CHAN_SLOT(echan->ch_num);
+
+ edma_shadow0_write_array(echan->ecc, SH_EESR,
+ EDMA_REG_ARRAY_INDEX(channel),
+ EDMA_CHANNEL_BIT(channel));
+}
+
+static void edma_trigger_channel(struct edma_chan *echan)
+{
+ struct edma_cc *ecc = echan->ecc;
+ int channel = EDMA_CHAN_SLOT(echan->ch_num);
+ int idx = EDMA_REG_ARRAY_INDEX(channel);
+ int ch_bit = EDMA_CHANNEL_BIT(channel);
+
+ edma_shadow0_write_array(ecc, SH_ESR, idx, ch_bit);
+
+ dev_dbg(ecc->dev, "ESR%d %08x\n", idx,
+ edma_shadow0_read_array(ecc, SH_ESR, idx));
+}
+
+static void edma_clean_channel(struct edma_chan *echan)
+{
+ struct edma_cc *ecc = echan->ecc;
+ int channel = EDMA_CHAN_SLOT(echan->ch_num);
+ int idx = EDMA_REG_ARRAY_INDEX(channel);
+ int ch_bit = EDMA_CHANNEL_BIT(channel);
+
+ dev_dbg(ecc->dev, "EMR%d %08x\n", idx,
+ edma_read_array(ecc, EDMA_EMR, idx));
+ edma_shadow0_write_array(ecc, SH_ECR, idx, ch_bit);
+ /* Clear the corresponding EMR bits */
+ edma_write_array(ecc, EDMA_EMCR, idx, ch_bit);
+ /* Clear any SER */
+ edma_shadow0_write_array(ecc, SH_SECR, idx, ch_bit);
+ edma_write(ecc, EDMA_CCERRCLR, BIT(16) | BIT(1) | BIT(0));
+}
+
+/* Move channel to a specific event queue */
+static void edma_assign_channel_eventq(struct edma_chan *echan,
+ enum dma_event_q eventq_no)
+{
+ struct edma_cc *ecc = echan->ecc;
+ int channel = EDMA_CHAN_SLOT(echan->ch_num);
+ int bit = (channel & 0x7) * 4;
+
+ /* default to low priority queue */
+ if (eventq_no == EVENTQ_DEFAULT)
+ eventq_no = ecc->default_queue;
+ if (eventq_no >= ecc->num_tc)
+ return;
+
+ eventq_no &= 7;
+ edma_modify_array(ecc, EDMA_DMAQNUM, (channel >> 3), ~(0x7 << bit),
+ eventq_no << bit);
+}
+
+static int edma_alloc_channel(struct edma_chan *echan,
+ enum dma_event_q eventq_no)
+{
+ struct edma_cc *ecc = echan->ecc;
+ int channel = EDMA_CHAN_SLOT(echan->ch_num);
+
+ if (!test_bit(echan->ch_num, ecc->channels_mask)) {
+ dev_err(ecc->dev, "Channel%d is reserved, can not be used!\n",
+ echan->ch_num);
+ return -EINVAL;
+ }
+
+ /* ensure access through shadow region 0 */
+ edma_or_array2(ecc, EDMA_DRAE, 0, EDMA_REG_ARRAY_INDEX(channel),
+ EDMA_CHANNEL_BIT(channel));
+
+ /* ensure no events are pending */
+ edma_stop(echan);
+
+ edma_setup_interrupt(echan, true);
+
+ edma_assign_channel_eventq(echan, eventq_no);
+
+ return 0;
+}
+
+static void edma_free_channel(struct edma_chan *echan)
+{
+ /* ensure no events are pending */
+ edma_stop(echan);
+ /* REVISIT should probably take out of shadow region 0 */
+ edma_setup_interrupt(echan, false);
+}
+
+static inline struct edma_chan *to_edma_chan(struct dma_chan *c)
+{
+ return container_of(c, struct edma_chan, vchan.chan);
+}
+
+static inline struct edma_desc *to_edma_desc(struct dma_async_tx_descriptor *tx)
+{
+ return container_of(tx, struct edma_desc, vdesc.tx);
+}
+
+static void edma_desc_free(struct virt_dma_desc *vdesc)
+{
+ kfree(container_of(vdesc, struct edma_desc, vdesc));
+}
+
+/* Dispatch a queued descriptor to the controller (caller holds lock) */
+static void edma_execute(struct edma_chan *echan)
+{
+ struct edma_cc *ecc = echan->ecc;
+ struct virt_dma_desc *vdesc;
+ struct edma_desc *edesc;
+ struct device *dev = echan->vchan.chan.device->dev;
+ int i, j, left, nslots;
+
+ if (!echan->edesc) {
+ /* Setup is needed for the first transfer */
+ vdesc = vchan_next_desc(&echan->vchan);
+ if (!vdesc)
+ return;
+ list_del(&vdesc->node);
+ echan->edesc = to_edma_desc(&vdesc->tx);
+ }
+
+ edesc = echan->edesc;
+
+ /* Find out how many left */
+ left = edesc->pset_nr - edesc->processed;
+ nslots = min(MAX_NR_SG, left);
+ edesc->sg_len = 0;
+
+ /* Write descriptor PaRAM set(s) */
+ for (i = 0; i < nslots; i++) {
+ j = i + edesc->processed;
+ edma_write_slot(ecc, echan->slot[i], &edesc->pset[j].param);
+ edesc->sg_len += edesc->pset[j].len;
+ dev_vdbg(dev,
+ "\n pset[%d]:\n"
+ " chnum\t%d\n"
+ " slot\t%d\n"
+ " opt\t%08x\n"
+ " src\t%08x\n"
+ " dst\t%08x\n"
+ " abcnt\t%08x\n"
+ " ccnt\t%08x\n"
+ " bidx\t%08x\n"
+ " cidx\t%08x\n"
+ " lkrld\t%08x\n",
+ j, echan->ch_num, echan->slot[i],
+ edesc->pset[j].param.opt,
+ edesc->pset[j].param.src,
+ edesc->pset[j].param.dst,
+ edesc->pset[j].param.a_b_cnt,
+ edesc->pset[j].param.ccnt,
+ edesc->pset[j].param.src_dst_bidx,
+ edesc->pset[j].param.src_dst_cidx,
+ edesc->pset[j].param.link_bcntrld);
+ /* Link to the previous slot if not the last set */
+ if (i != (nslots - 1))
+ edma_link(ecc, echan->slot[i], echan->slot[i + 1]);
+ }
+
+ edesc->processed += nslots;
+
+ /*
+ * If this is either the last set in a set of SG-list transactions
+ * then setup a link to the dummy slot, this results in all future
+ * events being absorbed and that's OK because we're done
+ */
+ if (edesc->processed == edesc->pset_nr) {
+ if (edesc->cyclic)
+ edma_link(ecc, echan->slot[nslots - 1], echan->slot[1]);
+ else
+ edma_link(ecc, echan->slot[nslots - 1],
+ echan->ecc->dummy_slot);
+ }
+
+ if (echan->missed) {
+ /*
+ * This happens due to setup times between intermediate
+ * transfers in long SG lists which have to be broken up into
+ * transfers of MAX_NR_SG
+ */
+ dev_dbg(dev, "missed event on channel %d\n", echan->ch_num);
+ edma_clean_channel(echan);
+ edma_stop(echan);
+ edma_start(echan);
+ edma_trigger_channel(echan);
+ echan->missed = 0;
+ } else if (edesc->processed <= MAX_NR_SG) {
+ dev_dbg(dev, "first transfer starting on channel %d\n",
+ echan->ch_num);
+ edma_start(echan);
+ } else {
+ dev_dbg(dev, "chan: %d: completed %d elements, resuming\n",
+ echan->ch_num, edesc->processed);
+ edma_resume(echan);
+ }
+}
+
+static int edma_terminate_all(struct dma_chan *chan)
+{
+ struct edma_chan *echan = to_edma_chan(chan);
+ unsigned long flags;
+ LIST_HEAD(head);
+
+ spin_lock_irqsave(&echan->vchan.lock, flags);
+
+ /*
+ * Stop DMA activity: we assume the callback will not be called
+ * after edma_dma() returns (even if it does, it will see
+ * echan->edesc is NULL and exit.)
+ */
+ if (echan->edesc) {
+ edma_stop(echan);
+ /* Move the cyclic channel back to default queue */
+ if (!echan->tc && echan->edesc->cyclic)
+ edma_assign_channel_eventq(echan, EVENTQ_DEFAULT);
+
+ vchan_terminate_vdesc(&echan->edesc->vdesc);
+ echan->edesc = NULL;
+ }
+
+ vchan_get_all_descriptors(&echan->vchan, &head);
+ spin_unlock_irqrestore(&echan->vchan.lock, flags);
+ vchan_dma_desc_free_list(&echan->vchan, &head);
+
+ return 0;
+}
+
+static void edma_synchronize(struct dma_chan *chan)
+{
+ struct edma_chan *echan = to_edma_chan(chan);
+
+ vchan_synchronize(&echan->vchan);
+}
+
+static int edma_slave_config(struct dma_chan *chan,
+ struct dma_slave_config *cfg)
+{
+ struct edma_chan *echan = to_edma_chan(chan);
+
+ if (cfg->src_addr_width == DMA_SLAVE_BUSWIDTH_8_BYTES ||
+ cfg->dst_addr_width == DMA_SLAVE_BUSWIDTH_8_BYTES)
+ return -EINVAL;
+
+ if (cfg->src_maxburst > chan->device->max_burst ||
+ cfg->dst_maxburst > chan->device->max_burst)
+ return -EINVAL;
+
+ memcpy(&echan->cfg, cfg, sizeof(echan->cfg));
+
+ return 0;
+}
+
+static int edma_dma_pause(struct dma_chan *chan)
+{
+ struct edma_chan *echan = to_edma_chan(chan);
+
+ if (!echan->edesc)
+ return -EINVAL;
+
+ edma_pause(echan);
+ return 0;
+}
+
+static int edma_dma_resume(struct dma_chan *chan)
+{
+ struct edma_chan *echan = to_edma_chan(chan);
+
+ edma_resume(echan);
+ return 0;
+}
+
+/*
+ * A PaRAM set configuration abstraction used by other modes
+ * @chan: Channel who's PaRAM set we're configuring
+ * @pset: PaRAM set to initialize and setup.
+ * @src_addr: Source address of the DMA
+ * @dst_addr: Destination address of the DMA
+ * @burst: In units of dev_width, how much to send
+ * @dev_width: How much is the dev_width
+ * @dma_length: Total length of the DMA transfer
+ * @direction: Direction of the transfer
+ */
+static int edma_config_pset(struct dma_chan *chan, struct edma_pset *epset,
+ dma_addr_t src_addr, dma_addr_t dst_addr, u32 burst,
+ unsigned int acnt, unsigned int dma_length,
+ enum dma_transfer_direction direction)
+{
+ struct edma_chan *echan = to_edma_chan(chan);
+ struct device *dev = chan->device->dev;
+ struct edmacc_param *param = &epset->param;
+ int bcnt, ccnt, cidx;
+ int src_bidx, dst_bidx, src_cidx, dst_cidx;
+ int absync;
+
+ /* src/dst_maxburst == 0 is the same case as src/dst_maxburst == 1 */
+ if (!burst)
+ burst = 1;
+ /*
+ * If the maxburst is equal to the fifo width, use
+ * A-synced transfers. This allows for large contiguous
+ * buffer transfers using only one PaRAM set.
+ */
+ if (burst == 1) {
+ /*
+ * For the A-sync case, bcnt and ccnt are the remainder
+ * and quotient respectively of the division of:
+ * (dma_length / acnt) by (SZ_64K -1). This is so
+ * that in case bcnt over flows, we have ccnt to use.
+ * Note: In A-sync transfer only, bcntrld is used, but it
+ * only applies for sg_dma_len(sg) >= SZ_64K.
+ * In this case, the best way adopted is- bccnt for the
+ * first frame will be the remainder below. Then for
+ * every successive frame, bcnt will be SZ_64K-1. This
+ * is assured as bcntrld = 0xffff in end of function.
+ */
+ absync = false;
+ ccnt = dma_length / acnt / (SZ_64K - 1);
+ bcnt = dma_length / acnt - ccnt * (SZ_64K - 1);
+ /*
+ * If bcnt is non-zero, we have a remainder and hence an
+ * extra frame to transfer, so increment ccnt.
+ */
+ if (bcnt)
+ ccnt++;
+ else
+ bcnt = SZ_64K - 1;
+ cidx = acnt;
+ } else {
+ /*
+ * If maxburst is greater than the fifo address_width,
+ * use AB-synced transfers where A count is the fifo
+ * address_width and B count is the maxburst. In this
+ * case, we are limited to transfers of C count frames
+ * of (address_width * maxburst) where C count is limited
+ * to SZ_64K-1. This places an upper bound on the length
+ * of an SG segment that can be handled.
+ */
+ absync = true;
+ bcnt = burst;
+ ccnt = dma_length / (acnt * bcnt);
+ if (ccnt > (SZ_64K - 1)) {
+ dev_err(dev, "Exceeded max SG segment size\n");
+ return -EINVAL;
+ }
+ cidx = acnt * bcnt;
+ }
+
+ epset->len = dma_length;
+
+ if (direction == DMA_MEM_TO_DEV) {
+ src_bidx = acnt;
+ src_cidx = cidx;
+ dst_bidx = 0;
+ dst_cidx = 0;
+ epset->addr = src_addr;
+ } else if (direction == DMA_DEV_TO_MEM) {
+ src_bidx = 0;
+ src_cidx = 0;
+ dst_bidx = acnt;
+ dst_cidx = cidx;
+ epset->addr = dst_addr;
+ } else if (direction == DMA_MEM_TO_MEM) {
+ src_bidx = acnt;
+ src_cidx = cidx;
+ dst_bidx = acnt;
+ dst_cidx = cidx;
+ epset->addr = src_addr;
+ } else {
+ dev_err(dev, "%s: direction not implemented yet\n", __func__);
+ return -EINVAL;
+ }
+
+ param->opt = EDMA_TCC(EDMA_CHAN_SLOT(echan->ch_num));
+ /* Configure A or AB synchronized transfers */
+ if (absync)
+ param->opt |= SYNCDIM;
+
+ param->src = src_addr;
+ param->dst = dst_addr;
+
+ param->src_dst_bidx = (dst_bidx << 16) | src_bidx;
+ param->src_dst_cidx = (dst_cidx << 16) | src_cidx;
+
+ param->a_b_cnt = bcnt << 16 | acnt;
+ param->ccnt = ccnt;
+ /*
+ * Only time when (bcntrld) auto reload is required is for
+ * A-sync case, and in this case, a requirement of reload value
+ * of SZ_64K-1 only is assured. 'link' is initially set to NULL
+ * and then later will be populated by edma_execute.
+ */
+ param->link_bcntrld = 0xffffffff;
+ return absync;
+}
+
+static struct dma_async_tx_descriptor *edma_prep_slave_sg(
+ struct dma_chan *chan, struct scatterlist *sgl,
+ unsigned int sg_len, enum dma_transfer_direction direction,
+ unsigned long tx_flags, void *context)
+{
+ struct edma_chan *echan = to_edma_chan(chan);
+ struct device *dev = chan->device->dev;
+ struct edma_desc *edesc;
+ dma_addr_t src_addr = 0, dst_addr = 0;
+ enum dma_slave_buswidth dev_width;
+ u32 burst;
+ struct scatterlist *sg;
+ int i, nslots, ret;
+
+ if (unlikely(!echan || !sgl || !sg_len))
+ return NULL;
+
+ if (direction == DMA_DEV_TO_MEM) {
+ src_addr = echan->cfg.src_addr;
+ dev_width = echan->cfg.src_addr_width;
+ burst = echan->cfg.src_maxburst;
+ } else if (direction == DMA_MEM_TO_DEV) {
+ dst_addr = echan->cfg.dst_addr;
+ dev_width = echan->cfg.dst_addr_width;
+ burst = echan->cfg.dst_maxburst;
+ } else {
+ dev_err(dev, "%s: bad direction: %d\n", __func__, direction);
+ return NULL;
+ }
+
+ if (dev_width == DMA_SLAVE_BUSWIDTH_UNDEFINED) {
+ dev_err(dev, "%s: Undefined slave buswidth\n", __func__);
+ return NULL;
+ }
+
+ edesc = kzalloc(struct_size(edesc, pset, sg_len), GFP_ATOMIC);
+ if (!edesc)
+ return NULL;
+
+ edesc->pset_nr = sg_len;
+ edesc->residue = 0;
+ edesc->direction = direction;
+ edesc->echan = echan;
+
+ /* Allocate a PaRAM slot, if needed */
+ nslots = min_t(unsigned, MAX_NR_SG, sg_len);
+
+ for (i = 0; i < nslots; i++) {
+ if (echan->slot[i] < 0) {
+ echan->slot[i] =
+ edma_alloc_slot(echan->ecc, EDMA_SLOT_ANY);
+ if (echan->slot[i] < 0) {
+ kfree(edesc);
+ dev_err(dev, "%s: Failed to allocate slot\n",
+ __func__);
+ return NULL;
+ }
+ }
+ }
+
+ /* Configure PaRAM sets for each SG */
+ for_each_sg(sgl, sg, sg_len, i) {
+ /* Get address for each SG */
+ if (direction == DMA_DEV_TO_MEM)
+ dst_addr = sg_dma_address(sg);
+ else
+ src_addr = sg_dma_address(sg);
+
+ ret = edma_config_pset(chan, &edesc->pset[i], src_addr,
+ dst_addr, burst, dev_width,
+ sg_dma_len(sg), direction);
+ if (ret < 0) {
+ kfree(edesc);
+ return NULL;
+ }
+
+ edesc->absync = ret;
+ edesc->residue += sg_dma_len(sg);
+
+ if (i == sg_len - 1)
+ /* Enable completion interrupt */
+ edesc->pset[i].param.opt |= TCINTEN;
+ else if (!((i+1) % MAX_NR_SG))
+ /*
+ * Enable early completion interrupt for the
+ * intermediateset. In this case the driver will be
+ * notified when the paRAM set is submitted to TC. This
+ * will allow more time to set up the next set of slots.
+ */
+ edesc->pset[i].param.opt |= (TCINTEN | TCCMODE);
+ }
+ edesc->residue_stat = edesc->residue;
+
+ return vchan_tx_prep(&echan->vchan, &edesc->vdesc, tx_flags);
+}
+
+static struct dma_async_tx_descriptor *edma_prep_dma_memcpy(
+ struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
+ size_t len, unsigned long tx_flags)
+{
+ int ret, nslots;
+ struct edma_desc *edesc;
+ struct device *dev = chan->device->dev;
+ struct edma_chan *echan = to_edma_chan(chan);
+ unsigned int width, pset_len, array_size;
+
+ if (unlikely(!echan || !len))
+ return NULL;
+
+ /* Align the array size (acnt block) with the transfer properties */
+ switch (__ffs((src | dest | len))) {
+ case 0:
+ array_size = SZ_32K - 1;
+ break;
+ case 1:
+ array_size = SZ_32K - 2;
+ break;
+ default:
+ array_size = SZ_32K - 4;
+ break;
+ }
+
+ if (len < SZ_64K) {
+ /*
+ * Transfer size less than 64K can be handled with one paRAM
+ * slot and with one burst.
+ * ACNT = length
+ */
+ width = len;
+ pset_len = len;
+ nslots = 1;
+ } else {
+ /*
+ * Transfer size bigger than 64K will be handled with maximum of
+ * two paRAM slots.
+ * slot1: (full_length / 32767) times 32767 bytes bursts.
+ * ACNT = 32767, length1: (full_length / 32767) * 32767
+ * slot2: the remaining amount of data after slot1.
+ * ACNT = full_length - length1, length2 = ACNT
+ *
+ * When the full_length is a multiple of 32767 one slot can be
+ * used to complete the transfer.
+ */
+ width = array_size;
+ pset_len = rounddown(len, width);
+ /* One slot is enough for lengths multiple of (SZ_32K -1) */
+ if (unlikely(pset_len == len))
+ nslots = 1;
+ else
+ nslots = 2;
+ }
+
+ edesc = kzalloc(struct_size(edesc, pset, nslots), GFP_ATOMIC);
+ if (!edesc)
+ return NULL;
+
+ edesc->pset_nr = nslots;
+ edesc->residue = edesc->residue_stat = len;
+ edesc->direction = DMA_MEM_TO_MEM;
+ edesc->echan = echan;
+
+ ret = edma_config_pset(chan, &edesc->pset[0], src, dest, 1,
+ width, pset_len, DMA_MEM_TO_MEM);
+ if (ret < 0) {
+ kfree(edesc);
+ return NULL;
+ }
+
+ edesc->absync = ret;
+
+ edesc->pset[0].param.opt |= ITCCHEN;
+ if (nslots == 1) {
+ /* Enable transfer complete interrupt if requested */
+ if (tx_flags & DMA_PREP_INTERRUPT)
+ edesc->pset[0].param.opt |= TCINTEN;
+ } else {
+ /* Enable transfer complete chaining for the first slot */
+ edesc->pset[0].param.opt |= TCCHEN;
+
+ if (echan->slot[1] < 0) {
+ echan->slot[1] = edma_alloc_slot(echan->ecc,
+ EDMA_SLOT_ANY);
+ if (echan->slot[1] < 0) {
+ kfree(edesc);
+ dev_err(dev, "%s: Failed to allocate slot\n",
+ __func__);
+ return NULL;
+ }
+ }
+ dest += pset_len;
+ src += pset_len;
+ pset_len = width = len % array_size;
+
+ ret = edma_config_pset(chan, &edesc->pset[1], src, dest, 1,
+ width, pset_len, DMA_MEM_TO_MEM);
+ if (ret < 0) {
+ kfree(edesc);
+ return NULL;
+ }
+
+ edesc->pset[1].param.opt |= ITCCHEN;
+ /* Enable transfer complete interrupt if requested */
+ if (tx_flags & DMA_PREP_INTERRUPT)
+ edesc->pset[1].param.opt |= TCINTEN;
+ }
+
+ if (!(tx_flags & DMA_PREP_INTERRUPT))
+ edesc->polled = true;
+
+ return vchan_tx_prep(&echan->vchan, &edesc->vdesc, tx_flags);
+}
+
+static struct dma_async_tx_descriptor *
+edma_prep_dma_interleaved(struct dma_chan *chan,
+ struct dma_interleaved_template *xt,
+ unsigned long tx_flags)
+{
+ struct device *dev = chan->device->dev;
+ struct edma_chan *echan = to_edma_chan(chan);
+ struct edmacc_param *param;
+ struct edma_desc *edesc;
+ size_t src_icg, dst_icg;
+ int src_bidx, dst_bidx;
+
+ /* Slave mode is not supported */
+ if (is_slave_direction(xt->dir))
+ return NULL;
+
+ if (xt->frame_size != 1 || xt->numf == 0)
+ return NULL;
+
+ if (xt->sgl[0].size > SZ_64K || xt->numf > SZ_64K)
+ return NULL;
+
+ src_icg = dmaengine_get_src_icg(xt, &xt->sgl[0]);
+ if (src_icg) {
+ src_bidx = src_icg + xt->sgl[0].size;
+ } else if (xt->src_inc) {
+ src_bidx = xt->sgl[0].size;
+ } else {
+ dev_err(dev, "%s: SRC constant addressing is not supported\n",
+ __func__);
+ return NULL;
+ }
+
+ dst_icg = dmaengine_get_dst_icg(xt, &xt->sgl[0]);
+ if (dst_icg) {
+ dst_bidx = dst_icg + xt->sgl[0].size;
+ } else if (xt->dst_inc) {
+ dst_bidx = xt->sgl[0].size;
+ } else {
+ dev_err(dev, "%s: DST constant addressing is not supported\n",
+ __func__);
+ return NULL;
+ }
+
+ if (src_bidx > SZ_64K || dst_bidx > SZ_64K)
+ return NULL;
+
+ edesc = kzalloc(struct_size(edesc, pset, 1), GFP_ATOMIC);
+ if (!edesc)
+ return NULL;
+
+ edesc->direction = DMA_MEM_TO_MEM;
+ edesc->echan = echan;
+ edesc->pset_nr = 1;
+
+ param = &edesc->pset[0].param;
+
+ param->src = xt->src_start;
+ param->dst = xt->dst_start;
+ param->a_b_cnt = xt->numf << 16 | xt->sgl[0].size;
+ param->ccnt = 1;
+ param->src_dst_bidx = (dst_bidx << 16) | src_bidx;
+ param->src_dst_cidx = 0;
+
+ param->opt = EDMA_TCC(EDMA_CHAN_SLOT(echan->ch_num));
+ param->opt |= ITCCHEN;
+ /* Enable transfer complete interrupt if requested */
+ if (tx_flags & DMA_PREP_INTERRUPT)
+ param->opt |= TCINTEN;
+ else
+ edesc->polled = true;
+
+ return vchan_tx_prep(&echan->vchan, &edesc->vdesc, tx_flags);
+}
+
+static struct dma_async_tx_descriptor *edma_prep_dma_cyclic(
+ struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
+ size_t period_len, enum dma_transfer_direction direction,
+ unsigned long tx_flags)
+{
+ struct edma_chan *echan = to_edma_chan(chan);
+ struct device *dev = chan->device->dev;
+ struct edma_desc *edesc;
+ dma_addr_t src_addr, dst_addr;
+ enum dma_slave_buswidth dev_width;
+ bool use_intermediate = false;
+ u32 burst;
+ int i, ret, nslots;
+
+ if (unlikely(!echan || !buf_len || !period_len))
+ return NULL;
+
+ if (direction == DMA_DEV_TO_MEM) {
+ src_addr = echan->cfg.src_addr;
+ dst_addr = buf_addr;
+ dev_width = echan->cfg.src_addr_width;
+ burst = echan->cfg.src_maxburst;
+ } else if (direction == DMA_MEM_TO_DEV) {
+ src_addr = buf_addr;
+ dst_addr = echan->cfg.dst_addr;
+ dev_width = echan->cfg.dst_addr_width;
+ burst = echan->cfg.dst_maxburst;
+ } else {
+ dev_err(dev, "%s: bad direction: %d\n", __func__, direction);
+ return NULL;
+ }
+
+ if (dev_width == DMA_SLAVE_BUSWIDTH_UNDEFINED) {
+ dev_err(dev, "%s: Undefined slave buswidth\n", __func__);
+ return NULL;
+ }
+
+ if (unlikely(buf_len % period_len)) {
+ dev_err(dev, "Period should be multiple of Buffer length\n");
+ return NULL;
+ }
+
+ nslots = (buf_len / period_len) + 1;
+
+ /*
+ * Cyclic DMA users such as audio cannot tolerate delays introduced
+ * by cases where the number of periods is more than the maximum
+ * number of SGs the EDMA driver can handle at a time. For DMA types
+ * such as Slave SGs, such delays are tolerable and synchronized,
+ * but the synchronization is difficult to achieve with Cyclic and
+ * cannot be guaranteed, so we error out early.
+ */
+ if (nslots > MAX_NR_SG) {
+ /*
+ * If the burst and period sizes are the same, we can put
+ * the full buffer into a single period and activate
+ * intermediate interrupts. This will produce interrupts
+ * after each burst, which is also after each desired period.
+ */
+ if (burst == period_len) {
+ period_len = buf_len;
+ nslots = 2;
+ use_intermediate = true;
+ } else {
+ return NULL;
+ }
+ }
+
+ edesc = kzalloc(struct_size(edesc, pset, nslots), GFP_ATOMIC);
+ if (!edesc)
+ return NULL;
+
+ edesc->cyclic = 1;
+ edesc->pset_nr = nslots;
+ edesc->residue = edesc->residue_stat = buf_len;
+ edesc->direction = direction;
+ edesc->echan = echan;
+
+ dev_dbg(dev, "%s: channel=%d nslots=%d period_len=%zu buf_len=%zu\n",
+ __func__, echan->ch_num, nslots, period_len, buf_len);
+
+ for (i = 0; i < nslots; i++) {
+ /* Allocate a PaRAM slot, if needed */
+ if (echan->slot[i] < 0) {
+ echan->slot[i] =
+ edma_alloc_slot(echan->ecc, EDMA_SLOT_ANY);
+ if (echan->slot[i] < 0) {
+ kfree(edesc);
+ dev_err(dev, "%s: Failed to allocate slot\n",
+ __func__);
+ return NULL;
+ }
+ }
+
+ if (i == nslots - 1) {
+ memcpy(&edesc->pset[i], &edesc->pset[0],
+ sizeof(edesc->pset[0]));
+ break;
+ }
+
+ ret = edma_config_pset(chan, &edesc->pset[i], src_addr,
+ dst_addr, burst, dev_width, period_len,
+ direction);
+ if (ret < 0) {
+ kfree(edesc);
+ return NULL;
+ }
+
+ if (direction == DMA_DEV_TO_MEM)
+ dst_addr += period_len;
+ else
+ src_addr += period_len;
+
+ dev_vdbg(dev, "%s: Configure period %d of buf:\n", __func__, i);
+ dev_vdbg(dev,
+ "\n pset[%d]:\n"
+ " chnum\t%d\n"
+ " slot\t%d\n"
+ " opt\t%08x\n"
+ " src\t%08x\n"
+ " dst\t%08x\n"
+ " abcnt\t%08x\n"
+ " ccnt\t%08x\n"
+ " bidx\t%08x\n"
+ " cidx\t%08x\n"
+ " lkrld\t%08x\n",
+ i, echan->ch_num, echan->slot[i],
+ edesc->pset[i].param.opt,
+ edesc->pset[i].param.src,
+ edesc->pset[i].param.dst,
+ edesc->pset[i].param.a_b_cnt,
+ edesc->pset[i].param.ccnt,
+ edesc->pset[i].param.src_dst_bidx,
+ edesc->pset[i].param.src_dst_cidx,
+ edesc->pset[i].param.link_bcntrld);
+
+ edesc->absync = ret;
+
+ /*
+ * Enable period interrupt only if it is requested
+ */
+ if (tx_flags & DMA_PREP_INTERRUPT) {
+ edesc->pset[i].param.opt |= TCINTEN;
+
+ /* Also enable intermediate interrupts if necessary */
+ if (use_intermediate)
+ edesc->pset[i].param.opt |= ITCINTEN;
+ }
+ }
+
+ /* Place the cyclic channel to highest priority queue */
+ if (!echan->tc)
+ edma_assign_channel_eventq(echan, EVENTQ_0);
+
+ return vchan_tx_prep(&echan->vchan, &edesc->vdesc, tx_flags);
+}
+
+static void edma_completion_handler(struct edma_chan *echan)
+{
+ struct device *dev = echan->vchan.chan.device->dev;
+ struct edma_desc *edesc;
+
+ spin_lock(&echan->vchan.lock);
+ edesc = echan->edesc;
+ if (edesc) {
+ if (edesc->cyclic) {
+ vchan_cyclic_callback(&edesc->vdesc);
+ spin_unlock(&echan->vchan.lock);
+ return;
+ } else if (edesc->processed == edesc->pset_nr) {
+ edesc->residue = 0;
+ edma_stop(echan);
+ vchan_cookie_complete(&edesc->vdesc);
+ echan->edesc = NULL;
+
+ dev_dbg(dev, "Transfer completed on channel %d\n",
+ echan->ch_num);
+ } else {
+ dev_dbg(dev, "Sub transfer completed on channel %d\n",
+ echan->ch_num);
+
+ edma_pause(echan);
+
+ /* Update statistics for tx_status */
+ edesc->residue -= edesc->sg_len;
+ edesc->residue_stat = edesc->residue;
+ edesc->processed_stat = edesc->processed;
+ }
+ edma_execute(echan);
+ }
+
+ spin_unlock(&echan->vchan.lock);
+}
+
+/* eDMA interrupt handler */
+static irqreturn_t dma_irq_handler(int irq, void *data)
+{
+ struct edma_cc *ecc = data;
+ int ctlr;
+ u32 sh_ier;
+ u32 sh_ipr;
+ u32 bank;
+
+ ctlr = ecc->id;
+ if (ctlr < 0)
+ return IRQ_NONE;
+
+ dev_vdbg(ecc->dev, "dma_irq_handler\n");
+
+ sh_ipr = edma_shadow0_read_array(ecc, SH_IPR, 0);
+ if (!sh_ipr) {
+ sh_ipr = edma_shadow0_read_array(ecc, SH_IPR, 1);
+ if (!sh_ipr)
+ return IRQ_NONE;
+ sh_ier = edma_shadow0_read_array(ecc, SH_IER, 1);
+ bank = 1;
+ } else {
+ sh_ier = edma_shadow0_read_array(ecc, SH_IER, 0);
+ bank = 0;
+ }
+
+ do {
+ u32 slot;
+ u32 channel;
+
+ slot = __ffs(sh_ipr);
+ sh_ipr &= ~(BIT(slot));
+
+ if (sh_ier & BIT(slot)) {
+ channel = (bank << 5) | slot;
+ /* Clear the corresponding IPR bits */
+ edma_shadow0_write_array(ecc, SH_ICR, bank, BIT(slot));
+ edma_completion_handler(&ecc->slave_chans[channel]);
+ }
+ } while (sh_ipr);
+
+ edma_shadow0_write(ecc, SH_IEVAL, 1);
+ return IRQ_HANDLED;
+}
+
+static void edma_error_handler(struct edma_chan *echan)
+{
+ struct edma_cc *ecc = echan->ecc;
+ struct device *dev = echan->vchan.chan.device->dev;
+ struct edmacc_param p;
+ int err;
+
+ if (!echan->edesc)
+ return;
+
+ spin_lock(&echan->vchan.lock);
+
+ err = edma_read_slot(ecc, echan->slot[0], &p);
+
+ /*
+ * Issue later based on missed flag which will be sure
+ * to happen as:
+ * (1) we finished transmitting an intermediate slot and
+ * edma_execute is coming up.
+ * (2) or we finished current transfer and issue will
+ * call edma_execute.
+ *
+ * Important note: issuing can be dangerous here and
+ * lead to some nasty recursion when we are in a NULL
+ * slot. So we avoid doing so and set the missed flag.
+ */
+ if (err || (p.a_b_cnt == 0 && p.ccnt == 0)) {
+ dev_dbg(dev, "Error on null slot, setting miss\n");
+ echan->missed = 1;
+ } else {
+ /*
+ * The slot is already programmed but the event got
+ * missed, so its safe to issue it here.
+ */
+ dev_dbg(dev, "Missed event, TRIGGERING\n");
+ edma_clean_channel(echan);
+ edma_stop(echan);
+ edma_start(echan);
+ edma_trigger_channel(echan);
+ }
+ spin_unlock(&echan->vchan.lock);
+}
+
+static inline bool edma_error_pending(struct edma_cc *ecc)
+{
+ if (edma_read_array(ecc, EDMA_EMR, 0) ||
+ edma_read_array(ecc, EDMA_EMR, 1) ||
+ edma_read(ecc, EDMA_QEMR) || edma_read(ecc, EDMA_CCERR))
+ return true;
+
+ return false;
+}
+
+/* eDMA error interrupt handler */
+static irqreturn_t dma_ccerr_handler(int irq, void *data)
+{
+ struct edma_cc *ecc = data;
+ int i, j;
+ int ctlr;
+ unsigned int cnt = 0;
+ unsigned int val;
+
+ ctlr = ecc->id;
+ if (ctlr < 0)
+ return IRQ_NONE;
+
+ dev_vdbg(ecc->dev, "dma_ccerr_handler\n");
+
+ if (!edma_error_pending(ecc)) {
+ /*
+ * The registers indicate no pending error event but the irq
+ * handler has been called.
+ * Ask eDMA to re-evaluate the error registers.
+ */
+ dev_err(ecc->dev, "%s: Error interrupt without error event!\n",
+ __func__);
+ edma_write(ecc, EDMA_EEVAL, 1);
+ return IRQ_NONE;
+ }
+
+ while (1) {
+ /* Event missed register(s) */
+ for (j = 0; j < 2; j++) {
+ unsigned long emr;
+
+ val = edma_read_array(ecc, EDMA_EMR, j);
+ if (!val)
+ continue;
+
+ dev_dbg(ecc->dev, "EMR%d 0x%08x\n", j, val);
+ emr = val;
+ for_each_set_bit(i, &emr, 32) {
+ int k = (j << 5) + i;
+
+ /* Clear the corresponding EMR bits */
+ edma_write_array(ecc, EDMA_EMCR, j, BIT(i));
+ /* Clear any SER */
+ edma_shadow0_write_array(ecc, SH_SECR, j,
+ BIT(i));
+ edma_error_handler(&ecc->slave_chans[k]);
+ }
+ }
+
+ val = edma_read(ecc, EDMA_QEMR);
+ if (val) {
+ dev_dbg(ecc->dev, "QEMR 0x%02x\n", val);
+ /* Not reported, just clear the interrupt reason. */
+ edma_write(ecc, EDMA_QEMCR, val);
+ edma_shadow0_write(ecc, SH_QSECR, val);
+ }
+
+ val = edma_read(ecc, EDMA_CCERR);
+ if (val) {
+ dev_warn(ecc->dev, "CCERR 0x%08x\n", val);
+ /* Not reported, just clear the interrupt reason. */
+ edma_write(ecc, EDMA_CCERRCLR, val);
+ }
+
+ if (!edma_error_pending(ecc))
+ break;
+ cnt++;
+ if (cnt > 10)
+ break;
+ }
+ edma_write(ecc, EDMA_EEVAL, 1);
+ return IRQ_HANDLED;
+}
+
+/* Alloc channel resources */
+static int edma_alloc_chan_resources(struct dma_chan *chan)
+{
+ struct edma_chan *echan = to_edma_chan(chan);
+ struct edma_cc *ecc = echan->ecc;
+ struct device *dev = ecc->dev;
+ enum dma_event_q eventq_no = EVENTQ_DEFAULT;
+ int ret;
+
+ if (echan->tc) {
+ eventq_no = echan->tc->id;
+ } else if (ecc->tc_list) {
+ /* memcpy channel */
+ echan->tc = &ecc->tc_list[ecc->info->default_queue];
+ eventq_no = echan->tc->id;
+ }
+
+ ret = edma_alloc_channel(echan, eventq_no);
+ if (ret)
+ return ret;
+
+ echan->slot[0] = edma_alloc_slot(ecc, echan->ch_num);
+ if (echan->slot[0] < 0) {
+ dev_err(dev, "Entry slot allocation failed for channel %u\n",
+ EDMA_CHAN_SLOT(echan->ch_num));
+ ret = echan->slot[0];
+ goto err_slot;
+ }
+
+ /* Set up channel -> slot mapping for the entry slot */
+ edma_set_chmap(echan, echan->slot[0]);
+ echan->alloced = true;
+
+ dev_dbg(dev, "Got eDMA channel %d for virt channel %d (%s trigger)\n",
+ EDMA_CHAN_SLOT(echan->ch_num), chan->chan_id,
+ echan->hw_triggered ? "HW" : "SW");
+
+ return 0;
+
+err_slot:
+ edma_free_channel(echan);
+ return ret;
+}
+
+/* Free channel resources */
+static void edma_free_chan_resources(struct dma_chan *chan)
+{
+ struct edma_chan *echan = to_edma_chan(chan);
+ struct device *dev = echan->ecc->dev;
+ int i;
+
+ /* Terminate transfers */
+ edma_stop(echan);
+
+ vchan_free_chan_resources(&echan->vchan);
+
+ /* Free EDMA PaRAM slots */
+ for (i = 0; i < EDMA_MAX_SLOTS; i++) {
+ if (echan->slot[i] >= 0) {
+ edma_free_slot(echan->ecc, echan->slot[i]);
+ echan->slot[i] = -1;
+ }
+ }
+
+ /* Set entry slot to the dummy slot */
+ edma_set_chmap(echan, echan->ecc->dummy_slot);
+
+ /* Free EDMA channel */
+ if (echan->alloced) {
+ edma_free_channel(echan);
+ echan->alloced = false;
+ }
+
+ echan->tc = NULL;
+ echan->hw_triggered = false;
+
+ dev_dbg(dev, "Free eDMA channel %d for virt channel %d\n",
+ EDMA_CHAN_SLOT(echan->ch_num), chan->chan_id);
+}
+
+/* Send pending descriptor to hardware */
+static void edma_issue_pending(struct dma_chan *chan)
+{
+ struct edma_chan *echan = to_edma_chan(chan);
+ unsigned long flags;
+
+ spin_lock_irqsave(&echan->vchan.lock, flags);
+ if (vchan_issue_pending(&echan->vchan) && !echan->edesc)
+ edma_execute(echan);
+ spin_unlock_irqrestore(&echan->vchan.lock, flags);
+}
+
+/*
+ * This limit exists to avoid a possible infinite loop when waiting for proof
+ * that a particular transfer is completed. This limit can be hit if there
+ * are large bursts to/from slow devices or the CPU is never able to catch
+ * the DMA hardware idle. On an AM335x transferring 48 bytes from the UART
+ * RX-FIFO, as many as 55 loops have been seen.
+ */
+#define EDMA_MAX_TR_WAIT_LOOPS 1000
+
+static u32 edma_residue(struct edma_desc *edesc)
+{
+ bool dst = edesc->direction == DMA_DEV_TO_MEM;
+ int loop_count = EDMA_MAX_TR_WAIT_LOOPS;
+ struct edma_chan *echan = edesc->echan;
+ struct edma_pset *pset = edesc->pset;
+ dma_addr_t done, pos, pos_old;
+ int channel = EDMA_CHAN_SLOT(echan->ch_num);
+ int idx = EDMA_REG_ARRAY_INDEX(channel);
+ int ch_bit = EDMA_CHANNEL_BIT(channel);
+ int event_reg;
+ int i;
+
+ /*
+ * We always read the dst/src position from the first RamPar
+ * pset. That's the one which is active now.
+ */
+ pos = edma_get_position(echan->ecc, echan->slot[0], dst);
+
+ /*
+ * "pos" may represent a transfer request that is still being
+ * processed by the EDMACC or EDMATC. We will busy wait until
+ * any one of the situations occurs:
+ * 1. while and event is pending for the channel
+ * 2. a position updated
+ * 3. we hit the loop limit
+ */
+ if (is_slave_direction(edesc->direction))
+ event_reg = SH_ER;
+ else
+ event_reg = SH_ESR;
+
+ pos_old = pos;
+ while (edma_shadow0_read_array(echan->ecc, event_reg, idx) & ch_bit) {
+ pos = edma_get_position(echan->ecc, echan->slot[0], dst);
+ if (pos != pos_old)
+ break;
+
+ if (!--loop_count) {
+ dev_dbg_ratelimited(echan->vchan.chan.device->dev,
+ "%s: timeout waiting for PaRAM update\n",
+ __func__);
+ break;
+ }
+
+ cpu_relax();
+ }
+
+ /*
+ * Cyclic is simple. Just subtract pset[0].addr from pos.
+ *
+ * We never update edesc->residue in the cyclic case, so we
+ * can tell the remaining room to the end of the circular
+ * buffer.
+ */
+ if (edesc->cyclic) {
+ done = pos - pset->addr;
+ edesc->residue_stat = edesc->residue - done;
+ return edesc->residue_stat;
+ }
+
+ /*
+ * If the position is 0, then EDMA loaded the closing dummy slot, the
+ * transfer is completed
+ */
+ if (!pos)
+ return 0;
+ /*
+ * For SG operation we catch up with the last processed
+ * status.
+ */
+ pset += edesc->processed_stat;
+
+ for (i = edesc->processed_stat; i < edesc->processed; i++, pset++) {
+ /*
+ * If we are inside this pset address range, we know
+ * this is the active one. Get the current delta and
+ * stop walking the psets.
+ */
+ if (pos >= pset->addr && pos < pset->addr + pset->len)
+ return edesc->residue_stat - (pos - pset->addr);
+
+ /* Otherwise mark it done and update residue_stat. */
+ edesc->processed_stat++;
+ edesc->residue_stat -= pset->len;
+ }
+ return edesc->residue_stat;
+}
+
+/* Check request completion status */
+static enum dma_status edma_tx_status(struct dma_chan *chan,
+ dma_cookie_t cookie,
+ struct dma_tx_state *txstate)
+{
+ struct edma_chan *echan = to_edma_chan(chan);
+ struct dma_tx_state txstate_tmp;
+ enum dma_status ret;
+ unsigned long flags;
+
+ ret = dma_cookie_status(chan, cookie, txstate);
+
+ if (ret == DMA_COMPLETE)
+ return ret;
+
+ /* Provide a dummy dma_tx_state for completion checking */
+ if (!txstate)
+ txstate = &txstate_tmp;
+
+ spin_lock_irqsave(&echan->vchan.lock, flags);
+ if (echan->edesc && echan->edesc->vdesc.tx.cookie == cookie) {
+ txstate->residue = edma_residue(echan->edesc);
+ } else {
+ struct virt_dma_desc *vdesc = vchan_find_desc(&echan->vchan,
+ cookie);
+
+ if (vdesc)
+ txstate->residue = to_edma_desc(&vdesc->tx)->residue;
+ else
+ txstate->residue = 0;
+ }
+
+ /*
+ * Mark the cookie completed if the residue is 0 for non cyclic
+ * transfers
+ */
+ if (ret != DMA_COMPLETE && !txstate->residue &&
+ echan->edesc && echan->edesc->polled &&
+ echan->edesc->vdesc.tx.cookie == cookie) {
+ edma_stop(echan);
+ vchan_cookie_complete(&echan->edesc->vdesc);
+ echan->edesc = NULL;
+ edma_execute(echan);
+ ret = DMA_COMPLETE;
+ }
+
+ spin_unlock_irqrestore(&echan->vchan.lock, flags);
+
+ return ret;
+}
+
+static bool edma_is_memcpy_channel(int ch_num, s32 *memcpy_channels)
+{
+ if (!memcpy_channels)
+ return false;
+ while (*memcpy_channels != -1) {
+ if (*memcpy_channels == ch_num)
+ return true;
+ memcpy_channels++;
+ }
+ return false;
+}
+
+#define EDMA_DMA_BUSWIDTHS (BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | \
+ BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \
+ BIT(DMA_SLAVE_BUSWIDTH_3_BYTES) | \
+ BIT(DMA_SLAVE_BUSWIDTH_4_BYTES))
+
+static void edma_dma_init(struct edma_cc *ecc, bool legacy_mode)
+{
+ struct dma_device *s_ddev = &ecc->dma_slave;
+ struct dma_device *m_ddev = NULL;
+ s32 *memcpy_channels = ecc->info->memcpy_channels;
+ int i, j;
+
+ dma_cap_zero(s_ddev->cap_mask);
+ dma_cap_set(DMA_SLAVE, s_ddev->cap_mask);
+ dma_cap_set(DMA_CYCLIC, s_ddev->cap_mask);
+ if (ecc->legacy_mode && !memcpy_channels) {
+ dev_warn(ecc->dev,
+ "Legacy memcpy is enabled, things might not work\n");
+
+ dma_cap_set(DMA_MEMCPY, s_ddev->cap_mask);
+ dma_cap_set(DMA_INTERLEAVE, s_ddev->cap_mask);
+ s_ddev->device_prep_dma_memcpy = edma_prep_dma_memcpy;
+ s_ddev->device_prep_interleaved_dma = edma_prep_dma_interleaved;
+ s_ddev->directions = BIT(DMA_MEM_TO_MEM);
+ }
+
+ s_ddev->device_prep_slave_sg = edma_prep_slave_sg;
+ s_ddev->device_prep_dma_cyclic = edma_prep_dma_cyclic;
+ s_ddev->device_alloc_chan_resources = edma_alloc_chan_resources;
+ s_ddev->device_free_chan_resources = edma_free_chan_resources;
+ s_ddev->device_issue_pending = edma_issue_pending;
+ s_ddev->device_tx_status = edma_tx_status;
+ s_ddev->device_config = edma_slave_config;
+ s_ddev->device_pause = edma_dma_pause;
+ s_ddev->device_resume = edma_dma_resume;
+ s_ddev->device_terminate_all = edma_terminate_all;
+ s_ddev->device_synchronize = edma_synchronize;
+
+ s_ddev->src_addr_widths = EDMA_DMA_BUSWIDTHS;
+ s_ddev->dst_addr_widths = EDMA_DMA_BUSWIDTHS;
+ s_ddev->directions |= (BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV));
+ s_ddev->residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
+ s_ddev->max_burst = SZ_32K - 1; /* CIDX: 16bit signed */
+
+ s_ddev->dev = ecc->dev;
+ INIT_LIST_HEAD(&s_ddev->channels);
+
+ if (memcpy_channels) {
+ m_ddev = devm_kzalloc(ecc->dev, sizeof(*m_ddev), GFP_KERNEL);
+ if (!m_ddev) {
+ dev_warn(ecc->dev, "memcpy is disabled due to OoM\n");
+ memcpy_channels = NULL;
+ goto ch_setup;
+ }
+ ecc->dma_memcpy = m_ddev;
+
+ dma_cap_zero(m_ddev->cap_mask);
+ dma_cap_set(DMA_MEMCPY, m_ddev->cap_mask);
+ dma_cap_set(DMA_INTERLEAVE, m_ddev->cap_mask);
+
+ m_ddev->device_prep_dma_memcpy = edma_prep_dma_memcpy;
+ m_ddev->device_prep_interleaved_dma = edma_prep_dma_interleaved;
+ m_ddev->device_alloc_chan_resources = edma_alloc_chan_resources;
+ m_ddev->device_free_chan_resources = edma_free_chan_resources;
+ m_ddev->device_issue_pending = edma_issue_pending;
+ m_ddev->device_tx_status = edma_tx_status;
+ m_ddev->device_config = edma_slave_config;
+ m_ddev->device_pause = edma_dma_pause;
+ m_ddev->device_resume = edma_dma_resume;
+ m_ddev->device_terminate_all = edma_terminate_all;
+ m_ddev->device_synchronize = edma_synchronize;
+
+ m_ddev->src_addr_widths = EDMA_DMA_BUSWIDTHS;
+ m_ddev->dst_addr_widths = EDMA_DMA_BUSWIDTHS;
+ m_ddev->directions = BIT(DMA_MEM_TO_MEM);
+ m_ddev->residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
+
+ m_ddev->dev = ecc->dev;
+ INIT_LIST_HEAD(&m_ddev->channels);
+ } else if (!ecc->legacy_mode) {
+ dev_info(ecc->dev, "memcpy is disabled\n");
+ }
+
+ch_setup:
+ for (i = 0; i < ecc->num_channels; i++) {
+ struct edma_chan *echan = &ecc->slave_chans[i];
+ echan->ch_num = EDMA_CTLR_CHAN(ecc->id, i);
+ echan->ecc = ecc;
+ echan->vchan.desc_free = edma_desc_free;
+
+ if (m_ddev && edma_is_memcpy_channel(i, memcpy_channels))
+ vchan_init(&echan->vchan, m_ddev);
+ else
+ vchan_init(&echan->vchan, s_ddev);
+
+ INIT_LIST_HEAD(&echan->node);
+ for (j = 0; j < EDMA_MAX_SLOTS; j++)
+ echan->slot[j] = -1;
+ }
+}
+
+static int edma_setup_from_hw(struct device *dev, struct edma_soc_info *pdata,
+ struct edma_cc *ecc)
+{
+ int i;
+ u32 value, cccfg;
+ s8 (*queue_priority_map)[2];
+
+ /* Decode the eDMA3 configuration from CCCFG register */
+ cccfg = edma_read(ecc, EDMA_CCCFG);
+
+ value = GET_NUM_REGN(cccfg);
+ ecc->num_region = BIT(value);
+
+ value = GET_NUM_DMACH(cccfg);
+ ecc->num_channels = BIT(value + 1);
+
+ value = GET_NUM_QDMACH(cccfg);
+ ecc->num_qchannels = value * 2;
+
+ value = GET_NUM_PAENTRY(cccfg);
+ ecc->num_slots = BIT(value + 4);
+
+ value = GET_NUM_EVQUE(cccfg);
+ ecc->num_tc = value + 1;
+
+ ecc->chmap_exist = (cccfg & CHMAP_EXIST) ? true : false;
+
+ dev_dbg(dev, "eDMA3 CC HW configuration (cccfg: 0x%08x):\n", cccfg);
+ dev_dbg(dev, "num_region: %u\n", ecc->num_region);
+ dev_dbg(dev, "num_channels: %u\n", ecc->num_channels);
+ dev_dbg(dev, "num_qchannels: %u\n", ecc->num_qchannels);
+ dev_dbg(dev, "num_slots: %u\n", ecc->num_slots);
+ dev_dbg(dev, "num_tc: %u\n", ecc->num_tc);
+ dev_dbg(dev, "chmap_exist: %s\n", ecc->chmap_exist ? "yes" : "no");
+
+ /* Nothing need to be done if queue priority is provided */
+ if (pdata->queue_priority_mapping)
+ return 0;
+
+ /*
+ * Configure TC/queue priority as follows:
+ * Q0 - priority 0
+ * Q1 - priority 1
+ * Q2 - priority 2
+ * ...
+ * The meaning of priority numbers: 0 highest priority, 7 lowest
+ * priority. So Q0 is the highest priority queue and the last queue has
+ * the lowest priority.
+ */
+ queue_priority_map = devm_kcalloc(dev, ecc->num_tc + 1, sizeof(s8),
+ GFP_KERNEL);
+ if (!queue_priority_map)
+ return -ENOMEM;
+
+ for (i = 0; i < ecc->num_tc; i++) {
+ queue_priority_map[i][0] = i;
+ queue_priority_map[i][1] = i;
+ }
+ queue_priority_map[i][0] = -1;
+ queue_priority_map[i][1] = -1;
+
+ pdata->queue_priority_mapping = queue_priority_map;
+ /* Default queue has the lowest priority */
+ pdata->default_queue = i - 1;
+
+ return 0;
+}
+
+#if IS_ENABLED(CONFIG_OF)
+static int edma_xbar_event_map(struct device *dev, struct edma_soc_info *pdata,
+ size_t sz)
+{
+ const char pname[] = "ti,edma-xbar-event-map";
+ struct resource res;
+ void __iomem *xbar;
+ s16 (*xbar_chans)[2];
+ size_t nelm = sz / sizeof(s16);
+ u32 shift, offset, mux;
+ int ret, i;
+
+ xbar_chans = devm_kcalloc(dev, nelm + 2, sizeof(s16), GFP_KERNEL);
+ if (!xbar_chans)
+ return -ENOMEM;
+
+ ret = of_address_to_resource(dev->of_node, 1, &res);
+ if (ret)
+ return -ENOMEM;
+
+ xbar = devm_ioremap(dev, res.start, resource_size(&res));
+ if (!xbar)
+ return -ENOMEM;
+
+ ret = of_property_read_u16_array(dev->of_node, pname, (u16 *)xbar_chans,
+ nelm);
+ if (ret)
+ return -EIO;
+
+ /* Invalidate last entry for the other user of this mess */
+ nelm >>= 1;
+ xbar_chans[nelm][0] = -1;
+ xbar_chans[nelm][1] = -1;
+
+ for (i = 0; i < nelm; i++) {
+ shift = (xbar_chans[i][1] & 0x03) << 3;
+ offset = xbar_chans[i][1] & 0xfffffffc;
+ mux = readl(xbar + offset);
+ mux &= ~(0xff << shift);
+ mux |= xbar_chans[i][0] << shift;
+ writel(mux, (xbar + offset));
+ }
+
+ pdata->xbar_chans = (const s16 (*)[2]) xbar_chans;
+ return 0;
+}
+
+static struct edma_soc_info *edma_setup_info_from_dt(struct device *dev,
+ bool legacy_mode)
+{
+ struct edma_soc_info *info;
+ struct property *prop;
+ int sz, ret;
+
+ info = devm_kzalloc(dev, sizeof(struct edma_soc_info), GFP_KERNEL);
+ if (!info)
+ return ERR_PTR(-ENOMEM);
+
+ if (legacy_mode) {
+ prop = of_find_property(dev->of_node, "ti,edma-xbar-event-map",
+ &sz);
+ if (prop) {
+ ret = edma_xbar_event_map(dev, info, sz);
+ if (ret)
+ return ERR_PTR(ret);
+ }
+ return info;
+ }
+
+ /* Get the list of channels allocated to be used for memcpy */
+ prop = of_find_property(dev->of_node, "ti,edma-memcpy-channels", &sz);
+ if (prop) {
+ const char pname[] = "ti,edma-memcpy-channels";
+ size_t nelm = sz / sizeof(s32);
+ s32 *memcpy_ch;
+
+ memcpy_ch = devm_kcalloc(dev, nelm + 1, sizeof(s32),
+ GFP_KERNEL);
+ if (!memcpy_ch)
+ return ERR_PTR(-ENOMEM);
+
+ ret = of_property_read_u32_array(dev->of_node, pname,
+ (u32 *)memcpy_ch, nelm);
+ if (ret)
+ return ERR_PTR(ret);
+
+ memcpy_ch[nelm] = -1;
+ info->memcpy_channels = memcpy_ch;
+ }
+
+ prop = of_find_property(dev->of_node, "ti,edma-reserved-slot-ranges",
+ &sz);
+ if (prop) {
+ const char pname[] = "ti,edma-reserved-slot-ranges";
+ u32 (*tmp)[2];
+ s16 (*rsv_slots)[2];
+ size_t nelm = sz / sizeof(*tmp);
+ struct edma_rsv_info *rsv_info;
+ int i;
+
+ if (!nelm)
+ return info;
+
+ tmp = kcalloc(nelm, sizeof(*tmp), GFP_KERNEL);
+ if (!tmp)
+ return ERR_PTR(-ENOMEM);
+
+ rsv_info = devm_kzalloc(dev, sizeof(*rsv_info), GFP_KERNEL);
+ if (!rsv_info) {
+ kfree(tmp);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ rsv_slots = devm_kcalloc(dev, nelm + 1, sizeof(*rsv_slots),
+ GFP_KERNEL);
+ if (!rsv_slots) {
+ kfree(tmp);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ ret = of_property_read_u32_array(dev->of_node, pname,
+ (u32 *)tmp, nelm * 2);
+ if (ret) {
+ kfree(tmp);
+ return ERR_PTR(ret);
+ }
+
+ for (i = 0; i < nelm; i++) {
+ rsv_slots[i][0] = tmp[i][0];
+ rsv_slots[i][1] = tmp[i][1];
+ }
+ rsv_slots[nelm][0] = -1;
+ rsv_slots[nelm][1] = -1;
+
+ info->rsv = rsv_info;
+ info->rsv->rsv_slots = (const s16 (*)[2])rsv_slots;
+
+ kfree(tmp);
+ }
+
+ return info;
+}
+
+static struct dma_chan *of_edma_xlate(struct of_phandle_args *dma_spec,
+ struct of_dma *ofdma)
+{
+ struct edma_cc *ecc = ofdma->of_dma_data;
+ struct dma_chan *chan = NULL;
+ struct edma_chan *echan;
+ int i;
+
+ if (!ecc || dma_spec->args_count < 1)
+ return NULL;
+
+ for (i = 0; i < ecc->num_channels; i++) {
+ echan = &ecc->slave_chans[i];
+ if (echan->ch_num == dma_spec->args[0]) {
+ chan = &echan->vchan.chan;
+ break;
+ }
+ }
+
+ if (!chan)
+ return NULL;
+
+ if (echan->ecc->legacy_mode && dma_spec->args_count == 1)
+ goto out;
+
+ if (!echan->ecc->legacy_mode && dma_spec->args_count == 2 &&
+ dma_spec->args[1] < echan->ecc->num_tc) {
+ echan->tc = &echan->ecc->tc_list[dma_spec->args[1]];
+ goto out;
+ }
+
+ return NULL;
+out:
+ /* The channel is going to be used as HW synchronized */
+ echan->hw_triggered = true;
+ return dma_get_slave_channel(chan);
+}
+#else
+static struct edma_soc_info *edma_setup_info_from_dt(struct device *dev,
+ bool legacy_mode)
+{
+ return ERR_PTR(-EINVAL);
+}
+
+static struct dma_chan *of_edma_xlate(struct of_phandle_args *dma_spec,
+ struct of_dma *ofdma)
+{
+ return NULL;
+}
+#endif
+
+static bool edma_filter_fn(struct dma_chan *chan, void *param);
+
+static int edma_probe(struct platform_device *pdev)
+{
+ struct edma_soc_info *info = pdev->dev.platform_data;
+ s8 (*queue_priority_mapping)[2];
+ const s16 (*reserved)[2];
+ int i, irq;
+ char *irq_name;
+ struct resource *mem;
+ struct device_node *node = pdev->dev.of_node;
+ struct device *dev = &pdev->dev;
+ struct edma_cc *ecc;
+ bool legacy_mode = true;
+ int ret;
+
+ if (node) {
+ const struct of_device_id *match;
+
+ match = of_match_node(edma_of_ids, node);
+ if (match && (*(u32 *)match->data) == EDMA_BINDING_TPCC)
+ legacy_mode = false;
+
+ info = edma_setup_info_from_dt(dev, legacy_mode);
+ if (IS_ERR(info)) {
+ dev_err(dev, "failed to get DT data\n");
+ return PTR_ERR(info);
+ }
+ }
+
+ if (!info)
+ return -ENODEV;
+
+ ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32));
+ if (ret)
+ return ret;
+
+ ecc = devm_kzalloc(dev, sizeof(*ecc), GFP_KERNEL);
+ if (!ecc)
+ return -ENOMEM;
+
+ ecc->dev = dev;
+ ecc->id = pdev->id;
+ ecc->legacy_mode = legacy_mode;
+ /* When booting with DT the pdev->id is -1 */
+ if (ecc->id < 0)
+ ecc->id = 0;
+
+ mem = platform_get_resource_byname(pdev, IORESOURCE_MEM, "edma3_cc");
+ if (!mem) {
+ dev_dbg(dev, "mem resource not found, using index 0\n");
+ mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ if (!mem) {
+ dev_err(dev, "no mem resource?\n");
+ return -ENODEV;
+ }
+ }
+ ecc->base = devm_ioremap_resource(dev, mem);
+ if (IS_ERR(ecc->base))
+ return PTR_ERR(ecc->base);
+
+ platform_set_drvdata(pdev, ecc);
+
+ pm_runtime_enable(dev);
+ ret = pm_runtime_get_sync(dev);
+ if (ret < 0) {
+ dev_err(dev, "pm_runtime_get_sync() failed\n");
+ pm_runtime_disable(dev);
+ return ret;
+ }
+
+ /* Get eDMA3 configuration from IP */
+ ret = edma_setup_from_hw(dev, info, ecc);
+ if (ret)
+ goto err_disable_pm;
+
+ /* Allocate memory based on the information we got from the IP */
+ ecc->slave_chans = devm_kcalloc(dev, ecc->num_channels,
+ sizeof(*ecc->slave_chans), GFP_KERNEL);
+
+ ecc->slot_inuse = devm_kcalloc(dev, BITS_TO_LONGS(ecc->num_slots),
+ sizeof(unsigned long), GFP_KERNEL);
+
+ ecc->channels_mask = devm_kcalloc(dev,
+ BITS_TO_LONGS(ecc->num_channels),
+ sizeof(unsigned long), GFP_KERNEL);
+ if (!ecc->slave_chans || !ecc->slot_inuse || !ecc->channels_mask) {
+ ret = -ENOMEM;
+ goto err_disable_pm;
+ }
+
+ /* Mark all channels available initially */
+ bitmap_fill(ecc->channels_mask, ecc->num_channels);
+
+ ecc->default_queue = info->default_queue;
+
+ if (info->rsv) {
+ /* Set the reserved slots in inuse list */
+ reserved = info->rsv->rsv_slots;
+ if (reserved) {
+ for (i = 0; reserved[i][0] != -1; i++)
+ bitmap_set(ecc->slot_inuse, reserved[i][0],
+ reserved[i][1]);
+ }
+
+ /* Clear channels not usable for Linux */
+ reserved = info->rsv->rsv_chans;
+ if (reserved) {
+ for (i = 0; reserved[i][0] != -1; i++)
+ bitmap_clear(ecc->channels_mask, reserved[i][0],
+ reserved[i][1]);
+ }
+ }
+
+ for (i = 0; i < ecc->num_slots; i++) {
+ /* Reset only unused - not reserved - paRAM slots */
+ if (!test_bit(i, ecc->slot_inuse))
+ edma_write_slot(ecc, i, &dummy_paramset);
+ }
+
+ irq = platform_get_irq_byname(pdev, "edma3_ccint");
+ if (irq < 0 && node)
+ irq = irq_of_parse_and_map(node, 0);
+
+ if (irq > 0) {
+ irq_name = devm_kasprintf(dev, GFP_KERNEL, "%s_ccint",
+ dev_name(dev));
+ ret = devm_request_irq(dev, irq, dma_irq_handler, 0, irq_name,
+ ecc);
+ if (ret) {
+ dev_err(dev, "CCINT (%d) failed --> %d\n", irq, ret);
+ goto err_disable_pm;
+ }
+ ecc->ccint = irq;
+ }
+
+ irq = platform_get_irq_byname(pdev, "edma3_ccerrint");
+ if (irq < 0 && node)
+ irq = irq_of_parse_and_map(node, 2);
+
+ if (irq > 0) {
+ irq_name = devm_kasprintf(dev, GFP_KERNEL, "%s_ccerrint",
+ dev_name(dev));
+ ret = devm_request_irq(dev, irq, dma_ccerr_handler, 0, irq_name,
+ ecc);
+ if (ret) {
+ dev_err(dev, "CCERRINT (%d) failed --> %d\n", irq, ret);
+ goto err_disable_pm;
+ }
+ ecc->ccerrint = irq;
+ }
+
+ ecc->dummy_slot = edma_alloc_slot(ecc, EDMA_SLOT_ANY);
+ if (ecc->dummy_slot < 0) {
+ dev_err(dev, "Can't allocate PaRAM dummy slot\n");
+ ret = ecc->dummy_slot;
+ goto err_disable_pm;
+ }
+
+ queue_priority_mapping = info->queue_priority_mapping;
+
+ if (!ecc->legacy_mode) {
+ int lowest_priority = 0;
+ unsigned int array_max;
+ struct of_phandle_args tc_args;
+
+ ecc->tc_list = devm_kcalloc(dev, ecc->num_tc,
+ sizeof(*ecc->tc_list), GFP_KERNEL);
+ if (!ecc->tc_list) {
+ ret = -ENOMEM;
+ goto err_reg1;
+ }
+
+ for (i = 0;; i++) {
+ ret = of_parse_phandle_with_fixed_args(node, "ti,tptcs",
+ 1, i, &tc_args);
+ if (ret || i == ecc->num_tc)
+ break;
+
+ ecc->tc_list[i].node = tc_args.np;
+ ecc->tc_list[i].id = i;
+ queue_priority_mapping[i][1] = tc_args.args[0];
+ if (queue_priority_mapping[i][1] > lowest_priority) {
+ lowest_priority = queue_priority_mapping[i][1];
+ info->default_queue = i;
+ }
+ }
+
+ /* See if we have optional dma-channel-mask array */
+ array_max = DIV_ROUND_UP(ecc->num_channels, BITS_PER_TYPE(u32));
+ ret = of_property_read_variable_u32_array(node,
+ "dma-channel-mask",
+ (u32 *)ecc->channels_mask,
+ 1, array_max);
+ if (ret > 0 && ret != array_max)
+ dev_warn(dev, "dma-channel-mask is not complete.\n");
+ else if (ret == -EOVERFLOW || ret == -ENODATA)
+ dev_warn(dev,
+ "dma-channel-mask is out of range or empty\n");
+ }
+
+ /* Event queue priority mapping */
+ for (i = 0; queue_priority_mapping[i][0] != -1; i++)
+ edma_assign_priority_to_queue(ecc, queue_priority_mapping[i][0],
+ queue_priority_mapping[i][1]);
+
+ edma_write_array2(ecc, EDMA_DRAE, 0, 0, 0x0);
+ edma_write_array2(ecc, EDMA_DRAE, 0, 1, 0x0);
+ edma_write_array(ecc, EDMA_QRAE, 0, 0x0);
+
+ ecc->info = info;
+
+ /* Init the dma device and channels */
+ edma_dma_init(ecc, legacy_mode);
+
+ for (i = 0; i < ecc->num_channels; i++) {
+ /* Do not touch reserved channels */
+ if (!test_bit(i, ecc->channels_mask))
+ continue;
+
+ /* Assign all channels to the default queue */
+ edma_assign_channel_eventq(&ecc->slave_chans[i],
+ info->default_queue);
+ /* Set entry slot to the dummy slot */
+ edma_set_chmap(&ecc->slave_chans[i], ecc->dummy_slot);
+ }
+
+ ecc->dma_slave.filter.map = info->slave_map;
+ ecc->dma_slave.filter.mapcnt = info->slavecnt;
+ ecc->dma_slave.filter.fn = edma_filter_fn;
+
+ ret = dma_async_device_register(&ecc->dma_slave);
+ if (ret) {
+ dev_err(dev, "slave ddev registration failed (%d)\n", ret);
+ goto err_reg1;
+ }
+
+ if (ecc->dma_memcpy) {
+ ret = dma_async_device_register(ecc->dma_memcpy);
+ if (ret) {
+ dev_err(dev, "memcpy ddev registration failed (%d)\n",
+ ret);
+ dma_async_device_unregister(&ecc->dma_slave);
+ goto err_reg1;
+ }
+ }
+
+ if (node)
+ of_dma_controller_register(node, of_edma_xlate, ecc);
+
+ dev_info(dev, "TI EDMA DMA engine driver\n");
+
+ return 0;
+
+err_reg1:
+ edma_free_slot(ecc, ecc->dummy_slot);
+err_disable_pm:
+ pm_runtime_put_sync(dev);
+ pm_runtime_disable(dev);
+ return ret;
+}
+
+static void edma_cleanupp_vchan(struct dma_device *dmadev)
+{
+ struct edma_chan *echan, *_echan;
+
+ list_for_each_entry_safe(echan, _echan,
+ &dmadev->channels, vchan.chan.device_node) {
+ list_del(&echan->vchan.chan.device_node);
+ tasklet_kill(&echan->vchan.task);
+ }
+}
+
+static int edma_remove(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct edma_cc *ecc = dev_get_drvdata(dev);
+
+ devm_free_irq(dev, ecc->ccint, ecc);
+ devm_free_irq(dev, ecc->ccerrint, ecc);
+
+ edma_cleanupp_vchan(&ecc->dma_slave);
+
+ if (dev->of_node)
+ of_dma_controller_free(dev->of_node);
+ dma_async_device_unregister(&ecc->dma_slave);
+ if (ecc->dma_memcpy)
+ dma_async_device_unregister(ecc->dma_memcpy);
+ edma_free_slot(ecc, ecc->dummy_slot);
+ pm_runtime_put_sync(dev);
+ pm_runtime_disable(dev);
+
+ return 0;
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int edma_pm_suspend(struct device *dev)
+{
+ struct edma_cc *ecc = dev_get_drvdata(dev);
+ struct edma_chan *echan = ecc->slave_chans;
+ int i;
+
+ for (i = 0; i < ecc->num_channels; i++) {
+ if (echan[i].alloced)
+ edma_setup_interrupt(&echan[i], false);
+ }
+
+ return 0;
+}
+
+static int edma_pm_resume(struct device *dev)
+{
+ struct edma_cc *ecc = dev_get_drvdata(dev);
+ struct edma_chan *echan = ecc->slave_chans;
+ int i;
+ s8 (*queue_priority_mapping)[2];
+
+ /* re initialize dummy slot to dummy param set */
+ edma_write_slot(ecc, ecc->dummy_slot, &dummy_paramset);
+
+ queue_priority_mapping = ecc->info->queue_priority_mapping;
+
+ /* Event queue priority mapping */
+ for (i = 0; queue_priority_mapping[i][0] != -1; i++)
+ edma_assign_priority_to_queue(ecc, queue_priority_mapping[i][0],
+ queue_priority_mapping[i][1]);
+
+ for (i = 0; i < ecc->num_channels; i++) {
+ if (echan[i].alloced) {
+ /* ensure access through shadow region 0 */
+ edma_or_array2(ecc, EDMA_DRAE, 0,
+ EDMA_REG_ARRAY_INDEX(i),
+ EDMA_CHANNEL_BIT(i));
+
+ edma_setup_interrupt(&echan[i], true);
+
+ /* Set up channel -> slot mapping for the entry slot */
+ edma_set_chmap(&echan[i], echan[i].slot[0]);
+ }
+ }
+
+ return 0;
+}
+#endif
+
+static const struct dev_pm_ops edma_pm_ops = {
+ SET_LATE_SYSTEM_SLEEP_PM_OPS(edma_pm_suspend, edma_pm_resume)
+};
+
+static struct platform_driver edma_driver = {
+ .probe = edma_probe,
+ .remove = edma_remove,
+ .driver = {
+ .name = "edma",
+ .pm = &edma_pm_ops,
+ .of_match_table = edma_of_ids,
+ },
+};
+
+static int edma_tptc_probe(struct platform_device *pdev)
+{
+ pm_runtime_enable(&pdev->dev);
+ return pm_runtime_get_sync(&pdev->dev);
+}
+
+static struct platform_driver edma_tptc_driver = {
+ .probe = edma_tptc_probe,
+ .driver = {
+ .name = "edma3-tptc",
+ .of_match_table = edma_tptc_of_ids,
+ },
+};
+
+static bool edma_filter_fn(struct dma_chan *chan, void *param)
+{
+ bool match = false;
+
+ if (chan->device->dev->driver == &edma_driver.driver) {
+ struct edma_chan *echan = to_edma_chan(chan);
+ unsigned ch_req = *(unsigned *)param;
+ if (ch_req == echan->ch_num) {
+ /* The channel is going to be used as HW synchronized */
+ echan->hw_triggered = true;
+ match = true;
+ }
+ }
+ return match;
+}
+
+static int edma_init(void)
+{
+ int ret;
+
+ ret = platform_driver_register(&edma_tptc_driver);
+ if (ret)
+ return ret;
+
+ return platform_driver_register(&edma_driver);
+}
+subsys_initcall(edma_init);
+
+static void __exit edma_exit(void)
+{
+ platform_driver_unregister(&edma_driver);
+ platform_driver_unregister(&edma_tptc_driver);
+}
+module_exit(edma_exit);
+
+MODULE_AUTHOR("Matt Porter <matt.porter@linaro.org>");
+MODULE_DESCRIPTION("TI EDMA DMA engine driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/ti/k3-psil-am62.c b/drivers/dma/ti/k3-psil-am62.c
new file mode 100644
index 000000000..2b6fd6e37
--- /dev/null
+++ b/drivers/dma/ti/k3-psil-am62.c
@@ -0,0 +1,186 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2022 Texas Instruments Incorporated - https://www.ti.com
+ */
+
+#include <linux/kernel.h>
+
+#include "k3-psil-priv.h"
+
+#define PSIL_PDMA_XY_PKT(x) \
+ { \
+ .thread_id = x, \
+ .ep_config = { \
+ .ep_type = PSIL_EP_PDMA_XY, \
+ .mapped_channel_id = -1, \
+ .default_flow_id = -1, \
+ .pkt_mode = 1, \
+ }, \
+ }
+
+#define PSIL_ETHERNET(x, ch, flow_base, flow_cnt) \
+ { \
+ .thread_id = x, \
+ .ep_config = { \
+ .ep_type = PSIL_EP_NATIVE, \
+ .pkt_mode = 1, \
+ .needs_epib = 1, \
+ .psd_size = 16, \
+ .mapped_channel_id = ch, \
+ .flow_start = flow_base, \
+ .flow_num = flow_cnt, \
+ .default_flow_id = flow_base, \
+ }, \
+ }
+
+#define PSIL_SAUL(x, ch, flow_base, flow_cnt, default_flow, tx) \
+ { \
+ .thread_id = x, \
+ .ep_config = { \
+ .ep_type = PSIL_EP_NATIVE, \
+ .pkt_mode = 1, \
+ .needs_epib = 1, \
+ .psd_size = 64, \
+ .mapped_channel_id = ch, \
+ .flow_start = flow_base, \
+ .flow_num = flow_cnt, \
+ .default_flow_id = default_flow, \
+ .notdpkt = tx, \
+ }, \
+ }
+
+#define PSIL_PDMA_MCASP(x) \
+ { \
+ .thread_id = x, \
+ .ep_config = { \
+ .ep_type = PSIL_EP_PDMA_XY, \
+ .pdma_acc32 = 1, \
+ .pdma_burst = 1, \
+ }, \
+ }
+
+#define PSIL_CSI2RX(x) \
+ { \
+ .thread_id = x, \
+ .ep_config = { \
+ .ep_type = PSIL_EP_NATIVE, \
+ }, \
+ }
+
+/* PSI-L source thread IDs, used for RX (DMA_DEV_TO_MEM) */
+static struct psil_ep am62_src_ep_map[] = {
+ /* SAUL */
+ PSIL_SAUL(0x7504, 20, 35, 8, 35, 0),
+ PSIL_SAUL(0x7505, 21, 35, 8, 36, 0),
+ PSIL_SAUL(0x7506, 22, 43, 8, 43, 0),
+ PSIL_SAUL(0x7507, 23, 43, 8, 44, 0),
+ /* PDMA_MAIN0 - SPI0-3 */
+ PSIL_PDMA_XY_PKT(0x4302),
+ PSIL_PDMA_XY_PKT(0x4303),
+ PSIL_PDMA_XY_PKT(0x4304),
+ PSIL_PDMA_XY_PKT(0x4305),
+ PSIL_PDMA_XY_PKT(0x4306),
+ PSIL_PDMA_XY_PKT(0x4307),
+ PSIL_PDMA_XY_PKT(0x4308),
+ PSIL_PDMA_XY_PKT(0x4309),
+ PSIL_PDMA_XY_PKT(0x430a),
+ PSIL_PDMA_XY_PKT(0x430b),
+ PSIL_PDMA_XY_PKT(0x430c),
+ PSIL_PDMA_XY_PKT(0x430d),
+ /* PDMA_MAIN1 - UART0-6 */
+ PSIL_PDMA_XY_PKT(0x4400),
+ PSIL_PDMA_XY_PKT(0x4401),
+ PSIL_PDMA_XY_PKT(0x4402),
+ PSIL_PDMA_XY_PKT(0x4403),
+ PSIL_PDMA_XY_PKT(0x4404),
+ PSIL_PDMA_XY_PKT(0x4405),
+ PSIL_PDMA_XY_PKT(0x4406),
+ /* PDMA_MAIN2 - MCASP0-2 */
+ PSIL_PDMA_MCASP(0x4500),
+ PSIL_PDMA_MCASP(0x4501),
+ PSIL_PDMA_MCASP(0x4502),
+ /* CPSW3G */
+ PSIL_ETHERNET(0x4600, 19, 19, 16),
+ /* CSI2RX */
+ PSIL_CSI2RX(0x4700),
+ PSIL_CSI2RX(0x4701),
+ PSIL_CSI2RX(0x4702),
+ PSIL_CSI2RX(0x4703),
+ PSIL_CSI2RX(0x4704),
+ PSIL_CSI2RX(0x4705),
+ PSIL_CSI2RX(0x4706),
+ PSIL_CSI2RX(0x4707),
+ PSIL_CSI2RX(0x4708),
+ PSIL_CSI2RX(0x4709),
+ PSIL_CSI2RX(0x470a),
+ PSIL_CSI2RX(0x470b),
+ PSIL_CSI2RX(0x470c),
+ PSIL_CSI2RX(0x470d),
+ PSIL_CSI2RX(0x470e),
+ PSIL_CSI2RX(0x470f),
+ PSIL_CSI2RX(0x4710),
+ PSIL_CSI2RX(0x4711),
+ PSIL_CSI2RX(0x4712),
+ PSIL_CSI2RX(0x4713),
+ PSIL_CSI2RX(0x4714),
+ PSIL_CSI2RX(0x4715),
+ PSIL_CSI2RX(0x4716),
+ PSIL_CSI2RX(0x4717),
+ PSIL_CSI2RX(0x4718),
+ PSIL_CSI2RX(0x4719),
+ PSIL_CSI2RX(0x471a),
+ PSIL_CSI2RX(0x471b),
+ PSIL_CSI2RX(0x471c),
+ PSIL_CSI2RX(0x471d),
+ PSIL_CSI2RX(0x471e),
+ PSIL_CSI2RX(0x471f),
+};
+
+/* PSI-L destination thread IDs, used for TX (DMA_MEM_TO_DEV) */
+static struct psil_ep am62_dst_ep_map[] = {
+ /* SAUL */
+ PSIL_SAUL(0xf500, 27, 83, 8, 83, 1),
+ PSIL_SAUL(0xf501, 28, 91, 8, 91, 1),
+ /* PDMA_MAIN0 - SPI0-3 */
+ PSIL_PDMA_XY_PKT(0xc302),
+ PSIL_PDMA_XY_PKT(0xc303),
+ PSIL_PDMA_XY_PKT(0xc304),
+ PSIL_PDMA_XY_PKT(0xc305),
+ PSIL_PDMA_XY_PKT(0xc306),
+ PSIL_PDMA_XY_PKT(0xc307),
+ PSIL_PDMA_XY_PKT(0xc308),
+ PSIL_PDMA_XY_PKT(0xc309),
+ PSIL_PDMA_XY_PKT(0xc30a),
+ PSIL_PDMA_XY_PKT(0xc30b),
+ PSIL_PDMA_XY_PKT(0xc30c),
+ PSIL_PDMA_XY_PKT(0xc30d),
+ /* PDMA_MAIN1 - UART0-6 */
+ PSIL_PDMA_XY_PKT(0xc400),
+ PSIL_PDMA_XY_PKT(0xc401),
+ PSIL_PDMA_XY_PKT(0xc402),
+ PSIL_PDMA_XY_PKT(0xc403),
+ PSIL_PDMA_XY_PKT(0xc404),
+ PSIL_PDMA_XY_PKT(0xc405),
+ PSIL_PDMA_XY_PKT(0xc406),
+ /* PDMA_MAIN2 - MCASP0-2 */
+ PSIL_PDMA_MCASP(0xc500),
+ PSIL_PDMA_MCASP(0xc501),
+ PSIL_PDMA_MCASP(0xc502),
+ /* CPSW3G */
+ PSIL_ETHERNET(0xc600, 19, 19, 8),
+ PSIL_ETHERNET(0xc601, 20, 27, 8),
+ PSIL_ETHERNET(0xc602, 21, 35, 8),
+ PSIL_ETHERNET(0xc603, 22, 43, 8),
+ PSIL_ETHERNET(0xc604, 23, 51, 8),
+ PSIL_ETHERNET(0xc605, 24, 59, 8),
+ PSIL_ETHERNET(0xc606, 25, 67, 8),
+ PSIL_ETHERNET(0xc607, 26, 75, 8),
+};
+
+struct psil_ep_map am62_ep_map = {
+ .name = "am62",
+ .src = am62_src_ep_map,
+ .src_count = ARRAY_SIZE(am62_src_ep_map),
+ .dst = am62_dst_ep_map,
+ .dst_count = ARRAY_SIZE(am62_dst_ep_map),
+};
diff --git a/drivers/dma/ti/k3-psil-am64.c b/drivers/dma/ti/k3-psil-am64.c
new file mode 100644
index 000000000..9fdeaa11a
--- /dev/null
+++ b/drivers/dma/ti/k3-psil-am64.c
@@ -0,0 +1,158 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020 Texas Instruments Incorporated - https://www.ti.com
+ * Author: Peter Ujfalusi <peter.ujfalusi@ti.com>
+ */
+
+#include <linux/kernel.h>
+
+#include "k3-psil-priv.h"
+
+#define PSIL_PDMA_XY_TR(x) \
+ { \
+ .thread_id = x, \
+ .ep_config = { \
+ .ep_type = PSIL_EP_PDMA_XY, \
+ .mapped_channel_id = -1, \
+ .default_flow_id = -1, \
+ }, \
+ }
+
+#define PSIL_PDMA_XY_PKT(x) \
+ { \
+ .thread_id = x, \
+ .ep_config = { \
+ .ep_type = PSIL_EP_PDMA_XY, \
+ .mapped_channel_id = -1, \
+ .default_flow_id = -1, \
+ .pkt_mode = 1, \
+ }, \
+ }
+
+#define PSIL_ETHERNET(x, ch, flow_base, flow_cnt) \
+ { \
+ .thread_id = x, \
+ .ep_config = { \
+ .ep_type = PSIL_EP_NATIVE, \
+ .pkt_mode = 1, \
+ .needs_epib = 1, \
+ .psd_size = 16, \
+ .mapped_channel_id = ch, \
+ .flow_start = flow_base, \
+ .flow_num = flow_cnt, \
+ .default_flow_id = flow_base, \
+ }, \
+ }
+
+#define PSIL_SAUL(x, ch, flow_base, flow_cnt, default_flow, tx) \
+ { \
+ .thread_id = x, \
+ .ep_config = { \
+ .ep_type = PSIL_EP_NATIVE, \
+ .pkt_mode = 1, \
+ .needs_epib = 1, \
+ .psd_size = 64, \
+ .mapped_channel_id = ch, \
+ .flow_start = flow_base, \
+ .flow_num = flow_cnt, \
+ .default_flow_id = default_flow, \
+ .notdpkt = tx, \
+ }, \
+ }
+
+/* PSI-L source thread IDs, used for RX (DMA_DEV_TO_MEM) */
+static struct psil_ep am64_src_ep_map[] = {
+ /* SAUL */
+ PSIL_SAUL(0x4000, 17, 32, 8, 32, 0),
+ PSIL_SAUL(0x4001, 18, 32, 8, 33, 0),
+ PSIL_SAUL(0x4002, 19, 40, 8, 40, 0),
+ PSIL_SAUL(0x4003, 20, 40, 8, 41, 0),
+ /* ICSS_G0 */
+ PSIL_ETHERNET(0x4100, 21, 48, 16),
+ PSIL_ETHERNET(0x4101, 22, 64, 16),
+ PSIL_ETHERNET(0x4102, 23, 80, 16),
+ PSIL_ETHERNET(0x4103, 24, 96, 16),
+ /* ICSS_G1 */
+ PSIL_ETHERNET(0x4200, 25, 112, 16),
+ PSIL_ETHERNET(0x4201, 26, 128, 16),
+ PSIL_ETHERNET(0x4202, 27, 144, 16),
+ PSIL_ETHERNET(0x4203, 28, 160, 16),
+ /* PDMA_MAIN0 - SPI0-3 */
+ PSIL_PDMA_XY_PKT(0x4300),
+ PSIL_PDMA_XY_PKT(0x4301),
+ PSIL_PDMA_XY_PKT(0x4302),
+ PSIL_PDMA_XY_PKT(0x4303),
+ PSIL_PDMA_XY_PKT(0x4304),
+ PSIL_PDMA_XY_PKT(0x4305),
+ PSIL_PDMA_XY_PKT(0x4306),
+ PSIL_PDMA_XY_PKT(0x4307),
+ PSIL_PDMA_XY_PKT(0x4308),
+ PSIL_PDMA_XY_PKT(0x4309),
+ PSIL_PDMA_XY_PKT(0x430a),
+ PSIL_PDMA_XY_PKT(0x430b),
+ PSIL_PDMA_XY_PKT(0x430c),
+ PSIL_PDMA_XY_PKT(0x430d),
+ PSIL_PDMA_XY_PKT(0x430e),
+ PSIL_PDMA_XY_PKT(0x430f),
+ /* PDMA_MAIN0 - USART0-1 */
+ PSIL_PDMA_XY_PKT(0x4310),
+ PSIL_PDMA_XY_PKT(0x4311),
+ /* PDMA_MAIN1 - SPI4 */
+ PSIL_PDMA_XY_PKT(0x4400),
+ PSIL_PDMA_XY_PKT(0x4401),
+ PSIL_PDMA_XY_PKT(0x4402),
+ PSIL_PDMA_XY_PKT(0x4403),
+ /* PDMA_MAIN1 - USART2-6 */
+ PSIL_PDMA_XY_PKT(0x4404),
+ PSIL_PDMA_XY_PKT(0x4405),
+ PSIL_PDMA_XY_PKT(0x4406),
+ PSIL_PDMA_XY_PKT(0x4407),
+ PSIL_PDMA_XY_PKT(0x4408),
+ /* PDMA_MAIN1 - ADCs */
+ PSIL_PDMA_XY_TR(0x440f),
+ PSIL_PDMA_XY_TR(0x4410),
+ /* CPSW2 */
+ PSIL_ETHERNET(0x4500, 16, 16, 16),
+};
+
+/* PSI-L destination thread IDs, used for TX (DMA_MEM_TO_DEV) */
+static struct psil_ep am64_dst_ep_map[] = {
+ /* SAUL */
+ PSIL_SAUL(0xc000, 24, 80, 8, 80, 1),
+ PSIL_SAUL(0xc001, 25, 88, 8, 88, 1),
+ /* ICSS_G0 */
+ PSIL_ETHERNET(0xc100, 26, 96, 1),
+ PSIL_ETHERNET(0xc101, 27, 97, 1),
+ PSIL_ETHERNET(0xc102, 28, 98, 1),
+ PSIL_ETHERNET(0xc103, 29, 99, 1),
+ PSIL_ETHERNET(0xc104, 30, 100, 1),
+ PSIL_ETHERNET(0xc105, 31, 101, 1),
+ PSIL_ETHERNET(0xc106, 32, 102, 1),
+ PSIL_ETHERNET(0xc107, 33, 103, 1),
+ /* ICSS_G1 */
+ PSIL_ETHERNET(0xc200, 34, 104, 1),
+ PSIL_ETHERNET(0xc201, 35, 105, 1),
+ PSIL_ETHERNET(0xc202, 36, 106, 1),
+ PSIL_ETHERNET(0xc203, 37, 107, 1),
+ PSIL_ETHERNET(0xc204, 38, 108, 1),
+ PSIL_ETHERNET(0xc205, 39, 109, 1),
+ PSIL_ETHERNET(0xc206, 40, 110, 1),
+ PSIL_ETHERNET(0xc207, 41, 111, 1),
+ /* CPSW2 */
+ PSIL_ETHERNET(0xc500, 16, 16, 8),
+ PSIL_ETHERNET(0xc501, 17, 24, 8),
+ PSIL_ETHERNET(0xc502, 18, 32, 8),
+ PSIL_ETHERNET(0xc503, 19, 40, 8),
+ PSIL_ETHERNET(0xc504, 20, 48, 8),
+ PSIL_ETHERNET(0xc505, 21, 56, 8),
+ PSIL_ETHERNET(0xc506, 22, 64, 8),
+ PSIL_ETHERNET(0xc507, 23, 72, 8),
+};
+
+struct psil_ep_map am64_ep_map = {
+ .name = "am64",
+ .src = am64_src_ep_map,
+ .src_count = ARRAY_SIZE(am64_src_ep_map),
+ .dst = am64_dst_ep_map,
+ .dst_count = ARRAY_SIZE(am64_dst_ep_map),
+};
diff --git a/drivers/dma/ti/k3-psil-am654.c b/drivers/dma/ti/k3-psil-am654.c
new file mode 100644
index 000000000..a896a1590
--- /dev/null
+++ b/drivers/dma/ti/k3-psil-am654.c
@@ -0,0 +1,175 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 Texas Instruments Incorporated - http://www.ti.com
+ * Author: Peter Ujfalusi <peter.ujfalusi@ti.com>
+ */
+
+#include <linux/kernel.h>
+
+#include "k3-psil-priv.h"
+
+#define PSIL_PDMA_XY_TR(x) \
+ { \
+ .thread_id = x, \
+ .ep_config = { \
+ .ep_type = PSIL_EP_PDMA_XY, \
+ }, \
+ }
+
+#define PSIL_PDMA_XY_PKT(x) \
+ { \
+ .thread_id = x, \
+ .ep_config = { \
+ .ep_type = PSIL_EP_PDMA_XY, \
+ .pkt_mode = 1, \
+ }, \
+ }
+
+#define PSIL_ETHERNET(x) \
+ { \
+ .thread_id = x, \
+ .ep_config = { \
+ .ep_type = PSIL_EP_NATIVE, \
+ .pkt_mode = 1, \
+ .needs_epib = 1, \
+ .psd_size = 16, \
+ }, \
+ }
+
+#define PSIL_SA2UL(x, tx) \
+ { \
+ .thread_id = x, \
+ .ep_config = { \
+ .ep_type = PSIL_EP_NATIVE, \
+ .pkt_mode = 1, \
+ .needs_epib = 1, \
+ .psd_size = 64, \
+ .notdpkt = tx, \
+ }, \
+ }
+
+/* PSI-L source thread IDs, used for RX (DMA_DEV_TO_MEM) */
+static struct psil_ep am654_src_ep_map[] = {
+ /* SA2UL */
+ PSIL_SA2UL(0x4000, 0),
+ PSIL_SA2UL(0x4001, 0),
+ PSIL_SA2UL(0x4002, 0),
+ PSIL_SA2UL(0x4003, 0),
+ /* PRU_ICSSG0 */
+ PSIL_ETHERNET(0x4100),
+ PSIL_ETHERNET(0x4101),
+ PSIL_ETHERNET(0x4102),
+ PSIL_ETHERNET(0x4103),
+ /* PRU_ICSSG1 */
+ PSIL_ETHERNET(0x4200),
+ PSIL_ETHERNET(0x4201),
+ PSIL_ETHERNET(0x4202),
+ PSIL_ETHERNET(0x4203),
+ /* PRU_ICSSG2 */
+ PSIL_ETHERNET(0x4300),
+ PSIL_ETHERNET(0x4301),
+ PSIL_ETHERNET(0x4302),
+ PSIL_ETHERNET(0x4303),
+ /* PDMA0 - McASPs */
+ PSIL_PDMA_XY_TR(0x4400),
+ PSIL_PDMA_XY_TR(0x4401),
+ PSIL_PDMA_XY_TR(0x4402),
+ /* PDMA1 - SPI0-4 */
+ PSIL_PDMA_XY_PKT(0x4500),
+ PSIL_PDMA_XY_PKT(0x4501),
+ PSIL_PDMA_XY_PKT(0x4502),
+ PSIL_PDMA_XY_PKT(0x4503),
+ PSIL_PDMA_XY_PKT(0x4504),
+ PSIL_PDMA_XY_PKT(0x4505),
+ PSIL_PDMA_XY_PKT(0x4506),
+ PSIL_PDMA_XY_PKT(0x4507),
+ PSIL_PDMA_XY_PKT(0x4508),
+ PSIL_PDMA_XY_PKT(0x4509),
+ PSIL_PDMA_XY_PKT(0x450a),
+ PSIL_PDMA_XY_PKT(0x450b),
+ PSIL_PDMA_XY_PKT(0x450c),
+ PSIL_PDMA_XY_PKT(0x450d),
+ PSIL_PDMA_XY_PKT(0x450e),
+ PSIL_PDMA_XY_PKT(0x450f),
+ PSIL_PDMA_XY_PKT(0x4510),
+ PSIL_PDMA_XY_PKT(0x4511),
+ PSIL_PDMA_XY_PKT(0x4512),
+ PSIL_PDMA_XY_PKT(0x4513),
+ /* PDMA1 - USART0-2 */
+ PSIL_PDMA_XY_PKT(0x4514),
+ PSIL_PDMA_XY_PKT(0x4515),
+ PSIL_PDMA_XY_PKT(0x4516),
+ /* CPSW0 */
+ PSIL_ETHERNET(0x7000),
+ /* MCU_PDMA0 - ADCs */
+ PSIL_PDMA_XY_TR(0x7100),
+ PSIL_PDMA_XY_TR(0x7101),
+ PSIL_PDMA_XY_TR(0x7102),
+ PSIL_PDMA_XY_TR(0x7103),
+ /* MCU_PDMA1 - MCU_SPI0-2 */
+ PSIL_PDMA_XY_PKT(0x7200),
+ PSIL_PDMA_XY_PKT(0x7201),
+ PSIL_PDMA_XY_PKT(0x7202),
+ PSIL_PDMA_XY_PKT(0x7203),
+ PSIL_PDMA_XY_PKT(0x7204),
+ PSIL_PDMA_XY_PKT(0x7205),
+ PSIL_PDMA_XY_PKT(0x7206),
+ PSIL_PDMA_XY_PKT(0x7207),
+ PSIL_PDMA_XY_PKT(0x7208),
+ PSIL_PDMA_XY_PKT(0x7209),
+ PSIL_PDMA_XY_PKT(0x720a),
+ PSIL_PDMA_XY_PKT(0x720b),
+ /* MCU_PDMA1 - MCU_USART0 */
+ PSIL_PDMA_XY_PKT(0x7212),
+};
+
+/* PSI-L destination thread IDs, used for TX (DMA_MEM_TO_DEV) */
+static struct psil_ep am654_dst_ep_map[] = {
+ /* SA2UL */
+ PSIL_SA2UL(0xc000, 1),
+ PSIL_SA2UL(0xc001, 1),
+ /* PRU_ICSSG0 */
+ PSIL_ETHERNET(0xc100),
+ PSIL_ETHERNET(0xc101),
+ PSIL_ETHERNET(0xc102),
+ PSIL_ETHERNET(0xc103),
+ PSIL_ETHERNET(0xc104),
+ PSIL_ETHERNET(0xc105),
+ PSIL_ETHERNET(0xc106),
+ PSIL_ETHERNET(0xc107),
+ /* PRU_ICSSG1 */
+ PSIL_ETHERNET(0xc200),
+ PSIL_ETHERNET(0xc201),
+ PSIL_ETHERNET(0xc202),
+ PSIL_ETHERNET(0xc203),
+ PSIL_ETHERNET(0xc204),
+ PSIL_ETHERNET(0xc205),
+ PSIL_ETHERNET(0xc206),
+ PSIL_ETHERNET(0xc207),
+ /* PRU_ICSSG2 */
+ PSIL_ETHERNET(0xc300),
+ PSIL_ETHERNET(0xc301),
+ PSIL_ETHERNET(0xc302),
+ PSIL_ETHERNET(0xc303),
+ PSIL_ETHERNET(0xc304),
+ PSIL_ETHERNET(0xc305),
+ PSIL_ETHERNET(0xc306),
+ PSIL_ETHERNET(0xc307),
+ /* CPSW0 */
+ PSIL_ETHERNET(0xf000),
+ PSIL_ETHERNET(0xf001),
+ PSIL_ETHERNET(0xf002),
+ PSIL_ETHERNET(0xf003),
+ PSIL_ETHERNET(0xf004),
+ PSIL_ETHERNET(0xf005),
+ PSIL_ETHERNET(0xf006),
+ PSIL_ETHERNET(0xf007),
+};
+
+struct psil_ep_map am654_ep_map = {
+ .name = "am654",
+ .src = am654_src_ep_map,
+ .src_count = ARRAY_SIZE(am654_src_ep_map),
+ .dst = am654_dst_ep_map,
+ .dst_count = ARRAY_SIZE(am654_dst_ep_map),
+};
diff --git a/drivers/dma/ti/k3-psil-j7200.c b/drivers/dma/ti/k3-psil-j7200.c
new file mode 100644
index 000000000..e3feff869
--- /dev/null
+++ b/drivers/dma/ti/k3-psil-j7200.c
@@ -0,0 +1,242 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 Texas Instruments Incorporated - http://www.ti.com
+ * Author: Peter Ujfalusi <peter.ujfalusi@ti.com>
+ */
+
+#include <linux/kernel.h>
+
+#include "k3-psil-priv.h"
+
+#define PSIL_PDMA_XY_TR(x) \
+ { \
+ .thread_id = x, \
+ .ep_config = { \
+ .ep_type = PSIL_EP_PDMA_XY, \
+ }, \
+ }
+
+#define PSIL_PDMA_XY_PKT(x) \
+ { \
+ .thread_id = x, \
+ .ep_config = { \
+ .ep_type = PSIL_EP_PDMA_XY, \
+ .pkt_mode = 1, \
+ }, \
+ }
+
+#define PSIL_PDMA_MCASP(x) \
+ { \
+ .thread_id = x, \
+ .ep_config = { \
+ .ep_type = PSIL_EP_PDMA_XY, \
+ .pdma_acc32 = 1, \
+ .pdma_burst = 1, \
+ }, \
+ }
+
+#define PSIL_ETHERNET(x) \
+ { \
+ .thread_id = x, \
+ .ep_config = { \
+ .ep_type = PSIL_EP_NATIVE, \
+ .pkt_mode = 1, \
+ .needs_epib = 1, \
+ .psd_size = 16, \
+ }, \
+ }
+
+#define PSIL_SA2UL(x, tx) \
+ { \
+ .thread_id = x, \
+ .ep_config = { \
+ .ep_type = PSIL_EP_NATIVE, \
+ .pkt_mode = 1, \
+ .needs_epib = 1, \
+ .psd_size = 64, \
+ .notdpkt = tx, \
+ }, \
+ }
+
+/* PSI-L source thread IDs, used for RX (DMA_DEV_TO_MEM) */
+static struct psil_ep j7200_src_ep_map[] = {
+ /* PDMA_MCASP - McASP0-2 */
+ PSIL_PDMA_MCASP(0x4400),
+ PSIL_PDMA_MCASP(0x4401),
+ PSIL_PDMA_MCASP(0x4402),
+ /* PDMA_SPI_G0 - SPI0-3 */
+ PSIL_PDMA_XY_PKT(0x4600),
+ PSIL_PDMA_XY_PKT(0x4601),
+ PSIL_PDMA_XY_PKT(0x4602),
+ PSIL_PDMA_XY_PKT(0x4603),
+ PSIL_PDMA_XY_PKT(0x4604),
+ PSIL_PDMA_XY_PKT(0x4605),
+ PSIL_PDMA_XY_PKT(0x4606),
+ PSIL_PDMA_XY_PKT(0x4607),
+ PSIL_PDMA_XY_PKT(0x4608),
+ PSIL_PDMA_XY_PKT(0x4609),
+ PSIL_PDMA_XY_PKT(0x460a),
+ PSIL_PDMA_XY_PKT(0x460b),
+ PSIL_PDMA_XY_PKT(0x460c),
+ PSIL_PDMA_XY_PKT(0x460d),
+ PSIL_PDMA_XY_PKT(0x460e),
+ PSIL_PDMA_XY_PKT(0x460f),
+ /* PDMA_SPI_G1 - SPI4-7 */
+ PSIL_PDMA_XY_PKT(0x4610),
+ PSIL_PDMA_XY_PKT(0x4611),
+ PSIL_PDMA_XY_PKT(0x4612),
+ PSIL_PDMA_XY_PKT(0x4613),
+ PSIL_PDMA_XY_PKT(0x4614),
+ PSIL_PDMA_XY_PKT(0x4615),
+ PSIL_PDMA_XY_PKT(0x4616),
+ PSIL_PDMA_XY_PKT(0x4617),
+ PSIL_PDMA_XY_PKT(0x4618),
+ PSIL_PDMA_XY_PKT(0x4619),
+ PSIL_PDMA_XY_PKT(0x461a),
+ PSIL_PDMA_XY_PKT(0x461b),
+ PSIL_PDMA_XY_PKT(0x461c),
+ PSIL_PDMA_XY_PKT(0x461d),
+ PSIL_PDMA_XY_PKT(0x461e),
+ PSIL_PDMA_XY_PKT(0x461f),
+ /* PDMA_USART_G0 - UART0-1 */
+ PSIL_PDMA_XY_PKT(0x4700),
+ PSIL_PDMA_XY_PKT(0x4701),
+ /* PDMA_USART_G1 - UART2-3 */
+ PSIL_PDMA_XY_PKT(0x4702),
+ PSIL_PDMA_XY_PKT(0x4703),
+ /* PDMA_USART_G2 - UART4-9 */
+ PSIL_PDMA_XY_PKT(0x4704),
+ PSIL_PDMA_XY_PKT(0x4705),
+ PSIL_PDMA_XY_PKT(0x4706),
+ PSIL_PDMA_XY_PKT(0x4707),
+ PSIL_PDMA_XY_PKT(0x4708),
+ PSIL_PDMA_XY_PKT(0x4709),
+ /* CPSW5 */
+ PSIL_ETHERNET(0x4a00),
+ /* CPSW0 */
+ PSIL_ETHERNET(0x7000),
+ /* MCU_PDMA_MISC_G0 - SPI0 */
+ PSIL_PDMA_XY_PKT(0x7100),
+ PSIL_PDMA_XY_PKT(0x7101),
+ PSIL_PDMA_XY_PKT(0x7102),
+ PSIL_PDMA_XY_PKT(0x7103),
+ /* MCU_PDMA_MISC_G1 - SPI1-2 */
+ PSIL_PDMA_XY_PKT(0x7200),
+ PSIL_PDMA_XY_PKT(0x7201),
+ PSIL_PDMA_XY_PKT(0x7202),
+ PSIL_PDMA_XY_PKT(0x7203),
+ PSIL_PDMA_XY_PKT(0x7204),
+ PSIL_PDMA_XY_PKT(0x7205),
+ PSIL_PDMA_XY_PKT(0x7206),
+ PSIL_PDMA_XY_PKT(0x7207),
+ /* MCU_PDMA_MISC_G2 - UART0 */
+ PSIL_PDMA_XY_PKT(0x7300),
+ /* MCU_PDMA_ADC - ADC0-1 */
+ PSIL_PDMA_XY_TR(0x7400),
+ PSIL_PDMA_XY_TR(0x7401),
+ /* SA2UL */
+ PSIL_SA2UL(0x7500, 0),
+ PSIL_SA2UL(0x7501, 0),
+ PSIL_SA2UL(0x7502, 0),
+ PSIL_SA2UL(0x7503, 0),
+};
+
+/* PSI-L destination thread IDs, used for TX (DMA_MEM_TO_DEV) */
+static struct psil_ep j7200_dst_ep_map[] = {
+ /* PDMA_MCASP - McASP0-2 */
+ PSIL_PDMA_MCASP(0xc400),
+ PSIL_PDMA_MCASP(0xc401),
+ PSIL_PDMA_MCASP(0xc402),
+ /* PDMA_SPI_G0 - SPI0-3 */
+ PSIL_PDMA_XY_PKT(0xc600),
+ PSIL_PDMA_XY_PKT(0xc601),
+ PSIL_PDMA_XY_PKT(0xc602),
+ PSIL_PDMA_XY_PKT(0xc603),
+ PSIL_PDMA_XY_PKT(0xc604),
+ PSIL_PDMA_XY_PKT(0xc605),
+ PSIL_PDMA_XY_PKT(0xc606),
+ PSIL_PDMA_XY_PKT(0xc607),
+ PSIL_PDMA_XY_PKT(0xc608),
+ PSIL_PDMA_XY_PKT(0xc609),
+ PSIL_PDMA_XY_PKT(0xc60a),
+ PSIL_PDMA_XY_PKT(0xc60b),
+ PSIL_PDMA_XY_PKT(0xc60c),
+ PSIL_PDMA_XY_PKT(0xc60d),
+ PSIL_PDMA_XY_PKT(0xc60e),
+ PSIL_PDMA_XY_PKT(0xc60f),
+ /* PDMA_SPI_G1 - SPI4-7 */
+ PSIL_PDMA_XY_PKT(0xc610),
+ PSIL_PDMA_XY_PKT(0xc611),
+ PSIL_PDMA_XY_PKT(0xc612),
+ PSIL_PDMA_XY_PKT(0xc613),
+ PSIL_PDMA_XY_PKT(0xc614),
+ PSIL_PDMA_XY_PKT(0xc615),
+ PSIL_PDMA_XY_PKT(0xc616),
+ PSIL_PDMA_XY_PKT(0xc617),
+ PSIL_PDMA_XY_PKT(0xc618),
+ PSIL_PDMA_XY_PKT(0xc619),
+ PSIL_PDMA_XY_PKT(0xc61a),
+ PSIL_PDMA_XY_PKT(0xc61b),
+ PSIL_PDMA_XY_PKT(0xc61c),
+ PSIL_PDMA_XY_PKT(0xc61d),
+ PSIL_PDMA_XY_PKT(0xc61e),
+ PSIL_PDMA_XY_PKT(0xc61f),
+ /* PDMA_USART_G0 - UART0-1 */
+ PSIL_PDMA_XY_PKT(0xc700),
+ PSIL_PDMA_XY_PKT(0xc701),
+ /* PDMA_USART_G1 - UART2-3 */
+ PSIL_PDMA_XY_PKT(0xc702),
+ PSIL_PDMA_XY_PKT(0xc703),
+ /* PDMA_USART_G2 - UART4-9 */
+ PSIL_PDMA_XY_PKT(0xc704),
+ PSIL_PDMA_XY_PKT(0xc705),
+ PSIL_PDMA_XY_PKT(0xc706),
+ PSIL_PDMA_XY_PKT(0xc707),
+ PSIL_PDMA_XY_PKT(0xc708),
+ PSIL_PDMA_XY_PKT(0xc709),
+ /* CPSW5 */
+ PSIL_ETHERNET(0xca00),
+ PSIL_ETHERNET(0xca01),
+ PSIL_ETHERNET(0xca02),
+ PSIL_ETHERNET(0xca03),
+ PSIL_ETHERNET(0xca04),
+ PSIL_ETHERNET(0xca05),
+ PSIL_ETHERNET(0xca06),
+ PSIL_ETHERNET(0xca07),
+ /* CPSW0 */
+ PSIL_ETHERNET(0xf000),
+ PSIL_ETHERNET(0xf001),
+ PSIL_ETHERNET(0xf002),
+ PSIL_ETHERNET(0xf003),
+ PSIL_ETHERNET(0xf004),
+ PSIL_ETHERNET(0xf005),
+ PSIL_ETHERNET(0xf006),
+ PSIL_ETHERNET(0xf007),
+ /* MCU_PDMA_MISC_G0 - SPI0 */
+ PSIL_PDMA_XY_PKT(0xf100),
+ PSIL_PDMA_XY_PKT(0xf101),
+ PSIL_PDMA_XY_PKT(0xf102),
+ PSIL_PDMA_XY_PKT(0xf103),
+ /* MCU_PDMA_MISC_G1 - SPI1-2 */
+ PSIL_PDMA_XY_PKT(0xf200),
+ PSIL_PDMA_XY_PKT(0xf201),
+ PSIL_PDMA_XY_PKT(0xf202),
+ PSIL_PDMA_XY_PKT(0xf203),
+ PSIL_PDMA_XY_PKT(0xf204),
+ PSIL_PDMA_XY_PKT(0xf205),
+ PSIL_PDMA_XY_PKT(0xf206),
+ PSIL_PDMA_XY_PKT(0xf207),
+ /* MCU_PDMA_MISC_G2 - UART0 */
+ PSIL_PDMA_XY_PKT(0xf300),
+ /* SA2UL */
+ PSIL_SA2UL(0xf500, 1),
+ PSIL_SA2UL(0xf501, 1),
+};
+
+struct psil_ep_map j7200_ep_map = {
+ .name = "j7200",
+ .src = j7200_src_ep_map,
+ .src_count = ARRAY_SIZE(j7200_src_ep_map),
+ .dst = j7200_dst_ep_map,
+ .dst_count = ARRAY_SIZE(j7200_dst_ep_map),
+};
diff --git a/drivers/dma/ti/k3-psil-j721e.c b/drivers/dma/ti/k3-psil-j721e.c
new file mode 100644
index 000000000..e7c83d668
--- /dev/null
+++ b/drivers/dma/ti/k3-psil-j721e.c
@@ -0,0 +1,377 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 Texas Instruments Incorporated - http://www.ti.com
+ * Author: Peter Ujfalusi <peter.ujfalusi@ti.com>
+ */
+
+#include <linux/kernel.h>
+
+#include "k3-psil-priv.h"
+
+#define PSIL_PDMA_XY_TR(x) \
+ { \
+ .thread_id = x, \
+ .ep_config = { \
+ .ep_type = PSIL_EP_PDMA_XY, \
+ }, \
+ }
+
+#define PSIL_PDMA_XY_PKT(x) \
+ { \
+ .thread_id = x, \
+ .ep_config = { \
+ .ep_type = PSIL_EP_PDMA_XY, \
+ .pkt_mode = 1, \
+ }, \
+ }
+
+#define PSIL_PDMA_MCASP(x) \
+ { \
+ .thread_id = x, \
+ .ep_config = { \
+ .ep_type = PSIL_EP_PDMA_XY, \
+ .pdma_acc32 = 1, \
+ .pdma_burst = 1, \
+ }, \
+ }
+
+#define PSIL_ETHERNET(x) \
+ { \
+ .thread_id = x, \
+ .ep_config = { \
+ .ep_type = PSIL_EP_NATIVE, \
+ .pkt_mode = 1, \
+ .needs_epib = 1, \
+ .psd_size = 16, \
+ }, \
+ }
+
+#define PSIL_SA2UL(x, tx) \
+ { \
+ .thread_id = x, \
+ .ep_config = { \
+ .ep_type = PSIL_EP_NATIVE, \
+ .pkt_mode = 1, \
+ .needs_epib = 1, \
+ .psd_size = 64, \
+ .notdpkt = tx, \
+ }, \
+ }
+
+#define PSIL_CSI2RX(x) \
+ { \
+ .thread_id = x, \
+ .ep_config = { \
+ .ep_type = PSIL_EP_NATIVE, \
+ }, \
+ }
+
+/* PSI-L source thread IDs, used for RX (DMA_DEV_TO_MEM) */
+static struct psil_ep j721e_src_ep_map[] = {
+ /* SA2UL */
+ PSIL_SA2UL(0x4000, 0),
+ PSIL_SA2UL(0x4001, 0),
+ PSIL_SA2UL(0x4002, 0),
+ PSIL_SA2UL(0x4003, 0),
+ /* PRU_ICSSG0 */
+ PSIL_ETHERNET(0x4100),
+ PSIL_ETHERNET(0x4101),
+ PSIL_ETHERNET(0x4102),
+ PSIL_ETHERNET(0x4103),
+ /* PRU_ICSSG1 */
+ PSIL_ETHERNET(0x4200),
+ PSIL_ETHERNET(0x4201),
+ PSIL_ETHERNET(0x4202),
+ PSIL_ETHERNET(0x4203),
+ /* PDMA6 (PSIL_PDMA_MCASP_G0) - McASP0-2 */
+ PSIL_PDMA_MCASP(0x4400),
+ PSIL_PDMA_MCASP(0x4401),
+ PSIL_PDMA_MCASP(0x4402),
+ /* PDMA7 (PSIL_PDMA_MCASP_G1) - McASP3-11 */
+ PSIL_PDMA_MCASP(0x4500),
+ PSIL_PDMA_MCASP(0x4501),
+ PSIL_PDMA_MCASP(0x4502),
+ PSIL_PDMA_MCASP(0x4503),
+ PSIL_PDMA_MCASP(0x4504),
+ PSIL_PDMA_MCASP(0x4505),
+ PSIL_PDMA_MCASP(0x4506),
+ PSIL_PDMA_MCASP(0x4507),
+ PSIL_PDMA_MCASP(0x4508),
+ /* PDMA8 (PDMA_MISC_G0) - SPI0-1 */
+ PSIL_PDMA_XY_PKT(0x4600),
+ PSIL_PDMA_XY_PKT(0x4601),
+ PSIL_PDMA_XY_PKT(0x4602),
+ PSIL_PDMA_XY_PKT(0x4603),
+ PSIL_PDMA_XY_PKT(0x4604),
+ PSIL_PDMA_XY_PKT(0x4605),
+ PSIL_PDMA_XY_PKT(0x4606),
+ PSIL_PDMA_XY_PKT(0x4607),
+ /* PDMA9 (PDMA_MISC_G1) - SPI2-3 */
+ PSIL_PDMA_XY_PKT(0x460c),
+ PSIL_PDMA_XY_PKT(0x460d),
+ PSIL_PDMA_XY_PKT(0x460e),
+ PSIL_PDMA_XY_PKT(0x460f),
+ PSIL_PDMA_XY_PKT(0x4610),
+ PSIL_PDMA_XY_PKT(0x4611),
+ PSIL_PDMA_XY_PKT(0x4612),
+ PSIL_PDMA_XY_PKT(0x4613),
+ /* PDMA10 (PDMA_MISC_G2) - SPI4-5 */
+ PSIL_PDMA_XY_PKT(0x4618),
+ PSIL_PDMA_XY_PKT(0x4619),
+ PSIL_PDMA_XY_PKT(0x461a),
+ PSIL_PDMA_XY_PKT(0x461b),
+ PSIL_PDMA_XY_PKT(0x461c),
+ PSIL_PDMA_XY_PKT(0x461d),
+ PSIL_PDMA_XY_PKT(0x461e),
+ PSIL_PDMA_XY_PKT(0x461f),
+ /* PDMA11 (PDMA_MISC_G3) */
+ PSIL_PDMA_XY_PKT(0x4624),
+ PSIL_PDMA_XY_PKT(0x4625),
+ PSIL_PDMA_XY_PKT(0x4626),
+ PSIL_PDMA_XY_PKT(0x4627),
+ PSIL_PDMA_XY_PKT(0x4628),
+ PSIL_PDMA_XY_PKT(0x4629),
+ PSIL_PDMA_XY_PKT(0x4630),
+ PSIL_PDMA_XY_PKT(0x463a),
+ /* PDMA13 (PDMA_USART_G0) - UART0-1 */
+ PSIL_PDMA_XY_PKT(0x4700),
+ PSIL_PDMA_XY_PKT(0x4701),
+ /* PDMA14 (PDMA_USART_G1) - UART2-3 */
+ PSIL_PDMA_XY_PKT(0x4702),
+ PSIL_PDMA_XY_PKT(0x4703),
+ /* PDMA15 (PDMA_USART_G2) - UART4-9 */
+ PSIL_PDMA_XY_PKT(0x4704),
+ PSIL_PDMA_XY_PKT(0x4705),
+ PSIL_PDMA_XY_PKT(0x4706),
+ PSIL_PDMA_XY_PKT(0x4707),
+ PSIL_PDMA_XY_PKT(0x4708),
+ PSIL_PDMA_XY_PKT(0x4709),
+ /* CSI2RX */
+ PSIL_CSI2RX(0x4940),
+ PSIL_CSI2RX(0x4941),
+ PSIL_CSI2RX(0x4942),
+ PSIL_CSI2RX(0x4943),
+ PSIL_CSI2RX(0x4944),
+ PSIL_CSI2RX(0x4945),
+ PSIL_CSI2RX(0x4946),
+ PSIL_CSI2RX(0x4947),
+ PSIL_CSI2RX(0x4948),
+ PSIL_CSI2RX(0x4949),
+ PSIL_CSI2RX(0x494a),
+ PSIL_CSI2RX(0x494b),
+ PSIL_CSI2RX(0x494c),
+ PSIL_CSI2RX(0x494d),
+ PSIL_CSI2RX(0x494e),
+ PSIL_CSI2RX(0x494f),
+ PSIL_CSI2RX(0x4950),
+ PSIL_CSI2RX(0x4951),
+ PSIL_CSI2RX(0x4952),
+ PSIL_CSI2RX(0x4953),
+ PSIL_CSI2RX(0x4954),
+ PSIL_CSI2RX(0x4955),
+ PSIL_CSI2RX(0x4956),
+ PSIL_CSI2RX(0x4957),
+ PSIL_CSI2RX(0x4958),
+ PSIL_CSI2RX(0x4959),
+ PSIL_CSI2RX(0x495a),
+ PSIL_CSI2RX(0x495b),
+ PSIL_CSI2RX(0x495c),
+ PSIL_CSI2RX(0x495d),
+ PSIL_CSI2RX(0x495e),
+ PSIL_CSI2RX(0x495f),
+ PSIL_CSI2RX(0x4960),
+ PSIL_CSI2RX(0x4961),
+ PSIL_CSI2RX(0x4962),
+ PSIL_CSI2RX(0x4963),
+ PSIL_CSI2RX(0x4964),
+ PSIL_CSI2RX(0x4965),
+ PSIL_CSI2RX(0x4966),
+ PSIL_CSI2RX(0x4967),
+ PSIL_CSI2RX(0x4968),
+ PSIL_CSI2RX(0x4969),
+ PSIL_CSI2RX(0x496a),
+ PSIL_CSI2RX(0x496b),
+ PSIL_CSI2RX(0x496c),
+ PSIL_CSI2RX(0x496d),
+ PSIL_CSI2RX(0x496e),
+ PSIL_CSI2RX(0x496f),
+ PSIL_CSI2RX(0x4970),
+ PSIL_CSI2RX(0x4971),
+ PSIL_CSI2RX(0x4972),
+ PSIL_CSI2RX(0x4973),
+ PSIL_CSI2RX(0x4974),
+ PSIL_CSI2RX(0x4975),
+ PSIL_CSI2RX(0x4976),
+ PSIL_CSI2RX(0x4977),
+ PSIL_CSI2RX(0x4978),
+ PSIL_CSI2RX(0x4979),
+ PSIL_CSI2RX(0x497a),
+ PSIL_CSI2RX(0x497b),
+ PSIL_CSI2RX(0x497c),
+ PSIL_CSI2RX(0x497d),
+ PSIL_CSI2RX(0x497e),
+ PSIL_CSI2RX(0x497f),
+ /* CPSW9 */
+ PSIL_ETHERNET(0x4a00),
+ /* CPSW0 */
+ PSIL_ETHERNET(0x7000),
+ /* MCU_PDMA0 (MCU_PDMA_MISC_G0) - SPI0 */
+ PSIL_PDMA_XY_PKT(0x7100),
+ PSIL_PDMA_XY_PKT(0x7101),
+ PSIL_PDMA_XY_PKT(0x7102),
+ PSIL_PDMA_XY_PKT(0x7103),
+ /* MCU_PDMA1 (MCU_PDMA_MISC_G1) - SPI1-2 */
+ PSIL_PDMA_XY_PKT(0x7200),
+ PSIL_PDMA_XY_PKT(0x7201),
+ PSIL_PDMA_XY_PKT(0x7202),
+ PSIL_PDMA_XY_PKT(0x7203),
+ PSIL_PDMA_XY_PKT(0x7204),
+ PSIL_PDMA_XY_PKT(0x7205),
+ PSIL_PDMA_XY_PKT(0x7206),
+ PSIL_PDMA_XY_PKT(0x7207),
+ /* MCU_PDMA2 (MCU_PDMA_MISC_G2) - UART0 */
+ PSIL_PDMA_XY_PKT(0x7300),
+ /* MCU_PDMA_ADC - ADC0-1 */
+ PSIL_PDMA_XY_TR(0x7400),
+ PSIL_PDMA_XY_TR(0x7401),
+ PSIL_PDMA_XY_TR(0x7402),
+ PSIL_PDMA_XY_TR(0x7403),
+ /* SA2UL */
+ PSIL_SA2UL(0x7500, 0),
+ PSIL_SA2UL(0x7501, 0),
+ PSIL_SA2UL(0x7502, 0),
+ PSIL_SA2UL(0x7503, 0),
+};
+
+/* PSI-L destination thread IDs, used for TX (DMA_MEM_TO_DEV) */
+static struct psil_ep j721e_dst_ep_map[] = {
+ /* SA2UL */
+ PSIL_SA2UL(0xc000, 1),
+ PSIL_SA2UL(0xc001, 1),
+ /* PRU_ICSSG0 */
+ PSIL_ETHERNET(0xc100),
+ PSIL_ETHERNET(0xc101),
+ PSIL_ETHERNET(0xc102),
+ PSIL_ETHERNET(0xc103),
+ PSIL_ETHERNET(0xc104),
+ PSIL_ETHERNET(0xc105),
+ PSIL_ETHERNET(0xc106),
+ PSIL_ETHERNET(0xc107),
+ /* PRU_ICSSG1 */
+ PSIL_ETHERNET(0xc200),
+ PSIL_ETHERNET(0xc201),
+ PSIL_ETHERNET(0xc202),
+ PSIL_ETHERNET(0xc203),
+ PSIL_ETHERNET(0xc204),
+ PSIL_ETHERNET(0xc205),
+ PSIL_ETHERNET(0xc206),
+ PSIL_ETHERNET(0xc207),
+ /* PDMA6 (PSIL_PDMA_MCASP_G0) - McASP0-2 */
+ PSIL_PDMA_MCASP(0xc400),
+ PSIL_PDMA_MCASP(0xc401),
+ PSIL_PDMA_MCASP(0xc402),
+ /* PDMA7 (PSIL_PDMA_MCASP_G1) - McASP3-11 */
+ PSIL_PDMA_MCASP(0xc500),
+ PSIL_PDMA_MCASP(0xc501),
+ PSIL_PDMA_MCASP(0xc502),
+ PSIL_PDMA_MCASP(0xc503),
+ PSIL_PDMA_MCASP(0xc504),
+ PSIL_PDMA_MCASP(0xc505),
+ PSIL_PDMA_MCASP(0xc506),
+ PSIL_PDMA_MCASP(0xc507),
+ PSIL_PDMA_MCASP(0xc508),
+ /* PDMA8 (PDMA_MISC_G0) - SPI0-1 */
+ PSIL_PDMA_XY_PKT(0xc600),
+ PSIL_PDMA_XY_PKT(0xc601),
+ PSIL_PDMA_XY_PKT(0xc602),
+ PSIL_PDMA_XY_PKT(0xc603),
+ PSIL_PDMA_XY_PKT(0xc604),
+ PSIL_PDMA_XY_PKT(0xc605),
+ PSIL_PDMA_XY_PKT(0xc606),
+ PSIL_PDMA_XY_PKT(0xc607),
+ /* PDMA9 (PDMA_MISC_G1) - SPI2-3 */
+ PSIL_PDMA_XY_PKT(0xc60c),
+ PSIL_PDMA_XY_PKT(0xc60d),
+ PSIL_PDMA_XY_PKT(0xc60e),
+ PSIL_PDMA_XY_PKT(0xc60f),
+ PSIL_PDMA_XY_PKT(0xc610),
+ PSIL_PDMA_XY_PKT(0xc611),
+ PSIL_PDMA_XY_PKT(0xc612),
+ PSIL_PDMA_XY_PKT(0xc613),
+ /* PDMA10 (PDMA_MISC_G2) - SPI4-5 */
+ PSIL_PDMA_XY_PKT(0xc618),
+ PSIL_PDMA_XY_PKT(0xc619),
+ PSIL_PDMA_XY_PKT(0xc61a),
+ PSIL_PDMA_XY_PKT(0xc61b),
+ PSIL_PDMA_XY_PKT(0xc61c),
+ PSIL_PDMA_XY_PKT(0xc61d),
+ PSIL_PDMA_XY_PKT(0xc61e),
+ PSIL_PDMA_XY_PKT(0xc61f),
+ /* PDMA11 (PDMA_MISC_G3) */
+ PSIL_PDMA_XY_PKT(0xc624),
+ PSIL_PDMA_XY_PKT(0xc625),
+ PSIL_PDMA_XY_PKT(0xc626),
+ PSIL_PDMA_XY_PKT(0xc627),
+ PSIL_PDMA_XY_PKT(0xc628),
+ PSIL_PDMA_XY_PKT(0xc629),
+ PSIL_PDMA_XY_PKT(0xc630),
+ PSIL_PDMA_XY_PKT(0xc63a),
+ /* PDMA13 (PDMA_USART_G0) - UART0-1 */
+ PSIL_PDMA_XY_PKT(0xc700),
+ PSIL_PDMA_XY_PKT(0xc701),
+ /* PDMA14 (PDMA_USART_G1) - UART2-3 */
+ PSIL_PDMA_XY_PKT(0xc702),
+ PSIL_PDMA_XY_PKT(0xc703),
+ /* PDMA15 (PDMA_USART_G2) - UART4-9 */
+ PSIL_PDMA_XY_PKT(0xc704),
+ PSIL_PDMA_XY_PKT(0xc705),
+ PSIL_PDMA_XY_PKT(0xc706),
+ PSIL_PDMA_XY_PKT(0xc707),
+ PSIL_PDMA_XY_PKT(0xc708),
+ PSIL_PDMA_XY_PKT(0xc709),
+ /* CPSW9 */
+ PSIL_ETHERNET(0xca00),
+ PSIL_ETHERNET(0xca01),
+ PSIL_ETHERNET(0xca02),
+ PSIL_ETHERNET(0xca03),
+ PSIL_ETHERNET(0xca04),
+ PSIL_ETHERNET(0xca05),
+ PSIL_ETHERNET(0xca06),
+ PSIL_ETHERNET(0xca07),
+ /* CPSW0 */
+ PSIL_ETHERNET(0xf000),
+ PSIL_ETHERNET(0xf001),
+ PSIL_ETHERNET(0xf002),
+ PSIL_ETHERNET(0xf003),
+ PSIL_ETHERNET(0xf004),
+ PSIL_ETHERNET(0xf005),
+ PSIL_ETHERNET(0xf006),
+ PSIL_ETHERNET(0xf007),
+ /* MCU_PDMA0 (MCU_PDMA_MISC_G0) - SPI0 */
+ PSIL_PDMA_XY_PKT(0xf100),
+ PSIL_PDMA_XY_PKT(0xf101),
+ PSIL_PDMA_XY_PKT(0xf102),
+ PSIL_PDMA_XY_PKT(0xf103),
+ /* MCU_PDMA1 (MCU_PDMA_MISC_G1) - SPI1-2 */
+ PSIL_PDMA_XY_PKT(0xf200),
+ PSIL_PDMA_XY_PKT(0xf201),
+ PSIL_PDMA_XY_PKT(0xf202),
+ PSIL_PDMA_XY_PKT(0xf203),
+ PSIL_PDMA_XY_PKT(0xf204),
+ PSIL_PDMA_XY_PKT(0xf205),
+ PSIL_PDMA_XY_PKT(0xf206),
+ PSIL_PDMA_XY_PKT(0xf207),
+ /* MCU_PDMA2 (MCU_PDMA_MISC_G2) - UART0 */
+ PSIL_PDMA_XY_PKT(0xf300),
+ /* SA2UL */
+ PSIL_SA2UL(0xf500, 1),
+ PSIL_SA2UL(0xf501, 1),
+};
+
+struct psil_ep_map j721e_ep_map = {
+ .name = "j721e",
+ .src = j721e_src_ep_map,
+ .src_count = ARRAY_SIZE(j721e_src_ep_map),
+ .dst = j721e_dst_ep_map,
+ .dst_count = ARRAY_SIZE(j721e_dst_ep_map),
+};
diff --git a/drivers/dma/ti/k3-psil-j721s2.c b/drivers/dma/ti/k3-psil-j721s2.c
new file mode 100644
index 000000000..a488c2250
--- /dev/null
+++ b/drivers/dma/ti/k3-psil-j721s2.c
@@ -0,0 +1,175 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 Texas Instruments Incorporated - https://www.ti.com
+ */
+
+#include <linux/kernel.h>
+
+#include "k3-psil-priv.h"
+
+#define PSIL_PDMA_XY_TR(x) \
+ { \
+ .thread_id = x, \
+ .ep_config = { \
+ .ep_type = PSIL_EP_PDMA_XY, \
+ }, \
+ }
+
+#define PSIL_PDMA_XY_PKT(x) \
+ { \
+ .thread_id = x, \
+ .ep_config = { \
+ .ep_type = PSIL_EP_PDMA_XY, \
+ .pkt_mode = 1, \
+ }, \
+ }
+
+#define PSIL_PDMA_MCASP(x) \
+ { \
+ .thread_id = x, \
+ .ep_config = { \
+ .ep_type = PSIL_EP_PDMA_XY, \
+ .pdma_acc32 = 1, \
+ .pdma_burst = 1, \
+ }, \
+ }
+
+#define PSIL_ETHERNET(x) \
+ { \
+ .thread_id = x, \
+ .ep_config = { \
+ .ep_type = PSIL_EP_NATIVE, \
+ .pkt_mode = 1, \
+ .needs_epib = 1, \
+ .psd_size = 16, \
+ }, \
+ }
+
+#define PSIL_SA2UL(x, tx) \
+ { \
+ .thread_id = x, \
+ .ep_config = { \
+ .ep_type = PSIL_EP_NATIVE, \
+ .pkt_mode = 1, \
+ .needs_epib = 1, \
+ .psd_size = 64, \
+ .notdpkt = tx, \
+ }, \
+ }
+
+/* PSI-L source thread IDs, used for RX (DMA_DEV_TO_MEM) */
+static struct psil_ep j721s2_src_ep_map[] = {
+ /* PDMA_MCASP - McASP0-4 */
+ PSIL_PDMA_MCASP(0x4400),
+ PSIL_PDMA_MCASP(0x4401),
+ PSIL_PDMA_MCASP(0x4402),
+ PSIL_PDMA_MCASP(0x4403),
+ PSIL_PDMA_MCASP(0x4404),
+ /* PDMA_SPI_G0 - SPI0-3 */
+ PSIL_PDMA_XY_PKT(0x4600),
+ PSIL_PDMA_XY_PKT(0x4601),
+ PSIL_PDMA_XY_PKT(0x4602),
+ PSIL_PDMA_XY_PKT(0x4603),
+ PSIL_PDMA_XY_PKT(0x4604),
+ PSIL_PDMA_XY_PKT(0x4605),
+ PSIL_PDMA_XY_PKT(0x4606),
+ PSIL_PDMA_XY_PKT(0x4607),
+ PSIL_PDMA_XY_PKT(0x4608),
+ PSIL_PDMA_XY_PKT(0x4609),
+ PSIL_PDMA_XY_PKT(0x460a),
+ PSIL_PDMA_XY_PKT(0x460b),
+ PSIL_PDMA_XY_PKT(0x460c),
+ PSIL_PDMA_XY_PKT(0x460d),
+ PSIL_PDMA_XY_PKT(0x460e),
+ PSIL_PDMA_XY_PKT(0x460f),
+ /* PDMA_SPI_G1 - SPI4-7 */
+ PSIL_PDMA_XY_PKT(0x4610),
+ PSIL_PDMA_XY_PKT(0x4611),
+ PSIL_PDMA_XY_PKT(0x4612),
+ PSIL_PDMA_XY_PKT(0x4613),
+ PSIL_PDMA_XY_PKT(0x4614),
+ PSIL_PDMA_XY_PKT(0x4615),
+ PSIL_PDMA_XY_PKT(0x4616),
+ PSIL_PDMA_XY_PKT(0x4617),
+ PSIL_PDMA_XY_PKT(0x4618),
+ PSIL_PDMA_XY_PKT(0x4619),
+ PSIL_PDMA_XY_PKT(0x461a),
+ PSIL_PDMA_XY_PKT(0x461b),
+ PSIL_PDMA_XY_PKT(0x461c),
+ PSIL_PDMA_XY_PKT(0x461d),
+ PSIL_PDMA_XY_PKT(0x461e),
+ PSIL_PDMA_XY_PKT(0x461f),
+ /* PDMA_USART_G0 - UART0-1 */
+ PSIL_PDMA_XY_PKT(0x4700),
+ PSIL_PDMA_XY_PKT(0x4701),
+ /* PDMA_USART_G1 - UART2-3 */
+ PSIL_PDMA_XY_PKT(0x4702),
+ PSIL_PDMA_XY_PKT(0x4703),
+ /* PDMA_USART_G2 - UART4-9 */
+ PSIL_PDMA_XY_PKT(0x4704),
+ PSIL_PDMA_XY_PKT(0x4705),
+ PSIL_PDMA_XY_PKT(0x4706),
+ PSIL_PDMA_XY_PKT(0x4707),
+ PSIL_PDMA_XY_PKT(0x4708),
+ PSIL_PDMA_XY_PKT(0x4709),
+ /* MAIN SA2UL */
+ PSIL_SA2UL(0x4a40, 0),
+ PSIL_SA2UL(0x4a41, 0),
+ PSIL_SA2UL(0x4a42, 0),
+ PSIL_SA2UL(0x4a43, 0),
+ /* CPSW0 */
+ PSIL_ETHERNET(0x7000),
+ /* MCU_PDMA0 (MCU_PDMA_MISC_G0) - SPI0 */
+ PSIL_PDMA_XY_PKT(0x7100),
+ PSIL_PDMA_XY_PKT(0x7101),
+ PSIL_PDMA_XY_PKT(0x7102),
+ PSIL_PDMA_XY_PKT(0x7103),
+ /* MCU_PDMA1 (MCU_PDMA_MISC_G1) - SPI1-2 */
+ PSIL_PDMA_XY_PKT(0x7200),
+ PSIL_PDMA_XY_PKT(0x7201),
+ PSIL_PDMA_XY_PKT(0x7202),
+ PSIL_PDMA_XY_PKT(0x7203),
+ PSIL_PDMA_XY_PKT(0x7204),
+ PSIL_PDMA_XY_PKT(0x7205),
+ PSIL_PDMA_XY_PKT(0x7206),
+ PSIL_PDMA_XY_PKT(0x7207),
+ /* MCU_PDMA2 (MCU_PDMA_MISC_G2) - UART0 */
+ PSIL_PDMA_XY_PKT(0x7300),
+ /* MCU_PDMA_ADC - ADC0-1 */
+ PSIL_PDMA_XY_TR(0x7400),
+ PSIL_PDMA_XY_TR(0x7401),
+ PSIL_PDMA_XY_TR(0x7402),
+ PSIL_PDMA_XY_TR(0x7403),
+ /* SA2UL */
+ PSIL_SA2UL(0x7500, 0),
+ PSIL_SA2UL(0x7501, 0),
+ PSIL_SA2UL(0x7502, 0),
+ PSIL_SA2UL(0x7503, 0),
+};
+
+/* PSI-L destination thread IDs, used for TX (DMA_MEM_TO_DEV) */
+static struct psil_ep j721s2_dst_ep_map[] = {
+ /* MAIN SA2UL */
+ PSIL_SA2UL(0xca40, 1),
+ PSIL_SA2UL(0xca41, 1),
+ /* CPSW0 */
+ PSIL_ETHERNET(0xf000),
+ PSIL_ETHERNET(0xf001),
+ PSIL_ETHERNET(0xf002),
+ PSIL_ETHERNET(0xf003),
+ PSIL_ETHERNET(0xf004),
+ PSIL_ETHERNET(0xf005),
+ PSIL_ETHERNET(0xf006),
+ PSIL_ETHERNET(0xf007),
+ /* SA2UL */
+ PSIL_SA2UL(0xf500, 1),
+ PSIL_SA2UL(0xf501, 1),
+};
+
+struct psil_ep_map j721s2_ep_map = {
+ .name = "j721s2",
+ .src = j721s2_src_ep_map,
+ .src_count = ARRAY_SIZE(j721s2_src_ep_map),
+ .dst = j721s2_dst_ep_map,
+ .dst_count = ARRAY_SIZE(j721s2_dst_ep_map),
+};
diff --git a/drivers/dma/ti/k3-psil-priv.h b/drivers/dma/ti/k3-psil-priv.h
new file mode 100644
index 000000000..74fa9ec02
--- /dev/null
+++ b/drivers/dma/ti/k3-psil-priv.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2019 Texas Instruments Incorporated - http://www.ti.com
+ */
+
+#ifndef K3_PSIL_PRIV_H_
+#define K3_PSIL_PRIV_H_
+
+#include <linux/dma/k3-psil.h>
+
+struct psil_ep {
+ u32 thread_id;
+ struct psil_endpoint_config ep_config;
+};
+
+/**
+ * struct psil_ep_map - PSI-L thread ID configuration maps
+ * @name: Name of the map, set it to the name of the SoC
+ * @src: Array of source PSI-L thread configurations
+ * @src_count: Number of entries in the src array
+ * @dst: Array of destination PSI-L thread configurations
+ * @dst_count: Number of entries in the dst array
+ *
+ * In case of symmetric configuration for a matching src/dst thread (for example
+ * 0x4400 and 0xc400) only the src configuration can be present. If no dst
+ * configuration found the code will look for (dst_thread_id & ~0x8000) to find
+ * the symmetric match.
+ */
+struct psil_ep_map {
+ char *name;
+ struct psil_ep *src;
+ int src_count;
+ struct psil_ep *dst;
+ int dst_count;
+};
+
+struct psil_endpoint_config *psil_get_ep_config(u32 thread_id);
+
+/* SoC PSI-L endpoint maps */
+extern struct psil_ep_map am654_ep_map;
+extern struct psil_ep_map j721e_ep_map;
+extern struct psil_ep_map j7200_ep_map;
+extern struct psil_ep_map am64_ep_map;
+extern struct psil_ep_map j721s2_ep_map;
+extern struct psil_ep_map am62_ep_map;
+
+#endif /* K3_PSIL_PRIV_H_ */
diff --git a/drivers/dma/ti/k3-psil.c b/drivers/dma/ti/k3-psil.c
new file mode 100644
index 000000000..761a38409
--- /dev/null
+++ b/drivers/dma/ti/k3-psil.c
@@ -0,0 +1,103 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 Texas Instruments Incorporated - http://www.ti.com
+ * Author: Peter Ujfalusi <peter.ujfalusi@ti.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/device.h>
+#include <linux/init.h>
+#include <linux/mutex.h>
+#include <linux/of.h>
+#include <linux/sys_soc.h>
+
+#include "k3-psil-priv.h"
+
+static DEFINE_MUTEX(ep_map_mutex);
+static const struct psil_ep_map *soc_ep_map;
+
+static const struct soc_device_attribute k3_soc_devices[] = {
+ { .family = "AM65X", .data = &am654_ep_map },
+ { .family = "J721E", .data = &j721e_ep_map },
+ { .family = "J7200", .data = &j7200_ep_map },
+ { .family = "AM64X", .data = &am64_ep_map },
+ { .family = "J721S2", .data = &j721s2_ep_map },
+ { .family = "AM62X", .data = &am62_ep_map },
+ { /* sentinel */ }
+};
+
+struct psil_endpoint_config *psil_get_ep_config(u32 thread_id)
+{
+ int i;
+
+ mutex_lock(&ep_map_mutex);
+ if (!soc_ep_map) {
+ const struct soc_device_attribute *soc;
+
+ soc = soc_device_match(k3_soc_devices);
+ if (soc) {
+ soc_ep_map = soc->data;
+ } else {
+ pr_err("PSIL: No compatible machine found for map\n");
+ mutex_unlock(&ep_map_mutex);
+ return ERR_PTR(-ENOTSUPP);
+ }
+ pr_debug("%s: Using map for %s\n", __func__, soc_ep_map->name);
+ }
+ mutex_unlock(&ep_map_mutex);
+
+ if (thread_id & K3_PSIL_DST_THREAD_ID_OFFSET && soc_ep_map->dst) {
+ /* check in destination thread map */
+ for (i = 0; i < soc_ep_map->dst_count; i++) {
+ if (soc_ep_map->dst[i].thread_id == thread_id)
+ return &soc_ep_map->dst[i].ep_config;
+ }
+ }
+
+ thread_id &= ~K3_PSIL_DST_THREAD_ID_OFFSET;
+ if (soc_ep_map->src) {
+ for (i = 0; i < soc_ep_map->src_count; i++) {
+ if (soc_ep_map->src[i].thread_id == thread_id)
+ return &soc_ep_map->src[i].ep_config;
+ }
+ }
+
+ return ERR_PTR(-ENOENT);
+}
+EXPORT_SYMBOL_GPL(psil_get_ep_config);
+
+int psil_set_new_ep_config(struct device *dev, const char *name,
+ struct psil_endpoint_config *ep_config)
+{
+ struct psil_endpoint_config *dst_ep_config;
+ struct of_phandle_args dma_spec;
+ u32 thread_id;
+ int index;
+
+ if (!dev || !dev->of_node)
+ return -EINVAL;
+
+ index = of_property_match_string(dev->of_node, "dma-names", name);
+ if (index < 0)
+ return index;
+
+ if (of_parse_phandle_with_args(dev->of_node, "dmas", "#dma-cells",
+ index, &dma_spec))
+ return -ENOENT;
+
+ thread_id = dma_spec.args[0];
+
+ dst_ep_config = psil_get_ep_config(thread_id);
+ if (IS_ERR(dst_ep_config)) {
+ pr_err("PSIL: thread ID 0x%04x not defined in map\n",
+ thread_id);
+ of_node_put(dma_spec.np);
+ return PTR_ERR(dst_ep_config);
+ }
+
+ memcpy(dst_ep_config, ep_config, sizeof(*dst_ep_config));
+
+ of_node_put(dma_spec.np);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(psil_set_new_ep_config);
diff --git a/drivers/dma/ti/k3-udma-glue.c b/drivers/dma/ti/k3-udma-glue.c
new file mode 100644
index 000000000..4f1aeb81e
--- /dev/null
+++ b/drivers/dma/ti/k3-udma-glue.c
@@ -0,0 +1,1439 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * K3 NAVSS DMA glue interface
+ *
+ * Copyright (C) 2019 Texas Instruments Incorporated - http://www.ti.com
+ *
+ */
+
+#include <linux/atomic.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/io.h>
+#include <linux/init.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/soc/ti/k3-ringacc.h>
+#include <linux/dma/ti-cppi5.h>
+#include <linux/dma/k3-udma-glue.h>
+
+#include "k3-udma.h"
+#include "k3-psil-priv.h"
+
+struct k3_udma_glue_common {
+ struct device *dev;
+ struct device chan_dev;
+ struct udma_dev *udmax;
+ const struct udma_tisci_rm *tisci_rm;
+ struct k3_ringacc *ringacc;
+ u32 src_thread;
+ u32 dst_thread;
+
+ u32 hdesc_size;
+ bool epib;
+ u32 psdata_size;
+ u32 swdata_size;
+ u32 atype_asel;
+ struct psil_endpoint_config *ep_config;
+};
+
+struct k3_udma_glue_tx_channel {
+ struct k3_udma_glue_common common;
+
+ struct udma_tchan *udma_tchanx;
+ int udma_tchan_id;
+
+ struct k3_ring *ringtx;
+ struct k3_ring *ringtxcq;
+
+ bool psil_paired;
+
+ int virq;
+
+ atomic_t free_pkts;
+ bool tx_pause_on_err;
+ bool tx_filt_einfo;
+ bool tx_filt_pswords;
+ bool tx_supr_tdpkt;
+
+ int udma_tflow_id;
+};
+
+struct k3_udma_glue_rx_flow {
+ struct udma_rflow *udma_rflow;
+ int udma_rflow_id;
+ struct k3_ring *ringrx;
+ struct k3_ring *ringrxfdq;
+
+ int virq;
+};
+
+struct k3_udma_glue_rx_channel {
+ struct k3_udma_glue_common common;
+
+ struct udma_rchan *udma_rchanx;
+ int udma_rchan_id;
+ bool remote;
+
+ bool psil_paired;
+
+ u32 swdata_size;
+ int flow_id_base;
+
+ struct k3_udma_glue_rx_flow *flows;
+ u32 flow_num;
+ u32 flows_ready;
+};
+
+static void k3_udma_chan_dev_release(struct device *dev)
+{
+ /* The struct containing the device is devm managed */
+}
+
+static struct class k3_udma_glue_devclass = {
+ .name = "k3_udma_glue_chan",
+ .dev_release = k3_udma_chan_dev_release,
+};
+
+#define K3_UDMAX_TDOWN_TIMEOUT_US 1000
+
+static int of_k3_udma_glue_parse(struct device_node *udmax_np,
+ struct k3_udma_glue_common *common)
+{
+ common->udmax = of_xudma_dev_get(udmax_np, NULL);
+ if (IS_ERR(common->udmax))
+ return PTR_ERR(common->udmax);
+
+ common->ringacc = xudma_get_ringacc(common->udmax);
+ common->tisci_rm = xudma_dev_get_tisci_rm(common->udmax);
+
+ return 0;
+}
+
+static int of_k3_udma_glue_parse_chn(struct device_node *chn_np,
+ const char *name, struct k3_udma_glue_common *common,
+ bool tx_chn)
+{
+ struct of_phandle_args dma_spec;
+ u32 thread_id;
+ int ret = 0;
+ int index;
+
+ if (unlikely(!name))
+ return -EINVAL;
+
+ index = of_property_match_string(chn_np, "dma-names", name);
+ if (index < 0)
+ return index;
+
+ if (of_parse_phandle_with_args(chn_np, "dmas", "#dma-cells", index,
+ &dma_spec))
+ return -ENOENT;
+
+ ret = of_k3_udma_glue_parse(dma_spec.np, common);
+ if (ret)
+ goto out_put_spec;
+
+ thread_id = dma_spec.args[0];
+ if (dma_spec.args_count == 2) {
+ if (dma_spec.args[1] > 2 && !xudma_is_pktdma(common->udmax)) {
+ dev_err(common->dev, "Invalid channel atype: %u\n",
+ dma_spec.args[1]);
+ ret = -EINVAL;
+ goto out_put_spec;
+ }
+ if (dma_spec.args[1] > 15 && xudma_is_pktdma(common->udmax)) {
+ dev_err(common->dev, "Invalid channel asel: %u\n",
+ dma_spec.args[1]);
+ ret = -EINVAL;
+ goto out_put_spec;
+ }
+
+ common->atype_asel = dma_spec.args[1];
+ }
+
+ if (tx_chn && !(thread_id & K3_PSIL_DST_THREAD_ID_OFFSET)) {
+ ret = -EINVAL;
+ goto out_put_spec;
+ }
+
+ if (!tx_chn && (thread_id & K3_PSIL_DST_THREAD_ID_OFFSET)) {
+ ret = -EINVAL;
+ goto out_put_spec;
+ }
+
+ /* get psil endpoint config */
+ common->ep_config = psil_get_ep_config(thread_id);
+ if (IS_ERR(common->ep_config)) {
+ dev_err(common->dev,
+ "No configuration for psi-l thread 0x%04x\n",
+ thread_id);
+ ret = PTR_ERR(common->ep_config);
+ goto out_put_spec;
+ }
+
+ common->epib = common->ep_config->needs_epib;
+ common->psdata_size = common->ep_config->psd_size;
+
+ if (tx_chn)
+ common->dst_thread = thread_id;
+ else
+ common->src_thread = thread_id;
+
+out_put_spec:
+ of_node_put(dma_spec.np);
+ return ret;
+};
+
+static void k3_udma_glue_dump_tx_chn(struct k3_udma_glue_tx_channel *tx_chn)
+{
+ struct device *dev = tx_chn->common.dev;
+
+ dev_dbg(dev, "dump_tx_chn:\n"
+ "udma_tchan_id: %d\n"
+ "src_thread: %08x\n"
+ "dst_thread: %08x\n",
+ tx_chn->udma_tchan_id,
+ tx_chn->common.src_thread,
+ tx_chn->common.dst_thread);
+}
+
+static void k3_udma_glue_dump_tx_rt_chn(struct k3_udma_glue_tx_channel *chn,
+ char *mark)
+{
+ struct device *dev = chn->common.dev;
+
+ dev_dbg(dev, "=== dump ===> %s\n", mark);
+ dev_dbg(dev, "0x%08X: %08X\n", UDMA_CHAN_RT_CTL_REG,
+ xudma_tchanrt_read(chn->udma_tchanx, UDMA_CHAN_RT_CTL_REG));
+ dev_dbg(dev, "0x%08X: %08X\n", UDMA_CHAN_RT_PEER_RT_EN_REG,
+ xudma_tchanrt_read(chn->udma_tchanx,
+ UDMA_CHAN_RT_PEER_RT_EN_REG));
+ dev_dbg(dev, "0x%08X: %08X\n", UDMA_CHAN_RT_PCNT_REG,
+ xudma_tchanrt_read(chn->udma_tchanx, UDMA_CHAN_RT_PCNT_REG));
+ dev_dbg(dev, "0x%08X: %08X\n", UDMA_CHAN_RT_BCNT_REG,
+ xudma_tchanrt_read(chn->udma_tchanx, UDMA_CHAN_RT_BCNT_REG));
+ dev_dbg(dev, "0x%08X: %08X\n", UDMA_CHAN_RT_SBCNT_REG,
+ xudma_tchanrt_read(chn->udma_tchanx, UDMA_CHAN_RT_SBCNT_REG));
+}
+
+static int k3_udma_glue_cfg_tx_chn(struct k3_udma_glue_tx_channel *tx_chn)
+{
+ const struct udma_tisci_rm *tisci_rm = tx_chn->common.tisci_rm;
+ struct ti_sci_msg_rm_udmap_tx_ch_cfg req;
+
+ memset(&req, 0, sizeof(req));
+
+ req.valid_params = TI_SCI_MSG_VALUE_RM_UDMAP_CH_PAUSE_ON_ERR_VALID |
+ TI_SCI_MSG_VALUE_RM_UDMAP_CH_TX_FILT_EINFO_VALID |
+ TI_SCI_MSG_VALUE_RM_UDMAP_CH_TX_FILT_PSWORDS_VALID |
+ TI_SCI_MSG_VALUE_RM_UDMAP_CH_CHAN_TYPE_VALID |
+ TI_SCI_MSG_VALUE_RM_UDMAP_CH_TX_SUPR_TDPKT_VALID |
+ TI_SCI_MSG_VALUE_RM_UDMAP_CH_FETCH_SIZE_VALID |
+ TI_SCI_MSG_VALUE_RM_UDMAP_CH_CQ_QNUM_VALID |
+ TI_SCI_MSG_VALUE_RM_UDMAP_CH_ATYPE_VALID;
+ req.nav_id = tisci_rm->tisci_dev_id;
+ req.index = tx_chn->udma_tchan_id;
+ if (tx_chn->tx_pause_on_err)
+ req.tx_pause_on_err = 1;
+ if (tx_chn->tx_filt_einfo)
+ req.tx_filt_einfo = 1;
+ if (tx_chn->tx_filt_pswords)
+ req.tx_filt_pswords = 1;
+ req.tx_chan_type = TI_SCI_RM_UDMAP_CHAN_TYPE_PKT_PBRR;
+ if (tx_chn->tx_supr_tdpkt)
+ req.tx_supr_tdpkt = 1;
+ req.tx_fetch_size = tx_chn->common.hdesc_size >> 2;
+ req.txcq_qnum = k3_ringacc_get_ring_id(tx_chn->ringtxcq);
+ req.tx_atype = tx_chn->common.atype_asel;
+
+ return tisci_rm->tisci_udmap_ops->tx_ch_cfg(tisci_rm->tisci, &req);
+}
+
+struct k3_udma_glue_tx_channel *k3_udma_glue_request_tx_chn(struct device *dev,
+ const char *name, struct k3_udma_glue_tx_channel_cfg *cfg)
+{
+ struct k3_udma_glue_tx_channel *tx_chn;
+ int ret;
+
+ tx_chn = devm_kzalloc(dev, sizeof(*tx_chn), GFP_KERNEL);
+ if (!tx_chn)
+ return ERR_PTR(-ENOMEM);
+
+ tx_chn->common.dev = dev;
+ tx_chn->common.swdata_size = cfg->swdata_size;
+ tx_chn->tx_pause_on_err = cfg->tx_pause_on_err;
+ tx_chn->tx_filt_einfo = cfg->tx_filt_einfo;
+ tx_chn->tx_filt_pswords = cfg->tx_filt_pswords;
+ tx_chn->tx_supr_tdpkt = cfg->tx_supr_tdpkt;
+
+ /* parse of udmap channel */
+ ret = of_k3_udma_glue_parse_chn(dev->of_node, name,
+ &tx_chn->common, true);
+ if (ret)
+ goto err;
+
+ tx_chn->common.hdesc_size = cppi5_hdesc_calc_size(tx_chn->common.epib,
+ tx_chn->common.psdata_size,
+ tx_chn->common.swdata_size);
+
+ if (xudma_is_pktdma(tx_chn->common.udmax))
+ tx_chn->udma_tchan_id = tx_chn->common.ep_config->mapped_channel_id;
+ else
+ tx_chn->udma_tchan_id = -1;
+
+ /* request and cfg UDMAP TX channel */
+ tx_chn->udma_tchanx = xudma_tchan_get(tx_chn->common.udmax,
+ tx_chn->udma_tchan_id);
+ if (IS_ERR(tx_chn->udma_tchanx)) {
+ ret = PTR_ERR(tx_chn->udma_tchanx);
+ dev_err(dev, "UDMAX tchanx get err %d\n", ret);
+ goto err;
+ }
+ tx_chn->udma_tchan_id = xudma_tchan_get_id(tx_chn->udma_tchanx);
+
+ tx_chn->common.chan_dev.class = &k3_udma_glue_devclass;
+ tx_chn->common.chan_dev.parent = xudma_get_device(tx_chn->common.udmax);
+ dev_set_name(&tx_chn->common.chan_dev, "tchan%d-0x%04x",
+ tx_chn->udma_tchan_id, tx_chn->common.dst_thread);
+ ret = device_register(&tx_chn->common.chan_dev);
+ if (ret) {
+ dev_err(dev, "Channel Device registration failed %d\n", ret);
+ put_device(&tx_chn->common.chan_dev);
+ tx_chn->common.chan_dev.parent = NULL;
+ goto err;
+ }
+
+ if (xudma_is_pktdma(tx_chn->common.udmax)) {
+ /* prepare the channel device as coherent */
+ tx_chn->common.chan_dev.dma_coherent = true;
+ dma_coerce_mask_and_coherent(&tx_chn->common.chan_dev,
+ DMA_BIT_MASK(48));
+ }
+
+ atomic_set(&tx_chn->free_pkts, cfg->txcq_cfg.size);
+
+ if (xudma_is_pktdma(tx_chn->common.udmax))
+ tx_chn->udma_tflow_id = tx_chn->common.ep_config->default_flow_id;
+ else
+ tx_chn->udma_tflow_id = tx_chn->udma_tchan_id;
+
+ /* request and cfg rings */
+ ret = k3_ringacc_request_rings_pair(tx_chn->common.ringacc,
+ tx_chn->udma_tflow_id, -1,
+ &tx_chn->ringtx,
+ &tx_chn->ringtxcq);
+ if (ret) {
+ dev_err(dev, "Failed to get TX/TXCQ rings %d\n", ret);
+ goto err;
+ }
+
+ /* Set the dma_dev for the rings to be configured */
+ cfg->tx_cfg.dma_dev = k3_udma_glue_tx_get_dma_device(tx_chn);
+ cfg->txcq_cfg.dma_dev = cfg->tx_cfg.dma_dev;
+
+ /* Set the ASEL value for DMA rings of PKTDMA */
+ if (xudma_is_pktdma(tx_chn->common.udmax)) {
+ cfg->tx_cfg.asel = tx_chn->common.atype_asel;
+ cfg->txcq_cfg.asel = tx_chn->common.atype_asel;
+ }
+
+ ret = k3_ringacc_ring_cfg(tx_chn->ringtx, &cfg->tx_cfg);
+ if (ret) {
+ dev_err(dev, "Failed to cfg ringtx %d\n", ret);
+ goto err;
+ }
+
+ ret = k3_ringacc_ring_cfg(tx_chn->ringtxcq, &cfg->txcq_cfg);
+ if (ret) {
+ dev_err(dev, "Failed to cfg ringtx %d\n", ret);
+ goto err;
+ }
+
+ /* request and cfg psi-l */
+ tx_chn->common.src_thread =
+ xudma_dev_get_psil_base(tx_chn->common.udmax) +
+ tx_chn->udma_tchan_id;
+
+ ret = k3_udma_glue_cfg_tx_chn(tx_chn);
+ if (ret) {
+ dev_err(dev, "Failed to cfg tchan %d\n", ret);
+ goto err;
+ }
+
+ k3_udma_glue_dump_tx_chn(tx_chn);
+
+ return tx_chn;
+
+err:
+ k3_udma_glue_release_tx_chn(tx_chn);
+ return ERR_PTR(ret);
+}
+EXPORT_SYMBOL_GPL(k3_udma_glue_request_tx_chn);
+
+void k3_udma_glue_release_tx_chn(struct k3_udma_glue_tx_channel *tx_chn)
+{
+ if (tx_chn->psil_paired) {
+ xudma_navss_psil_unpair(tx_chn->common.udmax,
+ tx_chn->common.src_thread,
+ tx_chn->common.dst_thread);
+ tx_chn->psil_paired = false;
+ }
+
+ if (!IS_ERR_OR_NULL(tx_chn->udma_tchanx))
+ xudma_tchan_put(tx_chn->common.udmax,
+ tx_chn->udma_tchanx);
+
+ if (tx_chn->ringtxcq)
+ k3_ringacc_ring_free(tx_chn->ringtxcq);
+
+ if (tx_chn->ringtx)
+ k3_ringacc_ring_free(tx_chn->ringtx);
+
+ if (tx_chn->common.chan_dev.parent) {
+ device_unregister(&tx_chn->common.chan_dev);
+ tx_chn->common.chan_dev.parent = NULL;
+ }
+}
+EXPORT_SYMBOL_GPL(k3_udma_glue_release_tx_chn);
+
+int k3_udma_glue_push_tx_chn(struct k3_udma_glue_tx_channel *tx_chn,
+ struct cppi5_host_desc_t *desc_tx,
+ dma_addr_t desc_dma)
+{
+ u32 ringtxcq_id;
+
+ if (!atomic_add_unless(&tx_chn->free_pkts, -1, 0))
+ return -ENOMEM;
+
+ ringtxcq_id = k3_ringacc_get_ring_id(tx_chn->ringtxcq);
+ cppi5_desc_set_retpolicy(&desc_tx->hdr, 0, ringtxcq_id);
+
+ return k3_ringacc_ring_push(tx_chn->ringtx, &desc_dma);
+}
+EXPORT_SYMBOL_GPL(k3_udma_glue_push_tx_chn);
+
+int k3_udma_glue_pop_tx_chn(struct k3_udma_glue_tx_channel *tx_chn,
+ dma_addr_t *desc_dma)
+{
+ int ret;
+
+ ret = k3_ringacc_ring_pop(tx_chn->ringtxcq, desc_dma);
+ if (!ret)
+ atomic_inc(&tx_chn->free_pkts);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(k3_udma_glue_pop_tx_chn);
+
+int k3_udma_glue_enable_tx_chn(struct k3_udma_glue_tx_channel *tx_chn)
+{
+ int ret;
+
+ ret = xudma_navss_psil_pair(tx_chn->common.udmax,
+ tx_chn->common.src_thread,
+ tx_chn->common.dst_thread);
+ if (ret) {
+ dev_err(tx_chn->common.dev, "PSI-L request err %d\n", ret);
+ return ret;
+ }
+
+ tx_chn->psil_paired = true;
+
+ xudma_tchanrt_write(tx_chn->udma_tchanx, UDMA_CHAN_RT_PEER_RT_EN_REG,
+ UDMA_PEER_RT_EN_ENABLE);
+
+ xudma_tchanrt_write(tx_chn->udma_tchanx, UDMA_CHAN_RT_CTL_REG,
+ UDMA_CHAN_RT_CTL_EN);
+
+ k3_udma_glue_dump_tx_rt_chn(tx_chn, "txchn en");
+ return 0;
+}
+EXPORT_SYMBOL_GPL(k3_udma_glue_enable_tx_chn);
+
+void k3_udma_glue_disable_tx_chn(struct k3_udma_glue_tx_channel *tx_chn)
+{
+ k3_udma_glue_dump_tx_rt_chn(tx_chn, "txchn dis1");
+
+ xudma_tchanrt_write(tx_chn->udma_tchanx, UDMA_CHAN_RT_CTL_REG, 0);
+
+ xudma_tchanrt_write(tx_chn->udma_tchanx,
+ UDMA_CHAN_RT_PEER_RT_EN_REG, 0);
+ k3_udma_glue_dump_tx_rt_chn(tx_chn, "txchn dis2");
+
+ if (tx_chn->psil_paired) {
+ xudma_navss_psil_unpair(tx_chn->common.udmax,
+ tx_chn->common.src_thread,
+ tx_chn->common.dst_thread);
+ tx_chn->psil_paired = false;
+ }
+}
+EXPORT_SYMBOL_GPL(k3_udma_glue_disable_tx_chn);
+
+void k3_udma_glue_tdown_tx_chn(struct k3_udma_glue_tx_channel *tx_chn,
+ bool sync)
+{
+ int i = 0;
+ u32 val;
+
+ k3_udma_glue_dump_tx_rt_chn(tx_chn, "txchn tdown1");
+
+ xudma_tchanrt_write(tx_chn->udma_tchanx, UDMA_CHAN_RT_CTL_REG,
+ UDMA_CHAN_RT_CTL_EN | UDMA_CHAN_RT_CTL_TDOWN);
+
+ val = xudma_tchanrt_read(tx_chn->udma_tchanx, UDMA_CHAN_RT_CTL_REG);
+
+ while (sync && (val & UDMA_CHAN_RT_CTL_EN)) {
+ val = xudma_tchanrt_read(tx_chn->udma_tchanx,
+ UDMA_CHAN_RT_CTL_REG);
+ udelay(1);
+ if (i > K3_UDMAX_TDOWN_TIMEOUT_US) {
+ dev_err(tx_chn->common.dev, "TX tdown timeout\n");
+ break;
+ }
+ i++;
+ }
+
+ val = xudma_tchanrt_read(tx_chn->udma_tchanx,
+ UDMA_CHAN_RT_PEER_RT_EN_REG);
+ if (sync && (val & UDMA_PEER_RT_EN_ENABLE))
+ dev_err(tx_chn->common.dev, "TX tdown peer not stopped\n");
+ k3_udma_glue_dump_tx_rt_chn(tx_chn, "txchn tdown2");
+}
+EXPORT_SYMBOL_GPL(k3_udma_glue_tdown_tx_chn);
+
+void k3_udma_glue_reset_tx_chn(struct k3_udma_glue_tx_channel *tx_chn,
+ void *data,
+ void (*cleanup)(void *data, dma_addr_t desc_dma))
+{
+ struct device *dev = tx_chn->common.dev;
+ dma_addr_t desc_dma;
+ int occ_tx, i, ret;
+
+ /*
+ * TXQ reset need to be special way as it is input for udma and its
+ * state cached by udma, so:
+ * 1) save TXQ occ
+ * 2) clean up TXQ and call callback .cleanup() for each desc
+ * 3) reset TXQ in a special way
+ */
+ occ_tx = k3_ringacc_ring_get_occ(tx_chn->ringtx);
+ dev_dbg(dev, "TX reset occ_tx %u\n", occ_tx);
+
+ for (i = 0; i < occ_tx; i++) {
+ ret = k3_ringacc_ring_pop(tx_chn->ringtx, &desc_dma);
+ if (ret) {
+ if (ret != -ENODATA)
+ dev_err(dev, "TX reset pop %d\n", ret);
+ break;
+ }
+ cleanup(data, desc_dma);
+ }
+
+ /* reset TXCQ as it is not input for udma - expected to be empty */
+ k3_ringacc_ring_reset(tx_chn->ringtxcq);
+ k3_ringacc_ring_reset_dma(tx_chn->ringtx, occ_tx);
+}
+EXPORT_SYMBOL_GPL(k3_udma_glue_reset_tx_chn);
+
+u32 k3_udma_glue_tx_get_hdesc_size(struct k3_udma_glue_tx_channel *tx_chn)
+{
+ return tx_chn->common.hdesc_size;
+}
+EXPORT_SYMBOL_GPL(k3_udma_glue_tx_get_hdesc_size);
+
+u32 k3_udma_glue_tx_get_txcq_id(struct k3_udma_glue_tx_channel *tx_chn)
+{
+ return k3_ringacc_get_ring_id(tx_chn->ringtxcq);
+}
+EXPORT_SYMBOL_GPL(k3_udma_glue_tx_get_txcq_id);
+
+int k3_udma_glue_tx_get_irq(struct k3_udma_glue_tx_channel *tx_chn)
+{
+ if (xudma_is_pktdma(tx_chn->common.udmax)) {
+ tx_chn->virq = xudma_pktdma_tflow_get_irq(tx_chn->common.udmax,
+ tx_chn->udma_tflow_id);
+ } else {
+ tx_chn->virq = k3_ringacc_get_ring_irq_num(tx_chn->ringtxcq);
+ }
+
+ return tx_chn->virq;
+}
+EXPORT_SYMBOL_GPL(k3_udma_glue_tx_get_irq);
+
+struct device *
+ k3_udma_glue_tx_get_dma_device(struct k3_udma_glue_tx_channel *tx_chn)
+{
+ if (xudma_is_pktdma(tx_chn->common.udmax) &&
+ (tx_chn->common.atype_asel == 14 || tx_chn->common.atype_asel == 15))
+ return &tx_chn->common.chan_dev;
+
+ return xudma_get_device(tx_chn->common.udmax);
+}
+EXPORT_SYMBOL_GPL(k3_udma_glue_tx_get_dma_device);
+
+void k3_udma_glue_tx_dma_to_cppi5_addr(struct k3_udma_glue_tx_channel *tx_chn,
+ dma_addr_t *addr)
+{
+ if (!xudma_is_pktdma(tx_chn->common.udmax) ||
+ !tx_chn->common.atype_asel)
+ return;
+
+ *addr |= (u64)tx_chn->common.atype_asel << K3_ADDRESS_ASEL_SHIFT;
+}
+EXPORT_SYMBOL_GPL(k3_udma_glue_tx_dma_to_cppi5_addr);
+
+void k3_udma_glue_tx_cppi5_to_dma_addr(struct k3_udma_glue_tx_channel *tx_chn,
+ dma_addr_t *addr)
+{
+ if (!xudma_is_pktdma(tx_chn->common.udmax) ||
+ !tx_chn->common.atype_asel)
+ return;
+
+ *addr &= (u64)GENMASK(K3_ADDRESS_ASEL_SHIFT - 1, 0);
+}
+EXPORT_SYMBOL_GPL(k3_udma_glue_tx_cppi5_to_dma_addr);
+
+static int k3_udma_glue_cfg_rx_chn(struct k3_udma_glue_rx_channel *rx_chn)
+{
+ const struct udma_tisci_rm *tisci_rm = rx_chn->common.tisci_rm;
+ struct ti_sci_msg_rm_udmap_rx_ch_cfg req;
+ int ret;
+
+ memset(&req, 0, sizeof(req));
+
+ req.valid_params = TI_SCI_MSG_VALUE_RM_UDMAP_CH_FETCH_SIZE_VALID |
+ TI_SCI_MSG_VALUE_RM_UDMAP_CH_CQ_QNUM_VALID |
+ TI_SCI_MSG_VALUE_RM_UDMAP_CH_CHAN_TYPE_VALID |
+ TI_SCI_MSG_VALUE_RM_UDMAP_CH_ATYPE_VALID;
+
+ req.nav_id = tisci_rm->tisci_dev_id;
+ req.index = rx_chn->udma_rchan_id;
+ req.rx_fetch_size = rx_chn->common.hdesc_size >> 2;
+ /*
+ * TODO: we can't support rxcq_qnum/RCHAN[a]_RCQ cfg with current sysfw
+ * and udmax impl, so just configure it to invalid value.
+ * req.rxcq_qnum = k3_ringacc_get_ring_id(rx_chn->flows[0].ringrx);
+ */
+ req.rxcq_qnum = 0xFFFF;
+ if (!xudma_is_pktdma(rx_chn->common.udmax) && rx_chn->flow_num &&
+ rx_chn->flow_id_base != rx_chn->udma_rchan_id) {
+ /* Default flow + extra ones */
+ req.valid_params |= TI_SCI_MSG_VALUE_RM_UDMAP_CH_RX_FLOWID_START_VALID |
+ TI_SCI_MSG_VALUE_RM_UDMAP_CH_RX_FLOWID_CNT_VALID;
+ req.flowid_start = rx_chn->flow_id_base;
+ req.flowid_cnt = rx_chn->flow_num;
+ }
+ req.rx_chan_type = TI_SCI_RM_UDMAP_CHAN_TYPE_PKT_PBRR;
+ req.rx_atype = rx_chn->common.atype_asel;
+
+ ret = tisci_rm->tisci_udmap_ops->rx_ch_cfg(tisci_rm->tisci, &req);
+ if (ret)
+ dev_err(rx_chn->common.dev, "rchan%d cfg failed %d\n",
+ rx_chn->udma_rchan_id, ret);
+
+ return ret;
+}
+
+static void k3_udma_glue_release_rx_flow(struct k3_udma_glue_rx_channel *rx_chn,
+ u32 flow_num)
+{
+ struct k3_udma_glue_rx_flow *flow = &rx_chn->flows[flow_num];
+
+ if (IS_ERR_OR_NULL(flow->udma_rflow))
+ return;
+
+ if (flow->ringrxfdq)
+ k3_ringacc_ring_free(flow->ringrxfdq);
+
+ if (flow->ringrx)
+ k3_ringacc_ring_free(flow->ringrx);
+
+ xudma_rflow_put(rx_chn->common.udmax, flow->udma_rflow);
+ flow->udma_rflow = NULL;
+ rx_chn->flows_ready--;
+}
+
+static int k3_udma_glue_cfg_rx_flow(struct k3_udma_glue_rx_channel *rx_chn,
+ u32 flow_idx,
+ struct k3_udma_glue_rx_flow_cfg *flow_cfg)
+{
+ struct k3_udma_glue_rx_flow *flow = &rx_chn->flows[flow_idx];
+ const struct udma_tisci_rm *tisci_rm = rx_chn->common.tisci_rm;
+ struct device *dev = rx_chn->common.dev;
+ struct ti_sci_msg_rm_udmap_flow_cfg req;
+ int rx_ring_id;
+ int rx_ringfdq_id;
+ int ret = 0;
+
+ flow->udma_rflow = xudma_rflow_get(rx_chn->common.udmax,
+ flow->udma_rflow_id);
+ if (IS_ERR(flow->udma_rflow)) {
+ ret = PTR_ERR(flow->udma_rflow);
+ dev_err(dev, "UDMAX rflow get err %d\n", ret);
+ return ret;
+ }
+
+ if (flow->udma_rflow_id != xudma_rflow_get_id(flow->udma_rflow)) {
+ ret = -ENODEV;
+ goto err_rflow_put;
+ }
+
+ if (xudma_is_pktdma(rx_chn->common.udmax)) {
+ rx_ringfdq_id = flow->udma_rflow_id +
+ xudma_get_rflow_ring_offset(rx_chn->common.udmax);
+ rx_ring_id = 0;
+ } else {
+ rx_ring_id = flow_cfg->ring_rxq_id;
+ rx_ringfdq_id = flow_cfg->ring_rxfdq0_id;
+ }
+
+ /* request and cfg rings */
+ ret = k3_ringacc_request_rings_pair(rx_chn->common.ringacc,
+ rx_ringfdq_id, rx_ring_id,
+ &flow->ringrxfdq,
+ &flow->ringrx);
+ if (ret) {
+ dev_err(dev, "Failed to get RX/RXFDQ rings %d\n", ret);
+ goto err_rflow_put;
+ }
+
+ /* Set the dma_dev for the rings to be configured */
+ flow_cfg->rx_cfg.dma_dev = k3_udma_glue_rx_get_dma_device(rx_chn);
+ flow_cfg->rxfdq_cfg.dma_dev = flow_cfg->rx_cfg.dma_dev;
+
+ /* Set the ASEL value for DMA rings of PKTDMA */
+ if (xudma_is_pktdma(rx_chn->common.udmax)) {
+ flow_cfg->rx_cfg.asel = rx_chn->common.atype_asel;
+ flow_cfg->rxfdq_cfg.asel = rx_chn->common.atype_asel;
+ }
+
+ ret = k3_ringacc_ring_cfg(flow->ringrx, &flow_cfg->rx_cfg);
+ if (ret) {
+ dev_err(dev, "Failed to cfg ringrx %d\n", ret);
+ goto err_ringrxfdq_free;
+ }
+
+ ret = k3_ringacc_ring_cfg(flow->ringrxfdq, &flow_cfg->rxfdq_cfg);
+ if (ret) {
+ dev_err(dev, "Failed to cfg ringrxfdq %d\n", ret);
+ goto err_ringrxfdq_free;
+ }
+
+ if (rx_chn->remote) {
+ rx_ring_id = TI_SCI_RESOURCE_NULL;
+ rx_ringfdq_id = TI_SCI_RESOURCE_NULL;
+ } else {
+ rx_ring_id = k3_ringacc_get_ring_id(flow->ringrx);
+ rx_ringfdq_id = k3_ringacc_get_ring_id(flow->ringrxfdq);
+ }
+
+ memset(&req, 0, sizeof(req));
+
+ req.valid_params =
+ TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_EINFO_PRESENT_VALID |
+ TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_PSINFO_PRESENT_VALID |
+ TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_ERROR_HANDLING_VALID |
+ TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_DESC_TYPE_VALID |
+ TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_DEST_QNUM_VALID |
+ TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_SRC_TAG_HI_SEL_VALID |
+ TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_SRC_TAG_LO_SEL_VALID |
+ TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_DEST_TAG_HI_SEL_VALID |
+ TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_DEST_TAG_LO_SEL_VALID |
+ TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_FDQ0_SZ0_QNUM_VALID |
+ TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_FDQ1_QNUM_VALID |
+ TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_FDQ2_QNUM_VALID |
+ TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_FDQ3_QNUM_VALID;
+ req.nav_id = tisci_rm->tisci_dev_id;
+ req.flow_index = flow->udma_rflow_id;
+ if (rx_chn->common.epib)
+ req.rx_einfo_present = 1;
+ if (rx_chn->common.psdata_size)
+ req.rx_psinfo_present = 1;
+ if (flow_cfg->rx_error_handling)
+ req.rx_error_handling = 1;
+ req.rx_desc_type = 0;
+ req.rx_dest_qnum = rx_ring_id;
+ req.rx_src_tag_hi_sel = 0;
+ req.rx_src_tag_lo_sel = flow_cfg->src_tag_lo_sel;
+ req.rx_dest_tag_hi_sel = 0;
+ req.rx_dest_tag_lo_sel = 0;
+ req.rx_fdq0_sz0_qnum = rx_ringfdq_id;
+ req.rx_fdq1_qnum = rx_ringfdq_id;
+ req.rx_fdq2_qnum = rx_ringfdq_id;
+ req.rx_fdq3_qnum = rx_ringfdq_id;
+
+ ret = tisci_rm->tisci_udmap_ops->rx_flow_cfg(tisci_rm->tisci, &req);
+ if (ret) {
+ dev_err(dev, "flow%d config failed: %d\n", flow->udma_rflow_id,
+ ret);
+ goto err_ringrxfdq_free;
+ }
+
+ rx_chn->flows_ready++;
+ dev_dbg(dev, "flow%d config done. ready:%d\n",
+ flow->udma_rflow_id, rx_chn->flows_ready);
+
+ return 0;
+
+err_ringrxfdq_free:
+ k3_ringacc_ring_free(flow->ringrxfdq);
+ k3_ringacc_ring_free(flow->ringrx);
+
+err_rflow_put:
+ xudma_rflow_put(rx_chn->common.udmax, flow->udma_rflow);
+ flow->udma_rflow = NULL;
+
+ return ret;
+}
+
+static void k3_udma_glue_dump_rx_chn(struct k3_udma_glue_rx_channel *chn)
+{
+ struct device *dev = chn->common.dev;
+
+ dev_dbg(dev, "dump_rx_chn:\n"
+ "udma_rchan_id: %d\n"
+ "src_thread: %08x\n"
+ "dst_thread: %08x\n"
+ "epib: %d\n"
+ "hdesc_size: %u\n"
+ "psdata_size: %u\n"
+ "swdata_size: %u\n"
+ "flow_id_base: %d\n"
+ "flow_num: %d\n",
+ chn->udma_rchan_id,
+ chn->common.src_thread,
+ chn->common.dst_thread,
+ chn->common.epib,
+ chn->common.hdesc_size,
+ chn->common.psdata_size,
+ chn->common.swdata_size,
+ chn->flow_id_base,
+ chn->flow_num);
+}
+
+static void k3_udma_glue_dump_rx_rt_chn(struct k3_udma_glue_rx_channel *chn,
+ char *mark)
+{
+ struct device *dev = chn->common.dev;
+
+ dev_dbg(dev, "=== dump ===> %s\n", mark);
+
+ dev_dbg(dev, "0x%08X: %08X\n", UDMA_CHAN_RT_CTL_REG,
+ xudma_rchanrt_read(chn->udma_rchanx, UDMA_CHAN_RT_CTL_REG));
+ dev_dbg(dev, "0x%08X: %08X\n", UDMA_CHAN_RT_PEER_RT_EN_REG,
+ xudma_rchanrt_read(chn->udma_rchanx,
+ UDMA_CHAN_RT_PEER_RT_EN_REG));
+ dev_dbg(dev, "0x%08X: %08X\n", UDMA_CHAN_RT_PCNT_REG,
+ xudma_rchanrt_read(chn->udma_rchanx, UDMA_CHAN_RT_PCNT_REG));
+ dev_dbg(dev, "0x%08X: %08X\n", UDMA_CHAN_RT_BCNT_REG,
+ xudma_rchanrt_read(chn->udma_rchanx, UDMA_CHAN_RT_BCNT_REG));
+ dev_dbg(dev, "0x%08X: %08X\n", UDMA_CHAN_RT_SBCNT_REG,
+ xudma_rchanrt_read(chn->udma_rchanx, UDMA_CHAN_RT_SBCNT_REG));
+}
+
+static int
+k3_udma_glue_allocate_rx_flows(struct k3_udma_glue_rx_channel *rx_chn,
+ struct k3_udma_glue_rx_channel_cfg *cfg)
+{
+ int ret;
+
+ /* default rflow */
+ if (cfg->flow_id_use_rxchan_id)
+ return 0;
+
+ /* not a GP rflows */
+ if (rx_chn->flow_id_base != -1 &&
+ !xudma_rflow_is_gp(rx_chn->common.udmax, rx_chn->flow_id_base))
+ return 0;
+
+ /* Allocate range of GP rflows */
+ ret = xudma_alloc_gp_rflow_range(rx_chn->common.udmax,
+ rx_chn->flow_id_base,
+ rx_chn->flow_num);
+ if (ret < 0) {
+ dev_err(rx_chn->common.dev, "UDMAX reserve_rflow %d cnt:%d err: %d\n",
+ rx_chn->flow_id_base, rx_chn->flow_num, ret);
+ return ret;
+ }
+ rx_chn->flow_id_base = ret;
+
+ return 0;
+}
+
+static struct k3_udma_glue_rx_channel *
+k3_udma_glue_request_rx_chn_priv(struct device *dev, const char *name,
+ struct k3_udma_glue_rx_channel_cfg *cfg)
+{
+ struct k3_udma_glue_rx_channel *rx_chn;
+ struct psil_endpoint_config *ep_cfg;
+ int ret, i;
+
+ if (cfg->flow_id_num <= 0)
+ return ERR_PTR(-EINVAL);
+
+ if (cfg->flow_id_num != 1 &&
+ (cfg->def_flow_cfg || cfg->flow_id_use_rxchan_id))
+ return ERR_PTR(-EINVAL);
+
+ rx_chn = devm_kzalloc(dev, sizeof(*rx_chn), GFP_KERNEL);
+ if (!rx_chn)
+ return ERR_PTR(-ENOMEM);
+
+ rx_chn->common.dev = dev;
+ rx_chn->common.swdata_size = cfg->swdata_size;
+ rx_chn->remote = false;
+
+ /* parse of udmap channel */
+ ret = of_k3_udma_glue_parse_chn(dev->of_node, name,
+ &rx_chn->common, false);
+ if (ret)
+ goto err;
+
+ rx_chn->common.hdesc_size = cppi5_hdesc_calc_size(rx_chn->common.epib,
+ rx_chn->common.psdata_size,
+ rx_chn->common.swdata_size);
+
+ ep_cfg = rx_chn->common.ep_config;
+
+ if (xudma_is_pktdma(rx_chn->common.udmax))
+ rx_chn->udma_rchan_id = ep_cfg->mapped_channel_id;
+ else
+ rx_chn->udma_rchan_id = -1;
+
+ /* request and cfg UDMAP RX channel */
+ rx_chn->udma_rchanx = xudma_rchan_get(rx_chn->common.udmax,
+ rx_chn->udma_rchan_id);
+ if (IS_ERR(rx_chn->udma_rchanx)) {
+ ret = PTR_ERR(rx_chn->udma_rchanx);
+ dev_err(dev, "UDMAX rchanx get err %d\n", ret);
+ goto err;
+ }
+ rx_chn->udma_rchan_id = xudma_rchan_get_id(rx_chn->udma_rchanx);
+
+ rx_chn->common.chan_dev.class = &k3_udma_glue_devclass;
+ rx_chn->common.chan_dev.parent = xudma_get_device(rx_chn->common.udmax);
+ dev_set_name(&rx_chn->common.chan_dev, "rchan%d-0x%04x",
+ rx_chn->udma_rchan_id, rx_chn->common.src_thread);
+ ret = device_register(&rx_chn->common.chan_dev);
+ if (ret) {
+ dev_err(dev, "Channel Device registration failed %d\n", ret);
+ put_device(&rx_chn->common.chan_dev);
+ rx_chn->common.chan_dev.parent = NULL;
+ goto err;
+ }
+
+ if (xudma_is_pktdma(rx_chn->common.udmax)) {
+ /* prepare the channel device as coherent */
+ rx_chn->common.chan_dev.dma_coherent = true;
+ dma_coerce_mask_and_coherent(&rx_chn->common.chan_dev,
+ DMA_BIT_MASK(48));
+ }
+
+ if (xudma_is_pktdma(rx_chn->common.udmax)) {
+ int flow_start = cfg->flow_id_base;
+ int flow_end;
+
+ if (flow_start == -1)
+ flow_start = ep_cfg->flow_start;
+
+ flow_end = flow_start + cfg->flow_id_num - 1;
+ if (flow_start < ep_cfg->flow_start ||
+ flow_end > (ep_cfg->flow_start + ep_cfg->flow_num - 1)) {
+ dev_err(dev, "Invalid flow range requested\n");
+ ret = -EINVAL;
+ goto err;
+ }
+ rx_chn->flow_id_base = flow_start;
+ } else {
+ rx_chn->flow_id_base = cfg->flow_id_base;
+
+ /* Use RX channel id as flow id: target dev can't generate flow_id */
+ if (cfg->flow_id_use_rxchan_id)
+ rx_chn->flow_id_base = rx_chn->udma_rchan_id;
+ }
+
+ rx_chn->flow_num = cfg->flow_id_num;
+
+ rx_chn->flows = devm_kcalloc(dev, rx_chn->flow_num,
+ sizeof(*rx_chn->flows), GFP_KERNEL);
+ if (!rx_chn->flows) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ ret = k3_udma_glue_allocate_rx_flows(rx_chn, cfg);
+ if (ret)
+ goto err;
+
+ for (i = 0; i < rx_chn->flow_num; i++)
+ rx_chn->flows[i].udma_rflow_id = rx_chn->flow_id_base + i;
+
+ /* request and cfg psi-l */
+ rx_chn->common.dst_thread =
+ xudma_dev_get_psil_base(rx_chn->common.udmax) +
+ rx_chn->udma_rchan_id;
+
+ ret = k3_udma_glue_cfg_rx_chn(rx_chn);
+ if (ret) {
+ dev_err(dev, "Failed to cfg rchan %d\n", ret);
+ goto err;
+ }
+
+ /* init default RX flow only if flow_num = 1 */
+ if (cfg->def_flow_cfg) {
+ ret = k3_udma_glue_cfg_rx_flow(rx_chn, 0, cfg->def_flow_cfg);
+ if (ret)
+ goto err;
+ }
+
+ k3_udma_glue_dump_rx_chn(rx_chn);
+
+ return rx_chn;
+
+err:
+ k3_udma_glue_release_rx_chn(rx_chn);
+ return ERR_PTR(ret);
+}
+
+static struct k3_udma_glue_rx_channel *
+k3_udma_glue_request_remote_rx_chn(struct device *dev, const char *name,
+ struct k3_udma_glue_rx_channel_cfg *cfg)
+{
+ struct k3_udma_glue_rx_channel *rx_chn;
+ int ret, i;
+
+ if (cfg->flow_id_num <= 0 ||
+ cfg->flow_id_use_rxchan_id ||
+ cfg->def_flow_cfg ||
+ cfg->flow_id_base < 0)
+ return ERR_PTR(-EINVAL);
+
+ /*
+ * Remote RX channel is under control of Remote CPU core, so
+ * Linux can only request and manipulate by dedicated RX flows
+ */
+
+ rx_chn = devm_kzalloc(dev, sizeof(*rx_chn), GFP_KERNEL);
+ if (!rx_chn)
+ return ERR_PTR(-ENOMEM);
+
+ rx_chn->common.dev = dev;
+ rx_chn->common.swdata_size = cfg->swdata_size;
+ rx_chn->remote = true;
+ rx_chn->udma_rchan_id = -1;
+ rx_chn->flow_num = cfg->flow_id_num;
+ rx_chn->flow_id_base = cfg->flow_id_base;
+ rx_chn->psil_paired = false;
+
+ /* parse of udmap channel */
+ ret = of_k3_udma_glue_parse_chn(dev->of_node, name,
+ &rx_chn->common, false);
+ if (ret)
+ goto err;
+
+ rx_chn->common.hdesc_size = cppi5_hdesc_calc_size(rx_chn->common.epib,
+ rx_chn->common.psdata_size,
+ rx_chn->common.swdata_size);
+
+ rx_chn->flows = devm_kcalloc(dev, rx_chn->flow_num,
+ sizeof(*rx_chn->flows), GFP_KERNEL);
+ if (!rx_chn->flows) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ rx_chn->common.chan_dev.class = &k3_udma_glue_devclass;
+ rx_chn->common.chan_dev.parent = xudma_get_device(rx_chn->common.udmax);
+ dev_set_name(&rx_chn->common.chan_dev, "rchan_remote-0x%04x",
+ rx_chn->common.src_thread);
+ ret = device_register(&rx_chn->common.chan_dev);
+ if (ret) {
+ dev_err(dev, "Channel Device registration failed %d\n", ret);
+ put_device(&rx_chn->common.chan_dev);
+ rx_chn->common.chan_dev.parent = NULL;
+ goto err;
+ }
+
+ if (xudma_is_pktdma(rx_chn->common.udmax)) {
+ /* prepare the channel device as coherent */
+ rx_chn->common.chan_dev.dma_coherent = true;
+ dma_coerce_mask_and_coherent(&rx_chn->common.chan_dev,
+ DMA_BIT_MASK(48));
+ }
+
+ ret = k3_udma_glue_allocate_rx_flows(rx_chn, cfg);
+ if (ret)
+ goto err;
+
+ for (i = 0; i < rx_chn->flow_num; i++)
+ rx_chn->flows[i].udma_rflow_id = rx_chn->flow_id_base + i;
+
+ k3_udma_glue_dump_rx_chn(rx_chn);
+
+ return rx_chn;
+
+err:
+ k3_udma_glue_release_rx_chn(rx_chn);
+ return ERR_PTR(ret);
+}
+
+struct k3_udma_glue_rx_channel *
+k3_udma_glue_request_rx_chn(struct device *dev, const char *name,
+ struct k3_udma_glue_rx_channel_cfg *cfg)
+{
+ if (cfg->remote)
+ return k3_udma_glue_request_remote_rx_chn(dev, name, cfg);
+ else
+ return k3_udma_glue_request_rx_chn_priv(dev, name, cfg);
+}
+EXPORT_SYMBOL_GPL(k3_udma_glue_request_rx_chn);
+
+void k3_udma_glue_release_rx_chn(struct k3_udma_glue_rx_channel *rx_chn)
+{
+ int i;
+
+ if (IS_ERR_OR_NULL(rx_chn->common.udmax))
+ return;
+
+ if (rx_chn->psil_paired) {
+ xudma_navss_psil_unpair(rx_chn->common.udmax,
+ rx_chn->common.src_thread,
+ rx_chn->common.dst_thread);
+ rx_chn->psil_paired = false;
+ }
+
+ for (i = 0; i < rx_chn->flow_num; i++)
+ k3_udma_glue_release_rx_flow(rx_chn, i);
+
+ if (xudma_rflow_is_gp(rx_chn->common.udmax, rx_chn->flow_id_base))
+ xudma_free_gp_rflow_range(rx_chn->common.udmax,
+ rx_chn->flow_id_base,
+ rx_chn->flow_num);
+
+ if (!IS_ERR_OR_NULL(rx_chn->udma_rchanx))
+ xudma_rchan_put(rx_chn->common.udmax,
+ rx_chn->udma_rchanx);
+
+ if (rx_chn->common.chan_dev.parent) {
+ device_unregister(&rx_chn->common.chan_dev);
+ rx_chn->common.chan_dev.parent = NULL;
+ }
+}
+EXPORT_SYMBOL_GPL(k3_udma_glue_release_rx_chn);
+
+int k3_udma_glue_rx_flow_init(struct k3_udma_glue_rx_channel *rx_chn,
+ u32 flow_idx,
+ struct k3_udma_glue_rx_flow_cfg *flow_cfg)
+{
+ if (flow_idx >= rx_chn->flow_num)
+ return -EINVAL;
+
+ return k3_udma_glue_cfg_rx_flow(rx_chn, flow_idx, flow_cfg);
+}
+EXPORT_SYMBOL_GPL(k3_udma_glue_rx_flow_init);
+
+u32 k3_udma_glue_rx_flow_get_fdq_id(struct k3_udma_glue_rx_channel *rx_chn,
+ u32 flow_idx)
+{
+ struct k3_udma_glue_rx_flow *flow;
+
+ if (flow_idx >= rx_chn->flow_num)
+ return -EINVAL;
+
+ flow = &rx_chn->flows[flow_idx];
+
+ return k3_ringacc_get_ring_id(flow->ringrxfdq);
+}
+EXPORT_SYMBOL_GPL(k3_udma_glue_rx_flow_get_fdq_id);
+
+u32 k3_udma_glue_rx_get_flow_id_base(struct k3_udma_glue_rx_channel *rx_chn)
+{
+ return rx_chn->flow_id_base;
+}
+EXPORT_SYMBOL_GPL(k3_udma_glue_rx_get_flow_id_base);
+
+int k3_udma_glue_rx_flow_enable(struct k3_udma_glue_rx_channel *rx_chn,
+ u32 flow_idx)
+{
+ struct k3_udma_glue_rx_flow *flow = &rx_chn->flows[flow_idx];
+ const struct udma_tisci_rm *tisci_rm = rx_chn->common.tisci_rm;
+ struct device *dev = rx_chn->common.dev;
+ struct ti_sci_msg_rm_udmap_flow_cfg req;
+ int rx_ring_id;
+ int rx_ringfdq_id;
+ int ret = 0;
+
+ if (!rx_chn->remote)
+ return -EINVAL;
+
+ rx_ring_id = k3_ringacc_get_ring_id(flow->ringrx);
+ rx_ringfdq_id = k3_ringacc_get_ring_id(flow->ringrxfdq);
+
+ memset(&req, 0, sizeof(req));
+
+ req.valid_params =
+ TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_DEST_QNUM_VALID |
+ TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_FDQ0_SZ0_QNUM_VALID |
+ TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_FDQ1_QNUM_VALID |
+ TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_FDQ2_QNUM_VALID |
+ TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_FDQ3_QNUM_VALID;
+ req.nav_id = tisci_rm->tisci_dev_id;
+ req.flow_index = flow->udma_rflow_id;
+ req.rx_dest_qnum = rx_ring_id;
+ req.rx_fdq0_sz0_qnum = rx_ringfdq_id;
+ req.rx_fdq1_qnum = rx_ringfdq_id;
+ req.rx_fdq2_qnum = rx_ringfdq_id;
+ req.rx_fdq3_qnum = rx_ringfdq_id;
+
+ ret = tisci_rm->tisci_udmap_ops->rx_flow_cfg(tisci_rm->tisci, &req);
+ if (ret) {
+ dev_err(dev, "flow%d enable failed: %d\n", flow->udma_rflow_id,
+ ret);
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(k3_udma_glue_rx_flow_enable);
+
+int k3_udma_glue_rx_flow_disable(struct k3_udma_glue_rx_channel *rx_chn,
+ u32 flow_idx)
+{
+ struct k3_udma_glue_rx_flow *flow = &rx_chn->flows[flow_idx];
+ const struct udma_tisci_rm *tisci_rm = rx_chn->common.tisci_rm;
+ struct device *dev = rx_chn->common.dev;
+ struct ti_sci_msg_rm_udmap_flow_cfg req;
+ int ret = 0;
+
+ if (!rx_chn->remote)
+ return -EINVAL;
+
+ memset(&req, 0, sizeof(req));
+ req.valid_params =
+ TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_DEST_QNUM_VALID |
+ TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_FDQ0_SZ0_QNUM_VALID |
+ TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_FDQ1_QNUM_VALID |
+ TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_FDQ2_QNUM_VALID |
+ TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_FDQ3_QNUM_VALID;
+ req.nav_id = tisci_rm->tisci_dev_id;
+ req.flow_index = flow->udma_rflow_id;
+ req.rx_dest_qnum = TI_SCI_RESOURCE_NULL;
+ req.rx_fdq0_sz0_qnum = TI_SCI_RESOURCE_NULL;
+ req.rx_fdq1_qnum = TI_SCI_RESOURCE_NULL;
+ req.rx_fdq2_qnum = TI_SCI_RESOURCE_NULL;
+ req.rx_fdq3_qnum = TI_SCI_RESOURCE_NULL;
+
+ ret = tisci_rm->tisci_udmap_ops->rx_flow_cfg(tisci_rm->tisci, &req);
+ if (ret) {
+ dev_err(dev, "flow%d disable failed: %d\n", flow->udma_rflow_id,
+ ret);
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(k3_udma_glue_rx_flow_disable);
+
+int k3_udma_glue_enable_rx_chn(struct k3_udma_glue_rx_channel *rx_chn)
+{
+ int ret;
+
+ if (rx_chn->remote)
+ return -EINVAL;
+
+ if (rx_chn->flows_ready < rx_chn->flow_num)
+ return -EINVAL;
+
+ ret = xudma_navss_psil_pair(rx_chn->common.udmax,
+ rx_chn->common.src_thread,
+ rx_chn->common.dst_thread);
+ if (ret) {
+ dev_err(rx_chn->common.dev, "PSI-L request err %d\n", ret);
+ return ret;
+ }
+
+ rx_chn->psil_paired = true;
+
+ xudma_rchanrt_write(rx_chn->udma_rchanx, UDMA_CHAN_RT_CTL_REG,
+ UDMA_CHAN_RT_CTL_EN);
+
+ xudma_rchanrt_write(rx_chn->udma_rchanx, UDMA_CHAN_RT_PEER_RT_EN_REG,
+ UDMA_PEER_RT_EN_ENABLE);
+
+ k3_udma_glue_dump_rx_rt_chn(rx_chn, "rxrt en");
+ return 0;
+}
+EXPORT_SYMBOL_GPL(k3_udma_glue_enable_rx_chn);
+
+void k3_udma_glue_disable_rx_chn(struct k3_udma_glue_rx_channel *rx_chn)
+{
+ k3_udma_glue_dump_rx_rt_chn(rx_chn, "rxrt dis1");
+
+ xudma_rchanrt_write(rx_chn->udma_rchanx,
+ UDMA_CHAN_RT_PEER_RT_EN_REG, 0);
+ xudma_rchanrt_write(rx_chn->udma_rchanx, UDMA_CHAN_RT_CTL_REG, 0);
+
+ k3_udma_glue_dump_rx_rt_chn(rx_chn, "rxrt dis2");
+
+ if (rx_chn->psil_paired) {
+ xudma_navss_psil_unpair(rx_chn->common.udmax,
+ rx_chn->common.src_thread,
+ rx_chn->common.dst_thread);
+ rx_chn->psil_paired = false;
+ }
+}
+EXPORT_SYMBOL_GPL(k3_udma_glue_disable_rx_chn);
+
+void k3_udma_glue_tdown_rx_chn(struct k3_udma_glue_rx_channel *rx_chn,
+ bool sync)
+{
+ int i = 0;
+ u32 val;
+
+ if (rx_chn->remote)
+ return;
+
+ k3_udma_glue_dump_rx_rt_chn(rx_chn, "rxrt tdown1");
+
+ xudma_rchanrt_write(rx_chn->udma_rchanx, UDMA_CHAN_RT_PEER_RT_EN_REG,
+ UDMA_PEER_RT_EN_ENABLE | UDMA_PEER_RT_EN_TEARDOWN);
+
+ val = xudma_rchanrt_read(rx_chn->udma_rchanx, UDMA_CHAN_RT_CTL_REG);
+
+ while (sync && (val & UDMA_CHAN_RT_CTL_EN)) {
+ val = xudma_rchanrt_read(rx_chn->udma_rchanx,
+ UDMA_CHAN_RT_CTL_REG);
+ udelay(1);
+ if (i > K3_UDMAX_TDOWN_TIMEOUT_US) {
+ dev_err(rx_chn->common.dev, "RX tdown timeout\n");
+ break;
+ }
+ i++;
+ }
+
+ val = xudma_rchanrt_read(rx_chn->udma_rchanx,
+ UDMA_CHAN_RT_PEER_RT_EN_REG);
+ if (sync && (val & UDMA_PEER_RT_EN_ENABLE))
+ dev_err(rx_chn->common.dev, "TX tdown peer not stopped\n");
+ k3_udma_glue_dump_rx_rt_chn(rx_chn, "rxrt tdown2");
+}
+EXPORT_SYMBOL_GPL(k3_udma_glue_tdown_rx_chn);
+
+void k3_udma_glue_reset_rx_chn(struct k3_udma_glue_rx_channel *rx_chn,
+ u32 flow_num, void *data,
+ void (*cleanup)(void *data, dma_addr_t desc_dma), bool skip_fdq)
+{
+ struct k3_udma_glue_rx_flow *flow = &rx_chn->flows[flow_num];
+ struct device *dev = rx_chn->common.dev;
+ dma_addr_t desc_dma;
+ int occ_rx, i, ret;
+
+ /* reset RXCQ as it is not input for udma - expected to be empty */
+ occ_rx = k3_ringacc_ring_get_occ(flow->ringrx);
+ dev_dbg(dev, "RX reset flow %u occ_rx %u\n", flow_num, occ_rx);
+
+ /* Skip RX FDQ in case one FDQ is used for the set of flows */
+ if (skip_fdq)
+ goto do_reset;
+
+ /*
+ * RX FDQ reset need to be special way as it is input for udma and its
+ * state cached by udma, so:
+ * 1) save RX FDQ occ
+ * 2) clean up RX FDQ and call callback .cleanup() for each desc
+ * 3) reset RX FDQ in a special way
+ */
+ occ_rx = k3_ringacc_ring_get_occ(flow->ringrxfdq);
+ dev_dbg(dev, "RX reset flow %u occ_rx_fdq %u\n", flow_num, occ_rx);
+
+ for (i = 0; i < occ_rx; i++) {
+ ret = k3_ringacc_ring_pop(flow->ringrxfdq, &desc_dma);
+ if (ret) {
+ if (ret != -ENODATA)
+ dev_err(dev, "RX reset pop %d\n", ret);
+ break;
+ }
+ cleanup(data, desc_dma);
+ }
+
+ k3_ringacc_ring_reset_dma(flow->ringrxfdq, occ_rx);
+
+do_reset:
+ k3_ringacc_ring_reset(flow->ringrx);
+}
+EXPORT_SYMBOL_GPL(k3_udma_glue_reset_rx_chn);
+
+int k3_udma_glue_push_rx_chn(struct k3_udma_glue_rx_channel *rx_chn,
+ u32 flow_num, struct cppi5_host_desc_t *desc_rx,
+ dma_addr_t desc_dma)
+{
+ struct k3_udma_glue_rx_flow *flow = &rx_chn->flows[flow_num];
+
+ return k3_ringacc_ring_push(flow->ringrxfdq, &desc_dma);
+}
+EXPORT_SYMBOL_GPL(k3_udma_glue_push_rx_chn);
+
+int k3_udma_glue_pop_rx_chn(struct k3_udma_glue_rx_channel *rx_chn,
+ u32 flow_num, dma_addr_t *desc_dma)
+{
+ struct k3_udma_glue_rx_flow *flow = &rx_chn->flows[flow_num];
+
+ return k3_ringacc_ring_pop(flow->ringrx, desc_dma);
+}
+EXPORT_SYMBOL_GPL(k3_udma_glue_pop_rx_chn);
+
+int k3_udma_glue_rx_get_irq(struct k3_udma_glue_rx_channel *rx_chn,
+ u32 flow_num)
+{
+ struct k3_udma_glue_rx_flow *flow;
+
+ flow = &rx_chn->flows[flow_num];
+
+ if (xudma_is_pktdma(rx_chn->common.udmax)) {
+ flow->virq = xudma_pktdma_rflow_get_irq(rx_chn->common.udmax,
+ flow->udma_rflow_id);
+ } else {
+ flow->virq = k3_ringacc_get_ring_irq_num(flow->ringrx);
+ }
+
+ return flow->virq;
+}
+EXPORT_SYMBOL_GPL(k3_udma_glue_rx_get_irq);
+
+struct device *
+ k3_udma_glue_rx_get_dma_device(struct k3_udma_glue_rx_channel *rx_chn)
+{
+ if (xudma_is_pktdma(rx_chn->common.udmax) &&
+ (rx_chn->common.atype_asel == 14 || rx_chn->common.atype_asel == 15))
+ return &rx_chn->common.chan_dev;
+
+ return xudma_get_device(rx_chn->common.udmax);
+}
+EXPORT_SYMBOL_GPL(k3_udma_glue_rx_get_dma_device);
+
+void k3_udma_glue_rx_dma_to_cppi5_addr(struct k3_udma_glue_rx_channel *rx_chn,
+ dma_addr_t *addr)
+{
+ if (!xudma_is_pktdma(rx_chn->common.udmax) ||
+ !rx_chn->common.atype_asel)
+ return;
+
+ *addr |= (u64)rx_chn->common.atype_asel << K3_ADDRESS_ASEL_SHIFT;
+}
+EXPORT_SYMBOL_GPL(k3_udma_glue_rx_dma_to_cppi5_addr);
+
+void k3_udma_glue_rx_cppi5_to_dma_addr(struct k3_udma_glue_rx_channel *rx_chn,
+ dma_addr_t *addr)
+{
+ if (!xudma_is_pktdma(rx_chn->common.udmax) ||
+ !rx_chn->common.atype_asel)
+ return;
+
+ *addr &= (u64)GENMASK(K3_ADDRESS_ASEL_SHIFT - 1, 0);
+}
+EXPORT_SYMBOL_GPL(k3_udma_glue_rx_cppi5_to_dma_addr);
+
+static int __init k3_udma_glue_class_init(void)
+{
+ return class_register(&k3_udma_glue_devclass);
+}
+arch_initcall(k3_udma_glue_class_init);
diff --git a/drivers/dma/ti/k3-udma-private.c b/drivers/dma/ti/k3-udma-private.c
new file mode 100644
index 000000000..85e007014
--- /dev/null
+++ b/drivers/dma/ti/k3-udma-private.c
@@ -0,0 +1,181 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 Texas Instruments Incorporated - http://www.ti.com
+ * Author: Peter Ujfalusi <peter.ujfalusi@ti.com>
+ */
+
+int xudma_navss_psil_pair(struct udma_dev *ud, u32 src_thread, u32 dst_thread)
+{
+ return navss_psil_pair(ud, src_thread, dst_thread);
+}
+EXPORT_SYMBOL(xudma_navss_psil_pair);
+
+int xudma_navss_psil_unpair(struct udma_dev *ud, u32 src_thread, u32 dst_thread)
+{
+ return navss_psil_unpair(ud, src_thread, dst_thread);
+}
+EXPORT_SYMBOL(xudma_navss_psil_unpair);
+
+struct udma_dev *of_xudma_dev_get(struct device_node *np, const char *property)
+{
+ struct device_node *udma_node = np;
+ struct platform_device *pdev;
+ struct udma_dev *ud;
+
+ if (property) {
+ udma_node = of_parse_phandle(np, property, 0);
+ if (!udma_node) {
+ pr_err("UDMA node is not found\n");
+ return ERR_PTR(-ENODEV);
+ }
+ }
+
+ pdev = of_find_device_by_node(udma_node);
+ if (np != udma_node)
+ of_node_put(udma_node);
+
+ if (!pdev) {
+ pr_debug("UDMA device not found\n");
+ return ERR_PTR(-EPROBE_DEFER);
+ }
+
+ ud = platform_get_drvdata(pdev);
+ if (!ud) {
+ pr_debug("UDMA has not been probed\n");
+ put_device(&pdev->dev);
+ return ERR_PTR(-EPROBE_DEFER);
+ }
+
+ return ud;
+}
+EXPORT_SYMBOL(of_xudma_dev_get);
+
+struct device *xudma_get_device(struct udma_dev *ud)
+{
+ return ud->dev;
+}
+EXPORT_SYMBOL(xudma_get_device);
+
+struct k3_ringacc *xudma_get_ringacc(struct udma_dev *ud)
+{
+ return ud->ringacc;
+}
+EXPORT_SYMBOL(xudma_get_ringacc);
+
+u32 xudma_dev_get_psil_base(struct udma_dev *ud)
+{
+ return ud->psil_base;
+}
+EXPORT_SYMBOL(xudma_dev_get_psil_base);
+
+struct udma_tisci_rm *xudma_dev_get_tisci_rm(struct udma_dev *ud)
+{
+ return &ud->tisci_rm;
+}
+EXPORT_SYMBOL(xudma_dev_get_tisci_rm);
+
+int xudma_alloc_gp_rflow_range(struct udma_dev *ud, int from, int cnt)
+{
+ return __udma_alloc_gp_rflow_range(ud, from, cnt);
+}
+EXPORT_SYMBOL(xudma_alloc_gp_rflow_range);
+
+int xudma_free_gp_rflow_range(struct udma_dev *ud, int from, int cnt)
+{
+ return __udma_free_gp_rflow_range(ud, from, cnt);
+}
+EXPORT_SYMBOL(xudma_free_gp_rflow_range);
+
+bool xudma_rflow_is_gp(struct udma_dev *ud, int id)
+{
+ if (!ud->rflow_gp_map)
+ return false;
+
+ return !test_bit(id, ud->rflow_gp_map);
+}
+EXPORT_SYMBOL(xudma_rflow_is_gp);
+
+#define XUDMA_GET_PUT_RESOURCE(res) \
+struct udma_##res *xudma_##res##_get(struct udma_dev *ud, int id) \
+{ \
+ return __udma_reserve_##res(ud, UDMA_TP_NORMAL, id); \
+} \
+EXPORT_SYMBOL(xudma_##res##_get); \
+ \
+void xudma_##res##_put(struct udma_dev *ud, struct udma_##res *p) \
+{ \
+ clear_bit(p->id, ud->res##_map); \
+} \
+EXPORT_SYMBOL(xudma_##res##_put)
+XUDMA_GET_PUT_RESOURCE(tchan);
+XUDMA_GET_PUT_RESOURCE(rchan);
+
+struct udma_rflow *xudma_rflow_get(struct udma_dev *ud, int id)
+{
+ return __udma_get_rflow(ud, id);
+}
+EXPORT_SYMBOL(xudma_rflow_get);
+
+void xudma_rflow_put(struct udma_dev *ud, struct udma_rflow *p)
+{
+ __udma_put_rflow(ud, p);
+}
+EXPORT_SYMBOL(xudma_rflow_put);
+
+int xudma_get_rflow_ring_offset(struct udma_dev *ud)
+{
+ return ud->tflow_cnt;
+}
+EXPORT_SYMBOL(xudma_get_rflow_ring_offset);
+
+#define XUDMA_GET_RESOURCE_ID(res) \
+int xudma_##res##_get_id(struct udma_##res *p) \
+{ \
+ return p->id; \
+} \
+EXPORT_SYMBOL(xudma_##res##_get_id)
+XUDMA_GET_RESOURCE_ID(tchan);
+XUDMA_GET_RESOURCE_ID(rchan);
+XUDMA_GET_RESOURCE_ID(rflow);
+
+/* Exported register access functions */
+#define XUDMA_RT_IO_FUNCTIONS(res) \
+u32 xudma_##res##rt_read(struct udma_##res *p, int reg) \
+{ \
+ if (!p) \
+ return 0; \
+ return udma_read(p->reg_rt, reg); \
+} \
+EXPORT_SYMBOL(xudma_##res##rt_read); \
+ \
+void xudma_##res##rt_write(struct udma_##res *p, int reg, u32 val) \
+{ \
+ if (!p) \
+ return; \
+ udma_write(p->reg_rt, reg, val); \
+} \
+EXPORT_SYMBOL(xudma_##res##rt_write)
+XUDMA_RT_IO_FUNCTIONS(tchan);
+XUDMA_RT_IO_FUNCTIONS(rchan);
+
+int xudma_is_pktdma(struct udma_dev *ud)
+{
+ return ud->match_data->type == DMA_TYPE_PKTDMA;
+}
+EXPORT_SYMBOL(xudma_is_pktdma);
+
+int xudma_pktdma_tflow_get_irq(struct udma_dev *ud, int udma_tflow_id)
+{
+ const struct udma_oes_offsets *oes = &ud->soc_data->oes;
+
+ return msi_get_virq(ud->dev, udma_tflow_id + oes->pktdma_tchan_flow);
+}
+EXPORT_SYMBOL(xudma_pktdma_tflow_get_irq);
+
+int xudma_pktdma_rflow_get_irq(struct udma_dev *ud, int udma_rflow_id)
+{
+ const struct udma_oes_offsets *oes = &ud->soc_data->oes;
+
+ return msi_get_virq(ud->dev, udma_rflow_id + oes->pktdma_rchan_flow);
+}
+EXPORT_SYMBOL(xudma_pktdma_rflow_get_irq);
diff --git a/drivers/dma/ti/k3-udma.c b/drivers/dma/ti/k3-udma.c
new file mode 100644
index 000000000..b86b809eb
--- /dev/null
+++ b/drivers/dma/ti/k3-udma.c
@@ -0,0 +1,5538 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 Texas Instruments Incorporated - http://www.ti.com
+ * Author: Peter Ujfalusi <peter.ujfalusi@ti.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/delay.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmapool.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/list.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/sys_soc.h>
+#include <linux/of.h>
+#include <linux/of_dma.h>
+#include <linux/of_device.h>
+#include <linux/of_irq.h>
+#include <linux/workqueue.h>
+#include <linux/completion.h>
+#include <linux/soc/ti/k3-ringacc.h>
+#include <linux/soc/ti/ti_sci_protocol.h>
+#include <linux/soc/ti/ti_sci_inta_msi.h>
+#include <linux/dma/k3-event-router.h>
+#include <linux/dma/ti-cppi5.h>
+
+#include "../virt-dma.h"
+#include "k3-udma.h"
+#include "k3-psil-priv.h"
+
+struct udma_static_tr {
+ u8 elsize; /* RPSTR0 */
+ u16 elcnt; /* RPSTR0 */
+ u16 bstcnt; /* RPSTR1 */
+};
+
+#define K3_UDMA_MAX_RFLOWS 1024
+#define K3_UDMA_DEFAULT_RING_SIZE 16
+
+/* How SRC/DST tag should be updated by UDMA in the descriptor's Word 3 */
+#define UDMA_RFLOW_SRCTAG_NONE 0
+#define UDMA_RFLOW_SRCTAG_CFG_TAG 1
+#define UDMA_RFLOW_SRCTAG_FLOW_ID 2
+#define UDMA_RFLOW_SRCTAG_SRC_TAG 4
+
+#define UDMA_RFLOW_DSTTAG_NONE 0
+#define UDMA_RFLOW_DSTTAG_CFG_TAG 1
+#define UDMA_RFLOW_DSTTAG_FLOW_ID 2
+#define UDMA_RFLOW_DSTTAG_DST_TAG_LO 4
+#define UDMA_RFLOW_DSTTAG_DST_TAG_HI 5
+
+struct udma_chan;
+
+enum k3_dma_type {
+ DMA_TYPE_UDMA = 0,
+ DMA_TYPE_BCDMA,
+ DMA_TYPE_PKTDMA,
+};
+
+enum udma_mmr {
+ MMR_GCFG = 0,
+ MMR_BCHANRT,
+ MMR_RCHANRT,
+ MMR_TCHANRT,
+ MMR_LAST,
+};
+
+static const char * const mmr_names[] = {
+ [MMR_GCFG] = "gcfg",
+ [MMR_BCHANRT] = "bchanrt",
+ [MMR_RCHANRT] = "rchanrt",
+ [MMR_TCHANRT] = "tchanrt",
+};
+
+struct udma_tchan {
+ void __iomem *reg_rt;
+
+ int id;
+ struct k3_ring *t_ring; /* Transmit ring */
+ struct k3_ring *tc_ring; /* Transmit Completion ring */
+ int tflow_id; /* applicable only for PKTDMA */
+
+};
+
+#define udma_bchan udma_tchan
+
+struct udma_rflow {
+ int id;
+ struct k3_ring *fd_ring; /* Free Descriptor ring */
+ struct k3_ring *r_ring; /* Receive ring */
+};
+
+struct udma_rchan {
+ void __iomem *reg_rt;
+
+ int id;
+};
+
+struct udma_oes_offsets {
+ /* K3 UDMA Output Event Offset */
+ u32 udma_rchan;
+
+ /* BCDMA Output Event Offsets */
+ u32 bcdma_bchan_data;
+ u32 bcdma_bchan_ring;
+ u32 bcdma_tchan_data;
+ u32 bcdma_tchan_ring;
+ u32 bcdma_rchan_data;
+ u32 bcdma_rchan_ring;
+
+ /* PKTDMA Output Event Offsets */
+ u32 pktdma_tchan_flow;
+ u32 pktdma_rchan_flow;
+};
+
+#define UDMA_FLAG_PDMA_ACC32 BIT(0)
+#define UDMA_FLAG_PDMA_BURST BIT(1)
+#define UDMA_FLAG_TDTYPE BIT(2)
+#define UDMA_FLAG_BURST_SIZE BIT(3)
+#define UDMA_FLAGS_J7_CLASS (UDMA_FLAG_PDMA_ACC32 | \
+ UDMA_FLAG_PDMA_BURST | \
+ UDMA_FLAG_TDTYPE | \
+ UDMA_FLAG_BURST_SIZE)
+
+struct udma_match_data {
+ enum k3_dma_type type;
+ u32 psil_base;
+ bool enable_memcpy_support;
+ u32 flags;
+ u32 statictr_z_mask;
+ u8 burst_size[3];
+};
+
+struct udma_soc_data {
+ struct udma_oes_offsets oes;
+ u32 bcdma_trigger_event_offset;
+};
+
+struct udma_hwdesc {
+ size_t cppi5_desc_size;
+ void *cppi5_desc_vaddr;
+ dma_addr_t cppi5_desc_paddr;
+
+ /* TR descriptor internal pointers */
+ void *tr_req_base;
+ struct cppi5_tr_resp_t *tr_resp_base;
+};
+
+struct udma_rx_flush {
+ struct udma_hwdesc hwdescs[2];
+
+ size_t buffer_size;
+ void *buffer_vaddr;
+ dma_addr_t buffer_paddr;
+};
+
+struct udma_tpl {
+ u8 levels;
+ u32 start_idx[3];
+};
+
+struct udma_dev {
+ struct dma_device ddev;
+ struct device *dev;
+ void __iomem *mmrs[MMR_LAST];
+ const struct udma_match_data *match_data;
+ const struct udma_soc_data *soc_data;
+
+ struct udma_tpl bchan_tpl;
+ struct udma_tpl tchan_tpl;
+ struct udma_tpl rchan_tpl;
+
+ size_t desc_align; /* alignment to use for descriptors */
+
+ struct udma_tisci_rm tisci_rm;
+
+ struct k3_ringacc *ringacc;
+
+ struct work_struct purge_work;
+ struct list_head desc_to_purge;
+ spinlock_t lock;
+
+ struct udma_rx_flush rx_flush;
+
+ int bchan_cnt;
+ int tchan_cnt;
+ int echan_cnt;
+ int rchan_cnt;
+ int rflow_cnt;
+ int tflow_cnt;
+ unsigned long *bchan_map;
+ unsigned long *tchan_map;
+ unsigned long *rchan_map;
+ unsigned long *rflow_gp_map;
+ unsigned long *rflow_gp_map_allocated;
+ unsigned long *rflow_in_use;
+ unsigned long *tflow_map;
+
+ struct udma_bchan *bchans;
+ struct udma_tchan *tchans;
+ struct udma_rchan *rchans;
+ struct udma_rflow *rflows;
+
+ struct udma_chan *channels;
+ u32 psil_base;
+ u32 atype;
+ u32 asel;
+};
+
+struct udma_desc {
+ struct virt_dma_desc vd;
+
+ bool terminated;
+
+ enum dma_transfer_direction dir;
+
+ struct udma_static_tr static_tr;
+ u32 residue;
+
+ unsigned int sglen;
+ unsigned int desc_idx; /* Only used for cyclic in packet mode */
+ unsigned int tr_idx;
+
+ u32 metadata_size;
+ void *metadata; /* pointer to provided metadata buffer (EPIP, PSdata) */
+
+ unsigned int hwdesc_count;
+ struct udma_hwdesc hwdesc[];
+};
+
+enum udma_chan_state {
+ UDMA_CHAN_IS_IDLE = 0, /* not active, no teardown is in progress */
+ UDMA_CHAN_IS_ACTIVE, /* Normal operation */
+ UDMA_CHAN_IS_TERMINATING, /* channel is being terminated */
+};
+
+struct udma_tx_drain {
+ struct delayed_work work;
+ ktime_t tstamp;
+ u32 residue;
+};
+
+struct udma_chan_config {
+ bool pkt_mode; /* TR or packet */
+ bool needs_epib; /* EPIB is needed for the communication or not */
+ u32 psd_size; /* size of Protocol Specific Data */
+ u32 metadata_size; /* (needs_epib ? 16:0) + psd_size */
+ u32 hdesc_size; /* Size of a packet descriptor in packet mode */
+ bool notdpkt; /* Suppress sending TDC packet */
+ int remote_thread_id;
+ u32 atype;
+ u32 asel;
+ u32 src_thread;
+ u32 dst_thread;
+ enum psil_endpoint_type ep_type;
+ bool enable_acc32;
+ bool enable_burst;
+ enum udma_tp_level channel_tpl; /* Channel Throughput Level */
+
+ u32 tr_trigger_type;
+ unsigned long tx_flags;
+
+ /* PKDMA mapped channel */
+ int mapped_channel_id;
+ /* PKTDMA default tflow or rflow for mapped channel */
+ int default_flow_id;
+
+ enum dma_transfer_direction dir;
+};
+
+struct udma_chan {
+ struct virt_dma_chan vc;
+ struct dma_slave_config cfg;
+ struct udma_dev *ud;
+ struct device *dma_dev;
+ struct udma_desc *desc;
+ struct udma_desc *terminated_desc;
+ struct udma_static_tr static_tr;
+ char *name;
+
+ struct udma_bchan *bchan;
+ struct udma_tchan *tchan;
+ struct udma_rchan *rchan;
+ struct udma_rflow *rflow;
+
+ bool psil_paired;
+
+ int irq_num_ring;
+ int irq_num_udma;
+
+ bool cyclic;
+ bool paused;
+
+ enum udma_chan_state state;
+ struct completion teardown_completed;
+
+ struct udma_tx_drain tx_drain;
+
+ /* Channel configuration parameters */
+ struct udma_chan_config config;
+
+ /* dmapool for packet mode descriptors */
+ bool use_dma_pool;
+ struct dma_pool *hdesc_pool;
+
+ u32 id;
+};
+
+static inline struct udma_dev *to_udma_dev(struct dma_device *d)
+{
+ return container_of(d, struct udma_dev, ddev);
+}
+
+static inline struct udma_chan *to_udma_chan(struct dma_chan *c)
+{
+ return container_of(c, struct udma_chan, vc.chan);
+}
+
+static inline struct udma_desc *to_udma_desc(struct dma_async_tx_descriptor *t)
+{
+ return container_of(t, struct udma_desc, vd.tx);
+}
+
+/* Generic register access functions */
+static inline u32 udma_read(void __iomem *base, int reg)
+{
+ return readl(base + reg);
+}
+
+static inline void udma_write(void __iomem *base, int reg, u32 val)
+{
+ writel(val, base + reg);
+}
+
+static inline void udma_update_bits(void __iomem *base, int reg,
+ u32 mask, u32 val)
+{
+ u32 tmp, orig;
+
+ orig = readl(base + reg);
+ tmp = orig & ~mask;
+ tmp |= (val & mask);
+
+ if (tmp != orig)
+ writel(tmp, base + reg);
+}
+
+/* TCHANRT */
+static inline u32 udma_tchanrt_read(struct udma_chan *uc, int reg)
+{
+ if (!uc->tchan)
+ return 0;
+ return udma_read(uc->tchan->reg_rt, reg);
+}
+
+static inline void udma_tchanrt_write(struct udma_chan *uc, int reg, u32 val)
+{
+ if (!uc->tchan)
+ return;
+ udma_write(uc->tchan->reg_rt, reg, val);
+}
+
+static inline void udma_tchanrt_update_bits(struct udma_chan *uc, int reg,
+ u32 mask, u32 val)
+{
+ if (!uc->tchan)
+ return;
+ udma_update_bits(uc->tchan->reg_rt, reg, mask, val);
+}
+
+/* RCHANRT */
+static inline u32 udma_rchanrt_read(struct udma_chan *uc, int reg)
+{
+ if (!uc->rchan)
+ return 0;
+ return udma_read(uc->rchan->reg_rt, reg);
+}
+
+static inline void udma_rchanrt_write(struct udma_chan *uc, int reg, u32 val)
+{
+ if (!uc->rchan)
+ return;
+ udma_write(uc->rchan->reg_rt, reg, val);
+}
+
+static inline void udma_rchanrt_update_bits(struct udma_chan *uc, int reg,
+ u32 mask, u32 val)
+{
+ if (!uc->rchan)
+ return;
+ udma_update_bits(uc->rchan->reg_rt, reg, mask, val);
+}
+
+static int navss_psil_pair(struct udma_dev *ud, u32 src_thread, u32 dst_thread)
+{
+ struct udma_tisci_rm *tisci_rm = &ud->tisci_rm;
+
+ dst_thread |= K3_PSIL_DST_THREAD_ID_OFFSET;
+ return tisci_rm->tisci_psil_ops->pair(tisci_rm->tisci,
+ tisci_rm->tisci_navss_dev_id,
+ src_thread, dst_thread);
+}
+
+static int navss_psil_unpair(struct udma_dev *ud, u32 src_thread,
+ u32 dst_thread)
+{
+ struct udma_tisci_rm *tisci_rm = &ud->tisci_rm;
+
+ dst_thread |= K3_PSIL_DST_THREAD_ID_OFFSET;
+ return tisci_rm->tisci_psil_ops->unpair(tisci_rm->tisci,
+ tisci_rm->tisci_navss_dev_id,
+ src_thread, dst_thread);
+}
+
+static void k3_configure_chan_coherency(struct dma_chan *chan, u32 asel)
+{
+ struct device *chan_dev = &chan->dev->device;
+
+ if (asel == 0) {
+ /* No special handling for the channel */
+ chan->dev->chan_dma_dev = false;
+
+ chan_dev->dma_coherent = false;
+ chan_dev->dma_parms = NULL;
+ } else if (asel == 14 || asel == 15) {
+ chan->dev->chan_dma_dev = true;
+
+ chan_dev->dma_coherent = true;
+ dma_coerce_mask_and_coherent(chan_dev, DMA_BIT_MASK(48));
+ chan_dev->dma_parms = chan_dev->parent->dma_parms;
+ } else {
+ dev_warn(chan->device->dev, "Invalid ASEL value: %u\n", asel);
+
+ chan_dev->dma_coherent = false;
+ chan_dev->dma_parms = NULL;
+ }
+}
+
+static u8 udma_get_chan_tpl_index(struct udma_tpl *tpl_map, int chan_id)
+{
+ int i;
+
+ for (i = 0; i < tpl_map->levels; i++) {
+ if (chan_id >= tpl_map->start_idx[i])
+ return i;
+ }
+
+ return 0;
+}
+
+static void udma_reset_uchan(struct udma_chan *uc)
+{
+ memset(&uc->config, 0, sizeof(uc->config));
+ uc->config.remote_thread_id = -1;
+ uc->config.mapped_channel_id = -1;
+ uc->config.default_flow_id = -1;
+ uc->state = UDMA_CHAN_IS_IDLE;
+}
+
+static void udma_dump_chan_stdata(struct udma_chan *uc)
+{
+ struct device *dev = uc->ud->dev;
+ u32 offset;
+ int i;
+
+ if (uc->config.dir == DMA_MEM_TO_DEV || uc->config.dir == DMA_MEM_TO_MEM) {
+ dev_dbg(dev, "TCHAN State data:\n");
+ for (i = 0; i < 32; i++) {
+ offset = UDMA_CHAN_RT_STDATA_REG + i * 4;
+ dev_dbg(dev, "TRT_STDATA[%02d]: 0x%08x\n", i,
+ udma_tchanrt_read(uc, offset));
+ }
+ }
+
+ if (uc->config.dir == DMA_DEV_TO_MEM || uc->config.dir == DMA_MEM_TO_MEM) {
+ dev_dbg(dev, "RCHAN State data:\n");
+ for (i = 0; i < 32; i++) {
+ offset = UDMA_CHAN_RT_STDATA_REG + i * 4;
+ dev_dbg(dev, "RRT_STDATA[%02d]: 0x%08x\n", i,
+ udma_rchanrt_read(uc, offset));
+ }
+ }
+}
+
+static inline dma_addr_t udma_curr_cppi5_desc_paddr(struct udma_desc *d,
+ int idx)
+{
+ return d->hwdesc[idx].cppi5_desc_paddr;
+}
+
+static inline void *udma_curr_cppi5_desc_vaddr(struct udma_desc *d, int idx)
+{
+ return d->hwdesc[idx].cppi5_desc_vaddr;
+}
+
+static struct udma_desc *udma_udma_desc_from_paddr(struct udma_chan *uc,
+ dma_addr_t paddr)
+{
+ struct udma_desc *d = uc->terminated_desc;
+
+ if (d) {
+ dma_addr_t desc_paddr = udma_curr_cppi5_desc_paddr(d,
+ d->desc_idx);
+
+ if (desc_paddr != paddr)
+ d = NULL;
+ }
+
+ if (!d) {
+ d = uc->desc;
+ if (d) {
+ dma_addr_t desc_paddr = udma_curr_cppi5_desc_paddr(d,
+ d->desc_idx);
+
+ if (desc_paddr != paddr)
+ d = NULL;
+ }
+ }
+
+ return d;
+}
+
+static void udma_free_hwdesc(struct udma_chan *uc, struct udma_desc *d)
+{
+ if (uc->use_dma_pool) {
+ int i;
+
+ for (i = 0; i < d->hwdesc_count; i++) {
+ if (!d->hwdesc[i].cppi5_desc_vaddr)
+ continue;
+
+ dma_pool_free(uc->hdesc_pool,
+ d->hwdesc[i].cppi5_desc_vaddr,
+ d->hwdesc[i].cppi5_desc_paddr);
+
+ d->hwdesc[i].cppi5_desc_vaddr = NULL;
+ }
+ } else if (d->hwdesc[0].cppi5_desc_vaddr) {
+ dma_free_coherent(uc->dma_dev, d->hwdesc[0].cppi5_desc_size,
+ d->hwdesc[0].cppi5_desc_vaddr,
+ d->hwdesc[0].cppi5_desc_paddr);
+
+ d->hwdesc[0].cppi5_desc_vaddr = NULL;
+ }
+}
+
+static void udma_purge_desc_work(struct work_struct *work)
+{
+ struct udma_dev *ud = container_of(work, typeof(*ud), purge_work);
+ struct virt_dma_desc *vd, *_vd;
+ unsigned long flags;
+ LIST_HEAD(head);
+
+ spin_lock_irqsave(&ud->lock, flags);
+ list_splice_tail_init(&ud->desc_to_purge, &head);
+ spin_unlock_irqrestore(&ud->lock, flags);
+
+ list_for_each_entry_safe(vd, _vd, &head, node) {
+ struct udma_chan *uc = to_udma_chan(vd->tx.chan);
+ struct udma_desc *d = to_udma_desc(&vd->tx);
+
+ udma_free_hwdesc(uc, d);
+ list_del(&vd->node);
+ kfree(d);
+ }
+
+ /* If more to purge, schedule the work again */
+ if (!list_empty(&ud->desc_to_purge))
+ schedule_work(&ud->purge_work);
+}
+
+static void udma_desc_free(struct virt_dma_desc *vd)
+{
+ struct udma_dev *ud = to_udma_dev(vd->tx.chan->device);
+ struct udma_chan *uc = to_udma_chan(vd->tx.chan);
+ struct udma_desc *d = to_udma_desc(&vd->tx);
+ unsigned long flags;
+
+ if (uc->terminated_desc == d)
+ uc->terminated_desc = NULL;
+
+ if (uc->use_dma_pool) {
+ udma_free_hwdesc(uc, d);
+ kfree(d);
+ return;
+ }
+
+ spin_lock_irqsave(&ud->lock, flags);
+ list_add_tail(&vd->node, &ud->desc_to_purge);
+ spin_unlock_irqrestore(&ud->lock, flags);
+
+ schedule_work(&ud->purge_work);
+}
+
+static bool udma_is_chan_running(struct udma_chan *uc)
+{
+ u32 trt_ctl = 0;
+ u32 rrt_ctl = 0;
+
+ if (uc->tchan)
+ trt_ctl = udma_tchanrt_read(uc, UDMA_CHAN_RT_CTL_REG);
+ if (uc->rchan)
+ rrt_ctl = udma_rchanrt_read(uc, UDMA_CHAN_RT_CTL_REG);
+
+ if (trt_ctl & UDMA_CHAN_RT_CTL_EN || rrt_ctl & UDMA_CHAN_RT_CTL_EN)
+ return true;
+
+ return false;
+}
+
+static bool udma_is_chan_paused(struct udma_chan *uc)
+{
+ u32 val, pause_mask;
+
+ switch (uc->config.dir) {
+ case DMA_DEV_TO_MEM:
+ val = udma_rchanrt_read(uc, UDMA_CHAN_RT_PEER_RT_EN_REG);
+ pause_mask = UDMA_PEER_RT_EN_PAUSE;
+ break;
+ case DMA_MEM_TO_DEV:
+ val = udma_tchanrt_read(uc, UDMA_CHAN_RT_PEER_RT_EN_REG);
+ pause_mask = UDMA_PEER_RT_EN_PAUSE;
+ break;
+ case DMA_MEM_TO_MEM:
+ val = udma_tchanrt_read(uc, UDMA_CHAN_RT_CTL_REG);
+ pause_mask = UDMA_CHAN_RT_CTL_PAUSE;
+ break;
+ default:
+ return false;
+ }
+
+ if (val & pause_mask)
+ return true;
+
+ return false;
+}
+
+static inline dma_addr_t udma_get_rx_flush_hwdesc_paddr(struct udma_chan *uc)
+{
+ return uc->ud->rx_flush.hwdescs[uc->config.pkt_mode].cppi5_desc_paddr;
+}
+
+static int udma_push_to_ring(struct udma_chan *uc, int idx)
+{
+ struct udma_desc *d = uc->desc;
+ struct k3_ring *ring = NULL;
+ dma_addr_t paddr;
+
+ switch (uc->config.dir) {
+ case DMA_DEV_TO_MEM:
+ ring = uc->rflow->fd_ring;
+ break;
+ case DMA_MEM_TO_DEV:
+ case DMA_MEM_TO_MEM:
+ ring = uc->tchan->t_ring;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ /* RX flush packet: idx == -1 is only passed in case of DEV_TO_MEM */
+ if (idx == -1) {
+ paddr = udma_get_rx_flush_hwdesc_paddr(uc);
+ } else {
+ paddr = udma_curr_cppi5_desc_paddr(d, idx);
+
+ wmb(); /* Ensure that writes are not moved over this point */
+ }
+
+ return k3_ringacc_ring_push(ring, &paddr);
+}
+
+static bool udma_desc_is_rx_flush(struct udma_chan *uc, dma_addr_t addr)
+{
+ if (uc->config.dir != DMA_DEV_TO_MEM)
+ return false;
+
+ if (addr == udma_get_rx_flush_hwdesc_paddr(uc))
+ return true;
+
+ return false;
+}
+
+static int udma_pop_from_ring(struct udma_chan *uc, dma_addr_t *addr)
+{
+ struct k3_ring *ring = NULL;
+ int ret;
+
+ switch (uc->config.dir) {
+ case DMA_DEV_TO_MEM:
+ ring = uc->rflow->r_ring;
+ break;
+ case DMA_MEM_TO_DEV:
+ case DMA_MEM_TO_MEM:
+ ring = uc->tchan->tc_ring;
+ break;
+ default:
+ return -ENOENT;
+ }
+
+ ret = k3_ringacc_ring_pop(ring, addr);
+ if (ret)
+ return ret;
+
+ rmb(); /* Ensure that reads are not moved before this point */
+
+ /* Teardown completion */
+ if (cppi5_desc_is_tdcm(*addr))
+ return 0;
+
+ /* Check for flush descriptor */
+ if (udma_desc_is_rx_flush(uc, *addr))
+ return -ENOENT;
+
+ return 0;
+}
+
+static void udma_reset_rings(struct udma_chan *uc)
+{
+ struct k3_ring *ring1 = NULL;
+ struct k3_ring *ring2 = NULL;
+
+ switch (uc->config.dir) {
+ case DMA_DEV_TO_MEM:
+ if (uc->rchan) {
+ ring1 = uc->rflow->fd_ring;
+ ring2 = uc->rflow->r_ring;
+ }
+ break;
+ case DMA_MEM_TO_DEV:
+ case DMA_MEM_TO_MEM:
+ if (uc->tchan) {
+ ring1 = uc->tchan->t_ring;
+ ring2 = uc->tchan->tc_ring;
+ }
+ break;
+ default:
+ break;
+ }
+
+ if (ring1)
+ k3_ringacc_ring_reset_dma(ring1,
+ k3_ringacc_ring_get_occ(ring1));
+ if (ring2)
+ k3_ringacc_ring_reset(ring2);
+
+ /* make sure we are not leaking memory by stalled descriptor */
+ if (uc->terminated_desc) {
+ udma_desc_free(&uc->terminated_desc->vd);
+ uc->terminated_desc = NULL;
+ }
+}
+
+static void udma_decrement_byte_counters(struct udma_chan *uc, u32 val)
+{
+ if (uc->desc->dir == DMA_DEV_TO_MEM) {
+ udma_rchanrt_write(uc, UDMA_CHAN_RT_BCNT_REG, val);
+ udma_rchanrt_write(uc, UDMA_CHAN_RT_SBCNT_REG, val);
+ if (uc->config.ep_type != PSIL_EP_NATIVE)
+ udma_rchanrt_write(uc, UDMA_CHAN_RT_PEER_BCNT_REG, val);
+ } else {
+ udma_tchanrt_write(uc, UDMA_CHAN_RT_BCNT_REG, val);
+ udma_tchanrt_write(uc, UDMA_CHAN_RT_SBCNT_REG, val);
+ if (!uc->bchan && uc->config.ep_type != PSIL_EP_NATIVE)
+ udma_tchanrt_write(uc, UDMA_CHAN_RT_PEER_BCNT_REG, val);
+ }
+}
+
+static void udma_reset_counters(struct udma_chan *uc)
+{
+ u32 val;
+
+ if (uc->tchan) {
+ val = udma_tchanrt_read(uc, UDMA_CHAN_RT_BCNT_REG);
+ udma_tchanrt_write(uc, UDMA_CHAN_RT_BCNT_REG, val);
+
+ val = udma_tchanrt_read(uc, UDMA_CHAN_RT_SBCNT_REG);
+ udma_tchanrt_write(uc, UDMA_CHAN_RT_SBCNT_REG, val);
+
+ val = udma_tchanrt_read(uc, UDMA_CHAN_RT_PCNT_REG);
+ udma_tchanrt_write(uc, UDMA_CHAN_RT_PCNT_REG, val);
+
+ if (!uc->bchan) {
+ val = udma_tchanrt_read(uc, UDMA_CHAN_RT_PEER_BCNT_REG);
+ udma_tchanrt_write(uc, UDMA_CHAN_RT_PEER_BCNT_REG, val);
+ }
+ }
+
+ if (uc->rchan) {
+ val = udma_rchanrt_read(uc, UDMA_CHAN_RT_BCNT_REG);
+ udma_rchanrt_write(uc, UDMA_CHAN_RT_BCNT_REG, val);
+
+ val = udma_rchanrt_read(uc, UDMA_CHAN_RT_SBCNT_REG);
+ udma_rchanrt_write(uc, UDMA_CHAN_RT_SBCNT_REG, val);
+
+ val = udma_rchanrt_read(uc, UDMA_CHAN_RT_PCNT_REG);
+ udma_rchanrt_write(uc, UDMA_CHAN_RT_PCNT_REG, val);
+
+ val = udma_rchanrt_read(uc, UDMA_CHAN_RT_PEER_BCNT_REG);
+ udma_rchanrt_write(uc, UDMA_CHAN_RT_PEER_BCNT_REG, val);
+ }
+}
+
+static int udma_reset_chan(struct udma_chan *uc, bool hard)
+{
+ switch (uc->config.dir) {
+ case DMA_DEV_TO_MEM:
+ udma_rchanrt_write(uc, UDMA_CHAN_RT_PEER_RT_EN_REG, 0);
+ udma_rchanrt_write(uc, UDMA_CHAN_RT_CTL_REG, 0);
+ break;
+ case DMA_MEM_TO_DEV:
+ udma_tchanrt_write(uc, UDMA_CHAN_RT_CTL_REG, 0);
+ udma_tchanrt_write(uc, UDMA_CHAN_RT_PEER_RT_EN_REG, 0);
+ break;
+ case DMA_MEM_TO_MEM:
+ udma_rchanrt_write(uc, UDMA_CHAN_RT_CTL_REG, 0);
+ udma_tchanrt_write(uc, UDMA_CHAN_RT_CTL_REG, 0);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ /* Reset all counters */
+ udma_reset_counters(uc);
+
+ /* Hard reset: re-initialize the channel to reset */
+ if (hard) {
+ struct udma_chan_config ucc_backup;
+ int ret;
+
+ memcpy(&ucc_backup, &uc->config, sizeof(uc->config));
+ uc->ud->ddev.device_free_chan_resources(&uc->vc.chan);
+
+ /* restore the channel configuration */
+ memcpy(&uc->config, &ucc_backup, sizeof(uc->config));
+ ret = uc->ud->ddev.device_alloc_chan_resources(&uc->vc.chan);
+ if (ret)
+ return ret;
+
+ /*
+ * Setting forced teardown after forced reset helps recovering
+ * the rchan.
+ */
+ if (uc->config.dir == DMA_DEV_TO_MEM)
+ udma_rchanrt_write(uc, UDMA_CHAN_RT_CTL_REG,
+ UDMA_CHAN_RT_CTL_EN |
+ UDMA_CHAN_RT_CTL_TDOWN |
+ UDMA_CHAN_RT_CTL_FTDOWN);
+ }
+ uc->state = UDMA_CHAN_IS_IDLE;
+
+ return 0;
+}
+
+static void udma_start_desc(struct udma_chan *uc)
+{
+ struct udma_chan_config *ucc = &uc->config;
+
+ if (uc->ud->match_data->type == DMA_TYPE_UDMA && ucc->pkt_mode &&
+ (uc->cyclic || ucc->dir == DMA_DEV_TO_MEM)) {
+ int i;
+
+ /*
+ * UDMA only: Push all descriptors to ring for packet mode
+ * cyclic or RX
+ * PKTDMA supports pre-linked descriptor and cyclic is not
+ * supported
+ */
+ for (i = 0; i < uc->desc->sglen; i++)
+ udma_push_to_ring(uc, i);
+ } else {
+ udma_push_to_ring(uc, 0);
+ }
+}
+
+static bool udma_chan_needs_reconfiguration(struct udma_chan *uc)
+{
+ /* Only PDMAs have staticTR */
+ if (uc->config.ep_type == PSIL_EP_NATIVE)
+ return false;
+
+ /* Check if the staticTR configuration has changed for TX */
+ if (memcmp(&uc->static_tr, &uc->desc->static_tr, sizeof(uc->static_tr)))
+ return true;
+
+ return false;
+}
+
+static int udma_start(struct udma_chan *uc)
+{
+ struct virt_dma_desc *vd = vchan_next_desc(&uc->vc);
+
+ if (!vd) {
+ uc->desc = NULL;
+ return -ENOENT;
+ }
+
+ list_del(&vd->node);
+
+ uc->desc = to_udma_desc(&vd->tx);
+
+ /* Channel is already running and does not need reconfiguration */
+ if (udma_is_chan_running(uc) && !udma_chan_needs_reconfiguration(uc)) {
+ udma_start_desc(uc);
+ goto out;
+ }
+
+ /* Make sure that we clear the teardown bit, if it is set */
+ udma_reset_chan(uc, false);
+
+ /* Push descriptors before we start the channel */
+ udma_start_desc(uc);
+
+ switch (uc->desc->dir) {
+ case DMA_DEV_TO_MEM:
+ /* Config remote TR */
+ if (uc->config.ep_type == PSIL_EP_PDMA_XY) {
+ u32 val = PDMA_STATIC_TR_Y(uc->desc->static_tr.elcnt) |
+ PDMA_STATIC_TR_X(uc->desc->static_tr.elsize);
+ const struct udma_match_data *match_data =
+ uc->ud->match_data;
+
+ if (uc->config.enable_acc32)
+ val |= PDMA_STATIC_TR_XY_ACC32;
+ if (uc->config.enable_burst)
+ val |= PDMA_STATIC_TR_XY_BURST;
+
+ udma_rchanrt_write(uc,
+ UDMA_CHAN_RT_PEER_STATIC_TR_XY_REG,
+ val);
+
+ udma_rchanrt_write(uc,
+ UDMA_CHAN_RT_PEER_STATIC_TR_Z_REG,
+ PDMA_STATIC_TR_Z(uc->desc->static_tr.bstcnt,
+ match_data->statictr_z_mask));
+
+ /* save the current staticTR configuration */
+ memcpy(&uc->static_tr, &uc->desc->static_tr,
+ sizeof(uc->static_tr));
+ }
+
+ udma_rchanrt_write(uc, UDMA_CHAN_RT_CTL_REG,
+ UDMA_CHAN_RT_CTL_EN);
+
+ /* Enable remote */
+ udma_rchanrt_write(uc, UDMA_CHAN_RT_PEER_RT_EN_REG,
+ UDMA_PEER_RT_EN_ENABLE);
+
+ break;
+ case DMA_MEM_TO_DEV:
+ /* Config remote TR */
+ if (uc->config.ep_type == PSIL_EP_PDMA_XY) {
+ u32 val = PDMA_STATIC_TR_Y(uc->desc->static_tr.elcnt) |
+ PDMA_STATIC_TR_X(uc->desc->static_tr.elsize);
+
+ if (uc->config.enable_acc32)
+ val |= PDMA_STATIC_TR_XY_ACC32;
+ if (uc->config.enable_burst)
+ val |= PDMA_STATIC_TR_XY_BURST;
+
+ udma_tchanrt_write(uc,
+ UDMA_CHAN_RT_PEER_STATIC_TR_XY_REG,
+ val);
+
+ /* save the current staticTR configuration */
+ memcpy(&uc->static_tr, &uc->desc->static_tr,
+ sizeof(uc->static_tr));
+ }
+
+ /* Enable remote */
+ udma_tchanrt_write(uc, UDMA_CHAN_RT_PEER_RT_EN_REG,
+ UDMA_PEER_RT_EN_ENABLE);
+
+ udma_tchanrt_write(uc, UDMA_CHAN_RT_CTL_REG,
+ UDMA_CHAN_RT_CTL_EN);
+
+ break;
+ case DMA_MEM_TO_MEM:
+ udma_rchanrt_write(uc, UDMA_CHAN_RT_CTL_REG,
+ UDMA_CHAN_RT_CTL_EN);
+ udma_tchanrt_write(uc, UDMA_CHAN_RT_CTL_REG,
+ UDMA_CHAN_RT_CTL_EN);
+
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ uc->state = UDMA_CHAN_IS_ACTIVE;
+out:
+
+ return 0;
+}
+
+static int udma_stop(struct udma_chan *uc)
+{
+ enum udma_chan_state old_state = uc->state;
+
+ uc->state = UDMA_CHAN_IS_TERMINATING;
+ reinit_completion(&uc->teardown_completed);
+
+ switch (uc->config.dir) {
+ case DMA_DEV_TO_MEM:
+ if (!uc->cyclic && !uc->desc)
+ udma_push_to_ring(uc, -1);
+
+ udma_rchanrt_write(uc, UDMA_CHAN_RT_PEER_RT_EN_REG,
+ UDMA_PEER_RT_EN_ENABLE |
+ UDMA_PEER_RT_EN_TEARDOWN);
+ break;
+ case DMA_MEM_TO_DEV:
+ udma_tchanrt_write(uc, UDMA_CHAN_RT_PEER_RT_EN_REG,
+ UDMA_PEER_RT_EN_ENABLE |
+ UDMA_PEER_RT_EN_FLUSH);
+ udma_tchanrt_write(uc, UDMA_CHAN_RT_CTL_REG,
+ UDMA_CHAN_RT_CTL_EN |
+ UDMA_CHAN_RT_CTL_TDOWN);
+ break;
+ case DMA_MEM_TO_MEM:
+ udma_tchanrt_write(uc, UDMA_CHAN_RT_CTL_REG,
+ UDMA_CHAN_RT_CTL_EN |
+ UDMA_CHAN_RT_CTL_TDOWN);
+ break;
+ default:
+ uc->state = old_state;
+ complete_all(&uc->teardown_completed);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static void udma_cyclic_packet_elapsed(struct udma_chan *uc)
+{
+ struct udma_desc *d = uc->desc;
+ struct cppi5_host_desc_t *h_desc;
+
+ h_desc = d->hwdesc[d->desc_idx].cppi5_desc_vaddr;
+ cppi5_hdesc_reset_to_original(h_desc);
+ udma_push_to_ring(uc, d->desc_idx);
+ d->desc_idx = (d->desc_idx + 1) % d->sglen;
+}
+
+static inline void udma_fetch_epib(struct udma_chan *uc, struct udma_desc *d)
+{
+ struct cppi5_host_desc_t *h_desc = d->hwdesc[0].cppi5_desc_vaddr;
+
+ memcpy(d->metadata, h_desc->epib, d->metadata_size);
+}
+
+static bool udma_is_desc_really_done(struct udma_chan *uc, struct udma_desc *d)
+{
+ u32 peer_bcnt, bcnt;
+
+ /*
+ * Only TX towards PDMA is affected.
+ * If DMA_PREP_INTERRUPT is not set by consumer then skip the transfer
+ * completion calculation, consumer must ensure that there is no stale
+ * data in DMA fabric in this case.
+ */
+ if (uc->config.ep_type == PSIL_EP_NATIVE ||
+ uc->config.dir != DMA_MEM_TO_DEV || !(uc->config.tx_flags & DMA_PREP_INTERRUPT))
+ return true;
+
+ peer_bcnt = udma_tchanrt_read(uc, UDMA_CHAN_RT_PEER_BCNT_REG);
+ bcnt = udma_tchanrt_read(uc, UDMA_CHAN_RT_BCNT_REG);
+
+ /* Transfer is incomplete, store current residue and time stamp */
+ if (peer_bcnt < bcnt) {
+ uc->tx_drain.residue = bcnt - peer_bcnt;
+ uc->tx_drain.tstamp = ktime_get();
+ return false;
+ }
+
+ return true;
+}
+
+static void udma_check_tx_completion(struct work_struct *work)
+{
+ struct udma_chan *uc = container_of(work, typeof(*uc),
+ tx_drain.work.work);
+ bool desc_done = true;
+ u32 residue_diff;
+ ktime_t time_diff;
+ unsigned long delay;
+
+ while (1) {
+ if (uc->desc) {
+ /* Get previous residue and time stamp */
+ residue_diff = uc->tx_drain.residue;
+ time_diff = uc->tx_drain.tstamp;
+ /*
+ * Get current residue and time stamp or see if
+ * transfer is complete
+ */
+ desc_done = udma_is_desc_really_done(uc, uc->desc);
+ }
+
+ if (!desc_done) {
+ /*
+ * Find the time delta and residue delta w.r.t
+ * previous poll
+ */
+ time_diff = ktime_sub(uc->tx_drain.tstamp,
+ time_diff) + 1;
+ residue_diff -= uc->tx_drain.residue;
+ if (residue_diff) {
+ /*
+ * Try to guess when we should check
+ * next time by calculating rate at
+ * which data is being drained at the
+ * peer device
+ */
+ delay = (time_diff / residue_diff) *
+ uc->tx_drain.residue;
+ } else {
+ /* No progress, check again in 1 second */
+ schedule_delayed_work(&uc->tx_drain.work, HZ);
+ break;
+ }
+
+ usleep_range(ktime_to_us(delay),
+ ktime_to_us(delay) + 10);
+ continue;
+ }
+
+ if (uc->desc) {
+ struct udma_desc *d = uc->desc;
+
+ udma_decrement_byte_counters(uc, d->residue);
+ udma_start(uc);
+ vchan_cookie_complete(&d->vd);
+ break;
+ }
+
+ break;
+ }
+}
+
+static irqreturn_t udma_ring_irq_handler(int irq, void *data)
+{
+ struct udma_chan *uc = data;
+ struct udma_desc *d;
+ dma_addr_t paddr = 0;
+
+ if (udma_pop_from_ring(uc, &paddr) || !paddr)
+ return IRQ_HANDLED;
+
+ spin_lock(&uc->vc.lock);
+
+ /* Teardown completion message */
+ if (cppi5_desc_is_tdcm(paddr)) {
+ complete_all(&uc->teardown_completed);
+
+ if (uc->terminated_desc) {
+ udma_desc_free(&uc->terminated_desc->vd);
+ uc->terminated_desc = NULL;
+ }
+
+ if (!uc->desc)
+ udma_start(uc);
+
+ goto out;
+ }
+
+ d = udma_udma_desc_from_paddr(uc, paddr);
+
+ if (d) {
+ dma_addr_t desc_paddr = udma_curr_cppi5_desc_paddr(d,
+ d->desc_idx);
+ if (desc_paddr != paddr) {
+ dev_err(uc->ud->dev, "not matching descriptors!\n");
+ goto out;
+ }
+
+ if (d == uc->desc) {
+ /* active descriptor */
+ if (uc->cyclic) {
+ udma_cyclic_packet_elapsed(uc);
+ vchan_cyclic_callback(&d->vd);
+ } else {
+ if (udma_is_desc_really_done(uc, d)) {
+ udma_decrement_byte_counters(uc, d->residue);
+ udma_start(uc);
+ vchan_cookie_complete(&d->vd);
+ } else {
+ schedule_delayed_work(&uc->tx_drain.work,
+ 0);
+ }
+ }
+ } else {
+ /*
+ * terminated descriptor, mark the descriptor as
+ * completed to update the channel's cookie marker
+ */
+ dma_cookie_complete(&d->vd.tx);
+ }
+ }
+out:
+ spin_unlock(&uc->vc.lock);
+
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t udma_udma_irq_handler(int irq, void *data)
+{
+ struct udma_chan *uc = data;
+ struct udma_desc *d;
+
+ spin_lock(&uc->vc.lock);
+ d = uc->desc;
+ if (d) {
+ d->tr_idx = (d->tr_idx + 1) % d->sglen;
+
+ if (uc->cyclic) {
+ vchan_cyclic_callback(&d->vd);
+ } else {
+ /* TODO: figure out the real amount of data */
+ udma_decrement_byte_counters(uc, d->residue);
+ udma_start(uc);
+ vchan_cookie_complete(&d->vd);
+ }
+ }
+
+ spin_unlock(&uc->vc.lock);
+
+ return IRQ_HANDLED;
+}
+
+/**
+ * __udma_alloc_gp_rflow_range - alloc range of GP RX flows
+ * @ud: UDMA device
+ * @from: Start the search from this flow id number
+ * @cnt: Number of consecutive flow ids to allocate
+ *
+ * Allocate range of RX flow ids for future use, those flows can be requested
+ * only using explicit flow id number. if @from is set to -1 it will try to find
+ * first free range. if @from is positive value it will force allocation only
+ * of the specified range of flows.
+ *
+ * Returns -ENOMEM if can't find free range.
+ * -EEXIST if requested range is busy.
+ * -EINVAL if wrong input values passed.
+ * Returns flow id on success.
+ */
+static int __udma_alloc_gp_rflow_range(struct udma_dev *ud, int from, int cnt)
+{
+ int start, tmp_from;
+ DECLARE_BITMAP(tmp, K3_UDMA_MAX_RFLOWS);
+
+ tmp_from = from;
+ if (tmp_from < 0)
+ tmp_from = ud->rchan_cnt;
+ /* default flows can't be allocated and accessible only by id */
+ if (tmp_from < ud->rchan_cnt)
+ return -EINVAL;
+
+ if (tmp_from + cnt > ud->rflow_cnt)
+ return -EINVAL;
+
+ bitmap_or(tmp, ud->rflow_gp_map, ud->rflow_gp_map_allocated,
+ ud->rflow_cnt);
+
+ start = bitmap_find_next_zero_area(tmp,
+ ud->rflow_cnt,
+ tmp_from, cnt, 0);
+ if (start >= ud->rflow_cnt)
+ return -ENOMEM;
+
+ if (from >= 0 && start != from)
+ return -EEXIST;
+
+ bitmap_set(ud->rflow_gp_map_allocated, start, cnt);
+ return start;
+}
+
+static int __udma_free_gp_rflow_range(struct udma_dev *ud, int from, int cnt)
+{
+ if (from < ud->rchan_cnt)
+ return -EINVAL;
+ if (from + cnt > ud->rflow_cnt)
+ return -EINVAL;
+
+ bitmap_clear(ud->rflow_gp_map_allocated, from, cnt);
+ return 0;
+}
+
+static struct udma_rflow *__udma_get_rflow(struct udma_dev *ud, int id)
+{
+ /*
+ * Attempt to request rflow by ID can be made for any rflow
+ * if not in use with assumption that caller knows what's doing.
+ * TI-SCI FW will perform additional permission check ant way, it's
+ * safe
+ */
+
+ if (id < 0 || id >= ud->rflow_cnt)
+ return ERR_PTR(-ENOENT);
+
+ if (test_bit(id, ud->rflow_in_use))
+ return ERR_PTR(-ENOENT);
+
+ if (ud->rflow_gp_map) {
+ /* GP rflow has to be allocated first */
+ if (!test_bit(id, ud->rflow_gp_map) &&
+ !test_bit(id, ud->rflow_gp_map_allocated))
+ return ERR_PTR(-EINVAL);
+ }
+
+ dev_dbg(ud->dev, "get rflow%d\n", id);
+ set_bit(id, ud->rflow_in_use);
+ return &ud->rflows[id];
+}
+
+static void __udma_put_rflow(struct udma_dev *ud, struct udma_rflow *rflow)
+{
+ if (!test_bit(rflow->id, ud->rflow_in_use)) {
+ dev_err(ud->dev, "attempt to put unused rflow%d\n", rflow->id);
+ return;
+ }
+
+ dev_dbg(ud->dev, "put rflow%d\n", rflow->id);
+ clear_bit(rflow->id, ud->rflow_in_use);
+}
+
+#define UDMA_RESERVE_RESOURCE(res) \
+static struct udma_##res *__udma_reserve_##res(struct udma_dev *ud, \
+ enum udma_tp_level tpl, \
+ int id) \
+{ \
+ if (id >= 0) { \
+ if (test_bit(id, ud->res##_map)) { \
+ dev_err(ud->dev, "res##%d is in use\n", id); \
+ return ERR_PTR(-ENOENT); \
+ } \
+ } else { \
+ int start; \
+ \
+ if (tpl >= ud->res##_tpl.levels) \
+ tpl = ud->res##_tpl.levels - 1; \
+ \
+ start = ud->res##_tpl.start_idx[tpl]; \
+ \
+ id = find_next_zero_bit(ud->res##_map, ud->res##_cnt, \
+ start); \
+ if (id == ud->res##_cnt) { \
+ return ERR_PTR(-ENOENT); \
+ } \
+ } \
+ \
+ set_bit(id, ud->res##_map); \
+ return &ud->res##s[id]; \
+}
+
+UDMA_RESERVE_RESOURCE(bchan);
+UDMA_RESERVE_RESOURCE(tchan);
+UDMA_RESERVE_RESOURCE(rchan);
+
+static int bcdma_get_bchan(struct udma_chan *uc)
+{
+ struct udma_dev *ud = uc->ud;
+ enum udma_tp_level tpl;
+ int ret;
+
+ if (uc->bchan) {
+ dev_dbg(ud->dev, "chan%d: already have bchan%d allocated\n",
+ uc->id, uc->bchan->id);
+ return 0;
+ }
+
+ /*
+ * Use normal channels for peripherals, and highest TPL channel for
+ * mem2mem
+ */
+ if (uc->config.tr_trigger_type)
+ tpl = 0;
+ else
+ tpl = ud->bchan_tpl.levels - 1;
+
+ uc->bchan = __udma_reserve_bchan(ud, tpl, -1);
+ if (IS_ERR(uc->bchan)) {
+ ret = PTR_ERR(uc->bchan);
+ uc->bchan = NULL;
+ return ret;
+ }
+
+ uc->tchan = uc->bchan;
+
+ return 0;
+}
+
+static int udma_get_tchan(struct udma_chan *uc)
+{
+ struct udma_dev *ud = uc->ud;
+ int ret;
+
+ if (uc->tchan) {
+ dev_dbg(ud->dev, "chan%d: already have tchan%d allocated\n",
+ uc->id, uc->tchan->id);
+ return 0;
+ }
+
+ /*
+ * mapped_channel_id is -1 for UDMA, BCDMA and PKTDMA unmapped channels.
+ * For PKTDMA mapped channels it is configured to a channel which must
+ * be used to service the peripheral.
+ */
+ uc->tchan = __udma_reserve_tchan(ud, uc->config.channel_tpl,
+ uc->config.mapped_channel_id);
+ if (IS_ERR(uc->tchan)) {
+ ret = PTR_ERR(uc->tchan);
+ uc->tchan = NULL;
+ return ret;
+ }
+
+ if (ud->tflow_cnt) {
+ int tflow_id;
+
+ /* Only PKTDMA have support for tx flows */
+ if (uc->config.default_flow_id >= 0)
+ tflow_id = uc->config.default_flow_id;
+ else
+ tflow_id = uc->tchan->id;
+
+ if (test_bit(tflow_id, ud->tflow_map)) {
+ dev_err(ud->dev, "tflow%d is in use\n", tflow_id);
+ clear_bit(uc->tchan->id, ud->tchan_map);
+ uc->tchan = NULL;
+ return -ENOENT;
+ }
+
+ uc->tchan->tflow_id = tflow_id;
+ set_bit(tflow_id, ud->tflow_map);
+ } else {
+ uc->tchan->tflow_id = -1;
+ }
+
+ return 0;
+}
+
+static int udma_get_rchan(struct udma_chan *uc)
+{
+ struct udma_dev *ud = uc->ud;
+ int ret;
+
+ if (uc->rchan) {
+ dev_dbg(ud->dev, "chan%d: already have rchan%d allocated\n",
+ uc->id, uc->rchan->id);
+ return 0;
+ }
+
+ /*
+ * mapped_channel_id is -1 for UDMA, BCDMA and PKTDMA unmapped channels.
+ * For PKTDMA mapped channels it is configured to a channel which must
+ * be used to service the peripheral.
+ */
+ uc->rchan = __udma_reserve_rchan(ud, uc->config.channel_tpl,
+ uc->config.mapped_channel_id);
+ if (IS_ERR(uc->rchan)) {
+ ret = PTR_ERR(uc->rchan);
+ uc->rchan = NULL;
+ return ret;
+ }
+
+ return 0;
+}
+
+static int udma_get_chan_pair(struct udma_chan *uc)
+{
+ struct udma_dev *ud = uc->ud;
+ int chan_id, end;
+
+ if ((uc->tchan && uc->rchan) && uc->tchan->id == uc->rchan->id) {
+ dev_info(ud->dev, "chan%d: already have %d pair allocated\n",
+ uc->id, uc->tchan->id);
+ return 0;
+ }
+
+ if (uc->tchan) {
+ dev_err(ud->dev, "chan%d: already have tchan%d allocated\n",
+ uc->id, uc->tchan->id);
+ return -EBUSY;
+ } else if (uc->rchan) {
+ dev_err(ud->dev, "chan%d: already have rchan%d allocated\n",
+ uc->id, uc->rchan->id);
+ return -EBUSY;
+ }
+
+ /* Can be optimized, but let's have it like this for now */
+ end = min(ud->tchan_cnt, ud->rchan_cnt);
+ /*
+ * Try to use the highest TPL channel pair for MEM_TO_MEM channels
+ * Note: in UDMAP the channel TPL is symmetric between tchan and rchan
+ */
+ chan_id = ud->tchan_tpl.start_idx[ud->tchan_tpl.levels - 1];
+ for (; chan_id < end; chan_id++) {
+ if (!test_bit(chan_id, ud->tchan_map) &&
+ !test_bit(chan_id, ud->rchan_map))
+ break;
+ }
+
+ if (chan_id == end)
+ return -ENOENT;
+
+ set_bit(chan_id, ud->tchan_map);
+ set_bit(chan_id, ud->rchan_map);
+ uc->tchan = &ud->tchans[chan_id];
+ uc->rchan = &ud->rchans[chan_id];
+
+ /* UDMA does not use tx flows */
+ uc->tchan->tflow_id = -1;
+
+ return 0;
+}
+
+static int udma_get_rflow(struct udma_chan *uc, int flow_id)
+{
+ struct udma_dev *ud = uc->ud;
+ int ret;
+
+ if (!uc->rchan) {
+ dev_err(ud->dev, "chan%d: does not have rchan??\n", uc->id);
+ return -EINVAL;
+ }
+
+ if (uc->rflow) {
+ dev_dbg(ud->dev, "chan%d: already have rflow%d allocated\n",
+ uc->id, uc->rflow->id);
+ return 0;
+ }
+
+ uc->rflow = __udma_get_rflow(ud, flow_id);
+ if (IS_ERR(uc->rflow)) {
+ ret = PTR_ERR(uc->rflow);
+ uc->rflow = NULL;
+ return ret;
+ }
+
+ return 0;
+}
+
+static void bcdma_put_bchan(struct udma_chan *uc)
+{
+ struct udma_dev *ud = uc->ud;
+
+ if (uc->bchan) {
+ dev_dbg(ud->dev, "chan%d: put bchan%d\n", uc->id,
+ uc->bchan->id);
+ clear_bit(uc->bchan->id, ud->bchan_map);
+ uc->bchan = NULL;
+ uc->tchan = NULL;
+ }
+}
+
+static void udma_put_rchan(struct udma_chan *uc)
+{
+ struct udma_dev *ud = uc->ud;
+
+ if (uc->rchan) {
+ dev_dbg(ud->dev, "chan%d: put rchan%d\n", uc->id,
+ uc->rchan->id);
+ clear_bit(uc->rchan->id, ud->rchan_map);
+ uc->rchan = NULL;
+ }
+}
+
+static void udma_put_tchan(struct udma_chan *uc)
+{
+ struct udma_dev *ud = uc->ud;
+
+ if (uc->tchan) {
+ dev_dbg(ud->dev, "chan%d: put tchan%d\n", uc->id,
+ uc->tchan->id);
+ clear_bit(uc->tchan->id, ud->tchan_map);
+
+ if (uc->tchan->tflow_id >= 0)
+ clear_bit(uc->tchan->tflow_id, ud->tflow_map);
+
+ uc->tchan = NULL;
+ }
+}
+
+static void udma_put_rflow(struct udma_chan *uc)
+{
+ struct udma_dev *ud = uc->ud;
+
+ if (uc->rflow) {
+ dev_dbg(ud->dev, "chan%d: put rflow%d\n", uc->id,
+ uc->rflow->id);
+ __udma_put_rflow(ud, uc->rflow);
+ uc->rflow = NULL;
+ }
+}
+
+static void bcdma_free_bchan_resources(struct udma_chan *uc)
+{
+ if (!uc->bchan)
+ return;
+
+ k3_ringacc_ring_free(uc->bchan->tc_ring);
+ k3_ringacc_ring_free(uc->bchan->t_ring);
+ uc->bchan->tc_ring = NULL;
+ uc->bchan->t_ring = NULL;
+ k3_configure_chan_coherency(&uc->vc.chan, 0);
+
+ bcdma_put_bchan(uc);
+}
+
+static int bcdma_alloc_bchan_resources(struct udma_chan *uc)
+{
+ struct k3_ring_cfg ring_cfg;
+ struct udma_dev *ud = uc->ud;
+ int ret;
+
+ ret = bcdma_get_bchan(uc);
+ if (ret)
+ return ret;
+
+ ret = k3_ringacc_request_rings_pair(ud->ringacc, uc->bchan->id, -1,
+ &uc->bchan->t_ring,
+ &uc->bchan->tc_ring);
+ if (ret) {
+ ret = -EBUSY;
+ goto err_ring;
+ }
+
+ memset(&ring_cfg, 0, sizeof(ring_cfg));
+ ring_cfg.size = K3_UDMA_DEFAULT_RING_SIZE;
+ ring_cfg.elm_size = K3_RINGACC_RING_ELSIZE_8;
+ ring_cfg.mode = K3_RINGACC_RING_MODE_RING;
+
+ k3_configure_chan_coherency(&uc->vc.chan, ud->asel);
+ ring_cfg.asel = ud->asel;
+ ring_cfg.dma_dev = dmaengine_get_dma_device(&uc->vc.chan);
+
+ ret = k3_ringacc_ring_cfg(uc->bchan->t_ring, &ring_cfg);
+ if (ret)
+ goto err_ringcfg;
+
+ return 0;
+
+err_ringcfg:
+ k3_ringacc_ring_free(uc->bchan->tc_ring);
+ uc->bchan->tc_ring = NULL;
+ k3_ringacc_ring_free(uc->bchan->t_ring);
+ uc->bchan->t_ring = NULL;
+ k3_configure_chan_coherency(&uc->vc.chan, 0);
+err_ring:
+ bcdma_put_bchan(uc);
+
+ return ret;
+}
+
+static void udma_free_tx_resources(struct udma_chan *uc)
+{
+ if (!uc->tchan)
+ return;
+
+ k3_ringacc_ring_free(uc->tchan->t_ring);
+ k3_ringacc_ring_free(uc->tchan->tc_ring);
+ uc->tchan->t_ring = NULL;
+ uc->tchan->tc_ring = NULL;
+
+ udma_put_tchan(uc);
+}
+
+static int udma_alloc_tx_resources(struct udma_chan *uc)
+{
+ struct k3_ring_cfg ring_cfg;
+ struct udma_dev *ud = uc->ud;
+ struct udma_tchan *tchan;
+ int ring_idx, ret;
+
+ ret = udma_get_tchan(uc);
+ if (ret)
+ return ret;
+
+ tchan = uc->tchan;
+ if (tchan->tflow_id >= 0)
+ ring_idx = tchan->tflow_id;
+ else
+ ring_idx = ud->bchan_cnt + tchan->id;
+
+ ret = k3_ringacc_request_rings_pair(ud->ringacc, ring_idx, -1,
+ &tchan->t_ring,
+ &tchan->tc_ring);
+ if (ret) {
+ ret = -EBUSY;
+ goto err_ring;
+ }
+
+ memset(&ring_cfg, 0, sizeof(ring_cfg));
+ ring_cfg.size = K3_UDMA_DEFAULT_RING_SIZE;
+ ring_cfg.elm_size = K3_RINGACC_RING_ELSIZE_8;
+ if (ud->match_data->type == DMA_TYPE_UDMA) {
+ ring_cfg.mode = K3_RINGACC_RING_MODE_MESSAGE;
+ } else {
+ ring_cfg.mode = K3_RINGACC_RING_MODE_RING;
+
+ k3_configure_chan_coherency(&uc->vc.chan, uc->config.asel);
+ ring_cfg.asel = uc->config.asel;
+ ring_cfg.dma_dev = dmaengine_get_dma_device(&uc->vc.chan);
+ }
+
+ ret = k3_ringacc_ring_cfg(tchan->t_ring, &ring_cfg);
+ ret |= k3_ringacc_ring_cfg(tchan->tc_ring, &ring_cfg);
+
+ if (ret)
+ goto err_ringcfg;
+
+ return 0;
+
+err_ringcfg:
+ k3_ringacc_ring_free(uc->tchan->tc_ring);
+ uc->tchan->tc_ring = NULL;
+ k3_ringacc_ring_free(uc->tchan->t_ring);
+ uc->tchan->t_ring = NULL;
+err_ring:
+ udma_put_tchan(uc);
+
+ return ret;
+}
+
+static void udma_free_rx_resources(struct udma_chan *uc)
+{
+ if (!uc->rchan)
+ return;
+
+ if (uc->rflow) {
+ struct udma_rflow *rflow = uc->rflow;
+
+ k3_ringacc_ring_free(rflow->fd_ring);
+ k3_ringacc_ring_free(rflow->r_ring);
+ rflow->fd_ring = NULL;
+ rflow->r_ring = NULL;
+
+ udma_put_rflow(uc);
+ }
+
+ udma_put_rchan(uc);
+}
+
+static int udma_alloc_rx_resources(struct udma_chan *uc)
+{
+ struct udma_dev *ud = uc->ud;
+ struct k3_ring_cfg ring_cfg;
+ struct udma_rflow *rflow;
+ int fd_ring_id;
+ int ret;
+
+ ret = udma_get_rchan(uc);
+ if (ret)
+ return ret;
+
+ /* For MEM_TO_MEM we don't need rflow or rings */
+ if (uc->config.dir == DMA_MEM_TO_MEM)
+ return 0;
+
+ if (uc->config.default_flow_id >= 0)
+ ret = udma_get_rflow(uc, uc->config.default_flow_id);
+ else
+ ret = udma_get_rflow(uc, uc->rchan->id);
+
+ if (ret) {
+ ret = -EBUSY;
+ goto err_rflow;
+ }
+
+ rflow = uc->rflow;
+ if (ud->tflow_cnt)
+ fd_ring_id = ud->tflow_cnt + rflow->id;
+ else
+ fd_ring_id = ud->bchan_cnt + ud->tchan_cnt + ud->echan_cnt +
+ uc->rchan->id;
+
+ ret = k3_ringacc_request_rings_pair(ud->ringacc, fd_ring_id, -1,
+ &rflow->fd_ring, &rflow->r_ring);
+ if (ret) {
+ ret = -EBUSY;
+ goto err_ring;
+ }
+
+ memset(&ring_cfg, 0, sizeof(ring_cfg));
+
+ ring_cfg.elm_size = K3_RINGACC_RING_ELSIZE_8;
+ if (ud->match_data->type == DMA_TYPE_UDMA) {
+ if (uc->config.pkt_mode)
+ ring_cfg.size = SG_MAX_SEGMENTS;
+ else
+ ring_cfg.size = K3_UDMA_DEFAULT_RING_SIZE;
+
+ ring_cfg.mode = K3_RINGACC_RING_MODE_MESSAGE;
+ } else {
+ ring_cfg.size = K3_UDMA_DEFAULT_RING_SIZE;
+ ring_cfg.mode = K3_RINGACC_RING_MODE_RING;
+
+ k3_configure_chan_coherency(&uc->vc.chan, uc->config.asel);
+ ring_cfg.asel = uc->config.asel;
+ ring_cfg.dma_dev = dmaengine_get_dma_device(&uc->vc.chan);
+ }
+
+ ret = k3_ringacc_ring_cfg(rflow->fd_ring, &ring_cfg);
+
+ ring_cfg.size = K3_UDMA_DEFAULT_RING_SIZE;
+ ret |= k3_ringacc_ring_cfg(rflow->r_ring, &ring_cfg);
+
+ if (ret)
+ goto err_ringcfg;
+
+ return 0;
+
+err_ringcfg:
+ k3_ringacc_ring_free(rflow->r_ring);
+ rflow->r_ring = NULL;
+ k3_ringacc_ring_free(rflow->fd_ring);
+ rflow->fd_ring = NULL;
+err_ring:
+ udma_put_rflow(uc);
+err_rflow:
+ udma_put_rchan(uc);
+
+ return ret;
+}
+
+#define TISCI_BCDMA_BCHAN_VALID_PARAMS ( \
+ TI_SCI_MSG_VALUE_RM_UDMAP_CH_PAUSE_ON_ERR_VALID | \
+ TI_SCI_MSG_VALUE_RM_UDMAP_CH_EXTENDED_CH_TYPE_VALID)
+
+#define TISCI_BCDMA_TCHAN_VALID_PARAMS ( \
+ TI_SCI_MSG_VALUE_RM_UDMAP_CH_PAUSE_ON_ERR_VALID | \
+ TI_SCI_MSG_VALUE_RM_UDMAP_CH_TX_SUPR_TDPKT_VALID)
+
+#define TISCI_BCDMA_RCHAN_VALID_PARAMS ( \
+ TI_SCI_MSG_VALUE_RM_UDMAP_CH_PAUSE_ON_ERR_VALID)
+
+#define TISCI_UDMA_TCHAN_VALID_PARAMS ( \
+ TI_SCI_MSG_VALUE_RM_UDMAP_CH_PAUSE_ON_ERR_VALID | \
+ TI_SCI_MSG_VALUE_RM_UDMAP_CH_TX_FILT_EINFO_VALID | \
+ TI_SCI_MSG_VALUE_RM_UDMAP_CH_TX_FILT_PSWORDS_VALID | \
+ TI_SCI_MSG_VALUE_RM_UDMAP_CH_CHAN_TYPE_VALID | \
+ TI_SCI_MSG_VALUE_RM_UDMAP_CH_TX_SUPR_TDPKT_VALID | \
+ TI_SCI_MSG_VALUE_RM_UDMAP_CH_FETCH_SIZE_VALID | \
+ TI_SCI_MSG_VALUE_RM_UDMAP_CH_CQ_QNUM_VALID | \
+ TI_SCI_MSG_VALUE_RM_UDMAP_CH_ATYPE_VALID)
+
+#define TISCI_UDMA_RCHAN_VALID_PARAMS ( \
+ TI_SCI_MSG_VALUE_RM_UDMAP_CH_PAUSE_ON_ERR_VALID | \
+ TI_SCI_MSG_VALUE_RM_UDMAP_CH_FETCH_SIZE_VALID | \
+ TI_SCI_MSG_VALUE_RM_UDMAP_CH_CQ_QNUM_VALID | \
+ TI_SCI_MSG_VALUE_RM_UDMAP_CH_CHAN_TYPE_VALID | \
+ TI_SCI_MSG_VALUE_RM_UDMAP_CH_RX_IGNORE_SHORT_VALID | \
+ TI_SCI_MSG_VALUE_RM_UDMAP_CH_RX_IGNORE_LONG_VALID | \
+ TI_SCI_MSG_VALUE_RM_UDMAP_CH_RX_FLOWID_START_VALID | \
+ TI_SCI_MSG_VALUE_RM_UDMAP_CH_RX_FLOWID_CNT_VALID | \
+ TI_SCI_MSG_VALUE_RM_UDMAP_CH_ATYPE_VALID)
+
+static int udma_tisci_m2m_channel_config(struct udma_chan *uc)
+{
+ struct udma_dev *ud = uc->ud;
+ struct udma_tisci_rm *tisci_rm = &ud->tisci_rm;
+ const struct ti_sci_rm_udmap_ops *tisci_ops = tisci_rm->tisci_udmap_ops;
+ struct udma_tchan *tchan = uc->tchan;
+ struct udma_rchan *rchan = uc->rchan;
+ u8 burst_size = 0;
+ int ret;
+ u8 tpl;
+
+ /* Non synchronized - mem to mem type of transfer */
+ int tc_ring = k3_ringacc_get_ring_id(tchan->tc_ring);
+ struct ti_sci_msg_rm_udmap_tx_ch_cfg req_tx = { 0 };
+ struct ti_sci_msg_rm_udmap_rx_ch_cfg req_rx = { 0 };
+
+ if (ud->match_data->flags & UDMA_FLAG_BURST_SIZE) {
+ tpl = udma_get_chan_tpl_index(&ud->tchan_tpl, tchan->id);
+
+ burst_size = ud->match_data->burst_size[tpl];
+ }
+
+ req_tx.valid_params = TISCI_UDMA_TCHAN_VALID_PARAMS;
+ req_tx.nav_id = tisci_rm->tisci_dev_id;
+ req_tx.index = tchan->id;
+ req_tx.tx_chan_type = TI_SCI_RM_UDMAP_CHAN_TYPE_3RDP_BCOPY_PBRR;
+ req_tx.tx_fetch_size = sizeof(struct cppi5_desc_hdr_t) >> 2;
+ req_tx.txcq_qnum = tc_ring;
+ req_tx.tx_atype = ud->atype;
+ if (burst_size) {
+ req_tx.valid_params |= TI_SCI_MSG_VALUE_RM_UDMAP_CH_BURST_SIZE_VALID;
+ req_tx.tx_burst_size = burst_size;
+ }
+
+ ret = tisci_ops->tx_ch_cfg(tisci_rm->tisci, &req_tx);
+ if (ret) {
+ dev_err(ud->dev, "tchan%d cfg failed %d\n", tchan->id, ret);
+ return ret;
+ }
+
+ req_rx.valid_params = TISCI_UDMA_RCHAN_VALID_PARAMS;
+ req_rx.nav_id = tisci_rm->tisci_dev_id;
+ req_rx.index = rchan->id;
+ req_rx.rx_fetch_size = sizeof(struct cppi5_desc_hdr_t) >> 2;
+ req_rx.rxcq_qnum = tc_ring;
+ req_rx.rx_chan_type = TI_SCI_RM_UDMAP_CHAN_TYPE_3RDP_BCOPY_PBRR;
+ req_rx.rx_atype = ud->atype;
+ if (burst_size) {
+ req_rx.valid_params |= TI_SCI_MSG_VALUE_RM_UDMAP_CH_BURST_SIZE_VALID;
+ req_rx.rx_burst_size = burst_size;
+ }
+
+ ret = tisci_ops->rx_ch_cfg(tisci_rm->tisci, &req_rx);
+ if (ret)
+ dev_err(ud->dev, "rchan%d alloc failed %d\n", rchan->id, ret);
+
+ return ret;
+}
+
+static int bcdma_tisci_m2m_channel_config(struct udma_chan *uc)
+{
+ struct udma_dev *ud = uc->ud;
+ struct udma_tisci_rm *tisci_rm = &ud->tisci_rm;
+ const struct ti_sci_rm_udmap_ops *tisci_ops = tisci_rm->tisci_udmap_ops;
+ struct ti_sci_msg_rm_udmap_tx_ch_cfg req_tx = { 0 };
+ struct udma_bchan *bchan = uc->bchan;
+ u8 burst_size = 0;
+ int ret;
+ u8 tpl;
+
+ if (ud->match_data->flags & UDMA_FLAG_BURST_SIZE) {
+ tpl = udma_get_chan_tpl_index(&ud->bchan_tpl, bchan->id);
+
+ burst_size = ud->match_data->burst_size[tpl];
+ }
+
+ req_tx.valid_params = TISCI_BCDMA_BCHAN_VALID_PARAMS;
+ req_tx.nav_id = tisci_rm->tisci_dev_id;
+ req_tx.extended_ch_type = TI_SCI_RM_BCDMA_EXTENDED_CH_TYPE_BCHAN;
+ req_tx.index = bchan->id;
+ if (burst_size) {
+ req_tx.valid_params |= TI_SCI_MSG_VALUE_RM_UDMAP_CH_BURST_SIZE_VALID;
+ req_tx.tx_burst_size = burst_size;
+ }
+
+ ret = tisci_ops->tx_ch_cfg(tisci_rm->tisci, &req_tx);
+ if (ret)
+ dev_err(ud->dev, "bchan%d cfg failed %d\n", bchan->id, ret);
+
+ return ret;
+}
+
+static int udma_tisci_tx_channel_config(struct udma_chan *uc)
+{
+ struct udma_dev *ud = uc->ud;
+ struct udma_tisci_rm *tisci_rm = &ud->tisci_rm;
+ const struct ti_sci_rm_udmap_ops *tisci_ops = tisci_rm->tisci_udmap_ops;
+ struct udma_tchan *tchan = uc->tchan;
+ int tc_ring = k3_ringacc_get_ring_id(tchan->tc_ring);
+ struct ti_sci_msg_rm_udmap_tx_ch_cfg req_tx = { 0 };
+ u32 mode, fetch_size;
+ int ret;
+
+ if (uc->config.pkt_mode) {
+ mode = TI_SCI_RM_UDMAP_CHAN_TYPE_PKT_PBRR;
+ fetch_size = cppi5_hdesc_calc_size(uc->config.needs_epib,
+ uc->config.psd_size, 0);
+ } else {
+ mode = TI_SCI_RM_UDMAP_CHAN_TYPE_3RDP_PBRR;
+ fetch_size = sizeof(struct cppi5_desc_hdr_t);
+ }
+
+ req_tx.valid_params = TISCI_UDMA_TCHAN_VALID_PARAMS;
+ req_tx.nav_id = tisci_rm->tisci_dev_id;
+ req_tx.index = tchan->id;
+ req_tx.tx_chan_type = mode;
+ req_tx.tx_supr_tdpkt = uc->config.notdpkt;
+ req_tx.tx_fetch_size = fetch_size >> 2;
+ req_tx.txcq_qnum = tc_ring;
+ req_tx.tx_atype = uc->config.atype;
+ if (uc->config.ep_type == PSIL_EP_PDMA_XY &&
+ ud->match_data->flags & UDMA_FLAG_TDTYPE) {
+ /* wait for peer to complete the teardown for PDMAs */
+ req_tx.valid_params |=
+ TI_SCI_MSG_VALUE_RM_UDMAP_CH_TX_TDTYPE_VALID;
+ req_tx.tx_tdtype = 1;
+ }
+
+ ret = tisci_ops->tx_ch_cfg(tisci_rm->tisci, &req_tx);
+ if (ret)
+ dev_err(ud->dev, "tchan%d cfg failed %d\n", tchan->id, ret);
+
+ return ret;
+}
+
+static int bcdma_tisci_tx_channel_config(struct udma_chan *uc)
+{
+ struct udma_dev *ud = uc->ud;
+ struct udma_tisci_rm *tisci_rm = &ud->tisci_rm;
+ const struct ti_sci_rm_udmap_ops *tisci_ops = tisci_rm->tisci_udmap_ops;
+ struct udma_tchan *tchan = uc->tchan;
+ struct ti_sci_msg_rm_udmap_tx_ch_cfg req_tx = { 0 };
+ int ret;
+
+ req_tx.valid_params = TISCI_BCDMA_TCHAN_VALID_PARAMS;
+ req_tx.nav_id = tisci_rm->tisci_dev_id;
+ req_tx.index = tchan->id;
+ req_tx.tx_supr_tdpkt = uc->config.notdpkt;
+ if (ud->match_data->flags & UDMA_FLAG_TDTYPE) {
+ /* wait for peer to complete the teardown for PDMAs */
+ req_tx.valid_params |=
+ TI_SCI_MSG_VALUE_RM_UDMAP_CH_TX_TDTYPE_VALID;
+ req_tx.tx_tdtype = 1;
+ }
+
+ ret = tisci_ops->tx_ch_cfg(tisci_rm->tisci, &req_tx);
+ if (ret)
+ dev_err(ud->dev, "tchan%d cfg failed %d\n", tchan->id, ret);
+
+ return ret;
+}
+
+#define pktdma_tisci_tx_channel_config bcdma_tisci_tx_channel_config
+
+static int udma_tisci_rx_channel_config(struct udma_chan *uc)
+{
+ struct udma_dev *ud = uc->ud;
+ struct udma_tisci_rm *tisci_rm = &ud->tisci_rm;
+ const struct ti_sci_rm_udmap_ops *tisci_ops = tisci_rm->tisci_udmap_ops;
+ struct udma_rchan *rchan = uc->rchan;
+ int fd_ring = k3_ringacc_get_ring_id(uc->rflow->fd_ring);
+ int rx_ring = k3_ringacc_get_ring_id(uc->rflow->r_ring);
+ struct ti_sci_msg_rm_udmap_rx_ch_cfg req_rx = { 0 };
+ struct ti_sci_msg_rm_udmap_flow_cfg flow_req = { 0 };
+ u32 mode, fetch_size;
+ int ret;
+
+ if (uc->config.pkt_mode) {
+ mode = TI_SCI_RM_UDMAP_CHAN_TYPE_PKT_PBRR;
+ fetch_size = cppi5_hdesc_calc_size(uc->config.needs_epib,
+ uc->config.psd_size, 0);
+ } else {
+ mode = TI_SCI_RM_UDMAP_CHAN_TYPE_3RDP_PBRR;
+ fetch_size = sizeof(struct cppi5_desc_hdr_t);
+ }
+
+ req_rx.valid_params = TISCI_UDMA_RCHAN_VALID_PARAMS;
+ req_rx.nav_id = tisci_rm->tisci_dev_id;
+ req_rx.index = rchan->id;
+ req_rx.rx_fetch_size = fetch_size >> 2;
+ req_rx.rxcq_qnum = rx_ring;
+ req_rx.rx_chan_type = mode;
+ req_rx.rx_atype = uc->config.atype;
+
+ ret = tisci_ops->rx_ch_cfg(tisci_rm->tisci, &req_rx);
+ if (ret) {
+ dev_err(ud->dev, "rchan%d cfg failed %d\n", rchan->id, ret);
+ return ret;
+ }
+
+ flow_req.valid_params =
+ TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_EINFO_PRESENT_VALID |
+ TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_PSINFO_PRESENT_VALID |
+ TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_ERROR_HANDLING_VALID |
+ TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_DESC_TYPE_VALID |
+ TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_DEST_QNUM_VALID |
+ TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_SRC_TAG_HI_SEL_VALID |
+ TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_SRC_TAG_LO_SEL_VALID |
+ TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_DEST_TAG_HI_SEL_VALID |
+ TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_DEST_TAG_LO_SEL_VALID |
+ TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_FDQ0_SZ0_QNUM_VALID |
+ TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_FDQ1_QNUM_VALID |
+ TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_FDQ2_QNUM_VALID |
+ TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_FDQ3_QNUM_VALID;
+
+ flow_req.nav_id = tisci_rm->tisci_dev_id;
+ flow_req.flow_index = rchan->id;
+
+ if (uc->config.needs_epib)
+ flow_req.rx_einfo_present = 1;
+ else
+ flow_req.rx_einfo_present = 0;
+ if (uc->config.psd_size)
+ flow_req.rx_psinfo_present = 1;
+ else
+ flow_req.rx_psinfo_present = 0;
+ flow_req.rx_error_handling = 1;
+ flow_req.rx_dest_qnum = rx_ring;
+ flow_req.rx_src_tag_hi_sel = UDMA_RFLOW_SRCTAG_NONE;
+ flow_req.rx_src_tag_lo_sel = UDMA_RFLOW_SRCTAG_SRC_TAG;
+ flow_req.rx_dest_tag_hi_sel = UDMA_RFLOW_DSTTAG_DST_TAG_HI;
+ flow_req.rx_dest_tag_lo_sel = UDMA_RFLOW_DSTTAG_DST_TAG_LO;
+ flow_req.rx_fdq0_sz0_qnum = fd_ring;
+ flow_req.rx_fdq1_qnum = fd_ring;
+ flow_req.rx_fdq2_qnum = fd_ring;
+ flow_req.rx_fdq3_qnum = fd_ring;
+
+ ret = tisci_ops->rx_flow_cfg(tisci_rm->tisci, &flow_req);
+
+ if (ret)
+ dev_err(ud->dev, "flow%d config failed: %d\n", rchan->id, ret);
+
+ return 0;
+}
+
+static int bcdma_tisci_rx_channel_config(struct udma_chan *uc)
+{
+ struct udma_dev *ud = uc->ud;
+ struct udma_tisci_rm *tisci_rm = &ud->tisci_rm;
+ const struct ti_sci_rm_udmap_ops *tisci_ops = tisci_rm->tisci_udmap_ops;
+ struct udma_rchan *rchan = uc->rchan;
+ struct ti_sci_msg_rm_udmap_rx_ch_cfg req_rx = { 0 };
+ int ret;
+
+ req_rx.valid_params = TISCI_BCDMA_RCHAN_VALID_PARAMS;
+ req_rx.nav_id = tisci_rm->tisci_dev_id;
+ req_rx.index = rchan->id;
+
+ ret = tisci_ops->rx_ch_cfg(tisci_rm->tisci, &req_rx);
+ if (ret)
+ dev_err(ud->dev, "rchan%d cfg failed %d\n", rchan->id, ret);
+
+ return ret;
+}
+
+static int pktdma_tisci_rx_channel_config(struct udma_chan *uc)
+{
+ struct udma_dev *ud = uc->ud;
+ struct udma_tisci_rm *tisci_rm = &ud->tisci_rm;
+ const struct ti_sci_rm_udmap_ops *tisci_ops = tisci_rm->tisci_udmap_ops;
+ struct ti_sci_msg_rm_udmap_rx_ch_cfg req_rx = { 0 };
+ struct ti_sci_msg_rm_udmap_flow_cfg flow_req = { 0 };
+ int ret;
+
+ req_rx.valid_params = TISCI_BCDMA_RCHAN_VALID_PARAMS;
+ req_rx.nav_id = tisci_rm->tisci_dev_id;
+ req_rx.index = uc->rchan->id;
+
+ ret = tisci_ops->rx_ch_cfg(tisci_rm->tisci, &req_rx);
+ if (ret) {
+ dev_err(ud->dev, "rchan%d cfg failed %d\n", uc->rchan->id, ret);
+ return ret;
+ }
+
+ flow_req.valid_params =
+ TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_EINFO_PRESENT_VALID |
+ TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_PSINFO_PRESENT_VALID |
+ TI_SCI_MSG_VALUE_RM_UDMAP_FLOW_ERROR_HANDLING_VALID;
+
+ flow_req.nav_id = tisci_rm->tisci_dev_id;
+ flow_req.flow_index = uc->rflow->id;
+
+ if (uc->config.needs_epib)
+ flow_req.rx_einfo_present = 1;
+ else
+ flow_req.rx_einfo_present = 0;
+ if (uc->config.psd_size)
+ flow_req.rx_psinfo_present = 1;
+ else
+ flow_req.rx_psinfo_present = 0;
+ flow_req.rx_error_handling = 1;
+
+ ret = tisci_ops->rx_flow_cfg(tisci_rm->tisci, &flow_req);
+
+ if (ret)
+ dev_err(ud->dev, "flow%d config failed: %d\n", uc->rflow->id,
+ ret);
+
+ return ret;
+}
+
+static int udma_alloc_chan_resources(struct dma_chan *chan)
+{
+ struct udma_chan *uc = to_udma_chan(chan);
+ struct udma_dev *ud = to_udma_dev(chan->device);
+ const struct udma_soc_data *soc_data = ud->soc_data;
+ struct k3_ring *irq_ring;
+ u32 irq_udma_idx;
+ int ret;
+
+ uc->dma_dev = ud->dev;
+
+ if (uc->config.pkt_mode || uc->config.dir == DMA_MEM_TO_MEM) {
+ uc->use_dma_pool = true;
+ /* in case of MEM_TO_MEM we have maximum of two TRs */
+ if (uc->config.dir == DMA_MEM_TO_MEM) {
+ uc->config.hdesc_size = cppi5_trdesc_calc_size(
+ sizeof(struct cppi5_tr_type15_t), 2);
+ uc->config.pkt_mode = false;
+ }
+ }
+
+ if (uc->use_dma_pool) {
+ uc->hdesc_pool = dma_pool_create(uc->name, ud->ddev.dev,
+ uc->config.hdesc_size,
+ ud->desc_align,
+ 0);
+ if (!uc->hdesc_pool) {
+ dev_err(ud->ddev.dev,
+ "Descriptor pool allocation failed\n");
+ uc->use_dma_pool = false;
+ ret = -ENOMEM;
+ goto err_cleanup;
+ }
+ }
+
+ /*
+ * Make sure that the completion is in a known state:
+ * No teardown, the channel is idle
+ */
+ reinit_completion(&uc->teardown_completed);
+ complete_all(&uc->teardown_completed);
+ uc->state = UDMA_CHAN_IS_IDLE;
+
+ switch (uc->config.dir) {
+ case DMA_MEM_TO_MEM:
+ /* Non synchronized - mem to mem type of transfer */
+ dev_dbg(uc->ud->dev, "%s: chan%d as MEM-to-MEM\n", __func__,
+ uc->id);
+
+ ret = udma_get_chan_pair(uc);
+ if (ret)
+ goto err_cleanup;
+
+ ret = udma_alloc_tx_resources(uc);
+ if (ret) {
+ udma_put_rchan(uc);
+ goto err_cleanup;
+ }
+
+ ret = udma_alloc_rx_resources(uc);
+ if (ret) {
+ udma_free_tx_resources(uc);
+ goto err_cleanup;
+ }
+
+ uc->config.src_thread = ud->psil_base + uc->tchan->id;
+ uc->config.dst_thread = (ud->psil_base + uc->rchan->id) |
+ K3_PSIL_DST_THREAD_ID_OFFSET;
+
+ irq_ring = uc->tchan->tc_ring;
+ irq_udma_idx = uc->tchan->id;
+
+ ret = udma_tisci_m2m_channel_config(uc);
+ break;
+ case DMA_MEM_TO_DEV:
+ /* Slave transfer synchronized - mem to dev (TX) trasnfer */
+ dev_dbg(uc->ud->dev, "%s: chan%d as MEM-to-DEV\n", __func__,
+ uc->id);
+
+ ret = udma_alloc_tx_resources(uc);
+ if (ret)
+ goto err_cleanup;
+
+ uc->config.src_thread = ud->psil_base + uc->tchan->id;
+ uc->config.dst_thread = uc->config.remote_thread_id;
+ uc->config.dst_thread |= K3_PSIL_DST_THREAD_ID_OFFSET;
+
+ irq_ring = uc->tchan->tc_ring;
+ irq_udma_idx = uc->tchan->id;
+
+ ret = udma_tisci_tx_channel_config(uc);
+ break;
+ case DMA_DEV_TO_MEM:
+ /* Slave transfer synchronized - dev to mem (RX) trasnfer */
+ dev_dbg(uc->ud->dev, "%s: chan%d as DEV-to-MEM\n", __func__,
+ uc->id);
+
+ ret = udma_alloc_rx_resources(uc);
+ if (ret)
+ goto err_cleanup;
+
+ uc->config.src_thread = uc->config.remote_thread_id;
+ uc->config.dst_thread = (ud->psil_base + uc->rchan->id) |
+ K3_PSIL_DST_THREAD_ID_OFFSET;
+
+ irq_ring = uc->rflow->r_ring;
+ irq_udma_idx = soc_data->oes.udma_rchan + uc->rchan->id;
+
+ ret = udma_tisci_rx_channel_config(uc);
+ break;
+ default:
+ /* Can not happen */
+ dev_err(uc->ud->dev, "%s: chan%d invalid direction (%u)\n",
+ __func__, uc->id, uc->config.dir);
+ ret = -EINVAL;
+ goto err_cleanup;
+
+ }
+
+ /* check if the channel configuration was successful */
+ if (ret)
+ goto err_res_free;
+
+ if (udma_is_chan_running(uc)) {
+ dev_warn(ud->dev, "chan%d: is running!\n", uc->id);
+ udma_reset_chan(uc, false);
+ if (udma_is_chan_running(uc)) {
+ dev_err(ud->dev, "chan%d: won't stop!\n", uc->id);
+ ret = -EBUSY;
+ goto err_res_free;
+ }
+ }
+
+ /* PSI-L pairing */
+ ret = navss_psil_pair(ud, uc->config.src_thread, uc->config.dst_thread);
+ if (ret) {
+ dev_err(ud->dev, "PSI-L pairing failed: 0x%04x -> 0x%04x\n",
+ uc->config.src_thread, uc->config.dst_thread);
+ goto err_res_free;
+ }
+
+ uc->psil_paired = true;
+
+ uc->irq_num_ring = k3_ringacc_get_ring_irq_num(irq_ring);
+ if (uc->irq_num_ring <= 0) {
+ dev_err(ud->dev, "Failed to get ring irq (index: %u)\n",
+ k3_ringacc_get_ring_id(irq_ring));
+ ret = -EINVAL;
+ goto err_psi_free;
+ }
+
+ ret = request_irq(uc->irq_num_ring, udma_ring_irq_handler,
+ IRQF_TRIGGER_HIGH, uc->name, uc);
+ if (ret) {
+ dev_err(ud->dev, "chan%d: ring irq request failed\n", uc->id);
+ goto err_irq_free;
+ }
+
+ /* Event from UDMA (TR events) only needed for slave TR mode channels */
+ if (is_slave_direction(uc->config.dir) && !uc->config.pkt_mode) {
+ uc->irq_num_udma = msi_get_virq(ud->dev, irq_udma_idx);
+ if (uc->irq_num_udma <= 0) {
+ dev_err(ud->dev, "Failed to get udma irq (index: %u)\n",
+ irq_udma_idx);
+ free_irq(uc->irq_num_ring, uc);
+ ret = -EINVAL;
+ goto err_irq_free;
+ }
+
+ ret = request_irq(uc->irq_num_udma, udma_udma_irq_handler, 0,
+ uc->name, uc);
+ if (ret) {
+ dev_err(ud->dev, "chan%d: UDMA irq request failed\n",
+ uc->id);
+ free_irq(uc->irq_num_ring, uc);
+ goto err_irq_free;
+ }
+ } else {
+ uc->irq_num_udma = 0;
+ }
+
+ udma_reset_rings(uc);
+
+ return 0;
+
+err_irq_free:
+ uc->irq_num_ring = 0;
+ uc->irq_num_udma = 0;
+err_psi_free:
+ navss_psil_unpair(ud, uc->config.src_thread, uc->config.dst_thread);
+ uc->psil_paired = false;
+err_res_free:
+ udma_free_tx_resources(uc);
+ udma_free_rx_resources(uc);
+err_cleanup:
+ udma_reset_uchan(uc);
+
+ if (uc->use_dma_pool) {
+ dma_pool_destroy(uc->hdesc_pool);
+ uc->use_dma_pool = false;
+ }
+
+ return ret;
+}
+
+static int bcdma_alloc_chan_resources(struct dma_chan *chan)
+{
+ struct udma_chan *uc = to_udma_chan(chan);
+ struct udma_dev *ud = to_udma_dev(chan->device);
+ const struct udma_oes_offsets *oes = &ud->soc_data->oes;
+ u32 irq_udma_idx, irq_ring_idx;
+ int ret;
+
+ /* Only TR mode is supported */
+ uc->config.pkt_mode = false;
+
+ /*
+ * Make sure that the completion is in a known state:
+ * No teardown, the channel is idle
+ */
+ reinit_completion(&uc->teardown_completed);
+ complete_all(&uc->teardown_completed);
+ uc->state = UDMA_CHAN_IS_IDLE;
+
+ switch (uc->config.dir) {
+ case DMA_MEM_TO_MEM:
+ /* Non synchronized - mem to mem type of transfer */
+ dev_dbg(uc->ud->dev, "%s: chan%d as MEM-to-MEM\n", __func__,
+ uc->id);
+
+ ret = bcdma_alloc_bchan_resources(uc);
+ if (ret)
+ return ret;
+
+ irq_ring_idx = uc->bchan->id + oes->bcdma_bchan_ring;
+ irq_udma_idx = uc->bchan->id + oes->bcdma_bchan_data;
+
+ ret = bcdma_tisci_m2m_channel_config(uc);
+ break;
+ case DMA_MEM_TO_DEV:
+ /* Slave transfer synchronized - mem to dev (TX) trasnfer */
+ dev_dbg(uc->ud->dev, "%s: chan%d as MEM-to-DEV\n", __func__,
+ uc->id);
+
+ ret = udma_alloc_tx_resources(uc);
+ if (ret) {
+ uc->config.remote_thread_id = -1;
+ return ret;
+ }
+
+ uc->config.src_thread = ud->psil_base + uc->tchan->id;
+ uc->config.dst_thread = uc->config.remote_thread_id;
+ uc->config.dst_thread |= K3_PSIL_DST_THREAD_ID_OFFSET;
+
+ irq_ring_idx = uc->tchan->id + oes->bcdma_tchan_ring;
+ irq_udma_idx = uc->tchan->id + oes->bcdma_tchan_data;
+
+ ret = bcdma_tisci_tx_channel_config(uc);
+ break;
+ case DMA_DEV_TO_MEM:
+ /* Slave transfer synchronized - dev to mem (RX) trasnfer */
+ dev_dbg(uc->ud->dev, "%s: chan%d as DEV-to-MEM\n", __func__,
+ uc->id);
+
+ ret = udma_alloc_rx_resources(uc);
+ if (ret) {
+ uc->config.remote_thread_id = -1;
+ return ret;
+ }
+
+ uc->config.src_thread = uc->config.remote_thread_id;
+ uc->config.dst_thread = (ud->psil_base + uc->rchan->id) |
+ K3_PSIL_DST_THREAD_ID_OFFSET;
+
+ irq_ring_idx = uc->rchan->id + oes->bcdma_rchan_ring;
+ irq_udma_idx = uc->rchan->id + oes->bcdma_rchan_data;
+
+ ret = bcdma_tisci_rx_channel_config(uc);
+ break;
+ default:
+ /* Can not happen */
+ dev_err(uc->ud->dev, "%s: chan%d invalid direction (%u)\n",
+ __func__, uc->id, uc->config.dir);
+ return -EINVAL;
+ }
+
+ /* check if the channel configuration was successful */
+ if (ret)
+ goto err_res_free;
+
+ if (udma_is_chan_running(uc)) {
+ dev_warn(ud->dev, "chan%d: is running!\n", uc->id);
+ udma_reset_chan(uc, false);
+ if (udma_is_chan_running(uc)) {
+ dev_err(ud->dev, "chan%d: won't stop!\n", uc->id);
+ ret = -EBUSY;
+ goto err_res_free;
+ }
+ }
+
+ uc->dma_dev = dmaengine_get_dma_device(chan);
+ if (uc->config.dir == DMA_MEM_TO_MEM && !uc->config.tr_trigger_type) {
+ uc->config.hdesc_size = cppi5_trdesc_calc_size(
+ sizeof(struct cppi5_tr_type15_t), 2);
+
+ uc->hdesc_pool = dma_pool_create(uc->name, ud->ddev.dev,
+ uc->config.hdesc_size,
+ ud->desc_align,
+ 0);
+ if (!uc->hdesc_pool) {
+ dev_err(ud->ddev.dev,
+ "Descriptor pool allocation failed\n");
+ uc->use_dma_pool = false;
+ ret = -ENOMEM;
+ goto err_res_free;
+ }
+
+ uc->use_dma_pool = true;
+ } else if (uc->config.dir != DMA_MEM_TO_MEM) {
+ /* PSI-L pairing */
+ ret = navss_psil_pair(ud, uc->config.src_thread,
+ uc->config.dst_thread);
+ if (ret) {
+ dev_err(ud->dev,
+ "PSI-L pairing failed: 0x%04x -> 0x%04x\n",
+ uc->config.src_thread, uc->config.dst_thread);
+ goto err_res_free;
+ }
+
+ uc->psil_paired = true;
+ }
+
+ uc->irq_num_ring = msi_get_virq(ud->dev, irq_ring_idx);
+ if (uc->irq_num_ring <= 0) {
+ dev_err(ud->dev, "Failed to get ring irq (index: %u)\n",
+ irq_ring_idx);
+ ret = -EINVAL;
+ goto err_psi_free;
+ }
+
+ ret = request_irq(uc->irq_num_ring, udma_ring_irq_handler,
+ IRQF_TRIGGER_HIGH, uc->name, uc);
+ if (ret) {
+ dev_err(ud->dev, "chan%d: ring irq request failed\n", uc->id);
+ goto err_irq_free;
+ }
+
+ /* Event from BCDMA (TR events) only needed for slave channels */
+ if (is_slave_direction(uc->config.dir)) {
+ uc->irq_num_udma = msi_get_virq(ud->dev, irq_udma_idx);
+ if (uc->irq_num_udma <= 0) {
+ dev_err(ud->dev, "Failed to get bcdma irq (index: %u)\n",
+ irq_udma_idx);
+ free_irq(uc->irq_num_ring, uc);
+ ret = -EINVAL;
+ goto err_irq_free;
+ }
+
+ ret = request_irq(uc->irq_num_udma, udma_udma_irq_handler, 0,
+ uc->name, uc);
+ if (ret) {
+ dev_err(ud->dev, "chan%d: BCDMA irq request failed\n",
+ uc->id);
+ free_irq(uc->irq_num_ring, uc);
+ goto err_irq_free;
+ }
+ } else {
+ uc->irq_num_udma = 0;
+ }
+
+ udma_reset_rings(uc);
+
+ INIT_DELAYED_WORK_ONSTACK(&uc->tx_drain.work,
+ udma_check_tx_completion);
+ return 0;
+
+err_irq_free:
+ uc->irq_num_ring = 0;
+ uc->irq_num_udma = 0;
+err_psi_free:
+ if (uc->psil_paired)
+ navss_psil_unpair(ud, uc->config.src_thread,
+ uc->config.dst_thread);
+ uc->psil_paired = false;
+err_res_free:
+ bcdma_free_bchan_resources(uc);
+ udma_free_tx_resources(uc);
+ udma_free_rx_resources(uc);
+
+ udma_reset_uchan(uc);
+
+ if (uc->use_dma_pool) {
+ dma_pool_destroy(uc->hdesc_pool);
+ uc->use_dma_pool = false;
+ }
+
+ return ret;
+}
+
+static int bcdma_router_config(struct dma_chan *chan)
+{
+ struct k3_event_route_data *router_data = chan->route_data;
+ struct udma_chan *uc = to_udma_chan(chan);
+ u32 trigger_event;
+
+ if (!uc->bchan)
+ return -EINVAL;
+
+ if (uc->config.tr_trigger_type != 1 && uc->config.tr_trigger_type != 2)
+ return -EINVAL;
+
+ trigger_event = uc->ud->soc_data->bcdma_trigger_event_offset;
+ trigger_event += (uc->bchan->id * 2) + uc->config.tr_trigger_type - 1;
+
+ return router_data->set_event(router_data->priv, trigger_event);
+}
+
+static int pktdma_alloc_chan_resources(struct dma_chan *chan)
+{
+ struct udma_chan *uc = to_udma_chan(chan);
+ struct udma_dev *ud = to_udma_dev(chan->device);
+ const struct udma_oes_offsets *oes = &ud->soc_data->oes;
+ u32 irq_ring_idx;
+ int ret;
+
+ /*
+ * Make sure that the completion is in a known state:
+ * No teardown, the channel is idle
+ */
+ reinit_completion(&uc->teardown_completed);
+ complete_all(&uc->teardown_completed);
+ uc->state = UDMA_CHAN_IS_IDLE;
+
+ switch (uc->config.dir) {
+ case DMA_MEM_TO_DEV:
+ /* Slave transfer synchronized - mem to dev (TX) trasnfer */
+ dev_dbg(uc->ud->dev, "%s: chan%d as MEM-to-DEV\n", __func__,
+ uc->id);
+
+ ret = udma_alloc_tx_resources(uc);
+ if (ret) {
+ uc->config.remote_thread_id = -1;
+ return ret;
+ }
+
+ uc->config.src_thread = ud->psil_base + uc->tchan->id;
+ uc->config.dst_thread = uc->config.remote_thread_id;
+ uc->config.dst_thread |= K3_PSIL_DST_THREAD_ID_OFFSET;
+
+ irq_ring_idx = uc->tchan->tflow_id + oes->pktdma_tchan_flow;
+
+ ret = pktdma_tisci_tx_channel_config(uc);
+ break;
+ case DMA_DEV_TO_MEM:
+ /* Slave transfer synchronized - dev to mem (RX) trasnfer */
+ dev_dbg(uc->ud->dev, "%s: chan%d as DEV-to-MEM\n", __func__,
+ uc->id);
+
+ ret = udma_alloc_rx_resources(uc);
+ if (ret) {
+ uc->config.remote_thread_id = -1;
+ return ret;
+ }
+
+ uc->config.src_thread = uc->config.remote_thread_id;
+ uc->config.dst_thread = (ud->psil_base + uc->rchan->id) |
+ K3_PSIL_DST_THREAD_ID_OFFSET;
+
+ irq_ring_idx = uc->rflow->id + oes->pktdma_rchan_flow;
+
+ ret = pktdma_tisci_rx_channel_config(uc);
+ break;
+ default:
+ /* Can not happen */
+ dev_err(uc->ud->dev, "%s: chan%d invalid direction (%u)\n",
+ __func__, uc->id, uc->config.dir);
+ return -EINVAL;
+ }
+
+ /* check if the channel configuration was successful */
+ if (ret)
+ goto err_res_free;
+
+ if (udma_is_chan_running(uc)) {
+ dev_warn(ud->dev, "chan%d: is running!\n", uc->id);
+ udma_reset_chan(uc, false);
+ if (udma_is_chan_running(uc)) {
+ dev_err(ud->dev, "chan%d: won't stop!\n", uc->id);
+ ret = -EBUSY;
+ goto err_res_free;
+ }
+ }
+
+ uc->dma_dev = dmaengine_get_dma_device(chan);
+ uc->hdesc_pool = dma_pool_create(uc->name, uc->dma_dev,
+ uc->config.hdesc_size, ud->desc_align,
+ 0);
+ if (!uc->hdesc_pool) {
+ dev_err(ud->ddev.dev,
+ "Descriptor pool allocation failed\n");
+ uc->use_dma_pool = false;
+ ret = -ENOMEM;
+ goto err_res_free;
+ }
+
+ uc->use_dma_pool = true;
+
+ /* PSI-L pairing */
+ ret = navss_psil_pair(ud, uc->config.src_thread, uc->config.dst_thread);
+ if (ret) {
+ dev_err(ud->dev, "PSI-L pairing failed: 0x%04x -> 0x%04x\n",
+ uc->config.src_thread, uc->config.dst_thread);
+ goto err_res_free;
+ }
+
+ uc->psil_paired = true;
+
+ uc->irq_num_ring = msi_get_virq(ud->dev, irq_ring_idx);
+ if (uc->irq_num_ring <= 0) {
+ dev_err(ud->dev, "Failed to get ring irq (index: %u)\n",
+ irq_ring_idx);
+ ret = -EINVAL;
+ goto err_psi_free;
+ }
+
+ ret = request_irq(uc->irq_num_ring, udma_ring_irq_handler,
+ IRQF_TRIGGER_HIGH, uc->name, uc);
+ if (ret) {
+ dev_err(ud->dev, "chan%d: ring irq request failed\n", uc->id);
+ goto err_irq_free;
+ }
+
+ uc->irq_num_udma = 0;
+
+ udma_reset_rings(uc);
+
+ INIT_DELAYED_WORK_ONSTACK(&uc->tx_drain.work,
+ udma_check_tx_completion);
+
+ if (uc->tchan)
+ dev_dbg(ud->dev,
+ "chan%d: tchan%d, tflow%d, Remote thread: 0x%04x\n",
+ uc->id, uc->tchan->id, uc->tchan->tflow_id,
+ uc->config.remote_thread_id);
+ else if (uc->rchan)
+ dev_dbg(ud->dev,
+ "chan%d: rchan%d, rflow%d, Remote thread: 0x%04x\n",
+ uc->id, uc->rchan->id, uc->rflow->id,
+ uc->config.remote_thread_id);
+ return 0;
+
+err_irq_free:
+ uc->irq_num_ring = 0;
+err_psi_free:
+ navss_psil_unpair(ud, uc->config.src_thread, uc->config.dst_thread);
+ uc->psil_paired = false;
+err_res_free:
+ udma_free_tx_resources(uc);
+ udma_free_rx_resources(uc);
+
+ udma_reset_uchan(uc);
+
+ dma_pool_destroy(uc->hdesc_pool);
+ uc->use_dma_pool = false;
+
+ return ret;
+}
+
+static int udma_slave_config(struct dma_chan *chan,
+ struct dma_slave_config *cfg)
+{
+ struct udma_chan *uc = to_udma_chan(chan);
+
+ memcpy(&uc->cfg, cfg, sizeof(uc->cfg));
+
+ return 0;
+}
+
+static struct udma_desc *udma_alloc_tr_desc(struct udma_chan *uc,
+ size_t tr_size, int tr_count,
+ enum dma_transfer_direction dir)
+{
+ struct udma_hwdesc *hwdesc;
+ struct cppi5_desc_hdr_t *tr_desc;
+ struct udma_desc *d;
+ u32 reload_count = 0;
+ u32 ring_id;
+
+ switch (tr_size) {
+ case 16:
+ case 32:
+ case 64:
+ case 128:
+ break;
+ default:
+ dev_err(uc->ud->dev, "Unsupported TR size of %zu\n", tr_size);
+ return NULL;
+ }
+
+ /* We have only one descriptor containing multiple TRs */
+ d = kzalloc(sizeof(*d) + sizeof(d->hwdesc[0]), GFP_NOWAIT);
+ if (!d)
+ return NULL;
+
+ d->sglen = tr_count;
+
+ d->hwdesc_count = 1;
+ hwdesc = &d->hwdesc[0];
+
+ /* Allocate memory for DMA ring descriptor */
+ if (uc->use_dma_pool) {
+ hwdesc->cppi5_desc_size = uc->config.hdesc_size;
+ hwdesc->cppi5_desc_vaddr = dma_pool_zalloc(uc->hdesc_pool,
+ GFP_NOWAIT,
+ &hwdesc->cppi5_desc_paddr);
+ } else {
+ hwdesc->cppi5_desc_size = cppi5_trdesc_calc_size(tr_size,
+ tr_count);
+ hwdesc->cppi5_desc_size = ALIGN(hwdesc->cppi5_desc_size,
+ uc->ud->desc_align);
+ hwdesc->cppi5_desc_vaddr = dma_alloc_coherent(uc->ud->dev,
+ hwdesc->cppi5_desc_size,
+ &hwdesc->cppi5_desc_paddr,
+ GFP_NOWAIT);
+ }
+
+ if (!hwdesc->cppi5_desc_vaddr) {
+ kfree(d);
+ return NULL;
+ }
+
+ /* Start of the TR req records */
+ hwdesc->tr_req_base = hwdesc->cppi5_desc_vaddr + tr_size;
+ /* Start address of the TR response array */
+ hwdesc->tr_resp_base = hwdesc->tr_req_base + tr_size * tr_count;
+
+ tr_desc = hwdesc->cppi5_desc_vaddr;
+
+ if (uc->cyclic)
+ reload_count = CPPI5_INFO0_TRDESC_RLDCNT_INFINITE;
+
+ if (dir == DMA_DEV_TO_MEM)
+ ring_id = k3_ringacc_get_ring_id(uc->rflow->r_ring);
+ else
+ ring_id = k3_ringacc_get_ring_id(uc->tchan->tc_ring);
+
+ cppi5_trdesc_init(tr_desc, tr_count, tr_size, 0, reload_count);
+ cppi5_desc_set_pktids(tr_desc, uc->id,
+ CPPI5_INFO1_DESC_FLOWID_DEFAULT);
+ cppi5_desc_set_retpolicy(tr_desc, 0, ring_id);
+
+ return d;
+}
+
+/**
+ * udma_get_tr_counters - calculate TR counters for a given length
+ * @len: Length of the trasnfer
+ * @align_to: Preferred alignment
+ * @tr0_cnt0: First TR icnt0
+ * @tr0_cnt1: First TR icnt1
+ * @tr1_cnt0: Second (if used) TR icnt0
+ *
+ * For len < SZ_64K only one TR is enough, tr1_cnt0 is not updated
+ * For len >= SZ_64K two TRs are used in a simple way:
+ * First TR: SZ_64K-alignment blocks (tr0_cnt0, tr0_cnt1)
+ * Second TR: the remaining length (tr1_cnt0)
+ *
+ * Returns the number of TRs the length needs (1 or 2)
+ * -EINVAL if the length can not be supported
+ */
+static int udma_get_tr_counters(size_t len, unsigned long align_to,
+ u16 *tr0_cnt0, u16 *tr0_cnt1, u16 *tr1_cnt0)
+{
+ if (len < SZ_64K) {
+ *tr0_cnt0 = len;
+ *tr0_cnt1 = 1;
+
+ return 1;
+ }
+
+ if (align_to > 3)
+ align_to = 3;
+
+realign:
+ *tr0_cnt0 = SZ_64K - BIT(align_to);
+ if (len / *tr0_cnt0 >= SZ_64K) {
+ if (align_to) {
+ align_to--;
+ goto realign;
+ }
+ return -EINVAL;
+ }
+
+ *tr0_cnt1 = len / *tr0_cnt0;
+ *tr1_cnt0 = len % *tr0_cnt0;
+
+ return 2;
+}
+
+static struct udma_desc *
+udma_prep_slave_sg_tr(struct udma_chan *uc, struct scatterlist *sgl,
+ unsigned int sglen, enum dma_transfer_direction dir,
+ unsigned long tx_flags, void *context)
+{
+ struct scatterlist *sgent;
+ struct udma_desc *d;
+ struct cppi5_tr_type1_t *tr_req = NULL;
+ u16 tr0_cnt0, tr0_cnt1, tr1_cnt0;
+ unsigned int i;
+ size_t tr_size;
+ int num_tr = 0;
+ int tr_idx = 0;
+ u64 asel;
+
+ /* estimate the number of TRs we will need */
+ for_each_sg(sgl, sgent, sglen, i) {
+ if (sg_dma_len(sgent) < SZ_64K)
+ num_tr++;
+ else
+ num_tr += 2;
+ }
+
+ /* Now allocate and setup the descriptor. */
+ tr_size = sizeof(struct cppi5_tr_type1_t);
+ d = udma_alloc_tr_desc(uc, tr_size, num_tr, dir);
+ if (!d)
+ return NULL;
+
+ d->sglen = sglen;
+
+ if (uc->ud->match_data->type == DMA_TYPE_UDMA)
+ asel = 0;
+ else
+ asel = (u64)uc->config.asel << K3_ADDRESS_ASEL_SHIFT;
+
+ tr_req = d->hwdesc[0].tr_req_base;
+ for_each_sg(sgl, sgent, sglen, i) {
+ dma_addr_t sg_addr = sg_dma_address(sgent);
+
+ num_tr = udma_get_tr_counters(sg_dma_len(sgent), __ffs(sg_addr),
+ &tr0_cnt0, &tr0_cnt1, &tr1_cnt0);
+ if (num_tr < 0) {
+ dev_err(uc->ud->dev, "size %u is not supported\n",
+ sg_dma_len(sgent));
+ udma_free_hwdesc(uc, d);
+ kfree(d);
+ return NULL;
+ }
+
+ cppi5_tr_init(&tr_req[tr_idx].flags, CPPI5_TR_TYPE1, false,
+ false, CPPI5_TR_EVENT_SIZE_COMPLETION, 0);
+ cppi5_tr_csf_set(&tr_req[tr_idx].flags, CPPI5_TR_CSF_SUPR_EVT);
+
+ sg_addr |= asel;
+ tr_req[tr_idx].addr = sg_addr;
+ tr_req[tr_idx].icnt0 = tr0_cnt0;
+ tr_req[tr_idx].icnt1 = tr0_cnt1;
+ tr_req[tr_idx].dim1 = tr0_cnt0;
+ tr_idx++;
+
+ if (num_tr == 2) {
+ cppi5_tr_init(&tr_req[tr_idx].flags, CPPI5_TR_TYPE1,
+ false, false,
+ CPPI5_TR_EVENT_SIZE_COMPLETION, 0);
+ cppi5_tr_csf_set(&tr_req[tr_idx].flags,
+ CPPI5_TR_CSF_SUPR_EVT);
+
+ tr_req[tr_idx].addr = sg_addr + tr0_cnt1 * tr0_cnt0;
+ tr_req[tr_idx].icnt0 = tr1_cnt0;
+ tr_req[tr_idx].icnt1 = 1;
+ tr_req[tr_idx].dim1 = tr1_cnt0;
+ tr_idx++;
+ }
+
+ d->residue += sg_dma_len(sgent);
+ }
+
+ cppi5_tr_csf_set(&tr_req[tr_idx - 1].flags,
+ CPPI5_TR_CSF_SUPR_EVT | CPPI5_TR_CSF_EOP);
+
+ return d;
+}
+
+static struct udma_desc *
+udma_prep_slave_sg_triggered_tr(struct udma_chan *uc, struct scatterlist *sgl,
+ unsigned int sglen,
+ enum dma_transfer_direction dir,
+ unsigned long tx_flags, void *context)
+{
+ struct scatterlist *sgent;
+ struct cppi5_tr_type15_t *tr_req = NULL;
+ enum dma_slave_buswidth dev_width;
+ u16 tr_cnt0, tr_cnt1;
+ dma_addr_t dev_addr;
+ struct udma_desc *d;
+ unsigned int i;
+ size_t tr_size, sg_len;
+ int num_tr = 0;
+ int tr_idx = 0;
+ u32 burst, trigger_size, port_window;
+ u64 asel;
+
+ if (dir == DMA_DEV_TO_MEM) {
+ dev_addr = uc->cfg.src_addr;
+ dev_width = uc->cfg.src_addr_width;
+ burst = uc->cfg.src_maxburst;
+ port_window = uc->cfg.src_port_window_size;
+ } else if (dir == DMA_MEM_TO_DEV) {
+ dev_addr = uc->cfg.dst_addr;
+ dev_width = uc->cfg.dst_addr_width;
+ burst = uc->cfg.dst_maxburst;
+ port_window = uc->cfg.dst_port_window_size;
+ } else {
+ dev_err(uc->ud->dev, "%s: bad direction?\n", __func__);
+ return NULL;
+ }
+
+ if (!burst)
+ burst = 1;
+
+ if (port_window) {
+ if (port_window != burst) {
+ dev_err(uc->ud->dev,
+ "The burst must be equal to port_window\n");
+ return NULL;
+ }
+
+ tr_cnt0 = dev_width * port_window;
+ tr_cnt1 = 1;
+ } else {
+ tr_cnt0 = dev_width;
+ tr_cnt1 = burst;
+ }
+ trigger_size = tr_cnt0 * tr_cnt1;
+
+ /* estimate the number of TRs we will need */
+ for_each_sg(sgl, sgent, sglen, i) {
+ sg_len = sg_dma_len(sgent);
+
+ if (sg_len % trigger_size) {
+ dev_err(uc->ud->dev,
+ "Not aligned SG entry (%zu for %u)\n", sg_len,
+ trigger_size);
+ return NULL;
+ }
+
+ if (sg_len / trigger_size < SZ_64K)
+ num_tr++;
+ else
+ num_tr += 2;
+ }
+
+ /* Now allocate and setup the descriptor. */
+ tr_size = sizeof(struct cppi5_tr_type15_t);
+ d = udma_alloc_tr_desc(uc, tr_size, num_tr, dir);
+ if (!d)
+ return NULL;
+
+ d->sglen = sglen;
+
+ if (uc->ud->match_data->type == DMA_TYPE_UDMA) {
+ asel = 0;
+ } else {
+ asel = (u64)uc->config.asel << K3_ADDRESS_ASEL_SHIFT;
+ dev_addr |= asel;
+ }
+
+ tr_req = d->hwdesc[0].tr_req_base;
+ for_each_sg(sgl, sgent, sglen, i) {
+ u16 tr0_cnt2, tr0_cnt3, tr1_cnt2;
+ dma_addr_t sg_addr = sg_dma_address(sgent);
+
+ sg_len = sg_dma_len(sgent);
+ num_tr = udma_get_tr_counters(sg_len / trigger_size, 0,
+ &tr0_cnt2, &tr0_cnt3, &tr1_cnt2);
+ if (num_tr < 0) {
+ dev_err(uc->ud->dev, "size %zu is not supported\n",
+ sg_len);
+ udma_free_hwdesc(uc, d);
+ kfree(d);
+ return NULL;
+ }
+
+ cppi5_tr_init(&tr_req[tr_idx].flags, CPPI5_TR_TYPE15, false,
+ true, CPPI5_TR_EVENT_SIZE_COMPLETION, 0);
+ cppi5_tr_csf_set(&tr_req[tr_idx].flags, CPPI5_TR_CSF_SUPR_EVT);
+ cppi5_tr_set_trigger(&tr_req[tr_idx].flags,
+ uc->config.tr_trigger_type,
+ CPPI5_TR_TRIGGER_TYPE_ICNT2_DEC, 0, 0);
+
+ sg_addr |= asel;
+ if (dir == DMA_DEV_TO_MEM) {
+ tr_req[tr_idx].addr = dev_addr;
+ tr_req[tr_idx].icnt0 = tr_cnt0;
+ tr_req[tr_idx].icnt1 = tr_cnt1;
+ tr_req[tr_idx].icnt2 = tr0_cnt2;
+ tr_req[tr_idx].icnt3 = tr0_cnt3;
+ tr_req[tr_idx].dim1 = (-1) * tr_cnt0;
+
+ tr_req[tr_idx].daddr = sg_addr;
+ tr_req[tr_idx].dicnt0 = tr_cnt0;
+ tr_req[tr_idx].dicnt1 = tr_cnt1;
+ tr_req[tr_idx].dicnt2 = tr0_cnt2;
+ tr_req[tr_idx].dicnt3 = tr0_cnt3;
+ tr_req[tr_idx].ddim1 = tr_cnt0;
+ tr_req[tr_idx].ddim2 = trigger_size;
+ tr_req[tr_idx].ddim3 = trigger_size * tr0_cnt2;
+ } else {
+ tr_req[tr_idx].addr = sg_addr;
+ tr_req[tr_idx].icnt0 = tr_cnt0;
+ tr_req[tr_idx].icnt1 = tr_cnt1;
+ tr_req[tr_idx].icnt2 = tr0_cnt2;
+ tr_req[tr_idx].icnt3 = tr0_cnt3;
+ tr_req[tr_idx].dim1 = tr_cnt0;
+ tr_req[tr_idx].dim2 = trigger_size;
+ tr_req[tr_idx].dim3 = trigger_size * tr0_cnt2;
+
+ tr_req[tr_idx].daddr = dev_addr;
+ tr_req[tr_idx].dicnt0 = tr_cnt0;
+ tr_req[tr_idx].dicnt1 = tr_cnt1;
+ tr_req[tr_idx].dicnt2 = tr0_cnt2;
+ tr_req[tr_idx].dicnt3 = tr0_cnt3;
+ tr_req[tr_idx].ddim1 = (-1) * tr_cnt0;
+ }
+
+ tr_idx++;
+
+ if (num_tr == 2) {
+ cppi5_tr_init(&tr_req[tr_idx].flags, CPPI5_TR_TYPE15,
+ false, true,
+ CPPI5_TR_EVENT_SIZE_COMPLETION, 0);
+ cppi5_tr_csf_set(&tr_req[tr_idx].flags,
+ CPPI5_TR_CSF_SUPR_EVT);
+ cppi5_tr_set_trigger(&tr_req[tr_idx].flags,
+ uc->config.tr_trigger_type,
+ CPPI5_TR_TRIGGER_TYPE_ICNT2_DEC,
+ 0, 0);
+
+ sg_addr += trigger_size * tr0_cnt2 * tr0_cnt3;
+ if (dir == DMA_DEV_TO_MEM) {
+ tr_req[tr_idx].addr = dev_addr;
+ tr_req[tr_idx].icnt0 = tr_cnt0;
+ tr_req[tr_idx].icnt1 = tr_cnt1;
+ tr_req[tr_idx].icnt2 = tr1_cnt2;
+ tr_req[tr_idx].icnt3 = 1;
+ tr_req[tr_idx].dim1 = (-1) * tr_cnt0;
+
+ tr_req[tr_idx].daddr = sg_addr;
+ tr_req[tr_idx].dicnt0 = tr_cnt0;
+ tr_req[tr_idx].dicnt1 = tr_cnt1;
+ tr_req[tr_idx].dicnt2 = tr1_cnt2;
+ tr_req[tr_idx].dicnt3 = 1;
+ tr_req[tr_idx].ddim1 = tr_cnt0;
+ tr_req[tr_idx].ddim2 = trigger_size;
+ } else {
+ tr_req[tr_idx].addr = sg_addr;
+ tr_req[tr_idx].icnt0 = tr_cnt0;
+ tr_req[tr_idx].icnt1 = tr_cnt1;
+ tr_req[tr_idx].icnt2 = tr1_cnt2;
+ tr_req[tr_idx].icnt3 = 1;
+ tr_req[tr_idx].dim1 = tr_cnt0;
+ tr_req[tr_idx].dim2 = trigger_size;
+
+ tr_req[tr_idx].daddr = dev_addr;
+ tr_req[tr_idx].dicnt0 = tr_cnt0;
+ tr_req[tr_idx].dicnt1 = tr_cnt1;
+ tr_req[tr_idx].dicnt2 = tr1_cnt2;
+ tr_req[tr_idx].dicnt3 = 1;
+ tr_req[tr_idx].ddim1 = (-1) * tr_cnt0;
+ }
+ tr_idx++;
+ }
+
+ d->residue += sg_len;
+ }
+
+ cppi5_tr_csf_set(&tr_req[tr_idx - 1].flags,
+ CPPI5_TR_CSF_SUPR_EVT | CPPI5_TR_CSF_EOP);
+
+ return d;
+}
+
+static int udma_configure_statictr(struct udma_chan *uc, struct udma_desc *d,
+ enum dma_slave_buswidth dev_width,
+ u16 elcnt)
+{
+ if (uc->config.ep_type != PSIL_EP_PDMA_XY)
+ return 0;
+
+ /* Bus width translates to the element size (ES) */
+ switch (dev_width) {
+ case DMA_SLAVE_BUSWIDTH_1_BYTE:
+ d->static_tr.elsize = 0;
+ break;
+ case DMA_SLAVE_BUSWIDTH_2_BYTES:
+ d->static_tr.elsize = 1;
+ break;
+ case DMA_SLAVE_BUSWIDTH_3_BYTES:
+ d->static_tr.elsize = 2;
+ break;
+ case DMA_SLAVE_BUSWIDTH_4_BYTES:
+ d->static_tr.elsize = 3;
+ break;
+ case DMA_SLAVE_BUSWIDTH_8_BYTES:
+ d->static_tr.elsize = 4;
+ break;
+ default: /* not reached */
+ return -EINVAL;
+ }
+
+ d->static_tr.elcnt = elcnt;
+
+ /*
+ * PDMA must to close the packet when the channel is in packet mode.
+ * For TR mode when the channel is not cyclic we also need PDMA to close
+ * the packet otherwise the transfer will stall because PDMA holds on
+ * the data it has received from the peripheral.
+ */
+ if (uc->config.pkt_mode || !uc->cyclic) {
+ unsigned int div = dev_width * elcnt;
+
+ if (uc->cyclic)
+ d->static_tr.bstcnt = d->residue / d->sglen / div;
+ else
+ d->static_tr.bstcnt = d->residue / div;
+
+ if (uc->config.dir == DMA_DEV_TO_MEM &&
+ d->static_tr.bstcnt > uc->ud->match_data->statictr_z_mask)
+ return -EINVAL;
+ } else {
+ d->static_tr.bstcnt = 0;
+ }
+
+ return 0;
+}
+
+static struct udma_desc *
+udma_prep_slave_sg_pkt(struct udma_chan *uc, struct scatterlist *sgl,
+ unsigned int sglen, enum dma_transfer_direction dir,
+ unsigned long tx_flags, void *context)
+{
+ struct scatterlist *sgent;
+ struct cppi5_host_desc_t *h_desc = NULL;
+ struct udma_desc *d;
+ u32 ring_id;
+ unsigned int i;
+ u64 asel;
+
+ d = kzalloc(struct_size(d, hwdesc, sglen), GFP_NOWAIT);
+ if (!d)
+ return NULL;
+
+ d->sglen = sglen;
+ d->hwdesc_count = sglen;
+
+ if (dir == DMA_DEV_TO_MEM)
+ ring_id = k3_ringacc_get_ring_id(uc->rflow->r_ring);
+ else
+ ring_id = k3_ringacc_get_ring_id(uc->tchan->tc_ring);
+
+ if (uc->ud->match_data->type == DMA_TYPE_UDMA)
+ asel = 0;
+ else
+ asel = (u64)uc->config.asel << K3_ADDRESS_ASEL_SHIFT;
+
+ for_each_sg(sgl, sgent, sglen, i) {
+ struct udma_hwdesc *hwdesc = &d->hwdesc[i];
+ dma_addr_t sg_addr = sg_dma_address(sgent);
+ struct cppi5_host_desc_t *desc;
+ size_t sg_len = sg_dma_len(sgent);
+
+ hwdesc->cppi5_desc_vaddr = dma_pool_zalloc(uc->hdesc_pool,
+ GFP_NOWAIT,
+ &hwdesc->cppi5_desc_paddr);
+ if (!hwdesc->cppi5_desc_vaddr) {
+ dev_err(uc->ud->dev,
+ "descriptor%d allocation failed\n", i);
+
+ udma_free_hwdesc(uc, d);
+ kfree(d);
+ return NULL;
+ }
+
+ d->residue += sg_len;
+ hwdesc->cppi5_desc_size = uc->config.hdesc_size;
+ desc = hwdesc->cppi5_desc_vaddr;
+
+ if (i == 0) {
+ cppi5_hdesc_init(desc, 0, 0);
+ /* Flow and Packed ID */
+ cppi5_desc_set_pktids(&desc->hdr, uc->id,
+ CPPI5_INFO1_DESC_FLOWID_DEFAULT);
+ cppi5_desc_set_retpolicy(&desc->hdr, 0, ring_id);
+ } else {
+ cppi5_hdesc_reset_hbdesc(desc);
+ cppi5_desc_set_retpolicy(&desc->hdr, 0, 0xffff);
+ }
+
+ /* attach the sg buffer to the descriptor */
+ sg_addr |= asel;
+ cppi5_hdesc_attach_buf(desc, sg_addr, sg_len, sg_addr, sg_len);
+
+ /* Attach link as host buffer descriptor */
+ if (h_desc)
+ cppi5_hdesc_link_hbdesc(h_desc,
+ hwdesc->cppi5_desc_paddr | asel);
+
+ if (uc->ud->match_data->type == DMA_TYPE_PKTDMA ||
+ dir == DMA_MEM_TO_DEV)
+ h_desc = desc;
+ }
+
+ if (d->residue >= SZ_4M) {
+ dev_err(uc->ud->dev,
+ "%s: Transfer size %u is over the supported 4M range\n",
+ __func__, d->residue);
+ udma_free_hwdesc(uc, d);
+ kfree(d);
+ return NULL;
+ }
+
+ h_desc = d->hwdesc[0].cppi5_desc_vaddr;
+ cppi5_hdesc_set_pktlen(h_desc, d->residue);
+
+ return d;
+}
+
+static int udma_attach_metadata(struct dma_async_tx_descriptor *desc,
+ void *data, size_t len)
+{
+ struct udma_desc *d = to_udma_desc(desc);
+ struct udma_chan *uc = to_udma_chan(desc->chan);
+ struct cppi5_host_desc_t *h_desc;
+ u32 psd_size = len;
+ u32 flags = 0;
+
+ if (!uc->config.pkt_mode || !uc->config.metadata_size)
+ return -ENOTSUPP;
+
+ if (!data || len > uc->config.metadata_size)
+ return -EINVAL;
+
+ if (uc->config.needs_epib && len < CPPI5_INFO0_HDESC_EPIB_SIZE)
+ return -EINVAL;
+
+ h_desc = d->hwdesc[0].cppi5_desc_vaddr;
+ if (d->dir == DMA_MEM_TO_DEV)
+ memcpy(h_desc->epib, data, len);
+
+ if (uc->config.needs_epib)
+ psd_size -= CPPI5_INFO0_HDESC_EPIB_SIZE;
+
+ d->metadata = data;
+ d->metadata_size = len;
+ if (uc->config.needs_epib)
+ flags |= CPPI5_INFO0_HDESC_EPIB_PRESENT;
+
+ cppi5_hdesc_update_flags(h_desc, flags);
+ cppi5_hdesc_update_psdata_size(h_desc, psd_size);
+
+ return 0;
+}
+
+static void *udma_get_metadata_ptr(struct dma_async_tx_descriptor *desc,
+ size_t *payload_len, size_t *max_len)
+{
+ struct udma_desc *d = to_udma_desc(desc);
+ struct udma_chan *uc = to_udma_chan(desc->chan);
+ struct cppi5_host_desc_t *h_desc;
+
+ if (!uc->config.pkt_mode || !uc->config.metadata_size)
+ return ERR_PTR(-ENOTSUPP);
+
+ h_desc = d->hwdesc[0].cppi5_desc_vaddr;
+
+ *max_len = uc->config.metadata_size;
+
+ *payload_len = cppi5_hdesc_epib_present(&h_desc->hdr) ?
+ CPPI5_INFO0_HDESC_EPIB_SIZE : 0;
+ *payload_len += cppi5_hdesc_get_psdata_size(h_desc);
+
+ return h_desc->epib;
+}
+
+static int udma_set_metadata_len(struct dma_async_tx_descriptor *desc,
+ size_t payload_len)
+{
+ struct udma_desc *d = to_udma_desc(desc);
+ struct udma_chan *uc = to_udma_chan(desc->chan);
+ struct cppi5_host_desc_t *h_desc;
+ u32 psd_size = payload_len;
+ u32 flags = 0;
+
+ if (!uc->config.pkt_mode || !uc->config.metadata_size)
+ return -ENOTSUPP;
+
+ if (payload_len > uc->config.metadata_size)
+ return -EINVAL;
+
+ if (uc->config.needs_epib && payload_len < CPPI5_INFO0_HDESC_EPIB_SIZE)
+ return -EINVAL;
+
+ h_desc = d->hwdesc[0].cppi5_desc_vaddr;
+
+ if (uc->config.needs_epib) {
+ psd_size -= CPPI5_INFO0_HDESC_EPIB_SIZE;
+ flags |= CPPI5_INFO0_HDESC_EPIB_PRESENT;
+ }
+
+ cppi5_hdesc_update_flags(h_desc, flags);
+ cppi5_hdesc_update_psdata_size(h_desc, psd_size);
+
+ return 0;
+}
+
+static struct dma_descriptor_metadata_ops metadata_ops = {
+ .attach = udma_attach_metadata,
+ .get_ptr = udma_get_metadata_ptr,
+ .set_len = udma_set_metadata_len,
+};
+
+static struct dma_async_tx_descriptor *
+udma_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
+ unsigned int sglen, enum dma_transfer_direction dir,
+ unsigned long tx_flags, void *context)
+{
+ struct udma_chan *uc = to_udma_chan(chan);
+ enum dma_slave_buswidth dev_width;
+ struct udma_desc *d;
+ u32 burst;
+
+ if (dir != uc->config.dir &&
+ (uc->config.dir == DMA_MEM_TO_MEM && !uc->config.tr_trigger_type)) {
+ dev_err(chan->device->dev,
+ "%s: chan%d is for %s, not supporting %s\n",
+ __func__, uc->id,
+ dmaengine_get_direction_text(uc->config.dir),
+ dmaengine_get_direction_text(dir));
+ return NULL;
+ }
+
+ if (dir == DMA_DEV_TO_MEM) {
+ dev_width = uc->cfg.src_addr_width;
+ burst = uc->cfg.src_maxburst;
+ } else if (dir == DMA_MEM_TO_DEV) {
+ dev_width = uc->cfg.dst_addr_width;
+ burst = uc->cfg.dst_maxburst;
+ } else {
+ dev_err(chan->device->dev, "%s: bad direction?\n", __func__);
+ return NULL;
+ }
+
+ if (!burst)
+ burst = 1;
+
+ uc->config.tx_flags = tx_flags;
+
+ if (uc->config.pkt_mode)
+ d = udma_prep_slave_sg_pkt(uc, sgl, sglen, dir, tx_flags,
+ context);
+ else if (is_slave_direction(uc->config.dir))
+ d = udma_prep_slave_sg_tr(uc, sgl, sglen, dir, tx_flags,
+ context);
+ else
+ d = udma_prep_slave_sg_triggered_tr(uc, sgl, sglen, dir,
+ tx_flags, context);
+
+ if (!d)
+ return NULL;
+
+ d->dir = dir;
+ d->desc_idx = 0;
+ d->tr_idx = 0;
+
+ /* static TR for remote PDMA */
+ if (udma_configure_statictr(uc, d, dev_width, burst)) {
+ dev_err(uc->ud->dev,
+ "%s: StaticTR Z is limited to maximum 4095 (%u)\n",
+ __func__, d->static_tr.bstcnt);
+
+ udma_free_hwdesc(uc, d);
+ kfree(d);
+ return NULL;
+ }
+
+ if (uc->config.metadata_size)
+ d->vd.tx.metadata_ops = &metadata_ops;
+
+ return vchan_tx_prep(&uc->vc, &d->vd, tx_flags);
+}
+
+static struct udma_desc *
+udma_prep_dma_cyclic_tr(struct udma_chan *uc, dma_addr_t buf_addr,
+ size_t buf_len, size_t period_len,
+ enum dma_transfer_direction dir, unsigned long flags)
+{
+ struct udma_desc *d;
+ size_t tr_size, period_addr;
+ struct cppi5_tr_type1_t *tr_req;
+ unsigned int periods = buf_len / period_len;
+ u16 tr0_cnt0, tr0_cnt1, tr1_cnt0;
+ unsigned int i;
+ int num_tr;
+
+ num_tr = udma_get_tr_counters(period_len, __ffs(buf_addr), &tr0_cnt0,
+ &tr0_cnt1, &tr1_cnt0);
+ if (num_tr < 0) {
+ dev_err(uc->ud->dev, "size %zu is not supported\n",
+ period_len);
+ return NULL;
+ }
+
+ /* Now allocate and setup the descriptor. */
+ tr_size = sizeof(struct cppi5_tr_type1_t);
+ d = udma_alloc_tr_desc(uc, tr_size, periods * num_tr, dir);
+ if (!d)
+ return NULL;
+
+ tr_req = d->hwdesc[0].tr_req_base;
+ if (uc->ud->match_data->type == DMA_TYPE_UDMA)
+ period_addr = buf_addr;
+ else
+ period_addr = buf_addr |
+ ((u64)uc->config.asel << K3_ADDRESS_ASEL_SHIFT);
+
+ for (i = 0; i < periods; i++) {
+ int tr_idx = i * num_tr;
+
+ cppi5_tr_init(&tr_req[tr_idx].flags, CPPI5_TR_TYPE1, false,
+ false, CPPI5_TR_EVENT_SIZE_COMPLETION, 0);
+
+ tr_req[tr_idx].addr = period_addr;
+ tr_req[tr_idx].icnt0 = tr0_cnt0;
+ tr_req[tr_idx].icnt1 = tr0_cnt1;
+ tr_req[tr_idx].dim1 = tr0_cnt0;
+
+ if (num_tr == 2) {
+ cppi5_tr_csf_set(&tr_req[tr_idx].flags,
+ CPPI5_TR_CSF_SUPR_EVT);
+ tr_idx++;
+
+ cppi5_tr_init(&tr_req[tr_idx].flags, CPPI5_TR_TYPE1,
+ false, false,
+ CPPI5_TR_EVENT_SIZE_COMPLETION, 0);
+
+ tr_req[tr_idx].addr = period_addr + tr0_cnt1 * tr0_cnt0;
+ tr_req[tr_idx].icnt0 = tr1_cnt0;
+ tr_req[tr_idx].icnt1 = 1;
+ tr_req[tr_idx].dim1 = tr1_cnt0;
+ }
+
+ if (!(flags & DMA_PREP_INTERRUPT))
+ cppi5_tr_csf_set(&tr_req[tr_idx].flags,
+ CPPI5_TR_CSF_SUPR_EVT);
+
+ period_addr += period_len;
+ }
+
+ return d;
+}
+
+static struct udma_desc *
+udma_prep_dma_cyclic_pkt(struct udma_chan *uc, dma_addr_t buf_addr,
+ size_t buf_len, size_t period_len,
+ enum dma_transfer_direction dir, unsigned long flags)
+{
+ struct udma_desc *d;
+ u32 ring_id;
+ int i;
+ int periods = buf_len / period_len;
+
+ if (periods > (K3_UDMA_DEFAULT_RING_SIZE - 1))
+ return NULL;
+
+ if (period_len >= SZ_4M)
+ return NULL;
+
+ d = kzalloc(struct_size(d, hwdesc, periods), GFP_NOWAIT);
+ if (!d)
+ return NULL;
+
+ d->hwdesc_count = periods;
+
+ /* TODO: re-check this... */
+ if (dir == DMA_DEV_TO_MEM)
+ ring_id = k3_ringacc_get_ring_id(uc->rflow->r_ring);
+ else
+ ring_id = k3_ringacc_get_ring_id(uc->tchan->tc_ring);
+
+ if (uc->ud->match_data->type != DMA_TYPE_UDMA)
+ buf_addr |= (u64)uc->config.asel << K3_ADDRESS_ASEL_SHIFT;
+
+ for (i = 0; i < periods; i++) {
+ struct udma_hwdesc *hwdesc = &d->hwdesc[i];
+ dma_addr_t period_addr = buf_addr + (period_len * i);
+ struct cppi5_host_desc_t *h_desc;
+
+ hwdesc->cppi5_desc_vaddr = dma_pool_zalloc(uc->hdesc_pool,
+ GFP_NOWAIT,
+ &hwdesc->cppi5_desc_paddr);
+ if (!hwdesc->cppi5_desc_vaddr) {
+ dev_err(uc->ud->dev,
+ "descriptor%d allocation failed\n", i);
+
+ udma_free_hwdesc(uc, d);
+ kfree(d);
+ return NULL;
+ }
+
+ hwdesc->cppi5_desc_size = uc->config.hdesc_size;
+ h_desc = hwdesc->cppi5_desc_vaddr;
+
+ cppi5_hdesc_init(h_desc, 0, 0);
+ cppi5_hdesc_set_pktlen(h_desc, period_len);
+
+ /* Flow and Packed ID */
+ cppi5_desc_set_pktids(&h_desc->hdr, uc->id,
+ CPPI5_INFO1_DESC_FLOWID_DEFAULT);
+ cppi5_desc_set_retpolicy(&h_desc->hdr, 0, ring_id);
+
+ /* attach each period to a new descriptor */
+ cppi5_hdesc_attach_buf(h_desc,
+ period_addr, period_len,
+ period_addr, period_len);
+ }
+
+ return d;
+}
+
+static struct dma_async_tx_descriptor *
+udma_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
+ size_t period_len, enum dma_transfer_direction dir,
+ unsigned long flags)
+{
+ struct udma_chan *uc = to_udma_chan(chan);
+ enum dma_slave_buswidth dev_width;
+ struct udma_desc *d;
+ u32 burst;
+
+ if (dir != uc->config.dir) {
+ dev_err(chan->device->dev,
+ "%s: chan%d is for %s, not supporting %s\n",
+ __func__, uc->id,
+ dmaengine_get_direction_text(uc->config.dir),
+ dmaengine_get_direction_text(dir));
+ return NULL;
+ }
+
+ uc->cyclic = true;
+
+ if (dir == DMA_DEV_TO_MEM) {
+ dev_width = uc->cfg.src_addr_width;
+ burst = uc->cfg.src_maxburst;
+ } else if (dir == DMA_MEM_TO_DEV) {
+ dev_width = uc->cfg.dst_addr_width;
+ burst = uc->cfg.dst_maxburst;
+ } else {
+ dev_err(uc->ud->dev, "%s: bad direction?\n", __func__);
+ return NULL;
+ }
+
+ if (!burst)
+ burst = 1;
+
+ if (uc->config.pkt_mode)
+ d = udma_prep_dma_cyclic_pkt(uc, buf_addr, buf_len, period_len,
+ dir, flags);
+ else
+ d = udma_prep_dma_cyclic_tr(uc, buf_addr, buf_len, period_len,
+ dir, flags);
+
+ if (!d)
+ return NULL;
+
+ d->sglen = buf_len / period_len;
+
+ d->dir = dir;
+ d->residue = buf_len;
+
+ /* static TR for remote PDMA */
+ if (udma_configure_statictr(uc, d, dev_width, burst)) {
+ dev_err(uc->ud->dev,
+ "%s: StaticTR Z is limited to maximum 4095 (%u)\n",
+ __func__, d->static_tr.bstcnt);
+
+ udma_free_hwdesc(uc, d);
+ kfree(d);
+ return NULL;
+ }
+
+ if (uc->config.metadata_size)
+ d->vd.tx.metadata_ops = &metadata_ops;
+
+ return vchan_tx_prep(&uc->vc, &d->vd, flags);
+}
+
+static struct dma_async_tx_descriptor *
+udma_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
+ size_t len, unsigned long tx_flags)
+{
+ struct udma_chan *uc = to_udma_chan(chan);
+ struct udma_desc *d;
+ struct cppi5_tr_type15_t *tr_req;
+ int num_tr;
+ size_t tr_size = sizeof(struct cppi5_tr_type15_t);
+ u16 tr0_cnt0, tr0_cnt1, tr1_cnt0;
+
+ if (uc->config.dir != DMA_MEM_TO_MEM) {
+ dev_err(chan->device->dev,
+ "%s: chan%d is for %s, not supporting %s\n",
+ __func__, uc->id,
+ dmaengine_get_direction_text(uc->config.dir),
+ dmaengine_get_direction_text(DMA_MEM_TO_MEM));
+ return NULL;
+ }
+
+ num_tr = udma_get_tr_counters(len, __ffs(src | dest), &tr0_cnt0,
+ &tr0_cnt1, &tr1_cnt0);
+ if (num_tr < 0) {
+ dev_err(uc->ud->dev, "size %zu is not supported\n",
+ len);
+ return NULL;
+ }
+
+ d = udma_alloc_tr_desc(uc, tr_size, num_tr, DMA_MEM_TO_MEM);
+ if (!d)
+ return NULL;
+
+ d->dir = DMA_MEM_TO_MEM;
+ d->desc_idx = 0;
+ d->tr_idx = 0;
+ d->residue = len;
+
+ if (uc->ud->match_data->type != DMA_TYPE_UDMA) {
+ src |= (u64)uc->ud->asel << K3_ADDRESS_ASEL_SHIFT;
+ dest |= (u64)uc->ud->asel << K3_ADDRESS_ASEL_SHIFT;
+ }
+
+ tr_req = d->hwdesc[0].tr_req_base;
+
+ cppi5_tr_init(&tr_req[0].flags, CPPI5_TR_TYPE15, false, true,
+ CPPI5_TR_EVENT_SIZE_COMPLETION, 0);
+ cppi5_tr_csf_set(&tr_req[0].flags, CPPI5_TR_CSF_SUPR_EVT);
+
+ tr_req[0].addr = src;
+ tr_req[0].icnt0 = tr0_cnt0;
+ tr_req[0].icnt1 = tr0_cnt1;
+ tr_req[0].icnt2 = 1;
+ tr_req[0].icnt3 = 1;
+ tr_req[0].dim1 = tr0_cnt0;
+
+ tr_req[0].daddr = dest;
+ tr_req[0].dicnt0 = tr0_cnt0;
+ tr_req[0].dicnt1 = tr0_cnt1;
+ tr_req[0].dicnt2 = 1;
+ tr_req[0].dicnt3 = 1;
+ tr_req[0].ddim1 = tr0_cnt0;
+
+ if (num_tr == 2) {
+ cppi5_tr_init(&tr_req[1].flags, CPPI5_TR_TYPE15, false, true,
+ CPPI5_TR_EVENT_SIZE_COMPLETION, 0);
+ cppi5_tr_csf_set(&tr_req[1].flags, CPPI5_TR_CSF_SUPR_EVT);
+
+ tr_req[1].addr = src + tr0_cnt1 * tr0_cnt0;
+ tr_req[1].icnt0 = tr1_cnt0;
+ tr_req[1].icnt1 = 1;
+ tr_req[1].icnt2 = 1;
+ tr_req[1].icnt3 = 1;
+
+ tr_req[1].daddr = dest + tr0_cnt1 * tr0_cnt0;
+ tr_req[1].dicnt0 = tr1_cnt0;
+ tr_req[1].dicnt1 = 1;
+ tr_req[1].dicnt2 = 1;
+ tr_req[1].dicnt3 = 1;
+ }
+
+ cppi5_tr_csf_set(&tr_req[num_tr - 1].flags,
+ CPPI5_TR_CSF_SUPR_EVT | CPPI5_TR_CSF_EOP);
+
+ if (uc->config.metadata_size)
+ d->vd.tx.metadata_ops = &metadata_ops;
+
+ return vchan_tx_prep(&uc->vc, &d->vd, tx_flags);
+}
+
+static void udma_issue_pending(struct dma_chan *chan)
+{
+ struct udma_chan *uc = to_udma_chan(chan);
+ unsigned long flags;
+
+ spin_lock_irqsave(&uc->vc.lock, flags);
+
+ /* If we have something pending and no active descriptor, then */
+ if (vchan_issue_pending(&uc->vc) && !uc->desc) {
+ /*
+ * start a descriptor if the channel is NOT [marked as
+ * terminating _and_ it is still running (teardown has not
+ * completed yet)].
+ */
+ if (!(uc->state == UDMA_CHAN_IS_TERMINATING &&
+ udma_is_chan_running(uc)))
+ udma_start(uc);
+ }
+
+ spin_unlock_irqrestore(&uc->vc.lock, flags);
+}
+
+static enum dma_status udma_tx_status(struct dma_chan *chan,
+ dma_cookie_t cookie,
+ struct dma_tx_state *txstate)
+{
+ struct udma_chan *uc = to_udma_chan(chan);
+ enum dma_status ret;
+ unsigned long flags;
+
+ spin_lock_irqsave(&uc->vc.lock, flags);
+
+ ret = dma_cookie_status(chan, cookie, txstate);
+
+ if (!udma_is_chan_running(uc))
+ ret = DMA_COMPLETE;
+
+ if (ret == DMA_IN_PROGRESS && udma_is_chan_paused(uc))
+ ret = DMA_PAUSED;
+
+ if (ret == DMA_COMPLETE || !txstate)
+ goto out;
+
+ if (uc->desc && uc->desc->vd.tx.cookie == cookie) {
+ u32 peer_bcnt = 0;
+ u32 bcnt = 0;
+ u32 residue = uc->desc->residue;
+ u32 delay = 0;
+
+ if (uc->desc->dir == DMA_MEM_TO_DEV) {
+ bcnt = udma_tchanrt_read(uc, UDMA_CHAN_RT_SBCNT_REG);
+
+ if (uc->config.ep_type != PSIL_EP_NATIVE) {
+ peer_bcnt = udma_tchanrt_read(uc,
+ UDMA_CHAN_RT_PEER_BCNT_REG);
+
+ if (bcnt > peer_bcnt)
+ delay = bcnt - peer_bcnt;
+ }
+ } else if (uc->desc->dir == DMA_DEV_TO_MEM) {
+ bcnt = udma_rchanrt_read(uc, UDMA_CHAN_RT_BCNT_REG);
+
+ if (uc->config.ep_type != PSIL_EP_NATIVE) {
+ peer_bcnt = udma_rchanrt_read(uc,
+ UDMA_CHAN_RT_PEER_BCNT_REG);
+
+ if (peer_bcnt > bcnt)
+ delay = peer_bcnt - bcnt;
+ }
+ } else {
+ bcnt = udma_tchanrt_read(uc, UDMA_CHAN_RT_BCNT_REG);
+ }
+
+ if (bcnt && !(bcnt % uc->desc->residue))
+ residue = 0;
+ else
+ residue -= bcnt % uc->desc->residue;
+
+ if (!residue && (uc->config.dir == DMA_DEV_TO_MEM || !delay)) {
+ ret = DMA_COMPLETE;
+ delay = 0;
+ }
+
+ dma_set_residue(txstate, residue);
+ dma_set_in_flight_bytes(txstate, delay);
+
+ } else {
+ ret = DMA_COMPLETE;
+ }
+
+out:
+ spin_unlock_irqrestore(&uc->vc.lock, flags);
+ return ret;
+}
+
+static int udma_pause(struct dma_chan *chan)
+{
+ struct udma_chan *uc = to_udma_chan(chan);
+
+ /* pause the channel */
+ switch (uc->config.dir) {
+ case DMA_DEV_TO_MEM:
+ udma_rchanrt_update_bits(uc, UDMA_CHAN_RT_PEER_RT_EN_REG,
+ UDMA_PEER_RT_EN_PAUSE,
+ UDMA_PEER_RT_EN_PAUSE);
+ break;
+ case DMA_MEM_TO_DEV:
+ udma_tchanrt_update_bits(uc, UDMA_CHAN_RT_PEER_RT_EN_REG,
+ UDMA_PEER_RT_EN_PAUSE,
+ UDMA_PEER_RT_EN_PAUSE);
+ break;
+ case DMA_MEM_TO_MEM:
+ udma_tchanrt_update_bits(uc, UDMA_CHAN_RT_CTL_REG,
+ UDMA_CHAN_RT_CTL_PAUSE,
+ UDMA_CHAN_RT_CTL_PAUSE);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int udma_resume(struct dma_chan *chan)
+{
+ struct udma_chan *uc = to_udma_chan(chan);
+
+ /* resume the channel */
+ switch (uc->config.dir) {
+ case DMA_DEV_TO_MEM:
+ udma_rchanrt_update_bits(uc, UDMA_CHAN_RT_PEER_RT_EN_REG,
+ UDMA_PEER_RT_EN_PAUSE, 0);
+
+ break;
+ case DMA_MEM_TO_DEV:
+ udma_tchanrt_update_bits(uc, UDMA_CHAN_RT_PEER_RT_EN_REG,
+ UDMA_PEER_RT_EN_PAUSE, 0);
+ break;
+ case DMA_MEM_TO_MEM:
+ udma_tchanrt_update_bits(uc, UDMA_CHAN_RT_CTL_REG,
+ UDMA_CHAN_RT_CTL_PAUSE, 0);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int udma_terminate_all(struct dma_chan *chan)
+{
+ struct udma_chan *uc = to_udma_chan(chan);
+ unsigned long flags;
+ LIST_HEAD(head);
+
+ spin_lock_irqsave(&uc->vc.lock, flags);
+
+ if (udma_is_chan_running(uc))
+ udma_stop(uc);
+
+ if (uc->desc) {
+ uc->terminated_desc = uc->desc;
+ uc->desc = NULL;
+ uc->terminated_desc->terminated = true;
+ cancel_delayed_work(&uc->tx_drain.work);
+ }
+
+ uc->paused = false;
+
+ vchan_get_all_descriptors(&uc->vc, &head);
+ spin_unlock_irqrestore(&uc->vc.lock, flags);
+ vchan_dma_desc_free_list(&uc->vc, &head);
+
+ return 0;
+}
+
+static void udma_synchronize(struct dma_chan *chan)
+{
+ struct udma_chan *uc = to_udma_chan(chan);
+ unsigned long timeout = msecs_to_jiffies(1000);
+
+ vchan_synchronize(&uc->vc);
+
+ if (uc->state == UDMA_CHAN_IS_TERMINATING) {
+ timeout = wait_for_completion_timeout(&uc->teardown_completed,
+ timeout);
+ if (!timeout) {
+ dev_warn(uc->ud->dev, "chan%d teardown timeout!\n",
+ uc->id);
+ udma_dump_chan_stdata(uc);
+ udma_reset_chan(uc, true);
+ }
+ }
+
+ udma_reset_chan(uc, false);
+ if (udma_is_chan_running(uc))
+ dev_warn(uc->ud->dev, "chan%d refused to stop!\n", uc->id);
+
+ cancel_delayed_work_sync(&uc->tx_drain.work);
+ udma_reset_rings(uc);
+}
+
+static void udma_desc_pre_callback(struct virt_dma_chan *vc,
+ struct virt_dma_desc *vd,
+ struct dmaengine_result *result)
+{
+ struct udma_chan *uc = to_udma_chan(&vc->chan);
+ struct udma_desc *d;
+
+ if (!vd)
+ return;
+
+ d = to_udma_desc(&vd->tx);
+
+ if (d->metadata_size)
+ udma_fetch_epib(uc, d);
+
+ /* Provide residue information for the client */
+ if (result) {
+ void *desc_vaddr = udma_curr_cppi5_desc_vaddr(d, d->desc_idx);
+
+ if (cppi5_desc_get_type(desc_vaddr) ==
+ CPPI5_INFO0_DESC_TYPE_VAL_HOST) {
+ result->residue = d->residue -
+ cppi5_hdesc_get_pktlen(desc_vaddr);
+ if (result->residue)
+ result->result = DMA_TRANS_ABORTED;
+ else
+ result->result = DMA_TRANS_NOERROR;
+ } else {
+ result->residue = 0;
+ result->result = DMA_TRANS_NOERROR;
+ }
+ }
+}
+
+/*
+ * This tasklet handles the completion of a DMA descriptor by
+ * calling its callback and freeing it.
+ */
+static void udma_vchan_complete(struct tasklet_struct *t)
+{
+ struct virt_dma_chan *vc = from_tasklet(vc, t, task);
+ struct virt_dma_desc *vd, *_vd;
+ struct dmaengine_desc_callback cb;
+ LIST_HEAD(head);
+
+ spin_lock_irq(&vc->lock);
+ list_splice_tail_init(&vc->desc_completed, &head);
+ vd = vc->cyclic;
+ if (vd) {
+ vc->cyclic = NULL;
+ dmaengine_desc_get_callback(&vd->tx, &cb);
+ } else {
+ memset(&cb, 0, sizeof(cb));
+ }
+ spin_unlock_irq(&vc->lock);
+
+ udma_desc_pre_callback(vc, vd, NULL);
+ dmaengine_desc_callback_invoke(&cb, NULL);
+
+ list_for_each_entry_safe(vd, _vd, &head, node) {
+ struct dmaengine_result result;
+
+ dmaengine_desc_get_callback(&vd->tx, &cb);
+
+ list_del(&vd->node);
+
+ udma_desc_pre_callback(vc, vd, &result);
+ dmaengine_desc_callback_invoke(&cb, &result);
+
+ vchan_vdesc_fini(vd);
+ }
+}
+
+static void udma_free_chan_resources(struct dma_chan *chan)
+{
+ struct udma_chan *uc = to_udma_chan(chan);
+ struct udma_dev *ud = to_udma_dev(chan->device);
+
+ udma_terminate_all(chan);
+ if (uc->terminated_desc) {
+ udma_reset_chan(uc, false);
+ udma_reset_rings(uc);
+ }
+
+ cancel_delayed_work_sync(&uc->tx_drain.work);
+
+ if (uc->irq_num_ring > 0) {
+ free_irq(uc->irq_num_ring, uc);
+
+ uc->irq_num_ring = 0;
+ }
+ if (uc->irq_num_udma > 0) {
+ free_irq(uc->irq_num_udma, uc);
+
+ uc->irq_num_udma = 0;
+ }
+
+ /* Release PSI-L pairing */
+ if (uc->psil_paired) {
+ navss_psil_unpair(ud, uc->config.src_thread,
+ uc->config.dst_thread);
+ uc->psil_paired = false;
+ }
+
+ vchan_free_chan_resources(&uc->vc);
+ tasklet_kill(&uc->vc.task);
+
+ bcdma_free_bchan_resources(uc);
+ udma_free_tx_resources(uc);
+ udma_free_rx_resources(uc);
+ udma_reset_uchan(uc);
+
+ if (uc->use_dma_pool) {
+ dma_pool_destroy(uc->hdesc_pool);
+ uc->use_dma_pool = false;
+ }
+}
+
+static struct platform_driver udma_driver;
+static struct platform_driver bcdma_driver;
+static struct platform_driver pktdma_driver;
+
+struct udma_filter_param {
+ int remote_thread_id;
+ u32 atype;
+ u32 asel;
+ u32 tr_trigger_type;
+};
+
+static bool udma_dma_filter_fn(struct dma_chan *chan, void *param)
+{
+ struct udma_chan_config *ucc;
+ struct psil_endpoint_config *ep_config;
+ struct udma_filter_param *filter_param;
+ struct udma_chan *uc;
+ struct udma_dev *ud;
+
+ if (chan->device->dev->driver != &udma_driver.driver &&
+ chan->device->dev->driver != &bcdma_driver.driver &&
+ chan->device->dev->driver != &pktdma_driver.driver)
+ return false;
+
+ uc = to_udma_chan(chan);
+ ucc = &uc->config;
+ ud = uc->ud;
+ filter_param = param;
+
+ if (filter_param->atype > 2) {
+ dev_err(ud->dev, "Invalid channel atype: %u\n",
+ filter_param->atype);
+ return false;
+ }
+
+ if (filter_param->asel > 15) {
+ dev_err(ud->dev, "Invalid channel asel: %u\n",
+ filter_param->asel);
+ return false;
+ }
+
+ ucc->remote_thread_id = filter_param->remote_thread_id;
+ ucc->atype = filter_param->atype;
+ ucc->asel = filter_param->asel;
+ ucc->tr_trigger_type = filter_param->tr_trigger_type;
+
+ if (ucc->tr_trigger_type) {
+ ucc->dir = DMA_MEM_TO_MEM;
+ goto triggered_bchan;
+ } else if (ucc->remote_thread_id & K3_PSIL_DST_THREAD_ID_OFFSET) {
+ ucc->dir = DMA_MEM_TO_DEV;
+ } else {
+ ucc->dir = DMA_DEV_TO_MEM;
+ }
+
+ ep_config = psil_get_ep_config(ucc->remote_thread_id);
+ if (IS_ERR(ep_config)) {
+ dev_err(ud->dev, "No configuration for psi-l thread 0x%04x\n",
+ ucc->remote_thread_id);
+ ucc->dir = DMA_MEM_TO_MEM;
+ ucc->remote_thread_id = -1;
+ ucc->atype = 0;
+ ucc->asel = 0;
+ return false;
+ }
+
+ if (ud->match_data->type == DMA_TYPE_BCDMA &&
+ ep_config->pkt_mode) {
+ dev_err(ud->dev,
+ "Only TR mode is supported (psi-l thread 0x%04x)\n",
+ ucc->remote_thread_id);
+ ucc->dir = DMA_MEM_TO_MEM;
+ ucc->remote_thread_id = -1;
+ ucc->atype = 0;
+ ucc->asel = 0;
+ return false;
+ }
+
+ ucc->pkt_mode = ep_config->pkt_mode;
+ ucc->channel_tpl = ep_config->channel_tpl;
+ ucc->notdpkt = ep_config->notdpkt;
+ ucc->ep_type = ep_config->ep_type;
+
+ if (ud->match_data->type == DMA_TYPE_PKTDMA &&
+ ep_config->mapped_channel_id >= 0) {
+ ucc->mapped_channel_id = ep_config->mapped_channel_id;
+ ucc->default_flow_id = ep_config->default_flow_id;
+ } else {
+ ucc->mapped_channel_id = -1;
+ ucc->default_flow_id = -1;
+ }
+
+ if (ucc->ep_type != PSIL_EP_NATIVE) {
+ const struct udma_match_data *match_data = ud->match_data;
+
+ if (match_data->flags & UDMA_FLAG_PDMA_ACC32)
+ ucc->enable_acc32 = ep_config->pdma_acc32;
+ if (match_data->flags & UDMA_FLAG_PDMA_BURST)
+ ucc->enable_burst = ep_config->pdma_burst;
+ }
+
+ ucc->needs_epib = ep_config->needs_epib;
+ ucc->psd_size = ep_config->psd_size;
+ ucc->metadata_size =
+ (ucc->needs_epib ? CPPI5_INFO0_HDESC_EPIB_SIZE : 0) +
+ ucc->psd_size;
+
+ if (ucc->pkt_mode)
+ ucc->hdesc_size = ALIGN(sizeof(struct cppi5_host_desc_t) +
+ ucc->metadata_size, ud->desc_align);
+
+ dev_dbg(ud->dev, "chan%d: Remote thread: 0x%04x (%s)\n", uc->id,
+ ucc->remote_thread_id, dmaengine_get_direction_text(ucc->dir));
+
+ return true;
+
+triggered_bchan:
+ dev_dbg(ud->dev, "chan%d: triggered channel (type: %u)\n", uc->id,
+ ucc->tr_trigger_type);
+
+ return true;
+
+}
+
+static struct dma_chan *udma_of_xlate(struct of_phandle_args *dma_spec,
+ struct of_dma *ofdma)
+{
+ struct udma_dev *ud = ofdma->of_dma_data;
+ dma_cap_mask_t mask = ud->ddev.cap_mask;
+ struct udma_filter_param filter_param;
+ struct dma_chan *chan;
+
+ if (ud->match_data->type == DMA_TYPE_BCDMA) {
+ if (dma_spec->args_count != 3)
+ return NULL;
+
+ filter_param.tr_trigger_type = dma_spec->args[0];
+ filter_param.remote_thread_id = dma_spec->args[1];
+ filter_param.asel = dma_spec->args[2];
+ filter_param.atype = 0;
+ } else {
+ if (dma_spec->args_count != 1 && dma_spec->args_count != 2)
+ return NULL;
+
+ filter_param.remote_thread_id = dma_spec->args[0];
+ filter_param.tr_trigger_type = 0;
+ if (dma_spec->args_count == 2) {
+ if (ud->match_data->type == DMA_TYPE_UDMA) {
+ filter_param.atype = dma_spec->args[1];
+ filter_param.asel = 0;
+ } else {
+ filter_param.atype = 0;
+ filter_param.asel = dma_spec->args[1];
+ }
+ } else {
+ filter_param.atype = 0;
+ filter_param.asel = 0;
+ }
+ }
+
+ chan = __dma_request_channel(&mask, udma_dma_filter_fn, &filter_param,
+ ofdma->of_node);
+ if (!chan) {
+ dev_err(ud->dev, "get channel fail in %s.\n", __func__);
+ return ERR_PTR(-EINVAL);
+ }
+
+ return chan;
+}
+
+static struct udma_match_data am654_main_data = {
+ .type = DMA_TYPE_UDMA,
+ .psil_base = 0x1000,
+ .enable_memcpy_support = true,
+ .statictr_z_mask = GENMASK(11, 0),
+ .burst_size = {
+ TI_SCI_RM_UDMAP_CHAN_BURST_SIZE_64_BYTES, /* Normal Channels */
+ TI_SCI_RM_UDMAP_CHAN_BURST_SIZE_64_BYTES, /* H Channels */
+ 0, /* No UH Channels */
+ },
+};
+
+static struct udma_match_data am654_mcu_data = {
+ .type = DMA_TYPE_UDMA,
+ .psil_base = 0x6000,
+ .enable_memcpy_support = false,
+ .statictr_z_mask = GENMASK(11, 0),
+ .burst_size = {
+ TI_SCI_RM_UDMAP_CHAN_BURST_SIZE_64_BYTES, /* Normal Channels */
+ TI_SCI_RM_UDMAP_CHAN_BURST_SIZE_64_BYTES, /* H Channels */
+ 0, /* No UH Channels */
+ },
+};
+
+static struct udma_match_data j721e_main_data = {
+ .type = DMA_TYPE_UDMA,
+ .psil_base = 0x1000,
+ .enable_memcpy_support = true,
+ .flags = UDMA_FLAGS_J7_CLASS,
+ .statictr_z_mask = GENMASK(23, 0),
+ .burst_size = {
+ TI_SCI_RM_UDMAP_CHAN_BURST_SIZE_64_BYTES, /* Normal Channels */
+ TI_SCI_RM_UDMAP_CHAN_BURST_SIZE_256_BYTES, /* H Channels */
+ TI_SCI_RM_UDMAP_CHAN_BURST_SIZE_256_BYTES, /* UH Channels */
+ },
+};
+
+static struct udma_match_data j721e_mcu_data = {
+ .type = DMA_TYPE_UDMA,
+ .psil_base = 0x6000,
+ .enable_memcpy_support = false, /* MEM_TO_MEM is slow via MCU UDMA */
+ .flags = UDMA_FLAGS_J7_CLASS,
+ .statictr_z_mask = GENMASK(23, 0),
+ .burst_size = {
+ TI_SCI_RM_UDMAP_CHAN_BURST_SIZE_64_BYTES, /* Normal Channels */
+ TI_SCI_RM_UDMAP_CHAN_BURST_SIZE_128_BYTES, /* H Channels */
+ 0, /* No UH Channels */
+ },
+};
+
+static struct udma_match_data am64_bcdma_data = {
+ .type = DMA_TYPE_BCDMA,
+ .psil_base = 0x2000, /* for tchan and rchan, not applicable to bchan */
+ .enable_memcpy_support = true, /* Supported via bchan */
+ .flags = UDMA_FLAGS_J7_CLASS,
+ .statictr_z_mask = GENMASK(23, 0),
+ .burst_size = {
+ TI_SCI_RM_UDMAP_CHAN_BURST_SIZE_64_BYTES, /* Normal Channels */
+ 0, /* No H Channels */
+ 0, /* No UH Channels */
+ },
+};
+
+static struct udma_match_data am64_pktdma_data = {
+ .type = DMA_TYPE_PKTDMA,
+ .psil_base = 0x1000,
+ .enable_memcpy_support = false, /* PKTDMA does not support MEM_TO_MEM */
+ .flags = UDMA_FLAGS_J7_CLASS,
+ .statictr_z_mask = GENMASK(23, 0),
+ .burst_size = {
+ TI_SCI_RM_UDMAP_CHAN_BURST_SIZE_64_BYTES, /* Normal Channels */
+ 0, /* No H Channels */
+ 0, /* No UH Channels */
+ },
+};
+
+static const struct of_device_id udma_of_match[] = {
+ {
+ .compatible = "ti,am654-navss-main-udmap",
+ .data = &am654_main_data,
+ },
+ {
+ .compatible = "ti,am654-navss-mcu-udmap",
+ .data = &am654_mcu_data,
+ }, {
+ .compatible = "ti,j721e-navss-main-udmap",
+ .data = &j721e_main_data,
+ }, {
+ .compatible = "ti,j721e-navss-mcu-udmap",
+ .data = &j721e_mcu_data,
+ },
+ { /* Sentinel */ },
+};
+
+static const struct of_device_id bcdma_of_match[] = {
+ {
+ .compatible = "ti,am64-dmss-bcdma",
+ .data = &am64_bcdma_data,
+ },
+ { /* Sentinel */ },
+};
+
+static const struct of_device_id pktdma_of_match[] = {
+ {
+ .compatible = "ti,am64-dmss-pktdma",
+ .data = &am64_pktdma_data,
+ },
+ { /* Sentinel */ },
+};
+
+static struct udma_soc_data am654_soc_data = {
+ .oes = {
+ .udma_rchan = 0x200,
+ },
+};
+
+static struct udma_soc_data j721e_soc_data = {
+ .oes = {
+ .udma_rchan = 0x400,
+ },
+};
+
+static struct udma_soc_data j7200_soc_data = {
+ .oes = {
+ .udma_rchan = 0x80,
+ },
+};
+
+static struct udma_soc_data am64_soc_data = {
+ .oes = {
+ .bcdma_bchan_data = 0x2200,
+ .bcdma_bchan_ring = 0x2400,
+ .bcdma_tchan_data = 0x2800,
+ .bcdma_tchan_ring = 0x2a00,
+ .bcdma_rchan_data = 0x2e00,
+ .bcdma_rchan_ring = 0x3000,
+ .pktdma_tchan_flow = 0x1200,
+ .pktdma_rchan_flow = 0x1600,
+ },
+ .bcdma_trigger_event_offset = 0xc400,
+};
+
+static const struct soc_device_attribute k3_soc_devices[] = {
+ { .family = "AM65X", .data = &am654_soc_data },
+ { .family = "J721E", .data = &j721e_soc_data },
+ { .family = "J7200", .data = &j7200_soc_data },
+ { .family = "AM64X", .data = &am64_soc_data },
+ { .family = "J721S2", .data = &j721e_soc_data},
+ { .family = "AM62X", .data = &am64_soc_data },
+ { /* sentinel */ }
+};
+
+static int udma_get_mmrs(struct platform_device *pdev, struct udma_dev *ud)
+{
+ u32 cap2, cap3, cap4;
+ int i;
+
+ ud->mmrs[MMR_GCFG] = devm_platform_ioremap_resource_byname(pdev, mmr_names[MMR_GCFG]);
+ if (IS_ERR(ud->mmrs[MMR_GCFG]))
+ return PTR_ERR(ud->mmrs[MMR_GCFG]);
+
+ cap2 = udma_read(ud->mmrs[MMR_GCFG], 0x28);
+ cap3 = udma_read(ud->mmrs[MMR_GCFG], 0x2c);
+
+ switch (ud->match_data->type) {
+ case DMA_TYPE_UDMA:
+ ud->rflow_cnt = UDMA_CAP3_RFLOW_CNT(cap3);
+ ud->tchan_cnt = UDMA_CAP2_TCHAN_CNT(cap2);
+ ud->echan_cnt = UDMA_CAP2_ECHAN_CNT(cap2);
+ ud->rchan_cnt = UDMA_CAP2_RCHAN_CNT(cap2);
+ break;
+ case DMA_TYPE_BCDMA:
+ ud->bchan_cnt = BCDMA_CAP2_BCHAN_CNT(cap2);
+ ud->tchan_cnt = BCDMA_CAP2_TCHAN_CNT(cap2);
+ ud->rchan_cnt = BCDMA_CAP2_RCHAN_CNT(cap2);
+ ud->rflow_cnt = ud->rchan_cnt;
+ break;
+ case DMA_TYPE_PKTDMA:
+ cap4 = udma_read(ud->mmrs[MMR_GCFG], 0x30);
+ ud->tchan_cnt = UDMA_CAP2_TCHAN_CNT(cap2);
+ ud->rchan_cnt = UDMA_CAP2_RCHAN_CNT(cap2);
+ ud->rflow_cnt = UDMA_CAP3_RFLOW_CNT(cap3);
+ ud->tflow_cnt = PKTDMA_CAP4_TFLOW_CNT(cap4);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ for (i = 1; i < MMR_LAST; i++) {
+ if (i == MMR_BCHANRT && ud->bchan_cnt == 0)
+ continue;
+ if (i == MMR_TCHANRT && ud->tchan_cnt == 0)
+ continue;
+ if (i == MMR_RCHANRT && ud->rchan_cnt == 0)
+ continue;
+
+ ud->mmrs[i] = devm_platform_ioremap_resource_byname(pdev, mmr_names[i]);
+ if (IS_ERR(ud->mmrs[i]))
+ return PTR_ERR(ud->mmrs[i]);
+ }
+
+ return 0;
+}
+
+static void udma_mark_resource_ranges(struct udma_dev *ud, unsigned long *map,
+ struct ti_sci_resource_desc *rm_desc,
+ char *name)
+{
+ bitmap_clear(map, rm_desc->start, rm_desc->num);
+ bitmap_clear(map, rm_desc->start_sec, rm_desc->num_sec);
+ dev_dbg(ud->dev, "ti_sci resource range for %s: %d:%d | %d:%d\n", name,
+ rm_desc->start, rm_desc->num, rm_desc->start_sec,
+ rm_desc->num_sec);
+}
+
+static const char * const range_names[] = {
+ [RM_RANGE_BCHAN] = "ti,sci-rm-range-bchan",
+ [RM_RANGE_TCHAN] = "ti,sci-rm-range-tchan",
+ [RM_RANGE_RCHAN] = "ti,sci-rm-range-rchan",
+ [RM_RANGE_RFLOW] = "ti,sci-rm-range-rflow",
+ [RM_RANGE_TFLOW] = "ti,sci-rm-range-tflow",
+};
+
+static int udma_setup_resources(struct udma_dev *ud)
+{
+ int ret, i, j;
+ struct device *dev = ud->dev;
+ struct ti_sci_resource *rm_res, irq_res;
+ struct udma_tisci_rm *tisci_rm = &ud->tisci_rm;
+ u32 cap3;
+
+ /* Set up the throughput level start indexes */
+ cap3 = udma_read(ud->mmrs[MMR_GCFG], 0x2c);
+ if (of_device_is_compatible(dev->of_node,
+ "ti,am654-navss-main-udmap")) {
+ ud->tchan_tpl.levels = 2;
+ ud->tchan_tpl.start_idx[0] = 8;
+ } else if (of_device_is_compatible(dev->of_node,
+ "ti,am654-navss-mcu-udmap")) {
+ ud->tchan_tpl.levels = 2;
+ ud->tchan_tpl.start_idx[0] = 2;
+ } else if (UDMA_CAP3_UCHAN_CNT(cap3)) {
+ ud->tchan_tpl.levels = 3;
+ ud->tchan_tpl.start_idx[1] = UDMA_CAP3_UCHAN_CNT(cap3);
+ ud->tchan_tpl.start_idx[0] = UDMA_CAP3_HCHAN_CNT(cap3);
+ } else if (UDMA_CAP3_HCHAN_CNT(cap3)) {
+ ud->tchan_tpl.levels = 2;
+ ud->tchan_tpl.start_idx[0] = UDMA_CAP3_HCHAN_CNT(cap3);
+ } else {
+ ud->tchan_tpl.levels = 1;
+ }
+
+ ud->rchan_tpl.levels = ud->tchan_tpl.levels;
+ ud->rchan_tpl.start_idx[0] = ud->tchan_tpl.start_idx[0];
+ ud->rchan_tpl.start_idx[1] = ud->tchan_tpl.start_idx[1];
+
+ ud->tchan_map = devm_kmalloc_array(dev, BITS_TO_LONGS(ud->tchan_cnt),
+ sizeof(unsigned long), GFP_KERNEL);
+ ud->tchans = devm_kcalloc(dev, ud->tchan_cnt, sizeof(*ud->tchans),
+ GFP_KERNEL);
+ ud->rchan_map = devm_kmalloc_array(dev, BITS_TO_LONGS(ud->rchan_cnt),
+ sizeof(unsigned long), GFP_KERNEL);
+ ud->rchans = devm_kcalloc(dev, ud->rchan_cnt, sizeof(*ud->rchans),
+ GFP_KERNEL);
+ ud->rflow_gp_map = devm_kmalloc_array(dev, BITS_TO_LONGS(ud->rflow_cnt),
+ sizeof(unsigned long),
+ GFP_KERNEL);
+ ud->rflow_gp_map_allocated = devm_kcalloc(dev,
+ BITS_TO_LONGS(ud->rflow_cnt),
+ sizeof(unsigned long),
+ GFP_KERNEL);
+ ud->rflow_in_use = devm_kcalloc(dev, BITS_TO_LONGS(ud->rflow_cnt),
+ sizeof(unsigned long),
+ GFP_KERNEL);
+ ud->rflows = devm_kcalloc(dev, ud->rflow_cnt, sizeof(*ud->rflows),
+ GFP_KERNEL);
+
+ if (!ud->tchan_map || !ud->rchan_map || !ud->rflow_gp_map ||
+ !ud->rflow_gp_map_allocated || !ud->tchans || !ud->rchans ||
+ !ud->rflows || !ud->rflow_in_use)
+ return -ENOMEM;
+
+ /*
+ * RX flows with the same Ids as RX channels are reserved to be used
+ * as default flows if remote HW can't generate flow_ids. Those
+ * RX flows can be requested only explicitly by id.
+ */
+ bitmap_set(ud->rflow_gp_map_allocated, 0, ud->rchan_cnt);
+
+ /* by default no GP rflows are assigned to Linux */
+ bitmap_set(ud->rflow_gp_map, 0, ud->rflow_cnt);
+
+ /* Get resource ranges from tisci */
+ for (i = 0; i < RM_RANGE_LAST; i++) {
+ if (i == RM_RANGE_BCHAN || i == RM_RANGE_TFLOW)
+ continue;
+
+ tisci_rm->rm_ranges[i] =
+ devm_ti_sci_get_of_resource(tisci_rm->tisci, dev,
+ tisci_rm->tisci_dev_id,
+ (char *)range_names[i]);
+ }
+
+ /* tchan ranges */
+ rm_res = tisci_rm->rm_ranges[RM_RANGE_TCHAN];
+ if (IS_ERR(rm_res)) {
+ bitmap_zero(ud->tchan_map, ud->tchan_cnt);
+ irq_res.sets = 1;
+ } else {
+ bitmap_fill(ud->tchan_map, ud->tchan_cnt);
+ for (i = 0; i < rm_res->sets; i++)
+ udma_mark_resource_ranges(ud, ud->tchan_map,
+ &rm_res->desc[i], "tchan");
+ irq_res.sets = rm_res->sets;
+ }
+
+ /* rchan and matching default flow ranges */
+ rm_res = tisci_rm->rm_ranges[RM_RANGE_RCHAN];
+ if (IS_ERR(rm_res)) {
+ bitmap_zero(ud->rchan_map, ud->rchan_cnt);
+ irq_res.sets++;
+ } else {
+ bitmap_fill(ud->rchan_map, ud->rchan_cnt);
+ for (i = 0; i < rm_res->sets; i++)
+ udma_mark_resource_ranges(ud, ud->rchan_map,
+ &rm_res->desc[i], "rchan");
+ irq_res.sets += rm_res->sets;
+ }
+
+ irq_res.desc = kcalloc(irq_res.sets, sizeof(*irq_res.desc), GFP_KERNEL);
+ if (!irq_res.desc)
+ return -ENOMEM;
+ rm_res = tisci_rm->rm_ranges[RM_RANGE_TCHAN];
+ if (IS_ERR(rm_res)) {
+ irq_res.desc[0].start = 0;
+ irq_res.desc[0].num = ud->tchan_cnt;
+ i = 1;
+ } else {
+ for (i = 0; i < rm_res->sets; i++) {
+ irq_res.desc[i].start = rm_res->desc[i].start;
+ irq_res.desc[i].num = rm_res->desc[i].num;
+ irq_res.desc[i].start_sec = rm_res->desc[i].start_sec;
+ irq_res.desc[i].num_sec = rm_res->desc[i].num_sec;
+ }
+ }
+ rm_res = tisci_rm->rm_ranges[RM_RANGE_RCHAN];
+ if (IS_ERR(rm_res)) {
+ irq_res.desc[i].start = 0;
+ irq_res.desc[i].num = ud->rchan_cnt;
+ } else {
+ for (j = 0; j < rm_res->sets; j++, i++) {
+ if (rm_res->desc[j].num) {
+ irq_res.desc[i].start = rm_res->desc[j].start +
+ ud->soc_data->oes.udma_rchan;
+ irq_res.desc[i].num = rm_res->desc[j].num;
+ }
+ if (rm_res->desc[j].num_sec) {
+ irq_res.desc[i].start_sec = rm_res->desc[j].start_sec +
+ ud->soc_data->oes.udma_rchan;
+ irq_res.desc[i].num_sec = rm_res->desc[j].num_sec;
+ }
+ }
+ }
+ ret = ti_sci_inta_msi_domain_alloc_irqs(ud->dev, &irq_res);
+ kfree(irq_res.desc);
+ if (ret) {
+ dev_err(ud->dev, "Failed to allocate MSI interrupts\n");
+ return ret;
+ }
+
+ /* GP rflow ranges */
+ rm_res = tisci_rm->rm_ranges[RM_RANGE_RFLOW];
+ if (IS_ERR(rm_res)) {
+ /* all gp flows are assigned exclusively to Linux */
+ bitmap_clear(ud->rflow_gp_map, ud->rchan_cnt,
+ ud->rflow_cnt - ud->rchan_cnt);
+ } else {
+ for (i = 0; i < rm_res->sets; i++)
+ udma_mark_resource_ranges(ud, ud->rflow_gp_map,
+ &rm_res->desc[i], "gp-rflow");
+ }
+
+ return 0;
+}
+
+static int bcdma_setup_resources(struct udma_dev *ud)
+{
+ int ret, i, j;
+ struct device *dev = ud->dev;
+ struct ti_sci_resource *rm_res, irq_res;
+ struct udma_tisci_rm *tisci_rm = &ud->tisci_rm;
+ const struct udma_oes_offsets *oes = &ud->soc_data->oes;
+ u32 cap;
+
+ /* Set up the throughput level start indexes */
+ cap = udma_read(ud->mmrs[MMR_GCFG], 0x2c);
+ if (BCDMA_CAP3_UBCHAN_CNT(cap)) {
+ ud->bchan_tpl.levels = 3;
+ ud->bchan_tpl.start_idx[1] = BCDMA_CAP3_UBCHAN_CNT(cap);
+ ud->bchan_tpl.start_idx[0] = BCDMA_CAP3_HBCHAN_CNT(cap);
+ } else if (BCDMA_CAP3_HBCHAN_CNT(cap)) {
+ ud->bchan_tpl.levels = 2;
+ ud->bchan_tpl.start_idx[0] = BCDMA_CAP3_HBCHAN_CNT(cap);
+ } else {
+ ud->bchan_tpl.levels = 1;
+ }
+
+ cap = udma_read(ud->mmrs[MMR_GCFG], 0x30);
+ if (BCDMA_CAP4_URCHAN_CNT(cap)) {
+ ud->rchan_tpl.levels = 3;
+ ud->rchan_tpl.start_idx[1] = BCDMA_CAP4_URCHAN_CNT(cap);
+ ud->rchan_tpl.start_idx[0] = BCDMA_CAP4_HRCHAN_CNT(cap);
+ } else if (BCDMA_CAP4_HRCHAN_CNT(cap)) {
+ ud->rchan_tpl.levels = 2;
+ ud->rchan_tpl.start_idx[0] = BCDMA_CAP4_HRCHAN_CNT(cap);
+ } else {
+ ud->rchan_tpl.levels = 1;
+ }
+
+ if (BCDMA_CAP4_UTCHAN_CNT(cap)) {
+ ud->tchan_tpl.levels = 3;
+ ud->tchan_tpl.start_idx[1] = BCDMA_CAP4_UTCHAN_CNT(cap);
+ ud->tchan_tpl.start_idx[0] = BCDMA_CAP4_HTCHAN_CNT(cap);
+ } else if (BCDMA_CAP4_HTCHAN_CNT(cap)) {
+ ud->tchan_tpl.levels = 2;
+ ud->tchan_tpl.start_idx[0] = BCDMA_CAP4_HTCHAN_CNT(cap);
+ } else {
+ ud->tchan_tpl.levels = 1;
+ }
+
+ ud->bchan_map = devm_kmalloc_array(dev, BITS_TO_LONGS(ud->bchan_cnt),
+ sizeof(unsigned long), GFP_KERNEL);
+ ud->bchans = devm_kcalloc(dev, ud->bchan_cnt, sizeof(*ud->bchans),
+ GFP_KERNEL);
+ ud->tchan_map = devm_kmalloc_array(dev, BITS_TO_LONGS(ud->tchan_cnt),
+ sizeof(unsigned long), GFP_KERNEL);
+ ud->tchans = devm_kcalloc(dev, ud->tchan_cnt, sizeof(*ud->tchans),
+ GFP_KERNEL);
+ ud->rchan_map = devm_kmalloc_array(dev, BITS_TO_LONGS(ud->rchan_cnt),
+ sizeof(unsigned long), GFP_KERNEL);
+ ud->rchans = devm_kcalloc(dev, ud->rchan_cnt, sizeof(*ud->rchans),
+ GFP_KERNEL);
+ /* BCDMA do not really have flows, but the driver expect it */
+ ud->rflow_in_use = devm_kcalloc(dev, BITS_TO_LONGS(ud->rchan_cnt),
+ sizeof(unsigned long),
+ GFP_KERNEL);
+ ud->rflows = devm_kcalloc(dev, ud->rchan_cnt, sizeof(*ud->rflows),
+ GFP_KERNEL);
+
+ if (!ud->bchan_map || !ud->tchan_map || !ud->rchan_map ||
+ !ud->rflow_in_use || !ud->bchans || !ud->tchans || !ud->rchans ||
+ !ud->rflows)
+ return -ENOMEM;
+
+ /* Get resource ranges from tisci */
+ for (i = 0; i < RM_RANGE_LAST; i++) {
+ if (i == RM_RANGE_RFLOW || i == RM_RANGE_TFLOW)
+ continue;
+ if (i == RM_RANGE_BCHAN && ud->bchan_cnt == 0)
+ continue;
+ if (i == RM_RANGE_TCHAN && ud->tchan_cnt == 0)
+ continue;
+ if (i == RM_RANGE_RCHAN && ud->rchan_cnt == 0)
+ continue;
+
+ tisci_rm->rm_ranges[i] =
+ devm_ti_sci_get_of_resource(tisci_rm->tisci, dev,
+ tisci_rm->tisci_dev_id,
+ (char *)range_names[i]);
+ }
+
+ irq_res.sets = 0;
+
+ /* bchan ranges */
+ if (ud->bchan_cnt) {
+ rm_res = tisci_rm->rm_ranges[RM_RANGE_BCHAN];
+ if (IS_ERR(rm_res)) {
+ bitmap_zero(ud->bchan_map, ud->bchan_cnt);
+ irq_res.sets++;
+ } else {
+ bitmap_fill(ud->bchan_map, ud->bchan_cnt);
+ for (i = 0; i < rm_res->sets; i++)
+ udma_mark_resource_ranges(ud, ud->bchan_map,
+ &rm_res->desc[i],
+ "bchan");
+ irq_res.sets += rm_res->sets;
+ }
+ }
+
+ /* tchan ranges */
+ if (ud->tchan_cnt) {
+ rm_res = tisci_rm->rm_ranges[RM_RANGE_TCHAN];
+ if (IS_ERR(rm_res)) {
+ bitmap_zero(ud->tchan_map, ud->tchan_cnt);
+ irq_res.sets += 2;
+ } else {
+ bitmap_fill(ud->tchan_map, ud->tchan_cnt);
+ for (i = 0; i < rm_res->sets; i++)
+ udma_mark_resource_ranges(ud, ud->tchan_map,
+ &rm_res->desc[i],
+ "tchan");
+ irq_res.sets += rm_res->sets * 2;
+ }
+ }
+
+ /* rchan ranges */
+ if (ud->rchan_cnt) {
+ rm_res = tisci_rm->rm_ranges[RM_RANGE_RCHAN];
+ if (IS_ERR(rm_res)) {
+ bitmap_zero(ud->rchan_map, ud->rchan_cnt);
+ irq_res.sets += 2;
+ } else {
+ bitmap_fill(ud->rchan_map, ud->rchan_cnt);
+ for (i = 0; i < rm_res->sets; i++)
+ udma_mark_resource_ranges(ud, ud->rchan_map,
+ &rm_res->desc[i],
+ "rchan");
+ irq_res.sets += rm_res->sets * 2;
+ }
+ }
+
+ irq_res.desc = kcalloc(irq_res.sets, sizeof(*irq_res.desc), GFP_KERNEL);
+ if (!irq_res.desc)
+ return -ENOMEM;
+ if (ud->bchan_cnt) {
+ rm_res = tisci_rm->rm_ranges[RM_RANGE_BCHAN];
+ if (IS_ERR(rm_res)) {
+ irq_res.desc[0].start = oes->bcdma_bchan_ring;
+ irq_res.desc[0].num = ud->bchan_cnt;
+ i = 1;
+ } else {
+ for (i = 0; i < rm_res->sets; i++) {
+ irq_res.desc[i].start = rm_res->desc[i].start +
+ oes->bcdma_bchan_ring;
+ irq_res.desc[i].num = rm_res->desc[i].num;
+ }
+ }
+ }
+ if (ud->tchan_cnt) {
+ rm_res = tisci_rm->rm_ranges[RM_RANGE_TCHAN];
+ if (IS_ERR(rm_res)) {
+ irq_res.desc[i].start = oes->bcdma_tchan_data;
+ irq_res.desc[i].num = ud->tchan_cnt;
+ irq_res.desc[i + 1].start = oes->bcdma_tchan_ring;
+ irq_res.desc[i + 1].num = ud->tchan_cnt;
+ i += 2;
+ } else {
+ for (j = 0; j < rm_res->sets; j++, i += 2) {
+ irq_res.desc[i].start = rm_res->desc[j].start +
+ oes->bcdma_tchan_data;
+ irq_res.desc[i].num = rm_res->desc[j].num;
+
+ irq_res.desc[i + 1].start = rm_res->desc[j].start +
+ oes->bcdma_tchan_ring;
+ irq_res.desc[i + 1].num = rm_res->desc[j].num;
+ }
+ }
+ }
+ if (ud->rchan_cnt) {
+ rm_res = tisci_rm->rm_ranges[RM_RANGE_RCHAN];
+ if (IS_ERR(rm_res)) {
+ irq_res.desc[i].start = oes->bcdma_rchan_data;
+ irq_res.desc[i].num = ud->rchan_cnt;
+ irq_res.desc[i + 1].start = oes->bcdma_rchan_ring;
+ irq_res.desc[i + 1].num = ud->rchan_cnt;
+ i += 2;
+ } else {
+ for (j = 0; j < rm_res->sets; j++, i += 2) {
+ irq_res.desc[i].start = rm_res->desc[j].start +
+ oes->bcdma_rchan_data;
+ irq_res.desc[i].num = rm_res->desc[j].num;
+
+ irq_res.desc[i + 1].start = rm_res->desc[j].start +
+ oes->bcdma_rchan_ring;
+ irq_res.desc[i + 1].num = rm_res->desc[j].num;
+ }
+ }
+ }
+
+ ret = ti_sci_inta_msi_domain_alloc_irqs(ud->dev, &irq_res);
+ kfree(irq_res.desc);
+ if (ret) {
+ dev_err(ud->dev, "Failed to allocate MSI interrupts\n");
+ return ret;
+ }
+
+ return 0;
+}
+
+static int pktdma_setup_resources(struct udma_dev *ud)
+{
+ int ret, i, j;
+ struct device *dev = ud->dev;
+ struct ti_sci_resource *rm_res, irq_res;
+ struct udma_tisci_rm *tisci_rm = &ud->tisci_rm;
+ const struct udma_oes_offsets *oes = &ud->soc_data->oes;
+ u32 cap3;
+
+ /* Set up the throughput level start indexes */
+ cap3 = udma_read(ud->mmrs[MMR_GCFG], 0x2c);
+ if (UDMA_CAP3_UCHAN_CNT(cap3)) {
+ ud->tchan_tpl.levels = 3;
+ ud->tchan_tpl.start_idx[1] = UDMA_CAP3_UCHAN_CNT(cap3);
+ ud->tchan_tpl.start_idx[0] = UDMA_CAP3_HCHAN_CNT(cap3);
+ } else if (UDMA_CAP3_HCHAN_CNT(cap3)) {
+ ud->tchan_tpl.levels = 2;
+ ud->tchan_tpl.start_idx[0] = UDMA_CAP3_HCHAN_CNT(cap3);
+ } else {
+ ud->tchan_tpl.levels = 1;
+ }
+
+ ud->rchan_tpl.levels = ud->tchan_tpl.levels;
+ ud->rchan_tpl.start_idx[0] = ud->tchan_tpl.start_idx[0];
+ ud->rchan_tpl.start_idx[1] = ud->tchan_tpl.start_idx[1];
+
+ ud->tchan_map = devm_kmalloc_array(dev, BITS_TO_LONGS(ud->tchan_cnt),
+ sizeof(unsigned long), GFP_KERNEL);
+ ud->tchans = devm_kcalloc(dev, ud->tchan_cnt, sizeof(*ud->tchans),
+ GFP_KERNEL);
+ ud->rchan_map = devm_kmalloc_array(dev, BITS_TO_LONGS(ud->rchan_cnt),
+ sizeof(unsigned long), GFP_KERNEL);
+ ud->rchans = devm_kcalloc(dev, ud->rchan_cnt, sizeof(*ud->rchans),
+ GFP_KERNEL);
+ ud->rflow_in_use = devm_kcalloc(dev, BITS_TO_LONGS(ud->rflow_cnt),
+ sizeof(unsigned long),
+ GFP_KERNEL);
+ ud->rflows = devm_kcalloc(dev, ud->rflow_cnt, sizeof(*ud->rflows),
+ GFP_KERNEL);
+ ud->tflow_map = devm_kmalloc_array(dev, BITS_TO_LONGS(ud->tflow_cnt),
+ sizeof(unsigned long), GFP_KERNEL);
+
+ if (!ud->tchan_map || !ud->rchan_map || !ud->tflow_map || !ud->tchans ||
+ !ud->rchans || !ud->rflows || !ud->rflow_in_use)
+ return -ENOMEM;
+
+ /* Get resource ranges from tisci */
+ for (i = 0; i < RM_RANGE_LAST; i++) {
+ if (i == RM_RANGE_BCHAN)
+ continue;
+
+ tisci_rm->rm_ranges[i] =
+ devm_ti_sci_get_of_resource(tisci_rm->tisci, dev,
+ tisci_rm->tisci_dev_id,
+ (char *)range_names[i]);
+ }
+
+ /* tchan ranges */
+ rm_res = tisci_rm->rm_ranges[RM_RANGE_TCHAN];
+ if (IS_ERR(rm_res)) {
+ bitmap_zero(ud->tchan_map, ud->tchan_cnt);
+ } else {
+ bitmap_fill(ud->tchan_map, ud->tchan_cnt);
+ for (i = 0; i < rm_res->sets; i++)
+ udma_mark_resource_ranges(ud, ud->tchan_map,
+ &rm_res->desc[i], "tchan");
+ }
+
+ /* rchan ranges */
+ rm_res = tisci_rm->rm_ranges[RM_RANGE_RCHAN];
+ if (IS_ERR(rm_res)) {
+ bitmap_zero(ud->rchan_map, ud->rchan_cnt);
+ } else {
+ bitmap_fill(ud->rchan_map, ud->rchan_cnt);
+ for (i = 0; i < rm_res->sets; i++)
+ udma_mark_resource_ranges(ud, ud->rchan_map,
+ &rm_res->desc[i], "rchan");
+ }
+
+ /* rflow ranges */
+ rm_res = tisci_rm->rm_ranges[RM_RANGE_RFLOW];
+ if (IS_ERR(rm_res)) {
+ /* all rflows are assigned exclusively to Linux */
+ bitmap_zero(ud->rflow_in_use, ud->rflow_cnt);
+ irq_res.sets = 1;
+ } else {
+ bitmap_fill(ud->rflow_in_use, ud->rflow_cnt);
+ for (i = 0; i < rm_res->sets; i++)
+ udma_mark_resource_ranges(ud, ud->rflow_in_use,
+ &rm_res->desc[i], "rflow");
+ irq_res.sets = rm_res->sets;
+ }
+
+ /* tflow ranges */
+ rm_res = tisci_rm->rm_ranges[RM_RANGE_TFLOW];
+ if (IS_ERR(rm_res)) {
+ /* all tflows are assigned exclusively to Linux */
+ bitmap_zero(ud->tflow_map, ud->tflow_cnt);
+ irq_res.sets++;
+ } else {
+ bitmap_fill(ud->tflow_map, ud->tflow_cnt);
+ for (i = 0; i < rm_res->sets; i++)
+ udma_mark_resource_ranges(ud, ud->tflow_map,
+ &rm_res->desc[i], "tflow");
+ irq_res.sets += rm_res->sets;
+ }
+
+ irq_res.desc = kcalloc(irq_res.sets, sizeof(*irq_res.desc), GFP_KERNEL);
+ if (!irq_res.desc)
+ return -ENOMEM;
+ rm_res = tisci_rm->rm_ranges[RM_RANGE_TFLOW];
+ if (IS_ERR(rm_res)) {
+ irq_res.desc[0].start = oes->pktdma_tchan_flow;
+ irq_res.desc[0].num = ud->tflow_cnt;
+ i = 1;
+ } else {
+ for (i = 0; i < rm_res->sets; i++) {
+ irq_res.desc[i].start = rm_res->desc[i].start +
+ oes->pktdma_tchan_flow;
+ irq_res.desc[i].num = rm_res->desc[i].num;
+ }
+ }
+ rm_res = tisci_rm->rm_ranges[RM_RANGE_RFLOW];
+ if (IS_ERR(rm_res)) {
+ irq_res.desc[i].start = oes->pktdma_rchan_flow;
+ irq_res.desc[i].num = ud->rflow_cnt;
+ } else {
+ for (j = 0; j < rm_res->sets; j++, i++) {
+ irq_res.desc[i].start = rm_res->desc[j].start +
+ oes->pktdma_rchan_flow;
+ irq_res.desc[i].num = rm_res->desc[j].num;
+ }
+ }
+ ret = ti_sci_inta_msi_domain_alloc_irqs(ud->dev, &irq_res);
+ kfree(irq_res.desc);
+ if (ret) {
+ dev_err(ud->dev, "Failed to allocate MSI interrupts\n");
+ return ret;
+ }
+
+ return 0;
+}
+
+static int setup_resources(struct udma_dev *ud)
+{
+ struct device *dev = ud->dev;
+ int ch_count, ret;
+
+ switch (ud->match_data->type) {
+ case DMA_TYPE_UDMA:
+ ret = udma_setup_resources(ud);
+ break;
+ case DMA_TYPE_BCDMA:
+ ret = bcdma_setup_resources(ud);
+ break;
+ case DMA_TYPE_PKTDMA:
+ ret = pktdma_setup_resources(ud);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if (ret)
+ return ret;
+
+ ch_count = ud->bchan_cnt + ud->tchan_cnt + ud->rchan_cnt;
+ if (ud->bchan_cnt)
+ ch_count -= bitmap_weight(ud->bchan_map, ud->bchan_cnt);
+ ch_count -= bitmap_weight(ud->tchan_map, ud->tchan_cnt);
+ ch_count -= bitmap_weight(ud->rchan_map, ud->rchan_cnt);
+ if (!ch_count)
+ return -ENODEV;
+
+ ud->channels = devm_kcalloc(dev, ch_count, sizeof(*ud->channels),
+ GFP_KERNEL);
+ if (!ud->channels)
+ return -ENOMEM;
+
+ switch (ud->match_data->type) {
+ case DMA_TYPE_UDMA:
+ dev_info(dev,
+ "Channels: %d (tchan: %u, rchan: %u, gp-rflow: %u)\n",
+ ch_count,
+ ud->tchan_cnt - bitmap_weight(ud->tchan_map,
+ ud->tchan_cnt),
+ ud->rchan_cnt - bitmap_weight(ud->rchan_map,
+ ud->rchan_cnt),
+ ud->rflow_cnt - bitmap_weight(ud->rflow_gp_map,
+ ud->rflow_cnt));
+ break;
+ case DMA_TYPE_BCDMA:
+ dev_info(dev,
+ "Channels: %d (bchan: %u, tchan: %u, rchan: %u)\n",
+ ch_count,
+ ud->bchan_cnt - bitmap_weight(ud->bchan_map,
+ ud->bchan_cnt),
+ ud->tchan_cnt - bitmap_weight(ud->tchan_map,
+ ud->tchan_cnt),
+ ud->rchan_cnt - bitmap_weight(ud->rchan_map,
+ ud->rchan_cnt));
+ break;
+ case DMA_TYPE_PKTDMA:
+ dev_info(dev,
+ "Channels: %d (tchan: %u, rchan: %u)\n",
+ ch_count,
+ ud->tchan_cnt - bitmap_weight(ud->tchan_map,
+ ud->tchan_cnt),
+ ud->rchan_cnt - bitmap_weight(ud->rchan_map,
+ ud->rchan_cnt));
+ break;
+ default:
+ break;
+ }
+
+ return ch_count;
+}
+
+static int udma_setup_rx_flush(struct udma_dev *ud)
+{
+ struct udma_rx_flush *rx_flush = &ud->rx_flush;
+ struct cppi5_desc_hdr_t *tr_desc;
+ struct cppi5_tr_type1_t *tr_req;
+ struct cppi5_host_desc_t *desc;
+ struct device *dev = ud->dev;
+ struct udma_hwdesc *hwdesc;
+ size_t tr_size;
+
+ /* Allocate 1K buffer for discarded data on RX channel teardown */
+ rx_flush->buffer_size = SZ_1K;
+ rx_flush->buffer_vaddr = devm_kzalloc(dev, rx_flush->buffer_size,
+ GFP_KERNEL);
+ if (!rx_flush->buffer_vaddr)
+ return -ENOMEM;
+
+ rx_flush->buffer_paddr = dma_map_single(dev, rx_flush->buffer_vaddr,
+ rx_flush->buffer_size,
+ DMA_TO_DEVICE);
+ if (dma_mapping_error(dev, rx_flush->buffer_paddr))
+ return -ENOMEM;
+
+ /* Set up descriptor to be used for TR mode */
+ hwdesc = &rx_flush->hwdescs[0];
+ tr_size = sizeof(struct cppi5_tr_type1_t);
+ hwdesc->cppi5_desc_size = cppi5_trdesc_calc_size(tr_size, 1);
+ hwdesc->cppi5_desc_size = ALIGN(hwdesc->cppi5_desc_size,
+ ud->desc_align);
+
+ hwdesc->cppi5_desc_vaddr = devm_kzalloc(dev, hwdesc->cppi5_desc_size,
+ GFP_KERNEL);
+ if (!hwdesc->cppi5_desc_vaddr)
+ return -ENOMEM;
+
+ hwdesc->cppi5_desc_paddr = dma_map_single(dev, hwdesc->cppi5_desc_vaddr,
+ hwdesc->cppi5_desc_size,
+ DMA_TO_DEVICE);
+ if (dma_mapping_error(dev, hwdesc->cppi5_desc_paddr))
+ return -ENOMEM;
+
+ /* Start of the TR req records */
+ hwdesc->tr_req_base = hwdesc->cppi5_desc_vaddr + tr_size;
+ /* Start address of the TR response array */
+ hwdesc->tr_resp_base = hwdesc->tr_req_base + tr_size;
+
+ tr_desc = hwdesc->cppi5_desc_vaddr;
+ cppi5_trdesc_init(tr_desc, 1, tr_size, 0, 0);
+ cppi5_desc_set_pktids(tr_desc, 0, CPPI5_INFO1_DESC_FLOWID_DEFAULT);
+ cppi5_desc_set_retpolicy(tr_desc, 0, 0);
+
+ tr_req = hwdesc->tr_req_base;
+ cppi5_tr_init(&tr_req->flags, CPPI5_TR_TYPE1, false, false,
+ CPPI5_TR_EVENT_SIZE_COMPLETION, 0);
+ cppi5_tr_csf_set(&tr_req->flags, CPPI5_TR_CSF_SUPR_EVT);
+
+ tr_req->addr = rx_flush->buffer_paddr;
+ tr_req->icnt0 = rx_flush->buffer_size;
+ tr_req->icnt1 = 1;
+
+ dma_sync_single_for_device(dev, hwdesc->cppi5_desc_paddr,
+ hwdesc->cppi5_desc_size, DMA_TO_DEVICE);
+
+ /* Set up descriptor to be used for packet mode */
+ hwdesc = &rx_flush->hwdescs[1];
+ hwdesc->cppi5_desc_size = ALIGN(sizeof(struct cppi5_host_desc_t) +
+ CPPI5_INFO0_HDESC_EPIB_SIZE +
+ CPPI5_INFO0_HDESC_PSDATA_MAX_SIZE,
+ ud->desc_align);
+
+ hwdesc->cppi5_desc_vaddr = devm_kzalloc(dev, hwdesc->cppi5_desc_size,
+ GFP_KERNEL);
+ if (!hwdesc->cppi5_desc_vaddr)
+ return -ENOMEM;
+
+ hwdesc->cppi5_desc_paddr = dma_map_single(dev, hwdesc->cppi5_desc_vaddr,
+ hwdesc->cppi5_desc_size,
+ DMA_TO_DEVICE);
+ if (dma_mapping_error(dev, hwdesc->cppi5_desc_paddr))
+ return -ENOMEM;
+
+ desc = hwdesc->cppi5_desc_vaddr;
+ cppi5_hdesc_init(desc, 0, 0);
+ cppi5_desc_set_pktids(&desc->hdr, 0, CPPI5_INFO1_DESC_FLOWID_DEFAULT);
+ cppi5_desc_set_retpolicy(&desc->hdr, 0, 0);
+
+ cppi5_hdesc_attach_buf(desc,
+ rx_flush->buffer_paddr, rx_flush->buffer_size,
+ rx_flush->buffer_paddr, rx_flush->buffer_size);
+
+ dma_sync_single_for_device(dev, hwdesc->cppi5_desc_paddr,
+ hwdesc->cppi5_desc_size, DMA_TO_DEVICE);
+ return 0;
+}
+
+#ifdef CONFIG_DEBUG_FS
+static void udma_dbg_summary_show_chan(struct seq_file *s,
+ struct dma_chan *chan)
+{
+ struct udma_chan *uc = to_udma_chan(chan);
+ struct udma_chan_config *ucc = &uc->config;
+
+ seq_printf(s, " %-13s| %s", dma_chan_name(chan),
+ chan->dbg_client_name ?: "in-use");
+ if (ucc->tr_trigger_type)
+ seq_puts(s, " (triggered, ");
+ else
+ seq_printf(s, " (%s, ",
+ dmaengine_get_direction_text(uc->config.dir));
+
+ switch (uc->config.dir) {
+ case DMA_MEM_TO_MEM:
+ if (uc->ud->match_data->type == DMA_TYPE_BCDMA) {
+ seq_printf(s, "bchan%d)\n", uc->bchan->id);
+ return;
+ }
+
+ seq_printf(s, "chan%d pair [0x%04x -> 0x%04x], ", uc->tchan->id,
+ ucc->src_thread, ucc->dst_thread);
+ break;
+ case DMA_DEV_TO_MEM:
+ seq_printf(s, "rchan%d [0x%04x -> 0x%04x], ", uc->rchan->id,
+ ucc->src_thread, ucc->dst_thread);
+ if (uc->ud->match_data->type == DMA_TYPE_PKTDMA)
+ seq_printf(s, "rflow%d, ", uc->rflow->id);
+ break;
+ case DMA_MEM_TO_DEV:
+ seq_printf(s, "tchan%d [0x%04x -> 0x%04x], ", uc->tchan->id,
+ ucc->src_thread, ucc->dst_thread);
+ if (uc->ud->match_data->type == DMA_TYPE_PKTDMA)
+ seq_printf(s, "tflow%d, ", uc->tchan->tflow_id);
+ break;
+ default:
+ seq_printf(s, ")\n");
+ return;
+ }
+
+ if (ucc->ep_type == PSIL_EP_NATIVE) {
+ seq_printf(s, "PSI-L Native");
+ if (ucc->metadata_size) {
+ seq_printf(s, "[%s", ucc->needs_epib ? " EPIB" : "");
+ if (ucc->psd_size)
+ seq_printf(s, " PSDsize:%u", ucc->psd_size);
+ seq_printf(s, " ]");
+ }
+ } else {
+ seq_printf(s, "PDMA");
+ if (ucc->enable_acc32 || ucc->enable_burst)
+ seq_printf(s, "[%s%s ]",
+ ucc->enable_acc32 ? " ACC32" : "",
+ ucc->enable_burst ? " BURST" : "");
+ }
+
+ seq_printf(s, ", %s)\n", ucc->pkt_mode ? "Packet mode" : "TR mode");
+}
+
+static void udma_dbg_summary_show(struct seq_file *s,
+ struct dma_device *dma_dev)
+{
+ struct dma_chan *chan;
+
+ list_for_each_entry(chan, &dma_dev->channels, device_node) {
+ if (chan->client_count)
+ udma_dbg_summary_show_chan(s, chan);
+ }
+}
+#endif /* CONFIG_DEBUG_FS */
+
+static enum dmaengine_alignment udma_get_copy_align(struct udma_dev *ud)
+{
+ const struct udma_match_data *match_data = ud->match_data;
+ u8 tpl;
+
+ if (!match_data->enable_memcpy_support)
+ return DMAENGINE_ALIGN_8_BYTES;
+
+ /* Get the highest TPL level the device supports for memcpy */
+ if (ud->bchan_cnt)
+ tpl = udma_get_chan_tpl_index(&ud->bchan_tpl, 0);
+ else if (ud->tchan_cnt)
+ tpl = udma_get_chan_tpl_index(&ud->tchan_tpl, 0);
+ else
+ return DMAENGINE_ALIGN_8_BYTES;
+
+ switch (match_data->burst_size[tpl]) {
+ case TI_SCI_RM_UDMAP_CHAN_BURST_SIZE_256_BYTES:
+ return DMAENGINE_ALIGN_256_BYTES;
+ case TI_SCI_RM_UDMAP_CHAN_BURST_SIZE_128_BYTES:
+ return DMAENGINE_ALIGN_128_BYTES;
+ case TI_SCI_RM_UDMAP_CHAN_BURST_SIZE_64_BYTES:
+ fallthrough;
+ default:
+ return DMAENGINE_ALIGN_64_BYTES;
+ }
+}
+
+#define TI_UDMAC_BUSWIDTHS (BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | \
+ BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \
+ BIT(DMA_SLAVE_BUSWIDTH_3_BYTES) | \
+ BIT(DMA_SLAVE_BUSWIDTH_4_BYTES) | \
+ BIT(DMA_SLAVE_BUSWIDTH_8_BYTES))
+
+static int udma_probe(struct platform_device *pdev)
+{
+ struct device_node *navss_node = pdev->dev.parent->of_node;
+ const struct soc_device_attribute *soc;
+ struct device *dev = &pdev->dev;
+ struct udma_dev *ud;
+ const struct of_device_id *match;
+ int i, ret;
+ int ch_count;
+
+ ret = dma_coerce_mask_and_coherent(dev, DMA_BIT_MASK(48));
+ if (ret)
+ dev_err(dev, "failed to set dma mask stuff\n");
+
+ ud = devm_kzalloc(dev, sizeof(*ud), GFP_KERNEL);
+ if (!ud)
+ return -ENOMEM;
+
+ match = of_match_node(udma_of_match, dev->of_node);
+ if (!match)
+ match = of_match_node(bcdma_of_match, dev->of_node);
+ if (!match) {
+ match = of_match_node(pktdma_of_match, dev->of_node);
+ if (!match) {
+ dev_err(dev, "No compatible match found\n");
+ return -ENODEV;
+ }
+ }
+ ud->match_data = match->data;
+
+ soc = soc_device_match(k3_soc_devices);
+ if (!soc) {
+ dev_err(dev, "No compatible SoC found\n");
+ return -ENODEV;
+ }
+ ud->soc_data = soc->data;
+
+ ret = udma_get_mmrs(pdev, ud);
+ if (ret)
+ return ret;
+
+ ud->tisci_rm.tisci = ti_sci_get_by_phandle(dev->of_node, "ti,sci");
+ if (IS_ERR(ud->tisci_rm.tisci))
+ return PTR_ERR(ud->tisci_rm.tisci);
+
+ ret = of_property_read_u32(dev->of_node, "ti,sci-dev-id",
+ &ud->tisci_rm.tisci_dev_id);
+ if (ret) {
+ dev_err(dev, "ti,sci-dev-id read failure %d\n", ret);
+ return ret;
+ }
+ pdev->id = ud->tisci_rm.tisci_dev_id;
+
+ ret = of_property_read_u32(navss_node, "ti,sci-dev-id",
+ &ud->tisci_rm.tisci_navss_dev_id);
+ if (ret) {
+ dev_err(dev, "NAVSS ti,sci-dev-id read failure %d\n", ret);
+ return ret;
+ }
+
+ if (ud->match_data->type == DMA_TYPE_UDMA) {
+ ret = of_property_read_u32(dev->of_node, "ti,udma-atype",
+ &ud->atype);
+ if (!ret && ud->atype > 2) {
+ dev_err(dev, "Invalid atype: %u\n", ud->atype);
+ return -EINVAL;
+ }
+ } else {
+ ret = of_property_read_u32(dev->of_node, "ti,asel",
+ &ud->asel);
+ if (!ret && ud->asel > 15) {
+ dev_err(dev, "Invalid asel: %u\n", ud->asel);
+ return -EINVAL;
+ }
+ }
+
+ ud->tisci_rm.tisci_udmap_ops = &ud->tisci_rm.tisci->ops.rm_udmap_ops;
+ ud->tisci_rm.tisci_psil_ops = &ud->tisci_rm.tisci->ops.rm_psil_ops;
+
+ if (ud->match_data->type == DMA_TYPE_UDMA) {
+ ud->ringacc = of_k3_ringacc_get_by_phandle(dev->of_node, "ti,ringacc");
+ } else {
+ struct k3_ringacc_init_data ring_init_data;
+
+ ring_init_data.tisci = ud->tisci_rm.tisci;
+ ring_init_data.tisci_dev_id = ud->tisci_rm.tisci_dev_id;
+ if (ud->match_data->type == DMA_TYPE_BCDMA) {
+ ring_init_data.num_rings = ud->bchan_cnt +
+ ud->tchan_cnt +
+ ud->rchan_cnt;
+ } else {
+ ring_init_data.num_rings = ud->rflow_cnt +
+ ud->tflow_cnt;
+ }
+
+ ud->ringacc = k3_ringacc_dmarings_init(pdev, &ring_init_data);
+ }
+
+ if (IS_ERR(ud->ringacc))
+ return PTR_ERR(ud->ringacc);
+
+ dev->msi.domain = of_msi_get_domain(dev, dev->of_node,
+ DOMAIN_BUS_TI_SCI_INTA_MSI);
+ if (!dev->msi.domain) {
+ dev_err(dev, "Failed to get MSI domain\n");
+ return -EPROBE_DEFER;
+ }
+
+ dma_cap_set(DMA_SLAVE, ud->ddev.cap_mask);
+ /* cyclic operation is not supported via PKTDMA */
+ if (ud->match_data->type != DMA_TYPE_PKTDMA) {
+ dma_cap_set(DMA_CYCLIC, ud->ddev.cap_mask);
+ ud->ddev.device_prep_dma_cyclic = udma_prep_dma_cyclic;
+ }
+
+ ud->ddev.device_config = udma_slave_config;
+ ud->ddev.device_prep_slave_sg = udma_prep_slave_sg;
+ ud->ddev.device_issue_pending = udma_issue_pending;
+ ud->ddev.device_tx_status = udma_tx_status;
+ ud->ddev.device_pause = udma_pause;
+ ud->ddev.device_resume = udma_resume;
+ ud->ddev.device_terminate_all = udma_terminate_all;
+ ud->ddev.device_synchronize = udma_synchronize;
+#ifdef CONFIG_DEBUG_FS
+ ud->ddev.dbg_summary_show = udma_dbg_summary_show;
+#endif
+
+ switch (ud->match_data->type) {
+ case DMA_TYPE_UDMA:
+ ud->ddev.device_alloc_chan_resources =
+ udma_alloc_chan_resources;
+ break;
+ case DMA_TYPE_BCDMA:
+ ud->ddev.device_alloc_chan_resources =
+ bcdma_alloc_chan_resources;
+ ud->ddev.device_router_config = bcdma_router_config;
+ break;
+ case DMA_TYPE_PKTDMA:
+ ud->ddev.device_alloc_chan_resources =
+ pktdma_alloc_chan_resources;
+ break;
+ default:
+ return -EINVAL;
+ }
+ ud->ddev.device_free_chan_resources = udma_free_chan_resources;
+
+ ud->ddev.src_addr_widths = TI_UDMAC_BUSWIDTHS;
+ ud->ddev.dst_addr_widths = TI_UDMAC_BUSWIDTHS;
+ ud->ddev.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
+ ud->ddev.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
+ ud->ddev.desc_metadata_modes = DESC_METADATA_CLIENT |
+ DESC_METADATA_ENGINE;
+ if (ud->match_data->enable_memcpy_support &&
+ !(ud->match_data->type == DMA_TYPE_BCDMA && ud->bchan_cnt == 0)) {
+ dma_cap_set(DMA_MEMCPY, ud->ddev.cap_mask);
+ ud->ddev.device_prep_dma_memcpy = udma_prep_dma_memcpy;
+ ud->ddev.directions |= BIT(DMA_MEM_TO_MEM);
+ }
+
+ ud->ddev.dev = dev;
+ ud->dev = dev;
+ ud->psil_base = ud->match_data->psil_base;
+
+ INIT_LIST_HEAD(&ud->ddev.channels);
+ INIT_LIST_HEAD(&ud->desc_to_purge);
+
+ ch_count = setup_resources(ud);
+ if (ch_count <= 0)
+ return ch_count;
+
+ spin_lock_init(&ud->lock);
+ INIT_WORK(&ud->purge_work, udma_purge_desc_work);
+
+ ud->desc_align = 64;
+ if (ud->desc_align < dma_get_cache_alignment())
+ ud->desc_align = dma_get_cache_alignment();
+
+ ret = udma_setup_rx_flush(ud);
+ if (ret)
+ return ret;
+
+ for (i = 0; i < ud->bchan_cnt; i++) {
+ struct udma_bchan *bchan = &ud->bchans[i];
+
+ bchan->id = i;
+ bchan->reg_rt = ud->mmrs[MMR_BCHANRT] + i * 0x1000;
+ }
+
+ for (i = 0; i < ud->tchan_cnt; i++) {
+ struct udma_tchan *tchan = &ud->tchans[i];
+
+ tchan->id = i;
+ tchan->reg_rt = ud->mmrs[MMR_TCHANRT] + i * 0x1000;
+ }
+
+ for (i = 0; i < ud->rchan_cnt; i++) {
+ struct udma_rchan *rchan = &ud->rchans[i];
+
+ rchan->id = i;
+ rchan->reg_rt = ud->mmrs[MMR_RCHANRT] + i * 0x1000;
+ }
+
+ for (i = 0; i < ud->rflow_cnt; i++) {
+ struct udma_rflow *rflow = &ud->rflows[i];
+
+ rflow->id = i;
+ }
+
+ for (i = 0; i < ch_count; i++) {
+ struct udma_chan *uc = &ud->channels[i];
+
+ uc->ud = ud;
+ uc->vc.desc_free = udma_desc_free;
+ uc->id = i;
+ uc->bchan = NULL;
+ uc->tchan = NULL;
+ uc->rchan = NULL;
+ uc->config.remote_thread_id = -1;
+ uc->config.mapped_channel_id = -1;
+ uc->config.default_flow_id = -1;
+ uc->config.dir = DMA_MEM_TO_MEM;
+ uc->name = devm_kasprintf(dev, GFP_KERNEL, "%s chan%d",
+ dev_name(dev), i);
+
+ vchan_init(&uc->vc, &ud->ddev);
+ /* Use custom vchan completion handling */
+ tasklet_setup(&uc->vc.task, udma_vchan_complete);
+ init_completion(&uc->teardown_completed);
+ INIT_DELAYED_WORK(&uc->tx_drain.work, udma_check_tx_completion);
+ }
+
+ /* Configure the copy_align to the maximum burst size the device supports */
+ ud->ddev.copy_align = udma_get_copy_align(ud);
+
+ ret = dma_async_device_register(&ud->ddev);
+ if (ret) {
+ dev_err(dev, "failed to register slave DMA engine: %d\n", ret);
+ return ret;
+ }
+
+ platform_set_drvdata(pdev, ud);
+
+ ret = of_dma_controller_register(dev->of_node, udma_of_xlate, ud);
+ if (ret) {
+ dev_err(dev, "failed to register of_dma controller\n");
+ dma_async_device_unregister(&ud->ddev);
+ }
+
+ return ret;
+}
+
+static struct platform_driver udma_driver = {
+ .driver = {
+ .name = "ti-udma",
+ .of_match_table = udma_of_match,
+ .suppress_bind_attrs = true,
+ },
+ .probe = udma_probe,
+};
+builtin_platform_driver(udma_driver);
+
+static struct platform_driver bcdma_driver = {
+ .driver = {
+ .name = "ti-bcdma",
+ .of_match_table = bcdma_of_match,
+ .suppress_bind_attrs = true,
+ },
+ .probe = udma_probe,
+};
+builtin_platform_driver(bcdma_driver);
+
+static struct platform_driver pktdma_driver = {
+ .driver = {
+ .name = "ti-pktdma",
+ .of_match_table = pktdma_of_match,
+ .suppress_bind_attrs = true,
+ },
+ .probe = udma_probe,
+};
+builtin_platform_driver(pktdma_driver);
+
+/* Private interfaces to UDMA */
+#include "k3-udma-private.c"
diff --git a/drivers/dma/ti/k3-udma.h b/drivers/dma/ti/k3-udma.h
new file mode 100644
index 000000000..d349c6d48
--- /dev/null
+++ b/drivers/dma/ti/k3-udma.h
@@ -0,0 +1,164 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2019 Texas Instruments Incorporated - http://www.ti.com
+ */
+
+#ifndef K3_UDMA_H_
+#define K3_UDMA_H_
+
+#include <linux/soc/ti/ti_sci_protocol.h>
+
+/* Global registers */
+#define UDMA_REV_REG 0x0
+#define UDMA_PERF_CTL_REG 0x4
+#define UDMA_EMU_CTL_REG 0x8
+#define UDMA_PSIL_TO_REG 0x10
+#define UDMA_UTC_CTL_REG 0x1c
+#define UDMA_CAP_REG(i) (0x20 + ((i) * 4))
+#define UDMA_RX_FLOW_ID_FW_OES_REG 0x80
+#define UDMA_RX_FLOW_ID_FW_STATUS_REG 0x88
+
+/* BCHANRT/TCHANRT/RCHANRT registers */
+#define UDMA_CHAN_RT_CTL_REG 0x0
+#define UDMA_CHAN_RT_SWTRIG_REG 0x8
+#define UDMA_CHAN_RT_STDATA_REG 0x80
+
+#define UDMA_CHAN_RT_PEER_REG(i) (0x200 + ((i) * 0x4))
+#define UDMA_CHAN_RT_PEER_STATIC_TR_XY_REG \
+ UDMA_CHAN_RT_PEER_REG(0) /* PSI-L: 0x400 */
+#define UDMA_CHAN_RT_PEER_STATIC_TR_Z_REG \
+ UDMA_CHAN_RT_PEER_REG(1) /* PSI-L: 0x401 */
+#define UDMA_CHAN_RT_PEER_BCNT_REG \
+ UDMA_CHAN_RT_PEER_REG(4) /* PSI-L: 0x404 */
+#define UDMA_CHAN_RT_PEER_RT_EN_REG \
+ UDMA_CHAN_RT_PEER_REG(8) /* PSI-L: 0x408 */
+
+#define UDMA_CHAN_RT_PCNT_REG 0x400
+#define UDMA_CHAN_RT_BCNT_REG 0x408
+#define UDMA_CHAN_RT_SBCNT_REG 0x410
+
+/* UDMA_CAP Registers */
+#define UDMA_CAP2_TCHAN_CNT(val) ((val) & 0x1ff)
+#define UDMA_CAP2_ECHAN_CNT(val) (((val) >> 9) & 0x1ff)
+#define UDMA_CAP2_RCHAN_CNT(val) (((val) >> 18) & 0x1ff)
+#define UDMA_CAP3_RFLOW_CNT(val) ((val) & 0x3fff)
+#define UDMA_CAP3_HCHAN_CNT(val) (((val) >> 14) & 0x1ff)
+#define UDMA_CAP3_UCHAN_CNT(val) (((val) >> 23) & 0x1ff)
+
+#define BCDMA_CAP2_BCHAN_CNT(val) ((val) & 0x1ff)
+#define BCDMA_CAP2_TCHAN_CNT(val) (((val) >> 9) & 0x1ff)
+#define BCDMA_CAP2_RCHAN_CNT(val) (((val) >> 18) & 0x1ff)
+#define BCDMA_CAP3_HBCHAN_CNT(val) (((val) >> 14) & 0x1ff)
+#define BCDMA_CAP3_UBCHAN_CNT(val) (((val) >> 23) & 0x1ff)
+#define BCDMA_CAP4_HRCHAN_CNT(val) ((val) & 0xff)
+#define BCDMA_CAP4_URCHAN_CNT(val) (((val) >> 8) & 0xff)
+#define BCDMA_CAP4_HTCHAN_CNT(val) (((val) >> 16) & 0xff)
+#define BCDMA_CAP4_UTCHAN_CNT(val) (((val) >> 24) & 0xff)
+
+#define PKTDMA_CAP4_TFLOW_CNT(val) ((val) & 0x3fff)
+
+/* UDMA_CHAN_RT_CTL_REG */
+#define UDMA_CHAN_RT_CTL_EN BIT(31)
+#define UDMA_CHAN_RT_CTL_TDOWN BIT(30)
+#define UDMA_CHAN_RT_CTL_PAUSE BIT(29)
+#define UDMA_CHAN_RT_CTL_FTDOWN BIT(28)
+#define UDMA_CHAN_RT_CTL_ERROR BIT(0)
+
+/* UDMA_CHAN_RT_PEER_RT_EN_REG */
+#define UDMA_PEER_RT_EN_ENABLE BIT(31)
+#define UDMA_PEER_RT_EN_TEARDOWN BIT(30)
+#define UDMA_PEER_RT_EN_PAUSE BIT(29)
+#define UDMA_PEER_RT_EN_FLUSH BIT(28)
+#define UDMA_PEER_RT_EN_IDLE BIT(1)
+
+/*
+ * UDMA_TCHAN_RT_PEER_STATIC_TR_XY_REG /
+ * UDMA_RCHAN_RT_PEER_STATIC_TR_XY_REG
+ */
+#define PDMA_STATIC_TR_X_MASK GENMASK(26, 24)
+#define PDMA_STATIC_TR_X_SHIFT (24)
+#define PDMA_STATIC_TR_Y_MASK GENMASK(11, 0)
+#define PDMA_STATIC_TR_Y_SHIFT (0)
+
+#define PDMA_STATIC_TR_Y(x) \
+ (((x) << PDMA_STATIC_TR_Y_SHIFT) & PDMA_STATIC_TR_Y_MASK)
+#define PDMA_STATIC_TR_X(x) \
+ (((x) << PDMA_STATIC_TR_X_SHIFT) & PDMA_STATIC_TR_X_MASK)
+
+#define PDMA_STATIC_TR_XY_ACC32 BIT(30)
+#define PDMA_STATIC_TR_XY_BURST BIT(31)
+
+/*
+ * UDMA_TCHAN_RT_PEER_STATIC_TR_Z_REG /
+ * UDMA_RCHAN_RT_PEER_STATIC_TR_Z_REG
+ */
+#define PDMA_STATIC_TR_Z(x, mask) ((x) & (mask))
+
+/* Address Space Select */
+#define K3_ADDRESS_ASEL_SHIFT 48
+
+struct udma_dev;
+struct udma_tchan;
+struct udma_rchan;
+struct udma_rflow;
+
+enum udma_rm_range {
+ RM_RANGE_BCHAN = 0,
+ RM_RANGE_TCHAN,
+ RM_RANGE_RCHAN,
+ RM_RANGE_RFLOW,
+ RM_RANGE_TFLOW,
+ RM_RANGE_LAST,
+};
+
+struct udma_tisci_rm {
+ const struct ti_sci_handle *tisci;
+ const struct ti_sci_rm_udmap_ops *tisci_udmap_ops;
+ u32 tisci_dev_id;
+
+ /* tisci information for PSI-L thread pairing/unpairing */
+ const struct ti_sci_rm_psil_ops *tisci_psil_ops;
+ u32 tisci_navss_dev_id;
+
+ struct ti_sci_resource *rm_ranges[RM_RANGE_LAST];
+};
+
+/* Direct access to UDMA low lever resources for the glue layer */
+int xudma_navss_psil_pair(struct udma_dev *ud, u32 src_thread, u32 dst_thread);
+int xudma_navss_psil_unpair(struct udma_dev *ud, u32 src_thread,
+ u32 dst_thread);
+
+struct udma_dev *of_xudma_dev_get(struct device_node *np, const char *property);
+struct device *xudma_get_device(struct udma_dev *ud);
+struct k3_ringacc *xudma_get_ringacc(struct udma_dev *ud);
+void xudma_dev_put(struct udma_dev *ud);
+u32 xudma_dev_get_psil_base(struct udma_dev *ud);
+struct udma_tisci_rm *xudma_dev_get_tisci_rm(struct udma_dev *ud);
+
+int xudma_alloc_gp_rflow_range(struct udma_dev *ud, int from, int cnt);
+int xudma_free_gp_rflow_range(struct udma_dev *ud, int from, int cnt);
+
+struct udma_tchan *xudma_tchan_get(struct udma_dev *ud, int id);
+struct udma_rchan *xudma_rchan_get(struct udma_dev *ud, int id);
+struct udma_rflow *xudma_rflow_get(struct udma_dev *ud, int id);
+
+void xudma_tchan_put(struct udma_dev *ud, struct udma_tchan *p);
+void xudma_rchan_put(struct udma_dev *ud, struct udma_rchan *p);
+void xudma_rflow_put(struct udma_dev *ud, struct udma_rflow *p);
+
+int xudma_tchan_get_id(struct udma_tchan *p);
+int xudma_rchan_get_id(struct udma_rchan *p);
+int xudma_rflow_get_id(struct udma_rflow *p);
+
+u32 xudma_tchanrt_read(struct udma_tchan *tchan, int reg);
+void xudma_tchanrt_write(struct udma_tchan *tchan, int reg, u32 val);
+u32 xudma_rchanrt_read(struct udma_rchan *rchan, int reg);
+void xudma_rchanrt_write(struct udma_rchan *rchan, int reg, u32 val);
+bool xudma_rflow_is_gp(struct udma_dev *ud, int id);
+int xudma_get_rflow_ring_offset(struct udma_dev *ud);
+
+int xudma_is_pktdma(struct udma_dev *ud);
+
+int xudma_pktdma_tflow_get_irq(struct udma_dev *ud, int udma_tflow_id);
+int xudma_pktdma_rflow_get_irq(struct udma_dev *ud, int udma_rflow_id);
+#endif /* K3_UDMA_H_ */
diff --git a/drivers/dma/ti/omap-dma.c b/drivers/dma/ti/omap-dma.c
new file mode 100644
index 000000000..27f5019bd
--- /dev/null
+++ b/drivers/dma/ti/omap-dma.c
@@ -0,0 +1,1958 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * OMAP DMAengine support
+ */
+#include <linux/cpu_pm.h>
+#include <linux/delay.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmapool.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/omap-dma.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/of_dma.h>
+#include <linux/of_device.h>
+
+#include "../virt-dma.h"
+
+#define OMAP_SDMA_REQUESTS 127
+#define OMAP_SDMA_CHANNELS 32
+
+struct omap_dma_config {
+ int lch_end;
+ unsigned int rw_priority:1;
+ unsigned int needs_busy_check:1;
+ unsigned int may_lose_context:1;
+ unsigned int needs_lch_clear:1;
+};
+
+struct omap_dma_context {
+ u32 irqenable_l0;
+ u32 irqenable_l1;
+ u32 ocp_sysconfig;
+ u32 gcr;
+};
+
+struct omap_dmadev {
+ struct dma_device ddev;
+ spinlock_t lock;
+ void __iomem *base;
+ const struct omap_dma_reg *reg_map;
+ struct omap_system_dma_plat_info *plat;
+ const struct omap_dma_config *cfg;
+ struct notifier_block nb;
+ struct omap_dma_context context;
+ int lch_count;
+ DECLARE_BITMAP(lch_bitmap, OMAP_SDMA_CHANNELS);
+ struct mutex lch_lock; /* for assigning logical channels */
+ bool legacy;
+ bool ll123_supported;
+ struct dma_pool *desc_pool;
+ unsigned dma_requests;
+ spinlock_t irq_lock;
+ uint32_t irq_enable_mask;
+ struct omap_chan **lch_map;
+};
+
+struct omap_chan {
+ struct virt_dma_chan vc;
+ void __iomem *channel_base;
+ const struct omap_dma_reg *reg_map;
+ uint32_t ccr;
+
+ struct dma_slave_config cfg;
+ unsigned dma_sig;
+ bool cyclic;
+ bool paused;
+ bool running;
+
+ int dma_ch;
+ struct omap_desc *desc;
+ unsigned sgidx;
+};
+
+#define DESC_NXT_SV_REFRESH (0x1 << 24)
+#define DESC_NXT_SV_REUSE (0x2 << 24)
+#define DESC_NXT_DV_REFRESH (0x1 << 26)
+#define DESC_NXT_DV_REUSE (0x2 << 26)
+#define DESC_NTYPE_TYPE2 (0x2 << 29)
+
+/* Type 2 descriptor with Source or Destination address update */
+struct omap_type2_desc {
+ uint32_t next_desc;
+ uint32_t en;
+ uint32_t addr; /* src or dst */
+ uint16_t fn;
+ uint16_t cicr;
+ int16_t cdei;
+ int16_t csei;
+ int32_t cdfi;
+ int32_t csfi;
+} __packed;
+
+struct omap_sg {
+ dma_addr_t addr;
+ uint32_t en; /* number of elements (24-bit) */
+ uint32_t fn; /* number of frames (16-bit) */
+ int32_t fi; /* for double indexing */
+ int16_t ei; /* for double indexing */
+
+ /* Linked list */
+ struct omap_type2_desc *t2_desc;
+ dma_addr_t t2_desc_paddr;
+};
+
+struct omap_desc {
+ struct virt_dma_desc vd;
+ bool using_ll;
+ enum dma_transfer_direction dir;
+ dma_addr_t dev_addr;
+ bool polled;
+
+ int32_t fi; /* for OMAP_DMA_SYNC_PACKET / double indexing */
+ int16_t ei; /* for double indexing */
+ uint8_t es; /* CSDP_DATA_TYPE_xxx */
+ uint32_t ccr; /* CCR value */
+ uint16_t clnk_ctrl; /* CLNK_CTRL value */
+ uint16_t cicr; /* CICR value */
+ uint32_t csdp; /* CSDP value */
+
+ unsigned sglen;
+ struct omap_sg sg[];
+};
+
+enum {
+ CAPS_0_SUPPORT_LL123 = BIT(20), /* Linked List type1/2/3 */
+ CAPS_0_SUPPORT_LL4 = BIT(21), /* Linked List type4 */
+
+ CCR_FS = BIT(5),
+ CCR_READ_PRIORITY = BIT(6),
+ CCR_ENABLE = BIT(7),
+ CCR_AUTO_INIT = BIT(8), /* OMAP1 only */
+ CCR_REPEAT = BIT(9), /* OMAP1 only */
+ CCR_OMAP31_DISABLE = BIT(10), /* OMAP1 only */
+ CCR_SUSPEND_SENSITIVE = BIT(8), /* OMAP2+ only */
+ CCR_RD_ACTIVE = BIT(9), /* OMAP2+ only */
+ CCR_WR_ACTIVE = BIT(10), /* OMAP2+ only */
+ CCR_SRC_AMODE_CONSTANT = 0 << 12,
+ CCR_SRC_AMODE_POSTINC = 1 << 12,
+ CCR_SRC_AMODE_SGLIDX = 2 << 12,
+ CCR_SRC_AMODE_DBLIDX = 3 << 12,
+ CCR_DST_AMODE_CONSTANT = 0 << 14,
+ CCR_DST_AMODE_POSTINC = 1 << 14,
+ CCR_DST_AMODE_SGLIDX = 2 << 14,
+ CCR_DST_AMODE_DBLIDX = 3 << 14,
+ CCR_CONSTANT_FILL = BIT(16),
+ CCR_TRANSPARENT_COPY = BIT(17),
+ CCR_BS = BIT(18),
+ CCR_SUPERVISOR = BIT(22),
+ CCR_PREFETCH = BIT(23),
+ CCR_TRIGGER_SRC = BIT(24),
+ CCR_BUFFERING_DISABLE = BIT(25),
+ CCR_WRITE_PRIORITY = BIT(26),
+ CCR_SYNC_ELEMENT = 0,
+ CCR_SYNC_FRAME = CCR_FS,
+ CCR_SYNC_BLOCK = CCR_BS,
+ CCR_SYNC_PACKET = CCR_BS | CCR_FS,
+
+ CSDP_DATA_TYPE_8 = 0,
+ CSDP_DATA_TYPE_16 = 1,
+ CSDP_DATA_TYPE_32 = 2,
+ CSDP_SRC_PORT_EMIFF = 0 << 2, /* OMAP1 only */
+ CSDP_SRC_PORT_EMIFS = 1 << 2, /* OMAP1 only */
+ CSDP_SRC_PORT_OCP_T1 = 2 << 2, /* OMAP1 only */
+ CSDP_SRC_PORT_TIPB = 3 << 2, /* OMAP1 only */
+ CSDP_SRC_PORT_OCP_T2 = 4 << 2, /* OMAP1 only */
+ CSDP_SRC_PORT_MPUI = 5 << 2, /* OMAP1 only */
+ CSDP_SRC_PACKED = BIT(6),
+ CSDP_SRC_BURST_1 = 0 << 7,
+ CSDP_SRC_BURST_16 = 1 << 7,
+ CSDP_SRC_BURST_32 = 2 << 7,
+ CSDP_SRC_BURST_64 = 3 << 7,
+ CSDP_DST_PORT_EMIFF = 0 << 9, /* OMAP1 only */
+ CSDP_DST_PORT_EMIFS = 1 << 9, /* OMAP1 only */
+ CSDP_DST_PORT_OCP_T1 = 2 << 9, /* OMAP1 only */
+ CSDP_DST_PORT_TIPB = 3 << 9, /* OMAP1 only */
+ CSDP_DST_PORT_OCP_T2 = 4 << 9, /* OMAP1 only */
+ CSDP_DST_PORT_MPUI = 5 << 9, /* OMAP1 only */
+ CSDP_DST_PACKED = BIT(13),
+ CSDP_DST_BURST_1 = 0 << 14,
+ CSDP_DST_BURST_16 = 1 << 14,
+ CSDP_DST_BURST_32 = 2 << 14,
+ CSDP_DST_BURST_64 = 3 << 14,
+ CSDP_WRITE_NON_POSTED = 0 << 16,
+ CSDP_WRITE_POSTED = 1 << 16,
+ CSDP_WRITE_LAST_NON_POSTED = 2 << 16,
+
+ CICR_TOUT_IE = BIT(0), /* OMAP1 only */
+ CICR_DROP_IE = BIT(1),
+ CICR_HALF_IE = BIT(2),
+ CICR_FRAME_IE = BIT(3),
+ CICR_LAST_IE = BIT(4),
+ CICR_BLOCK_IE = BIT(5),
+ CICR_PKT_IE = BIT(7), /* OMAP2+ only */
+ CICR_TRANS_ERR_IE = BIT(8), /* OMAP2+ only */
+ CICR_SUPERVISOR_ERR_IE = BIT(10), /* OMAP2+ only */
+ CICR_MISALIGNED_ERR_IE = BIT(11), /* OMAP2+ only */
+ CICR_DRAIN_IE = BIT(12), /* OMAP2+ only */
+ CICR_SUPER_BLOCK_IE = BIT(14), /* OMAP2+ only */
+
+ CLNK_CTRL_ENABLE_LNK = BIT(15),
+
+ CDP_DST_VALID_INC = 0 << 0,
+ CDP_DST_VALID_RELOAD = 1 << 0,
+ CDP_DST_VALID_REUSE = 2 << 0,
+ CDP_SRC_VALID_INC = 0 << 2,
+ CDP_SRC_VALID_RELOAD = 1 << 2,
+ CDP_SRC_VALID_REUSE = 2 << 2,
+ CDP_NTYPE_TYPE1 = 1 << 4,
+ CDP_NTYPE_TYPE2 = 2 << 4,
+ CDP_NTYPE_TYPE3 = 3 << 4,
+ CDP_TMODE_NORMAL = 0 << 8,
+ CDP_TMODE_LLIST = 1 << 8,
+ CDP_FAST = BIT(10),
+};
+
+static const unsigned es_bytes[] = {
+ [CSDP_DATA_TYPE_8] = 1,
+ [CSDP_DATA_TYPE_16] = 2,
+ [CSDP_DATA_TYPE_32] = 4,
+};
+
+static bool omap_dma_filter_fn(struct dma_chan *chan, void *param);
+static struct of_dma_filter_info omap_dma_info = {
+ .filter_fn = omap_dma_filter_fn,
+};
+
+static inline struct omap_dmadev *to_omap_dma_dev(struct dma_device *d)
+{
+ return container_of(d, struct omap_dmadev, ddev);
+}
+
+static inline struct omap_chan *to_omap_dma_chan(struct dma_chan *c)
+{
+ return container_of(c, struct omap_chan, vc.chan);
+}
+
+static inline struct omap_desc *to_omap_dma_desc(struct dma_async_tx_descriptor *t)
+{
+ return container_of(t, struct omap_desc, vd.tx);
+}
+
+static void omap_dma_desc_free(struct virt_dma_desc *vd)
+{
+ struct omap_desc *d = to_omap_dma_desc(&vd->tx);
+
+ if (d->using_ll) {
+ struct omap_dmadev *od = to_omap_dma_dev(vd->tx.chan->device);
+ int i;
+
+ for (i = 0; i < d->sglen; i++) {
+ if (d->sg[i].t2_desc)
+ dma_pool_free(od->desc_pool, d->sg[i].t2_desc,
+ d->sg[i].t2_desc_paddr);
+ }
+ }
+
+ kfree(d);
+}
+
+static void omap_dma_fill_type2_desc(struct omap_desc *d, int idx,
+ enum dma_transfer_direction dir, bool last)
+{
+ struct omap_sg *sg = &d->sg[idx];
+ struct omap_type2_desc *t2_desc = sg->t2_desc;
+
+ if (idx)
+ d->sg[idx - 1].t2_desc->next_desc = sg->t2_desc_paddr;
+ if (last)
+ t2_desc->next_desc = 0xfffffffc;
+
+ t2_desc->en = sg->en;
+ t2_desc->addr = sg->addr;
+ t2_desc->fn = sg->fn & 0xffff;
+ t2_desc->cicr = d->cicr;
+ if (!last)
+ t2_desc->cicr &= ~CICR_BLOCK_IE;
+
+ switch (dir) {
+ case DMA_DEV_TO_MEM:
+ t2_desc->cdei = sg->ei;
+ t2_desc->csei = d->ei;
+ t2_desc->cdfi = sg->fi;
+ t2_desc->csfi = d->fi;
+
+ t2_desc->en |= DESC_NXT_DV_REFRESH;
+ t2_desc->en |= DESC_NXT_SV_REUSE;
+ break;
+ case DMA_MEM_TO_DEV:
+ t2_desc->cdei = d->ei;
+ t2_desc->csei = sg->ei;
+ t2_desc->cdfi = d->fi;
+ t2_desc->csfi = sg->fi;
+
+ t2_desc->en |= DESC_NXT_SV_REFRESH;
+ t2_desc->en |= DESC_NXT_DV_REUSE;
+ break;
+ default:
+ return;
+ }
+
+ t2_desc->en |= DESC_NTYPE_TYPE2;
+}
+
+static void omap_dma_write(uint32_t val, unsigned type, void __iomem *addr)
+{
+ switch (type) {
+ case OMAP_DMA_REG_16BIT:
+ writew_relaxed(val, addr);
+ break;
+ case OMAP_DMA_REG_2X16BIT:
+ writew_relaxed(val, addr);
+ writew_relaxed(val >> 16, addr + 2);
+ break;
+ case OMAP_DMA_REG_32BIT:
+ writel_relaxed(val, addr);
+ break;
+ default:
+ WARN_ON(1);
+ }
+}
+
+static unsigned omap_dma_read(unsigned type, void __iomem *addr)
+{
+ unsigned val;
+
+ switch (type) {
+ case OMAP_DMA_REG_16BIT:
+ val = readw_relaxed(addr);
+ break;
+ case OMAP_DMA_REG_2X16BIT:
+ val = readw_relaxed(addr);
+ val |= readw_relaxed(addr + 2) << 16;
+ break;
+ case OMAP_DMA_REG_32BIT:
+ val = readl_relaxed(addr);
+ break;
+ default:
+ WARN_ON(1);
+ val = 0;
+ }
+
+ return val;
+}
+
+static void omap_dma_glbl_write(struct omap_dmadev *od, unsigned reg, unsigned val)
+{
+ const struct omap_dma_reg *r = od->reg_map + reg;
+
+ WARN_ON(r->stride);
+
+ omap_dma_write(val, r->type, od->base + r->offset);
+}
+
+static unsigned omap_dma_glbl_read(struct omap_dmadev *od, unsigned reg)
+{
+ const struct omap_dma_reg *r = od->reg_map + reg;
+
+ WARN_ON(r->stride);
+
+ return omap_dma_read(r->type, od->base + r->offset);
+}
+
+static void omap_dma_chan_write(struct omap_chan *c, unsigned reg, unsigned val)
+{
+ const struct omap_dma_reg *r = c->reg_map + reg;
+
+ omap_dma_write(val, r->type, c->channel_base + r->offset);
+}
+
+static unsigned omap_dma_chan_read(struct omap_chan *c, unsigned reg)
+{
+ const struct omap_dma_reg *r = c->reg_map + reg;
+
+ return omap_dma_read(r->type, c->channel_base + r->offset);
+}
+
+static void omap_dma_clear_csr(struct omap_chan *c)
+{
+ if (dma_omap1())
+ omap_dma_chan_read(c, CSR);
+ else
+ omap_dma_chan_write(c, CSR, ~0);
+}
+
+static unsigned omap_dma_get_csr(struct omap_chan *c)
+{
+ unsigned val = omap_dma_chan_read(c, CSR);
+
+ if (!dma_omap1())
+ omap_dma_chan_write(c, CSR, val);
+
+ return val;
+}
+
+static void omap_dma_clear_lch(struct omap_dmadev *od, int lch)
+{
+ struct omap_chan *c;
+ int i;
+
+ c = od->lch_map[lch];
+ if (!c)
+ return;
+
+ for (i = CSDP; i <= od->cfg->lch_end; i++)
+ omap_dma_chan_write(c, i, 0);
+}
+
+static void omap_dma_assign(struct omap_dmadev *od, struct omap_chan *c,
+ unsigned lch)
+{
+ c->channel_base = od->base + od->plat->channel_stride * lch;
+
+ od->lch_map[lch] = c;
+}
+
+static void omap_dma_start(struct omap_chan *c, struct omap_desc *d)
+{
+ struct omap_dmadev *od = to_omap_dma_dev(c->vc.chan.device);
+ uint16_t cicr = d->cicr;
+
+ if (__dma_omap15xx(od->plat->dma_attr))
+ omap_dma_chan_write(c, CPC, 0);
+ else
+ omap_dma_chan_write(c, CDAC, 0);
+
+ omap_dma_clear_csr(c);
+
+ if (d->using_ll) {
+ uint32_t cdp = CDP_TMODE_LLIST | CDP_NTYPE_TYPE2 | CDP_FAST;
+
+ if (d->dir == DMA_DEV_TO_MEM)
+ cdp |= (CDP_DST_VALID_RELOAD | CDP_SRC_VALID_REUSE);
+ else
+ cdp |= (CDP_DST_VALID_REUSE | CDP_SRC_VALID_RELOAD);
+ omap_dma_chan_write(c, CDP, cdp);
+
+ omap_dma_chan_write(c, CNDP, d->sg[0].t2_desc_paddr);
+ omap_dma_chan_write(c, CCDN, 0);
+ omap_dma_chan_write(c, CCFN, 0xffff);
+ omap_dma_chan_write(c, CCEN, 0xffffff);
+
+ cicr &= ~CICR_BLOCK_IE;
+ } else if (od->ll123_supported) {
+ omap_dma_chan_write(c, CDP, 0);
+ }
+
+ /* Enable interrupts */
+ omap_dma_chan_write(c, CICR, cicr);
+
+ /* Enable channel */
+ omap_dma_chan_write(c, CCR, d->ccr | CCR_ENABLE);
+
+ c->running = true;
+}
+
+static void omap_dma_drain_chan(struct omap_chan *c)
+{
+ int i;
+ u32 val;
+
+ /* Wait for sDMA FIFO to drain */
+ for (i = 0; ; i++) {
+ val = omap_dma_chan_read(c, CCR);
+ if (!(val & (CCR_RD_ACTIVE | CCR_WR_ACTIVE)))
+ break;
+
+ if (i > 100)
+ break;
+
+ udelay(5);
+ }
+
+ if (val & (CCR_RD_ACTIVE | CCR_WR_ACTIVE))
+ dev_err(c->vc.chan.device->dev,
+ "DMA drain did not complete on lch %d\n",
+ c->dma_ch);
+}
+
+static int omap_dma_stop(struct omap_chan *c)
+{
+ struct omap_dmadev *od = to_omap_dma_dev(c->vc.chan.device);
+ uint32_t val;
+
+ /* disable irq */
+ omap_dma_chan_write(c, CICR, 0);
+
+ omap_dma_clear_csr(c);
+
+ val = omap_dma_chan_read(c, CCR);
+ if (od->plat->errata & DMA_ERRATA_i541 && val & CCR_TRIGGER_SRC) {
+ uint32_t sysconfig;
+
+ sysconfig = omap_dma_glbl_read(od, OCP_SYSCONFIG);
+ val = sysconfig & ~DMA_SYSCONFIG_MIDLEMODE_MASK;
+ val |= DMA_SYSCONFIG_MIDLEMODE(DMA_IDLEMODE_NO_IDLE);
+ omap_dma_glbl_write(od, OCP_SYSCONFIG, val);
+
+ val = omap_dma_chan_read(c, CCR);
+ val &= ~CCR_ENABLE;
+ omap_dma_chan_write(c, CCR, val);
+
+ if (!(c->ccr & CCR_BUFFERING_DISABLE))
+ omap_dma_drain_chan(c);
+
+ omap_dma_glbl_write(od, OCP_SYSCONFIG, sysconfig);
+ } else {
+ if (!(val & CCR_ENABLE))
+ return -EINVAL;
+
+ val &= ~CCR_ENABLE;
+ omap_dma_chan_write(c, CCR, val);
+
+ if (!(c->ccr & CCR_BUFFERING_DISABLE))
+ omap_dma_drain_chan(c);
+ }
+
+ mb();
+
+ if (!__dma_omap15xx(od->plat->dma_attr) && c->cyclic) {
+ val = omap_dma_chan_read(c, CLNK_CTRL);
+
+ if (dma_omap1())
+ val |= 1 << 14; /* set the STOP_LNK bit */
+ else
+ val &= ~CLNK_CTRL_ENABLE_LNK;
+
+ omap_dma_chan_write(c, CLNK_CTRL, val);
+ }
+ c->running = false;
+ return 0;
+}
+
+static void omap_dma_start_sg(struct omap_chan *c, struct omap_desc *d)
+{
+ struct omap_sg *sg = d->sg + c->sgidx;
+ unsigned cxsa, cxei, cxfi;
+
+ if (d->dir == DMA_DEV_TO_MEM || d->dir == DMA_MEM_TO_MEM) {
+ cxsa = CDSA;
+ cxei = CDEI;
+ cxfi = CDFI;
+ } else {
+ cxsa = CSSA;
+ cxei = CSEI;
+ cxfi = CSFI;
+ }
+
+ omap_dma_chan_write(c, cxsa, sg->addr);
+ omap_dma_chan_write(c, cxei, sg->ei);
+ omap_dma_chan_write(c, cxfi, sg->fi);
+ omap_dma_chan_write(c, CEN, sg->en);
+ omap_dma_chan_write(c, CFN, sg->fn);
+
+ omap_dma_start(c, d);
+ c->sgidx++;
+}
+
+static void omap_dma_start_desc(struct omap_chan *c)
+{
+ struct virt_dma_desc *vd = vchan_next_desc(&c->vc);
+ struct omap_desc *d;
+ unsigned cxsa, cxei, cxfi;
+
+ if (!vd) {
+ c->desc = NULL;
+ return;
+ }
+
+ list_del(&vd->node);
+
+ c->desc = d = to_omap_dma_desc(&vd->tx);
+ c->sgidx = 0;
+
+ /*
+ * This provides the necessary barrier to ensure data held in
+ * DMA coherent memory is visible to the DMA engine prior to
+ * the transfer starting.
+ */
+ mb();
+
+ omap_dma_chan_write(c, CCR, d->ccr);
+ if (dma_omap1())
+ omap_dma_chan_write(c, CCR2, d->ccr >> 16);
+
+ if (d->dir == DMA_DEV_TO_MEM || d->dir == DMA_MEM_TO_MEM) {
+ cxsa = CSSA;
+ cxei = CSEI;
+ cxfi = CSFI;
+ } else {
+ cxsa = CDSA;
+ cxei = CDEI;
+ cxfi = CDFI;
+ }
+
+ omap_dma_chan_write(c, cxsa, d->dev_addr);
+ omap_dma_chan_write(c, cxei, d->ei);
+ omap_dma_chan_write(c, cxfi, d->fi);
+ omap_dma_chan_write(c, CSDP, d->csdp);
+ omap_dma_chan_write(c, CLNK_CTRL, d->clnk_ctrl);
+
+ omap_dma_start_sg(c, d);
+}
+
+static void omap_dma_callback(int ch, u16 status, void *data)
+{
+ struct omap_chan *c = data;
+ struct omap_desc *d;
+ unsigned long flags;
+
+ spin_lock_irqsave(&c->vc.lock, flags);
+ d = c->desc;
+ if (d) {
+ if (c->cyclic) {
+ vchan_cyclic_callback(&d->vd);
+ } else if (d->using_ll || c->sgidx == d->sglen) {
+ omap_dma_start_desc(c);
+ vchan_cookie_complete(&d->vd);
+ } else {
+ omap_dma_start_sg(c, d);
+ }
+ }
+ spin_unlock_irqrestore(&c->vc.lock, flags);
+}
+
+static irqreturn_t omap_dma_irq(int irq, void *devid)
+{
+ struct omap_dmadev *od = devid;
+ unsigned status, channel;
+
+ spin_lock(&od->irq_lock);
+
+ status = omap_dma_glbl_read(od, IRQSTATUS_L1);
+ status &= od->irq_enable_mask;
+ if (status == 0) {
+ spin_unlock(&od->irq_lock);
+ return IRQ_NONE;
+ }
+
+ while ((channel = ffs(status)) != 0) {
+ unsigned mask, csr;
+ struct omap_chan *c;
+
+ channel -= 1;
+ mask = BIT(channel);
+ status &= ~mask;
+
+ c = od->lch_map[channel];
+ if (c == NULL) {
+ /* This should never happen */
+ dev_err(od->ddev.dev, "invalid channel %u\n", channel);
+ continue;
+ }
+
+ csr = omap_dma_get_csr(c);
+ omap_dma_glbl_write(od, IRQSTATUS_L1, mask);
+
+ omap_dma_callback(channel, csr, c);
+ }
+
+ spin_unlock(&od->irq_lock);
+
+ return IRQ_HANDLED;
+}
+
+static int omap_dma_get_lch(struct omap_dmadev *od, int *lch)
+{
+ int channel;
+
+ mutex_lock(&od->lch_lock);
+ channel = find_first_zero_bit(od->lch_bitmap, od->lch_count);
+ if (channel >= od->lch_count)
+ goto out_busy;
+ set_bit(channel, od->lch_bitmap);
+ mutex_unlock(&od->lch_lock);
+
+ omap_dma_clear_lch(od, channel);
+ *lch = channel;
+
+ return 0;
+
+out_busy:
+ mutex_unlock(&od->lch_lock);
+ *lch = -EINVAL;
+
+ return -EBUSY;
+}
+
+static void omap_dma_put_lch(struct omap_dmadev *od, int lch)
+{
+ omap_dma_clear_lch(od, lch);
+ mutex_lock(&od->lch_lock);
+ clear_bit(lch, od->lch_bitmap);
+ mutex_unlock(&od->lch_lock);
+}
+
+static inline bool omap_dma_legacy(struct omap_dmadev *od)
+{
+ return IS_ENABLED(CONFIG_ARCH_OMAP1) && od->legacy;
+}
+
+static int omap_dma_alloc_chan_resources(struct dma_chan *chan)
+{
+ struct omap_dmadev *od = to_omap_dma_dev(chan->device);
+ struct omap_chan *c = to_omap_dma_chan(chan);
+ struct device *dev = od->ddev.dev;
+ int ret;
+
+ if (omap_dma_legacy(od)) {
+ ret = omap_request_dma(c->dma_sig, "DMA engine",
+ omap_dma_callback, c, &c->dma_ch);
+ } else {
+ ret = omap_dma_get_lch(od, &c->dma_ch);
+ }
+
+ dev_dbg(dev, "allocating channel %u for %u\n", c->dma_ch, c->dma_sig);
+
+ if (ret >= 0) {
+ omap_dma_assign(od, c, c->dma_ch);
+
+ if (!omap_dma_legacy(od)) {
+ unsigned val;
+
+ spin_lock_irq(&od->irq_lock);
+ val = BIT(c->dma_ch);
+ omap_dma_glbl_write(od, IRQSTATUS_L1, val);
+ od->irq_enable_mask |= val;
+ omap_dma_glbl_write(od, IRQENABLE_L1, od->irq_enable_mask);
+
+ val = omap_dma_glbl_read(od, IRQENABLE_L0);
+ val &= ~BIT(c->dma_ch);
+ omap_dma_glbl_write(od, IRQENABLE_L0, val);
+ spin_unlock_irq(&od->irq_lock);
+ }
+ }
+
+ if (dma_omap1()) {
+ if (__dma_omap16xx(od->plat->dma_attr)) {
+ c->ccr = CCR_OMAP31_DISABLE;
+ /* Duplicate what plat-omap/dma.c does */
+ c->ccr |= c->dma_ch + 1;
+ } else {
+ c->ccr = c->dma_sig & 0x1f;
+ }
+ } else {
+ c->ccr = c->dma_sig & 0x1f;
+ c->ccr |= (c->dma_sig & ~0x1f) << 14;
+ }
+ if (od->plat->errata & DMA_ERRATA_IFRAME_BUFFERING)
+ c->ccr |= CCR_BUFFERING_DISABLE;
+
+ return ret;
+}
+
+static void omap_dma_free_chan_resources(struct dma_chan *chan)
+{
+ struct omap_dmadev *od = to_omap_dma_dev(chan->device);
+ struct omap_chan *c = to_omap_dma_chan(chan);
+
+ if (!omap_dma_legacy(od)) {
+ spin_lock_irq(&od->irq_lock);
+ od->irq_enable_mask &= ~BIT(c->dma_ch);
+ omap_dma_glbl_write(od, IRQENABLE_L1, od->irq_enable_mask);
+ spin_unlock_irq(&od->irq_lock);
+ }
+
+ c->channel_base = NULL;
+ od->lch_map[c->dma_ch] = NULL;
+ vchan_free_chan_resources(&c->vc);
+
+ if (omap_dma_legacy(od))
+ omap_free_dma(c->dma_ch);
+ else
+ omap_dma_put_lch(od, c->dma_ch);
+
+ dev_dbg(od->ddev.dev, "freeing channel %u used for %u\n", c->dma_ch,
+ c->dma_sig);
+ c->dma_sig = 0;
+}
+
+static size_t omap_dma_sg_size(struct omap_sg *sg)
+{
+ return sg->en * sg->fn;
+}
+
+static size_t omap_dma_desc_size(struct omap_desc *d)
+{
+ unsigned i;
+ size_t size;
+
+ for (size = i = 0; i < d->sglen; i++)
+ size += omap_dma_sg_size(&d->sg[i]);
+
+ return size * es_bytes[d->es];
+}
+
+static size_t omap_dma_desc_size_pos(struct omap_desc *d, dma_addr_t addr)
+{
+ unsigned i;
+ size_t size, es_size = es_bytes[d->es];
+
+ for (size = i = 0; i < d->sglen; i++) {
+ size_t this_size = omap_dma_sg_size(&d->sg[i]) * es_size;
+
+ if (size)
+ size += this_size;
+ else if (addr >= d->sg[i].addr &&
+ addr < d->sg[i].addr + this_size)
+ size += d->sg[i].addr + this_size - addr;
+ }
+ return size;
+}
+
+/*
+ * OMAP 3.2/3.3 erratum: sometimes 0 is returned if CSAC/CDAC is
+ * read before the DMA controller finished disabling the channel.
+ */
+static uint32_t omap_dma_chan_read_3_3(struct omap_chan *c, unsigned reg)
+{
+ struct omap_dmadev *od = to_omap_dma_dev(c->vc.chan.device);
+ uint32_t val;
+
+ val = omap_dma_chan_read(c, reg);
+ if (val == 0 && od->plat->errata & DMA_ERRATA_3_3)
+ val = omap_dma_chan_read(c, reg);
+
+ return val;
+}
+
+static dma_addr_t omap_dma_get_src_pos(struct omap_chan *c)
+{
+ struct omap_dmadev *od = to_omap_dma_dev(c->vc.chan.device);
+ dma_addr_t addr, cdac;
+
+ if (__dma_omap15xx(od->plat->dma_attr)) {
+ addr = omap_dma_chan_read(c, CPC);
+ } else {
+ addr = omap_dma_chan_read_3_3(c, CSAC);
+ cdac = omap_dma_chan_read_3_3(c, CDAC);
+
+ /*
+ * CDAC == 0 indicates that the DMA transfer on the channel has
+ * not been started (no data has been transferred so far).
+ * Return the programmed source start address in this case.
+ */
+ if (cdac == 0)
+ addr = omap_dma_chan_read(c, CSSA);
+ }
+
+ if (dma_omap1())
+ addr |= omap_dma_chan_read(c, CSSA) & 0xffff0000;
+
+ return addr;
+}
+
+static dma_addr_t omap_dma_get_dst_pos(struct omap_chan *c)
+{
+ struct omap_dmadev *od = to_omap_dma_dev(c->vc.chan.device);
+ dma_addr_t addr;
+
+ if (__dma_omap15xx(od->plat->dma_attr)) {
+ addr = omap_dma_chan_read(c, CPC);
+ } else {
+ addr = omap_dma_chan_read_3_3(c, CDAC);
+
+ /*
+ * CDAC == 0 indicates that the DMA transfer on the channel
+ * has not been started (no data has been transferred so
+ * far). Return the programmed destination start address in
+ * this case.
+ */
+ if (addr == 0)
+ addr = omap_dma_chan_read(c, CDSA);
+ }
+
+ if (dma_omap1())
+ addr |= omap_dma_chan_read(c, CDSA) & 0xffff0000;
+
+ return addr;
+}
+
+static enum dma_status omap_dma_tx_status(struct dma_chan *chan,
+ dma_cookie_t cookie, struct dma_tx_state *txstate)
+{
+ struct omap_chan *c = to_omap_dma_chan(chan);
+ enum dma_status ret;
+ unsigned long flags;
+ struct omap_desc *d = NULL;
+
+ ret = dma_cookie_status(chan, cookie, txstate);
+ if (ret == DMA_COMPLETE)
+ return ret;
+
+ spin_lock_irqsave(&c->vc.lock, flags);
+ if (c->desc && c->desc->vd.tx.cookie == cookie)
+ d = c->desc;
+
+ if (!txstate)
+ goto out;
+
+ if (d) {
+ dma_addr_t pos;
+
+ if (d->dir == DMA_MEM_TO_DEV)
+ pos = omap_dma_get_src_pos(c);
+ else if (d->dir == DMA_DEV_TO_MEM || d->dir == DMA_MEM_TO_MEM)
+ pos = omap_dma_get_dst_pos(c);
+ else
+ pos = 0;
+
+ txstate->residue = omap_dma_desc_size_pos(d, pos);
+ } else {
+ struct virt_dma_desc *vd = vchan_find_desc(&c->vc, cookie);
+
+ if (vd)
+ txstate->residue = omap_dma_desc_size(
+ to_omap_dma_desc(&vd->tx));
+ else
+ txstate->residue = 0;
+ }
+
+out:
+ if (ret == DMA_IN_PROGRESS && c->paused) {
+ ret = DMA_PAUSED;
+ } else if (d && d->polled && c->running) {
+ uint32_t ccr = omap_dma_chan_read(c, CCR);
+ /*
+ * The channel is no longer active, set the return value
+ * accordingly and mark it as completed
+ */
+ if (!(ccr & CCR_ENABLE)) {
+ ret = DMA_COMPLETE;
+ omap_dma_start_desc(c);
+ vchan_cookie_complete(&d->vd);
+ }
+ }
+
+ spin_unlock_irqrestore(&c->vc.lock, flags);
+
+ return ret;
+}
+
+static void omap_dma_issue_pending(struct dma_chan *chan)
+{
+ struct omap_chan *c = to_omap_dma_chan(chan);
+ unsigned long flags;
+
+ spin_lock_irqsave(&c->vc.lock, flags);
+ if (vchan_issue_pending(&c->vc) && !c->desc)
+ omap_dma_start_desc(c);
+ spin_unlock_irqrestore(&c->vc.lock, flags);
+}
+
+static struct dma_async_tx_descriptor *omap_dma_prep_slave_sg(
+ struct dma_chan *chan, struct scatterlist *sgl, unsigned sglen,
+ enum dma_transfer_direction dir, unsigned long tx_flags, void *context)
+{
+ struct omap_dmadev *od = to_omap_dma_dev(chan->device);
+ struct omap_chan *c = to_omap_dma_chan(chan);
+ enum dma_slave_buswidth dev_width;
+ struct scatterlist *sgent;
+ struct omap_desc *d;
+ dma_addr_t dev_addr;
+ unsigned i, es, en, frame_bytes;
+ bool ll_failed = false;
+ u32 burst;
+ u32 port_window, port_window_bytes;
+
+ if (dir == DMA_DEV_TO_MEM) {
+ dev_addr = c->cfg.src_addr;
+ dev_width = c->cfg.src_addr_width;
+ burst = c->cfg.src_maxburst;
+ port_window = c->cfg.src_port_window_size;
+ } else if (dir == DMA_MEM_TO_DEV) {
+ dev_addr = c->cfg.dst_addr;
+ dev_width = c->cfg.dst_addr_width;
+ burst = c->cfg.dst_maxburst;
+ port_window = c->cfg.dst_port_window_size;
+ } else {
+ dev_err(chan->device->dev, "%s: bad direction?\n", __func__);
+ return NULL;
+ }
+
+ /* Bus width translates to the element size (ES) */
+ switch (dev_width) {
+ case DMA_SLAVE_BUSWIDTH_1_BYTE:
+ es = CSDP_DATA_TYPE_8;
+ break;
+ case DMA_SLAVE_BUSWIDTH_2_BYTES:
+ es = CSDP_DATA_TYPE_16;
+ break;
+ case DMA_SLAVE_BUSWIDTH_4_BYTES:
+ es = CSDP_DATA_TYPE_32;
+ break;
+ default: /* not reached */
+ return NULL;
+ }
+
+ /* Now allocate and setup the descriptor. */
+ d = kzalloc(struct_size(d, sg, sglen), GFP_ATOMIC);
+ if (!d)
+ return NULL;
+
+ d->dir = dir;
+ d->dev_addr = dev_addr;
+ d->es = es;
+
+ /* When the port_window is used, one frame must cover the window */
+ if (port_window) {
+ burst = port_window;
+ port_window_bytes = port_window * es_bytes[es];
+
+ d->ei = 1;
+ /*
+ * One frame covers the port_window and by configure
+ * the source frame index to be -1 * (port_window - 1)
+ * we instruct the sDMA that after a frame is processed
+ * it should move back to the start of the window.
+ */
+ d->fi = -(port_window_bytes - 1);
+ }
+
+ d->ccr = c->ccr | CCR_SYNC_FRAME;
+ if (dir == DMA_DEV_TO_MEM) {
+ d->csdp = CSDP_DST_BURST_64 | CSDP_DST_PACKED;
+
+ d->ccr |= CCR_DST_AMODE_POSTINC;
+ if (port_window) {
+ d->ccr |= CCR_SRC_AMODE_DBLIDX;
+
+ if (port_window_bytes >= 64)
+ d->csdp |= CSDP_SRC_BURST_64;
+ else if (port_window_bytes >= 32)
+ d->csdp |= CSDP_SRC_BURST_32;
+ else if (port_window_bytes >= 16)
+ d->csdp |= CSDP_SRC_BURST_16;
+
+ } else {
+ d->ccr |= CCR_SRC_AMODE_CONSTANT;
+ }
+ } else {
+ d->csdp = CSDP_SRC_BURST_64 | CSDP_SRC_PACKED;
+
+ d->ccr |= CCR_SRC_AMODE_POSTINC;
+ if (port_window) {
+ d->ccr |= CCR_DST_AMODE_DBLIDX;
+
+ if (port_window_bytes >= 64)
+ d->csdp |= CSDP_DST_BURST_64;
+ else if (port_window_bytes >= 32)
+ d->csdp |= CSDP_DST_BURST_32;
+ else if (port_window_bytes >= 16)
+ d->csdp |= CSDP_DST_BURST_16;
+ } else {
+ d->ccr |= CCR_DST_AMODE_CONSTANT;
+ }
+ }
+
+ d->cicr = CICR_DROP_IE | CICR_BLOCK_IE;
+ d->csdp |= es;
+
+ if (dma_omap1()) {
+ d->cicr |= CICR_TOUT_IE;
+
+ if (dir == DMA_DEV_TO_MEM)
+ d->csdp |= CSDP_DST_PORT_EMIFF | CSDP_SRC_PORT_TIPB;
+ else
+ d->csdp |= CSDP_DST_PORT_TIPB | CSDP_SRC_PORT_EMIFF;
+ } else {
+ if (dir == DMA_DEV_TO_MEM)
+ d->ccr |= CCR_TRIGGER_SRC;
+
+ d->cicr |= CICR_MISALIGNED_ERR_IE | CICR_TRANS_ERR_IE;
+
+ if (port_window)
+ d->csdp |= CSDP_WRITE_LAST_NON_POSTED;
+ }
+ if (od->plat->errata & DMA_ERRATA_PARALLEL_CHANNELS)
+ d->clnk_ctrl = c->dma_ch;
+
+ /*
+ * Build our scatterlist entries: each contains the address,
+ * the number of elements (EN) in each frame, and the number of
+ * frames (FN). Number of bytes for this entry = ES * EN * FN.
+ *
+ * Burst size translates to number of elements with frame sync.
+ * Note: DMA engine defines burst to be the number of dev-width
+ * transfers.
+ */
+ en = burst;
+ frame_bytes = es_bytes[es] * en;
+
+ if (sglen >= 2)
+ d->using_ll = od->ll123_supported;
+
+ for_each_sg(sgl, sgent, sglen, i) {
+ struct omap_sg *osg = &d->sg[i];
+
+ osg->addr = sg_dma_address(sgent);
+ osg->en = en;
+ osg->fn = sg_dma_len(sgent) / frame_bytes;
+
+ if (d->using_ll) {
+ osg->t2_desc = dma_pool_alloc(od->desc_pool, GFP_ATOMIC,
+ &osg->t2_desc_paddr);
+ if (!osg->t2_desc) {
+ dev_err(chan->device->dev,
+ "t2_desc[%d] allocation failed\n", i);
+ ll_failed = true;
+ d->using_ll = false;
+ continue;
+ }
+
+ omap_dma_fill_type2_desc(d, i, dir, (i == sglen - 1));
+ }
+ }
+
+ d->sglen = sglen;
+
+ /* Release the dma_pool entries if one allocation failed */
+ if (ll_failed) {
+ for (i = 0; i < d->sglen; i++) {
+ struct omap_sg *osg = &d->sg[i];
+
+ if (osg->t2_desc) {
+ dma_pool_free(od->desc_pool, osg->t2_desc,
+ osg->t2_desc_paddr);
+ osg->t2_desc = NULL;
+ }
+ }
+ }
+
+ return vchan_tx_prep(&c->vc, &d->vd, tx_flags);
+}
+
+static struct dma_async_tx_descriptor *omap_dma_prep_dma_cyclic(
+ struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
+ size_t period_len, enum dma_transfer_direction dir, unsigned long flags)
+{
+ struct omap_dmadev *od = to_omap_dma_dev(chan->device);
+ struct omap_chan *c = to_omap_dma_chan(chan);
+ enum dma_slave_buswidth dev_width;
+ struct omap_desc *d;
+ dma_addr_t dev_addr;
+ unsigned es;
+ u32 burst;
+
+ if (dir == DMA_DEV_TO_MEM) {
+ dev_addr = c->cfg.src_addr;
+ dev_width = c->cfg.src_addr_width;
+ burst = c->cfg.src_maxburst;
+ } else if (dir == DMA_MEM_TO_DEV) {
+ dev_addr = c->cfg.dst_addr;
+ dev_width = c->cfg.dst_addr_width;
+ burst = c->cfg.dst_maxburst;
+ } else {
+ dev_err(chan->device->dev, "%s: bad direction?\n", __func__);
+ return NULL;
+ }
+
+ /* Bus width translates to the element size (ES) */
+ switch (dev_width) {
+ case DMA_SLAVE_BUSWIDTH_1_BYTE:
+ es = CSDP_DATA_TYPE_8;
+ break;
+ case DMA_SLAVE_BUSWIDTH_2_BYTES:
+ es = CSDP_DATA_TYPE_16;
+ break;
+ case DMA_SLAVE_BUSWIDTH_4_BYTES:
+ es = CSDP_DATA_TYPE_32;
+ break;
+ default: /* not reached */
+ return NULL;
+ }
+
+ /* Now allocate and setup the descriptor. */
+ d = kzalloc(sizeof(*d) + sizeof(d->sg[0]), GFP_ATOMIC);
+ if (!d)
+ return NULL;
+
+ d->dir = dir;
+ d->dev_addr = dev_addr;
+ d->fi = burst;
+ d->es = es;
+ d->sg[0].addr = buf_addr;
+ d->sg[0].en = period_len / es_bytes[es];
+ d->sg[0].fn = buf_len / period_len;
+ d->sglen = 1;
+
+ d->ccr = c->ccr;
+ if (dir == DMA_DEV_TO_MEM)
+ d->ccr |= CCR_DST_AMODE_POSTINC | CCR_SRC_AMODE_CONSTANT;
+ else
+ d->ccr |= CCR_DST_AMODE_CONSTANT | CCR_SRC_AMODE_POSTINC;
+
+ d->cicr = CICR_DROP_IE;
+ if (flags & DMA_PREP_INTERRUPT)
+ d->cicr |= CICR_FRAME_IE;
+
+ d->csdp = es;
+
+ if (dma_omap1()) {
+ d->cicr |= CICR_TOUT_IE;
+
+ if (dir == DMA_DEV_TO_MEM)
+ d->csdp |= CSDP_DST_PORT_EMIFF | CSDP_SRC_PORT_MPUI;
+ else
+ d->csdp |= CSDP_DST_PORT_MPUI | CSDP_SRC_PORT_EMIFF;
+ } else {
+ if (burst)
+ d->ccr |= CCR_SYNC_PACKET;
+ else
+ d->ccr |= CCR_SYNC_ELEMENT;
+
+ if (dir == DMA_DEV_TO_MEM) {
+ d->ccr |= CCR_TRIGGER_SRC;
+ d->csdp |= CSDP_DST_PACKED;
+ } else {
+ d->csdp |= CSDP_SRC_PACKED;
+ }
+
+ d->cicr |= CICR_MISALIGNED_ERR_IE | CICR_TRANS_ERR_IE;
+
+ d->csdp |= CSDP_DST_BURST_64 | CSDP_SRC_BURST_64;
+ }
+
+ if (__dma_omap15xx(od->plat->dma_attr))
+ d->ccr |= CCR_AUTO_INIT | CCR_REPEAT;
+ else
+ d->clnk_ctrl = c->dma_ch | CLNK_CTRL_ENABLE_LNK;
+
+ c->cyclic = true;
+
+ return vchan_tx_prep(&c->vc, &d->vd, flags);
+}
+
+static struct dma_async_tx_descriptor *omap_dma_prep_dma_memcpy(
+ struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
+ size_t len, unsigned long tx_flags)
+{
+ struct omap_chan *c = to_omap_dma_chan(chan);
+ struct omap_desc *d;
+ uint8_t data_type;
+
+ d = kzalloc(sizeof(*d) + sizeof(d->sg[0]), GFP_ATOMIC);
+ if (!d)
+ return NULL;
+
+ data_type = __ffs((src | dest | len));
+ if (data_type > CSDP_DATA_TYPE_32)
+ data_type = CSDP_DATA_TYPE_32;
+
+ d->dir = DMA_MEM_TO_MEM;
+ d->dev_addr = src;
+ d->fi = 0;
+ d->es = data_type;
+ d->sg[0].en = len / BIT(data_type);
+ d->sg[0].fn = 1;
+ d->sg[0].addr = dest;
+ d->sglen = 1;
+ d->ccr = c->ccr;
+ d->ccr |= CCR_DST_AMODE_POSTINC | CCR_SRC_AMODE_POSTINC;
+
+ if (tx_flags & DMA_PREP_INTERRUPT)
+ d->cicr |= CICR_FRAME_IE;
+ else
+ d->polled = true;
+
+ d->csdp = data_type;
+
+ if (dma_omap1()) {
+ d->cicr |= CICR_TOUT_IE;
+ d->csdp |= CSDP_DST_PORT_EMIFF | CSDP_SRC_PORT_EMIFF;
+ } else {
+ d->csdp |= CSDP_DST_PACKED | CSDP_SRC_PACKED;
+ d->cicr |= CICR_MISALIGNED_ERR_IE | CICR_TRANS_ERR_IE;
+ d->csdp |= CSDP_DST_BURST_64 | CSDP_SRC_BURST_64;
+ }
+
+ return vchan_tx_prep(&c->vc, &d->vd, tx_flags);
+}
+
+static struct dma_async_tx_descriptor *omap_dma_prep_dma_interleaved(
+ struct dma_chan *chan, struct dma_interleaved_template *xt,
+ unsigned long flags)
+{
+ struct omap_chan *c = to_omap_dma_chan(chan);
+ struct omap_desc *d;
+ struct omap_sg *sg;
+ uint8_t data_type;
+ size_t src_icg, dst_icg;
+
+ /* Slave mode is not supported */
+ if (is_slave_direction(xt->dir))
+ return NULL;
+
+ if (xt->frame_size != 1 || xt->numf == 0)
+ return NULL;
+
+ d = kzalloc(sizeof(*d) + sizeof(d->sg[0]), GFP_ATOMIC);
+ if (!d)
+ return NULL;
+
+ data_type = __ffs((xt->src_start | xt->dst_start | xt->sgl[0].size));
+ if (data_type > CSDP_DATA_TYPE_32)
+ data_type = CSDP_DATA_TYPE_32;
+
+ sg = &d->sg[0];
+ d->dir = DMA_MEM_TO_MEM;
+ d->dev_addr = xt->src_start;
+ d->es = data_type;
+ sg->en = xt->sgl[0].size / BIT(data_type);
+ sg->fn = xt->numf;
+ sg->addr = xt->dst_start;
+ d->sglen = 1;
+ d->ccr = c->ccr;
+
+ src_icg = dmaengine_get_src_icg(xt, &xt->sgl[0]);
+ dst_icg = dmaengine_get_dst_icg(xt, &xt->sgl[0]);
+ if (src_icg) {
+ d->ccr |= CCR_SRC_AMODE_DBLIDX;
+ d->ei = 1;
+ d->fi = src_icg + 1;
+ } else if (xt->src_inc) {
+ d->ccr |= CCR_SRC_AMODE_POSTINC;
+ d->fi = 0;
+ } else {
+ dev_err(chan->device->dev,
+ "%s: SRC constant addressing is not supported\n",
+ __func__);
+ kfree(d);
+ return NULL;
+ }
+
+ if (dst_icg) {
+ d->ccr |= CCR_DST_AMODE_DBLIDX;
+ sg->ei = 1;
+ sg->fi = dst_icg + 1;
+ } else if (xt->dst_inc) {
+ d->ccr |= CCR_DST_AMODE_POSTINC;
+ sg->fi = 0;
+ } else {
+ dev_err(chan->device->dev,
+ "%s: DST constant addressing is not supported\n",
+ __func__);
+ kfree(d);
+ return NULL;
+ }
+
+ d->cicr = CICR_DROP_IE | CICR_FRAME_IE;
+
+ d->csdp = data_type;
+
+ if (dma_omap1()) {
+ d->cicr |= CICR_TOUT_IE;
+ d->csdp |= CSDP_DST_PORT_EMIFF | CSDP_SRC_PORT_EMIFF;
+ } else {
+ d->csdp |= CSDP_DST_PACKED | CSDP_SRC_PACKED;
+ d->cicr |= CICR_MISALIGNED_ERR_IE | CICR_TRANS_ERR_IE;
+ d->csdp |= CSDP_DST_BURST_64 | CSDP_SRC_BURST_64;
+ }
+
+ return vchan_tx_prep(&c->vc, &d->vd, flags);
+}
+
+static int omap_dma_slave_config(struct dma_chan *chan, struct dma_slave_config *cfg)
+{
+ struct omap_chan *c = to_omap_dma_chan(chan);
+
+ if (cfg->src_addr_width == DMA_SLAVE_BUSWIDTH_8_BYTES ||
+ cfg->dst_addr_width == DMA_SLAVE_BUSWIDTH_8_BYTES)
+ return -EINVAL;
+
+ if (cfg->src_maxburst > chan->device->max_burst ||
+ cfg->dst_maxburst > chan->device->max_burst)
+ return -EINVAL;
+
+ memcpy(&c->cfg, cfg, sizeof(c->cfg));
+
+ return 0;
+}
+
+static int omap_dma_terminate_all(struct dma_chan *chan)
+{
+ struct omap_chan *c = to_omap_dma_chan(chan);
+ unsigned long flags;
+ LIST_HEAD(head);
+
+ spin_lock_irqsave(&c->vc.lock, flags);
+
+ /*
+ * Stop DMA activity: we assume the callback will not be called
+ * after omap_dma_stop() returns (even if it does, it will see
+ * c->desc is NULL and exit.)
+ */
+ if (c->desc) {
+ vchan_terminate_vdesc(&c->desc->vd);
+ c->desc = NULL;
+ /* Avoid stopping the dma twice */
+ if (!c->paused)
+ omap_dma_stop(c);
+ }
+
+ c->cyclic = false;
+ c->paused = false;
+
+ vchan_get_all_descriptors(&c->vc, &head);
+ spin_unlock_irqrestore(&c->vc.lock, flags);
+ vchan_dma_desc_free_list(&c->vc, &head);
+
+ return 0;
+}
+
+static void omap_dma_synchronize(struct dma_chan *chan)
+{
+ struct omap_chan *c = to_omap_dma_chan(chan);
+
+ vchan_synchronize(&c->vc);
+}
+
+static int omap_dma_pause(struct dma_chan *chan)
+{
+ struct omap_chan *c = to_omap_dma_chan(chan);
+ struct omap_dmadev *od = to_omap_dma_dev(chan->device);
+ unsigned long flags;
+ int ret = -EINVAL;
+ bool can_pause = false;
+
+ spin_lock_irqsave(&od->irq_lock, flags);
+
+ if (!c->desc)
+ goto out;
+
+ if (c->cyclic)
+ can_pause = true;
+
+ /*
+ * We do not allow DMA_MEM_TO_DEV transfers to be paused.
+ * From the AM572x TRM, 16.1.4.18 Disabling a Channel During Transfer:
+ * "When a channel is disabled during a transfer, the channel undergoes
+ * an abort, unless it is hardware-source-synchronized …".
+ * A source-synchronised channel is one where the fetching of data is
+ * under control of the device. In other words, a device-to-memory
+ * transfer. So, a destination-synchronised channel (which would be a
+ * memory-to-device transfer) undergoes an abort if the CCR_ENABLE
+ * bit is cleared.
+ * From 16.1.4.20.4.6.2 Abort: "If an abort trigger occurs, the channel
+ * aborts immediately after completion of current read/write
+ * transactions and then the FIFO is cleaned up." The term "cleaned up"
+ * is not defined. TI recommends to check that RD_ACTIVE and WR_ACTIVE
+ * are both clear _before_ disabling the channel, otherwise data loss
+ * will occur.
+ * The problem is that if the channel is active, then device activity
+ * can result in DMA activity starting between reading those as both
+ * clear and the write to DMA_CCR to clear the enable bit hitting the
+ * hardware. If the DMA hardware can't drain the data in its FIFO to the
+ * destination, then data loss "might" occur (say if we write to an UART
+ * and the UART is not accepting any further data).
+ */
+ else if (c->desc->dir == DMA_DEV_TO_MEM)
+ can_pause = true;
+
+ if (can_pause && !c->paused) {
+ ret = omap_dma_stop(c);
+ if (!ret)
+ c->paused = true;
+ }
+out:
+ spin_unlock_irqrestore(&od->irq_lock, flags);
+
+ return ret;
+}
+
+static int omap_dma_resume(struct dma_chan *chan)
+{
+ struct omap_chan *c = to_omap_dma_chan(chan);
+ struct omap_dmadev *od = to_omap_dma_dev(chan->device);
+ unsigned long flags;
+ int ret = -EINVAL;
+
+ spin_lock_irqsave(&od->irq_lock, flags);
+
+ if (c->paused && c->desc) {
+ mb();
+
+ /* Restore channel link register */
+ omap_dma_chan_write(c, CLNK_CTRL, c->desc->clnk_ctrl);
+
+ omap_dma_start(c, c->desc);
+ c->paused = false;
+ ret = 0;
+ }
+ spin_unlock_irqrestore(&od->irq_lock, flags);
+
+ return ret;
+}
+
+static int omap_dma_chan_init(struct omap_dmadev *od)
+{
+ struct omap_chan *c;
+
+ c = kzalloc(sizeof(*c), GFP_KERNEL);
+ if (!c)
+ return -ENOMEM;
+
+ c->reg_map = od->reg_map;
+ c->vc.desc_free = omap_dma_desc_free;
+ vchan_init(&c->vc, &od->ddev);
+
+ return 0;
+}
+
+static void omap_dma_free(struct omap_dmadev *od)
+{
+ while (!list_empty(&od->ddev.channels)) {
+ struct omap_chan *c = list_first_entry(&od->ddev.channels,
+ struct omap_chan, vc.chan.device_node);
+
+ list_del(&c->vc.chan.device_node);
+ tasklet_kill(&c->vc.task);
+ kfree(c);
+ }
+}
+
+/* Currently used by omap2 & 3 to block deeper SoC idle states */
+static bool omap_dma_busy(struct omap_dmadev *od)
+{
+ struct omap_chan *c;
+ int lch = -1;
+
+ while (1) {
+ lch = find_next_bit(od->lch_bitmap, od->lch_count, lch + 1);
+ if (lch >= od->lch_count)
+ break;
+ c = od->lch_map[lch];
+ if (!c)
+ continue;
+ if (omap_dma_chan_read(c, CCR) & CCR_ENABLE)
+ return true;
+ }
+
+ return false;
+}
+
+/* Currently only used for omap2. For omap1, also a check for lcd_dma is needed */
+static int omap_dma_busy_notifier(struct notifier_block *nb,
+ unsigned long cmd, void *v)
+{
+ struct omap_dmadev *od;
+
+ od = container_of(nb, struct omap_dmadev, nb);
+
+ switch (cmd) {
+ case CPU_CLUSTER_PM_ENTER:
+ if (omap_dma_busy(od))
+ return NOTIFY_BAD;
+ break;
+ case CPU_CLUSTER_PM_ENTER_FAILED:
+ case CPU_CLUSTER_PM_EXIT:
+ break;
+ }
+
+ return NOTIFY_OK;
+}
+
+/*
+ * We are using IRQENABLE_L1, and legacy DMA code was using IRQENABLE_L0.
+ * As the DSP may be using IRQENABLE_L2 and L3, let's not touch those for
+ * now. Context save seems to be only currently needed on omap3.
+ */
+static void omap_dma_context_save(struct omap_dmadev *od)
+{
+ od->context.irqenable_l0 = omap_dma_glbl_read(od, IRQENABLE_L0);
+ od->context.irqenable_l1 = omap_dma_glbl_read(od, IRQENABLE_L1);
+ od->context.ocp_sysconfig = omap_dma_glbl_read(od, OCP_SYSCONFIG);
+ od->context.gcr = omap_dma_glbl_read(od, GCR);
+}
+
+static void omap_dma_context_restore(struct omap_dmadev *od)
+{
+ int i;
+
+ omap_dma_glbl_write(od, GCR, od->context.gcr);
+ omap_dma_glbl_write(od, OCP_SYSCONFIG, od->context.ocp_sysconfig);
+ omap_dma_glbl_write(od, IRQENABLE_L0, od->context.irqenable_l0);
+ omap_dma_glbl_write(od, IRQENABLE_L1, od->context.irqenable_l1);
+
+ /* Clear IRQSTATUS_L0 as legacy DMA code is no longer doing it */
+ if (od->plat->errata & DMA_ROMCODE_BUG)
+ omap_dma_glbl_write(od, IRQSTATUS_L0, 0);
+
+ /* Clear dma channels */
+ for (i = 0; i < od->lch_count; i++)
+ omap_dma_clear_lch(od, i);
+}
+
+/* Currently only used for omap3 */
+static int omap_dma_context_notifier(struct notifier_block *nb,
+ unsigned long cmd, void *v)
+{
+ struct omap_dmadev *od;
+
+ od = container_of(nb, struct omap_dmadev, nb);
+
+ switch (cmd) {
+ case CPU_CLUSTER_PM_ENTER:
+ if (omap_dma_busy(od))
+ return NOTIFY_BAD;
+ omap_dma_context_save(od);
+ break;
+ case CPU_CLUSTER_PM_ENTER_FAILED: /* No need to restore context */
+ break;
+ case CPU_CLUSTER_PM_EXIT:
+ omap_dma_context_restore(od);
+ break;
+ }
+
+ return NOTIFY_OK;
+}
+
+static void omap_dma_init_gcr(struct omap_dmadev *od, int arb_rate,
+ int max_fifo_depth, int tparams)
+{
+ u32 val;
+
+ /* Set only for omap2430 and later */
+ if (!od->cfg->rw_priority)
+ return;
+
+ if (max_fifo_depth == 0)
+ max_fifo_depth = 1;
+ if (arb_rate == 0)
+ arb_rate = 1;
+
+ val = 0xff & max_fifo_depth;
+ val |= (0x3 & tparams) << 12;
+ val |= (arb_rate & 0xff) << 16;
+
+ omap_dma_glbl_write(od, GCR, val);
+}
+
+#define OMAP_DMA_BUSWIDTHS (BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | \
+ BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \
+ BIT(DMA_SLAVE_BUSWIDTH_4_BYTES))
+
+/*
+ * No flags currently set for default configuration as omap1 is still
+ * using platform data.
+ */
+static const struct omap_dma_config default_cfg;
+
+static int omap_dma_probe(struct platform_device *pdev)
+{
+ const struct omap_dma_config *conf;
+ struct omap_dmadev *od;
+ struct resource *res;
+ int rc, i, irq;
+ u32 val;
+
+ od = devm_kzalloc(&pdev->dev, sizeof(*od), GFP_KERNEL);
+ if (!od)
+ return -ENOMEM;
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ od->base = devm_ioremap_resource(&pdev->dev, res);
+ if (IS_ERR(od->base))
+ return PTR_ERR(od->base);
+
+ conf = of_device_get_match_data(&pdev->dev);
+ if (conf) {
+ od->cfg = conf;
+ od->plat = dev_get_platdata(&pdev->dev);
+ if (!od->plat) {
+ dev_err(&pdev->dev, "omap_system_dma_plat_info is missing");
+ return -ENODEV;
+ }
+ } else if (IS_ENABLED(CONFIG_ARCH_OMAP1)) {
+ od->cfg = &default_cfg;
+
+ od->plat = omap_get_plat_info();
+ if (!od->plat)
+ return -EPROBE_DEFER;
+ } else {
+ return -ENODEV;
+ }
+
+ od->reg_map = od->plat->reg_map;
+
+ dma_cap_set(DMA_SLAVE, od->ddev.cap_mask);
+ dma_cap_set(DMA_CYCLIC, od->ddev.cap_mask);
+ dma_cap_set(DMA_MEMCPY, od->ddev.cap_mask);
+ dma_cap_set(DMA_INTERLEAVE, od->ddev.cap_mask);
+ od->ddev.device_alloc_chan_resources = omap_dma_alloc_chan_resources;
+ od->ddev.device_free_chan_resources = omap_dma_free_chan_resources;
+ od->ddev.device_tx_status = omap_dma_tx_status;
+ od->ddev.device_issue_pending = omap_dma_issue_pending;
+ od->ddev.device_prep_slave_sg = omap_dma_prep_slave_sg;
+ od->ddev.device_prep_dma_cyclic = omap_dma_prep_dma_cyclic;
+ od->ddev.device_prep_dma_memcpy = omap_dma_prep_dma_memcpy;
+ od->ddev.device_prep_interleaved_dma = omap_dma_prep_dma_interleaved;
+ od->ddev.device_config = omap_dma_slave_config;
+ od->ddev.device_pause = omap_dma_pause;
+ od->ddev.device_resume = omap_dma_resume;
+ od->ddev.device_terminate_all = omap_dma_terminate_all;
+ od->ddev.device_synchronize = omap_dma_synchronize;
+ od->ddev.src_addr_widths = OMAP_DMA_BUSWIDTHS;
+ od->ddev.dst_addr_widths = OMAP_DMA_BUSWIDTHS;
+ od->ddev.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
+ if (__dma_omap15xx(od->plat->dma_attr))
+ od->ddev.residue_granularity =
+ DMA_RESIDUE_GRANULARITY_DESCRIPTOR;
+ else
+ od->ddev.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
+ od->ddev.max_burst = SZ_16M - 1; /* CCEN: 24bit unsigned */
+ od->ddev.dev = &pdev->dev;
+ INIT_LIST_HEAD(&od->ddev.channels);
+ mutex_init(&od->lch_lock);
+ spin_lock_init(&od->lock);
+ spin_lock_init(&od->irq_lock);
+
+ /* Number of DMA requests */
+ od->dma_requests = OMAP_SDMA_REQUESTS;
+ if (pdev->dev.of_node && of_property_read_u32(pdev->dev.of_node,
+ "dma-requests",
+ &od->dma_requests)) {
+ dev_info(&pdev->dev,
+ "Missing dma-requests property, using %u.\n",
+ OMAP_SDMA_REQUESTS);
+ }
+
+ /* Number of available logical channels */
+ if (!pdev->dev.of_node) {
+ od->lch_count = od->plat->dma_attr->lch_count;
+ if (unlikely(!od->lch_count))
+ od->lch_count = OMAP_SDMA_CHANNELS;
+ } else if (of_property_read_u32(pdev->dev.of_node, "dma-channels",
+ &od->lch_count)) {
+ dev_info(&pdev->dev,
+ "Missing dma-channels property, using %u.\n",
+ OMAP_SDMA_CHANNELS);
+ od->lch_count = OMAP_SDMA_CHANNELS;
+ }
+
+ /* Mask of allowed logical channels */
+ if (pdev->dev.of_node && !of_property_read_u32(pdev->dev.of_node,
+ "dma-channel-mask",
+ &val)) {
+ /* Tag channels not in mask as reserved */
+ val = ~val;
+ bitmap_from_arr32(od->lch_bitmap, &val, od->lch_count);
+ }
+ if (od->plat->dma_attr->dev_caps & HS_CHANNELS_RESERVED)
+ bitmap_set(od->lch_bitmap, 0, 2);
+
+ od->lch_map = devm_kcalloc(&pdev->dev, od->lch_count,
+ sizeof(*od->lch_map),
+ GFP_KERNEL);
+ if (!od->lch_map)
+ return -ENOMEM;
+
+ for (i = 0; i < od->dma_requests; i++) {
+ rc = omap_dma_chan_init(od);
+ if (rc) {
+ omap_dma_free(od);
+ return rc;
+ }
+ }
+
+ irq = platform_get_irq(pdev, 1);
+ if (irq <= 0) {
+ dev_info(&pdev->dev, "failed to get L1 IRQ: %d\n", irq);
+ od->legacy = true;
+ } else {
+ /* Disable all interrupts */
+ od->irq_enable_mask = 0;
+ omap_dma_glbl_write(od, IRQENABLE_L1, 0);
+
+ rc = devm_request_irq(&pdev->dev, irq, omap_dma_irq,
+ IRQF_SHARED, "omap-dma-engine", od);
+ if (rc) {
+ omap_dma_free(od);
+ return rc;
+ }
+ }
+
+ if (omap_dma_glbl_read(od, CAPS_0) & CAPS_0_SUPPORT_LL123)
+ od->ll123_supported = true;
+
+ od->ddev.filter.map = od->plat->slave_map;
+ od->ddev.filter.mapcnt = od->plat->slavecnt;
+ od->ddev.filter.fn = omap_dma_filter_fn;
+
+ if (od->ll123_supported) {
+ od->desc_pool = dma_pool_create(dev_name(&pdev->dev),
+ &pdev->dev,
+ sizeof(struct omap_type2_desc),
+ 4, 0);
+ if (!od->desc_pool) {
+ dev_err(&pdev->dev,
+ "unable to allocate descriptor pool\n");
+ od->ll123_supported = false;
+ }
+ }
+
+ rc = dma_async_device_register(&od->ddev);
+ if (rc) {
+ pr_warn("OMAP-DMA: failed to register slave DMA engine device: %d\n",
+ rc);
+ omap_dma_free(od);
+ return rc;
+ }
+
+ platform_set_drvdata(pdev, od);
+
+ if (pdev->dev.of_node) {
+ omap_dma_info.dma_cap = od->ddev.cap_mask;
+
+ /* Device-tree DMA controller registration */
+ rc = of_dma_controller_register(pdev->dev.of_node,
+ of_dma_simple_xlate, &omap_dma_info);
+ if (rc) {
+ pr_warn("OMAP-DMA: failed to register DMA controller\n");
+ dma_async_device_unregister(&od->ddev);
+ omap_dma_free(od);
+ }
+ }
+
+ omap_dma_init_gcr(od, DMA_DEFAULT_ARB_RATE, DMA_DEFAULT_FIFO_DEPTH, 0);
+
+ if (od->cfg->needs_busy_check) {
+ od->nb.notifier_call = omap_dma_busy_notifier;
+ cpu_pm_register_notifier(&od->nb);
+ } else if (od->cfg->may_lose_context) {
+ od->nb.notifier_call = omap_dma_context_notifier;
+ cpu_pm_register_notifier(&od->nb);
+ }
+
+ dev_info(&pdev->dev, "OMAP DMA engine driver%s\n",
+ od->ll123_supported ? " (LinkedList1/2/3 supported)" : "");
+
+ return rc;
+}
+
+static int omap_dma_remove(struct platform_device *pdev)
+{
+ struct omap_dmadev *od = platform_get_drvdata(pdev);
+ int irq;
+
+ if (od->cfg->may_lose_context)
+ cpu_pm_unregister_notifier(&od->nb);
+
+ if (pdev->dev.of_node)
+ of_dma_controller_free(pdev->dev.of_node);
+
+ irq = platform_get_irq(pdev, 1);
+ devm_free_irq(&pdev->dev, irq, od);
+
+ dma_async_device_unregister(&od->ddev);
+
+ if (!omap_dma_legacy(od)) {
+ /* Disable all interrupts */
+ omap_dma_glbl_write(od, IRQENABLE_L0, 0);
+ }
+
+ if (od->ll123_supported)
+ dma_pool_destroy(od->desc_pool);
+
+ omap_dma_free(od);
+
+ return 0;
+}
+
+static const struct omap_dma_config omap2420_data = {
+ .lch_end = CCFN,
+ .rw_priority = true,
+ .needs_lch_clear = true,
+ .needs_busy_check = true,
+};
+
+static const struct omap_dma_config omap2430_data = {
+ .lch_end = CCFN,
+ .rw_priority = true,
+ .needs_lch_clear = true,
+};
+
+static const struct omap_dma_config omap3430_data = {
+ .lch_end = CCFN,
+ .rw_priority = true,
+ .needs_lch_clear = true,
+ .may_lose_context = true,
+};
+
+static const struct omap_dma_config omap3630_data = {
+ .lch_end = CCDN,
+ .rw_priority = true,
+ .needs_lch_clear = true,
+ .may_lose_context = true,
+};
+
+static const struct omap_dma_config omap4_data = {
+ .lch_end = CCDN,
+ .rw_priority = true,
+ .needs_lch_clear = true,
+};
+
+static const struct of_device_id omap_dma_match[] = {
+ { .compatible = "ti,omap2420-sdma", .data = &omap2420_data, },
+ { .compatible = "ti,omap2430-sdma", .data = &omap2430_data, },
+ { .compatible = "ti,omap3430-sdma", .data = &omap3430_data, },
+ { .compatible = "ti,omap3630-sdma", .data = &omap3630_data, },
+ { .compatible = "ti,omap4430-sdma", .data = &omap4_data, },
+ {},
+};
+MODULE_DEVICE_TABLE(of, omap_dma_match);
+
+static struct platform_driver omap_dma_driver = {
+ .probe = omap_dma_probe,
+ .remove = omap_dma_remove,
+ .driver = {
+ .name = "omap-dma-engine",
+ .of_match_table = omap_dma_match,
+ },
+};
+
+static bool omap_dma_filter_fn(struct dma_chan *chan, void *param)
+{
+ if (chan->device->dev->driver == &omap_dma_driver.driver) {
+ struct omap_dmadev *od = to_omap_dma_dev(chan->device);
+ struct omap_chan *c = to_omap_dma_chan(chan);
+ unsigned req = *(unsigned *)param;
+
+ if (req <= od->dma_requests) {
+ c->dma_sig = req;
+ return true;
+ }
+ }
+ return false;
+}
+
+static int omap_dma_init(void)
+{
+ return platform_driver_register(&omap_dma_driver);
+}
+subsys_initcall(omap_dma_init);
+
+static void __exit omap_dma_exit(void)
+{
+ platform_driver_unregister(&omap_dma_driver);
+}
+module_exit(omap_dma_exit);
+
+MODULE_AUTHOR("Russell King");
+MODULE_LICENSE("GPL");
diff --git a/drivers/dma/timb_dma.c b/drivers/dma/timb_dma.c
new file mode 100644
index 000000000..3f524be69
--- /dev/null
+++ b/drivers/dma/timb_dma.c
@@ -0,0 +1,773 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * timb_dma.c timberdale FPGA DMA driver
+ * Copyright (c) 2010 Intel Corporation
+ */
+
+/* Supports:
+ * Timberdale FPGA DMA engine
+ */
+
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+#include <linux/timb_dma.h>
+
+#include "dmaengine.h"
+
+#define DRIVER_NAME "timb-dma"
+
+/* Global DMA registers */
+#define TIMBDMA_ACR 0x34
+#define TIMBDMA_32BIT_ADDR 0x01
+
+#define TIMBDMA_ISR 0x080000
+#define TIMBDMA_IPR 0x080004
+#define TIMBDMA_IER 0x080008
+
+/* Channel specific registers */
+/* RX instances base addresses are 0x00, 0x40, 0x80 ...
+ * TX instances base addresses are 0x18, 0x58, 0x98 ...
+ */
+#define TIMBDMA_INSTANCE_OFFSET 0x40
+#define TIMBDMA_INSTANCE_TX_OFFSET 0x18
+
+/* RX registers, relative the instance base */
+#define TIMBDMA_OFFS_RX_DHAR 0x00
+#define TIMBDMA_OFFS_RX_DLAR 0x04
+#define TIMBDMA_OFFS_RX_LR 0x0C
+#define TIMBDMA_OFFS_RX_BLR 0x10
+#define TIMBDMA_OFFS_RX_ER 0x14
+#define TIMBDMA_RX_EN 0x01
+/* bytes per Row, video specific register
+ * which is placed after the TX registers...
+ */
+#define TIMBDMA_OFFS_RX_BPRR 0x30
+
+/* TX registers, relative the instance base */
+#define TIMBDMA_OFFS_TX_DHAR 0x00
+#define TIMBDMA_OFFS_TX_DLAR 0x04
+#define TIMBDMA_OFFS_TX_BLR 0x0C
+#define TIMBDMA_OFFS_TX_LR 0x14
+
+
+#define TIMB_DMA_DESC_SIZE 8
+
+struct timb_dma_desc {
+ struct list_head desc_node;
+ struct dma_async_tx_descriptor txd;
+ u8 *desc_list;
+ unsigned int desc_list_len;
+ bool interrupt;
+};
+
+struct timb_dma_chan {
+ struct dma_chan chan;
+ void __iomem *membase;
+ spinlock_t lock; /* Used to protect data structures,
+ especially the lists and descriptors,
+ from races between the tasklet and calls
+ from above */
+ bool ongoing;
+ struct list_head active_list;
+ struct list_head queue;
+ struct list_head free_list;
+ unsigned int bytes_per_line;
+ enum dma_transfer_direction direction;
+ unsigned int descs; /* Descriptors to allocate */
+ unsigned int desc_elems; /* number of elems per descriptor */
+};
+
+struct timb_dma {
+ struct dma_device dma;
+ void __iomem *membase;
+ struct tasklet_struct tasklet;
+ struct timb_dma_chan channels[];
+};
+
+static struct device *chan2dev(struct dma_chan *chan)
+{
+ return &chan->dev->device;
+}
+static struct device *chan2dmadev(struct dma_chan *chan)
+{
+ return chan2dev(chan)->parent->parent;
+}
+
+static struct timb_dma *tdchantotd(struct timb_dma_chan *td_chan)
+{
+ int id = td_chan->chan.chan_id;
+ return (struct timb_dma *)((u8 *)td_chan -
+ id * sizeof(struct timb_dma_chan) - sizeof(struct timb_dma));
+}
+
+/* Must be called with the spinlock held */
+static void __td_enable_chan_irq(struct timb_dma_chan *td_chan)
+{
+ int id = td_chan->chan.chan_id;
+ struct timb_dma *td = tdchantotd(td_chan);
+ u32 ier;
+
+ /* enable interrupt for this channel */
+ ier = ioread32(td->membase + TIMBDMA_IER);
+ ier |= 1 << id;
+ dev_dbg(chan2dev(&td_chan->chan), "Enabling irq: %d, IER: 0x%x\n", id,
+ ier);
+ iowrite32(ier, td->membase + TIMBDMA_IER);
+}
+
+/* Should be called with the spinlock held */
+static bool __td_dma_done_ack(struct timb_dma_chan *td_chan)
+{
+ int id = td_chan->chan.chan_id;
+ struct timb_dma *td = (struct timb_dma *)((u8 *)td_chan -
+ id * sizeof(struct timb_dma_chan) - sizeof(struct timb_dma));
+ u32 isr;
+ bool done = false;
+
+ dev_dbg(chan2dev(&td_chan->chan), "Checking irq: %d, td: %p\n", id, td);
+
+ isr = ioread32(td->membase + TIMBDMA_ISR) & (1 << id);
+ if (isr) {
+ iowrite32(isr, td->membase + TIMBDMA_ISR);
+ done = true;
+ }
+
+ return done;
+}
+
+static int td_fill_desc(struct timb_dma_chan *td_chan, u8 *dma_desc,
+ struct scatterlist *sg, bool last)
+{
+ if (sg_dma_len(sg) > USHRT_MAX) {
+ dev_err(chan2dev(&td_chan->chan), "Too big sg element\n");
+ return -EINVAL;
+ }
+
+ /* length must be word aligned */
+ if (sg_dma_len(sg) % sizeof(u32)) {
+ dev_err(chan2dev(&td_chan->chan), "Incorrect length: %d\n",
+ sg_dma_len(sg));
+ return -EINVAL;
+ }
+
+ dev_dbg(chan2dev(&td_chan->chan), "desc: %p, addr: 0x%llx\n",
+ dma_desc, (unsigned long long)sg_dma_address(sg));
+
+ dma_desc[7] = (sg_dma_address(sg) >> 24) & 0xff;
+ dma_desc[6] = (sg_dma_address(sg) >> 16) & 0xff;
+ dma_desc[5] = (sg_dma_address(sg) >> 8) & 0xff;
+ dma_desc[4] = (sg_dma_address(sg) >> 0) & 0xff;
+
+ dma_desc[3] = (sg_dma_len(sg) >> 8) & 0xff;
+ dma_desc[2] = (sg_dma_len(sg) >> 0) & 0xff;
+
+ dma_desc[1] = 0x00;
+ dma_desc[0] = 0x21 | (last ? 0x02 : 0); /* tran, valid */
+
+ return 0;
+}
+
+/* Must be called with the spinlock held */
+static void __td_start_dma(struct timb_dma_chan *td_chan)
+{
+ struct timb_dma_desc *td_desc;
+
+ if (td_chan->ongoing) {
+ dev_err(chan2dev(&td_chan->chan),
+ "Transfer already ongoing\n");
+ return;
+ }
+
+ td_desc = list_entry(td_chan->active_list.next, struct timb_dma_desc,
+ desc_node);
+
+ dev_dbg(chan2dev(&td_chan->chan),
+ "td_chan: %p, chan: %d, membase: %p\n",
+ td_chan, td_chan->chan.chan_id, td_chan->membase);
+
+ if (td_chan->direction == DMA_DEV_TO_MEM) {
+
+ /* descriptor address */
+ iowrite32(0, td_chan->membase + TIMBDMA_OFFS_RX_DHAR);
+ iowrite32(td_desc->txd.phys, td_chan->membase +
+ TIMBDMA_OFFS_RX_DLAR);
+ /* Bytes per line */
+ iowrite32(td_chan->bytes_per_line, td_chan->membase +
+ TIMBDMA_OFFS_RX_BPRR);
+ /* enable RX */
+ iowrite32(TIMBDMA_RX_EN, td_chan->membase + TIMBDMA_OFFS_RX_ER);
+ } else {
+ /* address high */
+ iowrite32(0, td_chan->membase + TIMBDMA_OFFS_TX_DHAR);
+ iowrite32(td_desc->txd.phys, td_chan->membase +
+ TIMBDMA_OFFS_TX_DLAR);
+ }
+
+ td_chan->ongoing = true;
+
+ if (td_desc->interrupt)
+ __td_enable_chan_irq(td_chan);
+}
+
+static void __td_finish(struct timb_dma_chan *td_chan)
+{
+ struct dmaengine_desc_callback cb;
+ struct dma_async_tx_descriptor *txd;
+ struct timb_dma_desc *td_desc;
+
+ /* can happen if the descriptor is canceled */
+ if (list_empty(&td_chan->active_list))
+ return;
+
+ td_desc = list_entry(td_chan->active_list.next, struct timb_dma_desc,
+ desc_node);
+ txd = &td_desc->txd;
+
+ dev_dbg(chan2dev(&td_chan->chan), "descriptor %u complete\n",
+ txd->cookie);
+
+ /* make sure to stop the transfer */
+ if (td_chan->direction == DMA_DEV_TO_MEM)
+ iowrite32(0, td_chan->membase + TIMBDMA_OFFS_RX_ER);
+/* Currently no support for stopping DMA transfers
+ else
+ iowrite32(0, td_chan->membase + TIMBDMA_OFFS_TX_DLAR);
+*/
+ dma_cookie_complete(txd);
+ td_chan->ongoing = false;
+
+ dmaengine_desc_get_callback(txd, &cb);
+
+ list_move(&td_desc->desc_node, &td_chan->free_list);
+
+ dma_descriptor_unmap(txd);
+ /*
+ * The API requires that no submissions are done from a
+ * callback, so we don't need to drop the lock here
+ */
+ dmaengine_desc_callback_invoke(&cb, NULL);
+}
+
+static u32 __td_ier_mask(struct timb_dma *td)
+{
+ int i;
+ u32 ret = 0;
+
+ for (i = 0; i < td->dma.chancnt; i++) {
+ struct timb_dma_chan *td_chan = td->channels + i;
+ if (td_chan->ongoing) {
+ struct timb_dma_desc *td_desc =
+ list_entry(td_chan->active_list.next,
+ struct timb_dma_desc, desc_node);
+ if (td_desc->interrupt)
+ ret |= 1 << i;
+ }
+ }
+
+ return ret;
+}
+
+static void __td_start_next(struct timb_dma_chan *td_chan)
+{
+ struct timb_dma_desc *td_desc;
+
+ BUG_ON(list_empty(&td_chan->queue));
+ BUG_ON(td_chan->ongoing);
+
+ td_desc = list_entry(td_chan->queue.next, struct timb_dma_desc,
+ desc_node);
+
+ dev_dbg(chan2dev(&td_chan->chan), "%s: started %u\n",
+ __func__, td_desc->txd.cookie);
+
+ list_move(&td_desc->desc_node, &td_chan->active_list);
+ __td_start_dma(td_chan);
+}
+
+static dma_cookie_t td_tx_submit(struct dma_async_tx_descriptor *txd)
+{
+ struct timb_dma_desc *td_desc = container_of(txd, struct timb_dma_desc,
+ txd);
+ struct timb_dma_chan *td_chan = container_of(txd->chan,
+ struct timb_dma_chan, chan);
+ dma_cookie_t cookie;
+
+ spin_lock_bh(&td_chan->lock);
+ cookie = dma_cookie_assign(txd);
+
+ if (list_empty(&td_chan->active_list)) {
+ dev_dbg(chan2dev(txd->chan), "%s: started %u\n", __func__,
+ txd->cookie);
+ list_add_tail(&td_desc->desc_node, &td_chan->active_list);
+ __td_start_dma(td_chan);
+ } else {
+ dev_dbg(chan2dev(txd->chan), "tx_submit: queued %u\n",
+ txd->cookie);
+
+ list_add_tail(&td_desc->desc_node, &td_chan->queue);
+ }
+
+ spin_unlock_bh(&td_chan->lock);
+
+ return cookie;
+}
+
+static struct timb_dma_desc *td_alloc_init_desc(struct timb_dma_chan *td_chan)
+{
+ struct dma_chan *chan = &td_chan->chan;
+ struct timb_dma_desc *td_desc;
+ int err;
+
+ td_desc = kzalloc(sizeof(struct timb_dma_desc), GFP_KERNEL);
+ if (!td_desc)
+ goto out;
+
+ td_desc->desc_list_len = td_chan->desc_elems * TIMB_DMA_DESC_SIZE;
+
+ td_desc->desc_list = kzalloc(td_desc->desc_list_len, GFP_KERNEL);
+ if (!td_desc->desc_list)
+ goto err;
+
+ dma_async_tx_descriptor_init(&td_desc->txd, chan);
+ td_desc->txd.tx_submit = td_tx_submit;
+ td_desc->txd.flags = DMA_CTRL_ACK;
+
+ td_desc->txd.phys = dma_map_single(chan2dmadev(chan),
+ td_desc->desc_list, td_desc->desc_list_len, DMA_TO_DEVICE);
+
+ err = dma_mapping_error(chan2dmadev(chan), td_desc->txd.phys);
+ if (err) {
+ dev_err(chan2dev(chan), "DMA mapping error: %d\n", err);
+ goto err;
+ }
+
+ return td_desc;
+err:
+ kfree(td_desc->desc_list);
+ kfree(td_desc);
+out:
+ return NULL;
+
+}
+
+static void td_free_desc(struct timb_dma_desc *td_desc)
+{
+ dev_dbg(chan2dev(td_desc->txd.chan), "Freeing desc: %p\n", td_desc);
+ dma_unmap_single(chan2dmadev(td_desc->txd.chan), td_desc->txd.phys,
+ td_desc->desc_list_len, DMA_TO_DEVICE);
+
+ kfree(td_desc->desc_list);
+ kfree(td_desc);
+}
+
+static void td_desc_put(struct timb_dma_chan *td_chan,
+ struct timb_dma_desc *td_desc)
+{
+ dev_dbg(chan2dev(&td_chan->chan), "Putting desc: %p\n", td_desc);
+
+ spin_lock_bh(&td_chan->lock);
+ list_add(&td_desc->desc_node, &td_chan->free_list);
+ spin_unlock_bh(&td_chan->lock);
+}
+
+static struct timb_dma_desc *td_desc_get(struct timb_dma_chan *td_chan)
+{
+ struct timb_dma_desc *td_desc, *_td_desc;
+ struct timb_dma_desc *ret = NULL;
+
+ spin_lock_bh(&td_chan->lock);
+ list_for_each_entry_safe(td_desc, _td_desc, &td_chan->free_list,
+ desc_node) {
+ if (async_tx_test_ack(&td_desc->txd)) {
+ list_del(&td_desc->desc_node);
+ ret = td_desc;
+ break;
+ }
+ dev_dbg(chan2dev(&td_chan->chan), "desc %p not ACKed\n",
+ td_desc);
+ }
+ spin_unlock_bh(&td_chan->lock);
+
+ return ret;
+}
+
+static int td_alloc_chan_resources(struct dma_chan *chan)
+{
+ struct timb_dma_chan *td_chan =
+ container_of(chan, struct timb_dma_chan, chan);
+ int i;
+
+ dev_dbg(chan2dev(chan), "%s: entry\n", __func__);
+
+ BUG_ON(!list_empty(&td_chan->free_list));
+ for (i = 0; i < td_chan->descs; i++) {
+ struct timb_dma_desc *td_desc = td_alloc_init_desc(td_chan);
+ if (!td_desc) {
+ if (i)
+ break;
+ else {
+ dev_err(chan2dev(chan),
+ "Couldn't allocate any descriptors\n");
+ return -ENOMEM;
+ }
+ }
+
+ td_desc_put(td_chan, td_desc);
+ }
+
+ spin_lock_bh(&td_chan->lock);
+ dma_cookie_init(chan);
+ spin_unlock_bh(&td_chan->lock);
+
+ return 0;
+}
+
+static void td_free_chan_resources(struct dma_chan *chan)
+{
+ struct timb_dma_chan *td_chan =
+ container_of(chan, struct timb_dma_chan, chan);
+ struct timb_dma_desc *td_desc, *_td_desc;
+ LIST_HEAD(list);
+
+ dev_dbg(chan2dev(chan), "%s: Entry\n", __func__);
+
+ /* check that all descriptors are free */
+ BUG_ON(!list_empty(&td_chan->active_list));
+ BUG_ON(!list_empty(&td_chan->queue));
+
+ spin_lock_bh(&td_chan->lock);
+ list_splice_init(&td_chan->free_list, &list);
+ spin_unlock_bh(&td_chan->lock);
+
+ list_for_each_entry_safe(td_desc, _td_desc, &list, desc_node) {
+ dev_dbg(chan2dev(chan), "%s: Freeing desc: %p\n", __func__,
+ td_desc);
+ td_free_desc(td_desc);
+ }
+}
+
+static enum dma_status td_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
+ struct dma_tx_state *txstate)
+{
+ enum dma_status ret;
+
+ dev_dbg(chan2dev(chan), "%s: Entry\n", __func__);
+
+ ret = dma_cookie_status(chan, cookie, txstate);
+
+ dev_dbg(chan2dev(chan), "%s: exit, ret: %d\n", __func__, ret);
+
+ return ret;
+}
+
+static void td_issue_pending(struct dma_chan *chan)
+{
+ struct timb_dma_chan *td_chan =
+ container_of(chan, struct timb_dma_chan, chan);
+
+ dev_dbg(chan2dev(chan), "%s: Entry\n", __func__);
+ spin_lock_bh(&td_chan->lock);
+
+ if (!list_empty(&td_chan->active_list))
+ /* transfer ongoing */
+ if (__td_dma_done_ack(td_chan))
+ __td_finish(td_chan);
+
+ if (list_empty(&td_chan->active_list) && !list_empty(&td_chan->queue))
+ __td_start_next(td_chan);
+
+ spin_unlock_bh(&td_chan->lock);
+}
+
+static struct dma_async_tx_descriptor *td_prep_slave_sg(struct dma_chan *chan,
+ struct scatterlist *sgl, unsigned int sg_len,
+ enum dma_transfer_direction direction, unsigned long flags,
+ void *context)
+{
+ struct timb_dma_chan *td_chan =
+ container_of(chan, struct timb_dma_chan, chan);
+ struct timb_dma_desc *td_desc;
+ struct scatterlist *sg;
+ unsigned int i;
+ unsigned int desc_usage = 0;
+
+ if (!sgl || !sg_len) {
+ dev_err(chan2dev(chan), "%s: No SG list\n", __func__);
+ return NULL;
+ }
+
+ /* even channels are for RX, odd for TX */
+ if (td_chan->direction != direction) {
+ dev_err(chan2dev(chan),
+ "Requesting channel in wrong direction\n");
+ return NULL;
+ }
+
+ td_desc = td_desc_get(td_chan);
+ if (!td_desc) {
+ dev_err(chan2dev(chan), "Not enough descriptors available\n");
+ return NULL;
+ }
+
+ td_desc->interrupt = (flags & DMA_PREP_INTERRUPT) != 0;
+
+ for_each_sg(sgl, sg, sg_len, i) {
+ int err;
+ if (desc_usage > td_desc->desc_list_len) {
+ dev_err(chan2dev(chan), "No descriptor space\n");
+ return NULL;
+ }
+
+ err = td_fill_desc(td_chan, td_desc->desc_list + desc_usage, sg,
+ i == (sg_len - 1));
+ if (err) {
+ dev_err(chan2dev(chan), "Failed to update desc: %d\n",
+ err);
+ td_desc_put(td_chan, td_desc);
+ return NULL;
+ }
+ desc_usage += TIMB_DMA_DESC_SIZE;
+ }
+
+ dma_sync_single_for_device(chan2dmadev(chan), td_desc->txd.phys,
+ td_desc->desc_list_len, DMA_TO_DEVICE);
+
+ return &td_desc->txd;
+}
+
+static int td_terminate_all(struct dma_chan *chan)
+{
+ struct timb_dma_chan *td_chan =
+ container_of(chan, struct timb_dma_chan, chan);
+ struct timb_dma_desc *td_desc, *_td_desc;
+
+ dev_dbg(chan2dev(chan), "%s: Entry\n", __func__);
+
+ /* first the easy part, put the queue into the free list */
+ spin_lock_bh(&td_chan->lock);
+ list_for_each_entry_safe(td_desc, _td_desc, &td_chan->queue,
+ desc_node)
+ list_move(&td_desc->desc_node, &td_chan->free_list);
+
+ /* now tear down the running */
+ __td_finish(td_chan);
+ spin_unlock_bh(&td_chan->lock);
+
+ return 0;
+}
+
+static void td_tasklet(struct tasklet_struct *t)
+{
+ struct timb_dma *td = from_tasklet(td, t, tasklet);
+ u32 isr;
+ u32 ipr;
+ u32 ier;
+ int i;
+
+ isr = ioread32(td->membase + TIMBDMA_ISR);
+ ipr = isr & __td_ier_mask(td);
+
+ /* ack the interrupts */
+ iowrite32(ipr, td->membase + TIMBDMA_ISR);
+
+ for (i = 0; i < td->dma.chancnt; i++)
+ if (ipr & (1 << i)) {
+ struct timb_dma_chan *td_chan = td->channels + i;
+ spin_lock(&td_chan->lock);
+ __td_finish(td_chan);
+ if (!list_empty(&td_chan->queue))
+ __td_start_next(td_chan);
+ spin_unlock(&td_chan->lock);
+ }
+
+ ier = __td_ier_mask(td);
+ iowrite32(ier, td->membase + TIMBDMA_IER);
+}
+
+
+static irqreturn_t td_irq(int irq, void *devid)
+{
+ struct timb_dma *td = devid;
+ u32 ipr = ioread32(td->membase + TIMBDMA_IPR);
+
+ if (ipr) {
+ /* disable interrupts, will be re-enabled in tasklet */
+ iowrite32(0, td->membase + TIMBDMA_IER);
+
+ tasklet_schedule(&td->tasklet);
+
+ return IRQ_HANDLED;
+ } else
+ return IRQ_NONE;
+}
+
+
+static int td_probe(struct platform_device *pdev)
+{
+ struct timb_dma_platform_data *pdata = dev_get_platdata(&pdev->dev);
+ struct timb_dma *td;
+ struct resource *iomem;
+ int irq;
+ int err;
+ int i;
+
+ if (!pdata) {
+ dev_err(&pdev->dev, "No platform data\n");
+ return -EINVAL;
+ }
+
+ iomem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ if (!iomem)
+ return -EINVAL;
+
+ irq = platform_get_irq(pdev, 0);
+ if (irq < 0)
+ return irq;
+
+ if (!request_mem_region(iomem->start, resource_size(iomem),
+ DRIVER_NAME))
+ return -EBUSY;
+
+ td = kzalloc(struct_size(td, channels, pdata->nr_channels),
+ GFP_KERNEL);
+ if (!td) {
+ err = -ENOMEM;
+ goto err_release_region;
+ }
+
+ dev_dbg(&pdev->dev, "Allocated TD: %p\n", td);
+
+ td->membase = ioremap(iomem->start, resource_size(iomem));
+ if (!td->membase) {
+ dev_err(&pdev->dev, "Failed to remap I/O memory\n");
+ err = -ENOMEM;
+ goto err_free_mem;
+ }
+
+ /* 32bit addressing */
+ iowrite32(TIMBDMA_32BIT_ADDR, td->membase + TIMBDMA_ACR);
+
+ /* disable and clear any interrupts */
+ iowrite32(0x0, td->membase + TIMBDMA_IER);
+ iowrite32(0xFFFFFFFF, td->membase + TIMBDMA_ISR);
+
+ tasklet_setup(&td->tasklet, td_tasklet);
+
+ err = request_irq(irq, td_irq, IRQF_SHARED, DRIVER_NAME, td);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to request IRQ\n");
+ goto err_tasklet_kill;
+ }
+
+ td->dma.device_alloc_chan_resources = td_alloc_chan_resources;
+ td->dma.device_free_chan_resources = td_free_chan_resources;
+ td->dma.device_tx_status = td_tx_status;
+ td->dma.device_issue_pending = td_issue_pending;
+
+ dma_cap_set(DMA_SLAVE, td->dma.cap_mask);
+ dma_cap_set(DMA_PRIVATE, td->dma.cap_mask);
+ td->dma.device_prep_slave_sg = td_prep_slave_sg;
+ td->dma.device_terminate_all = td_terminate_all;
+
+ td->dma.dev = &pdev->dev;
+
+ INIT_LIST_HEAD(&td->dma.channels);
+
+ for (i = 0; i < pdata->nr_channels; i++) {
+ struct timb_dma_chan *td_chan = &td->channels[i];
+ struct timb_dma_platform_data_channel *pchan =
+ pdata->channels + i;
+
+ /* even channels are RX, odd are TX */
+ if ((i % 2) == pchan->rx) {
+ dev_err(&pdev->dev, "Wrong channel configuration\n");
+ err = -EINVAL;
+ goto err_free_irq;
+ }
+
+ td_chan->chan.device = &td->dma;
+ dma_cookie_init(&td_chan->chan);
+ spin_lock_init(&td_chan->lock);
+ INIT_LIST_HEAD(&td_chan->active_list);
+ INIT_LIST_HEAD(&td_chan->queue);
+ INIT_LIST_HEAD(&td_chan->free_list);
+
+ td_chan->descs = pchan->descriptors;
+ td_chan->desc_elems = pchan->descriptor_elements;
+ td_chan->bytes_per_line = pchan->bytes_per_line;
+ td_chan->direction = pchan->rx ? DMA_DEV_TO_MEM :
+ DMA_MEM_TO_DEV;
+
+ td_chan->membase = td->membase +
+ (i / 2) * TIMBDMA_INSTANCE_OFFSET +
+ (pchan->rx ? 0 : TIMBDMA_INSTANCE_TX_OFFSET);
+
+ dev_dbg(&pdev->dev, "Chan: %d, membase: %p\n",
+ i, td_chan->membase);
+
+ list_add_tail(&td_chan->chan.device_node, &td->dma.channels);
+ }
+
+ err = dma_async_device_register(&td->dma);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to register async device\n");
+ goto err_free_irq;
+ }
+
+ platform_set_drvdata(pdev, td);
+
+ dev_dbg(&pdev->dev, "Probe result: %d\n", err);
+ return err;
+
+err_free_irq:
+ free_irq(irq, td);
+err_tasklet_kill:
+ tasklet_kill(&td->tasklet);
+ iounmap(td->membase);
+err_free_mem:
+ kfree(td);
+err_release_region:
+ release_mem_region(iomem->start, resource_size(iomem));
+
+ return err;
+
+}
+
+static int td_remove(struct platform_device *pdev)
+{
+ struct timb_dma *td = platform_get_drvdata(pdev);
+ struct resource *iomem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ int irq = platform_get_irq(pdev, 0);
+
+ dma_async_device_unregister(&td->dma);
+ free_irq(irq, td);
+ tasklet_kill(&td->tasklet);
+ iounmap(td->membase);
+ kfree(td);
+ release_mem_region(iomem->start, resource_size(iomem));
+
+ dev_dbg(&pdev->dev, "Removed...\n");
+ return 0;
+}
+
+static struct platform_driver td_driver = {
+ .driver = {
+ .name = DRIVER_NAME,
+ },
+ .probe = td_probe,
+ .remove = td_remove,
+};
+
+module_platform_driver(td_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("Timberdale DMA controller driver");
+MODULE_AUTHOR("Pelagicore AB <info@pelagicore.com>");
+MODULE_ALIAS("platform:"DRIVER_NAME);
diff --git a/drivers/dma/txx9dmac.c b/drivers/dma/txx9dmac.c
new file mode 100644
index 000000000..5b6b375a2
--- /dev/null
+++ b/drivers/dma/txx9dmac.c
@@ -0,0 +1,1306 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Driver for the TXx9 SoC DMA Controller
+ *
+ * Copyright (C) 2009 Atsushi Nemoto
+ */
+#include <linux/dma-mapping.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/scatterlist.h>
+
+#include "dmaengine.h"
+#include "txx9dmac.h"
+
+static struct txx9dmac_chan *to_txx9dmac_chan(struct dma_chan *chan)
+{
+ return container_of(chan, struct txx9dmac_chan, chan);
+}
+
+static struct txx9dmac_cregs __iomem *__dma_regs(const struct txx9dmac_chan *dc)
+{
+ return dc->ch_regs;
+}
+
+static struct txx9dmac_cregs32 __iomem *__dma_regs32(
+ const struct txx9dmac_chan *dc)
+{
+ return dc->ch_regs;
+}
+
+#define channel64_readq(dc, name) \
+ __raw_readq(&(__dma_regs(dc)->name))
+#define channel64_writeq(dc, name, val) \
+ __raw_writeq((val), &(__dma_regs(dc)->name))
+#define channel64_readl(dc, name) \
+ __raw_readl(&(__dma_regs(dc)->name))
+#define channel64_writel(dc, name, val) \
+ __raw_writel((val), &(__dma_regs(dc)->name))
+
+#define channel32_readl(dc, name) \
+ __raw_readl(&(__dma_regs32(dc)->name))
+#define channel32_writel(dc, name, val) \
+ __raw_writel((val), &(__dma_regs32(dc)->name))
+
+#define channel_readq(dc, name) channel64_readq(dc, name)
+#define channel_writeq(dc, name, val) channel64_writeq(dc, name, val)
+#define channel_readl(dc, name) \
+ (is_dmac64(dc) ? \
+ channel64_readl(dc, name) : channel32_readl(dc, name))
+#define channel_writel(dc, name, val) \
+ (is_dmac64(dc) ? \
+ channel64_writel(dc, name, val) : channel32_writel(dc, name, val))
+
+static dma_addr_t channel64_read_CHAR(const struct txx9dmac_chan *dc)
+{
+ if (sizeof(__dma_regs(dc)->CHAR) == sizeof(u64))
+ return channel64_readq(dc, CHAR);
+ else
+ return channel64_readl(dc, CHAR);
+}
+
+static void channel64_write_CHAR(const struct txx9dmac_chan *dc, dma_addr_t val)
+{
+ if (sizeof(__dma_regs(dc)->CHAR) == sizeof(u64))
+ channel64_writeq(dc, CHAR, val);
+ else
+ channel64_writel(dc, CHAR, val);
+}
+
+static void channel64_clear_CHAR(const struct txx9dmac_chan *dc)
+{
+#if defined(CONFIG_32BIT) && !defined(CONFIG_PHYS_ADDR_T_64BIT)
+ channel64_writel(dc, CHAR, 0);
+ channel64_writel(dc, __pad_CHAR, 0);
+#else
+ channel64_writeq(dc, CHAR, 0);
+#endif
+}
+
+static dma_addr_t channel_read_CHAR(const struct txx9dmac_chan *dc)
+{
+ if (is_dmac64(dc))
+ return channel64_read_CHAR(dc);
+ else
+ return channel32_readl(dc, CHAR);
+}
+
+static void channel_write_CHAR(const struct txx9dmac_chan *dc, dma_addr_t val)
+{
+ if (is_dmac64(dc))
+ channel64_write_CHAR(dc, val);
+ else
+ channel32_writel(dc, CHAR, val);
+}
+
+static struct txx9dmac_regs __iomem *__txx9dmac_regs(
+ const struct txx9dmac_dev *ddev)
+{
+ return ddev->regs;
+}
+
+static struct txx9dmac_regs32 __iomem *__txx9dmac_regs32(
+ const struct txx9dmac_dev *ddev)
+{
+ return ddev->regs;
+}
+
+#define dma64_readl(ddev, name) \
+ __raw_readl(&(__txx9dmac_regs(ddev)->name))
+#define dma64_writel(ddev, name, val) \
+ __raw_writel((val), &(__txx9dmac_regs(ddev)->name))
+
+#define dma32_readl(ddev, name) \
+ __raw_readl(&(__txx9dmac_regs32(ddev)->name))
+#define dma32_writel(ddev, name, val) \
+ __raw_writel((val), &(__txx9dmac_regs32(ddev)->name))
+
+#define dma_readl(ddev, name) \
+ (__is_dmac64(ddev) ? \
+ dma64_readl(ddev, name) : dma32_readl(ddev, name))
+#define dma_writel(ddev, name, val) \
+ (__is_dmac64(ddev) ? \
+ dma64_writel(ddev, name, val) : dma32_writel(ddev, name, val))
+
+static struct device *chan2dev(struct dma_chan *chan)
+{
+ return &chan->dev->device;
+}
+static struct device *chan2parent(struct dma_chan *chan)
+{
+ return chan->dev->device.parent;
+}
+
+static struct txx9dmac_desc *
+txd_to_txx9dmac_desc(struct dma_async_tx_descriptor *txd)
+{
+ return container_of(txd, struct txx9dmac_desc, txd);
+}
+
+static dma_addr_t desc_read_CHAR(const struct txx9dmac_chan *dc,
+ const struct txx9dmac_desc *desc)
+{
+ return is_dmac64(dc) ? desc->hwdesc.CHAR : desc->hwdesc32.CHAR;
+}
+
+static void desc_write_CHAR(const struct txx9dmac_chan *dc,
+ struct txx9dmac_desc *desc, dma_addr_t val)
+{
+ if (is_dmac64(dc))
+ desc->hwdesc.CHAR = val;
+ else
+ desc->hwdesc32.CHAR = val;
+}
+
+#define TXX9_DMA_MAX_COUNT 0x04000000
+
+#define TXX9_DMA_INITIAL_DESC_COUNT 64
+
+static struct txx9dmac_desc *txx9dmac_first_active(struct txx9dmac_chan *dc)
+{
+ return list_entry(dc->active_list.next,
+ struct txx9dmac_desc, desc_node);
+}
+
+static struct txx9dmac_desc *txx9dmac_last_active(struct txx9dmac_chan *dc)
+{
+ return list_entry(dc->active_list.prev,
+ struct txx9dmac_desc, desc_node);
+}
+
+static struct txx9dmac_desc *txx9dmac_first_queued(struct txx9dmac_chan *dc)
+{
+ return list_entry(dc->queue.next, struct txx9dmac_desc, desc_node);
+}
+
+static struct txx9dmac_desc *txx9dmac_last_child(struct txx9dmac_desc *desc)
+{
+ if (!list_empty(&desc->tx_list))
+ desc = list_entry(desc->tx_list.prev, typeof(*desc), desc_node);
+ return desc;
+}
+
+static dma_cookie_t txx9dmac_tx_submit(struct dma_async_tx_descriptor *tx);
+
+static struct txx9dmac_desc *txx9dmac_desc_alloc(struct txx9dmac_chan *dc,
+ gfp_t flags)
+{
+ struct txx9dmac_dev *ddev = dc->ddev;
+ struct txx9dmac_desc *desc;
+
+ desc = kzalloc(sizeof(*desc), flags);
+ if (!desc)
+ return NULL;
+ INIT_LIST_HEAD(&desc->tx_list);
+ dma_async_tx_descriptor_init(&desc->txd, &dc->chan);
+ desc->txd.tx_submit = txx9dmac_tx_submit;
+ /* txd.flags will be overwritten in prep funcs */
+ desc->txd.flags = DMA_CTRL_ACK;
+ desc->txd.phys = dma_map_single(chan2parent(&dc->chan), &desc->hwdesc,
+ ddev->descsize, DMA_TO_DEVICE);
+ return desc;
+}
+
+static struct txx9dmac_desc *txx9dmac_desc_get(struct txx9dmac_chan *dc)
+{
+ struct txx9dmac_desc *desc, *_desc;
+ struct txx9dmac_desc *ret = NULL;
+ unsigned int i = 0;
+
+ spin_lock_bh(&dc->lock);
+ list_for_each_entry_safe(desc, _desc, &dc->free_list, desc_node) {
+ if (async_tx_test_ack(&desc->txd)) {
+ list_del(&desc->desc_node);
+ ret = desc;
+ break;
+ }
+ dev_dbg(chan2dev(&dc->chan), "desc %p not ACKed\n", desc);
+ i++;
+ }
+ spin_unlock_bh(&dc->lock);
+
+ dev_vdbg(chan2dev(&dc->chan), "scanned %u descriptors on freelist\n",
+ i);
+ if (!ret) {
+ ret = txx9dmac_desc_alloc(dc, GFP_ATOMIC);
+ if (ret) {
+ spin_lock_bh(&dc->lock);
+ dc->descs_allocated++;
+ spin_unlock_bh(&dc->lock);
+ } else
+ dev_err(chan2dev(&dc->chan),
+ "not enough descriptors available\n");
+ }
+ return ret;
+}
+
+static void txx9dmac_sync_desc_for_cpu(struct txx9dmac_chan *dc,
+ struct txx9dmac_desc *desc)
+{
+ struct txx9dmac_dev *ddev = dc->ddev;
+ struct txx9dmac_desc *child;
+
+ list_for_each_entry(child, &desc->tx_list, desc_node)
+ dma_sync_single_for_cpu(chan2parent(&dc->chan),
+ child->txd.phys, ddev->descsize,
+ DMA_TO_DEVICE);
+ dma_sync_single_for_cpu(chan2parent(&dc->chan),
+ desc->txd.phys, ddev->descsize,
+ DMA_TO_DEVICE);
+}
+
+/*
+ * Move a descriptor, including any children, to the free list.
+ * `desc' must not be on any lists.
+ */
+static void txx9dmac_desc_put(struct txx9dmac_chan *dc,
+ struct txx9dmac_desc *desc)
+{
+ if (desc) {
+ struct txx9dmac_desc *child;
+
+ txx9dmac_sync_desc_for_cpu(dc, desc);
+
+ spin_lock_bh(&dc->lock);
+ list_for_each_entry(child, &desc->tx_list, desc_node)
+ dev_vdbg(chan2dev(&dc->chan),
+ "moving child desc %p to freelist\n",
+ child);
+ list_splice_init(&desc->tx_list, &dc->free_list);
+ dev_vdbg(chan2dev(&dc->chan), "moving desc %p to freelist\n",
+ desc);
+ list_add(&desc->desc_node, &dc->free_list);
+ spin_unlock_bh(&dc->lock);
+ }
+}
+
+/*----------------------------------------------------------------------*/
+
+static void txx9dmac_dump_regs(struct txx9dmac_chan *dc)
+{
+ if (is_dmac64(dc))
+ dev_err(chan2dev(&dc->chan),
+ " CHAR: %#llx SAR: %#llx DAR: %#llx CNTR: %#x"
+ " SAIR: %#x DAIR: %#x CCR: %#x CSR: %#x\n",
+ (u64)channel64_read_CHAR(dc),
+ channel64_readq(dc, SAR),
+ channel64_readq(dc, DAR),
+ channel64_readl(dc, CNTR),
+ channel64_readl(dc, SAIR),
+ channel64_readl(dc, DAIR),
+ channel64_readl(dc, CCR),
+ channel64_readl(dc, CSR));
+ else
+ dev_err(chan2dev(&dc->chan),
+ " CHAR: %#x SAR: %#x DAR: %#x CNTR: %#x"
+ " SAIR: %#x DAIR: %#x CCR: %#x CSR: %#x\n",
+ channel32_readl(dc, CHAR),
+ channel32_readl(dc, SAR),
+ channel32_readl(dc, DAR),
+ channel32_readl(dc, CNTR),
+ channel32_readl(dc, SAIR),
+ channel32_readl(dc, DAIR),
+ channel32_readl(dc, CCR),
+ channel32_readl(dc, CSR));
+}
+
+static void txx9dmac_reset_chan(struct txx9dmac_chan *dc)
+{
+ channel_writel(dc, CCR, TXX9_DMA_CCR_CHRST);
+ if (is_dmac64(dc)) {
+ channel64_clear_CHAR(dc);
+ channel_writeq(dc, SAR, 0);
+ channel_writeq(dc, DAR, 0);
+ } else {
+ channel_writel(dc, CHAR, 0);
+ channel_writel(dc, SAR, 0);
+ channel_writel(dc, DAR, 0);
+ }
+ channel_writel(dc, CNTR, 0);
+ channel_writel(dc, SAIR, 0);
+ channel_writel(dc, DAIR, 0);
+ channel_writel(dc, CCR, 0);
+}
+
+/* Called with dc->lock held and bh disabled */
+static void txx9dmac_dostart(struct txx9dmac_chan *dc,
+ struct txx9dmac_desc *first)
+{
+ struct txx9dmac_slave *ds = dc->chan.private;
+ u32 sai, dai;
+
+ dev_vdbg(chan2dev(&dc->chan), "dostart %u %p\n",
+ first->txd.cookie, first);
+ /* ASSERT: channel is idle */
+ if (channel_readl(dc, CSR) & TXX9_DMA_CSR_XFACT) {
+ dev_err(chan2dev(&dc->chan),
+ "BUG: Attempted to start non-idle channel\n");
+ txx9dmac_dump_regs(dc);
+ /* The tasklet will hopefully advance the queue... */
+ return;
+ }
+
+ if (is_dmac64(dc)) {
+ channel64_writel(dc, CNTR, 0);
+ channel64_writel(dc, CSR, 0xffffffff);
+ if (ds) {
+ if (ds->tx_reg) {
+ sai = ds->reg_width;
+ dai = 0;
+ } else {
+ sai = 0;
+ dai = ds->reg_width;
+ }
+ } else {
+ sai = 8;
+ dai = 8;
+ }
+ channel64_writel(dc, SAIR, sai);
+ channel64_writel(dc, DAIR, dai);
+ /* All 64-bit DMAC supports SMPCHN */
+ channel64_writel(dc, CCR, dc->ccr);
+ /* Writing a non zero value to CHAR will assert XFACT */
+ channel64_write_CHAR(dc, first->txd.phys);
+ } else {
+ channel32_writel(dc, CNTR, 0);
+ channel32_writel(dc, CSR, 0xffffffff);
+ if (ds) {
+ if (ds->tx_reg) {
+ sai = ds->reg_width;
+ dai = 0;
+ } else {
+ sai = 0;
+ dai = ds->reg_width;
+ }
+ } else {
+ sai = 4;
+ dai = 4;
+ }
+ channel32_writel(dc, SAIR, sai);
+ channel32_writel(dc, DAIR, dai);
+ if (txx9_dma_have_SMPCHN()) {
+ channel32_writel(dc, CCR, dc->ccr);
+ /* Writing a non zero value to CHAR will assert XFACT */
+ channel32_writel(dc, CHAR, first->txd.phys);
+ } else {
+ channel32_writel(dc, CHAR, first->txd.phys);
+ channel32_writel(dc, CCR, dc->ccr);
+ }
+ }
+}
+
+/*----------------------------------------------------------------------*/
+
+static void
+txx9dmac_descriptor_complete(struct txx9dmac_chan *dc,
+ struct txx9dmac_desc *desc)
+{
+ struct dmaengine_desc_callback cb;
+ struct dma_async_tx_descriptor *txd = &desc->txd;
+
+ dev_vdbg(chan2dev(&dc->chan), "descriptor %u %p complete\n",
+ txd->cookie, desc);
+
+ dma_cookie_complete(txd);
+ dmaengine_desc_get_callback(txd, &cb);
+
+ txx9dmac_sync_desc_for_cpu(dc, desc);
+ list_splice_init(&desc->tx_list, &dc->free_list);
+ list_move(&desc->desc_node, &dc->free_list);
+
+ dma_descriptor_unmap(txd);
+ /*
+ * The API requires that no submissions are done from a
+ * callback, so we don't need to drop the lock here
+ */
+ dmaengine_desc_callback_invoke(&cb, NULL);
+ dma_run_dependencies(txd);
+}
+
+static void txx9dmac_dequeue(struct txx9dmac_chan *dc, struct list_head *list)
+{
+ struct txx9dmac_dev *ddev = dc->ddev;
+ struct txx9dmac_desc *desc;
+ struct txx9dmac_desc *prev = NULL;
+
+ BUG_ON(!list_empty(list));
+ do {
+ desc = txx9dmac_first_queued(dc);
+ if (prev) {
+ desc_write_CHAR(dc, prev, desc->txd.phys);
+ dma_sync_single_for_device(chan2parent(&dc->chan),
+ prev->txd.phys, ddev->descsize,
+ DMA_TO_DEVICE);
+ }
+ prev = txx9dmac_last_child(desc);
+ list_move_tail(&desc->desc_node, list);
+ /* Make chain-completion interrupt happen */
+ if ((desc->txd.flags & DMA_PREP_INTERRUPT) &&
+ !txx9dmac_chan_INTENT(dc))
+ break;
+ } while (!list_empty(&dc->queue));
+}
+
+static void txx9dmac_complete_all(struct txx9dmac_chan *dc)
+{
+ struct txx9dmac_desc *desc, *_desc;
+ LIST_HEAD(list);
+
+ /*
+ * Submit queued descriptors ASAP, i.e. before we go through
+ * the completed ones.
+ */
+ list_splice_init(&dc->active_list, &list);
+ if (!list_empty(&dc->queue)) {
+ txx9dmac_dequeue(dc, &dc->active_list);
+ txx9dmac_dostart(dc, txx9dmac_first_active(dc));
+ }
+
+ list_for_each_entry_safe(desc, _desc, &list, desc_node)
+ txx9dmac_descriptor_complete(dc, desc);
+}
+
+static void txx9dmac_dump_desc(struct txx9dmac_chan *dc,
+ struct txx9dmac_hwdesc *desc)
+{
+ if (is_dmac64(dc)) {
+#ifdef TXX9_DMA_USE_SIMPLE_CHAIN
+ dev_crit(chan2dev(&dc->chan),
+ " desc: ch%#llx s%#llx d%#llx c%#x\n",
+ (u64)desc->CHAR, desc->SAR, desc->DAR, desc->CNTR);
+#else
+ dev_crit(chan2dev(&dc->chan),
+ " desc: ch%#llx s%#llx d%#llx c%#x"
+ " si%#x di%#x cc%#x cs%#x\n",
+ (u64)desc->CHAR, desc->SAR, desc->DAR, desc->CNTR,
+ desc->SAIR, desc->DAIR, desc->CCR, desc->CSR);
+#endif
+ } else {
+ struct txx9dmac_hwdesc32 *d = (struct txx9dmac_hwdesc32 *)desc;
+#ifdef TXX9_DMA_USE_SIMPLE_CHAIN
+ dev_crit(chan2dev(&dc->chan),
+ " desc: ch%#x s%#x d%#x c%#x\n",
+ d->CHAR, d->SAR, d->DAR, d->CNTR);
+#else
+ dev_crit(chan2dev(&dc->chan),
+ " desc: ch%#x s%#x d%#x c%#x"
+ " si%#x di%#x cc%#x cs%#x\n",
+ d->CHAR, d->SAR, d->DAR, d->CNTR,
+ d->SAIR, d->DAIR, d->CCR, d->CSR);
+#endif
+ }
+}
+
+static void txx9dmac_handle_error(struct txx9dmac_chan *dc, u32 csr)
+{
+ struct txx9dmac_desc *bad_desc;
+ struct txx9dmac_desc *child;
+ u32 errors;
+
+ /*
+ * The descriptor currently at the head of the active list is
+ * borked. Since we don't have any way to report errors, we'll
+ * just have to scream loudly and try to carry on.
+ */
+ dev_crit(chan2dev(&dc->chan), "Abnormal Chain Completion\n");
+ txx9dmac_dump_regs(dc);
+
+ bad_desc = txx9dmac_first_active(dc);
+ list_del_init(&bad_desc->desc_node);
+
+ /* Clear all error flags and try to restart the controller */
+ errors = csr & (TXX9_DMA_CSR_ABCHC |
+ TXX9_DMA_CSR_CFERR | TXX9_DMA_CSR_CHERR |
+ TXX9_DMA_CSR_DESERR | TXX9_DMA_CSR_SORERR);
+ channel_writel(dc, CSR, errors);
+
+ if (list_empty(&dc->active_list) && !list_empty(&dc->queue))
+ txx9dmac_dequeue(dc, &dc->active_list);
+ if (!list_empty(&dc->active_list))
+ txx9dmac_dostart(dc, txx9dmac_first_active(dc));
+
+ dev_crit(chan2dev(&dc->chan),
+ "Bad descriptor submitted for DMA! (cookie: %d)\n",
+ bad_desc->txd.cookie);
+ txx9dmac_dump_desc(dc, &bad_desc->hwdesc);
+ list_for_each_entry(child, &bad_desc->tx_list, desc_node)
+ txx9dmac_dump_desc(dc, &child->hwdesc);
+ /* Pretend the descriptor completed successfully */
+ txx9dmac_descriptor_complete(dc, bad_desc);
+}
+
+static void txx9dmac_scan_descriptors(struct txx9dmac_chan *dc)
+{
+ dma_addr_t chain;
+ struct txx9dmac_desc *desc, *_desc;
+ struct txx9dmac_desc *child;
+ u32 csr;
+
+ if (is_dmac64(dc)) {
+ chain = channel64_read_CHAR(dc);
+ csr = channel64_readl(dc, CSR);
+ channel64_writel(dc, CSR, csr);
+ } else {
+ chain = channel32_readl(dc, CHAR);
+ csr = channel32_readl(dc, CSR);
+ channel32_writel(dc, CSR, csr);
+ }
+ /* For dynamic chain, we should look at XFACT instead of NCHNC */
+ if (!(csr & (TXX9_DMA_CSR_XFACT | TXX9_DMA_CSR_ABCHC))) {
+ /* Everything we've submitted is done */
+ txx9dmac_complete_all(dc);
+ return;
+ }
+ if (!(csr & TXX9_DMA_CSR_CHNEN))
+ chain = 0; /* last descriptor of this chain */
+
+ dev_vdbg(chan2dev(&dc->chan), "scan_descriptors: char=%#llx\n",
+ (u64)chain);
+
+ list_for_each_entry_safe(desc, _desc, &dc->active_list, desc_node) {
+ if (desc_read_CHAR(dc, desc) == chain) {
+ /* This one is currently in progress */
+ if (csr & TXX9_DMA_CSR_ABCHC)
+ goto scan_done;
+ return;
+ }
+
+ list_for_each_entry(child, &desc->tx_list, desc_node)
+ if (desc_read_CHAR(dc, child) == chain) {
+ /* Currently in progress */
+ if (csr & TXX9_DMA_CSR_ABCHC)
+ goto scan_done;
+ return;
+ }
+
+ /*
+ * No descriptors so far seem to be in progress, i.e.
+ * this one must be done.
+ */
+ txx9dmac_descriptor_complete(dc, desc);
+ }
+scan_done:
+ if (csr & TXX9_DMA_CSR_ABCHC) {
+ txx9dmac_handle_error(dc, csr);
+ return;
+ }
+
+ dev_err(chan2dev(&dc->chan),
+ "BUG: All descriptors done, but channel not idle!\n");
+
+ /* Try to continue after resetting the channel... */
+ txx9dmac_reset_chan(dc);
+
+ if (!list_empty(&dc->queue)) {
+ txx9dmac_dequeue(dc, &dc->active_list);
+ txx9dmac_dostart(dc, txx9dmac_first_active(dc));
+ }
+}
+
+static void txx9dmac_chan_tasklet(struct tasklet_struct *t)
+{
+ int irq;
+ u32 csr;
+ struct txx9dmac_chan *dc;
+
+ dc = from_tasklet(dc, t, tasklet);
+ csr = channel_readl(dc, CSR);
+ dev_vdbg(chan2dev(&dc->chan), "tasklet: status=%x\n", csr);
+
+ spin_lock(&dc->lock);
+ if (csr & (TXX9_DMA_CSR_ABCHC | TXX9_DMA_CSR_NCHNC |
+ TXX9_DMA_CSR_NTRNFC))
+ txx9dmac_scan_descriptors(dc);
+ spin_unlock(&dc->lock);
+ irq = dc->irq;
+
+ enable_irq(irq);
+}
+
+static irqreturn_t txx9dmac_chan_interrupt(int irq, void *dev_id)
+{
+ struct txx9dmac_chan *dc = dev_id;
+
+ dev_vdbg(chan2dev(&dc->chan), "interrupt: status=%#x\n",
+ channel_readl(dc, CSR));
+
+ tasklet_schedule(&dc->tasklet);
+ /*
+ * Just disable the interrupts. We'll turn them back on in the
+ * softirq handler.
+ */
+ disable_irq_nosync(irq);
+
+ return IRQ_HANDLED;
+}
+
+static void txx9dmac_tasklet(struct tasklet_struct *t)
+{
+ int irq;
+ u32 csr;
+ struct txx9dmac_chan *dc;
+
+ struct txx9dmac_dev *ddev = from_tasklet(ddev, t, tasklet);
+ u32 mcr;
+ int i;
+
+ mcr = dma_readl(ddev, MCR);
+ dev_vdbg(ddev->chan[0]->dma.dev, "tasklet: mcr=%x\n", mcr);
+ for (i = 0; i < TXX9_DMA_MAX_NR_CHANNELS; i++) {
+ if ((mcr >> (24 + i)) & 0x11) {
+ dc = ddev->chan[i];
+ csr = channel_readl(dc, CSR);
+ dev_vdbg(chan2dev(&dc->chan), "tasklet: status=%x\n",
+ csr);
+ spin_lock(&dc->lock);
+ if (csr & (TXX9_DMA_CSR_ABCHC | TXX9_DMA_CSR_NCHNC |
+ TXX9_DMA_CSR_NTRNFC))
+ txx9dmac_scan_descriptors(dc);
+ spin_unlock(&dc->lock);
+ }
+ }
+ irq = ddev->irq;
+
+ enable_irq(irq);
+}
+
+static irqreturn_t txx9dmac_interrupt(int irq, void *dev_id)
+{
+ struct txx9dmac_dev *ddev = dev_id;
+
+ dev_vdbg(ddev->chan[0]->dma.dev, "interrupt: status=%#x\n",
+ dma_readl(ddev, MCR));
+
+ tasklet_schedule(&ddev->tasklet);
+ /*
+ * Just disable the interrupts. We'll turn them back on in the
+ * softirq handler.
+ */
+ disable_irq_nosync(irq);
+
+ return IRQ_HANDLED;
+}
+
+/*----------------------------------------------------------------------*/
+
+static dma_cookie_t txx9dmac_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+ struct txx9dmac_desc *desc = txd_to_txx9dmac_desc(tx);
+ struct txx9dmac_chan *dc = to_txx9dmac_chan(tx->chan);
+ dma_cookie_t cookie;
+
+ spin_lock_bh(&dc->lock);
+ cookie = dma_cookie_assign(tx);
+
+ dev_vdbg(chan2dev(tx->chan), "tx_submit: queued %u %p\n",
+ desc->txd.cookie, desc);
+
+ list_add_tail(&desc->desc_node, &dc->queue);
+ spin_unlock_bh(&dc->lock);
+
+ return cookie;
+}
+
+static struct dma_async_tx_descriptor *
+txx9dmac_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
+ size_t len, unsigned long flags)
+{
+ struct txx9dmac_chan *dc = to_txx9dmac_chan(chan);
+ struct txx9dmac_dev *ddev = dc->ddev;
+ struct txx9dmac_desc *desc;
+ struct txx9dmac_desc *first;
+ struct txx9dmac_desc *prev;
+ size_t xfer_count;
+ size_t offset;
+
+ dev_vdbg(chan2dev(chan), "prep_dma_memcpy d%#llx s%#llx l%#zx f%#lx\n",
+ (u64)dest, (u64)src, len, flags);
+
+ if (unlikely(!len)) {
+ dev_dbg(chan2dev(chan), "prep_dma_memcpy: length is zero!\n");
+ return NULL;
+ }
+
+ prev = first = NULL;
+
+ for (offset = 0; offset < len; offset += xfer_count) {
+ xfer_count = min_t(size_t, len - offset, TXX9_DMA_MAX_COUNT);
+ /*
+ * Workaround for ERT-TX49H2-033, ERT-TX49H3-020,
+ * ERT-TX49H4-016 (slightly conservative)
+ */
+ if (__is_dmac64(ddev)) {
+ if (xfer_count > 0x100 &&
+ (xfer_count & 0xff) >= 0xfa &&
+ (xfer_count & 0xff) <= 0xff)
+ xfer_count -= 0x20;
+ } else {
+ if (xfer_count > 0x80 &&
+ (xfer_count & 0x7f) >= 0x7e &&
+ (xfer_count & 0x7f) <= 0x7f)
+ xfer_count -= 0x20;
+ }
+
+ desc = txx9dmac_desc_get(dc);
+ if (!desc) {
+ txx9dmac_desc_put(dc, first);
+ return NULL;
+ }
+
+ if (__is_dmac64(ddev)) {
+ desc->hwdesc.SAR = src + offset;
+ desc->hwdesc.DAR = dest + offset;
+ desc->hwdesc.CNTR = xfer_count;
+ txx9dmac_desc_set_nosimple(ddev, desc, 8, 8,
+ dc->ccr | TXX9_DMA_CCR_XFACT);
+ } else {
+ desc->hwdesc32.SAR = src + offset;
+ desc->hwdesc32.DAR = dest + offset;
+ desc->hwdesc32.CNTR = xfer_count;
+ txx9dmac_desc_set_nosimple(ddev, desc, 4, 4,
+ dc->ccr | TXX9_DMA_CCR_XFACT);
+ }
+
+ /*
+ * The descriptors on tx_list are not reachable from
+ * the dc->queue list or dc->active_list after a
+ * submit. If we put all descriptors on active_list,
+ * calling of callback on the completion will be more
+ * complex.
+ */
+ if (!first) {
+ first = desc;
+ } else {
+ desc_write_CHAR(dc, prev, desc->txd.phys);
+ dma_sync_single_for_device(chan2parent(&dc->chan),
+ prev->txd.phys, ddev->descsize,
+ DMA_TO_DEVICE);
+ list_add_tail(&desc->desc_node, &first->tx_list);
+ }
+ prev = desc;
+ }
+
+ /* Trigger interrupt after last block */
+ if (flags & DMA_PREP_INTERRUPT)
+ txx9dmac_desc_set_INTENT(ddev, prev);
+
+ desc_write_CHAR(dc, prev, 0);
+ dma_sync_single_for_device(chan2parent(&dc->chan),
+ prev->txd.phys, ddev->descsize,
+ DMA_TO_DEVICE);
+
+ first->txd.flags = flags;
+ first->len = len;
+
+ return &first->txd;
+}
+
+static struct dma_async_tx_descriptor *
+txx9dmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
+ unsigned int sg_len, enum dma_transfer_direction direction,
+ unsigned long flags, void *context)
+{
+ struct txx9dmac_chan *dc = to_txx9dmac_chan(chan);
+ struct txx9dmac_dev *ddev = dc->ddev;
+ struct txx9dmac_slave *ds = chan->private;
+ struct txx9dmac_desc *prev;
+ struct txx9dmac_desc *first;
+ unsigned int i;
+ struct scatterlist *sg;
+
+ dev_vdbg(chan2dev(chan), "prep_dma_slave\n");
+
+ BUG_ON(!ds || !ds->reg_width);
+ if (ds->tx_reg)
+ BUG_ON(direction != DMA_MEM_TO_DEV);
+ else
+ BUG_ON(direction != DMA_DEV_TO_MEM);
+ if (unlikely(!sg_len))
+ return NULL;
+
+ prev = first = NULL;
+
+ for_each_sg(sgl, sg, sg_len, i) {
+ struct txx9dmac_desc *desc;
+ dma_addr_t mem;
+ u32 sai, dai;
+
+ desc = txx9dmac_desc_get(dc);
+ if (!desc) {
+ txx9dmac_desc_put(dc, first);
+ return NULL;
+ }
+
+ mem = sg_dma_address(sg);
+
+ if (__is_dmac64(ddev)) {
+ if (direction == DMA_MEM_TO_DEV) {
+ desc->hwdesc.SAR = mem;
+ desc->hwdesc.DAR = ds->tx_reg;
+ } else {
+ desc->hwdesc.SAR = ds->rx_reg;
+ desc->hwdesc.DAR = mem;
+ }
+ desc->hwdesc.CNTR = sg_dma_len(sg);
+ } else {
+ if (direction == DMA_MEM_TO_DEV) {
+ desc->hwdesc32.SAR = mem;
+ desc->hwdesc32.DAR = ds->tx_reg;
+ } else {
+ desc->hwdesc32.SAR = ds->rx_reg;
+ desc->hwdesc32.DAR = mem;
+ }
+ desc->hwdesc32.CNTR = sg_dma_len(sg);
+ }
+ if (direction == DMA_MEM_TO_DEV) {
+ sai = ds->reg_width;
+ dai = 0;
+ } else {
+ sai = 0;
+ dai = ds->reg_width;
+ }
+ txx9dmac_desc_set_nosimple(ddev, desc, sai, dai,
+ dc->ccr | TXX9_DMA_CCR_XFACT);
+
+ if (!first) {
+ first = desc;
+ } else {
+ desc_write_CHAR(dc, prev, desc->txd.phys);
+ dma_sync_single_for_device(chan2parent(&dc->chan),
+ prev->txd.phys,
+ ddev->descsize,
+ DMA_TO_DEVICE);
+ list_add_tail(&desc->desc_node, &first->tx_list);
+ }
+ prev = desc;
+ }
+
+ /* Trigger interrupt after last block */
+ if (flags & DMA_PREP_INTERRUPT)
+ txx9dmac_desc_set_INTENT(ddev, prev);
+
+ desc_write_CHAR(dc, prev, 0);
+ dma_sync_single_for_device(chan2parent(&dc->chan),
+ prev->txd.phys, ddev->descsize,
+ DMA_TO_DEVICE);
+
+ first->txd.flags = flags;
+ first->len = 0;
+
+ return &first->txd;
+}
+
+static int txx9dmac_terminate_all(struct dma_chan *chan)
+{
+ struct txx9dmac_chan *dc = to_txx9dmac_chan(chan);
+ struct txx9dmac_desc *desc, *_desc;
+ LIST_HEAD(list);
+
+ dev_vdbg(chan2dev(chan), "terminate_all\n");
+ spin_lock_bh(&dc->lock);
+
+ txx9dmac_reset_chan(dc);
+
+ /* active_list entries will end up before queued entries */
+ list_splice_init(&dc->queue, &list);
+ list_splice_init(&dc->active_list, &list);
+
+ spin_unlock_bh(&dc->lock);
+
+ /* Flush all pending and queued descriptors */
+ list_for_each_entry_safe(desc, _desc, &list, desc_node)
+ txx9dmac_descriptor_complete(dc, desc);
+
+ return 0;
+}
+
+static enum dma_status
+txx9dmac_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
+ struct dma_tx_state *txstate)
+{
+ struct txx9dmac_chan *dc = to_txx9dmac_chan(chan);
+ enum dma_status ret;
+
+ ret = dma_cookie_status(chan, cookie, txstate);
+ if (ret == DMA_COMPLETE)
+ return DMA_COMPLETE;
+
+ spin_lock_bh(&dc->lock);
+ txx9dmac_scan_descriptors(dc);
+ spin_unlock_bh(&dc->lock);
+
+ return dma_cookie_status(chan, cookie, txstate);
+}
+
+static void txx9dmac_chain_dynamic(struct txx9dmac_chan *dc,
+ struct txx9dmac_desc *prev)
+{
+ struct txx9dmac_dev *ddev = dc->ddev;
+ struct txx9dmac_desc *desc;
+ LIST_HEAD(list);
+
+ prev = txx9dmac_last_child(prev);
+ txx9dmac_dequeue(dc, &list);
+ desc = list_entry(list.next, struct txx9dmac_desc, desc_node);
+ desc_write_CHAR(dc, prev, desc->txd.phys);
+ dma_sync_single_for_device(chan2parent(&dc->chan),
+ prev->txd.phys, ddev->descsize,
+ DMA_TO_DEVICE);
+ if (!(channel_readl(dc, CSR) & TXX9_DMA_CSR_CHNEN) &&
+ channel_read_CHAR(dc) == prev->txd.phys)
+ /* Restart chain DMA */
+ channel_write_CHAR(dc, desc->txd.phys);
+ list_splice_tail(&list, &dc->active_list);
+}
+
+static void txx9dmac_issue_pending(struct dma_chan *chan)
+{
+ struct txx9dmac_chan *dc = to_txx9dmac_chan(chan);
+
+ spin_lock_bh(&dc->lock);
+
+ if (!list_empty(&dc->active_list))
+ txx9dmac_scan_descriptors(dc);
+ if (!list_empty(&dc->queue)) {
+ if (list_empty(&dc->active_list)) {
+ txx9dmac_dequeue(dc, &dc->active_list);
+ txx9dmac_dostart(dc, txx9dmac_first_active(dc));
+ } else if (txx9_dma_have_SMPCHN()) {
+ struct txx9dmac_desc *prev = txx9dmac_last_active(dc);
+
+ if (!(prev->txd.flags & DMA_PREP_INTERRUPT) ||
+ txx9dmac_chan_INTENT(dc))
+ txx9dmac_chain_dynamic(dc, prev);
+ }
+ }
+
+ spin_unlock_bh(&dc->lock);
+}
+
+static int txx9dmac_alloc_chan_resources(struct dma_chan *chan)
+{
+ struct txx9dmac_chan *dc = to_txx9dmac_chan(chan);
+ struct txx9dmac_slave *ds = chan->private;
+ struct txx9dmac_desc *desc;
+ int i;
+
+ dev_vdbg(chan2dev(chan), "alloc_chan_resources\n");
+
+ /* ASSERT: channel is idle */
+ if (channel_readl(dc, CSR) & TXX9_DMA_CSR_XFACT) {
+ dev_dbg(chan2dev(chan), "DMA channel not idle?\n");
+ return -EIO;
+ }
+
+ dma_cookie_init(chan);
+
+ dc->ccr = TXX9_DMA_CCR_IMMCHN | TXX9_DMA_CCR_INTENE | CCR_LE;
+ txx9dmac_chan_set_SMPCHN(dc);
+ if (!txx9_dma_have_SMPCHN() || (dc->ccr & TXX9_DMA_CCR_SMPCHN))
+ dc->ccr |= TXX9_DMA_CCR_INTENC;
+ if (chan->device->device_prep_dma_memcpy) {
+ if (ds)
+ return -EINVAL;
+ dc->ccr |= TXX9_DMA_CCR_XFSZ_X8;
+ } else {
+ if (!ds ||
+ (ds->tx_reg && ds->rx_reg) || (!ds->tx_reg && !ds->rx_reg))
+ return -EINVAL;
+ dc->ccr |= TXX9_DMA_CCR_EXTRQ |
+ TXX9_DMA_CCR_XFSZ(__ffs(ds->reg_width));
+ txx9dmac_chan_set_INTENT(dc);
+ }
+
+ spin_lock_bh(&dc->lock);
+ i = dc->descs_allocated;
+ while (dc->descs_allocated < TXX9_DMA_INITIAL_DESC_COUNT) {
+ spin_unlock_bh(&dc->lock);
+
+ desc = txx9dmac_desc_alloc(dc, GFP_KERNEL);
+ if (!desc) {
+ dev_info(chan2dev(chan),
+ "only allocated %d descriptors\n", i);
+ spin_lock_bh(&dc->lock);
+ break;
+ }
+ txx9dmac_desc_put(dc, desc);
+
+ spin_lock_bh(&dc->lock);
+ i = ++dc->descs_allocated;
+ }
+ spin_unlock_bh(&dc->lock);
+
+ dev_dbg(chan2dev(chan),
+ "alloc_chan_resources allocated %d descriptors\n", i);
+
+ return i;
+}
+
+static void txx9dmac_free_chan_resources(struct dma_chan *chan)
+{
+ struct txx9dmac_chan *dc = to_txx9dmac_chan(chan);
+ struct txx9dmac_dev *ddev = dc->ddev;
+ struct txx9dmac_desc *desc, *_desc;
+ LIST_HEAD(list);
+
+ dev_dbg(chan2dev(chan), "free_chan_resources (descs allocated=%u)\n",
+ dc->descs_allocated);
+
+ /* ASSERT: channel is idle */
+ BUG_ON(!list_empty(&dc->active_list));
+ BUG_ON(!list_empty(&dc->queue));
+ BUG_ON(channel_readl(dc, CSR) & TXX9_DMA_CSR_XFACT);
+
+ spin_lock_bh(&dc->lock);
+ list_splice_init(&dc->free_list, &list);
+ dc->descs_allocated = 0;
+ spin_unlock_bh(&dc->lock);
+
+ list_for_each_entry_safe(desc, _desc, &list, desc_node) {
+ dev_vdbg(chan2dev(chan), " freeing descriptor %p\n", desc);
+ dma_unmap_single(chan2parent(chan), desc->txd.phys,
+ ddev->descsize, DMA_TO_DEVICE);
+ kfree(desc);
+ }
+
+ dev_vdbg(chan2dev(chan), "free_chan_resources done\n");
+}
+
+/*----------------------------------------------------------------------*/
+
+static void txx9dmac_off(struct txx9dmac_dev *ddev)
+{
+ dma_writel(ddev, MCR, 0);
+}
+
+static int __init txx9dmac_chan_probe(struct platform_device *pdev)
+{
+ struct txx9dmac_chan_platform_data *cpdata =
+ dev_get_platdata(&pdev->dev);
+ struct platform_device *dmac_dev = cpdata->dmac_dev;
+ struct txx9dmac_platform_data *pdata = dev_get_platdata(&dmac_dev->dev);
+ struct txx9dmac_chan *dc;
+ int err;
+ int ch = pdev->id % TXX9_DMA_MAX_NR_CHANNELS;
+ int irq;
+
+ dc = devm_kzalloc(&pdev->dev, sizeof(*dc), GFP_KERNEL);
+ if (!dc)
+ return -ENOMEM;
+
+ dc->dma.dev = &pdev->dev;
+ dc->dma.device_alloc_chan_resources = txx9dmac_alloc_chan_resources;
+ dc->dma.device_free_chan_resources = txx9dmac_free_chan_resources;
+ dc->dma.device_terminate_all = txx9dmac_terminate_all;
+ dc->dma.device_tx_status = txx9dmac_tx_status;
+ dc->dma.device_issue_pending = txx9dmac_issue_pending;
+ if (pdata && pdata->memcpy_chan == ch) {
+ dc->dma.device_prep_dma_memcpy = txx9dmac_prep_dma_memcpy;
+ dma_cap_set(DMA_MEMCPY, dc->dma.cap_mask);
+ } else {
+ dc->dma.device_prep_slave_sg = txx9dmac_prep_slave_sg;
+ dma_cap_set(DMA_SLAVE, dc->dma.cap_mask);
+ dma_cap_set(DMA_PRIVATE, dc->dma.cap_mask);
+ }
+
+ INIT_LIST_HEAD(&dc->dma.channels);
+ dc->ddev = platform_get_drvdata(dmac_dev);
+ if (dc->ddev->irq < 0) {
+ irq = platform_get_irq(pdev, 0);
+ if (irq < 0)
+ return irq;
+ tasklet_setup(&dc->tasklet, txx9dmac_chan_tasklet);
+ dc->irq = irq;
+ err = devm_request_irq(&pdev->dev, dc->irq,
+ txx9dmac_chan_interrupt, 0, dev_name(&pdev->dev), dc);
+ if (err)
+ return err;
+ } else
+ dc->irq = -1;
+ dc->ddev->chan[ch] = dc;
+ dc->chan.device = &dc->dma;
+ list_add_tail(&dc->chan.device_node, &dc->chan.device->channels);
+ dma_cookie_init(&dc->chan);
+
+ if (is_dmac64(dc))
+ dc->ch_regs = &__txx9dmac_regs(dc->ddev)->CHAN[ch];
+ else
+ dc->ch_regs = &__txx9dmac_regs32(dc->ddev)->CHAN[ch];
+ spin_lock_init(&dc->lock);
+
+ INIT_LIST_HEAD(&dc->active_list);
+ INIT_LIST_HEAD(&dc->queue);
+ INIT_LIST_HEAD(&dc->free_list);
+
+ txx9dmac_reset_chan(dc);
+
+ platform_set_drvdata(pdev, dc);
+
+ err = dma_async_device_register(&dc->dma);
+ if (err)
+ return err;
+ dev_dbg(&pdev->dev, "TXx9 DMA Channel (dma%d%s%s)\n",
+ dc->dma.dev_id,
+ dma_has_cap(DMA_MEMCPY, dc->dma.cap_mask) ? " memcpy" : "",
+ dma_has_cap(DMA_SLAVE, dc->dma.cap_mask) ? " slave" : "");
+
+ return 0;
+}
+
+static int txx9dmac_chan_remove(struct platform_device *pdev)
+{
+ struct txx9dmac_chan *dc = platform_get_drvdata(pdev);
+
+
+ dma_async_device_unregister(&dc->dma);
+ if (dc->irq >= 0) {
+ devm_free_irq(&pdev->dev, dc->irq, dc);
+ tasklet_kill(&dc->tasklet);
+ }
+ dc->ddev->chan[pdev->id % TXX9_DMA_MAX_NR_CHANNELS] = NULL;
+ return 0;
+}
+
+static int __init txx9dmac_probe(struct platform_device *pdev)
+{
+ struct txx9dmac_platform_data *pdata = dev_get_platdata(&pdev->dev);
+ struct resource *io;
+ struct txx9dmac_dev *ddev;
+ u32 mcr;
+ int err;
+
+ io = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ if (!io)
+ return -EINVAL;
+
+ ddev = devm_kzalloc(&pdev->dev, sizeof(*ddev), GFP_KERNEL);
+ if (!ddev)
+ return -ENOMEM;
+
+ if (!devm_request_mem_region(&pdev->dev, io->start, resource_size(io),
+ dev_name(&pdev->dev)))
+ return -EBUSY;
+
+ ddev->regs = devm_ioremap(&pdev->dev, io->start, resource_size(io));
+ if (!ddev->regs)
+ return -ENOMEM;
+ ddev->have_64bit_regs = pdata->have_64bit_regs;
+ if (__is_dmac64(ddev))
+ ddev->descsize = sizeof(struct txx9dmac_hwdesc);
+ else
+ ddev->descsize = sizeof(struct txx9dmac_hwdesc32);
+
+ /* force dma off, just in case */
+ txx9dmac_off(ddev);
+
+ ddev->irq = platform_get_irq(pdev, 0);
+ if (ddev->irq >= 0) {
+ tasklet_setup(&ddev->tasklet, txx9dmac_tasklet);
+ err = devm_request_irq(&pdev->dev, ddev->irq,
+ txx9dmac_interrupt, 0, dev_name(&pdev->dev), ddev);
+ if (err)
+ return err;
+ }
+
+ mcr = TXX9_DMA_MCR_MSTEN | MCR_LE;
+ if (pdata && pdata->memcpy_chan >= 0)
+ mcr |= TXX9_DMA_MCR_FIFUM(pdata->memcpy_chan);
+ dma_writel(ddev, MCR, mcr);
+
+ platform_set_drvdata(pdev, ddev);
+ return 0;
+}
+
+static int txx9dmac_remove(struct platform_device *pdev)
+{
+ struct txx9dmac_dev *ddev = platform_get_drvdata(pdev);
+
+ txx9dmac_off(ddev);
+ if (ddev->irq >= 0) {
+ devm_free_irq(&pdev->dev, ddev->irq, ddev);
+ tasklet_kill(&ddev->tasklet);
+ }
+ return 0;
+}
+
+static void txx9dmac_shutdown(struct platform_device *pdev)
+{
+ struct txx9dmac_dev *ddev = platform_get_drvdata(pdev);
+
+ txx9dmac_off(ddev);
+}
+
+static int txx9dmac_suspend_noirq(struct device *dev)
+{
+ struct txx9dmac_dev *ddev = dev_get_drvdata(dev);
+
+ txx9dmac_off(ddev);
+ return 0;
+}
+
+static int txx9dmac_resume_noirq(struct device *dev)
+{
+ struct txx9dmac_dev *ddev = dev_get_drvdata(dev);
+ struct txx9dmac_platform_data *pdata = dev_get_platdata(dev);
+ u32 mcr;
+
+ mcr = TXX9_DMA_MCR_MSTEN | MCR_LE;
+ if (pdata && pdata->memcpy_chan >= 0)
+ mcr |= TXX9_DMA_MCR_FIFUM(pdata->memcpy_chan);
+ dma_writel(ddev, MCR, mcr);
+ return 0;
+
+}
+
+static const struct dev_pm_ops txx9dmac_dev_pm_ops = {
+ .suspend_noirq = txx9dmac_suspend_noirq,
+ .resume_noirq = txx9dmac_resume_noirq,
+};
+
+static struct platform_driver txx9dmac_chan_driver = {
+ .remove = txx9dmac_chan_remove,
+ .driver = {
+ .name = "txx9dmac-chan",
+ },
+};
+
+static struct platform_driver txx9dmac_driver = {
+ .remove = txx9dmac_remove,
+ .shutdown = txx9dmac_shutdown,
+ .driver = {
+ .name = "txx9dmac",
+ .pm = &txx9dmac_dev_pm_ops,
+ },
+};
+
+static int __init txx9dmac_init(void)
+{
+ int rc;
+
+ rc = platform_driver_probe(&txx9dmac_driver, txx9dmac_probe);
+ if (!rc) {
+ rc = platform_driver_probe(&txx9dmac_chan_driver,
+ txx9dmac_chan_probe);
+ if (rc)
+ platform_driver_unregister(&txx9dmac_driver);
+ }
+ return rc;
+}
+module_init(txx9dmac_init);
+
+static void __exit txx9dmac_exit(void)
+{
+ platform_driver_unregister(&txx9dmac_chan_driver);
+ platform_driver_unregister(&txx9dmac_driver);
+}
+module_exit(txx9dmac_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("TXx9 DMA Controller driver");
+MODULE_AUTHOR("Atsushi Nemoto <anemo@mba.ocn.ne.jp>");
+MODULE_ALIAS("platform:txx9dmac");
+MODULE_ALIAS("platform:txx9dmac-chan");
diff --git a/drivers/dma/txx9dmac.h b/drivers/dma/txx9dmac.h
new file mode 100644
index 000000000..aa53eafb1
--- /dev/null
+++ b/drivers/dma/txx9dmac.h
@@ -0,0 +1,304 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Driver for the TXx9 SoC DMA Controller
+ *
+ * Copyright (C) 2009 Atsushi Nemoto
+ */
+#ifndef TXX9DMAC_H
+#define TXX9DMAC_H
+
+#include <linux/dmaengine.h>
+#include <asm/txx9/dmac.h>
+
+/*
+ * Design Notes:
+ *
+ * This DMAC have four channels and one FIFO buffer. Each channel can
+ * be configured for memory-memory or device-memory transfer, but only
+ * one channel can do alignment-free memory-memory transfer at a time
+ * while the channel should occupy the FIFO buffer for effective
+ * transfers.
+ *
+ * Instead of dynamically assign the FIFO buffer to channels, I chose
+ * make one dedicated channel for memory-memory transfer. The
+ * dedicated channel is public. Other channels are private and used
+ * for slave transfer. Some devices in the SoC are wired to certain
+ * DMA channel.
+ */
+
+#ifdef CONFIG_MACH_TX49XX
+static inline bool txx9_dma_have_SMPCHN(void)
+{
+ return true;
+}
+#define TXX9_DMA_USE_SIMPLE_CHAIN
+#else
+static inline bool txx9_dma_have_SMPCHN(void)
+{
+ return false;
+}
+#endif
+
+#ifdef __LITTLE_ENDIAN
+#ifdef CONFIG_MACH_TX49XX
+#define CCR_LE TXX9_DMA_CCR_LE
+#define MCR_LE 0
+#else
+#define CCR_LE 0
+#define MCR_LE TXX9_DMA_MCR_LE
+#endif
+#else
+#define CCR_LE 0
+#define MCR_LE 0
+#endif
+
+/*
+ * Redefine this macro to handle differences between 32- and 64-bit
+ * addressing, big vs. little endian, etc.
+ */
+#ifdef __BIG_ENDIAN
+#define TXX9_DMA_REG32(name) u32 __pad_##name; u32 name
+#else
+#define TXX9_DMA_REG32(name) u32 name; u32 __pad_##name
+#endif
+
+/* Hardware register definitions. */
+struct txx9dmac_cregs {
+#if defined(CONFIG_32BIT) && !defined(CONFIG_PHYS_ADDR_T_64BIT)
+ TXX9_DMA_REG32(CHAR); /* Chain Address Register */
+#else
+ u64 CHAR; /* Chain Address Register */
+#endif
+ u64 SAR; /* Source Address Register */
+ u64 DAR; /* Destination Address Register */
+ TXX9_DMA_REG32(CNTR); /* Count Register */
+ TXX9_DMA_REG32(SAIR); /* Source Address Increment Register */
+ TXX9_DMA_REG32(DAIR); /* Destination Address Increment Register */
+ TXX9_DMA_REG32(CCR); /* Channel Control Register */
+ TXX9_DMA_REG32(CSR); /* Channel Status Register */
+};
+struct txx9dmac_cregs32 {
+ u32 CHAR;
+ u32 SAR;
+ u32 DAR;
+ u32 CNTR;
+ u32 SAIR;
+ u32 DAIR;
+ u32 CCR;
+ u32 CSR;
+};
+
+struct txx9dmac_regs {
+ /* per-channel registers */
+ struct txx9dmac_cregs CHAN[TXX9_DMA_MAX_NR_CHANNELS];
+ u64 __pad[9];
+ u64 MFDR; /* Memory Fill Data Register */
+ TXX9_DMA_REG32(MCR); /* Master Control Register */
+};
+struct txx9dmac_regs32 {
+ struct txx9dmac_cregs32 CHAN[TXX9_DMA_MAX_NR_CHANNELS];
+ u32 __pad[9];
+ u32 MFDR;
+ u32 MCR;
+};
+
+/* bits for MCR */
+#define TXX9_DMA_MCR_EIS(ch) (0x10000000<<(ch))
+#define TXX9_DMA_MCR_DIS(ch) (0x01000000<<(ch))
+#define TXX9_DMA_MCR_RSFIF 0x00000080
+#define TXX9_DMA_MCR_FIFUM(ch) (0x00000008<<(ch))
+#define TXX9_DMA_MCR_LE 0x00000004
+#define TXX9_DMA_MCR_RPRT 0x00000002
+#define TXX9_DMA_MCR_MSTEN 0x00000001
+
+/* bits for CCRn */
+#define TXX9_DMA_CCR_IMMCHN 0x20000000
+#define TXX9_DMA_CCR_USEXFSZ 0x10000000
+#define TXX9_DMA_CCR_LE 0x08000000
+#define TXX9_DMA_CCR_DBINH 0x04000000
+#define TXX9_DMA_CCR_SBINH 0x02000000
+#define TXX9_DMA_CCR_CHRST 0x01000000
+#define TXX9_DMA_CCR_RVBYTE 0x00800000
+#define TXX9_DMA_CCR_ACKPOL 0x00400000
+#define TXX9_DMA_CCR_REQPL 0x00200000
+#define TXX9_DMA_CCR_EGREQ 0x00100000
+#define TXX9_DMA_CCR_CHDN 0x00080000
+#define TXX9_DMA_CCR_DNCTL 0x00060000
+#define TXX9_DMA_CCR_EXTRQ 0x00010000
+#define TXX9_DMA_CCR_INTRQD 0x0000e000
+#define TXX9_DMA_CCR_INTENE 0x00001000
+#define TXX9_DMA_CCR_INTENC 0x00000800
+#define TXX9_DMA_CCR_INTENT 0x00000400
+#define TXX9_DMA_CCR_CHNEN 0x00000200
+#define TXX9_DMA_CCR_XFACT 0x00000100
+#define TXX9_DMA_CCR_SMPCHN 0x00000020
+#define TXX9_DMA_CCR_XFSZ(order) (((order) << 2) & 0x0000001c)
+#define TXX9_DMA_CCR_XFSZ_1 TXX9_DMA_CCR_XFSZ(0)
+#define TXX9_DMA_CCR_XFSZ_2 TXX9_DMA_CCR_XFSZ(1)
+#define TXX9_DMA_CCR_XFSZ_4 TXX9_DMA_CCR_XFSZ(2)
+#define TXX9_DMA_CCR_XFSZ_8 TXX9_DMA_CCR_XFSZ(3)
+#define TXX9_DMA_CCR_XFSZ_X4 TXX9_DMA_CCR_XFSZ(4)
+#define TXX9_DMA_CCR_XFSZ_X8 TXX9_DMA_CCR_XFSZ(5)
+#define TXX9_DMA_CCR_XFSZ_X16 TXX9_DMA_CCR_XFSZ(6)
+#define TXX9_DMA_CCR_XFSZ_X32 TXX9_DMA_CCR_XFSZ(7)
+#define TXX9_DMA_CCR_MEMIO 0x00000002
+#define TXX9_DMA_CCR_SNGAD 0x00000001
+
+/* bits for CSRn */
+#define TXX9_DMA_CSR_CHNEN 0x00000400
+#define TXX9_DMA_CSR_STLXFER 0x00000200
+#define TXX9_DMA_CSR_XFACT 0x00000100
+#define TXX9_DMA_CSR_ABCHC 0x00000080
+#define TXX9_DMA_CSR_NCHNC 0x00000040
+#define TXX9_DMA_CSR_NTRNFC 0x00000020
+#define TXX9_DMA_CSR_EXTDN 0x00000010
+#define TXX9_DMA_CSR_CFERR 0x00000008
+#define TXX9_DMA_CSR_CHERR 0x00000004
+#define TXX9_DMA_CSR_DESERR 0x00000002
+#define TXX9_DMA_CSR_SORERR 0x00000001
+
+struct txx9dmac_chan {
+ struct dma_chan chan;
+ struct dma_device dma;
+ struct txx9dmac_dev *ddev;
+ void __iomem *ch_regs;
+ struct tasklet_struct tasklet;
+ int irq;
+ u32 ccr;
+
+ spinlock_t lock;
+
+ /* these other elements are all protected by lock */
+ struct list_head active_list;
+ struct list_head queue;
+ struct list_head free_list;
+
+ unsigned int descs_allocated;
+};
+
+struct txx9dmac_dev {
+ void __iomem *regs;
+ struct tasklet_struct tasklet;
+ int irq;
+ struct txx9dmac_chan *chan[TXX9_DMA_MAX_NR_CHANNELS];
+ bool have_64bit_regs;
+ unsigned int descsize;
+};
+
+static inline bool __is_dmac64(const struct txx9dmac_dev *ddev)
+{
+ return ddev->have_64bit_regs;
+}
+
+static inline bool is_dmac64(const struct txx9dmac_chan *dc)
+{
+ return __is_dmac64(dc->ddev);
+}
+
+#ifdef TXX9_DMA_USE_SIMPLE_CHAIN
+/* Hardware descriptor definition. (for simple-chain) */
+struct txx9dmac_hwdesc {
+#if defined(CONFIG_32BIT) && !defined(CONFIG_PHYS_ADDR_T_64BIT)
+ TXX9_DMA_REG32(CHAR);
+#else
+ u64 CHAR;
+#endif
+ u64 SAR;
+ u64 DAR;
+ TXX9_DMA_REG32(CNTR);
+};
+struct txx9dmac_hwdesc32 {
+ u32 CHAR;
+ u32 SAR;
+ u32 DAR;
+ u32 CNTR;
+};
+#else
+#define txx9dmac_hwdesc txx9dmac_cregs
+#define txx9dmac_hwdesc32 txx9dmac_cregs32
+#endif
+
+struct txx9dmac_desc {
+ /* FIRST values the hardware uses */
+ union {
+ struct txx9dmac_hwdesc hwdesc;
+ struct txx9dmac_hwdesc32 hwdesc32;
+ };
+
+ /* THEN values for driver housekeeping */
+ struct list_head desc_node ____cacheline_aligned;
+ struct list_head tx_list;
+ struct dma_async_tx_descriptor txd;
+ size_t len;
+};
+
+#ifdef TXX9_DMA_USE_SIMPLE_CHAIN
+
+static inline bool txx9dmac_chan_INTENT(struct txx9dmac_chan *dc)
+{
+ return (dc->ccr & TXX9_DMA_CCR_INTENT) != 0;
+}
+
+static inline void txx9dmac_chan_set_INTENT(struct txx9dmac_chan *dc)
+{
+ dc->ccr |= TXX9_DMA_CCR_INTENT;
+}
+
+static inline void txx9dmac_desc_set_INTENT(struct txx9dmac_dev *ddev,
+ struct txx9dmac_desc *desc)
+{
+}
+
+static inline void txx9dmac_chan_set_SMPCHN(struct txx9dmac_chan *dc)
+{
+ dc->ccr |= TXX9_DMA_CCR_SMPCHN;
+}
+
+static inline void txx9dmac_desc_set_nosimple(struct txx9dmac_dev *ddev,
+ struct txx9dmac_desc *desc,
+ u32 sair, u32 dair, u32 ccr)
+{
+}
+
+#else /* TXX9_DMA_USE_SIMPLE_CHAIN */
+
+static inline bool txx9dmac_chan_INTENT(struct txx9dmac_chan *dc)
+{
+ return true;
+}
+
+static void txx9dmac_chan_set_INTENT(struct txx9dmac_chan *dc)
+{
+}
+
+static inline void txx9dmac_desc_set_INTENT(struct txx9dmac_dev *ddev,
+ struct txx9dmac_desc *desc)
+{
+ if (__is_dmac64(ddev))
+ desc->hwdesc.CCR |= TXX9_DMA_CCR_INTENT;
+ else
+ desc->hwdesc32.CCR |= TXX9_DMA_CCR_INTENT;
+}
+
+static inline void txx9dmac_chan_set_SMPCHN(struct txx9dmac_chan *dc)
+{
+}
+
+static inline void txx9dmac_desc_set_nosimple(struct txx9dmac_dev *ddev,
+ struct txx9dmac_desc *desc,
+ u32 sai, u32 dai, u32 ccr)
+{
+ if (__is_dmac64(ddev)) {
+ desc->hwdesc.SAIR = sai;
+ desc->hwdesc.DAIR = dai;
+ desc->hwdesc.CCR = ccr;
+ } else {
+ desc->hwdesc32.SAIR = sai;
+ desc->hwdesc32.DAIR = dai;
+ desc->hwdesc32.CCR = ccr;
+ }
+}
+
+#endif /* TXX9_DMA_USE_SIMPLE_CHAIN */
+
+#endif /* TXX9DMAC_H */
diff --git a/drivers/dma/uniphier-mdmac.c b/drivers/dma/uniphier-mdmac.c
new file mode 100644
index 000000000..618839df0
--- /dev/null
+++ b/drivers/dma/uniphier-mdmac.c
@@ -0,0 +1,501 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Copyright (C) 2018 Socionext Inc.
+// Author: Masahiro Yamada <yamada.masahiro@socionext.com>
+
+#include <linux/bits.h>
+#include <linux/clk.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmaengine.h>
+#include <linux/interrupt.h>
+#include <linux/iopoll.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_dma.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+
+#include "virt-dma.h"
+
+/* registers common for all channels */
+#define UNIPHIER_MDMAC_CMD 0x000 /* issue DMA start/abort */
+#define UNIPHIER_MDMAC_CMD_ABORT BIT(31) /* 1: abort, 0: start */
+
+/* per-channel registers */
+#define UNIPHIER_MDMAC_CH_OFFSET 0x100
+#define UNIPHIER_MDMAC_CH_STRIDE 0x040
+
+#define UNIPHIER_MDMAC_CH_IRQ_STAT 0x010 /* current hw status (RO) */
+#define UNIPHIER_MDMAC_CH_IRQ_REQ 0x014 /* latched STAT (WOC) */
+#define UNIPHIER_MDMAC_CH_IRQ_EN 0x018 /* IRQ enable mask */
+#define UNIPHIER_MDMAC_CH_IRQ_DET 0x01c /* REQ & EN (RO) */
+#define UNIPHIER_MDMAC_CH_IRQ__ABORT BIT(13)
+#define UNIPHIER_MDMAC_CH_IRQ__DONE BIT(1)
+#define UNIPHIER_MDMAC_CH_SRC_MODE 0x020 /* mode of source */
+#define UNIPHIER_MDMAC_CH_DEST_MODE 0x024 /* mode of destination */
+#define UNIPHIER_MDMAC_CH_MODE__ADDR_INC (0 << 4)
+#define UNIPHIER_MDMAC_CH_MODE__ADDR_DEC (1 << 4)
+#define UNIPHIER_MDMAC_CH_MODE__ADDR_FIXED (2 << 4)
+#define UNIPHIER_MDMAC_CH_SRC_ADDR 0x028 /* source address */
+#define UNIPHIER_MDMAC_CH_DEST_ADDR 0x02c /* destination address */
+#define UNIPHIER_MDMAC_CH_SIZE 0x030 /* transfer bytes */
+
+#define UNIPHIER_MDMAC_SLAVE_BUSWIDTHS \
+ (BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | \
+ BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \
+ BIT(DMA_SLAVE_BUSWIDTH_3_BYTES) | \
+ BIT(DMA_SLAVE_BUSWIDTH_4_BYTES))
+
+struct uniphier_mdmac_desc {
+ struct virt_dma_desc vd;
+ struct scatterlist *sgl;
+ unsigned int sg_len;
+ unsigned int sg_cur;
+ enum dma_transfer_direction dir;
+};
+
+struct uniphier_mdmac_chan {
+ struct virt_dma_chan vc;
+ struct uniphier_mdmac_device *mdev;
+ struct uniphier_mdmac_desc *md;
+ void __iomem *reg_ch_base;
+ unsigned int chan_id;
+};
+
+struct uniphier_mdmac_device {
+ struct dma_device ddev;
+ struct clk *clk;
+ void __iomem *reg_base;
+ struct uniphier_mdmac_chan channels[];
+};
+
+static struct uniphier_mdmac_chan *
+to_uniphier_mdmac_chan(struct virt_dma_chan *vc)
+{
+ return container_of(vc, struct uniphier_mdmac_chan, vc);
+}
+
+static struct uniphier_mdmac_desc *
+to_uniphier_mdmac_desc(struct virt_dma_desc *vd)
+{
+ return container_of(vd, struct uniphier_mdmac_desc, vd);
+}
+
+/* mc->vc.lock must be held by caller */
+static struct uniphier_mdmac_desc *
+uniphier_mdmac_next_desc(struct uniphier_mdmac_chan *mc)
+{
+ struct virt_dma_desc *vd;
+
+ vd = vchan_next_desc(&mc->vc);
+ if (!vd) {
+ mc->md = NULL;
+ return NULL;
+ }
+
+ list_del(&vd->node);
+
+ mc->md = to_uniphier_mdmac_desc(vd);
+
+ return mc->md;
+}
+
+/* mc->vc.lock must be held by caller */
+static void uniphier_mdmac_handle(struct uniphier_mdmac_chan *mc,
+ struct uniphier_mdmac_desc *md)
+{
+ struct uniphier_mdmac_device *mdev = mc->mdev;
+ struct scatterlist *sg;
+ u32 irq_flag = UNIPHIER_MDMAC_CH_IRQ__DONE;
+ u32 src_mode, src_addr, dest_mode, dest_addr, chunk_size;
+
+ sg = &md->sgl[md->sg_cur];
+
+ if (md->dir == DMA_MEM_TO_DEV) {
+ src_mode = UNIPHIER_MDMAC_CH_MODE__ADDR_INC;
+ src_addr = sg_dma_address(sg);
+ dest_mode = UNIPHIER_MDMAC_CH_MODE__ADDR_FIXED;
+ dest_addr = 0;
+ } else {
+ src_mode = UNIPHIER_MDMAC_CH_MODE__ADDR_FIXED;
+ src_addr = 0;
+ dest_mode = UNIPHIER_MDMAC_CH_MODE__ADDR_INC;
+ dest_addr = sg_dma_address(sg);
+ }
+
+ chunk_size = sg_dma_len(sg);
+
+ writel(src_mode, mc->reg_ch_base + UNIPHIER_MDMAC_CH_SRC_MODE);
+ writel(dest_mode, mc->reg_ch_base + UNIPHIER_MDMAC_CH_DEST_MODE);
+ writel(src_addr, mc->reg_ch_base + UNIPHIER_MDMAC_CH_SRC_ADDR);
+ writel(dest_addr, mc->reg_ch_base + UNIPHIER_MDMAC_CH_DEST_ADDR);
+ writel(chunk_size, mc->reg_ch_base + UNIPHIER_MDMAC_CH_SIZE);
+
+ /* write 1 to clear */
+ writel(irq_flag, mc->reg_ch_base + UNIPHIER_MDMAC_CH_IRQ_REQ);
+
+ writel(irq_flag, mc->reg_ch_base + UNIPHIER_MDMAC_CH_IRQ_EN);
+
+ writel(BIT(mc->chan_id), mdev->reg_base + UNIPHIER_MDMAC_CMD);
+}
+
+/* mc->vc.lock must be held by caller */
+static void uniphier_mdmac_start(struct uniphier_mdmac_chan *mc)
+{
+ struct uniphier_mdmac_desc *md;
+
+ md = uniphier_mdmac_next_desc(mc);
+ if (md)
+ uniphier_mdmac_handle(mc, md);
+}
+
+/* mc->vc.lock must be held by caller */
+static int uniphier_mdmac_abort(struct uniphier_mdmac_chan *mc)
+{
+ struct uniphier_mdmac_device *mdev = mc->mdev;
+ u32 irq_flag = UNIPHIER_MDMAC_CH_IRQ__ABORT;
+ u32 val;
+
+ /* write 1 to clear */
+ writel(irq_flag, mc->reg_ch_base + UNIPHIER_MDMAC_CH_IRQ_REQ);
+
+ writel(UNIPHIER_MDMAC_CMD_ABORT | BIT(mc->chan_id),
+ mdev->reg_base + UNIPHIER_MDMAC_CMD);
+
+ /*
+ * Abort should be accepted soon. We poll the bit here instead of
+ * waiting for the interrupt.
+ */
+ return readl_poll_timeout(mc->reg_ch_base + UNIPHIER_MDMAC_CH_IRQ_REQ,
+ val, val & irq_flag, 0, 20);
+}
+
+static irqreturn_t uniphier_mdmac_interrupt(int irq, void *dev_id)
+{
+ struct uniphier_mdmac_chan *mc = dev_id;
+ struct uniphier_mdmac_desc *md;
+ irqreturn_t ret = IRQ_HANDLED;
+ u32 irq_stat;
+
+ spin_lock(&mc->vc.lock);
+
+ irq_stat = readl(mc->reg_ch_base + UNIPHIER_MDMAC_CH_IRQ_DET);
+
+ /*
+ * Some channels share a single interrupt line. If the IRQ status is 0,
+ * this is probably triggered by a different channel.
+ */
+ if (!irq_stat) {
+ ret = IRQ_NONE;
+ goto out;
+ }
+
+ /* write 1 to clear */
+ writel(irq_stat, mc->reg_ch_base + UNIPHIER_MDMAC_CH_IRQ_REQ);
+
+ /*
+ * UNIPHIER_MDMAC_CH_IRQ__DONE interrupt is asserted even when the DMA
+ * is aborted. To distinguish the normal completion and the abort,
+ * check mc->md. If it is NULL, we are aborting.
+ */
+ md = mc->md;
+ if (!md)
+ goto out;
+
+ md->sg_cur++;
+
+ if (md->sg_cur >= md->sg_len) {
+ vchan_cookie_complete(&md->vd);
+ md = uniphier_mdmac_next_desc(mc);
+ if (!md)
+ goto out;
+ }
+
+ uniphier_mdmac_handle(mc, md);
+
+out:
+ spin_unlock(&mc->vc.lock);
+
+ return ret;
+}
+
+static void uniphier_mdmac_free_chan_resources(struct dma_chan *chan)
+{
+ vchan_free_chan_resources(to_virt_chan(chan));
+}
+
+static struct dma_async_tx_descriptor *
+uniphier_mdmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
+ unsigned int sg_len,
+ enum dma_transfer_direction direction,
+ unsigned long flags, void *context)
+{
+ struct virt_dma_chan *vc = to_virt_chan(chan);
+ struct uniphier_mdmac_desc *md;
+
+ if (!is_slave_direction(direction))
+ return NULL;
+
+ md = kzalloc(sizeof(*md), GFP_NOWAIT);
+ if (!md)
+ return NULL;
+
+ md->sgl = sgl;
+ md->sg_len = sg_len;
+ md->dir = direction;
+
+ return vchan_tx_prep(vc, &md->vd, flags);
+}
+
+static int uniphier_mdmac_terminate_all(struct dma_chan *chan)
+{
+ struct virt_dma_chan *vc = to_virt_chan(chan);
+ struct uniphier_mdmac_chan *mc = to_uniphier_mdmac_chan(vc);
+ unsigned long flags;
+ int ret = 0;
+ LIST_HEAD(head);
+
+ spin_lock_irqsave(&vc->lock, flags);
+
+ if (mc->md) {
+ vchan_terminate_vdesc(&mc->md->vd);
+ mc->md = NULL;
+ ret = uniphier_mdmac_abort(mc);
+ }
+ vchan_get_all_descriptors(vc, &head);
+
+ spin_unlock_irqrestore(&vc->lock, flags);
+
+ vchan_dma_desc_free_list(vc, &head);
+
+ return ret;
+}
+
+static void uniphier_mdmac_synchronize(struct dma_chan *chan)
+{
+ vchan_synchronize(to_virt_chan(chan));
+}
+
+static enum dma_status uniphier_mdmac_tx_status(struct dma_chan *chan,
+ dma_cookie_t cookie,
+ struct dma_tx_state *txstate)
+{
+ struct virt_dma_chan *vc;
+ struct virt_dma_desc *vd;
+ struct uniphier_mdmac_chan *mc;
+ struct uniphier_mdmac_desc *md = NULL;
+ enum dma_status stat;
+ unsigned long flags;
+ int i;
+
+ stat = dma_cookie_status(chan, cookie, txstate);
+ /* Return immediately if we do not need to compute the residue. */
+ if (stat == DMA_COMPLETE || !txstate)
+ return stat;
+
+ vc = to_virt_chan(chan);
+
+ spin_lock_irqsave(&vc->lock, flags);
+
+ mc = to_uniphier_mdmac_chan(vc);
+
+ if (mc->md && mc->md->vd.tx.cookie == cookie) {
+ /* residue from the on-flight chunk */
+ txstate->residue = readl(mc->reg_ch_base +
+ UNIPHIER_MDMAC_CH_SIZE);
+ md = mc->md;
+ }
+
+ if (!md) {
+ vd = vchan_find_desc(vc, cookie);
+ if (vd)
+ md = to_uniphier_mdmac_desc(vd);
+ }
+
+ if (md) {
+ /* residue from the queued chunks */
+ for (i = md->sg_cur; i < md->sg_len; i++)
+ txstate->residue += sg_dma_len(&md->sgl[i]);
+ }
+
+ spin_unlock_irqrestore(&vc->lock, flags);
+
+ return stat;
+}
+
+static void uniphier_mdmac_issue_pending(struct dma_chan *chan)
+{
+ struct virt_dma_chan *vc = to_virt_chan(chan);
+ struct uniphier_mdmac_chan *mc = to_uniphier_mdmac_chan(vc);
+ unsigned long flags;
+
+ spin_lock_irqsave(&vc->lock, flags);
+
+ if (vchan_issue_pending(vc) && !mc->md)
+ uniphier_mdmac_start(mc);
+
+ spin_unlock_irqrestore(&vc->lock, flags);
+}
+
+static void uniphier_mdmac_desc_free(struct virt_dma_desc *vd)
+{
+ kfree(to_uniphier_mdmac_desc(vd));
+}
+
+static int uniphier_mdmac_chan_init(struct platform_device *pdev,
+ struct uniphier_mdmac_device *mdev,
+ int chan_id)
+{
+ struct device *dev = &pdev->dev;
+ struct uniphier_mdmac_chan *mc = &mdev->channels[chan_id];
+ char *irq_name;
+ int irq, ret;
+
+ irq = platform_get_irq(pdev, chan_id);
+ if (irq < 0)
+ return irq;
+
+ irq_name = devm_kasprintf(dev, GFP_KERNEL, "uniphier-mio-dmac-ch%d",
+ chan_id);
+ if (!irq_name)
+ return -ENOMEM;
+
+ ret = devm_request_irq(dev, irq, uniphier_mdmac_interrupt,
+ IRQF_SHARED, irq_name, mc);
+ if (ret)
+ return ret;
+
+ mc->mdev = mdev;
+ mc->reg_ch_base = mdev->reg_base + UNIPHIER_MDMAC_CH_OFFSET +
+ UNIPHIER_MDMAC_CH_STRIDE * chan_id;
+ mc->chan_id = chan_id;
+ mc->vc.desc_free = uniphier_mdmac_desc_free;
+ vchan_init(&mc->vc, &mdev->ddev);
+
+ return 0;
+}
+
+static int uniphier_mdmac_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct uniphier_mdmac_device *mdev;
+ struct dma_device *ddev;
+ int nr_chans, ret, i;
+
+ nr_chans = platform_irq_count(pdev);
+ if (nr_chans < 0)
+ return nr_chans;
+
+ ret = dma_set_mask(dev, DMA_BIT_MASK(32));
+ if (ret)
+ return ret;
+
+ mdev = devm_kzalloc(dev, struct_size(mdev, channels, nr_chans),
+ GFP_KERNEL);
+ if (!mdev)
+ return -ENOMEM;
+
+ mdev->reg_base = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(mdev->reg_base))
+ return PTR_ERR(mdev->reg_base);
+
+ mdev->clk = devm_clk_get(dev, NULL);
+ if (IS_ERR(mdev->clk)) {
+ dev_err(dev, "failed to get clock\n");
+ return PTR_ERR(mdev->clk);
+ }
+
+ ret = clk_prepare_enable(mdev->clk);
+ if (ret)
+ return ret;
+
+ ddev = &mdev->ddev;
+ ddev->dev = dev;
+ dma_cap_set(DMA_PRIVATE, ddev->cap_mask);
+ ddev->src_addr_widths = UNIPHIER_MDMAC_SLAVE_BUSWIDTHS;
+ ddev->dst_addr_widths = UNIPHIER_MDMAC_SLAVE_BUSWIDTHS;
+ ddev->directions = BIT(DMA_MEM_TO_DEV) | BIT(DMA_DEV_TO_MEM);
+ ddev->residue_granularity = DMA_RESIDUE_GRANULARITY_SEGMENT;
+ ddev->device_free_chan_resources = uniphier_mdmac_free_chan_resources;
+ ddev->device_prep_slave_sg = uniphier_mdmac_prep_slave_sg;
+ ddev->device_terminate_all = uniphier_mdmac_terminate_all;
+ ddev->device_synchronize = uniphier_mdmac_synchronize;
+ ddev->device_tx_status = uniphier_mdmac_tx_status;
+ ddev->device_issue_pending = uniphier_mdmac_issue_pending;
+ INIT_LIST_HEAD(&ddev->channels);
+
+ for (i = 0; i < nr_chans; i++) {
+ ret = uniphier_mdmac_chan_init(pdev, mdev, i);
+ if (ret)
+ goto disable_clk;
+ }
+
+ ret = dma_async_device_register(ddev);
+ if (ret)
+ goto disable_clk;
+
+ ret = of_dma_controller_register(dev->of_node, of_dma_xlate_by_chan_id,
+ ddev);
+ if (ret)
+ goto unregister_dmac;
+
+ platform_set_drvdata(pdev, mdev);
+
+ return 0;
+
+unregister_dmac:
+ dma_async_device_unregister(ddev);
+disable_clk:
+ clk_disable_unprepare(mdev->clk);
+
+ return ret;
+}
+
+static int uniphier_mdmac_remove(struct platform_device *pdev)
+{
+ struct uniphier_mdmac_device *mdev = platform_get_drvdata(pdev);
+ struct dma_chan *chan;
+ int ret;
+
+ /*
+ * Before reaching here, almost all descriptors have been freed by the
+ * ->device_free_chan_resources() hook. However, each channel might
+ * be still holding one descriptor that was on-flight at that moment.
+ * Terminate it to make sure this hardware is no longer running. Then,
+ * free the channel resources once again to avoid memory leak.
+ */
+ list_for_each_entry(chan, &mdev->ddev.channels, device_node) {
+ ret = dmaengine_terminate_sync(chan);
+ if (ret)
+ return ret;
+ uniphier_mdmac_free_chan_resources(chan);
+ }
+
+ of_dma_controller_free(pdev->dev.of_node);
+ dma_async_device_unregister(&mdev->ddev);
+ clk_disable_unprepare(mdev->clk);
+
+ return 0;
+}
+
+static const struct of_device_id uniphier_mdmac_match[] = {
+ { .compatible = "socionext,uniphier-mio-dmac" },
+ { /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, uniphier_mdmac_match);
+
+static struct platform_driver uniphier_mdmac_driver = {
+ .probe = uniphier_mdmac_probe,
+ .remove = uniphier_mdmac_remove,
+ .driver = {
+ .name = "uniphier-mio-dmac",
+ .of_match_table = uniphier_mdmac_match,
+ },
+};
+module_platform_driver(uniphier_mdmac_driver);
+
+MODULE_AUTHOR("Masahiro Yamada <yamada.masahiro@socionext.com>");
+MODULE_DESCRIPTION("UniPhier MIO DMAC driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/uniphier-xdmac.c b/drivers/dma/uniphier-xdmac.c
new file mode 100644
index 000000000..290836b7e
--- /dev/null
+++ b/drivers/dma/uniphier-xdmac.c
@@ -0,0 +1,611 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * External DMA controller driver for UniPhier SoCs
+ * Copyright 2019 Socionext Inc.
+ * Author: Kunihiko Hayashi <hayashi.kunihiko@socionext.com>
+ */
+
+#include <linux/bitops.h>
+#include <linux/bitfield.h>
+#include <linux/iopoll.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_dma.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+#include "dmaengine.h"
+#include "virt-dma.h"
+
+#define XDMAC_CH_WIDTH 0x100
+
+#define XDMAC_TFA 0x08
+#define XDMAC_TFA_MCNT_MASK GENMASK(23, 16)
+#define XDMAC_TFA_MASK GENMASK(5, 0)
+#define XDMAC_SADM 0x10
+#define XDMAC_SADM_STW_MASK GENMASK(25, 24)
+#define XDMAC_SADM_SAM BIT(4)
+#define XDMAC_SADM_SAM_FIXED XDMAC_SADM_SAM
+#define XDMAC_SADM_SAM_INC 0
+#define XDMAC_DADM 0x14
+#define XDMAC_DADM_DTW_MASK XDMAC_SADM_STW_MASK
+#define XDMAC_DADM_DAM XDMAC_SADM_SAM
+#define XDMAC_DADM_DAM_FIXED XDMAC_SADM_SAM_FIXED
+#define XDMAC_DADM_DAM_INC XDMAC_SADM_SAM_INC
+#define XDMAC_EXSAD 0x18
+#define XDMAC_EXDAD 0x1c
+#define XDMAC_SAD 0x20
+#define XDMAC_DAD 0x24
+#define XDMAC_ITS 0x28
+#define XDMAC_ITS_MASK GENMASK(25, 0)
+#define XDMAC_TNUM 0x2c
+#define XDMAC_TNUM_MASK GENMASK(15, 0)
+#define XDMAC_TSS 0x30
+#define XDMAC_TSS_REQ BIT(0)
+#define XDMAC_IEN 0x34
+#define XDMAC_IEN_ERRIEN BIT(1)
+#define XDMAC_IEN_ENDIEN BIT(0)
+#define XDMAC_STAT 0x40
+#define XDMAC_STAT_TENF BIT(0)
+#define XDMAC_IR 0x44
+#define XDMAC_IR_ERRF BIT(1)
+#define XDMAC_IR_ENDF BIT(0)
+#define XDMAC_ID 0x48
+#define XDMAC_ID_ERRIDF BIT(1)
+#define XDMAC_ID_ENDIDF BIT(0)
+
+#define XDMAC_MAX_CHANS 16
+#define XDMAC_INTERVAL_CLKS 20
+#define XDMAC_MAX_WORDS XDMAC_TNUM_MASK
+
+/* cut lower bit for maintain alignment of maximum transfer size */
+#define XDMAC_MAX_WORD_SIZE (XDMAC_ITS_MASK & ~GENMASK(3, 0))
+
+#define UNIPHIER_XDMAC_BUSWIDTHS \
+ (BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | \
+ BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \
+ BIT(DMA_SLAVE_BUSWIDTH_4_BYTES) | \
+ BIT(DMA_SLAVE_BUSWIDTH_8_BYTES))
+
+struct uniphier_xdmac_desc_node {
+ dma_addr_t src;
+ dma_addr_t dst;
+ u32 burst_size;
+ u32 nr_burst;
+};
+
+struct uniphier_xdmac_desc {
+ struct virt_dma_desc vd;
+
+ unsigned int nr_node;
+ unsigned int cur_node;
+ enum dma_transfer_direction dir;
+ struct uniphier_xdmac_desc_node nodes[];
+};
+
+struct uniphier_xdmac_chan {
+ struct virt_dma_chan vc;
+ struct uniphier_xdmac_device *xdev;
+ struct uniphier_xdmac_desc *xd;
+ void __iomem *reg_ch_base;
+ struct dma_slave_config sconfig;
+ int id;
+ unsigned int req_factor;
+};
+
+struct uniphier_xdmac_device {
+ struct dma_device ddev;
+ void __iomem *reg_base;
+ int nr_chans;
+ struct uniphier_xdmac_chan channels[];
+};
+
+static struct uniphier_xdmac_chan *
+to_uniphier_xdmac_chan(struct virt_dma_chan *vc)
+{
+ return container_of(vc, struct uniphier_xdmac_chan, vc);
+}
+
+static struct uniphier_xdmac_desc *
+to_uniphier_xdmac_desc(struct virt_dma_desc *vd)
+{
+ return container_of(vd, struct uniphier_xdmac_desc, vd);
+}
+
+/* xc->vc.lock must be held by caller */
+static struct uniphier_xdmac_desc *
+uniphier_xdmac_next_desc(struct uniphier_xdmac_chan *xc)
+{
+ struct virt_dma_desc *vd;
+
+ vd = vchan_next_desc(&xc->vc);
+ if (!vd)
+ return NULL;
+
+ list_del(&vd->node);
+
+ return to_uniphier_xdmac_desc(vd);
+}
+
+/* xc->vc.lock must be held by caller */
+static void uniphier_xdmac_chan_start(struct uniphier_xdmac_chan *xc,
+ struct uniphier_xdmac_desc *xd)
+{
+ u32 src_mode, src_width;
+ u32 dst_mode, dst_width;
+ dma_addr_t src_addr, dst_addr;
+ u32 val, its, tnum;
+ enum dma_slave_buswidth buswidth;
+
+ src_addr = xd->nodes[xd->cur_node].src;
+ dst_addr = xd->nodes[xd->cur_node].dst;
+ its = xd->nodes[xd->cur_node].burst_size;
+ tnum = xd->nodes[xd->cur_node].nr_burst;
+
+ /*
+ * The width of MEM side must be 4 or 8 bytes, that does not
+ * affect that of DEV side and transfer size.
+ */
+ if (xd->dir == DMA_DEV_TO_MEM) {
+ src_mode = XDMAC_SADM_SAM_FIXED;
+ buswidth = xc->sconfig.src_addr_width;
+ } else {
+ src_mode = XDMAC_SADM_SAM_INC;
+ buswidth = DMA_SLAVE_BUSWIDTH_8_BYTES;
+ }
+ src_width = FIELD_PREP(XDMAC_SADM_STW_MASK, __ffs(buswidth));
+
+ if (xd->dir == DMA_MEM_TO_DEV) {
+ dst_mode = XDMAC_DADM_DAM_FIXED;
+ buswidth = xc->sconfig.dst_addr_width;
+ } else {
+ dst_mode = XDMAC_DADM_DAM_INC;
+ buswidth = DMA_SLAVE_BUSWIDTH_8_BYTES;
+ }
+ dst_width = FIELD_PREP(XDMAC_DADM_DTW_MASK, __ffs(buswidth));
+
+ /* setup transfer factor */
+ val = FIELD_PREP(XDMAC_TFA_MCNT_MASK, XDMAC_INTERVAL_CLKS);
+ val |= FIELD_PREP(XDMAC_TFA_MASK, xc->req_factor);
+ writel(val, xc->reg_ch_base + XDMAC_TFA);
+
+ /* setup the channel */
+ writel(lower_32_bits(src_addr), xc->reg_ch_base + XDMAC_SAD);
+ writel(upper_32_bits(src_addr), xc->reg_ch_base + XDMAC_EXSAD);
+
+ writel(lower_32_bits(dst_addr), xc->reg_ch_base + XDMAC_DAD);
+ writel(upper_32_bits(dst_addr), xc->reg_ch_base + XDMAC_EXDAD);
+
+ src_mode |= src_width;
+ dst_mode |= dst_width;
+ writel(src_mode, xc->reg_ch_base + XDMAC_SADM);
+ writel(dst_mode, xc->reg_ch_base + XDMAC_DADM);
+
+ writel(its, xc->reg_ch_base + XDMAC_ITS);
+ writel(tnum, xc->reg_ch_base + XDMAC_TNUM);
+
+ /* enable interrupt */
+ writel(XDMAC_IEN_ENDIEN | XDMAC_IEN_ERRIEN,
+ xc->reg_ch_base + XDMAC_IEN);
+
+ /* start XDMAC */
+ val = readl(xc->reg_ch_base + XDMAC_TSS);
+ val |= XDMAC_TSS_REQ;
+ writel(val, xc->reg_ch_base + XDMAC_TSS);
+}
+
+/* xc->vc.lock must be held by caller */
+static int uniphier_xdmac_chan_stop(struct uniphier_xdmac_chan *xc)
+{
+ u32 val;
+
+ /* disable interrupt */
+ val = readl(xc->reg_ch_base + XDMAC_IEN);
+ val &= ~(XDMAC_IEN_ENDIEN | XDMAC_IEN_ERRIEN);
+ writel(val, xc->reg_ch_base + XDMAC_IEN);
+
+ /* stop XDMAC */
+ val = readl(xc->reg_ch_base + XDMAC_TSS);
+ val &= ~XDMAC_TSS_REQ;
+ writel(0, xc->reg_ch_base + XDMAC_TSS);
+
+ /* wait until transfer is stopped */
+ return readl_poll_timeout_atomic(xc->reg_ch_base + XDMAC_STAT, val,
+ !(val & XDMAC_STAT_TENF), 100, 1000);
+}
+
+/* xc->vc.lock must be held by caller */
+static void uniphier_xdmac_start(struct uniphier_xdmac_chan *xc)
+{
+ struct uniphier_xdmac_desc *xd;
+
+ xd = uniphier_xdmac_next_desc(xc);
+ if (xd)
+ uniphier_xdmac_chan_start(xc, xd);
+
+ /* set desc to chan regardless of xd is null */
+ xc->xd = xd;
+}
+
+static void uniphier_xdmac_chan_irq(struct uniphier_xdmac_chan *xc)
+{
+ u32 stat;
+ int ret;
+
+ spin_lock(&xc->vc.lock);
+
+ stat = readl(xc->reg_ch_base + XDMAC_ID);
+
+ if (stat & XDMAC_ID_ERRIDF) {
+ ret = uniphier_xdmac_chan_stop(xc);
+ if (ret)
+ dev_err(xc->xdev->ddev.dev,
+ "DMA transfer error with aborting issue\n");
+ else
+ dev_err(xc->xdev->ddev.dev,
+ "DMA transfer error\n");
+
+ } else if ((stat & XDMAC_ID_ENDIDF) && xc->xd) {
+ xc->xd->cur_node++;
+ if (xc->xd->cur_node >= xc->xd->nr_node) {
+ vchan_cookie_complete(&xc->xd->vd);
+ uniphier_xdmac_start(xc);
+ } else {
+ uniphier_xdmac_chan_start(xc, xc->xd);
+ }
+ }
+
+ /* write bits to clear */
+ writel(stat, xc->reg_ch_base + XDMAC_IR);
+
+ spin_unlock(&xc->vc.lock);
+}
+
+static irqreturn_t uniphier_xdmac_irq_handler(int irq, void *dev_id)
+{
+ struct uniphier_xdmac_device *xdev = dev_id;
+ int i;
+
+ for (i = 0; i < xdev->nr_chans; i++)
+ uniphier_xdmac_chan_irq(&xdev->channels[i]);
+
+ return IRQ_HANDLED;
+}
+
+static void uniphier_xdmac_free_chan_resources(struct dma_chan *chan)
+{
+ vchan_free_chan_resources(to_virt_chan(chan));
+}
+
+static struct dma_async_tx_descriptor *
+uniphier_xdmac_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dst,
+ dma_addr_t src, size_t len, unsigned long flags)
+{
+ struct virt_dma_chan *vc = to_virt_chan(chan);
+ struct uniphier_xdmac_desc *xd;
+ unsigned int nr;
+ size_t burst_size, tlen;
+ int i;
+
+ if (len > XDMAC_MAX_WORD_SIZE * XDMAC_MAX_WORDS)
+ return NULL;
+
+ nr = 1 + len / XDMAC_MAX_WORD_SIZE;
+
+ xd = kzalloc(struct_size(xd, nodes, nr), GFP_NOWAIT);
+ if (!xd)
+ return NULL;
+
+ for (i = 0; i < nr; i++) {
+ burst_size = min_t(size_t, len, XDMAC_MAX_WORD_SIZE);
+ xd->nodes[i].src = src;
+ xd->nodes[i].dst = dst;
+ xd->nodes[i].burst_size = burst_size;
+ xd->nodes[i].nr_burst = len / burst_size;
+ tlen = rounddown(len, burst_size);
+ src += tlen;
+ dst += tlen;
+ len -= tlen;
+ }
+
+ xd->dir = DMA_MEM_TO_MEM;
+ xd->nr_node = nr;
+ xd->cur_node = 0;
+
+ return vchan_tx_prep(vc, &xd->vd, flags);
+}
+
+static struct dma_async_tx_descriptor *
+uniphier_xdmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
+ unsigned int sg_len,
+ enum dma_transfer_direction direction,
+ unsigned long flags, void *context)
+{
+ struct virt_dma_chan *vc = to_virt_chan(chan);
+ struct uniphier_xdmac_chan *xc = to_uniphier_xdmac_chan(vc);
+ struct uniphier_xdmac_desc *xd;
+ struct scatterlist *sg;
+ enum dma_slave_buswidth buswidth;
+ u32 maxburst;
+ int i;
+
+ if (!is_slave_direction(direction))
+ return NULL;
+
+ if (direction == DMA_DEV_TO_MEM) {
+ buswidth = xc->sconfig.src_addr_width;
+ maxburst = xc->sconfig.src_maxburst;
+ } else {
+ buswidth = xc->sconfig.dst_addr_width;
+ maxburst = xc->sconfig.dst_maxburst;
+ }
+
+ if (!maxburst)
+ maxburst = 1;
+ if (maxburst > xc->xdev->ddev.max_burst) {
+ dev_err(xc->xdev->ddev.dev,
+ "Exceed maximum number of burst words\n");
+ return NULL;
+ }
+
+ xd = kzalloc(struct_size(xd, nodes, sg_len), GFP_NOWAIT);
+ if (!xd)
+ return NULL;
+
+ for_each_sg(sgl, sg, sg_len, i) {
+ xd->nodes[i].src = (direction == DMA_DEV_TO_MEM)
+ ? xc->sconfig.src_addr : sg_dma_address(sg);
+ xd->nodes[i].dst = (direction == DMA_MEM_TO_DEV)
+ ? xc->sconfig.dst_addr : sg_dma_address(sg);
+ xd->nodes[i].burst_size = maxburst * buswidth;
+ xd->nodes[i].nr_burst =
+ sg_dma_len(sg) / xd->nodes[i].burst_size;
+
+ /*
+ * Currently transfer that size doesn't align the unit size
+ * (the number of burst words * bus-width) is not allowed,
+ * because the driver does not support the way to transfer
+ * residue size. As a matter of fact, in order to transfer
+ * arbitrary size, 'src_maxburst' or 'dst_maxburst' of
+ * dma_slave_config must be 1.
+ */
+ if (sg_dma_len(sg) % xd->nodes[i].burst_size) {
+ dev_err(xc->xdev->ddev.dev,
+ "Unaligned transfer size: %d", sg_dma_len(sg));
+ kfree(xd);
+ return NULL;
+ }
+
+ if (xd->nodes[i].nr_burst > XDMAC_MAX_WORDS) {
+ dev_err(xc->xdev->ddev.dev,
+ "Exceed maximum transfer size");
+ kfree(xd);
+ return NULL;
+ }
+ }
+
+ xd->dir = direction;
+ xd->nr_node = sg_len;
+ xd->cur_node = 0;
+
+ return vchan_tx_prep(vc, &xd->vd, flags);
+}
+
+static int uniphier_xdmac_slave_config(struct dma_chan *chan,
+ struct dma_slave_config *config)
+{
+ struct virt_dma_chan *vc = to_virt_chan(chan);
+ struct uniphier_xdmac_chan *xc = to_uniphier_xdmac_chan(vc);
+
+ memcpy(&xc->sconfig, config, sizeof(*config));
+
+ return 0;
+}
+
+static int uniphier_xdmac_terminate_all(struct dma_chan *chan)
+{
+ struct virt_dma_chan *vc = to_virt_chan(chan);
+ struct uniphier_xdmac_chan *xc = to_uniphier_xdmac_chan(vc);
+ unsigned long flags;
+ int ret = 0;
+ LIST_HEAD(head);
+
+ spin_lock_irqsave(&vc->lock, flags);
+
+ if (xc->xd) {
+ vchan_terminate_vdesc(&xc->xd->vd);
+ xc->xd = NULL;
+ ret = uniphier_xdmac_chan_stop(xc);
+ }
+
+ vchan_get_all_descriptors(vc, &head);
+
+ spin_unlock_irqrestore(&vc->lock, flags);
+
+ vchan_dma_desc_free_list(vc, &head);
+
+ return ret;
+}
+
+static void uniphier_xdmac_synchronize(struct dma_chan *chan)
+{
+ vchan_synchronize(to_virt_chan(chan));
+}
+
+static void uniphier_xdmac_issue_pending(struct dma_chan *chan)
+{
+ struct virt_dma_chan *vc = to_virt_chan(chan);
+ struct uniphier_xdmac_chan *xc = to_uniphier_xdmac_chan(vc);
+ unsigned long flags;
+
+ spin_lock_irqsave(&vc->lock, flags);
+
+ if (vchan_issue_pending(vc) && !xc->xd)
+ uniphier_xdmac_start(xc);
+
+ spin_unlock_irqrestore(&vc->lock, flags);
+}
+
+static void uniphier_xdmac_desc_free(struct virt_dma_desc *vd)
+{
+ kfree(to_uniphier_xdmac_desc(vd));
+}
+
+static void uniphier_xdmac_chan_init(struct uniphier_xdmac_device *xdev,
+ int ch)
+{
+ struct uniphier_xdmac_chan *xc = &xdev->channels[ch];
+
+ xc->xdev = xdev;
+ xc->reg_ch_base = xdev->reg_base + XDMAC_CH_WIDTH * ch;
+ xc->vc.desc_free = uniphier_xdmac_desc_free;
+
+ vchan_init(&xc->vc, &xdev->ddev);
+}
+
+static struct dma_chan *of_dma_uniphier_xlate(struct of_phandle_args *dma_spec,
+ struct of_dma *ofdma)
+{
+ struct uniphier_xdmac_device *xdev = ofdma->of_dma_data;
+ int chan_id = dma_spec->args[0];
+
+ if (chan_id >= xdev->nr_chans)
+ return NULL;
+
+ xdev->channels[chan_id].id = chan_id;
+ xdev->channels[chan_id].req_factor = dma_spec->args[1];
+
+ return dma_get_slave_channel(&xdev->channels[chan_id].vc.chan);
+}
+
+static int uniphier_xdmac_probe(struct platform_device *pdev)
+{
+ struct uniphier_xdmac_device *xdev;
+ struct device *dev = &pdev->dev;
+ struct dma_device *ddev;
+ int irq;
+ int nr_chans;
+ int i, ret;
+
+ if (of_property_read_u32(dev->of_node, "dma-channels", &nr_chans))
+ return -EINVAL;
+ if (nr_chans > XDMAC_MAX_CHANS)
+ nr_chans = XDMAC_MAX_CHANS;
+
+ xdev = devm_kzalloc(dev, struct_size(xdev, channels, nr_chans),
+ GFP_KERNEL);
+ if (!xdev)
+ return -ENOMEM;
+
+ xdev->nr_chans = nr_chans;
+ xdev->reg_base = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(xdev->reg_base))
+ return PTR_ERR(xdev->reg_base);
+
+ ddev = &xdev->ddev;
+ ddev->dev = dev;
+ dma_cap_zero(ddev->cap_mask);
+ dma_cap_set(DMA_MEMCPY, ddev->cap_mask);
+ dma_cap_set(DMA_SLAVE, ddev->cap_mask);
+ ddev->src_addr_widths = UNIPHIER_XDMAC_BUSWIDTHS;
+ ddev->dst_addr_widths = UNIPHIER_XDMAC_BUSWIDTHS;
+ ddev->directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV) |
+ BIT(DMA_MEM_TO_MEM);
+ ddev->residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
+ ddev->max_burst = XDMAC_MAX_WORDS;
+ ddev->device_free_chan_resources = uniphier_xdmac_free_chan_resources;
+ ddev->device_prep_dma_memcpy = uniphier_xdmac_prep_dma_memcpy;
+ ddev->device_prep_slave_sg = uniphier_xdmac_prep_slave_sg;
+ ddev->device_config = uniphier_xdmac_slave_config;
+ ddev->device_terminate_all = uniphier_xdmac_terminate_all;
+ ddev->device_synchronize = uniphier_xdmac_synchronize;
+ ddev->device_tx_status = dma_cookie_status;
+ ddev->device_issue_pending = uniphier_xdmac_issue_pending;
+ INIT_LIST_HEAD(&ddev->channels);
+
+ for (i = 0; i < nr_chans; i++)
+ uniphier_xdmac_chan_init(xdev, i);
+
+ irq = platform_get_irq(pdev, 0);
+ if (irq < 0)
+ return irq;
+
+ ret = devm_request_irq(dev, irq, uniphier_xdmac_irq_handler,
+ IRQF_SHARED, "xdmac", xdev);
+ if (ret) {
+ dev_err(dev, "Failed to request IRQ\n");
+ return ret;
+ }
+
+ ret = dma_async_device_register(ddev);
+ if (ret) {
+ dev_err(dev, "Failed to register XDMA device\n");
+ return ret;
+ }
+
+ ret = of_dma_controller_register(dev->of_node,
+ of_dma_uniphier_xlate, xdev);
+ if (ret) {
+ dev_err(dev, "Failed to register XDMA controller\n");
+ goto out_unregister_dmac;
+ }
+
+ platform_set_drvdata(pdev, xdev);
+
+ dev_info(&pdev->dev, "UniPhier XDMAC driver (%d channels)\n",
+ nr_chans);
+
+ return 0;
+
+out_unregister_dmac:
+ dma_async_device_unregister(ddev);
+
+ return ret;
+}
+
+static int uniphier_xdmac_remove(struct platform_device *pdev)
+{
+ struct uniphier_xdmac_device *xdev = platform_get_drvdata(pdev);
+ struct dma_device *ddev = &xdev->ddev;
+ struct dma_chan *chan;
+ int ret;
+
+ /*
+ * Before reaching here, almost all descriptors have been freed by the
+ * ->device_free_chan_resources() hook. However, each channel might
+ * be still holding one descriptor that was on-flight at that moment.
+ * Terminate it to make sure this hardware is no longer running. Then,
+ * free the channel resources once again to avoid memory leak.
+ */
+ list_for_each_entry(chan, &ddev->channels, device_node) {
+ ret = dmaengine_terminate_sync(chan);
+ if (ret)
+ return ret;
+ uniphier_xdmac_free_chan_resources(chan);
+ }
+
+ of_dma_controller_free(pdev->dev.of_node);
+ dma_async_device_unregister(ddev);
+
+ return 0;
+}
+
+static const struct of_device_id uniphier_xdmac_match[] = {
+ { .compatible = "socionext,uniphier-xdmac" },
+ { /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, uniphier_xdmac_match);
+
+static struct platform_driver uniphier_xdmac_driver = {
+ .probe = uniphier_xdmac_probe,
+ .remove = uniphier_xdmac_remove,
+ .driver = {
+ .name = "uniphier-xdmac",
+ .of_match_table = uniphier_xdmac_match,
+ },
+};
+module_platform_driver(uniphier_xdmac_driver);
+
+MODULE_AUTHOR("Kunihiko Hayashi <hayashi.kunihiko@socionext.com>");
+MODULE_DESCRIPTION("UniPhier external DMA controller driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/virt-dma.c b/drivers/dma/virt-dma.c
new file mode 100644
index 000000000..a6f4265be
--- /dev/null
+++ b/drivers/dma/virt-dma.c
@@ -0,0 +1,142 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Virtual DMA channel support for DMAengine
+ *
+ * Copyright (C) 2012 Russell King
+ */
+#include <linux/device.h>
+#include <linux/dmaengine.h>
+#include <linux/module.h>
+#include <linux/spinlock.h>
+
+#include "virt-dma.h"
+
+static struct virt_dma_desc *to_virt_desc(struct dma_async_tx_descriptor *tx)
+{
+ return container_of(tx, struct virt_dma_desc, tx);
+}
+
+dma_cookie_t vchan_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+ struct virt_dma_chan *vc = to_virt_chan(tx->chan);
+ struct virt_dma_desc *vd = to_virt_desc(tx);
+ unsigned long flags;
+ dma_cookie_t cookie;
+
+ spin_lock_irqsave(&vc->lock, flags);
+ cookie = dma_cookie_assign(tx);
+
+ list_move_tail(&vd->node, &vc->desc_submitted);
+ spin_unlock_irqrestore(&vc->lock, flags);
+
+ dev_dbg(vc->chan.device->dev, "vchan %p: txd %p[%x]: submitted\n",
+ vc, vd, cookie);
+
+ return cookie;
+}
+EXPORT_SYMBOL_GPL(vchan_tx_submit);
+
+/**
+ * vchan_tx_desc_free - free a reusable descriptor
+ * @tx: the transfer
+ *
+ * This function frees a previously allocated reusable descriptor. The only
+ * other way is to clear the DMA_CTRL_REUSE flag and submit one last time the
+ * transfer.
+ *
+ * Returns 0 upon success
+ */
+int vchan_tx_desc_free(struct dma_async_tx_descriptor *tx)
+{
+ struct virt_dma_chan *vc = to_virt_chan(tx->chan);
+ struct virt_dma_desc *vd = to_virt_desc(tx);
+ unsigned long flags;
+
+ spin_lock_irqsave(&vc->lock, flags);
+ list_del(&vd->node);
+ spin_unlock_irqrestore(&vc->lock, flags);
+
+ dev_dbg(vc->chan.device->dev, "vchan %p: txd %p[%x]: freeing\n",
+ vc, vd, vd->tx.cookie);
+ vc->desc_free(vd);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(vchan_tx_desc_free);
+
+struct virt_dma_desc *vchan_find_desc(struct virt_dma_chan *vc,
+ dma_cookie_t cookie)
+{
+ struct virt_dma_desc *vd;
+
+ list_for_each_entry(vd, &vc->desc_issued, node)
+ if (vd->tx.cookie == cookie)
+ return vd;
+
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(vchan_find_desc);
+
+/*
+ * This tasklet handles the completion of a DMA descriptor by
+ * calling its callback and freeing it.
+ */
+static void vchan_complete(struct tasklet_struct *t)
+{
+ struct virt_dma_chan *vc = from_tasklet(vc, t, task);
+ struct virt_dma_desc *vd, *_vd;
+ struct dmaengine_desc_callback cb;
+ LIST_HEAD(head);
+
+ spin_lock_irq(&vc->lock);
+ list_splice_tail_init(&vc->desc_completed, &head);
+ vd = vc->cyclic;
+ if (vd) {
+ vc->cyclic = NULL;
+ dmaengine_desc_get_callback(&vd->tx, &cb);
+ } else {
+ memset(&cb, 0, sizeof(cb));
+ }
+ spin_unlock_irq(&vc->lock);
+
+ dmaengine_desc_callback_invoke(&cb, &vd->tx_result);
+
+ list_for_each_entry_safe(vd, _vd, &head, node) {
+ dmaengine_desc_get_callback(&vd->tx, &cb);
+
+ list_del(&vd->node);
+ dmaengine_desc_callback_invoke(&cb, &vd->tx_result);
+ vchan_vdesc_fini(vd);
+ }
+}
+
+void vchan_dma_desc_free_list(struct virt_dma_chan *vc, struct list_head *head)
+{
+ struct virt_dma_desc *vd, *_vd;
+
+ list_for_each_entry_safe(vd, _vd, head, node) {
+ list_del(&vd->node);
+ vchan_vdesc_fini(vd);
+ }
+}
+EXPORT_SYMBOL_GPL(vchan_dma_desc_free_list);
+
+void vchan_init(struct virt_dma_chan *vc, struct dma_device *dmadev)
+{
+ dma_cookie_init(&vc->chan);
+
+ spin_lock_init(&vc->lock);
+ INIT_LIST_HEAD(&vc->desc_allocated);
+ INIT_LIST_HEAD(&vc->desc_submitted);
+ INIT_LIST_HEAD(&vc->desc_issued);
+ INIT_LIST_HEAD(&vc->desc_completed);
+ INIT_LIST_HEAD(&vc->desc_terminated);
+
+ tasklet_setup(&vc->task, vchan_complete);
+
+ vc->chan.device = dmadev;
+ list_add_tail(&vc->chan.device_node, &dmadev->channels);
+}
+EXPORT_SYMBOL_GPL(vchan_init);
+
+MODULE_AUTHOR("Russell King");
+MODULE_LICENSE("GPL");
diff --git a/drivers/dma/virt-dma.h b/drivers/dma/virt-dma.h
new file mode 100644
index 000000000..e9f5250fb
--- /dev/null
+++ b/drivers/dma/virt-dma.h
@@ -0,0 +1,227 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Virtual DMA channel support for DMAengine
+ *
+ * Copyright (C) 2012 Russell King
+ */
+#ifndef VIRT_DMA_H
+#define VIRT_DMA_H
+
+#include <linux/dmaengine.h>
+#include <linux/interrupt.h>
+
+#include "dmaengine.h"
+
+struct virt_dma_desc {
+ struct dma_async_tx_descriptor tx;
+ struct dmaengine_result tx_result;
+ /* protected by vc.lock */
+ struct list_head node;
+};
+
+struct virt_dma_chan {
+ struct dma_chan chan;
+ struct tasklet_struct task;
+ void (*desc_free)(struct virt_dma_desc *);
+
+ spinlock_t lock;
+
+ /* protected by vc.lock */
+ struct list_head desc_allocated;
+ struct list_head desc_submitted;
+ struct list_head desc_issued;
+ struct list_head desc_completed;
+ struct list_head desc_terminated;
+
+ struct virt_dma_desc *cyclic;
+};
+
+static inline struct virt_dma_chan *to_virt_chan(struct dma_chan *chan)
+{
+ return container_of(chan, struct virt_dma_chan, chan);
+}
+
+void vchan_dma_desc_free_list(struct virt_dma_chan *vc, struct list_head *head);
+void vchan_init(struct virt_dma_chan *vc, struct dma_device *dmadev);
+struct virt_dma_desc *vchan_find_desc(struct virt_dma_chan *, dma_cookie_t);
+extern dma_cookie_t vchan_tx_submit(struct dma_async_tx_descriptor *);
+extern int vchan_tx_desc_free(struct dma_async_tx_descriptor *);
+
+/**
+ * vchan_tx_prep - prepare a descriptor
+ * @vc: virtual channel allocating this descriptor
+ * @vd: virtual descriptor to prepare
+ * @tx_flags: flags argument passed in to prepare function
+ */
+static inline struct dma_async_tx_descriptor *vchan_tx_prep(struct virt_dma_chan *vc,
+ struct virt_dma_desc *vd, unsigned long tx_flags)
+{
+ unsigned long flags;
+
+ dma_async_tx_descriptor_init(&vd->tx, &vc->chan);
+ vd->tx.flags = tx_flags;
+ vd->tx.tx_submit = vchan_tx_submit;
+ vd->tx.desc_free = vchan_tx_desc_free;
+
+ vd->tx_result.result = DMA_TRANS_NOERROR;
+ vd->tx_result.residue = 0;
+
+ spin_lock_irqsave(&vc->lock, flags);
+ list_add_tail(&vd->node, &vc->desc_allocated);
+ spin_unlock_irqrestore(&vc->lock, flags);
+
+ return &vd->tx;
+}
+
+/**
+ * vchan_issue_pending - move submitted descriptors to issued list
+ * @vc: virtual channel to update
+ *
+ * vc.lock must be held by caller
+ */
+static inline bool vchan_issue_pending(struct virt_dma_chan *vc)
+{
+ list_splice_tail_init(&vc->desc_submitted, &vc->desc_issued);
+ return !list_empty(&vc->desc_issued);
+}
+
+/**
+ * vchan_cookie_complete - report completion of a descriptor
+ * @vd: virtual descriptor to update
+ *
+ * vc.lock must be held by caller
+ */
+static inline void vchan_cookie_complete(struct virt_dma_desc *vd)
+{
+ struct virt_dma_chan *vc = to_virt_chan(vd->tx.chan);
+ dma_cookie_t cookie;
+
+ cookie = vd->tx.cookie;
+ dma_cookie_complete(&vd->tx);
+ dev_vdbg(vc->chan.device->dev, "txd %p[%x]: marked complete\n",
+ vd, cookie);
+ list_add_tail(&vd->node, &vc->desc_completed);
+
+ tasklet_schedule(&vc->task);
+}
+
+/**
+ * vchan_vdesc_fini - Free or reuse a descriptor
+ * @vd: virtual descriptor to free/reuse
+ */
+static inline void vchan_vdesc_fini(struct virt_dma_desc *vd)
+{
+ struct virt_dma_chan *vc = to_virt_chan(vd->tx.chan);
+
+ if (dmaengine_desc_test_reuse(&vd->tx)) {
+ unsigned long flags;
+
+ spin_lock_irqsave(&vc->lock, flags);
+ list_add(&vd->node, &vc->desc_allocated);
+ spin_unlock_irqrestore(&vc->lock, flags);
+ } else {
+ vc->desc_free(vd);
+ }
+}
+
+/**
+ * vchan_cyclic_callback - report the completion of a period
+ * @vd: virtual descriptor
+ */
+static inline void vchan_cyclic_callback(struct virt_dma_desc *vd)
+{
+ struct virt_dma_chan *vc = to_virt_chan(vd->tx.chan);
+
+ vc->cyclic = vd;
+ tasklet_schedule(&vc->task);
+}
+
+/**
+ * vchan_terminate_vdesc - Disable pending cyclic callback
+ * @vd: virtual descriptor to be terminated
+ *
+ * vc.lock must be held by caller
+ */
+static inline void vchan_terminate_vdesc(struct virt_dma_desc *vd)
+{
+ struct virt_dma_chan *vc = to_virt_chan(vd->tx.chan);
+
+ list_add_tail(&vd->node, &vc->desc_terminated);
+
+ if (vc->cyclic == vd)
+ vc->cyclic = NULL;
+}
+
+/**
+ * vchan_next_desc - peek at the next descriptor to be processed
+ * @vc: virtual channel to obtain descriptor from
+ *
+ * vc.lock must be held by caller
+ */
+static inline struct virt_dma_desc *vchan_next_desc(struct virt_dma_chan *vc)
+{
+ return list_first_entry_or_null(&vc->desc_issued,
+ struct virt_dma_desc, node);
+}
+
+/**
+ * vchan_get_all_descriptors - obtain all submitted and issued descriptors
+ * @vc: virtual channel to get descriptors from
+ * @head: list of descriptors found
+ *
+ * vc.lock must be held by caller
+ *
+ * Removes all submitted and issued descriptors from internal lists, and
+ * provides a list of all descriptors found
+ */
+static inline void vchan_get_all_descriptors(struct virt_dma_chan *vc,
+ struct list_head *head)
+{
+ list_splice_tail_init(&vc->desc_allocated, head);
+ list_splice_tail_init(&vc->desc_submitted, head);
+ list_splice_tail_init(&vc->desc_issued, head);
+ list_splice_tail_init(&vc->desc_completed, head);
+ list_splice_tail_init(&vc->desc_terminated, head);
+}
+
+static inline void vchan_free_chan_resources(struct virt_dma_chan *vc)
+{
+ struct virt_dma_desc *vd;
+ unsigned long flags;
+ LIST_HEAD(head);
+
+ spin_lock_irqsave(&vc->lock, flags);
+ vchan_get_all_descriptors(vc, &head);
+ list_for_each_entry(vd, &head, node)
+ dmaengine_desc_clear_reuse(&vd->tx);
+ spin_unlock_irqrestore(&vc->lock, flags);
+
+ vchan_dma_desc_free_list(vc, &head);
+}
+
+/**
+ * vchan_synchronize() - synchronize callback execution to the current context
+ * @vc: virtual channel to synchronize
+ *
+ * Makes sure that all scheduled or active callbacks have finished running. For
+ * proper operation the caller has to ensure that no new callbacks are scheduled
+ * after the invocation of this function started.
+ * Free up the terminated cyclic descriptor to prevent memory leakage.
+ */
+static inline void vchan_synchronize(struct virt_dma_chan *vc)
+{
+ LIST_HEAD(head);
+ unsigned long flags;
+
+ tasklet_kill(&vc->task);
+
+ spin_lock_irqsave(&vc->lock, flags);
+
+ list_splice_tail_init(&vc->desc_terminated, &head);
+
+ spin_unlock_irqrestore(&vc->lock, flags);
+
+ vchan_dma_desc_free_list(vc, &head);
+}
+
+#endif
diff --git a/drivers/dma/xgene-dma.c b/drivers/dma/xgene-dma.c
new file mode 100644
index 000000000..3589b4ef5
--- /dev/null
+++ b/drivers/dma/xgene-dma.c
@@ -0,0 +1,1833 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Applied Micro X-Gene SoC DMA engine Driver
+ *
+ * Copyright (c) 2015, Applied Micro Circuits Corporation
+ * Authors: Rameshwar Prasad Sahu <rsahu@apm.com>
+ * Loc Ho <lho@apm.com>
+ *
+ * NOTE: PM support is currently not available.
+ */
+
+#include <linux/acpi.h>
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmaengine.h>
+#include <linux/dmapool.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/irq.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+
+#include "dmaengine.h"
+
+/* X-Gene DMA ring csr registers and bit definations */
+#define XGENE_DMA_RING_CONFIG 0x04
+#define XGENE_DMA_RING_ENABLE BIT(31)
+#define XGENE_DMA_RING_ID 0x08
+#define XGENE_DMA_RING_ID_SETUP(v) ((v) | BIT(31))
+#define XGENE_DMA_RING_ID_BUF 0x0C
+#define XGENE_DMA_RING_ID_BUF_SETUP(v) (((v) << 9) | BIT(21))
+#define XGENE_DMA_RING_THRESLD0_SET1 0x30
+#define XGENE_DMA_RING_THRESLD0_SET1_VAL 0X64
+#define XGENE_DMA_RING_THRESLD1_SET1 0x34
+#define XGENE_DMA_RING_THRESLD1_SET1_VAL 0xC8
+#define XGENE_DMA_RING_HYSTERESIS 0x68
+#define XGENE_DMA_RING_HYSTERESIS_VAL 0xFFFFFFFF
+#define XGENE_DMA_RING_STATE 0x6C
+#define XGENE_DMA_RING_STATE_WR_BASE 0x70
+#define XGENE_DMA_RING_NE_INT_MODE 0x017C
+#define XGENE_DMA_RING_NE_INT_MODE_SET(m, v) \
+ ((m) = ((m) & ~BIT(31 - (v))) | BIT(31 - (v)))
+#define XGENE_DMA_RING_NE_INT_MODE_RESET(m, v) \
+ ((m) &= (~BIT(31 - (v))))
+#define XGENE_DMA_RING_CLKEN 0xC208
+#define XGENE_DMA_RING_SRST 0xC200
+#define XGENE_DMA_RING_MEM_RAM_SHUTDOWN 0xD070
+#define XGENE_DMA_RING_BLK_MEM_RDY 0xD074
+#define XGENE_DMA_RING_BLK_MEM_RDY_VAL 0xFFFFFFFF
+#define XGENE_DMA_RING_ID_GET(owner, num) (((owner) << 6) | (num))
+#define XGENE_DMA_RING_DST_ID(v) ((1 << 10) | (v))
+#define XGENE_DMA_RING_CMD_OFFSET 0x2C
+#define XGENE_DMA_RING_CMD_BASE_OFFSET(v) ((v) << 6)
+#define XGENE_DMA_RING_COHERENT_SET(m) \
+ (((u32 *)(m))[2] |= BIT(4))
+#define XGENE_DMA_RING_ADDRL_SET(m, v) \
+ (((u32 *)(m))[2] |= (((v) >> 8) << 5))
+#define XGENE_DMA_RING_ADDRH_SET(m, v) \
+ (((u32 *)(m))[3] |= ((v) >> 35))
+#define XGENE_DMA_RING_ACCEPTLERR_SET(m) \
+ (((u32 *)(m))[3] |= BIT(19))
+#define XGENE_DMA_RING_SIZE_SET(m, v) \
+ (((u32 *)(m))[3] |= ((v) << 23))
+#define XGENE_DMA_RING_RECOMBBUF_SET(m) \
+ (((u32 *)(m))[3] |= BIT(27))
+#define XGENE_DMA_RING_RECOMTIMEOUTL_SET(m) \
+ (((u32 *)(m))[3] |= (0x7 << 28))
+#define XGENE_DMA_RING_RECOMTIMEOUTH_SET(m) \
+ (((u32 *)(m))[4] |= 0x3)
+#define XGENE_DMA_RING_SELTHRSH_SET(m) \
+ (((u32 *)(m))[4] |= BIT(3))
+#define XGENE_DMA_RING_TYPE_SET(m, v) \
+ (((u32 *)(m))[4] |= ((v) << 19))
+
+/* X-Gene DMA device csr registers and bit definitions */
+#define XGENE_DMA_IPBRR 0x0
+#define XGENE_DMA_DEV_ID_RD(v) ((v) & 0x00000FFF)
+#define XGENE_DMA_BUS_ID_RD(v) (((v) >> 12) & 3)
+#define XGENE_DMA_REV_NO_RD(v) (((v) >> 14) & 3)
+#define XGENE_DMA_GCR 0x10
+#define XGENE_DMA_CH_SETUP(v) \
+ ((v) = ((v) & ~0x000FFFFF) | 0x000AAFFF)
+#define XGENE_DMA_ENABLE(v) ((v) |= BIT(31))
+#define XGENE_DMA_DISABLE(v) ((v) &= ~BIT(31))
+#define XGENE_DMA_RAID6_CONT 0x14
+#define XGENE_DMA_RAID6_MULTI_CTRL(v) ((v) << 24)
+#define XGENE_DMA_INT 0x70
+#define XGENE_DMA_INT_MASK 0x74
+#define XGENE_DMA_INT_ALL_MASK 0xFFFFFFFF
+#define XGENE_DMA_INT_ALL_UNMASK 0x0
+#define XGENE_DMA_INT_MASK_SHIFT 0x14
+#define XGENE_DMA_RING_INT0_MASK 0x90A0
+#define XGENE_DMA_RING_INT1_MASK 0x90A8
+#define XGENE_DMA_RING_INT2_MASK 0x90B0
+#define XGENE_DMA_RING_INT3_MASK 0x90B8
+#define XGENE_DMA_RING_INT4_MASK 0x90C0
+#define XGENE_DMA_CFG_RING_WQ_ASSOC 0x90E0
+#define XGENE_DMA_ASSOC_RING_MNGR1 0xFFFFFFFF
+#define XGENE_DMA_MEM_RAM_SHUTDOWN 0xD070
+#define XGENE_DMA_BLK_MEM_RDY 0xD074
+#define XGENE_DMA_BLK_MEM_RDY_VAL 0xFFFFFFFF
+#define XGENE_DMA_RING_CMD_SM_OFFSET 0x8000
+
+/* X-Gene SoC EFUSE csr register and bit defination */
+#define XGENE_SOC_JTAG1_SHADOW 0x18
+#define XGENE_DMA_PQ_DISABLE_MASK BIT(13)
+
+/* X-Gene DMA Descriptor format */
+#define XGENE_DMA_DESC_NV_BIT BIT_ULL(50)
+#define XGENE_DMA_DESC_IN_BIT BIT_ULL(55)
+#define XGENE_DMA_DESC_C_BIT BIT_ULL(63)
+#define XGENE_DMA_DESC_DR_BIT BIT_ULL(61)
+#define XGENE_DMA_DESC_ELERR_POS 46
+#define XGENE_DMA_DESC_RTYPE_POS 56
+#define XGENE_DMA_DESC_LERR_POS 60
+#define XGENE_DMA_DESC_BUFLEN_POS 48
+#define XGENE_DMA_DESC_HOENQ_NUM_POS 48
+#define XGENE_DMA_DESC_ELERR_RD(m) \
+ (((m) >> XGENE_DMA_DESC_ELERR_POS) & 0x3)
+#define XGENE_DMA_DESC_LERR_RD(m) \
+ (((m) >> XGENE_DMA_DESC_LERR_POS) & 0x7)
+#define XGENE_DMA_DESC_STATUS(elerr, lerr) \
+ (((elerr) << 4) | (lerr))
+
+/* X-Gene DMA descriptor empty s/w signature */
+#define XGENE_DMA_DESC_EMPTY_SIGNATURE ~0ULL
+
+/* X-Gene DMA configurable parameters defines */
+#define XGENE_DMA_RING_NUM 512
+#define XGENE_DMA_BUFNUM 0x0
+#define XGENE_DMA_CPU_BUFNUM 0x18
+#define XGENE_DMA_RING_OWNER_DMA 0x03
+#define XGENE_DMA_RING_OWNER_CPU 0x0F
+#define XGENE_DMA_RING_TYPE_REGULAR 0x01
+#define XGENE_DMA_RING_WQ_DESC_SIZE 32 /* 32 Bytes */
+#define XGENE_DMA_RING_NUM_CONFIG 5
+#define XGENE_DMA_MAX_CHANNEL 4
+#define XGENE_DMA_XOR_CHANNEL 0
+#define XGENE_DMA_PQ_CHANNEL 1
+#define XGENE_DMA_MAX_BYTE_CNT 0x4000 /* 16 KB */
+#define XGENE_DMA_MAX_64B_DESC_BYTE_CNT 0x14000 /* 80 KB */
+#define XGENE_DMA_MAX_XOR_SRC 5
+#define XGENE_DMA_16K_BUFFER_LEN_CODE 0x0
+#define XGENE_DMA_INVALID_LEN_CODE 0x7800000000000000ULL
+
+/* X-Gene DMA descriptor error codes */
+#define ERR_DESC_AXI 0x01
+#define ERR_BAD_DESC 0x02
+#define ERR_READ_DATA_AXI 0x03
+#define ERR_WRITE_DATA_AXI 0x04
+#define ERR_FBP_TIMEOUT 0x05
+#define ERR_ECC 0x06
+#define ERR_DIFF_SIZE 0x08
+#define ERR_SCT_GAT_LEN 0x09
+#define ERR_CRC_ERR 0x11
+#define ERR_CHKSUM 0x12
+#define ERR_DIF 0x13
+
+/* X-Gene DMA error interrupt codes */
+#define ERR_DIF_SIZE_INT 0x0
+#define ERR_GS_ERR_INT 0x1
+#define ERR_FPB_TIMEO_INT 0x2
+#define ERR_WFIFO_OVF_INT 0x3
+#define ERR_RFIFO_OVF_INT 0x4
+#define ERR_WR_TIMEO_INT 0x5
+#define ERR_RD_TIMEO_INT 0x6
+#define ERR_WR_ERR_INT 0x7
+#define ERR_RD_ERR_INT 0x8
+#define ERR_BAD_DESC_INT 0x9
+#define ERR_DESC_DST_INT 0xA
+#define ERR_DESC_SRC_INT 0xB
+
+/* X-Gene DMA flyby operation code */
+#define FLYBY_2SRC_XOR 0x80
+#define FLYBY_3SRC_XOR 0x90
+#define FLYBY_4SRC_XOR 0xA0
+#define FLYBY_5SRC_XOR 0xB0
+
+/* X-Gene DMA SW descriptor flags */
+#define XGENE_DMA_FLAG_64B_DESC BIT(0)
+
+/* Define to dump X-Gene DMA descriptor */
+#define XGENE_DMA_DESC_DUMP(desc, m) \
+ print_hex_dump(KERN_ERR, (m), \
+ DUMP_PREFIX_ADDRESS, 16, 8, (desc), 32, 0)
+
+#define to_dma_desc_sw(tx) \
+ container_of(tx, struct xgene_dma_desc_sw, tx)
+#define to_dma_chan(dchan) \
+ container_of(dchan, struct xgene_dma_chan, dma_chan)
+
+#define chan_dbg(chan, fmt, arg...) \
+ dev_dbg(chan->dev, "%s: " fmt, chan->name, ##arg)
+#define chan_err(chan, fmt, arg...) \
+ dev_err(chan->dev, "%s: " fmt, chan->name, ##arg)
+
+struct xgene_dma_desc_hw {
+ __le64 m0;
+ __le64 m1;
+ __le64 m2;
+ __le64 m3;
+};
+
+enum xgene_dma_ring_cfgsize {
+ XGENE_DMA_RING_CFG_SIZE_512B,
+ XGENE_DMA_RING_CFG_SIZE_2KB,
+ XGENE_DMA_RING_CFG_SIZE_16KB,
+ XGENE_DMA_RING_CFG_SIZE_64KB,
+ XGENE_DMA_RING_CFG_SIZE_512KB,
+ XGENE_DMA_RING_CFG_SIZE_INVALID
+};
+
+struct xgene_dma_ring {
+ struct xgene_dma *pdma;
+ u8 buf_num;
+ u16 id;
+ u16 num;
+ u16 head;
+ u16 owner;
+ u16 slots;
+ u16 dst_ring_num;
+ u32 size;
+ void __iomem *cmd;
+ void __iomem *cmd_base;
+ dma_addr_t desc_paddr;
+ u32 state[XGENE_DMA_RING_NUM_CONFIG];
+ enum xgene_dma_ring_cfgsize cfgsize;
+ union {
+ void *desc_vaddr;
+ struct xgene_dma_desc_hw *desc_hw;
+ };
+};
+
+struct xgene_dma_desc_sw {
+ struct xgene_dma_desc_hw desc1;
+ struct xgene_dma_desc_hw desc2;
+ u32 flags;
+ struct list_head node;
+ struct list_head tx_list;
+ struct dma_async_tx_descriptor tx;
+};
+
+/**
+ * struct xgene_dma_chan - internal representation of an X-Gene DMA channel
+ * @dma_chan: dmaengine channel object member
+ * @pdma: X-Gene DMA device structure reference
+ * @dev: struct device reference for dma mapping api
+ * @id: raw id of this channel
+ * @rx_irq: channel IRQ
+ * @name: name of X-Gene DMA channel
+ * @lock: serializes enqueue/dequeue operations to the descriptor pool
+ * @pending: number of transaction request pushed to DMA controller for
+ * execution, but still waiting for completion,
+ * @max_outstanding: max number of outstanding request we can push to channel
+ * @ld_pending: descriptors which are queued to run, but have not yet been
+ * submitted to the hardware for execution
+ * @ld_running: descriptors which are currently being executing by the hardware
+ * @ld_completed: descriptors which have finished execution by the hardware.
+ * These descriptors have already had their cleanup actions run. They
+ * are waiting for the ACK bit to be set by the async tx API.
+ * @desc_pool: descriptor pool for DMA operations
+ * @tasklet: bottom half where all completed descriptors cleans
+ * @tx_ring: transmit ring descriptor that we use to prepare actual
+ * descriptors for further executions
+ * @rx_ring: receive ring descriptor that we use to get completed DMA
+ * descriptors during cleanup time
+ */
+struct xgene_dma_chan {
+ struct dma_chan dma_chan;
+ struct xgene_dma *pdma;
+ struct device *dev;
+ int id;
+ int rx_irq;
+ char name[10];
+ spinlock_t lock;
+ int pending;
+ int max_outstanding;
+ struct list_head ld_pending;
+ struct list_head ld_running;
+ struct list_head ld_completed;
+ struct dma_pool *desc_pool;
+ struct tasklet_struct tasklet;
+ struct xgene_dma_ring tx_ring;
+ struct xgene_dma_ring rx_ring;
+};
+
+/**
+ * struct xgene_dma - internal representation of an X-Gene DMA device
+ * @dev: reference to this device's struct device
+ * @clk: reference to this device's clock
+ * @err_irq: DMA error irq number
+ * @ring_num: start id number for DMA ring
+ * @csr_dma: base for DMA register access
+ * @csr_ring: base for DMA ring register access
+ * @csr_ring_cmd: base for DMA ring command register access
+ * @csr_efuse: base for efuse register access
+ * @dma_dev: embedded struct dma_device
+ * @chan: reference to X-Gene DMA channels
+ */
+struct xgene_dma {
+ struct device *dev;
+ struct clk *clk;
+ int err_irq;
+ int ring_num;
+ void __iomem *csr_dma;
+ void __iomem *csr_ring;
+ void __iomem *csr_ring_cmd;
+ void __iomem *csr_efuse;
+ struct dma_device dma_dev[XGENE_DMA_MAX_CHANNEL];
+ struct xgene_dma_chan chan[XGENE_DMA_MAX_CHANNEL];
+};
+
+static const char * const xgene_dma_desc_err[] = {
+ [ERR_DESC_AXI] = "AXI error when reading src/dst link list",
+ [ERR_BAD_DESC] = "ERR or El_ERR fields not set to zero in desc",
+ [ERR_READ_DATA_AXI] = "AXI error when reading data",
+ [ERR_WRITE_DATA_AXI] = "AXI error when writing data",
+ [ERR_FBP_TIMEOUT] = "Timeout on bufpool fetch",
+ [ERR_ECC] = "ECC double bit error",
+ [ERR_DIFF_SIZE] = "Bufpool too small to hold all the DIF result",
+ [ERR_SCT_GAT_LEN] = "Gather and scatter data length not same",
+ [ERR_CRC_ERR] = "CRC error",
+ [ERR_CHKSUM] = "Checksum error",
+ [ERR_DIF] = "DIF error",
+};
+
+static const char * const xgene_dma_err[] = {
+ [ERR_DIF_SIZE_INT] = "DIF size error",
+ [ERR_GS_ERR_INT] = "Gather scatter not same size error",
+ [ERR_FPB_TIMEO_INT] = "Free pool time out error",
+ [ERR_WFIFO_OVF_INT] = "Write FIFO over flow error",
+ [ERR_RFIFO_OVF_INT] = "Read FIFO over flow error",
+ [ERR_WR_TIMEO_INT] = "Write time out error",
+ [ERR_RD_TIMEO_INT] = "Read time out error",
+ [ERR_WR_ERR_INT] = "HBF bus write error",
+ [ERR_RD_ERR_INT] = "HBF bus read error",
+ [ERR_BAD_DESC_INT] = "Ring descriptor HE0 not set error",
+ [ERR_DESC_DST_INT] = "HFB reading dst link address error",
+ [ERR_DESC_SRC_INT] = "HFB reading src link address error",
+};
+
+static bool is_pq_enabled(struct xgene_dma *pdma)
+{
+ u32 val;
+
+ val = ioread32(pdma->csr_efuse + XGENE_SOC_JTAG1_SHADOW);
+ return !(val & XGENE_DMA_PQ_DISABLE_MASK);
+}
+
+static u64 xgene_dma_encode_len(size_t len)
+{
+ return (len < XGENE_DMA_MAX_BYTE_CNT) ?
+ ((u64)len << XGENE_DMA_DESC_BUFLEN_POS) :
+ XGENE_DMA_16K_BUFFER_LEN_CODE;
+}
+
+static u8 xgene_dma_encode_xor_flyby(u32 src_cnt)
+{
+ static u8 flyby_type[] = {
+ FLYBY_2SRC_XOR, /* Dummy */
+ FLYBY_2SRC_XOR, /* Dummy */
+ FLYBY_2SRC_XOR,
+ FLYBY_3SRC_XOR,
+ FLYBY_4SRC_XOR,
+ FLYBY_5SRC_XOR
+ };
+
+ return flyby_type[src_cnt];
+}
+
+static void xgene_dma_set_src_buffer(__le64 *ext8, size_t *len,
+ dma_addr_t *paddr)
+{
+ size_t nbytes = (*len < XGENE_DMA_MAX_BYTE_CNT) ?
+ *len : XGENE_DMA_MAX_BYTE_CNT;
+
+ *ext8 |= cpu_to_le64(*paddr);
+ *ext8 |= cpu_to_le64(xgene_dma_encode_len(nbytes));
+ *len -= nbytes;
+ *paddr += nbytes;
+}
+
+static __le64 *xgene_dma_lookup_ext8(struct xgene_dma_desc_hw *desc, int idx)
+{
+ switch (idx) {
+ case 0:
+ return &desc->m1;
+ case 1:
+ return &desc->m0;
+ case 2:
+ return &desc->m3;
+ case 3:
+ return &desc->m2;
+ default:
+ pr_err("Invalid dma descriptor index\n");
+ }
+
+ return NULL;
+}
+
+static void xgene_dma_init_desc(struct xgene_dma_desc_hw *desc,
+ u16 dst_ring_num)
+{
+ desc->m0 |= cpu_to_le64(XGENE_DMA_DESC_IN_BIT);
+ desc->m0 |= cpu_to_le64((u64)XGENE_DMA_RING_OWNER_DMA <<
+ XGENE_DMA_DESC_RTYPE_POS);
+ desc->m1 |= cpu_to_le64(XGENE_DMA_DESC_C_BIT);
+ desc->m3 |= cpu_to_le64((u64)dst_ring_num <<
+ XGENE_DMA_DESC_HOENQ_NUM_POS);
+}
+
+static void xgene_dma_prep_xor_desc(struct xgene_dma_chan *chan,
+ struct xgene_dma_desc_sw *desc_sw,
+ dma_addr_t *dst, dma_addr_t *src,
+ u32 src_cnt, size_t *nbytes,
+ const u8 *scf)
+{
+ struct xgene_dma_desc_hw *desc1, *desc2;
+ size_t len = *nbytes;
+ int i;
+
+ desc1 = &desc_sw->desc1;
+ desc2 = &desc_sw->desc2;
+
+ /* Initialize DMA descriptor */
+ xgene_dma_init_desc(desc1, chan->tx_ring.dst_ring_num);
+
+ /* Set destination address */
+ desc1->m2 |= cpu_to_le64(XGENE_DMA_DESC_DR_BIT);
+ desc1->m3 |= cpu_to_le64(*dst);
+
+ /* We have multiple source addresses, so need to set NV bit*/
+ desc1->m0 |= cpu_to_le64(XGENE_DMA_DESC_NV_BIT);
+
+ /* Set flyby opcode */
+ desc1->m2 |= cpu_to_le64(xgene_dma_encode_xor_flyby(src_cnt));
+
+ /* Set 1st to 5th source addresses */
+ for (i = 0; i < src_cnt; i++) {
+ len = *nbytes;
+ xgene_dma_set_src_buffer((i == 0) ? &desc1->m1 :
+ xgene_dma_lookup_ext8(desc2, i - 1),
+ &len, &src[i]);
+ desc1->m2 |= cpu_to_le64((scf[i] << ((i + 1) * 8)));
+ }
+
+ /* Update meta data */
+ *nbytes = len;
+ *dst += XGENE_DMA_MAX_BYTE_CNT;
+
+ /* We need always 64B descriptor to perform xor or pq operations */
+ desc_sw->flags |= XGENE_DMA_FLAG_64B_DESC;
+}
+
+static dma_cookie_t xgene_dma_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+ struct xgene_dma_desc_sw *desc;
+ struct xgene_dma_chan *chan;
+ dma_cookie_t cookie;
+
+ if (unlikely(!tx))
+ return -EINVAL;
+
+ chan = to_dma_chan(tx->chan);
+ desc = to_dma_desc_sw(tx);
+
+ spin_lock_bh(&chan->lock);
+
+ cookie = dma_cookie_assign(tx);
+
+ /* Add this transaction list onto the tail of the pending queue */
+ list_splice_tail_init(&desc->tx_list, &chan->ld_pending);
+
+ spin_unlock_bh(&chan->lock);
+
+ return cookie;
+}
+
+static void xgene_dma_clean_descriptor(struct xgene_dma_chan *chan,
+ struct xgene_dma_desc_sw *desc)
+{
+ list_del(&desc->node);
+ chan_dbg(chan, "LD %p free\n", desc);
+ dma_pool_free(chan->desc_pool, desc, desc->tx.phys);
+}
+
+static struct xgene_dma_desc_sw *xgene_dma_alloc_descriptor(
+ struct xgene_dma_chan *chan)
+{
+ struct xgene_dma_desc_sw *desc;
+ dma_addr_t phys;
+
+ desc = dma_pool_zalloc(chan->desc_pool, GFP_NOWAIT, &phys);
+ if (!desc) {
+ chan_err(chan, "Failed to allocate LDs\n");
+ return NULL;
+ }
+
+ INIT_LIST_HEAD(&desc->tx_list);
+ desc->tx.phys = phys;
+ desc->tx.tx_submit = xgene_dma_tx_submit;
+ dma_async_tx_descriptor_init(&desc->tx, &chan->dma_chan);
+
+ chan_dbg(chan, "LD %p allocated\n", desc);
+
+ return desc;
+}
+
+/**
+ * xgene_dma_clean_completed_descriptor - free all descriptors which
+ * has been completed and acked
+ * @chan: X-Gene DMA channel
+ *
+ * This function is used on all completed and acked descriptors.
+ */
+static void xgene_dma_clean_completed_descriptor(struct xgene_dma_chan *chan)
+{
+ struct xgene_dma_desc_sw *desc, *_desc;
+
+ /* Run the callback for each descriptor, in order */
+ list_for_each_entry_safe(desc, _desc, &chan->ld_completed, node) {
+ if (async_tx_test_ack(&desc->tx))
+ xgene_dma_clean_descriptor(chan, desc);
+ }
+}
+
+/**
+ * xgene_dma_run_tx_complete_actions - cleanup a single link descriptor
+ * @chan: X-Gene DMA channel
+ * @desc: descriptor to cleanup and free
+ *
+ * This function is used on a descriptor which has been executed by the DMA
+ * controller. It will run any callbacks, submit any dependencies.
+ */
+static void xgene_dma_run_tx_complete_actions(struct xgene_dma_chan *chan,
+ struct xgene_dma_desc_sw *desc)
+{
+ struct dma_async_tx_descriptor *tx = &desc->tx;
+
+ /*
+ * If this is not the last transaction in the group,
+ * then no need to complete cookie and run any callback as
+ * this is not the tx_descriptor which had been sent to caller
+ * of this DMA request
+ */
+
+ if (tx->cookie == 0)
+ return;
+
+ dma_cookie_complete(tx);
+ dma_descriptor_unmap(tx);
+
+ /* Run the link descriptor callback function */
+ dmaengine_desc_get_callback_invoke(tx, NULL);
+
+ /* Run any dependencies */
+ dma_run_dependencies(tx);
+}
+
+/**
+ * xgene_dma_clean_running_descriptor - move the completed descriptor from
+ * ld_running to ld_completed
+ * @chan: X-Gene DMA channel
+ * @desc: the descriptor which is completed
+ *
+ * Free the descriptor directly if acked by async_tx api,
+ * else move it to queue ld_completed.
+ */
+static void xgene_dma_clean_running_descriptor(struct xgene_dma_chan *chan,
+ struct xgene_dma_desc_sw *desc)
+{
+ /* Remove from the list of running transactions */
+ list_del(&desc->node);
+
+ /*
+ * the client is allowed to attach dependent operations
+ * until 'ack' is set
+ */
+ if (!async_tx_test_ack(&desc->tx)) {
+ /*
+ * Move this descriptor to the list of descriptors which is
+ * completed, but still awaiting the 'ack' bit to be set.
+ */
+ list_add_tail(&desc->node, &chan->ld_completed);
+ return;
+ }
+
+ chan_dbg(chan, "LD %p free\n", desc);
+ dma_pool_free(chan->desc_pool, desc, desc->tx.phys);
+}
+
+static void xgene_chan_xfer_request(struct xgene_dma_chan *chan,
+ struct xgene_dma_desc_sw *desc_sw)
+{
+ struct xgene_dma_ring *ring = &chan->tx_ring;
+ struct xgene_dma_desc_hw *desc_hw;
+
+ /* Get hw descriptor from DMA tx ring */
+ desc_hw = &ring->desc_hw[ring->head];
+
+ /*
+ * Increment the head count to point next
+ * descriptor for next time
+ */
+ if (++ring->head == ring->slots)
+ ring->head = 0;
+
+ /* Copy prepared sw descriptor data to hw descriptor */
+ memcpy(desc_hw, &desc_sw->desc1, sizeof(*desc_hw));
+
+ /*
+ * Check if we have prepared 64B descriptor,
+ * in this case we need one more hw descriptor
+ */
+ if (desc_sw->flags & XGENE_DMA_FLAG_64B_DESC) {
+ desc_hw = &ring->desc_hw[ring->head];
+
+ if (++ring->head == ring->slots)
+ ring->head = 0;
+
+ memcpy(desc_hw, &desc_sw->desc2, sizeof(*desc_hw));
+ }
+
+ /* Increment the pending transaction count */
+ chan->pending += ((desc_sw->flags &
+ XGENE_DMA_FLAG_64B_DESC) ? 2 : 1);
+
+ /* Notify the hw that we have descriptor ready for execution */
+ iowrite32((desc_sw->flags & XGENE_DMA_FLAG_64B_DESC) ?
+ 2 : 1, ring->cmd);
+}
+
+/**
+ * xgene_chan_xfer_ld_pending - push any pending transactions to hw
+ * @chan : X-Gene DMA channel
+ *
+ * LOCKING: must hold chan->lock
+ */
+static void xgene_chan_xfer_ld_pending(struct xgene_dma_chan *chan)
+{
+ struct xgene_dma_desc_sw *desc_sw, *_desc_sw;
+
+ /*
+ * If the list of pending descriptors is empty, then we
+ * don't need to do any work at all
+ */
+ if (list_empty(&chan->ld_pending)) {
+ chan_dbg(chan, "No pending LDs\n");
+ return;
+ }
+
+ /*
+ * Move elements from the queue of pending transactions onto the list
+ * of running transactions and push it to hw for further executions
+ */
+ list_for_each_entry_safe(desc_sw, _desc_sw, &chan->ld_pending, node) {
+ /*
+ * Check if have pushed max number of transactions to hw
+ * as capable, so let's stop here and will push remaining
+ * elements from pening ld queue after completing some
+ * descriptors that we have already pushed
+ */
+ if (chan->pending >= chan->max_outstanding)
+ return;
+
+ xgene_chan_xfer_request(chan, desc_sw);
+
+ /*
+ * Delete this element from ld pending queue and append it to
+ * ld running queue
+ */
+ list_move_tail(&desc_sw->node, &chan->ld_running);
+ }
+}
+
+/**
+ * xgene_dma_cleanup_descriptors - cleanup link descriptors which are completed
+ * and move them to ld_completed to free until flag 'ack' is set
+ * @chan: X-Gene DMA channel
+ *
+ * This function is used on descriptors which have been executed by the DMA
+ * controller. It will run any callbacks, submit any dependencies, then
+ * free these descriptors if flag 'ack' is set.
+ */
+static void xgene_dma_cleanup_descriptors(struct xgene_dma_chan *chan)
+{
+ struct xgene_dma_ring *ring = &chan->rx_ring;
+ struct xgene_dma_desc_sw *desc_sw, *_desc_sw;
+ struct xgene_dma_desc_hw *desc_hw;
+ struct list_head ld_completed;
+ u8 status;
+
+ INIT_LIST_HEAD(&ld_completed);
+
+ spin_lock(&chan->lock);
+
+ /* Clean already completed and acked descriptors */
+ xgene_dma_clean_completed_descriptor(chan);
+
+ /* Move all completed descriptors to ld completed queue, in order */
+ list_for_each_entry_safe(desc_sw, _desc_sw, &chan->ld_running, node) {
+ /* Get subsequent hw descriptor from DMA rx ring */
+ desc_hw = &ring->desc_hw[ring->head];
+
+ /* Check if this descriptor has been completed */
+ if (unlikely(le64_to_cpu(desc_hw->m0) ==
+ XGENE_DMA_DESC_EMPTY_SIGNATURE))
+ break;
+
+ if (++ring->head == ring->slots)
+ ring->head = 0;
+
+ /* Check if we have any error with DMA transactions */
+ status = XGENE_DMA_DESC_STATUS(
+ XGENE_DMA_DESC_ELERR_RD(le64_to_cpu(
+ desc_hw->m0)),
+ XGENE_DMA_DESC_LERR_RD(le64_to_cpu(
+ desc_hw->m0)));
+ if (status) {
+ /* Print the DMA error type */
+ chan_err(chan, "%s\n", xgene_dma_desc_err[status]);
+
+ /*
+ * We have DMA transactions error here. Dump DMA Tx
+ * and Rx descriptors for this request */
+ XGENE_DMA_DESC_DUMP(&desc_sw->desc1,
+ "X-Gene DMA TX DESC1: ");
+
+ if (desc_sw->flags & XGENE_DMA_FLAG_64B_DESC)
+ XGENE_DMA_DESC_DUMP(&desc_sw->desc2,
+ "X-Gene DMA TX DESC2: ");
+
+ XGENE_DMA_DESC_DUMP(desc_hw,
+ "X-Gene DMA RX ERR DESC: ");
+ }
+
+ /* Notify the hw about this completed descriptor */
+ iowrite32(-1, ring->cmd);
+
+ /* Mark this hw descriptor as processed */
+ desc_hw->m0 = cpu_to_le64(XGENE_DMA_DESC_EMPTY_SIGNATURE);
+
+ /*
+ * Decrement the pending transaction count
+ * as we have processed one
+ */
+ chan->pending -= ((desc_sw->flags &
+ XGENE_DMA_FLAG_64B_DESC) ? 2 : 1);
+
+ /*
+ * Delete this node from ld running queue and append it to
+ * ld completed queue for further processing
+ */
+ list_move_tail(&desc_sw->node, &ld_completed);
+ }
+
+ /*
+ * Start any pending transactions automatically
+ * In the ideal case, we keep the DMA controller busy while we go
+ * ahead and free the descriptors below.
+ */
+ xgene_chan_xfer_ld_pending(chan);
+
+ spin_unlock(&chan->lock);
+
+ /* Run the callback for each descriptor, in order */
+ list_for_each_entry_safe(desc_sw, _desc_sw, &ld_completed, node) {
+ xgene_dma_run_tx_complete_actions(chan, desc_sw);
+ xgene_dma_clean_running_descriptor(chan, desc_sw);
+ }
+}
+
+static int xgene_dma_alloc_chan_resources(struct dma_chan *dchan)
+{
+ struct xgene_dma_chan *chan = to_dma_chan(dchan);
+
+ /* Has this channel already been allocated? */
+ if (chan->desc_pool)
+ return 1;
+
+ chan->desc_pool = dma_pool_create(chan->name, chan->dev,
+ sizeof(struct xgene_dma_desc_sw),
+ 0, 0);
+ if (!chan->desc_pool) {
+ chan_err(chan, "Failed to allocate descriptor pool\n");
+ return -ENOMEM;
+ }
+
+ chan_dbg(chan, "Allocate descriptor pool\n");
+
+ return 1;
+}
+
+/**
+ * xgene_dma_free_desc_list - Free all descriptors in a queue
+ * @chan: X-Gene DMA channel
+ * @list: the list to free
+ *
+ * LOCKING: must hold chan->lock
+ */
+static void xgene_dma_free_desc_list(struct xgene_dma_chan *chan,
+ struct list_head *list)
+{
+ struct xgene_dma_desc_sw *desc, *_desc;
+
+ list_for_each_entry_safe(desc, _desc, list, node)
+ xgene_dma_clean_descriptor(chan, desc);
+}
+
+static void xgene_dma_free_chan_resources(struct dma_chan *dchan)
+{
+ struct xgene_dma_chan *chan = to_dma_chan(dchan);
+
+ chan_dbg(chan, "Free all resources\n");
+
+ if (!chan->desc_pool)
+ return;
+
+ /* Process all running descriptor */
+ xgene_dma_cleanup_descriptors(chan);
+
+ spin_lock_bh(&chan->lock);
+
+ /* Clean all link descriptor queues */
+ xgene_dma_free_desc_list(chan, &chan->ld_pending);
+ xgene_dma_free_desc_list(chan, &chan->ld_running);
+ xgene_dma_free_desc_list(chan, &chan->ld_completed);
+
+ spin_unlock_bh(&chan->lock);
+
+ /* Delete this channel DMA pool */
+ dma_pool_destroy(chan->desc_pool);
+ chan->desc_pool = NULL;
+}
+
+static struct dma_async_tx_descriptor *xgene_dma_prep_xor(
+ struct dma_chan *dchan, dma_addr_t dst, dma_addr_t *src,
+ u32 src_cnt, size_t len, unsigned long flags)
+{
+ struct xgene_dma_desc_sw *first = NULL, *new;
+ struct xgene_dma_chan *chan;
+ static u8 multi[XGENE_DMA_MAX_XOR_SRC] = {
+ 0x01, 0x01, 0x01, 0x01, 0x01};
+
+ if (unlikely(!dchan || !len))
+ return NULL;
+
+ chan = to_dma_chan(dchan);
+
+ do {
+ /* Allocate the link descriptor from DMA pool */
+ new = xgene_dma_alloc_descriptor(chan);
+ if (!new)
+ goto fail;
+
+ /* Prepare xor DMA descriptor */
+ xgene_dma_prep_xor_desc(chan, new, &dst, src,
+ src_cnt, &len, multi);
+
+ if (!first)
+ first = new;
+
+ new->tx.cookie = 0;
+ async_tx_ack(&new->tx);
+
+ /* Insert the link descriptor to the LD ring */
+ list_add_tail(&new->node, &first->tx_list);
+ } while (len);
+
+ new->tx.flags = flags; /* client is in control of this ack */
+ new->tx.cookie = -EBUSY;
+ list_splice(&first->tx_list, &new->tx_list);
+
+ return &new->tx;
+
+fail:
+ if (!first)
+ return NULL;
+
+ xgene_dma_free_desc_list(chan, &first->tx_list);
+ return NULL;
+}
+
+static struct dma_async_tx_descriptor *xgene_dma_prep_pq(
+ struct dma_chan *dchan, dma_addr_t *dst, dma_addr_t *src,
+ u32 src_cnt, const u8 *scf, size_t len, unsigned long flags)
+{
+ struct xgene_dma_desc_sw *first = NULL, *new;
+ struct xgene_dma_chan *chan;
+ size_t _len = len;
+ dma_addr_t _src[XGENE_DMA_MAX_XOR_SRC];
+ static u8 multi[XGENE_DMA_MAX_XOR_SRC] = {0x01, 0x01, 0x01, 0x01, 0x01};
+
+ if (unlikely(!dchan || !len))
+ return NULL;
+
+ chan = to_dma_chan(dchan);
+
+ /*
+ * Save source addresses on local variable, may be we have to
+ * prepare two descriptor to generate P and Q if both enabled
+ * in the flags by client
+ */
+ memcpy(_src, src, sizeof(*src) * src_cnt);
+
+ if (flags & DMA_PREP_PQ_DISABLE_P)
+ len = 0;
+
+ if (flags & DMA_PREP_PQ_DISABLE_Q)
+ _len = 0;
+
+ do {
+ /* Allocate the link descriptor from DMA pool */
+ new = xgene_dma_alloc_descriptor(chan);
+ if (!new)
+ goto fail;
+
+ if (!first)
+ first = new;
+
+ new->tx.cookie = 0;
+ async_tx_ack(&new->tx);
+
+ /* Insert the link descriptor to the LD ring */
+ list_add_tail(&new->node, &first->tx_list);
+
+ /*
+ * Prepare DMA descriptor to generate P,
+ * if DMA_PREP_PQ_DISABLE_P flag is not set
+ */
+ if (len) {
+ xgene_dma_prep_xor_desc(chan, new, &dst[0], src,
+ src_cnt, &len, multi);
+ continue;
+ }
+
+ /*
+ * Prepare DMA descriptor to generate Q,
+ * if DMA_PREP_PQ_DISABLE_Q flag is not set
+ */
+ if (_len) {
+ xgene_dma_prep_xor_desc(chan, new, &dst[1], _src,
+ src_cnt, &_len, scf);
+ }
+ } while (len || _len);
+
+ new->tx.flags = flags; /* client is in control of this ack */
+ new->tx.cookie = -EBUSY;
+ list_splice(&first->tx_list, &new->tx_list);
+
+ return &new->tx;
+
+fail:
+ if (!first)
+ return NULL;
+
+ xgene_dma_free_desc_list(chan, &first->tx_list);
+ return NULL;
+}
+
+static void xgene_dma_issue_pending(struct dma_chan *dchan)
+{
+ struct xgene_dma_chan *chan = to_dma_chan(dchan);
+
+ spin_lock_bh(&chan->lock);
+ xgene_chan_xfer_ld_pending(chan);
+ spin_unlock_bh(&chan->lock);
+}
+
+static enum dma_status xgene_dma_tx_status(struct dma_chan *dchan,
+ dma_cookie_t cookie,
+ struct dma_tx_state *txstate)
+{
+ return dma_cookie_status(dchan, cookie, txstate);
+}
+
+static void xgene_dma_tasklet_cb(struct tasklet_struct *t)
+{
+ struct xgene_dma_chan *chan = from_tasklet(chan, t, tasklet);
+
+ /* Run all cleanup for descriptors which have been completed */
+ xgene_dma_cleanup_descriptors(chan);
+
+ /* Re-enable DMA channel IRQ */
+ enable_irq(chan->rx_irq);
+}
+
+static irqreturn_t xgene_dma_chan_ring_isr(int irq, void *id)
+{
+ struct xgene_dma_chan *chan = (struct xgene_dma_chan *)id;
+
+ BUG_ON(!chan);
+
+ /*
+ * Disable DMA channel IRQ until we process completed
+ * descriptors
+ */
+ disable_irq_nosync(chan->rx_irq);
+
+ /*
+ * Schedule the tasklet to handle all cleanup of the current
+ * transaction. It will start a new transaction if there is
+ * one pending.
+ */
+ tasklet_schedule(&chan->tasklet);
+
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t xgene_dma_err_isr(int irq, void *id)
+{
+ struct xgene_dma *pdma = (struct xgene_dma *)id;
+ unsigned long int_mask;
+ u32 val, i;
+
+ val = ioread32(pdma->csr_dma + XGENE_DMA_INT);
+
+ /* Clear DMA interrupts */
+ iowrite32(val, pdma->csr_dma + XGENE_DMA_INT);
+
+ /* Print DMA error info */
+ int_mask = val >> XGENE_DMA_INT_MASK_SHIFT;
+ for_each_set_bit(i, &int_mask, ARRAY_SIZE(xgene_dma_err))
+ dev_err(pdma->dev,
+ "Interrupt status 0x%08X %s\n", val, xgene_dma_err[i]);
+
+ return IRQ_HANDLED;
+}
+
+static void xgene_dma_wr_ring_state(struct xgene_dma_ring *ring)
+{
+ int i;
+
+ iowrite32(ring->num, ring->pdma->csr_ring + XGENE_DMA_RING_STATE);
+
+ for (i = 0; i < XGENE_DMA_RING_NUM_CONFIG; i++)
+ iowrite32(ring->state[i], ring->pdma->csr_ring +
+ XGENE_DMA_RING_STATE_WR_BASE + (i * 4));
+}
+
+static void xgene_dma_clr_ring_state(struct xgene_dma_ring *ring)
+{
+ memset(ring->state, 0, sizeof(u32) * XGENE_DMA_RING_NUM_CONFIG);
+ xgene_dma_wr_ring_state(ring);
+}
+
+static void xgene_dma_setup_ring(struct xgene_dma_ring *ring)
+{
+ void *ring_cfg = ring->state;
+ u64 addr = ring->desc_paddr;
+ u32 i, val;
+
+ ring->slots = ring->size / XGENE_DMA_RING_WQ_DESC_SIZE;
+
+ /* Clear DMA ring state */
+ xgene_dma_clr_ring_state(ring);
+
+ /* Set DMA ring type */
+ XGENE_DMA_RING_TYPE_SET(ring_cfg, XGENE_DMA_RING_TYPE_REGULAR);
+
+ if (ring->owner == XGENE_DMA_RING_OWNER_DMA) {
+ /* Set recombination buffer and timeout */
+ XGENE_DMA_RING_RECOMBBUF_SET(ring_cfg);
+ XGENE_DMA_RING_RECOMTIMEOUTL_SET(ring_cfg);
+ XGENE_DMA_RING_RECOMTIMEOUTH_SET(ring_cfg);
+ }
+
+ /* Initialize DMA ring state */
+ XGENE_DMA_RING_SELTHRSH_SET(ring_cfg);
+ XGENE_DMA_RING_ACCEPTLERR_SET(ring_cfg);
+ XGENE_DMA_RING_COHERENT_SET(ring_cfg);
+ XGENE_DMA_RING_ADDRL_SET(ring_cfg, addr);
+ XGENE_DMA_RING_ADDRH_SET(ring_cfg, addr);
+ XGENE_DMA_RING_SIZE_SET(ring_cfg, ring->cfgsize);
+
+ /* Write DMA ring configurations */
+ xgene_dma_wr_ring_state(ring);
+
+ /* Set DMA ring id */
+ iowrite32(XGENE_DMA_RING_ID_SETUP(ring->id),
+ ring->pdma->csr_ring + XGENE_DMA_RING_ID);
+
+ /* Set DMA ring buffer */
+ iowrite32(XGENE_DMA_RING_ID_BUF_SETUP(ring->num),
+ ring->pdma->csr_ring + XGENE_DMA_RING_ID_BUF);
+
+ if (ring->owner != XGENE_DMA_RING_OWNER_CPU)
+ return;
+
+ /* Set empty signature to DMA Rx ring descriptors */
+ for (i = 0; i < ring->slots; i++) {
+ struct xgene_dma_desc_hw *desc;
+
+ desc = &ring->desc_hw[i];
+ desc->m0 = cpu_to_le64(XGENE_DMA_DESC_EMPTY_SIGNATURE);
+ }
+
+ /* Enable DMA Rx ring interrupt */
+ val = ioread32(ring->pdma->csr_ring + XGENE_DMA_RING_NE_INT_MODE);
+ XGENE_DMA_RING_NE_INT_MODE_SET(val, ring->buf_num);
+ iowrite32(val, ring->pdma->csr_ring + XGENE_DMA_RING_NE_INT_MODE);
+}
+
+static void xgene_dma_clear_ring(struct xgene_dma_ring *ring)
+{
+ u32 ring_id, val;
+
+ if (ring->owner == XGENE_DMA_RING_OWNER_CPU) {
+ /* Disable DMA Rx ring interrupt */
+ val = ioread32(ring->pdma->csr_ring +
+ XGENE_DMA_RING_NE_INT_MODE);
+ XGENE_DMA_RING_NE_INT_MODE_RESET(val, ring->buf_num);
+ iowrite32(val, ring->pdma->csr_ring +
+ XGENE_DMA_RING_NE_INT_MODE);
+ }
+
+ /* Clear DMA ring state */
+ ring_id = XGENE_DMA_RING_ID_SETUP(ring->id);
+ iowrite32(ring_id, ring->pdma->csr_ring + XGENE_DMA_RING_ID);
+
+ iowrite32(0, ring->pdma->csr_ring + XGENE_DMA_RING_ID_BUF);
+ xgene_dma_clr_ring_state(ring);
+}
+
+static void xgene_dma_set_ring_cmd(struct xgene_dma_ring *ring)
+{
+ ring->cmd_base = ring->pdma->csr_ring_cmd +
+ XGENE_DMA_RING_CMD_BASE_OFFSET((ring->num -
+ XGENE_DMA_RING_NUM));
+
+ ring->cmd = ring->cmd_base + XGENE_DMA_RING_CMD_OFFSET;
+}
+
+static int xgene_dma_get_ring_size(struct xgene_dma_chan *chan,
+ enum xgene_dma_ring_cfgsize cfgsize)
+{
+ int size;
+
+ switch (cfgsize) {
+ case XGENE_DMA_RING_CFG_SIZE_512B:
+ size = 0x200;
+ break;
+ case XGENE_DMA_RING_CFG_SIZE_2KB:
+ size = 0x800;
+ break;
+ case XGENE_DMA_RING_CFG_SIZE_16KB:
+ size = 0x4000;
+ break;
+ case XGENE_DMA_RING_CFG_SIZE_64KB:
+ size = 0x10000;
+ break;
+ case XGENE_DMA_RING_CFG_SIZE_512KB:
+ size = 0x80000;
+ break;
+ default:
+ chan_err(chan, "Unsupported cfg ring size %d\n", cfgsize);
+ return -EINVAL;
+ }
+
+ return size;
+}
+
+static void xgene_dma_delete_ring_one(struct xgene_dma_ring *ring)
+{
+ /* Clear DMA ring configurations */
+ xgene_dma_clear_ring(ring);
+
+ /* De-allocate DMA ring descriptor */
+ if (ring->desc_vaddr) {
+ dma_free_coherent(ring->pdma->dev, ring->size,
+ ring->desc_vaddr, ring->desc_paddr);
+ ring->desc_vaddr = NULL;
+ }
+}
+
+static void xgene_dma_delete_chan_rings(struct xgene_dma_chan *chan)
+{
+ xgene_dma_delete_ring_one(&chan->rx_ring);
+ xgene_dma_delete_ring_one(&chan->tx_ring);
+}
+
+static int xgene_dma_create_ring_one(struct xgene_dma_chan *chan,
+ struct xgene_dma_ring *ring,
+ enum xgene_dma_ring_cfgsize cfgsize)
+{
+ int ret;
+
+ /* Setup DMA ring descriptor variables */
+ ring->pdma = chan->pdma;
+ ring->cfgsize = cfgsize;
+ ring->num = chan->pdma->ring_num++;
+ ring->id = XGENE_DMA_RING_ID_GET(ring->owner, ring->buf_num);
+
+ ret = xgene_dma_get_ring_size(chan, cfgsize);
+ if (ret <= 0)
+ return ret;
+ ring->size = ret;
+
+ /* Allocate memory for DMA ring descriptor */
+ ring->desc_vaddr = dma_alloc_coherent(chan->dev, ring->size,
+ &ring->desc_paddr, GFP_KERNEL);
+ if (!ring->desc_vaddr) {
+ chan_err(chan, "Failed to allocate ring desc\n");
+ return -ENOMEM;
+ }
+
+ /* Configure and enable DMA ring */
+ xgene_dma_set_ring_cmd(ring);
+ xgene_dma_setup_ring(ring);
+
+ return 0;
+}
+
+static int xgene_dma_create_chan_rings(struct xgene_dma_chan *chan)
+{
+ struct xgene_dma_ring *rx_ring = &chan->rx_ring;
+ struct xgene_dma_ring *tx_ring = &chan->tx_ring;
+ int ret;
+
+ /* Create DMA Rx ring descriptor */
+ rx_ring->owner = XGENE_DMA_RING_OWNER_CPU;
+ rx_ring->buf_num = XGENE_DMA_CPU_BUFNUM + chan->id;
+
+ ret = xgene_dma_create_ring_one(chan, rx_ring,
+ XGENE_DMA_RING_CFG_SIZE_64KB);
+ if (ret)
+ return ret;
+
+ chan_dbg(chan, "Rx ring id 0x%X num %d desc 0x%p\n",
+ rx_ring->id, rx_ring->num, rx_ring->desc_vaddr);
+
+ /* Create DMA Tx ring descriptor */
+ tx_ring->owner = XGENE_DMA_RING_OWNER_DMA;
+ tx_ring->buf_num = XGENE_DMA_BUFNUM + chan->id;
+
+ ret = xgene_dma_create_ring_one(chan, tx_ring,
+ XGENE_DMA_RING_CFG_SIZE_64KB);
+ if (ret) {
+ xgene_dma_delete_ring_one(rx_ring);
+ return ret;
+ }
+
+ tx_ring->dst_ring_num = XGENE_DMA_RING_DST_ID(rx_ring->num);
+
+ chan_dbg(chan,
+ "Tx ring id 0x%X num %d desc 0x%p\n",
+ tx_ring->id, tx_ring->num, tx_ring->desc_vaddr);
+
+ /* Set the max outstanding request possible to this channel */
+ chan->max_outstanding = tx_ring->slots;
+
+ return ret;
+}
+
+static int xgene_dma_init_rings(struct xgene_dma *pdma)
+{
+ int ret, i, j;
+
+ for (i = 0; i < XGENE_DMA_MAX_CHANNEL; i++) {
+ ret = xgene_dma_create_chan_rings(&pdma->chan[i]);
+ if (ret) {
+ for (j = 0; j < i; j++)
+ xgene_dma_delete_chan_rings(&pdma->chan[j]);
+ return ret;
+ }
+ }
+
+ return ret;
+}
+
+static void xgene_dma_enable(struct xgene_dma *pdma)
+{
+ u32 val;
+
+ /* Configure and enable DMA engine */
+ val = ioread32(pdma->csr_dma + XGENE_DMA_GCR);
+ XGENE_DMA_CH_SETUP(val);
+ XGENE_DMA_ENABLE(val);
+ iowrite32(val, pdma->csr_dma + XGENE_DMA_GCR);
+}
+
+static void xgene_dma_disable(struct xgene_dma *pdma)
+{
+ u32 val;
+
+ val = ioread32(pdma->csr_dma + XGENE_DMA_GCR);
+ XGENE_DMA_DISABLE(val);
+ iowrite32(val, pdma->csr_dma + XGENE_DMA_GCR);
+}
+
+static void xgene_dma_mask_interrupts(struct xgene_dma *pdma)
+{
+ /*
+ * Mask DMA ring overflow, underflow and
+ * AXI write/read error interrupts
+ */
+ iowrite32(XGENE_DMA_INT_ALL_MASK,
+ pdma->csr_dma + XGENE_DMA_RING_INT0_MASK);
+ iowrite32(XGENE_DMA_INT_ALL_MASK,
+ pdma->csr_dma + XGENE_DMA_RING_INT1_MASK);
+ iowrite32(XGENE_DMA_INT_ALL_MASK,
+ pdma->csr_dma + XGENE_DMA_RING_INT2_MASK);
+ iowrite32(XGENE_DMA_INT_ALL_MASK,
+ pdma->csr_dma + XGENE_DMA_RING_INT3_MASK);
+ iowrite32(XGENE_DMA_INT_ALL_MASK,
+ pdma->csr_dma + XGENE_DMA_RING_INT4_MASK);
+
+ /* Mask DMA error interrupts */
+ iowrite32(XGENE_DMA_INT_ALL_MASK, pdma->csr_dma + XGENE_DMA_INT_MASK);
+}
+
+static void xgene_dma_unmask_interrupts(struct xgene_dma *pdma)
+{
+ /*
+ * Unmask DMA ring overflow, underflow and
+ * AXI write/read error interrupts
+ */
+ iowrite32(XGENE_DMA_INT_ALL_UNMASK,
+ pdma->csr_dma + XGENE_DMA_RING_INT0_MASK);
+ iowrite32(XGENE_DMA_INT_ALL_UNMASK,
+ pdma->csr_dma + XGENE_DMA_RING_INT1_MASK);
+ iowrite32(XGENE_DMA_INT_ALL_UNMASK,
+ pdma->csr_dma + XGENE_DMA_RING_INT2_MASK);
+ iowrite32(XGENE_DMA_INT_ALL_UNMASK,
+ pdma->csr_dma + XGENE_DMA_RING_INT3_MASK);
+ iowrite32(XGENE_DMA_INT_ALL_UNMASK,
+ pdma->csr_dma + XGENE_DMA_RING_INT4_MASK);
+
+ /* Unmask DMA error interrupts */
+ iowrite32(XGENE_DMA_INT_ALL_UNMASK,
+ pdma->csr_dma + XGENE_DMA_INT_MASK);
+}
+
+static void xgene_dma_init_hw(struct xgene_dma *pdma)
+{
+ u32 val;
+
+ /* Associate DMA ring to corresponding ring HW */
+ iowrite32(XGENE_DMA_ASSOC_RING_MNGR1,
+ pdma->csr_dma + XGENE_DMA_CFG_RING_WQ_ASSOC);
+
+ /* Configure RAID6 polynomial control setting */
+ if (is_pq_enabled(pdma))
+ iowrite32(XGENE_DMA_RAID6_MULTI_CTRL(0x1D),
+ pdma->csr_dma + XGENE_DMA_RAID6_CONT);
+ else
+ dev_info(pdma->dev, "PQ is disabled in HW\n");
+
+ xgene_dma_enable(pdma);
+ xgene_dma_unmask_interrupts(pdma);
+
+ /* Get DMA id and version info */
+ val = ioread32(pdma->csr_dma + XGENE_DMA_IPBRR);
+
+ /* DMA device info */
+ dev_info(pdma->dev,
+ "X-Gene DMA v%d.%02d.%02d driver registered %d channels",
+ XGENE_DMA_REV_NO_RD(val), XGENE_DMA_BUS_ID_RD(val),
+ XGENE_DMA_DEV_ID_RD(val), XGENE_DMA_MAX_CHANNEL);
+}
+
+static int xgene_dma_init_ring_mngr(struct xgene_dma *pdma)
+{
+ if (ioread32(pdma->csr_ring + XGENE_DMA_RING_CLKEN) &&
+ (!ioread32(pdma->csr_ring + XGENE_DMA_RING_SRST)))
+ return 0;
+
+ iowrite32(0x3, pdma->csr_ring + XGENE_DMA_RING_CLKEN);
+ iowrite32(0x0, pdma->csr_ring + XGENE_DMA_RING_SRST);
+
+ /* Bring up memory */
+ iowrite32(0x0, pdma->csr_ring + XGENE_DMA_RING_MEM_RAM_SHUTDOWN);
+
+ /* Force a barrier */
+ ioread32(pdma->csr_ring + XGENE_DMA_RING_MEM_RAM_SHUTDOWN);
+
+ /* reset may take up to 1ms */
+ usleep_range(1000, 1100);
+
+ if (ioread32(pdma->csr_ring + XGENE_DMA_RING_BLK_MEM_RDY)
+ != XGENE_DMA_RING_BLK_MEM_RDY_VAL) {
+ dev_err(pdma->dev,
+ "Failed to release ring mngr memory from shutdown\n");
+ return -ENODEV;
+ }
+
+ /* program threshold set 1 and all hysteresis */
+ iowrite32(XGENE_DMA_RING_THRESLD0_SET1_VAL,
+ pdma->csr_ring + XGENE_DMA_RING_THRESLD0_SET1);
+ iowrite32(XGENE_DMA_RING_THRESLD1_SET1_VAL,
+ pdma->csr_ring + XGENE_DMA_RING_THRESLD1_SET1);
+ iowrite32(XGENE_DMA_RING_HYSTERESIS_VAL,
+ pdma->csr_ring + XGENE_DMA_RING_HYSTERESIS);
+
+ /* Enable QPcore and assign error queue */
+ iowrite32(XGENE_DMA_RING_ENABLE,
+ pdma->csr_ring + XGENE_DMA_RING_CONFIG);
+
+ return 0;
+}
+
+static int xgene_dma_init_mem(struct xgene_dma *pdma)
+{
+ int ret;
+
+ ret = xgene_dma_init_ring_mngr(pdma);
+ if (ret)
+ return ret;
+
+ /* Bring up memory */
+ iowrite32(0x0, pdma->csr_dma + XGENE_DMA_MEM_RAM_SHUTDOWN);
+
+ /* Force a barrier */
+ ioread32(pdma->csr_dma + XGENE_DMA_MEM_RAM_SHUTDOWN);
+
+ /* reset may take up to 1ms */
+ usleep_range(1000, 1100);
+
+ if (ioread32(pdma->csr_dma + XGENE_DMA_BLK_MEM_RDY)
+ != XGENE_DMA_BLK_MEM_RDY_VAL) {
+ dev_err(pdma->dev,
+ "Failed to release DMA memory from shutdown\n");
+ return -ENODEV;
+ }
+
+ return 0;
+}
+
+static int xgene_dma_request_irqs(struct xgene_dma *pdma)
+{
+ struct xgene_dma_chan *chan;
+ int ret, i, j;
+
+ /* Register DMA error irq */
+ ret = devm_request_irq(pdma->dev, pdma->err_irq, xgene_dma_err_isr,
+ 0, "dma_error", pdma);
+ if (ret) {
+ dev_err(pdma->dev,
+ "Failed to register error IRQ %d\n", pdma->err_irq);
+ return ret;
+ }
+
+ /* Register DMA channel rx irq */
+ for (i = 0; i < XGENE_DMA_MAX_CHANNEL; i++) {
+ chan = &pdma->chan[i];
+ irq_set_status_flags(chan->rx_irq, IRQ_DISABLE_UNLAZY);
+ ret = devm_request_irq(chan->dev, chan->rx_irq,
+ xgene_dma_chan_ring_isr,
+ 0, chan->name, chan);
+ if (ret) {
+ chan_err(chan, "Failed to register Rx IRQ %d\n",
+ chan->rx_irq);
+ devm_free_irq(pdma->dev, pdma->err_irq, pdma);
+
+ for (j = 0; j < i; j++) {
+ chan = &pdma->chan[i];
+ irq_clear_status_flags(chan->rx_irq, IRQ_DISABLE_UNLAZY);
+ devm_free_irq(chan->dev, chan->rx_irq, chan);
+ }
+
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+static void xgene_dma_free_irqs(struct xgene_dma *pdma)
+{
+ struct xgene_dma_chan *chan;
+ int i;
+
+ /* Free DMA device error irq */
+ devm_free_irq(pdma->dev, pdma->err_irq, pdma);
+
+ for (i = 0; i < XGENE_DMA_MAX_CHANNEL; i++) {
+ chan = &pdma->chan[i];
+ irq_clear_status_flags(chan->rx_irq, IRQ_DISABLE_UNLAZY);
+ devm_free_irq(chan->dev, chan->rx_irq, chan);
+ }
+}
+
+static void xgene_dma_set_caps(struct xgene_dma_chan *chan,
+ struct dma_device *dma_dev)
+{
+ /* Initialize DMA device capability mask */
+ dma_cap_zero(dma_dev->cap_mask);
+
+ /* Set DMA device capability */
+
+ /* Basically here, the X-Gene SoC DMA engine channel 0 supports XOR
+ * and channel 1 supports XOR, PQ both. First thing here is we have
+ * mechanism in hw to enable/disable PQ/XOR supports on channel 1,
+ * we can make sure this by reading SoC Efuse register.
+ * Second thing, we have hw errata that if we run channel 0 and
+ * channel 1 simultaneously with executing XOR and PQ request,
+ * suddenly DMA engine hangs, So here we enable XOR on channel 0 only
+ * if XOR and PQ supports on channel 1 is disabled.
+ */
+ if ((chan->id == XGENE_DMA_PQ_CHANNEL) &&
+ is_pq_enabled(chan->pdma)) {
+ dma_cap_set(DMA_PQ, dma_dev->cap_mask);
+ dma_cap_set(DMA_XOR, dma_dev->cap_mask);
+ } else if ((chan->id == XGENE_DMA_XOR_CHANNEL) &&
+ !is_pq_enabled(chan->pdma)) {
+ dma_cap_set(DMA_XOR, dma_dev->cap_mask);
+ }
+
+ /* Set base and prep routines */
+ dma_dev->dev = chan->dev;
+ dma_dev->device_alloc_chan_resources = xgene_dma_alloc_chan_resources;
+ dma_dev->device_free_chan_resources = xgene_dma_free_chan_resources;
+ dma_dev->device_issue_pending = xgene_dma_issue_pending;
+ dma_dev->device_tx_status = xgene_dma_tx_status;
+
+ if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) {
+ dma_dev->device_prep_dma_xor = xgene_dma_prep_xor;
+ dma_dev->max_xor = XGENE_DMA_MAX_XOR_SRC;
+ dma_dev->xor_align = DMAENGINE_ALIGN_64_BYTES;
+ }
+
+ if (dma_has_cap(DMA_PQ, dma_dev->cap_mask)) {
+ dma_dev->device_prep_dma_pq = xgene_dma_prep_pq;
+ dma_dev->max_pq = XGENE_DMA_MAX_XOR_SRC;
+ dma_dev->pq_align = DMAENGINE_ALIGN_64_BYTES;
+ }
+}
+
+static int xgene_dma_async_register(struct xgene_dma *pdma, int id)
+{
+ struct xgene_dma_chan *chan = &pdma->chan[id];
+ struct dma_device *dma_dev = &pdma->dma_dev[id];
+ int ret;
+
+ chan->dma_chan.device = dma_dev;
+
+ spin_lock_init(&chan->lock);
+ INIT_LIST_HEAD(&chan->ld_pending);
+ INIT_LIST_HEAD(&chan->ld_running);
+ INIT_LIST_HEAD(&chan->ld_completed);
+ tasklet_setup(&chan->tasklet, xgene_dma_tasklet_cb);
+
+ chan->pending = 0;
+ chan->desc_pool = NULL;
+ dma_cookie_init(&chan->dma_chan);
+
+ /* Setup dma device capabilities and prep routines */
+ xgene_dma_set_caps(chan, dma_dev);
+
+ /* Initialize DMA device list head */
+ INIT_LIST_HEAD(&dma_dev->channels);
+ list_add_tail(&chan->dma_chan.device_node, &dma_dev->channels);
+
+ /* Register with Linux async DMA framework*/
+ ret = dma_async_device_register(dma_dev);
+ if (ret) {
+ chan_err(chan, "Failed to register async device %d", ret);
+ tasklet_kill(&chan->tasklet);
+
+ return ret;
+ }
+
+ /* DMA capability info */
+ dev_info(pdma->dev,
+ "%s: CAPABILITY ( %s%s)\n", dma_chan_name(&chan->dma_chan),
+ dma_has_cap(DMA_XOR, dma_dev->cap_mask) ? "XOR " : "",
+ dma_has_cap(DMA_PQ, dma_dev->cap_mask) ? "PQ " : "");
+
+ return 0;
+}
+
+static int xgene_dma_init_async(struct xgene_dma *pdma)
+{
+ int ret, i, j;
+
+ for (i = 0; i < XGENE_DMA_MAX_CHANNEL ; i++) {
+ ret = xgene_dma_async_register(pdma, i);
+ if (ret) {
+ for (j = 0; j < i; j++) {
+ dma_async_device_unregister(&pdma->dma_dev[j]);
+ tasklet_kill(&pdma->chan[j].tasklet);
+ }
+
+ return ret;
+ }
+ }
+
+ return ret;
+}
+
+static void xgene_dma_async_unregister(struct xgene_dma *pdma)
+{
+ int i;
+
+ for (i = 0; i < XGENE_DMA_MAX_CHANNEL; i++)
+ dma_async_device_unregister(&pdma->dma_dev[i]);
+}
+
+static void xgene_dma_init_channels(struct xgene_dma *pdma)
+{
+ struct xgene_dma_chan *chan;
+ int i;
+
+ pdma->ring_num = XGENE_DMA_RING_NUM;
+
+ for (i = 0; i < XGENE_DMA_MAX_CHANNEL; i++) {
+ chan = &pdma->chan[i];
+ chan->dev = pdma->dev;
+ chan->pdma = pdma;
+ chan->id = i;
+ snprintf(chan->name, sizeof(chan->name), "dmachan%d", chan->id);
+ }
+}
+
+static int xgene_dma_get_resources(struct platform_device *pdev,
+ struct xgene_dma *pdma)
+{
+ struct resource *res;
+ int irq, i;
+
+ /* Get DMA csr region */
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ if (!res) {
+ dev_err(&pdev->dev, "Failed to get csr region\n");
+ return -ENXIO;
+ }
+
+ pdma->csr_dma = devm_ioremap(&pdev->dev, res->start,
+ resource_size(res));
+ if (!pdma->csr_dma) {
+ dev_err(&pdev->dev, "Failed to ioremap csr region");
+ return -ENOMEM;
+ }
+
+ /* Get DMA ring csr region */
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+ if (!res) {
+ dev_err(&pdev->dev, "Failed to get ring csr region\n");
+ return -ENXIO;
+ }
+
+ pdma->csr_ring = devm_ioremap(&pdev->dev, res->start,
+ resource_size(res));
+ if (!pdma->csr_ring) {
+ dev_err(&pdev->dev, "Failed to ioremap ring csr region");
+ return -ENOMEM;
+ }
+
+ /* Get DMA ring cmd csr region */
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 2);
+ if (!res) {
+ dev_err(&pdev->dev, "Failed to get ring cmd csr region\n");
+ return -ENXIO;
+ }
+
+ pdma->csr_ring_cmd = devm_ioremap(&pdev->dev, res->start,
+ resource_size(res));
+ if (!pdma->csr_ring_cmd) {
+ dev_err(&pdev->dev, "Failed to ioremap ring cmd csr region");
+ return -ENOMEM;
+ }
+
+ pdma->csr_ring_cmd += XGENE_DMA_RING_CMD_SM_OFFSET;
+
+ /* Get efuse csr region */
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 3);
+ if (!res) {
+ dev_err(&pdev->dev, "Failed to get efuse csr region\n");
+ return -ENXIO;
+ }
+
+ pdma->csr_efuse = devm_ioremap(&pdev->dev, res->start,
+ resource_size(res));
+ if (!pdma->csr_efuse) {
+ dev_err(&pdev->dev, "Failed to ioremap efuse csr region");
+ return -ENOMEM;
+ }
+
+ /* Get DMA error interrupt */
+ irq = platform_get_irq(pdev, 0);
+ if (irq <= 0)
+ return -ENXIO;
+
+ pdma->err_irq = irq;
+
+ /* Get DMA Rx ring descriptor interrupts for all DMA channels */
+ for (i = 1; i <= XGENE_DMA_MAX_CHANNEL; i++) {
+ irq = platform_get_irq(pdev, i);
+ if (irq <= 0)
+ return -ENXIO;
+
+ pdma->chan[i - 1].rx_irq = irq;
+ }
+
+ return 0;
+}
+
+static int xgene_dma_probe(struct platform_device *pdev)
+{
+ struct xgene_dma *pdma;
+ int ret, i;
+
+ pdma = devm_kzalloc(&pdev->dev, sizeof(*pdma), GFP_KERNEL);
+ if (!pdma)
+ return -ENOMEM;
+
+ pdma->dev = &pdev->dev;
+ platform_set_drvdata(pdev, pdma);
+
+ ret = xgene_dma_get_resources(pdev, pdma);
+ if (ret)
+ return ret;
+
+ pdma->clk = devm_clk_get(&pdev->dev, NULL);
+ if (IS_ERR(pdma->clk) && !ACPI_COMPANION(&pdev->dev)) {
+ dev_err(&pdev->dev, "Failed to get clk\n");
+ return PTR_ERR(pdma->clk);
+ }
+
+ /* Enable clk before accessing registers */
+ if (!IS_ERR(pdma->clk)) {
+ ret = clk_prepare_enable(pdma->clk);
+ if (ret) {
+ dev_err(&pdev->dev, "Failed to enable clk %d\n", ret);
+ return ret;
+ }
+ }
+
+ /* Remove DMA RAM out of shutdown */
+ ret = xgene_dma_init_mem(pdma);
+ if (ret)
+ goto err_clk_enable;
+
+ ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(42));
+ if (ret) {
+ dev_err(&pdev->dev, "No usable DMA configuration\n");
+ goto err_dma_mask;
+ }
+
+ /* Initialize DMA channels software state */
+ xgene_dma_init_channels(pdma);
+
+ /* Configue DMA rings */
+ ret = xgene_dma_init_rings(pdma);
+ if (ret)
+ goto err_clk_enable;
+
+ ret = xgene_dma_request_irqs(pdma);
+ if (ret)
+ goto err_request_irq;
+
+ /* Configure and enable DMA engine */
+ xgene_dma_init_hw(pdma);
+
+ /* Register DMA device with linux async framework */
+ ret = xgene_dma_init_async(pdma);
+ if (ret)
+ goto err_async_init;
+
+ return 0;
+
+err_async_init:
+ xgene_dma_free_irqs(pdma);
+
+err_request_irq:
+ for (i = 0; i < XGENE_DMA_MAX_CHANNEL; i++)
+ xgene_dma_delete_chan_rings(&pdma->chan[i]);
+
+err_dma_mask:
+err_clk_enable:
+ if (!IS_ERR(pdma->clk))
+ clk_disable_unprepare(pdma->clk);
+
+ return ret;
+}
+
+static int xgene_dma_remove(struct platform_device *pdev)
+{
+ struct xgene_dma *pdma = platform_get_drvdata(pdev);
+ struct xgene_dma_chan *chan;
+ int i;
+
+ xgene_dma_async_unregister(pdma);
+
+ /* Mask interrupts and disable DMA engine */
+ xgene_dma_mask_interrupts(pdma);
+ xgene_dma_disable(pdma);
+ xgene_dma_free_irqs(pdma);
+
+ for (i = 0; i < XGENE_DMA_MAX_CHANNEL; i++) {
+ chan = &pdma->chan[i];
+ tasklet_kill(&chan->tasklet);
+ xgene_dma_delete_chan_rings(chan);
+ }
+
+ if (!IS_ERR(pdma->clk))
+ clk_disable_unprepare(pdma->clk);
+
+ return 0;
+}
+
+#ifdef CONFIG_ACPI
+static const struct acpi_device_id xgene_dma_acpi_match_ptr[] = {
+ {"APMC0D43", 0},
+ {},
+};
+MODULE_DEVICE_TABLE(acpi, xgene_dma_acpi_match_ptr);
+#endif
+
+static const struct of_device_id xgene_dma_of_match_ptr[] = {
+ {.compatible = "apm,xgene-storm-dma",},
+ {},
+};
+MODULE_DEVICE_TABLE(of, xgene_dma_of_match_ptr);
+
+static struct platform_driver xgene_dma_driver = {
+ .probe = xgene_dma_probe,
+ .remove = xgene_dma_remove,
+ .driver = {
+ .name = "X-Gene-DMA",
+ .of_match_table = xgene_dma_of_match_ptr,
+ .acpi_match_table = ACPI_PTR(xgene_dma_acpi_match_ptr),
+ },
+};
+
+module_platform_driver(xgene_dma_driver);
+
+MODULE_DESCRIPTION("APM X-Gene SoC DMA driver");
+MODULE_AUTHOR("Rameshwar Prasad Sahu <rsahu@apm.com>");
+MODULE_AUTHOR("Loc Ho <lho@apm.com>");
+MODULE_LICENSE("GPL");
+MODULE_VERSION("1.0");
diff --git a/drivers/dma/xilinx/Makefile b/drivers/dma/xilinx/Makefile
new file mode 100644
index 000000000..767bb45f6
--- /dev/null
+++ b/drivers/dma/xilinx/Makefile
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
+obj-$(CONFIG_XILINX_DMA) += xilinx_dma.o
+obj-$(CONFIG_XILINX_ZYNQMP_DMA) += zynqmp_dma.o
+obj-$(CONFIG_XILINX_ZYNQMP_DPDMA) += xilinx_dpdma.o
diff --git a/drivers/dma/xilinx/xilinx_dma.c b/drivers/dma/xilinx/xilinx_dma.c
new file mode 100644
index 000000000..766017570
--- /dev/null
+++ b/drivers/dma/xilinx/xilinx_dma.c
@@ -0,0 +1,3229 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * DMA driver for Xilinx Video DMA Engine
+ *
+ * Copyright (C) 2010-2014 Xilinx, Inc. All rights reserved.
+ *
+ * Based on the Freescale DMA driver.
+ *
+ * Description:
+ * The AXI Video Direct Memory Access (AXI VDMA) core is a soft Xilinx IP
+ * core that provides high-bandwidth direct memory access between memory
+ * and AXI4-Stream type video target peripherals. The core provides efficient
+ * two dimensional DMA operations with independent asynchronous read (S2MM)
+ * and write (MM2S) channel operation. It can be configured to have either
+ * one channel or two channels. If configured as two channels, one is to
+ * transmit to the video device (MM2S) and another is to receive from the
+ * video device (S2MM). Initialization, status, interrupt and management
+ * registers are accessed through an AXI4-Lite slave interface.
+ *
+ * The AXI Direct Memory Access (AXI DMA) core is a soft Xilinx IP core that
+ * provides high-bandwidth one dimensional direct memory access between memory
+ * and AXI4-Stream target peripherals. It supports one receive and one
+ * transmit channel, both of them optional at synthesis time.
+ *
+ * The AXI CDMA, is a soft IP, which provides high-bandwidth Direct Memory
+ * Access (DMA) between a memory-mapped source address and a memory-mapped
+ * destination address.
+ *
+ * The AXI Multichannel Direct Memory Access (AXI MCDMA) core is a soft
+ * Xilinx IP that provides high-bandwidth direct memory access between
+ * memory and AXI4-Stream target peripherals. It provides scatter gather
+ * (SG) interface with multiple channels independent configuration support.
+ *
+ */
+
+#include <linux/bitops.h>
+#include <linux/dmapool.h>
+#include <linux/dma/xilinx_dma.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/iopoll.h>
+#include <linux/module.h>
+#include <linux/of_address.h>
+#include <linux/of_dma.h>
+#include <linux/of_platform.h>
+#include <linux/of_irq.h>
+#include <linux/slab.h>
+#include <linux/clk.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
+
+#include "../dmaengine.h"
+
+/* Register/Descriptor Offsets */
+#define XILINX_DMA_MM2S_CTRL_OFFSET 0x0000
+#define XILINX_DMA_S2MM_CTRL_OFFSET 0x0030
+#define XILINX_VDMA_MM2S_DESC_OFFSET 0x0050
+#define XILINX_VDMA_S2MM_DESC_OFFSET 0x00a0
+
+/* Control Registers */
+#define XILINX_DMA_REG_DMACR 0x0000
+#define XILINX_DMA_DMACR_DELAY_MAX 0xff
+#define XILINX_DMA_DMACR_DELAY_SHIFT 24
+#define XILINX_DMA_DMACR_FRAME_COUNT_MAX 0xff
+#define XILINX_DMA_DMACR_FRAME_COUNT_SHIFT 16
+#define XILINX_DMA_DMACR_ERR_IRQ BIT(14)
+#define XILINX_DMA_DMACR_DLY_CNT_IRQ BIT(13)
+#define XILINX_DMA_DMACR_FRM_CNT_IRQ BIT(12)
+#define XILINX_DMA_DMACR_MASTER_SHIFT 8
+#define XILINX_DMA_DMACR_FSYNCSRC_SHIFT 5
+#define XILINX_DMA_DMACR_FRAMECNT_EN BIT(4)
+#define XILINX_DMA_DMACR_GENLOCK_EN BIT(3)
+#define XILINX_DMA_DMACR_RESET BIT(2)
+#define XILINX_DMA_DMACR_CIRC_EN BIT(1)
+#define XILINX_DMA_DMACR_RUNSTOP BIT(0)
+#define XILINX_DMA_DMACR_FSYNCSRC_MASK GENMASK(6, 5)
+#define XILINX_DMA_DMACR_DELAY_MASK GENMASK(31, 24)
+#define XILINX_DMA_DMACR_FRAME_COUNT_MASK GENMASK(23, 16)
+#define XILINX_DMA_DMACR_MASTER_MASK GENMASK(11, 8)
+
+#define XILINX_DMA_REG_DMASR 0x0004
+#define XILINX_DMA_DMASR_EOL_LATE_ERR BIT(15)
+#define XILINX_DMA_DMASR_ERR_IRQ BIT(14)
+#define XILINX_DMA_DMASR_DLY_CNT_IRQ BIT(13)
+#define XILINX_DMA_DMASR_FRM_CNT_IRQ BIT(12)
+#define XILINX_DMA_DMASR_SOF_LATE_ERR BIT(11)
+#define XILINX_DMA_DMASR_SG_DEC_ERR BIT(10)
+#define XILINX_DMA_DMASR_SG_SLV_ERR BIT(9)
+#define XILINX_DMA_DMASR_EOF_EARLY_ERR BIT(8)
+#define XILINX_DMA_DMASR_SOF_EARLY_ERR BIT(7)
+#define XILINX_DMA_DMASR_DMA_DEC_ERR BIT(6)
+#define XILINX_DMA_DMASR_DMA_SLAVE_ERR BIT(5)
+#define XILINX_DMA_DMASR_DMA_INT_ERR BIT(4)
+#define XILINX_DMA_DMASR_SG_MASK BIT(3)
+#define XILINX_DMA_DMASR_IDLE BIT(1)
+#define XILINX_DMA_DMASR_HALTED BIT(0)
+#define XILINX_DMA_DMASR_DELAY_MASK GENMASK(31, 24)
+#define XILINX_DMA_DMASR_FRAME_COUNT_MASK GENMASK(23, 16)
+
+#define XILINX_DMA_REG_CURDESC 0x0008
+#define XILINX_DMA_REG_TAILDESC 0x0010
+#define XILINX_DMA_REG_REG_INDEX 0x0014
+#define XILINX_DMA_REG_FRMSTORE 0x0018
+#define XILINX_DMA_REG_THRESHOLD 0x001c
+#define XILINX_DMA_REG_FRMPTR_STS 0x0024
+#define XILINX_DMA_REG_PARK_PTR 0x0028
+#define XILINX_DMA_PARK_PTR_WR_REF_SHIFT 8
+#define XILINX_DMA_PARK_PTR_WR_REF_MASK GENMASK(12, 8)
+#define XILINX_DMA_PARK_PTR_RD_REF_SHIFT 0
+#define XILINX_DMA_PARK_PTR_RD_REF_MASK GENMASK(4, 0)
+#define XILINX_DMA_REG_VDMA_VERSION 0x002c
+
+/* Register Direct Mode Registers */
+#define XILINX_DMA_REG_VSIZE 0x0000
+#define XILINX_DMA_REG_HSIZE 0x0004
+
+#define XILINX_DMA_REG_FRMDLY_STRIDE 0x0008
+#define XILINX_DMA_FRMDLY_STRIDE_FRMDLY_SHIFT 24
+#define XILINX_DMA_FRMDLY_STRIDE_STRIDE_SHIFT 0
+
+#define XILINX_VDMA_REG_START_ADDRESS(n) (0x000c + 4 * (n))
+#define XILINX_VDMA_REG_START_ADDRESS_64(n) (0x000c + 8 * (n))
+
+#define XILINX_VDMA_REG_ENABLE_VERTICAL_FLIP 0x00ec
+#define XILINX_VDMA_ENABLE_VERTICAL_FLIP BIT(0)
+
+/* HW specific definitions */
+#define XILINX_MCDMA_MAX_CHANS_PER_DEVICE 0x20
+#define XILINX_DMA_MAX_CHANS_PER_DEVICE 0x2
+#define XILINX_CDMA_MAX_CHANS_PER_DEVICE 0x1
+
+#define XILINX_DMA_DMAXR_ALL_IRQ_MASK \
+ (XILINX_DMA_DMASR_FRM_CNT_IRQ | \
+ XILINX_DMA_DMASR_DLY_CNT_IRQ | \
+ XILINX_DMA_DMASR_ERR_IRQ)
+
+#define XILINX_DMA_DMASR_ALL_ERR_MASK \
+ (XILINX_DMA_DMASR_EOL_LATE_ERR | \
+ XILINX_DMA_DMASR_SOF_LATE_ERR | \
+ XILINX_DMA_DMASR_SG_DEC_ERR | \
+ XILINX_DMA_DMASR_SG_SLV_ERR | \
+ XILINX_DMA_DMASR_EOF_EARLY_ERR | \
+ XILINX_DMA_DMASR_SOF_EARLY_ERR | \
+ XILINX_DMA_DMASR_DMA_DEC_ERR | \
+ XILINX_DMA_DMASR_DMA_SLAVE_ERR | \
+ XILINX_DMA_DMASR_DMA_INT_ERR)
+
+/*
+ * Recoverable errors are DMA Internal error, SOF Early, EOF Early
+ * and SOF Late. They are only recoverable when C_FLUSH_ON_FSYNC
+ * is enabled in the h/w system.
+ */
+#define XILINX_DMA_DMASR_ERR_RECOVER_MASK \
+ (XILINX_DMA_DMASR_SOF_LATE_ERR | \
+ XILINX_DMA_DMASR_EOF_EARLY_ERR | \
+ XILINX_DMA_DMASR_SOF_EARLY_ERR | \
+ XILINX_DMA_DMASR_DMA_INT_ERR)
+
+/* Axi VDMA Flush on Fsync bits */
+#define XILINX_DMA_FLUSH_S2MM 3
+#define XILINX_DMA_FLUSH_MM2S 2
+#define XILINX_DMA_FLUSH_BOTH 1
+
+/* Delay loop counter to prevent hardware failure */
+#define XILINX_DMA_LOOP_COUNT 1000000
+
+/* AXI DMA Specific Registers/Offsets */
+#define XILINX_DMA_REG_SRCDSTADDR 0x18
+#define XILINX_DMA_REG_BTT 0x28
+
+/* AXI DMA Specific Masks/Bit fields */
+#define XILINX_DMA_MAX_TRANS_LEN_MIN 8
+#define XILINX_DMA_MAX_TRANS_LEN_MAX 23
+#define XILINX_DMA_V2_MAX_TRANS_LEN_MAX 26
+#define XILINX_DMA_CR_COALESCE_MAX GENMASK(23, 16)
+#define XILINX_DMA_CR_CYCLIC_BD_EN_MASK BIT(4)
+#define XILINX_DMA_CR_COALESCE_SHIFT 16
+#define XILINX_DMA_BD_SOP BIT(27)
+#define XILINX_DMA_BD_EOP BIT(26)
+#define XILINX_DMA_COALESCE_MAX 255
+#define XILINX_DMA_NUM_DESCS 255
+#define XILINX_DMA_NUM_APP_WORDS 5
+
+/* AXI CDMA Specific Registers/Offsets */
+#define XILINX_CDMA_REG_SRCADDR 0x18
+#define XILINX_CDMA_REG_DSTADDR 0x20
+
+/* AXI CDMA Specific Masks */
+#define XILINX_CDMA_CR_SGMODE BIT(3)
+
+#define xilinx_prep_dma_addr_t(addr) \
+ ((dma_addr_t)((u64)addr##_##msb << 32 | (addr)))
+
+/* AXI MCDMA Specific Registers/Offsets */
+#define XILINX_MCDMA_MM2S_CTRL_OFFSET 0x0000
+#define XILINX_MCDMA_S2MM_CTRL_OFFSET 0x0500
+#define XILINX_MCDMA_CHEN_OFFSET 0x0008
+#define XILINX_MCDMA_CH_ERR_OFFSET 0x0010
+#define XILINX_MCDMA_RXINT_SER_OFFSET 0x0020
+#define XILINX_MCDMA_TXINT_SER_OFFSET 0x0028
+#define XILINX_MCDMA_CHAN_CR_OFFSET(x) (0x40 + (x) * 0x40)
+#define XILINX_MCDMA_CHAN_SR_OFFSET(x) (0x44 + (x) * 0x40)
+#define XILINX_MCDMA_CHAN_CDESC_OFFSET(x) (0x48 + (x) * 0x40)
+#define XILINX_MCDMA_CHAN_TDESC_OFFSET(x) (0x50 + (x) * 0x40)
+
+/* AXI MCDMA Specific Masks/Shifts */
+#define XILINX_MCDMA_COALESCE_SHIFT 16
+#define XILINX_MCDMA_COALESCE_MAX 24
+#define XILINX_MCDMA_IRQ_ALL_MASK GENMASK(7, 5)
+#define XILINX_MCDMA_COALESCE_MASK GENMASK(23, 16)
+#define XILINX_MCDMA_CR_RUNSTOP_MASK BIT(0)
+#define XILINX_MCDMA_IRQ_IOC_MASK BIT(5)
+#define XILINX_MCDMA_IRQ_DELAY_MASK BIT(6)
+#define XILINX_MCDMA_IRQ_ERR_MASK BIT(7)
+#define XILINX_MCDMA_BD_EOP BIT(30)
+#define XILINX_MCDMA_BD_SOP BIT(31)
+
+/**
+ * struct xilinx_vdma_desc_hw - Hardware Descriptor
+ * @next_desc: Next Descriptor Pointer @0x00
+ * @pad1: Reserved @0x04
+ * @buf_addr: Buffer address @0x08
+ * @buf_addr_msb: MSB of Buffer address @0x0C
+ * @vsize: Vertical Size @0x10
+ * @hsize: Horizontal Size @0x14
+ * @stride: Number of bytes between the first
+ * pixels of each horizontal line @0x18
+ */
+struct xilinx_vdma_desc_hw {
+ u32 next_desc;
+ u32 pad1;
+ u32 buf_addr;
+ u32 buf_addr_msb;
+ u32 vsize;
+ u32 hsize;
+ u32 stride;
+} __aligned(64);
+
+/**
+ * struct xilinx_axidma_desc_hw - Hardware Descriptor for AXI DMA
+ * @next_desc: Next Descriptor Pointer @0x00
+ * @next_desc_msb: MSB of Next Descriptor Pointer @0x04
+ * @buf_addr: Buffer address @0x08
+ * @buf_addr_msb: MSB of Buffer address @0x0C
+ * @reserved1: Reserved @0x10
+ * @reserved2: Reserved @0x14
+ * @control: Control field @0x18
+ * @status: Status field @0x1C
+ * @app: APP Fields @0x20 - 0x30
+ */
+struct xilinx_axidma_desc_hw {
+ u32 next_desc;
+ u32 next_desc_msb;
+ u32 buf_addr;
+ u32 buf_addr_msb;
+ u32 reserved1;
+ u32 reserved2;
+ u32 control;
+ u32 status;
+ u32 app[XILINX_DMA_NUM_APP_WORDS];
+} __aligned(64);
+
+/**
+ * struct xilinx_aximcdma_desc_hw - Hardware Descriptor for AXI MCDMA
+ * @next_desc: Next Descriptor Pointer @0x00
+ * @next_desc_msb: MSB of Next Descriptor Pointer @0x04
+ * @buf_addr: Buffer address @0x08
+ * @buf_addr_msb: MSB of Buffer address @0x0C
+ * @rsvd: Reserved field @0x10
+ * @control: Control Information field @0x14
+ * @status: Status field @0x18
+ * @sideband_status: Status of sideband signals @0x1C
+ * @app: APP Fields @0x20 - 0x30
+ */
+struct xilinx_aximcdma_desc_hw {
+ u32 next_desc;
+ u32 next_desc_msb;
+ u32 buf_addr;
+ u32 buf_addr_msb;
+ u32 rsvd;
+ u32 control;
+ u32 status;
+ u32 sideband_status;
+ u32 app[XILINX_DMA_NUM_APP_WORDS];
+} __aligned(64);
+
+/**
+ * struct xilinx_cdma_desc_hw - Hardware Descriptor
+ * @next_desc: Next Descriptor Pointer @0x00
+ * @next_desc_msb: Next Descriptor Pointer MSB @0x04
+ * @src_addr: Source address @0x08
+ * @src_addr_msb: Source address MSB @0x0C
+ * @dest_addr: Destination address @0x10
+ * @dest_addr_msb: Destination address MSB @0x14
+ * @control: Control field @0x18
+ * @status: Status field @0x1C
+ */
+struct xilinx_cdma_desc_hw {
+ u32 next_desc;
+ u32 next_desc_msb;
+ u32 src_addr;
+ u32 src_addr_msb;
+ u32 dest_addr;
+ u32 dest_addr_msb;
+ u32 control;
+ u32 status;
+} __aligned(64);
+
+/**
+ * struct xilinx_vdma_tx_segment - Descriptor segment
+ * @hw: Hardware descriptor
+ * @node: Node in the descriptor segments list
+ * @phys: Physical address of segment
+ */
+struct xilinx_vdma_tx_segment {
+ struct xilinx_vdma_desc_hw hw;
+ struct list_head node;
+ dma_addr_t phys;
+} __aligned(64);
+
+/**
+ * struct xilinx_axidma_tx_segment - Descriptor segment
+ * @hw: Hardware descriptor
+ * @node: Node in the descriptor segments list
+ * @phys: Physical address of segment
+ */
+struct xilinx_axidma_tx_segment {
+ struct xilinx_axidma_desc_hw hw;
+ struct list_head node;
+ dma_addr_t phys;
+} __aligned(64);
+
+/**
+ * struct xilinx_aximcdma_tx_segment - Descriptor segment
+ * @hw: Hardware descriptor
+ * @node: Node in the descriptor segments list
+ * @phys: Physical address of segment
+ */
+struct xilinx_aximcdma_tx_segment {
+ struct xilinx_aximcdma_desc_hw hw;
+ struct list_head node;
+ dma_addr_t phys;
+} __aligned(64);
+
+/**
+ * struct xilinx_cdma_tx_segment - Descriptor segment
+ * @hw: Hardware descriptor
+ * @node: Node in the descriptor segments list
+ * @phys: Physical address of segment
+ */
+struct xilinx_cdma_tx_segment {
+ struct xilinx_cdma_desc_hw hw;
+ struct list_head node;
+ dma_addr_t phys;
+} __aligned(64);
+
+/**
+ * struct xilinx_dma_tx_descriptor - Per Transaction structure
+ * @async_tx: Async transaction descriptor
+ * @segments: TX segments list
+ * @node: Node in the channel descriptors list
+ * @cyclic: Check for cyclic transfers.
+ * @err: Whether the descriptor has an error.
+ * @residue: Residue of the completed descriptor
+ */
+struct xilinx_dma_tx_descriptor {
+ struct dma_async_tx_descriptor async_tx;
+ struct list_head segments;
+ struct list_head node;
+ bool cyclic;
+ bool err;
+ u32 residue;
+};
+
+/**
+ * struct xilinx_dma_chan - Driver specific DMA channel structure
+ * @xdev: Driver specific device structure
+ * @ctrl_offset: Control registers offset
+ * @desc_offset: TX descriptor registers offset
+ * @lock: Descriptor operation lock
+ * @pending_list: Descriptors waiting
+ * @active_list: Descriptors ready to submit
+ * @done_list: Complete descriptors
+ * @free_seg_list: Free descriptors
+ * @common: DMA common channel
+ * @desc_pool: Descriptors pool
+ * @dev: The dma device
+ * @irq: Channel IRQ
+ * @id: Channel ID
+ * @direction: Transfer direction
+ * @num_frms: Number of frames
+ * @has_sg: Support scatter transfers
+ * @cyclic: Check for cyclic transfers.
+ * @genlock: Support genlock mode
+ * @err: Channel has errors
+ * @idle: Check for channel idle
+ * @terminating: Check for channel being synchronized by user
+ * @tasklet: Cleanup work after irq
+ * @config: Device configuration info
+ * @flush_on_fsync: Flush on Frame sync
+ * @desc_pendingcount: Descriptor pending count
+ * @ext_addr: Indicates 64 bit addressing is supported by dma channel
+ * @desc_submitcount: Descriptor h/w submitted count
+ * @seg_v: Statically allocated segments base
+ * @seg_mv: Statically allocated segments base for MCDMA
+ * @seg_p: Physical allocated segments base
+ * @cyclic_seg_v: Statically allocated segment base for cyclic transfers
+ * @cyclic_seg_p: Physical allocated segments base for cyclic dma
+ * @start_transfer: Differentiate b/w DMA IP's transfer
+ * @stop_transfer: Differentiate b/w DMA IP's quiesce
+ * @tdest: TDEST value for mcdma
+ * @has_vflip: S2MM vertical flip
+ */
+struct xilinx_dma_chan {
+ struct xilinx_dma_device *xdev;
+ u32 ctrl_offset;
+ u32 desc_offset;
+ spinlock_t lock;
+ struct list_head pending_list;
+ struct list_head active_list;
+ struct list_head done_list;
+ struct list_head free_seg_list;
+ struct dma_chan common;
+ struct dma_pool *desc_pool;
+ struct device *dev;
+ int irq;
+ int id;
+ enum dma_transfer_direction direction;
+ int num_frms;
+ bool has_sg;
+ bool cyclic;
+ bool genlock;
+ bool err;
+ bool idle;
+ bool terminating;
+ struct tasklet_struct tasklet;
+ struct xilinx_vdma_config config;
+ bool flush_on_fsync;
+ u32 desc_pendingcount;
+ bool ext_addr;
+ u32 desc_submitcount;
+ struct xilinx_axidma_tx_segment *seg_v;
+ struct xilinx_aximcdma_tx_segment *seg_mv;
+ dma_addr_t seg_p;
+ struct xilinx_axidma_tx_segment *cyclic_seg_v;
+ dma_addr_t cyclic_seg_p;
+ void (*start_transfer)(struct xilinx_dma_chan *chan);
+ int (*stop_transfer)(struct xilinx_dma_chan *chan);
+ u16 tdest;
+ bool has_vflip;
+};
+
+/**
+ * enum xdma_ip_type - DMA IP type.
+ *
+ * @XDMA_TYPE_AXIDMA: Axi dma ip.
+ * @XDMA_TYPE_CDMA: Axi cdma ip.
+ * @XDMA_TYPE_VDMA: Axi vdma ip.
+ * @XDMA_TYPE_AXIMCDMA: Axi MCDMA ip.
+ *
+ */
+enum xdma_ip_type {
+ XDMA_TYPE_AXIDMA = 0,
+ XDMA_TYPE_CDMA,
+ XDMA_TYPE_VDMA,
+ XDMA_TYPE_AXIMCDMA
+};
+
+struct xilinx_dma_config {
+ enum xdma_ip_type dmatype;
+ int (*clk_init)(struct platform_device *pdev, struct clk **axi_clk,
+ struct clk **tx_clk, struct clk **txs_clk,
+ struct clk **rx_clk, struct clk **rxs_clk);
+ irqreturn_t (*irq_handler)(int irq, void *data);
+ const int max_channels;
+};
+
+/**
+ * struct xilinx_dma_device - DMA device structure
+ * @regs: I/O mapped base address
+ * @dev: Device Structure
+ * @common: DMA device structure
+ * @chan: Driver specific DMA channel
+ * @flush_on_fsync: Flush on frame sync
+ * @ext_addr: Indicates 64 bit addressing is supported by dma device
+ * @pdev: Platform device structure pointer
+ * @dma_config: DMA config structure
+ * @axi_clk: DMA Axi4-lite interace clock
+ * @tx_clk: DMA mm2s clock
+ * @txs_clk: DMA mm2s stream clock
+ * @rx_clk: DMA s2mm clock
+ * @rxs_clk: DMA s2mm stream clock
+ * @s2mm_chan_id: DMA s2mm channel identifier
+ * @mm2s_chan_id: DMA mm2s channel identifier
+ * @max_buffer_len: Max buffer length
+ */
+struct xilinx_dma_device {
+ void __iomem *regs;
+ struct device *dev;
+ struct dma_device common;
+ struct xilinx_dma_chan *chan[XILINX_MCDMA_MAX_CHANS_PER_DEVICE];
+ u32 flush_on_fsync;
+ bool ext_addr;
+ struct platform_device *pdev;
+ const struct xilinx_dma_config *dma_config;
+ struct clk *axi_clk;
+ struct clk *tx_clk;
+ struct clk *txs_clk;
+ struct clk *rx_clk;
+ struct clk *rxs_clk;
+ u32 s2mm_chan_id;
+ u32 mm2s_chan_id;
+ u32 max_buffer_len;
+};
+
+/* Macros */
+#define to_xilinx_chan(chan) \
+ container_of(chan, struct xilinx_dma_chan, common)
+#define to_dma_tx_descriptor(tx) \
+ container_of(tx, struct xilinx_dma_tx_descriptor, async_tx)
+#define xilinx_dma_poll_timeout(chan, reg, val, cond, delay_us, timeout_us) \
+ readl_poll_timeout_atomic(chan->xdev->regs + chan->ctrl_offset + reg, \
+ val, cond, delay_us, timeout_us)
+
+/* IO accessors */
+static inline u32 dma_read(struct xilinx_dma_chan *chan, u32 reg)
+{
+ return ioread32(chan->xdev->regs + reg);
+}
+
+static inline void dma_write(struct xilinx_dma_chan *chan, u32 reg, u32 value)
+{
+ iowrite32(value, chan->xdev->regs + reg);
+}
+
+static inline void vdma_desc_write(struct xilinx_dma_chan *chan, u32 reg,
+ u32 value)
+{
+ dma_write(chan, chan->desc_offset + reg, value);
+}
+
+static inline u32 dma_ctrl_read(struct xilinx_dma_chan *chan, u32 reg)
+{
+ return dma_read(chan, chan->ctrl_offset + reg);
+}
+
+static inline void dma_ctrl_write(struct xilinx_dma_chan *chan, u32 reg,
+ u32 value)
+{
+ dma_write(chan, chan->ctrl_offset + reg, value);
+}
+
+static inline void dma_ctrl_clr(struct xilinx_dma_chan *chan, u32 reg,
+ u32 clr)
+{
+ dma_ctrl_write(chan, reg, dma_ctrl_read(chan, reg) & ~clr);
+}
+
+static inline void dma_ctrl_set(struct xilinx_dma_chan *chan, u32 reg,
+ u32 set)
+{
+ dma_ctrl_write(chan, reg, dma_ctrl_read(chan, reg) | set);
+}
+
+/**
+ * vdma_desc_write_64 - 64-bit descriptor write
+ * @chan: Driver specific VDMA channel
+ * @reg: Register to write
+ * @value_lsb: lower address of the descriptor.
+ * @value_msb: upper address of the descriptor.
+ *
+ * Since vdma driver is trying to write to a register offset which is not a
+ * multiple of 64 bits(ex : 0x5c), we are writing as two separate 32 bits
+ * instead of a single 64 bit register write.
+ */
+static inline void vdma_desc_write_64(struct xilinx_dma_chan *chan, u32 reg,
+ u32 value_lsb, u32 value_msb)
+{
+ /* Write the lsb 32 bits*/
+ writel(value_lsb, chan->xdev->regs + chan->desc_offset + reg);
+
+ /* Write the msb 32 bits */
+ writel(value_msb, chan->xdev->regs + chan->desc_offset + reg + 4);
+}
+
+static inline void dma_writeq(struct xilinx_dma_chan *chan, u32 reg, u64 value)
+{
+ lo_hi_writeq(value, chan->xdev->regs + chan->ctrl_offset + reg);
+}
+
+static inline void xilinx_write(struct xilinx_dma_chan *chan, u32 reg,
+ dma_addr_t addr)
+{
+ if (chan->ext_addr)
+ dma_writeq(chan, reg, addr);
+ else
+ dma_ctrl_write(chan, reg, addr);
+}
+
+static inline void xilinx_axidma_buf(struct xilinx_dma_chan *chan,
+ struct xilinx_axidma_desc_hw *hw,
+ dma_addr_t buf_addr, size_t sg_used,
+ size_t period_len)
+{
+ if (chan->ext_addr) {
+ hw->buf_addr = lower_32_bits(buf_addr + sg_used + period_len);
+ hw->buf_addr_msb = upper_32_bits(buf_addr + sg_used +
+ period_len);
+ } else {
+ hw->buf_addr = buf_addr + sg_used + period_len;
+ }
+}
+
+static inline void xilinx_aximcdma_buf(struct xilinx_dma_chan *chan,
+ struct xilinx_aximcdma_desc_hw *hw,
+ dma_addr_t buf_addr, size_t sg_used)
+{
+ if (chan->ext_addr) {
+ hw->buf_addr = lower_32_bits(buf_addr + sg_used);
+ hw->buf_addr_msb = upper_32_bits(buf_addr + sg_used);
+ } else {
+ hw->buf_addr = buf_addr + sg_used;
+ }
+}
+
+/* -----------------------------------------------------------------------------
+ * Descriptors and segments alloc and free
+ */
+
+/**
+ * xilinx_vdma_alloc_tx_segment - Allocate transaction segment
+ * @chan: Driver specific DMA channel
+ *
+ * Return: The allocated segment on success and NULL on failure.
+ */
+static struct xilinx_vdma_tx_segment *
+xilinx_vdma_alloc_tx_segment(struct xilinx_dma_chan *chan)
+{
+ struct xilinx_vdma_tx_segment *segment;
+ dma_addr_t phys;
+
+ segment = dma_pool_zalloc(chan->desc_pool, GFP_ATOMIC, &phys);
+ if (!segment)
+ return NULL;
+
+ segment->phys = phys;
+
+ return segment;
+}
+
+/**
+ * xilinx_cdma_alloc_tx_segment - Allocate transaction segment
+ * @chan: Driver specific DMA channel
+ *
+ * Return: The allocated segment on success and NULL on failure.
+ */
+static struct xilinx_cdma_tx_segment *
+xilinx_cdma_alloc_tx_segment(struct xilinx_dma_chan *chan)
+{
+ struct xilinx_cdma_tx_segment *segment;
+ dma_addr_t phys;
+
+ segment = dma_pool_zalloc(chan->desc_pool, GFP_ATOMIC, &phys);
+ if (!segment)
+ return NULL;
+
+ segment->phys = phys;
+
+ return segment;
+}
+
+/**
+ * xilinx_axidma_alloc_tx_segment - Allocate transaction segment
+ * @chan: Driver specific DMA channel
+ *
+ * Return: The allocated segment on success and NULL on failure.
+ */
+static struct xilinx_axidma_tx_segment *
+xilinx_axidma_alloc_tx_segment(struct xilinx_dma_chan *chan)
+{
+ struct xilinx_axidma_tx_segment *segment = NULL;
+ unsigned long flags;
+
+ spin_lock_irqsave(&chan->lock, flags);
+ if (!list_empty(&chan->free_seg_list)) {
+ segment = list_first_entry(&chan->free_seg_list,
+ struct xilinx_axidma_tx_segment,
+ node);
+ list_del(&segment->node);
+ }
+ spin_unlock_irqrestore(&chan->lock, flags);
+
+ if (!segment)
+ dev_dbg(chan->dev, "Could not find free tx segment\n");
+
+ return segment;
+}
+
+/**
+ * xilinx_aximcdma_alloc_tx_segment - Allocate transaction segment
+ * @chan: Driver specific DMA channel
+ *
+ * Return: The allocated segment on success and NULL on failure.
+ */
+static struct xilinx_aximcdma_tx_segment *
+xilinx_aximcdma_alloc_tx_segment(struct xilinx_dma_chan *chan)
+{
+ struct xilinx_aximcdma_tx_segment *segment = NULL;
+ unsigned long flags;
+
+ spin_lock_irqsave(&chan->lock, flags);
+ if (!list_empty(&chan->free_seg_list)) {
+ segment = list_first_entry(&chan->free_seg_list,
+ struct xilinx_aximcdma_tx_segment,
+ node);
+ list_del(&segment->node);
+ }
+ spin_unlock_irqrestore(&chan->lock, flags);
+
+ return segment;
+}
+
+static void xilinx_dma_clean_hw_desc(struct xilinx_axidma_desc_hw *hw)
+{
+ u32 next_desc = hw->next_desc;
+ u32 next_desc_msb = hw->next_desc_msb;
+
+ memset(hw, 0, sizeof(struct xilinx_axidma_desc_hw));
+
+ hw->next_desc = next_desc;
+ hw->next_desc_msb = next_desc_msb;
+}
+
+static void xilinx_mcdma_clean_hw_desc(struct xilinx_aximcdma_desc_hw *hw)
+{
+ u32 next_desc = hw->next_desc;
+ u32 next_desc_msb = hw->next_desc_msb;
+
+ memset(hw, 0, sizeof(struct xilinx_aximcdma_desc_hw));
+
+ hw->next_desc = next_desc;
+ hw->next_desc_msb = next_desc_msb;
+}
+
+/**
+ * xilinx_dma_free_tx_segment - Free transaction segment
+ * @chan: Driver specific DMA channel
+ * @segment: DMA transaction segment
+ */
+static void xilinx_dma_free_tx_segment(struct xilinx_dma_chan *chan,
+ struct xilinx_axidma_tx_segment *segment)
+{
+ xilinx_dma_clean_hw_desc(&segment->hw);
+
+ list_add_tail(&segment->node, &chan->free_seg_list);
+}
+
+/**
+ * xilinx_mcdma_free_tx_segment - Free transaction segment
+ * @chan: Driver specific DMA channel
+ * @segment: DMA transaction segment
+ */
+static void xilinx_mcdma_free_tx_segment(struct xilinx_dma_chan *chan,
+ struct xilinx_aximcdma_tx_segment *
+ segment)
+{
+ xilinx_mcdma_clean_hw_desc(&segment->hw);
+
+ list_add_tail(&segment->node, &chan->free_seg_list);
+}
+
+/**
+ * xilinx_cdma_free_tx_segment - Free transaction segment
+ * @chan: Driver specific DMA channel
+ * @segment: DMA transaction segment
+ */
+static void xilinx_cdma_free_tx_segment(struct xilinx_dma_chan *chan,
+ struct xilinx_cdma_tx_segment *segment)
+{
+ dma_pool_free(chan->desc_pool, segment, segment->phys);
+}
+
+/**
+ * xilinx_vdma_free_tx_segment - Free transaction segment
+ * @chan: Driver specific DMA channel
+ * @segment: DMA transaction segment
+ */
+static void xilinx_vdma_free_tx_segment(struct xilinx_dma_chan *chan,
+ struct xilinx_vdma_tx_segment *segment)
+{
+ dma_pool_free(chan->desc_pool, segment, segment->phys);
+}
+
+/**
+ * xilinx_dma_alloc_tx_descriptor - Allocate transaction descriptor
+ * @chan: Driver specific DMA channel
+ *
+ * Return: The allocated descriptor on success and NULL on failure.
+ */
+static struct xilinx_dma_tx_descriptor *
+xilinx_dma_alloc_tx_descriptor(struct xilinx_dma_chan *chan)
+{
+ struct xilinx_dma_tx_descriptor *desc;
+
+ desc = kzalloc(sizeof(*desc), GFP_NOWAIT);
+ if (!desc)
+ return NULL;
+
+ INIT_LIST_HEAD(&desc->segments);
+
+ return desc;
+}
+
+/**
+ * xilinx_dma_free_tx_descriptor - Free transaction descriptor
+ * @chan: Driver specific DMA channel
+ * @desc: DMA transaction descriptor
+ */
+static void
+xilinx_dma_free_tx_descriptor(struct xilinx_dma_chan *chan,
+ struct xilinx_dma_tx_descriptor *desc)
+{
+ struct xilinx_vdma_tx_segment *segment, *next;
+ struct xilinx_cdma_tx_segment *cdma_segment, *cdma_next;
+ struct xilinx_axidma_tx_segment *axidma_segment, *axidma_next;
+ struct xilinx_aximcdma_tx_segment *aximcdma_segment, *aximcdma_next;
+
+ if (!desc)
+ return;
+
+ if (chan->xdev->dma_config->dmatype == XDMA_TYPE_VDMA) {
+ list_for_each_entry_safe(segment, next, &desc->segments, node) {
+ list_del(&segment->node);
+ xilinx_vdma_free_tx_segment(chan, segment);
+ }
+ } else if (chan->xdev->dma_config->dmatype == XDMA_TYPE_CDMA) {
+ list_for_each_entry_safe(cdma_segment, cdma_next,
+ &desc->segments, node) {
+ list_del(&cdma_segment->node);
+ xilinx_cdma_free_tx_segment(chan, cdma_segment);
+ }
+ } else if (chan->xdev->dma_config->dmatype == XDMA_TYPE_AXIDMA) {
+ list_for_each_entry_safe(axidma_segment, axidma_next,
+ &desc->segments, node) {
+ list_del(&axidma_segment->node);
+ xilinx_dma_free_tx_segment(chan, axidma_segment);
+ }
+ } else {
+ list_for_each_entry_safe(aximcdma_segment, aximcdma_next,
+ &desc->segments, node) {
+ list_del(&aximcdma_segment->node);
+ xilinx_mcdma_free_tx_segment(chan, aximcdma_segment);
+ }
+ }
+
+ kfree(desc);
+}
+
+/* Required functions */
+
+/**
+ * xilinx_dma_free_desc_list - Free descriptors list
+ * @chan: Driver specific DMA channel
+ * @list: List to parse and delete the descriptor
+ */
+static void xilinx_dma_free_desc_list(struct xilinx_dma_chan *chan,
+ struct list_head *list)
+{
+ struct xilinx_dma_tx_descriptor *desc, *next;
+
+ list_for_each_entry_safe(desc, next, list, node) {
+ list_del(&desc->node);
+ xilinx_dma_free_tx_descriptor(chan, desc);
+ }
+}
+
+/**
+ * xilinx_dma_free_descriptors - Free channel descriptors
+ * @chan: Driver specific DMA channel
+ */
+static void xilinx_dma_free_descriptors(struct xilinx_dma_chan *chan)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&chan->lock, flags);
+
+ xilinx_dma_free_desc_list(chan, &chan->pending_list);
+ xilinx_dma_free_desc_list(chan, &chan->done_list);
+ xilinx_dma_free_desc_list(chan, &chan->active_list);
+
+ spin_unlock_irqrestore(&chan->lock, flags);
+}
+
+/**
+ * xilinx_dma_free_chan_resources - Free channel resources
+ * @dchan: DMA channel
+ */
+static void xilinx_dma_free_chan_resources(struct dma_chan *dchan)
+{
+ struct xilinx_dma_chan *chan = to_xilinx_chan(dchan);
+ unsigned long flags;
+
+ dev_dbg(chan->dev, "Free all channel resources.\n");
+
+ xilinx_dma_free_descriptors(chan);
+
+ if (chan->xdev->dma_config->dmatype == XDMA_TYPE_AXIDMA) {
+ spin_lock_irqsave(&chan->lock, flags);
+ INIT_LIST_HEAD(&chan->free_seg_list);
+ spin_unlock_irqrestore(&chan->lock, flags);
+
+ /* Free memory that is allocated for BD */
+ dma_free_coherent(chan->dev, sizeof(*chan->seg_v) *
+ XILINX_DMA_NUM_DESCS, chan->seg_v,
+ chan->seg_p);
+
+ /* Free Memory that is allocated for cyclic DMA Mode */
+ dma_free_coherent(chan->dev, sizeof(*chan->cyclic_seg_v),
+ chan->cyclic_seg_v, chan->cyclic_seg_p);
+ }
+
+ if (chan->xdev->dma_config->dmatype == XDMA_TYPE_AXIMCDMA) {
+ spin_lock_irqsave(&chan->lock, flags);
+ INIT_LIST_HEAD(&chan->free_seg_list);
+ spin_unlock_irqrestore(&chan->lock, flags);
+
+ /* Free memory that is allocated for BD */
+ dma_free_coherent(chan->dev, sizeof(*chan->seg_mv) *
+ XILINX_DMA_NUM_DESCS, chan->seg_mv,
+ chan->seg_p);
+ }
+
+ if (chan->xdev->dma_config->dmatype != XDMA_TYPE_AXIDMA &&
+ chan->xdev->dma_config->dmatype != XDMA_TYPE_AXIMCDMA) {
+ dma_pool_destroy(chan->desc_pool);
+ chan->desc_pool = NULL;
+ }
+
+}
+
+/**
+ * xilinx_dma_get_residue - Compute residue for a given descriptor
+ * @chan: Driver specific dma channel
+ * @desc: dma transaction descriptor
+ *
+ * Return: The number of residue bytes for the descriptor.
+ */
+static u32 xilinx_dma_get_residue(struct xilinx_dma_chan *chan,
+ struct xilinx_dma_tx_descriptor *desc)
+{
+ struct xilinx_cdma_tx_segment *cdma_seg;
+ struct xilinx_axidma_tx_segment *axidma_seg;
+ struct xilinx_aximcdma_tx_segment *aximcdma_seg;
+ struct xilinx_cdma_desc_hw *cdma_hw;
+ struct xilinx_axidma_desc_hw *axidma_hw;
+ struct xilinx_aximcdma_desc_hw *aximcdma_hw;
+ struct list_head *entry;
+ u32 residue = 0;
+
+ list_for_each(entry, &desc->segments) {
+ if (chan->xdev->dma_config->dmatype == XDMA_TYPE_CDMA) {
+ cdma_seg = list_entry(entry,
+ struct xilinx_cdma_tx_segment,
+ node);
+ cdma_hw = &cdma_seg->hw;
+ residue += (cdma_hw->control - cdma_hw->status) &
+ chan->xdev->max_buffer_len;
+ } else if (chan->xdev->dma_config->dmatype ==
+ XDMA_TYPE_AXIDMA) {
+ axidma_seg = list_entry(entry,
+ struct xilinx_axidma_tx_segment,
+ node);
+ axidma_hw = &axidma_seg->hw;
+ residue += (axidma_hw->control - axidma_hw->status) &
+ chan->xdev->max_buffer_len;
+ } else {
+ aximcdma_seg =
+ list_entry(entry,
+ struct xilinx_aximcdma_tx_segment,
+ node);
+ aximcdma_hw = &aximcdma_seg->hw;
+ residue +=
+ (aximcdma_hw->control - aximcdma_hw->status) &
+ chan->xdev->max_buffer_len;
+ }
+ }
+
+ return residue;
+}
+
+/**
+ * xilinx_dma_chan_handle_cyclic - Cyclic dma callback
+ * @chan: Driver specific dma channel
+ * @desc: dma transaction descriptor
+ * @flags: flags for spin lock
+ */
+static void xilinx_dma_chan_handle_cyclic(struct xilinx_dma_chan *chan,
+ struct xilinx_dma_tx_descriptor *desc,
+ unsigned long *flags)
+{
+ struct dmaengine_desc_callback cb;
+
+ dmaengine_desc_get_callback(&desc->async_tx, &cb);
+ if (dmaengine_desc_callback_valid(&cb)) {
+ spin_unlock_irqrestore(&chan->lock, *flags);
+ dmaengine_desc_callback_invoke(&cb, NULL);
+ spin_lock_irqsave(&chan->lock, *flags);
+ }
+}
+
+/**
+ * xilinx_dma_chan_desc_cleanup - Clean channel descriptors
+ * @chan: Driver specific DMA channel
+ */
+static void xilinx_dma_chan_desc_cleanup(struct xilinx_dma_chan *chan)
+{
+ struct xilinx_dma_tx_descriptor *desc, *next;
+ unsigned long flags;
+
+ spin_lock_irqsave(&chan->lock, flags);
+
+ list_for_each_entry_safe(desc, next, &chan->done_list, node) {
+ struct dmaengine_result result;
+
+ if (desc->cyclic) {
+ xilinx_dma_chan_handle_cyclic(chan, desc, &flags);
+ break;
+ }
+
+ /* Remove from the list of running transactions */
+ list_del(&desc->node);
+
+ if (unlikely(desc->err)) {
+ if (chan->direction == DMA_DEV_TO_MEM)
+ result.result = DMA_TRANS_READ_FAILED;
+ else
+ result.result = DMA_TRANS_WRITE_FAILED;
+ } else {
+ result.result = DMA_TRANS_NOERROR;
+ }
+
+ result.residue = desc->residue;
+
+ /* Run the link descriptor callback function */
+ spin_unlock_irqrestore(&chan->lock, flags);
+ dmaengine_desc_get_callback_invoke(&desc->async_tx, &result);
+ spin_lock_irqsave(&chan->lock, flags);
+
+ /* Run any dependencies, then free the descriptor */
+ dma_run_dependencies(&desc->async_tx);
+ xilinx_dma_free_tx_descriptor(chan, desc);
+
+ /*
+ * While we ran a callback the user called a terminate function,
+ * which takes care of cleaning up any remaining descriptors
+ */
+ if (chan->terminating)
+ break;
+ }
+
+ spin_unlock_irqrestore(&chan->lock, flags);
+}
+
+/**
+ * xilinx_dma_do_tasklet - Schedule completion tasklet
+ * @t: Pointer to the Xilinx DMA channel structure
+ */
+static void xilinx_dma_do_tasklet(struct tasklet_struct *t)
+{
+ struct xilinx_dma_chan *chan = from_tasklet(chan, t, tasklet);
+
+ xilinx_dma_chan_desc_cleanup(chan);
+}
+
+/**
+ * xilinx_dma_alloc_chan_resources - Allocate channel resources
+ * @dchan: DMA channel
+ *
+ * Return: '0' on success and failure value on error
+ */
+static int xilinx_dma_alloc_chan_resources(struct dma_chan *dchan)
+{
+ struct xilinx_dma_chan *chan = to_xilinx_chan(dchan);
+ int i;
+
+ /* Has this channel already been allocated? */
+ if (chan->desc_pool)
+ return 0;
+
+ /*
+ * We need the descriptor to be aligned to 64bytes
+ * for meeting Xilinx VDMA specification requirement.
+ */
+ if (chan->xdev->dma_config->dmatype == XDMA_TYPE_AXIDMA) {
+ /* Allocate the buffer descriptors. */
+ chan->seg_v = dma_alloc_coherent(chan->dev,
+ sizeof(*chan->seg_v) * XILINX_DMA_NUM_DESCS,
+ &chan->seg_p, GFP_KERNEL);
+ if (!chan->seg_v) {
+ dev_err(chan->dev,
+ "unable to allocate channel %d descriptors\n",
+ chan->id);
+ return -ENOMEM;
+ }
+ /*
+ * For cyclic DMA mode we need to program the tail Descriptor
+ * register with a value which is not a part of the BD chain
+ * so allocating a desc segment during channel allocation for
+ * programming tail descriptor.
+ */
+ chan->cyclic_seg_v = dma_alloc_coherent(chan->dev,
+ sizeof(*chan->cyclic_seg_v),
+ &chan->cyclic_seg_p,
+ GFP_KERNEL);
+ if (!chan->cyclic_seg_v) {
+ dev_err(chan->dev,
+ "unable to allocate desc segment for cyclic DMA\n");
+ dma_free_coherent(chan->dev, sizeof(*chan->seg_v) *
+ XILINX_DMA_NUM_DESCS, chan->seg_v,
+ chan->seg_p);
+ return -ENOMEM;
+ }
+ chan->cyclic_seg_v->phys = chan->cyclic_seg_p;
+
+ for (i = 0; i < XILINX_DMA_NUM_DESCS; i++) {
+ chan->seg_v[i].hw.next_desc =
+ lower_32_bits(chan->seg_p + sizeof(*chan->seg_v) *
+ ((i + 1) % XILINX_DMA_NUM_DESCS));
+ chan->seg_v[i].hw.next_desc_msb =
+ upper_32_bits(chan->seg_p + sizeof(*chan->seg_v) *
+ ((i + 1) % XILINX_DMA_NUM_DESCS));
+ chan->seg_v[i].phys = chan->seg_p +
+ sizeof(*chan->seg_v) * i;
+ list_add_tail(&chan->seg_v[i].node,
+ &chan->free_seg_list);
+ }
+ } else if (chan->xdev->dma_config->dmatype == XDMA_TYPE_AXIMCDMA) {
+ /* Allocate the buffer descriptors. */
+ chan->seg_mv = dma_alloc_coherent(chan->dev,
+ sizeof(*chan->seg_mv) *
+ XILINX_DMA_NUM_DESCS,
+ &chan->seg_p, GFP_KERNEL);
+ if (!chan->seg_mv) {
+ dev_err(chan->dev,
+ "unable to allocate channel %d descriptors\n",
+ chan->id);
+ return -ENOMEM;
+ }
+ for (i = 0; i < XILINX_DMA_NUM_DESCS; i++) {
+ chan->seg_mv[i].hw.next_desc =
+ lower_32_bits(chan->seg_p + sizeof(*chan->seg_mv) *
+ ((i + 1) % XILINX_DMA_NUM_DESCS));
+ chan->seg_mv[i].hw.next_desc_msb =
+ upper_32_bits(chan->seg_p + sizeof(*chan->seg_mv) *
+ ((i + 1) % XILINX_DMA_NUM_DESCS));
+ chan->seg_mv[i].phys = chan->seg_p +
+ sizeof(*chan->seg_mv) * i;
+ list_add_tail(&chan->seg_mv[i].node,
+ &chan->free_seg_list);
+ }
+ } else if (chan->xdev->dma_config->dmatype == XDMA_TYPE_CDMA) {
+ chan->desc_pool = dma_pool_create("xilinx_cdma_desc_pool",
+ chan->dev,
+ sizeof(struct xilinx_cdma_tx_segment),
+ __alignof__(struct xilinx_cdma_tx_segment),
+ 0);
+ } else {
+ chan->desc_pool = dma_pool_create("xilinx_vdma_desc_pool",
+ chan->dev,
+ sizeof(struct xilinx_vdma_tx_segment),
+ __alignof__(struct xilinx_vdma_tx_segment),
+ 0);
+ }
+
+ if (!chan->desc_pool &&
+ ((chan->xdev->dma_config->dmatype != XDMA_TYPE_AXIDMA) &&
+ chan->xdev->dma_config->dmatype != XDMA_TYPE_AXIMCDMA)) {
+ dev_err(chan->dev,
+ "unable to allocate channel %d descriptor pool\n",
+ chan->id);
+ return -ENOMEM;
+ }
+
+ dma_cookie_init(dchan);
+
+ if (chan->xdev->dma_config->dmatype == XDMA_TYPE_AXIDMA) {
+ /* For AXI DMA resetting once channel will reset the
+ * other channel as well so enable the interrupts here.
+ */
+ dma_ctrl_set(chan, XILINX_DMA_REG_DMACR,
+ XILINX_DMA_DMAXR_ALL_IRQ_MASK);
+ }
+
+ if ((chan->xdev->dma_config->dmatype == XDMA_TYPE_CDMA) && chan->has_sg)
+ dma_ctrl_set(chan, XILINX_DMA_REG_DMACR,
+ XILINX_CDMA_CR_SGMODE);
+
+ return 0;
+}
+
+/**
+ * xilinx_dma_calc_copysize - Calculate the amount of data to copy
+ * @chan: Driver specific DMA channel
+ * @size: Total data that needs to be copied
+ * @done: Amount of data that has been already copied
+ *
+ * Return: Amount of data that has to be copied
+ */
+static int xilinx_dma_calc_copysize(struct xilinx_dma_chan *chan,
+ int size, int done)
+{
+ size_t copy;
+
+ copy = min_t(size_t, size - done,
+ chan->xdev->max_buffer_len);
+
+ if ((copy + done < size) &&
+ chan->xdev->common.copy_align) {
+ /*
+ * If this is not the last descriptor, make sure
+ * the next one will be properly aligned
+ */
+ copy = rounddown(copy,
+ (1 << chan->xdev->common.copy_align));
+ }
+ return copy;
+}
+
+/**
+ * xilinx_dma_tx_status - Get DMA transaction status
+ * @dchan: DMA channel
+ * @cookie: Transaction identifier
+ * @txstate: Transaction state
+ *
+ * Return: DMA transaction status
+ */
+static enum dma_status xilinx_dma_tx_status(struct dma_chan *dchan,
+ dma_cookie_t cookie,
+ struct dma_tx_state *txstate)
+{
+ struct xilinx_dma_chan *chan = to_xilinx_chan(dchan);
+ struct xilinx_dma_tx_descriptor *desc;
+ enum dma_status ret;
+ unsigned long flags;
+ u32 residue = 0;
+
+ ret = dma_cookie_status(dchan, cookie, txstate);
+ if (ret == DMA_COMPLETE || !txstate)
+ return ret;
+
+ spin_lock_irqsave(&chan->lock, flags);
+ if (!list_empty(&chan->active_list)) {
+ desc = list_last_entry(&chan->active_list,
+ struct xilinx_dma_tx_descriptor, node);
+ /*
+ * VDMA and simple mode do not support residue reporting, so the
+ * residue field will always be 0.
+ */
+ if (chan->has_sg && chan->xdev->dma_config->dmatype != XDMA_TYPE_VDMA)
+ residue = xilinx_dma_get_residue(chan, desc);
+ }
+ spin_unlock_irqrestore(&chan->lock, flags);
+
+ dma_set_residue(txstate, residue);
+
+ return ret;
+}
+
+/**
+ * xilinx_dma_stop_transfer - Halt DMA channel
+ * @chan: Driver specific DMA channel
+ *
+ * Return: '0' on success and failure value on error
+ */
+static int xilinx_dma_stop_transfer(struct xilinx_dma_chan *chan)
+{
+ u32 val;
+
+ dma_ctrl_clr(chan, XILINX_DMA_REG_DMACR, XILINX_DMA_DMACR_RUNSTOP);
+
+ /* Wait for the hardware to halt */
+ return xilinx_dma_poll_timeout(chan, XILINX_DMA_REG_DMASR, val,
+ val & XILINX_DMA_DMASR_HALTED, 0,
+ XILINX_DMA_LOOP_COUNT);
+}
+
+/**
+ * xilinx_cdma_stop_transfer - Wait for the current transfer to complete
+ * @chan: Driver specific DMA channel
+ *
+ * Return: '0' on success and failure value on error
+ */
+static int xilinx_cdma_stop_transfer(struct xilinx_dma_chan *chan)
+{
+ u32 val;
+
+ return xilinx_dma_poll_timeout(chan, XILINX_DMA_REG_DMASR, val,
+ val & XILINX_DMA_DMASR_IDLE, 0,
+ XILINX_DMA_LOOP_COUNT);
+}
+
+/**
+ * xilinx_dma_start - Start DMA channel
+ * @chan: Driver specific DMA channel
+ */
+static void xilinx_dma_start(struct xilinx_dma_chan *chan)
+{
+ int err;
+ u32 val;
+
+ dma_ctrl_set(chan, XILINX_DMA_REG_DMACR, XILINX_DMA_DMACR_RUNSTOP);
+
+ /* Wait for the hardware to start */
+ err = xilinx_dma_poll_timeout(chan, XILINX_DMA_REG_DMASR, val,
+ !(val & XILINX_DMA_DMASR_HALTED), 0,
+ XILINX_DMA_LOOP_COUNT);
+
+ if (err) {
+ dev_err(chan->dev, "Cannot start channel %p: %x\n",
+ chan, dma_ctrl_read(chan, XILINX_DMA_REG_DMASR));
+
+ chan->err = true;
+ }
+}
+
+/**
+ * xilinx_vdma_start_transfer - Starts VDMA transfer
+ * @chan: Driver specific channel struct pointer
+ */
+static void xilinx_vdma_start_transfer(struct xilinx_dma_chan *chan)
+{
+ struct xilinx_vdma_config *config = &chan->config;
+ struct xilinx_dma_tx_descriptor *desc;
+ u32 reg, j;
+ struct xilinx_vdma_tx_segment *segment, *last = NULL;
+ int i = 0;
+
+ /* This function was invoked with lock held */
+ if (chan->err)
+ return;
+
+ if (!chan->idle)
+ return;
+
+ if (list_empty(&chan->pending_list))
+ return;
+
+ desc = list_first_entry(&chan->pending_list,
+ struct xilinx_dma_tx_descriptor, node);
+
+ /* Configure the hardware using info in the config structure */
+ if (chan->has_vflip) {
+ reg = dma_read(chan, XILINX_VDMA_REG_ENABLE_VERTICAL_FLIP);
+ reg &= ~XILINX_VDMA_ENABLE_VERTICAL_FLIP;
+ reg |= config->vflip_en;
+ dma_write(chan, XILINX_VDMA_REG_ENABLE_VERTICAL_FLIP,
+ reg);
+ }
+
+ reg = dma_ctrl_read(chan, XILINX_DMA_REG_DMACR);
+
+ if (config->frm_cnt_en)
+ reg |= XILINX_DMA_DMACR_FRAMECNT_EN;
+ else
+ reg &= ~XILINX_DMA_DMACR_FRAMECNT_EN;
+
+ /* If not parking, enable circular mode */
+ if (config->park)
+ reg &= ~XILINX_DMA_DMACR_CIRC_EN;
+ else
+ reg |= XILINX_DMA_DMACR_CIRC_EN;
+
+ dma_ctrl_write(chan, XILINX_DMA_REG_DMACR, reg);
+
+ j = chan->desc_submitcount;
+ reg = dma_read(chan, XILINX_DMA_REG_PARK_PTR);
+ if (chan->direction == DMA_MEM_TO_DEV) {
+ reg &= ~XILINX_DMA_PARK_PTR_RD_REF_MASK;
+ reg |= j << XILINX_DMA_PARK_PTR_RD_REF_SHIFT;
+ } else {
+ reg &= ~XILINX_DMA_PARK_PTR_WR_REF_MASK;
+ reg |= j << XILINX_DMA_PARK_PTR_WR_REF_SHIFT;
+ }
+ dma_write(chan, XILINX_DMA_REG_PARK_PTR, reg);
+
+ /* Start the hardware */
+ xilinx_dma_start(chan);
+
+ if (chan->err)
+ return;
+
+ /* Start the transfer */
+ if (chan->desc_submitcount < chan->num_frms)
+ i = chan->desc_submitcount;
+
+ list_for_each_entry(segment, &desc->segments, node) {
+ if (chan->ext_addr)
+ vdma_desc_write_64(chan,
+ XILINX_VDMA_REG_START_ADDRESS_64(i++),
+ segment->hw.buf_addr,
+ segment->hw.buf_addr_msb);
+ else
+ vdma_desc_write(chan,
+ XILINX_VDMA_REG_START_ADDRESS(i++),
+ segment->hw.buf_addr);
+
+ last = segment;
+ }
+
+ if (!last)
+ return;
+
+ /* HW expects these parameters to be same for one transaction */
+ vdma_desc_write(chan, XILINX_DMA_REG_HSIZE, last->hw.hsize);
+ vdma_desc_write(chan, XILINX_DMA_REG_FRMDLY_STRIDE,
+ last->hw.stride);
+ vdma_desc_write(chan, XILINX_DMA_REG_VSIZE, last->hw.vsize);
+
+ chan->desc_submitcount++;
+ chan->desc_pendingcount--;
+ list_move_tail(&desc->node, &chan->active_list);
+ if (chan->desc_submitcount == chan->num_frms)
+ chan->desc_submitcount = 0;
+
+ chan->idle = false;
+}
+
+/**
+ * xilinx_cdma_start_transfer - Starts cdma transfer
+ * @chan: Driver specific channel struct pointer
+ */
+static void xilinx_cdma_start_transfer(struct xilinx_dma_chan *chan)
+{
+ struct xilinx_dma_tx_descriptor *head_desc, *tail_desc;
+ struct xilinx_cdma_tx_segment *tail_segment;
+ u32 ctrl_reg = dma_read(chan, XILINX_DMA_REG_DMACR);
+
+ if (chan->err)
+ return;
+
+ if (!chan->idle)
+ return;
+
+ if (list_empty(&chan->pending_list))
+ return;
+
+ head_desc = list_first_entry(&chan->pending_list,
+ struct xilinx_dma_tx_descriptor, node);
+ tail_desc = list_last_entry(&chan->pending_list,
+ struct xilinx_dma_tx_descriptor, node);
+ tail_segment = list_last_entry(&tail_desc->segments,
+ struct xilinx_cdma_tx_segment, node);
+
+ if (chan->desc_pendingcount <= XILINX_DMA_COALESCE_MAX) {
+ ctrl_reg &= ~XILINX_DMA_CR_COALESCE_MAX;
+ ctrl_reg |= chan->desc_pendingcount <<
+ XILINX_DMA_CR_COALESCE_SHIFT;
+ dma_ctrl_write(chan, XILINX_DMA_REG_DMACR, ctrl_reg);
+ }
+
+ if (chan->has_sg) {
+ dma_ctrl_clr(chan, XILINX_DMA_REG_DMACR,
+ XILINX_CDMA_CR_SGMODE);
+
+ dma_ctrl_set(chan, XILINX_DMA_REG_DMACR,
+ XILINX_CDMA_CR_SGMODE);
+
+ xilinx_write(chan, XILINX_DMA_REG_CURDESC,
+ head_desc->async_tx.phys);
+
+ /* Update tail ptr register which will start the transfer */
+ xilinx_write(chan, XILINX_DMA_REG_TAILDESC,
+ tail_segment->phys);
+ } else {
+ /* In simple mode */
+ struct xilinx_cdma_tx_segment *segment;
+ struct xilinx_cdma_desc_hw *hw;
+
+ segment = list_first_entry(&head_desc->segments,
+ struct xilinx_cdma_tx_segment,
+ node);
+
+ hw = &segment->hw;
+
+ xilinx_write(chan, XILINX_CDMA_REG_SRCADDR,
+ xilinx_prep_dma_addr_t(hw->src_addr));
+ xilinx_write(chan, XILINX_CDMA_REG_DSTADDR,
+ xilinx_prep_dma_addr_t(hw->dest_addr));
+
+ /* Start the transfer */
+ dma_ctrl_write(chan, XILINX_DMA_REG_BTT,
+ hw->control & chan->xdev->max_buffer_len);
+ }
+
+ list_splice_tail_init(&chan->pending_list, &chan->active_list);
+ chan->desc_pendingcount = 0;
+ chan->idle = false;
+}
+
+/**
+ * xilinx_dma_start_transfer - Starts DMA transfer
+ * @chan: Driver specific channel struct pointer
+ */
+static void xilinx_dma_start_transfer(struct xilinx_dma_chan *chan)
+{
+ struct xilinx_dma_tx_descriptor *head_desc, *tail_desc;
+ struct xilinx_axidma_tx_segment *tail_segment;
+ u32 reg;
+
+ if (chan->err)
+ return;
+
+ if (list_empty(&chan->pending_list))
+ return;
+
+ if (!chan->idle)
+ return;
+
+ head_desc = list_first_entry(&chan->pending_list,
+ struct xilinx_dma_tx_descriptor, node);
+ tail_desc = list_last_entry(&chan->pending_list,
+ struct xilinx_dma_tx_descriptor, node);
+ tail_segment = list_last_entry(&tail_desc->segments,
+ struct xilinx_axidma_tx_segment, node);
+
+ reg = dma_ctrl_read(chan, XILINX_DMA_REG_DMACR);
+
+ if (chan->desc_pendingcount <= XILINX_DMA_COALESCE_MAX) {
+ reg &= ~XILINX_DMA_CR_COALESCE_MAX;
+ reg |= chan->desc_pendingcount <<
+ XILINX_DMA_CR_COALESCE_SHIFT;
+ dma_ctrl_write(chan, XILINX_DMA_REG_DMACR, reg);
+ }
+
+ if (chan->has_sg)
+ xilinx_write(chan, XILINX_DMA_REG_CURDESC,
+ head_desc->async_tx.phys);
+
+ xilinx_dma_start(chan);
+
+ if (chan->err)
+ return;
+
+ /* Start the transfer */
+ if (chan->has_sg) {
+ if (chan->cyclic)
+ xilinx_write(chan, XILINX_DMA_REG_TAILDESC,
+ chan->cyclic_seg_v->phys);
+ else
+ xilinx_write(chan, XILINX_DMA_REG_TAILDESC,
+ tail_segment->phys);
+ } else {
+ struct xilinx_axidma_tx_segment *segment;
+ struct xilinx_axidma_desc_hw *hw;
+
+ segment = list_first_entry(&head_desc->segments,
+ struct xilinx_axidma_tx_segment,
+ node);
+ hw = &segment->hw;
+
+ xilinx_write(chan, XILINX_DMA_REG_SRCDSTADDR,
+ xilinx_prep_dma_addr_t(hw->buf_addr));
+
+ /* Start the transfer */
+ dma_ctrl_write(chan, XILINX_DMA_REG_BTT,
+ hw->control & chan->xdev->max_buffer_len);
+ }
+
+ list_splice_tail_init(&chan->pending_list, &chan->active_list);
+ chan->desc_pendingcount = 0;
+ chan->idle = false;
+}
+
+/**
+ * xilinx_mcdma_start_transfer - Starts MCDMA transfer
+ * @chan: Driver specific channel struct pointer
+ */
+static void xilinx_mcdma_start_transfer(struct xilinx_dma_chan *chan)
+{
+ struct xilinx_dma_tx_descriptor *head_desc, *tail_desc;
+ struct xilinx_aximcdma_tx_segment *tail_segment;
+ u32 reg;
+
+ /*
+ * lock has been held by calling functions, so we don't need it
+ * to take it here again.
+ */
+
+ if (chan->err)
+ return;
+
+ if (!chan->idle)
+ return;
+
+ if (list_empty(&chan->pending_list))
+ return;
+
+ head_desc = list_first_entry(&chan->pending_list,
+ struct xilinx_dma_tx_descriptor, node);
+ tail_desc = list_last_entry(&chan->pending_list,
+ struct xilinx_dma_tx_descriptor, node);
+ tail_segment = list_last_entry(&tail_desc->segments,
+ struct xilinx_aximcdma_tx_segment, node);
+
+ reg = dma_ctrl_read(chan, XILINX_MCDMA_CHAN_CR_OFFSET(chan->tdest));
+
+ if (chan->desc_pendingcount <= XILINX_MCDMA_COALESCE_MAX) {
+ reg &= ~XILINX_MCDMA_COALESCE_MASK;
+ reg |= chan->desc_pendingcount <<
+ XILINX_MCDMA_COALESCE_SHIFT;
+ }
+
+ reg |= XILINX_MCDMA_IRQ_ALL_MASK;
+ dma_ctrl_write(chan, XILINX_MCDMA_CHAN_CR_OFFSET(chan->tdest), reg);
+
+ /* Program current descriptor */
+ xilinx_write(chan, XILINX_MCDMA_CHAN_CDESC_OFFSET(chan->tdest),
+ head_desc->async_tx.phys);
+
+ /* Program channel enable register */
+ reg = dma_ctrl_read(chan, XILINX_MCDMA_CHEN_OFFSET);
+ reg |= BIT(chan->tdest);
+ dma_ctrl_write(chan, XILINX_MCDMA_CHEN_OFFSET, reg);
+
+ /* Start the fetch of BDs for the channel */
+ reg = dma_ctrl_read(chan, XILINX_MCDMA_CHAN_CR_OFFSET(chan->tdest));
+ reg |= XILINX_MCDMA_CR_RUNSTOP_MASK;
+ dma_ctrl_write(chan, XILINX_MCDMA_CHAN_CR_OFFSET(chan->tdest), reg);
+
+ xilinx_dma_start(chan);
+
+ if (chan->err)
+ return;
+
+ /* Start the transfer */
+ xilinx_write(chan, XILINX_MCDMA_CHAN_TDESC_OFFSET(chan->tdest),
+ tail_segment->phys);
+
+ list_splice_tail_init(&chan->pending_list, &chan->active_list);
+ chan->desc_pendingcount = 0;
+ chan->idle = false;
+}
+
+/**
+ * xilinx_dma_issue_pending - Issue pending transactions
+ * @dchan: DMA channel
+ */
+static void xilinx_dma_issue_pending(struct dma_chan *dchan)
+{
+ struct xilinx_dma_chan *chan = to_xilinx_chan(dchan);
+ unsigned long flags;
+
+ spin_lock_irqsave(&chan->lock, flags);
+ chan->start_transfer(chan);
+ spin_unlock_irqrestore(&chan->lock, flags);
+}
+
+/**
+ * xilinx_dma_device_config - Configure the DMA channel
+ * @dchan: DMA channel
+ * @config: channel configuration
+ */
+static int xilinx_dma_device_config(struct dma_chan *dchan,
+ struct dma_slave_config *config)
+{
+ return 0;
+}
+
+/**
+ * xilinx_dma_complete_descriptor - Mark the active descriptor as complete
+ * @chan : xilinx DMA channel
+ *
+ * CONTEXT: hardirq
+ */
+static void xilinx_dma_complete_descriptor(struct xilinx_dma_chan *chan)
+{
+ struct xilinx_dma_tx_descriptor *desc, *next;
+
+ /* This function was invoked with lock held */
+ if (list_empty(&chan->active_list))
+ return;
+
+ list_for_each_entry_safe(desc, next, &chan->active_list, node) {
+ if (chan->has_sg && chan->xdev->dma_config->dmatype !=
+ XDMA_TYPE_VDMA)
+ desc->residue = xilinx_dma_get_residue(chan, desc);
+ else
+ desc->residue = 0;
+ desc->err = chan->err;
+
+ list_del(&desc->node);
+ if (!desc->cyclic)
+ dma_cookie_complete(&desc->async_tx);
+ list_add_tail(&desc->node, &chan->done_list);
+ }
+}
+
+/**
+ * xilinx_dma_reset - Reset DMA channel
+ * @chan: Driver specific DMA channel
+ *
+ * Return: '0' on success and failure value on error
+ */
+static int xilinx_dma_reset(struct xilinx_dma_chan *chan)
+{
+ int err;
+ u32 tmp;
+
+ dma_ctrl_set(chan, XILINX_DMA_REG_DMACR, XILINX_DMA_DMACR_RESET);
+
+ /* Wait for the hardware to finish reset */
+ err = xilinx_dma_poll_timeout(chan, XILINX_DMA_REG_DMACR, tmp,
+ !(tmp & XILINX_DMA_DMACR_RESET), 0,
+ XILINX_DMA_LOOP_COUNT);
+
+ if (err) {
+ dev_err(chan->dev, "reset timeout, cr %x, sr %x\n",
+ dma_ctrl_read(chan, XILINX_DMA_REG_DMACR),
+ dma_ctrl_read(chan, XILINX_DMA_REG_DMASR));
+ return -ETIMEDOUT;
+ }
+
+ chan->err = false;
+ chan->idle = true;
+ chan->desc_pendingcount = 0;
+ chan->desc_submitcount = 0;
+
+ return err;
+}
+
+/**
+ * xilinx_dma_chan_reset - Reset DMA channel and enable interrupts
+ * @chan: Driver specific DMA channel
+ *
+ * Return: '0' on success and failure value on error
+ */
+static int xilinx_dma_chan_reset(struct xilinx_dma_chan *chan)
+{
+ int err;
+
+ /* Reset VDMA */
+ err = xilinx_dma_reset(chan);
+ if (err)
+ return err;
+
+ /* Enable interrupts */
+ dma_ctrl_set(chan, XILINX_DMA_REG_DMACR,
+ XILINX_DMA_DMAXR_ALL_IRQ_MASK);
+
+ return 0;
+}
+
+/**
+ * xilinx_mcdma_irq_handler - MCDMA Interrupt handler
+ * @irq: IRQ number
+ * @data: Pointer to the Xilinx MCDMA channel structure
+ *
+ * Return: IRQ_HANDLED/IRQ_NONE
+ */
+static irqreturn_t xilinx_mcdma_irq_handler(int irq, void *data)
+{
+ struct xilinx_dma_chan *chan = data;
+ u32 status, ser_offset, chan_sermask, chan_offset = 0, chan_id;
+
+ if (chan->direction == DMA_DEV_TO_MEM)
+ ser_offset = XILINX_MCDMA_RXINT_SER_OFFSET;
+ else
+ ser_offset = XILINX_MCDMA_TXINT_SER_OFFSET;
+
+ /* Read the channel id raising the interrupt*/
+ chan_sermask = dma_ctrl_read(chan, ser_offset);
+ chan_id = ffs(chan_sermask);
+
+ if (!chan_id)
+ return IRQ_NONE;
+
+ if (chan->direction == DMA_DEV_TO_MEM)
+ chan_offset = chan->xdev->dma_config->max_channels / 2;
+
+ chan_offset = chan_offset + (chan_id - 1);
+ chan = chan->xdev->chan[chan_offset];
+ /* Read the status and ack the interrupts. */
+ status = dma_ctrl_read(chan, XILINX_MCDMA_CHAN_SR_OFFSET(chan->tdest));
+ if (!(status & XILINX_MCDMA_IRQ_ALL_MASK))
+ return IRQ_NONE;
+
+ dma_ctrl_write(chan, XILINX_MCDMA_CHAN_SR_OFFSET(chan->tdest),
+ status & XILINX_MCDMA_IRQ_ALL_MASK);
+
+ if (status & XILINX_MCDMA_IRQ_ERR_MASK) {
+ dev_err(chan->dev, "Channel %p has errors %x cdr %x tdr %x\n",
+ chan,
+ dma_ctrl_read(chan, XILINX_MCDMA_CH_ERR_OFFSET),
+ dma_ctrl_read(chan, XILINX_MCDMA_CHAN_CDESC_OFFSET
+ (chan->tdest)),
+ dma_ctrl_read(chan, XILINX_MCDMA_CHAN_TDESC_OFFSET
+ (chan->tdest)));
+ chan->err = true;
+ }
+
+ if (status & XILINX_MCDMA_IRQ_DELAY_MASK) {
+ /*
+ * Device takes too long to do the transfer when user requires
+ * responsiveness.
+ */
+ dev_dbg(chan->dev, "Inter-packet latency too long\n");
+ }
+
+ if (status & XILINX_MCDMA_IRQ_IOC_MASK) {
+ spin_lock(&chan->lock);
+ xilinx_dma_complete_descriptor(chan);
+ chan->idle = true;
+ chan->start_transfer(chan);
+ spin_unlock(&chan->lock);
+ }
+
+ tasklet_schedule(&chan->tasklet);
+ return IRQ_HANDLED;
+}
+
+/**
+ * xilinx_dma_irq_handler - DMA Interrupt handler
+ * @irq: IRQ number
+ * @data: Pointer to the Xilinx DMA channel structure
+ *
+ * Return: IRQ_HANDLED/IRQ_NONE
+ */
+static irqreturn_t xilinx_dma_irq_handler(int irq, void *data)
+{
+ struct xilinx_dma_chan *chan = data;
+ u32 status;
+
+ /* Read the status and ack the interrupts. */
+ status = dma_ctrl_read(chan, XILINX_DMA_REG_DMASR);
+ if (!(status & XILINX_DMA_DMAXR_ALL_IRQ_MASK))
+ return IRQ_NONE;
+
+ dma_ctrl_write(chan, XILINX_DMA_REG_DMASR,
+ status & XILINX_DMA_DMAXR_ALL_IRQ_MASK);
+
+ if (status & XILINX_DMA_DMASR_ERR_IRQ) {
+ /*
+ * An error occurred. If C_FLUSH_ON_FSYNC is enabled and the
+ * error is recoverable, ignore it. Otherwise flag the error.
+ *
+ * Only recoverable errors can be cleared in the DMASR register,
+ * make sure not to write to other error bits to 1.
+ */
+ u32 errors = status & XILINX_DMA_DMASR_ALL_ERR_MASK;
+
+ dma_ctrl_write(chan, XILINX_DMA_REG_DMASR,
+ errors & XILINX_DMA_DMASR_ERR_RECOVER_MASK);
+
+ if (!chan->flush_on_fsync ||
+ (errors & ~XILINX_DMA_DMASR_ERR_RECOVER_MASK)) {
+ dev_err(chan->dev,
+ "Channel %p has errors %x, cdr %x tdr %x\n",
+ chan, errors,
+ dma_ctrl_read(chan, XILINX_DMA_REG_CURDESC),
+ dma_ctrl_read(chan, XILINX_DMA_REG_TAILDESC));
+ chan->err = true;
+ }
+ }
+
+ if (status & XILINX_DMA_DMASR_DLY_CNT_IRQ) {
+ /*
+ * Device takes too long to do the transfer when user requires
+ * responsiveness.
+ */
+ dev_dbg(chan->dev, "Inter-packet latency too long\n");
+ }
+
+ if (status & XILINX_DMA_DMASR_FRM_CNT_IRQ) {
+ spin_lock(&chan->lock);
+ xilinx_dma_complete_descriptor(chan);
+ chan->idle = true;
+ chan->start_transfer(chan);
+ spin_unlock(&chan->lock);
+ }
+
+ tasklet_schedule(&chan->tasklet);
+ return IRQ_HANDLED;
+}
+
+/**
+ * append_desc_queue - Queuing descriptor
+ * @chan: Driver specific dma channel
+ * @desc: dma transaction descriptor
+ */
+static void append_desc_queue(struct xilinx_dma_chan *chan,
+ struct xilinx_dma_tx_descriptor *desc)
+{
+ struct xilinx_vdma_tx_segment *tail_segment;
+ struct xilinx_dma_tx_descriptor *tail_desc;
+ struct xilinx_axidma_tx_segment *axidma_tail_segment;
+ struct xilinx_aximcdma_tx_segment *aximcdma_tail_segment;
+ struct xilinx_cdma_tx_segment *cdma_tail_segment;
+
+ if (list_empty(&chan->pending_list))
+ goto append;
+
+ /*
+ * Add the hardware descriptor to the chain of hardware descriptors
+ * that already exists in memory.
+ */
+ tail_desc = list_last_entry(&chan->pending_list,
+ struct xilinx_dma_tx_descriptor, node);
+ if (chan->xdev->dma_config->dmatype == XDMA_TYPE_VDMA) {
+ tail_segment = list_last_entry(&tail_desc->segments,
+ struct xilinx_vdma_tx_segment,
+ node);
+ tail_segment->hw.next_desc = (u32)desc->async_tx.phys;
+ } else if (chan->xdev->dma_config->dmatype == XDMA_TYPE_CDMA) {
+ cdma_tail_segment = list_last_entry(&tail_desc->segments,
+ struct xilinx_cdma_tx_segment,
+ node);
+ cdma_tail_segment->hw.next_desc = (u32)desc->async_tx.phys;
+ } else if (chan->xdev->dma_config->dmatype == XDMA_TYPE_AXIDMA) {
+ axidma_tail_segment = list_last_entry(&tail_desc->segments,
+ struct xilinx_axidma_tx_segment,
+ node);
+ axidma_tail_segment->hw.next_desc = (u32)desc->async_tx.phys;
+ } else {
+ aximcdma_tail_segment =
+ list_last_entry(&tail_desc->segments,
+ struct xilinx_aximcdma_tx_segment,
+ node);
+ aximcdma_tail_segment->hw.next_desc = (u32)desc->async_tx.phys;
+ }
+
+ /*
+ * Add the software descriptor and all children to the list
+ * of pending transactions
+ */
+append:
+ list_add_tail(&desc->node, &chan->pending_list);
+ chan->desc_pendingcount++;
+
+ if (chan->has_sg && (chan->xdev->dma_config->dmatype == XDMA_TYPE_VDMA)
+ && unlikely(chan->desc_pendingcount > chan->num_frms)) {
+ dev_dbg(chan->dev, "desc pendingcount is too high\n");
+ chan->desc_pendingcount = chan->num_frms;
+ }
+}
+
+/**
+ * xilinx_dma_tx_submit - Submit DMA transaction
+ * @tx: Async transaction descriptor
+ *
+ * Return: cookie value on success and failure value on error
+ */
+static dma_cookie_t xilinx_dma_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+ struct xilinx_dma_tx_descriptor *desc = to_dma_tx_descriptor(tx);
+ struct xilinx_dma_chan *chan = to_xilinx_chan(tx->chan);
+ dma_cookie_t cookie;
+ unsigned long flags;
+ int err;
+
+ if (chan->cyclic) {
+ xilinx_dma_free_tx_descriptor(chan, desc);
+ return -EBUSY;
+ }
+
+ if (chan->err) {
+ /*
+ * If reset fails, need to hard reset the system.
+ * Channel is no longer functional
+ */
+ err = xilinx_dma_chan_reset(chan);
+ if (err < 0)
+ return err;
+ }
+
+ spin_lock_irqsave(&chan->lock, flags);
+
+ cookie = dma_cookie_assign(tx);
+
+ /* Put this transaction onto the tail of the pending queue */
+ append_desc_queue(chan, desc);
+
+ if (desc->cyclic)
+ chan->cyclic = true;
+
+ chan->terminating = false;
+
+ spin_unlock_irqrestore(&chan->lock, flags);
+
+ return cookie;
+}
+
+/**
+ * xilinx_vdma_dma_prep_interleaved - prepare a descriptor for a
+ * DMA_SLAVE transaction
+ * @dchan: DMA channel
+ * @xt: Interleaved template pointer
+ * @flags: transfer ack flags
+ *
+ * Return: Async transaction descriptor on success and NULL on failure
+ */
+static struct dma_async_tx_descriptor *
+xilinx_vdma_dma_prep_interleaved(struct dma_chan *dchan,
+ struct dma_interleaved_template *xt,
+ unsigned long flags)
+{
+ struct xilinx_dma_chan *chan = to_xilinx_chan(dchan);
+ struct xilinx_dma_tx_descriptor *desc;
+ struct xilinx_vdma_tx_segment *segment;
+ struct xilinx_vdma_desc_hw *hw;
+
+ if (!is_slave_direction(xt->dir))
+ return NULL;
+
+ if (!xt->numf || !xt->sgl[0].size)
+ return NULL;
+
+ if (xt->frame_size != 1)
+ return NULL;
+
+ /* Allocate a transaction descriptor. */
+ desc = xilinx_dma_alloc_tx_descriptor(chan);
+ if (!desc)
+ return NULL;
+
+ dma_async_tx_descriptor_init(&desc->async_tx, &chan->common);
+ desc->async_tx.tx_submit = xilinx_dma_tx_submit;
+ async_tx_ack(&desc->async_tx);
+
+ /* Allocate the link descriptor from DMA pool */
+ segment = xilinx_vdma_alloc_tx_segment(chan);
+ if (!segment)
+ goto error;
+
+ /* Fill in the hardware descriptor */
+ hw = &segment->hw;
+ hw->vsize = xt->numf;
+ hw->hsize = xt->sgl[0].size;
+ hw->stride = (xt->sgl[0].icg + xt->sgl[0].size) <<
+ XILINX_DMA_FRMDLY_STRIDE_STRIDE_SHIFT;
+ hw->stride |= chan->config.frm_dly <<
+ XILINX_DMA_FRMDLY_STRIDE_FRMDLY_SHIFT;
+
+ if (xt->dir != DMA_MEM_TO_DEV) {
+ if (chan->ext_addr) {
+ hw->buf_addr = lower_32_bits(xt->dst_start);
+ hw->buf_addr_msb = upper_32_bits(xt->dst_start);
+ } else {
+ hw->buf_addr = xt->dst_start;
+ }
+ } else {
+ if (chan->ext_addr) {
+ hw->buf_addr = lower_32_bits(xt->src_start);
+ hw->buf_addr_msb = upper_32_bits(xt->src_start);
+ } else {
+ hw->buf_addr = xt->src_start;
+ }
+ }
+
+ /* Insert the segment into the descriptor segments list. */
+ list_add_tail(&segment->node, &desc->segments);
+
+ /* Link the last hardware descriptor with the first. */
+ segment = list_first_entry(&desc->segments,
+ struct xilinx_vdma_tx_segment, node);
+ desc->async_tx.phys = segment->phys;
+
+ return &desc->async_tx;
+
+error:
+ xilinx_dma_free_tx_descriptor(chan, desc);
+ return NULL;
+}
+
+/**
+ * xilinx_cdma_prep_memcpy - prepare descriptors for a memcpy transaction
+ * @dchan: DMA channel
+ * @dma_dst: destination address
+ * @dma_src: source address
+ * @len: transfer length
+ * @flags: transfer ack flags
+ *
+ * Return: Async transaction descriptor on success and NULL on failure
+ */
+static struct dma_async_tx_descriptor *
+xilinx_cdma_prep_memcpy(struct dma_chan *dchan, dma_addr_t dma_dst,
+ dma_addr_t dma_src, size_t len, unsigned long flags)
+{
+ struct xilinx_dma_chan *chan = to_xilinx_chan(dchan);
+ struct xilinx_dma_tx_descriptor *desc;
+ struct xilinx_cdma_tx_segment *segment;
+ struct xilinx_cdma_desc_hw *hw;
+
+ if (!len || len > chan->xdev->max_buffer_len)
+ return NULL;
+
+ desc = xilinx_dma_alloc_tx_descriptor(chan);
+ if (!desc)
+ return NULL;
+
+ dma_async_tx_descriptor_init(&desc->async_tx, &chan->common);
+ desc->async_tx.tx_submit = xilinx_dma_tx_submit;
+
+ /* Allocate the link descriptor from DMA pool */
+ segment = xilinx_cdma_alloc_tx_segment(chan);
+ if (!segment)
+ goto error;
+
+ hw = &segment->hw;
+ hw->control = len;
+ hw->src_addr = dma_src;
+ hw->dest_addr = dma_dst;
+ if (chan->ext_addr) {
+ hw->src_addr_msb = upper_32_bits(dma_src);
+ hw->dest_addr_msb = upper_32_bits(dma_dst);
+ }
+
+ /* Insert the segment into the descriptor segments list. */
+ list_add_tail(&segment->node, &desc->segments);
+
+ desc->async_tx.phys = segment->phys;
+ hw->next_desc = segment->phys;
+
+ return &desc->async_tx;
+
+error:
+ xilinx_dma_free_tx_descriptor(chan, desc);
+ return NULL;
+}
+
+/**
+ * xilinx_dma_prep_slave_sg - prepare descriptors for a DMA_SLAVE transaction
+ * @dchan: DMA channel
+ * @sgl: scatterlist to transfer to/from
+ * @sg_len: number of entries in @scatterlist
+ * @direction: DMA direction
+ * @flags: transfer ack flags
+ * @context: APP words of the descriptor
+ *
+ * Return: Async transaction descriptor on success and NULL on failure
+ */
+static struct dma_async_tx_descriptor *xilinx_dma_prep_slave_sg(
+ struct dma_chan *dchan, struct scatterlist *sgl, unsigned int sg_len,
+ enum dma_transfer_direction direction, unsigned long flags,
+ void *context)
+{
+ struct xilinx_dma_chan *chan = to_xilinx_chan(dchan);
+ struct xilinx_dma_tx_descriptor *desc;
+ struct xilinx_axidma_tx_segment *segment = NULL;
+ u32 *app_w = (u32 *)context;
+ struct scatterlist *sg;
+ size_t copy;
+ size_t sg_used;
+ unsigned int i;
+
+ if (!is_slave_direction(direction))
+ return NULL;
+
+ /* Allocate a transaction descriptor. */
+ desc = xilinx_dma_alloc_tx_descriptor(chan);
+ if (!desc)
+ return NULL;
+
+ dma_async_tx_descriptor_init(&desc->async_tx, &chan->common);
+ desc->async_tx.tx_submit = xilinx_dma_tx_submit;
+
+ /* Build transactions using information in the scatter gather list */
+ for_each_sg(sgl, sg, sg_len, i) {
+ sg_used = 0;
+
+ /* Loop until the entire scatterlist entry is used */
+ while (sg_used < sg_dma_len(sg)) {
+ struct xilinx_axidma_desc_hw *hw;
+
+ /* Get a free segment */
+ segment = xilinx_axidma_alloc_tx_segment(chan);
+ if (!segment)
+ goto error;
+
+ /*
+ * Calculate the maximum number of bytes to transfer,
+ * making sure it is less than the hw limit
+ */
+ copy = xilinx_dma_calc_copysize(chan, sg_dma_len(sg),
+ sg_used);
+ hw = &segment->hw;
+
+ /* Fill in the descriptor */
+ xilinx_axidma_buf(chan, hw, sg_dma_address(sg),
+ sg_used, 0);
+
+ hw->control = copy;
+
+ if (chan->direction == DMA_MEM_TO_DEV) {
+ if (app_w)
+ memcpy(hw->app, app_w, sizeof(u32) *
+ XILINX_DMA_NUM_APP_WORDS);
+ }
+
+ sg_used += copy;
+
+ /*
+ * Insert the segment into the descriptor segments
+ * list.
+ */
+ list_add_tail(&segment->node, &desc->segments);
+ }
+ }
+
+ segment = list_first_entry(&desc->segments,
+ struct xilinx_axidma_tx_segment, node);
+ desc->async_tx.phys = segment->phys;
+
+ /* For the last DMA_MEM_TO_DEV transfer, set EOP */
+ if (chan->direction == DMA_MEM_TO_DEV) {
+ segment->hw.control |= XILINX_DMA_BD_SOP;
+ segment = list_last_entry(&desc->segments,
+ struct xilinx_axidma_tx_segment,
+ node);
+ segment->hw.control |= XILINX_DMA_BD_EOP;
+ }
+
+ return &desc->async_tx;
+
+error:
+ xilinx_dma_free_tx_descriptor(chan, desc);
+ return NULL;
+}
+
+/**
+ * xilinx_dma_prep_dma_cyclic - prepare descriptors for a DMA_SLAVE transaction
+ * @dchan: DMA channel
+ * @buf_addr: Physical address of the buffer
+ * @buf_len: Total length of the cyclic buffers
+ * @period_len: length of individual cyclic buffer
+ * @direction: DMA direction
+ * @flags: transfer ack flags
+ *
+ * Return: Async transaction descriptor on success and NULL on failure
+ */
+static struct dma_async_tx_descriptor *xilinx_dma_prep_dma_cyclic(
+ struct dma_chan *dchan, dma_addr_t buf_addr, size_t buf_len,
+ size_t period_len, enum dma_transfer_direction direction,
+ unsigned long flags)
+{
+ struct xilinx_dma_chan *chan = to_xilinx_chan(dchan);
+ struct xilinx_dma_tx_descriptor *desc;
+ struct xilinx_axidma_tx_segment *segment, *head_segment, *prev = NULL;
+ size_t copy, sg_used;
+ unsigned int num_periods;
+ int i;
+ u32 reg;
+
+ if (!period_len)
+ return NULL;
+
+ num_periods = buf_len / period_len;
+
+ if (!num_periods)
+ return NULL;
+
+ if (!is_slave_direction(direction))
+ return NULL;
+
+ /* Allocate a transaction descriptor. */
+ desc = xilinx_dma_alloc_tx_descriptor(chan);
+ if (!desc)
+ return NULL;
+
+ chan->direction = direction;
+ dma_async_tx_descriptor_init(&desc->async_tx, &chan->common);
+ desc->async_tx.tx_submit = xilinx_dma_tx_submit;
+
+ for (i = 0; i < num_periods; ++i) {
+ sg_used = 0;
+
+ while (sg_used < period_len) {
+ struct xilinx_axidma_desc_hw *hw;
+
+ /* Get a free segment */
+ segment = xilinx_axidma_alloc_tx_segment(chan);
+ if (!segment)
+ goto error;
+
+ /*
+ * Calculate the maximum number of bytes to transfer,
+ * making sure it is less than the hw limit
+ */
+ copy = xilinx_dma_calc_copysize(chan, period_len,
+ sg_used);
+ hw = &segment->hw;
+ xilinx_axidma_buf(chan, hw, buf_addr, sg_used,
+ period_len * i);
+ hw->control = copy;
+
+ if (prev)
+ prev->hw.next_desc = segment->phys;
+
+ prev = segment;
+ sg_used += copy;
+
+ /*
+ * Insert the segment into the descriptor segments
+ * list.
+ */
+ list_add_tail(&segment->node, &desc->segments);
+ }
+ }
+
+ head_segment = list_first_entry(&desc->segments,
+ struct xilinx_axidma_tx_segment, node);
+ desc->async_tx.phys = head_segment->phys;
+
+ desc->cyclic = true;
+ reg = dma_ctrl_read(chan, XILINX_DMA_REG_DMACR);
+ reg |= XILINX_DMA_CR_CYCLIC_BD_EN_MASK;
+ dma_ctrl_write(chan, XILINX_DMA_REG_DMACR, reg);
+
+ segment = list_last_entry(&desc->segments,
+ struct xilinx_axidma_tx_segment,
+ node);
+ segment->hw.next_desc = (u32) head_segment->phys;
+
+ /* For the last DMA_MEM_TO_DEV transfer, set EOP */
+ if (direction == DMA_MEM_TO_DEV) {
+ head_segment->hw.control |= XILINX_DMA_BD_SOP;
+ segment->hw.control |= XILINX_DMA_BD_EOP;
+ }
+
+ return &desc->async_tx;
+
+error:
+ xilinx_dma_free_tx_descriptor(chan, desc);
+ return NULL;
+}
+
+/**
+ * xilinx_mcdma_prep_slave_sg - prepare descriptors for a DMA_SLAVE transaction
+ * @dchan: DMA channel
+ * @sgl: scatterlist to transfer to/from
+ * @sg_len: number of entries in @scatterlist
+ * @direction: DMA direction
+ * @flags: transfer ack flags
+ * @context: APP words of the descriptor
+ *
+ * Return: Async transaction descriptor on success and NULL on failure
+ */
+static struct dma_async_tx_descriptor *
+xilinx_mcdma_prep_slave_sg(struct dma_chan *dchan, struct scatterlist *sgl,
+ unsigned int sg_len,
+ enum dma_transfer_direction direction,
+ unsigned long flags, void *context)
+{
+ struct xilinx_dma_chan *chan = to_xilinx_chan(dchan);
+ struct xilinx_dma_tx_descriptor *desc;
+ struct xilinx_aximcdma_tx_segment *segment = NULL;
+ u32 *app_w = (u32 *)context;
+ struct scatterlist *sg;
+ size_t copy;
+ size_t sg_used;
+ unsigned int i;
+
+ if (!is_slave_direction(direction))
+ return NULL;
+
+ /* Allocate a transaction descriptor. */
+ desc = xilinx_dma_alloc_tx_descriptor(chan);
+ if (!desc)
+ return NULL;
+
+ dma_async_tx_descriptor_init(&desc->async_tx, &chan->common);
+ desc->async_tx.tx_submit = xilinx_dma_tx_submit;
+
+ /* Build transactions using information in the scatter gather list */
+ for_each_sg(sgl, sg, sg_len, i) {
+ sg_used = 0;
+
+ /* Loop until the entire scatterlist entry is used */
+ while (sg_used < sg_dma_len(sg)) {
+ struct xilinx_aximcdma_desc_hw *hw;
+
+ /* Get a free segment */
+ segment = xilinx_aximcdma_alloc_tx_segment(chan);
+ if (!segment)
+ goto error;
+
+ /*
+ * Calculate the maximum number of bytes to transfer,
+ * making sure it is less than the hw limit
+ */
+ copy = min_t(size_t, sg_dma_len(sg) - sg_used,
+ chan->xdev->max_buffer_len);
+ hw = &segment->hw;
+
+ /* Fill in the descriptor */
+ xilinx_aximcdma_buf(chan, hw, sg_dma_address(sg),
+ sg_used);
+ hw->control = copy;
+
+ if (chan->direction == DMA_MEM_TO_DEV && app_w) {
+ memcpy(hw->app, app_w, sizeof(u32) *
+ XILINX_DMA_NUM_APP_WORDS);
+ }
+
+ sg_used += copy;
+ /*
+ * Insert the segment into the descriptor segments
+ * list.
+ */
+ list_add_tail(&segment->node, &desc->segments);
+ }
+ }
+
+ segment = list_first_entry(&desc->segments,
+ struct xilinx_aximcdma_tx_segment, node);
+ desc->async_tx.phys = segment->phys;
+
+ /* For the last DMA_MEM_TO_DEV transfer, set EOP */
+ if (chan->direction == DMA_MEM_TO_DEV) {
+ segment->hw.control |= XILINX_MCDMA_BD_SOP;
+ segment = list_last_entry(&desc->segments,
+ struct xilinx_aximcdma_tx_segment,
+ node);
+ segment->hw.control |= XILINX_MCDMA_BD_EOP;
+ }
+
+ return &desc->async_tx;
+
+error:
+ xilinx_dma_free_tx_descriptor(chan, desc);
+
+ return NULL;
+}
+
+/**
+ * xilinx_dma_terminate_all - Halt the channel and free descriptors
+ * @dchan: Driver specific DMA Channel pointer
+ *
+ * Return: '0' always.
+ */
+static int xilinx_dma_terminate_all(struct dma_chan *dchan)
+{
+ struct xilinx_dma_chan *chan = to_xilinx_chan(dchan);
+ u32 reg;
+ int err;
+
+ if (!chan->cyclic) {
+ err = chan->stop_transfer(chan);
+ if (err) {
+ dev_err(chan->dev, "Cannot stop channel %p: %x\n",
+ chan, dma_ctrl_read(chan,
+ XILINX_DMA_REG_DMASR));
+ chan->err = true;
+ }
+ }
+
+ xilinx_dma_chan_reset(chan);
+ /* Remove and free all of the descriptors in the lists */
+ chan->terminating = true;
+ xilinx_dma_free_descriptors(chan);
+ chan->idle = true;
+
+ if (chan->cyclic) {
+ reg = dma_ctrl_read(chan, XILINX_DMA_REG_DMACR);
+ reg &= ~XILINX_DMA_CR_CYCLIC_BD_EN_MASK;
+ dma_ctrl_write(chan, XILINX_DMA_REG_DMACR, reg);
+ chan->cyclic = false;
+ }
+
+ if ((chan->xdev->dma_config->dmatype == XDMA_TYPE_CDMA) && chan->has_sg)
+ dma_ctrl_clr(chan, XILINX_DMA_REG_DMACR,
+ XILINX_CDMA_CR_SGMODE);
+
+ return 0;
+}
+
+static void xilinx_dma_synchronize(struct dma_chan *dchan)
+{
+ struct xilinx_dma_chan *chan = to_xilinx_chan(dchan);
+
+ tasklet_kill(&chan->tasklet);
+}
+
+/**
+ * xilinx_vdma_channel_set_config - Configure VDMA channel
+ * Run-time configuration for Axi VDMA, supports:
+ * . halt the channel
+ * . configure interrupt coalescing and inter-packet delay threshold
+ * . start/stop parking
+ * . enable genlock
+ *
+ * @dchan: DMA channel
+ * @cfg: VDMA device configuration pointer
+ *
+ * Return: '0' on success and failure value on error
+ */
+int xilinx_vdma_channel_set_config(struct dma_chan *dchan,
+ struct xilinx_vdma_config *cfg)
+{
+ struct xilinx_dma_chan *chan = to_xilinx_chan(dchan);
+ u32 dmacr;
+
+ if (cfg->reset)
+ return xilinx_dma_chan_reset(chan);
+
+ dmacr = dma_ctrl_read(chan, XILINX_DMA_REG_DMACR);
+
+ chan->config.frm_dly = cfg->frm_dly;
+ chan->config.park = cfg->park;
+
+ /* genlock settings */
+ chan->config.gen_lock = cfg->gen_lock;
+ chan->config.master = cfg->master;
+
+ dmacr &= ~XILINX_DMA_DMACR_GENLOCK_EN;
+ if (cfg->gen_lock && chan->genlock) {
+ dmacr |= XILINX_DMA_DMACR_GENLOCK_EN;
+ dmacr &= ~XILINX_DMA_DMACR_MASTER_MASK;
+ dmacr |= cfg->master << XILINX_DMA_DMACR_MASTER_SHIFT;
+ }
+
+ chan->config.frm_cnt_en = cfg->frm_cnt_en;
+ chan->config.vflip_en = cfg->vflip_en;
+
+ if (cfg->park)
+ chan->config.park_frm = cfg->park_frm;
+ else
+ chan->config.park_frm = -1;
+
+ chan->config.coalesc = cfg->coalesc;
+ chan->config.delay = cfg->delay;
+
+ if (cfg->coalesc <= XILINX_DMA_DMACR_FRAME_COUNT_MAX) {
+ dmacr &= ~XILINX_DMA_DMACR_FRAME_COUNT_MASK;
+ dmacr |= cfg->coalesc << XILINX_DMA_DMACR_FRAME_COUNT_SHIFT;
+ chan->config.coalesc = cfg->coalesc;
+ }
+
+ if (cfg->delay <= XILINX_DMA_DMACR_DELAY_MAX) {
+ dmacr &= ~XILINX_DMA_DMACR_DELAY_MASK;
+ dmacr |= cfg->delay << XILINX_DMA_DMACR_DELAY_SHIFT;
+ chan->config.delay = cfg->delay;
+ }
+
+ /* FSync Source selection */
+ dmacr &= ~XILINX_DMA_DMACR_FSYNCSRC_MASK;
+ dmacr |= cfg->ext_fsync << XILINX_DMA_DMACR_FSYNCSRC_SHIFT;
+
+ dma_ctrl_write(chan, XILINX_DMA_REG_DMACR, dmacr);
+
+ return 0;
+}
+EXPORT_SYMBOL(xilinx_vdma_channel_set_config);
+
+/* -----------------------------------------------------------------------------
+ * Probe and remove
+ */
+
+/**
+ * xilinx_dma_chan_remove - Per Channel remove function
+ * @chan: Driver specific DMA channel
+ */
+static void xilinx_dma_chan_remove(struct xilinx_dma_chan *chan)
+{
+ /* Disable all interrupts */
+ dma_ctrl_clr(chan, XILINX_DMA_REG_DMACR,
+ XILINX_DMA_DMAXR_ALL_IRQ_MASK);
+
+ if (chan->irq > 0)
+ free_irq(chan->irq, chan);
+
+ tasklet_kill(&chan->tasklet);
+
+ list_del(&chan->common.device_node);
+}
+
+static int axidma_clk_init(struct platform_device *pdev, struct clk **axi_clk,
+ struct clk **tx_clk, struct clk **rx_clk,
+ struct clk **sg_clk, struct clk **tmp_clk)
+{
+ int err;
+
+ *tmp_clk = NULL;
+
+ *axi_clk = devm_clk_get(&pdev->dev, "s_axi_lite_aclk");
+ if (IS_ERR(*axi_clk))
+ return dev_err_probe(&pdev->dev, PTR_ERR(*axi_clk), "failed to get axi_aclk\n");
+
+ *tx_clk = devm_clk_get(&pdev->dev, "m_axi_mm2s_aclk");
+ if (IS_ERR(*tx_clk))
+ *tx_clk = NULL;
+
+ *rx_clk = devm_clk_get(&pdev->dev, "m_axi_s2mm_aclk");
+ if (IS_ERR(*rx_clk))
+ *rx_clk = NULL;
+
+ *sg_clk = devm_clk_get(&pdev->dev, "m_axi_sg_aclk");
+ if (IS_ERR(*sg_clk))
+ *sg_clk = NULL;
+
+ err = clk_prepare_enable(*axi_clk);
+ if (err) {
+ dev_err(&pdev->dev, "failed to enable axi_clk (%d)\n", err);
+ return err;
+ }
+
+ err = clk_prepare_enable(*tx_clk);
+ if (err) {
+ dev_err(&pdev->dev, "failed to enable tx_clk (%d)\n", err);
+ goto err_disable_axiclk;
+ }
+
+ err = clk_prepare_enable(*rx_clk);
+ if (err) {
+ dev_err(&pdev->dev, "failed to enable rx_clk (%d)\n", err);
+ goto err_disable_txclk;
+ }
+
+ err = clk_prepare_enable(*sg_clk);
+ if (err) {
+ dev_err(&pdev->dev, "failed to enable sg_clk (%d)\n", err);
+ goto err_disable_rxclk;
+ }
+
+ return 0;
+
+err_disable_rxclk:
+ clk_disable_unprepare(*rx_clk);
+err_disable_txclk:
+ clk_disable_unprepare(*tx_clk);
+err_disable_axiclk:
+ clk_disable_unprepare(*axi_clk);
+
+ return err;
+}
+
+static int axicdma_clk_init(struct platform_device *pdev, struct clk **axi_clk,
+ struct clk **dev_clk, struct clk **tmp_clk,
+ struct clk **tmp1_clk, struct clk **tmp2_clk)
+{
+ int err;
+
+ *tmp_clk = NULL;
+ *tmp1_clk = NULL;
+ *tmp2_clk = NULL;
+
+ *axi_clk = devm_clk_get(&pdev->dev, "s_axi_lite_aclk");
+ if (IS_ERR(*axi_clk))
+ return dev_err_probe(&pdev->dev, PTR_ERR(*axi_clk), "failed to get axi_aclk\n");
+
+ *dev_clk = devm_clk_get(&pdev->dev, "m_axi_aclk");
+ if (IS_ERR(*dev_clk))
+ return dev_err_probe(&pdev->dev, PTR_ERR(*dev_clk), "failed to get dev_clk\n");
+
+ err = clk_prepare_enable(*axi_clk);
+ if (err) {
+ dev_err(&pdev->dev, "failed to enable axi_clk (%d)\n", err);
+ return err;
+ }
+
+ err = clk_prepare_enable(*dev_clk);
+ if (err) {
+ dev_err(&pdev->dev, "failed to enable dev_clk (%d)\n", err);
+ goto err_disable_axiclk;
+ }
+
+ return 0;
+
+err_disable_axiclk:
+ clk_disable_unprepare(*axi_clk);
+
+ return err;
+}
+
+static int axivdma_clk_init(struct platform_device *pdev, struct clk **axi_clk,
+ struct clk **tx_clk, struct clk **txs_clk,
+ struct clk **rx_clk, struct clk **rxs_clk)
+{
+ int err;
+
+ *axi_clk = devm_clk_get(&pdev->dev, "s_axi_lite_aclk");
+ if (IS_ERR(*axi_clk))
+ return dev_err_probe(&pdev->dev, PTR_ERR(*axi_clk), "failed to get axi_aclk\n");
+
+ *tx_clk = devm_clk_get(&pdev->dev, "m_axi_mm2s_aclk");
+ if (IS_ERR(*tx_clk))
+ *tx_clk = NULL;
+
+ *txs_clk = devm_clk_get(&pdev->dev, "m_axis_mm2s_aclk");
+ if (IS_ERR(*txs_clk))
+ *txs_clk = NULL;
+
+ *rx_clk = devm_clk_get(&pdev->dev, "m_axi_s2mm_aclk");
+ if (IS_ERR(*rx_clk))
+ *rx_clk = NULL;
+
+ *rxs_clk = devm_clk_get(&pdev->dev, "s_axis_s2mm_aclk");
+ if (IS_ERR(*rxs_clk))
+ *rxs_clk = NULL;
+
+ err = clk_prepare_enable(*axi_clk);
+ if (err) {
+ dev_err(&pdev->dev, "failed to enable axi_clk (%d)\n",
+ err);
+ return err;
+ }
+
+ err = clk_prepare_enable(*tx_clk);
+ if (err) {
+ dev_err(&pdev->dev, "failed to enable tx_clk (%d)\n", err);
+ goto err_disable_axiclk;
+ }
+
+ err = clk_prepare_enable(*txs_clk);
+ if (err) {
+ dev_err(&pdev->dev, "failed to enable txs_clk (%d)\n", err);
+ goto err_disable_txclk;
+ }
+
+ err = clk_prepare_enable(*rx_clk);
+ if (err) {
+ dev_err(&pdev->dev, "failed to enable rx_clk (%d)\n", err);
+ goto err_disable_txsclk;
+ }
+
+ err = clk_prepare_enable(*rxs_clk);
+ if (err) {
+ dev_err(&pdev->dev, "failed to enable rxs_clk (%d)\n", err);
+ goto err_disable_rxclk;
+ }
+
+ return 0;
+
+err_disable_rxclk:
+ clk_disable_unprepare(*rx_clk);
+err_disable_txsclk:
+ clk_disable_unprepare(*txs_clk);
+err_disable_txclk:
+ clk_disable_unprepare(*tx_clk);
+err_disable_axiclk:
+ clk_disable_unprepare(*axi_clk);
+
+ return err;
+}
+
+static void xdma_disable_allclks(struct xilinx_dma_device *xdev)
+{
+ clk_disable_unprepare(xdev->rxs_clk);
+ clk_disable_unprepare(xdev->rx_clk);
+ clk_disable_unprepare(xdev->txs_clk);
+ clk_disable_unprepare(xdev->tx_clk);
+ clk_disable_unprepare(xdev->axi_clk);
+}
+
+/**
+ * xilinx_dma_chan_probe - Per Channel Probing
+ * It get channel features from the device tree entry and
+ * initialize special channel handling routines
+ *
+ * @xdev: Driver specific device structure
+ * @node: Device node
+ *
+ * Return: '0' on success and failure value on error
+ */
+static int xilinx_dma_chan_probe(struct xilinx_dma_device *xdev,
+ struct device_node *node)
+{
+ struct xilinx_dma_chan *chan;
+ bool has_dre = false;
+ u32 value, width;
+ int err;
+
+ /* Allocate and initialize the channel structure */
+ chan = devm_kzalloc(xdev->dev, sizeof(*chan), GFP_KERNEL);
+ if (!chan)
+ return -ENOMEM;
+
+ chan->dev = xdev->dev;
+ chan->xdev = xdev;
+ chan->desc_pendingcount = 0x0;
+ chan->ext_addr = xdev->ext_addr;
+ /* This variable ensures that descriptors are not
+ * Submitted when dma engine is in progress. This variable is
+ * Added to avoid polling for a bit in the status register to
+ * Know dma state in the driver hot path.
+ */
+ chan->idle = true;
+
+ spin_lock_init(&chan->lock);
+ INIT_LIST_HEAD(&chan->pending_list);
+ INIT_LIST_HEAD(&chan->done_list);
+ INIT_LIST_HEAD(&chan->active_list);
+ INIT_LIST_HEAD(&chan->free_seg_list);
+
+ /* Retrieve the channel properties from the device tree */
+ has_dre = of_property_read_bool(node, "xlnx,include-dre");
+
+ chan->genlock = of_property_read_bool(node, "xlnx,genlock-mode");
+
+ err = of_property_read_u32(node, "xlnx,datawidth", &value);
+ if (err) {
+ dev_err(xdev->dev, "missing xlnx,datawidth property\n");
+ return err;
+ }
+ width = value >> 3; /* Convert bits to bytes */
+
+ /* If data width is greater than 8 bytes, DRE is not in hw */
+ if (width > 8)
+ has_dre = false;
+
+ if (!has_dre)
+ xdev->common.copy_align = (enum dmaengine_alignment)fls(width - 1);
+
+ if (of_device_is_compatible(node, "xlnx,axi-vdma-mm2s-channel") ||
+ of_device_is_compatible(node, "xlnx,axi-dma-mm2s-channel") ||
+ of_device_is_compatible(node, "xlnx,axi-cdma-channel")) {
+ chan->direction = DMA_MEM_TO_DEV;
+ chan->id = xdev->mm2s_chan_id++;
+ chan->tdest = chan->id;
+
+ chan->ctrl_offset = XILINX_DMA_MM2S_CTRL_OFFSET;
+ if (xdev->dma_config->dmatype == XDMA_TYPE_VDMA) {
+ chan->desc_offset = XILINX_VDMA_MM2S_DESC_OFFSET;
+ chan->config.park = 1;
+
+ if (xdev->flush_on_fsync == XILINX_DMA_FLUSH_BOTH ||
+ xdev->flush_on_fsync == XILINX_DMA_FLUSH_MM2S)
+ chan->flush_on_fsync = true;
+ }
+ } else if (of_device_is_compatible(node,
+ "xlnx,axi-vdma-s2mm-channel") ||
+ of_device_is_compatible(node,
+ "xlnx,axi-dma-s2mm-channel")) {
+ chan->direction = DMA_DEV_TO_MEM;
+ chan->id = xdev->s2mm_chan_id++;
+ chan->tdest = chan->id - xdev->dma_config->max_channels / 2;
+ chan->has_vflip = of_property_read_bool(node,
+ "xlnx,enable-vert-flip");
+ if (chan->has_vflip) {
+ chan->config.vflip_en = dma_read(chan,
+ XILINX_VDMA_REG_ENABLE_VERTICAL_FLIP) &
+ XILINX_VDMA_ENABLE_VERTICAL_FLIP;
+ }
+
+ if (xdev->dma_config->dmatype == XDMA_TYPE_AXIMCDMA)
+ chan->ctrl_offset = XILINX_MCDMA_S2MM_CTRL_OFFSET;
+ else
+ chan->ctrl_offset = XILINX_DMA_S2MM_CTRL_OFFSET;
+
+ if (xdev->dma_config->dmatype == XDMA_TYPE_VDMA) {
+ chan->desc_offset = XILINX_VDMA_S2MM_DESC_OFFSET;
+ chan->config.park = 1;
+
+ if (xdev->flush_on_fsync == XILINX_DMA_FLUSH_BOTH ||
+ xdev->flush_on_fsync == XILINX_DMA_FLUSH_S2MM)
+ chan->flush_on_fsync = true;
+ }
+ } else {
+ dev_err(xdev->dev, "Invalid channel compatible node\n");
+ return -EINVAL;
+ }
+
+ /* Request the interrupt */
+ chan->irq = of_irq_get(node, chan->tdest);
+ if (chan->irq < 0)
+ return dev_err_probe(xdev->dev, chan->irq, "failed to get irq\n");
+ err = request_irq(chan->irq, xdev->dma_config->irq_handler,
+ IRQF_SHARED, "xilinx-dma-controller", chan);
+ if (err) {
+ dev_err(xdev->dev, "unable to request IRQ %d\n", chan->irq);
+ return err;
+ }
+
+ if (xdev->dma_config->dmatype == XDMA_TYPE_AXIDMA) {
+ chan->start_transfer = xilinx_dma_start_transfer;
+ chan->stop_transfer = xilinx_dma_stop_transfer;
+ } else if (xdev->dma_config->dmatype == XDMA_TYPE_AXIMCDMA) {
+ chan->start_transfer = xilinx_mcdma_start_transfer;
+ chan->stop_transfer = xilinx_dma_stop_transfer;
+ } else if (xdev->dma_config->dmatype == XDMA_TYPE_CDMA) {
+ chan->start_transfer = xilinx_cdma_start_transfer;
+ chan->stop_transfer = xilinx_cdma_stop_transfer;
+ } else {
+ chan->start_transfer = xilinx_vdma_start_transfer;
+ chan->stop_transfer = xilinx_dma_stop_transfer;
+ }
+
+ /* check if SG is enabled (only for AXIDMA, AXIMCDMA, and CDMA) */
+ if (xdev->dma_config->dmatype != XDMA_TYPE_VDMA) {
+ if (xdev->dma_config->dmatype == XDMA_TYPE_AXIMCDMA ||
+ dma_ctrl_read(chan, XILINX_DMA_REG_DMASR) &
+ XILINX_DMA_DMASR_SG_MASK)
+ chan->has_sg = true;
+ dev_dbg(chan->dev, "ch %d: SG %s\n", chan->id,
+ chan->has_sg ? "enabled" : "disabled");
+ }
+
+ /* Initialize the tasklet */
+ tasklet_setup(&chan->tasklet, xilinx_dma_do_tasklet);
+
+ /*
+ * Initialize the DMA channel and add it to the DMA engine channels
+ * list.
+ */
+ chan->common.device = &xdev->common;
+
+ list_add_tail(&chan->common.device_node, &xdev->common.channels);
+ xdev->chan[chan->id] = chan;
+
+ /* Reset the channel */
+ err = xilinx_dma_chan_reset(chan);
+ if (err < 0) {
+ dev_err(xdev->dev, "Reset channel failed\n");
+ return err;
+ }
+
+ return 0;
+}
+
+/**
+ * xilinx_dma_child_probe - Per child node probe
+ * It get number of dma-channels per child node from
+ * device-tree and initializes all the channels.
+ *
+ * @xdev: Driver specific device structure
+ * @node: Device node
+ *
+ * Return: 0 always.
+ */
+static int xilinx_dma_child_probe(struct xilinx_dma_device *xdev,
+ struct device_node *node)
+{
+ int ret, i;
+ u32 nr_channels = 1;
+
+ ret = of_property_read_u32(node, "dma-channels", &nr_channels);
+ if (xdev->dma_config->dmatype == XDMA_TYPE_AXIMCDMA && ret < 0)
+ dev_warn(xdev->dev, "missing dma-channels property\n");
+
+ for (i = 0; i < nr_channels; i++) {
+ ret = xilinx_dma_chan_probe(xdev, node);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+/**
+ * of_dma_xilinx_xlate - Translation function
+ * @dma_spec: Pointer to DMA specifier as found in the device tree
+ * @ofdma: Pointer to DMA controller data
+ *
+ * Return: DMA channel pointer on success and NULL on error
+ */
+static struct dma_chan *of_dma_xilinx_xlate(struct of_phandle_args *dma_spec,
+ struct of_dma *ofdma)
+{
+ struct xilinx_dma_device *xdev = ofdma->of_dma_data;
+ int chan_id = dma_spec->args[0];
+
+ if (chan_id >= xdev->dma_config->max_channels || !xdev->chan[chan_id])
+ return NULL;
+
+ return dma_get_slave_channel(&xdev->chan[chan_id]->common);
+}
+
+static const struct xilinx_dma_config axidma_config = {
+ .dmatype = XDMA_TYPE_AXIDMA,
+ .clk_init = axidma_clk_init,
+ .irq_handler = xilinx_dma_irq_handler,
+ .max_channels = XILINX_DMA_MAX_CHANS_PER_DEVICE,
+};
+
+static const struct xilinx_dma_config aximcdma_config = {
+ .dmatype = XDMA_TYPE_AXIMCDMA,
+ .clk_init = axidma_clk_init,
+ .irq_handler = xilinx_mcdma_irq_handler,
+ .max_channels = XILINX_MCDMA_MAX_CHANS_PER_DEVICE,
+};
+static const struct xilinx_dma_config axicdma_config = {
+ .dmatype = XDMA_TYPE_CDMA,
+ .clk_init = axicdma_clk_init,
+ .irq_handler = xilinx_dma_irq_handler,
+ .max_channels = XILINX_CDMA_MAX_CHANS_PER_DEVICE,
+};
+
+static const struct xilinx_dma_config axivdma_config = {
+ .dmatype = XDMA_TYPE_VDMA,
+ .clk_init = axivdma_clk_init,
+ .irq_handler = xilinx_dma_irq_handler,
+ .max_channels = XILINX_DMA_MAX_CHANS_PER_DEVICE,
+};
+
+static const struct of_device_id xilinx_dma_of_ids[] = {
+ { .compatible = "xlnx,axi-dma-1.00.a", .data = &axidma_config },
+ { .compatible = "xlnx,axi-cdma-1.00.a", .data = &axicdma_config },
+ { .compatible = "xlnx,axi-vdma-1.00.a", .data = &axivdma_config },
+ { .compatible = "xlnx,axi-mcdma-1.00.a", .data = &aximcdma_config },
+ {}
+};
+MODULE_DEVICE_TABLE(of, xilinx_dma_of_ids);
+
+/**
+ * xilinx_dma_probe - Driver probe function
+ * @pdev: Pointer to the platform_device structure
+ *
+ * Return: '0' on success and failure value on error
+ */
+static int xilinx_dma_probe(struct platform_device *pdev)
+{
+ int (*clk_init)(struct platform_device *, struct clk **, struct clk **,
+ struct clk **, struct clk **, struct clk **)
+ = axivdma_clk_init;
+ struct device_node *node = pdev->dev.of_node;
+ struct xilinx_dma_device *xdev;
+ struct device_node *child, *np = pdev->dev.of_node;
+ u32 num_frames, addr_width, len_width;
+ int i, err;
+
+ /* Allocate and initialize the DMA engine structure */
+ xdev = devm_kzalloc(&pdev->dev, sizeof(*xdev), GFP_KERNEL);
+ if (!xdev)
+ return -ENOMEM;
+
+ xdev->dev = &pdev->dev;
+ if (np) {
+ const struct of_device_id *match;
+
+ match = of_match_node(xilinx_dma_of_ids, np);
+ if (match && match->data) {
+ xdev->dma_config = match->data;
+ clk_init = xdev->dma_config->clk_init;
+ }
+ }
+
+ err = clk_init(pdev, &xdev->axi_clk, &xdev->tx_clk, &xdev->txs_clk,
+ &xdev->rx_clk, &xdev->rxs_clk);
+ if (err)
+ return err;
+
+ /* Request and map I/O memory */
+ xdev->regs = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(xdev->regs)) {
+ err = PTR_ERR(xdev->regs);
+ goto disable_clks;
+ }
+ /* Retrieve the DMA engine properties from the device tree */
+ xdev->max_buffer_len = GENMASK(XILINX_DMA_MAX_TRANS_LEN_MAX - 1, 0);
+ xdev->s2mm_chan_id = xdev->dma_config->max_channels / 2;
+
+ if (xdev->dma_config->dmatype == XDMA_TYPE_AXIDMA ||
+ xdev->dma_config->dmatype == XDMA_TYPE_AXIMCDMA) {
+ if (!of_property_read_u32(node, "xlnx,sg-length-width",
+ &len_width)) {
+ if (len_width < XILINX_DMA_MAX_TRANS_LEN_MIN ||
+ len_width > XILINX_DMA_V2_MAX_TRANS_LEN_MAX) {
+ dev_warn(xdev->dev,
+ "invalid xlnx,sg-length-width property value. Using default width\n");
+ } else {
+ if (len_width > XILINX_DMA_MAX_TRANS_LEN_MAX)
+ dev_warn(xdev->dev, "Please ensure that IP supports buffer length > 23 bits\n");
+ xdev->max_buffer_len =
+ GENMASK(len_width - 1, 0);
+ }
+ }
+ }
+
+ if (xdev->dma_config->dmatype == XDMA_TYPE_VDMA) {
+ err = of_property_read_u32(node, "xlnx,num-fstores",
+ &num_frames);
+ if (err < 0) {
+ dev_err(xdev->dev,
+ "missing xlnx,num-fstores property\n");
+ goto disable_clks;
+ }
+
+ err = of_property_read_u32(node, "xlnx,flush-fsync",
+ &xdev->flush_on_fsync);
+ if (err < 0)
+ dev_warn(xdev->dev,
+ "missing xlnx,flush-fsync property\n");
+ }
+
+ err = of_property_read_u32(node, "xlnx,addrwidth", &addr_width);
+ if (err < 0)
+ dev_warn(xdev->dev, "missing xlnx,addrwidth property\n");
+
+ if (addr_width > 32)
+ xdev->ext_addr = true;
+ else
+ xdev->ext_addr = false;
+
+ /* Set the dma mask bits */
+ err = dma_set_mask_and_coherent(xdev->dev, DMA_BIT_MASK(addr_width));
+ if (err < 0) {
+ dev_err(xdev->dev, "DMA mask error %d\n", err);
+ goto disable_clks;
+ }
+
+ /* Initialize the DMA engine */
+ xdev->common.dev = &pdev->dev;
+
+ INIT_LIST_HEAD(&xdev->common.channels);
+ if (!(xdev->dma_config->dmatype == XDMA_TYPE_CDMA)) {
+ dma_cap_set(DMA_SLAVE, xdev->common.cap_mask);
+ dma_cap_set(DMA_PRIVATE, xdev->common.cap_mask);
+ }
+
+ xdev->common.device_alloc_chan_resources =
+ xilinx_dma_alloc_chan_resources;
+ xdev->common.device_free_chan_resources =
+ xilinx_dma_free_chan_resources;
+ xdev->common.device_terminate_all = xilinx_dma_terminate_all;
+ xdev->common.device_synchronize = xilinx_dma_synchronize;
+ xdev->common.device_tx_status = xilinx_dma_tx_status;
+ xdev->common.device_issue_pending = xilinx_dma_issue_pending;
+ xdev->common.device_config = xilinx_dma_device_config;
+ if (xdev->dma_config->dmatype == XDMA_TYPE_AXIDMA) {
+ dma_cap_set(DMA_CYCLIC, xdev->common.cap_mask);
+ xdev->common.device_prep_slave_sg = xilinx_dma_prep_slave_sg;
+ xdev->common.device_prep_dma_cyclic =
+ xilinx_dma_prep_dma_cyclic;
+ /* Residue calculation is supported by only AXI DMA and CDMA */
+ xdev->common.residue_granularity =
+ DMA_RESIDUE_GRANULARITY_SEGMENT;
+ } else if (xdev->dma_config->dmatype == XDMA_TYPE_CDMA) {
+ dma_cap_set(DMA_MEMCPY, xdev->common.cap_mask);
+ xdev->common.device_prep_dma_memcpy = xilinx_cdma_prep_memcpy;
+ /* Residue calculation is supported by only AXI DMA and CDMA */
+ xdev->common.residue_granularity =
+ DMA_RESIDUE_GRANULARITY_SEGMENT;
+ } else if (xdev->dma_config->dmatype == XDMA_TYPE_AXIMCDMA) {
+ xdev->common.device_prep_slave_sg = xilinx_mcdma_prep_slave_sg;
+ } else {
+ xdev->common.device_prep_interleaved_dma =
+ xilinx_vdma_dma_prep_interleaved;
+ }
+
+ platform_set_drvdata(pdev, xdev);
+
+ /* Initialize the channels */
+ for_each_child_of_node(node, child) {
+ err = xilinx_dma_child_probe(xdev, child);
+ if (err < 0) {
+ of_node_put(child);
+ goto error;
+ }
+ }
+
+ if (xdev->dma_config->dmatype == XDMA_TYPE_VDMA) {
+ for (i = 0; i < xdev->dma_config->max_channels; i++)
+ if (xdev->chan[i])
+ xdev->chan[i]->num_frms = num_frames;
+ }
+
+ /* Register the DMA engine with the core */
+ err = dma_async_device_register(&xdev->common);
+ if (err) {
+ dev_err(xdev->dev, "failed to register the dma device\n");
+ goto error;
+ }
+
+ err = of_dma_controller_register(node, of_dma_xilinx_xlate,
+ xdev);
+ if (err < 0) {
+ dev_err(&pdev->dev, "Unable to register DMA to DT\n");
+ dma_async_device_unregister(&xdev->common);
+ goto error;
+ }
+
+ if (xdev->dma_config->dmatype == XDMA_TYPE_AXIDMA)
+ dev_info(&pdev->dev, "Xilinx AXI DMA Engine Driver Probed!!\n");
+ else if (xdev->dma_config->dmatype == XDMA_TYPE_CDMA)
+ dev_info(&pdev->dev, "Xilinx AXI CDMA Engine Driver Probed!!\n");
+ else if (xdev->dma_config->dmatype == XDMA_TYPE_AXIMCDMA)
+ dev_info(&pdev->dev, "Xilinx AXI MCDMA Engine Driver Probed!!\n");
+ else
+ dev_info(&pdev->dev, "Xilinx AXI VDMA Engine Driver Probed!!\n");
+
+ return 0;
+
+error:
+ for (i = 0; i < xdev->dma_config->max_channels; i++)
+ if (xdev->chan[i])
+ xilinx_dma_chan_remove(xdev->chan[i]);
+disable_clks:
+ xdma_disable_allclks(xdev);
+
+ return err;
+}
+
+/**
+ * xilinx_dma_remove - Driver remove function
+ * @pdev: Pointer to the platform_device structure
+ *
+ * Return: Always '0'
+ */
+static int xilinx_dma_remove(struct platform_device *pdev)
+{
+ struct xilinx_dma_device *xdev = platform_get_drvdata(pdev);
+ int i;
+
+ of_dma_controller_free(pdev->dev.of_node);
+
+ dma_async_device_unregister(&xdev->common);
+
+ for (i = 0; i < xdev->dma_config->max_channels; i++)
+ if (xdev->chan[i])
+ xilinx_dma_chan_remove(xdev->chan[i]);
+
+ xdma_disable_allclks(xdev);
+
+ return 0;
+}
+
+static struct platform_driver xilinx_vdma_driver = {
+ .driver = {
+ .name = "xilinx-vdma",
+ .of_match_table = xilinx_dma_of_ids,
+ },
+ .probe = xilinx_dma_probe,
+ .remove = xilinx_dma_remove,
+};
+
+module_platform_driver(xilinx_vdma_driver);
+
+MODULE_AUTHOR("Xilinx, Inc.");
+MODULE_DESCRIPTION("Xilinx VDMA driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/xilinx/xilinx_dpdma.c b/drivers/dma/xilinx/xilinx_dpdma.c
new file mode 100644
index 000000000..84dc5240a
--- /dev/null
+++ b/drivers/dma/xilinx/xilinx_dpdma.c
@@ -0,0 +1,1777 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Xilinx ZynqMP DPDMA Engine driver
+ *
+ * Copyright (C) 2015 - 2020 Xilinx, Inc.
+ *
+ * Author: Hyun Woo Kwon <hyun.kwon@xilinx.com>
+ */
+
+#include <linux/bitfield.h>
+#include <linux/bits.h>
+#include <linux/clk.h>
+#include <linux/debugfs.h>
+#include <linux/delay.h>
+#include <linux/dma/xilinx_dpdma.h>
+#include <linux/dmaengine.h>
+#include <linux/dmapool.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_dma.h>
+#include <linux/platform_device.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/wait.h>
+
+#include <dt-bindings/dma/xlnx-zynqmp-dpdma.h>
+
+#include "../dmaengine.h"
+#include "../virt-dma.h"
+
+/* DPDMA registers */
+#define XILINX_DPDMA_ERR_CTRL 0x000
+#define XILINX_DPDMA_ISR 0x004
+#define XILINX_DPDMA_IMR 0x008
+#define XILINX_DPDMA_IEN 0x00c
+#define XILINX_DPDMA_IDS 0x010
+#define XILINX_DPDMA_INTR_DESC_DONE(n) BIT((n) + 0)
+#define XILINX_DPDMA_INTR_DESC_DONE_MASK GENMASK(5, 0)
+#define XILINX_DPDMA_INTR_NO_OSTAND(n) BIT((n) + 6)
+#define XILINX_DPDMA_INTR_NO_OSTAND_MASK GENMASK(11, 6)
+#define XILINX_DPDMA_INTR_AXI_ERR(n) BIT((n) + 12)
+#define XILINX_DPDMA_INTR_AXI_ERR_MASK GENMASK(17, 12)
+#define XILINX_DPDMA_INTR_DESC_ERR(n) BIT((n) + 16)
+#define XILINX_DPDMA_INTR_DESC_ERR_MASK GENMASK(23, 18)
+#define XILINX_DPDMA_INTR_WR_CMD_FIFO_FULL BIT(24)
+#define XILINX_DPDMA_INTR_WR_DATA_FIFO_FULL BIT(25)
+#define XILINX_DPDMA_INTR_AXI_4K_CROSS BIT(26)
+#define XILINX_DPDMA_INTR_VSYNC BIT(27)
+#define XILINX_DPDMA_INTR_CHAN_ERR_MASK 0x00041000
+#define XILINX_DPDMA_INTR_CHAN_ERR 0x00fff000
+#define XILINX_DPDMA_INTR_GLOBAL_ERR 0x07000000
+#define XILINX_DPDMA_INTR_ERR_ALL 0x07fff000
+#define XILINX_DPDMA_INTR_CHAN_MASK 0x00041041
+#define XILINX_DPDMA_INTR_GLOBAL_MASK 0x0f000000
+#define XILINX_DPDMA_INTR_ALL 0x0fffffff
+#define XILINX_DPDMA_EISR 0x014
+#define XILINX_DPDMA_EIMR 0x018
+#define XILINX_DPDMA_EIEN 0x01c
+#define XILINX_DPDMA_EIDS 0x020
+#define XILINX_DPDMA_EINTR_INV_APB BIT(0)
+#define XILINX_DPDMA_EINTR_RD_AXI_ERR(n) BIT((n) + 1)
+#define XILINX_DPDMA_EINTR_RD_AXI_ERR_MASK GENMASK(6, 1)
+#define XILINX_DPDMA_EINTR_PRE_ERR(n) BIT((n) + 7)
+#define XILINX_DPDMA_EINTR_PRE_ERR_MASK GENMASK(12, 7)
+#define XILINX_DPDMA_EINTR_CRC_ERR(n) BIT((n) + 13)
+#define XILINX_DPDMA_EINTR_CRC_ERR_MASK GENMASK(18, 13)
+#define XILINX_DPDMA_EINTR_WR_AXI_ERR(n) BIT((n) + 19)
+#define XILINX_DPDMA_EINTR_WR_AXI_ERR_MASK GENMASK(24, 19)
+#define XILINX_DPDMA_EINTR_DESC_DONE_ERR(n) BIT((n) + 25)
+#define XILINX_DPDMA_EINTR_DESC_DONE_ERR_MASK GENMASK(30, 25)
+#define XILINX_DPDMA_EINTR_RD_CMD_FIFO_FULL BIT(32)
+#define XILINX_DPDMA_EINTR_CHAN_ERR_MASK 0x02082082
+#define XILINX_DPDMA_EINTR_CHAN_ERR 0x7ffffffe
+#define XILINX_DPDMA_EINTR_GLOBAL_ERR 0x80000001
+#define XILINX_DPDMA_EINTR_ALL 0xffffffff
+#define XILINX_DPDMA_CNTL 0x100
+#define XILINX_DPDMA_GBL 0x104
+#define XILINX_DPDMA_GBL_TRIG_MASK(n) ((n) << 0)
+#define XILINX_DPDMA_GBL_RETRIG_MASK(n) ((n) << 6)
+#define XILINX_DPDMA_ALC0_CNTL 0x108
+#define XILINX_DPDMA_ALC0_STATUS 0x10c
+#define XILINX_DPDMA_ALC0_MAX 0x110
+#define XILINX_DPDMA_ALC0_MIN 0x114
+#define XILINX_DPDMA_ALC0_ACC 0x118
+#define XILINX_DPDMA_ALC0_ACC_TRAN 0x11c
+#define XILINX_DPDMA_ALC1_CNTL 0x120
+#define XILINX_DPDMA_ALC1_STATUS 0x124
+#define XILINX_DPDMA_ALC1_MAX 0x128
+#define XILINX_DPDMA_ALC1_MIN 0x12c
+#define XILINX_DPDMA_ALC1_ACC 0x130
+#define XILINX_DPDMA_ALC1_ACC_TRAN 0x134
+
+/* Channel register */
+#define XILINX_DPDMA_CH_BASE 0x200
+#define XILINX_DPDMA_CH_OFFSET 0x100
+#define XILINX_DPDMA_CH_DESC_START_ADDRE 0x000
+#define XILINX_DPDMA_CH_DESC_START_ADDRE_MASK GENMASK(15, 0)
+#define XILINX_DPDMA_CH_DESC_START_ADDR 0x004
+#define XILINX_DPDMA_CH_DESC_NEXT_ADDRE 0x008
+#define XILINX_DPDMA_CH_DESC_NEXT_ADDR 0x00c
+#define XILINX_DPDMA_CH_PYLD_CUR_ADDRE 0x010
+#define XILINX_DPDMA_CH_PYLD_CUR_ADDR 0x014
+#define XILINX_DPDMA_CH_CNTL 0x018
+#define XILINX_DPDMA_CH_CNTL_ENABLE BIT(0)
+#define XILINX_DPDMA_CH_CNTL_PAUSE BIT(1)
+#define XILINX_DPDMA_CH_CNTL_QOS_DSCR_WR_MASK GENMASK(5, 2)
+#define XILINX_DPDMA_CH_CNTL_QOS_DSCR_RD_MASK GENMASK(9, 6)
+#define XILINX_DPDMA_CH_CNTL_QOS_DATA_RD_MASK GENMASK(13, 10)
+#define XILINX_DPDMA_CH_CNTL_QOS_VID_CLASS 11
+#define XILINX_DPDMA_CH_STATUS 0x01c
+#define XILINX_DPDMA_CH_STATUS_OTRAN_CNT_MASK GENMASK(24, 21)
+#define XILINX_DPDMA_CH_VDO 0x020
+#define XILINX_DPDMA_CH_PYLD_SZ 0x024
+#define XILINX_DPDMA_CH_DESC_ID 0x028
+#define XILINX_DPDMA_CH_DESC_ID_MASK GENMASK(15, 0)
+
+/* DPDMA descriptor fields */
+#define XILINX_DPDMA_DESC_CONTROL_PREEMBLE 0xa5
+#define XILINX_DPDMA_DESC_CONTROL_COMPLETE_INTR BIT(8)
+#define XILINX_DPDMA_DESC_CONTROL_DESC_UPDATE BIT(9)
+#define XILINX_DPDMA_DESC_CONTROL_IGNORE_DONE BIT(10)
+#define XILINX_DPDMA_DESC_CONTROL_FRAG_MODE BIT(18)
+#define XILINX_DPDMA_DESC_CONTROL_LAST BIT(19)
+#define XILINX_DPDMA_DESC_CONTROL_ENABLE_CRC BIT(20)
+#define XILINX_DPDMA_DESC_CONTROL_LAST_OF_FRAME BIT(21)
+#define XILINX_DPDMA_DESC_ID_MASK GENMASK(15, 0)
+#define XILINX_DPDMA_DESC_HSIZE_STRIDE_HSIZE_MASK GENMASK(17, 0)
+#define XILINX_DPDMA_DESC_HSIZE_STRIDE_STRIDE_MASK GENMASK(31, 18)
+#define XILINX_DPDMA_DESC_ADDR_EXT_NEXT_ADDR_MASK GENMASK(15, 0)
+#define XILINX_DPDMA_DESC_ADDR_EXT_SRC_ADDR_MASK GENMASK(31, 16)
+
+#define XILINX_DPDMA_ALIGN_BYTES 256
+#define XILINX_DPDMA_LINESIZE_ALIGN_BITS 128
+
+#define XILINX_DPDMA_NUM_CHAN 6
+
+struct xilinx_dpdma_chan;
+
+/**
+ * struct xilinx_dpdma_hw_desc - DPDMA hardware descriptor
+ * @control: control configuration field
+ * @desc_id: descriptor ID
+ * @xfer_size: transfer size
+ * @hsize_stride: horizontal size and stride
+ * @timestamp_lsb: LSB of time stamp
+ * @timestamp_msb: MSB of time stamp
+ * @addr_ext: upper 16 bit of 48 bit address (next_desc and src_addr)
+ * @next_desc: next descriptor 32 bit address
+ * @src_addr: payload source address (1st page, 32 LSB)
+ * @addr_ext_23: payload source address (3nd and 3rd pages, 16 LSBs)
+ * @addr_ext_45: payload source address (4th and 5th pages, 16 LSBs)
+ * @src_addr2: payload source address (2nd page, 32 LSB)
+ * @src_addr3: payload source address (3rd page, 32 LSB)
+ * @src_addr4: payload source address (4th page, 32 LSB)
+ * @src_addr5: payload source address (5th page, 32 LSB)
+ * @crc: descriptor CRC
+ */
+struct xilinx_dpdma_hw_desc {
+ u32 control;
+ u32 desc_id;
+ u32 xfer_size;
+ u32 hsize_stride;
+ u32 timestamp_lsb;
+ u32 timestamp_msb;
+ u32 addr_ext;
+ u32 next_desc;
+ u32 src_addr;
+ u32 addr_ext_23;
+ u32 addr_ext_45;
+ u32 src_addr2;
+ u32 src_addr3;
+ u32 src_addr4;
+ u32 src_addr5;
+ u32 crc;
+} __aligned(XILINX_DPDMA_ALIGN_BYTES);
+
+/**
+ * struct xilinx_dpdma_sw_desc - DPDMA software descriptor
+ * @hw: DPDMA hardware descriptor
+ * @node: list node for software descriptors
+ * @dma_addr: DMA address of the software descriptor
+ */
+struct xilinx_dpdma_sw_desc {
+ struct xilinx_dpdma_hw_desc hw;
+ struct list_head node;
+ dma_addr_t dma_addr;
+};
+
+/**
+ * struct xilinx_dpdma_tx_desc - DPDMA transaction descriptor
+ * @vdesc: virtual DMA descriptor
+ * @chan: DMA channel
+ * @descriptors: list of software descriptors
+ * @error: an error has been detected with this descriptor
+ */
+struct xilinx_dpdma_tx_desc {
+ struct virt_dma_desc vdesc;
+ struct xilinx_dpdma_chan *chan;
+ struct list_head descriptors;
+ bool error;
+};
+
+#define to_dpdma_tx_desc(_desc) \
+ container_of(_desc, struct xilinx_dpdma_tx_desc, vdesc)
+
+/**
+ * struct xilinx_dpdma_chan - DPDMA channel
+ * @vchan: virtual DMA channel
+ * @reg: register base address
+ * @id: channel ID
+ * @wait_to_stop: queue to wait for outstanding transacitons before stopping
+ * @running: true if the channel is running
+ * @first_frame: flag for the first frame of stream
+ * @video_group: flag if multi-channel operation is needed for video channels
+ * @lock: lock to access struct xilinx_dpdma_chan
+ * @desc_pool: descriptor allocation pool
+ * @err_task: error IRQ bottom half handler
+ * @desc: References to descriptors being processed
+ * @desc.pending: Descriptor schedule to the hardware, pending execution
+ * @desc.active: Descriptor being executed by the hardware
+ * @xdev: DPDMA device
+ */
+struct xilinx_dpdma_chan {
+ struct virt_dma_chan vchan;
+ void __iomem *reg;
+ unsigned int id;
+
+ wait_queue_head_t wait_to_stop;
+ bool running;
+ bool first_frame;
+ bool video_group;
+
+ spinlock_t lock; /* lock to access struct xilinx_dpdma_chan */
+ struct dma_pool *desc_pool;
+ struct tasklet_struct err_task;
+
+ struct {
+ struct xilinx_dpdma_tx_desc *pending;
+ struct xilinx_dpdma_tx_desc *active;
+ } desc;
+
+ struct xilinx_dpdma_device *xdev;
+};
+
+#define to_xilinx_chan(_chan) \
+ container_of(_chan, struct xilinx_dpdma_chan, vchan.chan)
+
+/**
+ * struct xilinx_dpdma_device - DPDMA device
+ * @common: generic dma device structure
+ * @reg: register base address
+ * @dev: generic device structure
+ * @irq: the interrupt number
+ * @axi_clk: axi clock
+ * @chan: DPDMA channels
+ * @ext_addr: flag for 64 bit system (48 bit addressing)
+ */
+struct xilinx_dpdma_device {
+ struct dma_device common;
+ void __iomem *reg;
+ struct device *dev;
+ int irq;
+
+ struct clk *axi_clk;
+ struct xilinx_dpdma_chan *chan[XILINX_DPDMA_NUM_CHAN];
+
+ bool ext_addr;
+};
+
+/* -----------------------------------------------------------------------------
+ * DebugFS
+ */
+#define XILINX_DPDMA_DEBUGFS_READ_MAX_SIZE 32
+#define XILINX_DPDMA_DEBUGFS_UINT16_MAX_STR "65535"
+
+/* Match xilinx_dpdma_testcases vs dpdma_debugfs_reqs[] entry */
+enum xilinx_dpdma_testcases {
+ DPDMA_TC_INTR_DONE,
+ DPDMA_TC_NONE
+};
+
+struct xilinx_dpdma_debugfs {
+ enum xilinx_dpdma_testcases testcase;
+ u16 xilinx_dpdma_irq_done_count;
+ unsigned int chan_id;
+};
+
+static struct xilinx_dpdma_debugfs dpdma_debugfs;
+struct xilinx_dpdma_debugfs_request {
+ const char *name;
+ enum xilinx_dpdma_testcases tc;
+ ssize_t (*read)(char *buf);
+ int (*write)(char *args);
+};
+
+static void xilinx_dpdma_debugfs_desc_done_irq(struct xilinx_dpdma_chan *chan)
+{
+ if (IS_ENABLED(CONFIG_DEBUG_FS) && chan->id == dpdma_debugfs.chan_id)
+ dpdma_debugfs.xilinx_dpdma_irq_done_count++;
+}
+
+static ssize_t xilinx_dpdma_debugfs_desc_done_irq_read(char *buf)
+{
+ size_t out_str_len;
+
+ dpdma_debugfs.testcase = DPDMA_TC_NONE;
+
+ out_str_len = strlen(XILINX_DPDMA_DEBUGFS_UINT16_MAX_STR);
+ out_str_len = min_t(size_t, XILINX_DPDMA_DEBUGFS_READ_MAX_SIZE,
+ out_str_len);
+ snprintf(buf, out_str_len, "%d",
+ dpdma_debugfs.xilinx_dpdma_irq_done_count);
+
+ return 0;
+}
+
+static int xilinx_dpdma_debugfs_desc_done_irq_write(char *args)
+{
+ char *arg;
+ int ret;
+ u32 id;
+
+ arg = strsep(&args, " ");
+ if (!arg || strncasecmp(arg, "start", 5))
+ return -EINVAL;
+
+ arg = strsep(&args, " ");
+ if (!arg)
+ return -EINVAL;
+
+ ret = kstrtou32(arg, 0, &id);
+ if (ret < 0)
+ return ret;
+
+ if (id < ZYNQMP_DPDMA_VIDEO0 || id > ZYNQMP_DPDMA_AUDIO1)
+ return -EINVAL;
+
+ dpdma_debugfs.testcase = DPDMA_TC_INTR_DONE;
+ dpdma_debugfs.xilinx_dpdma_irq_done_count = 0;
+ dpdma_debugfs.chan_id = id;
+
+ return 0;
+}
+
+/* Match xilinx_dpdma_testcases vs dpdma_debugfs_reqs[] entry */
+static struct xilinx_dpdma_debugfs_request dpdma_debugfs_reqs[] = {
+ {
+ .name = "DESCRIPTOR_DONE_INTR",
+ .tc = DPDMA_TC_INTR_DONE,
+ .read = xilinx_dpdma_debugfs_desc_done_irq_read,
+ .write = xilinx_dpdma_debugfs_desc_done_irq_write,
+ },
+};
+
+static ssize_t xilinx_dpdma_debugfs_read(struct file *f, char __user *buf,
+ size_t size, loff_t *pos)
+{
+ enum xilinx_dpdma_testcases testcase;
+ char *kern_buff;
+ int ret = 0;
+
+ if (*pos != 0 || size <= 0)
+ return -EINVAL;
+
+ kern_buff = kzalloc(XILINX_DPDMA_DEBUGFS_READ_MAX_SIZE, GFP_KERNEL);
+ if (!kern_buff) {
+ dpdma_debugfs.testcase = DPDMA_TC_NONE;
+ return -ENOMEM;
+ }
+
+ testcase = READ_ONCE(dpdma_debugfs.testcase);
+ if (testcase != DPDMA_TC_NONE) {
+ ret = dpdma_debugfs_reqs[testcase].read(kern_buff);
+ if (ret < 0)
+ goto done;
+ } else {
+ strscpy(kern_buff, "No testcase executed",
+ XILINX_DPDMA_DEBUGFS_READ_MAX_SIZE);
+ }
+
+ size = min(size, strlen(kern_buff));
+ if (copy_to_user(buf, kern_buff, size))
+ ret = -EFAULT;
+
+done:
+ kfree(kern_buff);
+ if (ret)
+ return ret;
+
+ *pos = size + 1;
+ return size;
+}
+
+static ssize_t xilinx_dpdma_debugfs_write(struct file *f,
+ const char __user *buf, size_t size,
+ loff_t *pos)
+{
+ char *kern_buff, *kern_buff_start;
+ char *testcase;
+ unsigned int i;
+ int ret;
+
+ if (*pos != 0 || size <= 0)
+ return -EINVAL;
+
+ /* Supporting single instance of test as of now. */
+ if (dpdma_debugfs.testcase != DPDMA_TC_NONE)
+ return -EBUSY;
+
+ kern_buff = kzalloc(size, GFP_KERNEL);
+ if (!kern_buff)
+ return -ENOMEM;
+ kern_buff_start = kern_buff;
+
+ ret = strncpy_from_user(kern_buff, buf, size);
+ if (ret < 0)
+ goto done;
+
+ /* Read the testcase name from a user request. */
+ testcase = strsep(&kern_buff, " ");
+
+ for (i = 0; i < ARRAY_SIZE(dpdma_debugfs_reqs); i++) {
+ if (!strcasecmp(testcase, dpdma_debugfs_reqs[i].name))
+ break;
+ }
+
+ if (i == ARRAY_SIZE(dpdma_debugfs_reqs)) {
+ ret = -EINVAL;
+ goto done;
+ }
+
+ ret = dpdma_debugfs_reqs[i].write(kern_buff);
+ if (ret < 0)
+ goto done;
+
+ ret = size;
+
+done:
+ kfree(kern_buff_start);
+ return ret;
+}
+
+static const struct file_operations fops_xilinx_dpdma_dbgfs = {
+ .owner = THIS_MODULE,
+ .read = xilinx_dpdma_debugfs_read,
+ .write = xilinx_dpdma_debugfs_write,
+};
+
+static void xilinx_dpdma_debugfs_init(struct xilinx_dpdma_device *xdev)
+{
+ struct dentry *dent;
+
+ dpdma_debugfs.testcase = DPDMA_TC_NONE;
+
+ dent = debugfs_create_file("testcase", 0444, xdev->common.dbg_dev_root,
+ NULL, &fops_xilinx_dpdma_dbgfs);
+ if (IS_ERR(dent))
+ dev_err(xdev->dev, "Failed to create debugfs testcase file\n");
+}
+
+/* -----------------------------------------------------------------------------
+ * I/O Accessors
+ */
+
+static inline u32 dpdma_read(void __iomem *base, u32 offset)
+{
+ return ioread32(base + offset);
+}
+
+static inline void dpdma_write(void __iomem *base, u32 offset, u32 val)
+{
+ iowrite32(val, base + offset);
+}
+
+static inline void dpdma_clr(void __iomem *base, u32 offset, u32 clr)
+{
+ dpdma_write(base, offset, dpdma_read(base, offset) & ~clr);
+}
+
+static inline void dpdma_set(void __iomem *base, u32 offset, u32 set)
+{
+ dpdma_write(base, offset, dpdma_read(base, offset) | set);
+}
+
+/* -----------------------------------------------------------------------------
+ * Descriptor Operations
+ */
+
+/**
+ * xilinx_dpdma_sw_desc_set_dma_addrs - Set DMA addresses in the descriptor
+ * @xdev: DPDMA device
+ * @sw_desc: The software descriptor in which to set DMA addresses
+ * @prev: The previous descriptor
+ * @dma_addr: array of dma addresses
+ * @num_src_addr: number of addresses in @dma_addr
+ *
+ * Set all the DMA addresses in the hardware descriptor corresponding to @dev
+ * from @dma_addr. If a previous descriptor is specified in @prev, its next
+ * descriptor DMA address is set to the DMA address of @sw_desc. @prev may be
+ * identical to @sw_desc for cyclic transfers.
+ */
+static void xilinx_dpdma_sw_desc_set_dma_addrs(struct xilinx_dpdma_device *xdev,
+ struct xilinx_dpdma_sw_desc *sw_desc,
+ struct xilinx_dpdma_sw_desc *prev,
+ dma_addr_t dma_addr[],
+ unsigned int num_src_addr)
+{
+ struct xilinx_dpdma_hw_desc *hw_desc = &sw_desc->hw;
+ unsigned int i;
+
+ hw_desc->src_addr = lower_32_bits(dma_addr[0]);
+ if (xdev->ext_addr)
+ hw_desc->addr_ext |=
+ FIELD_PREP(XILINX_DPDMA_DESC_ADDR_EXT_SRC_ADDR_MASK,
+ upper_32_bits(dma_addr[0]));
+
+ for (i = 1; i < num_src_addr; i++) {
+ u32 *addr = &hw_desc->src_addr2;
+
+ addr[i - 1] = lower_32_bits(dma_addr[i]);
+
+ if (xdev->ext_addr) {
+ u32 *addr_ext = &hw_desc->addr_ext_23;
+ u32 addr_msb;
+
+ addr_msb = upper_32_bits(dma_addr[i]) & GENMASK(15, 0);
+ addr_msb <<= 16 * ((i - 1) % 2);
+ addr_ext[(i - 1) / 2] |= addr_msb;
+ }
+ }
+
+ if (!prev)
+ return;
+
+ prev->hw.next_desc = lower_32_bits(sw_desc->dma_addr);
+ if (xdev->ext_addr)
+ prev->hw.addr_ext |=
+ FIELD_PREP(XILINX_DPDMA_DESC_ADDR_EXT_NEXT_ADDR_MASK,
+ upper_32_bits(sw_desc->dma_addr));
+}
+
+/**
+ * xilinx_dpdma_chan_alloc_sw_desc - Allocate a software descriptor
+ * @chan: DPDMA channel
+ *
+ * Allocate a software descriptor from the channel's descriptor pool.
+ *
+ * Return: a software descriptor or NULL.
+ */
+static struct xilinx_dpdma_sw_desc *
+xilinx_dpdma_chan_alloc_sw_desc(struct xilinx_dpdma_chan *chan)
+{
+ struct xilinx_dpdma_sw_desc *sw_desc;
+ dma_addr_t dma_addr;
+
+ sw_desc = dma_pool_zalloc(chan->desc_pool, GFP_ATOMIC, &dma_addr);
+ if (!sw_desc)
+ return NULL;
+
+ sw_desc->dma_addr = dma_addr;
+
+ return sw_desc;
+}
+
+/**
+ * xilinx_dpdma_chan_free_sw_desc - Free a software descriptor
+ * @chan: DPDMA channel
+ * @sw_desc: software descriptor to free
+ *
+ * Free a software descriptor from the channel's descriptor pool.
+ */
+static void
+xilinx_dpdma_chan_free_sw_desc(struct xilinx_dpdma_chan *chan,
+ struct xilinx_dpdma_sw_desc *sw_desc)
+{
+ dma_pool_free(chan->desc_pool, sw_desc, sw_desc->dma_addr);
+}
+
+/**
+ * xilinx_dpdma_chan_dump_tx_desc - Dump a tx descriptor
+ * @chan: DPDMA channel
+ * @tx_desc: tx descriptor to dump
+ *
+ * Dump contents of a tx descriptor
+ */
+static void xilinx_dpdma_chan_dump_tx_desc(struct xilinx_dpdma_chan *chan,
+ struct xilinx_dpdma_tx_desc *tx_desc)
+{
+ struct xilinx_dpdma_sw_desc *sw_desc;
+ struct device *dev = chan->xdev->dev;
+ unsigned int i = 0;
+
+ dev_dbg(dev, "------- TX descriptor dump start -------\n");
+ dev_dbg(dev, "------- channel ID = %d -------\n", chan->id);
+
+ list_for_each_entry(sw_desc, &tx_desc->descriptors, node) {
+ struct xilinx_dpdma_hw_desc *hw_desc = &sw_desc->hw;
+
+ dev_dbg(dev, "------- HW descriptor %d -------\n", i++);
+ dev_dbg(dev, "descriptor DMA addr: %pad\n", &sw_desc->dma_addr);
+ dev_dbg(dev, "control: 0x%08x\n", hw_desc->control);
+ dev_dbg(dev, "desc_id: 0x%08x\n", hw_desc->desc_id);
+ dev_dbg(dev, "xfer_size: 0x%08x\n", hw_desc->xfer_size);
+ dev_dbg(dev, "hsize_stride: 0x%08x\n", hw_desc->hsize_stride);
+ dev_dbg(dev, "timestamp_lsb: 0x%08x\n", hw_desc->timestamp_lsb);
+ dev_dbg(dev, "timestamp_msb: 0x%08x\n", hw_desc->timestamp_msb);
+ dev_dbg(dev, "addr_ext: 0x%08x\n", hw_desc->addr_ext);
+ dev_dbg(dev, "next_desc: 0x%08x\n", hw_desc->next_desc);
+ dev_dbg(dev, "src_addr: 0x%08x\n", hw_desc->src_addr);
+ dev_dbg(dev, "addr_ext_23: 0x%08x\n", hw_desc->addr_ext_23);
+ dev_dbg(dev, "addr_ext_45: 0x%08x\n", hw_desc->addr_ext_45);
+ dev_dbg(dev, "src_addr2: 0x%08x\n", hw_desc->src_addr2);
+ dev_dbg(dev, "src_addr3: 0x%08x\n", hw_desc->src_addr3);
+ dev_dbg(dev, "src_addr4: 0x%08x\n", hw_desc->src_addr4);
+ dev_dbg(dev, "src_addr5: 0x%08x\n", hw_desc->src_addr5);
+ dev_dbg(dev, "crc: 0x%08x\n", hw_desc->crc);
+ }
+
+ dev_dbg(dev, "------- TX descriptor dump end -------\n");
+}
+
+/**
+ * xilinx_dpdma_chan_alloc_tx_desc - Allocate a transaction descriptor
+ * @chan: DPDMA channel
+ *
+ * Allocate a tx descriptor.
+ *
+ * Return: a tx descriptor or NULL.
+ */
+static struct xilinx_dpdma_tx_desc *
+xilinx_dpdma_chan_alloc_tx_desc(struct xilinx_dpdma_chan *chan)
+{
+ struct xilinx_dpdma_tx_desc *tx_desc;
+
+ tx_desc = kzalloc(sizeof(*tx_desc), GFP_NOWAIT);
+ if (!tx_desc)
+ return NULL;
+
+ INIT_LIST_HEAD(&tx_desc->descriptors);
+ tx_desc->chan = chan;
+ tx_desc->error = false;
+
+ return tx_desc;
+}
+
+/**
+ * xilinx_dpdma_chan_free_tx_desc - Free a virtual DMA descriptor
+ * @vdesc: virtual DMA descriptor
+ *
+ * Free the virtual DMA descriptor @vdesc including its software descriptors.
+ */
+static void xilinx_dpdma_chan_free_tx_desc(struct virt_dma_desc *vdesc)
+{
+ struct xilinx_dpdma_sw_desc *sw_desc, *next;
+ struct xilinx_dpdma_tx_desc *desc;
+
+ if (!vdesc)
+ return;
+
+ desc = to_dpdma_tx_desc(vdesc);
+
+ list_for_each_entry_safe(sw_desc, next, &desc->descriptors, node) {
+ list_del(&sw_desc->node);
+ xilinx_dpdma_chan_free_sw_desc(desc->chan, sw_desc);
+ }
+
+ kfree(desc);
+}
+
+/**
+ * xilinx_dpdma_chan_prep_interleaved_dma - Prepare an interleaved dma
+ * descriptor
+ * @chan: DPDMA channel
+ * @xt: dma interleaved template
+ *
+ * Prepare a tx descriptor including internal software/hardware descriptors
+ * based on @xt.
+ *
+ * Return: A DPDMA TX descriptor on success, or NULL.
+ */
+static struct xilinx_dpdma_tx_desc *
+xilinx_dpdma_chan_prep_interleaved_dma(struct xilinx_dpdma_chan *chan,
+ struct dma_interleaved_template *xt)
+{
+ struct xilinx_dpdma_tx_desc *tx_desc;
+ struct xilinx_dpdma_sw_desc *sw_desc;
+ struct xilinx_dpdma_hw_desc *hw_desc;
+ size_t hsize = xt->sgl[0].size;
+ size_t stride = hsize + xt->sgl[0].icg;
+
+ if (!IS_ALIGNED(xt->src_start, XILINX_DPDMA_ALIGN_BYTES)) {
+ dev_err(chan->xdev->dev,
+ "chan%u: buffer should be aligned at %d B\n",
+ chan->id, XILINX_DPDMA_ALIGN_BYTES);
+ return NULL;
+ }
+
+ tx_desc = xilinx_dpdma_chan_alloc_tx_desc(chan);
+ if (!tx_desc)
+ return NULL;
+
+ sw_desc = xilinx_dpdma_chan_alloc_sw_desc(chan);
+ if (!sw_desc) {
+ xilinx_dpdma_chan_free_tx_desc(&tx_desc->vdesc);
+ return NULL;
+ }
+
+ xilinx_dpdma_sw_desc_set_dma_addrs(chan->xdev, sw_desc, sw_desc,
+ &xt->src_start, 1);
+
+ hw_desc = &sw_desc->hw;
+ hsize = ALIGN(hsize, XILINX_DPDMA_LINESIZE_ALIGN_BITS / 8);
+ hw_desc->xfer_size = hsize * xt->numf;
+ hw_desc->hsize_stride =
+ FIELD_PREP(XILINX_DPDMA_DESC_HSIZE_STRIDE_HSIZE_MASK, hsize) |
+ FIELD_PREP(XILINX_DPDMA_DESC_HSIZE_STRIDE_STRIDE_MASK,
+ stride / 16);
+ hw_desc->control |= XILINX_DPDMA_DESC_CONTROL_PREEMBLE;
+ hw_desc->control |= XILINX_DPDMA_DESC_CONTROL_COMPLETE_INTR;
+ hw_desc->control |= XILINX_DPDMA_DESC_CONTROL_IGNORE_DONE;
+ hw_desc->control |= XILINX_DPDMA_DESC_CONTROL_LAST_OF_FRAME;
+
+ list_add_tail(&sw_desc->node, &tx_desc->descriptors);
+
+ return tx_desc;
+}
+
+/* -----------------------------------------------------------------------------
+ * DPDMA Channel Operations
+ */
+
+/**
+ * xilinx_dpdma_chan_enable - Enable the channel
+ * @chan: DPDMA channel
+ *
+ * Enable the channel and its interrupts. Set the QoS values for video class.
+ */
+static void xilinx_dpdma_chan_enable(struct xilinx_dpdma_chan *chan)
+{
+ u32 reg;
+
+ reg = (XILINX_DPDMA_INTR_CHAN_MASK << chan->id)
+ | XILINX_DPDMA_INTR_GLOBAL_MASK;
+ dpdma_write(chan->xdev->reg, XILINX_DPDMA_IEN, reg);
+ reg = (XILINX_DPDMA_EINTR_CHAN_ERR_MASK << chan->id)
+ | XILINX_DPDMA_INTR_GLOBAL_ERR;
+ dpdma_write(chan->xdev->reg, XILINX_DPDMA_EIEN, reg);
+
+ reg = XILINX_DPDMA_CH_CNTL_ENABLE
+ | FIELD_PREP(XILINX_DPDMA_CH_CNTL_QOS_DSCR_WR_MASK,
+ XILINX_DPDMA_CH_CNTL_QOS_VID_CLASS)
+ | FIELD_PREP(XILINX_DPDMA_CH_CNTL_QOS_DSCR_RD_MASK,
+ XILINX_DPDMA_CH_CNTL_QOS_VID_CLASS)
+ | FIELD_PREP(XILINX_DPDMA_CH_CNTL_QOS_DATA_RD_MASK,
+ XILINX_DPDMA_CH_CNTL_QOS_VID_CLASS);
+ dpdma_set(chan->reg, XILINX_DPDMA_CH_CNTL, reg);
+}
+
+/**
+ * xilinx_dpdma_chan_disable - Disable the channel
+ * @chan: DPDMA channel
+ *
+ * Disable the channel and its interrupts.
+ */
+static void xilinx_dpdma_chan_disable(struct xilinx_dpdma_chan *chan)
+{
+ u32 reg;
+
+ reg = XILINX_DPDMA_INTR_CHAN_MASK << chan->id;
+ dpdma_write(chan->xdev->reg, XILINX_DPDMA_IEN, reg);
+ reg = XILINX_DPDMA_EINTR_CHAN_ERR_MASK << chan->id;
+ dpdma_write(chan->xdev->reg, XILINX_DPDMA_EIEN, reg);
+
+ dpdma_clr(chan->reg, XILINX_DPDMA_CH_CNTL, XILINX_DPDMA_CH_CNTL_ENABLE);
+}
+
+/**
+ * xilinx_dpdma_chan_pause - Pause the channel
+ * @chan: DPDMA channel
+ *
+ * Pause the channel.
+ */
+static void xilinx_dpdma_chan_pause(struct xilinx_dpdma_chan *chan)
+{
+ dpdma_set(chan->reg, XILINX_DPDMA_CH_CNTL, XILINX_DPDMA_CH_CNTL_PAUSE);
+}
+
+/**
+ * xilinx_dpdma_chan_unpause - Unpause the channel
+ * @chan: DPDMA channel
+ *
+ * Unpause the channel.
+ */
+static void xilinx_dpdma_chan_unpause(struct xilinx_dpdma_chan *chan)
+{
+ dpdma_clr(chan->reg, XILINX_DPDMA_CH_CNTL, XILINX_DPDMA_CH_CNTL_PAUSE);
+}
+
+static u32 xilinx_dpdma_chan_video_group_ready(struct xilinx_dpdma_chan *chan)
+{
+ struct xilinx_dpdma_device *xdev = chan->xdev;
+ u32 channels = 0;
+ unsigned int i;
+
+ for (i = ZYNQMP_DPDMA_VIDEO0; i <= ZYNQMP_DPDMA_VIDEO2; i++) {
+ if (xdev->chan[i]->video_group && !xdev->chan[i]->running)
+ return 0;
+
+ if (xdev->chan[i]->video_group)
+ channels |= BIT(i);
+ }
+
+ return channels;
+}
+
+/**
+ * xilinx_dpdma_chan_queue_transfer - Queue the next transfer
+ * @chan: DPDMA channel
+ *
+ * Queue the next descriptor, if any, to the hardware. If the channel is
+ * stopped, start it first. Otherwise retrigger it with the next descriptor.
+ */
+static void xilinx_dpdma_chan_queue_transfer(struct xilinx_dpdma_chan *chan)
+{
+ struct xilinx_dpdma_device *xdev = chan->xdev;
+ struct xilinx_dpdma_sw_desc *sw_desc;
+ struct xilinx_dpdma_tx_desc *desc;
+ struct virt_dma_desc *vdesc;
+ u32 reg, channels;
+ bool first_frame;
+
+ lockdep_assert_held(&chan->lock);
+
+ if (chan->desc.pending)
+ return;
+
+ if (!chan->running) {
+ xilinx_dpdma_chan_unpause(chan);
+ xilinx_dpdma_chan_enable(chan);
+ chan->first_frame = true;
+ chan->running = true;
+ }
+
+ vdesc = vchan_next_desc(&chan->vchan);
+ if (!vdesc)
+ return;
+
+ desc = to_dpdma_tx_desc(vdesc);
+ chan->desc.pending = desc;
+ list_del(&desc->vdesc.node);
+
+ /*
+ * Assign the cookie to descriptors in this transaction. Only 16 bit
+ * will be used, but it should be enough.
+ */
+ list_for_each_entry(sw_desc, &desc->descriptors, node)
+ sw_desc->hw.desc_id = desc->vdesc.tx.cookie
+ & XILINX_DPDMA_CH_DESC_ID_MASK;
+
+ sw_desc = list_first_entry(&desc->descriptors,
+ struct xilinx_dpdma_sw_desc, node);
+ dpdma_write(chan->reg, XILINX_DPDMA_CH_DESC_START_ADDR,
+ lower_32_bits(sw_desc->dma_addr));
+ if (xdev->ext_addr)
+ dpdma_write(chan->reg, XILINX_DPDMA_CH_DESC_START_ADDRE,
+ FIELD_PREP(XILINX_DPDMA_CH_DESC_START_ADDRE_MASK,
+ upper_32_bits(sw_desc->dma_addr)));
+
+ first_frame = chan->first_frame;
+ chan->first_frame = false;
+
+ if (chan->video_group) {
+ channels = xilinx_dpdma_chan_video_group_ready(chan);
+ /*
+ * Trigger the transfer only when all channels in the group are
+ * ready.
+ */
+ if (!channels)
+ return;
+ } else {
+ channels = BIT(chan->id);
+ }
+
+ if (first_frame)
+ reg = XILINX_DPDMA_GBL_TRIG_MASK(channels);
+ else
+ reg = XILINX_DPDMA_GBL_RETRIG_MASK(channels);
+
+ dpdma_write(xdev->reg, XILINX_DPDMA_GBL, reg);
+}
+
+/**
+ * xilinx_dpdma_chan_ostand - Number of outstanding transactions
+ * @chan: DPDMA channel
+ *
+ * Read and return the number of outstanding transactions from register.
+ *
+ * Return: Number of outstanding transactions from the status register.
+ */
+static u32 xilinx_dpdma_chan_ostand(struct xilinx_dpdma_chan *chan)
+{
+ return FIELD_GET(XILINX_DPDMA_CH_STATUS_OTRAN_CNT_MASK,
+ dpdma_read(chan->reg, XILINX_DPDMA_CH_STATUS));
+}
+
+/**
+ * xilinx_dpdma_chan_notify_no_ostand - Notify no outstanding transaction event
+ * @chan: DPDMA channel
+ *
+ * Notify waiters for no outstanding event, so waiters can stop the channel
+ * safely. This function is supposed to be called when 'no outstanding'
+ * interrupt is generated. The 'no outstanding' interrupt is disabled and
+ * should be re-enabled when this event is handled. If the channel status
+ * register still shows some number of outstanding transactions, the interrupt
+ * remains enabled.
+ *
+ * Return: 0 on success. On failure, -EWOULDBLOCK if there's still outstanding
+ * transaction(s).
+ */
+static int xilinx_dpdma_chan_notify_no_ostand(struct xilinx_dpdma_chan *chan)
+{
+ u32 cnt;
+
+ cnt = xilinx_dpdma_chan_ostand(chan);
+ if (cnt) {
+ dev_dbg(chan->xdev->dev,
+ "chan%u: %d outstanding transactions\n",
+ chan->id, cnt);
+ return -EWOULDBLOCK;
+ }
+
+ /* Disable 'no outstanding' interrupt */
+ dpdma_write(chan->xdev->reg, XILINX_DPDMA_IDS,
+ XILINX_DPDMA_INTR_NO_OSTAND(chan->id));
+ wake_up(&chan->wait_to_stop);
+
+ return 0;
+}
+
+/**
+ * xilinx_dpdma_chan_wait_no_ostand - Wait for the no outstanding irq
+ * @chan: DPDMA channel
+ *
+ * Wait for the no outstanding transaction interrupt. This functions can sleep
+ * for 50ms.
+ *
+ * Return: 0 on success. On failure, -ETIMEOUT for time out, or the error code
+ * from wait_event_interruptible_timeout().
+ */
+static int xilinx_dpdma_chan_wait_no_ostand(struct xilinx_dpdma_chan *chan)
+{
+ int ret;
+
+ /* Wait for a no outstanding transaction interrupt upto 50msec */
+ ret = wait_event_interruptible_timeout(chan->wait_to_stop,
+ !xilinx_dpdma_chan_ostand(chan),
+ msecs_to_jiffies(50));
+ if (ret > 0) {
+ dpdma_write(chan->xdev->reg, XILINX_DPDMA_IEN,
+ XILINX_DPDMA_INTR_NO_OSTAND(chan->id));
+ return 0;
+ }
+
+ dev_err(chan->xdev->dev, "chan%u: not ready to stop: %d trans\n",
+ chan->id, xilinx_dpdma_chan_ostand(chan));
+
+ if (ret == 0)
+ return -ETIMEDOUT;
+
+ return ret;
+}
+
+/**
+ * xilinx_dpdma_chan_poll_no_ostand - Poll the outstanding transaction status
+ * @chan: DPDMA channel
+ *
+ * Poll the outstanding transaction status, and return when there's no
+ * outstanding transaction. This functions can be used in the interrupt context
+ * or where the atomicity is required. Calling thread may wait more than 50ms.
+ *
+ * Return: 0 on success, or -ETIMEDOUT.
+ */
+static int xilinx_dpdma_chan_poll_no_ostand(struct xilinx_dpdma_chan *chan)
+{
+ u32 cnt, loop = 50000;
+
+ /* Poll at least for 50ms (20 fps). */
+ do {
+ cnt = xilinx_dpdma_chan_ostand(chan);
+ udelay(1);
+ } while (loop-- > 0 && cnt);
+
+ if (loop) {
+ dpdma_write(chan->xdev->reg, XILINX_DPDMA_IEN,
+ XILINX_DPDMA_INTR_NO_OSTAND(chan->id));
+ return 0;
+ }
+
+ dev_err(chan->xdev->dev, "chan%u: not ready to stop: %d trans\n",
+ chan->id, xilinx_dpdma_chan_ostand(chan));
+
+ return -ETIMEDOUT;
+}
+
+/**
+ * xilinx_dpdma_chan_stop - Stop the channel
+ * @chan: DPDMA channel
+ *
+ * Stop a previously paused channel by first waiting for completion of all
+ * outstanding transaction and then disabling the channel.
+ *
+ * Return: 0 on success, or -ETIMEDOUT if the channel failed to stop.
+ */
+static int xilinx_dpdma_chan_stop(struct xilinx_dpdma_chan *chan)
+{
+ unsigned long flags;
+ int ret;
+
+ ret = xilinx_dpdma_chan_wait_no_ostand(chan);
+ if (ret)
+ return ret;
+
+ spin_lock_irqsave(&chan->lock, flags);
+ xilinx_dpdma_chan_disable(chan);
+ chan->running = false;
+ spin_unlock_irqrestore(&chan->lock, flags);
+
+ return 0;
+}
+
+/**
+ * xilinx_dpdma_chan_done_irq - Handle hardware descriptor completion
+ * @chan: DPDMA channel
+ *
+ * Handle completion of the currently active descriptor (@chan->desc.active). As
+ * we currently support cyclic transfers only, this just invokes the cyclic
+ * callback. The descriptor will be completed at the VSYNC interrupt when a new
+ * descriptor replaces it.
+ */
+static void xilinx_dpdma_chan_done_irq(struct xilinx_dpdma_chan *chan)
+{
+ struct xilinx_dpdma_tx_desc *active;
+ unsigned long flags;
+
+ spin_lock_irqsave(&chan->lock, flags);
+
+ xilinx_dpdma_debugfs_desc_done_irq(chan);
+
+ active = chan->desc.active;
+ if (active)
+ vchan_cyclic_callback(&active->vdesc);
+ else
+ dev_warn(chan->xdev->dev,
+ "chan%u: DONE IRQ with no active descriptor!\n",
+ chan->id);
+
+ spin_unlock_irqrestore(&chan->lock, flags);
+}
+
+/**
+ * xilinx_dpdma_chan_vsync_irq - Handle hardware descriptor scheduling
+ * @chan: DPDMA channel
+ *
+ * At VSYNC the active descriptor may have been replaced by the pending
+ * descriptor. Detect this through the DESC_ID and perform appropriate
+ * bookkeeping.
+ */
+static void xilinx_dpdma_chan_vsync_irq(struct xilinx_dpdma_chan *chan)
+{
+ struct xilinx_dpdma_tx_desc *pending;
+ struct xilinx_dpdma_sw_desc *sw_desc;
+ unsigned long flags;
+ u32 desc_id;
+
+ spin_lock_irqsave(&chan->lock, flags);
+
+ pending = chan->desc.pending;
+ if (!chan->running || !pending)
+ goto out;
+
+ desc_id = dpdma_read(chan->reg, XILINX_DPDMA_CH_DESC_ID)
+ & XILINX_DPDMA_CH_DESC_ID_MASK;
+
+ /* If the retrigger raced with vsync, retry at the next frame. */
+ sw_desc = list_first_entry(&pending->descriptors,
+ struct xilinx_dpdma_sw_desc, node);
+ if (sw_desc->hw.desc_id != desc_id) {
+ dev_dbg(chan->xdev->dev,
+ "chan%u: vsync race lost (%u != %u), retrying\n",
+ chan->id, sw_desc->hw.desc_id, desc_id);
+ goto out;
+ }
+
+ /*
+ * Complete the active descriptor, if any, promote the pending
+ * descriptor to active, and queue the next transfer, if any.
+ */
+ if (chan->desc.active)
+ vchan_cookie_complete(&chan->desc.active->vdesc);
+ chan->desc.active = pending;
+ chan->desc.pending = NULL;
+
+ xilinx_dpdma_chan_queue_transfer(chan);
+
+out:
+ spin_unlock_irqrestore(&chan->lock, flags);
+}
+
+/**
+ * xilinx_dpdma_chan_err - Detect any channel error
+ * @chan: DPDMA channel
+ * @isr: masked Interrupt Status Register
+ * @eisr: Error Interrupt Status Register
+ *
+ * Return: true if any channel error occurs, or false otherwise.
+ */
+static bool
+xilinx_dpdma_chan_err(struct xilinx_dpdma_chan *chan, u32 isr, u32 eisr)
+{
+ if (!chan)
+ return false;
+
+ if (chan->running &&
+ ((isr & (XILINX_DPDMA_INTR_CHAN_ERR_MASK << chan->id)) ||
+ (eisr & (XILINX_DPDMA_EINTR_CHAN_ERR_MASK << chan->id))))
+ return true;
+
+ return false;
+}
+
+/**
+ * xilinx_dpdma_chan_handle_err - DPDMA channel error handling
+ * @chan: DPDMA channel
+ *
+ * This function is called when any channel error or any global error occurs.
+ * The function disables the paused channel by errors and determines
+ * if the current active descriptor can be rescheduled depending on
+ * the descriptor status.
+ */
+static void xilinx_dpdma_chan_handle_err(struct xilinx_dpdma_chan *chan)
+{
+ struct xilinx_dpdma_device *xdev = chan->xdev;
+ struct xilinx_dpdma_tx_desc *active;
+ unsigned long flags;
+
+ spin_lock_irqsave(&chan->lock, flags);
+
+ dev_dbg(xdev->dev, "chan%u: cur desc addr = 0x%04x%08x\n",
+ chan->id,
+ dpdma_read(chan->reg, XILINX_DPDMA_CH_DESC_START_ADDRE),
+ dpdma_read(chan->reg, XILINX_DPDMA_CH_DESC_START_ADDR));
+ dev_dbg(xdev->dev, "chan%u: cur payload addr = 0x%04x%08x\n",
+ chan->id,
+ dpdma_read(chan->reg, XILINX_DPDMA_CH_PYLD_CUR_ADDRE),
+ dpdma_read(chan->reg, XILINX_DPDMA_CH_PYLD_CUR_ADDR));
+
+ xilinx_dpdma_chan_disable(chan);
+ chan->running = false;
+
+ if (!chan->desc.active)
+ goto out_unlock;
+
+ active = chan->desc.active;
+ chan->desc.active = NULL;
+
+ xilinx_dpdma_chan_dump_tx_desc(chan, active);
+
+ if (active->error)
+ dev_dbg(xdev->dev, "chan%u: repeated error on desc\n",
+ chan->id);
+
+ /* Reschedule if there's no new descriptor */
+ if (!chan->desc.pending &&
+ list_empty(&chan->vchan.desc_issued)) {
+ active->error = true;
+ list_add_tail(&active->vdesc.node,
+ &chan->vchan.desc_issued);
+ } else {
+ xilinx_dpdma_chan_free_tx_desc(&active->vdesc);
+ }
+
+out_unlock:
+ spin_unlock_irqrestore(&chan->lock, flags);
+}
+
+/* -----------------------------------------------------------------------------
+ * DMA Engine Operations
+ */
+
+static struct dma_async_tx_descriptor *
+xilinx_dpdma_prep_interleaved_dma(struct dma_chan *dchan,
+ struct dma_interleaved_template *xt,
+ unsigned long flags)
+{
+ struct xilinx_dpdma_chan *chan = to_xilinx_chan(dchan);
+ struct xilinx_dpdma_tx_desc *desc;
+
+ if (xt->dir != DMA_MEM_TO_DEV)
+ return NULL;
+
+ if (!xt->numf || !xt->sgl[0].size)
+ return NULL;
+
+ if (!(flags & DMA_PREP_REPEAT) || !(flags & DMA_PREP_LOAD_EOT))
+ return NULL;
+
+ desc = xilinx_dpdma_chan_prep_interleaved_dma(chan, xt);
+ if (!desc)
+ return NULL;
+
+ vchan_tx_prep(&chan->vchan, &desc->vdesc, flags | DMA_CTRL_ACK);
+
+ return &desc->vdesc.tx;
+}
+
+/**
+ * xilinx_dpdma_alloc_chan_resources - Allocate resources for the channel
+ * @dchan: DMA channel
+ *
+ * Allocate a descriptor pool for the channel.
+ *
+ * Return: 0 on success, or -ENOMEM if failed to allocate a pool.
+ */
+static int xilinx_dpdma_alloc_chan_resources(struct dma_chan *dchan)
+{
+ struct xilinx_dpdma_chan *chan = to_xilinx_chan(dchan);
+ size_t align = __alignof__(struct xilinx_dpdma_sw_desc);
+
+ chan->desc_pool = dma_pool_create(dev_name(chan->xdev->dev),
+ chan->xdev->dev,
+ sizeof(struct xilinx_dpdma_sw_desc),
+ align, 0);
+ if (!chan->desc_pool) {
+ dev_err(chan->xdev->dev,
+ "chan%u: failed to allocate a descriptor pool\n",
+ chan->id);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+/**
+ * xilinx_dpdma_free_chan_resources - Free all resources for the channel
+ * @dchan: DMA channel
+ *
+ * Free resources associated with the virtual DMA channel, and destroy the
+ * descriptor pool.
+ */
+static void xilinx_dpdma_free_chan_resources(struct dma_chan *dchan)
+{
+ struct xilinx_dpdma_chan *chan = to_xilinx_chan(dchan);
+
+ vchan_free_chan_resources(&chan->vchan);
+
+ dma_pool_destroy(chan->desc_pool);
+ chan->desc_pool = NULL;
+}
+
+static void xilinx_dpdma_issue_pending(struct dma_chan *dchan)
+{
+ struct xilinx_dpdma_chan *chan = to_xilinx_chan(dchan);
+ unsigned long flags;
+
+ spin_lock_irqsave(&chan->vchan.lock, flags);
+ if (vchan_issue_pending(&chan->vchan))
+ xilinx_dpdma_chan_queue_transfer(chan);
+ spin_unlock_irqrestore(&chan->vchan.lock, flags);
+}
+
+static int xilinx_dpdma_config(struct dma_chan *dchan,
+ struct dma_slave_config *config)
+{
+ struct xilinx_dpdma_chan *chan = to_xilinx_chan(dchan);
+ struct xilinx_dpdma_peripheral_config *pconfig;
+ unsigned long flags;
+
+ /*
+ * The destination address doesn't need to be specified as the DPDMA is
+ * hardwired to the destination (the DP controller). The transfer
+ * width, burst size and port window size are thus meaningless, they're
+ * fixed both on the DPDMA side and on the DP controller side.
+ */
+
+ /*
+ * Use the peripheral_config to indicate that the channel is part
+ * of a video group. This requires matching use of the custom
+ * structure in each driver.
+ */
+ pconfig = config->peripheral_config;
+ if (WARN_ON(pconfig && config->peripheral_size != sizeof(*pconfig)))
+ return -EINVAL;
+
+ spin_lock_irqsave(&chan->lock, flags);
+ if (chan->id <= ZYNQMP_DPDMA_VIDEO2 && pconfig)
+ chan->video_group = pconfig->video_group;
+ spin_unlock_irqrestore(&chan->lock, flags);
+
+ return 0;
+}
+
+static int xilinx_dpdma_pause(struct dma_chan *dchan)
+{
+ xilinx_dpdma_chan_pause(to_xilinx_chan(dchan));
+
+ return 0;
+}
+
+static int xilinx_dpdma_resume(struct dma_chan *dchan)
+{
+ xilinx_dpdma_chan_unpause(to_xilinx_chan(dchan));
+
+ return 0;
+}
+
+/**
+ * xilinx_dpdma_terminate_all - Terminate the channel and descriptors
+ * @dchan: DMA channel
+ *
+ * Pause the channel without waiting for ongoing transfers to complete. Waiting
+ * for completion is performed by xilinx_dpdma_synchronize() that will disable
+ * the channel to complete the stop.
+ *
+ * All the descriptors associated with the channel that are guaranteed not to
+ * be touched by the hardware. The pending and active descriptor are not
+ * touched, and will be freed either upon completion, or by
+ * xilinx_dpdma_synchronize().
+ *
+ * Return: 0 on success, or -ETIMEDOUT if the channel failed to stop.
+ */
+static int xilinx_dpdma_terminate_all(struct dma_chan *dchan)
+{
+ struct xilinx_dpdma_chan *chan = to_xilinx_chan(dchan);
+ struct xilinx_dpdma_device *xdev = chan->xdev;
+ LIST_HEAD(descriptors);
+ unsigned long flags;
+ unsigned int i;
+
+ /* Pause the channel (including the whole video group if applicable). */
+ if (chan->video_group) {
+ for (i = ZYNQMP_DPDMA_VIDEO0; i <= ZYNQMP_DPDMA_VIDEO2; i++) {
+ if (xdev->chan[i]->video_group &&
+ xdev->chan[i]->running) {
+ xilinx_dpdma_chan_pause(xdev->chan[i]);
+ xdev->chan[i]->video_group = false;
+ }
+ }
+ } else {
+ xilinx_dpdma_chan_pause(chan);
+ }
+
+ /* Gather all the descriptors we can free and free them. */
+ spin_lock_irqsave(&chan->vchan.lock, flags);
+ vchan_get_all_descriptors(&chan->vchan, &descriptors);
+ spin_unlock_irqrestore(&chan->vchan.lock, flags);
+
+ vchan_dma_desc_free_list(&chan->vchan, &descriptors);
+
+ return 0;
+}
+
+/**
+ * xilinx_dpdma_synchronize - Synchronize callback execution
+ * @dchan: DMA channel
+ *
+ * Synchronizing callback execution ensures that all previously issued
+ * transfers have completed and all associated callbacks have been called and
+ * have returned.
+ *
+ * This function waits for the DMA channel to stop. It assumes it has been
+ * paused by a previous call to dmaengine_terminate_async(), and that no new
+ * pending descriptors have been issued with dma_async_issue_pending(). The
+ * behaviour is undefined otherwise.
+ */
+static void xilinx_dpdma_synchronize(struct dma_chan *dchan)
+{
+ struct xilinx_dpdma_chan *chan = to_xilinx_chan(dchan);
+ unsigned long flags;
+
+ xilinx_dpdma_chan_stop(chan);
+
+ spin_lock_irqsave(&chan->vchan.lock, flags);
+ if (chan->desc.pending) {
+ vchan_terminate_vdesc(&chan->desc.pending->vdesc);
+ chan->desc.pending = NULL;
+ }
+ if (chan->desc.active) {
+ vchan_terminate_vdesc(&chan->desc.active->vdesc);
+ chan->desc.active = NULL;
+ }
+ spin_unlock_irqrestore(&chan->vchan.lock, flags);
+
+ vchan_synchronize(&chan->vchan);
+}
+
+/* -----------------------------------------------------------------------------
+ * Interrupt and Tasklet Handling
+ */
+
+/**
+ * xilinx_dpdma_err - Detect any global error
+ * @isr: Interrupt Status Register
+ * @eisr: Error Interrupt Status Register
+ *
+ * Return: True if any global error occurs, or false otherwise.
+ */
+static bool xilinx_dpdma_err(u32 isr, u32 eisr)
+{
+ if (isr & XILINX_DPDMA_INTR_GLOBAL_ERR ||
+ eisr & XILINX_DPDMA_EINTR_GLOBAL_ERR)
+ return true;
+
+ return false;
+}
+
+/**
+ * xilinx_dpdma_handle_err_irq - Handle DPDMA error interrupt
+ * @xdev: DPDMA device
+ * @isr: masked Interrupt Status Register
+ * @eisr: Error Interrupt Status Register
+ *
+ * Handle if any error occurs based on @isr and @eisr. This function disables
+ * corresponding error interrupts, and those should be re-enabled once handling
+ * is done.
+ */
+static void xilinx_dpdma_handle_err_irq(struct xilinx_dpdma_device *xdev,
+ u32 isr, u32 eisr)
+{
+ bool err = xilinx_dpdma_err(isr, eisr);
+ unsigned int i;
+
+ dev_dbg_ratelimited(xdev->dev,
+ "error irq: isr = 0x%08x, eisr = 0x%08x\n",
+ isr, eisr);
+
+ /* Disable channel error interrupts until errors are handled. */
+ dpdma_write(xdev->reg, XILINX_DPDMA_IDS,
+ isr & ~XILINX_DPDMA_INTR_GLOBAL_ERR);
+ dpdma_write(xdev->reg, XILINX_DPDMA_EIDS,
+ eisr & ~XILINX_DPDMA_EINTR_GLOBAL_ERR);
+
+ for (i = 0; i < ARRAY_SIZE(xdev->chan); i++)
+ if (err || xilinx_dpdma_chan_err(xdev->chan[i], isr, eisr))
+ tasklet_schedule(&xdev->chan[i]->err_task);
+}
+
+/**
+ * xilinx_dpdma_enable_irq - Enable interrupts
+ * @xdev: DPDMA device
+ *
+ * Enable interrupts.
+ */
+static void xilinx_dpdma_enable_irq(struct xilinx_dpdma_device *xdev)
+{
+ dpdma_write(xdev->reg, XILINX_DPDMA_IEN, XILINX_DPDMA_INTR_ALL);
+ dpdma_write(xdev->reg, XILINX_DPDMA_EIEN, XILINX_DPDMA_EINTR_ALL);
+}
+
+/**
+ * xilinx_dpdma_disable_irq - Disable interrupts
+ * @xdev: DPDMA device
+ *
+ * Disable interrupts.
+ */
+static void xilinx_dpdma_disable_irq(struct xilinx_dpdma_device *xdev)
+{
+ dpdma_write(xdev->reg, XILINX_DPDMA_IDS, XILINX_DPDMA_INTR_ALL);
+ dpdma_write(xdev->reg, XILINX_DPDMA_EIDS, XILINX_DPDMA_EINTR_ALL);
+}
+
+/**
+ * xilinx_dpdma_chan_err_task - Per channel tasklet for error handling
+ * @t: pointer to the tasklet associated with this handler
+ *
+ * Per channel error handling tasklet. This function waits for the outstanding
+ * transaction to complete and triggers error handling. After error handling,
+ * re-enable channel error interrupts, and restart the channel if needed.
+ */
+static void xilinx_dpdma_chan_err_task(struct tasklet_struct *t)
+{
+ struct xilinx_dpdma_chan *chan = from_tasklet(chan, t, err_task);
+ struct xilinx_dpdma_device *xdev = chan->xdev;
+ unsigned long flags;
+
+ /* Proceed error handling even when polling fails. */
+ xilinx_dpdma_chan_poll_no_ostand(chan);
+
+ xilinx_dpdma_chan_handle_err(chan);
+
+ dpdma_write(xdev->reg, XILINX_DPDMA_IEN,
+ XILINX_DPDMA_INTR_CHAN_ERR_MASK << chan->id);
+ dpdma_write(xdev->reg, XILINX_DPDMA_EIEN,
+ XILINX_DPDMA_EINTR_CHAN_ERR_MASK << chan->id);
+
+ spin_lock_irqsave(&chan->lock, flags);
+ xilinx_dpdma_chan_queue_transfer(chan);
+ spin_unlock_irqrestore(&chan->lock, flags);
+}
+
+static irqreturn_t xilinx_dpdma_irq_handler(int irq, void *data)
+{
+ struct xilinx_dpdma_device *xdev = data;
+ unsigned long mask;
+ unsigned int i;
+ u32 status;
+ u32 error;
+
+ status = dpdma_read(xdev->reg, XILINX_DPDMA_ISR);
+ error = dpdma_read(xdev->reg, XILINX_DPDMA_EISR);
+ if (!status && !error)
+ return IRQ_NONE;
+
+ dpdma_write(xdev->reg, XILINX_DPDMA_ISR, status);
+ dpdma_write(xdev->reg, XILINX_DPDMA_EISR, error);
+
+ if (status & XILINX_DPDMA_INTR_VSYNC) {
+ /*
+ * There's a single VSYNC interrupt that needs to be processed
+ * by each running channel to update the active descriptor.
+ */
+ for (i = 0; i < ARRAY_SIZE(xdev->chan); i++) {
+ struct xilinx_dpdma_chan *chan = xdev->chan[i];
+
+ if (chan)
+ xilinx_dpdma_chan_vsync_irq(chan);
+ }
+ }
+
+ mask = FIELD_GET(XILINX_DPDMA_INTR_DESC_DONE_MASK, status);
+ if (mask) {
+ for_each_set_bit(i, &mask, ARRAY_SIZE(xdev->chan))
+ xilinx_dpdma_chan_done_irq(xdev->chan[i]);
+ }
+
+ mask = FIELD_GET(XILINX_DPDMA_INTR_NO_OSTAND_MASK, status);
+ if (mask) {
+ for_each_set_bit(i, &mask, ARRAY_SIZE(xdev->chan))
+ xilinx_dpdma_chan_notify_no_ostand(xdev->chan[i]);
+ }
+
+ mask = status & XILINX_DPDMA_INTR_ERR_ALL;
+ if (mask || error)
+ xilinx_dpdma_handle_err_irq(xdev, mask, error);
+
+ return IRQ_HANDLED;
+}
+
+/* -----------------------------------------------------------------------------
+ * Initialization & Cleanup
+ */
+
+static int xilinx_dpdma_chan_init(struct xilinx_dpdma_device *xdev,
+ unsigned int chan_id)
+{
+ struct xilinx_dpdma_chan *chan;
+
+ chan = devm_kzalloc(xdev->dev, sizeof(*chan), GFP_KERNEL);
+ if (!chan)
+ return -ENOMEM;
+
+ chan->id = chan_id;
+ chan->reg = xdev->reg + XILINX_DPDMA_CH_BASE
+ + XILINX_DPDMA_CH_OFFSET * chan->id;
+ chan->running = false;
+ chan->xdev = xdev;
+
+ spin_lock_init(&chan->lock);
+ init_waitqueue_head(&chan->wait_to_stop);
+
+ tasklet_setup(&chan->err_task, xilinx_dpdma_chan_err_task);
+
+ chan->vchan.desc_free = xilinx_dpdma_chan_free_tx_desc;
+ vchan_init(&chan->vchan, &xdev->common);
+
+ xdev->chan[chan->id] = chan;
+
+ return 0;
+}
+
+static void xilinx_dpdma_chan_remove(struct xilinx_dpdma_chan *chan)
+{
+ if (!chan)
+ return;
+
+ tasklet_kill(&chan->err_task);
+ list_del(&chan->vchan.chan.device_node);
+}
+
+static struct dma_chan *of_dma_xilinx_xlate(struct of_phandle_args *dma_spec,
+ struct of_dma *ofdma)
+{
+ struct xilinx_dpdma_device *xdev = ofdma->of_dma_data;
+ u32 chan_id = dma_spec->args[0];
+
+ if (chan_id >= ARRAY_SIZE(xdev->chan))
+ return NULL;
+
+ if (!xdev->chan[chan_id])
+ return NULL;
+
+ return dma_get_slave_channel(&xdev->chan[chan_id]->vchan.chan);
+}
+
+static void dpdma_hw_init(struct xilinx_dpdma_device *xdev)
+{
+ unsigned int i;
+ void __iomem *reg;
+
+ /* Disable all interrupts */
+ xilinx_dpdma_disable_irq(xdev);
+
+ /* Stop all channels */
+ for (i = 0; i < ARRAY_SIZE(xdev->chan); i++) {
+ reg = xdev->reg + XILINX_DPDMA_CH_BASE
+ + XILINX_DPDMA_CH_OFFSET * i;
+ dpdma_clr(reg, XILINX_DPDMA_CH_CNTL, XILINX_DPDMA_CH_CNTL_ENABLE);
+ }
+
+ /* Clear the interrupt status registers */
+ dpdma_write(xdev->reg, XILINX_DPDMA_ISR, XILINX_DPDMA_INTR_ALL);
+ dpdma_write(xdev->reg, XILINX_DPDMA_EISR, XILINX_DPDMA_EINTR_ALL);
+}
+
+static int xilinx_dpdma_probe(struct platform_device *pdev)
+{
+ struct xilinx_dpdma_device *xdev;
+ struct dma_device *ddev;
+ unsigned int i;
+ int ret;
+
+ xdev = devm_kzalloc(&pdev->dev, sizeof(*xdev), GFP_KERNEL);
+ if (!xdev)
+ return -ENOMEM;
+
+ xdev->dev = &pdev->dev;
+ xdev->ext_addr = sizeof(dma_addr_t) > 4;
+
+ INIT_LIST_HEAD(&xdev->common.channels);
+
+ platform_set_drvdata(pdev, xdev);
+
+ xdev->axi_clk = devm_clk_get(xdev->dev, "axi_clk");
+ if (IS_ERR(xdev->axi_clk))
+ return PTR_ERR(xdev->axi_clk);
+
+ xdev->reg = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(xdev->reg))
+ return PTR_ERR(xdev->reg);
+
+ dpdma_hw_init(xdev);
+
+ xdev->irq = platform_get_irq(pdev, 0);
+ if (xdev->irq < 0)
+ return xdev->irq;
+
+ ret = request_irq(xdev->irq, xilinx_dpdma_irq_handler, IRQF_SHARED,
+ dev_name(xdev->dev), xdev);
+ if (ret) {
+ dev_err(xdev->dev, "failed to request IRQ\n");
+ return ret;
+ }
+
+ ddev = &xdev->common;
+ ddev->dev = &pdev->dev;
+
+ dma_cap_set(DMA_SLAVE, ddev->cap_mask);
+ dma_cap_set(DMA_PRIVATE, ddev->cap_mask);
+ dma_cap_set(DMA_INTERLEAVE, ddev->cap_mask);
+ dma_cap_set(DMA_REPEAT, ddev->cap_mask);
+ dma_cap_set(DMA_LOAD_EOT, ddev->cap_mask);
+ ddev->copy_align = fls(XILINX_DPDMA_ALIGN_BYTES - 1);
+
+ ddev->device_alloc_chan_resources = xilinx_dpdma_alloc_chan_resources;
+ ddev->device_free_chan_resources = xilinx_dpdma_free_chan_resources;
+ ddev->device_prep_interleaved_dma = xilinx_dpdma_prep_interleaved_dma;
+ /* TODO: Can we achieve better granularity ? */
+ ddev->device_tx_status = dma_cookie_status;
+ ddev->device_issue_pending = xilinx_dpdma_issue_pending;
+ ddev->device_config = xilinx_dpdma_config;
+ ddev->device_pause = xilinx_dpdma_pause;
+ ddev->device_resume = xilinx_dpdma_resume;
+ ddev->device_terminate_all = xilinx_dpdma_terminate_all;
+ ddev->device_synchronize = xilinx_dpdma_synchronize;
+ ddev->src_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_UNDEFINED);
+ ddev->directions = BIT(DMA_MEM_TO_DEV);
+ ddev->residue_granularity = DMA_RESIDUE_GRANULARITY_DESCRIPTOR;
+
+ for (i = 0; i < ARRAY_SIZE(xdev->chan); ++i) {
+ ret = xilinx_dpdma_chan_init(xdev, i);
+ if (ret < 0) {
+ dev_err(xdev->dev, "failed to initialize channel %u\n",
+ i);
+ goto error;
+ }
+ }
+
+ ret = clk_prepare_enable(xdev->axi_clk);
+ if (ret) {
+ dev_err(xdev->dev, "failed to enable the axi clock\n");
+ goto error;
+ }
+
+ ret = dma_async_device_register(ddev);
+ if (ret) {
+ dev_err(xdev->dev, "failed to register the dma device\n");
+ goto error_dma_async;
+ }
+
+ ret = of_dma_controller_register(xdev->dev->of_node,
+ of_dma_xilinx_xlate, ddev);
+ if (ret) {
+ dev_err(xdev->dev, "failed to register DMA to DT DMA helper\n");
+ goto error_of_dma;
+ }
+
+ xilinx_dpdma_enable_irq(xdev);
+
+ xilinx_dpdma_debugfs_init(xdev);
+
+ dev_info(&pdev->dev, "Xilinx DPDMA engine is probed\n");
+
+ return 0;
+
+error_of_dma:
+ dma_async_device_unregister(ddev);
+error_dma_async:
+ clk_disable_unprepare(xdev->axi_clk);
+error:
+ for (i = 0; i < ARRAY_SIZE(xdev->chan); i++)
+ xilinx_dpdma_chan_remove(xdev->chan[i]);
+
+ free_irq(xdev->irq, xdev);
+
+ return ret;
+}
+
+static int xilinx_dpdma_remove(struct platform_device *pdev)
+{
+ struct xilinx_dpdma_device *xdev = platform_get_drvdata(pdev);
+ unsigned int i;
+
+ /* Start by disabling the IRQ to avoid races during cleanup. */
+ free_irq(xdev->irq, xdev);
+
+ xilinx_dpdma_disable_irq(xdev);
+ of_dma_controller_free(pdev->dev.of_node);
+ dma_async_device_unregister(&xdev->common);
+ clk_disable_unprepare(xdev->axi_clk);
+
+ for (i = 0; i < ARRAY_SIZE(xdev->chan); i++)
+ xilinx_dpdma_chan_remove(xdev->chan[i]);
+
+ return 0;
+}
+
+static const struct of_device_id xilinx_dpdma_of_match[] = {
+ { .compatible = "xlnx,zynqmp-dpdma",},
+ { /* end of table */ },
+};
+MODULE_DEVICE_TABLE(of, xilinx_dpdma_of_match);
+
+static struct platform_driver xilinx_dpdma_driver = {
+ .probe = xilinx_dpdma_probe,
+ .remove = xilinx_dpdma_remove,
+ .driver = {
+ .name = "xilinx-zynqmp-dpdma",
+ .of_match_table = xilinx_dpdma_of_match,
+ },
+};
+
+module_platform_driver(xilinx_dpdma_driver);
+
+MODULE_AUTHOR("Xilinx, Inc.");
+MODULE_DESCRIPTION("Xilinx ZynqMP DPDMA driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/xilinx/zynqmp_dma.c b/drivers/dma/xilinx/zynqmp_dma.c
new file mode 100644
index 000000000..21472a5d7
--- /dev/null
+++ b/drivers/dma/xilinx/zynqmp_dma.c
@@ -0,0 +1,1182 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * DMA driver for Xilinx ZynqMP DMA Engine
+ *
+ * Copyright (C) 2016 Xilinx, Inc. All rights reserved.
+ */
+
+#include <linux/bitops.h>
+#include <linux/dma-mapping.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/of_dma.h>
+#include <linux/of_platform.h>
+#include <linux/slab.h>
+#include <linux/clk.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
+#include <linux/pm_runtime.h>
+
+#include "../dmaengine.h"
+
+/* Register Offsets */
+#define ZYNQMP_DMA_ISR 0x100
+#define ZYNQMP_DMA_IMR 0x104
+#define ZYNQMP_DMA_IER 0x108
+#define ZYNQMP_DMA_IDS 0x10C
+#define ZYNQMP_DMA_CTRL0 0x110
+#define ZYNQMP_DMA_CTRL1 0x114
+#define ZYNQMP_DMA_DATA_ATTR 0x120
+#define ZYNQMP_DMA_DSCR_ATTR 0x124
+#define ZYNQMP_DMA_SRC_DSCR_WRD0 0x128
+#define ZYNQMP_DMA_SRC_DSCR_WRD1 0x12C
+#define ZYNQMP_DMA_SRC_DSCR_WRD2 0x130
+#define ZYNQMP_DMA_SRC_DSCR_WRD3 0x134
+#define ZYNQMP_DMA_DST_DSCR_WRD0 0x138
+#define ZYNQMP_DMA_DST_DSCR_WRD1 0x13C
+#define ZYNQMP_DMA_DST_DSCR_WRD2 0x140
+#define ZYNQMP_DMA_DST_DSCR_WRD3 0x144
+#define ZYNQMP_DMA_SRC_START_LSB 0x158
+#define ZYNQMP_DMA_SRC_START_MSB 0x15C
+#define ZYNQMP_DMA_DST_START_LSB 0x160
+#define ZYNQMP_DMA_DST_START_MSB 0x164
+#define ZYNQMP_DMA_TOTAL_BYTE 0x188
+#define ZYNQMP_DMA_RATE_CTRL 0x18C
+#define ZYNQMP_DMA_IRQ_SRC_ACCT 0x190
+#define ZYNQMP_DMA_IRQ_DST_ACCT 0x194
+#define ZYNQMP_DMA_CTRL2 0x200
+
+/* Interrupt registers bit field definitions */
+#define ZYNQMP_DMA_DONE BIT(10)
+#define ZYNQMP_DMA_AXI_WR_DATA BIT(9)
+#define ZYNQMP_DMA_AXI_RD_DATA BIT(8)
+#define ZYNQMP_DMA_AXI_RD_DST_DSCR BIT(7)
+#define ZYNQMP_DMA_AXI_RD_SRC_DSCR BIT(6)
+#define ZYNQMP_DMA_IRQ_DST_ACCT_ERR BIT(5)
+#define ZYNQMP_DMA_IRQ_SRC_ACCT_ERR BIT(4)
+#define ZYNQMP_DMA_BYTE_CNT_OVRFL BIT(3)
+#define ZYNQMP_DMA_DST_DSCR_DONE BIT(2)
+#define ZYNQMP_DMA_INV_APB BIT(0)
+
+/* Control 0 register bit field definitions */
+#define ZYNQMP_DMA_OVR_FETCH BIT(7)
+#define ZYNQMP_DMA_POINT_TYPE_SG BIT(6)
+#define ZYNQMP_DMA_RATE_CTRL_EN BIT(3)
+
+/* Control 1 register bit field definitions */
+#define ZYNQMP_DMA_SRC_ISSUE GENMASK(4, 0)
+
+/* Data Attribute register bit field definitions */
+#define ZYNQMP_DMA_ARBURST GENMASK(27, 26)
+#define ZYNQMP_DMA_ARCACHE GENMASK(25, 22)
+#define ZYNQMP_DMA_ARCACHE_OFST 22
+#define ZYNQMP_DMA_ARQOS GENMASK(21, 18)
+#define ZYNQMP_DMA_ARQOS_OFST 18
+#define ZYNQMP_DMA_ARLEN GENMASK(17, 14)
+#define ZYNQMP_DMA_ARLEN_OFST 14
+#define ZYNQMP_DMA_AWBURST GENMASK(13, 12)
+#define ZYNQMP_DMA_AWCACHE GENMASK(11, 8)
+#define ZYNQMP_DMA_AWCACHE_OFST 8
+#define ZYNQMP_DMA_AWQOS GENMASK(7, 4)
+#define ZYNQMP_DMA_AWQOS_OFST 4
+#define ZYNQMP_DMA_AWLEN GENMASK(3, 0)
+#define ZYNQMP_DMA_AWLEN_OFST 0
+
+/* Descriptor Attribute register bit field definitions */
+#define ZYNQMP_DMA_AXCOHRNT BIT(8)
+#define ZYNQMP_DMA_AXCACHE GENMASK(7, 4)
+#define ZYNQMP_DMA_AXCACHE_OFST 4
+#define ZYNQMP_DMA_AXQOS GENMASK(3, 0)
+#define ZYNQMP_DMA_AXQOS_OFST 0
+
+/* Control register 2 bit field definitions */
+#define ZYNQMP_DMA_ENABLE BIT(0)
+
+/* Buffer Descriptor definitions */
+#define ZYNQMP_DMA_DESC_CTRL_STOP 0x10
+#define ZYNQMP_DMA_DESC_CTRL_COMP_INT 0x4
+#define ZYNQMP_DMA_DESC_CTRL_SIZE_256 0x2
+#define ZYNQMP_DMA_DESC_CTRL_COHRNT 0x1
+
+/* Interrupt Mask specific definitions */
+#define ZYNQMP_DMA_INT_ERR (ZYNQMP_DMA_AXI_RD_DATA | \
+ ZYNQMP_DMA_AXI_WR_DATA | \
+ ZYNQMP_DMA_AXI_RD_DST_DSCR | \
+ ZYNQMP_DMA_AXI_RD_SRC_DSCR | \
+ ZYNQMP_DMA_INV_APB)
+#define ZYNQMP_DMA_INT_OVRFL (ZYNQMP_DMA_BYTE_CNT_OVRFL | \
+ ZYNQMP_DMA_IRQ_SRC_ACCT_ERR | \
+ ZYNQMP_DMA_IRQ_DST_ACCT_ERR)
+#define ZYNQMP_DMA_INT_DONE (ZYNQMP_DMA_DONE | ZYNQMP_DMA_DST_DSCR_DONE)
+#define ZYNQMP_DMA_INT_EN_DEFAULT_MASK (ZYNQMP_DMA_INT_DONE | \
+ ZYNQMP_DMA_INT_ERR | \
+ ZYNQMP_DMA_INT_OVRFL | \
+ ZYNQMP_DMA_DST_DSCR_DONE)
+
+/* Max number of descriptors per channel */
+#define ZYNQMP_DMA_NUM_DESCS 32
+
+/* Max transfer size per descriptor */
+#define ZYNQMP_DMA_MAX_TRANS_LEN 0x40000000
+
+/* Max burst lengths */
+#define ZYNQMP_DMA_MAX_DST_BURST_LEN 32768U
+#define ZYNQMP_DMA_MAX_SRC_BURST_LEN 32768U
+
+/* Reset values for data attributes */
+#define ZYNQMP_DMA_AXCACHE_VAL 0xF
+
+#define ZYNQMP_DMA_SRC_ISSUE_RST_VAL 0x1F
+
+#define ZYNQMP_DMA_IDS_DEFAULT_MASK 0xFFF
+
+/* Bus width in bits */
+#define ZYNQMP_DMA_BUS_WIDTH_64 64
+#define ZYNQMP_DMA_BUS_WIDTH_128 128
+
+#define ZDMA_PM_TIMEOUT 100
+
+#define ZYNQMP_DMA_DESC_SIZE(chan) (chan->desc_size)
+
+#define to_chan(chan) container_of(chan, struct zynqmp_dma_chan, \
+ common)
+#define tx_to_desc(tx) container_of(tx, struct zynqmp_dma_desc_sw, \
+ async_tx)
+
+/**
+ * struct zynqmp_dma_desc_ll - Hw linked list descriptor
+ * @addr: Buffer address
+ * @size: Size of the buffer
+ * @ctrl: Control word
+ * @nxtdscraddr: Next descriptor base address
+ * @rsvd: Reserved field and for Hw internal use.
+ */
+struct zynqmp_dma_desc_ll {
+ u64 addr;
+ u32 size;
+ u32 ctrl;
+ u64 nxtdscraddr;
+ u64 rsvd;
+};
+
+/**
+ * struct zynqmp_dma_desc_sw - Per Transaction structure
+ * @src: Source address for simple mode dma
+ * @dst: Destination address for simple mode dma
+ * @len: Transfer length for simple mode dma
+ * @node: Node in the channel descriptor list
+ * @tx_list: List head for the current transfer
+ * @async_tx: Async transaction descriptor
+ * @src_v: Virtual address of the src descriptor
+ * @src_p: Physical address of the src descriptor
+ * @dst_v: Virtual address of the dst descriptor
+ * @dst_p: Physical address of the dst descriptor
+ */
+struct zynqmp_dma_desc_sw {
+ u64 src;
+ u64 dst;
+ u32 len;
+ struct list_head node;
+ struct list_head tx_list;
+ struct dma_async_tx_descriptor async_tx;
+ struct zynqmp_dma_desc_ll *src_v;
+ dma_addr_t src_p;
+ struct zynqmp_dma_desc_ll *dst_v;
+ dma_addr_t dst_p;
+};
+
+/**
+ * struct zynqmp_dma_chan - Driver specific DMA channel structure
+ * @zdev: Driver specific device structure
+ * @regs: Control registers offset
+ * @lock: Descriptor operation lock
+ * @pending_list: Descriptors waiting
+ * @free_list: Descriptors free
+ * @active_list: Descriptors active
+ * @sw_desc_pool: SW descriptor pool
+ * @done_list: Complete descriptors
+ * @common: DMA common channel
+ * @desc_pool_v: Statically allocated descriptor base
+ * @desc_pool_p: Physical allocated descriptor base
+ * @desc_free_cnt: Descriptor available count
+ * @dev: The dma device
+ * @irq: Channel IRQ
+ * @is_dmacoherent: Tells whether dma operations are coherent or not
+ * @tasklet: Cleanup work after irq
+ * @idle : Channel status;
+ * @desc_size: Size of the low level descriptor
+ * @err: Channel has errors
+ * @bus_width: Bus width
+ * @src_burst_len: Source burst length
+ * @dst_burst_len: Dest burst length
+ */
+struct zynqmp_dma_chan {
+ struct zynqmp_dma_device *zdev;
+ void __iomem *regs;
+ spinlock_t lock;
+ struct list_head pending_list;
+ struct list_head free_list;
+ struct list_head active_list;
+ struct zynqmp_dma_desc_sw *sw_desc_pool;
+ struct list_head done_list;
+ struct dma_chan common;
+ void *desc_pool_v;
+ dma_addr_t desc_pool_p;
+ u32 desc_free_cnt;
+ struct device *dev;
+ int irq;
+ bool is_dmacoherent;
+ struct tasklet_struct tasklet;
+ bool idle;
+ size_t desc_size;
+ bool err;
+ u32 bus_width;
+ u32 src_burst_len;
+ u32 dst_burst_len;
+};
+
+/**
+ * struct zynqmp_dma_device - DMA device structure
+ * @dev: Device Structure
+ * @common: DMA device structure
+ * @chan: Driver specific DMA channel
+ * @clk_main: Pointer to main clock
+ * @clk_apb: Pointer to apb clock
+ */
+struct zynqmp_dma_device {
+ struct device *dev;
+ struct dma_device common;
+ struct zynqmp_dma_chan *chan;
+ struct clk *clk_main;
+ struct clk *clk_apb;
+};
+
+static inline void zynqmp_dma_writeq(struct zynqmp_dma_chan *chan, u32 reg,
+ u64 value)
+{
+ lo_hi_writeq(value, chan->regs + reg);
+}
+
+/**
+ * zynqmp_dma_update_desc_to_ctrlr - Updates descriptor to the controller
+ * @chan: ZynqMP DMA DMA channel pointer
+ * @desc: Transaction descriptor pointer
+ */
+static void zynqmp_dma_update_desc_to_ctrlr(struct zynqmp_dma_chan *chan,
+ struct zynqmp_dma_desc_sw *desc)
+{
+ dma_addr_t addr;
+
+ addr = desc->src_p;
+ zynqmp_dma_writeq(chan, ZYNQMP_DMA_SRC_START_LSB, addr);
+ addr = desc->dst_p;
+ zynqmp_dma_writeq(chan, ZYNQMP_DMA_DST_START_LSB, addr);
+}
+
+/**
+ * zynqmp_dma_desc_config_eod - Mark the descriptor as end descriptor
+ * @chan: ZynqMP DMA channel pointer
+ * @desc: Hw descriptor pointer
+ */
+static void zynqmp_dma_desc_config_eod(struct zynqmp_dma_chan *chan,
+ void *desc)
+{
+ struct zynqmp_dma_desc_ll *hw = (struct zynqmp_dma_desc_ll *)desc;
+
+ hw->ctrl |= ZYNQMP_DMA_DESC_CTRL_STOP;
+ hw++;
+ hw->ctrl |= ZYNQMP_DMA_DESC_CTRL_COMP_INT | ZYNQMP_DMA_DESC_CTRL_STOP;
+}
+
+/**
+ * zynqmp_dma_config_sg_ll_desc - Configure the linked list descriptor
+ * @chan: ZynqMP DMA channel pointer
+ * @sdesc: Hw descriptor pointer
+ * @src: Source buffer address
+ * @dst: Destination buffer address
+ * @len: Transfer length
+ * @prev: Previous hw descriptor pointer
+ */
+static void zynqmp_dma_config_sg_ll_desc(struct zynqmp_dma_chan *chan,
+ struct zynqmp_dma_desc_ll *sdesc,
+ dma_addr_t src, dma_addr_t dst, size_t len,
+ struct zynqmp_dma_desc_ll *prev)
+{
+ struct zynqmp_dma_desc_ll *ddesc = sdesc + 1;
+
+ sdesc->size = ddesc->size = len;
+ sdesc->addr = src;
+ ddesc->addr = dst;
+
+ sdesc->ctrl = ddesc->ctrl = ZYNQMP_DMA_DESC_CTRL_SIZE_256;
+ if (chan->is_dmacoherent) {
+ sdesc->ctrl |= ZYNQMP_DMA_DESC_CTRL_COHRNT;
+ ddesc->ctrl |= ZYNQMP_DMA_DESC_CTRL_COHRNT;
+ }
+
+ if (prev) {
+ dma_addr_t addr = chan->desc_pool_p +
+ ((uintptr_t)sdesc - (uintptr_t)chan->desc_pool_v);
+ ddesc = prev + 1;
+ prev->nxtdscraddr = addr;
+ ddesc->nxtdscraddr = addr + ZYNQMP_DMA_DESC_SIZE(chan);
+ }
+}
+
+/**
+ * zynqmp_dma_init - Initialize the channel
+ * @chan: ZynqMP DMA channel pointer
+ */
+static void zynqmp_dma_init(struct zynqmp_dma_chan *chan)
+{
+ u32 val;
+
+ writel(ZYNQMP_DMA_IDS_DEFAULT_MASK, chan->regs + ZYNQMP_DMA_IDS);
+ val = readl(chan->regs + ZYNQMP_DMA_ISR);
+ writel(val, chan->regs + ZYNQMP_DMA_ISR);
+
+ if (chan->is_dmacoherent) {
+ val = ZYNQMP_DMA_AXCOHRNT;
+ val = (val & ~ZYNQMP_DMA_AXCACHE) |
+ (ZYNQMP_DMA_AXCACHE_VAL << ZYNQMP_DMA_AXCACHE_OFST);
+ writel(val, chan->regs + ZYNQMP_DMA_DSCR_ATTR);
+ }
+
+ val = readl(chan->regs + ZYNQMP_DMA_DATA_ATTR);
+ if (chan->is_dmacoherent) {
+ val = (val & ~ZYNQMP_DMA_ARCACHE) |
+ (ZYNQMP_DMA_AXCACHE_VAL << ZYNQMP_DMA_ARCACHE_OFST);
+ val = (val & ~ZYNQMP_DMA_AWCACHE) |
+ (ZYNQMP_DMA_AXCACHE_VAL << ZYNQMP_DMA_AWCACHE_OFST);
+ }
+ writel(val, chan->regs + ZYNQMP_DMA_DATA_ATTR);
+
+ /* Clearing the interrupt account rgisters */
+ val = readl(chan->regs + ZYNQMP_DMA_IRQ_SRC_ACCT);
+ val = readl(chan->regs + ZYNQMP_DMA_IRQ_DST_ACCT);
+
+ chan->idle = true;
+}
+
+/**
+ * zynqmp_dma_tx_submit - Submit DMA transaction
+ * @tx: Async transaction descriptor pointer
+ *
+ * Return: cookie value
+ */
+static dma_cookie_t zynqmp_dma_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+ struct zynqmp_dma_chan *chan = to_chan(tx->chan);
+ struct zynqmp_dma_desc_sw *desc, *new;
+ dma_cookie_t cookie;
+ unsigned long irqflags;
+
+ new = tx_to_desc(tx);
+ spin_lock_irqsave(&chan->lock, irqflags);
+ cookie = dma_cookie_assign(tx);
+
+ if (!list_empty(&chan->pending_list)) {
+ desc = list_last_entry(&chan->pending_list,
+ struct zynqmp_dma_desc_sw, node);
+ if (!list_empty(&desc->tx_list))
+ desc = list_last_entry(&desc->tx_list,
+ struct zynqmp_dma_desc_sw, node);
+ desc->src_v->nxtdscraddr = new->src_p;
+ desc->src_v->ctrl &= ~ZYNQMP_DMA_DESC_CTRL_STOP;
+ desc->dst_v->nxtdscraddr = new->dst_p;
+ desc->dst_v->ctrl &= ~ZYNQMP_DMA_DESC_CTRL_STOP;
+ }
+
+ list_add_tail(&new->node, &chan->pending_list);
+ spin_unlock_irqrestore(&chan->lock, irqflags);
+
+ return cookie;
+}
+
+/**
+ * zynqmp_dma_get_descriptor - Get the sw descriptor from the pool
+ * @chan: ZynqMP DMA channel pointer
+ *
+ * Return: The sw descriptor
+ */
+static struct zynqmp_dma_desc_sw *
+zynqmp_dma_get_descriptor(struct zynqmp_dma_chan *chan)
+{
+ struct zynqmp_dma_desc_sw *desc;
+ unsigned long irqflags;
+
+ spin_lock_irqsave(&chan->lock, irqflags);
+ desc = list_first_entry(&chan->free_list,
+ struct zynqmp_dma_desc_sw, node);
+ list_del(&desc->node);
+ spin_unlock_irqrestore(&chan->lock, irqflags);
+
+ INIT_LIST_HEAD(&desc->tx_list);
+ /* Clear the src and dst descriptor memory */
+ memset((void *)desc->src_v, 0, ZYNQMP_DMA_DESC_SIZE(chan));
+ memset((void *)desc->dst_v, 0, ZYNQMP_DMA_DESC_SIZE(chan));
+
+ return desc;
+}
+
+/**
+ * zynqmp_dma_free_descriptor - Issue pending transactions
+ * @chan: ZynqMP DMA channel pointer
+ * @sdesc: Transaction descriptor pointer
+ */
+static void zynqmp_dma_free_descriptor(struct zynqmp_dma_chan *chan,
+ struct zynqmp_dma_desc_sw *sdesc)
+{
+ struct zynqmp_dma_desc_sw *child, *next;
+
+ chan->desc_free_cnt++;
+ list_move_tail(&sdesc->node, &chan->free_list);
+ list_for_each_entry_safe(child, next, &sdesc->tx_list, node) {
+ chan->desc_free_cnt++;
+ list_move_tail(&child->node, &chan->free_list);
+ }
+}
+
+/**
+ * zynqmp_dma_free_desc_list - Free descriptors list
+ * @chan: ZynqMP DMA channel pointer
+ * @list: List to parse and delete the descriptor
+ */
+static void zynqmp_dma_free_desc_list(struct zynqmp_dma_chan *chan,
+ struct list_head *list)
+{
+ struct zynqmp_dma_desc_sw *desc, *next;
+
+ list_for_each_entry_safe(desc, next, list, node)
+ zynqmp_dma_free_descriptor(chan, desc);
+}
+
+/**
+ * zynqmp_dma_alloc_chan_resources - Allocate channel resources
+ * @dchan: DMA channel
+ *
+ * Return: Number of descriptors on success and failure value on error
+ */
+static int zynqmp_dma_alloc_chan_resources(struct dma_chan *dchan)
+{
+ struct zynqmp_dma_chan *chan = to_chan(dchan);
+ struct zynqmp_dma_desc_sw *desc;
+ int i, ret;
+
+ ret = pm_runtime_resume_and_get(chan->dev);
+ if (ret < 0)
+ return ret;
+
+ chan->sw_desc_pool = kcalloc(ZYNQMP_DMA_NUM_DESCS, sizeof(*desc),
+ GFP_KERNEL);
+ if (!chan->sw_desc_pool)
+ return -ENOMEM;
+
+ chan->idle = true;
+ chan->desc_free_cnt = ZYNQMP_DMA_NUM_DESCS;
+
+ INIT_LIST_HEAD(&chan->free_list);
+
+ for (i = 0; i < ZYNQMP_DMA_NUM_DESCS; i++) {
+ desc = chan->sw_desc_pool + i;
+ dma_async_tx_descriptor_init(&desc->async_tx, &chan->common);
+ desc->async_tx.tx_submit = zynqmp_dma_tx_submit;
+ list_add_tail(&desc->node, &chan->free_list);
+ }
+
+ chan->desc_pool_v = dma_alloc_coherent(chan->dev,
+ (2 * ZYNQMP_DMA_DESC_SIZE(chan) *
+ ZYNQMP_DMA_NUM_DESCS),
+ &chan->desc_pool_p, GFP_KERNEL);
+ if (!chan->desc_pool_v)
+ return -ENOMEM;
+
+ for (i = 0; i < ZYNQMP_DMA_NUM_DESCS; i++) {
+ desc = chan->sw_desc_pool + i;
+ desc->src_v = (struct zynqmp_dma_desc_ll *) (chan->desc_pool_v +
+ (i * ZYNQMP_DMA_DESC_SIZE(chan) * 2));
+ desc->dst_v = (struct zynqmp_dma_desc_ll *) (desc->src_v + 1);
+ desc->src_p = chan->desc_pool_p +
+ (i * ZYNQMP_DMA_DESC_SIZE(chan) * 2);
+ desc->dst_p = desc->src_p + ZYNQMP_DMA_DESC_SIZE(chan);
+ }
+
+ return ZYNQMP_DMA_NUM_DESCS;
+}
+
+/**
+ * zynqmp_dma_start - Start DMA channel
+ * @chan: ZynqMP DMA channel pointer
+ */
+static void zynqmp_dma_start(struct zynqmp_dma_chan *chan)
+{
+ writel(ZYNQMP_DMA_INT_EN_DEFAULT_MASK, chan->regs + ZYNQMP_DMA_IER);
+ writel(0, chan->regs + ZYNQMP_DMA_TOTAL_BYTE);
+ chan->idle = false;
+ writel(ZYNQMP_DMA_ENABLE, chan->regs + ZYNQMP_DMA_CTRL2);
+}
+
+/**
+ * zynqmp_dma_handle_ovfl_int - Process the overflow interrupt
+ * @chan: ZynqMP DMA channel pointer
+ * @status: Interrupt status value
+ */
+static void zynqmp_dma_handle_ovfl_int(struct zynqmp_dma_chan *chan, u32 status)
+{
+ if (status & ZYNQMP_DMA_BYTE_CNT_OVRFL)
+ writel(0, chan->regs + ZYNQMP_DMA_TOTAL_BYTE);
+ if (status & ZYNQMP_DMA_IRQ_DST_ACCT_ERR)
+ readl(chan->regs + ZYNQMP_DMA_IRQ_DST_ACCT);
+ if (status & ZYNQMP_DMA_IRQ_SRC_ACCT_ERR)
+ readl(chan->regs + ZYNQMP_DMA_IRQ_SRC_ACCT);
+}
+
+static void zynqmp_dma_config(struct zynqmp_dma_chan *chan)
+{
+ u32 val, burst_val;
+
+ val = readl(chan->regs + ZYNQMP_DMA_CTRL0);
+ val |= ZYNQMP_DMA_POINT_TYPE_SG;
+ writel(val, chan->regs + ZYNQMP_DMA_CTRL0);
+
+ val = readl(chan->regs + ZYNQMP_DMA_DATA_ATTR);
+ burst_val = __ilog2_u32(chan->src_burst_len);
+ val = (val & ~ZYNQMP_DMA_ARLEN) |
+ ((burst_val << ZYNQMP_DMA_ARLEN_OFST) & ZYNQMP_DMA_ARLEN);
+ burst_val = __ilog2_u32(chan->dst_burst_len);
+ val = (val & ~ZYNQMP_DMA_AWLEN) |
+ ((burst_val << ZYNQMP_DMA_AWLEN_OFST) & ZYNQMP_DMA_AWLEN);
+ writel(val, chan->regs + ZYNQMP_DMA_DATA_ATTR);
+}
+
+/**
+ * zynqmp_dma_device_config - Zynqmp dma device configuration
+ * @dchan: DMA channel
+ * @config: DMA device config
+ *
+ * Return: 0 always
+ */
+static int zynqmp_dma_device_config(struct dma_chan *dchan,
+ struct dma_slave_config *config)
+{
+ struct zynqmp_dma_chan *chan = to_chan(dchan);
+
+ chan->src_burst_len = clamp(config->src_maxburst, 1U,
+ ZYNQMP_DMA_MAX_SRC_BURST_LEN);
+ chan->dst_burst_len = clamp(config->dst_maxburst, 1U,
+ ZYNQMP_DMA_MAX_DST_BURST_LEN);
+
+ return 0;
+}
+
+/**
+ * zynqmp_dma_start_transfer - Initiate the new transfer
+ * @chan: ZynqMP DMA channel pointer
+ */
+static void zynqmp_dma_start_transfer(struct zynqmp_dma_chan *chan)
+{
+ struct zynqmp_dma_desc_sw *desc;
+
+ if (!chan->idle)
+ return;
+
+ zynqmp_dma_config(chan);
+
+ desc = list_first_entry_or_null(&chan->pending_list,
+ struct zynqmp_dma_desc_sw, node);
+ if (!desc)
+ return;
+
+ list_splice_tail_init(&chan->pending_list, &chan->active_list);
+ zynqmp_dma_update_desc_to_ctrlr(chan, desc);
+ zynqmp_dma_start(chan);
+}
+
+
+/**
+ * zynqmp_dma_chan_desc_cleanup - Cleanup the completed descriptors
+ * @chan: ZynqMP DMA channel
+ */
+static void zynqmp_dma_chan_desc_cleanup(struct zynqmp_dma_chan *chan)
+{
+ struct zynqmp_dma_desc_sw *desc, *next;
+ unsigned long irqflags;
+
+ spin_lock_irqsave(&chan->lock, irqflags);
+
+ list_for_each_entry_safe(desc, next, &chan->done_list, node) {
+ struct dmaengine_desc_callback cb;
+
+ dmaengine_desc_get_callback(&desc->async_tx, &cb);
+ if (dmaengine_desc_callback_valid(&cb)) {
+ spin_unlock_irqrestore(&chan->lock, irqflags);
+ dmaengine_desc_callback_invoke(&cb, NULL);
+ spin_lock_irqsave(&chan->lock, irqflags);
+ }
+
+ /* Run any dependencies, then free the descriptor */
+ zynqmp_dma_free_descriptor(chan, desc);
+ }
+
+ spin_unlock_irqrestore(&chan->lock, irqflags);
+}
+
+/**
+ * zynqmp_dma_complete_descriptor - Mark the active descriptor as complete
+ * @chan: ZynqMP DMA channel pointer
+ */
+static void zynqmp_dma_complete_descriptor(struct zynqmp_dma_chan *chan)
+{
+ struct zynqmp_dma_desc_sw *desc;
+
+ desc = list_first_entry_or_null(&chan->active_list,
+ struct zynqmp_dma_desc_sw, node);
+ if (!desc)
+ return;
+ list_del(&desc->node);
+ dma_cookie_complete(&desc->async_tx);
+ list_add_tail(&desc->node, &chan->done_list);
+}
+
+/**
+ * zynqmp_dma_issue_pending - Issue pending transactions
+ * @dchan: DMA channel pointer
+ */
+static void zynqmp_dma_issue_pending(struct dma_chan *dchan)
+{
+ struct zynqmp_dma_chan *chan = to_chan(dchan);
+ unsigned long irqflags;
+
+ spin_lock_irqsave(&chan->lock, irqflags);
+ zynqmp_dma_start_transfer(chan);
+ spin_unlock_irqrestore(&chan->lock, irqflags);
+}
+
+/**
+ * zynqmp_dma_free_descriptors - Free channel descriptors
+ * @chan: ZynqMP DMA channel pointer
+ */
+static void zynqmp_dma_free_descriptors(struct zynqmp_dma_chan *chan)
+{
+ unsigned long irqflags;
+
+ spin_lock_irqsave(&chan->lock, irqflags);
+ zynqmp_dma_free_desc_list(chan, &chan->active_list);
+ zynqmp_dma_free_desc_list(chan, &chan->pending_list);
+ zynqmp_dma_free_desc_list(chan, &chan->done_list);
+ spin_unlock_irqrestore(&chan->lock, irqflags);
+}
+
+/**
+ * zynqmp_dma_free_chan_resources - Free channel resources
+ * @dchan: DMA channel pointer
+ */
+static void zynqmp_dma_free_chan_resources(struct dma_chan *dchan)
+{
+ struct zynqmp_dma_chan *chan = to_chan(dchan);
+
+ zynqmp_dma_free_descriptors(chan);
+ dma_free_coherent(chan->dev,
+ (2 * ZYNQMP_DMA_DESC_SIZE(chan) * ZYNQMP_DMA_NUM_DESCS),
+ chan->desc_pool_v, chan->desc_pool_p);
+ kfree(chan->sw_desc_pool);
+ pm_runtime_mark_last_busy(chan->dev);
+ pm_runtime_put_autosuspend(chan->dev);
+}
+
+/**
+ * zynqmp_dma_reset - Reset the channel
+ * @chan: ZynqMP DMA channel pointer
+ */
+static void zynqmp_dma_reset(struct zynqmp_dma_chan *chan)
+{
+ unsigned long irqflags;
+
+ writel(ZYNQMP_DMA_IDS_DEFAULT_MASK, chan->regs + ZYNQMP_DMA_IDS);
+
+ spin_lock_irqsave(&chan->lock, irqflags);
+ zynqmp_dma_complete_descriptor(chan);
+ spin_unlock_irqrestore(&chan->lock, irqflags);
+ zynqmp_dma_chan_desc_cleanup(chan);
+ zynqmp_dma_free_descriptors(chan);
+
+ zynqmp_dma_init(chan);
+}
+
+/**
+ * zynqmp_dma_irq_handler - ZynqMP DMA Interrupt handler
+ * @irq: IRQ number
+ * @data: Pointer to the ZynqMP DMA channel structure
+ *
+ * Return: IRQ_HANDLED/IRQ_NONE
+ */
+static irqreturn_t zynqmp_dma_irq_handler(int irq, void *data)
+{
+ struct zynqmp_dma_chan *chan = (struct zynqmp_dma_chan *)data;
+ u32 isr, imr, status;
+ irqreturn_t ret = IRQ_NONE;
+
+ isr = readl(chan->regs + ZYNQMP_DMA_ISR);
+ imr = readl(chan->regs + ZYNQMP_DMA_IMR);
+ status = isr & ~imr;
+
+ writel(isr, chan->regs + ZYNQMP_DMA_ISR);
+ if (status & ZYNQMP_DMA_INT_DONE) {
+ tasklet_schedule(&chan->tasklet);
+ ret = IRQ_HANDLED;
+ }
+
+ if (status & ZYNQMP_DMA_DONE)
+ chan->idle = true;
+
+ if (status & ZYNQMP_DMA_INT_ERR) {
+ chan->err = true;
+ tasklet_schedule(&chan->tasklet);
+ dev_err(chan->dev, "Channel %p has errors\n", chan);
+ ret = IRQ_HANDLED;
+ }
+
+ if (status & ZYNQMP_DMA_INT_OVRFL) {
+ zynqmp_dma_handle_ovfl_int(chan, status);
+ dev_dbg(chan->dev, "Channel %p overflow interrupt\n", chan);
+ ret = IRQ_HANDLED;
+ }
+
+ return ret;
+}
+
+/**
+ * zynqmp_dma_do_tasklet - Schedule completion tasklet
+ * @t: Pointer to the ZynqMP DMA channel structure
+ */
+static void zynqmp_dma_do_tasklet(struct tasklet_struct *t)
+{
+ struct zynqmp_dma_chan *chan = from_tasklet(chan, t, tasklet);
+ u32 count;
+ unsigned long irqflags;
+
+ if (chan->err) {
+ zynqmp_dma_reset(chan);
+ chan->err = false;
+ return;
+ }
+
+ spin_lock_irqsave(&chan->lock, irqflags);
+ count = readl(chan->regs + ZYNQMP_DMA_IRQ_DST_ACCT);
+ while (count) {
+ zynqmp_dma_complete_descriptor(chan);
+ count--;
+ }
+ spin_unlock_irqrestore(&chan->lock, irqflags);
+
+ zynqmp_dma_chan_desc_cleanup(chan);
+
+ if (chan->idle) {
+ spin_lock_irqsave(&chan->lock, irqflags);
+ zynqmp_dma_start_transfer(chan);
+ spin_unlock_irqrestore(&chan->lock, irqflags);
+ }
+}
+
+/**
+ * zynqmp_dma_device_terminate_all - Aborts all transfers on a channel
+ * @dchan: DMA channel pointer
+ *
+ * Return: Always '0'
+ */
+static int zynqmp_dma_device_terminate_all(struct dma_chan *dchan)
+{
+ struct zynqmp_dma_chan *chan = to_chan(dchan);
+
+ writel(ZYNQMP_DMA_IDS_DEFAULT_MASK, chan->regs + ZYNQMP_DMA_IDS);
+ zynqmp_dma_free_descriptors(chan);
+
+ return 0;
+}
+
+/**
+ * zynqmp_dma_synchronize - Synchronizes the termination of a transfers to the current context.
+ * @dchan: DMA channel pointer
+ */
+static void zynqmp_dma_synchronize(struct dma_chan *dchan)
+{
+ struct zynqmp_dma_chan *chan = to_chan(dchan);
+
+ tasklet_kill(&chan->tasklet);
+}
+
+/**
+ * zynqmp_dma_prep_memcpy - prepare descriptors for memcpy transaction
+ * @dchan: DMA channel
+ * @dma_dst: Destination buffer address
+ * @dma_src: Source buffer address
+ * @len: Transfer length
+ * @flags: transfer ack flags
+ *
+ * Return: Async transaction descriptor on success and NULL on failure
+ */
+static struct dma_async_tx_descriptor *zynqmp_dma_prep_memcpy(
+ struct dma_chan *dchan, dma_addr_t dma_dst,
+ dma_addr_t dma_src, size_t len, ulong flags)
+{
+ struct zynqmp_dma_chan *chan;
+ struct zynqmp_dma_desc_sw *new, *first = NULL;
+ void *desc = NULL, *prev = NULL;
+ size_t copy;
+ u32 desc_cnt;
+ unsigned long irqflags;
+
+ chan = to_chan(dchan);
+
+ desc_cnt = DIV_ROUND_UP(len, ZYNQMP_DMA_MAX_TRANS_LEN);
+
+ spin_lock_irqsave(&chan->lock, irqflags);
+ if (desc_cnt > chan->desc_free_cnt) {
+ spin_unlock_irqrestore(&chan->lock, irqflags);
+ dev_dbg(chan->dev, "chan %p descs are not available\n", chan);
+ return NULL;
+ }
+ chan->desc_free_cnt = chan->desc_free_cnt - desc_cnt;
+ spin_unlock_irqrestore(&chan->lock, irqflags);
+
+ do {
+ /* Allocate and populate the descriptor */
+ new = zynqmp_dma_get_descriptor(chan);
+
+ copy = min_t(size_t, len, ZYNQMP_DMA_MAX_TRANS_LEN);
+ desc = (struct zynqmp_dma_desc_ll *)new->src_v;
+ zynqmp_dma_config_sg_ll_desc(chan, desc, dma_src,
+ dma_dst, copy, prev);
+ prev = desc;
+ len -= copy;
+ dma_src += copy;
+ dma_dst += copy;
+ if (!first)
+ first = new;
+ else
+ list_add_tail(&new->node, &first->tx_list);
+ } while (len);
+
+ zynqmp_dma_desc_config_eod(chan, desc);
+ async_tx_ack(&first->async_tx);
+ first->async_tx.flags = (enum dma_ctrl_flags)flags;
+ return &first->async_tx;
+}
+
+/**
+ * zynqmp_dma_chan_remove - Channel remove function
+ * @chan: ZynqMP DMA channel pointer
+ */
+static void zynqmp_dma_chan_remove(struct zynqmp_dma_chan *chan)
+{
+ if (!chan)
+ return;
+
+ if (chan->irq)
+ devm_free_irq(chan->zdev->dev, chan->irq, chan);
+ tasklet_kill(&chan->tasklet);
+ list_del(&chan->common.device_node);
+}
+
+/**
+ * zynqmp_dma_chan_probe - Per Channel Probing
+ * @zdev: Driver specific device structure
+ * @pdev: Pointer to the platform_device structure
+ *
+ * Return: '0' on success and failure value on error
+ */
+static int zynqmp_dma_chan_probe(struct zynqmp_dma_device *zdev,
+ struct platform_device *pdev)
+{
+ struct zynqmp_dma_chan *chan;
+ struct resource *res;
+ struct device_node *node = pdev->dev.of_node;
+ int err;
+
+ chan = devm_kzalloc(zdev->dev, sizeof(*chan), GFP_KERNEL);
+ if (!chan)
+ return -ENOMEM;
+ chan->dev = zdev->dev;
+ chan->zdev = zdev;
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ chan->regs = devm_ioremap_resource(&pdev->dev, res);
+ if (IS_ERR(chan->regs))
+ return PTR_ERR(chan->regs);
+
+ chan->bus_width = ZYNQMP_DMA_BUS_WIDTH_64;
+ chan->dst_burst_len = ZYNQMP_DMA_MAX_DST_BURST_LEN;
+ chan->src_burst_len = ZYNQMP_DMA_MAX_SRC_BURST_LEN;
+ err = of_property_read_u32(node, "xlnx,bus-width", &chan->bus_width);
+ if (err < 0) {
+ dev_err(&pdev->dev, "missing xlnx,bus-width property\n");
+ return err;
+ }
+
+ if (chan->bus_width != ZYNQMP_DMA_BUS_WIDTH_64 &&
+ chan->bus_width != ZYNQMP_DMA_BUS_WIDTH_128) {
+ dev_err(zdev->dev, "invalid bus-width value");
+ return -EINVAL;
+ }
+
+ chan->is_dmacoherent = of_property_read_bool(node, "dma-coherent");
+ zdev->chan = chan;
+ tasklet_setup(&chan->tasklet, zynqmp_dma_do_tasklet);
+ spin_lock_init(&chan->lock);
+ INIT_LIST_HEAD(&chan->active_list);
+ INIT_LIST_HEAD(&chan->pending_list);
+ INIT_LIST_HEAD(&chan->done_list);
+ INIT_LIST_HEAD(&chan->free_list);
+
+ dma_cookie_init(&chan->common);
+ chan->common.device = &zdev->common;
+ list_add_tail(&chan->common.device_node, &zdev->common.channels);
+
+ zynqmp_dma_init(chan);
+ chan->irq = platform_get_irq(pdev, 0);
+ if (chan->irq < 0)
+ return -ENXIO;
+ err = devm_request_irq(&pdev->dev, chan->irq, zynqmp_dma_irq_handler, 0,
+ "zynqmp-dma", chan);
+ if (err)
+ return err;
+
+ chan->desc_size = sizeof(struct zynqmp_dma_desc_ll);
+ chan->idle = true;
+ return 0;
+}
+
+/**
+ * of_zynqmp_dma_xlate - Translation function
+ * @dma_spec: Pointer to DMA specifier as found in the device tree
+ * @ofdma: Pointer to DMA controller data
+ *
+ * Return: DMA channel pointer on success and NULL on error
+ */
+static struct dma_chan *of_zynqmp_dma_xlate(struct of_phandle_args *dma_spec,
+ struct of_dma *ofdma)
+{
+ struct zynqmp_dma_device *zdev = ofdma->of_dma_data;
+
+ return dma_get_slave_channel(&zdev->chan->common);
+}
+
+/**
+ * zynqmp_dma_suspend - Suspend method for the driver
+ * @dev: Address of the device structure
+ *
+ * Put the driver into low power mode.
+ * Return: 0 on success and failure value on error
+ */
+static int __maybe_unused zynqmp_dma_suspend(struct device *dev)
+{
+ if (!device_may_wakeup(dev))
+ return pm_runtime_force_suspend(dev);
+
+ return 0;
+}
+
+/**
+ * zynqmp_dma_resume - Resume from suspend
+ * @dev: Address of the device structure
+ *
+ * Resume operation after suspend.
+ * Return: 0 on success and failure value on error
+ */
+static int __maybe_unused zynqmp_dma_resume(struct device *dev)
+{
+ if (!device_may_wakeup(dev))
+ return pm_runtime_force_resume(dev);
+
+ return 0;
+}
+
+/**
+ * zynqmp_dma_runtime_suspend - Runtime suspend method for the driver
+ * @dev: Address of the device structure
+ *
+ * Put the driver into low power mode.
+ * Return: 0 always
+ */
+static int __maybe_unused zynqmp_dma_runtime_suspend(struct device *dev)
+{
+ struct zynqmp_dma_device *zdev = dev_get_drvdata(dev);
+
+ clk_disable_unprepare(zdev->clk_main);
+ clk_disable_unprepare(zdev->clk_apb);
+
+ return 0;
+}
+
+/**
+ * zynqmp_dma_runtime_resume - Runtime suspend method for the driver
+ * @dev: Address of the device structure
+ *
+ * Put the driver into low power mode.
+ * Return: 0 always
+ */
+static int __maybe_unused zynqmp_dma_runtime_resume(struct device *dev)
+{
+ struct zynqmp_dma_device *zdev = dev_get_drvdata(dev);
+ int err;
+
+ err = clk_prepare_enable(zdev->clk_main);
+ if (err) {
+ dev_err(dev, "Unable to enable main clock.\n");
+ return err;
+ }
+
+ err = clk_prepare_enable(zdev->clk_apb);
+ if (err) {
+ dev_err(dev, "Unable to enable apb clock.\n");
+ clk_disable_unprepare(zdev->clk_main);
+ return err;
+ }
+
+ return 0;
+}
+
+static const struct dev_pm_ops zynqmp_dma_dev_pm_ops = {
+ SET_SYSTEM_SLEEP_PM_OPS(zynqmp_dma_suspend, zynqmp_dma_resume)
+ SET_RUNTIME_PM_OPS(zynqmp_dma_runtime_suspend,
+ zynqmp_dma_runtime_resume, NULL)
+};
+
+/**
+ * zynqmp_dma_probe - Driver probe function
+ * @pdev: Pointer to the platform_device structure
+ *
+ * Return: '0' on success and failure value on error
+ */
+static int zynqmp_dma_probe(struct platform_device *pdev)
+{
+ struct zynqmp_dma_device *zdev;
+ struct dma_device *p;
+ int ret;
+
+ zdev = devm_kzalloc(&pdev->dev, sizeof(*zdev), GFP_KERNEL);
+ if (!zdev)
+ return -ENOMEM;
+
+ zdev->dev = &pdev->dev;
+ INIT_LIST_HEAD(&zdev->common.channels);
+
+ dma_set_mask(&pdev->dev, DMA_BIT_MASK(44));
+ dma_cap_set(DMA_MEMCPY, zdev->common.cap_mask);
+
+ p = &zdev->common;
+ p->device_prep_dma_memcpy = zynqmp_dma_prep_memcpy;
+ p->device_terminate_all = zynqmp_dma_device_terminate_all;
+ p->device_synchronize = zynqmp_dma_synchronize;
+ p->device_issue_pending = zynqmp_dma_issue_pending;
+ p->device_alloc_chan_resources = zynqmp_dma_alloc_chan_resources;
+ p->device_free_chan_resources = zynqmp_dma_free_chan_resources;
+ p->device_tx_status = dma_cookie_status;
+ p->device_config = zynqmp_dma_device_config;
+ p->dev = &pdev->dev;
+
+ zdev->clk_main = devm_clk_get(&pdev->dev, "clk_main");
+ if (IS_ERR(zdev->clk_main))
+ return dev_err_probe(&pdev->dev, PTR_ERR(zdev->clk_main),
+ "main clock not found.\n");
+
+ zdev->clk_apb = devm_clk_get(&pdev->dev, "clk_apb");
+ if (IS_ERR(zdev->clk_apb))
+ return dev_err_probe(&pdev->dev, PTR_ERR(zdev->clk_apb),
+ "apb clock not found.\n");
+
+ platform_set_drvdata(pdev, zdev);
+ pm_runtime_set_autosuspend_delay(zdev->dev, ZDMA_PM_TIMEOUT);
+ pm_runtime_use_autosuspend(zdev->dev);
+ pm_runtime_enable(zdev->dev);
+ ret = pm_runtime_resume_and_get(zdev->dev);
+ if (ret < 0) {
+ dev_err(&pdev->dev, "device wakeup failed.\n");
+ pm_runtime_disable(zdev->dev);
+ }
+ if (!pm_runtime_enabled(zdev->dev)) {
+ ret = zynqmp_dma_runtime_resume(zdev->dev);
+ if (ret)
+ return ret;
+ }
+
+ ret = zynqmp_dma_chan_probe(zdev, pdev);
+ if (ret) {
+ dev_err_probe(&pdev->dev, ret, "Probing channel failed\n");
+ goto err_disable_pm;
+ }
+
+ p->dst_addr_widths = BIT(zdev->chan->bus_width / 8);
+ p->src_addr_widths = BIT(zdev->chan->bus_width / 8);
+
+ ret = dma_async_device_register(&zdev->common);
+ if (ret) {
+ dev_err(zdev->dev, "failed to register the dma device\n");
+ goto free_chan_resources;
+ }
+
+ ret = of_dma_controller_register(pdev->dev.of_node,
+ of_zynqmp_dma_xlate, zdev);
+ if (ret) {
+ dev_err_probe(&pdev->dev, ret, "Unable to register DMA to DT\n");
+ dma_async_device_unregister(&zdev->common);
+ goto free_chan_resources;
+ }
+
+ pm_runtime_mark_last_busy(zdev->dev);
+ pm_runtime_put_sync_autosuspend(zdev->dev);
+
+ return 0;
+
+free_chan_resources:
+ zynqmp_dma_chan_remove(zdev->chan);
+err_disable_pm:
+ if (!pm_runtime_enabled(zdev->dev))
+ zynqmp_dma_runtime_suspend(zdev->dev);
+ pm_runtime_disable(zdev->dev);
+ return ret;
+}
+
+/**
+ * zynqmp_dma_remove - Driver remove function
+ * @pdev: Pointer to the platform_device structure
+ *
+ * Return: Always '0'
+ */
+static int zynqmp_dma_remove(struct platform_device *pdev)
+{
+ struct zynqmp_dma_device *zdev = platform_get_drvdata(pdev);
+
+ of_dma_controller_free(pdev->dev.of_node);
+ dma_async_device_unregister(&zdev->common);
+
+ zynqmp_dma_chan_remove(zdev->chan);
+ pm_runtime_disable(zdev->dev);
+ if (!pm_runtime_enabled(zdev->dev))
+ zynqmp_dma_runtime_suspend(zdev->dev);
+
+ return 0;
+}
+
+static const struct of_device_id zynqmp_dma_of_match[] = {
+ { .compatible = "xlnx,zynqmp-dma-1.0", },
+ {}
+};
+MODULE_DEVICE_TABLE(of, zynqmp_dma_of_match);
+
+static struct platform_driver zynqmp_dma_driver = {
+ .driver = {
+ .name = "xilinx-zynqmp-dma",
+ .of_match_table = zynqmp_dma_of_match,
+ .pm = &zynqmp_dma_dev_pm_ops,
+ },
+ .probe = zynqmp_dma_probe,
+ .remove = zynqmp_dma_remove,
+};
+
+module_platform_driver(zynqmp_dma_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Xilinx, Inc.");
+MODULE_DESCRIPTION("Xilinx ZynqMP DMA driver");