From 76cb841cb886eef6b3bee341a2266c76578724ad Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Mon, 6 May 2024 03:02:30 +0200 Subject: Adding upstream version 4.19.249. Signed-off-by: Daniel Baumann --- drivers/misc/Kconfig | 530 ++++ drivers/misc/Makefile | 60 + drivers/misc/ad525x_dpot-i2c.c | 119 + drivers/misc/ad525x_dpot-spi.c | 146 + drivers/misc/ad525x_dpot.c | 765 ++++++ drivers/misc/ad525x_dpot.h | 215 ++ drivers/misc/altera-stapl/Kconfig | 9 + drivers/misc/altera-stapl/Makefile | 3 + drivers/misc/altera-stapl/altera-comp.c | 142 + drivers/misc/altera-stapl/altera-exprt.h | 33 + drivers/misc/altera-stapl/altera-jtag.c | 1021 +++++++ drivers/misc/altera-stapl/altera-jtag.h | 113 + drivers/misc/altera-stapl/altera-lpt.c | 70 + drivers/misc/altera-stapl/altera.c | 2536 ++++++++++++++++++ drivers/misc/apds9802als.c | 322 +++ drivers/misc/apds990x.c | 1300 +++++++++ drivers/misc/aspeed-lpc-ctrl.c | 300 +++ drivers/misc/aspeed-lpc-snoop.c | 375 +++ drivers/misc/atmel-ssc.c | 285 ++ drivers/misc/atmel_tclib.c | 198 ++ drivers/misc/bh1770glc.c | 1410 ++++++++++ drivers/misc/c2port/Kconfig | 34 + drivers/misc/c2port/Makefile | 3 + drivers/misc/c2port/c2port-duramar2150.c | 159 ++ drivers/misc/c2port/core.c | 1003 +++++++ drivers/misc/cardreader/Kconfig | 20 + drivers/misc/cardreader/Makefile | 4 + drivers/misc/cardreader/rtl8411.c | 508 ++++ drivers/misc/cardreader/rts5209.c | 277 ++ drivers/misc/cardreader/rts5227.c | 380 +++ drivers/misc/cardreader/rts5229.c | 273 ++ drivers/misc/cardreader/rts5249.c | 742 +++++ drivers/misc/cardreader/rts5260.c | 748 ++++++ drivers/misc/cardreader/rts5260.h | 45 + drivers/misc/cardreader/rtsx_pcr.c | 1698 ++++++++++++ drivers/misc/cardreader/rtsx_pcr.h | 113 + drivers/misc/cardreader/rtsx_usb.c | 792 ++++++ drivers/misc/cb710/Kconfig | 25 + drivers/misc/cb710/Makefile | 6 + drivers/misc/cb710/core.c | 346 +++ drivers/misc/cb710/debug.c | 118 + drivers/misc/cb710/sgbuf2.c | 146 + drivers/misc/cs5535-mfgpt.c | 383 +++ drivers/misc/cxl/Kconfig | 35 + drivers/misc/cxl/Makefile | 14 + drivers/misc/cxl/api.c | 540 ++++ drivers/misc/cxl/base.c | 129 + drivers/misc/cxl/context.c | 366 +++ drivers/misc/cxl/cxl.h | 1139 ++++++++ drivers/misc/cxl/cxllib.c | 277 ++ drivers/misc/cxl/debugfs.c | 152 ++ drivers/misc/cxl/fault.c | 360 +++ drivers/misc/cxl/file.c | 703 +++++ drivers/misc/cxl/flash.c | 543 ++++ drivers/misc/cxl/guest.c | 1202 +++++++++ drivers/misc/cxl/hcalls.c | 647 +++++ drivers/misc/cxl/hcalls.h | 204 ++ drivers/misc/cxl/irq.c | 453 ++++ drivers/misc/cxl/main.c | 382 +++ drivers/misc/cxl/native.c | 1600 +++++++++++ drivers/misc/cxl/of.c | 511 ++++ drivers/misc/cxl/pci.c | 2104 +++++++++++++++ drivers/misc/cxl/sysfs.c | 774 ++++++ drivers/misc/cxl/trace.c | 13 + drivers/misc/cxl/trace.h | 695 +++++ drivers/misc/cxl/vphb.c | 333 +++ drivers/misc/ds1682.c | 267 ++ drivers/misc/dummy-irq.c | 64 + drivers/misc/echo/Kconfig | 9 + drivers/misc/echo/Makefile | 1 + drivers/misc/echo/echo.c | 601 +++++ drivers/misc/echo/echo.h | 187 ++ drivers/misc/echo/fir.h | 166 ++ drivers/misc/echo/oslec.h | 94 + drivers/misc/eeprom/Kconfig | 114 + drivers/misc/eeprom/Makefile | 9 + drivers/misc/eeprom/at24.c | 805 ++++++ drivers/misc/eeprom/at25.c | 414 +++ drivers/misc/eeprom/digsy_mtc_eeprom.c | 106 + drivers/misc/eeprom/eeprom.c | 222 ++ drivers/misc/eeprom/eeprom_93cx6.c | 381 +++ drivers/misc/eeprom/eeprom_93xx46.c | 541 ++++ drivers/misc/eeprom/idt_89hpesx.c | 1620 +++++++++++ drivers/misc/eeprom/max6875.c | 210 ++ drivers/misc/enclosure.c | 695 +++++ drivers/misc/fsa9480.c | 550 ++++ drivers/misc/genwqe/Kconfig | 19 + drivers/misc/genwqe/Makefile | 7 + drivers/misc/genwqe/card_base.c | 1412 ++++++++++ drivers/misc/genwqe/card_base.h | 585 ++++ drivers/misc/genwqe/card_ddcb.c | 1410 ++++++++++ drivers/misc/genwqe/card_ddcb.h | 188 ++ drivers/misc/genwqe/card_debugfs.c | 504 ++++ drivers/misc/genwqe/card_dev.c | 1412 ++++++++++ drivers/misc/genwqe/card_sysfs.c | 303 +++ drivers/misc/genwqe/card_utils.c | 1061 ++++++++ drivers/misc/genwqe/genwqe_driver.h | 77 + drivers/misc/hmc6352.c | 157 ++ drivers/misc/hpilo.c | 918 +++++++ drivers/misc/hpilo.h | 211 ++ drivers/misc/ibmasm/Makefile | 16 + drivers/misc/ibmasm/command.c | 187 ++ drivers/misc/ibmasm/dot_command.c | 152 ++ drivers/misc/ibmasm/dot_command.h | 78 + drivers/misc/ibmasm/event.c | 177 ++ drivers/misc/ibmasm/heartbeat.c | 101 + drivers/misc/ibmasm/i2o.h | 77 + drivers/misc/ibmasm/ibmasm.h | 223 ++ drivers/misc/ibmasm/ibmasmfs.c | 608 +++++ drivers/misc/ibmasm/lowlevel.c | 85 + drivers/misc/ibmasm/lowlevel.h | 137 + drivers/misc/ibmasm/module.c | 238 ++ drivers/misc/ibmasm/r_heartbeat.c | 99 + drivers/misc/ibmasm/remote.c | 282 ++ drivers/misc/ibmasm/remote.h | 270 ++ drivers/misc/ibmasm/uart.c | 72 + drivers/misc/ibmvmc.c | 2421 +++++++++++++++++ drivers/misc/ibmvmc.h | 209 ++ drivers/misc/ics932s401.c | 495 ++++ drivers/misc/ioc4.c | 500 ++++ drivers/misc/isl29003.c | 487 ++++ drivers/misc/isl29020.c | 238 ++ drivers/misc/kgdbts.c | 1193 +++++++++ drivers/misc/lattice-ecp3-config.c | 249 ++ drivers/misc/lis3lv02d/Kconfig | 37 + drivers/misc/lis3lv02d/Makefile | 7 + drivers/misc/lis3lv02d/lis3lv02d.c | 1274 +++++++++ drivers/misc/lis3lv02d/lis3lv02d.h | 332 +++ drivers/misc/lis3lv02d/lis3lv02d_i2c.c | 289 ++ drivers/misc/lis3lv02d/lis3lv02d_spi.c | 153 ++ drivers/misc/lkdtm/Makefile | 19 + drivers/misc/lkdtm/bugs.c | 257 ++ drivers/misc/lkdtm/core.c | 507 ++++ drivers/misc/lkdtm/heap.c | 148 + drivers/misc/lkdtm/lkdtm.h | 88 + drivers/misc/lkdtm/perms.c | 221 ++ drivers/misc/lkdtm/refcount.c | 401 +++ drivers/misc/lkdtm/rodata.c | 11 + drivers/misc/lkdtm/usercopy.c | 350 +++ drivers/misc/mei/Kconfig | 45 + drivers/misc/mei/Makefile | 25 + drivers/misc/mei/bus-fixup.c | 505 ++++ drivers/misc/mei/bus.c | 1153 ++++++++ drivers/misc/mei/client.c | 1844 +++++++++++++ drivers/misc/mei/client.h | 236 ++ drivers/misc/mei/debugfs.c | 288 ++ drivers/misc/mei/hbm.c | 1287 +++++++++ drivers/misc/mei/hbm.h | 62 + drivers/misc/mei/hw-me-regs.h | 245 ++ drivers/misc/mei/hw-me.c | 1500 +++++++++++ drivers/misc/mei/hw-me.h | 115 + drivers/misc/mei/hw-txe-regs.h | 294 ++ drivers/misc/mei/hw-txe.c | 1267 +++++++++ drivers/misc/mei/hw-txe.h | 75 + drivers/misc/mei/hw.h | 515 ++++ drivers/misc/mei/init.c | 406 +++ drivers/misc/mei/interrupt.c | 535 ++++ drivers/misc/mei/main.c | 1022 +++++++ drivers/misc/mei/mei-trace.c | 26 + drivers/misc/mei/mei-trace.h | 93 + drivers/misc/mei/mei_dev.h | 756 ++++++ drivers/misc/mei/pci-me.c | 534 ++++ drivers/misc/mei/pci-txe.c | 422 +++ drivers/misc/mic/Kconfig | 156 ++ drivers/misc/mic/Makefile | 12 + drivers/misc/mic/bus/Makefile | 8 + drivers/misc/mic/bus/cosm_bus.c | 141 + drivers/misc/mic/bus/cosm_bus.h | 136 + drivers/misc/mic/bus/mic_bus.c | 204 ++ drivers/misc/mic/bus/scif_bus.c | 209 ++ drivers/misc/mic/bus/scif_bus.h | 133 + drivers/misc/mic/bus/vop_bus.c | 205 ++ drivers/misc/mic/bus/vop_bus.h | 140 + drivers/misc/mic/card/Makefile | 11 + drivers/misc/mic/card/mic_debugfs.c | 130 + drivers/misc/mic/card/mic_device.c | 429 +++ drivers/misc/mic/card/mic_device.h | 149 ++ drivers/misc/mic/card/mic_x100.c | 359 +++ drivers/misc/mic/card/mic_x100.h | 49 + drivers/misc/mic/common/mic_dev.h | 67 + drivers/misc/mic/cosm/Makefile | 11 + drivers/misc/mic/cosm/cosm_debugfs.c | 156 ++ drivers/misc/mic/cosm/cosm_main.c | 393 +++ drivers/misc/mic/cosm/cosm_main.h | 73 + drivers/misc/mic/cosm/cosm_scif_server.c | 411 +++ drivers/misc/mic/cosm/cosm_sysfs.c | 461 ++++ drivers/misc/mic/cosm_client/Makefile | 7 + drivers/misc/mic/cosm_client/cosm_scif_client.c | 281 ++ drivers/misc/mic/host/Makefile | 12 + drivers/misc/mic/host/mic_boot.c | 599 +++++ drivers/misc/mic/host/mic_debugfs.c | 216 ++ drivers/misc/mic/host/mic_device.h | 169 ++ drivers/misc/mic/host/mic_intr.c | 645 +++++ drivers/misc/mic/host/mic_intr.h | 149 ++ drivers/misc/mic/host/mic_main.c | 347 +++ drivers/misc/mic/host/mic_smpt.c | 439 +++ drivers/misc/mic/host/mic_smpt.h | 99 + drivers/misc/mic/host/mic_x100.c | 584 ++++ drivers/misc/mic/host/mic_x100.h | 98 + drivers/misc/mic/scif/Makefile | 21 + drivers/misc/mic/scif/scif_api.c | 1496 +++++++++++ drivers/misc/mic/scif/scif_debugfs.c | 162 ++ drivers/misc/mic/scif/scif_dma.c | 1960 ++++++++++++++ drivers/misc/mic/scif/scif_epd.c | 357 +++ drivers/misc/mic/scif/scif_epd.h | 210 ++ drivers/misc/mic/scif/scif_fd.c | 471 ++++ drivers/misc/mic/scif/scif_fence.c | 772 ++++++ drivers/misc/mic/scif/scif_main.c | 359 +++ drivers/misc/mic/scif/scif_main.h | 283 ++ drivers/misc/mic/scif/scif_map.h | 136 + drivers/misc/mic/scif/scif_mmap.c | 699 +++++ drivers/misc/mic/scif/scif_nm.c | 237 ++ drivers/misc/mic/scif/scif_nodeqp.c | 1354 ++++++++++ drivers/misc/mic/scif/scif_nodeqp.h | 221 ++ drivers/misc/mic/scif/scif_peer_bus.c | 183 ++ drivers/misc/mic/scif/scif_peer_bus.h | 31 + drivers/misc/mic/scif/scif_ports.c | 124 + drivers/misc/mic/scif/scif_rb.c | 249 ++ drivers/misc/mic/scif/scif_rb.h | 100 + drivers/misc/mic/scif/scif_rma.c | 1775 ++++++++++++ drivers/misc/mic/scif/scif_rma.h | 464 ++++ drivers/misc/mic/scif/scif_rma_list.c | 291 ++ drivers/misc/mic/scif/scif_rma_list.h | 57 + drivers/misc/mic/vop/Makefile | 9 + drivers/misc/mic/vop/vop_debugfs.c | 232 ++ drivers/misc/mic/vop/vop_main.c | 766 ++++++ drivers/misc/mic/vop/vop_main.h | 170 ++ drivers/misc/mic/vop/vop_vringh.c | 1169 ++++++++ drivers/misc/ocxl/Kconfig | 31 + drivers/misc/ocxl/Makefile | 11 + drivers/misc/ocxl/afu_irq.c | 202 ++ drivers/misc/ocxl/config.c | 725 +++++ drivers/misc/ocxl/context.c | 282 ++ drivers/misc/ocxl/file.c | 541 ++++ drivers/misc/ocxl/link.c | 690 +++++ drivers/misc/ocxl/main.c | 33 + drivers/misc/ocxl/ocxl_internal.h | 132 + drivers/misc/ocxl/pasid.c | 107 + drivers/misc/ocxl/pci.c | 585 ++++ drivers/misc/ocxl/sysfs.c | 141 + drivers/misc/ocxl/trace.c | 6 + drivers/misc/ocxl/trace.h | 182 ++ drivers/misc/pch_phub.c | 915 +++++++ drivers/misc/pci_endpoint_test.c | 831 ++++++ drivers/misc/phantom.c | 571 ++++ drivers/misc/pti.c | 988 +++++++ drivers/misc/qcom-coincell.c | 153 ++ drivers/misc/sgi-gru/Makefile | 5 + drivers/misc/sgi-gru/gru.h | 78 + drivers/misc/sgi-gru/gru_instructions.h | 736 +++++ drivers/misc/sgi-gru/grufault.c | 907 +++++++ drivers/misc/sgi-gru/grufile.c | 623 +++++ drivers/misc/sgi-gru/gruhandles.c | 210 ++ drivers/misc/sgi-gru/gruhandles.h | 530 ++++ drivers/misc/sgi-gru/grukdump.c | 236 ++ drivers/misc/sgi-gru/grukservices.c | 1171 ++++++++ drivers/misc/sgi-gru/grukservices.h | 214 ++ drivers/misc/sgi-gru/grulib.h | 153 ++ drivers/misc/sgi-gru/grumain.c | 976 +++++++ drivers/misc/sgi-gru/gruprocfs.c | 324 +++ drivers/misc/sgi-gru/grutables.h | 679 +++++ drivers/misc/sgi-gru/grutlbpurge.c | 370 +++ drivers/misc/sgi-xp/Makefile | 20 + drivers/misc/sgi-xp/xp.h | 368 +++ drivers/misc/sgi-xp/xp_main.c | 264 ++ drivers/misc/sgi-xp/xp_nofault.S | 35 + drivers/misc/sgi-xp/xp_sn2.c | 190 ++ drivers/misc/sgi-xp/xp_uv.c | 171 ++ drivers/misc/sgi-xp/xpc.h | 1004 +++++++ drivers/misc/sgi-xp/xpc_channel.c | 1011 +++++++ drivers/misc/sgi-xp/xpc_main.c | 1373 ++++++++++ drivers/misc/sgi-xp/xpc_partition.c | 540 ++++ drivers/misc/sgi-xp/xpc_sn2.c | 2459 +++++++++++++++++ drivers/misc/sgi-xp/xpc_uv.c | 1813 +++++++++++++ drivers/misc/sgi-xp/xpnet.c | 596 +++++ drivers/misc/spear13xx_pcie_gadget.c | 797 ++++++ drivers/misc/sram-exec.c | 119 + drivers/misc/sram.c | 456 ++++ drivers/misc/sram.h | 58 + drivers/misc/ti-st/Kconfig | 18 + drivers/misc/ti-st/Makefile | 6 + drivers/misc/ti-st/st_core.c | 922 +++++++ drivers/misc/ti-st/st_kim.c | 868 ++++++ drivers/misc/ti-st/st_ll.c | 169 ++ drivers/misc/tifm_7xx1.c | 446 +++ drivers/misc/tifm_core.c | 371 +++ drivers/misc/tsl2550.c | 468 ++++ drivers/misc/vexpress-syscfg.c | 287 ++ drivers/misc/vmw_balloon.c | 1236 +++++++++ drivers/misc/vmw_vmci/Kconfig | 16 + drivers/misc/vmw_vmci/Makefile | 4 + drivers/misc/vmw_vmci/vmci_context.c | 1225 +++++++++ drivers/misc/vmw_vmci/vmci_context.h | 182 ++ drivers/misc/vmw_vmci/vmci_datagram.c | 502 ++++ drivers/misc/vmw_vmci/vmci_datagram.h | 52 + drivers/misc/vmw_vmci/vmci_doorbell.c | 609 +++++ drivers/misc/vmw_vmci/vmci_doorbell.h | 51 + drivers/misc/vmw_vmci/vmci_driver.c | 117 + drivers/misc/vmw_vmci/vmci_driver.h | 57 + drivers/misc/vmw_vmci/vmci_event.c | 225 ++ drivers/misc/vmw_vmci/vmci_event.h | 25 + drivers/misc/vmw_vmci/vmci_guest.c | 736 +++++ drivers/misc/vmw_vmci/vmci_handle_array.c | 154 ++ drivers/misc/vmw_vmci/vmci_handle_array.h | 61 + drivers/misc/vmw_vmci/vmci_host.c | 1036 +++++++ drivers/misc/vmw_vmci/vmci_queue_pair.c | 3272 +++++++++++++++++++++++ drivers/misc/vmw_vmci/vmci_queue_pair.h | 173 ++ drivers/misc/vmw_vmci/vmci_resource.c | 229 ++ drivers/misc/vmw_vmci/vmci_resource.h | 59 + drivers/misc/vmw_vmci/vmci_route.c | 226 ++ drivers/misc/vmw_vmci/vmci_route.h | 30 + 311 files changed, 133699 insertions(+) create mode 100644 drivers/misc/Kconfig create mode 100644 drivers/misc/Makefile create mode 100644 drivers/misc/ad525x_dpot-i2c.c create mode 100644 drivers/misc/ad525x_dpot-spi.c create mode 100644 drivers/misc/ad525x_dpot.c create mode 100644 drivers/misc/ad525x_dpot.h create mode 100644 drivers/misc/altera-stapl/Kconfig create mode 100644 drivers/misc/altera-stapl/Makefile create mode 100644 drivers/misc/altera-stapl/altera-comp.c create mode 100644 drivers/misc/altera-stapl/altera-exprt.h create mode 100644 drivers/misc/altera-stapl/altera-jtag.c create mode 100644 drivers/misc/altera-stapl/altera-jtag.h create mode 100644 drivers/misc/altera-stapl/altera-lpt.c create mode 100644 drivers/misc/altera-stapl/altera.c create mode 100644 drivers/misc/apds9802als.c create mode 100644 drivers/misc/apds990x.c create mode 100644 drivers/misc/aspeed-lpc-ctrl.c create mode 100644 drivers/misc/aspeed-lpc-snoop.c create mode 100644 drivers/misc/atmel-ssc.c create mode 100644 drivers/misc/atmel_tclib.c create mode 100644 drivers/misc/bh1770glc.c create mode 100644 drivers/misc/c2port/Kconfig create mode 100644 drivers/misc/c2port/Makefile create mode 100644 drivers/misc/c2port/c2port-duramar2150.c create mode 100644 drivers/misc/c2port/core.c create mode 100644 drivers/misc/cardreader/Kconfig create mode 100644 drivers/misc/cardreader/Makefile create mode 100644 drivers/misc/cardreader/rtl8411.c create mode 100644 drivers/misc/cardreader/rts5209.c create mode 100644 drivers/misc/cardreader/rts5227.c create mode 100644 drivers/misc/cardreader/rts5229.c create mode 100644 drivers/misc/cardreader/rts5249.c create mode 100644 drivers/misc/cardreader/rts5260.c create mode 100644 drivers/misc/cardreader/rts5260.h create mode 100644 drivers/misc/cardreader/rtsx_pcr.c create mode 100644 drivers/misc/cardreader/rtsx_pcr.h create mode 100644 drivers/misc/cardreader/rtsx_usb.c create mode 100644 drivers/misc/cb710/Kconfig create mode 100644 drivers/misc/cb710/Makefile create mode 100644 drivers/misc/cb710/core.c create mode 100644 drivers/misc/cb710/debug.c create mode 100644 drivers/misc/cb710/sgbuf2.c create mode 100644 drivers/misc/cs5535-mfgpt.c create mode 100644 drivers/misc/cxl/Kconfig create mode 100644 drivers/misc/cxl/Makefile create mode 100644 drivers/misc/cxl/api.c create mode 100644 drivers/misc/cxl/base.c create mode 100644 drivers/misc/cxl/context.c create mode 100644 drivers/misc/cxl/cxl.h create mode 100644 drivers/misc/cxl/cxllib.c create mode 100644 drivers/misc/cxl/debugfs.c create mode 100644 drivers/misc/cxl/fault.c create mode 100644 drivers/misc/cxl/file.c create mode 100644 drivers/misc/cxl/flash.c create mode 100644 drivers/misc/cxl/guest.c create mode 100644 drivers/misc/cxl/hcalls.c create mode 100644 drivers/misc/cxl/hcalls.h create mode 100644 drivers/misc/cxl/irq.c create mode 100644 drivers/misc/cxl/main.c create mode 100644 drivers/misc/cxl/native.c create mode 100644 drivers/misc/cxl/of.c create mode 100644 drivers/misc/cxl/pci.c create mode 100644 drivers/misc/cxl/sysfs.c create mode 100644 drivers/misc/cxl/trace.c create mode 100644 drivers/misc/cxl/trace.h create mode 100644 drivers/misc/cxl/vphb.c create mode 100644 drivers/misc/ds1682.c create mode 100644 drivers/misc/dummy-irq.c create mode 100644 drivers/misc/echo/Kconfig create mode 100644 drivers/misc/echo/Makefile create mode 100644 drivers/misc/echo/echo.c create mode 100644 drivers/misc/echo/echo.h create mode 100644 drivers/misc/echo/fir.h create mode 100644 drivers/misc/echo/oslec.h create mode 100644 drivers/misc/eeprom/Kconfig create mode 100644 drivers/misc/eeprom/Makefile create mode 100644 drivers/misc/eeprom/at24.c create mode 100644 drivers/misc/eeprom/at25.c create mode 100644 drivers/misc/eeprom/digsy_mtc_eeprom.c create mode 100644 drivers/misc/eeprom/eeprom.c create mode 100644 drivers/misc/eeprom/eeprom_93cx6.c create mode 100644 drivers/misc/eeprom/eeprom_93xx46.c create mode 100644 drivers/misc/eeprom/idt_89hpesx.c create mode 100644 drivers/misc/eeprom/max6875.c create mode 100644 drivers/misc/enclosure.c create mode 100644 drivers/misc/fsa9480.c create mode 100644 drivers/misc/genwqe/Kconfig create mode 100644 drivers/misc/genwqe/Makefile create mode 100644 drivers/misc/genwqe/card_base.c create mode 100644 drivers/misc/genwqe/card_base.h create mode 100644 drivers/misc/genwqe/card_ddcb.c create mode 100644 drivers/misc/genwqe/card_ddcb.h create mode 100644 drivers/misc/genwqe/card_debugfs.c create mode 100644 drivers/misc/genwqe/card_dev.c create mode 100644 drivers/misc/genwqe/card_sysfs.c create mode 100644 drivers/misc/genwqe/card_utils.c create mode 100644 drivers/misc/genwqe/genwqe_driver.h create mode 100644 drivers/misc/hmc6352.c create mode 100644 drivers/misc/hpilo.c create mode 100644 drivers/misc/hpilo.h create mode 100644 drivers/misc/ibmasm/Makefile create mode 100644 drivers/misc/ibmasm/command.c create mode 100644 drivers/misc/ibmasm/dot_command.c create mode 100644 drivers/misc/ibmasm/dot_command.h create mode 100644 drivers/misc/ibmasm/event.c create mode 100644 drivers/misc/ibmasm/heartbeat.c create mode 100644 drivers/misc/ibmasm/i2o.h create mode 100644 drivers/misc/ibmasm/ibmasm.h create mode 100644 drivers/misc/ibmasm/ibmasmfs.c create mode 100644 drivers/misc/ibmasm/lowlevel.c create mode 100644 drivers/misc/ibmasm/lowlevel.h create mode 100644 drivers/misc/ibmasm/module.c create mode 100644 drivers/misc/ibmasm/r_heartbeat.c create mode 100644 drivers/misc/ibmasm/remote.c create mode 100644 drivers/misc/ibmasm/remote.h create mode 100644 drivers/misc/ibmasm/uart.c create mode 100644 drivers/misc/ibmvmc.c create mode 100644 drivers/misc/ibmvmc.h create mode 100644 drivers/misc/ics932s401.c create mode 100644 drivers/misc/ioc4.c create mode 100644 drivers/misc/isl29003.c create mode 100644 drivers/misc/isl29020.c create mode 100644 drivers/misc/kgdbts.c create mode 100644 drivers/misc/lattice-ecp3-config.c create mode 100644 drivers/misc/lis3lv02d/Kconfig create mode 100644 drivers/misc/lis3lv02d/Makefile create mode 100644 drivers/misc/lis3lv02d/lis3lv02d.c create mode 100644 drivers/misc/lis3lv02d/lis3lv02d.h create mode 100644 drivers/misc/lis3lv02d/lis3lv02d_i2c.c create mode 100644 drivers/misc/lis3lv02d/lis3lv02d_spi.c create mode 100644 drivers/misc/lkdtm/Makefile create mode 100644 drivers/misc/lkdtm/bugs.c create mode 100644 drivers/misc/lkdtm/core.c create mode 100644 drivers/misc/lkdtm/heap.c create mode 100644 drivers/misc/lkdtm/lkdtm.h create mode 100644 drivers/misc/lkdtm/perms.c create mode 100644 drivers/misc/lkdtm/refcount.c create mode 100644 drivers/misc/lkdtm/rodata.c create mode 100644 drivers/misc/lkdtm/usercopy.c create mode 100644 drivers/misc/mei/Kconfig create mode 100644 drivers/misc/mei/Makefile create mode 100644 drivers/misc/mei/bus-fixup.c create mode 100644 drivers/misc/mei/bus.c create mode 100644 drivers/misc/mei/client.c create mode 100644 drivers/misc/mei/client.h create mode 100644 drivers/misc/mei/debugfs.c create mode 100644 drivers/misc/mei/hbm.c create mode 100644 drivers/misc/mei/hbm.h create mode 100644 drivers/misc/mei/hw-me-regs.h create mode 100644 drivers/misc/mei/hw-me.c create mode 100644 drivers/misc/mei/hw-me.h create mode 100644 drivers/misc/mei/hw-txe-regs.h create mode 100644 drivers/misc/mei/hw-txe.c create mode 100644 drivers/misc/mei/hw-txe.h create mode 100644 drivers/misc/mei/hw.h create mode 100644 drivers/misc/mei/init.c create mode 100644 drivers/misc/mei/interrupt.c create mode 100644 drivers/misc/mei/main.c create mode 100644 drivers/misc/mei/mei-trace.c create mode 100644 drivers/misc/mei/mei-trace.h create mode 100644 drivers/misc/mei/mei_dev.h create mode 100644 drivers/misc/mei/pci-me.c create mode 100644 drivers/misc/mei/pci-txe.c create mode 100644 drivers/misc/mic/Kconfig create mode 100644 drivers/misc/mic/Makefile create mode 100644 drivers/misc/mic/bus/Makefile create mode 100644 drivers/misc/mic/bus/cosm_bus.c create mode 100644 drivers/misc/mic/bus/cosm_bus.h create mode 100644 drivers/misc/mic/bus/mic_bus.c create mode 100644 drivers/misc/mic/bus/scif_bus.c create mode 100644 drivers/misc/mic/bus/scif_bus.h create mode 100644 drivers/misc/mic/bus/vop_bus.c create mode 100644 drivers/misc/mic/bus/vop_bus.h create mode 100644 drivers/misc/mic/card/Makefile create mode 100644 drivers/misc/mic/card/mic_debugfs.c create mode 100644 drivers/misc/mic/card/mic_device.c create mode 100644 drivers/misc/mic/card/mic_device.h create mode 100644 drivers/misc/mic/card/mic_x100.c create mode 100644 drivers/misc/mic/card/mic_x100.h create mode 100644 drivers/misc/mic/common/mic_dev.h create mode 100644 drivers/misc/mic/cosm/Makefile create mode 100644 drivers/misc/mic/cosm/cosm_debugfs.c create mode 100644 drivers/misc/mic/cosm/cosm_main.c create mode 100644 drivers/misc/mic/cosm/cosm_main.h create mode 100644 drivers/misc/mic/cosm/cosm_scif_server.c create mode 100644 drivers/misc/mic/cosm/cosm_sysfs.c create mode 100644 drivers/misc/mic/cosm_client/Makefile create mode 100644 drivers/misc/mic/cosm_client/cosm_scif_client.c create mode 100644 drivers/misc/mic/host/Makefile create mode 100644 drivers/misc/mic/host/mic_boot.c create mode 100644 drivers/misc/mic/host/mic_debugfs.c create mode 100644 drivers/misc/mic/host/mic_device.h create mode 100644 drivers/misc/mic/host/mic_intr.c create mode 100644 drivers/misc/mic/host/mic_intr.h create mode 100644 drivers/misc/mic/host/mic_main.c create mode 100644 drivers/misc/mic/host/mic_smpt.c create mode 100644 drivers/misc/mic/host/mic_smpt.h create mode 100644 drivers/misc/mic/host/mic_x100.c create mode 100644 drivers/misc/mic/host/mic_x100.h create mode 100644 drivers/misc/mic/scif/Makefile create mode 100644 drivers/misc/mic/scif/scif_api.c create mode 100644 drivers/misc/mic/scif/scif_debugfs.c create mode 100644 drivers/misc/mic/scif/scif_dma.c create mode 100644 drivers/misc/mic/scif/scif_epd.c create mode 100644 drivers/misc/mic/scif/scif_epd.h create mode 100644 drivers/misc/mic/scif/scif_fd.c create mode 100644 drivers/misc/mic/scif/scif_fence.c create mode 100644 drivers/misc/mic/scif/scif_main.c create mode 100644 drivers/misc/mic/scif/scif_main.h create mode 100644 drivers/misc/mic/scif/scif_map.h create mode 100644 drivers/misc/mic/scif/scif_mmap.c create mode 100644 drivers/misc/mic/scif/scif_nm.c create mode 100644 drivers/misc/mic/scif/scif_nodeqp.c create mode 100644 drivers/misc/mic/scif/scif_nodeqp.h create mode 100644 drivers/misc/mic/scif/scif_peer_bus.c create mode 100644 drivers/misc/mic/scif/scif_peer_bus.h create mode 100644 drivers/misc/mic/scif/scif_ports.c create mode 100644 drivers/misc/mic/scif/scif_rb.c create mode 100644 drivers/misc/mic/scif/scif_rb.h create mode 100644 drivers/misc/mic/scif/scif_rma.c create mode 100644 drivers/misc/mic/scif/scif_rma.h create mode 100644 drivers/misc/mic/scif/scif_rma_list.c create mode 100644 drivers/misc/mic/scif/scif_rma_list.h create mode 100644 drivers/misc/mic/vop/Makefile create mode 100644 drivers/misc/mic/vop/vop_debugfs.c create mode 100644 drivers/misc/mic/vop/vop_main.c create mode 100644 drivers/misc/mic/vop/vop_main.h create mode 100644 drivers/misc/mic/vop/vop_vringh.c create mode 100644 drivers/misc/ocxl/Kconfig create mode 100644 drivers/misc/ocxl/Makefile create mode 100644 drivers/misc/ocxl/afu_irq.c create mode 100644 drivers/misc/ocxl/config.c create mode 100644 drivers/misc/ocxl/context.c create mode 100644 drivers/misc/ocxl/file.c create mode 100644 drivers/misc/ocxl/link.c create mode 100644 drivers/misc/ocxl/main.c create mode 100644 drivers/misc/ocxl/ocxl_internal.h create mode 100644 drivers/misc/ocxl/pasid.c create mode 100644 drivers/misc/ocxl/pci.c create mode 100644 drivers/misc/ocxl/sysfs.c create mode 100644 drivers/misc/ocxl/trace.c create mode 100644 drivers/misc/ocxl/trace.h create mode 100644 drivers/misc/pch_phub.c create mode 100644 drivers/misc/pci_endpoint_test.c create mode 100644 drivers/misc/phantom.c create mode 100644 drivers/misc/pti.c create mode 100644 drivers/misc/qcom-coincell.c create mode 100644 drivers/misc/sgi-gru/Makefile create mode 100644 drivers/misc/sgi-gru/gru.h create mode 100644 drivers/misc/sgi-gru/gru_instructions.h create mode 100644 drivers/misc/sgi-gru/grufault.c create mode 100644 drivers/misc/sgi-gru/grufile.c create mode 100644 drivers/misc/sgi-gru/gruhandles.c create mode 100644 drivers/misc/sgi-gru/gruhandles.h create mode 100644 drivers/misc/sgi-gru/grukdump.c create mode 100644 drivers/misc/sgi-gru/grukservices.c create mode 100644 drivers/misc/sgi-gru/grukservices.h create mode 100644 drivers/misc/sgi-gru/grulib.h create mode 100644 drivers/misc/sgi-gru/grumain.c create mode 100644 drivers/misc/sgi-gru/gruprocfs.c create mode 100644 drivers/misc/sgi-gru/grutables.h create mode 100644 drivers/misc/sgi-gru/grutlbpurge.c create mode 100644 drivers/misc/sgi-xp/Makefile create mode 100644 drivers/misc/sgi-xp/xp.h create mode 100644 drivers/misc/sgi-xp/xp_main.c create mode 100644 drivers/misc/sgi-xp/xp_nofault.S create mode 100644 drivers/misc/sgi-xp/xp_sn2.c create mode 100644 drivers/misc/sgi-xp/xp_uv.c create mode 100644 drivers/misc/sgi-xp/xpc.h create mode 100644 drivers/misc/sgi-xp/xpc_channel.c create mode 100644 drivers/misc/sgi-xp/xpc_main.c create mode 100644 drivers/misc/sgi-xp/xpc_partition.c create mode 100644 drivers/misc/sgi-xp/xpc_sn2.c create mode 100644 drivers/misc/sgi-xp/xpc_uv.c create mode 100644 drivers/misc/sgi-xp/xpnet.c create mode 100644 drivers/misc/spear13xx_pcie_gadget.c create mode 100644 drivers/misc/sram-exec.c create mode 100644 drivers/misc/sram.c create mode 100644 drivers/misc/sram.h create mode 100644 drivers/misc/ti-st/Kconfig create mode 100644 drivers/misc/ti-st/Makefile create mode 100644 drivers/misc/ti-st/st_core.c create mode 100644 drivers/misc/ti-st/st_kim.c create mode 100644 drivers/misc/ti-st/st_ll.c create mode 100644 drivers/misc/tifm_7xx1.c create mode 100644 drivers/misc/tifm_core.c create mode 100644 drivers/misc/tsl2550.c create mode 100644 drivers/misc/vexpress-syscfg.c create mode 100644 drivers/misc/vmw_balloon.c create mode 100644 drivers/misc/vmw_vmci/Kconfig create mode 100644 drivers/misc/vmw_vmci/Makefile create mode 100644 drivers/misc/vmw_vmci/vmci_context.c create mode 100644 drivers/misc/vmw_vmci/vmci_context.h create mode 100644 drivers/misc/vmw_vmci/vmci_datagram.c create mode 100644 drivers/misc/vmw_vmci/vmci_datagram.h create mode 100644 drivers/misc/vmw_vmci/vmci_doorbell.c create mode 100644 drivers/misc/vmw_vmci/vmci_doorbell.h create mode 100644 drivers/misc/vmw_vmci/vmci_driver.c create mode 100644 drivers/misc/vmw_vmci/vmci_driver.h create mode 100644 drivers/misc/vmw_vmci/vmci_event.c create mode 100644 drivers/misc/vmw_vmci/vmci_event.h create mode 100644 drivers/misc/vmw_vmci/vmci_guest.c create mode 100644 drivers/misc/vmw_vmci/vmci_handle_array.c create mode 100644 drivers/misc/vmw_vmci/vmci_handle_array.h create mode 100644 drivers/misc/vmw_vmci/vmci_host.c create mode 100644 drivers/misc/vmw_vmci/vmci_queue_pair.c create mode 100644 drivers/misc/vmw_vmci/vmci_queue_pair.h create mode 100644 drivers/misc/vmw_vmci/vmci_resource.c create mode 100644 drivers/misc/vmw_vmci/vmci_resource.h create mode 100644 drivers/misc/vmw_vmci/vmci_route.c create mode 100644 drivers/misc/vmw_vmci/vmci_route.h (limited to 'drivers/misc') diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig new file mode 100644 index 000000000..3726eacdf --- /dev/null +++ b/drivers/misc/Kconfig @@ -0,0 +1,530 @@ +# +# Misc strange devices +# + +menu "Misc devices" + +config SENSORS_LIS3LV02D + tristate + depends on INPUT + select INPUT_POLLDEV + default n + +config AD525X_DPOT + tristate "Analog Devices Digital Potentiometers" + depends on (I2C || SPI) && SYSFS + help + If you say yes here, you get support for the Analog Devices + AD5258, AD5259, AD5251, AD5252, AD5253, AD5254, AD5255 + AD5160, AD5161, AD5162, AD5165, AD5200, AD5201, AD5203, + AD5204, AD5206, AD5207, AD5231, AD5232, AD5233, AD5235, + AD5260, AD5262, AD5263, AD5290, AD5291, AD5292, AD5293, + AD7376, AD8400, AD8402, AD8403, ADN2850, AD5241, AD5242, + AD5243, AD5245, AD5246, AD5247, AD5248, AD5280, AD5282, + ADN2860, AD5273, AD5171, AD5170, AD5172, AD5173, AD5270, + AD5271, AD5272, AD5274 + digital potentiometer chips. + + See Documentation/misc-devices/ad525x_dpot.txt for the + userspace interface. + + This driver can also be built as a module. If so, the module + will be called ad525x_dpot. + +config AD525X_DPOT_I2C + tristate "support I2C bus connection" + depends on AD525X_DPOT && I2C + help + Say Y here if you have a digital potentiometers hooked to an I2C bus. + + To compile this driver as a module, choose M here: the + module will be called ad525x_dpot-i2c. + +config AD525X_DPOT_SPI + tristate "support SPI bus connection" + depends on AD525X_DPOT && SPI_MASTER + help + Say Y here if you have a digital potentiometers hooked to an SPI bus. + + If unsure, say N (but it's safe to say "Y"). + + To compile this driver as a module, choose M here: the + module will be called ad525x_dpot-spi. + +config ATMEL_TCLIB + bool "Atmel AT32/AT91 Timer/Counter Library" + depends on ARCH_AT91 + help + Select this if you want a library to allocate the Timer/Counter + blocks found on many Atmel processors. This facilitates using + these blocks by different drivers despite processor differences. + +config ATMEL_TCB_CLKSRC + bool "TC Block Clocksource" + depends on ATMEL_TCLIB + default y + help + Select this to get a high precision clocksource based on a + TC block with a 5+ MHz base clock rate. Two timer channels + are combined to make a single 32-bit timer. + + When GENERIC_CLOCKEVENTS is defined, the third timer channel + may be used as a clock event device supporting oneshot mode + (delays of up to two seconds) based on the 32 KiHz clock. + +config ATMEL_TCB_CLKSRC_BLOCK + int + depends on ATMEL_TCB_CLKSRC + default 0 + range 0 1 + help + Some chips provide more than one TC block, so you have the + choice of which one to use for the clock framework. The other + TC can be used for other purposes, such as PWM generation and + interval timing. + +config DUMMY_IRQ + tristate "Dummy IRQ handler" + default n + ---help--- + This module accepts a single 'irq' parameter, which it should register for. + The sole purpose of this module is to help with debugging of systems on + which spurious IRQs would happen on disabled IRQ vector. + +config IBM_ASM + tristate "Device driver for IBM RSA service processor" + depends on X86 && PCI && INPUT + depends on SERIAL_8250 || SERIAL_8250=n + ---help--- + This option enables device driver support for in-band access to the + IBM RSA (Condor) service processor in eServer xSeries systems. + The ibmasm device driver allows user space application to access + ASM (Advanced Systems Management) functions on the service + processor. The driver is meant to be used in conjunction with + a user space API. + The ibmasm driver also enables the OS to use the UART on the + service processor board as a regular serial port. To make use of + this feature serial driver support (CONFIG_SERIAL_8250) must be + enabled. + + WARNING: This software may not be supported or function + correctly on your IBM server. Please consult the IBM ServerProven + website + for information on the specific driver level and support statement + for your IBM server. + +config IBMVMC + tristate "IBM Virtual Management Channel support" + depends on PPC_PSERIES + help + This is the IBM POWER Virtual Management Channel + + This driver is to be used for the POWER Virtual + Management Channel virtual adapter on the PowerVM + platform. It provides both request/response and + async message support through the /dev/ibmvmc node. + + To compile this driver as a module, choose M here: the + module will be called ibmvmc. + +config PHANTOM + tristate "Sensable PHANToM (PCI)" + depends on PCI + help + Say Y here if you want to build a driver for Sensable PHANToM device. + + This driver is only for PCI PHANToMs. + + If you choose to build module, its name will be phantom. If unsure, + say N here. + +config INTEL_MID_PTI + tristate "Parallel Trace Interface for MIPI P1149.7 cJTAG standard" + depends on PCI && TTY && (X86_INTEL_MID || COMPILE_TEST) + default n + help + The PTI (Parallel Trace Interface) driver directs + trace data routed from various parts in the system out + through an Intel Penwell PTI port and out of the mobile + device for analysis with a debugging tool (Lauterbach or Fido). + + You should select this driver if the target kernel is meant for + an Intel Atom (non-netbook) mobile device containing a MIPI + P1149.7 standard implementation. + +config SGI_IOC4 + tristate "SGI IOC4 Base IO support" + depends on PCI + ---help--- + This option enables basic support for the IOC4 chip on certain + SGI IO controller cards (IO9, IO10, and PCI-RT). This option + does not enable any specific functions on such a card, but provides + necessary infrastructure for other drivers to utilize. + + If you have an SGI Altix with an IOC4-based card say Y. + Otherwise say N. + +config TIFM_CORE + tristate "TI Flash Media interface support" + depends on PCI + help + If you want support for Texas Instruments(R) Flash Media adapters + you should select this option and then also choose an appropriate + host adapter, such as 'TI Flash Media PCI74xx/PCI76xx host adapter + support', if you have a TI PCI74xx compatible card reader, for + example. + You will also have to select some flash card format drivers. MMC/SD + cards are supported via 'MMC/SD Card support: TI Flash Media MMC/SD + Interface support (MMC_TIFM_SD)'. + + To compile this driver as a module, choose M here: the module will + be called tifm_core. + +config TIFM_7XX1 + tristate "TI Flash Media PCI74xx/PCI76xx host adapter support" + depends on PCI && TIFM_CORE + default TIFM_CORE + help + This option enables support for Texas Instruments(R) PCI74xx and + PCI76xx families of Flash Media adapters, found in many laptops. + To make actual use of the device, you will have to select some + flash card format drivers, as outlined in the TIFM_CORE Help. + + To compile this driver as a module, choose M here: the module will + be called tifm_7xx1. + +config ICS932S401 + tristate "Integrated Circuits ICS932S401" + depends on I2C + help + If you say yes here you get support for the Integrated Circuits + ICS932S401 clock control chips. + + This driver can also be built as a module. If so, the module + will be called ics932s401. + +config ATMEL_SSC + tristate "Device driver for Atmel SSC peripheral" + depends on HAS_IOMEM && (ARCH_AT91 || COMPILE_TEST) + ---help--- + This option enables device driver support for Atmel Synchronized + Serial Communication peripheral (SSC). + + The SSC peripheral supports a wide variety of serial frame based + communications, i.e. I2S, SPI, etc. + + If unsure, say N. + +config ENCLOSURE_SERVICES + tristate "Enclosure Services" + default n + help + Provides support for intelligent enclosures (bays which + contain storage devices). You also need either a host + driver (SCSI/ATA) which supports enclosures + or a SCSI enclosure device (SES) to use these services. + +config SGI_XP + tristate "Support communication between SGI SSIs" + depends on NET + depends on (IA64_GENERIC || IA64_SGI_SN2 || IA64_SGI_UV || X86_UV) && SMP + select IA64_UNCACHED_ALLOCATOR if IA64_GENERIC || IA64_SGI_SN2 + select GENERIC_ALLOCATOR if IA64_GENERIC || IA64_SGI_SN2 + select SGI_GRU if X86_64 && SMP + ---help--- + An SGI machine can be divided into multiple Single System + Images which act independently of each other and have + hardware based memory protection from the others. Enabling + this feature will allow for direct communication between SSIs + based on a network adapter and DMA messaging. + +config CS5535_MFGPT + tristate "CS5535/CS5536 Geode Multi-Function General Purpose Timer (MFGPT) support" + depends on MFD_CS5535 + default n + help + This driver provides access to MFGPT functionality for other + drivers that need timers. MFGPTs are available in the CS5535 and + CS5536 companion chips that are found in AMD Geode and several + other platforms. They have a better resolution and max interval + than the generic PIT, and are suitable for use as high-res timers. + You probably don't want to enable this manually; other drivers that + make use of it should enable it. + +config CS5535_MFGPT_DEFAULT_IRQ + int + depends on CS5535_MFGPT + default 7 + help + MFGPTs on the CS5535 require an interrupt. The selected IRQ + can be overridden as a module option as well as by driver that + use the cs5535_mfgpt_ API; however, different architectures might + want to use a different IRQ by default. This is here for + architectures to set as necessary. + +config CS5535_CLOCK_EVENT_SRC + tristate "CS5535/CS5536 high-res timer (MFGPT) events" + depends on GENERIC_CLOCKEVENTS && CS5535_MFGPT + help + This driver provides a clock event source based on the MFGPT + timer(s) in the CS5535 and CS5536 companion chips. + MFGPTs have a better resolution and max interval than the + generic PIT, and are suitable for use as high-res timers. + +config HP_ILO + tristate "Channel interface driver for the HP iLO processor" + depends on PCI + default n + help + The channel interface driver allows applications to communicate + with iLO management processors present on HP ProLiant servers. + Upon loading, the driver creates /dev/hpilo/dXccbN files, which + can be used to gather data from the management processor, via + read and write system calls. + + To compile this driver as a module, choose M here: the + module will be called hpilo. + +config QCOM_COINCELL + tristate "Qualcomm coincell charger support" + depends on MFD_SPMI_PMIC || COMPILE_TEST + help + This driver supports the coincell block found inside of + Qualcomm PMICs. The coincell charger provides a means to + charge a coincell battery or backup capacitor which is used + to maintain PMIC register and RTC state in the absence of + external power. + +config SGI_GRU + tristate "SGI GRU driver" + depends on X86_UV && SMP + default n + select MMU_NOTIFIER + ---help--- + The GRU is a hardware resource located in the system chipset. The GRU + contains memory that can be mmapped into the user address space. This memory is + used to communicate with the GRU to perform functions such as load/store, + scatter/gather, bcopy, AMOs, etc. The GRU is directly accessed by user + instructions using user virtual addresses. GRU instructions (ex., bcopy) use + user virtual addresses for operands. + + If you are not running on a SGI UV system, say N. + +config SGI_GRU_DEBUG + bool "SGI GRU driver debug" + depends on SGI_GRU + default n + ---help--- + This option enables additional debugging code for the SGI GRU driver. + If you are unsure, say N. + +config APDS9802ALS + tristate "Medfield Avago APDS9802 ALS Sensor module" + depends on I2C + help + If you say yes here you get support for the ALS APDS9802 ambient + light sensor. + + This driver can also be built as a module. If so, the module + will be called apds9802als. + +config ISL29003 + tristate "Intersil ISL29003 ambient light sensor" + depends on I2C && SYSFS + help + If you say yes here you get support for the Intersil ISL29003 + ambient light sensor. + + This driver can also be built as a module. If so, the module + will be called isl29003. + +config ISL29020 + tristate "Intersil ISL29020 ambient light sensor" + depends on I2C + help + If you say yes here you get support for the Intersil ISL29020 + ambient light sensor. + + This driver can also be built as a module. If so, the module + will be called isl29020. + +config SENSORS_TSL2550 + tristate "Taos TSL2550 ambient light sensor" + depends on I2C && SYSFS + help + If you say yes here you get support for the Taos TSL2550 + ambient light sensor. + + This driver can also be built as a module. If so, the module + will be called tsl2550. + +config SENSORS_BH1770 + tristate "BH1770GLC / SFH7770 combined ALS - Proximity sensor" + depends on I2C + ---help--- + Say Y here if you want to build a driver for BH1770GLC (ROHM) or + SFH7770 (Osram) combined ambient light and proximity sensor chip. + + To compile this driver as a module, choose M here: the + module will be called bh1770glc. If unsure, say N here. + +config SENSORS_APDS990X + tristate "APDS990X combined als and proximity sensors" + depends on I2C + default n + ---help--- + Say Y here if you want to build a driver for Avago APDS990x + combined ambient light and proximity sensor chip. + + To compile this driver as a module, choose M here: the + module will be called apds990x. If unsure, say N here. + +config HMC6352 + tristate "Honeywell HMC6352 compass" + depends on I2C + help + This driver provides support for the Honeywell HMC6352 compass, + providing configuration and heading data via sysfs. + +config DS1682 + tristate "Dallas DS1682 Total Elapsed Time Recorder with Alarm" + depends on I2C + help + If you say yes here you get support for Dallas Semiconductor + DS1682 Total Elapsed Time Recorder. + + This driver can also be built as a module. If so, the module + will be called ds1682. + +config SPEAR13XX_PCIE_GADGET + bool "PCIe gadget support for SPEAr13XX platform" + depends on ARCH_SPEAR13XX && BROKEN + default n + help + This option enables gadget support for PCIe controller. If + board file defines any controller as PCIe endpoint then a sysfs + entry will be created for that controller. User can use these + sysfs node to configure PCIe EP as per his requirements. + +config VMWARE_BALLOON + tristate "VMware Balloon Driver" + depends on VMWARE_VMCI && X86 && HYPERVISOR_GUEST + help + This is VMware physical memory management driver which acts + like a "balloon" that can be inflated to reclaim physical pages + by reserving them in the guest and invalidating them in the + monitor, freeing up the underlying machine pages so they can + be allocated to other guests. The balloon can also be deflated + to allow the guest to use more physical memory. + + If unsure, say N. + + To compile this driver as a module, choose M here: the + module will be called vmw_balloon. + +config PCH_PHUB + tristate "Intel EG20T PCH/LAPIS Semicon IOH(ML7213/ML7223/ML7831) PHUB" + select GENERIC_NET_UTILS + depends on PCI && (X86_32 || MIPS || COMPILE_TEST) + help + This driver is for PCH(Platform controller Hub) PHUB(Packet Hub) of + Intel Topcliff which is an IOH(Input/Output Hub) for x86 embedded + processor. The Topcliff has MAC address and Option ROM data in SROM. + This driver can access MAC address and Option ROM data in SROM. + + This driver also can be used for LAPIS Semiconductor's IOH, + ML7213/ML7223/ML7831. + ML7213 which is for IVI(In-Vehicle Infotainment) use. + ML7223 IOH is for MP(Media Phone) use. + ML7831 IOH is for general purpose use. + ML7213/ML7223/ML7831 is companion chip for Intel Atom E6xx series. + ML7213/ML7223/ML7831 is completely compatible for Intel EG20T PCH. + + To compile this driver as a module, choose M here: the module will + be called pch_phub. + +config USB_SWITCH_FSA9480 + tristate "FSA9480 USB Switch" + depends on I2C + help + The FSA9480 is a USB port accessory detector and switch. + The FSA9480 is fully controlled using I2C and enables USB data, + stereo and mono audio, video, microphone and UART data to use + a common connector port. + +config LATTICE_ECP3_CONFIG + tristate "Lattice ECP3 FPGA bitstream configuration via SPI" + depends on SPI && SYSFS + select FW_LOADER + default n + help + This option enables support for bitstream configuration (programming + or loading) of the Lattice ECP3 FPGA family via SPI. + + If unsure, say N. + +config SRAM + bool "Generic on-chip SRAM driver" + depends on HAS_IOMEM + select GENERIC_ALLOCATOR + select SRAM_EXEC if ARM + help + This driver allows you to declare a memory region to be managed by + the genalloc API. It is supposed to be used for small on-chip SRAM + areas found on many SoCs. + +config SRAM_EXEC + bool + +config VEXPRESS_SYSCFG + bool "Versatile Express System Configuration driver" + depends on VEXPRESS_CONFIG + default y + help + ARM Ltd. Versatile Express uses specialised platform configuration + bus. System Configuration interface is one of the possible means + of generating transactions on this bus. + +config ASPEED_LPC_CTRL + depends on (ARCH_ASPEED || COMPILE_TEST) && REGMAP && MFD_SYSCON + tristate "Aspeed ast2400/2500 HOST LPC to BMC bridge control" + ---help--- + Control Aspeed ast2400/2500 HOST LPC to BMC mappings through + ioctl()s, the driver also provides a read/write interface to a BMC ram + region where the host LPC read/write region can be buffered. + +config ASPEED_LPC_SNOOP + tristate "Aspeed ast2500 HOST LPC snoop support" + depends on (ARCH_ASPEED || COMPILE_TEST) && REGMAP && MFD_SYSCON + help + Provides a driver to control the LPC snoop interface which + allows the BMC to listen on and save the data written by + the host to an arbitrary LPC I/O port. + +config PCI_ENDPOINT_TEST + depends on PCI + select CRC32 + tristate "PCI Endpoint Test driver" + ---help--- + Enable this configuration option to enable the host side test driver + for PCI Endpoint. + +config MISC_RTSX + tristate + default MISC_RTSX_PCI || MISC_RTSX_USB + +source "drivers/misc/c2port/Kconfig" +source "drivers/misc/eeprom/Kconfig" +source "drivers/misc/cb710/Kconfig" +source "drivers/misc/ti-st/Kconfig" +source "drivers/misc/lis3lv02d/Kconfig" +source "drivers/misc/altera-stapl/Kconfig" +source "drivers/misc/mei/Kconfig" +source "drivers/misc/vmw_vmci/Kconfig" +source "drivers/misc/mic/Kconfig" +source "drivers/misc/genwqe/Kconfig" +source "drivers/misc/echo/Kconfig" +source "drivers/misc/cxl/Kconfig" +source "drivers/misc/ocxl/Kconfig" +source "drivers/misc/cardreader/Kconfig" +endmenu diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile new file mode 100644 index 000000000..af22bbc3d --- /dev/null +++ b/drivers/misc/Makefile @@ -0,0 +1,60 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for misc devices that really don't fit anywhere else. +# + +obj-$(CONFIG_IBM_ASM) += ibmasm/ +obj-$(CONFIG_IBMVMC) += ibmvmc.o +obj-$(CONFIG_AD525X_DPOT) += ad525x_dpot.o +obj-$(CONFIG_AD525X_DPOT_I2C) += ad525x_dpot-i2c.o +obj-$(CONFIG_AD525X_DPOT_SPI) += ad525x_dpot-spi.o +obj-$(CONFIG_INTEL_MID_PTI) += pti.o +obj-$(CONFIG_ATMEL_SSC) += atmel-ssc.o +obj-$(CONFIG_ATMEL_TCLIB) += atmel_tclib.o +obj-$(CONFIG_DUMMY_IRQ) += dummy-irq.o +obj-$(CONFIG_ICS932S401) += ics932s401.o +obj-$(CONFIG_LKDTM) += lkdtm/ +obj-$(CONFIG_TIFM_CORE) += tifm_core.o +obj-$(CONFIG_TIFM_7XX1) += tifm_7xx1.o +obj-$(CONFIG_PHANTOM) += phantom.o +obj-$(CONFIG_QCOM_COINCELL) += qcom-coincell.o +obj-$(CONFIG_SENSORS_BH1770) += bh1770glc.o +obj-$(CONFIG_SENSORS_APDS990X) += apds990x.o +obj-$(CONFIG_SGI_IOC4) += ioc4.o +obj-$(CONFIG_ENCLOSURE_SERVICES) += enclosure.o +obj-$(CONFIG_KGDB_TESTS) += kgdbts.o +obj-$(CONFIG_SGI_XP) += sgi-xp/ +obj-$(CONFIG_SGI_GRU) += sgi-gru/ +obj-$(CONFIG_CS5535_MFGPT) += cs5535-mfgpt.o +obj-$(CONFIG_HP_ILO) += hpilo.o +obj-$(CONFIG_APDS9802ALS) += apds9802als.o +obj-$(CONFIG_ISL29003) += isl29003.o +obj-$(CONFIG_ISL29020) += isl29020.o +obj-$(CONFIG_SENSORS_TSL2550) += tsl2550.o +obj-$(CONFIG_DS1682) += ds1682.o +obj-$(CONFIG_C2PORT) += c2port/ +obj-$(CONFIG_HMC6352) += hmc6352.o +obj-y += eeprom/ +obj-y += cb710/ +obj-$(CONFIG_SPEAR13XX_PCIE_GADGET) += spear13xx_pcie_gadget.o +obj-$(CONFIG_VMWARE_BALLOON) += vmw_balloon.o +obj-$(CONFIG_PCH_PHUB) += pch_phub.o +obj-y += ti-st/ +obj-y += lis3lv02d/ +obj-$(CONFIG_USB_SWITCH_FSA9480) += fsa9480.o +obj-$(CONFIG_ALTERA_STAPL) +=altera-stapl/ +obj-$(CONFIG_INTEL_MEI) += mei/ +obj-$(CONFIG_VMWARE_VMCI) += vmw_vmci/ +obj-$(CONFIG_LATTICE_ECP3_CONFIG) += lattice-ecp3-config.o +obj-$(CONFIG_SRAM) += sram.o +obj-$(CONFIG_SRAM_EXEC) += sram-exec.o +obj-y += mic/ +obj-$(CONFIG_GENWQE) += genwqe/ +obj-$(CONFIG_ECHO) += echo/ +obj-$(CONFIG_VEXPRESS_SYSCFG) += vexpress-syscfg.o +obj-$(CONFIG_CXL_BASE) += cxl/ +obj-$(CONFIG_ASPEED_LPC_CTRL) += aspeed-lpc-ctrl.o +obj-$(CONFIG_ASPEED_LPC_SNOOP) += aspeed-lpc-snoop.o +obj-$(CONFIG_PCI_ENDPOINT_TEST) += pci_endpoint_test.o +obj-$(CONFIG_OCXL) += ocxl/ +obj-$(CONFIG_MISC_RTSX) += cardreader/ diff --git a/drivers/misc/ad525x_dpot-i2c.c b/drivers/misc/ad525x_dpot-i2c.c new file mode 100644 index 000000000..4f832002d --- /dev/null +++ b/drivers/misc/ad525x_dpot-i2c.c @@ -0,0 +1,119 @@ +/* + * Driver for the Analog Devices digital potentiometers (I2C bus) + * + * Copyright (C) 2010-2011 Michael Hennerich, Analog Devices Inc. + * + * Licensed under the GPL-2 or later. + */ + +#include +#include + +#include "ad525x_dpot.h" + +/* I2C bus functions */ +static int write_d8(void *client, u8 val) +{ + return i2c_smbus_write_byte(client, val); +} + +static int write_r8d8(void *client, u8 reg, u8 val) +{ + return i2c_smbus_write_byte_data(client, reg, val); +} + +static int write_r8d16(void *client, u8 reg, u16 val) +{ + return i2c_smbus_write_word_data(client, reg, val); +} + +static int read_d8(void *client) +{ + return i2c_smbus_read_byte(client); +} + +static int read_r8d8(void *client, u8 reg) +{ + return i2c_smbus_read_byte_data(client, reg); +} + +static int read_r8d16(void *client, u8 reg) +{ + return i2c_smbus_read_word_data(client, reg); +} + +static const struct ad_dpot_bus_ops bops = { + .read_d8 = read_d8, + .read_r8d8 = read_r8d8, + .read_r8d16 = read_r8d16, + .write_d8 = write_d8, + .write_r8d8 = write_r8d8, + .write_r8d16 = write_r8d16, +}; + +static int ad_dpot_i2c_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct ad_dpot_bus_data bdata = { + .client = client, + .bops = &bops, + }; + + if (!i2c_check_functionality(client->adapter, + I2C_FUNC_SMBUS_WORD_DATA)) { + dev_err(&client->dev, "SMBUS Word Data not Supported\n"); + return -EIO; + } + + return ad_dpot_probe(&client->dev, &bdata, id->driver_data, id->name); +} + +static int ad_dpot_i2c_remove(struct i2c_client *client) +{ + return ad_dpot_remove(&client->dev); +} + +static const struct i2c_device_id ad_dpot_id[] = { + {"ad5258", AD5258_ID}, + {"ad5259", AD5259_ID}, + {"ad5251", AD5251_ID}, + {"ad5252", AD5252_ID}, + {"ad5253", AD5253_ID}, + {"ad5254", AD5254_ID}, + {"ad5255", AD5255_ID}, + {"ad5241", AD5241_ID}, + {"ad5242", AD5242_ID}, + {"ad5243", AD5243_ID}, + {"ad5245", AD5245_ID}, + {"ad5246", AD5246_ID}, + {"ad5247", AD5247_ID}, + {"ad5248", AD5248_ID}, + {"ad5280", AD5280_ID}, + {"ad5282", AD5282_ID}, + {"adn2860", ADN2860_ID}, + {"ad5273", AD5273_ID}, + {"ad5161", AD5161_ID}, + {"ad5171", AD5171_ID}, + {"ad5170", AD5170_ID}, + {"ad5172", AD5172_ID}, + {"ad5173", AD5173_ID}, + {"ad5272", AD5272_ID}, + {"ad5274", AD5274_ID}, + {} +}; +MODULE_DEVICE_TABLE(i2c, ad_dpot_id); + +static struct i2c_driver ad_dpot_i2c_driver = { + .driver = { + .name = "ad_dpot", + }, + .probe = ad_dpot_i2c_probe, + .remove = ad_dpot_i2c_remove, + .id_table = ad_dpot_id, +}; + +module_i2c_driver(ad_dpot_i2c_driver); + +MODULE_AUTHOR("Michael Hennerich "); +MODULE_DESCRIPTION("digital potentiometer I2C bus driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/misc/ad525x_dpot-spi.c b/drivers/misc/ad525x_dpot-spi.c new file mode 100644 index 000000000..39a7f517e --- /dev/null +++ b/drivers/misc/ad525x_dpot-spi.c @@ -0,0 +1,146 @@ +/* + * Driver for the Analog Devices digital potentiometers (SPI bus) + * + * Copyright (C) 2010-2011 Michael Hennerich, Analog Devices Inc. + * + * Licensed under the GPL-2 or later. + */ + +#include +#include + +#include "ad525x_dpot.h" + +/* SPI bus functions */ +static int write8(void *client, u8 val) +{ + u8 data = val; + + return spi_write(client, &data, 1); +} + +static int write16(void *client, u8 reg, u8 val) +{ + u8 data[2] = {reg, val}; + + return spi_write(client, data, 2); +} + +static int write24(void *client, u8 reg, u16 val) +{ + u8 data[3] = {reg, val >> 8, val}; + + return spi_write(client, data, 3); +} + +static int read8(void *client) +{ + int ret; + u8 data; + + ret = spi_read(client, &data, 1); + if (ret < 0) + return ret; + + return data; +} + +static int read16(void *client, u8 reg) +{ + int ret; + u8 buf_rx[2]; + + write16(client, reg, 0); + ret = spi_read(client, buf_rx, 2); + if (ret < 0) + return ret; + + return (buf_rx[0] << 8) | buf_rx[1]; +} + +static int read24(void *client, u8 reg) +{ + int ret; + u8 buf_rx[3]; + + write24(client, reg, 0); + ret = spi_read(client, buf_rx, 3); + if (ret < 0) + return ret; + + return (buf_rx[1] << 8) | buf_rx[2]; +} + +static const struct ad_dpot_bus_ops bops = { + .read_d8 = read8, + .read_r8d8 = read16, + .read_r8d16 = read24, + .write_d8 = write8, + .write_r8d8 = write16, + .write_r8d16 = write24, +}; +static int ad_dpot_spi_probe(struct spi_device *spi) +{ + struct ad_dpot_bus_data bdata = { + .client = spi, + .bops = &bops, + }; + + return ad_dpot_probe(&spi->dev, &bdata, + spi_get_device_id(spi)->driver_data, + spi_get_device_id(spi)->name); +} + +static int ad_dpot_spi_remove(struct spi_device *spi) +{ + return ad_dpot_remove(&spi->dev); +} + +static const struct spi_device_id ad_dpot_spi_id[] = { + {"ad5160", AD5160_ID}, + {"ad5161", AD5161_ID}, + {"ad5162", AD5162_ID}, + {"ad5165", AD5165_ID}, + {"ad5200", AD5200_ID}, + {"ad5201", AD5201_ID}, + {"ad5203", AD5203_ID}, + {"ad5204", AD5204_ID}, + {"ad5206", AD5206_ID}, + {"ad5207", AD5207_ID}, + {"ad5231", AD5231_ID}, + {"ad5232", AD5232_ID}, + {"ad5233", AD5233_ID}, + {"ad5235", AD5235_ID}, + {"ad5260", AD5260_ID}, + {"ad5262", AD5262_ID}, + {"ad5263", AD5263_ID}, + {"ad5290", AD5290_ID}, + {"ad5291", AD5291_ID}, + {"ad5292", AD5292_ID}, + {"ad5293", AD5293_ID}, + {"ad7376", AD7376_ID}, + {"ad8400", AD8400_ID}, + {"ad8402", AD8402_ID}, + {"ad8403", AD8403_ID}, + {"adn2850", ADN2850_ID}, + {"ad5270", AD5270_ID}, + {"ad5271", AD5271_ID}, + {} +}; +MODULE_DEVICE_TABLE(spi, ad_dpot_spi_id); + +static struct spi_driver ad_dpot_spi_driver = { + .driver = { + .name = "ad_dpot", + }, + .probe = ad_dpot_spi_probe, + .remove = ad_dpot_spi_remove, + .id_table = ad_dpot_spi_id, +}; + +module_spi_driver(ad_dpot_spi_driver); + +MODULE_AUTHOR("Michael Hennerich "); +MODULE_DESCRIPTION("digital potentiometer SPI bus driver"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("spi:ad_dpot"); diff --git a/drivers/misc/ad525x_dpot.c b/drivers/misc/ad525x_dpot.c new file mode 100644 index 000000000..bc591b716 --- /dev/null +++ b/drivers/misc/ad525x_dpot.c @@ -0,0 +1,765 @@ +/* + * ad525x_dpot: Driver for the Analog Devices digital potentiometers + * Copyright (c) 2009-2010 Analog Devices, Inc. + * Author: Michael Hennerich + * + * DEVID #Wipers #Positions Resistor Options (kOhm) + * AD5258 1 64 1, 10, 50, 100 + * AD5259 1 256 5, 10, 50, 100 + * AD5251 2 64 1, 10, 50, 100 + * AD5252 2 256 1, 10, 50, 100 + * AD5255 3 512 25, 250 + * AD5253 4 64 1, 10, 50, 100 + * AD5254 4 256 1, 10, 50, 100 + * AD5160 1 256 5, 10, 50, 100 + * AD5161 1 256 5, 10, 50, 100 + * AD5162 2 256 2.5, 10, 50, 100 + * AD5165 1 256 100 + * AD5200 1 256 10, 50 + * AD5201 1 33 10, 50 + * AD5203 4 64 10, 100 + * AD5204 4 256 10, 50, 100 + * AD5206 6 256 10, 50, 100 + * AD5207 2 256 10, 50, 100 + * AD5231 1 1024 10, 50, 100 + * AD5232 2 256 10, 50, 100 + * AD5233 4 64 10, 50, 100 + * AD5235 2 1024 25, 250 + * AD5260 1 256 20, 50, 200 + * AD5262 2 256 20, 50, 200 + * AD5263 4 256 20, 50, 200 + * AD5290 1 256 10, 50, 100 + * AD5291 1 256 20, 50, 100 (20-TP) + * AD5292 1 1024 20, 50, 100 (20-TP) + * AD5293 1 1024 20, 50, 100 + * AD7376 1 128 10, 50, 100, 1M + * AD8400 1 256 1, 10, 50, 100 + * AD8402 2 256 1, 10, 50, 100 + * AD8403 4 256 1, 10, 50, 100 + * ADN2850 3 512 25, 250 + * AD5241 1 256 10, 100, 1M + * AD5246 1 128 5, 10, 50, 100 + * AD5247 1 128 5, 10, 50, 100 + * AD5245 1 256 5, 10, 50, 100 + * AD5243 2 256 2.5, 10, 50, 100 + * AD5248 2 256 2.5, 10, 50, 100 + * AD5242 2 256 20, 50, 200 + * AD5280 1 256 20, 50, 200 + * AD5282 2 256 20, 50, 200 + * ADN2860 3 512 25, 250 + * AD5273 1 64 1, 10, 50, 100 (OTP) + * AD5171 1 64 5, 10, 50, 100 (OTP) + * AD5170 1 256 2.5, 10, 50, 100 (OTP) + * AD5172 2 256 2.5, 10, 50, 100 (OTP) + * AD5173 2 256 2.5, 10, 50, 100 (OTP) + * AD5270 1 1024 20, 50, 100 (50-TP) + * AD5271 1 256 20, 50, 100 (50-TP) + * AD5272 1 1024 20, 50, 100 (50-TP) + * AD5274 1 256 20, 50, 100 (50-TP) + * + * See Documentation/misc-devices/ad525x_dpot.txt for more info. + * + * derived from ad5258.c + * Copyright (c) 2009 Cyber Switching, Inc. + * Author: Chris Verges + * + * derived from ad5252.c + * Copyright (c) 2006-2011 Michael Hennerich + * + * Licensed under the GPL-2 or later. + */ + +#include +#include +#include +#include +#include + +#include "ad525x_dpot.h" + +/* + * Client data (each client gets its own) + */ + +struct dpot_data { + struct ad_dpot_bus_data bdata; + struct mutex update_lock; + unsigned int rdac_mask; + unsigned int max_pos; + unsigned long devid; + unsigned int uid; + unsigned int feat; + unsigned int wipers; + u16 rdac_cache[MAX_RDACS]; + DECLARE_BITMAP(otp_en_mask, MAX_RDACS); +}; + +static inline int dpot_read_d8(struct dpot_data *dpot) +{ + return dpot->bdata.bops->read_d8(dpot->bdata.client); +} + +static inline int dpot_read_r8d8(struct dpot_data *dpot, u8 reg) +{ + return dpot->bdata.bops->read_r8d8(dpot->bdata.client, reg); +} + +static inline int dpot_read_r8d16(struct dpot_data *dpot, u8 reg) +{ + return dpot->bdata.bops->read_r8d16(dpot->bdata.client, reg); +} + +static inline int dpot_write_d8(struct dpot_data *dpot, u8 val) +{ + return dpot->bdata.bops->write_d8(dpot->bdata.client, val); +} + +static inline int dpot_write_r8d8(struct dpot_data *dpot, u8 reg, u16 val) +{ + return dpot->bdata.bops->write_r8d8(dpot->bdata.client, reg, val); +} + +static inline int dpot_write_r8d16(struct dpot_data *dpot, u8 reg, u16 val) +{ + return dpot->bdata.bops->write_r8d16(dpot->bdata.client, reg, val); +} + +static s32 dpot_read_spi(struct dpot_data *dpot, u8 reg) +{ + unsigned int ctrl = 0; + int value; + + if (!(reg & (DPOT_ADDR_EEPROM | DPOT_ADDR_CMD))) { + + if (dpot->feat & F_RDACS_WONLY) + return dpot->rdac_cache[reg & DPOT_RDAC_MASK]; + if (dpot->uid == DPOT_UID(AD5291_ID) || + dpot->uid == DPOT_UID(AD5292_ID) || + dpot->uid == DPOT_UID(AD5293_ID)) { + + value = dpot_read_r8d8(dpot, + DPOT_AD5291_READ_RDAC << 2); + + if (dpot->uid == DPOT_UID(AD5291_ID)) + value = value >> 2; + + return value; + } else if (dpot->uid == DPOT_UID(AD5270_ID) || + dpot->uid == DPOT_UID(AD5271_ID)) { + + value = dpot_read_r8d8(dpot, + DPOT_AD5270_1_2_4_READ_RDAC << 2); + + if (value < 0) + return value; + + if (dpot->uid == DPOT_UID(AD5271_ID)) + value = value >> 2; + + return value; + } + + ctrl = DPOT_SPI_READ_RDAC; + } else if (reg & DPOT_ADDR_EEPROM) { + ctrl = DPOT_SPI_READ_EEPROM; + } + + if (dpot->feat & F_SPI_16BIT) + return dpot_read_r8d8(dpot, ctrl); + else if (dpot->feat & F_SPI_24BIT) + return dpot_read_r8d16(dpot, ctrl); + + return -EFAULT; +} + +static s32 dpot_read_i2c(struct dpot_data *dpot, u8 reg) +{ + int value; + unsigned int ctrl = 0; + + switch (dpot->uid) { + case DPOT_UID(AD5246_ID): + case DPOT_UID(AD5247_ID): + return dpot_read_d8(dpot); + case DPOT_UID(AD5245_ID): + case DPOT_UID(AD5241_ID): + case DPOT_UID(AD5242_ID): + case DPOT_UID(AD5243_ID): + case DPOT_UID(AD5248_ID): + case DPOT_UID(AD5280_ID): + case DPOT_UID(AD5282_ID): + ctrl = ((reg & DPOT_RDAC_MASK) == DPOT_RDAC0) ? + 0 : DPOT_AD5282_RDAC_AB; + return dpot_read_r8d8(dpot, ctrl); + case DPOT_UID(AD5170_ID): + case DPOT_UID(AD5171_ID): + case DPOT_UID(AD5273_ID): + return dpot_read_d8(dpot); + case DPOT_UID(AD5172_ID): + case DPOT_UID(AD5173_ID): + ctrl = ((reg & DPOT_RDAC_MASK) == DPOT_RDAC0) ? + 0 : DPOT_AD5172_3_A0; + return dpot_read_r8d8(dpot, ctrl); + case DPOT_UID(AD5272_ID): + case DPOT_UID(AD5274_ID): + dpot_write_r8d8(dpot, + (DPOT_AD5270_1_2_4_READ_RDAC << 2), 0); + + value = dpot_read_r8d16(dpot, + DPOT_AD5270_1_2_4_RDAC << 2); + + if (value < 0) + return value; + /* + * AD5272/AD5274 returns high byte first, however + * underling smbus expects low byte first. + */ + value = swab16(value); + + if (dpot->uid == DPOT_UID(AD5274_ID)) + value = value >> 2; + return value; + default: + if ((reg & DPOT_REG_TOL) || (dpot->max_pos > 256)) + return dpot_read_r8d16(dpot, (reg & 0xF8) | + ((reg & 0x7) << 1)); + else + return dpot_read_r8d8(dpot, reg); + } +} + +static s32 dpot_read(struct dpot_data *dpot, u8 reg) +{ + if (dpot->feat & F_SPI) + return dpot_read_spi(dpot, reg); + else + return dpot_read_i2c(dpot, reg); +} + +static s32 dpot_write_spi(struct dpot_data *dpot, u8 reg, u16 value) +{ + unsigned int val = 0; + + if (!(reg & (DPOT_ADDR_EEPROM | DPOT_ADDR_CMD | DPOT_ADDR_OTP))) { + if (dpot->feat & F_RDACS_WONLY) + dpot->rdac_cache[reg & DPOT_RDAC_MASK] = value; + + if (dpot->feat & F_AD_APPDATA) { + if (dpot->feat & F_SPI_8BIT) { + val = ((reg & DPOT_RDAC_MASK) << + DPOT_MAX_POS(dpot->devid)) | + value; + return dpot_write_d8(dpot, val); + } else if (dpot->feat & F_SPI_16BIT) { + val = ((reg & DPOT_RDAC_MASK) << + DPOT_MAX_POS(dpot->devid)) | + value; + return dpot_write_r8d8(dpot, val >> 8, + val & 0xFF); + } else + BUG(); + } else { + if (dpot->uid == DPOT_UID(AD5291_ID) || + dpot->uid == DPOT_UID(AD5292_ID) || + dpot->uid == DPOT_UID(AD5293_ID)) { + + dpot_write_r8d8(dpot, DPOT_AD5291_CTRLREG << 2, + DPOT_AD5291_UNLOCK_CMD); + + if (dpot->uid == DPOT_UID(AD5291_ID)) + value = value << 2; + + return dpot_write_r8d8(dpot, + (DPOT_AD5291_RDAC << 2) | + (value >> 8), value & 0xFF); + } else if (dpot->uid == DPOT_UID(AD5270_ID) || + dpot->uid == DPOT_UID(AD5271_ID)) { + dpot_write_r8d8(dpot, + DPOT_AD5270_1_2_4_CTRLREG << 2, + DPOT_AD5270_1_2_4_UNLOCK_CMD); + + if (dpot->uid == DPOT_UID(AD5271_ID)) + value = value << 2; + + return dpot_write_r8d8(dpot, + (DPOT_AD5270_1_2_4_RDAC << 2) | + (value >> 8), value & 0xFF); + } + val = DPOT_SPI_RDAC | (reg & DPOT_RDAC_MASK); + } + } else if (reg & DPOT_ADDR_EEPROM) { + val = DPOT_SPI_EEPROM | (reg & DPOT_RDAC_MASK); + } else if (reg & DPOT_ADDR_CMD) { + switch (reg) { + case DPOT_DEC_ALL_6DB: + val = DPOT_SPI_DEC_ALL_6DB; + break; + case DPOT_INC_ALL_6DB: + val = DPOT_SPI_INC_ALL_6DB; + break; + case DPOT_DEC_ALL: + val = DPOT_SPI_DEC_ALL; + break; + case DPOT_INC_ALL: + val = DPOT_SPI_INC_ALL; + break; + } + } else if (reg & DPOT_ADDR_OTP) { + if (dpot->uid == DPOT_UID(AD5291_ID) || + dpot->uid == DPOT_UID(AD5292_ID)) { + return dpot_write_r8d8(dpot, + DPOT_AD5291_STORE_XTPM << 2, 0); + } else if (dpot->uid == DPOT_UID(AD5270_ID) || + dpot->uid == DPOT_UID(AD5271_ID)) { + return dpot_write_r8d8(dpot, + DPOT_AD5270_1_2_4_STORE_XTPM << 2, 0); + } + } else + BUG(); + + if (dpot->feat & F_SPI_16BIT) + return dpot_write_r8d8(dpot, val, value); + else if (dpot->feat & F_SPI_24BIT) + return dpot_write_r8d16(dpot, val, value); + + return -EFAULT; +} + +static s32 dpot_write_i2c(struct dpot_data *dpot, u8 reg, u16 value) +{ + /* Only write the instruction byte for certain commands */ + unsigned int tmp = 0, ctrl = 0; + + switch (dpot->uid) { + case DPOT_UID(AD5246_ID): + case DPOT_UID(AD5247_ID): + return dpot_write_d8(dpot, value); + + case DPOT_UID(AD5245_ID): + case DPOT_UID(AD5241_ID): + case DPOT_UID(AD5242_ID): + case DPOT_UID(AD5243_ID): + case DPOT_UID(AD5248_ID): + case DPOT_UID(AD5280_ID): + case DPOT_UID(AD5282_ID): + ctrl = ((reg & DPOT_RDAC_MASK) == DPOT_RDAC0) ? + 0 : DPOT_AD5282_RDAC_AB; + return dpot_write_r8d8(dpot, ctrl, value); + case DPOT_UID(AD5171_ID): + case DPOT_UID(AD5273_ID): + if (reg & DPOT_ADDR_OTP) { + tmp = dpot_read_d8(dpot); + if (tmp >> 6) /* Ready to Program? */ + return -EFAULT; + ctrl = DPOT_AD5273_FUSE; + } + return dpot_write_r8d8(dpot, ctrl, value); + case DPOT_UID(AD5172_ID): + case DPOT_UID(AD5173_ID): + ctrl = ((reg & DPOT_RDAC_MASK) == DPOT_RDAC0) ? + 0 : DPOT_AD5172_3_A0; + if (reg & DPOT_ADDR_OTP) { + tmp = dpot_read_r8d16(dpot, ctrl); + if (tmp >> 14) /* Ready to Program? */ + return -EFAULT; + ctrl |= DPOT_AD5170_2_3_FUSE; + } + return dpot_write_r8d8(dpot, ctrl, value); + case DPOT_UID(AD5170_ID): + if (reg & DPOT_ADDR_OTP) { + tmp = dpot_read_r8d16(dpot, tmp); + if (tmp >> 14) /* Ready to Program? */ + return -EFAULT; + ctrl = DPOT_AD5170_2_3_FUSE; + } + return dpot_write_r8d8(dpot, ctrl, value); + case DPOT_UID(AD5272_ID): + case DPOT_UID(AD5274_ID): + dpot_write_r8d8(dpot, DPOT_AD5270_1_2_4_CTRLREG << 2, + DPOT_AD5270_1_2_4_UNLOCK_CMD); + + if (reg & DPOT_ADDR_OTP) + return dpot_write_r8d8(dpot, + DPOT_AD5270_1_2_4_STORE_XTPM << 2, 0); + + if (dpot->uid == DPOT_UID(AD5274_ID)) + value = value << 2; + + return dpot_write_r8d8(dpot, (DPOT_AD5270_1_2_4_RDAC << 2) | + (value >> 8), value & 0xFF); + default: + if (reg & DPOT_ADDR_CMD) + return dpot_write_d8(dpot, reg); + + if (dpot->max_pos > 256) + return dpot_write_r8d16(dpot, (reg & 0xF8) | + ((reg & 0x7) << 1), value); + else + /* All other registers require instruction + data bytes */ + return dpot_write_r8d8(dpot, reg, value); + } +} + +static s32 dpot_write(struct dpot_data *dpot, u8 reg, u16 value) +{ + if (dpot->feat & F_SPI) + return dpot_write_spi(dpot, reg, value); + else + return dpot_write_i2c(dpot, reg, value); +} + +/* sysfs functions */ + +static ssize_t sysfs_show_reg(struct device *dev, + struct device_attribute *attr, + char *buf, u32 reg) +{ + struct dpot_data *data = dev_get_drvdata(dev); + s32 value; + + if (reg & DPOT_ADDR_OTP_EN) + return sprintf(buf, "%s\n", + test_bit(DPOT_RDAC_MASK & reg, data->otp_en_mask) ? + "enabled" : "disabled"); + + + mutex_lock(&data->update_lock); + value = dpot_read(data, reg); + mutex_unlock(&data->update_lock); + + if (value < 0) + return -EINVAL; + /* + * Let someone else deal with converting this ... + * the tolerance is a two-byte value where the MSB + * is a sign + integer value, and the LSB is a + * decimal value. See page 18 of the AD5258 + * datasheet (Rev. A) for more details. + */ + + if (reg & DPOT_REG_TOL) + return sprintf(buf, "0x%04x\n", value & 0xFFFF); + else + return sprintf(buf, "%u\n", value & data->rdac_mask); +} + +static ssize_t sysfs_set_reg(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count, u32 reg) +{ + struct dpot_data *data = dev_get_drvdata(dev); + unsigned long value; + int err; + + if (reg & DPOT_ADDR_OTP_EN) { + if (sysfs_streq(buf, "enabled")) + set_bit(DPOT_RDAC_MASK & reg, data->otp_en_mask); + else + clear_bit(DPOT_RDAC_MASK & reg, data->otp_en_mask); + + return count; + } + + if ((reg & DPOT_ADDR_OTP) && + !test_bit(DPOT_RDAC_MASK & reg, data->otp_en_mask)) + return -EPERM; + + err = kstrtoul(buf, 10, &value); + if (err) + return err; + + if (value > data->rdac_mask) + value = data->rdac_mask; + + mutex_lock(&data->update_lock); + dpot_write(data, reg, value); + if (reg & DPOT_ADDR_EEPROM) + msleep(26); /* Sleep while the EEPROM updates */ + else if (reg & DPOT_ADDR_OTP) + msleep(400); /* Sleep while the OTP updates */ + mutex_unlock(&data->update_lock); + + return count; +} + +static ssize_t sysfs_do_cmd(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count, u32 reg) +{ + struct dpot_data *data = dev_get_drvdata(dev); + + mutex_lock(&data->update_lock); + dpot_write(data, reg, 0); + mutex_unlock(&data->update_lock); + + return count; +} + +/* ------------------------------------------------------------------------- */ + +#define DPOT_DEVICE_SHOW(_name, _reg) static ssize_t \ +show_##_name(struct device *dev, \ + struct device_attribute *attr, char *buf) \ +{ \ + return sysfs_show_reg(dev, attr, buf, _reg); \ +} + +#define DPOT_DEVICE_SET(_name, _reg) static ssize_t \ +set_##_name(struct device *dev, \ + struct device_attribute *attr, \ + const char *buf, size_t count) \ +{ \ + return sysfs_set_reg(dev, attr, buf, count, _reg); \ +} + +#define DPOT_DEVICE_SHOW_SET(name, reg) \ +DPOT_DEVICE_SHOW(name, reg) \ +DPOT_DEVICE_SET(name, reg) \ +static DEVICE_ATTR(name, S_IWUSR | S_IRUGO, show_##name, set_##name) + +#define DPOT_DEVICE_SHOW_ONLY(name, reg) \ +DPOT_DEVICE_SHOW(name, reg) \ +static DEVICE_ATTR(name, S_IWUSR | S_IRUGO, show_##name, NULL) + +DPOT_DEVICE_SHOW_SET(rdac0, DPOT_ADDR_RDAC | DPOT_RDAC0); +DPOT_DEVICE_SHOW_SET(eeprom0, DPOT_ADDR_EEPROM | DPOT_RDAC0); +DPOT_DEVICE_SHOW_ONLY(tolerance0, DPOT_ADDR_EEPROM | DPOT_TOL_RDAC0); +DPOT_DEVICE_SHOW_SET(otp0, DPOT_ADDR_OTP | DPOT_RDAC0); +DPOT_DEVICE_SHOW_SET(otp0en, DPOT_ADDR_OTP_EN | DPOT_RDAC0); + +DPOT_DEVICE_SHOW_SET(rdac1, DPOT_ADDR_RDAC | DPOT_RDAC1); +DPOT_DEVICE_SHOW_SET(eeprom1, DPOT_ADDR_EEPROM | DPOT_RDAC1); +DPOT_DEVICE_SHOW_ONLY(tolerance1, DPOT_ADDR_EEPROM | DPOT_TOL_RDAC1); +DPOT_DEVICE_SHOW_SET(otp1, DPOT_ADDR_OTP | DPOT_RDAC1); +DPOT_DEVICE_SHOW_SET(otp1en, DPOT_ADDR_OTP_EN | DPOT_RDAC1); + +DPOT_DEVICE_SHOW_SET(rdac2, DPOT_ADDR_RDAC | DPOT_RDAC2); +DPOT_DEVICE_SHOW_SET(eeprom2, DPOT_ADDR_EEPROM | DPOT_RDAC2); +DPOT_DEVICE_SHOW_ONLY(tolerance2, DPOT_ADDR_EEPROM | DPOT_TOL_RDAC2); +DPOT_DEVICE_SHOW_SET(otp2, DPOT_ADDR_OTP | DPOT_RDAC2); +DPOT_DEVICE_SHOW_SET(otp2en, DPOT_ADDR_OTP_EN | DPOT_RDAC2); + +DPOT_DEVICE_SHOW_SET(rdac3, DPOT_ADDR_RDAC | DPOT_RDAC3); +DPOT_DEVICE_SHOW_SET(eeprom3, DPOT_ADDR_EEPROM | DPOT_RDAC3); +DPOT_DEVICE_SHOW_ONLY(tolerance3, DPOT_ADDR_EEPROM | DPOT_TOL_RDAC3); +DPOT_DEVICE_SHOW_SET(otp3, DPOT_ADDR_OTP | DPOT_RDAC3); +DPOT_DEVICE_SHOW_SET(otp3en, DPOT_ADDR_OTP_EN | DPOT_RDAC3); + +DPOT_DEVICE_SHOW_SET(rdac4, DPOT_ADDR_RDAC | DPOT_RDAC4); +DPOT_DEVICE_SHOW_SET(eeprom4, DPOT_ADDR_EEPROM | DPOT_RDAC4); +DPOT_DEVICE_SHOW_ONLY(tolerance4, DPOT_ADDR_EEPROM | DPOT_TOL_RDAC4); +DPOT_DEVICE_SHOW_SET(otp4, DPOT_ADDR_OTP | DPOT_RDAC4); +DPOT_DEVICE_SHOW_SET(otp4en, DPOT_ADDR_OTP_EN | DPOT_RDAC4); + +DPOT_DEVICE_SHOW_SET(rdac5, DPOT_ADDR_RDAC | DPOT_RDAC5); +DPOT_DEVICE_SHOW_SET(eeprom5, DPOT_ADDR_EEPROM | DPOT_RDAC5); +DPOT_DEVICE_SHOW_ONLY(tolerance5, DPOT_ADDR_EEPROM | DPOT_TOL_RDAC5); +DPOT_DEVICE_SHOW_SET(otp5, DPOT_ADDR_OTP | DPOT_RDAC5); +DPOT_DEVICE_SHOW_SET(otp5en, DPOT_ADDR_OTP_EN | DPOT_RDAC5); + +static const struct attribute *dpot_attrib_wipers[] = { + &dev_attr_rdac0.attr, + &dev_attr_rdac1.attr, + &dev_attr_rdac2.attr, + &dev_attr_rdac3.attr, + &dev_attr_rdac4.attr, + &dev_attr_rdac5.attr, + NULL +}; + +static const struct attribute *dpot_attrib_eeprom[] = { + &dev_attr_eeprom0.attr, + &dev_attr_eeprom1.attr, + &dev_attr_eeprom2.attr, + &dev_attr_eeprom3.attr, + &dev_attr_eeprom4.attr, + &dev_attr_eeprom5.attr, + NULL +}; + +static const struct attribute *dpot_attrib_otp[] = { + &dev_attr_otp0.attr, + &dev_attr_otp1.attr, + &dev_attr_otp2.attr, + &dev_attr_otp3.attr, + &dev_attr_otp4.attr, + &dev_attr_otp5.attr, + NULL +}; + +static const struct attribute *dpot_attrib_otp_en[] = { + &dev_attr_otp0en.attr, + &dev_attr_otp1en.attr, + &dev_attr_otp2en.attr, + &dev_attr_otp3en.attr, + &dev_attr_otp4en.attr, + &dev_attr_otp5en.attr, + NULL +}; + +static const struct attribute *dpot_attrib_tolerance[] = { + &dev_attr_tolerance0.attr, + &dev_attr_tolerance1.attr, + &dev_attr_tolerance2.attr, + &dev_attr_tolerance3.attr, + &dev_attr_tolerance4.attr, + &dev_attr_tolerance5.attr, + NULL +}; + +/* ------------------------------------------------------------------------- */ + +#define DPOT_DEVICE_DO_CMD(_name, _cmd) static ssize_t \ +set_##_name(struct device *dev, \ + struct device_attribute *attr, \ + const char *buf, size_t count) \ +{ \ + return sysfs_do_cmd(dev, attr, buf, count, _cmd); \ +} \ +static DEVICE_ATTR(_name, S_IWUSR | S_IRUGO, NULL, set_##_name) + +DPOT_DEVICE_DO_CMD(inc_all, DPOT_INC_ALL); +DPOT_DEVICE_DO_CMD(dec_all, DPOT_DEC_ALL); +DPOT_DEVICE_DO_CMD(inc_all_6db, DPOT_INC_ALL_6DB); +DPOT_DEVICE_DO_CMD(dec_all_6db, DPOT_DEC_ALL_6DB); + +static struct attribute *ad525x_attributes_commands[] = { + &dev_attr_inc_all.attr, + &dev_attr_dec_all.attr, + &dev_attr_inc_all_6db.attr, + &dev_attr_dec_all_6db.attr, + NULL +}; + +static const struct attribute_group ad525x_group_commands = { + .attrs = ad525x_attributes_commands, +}; + +static int ad_dpot_add_files(struct device *dev, + unsigned int features, unsigned int rdac) +{ + int err = sysfs_create_file(&dev->kobj, + dpot_attrib_wipers[rdac]); + if (features & F_CMD_EEP) + err |= sysfs_create_file(&dev->kobj, + dpot_attrib_eeprom[rdac]); + if (features & F_CMD_TOL) + err |= sysfs_create_file(&dev->kobj, + dpot_attrib_tolerance[rdac]); + if (features & F_CMD_OTP) { + err |= sysfs_create_file(&dev->kobj, + dpot_attrib_otp_en[rdac]); + err |= sysfs_create_file(&dev->kobj, + dpot_attrib_otp[rdac]); + } + + if (err) + dev_err(dev, "failed to register sysfs hooks for RDAC%d\n", + rdac); + + return err; +} + +static inline void ad_dpot_remove_files(struct device *dev, + unsigned int features, unsigned int rdac) +{ + sysfs_remove_file(&dev->kobj, + dpot_attrib_wipers[rdac]); + if (features & F_CMD_EEP) + sysfs_remove_file(&dev->kobj, + dpot_attrib_eeprom[rdac]); + if (features & F_CMD_TOL) + sysfs_remove_file(&dev->kobj, + dpot_attrib_tolerance[rdac]); + if (features & F_CMD_OTP) { + sysfs_remove_file(&dev->kobj, + dpot_attrib_otp_en[rdac]); + sysfs_remove_file(&dev->kobj, + dpot_attrib_otp[rdac]); + } +} + +int ad_dpot_probe(struct device *dev, + struct ad_dpot_bus_data *bdata, unsigned long devid, + const char *name) +{ + + struct dpot_data *data; + int i, err = 0; + + data = kzalloc(sizeof(struct dpot_data), GFP_KERNEL); + if (!data) { + err = -ENOMEM; + goto exit; + } + + dev_set_drvdata(dev, data); + mutex_init(&data->update_lock); + + data->bdata = *bdata; + data->devid = devid; + + data->max_pos = 1 << DPOT_MAX_POS(devid); + data->rdac_mask = data->max_pos - 1; + data->feat = DPOT_FEAT(devid); + data->uid = DPOT_UID(devid); + data->wipers = DPOT_WIPERS(devid); + + for (i = DPOT_RDAC0; i < MAX_RDACS; i++) + if (data->wipers & (1 << i)) { + err = ad_dpot_add_files(dev, data->feat, i); + if (err) + goto exit_remove_files; + /* power-up midscale */ + if (data->feat & F_RDACS_WONLY) + data->rdac_cache[i] = data->max_pos / 2; + } + + if (data->feat & F_CMD_INC) + err = sysfs_create_group(&dev->kobj, &ad525x_group_commands); + + if (err) { + dev_err(dev, "failed to register sysfs hooks\n"); + goto exit_free; + } + + dev_info(dev, "%s %d-Position Digital Potentiometer registered\n", + name, data->max_pos); + + return 0; + +exit_remove_files: + for (i = DPOT_RDAC0; i < MAX_RDACS; i++) + if (data->wipers & (1 << i)) + ad_dpot_remove_files(dev, data->feat, i); + +exit_free: + kfree(data); + dev_set_drvdata(dev, NULL); +exit: + dev_err(dev, "failed to create client for %s ID 0x%lX\n", + name, devid); + return err; +} +EXPORT_SYMBOL(ad_dpot_probe); + +int ad_dpot_remove(struct device *dev) +{ + struct dpot_data *data = dev_get_drvdata(dev); + int i; + + for (i = DPOT_RDAC0; i < MAX_RDACS; i++) + if (data->wipers & (1 << i)) + ad_dpot_remove_files(dev, data->feat, i); + + kfree(data); + + return 0; +} +EXPORT_SYMBOL(ad_dpot_remove); + + +MODULE_AUTHOR("Chris Verges , " + "Michael Hennerich "); +MODULE_DESCRIPTION("Digital potentiometer driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/misc/ad525x_dpot.h b/drivers/misc/ad525x_dpot.h new file mode 100644 index 000000000..443a51fd5 --- /dev/null +++ b/drivers/misc/ad525x_dpot.h @@ -0,0 +1,215 @@ +/* + * Driver for the Analog Devices digital potentiometers + * + * Copyright (C) 2010 Michael Hennerich, Analog Devices Inc. + * + * Licensed under the GPL-2 or later. + */ + +#ifndef _AD_DPOT_H_ +#define _AD_DPOT_H_ + +#include + +#define DPOT_CONF(features, wipers, max_pos, uid) \ + (((features) << 18) | (((wipers) & 0xFF) << 10) | \ + ((max_pos & 0xF) << 6) | (uid & 0x3F)) + +#define DPOT_UID(conf) (conf & 0x3F) +#define DPOT_MAX_POS(conf) ((conf >> 6) & 0xF) +#define DPOT_WIPERS(conf) ((conf >> 10) & 0xFF) +#define DPOT_FEAT(conf) (conf >> 18) + +#define BRDAC0 (1 << 0) +#define BRDAC1 (1 << 1) +#define BRDAC2 (1 << 2) +#define BRDAC3 (1 << 3) +#define BRDAC4 (1 << 4) +#define BRDAC5 (1 << 5) +#define MAX_RDACS 6 + +#define F_CMD_INC (1 << 0) /* Features INC/DEC ALL, 6dB */ +#define F_CMD_EEP (1 << 1) /* Features EEPROM */ +#define F_CMD_OTP (1 << 2) /* Features OTP */ +#define F_CMD_TOL (1 << 3) /* RDACS feature Tolerance REG */ +#define F_RDACS_RW (1 << 4) /* RDACS are Read/Write */ +#define F_RDACS_WONLY (1 << 5) /* RDACS are Write only */ +#define F_AD_APPDATA (1 << 6) /* RDAC Address append to data */ +#define F_SPI_8BIT (1 << 7) /* All SPI XFERS are 8-bit */ +#define F_SPI_16BIT (1 << 8) /* All SPI XFERS are 16-bit */ +#define F_SPI_24BIT (1 << 9) /* All SPI XFERS are 24-bit */ + +#define F_RDACS_RW_TOL (F_RDACS_RW | F_CMD_EEP | F_CMD_TOL) +#define F_RDACS_RW_EEP (F_RDACS_RW | F_CMD_EEP) +#define F_SPI (F_SPI_8BIT | F_SPI_16BIT | F_SPI_24BIT) + +enum dpot_devid { + AD5258_ID = DPOT_CONF(F_RDACS_RW_TOL, BRDAC0, 6, 0), /* I2C */ + AD5259_ID = DPOT_CONF(F_RDACS_RW_TOL, BRDAC0, 8, 1), + AD5251_ID = DPOT_CONF(F_RDACS_RW_TOL | F_CMD_INC, + BRDAC1 | BRDAC3, 6, 2), + AD5252_ID = DPOT_CONF(F_RDACS_RW_TOL | F_CMD_INC, + BRDAC1 | BRDAC3, 8, 3), + AD5253_ID = DPOT_CONF(F_RDACS_RW_TOL | F_CMD_INC, + BRDAC0 | BRDAC1 | BRDAC2 | BRDAC3, 6, 4), + AD5254_ID = DPOT_CONF(F_RDACS_RW_TOL | F_CMD_INC, + BRDAC0 | BRDAC1 | BRDAC2 | BRDAC3, 8, 5), + AD5255_ID = DPOT_CONF(F_RDACS_RW_TOL | F_CMD_INC, + BRDAC0 | BRDAC1 | BRDAC2, 9, 6), + AD5160_ID = DPOT_CONF(F_RDACS_WONLY | F_AD_APPDATA | F_SPI_8BIT, + BRDAC0, 8, 7), /* SPI */ + AD5161_ID = DPOT_CONF(F_RDACS_WONLY | F_AD_APPDATA | F_SPI_8BIT, + BRDAC0, 8, 8), + AD5162_ID = DPOT_CONF(F_RDACS_WONLY | F_AD_APPDATA | F_SPI_16BIT, + BRDAC0 | BRDAC1, 8, 9), + AD5165_ID = DPOT_CONF(F_RDACS_WONLY | F_AD_APPDATA | F_SPI_8BIT, + BRDAC0, 8, 10), + AD5200_ID = DPOT_CONF(F_RDACS_WONLY | F_AD_APPDATA | F_SPI_8BIT, + BRDAC0, 8, 11), + AD5201_ID = DPOT_CONF(F_RDACS_WONLY | F_AD_APPDATA | F_SPI_8BIT, + BRDAC0, 5, 12), + AD5203_ID = DPOT_CONF(F_RDACS_WONLY | F_AD_APPDATA | F_SPI_8BIT, + BRDAC0 | BRDAC1 | BRDAC2 | BRDAC3, 6, 13), + AD5204_ID = DPOT_CONF(F_RDACS_WONLY | F_AD_APPDATA | F_SPI_16BIT, + BRDAC0 | BRDAC1 | BRDAC2 | BRDAC3, 8, 14), + AD5206_ID = DPOT_CONF(F_RDACS_WONLY | F_AD_APPDATA | F_SPI_16BIT, + BRDAC0 | BRDAC1 | BRDAC2 | BRDAC3 | BRDAC4 | BRDAC5, + 8, 15), + AD5207_ID = DPOT_CONF(F_RDACS_WONLY | F_AD_APPDATA | F_SPI_16BIT, + BRDAC0 | BRDAC1, 8, 16), + AD5231_ID = DPOT_CONF(F_RDACS_RW_EEP | F_CMD_INC | F_SPI_24BIT, + BRDAC0, 10, 17), + AD5232_ID = DPOT_CONF(F_RDACS_RW_EEP | F_CMD_INC | F_SPI_16BIT, + BRDAC0 | BRDAC1, 8, 18), + AD5233_ID = DPOT_CONF(F_RDACS_RW_EEP | F_CMD_INC | F_SPI_16BIT, + BRDAC0 | BRDAC1 | BRDAC2 | BRDAC3, 6, 19), + AD5235_ID = DPOT_CONF(F_RDACS_RW_EEP | F_CMD_INC | F_SPI_24BIT, + BRDAC0 | BRDAC1, 10, 20), + AD5260_ID = DPOT_CONF(F_RDACS_WONLY | F_AD_APPDATA | F_SPI_8BIT, + BRDAC0, 8, 21), + AD5262_ID = DPOT_CONF(F_RDACS_WONLY | F_AD_APPDATA | F_SPI_16BIT, + BRDAC0 | BRDAC1, 8, 22), + AD5263_ID = DPOT_CONF(F_RDACS_WONLY | F_AD_APPDATA | F_SPI_16BIT, + BRDAC0 | BRDAC1 | BRDAC2 | BRDAC3, 8, 23), + AD5290_ID = DPOT_CONF(F_RDACS_WONLY | F_AD_APPDATA | F_SPI_8BIT, + BRDAC0, 8, 24), + AD5291_ID = DPOT_CONF(F_RDACS_RW | F_SPI_16BIT | F_CMD_OTP, + BRDAC0, 8, 25), + AD5292_ID = DPOT_CONF(F_RDACS_RW | F_SPI_16BIT | F_CMD_OTP, + BRDAC0, 10, 26), + AD5293_ID = DPOT_CONF(F_RDACS_RW | F_SPI_16BIT, BRDAC0, 10, 27), + AD7376_ID = DPOT_CONF(F_RDACS_WONLY | F_AD_APPDATA | F_SPI_8BIT, + BRDAC0, 7, 28), + AD8400_ID = DPOT_CONF(F_RDACS_WONLY | F_AD_APPDATA | F_SPI_16BIT, + BRDAC0, 8, 29), + AD8402_ID = DPOT_CONF(F_RDACS_WONLY | F_AD_APPDATA | F_SPI_16BIT, + BRDAC0 | BRDAC1, 8, 30), + AD8403_ID = DPOT_CONF(F_RDACS_WONLY | F_AD_APPDATA | F_SPI_16BIT, + BRDAC0 | BRDAC1 | BRDAC2, 8, 31), + ADN2850_ID = DPOT_CONF(F_RDACS_RW_EEP | F_CMD_INC | F_SPI_24BIT, + BRDAC0 | BRDAC1, 10, 32), + AD5241_ID = DPOT_CONF(F_RDACS_RW, BRDAC0, 8, 33), + AD5242_ID = DPOT_CONF(F_RDACS_RW, BRDAC0 | BRDAC1, 8, 34), + AD5243_ID = DPOT_CONF(F_RDACS_RW, BRDAC0 | BRDAC1, 8, 35), + AD5245_ID = DPOT_CONF(F_RDACS_RW, BRDAC0, 8, 36), + AD5246_ID = DPOT_CONF(F_RDACS_RW, BRDAC0, 7, 37), + AD5247_ID = DPOT_CONF(F_RDACS_RW, BRDAC0, 7, 38), + AD5248_ID = DPOT_CONF(F_RDACS_RW, BRDAC0 | BRDAC1, 8, 39), + AD5280_ID = DPOT_CONF(F_RDACS_RW, BRDAC0, 8, 40), + AD5282_ID = DPOT_CONF(F_RDACS_RW, BRDAC0 | BRDAC1, 8, 41), + ADN2860_ID = DPOT_CONF(F_RDACS_RW_TOL | F_CMD_INC, + BRDAC0 | BRDAC1 | BRDAC2, 9, 42), + AD5273_ID = DPOT_CONF(F_RDACS_RW | F_CMD_OTP, BRDAC0, 6, 43), + AD5171_ID = DPOT_CONF(F_RDACS_RW | F_CMD_OTP, BRDAC0, 6, 44), + AD5170_ID = DPOT_CONF(F_RDACS_RW | F_CMD_OTP, BRDAC0, 8, 45), + AD5172_ID = DPOT_CONF(F_RDACS_RW | F_CMD_OTP, BRDAC0 | BRDAC1, 8, 46), + AD5173_ID = DPOT_CONF(F_RDACS_RW | F_CMD_OTP, BRDAC0 | BRDAC1, 8, 47), + AD5270_ID = DPOT_CONF(F_RDACS_RW | F_CMD_OTP | F_SPI_16BIT, + BRDAC0, 10, 48), + AD5271_ID = DPOT_CONF(F_RDACS_RW | F_CMD_OTP | F_SPI_16BIT, + BRDAC0, 8, 49), + AD5272_ID = DPOT_CONF(F_RDACS_RW | F_CMD_OTP, BRDAC0, 10, 50), + AD5274_ID = DPOT_CONF(F_RDACS_RW | F_CMD_OTP, BRDAC0, 8, 51), +}; + +#define DPOT_RDAC0 0 +#define DPOT_RDAC1 1 +#define DPOT_RDAC2 2 +#define DPOT_RDAC3 3 +#define DPOT_RDAC4 4 +#define DPOT_RDAC5 5 + +#define DPOT_RDAC_MASK 0x1F + +#define DPOT_REG_TOL 0x18 +#define DPOT_TOL_RDAC0 (DPOT_REG_TOL | DPOT_RDAC0) +#define DPOT_TOL_RDAC1 (DPOT_REG_TOL | DPOT_RDAC1) +#define DPOT_TOL_RDAC2 (DPOT_REG_TOL | DPOT_RDAC2) +#define DPOT_TOL_RDAC3 (DPOT_REG_TOL | DPOT_RDAC3) +#define DPOT_TOL_RDAC4 (DPOT_REG_TOL | DPOT_RDAC4) +#define DPOT_TOL_RDAC5 (DPOT_REG_TOL | DPOT_RDAC5) + +/* RDAC-to-EEPROM Interface Commands */ +#define DPOT_ADDR_RDAC (0x0 << 5) +#define DPOT_ADDR_EEPROM (0x1 << 5) +#define DPOT_ADDR_OTP (0x1 << 6) +#define DPOT_ADDR_CMD (0x1 << 7) +#define DPOT_ADDR_OTP_EN (0x1 << 9) + +#define DPOT_DEC_ALL_6DB (DPOT_ADDR_CMD | (0x4 << 3)) +#define DPOT_INC_ALL_6DB (DPOT_ADDR_CMD | (0x9 << 3)) +#define DPOT_DEC_ALL (DPOT_ADDR_CMD | (0x6 << 3)) +#define DPOT_INC_ALL (DPOT_ADDR_CMD | (0xB << 3)) + +#define DPOT_SPI_RDAC 0xB0 +#define DPOT_SPI_EEPROM 0x30 +#define DPOT_SPI_READ_RDAC 0xA0 +#define DPOT_SPI_READ_EEPROM 0x90 +#define DPOT_SPI_DEC_ALL_6DB 0x50 +#define DPOT_SPI_INC_ALL_6DB 0xD0 +#define DPOT_SPI_DEC_ALL 0x70 +#define DPOT_SPI_INC_ALL 0xF0 + +/* AD5291/2/3 use special commands */ +#define DPOT_AD5291_RDAC 0x01 +#define DPOT_AD5291_READ_RDAC 0x02 +#define DPOT_AD5291_STORE_XTPM 0x03 +#define DPOT_AD5291_CTRLREG 0x06 +#define DPOT_AD5291_UNLOCK_CMD 0x03 + +/* AD5270/1/2/4 use special commands */ +#define DPOT_AD5270_1_2_4_RDAC 0x01 +#define DPOT_AD5270_1_2_4_READ_RDAC 0x02 +#define DPOT_AD5270_1_2_4_STORE_XTPM 0x03 +#define DPOT_AD5270_1_2_4_CTRLREG 0x07 +#define DPOT_AD5270_1_2_4_UNLOCK_CMD 0x03 + +#define DPOT_AD5282_RDAC_AB 0x80 + +#define DPOT_AD5273_FUSE 0x80 +#define DPOT_AD5170_2_3_FUSE 0x20 +#define DPOT_AD5170_2_3_OW 0x08 +#define DPOT_AD5172_3_A0 0x08 +#define DPOT_AD5170_2FUSE 0x80 + +struct dpot_data; + +struct ad_dpot_bus_ops { + int (*read_d8)(void *client); + int (*read_r8d8)(void *client, u8 reg); + int (*read_r8d16)(void *client, u8 reg); + int (*write_d8)(void *client, u8 val); + int (*write_r8d8)(void *client, u8 reg, u8 val); + int (*write_r8d16)(void *client, u8 reg, u16 val); +}; + +struct ad_dpot_bus_data { + void *client; + const struct ad_dpot_bus_ops *bops; +}; + +int ad_dpot_probe(struct device *dev, struct ad_dpot_bus_data *bdata, + unsigned long devid, const char *name); +int ad_dpot_remove(struct device *dev); + +#endif diff --git a/drivers/misc/altera-stapl/Kconfig b/drivers/misc/altera-stapl/Kconfig new file mode 100644 index 000000000..8a828fe41 --- /dev/null +++ b/drivers/misc/altera-stapl/Kconfig @@ -0,0 +1,9 @@ +comment "Altera FPGA firmware download module (requires I2C)" + depends on !I2C + +config ALTERA_STAPL + tristate "Altera FPGA firmware download module" + depends on I2C + default n + help + An Altera FPGA module. Say Y when you want to support this tool. diff --git a/drivers/misc/altera-stapl/Makefile b/drivers/misc/altera-stapl/Makefile new file mode 100644 index 000000000..055f61ee7 --- /dev/null +++ b/drivers/misc/altera-stapl/Makefile @@ -0,0 +1,3 @@ +altera-stapl-objs = altera-lpt.o altera-jtag.o altera-comp.o altera.o + +obj-$(CONFIG_ALTERA_STAPL) += altera-stapl.o diff --git a/drivers/misc/altera-stapl/altera-comp.c b/drivers/misc/altera-stapl/altera-comp.c new file mode 100644 index 000000000..49b103bed --- /dev/null +++ b/drivers/misc/altera-stapl/altera-comp.c @@ -0,0 +1,142 @@ +/* + * altera-comp.c + * + * altera FPGA driver + * + * Copyright (C) Altera Corporation 1998-2001 + * Copyright (C) 2010 NetUP Inc. + * Copyright (C) 2010 Igor M. Liplianin + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include +#include "altera-exprt.h" + +#define SHORT_BITS 16 +#define CHAR_BITS 8 +#define DATA_BLOB_LENGTH 3 +#define MATCH_DATA_LENGTH 8192 +#define ALTERA_REQUEST_SIZE 1024 +#define ALTERA_BUFFER_SIZE (MATCH_DATA_LENGTH + ALTERA_REQUEST_SIZE) + +static u32 altera_bits_req(u32 n) +{ + u32 result = SHORT_BITS; + + if (n == 0) + result = 1; + else { + /* Look for the highest non-zero bit position */ + while ((n & (1 << (SHORT_BITS - 1))) == 0) { + n <<= 1; + --result; + } + } + + return result; +} + +static u32 altera_read_packed(u8 *buffer, u32 bits, u32 *bits_avail, + u32 *in_index) +{ + u32 result = 0; + u32 shift = 0; + u32 databyte = 0; + + while (bits > 0) { + databyte = buffer[*in_index]; + result |= (((databyte >> (CHAR_BITS - *bits_avail)) + & (0xff >> (CHAR_BITS - *bits_avail))) << shift); + + if (bits <= *bits_avail) { + result &= (0xffff >> (SHORT_BITS - (bits + shift))); + *bits_avail -= bits; + bits = 0; + } else { + ++(*in_index); + shift += *bits_avail; + bits -= *bits_avail; + *bits_avail = CHAR_BITS; + } + } + + return result; +} + +u32 altera_shrink(u8 *in, u32 in_length, u8 *out, u32 out_length, s32 version) +{ + u32 i, j, data_length = 0L; + u32 offset, length; + u32 match_data_length = MATCH_DATA_LENGTH; + u32 bits_avail = CHAR_BITS; + u32 in_index = 0L; + + if (version > 0) + --match_data_length; + + for (i = 0; i < out_length; ++i) + out[i] = 0; + + /* Read number of bytes in data. */ + for (i = 0; i < sizeof(in_length); ++i) { + data_length = data_length | ( + altera_read_packed(in, + CHAR_BITS, + &bits_avail, + &in_index) << (i * CHAR_BITS)); + } + + if (data_length > out_length) { + data_length = 0L; + return data_length; + } + + i = 0; + while (i < data_length) { + /* A 0 bit indicates literal data. */ + if (altera_read_packed(in, 1, &bits_avail, + &in_index) == 0) { + for (j = 0; j < DATA_BLOB_LENGTH; ++j) { + if (i < data_length) { + out[i] = (u8)altera_read_packed(in, + CHAR_BITS, + &bits_avail, + &in_index); + i++; + } + } + } else { + /* A 1 bit indicates offset/length to follow. */ + offset = altera_read_packed(in, altera_bits_req((s16) + (i > match_data_length ? + match_data_length : i)), + &bits_avail, + &in_index); + length = altera_read_packed(in, CHAR_BITS, + &bits_avail, + &in_index); + for (j = 0; j < length; ++j) { + if (i < data_length) { + out[i] = out[i - offset]; + i++; + } + } + } + } + + return data_length; +} diff --git a/drivers/misc/altera-stapl/altera-exprt.h b/drivers/misc/altera-stapl/altera-exprt.h new file mode 100644 index 000000000..39c38d84a --- /dev/null +++ b/drivers/misc/altera-stapl/altera-exprt.h @@ -0,0 +1,33 @@ +/* + * altera-exprt.h + * + * altera FPGA driver + * + * Copyright (C) Altera Corporation 1998-2001 + * Copyright (C) 2010 NetUP Inc. + * Copyright (C) 2010 Igor M. Liplianin + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef ALTERA_EXPRT_H +#define ALTERA_EXPRT_H + + +u32 altera_shrink(u8 *in, u32 in_length, u8 *out, u32 out_length, s32 version); +int netup_jtag_io_lpt(void *device, int tms, int tdi, int read_tdo); + +#endif /* ALTERA_EXPRT_H */ diff --git a/drivers/misc/altera-stapl/altera-jtag.c b/drivers/misc/altera-stapl/altera-jtag.c new file mode 100644 index 000000000..f4bf20096 --- /dev/null +++ b/drivers/misc/altera-stapl/altera-jtag.c @@ -0,0 +1,1021 @@ +/* + * altera-jtag.c + * + * altera FPGA driver + * + * Copyright (C) Altera Corporation 1998-2001 + * Copyright (C) 2010 NetUP Inc. + * Copyright (C) 2010 Igor M. Liplianin + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include +#include +#include +#include +#include "altera-exprt.h" +#include "altera-jtag.h" + +#define alt_jtag_io(a, b, c)\ + astate->config->jtag_io(astate->config->dev, a, b, c); + +#define alt_malloc(a) kzalloc(a, GFP_KERNEL); + +/* + * This structure shows, for each JTAG state, which state is reached after + * a single TCK clock cycle with TMS high or TMS low, respectively. This + * describes all possible state transitions in the JTAG state machine. + */ +struct altera_jtag_machine { + enum altera_jtag_state tms_high; + enum altera_jtag_state tms_low; +}; + +static const struct altera_jtag_machine altera_transitions[] = { + /* RESET */ { RESET, IDLE }, + /* IDLE */ { DRSELECT, IDLE }, + /* DRSELECT */ { IRSELECT, DRCAPTURE }, + /* DRCAPTURE */ { DREXIT1, DRSHIFT }, + /* DRSHIFT */ { DREXIT1, DRSHIFT }, + /* DREXIT1 */ { DRUPDATE, DRPAUSE }, + /* DRPAUSE */ { DREXIT2, DRPAUSE }, + /* DREXIT2 */ { DRUPDATE, DRSHIFT }, + /* DRUPDATE */ { DRSELECT, IDLE }, + /* IRSELECT */ { RESET, IRCAPTURE }, + /* IRCAPTURE */ { IREXIT1, IRSHIFT }, + /* IRSHIFT */ { IREXIT1, IRSHIFT }, + /* IREXIT1 */ { IRUPDATE, IRPAUSE }, + /* IRPAUSE */ { IREXIT2, IRPAUSE }, + /* IREXIT2 */ { IRUPDATE, IRSHIFT }, + /* IRUPDATE */ { DRSELECT, IDLE } +}; + +/* + * This table contains the TMS value to be used to take the NEXT STEP on + * the path to the desired state. The array index is the current state, + * and the bit position is the desired endstate. To find out which state + * is used as the intermediate state, look up the TMS value in the + * altera_transitions[] table. + */ +static const u16 altera_jtag_path_map[16] = { + /* RST RTI SDRS CDR SDR E1DR PDR E2DR */ + 0x0001, 0xFFFD, 0xFE01, 0xFFE7, 0xFFEF, 0xFF0F, 0xFFBF, 0xFFFF, + /* UDR SIRS CIR SIR E1IR PIR E2IR UIR */ + 0xFEFD, 0x0001, 0xF3FF, 0xF7FF, 0x87FF, 0xDFFF, 0xFFFF, 0x7FFD +}; + +/* Flag bits for alt_jtag_io() function */ +#define TMS_HIGH 1 +#define TMS_LOW 0 +#define TDI_HIGH 1 +#define TDI_LOW 0 +#define READ_TDO 1 +#define IGNORE_TDO 0 + +int altera_jinit(struct altera_state *astate) +{ + struct altera_jtag *js = &astate->js; + + /* initial JTAG state is unknown */ + js->jtag_state = ILLEGAL_JTAG_STATE; + + /* initialize to default state */ + js->drstop_state = IDLE; + js->irstop_state = IDLE; + js->dr_pre = 0; + js->dr_post = 0; + js->ir_pre = 0; + js->ir_post = 0; + js->dr_length = 0; + js->ir_length = 0; + + js->dr_pre_data = NULL; + js->dr_post_data = NULL; + js->ir_pre_data = NULL; + js->ir_post_data = NULL; + js->dr_buffer = NULL; + js->ir_buffer = NULL; + + return 0; +} + +int altera_set_drstop(struct altera_jtag *js, enum altera_jtag_state state) +{ + js->drstop_state = state; + + return 0; +} + +int altera_set_irstop(struct altera_jtag *js, enum altera_jtag_state state) +{ + js->irstop_state = state; + + return 0; +} + +int altera_set_dr_pre(struct altera_jtag *js, + u32 count, u32 start_index, + u8 *preamble_data) +{ + int status = 0; + u32 i; + u32 j; + + if (count > js->dr_pre) { + kfree(js->dr_pre_data); + js->dr_pre_data = (u8 *)alt_malloc((count + 7) >> 3); + if (js->dr_pre_data == NULL) + status = -ENOMEM; + else + js->dr_pre = count; + } else + js->dr_pre = count; + + if (status == 0) { + for (i = 0; i < count; ++i) { + j = i + start_index; + + if (preamble_data == NULL) + js->dr_pre_data[i >> 3] |= (1 << (i & 7)); + else { + if (preamble_data[j >> 3] & (1 << (j & 7))) + js->dr_pre_data[i >> 3] |= + (1 << (i & 7)); + else + js->dr_pre_data[i >> 3] &= + ~(u32)(1 << (i & 7)); + + } + } + } + + return status; +} + +int altera_set_ir_pre(struct altera_jtag *js, u32 count, u32 start_index, + u8 *preamble_data) +{ + int status = 0; + u32 i; + u32 j; + + if (count > js->ir_pre) { + kfree(js->ir_pre_data); + js->ir_pre_data = (u8 *)alt_malloc((count + 7) >> 3); + if (js->ir_pre_data == NULL) + status = -ENOMEM; + else + js->ir_pre = count; + + } else + js->ir_pre = count; + + if (status == 0) { + for (i = 0; i < count; ++i) { + j = i + start_index; + if (preamble_data == NULL) + js->ir_pre_data[i >> 3] |= (1 << (i & 7)); + else { + if (preamble_data[j >> 3] & (1 << (j & 7))) + js->ir_pre_data[i >> 3] |= + (1 << (i & 7)); + else + js->ir_pre_data[i >> 3] &= + ~(u32)(1 << (i & 7)); + + } + } + } + + return status; +} + +int altera_set_dr_post(struct altera_jtag *js, u32 count, u32 start_index, + u8 *postamble_data) +{ + int status = 0; + u32 i; + u32 j; + + if (count > js->dr_post) { + kfree(js->dr_post_data); + js->dr_post_data = (u8 *)alt_malloc((count + 7) >> 3); + + if (js->dr_post_data == NULL) + status = -ENOMEM; + else + js->dr_post = count; + + } else + js->dr_post = count; + + if (status == 0) { + for (i = 0; i < count; ++i) { + j = i + start_index; + + if (postamble_data == NULL) + js->dr_post_data[i >> 3] |= (1 << (i & 7)); + else { + if (postamble_data[j >> 3] & (1 << (j & 7))) + js->dr_post_data[i >> 3] |= + (1 << (i & 7)); + else + js->dr_post_data[i >> 3] &= + ~(u32)(1 << (i & 7)); + + } + } + } + + return status; +} + +int altera_set_ir_post(struct altera_jtag *js, u32 count, u32 start_index, + u8 *postamble_data) +{ + int status = 0; + u32 i; + u32 j; + + if (count > js->ir_post) { + kfree(js->ir_post_data); + js->ir_post_data = (u8 *)alt_malloc((count + 7) >> 3); + if (js->ir_post_data == NULL) + status = -ENOMEM; + else + js->ir_post = count; + + } else + js->ir_post = count; + + if (status != 0) + return status; + + for (i = 0; i < count; ++i) { + j = i + start_index; + + if (postamble_data == NULL) + js->ir_post_data[i >> 3] |= (1 << (i & 7)); + else { + if (postamble_data[j >> 3] & (1 << (j & 7))) + js->ir_post_data[i >> 3] |= (1 << (i & 7)); + else + js->ir_post_data[i >> 3] &= + ~(u32)(1 << (i & 7)); + + } + } + + return status; +} + +static void altera_jreset_idle(struct altera_state *astate) +{ + struct altera_jtag *js = &astate->js; + int i; + /* Go to Test Logic Reset (no matter what the starting state may be) */ + for (i = 0; i < 5; ++i) + alt_jtag_io(TMS_HIGH, TDI_LOW, IGNORE_TDO); + + /* Now step to Run Test / Idle */ + alt_jtag_io(TMS_LOW, TDI_LOW, IGNORE_TDO); + js->jtag_state = IDLE; +} + +int altera_goto_jstate(struct altera_state *astate, + enum altera_jtag_state state) +{ + struct altera_jtag *js = &astate->js; + int tms; + int count = 0; + int status = 0; + + if (js->jtag_state == ILLEGAL_JTAG_STATE) + /* initialize JTAG chain to known state */ + altera_jreset_idle(astate); + + if (js->jtag_state == state) { + /* + * We are already in the desired state. + * If it is a stable state, loop here. + * Otherwise do nothing (no clock cycles). + */ + if ((state == IDLE) || (state == DRSHIFT) || + (state == DRPAUSE) || (state == IRSHIFT) || + (state == IRPAUSE)) { + alt_jtag_io(TMS_LOW, TDI_LOW, IGNORE_TDO); + } else if (state == RESET) + alt_jtag_io(TMS_HIGH, TDI_LOW, IGNORE_TDO); + + } else { + while ((js->jtag_state != state) && (count < 9)) { + /* Get TMS value to take a step toward desired state */ + tms = (altera_jtag_path_map[js->jtag_state] & + (1 << state)) + ? TMS_HIGH : TMS_LOW; + + /* Take a step */ + alt_jtag_io(tms, TDI_LOW, IGNORE_TDO); + + if (tms) + js->jtag_state = + altera_transitions[js->jtag_state].tms_high; + else + js->jtag_state = + altera_transitions[js->jtag_state].tms_low; + + ++count; + } + } + + if (js->jtag_state != state) + status = -EREMOTEIO; + + return status; +} + +int altera_wait_cycles(struct altera_state *astate, + s32 cycles, + enum altera_jtag_state wait_state) +{ + struct altera_jtag *js = &astate->js; + int tms; + s32 count; + int status = 0; + + if (js->jtag_state != wait_state) + status = altera_goto_jstate(astate, wait_state); + + if (status == 0) { + /* + * Set TMS high to loop in RESET state + * Set TMS low to loop in any other stable state + */ + tms = (wait_state == RESET) ? TMS_HIGH : TMS_LOW; + + for (count = 0L; count < cycles; count++) + alt_jtag_io(tms, TDI_LOW, IGNORE_TDO); + + } + + return status; +} + +int altera_wait_msecs(struct altera_state *astate, + s32 microseconds, enum altera_jtag_state wait_state) +/* + * Causes JTAG hardware to sit in the specified stable + * state for the specified duration of real time. If + * no JTAG operations have been performed yet, then only + * a delay is performed. This permits the WAIT USECS + * statement to be used in VECTOR programs without causing + * any JTAG operations. + * Returns 0 for success, else appropriate error code. + */ +{ + struct altera_jtag *js = &astate->js; + int status = 0; + + if ((js->jtag_state != ILLEGAL_JTAG_STATE) && + (js->jtag_state != wait_state)) + status = altera_goto_jstate(astate, wait_state); + + if (status == 0) + /* Wait for specified time interval */ + udelay(microseconds); + + return status; +} + +static void altera_concatenate_data(u8 *buffer, + u8 *preamble_data, + u32 preamble_count, + u8 *target_data, + u32 start_index, + u32 target_count, + u8 *postamble_data, + u32 postamble_count) +/* + * Copies preamble data, target data, and postamble data + * into one buffer for IR or DR scans. + */ +{ + u32 i, j, k; + + for (i = 0L; i < preamble_count; ++i) { + if (preamble_data[i >> 3L] & (1L << (i & 7L))) + buffer[i >> 3L] |= (1L << (i & 7L)); + else + buffer[i >> 3L] &= ~(u32)(1L << (i & 7L)); + + } + + j = start_index; + k = preamble_count + target_count; + for (; i < k; ++i, ++j) { + if (target_data[j >> 3L] & (1L << (j & 7L))) + buffer[i >> 3L] |= (1L << (i & 7L)); + else + buffer[i >> 3L] &= ~(u32)(1L << (i & 7L)); + + } + + j = 0L; + k = preamble_count + target_count + postamble_count; + for (; i < k; ++i, ++j) { + if (postamble_data[j >> 3L] & (1L << (j & 7L))) + buffer[i >> 3L] |= (1L << (i & 7L)); + else + buffer[i >> 3L] &= ~(u32)(1L << (i & 7L)); + + } +} + +static int alt_jtag_drscan(struct altera_state *astate, + int start_state, + int count, + u8 *tdi, + u8 *tdo) +{ + int i = 0; + int tdo_bit = 0; + int status = 1; + + /* First go to DRSHIFT state */ + switch (start_state) { + case 0: /* IDLE */ + alt_jtag_io(1, 0, 0); /* DRSELECT */ + alt_jtag_io(0, 0, 0); /* DRCAPTURE */ + alt_jtag_io(0, 0, 0); /* DRSHIFT */ + break; + + case 1: /* DRPAUSE */ + alt_jtag_io(1, 0, 0); /* DREXIT2 */ + alt_jtag_io(1, 0, 0); /* DRUPDATE */ + alt_jtag_io(1, 0, 0); /* DRSELECT */ + alt_jtag_io(0, 0, 0); /* DRCAPTURE */ + alt_jtag_io(0, 0, 0); /* DRSHIFT */ + break; + + case 2: /* IRPAUSE */ + alt_jtag_io(1, 0, 0); /* IREXIT2 */ + alt_jtag_io(1, 0, 0); /* IRUPDATE */ + alt_jtag_io(1, 0, 0); /* DRSELECT */ + alt_jtag_io(0, 0, 0); /* DRCAPTURE */ + alt_jtag_io(0, 0, 0); /* DRSHIFT */ + break; + + default: + status = 0; + } + + if (status) { + /* loop in the SHIFT-DR state */ + for (i = 0; i < count; i++) { + tdo_bit = alt_jtag_io( + (i == count - 1), + tdi[i >> 3] & (1 << (i & 7)), + (tdo != NULL)); + + if (tdo != NULL) { + if (tdo_bit) + tdo[i >> 3] |= (1 << (i & 7)); + else + tdo[i >> 3] &= ~(u32)(1 << (i & 7)); + + } + } + + alt_jtag_io(0, 0, 0); /* DRPAUSE */ + } + + return status; +} + +static int alt_jtag_irscan(struct altera_state *astate, + int start_state, + int count, + u8 *tdi, + u8 *tdo) +{ + int i = 0; + int tdo_bit = 0; + int status = 1; + + /* First go to IRSHIFT state */ + switch (start_state) { + case 0: /* IDLE */ + alt_jtag_io(1, 0, 0); /* DRSELECT */ + alt_jtag_io(1, 0, 0); /* IRSELECT */ + alt_jtag_io(0, 0, 0); /* IRCAPTURE */ + alt_jtag_io(0, 0, 0); /* IRSHIFT */ + break; + + case 1: /* DRPAUSE */ + alt_jtag_io(1, 0, 0); /* DREXIT2 */ + alt_jtag_io(1, 0, 0); /* DRUPDATE */ + alt_jtag_io(1, 0, 0); /* DRSELECT */ + alt_jtag_io(1, 0, 0); /* IRSELECT */ + alt_jtag_io(0, 0, 0); /* IRCAPTURE */ + alt_jtag_io(0, 0, 0); /* IRSHIFT */ + break; + + case 2: /* IRPAUSE */ + alt_jtag_io(1, 0, 0); /* IREXIT2 */ + alt_jtag_io(1, 0, 0); /* IRUPDATE */ + alt_jtag_io(1, 0, 0); /* DRSELECT */ + alt_jtag_io(1, 0, 0); /* IRSELECT */ + alt_jtag_io(0, 0, 0); /* IRCAPTURE */ + alt_jtag_io(0, 0, 0); /* IRSHIFT */ + break; + + default: + status = 0; + } + + if (status) { + /* loop in the SHIFT-IR state */ + for (i = 0; i < count; i++) { + tdo_bit = alt_jtag_io( + (i == count - 1), + tdi[i >> 3] & (1 << (i & 7)), + (tdo != NULL)); + if (tdo != NULL) { + if (tdo_bit) + tdo[i >> 3] |= (1 << (i & 7)); + else + tdo[i >> 3] &= ~(u32)(1 << (i & 7)); + + } + } + + alt_jtag_io(0, 0, 0); /* IRPAUSE */ + } + + return status; +} + +static void altera_extract_target_data(u8 *buffer, + u8 *target_data, + u32 start_index, + u32 preamble_count, + u32 target_count) +/* + * Copies target data from scan buffer, filtering out + * preamble and postamble data. + */ +{ + u32 i; + u32 j; + u32 k; + + j = preamble_count; + k = start_index + target_count; + for (i = start_index; i < k; ++i, ++j) { + if (buffer[j >> 3] & (1 << (j & 7))) + target_data[i >> 3] |= (1 << (i & 7)); + else + target_data[i >> 3] &= ~(u32)(1 << (i & 7)); + + } +} + +int altera_irscan(struct altera_state *astate, + u32 count, + u8 *tdi_data, + u32 start_index) +/* Shifts data into instruction register */ +{ + struct altera_jtag *js = &astate->js; + int start_code = 0; + u32 alloc_chars = 0; + u32 shift_count = js->ir_pre + count + js->ir_post; + int status = 0; + enum altera_jtag_state start_state = ILLEGAL_JTAG_STATE; + + switch (js->jtag_state) { + case ILLEGAL_JTAG_STATE: + case RESET: + case IDLE: + start_code = 0; + start_state = IDLE; + break; + + case DRSELECT: + case DRCAPTURE: + case DRSHIFT: + case DREXIT1: + case DRPAUSE: + case DREXIT2: + case DRUPDATE: + start_code = 1; + start_state = DRPAUSE; + break; + + case IRSELECT: + case IRCAPTURE: + case IRSHIFT: + case IREXIT1: + case IRPAUSE: + case IREXIT2: + case IRUPDATE: + start_code = 2; + start_state = IRPAUSE; + break; + + default: + status = -EREMOTEIO; + break; + } + + if (status == 0) + if (js->jtag_state != start_state) + status = altera_goto_jstate(astate, start_state); + + if (status == 0) { + if (shift_count > js->ir_length) { + alloc_chars = (shift_count + 7) >> 3; + kfree(js->ir_buffer); + js->ir_buffer = (u8 *)alt_malloc(alloc_chars); + if (js->ir_buffer == NULL) + status = -ENOMEM; + else + js->ir_length = alloc_chars * 8; + + } + } + + if (status == 0) { + /* + * Copy preamble data, IR data, + * and postamble data into a buffer + */ + altera_concatenate_data(js->ir_buffer, + js->ir_pre_data, + js->ir_pre, + tdi_data, + start_index, + count, + js->ir_post_data, + js->ir_post); + /* Do the IRSCAN */ + alt_jtag_irscan(astate, + start_code, + shift_count, + js->ir_buffer, + NULL); + + /* alt_jtag_irscan() always ends in IRPAUSE state */ + js->jtag_state = IRPAUSE; + } + + if (status == 0) + if (js->irstop_state != IRPAUSE) + status = altera_goto_jstate(astate, js->irstop_state); + + + return status; +} + +int altera_swap_ir(struct altera_state *astate, + u32 count, + u8 *in_data, + u32 in_index, + u8 *out_data, + u32 out_index) +/* Shifts data into instruction register, capturing output data */ +{ + struct altera_jtag *js = &astate->js; + int start_code = 0; + u32 alloc_chars = 0; + u32 shift_count = js->ir_pre + count + js->ir_post; + int status = 0; + enum altera_jtag_state start_state = ILLEGAL_JTAG_STATE; + + switch (js->jtag_state) { + case ILLEGAL_JTAG_STATE: + case RESET: + case IDLE: + start_code = 0; + start_state = IDLE; + break; + + case DRSELECT: + case DRCAPTURE: + case DRSHIFT: + case DREXIT1: + case DRPAUSE: + case DREXIT2: + case DRUPDATE: + start_code = 1; + start_state = DRPAUSE; + break; + + case IRSELECT: + case IRCAPTURE: + case IRSHIFT: + case IREXIT1: + case IRPAUSE: + case IREXIT2: + case IRUPDATE: + start_code = 2; + start_state = IRPAUSE; + break; + + default: + status = -EREMOTEIO; + break; + } + + if (status == 0) + if (js->jtag_state != start_state) + status = altera_goto_jstate(astate, start_state); + + if (status == 0) { + if (shift_count > js->ir_length) { + alloc_chars = (shift_count + 7) >> 3; + kfree(js->ir_buffer); + js->ir_buffer = (u8 *)alt_malloc(alloc_chars); + if (js->ir_buffer == NULL) + status = -ENOMEM; + else + js->ir_length = alloc_chars * 8; + + } + } + + if (status == 0) { + /* + * Copy preamble data, IR data, + * and postamble data into a buffer + */ + altera_concatenate_data(js->ir_buffer, + js->ir_pre_data, + js->ir_pre, + in_data, + in_index, + count, + js->ir_post_data, + js->ir_post); + + /* Do the IRSCAN */ + alt_jtag_irscan(astate, + start_code, + shift_count, + js->ir_buffer, + js->ir_buffer); + + /* alt_jtag_irscan() always ends in IRPAUSE state */ + js->jtag_state = IRPAUSE; + } + + if (status == 0) + if (js->irstop_state != IRPAUSE) + status = altera_goto_jstate(astate, js->irstop_state); + + + if (status == 0) + /* Now extract the returned data from the buffer */ + altera_extract_target_data(js->ir_buffer, + out_data, out_index, + js->ir_pre, count); + + return status; +} + +int altera_drscan(struct altera_state *astate, + u32 count, + u8 *tdi_data, + u32 start_index) +/* Shifts data into data register (ignoring output data) */ +{ + struct altera_jtag *js = &astate->js; + int start_code = 0; + u32 alloc_chars = 0; + u32 shift_count = js->dr_pre + count + js->dr_post; + int status = 0; + enum altera_jtag_state start_state = ILLEGAL_JTAG_STATE; + + switch (js->jtag_state) { + case ILLEGAL_JTAG_STATE: + case RESET: + case IDLE: + start_code = 0; + start_state = IDLE; + break; + + case DRSELECT: + case DRCAPTURE: + case DRSHIFT: + case DREXIT1: + case DRPAUSE: + case DREXIT2: + case DRUPDATE: + start_code = 1; + start_state = DRPAUSE; + break; + + case IRSELECT: + case IRCAPTURE: + case IRSHIFT: + case IREXIT1: + case IRPAUSE: + case IREXIT2: + case IRUPDATE: + start_code = 2; + start_state = IRPAUSE; + break; + + default: + status = -EREMOTEIO; + break; + } + + if (status == 0) + if (js->jtag_state != start_state) + status = altera_goto_jstate(astate, start_state); + + if (status == 0) { + if (shift_count > js->dr_length) { + alloc_chars = (shift_count + 7) >> 3; + kfree(js->dr_buffer); + js->dr_buffer = (u8 *)alt_malloc(alloc_chars); + if (js->dr_buffer == NULL) + status = -ENOMEM; + else + js->dr_length = alloc_chars * 8; + + } + } + + if (status == 0) { + /* + * Copy preamble data, DR data, + * and postamble data into a buffer + */ + altera_concatenate_data(js->dr_buffer, + js->dr_pre_data, + js->dr_pre, + tdi_data, + start_index, + count, + js->dr_post_data, + js->dr_post); + /* Do the DRSCAN */ + alt_jtag_drscan(astate, start_code, shift_count, + js->dr_buffer, NULL); + /* alt_jtag_drscan() always ends in DRPAUSE state */ + js->jtag_state = DRPAUSE; + } + + if (status == 0) + if (js->drstop_state != DRPAUSE) + status = altera_goto_jstate(astate, js->drstop_state); + + return status; +} + +int altera_swap_dr(struct altera_state *astate, u32 count, + u8 *in_data, u32 in_index, + u8 *out_data, u32 out_index) +/* Shifts data into data register, capturing output data */ +{ + struct altera_jtag *js = &astate->js; + int start_code = 0; + u32 alloc_chars = 0; + u32 shift_count = js->dr_pre + count + js->dr_post; + int status = 0; + enum altera_jtag_state start_state = ILLEGAL_JTAG_STATE; + + switch (js->jtag_state) { + case ILLEGAL_JTAG_STATE: + case RESET: + case IDLE: + start_code = 0; + start_state = IDLE; + break; + + case DRSELECT: + case DRCAPTURE: + case DRSHIFT: + case DREXIT1: + case DRPAUSE: + case DREXIT2: + case DRUPDATE: + start_code = 1; + start_state = DRPAUSE; + break; + + case IRSELECT: + case IRCAPTURE: + case IRSHIFT: + case IREXIT1: + case IRPAUSE: + case IREXIT2: + case IRUPDATE: + start_code = 2; + start_state = IRPAUSE; + break; + + default: + status = -EREMOTEIO; + break; + } + + if (status == 0) + if (js->jtag_state != start_state) + status = altera_goto_jstate(astate, start_state); + + if (status == 0) { + if (shift_count > js->dr_length) { + alloc_chars = (shift_count + 7) >> 3; + kfree(js->dr_buffer); + js->dr_buffer = (u8 *)alt_malloc(alloc_chars); + + if (js->dr_buffer == NULL) + status = -ENOMEM; + else + js->dr_length = alloc_chars * 8; + + } + } + + if (status == 0) { + /* + * Copy preamble data, DR data, + * and postamble data into a buffer + */ + altera_concatenate_data(js->dr_buffer, + js->dr_pre_data, + js->dr_pre, + in_data, + in_index, + count, + js->dr_post_data, + js->dr_post); + + /* Do the DRSCAN */ + alt_jtag_drscan(astate, + start_code, + shift_count, + js->dr_buffer, + js->dr_buffer); + + /* alt_jtag_drscan() always ends in DRPAUSE state */ + js->jtag_state = DRPAUSE; + } + + if (status == 0) + if (js->drstop_state != DRPAUSE) + status = altera_goto_jstate(astate, js->drstop_state); + + if (status == 0) + /* Now extract the returned data from the buffer */ + altera_extract_target_data(js->dr_buffer, + out_data, + out_index, + js->dr_pre, + count); + + return status; +} + +void altera_free_buffers(struct altera_state *astate) +{ + struct altera_jtag *js = &astate->js; + /* If the JTAG interface was used, reset it to TLR */ + if (js->jtag_state != ILLEGAL_JTAG_STATE) + altera_jreset_idle(astate); + + kfree(js->dr_pre_data); + js->dr_pre_data = NULL; + + kfree(js->dr_post_data); + js->dr_post_data = NULL; + + kfree(js->dr_buffer); + js->dr_buffer = NULL; + + kfree(js->ir_pre_data); + js->ir_pre_data = NULL; + + kfree(js->ir_post_data); + js->ir_post_data = NULL; + + kfree(js->ir_buffer); + js->ir_buffer = NULL; +} diff --git a/drivers/misc/altera-stapl/altera-jtag.h b/drivers/misc/altera-stapl/altera-jtag.h new file mode 100644 index 000000000..2f97e36a2 --- /dev/null +++ b/drivers/misc/altera-stapl/altera-jtag.h @@ -0,0 +1,113 @@ +/* + * altera-jtag.h + * + * altera FPGA driver + * + * Copyright (C) Altera Corporation 1998-2001 + * Copyright (C) 2010 NetUP Inc. + * Copyright (C) 2010 Igor M. Liplianin + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef ALTERA_JTAG_H +#define ALTERA_JTAG_H + +/* Function Prototypes */ +enum altera_jtag_state { + ILLEGAL_JTAG_STATE = -1, + RESET = 0, + IDLE = 1, + DRSELECT = 2, + DRCAPTURE = 3, + DRSHIFT = 4, + DREXIT1 = 5, + DRPAUSE = 6, + DREXIT2 = 7, + DRUPDATE = 8, + IRSELECT = 9, + IRCAPTURE = 10, + IRSHIFT = 11, + IREXIT1 = 12, + IRPAUSE = 13, + IREXIT2 = 14, + IRUPDATE = 15 + +}; + +struct altera_jtag { + /* Global variable to store the current JTAG state */ + enum altera_jtag_state jtag_state; + + /* Store current stop-state for DR and IR scan commands */ + enum altera_jtag_state drstop_state; + enum altera_jtag_state irstop_state; + + /* Store current padding values */ + u32 dr_pre; + u32 dr_post; + u32 ir_pre; + u32 ir_post; + u32 dr_length; + u32 ir_length; + u8 *dr_pre_data; + u8 *dr_post_data; + u8 *ir_pre_data; + u8 *ir_post_data; + u8 *dr_buffer; + u8 *ir_buffer; +}; + +#define ALTERA_STACK_SIZE 128 +#define ALTERA_MESSAGE_LENGTH 1024 + +struct altera_state { + struct altera_config *config; + struct altera_jtag js; + char msg_buff[ALTERA_MESSAGE_LENGTH + 1]; + long stack[ALTERA_STACK_SIZE]; +}; + +int altera_jinit(struct altera_state *astate); +int altera_set_drstop(struct altera_jtag *js, enum altera_jtag_state state); +int altera_set_irstop(struct altera_jtag *js, enum altera_jtag_state state); +int altera_set_dr_pre(struct altera_jtag *js, u32 count, u32 start_index, + u8 *preamble_data); +int altera_set_ir_pre(struct altera_jtag *js, u32 count, u32 start_index, + u8 *preamble_data); +int altera_set_dr_post(struct altera_jtag *js, u32 count, u32 start_index, + u8 *postamble_data); +int altera_set_ir_post(struct altera_jtag *js, u32 count, u32 start_index, + u8 *postamble_data); +int altera_goto_jstate(struct altera_state *astate, + enum altera_jtag_state state); +int altera_wait_cycles(struct altera_state *astate, s32 cycles, + enum altera_jtag_state wait_state); +int altera_wait_msecs(struct altera_state *astate, s32 microseconds, + enum altera_jtag_state wait_state); +int altera_irscan(struct altera_state *astate, u32 count, + u8 *tdi_data, u32 start_index); +int altera_swap_ir(struct altera_state *astate, + u32 count, u8 *in_data, + u32 in_index, u8 *out_data, + u32 out_index); +int altera_drscan(struct altera_state *astate, u32 count, + u8 *tdi_data, u32 start_index); +int altera_swap_dr(struct altera_state *astate, u32 count, + u8 *in_data, u32 in_index, + u8 *out_data, u32 out_index); +void altera_free_buffers(struct altera_state *astate); +#endif /* ALTERA_JTAG_H */ diff --git a/drivers/misc/altera-stapl/altera-lpt.c b/drivers/misc/altera-stapl/altera-lpt.c new file mode 100644 index 000000000..91456a036 --- /dev/null +++ b/drivers/misc/altera-stapl/altera-lpt.c @@ -0,0 +1,70 @@ +/* + * altera-lpt.c + * + * altera FPGA driver + * + * Copyright (C) Altera Corporation 1998-2001 + * Copyright (C) 2010 NetUP Inc. + * Copyright (C) 2010 Abylay Ospan + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include +#include +#include "altera-exprt.h" + +static int lpt_hardware_initialized; + +static void byteblaster_write(int port, int data) +{ + outb((u8)data, (u16)(port + 0x378)); +}; + +static int byteblaster_read(int port) +{ + int data = 0; + data = inb((u16)(port + 0x378)); + return data & 0xff; +}; + +int netup_jtag_io_lpt(void *device, int tms, int tdi, int read_tdo) +{ + int data = 0; + int tdo = 0; + int initial_lpt_ctrl = 0; + + if (!lpt_hardware_initialized) { + initial_lpt_ctrl = byteblaster_read(2); + byteblaster_write(2, (initial_lpt_ctrl | 0x02) & 0xdf); + lpt_hardware_initialized = 1; + } + + data = ((tdi ? 0x40 : 0) | (tms ? 0x02 : 0)); + + byteblaster_write(0, data); + + if (read_tdo) { + tdo = byteblaster_read(1); + tdo = ((tdo & 0x80) ? 0 : 1); + } + + byteblaster_write(0, data | 0x01); + + byteblaster_write(0, data); + + return tdo; +} diff --git a/drivers/misc/altera-stapl/altera.c b/drivers/misc/altera-stapl/altera.c new file mode 100644 index 000000000..94bde09d9 --- /dev/null +++ b/drivers/misc/altera-stapl/altera.c @@ -0,0 +1,2536 @@ +/* + * altera.c + * + * altera FPGA driver + * + * Copyright (C) Altera Corporation 1998-2001 + * Copyright (C) 2010,2011 NetUP Inc. + * Copyright (C) 2010,2011 Igor M. Liplianin + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include +#include +#include +#include +#include +#include +#include +#include "altera-exprt.h" +#include "altera-jtag.h" + +static int debug = 1; +module_param(debug, int, 0644); +MODULE_PARM_DESC(debug, "enable debugging information"); + +MODULE_DESCRIPTION("altera FPGA kernel module"); +MODULE_AUTHOR("Igor M. Liplianin "); +MODULE_LICENSE("GPL"); + +#define dprintk(args...) \ + if (debug) { \ + printk(KERN_DEBUG args); \ + } + +enum altera_fpga_opcode { + OP_NOP = 0, + OP_DUP, + OP_SWP, + OP_ADD, + OP_SUB, + OP_MULT, + OP_DIV, + OP_MOD, + OP_SHL, + OP_SHR, + OP_NOT, + OP_AND, + OP_OR, + OP_XOR, + OP_INV, + OP_GT, + OP_LT, + OP_RET, + OP_CMPS, + OP_PINT, + OP_PRNT, + OP_DSS, + OP_DSSC, + OP_ISS, + OP_ISSC, + OP_DPR = 0x1c, + OP_DPRL, + OP_DPO, + OP_DPOL, + OP_IPR, + OP_IPRL, + OP_IPO, + OP_IPOL, + OP_PCHR, + OP_EXIT, + OP_EQU, + OP_POPT, + OP_ABS = 0x2c, + OP_BCH0, + OP_PSH0 = 0x2f, + OP_PSHL = 0x40, + OP_PSHV, + OP_JMP, + OP_CALL, + OP_NEXT, + OP_PSTR, + OP_SINT = 0x47, + OP_ST, + OP_ISTP, + OP_DSTP, + OP_SWPN, + OP_DUPN, + OP_POPV, + OP_POPE, + OP_POPA, + OP_JMPZ, + OP_DS, + OP_IS, + OP_DPRA, + OP_DPOA, + OP_IPRA, + OP_IPOA, + OP_EXPT, + OP_PSHE, + OP_PSHA, + OP_DYNA, + OP_EXPV = 0x5c, + OP_COPY = 0x80, + OP_REVA, + OP_DSC, + OP_ISC, + OP_WAIT, + OP_VS, + OP_CMPA = 0xc0, + OP_VSC, +}; + +struct altera_procinfo { + char *name; + u8 attrs; + struct altera_procinfo *next; +}; + +/* This function checks if enough parameters are available on the stack. */ +static int altera_check_stack(int stack_ptr, int count, int *status) +{ + if (stack_ptr < count) { + *status = -EOVERFLOW; + return 0; + } + + return 1; +} + +static void altera_export_int(char *key, s32 value) +{ + dprintk("Export: key = \"%s\", value = %d\n", key, value); +} + +#define HEX_LINE_CHARS 72 +#define HEX_LINE_BITS (HEX_LINE_CHARS * 4) + +static void altera_export_bool_array(char *key, u8 *data, s32 count) +{ + char string[HEX_LINE_CHARS + 1]; + s32 i, offset; + u32 size, line, lines, linebits, value, j, k; + + if (count > HEX_LINE_BITS) { + dprintk("Export: key = \"%s\", %d bits, value = HEX\n", + key, count); + lines = (count + (HEX_LINE_BITS - 1)) / HEX_LINE_BITS; + + for (line = 0; line < lines; ++line) { + if (line < (lines - 1)) { + linebits = HEX_LINE_BITS; + size = HEX_LINE_CHARS; + offset = count - ((line + 1) * HEX_LINE_BITS); + } else { + linebits = + count - ((lines - 1) * HEX_LINE_BITS); + size = (linebits + 3) / 4; + offset = 0L; + } + + string[size] = '\0'; + j = size - 1; + value = 0; + + for (k = 0; k < linebits; ++k) { + i = k + offset; + if (data[i >> 3] & (1 << (i & 7))) + value |= (1 << (i & 3)); + if ((i & 3) == 3) { + sprintf(&string[j], "%1x", value); + value = 0; + --j; + } + } + if ((k & 3) > 0) + sprintf(&string[j], "%1x", value); + + dprintk("%s\n", string); + } + + } else { + size = (count + 3) / 4; + string[size] = '\0'; + j = size - 1; + value = 0; + + for (i = 0; i < count; ++i) { + if (data[i >> 3] & (1 << (i & 7))) + value |= (1 << (i & 3)); + if ((i & 3) == 3) { + sprintf(&string[j], "%1x", value); + value = 0; + --j; + } + } + if ((i & 3) > 0) + sprintf(&string[j], "%1x", value); + + dprintk("Export: key = \"%s\", %d bits, value = HEX %s\n", + key, count, string); + } +} + +static int altera_execute(struct altera_state *astate, + u8 *p, + s32 program_size, + s32 *error_address, + int *exit_code, + int *format_version) +{ + struct altera_config *aconf = astate->config; + char *msg_buff = astate->msg_buff; + long *stack = astate->stack; + int status = 0; + u32 first_word = 0L; + u32 action_table = 0L; + u32 proc_table = 0L; + u32 str_table = 0L; + u32 sym_table = 0L; + u32 data_sect = 0L; + u32 code_sect = 0L; + u32 debug_sect = 0L; + u32 action_count = 0L; + u32 proc_count = 0L; + u32 sym_count = 0L; + long *vars = NULL; + s32 *var_size = NULL; + char *attrs = NULL; + u8 *proc_attributes = NULL; + u32 pc; + u32 opcode_address; + u32 args[3]; + u32 opcode; + u32 name_id; + u8 charbuf[4]; + long long_tmp; + u32 variable_id; + u8 *charptr_tmp; + u8 *charptr_tmp2; + long *longptr_tmp; + int version = 0; + int delta = 0; + int stack_ptr = 0; + u32 arg_count; + int done = 0; + int bad_opcode = 0; + u32 count; + u32 index; + u32 index2; + s32 long_count; + s32 long_idx; + s32 long_idx2; + u32 i; + u32 j; + u32 uncomp_size; + u32 offset; + u32 value; + int current_proc = 0; + int reverse; + + char *name; + + dprintk("%s\n", __func__); + + /* Read header information */ + if (program_size > 52L) { + first_word = get_unaligned_be32(&p[0]); + version = (first_word & 1L); + *format_version = version + 1; + delta = version * 8; + + action_table = get_unaligned_be32(&p[4]); + proc_table = get_unaligned_be32(&p[8]); + str_table = get_unaligned_be32(&p[4 + delta]); + sym_table = get_unaligned_be32(&p[16 + delta]); + data_sect = get_unaligned_be32(&p[20 + delta]); + code_sect = get_unaligned_be32(&p[24 + delta]); + debug_sect = get_unaligned_be32(&p[28 + delta]); + action_count = get_unaligned_be32(&p[40 + delta]); + proc_count = get_unaligned_be32(&p[44 + delta]); + sym_count = get_unaligned_be32(&p[48 + (2 * delta)]); + } + + if ((first_word != 0x4A414D00L) && (first_word != 0x4A414D01L)) { + done = 1; + status = -EIO; + goto exit_done; + } + + if (sym_count <= 0) + goto exit_done; + + vars = kcalloc(sym_count, sizeof(long), GFP_KERNEL); + + if (vars == NULL) + status = -ENOMEM; + + if (status == 0) { + var_size = kcalloc(sym_count, sizeof(s32), GFP_KERNEL); + + if (var_size == NULL) + status = -ENOMEM; + } + + if (status == 0) { + attrs = kzalloc(sym_count, GFP_KERNEL); + + if (attrs == NULL) + status = -ENOMEM; + } + + if ((status == 0) && (version > 0)) { + proc_attributes = kzalloc(proc_count, GFP_KERNEL); + + if (proc_attributes == NULL) + status = -ENOMEM; + } + + if (status != 0) + goto exit_done; + + delta = version * 2; + + for (i = 0; i < sym_count; ++i) { + offset = (sym_table + ((11 + delta) * i)); + + value = get_unaligned_be32(&p[offset + 3 + delta]); + + attrs[i] = p[offset]; + + /* + * use bit 7 of attribute byte to indicate that + * this buffer was dynamically allocated + * and should be freed later + */ + attrs[i] &= 0x7f; + + var_size[i] = get_unaligned_be32(&p[offset + 7 + delta]); + + /* + * Attribute bits: + * bit 0: 0 = read-only, 1 = read-write + * bit 1: 0 = not compressed, 1 = compressed + * bit 2: 0 = not initialized, 1 = initialized + * bit 3: 0 = scalar, 1 = array + * bit 4: 0 = Boolean, 1 = integer + * bit 5: 0 = declared variable, + * 1 = compiler created temporary variable + */ + + if ((attrs[i] & 0x0c) == 0x04) + /* initialized scalar variable */ + vars[i] = value; + else if ((attrs[i] & 0x1e) == 0x0e) { + /* initialized compressed Boolean array */ + uncomp_size = get_unaligned_le32(&p[data_sect + value]); + + /* allocate a buffer for the uncompressed data */ + vars[i] = (long)kzalloc(uncomp_size, GFP_KERNEL); + if (vars[i] == 0L) + status = -ENOMEM; + else { + /* set flag so buffer will be freed later */ + attrs[i] |= 0x80; + + /* uncompress the data */ + if (altera_shrink(&p[data_sect + value], + var_size[i], + (u8 *)vars[i], + uncomp_size, + version) != uncomp_size) + /* decompression failed */ + status = -EIO; + else + var_size[i] = uncomp_size * 8L; + + } + } else if ((attrs[i] & 0x1e) == 0x0c) { + /* initialized Boolean array */ + vars[i] = value + data_sect + (long)p; + } else if ((attrs[i] & 0x1c) == 0x1c) { + /* initialized integer array */ + vars[i] = value + data_sect; + } else if ((attrs[i] & 0x0c) == 0x08) { + /* uninitialized array */ + + /* flag attrs so that memory is freed */ + attrs[i] |= 0x80; + + if (var_size[i] > 0) { + u32 size; + + if (attrs[i] & 0x10) + /* integer array */ + size = (var_size[i] * sizeof(s32)); + else + /* Boolean array */ + size = ((var_size[i] + 7L) / 8L); + + vars[i] = (long)kzalloc(size, GFP_KERNEL); + + if (vars[i] == 0) { + status = -ENOMEM; + } else { + /* zero out memory */ + for (j = 0; j < size; ++j) + ((u8 *)(vars[i]))[j] = 0; + + } + } else + vars[i] = 0; + + } else + vars[i] = 0; + + } + +exit_done: + if (status != 0) + done = 1; + + altera_jinit(astate); + + pc = code_sect; + msg_buff[0] = '\0'; + + /* + * For JBC version 2, we will execute the procedures corresponding to + * the selected ACTION + */ + if (version > 0) { + if (aconf->action == NULL) { + status = -EINVAL; + done = 1; + } else { + int action_found = 0; + for (i = 0; (i < action_count) && !action_found; ++i) { + name_id = get_unaligned_be32(&p[action_table + + (12 * i)]); + + name = &p[str_table + name_id]; + + if (strncasecmp(aconf->action, name, strlen(name)) == 0) { + action_found = 1; + current_proc = + get_unaligned_be32(&p[action_table + + (12 * i) + 8]); + } + } + + if (!action_found) { + status = -EINVAL; + done = 1; + } + } + + if (status == 0) { + int first_time = 1; + i = current_proc; + while ((i != 0) || first_time) { + first_time = 0; + /* check procedure attribute byte */ + proc_attributes[i] = + (p[proc_table + + (13 * i) + 8] & + 0x03); + + /* + * BIT0 - OPTIONAL + * BIT1 - RECOMMENDED + * BIT6 - FORCED OFF + * BIT7 - FORCED ON + */ + + i = get_unaligned_be32(&p[proc_table + + (13 * i) + 4]); + } + + /* + * Set current_proc to the first procedure + * to be executed + */ + i = current_proc; + while ((i != 0) && + ((proc_attributes[i] == 1) || + ((proc_attributes[i] & 0xc0) == 0x40))) { + i = get_unaligned_be32(&p[proc_table + + (13 * i) + 4]); + } + + if ((i != 0) || ((i == 0) && (current_proc == 0) && + ((proc_attributes[0] != 1) && + ((proc_attributes[0] & 0xc0) != 0x40)))) { + current_proc = i; + pc = code_sect + + get_unaligned_be32(&p[proc_table + + (13 * i) + 9]); + if ((pc < code_sect) || (pc >= debug_sect)) + status = -ERANGE; + } else + /* there are no procedures to execute! */ + done = 1; + + } + } + + msg_buff[0] = '\0'; + + while (!done) { + opcode = (p[pc] & 0xff); + opcode_address = pc; + ++pc; + + if (debug > 1) + printk("opcode: %02x\n", opcode); + + arg_count = (opcode >> 6) & 3; + for (i = 0; i < arg_count; ++i) { + args[i] = get_unaligned_be32(&p[pc]); + pc += 4; + } + + switch (opcode) { + case OP_NOP: + break; + case OP_DUP: + if (altera_check_stack(stack_ptr, 1, &status)) { + stack[stack_ptr] = stack[stack_ptr - 1]; + ++stack_ptr; + } + break; + case OP_SWP: + if (altera_check_stack(stack_ptr, 2, &status)) { + long_tmp = stack[stack_ptr - 2]; + stack[stack_ptr - 2] = stack[stack_ptr - 1]; + stack[stack_ptr - 1] = long_tmp; + } + break; + case OP_ADD: + if (altera_check_stack(stack_ptr, 2, &status)) { + --stack_ptr; + stack[stack_ptr - 1] += stack[stack_ptr]; + } + break; + case OP_SUB: + if (altera_check_stack(stack_ptr, 2, &status)) { + --stack_ptr; + stack[stack_ptr - 1] -= stack[stack_ptr]; + } + break; + case OP_MULT: + if (altera_check_stack(stack_ptr, 2, &status)) { + --stack_ptr; + stack[stack_ptr - 1] *= stack[stack_ptr]; + } + break; + case OP_DIV: + if (altera_check_stack(stack_ptr, 2, &status)) { + --stack_ptr; + stack[stack_ptr - 1] /= stack[stack_ptr]; + } + break; + case OP_MOD: + if (altera_check_stack(stack_ptr, 2, &status)) { + --stack_ptr; + stack[stack_ptr - 1] %= stack[stack_ptr]; + } + break; + case OP_SHL: + if (altera_check_stack(stack_ptr, 2, &status)) { + --stack_ptr; + stack[stack_ptr - 1] <<= stack[stack_ptr]; + } + break; + case OP_SHR: + if (altera_check_stack(stack_ptr, 2, &status)) { + --stack_ptr; + stack[stack_ptr - 1] >>= stack[stack_ptr]; + } + break; + case OP_NOT: + if (altera_check_stack(stack_ptr, 1, &status)) + stack[stack_ptr - 1] ^= (-1L); + + break; + case OP_AND: + if (altera_check_stack(stack_ptr, 2, &status)) { + --stack_ptr; + stack[stack_ptr - 1] &= stack[stack_ptr]; + } + break; + case OP_OR: + if (altera_check_stack(stack_ptr, 2, &status)) { + --stack_ptr; + stack[stack_ptr - 1] |= stack[stack_ptr]; + } + break; + case OP_XOR: + if (altera_check_stack(stack_ptr, 2, &status)) { + --stack_ptr; + stack[stack_ptr - 1] ^= stack[stack_ptr]; + } + break; + case OP_INV: + if (!altera_check_stack(stack_ptr, 1, &status)) + break; + stack[stack_ptr - 1] = stack[stack_ptr - 1] ? 0L : 1L; + break; + case OP_GT: + if (!altera_check_stack(stack_ptr, 2, &status)) + break; + --stack_ptr; + stack[stack_ptr - 1] = + (stack[stack_ptr - 1] > stack[stack_ptr]) ? + 1L : 0L; + + break; + case OP_LT: + if (!altera_check_stack(stack_ptr, 2, &status)) + break; + --stack_ptr; + stack[stack_ptr - 1] = + (stack[stack_ptr - 1] < stack[stack_ptr]) ? + 1L : 0L; + + break; + case OP_RET: + if ((version > 0) && (stack_ptr == 0)) { + /* + * We completed one of the main procedures + * of an ACTION. + * Find the next procedure + * to be executed and jump to it. + * If there are no more procedures, then EXIT. + */ + i = get_unaligned_be32(&p[proc_table + + (13 * current_proc) + 4]); + while ((i != 0) && + ((proc_attributes[i] == 1) || + ((proc_attributes[i] & 0xc0) == 0x40))) + i = get_unaligned_be32(&p[proc_table + + (13 * i) + 4]); + + if (i == 0) { + /* no procedures to execute! */ + done = 1; + *exit_code = 0; /* success */ + } else { + current_proc = i; + pc = code_sect + get_unaligned_be32( + &p[proc_table + + (13 * i) + 9]); + if ((pc < code_sect) || + (pc >= debug_sect)) + status = -ERANGE; + } + + } else + if (altera_check_stack(stack_ptr, 1, &status)) { + pc = stack[--stack_ptr] + code_sect; + if ((pc <= code_sect) || + (pc >= debug_sect)) + status = -ERANGE; + + } + + break; + case OP_CMPS: + /* + * Array short compare + * ...stack 0 is source 1 value + * ...stack 1 is source 2 value + * ...stack 2 is mask value + * ...stack 3 is count + */ + if (altera_check_stack(stack_ptr, 4, &status)) { + s32 a = stack[--stack_ptr]; + s32 b = stack[--stack_ptr]; + long_tmp = stack[--stack_ptr]; + count = stack[stack_ptr - 1]; + + if ((count < 1) || (count > 32)) + status = -ERANGE; + else { + long_tmp &= ((-1L) >> (32 - count)); + + stack[stack_ptr - 1] = + ((a & long_tmp) == (b & long_tmp)) + ? 1L : 0L; + } + } + break; + case OP_PINT: + /* + * PRINT add integer + * ...stack 0 is integer value + */ + if (!altera_check_stack(stack_ptr, 1, &status)) + break; + sprintf(&msg_buff[strlen(msg_buff)], + "%ld", stack[--stack_ptr]); + break; + case OP_PRNT: + /* PRINT finish */ + if (debug) + printk(msg_buff, "\n"); + + msg_buff[0] = '\0'; + break; + case OP_DSS: + /* + * DRSCAN short + * ...stack 0 is scan data + * ...stack 1 is count + */ + if (!altera_check_stack(stack_ptr, 2, &status)) + break; + long_tmp = stack[--stack_ptr]; + count = stack[--stack_ptr]; + put_unaligned_le32(long_tmp, &charbuf[0]); + status = altera_drscan(astate, count, charbuf, 0); + break; + case OP_DSSC: + /* + * DRSCAN short with capture + * ...stack 0 is scan data + * ...stack 1 is count + */ + if (!altera_check_stack(stack_ptr, 2, &status)) + break; + long_tmp = stack[--stack_ptr]; + count = stack[stack_ptr - 1]; + put_unaligned_le32(long_tmp, &charbuf[0]); + status = altera_swap_dr(astate, count, charbuf, + 0, charbuf, 0); + stack[stack_ptr - 1] = get_unaligned_le32(&charbuf[0]); + break; + case OP_ISS: + /* + * IRSCAN short + * ...stack 0 is scan data + * ...stack 1 is count + */ + if (!altera_check_stack(stack_ptr, 2, &status)) + break; + long_tmp = stack[--stack_ptr]; + count = stack[--stack_ptr]; + put_unaligned_le32(long_tmp, &charbuf[0]); + status = altera_irscan(astate, count, charbuf, 0); + break; + case OP_ISSC: + /* + * IRSCAN short with capture + * ...stack 0 is scan data + * ...stack 1 is count + */ + if (!altera_check_stack(stack_ptr, 2, &status)) + break; + long_tmp = stack[--stack_ptr]; + count = stack[stack_ptr - 1]; + put_unaligned_le32(long_tmp, &charbuf[0]); + status = altera_swap_ir(astate, count, charbuf, + 0, charbuf, 0); + stack[stack_ptr - 1] = get_unaligned_le32(&charbuf[0]); + break; + case OP_DPR: + if (!altera_check_stack(stack_ptr, 1, &status)) + break; + count = stack[--stack_ptr]; + status = altera_set_dr_pre(&astate->js, count, 0, NULL); + break; + case OP_DPRL: + /* + * DRPRE with literal data + * ...stack 0 is count + * ...stack 1 is literal data + */ + if (!altera_check_stack(stack_ptr, 2, &status)) + break; + count = stack[--stack_ptr]; + long_tmp = stack[--stack_ptr]; + put_unaligned_le32(long_tmp, &charbuf[0]); + status = altera_set_dr_pre(&astate->js, count, 0, + charbuf); + break; + case OP_DPO: + /* + * DRPOST + * ...stack 0 is count + */ + if (altera_check_stack(stack_ptr, 1, &status)) { + count = stack[--stack_ptr]; + status = altera_set_dr_post(&astate->js, count, + 0, NULL); + } + break; + case OP_DPOL: + /* + * DRPOST with literal data + * ...stack 0 is count + * ...stack 1 is literal data + */ + if (!altera_check_stack(stack_ptr, 2, &status)) + break; + count = stack[--stack_ptr]; + long_tmp = stack[--stack_ptr]; + put_unaligned_le32(long_tmp, &charbuf[0]); + status = altera_set_dr_post(&astate->js, count, 0, + charbuf); + break; + case OP_IPR: + if (altera_check_stack(stack_ptr, 1, &status)) { + count = stack[--stack_ptr]; + status = altera_set_ir_pre(&astate->js, count, + 0, NULL); + } + break; + case OP_IPRL: + /* + * IRPRE with literal data + * ...stack 0 is count + * ...stack 1 is literal data + */ + if (altera_check_stack(stack_ptr, 2, &status)) { + count = stack[--stack_ptr]; + long_tmp = stack[--stack_ptr]; + put_unaligned_le32(long_tmp, &charbuf[0]); + status = altera_set_ir_pre(&astate->js, count, + 0, charbuf); + } + break; + case OP_IPO: + /* + * IRPOST + * ...stack 0 is count + */ + if (altera_check_stack(stack_ptr, 1, &status)) { + count = stack[--stack_ptr]; + status = altera_set_ir_post(&astate->js, count, + 0, NULL); + } + break; + case OP_IPOL: + /* + * IRPOST with literal data + * ...stack 0 is count + * ...stack 1 is literal data + */ + if (!altera_check_stack(stack_ptr, 2, &status)) + break; + count = stack[--stack_ptr]; + long_tmp = stack[--stack_ptr]; + put_unaligned_le32(long_tmp, &charbuf[0]); + status = altera_set_ir_post(&astate->js, count, 0, + charbuf); + break; + case OP_PCHR: + if (altera_check_stack(stack_ptr, 1, &status)) { + u8 ch; + count = strlen(msg_buff); + ch = (char) stack[--stack_ptr]; + if ((ch < 1) || (ch > 127)) { + /* + * character code out of range + * instead of flagging an error, + * force the value to 127 + */ + ch = 127; + } + msg_buff[count] = ch; + msg_buff[count + 1] = '\0'; + } + break; + case OP_EXIT: + if (altera_check_stack(stack_ptr, 1, &status)) + *exit_code = stack[--stack_ptr]; + + done = 1; + break; + case OP_EQU: + if (!altera_check_stack(stack_ptr, 2, &status)) + break; + --stack_ptr; + stack[stack_ptr - 1] = + (stack[stack_ptr - 1] == stack[stack_ptr]) ? + 1L : 0L; + break; + case OP_POPT: + if (altera_check_stack(stack_ptr, 1, &status)) + --stack_ptr; + + break; + case OP_ABS: + if (!altera_check_stack(stack_ptr, 1, &status)) + break; + if (stack[stack_ptr - 1] < 0) + stack[stack_ptr - 1] = 0 - stack[stack_ptr - 1]; + + break; + case OP_BCH0: + /* + * Batch operation 0 + * SWP + * SWPN 7 + * SWP + * SWPN 6 + * DUPN 8 + * SWPN 2 + * SWP + * DUPN 6 + * DUPN 6 + */ + + /* SWP */ + if (altera_check_stack(stack_ptr, 2, &status)) { + long_tmp = stack[stack_ptr - 2]; + stack[stack_ptr - 2] = stack[stack_ptr - 1]; + stack[stack_ptr - 1] = long_tmp; + } + + /* SWPN 7 */ + index = 7 + 1; + if (altera_check_stack(stack_ptr, index, &status)) { + long_tmp = stack[stack_ptr - index]; + stack[stack_ptr - index] = stack[stack_ptr - 1]; + stack[stack_ptr - 1] = long_tmp; + } + + /* SWP */ + if (altera_check_stack(stack_ptr, 2, &status)) { + long_tmp = stack[stack_ptr - 2]; + stack[stack_ptr - 2] = stack[stack_ptr - 1]; + stack[stack_ptr - 1] = long_tmp; + } + + /* SWPN 6 */ + index = 6 + 1; + if (altera_check_stack(stack_ptr, index, &status)) { + long_tmp = stack[stack_ptr - index]; + stack[stack_ptr - index] = stack[stack_ptr - 1]; + stack[stack_ptr - 1] = long_tmp; + } + + /* DUPN 8 */ + index = 8 + 1; + if (altera_check_stack(stack_ptr, index, &status)) { + stack[stack_ptr] = stack[stack_ptr - index]; + ++stack_ptr; + } + + /* SWPN 2 */ + index = 2 + 1; + if (altera_check_stack(stack_ptr, index, &status)) { + long_tmp = stack[stack_ptr - index]; + stack[stack_ptr - index] = stack[stack_ptr - 1]; + stack[stack_ptr - 1] = long_tmp; + } + + /* SWP */ + if (altera_check_stack(stack_ptr, 2, &status)) { + long_tmp = stack[stack_ptr - 2]; + stack[stack_ptr - 2] = stack[stack_ptr - 1]; + stack[stack_ptr - 1] = long_tmp; + } + + /* DUPN 6 */ + index = 6 + 1; + if (altera_check_stack(stack_ptr, index, &status)) { + stack[stack_ptr] = stack[stack_ptr - index]; + ++stack_ptr; + } + + /* DUPN 6 */ + index = 6 + 1; + if (altera_check_stack(stack_ptr, index, &status)) { + stack[stack_ptr] = stack[stack_ptr - index]; + ++stack_ptr; + } + break; + case OP_PSH0: + stack[stack_ptr++] = 0; + break; + case OP_PSHL: + stack[stack_ptr++] = (s32) args[0]; + break; + case OP_PSHV: + stack[stack_ptr++] = vars[args[0]]; + break; + case OP_JMP: + pc = args[0] + code_sect; + if ((pc < code_sect) || (pc >= debug_sect)) + status = -ERANGE; + break; + case OP_CALL: + stack[stack_ptr++] = pc; + pc = args[0] + code_sect; + if ((pc < code_sect) || (pc >= debug_sect)) + status = -ERANGE; + break; + case OP_NEXT: + /* + * Process FOR / NEXT loop + * ...argument 0 is variable ID + * ...stack 0 is step value + * ...stack 1 is end value + * ...stack 2 is top address + */ + if (altera_check_stack(stack_ptr, 3, &status)) { + s32 step = stack[stack_ptr - 1]; + s32 end = stack[stack_ptr - 2]; + s32 top = stack[stack_ptr - 3]; + s32 iterator = vars[args[0]]; + int break_out = 0; + + if (step < 0) { + if (iterator <= end) + break_out = 1; + } else if (iterator >= end) + break_out = 1; + + if (break_out) { + stack_ptr -= 3; + } else { + vars[args[0]] = iterator + step; + pc = top + code_sect; + if ((pc < code_sect) || + (pc >= debug_sect)) + status = -ERANGE; + } + } + break; + case OP_PSTR: + /* + * PRINT add string + * ...argument 0 is string ID + */ + count = strlen(msg_buff); + strlcpy(&msg_buff[count], + &p[str_table + args[0]], + ALTERA_MESSAGE_LENGTH - count); + break; + case OP_SINT: + /* + * STATE intermediate state + * ...argument 0 is state code + */ + status = altera_goto_jstate(astate, args[0]); + break; + case OP_ST: + /* + * STATE final state + * ...argument 0 is state code + */ + status = altera_goto_jstate(astate, args[0]); + break; + case OP_ISTP: + /* + * IRSTOP state + * ...argument 0 is state code + */ + status = altera_set_irstop(&astate->js, args[0]); + break; + case OP_DSTP: + /* + * DRSTOP state + * ...argument 0 is state code + */ + status = altera_set_drstop(&astate->js, args[0]); + break; + + case OP_SWPN: + /* + * Exchange top with Nth stack value + * ...argument 0 is 0-based stack entry + * to swap with top element + */ + index = (args[0]) + 1; + if (altera_check_stack(stack_ptr, index, &status)) { + long_tmp = stack[stack_ptr - index]; + stack[stack_ptr - index] = stack[stack_ptr - 1]; + stack[stack_ptr - 1] = long_tmp; + } + break; + case OP_DUPN: + /* + * Duplicate Nth stack value + * ...argument 0 is 0-based stack entry to duplicate + */ + index = (args[0]) + 1; + if (altera_check_stack(stack_ptr, index, &status)) { + stack[stack_ptr] = stack[stack_ptr - index]; + ++stack_ptr; + } + break; + case OP_POPV: + /* + * Pop stack into scalar variable + * ...argument 0 is variable ID + * ...stack 0 is value + */ + if (altera_check_stack(stack_ptr, 1, &status)) + vars[args[0]] = stack[--stack_ptr]; + + break; + case OP_POPE: + /* + * Pop stack into integer array element + * ...argument 0 is variable ID + * ...stack 0 is array index + * ...stack 1 is value + */ + if (!altera_check_stack(stack_ptr, 2, &status)) + break; + variable_id = args[0]; + + /* + * If variable is read-only, + * convert to writable array + */ + if ((version > 0) && + ((attrs[variable_id] & 0x9c) == 0x1c)) { + /* Allocate a writable buffer for this array */ + count = var_size[variable_id]; + long_tmp = vars[variable_id]; + longptr_tmp = kcalloc(count, sizeof(long), + GFP_KERNEL); + vars[variable_id] = (long)longptr_tmp; + + if (vars[variable_id] == 0) { + status = -ENOMEM; + break; + } + + /* copy previous contents into buffer */ + for (i = 0; i < count; ++i) { + longptr_tmp[i] = + get_unaligned_be32(&p[long_tmp]); + long_tmp += sizeof(long); + } + + /* + * set bit 7 - buffer was + * dynamically allocated + */ + attrs[variable_id] |= 0x80; + + /* clear bit 2 - variable is writable */ + attrs[variable_id] &= ~0x04; + attrs[variable_id] |= 0x01; + + } + + /* check that variable is a writable integer array */ + if ((attrs[variable_id] & 0x1c) != 0x18) + status = -ERANGE; + else { + longptr_tmp = (long *)vars[variable_id]; + + /* pop the array index */ + index = stack[--stack_ptr]; + + /* pop the value and store it into the array */ + longptr_tmp[index] = stack[--stack_ptr]; + } + + break; + case OP_POPA: + /* + * Pop stack into Boolean array + * ...argument 0 is variable ID + * ...stack 0 is count + * ...stack 1 is array index + * ...stack 2 is value + */ + if (!altera_check_stack(stack_ptr, 3, &status)) + break; + variable_id = args[0]; + + /* + * If variable is read-only, + * convert to writable array + */ + if ((version > 0) && + ((attrs[variable_id] & 0x9c) == 0x0c)) { + /* Allocate a writable buffer for this array */ + long_tmp = + (var_size[variable_id] + 7L) >> 3L; + charptr_tmp2 = (u8 *)vars[variable_id]; + charptr_tmp = + kzalloc(long_tmp, GFP_KERNEL); + vars[variable_id] = (long)charptr_tmp; + + if (vars[variable_id] == 0) { + status = -ENOMEM; + break; + } + + /* zero the buffer */ + for (long_idx = 0L; + long_idx < long_tmp; + ++long_idx) { + charptr_tmp[long_idx] = 0; + } + + /* copy previous contents into buffer */ + for (long_idx = 0L; + long_idx < var_size[variable_id]; + ++long_idx) { + long_idx2 = long_idx; + + if (charptr_tmp2[long_idx2 >> 3] & + (1 << (long_idx2 & 7))) { + charptr_tmp[long_idx >> 3] |= + (1 << (long_idx & 7)); + } + } + + /* + * set bit 7 - buffer was + * dynamically allocated + */ + attrs[variable_id] |= 0x80; + + /* clear bit 2 - variable is writable */ + attrs[variable_id] &= ~0x04; + attrs[variable_id] |= 0x01; + + } + + /* + * check that variable is + * a writable Boolean array + */ + if ((attrs[variable_id] & 0x1c) != 0x08) { + status = -ERANGE; + break; + } + + charptr_tmp = (u8 *)vars[variable_id]; + + /* pop the count (number of bits to copy) */ + long_count = stack[--stack_ptr]; + + /* pop the array index */ + long_idx = stack[--stack_ptr]; + + reverse = 0; + + if (version > 0) { + /* + * stack 0 = array right index + * stack 1 = array left index + */ + + if (long_idx > long_count) { + reverse = 1; + long_tmp = long_count; + long_count = 1 + long_idx - + long_count; + long_idx = long_tmp; + + /* reverse POPA is not supported */ + status = -ERANGE; + break; + } else + long_count = 1 + long_count - + long_idx; + + } + + /* pop the data */ + long_tmp = stack[--stack_ptr]; + + if (long_count < 1) { + status = -ERANGE; + break; + } + + for (i = 0; i < long_count; ++i) { + if (long_tmp & (1L << (s32) i)) + charptr_tmp[long_idx >> 3L] |= + (1L << (long_idx & 7L)); + else + charptr_tmp[long_idx >> 3L] &= + ~(1L << (long_idx & 7L)); + + ++long_idx; + } + + break; + case OP_JMPZ: + /* + * Pop stack and branch if zero + * ...argument 0 is address + * ...stack 0 is condition value + */ + if (altera_check_stack(stack_ptr, 1, &status)) { + if (stack[--stack_ptr] == 0) { + pc = args[0] + code_sect; + if ((pc < code_sect) || + (pc >= debug_sect)) + status = -ERANGE; + } + } + break; + case OP_DS: + case OP_IS: + /* + * DRSCAN + * IRSCAN + * ...argument 0 is scan data variable ID + * ...stack 0 is array index + * ...stack 1 is count + */ + if (!altera_check_stack(stack_ptr, 2, &status)) + break; + long_idx = stack[--stack_ptr]; + long_count = stack[--stack_ptr]; + reverse = 0; + if (version > 0) { + /* + * stack 0 = array right index + * stack 1 = array left index + * stack 2 = count + */ + long_tmp = long_count; + long_count = stack[--stack_ptr]; + + if (long_idx > long_tmp) { + reverse = 1; + long_idx = long_tmp; + } + } + + charptr_tmp = (u8 *)vars[args[0]]; + + if (reverse) { + /* + * allocate a buffer + * and reverse the data order + */ + charptr_tmp2 = charptr_tmp; + charptr_tmp = kzalloc((long_count >> 3) + 1, + GFP_KERNEL); + if (charptr_tmp == NULL) { + status = -ENOMEM; + break; + } + + long_tmp = long_idx + long_count - 1; + long_idx2 = 0; + while (long_idx2 < long_count) { + if (charptr_tmp2[long_tmp >> 3] & + (1 << (long_tmp & 7))) + charptr_tmp[long_idx2 >> 3] |= + (1 << (long_idx2 & 7)); + else + charptr_tmp[long_idx2 >> 3] &= + ~(1 << (long_idx2 & 7)); + + --long_tmp; + ++long_idx2; + } + } + + if (opcode == 0x51) /* DS */ + status = altera_drscan(astate, long_count, + charptr_tmp, long_idx); + else /* IS */ + status = altera_irscan(astate, long_count, + charptr_tmp, long_idx); + + if (reverse) + kfree(charptr_tmp); + + break; + case OP_DPRA: + /* + * DRPRE with array data + * ...argument 0 is variable ID + * ...stack 0 is array index + * ...stack 1 is count + */ + if (!altera_check_stack(stack_ptr, 2, &status)) + break; + index = stack[--stack_ptr]; + count = stack[--stack_ptr]; + + if (version > 0) + /* + * stack 0 = array right index + * stack 1 = array left index + */ + count = 1 + count - index; + + charptr_tmp = (u8 *)vars[args[0]]; + status = altera_set_dr_pre(&astate->js, count, index, + charptr_tmp); + break; + case OP_DPOA: + /* + * DRPOST with array data + * ...argument 0 is variable ID + * ...stack 0 is array index + * ...stack 1 is count + */ + if (!altera_check_stack(stack_ptr, 2, &status)) + break; + index = stack[--stack_ptr]; + count = stack[--stack_ptr]; + + if (version > 0) + /* + * stack 0 = array right index + * stack 1 = array left index + */ + count = 1 + count - index; + + charptr_tmp = (u8 *)vars[args[0]]; + status = altera_set_dr_post(&astate->js, count, index, + charptr_tmp); + break; + case OP_IPRA: + /* + * IRPRE with array data + * ...argument 0 is variable ID + * ...stack 0 is array index + * ...stack 1 is count + */ + if (!altera_check_stack(stack_ptr, 2, &status)) + break; + index = stack[--stack_ptr]; + count = stack[--stack_ptr]; + + if (version > 0) + /* + * stack 0 = array right index + * stack 1 = array left index + */ + count = 1 + count - index; + + charptr_tmp = (u8 *)vars[args[0]]; + status = altera_set_ir_pre(&astate->js, count, index, + charptr_tmp); + + break; + case OP_IPOA: + /* + * IRPOST with array data + * ...argument 0 is variable ID + * ...stack 0 is array index + * ...stack 1 is count + */ + if (!altera_check_stack(stack_ptr, 2, &status)) + break; + index = stack[--stack_ptr]; + count = stack[--stack_ptr]; + + if (version > 0) + /* + * stack 0 = array right index + * stack 1 = array left index + */ + count = 1 + count - index; + + charptr_tmp = (u8 *)vars[args[0]]; + status = altera_set_ir_post(&astate->js, count, index, + charptr_tmp); + + break; + case OP_EXPT: + /* + * EXPORT + * ...argument 0 is string ID + * ...stack 0 is integer expression + */ + if (altera_check_stack(stack_ptr, 1, &status)) { + name = &p[str_table + args[0]]; + long_tmp = stack[--stack_ptr]; + altera_export_int(name, long_tmp); + } + break; + case OP_PSHE: + /* + * Push integer array element + * ...argument 0 is variable ID + * ...stack 0 is array index + */ + if (!altera_check_stack(stack_ptr, 1, &status)) + break; + variable_id = args[0]; + index = stack[stack_ptr - 1]; + + /* check variable type */ + if ((attrs[variable_id] & 0x1f) == 0x19) { + /* writable integer array */ + longptr_tmp = (long *)vars[variable_id]; + stack[stack_ptr - 1] = longptr_tmp[index]; + } else if ((attrs[variable_id] & 0x1f) == 0x1c) { + /* read-only integer array */ + long_tmp = vars[variable_id] + + (index * sizeof(long)); + stack[stack_ptr - 1] = + get_unaligned_be32(&p[long_tmp]); + } else + status = -ERANGE; + + break; + case OP_PSHA: + /* + * Push Boolean array + * ...argument 0 is variable ID + * ...stack 0 is count + * ...stack 1 is array index + */ + if (!altera_check_stack(stack_ptr, 2, &status)) + break; + variable_id = args[0]; + + /* check that variable is a Boolean array */ + if ((attrs[variable_id] & 0x18) != 0x08) { + status = -ERANGE; + break; + } + + charptr_tmp = (u8 *)vars[variable_id]; + + /* pop the count (number of bits to copy) */ + count = stack[--stack_ptr]; + + /* pop the array index */ + index = stack[stack_ptr - 1]; + + if (version > 0) + /* + * stack 0 = array right index + * stack 1 = array left index + */ + count = 1 + count - index; + + if ((count < 1) || (count > 32)) { + status = -ERANGE; + break; + } + + long_tmp = 0L; + + for (i = 0; i < count; ++i) + if (charptr_tmp[(i + index) >> 3] & + (1 << ((i + index) & 7))) + long_tmp |= (1L << i); + + stack[stack_ptr - 1] = long_tmp; + + break; + case OP_DYNA: + /* + * Dynamically change size of array + * ...argument 0 is variable ID + * ...stack 0 is new size + */ + if (!altera_check_stack(stack_ptr, 1, &status)) + break; + variable_id = args[0]; + long_tmp = stack[--stack_ptr]; + + if (long_tmp > var_size[variable_id]) { + var_size[variable_id] = long_tmp; + + if (attrs[variable_id] & 0x10) + /* allocate integer array */ + long_tmp *= sizeof(long); + else + /* allocate Boolean array */ + long_tmp = (long_tmp + 7) >> 3; + + /* + * If the buffer was previously allocated, + * free it + */ + if (attrs[variable_id] & 0x80) { + kfree((void *)vars[variable_id]); + vars[variable_id] = 0; + } + + /* + * Allocate a new buffer + * of the requested size + */ + vars[variable_id] = (long) + kzalloc(long_tmp, GFP_KERNEL); + + if (vars[variable_id] == 0) { + status = -ENOMEM; + break; + } + + /* + * Set the attribute bit to indicate that + * this buffer was dynamically allocated and + * should be freed later + */ + attrs[variable_id] |= 0x80; + + /* zero out memory */ + count = ((var_size[variable_id] + 7L) / + 8L); + charptr_tmp = (u8 *)(vars[variable_id]); + for (index = 0; index < count; ++index) + charptr_tmp[index] = 0; + + } + + break; + case OP_EXPV: + /* + * Export Boolean array + * ...argument 0 is string ID + * ...stack 0 is variable ID + * ...stack 1 is array right index + * ...stack 2 is array left index + */ + if (!altera_check_stack(stack_ptr, 3, &status)) + break; + if (version == 0) { + /* EXPV is not supported in JBC 1.0 */ + bad_opcode = 1; + break; + } + name = &p[str_table + args[0]]; + variable_id = stack[--stack_ptr]; + long_idx = stack[--stack_ptr];/* right indx */ + long_idx2 = stack[--stack_ptr];/* left indx */ + + if (long_idx > long_idx2) { + /* reverse indices not supported */ + status = -ERANGE; + break; + } + + long_count = 1 + long_idx2 - long_idx; + + charptr_tmp = (u8 *)vars[variable_id]; + charptr_tmp2 = NULL; + + if ((long_idx & 7L) != 0) { + s32 k = long_idx; + charptr_tmp2 = + kzalloc(((long_count + 7L) / 8L), + GFP_KERNEL); + if (charptr_tmp2 == NULL) { + status = -ENOMEM; + break; + } + + for (i = 0; i < long_count; ++i) { + if (charptr_tmp[k >> 3] & + (1 << (k & 7))) + charptr_tmp2[i >> 3] |= + (1 << (i & 7)); + else + charptr_tmp2[i >> 3] &= + ~(1 << (i & 7)); + + ++k; + } + charptr_tmp = charptr_tmp2; + + } else if (long_idx != 0) + charptr_tmp = &charptr_tmp[long_idx >> 3]; + + altera_export_bool_array(name, charptr_tmp, + long_count); + + /* free allocated buffer */ + if ((long_idx & 7L) != 0) + kfree(charptr_tmp2); + + break; + case OP_COPY: { + /* + * Array copy + * ...argument 0 is dest ID + * ...argument 1 is source ID + * ...stack 0 is count + * ...stack 1 is dest index + * ...stack 2 is source index + */ + s32 copy_count; + s32 copy_index; + s32 copy_index2; + s32 destleft; + s32 src_count; + s32 dest_count; + int src_reverse = 0; + int dest_reverse = 0; + + if (!altera_check_stack(stack_ptr, 3, &status)) + break; + + copy_count = stack[--stack_ptr]; + copy_index = stack[--stack_ptr]; + copy_index2 = stack[--stack_ptr]; + reverse = 0; + + if (version > 0) { + /* + * stack 0 = source right index + * stack 1 = source left index + * stack 2 = destination right index + * stack 3 = destination left index + */ + destleft = stack[--stack_ptr]; + + if (copy_count > copy_index) { + src_reverse = 1; + reverse = 1; + src_count = 1 + copy_count - copy_index; + /* copy_index = source start index */ + } else { + src_count = 1 + copy_index - copy_count; + /* source start index */ + copy_index = copy_count; + } + + if (copy_index2 > destleft) { + dest_reverse = 1; + reverse = !reverse; + dest_count = 1 + copy_index2 - destleft; + /* destination start index */ + copy_index2 = destleft; + } else + dest_count = 1 + destleft - copy_index2; + + copy_count = (src_count < dest_count) ? + src_count : dest_count; + + if ((src_reverse || dest_reverse) && + (src_count != dest_count)) + /* + * If either the source or destination + * is reversed, we can't tolerate + * a length mismatch, because we + * "left justify" arrays when copying. + * This won't work correctly + * with reversed arrays. + */ + status = -ERANGE; + + } + + count = copy_count; + index = copy_index; + index2 = copy_index2; + + /* + * If destination is a read-only array, + * allocate a buffer and convert it to a writable array + */ + variable_id = args[1]; + if ((version > 0) && + ((attrs[variable_id] & 0x9c) == 0x0c)) { + /* Allocate a writable buffer for this array */ + long_tmp = + (var_size[variable_id] + 7L) >> 3L; + charptr_tmp2 = (u8 *)vars[variable_id]; + charptr_tmp = + kzalloc(long_tmp, GFP_KERNEL); + vars[variable_id] = (long)charptr_tmp; + + if (vars[variable_id] == 0) { + status = -ENOMEM; + break; + } + + /* zero the buffer */ + for (long_idx = 0L; long_idx < long_tmp; + ++long_idx) + charptr_tmp[long_idx] = 0; + + /* copy previous contents into buffer */ + for (long_idx = 0L; + long_idx < var_size[variable_id]; + ++long_idx) { + long_idx2 = long_idx; + + if (charptr_tmp2[long_idx2 >> 3] & + (1 << (long_idx2 & 7))) + charptr_tmp[long_idx >> 3] |= + (1 << (long_idx & 7)); + + } + + /* + set bit 7 - buffer was dynamically allocated */ + attrs[variable_id] |= 0x80; + + /* clear bit 2 - variable is writable */ + attrs[variable_id] &= ~0x04; + attrs[variable_id] |= 0x01; + } + + charptr_tmp = (u8 *)vars[args[1]]; + charptr_tmp2 = (u8 *)vars[args[0]]; + + /* check if destination is a writable Boolean array */ + if ((attrs[args[1]] & 0x1c) != 0x08) { + status = -ERANGE; + break; + } + + if (count < 1) { + status = -ERANGE; + break; + } + + if (reverse) + index2 += (count - 1); + + for (i = 0; i < count; ++i) { + if (charptr_tmp2[index >> 3] & + (1 << (index & 7))) + charptr_tmp[index2 >> 3] |= + (1 << (index2 & 7)); + else + charptr_tmp[index2 >> 3] &= + ~(1 << (index2 & 7)); + + ++index; + if (reverse) + --index2; + else + ++index2; + } + + break; + } + case OP_DSC: + case OP_ISC: { + /* + * DRSCAN with capture + * IRSCAN with capture + * ...argument 0 is scan data variable ID + * ...argument 1 is capture variable ID + * ...stack 0 is capture index + * ...stack 1 is scan data index + * ...stack 2 is count + */ + s32 scan_right, scan_left; + s32 capture_count = 0; + s32 scan_count = 0; + s32 capture_index; + s32 scan_index; + + if (!altera_check_stack(stack_ptr, 3, &status)) + break; + + capture_index = stack[--stack_ptr]; + scan_index = stack[--stack_ptr]; + + if (version > 0) { + /* + * stack 0 = capture right index + * stack 1 = capture left index + * stack 2 = scan right index + * stack 3 = scan left index + * stack 4 = count + */ + scan_right = stack[--stack_ptr]; + scan_left = stack[--stack_ptr]; + capture_count = 1 + scan_index - capture_index; + scan_count = 1 + scan_left - scan_right; + scan_index = scan_right; + } + + long_count = stack[--stack_ptr]; + /* + * If capture array is read-only, allocate a buffer + * and convert it to a writable array + */ + variable_id = args[1]; + if ((version > 0) && + ((attrs[variable_id] & 0x9c) == 0x0c)) { + /* Allocate a writable buffer for this array */ + long_tmp = + (var_size[variable_id] + 7L) >> 3L; + charptr_tmp2 = (u8 *)vars[variable_id]; + charptr_tmp = + kzalloc(long_tmp, GFP_KERNEL); + vars[variable_id] = (long)charptr_tmp; + + if (vars[variable_id] == 0) { + status = -ENOMEM; + break; + } + + /* zero the buffer */ + for (long_idx = 0L; long_idx < long_tmp; + ++long_idx) + charptr_tmp[long_idx] = 0; + + /* copy previous contents into buffer */ + for (long_idx = 0L; + long_idx < var_size[variable_id]; + ++long_idx) { + long_idx2 = long_idx; + + if (charptr_tmp2[long_idx2 >> 3] & + (1 << (long_idx2 & 7))) + charptr_tmp[long_idx >> 3] |= + (1 << (long_idx & 7)); + + } + + /* + * set bit 7 - buffer was + * dynamically allocated + */ + attrs[variable_id] |= 0x80; + + /* clear bit 2 - variable is writable */ + attrs[variable_id] &= ~0x04; + attrs[variable_id] |= 0x01; + + } + + charptr_tmp = (u8 *)vars[args[0]]; + charptr_tmp2 = (u8 *)vars[args[1]]; + + if ((version > 0) && + ((long_count > capture_count) || + (long_count > scan_count))) { + status = -ERANGE; + break; + } + + /* + * check that capture array + * is a writable Boolean array + */ + if ((attrs[args[1]] & 0x1c) != 0x08) { + status = -ERANGE; + break; + } + + if (status == 0) { + if (opcode == 0x82) /* DSC */ + status = altera_swap_dr(astate, + long_count, + charptr_tmp, + scan_index, + charptr_tmp2, + capture_index); + else /* ISC */ + status = altera_swap_ir(astate, + long_count, + charptr_tmp, + scan_index, + charptr_tmp2, + capture_index); + + } + + break; + } + case OP_WAIT: + /* + * WAIT + * ...argument 0 is wait state + * ...argument 1 is end state + * ...stack 0 is cycles + * ...stack 1 is microseconds + */ + if (!altera_check_stack(stack_ptr, 2, &status)) + break; + long_tmp = stack[--stack_ptr]; + + if (long_tmp != 0L) + status = altera_wait_cycles(astate, long_tmp, + args[0]); + + long_tmp = stack[--stack_ptr]; + + if ((status == 0) && (long_tmp != 0L)) + status = altera_wait_msecs(astate, + long_tmp, + args[0]); + + if ((status == 0) && (args[1] != args[0])) + status = altera_goto_jstate(astate, + args[1]); + + if (version > 0) { + --stack_ptr; /* throw away MAX cycles */ + --stack_ptr; /* throw away MAX microseconds */ + } + break; + case OP_CMPA: { + /* + * Array compare + * ...argument 0 is source 1 ID + * ...argument 1 is source 2 ID + * ...argument 2 is mask ID + * ...stack 0 is source 1 index + * ...stack 1 is source 2 index + * ...stack 2 is mask index + * ...stack 3 is count + */ + s32 a, b; + u8 *source1 = (u8 *)vars[args[0]]; + u8 *source2 = (u8 *)vars[args[1]]; + u8 *mask = (u8 *)vars[args[2]]; + u32 index1; + u32 index2; + u32 mask_index; + + if (!altera_check_stack(stack_ptr, 4, &status)) + break; + + index1 = stack[--stack_ptr]; + index2 = stack[--stack_ptr]; + mask_index = stack[--stack_ptr]; + long_count = stack[--stack_ptr]; + + if (version > 0) { + /* + * stack 0 = source 1 right index + * stack 1 = source 1 left index + * stack 2 = source 2 right index + * stack 3 = source 2 left index + * stack 4 = mask right index + * stack 5 = mask left index + */ + s32 mask_right = stack[--stack_ptr]; + s32 mask_left = stack[--stack_ptr]; + /* source 1 count */ + a = 1 + index2 - index1; + /* source 2 count */ + b = 1 + long_count - mask_index; + a = (a < b) ? a : b; + /* mask count */ + b = 1 + mask_left - mask_right; + a = (a < b) ? a : b; + /* source 2 start index */ + index2 = mask_index; + /* mask start index */ + mask_index = mask_right; + long_count = a; + } + + long_tmp = 1L; + + if (long_count < 1) + status = -ERANGE; + else { + count = long_count; + + for (i = 0; i < count; ++i) { + if (mask[mask_index >> 3] & + (1 << (mask_index & 7))) { + a = source1[index1 >> 3] & + (1 << (index1 & 7)) + ? 1 : 0; + b = source2[index2 >> 3] & + (1 << (index2 & 7)) + ? 1 : 0; + + if (a != b) /* failure */ + long_tmp = 0L; + } + ++index1; + ++index2; + ++mask_index; + } + } + + stack[stack_ptr++] = long_tmp; + + break; + } + default: + /* Unrecognized opcode -- ERROR! */ + bad_opcode = 1; + break; + } + + if (bad_opcode) + status = -ENOSYS; + + if ((stack_ptr < 0) || (stack_ptr >= ALTERA_STACK_SIZE)) + status = -EOVERFLOW; + + if (status != 0) { + done = 1; + *error_address = (s32)(opcode_address - code_sect); + } + } + + altera_free_buffers(astate); + + /* Free all dynamically allocated arrays */ + if ((attrs != NULL) && (vars != NULL)) + for (i = 0; i < sym_count; ++i) + if (attrs[i] & 0x80) + kfree((void *)vars[i]); + + kfree(vars); + kfree(var_size); + kfree(attrs); + kfree(proc_attributes); + + return status; +} + +static int altera_get_note(u8 *p, s32 program_size, s32 *offset, + char *key, char *value, int keylen, int vallen) +/* + * Gets key and value of NOTE fields in the JBC file. + * Can be called in two modes: if offset pointer is NULL, + * then the function searches for note fields which match + * the key string provided. If offset is not NULL, then + * the function finds the next note field of any key, + * starting at the offset specified by the offset pointer. + * Returns 0 for success, else appropriate error code + */ +{ + int status = -ENODATA; + u32 note_strings = 0L; + u32 note_table = 0L; + u32 note_count = 0L; + u32 first_word = 0L; + int version = 0; + int delta = 0; + char *key_ptr; + char *value_ptr; + int i; + + /* Read header information */ + if (program_size > 52L) { + first_word = get_unaligned_be32(&p[0]); + version = (first_word & 1L); + delta = version * 8; + + note_strings = get_unaligned_be32(&p[8 + delta]); + note_table = get_unaligned_be32(&p[12 + delta]); + note_count = get_unaligned_be32(&p[44 + (2 * delta)]); + } + + if ((first_word != 0x4A414D00L) && (first_word != 0x4A414D01L)) + return -EIO; + + if (note_count <= 0L) + return status; + + if (offset == NULL) { + /* + * We will search for the first note with a specific key, + * and return only the value + */ + for (i = 0; (i < note_count) && + (status != 0); ++i) { + key_ptr = &p[note_strings + + get_unaligned_be32( + &p[note_table + (8 * i)])]; + if (key && !strncasecmp(key, key_ptr, strlen(key_ptr))) { + status = 0; + + value_ptr = &p[note_strings + + get_unaligned_be32( + &p[note_table + (8 * i) + 4])]; + + if (value != NULL) + strlcpy(value, value_ptr, vallen); + + } + } + } else { + /* + * We will search for the next note, regardless of the key, + * and return both the value and the key + */ + + i = *offset; + + if ((i >= 0) && (i < note_count)) { + status = 0; + + if (key != NULL) + strlcpy(key, &p[note_strings + + get_unaligned_be32( + &p[note_table + (8 * i)])], + keylen); + + if (value != NULL) + strlcpy(value, &p[note_strings + + get_unaligned_be32( + &p[note_table + (8 * i) + 4])], + vallen); + + *offset = i + 1; + } + } + + return status; +} + +static int altera_check_crc(u8 *p, s32 program_size) +{ + int status = 0; + u16 local_expected = 0, + local_actual = 0, + shift_reg = 0xffff; + int bit, feedback; + u8 databyte; + u32 i; + u32 crc_section = 0L; + u32 first_word = 0L; + int version = 0; + int delta = 0; + + if (program_size > 52L) { + first_word = get_unaligned_be32(&p[0]); + version = (first_word & 1L); + delta = version * 8; + + crc_section = get_unaligned_be32(&p[32 + delta]); + } + + if ((first_word != 0x4A414D00L) && (first_word != 0x4A414D01L)) + status = -EIO; + + if (crc_section >= program_size) + status = -EIO; + + if (status == 0) { + local_expected = (u16)get_unaligned_be16(&p[crc_section]); + + for (i = 0; i < crc_section; ++i) { + databyte = p[i]; + for (bit = 0; bit < 8; bit++) { + feedback = (databyte ^ shift_reg) & 0x01; + shift_reg >>= 1; + if (feedback) + shift_reg ^= 0x8408; + + databyte >>= 1; + } + } + + local_actual = (u16)~shift_reg; + + if (local_expected != local_actual) + status = -EILSEQ; + + } + + if (debug || status) { + switch (status) { + case 0: + printk(KERN_INFO "%s: CRC matched: %04x\n", __func__, + local_actual); + break; + case -EILSEQ: + printk(KERN_ERR "%s: CRC mismatch: expected %04x, " + "actual %04x\n", __func__, local_expected, + local_actual); + break; + case -ENODATA: + printk(KERN_ERR "%s: expected CRC not found, " + "actual CRC = %04x\n", __func__, + local_actual); + break; + case -EIO: + printk(KERN_ERR "%s: error: format isn't " + "recognized.\n", __func__); + break; + default: + printk(KERN_ERR "%s: CRC function returned error " + "code %d\n", __func__, status); + break; + } + } + + return status; +} + +static int altera_get_file_info(u8 *p, + s32 program_size, + int *format_version, + int *action_count, + int *procedure_count) +{ + int status = -EIO; + u32 first_word = 0; + int version = 0; + + if (program_size <= 52L) + return status; + + first_word = get_unaligned_be32(&p[0]); + + if ((first_word == 0x4A414D00L) || (first_word == 0x4A414D01L)) { + status = 0; + + version = (first_word & 1L); + *format_version = version + 1; + + if (version > 0) { + *action_count = get_unaligned_be32(&p[48]); + *procedure_count = get_unaligned_be32(&p[52]); + } + } + + return status; +} + +static int altera_get_act_info(u8 *p, + s32 program_size, + int index, + char **name, + char **description, + struct altera_procinfo **proc_list) +{ + int status = -EIO; + struct altera_procinfo *procptr = NULL; + struct altera_procinfo *tmpptr = NULL; + u32 first_word = 0L; + u32 action_table = 0L; + u32 proc_table = 0L; + u32 str_table = 0L; + u32 note_strings = 0L; + u32 action_count = 0L; + u32 proc_count = 0L; + u32 act_name_id = 0L; + u32 act_desc_id = 0L; + u32 act_proc_id = 0L; + u32 act_proc_name = 0L; + u8 act_proc_attribute = 0; + + if (program_size <= 52L) + return status; + /* Read header information */ + first_word = get_unaligned_be32(&p[0]); + + if (first_word != 0x4A414D01L) + return status; + + action_table = get_unaligned_be32(&p[4]); + proc_table = get_unaligned_be32(&p[8]); + str_table = get_unaligned_be32(&p[12]); + note_strings = get_unaligned_be32(&p[16]); + action_count = get_unaligned_be32(&p[48]); + proc_count = get_unaligned_be32(&p[52]); + + if (index >= action_count) + return status; + + act_name_id = get_unaligned_be32(&p[action_table + (12 * index)]); + act_desc_id = get_unaligned_be32(&p[action_table + (12 * index) + 4]); + act_proc_id = get_unaligned_be32(&p[action_table + (12 * index) + 8]); + + *name = &p[str_table + act_name_id]; + + if (act_desc_id < (note_strings - str_table)) + *description = &p[str_table + act_desc_id]; + + do { + act_proc_name = get_unaligned_be32( + &p[proc_table + (13 * act_proc_id)]); + act_proc_attribute = + (p[proc_table + (13 * act_proc_id) + 8] & 0x03); + + procptr = + kzalloc(sizeof(struct altera_procinfo), + GFP_KERNEL); + + if (procptr == NULL) + status = -ENOMEM; + else { + procptr->name = &p[str_table + act_proc_name]; + procptr->attrs = act_proc_attribute; + procptr->next = NULL; + + /* add record to end of linked list */ + if (*proc_list == NULL) + *proc_list = procptr; + else { + tmpptr = *proc_list; + while (tmpptr->next != NULL) + tmpptr = tmpptr->next; + tmpptr->next = procptr; + } + } + + act_proc_id = get_unaligned_be32( + &p[proc_table + (13 * act_proc_id) + 4]); + } while ((act_proc_id != 0) && (act_proc_id < proc_count)); + + return status; +} + +int altera_init(struct altera_config *config, const struct firmware *fw) +{ + struct altera_state *astate = NULL; + struct altera_procinfo *proc_list = NULL; + struct altera_procinfo *procptr = NULL; + char *key = NULL; + char *value = NULL; + char *action_name = NULL; + char *description = NULL; + int exec_result = 0; + int exit_code = 0; + int format_version = 0; + int action_count = 0; + int procedure_count = 0; + int index = 0; + s32 offset = 0L; + s32 error_address = 0L; + int retval = 0; + + key = kzalloc(33, GFP_KERNEL); + if (!key) { + retval = -ENOMEM; + goto out; + } + value = kzalloc(257, GFP_KERNEL); + if (!value) { + retval = -ENOMEM; + goto free_key; + } + astate = kzalloc(sizeof(struct altera_state), GFP_KERNEL); + if (!astate) { + retval = -ENOMEM; + goto free_value; + } + + astate->config = config; + if (!astate->config->jtag_io) { + dprintk("%s: using byteblaster!\n", __func__); + astate->config->jtag_io = netup_jtag_io_lpt; + } + + altera_check_crc((u8 *)fw->data, fw->size); + + if (debug) { + altera_get_file_info((u8 *)fw->data, fw->size, &format_version, + &action_count, &procedure_count); + printk(KERN_INFO "%s: File format is %s ByteCode format\n", + __func__, (format_version == 2) ? "Jam STAPL" : + "pre-standardized Jam 1.1"); + while (altera_get_note((u8 *)fw->data, fw->size, + &offset, key, value, 32, 256) == 0) + printk(KERN_INFO "%s: NOTE \"%s\" = \"%s\"\n", + __func__, key, value); + } + + if (debug && (format_version == 2) && (action_count > 0)) { + printk(KERN_INFO "%s: Actions available:\n", __func__); + for (index = 0; index < action_count; ++index) { + altera_get_act_info((u8 *)fw->data, fw->size, + index, &action_name, + &description, + &proc_list); + + if (description == NULL) + printk(KERN_INFO "%s: %s\n", + __func__, + action_name); + else + printk(KERN_INFO "%s: %s \"%s\"\n", + __func__, + action_name, + description); + + procptr = proc_list; + while (procptr != NULL) { + if (procptr->attrs != 0) + printk(KERN_INFO "%s: %s (%s)\n", + __func__, + procptr->name, + (procptr->attrs == 1) ? + "optional" : "recommended"); + + proc_list = procptr->next; + kfree(procptr); + procptr = proc_list; + } + } + + printk(KERN_INFO "\n"); + } + + exec_result = altera_execute(astate, (u8 *)fw->data, fw->size, + &error_address, &exit_code, &format_version); + + if (exit_code) + exec_result = -EREMOTEIO; + + if ((format_version == 2) && (exec_result == -EINVAL)) { + if (astate->config->action == NULL) + printk(KERN_ERR "%s: error: no action specified for " + "Jam STAPL file.\nprogram terminated.\n", + __func__); + else + printk(KERN_ERR "%s: error: action \"%s\"" + " is not supported " + "for this Jam STAPL file.\n" + "Program terminated.\n", __func__, + astate->config->action); + + } else if (exec_result) + printk(KERN_ERR "%s: error %d\n", __func__, exec_result); + + kfree(astate); +free_value: + kfree(value); +free_key: + kfree(key); +out: + return retval; +} +EXPORT_SYMBOL(altera_init); diff --git a/drivers/misc/apds9802als.c b/drivers/misc/apds9802als.c new file mode 100644 index 000000000..d8ac036f0 --- /dev/null +++ b/drivers/misc/apds9802als.c @@ -0,0 +1,322 @@ +/* + * apds9802als.c - apds9802 ALS Driver + * + * Copyright (C) 2009 Intel Corp + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#define ALS_MIN_RANGE_VAL 1 +#define ALS_MAX_RANGE_VAL 2 +#define POWER_STA_ENABLE 1 +#define POWER_STA_DISABLE 0 + +#define DRIVER_NAME "apds9802als" + +struct als_data { + struct mutex mutex; +}; + +static ssize_t als_sensing_range_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct i2c_client *client = to_i2c_client(dev); + int val; + + val = i2c_smbus_read_byte_data(client, 0x81); + if (val < 0) + return val; + if (val & 1) + return sprintf(buf, "4095\n"); + else + return sprintf(buf, "65535\n"); +} + +static int als_wait_for_data_ready(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + int ret; + int retry = 10; + + do { + msleep(30); + ret = i2c_smbus_read_byte_data(client, 0x86); + } while (!(ret & 0x80) && retry--); + + if (retry < 0) { + dev_warn(dev, "timeout waiting for data ready\n"); + return -ETIMEDOUT; + } + + return 0; +} + +static ssize_t als_lux0_input_data_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct i2c_client *client = to_i2c_client(dev); + struct als_data *data = i2c_get_clientdata(client); + int ret_val; + int temp; + + /* Protect against parallel reads */ + pm_runtime_get_sync(dev); + mutex_lock(&data->mutex); + + /* clear EOC interrupt status */ + i2c_smbus_write_byte(client, 0x40); + /* start measurement */ + temp = i2c_smbus_read_byte_data(client, 0x81); + i2c_smbus_write_byte_data(client, 0x81, temp | 0x08); + + ret_val = als_wait_for_data_ready(dev); + if (ret_val < 0) + goto failed; + + temp = i2c_smbus_read_byte_data(client, 0x8C); /* LSB data */ + if (temp < 0) { + ret_val = temp; + goto failed; + } + ret_val = i2c_smbus_read_byte_data(client, 0x8D); /* MSB data */ + if (ret_val < 0) + goto failed; + + mutex_unlock(&data->mutex); + pm_runtime_put_sync(dev); + + temp = (ret_val << 8) | temp; + return sprintf(buf, "%d\n", temp); +failed: + mutex_unlock(&data->mutex); + pm_runtime_put_sync(dev); + return ret_val; +} + +static ssize_t als_sensing_range_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct i2c_client *client = to_i2c_client(dev); + struct als_data *data = i2c_get_clientdata(client); + int ret_val; + unsigned long val; + + ret_val = kstrtoul(buf, 10, &val); + if (ret_val) + return ret_val; + + if (val < 4096) + val = 1; + else if (val < 65536) + val = 2; + else + return -ERANGE; + + pm_runtime_get_sync(dev); + + /* Make sure nobody else reads/modifies/writes 0x81 while we + are active */ + mutex_lock(&data->mutex); + + ret_val = i2c_smbus_read_byte_data(client, 0x81); + if (ret_val < 0) + goto fail; + + /* Reset the bits before setting them */ + ret_val = ret_val & 0xFA; + + if (val == 1) /* Setting detection range up to 4k LUX */ + ret_val = (ret_val | 0x01); + else /* Setting detection range up to 64k LUX*/ + ret_val = (ret_val | 0x00); + + ret_val = i2c_smbus_write_byte_data(client, 0x81, ret_val); + + if (ret_val >= 0) { + /* All OK */ + mutex_unlock(&data->mutex); + pm_runtime_put_sync(dev); + return count; + } +fail: + mutex_unlock(&data->mutex); + pm_runtime_put_sync(dev); + return ret_val; +} + +static int als_set_power_state(struct i2c_client *client, bool on_off) +{ + int ret_val; + struct als_data *data = i2c_get_clientdata(client); + + mutex_lock(&data->mutex); + ret_val = i2c_smbus_read_byte_data(client, 0x80); + if (ret_val < 0) + goto fail; + if (on_off) + ret_val = ret_val | 0x01; + else + ret_val = ret_val & 0xFE; + ret_val = i2c_smbus_write_byte_data(client, 0x80, ret_val); +fail: + mutex_unlock(&data->mutex); + return ret_val; +} + +static DEVICE_ATTR(lux0_sensor_range, S_IRUGO | S_IWUSR, + als_sensing_range_show, als_sensing_range_store); +static DEVICE_ATTR(lux0_input, S_IRUGO, als_lux0_input_data_show, NULL); + +static struct attribute *mid_att_als[] = { + &dev_attr_lux0_sensor_range.attr, + &dev_attr_lux0_input.attr, + NULL +}; + +static const struct attribute_group m_als_gr = { + .name = "apds9802als", + .attrs = mid_att_als +}; + +static int als_set_default_config(struct i2c_client *client) +{ + int ret_val; + /* Write the command and then switch on */ + ret_val = i2c_smbus_write_byte_data(client, 0x80, 0x01); + if (ret_val < 0) { + dev_err(&client->dev, "failed default switch on write\n"); + return ret_val; + } + /* detection range: 1~64K Lux, maunal measurement */ + ret_val = i2c_smbus_write_byte_data(client, 0x81, 0x08); + if (ret_val < 0) + dev_err(&client->dev, "failed default LUX on write\n"); + + /* We always get 0 for the 1st measurement after system power on, + * so make sure it is finished before user asks for data. + */ + als_wait_for_data_ready(&client->dev); + + return ret_val; +} + +static int apds9802als_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + int res; + struct als_data *data; + + data = kzalloc(sizeof(struct als_data), GFP_KERNEL); + if (data == NULL) { + dev_err(&client->dev, "Memory allocation failed\n"); + return -ENOMEM; + } + i2c_set_clientdata(client, data); + res = sysfs_create_group(&client->dev.kobj, &m_als_gr); + if (res) { + dev_err(&client->dev, "device create file failed\n"); + goto als_error1; + } + dev_info(&client->dev, "ALS chip found\n"); + als_set_default_config(client); + mutex_init(&data->mutex); + + pm_runtime_set_active(&client->dev); + pm_runtime_enable(&client->dev); + + return res; +als_error1: + kfree(data); + return res; +} + +static int apds9802als_remove(struct i2c_client *client) +{ + struct als_data *data = i2c_get_clientdata(client); + + pm_runtime_get_sync(&client->dev); + + als_set_power_state(client, false); + sysfs_remove_group(&client->dev.kobj, &m_als_gr); + + pm_runtime_disable(&client->dev); + pm_runtime_set_suspended(&client->dev); + pm_runtime_put_noidle(&client->dev); + + kfree(data); + return 0; +} + +#ifdef CONFIG_PM + +static int apds9802als_suspend(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + + als_set_power_state(client, false); + return 0; +} + +static int apds9802als_resume(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + + als_set_power_state(client, true); + return 0; +} + +static UNIVERSAL_DEV_PM_OPS(apds9802als_pm_ops, apds9802als_suspend, + apds9802als_resume, NULL); + +#define APDS9802ALS_PM_OPS (&apds9802als_pm_ops) + +#else /* CONFIG_PM */ +#define APDS9802ALS_PM_OPS NULL +#endif /* CONFIG_PM */ + +static const struct i2c_device_id apds9802als_id[] = { + { DRIVER_NAME, 0 }, + { } +}; + +MODULE_DEVICE_TABLE(i2c, apds9802als_id); + +static struct i2c_driver apds9802als_driver = { + .driver = { + .name = DRIVER_NAME, + .pm = APDS9802ALS_PM_OPS, + }, + .probe = apds9802als_probe, + .remove = apds9802als_remove, + .id_table = apds9802als_id, +}; + +module_i2c_driver(apds9802als_driver); + +MODULE_AUTHOR("Anantha Narayanan + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Register map */ +#define APDS990X_ENABLE 0x00 /* Enable of states and interrupts */ +#define APDS990X_ATIME 0x01 /* ALS ADC time */ +#define APDS990X_PTIME 0x02 /* Proximity ADC time */ +#define APDS990X_WTIME 0x03 /* Wait time */ +#define APDS990X_AILTL 0x04 /* ALS interrupt low threshold low byte */ +#define APDS990X_AILTH 0x05 /* ALS interrupt low threshold hi byte */ +#define APDS990X_AIHTL 0x06 /* ALS interrupt hi threshold low byte */ +#define APDS990X_AIHTH 0x07 /* ALS interrupt hi threshold hi byte */ +#define APDS990X_PILTL 0x08 /* Proximity interrupt low threshold low byte */ +#define APDS990X_PILTH 0x09 /* Proximity interrupt low threshold hi byte */ +#define APDS990X_PIHTL 0x0a /* Proximity interrupt hi threshold low byte */ +#define APDS990X_PIHTH 0x0b /* Proximity interrupt hi threshold hi byte */ +#define APDS990X_PERS 0x0c /* Interrupt persistence filters */ +#define APDS990X_CONFIG 0x0d /* Configuration */ +#define APDS990X_PPCOUNT 0x0e /* Proximity pulse count */ +#define APDS990X_CONTROL 0x0f /* Gain control register */ +#define APDS990X_REV 0x11 /* Revision Number */ +#define APDS990X_ID 0x12 /* Device ID */ +#define APDS990X_STATUS 0x13 /* Device status */ +#define APDS990X_CDATAL 0x14 /* Clear ADC low data register */ +#define APDS990X_CDATAH 0x15 /* Clear ADC high data register */ +#define APDS990X_IRDATAL 0x16 /* IR ADC low data register */ +#define APDS990X_IRDATAH 0x17 /* IR ADC high data register */ +#define APDS990X_PDATAL 0x18 /* Proximity ADC low data register */ +#define APDS990X_PDATAH 0x19 /* Proximity ADC high data register */ + +/* Control */ +#define APDS990X_MAX_AGAIN 3 + +/* Enable register */ +#define APDS990X_EN_PIEN (0x1 << 5) +#define APDS990X_EN_AIEN (0x1 << 4) +#define APDS990X_EN_WEN (0x1 << 3) +#define APDS990X_EN_PEN (0x1 << 2) +#define APDS990X_EN_AEN (0x1 << 1) +#define APDS990X_EN_PON (0x1 << 0) +#define APDS990X_EN_DISABLE_ALL 0 + +/* Status register */ +#define APDS990X_ST_PINT (0x1 << 5) +#define APDS990X_ST_AINT (0x1 << 4) + +/* I2C access types */ +#define APDS990x_CMD_TYPE_MASK (0x03 << 5) +#define APDS990x_CMD_TYPE_RB (0x00 << 5) /* Repeated byte */ +#define APDS990x_CMD_TYPE_INC (0x01 << 5) /* Auto increment */ +#define APDS990x_CMD_TYPE_SPE (0x03 << 5) /* Special function */ + +#define APDS990x_ADDR_SHIFT 0 +#define APDS990x_CMD 0x80 + +/* Interrupt ack commands */ +#define APDS990X_INT_ACK_ALS 0x6 +#define APDS990X_INT_ACK_PS 0x5 +#define APDS990X_INT_ACK_BOTH 0x7 + +/* ptime */ +#define APDS990X_PTIME_DEFAULT 0xff /* Recommended conversion time 2.7ms*/ + +/* wtime */ +#define APDS990X_WTIME_DEFAULT 0xee /* ~50ms wait time */ + +#define APDS990X_TIME_TO_ADC 1024 /* One timetick as ADC count value */ + +/* Persistence */ +#define APDS990X_APERS_SHIFT 0 +#define APDS990X_PPERS_SHIFT 4 + +/* Supported ID:s */ +#define APDS990X_ID_0 0x0 +#define APDS990X_ID_4 0x4 +#define APDS990X_ID_29 0x29 + +/* pgain and pdiode settings */ +#define APDS_PGAIN_1X 0x0 +#define APDS_PDIODE_IR 0x2 + +#define APDS990X_LUX_OUTPUT_SCALE 10 + +/* Reverse chip factors for threshold calculation */ +struct reverse_factors { + u32 afactor; + int cf1; + int irf1; + int cf2; + int irf2; +}; + +struct apds990x_chip { + struct apds990x_platform_data *pdata; + struct i2c_client *client; + struct mutex mutex; /* avoid parallel access */ + struct regulator_bulk_data regs[2]; + wait_queue_head_t wait; + + int prox_en; + bool prox_continuous_mode; + bool lux_wait_fresh_res; + + /* Chip parameters */ + struct apds990x_chip_factors cf; + struct reverse_factors rcf; + u16 atime; /* als integration time */ + u16 arate; /* als reporting rate */ + u16 a_max_result; /* Max possible ADC value with current atime */ + u8 again_meas; /* Gain used in last measurement */ + u8 again_next; /* Next calculated gain */ + u8 pgain; + u8 pdiode; + u8 pdrive; + u8 lux_persistence; + u8 prox_persistence; + + u32 lux_raw; + u32 lux; + u16 lux_clear; + u16 lux_ir; + u16 lux_calib; + u32 lux_thres_hi; + u32 lux_thres_lo; + + u32 prox_thres; + u16 prox_data; + u16 prox_calib; + + char chipname[10]; + u8 revision; +}; + +#define APDS_CALIB_SCALER 8192 +#define APDS_LUX_NEUTRAL_CALIB_VALUE (1 * APDS_CALIB_SCALER) +#define APDS_PROX_NEUTRAL_CALIB_VALUE (1 * APDS_CALIB_SCALER) + +#define APDS_PROX_DEF_THRES 600 +#define APDS_PROX_HYSTERESIS 50 +#define APDS_LUX_DEF_THRES_HI 101 +#define APDS_LUX_DEF_THRES_LO 100 +#define APDS_DEFAULT_PROX_PERS 1 + +#define APDS_TIMEOUT 2000 +#define APDS_STARTUP_DELAY 25000 /* us */ +#define APDS_RANGE 65535 +#define APDS_PROX_RANGE 1023 +#define APDS_LUX_GAIN_LO_LIMIT 100 +#define APDS_LUX_GAIN_LO_LIMIT_STRICT 25 + +#define TIMESTEP 87 /* 2.7ms is about 87 / 32 */ +#define TIME_STEP_SCALER 32 + +#define APDS_LUX_AVERAGING_TIME 50 /* tolerates 50/60Hz ripple */ +#define APDS_LUX_DEFAULT_RATE 200 + +static const u8 again[] = {1, 8, 16, 120}; /* ALS gain steps */ +static const u8 ir_currents[] = {100, 50, 25, 12}; /* IRled currents in mA */ + +/* Following two tables must match i.e 10Hz rate means 1 as persistence value */ +static const u16 arates_hz[] = {10, 5, 2, 1}; +static const u8 apersis[] = {1, 2, 4, 5}; + +/* Regulators */ +static const char reg_vcc[] = "Vdd"; +static const char reg_vled[] = "Vled"; + +static int apds990x_read_byte(struct apds990x_chip *chip, u8 reg, u8 *data) +{ + struct i2c_client *client = chip->client; + s32 ret; + + reg &= ~APDS990x_CMD_TYPE_MASK; + reg |= APDS990x_CMD | APDS990x_CMD_TYPE_RB; + + ret = i2c_smbus_read_byte_data(client, reg); + *data = ret; + return (int)ret; +} + +static int apds990x_read_word(struct apds990x_chip *chip, u8 reg, u16 *data) +{ + struct i2c_client *client = chip->client; + s32 ret; + + reg &= ~APDS990x_CMD_TYPE_MASK; + reg |= APDS990x_CMD | APDS990x_CMD_TYPE_INC; + + ret = i2c_smbus_read_word_data(client, reg); + *data = ret; + return (int)ret; +} + +static int apds990x_write_byte(struct apds990x_chip *chip, u8 reg, u8 data) +{ + struct i2c_client *client = chip->client; + s32 ret; + + reg &= ~APDS990x_CMD_TYPE_MASK; + reg |= APDS990x_CMD | APDS990x_CMD_TYPE_RB; + + ret = i2c_smbus_write_byte_data(client, reg, data); + return (int)ret; +} + +static int apds990x_write_word(struct apds990x_chip *chip, u8 reg, u16 data) +{ + struct i2c_client *client = chip->client; + s32 ret; + + reg &= ~APDS990x_CMD_TYPE_MASK; + reg |= APDS990x_CMD | APDS990x_CMD_TYPE_INC; + + ret = i2c_smbus_write_word_data(client, reg, data); + return (int)ret; +} + +static int apds990x_mode_on(struct apds990x_chip *chip) +{ + /* ALS is mandatory, proximity optional */ + u8 reg = APDS990X_EN_AIEN | APDS990X_EN_PON | APDS990X_EN_AEN | + APDS990X_EN_WEN; + + if (chip->prox_en) + reg |= APDS990X_EN_PIEN | APDS990X_EN_PEN; + + return apds990x_write_byte(chip, APDS990X_ENABLE, reg); +} + +static u16 apds990x_lux_to_threshold(struct apds990x_chip *chip, u32 lux) +{ + u32 thres; + u32 cpl; + u32 ir; + + if (lux == 0) + return 0; + else if (lux == APDS_RANGE) + return APDS_RANGE; + + /* + * Reported LUX value is a combination of the IR and CLEAR channel + * values. However, interrupt threshold is only for clear channel. + * This function approximates needed HW threshold value for a given + * LUX value in the current lightning type. + * IR level compared to visible light varies heavily depending on the + * source of the light + * + * Calculate threshold value for the next measurement period. + * Math: threshold = lux * cpl where + * cpl = atime * again / (glass_attenuation * device_factor) + * (count-per-lux) + * + * First remove calibration. Division by four is to avoid overflow + */ + lux = lux * (APDS_CALIB_SCALER / 4) / (chip->lux_calib / 4); + + /* Multiplication by 64 is to increase accuracy */ + cpl = ((u32)chip->atime * (u32)again[chip->again_next] * + APDS_PARAM_SCALE * 64) / (chip->cf.ga * chip->cf.df); + + thres = lux * cpl / 64; + /* + * Convert IR light from the latest result to match with + * new gain step. This helps to adapt with the current + * source of light. + */ + ir = (u32)chip->lux_ir * (u32)again[chip->again_next] / + (u32)again[chip->again_meas]; + + /* + * Compensate count with IR light impact + * IAC1 > IAC2 (see apds990x_get_lux for formulas) + */ + if (chip->lux_clear * APDS_PARAM_SCALE >= + chip->rcf.afactor * chip->lux_ir) + thres = (chip->rcf.cf1 * thres + chip->rcf.irf1 * ir) / + APDS_PARAM_SCALE; + else + thres = (chip->rcf.cf2 * thres + chip->rcf.irf2 * ir) / + APDS_PARAM_SCALE; + + if (thres >= chip->a_max_result) + thres = chip->a_max_result - 1; + return thres; +} + +static inline int apds990x_set_atime(struct apds990x_chip *chip, u32 time_ms) +{ + u8 reg_value; + + chip->atime = time_ms; + /* Formula is specified in the data sheet */ + reg_value = 256 - ((time_ms * TIME_STEP_SCALER) / TIMESTEP); + /* Calculate max ADC value for given integration time */ + chip->a_max_result = (u16)(256 - reg_value) * APDS990X_TIME_TO_ADC; + return apds990x_write_byte(chip, APDS990X_ATIME, reg_value); +} + +/* Called always with mutex locked */ +static int apds990x_refresh_pthres(struct apds990x_chip *chip, int data) +{ + int ret, lo, hi; + + /* If the chip is not in use, don't try to access it */ + if (pm_runtime_suspended(&chip->client->dev)) + return 0; + + if (data < chip->prox_thres) { + lo = 0; + hi = chip->prox_thres; + } else { + lo = chip->prox_thres - APDS_PROX_HYSTERESIS; + if (chip->prox_continuous_mode) + hi = chip->prox_thres; + else + hi = APDS_RANGE; + } + + ret = apds990x_write_word(chip, APDS990X_PILTL, lo); + ret |= apds990x_write_word(chip, APDS990X_PIHTL, hi); + return ret; +} + +/* Called always with mutex locked */ +static int apds990x_refresh_athres(struct apds990x_chip *chip) +{ + int ret; + /* If the chip is not in use, don't try to access it */ + if (pm_runtime_suspended(&chip->client->dev)) + return 0; + + ret = apds990x_write_word(chip, APDS990X_AILTL, + apds990x_lux_to_threshold(chip, chip->lux_thres_lo)); + ret |= apds990x_write_word(chip, APDS990X_AIHTL, + apds990x_lux_to_threshold(chip, chip->lux_thres_hi)); + + return ret; +} + +/* Called always with mutex locked */ +static void apds990x_force_a_refresh(struct apds990x_chip *chip) +{ + /* This will force ALS interrupt after the next measurement. */ + apds990x_write_word(chip, APDS990X_AILTL, APDS_LUX_DEF_THRES_LO); + apds990x_write_word(chip, APDS990X_AIHTL, APDS_LUX_DEF_THRES_HI); +} + +/* Called always with mutex locked */ +static void apds990x_force_p_refresh(struct apds990x_chip *chip) +{ + /* This will force proximity interrupt after the next measurement. */ + apds990x_write_word(chip, APDS990X_PILTL, APDS_PROX_DEF_THRES - 1); + apds990x_write_word(chip, APDS990X_PIHTL, APDS_PROX_DEF_THRES); +} + +/* Called always with mutex locked */ +static int apds990x_calc_again(struct apds990x_chip *chip) +{ + int curr_again = chip->again_meas; + int next_again = chip->again_meas; + int ret = 0; + + /* Calculate suitable als gain */ + if (chip->lux_clear == chip->a_max_result) + next_again -= 2; /* ALS saturated. Decrease gain by 2 steps */ + else if (chip->lux_clear > chip->a_max_result / 2) + next_again--; + else if (chip->lux_clear < APDS_LUX_GAIN_LO_LIMIT_STRICT) + next_again += 2; /* Too dark. Increase gain by 2 steps */ + else if (chip->lux_clear < APDS_LUX_GAIN_LO_LIMIT) + next_again++; + + /* Limit gain to available range */ + if (next_again < 0) + next_again = 0; + else if (next_again > APDS990X_MAX_AGAIN) + next_again = APDS990X_MAX_AGAIN; + + /* Let's check can we trust the measured result */ + if (chip->lux_clear == chip->a_max_result) + /* Result can be totally garbage due to saturation */ + ret = -ERANGE; + else if (next_again != curr_again && + chip->lux_clear < APDS_LUX_GAIN_LO_LIMIT_STRICT) + /* + * Gain is changed and measurement result is very small. + * Result can be totally garbage due to underflow + */ + ret = -ERANGE; + + chip->again_next = next_again; + apds990x_write_byte(chip, APDS990X_CONTROL, + (chip->pdrive << 6) | + (chip->pdiode << 4) | + (chip->pgain << 2) | + (chip->again_next << 0)); + + /* + * Error means bad result -> re-measurement is needed. The forced + * refresh uses fastest possible persistence setting to get result + * as soon as possible. + */ + if (ret < 0) + apds990x_force_a_refresh(chip); + else + apds990x_refresh_athres(chip); + + return ret; +} + +/* Called always with mutex locked */ +static int apds990x_get_lux(struct apds990x_chip *chip, int clear, int ir) +{ + int iac, iac1, iac2; /* IR adjusted counts */ + u32 lpc; /* Lux per count */ + + /* Formulas: + * iac1 = CF1 * CLEAR_CH - IRF1 * IR_CH + * iac2 = CF2 * CLEAR_CH - IRF2 * IR_CH + */ + iac1 = (chip->cf.cf1 * clear - chip->cf.irf1 * ir) / APDS_PARAM_SCALE; + iac2 = (chip->cf.cf2 * clear - chip->cf.irf2 * ir) / APDS_PARAM_SCALE; + + iac = max(iac1, iac2); + iac = max(iac, 0); + + lpc = APDS990X_LUX_OUTPUT_SCALE * (chip->cf.df * chip->cf.ga) / + (u32)(again[chip->again_meas] * (u32)chip->atime); + + return (iac * lpc) / APDS_PARAM_SCALE; +} + +static int apds990x_ack_int(struct apds990x_chip *chip, u8 mode) +{ + struct i2c_client *client = chip->client; + s32 ret; + u8 reg = APDS990x_CMD | APDS990x_CMD_TYPE_SPE; + + switch (mode & (APDS990X_ST_AINT | APDS990X_ST_PINT)) { + case APDS990X_ST_AINT: + reg |= APDS990X_INT_ACK_ALS; + break; + case APDS990X_ST_PINT: + reg |= APDS990X_INT_ACK_PS; + break; + default: + reg |= APDS990X_INT_ACK_BOTH; + break; + } + + ret = i2c_smbus_read_byte_data(client, reg); + return (int)ret; +} + +static irqreturn_t apds990x_irq(int irq, void *data) +{ + struct apds990x_chip *chip = data; + u8 status; + + apds990x_read_byte(chip, APDS990X_STATUS, &status); + apds990x_ack_int(chip, status); + + mutex_lock(&chip->mutex); + if (!pm_runtime_suspended(&chip->client->dev)) { + if (status & APDS990X_ST_AINT) { + apds990x_read_word(chip, APDS990X_CDATAL, + &chip->lux_clear); + apds990x_read_word(chip, APDS990X_IRDATAL, + &chip->lux_ir); + /* Store used gain for calculations */ + chip->again_meas = chip->again_next; + + chip->lux_raw = apds990x_get_lux(chip, + chip->lux_clear, + chip->lux_ir); + + if (apds990x_calc_again(chip) == 0) { + /* Result is valid */ + chip->lux = chip->lux_raw; + chip->lux_wait_fresh_res = false; + wake_up(&chip->wait); + sysfs_notify(&chip->client->dev.kobj, + NULL, "lux0_input"); + } + } + + if ((status & APDS990X_ST_PINT) && chip->prox_en) { + u16 clr_ch; + + apds990x_read_word(chip, APDS990X_CDATAL, &clr_ch); + /* + * If ALS channel is saturated at min gain, + * proximity gives false posivite values. + * Just ignore them. + */ + if (chip->again_meas == 0 && + clr_ch == chip->a_max_result) + chip->prox_data = 0; + else + apds990x_read_word(chip, + APDS990X_PDATAL, + &chip->prox_data); + + apds990x_refresh_pthres(chip, chip->prox_data); + if (chip->prox_data < chip->prox_thres) + chip->prox_data = 0; + else if (!chip->prox_continuous_mode) + chip->prox_data = APDS_PROX_RANGE; + sysfs_notify(&chip->client->dev.kobj, + NULL, "prox0_raw"); + } + } + mutex_unlock(&chip->mutex); + return IRQ_HANDLED; +} + +static int apds990x_configure(struct apds990x_chip *chip) +{ + /* It is recommended to use disabled mode during these operations */ + apds990x_write_byte(chip, APDS990X_ENABLE, APDS990X_EN_DISABLE_ALL); + + /* conversion and wait times for different state machince states */ + apds990x_write_byte(chip, APDS990X_PTIME, APDS990X_PTIME_DEFAULT); + apds990x_write_byte(chip, APDS990X_WTIME, APDS990X_WTIME_DEFAULT); + apds990x_set_atime(chip, APDS_LUX_AVERAGING_TIME); + + apds990x_write_byte(chip, APDS990X_CONFIG, 0); + + /* Persistence levels */ + apds990x_write_byte(chip, APDS990X_PERS, + (chip->lux_persistence << APDS990X_APERS_SHIFT) | + (chip->prox_persistence << APDS990X_PPERS_SHIFT)); + + apds990x_write_byte(chip, APDS990X_PPCOUNT, chip->pdata->ppcount); + + /* Start with relatively small gain */ + chip->again_meas = 1; + chip->again_next = 1; + apds990x_write_byte(chip, APDS990X_CONTROL, + (chip->pdrive << 6) | + (chip->pdiode << 4) | + (chip->pgain << 2) | + (chip->again_next << 0)); + return 0; +} + +static int apds990x_detect(struct apds990x_chip *chip) +{ + struct i2c_client *client = chip->client; + int ret; + u8 id; + + ret = apds990x_read_byte(chip, APDS990X_ID, &id); + if (ret < 0) { + dev_err(&client->dev, "ID read failed\n"); + return ret; + } + + ret = apds990x_read_byte(chip, APDS990X_REV, &chip->revision); + if (ret < 0) { + dev_err(&client->dev, "REV read failed\n"); + return ret; + } + + switch (id) { + case APDS990X_ID_0: + case APDS990X_ID_4: + case APDS990X_ID_29: + snprintf(chip->chipname, sizeof(chip->chipname), "APDS-990x"); + break; + default: + ret = -ENODEV; + break; + } + return ret; +} + +#ifdef CONFIG_PM +static int apds990x_chip_on(struct apds990x_chip *chip) +{ + int err = regulator_bulk_enable(ARRAY_SIZE(chip->regs), + chip->regs); + if (err < 0) + return err; + + usleep_range(APDS_STARTUP_DELAY, 2 * APDS_STARTUP_DELAY); + + /* Refresh all configs in case of regulators were off */ + chip->prox_data = 0; + apds990x_configure(chip); + apds990x_mode_on(chip); + return 0; +} +#endif + +static int apds990x_chip_off(struct apds990x_chip *chip) +{ + apds990x_write_byte(chip, APDS990X_ENABLE, APDS990X_EN_DISABLE_ALL); + regulator_bulk_disable(ARRAY_SIZE(chip->regs), chip->regs); + return 0; +} + +static ssize_t apds990x_lux_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct apds990x_chip *chip = dev_get_drvdata(dev); + ssize_t ret; + u32 result; + long timeout; + + if (pm_runtime_suspended(dev)) + return -EIO; + + timeout = wait_event_interruptible_timeout(chip->wait, + !chip->lux_wait_fresh_res, + msecs_to_jiffies(APDS_TIMEOUT)); + if (!timeout) + return -EIO; + + mutex_lock(&chip->mutex); + result = (chip->lux * chip->lux_calib) / APDS_CALIB_SCALER; + if (result > (APDS_RANGE * APDS990X_LUX_OUTPUT_SCALE)) + result = APDS_RANGE * APDS990X_LUX_OUTPUT_SCALE; + + ret = sprintf(buf, "%d.%d\n", + result / APDS990X_LUX_OUTPUT_SCALE, + result % APDS990X_LUX_OUTPUT_SCALE); + mutex_unlock(&chip->mutex); + return ret; +} + +static DEVICE_ATTR(lux0_input, S_IRUGO, apds990x_lux_show, NULL); + +static ssize_t apds990x_lux_range_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%u\n", APDS_RANGE); +} + +static DEVICE_ATTR(lux0_sensor_range, S_IRUGO, apds990x_lux_range_show, NULL); + +static ssize_t apds990x_lux_calib_format_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%u\n", APDS_CALIB_SCALER); +} + +static DEVICE_ATTR(lux0_calibscale_default, S_IRUGO, + apds990x_lux_calib_format_show, NULL); + +static ssize_t apds990x_lux_calib_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct apds990x_chip *chip = dev_get_drvdata(dev); + + return sprintf(buf, "%u\n", chip->lux_calib); +} + +static ssize_t apds990x_lux_calib_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct apds990x_chip *chip = dev_get_drvdata(dev); + unsigned long value; + int ret; + + ret = kstrtoul(buf, 0, &value); + if (ret) + return ret; + + chip->lux_calib = value; + + return len; +} + +static DEVICE_ATTR(lux0_calibscale, S_IRUGO | S_IWUSR, apds990x_lux_calib_show, + apds990x_lux_calib_store); + +static ssize_t apds990x_rate_avail(struct device *dev, + struct device_attribute *attr, char *buf) +{ + int i; + int pos = 0; + + for (i = 0; i < ARRAY_SIZE(arates_hz); i++) + pos += sprintf(buf + pos, "%d ", arates_hz[i]); + sprintf(buf + pos - 1, "\n"); + return pos; +} + +static ssize_t apds990x_rate_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct apds990x_chip *chip = dev_get_drvdata(dev); + + return sprintf(buf, "%d\n", chip->arate); +} + +static int apds990x_set_arate(struct apds990x_chip *chip, int rate) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(arates_hz); i++) + if (rate >= arates_hz[i]) + break; + + if (i == ARRAY_SIZE(arates_hz)) + return -EINVAL; + + /* Pick up corresponding persistence value */ + chip->lux_persistence = apersis[i]; + chip->arate = arates_hz[i]; + + /* If the chip is not in use, don't try to access it */ + if (pm_runtime_suspended(&chip->client->dev)) + return 0; + + /* Persistence levels */ + return apds990x_write_byte(chip, APDS990X_PERS, + (chip->lux_persistence << APDS990X_APERS_SHIFT) | + (chip->prox_persistence << APDS990X_PPERS_SHIFT)); +} + +static ssize_t apds990x_rate_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct apds990x_chip *chip = dev_get_drvdata(dev); + unsigned long value; + int ret; + + ret = kstrtoul(buf, 0, &value); + if (ret) + return ret; + + mutex_lock(&chip->mutex); + ret = apds990x_set_arate(chip, value); + mutex_unlock(&chip->mutex); + + if (ret < 0) + return ret; + return len; +} + +static DEVICE_ATTR(lux0_rate_avail, S_IRUGO, apds990x_rate_avail, NULL); + +static DEVICE_ATTR(lux0_rate, S_IRUGO | S_IWUSR, apds990x_rate_show, + apds990x_rate_store); + +static ssize_t apds990x_prox_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + ssize_t ret; + struct apds990x_chip *chip = dev_get_drvdata(dev); + + if (pm_runtime_suspended(dev) || !chip->prox_en) + return -EIO; + + mutex_lock(&chip->mutex); + ret = sprintf(buf, "%d\n", chip->prox_data); + mutex_unlock(&chip->mutex); + return ret; +} + +static DEVICE_ATTR(prox0_raw, S_IRUGO, apds990x_prox_show, NULL); + +static ssize_t apds990x_prox_range_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%u\n", APDS_PROX_RANGE); +} + +static DEVICE_ATTR(prox0_sensor_range, S_IRUGO, apds990x_prox_range_show, NULL); + +static ssize_t apds990x_prox_enable_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct apds990x_chip *chip = dev_get_drvdata(dev); + + return sprintf(buf, "%d\n", chip->prox_en); +} + +static ssize_t apds990x_prox_enable_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct apds990x_chip *chip = dev_get_drvdata(dev); + unsigned long value; + int ret; + + ret = kstrtoul(buf, 0, &value); + if (ret) + return ret; + + mutex_lock(&chip->mutex); + + if (!chip->prox_en) + chip->prox_data = 0; + + if (value) + chip->prox_en++; + else if (chip->prox_en > 0) + chip->prox_en--; + + if (!pm_runtime_suspended(dev)) + apds990x_mode_on(chip); + mutex_unlock(&chip->mutex); + return len; +} + +static DEVICE_ATTR(prox0_raw_en, S_IRUGO | S_IWUSR, apds990x_prox_enable_show, + apds990x_prox_enable_store); + +static const char *reporting_modes[] = {"trigger", "periodic"}; + +static ssize_t apds990x_prox_reporting_mode_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct apds990x_chip *chip = dev_get_drvdata(dev); + + return sprintf(buf, "%s\n", + reporting_modes[!!chip->prox_continuous_mode]); +} + +static ssize_t apds990x_prox_reporting_mode_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct apds990x_chip *chip = dev_get_drvdata(dev); + int ret; + + ret = sysfs_match_string(reporting_modes, buf); + if (ret < 0) + return ret; + + chip->prox_continuous_mode = ret; + return len; +} + +static DEVICE_ATTR(prox0_reporting_mode, S_IRUGO | S_IWUSR, + apds990x_prox_reporting_mode_show, + apds990x_prox_reporting_mode_store); + +static ssize_t apds990x_prox_reporting_avail_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%s %s\n", reporting_modes[0], reporting_modes[1]); +} + +static DEVICE_ATTR(prox0_reporting_mode_avail, S_IRUGO | S_IWUSR, + apds990x_prox_reporting_avail_show, NULL); + + +static ssize_t apds990x_lux_thresh_above_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct apds990x_chip *chip = dev_get_drvdata(dev); + + return sprintf(buf, "%d\n", chip->lux_thres_hi); +} + +static ssize_t apds990x_lux_thresh_below_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct apds990x_chip *chip = dev_get_drvdata(dev); + + return sprintf(buf, "%d\n", chip->lux_thres_lo); +} + +static ssize_t apds990x_set_lux_thresh(struct apds990x_chip *chip, u32 *target, + const char *buf) +{ + unsigned long thresh; + int ret; + + ret = kstrtoul(buf, 0, &thresh); + if (ret) + return ret; + + if (thresh > APDS_RANGE) + return -EINVAL; + + mutex_lock(&chip->mutex); + *target = thresh; + /* + * Don't update values in HW if we are still waiting for + * first interrupt to come after device handle open call. + */ + if (!chip->lux_wait_fresh_res) + apds990x_refresh_athres(chip); + mutex_unlock(&chip->mutex); + return ret; + +} + +static ssize_t apds990x_lux_thresh_above_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct apds990x_chip *chip = dev_get_drvdata(dev); + int ret = apds990x_set_lux_thresh(chip, &chip->lux_thres_hi, buf); + + if (ret < 0) + return ret; + return len; +} + +static ssize_t apds990x_lux_thresh_below_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct apds990x_chip *chip = dev_get_drvdata(dev); + int ret = apds990x_set_lux_thresh(chip, &chip->lux_thres_lo, buf); + + if (ret < 0) + return ret; + return len; +} + +static DEVICE_ATTR(lux0_thresh_above_value, S_IRUGO | S_IWUSR, + apds990x_lux_thresh_above_show, + apds990x_lux_thresh_above_store); + +static DEVICE_ATTR(lux0_thresh_below_value, S_IRUGO | S_IWUSR, + apds990x_lux_thresh_below_show, + apds990x_lux_thresh_below_store); + +static ssize_t apds990x_prox_threshold_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct apds990x_chip *chip = dev_get_drvdata(dev); + + return sprintf(buf, "%d\n", chip->prox_thres); +} + +static ssize_t apds990x_prox_threshold_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct apds990x_chip *chip = dev_get_drvdata(dev); + unsigned long value; + int ret; + + ret = kstrtoul(buf, 0, &value); + if (ret) + return ret; + + if ((value > APDS_RANGE) || (value == 0) || + (value < APDS_PROX_HYSTERESIS)) + return -EINVAL; + + mutex_lock(&chip->mutex); + chip->prox_thres = value; + + apds990x_force_p_refresh(chip); + mutex_unlock(&chip->mutex); + return len; +} + +static DEVICE_ATTR(prox0_thresh_above_value, S_IRUGO | S_IWUSR, + apds990x_prox_threshold_show, + apds990x_prox_threshold_store); + +static ssize_t apds990x_power_state_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", !pm_runtime_suspended(dev)); + return 0; +} + +static ssize_t apds990x_power_state_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct apds990x_chip *chip = dev_get_drvdata(dev); + unsigned long value; + int ret; + + ret = kstrtoul(buf, 0, &value); + if (ret) + return ret; + + if (value) { + pm_runtime_get_sync(dev); + mutex_lock(&chip->mutex); + chip->lux_wait_fresh_res = true; + apds990x_force_a_refresh(chip); + apds990x_force_p_refresh(chip); + mutex_unlock(&chip->mutex); + } else { + if (!pm_runtime_suspended(dev)) + pm_runtime_put(dev); + } + return len; +} + +static DEVICE_ATTR(power_state, S_IRUGO | S_IWUSR, + apds990x_power_state_show, + apds990x_power_state_store); + +static ssize_t apds990x_chip_id_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct apds990x_chip *chip = dev_get_drvdata(dev); + + return sprintf(buf, "%s %d\n", chip->chipname, chip->revision); +} + +static DEVICE_ATTR(chip_id, S_IRUGO, apds990x_chip_id_show, NULL); + +static struct attribute *sysfs_attrs_ctrl[] = { + &dev_attr_lux0_calibscale.attr, + &dev_attr_lux0_calibscale_default.attr, + &dev_attr_lux0_input.attr, + &dev_attr_lux0_sensor_range.attr, + &dev_attr_lux0_rate.attr, + &dev_attr_lux0_rate_avail.attr, + &dev_attr_lux0_thresh_above_value.attr, + &dev_attr_lux0_thresh_below_value.attr, + &dev_attr_prox0_raw_en.attr, + &dev_attr_prox0_raw.attr, + &dev_attr_prox0_sensor_range.attr, + &dev_attr_prox0_thresh_above_value.attr, + &dev_attr_prox0_reporting_mode.attr, + &dev_attr_prox0_reporting_mode_avail.attr, + &dev_attr_chip_id.attr, + &dev_attr_power_state.attr, + NULL +}; + +static const struct attribute_group apds990x_attribute_group[] = { + {.attrs = sysfs_attrs_ctrl }, +}; + +static int apds990x_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct apds990x_chip *chip; + int err; + + chip = kzalloc(sizeof *chip, GFP_KERNEL); + if (!chip) + return -ENOMEM; + + i2c_set_clientdata(client, chip); + chip->client = client; + + init_waitqueue_head(&chip->wait); + mutex_init(&chip->mutex); + chip->pdata = client->dev.platform_data; + + if (chip->pdata == NULL) { + dev_err(&client->dev, "platform data is mandatory\n"); + err = -EINVAL; + goto fail1; + } + + if (chip->pdata->cf.ga == 0) { + /* set uncovered sensor default parameters */ + chip->cf.ga = 1966; /* 0.48 * APDS_PARAM_SCALE */ + chip->cf.cf1 = 4096; /* 1.00 * APDS_PARAM_SCALE */ + chip->cf.irf1 = 9134; /* 2.23 * APDS_PARAM_SCALE */ + chip->cf.cf2 = 2867; /* 0.70 * APDS_PARAM_SCALE */ + chip->cf.irf2 = 5816; /* 1.42 * APDS_PARAM_SCALE */ + chip->cf.df = 52; + } else { + chip->cf = chip->pdata->cf; + } + + /* precalculate inverse chip factors for threshold control */ + chip->rcf.afactor = + (chip->cf.irf1 - chip->cf.irf2) * APDS_PARAM_SCALE / + (chip->cf.cf1 - chip->cf.cf2); + chip->rcf.cf1 = APDS_PARAM_SCALE * APDS_PARAM_SCALE / + chip->cf.cf1; + chip->rcf.irf1 = chip->cf.irf1 * APDS_PARAM_SCALE / + chip->cf.cf1; + chip->rcf.cf2 = APDS_PARAM_SCALE * APDS_PARAM_SCALE / + chip->cf.cf2; + chip->rcf.irf2 = chip->cf.irf2 * APDS_PARAM_SCALE / + chip->cf.cf2; + + /* Set something to start with */ + chip->lux_thres_hi = APDS_LUX_DEF_THRES_HI; + chip->lux_thres_lo = APDS_LUX_DEF_THRES_LO; + chip->lux_calib = APDS_LUX_NEUTRAL_CALIB_VALUE; + + chip->prox_thres = APDS_PROX_DEF_THRES; + chip->pdrive = chip->pdata->pdrive; + chip->pdiode = APDS_PDIODE_IR; + chip->pgain = APDS_PGAIN_1X; + chip->prox_calib = APDS_PROX_NEUTRAL_CALIB_VALUE; + chip->prox_persistence = APDS_DEFAULT_PROX_PERS; + chip->prox_continuous_mode = false; + + chip->regs[0].supply = reg_vcc; + chip->regs[1].supply = reg_vled; + + err = regulator_bulk_get(&client->dev, + ARRAY_SIZE(chip->regs), chip->regs); + if (err < 0) { + dev_err(&client->dev, "Cannot get regulators\n"); + goto fail1; + } + + err = regulator_bulk_enable(ARRAY_SIZE(chip->regs), chip->regs); + if (err < 0) { + dev_err(&client->dev, "Cannot enable regulators\n"); + goto fail2; + } + + usleep_range(APDS_STARTUP_DELAY, 2 * APDS_STARTUP_DELAY); + + err = apds990x_detect(chip); + if (err < 0) { + dev_err(&client->dev, "APDS990X not found\n"); + goto fail3; + } + + pm_runtime_set_active(&client->dev); + + apds990x_configure(chip); + apds990x_set_arate(chip, APDS_LUX_DEFAULT_RATE); + apds990x_mode_on(chip); + + pm_runtime_enable(&client->dev); + + if (chip->pdata->setup_resources) { + err = chip->pdata->setup_resources(); + if (err) { + err = -EINVAL; + goto fail3; + } + } + + err = sysfs_create_group(&chip->client->dev.kobj, + apds990x_attribute_group); + if (err < 0) { + dev_err(&chip->client->dev, "Sysfs registration failed\n"); + goto fail4; + } + + err = request_threaded_irq(client->irq, NULL, + apds990x_irq, + IRQF_TRIGGER_FALLING | IRQF_TRIGGER_LOW | + IRQF_ONESHOT, + "apds990x", chip); + if (err) { + dev_err(&client->dev, "could not get IRQ %d\n", + client->irq); + goto fail5; + } + return err; +fail5: + sysfs_remove_group(&chip->client->dev.kobj, + &apds990x_attribute_group[0]); +fail4: + if (chip->pdata && chip->pdata->release_resources) + chip->pdata->release_resources(); +fail3: + regulator_bulk_disable(ARRAY_SIZE(chip->regs), chip->regs); +fail2: + regulator_bulk_free(ARRAY_SIZE(chip->regs), chip->regs); +fail1: + kfree(chip); + return err; +} + +static int apds990x_remove(struct i2c_client *client) +{ + struct apds990x_chip *chip = i2c_get_clientdata(client); + + free_irq(client->irq, chip); + sysfs_remove_group(&chip->client->dev.kobj, + apds990x_attribute_group); + + if (chip->pdata && chip->pdata->release_resources) + chip->pdata->release_resources(); + + if (!pm_runtime_suspended(&client->dev)) + apds990x_chip_off(chip); + + pm_runtime_disable(&client->dev); + pm_runtime_set_suspended(&client->dev); + + regulator_bulk_free(ARRAY_SIZE(chip->regs), chip->regs); + + kfree(chip); + return 0; +} + +#ifdef CONFIG_PM_SLEEP +static int apds990x_suspend(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + struct apds990x_chip *chip = i2c_get_clientdata(client); + + apds990x_chip_off(chip); + return 0; +} + +static int apds990x_resume(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + struct apds990x_chip *chip = i2c_get_clientdata(client); + + /* + * If we were enabled at suspend time, it is expected + * everything works nice and smoothly. Chip_on is enough + */ + apds990x_chip_on(chip); + + return 0; +} +#endif + +#ifdef CONFIG_PM +static int apds990x_runtime_suspend(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + struct apds990x_chip *chip = i2c_get_clientdata(client); + + apds990x_chip_off(chip); + return 0; +} + +static int apds990x_runtime_resume(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + struct apds990x_chip *chip = i2c_get_clientdata(client); + + apds990x_chip_on(chip); + return 0; +} + +#endif + +static const struct i2c_device_id apds990x_id[] = { + {"apds990x", 0 }, + {} +}; + +MODULE_DEVICE_TABLE(i2c, apds990x_id); + +static const struct dev_pm_ops apds990x_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(apds990x_suspend, apds990x_resume) + SET_RUNTIME_PM_OPS(apds990x_runtime_suspend, + apds990x_runtime_resume, + NULL) +}; + +static struct i2c_driver apds990x_driver = { + .driver = { + .name = "apds990x", + .pm = &apds990x_pm_ops, + }, + .probe = apds990x_probe, + .remove = apds990x_remove, + .id_table = apds990x_id, +}; + +module_i2c_driver(apds990x_driver); + +MODULE_DESCRIPTION("APDS990X combined ALS and proximity sensor"); +MODULE_AUTHOR("Samu Onkalo, Nokia Corporation"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/misc/aspeed-lpc-ctrl.c b/drivers/misc/aspeed-lpc-ctrl.c new file mode 100644 index 000000000..870ab0dfc --- /dev/null +++ b/drivers/misc/aspeed-lpc-ctrl.c @@ -0,0 +1,300 @@ +/* + * Copyright 2017 IBM Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#define DEVICE_NAME "aspeed-lpc-ctrl" + +#define HICR5 0x0 +#define HICR5_ENL2H BIT(8) +#define HICR5_ENFWH BIT(10) + +#define HICR7 0x8 +#define HICR8 0xc + +struct aspeed_lpc_ctrl { + struct miscdevice miscdev; + struct regmap *regmap; + struct clk *clk; + phys_addr_t mem_base; + resource_size_t mem_size; + u32 pnor_size; + u32 pnor_base; +}; + +static struct aspeed_lpc_ctrl *file_aspeed_lpc_ctrl(struct file *file) +{ + return container_of(file->private_data, struct aspeed_lpc_ctrl, + miscdev); +} + +static int aspeed_lpc_ctrl_mmap(struct file *file, struct vm_area_struct *vma) +{ + struct aspeed_lpc_ctrl *lpc_ctrl = file_aspeed_lpc_ctrl(file); + unsigned long vsize = vma->vm_end - vma->vm_start; + pgprot_t prot = vma->vm_page_prot; + + if (vma->vm_pgoff + vma_pages(vma) > lpc_ctrl->mem_size >> PAGE_SHIFT) + return -EINVAL; + + /* ast2400/2500 AHB accesses are not cache coherent */ + prot = pgprot_noncached(prot); + + if (remap_pfn_range(vma, vma->vm_start, + (lpc_ctrl->mem_base >> PAGE_SHIFT) + vma->vm_pgoff, + vsize, prot)) + return -EAGAIN; + + return 0; +} + +static long aspeed_lpc_ctrl_ioctl(struct file *file, unsigned int cmd, + unsigned long param) +{ + struct aspeed_lpc_ctrl *lpc_ctrl = file_aspeed_lpc_ctrl(file); + void __user *p = (void __user *)param; + struct aspeed_lpc_ctrl_mapping map; + u32 addr; + u32 size; + long rc; + + if (copy_from_user(&map, p, sizeof(map))) + return -EFAULT; + + if (map.flags != 0) + return -EINVAL; + + switch (cmd) { + case ASPEED_LPC_CTRL_IOCTL_GET_SIZE: + /* The flash windows don't report their size */ + if (map.window_type != ASPEED_LPC_CTRL_WINDOW_MEMORY) + return -EINVAL; + + /* Support more than one window id in the future */ + if (map.window_id != 0) + return -EINVAL; + + map.size = lpc_ctrl->mem_size; + + return copy_to_user(p, &map, sizeof(map)) ? -EFAULT : 0; + case ASPEED_LPC_CTRL_IOCTL_MAP: + + /* + * The top half of HICR7 is the MSB of the BMC address of the + * mapping. + * The bottom half of HICR7 is the MSB of the HOST LPC + * firmware space address of the mapping. + * + * The 1 bits in the top of half of HICR8 represent the bits + * (in the requested address) that should be ignored and + * replaced with those from the top half of HICR7. + * The 1 bits in the bottom half of HICR8 represent the bits + * (in the requested address) that should be kept and pass + * into the BMC address space. + */ + + /* + * It doesn't make sense to talk about a size or offset with + * low 16 bits set. Both HICR7 and HICR8 talk about the top 16 + * bits of addresses and sizes. + */ + + if ((map.size & 0x0000ffff) || (map.offset & 0x0000ffff)) + return -EINVAL; + + /* + * Because of the way the masks work in HICR8 offset has to + * be a multiple of size. + */ + if (map.offset & (map.size - 1)) + return -EINVAL; + + if (map.window_type == ASPEED_LPC_CTRL_WINDOW_FLASH) { + addr = lpc_ctrl->pnor_base; + size = lpc_ctrl->pnor_size; + } else if (map.window_type == ASPEED_LPC_CTRL_WINDOW_MEMORY) { + addr = lpc_ctrl->mem_base; + size = lpc_ctrl->mem_size; + } else { + return -EINVAL; + } + + /* Check overflow first! */ + if (map.offset + map.size < map.offset || + map.offset + map.size > size) + return -EINVAL; + + if (map.size == 0 || map.size > size) + return -EINVAL; + + addr += map.offset; + + /* + * addr (host lpc address) is safe regardless of values. This + * simply changes the address the host has to request on its + * side of the LPC bus. This cannot impact the hosts own + * memory space by surprise as LPC specific accessors are + * required. The only strange thing that could be done is + * setting the lower 16 bits but the shift takes care of that. + */ + + rc = regmap_write(lpc_ctrl->regmap, HICR7, + (addr | (map.addr >> 16))); + if (rc) + return rc; + + rc = regmap_write(lpc_ctrl->regmap, HICR8, + (~(map.size - 1)) | ((map.size >> 16) - 1)); + if (rc) + return rc; + + /* + * Enable LPC FHW cycles. This is required for the host to + * access the regions specified. + */ + return regmap_update_bits(lpc_ctrl->regmap, HICR5, + HICR5_ENFWH | HICR5_ENL2H, + HICR5_ENFWH | HICR5_ENL2H); + } + + return -EINVAL; +} + +static const struct file_operations aspeed_lpc_ctrl_fops = { + .owner = THIS_MODULE, + .mmap = aspeed_lpc_ctrl_mmap, + .unlocked_ioctl = aspeed_lpc_ctrl_ioctl, +}; + +static int aspeed_lpc_ctrl_probe(struct platform_device *pdev) +{ + struct aspeed_lpc_ctrl *lpc_ctrl; + struct device_node *node; + struct resource resm; + struct device *dev; + int rc; + + dev = &pdev->dev; + + lpc_ctrl = devm_kzalloc(dev, sizeof(*lpc_ctrl), GFP_KERNEL); + if (!lpc_ctrl) + return -ENOMEM; + + node = of_parse_phandle(dev->of_node, "flash", 0); + if (!node) { + dev_err(dev, "Didn't find host pnor flash node\n"); + return -ENODEV; + } + + rc = of_address_to_resource(node, 1, &resm); + of_node_put(node); + if (rc) { + dev_err(dev, "Couldn't address to resource for flash\n"); + return rc; + } + + lpc_ctrl->pnor_size = resource_size(&resm); + lpc_ctrl->pnor_base = resm.start; + + dev_set_drvdata(&pdev->dev, lpc_ctrl); + + node = of_parse_phandle(dev->of_node, "memory-region", 0); + if (!node) { + dev_err(dev, "Didn't find reserved memory\n"); + return -EINVAL; + } + + rc = of_address_to_resource(node, 0, &resm); + of_node_put(node); + if (rc) { + dev_err(dev, "Couldn't address to resource for reserved memory\n"); + return -ENOMEM; + } + + lpc_ctrl->mem_size = resource_size(&resm); + lpc_ctrl->mem_base = resm.start; + + lpc_ctrl->regmap = syscon_node_to_regmap( + pdev->dev.parent->of_node); + if (IS_ERR(lpc_ctrl->regmap)) { + dev_err(dev, "Couldn't get regmap\n"); + return -ENODEV; + } + + lpc_ctrl->clk = devm_clk_get(dev, NULL); + if (IS_ERR(lpc_ctrl->clk)) { + dev_err(dev, "couldn't get clock\n"); + return PTR_ERR(lpc_ctrl->clk); + } + rc = clk_prepare_enable(lpc_ctrl->clk); + if (rc) { + dev_err(dev, "couldn't enable clock\n"); + return rc; + } + + lpc_ctrl->miscdev.minor = MISC_DYNAMIC_MINOR; + lpc_ctrl->miscdev.name = DEVICE_NAME; + lpc_ctrl->miscdev.fops = &aspeed_lpc_ctrl_fops; + lpc_ctrl->miscdev.parent = dev; + rc = misc_register(&lpc_ctrl->miscdev); + if (rc) { + dev_err(dev, "Unable to register device\n"); + goto err; + } + + dev_info(dev, "Loaded at %pr\n", &resm); + + return 0; + +err: + clk_disable_unprepare(lpc_ctrl->clk); + return rc; +} + +static int aspeed_lpc_ctrl_remove(struct platform_device *pdev) +{ + struct aspeed_lpc_ctrl *lpc_ctrl = dev_get_drvdata(&pdev->dev); + + misc_deregister(&lpc_ctrl->miscdev); + clk_disable_unprepare(lpc_ctrl->clk); + + return 0; +} + +static const struct of_device_id aspeed_lpc_ctrl_match[] = { + { .compatible = "aspeed,ast2400-lpc-ctrl" }, + { .compatible = "aspeed,ast2500-lpc-ctrl" }, + { }, +}; + +static struct platform_driver aspeed_lpc_ctrl_driver = { + .driver = { + .name = DEVICE_NAME, + .of_match_table = aspeed_lpc_ctrl_match, + }, + .probe = aspeed_lpc_ctrl_probe, + .remove = aspeed_lpc_ctrl_remove, +}; + +module_platform_driver(aspeed_lpc_ctrl_driver); + +MODULE_DEVICE_TABLE(of, aspeed_lpc_ctrl_match); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Cyril Bur "); +MODULE_DESCRIPTION("Control for aspeed 2400/2500 LPC HOST to BMC mappings"); diff --git a/drivers/misc/aspeed-lpc-snoop.c b/drivers/misc/aspeed-lpc-snoop.c new file mode 100644 index 000000000..e2cb0b960 --- /dev/null +++ b/drivers/misc/aspeed-lpc-snoop.c @@ -0,0 +1,375 @@ +/* + * Copyright 2017 Google Inc + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Provides a simple driver to control the ASPEED LPC snoop interface which + * allows the BMC to listen on and save the data written by + * the host to an arbitrary LPC I/O port. + * + * Typically used by the BMC to "watch" host boot progress via port + * 0x80 writes made by the BIOS during the boot process. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DEVICE_NAME "aspeed-lpc-snoop" + +#define NUM_SNOOP_CHANNELS 2 +#define SNOOP_FIFO_SIZE 2048 + +#define HICR5 0x0 +#define HICR5_EN_SNP0W BIT(0) +#define HICR5_ENINT_SNP0W BIT(1) +#define HICR5_EN_SNP1W BIT(2) +#define HICR5_ENINT_SNP1W BIT(3) + +#define HICR6 0x4 +#define HICR6_STR_SNP0W BIT(0) +#define HICR6_STR_SNP1W BIT(1) +#define SNPWADR 0x10 +#define SNPWADR_CH0_MASK GENMASK(15, 0) +#define SNPWADR_CH0_SHIFT 0 +#define SNPWADR_CH1_MASK GENMASK(31, 16) +#define SNPWADR_CH1_SHIFT 16 +#define SNPWDR 0x14 +#define SNPWDR_CH0_MASK GENMASK(7, 0) +#define SNPWDR_CH0_SHIFT 0 +#define SNPWDR_CH1_MASK GENMASK(15, 8) +#define SNPWDR_CH1_SHIFT 8 +#define HICRB 0x80 +#define HICRB_ENSNP0D BIT(14) +#define HICRB_ENSNP1D BIT(15) + +struct aspeed_lpc_snoop_model_data { + /* The ast2400 has bits 14 and 15 as reserved, whereas the ast2500 + * can use them. + */ + unsigned int has_hicrb_ensnp; +}; + +struct aspeed_lpc_snoop_channel { + struct kfifo fifo; + wait_queue_head_t wq; + struct miscdevice miscdev; +}; + +struct aspeed_lpc_snoop { + struct regmap *regmap; + int irq; + struct clk *clk; + struct aspeed_lpc_snoop_channel chan[NUM_SNOOP_CHANNELS]; +}; + +static struct aspeed_lpc_snoop_channel *snoop_file_to_chan(struct file *file) +{ + return container_of(file->private_data, + struct aspeed_lpc_snoop_channel, + miscdev); +} + +static ssize_t snoop_file_read(struct file *file, char __user *buffer, + size_t count, loff_t *ppos) +{ + struct aspeed_lpc_snoop_channel *chan = snoop_file_to_chan(file); + unsigned int copied; + int ret = 0; + + if (kfifo_is_empty(&chan->fifo)) { + if (file->f_flags & O_NONBLOCK) + return -EAGAIN; + ret = wait_event_interruptible(chan->wq, + !kfifo_is_empty(&chan->fifo)); + if (ret == -ERESTARTSYS) + return -EINTR; + } + ret = kfifo_to_user(&chan->fifo, buffer, count, &copied); + if (ret) + return ret; + + return copied; +} + +static __poll_t snoop_file_poll(struct file *file, + struct poll_table_struct *pt) +{ + struct aspeed_lpc_snoop_channel *chan = snoop_file_to_chan(file); + + poll_wait(file, &chan->wq, pt); + return !kfifo_is_empty(&chan->fifo) ? EPOLLIN : 0; +} + +static const struct file_operations snoop_fops = { + .owner = THIS_MODULE, + .read = snoop_file_read, + .poll = snoop_file_poll, + .llseek = noop_llseek, +}; + +/* Save a byte to a FIFO and discard the oldest byte if FIFO is full */ +static void put_fifo_with_discard(struct aspeed_lpc_snoop_channel *chan, u8 val) +{ + if (!kfifo_initialized(&chan->fifo)) + return; + if (kfifo_is_full(&chan->fifo)) + kfifo_skip(&chan->fifo); + kfifo_put(&chan->fifo, val); + wake_up_interruptible(&chan->wq); +} + +static irqreturn_t aspeed_lpc_snoop_irq(int irq, void *arg) +{ + struct aspeed_lpc_snoop *lpc_snoop = arg; + u32 reg, data; + + if (regmap_read(lpc_snoop->regmap, HICR6, ®)) + return IRQ_NONE; + + /* Check if one of the snoop channels is interrupting */ + reg &= (HICR6_STR_SNP0W | HICR6_STR_SNP1W); + if (!reg) + return IRQ_NONE; + + /* Ack pending IRQs */ + regmap_write(lpc_snoop->regmap, HICR6, reg); + + /* Read and save most recent snoop'ed data byte to FIFO */ + regmap_read(lpc_snoop->regmap, SNPWDR, &data); + + if (reg & HICR6_STR_SNP0W) { + u8 val = (data & SNPWDR_CH0_MASK) >> SNPWDR_CH0_SHIFT; + + put_fifo_with_discard(&lpc_snoop->chan[0], val); + } + if (reg & HICR6_STR_SNP1W) { + u8 val = (data & SNPWDR_CH1_MASK) >> SNPWDR_CH1_SHIFT; + + put_fifo_with_discard(&lpc_snoop->chan[1], val); + } + + return IRQ_HANDLED; +} + +static int aspeed_lpc_snoop_config_irq(struct aspeed_lpc_snoop *lpc_snoop, + struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + int rc; + + lpc_snoop->irq = platform_get_irq(pdev, 0); + if (!lpc_snoop->irq) + return -ENODEV; + + rc = devm_request_irq(dev, lpc_snoop->irq, + aspeed_lpc_snoop_irq, IRQF_SHARED, + DEVICE_NAME, lpc_snoop); + if (rc < 0) { + dev_warn(dev, "Unable to request IRQ %d\n", lpc_snoop->irq); + lpc_snoop->irq = 0; + return rc; + } + + return 0; +} + +static int aspeed_lpc_enable_snoop(struct aspeed_lpc_snoop *lpc_snoop, + struct device *dev, + int channel, u16 lpc_port) +{ + int rc = 0; + u32 hicr5_en, snpwadr_mask, snpwadr_shift, hicrb_en; + const struct aspeed_lpc_snoop_model_data *model_data = + of_device_get_match_data(dev); + + init_waitqueue_head(&lpc_snoop->chan[channel].wq); + /* Create FIFO datastructure */ + rc = kfifo_alloc(&lpc_snoop->chan[channel].fifo, + SNOOP_FIFO_SIZE, GFP_KERNEL); + if (rc) + return rc; + + lpc_snoop->chan[channel].miscdev.minor = MISC_DYNAMIC_MINOR; + lpc_snoop->chan[channel].miscdev.name = + devm_kasprintf(dev, GFP_KERNEL, "%s%d", DEVICE_NAME, channel); + lpc_snoop->chan[channel].miscdev.fops = &snoop_fops; + lpc_snoop->chan[channel].miscdev.parent = dev; + rc = misc_register(&lpc_snoop->chan[channel].miscdev); + if (rc) + return rc; + + /* Enable LPC snoop channel at requested port */ + switch (channel) { + case 0: + hicr5_en = HICR5_EN_SNP0W | HICR5_ENINT_SNP0W; + snpwadr_mask = SNPWADR_CH0_MASK; + snpwadr_shift = SNPWADR_CH0_SHIFT; + hicrb_en = HICRB_ENSNP0D; + break; + case 1: + hicr5_en = HICR5_EN_SNP1W | HICR5_ENINT_SNP1W; + snpwadr_mask = SNPWADR_CH1_MASK; + snpwadr_shift = SNPWADR_CH1_SHIFT; + hicrb_en = HICRB_ENSNP1D; + break; + default: + return -EINVAL; + } + + regmap_update_bits(lpc_snoop->regmap, HICR5, hicr5_en, hicr5_en); + regmap_update_bits(lpc_snoop->regmap, SNPWADR, snpwadr_mask, + lpc_port << snpwadr_shift); + if (model_data->has_hicrb_ensnp) + regmap_update_bits(lpc_snoop->regmap, HICRB, + hicrb_en, hicrb_en); + + return rc; +} + +static void aspeed_lpc_disable_snoop(struct aspeed_lpc_snoop *lpc_snoop, + int channel) +{ + switch (channel) { + case 0: + regmap_update_bits(lpc_snoop->regmap, HICR5, + HICR5_EN_SNP0W | HICR5_ENINT_SNP0W, + 0); + break; + case 1: + regmap_update_bits(lpc_snoop->regmap, HICR5, + HICR5_EN_SNP1W | HICR5_ENINT_SNP1W, + 0); + break; + default: + return; + } + + kfifo_free(&lpc_snoop->chan[channel].fifo); + misc_deregister(&lpc_snoop->chan[channel].miscdev); +} + +static int aspeed_lpc_snoop_probe(struct platform_device *pdev) +{ + struct aspeed_lpc_snoop *lpc_snoop; + struct device *dev; + u32 port; + int rc; + + dev = &pdev->dev; + + lpc_snoop = devm_kzalloc(dev, sizeof(*lpc_snoop), GFP_KERNEL); + if (!lpc_snoop) + return -ENOMEM; + + lpc_snoop->regmap = syscon_node_to_regmap( + pdev->dev.parent->of_node); + if (IS_ERR(lpc_snoop->regmap)) { + dev_err(dev, "Couldn't get regmap\n"); + return -ENODEV; + } + + dev_set_drvdata(&pdev->dev, lpc_snoop); + + rc = of_property_read_u32_index(dev->of_node, "snoop-ports", 0, &port); + if (rc) { + dev_err(dev, "no snoop ports configured\n"); + return -ENODEV; + } + + lpc_snoop->clk = devm_clk_get(dev, NULL); + if (IS_ERR(lpc_snoop->clk)) { + rc = PTR_ERR(lpc_snoop->clk); + if (rc != -EPROBE_DEFER) + dev_err(dev, "couldn't get clock\n"); + return rc; + } + rc = clk_prepare_enable(lpc_snoop->clk); + if (rc) { + dev_err(dev, "couldn't enable clock\n"); + return rc; + } + + rc = aspeed_lpc_snoop_config_irq(lpc_snoop, pdev); + if (rc) + goto err; + + rc = aspeed_lpc_enable_snoop(lpc_snoop, dev, 0, port); + if (rc) + goto err; + + /* Configuration of 2nd snoop channel port is optional */ + if (of_property_read_u32_index(dev->of_node, "snoop-ports", + 1, &port) == 0) { + rc = aspeed_lpc_enable_snoop(lpc_snoop, dev, 1, port); + if (rc) { + aspeed_lpc_disable_snoop(lpc_snoop, 0); + goto err; + } + } + + return 0; + +err: + clk_disable_unprepare(lpc_snoop->clk); + + return rc; +} + +static int aspeed_lpc_snoop_remove(struct platform_device *pdev) +{ + struct aspeed_lpc_snoop *lpc_snoop = dev_get_drvdata(&pdev->dev); + + /* Disable both snoop channels */ + aspeed_lpc_disable_snoop(lpc_snoop, 0); + aspeed_lpc_disable_snoop(lpc_snoop, 1); + + clk_disable_unprepare(lpc_snoop->clk); + + return 0; +} + +static const struct aspeed_lpc_snoop_model_data ast2400_model_data = { + .has_hicrb_ensnp = 0, +}; + +static const struct aspeed_lpc_snoop_model_data ast2500_model_data = { + .has_hicrb_ensnp = 1, +}; + +static const struct of_device_id aspeed_lpc_snoop_match[] = { + { .compatible = "aspeed,ast2400-lpc-snoop", + .data = &ast2400_model_data }, + { .compatible = "aspeed,ast2500-lpc-snoop", + .data = &ast2500_model_data }, + { }, +}; + +static struct platform_driver aspeed_lpc_snoop_driver = { + .driver = { + .name = DEVICE_NAME, + .of_match_table = aspeed_lpc_snoop_match, + }, + .probe = aspeed_lpc_snoop_probe, + .remove = aspeed_lpc_snoop_remove, +}; + +module_platform_driver(aspeed_lpc_snoop_driver); + +MODULE_DEVICE_TABLE(of, aspeed_lpc_snoop_match); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Robert Lippert "); +MODULE_DESCRIPTION("Linux driver to control Aspeed LPC snoop functionality"); diff --git a/drivers/misc/atmel-ssc.c b/drivers/misc/atmel-ssc.c new file mode 100644 index 000000000..48521861b --- /dev/null +++ b/drivers/misc/atmel-ssc.c @@ -0,0 +1,285 @@ +/* + * Atmel SSC driver + * + * Copyright (C) 2007 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "../../sound/soc/atmel/atmel_ssc_dai.h" + +/* Serialize access to ssc_list and user count */ +static DEFINE_MUTEX(user_lock); +static LIST_HEAD(ssc_list); + +struct ssc_device *ssc_request(unsigned int ssc_num) +{ + int ssc_valid = 0; + struct ssc_device *ssc; + + mutex_lock(&user_lock); + list_for_each_entry(ssc, &ssc_list, list) { + if (ssc->pdev->dev.of_node) { + if (of_alias_get_id(ssc->pdev->dev.of_node, "ssc") + == ssc_num) { + ssc->pdev->id = ssc_num; + ssc_valid = 1; + break; + } + } else if (ssc->pdev->id == ssc_num) { + ssc_valid = 1; + break; + } + } + + if (!ssc_valid) { + mutex_unlock(&user_lock); + pr_err("ssc: ssc%d platform device is missing\n", ssc_num); + return ERR_PTR(-ENODEV); + } + + if (ssc->user) { + mutex_unlock(&user_lock); + dev_dbg(&ssc->pdev->dev, "module busy\n"); + return ERR_PTR(-EBUSY); + } + ssc->user++; + mutex_unlock(&user_lock); + + clk_prepare(ssc->clk); + + return ssc; +} +EXPORT_SYMBOL(ssc_request); + +void ssc_free(struct ssc_device *ssc) +{ + bool disable_clk = true; + + mutex_lock(&user_lock); + if (ssc->user) + ssc->user--; + else { + disable_clk = false; + dev_dbg(&ssc->pdev->dev, "device already free\n"); + } + mutex_unlock(&user_lock); + + if (disable_clk) + clk_unprepare(ssc->clk); +} +EXPORT_SYMBOL(ssc_free); + +static struct atmel_ssc_platform_data at91rm9200_config = { + .use_dma = 0, + .has_fslen_ext = 0, +}; + +static struct atmel_ssc_platform_data at91sam9rl_config = { + .use_dma = 0, + .has_fslen_ext = 1, +}; + +static struct atmel_ssc_platform_data at91sam9g45_config = { + .use_dma = 1, + .has_fslen_ext = 1, +}; + +static const struct platform_device_id atmel_ssc_devtypes[] = { + { + .name = "at91rm9200_ssc", + .driver_data = (unsigned long) &at91rm9200_config, + }, { + .name = "at91sam9rl_ssc", + .driver_data = (unsigned long) &at91sam9rl_config, + }, { + .name = "at91sam9g45_ssc", + .driver_data = (unsigned long) &at91sam9g45_config, + }, { + /* sentinel */ + } +}; + +#ifdef CONFIG_OF +static const struct of_device_id atmel_ssc_dt_ids[] = { + { + .compatible = "atmel,at91rm9200-ssc", + .data = &at91rm9200_config, + }, { + .compatible = "atmel,at91sam9rl-ssc", + .data = &at91sam9rl_config, + }, { + .compatible = "atmel,at91sam9g45-ssc", + .data = &at91sam9g45_config, + }, { + /* sentinel */ + } +}; +MODULE_DEVICE_TABLE(of, atmel_ssc_dt_ids); +#endif + +static inline const struct atmel_ssc_platform_data * + atmel_ssc_get_driver_data(struct platform_device *pdev) +{ + if (pdev->dev.of_node) { + const struct of_device_id *match; + match = of_match_node(atmel_ssc_dt_ids, pdev->dev.of_node); + if (match == NULL) + return NULL; + return match->data; + } + + return (struct atmel_ssc_platform_data *) + platform_get_device_id(pdev)->driver_data; +} + +#ifdef CONFIG_SND_ATMEL_SOC_SSC +static int ssc_sound_dai_probe(struct ssc_device *ssc) +{ + struct device_node *np = ssc->pdev->dev.of_node; + int ret; + int id; + + ssc->sound_dai = false; + + if (!of_property_read_bool(np, "#sound-dai-cells")) + return 0; + + id = of_alias_get_id(np, "ssc"); + if (id < 0) + return id; + + ret = atmel_ssc_set_audio(id); + ssc->sound_dai = !ret; + + return ret; +} + +static void ssc_sound_dai_remove(struct ssc_device *ssc) +{ + if (!ssc->sound_dai) + return; + + atmel_ssc_put_audio(of_alias_get_id(ssc->pdev->dev.of_node, "ssc")); +} +#else +static inline int ssc_sound_dai_probe(struct ssc_device *ssc) +{ + if (of_property_read_bool(ssc->pdev->dev.of_node, "#sound-dai-cells")) + return -ENOTSUPP; + + return 0; +} + +static inline void ssc_sound_dai_remove(struct ssc_device *ssc) +{ +} +#endif + +static int ssc_probe(struct platform_device *pdev) +{ + struct resource *regs; + struct ssc_device *ssc; + const struct atmel_ssc_platform_data *plat_dat; + + ssc = devm_kzalloc(&pdev->dev, sizeof(struct ssc_device), GFP_KERNEL); + if (!ssc) { + dev_dbg(&pdev->dev, "out of memory\n"); + return -ENOMEM; + } + + ssc->pdev = pdev; + + plat_dat = atmel_ssc_get_driver_data(pdev); + if (!plat_dat) + return -ENODEV; + ssc->pdata = (struct atmel_ssc_platform_data *)plat_dat; + + if (pdev->dev.of_node) { + struct device_node *np = pdev->dev.of_node; + ssc->clk_from_rk_pin = + of_property_read_bool(np, "atmel,clk-from-rk-pin"); + } + + regs = platform_get_resource(pdev, IORESOURCE_MEM, 0); + ssc->regs = devm_ioremap_resource(&pdev->dev, regs); + if (IS_ERR(ssc->regs)) + return PTR_ERR(ssc->regs); + + ssc->phybase = regs->start; + + ssc->clk = devm_clk_get(&pdev->dev, "pclk"); + if (IS_ERR(ssc->clk)) { + dev_dbg(&pdev->dev, "no pclk clock defined\n"); + return -ENXIO; + } + + /* disable all interrupts */ + clk_prepare_enable(ssc->clk); + ssc_writel(ssc->regs, IDR, -1); + ssc_readl(ssc->regs, SR); + clk_disable_unprepare(ssc->clk); + + ssc->irq = platform_get_irq(pdev, 0); + if (ssc->irq < 0) { + dev_dbg(&pdev->dev, "could not get irq\n"); + return ssc->irq; + } + + mutex_lock(&user_lock); + list_add_tail(&ssc->list, &ssc_list); + mutex_unlock(&user_lock); + + platform_set_drvdata(pdev, ssc); + + dev_info(&pdev->dev, "Atmel SSC device at 0x%p (irq %d)\n", + ssc->regs, ssc->irq); + + if (ssc_sound_dai_probe(ssc)) + dev_err(&pdev->dev, "failed to auto-setup ssc for audio\n"); + + return 0; +} + +static int ssc_remove(struct platform_device *pdev) +{ + struct ssc_device *ssc = platform_get_drvdata(pdev); + + ssc_sound_dai_remove(ssc); + + mutex_lock(&user_lock); + list_del(&ssc->list); + mutex_unlock(&user_lock); + + return 0; +} + +static struct platform_driver ssc_driver = { + .driver = { + .name = "ssc", + .of_match_table = of_match_ptr(atmel_ssc_dt_ids), + }, + .id_table = atmel_ssc_devtypes, + .probe = ssc_probe, + .remove = ssc_remove, +}; +module_platform_driver(ssc_driver); + +MODULE_AUTHOR("Hans-Christian Egtvedt "); +MODULE_DESCRIPTION("SSC driver for Atmel AVR32 and AT91"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:ssc"); diff --git a/drivers/misc/atmel_tclib.c b/drivers/misc/atmel_tclib.c new file mode 100644 index 000000000..ac24a4bd6 --- /dev/null +++ b/drivers/misc/atmel_tclib.c @@ -0,0 +1,198 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * This is a thin library to solve the problem of how to portably allocate + * one of the TC blocks. For simplicity, it doesn't currently expect to + * share individual timers between different drivers. + */ + +#if defined(CONFIG_AVR32) +/* AVR32 has these divide PBB */ +const u8 atmel_tc_divisors[5] = { 0, 4, 8, 16, 32, }; +EXPORT_SYMBOL(atmel_tc_divisors); + +#elif defined(CONFIG_ARCH_AT91) +/* AT91 has these divide MCK */ +const u8 atmel_tc_divisors[5] = { 2, 8, 32, 128, 0, }; +EXPORT_SYMBOL(atmel_tc_divisors); + +#endif + +static DEFINE_SPINLOCK(tc_list_lock); +static LIST_HEAD(tc_list); + +/** + * atmel_tc_alloc - allocate a specified TC block + * @block: which block to allocate + * + * Caller allocates a block. If it is available, a pointer to a + * pre-initialized struct atmel_tc is returned. The caller can access + * the registers directly through the "regs" field. + */ +struct atmel_tc *atmel_tc_alloc(unsigned block) +{ + struct atmel_tc *tc; + struct platform_device *pdev = NULL; + + spin_lock(&tc_list_lock); + list_for_each_entry(tc, &tc_list, node) { + if (tc->allocated) + continue; + + if ((tc->pdev->dev.of_node && tc->id == block) || + (tc->pdev->id == block)) { + pdev = tc->pdev; + tc->allocated = true; + break; + } + } + spin_unlock(&tc_list_lock); + + return pdev ? tc : NULL; +} +EXPORT_SYMBOL_GPL(atmel_tc_alloc); + +/** + * atmel_tc_free - release a specified TC block + * @tc: Timer/counter block that was returned by atmel_tc_alloc() + * + * This reverses the effect of atmel_tc_alloc(), invalidating the resource + * returned by that routine and making the TC available to other drivers. + */ +void atmel_tc_free(struct atmel_tc *tc) +{ + spin_lock(&tc_list_lock); + if (tc->allocated) + tc->allocated = false; + spin_unlock(&tc_list_lock); +} +EXPORT_SYMBOL_GPL(atmel_tc_free); + +#if defined(CONFIG_OF) +static struct atmel_tcb_config tcb_rm9200_config = { + .counter_width = 16, +}; + +static struct atmel_tcb_config tcb_sam9x5_config = { + .counter_width = 32, +}; + +static const struct of_device_id atmel_tcb_dt_ids[] = { + { + .compatible = "atmel,at91rm9200-tcb", + .data = &tcb_rm9200_config, + }, { + .compatible = "atmel,at91sam9x5-tcb", + .data = &tcb_sam9x5_config, + }, { + /* sentinel */ + } +}; + +MODULE_DEVICE_TABLE(of, atmel_tcb_dt_ids); +#endif + +static int __init tc_probe(struct platform_device *pdev) +{ + struct atmel_tc *tc; + struct clk *clk; + int irq; + struct resource *r; + unsigned int i; + + irq = platform_get_irq(pdev, 0); + if (irq < 0) + return -EINVAL; + + tc = devm_kzalloc(&pdev->dev, sizeof(struct atmel_tc), GFP_KERNEL); + if (!tc) + return -ENOMEM; + + tc->pdev = pdev; + + clk = devm_clk_get(&pdev->dev, "t0_clk"); + if (IS_ERR(clk)) + return PTR_ERR(clk); + + tc->slow_clk = devm_clk_get(&pdev->dev, "slow_clk"); + if (IS_ERR(tc->slow_clk)) + return PTR_ERR(tc->slow_clk); + + r = platform_get_resource(pdev, IORESOURCE_MEM, 0); + tc->regs = devm_ioremap_resource(&pdev->dev, r); + if (IS_ERR(tc->regs)) + return PTR_ERR(tc->regs); + + /* Now take SoC information if available */ + if (pdev->dev.of_node) { + const struct of_device_id *match; + match = of_match_node(atmel_tcb_dt_ids, pdev->dev.of_node); + if (match) + tc->tcb_config = match->data; + + tc->id = of_alias_get_id(tc->pdev->dev.of_node, "tcb"); + } else { + tc->id = pdev->id; + } + + tc->clk[0] = clk; + tc->clk[1] = devm_clk_get(&pdev->dev, "t1_clk"); + if (IS_ERR(tc->clk[1])) + tc->clk[1] = clk; + tc->clk[2] = devm_clk_get(&pdev->dev, "t2_clk"); + if (IS_ERR(tc->clk[2])) + tc->clk[2] = clk; + + tc->irq[0] = irq; + tc->irq[1] = platform_get_irq(pdev, 1); + if (tc->irq[1] < 0) + tc->irq[1] = irq; + tc->irq[2] = platform_get_irq(pdev, 2); + if (tc->irq[2] < 0) + tc->irq[2] = irq; + + for (i = 0; i < 3; i++) + writel(ATMEL_TC_ALL_IRQ, tc->regs + ATMEL_TC_REG(i, IDR)); + + spin_lock(&tc_list_lock); + list_add_tail(&tc->node, &tc_list); + spin_unlock(&tc_list_lock); + + platform_set_drvdata(pdev, tc); + + return 0; +} + +static void tc_shutdown(struct platform_device *pdev) +{ + int i; + struct atmel_tc *tc = platform_get_drvdata(pdev); + + for (i = 0; i < 3; i++) + writel(ATMEL_TC_ALL_IRQ, tc->regs + ATMEL_TC_REG(i, IDR)); +} + +static struct platform_driver tc_driver = { + .driver = { + .name = "atmel_tcb", + .of_match_table = of_match_ptr(atmel_tcb_dt_ids), + }, + .shutdown = tc_shutdown, +}; + +static int __init tc_init(void) +{ + return platform_driver_probe(&tc_driver, tc_probe); +} +arch_initcall(tc_init); diff --git a/drivers/misc/bh1770glc.c b/drivers/misc/bh1770glc.c new file mode 100644 index 000000000..9c62bf064 --- /dev/null +++ b/drivers/misc/bh1770glc.c @@ -0,0 +1,1410 @@ +/* + * This file is part of the ROHM BH1770GLC / OSRAM SFH7770 sensor driver. + * Chip is combined proximity and ambient light sensor. + * + * Copyright (C) 2010 Nokia Corporation and/or its subsidiary(-ies). + * + * Contact: Samu Onkalo + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define BH1770_ALS_CONTROL 0x80 /* ALS operation mode control */ +#define BH1770_PS_CONTROL 0x81 /* PS operation mode control */ +#define BH1770_I_LED 0x82 /* active LED and LED1, LED2 current */ +#define BH1770_I_LED3 0x83 /* LED3 current setting */ +#define BH1770_ALS_PS_MEAS 0x84 /* Forced mode trigger */ +#define BH1770_PS_MEAS_RATE 0x85 /* PS meas. rate at stand alone mode */ +#define BH1770_ALS_MEAS_RATE 0x86 /* ALS meas. rate at stand alone mode */ +#define BH1770_PART_ID 0x8a /* Part number and revision ID */ +#define BH1770_MANUFACT_ID 0x8b /* Manufacturerer ID */ +#define BH1770_ALS_DATA_0 0x8c /* ALS DATA low byte */ +#define BH1770_ALS_DATA_1 0x8d /* ALS DATA high byte */ +#define BH1770_ALS_PS_STATUS 0x8e /* Measurement data and int status */ +#define BH1770_PS_DATA_LED1 0x8f /* PS data from LED1 */ +#define BH1770_PS_DATA_LED2 0x90 /* PS data from LED2 */ +#define BH1770_PS_DATA_LED3 0x91 /* PS data from LED3 */ +#define BH1770_INTERRUPT 0x92 /* Interrupt setting */ +#define BH1770_PS_TH_LED1 0x93 /* PS interrupt threshold for LED1 */ +#define BH1770_PS_TH_LED2 0x94 /* PS interrupt threshold for LED2 */ +#define BH1770_PS_TH_LED3 0x95 /* PS interrupt threshold for LED3 */ +#define BH1770_ALS_TH_UP_0 0x96 /* ALS upper threshold low byte */ +#define BH1770_ALS_TH_UP_1 0x97 /* ALS upper threshold high byte */ +#define BH1770_ALS_TH_LOW_0 0x98 /* ALS lower threshold low byte */ +#define BH1770_ALS_TH_LOW_1 0x99 /* ALS lower threshold high byte */ + +/* MANUFACT_ID */ +#define BH1770_MANUFACT_ROHM 0x01 +#define BH1770_MANUFACT_OSRAM 0x03 + +/* PART_ID */ +#define BH1770_PART 0x90 +#define BH1770_PART_MASK 0xf0 +#define BH1770_REV_MASK 0x0f +#define BH1770_REV_SHIFT 0 +#define BH1770_REV_0 0x00 +#define BH1770_REV_1 0x01 + +/* Operating modes for both */ +#define BH1770_STANDBY 0x00 +#define BH1770_FORCED 0x02 +#define BH1770_STANDALONE 0x03 +#define BH1770_SWRESET (0x01 << 2) + +#define BH1770_PS_TRIG_MEAS (1 << 0) +#define BH1770_ALS_TRIG_MEAS (1 << 1) + +/* Interrupt control */ +#define BH1770_INT_OUTPUT_MODE (1 << 3) /* 0 = latched */ +#define BH1770_INT_POLARITY (1 << 2) /* 1 = active high */ +#define BH1770_INT_ALS_ENA (1 << 1) +#define BH1770_INT_PS_ENA (1 << 0) + +/* Interrupt status */ +#define BH1770_INT_LED1_DATA (1 << 0) +#define BH1770_INT_LED1_INT (1 << 1) +#define BH1770_INT_LED2_DATA (1 << 2) +#define BH1770_INT_LED2_INT (1 << 3) +#define BH1770_INT_LED3_DATA (1 << 4) +#define BH1770_INT_LED3_INT (1 << 5) +#define BH1770_INT_LEDS_INT ((1 << 1) | (1 << 3) | (1 << 5)) +#define BH1770_INT_ALS_DATA (1 << 6) +#define BH1770_INT_ALS_INT (1 << 7) + +/* Led channels */ +#define BH1770_LED1 0x00 + +#define BH1770_DISABLE 0 +#define BH1770_ENABLE 1 +#define BH1770_PROX_CHANNELS 1 + +#define BH1770_LUX_DEFAULT_RATE 1 /* Index to lux rate table */ +#define BH1770_PROX_DEFAULT_RATE 1 /* Direct HW value =~ 50Hz */ +#define BH1770_PROX_DEF_RATE_THRESH 6 /* Direct HW value =~ 5 Hz */ +#define BH1770_STARTUP_DELAY 50 +#define BH1770_RESET_TIME 10 +#define BH1770_TIMEOUT 2100 /* Timeout in 2.1 seconds */ + +#define BH1770_LUX_RANGE 65535 +#define BH1770_PROX_RANGE 255 +#define BH1770_COEF_SCALER 1024 +#define BH1770_CALIB_SCALER 8192 +#define BH1770_LUX_NEUTRAL_CALIB_VALUE (1 * BH1770_CALIB_SCALER) +#define BH1770_LUX_DEF_THRES 1000 +#define BH1770_PROX_DEF_THRES 70 +#define BH1770_PROX_DEF_ABS_THRES 100 +#define BH1770_DEFAULT_PERSISTENCE 10 +#define BH1770_PROX_MAX_PERSISTENCE 50 +#define BH1770_LUX_GA_SCALE 16384 +#define BH1770_LUX_CF_SCALE 2048 /* CF ChipFactor */ +#define BH1770_NEUTRAL_CF BH1770_LUX_CF_SCALE +#define BH1770_LUX_CORR_SCALE 4096 + +#define PROX_ABOVE_THRESHOLD 1 +#define PROX_BELOW_THRESHOLD 0 + +#define PROX_IGNORE_LUX_LIMIT 500 + +struct bh1770_chip { + struct bh1770_platform_data *pdata; + char chipname[10]; + u8 revision; + struct i2c_client *client; + struct regulator_bulk_data regs[2]; + struct mutex mutex; /* avoid parallel access */ + wait_queue_head_t wait; + + bool int_mode_prox; + bool int_mode_lux; + struct delayed_work prox_work; + u32 lux_cf; /* Chip specific factor */ + u32 lux_ga; + u32 lux_calib; + int lux_rate_index; + u32 lux_corr; + u16 lux_data_raw; + u16 lux_threshold_hi; + u16 lux_threshold_lo; + u16 lux_thres_hi_onchip; + u16 lux_thres_lo_onchip; + bool lux_wait_result; + + int prox_enable_count; + u16 prox_coef; + u16 prox_const; + int prox_rate; + int prox_rate_threshold; + u8 prox_persistence; + u8 prox_persistence_counter; + u8 prox_data; + u8 prox_threshold; + u8 prox_threshold_hw; + bool prox_force_update; + u8 prox_abs_thres; + u8 prox_led; +}; + +static const char reg_vcc[] = "Vcc"; +static const char reg_vleds[] = "Vleds"; + +/* + * Supported stand alone rates in ms from chip data sheet + * {10, 20, 30, 40, 70, 100, 200, 500, 1000, 2000}; + */ +static const s16 prox_rates_hz[] = {100, 50, 33, 25, 14, 10, 5, 2}; +static const s16 prox_rates_ms[] = {10, 20, 30, 40, 70, 100, 200, 500}; + +/* Supported IR-led currents in mA */ +static const u8 prox_curr_ma[] = {5, 10, 20, 50, 100, 150, 200}; + +/* + * Supported stand alone rates in ms from chip data sheet + * {100, 200, 500, 1000, 2000}; + */ +static const s16 lux_rates_hz[] = {10, 5, 2, 1, 0}; + +/* + * interrupt control functions are called while keeping chip->mutex + * excluding module probe / remove + */ +static inline int bh1770_lux_interrupt_control(struct bh1770_chip *chip, + int lux) +{ + chip->int_mode_lux = lux; + /* Set interrupt modes, interrupt active low, latched */ + return i2c_smbus_write_byte_data(chip->client, + BH1770_INTERRUPT, + (lux << 1) | chip->int_mode_prox); +} + +static inline int bh1770_prox_interrupt_control(struct bh1770_chip *chip, + int ps) +{ + chip->int_mode_prox = ps; + return i2c_smbus_write_byte_data(chip->client, + BH1770_INTERRUPT, + (chip->int_mode_lux << 1) | (ps << 0)); +} + +/* chip->mutex is always kept here */ +static int bh1770_lux_rate(struct bh1770_chip *chip, int rate_index) +{ + /* sysfs may call this when the chip is powered off */ + if (pm_runtime_suspended(&chip->client->dev)) + return 0; + + /* Proper proximity response needs fastest lux rate (100ms) */ + if (chip->prox_enable_count) + rate_index = 0; + + return i2c_smbus_write_byte_data(chip->client, + BH1770_ALS_MEAS_RATE, + rate_index); +} + +static int bh1770_prox_rate(struct bh1770_chip *chip, int mode) +{ + int rate; + + rate = (mode == PROX_ABOVE_THRESHOLD) ? + chip->prox_rate_threshold : chip->prox_rate; + + return i2c_smbus_write_byte_data(chip->client, + BH1770_PS_MEAS_RATE, + rate); +} + +/* InfraredLED is controlled by the chip during proximity scanning */ +static inline int bh1770_led_cfg(struct bh1770_chip *chip) +{ + /* LED cfg, current for leds 1 and 2 */ + return i2c_smbus_write_byte_data(chip->client, + BH1770_I_LED, + (BH1770_LED1 << 6) | + (BH1770_LED_5mA << 3) | + chip->prox_led); +} + +/* + * Following two functions converts raw ps values from HW to normalized + * values. Purpose is to compensate differences between different sensor + * versions and variants so that result means about the same between + * versions. + */ +static inline u8 bh1770_psraw_to_adjusted(struct bh1770_chip *chip, u8 psraw) +{ + u16 adjusted; + adjusted = (u16)(((u32)(psraw + chip->prox_const) * chip->prox_coef) / + BH1770_COEF_SCALER); + if (adjusted > BH1770_PROX_RANGE) + adjusted = BH1770_PROX_RANGE; + return adjusted; +} + +static inline u8 bh1770_psadjusted_to_raw(struct bh1770_chip *chip, u8 ps) +{ + u16 raw; + + raw = (((u32)ps * BH1770_COEF_SCALER) / chip->prox_coef); + if (raw > chip->prox_const) + raw = raw - chip->prox_const; + else + raw = 0; + return raw; +} + +/* + * Following two functions converts raw lux values from HW to normalized + * values. Purpose is to compensate differences between different sensor + * versions and variants so that result means about the same between + * versions. Chip->mutex is kept when this is called. + */ +static int bh1770_prox_set_threshold(struct bh1770_chip *chip) +{ + u8 tmp = 0; + + /* sysfs may call this when the chip is powered off */ + if (pm_runtime_suspended(&chip->client->dev)) + return 0; + + tmp = bh1770_psadjusted_to_raw(chip, chip->prox_threshold); + chip->prox_threshold_hw = tmp; + + return i2c_smbus_write_byte_data(chip->client, BH1770_PS_TH_LED1, + tmp); +} + +static inline u16 bh1770_lux_raw_to_adjusted(struct bh1770_chip *chip, u16 raw) +{ + u32 lux; + lux = ((u32)raw * chip->lux_corr) / BH1770_LUX_CORR_SCALE; + return min(lux, (u32)BH1770_LUX_RANGE); +} + +static inline u16 bh1770_lux_adjusted_to_raw(struct bh1770_chip *chip, + u16 adjusted) +{ + return (u32)adjusted * BH1770_LUX_CORR_SCALE / chip->lux_corr; +} + +/* chip->mutex is kept when this is called */ +static int bh1770_lux_update_thresholds(struct bh1770_chip *chip, + u16 threshold_hi, u16 threshold_lo) +{ + u8 data[4]; + int ret; + + /* sysfs may call this when the chip is powered off */ + if (pm_runtime_suspended(&chip->client->dev)) + return 0; + + /* + * Compensate threshold values with the correction factors if not + * set to minimum or maximum. + * Min & max values disables interrupts. + */ + if (threshold_hi != BH1770_LUX_RANGE && threshold_hi != 0) + threshold_hi = bh1770_lux_adjusted_to_raw(chip, threshold_hi); + + if (threshold_lo != BH1770_LUX_RANGE && threshold_lo != 0) + threshold_lo = bh1770_lux_adjusted_to_raw(chip, threshold_lo); + + if (chip->lux_thres_hi_onchip == threshold_hi && + chip->lux_thres_lo_onchip == threshold_lo) + return 0; + + chip->lux_thres_hi_onchip = threshold_hi; + chip->lux_thres_lo_onchip = threshold_lo; + + data[0] = threshold_hi; + data[1] = threshold_hi >> 8; + data[2] = threshold_lo; + data[3] = threshold_lo >> 8; + + ret = i2c_smbus_write_i2c_block_data(chip->client, + BH1770_ALS_TH_UP_0, + ARRAY_SIZE(data), + data); + return ret; +} + +static int bh1770_lux_get_result(struct bh1770_chip *chip) +{ + u16 data; + int ret; + + ret = i2c_smbus_read_byte_data(chip->client, BH1770_ALS_DATA_0); + if (ret < 0) + return ret; + + data = ret & 0xff; + ret = i2c_smbus_read_byte_data(chip->client, BH1770_ALS_DATA_1); + if (ret < 0) + return ret; + + chip->lux_data_raw = data | ((ret & 0xff) << 8); + + return 0; +} + +/* Calculate correction value which contains chip and device specific parts */ +static u32 bh1770_get_corr_value(struct bh1770_chip *chip) +{ + u32 tmp; + /* Impact of glass attenuation correction */ + tmp = (BH1770_LUX_CORR_SCALE * chip->lux_ga) / BH1770_LUX_GA_SCALE; + /* Impact of chip factor correction */ + tmp = (tmp * chip->lux_cf) / BH1770_LUX_CF_SCALE; + /* Impact of Device specific calibration correction */ + tmp = (tmp * chip->lux_calib) / BH1770_CALIB_SCALER; + return tmp; +} + +static int bh1770_lux_read_result(struct bh1770_chip *chip) +{ + bh1770_lux_get_result(chip); + return bh1770_lux_raw_to_adjusted(chip, chip->lux_data_raw); +} + +/* + * Chip on / off functions are called while keeping mutex except probe + * or remove phase + */ +static int bh1770_chip_on(struct bh1770_chip *chip) +{ + int ret = regulator_bulk_enable(ARRAY_SIZE(chip->regs), + chip->regs); + if (ret < 0) + return ret; + + usleep_range(BH1770_STARTUP_DELAY, BH1770_STARTUP_DELAY * 2); + + /* Reset the chip */ + i2c_smbus_write_byte_data(chip->client, BH1770_ALS_CONTROL, + BH1770_SWRESET); + usleep_range(BH1770_RESET_TIME, BH1770_RESET_TIME * 2); + + /* + * ALS is started always since proximity needs als results + * for realibility estimation. + * Let's assume dark until the first ALS measurement is ready. + */ + chip->lux_data_raw = 0; + chip->prox_data = 0; + ret = i2c_smbus_write_byte_data(chip->client, + BH1770_ALS_CONTROL, BH1770_STANDALONE); + + /* Assume reset defaults */ + chip->lux_thres_hi_onchip = BH1770_LUX_RANGE; + chip->lux_thres_lo_onchip = 0; + + return ret; +} + +static void bh1770_chip_off(struct bh1770_chip *chip) +{ + i2c_smbus_write_byte_data(chip->client, + BH1770_INTERRUPT, BH1770_DISABLE); + i2c_smbus_write_byte_data(chip->client, + BH1770_ALS_CONTROL, BH1770_STANDBY); + i2c_smbus_write_byte_data(chip->client, + BH1770_PS_CONTROL, BH1770_STANDBY); + regulator_bulk_disable(ARRAY_SIZE(chip->regs), chip->regs); +} + +/* chip->mutex is kept when this is called */ +static int bh1770_prox_mode_control(struct bh1770_chip *chip) +{ + if (chip->prox_enable_count) { + chip->prox_force_update = true; /* Force immediate update */ + + bh1770_lux_rate(chip, chip->lux_rate_index); + bh1770_prox_set_threshold(chip); + bh1770_led_cfg(chip); + bh1770_prox_rate(chip, PROX_BELOW_THRESHOLD); + bh1770_prox_interrupt_control(chip, BH1770_ENABLE); + i2c_smbus_write_byte_data(chip->client, + BH1770_PS_CONTROL, BH1770_STANDALONE); + } else { + chip->prox_data = 0; + bh1770_lux_rate(chip, chip->lux_rate_index); + bh1770_prox_interrupt_control(chip, BH1770_DISABLE); + i2c_smbus_write_byte_data(chip->client, + BH1770_PS_CONTROL, BH1770_STANDBY); + } + return 0; +} + +/* chip->mutex is kept when this is called */ +static int bh1770_prox_read_result(struct bh1770_chip *chip) +{ + int ret; + bool above; + u8 mode; + + ret = i2c_smbus_read_byte_data(chip->client, BH1770_PS_DATA_LED1); + if (ret < 0) + goto out; + + if (ret > chip->prox_threshold_hw) + above = true; + else + above = false; + + /* + * when ALS levels goes above limit, proximity result may be + * false proximity. Thus ignore the result. With real proximity + * there is a shadow causing low als levels. + */ + if (chip->lux_data_raw > PROX_IGNORE_LUX_LIMIT) + ret = 0; + + chip->prox_data = bh1770_psraw_to_adjusted(chip, ret); + + /* Strong proximity level or force mode requires immediate response */ + if (chip->prox_data >= chip->prox_abs_thres || + chip->prox_force_update) + chip->prox_persistence_counter = chip->prox_persistence; + + chip->prox_force_update = false; + + /* Persistence filttering to reduce false proximity events */ + if (likely(above)) { + if (chip->prox_persistence_counter < chip->prox_persistence) { + chip->prox_persistence_counter++; + ret = -ENODATA; + } else { + mode = PROX_ABOVE_THRESHOLD; + ret = 0; + } + } else { + chip->prox_persistence_counter = 0; + mode = PROX_BELOW_THRESHOLD; + chip->prox_data = 0; + ret = 0; + } + + /* Set proximity detection rate based on above or below value */ + if (ret == 0) { + bh1770_prox_rate(chip, mode); + sysfs_notify(&chip->client->dev.kobj, NULL, "prox0_raw"); + } +out: + return ret; +} + +static int bh1770_detect(struct bh1770_chip *chip) +{ + struct i2c_client *client = chip->client; + s32 ret; + u8 manu, part; + + ret = i2c_smbus_read_byte_data(client, BH1770_MANUFACT_ID); + if (ret < 0) + goto error; + manu = (u8)ret; + + ret = i2c_smbus_read_byte_data(client, BH1770_PART_ID); + if (ret < 0) + goto error; + part = (u8)ret; + + chip->revision = (part & BH1770_REV_MASK) >> BH1770_REV_SHIFT; + chip->prox_coef = BH1770_COEF_SCALER; + chip->prox_const = 0; + chip->lux_cf = BH1770_NEUTRAL_CF; + + if ((manu == BH1770_MANUFACT_ROHM) && + ((part & BH1770_PART_MASK) == BH1770_PART)) { + snprintf(chip->chipname, sizeof(chip->chipname), "BH1770GLC"); + return 0; + } + + if ((manu == BH1770_MANUFACT_OSRAM) && + ((part & BH1770_PART_MASK) == BH1770_PART)) { + snprintf(chip->chipname, sizeof(chip->chipname), "SFH7770"); + /* Values selected by comparing different versions */ + chip->prox_coef = 819; /* 0.8 * BH1770_COEF_SCALER */ + chip->prox_const = 40; + return 0; + } + + ret = -ENODEV; +error: + dev_dbg(&client->dev, "BH1770 or SFH7770 not found\n"); + + return ret; +} + +/* + * This work is re-scheduled at every proximity interrupt. + * If this work is running, it means that there hasn't been any + * proximity interrupt in time. Situation is handled as no-proximity. + * It would be nice to have low-threshold interrupt or interrupt + * when measurement and hi-threshold are both 0. But neither of those exists. + * This is a workaroud for missing HW feature. + */ + +static void bh1770_prox_work(struct work_struct *work) +{ + struct bh1770_chip *chip = + container_of(work, struct bh1770_chip, prox_work.work); + + mutex_lock(&chip->mutex); + bh1770_prox_read_result(chip); + mutex_unlock(&chip->mutex); +} + +/* This is threaded irq handler */ +static irqreturn_t bh1770_irq(int irq, void *data) +{ + struct bh1770_chip *chip = data; + int status; + int rate = 0; + + mutex_lock(&chip->mutex); + status = i2c_smbus_read_byte_data(chip->client, BH1770_ALS_PS_STATUS); + + /* Acknowledge interrupt by reading this register */ + i2c_smbus_read_byte_data(chip->client, BH1770_INTERRUPT); + + /* + * Check if there is fresh data available for als. + * If this is the very first data, update thresholds after that. + */ + if (status & BH1770_INT_ALS_DATA) { + bh1770_lux_get_result(chip); + if (unlikely(chip->lux_wait_result)) { + chip->lux_wait_result = false; + wake_up(&chip->wait); + bh1770_lux_update_thresholds(chip, + chip->lux_threshold_hi, + chip->lux_threshold_lo); + } + } + + /* Disable interrupt logic to guarantee acknowledgement */ + i2c_smbus_write_byte_data(chip->client, BH1770_INTERRUPT, + (0 << 1) | (0 << 0)); + + if ((status & BH1770_INT_ALS_INT)) + sysfs_notify(&chip->client->dev.kobj, NULL, "lux0_input"); + + if (chip->int_mode_prox && (status & BH1770_INT_LEDS_INT)) { + rate = prox_rates_ms[chip->prox_rate_threshold]; + bh1770_prox_read_result(chip); + } + + /* Re-enable interrupt logic */ + i2c_smbus_write_byte_data(chip->client, BH1770_INTERRUPT, + (chip->int_mode_lux << 1) | + (chip->int_mode_prox << 0)); + mutex_unlock(&chip->mutex); + + /* + * Can't cancel work while keeping mutex since the work uses the + * same mutex. + */ + if (rate) { + /* + * Simulate missing no-proximity interrupt 50ms after the + * next expected interrupt time. + */ + cancel_delayed_work_sync(&chip->prox_work); + schedule_delayed_work(&chip->prox_work, + msecs_to_jiffies(rate + 50)); + } + return IRQ_HANDLED; +} + +static ssize_t bh1770_power_state_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct bh1770_chip *chip = dev_get_drvdata(dev); + unsigned long value; + ssize_t ret; + + ret = kstrtoul(buf, 0, &value); + if (ret) + return ret; + + mutex_lock(&chip->mutex); + if (value) { + pm_runtime_get_sync(dev); + + ret = bh1770_lux_rate(chip, chip->lux_rate_index); + if (ret < 0) { + pm_runtime_put(dev); + goto leave; + } + + ret = bh1770_lux_interrupt_control(chip, BH1770_ENABLE); + if (ret < 0) { + pm_runtime_put(dev); + goto leave; + } + + /* This causes interrupt after the next measurement cycle */ + bh1770_lux_update_thresholds(chip, BH1770_LUX_DEF_THRES, + BH1770_LUX_DEF_THRES); + /* Inform that we are waiting for a result from ALS */ + chip->lux_wait_result = true; + bh1770_prox_mode_control(chip); + } else if (!pm_runtime_suspended(dev)) { + pm_runtime_put(dev); + } + ret = count; +leave: + mutex_unlock(&chip->mutex); + return ret; +} + +static ssize_t bh1770_power_state_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", !pm_runtime_suspended(dev)); +} + +static ssize_t bh1770_lux_result_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct bh1770_chip *chip = dev_get_drvdata(dev); + ssize_t ret; + long timeout; + + if (pm_runtime_suspended(dev)) + return -EIO; /* Chip is not enabled at all */ + + timeout = wait_event_interruptible_timeout(chip->wait, + !chip->lux_wait_result, + msecs_to_jiffies(BH1770_TIMEOUT)); + if (!timeout) + return -EIO; + + mutex_lock(&chip->mutex); + ret = sprintf(buf, "%d\n", bh1770_lux_read_result(chip)); + mutex_unlock(&chip->mutex); + + return ret; +} + +static ssize_t bh1770_lux_range_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", BH1770_LUX_RANGE); +} + +static ssize_t bh1770_prox_enable_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct bh1770_chip *chip = dev_get_drvdata(dev); + unsigned long value; + int ret; + + ret = kstrtoul(buf, 0, &value); + if (ret) + return ret; + + mutex_lock(&chip->mutex); + /* Assume no proximity. Sensor will tell real state soon */ + if (!chip->prox_enable_count) + chip->prox_data = 0; + + if (value) + chip->prox_enable_count++; + else if (chip->prox_enable_count > 0) + chip->prox_enable_count--; + else + goto leave; + + /* Run control only when chip is powered on */ + if (!pm_runtime_suspended(dev)) + bh1770_prox_mode_control(chip); +leave: + mutex_unlock(&chip->mutex); + return count; +} + +static ssize_t bh1770_prox_enable_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct bh1770_chip *chip = dev_get_drvdata(dev); + ssize_t len; + + mutex_lock(&chip->mutex); + len = sprintf(buf, "%d\n", chip->prox_enable_count); + mutex_unlock(&chip->mutex); + return len; +} + +static ssize_t bh1770_prox_result_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct bh1770_chip *chip = dev_get_drvdata(dev); + ssize_t ret; + + mutex_lock(&chip->mutex); + if (chip->prox_enable_count && !pm_runtime_suspended(dev)) + ret = sprintf(buf, "%d\n", chip->prox_data); + else + ret = -EIO; + mutex_unlock(&chip->mutex); + return ret; +} + +static ssize_t bh1770_prox_range_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", BH1770_PROX_RANGE); +} + +static ssize_t bh1770_get_prox_rate_avail(struct device *dev, + struct device_attribute *attr, char *buf) +{ + int i; + int pos = 0; + for (i = 0; i < ARRAY_SIZE(prox_rates_hz); i++) + pos += sprintf(buf + pos, "%d ", prox_rates_hz[i]); + sprintf(buf + pos - 1, "\n"); + return pos; +} + +static ssize_t bh1770_get_prox_rate_above(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct bh1770_chip *chip = dev_get_drvdata(dev); + return sprintf(buf, "%d\n", prox_rates_hz[chip->prox_rate_threshold]); +} + +static ssize_t bh1770_get_prox_rate_below(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct bh1770_chip *chip = dev_get_drvdata(dev); + return sprintf(buf, "%d\n", prox_rates_hz[chip->prox_rate]); +} + +static int bh1770_prox_rate_validate(int rate) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(prox_rates_hz) - 1; i++) + if (rate >= prox_rates_hz[i]) + break; + return i; +} + +static ssize_t bh1770_set_prox_rate_above(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct bh1770_chip *chip = dev_get_drvdata(dev); + unsigned long value; + int ret; + + ret = kstrtoul(buf, 0, &value); + if (ret) + return ret; + + mutex_lock(&chip->mutex); + chip->prox_rate_threshold = bh1770_prox_rate_validate(value); + mutex_unlock(&chip->mutex); + return count; +} + +static ssize_t bh1770_set_prox_rate_below(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct bh1770_chip *chip = dev_get_drvdata(dev); + unsigned long value; + int ret; + + ret = kstrtoul(buf, 0, &value); + if (ret) + return ret; + + mutex_lock(&chip->mutex); + chip->prox_rate = bh1770_prox_rate_validate(value); + mutex_unlock(&chip->mutex); + return count; +} + +static ssize_t bh1770_get_prox_thres(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct bh1770_chip *chip = dev_get_drvdata(dev); + return sprintf(buf, "%d\n", chip->prox_threshold); +} + +static ssize_t bh1770_set_prox_thres(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct bh1770_chip *chip = dev_get_drvdata(dev); + unsigned long value; + int ret; + + ret = kstrtoul(buf, 0, &value); + if (ret) + return ret; + + if (value > BH1770_PROX_RANGE) + return -EINVAL; + + mutex_lock(&chip->mutex); + chip->prox_threshold = value; + ret = bh1770_prox_set_threshold(chip); + mutex_unlock(&chip->mutex); + if (ret < 0) + return ret; + return count; +} + +static ssize_t bh1770_prox_persistence_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct bh1770_chip *chip = dev_get_drvdata(dev); + + return sprintf(buf, "%u\n", chip->prox_persistence); +} + +static ssize_t bh1770_prox_persistence_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct bh1770_chip *chip = dev_get_drvdata(dev); + unsigned long value; + int ret; + + ret = kstrtoul(buf, 0, &value); + if (ret) + return ret; + + if (value > BH1770_PROX_MAX_PERSISTENCE) + return -EINVAL; + + chip->prox_persistence = value; + + return len; +} + +static ssize_t bh1770_prox_abs_thres_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct bh1770_chip *chip = dev_get_drvdata(dev); + return sprintf(buf, "%u\n", chip->prox_abs_thres); +} + +static ssize_t bh1770_prox_abs_thres_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct bh1770_chip *chip = dev_get_drvdata(dev); + unsigned long value; + int ret; + + ret = kstrtoul(buf, 0, &value); + if (ret) + return ret; + + if (value > BH1770_PROX_RANGE) + return -EINVAL; + + chip->prox_abs_thres = value; + + return len; +} + +static ssize_t bh1770_chip_id_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct bh1770_chip *chip = dev_get_drvdata(dev); + return sprintf(buf, "%s rev %d\n", chip->chipname, chip->revision); +} + +static ssize_t bh1770_lux_calib_default_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%u\n", BH1770_CALIB_SCALER); +} + +static ssize_t bh1770_lux_calib_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct bh1770_chip *chip = dev_get_drvdata(dev); + ssize_t len; + + mutex_lock(&chip->mutex); + len = sprintf(buf, "%u\n", chip->lux_calib); + mutex_unlock(&chip->mutex); + return len; +} + +static ssize_t bh1770_lux_calib_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct bh1770_chip *chip = dev_get_drvdata(dev); + unsigned long value; + u32 old_calib; + u32 new_corr; + int ret; + + ret = kstrtoul(buf, 0, &value); + if (ret) + return ret; + + mutex_lock(&chip->mutex); + old_calib = chip->lux_calib; + chip->lux_calib = value; + new_corr = bh1770_get_corr_value(chip); + if (new_corr == 0) { + chip->lux_calib = old_calib; + mutex_unlock(&chip->mutex); + return -EINVAL; + } + chip->lux_corr = new_corr; + /* Refresh thresholds on HW after changing correction value */ + bh1770_lux_update_thresholds(chip, chip->lux_threshold_hi, + chip->lux_threshold_lo); + + mutex_unlock(&chip->mutex); + + return len; +} + +static ssize_t bh1770_get_lux_rate_avail(struct device *dev, + struct device_attribute *attr, char *buf) +{ + int i; + int pos = 0; + for (i = 0; i < ARRAY_SIZE(lux_rates_hz); i++) + pos += sprintf(buf + pos, "%d ", lux_rates_hz[i]); + sprintf(buf + pos - 1, "\n"); + return pos; +} + +static ssize_t bh1770_get_lux_rate(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct bh1770_chip *chip = dev_get_drvdata(dev); + return sprintf(buf, "%d\n", lux_rates_hz[chip->lux_rate_index]); +} + +static ssize_t bh1770_set_lux_rate(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct bh1770_chip *chip = dev_get_drvdata(dev); + unsigned long rate_hz; + int ret, i; + + ret = kstrtoul(buf, 0, &rate_hz); + if (ret) + return ret; + + for (i = 0; i < ARRAY_SIZE(lux_rates_hz) - 1; i++) + if (rate_hz >= lux_rates_hz[i]) + break; + + mutex_lock(&chip->mutex); + chip->lux_rate_index = i; + ret = bh1770_lux_rate(chip, i); + mutex_unlock(&chip->mutex); + + if (ret < 0) + return ret; + + return count; +} + +static ssize_t bh1770_get_lux_thresh_above(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct bh1770_chip *chip = dev_get_drvdata(dev); + return sprintf(buf, "%d\n", chip->lux_threshold_hi); +} + +static ssize_t bh1770_get_lux_thresh_below(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct bh1770_chip *chip = dev_get_drvdata(dev); + return sprintf(buf, "%d\n", chip->lux_threshold_lo); +} + +static ssize_t bh1770_set_lux_thresh(struct bh1770_chip *chip, u16 *target, + const char *buf) +{ + unsigned long thresh; + int ret; + + ret = kstrtoul(buf, 0, &thresh); + if (ret) + return ret; + + if (thresh > BH1770_LUX_RANGE) + return -EINVAL; + + mutex_lock(&chip->mutex); + *target = thresh; + /* + * Don't update values in HW if we are still waiting for + * first interrupt to come after device handle open call. + */ + if (!chip->lux_wait_result) + ret = bh1770_lux_update_thresholds(chip, + chip->lux_threshold_hi, + chip->lux_threshold_lo); + mutex_unlock(&chip->mutex); + return ret; + +} + +static ssize_t bh1770_set_lux_thresh_above(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct bh1770_chip *chip = dev_get_drvdata(dev); + int ret = bh1770_set_lux_thresh(chip, &chip->lux_threshold_hi, buf); + if (ret < 0) + return ret; + return len; +} + +static ssize_t bh1770_set_lux_thresh_below(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct bh1770_chip *chip = dev_get_drvdata(dev); + int ret = bh1770_set_lux_thresh(chip, &chip->lux_threshold_lo, buf); + if (ret < 0) + return ret; + return len; +} + +static DEVICE_ATTR(prox0_raw_en, S_IRUGO | S_IWUSR, bh1770_prox_enable_show, + bh1770_prox_enable_store); +static DEVICE_ATTR(prox0_thresh_above1_value, S_IRUGO | S_IWUSR, + bh1770_prox_abs_thres_show, + bh1770_prox_abs_thres_store); +static DEVICE_ATTR(prox0_thresh_above0_value, S_IRUGO | S_IWUSR, + bh1770_get_prox_thres, + bh1770_set_prox_thres); +static DEVICE_ATTR(prox0_raw, S_IRUGO, bh1770_prox_result_show, NULL); +static DEVICE_ATTR(prox0_sensor_range, S_IRUGO, bh1770_prox_range_show, NULL); +static DEVICE_ATTR(prox0_thresh_above_count, S_IRUGO | S_IWUSR, + bh1770_prox_persistence_show, + bh1770_prox_persistence_store); +static DEVICE_ATTR(prox0_rate_above, S_IRUGO | S_IWUSR, + bh1770_get_prox_rate_above, + bh1770_set_prox_rate_above); +static DEVICE_ATTR(prox0_rate_below, S_IRUGO | S_IWUSR, + bh1770_get_prox_rate_below, + bh1770_set_prox_rate_below); +static DEVICE_ATTR(prox0_rate_avail, S_IRUGO, bh1770_get_prox_rate_avail, NULL); + +static DEVICE_ATTR(lux0_calibscale, S_IRUGO | S_IWUSR, bh1770_lux_calib_show, + bh1770_lux_calib_store); +static DEVICE_ATTR(lux0_calibscale_default, S_IRUGO, + bh1770_lux_calib_default_show, + NULL); +static DEVICE_ATTR(lux0_input, S_IRUGO, bh1770_lux_result_show, NULL); +static DEVICE_ATTR(lux0_sensor_range, S_IRUGO, bh1770_lux_range_show, NULL); +static DEVICE_ATTR(lux0_rate, S_IRUGO | S_IWUSR, bh1770_get_lux_rate, + bh1770_set_lux_rate); +static DEVICE_ATTR(lux0_rate_avail, S_IRUGO, bh1770_get_lux_rate_avail, NULL); +static DEVICE_ATTR(lux0_thresh_above_value, S_IRUGO | S_IWUSR, + bh1770_get_lux_thresh_above, + bh1770_set_lux_thresh_above); +static DEVICE_ATTR(lux0_thresh_below_value, S_IRUGO | S_IWUSR, + bh1770_get_lux_thresh_below, + bh1770_set_lux_thresh_below); +static DEVICE_ATTR(chip_id, S_IRUGO, bh1770_chip_id_show, NULL); +static DEVICE_ATTR(power_state, S_IRUGO | S_IWUSR, bh1770_power_state_show, + bh1770_power_state_store); + + +static struct attribute *sysfs_attrs[] = { + &dev_attr_lux0_calibscale.attr, + &dev_attr_lux0_calibscale_default.attr, + &dev_attr_lux0_input.attr, + &dev_attr_lux0_sensor_range.attr, + &dev_attr_lux0_rate.attr, + &dev_attr_lux0_rate_avail.attr, + &dev_attr_lux0_thresh_above_value.attr, + &dev_attr_lux0_thresh_below_value.attr, + &dev_attr_prox0_raw.attr, + &dev_attr_prox0_sensor_range.attr, + &dev_attr_prox0_raw_en.attr, + &dev_attr_prox0_thresh_above_count.attr, + &dev_attr_prox0_rate_above.attr, + &dev_attr_prox0_rate_below.attr, + &dev_attr_prox0_rate_avail.attr, + &dev_attr_prox0_thresh_above0_value.attr, + &dev_attr_prox0_thresh_above1_value.attr, + &dev_attr_chip_id.attr, + &dev_attr_power_state.attr, + NULL +}; + +static const struct attribute_group bh1770_attribute_group = { + .attrs = sysfs_attrs +}; + +static int bh1770_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct bh1770_chip *chip; + int err; + + chip = devm_kzalloc(&client->dev, sizeof *chip, GFP_KERNEL); + if (!chip) + return -ENOMEM; + + i2c_set_clientdata(client, chip); + chip->client = client; + + mutex_init(&chip->mutex); + init_waitqueue_head(&chip->wait); + INIT_DELAYED_WORK(&chip->prox_work, bh1770_prox_work); + + if (client->dev.platform_data == NULL) { + dev_err(&client->dev, "platform data is mandatory\n"); + return -EINVAL; + } + + chip->pdata = client->dev.platform_data; + chip->lux_calib = BH1770_LUX_NEUTRAL_CALIB_VALUE; + chip->lux_rate_index = BH1770_LUX_DEFAULT_RATE; + chip->lux_threshold_lo = BH1770_LUX_DEF_THRES; + chip->lux_threshold_hi = BH1770_LUX_DEF_THRES; + + if (chip->pdata->glass_attenuation == 0) + chip->lux_ga = BH1770_NEUTRAL_GA; + else + chip->lux_ga = chip->pdata->glass_attenuation; + + chip->prox_threshold = BH1770_PROX_DEF_THRES; + chip->prox_led = chip->pdata->led_def_curr; + chip->prox_abs_thres = BH1770_PROX_DEF_ABS_THRES; + chip->prox_persistence = BH1770_DEFAULT_PERSISTENCE; + chip->prox_rate_threshold = BH1770_PROX_DEF_RATE_THRESH; + chip->prox_rate = BH1770_PROX_DEFAULT_RATE; + chip->prox_data = 0; + + chip->regs[0].supply = reg_vcc; + chip->regs[1].supply = reg_vleds; + + err = devm_regulator_bulk_get(&client->dev, + ARRAY_SIZE(chip->regs), chip->regs); + if (err < 0) { + dev_err(&client->dev, "Cannot get regulators\n"); + return err; + } + + err = regulator_bulk_enable(ARRAY_SIZE(chip->regs), + chip->regs); + if (err < 0) { + dev_err(&client->dev, "Cannot enable regulators\n"); + return err; + } + + usleep_range(BH1770_STARTUP_DELAY, BH1770_STARTUP_DELAY * 2); + err = bh1770_detect(chip); + if (err < 0) + goto fail0; + + /* Start chip */ + bh1770_chip_on(chip); + pm_runtime_set_active(&client->dev); + pm_runtime_enable(&client->dev); + + chip->lux_corr = bh1770_get_corr_value(chip); + if (chip->lux_corr == 0) { + dev_err(&client->dev, "Improper correction values\n"); + err = -EINVAL; + goto fail0; + } + + if (chip->pdata->setup_resources) { + err = chip->pdata->setup_resources(); + if (err) { + err = -EINVAL; + goto fail0; + } + } + + err = sysfs_create_group(&chip->client->dev.kobj, + &bh1770_attribute_group); + if (err < 0) { + dev_err(&chip->client->dev, "Sysfs registration failed\n"); + goto fail1; + } + + /* + * Chip needs level triggered interrupt to work. However, + * level triggering doesn't work always correctly with power + * management. Select both + */ + err = request_threaded_irq(client->irq, NULL, + bh1770_irq, + IRQF_TRIGGER_FALLING | IRQF_ONESHOT | + IRQF_TRIGGER_LOW, + "bh1770", chip); + if (err) { + dev_err(&client->dev, "could not get IRQ %d\n", + client->irq); + goto fail2; + } + regulator_bulk_disable(ARRAY_SIZE(chip->regs), chip->regs); + return err; +fail2: + sysfs_remove_group(&chip->client->dev.kobj, + &bh1770_attribute_group); +fail1: + if (chip->pdata->release_resources) + chip->pdata->release_resources(); +fail0: + regulator_bulk_disable(ARRAY_SIZE(chip->regs), chip->regs); + return err; +} + +static int bh1770_remove(struct i2c_client *client) +{ + struct bh1770_chip *chip = i2c_get_clientdata(client); + + free_irq(client->irq, chip); + + sysfs_remove_group(&chip->client->dev.kobj, + &bh1770_attribute_group); + + if (chip->pdata->release_resources) + chip->pdata->release_resources(); + + cancel_delayed_work_sync(&chip->prox_work); + + if (!pm_runtime_suspended(&client->dev)) + bh1770_chip_off(chip); + + pm_runtime_disable(&client->dev); + pm_runtime_set_suspended(&client->dev); + + return 0; +} + +#ifdef CONFIG_PM_SLEEP +static int bh1770_suspend(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + struct bh1770_chip *chip = i2c_get_clientdata(client); + + bh1770_chip_off(chip); + + return 0; +} + +static int bh1770_resume(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + struct bh1770_chip *chip = i2c_get_clientdata(client); + int ret = 0; + + bh1770_chip_on(chip); + + if (!pm_runtime_suspended(dev)) { + /* + * If we were enabled at suspend time, it is expected + * everything works nice and smoothly + */ + ret = bh1770_lux_rate(chip, chip->lux_rate_index); + ret |= bh1770_lux_interrupt_control(chip, BH1770_ENABLE); + + /* This causes interrupt after the next measurement cycle */ + bh1770_lux_update_thresholds(chip, BH1770_LUX_DEF_THRES, + BH1770_LUX_DEF_THRES); + /* Inform that we are waiting for a result from ALS */ + chip->lux_wait_result = true; + bh1770_prox_mode_control(chip); + } + return ret; +} +#endif + +#ifdef CONFIG_PM +static int bh1770_runtime_suspend(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + struct bh1770_chip *chip = i2c_get_clientdata(client); + + bh1770_chip_off(chip); + + return 0; +} + +static int bh1770_runtime_resume(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + struct bh1770_chip *chip = i2c_get_clientdata(client); + + bh1770_chip_on(chip); + + return 0; +} +#endif + +static const struct i2c_device_id bh1770_id[] = { + {"bh1770glc", 0 }, + {"sfh7770", 0 }, + {} +}; + +MODULE_DEVICE_TABLE(i2c, bh1770_id); + +static const struct dev_pm_ops bh1770_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(bh1770_suspend, bh1770_resume) + SET_RUNTIME_PM_OPS(bh1770_runtime_suspend, bh1770_runtime_resume, NULL) +}; + +static struct i2c_driver bh1770_driver = { + .driver = { + .name = "bh1770glc", + .pm = &bh1770_pm_ops, + }, + .probe = bh1770_probe, + .remove = bh1770_remove, + .id_table = bh1770_id, +}; + +module_i2c_driver(bh1770_driver); + +MODULE_DESCRIPTION("BH1770GLC / SFH7770 combined ALS and proximity sensor"); +MODULE_AUTHOR("Samu Onkalo, Nokia Corporation"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/misc/c2port/Kconfig b/drivers/misc/c2port/Kconfig new file mode 100644 index 000000000..0dd690e61 --- /dev/null +++ b/drivers/misc/c2port/Kconfig @@ -0,0 +1,34 @@ +# +# C2 port devices +# + +menuconfig C2PORT + tristate "Silicon Labs C2 port support" + default n + help + This option enables support for Silicon Labs C2 port used to + program Silicon micro controller chips (and other 8051 compatible). + + If your board have no such micro controllers you don't need this + interface at all. + + To compile this driver as a module, choose M here: the module will + be called c2port_core. Note that you also need a client module + usually called c2port-*. + + If you are not sure, say N here. + +if C2PORT + +config C2PORT_DURAMAR_2150 + tristate "C2 port support for Eurotech's Duramar 2150" + depends on X86 + default n + help + This option enables C2 support for the Eurotech's Duramar 2150 + on board micro controller. + + To compile this driver as a module, choose M here: the module will + be called c2port-duramar2150. + +endif # C2PORT diff --git a/drivers/misc/c2port/Makefile b/drivers/misc/c2port/Makefile new file mode 100644 index 000000000..3b2cf43d6 --- /dev/null +++ b/drivers/misc/c2port/Makefile @@ -0,0 +1,3 @@ +obj-$(CONFIG_C2PORT) += core.o + +obj-$(CONFIG_C2PORT_DURAMAR_2150) += c2port-duramar2150.o diff --git a/drivers/misc/c2port/c2port-duramar2150.c b/drivers/misc/c2port/c2port-duramar2150.c new file mode 100644 index 000000000..3dc61ea7d --- /dev/null +++ b/drivers/misc/c2port/c2port-duramar2150.c @@ -0,0 +1,159 @@ +/* + * Silicon Labs C2 port Linux support for Eurotech Duramar 2150 + * + * Copyright (c) 2008 Rodolfo Giometti + * Copyright (c) 2008 Eurotech S.p.A. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#define DATA_PORT 0x325 +#define DIR_PORT 0x326 +#define C2D (1 << 0) +#define C2CK (1 << 1) + +static DEFINE_MUTEX(update_lock); + +/* + * C2 port operations + */ + +static void duramar2150_c2port_access(struct c2port_device *dev, int status) +{ + u8 v; + + mutex_lock(&update_lock); + + v = inb(DIR_PORT); + + /* 0 = input, 1 = output */ + if (status) + outb(v | (C2D | C2CK), DIR_PORT); + else + /* When access is "off" is important that both lines are set + * as inputs or hi-impedance */ + outb(v & ~(C2D | C2CK), DIR_PORT); + + mutex_unlock(&update_lock); +} + +static void duramar2150_c2port_c2d_dir(struct c2port_device *dev, int dir) +{ + u8 v; + + mutex_lock(&update_lock); + + v = inb(DIR_PORT); + + if (dir) + outb(v & ~C2D, DIR_PORT); + else + outb(v | C2D, DIR_PORT); + + mutex_unlock(&update_lock); +} + +static int duramar2150_c2port_c2d_get(struct c2port_device *dev) +{ + return inb(DATA_PORT) & C2D; +} + +static void duramar2150_c2port_c2d_set(struct c2port_device *dev, int status) +{ + u8 v; + + mutex_lock(&update_lock); + + v = inb(DATA_PORT); + + if (status) + outb(v | C2D, DATA_PORT); + else + outb(v & ~C2D, DATA_PORT); + + mutex_unlock(&update_lock); +} + +static void duramar2150_c2port_c2ck_set(struct c2port_device *dev, int status) +{ + u8 v; + + mutex_lock(&update_lock); + + v = inb(DATA_PORT); + + if (status) + outb(v | C2CK, DATA_PORT); + else + outb(v & ~C2CK, DATA_PORT); + + mutex_unlock(&update_lock); +} + +static struct c2port_ops duramar2150_c2port_ops = { + .block_size = 512, /* bytes */ + .blocks_num = 30, /* total flash size: 15360 bytes */ + + .access = duramar2150_c2port_access, + .c2d_dir = duramar2150_c2port_c2d_dir, + .c2d_get = duramar2150_c2port_c2d_get, + .c2d_set = duramar2150_c2port_c2d_set, + .c2ck_set = duramar2150_c2port_c2ck_set, +}; + +static struct c2port_device *duramar2150_c2port_dev; + +/* + * Module stuff + */ + +static int __init duramar2150_c2port_init(void) +{ + struct resource *res; + int ret = 0; + + res = request_region(0x325, 2, "c2port"); + if (!res) + return -EBUSY; + + duramar2150_c2port_dev = c2port_device_register("uc", + &duramar2150_c2port_ops, NULL); + if (IS_ERR(duramar2150_c2port_dev)) { + ret = PTR_ERR(duramar2150_c2port_dev); + goto free_region; + } + + return 0; + +free_region: + release_region(0x325, 2); + return ret; +} + +static void __exit duramar2150_c2port_exit(void) +{ + /* Setup the GPIOs as input by default (access = 0) */ + duramar2150_c2port_access(duramar2150_c2port_dev, 0); + + c2port_device_unregister(duramar2150_c2port_dev); + + release_region(0x325, 2); +} + +module_init(duramar2150_c2port_init); +module_exit(duramar2150_c2port_exit); + +MODULE_AUTHOR("Rodolfo Giometti "); +MODULE_DESCRIPTION("Silicon Labs C2 port Linux support for Duramar 2150"); +MODULE_LICENSE("GPL"); diff --git a/drivers/misc/c2port/core.c b/drivers/misc/c2port/core.c new file mode 100644 index 000000000..1c5b7aec1 --- /dev/null +++ b/drivers/misc/c2port/core.c @@ -0,0 +1,1003 @@ +/* + * Silicon Labs C2 port core Linux support + * + * Copyright (c) 2007 Rodolfo Giometti + * Copyright (c) 2007 Eurotech S.p.A. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#define DRIVER_NAME "c2port" +#define DRIVER_VERSION "0.51.0" + +static DEFINE_SPINLOCK(c2port_idr_lock); +static DEFINE_IDR(c2port_idr); + +/* + * Local variables + */ + +static struct class *c2port_class; + +/* + * C2 registers & commands defines + */ + +/* C2 registers */ +#define C2PORT_DEVICEID 0x00 +#define C2PORT_REVID 0x01 +#define C2PORT_FPCTL 0x02 +#define C2PORT_FPDAT 0xB4 + +/* C2 interface commands */ +#define C2PORT_GET_VERSION 0x01 +#define C2PORT_DEVICE_ERASE 0x03 +#define C2PORT_BLOCK_READ 0x06 +#define C2PORT_BLOCK_WRITE 0x07 +#define C2PORT_PAGE_ERASE 0x08 + +/* C2 status return codes */ +#define C2PORT_INVALID_COMMAND 0x00 +#define C2PORT_COMMAND_FAILED 0x02 +#define C2PORT_COMMAND_OK 0x0d + +/* + * C2 port low level signal managements + */ + +static void c2port_reset(struct c2port_device *dev) +{ + struct c2port_ops *ops = dev->ops; + + /* To reset the device we have to keep clock line low for at least + * 20us. + */ + local_irq_disable(); + ops->c2ck_set(dev, 0); + udelay(25); + ops->c2ck_set(dev, 1); + local_irq_enable(); + + udelay(1); +} + +static void c2port_strobe_ck(struct c2port_device *dev) +{ + struct c2port_ops *ops = dev->ops; + + /* During hi-low-hi transition we disable local IRQs to avoid + * interructions since C2 port specification says that it must be + * shorter than 5us, otherwise the microcontroller may consider + * it as a reset signal! + */ + local_irq_disable(); + ops->c2ck_set(dev, 0); + udelay(1); + ops->c2ck_set(dev, 1); + local_irq_enable(); + + udelay(1); +} + +/* + * C2 port basic functions + */ + +static void c2port_write_ar(struct c2port_device *dev, u8 addr) +{ + struct c2port_ops *ops = dev->ops; + int i; + + /* START field */ + c2port_strobe_ck(dev); + + /* INS field (11b, LSB first) */ + ops->c2d_dir(dev, 0); + ops->c2d_set(dev, 1); + c2port_strobe_ck(dev); + ops->c2d_set(dev, 1); + c2port_strobe_ck(dev); + + /* ADDRESS field */ + for (i = 0; i < 8; i++) { + ops->c2d_set(dev, addr & 0x01); + c2port_strobe_ck(dev); + + addr >>= 1; + } + + /* STOP field */ + ops->c2d_dir(dev, 1); + c2port_strobe_ck(dev); +} + +static int c2port_read_ar(struct c2port_device *dev, u8 *addr) +{ + struct c2port_ops *ops = dev->ops; + int i; + + /* START field */ + c2port_strobe_ck(dev); + + /* INS field (10b, LSB first) */ + ops->c2d_dir(dev, 0); + ops->c2d_set(dev, 0); + c2port_strobe_ck(dev); + ops->c2d_set(dev, 1); + c2port_strobe_ck(dev); + + /* ADDRESS field */ + ops->c2d_dir(dev, 1); + *addr = 0; + for (i = 0; i < 8; i++) { + *addr >>= 1; /* shift in 8-bit ADDRESS field LSB first */ + + c2port_strobe_ck(dev); + if (ops->c2d_get(dev)) + *addr |= 0x80; + } + + /* STOP field */ + c2port_strobe_ck(dev); + + return 0; +} + +static int c2port_write_dr(struct c2port_device *dev, u8 data) +{ + struct c2port_ops *ops = dev->ops; + int timeout, i; + + /* START field */ + c2port_strobe_ck(dev); + + /* INS field (01b, LSB first) */ + ops->c2d_dir(dev, 0); + ops->c2d_set(dev, 1); + c2port_strobe_ck(dev); + ops->c2d_set(dev, 0); + c2port_strobe_ck(dev); + + /* LENGTH field (00b, LSB first -> 1 byte) */ + ops->c2d_set(dev, 0); + c2port_strobe_ck(dev); + ops->c2d_set(dev, 0); + c2port_strobe_ck(dev); + + /* DATA field */ + for (i = 0; i < 8; i++) { + ops->c2d_set(dev, data & 0x01); + c2port_strobe_ck(dev); + + data >>= 1; + } + + /* WAIT field */ + ops->c2d_dir(dev, 1); + timeout = 20; + do { + c2port_strobe_ck(dev); + if (ops->c2d_get(dev)) + break; + + udelay(1); + } while (--timeout > 0); + if (timeout == 0) + return -EIO; + + /* STOP field */ + c2port_strobe_ck(dev); + + return 0; +} + +static int c2port_read_dr(struct c2port_device *dev, u8 *data) +{ + struct c2port_ops *ops = dev->ops; + int timeout, i; + + /* START field */ + c2port_strobe_ck(dev); + + /* INS field (00b, LSB first) */ + ops->c2d_dir(dev, 0); + ops->c2d_set(dev, 0); + c2port_strobe_ck(dev); + ops->c2d_set(dev, 0); + c2port_strobe_ck(dev); + + /* LENGTH field (00b, LSB first -> 1 byte) */ + ops->c2d_set(dev, 0); + c2port_strobe_ck(dev); + ops->c2d_set(dev, 0); + c2port_strobe_ck(dev); + + /* WAIT field */ + ops->c2d_dir(dev, 1); + timeout = 20; + do { + c2port_strobe_ck(dev); + if (ops->c2d_get(dev)) + break; + + udelay(1); + } while (--timeout > 0); + if (timeout == 0) + return -EIO; + + /* DATA field */ + *data = 0; + for (i = 0; i < 8; i++) { + *data >>= 1; /* shift in 8-bit DATA field LSB first */ + + c2port_strobe_ck(dev); + if (ops->c2d_get(dev)) + *data |= 0x80; + } + + /* STOP field */ + c2port_strobe_ck(dev); + + return 0; +} + +static int c2port_poll_in_busy(struct c2port_device *dev) +{ + u8 addr; + int ret, timeout = 20; + + do { + ret = (c2port_read_ar(dev, &addr)); + if (ret < 0) + return -EIO; + + if (!(addr & 0x02)) + break; + + udelay(1); + } while (--timeout > 0); + if (timeout == 0) + return -EIO; + + return 0; +} + +static int c2port_poll_out_ready(struct c2port_device *dev) +{ + u8 addr; + int ret, timeout = 10000; /* erase flash needs long time... */ + + do { + ret = (c2port_read_ar(dev, &addr)); + if (ret < 0) + return -EIO; + + if (addr & 0x01) + break; + + udelay(1); + } while (--timeout > 0); + if (timeout == 0) + return -EIO; + + return 0; +} + +/* + * sysfs methods + */ + +static ssize_t c2port_show_name(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct c2port_device *c2dev = dev_get_drvdata(dev); + + return sprintf(buf, "%s\n", c2dev->name); +} +static DEVICE_ATTR(name, 0444, c2port_show_name, NULL); + +static ssize_t c2port_show_flash_blocks_num(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct c2port_device *c2dev = dev_get_drvdata(dev); + struct c2port_ops *ops = c2dev->ops; + + return sprintf(buf, "%d\n", ops->blocks_num); +} +static DEVICE_ATTR(flash_blocks_num, 0444, c2port_show_flash_blocks_num, NULL); + +static ssize_t c2port_show_flash_block_size(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct c2port_device *c2dev = dev_get_drvdata(dev); + struct c2port_ops *ops = c2dev->ops; + + return sprintf(buf, "%d\n", ops->block_size); +} +static DEVICE_ATTR(flash_block_size, 0444, c2port_show_flash_block_size, NULL); + +static ssize_t c2port_show_flash_size(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct c2port_device *c2dev = dev_get_drvdata(dev); + struct c2port_ops *ops = c2dev->ops; + + return sprintf(buf, "%d\n", ops->blocks_num * ops->block_size); +} +static DEVICE_ATTR(flash_size, 0444, c2port_show_flash_size, NULL); + +static ssize_t access_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct c2port_device *c2dev = dev_get_drvdata(dev); + + return sprintf(buf, "%d\n", c2dev->access); +} + +static ssize_t access_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct c2port_device *c2dev = dev_get_drvdata(dev); + struct c2port_ops *ops = c2dev->ops; + int status, ret; + + ret = sscanf(buf, "%d", &status); + if (ret != 1) + return -EINVAL; + + mutex_lock(&c2dev->mutex); + + c2dev->access = !!status; + + /* If access is "on" clock should be HIGH _before_ setting the line + * as output and data line should be set as INPUT anyway */ + if (c2dev->access) + ops->c2ck_set(c2dev, 1); + ops->access(c2dev, c2dev->access); + if (c2dev->access) + ops->c2d_dir(c2dev, 1); + + mutex_unlock(&c2dev->mutex); + + return count; +} +static DEVICE_ATTR_RW(access); + +static ssize_t c2port_store_reset(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct c2port_device *c2dev = dev_get_drvdata(dev); + + /* Check the device access status */ + if (!c2dev->access) + return -EBUSY; + + mutex_lock(&c2dev->mutex); + + c2port_reset(c2dev); + c2dev->flash_access = 0; + + mutex_unlock(&c2dev->mutex); + + return count; +} +static DEVICE_ATTR(reset, 0200, NULL, c2port_store_reset); + +static ssize_t __c2port_show_dev_id(struct c2port_device *dev, char *buf) +{ + u8 data; + int ret; + + /* Select DEVICEID register for C2 data register accesses */ + c2port_write_ar(dev, C2PORT_DEVICEID); + + /* Read and return the device ID register */ + ret = c2port_read_dr(dev, &data); + if (ret < 0) + return ret; + + return sprintf(buf, "%d\n", data); +} + +static ssize_t c2port_show_dev_id(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct c2port_device *c2dev = dev_get_drvdata(dev); + ssize_t ret; + + /* Check the device access status */ + if (!c2dev->access) + return -EBUSY; + + mutex_lock(&c2dev->mutex); + ret = __c2port_show_dev_id(c2dev, buf); + mutex_unlock(&c2dev->mutex); + + if (ret < 0) + dev_err(dev, "cannot read from %s\n", c2dev->name); + + return ret; +} +static DEVICE_ATTR(dev_id, 0444, c2port_show_dev_id, NULL); + +static ssize_t __c2port_show_rev_id(struct c2port_device *dev, char *buf) +{ + u8 data; + int ret; + + /* Select REVID register for C2 data register accesses */ + c2port_write_ar(dev, C2PORT_REVID); + + /* Read and return the revision ID register */ + ret = c2port_read_dr(dev, &data); + if (ret < 0) + return ret; + + return sprintf(buf, "%d\n", data); +} + +static ssize_t c2port_show_rev_id(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct c2port_device *c2dev = dev_get_drvdata(dev); + ssize_t ret; + + /* Check the device access status */ + if (!c2dev->access) + return -EBUSY; + + mutex_lock(&c2dev->mutex); + ret = __c2port_show_rev_id(c2dev, buf); + mutex_unlock(&c2dev->mutex); + + if (ret < 0) + dev_err(c2dev->dev, "cannot read from %s\n", c2dev->name); + + return ret; +} +static DEVICE_ATTR(rev_id, 0444, c2port_show_rev_id, NULL); + +static ssize_t c2port_show_flash_access(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct c2port_device *c2dev = dev_get_drvdata(dev); + + return sprintf(buf, "%d\n", c2dev->flash_access); +} + +static ssize_t __c2port_store_flash_access(struct c2port_device *dev, + int status) +{ + int ret; + + /* Check the device access status */ + if (!dev->access) + return -EBUSY; + + dev->flash_access = !!status; + + /* If flash_access is off we have nothing to do... */ + if (dev->flash_access == 0) + return 0; + + /* Target the C2 flash programming control register for C2 data + * register access */ + c2port_write_ar(dev, C2PORT_FPCTL); + + /* Write the first keycode to enable C2 Flash programming */ + ret = c2port_write_dr(dev, 0x02); + if (ret < 0) + return ret; + + /* Write the second keycode to enable C2 Flash programming */ + ret = c2port_write_dr(dev, 0x01); + if (ret < 0) + return ret; + + /* Delay for at least 20ms to ensure the target is ready for + * C2 flash programming */ + mdelay(25); + + return 0; +} + +static ssize_t c2port_store_flash_access(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct c2port_device *c2dev = dev_get_drvdata(dev); + int status; + ssize_t ret; + + ret = sscanf(buf, "%d", &status); + if (ret != 1) + return -EINVAL; + + mutex_lock(&c2dev->mutex); + ret = __c2port_store_flash_access(c2dev, status); + mutex_unlock(&c2dev->mutex); + + if (ret < 0) { + dev_err(c2dev->dev, "cannot enable %s flash programming\n", + c2dev->name); + return ret; + } + + return count; +} +static DEVICE_ATTR(flash_access, 0644, c2port_show_flash_access, + c2port_store_flash_access); + +static ssize_t __c2port_write_flash_erase(struct c2port_device *dev) +{ + u8 status; + int ret; + + /* Target the C2 flash programming data register for C2 data register + * access. + */ + c2port_write_ar(dev, C2PORT_FPDAT); + + /* Send device erase command */ + c2port_write_dr(dev, C2PORT_DEVICE_ERASE); + + /* Wait for input acknowledge */ + ret = c2port_poll_in_busy(dev); + if (ret < 0) + return ret; + + /* Should check status before starting FLASH access sequence */ + + /* Wait for status information */ + ret = c2port_poll_out_ready(dev); + if (ret < 0) + return ret; + + /* Read flash programming interface status */ + ret = c2port_read_dr(dev, &status); + if (ret < 0) + return ret; + if (status != C2PORT_COMMAND_OK) + return -EBUSY; + + /* Send a three-byte arming sequence to enable the device erase. + * If the sequence is not received correctly, the command will be + * ignored. + * Sequence is: 0xde, 0xad, 0xa5. + */ + c2port_write_dr(dev, 0xde); + ret = c2port_poll_in_busy(dev); + if (ret < 0) + return ret; + c2port_write_dr(dev, 0xad); + ret = c2port_poll_in_busy(dev); + if (ret < 0) + return ret; + c2port_write_dr(dev, 0xa5); + ret = c2port_poll_in_busy(dev); + if (ret < 0) + return ret; + + ret = c2port_poll_out_ready(dev); + if (ret < 0) + return ret; + + return 0; +} + +static ssize_t c2port_store_flash_erase(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct c2port_device *c2dev = dev_get_drvdata(dev); + int ret; + + /* Check the device and flash access status */ + if (!c2dev->access || !c2dev->flash_access) + return -EBUSY; + + mutex_lock(&c2dev->mutex); + ret = __c2port_write_flash_erase(c2dev); + mutex_unlock(&c2dev->mutex); + + if (ret < 0) { + dev_err(c2dev->dev, "cannot erase %s flash\n", c2dev->name); + return ret; + } + + return count; +} +static DEVICE_ATTR(flash_erase, 0200, NULL, c2port_store_flash_erase); + +static ssize_t __c2port_read_flash_data(struct c2port_device *dev, + char *buffer, loff_t offset, size_t count) +{ + struct c2port_ops *ops = dev->ops; + u8 status, nread = 128; + int i, ret; + + /* Check for flash end */ + if (offset >= ops->block_size * ops->blocks_num) + return 0; + + if (ops->block_size * ops->blocks_num - offset < nread) + nread = ops->block_size * ops->blocks_num - offset; + if (count < nread) + nread = count; + if (nread == 0) + return nread; + + /* Target the C2 flash programming data register for C2 data register + * access */ + c2port_write_ar(dev, C2PORT_FPDAT); + + /* Send flash block read command */ + c2port_write_dr(dev, C2PORT_BLOCK_READ); + + /* Wait for input acknowledge */ + ret = c2port_poll_in_busy(dev); + if (ret < 0) + return ret; + + /* Should check status before starting FLASH access sequence */ + + /* Wait for status information */ + ret = c2port_poll_out_ready(dev); + if (ret < 0) + return ret; + + /* Read flash programming interface status */ + ret = c2port_read_dr(dev, &status); + if (ret < 0) + return ret; + if (status != C2PORT_COMMAND_OK) + return -EBUSY; + + /* Send address high byte */ + c2port_write_dr(dev, offset >> 8); + ret = c2port_poll_in_busy(dev); + if (ret < 0) + return ret; + + /* Send address low byte */ + c2port_write_dr(dev, offset & 0x00ff); + ret = c2port_poll_in_busy(dev); + if (ret < 0) + return ret; + + /* Send address block size */ + c2port_write_dr(dev, nread); + ret = c2port_poll_in_busy(dev); + if (ret < 0) + return ret; + + /* Should check status before reading FLASH block */ + + /* Wait for status information */ + ret = c2port_poll_out_ready(dev); + if (ret < 0) + return ret; + + /* Read flash programming interface status */ + ret = c2port_read_dr(dev, &status); + if (ret < 0) + return ret; + if (status != C2PORT_COMMAND_OK) + return -EBUSY; + + /* Read flash block */ + for (i = 0; i < nread; i++) { + ret = c2port_poll_out_ready(dev); + if (ret < 0) + return ret; + + ret = c2port_read_dr(dev, buffer+i); + if (ret < 0) + return ret; + } + + return nread; +} + +static ssize_t c2port_read_flash_data(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, + char *buffer, loff_t offset, size_t count) +{ + struct c2port_device *c2dev = dev_get_drvdata(kobj_to_dev(kobj)); + ssize_t ret; + + /* Check the device and flash access status */ + if (!c2dev->access || !c2dev->flash_access) + return -EBUSY; + + mutex_lock(&c2dev->mutex); + ret = __c2port_read_flash_data(c2dev, buffer, offset, count); + mutex_unlock(&c2dev->mutex); + + if (ret < 0) + dev_err(c2dev->dev, "cannot read %s flash\n", c2dev->name); + + return ret; +} + +static ssize_t __c2port_write_flash_data(struct c2port_device *dev, + char *buffer, loff_t offset, size_t count) +{ + struct c2port_ops *ops = dev->ops; + u8 status, nwrite = 128; + int i, ret; + + if (nwrite > count) + nwrite = count; + if (ops->block_size * ops->blocks_num - offset < nwrite) + nwrite = ops->block_size * ops->blocks_num - offset; + + /* Check for flash end */ + if (offset >= ops->block_size * ops->blocks_num) + return -EINVAL; + + /* Target the C2 flash programming data register for C2 data register + * access */ + c2port_write_ar(dev, C2PORT_FPDAT); + + /* Send flash block write command */ + c2port_write_dr(dev, C2PORT_BLOCK_WRITE); + + /* Wait for input acknowledge */ + ret = c2port_poll_in_busy(dev); + if (ret < 0) + return ret; + + /* Should check status before starting FLASH access sequence */ + + /* Wait for status information */ + ret = c2port_poll_out_ready(dev); + if (ret < 0) + return ret; + + /* Read flash programming interface status */ + ret = c2port_read_dr(dev, &status); + if (ret < 0) + return ret; + if (status != C2PORT_COMMAND_OK) + return -EBUSY; + + /* Send address high byte */ + c2port_write_dr(dev, offset >> 8); + ret = c2port_poll_in_busy(dev); + if (ret < 0) + return ret; + + /* Send address low byte */ + c2port_write_dr(dev, offset & 0x00ff); + ret = c2port_poll_in_busy(dev); + if (ret < 0) + return ret; + + /* Send address block size */ + c2port_write_dr(dev, nwrite); + ret = c2port_poll_in_busy(dev); + if (ret < 0) + return ret; + + /* Should check status before writing FLASH block */ + + /* Wait for status information */ + ret = c2port_poll_out_ready(dev); + if (ret < 0) + return ret; + + /* Read flash programming interface status */ + ret = c2port_read_dr(dev, &status); + if (ret < 0) + return ret; + if (status != C2PORT_COMMAND_OK) + return -EBUSY; + + /* Write flash block */ + for (i = 0; i < nwrite; i++) { + ret = c2port_write_dr(dev, *(buffer+i)); + if (ret < 0) + return ret; + + ret = c2port_poll_in_busy(dev); + if (ret < 0) + return ret; + + } + + /* Wait for last flash write to complete */ + ret = c2port_poll_out_ready(dev); + if (ret < 0) + return ret; + + return nwrite; +} + +static ssize_t c2port_write_flash_data(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, + char *buffer, loff_t offset, size_t count) +{ + struct c2port_device *c2dev = dev_get_drvdata(kobj_to_dev(kobj)); + int ret; + + /* Check the device access status */ + if (!c2dev->access || !c2dev->flash_access) + return -EBUSY; + + mutex_lock(&c2dev->mutex); + ret = __c2port_write_flash_data(c2dev, buffer, offset, count); + mutex_unlock(&c2dev->mutex); + + if (ret < 0) + dev_err(c2dev->dev, "cannot write %s flash\n", c2dev->name); + + return ret; +} +/* size is computed at run-time */ +static BIN_ATTR(flash_data, 0644, c2port_read_flash_data, + c2port_write_flash_data, 0); + +/* + * Class attributes + */ +static struct attribute *c2port_attrs[] = { + &dev_attr_name.attr, + &dev_attr_flash_blocks_num.attr, + &dev_attr_flash_block_size.attr, + &dev_attr_flash_size.attr, + &dev_attr_access.attr, + &dev_attr_reset.attr, + &dev_attr_dev_id.attr, + &dev_attr_rev_id.attr, + &dev_attr_flash_access.attr, + &dev_attr_flash_erase.attr, + NULL, +}; + +static struct bin_attribute *c2port_bin_attrs[] = { + &bin_attr_flash_data, + NULL, +}; + +static const struct attribute_group c2port_group = { + .attrs = c2port_attrs, + .bin_attrs = c2port_bin_attrs, +}; + +static const struct attribute_group *c2port_groups[] = { + &c2port_group, + NULL, +}; + +/* + * Exported functions + */ + +struct c2port_device *c2port_device_register(char *name, + struct c2port_ops *ops, void *devdata) +{ + struct c2port_device *c2dev; + int ret; + + if (unlikely(!ops) || unlikely(!ops->access) || \ + unlikely(!ops->c2d_dir) || unlikely(!ops->c2ck_set) || \ + unlikely(!ops->c2d_get) || unlikely(!ops->c2d_set)) + return ERR_PTR(-EINVAL); + + c2dev = kmalloc(sizeof(struct c2port_device), GFP_KERNEL); + if (unlikely(!c2dev)) + return ERR_PTR(-ENOMEM); + + idr_preload(GFP_KERNEL); + spin_lock_irq(&c2port_idr_lock); + ret = idr_alloc(&c2port_idr, c2dev, 0, 0, GFP_NOWAIT); + spin_unlock_irq(&c2port_idr_lock); + idr_preload_end(); + + if (ret < 0) + goto error_idr_alloc; + c2dev->id = ret; + + bin_attr_flash_data.size = ops->blocks_num * ops->block_size; + + c2dev->dev = device_create(c2port_class, NULL, 0, c2dev, + "c2port%d", c2dev->id); + if (IS_ERR(c2dev->dev)) { + ret = PTR_ERR(c2dev->dev); + goto error_device_create; + } + dev_set_drvdata(c2dev->dev, c2dev); + + strncpy(c2dev->name, name, C2PORT_NAME_LEN); + c2dev->ops = ops; + mutex_init(&c2dev->mutex); + + /* By default C2 port access is off */ + c2dev->access = c2dev->flash_access = 0; + ops->access(c2dev, 0); + + dev_info(c2dev->dev, "C2 port %s added\n", name); + dev_info(c2dev->dev, "%s flash has %d blocks x %d bytes " + "(%d bytes total)\n", + name, ops->blocks_num, ops->block_size, + ops->blocks_num * ops->block_size); + + return c2dev; + +error_device_create: + spin_lock_irq(&c2port_idr_lock); + idr_remove(&c2port_idr, c2dev->id); + spin_unlock_irq(&c2port_idr_lock); + +error_idr_alloc: + kfree(c2dev); + + return ERR_PTR(ret); +} +EXPORT_SYMBOL(c2port_device_register); + +void c2port_device_unregister(struct c2port_device *c2dev) +{ + if (!c2dev) + return; + + dev_info(c2dev->dev, "C2 port %s removed\n", c2dev->name); + + spin_lock_irq(&c2port_idr_lock); + idr_remove(&c2port_idr, c2dev->id); + spin_unlock_irq(&c2port_idr_lock); + + device_destroy(c2port_class, c2dev->id); + + kfree(c2dev); +} +EXPORT_SYMBOL(c2port_device_unregister); + +/* + * Module stuff + */ + +static int __init c2port_init(void) +{ + printk(KERN_INFO "Silicon Labs C2 port support v. " DRIVER_VERSION + " - (C) 2007 Rodolfo Giometti\n"); + + c2port_class = class_create(THIS_MODULE, "c2port"); + if (IS_ERR(c2port_class)) { + printk(KERN_ERR "c2port: failed to allocate class\n"); + return PTR_ERR(c2port_class); + } + c2port_class->dev_groups = c2port_groups; + + return 0; +} + +static void __exit c2port_exit(void) +{ + class_destroy(c2port_class); +} + +module_init(c2port_init); +module_exit(c2port_exit); + +MODULE_AUTHOR("Rodolfo Giometti "); +MODULE_DESCRIPTION("Silicon Labs C2 port support v. " DRIVER_VERSION); +MODULE_LICENSE("GPL"); diff --git a/drivers/misc/cardreader/Kconfig b/drivers/misc/cardreader/Kconfig new file mode 100644 index 000000000..69e815e32 --- /dev/null +++ b/drivers/misc/cardreader/Kconfig @@ -0,0 +1,20 @@ +config MISC_RTSX_PCI + tristate "Realtek PCI-E card reader" + depends on PCI + select MFD_CORE + help + This supports for Realtek PCI-Express card reader including rts5209, + rts5227, rts522A, rts5229, rts5249, rts524A, rts525A, rtl8411, rts5260. + Realtek card readers support access to many types of memory cards, + such as Memory Stick, Memory Stick Pro, Secure Digital and + MultiMediaCard. + +config MISC_RTSX_USB + tristate "Realtek USB card reader" + depends on USB + select MFD_CORE + help + Select this option to get support for Realtek USB 2.0 card readers + including RTS5129, RTS5139, RTS5179 and RTS5170. + Realtek card reader supports access to many types of memory cards, + such as Memory Stick Pro, Secure Digital and MultiMediaCard. diff --git a/drivers/misc/cardreader/Makefile b/drivers/misc/cardreader/Makefile new file mode 100644 index 000000000..9fabfcc6f --- /dev/null +++ b/drivers/misc/cardreader/Makefile @@ -0,0 +1,4 @@ +rtsx_pci-objs := rtsx_pcr.o rts5209.o rts5229.o rtl8411.o rts5227.o rts5249.o rts5260.o + +obj-$(CONFIG_MISC_RTSX_PCI) += rtsx_pci.o +obj-$(CONFIG_MISC_RTSX_USB) += rtsx_usb.o diff --git a/drivers/misc/cardreader/rtl8411.c b/drivers/misc/cardreader/rtl8411.c new file mode 100644 index 000000000..434fd070d --- /dev/null +++ b/drivers/misc/cardreader/rtl8411.c @@ -0,0 +1,508 @@ +/* Driver for Realtek PCI-Express card reader + * + * Copyright(c) 2009-2013 Realtek Semiconductor Corp. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see . + * + * Author: + * Wei WANG + * Roger Tseng + */ + +#include +#include +#include +#include + +#include "rtsx_pcr.h" + +static u8 rtl8411_get_ic_version(struct rtsx_pcr *pcr) +{ + u8 val; + + rtsx_pci_read_register(pcr, SYS_VER, &val); + return val & 0x0F; +} + +static int rtl8411b_is_qfn48(struct rtsx_pcr *pcr) +{ + u8 val = 0; + + rtsx_pci_read_register(pcr, RTL8411B_PACKAGE_MODE, &val); + + if (val & 0x2) + return 1; + else + return 0; +} + +static void rtl8411_fetch_vendor_settings(struct rtsx_pcr *pcr) +{ + u32 reg1 = 0; + u8 reg3 = 0; + + rtsx_pci_read_config_dword(pcr, PCR_SETTING_REG1, ®1); + pcr_dbg(pcr, "Cfg 0x%x: 0x%x\n", PCR_SETTING_REG1, reg1); + + if (!rtsx_vendor_setting_valid(reg1)) + return; + + pcr->aspm_en = rtsx_reg_to_aspm(reg1); + pcr->sd30_drive_sel_1v8 = + map_sd_drive(rtsx_reg_to_sd30_drive_sel_1v8(reg1)); + pcr->card_drive_sel &= 0x3F; + pcr->card_drive_sel |= rtsx_reg_to_card_drive_sel(reg1); + + rtsx_pci_read_config_byte(pcr, PCR_SETTING_REG3, ®3); + pcr_dbg(pcr, "Cfg 0x%x: 0x%x\n", PCR_SETTING_REG3, reg3); + pcr->sd30_drive_sel_3v3 = rtl8411_reg_to_sd30_drive_sel_3v3(reg3); +} + +static void rtl8411b_fetch_vendor_settings(struct rtsx_pcr *pcr) +{ + u32 reg = 0; + + rtsx_pci_read_config_dword(pcr, PCR_SETTING_REG1, ®); + pcr_dbg(pcr, "Cfg 0x%x: 0x%x\n", PCR_SETTING_REG1, reg); + + if (!rtsx_vendor_setting_valid(reg)) + return; + + pcr->aspm_en = rtsx_reg_to_aspm(reg); + pcr->sd30_drive_sel_1v8 = + map_sd_drive(rtsx_reg_to_sd30_drive_sel_1v8(reg)); + pcr->sd30_drive_sel_3v3 = + map_sd_drive(rtl8411b_reg_to_sd30_drive_sel_3v3(reg)); +} + +static void rtl8411_force_power_down(struct rtsx_pcr *pcr, u8 pm_state) +{ + rtsx_pci_write_register(pcr, FPDCTL, 0x07, 0x07); +} + +static int rtl8411_extra_init_hw(struct rtsx_pcr *pcr) +{ + rtsx_pci_init_cmd(pcr); + + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD30_DRIVE_SEL, + 0xFF, pcr->sd30_drive_sel_3v3); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CD_PAD_CTL, + CD_DISABLE_MASK | CD_AUTO_DISABLE, CD_ENABLE); + + return rtsx_pci_send_cmd(pcr, 100); +} + +static int rtl8411b_extra_init_hw(struct rtsx_pcr *pcr) +{ + rtsx_pci_init_cmd(pcr); + + if (rtl8411b_is_qfn48(pcr)) + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, + CARD_PULL_CTL3, 0xFF, 0xF5); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD30_DRIVE_SEL, + 0xFF, pcr->sd30_drive_sel_3v3); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CD_PAD_CTL, + CD_DISABLE_MASK | CD_AUTO_DISABLE, CD_ENABLE); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, FUNC_FORCE_CTL, + 0x06, 0x00); + + return rtsx_pci_send_cmd(pcr, 100); +} + +static int rtl8411_turn_on_led(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, CARD_GPIO, 0x01, 0x00); +} + +static int rtl8411_turn_off_led(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, CARD_GPIO, 0x01, 0x01); +} + +static int rtl8411_enable_auto_blink(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, CARD_AUTO_BLINK, 0xFF, 0x0D); +} + +static int rtl8411_disable_auto_blink(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, CARD_AUTO_BLINK, 0x08, 0x00); +} + +static int rtl8411_card_power_on(struct rtsx_pcr *pcr, int card) +{ + int err; + + rtsx_pci_init_cmd(pcr); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL, + BPP_POWER_MASK, BPP_POWER_5_PERCENT_ON); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, LDO_CTL, + BPP_LDO_POWB, BPP_LDO_SUSPEND); + err = rtsx_pci_send_cmd(pcr, 100); + if (err < 0) + return err; + + /* To avoid too large in-rush current */ + udelay(150); + + err = rtsx_pci_write_register(pcr, CARD_PWR_CTL, + BPP_POWER_MASK, BPP_POWER_10_PERCENT_ON); + if (err < 0) + return err; + + udelay(150); + + err = rtsx_pci_write_register(pcr, CARD_PWR_CTL, + BPP_POWER_MASK, BPP_POWER_15_PERCENT_ON); + if (err < 0) + return err; + + udelay(150); + + err = rtsx_pci_write_register(pcr, CARD_PWR_CTL, + BPP_POWER_MASK, BPP_POWER_ON); + if (err < 0) + return err; + + return rtsx_pci_write_register(pcr, LDO_CTL, BPP_LDO_POWB, BPP_LDO_ON); +} + +static int rtl8411_card_power_off(struct rtsx_pcr *pcr, int card) +{ + int err; + + err = rtsx_pci_write_register(pcr, CARD_PWR_CTL, + BPP_POWER_MASK, BPP_POWER_OFF); + if (err < 0) + return err; + + return rtsx_pci_write_register(pcr, LDO_CTL, + BPP_LDO_POWB, BPP_LDO_SUSPEND); +} + +static int rtl8411_do_switch_output_voltage(struct rtsx_pcr *pcr, u8 voltage, + int bpp_tuned18_shift, int bpp_asic_1v8) +{ + u8 mask, val; + int err; + + mask = (BPP_REG_TUNED18 << bpp_tuned18_shift) | BPP_PAD_MASK; + if (voltage == OUTPUT_3V3) { + err = rtsx_pci_write_register(pcr, + SD30_DRIVE_SEL, 0x07, pcr->sd30_drive_sel_3v3); + if (err < 0) + return err; + val = (BPP_ASIC_3V3 << bpp_tuned18_shift) | BPP_PAD_3V3; + } else if (voltage == OUTPUT_1V8) { + err = rtsx_pci_write_register(pcr, + SD30_DRIVE_SEL, 0x07, pcr->sd30_drive_sel_1v8); + if (err < 0) + return err; + val = (bpp_asic_1v8 << bpp_tuned18_shift) | BPP_PAD_1V8; + } else { + return -EINVAL; + } + + return rtsx_pci_write_register(pcr, LDO_CTL, mask, val); +} + +static int rtl8411_switch_output_voltage(struct rtsx_pcr *pcr, u8 voltage) +{ + return rtl8411_do_switch_output_voltage(pcr, voltage, + BPP_TUNED18_SHIFT_8411, BPP_ASIC_1V8); +} + +static int rtl8402_switch_output_voltage(struct rtsx_pcr *pcr, u8 voltage) +{ + return rtl8411_do_switch_output_voltage(pcr, voltage, + BPP_TUNED18_SHIFT_8402, BPP_ASIC_2V0); +} + +static unsigned int rtl8411_cd_deglitch(struct rtsx_pcr *pcr) +{ + unsigned int card_exist; + + card_exist = rtsx_pci_readl(pcr, RTSX_BIPR); + card_exist &= CARD_EXIST; + if (!card_exist) { + /* Enable card CD */ + rtsx_pci_write_register(pcr, CD_PAD_CTL, + CD_DISABLE_MASK, CD_ENABLE); + /* Enable card interrupt */ + rtsx_pci_write_register(pcr, EFUSE_CONTENT, 0xe0, 0x00); + return 0; + } + + if (hweight32(card_exist) > 1) { + rtsx_pci_write_register(pcr, CARD_PWR_CTL, + BPP_POWER_MASK, BPP_POWER_5_PERCENT_ON); + msleep(100); + + card_exist = rtsx_pci_readl(pcr, RTSX_BIPR); + if (card_exist & MS_EXIST) + card_exist = MS_EXIST; + else if (card_exist & SD_EXIST) + card_exist = SD_EXIST; + else + card_exist = 0; + + rtsx_pci_write_register(pcr, CARD_PWR_CTL, + BPP_POWER_MASK, BPP_POWER_OFF); + + pcr_dbg(pcr, "After CD deglitch, card_exist = 0x%x\n", + card_exist); + } + + if (card_exist & MS_EXIST) { + /* Disable SD interrupt */ + rtsx_pci_write_register(pcr, EFUSE_CONTENT, 0xe0, 0x40); + rtsx_pci_write_register(pcr, CD_PAD_CTL, + CD_DISABLE_MASK, MS_CD_EN_ONLY); + } else if (card_exist & SD_EXIST) { + /* Disable MS interrupt */ + rtsx_pci_write_register(pcr, EFUSE_CONTENT, 0xe0, 0x80); + rtsx_pci_write_register(pcr, CD_PAD_CTL, + CD_DISABLE_MASK, SD_CD_EN_ONLY); + } + + return card_exist; +} + +static int rtl8411_conv_clk_and_div_n(int input, int dir) +{ + int output; + + if (dir == CLK_TO_DIV_N) + output = input * 4 / 5 - 2; + else + output = (input + 2) * 5 / 4; + + return output; +} + +static const struct pcr_ops rtl8411_pcr_ops = { + .fetch_vendor_settings = rtl8411_fetch_vendor_settings, + .extra_init_hw = rtl8411_extra_init_hw, + .optimize_phy = NULL, + .turn_on_led = rtl8411_turn_on_led, + .turn_off_led = rtl8411_turn_off_led, + .enable_auto_blink = rtl8411_enable_auto_blink, + .disable_auto_blink = rtl8411_disable_auto_blink, + .card_power_on = rtl8411_card_power_on, + .card_power_off = rtl8411_card_power_off, + .switch_output_voltage = rtl8411_switch_output_voltage, + .cd_deglitch = rtl8411_cd_deglitch, + .conv_clk_and_div_n = rtl8411_conv_clk_and_div_n, + .force_power_down = rtl8411_force_power_down, +}; + +static const struct pcr_ops rtl8402_pcr_ops = { + .fetch_vendor_settings = rtl8411_fetch_vendor_settings, + .extra_init_hw = rtl8411_extra_init_hw, + .optimize_phy = NULL, + .turn_on_led = rtl8411_turn_on_led, + .turn_off_led = rtl8411_turn_off_led, + .enable_auto_blink = rtl8411_enable_auto_blink, + .disable_auto_blink = rtl8411_disable_auto_blink, + .card_power_on = rtl8411_card_power_on, + .card_power_off = rtl8411_card_power_off, + .switch_output_voltage = rtl8402_switch_output_voltage, + .cd_deglitch = rtl8411_cd_deglitch, + .conv_clk_and_div_n = rtl8411_conv_clk_and_div_n, + .force_power_down = rtl8411_force_power_down, +}; + +static const struct pcr_ops rtl8411b_pcr_ops = { + .fetch_vendor_settings = rtl8411b_fetch_vendor_settings, + .extra_init_hw = rtl8411b_extra_init_hw, + .optimize_phy = NULL, + .turn_on_led = rtl8411_turn_on_led, + .turn_off_led = rtl8411_turn_off_led, + .enable_auto_blink = rtl8411_enable_auto_blink, + .disable_auto_blink = rtl8411_disable_auto_blink, + .card_power_on = rtl8411_card_power_on, + .card_power_off = rtl8411_card_power_off, + .switch_output_voltage = rtl8411_switch_output_voltage, + .cd_deglitch = rtl8411_cd_deglitch, + .conv_clk_and_div_n = rtl8411_conv_clk_and_div_n, + .force_power_down = rtl8411_force_power_down, +}; + +/* SD Pull Control Enable: + * SD_DAT[3:0] ==> pull up + * SD_CD ==> pull up + * SD_WP ==> pull up + * SD_CMD ==> pull up + * SD_CLK ==> pull down + */ +static const u32 rtl8411_sd_pull_ctl_enable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL1, 0xAA), + RTSX_REG_PAIR(CARD_PULL_CTL2, 0xAA), + RTSX_REG_PAIR(CARD_PULL_CTL3, 0xA9), + RTSX_REG_PAIR(CARD_PULL_CTL4, 0x09), + RTSX_REG_PAIR(CARD_PULL_CTL5, 0x09), + RTSX_REG_PAIR(CARD_PULL_CTL6, 0x04), + 0, +}; + +/* SD Pull Control Disable: + * SD_DAT[3:0] ==> pull down + * SD_CD ==> pull up + * SD_WP ==> pull down + * SD_CMD ==> pull down + * SD_CLK ==> pull down + */ +static const u32 rtl8411_sd_pull_ctl_disable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL1, 0x65), + RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL3, 0x95), + RTSX_REG_PAIR(CARD_PULL_CTL4, 0x09), + RTSX_REG_PAIR(CARD_PULL_CTL5, 0x05), + RTSX_REG_PAIR(CARD_PULL_CTL6, 0x04), + 0, +}; + +/* MS Pull Control Enable: + * MS CD ==> pull up + * others ==> pull down + */ +static const u32 rtl8411_ms_pull_ctl_enable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL1, 0x65), + RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL3, 0x95), + RTSX_REG_PAIR(CARD_PULL_CTL4, 0x05), + RTSX_REG_PAIR(CARD_PULL_CTL5, 0x05), + RTSX_REG_PAIR(CARD_PULL_CTL6, 0x04), + 0, +}; + +/* MS Pull Control Disable: + * MS CD ==> pull up + * others ==> pull down + */ +static const u32 rtl8411_ms_pull_ctl_disable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL1, 0x65), + RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL3, 0x95), + RTSX_REG_PAIR(CARD_PULL_CTL4, 0x09), + RTSX_REG_PAIR(CARD_PULL_CTL5, 0x05), + RTSX_REG_PAIR(CARD_PULL_CTL6, 0x04), + 0, +}; + +static const u32 rtl8411b_qfn64_sd_pull_ctl_enable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL1, 0xAA), + RTSX_REG_PAIR(CARD_PULL_CTL2, 0xAA), + RTSX_REG_PAIR(CARD_PULL_CTL3, 0x09 | 0xD0), + RTSX_REG_PAIR(CARD_PULL_CTL4, 0x09 | 0x50), + RTSX_REG_PAIR(CARD_PULL_CTL5, 0x05 | 0x50), + RTSX_REG_PAIR(CARD_PULL_CTL6, 0x04 | 0x11), + 0, +}; + +static const u32 rtl8411b_qfn48_sd_pull_ctl_enable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL2, 0xAA), + RTSX_REG_PAIR(CARD_PULL_CTL3, 0x69 | 0x90), + RTSX_REG_PAIR(CARD_PULL_CTL6, 0x08 | 0x11), + 0, +}; + +static const u32 rtl8411b_qfn64_sd_pull_ctl_disable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL1, 0x65), + RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL3, 0x05 | 0xD0), + RTSX_REG_PAIR(CARD_PULL_CTL4, 0x09 | 0x50), + RTSX_REG_PAIR(CARD_PULL_CTL5, 0x05 | 0x50), + RTSX_REG_PAIR(CARD_PULL_CTL6, 0x04 | 0x11), + 0, +}; + +static const u32 rtl8411b_qfn48_sd_pull_ctl_disable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL3, 0x65 | 0x90), + RTSX_REG_PAIR(CARD_PULL_CTL6, 0x04 | 0x11), + 0, +}; + +static const u32 rtl8411b_qfn64_ms_pull_ctl_enable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL1, 0x65), + RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL3, 0x05 | 0xD0), + RTSX_REG_PAIR(CARD_PULL_CTL4, 0x05 | 0x50), + RTSX_REG_PAIR(CARD_PULL_CTL5, 0x05 | 0x50), + RTSX_REG_PAIR(CARD_PULL_CTL6, 0x04 | 0x11), + 0, +}; + +static const u32 rtl8411b_qfn48_ms_pull_ctl_enable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL3, 0x65 | 0x90), + RTSX_REG_PAIR(CARD_PULL_CTL6, 0x04 | 0x11), + 0, +}; + +static const u32 rtl8411b_qfn64_ms_pull_ctl_disable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL1, 0x65), + RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL3, 0x05 | 0xD0), + RTSX_REG_PAIR(CARD_PULL_CTL4, 0x09 | 0x50), + RTSX_REG_PAIR(CARD_PULL_CTL5, 0x05 | 0x50), + RTSX_REG_PAIR(CARD_PULL_CTL6, 0x04 | 0x11), + 0, +}; + +static const u32 rtl8411b_qfn48_ms_pull_ctl_disable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL3, 0x65 | 0x90), + RTSX_REG_PAIR(CARD_PULL_CTL6, 0x04 | 0x11), + 0, +}; + +static void rtl8411_init_common_params(struct rtsx_pcr *pcr) +{ + pcr->extra_caps = EXTRA_CAPS_SD_SDR50 | EXTRA_CAPS_SD_SDR104; + pcr->num_slots = 2; + pcr->flags = 0; + pcr->card_drive_sel = RTL8411_CARD_DRIVE_DEFAULT; + pcr->sd30_drive_sel_1v8 = DRIVER_TYPE_B; + pcr->sd30_drive_sel_3v3 = DRIVER_TYPE_D; + pcr->aspm_en = ASPM_L1_EN; + pcr->tx_initial_phase = SET_CLOCK_PHASE(23, 7, 14); + pcr->rx_initial_phase = SET_CLOCK_PHASE(4, 3, 10); + pcr->ic_version = rtl8411_get_ic_version(pcr); +} + +void rtl8411_init_params(struct rtsx_pcr *pcr) +{ + rtl8411_init_common_params(pcr); + pcr->ops = &rtl8411_pcr_ops; + set_pull_ctrl_tables(pcr, rtl8411); +} + +void rtl8411b_init_params(struct rtsx_pcr *pcr) +{ + rtl8411_init_common_params(pcr); + pcr->ops = &rtl8411b_pcr_ops; + if (rtl8411b_is_qfn48(pcr)) + set_pull_ctrl_tables(pcr, rtl8411b_qfn48); + else + set_pull_ctrl_tables(pcr, rtl8411b_qfn64); +} + +void rtl8402_init_params(struct rtsx_pcr *pcr) +{ + rtl8411_init_common_params(pcr); + pcr->ops = &rtl8402_pcr_ops; + set_pull_ctrl_tables(pcr, rtl8411); +} diff --git a/drivers/misc/cardreader/rts5209.c b/drivers/misc/cardreader/rts5209.c new file mode 100644 index 000000000..ce68c48d8 --- /dev/null +++ b/drivers/misc/cardreader/rts5209.c @@ -0,0 +1,277 @@ +/* Driver for Realtek PCI-Express card reader + * + * Copyright(c) 2009-2013 Realtek Semiconductor Corp. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see . + * + * Author: + * Wei WANG + */ + +#include +#include +#include + +#include "rtsx_pcr.h" + +static u8 rts5209_get_ic_version(struct rtsx_pcr *pcr) +{ + u8 val; + + val = rtsx_pci_readb(pcr, 0x1C); + return val & 0x0F; +} + +static void rts5209_fetch_vendor_settings(struct rtsx_pcr *pcr) +{ + u32 reg; + + rtsx_pci_read_config_dword(pcr, PCR_SETTING_REG1, ®); + pcr_dbg(pcr, "Cfg 0x%x: 0x%x\n", PCR_SETTING_REG1, reg); + + if (rts5209_vendor_setting1_valid(reg)) { + if (rts5209_reg_check_ms_pmos(reg)) + pcr->flags |= PCR_MS_PMOS; + pcr->aspm_en = rts5209_reg_to_aspm(reg); + } + + rtsx_pci_read_config_dword(pcr, PCR_SETTING_REG2, ®); + pcr_dbg(pcr, "Cfg 0x%x: 0x%x\n", PCR_SETTING_REG2, reg); + + if (rts5209_vendor_setting2_valid(reg)) { + pcr->sd30_drive_sel_1v8 = + rts5209_reg_to_sd30_drive_sel_1v8(reg); + pcr->sd30_drive_sel_3v3 = + rts5209_reg_to_sd30_drive_sel_3v3(reg); + pcr->card_drive_sel = rts5209_reg_to_card_drive_sel(reg); + } +} + +static void rts5209_force_power_down(struct rtsx_pcr *pcr, u8 pm_state) +{ + rtsx_pci_write_register(pcr, FPDCTL, 0x07, 0x07); +} + +static int rts5209_extra_init_hw(struct rtsx_pcr *pcr) +{ + rtsx_pci_init_cmd(pcr); + + /* Turn off LED */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_GPIO, 0xFF, 0x03); + /* Reset ASPM state to default value */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, ASPM_FORCE_CTL, 0x3F, 0); + /* Force CLKREQ# PIN to drive 0 to request clock */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PETXCFG, 0x08, 0x08); + /* Configure GPIO as output */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_GPIO_DIR, 0xFF, 0x03); + /* Configure driving */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD30_DRIVE_SEL, + 0xFF, pcr->sd30_drive_sel_3v3); + + return rtsx_pci_send_cmd(pcr, 100); +} + +static int rts5209_optimize_phy(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_phy_register(pcr, 0x00, 0xB966); +} + +static int rts5209_turn_on_led(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, CARD_GPIO, 0x01, 0x00); +} + +static int rts5209_turn_off_led(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, CARD_GPIO, 0x01, 0x01); +} + +static int rts5209_enable_auto_blink(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, CARD_AUTO_BLINK, 0xFF, 0x0D); +} + +static int rts5209_disable_auto_blink(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, CARD_AUTO_BLINK, 0x08, 0x00); +} + +static int rts5209_card_power_on(struct rtsx_pcr *pcr, int card) +{ + int err; + u8 pwr_mask, partial_pwr_on, pwr_on; + + pwr_mask = SD_POWER_MASK; + partial_pwr_on = SD_PARTIAL_POWER_ON; + pwr_on = SD_POWER_ON; + + if ((pcr->flags & PCR_MS_PMOS) && (card == RTSX_MS_CARD)) { + pwr_mask = MS_POWER_MASK; + partial_pwr_on = MS_PARTIAL_POWER_ON; + pwr_on = MS_POWER_ON; + } + + rtsx_pci_init_cmd(pcr); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL, + pwr_mask, partial_pwr_on); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWR_GATE_CTRL, + LDO3318_PWR_MASK, 0x04); + err = rtsx_pci_send_cmd(pcr, 100); + if (err < 0) + return err; + + /* To avoid too large in-rush current */ + udelay(150); + + rtsx_pci_init_cmd(pcr); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL, pwr_mask, pwr_on); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWR_GATE_CTRL, + LDO3318_PWR_MASK, 0x00); + return rtsx_pci_send_cmd(pcr, 100); +} + +static int rts5209_card_power_off(struct rtsx_pcr *pcr, int card) +{ + u8 pwr_mask, pwr_off; + + pwr_mask = SD_POWER_MASK; + pwr_off = SD_POWER_OFF; + + if ((pcr->flags & PCR_MS_PMOS) && (card == RTSX_MS_CARD)) { + pwr_mask = MS_POWER_MASK; + pwr_off = MS_POWER_OFF; + } + + rtsx_pci_init_cmd(pcr); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL, + pwr_mask | PMOS_STRG_MASK, pwr_off | PMOS_STRG_400mA); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWR_GATE_CTRL, + LDO3318_PWR_MASK, 0x06); + return rtsx_pci_send_cmd(pcr, 100); +} + +static int rts5209_switch_output_voltage(struct rtsx_pcr *pcr, u8 voltage) +{ + int err; + + if (voltage == OUTPUT_3V3) { + err = rtsx_pci_write_register(pcr, + SD30_DRIVE_SEL, 0x07, pcr->sd30_drive_sel_3v3); + if (err < 0) + return err; + err = rtsx_pci_write_phy_register(pcr, 0x08, 0x4FC0 | 0x24); + if (err < 0) + return err; + } else if (voltage == OUTPUT_1V8) { + err = rtsx_pci_write_register(pcr, + SD30_DRIVE_SEL, 0x07, pcr->sd30_drive_sel_1v8); + if (err < 0) + return err; + err = rtsx_pci_write_phy_register(pcr, 0x08, 0x4C40 | 0x24); + if (err < 0) + return err; + } else { + return -EINVAL; + } + + return 0; +} + +static const struct pcr_ops rts5209_pcr_ops = { + .fetch_vendor_settings = rts5209_fetch_vendor_settings, + .extra_init_hw = rts5209_extra_init_hw, + .optimize_phy = rts5209_optimize_phy, + .turn_on_led = rts5209_turn_on_led, + .turn_off_led = rts5209_turn_off_led, + .enable_auto_blink = rts5209_enable_auto_blink, + .disable_auto_blink = rts5209_disable_auto_blink, + .card_power_on = rts5209_card_power_on, + .card_power_off = rts5209_card_power_off, + .switch_output_voltage = rts5209_switch_output_voltage, + .cd_deglitch = NULL, + .conv_clk_and_div_n = NULL, + .force_power_down = rts5209_force_power_down, +}; + +/* SD Pull Control Enable: + * SD_DAT[3:0] ==> pull up + * SD_CD ==> pull up + * SD_WP ==> pull up + * SD_CMD ==> pull up + * SD_CLK ==> pull down + */ +static const u32 rts5209_sd_pull_ctl_enable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL1, 0xAA), + RTSX_REG_PAIR(CARD_PULL_CTL2, 0xAA), + RTSX_REG_PAIR(CARD_PULL_CTL3, 0xE9), + 0, +}; + +/* SD Pull Control Disable: + * SD_DAT[3:0] ==> pull down + * SD_CD ==> pull up + * SD_WP ==> pull down + * SD_CMD ==> pull down + * SD_CLK ==> pull down + */ +static const u32 rts5209_sd_pull_ctl_disable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL1, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL3, 0xD5), + 0, +}; + +/* MS Pull Control Enable: + * MS CD ==> pull up + * others ==> pull down + */ +static const u32 rts5209_ms_pull_ctl_enable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL4, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL5, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL6, 0x15), + 0, +}; + +/* MS Pull Control Disable: + * MS CD ==> pull up + * others ==> pull down + */ +static const u32 rts5209_ms_pull_ctl_disable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL4, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL5, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL6, 0x15), + 0, +}; + +void rts5209_init_params(struct rtsx_pcr *pcr) +{ + pcr->extra_caps = EXTRA_CAPS_SD_SDR50 | + EXTRA_CAPS_SD_SDR104 | EXTRA_CAPS_MMC_8BIT; + pcr->num_slots = 2; + pcr->ops = &rts5209_pcr_ops; + + pcr->flags = 0; + pcr->card_drive_sel = RTS5209_CARD_DRIVE_DEFAULT; + pcr->sd30_drive_sel_1v8 = DRIVER_TYPE_B; + pcr->sd30_drive_sel_3v3 = DRIVER_TYPE_D; + pcr->aspm_en = ASPM_L1_EN; + pcr->tx_initial_phase = SET_CLOCK_PHASE(27, 27, 16); + pcr->rx_initial_phase = SET_CLOCK_PHASE(24, 6, 5); + + pcr->ic_version = rts5209_get_ic_version(pcr); + pcr->sd_pull_ctl_enable_tbl = rts5209_sd_pull_ctl_enable_tbl; + pcr->sd_pull_ctl_disable_tbl = rts5209_sd_pull_ctl_disable_tbl; + pcr->ms_pull_ctl_enable_tbl = rts5209_ms_pull_ctl_enable_tbl; + pcr->ms_pull_ctl_disable_tbl = rts5209_ms_pull_ctl_disable_tbl; +} diff --git a/drivers/misc/cardreader/rts5227.c b/drivers/misc/cardreader/rts5227.c new file mode 100644 index 000000000..d3ce9fe08 --- /dev/null +++ b/drivers/misc/cardreader/rts5227.c @@ -0,0 +1,380 @@ +/* Driver for Realtek PCI-Express card reader + * + * Copyright(c) 2009-2013 Realtek Semiconductor Corp. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see . + * + * Author: + * Wei WANG + * Roger Tseng + */ + +#include +#include +#include + +#include "rtsx_pcr.h" + +static u8 rts5227_get_ic_version(struct rtsx_pcr *pcr) +{ + u8 val; + + rtsx_pci_read_register(pcr, DUMMY_REG_RESET_0, &val); + return val & 0x0F; +} + +static void rts5227_fill_driving(struct rtsx_pcr *pcr, u8 voltage) +{ + u8 driving_3v3[4][3] = { + {0x13, 0x13, 0x13}, + {0x96, 0x96, 0x96}, + {0x7F, 0x7F, 0x7F}, + {0x96, 0x96, 0x96}, + }; + u8 driving_1v8[4][3] = { + {0x99, 0x99, 0x99}, + {0xAA, 0xAA, 0xAA}, + {0xFE, 0xFE, 0xFE}, + {0xB3, 0xB3, 0xB3}, + }; + u8 (*driving)[3], drive_sel; + + if (voltage == OUTPUT_3V3) { + driving = driving_3v3; + drive_sel = pcr->sd30_drive_sel_3v3; + } else { + driving = driving_1v8; + drive_sel = pcr->sd30_drive_sel_1v8; + } + + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD30_CLK_DRIVE_SEL, + 0xFF, driving[drive_sel][0]); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD30_CMD_DRIVE_SEL, + 0xFF, driving[drive_sel][1]); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD30_DAT_DRIVE_SEL, + 0xFF, driving[drive_sel][2]); +} + +static void rts5227_fetch_vendor_settings(struct rtsx_pcr *pcr) +{ + u32 reg; + + rtsx_pci_read_config_dword(pcr, PCR_SETTING_REG1, ®); + pcr_dbg(pcr, "Cfg 0x%x: 0x%x\n", PCR_SETTING_REG1, reg); + + if (!rtsx_vendor_setting_valid(reg)) + return; + + pcr->aspm_en = rtsx_reg_to_aspm(reg); + pcr->sd30_drive_sel_1v8 = rtsx_reg_to_sd30_drive_sel_1v8(reg); + pcr->card_drive_sel &= 0x3F; + pcr->card_drive_sel |= rtsx_reg_to_card_drive_sel(reg); + + rtsx_pci_read_config_dword(pcr, PCR_SETTING_REG2, ®); + pcr_dbg(pcr, "Cfg 0x%x: 0x%x\n", PCR_SETTING_REG2, reg); + pcr->sd30_drive_sel_3v3 = rtsx_reg_to_sd30_drive_sel_3v3(reg); + if (rtsx_reg_check_reverse_socket(reg)) + pcr->flags |= PCR_REVERSE_SOCKET; +} + +static void rts5227_force_power_down(struct rtsx_pcr *pcr, u8 pm_state) +{ + /* Set relink_time to 0 */ + rtsx_pci_write_register(pcr, AUTOLOAD_CFG_BASE + 1, 0xFF, 0); + rtsx_pci_write_register(pcr, AUTOLOAD_CFG_BASE + 2, 0xFF, 0); + rtsx_pci_write_register(pcr, AUTOLOAD_CFG_BASE + 3, 0x01, 0); + + if (pm_state == HOST_ENTER_S3) + rtsx_pci_write_register(pcr, pcr->reg_pm_ctrl3, 0x10, 0x10); + + rtsx_pci_write_register(pcr, FPDCTL, 0x03, 0x03); +} + +static int rts5227_extra_init_hw(struct rtsx_pcr *pcr) +{ + u16 cap; + + rtsx_pci_init_cmd(pcr); + + /* Configure GPIO as output */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, GPIO_CTL, 0x02, 0x02); + /* Reset ASPM state to default value */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, ASPM_FORCE_CTL, 0x3F, 0); + /* Switch LDO3318 source from DV33 to card_3v3 */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, LDO_PWR_SEL, 0x03, 0x00); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, LDO_PWR_SEL, 0x03, 0x01); + /* LED shine disabled, set initial shine cycle period */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, OLT_LED_CTL, 0x0F, 0x02); + /* Configure LTR */ + pcie_capability_read_word(pcr->pci, PCI_EXP_DEVCTL2, &cap); + if (cap & PCI_EXP_DEVCTL2_LTR_EN) + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, LTR_CTL, 0xFF, 0xA3); + /* Configure OBFF */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, OBFF_CFG, 0x03, 0x03); + /* Configure driving */ + rts5227_fill_driving(pcr, OUTPUT_3V3); + /* Configure force_clock_req */ + if (pcr->flags & PCR_REVERSE_SOCKET) + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PETXCFG, 0xB8, 0xB8); + else + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PETXCFG, 0xB8, 0x88); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, pcr->reg_pm_ctrl3, 0x10, 0x00); + + return rtsx_pci_send_cmd(pcr, 100); +} + +static int rts5227_optimize_phy(struct rtsx_pcr *pcr) +{ + int err; + + err = rtsx_pci_write_register(pcr, PM_CTRL3, D3_DELINK_MODE_EN, 0x00); + if (err < 0) + return err; + + /* Optimize RX sensitivity */ + return rtsx_pci_write_phy_register(pcr, 0x00, 0xBA42); +} + +static int rts5227_turn_on_led(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, GPIO_CTL, 0x02, 0x02); +} + +static int rts5227_turn_off_led(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, GPIO_CTL, 0x02, 0x00); +} + +static int rts5227_enable_auto_blink(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, OLT_LED_CTL, 0x08, 0x08); +} + +static int rts5227_disable_auto_blink(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, OLT_LED_CTL, 0x08, 0x00); +} + +static int rts5227_card_power_on(struct rtsx_pcr *pcr, int card) +{ + int err; + + rtsx_pci_init_cmd(pcr); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL, + SD_POWER_MASK, SD_PARTIAL_POWER_ON); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWR_GATE_CTRL, + LDO3318_PWR_MASK, 0x02); + err = rtsx_pci_send_cmd(pcr, 100); + if (err < 0) + return err; + + /* To avoid too large in-rush current */ + udelay(150); + + rtsx_pci_init_cmd(pcr); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL, + SD_POWER_MASK, SD_POWER_ON); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWR_GATE_CTRL, + LDO3318_PWR_MASK, 0x06); + return rtsx_pci_send_cmd(pcr, 100); +} + +static int rts5227_card_power_off(struct rtsx_pcr *pcr, int card) +{ + rtsx_pci_init_cmd(pcr); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL, + SD_POWER_MASK | PMOS_STRG_MASK, + SD_POWER_OFF | PMOS_STRG_400mA); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWR_GATE_CTRL, + LDO3318_PWR_MASK, 0X00); + return rtsx_pci_send_cmd(pcr, 100); +} + +static int rts5227_switch_output_voltage(struct rtsx_pcr *pcr, u8 voltage) +{ + int err; + + if (voltage == OUTPUT_3V3) { + err = rtsx_pci_write_phy_register(pcr, 0x08, 0x4FC0 | 0x24); + if (err < 0) + return err; + } else if (voltage == OUTPUT_1V8) { + err = rtsx_pci_write_phy_register(pcr, 0x11, 0x3C02); + if (err < 0) + return err; + err = rtsx_pci_write_phy_register(pcr, 0x08, 0x4C80 | 0x24); + if (err < 0) + return err; + } else { + return -EINVAL; + } + + /* set pad drive */ + rtsx_pci_init_cmd(pcr); + rts5227_fill_driving(pcr, voltage); + return rtsx_pci_send_cmd(pcr, 100); +} + +static const struct pcr_ops rts5227_pcr_ops = { + .fetch_vendor_settings = rts5227_fetch_vendor_settings, + .extra_init_hw = rts5227_extra_init_hw, + .optimize_phy = rts5227_optimize_phy, + .turn_on_led = rts5227_turn_on_led, + .turn_off_led = rts5227_turn_off_led, + .enable_auto_blink = rts5227_enable_auto_blink, + .disable_auto_blink = rts5227_disable_auto_blink, + .card_power_on = rts5227_card_power_on, + .card_power_off = rts5227_card_power_off, + .switch_output_voltage = rts5227_switch_output_voltage, + .cd_deglitch = NULL, + .conv_clk_and_div_n = NULL, + .force_power_down = rts5227_force_power_down, +}; + +/* SD Pull Control Enable: + * SD_DAT[3:0] ==> pull up + * SD_CD ==> pull up + * SD_WP ==> pull up + * SD_CMD ==> pull up + * SD_CLK ==> pull down + */ +static const u32 rts5227_sd_pull_ctl_enable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL2, 0xAA), + RTSX_REG_PAIR(CARD_PULL_CTL3, 0xE9), + 0, +}; + +/* SD Pull Control Disable: + * SD_DAT[3:0] ==> pull down + * SD_CD ==> pull up + * SD_WP ==> pull down + * SD_CMD ==> pull down + * SD_CLK ==> pull down + */ +static const u32 rts5227_sd_pull_ctl_disable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL3, 0xD5), + 0, +}; + +/* MS Pull Control Enable: + * MS CD ==> pull up + * others ==> pull down + */ +static const u32 rts5227_ms_pull_ctl_enable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL5, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL6, 0x15), + 0, +}; + +/* MS Pull Control Disable: + * MS CD ==> pull up + * others ==> pull down + */ +static const u32 rts5227_ms_pull_ctl_disable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL5, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL6, 0x15), + 0, +}; + +void rts5227_init_params(struct rtsx_pcr *pcr) +{ + pcr->extra_caps = EXTRA_CAPS_SD_SDR50 | EXTRA_CAPS_SD_SDR104; + pcr->num_slots = 2; + pcr->ops = &rts5227_pcr_ops; + + pcr->flags = 0; + pcr->card_drive_sel = RTSX_CARD_DRIVE_DEFAULT; + pcr->sd30_drive_sel_1v8 = CFG_DRIVER_TYPE_B; + pcr->sd30_drive_sel_3v3 = CFG_DRIVER_TYPE_B; + pcr->aspm_en = ASPM_L1_EN; + pcr->tx_initial_phase = SET_CLOCK_PHASE(27, 27, 15); + pcr->rx_initial_phase = SET_CLOCK_PHASE(30, 7, 7); + + pcr->ic_version = rts5227_get_ic_version(pcr); + pcr->sd_pull_ctl_enable_tbl = rts5227_sd_pull_ctl_enable_tbl; + pcr->sd_pull_ctl_disable_tbl = rts5227_sd_pull_ctl_disable_tbl; + pcr->ms_pull_ctl_enable_tbl = rts5227_ms_pull_ctl_enable_tbl; + pcr->ms_pull_ctl_disable_tbl = rts5227_ms_pull_ctl_disable_tbl; + + pcr->reg_pm_ctrl3 = PM_CTRL3; +} + +static int rts522a_optimize_phy(struct rtsx_pcr *pcr) +{ + int err; + + err = rtsx_pci_write_register(pcr, RTS522A_PM_CTRL3, D3_DELINK_MODE_EN, + 0x00); + if (err < 0) + return err; + + if (is_version(pcr, 0x522A, IC_VER_A)) { + err = rtsx_pci_write_phy_register(pcr, PHY_RCR2, + PHY_RCR2_INIT_27S); + if (err) + return err; + + rtsx_pci_write_phy_register(pcr, PHY_RCR1, PHY_RCR1_INIT_27S); + rtsx_pci_write_phy_register(pcr, PHY_FLD0, PHY_FLD0_INIT_27S); + rtsx_pci_write_phy_register(pcr, PHY_FLD3, PHY_FLD3_INIT_27S); + rtsx_pci_write_phy_register(pcr, PHY_FLD4, PHY_FLD4_INIT_27S); + } + + return 0; +} + +static int rts522a_extra_init_hw(struct rtsx_pcr *pcr) +{ + rts5227_extra_init_hw(pcr); + + /* Power down OCP for power consumption */ + if (!pcr->card_exist) + rtsx_pci_write_register(pcr, FPDCTL, OC_POWER_DOWN, + OC_POWER_DOWN); + + rtsx_pci_write_register(pcr, FUNC_FORCE_CTL, FUNC_FORCE_UPME_XMT_DBG, + FUNC_FORCE_UPME_XMT_DBG); + rtsx_pci_write_register(pcr, PCLK_CTL, 0x04, 0x04); + rtsx_pci_write_register(pcr, PM_EVENT_DEBUG, PME_DEBUG_0, PME_DEBUG_0); + rtsx_pci_write_register(pcr, PM_CLK_FORCE_CTL, 0xFF, 0x11); + + return 0; +} + +/* rts522a operations mainly derived from rts5227, except phy/hw init setting. + */ +static const struct pcr_ops rts522a_pcr_ops = { + .fetch_vendor_settings = rts5227_fetch_vendor_settings, + .extra_init_hw = rts522a_extra_init_hw, + .optimize_phy = rts522a_optimize_phy, + .turn_on_led = rts5227_turn_on_led, + .turn_off_led = rts5227_turn_off_led, + .enable_auto_blink = rts5227_enable_auto_blink, + .disable_auto_blink = rts5227_disable_auto_blink, + .card_power_on = rts5227_card_power_on, + .card_power_off = rts5227_card_power_off, + .switch_output_voltage = rts5227_switch_output_voltage, + .cd_deglitch = NULL, + .conv_clk_and_div_n = NULL, + .force_power_down = rts5227_force_power_down, +}; + +void rts522a_init_params(struct rtsx_pcr *pcr) +{ + rts5227_init_params(pcr); + pcr->ops = &rts522a_pcr_ops; + pcr->tx_initial_phase = SET_CLOCK_PHASE(20, 20, 11); + pcr->reg_pm_ctrl3 = RTS522A_PM_CTRL3; +} diff --git a/drivers/misc/cardreader/rts5229.c b/drivers/misc/cardreader/rts5229.c new file mode 100644 index 000000000..911926133 --- /dev/null +++ b/drivers/misc/cardreader/rts5229.c @@ -0,0 +1,273 @@ +/* Driver for Realtek PCI-Express card reader + * + * Copyright(c) 2009-2013 Realtek Semiconductor Corp. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see . + * + * Author: + * Wei WANG + */ + +#include +#include +#include + +#include "rtsx_pcr.h" + +static u8 rts5229_get_ic_version(struct rtsx_pcr *pcr) +{ + u8 val; + + rtsx_pci_read_register(pcr, DUMMY_REG_RESET_0, &val); + return val & 0x0F; +} + +static void rts5229_fetch_vendor_settings(struct rtsx_pcr *pcr) +{ + u32 reg; + + rtsx_pci_read_config_dword(pcr, PCR_SETTING_REG1, ®); + pcr_dbg(pcr, "Cfg 0x%x: 0x%x\n", PCR_SETTING_REG1, reg); + + if (!rtsx_vendor_setting_valid(reg)) + return; + + pcr->aspm_en = rtsx_reg_to_aspm(reg); + pcr->sd30_drive_sel_1v8 = + map_sd_drive(rtsx_reg_to_sd30_drive_sel_1v8(reg)); + pcr->card_drive_sel &= 0x3F; + pcr->card_drive_sel |= rtsx_reg_to_card_drive_sel(reg); + + rtsx_pci_read_config_dword(pcr, PCR_SETTING_REG2, ®); + pcr_dbg(pcr, "Cfg 0x%x: 0x%x\n", PCR_SETTING_REG2, reg); + pcr->sd30_drive_sel_3v3 = + map_sd_drive(rtsx_reg_to_sd30_drive_sel_3v3(reg)); +} + +static void rts5229_force_power_down(struct rtsx_pcr *pcr, u8 pm_state) +{ + rtsx_pci_write_register(pcr, FPDCTL, 0x03, 0x03); +} + +static int rts5229_extra_init_hw(struct rtsx_pcr *pcr) +{ + rtsx_pci_init_cmd(pcr); + + /* Configure GPIO as output */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, GPIO_CTL, 0x02, 0x02); + /* Reset ASPM state to default value */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, ASPM_FORCE_CTL, 0x3F, 0); + /* Force CLKREQ# PIN to drive 0 to request clock */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PETXCFG, 0x08, 0x08); + /* Switch LDO3318 source from DV33 to card_3v3 */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, LDO_PWR_SEL, 0x03, 0x00); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, LDO_PWR_SEL, 0x03, 0x01); + /* LED shine disabled, set initial shine cycle period */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, OLT_LED_CTL, 0x0F, 0x02); + /* Configure driving */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD30_DRIVE_SEL, + 0xFF, pcr->sd30_drive_sel_3v3); + + return rtsx_pci_send_cmd(pcr, 100); +} + +static int rts5229_optimize_phy(struct rtsx_pcr *pcr) +{ + /* Optimize RX sensitivity */ + return rtsx_pci_write_phy_register(pcr, 0x00, 0xBA42); +} + +static int rts5229_turn_on_led(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, GPIO_CTL, 0x02, 0x02); +} + +static int rts5229_turn_off_led(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, GPIO_CTL, 0x02, 0x00); +} + +static int rts5229_enable_auto_blink(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, OLT_LED_CTL, 0x08, 0x08); +} + +static int rts5229_disable_auto_blink(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, OLT_LED_CTL, 0x08, 0x00); +} + +static int rts5229_card_power_on(struct rtsx_pcr *pcr, int card) +{ + int err; + + rtsx_pci_init_cmd(pcr); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL, + SD_POWER_MASK, SD_PARTIAL_POWER_ON); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWR_GATE_CTRL, + LDO3318_PWR_MASK, 0x02); + err = rtsx_pci_send_cmd(pcr, 100); + if (err < 0) + return err; + + /* To avoid too large in-rush current */ + udelay(150); + + rtsx_pci_init_cmd(pcr); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL, + SD_POWER_MASK, SD_POWER_ON); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWR_GATE_CTRL, + LDO3318_PWR_MASK, 0x06); + return rtsx_pci_send_cmd(pcr, 100); +} + +static int rts5229_card_power_off(struct rtsx_pcr *pcr, int card) +{ + rtsx_pci_init_cmd(pcr); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL, + SD_POWER_MASK | PMOS_STRG_MASK, + SD_POWER_OFF | PMOS_STRG_400mA); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWR_GATE_CTRL, + LDO3318_PWR_MASK, 0x00); + return rtsx_pci_send_cmd(pcr, 100); +} + +static int rts5229_switch_output_voltage(struct rtsx_pcr *pcr, u8 voltage) +{ + int err; + + if (voltage == OUTPUT_3V3) { + err = rtsx_pci_write_register(pcr, + SD30_DRIVE_SEL, 0x07, pcr->sd30_drive_sel_3v3); + if (err < 0) + return err; + err = rtsx_pci_write_phy_register(pcr, 0x08, 0x4FC0 | 0x24); + if (err < 0) + return err; + } else if (voltage == OUTPUT_1V8) { + err = rtsx_pci_write_register(pcr, + SD30_DRIVE_SEL, 0x07, pcr->sd30_drive_sel_1v8); + if (err < 0) + return err; + err = rtsx_pci_write_phy_register(pcr, 0x08, 0x4C40 | 0x24); + if (err < 0) + return err; + } else { + return -EINVAL; + } + + return 0; +} + +static const struct pcr_ops rts5229_pcr_ops = { + .fetch_vendor_settings = rts5229_fetch_vendor_settings, + .extra_init_hw = rts5229_extra_init_hw, + .optimize_phy = rts5229_optimize_phy, + .turn_on_led = rts5229_turn_on_led, + .turn_off_led = rts5229_turn_off_led, + .enable_auto_blink = rts5229_enable_auto_blink, + .disable_auto_blink = rts5229_disable_auto_blink, + .card_power_on = rts5229_card_power_on, + .card_power_off = rts5229_card_power_off, + .switch_output_voltage = rts5229_switch_output_voltage, + .cd_deglitch = NULL, + .conv_clk_and_div_n = NULL, + .force_power_down = rts5229_force_power_down, +}; + +/* SD Pull Control Enable: + * SD_DAT[3:0] ==> pull up + * SD_CD ==> pull up + * SD_WP ==> pull up + * SD_CMD ==> pull up + * SD_CLK ==> pull down + */ +static const u32 rts5229_sd_pull_ctl_enable_tbl1[] = { + RTSX_REG_PAIR(CARD_PULL_CTL2, 0xAA), + RTSX_REG_PAIR(CARD_PULL_CTL3, 0xE9), + 0, +}; + +/* For RTS5229 version C */ +static const u32 rts5229_sd_pull_ctl_enable_tbl2[] = { + RTSX_REG_PAIR(CARD_PULL_CTL2, 0xAA), + RTSX_REG_PAIR(CARD_PULL_CTL3, 0xD9), + 0, +}; + +/* SD Pull Control Disable: + * SD_DAT[3:0] ==> pull down + * SD_CD ==> pull up + * SD_WP ==> pull down + * SD_CMD ==> pull down + * SD_CLK ==> pull down + */ +static const u32 rts5229_sd_pull_ctl_disable_tbl1[] = { + RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL3, 0xD5), + 0, +}; + +/* For RTS5229 version C */ +static const u32 rts5229_sd_pull_ctl_disable_tbl2[] = { + RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL3, 0xE5), + 0, +}; + +/* MS Pull Control Enable: + * MS CD ==> pull up + * others ==> pull down + */ +static const u32 rts5229_ms_pull_ctl_enable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL5, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL6, 0x15), + 0, +}; + +/* MS Pull Control Disable: + * MS CD ==> pull up + * others ==> pull down + */ +static const u32 rts5229_ms_pull_ctl_disable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL5, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL6, 0x15), + 0, +}; + +void rts5229_init_params(struct rtsx_pcr *pcr) +{ + pcr->extra_caps = EXTRA_CAPS_SD_SDR50 | EXTRA_CAPS_SD_SDR104; + pcr->num_slots = 2; + pcr->ops = &rts5229_pcr_ops; + + pcr->flags = 0; + pcr->card_drive_sel = RTSX_CARD_DRIVE_DEFAULT; + pcr->sd30_drive_sel_1v8 = DRIVER_TYPE_B; + pcr->sd30_drive_sel_3v3 = DRIVER_TYPE_D; + pcr->aspm_en = ASPM_L1_EN; + pcr->tx_initial_phase = SET_CLOCK_PHASE(27, 27, 15); + pcr->rx_initial_phase = SET_CLOCK_PHASE(30, 6, 6); + + pcr->ic_version = rts5229_get_ic_version(pcr); + if (pcr->ic_version == IC_VER_C) { + pcr->sd_pull_ctl_enable_tbl = rts5229_sd_pull_ctl_enable_tbl2; + pcr->sd_pull_ctl_disable_tbl = rts5229_sd_pull_ctl_disable_tbl2; + } else { + pcr->sd_pull_ctl_enable_tbl = rts5229_sd_pull_ctl_enable_tbl1; + pcr->sd_pull_ctl_disable_tbl = rts5229_sd_pull_ctl_disable_tbl1; + } + pcr->ms_pull_ctl_enable_tbl = rts5229_ms_pull_ctl_enable_tbl; + pcr->ms_pull_ctl_disable_tbl = rts5229_ms_pull_ctl_disable_tbl; +} diff --git a/drivers/misc/cardreader/rts5249.c b/drivers/misc/cardreader/rts5249.c new file mode 100644 index 000000000..45cd75041 --- /dev/null +++ b/drivers/misc/cardreader/rts5249.c @@ -0,0 +1,742 @@ +/* Driver for Realtek PCI-Express card reader + * + * Copyright(c) 2009-2013 Realtek Semiconductor Corp. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see . + * + * Author: + * Wei WANG + */ + +#include +#include +#include + +#include "rtsx_pcr.h" + +static u8 rts5249_get_ic_version(struct rtsx_pcr *pcr) +{ + u8 val; + + rtsx_pci_read_register(pcr, DUMMY_REG_RESET_0, &val); + return val & 0x0F; +} + +static void rts5249_fill_driving(struct rtsx_pcr *pcr, u8 voltage) +{ + u8 driving_3v3[4][3] = { + {0x11, 0x11, 0x18}, + {0x55, 0x55, 0x5C}, + {0xFF, 0xFF, 0xFF}, + {0x96, 0x96, 0x96}, + }; + u8 driving_1v8[4][3] = { + {0xC4, 0xC4, 0xC4}, + {0x3C, 0x3C, 0x3C}, + {0xFE, 0xFE, 0xFE}, + {0xB3, 0xB3, 0xB3}, + }; + u8 (*driving)[3], drive_sel; + + if (voltage == OUTPUT_3V3) { + driving = driving_3v3; + drive_sel = pcr->sd30_drive_sel_3v3; + } else { + driving = driving_1v8; + drive_sel = pcr->sd30_drive_sel_1v8; + } + + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD30_CLK_DRIVE_SEL, + 0xFF, driving[drive_sel][0]); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD30_CMD_DRIVE_SEL, + 0xFF, driving[drive_sel][1]); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD30_DAT_DRIVE_SEL, + 0xFF, driving[drive_sel][2]); +} + +static void rtsx_base_fetch_vendor_settings(struct rtsx_pcr *pcr) +{ + u32 reg; + + rtsx_pci_read_config_dword(pcr, PCR_SETTING_REG1, ®); + pcr_dbg(pcr, "Cfg 0x%x: 0x%x\n", PCR_SETTING_REG1, reg); + + if (!rtsx_vendor_setting_valid(reg)) { + pcr_dbg(pcr, "skip fetch vendor setting\n"); + return; + } + + pcr->aspm_en = rtsx_reg_to_aspm(reg); + pcr->sd30_drive_sel_1v8 = rtsx_reg_to_sd30_drive_sel_1v8(reg); + pcr->card_drive_sel &= 0x3F; + pcr->card_drive_sel |= rtsx_reg_to_card_drive_sel(reg); + + rtsx_pci_read_config_dword(pcr, PCR_SETTING_REG2, ®); + pcr_dbg(pcr, "Cfg 0x%x: 0x%x\n", PCR_SETTING_REG2, reg); + pcr->sd30_drive_sel_3v3 = rtsx_reg_to_sd30_drive_sel_3v3(reg); + if (rtsx_reg_check_reverse_socket(reg)) + pcr->flags |= PCR_REVERSE_SOCKET; +} + +static void rtsx_base_force_power_down(struct rtsx_pcr *pcr, u8 pm_state) +{ + /* Set relink_time to 0 */ + rtsx_pci_write_register(pcr, AUTOLOAD_CFG_BASE + 1, 0xFF, 0); + rtsx_pci_write_register(pcr, AUTOLOAD_CFG_BASE + 2, 0xFF, 0); + rtsx_pci_write_register(pcr, AUTOLOAD_CFG_BASE + 3, 0x01, 0); + + if (pm_state == HOST_ENTER_S3) + rtsx_pci_write_register(pcr, pcr->reg_pm_ctrl3, + D3_DELINK_MODE_EN, D3_DELINK_MODE_EN); + + rtsx_pci_write_register(pcr, FPDCTL, 0x03, 0x03); +} + +static void rts5249_init_from_cfg(struct rtsx_pcr *pcr) +{ + struct rtsx_cr_option *option = &(pcr->option); + u32 lval; + + if (CHK_PCI_PID(pcr, PID_524A)) + rtsx_pci_read_config_dword(pcr, + PCR_ASPM_SETTING_REG1, &lval); + else + rtsx_pci_read_config_dword(pcr, + PCR_ASPM_SETTING_REG2, &lval); + + if (lval & ASPM_L1_1_EN_MASK) + rtsx_set_dev_flag(pcr, ASPM_L1_1_EN); + + if (lval & ASPM_L1_2_EN_MASK) + rtsx_set_dev_flag(pcr, ASPM_L1_2_EN); + + if (lval & PM_L1_1_EN_MASK) + rtsx_set_dev_flag(pcr, PM_L1_1_EN); + + if (lval & PM_L1_2_EN_MASK) + rtsx_set_dev_flag(pcr, PM_L1_2_EN); + + if (option->ltr_en) { + u16 val; + + pcie_capability_read_word(pcr->pci, PCI_EXP_DEVCTL2, &val); + if (val & PCI_EXP_DEVCTL2_LTR_EN) { + option->ltr_enabled = true; + option->ltr_active = true; + rtsx_set_ltr_latency(pcr, option->ltr_active_latency); + } else { + option->ltr_enabled = false; + } + } +} + +static int rts5249_init_from_hw(struct rtsx_pcr *pcr) +{ + struct rtsx_cr_option *option = &(pcr->option); + + if (rtsx_check_dev_flag(pcr, ASPM_L1_1_EN | ASPM_L1_2_EN + | PM_L1_1_EN | PM_L1_2_EN)) + option->force_clkreq_0 = false; + else + option->force_clkreq_0 = true; + + return 0; +} + +static int rts5249_extra_init_hw(struct rtsx_pcr *pcr) +{ + struct rtsx_cr_option *option = &(pcr->option); + + rts5249_init_from_cfg(pcr); + rts5249_init_from_hw(pcr); + + rtsx_pci_init_cmd(pcr); + + /* Rest L1SUB Config */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, L1SUB_CONFIG3, 0xFF, 0x00); + /* Configure GPIO as output */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, GPIO_CTL, 0x02, 0x02); + /* Reset ASPM state to default value */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, ASPM_FORCE_CTL, 0x3F, 0); + /* Switch LDO3318 source from DV33 to card_3v3 */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, LDO_PWR_SEL, 0x03, 0x00); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, LDO_PWR_SEL, 0x03, 0x01); + /* LED shine disabled, set initial shine cycle period */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, OLT_LED_CTL, 0x0F, 0x02); + /* Configure driving */ + rts5249_fill_driving(pcr, OUTPUT_3V3); + if (pcr->flags & PCR_REVERSE_SOCKET) + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PETXCFG, 0xB0, 0xB0); + else + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PETXCFG, 0xB0, 0x80); + + /* + * If u_force_clkreq_0 is enabled, CLKREQ# PIN will be forced + * to drive low, and we forcibly request clock. + */ + if (option->force_clkreq_0) + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PETXCFG, + FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_LOW); + else + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PETXCFG, + FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_HIGH); + + return rtsx_pci_send_cmd(pcr, CMD_TIMEOUT_DEF); +} + +static int rts5249_optimize_phy(struct rtsx_pcr *pcr) +{ + int err; + + err = rtsx_pci_write_register(pcr, PM_CTRL3, D3_DELINK_MODE_EN, 0x00); + if (err < 0) + return err; + + err = rtsx_pci_write_phy_register(pcr, PHY_REV, + PHY_REV_RESV | PHY_REV_RXIDLE_LATCHED | + PHY_REV_P1_EN | PHY_REV_RXIDLE_EN | + PHY_REV_CLKREQ_TX_EN | PHY_REV_RX_PWST | + PHY_REV_CLKREQ_DT_1_0 | PHY_REV_STOP_CLKRD | + PHY_REV_STOP_CLKWR); + if (err < 0) + return err; + + msleep(1); + + err = rtsx_pci_write_phy_register(pcr, PHY_BPCR, + PHY_BPCR_IBRXSEL | PHY_BPCR_IBTXSEL | + PHY_BPCR_IB_FILTER | PHY_BPCR_CMIRROR_EN); + if (err < 0) + return err; + + err = rtsx_pci_write_phy_register(pcr, PHY_PCR, + PHY_PCR_FORCE_CODE | PHY_PCR_OOBS_CALI_50 | + PHY_PCR_OOBS_VCM_08 | PHY_PCR_OOBS_SEN_90 | + PHY_PCR_RSSI_EN | PHY_PCR_RX10K); + if (err < 0) + return err; + + err = rtsx_pci_write_phy_register(pcr, PHY_RCR2, + PHY_RCR2_EMPHASE_EN | PHY_RCR2_NADJR | + PHY_RCR2_CDR_SR_2 | PHY_RCR2_FREQSEL_12 | + PHY_RCR2_CDR_SC_12P | PHY_RCR2_CALIB_LATE); + if (err < 0) + return err; + + err = rtsx_pci_write_phy_register(pcr, PHY_FLD4, + PHY_FLD4_FLDEN_SEL | PHY_FLD4_REQ_REF | + PHY_FLD4_RXAMP_OFF | PHY_FLD4_REQ_ADDA | + PHY_FLD4_BER_COUNT | PHY_FLD4_BER_TIMER | + PHY_FLD4_BER_CHK_EN); + if (err < 0) + return err; + err = rtsx_pci_write_phy_register(pcr, PHY_RDR, + PHY_RDR_RXDSEL_1_9 | PHY_SSC_AUTO_PWD); + if (err < 0) + return err; + err = rtsx_pci_write_phy_register(pcr, PHY_RCR1, + PHY_RCR1_ADP_TIME_4 | PHY_RCR1_VCO_COARSE); + if (err < 0) + return err; + err = rtsx_pci_write_phy_register(pcr, PHY_FLD3, + PHY_FLD3_TIMER_4 | PHY_FLD3_TIMER_6 | + PHY_FLD3_RXDELINK); + if (err < 0) + return err; + + return rtsx_pci_write_phy_register(pcr, PHY_TUNE, + PHY_TUNE_TUNEREF_1_0 | PHY_TUNE_VBGSEL_1252 | + PHY_TUNE_SDBUS_33 | PHY_TUNE_TUNED18 | + PHY_TUNE_TUNED12 | PHY_TUNE_TUNEA12); +} + +static int rtsx_base_turn_on_led(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, GPIO_CTL, 0x02, 0x02); +} + +static int rtsx_base_turn_off_led(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, GPIO_CTL, 0x02, 0x00); +} + +static int rtsx_base_enable_auto_blink(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, OLT_LED_CTL, 0x08, 0x08); +} + +static int rtsx_base_disable_auto_blink(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, OLT_LED_CTL, 0x08, 0x00); +} + +static int rtsx_base_card_power_on(struct rtsx_pcr *pcr, int card) +{ + int err; + + rtsx_pci_init_cmd(pcr); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL, + SD_POWER_MASK, SD_VCC_PARTIAL_POWER_ON); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWR_GATE_CTRL, + LDO3318_PWR_MASK, 0x02); + err = rtsx_pci_send_cmd(pcr, 100); + if (err < 0) + return err; + + msleep(5); + + rtsx_pci_init_cmd(pcr); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL, + SD_POWER_MASK, SD_VCC_POWER_ON); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWR_GATE_CTRL, + LDO3318_PWR_MASK, 0x06); + return rtsx_pci_send_cmd(pcr, 100); +} + +static int rtsx_base_card_power_off(struct rtsx_pcr *pcr, int card) +{ + rtsx_pci_init_cmd(pcr); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_PWR_CTL, + SD_POWER_MASK, SD_POWER_OFF); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWR_GATE_CTRL, + LDO3318_PWR_MASK, 0x00); + return rtsx_pci_send_cmd(pcr, 100); +} + +static int rtsx_base_switch_output_voltage(struct rtsx_pcr *pcr, u8 voltage) +{ + int err; + u16 append; + + switch (voltage) { + case OUTPUT_3V3: + err = rtsx_pci_update_phy(pcr, PHY_TUNE, PHY_TUNE_VOLTAGE_MASK, + PHY_TUNE_VOLTAGE_3V3); + if (err < 0) + return err; + break; + case OUTPUT_1V8: + append = PHY_TUNE_D18_1V8; + if (CHK_PCI_PID(pcr, 0x5249)) { + err = rtsx_pci_update_phy(pcr, PHY_BACR, + PHY_BACR_BASIC_MASK, 0); + if (err < 0) + return err; + append = PHY_TUNE_D18_1V7; + } + + err = rtsx_pci_update_phy(pcr, PHY_TUNE, PHY_TUNE_VOLTAGE_MASK, + append); + if (err < 0) + return err; + break; + default: + pcr_dbg(pcr, "unknown output voltage %d\n", voltage); + return -EINVAL; + } + + /* set pad drive */ + rtsx_pci_init_cmd(pcr); + rts5249_fill_driving(pcr, voltage); + return rtsx_pci_send_cmd(pcr, 100); +} + +static void rts5249_set_aspm(struct rtsx_pcr *pcr, bool enable) +{ + struct rtsx_cr_option *option = &pcr->option; + u8 val = 0; + + if (pcr->aspm_enabled == enable) + return; + + if (option->dev_aspm_mode == DEV_ASPM_DYNAMIC) { + if (enable) + val = pcr->aspm_en; + rtsx_pci_update_cfg_byte(pcr, + pcr->pcie_cap + PCI_EXP_LNKCTL, + ASPM_MASK_NEG, val); + } else if (option->dev_aspm_mode == DEV_ASPM_BACKDOOR) { + u8 mask = FORCE_ASPM_VAL_MASK | FORCE_ASPM_CTL0; + + if (!enable) + val = FORCE_ASPM_CTL0; + rtsx_pci_write_register(pcr, ASPM_FORCE_CTL, mask, val); + } + + pcr->aspm_enabled = enable; +} + +static const struct pcr_ops rts5249_pcr_ops = { + .fetch_vendor_settings = rtsx_base_fetch_vendor_settings, + .extra_init_hw = rts5249_extra_init_hw, + .optimize_phy = rts5249_optimize_phy, + .turn_on_led = rtsx_base_turn_on_led, + .turn_off_led = rtsx_base_turn_off_led, + .enable_auto_blink = rtsx_base_enable_auto_blink, + .disable_auto_blink = rtsx_base_disable_auto_blink, + .card_power_on = rtsx_base_card_power_on, + .card_power_off = rtsx_base_card_power_off, + .switch_output_voltage = rtsx_base_switch_output_voltage, + .force_power_down = rtsx_base_force_power_down, + .set_aspm = rts5249_set_aspm, +}; + +/* SD Pull Control Enable: + * SD_DAT[3:0] ==> pull up + * SD_CD ==> pull up + * SD_WP ==> pull up + * SD_CMD ==> pull up + * SD_CLK ==> pull down + */ +static const u32 rts5249_sd_pull_ctl_enable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL1, 0x66), + RTSX_REG_PAIR(CARD_PULL_CTL2, 0xAA), + RTSX_REG_PAIR(CARD_PULL_CTL3, 0xE9), + RTSX_REG_PAIR(CARD_PULL_CTL4, 0xAA), + 0, +}; + +/* SD Pull Control Disable: + * SD_DAT[3:0] ==> pull down + * SD_CD ==> pull up + * SD_WP ==> pull down + * SD_CMD ==> pull down + * SD_CLK ==> pull down + */ +static const u32 rts5249_sd_pull_ctl_disable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL1, 0x66), + RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL3, 0xD5), + RTSX_REG_PAIR(CARD_PULL_CTL4, 0x55), + 0, +}; + +/* MS Pull Control Enable: + * MS CD ==> pull up + * others ==> pull down + */ +static const u32 rts5249_ms_pull_ctl_enable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL4, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL5, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL6, 0x15), + 0, +}; + +/* MS Pull Control Disable: + * MS CD ==> pull up + * others ==> pull down + */ +static const u32 rts5249_ms_pull_ctl_disable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL4, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL5, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL6, 0x15), + 0, +}; + +void rts5249_init_params(struct rtsx_pcr *pcr) +{ + struct rtsx_cr_option *option = &(pcr->option); + + pcr->extra_caps = EXTRA_CAPS_SD_SDR50 | EXTRA_CAPS_SD_SDR104; + pcr->num_slots = 2; + pcr->ops = &rts5249_pcr_ops; + + pcr->flags = 0; + pcr->card_drive_sel = RTSX_CARD_DRIVE_DEFAULT; + pcr->sd30_drive_sel_1v8 = CFG_DRIVER_TYPE_B; + pcr->sd30_drive_sel_3v3 = CFG_DRIVER_TYPE_B; + pcr->aspm_en = ASPM_L1_EN; + pcr->tx_initial_phase = SET_CLOCK_PHASE(1, 29, 16); + pcr->rx_initial_phase = SET_CLOCK_PHASE(24, 6, 5); + + pcr->ic_version = rts5249_get_ic_version(pcr); + pcr->sd_pull_ctl_enable_tbl = rts5249_sd_pull_ctl_enable_tbl; + pcr->sd_pull_ctl_disable_tbl = rts5249_sd_pull_ctl_disable_tbl; + pcr->ms_pull_ctl_enable_tbl = rts5249_ms_pull_ctl_enable_tbl; + pcr->ms_pull_ctl_disable_tbl = rts5249_ms_pull_ctl_disable_tbl; + + pcr->reg_pm_ctrl3 = PM_CTRL3; + + option->dev_flags = (LTR_L1SS_PWR_GATE_CHECK_CARD_EN + | LTR_L1SS_PWR_GATE_EN); + option->ltr_en = true; + + /* Init latency of active, idle, L1OFF to 60us, 300us, 3ms */ + option->ltr_active_latency = LTR_ACTIVE_LATENCY_DEF; + option->ltr_idle_latency = LTR_IDLE_LATENCY_DEF; + option->ltr_l1off_latency = LTR_L1OFF_LATENCY_DEF; + option->dev_aspm_mode = DEV_ASPM_DYNAMIC; + option->l1_snooze_delay = L1_SNOOZE_DELAY_DEF; + option->ltr_l1off_sspwrgate = LTR_L1OFF_SSPWRGATE_5249_DEF; + option->ltr_l1off_snooze_sspwrgate = + LTR_L1OFF_SNOOZE_SSPWRGATE_5249_DEF; +} + +static int rts524a_write_phy(struct rtsx_pcr *pcr, u8 addr, u16 val) +{ + addr = addr & 0x80 ? (addr & 0x7F) | 0x40 : addr; + + return __rtsx_pci_write_phy_register(pcr, addr, val); +} + +static int rts524a_read_phy(struct rtsx_pcr *pcr, u8 addr, u16 *val) +{ + addr = addr & 0x80 ? (addr & 0x7F) | 0x40 : addr; + + return __rtsx_pci_read_phy_register(pcr, addr, val); +} + +static int rts524a_optimize_phy(struct rtsx_pcr *pcr) +{ + int err; + + err = rtsx_pci_write_register(pcr, RTS524A_PM_CTRL3, + D3_DELINK_MODE_EN, 0x00); + if (err < 0) + return err; + + rtsx_pci_write_phy_register(pcr, PHY_PCR, + PHY_PCR_FORCE_CODE | PHY_PCR_OOBS_CALI_50 | + PHY_PCR_OOBS_VCM_08 | PHY_PCR_OOBS_SEN_90 | PHY_PCR_RSSI_EN); + rtsx_pci_write_phy_register(pcr, PHY_SSCCR3, + PHY_SSCCR3_STEP_IN | PHY_SSCCR3_CHECK_DELAY); + + if (is_version(pcr, 0x524A, IC_VER_A)) { + rtsx_pci_write_phy_register(pcr, PHY_SSCCR3, + PHY_SSCCR3_STEP_IN | PHY_SSCCR3_CHECK_DELAY); + rtsx_pci_write_phy_register(pcr, PHY_SSCCR2, + PHY_SSCCR2_PLL_NCODE | PHY_SSCCR2_TIME0 | + PHY_SSCCR2_TIME2_WIDTH); + rtsx_pci_write_phy_register(pcr, PHY_ANA1A, + PHY_ANA1A_TXR_LOOPBACK | PHY_ANA1A_RXT_BIST | + PHY_ANA1A_TXR_BIST | PHY_ANA1A_REV); + rtsx_pci_write_phy_register(pcr, PHY_ANA1D, + PHY_ANA1D_DEBUG_ADDR); + rtsx_pci_write_phy_register(pcr, PHY_DIG1E, + PHY_DIG1E_REV | PHY_DIG1E_D0_X_D1 | + PHY_DIG1E_RX_ON_HOST | PHY_DIG1E_RCLK_REF_HOST | + PHY_DIG1E_RCLK_TX_EN_KEEP | + PHY_DIG1E_RCLK_TX_TERM_KEEP | + PHY_DIG1E_RCLK_RX_EIDLE_ON | PHY_DIG1E_TX_TERM_KEEP | + PHY_DIG1E_RX_TERM_KEEP | PHY_DIG1E_TX_EN_KEEP | + PHY_DIG1E_RX_EN_KEEP); + } + + rtsx_pci_write_phy_register(pcr, PHY_ANA08, + PHY_ANA08_RX_EQ_DCGAIN | PHY_ANA08_SEL_RX_EN | + PHY_ANA08_RX_EQ_VAL | PHY_ANA08_SCP | PHY_ANA08_SEL_IPI); + + return 0; +} + +static int rts524a_extra_init_hw(struct rtsx_pcr *pcr) +{ + rts5249_extra_init_hw(pcr); + + rtsx_pci_write_register(pcr, FUNC_FORCE_CTL, + FORCE_ASPM_L1_EN, FORCE_ASPM_L1_EN); + rtsx_pci_write_register(pcr, PM_EVENT_DEBUG, PME_DEBUG_0, PME_DEBUG_0); + rtsx_pci_write_register(pcr, LDO_VCC_CFG1, LDO_VCC_LMT_EN, + LDO_VCC_LMT_EN); + rtsx_pci_write_register(pcr, PCLK_CTL, PCLK_MODE_SEL, PCLK_MODE_SEL); + if (is_version(pcr, 0x524A, IC_VER_A)) { + rtsx_pci_write_register(pcr, LDO_DV18_CFG, + LDO_DV18_SR_MASK, LDO_DV18_SR_DF); + rtsx_pci_write_register(pcr, LDO_VCC_CFG1, + LDO_VCC_REF_TUNE_MASK, LDO_VCC_REF_1V2); + rtsx_pci_write_register(pcr, LDO_VIO_CFG, + LDO_VIO_REF_TUNE_MASK, LDO_VIO_REF_1V2); + rtsx_pci_write_register(pcr, LDO_VIO_CFG, + LDO_VIO_SR_MASK, LDO_VIO_SR_DF); + rtsx_pci_write_register(pcr, LDO_DV12S_CFG, + LDO_REF12_TUNE_MASK, LDO_REF12_TUNE_DF); + rtsx_pci_write_register(pcr, SD40_LDO_CTL1, + SD40_VIO_TUNE_MASK, SD40_VIO_TUNE_1V7); + } + + return 0; +} + +static void rts5250_set_l1off_cfg_sub_d0(struct rtsx_pcr *pcr, int active) +{ + struct rtsx_cr_option *option = &(pcr->option); + + u32 interrupt = rtsx_pci_readl(pcr, RTSX_BIPR); + int card_exist = (interrupt & SD_EXIST) | (interrupt & MS_EXIST); + int aspm_L1_1, aspm_L1_2; + u8 val = 0; + + aspm_L1_1 = rtsx_check_dev_flag(pcr, ASPM_L1_1_EN); + aspm_L1_2 = rtsx_check_dev_flag(pcr, ASPM_L1_2_EN); + + if (active) { + /* Run, latency: 60us */ + if (aspm_L1_1) + val = option->ltr_l1off_snooze_sspwrgate; + } else { + /* L1off, latency: 300us */ + if (aspm_L1_2) + val = option->ltr_l1off_sspwrgate; + } + + if (aspm_L1_1 || aspm_L1_2) { + if (rtsx_check_dev_flag(pcr, + LTR_L1SS_PWR_GATE_CHECK_CARD_EN)) { + if (card_exist) + val &= ~L1OFF_MBIAS2_EN_5250; + else + val |= L1OFF_MBIAS2_EN_5250; + } + } + rtsx_set_l1off_sub(pcr, val); +} + +static const struct pcr_ops rts524a_pcr_ops = { + .write_phy = rts524a_write_phy, + .read_phy = rts524a_read_phy, + .fetch_vendor_settings = rtsx_base_fetch_vendor_settings, + .extra_init_hw = rts524a_extra_init_hw, + .optimize_phy = rts524a_optimize_phy, + .turn_on_led = rtsx_base_turn_on_led, + .turn_off_led = rtsx_base_turn_off_led, + .enable_auto_blink = rtsx_base_enable_auto_blink, + .disable_auto_blink = rtsx_base_disable_auto_blink, + .card_power_on = rtsx_base_card_power_on, + .card_power_off = rtsx_base_card_power_off, + .switch_output_voltage = rtsx_base_switch_output_voltage, + .force_power_down = rtsx_base_force_power_down, + .set_l1off_cfg_sub_d0 = rts5250_set_l1off_cfg_sub_d0, + .set_aspm = rts5249_set_aspm, +}; + +void rts524a_init_params(struct rtsx_pcr *pcr) +{ + rts5249_init_params(pcr); + pcr->tx_initial_phase = SET_CLOCK_PHASE(27, 29, 11); + pcr->option.ltr_l1off_sspwrgate = LTR_L1OFF_SSPWRGATE_5250_DEF; + pcr->option.ltr_l1off_snooze_sspwrgate = + LTR_L1OFF_SNOOZE_SSPWRGATE_5250_DEF; + + pcr->reg_pm_ctrl3 = RTS524A_PM_CTRL3; + pcr->ops = &rts524a_pcr_ops; +} + +static int rts525a_card_power_on(struct rtsx_pcr *pcr, int card) +{ + rtsx_pci_write_register(pcr, LDO_VCC_CFG1, + LDO_VCC_TUNE_MASK, LDO_VCC_3V3); + return rtsx_base_card_power_on(pcr, card); +} + +static int rts525a_switch_output_voltage(struct rtsx_pcr *pcr, u8 voltage) +{ + switch (voltage) { + case OUTPUT_3V3: + rtsx_pci_write_register(pcr, LDO_CONFIG2, + LDO_D3318_MASK, LDO_D3318_33V); + rtsx_pci_write_register(pcr, SD_PAD_CTL, SD_IO_USING_1V8, 0); + break; + case OUTPUT_1V8: + rtsx_pci_write_register(pcr, LDO_CONFIG2, + LDO_D3318_MASK, LDO_D3318_18V); + rtsx_pci_write_register(pcr, SD_PAD_CTL, SD_IO_USING_1V8, + SD_IO_USING_1V8); + break; + default: + return -EINVAL; + } + + rtsx_pci_init_cmd(pcr); + rts5249_fill_driving(pcr, voltage); + return rtsx_pci_send_cmd(pcr, 100); +} + +static int rts525a_optimize_phy(struct rtsx_pcr *pcr) +{ + int err; + + err = rtsx_pci_write_register(pcr, RTS524A_PM_CTRL3, + D3_DELINK_MODE_EN, 0x00); + if (err < 0) + return err; + + rtsx_pci_write_phy_register(pcr, _PHY_FLD0, + _PHY_FLD0_CLK_REQ_20C | _PHY_FLD0_RX_IDLE_EN | + _PHY_FLD0_BIT_ERR_RSTN | _PHY_FLD0_BER_COUNT | + _PHY_FLD0_BER_TIMER | _PHY_FLD0_CHECK_EN); + + rtsx_pci_write_phy_register(pcr, _PHY_ANA03, + _PHY_ANA03_TIMER_MAX | _PHY_ANA03_OOBS_DEB_EN | + _PHY_CMU_DEBUG_EN); + + if (is_version(pcr, 0x525A, IC_VER_A)) + rtsx_pci_write_phy_register(pcr, _PHY_REV0, + _PHY_REV0_FILTER_OUT | _PHY_REV0_CDR_BYPASS_PFD | + _PHY_REV0_CDR_RX_IDLE_BYPASS); + + return 0; +} + +static int rts525a_extra_init_hw(struct rtsx_pcr *pcr) +{ + rts5249_extra_init_hw(pcr); + + rtsx_pci_write_register(pcr, PCLK_CTL, PCLK_MODE_SEL, PCLK_MODE_SEL); + if (is_version(pcr, 0x525A, IC_VER_A)) { + rtsx_pci_write_register(pcr, L1SUB_CONFIG2, + L1SUB_AUTO_CFG, L1SUB_AUTO_CFG); + rtsx_pci_write_register(pcr, RREF_CFG, + RREF_VBGSEL_MASK, RREF_VBGSEL_1V25); + rtsx_pci_write_register(pcr, LDO_VIO_CFG, + LDO_VIO_TUNE_MASK, LDO_VIO_1V7); + rtsx_pci_write_register(pcr, LDO_DV12S_CFG, + LDO_D12_TUNE_MASK, LDO_D12_TUNE_DF); + rtsx_pci_write_register(pcr, LDO_AV12S_CFG, + LDO_AV12S_TUNE_MASK, LDO_AV12S_TUNE_DF); + rtsx_pci_write_register(pcr, LDO_VCC_CFG0, + LDO_VCC_LMTVTH_MASK, LDO_VCC_LMTVTH_2A); + rtsx_pci_write_register(pcr, OOBS_CONFIG, + OOBS_AUTOK_DIS | OOBS_VAL_MASK, 0x89); + } + + return 0; +} + +static const struct pcr_ops rts525a_pcr_ops = { + .fetch_vendor_settings = rtsx_base_fetch_vendor_settings, + .extra_init_hw = rts525a_extra_init_hw, + .optimize_phy = rts525a_optimize_phy, + .turn_on_led = rtsx_base_turn_on_led, + .turn_off_led = rtsx_base_turn_off_led, + .enable_auto_blink = rtsx_base_enable_auto_blink, + .disable_auto_blink = rtsx_base_disable_auto_blink, + .card_power_on = rts525a_card_power_on, + .card_power_off = rtsx_base_card_power_off, + .switch_output_voltage = rts525a_switch_output_voltage, + .force_power_down = rtsx_base_force_power_down, + .set_l1off_cfg_sub_d0 = rts5250_set_l1off_cfg_sub_d0, + .set_aspm = rts5249_set_aspm, +}; + +void rts525a_init_params(struct rtsx_pcr *pcr) +{ + rts5249_init_params(pcr); + pcr->tx_initial_phase = SET_CLOCK_PHASE(25, 29, 11); + pcr->option.ltr_l1off_sspwrgate = LTR_L1OFF_SSPWRGATE_5250_DEF; + pcr->option.ltr_l1off_snooze_sspwrgate = + LTR_L1OFF_SNOOZE_SSPWRGATE_5250_DEF; + + pcr->reg_pm_ctrl3 = RTS524A_PM_CTRL3; + pcr->ops = &rts525a_pcr_ops; +} diff --git a/drivers/misc/cardreader/rts5260.c b/drivers/misc/cardreader/rts5260.c new file mode 100644 index 000000000..958b19f7c --- /dev/null +++ b/drivers/misc/cardreader/rts5260.c @@ -0,0 +1,748 @@ +/* Driver for Realtek PCI-Express card reader + * + * Copyright(c) 2016-2017 Realtek Semiconductor Corp. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see . + * + * Author: + * Steven FENG + * Rui FENG + * Wei WANG + */ + +#include +#include +#include + +#include "rts5260.h" +#include "rtsx_pcr.h" + +static u8 rts5260_get_ic_version(struct rtsx_pcr *pcr) +{ + u8 val; + + rtsx_pci_read_register(pcr, DUMMY_REG_RESET_0, &val); + return val & IC_VERSION_MASK; +} + +static void rts5260_fill_driving(struct rtsx_pcr *pcr, u8 voltage) +{ + u8 driving_3v3[6][3] = { + {0x94, 0x94, 0x94}, + {0x11, 0x11, 0x18}, + {0x55, 0x55, 0x5C}, + {0x94, 0x94, 0x94}, + {0x94, 0x94, 0x94}, + {0xFF, 0xFF, 0xFF}, + }; + u8 driving_1v8[6][3] = { + {0x9A, 0x89, 0x89}, + {0xC4, 0xC4, 0xC4}, + {0x3C, 0x3C, 0x3C}, + {0x9B, 0x99, 0x99}, + {0x9A, 0x89, 0x89}, + {0xFE, 0xFE, 0xFE}, + }; + u8 (*driving)[3], drive_sel; + + if (voltage == OUTPUT_3V3) { + driving = driving_3v3; + drive_sel = pcr->sd30_drive_sel_3v3; + } else { + driving = driving_1v8; + drive_sel = pcr->sd30_drive_sel_1v8; + } + + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD30_CLK_DRIVE_SEL, + 0xFF, driving[drive_sel][0]); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD30_CMD_DRIVE_SEL, + 0xFF, driving[drive_sel][1]); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD30_DAT_DRIVE_SEL, + 0xFF, driving[drive_sel][2]); +} + +static void rtsx_base_fetch_vendor_settings(struct rtsx_pcr *pcr) +{ + u32 reg; + + rtsx_pci_read_config_dword(pcr, PCR_SETTING_REG1, ®); + pcr_dbg(pcr, "Cfg 0x%x: 0x%x\n", PCR_SETTING_REG1, reg); + + if (!rtsx_vendor_setting_valid(reg)) { + pcr_dbg(pcr, "skip fetch vendor setting\n"); + return; + } + + pcr->aspm_en = rtsx_reg_to_aspm(reg); + pcr->sd30_drive_sel_1v8 = rtsx_reg_to_sd30_drive_sel_1v8(reg); + pcr->card_drive_sel &= 0x3F; + pcr->card_drive_sel |= rtsx_reg_to_card_drive_sel(reg); + + rtsx_pci_read_config_dword(pcr, PCR_SETTING_REG2, ®); + pcr_dbg(pcr, "Cfg 0x%x: 0x%x\n", PCR_SETTING_REG2, reg); + pcr->sd30_drive_sel_3v3 = rtsx_reg_to_sd30_drive_sel_3v3(reg); + if (rtsx_reg_check_reverse_socket(reg)) + pcr->flags |= PCR_REVERSE_SOCKET; +} + +static void rtsx_base_force_power_down(struct rtsx_pcr *pcr, u8 pm_state) +{ + /* Set relink_time to 0 */ + rtsx_pci_write_register(pcr, AUTOLOAD_CFG_BASE + 1, MASK_8_BIT_DEF, 0); + rtsx_pci_write_register(pcr, AUTOLOAD_CFG_BASE + 2, MASK_8_BIT_DEF, 0); + rtsx_pci_write_register(pcr, AUTOLOAD_CFG_BASE + 3, + RELINK_TIME_MASK, 0); + + if (pm_state == HOST_ENTER_S3) + rtsx_pci_write_register(pcr, pcr->reg_pm_ctrl3, + D3_DELINK_MODE_EN, D3_DELINK_MODE_EN); + + rtsx_pci_write_register(pcr, FPDCTL, ALL_POWER_DOWN, ALL_POWER_DOWN); +} + +static int rtsx_base_enable_auto_blink(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, OLT_LED_CTL, + LED_SHINE_MASK, LED_SHINE_EN); +} + +static int rtsx_base_disable_auto_blink(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, OLT_LED_CTL, + LED_SHINE_MASK, LED_SHINE_DISABLE); +} + +static int rts5260_turn_on_led(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, RTS5260_REG_GPIO_CTL0, + RTS5260_REG_GPIO_MASK, RTS5260_REG_GPIO_ON); +} + +static int rts5260_turn_off_led(struct rtsx_pcr *pcr) +{ + return rtsx_pci_write_register(pcr, RTS5260_REG_GPIO_CTL0, + RTS5260_REG_GPIO_MASK, RTS5260_REG_GPIO_OFF); +} + +/* SD Pull Control Enable: + * SD_DAT[3:0] ==> pull up + * SD_CD ==> pull up + * SD_WP ==> pull up + * SD_CMD ==> pull up + * SD_CLK ==> pull down + */ +static const u32 rts5260_sd_pull_ctl_enable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL1, 0x66), + RTSX_REG_PAIR(CARD_PULL_CTL2, 0xAA), + RTSX_REG_PAIR(CARD_PULL_CTL3, 0xE9), + RTSX_REG_PAIR(CARD_PULL_CTL4, 0xAA), + 0, +}; + +/* SD Pull Control Disable: + * SD_DAT[3:0] ==> pull down + * SD_CD ==> pull up + * SD_WP ==> pull down + * SD_CMD ==> pull down + * SD_CLK ==> pull down + */ +static const u32 rts5260_sd_pull_ctl_disable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL1, 0x66), + RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL3, 0xD5), + RTSX_REG_PAIR(CARD_PULL_CTL4, 0x55), + 0, +}; + +/* MS Pull Control Enable: + * MS CD ==> pull up + * others ==> pull down + */ +static const u32 rts5260_ms_pull_ctl_enable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL4, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL5, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL6, 0x15), + 0, +}; + +/* MS Pull Control Disable: + * MS CD ==> pull up + * others ==> pull down + */ +static const u32 rts5260_ms_pull_ctl_disable_tbl[] = { + RTSX_REG_PAIR(CARD_PULL_CTL4, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL5, 0x55), + RTSX_REG_PAIR(CARD_PULL_CTL6, 0x15), + 0, +}; + +static int sd_set_sample_push_timing_sd30(struct rtsx_pcr *pcr) +{ + rtsx_pci_write_register(pcr, SD_CFG1, SD_MODE_SELECT_MASK + | SD_ASYNC_FIFO_NOT_RST, SD_30_MODE | SD_ASYNC_FIFO_NOT_RST); + rtsx_pci_write_register(pcr, CLK_CTL, CLK_LOW_FREQ, CLK_LOW_FREQ); + rtsx_pci_write_register(pcr, CARD_CLK_SOURCE, 0xFF, + CRC_VAR_CLK0 | SD30_FIX_CLK | SAMPLE_VAR_CLK1); + rtsx_pci_write_register(pcr, CLK_CTL, CLK_LOW_FREQ, 0); + + return 0; +} + +static int rts5260_card_power_on(struct rtsx_pcr *pcr, int card) +{ + int err = 0; + struct rtsx_cr_option *option = &pcr->option; + + if (option->ocp_en) + rtsx_pci_enable_ocp(pcr); + + rtsx_pci_init_cmd(pcr); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, LDO_CONFIG2, + DV331812_VDD1, DV331812_VDD1); + err = rtsx_pci_send_cmd(pcr, CMD_TIMEOUT_DEF); + if (err < 0) + return err; + + rtsx_pci_init_cmd(pcr); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, LDO_VCC_CFG0, + RTS5260_DVCC_TUNE_MASK, RTS5260_DVCC_33); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, LDO_VCC_CFG1, + LDO_POW_SDVDD1_MASK, LDO_POW_SDVDD1_ON); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, LDO_CONFIG2, + DV331812_POWERON, DV331812_POWERON); + err = rtsx_pci_send_cmd(pcr, CMD_TIMEOUT_DEF); + + msleep(20); + + if (pcr->extra_caps & EXTRA_CAPS_SD_SDR50 || + pcr->extra_caps & EXTRA_CAPS_SD_SDR104) + sd_set_sample_push_timing_sd30(pcr); + + /* Initialize SD_CFG1 register */ + rtsx_pci_write_register(pcr, SD_CFG1, 0xFF, + SD_CLK_DIVIDE_128 | SD_20_MODE); + + rtsx_pci_write_register(pcr, SD_SAMPLE_POINT_CTL, + 0xFF, SD20_RX_POS_EDGE); + rtsx_pci_write_register(pcr, SD_PUSH_POINT_CTL, 0xFF, 0); + rtsx_pci_write_register(pcr, CARD_STOP, SD_STOP | SD_CLR_ERR, + SD_STOP | SD_CLR_ERR); + + /* Reset SD_CFG3 register */ + rtsx_pci_write_register(pcr, SD_CFG3, SD30_CLK_END_EN, 0); + rtsx_pci_write_register(pcr, REG_SD_STOP_SDCLK_CFG, + SD30_CLK_STOP_CFG_EN | SD30_CLK_STOP_CFG1 | + SD30_CLK_STOP_CFG0, 0); + + rtsx_pci_write_register(pcr, REG_PRE_RW_MODE, EN_INFINITE_MODE, 0); + + return err; +} + +static int rts5260_switch_output_voltage(struct rtsx_pcr *pcr, u8 voltage) +{ + switch (voltage) { + case OUTPUT_3V3: + rtsx_pci_write_register(pcr, LDO_CONFIG2, + DV331812_VDD1, DV331812_VDD1); + rtsx_pci_write_register(pcr, LDO_DV18_CFG, + DV331812_MASK, DV331812_33); + rtsx_pci_write_register(pcr, SD_PAD_CTL, SD_IO_USING_1V8, 0); + break; + case OUTPUT_1V8: + rtsx_pci_write_register(pcr, LDO_CONFIG2, + DV331812_VDD1, DV331812_VDD1); + rtsx_pci_write_register(pcr, LDO_DV18_CFG, + DV331812_MASK, DV331812_17); + rtsx_pci_write_register(pcr, SD_PAD_CTL, SD_IO_USING_1V8, + SD_IO_USING_1V8); + break; + default: + return -EINVAL; + } + + /* set pad drive */ + rtsx_pci_init_cmd(pcr); + rts5260_fill_driving(pcr, voltage); + return rtsx_pci_send_cmd(pcr, CMD_TIMEOUT_DEF); +} + +static void rts5260_stop_cmd(struct rtsx_pcr *pcr) +{ + rtsx_pci_writel(pcr, RTSX_HCBCTLR, STOP_CMD); + rtsx_pci_writel(pcr, RTSX_HDBCTLR, STOP_DMA); + rtsx_pci_write_register(pcr, RTS5260_DMA_RST_CTL_0, + RTS5260_DMA_RST | RTS5260_ADMA3_RST, + RTS5260_DMA_RST | RTS5260_ADMA3_RST); + rtsx_pci_write_register(pcr, RBCTL, RB_FLUSH, RB_FLUSH); +} + +static void rts5260_card_before_power_off(struct rtsx_pcr *pcr) +{ + struct rtsx_cr_option *option = &pcr->option; + + rts5260_stop_cmd(pcr); + rts5260_switch_output_voltage(pcr, OUTPUT_3V3); + + if (option->ocp_en) + rtsx_pci_disable_ocp(pcr); +} + +static int rts5260_card_power_off(struct rtsx_pcr *pcr, int card) +{ + int err = 0; + + rts5260_card_before_power_off(pcr); + + rtsx_pci_init_cmd(pcr); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, LDO_VCC_CFG1, + LDO_POW_SDVDD1_MASK, LDO_POW_SDVDD1_OFF); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, LDO_CONFIG2, + DV331812_POWERON, DV331812_POWEROFF); + err = rtsx_pci_send_cmd(pcr, CMD_TIMEOUT_DEF); + + return err; +} + +static void rts5260_init_ocp(struct rtsx_pcr *pcr) +{ + struct rtsx_cr_option *option = &pcr->option; + + if (option->ocp_en) { + u8 mask, val; + + rtsx_pci_write_register(pcr, RTS5260_DVCC_CTRL, + RTS5260_DVCC_OCP_EN | + RTS5260_DVCC_OCP_CL_EN, + RTS5260_DVCC_OCP_EN | + RTS5260_DVCC_OCP_CL_EN); + rtsx_pci_write_register(pcr, RTS5260_DVIO_CTRL, + RTS5260_DVIO_OCP_EN | + RTS5260_DVIO_OCP_CL_EN, + RTS5260_DVIO_OCP_EN | + RTS5260_DVIO_OCP_CL_EN); + + rtsx_pci_write_register(pcr, RTS5260_DVCC_CTRL, + RTS5260_DVCC_OCP_THD_MASK, + option->sd_400mA_ocp_thd); + + rtsx_pci_write_register(pcr, RTS5260_DVIO_CTRL, + RTS5260_DVIO_OCP_THD_MASK, + RTS5260_DVIO_OCP_THD_350); + + rtsx_pci_write_register(pcr, RTS5260_DV331812_CFG, + RTS5260_DV331812_OCP_THD_MASK, + RTS5260_DV331812_OCP_THD_210); + + mask = SD_OCP_GLITCH_MASK | SDVIO_OCP_GLITCH_MASK; + val = pcr->hw_param.ocp_glitch; + rtsx_pci_write_register(pcr, REG_OCPGLITCH, mask, val); + + rtsx_pci_enable_ocp(pcr); + } else { + rtsx_pci_write_register(pcr, RTS5260_DVCC_CTRL, + RTS5260_DVCC_OCP_EN | + RTS5260_DVCC_OCP_CL_EN, 0); + rtsx_pci_write_register(pcr, RTS5260_DVIO_CTRL, + RTS5260_DVIO_OCP_EN | + RTS5260_DVIO_OCP_CL_EN, 0); + } +} + +static void rts5260_enable_ocp(struct rtsx_pcr *pcr) +{ + u8 val = 0; + + rtsx_pci_write_register(pcr, FPDCTL, OC_POWER_DOWN, 0); + + val = SD_OCP_INT_EN | SD_DETECT_EN; + val |= SDVIO_OCP_INT_EN | SDVIO_DETECT_EN; + rtsx_pci_write_register(pcr, REG_OCPCTL, 0xFF, val); + rtsx_pci_write_register(pcr, REG_DV3318_OCPCTL, + DV3318_DETECT_EN | DV3318_OCP_INT_EN, + DV3318_DETECT_EN | DV3318_OCP_INT_EN); +} + +static void rts5260_disable_ocp(struct rtsx_pcr *pcr) +{ + u8 mask = 0; + + mask = SD_OCP_INT_EN | SD_DETECT_EN; + mask |= SDVIO_OCP_INT_EN | SDVIO_DETECT_EN; + rtsx_pci_write_register(pcr, REG_OCPCTL, mask, 0); + rtsx_pci_write_register(pcr, REG_DV3318_OCPCTL, + DV3318_DETECT_EN | DV3318_OCP_INT_EN, 0); + + rtsx_pci_write_register(pcr, FPDCTL, OC_POWER_DOWN, + OC_POWER_DOWN); +} + +static int rts5260_get_ocpstat(struct rtsx_pcr *pcr, u8 *val) +{ + return rtsx_pci_read_register(pcr, REG_OCPSTAT, val); +} + +static int rts5260_get_ocpstat2(struct rtsx_pcr *pcr, u8 *val) +{ + return rtsx_pci_read_register(pcr, REG_DV3318_OCPSTAT, val); +} + +static void rts5260_clear_ocpstat(struct rtsx_pcr *pcr) +{ + u8 mask = 0; + u8 val = 0; + + mask = SD_OCP_INT_CLR | SD_OC_CLR; + mask |= SDVIO_OCP_INT_CLR | SDVIO_OC_CLR; + val = SD_OCP_INT_CLR | SD_OC_CLR; + val |= SDVIO_OCP_INT_CLR | SDVIO_OC_CLR; + + rtsx_pci_write_register(pcr, REG_OCPCTL, mask, val); + rtsx_pci_write_register(pcr, REG_DV3318_OCPCTL, + DV3318_OCP_INT_CLR | DV3318_OCP_CLR, + DV3318_OCP_INT_CLR | DV3318_OCP_CLR); + udelay(10); + rtsx_pci_write_register(pcr, REG_OCPCTL, mask, 0); + rtsx_pci_write_register(pcr, REG_DV3318_OCPCTL, + DV3318_OCP_INT_CLR | DV3318_OCP_CLR, 0); +} + +static void rts5260_process_ocp(struct rtsx_pcr *pcr) +{ + if (!pcr->option.ocp_en) + return; + + rtsx_pci_get_ocpstat(pcr, &pcr->ocp_stat); + rts5260_get_ocpstat2(pcr, &pcr->ocp_stat2); + if (pcr->card_exist & SD_EXIST) + rtsx_sd_power_off_card3v3(pcr); + else if (pcr->card_exist & MS_EXIST) + rtsx_ms_power_off_card3v3(pcr); + + if (!(pcr->card_exist & MS_EXIST) && !(pcr->card_exist & SD_EXIST)) { + if ((pcr->ocp_stat & (SD_OC_NOW | SD_OC_EVER | + SDVIO_OC_NOW | SDVIO_OC_EVER)) || + (pcr->ocp_stat2 & (DV3318_OCP_NOW | DV3318_OCP_EVER))) + rtsx_pci_clear_ocpstat(pcr); + pcr->ocp_stat = 0; + pcr->ocp_stat2 = 0; + } + + if ((pcr->ocp_stat & (SD_OC_NOW | SD_OC_EVER | + SDVIO_OC_NOW | SDVIO_OC_EVER)) || + (pcr->ocp_stat2 & (DV3318_OCP_NOW | DV3318_OCP_EVER))) { + if (pcr->card_exist & SD_EXIST) + rtsx_pci_write_register(pcr, CARD_OE, SD_OUTPUT_EN, 0); + else if (pcr->card_exist & MS_EXIST) + rtsx_pci_write_register(pcr, CARD_OE, MS_OUTPUT_EN, 0); + } +} + +static int rts5260_init_hw(struct rtsx_pcr *pcr) +{ + int err; + + rtsx_pci_init_ocp(pcr); + + rtsx_pci_init_cmd(pcr); + + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, L1SUB_CONFIG1, + AUX_CLK_ACTIVE_SEL_MASK, MAC_CKSW_DONE); + /* Rest L1SUB Config */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, L1SUB_CONFIG3, 0xFF, 0x00); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PM_CLK_FORCE_CTL, + CLK_PM_EN, CLK_PM_EN); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWD_SUSPEND_EN, 0xFF, 0xFF); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PWR_GATE_CTRL, + PWR_GATE_EN, PWR_GATE_EN); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, REG_VREF, + PWD_SUSPND_EN, PWD_SUSPND_EN); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, RBCTL, + U_AUTO_DMA_EN_MASK, U_AUTO_DMA_DISABLE); + + if (pcr->flags & PCR_REVERSE_SOCKET) + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PETXCFG, 0xB0, 0xB0); + else + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PETXCFG, 0xB0, 0x80); + + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, OBFF_CFG, + OBFF_EN_MASK, OBFF_DISABLE); + + err = rtsx_pci_send_cmd(pcr, CMD_TIMEOUT_DEF); + if (err < 0) + return err; + + return 0; +} + +static void rts5260_pwr_saving_setting(struct rtsx_pcr *pcr) +{ + int lss_l1_1, lss_l1_2; + + lss_l1_1 = rtsx_check_dev_flag(pcr, ASPM_L1_1_EN) + | rtsx_check_dev_flag(pcr, PM_L1_1_EN); + lss_l1_2 = rtsx_check_dev_flag(pcr, ASPM_L1_2_EN) + | rtsx_check_dev_flag(pcr, PM_L1_2_EN); + + if (lss_l1_2) { + pcr_dbg(pcr, "Set parameters for L1.2."); + rtsx_pci_write_register(pcr, PWR_GLOBAL_CTRL, + 0xFF, PCIE_L1_2_EN); + rtsx_pci_write_register(pcr, PWR_FE_CTL, + 0xFF, PCIE_L1_2_PD_FE_EN); + } else if (lss_l1_1) { + pcr_dbg(pcr, "Set parameters for L1.1."); + rtsx_pci_write_register(pcr, PWR_GLOBAL_CTRL, + 0xFF, PCIE_L1_1_EN); + rtsx_pci_write_register(pcr, PWR_FE_CTL, + 0xFF, PCIE_L1_1_PD_FE_EN); + } else { + pcr_dbg(pcr, "Set parameters for L1."); + rtsx_pci_write_register(pcr, PWR_GLOBAL_CTRL, + 0xFF, PCIE_L1_0_EN); + rtsx_pci_write_register(pcr, PWR_FE_CTL, + 0xFF, PCIE_L1_0_PD_FE_EN); + } + + rtsx_pci_write_register(pcr, CFG_L1_0_PCIE_DPHY_RET_VALUE, + 0xFF, CFG_L1_0_RET_VALUE_DEFAULT); + rtsx_pci_write_register(pcr, CFG_L1_0_PCIE_MAC_RET_VALUE, + 0xFF, CFG_L1_0_RET_VALUE_DEFAULT); + rtsx_pci_write_register(pcr, CFG_L1_0_CRC_SD30_RET_VALUE, + 0xFF, CFG_L1_0_RET_VALUE_DEFAULT); + rtsx_pci_write_register(pcr, CFG_L1_0_CRC_SD40_RET_VALUE, + 0xFF, CFG_L1_0_RET_VALUE_DEFAULT); + rtsx_pci_write_register(pcr, CFG_L1_0_SYS_RET_VALUE, + 0xFF, CFG_L1_0_RET_VALUE_DEFAULT); + /*Option cut APHY*/ + rtsx_pci_write_register(pcr, CFG_PCIE_APHY_OFF_0, + 0xFF, CFG_PCIE_APHY_OFF_0_DEFAULT); + rtsx_pci_write_register(pcr, CFG_PCIE_APHY_OFF_1, + 0xFF, CFG_PCIE_APHY_OFF_1_DEFAULT); + rtsx_pci_write_register(pcr, CFG_PCIE_APHY_OFF_2, + 0xFF, CFG_PCIE_APHY_OFF_2_DEFAULT); + rtsx_pci_write_register(pcr, CFG_PCIE_APHY_OFF_3, + 0xFF, CFG_PCIE_APHY_OFF_3_DEFAULT); + /*CDR DEC*/ + rtsx_pci_write_register(pcr, PWC_CDR, 0xFF, PWC_CDR_DEFAULT); + /*PWMPFM*/ + rtsx_pci_write_register(pcr, CFG_LP_FPWM_VALUE, + 0xFF, CFG_LP_FPWM_VALUE_DEFAULT); + /*No Power Saving WA*/ + rtsx_pci_write_register(pcr, CFG_L1_0_CRC_MISC_RET_VALUE, + 0xFF, CFG_L1_0_CRC_MISC_RET_VALUE_DEFAULT); +} + +static void rts5260_init_from_cfg(struct rtsx_pcr *pcr) +{ + struct rtsx_cr_option *option = &pcr->option; + u32 lval; + + rtsx_pci_read_config_dword(pcr, PCR_ASPM_SETTING_5260, &lval); + + if (lval & ASPM_L1_1_EN_MASK) + rtsx_set_dev_flag(pcr, ASPM_L1_1_EN); + + if (lval & ASPM_L1_2_EN_MASK) + rtsx_set_dev_flag(pcr, ASPM_L1_2_EN); + + if (lval & PM_L1_1_EN_MASK) + rtsx_set_dev_flag(pcr, PM_L1_1_EN); + + if (lval & PM_L1_2_EN_MASK) + rtsx_set_dev_flag(pcr, PM_L1_2_EN); + + rts5260_pwr_saving_setting(pcr); + + if (option->ltr_en) { + u16 val; + + pcie_capability_read_word(pcr->pci, PCI_EXP_DEVCTL2, &val); + if (val & PCI_EXP_DEVCTL2_LTR_EN) { + option->ltr_enabled = true; + option->ltr_active = true; + rtsx_set_ltr_latency(pcr, option->ltr_active_latency); + } else { + option->ltr_enabled = false; + } + } + + if (rtsx_check_dev_flag(pcr, ASPM_L1_1_EN | ASPM_L1_2_EN + | PM_L1_1_EN | PM_L1_2_EN)) + option->force_clkreq_0 = false; + else + option->force_clkreq_0 = true; +} + +static int rts5260_extra_init_hw(struct rtsx_pcr *pcr) +{ + struct rtsx_cr_option *option = &pcr->option; + + /* Set mcu_cnt to 7 to ensure data can be sampled properly */ + rtsx_pci_write_register(pcr, 0xFC03, 0x7F, 0x07); + rtsx_pci_write_register(pcr, SSC_DIV_N_0, 0xFF, 0x5D); + + rts5260_init_from_cfg(pcr); + + /* force no MDIO*/ + rtsx_pci_write_register(pcr, RTS5260_AUTOLOAD_CFG4, + 0xFF, RTS5260_MIMO_DISABLE); + /*Modify SDVCC Tune Default Parameters!*/ + rtsx_pci_write_register(pcr, LDO_VCC_CFG0, + RTS5260_DVCC_TUNE_MASK, RTS5260_DVCC_33); + + rtsx_pci_write_register(pcr, PCLK_CTL, PCLK_MODE_SEL, PCLK_MODE_SEL); + + rts5260_init_hw(pcr); + + /* + * If u_force_clkreq_0 is enabled, CLKREQ# PIN will be forced + * to drive low, and we forcibly request clock. + */ + if (option->force_clkreq_0) + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PETXCFG, + FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_LOW); + else + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PETXCFG, + FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_HIGH); + + return 0; +} + +static void rts5260_set_aspm(struct rtsx_pcr *pcr, bool enable) +{ + struct rtsx_cr_option *option = &pcr->option; + u8 val = 0; + + if (pcr->aspm_enabled == enable) + return; + + if (option->dev_aspm_mode == DEV_ASPM_DYNAMIC) { + if (enable) + val = pcr->aspm_en; + rtsx_pci_update_cfg_byte(pcr, pcr->pcie_cap + PCI_EXP_LNKCTL, + ASPM_MASK_NEG, val); + } else if (option->dev_aspm_mode == DEV_ASPM_BACKDOOR) { + u8 mask = FORCE_ASPM_VAL_MASK | FORCE_ASPM_CTL0; + + if (!enable) + val = FORCE_ASPM_CTL0; + rtsx_pci_write_register(pcr, ASPM_FORCE_CTL, mask, val); + } + + pcr->aspm_enabled = enable; +} + +static void rts5260_set_l1off_cfg_sub_d0(struct rtsx_pcr *pcr, int active) +{ + struct rtsx_cr_option *option = &pcr->option; + u32 interrupt = rtsx_pci_readl(pcr, RTSX_BIPR); + int card_exist = (interrupt & SD_EXIST) | (interrupt & MS_EXIST); + int aspm_L1_1, aspm_L1_2; + u8 val = 0; + + aspm_L1_1 = rtsx_check_dev_flag(pcr, ASPM_L1_1_EN); + aspm_L1_2 = rtsx_check_dev_flag(pcr, ASPM_L1_2_EN); + + if (active) { + /* run, latency: 60us */ + if (aspm_L1_1) + val = option->ltr_l1off_snooze_sspwrgate; + } else { + /* l1off, latency: 300us */ + if (aspm_L1_2) + val = option->ltr_l1off_sspwrgate; + } + + if (aspm_L1_1 || aspm_L1_2) { + if (rtsx_check_dev_flag(pcr, + LTR_L1SS_PWR_GATE_CHECK_CARD_EN)) { + if (card_exist) + val &= ~L1OFF_MBIAS2_EN_5250; + else + val |= L1OFF_MBIAS2_EN_5250; + } + } + rtsx_set_l1off_sub(pcr, val); +} + +static const struct pcr_ops rts5260_pcr_ops = { + .fetch_vendor_settings = rtsx_base_fetch_vendor_settings, + .turn_on_led = rts5260_turn_on_led, + .turn_off_led = rts5260_turn_off_led, + .extra_init_hw = rts5260_extra_init_hw, + .enable_auto_blink = rtsx_base_enable_auto_blink, + .disable_auto_blink = rtsx_base_disable_auto_blink, + .card_power_on = rts5260_card_power_on, + .card_power_off = rts5260_card_power_off, + .switch_output_voltage = rts5260_switch_output_voltage, + .force_power_down = rtsx_base_force_power_down, + .stop_cmd = rts5260_stop_cmd, + .set_aspm = rts5260_set_aspm, + .set_l1off_cfg_sub_d0 = rts5260_set_l1off_cfg_sub_d0, + .enable_ocp = rts5260_enable_ocp, + .disable_ocp = rts5260_disable_ocp, + .init_ocp = rts5260_init_ocp, + .process_ocp = rts5260_process_ocp, + .get_ocpstat = rts5260_get_ocpstat, + .clear_ocpstat = rts5260_clear_ocpstat, +}; + +void rts5260_init_params(struct rtsx_pcr *pcr) +{ + struct rtsx_cr_option *option = &pcr->option; + struct rtsx_hw_param *hw_param = &pcr->hw_param; + + pcr->extra_caps = EXTRA_CAPS_SD_SDR50 | EXTRA_CAPS_SD_SDR104; + pcr->num_slots = 2; + + pcr->flags = 0; + pcr->card_drive_sel = RTSX_CARD_DRIVE_DEFAULT; + pcr->sd30_drive_sel_1v8 = CFG_DRIVER_TYPE_B; + pcr->sd30_drive_sel_3v3 = CFG_DRIVER_TYPE_B; + pcr->aspm_en = ASPM_L1_EN; + pcr->tx_initial_phase = SET_CLOCK_PHASE(27, 29, 11); + pcr->rx_initial_phase = SET_CLOCK_PHASE(24, 6, 5); + + pcr->ic_version = rts5260_get_ic_version(pcr); + pcr->sd_pull_ctl_enable_tbl = rts5260_sd_pull_ctl_enable_tbl; + pcr->sd_pull_ctl_disable_tbl = rts5260_sd_pull_ctl_disable_tbl; + pcr->ms_pull_ctl_enable_tbl = rts5260_ms_pull_ctl_enable_tbl; + pcr->ms_pull_ctl_disable_tbl = rts5260_ms_pull_ctl_disable_tbl; + + pcr->reg_pm_ctrl3 = RTS524A_PM_CTRL3; + + pcr->ops = &rts5260_pcr_ops; + + option->dev_flags = (LTR_L1SS_PWR_GATE_CHECK_CARD_EN + | LTR_L1SS_PWR_GATE_EN); + option->ltr_en = true; + + /* init latency of active, idle, L1OFF to 60us, 300us, 3ms */ + option->ltr_active_latency = LTR_ACTIVE_LATENCY_DEF; + option->ltr_idle_latency = LTR_IDLE_LATENCY_DEF; + option->ltr_l1off_latency = LTR_L1OFF_LATENCY_DEF; + option->dev_aspm_mode = DEV_ASPM_DYNAMIC; + option->l1_snooze_delay = L1_SNOOZE_DELAY_DEF; + option->ltr_l1off_sspwrgate = LTR_L1OFF_SSPWRGATE_5250_DEF; + option->ltr_l1off_snooze_sspwrgate = + LTR_L1OFF_SNOOZE_SSPWRGATE_5250_DEF; + + option->ocp_en = 1; + if (option->ocp_en) + hw_param->interrupt_en |= SD_OC_INT_EN; + hw_param->ocp_glitch = SD_OCP_GLITCH_10M | SDVIO_OCP_GLITCH_800U; + option->sd_400mA_ocp_thd = RTS5260_DVCC_OCP_THD_550; + option->sd_800mA_ocp_thd = RTS5260_DVCC_OCP_THD_970; +} diff --git a/drivers/misc/cardreader/rts5260.h b/drivers/misc/cardreader/rts5260.h new file mode 100644 index 000000000..53a1411c8 --- /dev/null +++ b/drivers/misc/cardreader/rts5260.h @@ -0,0 +1,45 @@ +#ifndef __RTS5260_H__ +#define __RTS5260_H__ + +#define RTS5260_DVCC_CTRL 0xFF73 +#define RTS5260_DVCC_OCP_EN (0x01 << 7) +#define RTS5260_DVCC_OCP_THD_MASK (0x07 << 4) +#define RTS5260_DVCC_POWERON (0x01 << 3) +#define RTS5260_DVCC_OCP_CL_EN (0x01 << 2) + +#define RTS5260_DVIO_CTRL 0xFF75 +#define RTS5260_DVIO_OCP_EN (0x01 << 7) +#define RTS5260_DVIO_OCP_THD_MASK (0x07 << 4) +#define RTS5260_DVIO_POWERON (0x01 << 3) +#define RTS5260_DVIO_OCP_CL_EN (0x01 << 2) + +#define RTS5260_DV331812_CFG 0xFF71 +#define RTS5260_DV331812_OCP_EN (0x01 << 7) +#define RTS5260_DV331812_OCP_THD_MASK (0x07 << 4) +#define RTS5260_DV331812_POWERON (0x01 << 3) +#define RTS5260_DV331812_SEL (0x01 << 2) +#define RTS5260_DV331812_VDD1 (0x01 << 2) +#define RTS5260_DV331812_VDD2 (0x00 << 2) + +#define RTS5260_DV331812_OCP_THD_120 (0x00 << 4) +#define RTS5260_DV331812_OCP_THD_140 (0x01 << 4) +#define RTS5260_DV331812_OCP_THD_160 (0x02 << 4) +#define RTS5260_DV331812_OCP_THD_180 (0x03 << 4) +#define RTS5260_DV331812_OCP_THD_210 (0x04 << 4) +#define RTS5260_DV331812_OCP_THD_240 (0x05 << 4) +#define RTS5260_DV331812_OCP_THD_270 (0x06 << 4) +#define RTS5260_DV331812_OCP_THD_300 (0x07 << 4) + +#define RTS5260_DVIO_OCP_THD_250 (0x00 << 4) +#define RTS5260_DVIO_OCP_THD_300 (0x01 << 4) +#define RTS5260_DVIO_OCP_THD_350 (0x02 << 4) +#define RTS5260_DVIO_OCP_THD_400 (0x03 << 4) +#define RTS5260_DVIO_OCP_THD_450 (0x04 << 4) +#define RTS5260_DVIO_OCP_THD_500 (0x05 << 4) +#define RTS5260_DVIO_OCP_THD_550 (0x06 << 4) +#define RTS5260_DVIO_OCP_THD_600 (0x07 << 4) + +#define RTS5260_DVCC_OCP_THD_550 (0x00 << 4) +#define RTS5260_DVCC_OCP_THD_970 (0x05 << 4) + +#endif diff --git a/drivers/misc/cardreader/rtsx_pcr.c b/drivers/misc/cardreader/rtsx_pcr.c new file mode 100644 index 000000000..3eb3c237f --- /dev/null +++ b/drivers/misc/cardreader/rtsx_pcr.c @@ -0,0 +1,1698 @@ +/* Driver for Realtek PCI-Express card reader + * + * Copyright(c) 2009-2013 Realtek Semiconductor Corp. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see . + * + * Author: + * Wei WANG + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "rtsx_pcr.h" + +static bool msi_en = true; +module_param(msi_en, bool, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(msi_en, "Enable MSI"); + +static DEFINE_IDR(rtsx_pci_idr); +static DEFINE_SPINLOCK(rtsx_pci_lock); + +static struct mfd_cell rtsx_pcr_cells[] = { + [RTSX_SD_CARD] = { + .name = DRV_NAME_RTSX_PCI_SDMMC, + }, + [RTSX_MS_CARD] = { + .name = DRV_NAME_RTSX_PCI_MS, + }, +}; + +static const struct pci_device_id rtsx_pci_ids[] = { + { PCI_DEVICE(0x10EC, 0x5209), PCI_CLASS_OTHERS << 16, 0xFF0000 }, + { PCI_DEVICE(0x10EC, 0x5229), PCI_CLASS_OTHERS << 16, 0xFF0000 }, + { PCI_DEVICE(0x10EC, 0x5289), PCI_CLASS_OTHERS << 16, 0xFF0000 }, + { PCI_DEVICE(0x10EC, 0x5227), PCI_CLASS_OTHERS << 16, 0xFF0000 }, + { PCI_DEVICE(0x10EC, 0x522A), PCI_CLASS_OTHERS << 16, 0xFF0000 }, + { PCI_DEVICE(0x10EC, 0x5249), PCI_CLASS_OTHERS << 16, 0xFF0000 }, + { PCI_DEVICE(0x10EC, 0x5287), PCI_CLASS_OTHERS << 16, 0xFF0000 }, + { PCI_DEVICE(0x10EC, 0x5286), PCI_CLASS_OTHERS << 16, 0xFF0000 }, + { PCI_DEVICE(0x10EC, 0x524A), PCI_CLASS_OTHERS << 16, 0xFF0000 }, + { PCI_DEVICE(0x10EC, 0x525A), PCI_CLASS_OTHERS << 16, 0xFF0000 }, + { PCI_DEVICE(0x10EC, 0x5260), PCI_CLASS_OTHERS << 16, 0xFF0000 }, + { 0, } +}; + +MODULE_DEVICE_TABLE(pci, rtsx_pci_ids); + +static inline void rtsx_pci_enable_aspm(struct rtsx_pcr *pcr) +{ + rtsx_pci_update_cfg_byte(pcr, pcr->pcie_cap + PCI_EXP_LNKCTL, + 0xFC, pcr->aspm_en); +} + +static inline void rtsx_pci_disable_aspm(struct rtsx_pcr *pcr) +{ + rtsx_pci_update_cfg_byte(pcr, pcr->pcie_cap + PCI_EXP_LNKCTL, + 0xFC, 0); +} + +static int rtsx_comm_set_ltr_latency(struct rtsx_pcr *pcr, u32 latency) +{ + rtsx_pci_write_register(pcr, MSGTXDATA0, + MASK_8_BIT_DEF, (u8) (latency & 0xFF)); + rtsx_pci_write_register(pcr, MSGTXDATA1, + MASK_8_BIT_DEF, (u8)((latency >> 8) & 0xFF)); + rtsx_pci_write_register(pcr, MSGTXDATA2, + MASK_8_BIT_DEF, (u8)((latency >> 16) & 0xFF)); + rtsx_pci_write_register(pcr, MSGTXDATA3, + MASK_8_BIT_DEF, (u8)((latency >> 24) & 0xFF)); + rtsx_pci_write_register(pcr, LTR_CTL, LTR_TX_EN_MASK | + LTR_LATENCY_MODE_MASK, LTR_TX_EN_1 | LTR_LATENCY_MODE_SW); + + return 0; +} + +int rtsx_set_ltr_latency(struct rtsx_pcr *pcr, u32 latency) +{ + if (pcr->ops->set_ltr_latency) + return pcr->ops->set_ltr_latency(pcr, latency); + else + return rtsx_comm_set_ltr_latency(pcr, latency); +} + +static void rtsx_comm_set_aspm(struct rtsx_pcr *pcr, bool enable) +{ + struct rtsx_cr_option *option = &pcr->option; + + if (pcr->aspm_enabled == enable) + return; + + if (option->dev_aspm_mode == DEV_ASPM_DYNAMIC) { + if (enable) + rtsx_pci_enable_aspm(pcr); + else + rtsx_pci_disable_aspm(pcr); + } else if (option->dev_aspm_mode == DEV_ASPM_BACKDOOR) { + u8 mask = FORCE_ASPM_VAL_MASK; + u8 val = 0; + + if (enable) + val = pcr->aspm_en; + rtsx_pci_write_register(pcr, ASPM_FORCE_CTL, mask, val); + } + + pcr->aspm_enabled = enable; +} + +static void rtsx_disable_aspm(struct rtsx_pcr *pcr) +{ + if (pcr->ops->set_aspm) + pcr->ops->set_aspm(pcr, false); + else + rtsx_comm_set_aspm(pcr, false); +} + +int rtsx_set_l1off_sub(struct rtsx_pcr *pcr, u8 val) +{ + rtsx_pci_write_register(pcr, L1SUB_CONFIG3, 0xFF, val); + + return 0; +} + +static void rtsx_set_l1off_sub_cfg_d0(struct rtsx_pcr *pcr, int active) +{ + if (pcr->ops->set_l1off_cfg_sub_d0) + pcr->ops->set_l1off_cfg_sub_d0(pcr, active); +} + +static void rtsx_comm_pm_full_on(struct rtsx_pcr *pcr) +{ + struct rtsx_cr_option *option = &pcr->option; + + rtsx_disable_aspm(pcr); + + /* Fixes DMA transfer timout issue after disabling ASPM on RTS5260 */ + msleep(1); + + if (option->ltr_enabled) + rtsx_set_ltr_latency(pcr, option->ltr_active_latency); + + if (rtsx_check_dev_flag(pcr, LTR_L1SS_PWR_GATE_EN)) + rtsx_set_l1off_sub_cfg_d0(pcr, 1); +} + +static void rtsx_pm_full_on(struct rtsx_pcr *pcr) +{ + if (pcr->ops->full_on) + pcr->ops->full_on(pcr); + else + rtsx_comm_pm_full_on(pcr); +} + +void rtsx_pci_start_run(struct rtsx_pcr *pcr) +{ + /* If pci device removed, don't queue idle work any more */ + if (pcr->remove_pci) + return; + + if (pcr->state != PDEV_STAT_RUN) { + pcr->state = PDEV_STAT_RUN; + if (pcr->ops->enable_auto_blink) + pcr->ops->enable_auto_blink(pcr); + rtsx_pm_full_on(pcr); + } + + mod_delayed_work(system_wq, &pcr->idle_work, msecs_to_jiffies(200)); +} +EXPORT_SYMBOL_GPL(rtsx_pci_start_run); + +int rtsx_pci_write_register(struct rtsx_pcr *pcr, u16 addr, u8 mask, u8 data) +{ + int i; + u32 val = HAIMR_WRITE_START; + + val |= (u32)(addr & 0x3FFF) << 16; + val |= (u32)mask << 8; + val |= (u32)data; + + rtsx_pci_writel(pcr, RTSX_HAIMR, val); + + for (i = 0; i < MAX_RW_REG_CNT; i++) { + val = rtsx_pci_readl(pcr, RTSX_HAIMR); + if ((val & HAIMR_TRANS_END) == 0) { + if (data != (u8)val) + return -EIO; + return 0; + } + } + + return -ETIMEDOUT; +} +EXPORT_SYMBOL_GPL(rtsx_pci_write_register); + +int rtsx_pci_read_register(struct rtsx_pcr *pcr, u16 addr, u8 *data) +{ + u32 val = HAIMR_READ_START; + int i; + + val |= (u32)(addr & 0x3FFF) << 16; + rtsx_pci_writel(pcr, RTSX_HAIMR, val); + + for (i = 0; i < MAX_RW_REG_CNT; i++) { + val = rtsx_pci_readl(pcr, RTSX_HAIMR); + if ((val & HAIMR_TRANS_END) == 0) + break; + } + + if (i >= MAX_RW_REG_CNT) + return -ETIMEDOUT; + + if (data) + *data = (u8)(val & 0xFF); + + return 0; +} +EXPORT_SYMBOL_GPL(rtsx_pci_read_register); + +int __rtsx_pci_write_phy_register(struct rtsx_pcr *pcr, u8 addr, u16 val) +{ + int err, i, finished = 0; + u8 tmp; + + rtsx_pci_init_cmd(pcr); + + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PHYDATA0, 0xFF, (u8)val); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PHYDATA1, 0xFF, (u8)(val >> 8)); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PHYADDR, 0xFF, addr); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PHYRWCTL, 0xFF, 0x81); + + err = rtsx_pci_send_cmd(pcr, 100); + if (err < 0) + return err; + + for (i = 0; i < 100000; i++) { + err = rtsx_pci_read_register(pcr, PHYRWCTL, &tmp); + if (err < 0) + return err; + + if (!(tmp & 0x80)) { + finished = 1; + break; + } + } + + if (!finished) + return -ETIMEDOUT; + + return 0; +} + +int rtsx_pci_write_phy_register(struct rtsx_pcr *pcr, u8 addr, u16 val) +{ + if (pcr->ops->write_phy) + return pcr->ops->write_phy(pcr, addr, val); + + return __rtsx_pci_write_phy_register(pcr, addr, val); +} +EXPORT_SYMBOL_GPL(rtsx_pci_write_phy_register); + +int __rtsx_pci_read_phy_register(struct rtsx_pcr *pcr, u8 addr, u16 *val) +{ + int err, i, finished = 0; + u16 data; + u8 *ptr, tmp; + + rtsx_pci_init_cmd(pcr); + + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PHYADDR, 0xFF, addr); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PHYRWCTL, 0xFF, 0x80); + + err = rtsx_pci_send_cmd(pcr, 100); + if (err < 0) + return err; + + for (i = 0; i < 100000; i++) { + err = rtsx_pci_read_register(pcr, PHYRWCTL, &tmp); + if (err < 0) + return err; + + if (!(tmp & 0x80)) { + finished = 1; + break; + } + } + + if (!finished) + return -ETIMEDOUT; + + rtsx_pci_init_cmd(pcr); + + rtsx_pci_add_cmd(pcr, READ_REG_CMD, PHYDATA0, 0, 0); + rtsx_pci_add_cmd(pcr, READ_REG_CMD, PHYDATA1, 0, 0); + + err = rtsx_pci_send_cmd(pcr, 100); + if (err < 0) + return err; + + ptr = rtsx_pci_get_cmd_data(pcr); + data = ((u16)ptr[1] << 8) | ptr[0]; + + if (val) + *val = data; + + return 0; +} + +int rtsx_pci_read_phy_register(struct rtsx_pcr *pcr, u8 addr, u16 *val) +{ + if (pcr->ops->read_phy) + return pcr->ops->read_phy(pcr, addr, val); + + return __rtsx_pci_read_phy_register(pcr, addr, val); +} +EXPORT_SYMBOL_GPL(rtsx_pci_read_phy_register); + +void rtsx_pci_stop_cmd(struct rtsx_pcr *pcr) +{ + if (pcr->ops->stop_cmd) + return pcr->ops->stop_cmd(pcr); + + rtsx_pci_writel(pcr, RTSX_HCBCTLR, STOP_CMD); + rtsx_pci_writel(pcr, RTSX_HDBCTLR, STOP_DMA); + + rtsx_pci_write_register(pcr, DMACTL, 0x80, 0x80); + rtsx_pci_write_register(pcr, RBCTL, 0x80, 0x80); +} +EXPORT_SYMBOL_GPL(rtsx_pci_stop_cmd); + +void rtsx_pci_add_cmd(struct rtsx_pcr *pcr, + u8 cmd_type, u16 reg_addr, u8 mask, u8 data) +{ + unsigned long flags; + u32 val = 0; + u32 *ptr = (u32 *)(pcr->host_cmds_ptr); + + val |= (u32)(cmd_type & 0x03) << 30; + val |= (u32)(reg_addr & 0x3FFF) << 16; + val |= (u32)mask << 8; + val |= (u32)data; + + spin_lock_irqsave(&pcr->lock, flags); + ptr += pcr->ci; + if (pcr->ci < (HOST_CMDS_BUF_LEN / 4)) { + put_unaligned_le32(val, ptr); + ptr++; + pcr->ci++; + } + spin_unlock_irqrestore(&pcr->lock, flags); +} +EXPORT_SYMBOL_GPL(rtsx_pci_add_cmd); + +void rtsx_pci_send_cmd_no_wait(struct rtsx_pcr *pcr) +{ + u32 val = 1 << 31; + + rtsx_pci_writel(pcr, RTSX_HCBAR, pcr->host_cmds_addr); + + val |= (u32)(pcr->ci * 4) & 0x00FFFFFF; + /* Hardware Auto Response */ + val |= 0x40000000; + rtsx_pci_writel(pcr, RTSX_HCBCTLR, val); +} +EXPORT_SYMBOL_GPL(rtsx_pci_send_cmd_no_wait); + +int rtsx_pci_send_cmd(struct rtsx_pcr *pcr, int timeout) +{ + struct completion trans_done; + u32 val = 1 << 31; + long timeleft; + unsigned long flags; + int err = 0; + + spin_lock_irqsave(&pcr->lock, flags); + + /* set up data structures for the wakeup system */ + pcr->done = &trans_done; + pcr->trans_result = TRANS_NOT_READY; + init_completion(&trans_done); + + rtsx_pci_writel(pcr, RTSX_HCBAR, pcr->host_cmds_addr); + + val |= (u32)(pcr->ci * 4) & 0x00FFFFFF; + /* Hardware Auto Response */ + val |= 0x40000000; + rtsx_pci_writel(pcr, RTSX_HCBCTLR, val); + + spin_unlock_irqrestore(&pcr->lock, flags); + + /* Wait for TRANS_OK_INT */ + timeleft = wait_for_completion_interruptible_timeout( + &trans_done, msecs_to_jiffies(timeout)); + if (timeleft <= 0) { + pcr_dbg(pcr, "Timeout (%s %d)\n", __func__, __LINE__); + err = -ETIMEDOUT; + goto finish_send_cmd; + } + + spin_lock_irqsave(&pcr->lock, flags); + if (pcr->trans_result == TRANS_RESULT_FAIL) + err = -EINVAL; + else if (pcr->trans_result == TRANS_RESULT_OK) + err = 0; + else if (pcr->trans_result == TRANS_NO_DEVICE) + err = -ENODEV; + spin_unlock_irqrestore(&pcr->lock, flags); + +finish_send_cmd: + spin_lock_irqsave(&pcr->lock, flags); + pcr->done = NULL; + spin_unlock_irqrestore(&pcr->lock, flags); + + if ((err < 0) && (err != -ENODEV)) + rtsx_pci_stop_cmd(pcr); + + if (pcr->finish_me) + complete(pcr->finish_me); + + return err; +} +EXPORT_SYMBOL_GPL(rtsx_pci_send_cmd); + +static void rtsx_pci_add_sg_tbl(struct rtsx_pcr *pcr, + dma_addr_t addr, unsigned int len, int end) +{ + u64 *ptr = (u64 *)(pcr->host_sg_tbl_ptr) + pcr->sgi; + u64 val; + u8 option = RTSX_SG_VALID | RTSX_SG_TRANS_DATA; + + pcr_dbg(pcr, "DMA addr: 0x%x, Len: 0x%x\n", (unsigned int)addr, len); + + if (end) + option |= RTSX_SG_END; + val = ((u64)addr << 32) | ((u64)len << 12) | option; + + put_unaligned_le64(val, ptr); + pcr->sgi++; +} + +int rtsx_pci_transfer_data(struct rtsx_pcr *pcr, struct scatterlist *sglist, + int num_sg, bool read, int timeout) +{ + int err = 0, count; + + pcr_dbg(pcr, "--> %s: num_sg = %d\n", __func__, num_sg); + count = rtsx_pci_dma_map_sg(pcr, sglist, num_sg, read); + if (count < 1) + return -EINVAL; + pcr_dbg(pcr, "DMA mapping count: %d\n", count); + + err = rtsx_pci_dma_transfer(pcr, sglist, count, read, timeout); + + rtsx_pci_dma_unmap_sg(pcr, sglist, num_sg, read); + + return err; +} +EXPORT_SYMBOL_GPL(rtsx_pci_transfer_data); + +int rtsx_pci_dma_map_sg(struct rtsx_pcr *pcr, struct scatterlist *sglist, + int num_sg, bool read) +{ + enum dma_data_direction dir = read ? DMA_FROM_DEVICE : DMA_TO_DEVICE; + + if (pcr->remove_pci) + return -EINVAL; + + if ((sglist == NULL) || (num_sg <= 0)) + return -EINVAL; + + return dma_map_sg(&(pcr->pci->dev), sglist, num_sg, dir); +} +EXPORT_SYMBOL_GPL(rtsx_pci_dma_map_sg); + +void rtsx_pci_dma_unmap_sg(struct rtsx_pcr *pcr, struct scatterlist *sglist, + int num_sg, bool read) +{ + enum dma_data_direction dir = read ? DMA_FROM_DEVICE : DMA_TO_DEVICE; + + dma_unmap_sg(&(pcr->pci->dev), sglist, num_sg, dir); +} +EXPORT_SYMBOL_GPL(rtsx_pci_dma_unmap_sg); + +int rtsx_pci_dma_transfer(struct rtsx_pcr *pcr, struct scatterlist *sglist, + int count, bool read, int timeout) +{ + struct completion trans_done; + struct scatterlist *sg; + dma_addr_t addr; + long timeleft; + unsigned long flags; + unsigned int len; + int i, err = 0; + u32 val; + u8 dir = read ? DEVICE_TO_HOST : HOST_TO_DEVICE; + + if (pcr->remove_pci) + return -ENODEV; + + if ((sglist == NULL) || (count < 1)) + return -EINVAL; + + val = ((u32)(dir & 0x01) << 29) | TRIG_DMA | ADMA_MODE; + pcr->sgi = 0; + for_each_sg(sglist, sg, count, i) { + addr = sg_dma_address(sg); + len = sg_dma_len(sg); + rtsx_pci_add_sg_tbl(pcr, addr, len, i == count - 1); + } + + spin_lock_irqsave(&pcr->lock, flags); + + pcr->done = &trans_done; + pcr->trans_result = TRANS_NOT_READY; + init_completion(&trans_done); + rtsx_pci_writel(pcr, RTSX_HDBAR, pcr->host_sg_tbl_addr); + rtsx_pci_writel(pcr, RTSX_HDBCTLR, val); + + spin_unlock_irqrestore(&pcr->lock, flags); + + timeleft = wait_for_completion_interruptible_timeout( + &trans_done, msecs_to_jiffies(timeout)); + if (timeleft <= 0) { + pcr_dbg(pcr, "Timeout (%s %d)\n", __func__, __LINE__); + err = -ETIMEDOUT; + goto out; + } + + spin_lock_irqsave(&pcr->lock, flags); + if (pcr->trans_result == TRANS_RESULT_FAIL) { + err = -EILSEQ; + if (pcr->dma_error_count < RTS_MAX_TIMES_FREQ_REDUCTION) + pcr->dma_error_count++; + } + + else if (pcr->trans_result == TRANS_NO_DEVICE) + err = -ENODEV; + spin_unlock_irqrestore(&pcr->lock, flags); + +out: + spin_lock_irqsave(&pcr->lock, flags); + pcr->done = NULL; + spin_unlock_irqrestore(&pcr->lock, flags); + + if ((err < 0) && (err != -ENODEV)) + rtsx_pci_stop_cmd(pcr); + + if (pcr->finish_me) + complete(pcr->finish_me); + + return err; +} +EXPORT_SYMBOL_GPL(rtsx_pci_dma_transfer); + +int rtsx_pci_read_ppbuf(struct rtsx_pcr *pcr, u8 *buf, int buf_len) +{ + int err; + int i, j; + u16 reg; + u8 *ptr; + + if (buf_len > 512) + buf_len = 512; + + ptr = buf; + reg = PPBUF_BASE2; + for (i = 0; i < buf_len / 256; i++) { + rtsx_pci_init_cmd(pcr); + + for (j = 0; j < 256; j++) + rtsx_pci_add_cmd(pcr, READ_REG_CMD, reg++, 0, 0); + + err = rtsx_pci_send_cmd(pcr, 250); + if (err < 0) + return err; + + memcpy(ptr, rtsx_pci_get_cmd_data(pcr), 256); + ptr += 256; + } + + if (buf_len % 256) { + rtsx_pci_init_cmd(pcr); + + for (j = 0; j < buf_len % 256; j++) + rtsx_pci_add_cmd(pcr, READ_REG_CMD, reg++, 0, 0); + + err = rtsx_pci_send_cmd(pcr, 250); + if (err < 0) + return err; + } + + memcpy(ptr, rtsx_pci_get_cmd_data(pcr), buf_len % 256); + + return 0; +} +EXPORT_SYMBOL_GPL(rtsx_pci_read_ppbuf); + +int rtsx_pci_write_ppbuf(struct rtsx_pcr *pcr, u8 *buf, int buf_len) +{ + int err; + int i, j; + u16 reg; + u8 *ptr; + + if (buf_len > 512) + buf_len = 512; + + ptr = buf; + reg = PPBUF_BASE2; + for (i = 0; i < buf_len / 256; i++) { + rtsx_pci_init_cmd(pcr); + + for (j = 0; j < 256; j++) { + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, + reg++, 0xFF, *ptr); + ptr++; + } + + err = rtsx_pci_send_cmd(pcr, 250); + if (err < 0) + return err; + } + + if (buf_len % 256) { + rtsx_pci_init_cmd(pcr); + + for (j = 0; j < buf_len % 256; j++) { + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, + reg++, 0xFF, *ptr); + ptr++; + } + + err = rtsx_pci_send_cmd(pcr, 250); + if (err < 0) + return err; + } + + return 0; +} +EXPORT_SYMBOL_GPL(rtsx_pci_write_ppbuf); + +static int rtsx_pci_set_pull_ctl(struct rtsx_pcr *pcr, const u32 *tbl) +{ + rtsx_pci_init_cmd(pcr); + + while (*tbl & 0xFFFF0000) { + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, + (u16)(*tbl >> 16), 0xFF, (u8)(*tbl)); + tbl++; + } + + return rtsx_pci_send_cmd(pcr, 100); +} + +int rtsx_pci_card_pull_ctl_enable(struct rtsx_pcr *pcr, int card) +{ + const u32 *tbl; + + if (card == RTSX_SD_CARD) + tbl = pcr->sd_pull_ctl_enable_tbl; + else if (card == RTSX_MS_CARD) + tbl = pcr->ms_pull_ctl_enable_tbl; + else + return -EINVAL; + + return rtsx_pci_set_pull_ctl(pcr, tbl); +} +EXPORT_SYMBOL_GPL(rtsx_pci_card_pull_ctl_enable); + +int rtsx_pci_card_pull_ctl_disable(struct rtsx_pcr *pcr, int card) +{ + const u32 *tbl; + + if (card == RTSX_SD_CARD) + tbl = pcr->sd_pull_ctl_disable_tbl; + else if (card == RTSX_MS_CARD) + tbl = pcr->ms_pull_ctl_disable_tbl; + else + return -EINVAL; + + + return rtsx_pci_set_pull_ctl(pcr, tbl); +} +EXPORT_SYMBOL_GPL(rtsx_pci_card_pull_ctl_disable); + +static void rtsx_pci_enable_bus_int(struct rtsx_pcr *pcr) +{ + pcr->bier = TRANS_OK_INT_EN | TRANS_FAIL_INT_EN | SD_INT_EN; + + if (pcr->num_slots > 1) + pcr->bier |= MS_INT_EN; + + /* Enable Bus Interrupt */ + rtsx_pci_writel(pcr, RTSX_BIER, pcr->bier); + + pcr_dbg(pcr, "RTSX_BIER: 0x%08x\n", pcr->bier); +} + +static inline u8 double_ssc_depth(u8 depth) +{ + return ((depth > 1) ? (depth - 1) : depth); +} + +static u8 revise_ssc_depth(u8 ssc_depth, u8 div) +{ + if (div > CLK_DIV_1) { + if (ssc_depth > (div - 1)) + ssc_depth -= (div - 1); + else + ssc_depth = SSC_DEPTH_4M; + } + + return ssc_depth; +} + +int rtsx_pci_switch_clock(struct rtsx_pcr *pcr, unsigned int card_clock, + u8 ssc_depth, bool initial_mode, bool double_clk, bool vpclk) +{ + int err, clk; + u8 n, clk_divider, mcu_cnt, div; + static const u8 depth[] = { + [RTSX_SSC_DEPTH_4M] = SSC_DEPTH_4M, + [RTSX_SSC_DEPTH_2M] = SSC_DEPTH_2M, + [RTSX_SSC_DEPTH_1M] = SSC_DEPTH_1M, + [RTSX_SSC_DEPTH_500K] = SSC_DEPTH_500K, + [RTSX_SSC_DEPTH_250K] = SSC_DEPTH_250K, + }; + + if (initial_mode) { + /* We use 250k(around) here, in initial stage */ + clk_divider = SD_CLK_DIVIDE_128; + card_clock = 30000000; + } else { + clk_divider = SD_CLK_DIVIDE_0; + } + err = rtsx_pci_write_register(pcr, SD_CFG1, + SD_CLK_DIVIDE_MASK, clk_divider); + if (err < 0) + return err; + + /* Reduce card clock by 20MHz each time a DMA transfer error occurs */ + if (card_clock == UHS_SDR104_MAX_DTR && + pcr->dma_error_count && + PCI_PID(pcr) == RTS5227_DEVICE_ID) + card_clock = UHS_SDR104_MAX_DTR - + (pcr->dma_error_count * 20000000); + + card_clock /= 1000000; + pcr_dbg(pcr, "Switch card clock to %dMHz\n", card_clock); + + clk = card_clock; + if (!initial_mode && double_clk) + clk = card_clock * 2; + pcr_dbg(pcr, "Internal SSC clock: %dMHz (cur_clock = %d)\n", + clk, pcr->cur_clock); + + if (clk == pcr->cur_clock) + return 0; + + if (pcr->ops->conv_clk_and_div_n) + n = (u8)pcr->ops->conv_clk_and_div_n(clk, CLK_TO_DIV_N); + else + n = (u8)(clk - 2); + if ((clk <= 2) || (n > MAX_DIV_N_PCR)) + return -EINVAL; + + mcu_cnt = (u8)(125/clk + 3); + if (mcu_cnt > 15) + mcu_cnt = 15; + + /* Make sure that the SSC clock div_n is not less than MIN_DIV_N_PCR */ + div = CLK_DIV_1; + while ((n < MIN_DIV_N_PCR) && (div < CLK_DIV_8)) { + if (pcr->ops->conv_clk_and_div_n) { + int dbl_clk = pcr->ops->conv_clk_and_div_n(n, + DIV_N_TO_CLK) * 2; + n = (u8)pcr->ops->conv_clk_and_div_n(dbl_clk, + CLK_TO_DIV_N); + } else { + n = (n + 2) * 2 - 2; + } + div++; + } + pcr_dbg(pcr, "n = %d, div = %d\n", n, div); + + ssc_depth = depth[ssc_depth]; + if (double_clk) + ssc_depth = double_ssc_depth(ssc_depth); + + ssc_depth = revise_ssc_depth(ssc_depth, div); + pcr_dbg(pcr, "ssc_depth = %d\n", ssc_depth); + + rtsx_pci_init_cmd(pcr); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CLK_CTL, + CLK_LOW_FREQ, CLK_LOW_FREQ); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CLK_DIV, + 0xFF, (div << 4) | mcu_cnt); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SSC_CTL1, SSC_RSTB, 0); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SSC_CTL2, + SSC_DEPTH_MASK, ssc_depth); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SSC_DIV_N_0, 0xFF, n); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SSC_CTL1, SSC_RSTB, SSC_RSTB); + if (vpclk) { + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD_VPCLK0_CTL, + PHASE_NOT_RESET, 0); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD_VPCLK0_CTL, + PHASE_NOT_RESET, PHASE_NOT_RESET); + } + + err = rtsx_pci_send_cmd(pcr, 2000); + if (err < 0) + return err; + + /* Wait SSC clock stable */ + udelay(SSC_CLOCK_STABLE_WAIT); + err = rtsx_pci_write_register(pcr, CLK_CTL, CLK_LOW_FREQ, 0); + if (err < 0) + return err; + + pcr->cur_clock = clk; + return 0; +} +EXPORT_SYMBOL_GPL(rtsx_pci_switch_clock); + +int rtsx_pci_card_power_on(struct rtsx_pcr *pcr, int card) +{ + if (pcr->ops->card_power_on) + return pcr->ops->card_power_on(pcr, card); + + return 0; +} +EXPORT_SYMBOL_GPL(rtsx_pci_card_power_on); + +int rtsx_pci_card_power_off(struct rtsx_pcr *pcr, int card) +{ + if (pcr->ops->card_power_off) + return pcr->ops->card_power_off(pcr, card); + + return 0; +} +EXPORT_SYMBOL_GPL(rtsx_pci_card_power_off); + +int rtsx_pci_card_exclusive_check(struct rtsx_pcr *pcr, int card) +{ + static const unsigned int cd_mask[] = { + [RTSX_SD_CARD] = SD_EXIST, + [RTSX_MS_CARD] = MS_EXIST + }; + + if (!(pcr->flags & PCR_MS_PMOS)) { + /* When using single PMOS, accessing card is not permitted + * if the existing card is not the designated one. + */ + if (pcr->card_exist & (~cd_mask[card])) + return -EIO; + } + + return 0; +} +EXPORT_SYMBOL_GPL(rtsx_pci_card_exclusive_check); + +int rtsx_pci_switch_output_voltage(struct rtsx_pcr *pcr, u8 voltage) +{ + if (pcr->ops->switch_output_voltage) + return pcr->ops->switch_output_voltage(pcr, voltage); + + return 0; +} +EXPORT_SYMBOL_GPL(rtsx_pci_switch_output_voltage); + +unsigned int rtsx_pci_card_exist(struct rtsx_pcr *pcr) +{ + unsigned int val; + + val = rtsx_pci_readl(pcr, RTSX_BIPR); + if (pcr->ops->cd_deglitch) + val = pcr->ops->cd_deglitch(pcr); + + return val; +} +EXPORT_SYMBOL_GPL(rtsx_pci_card_exist); + +void rtsx_pci_complete_unfinished_transfer(struct rtsx_pcr *pcr) +{ + struct completion finish; + + pcr->finish_me = &finish; + init_completion(&finish); + + if (pcr->done) + complete(pcr->done); + + if (!pcr->remove_pci) + rtsx_pci_stop_cmd(pcr); + + wait_for_completion_interruptible_timeout(&finish, + msecs_to_jiffies(2)); + pcr->finish_me = NULL; +} +EXPORT_SYMBOL_GPL(rtsx_pci_complete_unfinished_transfer); + +static void rtsx_pci_card_detect(struct work_struct *work) +{ + struct delayed_work *dwork; + struct rtsx_pcr *pcr; + unsigned long flags; + unsigned int card_detect = 0, card_inserted, card_removed; + u32 irq_status; + + dwork = to_delayed_work(work); + pcr = container_of(dwork, struct rtsx_pcr, carddet_work); + + pcr_dbg(pcr, "--> %s\n", __func__); + + mutex_lock(&pcr->pcr_mutex); + spin_lock_irqsave(&pcr->lock, flags); + + irq_status = rtsx_pci_readl(pcr, RTSX_BIPR); + pcr_dbg(pcr, "irq_status: 0x%08x\n", irq_status); + + irq_status &= CARD_EXIST; + card_inserted = pcr->card_inserted & irq_status; + card_removed = pcr->card_removed; + pcr->card_inserted = 0; + pcr->card_removed = 0; + + spin_unlock_irqrestore(&pcr->lock, flags); + + if (card_inserted || card_removed) { + pcr_dbg(pcr, "card_inserted: 0x%x, card_removed: 0x%x\n", + card_inserted, card_removed); + + if (pcr->ops->cd_deglitch) + card_inserted = pcr->ops->cd_deglitch(pcr); + + card_detect = card_inserted | card_removed; + + pcr->card_exist |= card_inserted; + pcr->card_exist &= ~card_removed; + } + + mutex_unlock(&pcr->pcr_mutex); + + if ((card_detect & SD_EXIST) && pcr->slots[RTSX_SD_CARD].card_event) + pcr->slots[RTSX_SD_CARD].card_event( + pcr->slots[RTSX_SD_CARD].p_dev); + if ((card_detect & MS_EXIST) && pcr->slots[RTSX_MS_CARD].card_event) + pcr->slots[RTSX_MS_CARD].card_event( + pcr->slots[RTSX_MS_CARD].p_dev); +} + +static void rtsx_pci_process_ocp(struct rtsx_pcr *pcr) +{ + if (pcr->ops->process_ocp) + pcr->ops->process_ocp(pcr); +} + +static int rtsx_pci_process_ocp_interrupt(struct rtsx_pcr *pcr) +{ + if (pcr->option.ocp_en) + rtsx_pci_process_ocp(pcr); + + return 0; +} + +static irqreturn_t rtsx_pci_isr(int irq, void *dev_id) +{ + struct rtsx_pcr *pcr = dev_id; + u32 int_reg; + + if (!pcr) + return IRQ_NONE; + + spin_lock(&pcr->lock); + + int_reg = rtsx_pci_readl(pcr, RTSX_BIPR); + /* Clear interrupt flag */ + rtsx_pci_writel(pcr, RTSX_BIPR, int_reg); + if ((int_reg & pcr->bier) == 0) { + spin_unlock(&pcr->lock); + return IRQ_NONE; + } + if (int_reg == 0xFFFFFFFF) { + spin_unlock(&pcr->lock); + return IRQ_HANDLED; + } + + int_reg &= (pcr->bier | 0x7FFFFF); + + if (int_reg & SD_OC_INT) + rtsx_pci_process_ocp_interrupt(pcr); + + if (int_reg & SD_INT) { + if (int_reg & SD_EXIST) { + pcr->card_inserted |= SD_EXIST; + } else { + pcr->card_removed |= SD_EXIST; + pcr->card_inserted &= ~SD_EXIST; + } + pcr->dma_error_count = 0; + } + + if (int_reg & MS_INT) { + if (int_reg & MS_EXIST) { + pcr->card_inserted |= MS_EXIST; + } else { + pcr->card_removed |= MS_EXIST; + pcr->card_inserted &= ~MS_EXIST; + } + } + + if (int_reg & (NEED_COMPLETE_INT | DELINK_INT)) { + if (int_reg & (TRANS_FAIL_INT | DELINK_INT)) { + pcr->trans_result = TRANS_RESULT_FAIL; + if (pcr->done) + complete(pcr->done); + } else if (int_reg & TRANS_OK_INT) { + pcr->trans_result = TRANS_RESULT_OK; + if (pcr->done) + complete(pcr->done); + } + } + + if (pcr->card_inserted || pcr->card_removed) + schedule_delayed_work(&pcr->carddet_work, + msecs_to_jiffies(200)); + + spin_unlock(&pcr->lock); + return IRQ_HANDLED; +} + +static int rtsx_pci_acquire_irq(struct rtsx_pcr *pcr) +{ + pcr_dbg(pcr, "%s: pcr->msi_en = %d, pci->irq = %d\n", + __func__, pcr->msi_en, pcr->pci->irq); + + if (request_irq(pcr->pci->irq, rtsx_pci_isr, + pcr->msi_en ? 0 : IRQF_SHARED, + DRV_NAME_RTSX_PCI, pcr)) { + dev_err(&(pcr->pci->dev), + "rtsx_sdmmc: unable to grab IRQ %d, disabling device\n", + pcr->pci->irq); + return -1; + } + + pcr->irq = pcr->pci->irq; + pci_intx(pcr->pci, !pcr->msi_en); + + return 0; +} + +static void rtsx_enable_aspm(struct rtsx_pcr *pcr) +{ + if (pcr->ops->set_aspm) + pcr->ops->set_aspm(pcr, true); + else + rtsx_comm_set_aspm(pcr, true); +} + +static void rtsx_comm_pm_power_saving(struct rtsx_pcr *pcr) +{ + struct rtsx_cr_option *option = &pcr->option; + + if (option->ltr_enabled) { + u32 latency = option->ltr_l1off_latency; + + if (rtsx_check_dev_flag(pcr, L1_SNOOZE_TEST_EN)) + mdelay(option->l1_snooze_delay); + + rtsx_set_ltr_latency(pcr, latency); + } + + if (rtsx_check_dev_flag(pcr, LTR_L1SS_PWR_GATE_EN)) + rtsx_set_l1off_sub_cfg_d0(pcr, 0); + + rtsx_enable_aspm(pcr); +} + +static void rtsx_pm_power_saving(struct rtsx_pcr *pcr) +{ + if (pcr->ops->power_saving) + pcr->ops->power_saving(pcr); + else + rtsx_comm_pm_power_saving(pcr); +} + +static void rtsx_pci_idle_work(struct work_struct *work) +{ + struct delayed_work *dwork = to_delayed_work(work); + struct rtsx_pcr *pcr = container_of(dwork, struct rtsx_pcr, idle_work); + + pcr_dbg(pcr, "--> %s\n", __func__); + + mutex_lock(&pcr->pcr_mutex); + + pcr->state = PDEV_STAT_IDLE; + + if (pcr->ops->disable_auto_blink) + pcr->ops->disable_auto_blink(pcr); + if (pcr->ops->turn_off_led) + pcr->ops->turn_off_led(pcr); + + rtsx_pm_power_saving(pcr); + + mutex_unlock(&pcr->pcr_mutex); +} + +#ifdef CONFIG_PM +static void rtsx_pci_power_off(struct rtsx_pcr *pcr, u8 pm_state) +{ + if (pcr->ops->turn_off_led) + pcr->ops->turn_off_led(pcr); + + rtsx_pci_writel(pcr, RTSX_BIER, 0); + pcr->bier = 0; + + rtsx_pci_write_register(pcr, PETXCFG, 0x08, 0x08); + rtsx_pci_write_register(pcr, HOST_SLEEP_STATE, 0x03, pm_state); + + if (pcr->ops->force_power_down) + pcr->ops->force_power_down(pcr, pm_state); +} +#endif + +void rtsx_pci_enable_ocp(struct rtsx_pcr *pcr) +{ + u8 val = SD_OCP_INT_EN | SD_DETECT_EN; + + if (pcr->ops->enable_ocp) + pcr->ops->enable_ocp(pcr); + else + rtsx_pci_write_register(pcr, REG_OCPCTL, 0xFF, val); + +} + +void rtsx_pci_disable_ocp(struct rtsx_pcr *pcr) +{ + u8 mask = SD_OCP_INT_EN | SD_DETECT_EN; + + if (pcr->ops->disable_ocp) + pcr->ops->disable_ocp(pcr); + else + rtsx_pci_write_register(pcr, REG_OCPCTL, mask, 0); +} + +void rtsx_pci_init_ocp(struct rtsx_pcr *pcr) +{ + if (pcr->ops->init_ocp) { + pcr->ops->init_ocp(pcr); + } else { + struct rtsx_cr_option *option = &(pcr->option); + + if (option->ocp_en) { + u8 val = option->sd_400mA_ocp_thd; + + rtsx_pci_write_register(pcr, FPDCTL, OC_POWER_DOWN, 0); + rtsx_pci_write_register(pcr, REG_OCPPARA1, + SD_OCP_TIME_MASK, SD_OCP_TIME_800); + rtsx_pci_write_register(pcr, REG_OCPPARA2, + SD_OCP_THD_MASK, val); + rtsx_pci_write_register(pcr, REG_OCPGLITCH, + SD_OCP_GLITCH_MASK, pcr->hw_param.ocp_glitch); + rtsx_pci_enable_ocp(pcr); + } else { + /* OC power down */ + rtsx_pci_write_register(pcr, FPDCTL, OC_POWER_DOWN, + OC_POWER_DOWN); + } + } +} + +int rtsx_pci_get_ocpstat(struct rtsx_pcr *pcr, u8 *val) +{ + if (pcr->ops->get_ocpstat) + return pcr->ops->get_ocpstat(pcr, val); + else + return rtsx_pci_read_register(pcr, REG_OCPSTAT, val); +} + +void rtsx_pci_clear_ocpstat(struct rtsx_pcr *pcr) +{ + if (pcr->ops->clear_ocpstat) { + pcr->ops->clear_ocpstat(pcr); + } else { + u8 mask = SD_OCP_INT_CLR | SD_OC_CLR; + u8 val = SD_OCP_INT_CLR | SD_OC_CLR; + + rtsx_pci_write_register(pcr, REG_OCPCTL, mask, val); + rtsx_pci_write_register(pcr, REG_OCPCTL, mask, 0); + } +} + +int rtsx_sd_power_off_card3v3(struct rtsx_pcr *pcr) +{ + rtsx_pci_write_register(pcr, CARD_CLK_EN, SD_CLK_EN | + MS_CLK_EN | SD40_CLK_EN, 0); + rtsx_pci_write_register(pcr, CARD_OE, SD_OUTPUT_EN, 0); + + rtsx_pci_card_power_off(pcr, RTSX_SD_CARD); + + msleep(50); + + rtsx_pci_card_pull_ctl_disable(pcr, RTSX_SD_CARD); + + return 0; +} + +int rtsx_ms_power_off_card3v3(struct rtsx_pcr *pcr) +{ + rtsx_pci_write_register(pcr, CARD_CLK_EN, SD_CLK_EN | + MS_CLK_EN | SD40_CLK_EN, 0); + + rtsx_pci_card_pull_ctl_disable(pcr, RTSX_MS_CARD); + + rtsx_pci_write_register(pcr, CARD_OE, MS_OUTPUT_EN, 0); + rtsx_pci_card_power_off(pcr, RTSX_MS_CARD); + + return 0; +} + +static int rtsx_pci_init_hw(struct rtsx_pcr *pcr) +{ + int err; + + pcr->pcie_cap = pci_find_capability(pcr->pci, PCI_CAP_ID_EXP); + rtsx_pci_writel(pcr, RTSX_HCBAR, pcr->host_cmds_addr); + + rtsx_pci_enable_bus_int(pcr); + + /* Power on SSC */ + err = rtsx_pci_write_register(pcr, FPDCTL, SSC_POWER_DOWN, 0); + if (err < 0) + return err; + + /* Wait SSC power stable */ + udelay(200); + + rtsx_pci_disable_aspm(pcr); + if (pcr->ops->optimize_phy) { + err = pcr->ops->optimize_phy(pcr); + if (err < 0) + return err; + } + + rtsx_pci_init_cmd(pcr); + + /* Set mcu_cnt to 7 to ensure data can be sampled properly */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CLK_DIV, 0x07, 0x07); + + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, HOST_SLEEP_STATE, 0x03, 0x00); + /* Disable card clock */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_CLK_EN, 0x1E, 0); + /* Reset delink mode */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CHANGE_LINK_STATE, 0x0A, 0); + /* Card driving select */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CARD_DRIVE_SEL, + 0xFF, pcr->card_drive_sel); + /* Enable SSC Clock */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SSC_CTL1, + 0xFF, SSC_8X_EN | SSC_SEL_4M); + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SSC_CTL2, 0xFF, 0x12); + /* Disable cd_pwr_save */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CHANGE_LINK_STATE, 0x16, 0x10); + /* Clear Link Ready Interrupt */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, IRQSTAT0, + LINK_RDY_INT, LINK_RDY_INT); + /* Enlarge the estimation window of PERST# glitch + * to reduce the chance of invalid card interrupt + */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PERST_GLITCH_WIDTH, 0xFF, 0x80); + /* Update RC oscillator to 400k + * bit[0] F_HIGH: for RC oscillator, Rst_value is 1'b1 + * 1: 2M 0: 400k + */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, RCCTL, 0x01, 0x00); + /* Set interrupt write clear + * bit 1: U_elbi_if_rd_clr_en + * 1: Enable ELBI interrupt[31:22] & [7:0] flag read clear + * 0: ELBI interrupt flag[31:22] & [7:0] only can be write clear + */ + rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, NFTS_TX_CTRL, 0x02, 0); + + err = rtsx_pci_send_cmd(pcr, 100); + if (err < 0) + return err; + + switch (PCI_PID(pcr)) { + case PID_5250: + case PID_524A: + case PID_525A: + case PID_5260: + rtsx_pci_write_register(pcr, PM_CLK_FORCE_CTL, 1, 1); + break; + default: + break; + } + + /* Enable clk_request_n to enable clock power management */ + rtsx_pci_write_config_byte(pcr, pcr->pcie_cap + PCI_EXP_LNKCTL + 1, 1); + /* Enter L1 when host tx idle */ + rtsx_pci_write_config_byte(pcr, 0x70F, 0x5B); + + if (pcr->ops->extra_init_hw) { + err = pcr->ops->extra_init_hw(pcr); + if (err < 0) + return err; + } + + /* No CD interrupt if probing driver with card inserted. + * So we need to initialize pcr->card_exist here. + */ + if (pcr->ops->cd_deglitch) + pcr->card_exist = pcr->ops->cd_deglitch(pcr); + else + pcr->card_exist = rtsx_pci_readl(pcr, RTSX_BIPR) & CARD_EXIST; + + return 0; +} + +static int rtsx_pci_init_chip(struct rtsx_pcr *pcr) +{ + int err; + + spin_lock_init(&pcr->lock); + mutex_init(&pcr->pcr_mutex); + + switch (PCI_PID(pcr)) { + default: + case 0x5209: + rts5209_init_params(pcr); + break; + + case 0x5229: + rts5229_init_params(pcr); + break; + + case 0x5289: + rtl8411_init_params(pcr); + break; + + case 0x5227: + rts5227_init_params(pcr); + break; + + case 0x522A: + rts522a_init_params(pcr); + break; + + case 0x5249: + rts5249_init_params(pcr); + break; + + case 0x524A: + rts524a_init_params(pcr); + break; + + case 0x525A: + rts525a_init_params(pcr); + break; + + case 0x5287: + rtl8411b_init_params(pcr); + break; + + case 0x5286: + rtl8402_init_params(pcr); + break; + case 0x5260: + rts5260_init_params(pcr); + break; + } + + pcr_dbg(pcr, "PID: 0x%04x, IC version: 0x%02x\n", + PCI_PID(pcr), pcr->ic_version); + + pcr->slots = kcalloc(pcr->num_slots, sizeof(struct rtsx_slot), + GFP_KERNEL); + if (!pcr->slots) + return -ENOMEM; + + if (pcr->ops->fetch_vendor_settings) + pcr->ops->fetch_vendor_settings(pcr); + + pcr_dbg(pcr, "pcr->aspm_en = 0x%x\n", pcr->aspm_en); + pcr_dbg(pcr, "pcr->sd30_drive_sel_1v8 = 0x%x\n", + pcr->sd30_drive_sel_1v8); + pcr_dbg(pcr, "pcr->sd30_drive_sel_3v3 = 0x%x\n", + pcr->sd30_drive_sel_3v3); + pcr_dbg(pcr, "pcr->card_drive_sel = 0x%x\n", + pcr->card_drive_sel); + pcr_dbg(pcr, "pcr->flags = 0x%x\n", pcr->flags); + + pcr->state = PDEV_STAT_IDLE; + err = rtsx_pci_init_hw(pcr); + if (err < 0) { + kfree(pcr->slots); + return err; + } + + return 0; +} + +static int rtsx_pci_probe(struct pci_dev *pcidev, + const struct pci_device_id *id) +{ + struct rtsx_pcr *pcr; + struct pcr_handle *handle; + u32 base, len; + int ret, i, bar = 0; + + dev_dbg(&(pcidev->dev), + ": Realtek PCI-E Card Reader found at %s [%04x:%04x] (rev %x)\n", + pci_name(pcidev), (int)pcidev->vendor, (int)pcidev->device, + (int)pcidev->revision); + + ret = pci_set_dma_mask(pcidev, DMA_BIT_MASK(32)); + if (ret < 0) + return ret; + + ret = pci_enable_device(pcidev); + if (ret) + return ret; + + ret = pci_request_regions(pcidev, DRV_NAME_RTSX_PCI); + if (ret) + goto disable; + + pcr = kzalloc(sizeof(*pcr), GFP_KERNEL); + if (!pcr) { + ret = -ENOMEM; + goto release_pci; + } + + handle = kzalloc(sizeof(*handle), GFP_KERNEL); + if (!handle) { + ret = -ENOMEM; + goto free_pcr; + } + handle->pcr = pcr; + + idr_preload(GFP_KERNEL); + spin_lock(&rtsx_pci_lock); + ret = idr_alloc(&rtsx_pci_idr, pcr, 0, 0, GFP_NOWAIT); + if (ret >= 0) + pcr->id = ret; + spin_unlock(&rtsx_pci_lock); + idr_preload_end(); + if (ret < 0) + goto free_handle; + + pcr->pci = pcidev; + dev_set_drvdata(&pcidev->dev, handle); + + if (CHK_PCI_PID(pcr, 0x525A)) + bar = 1; + len = pci_resource_len(pcidev, bar); + base = pci_resource_start(pcidev, bar); + pcr->remap_addr = ioremap_nocache(base, len); + if (!pcr->remap_addr) { + ret = -ENOMEM; + goto free_handle; + } + + pcr->rtsx_resv_buf = dma_alloc_coherent(&(pcidev->dev), + RTSX_RESV_BUF_LEN, &(pcr->rtsx_resv_buf_addr), + GFP_KERNEL); + if (pcr->rtsx_resv_buf == NULL) { + ret = -ENXIO; + goto unmap; + } + pcr->host_cmds_ptr = pcr->rtsx_resv_buf; + pcr->host_cmds_addr = pcr->rtsx_resv_buf_addr; + pcr->host_sg_tbl_ptr = pcr->rtsx_resv_buf + HOST_CMDS_BUF_LEN; + pcr->host_sg_tbl_addr = pcr->rtsx_resv_buf_addr + HOST_CMDS_BUF_LEN; + + pcr->card_inserted = 0; + pcr->card_removed = 0; + INIT_DELAYED_WORK(&pcr->carddet_work, rtsx_pci_card_detect); + INIT_DELAYED_WORK(&pcr->idle_work, rtsx_pci_idle_work); + + pcr->msi_en = msi_en; + if (pcr->msi_en) { + ret = pci_enable_msi(pcidev); + if (ret) + pcr->msi_en = false; + } + + ret = rtsx_pci_acquire_irq(pcr); + if (ret < 0) + goto disable_msi; + + pci_set_master(pcidev); + synchronize_irq(pcr->irq); + + ret = rtsx_pci_init_chip(pcr); + if (ret < 0) + goto disable_irq; + + for (i = 0; i < ARRAY_SIZE(rtsx_pcr_cells); i++) { + rtsx_pcr_cells[i].platform_data = handle; + rtsx_pcr_cells[i].pdata_size = sizeof(*handle); + } + ret = mfd_add_devices(&pcidev->dev, pcr->id, rtsx_pcr_cells, + ARRAY_SIZE(rtsx_pcr_cells), NULL, 0, NULL); + if (ret < 0) + goto free_slots; + + schedule_delayed_work(&pcr->idle_work, msecs_to_jiffies(200)); + + return 0; + +free_slots: + kfree(pcr->slots); +disable_irq: + free_irq(pcr->irq, (void *)pcr); +disable_msi: + if (pcr->msi_en) + pci_disable_msi(pcr->pci); + dma_free_coherent(&(pcr->pci->dev), RTSX_RESV_BUF_LEN, + pcr->rtsx_resv_buf, pcr->rtsx_resv_buf_addr); +unmap: + iounmap(pcr->remap_addr); +free_handle: + kfree(handle); +free_pcr: + kfree(pcr); +release_pci: + pci_release_regions(pcidev); +disable: + pci_disable_device(pcidev); + + return ret; +} + +static void rtsx_pci_remove(struct pci_dev *pcidev) +{ + struct pcr_handle *handle = pci_get_drvdata(pcidev); + struct rtsx_pcr *pcr = handle->pcr; + + pcr->remove_pci = true; + + /* Disable interrupts at the pcr level */ + spin_lock_irq(&pcr->lock); + rtsx_pci_writel(pcr, RTSX_BIER, 0); + pcr->bier = 0; + spin_unlock_irq(&pcr->lock); + + cancel_delayed_work_sync(&pcr->carddet_work); + cancel_delayed_work_sync(&pcr->idle_work); + + mfd_remove_devices(&pcidev->dev); + + dma_free_coherent(&(pcr->pci->dev), RTSX_RESV_BUF_LEN, + pcr->rtsx_resv_buf, pcr->rtsx_resv_buf_addr); + free_irq(pcr->irq, (void *)pcr); + if (pcr->msi_en) + pci_disable_msi(pcr->pci); + iounmap(pcr->remap_addr); + + pci_release_regions(pcidev); + pci_disable_device(pcidev); + + spin_lock(&rtsx_pci_lock); + idr_remove(&rtsx_pci_idr, pcr->id); + spin_unlock(&rtsx_pci_lock); + + kfree(pcr->slots); + kfree(pcr); + kfree(handle); + + dev_dbg(&(pcidev->dev), + ": Realtek PCI-E Card Reader at %s [%04x:%04x] has been removed\n", + pci_name(pcidev), (int)pcidev->vendor, (int)pcidev->device); +} + +#ifdef CONFIG_PM + +static int rtsx_pci_suspend(struct pci_dev *pcidev, pm_message_t state) +{ + struct pcr_handle *handle; + struct rtsx_pcr *pcr; + + dev_dbg(&(pcidev->dev), "--> %s\n", __func__); + + handle = pci_get_drvdata(pcidev); + pcr = handle->pcr; + + cancel_delayed_work(&pcr->carddet_work); + cancel_delayed_work(&pcr->idle_work); + + mutex_lock(&pcr->pcr_mutex); + + rtsx_pci_power_off(pcr, HOST_ENTER_S3); + + pci_save_state(pcidev); + pci_enable_wake(pcidev, pci_choose_state(pcidev, state), 0); + pci_disable_device(pcidev); + pci_set_power_state(pcidev, pci_choose_state(pcidev, state)); + + mutex_unlock(&pcr->pcr_mutex); + return 0; +} + +static int rtsx_pci_resume(struct pci_dev *pcidev) +{ + struct pcr_handle *handle; + struct rtsx_pcr *pcr; + int ret = 0; + + dev_dbg(&(pcidev->dev), "--> %s\n", __func__); + + handle = pci_get_drvdata(pcidev); + pcr = handle->pcr; + + mutex_lock(&pcr->pcr_mutex); + + pci_set_power_state(pcidev, PCI_D0); + pci_restore_state(pcidev); + ret = pci_enable_device(pcidev); + if (ret) + goto out; + pci_set_master(pcidev); + + ret = rtsx_pci_write_register(pcr, HOST_SLEEP_STATE, 0x03, 0x00); + if (ret) + goto out; + + ret = rtsx_pci_init_hw(pcr); + if (ret) + goto out; + + schedule_delayed_work(&pcr->idle_work, msecs_to_jiffies(200)); + +out: + mutex_unlock(&pcr->pcr_mutex); + return ret; +} + +static void rtsx_pci_shutdown(struct pci_dev *pcidev) +{ + struct pcr_handle *handle; + struct rtsx_pcr *pcr; + + dev_dbg(&(pcidev->dev), "--> %s\n", __func__); + + handle = pci_get_drvdata(pcidev); + pcr = handle->pcr; + rtsx_pci_power_off(pcr, HOST_ENTER_S1); + + pci_disable_device(pcidev); + free_irq(pcr->irq, (void *)pcr); + if (pcr->msi_en) + pci_disable_msi(pcr->pci); +} + +#else /* CONFIG_PM */ + +#define rtsx_pci_suspend NULL +#define rtsx_pci_resume NULL +#define rtsx_pci_shutdown NULL + +#endif /* CONFIG_PM */ + +static struct pci_driver rtsx_pci_driver = { + .name = DRV_NAME_RTSX_PCI, + .id_table = rtsx_pci_ids, + .probe = rtsx_pci_probe, + .remove = rtsx_pci_remove, + .suspend = rtsx_pci_suspend, + .resume = rtsx_pci_resume, + .shutdown = rtsx_pci_shutdown, +}; +module_pci_driver(rtsx_pci_driver); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Wei WANG "); +MODULE_DESCRIPTION("Realtek PCI-E Card Reader Driver"); diff --git a/drivers/misc/cardreader/rtsx_pcr.h b/drivers/misc/cardreader/rtsx_pcr.h new file mode 100644 index 000000000..6ea1655db --- /dev/null +++ b/drivers/misc/cardreader/rtsx_pcr.h @@ -0,0 +1,113 @@ +/* Driver for Realtek PCI-Express card reader + * + * Copyright(c) 2009-2013 Realtek Semiconductor Corp. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see . + * + * Author: + * Wei WANG + */ + +#ifndef __RTSX_PCR_H +#define __RTSX_PCR_H + +#include + +#define MIN_DIV_N_PCR 80 +#define MAX_DIV_N_PCR 208 + +#define RTS522A_PM_CTRL3 0xFF7E + +#define RTS524A_PME_FORCE_CTL 0xFF78 +#define RTS524A_PM_CTRL3 0xFF7E + +#define LTR_ACTIVE_LATENCY_DEF 0x883C +#define LTR_IDLE_LATENCY_DEF 0x892C +#define LTR_L1OFF_LATENCY_DEF 0x9003 +#define L1_SNOOZE_DELAY_DEF 1 +#define LTR_L1OFF_SSPWRGATE_5249_DEF 0xAF +#define LTR_L1OFF_SSPWRGATE_5250_DEF 0xFF +#define LTR_L1OFF_SNOOZE_SSPWRGATE_5249_DEF 0xAC +#define LTR_L1OFF_SNOOZE_SSPWRGATE_5250_DEF 0xF8 +#define CMD_TIMEOUT_DEF 100 +#define ASPM_MASK_NEG 0xFC +#define MASK_8_BIT_DEF 0xFF + +#define SSC_CLOCK_STABLE_WAIT 130 + +int __rtsx_pci_write_phy_register(struct rtsx_pcr *pcr, u8 addr, u16 val); +int __rtsx_pci_read_phy_register(struct rtsx_pcr *pcr, u8 addr, u16 *val); + +void rts5209_init_params(struct rtsx_pcr *pcr); +void rts5229_init_params(struct rtsx_pcr *pcr); +void rtl8411_init_params(struct rtsx_pcr *pcr); +void rtl8402_init_params(struct rtsx_pcr *pcr); +void rts5227_init_params(struct rtsx_pcr *pcr); +void rts522a_init_params(struct rtsx_pcr *pcr); +void rts5249_init_params(struct rtsx_pcr *pcr); +void rts524a_init_params(struct rtsx_pcr *pcr); +void rts525a_init_params(struct rtsx_pcr *pcr); +void rtl8411b_init_params(struct rtsx_pcr *pcr); +void rts5260_init_params(struct rtsx_pcr *pcr); + +static inline u8 map_sd_drive(int idx) +{ + u8 sd_drive[4] = { + 0x01, /* Type D */ + 0x02, /* Type C */ + 0x05, /* Type A */ + 0x03 /* Type B */ + }; + + return sd_drive[idx]; +} + +#define rtsx_vendor_setting_valid(reg) (!((reg) & 0x1000000)) +#define rts5209_vendor_setting1_valid(reg) (!((reg) & 0x80)) +#define rts5209_vendor_setting2_valid(reg) ((reg) & 0x80) + +#define rtsx_reg_to_aspm(reg) (((reg) >> 28) & 0x03) +#define rtsx_reg_to_sd30_drive_sel_1v8(reg) (((reg) >> 26) & 0x03) +#define rtsx_reg_to_sd30_drive_sel_3v3(reg) (((reg) >> 5) & 0x03) +#define rtsx_reg_to_card_drive_sel(reg) ((((reg) >> 25) & 0x01) << 6) +#define rtsx_reg_check_reverse_socket(reg) ((reg) & 0x4000) +#define rts5209_reg_to_aspm(reg) (((reg) >> 5) & 0x03) +#define rts5209_reg_check_ms_pmos(reg) (!((reg) & 0x08)) +#define rts5209_reg_to_sd30_drive_sel_1v8(reg) (((reg) >> 3) & 0x07) +#define rts5209_reg_to_sd30_drive_sel_3v3(reg) ((reg) & 0x07) +#define rts5209_reg_to_card_drive_sel(reg) ((reg) >> 8) +#define rtl8411_reg_to_sd30_drive_sel_3v3(reg) (((reg) >> 5) & 0x07) +#define rtl8411b_reg_to_sd30_drive_sel_3v3(reg) ((reg) & 0x03) + +#define set_pull_ctrl_tables(pcr, __device) \ +do { \ + pcr->sd_pull_ctl_enable_tbl = __device##_sd_pull_ctl_enable_tbl; \ + pcr->sd_pull_ctl_disable_tbl = __device##_sd_pull_ctl_disable_tbl; \ + pcr->ms_pull_ctl_enable_tbl = __device##_ms_pull_ctl_enable_tbl; \ + pcr->ms_pull_ctl_disable_tbl = __device##_ms_pull_ctl_disable_tbl; \ +} while (0) + +/* generic operations */ +int rtsx_gops_pm_reset(struct rtsx_pcr *pcr); +int rtsx_set_ltr_latency(struct rtsx_pcr *pcr, u32 latency); +int rtsx_set_l1off_sub(struct rtsx_pcr *pcr, u8 val); +void rtsx_pci_init_ocp(struct rtsx_pcr *pcr); +void rtsx_pci_disable_ocp(struct rtsx_pcr *pcr); +void rtsx_pci_enable_ocp(struct rtsx_pcr *pcr); +int rtsx_pci_get_ocpstat(struct rtsx_pcr *pcr, u8 *val); +void rtsx_pci_clear_ocpstat(struct rtsx_pcr *pcr); +int rtsx_sd_power_off_card3v3(struct rtsx_pcr *pcr); +int rtsx_ms_power_off_card3v3(struct rtsx_pcr *pcr); + +#endif diff --git a/drivers/misc/cardreader/rtsx_usb.c b/drivers/misc/cardreader/rtsx_usb.c new file mode 100644 index 000000000..869c176d4 --- /dev/null +++ b/drivers/misc/cardreader/rtsx_usb.c @@ -0,0 +1,792 @@ +/* Driver for Realtek USB card reader + * + * Copyright(c) 2009-2013 Realtek Semiconductor Corp. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see . + * + * Author: + * Roger Tseng + */ +#include +#include +#include +#include +#include +#include +#include + +static int polling_pipe = 1; +module_param(polling_pipe, int, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(polling_pipe, "polling pipe (0: ctl, 1: bulk)"); + +static const struct mfd_cell rtsx_usb_cells[] = { + [RTSX_USB_SD_CARD] = { + .name = "rtsx_usb_sdmmc", + .pdata_size = 0, + }, + [RTSX_USB_MS_CARD] = { + .name = "rtsx_usb_ms", + .pdata_size = 0, + }, +}; + +static void rtsx_usb_sg_timed_out(struct timer_list *t) +{ + struct rtsx_ucr *ucr = from_timer(ucr, t, sg_timer); + + dev_dbg(&ucr->pusb_intf->dev, "%s: sg transfer timed out", __func__); + usb_sg_cancel(&ucr->current_sg); +} + +static int rtsx_usb_bulk_transfer_sglist(struct rtsx_ucr *ucr, + unsigned int pipe, struct scatterlist *sg, int num_sg, + unsigned int length, unsigned int *act_len, int timeout) +{ + int ret; + + dev_dbg(&ucr->pusb_intf->dev, "%s: xfer %u bytes, %d entries\n", + __func__, length, num_sg); + ret = usb_sg_init(&ucr->current_sg, ucr->pusb_dev, pipe, 0, + sg, num_sg, length, GFP_NOIO); + if (ret) + return ret; + + ucr->sg_timer.expires = jiffies + msecs_to_jiffies(timeout); + add_timer(&ucr->sg_timer); + usb_sg_wait(&ucr->current_sg); + if (!del_timer_sync(&ucr->sg_timer)) + ret = -ETIMEDOUT; + else + ret = ucr->current_sg.status; + + if (act_len) + *act_len = ucr->current_sg.bytes; + + return ret; +} + +int rtsx_usb_transfer_data(struct rtsx_ucr *ucr, unsigned int pipe, + void *buf, unsigned int len, int num_sg, + unsigned int *act_len, int timeout) +{ + if (timeout < 600) + timeout = 600; + + if (num_sg) + return rtsx_usb_bulk_transfer_sglist(ucr, pipe, + (struct scatterlist *)buf, num_sg, len, act_len, + timeout); + else + return usb_bulk_msg(ucr->pusb_dev, pipe, buf, len, act_len, + timeout); +} +EXPORT_SYMBOL_GPL(rtsx_usb_transfer_data); + +static inline void rtsx_usb_seq_cmd_hdr(struct rtsx_ucr *ucr, + u16 addr, u16 len, u8 seq_type) +{ + rtsx_usb_cmd_hdr_tag(ucr); + + ucr->cmd_buf[PACKET_TYPE] = seq_type; + ucr->cmd_buf[5] = (u8)(len >> 8); + ucr->cmd_buf[6] = (u8)len; + ucr->cmd_buf[8] = (u8)(addr >> 8); + ucr->cmd_buf[9] = (u8)addr; + + if (seq_type == SEQ_WRITE) + ucr->cmd_buf[STAGE_FLAG] = 0; + else + ucr->cmd_buf[STAGE_FLAG] = STAGE_R; +} + +static int rtsx_usb_seq_write_register(struct rtsx_ucr *ucr, + u16 addr, u16 len, u8 *data) +{ + u16 cmd_len = ALIGN(SEQ_WRITE_DATA_OFFSET + len, 4); + + if (!data) + return -EINVAL; + + if (cmd_len > IOBUF_SIZE) + return -EINVAL; + + rtsx_usb_seq_cmd_hdr(ucr, addr, len, SEQ_WRITE); + memcpy(ucr->cmd_buf + SEQ_WRITE_DATA_OFFSET, data, len); + + return rtsx_usb_transfer_data(ucr, + usb_sndbulkpipe(ucr->pusb_dev, EP_BULK_OUT), + ucr->cmd_buf, cmd_len, 0, NULL, 100); +} + +static int rtsx_usb_seq_read_register(struct rtsx_ucr *ucr, + u16 addr, u16 len, u8 *data) +{ + int i, ret; + u16 rsp_len = round_down(len, 4); + u16 res_len = len - rsp_len; + + if (!data) + return -EINVAL; + + /* 4-byte aligned part */ + if (rsp_len) { + rtsx_usb_seq_cmd_hdr(ucr, addr, len, SEQ_READ); + ret = rtsx_usb_transfer_data(ucr, + usb_sndbulkpipe(ucr->pusb_dev, EP_BULK_OUT), + ucr->cmd_buf, 12, 0, NULL, 100); + if (ret) + return ret; + + ret = rtsx_usb_transfer_data(ucr, + usb_rcvbulkpipe(ucr->pusb_dev, EP_BULK_IN), + data, rsp_len, 0, NULL, 100); + if (ret) + return ret; + } + + /* unaligned part */ + for (i = 0; i < res_len; i++) { + ret = rtsx_usb_read_register(ucr, addr + rsp_len + i, + data + rsp_len + i); + if (ret) + return ret; + } + + return 0; +} + +int rtsx_usb_read_ppbuf(struct rtsx_ucr *ucr, u8 *buf, int buf_len) +{ + return rtsx_usb_seq_read_register(ucr, PPBUF_BASE2, (u16)buf_len, buf); +} +EXPORT_SYMBOL_GPL(rtsx_usb_read_ppbuf); + +int rtsx_usb_write_ppbuf(struct rtsx_ucr *ucr, u8 *buf, int buf_len) +{ + return rtsx_usb_seq_write_register(ucr, PPBUF_BASE2, (u16)buf_len, buf); +} +EXPORT_SYMBOL_GPL(rtsx_usb_write_ppbuf); + +int rtsx_usb_ep0_write_register(struct rtsx_ucr *ucr, u16 addr, + u8 mask, u8 data) +{ + u16 value, index; + + addr |= EP0_WRITE_REG_CMD << EP0_OP_SHIFT; + value = swab16(addr); + index = mask | data << 8; + + return usb_control_msg(ucr->pusb_dev, + usb_sndctrlpipe(ucr->pusb_dev, 0), RTSX_USB_REQ_REG_OP, + USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, + value, index, NULL, 0, 100); +} +EXPORT_SYMBOL_GPL(rtsx_usb_ep0_write_register); + +int rtsx_usb_ep0_read_register(struct rtsx_ucr *ucr, u16 addr, u8 *data) +{ + u16 value; + u8 *buf; + int ret; + + if (!data) + return -EINVAL; + + buf = kzalloc(sizeof(u8), GFP_KERNEL); + if (!buf) + return -ENOMEM; + + addr |= EP0_READ_REG_CMD << EP0_OP_SHIFT; + value = swab16(addr); + + ret = usb_control_msg(ucr->pusb_dev, + usb_rcvctrlpipe(ucr->pusb_dev, 0), RTSX_USB_REQ_REG_OP, + USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, + value, 0, buf, 1, 100); + *data = *buf; + + kfree(buf); + return ret; +} +EXPORT_SYMBOL_GPL(rtsx_usb_ep0_read_register); + +void rtsx_usb_add_cmd(struct rtsx_ucr *ucr, u8 cmd_type, u16 reg_addr, + u8 mask, u8 data) +{ + int i; + + if (ucr->cmd_idx < (IOBUF_SIZE - CMD_OFFSET) / 4) { + i = CMD_OFFSET + ucr->cmd_idx * 4; + + ucr->cmd_buf[i++] = ((cmd_type & 0x03) << 6) | + (u8)((reg_addr >> 8) & 0x3F); + ucr->cmd_buf[i++] = (u8)reg_addr; + ucr->cmd_buf[i++] = mask; + ucr->cmd_buf[i++] = data; + + ucr->cmd_idx++; + } +} +EXPORT_SYMBOL_GPL(rtsx_usb_add_cmd); + +int rtsx_usb_send_cmd(struct rtsx_ucr *ucr, u8 flag, int timeout) +{ + int ret; + + ucr->cmd_buf[CNT_H] = (u8)(ucr->cmd_idx >> 8); + ucr->cmd_buf[CNT_L] = (u8)(ucr->cmd_idx); + ucr->cmd_buf[STAGE_FLAG] = flag; + + ret = rtsx_usb_transfer_data(ucr, + usb_sndbulkpipe(ucr->pusb_dev, EP_BULK_OUT), + ucr->cmd_buf, ucr->cmd_idx * 4 + CMD_OFFSET, + 0, NULL, timeout); + if (ret) { + rtsx_usb_clear_fsm_err(ucr); + return ret; + } + + return 0; +} +EXPORT_SYMBOL_GPL(rtsx_usb_send_cmd); + +int rtsx_usb_get_rsp(struct rtsx_ucr *ucr, int rsp_len, int timeout) +{ + if (rsp_len <= 0) + return -EINVAL; + + rsp_len = ALIGN(rsp_len, 4); + + return rtsx_usb_transfer_data(ucr, + usb_rcvbulkpipe(ucr->pusb_dev, EP_BULK_IN), + ucr->rsp_buf, rsp_len, 0, NULL, timeout); +} +EXPORT_SYMBOL_GPL(rtsx_usb_get_rsp); + +static int rtsx_usb_get_status_with_bulk(struct rtsx_ucr *ucr, u16 *status) +{ + int ret; + + rtsx_usb_init_cmd(ucr); + rtsx_usb_add_cmd(ucr, READ_REG_CMD, CARD_EXIST, 0x00, 0x00); + rtsx_usb_add_cmd(ucr, READ_REG_CMD, OCPSTAT, 0x00, 0x00); + ret = rtsx_usb_send_cmd(ucr, MODE_CR, 100); + if (ret) + return ret; + + ret = rtsx_usb_get_rsp(ucr, 2, 100); + if (ret) + return ret; + + *status = ((ucr->rsp_buf[0] >> 2) & 0x0f) | + ((ucr->rsp_buf[1] & 0x03) << 4); + + return 0; +} + +int rtsx_usb_get_card_status(struct rtsx_ucr *ucr, u16 *status) +{ + int ret; + u16 *buf; + + if (!status) + return -EINVAL; + + if (polling_pipe == 0) { + buf = kzalloc(sizeof(u16), GFP_KERNEL); + if (!buf) + return -ENOMEM; + + ret = usb_control_msg(ucr->pusb_dev, + usb_rcvctrlpipe(ucr->pusb_dev, 0), + RTSX_USB_REQ_POLL, + USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, + 0, 0, buf, 2, 100); + *status = *buf; + + kfree(buf); + } else { + ret = rtsx_usb_get_status_with_bulk(ucr, status); + } + + /* usb_control_msg may return positive when success */ + if (ret < 0) + return ret; + + return 0; +} +EXPORT_SYMBOL_GPL(rtsx_usb_get_card_status); + +static int rtsx_usb_write_phy_register(struct rtsx_ucr *ucr, u8 addr, u8 val) +{ + dev_dbg(&ucr->pusb_intf->dev, "Write 0x%x to phy register 0x%x\n", + val, addr); + + rtsx_usb_init_cmd(ucr); + + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, HS_VSTAIN, 0xFF, val); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, HS_VCONTROL, 0xFF, addr & 0x0F); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, HS_VLOADM, 0xFF, 0x00); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, HS_VLOADM, 0xFF, 0x00); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, HS_VLOADM, 0xFF, 0x01); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, HS_VCONTROL, + 0xFF, (addr >> 4) & 0x0F); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, HS_VLOADM, 0xFF, 0x00); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, HS_VLOADM, 0xFF, 0x00); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, HS_VLOADM, 0xFF, 0x01); + + return rtsx_usb_send_cmd(ucr, MODE_C, 100); +} + +int rtsx_usb_write_register(struct rtsx_ucr *ucr, u16 addr, u8 mask, u8 data) +{ + rtsx_usb_init_cmd(ucr); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, addr, mask, data); + return rtsx_usb_send_cmd(ucr, MODE_C, 100); +} +EXPORT_SYMBOL_GPL(rtsx_usb_write_register); + +int rtsx_usb_read_register(struct rtsx_ucr *ucr, u16 addr, u8 *data) +{ + int ret; + + if (data != NULL) + *data = 0; + + rtsx_usb_init_cmd(ucr); + rtsx_usb_add_cmd(ucr, READ_REG_CMD, addr, 0, 0); + ret = rtsx_usb_send_cmd(ucr, MODE_CR, 100); + if (ret) + return ret; + + ret = rtsx_usb_get_rsp(ucr, 1, 100); + if (ret) + return ret; + + if (data != NULL) + *data = ucr->rsp_buf[0]; + + return 0; +} +EXPORT_SYMBOL_GPL(rtsx_usb_read_register); + +static inline u8 double_ssc_depth(u8 depth) +{ + return (depth > 1) ? (depth - 1) : depth; +} + +static u8 revise_ssc_depth(u8 ssc_depth, u8 div) +{ + if (div > CLK_DIV_1) { + if (ssc_depth > div - 1) + ssc_depth -= (div - 1); + else + ssc_depth = SSC_DEPTH_2M; + } + + return ssc_depth; +} + +int rtsx_usb_switch_clock(struct rtsx_ucr *ucr, unsigned int card_clock, + u8 ssc_depth, bool initial_mode, bool double_clk, bool vpclk) +{ + int ret; + u8 n, clk_divider, mcu_cnt, div; + + if (!card_clock) { + ucr->cur_clk = 0; + return 0; + } + + if (initial_mode) { + /* We use 250k(around) here, in initial stage */ + clk_divider = SD_CLK_DIVIDE_128; + card_clock = 30000000; + } else { + clk_divider = SD_CLK_DIVIDE_0; + } + + ret = rtsx_usb_write_register(ucr, SD_CFG1, + SD_CLK_DIVIDE_MASK, clk_divider); + if (ret < 0) + return ret; + + card_clock /= 1000000; + dev_dbg(&ucr->pusb_intf->dev, + "Switch card clock to %dMHz\n", card_clock); + + if (!initial_mode && double_clk) + card_clock *= 2; + dev_dbg(&ucr->pusb_intf->dev, + "Internal SSC clock: %dMHz (cur_clk = %d)\n", + card_clock, ucr->cur_clk); + + if (card_clock == ucr->cur_clk) + return 0; + + /* Converting clock value into internal settings: n and div */ + n = card_clock - 2; + if ((card_clock <= 2) || (n > MAX_DIV_N)) + return -EINVAL; + + mcu_cnt = 60/card_clock + 3; + if (mcu_cnt > 15) + mcu_cnt = 15; + + /* Make sure that the SSC clock div_n is not less than MIN_DIV_N */ + + div = CLK_DIV_1; + while (n < MIN_DIV_N && div < CLK_DIV_4) { + n = (n + 2) * 2 - 2; + div++; + } + dev_dbg(&ucr->pusb_intf->dev, "n = %d, div = %d\n", n, div); + + if (double_clk) + ssc_depth = double_ssc_depth(ssc_depth); + + ssc_depth = revise_ssc_depth(ssc_depth, div); + dev_dbg(&ucr->pusb_intf->dev, "ssc_depth = %d\n", ssc_depth); + + rtsx_usb_init_cmd(ucr); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, CLK_DIV, CLK_CHANGE, CLK_CHANGE); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, CLK_DIV, + 0x3F, (div << 4) | mcu_cnt); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, SSC_CTL1, SSC_RSTB, 0); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, SSC_CTL2, + SSC_DEPTH_MASK, ssc_depth); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, SSC_DIV_N_0, 0xFF, n); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, SSC_CTL1, SSC_RSTB, SSC_RSTB); + if (vpclk) { + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, SD_VPCLK0_CTL, + PHASE_NOT_RESET, 0); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, SD_VPCLK0_CTL, + PHASE_NOT_RESET, PHASE_NOT_RESET); + } + + ret = rtsx_usb_send_cmd(ucr, MODE_C, 2000); + if (ret < 0) + return ret; + + ret = rtsx_usb_write_register(ucr, SSC_CTL1, 0xff, + SSC_RSTB | SSC_8X_EN | SSC_SEL_4M); + if (ret < 0) + return ret; + + /* Wait SSC clock stable */ + usleep_range(100, 1000); + + ret = rtsx_usb_write_register(ucr, CLK_DIV, CLK_CHANGE, 0); + if (ret < 0) + return ret; + + ucr->cur_clk = card_clock; + + return 0; +} +EXPORT_SYMBOL_GPL(rtsx_usb_switch_clock); + +int rtsx_usb_card_exclusive_check(struct rtsx_ucr *ucr, int card) +{ + int ret; + u16 val; + u16 cd_mask[] = { + [RTSX_USB_SD_CARD] = (CD_MASK & ~SD_CD), + [RTSX_USB_MS_CARD] = (CD_MASK & ~MS_CD) + }; + + ret = rtsx_usb_get_card_status(ucr, &val); + /* + * If get status fails, return 0 (ok) for the exclusive check + * and let the flow fail at somewhere else. + */ + if (ret) + return 0; + + if (val & cd_mask[card]) + return -EIO; + + return 0; +} +EXPORT_SYMBOL_GPL(rtsx_usb_card_exclusive_check); + +static int rtsx_usb_reset_chip(struct rtsx_ucr *ucr) +{ + int ret; + u8 val; + + rtsx_usb_init_cmd(ucr); + + if (CHECK_PKG(ucr, LQFP48)) { + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, CARD_PWR_CTL, + LDO3318_PWR_MASK, LDO_SUSPEND); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, CARD_PWR_CTL, + FORCE_LDO_POWERB, FORCE_LDO_POWERB); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, CARD_PULL_CTL1, + 0x30, 0x10); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, CARD_PULL_CTL5, + 0x03, 0x01); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, CARD_PULL_CTL6, + 0x0C, 0x04); + } + + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, SYS_DUMMY0, NYET_MSAK, NYET_EN); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, CD_DEGLITCH_WIDTH, 0xFF, 0x08); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, + CD_DEGLITCH_EN, XD_CD_DEGLITCH_EN, 0x0); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, SD30_DRIVE_SEL, + SD30_DRIVE_MASK, DRIVER_TYPE_D); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, + CARD_DRIVE_SEL, SD20_DRIVE_MASK, 0x0); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, LDO_POWER_CFG, 0xE0, 0x0); + + if (ucr->is_rts5179) + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, + CARD_PULL_CTL5, 0x03, 0x01); + + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, CARD_DMA1_CTL, + EXTEND_DMA1_ASYNC_SIGNAL, EXTEND_DMA1_ASYNC_SIGNAL); + rtsx_usb_add_cmd(ucr, WRITE_REG_CMD, CARD_INT_PEND, + XD_INT | MS_INT | SD_INT, + XD_INT | MS_INT | SD_INT); + + ret = rtsx_usb_send_cmd(ucr, MODE_C, 100); + if (ret) + return ret; + + /* config non-crystal mode */ + rtsx_usb_read_register(ucr, CFG_MODE, &val); + if ((val & XTAL_FREE) || ((val & CLK_MODE_MASK) == CLK_MODE_NON_XTAL)) { + ret = rtsx_usb_write_phy_register(ucr, 0xC2, 0x7C); + if (ret) + return ret; + } + + return 0; +} + +static int rtsx_usb_init_chip(struct rtsx_ucr *ucr) +{ + int ret; + u8 val; + + rtsx_usb_clear_fsm_err(ucr); + + /* power on SSC */ + ret = rtsx_usb_write_register(ucr, + FPDCTL, SSC_POWER_MASK, SSC_POWER_ON); + if (ret) + return ret; + + usleep_range(100, 1000); + ret = rtsx_usb_write_register(ucr, CLK_DIV, CLK_CHANGE, 0x00); + if (ret) + return ret; + + /* determine IC version */ + ret = rtsx_usb_read_register(ucr, HW_VERSION, &val); + if (ret) + return ret; + + ucr->ic_version = val & HW_VER_MASK; + + /* determine package */ + ret = rtsx_usb_read_register(ucr, CARD_SHARE_MODE, &val); + if (ret) + return ret; + + if (val & CARD_SHARE_LQFP_SEL) { + ucr->package = LQFP48; + dev_dbg(&ucr->pusb_intf->dev, "Package: LQFP48\n"); + } else { + ucr->package = QFN24; + dev_dbg(&ucr->pusb_intf->dev, "Package: QFN24\n"); + } + + /* determine IC variations */ + rtsx_usb_read_register(ucr, CFG_MODE_1, &val); + if (val & RTS5179) { + ucr->is_rts5179 = true; + dev_dbg(&ucr->pusb_intf->dev, "Device is rts5179\n"); + } else { + ucr->is_rts5179 = false; + } + + return rtsx_usb_reset_chip(ucr); +} + +static int rtsx_usb_probe(struct usb_interface *intf, + const struct usb_device_id *id) +{ + struct usb_device *usb_dev = interface_to_usbdev(intf); + struct rtsx_ucr *ucr; + int ret; + + dev_dbg(&intf->dev, + ": Realtek USB Card Reader found at bus %03d address %03d\n", + usb_dev->bus->busnum, usb_dev->devnum); + + ucr = devm_kzalloc(&intf->dev, sizeof(*ucr), GFP_KERNEL); + if (!ucr) + return -ENOMEM; + + ucr->pusb_dev = usb_dev; + + ucr->iobuf = usb_alloc_coherent(ucr->pusb_dev, IOBUF_SIZE, + GFP_KERNEL, &ucr->iobuf_dma); + if (!ucr->iobuf) + return -ENOMEM; + + usb_set_intfdata(intf, ucr); + + ucr->vendor_id = id->idVendor; + ucr->product_id = id->idProduct; + ucr->cmd_buf = ucr->rsp_buf = ucr->iobuf; + + mutex_init(&ucr->dev_mutex); + + ucr->pusb_intf = intf; + + /* initialize */ + ret = rtsx_usb_init_chip(ucr); + if (ret) + goto out_init_fail; + + /* initialize USB SG transfer timer */ + timer_setup(&ucr->sg_timer, rtsx_usb_sg_timed_out, 0); + + ret = mfd_add_hotplug_devices(&intf->dev, rtsx_usb_cells, + ARRAY_SIZE(rtsx_usb_cells)); + if (ret) + goto out_init_fail; + +#ifdef CONFIG_PM + intf->needs_remote_wakeup = 1; + usb_enable_autosuspend(usb_dev); +#endif + + return 0; + +out_init_fail: + usb_set_intfdata(ucr->pusb_intf, NULL); + usb_free_coherent(ucr->pusb_dev, IOBUF_SIZE, ucr->iobuf, + ucr->iobuf_dma); + return ret; +} + +static void rtsx_usb_disconnect(struct usb_interface *intf) +{ + struct rtsx_ucr *ucr = (struct rtsx_ucr *)usb_get_intfdata(intf); + + dev_dbg(&intf->dev, "%s called\n", __func__); + + mfd_remove_devices(&intf->dev); + + usb_set_intfdata(ucr->pusb_intf, NULL); + usb_free_coherent(ucr->pusb_dev, IOBUF_SIZE, ucr->iobuf, + ucr->iobuf_dma); +} + +#ifdef CONFIG_PM +static int rtsx_usb_suspend(struct usb_interface *intf, pm_message_t message) +{ + struct rtsx_ucr *ucr = + (struct rtsx_ucr *)usb_get_intfdata(intf); + u16 val = 0; + + dev_dbg(&intf->dev, "%s called with pm message 0x%04x\n", + __func__, message.event); + + if (PMSG_IS_AUTO(message)) { + if (mutex_trylock(&ucr->dev_mutex)) { + rtsx_usb_get_card_status(ucr, &val); + mutex_unlock(&ucr->dev_mutex); + + /* Defer the autosuspend if card exists */ + if (val & (SD_CD | MS_CD)) + return -EAGAIN; + } else { + /* There is an ongoing operation*/ + return -EAGAIN; + } + } + + return 0; +} + +static int rtsx_usb_resume(struct usb_interface *intf) +{ + return 0; +} + +static int rtsx_usb_reset_resume(struct usb_interface *intf) +{ + struct rtsx_ucr *ucr = + (struct rtsx_ucr *)usb_get_intfdata(intf); + + rtsx_usb_reset_chip(ucr); + return 0; +} + +#else /* CONFIG_PM */ + +#define rtsx_usb_suspend NULL +#define rtsx_usb_resume NULL +#define rtsx_usb_reset_resume NULL + +#endif /* CONFIG_PM */ + + +static int rtsx_usb_pre_reset(struct usb_interface *intf) +{ + struct rtsx_ucr *ucr = (struct rtsx_ucr *)usb_get_intfdata(intf); + + mutex_lock(&ucr->dev_mutex); + return 0; +} + +static int rtsx_usb_post_reset(struct usb_interface *intf) +{ + struct rtsx_ucr *ucr = (struct rtsx_ucr *)usb_get_intfdata(intf); + + mutex_unlock(&ucr->dev_mutex); + return 0; +} + +static struct usb_device_id rtsx_usb_usb_ids[] = { + { USB_DEVICE(0x0BDA, 0x0129) }, + { USB_DEVICE(0x0BDA, 0x0139) }, + { USB_DEVICE(0x0BDA, 0x0140) }, + { } +}; +MODULE_DEVICE_TABLE(usb, rtsx_usb_usb_ids); + +static struct usb_driver rtsx_usb_driver = { + .name = "rtsx_usb", + .probe = rtsx_usb_probe, + .disconnect = rtsx_usb_disconnect, + .suspend = rtsx_usb_suspend, + .resume = rtsx_usb_resume, + .reset_resume = rtsx_usb_reset_resume, + .pre_reset = rtsx_usb_pre_reset, + .post_reset = rtsx_usb_post_reset, + .id_table = rtsx_usb_usb_ids, + .supports_autosuspend = 1, + .soft_unbind = 1, +}; + +module_usb_driver(rtsx_usb_driver); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Roger Tseng "); +MODULE_DESCRIPTION("Realtek USB Card Reader Driver"); diff --git a/drivers/misc/cb710/Kconfig b/drivers/misc/cb710/Kconfig new file mode 100644 index 000000000..22429b8b1 --- /dev/null +++ b/drivers/misc/cb710/Kconfig @@ -0,0 +1,25 @@ +config CB710_CORE + tristate "ENE CB710/720 Flash memory card reader support" + depends on PCI + help + This option enables support for PCI ENE CB710/720 Flash memory card + reader found in some laptops (ie. some versions of HP Compaq nx9500). + + You will also have to select some flash card format drivers (MMC/SD, + MemoryStick). + + This driver can also be built as a module. If so, the module + will be called cb710. + +config CB710_DEBUG + bool "Enable driver debugging" + depends on CB710_CORE != n + default n + help + This is an option for use by developers; most people should + say N here. This adds a lot of debugging output to dmesg. + +config CB710_DEBUG_ASSUMPTIONS + bool + depends on CB710_CORE != n + default y diff --git a/drivers/misc/cb710/Makefile b/drivers/misc/cb710/Makefile new file mode 100644 index 000000000..467c8e9ca --- /dev/null +++ b/drivers/misc/cb710/Makefile @@ -0,0 +1,6 @@ +ccflags-$(CONFIG_CB710_DEBUG) := -DDEBUG + +obj-$(CONFIG_CB710_CORE) += cb710.o + +cb710-y := core.o sgbuf2.o +cb710-$(CONFIG_CB710_DEBUG) += debug.o diff --git a/drivers/misc/cb710/core.c b/drivers/misc/cb710/core.c new file mode 100644 index 000000000..2c43fd09d --- /dev/null +++ b/drivers/misc/cb710/core.c @@ -0,0 +1,346 @@ +/* + * cb710/core.c + * + * Copyright by MichaÅ‚ MirosÅ‚aw, 2008-2009 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include +#include +#include +#include +#include +#include + +static DEFINE_IDA(cb710_ida); + +void cb710_pci_update_config_reg(struct pci_dev *pdev, + int reg, uint32_t mask, uint32_t xor) +{ + u32 rval; + + pci_read_config_dword(pdev, reg, &rval); + rval = (rval & mask) ^ xor; + pci_write_config_dword(pdev, reg, rval); +} +EXPORT_SYMBOL_GPL(cb710_pci_update_config_reg); + +/* Some magic writes based on Windows driver init code */ +static int cb710_pci_configure(struct pci_dev *pdev) +{ + unsigned int devfn = PCI_DEVFN(PCI_SLOT(pdev->devfn), 0); + struct pci_dev *pdev0; + u32 val; + + cb710_pci_update_config_reg(pdev, 0x48, + ~0x000000FF, 0x0000003F); + + pci_read_config_dword(pdev, 0x48, &val); + if (val & 0x80000000) + return 0; + + pdev0 = pci_get_slot(pdev->bus, devfn); + if (!pdev0) + return -ENODEV; + + if (pdev0->vendor == PCI_VENDOR_ID_ENE + && pdev0->device == PCI_DEVICE_ID_ENE_720) { + cb710_pci_update_config_reg(pdev0, 0x8C, + ~0x00F00000, 0x00100000); + cb710_pci_update_config_reg(pdev0, 0xB0, + ~0x08000000, 0x08000000); + } + + cb710_pci_update_config_reg(pdev0, 0x8C, + ~0x00000F00, 0x00000200); + cb710_pci_update_config_reg(pdev0, 0x90, + ~0x00060000, 0x00040000); + + pci_dev_put(pdev0); + + return 0; +} + +static irqreturn_t cb710_irq_handler(int irq, void *data) +{ + struct cb710_chip *chip = data; + struct cb710_slot *slot = &chip->slot[0]; + irqreturn_t handled = IRQ_NONE; + unsigned nr; + + spin_lock(&chip->irq_lock); /* incl. smp_rmb() */ + + for (nr = chip->slots; nr; ++slot, --nr) { + cb710_irq_handler_t handler_func = slot->irq_handler; + if (handler_func && handler_func(slot)) + handled = IRQ_HANDLED; + } + + spin_unlock(&chip->irq_lock); + + return handled; +} + +static void cb710_release_slot(struct device *dev) +{ +#ifdef CONFIG_CB710_DEBUG_ASSUMPTIONS + struct cb710_slot *slot = cb710_pdev_to_slot(to_platform_device(dev)); + struct cb710_chip *chip = cb710_slot_to_chip(slot); + + /* slot struct can be freed now */ + atomic_dec(&chip->slot_refs_count); +#endif +} + +static int cb710_register_slot(struct cb710_chip *chip, + unsigned slot_mask, unsigned io_offset, const char *name) +{ + int nr = chip->slots; + struct cb710_slot *slot = &chip->slot[nr]; + int err; + + dev_dbg(cb710_chip_dev(chip), + "register: %s.%d; slot %d; mask %d; IO offset: 0x%02X\n", + name, chip->platform_id, nr, slot_mask, io_offset); + + /* slot->irq_handler == NULL here; this needs to be + * seen before platform_device_register() */ + ++chip->slots; + smp_wmb(); + + slot->iobase = chip->iobase + io_offset; + slot->pdev.name = name; + slot->pdev.id = chip->platform_id; + slot->pdev.dev.parent = &chip->pdev->dev; + slot->pdev.dev.release = cb710_release_slot; + + err = platform_device_register(&slot->pdev); + +#ifdef CONFIG_CB710_DEBUG_ASSUMPTIONS + atomic_inc(&chip->slot_refs_count); +#endif + + if (err) { + /* device_initialize() called from platform_device_register() + * wants this on error path */ + platform_device_put(&slot->pdev); + + /* slot->irq_handler == NULL here anyway, so no lock needed */ + --chip->slots; + return err; + } + + chip->slot_mask |= slot_mask; + + return 0; +} + +static void cb710_unregister_slot(struct cb710_chip *chip, + unsigned slot_mask) +{ + int nr = chip->slots - 1; + + if (!(chip->slot_mask & slot_mask)) + return; + + platform_device_unregister(&chip->slot[nr].pdev); + + /* complementary to spin_unlock() in cb710_set_irq_handler() */ + smp_rmb(); + BUG_ON(chip->slot[nr].irq_handler != NULL); + + /* slot->irq_handler == NULL here, so no lock needed */ + --chip->slots; + chip->slot_mask &= ~slot_mask; +} + +void cb710_set_irq_handler(struct cb710_slot *slot, + cb710_irq_handler_t handler) +{ + struct cb710_chip *chip = cb710_slot_to_chip(slot); + unsigned long flags; + + spin_lock_irqsave(&chip->irq_lock, flags); + slot->irq_handler = handler; + spin_unlock_irqrestore(&chip->irq_lock, flags); +} +EXPORT_SYMBOL_GPL(cb710_set_irq_handler); + +#ifdef CONFIG_PM + +static int cb710_suspend(struct pci_dev *pdev, pm_message_t state) +{ + struct cb710_chip *chip = pci_get_drvdata(pdev); + + devm_free_irq(&pdev->dev, pdev->irq, chip); + pci_save_state(pdev); + pci_disable_device(pdev); + if (state.event & PM_EVENT_SLEEP) + pci_set_power_state(pdev, PCI_D3hot); + return 0; +} + +static int cb710_resume(struct pci_dev *pdev) +{ + struct cb710_chip *chip = pci_get_drvdata(pdev); + int err; + + pci_set_power_state(pdev, PCI_D0); + pci_restore_state(pdev); + err = pcim_enable_device(pdev); + if (err) + return err; + + return devm_request_irq(&pdev->dev, pdev->irq, + cb710_irq_handler, IRQF_SHARED, KBUILD_MODNAME, chip); +} + +#endif /* CONFIG_PM */ + +static int cb710_probe(struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + struct cb710_chip *chip; + u32 val; + int err; + int n = 0; + + err = cb710_pci_configure(pdev); + if (err) + return err; + + /* this is actually magic... */ + pci_read_config_dword(pdev, 0x48, &val); + if (!(val & 0x80000000)) { + pci_write_config_dword(pdev, 0x48, val|0x71000000); + pci_read_config_dword(pdev, 0x48, &val); + } + + dev_dbg(&pdev->dev, "PCI config[0x48] = 0x%08X\n", val); + if (!(val & 0x70000000)) + return -ENODEV; + val = (val >> 28) & 7; + if (val & CB710_SLOT_MMC) + ++n; + if (val & CB710_SLOT_MS) + ++n; + if (val & CB710_SLOT_SM) + ++n; + + chip = devm_kzalloc(&pdev->dev, struct_size(chip, slot, n), + GFP_KERNEL); + if (!chip) + return -ENOMEM; + + err = pcim_enable_device(pdev); + if (err) + return err; + + err = pcim_iomap_regions(pdev, 0x0001, KBUILD_MODNAME); + if (err) + return err; + + spin_lock_init(&chip->irq_lock); + chip->pdev = pdev; + chip->iobase = pcim_iomap_table(pdev)[0]; + + pci_set_drvdata(pdev, chip); + + err = devm_request_irq(&pdev->dev, pdev->irq, + cb710_irq_handler, IRQF_SHARED, KBUILD_MODNAME, chip); + if (err) + return err; + + err = ida_alloc(&cb710_ida, GFP_KERNEL); + if (err < 0) + return err; + chip->platform_id = err; + + dev_info(&pdev->dev, "id %d, IO 0x%p, IRQ %d\n", + chip->platform_id, chip->iobase, pdev->irq); + + if (val & CB710_SLOT_MMC) { /* MMC/SD slot */ + err = cb710_register_slot(chip, + CB710_SLOT_MMC, 0x00, "cb710-mmc"); + if (err) + return err; + } + + if (val & CB710_SLOT_MS) { /* MemoryStick slot */ + err = cb710_register_slot(chip, + CB710_SLOT_MS, 0x40, "cb710-ms"); + if (err) + goto unreg_mmc; + } + + if (val & CB710_SLOT_SM) { /* SmartMedia slot */ + err = cb710_register_slot(chip, + CB710_SLOT_SM, 0x60, "cb710-sm"); + if (err) + goto unreg_ms; + } + + return 0; +unreg_ms: + cb710_unregister_slot(chip, CB710_SLOT_MS); +unreg_mmc: + cb710_unregister_slot(chip, CB710_SLOT_MMC); + +#ifdef CONFIG_CB710_DEBUG_ASSUMPTIONS + BUG_ON(atomic_read(&chip->slot_refs_count) != 0); +#endif + return err; +} + +static void cb710_remove_one(struct pci_dev *pdev) +{ + struct cb710_chip *chip = pci_get_drvdata(pdev); + + cb710_unregister_slot(chip, CB710_SLOT_SM); + cb710_unregister_slot(chip, CB710_SLOT_MS); + cb710_unregister_slot(chip, CB710_SLOT_MMC); +#ifdef CONFIG_CB710_DEBUG_ASSUMPTIONS + BUG_ON(atomic_read(&chip->slot_refs_count) != 0); +#endif + + ida_free(&cb710_ida, chip->platform_id); +} + +static const struct pci_device_id cb710_pci_tbl[] = { + { PCI_VENDOR_ID_ENE, PCI_DEVICE_ID_ENE_CB710_FLASH, + PCI_ANY_ID, PCI_ANY_ID, }, + { 0, } +}; + +static struct pci_driver cb710_driver = { + .name = KBUILD_MODNAME, + .id_table = cb710_pci_tbl, + .probe = cb710_probe, + .remove = cb710_remove_one, +#ifdef CONFIG_PM + .suspend = cb710_suspend, + .resume = cb710_resume, +#endif +}; + +static int __init cb710_init_module(void) +{ + return pci_register_driver(&cb710_driver); +} + +static void __exit cb710_cleanup_module(void) +{ + pci_unregister_driver(&cb710_driver); + ida_destroy(&cb710_ida); +} + +module_init(cb710_init_module); +module_exit(cb710_cleanup_module); + +MODULE_AUTHOR("MichaÅ‚ MirosÅ‚aw "); +MODULE_DESCRIPTION("ENE CB710 memory card reader driver"); +MODULE_LICENSE("GPL"); +MODULE_DEVICE_TABLE(pci, cb710_pci_tbl); diff --git a/drivers/misc/cb710/debug.c b/drivers/misc/cb710/debug.c new file mode 100644 index 000000000..fcb3b8e30 --- /dev/null +++ b/drivers/misc/cb710/debug.c @@ -0,0 +1,118 @@ +/* + * cb710/debug.c + * + * Copyright by MichaÅ‚ MirosÅ‚aw, 2008-2009 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include +#include + +#define CB710_REG_COUNT 0x80 + +static const u16 allow[CB710_REG_COUNT/16] = { + 0xFFF0, 0xFFFF, 0xFFFF, 0xFFFF, + 0xFFF0, 0xFFFF, 0xFFFF, 0xFFFF, +}; +static const char *const prefix[ARRAY_SIZE(allow)] = { + "MMC", "MMC", "MMC", "MMC", + "MS?", "MS?", "SM?", "SM?" +}; + +static inline int allow_reg_read(unsigned block, unsigned offset, unsigned bits) +{ + unsigned mask = (1 << bits/8) - 1; + offset *= bits/8; + return ((allow[block] >> offset) & mask) == mask; +} + +#define CB710_READ_REGS_TEMPLATE(t) \ +static void cb710_read_regs_##t(void __iomem *iobase, \ + u##t *reg, unsigned select) \ +{ \ + unsigned i, j; \ + \ + for (i = 0; i < ARRAY_SIZE(allow); ++i, reg += 16/(t/8)) { \ + if (!(select & (1 << i))) \ + continue; \ + \ + for (j = 0; j < 0x10/(t/8); ++j) { \ + if (!allow_reg_read(i, j, t)) \ + continue; \ + reg[j] = ioread##t(iobase \ + + (i << 4) + (j * (t/8))); \ + } \ + } \ +} + +static const char cb710_regf_8[] = "%02X"; +static const char cb710_regf_16[] = "%04X"; +static const char cb710_regf_32[] = "%08X"; +static const char cb710_xes[] = "xxxxxxxx"; + +#define CB710_DUMP_REGS_TEMPLATE(t) \ +static void cb710_dump_regs_##t(struct device *dev, \ + const u##t *reg, unsigned select) \ +{ \ + const char *const xp = &cb710_xes[8 - t/4]; \ + const char *const format = cb710_regf_##t; \ + \ + char msg[100], *p; \ + unsigned i, j; \ + \ + for (i = 0; i < ARRAY_SIZE(allow); ++i, reg += 16/(t/8)) { \ + if (!(select & (1 << i))) \ + continue; \ + p = msg; \ + for (j = 0; j < 0x10/(t/8); ++j) { \ + *p++ = ' '; \ + if (j == 8/(t/8)) \ + *p++ = ' '; \ + if (allow_reg_read(i, j, t)) \ + p += sprintf(p, format, reg[j]); \ + else \ + p += sprintf(p, "%s", xp); \ + } \ + dev_dbg(dev, "%s 0x%02X %s\n", prefix[i], i << 4, msg); \ + } \ +} + +#define CB710_READ_AND_DUMP_REGS_TEMPLATE(t) \ +static void cb710_read_and_dump_regs_##t(struct cb710_chip *chip, \ + unsigned select) \ +{ \ + u##t regs[CB710_REG_COUNT/sizeof(u##t)]; \ + \ + memset(®s, 0, sizeof(regs)); \ + cb710_read_regs_##t(chip->iobase, regs, select); \ + cb710_dump_regs_##t(cb710_chip_dev(chip), regs, select); \ +} + +#define CB710_REG_ACCESS_TEMPLATES(t) \ + CB710_READ_REGS_TEMPLATE(t) \ + CB710_DUMP_REGS_TEMPLATE(t) \ + CB710_READ_AND_DUMP_REGS_TEMPLATE(t) + +CB710_REG_ACCESS_TEMPLATES(8) +CB710_REG_ACCESS_TEMPLATES(16) +CB710_REG_ACCESS_TEMPLATES(32) + +void cb710_dump_regs(struct cb710_chip *chip, unsigned select) +{ + if (!(select & CB710_DUMP_REGS_MASK)) + select = CB710_DUMP_REGS_ALL; + if (!(select & CB710_DUMP_ACCESS_MASK)) + select |= CB710_DUMP_ACCESS_8; + + if (select & CB710_DUMP_ACCESS_32) + cb710_read_and_dump_regs_32(chip, select); + if (select & CB710_DUMP_ACCESS_16) + cb710_read_and_dump_regs_16(chip, select); + if (select & CB710_DUMP_ACCESS_8) + cb710_read_and_dump_regs_8(chip, select); +} +EXPORT_SYMBOL_GPL(cb710_dump_regs); + diff --git a/drivers/misc/cb710/sgbuf2.c b/drivers/misc/cb710/sgbuf2.c new file mode 100644 index 000000000..4d2a72a53 --- /dev/null +++ b/drivers/misc/cb710/sgbuf2.c @@ -0,0 +1,146 @@ +/* + * cb710/sgbuf2.c + * + * Copyright by MichaÅ‚ MirosÅ‚aw, 2008-2009 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include +#include + +static bool sg_dwiter_next(struct sg_mapping_iter *miter) +{ + if (sg_miter_next(miter)) { + miter->consumed = 0; + return true; + } else + return false; +} + +static bool sg_dwiter_is_at_end(struct sg_mapping_iter *miter) +{ + return miter->length == miter->consumed && !sg_dwiter_next(miter); +} + +static uint32_t sg_dwiter_read_buffer(struct sg_mapping_iter *miter) +{ + size_t len, left = 4; + uint32_t data; + void *addr = &data; + + do { + len = min(miter->length - miter->consumed, left); + memcpy(addr, miter->addr + miter->consumed, len); + miter->consumed += len; + left -= len; + if (!left) + return data; + addr += len; + } while (sg_dwiter_next(miter)); + + memset(addr, 0, left); + return data; +} + +static inline bool needs_unaligned_copy(const void *ptr) +{ +#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS + return false; +#else + return ((uintptr_t)ptr & 3) != 0; +#endif +} + +static bool sg_dwiter_get_next_block(struct sg_mapping_iter *miter, uint32_t **ptr) +{ + size_t len; + + if (sg_dwiter_is_at_end(miter)) + return true; + + len = miter->length - miter->consumed; + + if (likely(len >= 4 && !needs_unaligned_copy( + miter->addr + miter->consumed))) { + *ptr = miter->addr + miter->consumed; + miter->consumed += 4; + return true; + } + + return false; +} + +/** + * cb710_sg_dwiter_read_next_block() - get next 32-bit word from sg buffer + * @miter: sg mapping iterator used for reading + * + * Description: + * Returns 32-bit word starting at byte pointed to by @miter@ + * handling any alignment issues. Bytes past the buffer's end + * are not accessed (read) but are returned as zeroes. @miter@ + * is advanced by 4 bytes or to the end of buffer whichever is + * closer. + * + * Context: + * Same requirements as in sg_miter_next(). + * + * Returns: + * 32-bit word just read. + */ +uint32_t cb710_sg_dwiter_read_next_block(struct sg_mapping_iter *miter) +{ + uint32_t *ptr = NULL; + + if (likely(sg_dwiter_get_next_block(miter, &ptr))) + return ptr ? *ptr : 0; + + return sg_dwiter_read_buffer(miter); +} +EXPORT_SYMBOL_GPL(cb710_sg_dwiter_read_next_block); + +static void sg_dwiter_write_slow(struct sg_mapping_iter *miter, uint32_t data) +{ + size_t len, left = 4; + void *addr = &data; + + do { + len = min(miter->length - miter->consumed, left); + memcpy(miter->addr, addr, len); + miter->consumed += len; + left -= len; + if (!left) + return; + addr += len; + } while (sg_dwiter_next(miter)); +} + +/** + * cb710_sg_dwiter_write_next_block() - write next 32-bit word to sg buffer + * @miter: sg mapping iterator used for writing + * + * Description: + * Writes 32-bit word starting at byte pointed to by @miter@ + * handling any alignment issues. Bytes which would be written + * past the buffer's end are silently discarded. @miter@ is + * advanced by 4 bytes or to the end of buffer whichever is closer. + * + * Context: + * Same requirements as in sg_miter_next(). + */ +void cb710_sg_dwiter_write_next_block(struct sg_mapping_iter *miter, uint32_t data) +{ + uint32_t *ptr = NULL; + + if (likely(sg_dwiter_get_next_block(miter, &ptr))) { + if (ptr) + *ptr = data; + else + return; + } else + sg_dwiter_write_slow(miter, data); +} +EXPORT_SYMBOL_GPL(cb710_sg_dwiter_write_next_block); + diff --git a/drivers/misc/cs5535-mfgpt.c b/drivers/misc/cs5535-mfgpt.c new file mode 100644 index 000000000..347f08f2f --- /dev/null +++ b/drivers/misc/cs5535-mfgpt.c @@ -0,0 +1,383 @@ +/* + * Driver for the CS5535/CS5536 Multi-Function General Purpose Timers (MFGPT) + * + * Copyright (C) 2006, Advanced Micro Devices, Inc. + * Copyright (C) 2007 Andres Salomon + * Copyright (C) 2009 Andres Salomon + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public License + * as published by the Free Software Foundation. + * + * The MFGPTs are documented in AMD Geode CS5536 Companion Device Data Book. + */ + +#include +#include +#include +#include +#include +#include +#include + +#define DRV_NAME "cs5535-mfgpt" + +static int mfgpt_reset_timers; +module_param_named(mfgptfix, mfgpt_reset_timers, int, 0644); +MODULE_PARM_DESC(mfgptfix, "Try to reset the MFGPT timers during init; " + "required by some broken BIOSes (ie, TinyBIOS < 0.99) or kexec " + "(1 = reset the MFGPT using an undocumented bit, " + "2 = perform a soft reset by unconfiguring all timers); " + "use what works best for you."); + +struct cs5535_mfgpt_timer { + struct cs5535_mfgpt_chip *chip; + int nr; +}; + +static struct cs5535_mfgpt_chip { + DECLARE_BITMAP(avail, MFGPT_MAX_TIMERS); + resource_size_t base; + + struct platform_device *pdev; + spinlock_t lock; + int initialized; +} cs5535_mfgpt_chip; + +int cs5535_mfgpt_toggle_event(struct cs5535_mfgpt_timer *timer, int cmp, + int event, int enable) +{ + uint32_t msr, mask, value, dummy; + int shift = (cmp == MFGPT_CMP1) ? 0 : 8; + + if (!timer) { + WARN_ON(1); + return -EIO; + } + + /* + * The register maps for these are described in sections 6.17.1.x of + * the AMD Geode CS5536 Companion Device Data Book. + */ + switch (event) { + case MFGPT_EVENT_RESET: + /* + * XXX: According to the docs, we cannot reset timers above + * 6; that is, resets for 7 and 8 will be ignored. Is this + * a problem? -dilinger + */ + msr = MSR_MFGPT_NR; + mask = 1 << (timer->nr + 24); + break; + + case MFGPT_EVENT_NMI: + msr = MSR_MFGPT_NR; + mask = 1 << (timer->nr + shift); + break; + + case MFGPT_EVENT_IRQ: + msr = MSR_MFGPT_IRQ; + mask = 1 << (timer->nr + shift); + break; + + default: + return -EIO; + } + + rdmsr(msr, value, dummy); + + if (enable) + value |= mask; + else + value &= ~mask; + + wrmsr(msr, value, dummy); + return 0; +} +EXPORT_SYMBOL_GPL(cs5535_mfgpt_toggle_event); + +int cs5535_mfgpt_set_irq(struct cs5535_mfgpt_timer *timer, int cmp, int *irq, + int enable) +{ + uint32_t zsel, lpc, dummy; + int shift; + + if (!timer) { + WARN_ON(1); + return -EIO; + } + + /* + * Unfortunately, MFGPTs come in pairs sharing their IRQ lines. If VSA + * is using the same CMP of the timer's Siamese twin, the IRQ is set to + * 2, and we mustn't use nor change it. + * XXX: Likewise, 2 Linux drivers might clash if the 2nd overwrites the + * IRQ of the 1st. This can only happen if forcing an IRQ, calling this + * with *irq==0 is safe. Currently there _are_ no 2 drivers. + */ + rdmsr(MSR_PIC_ZSEL_LOW, zsel, dummy); + shift = ((cmp == MFGPT_CMP1 ? 0 : 4) + timer->nr % 4) * 4; + if (((zsel >> shift) & 0xF) == 2) + return -EIO; + + /* Choose IRQ: if none supplied, keep IRQ already set or use default */ + if (!*irq) + *irq = (zsel >> shift) & 0xF; + if (!*irq) + *irq = CONFIG_CS5535_MFGPT_DEFAULT_IRQ; + + /* Can't use IRQ if it's 0 (=disabled), 2, or routed to LPC */ + if (*irq < 1 || *irq == 2 || *irq > 15) + return -EIO; + rdmsr(MSR_PIC_IRQM_LPC, lpc, dummy); + if (lpc & (1 << *irq)) + return -EIO; + + /* All chosen and checked - go for it */ + if (cs5535_mfgpt_toggle_event(timer, cmp, MFGPT_EVENT_IRQ, enable)) + return -EIO; + if (enable) { + zsel = (zsel & ~(0xF << shift)) | (*irq << shift); + wrmsr(MSR_PIC_ZSEL_LOW, zsel, dummy); + } + + return 0; +} +EXPORT_SYMBOL_GPL(cs5535_mfgpt_set_irq); + +struct cs5535_mfgpt_timer *cs5535_mfgpt_alloc_timer(int timer_nr, int domain) +{ + struct cs5535_mfgpt_chip *mfgpt = &cs5535_mfgpt_chip; + struct cs5535_mfgpt_timer *timer = NULL; + unsigned long flags; + int max; + + if (!mfgpt->initialized) + goto done; + + /* only allocate timers from the working domain if requested */ + if (domain == MFGPT_DOMAIN_WORKING) + max = 6; + else + max = MFGPT_MAX_TIMERS; + + if (timer_nr >= max) { + /* programmer error. silly programmers! */ + WARN_ON(1); + goto done; + } + + spin_lock_irqsave(&mfgpt->lock, flags); + if (timer_nr < 0) { + unsigned long t; + + /* try to find any available timer */ + t = find_first_bit(mfgpt->avail, max); + /* set timer_nr to -1 if no timers available */ + timer_nr = t < max ? (int) t : -1; + } else { + /* check if the requested timer's available */ + if (!test_bit(timer_nr, mfgpt->avail)) + timer_nr = -1; + } + + if (timer_nr >= 0) + /* if timer_nr is not -1, it's an available timer */ + __clear_bit(timer_nr, mfgpt->avail); + spin_unlock_irqrestore(&mfgpt->lock, flags); + + if (timer_nr < 0) + goto done; + + timer = kmalloc(sizeof(*timer), GFP_KERNEL); + if (!timer) { + /* aw hell */ + spin_lock_irqsave(&mfgpt->lock, flags); + __set_bit(timer_nr, mfgpt->avail); + spin_unlock_irqrestore(&mfgpt->lock, flags); + goto done; + } + timer->chip = mfgpt; + timer->nr = timer_nr; + dev_info(&mfgpt->pdev->dev, "registered timer %d\n", timer_nr); + +done: + return timer; +} +EXPORT_SYMBOL_GPL(cs5535_mfgpt_alloc_timer); + +/* + * XXX: This frees the timer memory, but never resets the actual hardware + * timer. The old geode_mfgpt code did this; it would be good to figure + * out a way to actually release the hardware timer. See comments below. + */ +void cs5535_mfgpt_free_timer(struct cs5535_mfgpt_timer *timer) +{ + unsigned long flags; + uint16_t val; + + /* timer can be made available again only if never set up */ + val = cs5535_mfgpt_read(timer, MFGPT_REG_SETUP); + if (!(val & MFGPT_SETUP_SETUP)) { + spin_lock_irqsave(&timer->chip->lock, flags); + __set_bit(timer->nr, timer->chip->avail); + spin_unlock_irqrestore(&timer->chip->lock, flags); + } + + kfree(timer); +} +EXPORT_SYMBOL_GPL(cs5535_mfgpt_free_timer); + +uint16_t cs5535_mfgpt_read(struct cs5535_mfgpt_timer *timer, uint16_t reg) +{ + return inw(timer->chip->base + reg + (timer->nr * 8)); +} +EXPORT_SYMBOL_GPL(cs5535_mfgpt_read); + +void cs5535_mfgpt_write(struct cs5535_mfgpt_timer *timer, uint16_t reg, + uint16_t value) +{ + outw(value, timer->chip->base + reg + (timer->nr * 8)); +} +EXPORT_SYMBOL_GPL(cs5535_mfgpt_write); + +/* + * This is a sledgehammer that resets all MFGPT timers. This is required by + * some broken BIOSes which leave the system in an unstable state + * (TinyBIOS 0.98, for example; fixed in 0.99). It's uncertain as to + * whether or not this secret MSR can be used to release individual timers. + * Jordan tells me that he and Mitch once played w/ it, but it's unclear + * what the results of that were (and they experienced some instability). + */ +static void reset_all_timers(void) +{ + uint32_t val, dummy; + + /* The following undocumented bit resets the MFGPT timers */ + val = 0xFF; dummy = 0; + wrmsr(MSR_MFGPT_SETUP, val, dummy); +} + +/* + * This is another sledgehammer to reset all MFGPT timers. + * Instead of using the undocumented bit method it clears + * IRQ, NMI and RESET settings. + */ +static void soft_reset(void) +{ + int i; + struct cs5535_mfgpt_timer t; + + for (i = 0; i < MFGPT_MAX_TIMERS; i++) { + t.nr = i; + + cs5535_mfgpt_toggle_event(&t, MFGPT_CMP1, MFGPT_EVENT_RESET, 0); + cs5535_mfgpt_toggle_event(&t, MFGPT_CMP2, MFGPT_EVENT_RESET, 0); + cs5535_mfgpt_toggle_event(&t, MFGPT_CMP1, MFGPT_EVENT_NMI, 0); + cs5535_mfgpt_toggle_event(&t, MFGPT_CMP2, MFGPT_EVENT_NMI, 0); + cs5535_mfgpt_toggle_event(&t, MFGPT_CMP1, MFGPT_EVENT_IRQ, 0); + cs5535_mfgpt_toggle_event(&t, MFGPT_CMP2, MFGPT_EVENT_IRQ, 0); + } +} + +/* + * Check whether any MFGPTs are available for the kernel to use. In most + * cases, firmware that uses AMD's VSA code will claim all timers during + * bootup; we certainly don't want to take them if they're already in use. + * In other cases (such as with VSAless OpenFirmware), the system firmware + * leaves timers available for us to use. + */ +static int scan_timers(struct cs5535_mfgpt_chip *mfgpt) +{ + struct cs5535_mfgpt_timer timer = { .chip = mfgpt }; + unsigned long flags; + int timers = 0; + uint16_t val; + int i; + + /* bios workaround */ + if (mfgpt_reset_timers == 1) + reset_all_timers(); + else if (mfgpt_reset_timers == 2) + soft_reset(); + + /* just to be safe, protect this section w/ lock */ + spin_lock_irqsave(&mfgpt->lock, flags); + for (i = 0; i < MFGPT_MAX_TIMERS; i++) { + timer.nr = i; + val = cs5535_mfgpt_read(&timer, MFGPT_REG_SETUP); + if (!(val & MFGPT_SETUP_SETUP) || mfgpt_reset_timers == 2) { + __set_bit(i, mfgpt->avail); + timers++; + } + } + spin_unlock_irqrestore(&mfgpt->lock, flags); + + return timers; +} + +static int cs5535_mfgpt_probe(struct platform_device *pdev) +{ + struct resource *res; + int err = -EIO, t; + + if (mfgpt_reset_timers < 0 || mfgpt_reset_timers > 2) { + dev_err(&pdev->dev, "Bad mfgpt_reset_timers value: %i\n", + mfgpt_reset_timers); + goto done; + } + + /* There are two ways to get the MFGPT base address; one is by + * fetching it from MSR_LBAR_MFGPT, the other is by reading the + * PCI BAR info. The latter method is easier (especially across + * different architectures), so we'll stick with that for now. If + * it turns out to be unreliable in the face of crappy BIOSes, we + * can always go back to using MSRs.. */ + + res = platform_get_resource(pdev, IORESOURCE_IO, 0); + if (!res) { + dev_err(&pdev->dev, "can't fetch device resource info\n"); + goto done; + } + + if (!request_region(res->start, resource_size(res), pdev->name)) { + dev_err(&pdev->dev, "can't request region\n"); + goto done; + } + + /* set up the driver-specific struct */ + cs5535_mfgpt_chip.base = res->start; + cs5535_mfgpt_chip.pdev = pdev; + spin_lock_init(&cs5535_mfgpt_chip.lock); + + dev_info(&pdev->dev, "reserved resource region %pR\n", res); + + /* detect the available timers */ + t = scan_timers(&cs5535_mfgpt_chip); + dev_info(&pdev->dev, "%d MFGPT timers available\n", t); + cs5535_mfgpt_chip.initialized = 1; + return 0; + +done: + return err; +} + +static struct platform_driver cs5535_mfgpt_driver = { + .driver = { + .name = DRV_NAME, + }, + .probe = cs5535_mfgpt_probe, +}; + + +static int __init cs5535_mfgpt_init(void) +{ + return platform_driver_register(&cs5535_mfgpt_driver); +} + +module_init(cs5535_mfgpt_init); + +MODULE_AUTHOR("Andres Salomon "); +MODULE_DESCRIPTION("CS5535/CS5536 MFGPT timer driver"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:" DRV_NAME); diff --git a/drivers/misc/cxl/Kconfig b/drivers/misc/cxl/Kconfig new file mode 100644 index 000000000..3ce933707 --- /dev/null +++ b/drivers/misc/cxl/Kconfig @@ -0,0 +1,35 @@ +# +# IBM Coherent Accelerator (CXL) compatible devices +# + +config CXL_BASE + bool + default n + select PPC_COPRO_BASE + +config CXL_AFU_DRIVER_OPS + bool + default n + +config CXL_LIB + bool + default n + +config CXL + tristate "Support for IBM Coherent Accelerators (CXL)" + depends on PPC_POWERNV && PCI_MSI && EEH + select CXL_BASE + select CXL_AFU_DRIVER_OPS + select CXL_LIB + default m + help + Select this option to enable driver support for IBM Coherent + Accelerators (CXL). CXL is otherwise known as Coherent Accelerator + Processor Interface (CAPI). CAPI allows accelerators in FPGAs to be + coherently attached to a CPU via an MMU. This driver enables + userspace programs to access these accelerators via /dev/cxl/afuM.N + devices. + + CAPI adapters are found in POWER8 based systems. + + If unsure, say N. diff --git a/drivers/misc/cxl/Makefile b/drivers/misc/cxl/Makefile new file mode 100644 index 000000000..5eea61b95 --- /dev/null +++ b/drivers/misc/cxl/Makefile @@ -0,0 +1,14 @@ +# SPDX-License-Identifier: GPL-2.0 +ccflags-y := $(call cc-disable-warning, unused-const-variable) +ccflags-$(CONFIG_PPC_WERROR) += -Werror + +cxl-y += main.o file.o irq.o fault.o native.o +cxl-y += context.o sysfs.o pci.o trace.o +cxl-y += vphb.o api.o cxllib.o +cxl-$(CONFIG_PPC_PSERIES) += flash.o guest.o of.o hcalls.o +cxl-$(CONFIG_DEBUG_FS) += debugfs.o +obj-$(CONFIG_CXL) += cxl.o +obj-$(CONFIG_CXL_BASE) += base.o + +# For tracepoints to include our trace.h from tracepoint infrastructure: +CFLAGS_trace.o := -I$(src) diff --git a/drivers/misc/cxl/api.c b/drivers/misc/cxl/api.c new file mode 100644 index 000000000..750470ef2 --- /dev/null +++ b/drivers/misc/cxl/api.c @@ -0,0 +1,540 @@ +/* + * Copyright 2014 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "cxl.h" + +/* + * Since we want to track memory mappings to be able to force-unmap + * when the AFU is no longer reachable, we need an inode. For devices + * opened through the cxl user API, this is not a problem, but a + * userland process can also get a cxl fd through the cxl_get_fd() + * API, which is used by the cxlflash driver. + * + * Therefore we implement our own simple pseudo-filesystem and inode + * allocator. We don't use the anonymous inode, as we need the + * meta-data associated with it (address_space) and it is shared by + * other drivers/processes, so it could lead to cxl unmapping VMAs + * from random processes. + */ + +#define CXL_PSEUDO_FS_MAGIC 0x1697697f + +static int cxl_fs_cnt; +static struct vfsmount *cxl_vfs_mount; + +static const struct dentry_operations cxl_fs_dops = { + .d_dname = simple_dname, +}; + +static struct dentry *cxl_fs_mount(struct file_system_type *fs_type, int flags, + const char *dev_name, void *data) +{ + return mount_pseudo(fs_type, "cxl:", NULL, &cxl_fs_dops, + CXL_PSEUDO_FS_MAGIC); +} + +static struct file_system_type cxl_fs_type = { + .name = "cxl", + .owner = THIS_MODULE, + .mount = cxl_fs_mount, + .kill_sb = kill_anon_super, +}; + + +void cxl_release_mapping(struct cxl_context *ctx) +{ + if (ctx->kernelapi && ctx->mapping) + simple_release_fs(&cxl_vfs_mount, &cxl_fs_cnt); +} + +static struct file *cxl_getfile(const char *name, + const struct file_operations *fops, + void *priv, int flags) +{ + struct file *file; + struct inode *inode; + int rc; + + /* strongly inspired by anon_inode_getfile() */ + + if (fops->owner && !try_module_get(fops->owner)) + return ERR_PTR(-ENOENT); + + rc = simple_pin_fs(&cxl_fs_type, &cxl_vfs_mount, &cxl_fs_cnt); + if (rc < 0) { + pr_err("Cannot mount cxl pseudo filesystem: %d\n", rc); + file = ERR_PTR(rc); + goto err_module; + } + + inode = alloc_anon_inode(cxl_vfs_mount->mnt_sb); + if (IS_ERR(inode)) { + file = ERR_CAST(inode); + goto err_fs; + } + + file = alloc_file_pseudo(inode, cxl_vfs_mount, name, + flags & (O_ACCMODE | O_NONBLOCK), fops); + if (IS_ERR(file)) + goto err_inode; + + file->private_data = priv; + + return file; + +err_inode: + iput(inode); +err_fs: + simple_release_fs(&cxl_vfs_mount, &cxl_fs_cnt); +err_module: + module_put(fops->owner); + return file; +} + +struct cxl_context *cxl_dev_context_init(struct pci_dev *dev) +{ + struct cxl_afu *afu; + struct cxl_context *ctx; + int rc; + + afu = cxl_pci_to_afu(dev); + if (IS_ERR(afu)) + return ERR_CAST(afu); + + ctx = cxl_context_alloc(); + if (!ctx) + return ERR_PTR(-ENOMEM); + + ctx->kernelapi = true; + + /* Make it a slave context. We can promote it later? */ + rc = cxl_context_init(ctx, afu, false); + if (rc) + goto err_ctx; + + return ctx; + +err_ctx: + kfree(ctx); + return ERR_PTR(rc); +} +EXPORT_SYMBOL_GPL(cxl_dev_context_init); + +struct cxl_context *cxl_get_context(struct pci_dev *dev) +{ + return dev->dev.archdata.cxl_ctx; +} +EXPORT_SYMBOL_GPL(cxl_get_context); + +int cxl_release_context(struct cxl_context *ctx) +{ + if (ctx->status >= STARTED) + return -EBUSY; + + cxl_context_free(ctx); + + return 0; +} +EXPORT_SYMBOL_GPL(cxl_release_context); + +static irq_hw_number_t cxl_find_afu_irq(struct cxl_context *ctx, int num) +{ + __u16 range; + int r; + + for (r = 0; r < CXL_IRQ_RANGES; r++) { + range = ctx->irqs.range[r]; + if (num < range) { + return ctx->irqs.offset[r] + num; + } + num -= range; + } + return 0; +} + + +int cxl_set_priv(struct cxl_context *ctx, void *priv) +{ + if (!ctx) + return -EINVAL; + + ctx->priv = priv; + + return 0; +} +EXPORT_SYMBOL_GPL(cxl_set_priv); + +void *cxl_get_priv(struct cxl_context *ctx) +{ + if (!ctx) + return ERR_PTR(-EINVAL); + + return ctx->priv; +} +EXPORT_SYMBOL_GPL(cxl_get_priv); + +int cxl_allocate_afu_irqs(struct cxl_context *ctx, int num) +{ + int res; + irq_hw_number_t hwirq; + + if (num == 0) + num = ctx->afu->pp_irqs; + res = afu_allocate_irqs(ctx, num); + if (res) + return res; + + if (!cpu_has_feature(CPU_FTR_HVMODE)) { + /* In a guest, the PSL interrupt is not multiplexed. It was + * allocated above, and we need to set its handler + */ + hwirq = cxl_find_afu_irq(ctx, 0); + if (hwirq) + cxl_map_irq(ctx->afu->adapter, hwirq, cxl_ops->psl_interrupt, ctx, "psl"); + } + + if (ctx->status == STARTED) { + if (cxl_ops->update_ivtes) + cxl_ops->update_ivtes(ctx); + else WARN(1, "BUG: cxl_allocate_afu_irqs must be called prior to starting the context on this platform\n"); + } + + return res; +} +EXPORT_SYMBOL_GPL(cxl_allocate_afu_irqs); + +void cxl_free_afu_irqs(struct cxl_context *ctx) +{ + irq_hw_number_t hwirq; + unsigned int virq; + + if (!cpu_has_feature(CPU_FTR_HVMODE)) { + hwirq = cxl_find_afu_irq(ctx, 0); + if (hwirq) { + virq = irq_find_mapping(NULL, hwirq); + if (virq) + cxl_unmap_irq(virq, ctx); + } + } + afu_irq_name_free(ctx); + cxl_ops->release_irq_ranges(&ctx->irqs, ctx->afu->adapter); +} +EXPORT_SYMBOL_GPL(cxl_free_afu_irqs); + +int cxl_map_afu_irq(struct cxl_context *ctx, int num, + irq_handler_t handler, void *cookie, char *name) +{ + irq_hw_number_t hwirq; + + /* + * Find interrupt we are to register. + */ + hwirq = cxl_find_afu_irq(ctx, num); + if (!hwirq) + return -ENOENT; + + return cxl_map_irq(ctx->afu->adapter, hwirq, handler, cookie, name); +} +EXPORT_SYMBOL_GPL(cxl_map_afu_irq); + +void cxl_unmap_afu_irq(struct cxl_context *ctx, int num, void *cookie) +{ + irq_hw_number_t hwirq; + unsigned int virq; + + hwirq = cxl_find_afu_irq(ctx, num); + if (!hwirq) + return; + + virq = irq_find_mapping(NULL, hwirq); + if (virq) + cxl_unmap_irq(virq, cookie); +} +EXPORT_SYMBOL_GPL(cxl_unmap_afu_irq); + +/* + * Start a context + * Code here similar to afu_ioctl_start_work(). + */ +int cxl_start_context(struct cxl_context *ctx, u64 wed, + struct task_struct *task) +{ + int rc = 0; + bool kernel = true; + + pr_devel("%s: pe: %i\n", __func__, ctx->pe); + + mutex_lock(&ctx->status_mutex); + if (ctx->status == STARTED) + goto out; /* already started */ + + /* + * Increment the mapped context count for adapter. This also checks + * if adapter_context_lock is taken. + */ + rc = cxl_adapter_context_get(ctx->afu->adapter); + if (rc) + goto out; + + if (task) { + ctx->pid = get_task_pid(task, PIDTYPE_PID); + kernel = false; + + /* acquire a reference to the task's mm */ + ctx->mm = get_task_mm(current); + + /* ensure this mm_struct can't be freed */ + cxl_context_mm_count_get(ctx); + + if (ctx->mm) { + /* decrement the use count from above */ + mmput(ctx->mm); + /* make TLBIs for this context global */ + mm_context_add_copro(ctx->mm); + } + } + + /* + * Increment driver use count. Enables global TLBIs for hash + * and callbacks to handle the segment table + */ + cxl_ctx_get(); + + /* See the comment in afu_ioctl_start_work() */ + smp_mb(); + + if ((rc = cxl_ops->attach_process(ctx, kernel, wed, 0))) { + put_pid(ctx->pid); + ctx->pid = NULL; + cxl_adapter_context_put(ctx->afu->adapter); + cxl_ctx_put(); + if (task) { + cxl_context_mm_count_put(ctx); + if (ctx->mm) + mm_context_remove_copro(ctx->mm); + } + goto out; + } + + ctx->status = STARTED; +out: + mutex_unlock(&ctx->status_mutex); + return rc; +} +EXPORT_SYMBOL_GPL(cxl_start_context); + +int cxl_process_element(struct cxl_context *ctx) +{ + return ctx->external_pe; +} +EXPORT_SYMBOL_GPL(cxl_process_element); + +/* Stop a context. Returns 0 on success, otherwise -Errno */ +int cxl_stop_context(struct cxl_context *ctx) +{ + return __detach_context(ctx); +} +EXPORT_SYMBOL_GPL(cxl_stop_context); + +void cxl_set_master(struct cxl_context *ctx) +{ + ctx->master = true; +} +EXPORT_SYMBOL_GPL(cxl_set_master); + +/* wrappers around afu_* file ops which are EXPORTED */ +int cxl_fd_open(struct inode *inode, struct file *file) +{ + return afu_open(inode, file); +} +EXPORT_SYMBOL_GPL(cxl_fd_open); +int cxl_fd_release(struct inode *inode, struct file *file) +{ + return afu_release(inode, file); +} +EXPORT_SYMBOL_GPL(cxl_fd_release); +long cxl_fd_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + return afu_ioctl(file, cmd, arg); +} +EXPORT_SYMBOL_GPL(cxl_fd_ioctl); +int cxl_fd_mmap(struct file *file, struct vm_area_struct *vm) +{ + return afu_mmap(file, vm); +} +EXPORT_SYMBOL_GPL(cxl_fd_mmap); +__poll_t cxl_fd_poll(struct file *file, struct poll_table_struct *poll) +{ + return afu_poll(file, poll); +} +EXPORT_SYMBOL_GPL(cxl_fd_poll); +ssize_t cxl_fd_read(struct file *file, char __user *buf, size_t count, + loff_t *off) +{ + return afu_read(file, buf, count, off); +} +EXPORT_SYMBOL_GPL(cxl_fd_read); + +#define PATCH_FOPS(NAME) if (!fops->NAME) fops->NAME = afu_fops.NAME + +/* Get a struct file and fd for a context and attach the ops */ +struct file *cxl_get_fd(struct cxl_context *ctx, struct file_operations *fops, + int *fd) +{ + struct file *file; + int rc, flags, fdtmp; + char *name = NULL; + + /* only allow one per context */ + if (ctx->mapping) + return ERR_PTR(-EEXIST); + + flags = O_RDWR | O_CLOEXEC; + + /* This code is similar to anon_inode_getfd() */ + rc = get_unused_fd_flags(flags); + if (rc < 0) + return ERR_PTR(rc); + fdtmp = rc; + + /* + * Patch the file ops. Needs to be careful that this is rentrant safe. + */ + if (fops) { + PATCH_FOPS(open); + PATCH_FOPS(poll); + PATCH_FOPS(read); + PATCH_FOPS(release); + PATCH_FOPS(unlocked_ioctl); + PATCH_FOPS(compat_ioctl); + PATCH_FOPS(mmap); + } else /* use default ops */ + fops = (struct file_operations *)&afu_fops; + + name = kasprintf(GFP_KERNEL, "cxl:%d", ctx->pe); + file = cxl_getfile(name, fops, ctx, flags); + kfree(name); + if (IS_ERR(file)) + goto err_fd; + + cxl_context_set_mapping(ctx, file->f_mapping); + *fd = fdtmp; + return file; + +err_fd: + put_unused_fd(fdtmp); + return NULL; +} +EXPORT_SYMBOL_GPL(cxl_get_fd); + +struct cxl_context *cxl_fops_get_context(struct file *file) +{ + return file->private_data; +} +EXPORT_SYMBOL_GPL(cxl_fops_get_context); + +void cxl_set_driver_ops(struct cxl_context *ctx, + struct cxl_afu_driver_ops *ops) +{ + WARN_ON(!ops->fetch_event || !ops->event_delivered); + atomic_set(&ctx->afu_driver_events, 0); + ctx->afu_driver_ops = ops; +} +EXPORT_SYMBOL_GPL(cxl_set_driver_ops); + +void cxl_context_events_pending(struct cxl_context *ctx, + unsigned int new_events) +{ + atomic_add(new_events, &ctx->afu_driver_events); + wake_up_all(&ctx->wq); +} +EXPORT_SYMBOL_GPL(cxl_context_events_pending); + +int cxl_start_work(struct cxl_context *ctx, + struct cxl_ioctl_start_work *work) +{ + int rc; + + /* code taken from afu_ioctl_start_work */ + if (!(work->flags & CXL_START_WORK_NUM_IRQS)) + work->num_interrupts = ctx->afu->pp_irqs; + else if ((work->num_interrupts < ctx->afu->pp_irqs) || + (work->num_interrupts > ctx->afu->irqs_max)) { + return -EINVAL; + } + + rc = afu_register_irqs(ctx, work->num_interrupts); + if (rc) + return rc; + + rc = cxl_start_context(ctx, work->work_element_descriptor, current); + if (rc < 0) { + afu_release_irqs(ctx, ctx); + return rc; + } + + return 0; +} +EXPORT_SYMBOL_GPL(cxl_start_work); + +void __iomem *cxl_psa_map(struct cxl_context *ctx) +{ + if (ctx->status != STARTED) + return NULL; + + pr_devel("%s: psn_phys%llx size:%llx\n", + __func__, ctx->psn_phys, ctx->psn_size); + return ioremap(ctx->psn_phys, ctx->psn_size); +} +EXPORT_SYMBOL_GPL(cxl_psa_map); + +void cxl_psa_unmap(void __iomem *addr) +{ + iounmap(addr); +} +EXPORT_SYMBOL_GPL(cxl_psa_unmap); + +int cxl_afu_reset(struct cxl_context *ctx) +{ + struct cxl_afu *afu = ctx->afu; + int rc; + + rc = cxl_ops->afu_reset(afu); + if (rc) + return rc; + + return cxl_ops->afu_check_and_enable(afu); +} +EXPORT_SYMBOL_GPL(cxl_afu_reset); + +void cxl_perst_reloads_same_image(struct cxl_afu *afu, + bool perst_reloads_same_image) +{ + afu->adapter->perst_same_image = perst_reloads_same_image; +} +EXPORT_SYMBOL_GPL(cxl_perst_reloads_same_image); + +ssize_t cxl_read_adapter_vpd(struct pci_dev *dev, void *buf, size_t count) +{ + struct cxl_afu *afu = cxl_pci_to_afu(dev); + if (IS_ERR(afu)) + return -ENODEV; + + return cxl_ops->read_adapter_vpd(afu->adapter, buf, count); +} +EXPORT_SYMBOL_GPL(cxl_read_adapter_vpd); diff --git a/drivers/misc/cxl/base.c b/drivers/misc/cxl/base.c new file mode 100644 index 000000000..7557835cd --- /dev/null +++ b/drivers/misc/cxl/base.c @@ -0,0 +1,129 @@ +/* + * Copyright 2014 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include "cxl.h" + +/* protected by rcu */ +static struct cxl_calls *cxl_calls; + +atomic_t cxl_use_count = ATOMIC_INIT(0); +EXPORT_SYMBOL(cxl_use_count); + +#ifdef CONFIG_CXL_MODULE + +static inline struct cxl_calls *cxl_calls_get(void) +{ + struct cxl_calls *calls = NULL; + + rcu_read_lock(); + calls = rcu_dereference(cxl_calls); + if (calls && !try_module_get(calls->owner)) + calls = NULL; + rcu_read_unlock(); + + return calls; +} + +static inline void cxl_calls_put(struct cxl_calls *calls) +{ + BUG_ON(calls != cxl_calls); + + /* we don't need to rcu this, as we hold a reference to the module */ + module_put(cxl_calls->owner); +} + +#else /* !defined CONFIG_CXL_MODULE */ + +static inline struct cxl_calls *cxl_calls_get(void) +{ + return cxl_calls; +} + +static inline void cxl_calls_put(struct cxl_calls *calls) { } + +#endif /* CONFIG_CXL_MODULE */ + +/* AFU refcount management */ +struct cxl_afu *cxl_afu_get(struct cxl_afu *afu) +{ + return (get_device(&afu->dev) == NULL) ? NULL : afu; +} +EXPORT_SYMBOL_GPL(cxl_afu_get); + +void cxl_afu_put(struct cxl_afu *afu) +{ + put_device(&afu->dev); +} +EXPORT_SYMBOL_GPL(cxl_afu_put); + +void cxl_slbia(struct mm_struct *mm) +{ + struct cxl_calls *calls; + + calls = cxl_calls_get(); + if (!calls) + return; + + if (cxl_ctx_in_use()) + calls->cxl_slbia(mm); + + cxl_calls_put(calls); +} + +int register_cxl_calls(struct cxl_calls *calls) +{ + if (cxl_calls) + return -EBUSY; + + rcu_assign_pointer(cxl_calls, calls); + return 0; +} +EXPORT_SYMBOL_GPL(register_cxl_calls); + +void unregister_cxl_calls(struct cxl_calls *calls) +{ + BUG_ON(cxl_calls->owner != calls->owner); + RCU_INIT_POINTER(cxl_calls, NULL); + synchronize_rcu(); +} +EXPORT_SYMBOL_GPL(unregister_cxl_calls); + +int cxl_update_properties(struct device_node *dn, + struct property *new_prop) +{ + return of_update_property(dn, new_prop); +} +EXPORT_SYMBOL_GPL(cxl_update_properties); + +static int __init cxl_base_init(void) +{ + struct device_node *np; + struct platform_device *dev; + int count = 0; + + /* + * Scan for compatible devices in guest only + */ + if (cpu_has_feature(CPU_FTR_HVMODE)) + return 0; + + for_each_compatible_node(np, NULL, "ibm,coherent-platform-facility") { + dev = of_platform_device_create(np, NULL, NULL); + if (dev) + count++; + } + pr_devel("Found %d cxl device(s)\n", count); + return 0; +} +device_initcall(cxl_base_init); diff --git a/drivers/misc/cxl/context.c b/drivers/misc/cxl/context.c new file mode 100644 index 000000000..5fe529b43 --- /dev/null +++ b/drivers/misc/cxl/context.c @@ -0,0 +1,366 @@ +/* + * Copyright 2014 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "cxl.h" + +/* + * Allocates space for a CXL context. + */ +struct cxl_context *cxl_context_alloc(void) +{ + return kzalloc(sizeof(struct cxl_context), GFP_KERNEL); +} + +/* + * Initialises a CXL context. + */ +int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master) +{ + int i; + + ctx->afu = afu; + ctx->master = master; + ctx->pid = NULL; /* Set in start work ioctl */ + mutex_init(&ctx->mapping_lock); + ctx->mapping = NULL; + ctx->tidr = 0; + ctx->assign_tidr = false; + + if (cxl_is_power8()) { + spin_lock_init(&ctx->sste_lock); + + /* + * Allocate the segment table before we put it in the IDR so that we + * can always access it when dereferenced from IDR. For the same + * reason, the segment table is only destroyed after the context is + * removed from the IDR. Access to this in the IOCTL is protected by + * Linux filesytem symantics (can't IOCTL until open is complete). + */ + i = cxl_alloc_sst(ctx); + if (i) + return i; + } + + INIT_WORK(&ctx->fault_work, cxl_handle_fault); + + init_waitqueue_head(&ctx->wq); + spin_lock_init(&ctx->lock); + + ctx->irq_bitmap = NULL; + ctx->pending_irq = false; + ctx->pending_fault = false; + ctx->pending_afu_err = false; + + INIT_LIST_HEAD(&ctx->irq_names); + + /* + * When we have to destroy all contexts in cxl_context_detach_all() we + * end up with afu_release_irqs() called from inside a + * idr_for_each_entry(). Hence we need to make sure that anything + * dereferenced from this IDR is ok before we allocate the IDR here. + * This clears out the IRQ ranges to ensure this. + */ + for (i = 0; i < CXL_IRQ_RANGES; i++) + ctx->irqs.range[i] = 0; + + mutex_init(&ctx->status_mutex); + + ctx->status = OPENED; + + /* + * Allocating IDR! We better make sure everything's setup that + * dereferences from it. + */ + mutex_lock(&afu->contexts_lock); + idr_preload(GFP_KERNEL); + i = idr_alloc(&ctx->afu->contexts_idr, ctx, 0, + ctx->afu->num_procs, GFP_NOWAIT); + idr_preload_end(); + mutex_unlock(&afu->contexts_lock); + if (i < 0) + return i; + + ctx->pe = i; + if (cpu_has_feature(CPU_FTR_HVMODE)) { + ctx->elem = &ctx->afu->native->spa[i]; + ctx->external_pe = ctx->pe; + } else { + ctx->external_pe = -1; /* assigned when attaching */ + } + ctx->pe_inserted = false; + + /* + * take a ref on the afu so that it stays alive at-least till + * this context is reclaimed inside reclaim_ctx. + */ + cxl_afu_get(afu); + return 0; +} + +void cxl_context_set_mapping(struct cxl_context *ctx, + struct address_space *mapping) +{ + mutex_lock(&ctx->mapping_lock); + ctx->mapping = mapping; + mutex_unlock(&ctx->mapping_lock); +} + +static vm_fault_t cxl_mmap_fault(struct vm_fault *vmf) +{ + struct vm_area_struct *vma = vmf->vma; + struct cxl_context *ctx = vma->vm_file->private_data; + u64 area, offset; + vm_fault_t ret; + + offset = vmf->pgoff << PAGE_SHIFT; + + pr_devel("%s: pe: %i address: 0x%lx offset: 0x%llx\n", + __func__, ctx->pe, vmf->address, offset); + + if (ctx->afu->current_mode == CXL_MODE_DEDICATED) { + area = ctx->afu->psn_phys; + if (offset >= ctx->afu->adapter->ps_size) + return VM_FAULT_SIGBUS; + } else { + area = ctx->psn_phys; + if (offset >= ctx->psn_size) + return VM_FAULT_SIGBUS; + } + + mutex_lock(&ctx->status_mutex); + + if (ctx->status != STARTED) { + mutex_unlock(&ctx->status_mutex); + pr_devel("%s: Context not started, failing problem state access\n", __func__); + if (ctx->mmio_err_ff) { + if (!ctx->ff_page) { + ctx->ff_page = alloc_page(GFP_USER); + if (!ctx->ff_page) + return VM_FAULT_OOM; + memset(page_address(ctx->ff_page), 0xff, PAGE_SIZE); + } + get_page(ctx->ff_page); + vmf->page = ctx->ff_page; + vma->vm_page_prot = pgprot_cached(vma->vm_page_prot); + return 0; + } + return VM_FAULT_SIGBUS; + } + + ret = vmf_insert_pfn(vma, vmf->address, (area + offset) >> PAGE_SHIFT); + + mutex_unlock(&ctx->status_mutex); + + return ret; +} + +static const struct vm_operations_struct cxl_mmap_vmops = { + .fault = cxl_mmap_fault, +}; + +/* + * Map a per-context mmio space into the given vma. + */ +int cxl_context_iomap(struct cxl_context *ctx, struct vm_area_struct *vma) +{ + u64 start = vma->vm_pgoff << PAGE_SHIFT; + u64 len = vma->vm_end - vma->vm_start; + + if (ctx->afu->current_mode == CXL_MODE_DEDICATED) { + if (start + len > ctx->afu->adapter->ps_size) + return -EINVAL; + + if (cxl_is_power9()) { + /* + * Make sure there is a valid problem state + * area space for this AFU. + */ + if (ctx->master && !ctx->afu->psa) { + pr_devel("AFU doesn't support mmio space\n"); + return -EINVAL; + } + + /* Can't mmap until the AFU is enabled */ + if (!ctx->afu->enabled) + return -EBUSY; + } + } else { + if (start + len > ctx->psn_size) + return -EINVAL; + + /* Make sure there is a valid per process space for this AFU */ + if ((ctx->master && !ctx->afu->psa) || (!ctx->afu->pp_psa)) { + pr_devel("AFU doesn't support mmio space\n"); + return -EINVAL; + } + + /* Can't mmap until the AFU is enabled */ + if (!ctx->afu->enabled) + return -EBUSY; + } + + pr_devel("%s: mmio physical: %llx pe: %i master:%i\n", __func__, + ctx->psn_phys, ctx->pe , ctx->master); + + vma->vm_flags |= VM_IO | VM_PFNMAP; + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + vma->vm_ops = &cxl_mmap_vmops; + return 0; +} + +/* + * Detach a context from the hardware. This disables interrupts and doesn't + * return until all outstanding interrupts for this context have completed. The + * hardware should no longer access *ctx after this has returned. + */ +int __detach_context(struct cxl_context *ctx) +{ + enum cxl_context_status status; + + mutex_lock(&ctx->status_mutex); + status = ctx->status; + ctx->status = CLOSED; + mutex_unlock(&ctx->status_mutex); + if (status != STARTED) + return -EBUSY; + + /* Only warn if we detached while the link was OK. + * If detach fails when hw is down, we don't care. + */ + WARN_ON(cxl_ops->detach_process(ctx) && + cxl_ops->link_ok(ctx->afu->adapter, ctx->afu)); + flush_work(&ctx->fault_work); /* Only needed for dedicated process */ + + /* + * Wait until no further interrupts are presented by the PSL + * for this context. + */ + if (cxl_ops->irq_wait) + cxl_ops->irq_wait(ctx); + + /* release the reference to the group leader and mm handling pid */ + put_pid(ctx->pid); + + cxl_ctx_put(); + + /* Decrease the attached context count on the adapter */ + cxl_adapter_context_put(ctx->afu->adapter); + + /* Decrease the mm count on the context */ + cxl_context_mm_count_put(ctx); + if (ctx->mm) + mm_context_remove_copro(ctx->mm); + ctx->mm = NULL; + + return 0; +} + +/* + * Detach the given context from the AFU. This doesn't actually + * free the context but it should stop the context running in hardware + * (ie. prevent this context from generating any further interrupts + * so that it can be freed). + */ +void cxl_context_detach(struct cxl_context *ctx) +{ + int rc; + + rc = __detach_context(ctx); + if (rc) + return; + + afu_release_irqs(ctx, ctx); + wake_up_all(&ctx->wq); +} + +/* + * Detach all contexts on the given AFU. + */ +void cxl_context_detach_all(struct cxl_afu *afu) +{ + struct cxl_context *ctx; + int tmp; + + mutex_lock(&afu->contexts_lock); + idr_for_each_entry(&afu->contexts_idr, ctx, tmp) { + /* + * Anything done in here needs to be setup before the IDR is + * created and torn down after the IDR removed + */ + cxl_context_detach(ctx); + + /* + * We are force detaching - remove any active PSA mappings so + * userspace cannot interfere with the card if it comes back. + * Easiest way to exercise this is to unbind and rebind the + * driver via sysfs while it is in use. + */ + mutex_lock(&ctx->mapping_lock); + if (ctx->mapping) + unmap_mapping_range(ctx->mapping, 0, 0, 1); + mutex_unlock(&ctx->mapping_lock); + } + mutex_unlock(&afu->contexts_lock); +} + +static void reclaim_ctx(struct rcu_head *rcu) +{ + struct cxl_context *ctx = container_of(rcu, struct cxl_context, rcu); + + if (cxl_is_power8()) + free_page((u64)ctx->sstp); + if (ctx->ff_page) + __free_page(ctx->ff_page); + ctx->sstp = NULL; + + kfree(ctx->irq_bitmap); + + /* Drop ref to the afu device taken during cxl_context_init */ + cxl_afu_put(ctx->afu); + + kfree(ctx); +} + +void cxl_context_free(struct cxl_context *ctx) +{ + if (ctx->kernelapi && ctx->mapping) + cxl_release_mapping(ctx); + mutex_lock(&ctx->afu->contexts_lock); + idr_remove(&ctx->afu->contexts_idr, ctx->pe); + mutex_unlock(&ctx->afu->contexts_lock); + call_rcu(&ctx->rcu, reclaim_ctx); +} + +void cxl_context_mm_count_get(struct cxl_context *ctx) +{ + if (ctx->mm) + atomic_inc(&ctx->mm->mm_count); +} + +void cxl_context_mm_count_put(struct cxl_context *ctx) +{ + if (ctx->mm) + mmdrop(ctx->mm); +} diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h new file mode 100644 index 000000000..d1d927ccb --- /dev/null +++ b/drivers/misc/cxl/cxl.h @@ -0,0 +1,1139 @@ +/* + * Copyright 2014 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _CXL_H_ +#define _CXL_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +extern uint cxl_verbose; + +#define CXL_TIMEOUT 5 + +/* + * Bump version each time a user API change is made, whether it is + * backwards compatible ot not. + */ +#define CXL_API_VERSION 3 +#define CXL_API_VERSION_COMPATIBLE 1 + +/* + * Opaque types to avoid accidentally passing registers for the wrong MMIO + * + * At the end of the day, I'm not married to using typedef here, but it might + * (and has!) help avoid bugs like mixing up CXL_PSL_CtxTime and + * CXL_PSL_CtxTime_An, or calling cxl_p1n_write instead of cxl_p1_write. + * + * I'm quite happy if these are changed back to #defines before upstreaming, it + * should be little more than a regexp search+replace operation in this file. + */ +typedef struct { + const int x; +} cxl_p1_reg_t; +typedef struct { + const int x; +} cxl_p1n_reg_t; +typedef struct { + const int x; +} cxl_p2n_reg_t; +#define cxl_reg_off(reg) \ + (reg.x) + +/* Memory maps. Ref CXL Appendix A */ + +/* PSL Privilege 1 Memory Map */ +/* Configuration and Control area - CAIA 1&2 */ +static const cxl_p1_reg_t CXL_PSL_CtxTime = {0x0000}; +static const cxl_p1_reg_t CXL_PSL_ErrIVTE = {0x0008}; +static const cxl_p1_reg_t CXL_PSL_KEY1 = {0x0010}; +static const cxl_p1_reg_t CXL_PSL_KEY2 = {0x0018}; +static const cxl_p1_reg_t CXL_PSL_Control = {0x0020}; +/* Downloading */ +static const cxl_p1_reg_t CXL_PSL_DLCNTL = {0x0060}; +static const cxl_p1_reg_t CXL_PSL_DLADDR = {0x0068}; + +/* PSL Lookaside Buffer Management Area - CAIA 1 */ +static const cxl_p1_reg_t CXL_PSL_LBISEL = {0x0080}; +static const cxl_p1_reg_t CXL_PSL_SLBIE = {0x0088}; +static const cxl_p1_reg_t CXL_PSL_SLBIA = {0x0090}; +static const cxl_p1_reg_t CXL_PSL_TLBIE = {0x00A0}; +static const cxl_p1_reg_t CXL_PSL_TLBIA = {0x00A8}; +static const cxl_p1_reg_t CXL_PSL_AFUSEL = {0x00B0}; + +/* 0x00C0:7EFF Implementation dependent area */ +/* PSL registers - CAIA 1 */ +static const cxl_p1_reg_t CXL_PSL_FIR1 = {0x0100}; +static const cxl_p1_reg_t CXL_PSL_FIR2 = {0x0108}; +static const cxl_p1_reg_t CXL_PSL_Timebase = {0x0110}; +static const cxl_p1_reg_t CXL_PSL_VERSION = {0x0118}; +static const cxl_p1_reg_t CXL_PSL_RESLCKTO = {0x0128}; +static const cxl_p1_reg_t CXL_PSL_TB_CTLSTAT = {0x0140}; +static const cxl_p1_reg_t CXL_PSL_FIR_CNTL = {0x0148}; +static const cxl_p1_reg_t CXL_PSL_DSNDCTL = {0x0150}; +static const cxl_p1_reg_t CXL_PSL_SNWRALLOC = {0x0158}; +static const cxl_p1_reg_t CXL_PSL_TRACE = {0x0170}; +/* PSL registers - CAIA 2 */ +static const cxl_p1_reg_t CXL_PSL9_CONTROL = {0x0020}; +static const cxl_p1_reg_t CXL_XSL9_INV = {0x0110}; +static const cxl_p1_reg_t CXL_XSL9_DBG = {0x0130}; +static const cxl_p1_reg_t CXL_XSL9_DEF = {0x0140}; +static const cxl_p1_reg_t CXL_XSL9_DSNCTL = {0x0168}; +static const cxl_p1_reg_t CXL_PSL9_FIR1 = {0x0300}; +static const cxl_p1_reg_t CXL_PSL9_FIR_MASK = {0x0308}; +static const cxl_p1_reg_t CXL_PSL9_Timebase = {0x0310}; +static const cxl_p1_reg_t CXL_PSL9_DEBUG = {0x0320}; +static const cxl_p1_reg_t CXL_PSL9_FIR_CNTL = {0x0348}; +static const cxl_p1_reg_t CXL_PSL9_DSNDCTL = {0x0350}; +static const cxl_p1_reg_t CXL_PSL9_TB_CTLSTAT = {0x0340}; +static const cxl_p1_reg_t CXL_PSL9_TRACECFG = {0x0368}; +static const cxl_p1_reg_t CXL_PSL9_APCDEDALLOC = {0x0378}; +static const cxl_p1_reg_t CXL_PSL9_APCDEDTYPE = {0x0380}; +static const cxl_p1_reg_t CXL_PSL9_TNR_ADDR = {0x0388}; +static const cxl_p1_reg_t CXL_PSL9_CTCCFG = {0x0390}; +static const cxl_p1_reg_t CXL_PSL9_GP_CT = {0x0398}; +static const cxl_p1_reg_t CXL_XSL9_IERAT = {0x0588}; +static const cxl_p1_reg_t CXL_XSL9_ILPP = {0x0590}; + +/* 0x7F00:7FFF Reserved PCIe MSI-X Pending Bit Array area */ +/* 0x8000:FFFF Reserved PCIe MSI-X Table Area */ + +/* PSL Slice Privilege 1 Memory Map */ +/* Configuration Area - CAIA 1&2 */ +static const cxl_p1n_reg_t CXL_PSL_SR_An = {0x00}; +static const cxl_p1n_reg_t CXL_PSL_LPID_An = {0x08}; +static const cxl_p1n_reg_t CXL_PSL_AMBAR_An = {0x10}; +static const cxl_p1n_reg_t CXL_PSL_SPOffset_An = {0x18}; +static const cxl_p1n_reg_t CXL_PSL_ID_An = {0x20}; +static const cxl_p1n_reg_t CXL_PSL_SERR_An = {0x28}; +/* Memory Management and Lookaside Buffer Management - CAIA 1*/ +static const cxl_p1n_reg_t CXL_PSL_SDR_An = {0x30}; +/* Memory Management and Lookaside Buffer Management - CAIA 1&2 */ +static const cxl_p1n_reg_t CXL_PSL_AMOR_An = {0x38}; +/* Pointer Area - CAIA 1&2 */ +static const cxl_p1n_reg_t CXL_HAURP_An = {0x80}; +static const cxl_p1n_reg_t CXL_PSL_SPAP_An = {0x88}; +static const cxl_p1n_reg_t CXL_PSL_LLCMD_An = {0x90}; +/* Control Area - CAIA 1&2 */ +static const cxl_p1n_reg_t CXL_PSL_SCNTL_An = {0xA0}; +static const cxl_p1n_reg_t CXL_PSL_CtxTime_An = {0xA8}; +static const cxl_p1n_reg_t CXL_PSL_IVTE_Offset_An = {0xB0}; +static const cxl_p1n_reg_t CXL_PSL_IVTE_Limit_An = {0xB8}; +/* 0xC0:FF Implementation Dependent Area - CAIA 1&2 */ +static const cxl_p1n_reg_t CXL_PSL_FIR_SLICE_An = {0xC0}; +static const cxl_p1n_reg_t CXL_AFU_DEBUG_An = {0xC8}; +/* 0xC0:FF Implementation Dependent Area - CAIA 1 */ +static const cxl_p1n_reg_t CXL_PSL_APCALLOC_A = {0xD0}; +static const cxl_p1n_reg_t CXL_PSL_COALLOC_A = {0xD8}; +static const cxl_p1n_reg_t CXL_PSL_RXCTL_A = {0xE0}; +static const cxl_p1n_reg_t CXL_PSL_SLICE_TRACE = {0xE8}; + +/* PSL Slice Privilege 2 Memory Map */ +/* Configuration and Control Area - CAIA 1&2 */ +static const cxl_p2n_reg_t CXL_PSL_PID_TID_An = {0x000}; +static const cxl_p2n_reg_t CXL_CSRP_An = {0x008}; +/* Configuration and Control Area - CAIA 1 */ +static const cxl_p2n_reg_t CXL_AURP0_An = {0x010}; +static const cxl_p2n_reg_t CXL_AURP1_An = {0x018}; +static const cxl_p2n_reg_t CXL_SSTP0_An = {0x020}; +static const cxl_p2n_reg_t CXL_SSTP1_An = {0x028}; +/* Configuration and Control Area - CAIA 1 */ +static const cxl_p2n_reg_t CXL_PSL_AMR_An = {0x030}; +/* Segment Lookaside Buffer Management - CAIA 1 */ +static const cxl_p2n_reg_t CXL_SLBIE_An = {0x040}; +static const cxl_p2n_reg_t CXL_SLBIA_An = {0x048}; +static const cxl_p2n_reg_t CXL_SLBI_Select_An = {0x050}; +/* Interrupt Registers - CAIA 1&2 */ +static const cxl_p2n_reg_t CXL_PSL_DSISR_An = {0x060}; +static const cxl_p2n_reg_t CXL_PSL_DAR_An = {0x068}; +static const cxl_p2n_reg_t CXL_PSL_DSR_An = {0x070}; +static const cxl_p2n_reg_t CXL_PSL_TFC_An = {0x078}; +static const cxl_p2n_reg_t CXL_PSL_PEHandle_An = {0x080}; +static const cxl_p2n_reg_t CXL_PSL_ErrStat_An = {0x088}; +/* AFU Registers - CAIA 1&2 */ +static const cxl_p2n_reg_t CXL_AFU_Cntl_An = {0x090}; +static const cxl_p2n_reg_t CXL_AFU_ERR_An = {0x098}; +/* Work Element Descriptor - CAIA 1&2 */ +static const cxl_p2n_reg_t CXL_PSL_WED_An = {0x0A0}; +/* 0x0C0:FFF Implementation Dependent Area */ + +#define CXL_PSL_SPAP_Addr 0x0ffffffffffff000ULL +#define CXL_PSL_SPAP_Size 0x0000000000000ff0ULL +#define CXL_PSL_SPAP_Size_Shift 4 +#define CXL_PSL_SPAP_V 0x0000000000000001ULL + +/****** CXL_PSL_Control ****************************************************/ +#define CXL_PSL_Control_tb (0x1ull << (63-63)) +#define CXL_PSL_Control_Fr (0x1ull << (63-31)) +#define CXL_PSL_Control_Fs_MASK (0x3ull << (63-29)) +#define CXL_PSL_Control_Fs_Complete (0x3ull << (63-29)) + +/****** CXL_PSL_DLCNTL *****************************************************/ +#define CXL_PSL_DLCNTL_D (0x1ull << (63-28)) +#define CXL_PSL_DLCNTL_C (0x1ull << (63-29)) +#define CXL_PSL_DLCNTL_E (0x1ull << (63-30)) +#define CXL_PSL_DLCNTL_S (0x1ull << (63-31)) +#define CXL_PSL_DLCNTL_CE (CXL_PSL_DLCNTL_C | CXL_PSL_DLCNTL_E) +#define CXL_PSL_DLCNTL_DCES (CXL_PSL_DLCNTL_D | CXL_PSL_DLCNTL_CE | CXL_PSL_DLCNTL_S) + +/****** CXL_PSL_SR_An ******************************************************/ +#define CXL_PSL_SR_An_SF MSR_SF /* 64bit */ +#define CXL_PSL_SR_An_TA (1ull << (63-1)) /* Tags active, GA1: 0 */ +#define CXL_PSL_SR_An_HV MSR_HV /* Hypervisor, GA1: 0 */ +#define CXL_PSL_SR_An_XLAT_hpt (0ull << (63-6))/* Hashed page table (HPT) mode */ +#define CXL_PSL_SR_An_XLAT_roh (2ull << (63-6))/* Radix on HPT mode */ +#define CXL_PSL_SR_An_XLAT_ror (3ull << (63-6))/* Radix on Radix mode */ +#define CXL_PSL_SR_An_BOT (1ull << (63-10)) /* Use the in-memory segment table */ +#define CXL_PSL_SR_An_PR MSR_PR /* Problem state, GA1: 1 */ +#define CXL_PSL_SR_An_ISL (1ull << (63-53)) /* Ignore Segment Large Page */ +#define CXL_PSL_SR_An_TC (1ull << (63-54)) /* Page Table secondary hash */ +#define CXL_PSL_SR_An_US (1ull << (63-56)) /* User state, GA1: X */ +#define CXL_PSL_SR_An_SC (1ull << (63-58)) /* Segment Table secondary hash */ +#define CXL_PSL_SR_An_R MSR_DR /* Relocate, GA1: 1 */ +#define CXL_PSL_SR_An_MP (1ull << (63-62)) /* Master Process */ +#define CXL_PSL_SR_An_LE (1ull << (63-63)) /* Little Endian */ + +/****** CXL_PSL_ID_An ****************************************************/ +#define CXL_PSL_ID_An_F (1ull << (63-31)) +#define CXL_PSL_ID_An_L (1ull << (63-30)) + +/****** CXL_PSL_SERR_An ****************************************************/ +#define CXL_PSL_SERR_An_afuto (1ull << (63-0)) +#define CXL_PSL_SERR_An_afudis (1ull << (63-1)) +#define CXL_PSL_SERR_An_afuov (1ull << (63-2)) +#define CXL_PSL_SERR_An_badsrc (1ull << (63-3)) +#define CXL_PSL_SERR_An_badctx (1ull << (63-4)) +#define CXL_PSL_SERR_An_llcmdis (1ull << (63-5)) +#define CXL_PSL_SERR_An_llcmdto (1ull << (63-6)) +#define CXL_PSL_SERR_An_afupar (1ull << (63-7)) +#define CXL_PSL_SERR_An_afudup (1ull << (63-8)) +#define CXL_PSL_SERR_An_IRQS ( \ + CXL_PSL_SERR_An_afuto | CXL_PSL_SERR_An_afudis | CXL_PSL_SERR_An_afuov | \ + CXL_PSL_SERR_An_badsrc | CXL_PSL_SERR_An_badctx | CXL_PSL_SERR_An_llcmdis | \ + CXL_PSL_SERR_An_llcmdto | CXL_PSL_SERR_An_afupar | CXL_PSL_SERR_An_afudup) +#define CXL_PSL_SERR_An_afuto_mask (1ull << (63-32)) +#define CXL_PSL_SERR_An_afudis_mask (1ull << (63-33)) +#define CXL_PSL_SERR_An_afuov_mask (1ull << (63-34)) +#define CXL_PSL_SERR_An_badsrc_mask (1ull << (63-35)) +#define CXL_PSL_SERR_An_badctx_mask (1ull << (63-36)) +#define CXL_PSL_SERR_An_llcmdis_mask (1ull << (63-37)) +#define CXL_PSL_SERR_An_llcmdto_mask (1ull << (63-38)) +#define CXL_PSL_SERR_An_afupar_mask (1ull << (63-39)) +#define CXL_PSL_SERR_An_afudup_mask (1ull << (63-40)) +#define CXL_PSL_SERR_An_IRQ_MASKS ( \ + CXL_PSL_SERR_An_afuto_mask | CXL_PSL_SERR_An_afudis_mask | CXL_PSL_SERR_An_afuov_mask | \ + CXL_PSL_SERR_An_badsrc_mask | CXL_PSL_SERR_An_badctx_mask | CXL_PSL_SERR_An_llcmdis_mask | \ + CXL_PSL_SERR_An_llcmdto_mask | CXL_PSL_SERR_An_afupar_mask | CXL_PSL_SERR_An_afudup_mask) + +#define CXL_PSL_SERR_An_AE (1ull << (63-30)) + +/****** CXL_PSL_SCNTL_An ****************************************************/ +#define CXL_PSL_SCNTL_An_CR (0x1ull << (63-15)) +/* Programming Modes: */ +#define CXL_PSL_SCNTL_An_PM_MASK (0xffffull << (63-31)) +#define CXL_PSL_SCNTL_An_PM_Shared (0x0000ull << (63-31)) +#define CXL_PSL_SCNTL_An_PM_OS (0x0001ull << (63-31)) +#define CXL_PSL_SCNTL_An_PM_Process (0x0002ull << (63-31)) +#define CXL_PSL_SCNTL_An_PM_AFU (0x0004ull << (63-31)) +#define CXL_PSL_SCNTL_An_PM_AFU_PBT (0x0104ull << (63-31)) +/* Purge Status (ro) */ +#define CXL_PSL_SCNTL_An_Ps_MASK (0x3ull << (63-39)) +#define CXL_PSL_SCNTL_An_Ps_Pending (0x1ull << (63-39)) +#define CXL_PSL_SCNTL_An_Ps_Complete (0x3ull << (63-39)) +/* Purge */ +#define CXL_PSL_SCNTL_An_Pc (0x1ull << (63-48)) +/* Suspend Status (ro) */ +#define CXL_PSL_SCNTL_An_Ss_MASK (0x3ull << (63-55)) +#define CXL_PSL_SCNTL_An_Ss_Pending (0x1ull << (63-55)) +#define CXL_PSL_SCNTL_An_Ss_Complete (0x3ull << (63-55)) +/* Suspend Control */ +#define CXL_PSL_SCNTL_An_Sc (0x1ull << (63-63)) + +/* AFU Slice Enable Status (ro) */ +#define CXL_AFU_Cntl_An_ES_MASK (0x7ull << (63-2)) +#define CXL_AFU_Cntl_An_ES_Disabled (0x0ull << (63-2)) +#define CXL_AFU_Cntl_An_ES_Enabled (0x4ull << (63-2)) +/* AFU Slice Enable */ +#define CXL_AFU_Cntl_An_E (0x1ull << (63-3)) +/* AFU Slice Reset status (ro) */ +#define CXL_AFU_Cntl_An_RS_MASK (0x3ull << (63-5)) +#define CXL_AFU_Cntl_An_RS_Pending (0x1ull << (63-5)) +#define CXL_AFU_Cntl_An_RS_Complete (0x2ull << (63-5)) +/* AFU Slice Reset */ +#define CXL_AFU_Cntl_An_RA (0x1ull << (63-7)) + +/****** CXL_SSTP0/1_An ******************************************************/ +/* These top bits are for the segment that CONTAINS the segment table */ +#define CXL_SSTP0_An_B_SHIFT SLB_VSID_SSIZE_SHIFT +#define CXL_SSTP0_An_KS (1ull << (63-2)) +#define CXL_SSTP0_An_KP (1ull << (63-3)) +#define CXL_SSTP0_An_N (1ull << (63-4)) +#define CXL_SSTP0_An_L (1ull << (63-5)) +#define CXL_SSTP0_An_C (1ull << (63-6)) +#define CXL_SSTP0_An_TA (1ull << (63-7)) +#define CXL_SSTP0_An_LP_SHIFT (63-9) /* 2 Bits */ +/* And finally, the virtual address & size of the segment table: */ +#define CXL_SSTP0_An_SegTableSize_SHIFT (63-31) /* 12 Bits */ +#define CXL_SSTP0_An_SegTableSize_MASK \ + (((1ull << 12) - 1) << CXL_SSTP0_An_SegTableSize_SHIFT) +#define CXL_SSTP0_An_STVA_U_MASK ((1ull << (63-49))-1) +#define CXL_SSTP1_An_STVA_L_MASK (~((1ull << (63-55))-1)) +#define CXL_SSTP1_An_V (1ull << (63-63)) + +/****** CXL_PSL_SLBIE_[An] - CAIA 1 **************************************************/ +/* write: */ +#define CXL_SLBIE_C PPC_BIT(36) /* Class */ +#define CXL_SLBIE_SS PPC_BITMASK(37, 38) /* Segment Size */ +#define CXL_SLBIE_SS_SHIFT PPC_BITLSHIFT(38) +#define CXL_SLBIE_TA PPC_BIT(38) /* Tags Active */ +/* read: */ +#define CXL_SLBIE_MAX PPC_BITMASK(24, 31) +#define CXL_SLBIE_PENDING PPC_BITMASK(56, 63) + +/****** Common to all CXL_TLBIA/SLBIA_[An] - CAIA 1 **********************************/ +#define CXL_TLB_SLB_P (1ull) /* Pending (read) */ + +/****** Common to all CXL_TLB/SLB_IA/IE_[An] registers - CAIA 1 **********************/ +#define CXL_TLB_SLB_IQ_ALL (0ull) /* Inv qualifier */ +#define CXL_TLB_SLB_IQ_LPID (1ull) /* Inv qualifier */ +#define CXL_TLB_SLB_IQ_LPIDPID (3ull) /* Inv qualifier */ + +/****** CXL_PSL_AFUSEL ******************************************************/ +#define CXL_PSL_AFUSEL_A (1ull << (63-55)) /* Adapter wide invalidates affect all AFUs */ + +/****** CXL_PSL_DSISR_An - CAIA 1 ****************************************************/ +#define CXL_PSL_DSISR_An_DS (1ull << (63-0)) /* Segment not found */ +#define CXL_PSL_DSISR_An_DM (1ull << (63-1)) /* PTE not found (See also: M) or protection fault */ +#define CXL_PSL_DSISR_An_ST (1ull << (63-2)) /* Segment Table PTE not found */ +#define CXL_PSL_DSISR_An_UR (1ull << (63-3)) /* AURP PTE not found */ +#define CXL_PSL_DSISR_TRANS (CXL_PSL_DSISR_An_DS | CXL_PSL_DSISR_An_DM | CXL_PSL_DSISR_An_ST | CXL_PSL_DSISR_An_UR) +#define CXL_PSL_DSISR_An_PE (1ull << (63-4)) /* PSL Error (implementation specific) */ +#define CXL_PSL_DSISR_An_AE (1ull << (63-5)) /* AFU Error */ +#define CXL_PSL_DSISR_An_OC (1ull << (63-6)) /* OS Context Warning */ +#define CXL_PSL_DSISR_PENDING (CXL_PSL_DSISR_TRANS | CXL_PSL_DSISR_An_PE | CXL_PSL_DSISR_An_AE | CXL_PSL_DSISR_An_OC) +/* NOTE: Bits 32:63 are undefined if DSISR[DS] = 1 */ +#define CXL_PSL_DSISR_An_M DSISR_NOHPTE /* PTE not found */ +#define CXL_PSL_DSISR_An_P DSISR_PROTFAULT /* Storage protection violation */ +#define CXL_PSL_DSISR_An_A (1ull << (63-37)) /* AFU lock access to write through or cache inhibited storage */ +#define CXL_PSL_DSISR_An_S DSISR_ISSTORE /* Access was afu_wr or afu_zero */ +#define CXL_PSL_DSISR_An_K DSISR_KEYFAULT /* Access not permitted by virtual page class key protection */ + +/****** CXL_PSL_DSISR_An - CAIA 2 ****************************************************/ +#define CXL_PSL9_DSISR_An_TF (1ull << (63-3)) /* Translation fault */ +#define CXL_PSL9_DSISR_An_PE (1ull << (63-4)) /* PSL Error (implementation specific) */ +#define CXL_PSL9_DSISR_An_AE (1ull << (63-5)) /* AFU Error */ +#define CXL_PSL9_DSISR_An_OC (1ull << (63-6)) /* OS Context Warning */ +#define CXL_PSL9_DSISR_An_S (1ull << (63-38)) /* TF for a write operation */ +#define CXL_PSL9_DSISR_PENDING (CXL_PSL9_DSISR_An_TF | CXL_PSL9_DSISR_An_PE | CXL_PSL9_DSISR_An_AE | CXL_PSL9_DSISR_An_OC) +/* + * NOTE: Bits 56:63 (Checkout Response Status) are valid when DSISR_An[TF] = 1 + * Status (0:7) Encoding + */ +#define CXL_PSL9_DSISR_An_CO_MASK 0x00000000000000ffULL +#define CXL_PSL9_DSISR_An_SF 0x0000000000000080ULL /* Segment Fault 0b10000000 */ +#define CXL_PSL9_DSISR_An_PF_SLR 0x0000000000000088ULL /* PTE not found (Single Level Radix) 0b10001000 */ +#define CXL_PSL9_DSISR_An_PF_RGC 0x000000000000008CULL /* PTE not found (Radix Guest (child)) 0b10001100 */ +#define CXL_PSL9_DSISR_An_PF_RGP 0x0000000000000090ULL /* PTE not found (Radix Guest (parent)) 0b10010000 */ +#define CXL_PSL9_DSISR_An_PF_HRH 0x0000000000000094ULL /* PTE not found (HPT/Radix Host) 0b10010100 */ +#define CXL_PSL9_DSISR_An_PF_STEG 0x000000000000009CULL /* PTE not found (STEG VA) 0b10011100 */ +#define CXL_PSL9_DSISR_An_URTCH 0x00000000000000B4ULL /* Unsupported Radix Tree Configuration 0b10110100 */ + +/****** CXL_PSL_TFC_An ******************************************************/ +#define CXL_PSL_TFC_An_A (1ull << (63-28)) /* Acknowledge non-translation fault */ +#define CXL_PSL_TFC_An_C (1ull << (63-29)) /* Continue (abort transaction) */ +#define CXL_PSL_TFC_An_AE (1ull << (63-30)) /* Restart PSL with address error */ +#define CXL_PSL_TFC_An_R (1ull << (63-31)) /* Restart PSL transaction */ + +/****** CXL_PSL_DEBUG *****************************************************/ +#define CXL_PSL_DEBUG_CDC (1ull << (63-27)) /* Coherent Data cache support */ + +/****** CXL_XSL9_IERAT_ERAT - CAIA 2 **********************************/ +#define CXL_XSL9_IERAT_MLPID (1ull << (63-0)) /* Match LPID */ +#define CXL_XSL9_IERAT_MPID (1ull << (63-1)) /* Match PID */ +#define CXL_XSL9_IERAT_PRS (1ull << (63-4)) /* PRS bit for Radix invalidations */ +#define CXL_XSL9_IERAT_INVR (1ull << (63-3)) /* Invalidate Radix */ +#define CXL_XSL9_IERAT_IALL (1ull << (63-8)) /* Invalidate All */ +#define CXL_XSL9_IERAT_IINPROG (1ull << (63-63)) /* Invalidate in progress */ + +/* cxl_process_element->software_status */ +#define CXL_PE_SOFTWARE_STATE_V (1ul << (31 - 0)) /* Valid */ +#define CXL_PE_SOFTWARE_STATE_C (1ul << (31 - 29)) /* Complete */ +#define CXL_PE_SOFTWARE_STATE_S (1ul << (31 - 30)) /* Suspend */ +#define CXL_PE_SOFTWARE_STATE_T (1ul << (31 - 31)) /* Terminate */ + +/****** CXL_PSL_RXCTL_An (Implementation Specific) ************************** + * Controls AFU Hang Pulse, which sets the timeout for the AFU to respond to + * the PSL for any response (except MMIO). Timeouts will occur between 1x to 2x + * of the hang pulse frequency. + */ +#define CXL_PSL_RXCTL_AFUHP_4S 0x7000000000000000ULL + +/* SPA->sw_command_status */ +#define CXL_SPA_SW_CMD_MASK 0xffff000000000000ULL +#define CXL_SPA_SW_CMD_TERMINATE 0x0001000000000000ULL +#define CXL_SPA_SW_CMD_REMOVE 0x0002000000000000ULL +#define CXL_SPA_SW_CMD_SUSPEND 0x0003000000000000ULL +#define CXL_SPA_SW_CMD_RESUME 0x0004000000000000ULL +#define CXL_SPA_SW_CMD_ADD 0x0005000000000000ULL +#define CXL_SPA_SW_CMD_UPDATE 0x0006000000000000ULL +#define CXL_SPA_SW_STATE_MASK 0x0000ffff00000000ULL +#define CXL_SPA_SW_STATE_TERMINATED 0x0000000100000000ULL +#define CXL_SPA_SW_STATE_REMOVED 0x0000000200000000ULL +#define CXL_SPA_SW_STATE_SUSPENDED 0x0000000300000000ULL +#define CXL_SPA_SW_STATE_RESUMED 0x0000000400000000ULL +#define CXL_SPA_SW_STATE_ADDED 0x0000000500000000ULL +#define CXL_SPA_SW_STATE_UPDATED 0x0000000600000000ULL +#define CXL_SPA_SW_PSL_ID_MASK 0x00000000ffff0000ULL +#define CXL_SPA_SW_LINK_MASK 0x000000000000ffffULL + +#define CXL_MAX_SLICES 4 +#define MAX_AFU_MMIO_REGS 3 + +#define CXL_MODE_TIME_SLICED 0x4 +#define CXL_SUPPORTED_MODES (CXL_MODE_DEDICATED | CXL_MODE_DIRECTED) + +#define CXL_DEV_MINORS 13 /* 1 control + 4 AFUs * 3 (dedicated/master/shared) */ +#define CXL_CARD_MINOR(adapter) (adapter->adapter_num * CXL_DEV_MINORS) +#define CXL_DEVT_ADAPTER(dev) (MINOR(dev) / CXL_DEV_MINORS) + +#define CXL_PSL9_TRACEID_MAX 0xAU +#define CXL_PSL9_TRACESTATE_FIN 0x3U + +enum cxl_context_status { + CLOSED, + OPENED, + STARTED +}; + +enum prefault_modes { + CXL_PREFAULT_NONE, + CXL_PREFAULT_WED, + CXL_PREFAULT_ALL, +}; + +enum cxl_attrs { + CXL_ADAPTER_ATTRS, + CXL_AFU_MASTER_ATTRS, + CXL_AFU_ATTRS, +}; + +struct cxl_sste { + __be64 esid_data; + __be64 vsid_data; +}; + +#define to_cxl_adapter(d) container_of(d, struct cxl, dev) +#define to_cxl_afu(d) container_of(d, struct cxl_afu, dev) + +struct cxl_afu_native { + void __iomem *p1n_mmio; + void __iomem *afu_desc_mmio; + irq_hw_number_t psl_hwirq; + unsigned int psl_virq; + struct mutex spa_mutex; + /* + * Only the first part of the SPA is used for the process element + * linked list. The only other part that software needs to worry about + * is sw_command_status, which we store a separate pointer to. + * Everything else in the SPA is only used by hardware + */ + struct cxl_process_element *spa; + __be64 *sw_command_status; + unsigned int spa_size; + int spa_order; + int spa_max_procs; + u64 pp_offset; +}; + +struct cxl_afu_guest { + struct cxl_afu *parent; + u64 handle; + phys_addr_t p2n_phys; + u64 p2n_size; + int max_ints; + bool handle_err; + struct delayed_work work_err; + int previous_state; +}; + +struct cxl_afu { + struct cxl_afu_native *native; + struct cxl_afu_guest *guest; + irq_hw_number_t serr_hwirq; + unsigned int serr_virq; + char *psl_irq_name; + char *err_irq_name; + void __iomem *p2n_mmio; + phys_addr_t psn_phys; + u64 pp_size; + + struct cxl *adapter; + struct device dev; + struct cdev afu_cdev_s, afu_cdev_m, afu_cdev_d; + struct device *chardev_s, *chardev_m, *chardev_d; + struct idr contexts_idr; + struct dentry *debugfs; + struct mutex contexts_lock; + spinlock_t afu_cntl_lock; + + /* -1: AFU deconfigured/locked, >= 0: number of readers */ + atomic_t configured_state; + + /* AFU error buffer fields and bin attribute for sysfs */ + u64 eb_len, eb_offset; + struct bin_attribute attr_eb; + + /* pointer to the vphb */ + struct pci_controller *phb; + + int pp_irqs; + int irqs_max; + int num_procs; + int max_procs_virtualised; + int slice; + int modes_supported; + int current_mode; + int crs_num; + u64 crs_len; + u64 crs_offset; + struct list_head crs; + enum prefault_modes prefault_mode; + bool psa; + bool pp_psa; + bool enabled; +}; + + +struct cxl_irq_name { + struct list_head list; + char *name; +}; + +struct irq_avail { + irq_hw_number_t offset; + irq_hw_number_t range; + unsigned long *bitmap; +}; + +/* + * This is a cxl context. If the PSL is in dedicated mode, there will be one + * of these per AFU. If in AFU directed there can be lots of these. + */ +struct cxl_context { + struct cxl_afu *afu; + + /* Problem state MMIO */ + phys_addr_t psn_phys; + u64 psn_size; + + /* Used to unmap any mmaps when force detaching */ + struct address_space *mapping; + struct mutex mapping_lock; + struct page *ff_page; + bool mmio_err_ff; + bool kernelapi; + + spinlock_t sste_lock; /* Protects segment table entries */ + struct cxl_sste *sstp; + u64 sstp0, sstp1; + unsigned int sst_size, sst_lru; + + wait_queue_head_t wq; + /* use mm context associated with this pid for ds faults */ + struct pid *pid; + spinlock_t lock; /* Protects pending_irq_mask, pending_fault and fault_addr */ + /* Only used in PR mode */ + u64 process_token; + + /* driver private data */ + void *priv; + + unsigned long *irq_bitmap; /* Accessed from IRQ context */ + struct cxl_irq_ranges irqs; + struct list_head irq_names; + u64 fault_addr; + u64 fault_dsisr; + u64 afu_err; + + /* + * This status and it's lock pretects start and detach context + * from racing. It also prevents detach from racing with + * itself + */ + enum cxl_context_status status; + struct mutex status_mutex; + + + /* XXX: Is it possible to need multiple work items at once? */ + struct work_struct fault_work; + u64 dsisr; + u64 dar; + + struct cxl_process_element *elem; + + /* + * pe is the process element handle, assigned by this driver when the + * context is initialized. + * + * external_pe is the PE shown outside of cxl. + * On bare-metal, pe=external_pe, because we decide what the handle is. + * In a guest, we only find out about the pe used by pHyp when the + * context is attached, and that's the value we want to report outside + * of cxl. + */ + int pe; + int external_pe; + + u32 irq_count; + bool pe_inserted; + bool master; + bool kernel; + bool pending_irq; + bool pending_fault; + bool pending_afu_err; + + /* Used by AFU drivers for driver specific event delivery */ + struct cxl_afu_driver_ops *afu_driver_ops; + atomic_t afu_driver_events; + + struct rcu_head rcu; + + struct mm_struct *mm; + + u16 tidr; + bool assign_tidr; +}; + +struct cxl_irq_info; + +struct cxl_service_layer_ops { + int (*adapter_regs_init)(struct cxl *adapter, struct pci_dev *dev); + int (*invalidate_all)(struct cxl *adapter); + int (*afu_regs_init)(struct cxl_afu *afu); + int (*sanitise_afu_regs)(struct cxl_afu *afu); + int (*register_serr_irq)(struct cxl_afu *afu); + void (*release_serr_irq)(struct cxl_afu *afu); + irqreturn_t (*handle_interrupt)(int irq, struct cxl_context *ctx, struct cxl_irq_info *irq_info); + irqreturn_t (*fail_irq)(struct cxl_afu *afu, struct cxl_irq_info *irq_info); + int (*activate_dedicated_process)(struct cxl_afu *afu); + int (*attach_afu_directed)(struct cxl_context *ctx, u64 wed, u64 amr); + int (*attach_dedicated_process)(struct cxl_context *ctx, u64 wed, u64 amr); + void (*update_dedicated_ivtes)(struct cxl_context *ctx); + void (*debugfs_add_adapter_regs)(struct cxl *adapter, struct dentry *dir); + void (*debugfs_add_afu_regs)(struct cxl_afu *afu, struct dentry *dir); + void (*psl_irq_dump_registers)(struct cxl_context *ctx); + void (*err_irq_dump_registers)(struct cxl *adapter); + void (*debugfs_stop_trace)(struct cxl *adapter); + void (*write_timebase_ctrl)(struct cxl *adapter); + u64 (*timebase_read)(struct cxl *adapter); + int capi_mode; + bool needs_reset_before_disable; +}; + +struct cxl_native { + u64 afu_desc_off; + u64 afu_desc_size; + void __iomem *p1_mmio; + void __iomem *p2_mmio; + irq_hw_number_t err_hwirq; + unsigned int err_virq; + u64 ps_off; + bool no_data_cache; /* set if no data cache on the card */ + const struct cxl_service_layer_ops *sl_ops; +}; + +struct cxl_guest { + struct platform_device *pdev; + int irq_nranges; + struct cdev cdev; + irq_hw_number_t irq_base_offset; + struct irq_avail *irq_avail; + spinlock_t irq_alloc_lock; + u64 handle; + char *status; + u16 vendor; + u16 device; + u16 subsystem_vendor; + u16 subsystem; +}; + +struct cxl { + struct cxl_native *native; + struct cxl_guest *guest; + spinlock_t afu_list_lock; + struct cxl_afu *afu[CXL_MAX_SLICES]; + struct device dev; + struct dentry *trace; + struct dentry *psl_err_chk; + struct dentry *debugfs; + char *irq_name; + struct bin_attribute cxl_attr; + int adapter_num; + int user_irqs; + u64 ps_size; + u16 psl_rev; + u16 base_image; + u8 vsec_status; + u8 caia_major; + u8 caia_minor; + u8 slices; + bool user_image_loaded; + bool perst_loads_image; + bool perst_select_user; + bool perst_same_image; + bool psl_timebase_synced; + bool tunneled_ops_supported; + + /* + * number of contexts mapped on to this card. Possible values are: + * >0: Number of contexts mapped and new one can be mapped. + * 0: No active contexts and new ones can be mapped. + * -1: No contexts mapped and new ones cannot be mapped. + */ + atomic_t contexts_num; +}; + +int cxl_pci_alloc_one_irq(struct cxl *adapter); +void cxl_pci_release_one_irq(struct cxl *adapter, int hwirq); +int cxl_pci_alloc_irq_ranges(struct cxl_irq_ranges *irqs, struct cxl *adapter, unsigned int num); +void cxl_pci_release_irq_ranges(struct cxl_irq_ranges *irqs, struct cxl *adapter); +int cxl_pci_setup_irq(struct cxl *adapter, unsigned int hwirq, unsigned int virq); +int cxl_update_image_control(struct cxl *adapter); +int cxl_pci_reset(struct cxl *adapter); +void cxl_pci_release_afu(struct device *dev); +ssize_t cxl_pci_read_adapter_vpd(struct cxl *adapter, void *buf, size_t len); + +/* common == phyp + powernv - CAIA 1&2 */ +struct cxl_process_element_common { + __be32 tid; + __be32 pid; + __be64 csrp; + union { + struct { + __be64 aurp0; + __be64 aurp1; + __be64 sstp0; + __be64 sstp1; + } psl8; /* CAIA 1 */ + struct { + u8 reserved2[8]; + u8 reserved3[8]; + u8 reserved4[8]; + u8 reserved5[8]; + } psl9; /* CAIA 2 */ + } u; + __be64 amr; + u8 reserved6[4]; + __be64 wed; +} __packed; + +/* just powernv - CAIA 1&2 */ +struct cxl_process_element { + __be64 sr; + __be64 SPOffset; + union { + __be64 sdr; /* CAIA 1 */ + u8 reserved1[8]; /* CAIA 2 */ + } u; + __be64 haurp; + __be32 ctxtime; + __be16 ivte_offsets[4]; + __be16 ivte_ranges[4]; + __be32 lpid; + struct cxl_process_element_common common; + __be32 software_state; +} __packed; + +static inline bool cxl_adapter_link_ok(struct cxl *cxl, struct cxl_afu *afu) +{ + struct pci_dev *pdev; + + if (cpu_has_feature(CPU_FTR_HVMODE)) { + pdev = to_pci_dev(cxl->dev.parent); + return !pci_channel_offline(pdev); + } + return true; +} + +static inline void __iomem *_cxl_p1_addr(struct cxl *cxl, cxl_p1_reg_t reg) +{ + WARN_ON(!cpu_has_feature(CPU_FTR_HVMODE)); + return cxl->native->p1_mmio + cxl_reg_off(reg); +} + +static inline void cxl_p1_write(struct cxl *cxl, cxl_p1_reg_t reg, u64 val) +{ + if (likely(cxl_adapter_link_ok(cxl, NULL))) + out_be64(_cxl_p1_addr(cxl, reg), val); +} + +static inline u64 cxl_p1_read(struct cxl *cxl, cxl_p1_reg_t reg) +{ + if (likely(cxl_adapter_link_ok(cxl, NULL))) + return in_be64(_cxl_p1_addr(cxl, reg)); + else + return ~0ULL; +} + +static inline void __iomem *_cxl_p1n_addr(struct cxl_afu *afu, cxl_p1n_reg_t reg) +{ + WARN_ON(!cpu_has_feature(CPU_FTR_HVMODE)); + return afu->native->p1n_mmio + cxl_reg_off(reg); +} + +static inline void cxl_p1n_write(struct cxl_afu *afu, cxl_p1n_reg_t reg, u64 val) +{ + if (likely(cxl_adapter_link_ok(afu->adapter, afu))) + out_be64(_cxl_p1n_addr(afu, reg), val); +} + +static inline u64 cxl_p1n_read(struct cxl_afu *afu, cxl_p1n_reg_t reg) +{ + if (likely(cxl_adapter_link_ok(afu->adapter, afu))) + return in_be64(_cxl_p1n_addr(afu, reg)); + else + return ~0ULL; +} + +static inline void __iomem *_cxl_p2n_addr(struct cxl_afu *afu, cxl_p2n_reg_t reg) +{ + return afu->p2n_mmio + cxl_reg_off(reg); +} + +static inline void cxl_p2n_write(struct cxl_afu *afu, cxl_p2n_reg_t reg, u64 val) +{ + if (likely(cxl_adapter_link_ok(afu->adapter, afu))) + out_be64(_cxl_p2n_addr(afu, reg), val); +} + +static inline u64 cxl_p2n_read(struct cxl_afu *afu, cxl_p2n_reg_t reg) +{ + if (likely(cxl_adapter_link_ok(afu->adapter, afu))) + return in_be64(_cxl_p2n_addr(afu, reg)); + else + return ~0ULL; +} + +static inline bool cxl_is_power8(void) +{ + if ((pvr_version_is(PVR_POWER8E)) || + (pvr_version_is(PVR_POWER8NVL)) || + (pvr_version_is(PVR_POWER8))) + return true; + return false; +} + +static inline bool cxl_is_power9(void) +{ + if (pvr_version_is(PVR_POWER9)) + return true; + return false; +} + +ssize_t cxl_pci_afu_read_err_buffer(struct cxl_afu *afu, char *buf, + loff_t off, size_t count); + + +struct cxl_calls { + void (*cxl_slbia)(struct mm_struct *mm); + struct module *owner; +}; +int register_cxl_calls(struct cxl_calls *calls); +void unregister_cxl_calls(struct cxl_calls *calls); +int cxl_update_properties(struct device_node *dn, struct property *new_prop); + +void cxl_remove_adapter_nr(struct cxl *adapter); + +void cxl_release_spa(struct cxl_afu *afu); + +dev_t cxl_get_dev(void); +int cxl_file_init(void); +void cxl_file_exit(void); +int cxl_register_adapter(struct cxl *adapter); +int cxl_register_afu(struct cxl_afu *afu); +int cxl_chardev_d_afu_add(struct cxl_afu *afu); +int cxl_chardev_m_afu_add(struct cxl_afu *afu); +int cxl_chardev_s_afu_add(struct cxl_afu *afu); +void cxl_chardev_afu_remove(struct cxl_afu *afu); + +void cxl_context_detach_all(struct cxl_afu *afu); +void cxl_context_free(struct cxl_context *ctx); +void cxl_context_detach(struct cxl_context *ctx); + +int cxl_sysfs_adapter_add(struct cxl *adapter); +void cxl_sysfs_adapter_remove(struct cxl *adapter); +int cxl_sysfs_afu_add(struct cxl_afu *afu); +void cxl_sysfs_afu_remove(struct cxl_afu *afu); +int cxl_sysfs_afu_m_add(struct cxl_afu *afu); +void cxl_sysfs_afu_m_remove(struct cxl_afu *afu); + +struct cxl *cxl_alloc_adapter(void); +struct cxl_afu *cxl_alloc_afu(struct cxl *adapter, int slice); +int cxl_afu_select_best_mode(struct cxl_afu *afu); + +int cxl_native_register_psl_irq(struct cxl_afu *afu); +void cxl_native_release_psl_irq(struct cxl_afu *afu); +int cxl_native_register_psl_err_irq(struct cxl *adapter); +void cxl_native_release_psl_err_irq(struct cxl *adapter); +int cxl_native_register_serr_irq(struct cxl_afu *afu); +void cxl_native_release_serr_irq(struct cxl_afu *afu); +int afu_register_irqs(struct cxl_context *ctx, u32 count); +void afu_release_irqs(struct cxl_context *ctx, void *cookie); +void afu_irq_name_free(struct cxl_context *ctx); + +int cxl_attach_afu_directed_psl9(struct cxl_context *ctx, u64 wed, u64 amr); +int cxl_attach_afu_directed_psl8(struct cxl_context *ctx, u64 wed, u64 amr); +int cxl_activate_dedicated_process_psl9(struct cxl_afu *afu); +int cxl_activate_dedicated_process_psl8(struct cxl_afu *afu); +int cxl_attach_dedicated_process_psl9(struct cxl_context *ctx, u64 wed, u64 amr); +int cxl_attach_dedicated_process_psl8(struct cxl_context *ctx, u64 wed, u64 amr); +void cxl_update_dedicated_ivtes_psl9(struct cxl_context *ctx); +void cxl_update_dedicated_ivtes_psl8(struct cxl_context *ctx); + +#ifdef CONFIG_DEBUG_FS + +int cxl_debugfs_init(void); +void cxl_debugfs_exit(void); +int cxl_debugfs_adapter_add(struct cxl *adapter); +void cxl_debugfs_adapter_remove(struct cxl *adapter); +int cxl_debugfs_afu_add(struct cxl_afu *afu); +void cxl_debugfs_afu_remove(struct cxl_afu *afu); +void cxl_debugfs_add_adapter_regs_psl9(struct cxl *adapter, struct dentry *dir); +void cxl_debugfs_add_adapter_regs_psl8(struct cxl *adapter, struct dentry *dir); +void cxl_debugfs_add_afu_regs_psl9(struct cxl_afu *afu, struct dentry *dir); +void cxl_debugfs_add_afu_regs_psl8(struct cxl_afu *afu, struct dentry *dir); + +#else /* CONFIG_DEBUG_FS */ + +static inline int __init cxl_debugfs_init(void) +{ + return 0; +} + +static inline void cxl_debugfs_exit(void) +{ +} + +static inline int cxl_debugfs_adapter_add(struct cxl *adapter) +{ + return 0; +} + +static inline void cxl_debugfs_adapter_remove(struct cxl *adapter) +{ +} + +static inline int cxl_debugfs_afu_add(struct cxl_afu *afu) +{ + return 0; +} + +static inline void cxl_debugfs_afu_remove(struct cxl_afu *afu) +{ +} + +static inline void cxl_debugfs_add_adapter_regs_psl9(struct cxl *adapter, + struct dentry *dir) +{ +} + +static inline void cxl_debugfs_add_adapter_regs_psl8(struct cxl *adapter, + struct dentry *dir) +{ +} + +static inline void cxl_debugfs_add_afu_regs_psl9(struct cxl_afu *afu, struct dentry *dir) +{ +} + +static inline void cxl_debugfs_add_afu_regs_psl8(struct cxl_afu *afu, struct dentry *dir) +{ +} + +#endif /* CONFIG_DEBUG_FS */ + +void cxl_handle_fault(struct work_struct *work); +void cxl_prefault(struct cxl_context *ctx, u64 wed); +int cxl_handle_mm_fault(struct mm_struct *mm, u64 dsisr, u64 dar); + +struct cxl *get_cxl_adapter(int num); +int cxl_alloc_sst(struct cxl_context *ctx); +void cxl_dump_debug_buffer(void *addr, size_t size); + +void init_cxl_native(void); + +struct cxl_context *cxl_context_alloc(void); +int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master); +void cxl_context_set_mapping(struct cxl_context *ctx, + struct address_space *mapping); +void cxl_context_free(struct cxl_context *ctx); +int cxl_context_iomap(struct cxl_context *ctx, struct vm_area_struct *vma); +unsigned int cxl_map_irq(struct cxl *adapter, irq_hw_number_t hwirq, + irq_handler_t handler, void *cookie, const char *name); +void cxl_unmap_irq(unsigned int virq, void *cookie); +int __detach_context(struct cxl_context *ctx); + +/* + * This must match the layout of the H_COLLECT_CA_INT_INFO retbuf defined + * in PAPR. + * Field pid_tid is now 'reserved' because it's no more used on bare-metal. + * On a guest environment, PSL_PID_An is located on the upper 32 bits and + * PSL_TID_An register in the lower 32 bits. + */ +struct cxl_irq_info { + u64 dsisr; + u64 dar; + u64 dsr; + u64 reserved; + u64 afu_err; + u64 errstat; + u64 proc_handle; + u64 padding[2]; /* to match the expected retbuf size for plpar_hcall9 */ +}; + +void cxl_assign_psn_space(struct cxl_context *ctx); +int cxl_invalidate_all_psl9(struct cxl *adapter); +int cxl_invalidate_all_psl8(struct cxl *adapter); +irqreturn_t cxl_irq_psl9(int irq, struct cxl_context *ctx, struct cxl_irq_info *irq_info); +irqreturn_t cxl_irq_psl8(int irq, struct cxl_context *ctx, struct cxl_irq_info *irq_info); +irqreturn_t cxl_fail_irq_psl(struct cxl_afu *afu, struct cxl_irq_info *irq_info); +int cxl_register_one_irq(struct cxl *adapter, irq_handler_t handler, + void *cookie, irq_hw_number_t *dest_hwirq, + unsigned int *dest_virq, const char *name); + +int cxl_check_error(struct cxl_afu *afu); +int cxl_afu_slbia(struct cxl_afu *afu); +int cxl_data_cache_flush(struct cxl *adapter); +int cxl_afu_disable(struct cxl_afu *afu); +int cxl_psl_purge(struct cxl_afu *afu); +int cxl_calc_capp_routing(struct pci_dev *dev, u64 *chipid, + u32 *phb_index, u64 *capp_unit_id); +int cxl_slot_is_switched(struct pci_dev *dev); +int cxl_get_xsl9_dsnctl(struct pci_dev *dev, u64 capp_unit_id, u64 *reg); +u64 cxl_calculate_sr(bool master, bool kernel, bool real_mode, bool p9); + +void cxl_native_irq_dump_regs_psl9(struct cxl_context *ctx); +void cxl_native_irq_dump_regs_psl8(struct cxl_context *ctx); +void cxl_native_err_irq_dump_regs_psl8(struct cxl *adapter); +void cxl_native_err_irq_dump_regs_psl9(struct cxl *adapter); +int cxl_pci_vphb_add(struct cxl_afu *afu); +void cxl_pci_vphb_remove(struct cxl_afu *afu); +void cxl_release_mapping(struct cxl_context *ctx); + +extern struct pci_driver cxl_pci_driver; +extern struct platform_driver cxl_of_driver; +int afu_allocate_irqs(struct cxl_context *ctx, u32 count); + +int afu_open(struct inode *inode, struct file *file); +int afu_release(struct inode *inode, struct file *file); +long afu_ioctl(struct file *file, unsigned int cmd, unsigned long arg); +int afu_mmap(struct file *file, struct vm_area_struct *vm); +__poll_t afu_poll(struct file *file, struct poll_table_struct *poll); +ssize_t afu_read(struct file *file, char __user *buf, size_t count, loff_t *off); +extern const struct file_operations afu_fops; + +struct cxl *cxl_guest_init_adapter(struct device_node *np, struct platform_device *dev); +void cxl_guest_remove_adapter(struct cxl *adapter); +int cxl_of_read_adapter_handle(struct cxl *adapter, struct device_node *np); +int cxl_of_read_adapter_properties(struct cxl *adapter, struct device_node *np); +ssize_t cxl_guest_read_adapter_vpd(struct cxl *adapter, void *buf, size_t len); +ssize_t cxl_guest_read_afu_vpd(struct cxl_afu *afu, void *buf, size_t len); +int cxl_guest_init_afu(struct cxl *adapter, int slice, struct device_node *afu_np); +void cxl_guest_remove_afu(struct cxl_afu *afu); +int cxl_of_read_afu_handle(struct cxl_afu *afu, struct device_node *afu_np); +int cxl_of_read_afu_properties(struct cxl_afu *afu, struct device_node *afu_np); +int cxl_guest_add_chardev(struct cxl *adapter); +void cxl_guest_remove_chardev(struct cxl *adapter); +void cxl_guest_reload_module(struct cxl *adapter); +int cxl_of_probe(struct platform_device *pdev); + +struct cxl_backend_ops { + struct module *module; + int (*adapter_reset)(struct cxl *adapter); + int (*alloc_one_irq)(struct cxl *adapter); + void (*release_one_irq)(struct cxl *adapter, int hwirq); + int (*alloc_irq_ranges)(struct cxl_irq_ranges *irqs, + struct cxl *adapter, unsigned int num); + void (*release_irq_ranges)(struct cxl_irq_ranges *irqs, + struct cxl *adapter); + int (*setup_irq)(struct cxl *adapter, unsigned int hwirq, + unsigned int virq); + irqreturn_t (*handle_psl_slice_error)(struct cxl_context *ctx, + u64 dsisr, u64 errstat); + irqreturn_t (*psl_interrupt)(int irq, void *data); + int (*ack_irq)(struct cxl_context *ctx, u64 tfc, u64 psl_reset_mask); + void (*irq_wait)(struct cxl_context *ctx); + int (*attach_process)(struct cxl_context *ctx, bool kernel, + u64 wed, u64 amr); + int (*detach_process)(struct cxl_context *ctx); + void (*update_ivtes)(struct cxl_context *ctx); + bool (*support_attributes)(const char *attr_name, enum cxl_attrs type); + bool (*link_ok)(struct cxl *cxl, struct cxl_afu *afu); + void (*release_afu)(struct device *dev); + ssize_t (*afu_read_err_buffer)(struct cxl_afu *afu, char *buf, + loff_t off, size_t count); + int (*afu_check_and_enable)(struct cxl_afu *afu); + int (*afu_activate_mode)(struct cxl_afu *afu, int mode); + int (*afu_deactivate_mode)(struct cxl_afu *afu, int mode); + int (*afu_reset)(struct cxl_afu *afu); + int (*afu_cr_read8)(struct cxl_afu *afu, int cr_idx, u64 offset, u8 *val); + int (*afu_cr_read16)(struct cxl_afu *afu, int cr_idx, u64 offset, u16 *val); + int (*afu_cr_read32)(struct cxl_afu *afu, int cr_idx, u64 offset, u32 *val); + int (*afu_cr_read64)(struct cxl_afu *afu, int cr_idx, u64 offset, u64 *val); + int (*afu_cr_write8)(struct cxl_afu *afu, int cr_idx, u64 offset, u8 val); + int (*afu_cr_write16)(struct cxl_afu *afu, int cr_idx, u64 offset, u16 val); + int (*afu_cr_write32)(struct cxl_afu *afu, int cr_idx, u64 offset, u32 val); + ssize_t (*read_adapter_vpd)(struct cxl *adapter, void *buf, size_t count); +}; +extern const struct cxl_backend_ops cxl_native_ops; +extern const struct cxl_backend_ops cxl_guest_ops; +extern const struct cxl_backend_ops *cxl_ops; + +/* check if the given pci_dev is on the the cxl vphb bus */ +bool cxl_pci_is_vphb_device(struct pci_dev *dev); + +/* decode AFU error bits in the PSL register PSL_SERR_An */ +void cxl_afu_decode_psl_serr(struct cxl_afu *afu, u64 serr); + +/* + * Increments the number of attached contexts on an adapter. + * In case an adapter_context_lock is taken the return -EBUSY. + */ +int cxl_adapter_context_get(struct cxl *adapter); + +/* Decrements the number of attached contexts on an adapter */ +void cxl_adapter_context_put(struct cxl *adapter); + +/* If no active contexts then prevents contexts from being attached */ +int cxl_adapter_context_lock(struct cxl *adapter); + +/* Unlock the contexts-lock if taken. Warn and force unlock otherwise */ +void cxl_adapter_context_unlock(struct cxl *adapter); + +/* Increases the reference count to "struct mm_struct" */ +void cxl_context_mm_count_get(struct cxl_context *ctx); + +/* Decrements the reference count to "struct mm_struct" */ +void cxl_context_mm_count_put(struct cxl_context *ctx); + +#endif diff --git a/drivers/misc/cxl/cxllib.c b/drivers/misc/cxl/cxllib.c new file mode 100644 index 000000000..5a3f91255 --- /dev/null +++ b/drivers/misc/cxl/cxllib.c @@ -0,0 +1,277 @@ +/* + * Copyright 2017 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include + +#include "cxl.h" + +#define CXL_INVALID_DRA ~0ull +#define CXL_DUMMY_READ_SIZE 128 +#define CXL_DUMMY_READ_ALIGN 8 +#define CXL_CAPI_WINDOW_START 0x2000000000000ull +#define CXL_CAPI_WINDOW_LOG_SIZE 48 +#define CXL_XSL_CONFIG_CURRENT_VERSION CXL_XSL_CONFIG_VERSION1 + + +bool cxllib_slot_is_supported(struct pci_dev *dev, unsigned long flags) +{ + int rc; + u32 phb_index; + u64 chip_id, capp_unit_id; + + /* No flags currently supported */ + if (flags) + return false; + + if (!cpu_has_feature(CPU_FTR_HVMODE)) + return false; + + if (!cxl_is_power9()) + return false; + + if (cxl_slot_is_switched(dev)) + return false; + + /* on p9, some pci slots are not connected to a CAPP unit */ + rc = cxl_calc_capp_routing(dev, &chip_id, &phb_index, &capp_unit_id); + if (rc) + return false; + + return true; +} +EXPORT_SYMBOL_GPL(cxllib_slot_is_supported); + +static DEFINE_MUTEX(dra_mutex); +static u64 dummy_read_addr = CXL_INVALID_DRA; + +static int allocate_dummy_read_buf(void) +{ + u64 buf, vaddr; + size_t buf_size; + + /* + * Dummy read buffer is 128-byte long, aligned on a + * 256-byte boundary and we need the physical address. + */ + buf_size = CXL_DUMMY_READ_SIZE + (1ull << CXL_DUMMY_READ_ALIGN); + buf = (u64) kzalloc(buf_size, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + vaddr = (buf + (1ull << CXL_DUMMY_READ_ALIGN) - 1) & + (~0ull << CXL_DUMMY_READ_ALIGN); + + WARN((vaddr + CXL_DUMMY_READ_SIZE) > (buf + buf_size), + "Dummy read buffer alignment issue"); + dummy_read_addr = virt_to_phys((void *) vaddr); + return 0; +} + +int cxllib_get_xsl_config(struct pci_dev *dev, struct cxllib_xsl_config *cfg) +{ + int rc; + u32 phb_index; + u64 chip_id, capp_unit_id; + + if (!cpu_has_feature(CPU_FTR_HVMODE)) + return -EINVAL; + + mutex_lock(&dra_mutex); + if (dummy_read_addr == CXL_INVALID_DRA) { + rc = allocate_dummy_read_buf(); + if (rc) { + mutex_unlock(&dra_mutex); + return rc; + } + } + mutex_unlock(&dra_mutex); + + rc = cxl_calc_capp_routing(dev, &chip_id, &phb_index, &capp_unit_id); + if (rc) + return rc; + + rc = cxl_get_xsl9_dsnctl(dev, capp_unit_id, &cfg->dsnctl); + if (rc) + return rc; + + cfg->version = CXL_XSL_CONFIG_CURRENT_VERSION; + cfg->log_bar_size = CXL_CAPI_WINDOW_LOG_SIZE; + cfg->bar_addr = CXL_CAPI_WINDOW_START; + cfg->dra = dummy_read_addr; + return 0; +} +EXPORT_SYMBOL_GPL(cxllib_get_xsl_config); + +int cxllib_switch_phb_mode(struct pci_dev *dev, enum cxllib_mode mode, + unsigned long flags) +{ + int rc = 0; + + if (!cpu_has_feature(CPU_FTR_HVMODE)) + return -EINVAL; + + switch (mode) { + case CXL_MODE_PCI: + /* + * We currently don't support going back to PCI mode + * However, we'll turn the invalidations off, so that + * the firmware doesn't have to ack them and can do + * things like reset, etc.. with no worries. + * So always return EPERM (can't go back to PCI) or + * EBUSY if we couldn't even turn off snooping + */ + rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_SNOOP_OFF); + if (rc) + rc = -EBUSY; + else + rc = -EPERM; + break; + case CXL_MODE_CXL: + /* DMA only supported on TVT1 for the time being */ + if (flags != CXL_MODE_DMA_TVT1) + return -EINVAL; + rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_DMA_TVT1); + if (rc) + return rc; + rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_SNOOP_ON); + break; + default: + rc = -EINVAL; + } + return rc; +} +EXPORT_SYMBOL_GPL(cxllib_switch_phb_mode); + +/* + * When switching the PHB to capi mode, the TVT#1 entry for + * the Partitionable Endpoint is set in bypass mode, like + * in PCI mode. + * Configure the device dma to use TVT#1, which is done + * by calling dma_set_mask() with a mask large enough. + */ +int cxllib_set_device_dma(struct pci_dev *dev, unsigned long flags) +{ + int rc; + + if (flags) + return -EINVAL; + + rc = dma_set_mask(&dev->dev, DMA_BIT_MASK(64)); + return rc; +} +EXPORT_SYMBOL_GPL(cxllib_set_device_dma); + +int cxllib_get_PE_attributes(struct task_struct *task, + unsigned long translation_mode, + struct cxllib_pe_attributes *attr) +{ + struct mm_struct *mm = NULL; + + if (translation_mode != CXL_TRANSLATED_MODE && + translation_mode != CXL_REAL_MODE) + return -EINVAL; + + attr->sr = cxl_calculate_sr(false, + task == NULL, + translation_mode == CXL_REAL_MODE, + true); + attr->lpid = mfspr(SPRN_LPID); + if (task) { + mm = get_task_mm(task); + if (mm == NULL) + return -EINVAL; + /* + * Caller is keeping a reference on mm_users for as long + * as XSL uses the memory context + */ + attr->pid = mm->context.id; + mmput(mm); + attr->tid = task->thread.tidr; + } else { + attr->pid = 0; + attr->tid = 0; + } + return 0; +} +EXPORT_SYMBOL_GPL(cxllib_get_PE_attributes); + +static int get_vma_info(struct mm_struct *mm, u64 addr, + u64 *vma_start, u64 *vma_end, + unsigned long *page_size) +{ + struct vm_area_struct *vma = NULL; + int rc = 0; + + down_read(&mm->mmap_sem); + + vma = find_vma(mm, addr); + if (!vma) { + rc = -EFAULT; + goto out; + } + *page_size = vma_kernel_pagesize(vma); + *vma_start = vma->vm_start; + *vma_end = vma->vm_end; +out: + up_read(&mm->mmap_sem); + return rc; +} + +int cxllib_handle_fault(struct mm_struct *mm, u64 addr, u64 size, u64 flags) +{ + int rc; + u64 dar, vma_start, vma_end; + unsigned long page_size; + + if (mm == NULL) + return -EFAULT; + + /* + * The buffer we have to process can extend over several pages + * and may also cover several VMAs. + * We iterate over all the pages. The page size could vary + * between VMAs. + */ + rc = get_vma_info(mm, addr, &vma_start, &vma_end, &page_size); + if (rc) + return rc; + + for (dar = (addr & ~(page_size - 1)); dar < (addr + size); + dar += page_size) { + if (dar < vma_start || dar >= vma_end) { + /* + * We don't hold the mm->mmap_sem semaphore + * while iterating, since the semaphore is + * required by one of the lower-level page + * fault processing functions and it could + * create a deadlock. + * + * It means the VMAs can be altered between 2 + * loop iterations and we could theoretically + * miss a page (however unlikely). But that's + * not really a problem, as the driver will + * retry access, get another page fault on the + * missing page and call us again. + */ + rc = get_vma_info(mm, dar, &vma_start, &vma_end, + &page_size); + if (rc) + return rc; + } + + rc = cxl_handle_mm_fault(mm, flags, dar); + if (rc) + return -EFAULT; + } + return 0; +} +EXPORT_SYMBOL_GPL(cxllib_handle_fault); diff --git a/drivers/misc/cxl/debugfs.c b/drivers/misc/cxl/debugfs.c new file mode 100644 index 000000000..a1921d815 --- /dev/null +++ b/drivers/misc/cxl/debugfs.c @@ -0,0 +1,152 @@ +/* + * Copyright 2014 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include + +#include "cxl.h" + +static struct dentry *cxl_debugfs; + +/* Helpers to export CXL mmaped IO registers via debugfs */ +static int debugfs_io_u64_get(void *data, u64 *val) +{ + *val = in_be64((u64 __iomem *)data); + return 0; +} + +static int debugfs_io_u64_set(void *data, u64 val) +{ + out_be64((u64 __iomem *)data, val); + return 0; +} +DEFINE_DEBUGFS_ATTRIBUTE(fops_io_x64, debugfs_io_u64_get, debugfs_io_u64_set, + "0x%016llx\n"); + +static struct dentry *debugfs_create_io_x64(const char *name, umode_t mode, + struct dentry *parent, u64 __iomem *value) +{ + return debugfs_create_file_unsafe(name, mode, parent, + (void __force *)value, &fops_io_x64); +} + +void cxl_debugfs_add_adapter_regs_psl9(struct cxl *adapter, struct dentry *dir) +{ + debugfs_create_io_x64("fir1", S_IRUSR, dir, _cxl_p1_addr(adapter, CXL_PSL9_FIR1)); + debugfs_create_io_x64("fir_mask", 0400, dir, + _cxl_p1_addr(adapter, CXL_PSL9_FIR_MASK)); + debugfs_create_io_x64("fir_cntl", S_IRUSR, dir, _cxl_p1_addr(adapter, CXL_PSL9_FIR_CNTL)); + debugfs_create_io_x64("trace", S_IRUSR | S_IWUSR, dir, _cxl_p1_addr(adapter, CXL_PSL9_TRACECFG)); + debugfs_create_io_x64("debug", 0600, dir, + _cxl_p1_addr(adapter, CXL_PSL9_DEBUG)); + debugfs_create_io_x64("xsl-debug", 0600, dir, + _cxl_p1_addr(adapter, CXL_XSL9_DBG)); +} + +void cxl_debugfs_add_adapter_regs_psl8(struct cxl *adapter, struct dentry *dir) +{ + debugfs_create_io_x64("fir1", S_IRUSR, dir, _cxl_p1_addr(adapter, CXL_PSL_FIR1)); + debugfs_create_io_x64("fir2", S_IRUSR, dir, _cxl_p1_addr(adapter, CXL_PSL_FIR2)); + debugfs_create_io_x64("fir_cntl", S_IRUSR, dir, _cxl_p1_addr(adapter, CXL_PSL_FIR_CNTL)); + debugfs_create_io_x64("trace", S_IRUSR | S_IWUSR, dir, _cxl_p1_addr(adapter, CXL_PSL_TRACE)); +} + +int cxl_debugfs_adapter_add(struct cxl *adapter) +{ + struct dentry *dir; + char buf[32]; + + if (!cxl_debugfs) + return -ENODEV; + + snprintf(buf, 32, "card%i", adapter->adapter_num); + dir = debugfs_create_dir(buf, cxl_debugfs); + if (IS_ERR(dir)) + return PTR_ERR(dir); + adapter->debugfs = dir; + + debugfs_create_io_x64("err_ivte", S_IRUSR, dir, _cxl_p1_addr(adapter, CXL_PSL_ErrIVTE)); + + if (adapter->native->sl_ops->debugfs_add_adapter_regs) + adapter->native->sl_ops->debugfs_add_adapter_regs(adapter, dir); + return 0; +} + +void cxl_debugfs_adapter_remove(struct cxl *adapter) +{ + debugfs_remove_recursive(adapter->debugfs); +} + +void cxl_debugfs_add_afu_regs_psl9(struct cxl_afu *afu, struct dentry *dir) +{ + debugfs_create_io_x64("serr", S_IRUSR, dir, _cxl_p1n_addr(afu, CXL_PSL_SERR_An)); +} + +void cxl_debugfs_add_afu_regs_psl8(struct cxl_afu *afu, struct dentry *dir) +{ + debugfs_create_io_x64("sstp0", S_IRUSR, dir, _cxl_p2n_addr(afu, CXL_SSTP0_An)); + debugfs_create_io_x64("sstp1", S_IRUSR, dir, _cxl_p2n_addr(afu, CXL_SSTP1_An)); + + debugfs_create_io_x64("fir", S_IRUSR, dir, _cxl_p1n_addr(afu, CXL_PSL_FIR_SLICE_An)); + debugfs_create_io_x64("serr", S_IRUSR, dir, _cxl_p1n_addr(afu, CXL_PSL_SERR_An)); + debugfs_create_io_x64("afu_debug", S_IRUSR, dir, _cxl_p1n_addr(afu, CXL_AFU_DEBUG_An)); + debugfs_create_io_x64("trace", S_IRUSR | S_IWUSR, dir, _cxl_p1n_addr(afu, CXL_PSL_SLICE_TRACE)); +} + +int cxl_debugfs_afu_add(struct cxl_afu *afu) +{ + struct dentry *dir; + char buf[32]; + + if (!afu->adapter->debugfs) + return -ENODEV; + + snprintf(buf, 32, "psl%i.%i", afu->adapter->adapter_num, afu->slice); + dir = debugfs_create_dir(buf, afu->adapter->debugfs); + if (IS_ERR(dir)) + return PTR_ERR(dir); + afu->debugfs = dir; + + debugfs_create_io_x64("sr", S_IRUSR, dir, _cxl_p1n_addr(afu, CXL_PSL_SR_An)); + debugfs_create_io_x64("dsisr", S_IRUSR, dir, _cxl_p2n_addr(afu, CXL_PSL_DSISR_An)); + debugfs_create_io_x64("dar", S_IRUSR, dir, _cxl_p2n_addr(afu, CXL_PSL_DAR_An)); + + debugfs_create_io_x64("err_status", S_IRUSR, dir, _cxl_p2n_addr(afu, CXL_PSL_ErrStat_An)); + + if (afu->adapter->native->sl_ops->debugfs_add_afu_regs) + afu->adapter->native->sl_ops->debugfs_add_afu_regs(afu, dir); + + return 0; +} + +void cxl_debugfs_afu_remove(struct cxl_afu *afu) +{ + debugfs_remove_recursive(afu->debugfs); +} + +int __init cxl_debugfs_init(void) +{ + struct dentry *ent; + + if (!cpu_has_feature(CPU_FTR_HVMODE)) + return 0; + + ent = debugfs_create_dir("cxl", NULL); + if (IS_ERR(ent)) + return PTR_ERR(ent); + cxl_debugfs = ent; + + return 0; +} + +void cxl_debugfs_exit(void) +{ + debugfs_remove_recursive(cxl_debugfs); +} diff --git a/drivers/misc/cxl/fault.c b/drivers/misc/cxl/fault.c new file mode 100644 index 000000000..dc7b34174 --- /dev/null +++ b/drivers/misc/cxl/fault.c @@ -0,0 +1,360 @@ +/* + * Copyright 2014 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include + +#undef MODULE_PARAM_PREFIX +#define MODULE_PARAM_PREFIX "cxl" "." +#include +#include +#include + +#include "cxl.h" +#include "trace.h" + +static bool sste_matches(struct cxl_sste *sste, struct copro_slb *slb) +{ + return ((sste->vsid_data == cpu_to_be64(slb->vsid)) && + (sste->esid_data == cpu_to_be64(slb->esid))); +} + +/* + * This finds a free SSTE for the given SLB, or returns NULL if it's already in + * the segment table. + */ +static struct cxl_sste *find_free_sste(struct cxl_context *ctx, + struct copro_slb *slb) +{ + struct cxl_sste *primary, *sste, *ret = NULL; + unsigned int mask = (ctx->sst_size >> 7) - 1; /* SSTP0[SegTableSize] */ + unsigned int entry; + unsigned int hash; + + if (slb->vsid & SLB_VSID_B_1T) + hash = (slb->esid >> SID_SHIFT_1T) & mask; + else /* 256M */ + hash = (slb->esid >> SID_SHIFT) & mask; + + primary = ctx->sstp + (hash << 3); + + for (entry = 0, sste = primary; entry < 8; entry++, sste++) { + if (!ret && !(be64_to_cpu(sste->esid_data) & SLB_ESID_V)) + ret = sste; + if (sste_matches(sste, slb)) + return NULL; + } + if (ret) + return ret; + + /* Nothing free, select an entry to cast out */ + ret = primary + ctx->sst_lru; + ctx->sst_lru = (ctx->sst_lru + 1) & 0x7; + + return ret; +} + +static void cxl_load_segment(struct cxl_context *ctx, struct copro_slb *slb) +{ + /* mask is the group index, we search primary and secondary here. */ + struct cxl_sste *sste; + unsigned long flags; + + spin_lock_irqsave(&ctx->sste_lock, flags); + sste = find_free_sste(ctx, slb); + if (!sste) + goto out_unlock; + + pr_devel("CXL Populating SST[%li]: %#llx %#llx\n", + sste - ctx->sstp, slb->vsid, slb->esid); + trace_cxl_ste_write(ctx, sste - ctx->sstp, slb->esid, slb->vsid); + + sste->vsid_data = cpu_to_be64(slb->vsid); + sste->esid_data = cpu_to_be64(slb->esid); +out_unlock: + spin_unlock_irqrestore(&ctx->sste_lock, flags); +} + +static int cxl_fault_segment(struct cxl_context *ctx, struct mm_struct *mm, + u64 ea) +{ + struct copro_slb slb = {0,0}; + int rc; + + if (!(rc = copro_calculate_slb(mm, ea, &slb))) { + cxl_load_segment(ctx, &slb); + } + + return rc; +} + +static void cxl_ack_ae(struct cxl_context *ctx) +{ + unsigned long flags; + + cxl_ops->ack_irq(ctx, CXL_PSL_TFC_An_AE, 0); + + spin_lock_irqsave(&ctx->lock, flags); + ctx->pending_fault = true; + ctx->fault_addr = ctx->dar; + ctx->fault_dsisr = ctx->dsisr; + spin_unlock_irqrestore(&ctx->lock, flags); + + wake_up_all(&ctx->wq); +} + +static int cxl_handle_segment_miss(struct cxl_context *ctx, + struct mm_struct *mm, u64 ea) +{ + int rc; + + pr_devel("CXL interrupt: Segment fault pe: %i ea: %#llx\n", ctx->pe, ea); + trace_cxl_ste_miss(ctx, ea); + + if ((rc = cxl_fault_segment(ctx, mm, ea))) + cxl_ack_ae(ctx); + else { + + mb(); /* Order seg table write to TFC MMIO write */ + cxl_ops->ack_irq(ctx, CXL_PSL_TFC_An_R, 0); + } + + return IRQ_HANDLED; +} + +int cxl_handle_mm_fault(struct mm_struct *mm, u64 dsisr, u64 dar) +{ + vm_fault_t flt = 0; + int result; + unsigned long access, flags, inv_flags = 0; + + /* + * Add the fault handling cpu to task mm cpumask so that we + * can do a safe lockless page table walk when inserting the + * hash page table entry. This function get called with a + * valid mm for user space addresses. Hence using the if (mm) + * check is sufficient here. + */ + if (mm && !cpumask_test_cpu(smp_processor_id(), mm_cpumask(mm))) { + cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm)); + /* + * We need to make sure we walk the table only after + * we update the cpumask. The other side of the barrier + * is explained in serialize_against_pte_lookup() + */ + smp_mb(); + } + if ((result = copro_handle_mm_fault(mm, dar, dsisr, &flt))) { + pr_devel("copro_handle_mm_fault failed: %#x\n", result); + return result; + } + + if (!radix_enabled()) { + /* + * update_mmu_cache() will not have loaded the hash since current->trap + * is not a 0x400 or 0x300, so just call hash_page_mm() here. + */ + access = _PAGE_PRESENT | _PAGE_READ; + if (dsisr & CXL_PSL_DSISR_An_S) + access |= _PAGE_WRITE; + + if (!mm && (REGION_ID(dar) != USER_REGION_ID)) + access |= _PAGE_PRIVILEGED; + + if (dsisr & DSISR_NOHPTE) + inv_flags |= HPTE_NOHPTE_UPDATE; + + local_irq_save(flags); + hash_page_mm(mm, dar, access, 0x300, inv_flags); + local_irq_restore(flags); + } + return 0; +} + +static void cxl_handle_page_fault(struct cxl_context *ctx, + struct mm_struct *mm, + u64 dsisr, u64 dar) +{ + trace_cxl_pte_miss(ctx, dsisr, dar); + + if (cxl_handle_mm_fault(mm, dsisr, dar)) { + cxl_ack_ae(ctx); + } else { + pr_devel("Page fault successfully handled for pe: %i!\n", ctx->pe); + cxl_ops->ack_irq(ctx, CXL_PSL_TFC_An_R, 0); + } +} + +/* + * Returns the mm_struct corresponding to the context ctx. + * mm_users == 0, the context may be in the process of being closed. + */ +static struct mm_struct *get_mem_context(struct cxl_context *ctx) +{ + if (ctx->mm == NULL) + return NULL; + + if (!atomic_inc_not_zero(&ctx->mm->mm_users)) + return NULL; + + return ctx->mm; +} + +static bool cxl_is_segment_miss(struct cxl_context *ctx, u64 dsisr) +{ + if ((cxl_is_power8() && (dsisr & CXL_PSL_DSISR_An_DS))) + return true; + + return false; +} + +static bool cxl_is_page_fault(struct cxl_context *ctx, u64 dsisr) +{ + if ((cxl_is_power8()) && (dsisr & CXL_PSL_DSISR_An_DM)) + return true; + + if (cxl_is_power9()) + return true; + + return false; +} + +void cxl_handle_fault(struct work_struct *fault_work) +{ + struct cxl_context *ctx = + container_of(fault_work, struct cxl_context, fault_work); + u64 dsisr = ctx->dsisr; + u64 dar = ctx->dar; + struct mm_struct *mm = NULL; + + if (cpu_has_feature(CPU_FTR_HVMODE)) { + if (cxl_p2n_read(ctx->afu, CXL_PSL_DSISR_An) != dsisr || + cxl_p2n_read(ctx->afu, CXL_PSL_DAR_An) != dar || + cxl_p2n_read(ctx->afu, CXL_PSL_PEHandle_An) != ctx->pe) { + /* Most likely explanation is harmless - a dedicated + * process has detached and these were cleared by the + * PSL purge, but warn about it just in case + */ + dev_notice(&ctx->afu->dev, "cxl_handle_fault: Translation fault regs changed\n"); + return; + } + } + + /* Early return if the context is being / has been detached */ + if (ctx->status == CLOSED) { + cxl_ack_ae(ctx); + return; + } + + pr_devel("CXL BOTTOM HALF handling fault for afu pe: %i. " + "DSISR: %#llx DAR: %#llx\n", ctx->pe, dsisr, dar); + + if (!ctx->kernel) { + + mm = get_mem_context(ctx); + if (mm == NULL) { + pr_devel("%s: unable to get mm for pe=%d pid=%i\n", + __func__, ctx->pe, pid_nr(ctx->pid)); + cxl_ack_ae(ctx); + return; + } else { + pr_devel("Handling page fault for pe=%d pid=%i\n", + ctx->pe, pid_nr(ctx->pid)); + } + } + + if (cxl_is_segment_miss(ctx, dsisr)) + cxl_handle_segment_miss(ctx, mm, dar); + else if (cxl_is_page_fault(ctx, dsisr)) + cxl_handle_page_fault(ctx, mm, dsisr, dar); + else + WARN(1, "cxl_handle_fault has nothing to handle\n"); + + if (mm) + mmput(mm); +} + +static void cxl_prefault_one(struct cxl_context *ctx, u64 ea) +{ + struct mm_struct *mm; + + mm = get_mem_context(ctx); + if (mm == NULL) { + pr_devel("cxl_prefault_one unable to get mm %i\n", + pid_nr(ctx->pid)); + return; + } + + cxl_fault_segment(ctx, mm, ea); + + mmput(mm); +} + +static u64 next_segment(u64 ea, u64 vsid) +{ + if (vsid & SLB_VSID_B_1T) + ea |= (1ULL << 40) - 1; + else + ea |= (1ULL << 28) - 1; + + return ea + 1; +} + +static void cxl_prefault_vma(struct cxl_context *ctx) +{ + u64 ea, last_esid = 0; + struct copro_slb slb; + struct vm_area_struct *vma; + int rc; + struct mm_struct *mm; + + mm = get_mem_context(ctx); + if (mm == NULL) { + pr_devel("cxl_prefault_vm unable to get mm %i\n", + pid_nr(ctx->pid)); + return; + } + + down_read(&mm->mmap_sem); + for (vma = mm->mmap; vma; vma = vma->vm_next) { + for (ea = vma->vm_start; ea < vma->vm_end; + ea = next_segment(ea, slb.vsid)) { + rc = copro_calculate_slb(mm, ea, &slb); + if (rc) + continue; + + if (last_esid == slb.esid) + continue; + + cxl_load_segment(ctx, &slb); + last_esid = slb.esid; + } + } + up_read(&mm->mmap_sem); + + mmput(mm); +} + +void cxl_prefault(struct cxl_context *ctx, u64 wed) +{ + switch (ctx->afu->prefault_mode) { + case CXL_PREFAULT_WED: + cxl_prefault_one(ctx, wed); + break; + case CXL_PREFAULT_ALL: + cxl_prefault_vma(ctx); + break; + default: + break; + } +} diff --git a/drivers/misc/cxl/file.c b/drivers/misc/cxl/file.c new file mode 100644 index 000000000..bd6ddbdb5 --- /dev/null +++ b/drivers/misc/cxl/file.c @@ -0,0 +1,703 @@ +/* + * Copyright 2014 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "cxl.h" +#include "trace.h" + +#define CXL_NUM_MINORS 256 /* Total to reserve */ + +#define CXL_AFU_MINOR_D(afu) (CXL_CARD_MINOR(afu->adapter) + 1 + (3 * afu->slice)) +#define CXL_AFU_MINOR_M(afu) (CXL_AFU_MINOR_D(afu) + 1) +#define CXL_AFU_MINOR_S(afu) (CXL_AFU_MINOR_D(afu) + 2) +#define CXL_AFU_MKDEV_D(afu) MKDEV(MAJOR(cxl_dev), CXL_AFU_MINOR_D(afu)) +#define CXL_AFU_MKDEV_M(afu) MKDEV(MAJOR(cxl_dev), CXL_AFU_MINOR_M(afu)) +#define CXL_AFU_MKDEV_S(afu) MKDEV(MAJOR(cxl_dev), CXL_AFU_MINOR_S(afu)) + +#define CXL_DEVT_AFU(dev) ((MINOR(dev) % CXL_DEV_MINORS - 1) / 3) + +#define CXL_DEVT_IS_CARD(dev) (MINOR(dev) % CXL_DEV_MINORS == 0) + +static dev_t cxl_dev; + +static struct class *cxl_class; + +static int __afu_open(struct inode *inode, struct file *file, bool master) +{ + struct cxl *adapter; + struct cxl_afu *afu; + struct cxl_context *ctx; + int adapter_num = CXL_DEVT_ADAPTER(inode->i_rdev); + int slice = CXL_DEVT_AFU(inode->i_rdev); + int rc = -ENODEV; + + pr_devel("afu_open afu%i.%i\n", slice, adapter_num); + + if (!(adapter = get_cxl_adapter(adapter_num))) + return -ENODEV; + + if (slice > adapter->slices) + goto err_put_adapter; + + spin_lock(&adapter->afu_list_lock); + if (!(afu = adapter->afu[slice])) { + spin_unlock(&adapter->afu_list_lock); + goto err_put_adapter; + } + + /* + * taking a ref to the afu so that it doesn't go away + * for rest of the function. This ref is released before + * we return. + */ + cxl_afu_get(afu); + spin_unlock(&adapter->afu_list_lock); + + if (!afu->current_mode) + goto err_put_afu; + + if (!cxl_ops->link_ok(adapter, afu)) { + rc = -EIO; + goto err_put_afu; + } + + if (!(ctx = cxl_context_alloc())) { + rc = -ENOMEM; + goto err_put_afu; + } + + rc = cxl_context_init(ctx, afu, master); + if (rc) + goto err_put_afu; + + cxl_context_set_mapping(ctx, inode->i_mapping); + + pr_devel("afu_open pe: %i\n", ctx->pe); + file->private_data = ctx; + + /* indicate success */ + rc = 0; + +err_put_afu: + /* release the ref taken earlier */ + cxl_afu_put(afu); +err_put_adapter: + put_device(&adapter->dev); + return rc; +} + +int afu_open(struct inode *inode, struct file *file) +{ + return __afu_open(inode, file, false); +} + +static int afu_master_open(struct inode *inode, struct file *file) +{ + return __afu_open(inode, file, true); +} + +int afu_release(struct inode *inode, struct file *file) +{ + struct cxl_context *ctx = file->private_data; + + pr_devel("%s: closing cxl file descriptor. pe: %i\n", + __func__, ctx->pe); + cxl_context_detach(ctx); + + + /* + * Delete the context's mapping pointer, unless it's created by the + * kernel API, in which case leave it so it can be freed by reclaim_ctx() + */ + if (!ctx->kernelapi) { + mutex_lock(&ctx->mapping_lock); + ctx->mapping = NULL; + mutex_unlock(&ctx->mapping_lock); + } + + /* + * At this this point all bottom halfs have finished and we should be + * getting no more IRQs from the hardware for this context. Once it's + * removed from the IDR (and RCU synchronised) it's safe to free the + * sstp and context. + */ + cxl_context_free(ctx); + + return 0; +} + +static long afu_ioctl_start_work(struct cxl_context *ctx, + struct cxl_ioctl_start_work __user *uwork) +{ + struct cxl_ioctl_start_work work; + u64 amr = 0; + int rc; + + pr_devel("%s: pe: %i\n", __func__, ctx->pe); + + /* Do this outside the status_mutex to avoid a circular dependency with + * the locking in cxl_mmap_fault() */ + if (copy_from_user(&work, uwork, sizeof(work))) + return -EFAULT; + + mutex_lock(&ctx->status_mutex); + if (ctx->status != OPENED) { + rc = -EIO; + goto out; + } + + /* + * if any of the reserved fields are set or any of the unused + * flags are set it's invalid + */ + if (work.reserved1 || work.reserved2 || work.reserved3 || + work.reserved4 || work.reserved5 || + (work.flags & ~CXL_START_WORK_ALL)) { + rc = -EINVAL; + goto out; + } + + if (!(work.flags & CXL_START_WORK_NUM_IRQS)) + work.num_interrupts = ctx->afu->pp_irqs; + else if ((work.num_interrupts < ctx->afu->pp_irqs) || + (work.num_interrupts > ctx->afu->irqs_max)) { + rc = -EINVAL; + goto out; + } + + if ((rc = afu_register_irqs(ctx, work.num_interrupts))) + goto out; + + if (work.flags & CXL_START_WORK_AMR) + amr = work.amr & mfspr(SPRN_UAMOR); + + if (work.flags & CXL_START_WORK_TID) + ctx->assign_tidr = true; + + ctx->mmio_err_ff = !!(work.flags & CXL_START_WORK_ERR_FF); + + /* + * Increment the mapped context count for adapter. This also checks + * if adapter_context_lock is taken. + */ + rc = cxl_adapter_context_get(ctx->afu->adapter); + if (rc) { + afu_release_irqs(ctx, ctx); + goto out; + } + + /* + * We grab the PID here and not in the file open to allow for the case + * where a process (master, some daemon, etc) has opened the chardev on + * behalf of another process, so the AFU's mm gets bound to the process + * that performs this ioctl and not the process that opened the file. + * Also we grab the PID of the group leader so that if the task that + * has performed the attach operation exits the mm context of the + * process is still accessible. + */ + ctx->pid = get_task_pid(current, PIDTYPE_PID); + + /* acquire a reference to the task's mm */ + ctx->mm = get_task_mm(current); + + /* ensure this mm_struct can't be freed */ + cxl_context_mm_count_get(ctx); + + if (ctx->mm) { + /* decrement the use count from above */ + mmput(ctx->mm); + /* make TLBIs for this context global */ + mm_context_add_copro(ctx->mm); + } + + /* + * Increment driver use count. Enables global TLBIs for hash + * and callbacks to handle the segment table + */ + cxl_ctx_get(); + + /* + * A barrier is needed to make sure all TLBIs are global + * before we attach and the context starts being used by the + * adapter. + * + * Needed after mm_context_add_copro() for radix and + * cxl_ctx_get() for hash/p8. + * + * The barrier should really be mb(), since it involves a + * device. However, it's only useful when we have local + * vs. global TLBIs, i.e SMP=y. So keep smp_mb(). + */ + smp_mb(); + + trace_cxl_attach(ctx, work.work_element_descriptor, work.num_interrupts, amr); + + if ((rc = cxl_ops->attach_process(ctx, false, work.work_element_descriptor, + amr))) { + afu_release_irqs(ctx, ctx); + cxl_adapter_context_put(ctx->afu->adapter); + put_pid(ctx->pid); + ctx->pid = NULL; + cxl_ctx_put(); + cxl_context_mm_count_put(ctx); + if (ctx->mm) + mm_context_remove_copro(ctx->mm); + goto out; + } + + rc = 0; + if (work.flags & CXL_START_WORK_TID) { + work.tid = ctx->tidr; + if (copy_to_user(uwork, &work, sizeof(work))) + rc = -EFAULT; + } + + ctx->status = STARTED; + +out: + mutex_unlock(&ctx->status_mutex); + return rc; +} + +static long afu_ioctl_process_element(struct cxl_context *ctx, + int __user *upe) +{ + pr_devel("%s: pe: %i\n", __func__, ctx->pe); + + if (copy_to_user(upe, &ctx->external_pe, sizeof(__u32))) + return -EFAULT; + + return 0; +} + +static long afu_ioctl_get_afu_id(struct cxl_context *ctx, + struct cxl_afu_id __user *upafuid) +{ + struct cxl_afu_id afuid = { 0 }; + + afuid.card_id = ctx->afu->adapter->adapter_num; + afuid.afu_offset = ctx->afu->slice; + afuid.afu_mode = ctx->afu->current_mode; + + /* set the flag bit in case the afu is a slave */ + if (ctx->afu->current_mode == CXL_MODE_DIRECTED && !ctx->master) + afuid.flags |= CXL_AFUID_FLAG_SLAVE; + + if (copy_to_user(upafuid, &afuid, sizeof(afuid))) + return -EFAULT; + + return 0; +} + +long afu_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + struct cxl_context *ctx = file->private_data; + + if (ctx->status == CLOSED) + return -EIO; + + if (!cxl_ops->link_ok(ctx->afu->adapter, ctx->afu)) + return -EIO; + + pr_devel("afu_ioctl\n"); + switch (cmd) { + case CXL_IOCTL_START_WORK: + return afu_ioctl_start_work(ctx, (struct cxl_ioctl_start_work __user *)arg); + case CXL_IOCTL_GET_PROCESS_ELEMENT: + return afu_ioctl_process_element(ctx, (__u32 __user *)arg); + case CXL_IOCTL_GET_AFU_ID: + return afu_ioctl_get_afu_id(ctx, (struct cxl_afu_id __user *) + arg); + } + return -EINVAL; +} + +static long afu_compat_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + return afu_ioctl(file, cmd, arg); +} + +int afu_mmap(struct file *file, struct vm_area_struct *vm) +{ + struct cxl_context *ctx = file->private_data; + + /* AFU must be started before we can MMIO */ + if (ctx->status != STARTED) + return -EIO; + + if (!cxl_ops->link_ok(ctx->afu->adapter, ctx->afu)) + return -EIO; + + return cxl_context_iomap(ctx, vm); +} + +static inline bool ctx_event_pending(struct cxl_context *ctx) +{ + if (ctx->pending_irq || ctx->pending_fault || ctx->pending_afu_err) + return true; + + if (ctx->afu_driver_ops && atomic_read(&ctx->afu_driver_events)) + return true; + + return false; +} + +__poll_t afu_poll(struct file *file, struct poll_table_struct *poll) +{ + struct cxl_context *ctx = file->private_data; + __poll_t mask = 0; + unsigned long flags; + + + poll_wait(file, &ctx->wq, poll); + + pr_devel("afu_poll wait done pe: %i\n", ctx->pe); + + spin_lock_irqsave(&ctx->lock, flags); + if (ctx_event_pending(ctx)) + mask |= EPOLLIN | EPOLLRDNORM; + else if (ctx->status == CLOSED) + /* Only error on closed when there are no futher events pending + */ + mask |= EPOLLERR; + spin_unlock_irqrestore(&ctx->lock, flags); + + pr_devel("afu_poll pe: %i returning %#x\n", ctx->pe, mask); + + return mask; +} + +static ssize_t afu_driver_event_copy(struct cxl_context *ctx, + char __user *buf, + struct cxl_event *event, + struct cxl_event_afu_driver_reserved *pl) +{ + /* Check event */ + if (!pl) { + ctx->afu_driver_ops->event_delivered(ctx, pl, -EINVAL); + return -EFAULT; + } + + /* Check event size */ + event->header.size += pl->data_size; + if (event->header.size > CXL_READ_MIN_SIZE) { + ctx->afu_driver_ops->event_delivered(ctx, pl, -EINVAL); + return -EFAULT; + } + + /* Copy event header */ + if (copy_to_user(buf, event, sizeof(struct cxl_event_header))) { + ctx->afu_driver_ops->event_delivered(ctx, pl, -EFAULT); + return -EFAULT; + } + + /* Copy event data */ + buf += sizeof(struct cxl_event_header); + if (copy_to_user(buf, &pl->data, pl->data_size)) { + ctx->afu_driver_ops->event_delivered(ctx, pl, -EFAULT); + return -EFAULT; + } + + ctx->afu_driver_ops->event_delivered(ctx, pl, 0); /* Success */ + return event->header.size; +} + +ssize_t afu_read(struct file *file, char __user *buf, size_t count, + loff_t *off) +{ + struct cxl_context *ctx = file->private_data; + struct cxl_event_afu_driver_reserved *pl = NULL; + struct cxl_event event; + unsigned long flags; + int rc; + DEFINE_WAIT(wait); + + if (!cxl_ops->link_ok(ctx->afu->adapter, ctx->afu)) + return -EIO; + + if (count < CXL_READ_MIN_SIZE) + return -EINVAL; + + spin_lock_irqsave(&ctx->lock, flags); + + for (;;) { + prepare_to_wait(&ctx->wq, &wait, TASK_INTERRUPTIBLE); + if (ctx_event_pending(ctx) || (ctx->status == CLOSED)) + break; + + if (!cxl_ops->link_ok(ctx->afu->adapter, ctx->afu)) { + rc = -EIO; + goto out; + } + + if (file->f_flags & O_NONBLOCK) { + rc = -EAGAIN; + goto out; + } + + if (signal_pending(current)) { + rc = -ERESTARTSYS; + goto out; + } + + spin_unlock_irqrestore(&ctx->lock, flags); + pr_devel("afu_read going to sleep...\n"); + schedule(); + pr_devel("afu_read woken up\n"); + spin_lock_irqsave(&ctx->lock, flags); + } + + finish_wait(&ctx->wq, &wait); + + memset(&event, 0, sizeof(event)); + event.header.process_element = ctx->pe; + event.header.size = sizeof(struct cxl_event_header); + if (ctx->afu_driver_ops && atomic_read(&ctx->afu_driver_events)) { + pr_devel("afu_read delivering AFU driver specific event\n"); + pl = ctx->afu_driver_ops->fetch_event(ctx); + atomic_dec(&ctx->afu_driver_events); + event.header.type = CXL_EVENT_AFU_DRIVER; + } else if (ctx->pending_irq) { + pr_devel("afu_read delivering AFU interrupt\n"); + event.header.size += sizeof(struct cxl_event_afu_interrupt); + event.header.type = CXL_EVENT_AFU_INTERRUPT; + event.irq.irq = find_first_bit(ctx->irq_bitmap, ctx->irq_count) + 1; + clear_bit(event.irq.irq - 1, ctx->irq_bitmap); + if (bitmap_empty(ctx->irq_bitmap, ctx->irq_count)) + ctx->pending_irq = false; + } else if (ctx->pending_fault) { + pr_devel("afu_read delivering data storage fault\n"); + event.header.size += sizeof(struct cxl_event_data_storage); + event.header.type = CXL_EVENT_DATA_STORAGE; + event.fault.addr = ctx->fault_addr; + event.fault.dsisr = ctx->fault_dsisr; + ctx->pending_fault = false; + } else if (ctx->pending_afu_err) { + pr_devel("afu_read delivering afu error\n"); + event.header.size += sizeof(struct cxl_event_afu_error); + event.header.type = CXL_EVENT_AFU_ERROR; + event.afu_error.error = ctx->afu_err; + ctx->pending_afu_err = false; + } else if (ctx->status == CLOSED) { + pr_devel("afu_read fatal error\n"); + spin_unlock_irqrestore(&ctx->lock, flags); + return -EIO; + } else + WARN(1, "afu_read must be buggy\n"); + + spin_unlock_irqrestore(&ctx->lock, flags); + + if (event.header.type == CXL_EVENT_AFU_DRIVER) + return afu_driver_event_copy(ctx, buf, &event, pl); + + if (copy_to_user(buf, &event, event.header.size)) + return -EFAULT; + return event.header.size; + +out: + finish_wait(&ctx->wq, &wait); + spin_unlock_irqrestore(&ctx->lock, flags); + return rc; +} + +/* + * Note: if this is updated, we need to update api.c to patch the new ones in + * too + */ +const struct file_operations afu_fops = { + .owner = THIS_MODULE, + .open = afu_open, + .poll = afu_poll, + .read = afu_read, + .release = afu_release, + .unlocked_ioctl = afu_ioctl, + .compat_ioctl = afu_compat_ioctl, + .mmap = afu_mmap, +}; + +static const struct file_operations afu_master_fops = { + .owner = THIS_MODULE, + .open = afu_master_open, + .poll = afu_poll, + .read = afu_read, + .release = afu_release, + .unlocked_ioctl = afu_ioctl, + .compat_ioctl = afu_compat_ioctl, + .mmap = afu_mmap, +}; + + +static char *cxl_devnode(struct device *dev, umode_t *mode) +{ + if (cpu_has_feature(CPU_FTR_HVMODE) && + CXL_DEVT_IS_CARD(dev->devt)) { + /* + * These minor numbers will eventually be used to program the + * PSL and AFUs once we have dynamic reprogramming support + */ + return NULL; + } + return kasprintf(GFP_KERNEL, "cxl/%s", dev_name(dev)); +} + +extern struct class *cxl_class; + +static int cxl_add_chardev(struct cxl_afu *afu, dev_t devt, struct cdev *cdev, + struct device **chardev, char *postfix, char *desc, + const struct file_operations *fops) +{ + struct device *dev; + int rc; + + cdev_init(cdev, fops); + if ((rc = cdev_add(cdev, devt, 1))) { + dev_err(&afu->dev, "Unable to add %s chardev: %i\n", desc, rc); + return rc; + } + + dev = device_create(cxl_class, &afu->dev, devt, afu, + "afu%i.%i%s", afu->adapter->adapter_num, afu->slice, postfix); + if (IS_ERR(dev)) { + dev_err(&afu->dev, "Unable to create %s chardev in sysfs: %i\n", desc, rc); + rc = PTR_ERR(dev); + goto err; + } + + *chardev = dev; + + return 0; +err: + cdev_del(cdev); + return rc; +} + +int cxl_chardev_d_afu_add(struct cxl_afu *afu) +{ + return cxl_add_chardev(afu, CXL_AFU_MKDEV_D(afu), &afu->afu_cdev_d, + &afu->chardev_d, "d", "dedicated", + &afu_master_fops); /* Uses master fops */ +} + +int cxl_chardev_m_afu_add(struct cxl_afu *afu) +{ + return cxl_add_chardev(afu, CXL_AFU_MKDEV_M(afu), &afu->afu_cdev_m, + &afu->chardev_m, "m", "master", + &afu_master_fops); +} + +int cxl_chardev_s_afu_add(struct cxl_afu *afu) +{ + return cxl_add_chardev(afu, CXL_AFU_MKDEV_S(afu), &afu->afu_cdev_s, + &afu->chardev_s, "s", "shared", + &afu_fops); +} + +void cxl_chardev_afu_remove(struct cxl_afu *afu) +{ + if (afu->chardev_d) { + cdev_del(&afu->afu_cdev_d); + device_unregister(afu->chardev_d); + afu->chardev_d = NULL; + } + if (afu->chardev_m) { + cdev_del(&afu->afu_cdev_m); + device_unregister(afu->chardev_m); + afu->chardev_m = NULL; + } + if (afu->chardev_s) { + cdev_del(&afu->afu_cdev_s); + device_unregister(afu->chardev_s); + afu->chardev_s = NULL; + } +} + +int cxl_register_afu(struct cxl_afu *afu) +{ + afu->dev.class = cxl_class; + + return device_register(&afu->dev); +} + +int cxl_register_adapter(struct cxl *adapter) +{ + adapter->dev.class = cxl_class; + + /* + * Future: When we support dynamically reprogramming the PSL & AFU we + * will expose the interface to do that via a chardev: + * adapter->dev.devt = CXL_CARD_MKDEV(adapter); + */ + + return device_register(&adapter->dev); +} + +dev_t cxl_get_dev(void) +{ + return cxl_dev; +} + +int __init cxl_file_init(void) +{ + int rc; + + /* + * If these change we really need to update API. Either change some + * flags or update API version number CXL_API_VERSION. + */ + BUILD_BUG_ON(CXL_API_VERSION != 3); + BUILD_BUG_ON(sizeof(struct cxl_ioctl_start_work) != 64); + BUILD_BUG_ON(sizeof(struct cxl_event_header) != 8); + BUILD_BUG_ON(sizeof(struct cxl_event_afu_interrupt) != 8); + BUILD_BUG_ON(sizeof(struct cxl_event_data_storage) != 32); + BUILD_BUG_ON(sizeof(struct cxl_event_afu_error) != 16); + + if ((rc = alloc_chrdev_region(&cxl_dev, 0, CXL_NUM_MINORS, "cxl"))) { + pr_err("Unable to allocate CXL major number: %i\n", rc); + return rc; + } + + pr_devel("CXL device allocated, MAJOR %i\n", MAJOR(cxl_dev)); + + cxl_class = class_create(THIS_MODULE, "cxl"); + if (IS_ERR(cxl_class)) { + pr_err("Unable to create CXL class\n"); + rc = PTR_ERR(cxl_class); + goto err; + } + cxl_class->devnode = cxl_devnode; + + return 0; + +err: + unregister_chrdev_region(cxl_dev, CXL_NUM_MINORS); + return rc; +} + +void cxl_file_exit(void) +{ + unregister_chrdev_region(cxl_dev, CXL_NUM_MINORS); + class_destroy(cxl_class); +} diff --git a/drivers/misc/cxl/flash.c b/drivers/misc/cxl/flash.c new file mode 100644 index 000000000..43917898f --- /dev/null +++ b/drivers/misc/cxl/flash.c @@ -0,0 +1,543 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include +#include + +#include "cxl.h" +#include "hcalls.h" + +#define DOWNLOAD_IMAGE 1 +#define VALIDATE_IMAGE 2 + +struct ai_header { + u16 version; + u8 reserved0[6]; + u16 vendor; + u16 device; + u16 subsystem_vendor; + u16 subsystem; + u64 image_offset; + u64 image_length; + u8 reserved1[96]; +}; + +static struct semaphore sem; +static unsigned long *buffer[CXL_AI_MAX_ENTRIES]; +static struct sg_list *le; +static u64 continue_token; +static unsigned int transfer; + +struct update_props_workarea { + __be32 phandle; + __be32 state; + __be64 reserved; + __be32 nprops; +} __packed; + +struct update_nodes_workarea { + __be32 state; + __be64 unit_address; + __be32 reserved; +} __packed; + +#define DEVICE_SCOPE 3 +#define NODE_ACTION_MASK 0xff000000 +#define NODE_COUNT_MASK 0x00ffffff +#define OPCODE_DELETE 0x01000000 +#define OPCODE_UPDATE 0x02000000 +#define OPCODE_ADD 0x03000000 + +static int rcall(int token, char *buf, s32 scope) +{ + int rc; + + spin_lock(&rtas_data_buf_lock); + + memcpy(rtas_data_buf, buf, RTAS_DATA_BUF_SIZE); + rc = rtas_call(token, 2, 1, NULL, rtas_data_buf, scope); + memcpy(buf, rtas_data_buf, RTAS_DATA_BUF_SIZE); + + spin_unlock(&rtas_data_buf_lock); + return rc; +} + +static int update_property(struct device_node *dn, const char *name, + u32 vd, char *value) +{ + struct property *new_prop; + u32 *val; + int rc; + + new_prop = kzalloc(sizeof(*new_prop), GFP_KERNEL); + if (!new_prop) + return -ENOMEM; + + new_prop->name = kstrdup(name, GFP_KERNEL); + if (!new_prop->name) { + kfree(new_prop); + return -ENOMEM; + } + + new_prop->length = vd; + new_prop->value = kzalloc(new_prop->length, GFP_KERNEL); + if (!new_prop->value) { + kfree(new_prop->name); + kfree(new_prop); + return -ENOMEM; + } + memcpy(new_prop->value, value, vd); + + val = (u32 *)new_prop->value; + rc = cxl_update_properties(dn, new_prop); + pr_devel("%s: update property (%s, length: %i, value: %#x)\n", + dn->name, name, vd, be32_to_cpu(*val)); + + if (rc) { + kfree(new_prop->name); + kfree(new_prop->value); + kfree(new_prop); + } + return rc; +} + +static int update_node(__be32 phandle, s32 scope) +{ + struct update_props_workarea *upwa; + struct device_node *dn; + int i, rc, ret; + char *prop_data; + char *buf; + int token; + u32 nprops; + u32 vd; + + token = rtas_token("ibm,update-properties"); + if (token == RTAS_UNKNOWN_SERVICE) + return -EINVAL; + + buf = kzalloc(RTAS_DATA_BUF_SIZE, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + dn = of_find_node_by_phandle(be32_to_cpu(phandle)); + if (!dn) { + kfree(buf); + return -ENOENT; + } + + upwa = (struct update_props_workarea *)&buf[0]; + upwa->phandle = phandle; + do { + rc = rcall(token, buf, scope); + if (rc < 0) + break; + + prop_data = buf + sizeof(*upwa); + nprops = be32_to_cpu(upwa->nprops); + + if (*prop_data == 0) { + prop_data++; + vd = be32_to_cpu(*(__be32 *)prop_data); + prop_data += vd + sizeof(vd); + nprops--; + } + + for (i = 0; i < nprops; i++) { + char *prop_name; + + prop_name = prop_data; + prop_data += strlen(prop_name) + 1; + vd = be32_to_cpu(*(__be32 *)prop_data); + prop_data += sizeof(vd); + + if ((vd != 0x00000000) && (vd != 0x80000000)) { + ret = update_property(dn, prop_name, vd, + prop_data); + if (ret) + pr_err("cxl: Could not update property %s - %i\n", + prop_name, ret); + + prop_data += vd; + } + } + } while (rc == 1); + + of_node_put(dn); + kfree(buf); + return rc; +} + +static int update_devicetree(struct cxl *adapter, s32 scope) +{ + struct update_nodes_workarea *unwa; + u32 action, node_count; + int token, rc, i; + __be32 *data, drc_index, phandle; + char *buf; + + token = rtas_token("ibm,update-nodes"); + if (token == RTAS_UNKNOWN_SERVICE) + return -EINVAL; + + buf = kzalloc(RTAS_DATA_BUF_SIZE, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + unwa = (struct update_nodes_workarea *)&buf[0]; + unwa->unit_address = cpu_to_be64(adapter->guest->handle); + do { + rc = rcall(token, buf, scope); + if (rc && rc != 1) + break; + + data = (__be32 *)buf + 4; + while (be32_to_cpu(*data) & NODE_ACTION_MASK) { + action = be32_to_cpu(*data) & NODE_ACTION_MASK; + node_count = be32_to_cpu(*data) & NODE_COUNT_MASK; + pr_devel("device reconfiguration - action: %#x, nodes: %#x\n", + action, node_count); + data++; + + for (i = 0; i < node_count; i++) { + phandle = *data++; + + switch (action) { + case OPCODE_DELETE: + /* nothing to do */ + break; + case OPCODE_UPDATE: + update_node(phandle, scope); + break; + case OPCODE_ADD: + /* nothing to do, just move pointer */ + drc_index = *data++; + break; + } + } + } + } while (rc == 1); + + kfree(buf); + return 0; +} + +static int handle_image(struct cxl *adapter, int operation, + long (*fct)(u64, u64, u64, u64 *), + struct cxl_adapter_image *ai) +{ + size_t mod, s_copy, len_chunk = 0; + struct ai_header *header = NULL; + unsigned int entries = 0, i; + void *dest, *from; + int rc = 0, need_header; + + /* base adapter image header */ + need_header = (ai->flags & CXL_AI_NEED_HEADER); + if (need_header) { + header = kzalloc(sizeof(struct ai_header), GFP_KERNEL); + if (!header) + return -ENOMEM; + header->version = cpu_to_be16(1); + header->vendor = cpu_to_be16(adapter->guest->vendor); + header->device = cpu_to_be16(adapter->guest->device); + header->subsystem_vendor = cpu_to_be16(adapter->guest->subsystem_vendor); + header->subsystem = cpu_to_be16(adapter->guest->subsystem); + header->image_offset = cpu_to_be64(CXL_AI_HEADER_SIZE); + header->image_length = cpu_to_be64(ai->len_image); + } + + /* number of entries in the list */ + len_chunk = ai->len_data; + if (need_header) + len_chunk += CXL_AI_HEADER_SIZE; + + entries = len_chunk / CXL_AI_BUFFER_SIZE; + mod = len_chunk % CXL_AI_BUFFER_SIZE; + if (mod) + entries++; + + if (entries > CXL_AI_MAX_ENTRIES) { + rc = -EINVAL; + goto err; + } + + /* < -- MAX_CHUNK_SIZE = 4096 * 256 = 1048576 bytes --> + * chunk 0 ---------------------------------------------------- + * | header | data | + * ---------------------------------------------------- + * chunk 1 ---------------------------------------------------- + * | data | + * ---------------------------------------------------- + * .... + * chunk n ---------------------------------------------------- + * | data | + * ---------------------------------------------------- + */ + from = (void *) ai->data; + for (i = 0; i < entries; i++) { + dest = buffer[i]; + s_copy = CXL_AI_BUFFER_SIZE; + + if ((need_header) && (i == 0)) { + /* add adapter image header */ + memcpy(buffer[i], header, sizeof(struct ai_header)); + s_copy = CXL_AI_BUFFER_SIZE - CXL_AI_HEADER_SIZE; + dest += CXL_AI_HEADER_SIZE; /* image offset */ + } + if ((i == (entries - 1)) && mod) + s_copy = mod; + + /* copy data */ + if (copy_from_user(dest, from, s_copy)) + goto err; + + /* fill in the list */ + le[i].phys_addr = cpu_to_be64(virt_to_phys(buffer[i])); + le[i].len = cpu_to_be64(CXL_AI_BUFFER_SIZE); + if ((i == (entries - 1)) && mod) + le[i].len = cpu_to_be64(mod); + from += s_copy; + } + pr_devel("%s (op: %i, need header: %i, entries: %i, token: %#llx)\n", + __func__, operation, need_header, entries, continue_token); + + /* + * download/validate the adapter image to the coherent + * platform facility + */ + rc = fct(adapter->guest->handle, virt_to_phys(le), entries, + &continue_token); + if (rc == 0) /* success of download/validation operation */ + continue_token = 0; + +err: + kfree(header); + + return rc; +} + +static int transfer_image(struct cxl *adapter, int operation, + struct cxl_adapter_image *ai) +{ + int rc = 0; + int afu; + + switch (operation) { + case DOWNLOAD_IMAGE: + rc = handle_image(adapter, operation, + &cxl_h_download_adapter_image, ai); + if (rc < 0) { + pr_devel("resetting adapter\n"); + cxl_h_reset_adapter(adapter->guest->handle); + } + return rc; + + case VALIDATE_IMAGE: + rc = handle_image(adapter, operation, + &cxl_h_validate_adapter_image, ai); + if (rc < 0) { + pr_devel("resetting adapter\n"); + cxl_h_reset_adapter(adapter->guest->handle); + return rc; + } + if (rc == 0) { + pr_devel("remove current afu\n"); + for (afu = 0; afu < adapter->slices; afu++) + cxl_guest_remove_afu(adapter->afu[afu]); + + pr_devel("resetting adapter\n"); + cxl_h_reset_adapter(adapter->guest->handle); + + /* The entire image has now been + * downloaded and the validation has + * been successfully performed. + * After that, the partition should call + * ibm,update-nodes and + * ibm,update-properties to receive the + * current configuration + */ + rc = update_devicetree(adapter, DEVICE_SCOPE); + transfer = 1; + } + return rc; + } + + return -EINVAL; +} + +static long ioctl_transfer_image(struct cxl *adapter, int operation, + struct cxl_adapter_image __user *uai) +{ + struct cxl_adapter_image ai; + + pr_devel("%s\n", __func__); + + if (copy_from_user(&ai, uai, sizeof(struct cxl_adapter_image))) + return -EFAULT; + + /* + * Make sure reserved fields and bits are set to 0 + */ + if (ai.reserved1 || ai.reserved2 || ai.reserved3 || ai.reserved4 || + (ai.flags & ~CXL_AI_ALL)) + return -EINVAL; + + return transfer_image(adapter, operation, &ai); +} + +static int device_open(struct inode *inode, struct file *file) +{ + int adapter_num = CXL_DEVT_ADAPTER(inode->i_rdev); + struct cxl *adapter; + int rc = 0, i; + + pr_devel("in %s\n", __func__); + + BUG_ON(sizeof(struct ai_header) != CXL_AI_HEADER_SIZE); + + /* Allows one process to open the device by using a semaphore */ + if (down_interruptible(&sem) != 0) + return -EPERM; + + if (!(adapter = get_cxl_adapter(adapter_num))) { + rc = -ENODEV; + goto err_unlock; + } + + file->private_data = adapter; + continue_token = 0; + transfer = 0; + + for (i = 0; i < CXL_AI_MAX_ENTRIES; i++) + buffer[i] = NULL; + + /* aligned buffer containing list entries which describes up to + * 1 megabyte of data (256 entries of 4096 bytes each) + * Logical real address of buffer 0 - Buffer 0 length in bytes + * Logical real address of buffer 1 - Buffer 1 length in bytes + * Logical real address of buffer 2 - Buffer 2 length in bytes + * .... + * .... + * Logical real address of buffer N - Buffer N length in bytes + */ + le = (struct sg_list *)get_zeroed_page(GFP_KERNEL); + if (!le) { + rc = -ENOMEM; + goto err; + } + + for (i = 0; i < CXL_AI_MAX_ENTRIES; i++) { + buffer[i] = (unsigned long *)get_zeroed_page(GFP_KERNEL); + if (!buffer[i]) { + rc = -ENOMEM; + goto err1; + } + } + + return 0; + +err1: + for (i = 0; i < CXL_AI_MAX_ENTRIES; i++) { + if (buffer[i]) + free_page((unsigned long) buffer[i]); + } + + if (le) + free_page((unsigned long) le); +err: + put_device(&adapter->dev); +err_unlock: + up(&sem); + + return rc; +} + +static long device_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + struct cxl *adapter = file->private_data; + + pr_devel("in %s\n", __func__); + + if (cmd == CXL_IOCTL_DOWNLOAD_IMAGE) + return ioctl_transfer_image(adapter, + DOWNLOAD_IMAGE, + (struct cxl_adapter_image __user *)arg); + else if (cmd == CXL_IOCTL_VALIDATE_IMAGE) + return ioctl_transfer_image(adapter, + VALIDATE_IMAGE, + (struct cxl_adapter_image __user *)arg); + else + return -EINVAL; +} + +static long device_compat_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + return device_ioctl(file, cmd, arg); +} + +static int device_close(struct inode *inode, struct file *file) +{ + struct cxl *adapter = file->private_data; + int i; + + pr_devel("in %s\n", __func__); + + for (i = 0; i < CXL_AI_MAX_ENTRIES; i++) { + if (buffer[i]) + free_page((unsigned long) buffer[i]); + } + + if (le) + free_page((unsigned long) le); + + up(&sem); + put_device(&adapter->dev); + continue_token = 0; + + /* reload the module */ + if (transfer) + cxl_guest_reload_module(adapter); + else { + pr_devel("resetting adapter\n"); + cxl_h_reset_adapter(adapter->guest->handle); + } + + transfer = 0; + return 0; +} + +static const struct file_operations fops = { + .owner = THIS_MODULE, + .open = device_open, + .unlocked_ioctl = device_ioctl, + .compat_ioctl = device_compat_ioctl, + .release = device_close, +}; + +void cxl_guest_remove_chardev(struct cxl *adapter) +{ + cdev_del(&adapter->guest->cdev); +} + +int cxl_guest_add_chardev(struct cxl *adapter) +{ + dev_t devt; + int rc; + + devt = MKDEV(MAJOR(cxl_get_dev()), CXL_CARD_MINOR(adapter)); + cdev_init(&adapter->guest->cdev, &fops); + if ((rc = cdev_add(&adapter->guest->cdev, devt, 1))) { + dev_err(&adapter->dev, + "Unable to add chardev on adapter (card%i): %i\n", + adapter->adapter_num, rc); + goto err; + } + adapter->dev.devt = devt; + sema_init(&sem, 1); +err: + return rc; +} diff --git a/drivers/misc/cxl/guest.c b/drivers/misc/cxl/guest.c new file mode 100644 index 000000000..08f4a512a --- /dev/null +++ b/drivers/misc/cxl/guest.c @@ -0,0 +1,1202 @@ +/* + * Copyright 2015 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include + +#include "cxl.h" +#include "hcalls.h" +#include "trace.h" + +#define CXL_ERROR_DETECTED_EVENT 1 +#define CXL_SLOT_RESET_EVENT 2 +#define CXL_RESUME_EVENT 3 + +static void pci_error_handlers(struct cxl_afu *afu, + int bus_error_event, + pci_channel_state_t state) +{ + struct pci_dev *afu_dev; + + if (afu->phb == NULL) + return; + + list_for_each_entry(afu_dev, &afu->phb->bus->devices, bus_list) { + if (!afu_dev->driver) + continue; + + switch (bus_error_event) { + case CXL_ERROR_DETECTED_EVENT: + afu_dev->error_state = state; + + if (afu_dev->driver->err_handler && + afu_dev->driver->err_handler->error_detected) + afu_dev->driver->err_handler->error_detected(afu_dev, state); + break; + case CXL_SLOT_RESET_EVENT: + afu_dev->error_state = state; + + if (afu_dev->driver->err_handler && + afu_dev->driver->err_handler->slot_reset) + afu_dev->driver->err_handler->slot_reset(afu_dev); + break; + case CXL_RESUME_EVENT: + if (afu_dev->driver->err_handler && + afu_dev->driver->err_handler->resume) + afu_dev->driver->err_handler->resume(afu_dev); + break; + } + } +} + +static irqreturn_t guest_handle_psl_slice_error(struct cxl_context *ctx, u64 dsisr, + u64 errstat) +{ + pr_devel("in %s\n", __func__); + dev_crit(&ctx->afu->dev, "PSL ERROR STATUS: 0x%.16llx\n", errstat); + + return cxl_ops->ack_irq(ctx, 0, errstat); +} + +static ssize_t guest_collect_vpd(struct cxl *adapter, struct cxl_afu *afu, + void *buf, size_t len) +{ + unsigned int entries, mod; + unsigned long **vpd_buf = NULL; + struct sg_list *le; + int rc = 0, i, tocopy; + u64 out = 0; + + if (buf == NULL) + return -EINVAL; + + /* number of entries in the list */ + entries = len / SG_BUFFER_SIZE; + mod = len % SG_BUFFER_SIZE; + if (mod) + entries++; + + if (entries > SG_MAX_ENTRIES) { + entries = SG_MAX_ENTRIES; + len = SG_MAX_ENTRIES * SG_BUFFER_SIZE; + mod = 0; + } + + vpd_buf = kcalloc(entries, sizeof(unsigned long *), GFP_KERNEL); + if (!vpd_buf) + return -ENOMEM; + + le = (struct sg_list *)get_zeroed_page(GFP_KERNEL); + if (!le) { + rc = -ENOMEM; + goto err1; + } + + for (i = 0; i < entries; i++) { + vpd_buf[i] = (unsigned long *)get_zeroed_page(GFP_KERNEL); + if (!vpd_buf[i]) { + rc = -ENOMEM; + goto err2; + } + le[i].phys_addr = cpu_to_be64(virt_to_phys(vpd_buf[i])); + le[i].len = cpu_to_be64(SG_BUFFER_SIZE); + if ((i == (entries - 1)) && mod) + le[i].len = cpu_to_be64(mod); + } + + if (adapter) + rc = cxl_h_collect_vpd_adapter(adapter->guest->handle, + virt_to_phys(le), entries, &out); + else + rc = cxl_h_collect_vpd(afu->guest->handle, 0, + virt_to_phys(le), entries, &out); + pr_devel("length of available (entries: %i), vpd: %#llx\n", + entries, out); + + if (!rc) { + /* + * hcall returns in 'out' the size of available VPDs. + * It fills the buffer with as much data as possible. + */ + if (out < len) + len = out; + rc = len; + if (out) { + for (i = 0; i < entries; i++) { + if (len < SG_BUFFER_SIZE) + tocopy = len; + else + tocopy = SG_BUFFER_SIZE; + memcpy(buf, vpd_buf[i], tocopy); + buf += tocopy; + len -= tocopy; + } + } + } +err2: + for (i = 0; i < entries; i++) { + if (vpd_buf[i]) + free_page((unsigned long) vpd_buf[i]); + } + free_page((unsigned long) le); +err1: + kfree(vpd_buf); + return rc; +} + +static int guest_get_irq_info(struct cxl_context *ctx, struct cxl_irq_info *info) +{ + return cxl_h_collect_int_info(ctx->afu->guest->handle, ctx->process_token, info); +} + +static irqreturn_t guest_psl_irq(int irq, void *data) +{ + struct cxl_context *ctx = data; + struct cxl_irq_info irq_info; + int rc; + + pr_devel("%d: received PSL interrupt %i\n", ctx->pe, irq); + rc = guest_get_irq_info(ctx, &irq_info); + if (rc) { + WARN(1, "Unable to get IRQ info: %i\n", rc); + return IRQ_HANDLED; + } + + rc = cxl_irq_psl8(irq, ctx, &irq_info); + return rc; +} + +static int afu_read_error_state(struct cxl_afu *afu, int *state_out) +{ + u64 state; + int rc = 0; + + if (!afu) + return -EIO; + + rc = cxl_h_read_error_state(afu->guest->handle, &state); + if (!rc) { + WARN_ON(state != H_STATE_NORMAL && + state != H_STATE_DISABLE && + state != H_STATE_TEMP_UNAVAILABLE && + state != H_STATE_PERM_UNAVAILABLE); + *state_out = state & 0xffffffff; + } + return rc; +} + +static irqreturn_t guest_slice_irq_err(int irq, void *data) +{ + struct cxl_afu *afu = data; + int rc; + u64 serr, afu_error, dsisr; + + rc = cxl_h_get_fn_error_interrupt(afu->guest->handle, &serr); + if (rc) { + dev_crit(&afu->dev, "Couldn't read PSL_SERR_An: %d\n", rc); + return IRQ_HANDLED; + } + afu_error = cxl_p2n_read(afu, CXL_AFU_ERR_An); + dsisr = cxl_p2n_read(afu, CXL_PSL_DSISR_An); + cxl_afu_decode_psl_serr(afu, serr); + dev_crit(&afu->dev, "AFU_ERR_An: 0x%.16llx\n", afu_error); + dev_crit(&afu->dev, "PSL_DSISR_An: 0x%.16llx\n", dsisr); + + rc = cxl_h_ack_fn_error_interrupt(afu->guest->handle, serr); + if (rc) + dev_crit(&afu->dev, "Couldn't ack slice error interrupt: %d\n", + rc); + + return IRQ_HANDLED; +} + + +static int irq_alloc_range(struct cxl *adapter, int len, int *irq) +{ + int i, n; + struct irq_avail *cur; + + for (i = 0; i < adapter->guest->irq_nranges; i++) { + cur = &adapter->guest->irq_avail[i]; + n = bitmap_find_next_zero_area(cur->bitmap, cur->range, + 0, len, 0); + if (n < cur->range) { + bitmap_set(cur->bitmap, n, len); + *irq = cur->offset + n; + pr_devel("guest: allocate IRQs %#x->%#x\n", + *irq, *irq + len - 1); + + return 0; + } + } + return -ENOSPC; +} + +static int irq_free_range(struct cxl *adapter, int irq, int len) +{ + int i, n; + struct irq_avail *cur; + + if (len == 0) + return -ENOENT; + + for (i = 0; i < adapter->guest->irq_nranges; i++) { + cur = &adapter->guest->irq_avail[i]; + if (irq >= cur->offset && + (irq + len) <= (cur->offset + cur->range)) { + n = irq - cur->offset; + bitmap_clear(cur->bitmap, n, len); + pr_devel("guest: release IRQs %#x->%#x\n", + irq, irq + len - 1); + return 0; + } + } + return -ENOENT; +} + +static int guest_reset(struct cxl *adapter) +{ + struct cxl_afu *afu = NULL; + int i, rc; + + pr_devel("Adapter reset request\n"); + spin_lock(&adapter->afu_list_lock); + for (i = 0; i < adapter->slices; i++) { + if ((afu = adapter->afu[i])) { + pci_error_handlers(afu, CXL_ERROR_DETECTED_EVENT, + pci_channel_io_frozen); + cxl_context_detach_all(afu); + } + } + + rc = cxl_h_reset_adapter(adapter->guest->handle); + for (i = 0; i < adapter->slices; i++) { + if (!rc && (afu = adapter->afu[i])) { + pci_error_handlers(afu, CXL_SLOT_RESET_EVENT, + pci_channel_io_normal); + pci_error_handlers(afu, CXL_RESUME_EVENT, 0); + } + } + spin_unlock(&adapter->afu_list_lock); + return rc; +} + +static int guest_alloc_one_irq(struct cxl *adapter) +{ + int irq; + + spin_lock(&adapter->guest->irq_alloc_lock); + if (irq_alloc_range(adapter, 1, &irq)) + irq = -ENOSPC; + spin_unlock(&adapter->guest->irq_alloc_lock); + return irq; +} + +static void guest_release_one_irq(struct cxl *adapter, int irq) +{ + spin_lock(&adapter->guest->irq_alloc_lock); + irq_free_range(adapter, irq, 1); + spin_unlock(&adapter->guest->irq_alloc_lock); +} + +static int guest_alloc_irq_ranges(struct cxl_irq_ranges *irqs, + struct cxl *adapter, unsigned int num) +{ + int i, try, irq; + + memset(irqs, 0, sizeof(struct cxl_irq_ranges)); + + spin_lock(&adapter->guest->irq_alloc_lock); + for (i = 0; i < CXL_IRQ_RANGES && num; i++) { + try = num; + while (try) { + if (irq_alloc_range(adapter, try, &irq) == 0) + break; + try /= 2; + } + if (!try) + goto error; + irqs->offset[i] = irq; + irqs->range[i] = try; + num -= try; + } + if (num) + goto error; + spin_unlock(&adapter->guest->irq_alloc_lock); + return 0; + +error: + for (i = 0; i < CXL_IRQ_RANGES; i++) + irq_free_range(adapter, irqs->offset[i], irqs->range[i]); + spin_unlock(&adapter->guest->irq_alloc_lock); + return -ENOSPC; +} + +static void guest_release_irq_ranges(struct cxl_irq_ranges *irqs, + struct cxl *adapter) +{ + int i; + + spin_lock(&adapter->guest->irq_alloc_lock); + for (i = 0; i < CXL_IRQ_RANGES; i++) + irq_free_range(adapter, irqs->offset[i], irqs->range[i]); + spin_unlock(&adapter->guest->irq_alloc_lock); +} + +static int guest_register_serr_irq(struct cxl_afu *afu) +{ + afu->err_irq_name = kasprintf(GFP_KERNEL, "cxl-%s-err", + dev_name(&afu->dev)); + if (!afu->err_irq_name) + return -ENOMEM; + + if (!(afu->serr_virq = cxl_map_irq(afu->adapter, afu->serr_hwirq, + guest_slice_irq_err, afu, afu->err_irq_name))) { + kfree(afu->err_irq_name); + afu->err_irq_name = NULL; + return -ENOMEM; + } + + return 0; +} + +static void guest_release_serr_irq(struct cxl_afu *afu) +{ + cxl_unmap_irq(afu->serr_virq, afu); + cxl_ops->release_one_irq(afu->adapter, afu->serr_hwirq); + kfree(afu->err_irq_name); +} + +static int guest_ack_irq(struct cxl_context *ctx, u64 tfc, u64 psl_reset_mask) +{ + return cxl_h_control_faults(ctx->afu->guest->handle, ctx->process_token, + tfc >> 32, (psl_reset_mask != 0)); +} + +static void disable_afu_irqs(struct cxl_context *ctx) +{ + irq_hw_number_t hwirq; + unsigned int virq; + int r, i; + + pr_devel("Disabling AFU(%d) interrupts\n", ctx->afu->slice); + for (r = 0; r < CXL_IRQ_RANGES; r++) { + hwirq = ctx->irqs.offset[r]; + for (i = 0; i < ctx->irqs.range[r]; hwirq++, i++) { + virq = irq_find_mapping(NULL, hwirq); + disable_irq(virq); + } + } +} + +static void enable_afu_irqs(struct cxl_context *ctx) +{ + irq_hw_number_t hwirq; + unsigned int virq; + int r, i; + + pr_devel("Enabling AFU(%d) interrupts\n", ctx->afu->slice); + for (r = 0; r < CXL_IRQ_RANGES; r++) { + hwirq = ctx->irqs.offset[r]; + for (i = 0; i < ctx->irqs.range[r]; hwirq++, i++) { + virq = irq_find_mapping(NULL, hwirq); + enable_irq(virq); + } + } +} + +static int _guest_afu_cr_readXX(int sz, struct cxl_afu *afu, int cr_idx, + u64 offset, u64 *val) +{ + unsigned long cr; + char c; + int rc = 0; + + if (afu->crs_len < sz) + return -ENOENT; + + if (unlikely(offset >= afu->crs_len)) + return -ERANGE; + + cr = get_zeroed_page(GFP_KERNEL); + if (!cr) + return -ENOMEM; + + rc = cxl_h_get_config(afu->guest->handle, cr_idx, offset, + virt_to_phys((void *)cr), sz); + if (rc) + goto err; + + switch (sz) { + case 1: + c = *((char *) cr); + *val = c; + break; + case 2: + *val = in_le16((u16 *)cr); + break; + case 4: + *val = in_le32((unsigned *)cr); + break; + case 8: + *val = in_le64((u64 *)cr); + break; + default: + WARN_ON(1); + } +err: + free_page(cr); + return rc; +} + +static int guest_afu_cr_read32(struct cxl_afu *afu, int cr_idx, u64 offset, + u32 *out) +{ + int rc; + u64 val; + + rc = _guest_afu_cr_readXX(4, afu, cr_idx, offset, &val); + if (!rc) + *out = (u32) val; + return rc; +} + +static int guest_afu_cr_read16(struct cxl_afu *afu, int cr_idx, u64 offset, + u16 *out) +{ + int rc; + u64 val; + + rc = _guest_afu_cr_readXX(2, afu, cr_idx, offset, &val); + if (!rc) + *out = (u16) val; + return rc; +} + +static int guest_afu_cr_read8(struct cxl_afu *afu, int cr_idx, u64 offset, + u8 *out) +{ + int rc; + u64 val; + + rc = _guest_afu_cr_readXX(1, afu, cr_idx, offset, &val); + if (!rc) + *out = (u8) val; + return rc; +} + +static int guest_afu_cr_read64(struct cxl_afu *afu, int cr_idx, u64 offset, + u64 *out) +{ + return _guest_afu_cr_readXX(8, afu, cr_idx, offset, out); +} + +static int guest_afu_cr_write32(struct cxl_afu *afu, int cr, u64 off, u32 in) +{ + /* config record is not writable from guest */ + return -EPERM; +} + +static int guest_afu_cr_write16(struct cxl_afu *afu, int cr, u64 off, u16 in) +{ + /* config record is not writable from guest */ + return -EPERM; +} + +static int guest_afu_cr_write8(struct cxl_afu *afu, int cr, u64 off, u8 in) +{ + /* config record is not writable from guest */ + return -EPERM; +} + +static int attach_afu_directed(struct cxl_context *ctx, u64 wed, u64 amr) +{ + struct cxl_process_element_hcall *elem; + struct cxl *adapter = ctx->afu->adapter; + const struct cred *cred; + u32 pid, idx; + int rc, r, i; + u64 mmio_addr, mmio_size; + __be64 flags = 0; + + /* Must be 8 byte aligned and cannot cross a 4096 byte boundary */ + if (!(elem = (struct cxl_process_element_hcall *) + get_zeroed_page(GFP_KERNEL))) + return -ENOMEM; + + elem->version = cpu_to_be64(CXL_PROCESS_ELEMENT_VERSION); + if (ctx->kernel) { + pid = 0; + flags |= CXL_PE_TRANSLATION_ENABLED; + flags |= CXL_PE_PRIVILEGED_PROCESS; + if (mfmsr() & MSR_SF) + flags |= CXL_PE_64_BIT; + } else { + pid = current->pid; + flags |= CXL_PE_PROBLEM_STATE; + flags |= CXL_PE_TRANSLATION_ENABLED; + if (!test_tsk_thread_flag(current, TIF_32BIT)) + flags |= CXL_PE_64_BIT; + cred = get_current_cred(); + if (uid_eq(cred->euid, GLOBAL_ROOT_UID)) + flags |= CXL_PE_PRIVILEGED_PROCESS; + put_cred(cred); + } + elem->flags = cpu_to_be64(flags); + elem->common.tid = cpu_to_be32(0); /* Unused */ + elem->common.pid = cpu_to_be32(pid); + elem->common.csrp = cpu_to_be64(0); /* disable */ + elem->common.u.psl8.aurp0 = cpu_to_be64(0); /* disable */ + elem->common.u.psl8.aurp1 = cpu_to_be64(0); /* disable */ + + cxl_prefault(ctx, wed); + + elem->common.u.psl8.sstp0 = cpu_to_be64(ctx->sstp0); + elem->common.u.psl8.sstp1 = cpu_to_be64(ctx->sstp1); + + /* + * Ensure we have at least one interrupt allocated to take faults for + * kernel contexts that may not have allocated any AFU IRQs at all: + */ + if (ctx->irqs.range[0] == 0) { + rc = afu_register_irqs(ctx, 0); + if (rc) + goto out_free; + } + + for (r = 0; r < CXL_IRQ_RANGES; r++) { + for (i = 0; i < ctx->irqs.range[r]; i++) { + if (r == 0 && i == 0) { + elem->pslVirtualIsn = cpu_to_be32(ctx->irqs.offset[0]); + } else { + idx = ctx->irqs.offset[r] + i - adapter->guest->irq_base_offset; + elem->applicationVirtualIsnBitmap[idx / 8] |= 0x80 >> (idx % 8); + } + } + } + elem->common.amr = cpu_to_be64(amr); + elem->common.wed = cpu_to_be64(wed); + + disable_afu_irqs(ctx); + + rc = cxl_h_attach_process(ctx->afu->guest->handle, elem, + &ctx->process_token, &mmio_addr, &mmio_size); + if (rc == H_SUCCESS) { + if (ctx->master || !ctx->afu->pp_psa) { + ctx->psn_phys = ctx->afu->psn_phys; + ctx->psn_size = ctx->afu->adapter->ps_size; + } else { + ctx->psn_phys = mmio_addr; + ctx->psn_size = mmio_size; + } + if (ctx->afu->pp_psa && mmio_size && + ctx->afu->pp_size == 0) { + /* + * There's no property in the device tree to read the + * pp_size. We only find out at the 1st attach. + * Compared to bare-metal, it is too late and we + * should really lock here. However, on powerVM, + * pp_size is really only used to display in /sys. + * Being discussed with pHyp for their next release. + */ + ctx->afu->pp_size = mmio_size; + } + /* from PAPR: process element is bytes 4-7 of process token */ + ctx->external_pe = ctx->process_token & 0xFFFFFFFF; + pr_devel("CXL pe=%i is known as %i for pHyp, mmio_size=%#llx", + ctx->pe, ctx->external_pe, ctx->psn_size); + ctx->pe_inserted = true; + enable_afu_irqs(ctx); + } + +out_free: + free_page((u64)elem); + return rc; +} + +static int guest_attach_process(struct cxl_context *ctx, bool kernel, u64 wed, u64 amr) +{ + pr_devel("in %s\n", __func__); + + ctx->kernel = kernel; + if (ctx->afu->current_mode == CXL_MODE_DIRECTED) + return attach_afu_directed(ctx, wed, amr); + + /* dedicated mode not supported on FW840 */ + + return -EINVAL; +} + +static int detach_afu_directed(struct cxl_context *ctx) +{ + if (!ctx->pe_inserted) + return 0; + if (cxl_h_detach_process(ctx->afu->guest->handle, ctx->process_token)) + return -1; + return 0; +} + +static int guest_detach_process(struct cxl_context *ctx) +{ + pr_devel("in %s\n", __func__); + trace_cxl_detach(ctx); + + if (!cxl_ops->link_ok(ctx->afu->adapter, ctx->afu)) + return -EIO; + + if (ctx->afu->current_mode == CXL_MODE_DIRECTED) + return detach_afu_directed(ctx); + + return -EINVAL; +} + +static void guest_release_afu(struct device *dev) +{ + struct cxl_afu *afu = to_cxl_afu(dev); + + pr_devel("%s\n", __func__); + + idr_destroy(&afu->contexts_idr); + + kfree(afu->guest); + kfree(afu); +} + +ssize_t cxl_guest_read_afu_vpd(struct cxl_afu *afu, void *buf, size_t len) +{ + return guest_collect_vpd(NULL, afu, buf, len); +} + +#define ERR_BUFF_MAX_COPY_SIZE PAGE_SIZE +static ssize_t guest_afu_read_err_buffer(struct cxl_afu *afu, char *buf, + loff_t off, size_t count) +{ + void *tbuf = NULL; + int rc = 0; + + tbuf = (void *) get_zeroed_page(GFP_KERNEL); + if (!tbuf) + return -ENOMEM; + + rc = cxl_h_get_afu_err(afu->guest->handle, + off & 0x7, + virt_to_phys(tbuf), + count); + if (rc) + goto err; + + if (count > ERR_BUFF_MAX_COPY_SIZE) + count = ERR_BUFF_MAX_COPY_SIZE - (off & 0x7); + memcpy(buf, tbuf, count); +err: + free_page((u64)tbuf); + + return rc; +} + +static int guest_afu_check_and_enable(struct cxl_afu *afu) +{ + return 0; +} + +static bool guest_support_attributes(const char *attr_name, + enum cxl_attrs type) +{ + switch (type) { + case CXL_ADAPTER_ATTRS: + if ((strcmp(attr_name, "base_image") == 0) || + (strcmp(attr_name, "load_image_on_perst") == 0) || + (strcmp(attr_name, "perst_reloads_same_image") == 0) || + (strcmp(attr_name, "image_loaded") == 0)) + return false; + break; + case CXL_AFU_MASTER_ATTRS: + if ((strcmp(attr_name, "pp_mmio_off") == 0)) + return false; + break; + case CXL_AFU_ATTRS: + break; + default: + break; + } + + return true; +} + +static int activate_afu_directed(struct cxl_afu *afu) +{ + int rc; + + dev_info(&afu->dev, "Activating AFU(%d) directed mode\n", afu->slice); + + afu->current_mode = CXL_MODE_DIRECTED; + + afu->num_procs = afu->max_procs_virtualised; + + if ((rc = cxl_chardev_m_afu_add(afu))) + return rc; + + if ((rc = cxl_sysfs_afu_m_add(afu))) + goto err; + + if ((rc = cxl_chardev_s_afu_add(afu))) + goto err1; + + return 0; +err1: + cxl_sysfs_afu_m_remove(afu); +err: + cxl_chardev_afu_remove(afu); + return rc; +} + +static int guest_afu_activate_mode(struct cxl_afu *afu, int mode) +{ + if (!mode) + return 0; + if (!(mode & afu->modes_supported)) + return -EINVAL; + + if (mode == CXL_MODE_DIRECTED) + return activate_afu_directed(afu); + + if (mode == CXL_MODE_DEDICATED) + dev_err(&afu->dev, "Dedicated mode not supported\n"); + + return -EINVAL; +} + +static int deactivate_afu_directed(struct cxl_afu *afu) +{ + dev_info(&afu->dev, "Deactivating AFU(%d) directed mode\n", afu->slice); + + afu->current_mode = 0; + afu->num_procs = 0; + + cxl_sysfs_afu_m_remove(afu); + cxl_chardev_afu_remove(afu); + + cxl_ops->afu_reset(afu); + + return 0; +} + +static int guest_afu_deactivate_mode(struct cxl_afu *afu, int mode) +{ + if (!mode) + return 0; + if (!(mode & afu->modes_supported)) + return -EINVAL; + + if (mode == CXL_MODE_DIRECTED) + return deactivate_afu_directed(afu); + return 0; +} + +static int guest_afu_reset(struct cxl_afu *afu) +{ + pr_devel("AFU(%d) reset request\n", afu->slice); + return cxl_h_reset_afu(afu->guest->handle); +} + +static int guest_map_slice_regs(struct cxl_afu *afu) +{ + if (!(afu->p2n_mmio = ioremap(afu->guest->p2n_phys, afu->guest->p2n_size))) { + dev_err(&afu->dev, "Error mapping AFU(%d) MMIO regions\n", + afu->slice); + return -ENOMEM; + } + return 0; +} + +static void guest_unmap_slice_regs(struct cxl_afu *afu) +{ + if (afu->p2n_mmio) + iounmap(afu->p2n_mmio); +} + +static int afu_update_state(struct cxl_afu *afu) +{ + int rc, cur_state; + + rc = afu_read_error_state(afu, &cur_state); + if (rc) + return rc; + + if (afu->guest->previous_state == cur_state) + return 0; + + pr_devel("AFU(%d) update state to %#x\n", afu->slice, cur_state); + + switch (cur_state) { + case H_STATE_NORMAL: + afu->guest->previous_state = cur_state; + break; + + case H_STATE_DISABLE: + pci_error_handlers(afu, CXL_ERROR_DETECTED_EVENT, + pci_channel_io_frozen); + + cxl_context_detach_all(afu); + if ((rc = cxl_ops->afu_reset(afu))) + pr_devel("reset hcall failed %d\n", rc); + + rc = afu_read_error_state(afu, &cur_state); + if (!rc && cur_state == H_STATE_NORMAL) { + pci_error_handlers(afu, CXL_SLOT_RESET_EVENT, + pci_channel_io_normal); + pci_error_handlers(afu, CXL_RESUME_EVENT, 0); + } + afu->guest->previous_state = 0; + break; + + case H_STATE_TEMP_UNAVAILABLE: + afu->guest->previous_state = cur_state; + break; + + case H_STATE_PERM_UNAVAILABLE: + dev_err(&afu->dev, "AFU is in permanent error state\n"); + pci_error_handlers(afu, CXL_ERROR_DETECTED_EVENT, + pci_channel_io_perm_failure); + afu->guest->previous_state = cur_state; + break; + + default: + pr_err("Unexpected AFU(%d) error state: %#x\n", + afu->slice, cur_state); + return -EINVAL; + } + + return rc; +} + +static void afu_handle_errstate(struct work_struct *work) +{ + struct cxl_afu_guest *afu_guest = + container_of(to_delayed_work(work), struct cxl_afu_guest, work_err); + + if (!afu_update_state(afu_guest->parent) && + afu_guest->previous_state == H_STATE_PERM_UNAVAILABLE) + return; + + if (afu_guest->handle_err) + schedule_delayed_work(&afu_guest->work_err, + msecs_to_jiffies(3000)); +} + +static bool guest_link_ok(struct cxl *cxl, struct cxl_afu *afu) +{ + int state; + + if (afu && (!afu_read_error_state(afu, &state))) { + if (state == H_STATE_NORMAL) + return true; + } + + return false; +} + +static int afu_properties_look_ok(struct cxl_afu *afu) +{ + if (afu->pp_irqs < 0) { + dev_err(&afu->dev, "Unexpected per-process minimum interrupt value\n"); + return -EINVAL; + } + + if (afu->max_procs_virtualised < 1) { + dev_err(&afu->dev, "Unexpected max number of processes virtualised value\n"); + return -EINVAL; + } + + return 0; +} + +int cxl_guest_init_afu(struct cxl *adapter, int slice, struct device_node *afu_np) +{ + struct cxl_afu *afu; + bool free = true; + int rc; + + pr_devel("in %s - AFU(%d)\n", __func__, slice); + if (!(afu = cxl_alloc_afu(adapter, slice))) + return -ENOMEM; + + if (!(afu->guest = kzalloc(sizeof(struct cxl_afu_guest), GFP_KERNEL))) { + kfree(afu); + return -ENOMEM; + } + + if ((rc = dev_set_name(&afu->dev, "afu%i.%i", + adapter->adapter_num, + slice))) + goto err1; + + adapter->slices++; + + if ((rc = cxl_of_read_afu_handle(afu, afu_np))) + goto err1; + + if ((rc = cxl_ops->afu_reset(afu))) + goto err1; + + if ((rc = cxl_of_read_afu_properties(afu, afu_np))) + goto err1; + + if ((rc = afu_properties_look_ok(afu))) + goto err1; + + if ((rc = guest_map_slice_regs(afu))) + goto err1; + + if ((rc = guest_register_serr_irq(afu))) + goto err2; + + /* + * After we call this function we must not free the afu directly, even + * if it returns an error! + */ + if ((rc = cxl_register_afu(afu))) + goto err_put1; + + if ((rc = cxl_sysfs_afu_add(afu))) + goto err_put1; + + /* + * pHyp doesn't expose the programming models supported by the + * AFU. pHyp currently only supports directed mode. If it adds + * dedicated mode later, this version of cxl has no way to + * detect it. So we'll initialize the driver, but the first + * attach will fail. + * Being discussed with pHyp to do better (likely new property) + */ + if (afu->max_procs_virtualised == 1) + afu->modes_supported = CXL_MODE_DEDICATED; + else + afu->modes_supported = CXL_MODE_DIRECTED; + + if ((rc = cxl_afu_select_best_mode(afu))) + goto err_put2; + + adapter->afu[afu->slice] = afu; + + afu->enabled = true; + + /* + * wake up the cpu periodically to check the state + * of the AFU using "afu" stored in the guest structure. + */ + afu->guest->parent = afu; + afu->guest->handle_err = true; + INIT_DELAYED_WORK(&afu->guest->work_err, afu_handle_errstate); + schedule_delayed_work(&afu->guest->work_err, msecs_to_jiffies(1000)); + + if ((rc = cxl_pci_vphb_add(afu))) + dev_info(&afu->dev, "Can't register vPHB\n"); + + return 0; + +err_put2: + cxl_sysfs_afu_remove(afu); +err_put1: + device_unregister(&afu->dev); + free = false; + guest_release_serr_irq(afu); +err2: + guest_unmap_slice_regs(afu); +err1: + if (free) { + kfree(afu->guest); + kfree(afu); + } + return rc; +} + +void cxl_guest_remove_afu(struct cxl_afu *afu) +{ + if (!afu) + return; + + /* flush and stop pending job */ + afu->guest->handle_err = false; + flush_delayed_work(&afu->guest->work_err); + + cxl_pci_vphb_remove(afu); + cxl_sysfs_afu_remove(afu); + + spin_lock(&afu->adapter->afu_list_lock); + afu->adapter->afu[afu->slice] = NULL; + spin_unlock(&afu->adapter->afu_list_lock); + + cxl_context_detach_all(afu); + cxl_ops->afu_deactivate_mode(afu, afu->current_mode); + guest_release_serr_irq(afu); + guest_unmap_slice_regs(afu); + + device_unregister(&afu->dev); +} + +static void free_adapter(struct cxl *adapter) +{ + struct irq_avail *cur; + int i; + + if (adapter->guest) { + if (adapter->guest->irq_avail) { + for (i = 0; i < adapter->guest->irq_nranges; i++) { + cur = &adapter->guest->irq_avail[i]; + kfree(cur->bitmap); + } + kfree(adapter->guest->irq_avail); + } + kfree(adapter->guest->status); + kfree(adapter->guest); + } + cxl_remove_adapter_nr(adapter); + kfree(adapter); +} + +static int properties_look_ok(struct cxl *adapter) +{ + /* The absence of this property means that the operational + * status is unknown or okay + */ + if (strlen(adapter->guest->status) && + strcmp(adapter->guest->status, "okay")) { + pr_err("ABORTING:Bad operational status of the device\n"); + return -EINVAL; + } + + return 0; +} + +ssize_t cxl_guest_read_adapter_vpd(struct cxl *adapter, void *buf, size_t len) +{ + return guest_collect_vpd(adapter, NULL, buf, len); +} + +void cxl_guest_remove_adapter(struct cxl *adapter) +{ + pr_devel("in %s\n", __func__); + + cxl_sysfs_adapter_remove(adapter); + + cxl_guest_remove_chardev(adapter); + device_unregister(&adapter->dev); +} + +static void release_adapter(struct device *dev) +{ + free_adapter(to_cxl_adapter(dev)); +} + +struct cxl *cxl_guest_init_adapter(struct device_node *np, struct platform_device *pdev) +{ + struct cxl *adapter; + bool free = true; + int rc; + + if (!(adapter = cxl_alloc_adapter())) + return ERR_PTR(-ENOMEM); + + if (!(adapter->guest = kzalloc(sizeof(struct cxl_guest), GFP_KERNEL))) { + free_adapter(adapter); + return ERR_PTR(-ENOMEM); + } + + adapter->slices = 0; + adapter->guest->pdev = pdev; + adapter->dev.parent = &pdev->dev; + adapter->dev.release = release_adapter; + dev_set_drvdata(&pdev->dev, adapter); + + /* + * Hypervisor controls PSL timebase initialization (p1 register). + * On FW840, PSL is initialized. + */ + adapter->psl_timebase_synced = true; + + if ((rc = cxl_of_read_adapter_handle(adapter, np))) + goto err1; + + if ((rc = cxl_of_read_adapter_properties(adapter, np))) + goto err1; + + if ((rc = properties_look_ok(adapter))) + goto err1; + + if ((rc = cxl_guest_add_chardev(adapter))) + goto err1; + + /* + * After we call this function we must not free the adapter directly, + * even if it returns an error! + */ + if ((rc = cxl_register_adapter(adapter))) + goto err_put1; + + if ((rc = cxl_sysfs_adapter_add(adapter))) + goto err_put1; + + /* release the context lock as the adapter is configured */ + cxl_adapter_context_unlock(adapter); + + return adapter; + +err_put1: + device_unregister(&adapter->dev); + free = false; + cxl_guest_remove_chardev(adapter); +err1: + if (free) + free_adapter(adapter); + return ERR_PTR(rc); +} + +void cxl_guest_reload_module(struct cxl *adapter) +{ + struct platform_device *pdev; + + pdev = adapter->guest->pdev; + cxl_guest_remove_adapter(adapter); + + cxl_of_probe(pdev); +} + +const struct cxl_backend_ops cxl_guest_ops = { + .module = THIS_MODULE, + .adapter_reset = guest_reset, + .alloc_one_irq = guest_alloc_one_irq, + .release_one_irq = guest_release_one_irq, + .alloc_irq_ranges = guest_alloc_irq_ranges, + .release_irq_ranges = guest_release_irq_ranges, + .setup_irq = NULL, + .handle_psl_slice_error = guest_handle_psl_slice_error, + .psl_interrupt = guest_psl_irq, + .ack_irq = guest_ack_irq, + .attach_process = guest_attach_process, + .detach_process = guest_detach_process, + .update_ivtes = NULL, + .support_attributes = guest_support_attributes, + .link_ok = guest_link_ok, + .release_afu = guest_release_afu, + .afu_read_err_buffer = guest_afu_read_err_buffer, + .afu_check_and_enable = guest_afu_check_and_enable, + .afu_activate_mode = guest_afu_activate_mode, + .afu_deactivate_mode = guest_afu_deactivate_mode, + .afu_reset = guest_afu_reset, + .afu_cr_read8 = guest_afu_cr_read8, + .afu_cr_read16 = guest_afu_cr_read16, + .afu_cr_read32 = guest_afu_cr_read32, + .afu_cr_read64 = guest_afu_cr_read64, + .afu_cr_write8 = guest_afu_cr_write8, + .afu_cr_write16 = guest_afu_cr_write16, + .afu_cr_write32 = guest_afu_cr_write32, + .read_adapter_vpd = cxl_guest_read_adapter_vpd, +}; diff --git a/drivers/misc/cxl/hcalls.c b/drivers/misc/cxl/hcalls.c new file mode 100644 index 000000000..9b8bb0f80 --- /dev/null +++ b/drivers/misc/cxl/hcalls.c @@ -0,0 +1,647 @@ +/* + * Copyright 2015 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + + +#include +#include +#include +#include +#include "hcalls.h" +#include "trace.h" + +#define CXL_HCALL_TIMEOUT 60000 +#define CXL_HCALL_TIMEOUT_DOWNLOAD 120000 + +#define H_ATTACH_CA_PROCESS 0x344 +#define H_CONTROL_CA_FUNCTION 0x348 +#define H_DETACH_CA_PROCESS 0x34C +#define H_COLLECT_CA_INT_INFO 0x350 +#define H_CONTROL_CA_FAULTS 0x354 +#define H_DOWNLOAD_CA_FUNCTION 0x35C +#define H_DOWNLOAD_CA_FACILITY 0x364 +#define H_CONTROL_CA_FACILITY 0x368 + +#define H_CONTROL_CA_FUNCTION_RESET 1 /* perform a reset */ +#define H_CONTROL_CA_FUNCTION_SUSPEND_PROCESS 2 /* suspend a process from being executed */ +#define H_CONTROL_CA_FUNCTION_RESUME_PROCESS 3 /* resume a process to be executed */ +#define H_CONTROL_CA_FUNCTION_READ_ERR_STATE 4 /* read the error state */ +#define H_CONTROL_CA_FUNCTION_GET_AFU_ERR 5 /* collect the AFU error buffer */ +#define H_CONTROL_CA_FUNCTION_GET_CONFIG 6 /* collect configuration record */ +#define H_CONTROL_CA_FUNCTION_GET_DOWNLOAD_STATE 7 /* query to return download status */ +#define H_CONTROL_CA_FUNCTION_TERMINATE_PROCESS 8 /* terminate the process before completion */ +#define H_CONTROL_CA_FUNCTION_COLLECT_VPD 9 /* collect VPD */ +#define H_CONTROL_CA_FUNCTION_GET_FUNCTION_ERR_INT 11 /* read the function-wide error data based on an interrupt */ +#define H_CONTROL_CA_FUNCTION_ACK_FUNCTION_ERR_INT 12 /* acknowledge function-wide error data based on an interrupt */ +#define H_CONTROL_CA_FUNCTION_GET_ERROR_LOG 13 /* retrieve the Platform Log ID (PLID) of an error log */ + +#define H_CONTROL_CA_FAULTS_RESPOND_PSL 1 +#define H_CONTROL_CA_FAULTS_RESPOND_AFU 2 + +#define H_CONTROL_CA_FACILITY_RESET 1 /* perform a reset */ +#define H_CONTROL_CA_FACILITY_COLLECT_VPD 2 /* collect VPD */ + +#define H_DOWNLOAD_CA_FACILITY_DOWNLOAD 1 /* download adapter image */ +#define H_DOWNLOAD_CA_FACILITY_VALIDATE 2 /* validate adapter image */ + + +#define _CXL_LOOP_HCALL(call, rc, retbuf, fn, ...) \ + { \ + unsigned int delay, total_delay = 0; \ + u64 token = 0; \ + \ + memset(retbuf, 0, sizeof(retbuf)); \ + while (1) { \ + rc = call(fn, retbuf, __VA_ARGS__, token); \ + token = retbuf[0]; \ + if (rc != H_BUSY && !H_IS_LONG_BUSY(rc)) \ + break; \ + \ + if (rc == H_BUSY) \ + delay = 10; \ + else \ + delay = get_longbusy_msecs(rc); \ + \ + total_delay += delay; \ + if (total_delay > CXL_HCALL_TIMEOUT) { \ + WARN(1, "Warning: Giving up waiting for CXL hcall " \ + "%#x after %u msec\n", fn, total_delay); \ + rc = H_BUSY; \ + break; \ + } \ + msleep(delay); \ + } \ + } +#define CXL_H_WAIT_UNTIL_DONE(...) _CXL_LOOP_HCALL(plpar_hcall, __VA_ARGS__) +#define CXL_H9_WAIT_UNTIL_DONE(...) _CXL_LOOP_HCALL(plpar_hcall9, __VA_ARGS__) + +#define _PRINT_MSG(rc, format, ...) \ + { \ + if ((rc != H_SUCCESS) && (rc != H_CONTINUE)) \ + pr_err(format, __VA_ARGS__); \ + else \ + pr_devel(format, __VA_ARGS__); \ + } \ + + +static char *afu_op_names[] = { + "UNKNOWN_OP", /* 0 undefined */ + "RESET", /* 1 */ + "SUSPEND_PROCESS", /* 2 */ + "RESUME_PROCESS", /* 3 */ + "READ_ERR_STATE", /* 4 */ + "GET_AFU_ERR", /* 5 */ + "GET_CONFIG", /* 6 */ + "GET_DOWNLOAD_STATE", /* 7 */ + "TERMINATE_PROCESS", /* 8 */ + "COLLECT_VPD", /* 9 */ + "UNKNOWN_OP", /* 10 undefined */ + "GET_FUNCTION_ERR_INT", /* 11 */ + "ACK_FUNCTION_ERR_INT", /* 12 */ + "GET_ERROR_LOG", /* 13 */ +}; + +static char *control_adapter_op_names[] = { + "UNKNOWN_OP", /* 0 undefined */ + "RESET", /* 1 */ + "COLLECT_VPD", /* 2 */ +}; + +static char *download_op_names[] = { + "UNKNOWN_OP", /* 0 undefined */ + "DOWNLOAD", /* 1 */ + "VALIDATE", /* 2 */ +}; + +static char *op_str(unsigned int op, char *name_array[], int array_len) +{ + if (op >= array_len) + return "UNKNOWN_OP"; + return name_array[op]; +} + +#define OP_STR(op, name_array) op_str(op, name_array, ARRAY_SIZE(name_array)) + +#define OP_STR_AFU(op) OP_STR(op, afu_op_names) +#define OP_STR_CONTROL_ADAPTER(op) OP_STR(op, control_adapter_op_names) +#define OP_STR_DOWNLOAD_ADAPTER(op) OP_STR(op, download_op_names) + + +long cxl_h_attach_process(u64 unit_address, + struct cxl_process_element_hcall *element, + u64 *process_token, u64 *mmio_addr, u64 *mmio_size) +{ + unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; + long rc; + + CXL_H_WAIT_UNTIL_DONE(rc, retbuf, H_ATTACH_CA_PROCESS, unit_address, virt_to_phys(element)); + _PRINT_MSG(rc, "cxl_h_attach_process(%#.16llx, %#.16lx): %li\n", + unit_address, virt_to_phys(element), rc); + trace_cxl_hcall_attach(unit_address, virt_to_phys(element), retbuf[0], retbuf[1], retbuf[2], rc); + + pr_devel("token: 0x%.8lx mmio_addr: 0x%lx mmio_size: 0x%lx\nProcess Element Structure:\n", + retbuf[0], retbuf[1], retbuf[2]); + cxl_dump_debug_buffer(element, sizeof(*element)); + + switch (rc) { + case H_SUCCESS: /* The process info is attached to the coherent platform function */ + *process_token = retbuf[0]; + if (mmio_addr) + *mmio_addr = retbuf[1]; + if (mmio_size) + *mmio_size = retbuf[2]; + return 0; + case H_PARAMETER: /* An incorrect parameter was supplied. */ + case H_FUNCTION: /* The function is not supported. */ + return -EINVAL; + case H_AUTHORITY: /* The partition does not have authority to perform this hcall */ + case H_RESOURCE: /* The coherent platform function does not have enough additional resource to attach the process */ + case H_HARDWARE: /* A hardware event prevented the attach operation */ + case H_STATE: /* The coherent platform function is not in a valid state */ + case H_BUSY: + return -EBUSY; + default: + WARN(1, "Unexpected return code: %lx", rc); + return -EINVAL; + } +} + +/** + * cxl_h_detach_process - Detach a process element from a coherent + * platform function. + */ +long cxl_h_detach_process(u64 unit_address, u64 process_token) +{ + unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; + long rc; + + CXL_H_WAIT_UNTIL_DONE(rc, retbuf, H_DETACH_CA_PROCESS, unit_address, process_token); + _PRINT_MSG(rc, "cxl_h_detach_process(%#.16llx, 0x%.8llx): %li\n", unit_address, process_token, rc); + trace_cxl_hcall_detach(unit_address, process_token, rc); + + switch (rc) { + case H_SUCCESS: /* The process was detached from the coherent platform function */ + return 0; + case H_PARAMETER: /* An incorrect parameter was supplied. */ + return -EINVAL; + case H_AUTHORITY: /* The partition does not have authority to perform this hcall */ + case H_RESOURCE: /* The function has page table mappings for MMIO */ + case H_HARDWARE: /* A hardware event prevented the detach operation */ + case H_STATE: /* The coherent platform function is not in a valid state */ + case H_BUSY: + return -EBUSY; + default: + WARN(1, "Unexpected return code: %lx", rc); + return -EINVAL; + } +} + +/** + * cxl_h_control_function - This H_CONTROL_CA_FUNCTION hypervisor call allows + * the partition to manipulate or query + * certain coherent platform function behaviors. + */ +static long cxl_h_control_function(u64 unit_address, u64 op, + u64 p1, u64 p2, u64 p3, u64 p4, u64 *out) +{ + unsigned long retbuf[PLPAR_HCALL9_BUFSIZE]; + long rc; + + CXL_H9_WAIT_UNTIL_DONE(rc, retbuf, H_CONTROL_CA_FUNCTION, unit_address, op, p1, p2, p3, p4); + _PRINT_MSG(rc, "cxl_h_control_function(%#.16llx, %s(%#llx, %#llx, %#llx, %#llx, R4: %#lx)): %li\n", + unit_address, OP_STR_AFU(op), p1, p2, p3, p4, retbuf[0], rc); + trace_cxl_hcall_control_function(unit_address, OP_STR_AFU(op), p1, p2, p3, p4, retbuf[0], rc); + + switch (rc) { + case H_SUCCESS: /* The operation is completed for the coherent platform function */ + if ((op == H_CONTROL_CA_FUNCTION_GET_FUNCTION_ERR_INT || + op == H_CONTROL_CA_FUNCTION_READ_ERR_STATE || + op == H_CONTROL_CA_FUNCTION_COLLECT_VPD)) + *out = retbuf[0]; + return 0; + case H_PARAMETER: /* An incorrect parameter was supplied. */ + case H_FUNCTION: /* The function is not supported. */ + case H_NOT_FOUND: /* The operation supplied was not valid */ + case H_NOT_AVAILABLE: /* The operation cannot be performed because the AFU has not been downloaded */ + case H_SG_LIST: /* An block list entry was invalid */ + return -EINVAL; + case H_AUTHORITY: /* The partition does not have authority to perform this hcall */ + case H_RESOURCE: /* The function has page table mappings for MMIO */ + case H_HARDWARE: /* A hardware event prevented the attach operation */ + case H_STATE: /* The coherent platform function is not in a valid state */ + case H_BUSY: + return -EBUSY; + default: + WARN(1, "Unexpected return code: %lx", rc); + return -EINVAL; + } +} + +/** + * cxl_h_reset_afu - Perform a reset to the coherent platform function. + */ +long cxl_h_reset_afu(u64 unit_address) +{ + return cxl_h_control_function(unit_address, + H_CONTROL_CA_FUNCTION_RESET, + 0, 0, 0, 0, + NULL); +} + +/** + * cxl_h_suspend_process - Suspend a process from being executed + * Parameter1 = process-token as returned from H_ATTACH_CA_PROCESS when + * process was attached. + */ +long cxl_h_suspend_process(u64 unit_address, u64 process_token) +{ + return cxl_h_control_function(unit_address, + H_CONTROL_CA_FUNCTION_SUSPEND_PROCESS, + process_token, 0, 0, 0, + NULL); +} + +/** + * cxl_h_resume_process - Resume a process to be executed + * Parameter1 = process-token as returned from H_ATTACH_CA_PROCESS when + * process was attached. + */ +long cxl_h_resume_process(u64 unit_address, u64 process_token) +{ + return cxl_h_control_function(unit_address, + H_CONTROL_CA_FUNCTION_RESUME_PROCESS, + process_token, 0, 0, 0, + NULL); +} + +/** + * cxl_h_read_error_state - Checks the error state of the coherent + * platform function. + * R4 contains the error state + */ +long cxl_h_read_error_state(u64 unit_address, u64 *state) +{ + return cxl_h_control_function(unit_address, + H_CONTROL_CA_FUNCTION_READ_ERR_STATE, + 0, 0, 0, 0, + state); +} + +/** + * cxl_h_get_afu_err - collect the AFU error buffer + * Parameter1 = byte offset into error buffer to retrieve, valid values + * are between 0 and (ibm,error-buffer-size - 1) + * Parameter2 = 4K aligned real address of error buffer, to be filled in + * Parameter3 = length of error buffer, valid values are 4K or less + */ +long cxl_h_get_afu_err(u64 unit_address, u64 offset, + u64 buf_address, u64 len) +{ + return cxl_h_control_function(unit_address, + H_CONTROL_CA_FUNCTION_GET_AFU_ERR, + offset, buf_address, len, 0, + NULL); +} + +/** + * cxl_h_get_config - collect configuration record for the + * coherent platform function + * Parameter1 = # of configuration record to retrieve, valid values are + * between 0 and (ibm,#config-records - 1) + * Parameter2 = byte offset into configuration record to retrieve, + * valid values are between 0 and (ibm,config-record-size - 1) + * Parameter3 = 4K aligned real address of configuration record buffer, + * to be filled in + * Parameter4 = length of configuration buffer, valid values are 4K or less + */ +long cxl_h_get_config(u64 unit_address, u64 cr_num, u64 offset, + u64 buf_address, u64 len) +{ + return cxl_h_control_function(unit_address, + H_CONTROL_CA_FUNCTION_GET_CONFIG, + cr_num, offset, buf_address, len, + NULL); +} + +/** + * cxl_h_terminate_process - Terminate the process before completion + * Parameter1 = process-token as returned from H_ATTACH_CA_PROCESS when + * process was attached. + */ +long cxl_h_terminate_process(u64 unit_address, u64 process_token) +{ + return cxl_h_control_function(unit_address, + H_CONTROL_CA_FUNCTION_TERMINATE_PROCESS, + process_token, 0, 0, 0, + NULL); +} + +/** + * cxl_h_collect_vpd - Collect VPD for the coherent platform function. + * Parameter1 = # of VPD record to retrieve, valid values are between 0 + * and (ibm,#config-records - 1). + * Parameter2 = 4K naturally aligned real buffer containing block + * list entries + * Parameter3 = number of block list entries in the block list, valid + * values are between 0 and 256 + */ +long cxl_h_collect_vpd(u64 unit_address, u64 record, u64 list_address, + u64 num, u64 *out) +{ + return cxl_h_control_function(unit_address, + H_CONTROL_CA_FUNCTION_COLLECT_VPD, + record, list_address, num, 0, + out); +} + +/** + * cxl_h_get_fn_error_interrupt - Read the function-wide error data based on an interrupt + */ +long cxl_h_get_fn_error_interrupt(u64 unit_address, u64 *reg) +{ + return cxl_h_control_function(unit_address, + H_CONTROL_CA_FUNCTION_GET_FUNCTION_ERR_INT, + 0, 0, 0, 0, reg); +} + +/** + * cxl_h_ack_fn_error_interrupt - Acknowledge function-wide error data + * based on an interrupt + * Parameter1 = value to write to the function-wide error interrupt register + */ +long cxl_h_ack_fn_error_interrupt(u64 unit_address, u64 value) +{ + return cxl_h_control_function(unit_address, + H_CONTROL_CA_FUNCTION_ACK_FUNCTION_ERR_INT, + value, 0, 0, 0, + NULL); +} + +/** + * cxl_h_get_error_log - Retrieve the Platform Log ID (PLID) of + * an error log + */ +long cxl_h_get_error_log(u64 unit_address, u64 value) +{ + return cxl_h_control_function(unit_address, + H_CONTROL_CA_FUNCTION_GET_ERROR_LOG, + 0, 0, 0, 0, + NULL); +} + +/** + * cxl_h_collect_int_info - Collect interrupt info about a coherent + * platform function after an interrupt occurred. + */ +long cxl_h_collect_int_info(u64 unit_address, u64 process_token, + struct cxl_irq_info *info) +{ + long rc; + + BUG_ON(sizeof(*info) != sizeof(unsigned long[PLPAR_HCALL9_BUFSIZE])); + + rc = plpar_hcall9(H_COLLECT_CA_INT_INFO, (unsigned long *) info, + unit_address, process_token); + _PRINT_MSG(rc, "cxl_h_collect_int_info(%#.16llx, 0x%llx): %li\n", + unit_address, process_token, rc); + trace_cxl_hcall_collect_int_info(unit_address, process_token, rc); + + switch (rc) { + case H_SUCCESS: /* The interrupt info is returned in return registers. */ + pr_devel("dsisr:%#llx, dar:%#llx, dsr:%#llx, pid_tid:%#llx, afu_err:%#llx, errstat:%#llx\n", + info->dsisr, info->dar, info->dsr, info->reserved, + info->afu_err, info->errstat); + return 0; + case H_PARAMETER: /* An incorrect parameter was supplied. */ + return -EINVAL; + case H_AUTHORITY: /* The partition does not have authority to perform this hcall. */ + case H_HARDWARE: /* A hardware event prevented the collection of the interrupt info.*/ + case H_STATE: /* The coherent platform function is not in a valid state to collect interrupt info. */ + return -EBUSY; + default: + WARN(1, "Unexpected return code: %lx", rc); + return -EINVAL; + } +} + +/** + * cxl_h_control_faults - Control the operation of a coherent platform + * function after a fault occurs. + * + * Parameters + * control-mask: value to control the faults + * looks like PSL_TFC_An shifted >> 32 + * reset-mask: mask to control reset of function faults + * Set reset_mask = 1 to reset PSL errors + */ +long cxl_h_control_faults(u64 unit_address, u64 process_token, + u64 control_mask, u64 reset_mask) +{ + unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; + long rc; + + memset(retbuf, 0, sizeof(retbuf)); + + rc = plpar_hcall(H_CONTROL_CA_FAULTS, retbuf, unit_address, + H_CONTROL_CA_FAULTS_RESPOND_PSL, process_token, + control_mask, reset_mask); + _PRINT_MSG(rc, "cxl_h_control_faults(%#.16llx, 0x%llx, %#llx, %#llx): %li (%#lx)\n", + unit_address, process_token, control_mask, reset_mask, + rc, retbuf[0]); + trace_cxl_hcall_control_faults(unit_address, process_token, + control_mask, reset_mask, retbuf[0], rc); + + switch (rc) { + case H_SUCCESS: /* Faults were successfully controlled for the function. */ + return 0; + case H_PARAMETER: /* An incorrect parameter was supplied. */ + return -EINVAL; + case H_HARDWARE: /* A hardware event prevented the control of faults. */ + case H_STATE: /* The function was in an invalid state. */ + case H_AUTHORITY: /* The partition does not have authority to perform this hcall; the coherent platform facilities may need to be licensed. */ + return -EBUSY; + case H_FUNCTION: /* The function is not supported */ + case H_NOT_FOUND: /* The operation supplied was not valid */ + return -EINVAL; + default: + WARN(1, "Unexpected return code: %lx", rc); + return -EINVAL; + } +} + +/** + * cxl_h_control_facility - This H_CONTROL_CA_FACILITY hypervisor call + * allows the partition to manipulate or query + * certain coherent platform facility behaviors. + */ +static long cxl_h_control_facility(u64 unit_address, u64 op, + u64 p1, u64 p2, u64 p3, u64 p4, u64 *out) +{ + unsigned long retbuf[PLPAR_HCALL9_BUFSIZE]; + long rc; + + CXL_H9_WAIT_UNTIL_DONE(rc, retbuf, H_CONTROL_CA_FACILITY, unit_address, op, p1, p2, p3, p4); + _PRINT_MSG(rc, "cxl_h_control_facility(%#.16llx, %s(%#llx, %#llx, %#llx, %#llx, R4: %#lx)): %li\n", + unit_address, OP_STR_CONTROL_ADAPTER(op), p1, p2, p3, p4, retbuf[0], rc); + trace_cxl_hcall_control_facility(unit_address, OP_STR_CONTROL_ADAPTER(op), p1, p2, p3, p4, retbuf[0], rc); + + switch (rc) { + case H_SUCCESS: /* The operation is completed for the coherent platform facility */ + if (op == H_CONTROL_CA_FACILITY_COLLECT_VPD) + *out = retbuf[0]; + return 0; + case H_PARAMETER: /* An incorrect parameter was supplied. */ + case H_FUNCTION: /* The function is not supported. */ + case H_NOT_FOUND: /* The operation supplied was not valid */ + case H_NOT_AVAILABLE: /* The operation cannot be performed because the AFU has not been downloaded */ + case H_SG_LIST: /* An block list entry was invalid */ + return -EINVAL; + case H_AUTHORITY: /* The partition does not have authority to perform this hcall */ + case H_RESOURCE: /* The function has page table mappings for MMIO */ + case H_HARDWARE: /* A hardware event prevented the attach operation */ + case H_STATE: /* The coherent platform facility is not in a valid state */ + case H_BUSY: + return -EBUSY; + default: + WARN(1, "Unexpected return code: %lx", rc); + return -EINVAL; + } +} + +/** + * cxl_h_reset_adapter - Perform a reset to the coherent platform facility. + */ +long cxl_h_reset_adapter(u64 unit_address) +{ + return cxl_h_control_facility(unit_address, + H_CONTROL_CA_FACILITY_RESET, + 0, 0, 0, 0, + NULL); +} + +/** + * cxl_h_collect_vpd - Collect VPD for the coherent platform function. + * Parameter1 = 4K naturally aligned real buffer containing block + * list entries + * Parameter2 = number of block list entries in the block list, valid + * values are between 0 and 256 + */ +long cxl_h_collect_vpd_adapter(u64 unit_address, u64 list_address, + u64 num, u64 *out) +{ + return cxl_h_control_facility(unit_address, + H_CONTROL_CA_FACILITY_COLLECT_VPD, + list_address, num, 0, 0, + out); +} + +/** + * cxl_h_download_facility - This H_DOWNLOAD_CA_FACILITY + * hypervisor call provide platform support for + * downloading a base adapter image to the coherent + * platform facility, and for validating the entire + * image after the download. + * Parameters + * op: operation to perform to the coherent platform function + * Download: operation = 1, the base image in the coherent platform + * facility is first erased, and then + * programmed using the image supplied + * in the scatter/gather list. + * Validate: operation = 2, the base image in the coherent platform + * facility is compared with the image + * supplied in the scatter/gather list. + * list_address: 4K naturally aligned real buffer containing + * scatter/gather list entries. + * num: number of block list entries in the scatter/gather list. + */ +static long cxl_h_download_facility(u64 unit_address, u64 op, + u64 list_address, u64 num, + u64 *out) +{ + unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; + unsigned int delay, total_delay = 0; + u64 token = 0; + long rc; + + if (*out != 0) + token = *out; + + memset(retbuf, 0, sizeof(retbuf)); + while (1) { + rc = plpar_hcall(H_DOWNLOAD_CA_FACILITY, retbuf, + unit_address, op, list_address, num, + token); + token = retbuf[0]; + if (rc != H_BUSY && !H_IS_LONG_BUSY(rc)) + break; + + if (rc != H_BUSY) { + delay = get_longbusy_msecs(rc); + total_delay += delay; + if (total_delay > CXL_HCALL_TIMEOUT_DOWNLOAD) { + WARN(1, "Warning: Giving up waiting for CXL hcall " + "%#x after %u msec\n", + H_DOWNLOAD_CA_FACILITY, total_delay); + rc = H_BUSY; + break; + } + msleep(delay); + } + } + _PRINT_MSG(rc, "cxl_h_download_facility(%#.16llx, %s(%#llx, %#llx), %#lx): %li\n", + unit_address, OP_STR_DOWNLOAD_ADAPTER(op), list_address, num, retbuf[0], rc); + trace_cxl_hcall_download_facility(unit_address, OP_STR_DOWNLOAD_ADAPTER(op), list_address, num, retbuf[0], rc); + + switch (rc) { + case H_SUCCESS: /* The operation is completed for the coherent platform facility */ + return 0; + case H_PARAMETER: /* An incorrect parameter was supplied */ + case H_FUNCTION: /* The function is not supported. */ + case H_SG_LIST: /* An block list entry was invalid */ + case H_BAD_DATA: /* Image verification failed */ + return -EINVAL; + case H_AUTHORITY: /* The partition does not have authority to perform this hcall */ + case H_RESOURCE: /* The function has page table mappings for MMIO */ + case H_HARDWARE: /* A hardware event prevented the attach operation */ + case H_STATE: /* The coherent platform facility is not in a valid state */ + case H_BUSY: + return -EBUSY; + case H_CONTINUE: + *out = retbuf[0]; + return 1; /* More data is needed for the complete image */ + default: + WARN(1, "Unexpected return code: %lx", rc); + return -EINVAL; + } +} + +/** + * cxl_h_download_adapter_image - Download the base image to the coherent + * platform facility. + */ +long cxl_h_download_adapter_image(u64 unit_address, + u64 list_address, u64 num, + u64 *out) +{ + return cxl_h_download_facility(unit_address, + H_DOWNLOAD_CA_FACILITY_DOWNLOAD, + list_address, num, out); +} + +/** + * cxl_h_validate_adapter_image - Validate the base image in the coherent + * platform facility. + */ +long cxl_h_validate_adapter_image(u64 unit_address, + u64 list_address, u64 num, + u64 *out) +{ + return cxl_h_download_facility(unit_address, + H_DOWNLOAD_CA_FACILITY_VALIDATE, + list_address, num, out); +} diff --git a/drivers/misc/cxl/hcalls.h b/drivers/misc/cxl/hcalls.h new file mode 100644 index 000000000..3e25522a5 --- /dev/null +++ b/drivers/misc/cxl/hcalls.h @@ -0,0 +1,204 @@ +/* + * Copyright 2015 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _HCALLS_H +#define _HCALLS_H + +#include +#include +#include +#include "cxl.h" + +#define SG_BUFFER_SIZE 4096 +#define SG_MAX_ENTRIES 256 + +struct sg_list { + u64 phys_addr; + u64 len; +}; + +/* + * This is straight out of PAPR, but replacing some of the compound fields with + * a single field, where they were identical to the register layout. + * + * The 'flags' parameter regroups the various bit-fields + */ +#define CXL_PE_CSRP_VALID (1ULL << 63) +#define CXL_PE_PROBLEM_STATE (1ULL << 62) +#define CXL_PE_SECONDARY_SEGMENT_TBL_SRCH (1ULL << 61) +#define CXL_PE_TAGS_ACTIVE (1ULL << 60) +#define CXL_PE_USER_STATE (1ULL << 59) +#define CXL_PE_TRANSLATION_ENABLED (1ULL << 58) +#define CXL_PE_64_BIT (1ULL << 57) +#define CXL_PE_PRIVILEGED_PROCESS (1ULL << 56) + +#define CXL_PROCESS_ELEMENT_VERSION 1 +struct cxl_process_element_hcall { + __be64 version; + __be64 flags; + u8 reserved0[12]; + __be32 pslVirtualIsn; + u8 applicationVirtualIsnBitmap[256]; + u8 reserved1[144]; + struct cxl_process_element_common common; + u8 reserved4[12]; +} __packed; + +#define H_STATE_NORMAL 1 +#define H_STATE_DISABLE 2 +#define H_STATE_TEMP_UNAVAILABLE 3 +#define H_STATE_PERM_UNAVAILABLE 4 + +/* NOTE: element must be a logical real address, and must be pinned */ +long cxl_h_attach_process(u64 unit_address, struct cxl_process_element_hcall *element, + u64 *process_token, u64 *mmio_addr, u64 *mmio_size); + +/** + * cxl_h_detach_process - Detach a process element from a coherent + * platform function. + */ +long cxl_h_detach_process(u64 unit_address, u64 process_token); + +/** + * cxl_h_reset_afu - Perform a reset to the coherent platform function. + */ +long cxl_h_reset_afu(u64 unit_address); + +/** + * cxl_h_suspend_process - Suspend a process from being executed + * Parameter1 = process-token as returned from H_ATTACH_CA_PROCESS when + * process was attached. + */ +long cxl_h_suspend_process(u64 unit_address, u64 process_token); + +/** + * cxl_h_resume_process - Resume a process to be executed + * Parameter1 = process-token as returned from H_ATTACH_CA_PROCESS when + * process was attached. + */ +long cxl_h_resume_process(u64 unit_address, u64 process_token); + +/** + * cxl_h_read_error_state - Reads the error state of the coherent + * platform function. + * R4 contains the error state + */ +long cxl_h_read_error_state(u64 unit_address, u64 *state); + +/** + * cxl_h_get_afu_err - collect the AFU error buffer + * Parameter1 = byte offset into error buffer to retrieve, valid values + * are between 0 and (ibm,error-buffer-size - 1) + * Parameter2 = 4K aligned real address of error buffer, to be filled in + * Parameter3 = length of error buffer, valid values are 4K or less + */ +long cxl_h_get_afu_err(u64 unit_address, u64 offset, u64 buf_address, u64 len); + +/** + * cxl_h_get_config - collect configuration record for the + * coherent platform function + * Parameter1 = # of configuration record to retrieve, valid values are + * between 0 and (ibm,#config-records - 1) + * Parameter2 = byte offset into configuration record to retrieve, + * valid values are between 0 and (ibm,config-record-size - 1) + * Parameter3 = 4K aligned real address of configuration record buffer, + * to be filled in + * Parameter4 = length of configuration buffer, valid values are 4K or less + */ +long cxl_h_get_config(u64 unit_address, u64 cr_num, u64 offset, + u64 buf_address, u64 len); + +/** + * cxl_h_terminate_process - Terminate the process before completion + * Parameter1 = process-token as returned from H_ATTACH_CA_PROCESS when + * process was attached. + */ +long cxl_h_terminate_process(u64 unit_address, u64 process_token); + +/** + * cxl_h_collect_vpd - Collect VPD for the coherent platform function. + * Parameter1 = # of VPD record to retrieve, valid values are between 0 + * and (ibm,#config-records - 1). + * Parameter2 = 4K naturally aligned real buffer containing block + * list entries + * Parameter3 = number of block list entries in the block list, valid + * values are between 0 and 256 + */ +long cxl_h_collect_vpd(u64 unit_address, u64 record, u64 list_address, + u64 num, u64 *out); + +/** + * cxl_h_get_fn_error_interrupt - Read the function-wide error data based on an interrupt + */ +long cxl_h_get_fn_error_interrupt(u64 unit_address, u64 *reg); + +/** + * cxl_h_ack_fn_error_interrupt - Acknowledge function-wide error data + * based on an interrupt + * Parameter1 = value to write to the function-wide error interrupt register + */ +long cxl_h_ack_fn_error_interrupt(u64 unit_address, u64 value); + +/** + * cxl_h_get_error_log - Retrieve the Platform Log ID (PLID) of + * an error log + */ +long cxl_h_get_error_log(u64 unit_address, u64 value); + +/** + * cxl_h_collect_int_info - Collect interrupt info about a coherent + * platform function after an interrupt occurred. + */ +long cxl_h_collect_int_info(u64 unit_address, u64 process_token, + struct cxl_irq_info *info); + +/** + * cxl_h_control_faults - Control the operation of a coherent platform + * function after a fault occurs. + * + * Parameters + * control-mask: value to control the faults + * looks like PSL_TFC_An shifted >> 32 + * reset-mask: mask to control reset of function faults + * Set reset_mask = 1 to reset PSL errors + */ +long cxl_h_control_faults(u64 unit_address, u64 process_token, + u64 control_mask, u64 reset_mask); + +/** + * cxl_h_reset_adapter - Perform a reset to the coherent platform facility. + */ +long cxl_h_reset_adapter(u64 unit_address); + +/** + * cxl_h_collect_vpd - Collect VPD for the coherent platform function. + * Parameter1 = 4K naturally aligned real buffer containing block + * list entries + * Parameter2 = number of block list entries in the block list, valid + * values are between 0 and 256 + */ +long cxl_h_collect_vpd_adapter(u64 unit_address, u64 list_address, + u64 num, u64 *out); + +/** + * cxl_h_download_adapter_image - Download the base image to the coherent + * platform facility. + */ +long cxl_h_download_adapter_image(u64 unit_address, + u64 list_address, u64 num, + u64 *out); + +/** + * cxl_h_validate_adapter_image - Validate the base image in the coherent + * platform facility. + */ +long cxl_h_validate_adapter_image(u64 unit_address, + u64 list_address, u64 num, + u64 *out); +#endif /* _HCALLS_H */ diff --git a/drivers/misc/cxl/irq.c b/drivers/misc/cxl/irq.c new file mode 100644 index 000000000..ce08a9f22 --- /dev/null +++ b/drivers/misc/cxl/irq.c @@ -0,0 +1,453 @@ +/* + * Copyright 2014 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "cxl.h" +#include "trace.h" + +static int afu_irq_range_start(void) +{ + if (cpu_has_feature(CPU_FTR_HVMODE)) + return 1; + return 0; +} + +static irqreturn_t schedule_cxl_fault(struct cxl_context *ctx, u64 dsisr, u64 dar) +{ + ctx->dsisr = dsisr; + ctx->dar = dar; + schedule_work(&ctx->fault_work); + return IRQ_HANDLED; +} + +irqreturn_t cxl_irq_psl9(int irq, struct cxl_context *ctx, struct cxl_irq_info *irq_info) +{ + u64 dsisr, dar; + + dsisr = irq_info->dsisr; + dar = irq_info->dar; + + trace_cxl_psl9_irq(ctx, irq, dsisr, dar); + + pr_devel("CXL interrupt %i for afu pe: %i DSISR: %#llx DAR: %#llx\n", irq, ctx->pe, dsisr, dar); + + if (dsisr & CXL_PSL9_DSISR_An_TF) { + pr_devel("CXL interrupt: Scheduling translation fault handling for later (pe: %i)\n", ctx->pe); + return schedule_cxl_fault(ctx, dsisr, dar); + } + + if (dsisr & CXL_PSL9_DSISR_An_PE) + return cxl_ops->handle_psl_slice_error(ctx, dsisr, + irq_info->errstat); + if (dsisr & CXL_PSL9_DSISR_An_AE) { + pr_devel("CXL interrupt: AFU Error 0x%016llx\n", irq_info->afu_err); + + if (ctx->pending_afu_err) { + /* + * This shouldn't happen - the PSL treats these errors + * as fatal and will have reset the AFU, so there's not + * much point buffering multiple AFU errors. + * OTOH if we DO ever see a storm of these come in it's + * probably best that we log them somewhere: + */ + dev_err_ratelimited(&ctx->afu->dev, "CXL AFU Error undelivered to pe %i: 0x%016llx\n", + ctx->pe, irq_info->afu_err); + } else { + spin_lock(&ctx->lock); + ctx->afu_err = irq_info->afu_err; + ctx->pending_afu_err = 1; + spin_unlock(&ctx->lock); + + wake_up_all(&ctx->wq); + } + + cxl_ops->ack_irq(ctx, CXL_PSL_TFC_An_A, 0); + return IRQ_HANDLED; + } + if (dsisr & CXL_PSL9_DSISR_An_OC) + pr_devel("CXL interrupt: OS Context Warning\n"); + + WARN(1, "Unhandled CXL PSL IRQ\n"); + return IRQ_HANDLED; +} + +irqreturn_t cxl_irq_psl8(int irq, struct cxl_context *ctx, struct cxl_irq_info *irq_info) +{ + u64 dsisr, dar; + + dsisr = irq_info->dsisr; + dar = irq_info->dar; + + trace_cxl_psl_irq(ctx, irq, dsisr, dar); + + pr_devel("CXL interrupt %i for afu pe: %i DSISR: %#llx DAR: %#llx\n", irq, ctx->pe, dsisr, dar); + + if (dsisr & CXL_PSL_DSISR_An_DS) { + /* + * We don't inherently need to sleep to handle this, but we do + * need to get a ref to the task's mm, which we can't do from + * irq context without the potential for a deadlock since it + * takes the task_lock. An alternate option would be to keep a + * reference to the task's mm the entire time it has cxl open, + * but to do that we need to solve the issue where we hold a + * ref to the mm, but the mm can hold a ref to the fd after an + * mmap preventing anything from being cleaned up. + */ + pr_devel("Scheduling segment miss handling for later pe: %i\n", ctx->pe); + return schedule_cxl_fault(ctx, dsisr, dar); + } + + if (dsisr & CXL_PSL_DSISR_An_M) + pr_devel("CXL interrupt: PTE not found\n"); + if (dsisr & CXL_PSL_DSISR_An_P) + pr_devel("CXL interrupt: Storage protection violation\n"); + if (dsisr & CXL_PSL_DSISR_An_A) + pr_devel("CXL interrupt: AFU lock access to write through or cache inhibited storage\n"); + if (dsisr & CXL_PSL_DSISR_An_S) + pr_devel("CXL interrupt: Access was afu_wr or afu_zero\n"); + if (dsisr & CXL_PSL_DSISR_An_K) + pr_devel("CXL interrupt: Access not permitted by virtual page class key protection\n"); + + if (dsisr & CXL_PSL_DSISR_An_DM) { + /* + * In some cases we might be able to handle the fault + * immediately if hash_page would succeed, but we still need + * the task's mm, which as above we can't get without a lock + */ + pr_devel("Scheduling page fault handling for later pe: %i\n", ctx->pe); + return schedule_cxl_fault(ctx, dsisr, dar); + } + if (dsisr & CXL_PSL_DSISR_An_ST) + WARN(1, "CXL interrupt: Segment Table PTE not found\n"); + if (dsisr & CXL_PSL_DSISR_An_UR) + pr_devel("CXL interrupt: AURP PTE not found\n"); + if (dsisr & CXL_PSL_DSISR_An_PE) + return cxl_ops->handle_psl_slice_error(ctx, dsisr, + irq_info->errstat); + if (dsisr & CXL_PSL_DSISR_An_AE) { + pr_devel("CXL interrupt: AFU Error 0x%016llx\n", irq_info->afu_err); + + if (ctx->pending_afu_err) { + /* + * This shouldn't happen - the PSL treats these errors + * as fatal and will have reset the AFU, so there's not + * much point buffering multiple AFU errors. + * OTOH if we DO ever see a storm of these come in it's + * probably best that we log them somewhere: + */ + dev_err_ratelimited(&ctx->afu->dev, "CXL AFU Error " + "undelivered to pe %i: 0x%016llx\n", + ctx->pe, irq_info->afu_err); + } else { + spin_lock(&ctx->lock); + ctx->afu_err = irq_info->afu_err; + ctx->pending_afu_err = true; + spin_unlock(&ctx->lock); + + wake_up_all(&ctx->wq); + } + + cxl_ops->ack_irq(ctx, CXL_PSL_TFC_An_A, 0); + return IRQ_HANDLED; + } + if (dsisr & CXL_PSL_DSISR_An_OC) + pr_devel("CXL interrupt: OS Context Warning\n"); + + WARN(1, "Unhandled CXL PSL IRQ\n"); + return IRQ_HANDLED; +} + +static irqreturn_t cxl_irq_afu(int irq, void *data) +{ + struct cxl_context *ctx = data; + irq_hw_number_t hwirq = irqd_to_hwirq(irq_get_irq_data(irq)); + int irq_off, afu_irq = 0; + __u16 range; + int r; + + /* + * Look for the interrupt number. + * On bare-metal, we know range 0 only contains the PSL + * interrupt so we could start counting at range 1 and initialize + * afu_irq at 1. + * In a guest, range 0 also contains AFU interrupts, so it must + * be counted for. Therefore we initialize afu_irq at 0 to take into + * account the PSL interrupt. + * + * For code-readability, it just seems easier to go over all + * the ranges on bare-metal and guest. The end result is the same. + */ + for (r = 0; r < CXL_IRQ_RANGES; r++) { + irq_off = hwirq - ctx->irqs.offset[r]; + range = ctx->irqs.range[r]; + if (irq_off >= 0 && irq_off < range) { + afu_irq += irq_off; + break; + } + afu_irq += range; + } + if (unlikely(r >= CXL_IRQ_RANGES)) { + WARN(1, "Received AFU IRQ out of range for pe %i (virq %i hwirq %lx)\n", + ctx->pe, irq, hwirq); + return IRQ_HANDLED; + } + + trace_cxl_afu_irq(ctx, afu_irq, irq, hwirq); + pr_devel("Received AFU interrupt %i for pe: %i (virq %i hwirq %lx)\n", + afu_irq, ctx->pe, irq, hwirq); + + if (unlikely(!ctx->irq_bitmap)) { + WARN(1, "Received AFU IRQ for context with no IRQ bitmap\n"); + return IRQ_HANDLED; + } + spin_lock(&ctx->lock); + set_bit(afu_irq - 1, ctx->irq_bitmap); + ctx->pending_irq = true; + spin_unlock(&ctx->lock); + + wake_up_all(&ctx->wq); + + return IRQ_HANDLED; +} + +unsigned int cxl_map_irq(struct cxl *adapter, irq_hw_number_t hwirq, + irq_handler_t handler, void *cookie, const char *name) +{ + unsigned int virq; + int result; + + /* IRQ Domain? */ + virq = irq_create_mapping(NULL, hwirq); + if (!virq) { + dev_warn(&adapter->dev, "cxl_map_irq: irq_create_mapping failed\n"); + return 0; + } + + if (cxl_ops->setup_irq) + cxl_ops->setup_irq(adapter, hwirq, virq); + + pr_devel("hwirq %#lx mapped to virq %u\n", hwirq, virq); + + result = request_irq(virq, handler, 0, name, cookie); + if (result) { + dev_warn(&adapter->dev, "cxl_map_irq: request_irq failed: %i\n", result); + return 0; + } + + return virq; +} + +void cxl_unmap_irq(unsigned int virq, void *cookie) +{ + free_irq(virq, cookie); +} + +int cxl_register_one_irq(struct cxl *adapter, + irq_handler_t handler, + void *cookie, + irq_hw_number_t *dest_hwirq, + unsigned int *dest_virq, + const char *name) +{ + int hwirq, virq; + + if ((hwirq = cxl_ops->alloc_one_irq(adapter)) < 0) + return hwirq; + + if (!(virq = cxl_map_irq(adapter, hwirq, handler, cookie, name))) + goto err; + + *dest_hwirq = hwirq; + *dest_virq = virq; + + return 0; + +err: + cxl_ops->release_one_irq(adapter, hwirq); + return -ENOMEM; +} + +void afu_irq_name_free(struct cxl_context *ctx) +{ + struct cxl_irq_name *irq_name, *tmp; + + list_for_each_entry_safe(irq_name, tmp, &ctx->irq_names, list) { + kfree(irq_name->name); + list_del(&irq_name->list); + kfree(irq_name); + } +} + +int afu_allocate_irqs(struct cxl_context *ctx, u32 count) +{ + int rc, r, i, j = 1; + struct cxl_irq_name *irq_name; + int alloc_count; + + /* + * In native mode, range 0 is reserved for the multiplexed + * PSL interrupt. It has been allocated when the AFU was initialized. + * + * In a guest, the PSL interrupt is not mutliplexed, but per-context, + * and is the first interrupt from range 0. It still needs to be + * allocated, so bump the count by one. + */ + if (cpu_has_feature(CPU_FTR_HVMODE)) + alloc_count = count; + else + alloc_count = count + 1; + + if ((rc = cxl_ops->alloc_irq_ranges(&ctx->irqs, ctx->afu->adapter, + alloc_count))) + return rc; + + if (cpu_has_feature(CPU_FTR_HVMODE)) { + /* Multiplexed PSL Interrupt */ + ctx->irqs.offset[0] = ctx->afu->native->psl_hwirq; + ctx->irqs.range[0] = 1; + } + + ctx->irq_count = count; + ctx->irq_bitmap = kcalloc(BITS_TO_LONGS(count), + sizeof(*ctx->irq_bitmap), GFP_KERNEL); + if (!ctx->irq_bitmap) + goto out; + + /* + * Allocate names first. If any fail, bail out before allocating + * actual hardware IRQs. + */ + for (r = afu_irq_range_start(); r < CXL_IRQ_RANGES; r++) { + for (i = 0; i < ctx->irqs.range[r]; i++) { + irq_name = kmalloc(sizeof(struct cxl_irq_name), + GFP_KERNEL); + if (!irq_name) + goto out; + irq_name->name = kasprintf(GFP_KERNEL, "cxl-%s-pe%i-%i", + dev_name(&ctx->afu->dev), + ctx->pe, j); + if (!irq_name->name) { + kfree(irq_name); + goto out; + } + /* Add to tail so next look get the correct order */ + list_add_tail(&irq_name->list, &ctx->irq_names); + j++; + } + } + return 0; + +out: + cxl_ops->release_irq_ranges(&ctx->irqs, ctx->afu->adapter); + afu_irq_name_free(ctx); + return -ENOMEM; +} + +static void afu_register_hwirqs(struct cxl_context *ctx) +{ + irq_hw_number_t hwirq; + struct cxl_irq_name *irq_name; + int r, i; + irqreturn_t (*handler)(int irq, void *data); + + /* We've allocated all memory now, so let's do the irq allocations */ + irq_name = list_first_entry(&ctx->irq_names, struct cxl_irq_name, list); + for (r = afu_irq_range_start(); r < CXL_IRQ_RANGES; r++) { + hwirq = ctx->irqs.offset[r]; + for (i = 0; i < ctx->irqs.range[r]; hwirq++, i++) { + if (r == 0 && i == 0) + /* + * The very first interrupt of range 0 is + * always the PSL interrupt, but we only + * need to connect a handler for guests, + * because there's one PSL interrupt per + * context. + * On bare-metal, the PSL interrupt is + * multiplexed and was setup when the AFU + * was configured. + */ + handler = cxl_ops->psl_interrupt; + else + handler = cxl_irq_afu; + cxl_map_irq(ctx->afu->adapter, hwirq, handler, ctx, + irq_name->name); + irq_name = list_next_entry(irq_name, list); + } + } +} + +int afu_register_irqs(struct cxl_context *ctx, u32 count) +{ + int rc; + + rc = afu_allocate_irqs(ctx, count); + if (rc) + return rc; + + afu_register_hwirqs(ctx); + return 0; +} + +void afu_release_irqs(struct cxl_context *ctx, void *cookie) +{ + irq_hw_number_t hwirq; + unsigned int virq; + int r, i; + + for (r = afu_irq_range_start(); r < CXL_IRQ_RANGES; r++) { + hwirq = ctx->irqs.offset[r]; + for (i = 0; i < ctx->irqs.range[r]; hwirq++, i++) { + virq = irq_find_mapping(NULL, hwirq); + if (virq) + cxl_unmap_irq(virq, cookie); + } + } + + afu_irq_name_free(ctx); + cxl_ops->release_irq_ranges(&ctx->irqs, ctx->afu->adapter); + + ctx->irq_count = 0; +} + +void cxl_afu_decode_psl_serr(struct cxl_afu *afu, u64 serr) +{ + dev_crit(&afu->dev, + "PSL Slice error received. Check AFU for root cause.\n"); + dev_crit(&afu->dev, "PSL_SERR_An: 0x%016llx\n", serr); + if (serr & CXL_PSL_SERR_An_afuto) + dev_crit(&afu->dev, "AFU MMIO Timeout\n"); + if (serr & CXL_PSL_SERR_An_afudis) + dev_crit(&afu->dev, + "MMIO targeted Accelerator that was not enabled\n"); + if (serr & CXL_PSL_SERR_An_afuov) + dev_crit(&afu->dev, "AFU CTAG Overflow\n"); + if (serr & CXL_PSL_SERR_An_badsrc) + dev_crit(&afu->dev, "Bad Interrupt Source\n"); + if (serr & CXL_PSL_SERR_An_badctx) + dev_crit(&afu->dev, "Bad Context Handle\n"); + if (serr & CXL_PSL_SERR_An_llcmdis) + dev_crit(&afu->dev, "LLCMD to Disabled AFU\n"); + if (serr & CXL_PSL_SERR_An_llcmdto) + dev_crit(&afu->dev, "LLCMD Timeout to AFU\n"); + if (serr & CXL_PSL_SERR_An_afupar) + dev_crit(&afu->dev, "AFU MMIO Parity Error\n"); + if (serr & CXL_PSL_SERR_An_afudup) + dev_crit(&afu->dev, "AFU MMIO Duplicate CTAG Error\n"); + if (serr & CXL_PSL_SERR_An_AE) + dev_crit(&afu->dev, + "AFU asserted JDONE with JERROR in AFU Directed Mode\n"); +} diff --git a/drivers/misc/cxl/main.c b/drivers/misc/cxl/main.c new file mode 100644 index 000000000..f35406be4 --- /dev/null +++ b/drivers/misc/cxl/main.c @@ -0,0 +1,382 @@ +/* + * Copyright 2014 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "cxl.h" +#include "trace.h" + +static DEFINE_SPINLOCK(adapter_idr_lock); +static DEFINE_IDR(cxl_adapter_idr); + +uint cxl_verbose; +module_param_named(verbose, cxl_verbose, uint, 0600); +MODULE_PARM_DESC(verbose, "Enable verbose dmesg output"); + +const struct cxl_backend_ops *cxl_ops; + +int cxl_afu_slbia(struct cxl_afu *afu) +{ + unsigned long timeout = jiffies + (HZ * CXL_TIMEOUT); + + pr_devel("cxl_afu_slbia issuing SLBIA command\n"); + cxl_p2n_write(afu, CXL_SLBIA_An, CXL_TLB_SLB_IQ_ALL); + while (cxl_p2n_read(afu, CXL_SLBIA_An) & CXL_TLB_SLB_P) { + if (time_after_eq(jiffies, timeout)) { + dev_warn(&afu->dev, "WARNING: CXL AFU SLBIA timed out!\n"); + return -EBUSY; + } + /* If the adapter has gone down, we can assume that we + * will PERST it and that will invalidate everything. + */ + if (!cxl_ops->link_ok(afu->adapter, afu)) + return -EIO; + cpu_relax(); + } + return 0; +} + +static inline void _cxl_slbia(struct cxl_context *ctx, struct mm_struct *mm) +{ + unsigned long flags; + + if (ctx->mm != mm) + return; + + pr_devel("%s matched mm - card: %i afu: %i pe: %i\n", __func__, + ctx->afu->adapter->adapter_num, ctx->afu->slice, ctx->pe); + + spin_lock_irqsave(&ctx->sste_lock, flags); + trace_cxl_slbia(ctx); + memset(ctx->sstp, 0, ctx->sst_size); + spin_unlock_irqrestore(&ctx->sste_lock, flags); + mb(); + cxl_afu_slbia(ctx->afu); +} + +static inline void cxl_slbia_core(struct mm_struct *mm) +{ + struct cxl *adapter; + struct cxl_afu *afu; + struct cxl_context *ctx; + int card, slice, id; + + pr_devel("%s called\n", __func__); + + spin_lock(&adapter_idr_lock); + idr_for_each_entry(&cxl_adapter_idr, adapter, card) { + /* XXX: Make this lookup faster with link from mm to ctx */ + spin_lock(&adapter->afu_list_lock); + for (slice = 0; slice < adapter->slices; slice++) { + afu = adapter->afu[slice]; + if (!afu || !afu->enabled) + continue; + rcu_read_lock(); + idr_for_each_entry(&afu->contexts_idr, ctx, id) + _cxl_slbia(ctx, mm); + rcu_read_unlock(); + } + spin_unlock(&adapter->afu_list_lock); + } + spin_unlock(&adapter_idr_lock); +} + +static struct cxl_calls cxl_calls = { + .cxl_slbia = cxl_slbia_core, + .owner = THIS_MODULE, +}; + +int cxl_alloc_sst(struct cxl_context *ctx) +{ + unsigned long vsid; + u64 ea_mask, size, sstp0, sstp1; + + sstp0 = 0; + sstp1 = 0; + + ctx->sst_size = PAGE_SIZE; + ctx->sst_lru = 0; + ctx->sstp = (struct cxl_sste *)get_zeroed_page(GFP_KERNEL); + if (!ctx->sstp) { + pr_err("cxl_alloc_sst: Unable to allocate segment table\n"); + return -ENOMEM; + } + pr_devel("SSTP allocated at 0x%p\n", ctx->sstp); + + vsid = get_kernel_vsid((u64)ctx->sstp, mmu_kernel_ssize) << 12; + + sstp0 |= (u64)mmu_kernel_ssize << CXL_SSTP0_An_B_SHIFT; + sstp0 |= (SLB_VSID_KERNEL | mmu_psize_defs[mmu_linear_psize].sllp) << 50; + + size = (((u64)ctx->sst_size >> 8) - 1) << CXL_SSTP0_An_SegTableSize_SHIFT; + if (unlikely(size & ~CXL_SSTP0_An_SegTableSize_MASK)) { + WARN(1, "Impossible segment table size\n"); + return -EINVAL; + } + sstp0 |= size; + + if (mmu_kernel_ssize == MMU_SEGSIZE_256M) + ea_mask = 0xfffff00ULL; + else + ea_mask = 0xffffffff00ULL; + + sstp0 |= vsid >> (50-14); /* Top 14 bits of VSID */ + sstp1 |= (vsid << (64-(50-14))) & ~ea_mask; + sstp1 |= (u64)ctx->sstp & ea_mask; + sstp1 |= CXL_SSTP1_An_V; + + pr_devel("Looked up %#llx: slbfee. %#llx (ssize: %x, vsid: %#lx), copied to SSTP0: %#llx, SSTP1: %#llx\n", + (u64)ctx->sstp, (u64)ctx->sstp & ESID_MASK, mmu_kernel_ssize, vsid, sstp0, sstp1); + + /* Store calculated sstp hardware points for use later */ + ctx->sstp0 = sstp0; + ctx->sstp1 = sstp1; + + return 0; +} + +/* print buffer content as integers when debugging */ +void cxl_dump_debug_buffer(void *buf, size_t buf_len) +{ +#ifdef DEBUG + int i, *ptr; + + /* + * We want to regroup up to 4 integers per line, which means they + * need to be in the same pr_devel() statement + */ + ptr = (int *) buf; + for (i = 0; i * 4 < buf_len; i += 4) { + if ((i + 3) * 4 < buf_len) + pr_devel("%.8x %.8x %.8x %.8x\n", ptr[i], ptr[i + 1], + ptr[i + 2], ptr[i + 3]); + else if ((i + 2) * 4 < buf_len) + pr_devel("%.8x %.8x %.8x\n", ptr[i], ptr[i + 1], + ptr[i + 2]); + else if ((i + 1) * 4 < buf_len) + pr_devel("%.8x %.8x\n", ptr[i], ptr[i + 1]); + else + pr_devel("%.8x\n", ptr[i]); + } +#endif /* DEBUG */ +} + +/* Find a CXL adapter by it's number and increase it's refcount */ +struct cxl *get_cxl_adapter(int num) +{ + struct cxl *adapter; + + spin_lock(&adapter_idr_lock); + if ((adapter = idr_find(&cxl_adapter_idr, num))) + get_device(&adapter->dev); + spin_unlock(&adapter_idr_lock); + + return adapter; +} + +static int cxl_alloc_adapter_nr(struct cxl *adapter) +{ + int i; + + idr_preload(GFP_KERNEL); + spin_lock(&adapter_idr_lock); + i = idr_alloc(&cxl_adapter_idr, adapter, 0, 0, GFP_NOWAIT); + spin_unlock(&adapter_idr_lock); + idr_preload_end(); + if (i < 0) + return i; + + adapter->adapter_num = i; + + return 0; +} + +void cxl_remove_adapter_nr(struct cxl *adapter) +{ + idr_remove(&cxl_adapter_idr, adapter->adapter_num); +} + +struct cxl *cxl_alloc_adapter(void) +{ + struct cxl *adapter; + + if (!(adapter = kzalloc(sizeof(struct cxl), GFP_KERNEL))) + return NULL; + + spin_lock_init(&adapter->afu_list_lock); + + if (cxl_alloc_adapter_nr(adapter)) + goto err1; + + if (dev_set_name(&adapter->dev, "card%i", adapter->adapter_num)) + goto err2; + + /* start with context lock taken */ + atomic_set(&adapter->contexts_num, -1); + + return adapter; +err2: + cxl_remove_adapter_nr(adapter); +err1: + kfree(adapter); + return NULL; +} + +struct cxl_afu *cxl_alloc_afu(struct cxl *adapter, int slice) +{ + struct cxl_afu *afu; + + if (!(afu = kzalloc(sizeof(struct cxl_afu), GFP_KERNEL))) + return NULL; + + afu->adapter = adapter; + afu->dev.parent = &adapter->dev; + afu->dev.release = cxl_ops->release_afu; + afu->slice = slice; + idr_init(&afu->contexts_idr); + mutex_init(&afu->contexts_lock); + spin_lock_init(&afu->afu_cntl_lock); + atomic_set(&afu->configured_state, -1); + afu->prefault_mode = CXL_PREFAULT_NONE; + afu->irqs_max = afu->adapter->user_irqs; + + return afu; +} + +int cxl_afu_select_best_mode(struct cxl_afu *afu) +{ + if (afu->modes_supported & CXL_MODE_DIRECTED) + return cxl_ops->afu_activate_mode(afu, CXL_MODE_DIRECTED); + + if (afu->modes_supported & CXL_MODE_DEDICATED) + return cxl_ops->afu_activate_mode(afu, CXL_MODE_DEDICATED); + + dev_warn(&afu->dev, "No supported programming modes available\n"); + /* We don't fail this so the user can inspect sysfs */ + return 0; +} + +int cxl_adapter_context_get(struct cxl *adapter) +{ + int rc; + + rc = atomic_inc_unless_negative(&adapter->contexts_num); + return rc ? 0 : -EBUSY; +} + +void cxl_adapter_context_put(struct cxl *adapter) +{ + atomic_dec_if_positive(&adapter->contexts_num); +} + +int cxl_adapter_context_lock(struct cxl *adapter) +{ + int rc; + /* no active contexts -> contexts_num == 0 */ + rc = atomic_cmpxchg(&adapter->contexts_num, 0, -1); + return rc ? -EBUSY : 0; +} + +void cxl_adapter_context_unlock(struct cxl *adapter) +{ + int val = atomic_cmpxchg(&adapter->contexts_num, -1, 0); + + /* + * contexts lock taken -> contexts_num == -1 + * If not true then show a warning and force reset the lock. + * This will happen when context_unlock was requested without + * doing a context_lock. + */ + if (val != -1) { + atomic_set(&adapter->contexts_num, 0); + WARN(1, "Adapter context unlocked with %d active contexts", + val); + } +} + +static int __init init_cxl(void) +{ + int rc = 0; + + if ((rc = cxl_file_init())) + return rc; + + cxl_debugfs_init(); + + /* + * we don't register the callback on P9. slb callack is only + * used for the PSL8 MMU and CX4. + */ + if (cxl_is_power8()) { + rc = register_cxl_calls(&cxl_calls); + if (rc) + goto err; + } + + if (cpu_has_feature(CPU_FTR_HVMODE)) { + cxl_ops = &cxl_native_ops; + rc = pci_register_driver(&cxl_pci_driver); + } +#ifdef CONFIG_PPC_PSERIES + else { + cxl_ops = &cxl_guest_ops; + rc = platform_driver_register(&cxl_of_driver); + } +#endif + if (rc) + goto err1; + + return 0; +err1: + if (cxl_is_power8()) + unregister_cxl_calls(&cxl_calls); +err: + cxl_debugfs_exit(); + cxl_file_exit(); + + return rc; +} + +static void exit_cxl(void) +{ + if (cpu_has_feature(CPU_FTR_HVMODE)) + pci_unregister_driver(&cxl_pci_driver); +#ifdef CONFIG_PPC_PSERIES + else + platform_driver_unregister(&cxl_of_driver); +#endif + + cxl_debugfs_exit(); + cxl_file_exit(); + if (cxl_is_power8()) + unregister_cxl_calls(&cxl_calls); + idr_destroy(&cxl_adapter_idr); +} + +module_init(init_cxl); +module_exit(exit_cxl); + +MODULE_DESCRIPTION("IBM Coherent Accelerator"); +MODULE_AUTHOR("Ian Munsie "); +MODULE_LICENSE("GPL"); diff --git a/drivers/misc/cxl/native.c b/drivers/misc/cxl/native.c new file mode 100644 index 000000000..c9d5d82dc --- /dev/null +++ b/drivers/misc/cxl/native.c @@ -0,0 +1,1600 @@ +/* + * Copyright 2014 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "cxl.h" +#include "trace.h" + +static int afu_control(struct cxl_afu *afu, u64 command, u64 clear, + u64 result, u64 mask, bool enabled) +{ + u64 AFU_Cntl; + unsigned long timeout = jiffies + (HZ * CXL_TIMEOUT); + int rc = 0; + + spin_lock(&afu->afu_cntl_lock); + pr_devel("AFU command starting: %llx\n", command); + + trace_cxl_afu_ctrl(afu, command); + + AFU_Cntl = cxl_p2n_read(afu, CXL_AFU_Cntl_An); + cxl_p2n_write(afu, CXL_AFU_Cntl_An, (AFU_Cntl & ~clear) | command); + + AFU_Cntl = cxl_p2n_read(afu, CXL_AFU_Cntl_An); + while ((AFU_Cntl & mask) != result) { + if (time_after_eq(jiffies, timeout)) { + dev_warn(&afu->dev, "WARNING: AFU control timed out!\n"); + rc = -EBUSY; + goto out; + } + + if (!cxl_ops->link_ok(afu->adapter, afu)) { + afu->enabled = enabled; + rc = -EIO; + goto out; + } + + pr_devel_ratelimited("AFU control... (0x%016llx)\n", + AFU_Cntl | command); + cpu_relax(); + AFU_Cntl = cxl_p2n_read(afu, CXL_AFU_Cntl_An); + } + + if (AFU_Cntl & CXL_AFU_Cntl_An_RA) { + /* + * Workaround for a bug in the XSL used in the Mellanox CX4 + * that fails to clear the RA bit after an AFU reset, + * preventing subsequent AFU resets from working. + */ + cxl_p2n_write(afu, CXL_AFU_Cntl_An, AFU_Cntl & ~CXL_AFU_Cntl_An_RA); + } + + pr_devel("AFU command complete: %llx\n", command); + afu->enabled = enabled; +out: + trace_cxl_afu_ctrl_done(afu, command, rc); + spin_unlock(&afu->afu_cntl_lock); + + return rc; +} + +static int afu_enable(struct cxl_afu *afu) +{ + pr_devel("AFU enable request\n"); + + return afu_control(afu, CXL_AFU_Cntl_An_E, 0, + CXL_AFU_Cntl_An_ES_Enabled, + CXL_AFU_Cntl_An_ES_MASK, true); +} + +int cxl_afu_disable(struct cxl_afu *afu) +{ + pr_devel("AFU disable request\n"); + + return afu_control(afu, 0, CXL_AFU_Cntl_An_E, + CXL_AFU_Cntl_An_ES_Disabled, + CXL_AFU_Cntl_An_ES_MASK, false); +} + +/* This will disable as well as reset */ +static int native_afu_reset(struct cxl_afu *afu) +{ + int rc; + u64 serr; + + pr_devel("AFU reset request\n"); + + rc = afu_control(afu, CXL_AFU_Cntl_An_RA, 0, + CXL_AFU_Cntl_An_RS_Complete | CXL_AFU_Cntl_An_ES_Disabled, + CXL_AFU_Cntl_An_RS_MASK | CXL_AFU_Cntl_An_ES_MASK, + false); + + /* + * Re-enable any masked interrupts when the AFU is not + * activated to avoid side effects after attaching a process + * in dedicated mode. + */ + if (afu->current_mode == 0) { + serr = cxl_p1n_read(afu, CXL_PSL_SERR_An); + serr &= ~CXL_PSL_SERR_An_IRQ_MASKS; + cxl_p1n_write(afu, CXL_PSL_SERR_An, serr); + } + + return rc; +} + +static int native_afu_check_and_enable(struct cxl_afu *afu) +{ + if (!cxl_ops->link_ok(afu->adapter, afu)) { + WARN(1, "Refusing to enable afu while link down!\n"); + return -EIO; + } + if (afu->enabled) + return 0; + return afu_enable(afu); +} + +int cxl_psl_purge(struct cxl_afu *afu) +{ + u64 PSL_CNTL = cxl_p1n_read(afu, CXL_PSL_SCNTL_An); + u64 AFU_Cntl = cxl_p2n_read(afu, CXL_AFU_Cntl_An); + u64 dsisr, dar; + u64 start, end; + u64 trans_fault = 0x0ULL; + unsigned long timeout = jiffies + (HZ * CXL_TIMEOUT); + int rc = 0; + + trace_cxl_psl_ctrl(afu, CXL_PSL_SCNTL_An_Pc); + + pr_devel("PSL purge request\n"); + + if (cxl_is_power8()) + trans_fault = CXL_PSL_DSISR_TRANS; + if (cxl_is_power9()) + trans_fault = CXL_PSL9_DSISR_An_TF; + + if (!cxl_ops->link_ok(afu->adapter, afu)) { + dev_warn(&afu->dev, "PSL Purge called with link down, ignoring\n"); + rc = -EIO; + goto out; + } + + if ((AFU_Cntl & CXL_AFU_Cntl_An_ES_MASK) != CXL_AFU_Cntl_An_ES_Disabled) { + WARN(1, "psl_purge request while AFU not disabled!\n"); + cxl_afu_disable(afu); + } + + cxl_p1n_write(afu, CXL_PSL_SCNTL_An, + PSL_CNTL | CXL_PSL_SCNTL_An_Pc); + start = local_clock(); + PSL_CNTL = cxl_p1n_read(afu, CXL_PSL_SCNTL_An); + while ((PSL_CNTL & CXL_PSL_SCNTL_An_Ps_MASK) + == CXL_PSL_SCNTL_An_Ps_Pending) { + if (time_after_eq(jiffies, timeout)) { + dev_warn(&afu->dev, "WARNING: PSL Purge timed out!\n"); + rc = -EBUSY; + goto out; + } + if (!cxl_ops->link_ok(afu->adapter, afu)) { + rc = -EIO; + goto out; + } + + dsisr = cxl_p2n_read(afu, CXL_PSL_DSISR_An); + pr_devel_ratelimited("PSL purging... PSL_CNTL: 0x%016llx PSL_DSISR: 0x%016llx\n", + PSL_CNTL, dsisr); + + if (dsisr & trans_fault) { + dar = cxl_p2n_read(afu, CXL_PSL_DAR_An); + dev_notice(&afu->dev, "PSL purge terminating pending translation, DSISR: 0x%016llx, DAR: 0x%016llx\n", + dsisr, dar); + cxl_p2n_write(afu, CXL_PSL_TFC_An, CXL_PSL_TFC_An_AE); + } else if (dsisr) { + dev_notice(&afu->dev, "PSL purge acknowledging pending non-translation fault, DSISR: 0x%016llx\n", + dsisr); + cxl_p2n_write(afu, CXL_PSL_TFC_An, CXL_PSL_TFC_An_A); + } else { + cpu_relax(); + } + PSL_CNTL = cxl_p1n_read(afu, CXL_PSL_SCNTL_An); + } + end = local_clock(); + pr_devel("PSL purged in %lld ns\n", end - start); + + cxl_p1n_write(afu, CXL_PSL_SCNTL_An, + PSL_CNTL & ~CXL_PSL_SCNTL_An_Pc); +out: + trace_cxl_psl_ctrl_done(afu, CXL_PSL_SCNTL_An_Pc, rc); + return rc; +} + +static int spa_max_procs(int spa_size) +{ + /* + * From the CAIA: + * end_of_SPA_area = SPA_Base + ((n+4) * 128) + (( ((n*8) + 127) >> 7) * 128) + 255 + * Most of that junk is really just an overly-complicated way of saying + * the last 256 bytes are __aligned(128), so it's really: + * end_of_SPA_area = end_of_PSL_queue_area + __aligned(128) 255 + * and + * end_of_PSL_queue_area = SPA_Base + ((n+4) * 128) + (n*8) - 1 + * so + * sizeof(SPA) = ((n+4) * 128) + (n*8) + __aligned(128) 256 + * Ignore the alignment (which is safe in this case as long as we are + * careful with our rounding) and solve for n: + */ + return ((spa_size / 8) - 96) / 17; +} + +static int cxl_alloc_spa(struct cxl_afu *afu, int mode) +{ + unsigned spa_size; + + /* Work out how many pages to allocate */ + afu->native->spa_order = -1; + do { + afu->native->spa_order++; + spa_size = (1 << afu->native->spa_order) * PAGE_SIZE; + + if (spa_size > 0x100000) { + dev_warn(&afu->dev, "num_of_processes too large for the SPA, limiting to %i (0x%x)\n", + afu->native->spa_max_procs, afu->native->spa_size); + if (mode != CXL_MODE_DEDICATED) + afu->num_procs = afu->native->spa_max_procs; + break; + } + + afu->native->spa_size = spa_size; + afu->native->spa_max_procs = spa_max_procs(afu->native->spa_size); + } while (afu->native->spa_max_procs < afu->num_procs); + + if (!(afu->native->spa = (struct cxl_process_element *) + __get_free_pages(GFP_KERNEL | __GFP_ZERO, afu->native->spa_order))) { + pr_err("cxl_alloc_spa: Unable to allocate scheduled process area\n"); + return -ENOMEM; + } + pr_devel("spa pages: %i afu->spa_max_procs: %i afu->num_procs: %i\n", + 1<native->spa_order, afu->native->spa_max_procs, afu->num_procs); + + return 0; +} + +static void attach_spa(struct cxl_afu *afu) +{ + u64 spap; + + afu->native->sw_command_status = (__be64 *)((char *)afu->native->spa + + ((afu->native->spa_max_procs + 3) * 128)); + + spap = virt_to_phys(afu->native->spa) & CXL_PSL_SPAP_Addr; + spap |= ((afu->native->spa_size >> (12 - CXL_PSL_SPAP_Size_Shift)) - 1) & CXL_PSL_SPAP_Size; + spap |= CXL_PSL_SPAP_V; + pr_devel("cxl: SPA allocated at 0x%p. Max processes: %i, sw_command_status: 0x%p CXL_PSL_SPAP_An=0x%016llx\n", + afu->native->spa, afu->native->spa_max_procs, + afu->native->sw_command_status, spap); + cxl_p1n_write(afu, CXL_PSL_SPAP_An, spap); +} + +static inline void detach_spa(struct cxl_afu *afu) +{ + cxl_p1n_write(afu, CXL_PSL_SPAP_An, 0); +} + +void cxl_release_spa(struct cxl_afu *afu) +{ + if (afu->native->spa) { + free_pages((unsigned long) afu->native->spa, + afu->native->spa_order); + afu->native->spa = NULL; + } +} + +/* + * Invalidation of all ERAT entries is no longer required by CAIA2. Use + * only for debug. + */ +int cxl_invalidate_all_psl9(struct cxl *adapter) +{ + unsigned long timeout = jiffies + (HZ * CXL_TIMEOUT); + u64 ierat; + + pr_devel("CXL adapter - invalidation of all ERAT entries\n"); + + /* Invalidates all ERAT entries for Radix or HPT */ + ierat = CXL_XSL9_IERAT_IALL; + if (radix_enabled()) + ierat |= CXL_XSL9_IERAT_INVR; + cxl_p1_write(adapter, CXL_XSL9_IERAT, ierat); + + while (cxl_p1_read(adapter, CXL_XSL9_IERAT) & CXL_XSL9_IERAT_IINPROG) { + if (time_after_eq(jiffies, timeout)) { + dev_warn(&adapter->dev, + "WARNING: CXL adapter invalidation of all ERAT entries timed out!\n"); + return -EBUSY; + } + if (!cxl_ops->link_ok(adapter, NULL)) + return -EIO; + cpu_relax(); + } + return 0; +} + +int cxl_invalidate_all_psl8(struct cxl *adapter) +{ + unsigned long timeout = jiffies + (HZ * CXL_TIMEOUT); + + pr_devel("CXL adapter wide TLBIA & SLBIA\n"); + + cxl_p1_write(adapter, CXL_PSL_AFUSEL, CXL_PSL_AFUSEL_A); + + cxl_p1_write(adapter, CXL_PSL_TLBIA, CXL_TLB_SLB_IQ_ALL); + while (cxl_p1_read(adapter, CXL_PSL_TLBIA) & CXL_TLB_SLB_P) { + if (time_after_eq(jiffies, timeout)) { + dev_warn(&adapter->dev, "WARNING: CXL adapter wide TLBIA timed out!\n"); + return -EBUSY; + } + if (!cxl_ops->link_ok(adapter, NULL)) + return -EIO; + cpu_relax(); + } + + cxl_p1_write(adapter, CXL_PSL_SLBIA, CXL_TLB_SLB_IQ_ALL); + while (cxl_p1_read(adapter, CXL_PSL_SLBIA) & CXL_TLB_SLB_P) { + if (time_after_eq(jiffies, timeout)) { + dev_warn(&adapter->dev, "WARNING: CXL adapter wide SLBIA timed out!\n"); + return -EBUSY; + } + if (!cxl_ops->link_ok(adapter, NULL)) + return -EIO; + cpu_relax(); + } + return 0; +} + +int cxl_data_cache_flush(struct cxl *adapter) +{ + u64 reg; + unsigned long timeout = jiffies + (HZ * CXL_TIMEOUT); + + /* + * Do a datacache flush only if datacache is available. + * In case of PSL9D datacache absent hence flush operation. + * would timeout. + */ + if (adapter->native->no_data_cache) { + pr_devel("No PSL data cache. Ignoring cache flush req.\n"); + return 0; + } + + pr_devel("Flushing data cache\n"); + reg = cxl_p1_read(adapter, CXL_PSL_Control); + reg |= CXL_PSL_Control_Fr; + cxl_p1_write(adapter, CXL_PSL_Control, reg); + + reg = cxl_p1_read(adapter, CXL_PSL_Control); + while ((reg & CXL_PSL_Control_Fs_MASK) != CXL_PSL_Control_Fs_Complete) { + if (time_after_eq(jiffies, timeout)) { + dev_warn(&adapter->dev, "WARNING: cache flush timed out!\n"); + return -EBUSY; + } + + if (!cxl_ops->link_ok(adapter, NULL)) { + dev_warn(&adapter->dev, "WARNING: link down when flushing cache\n"); + return -EIO; + } + cpu_relax(); + reg = cxl_p1_read(adapter, CXL_PSL_Control); + } + + reg &= ~CXL_PSL_Control_Fr; + cxl_p1_write(adapter, CXL_PSL_Control, reg); + return 0; +} + +static int cxl_write_sstp(struct cxl_afu *afu, u64 sstp0, u64 sstp1) +{ + int rc; + + /* 1. Disable SSTP by writing 0 to SSTP1[V] */ + cxl_p2n_write(afu, CXL_SSTP1_An, 0); + + /* 2. Invalidate all SLB entries */ + if ((rc = cxl_afu_slbia(afu))) + return rc; + + /* 3. Set SSTP0_An */ + cxl_p2n_write(afu, CXL_SSTP0_An, sstp0); + + /* 4. Set SSTP1_An */ + cxl_p2n_write(afu, CXL_SSTP1_An, sstp1); + + return 0; +} + +/* Using per slice version may improve performance here. (ie. SLBIA_An) */ +static void slb_invalid(struct cxl_context *ctx) +{ + struct cxl *adapter = ctx->afu->adapter; + u64 slbia; + + WARN_ON(!mutex_is_locked(&ctx->afu->native->spa_mutex)); + + cxl_p1_write(adapter, CXL_PSL_LBISEL, + ((u64)be32_to_cpu(ctx->elem->common.pid) << 32) | + be32_to_cpu(ctx->elem->lpid)); + cxl_p1_write(adapter, CXL_PSL_SLBIA, CXL_TLB_SLB_IQ_LPIDPID); + + while (1) { + if (!cxl_ops->link_ok(adapter, NULL)) + break; + slbia = cxl_p1_read(adapter, CXL_PSL_SLBIA); + if (!(slbia & CXL_TLB_SLB_P)) + break; + cpu_relax(); + } +} + +static int do_process_element_cmd(struct cxl_context *ctx, + u64 cmd, u64 pe_state) +{ + u64 state; + unsigned long timeout = jiffies + (HZ * CXL_TIMEOUT); + int rc = 0; + + trace_cxl_llcmd(ctx, cmd); + + WARN_ON(!ctx->afu->enabled); + + ctx->elem->software_state = cpu_to_be32(pe_state); + smp_wmb(); + *(ctx->afu->native->sw_command_status) = cpu_to_be64(cmd | 0 | ctx->pe); + smp_mb(); + cxl_p1n_write(ctx->afu, CXL_PSL_LLCMD_An, cmd | ctx->pe); + while (1) { + if (time_after_eq(jiffies, timeout)) { + dev_warn(&ctx->afu->dev, "WARNING: Process Element Command timed out!\n"); + rc = -EBUSY; + goto out; + } + if (!cxl_ops->link_ok(ctx->afu->adapter, ctx->afu)) { + dev_warn(&ctx->afu->dev, "WARNING: Device link down, aborting Process Element Command!\n"); + rc = -EIO; + goto out; + } + state = be64_to_cpup(ctx->afu->native->sw_command_status); + if (state == ~0ULL) { + pr_err("cxl: Error adding process element to AFU\n"); + rc = -1; + goto out; + } + if ((state & (CXL_SPA_SW_CMD_MASK | CXL_SPA_SW_STATE_MASK | CXL_SPA_SW_LINK_MASK)) == + (cmd | (cmd >> 16) | ctx->pe)) + break; + /* + * The command won't finish in the PSL if there are + * outstanding DSIs. Hence we need to yield here in + * case there are outstanding DSIs that we need to + * service. Tuning possiblity: we could wait for a + * while before sched + */ + schedule(); + + } +out: + trace_cxl_llcmd_done(ctx, cmd, rc); + return rc; +} + +static int add_process_element(struct cxl_context *ctx) +{ + int rc = 0; + + mutex_lock(&ctx->afu->native->spa_mutex); + pr_devel("%s Adding pe: %i started\n", __func__, ctx->pe); + if (!(rc = do_process_element_cmd(ctx, CXL_SPA_SW_CMD_ADD, CXL_PE_SOFTWARE_STATE_V))) + ctx->pe_inserted = true; + pr_devel("%s Adding pe: %i finished\n", __func__, ctx->pe); + mutex_unlock(&ctx->afu->native->spa_mutex); + return rc; +} + +static int terminate_process_element(struct cxl_context *ctx) +{ + int rc = 0; + + /* fast path terminate if it's already invalid */ + if (!(ctx->elem->software_state & cpu_to_be32(CXL_PE_SOFTWARE_STATE_V))) + return rc; + + mutex_lock(&ctx->afu->native->spa_mutex); + pr_devel("%s Terminate pe: %i started\n", __func__, ctx->pe); + /* We could be asked to terminate when the hw is down. That + * should always succeed: it's not running if the hw has gone + * away and is being reset. + */ + if (cxl_ops->link_ok(ctx->afu->adapter, ctx->afu)) + rc = do_process_element_cmd(ctx, CXL_SPA_SW_CMD_TERMINATE, + CXL_PE_SOFTWARE_STATE_V | CXL_PE_SOFTWARE_STATE_T); + ctx->elem->software_state = 0; /* Remove Valid bit */ + pr_devel("%s Terminate pe: %i finished\n", __func__, ctx->pe); + mutex_unlock(&ctx->afu->native->spa_mutex); + return rc; +} + +static int remove_process_element(struct cxl_context *ctx) +{ + int rc = 0; + + mutex_lock(&ctx->afu->native->spa_mutex); + pr_devel("%s Remove pe: %i started\n", __func__, ctx->pe); + + /* We could be asked to remove when the hw is down. Again, if + * the hw is down, the PE is gone, so we succeed. + */ + if (cxl_ops->link_ok(ctx->afu->adapter, ctx->afu)) + rc = do_process_element_cmd(ctx, CXL_SPA_SW_CMD_REMOVE, 0); + + if (!rc) + ctx->pe_inserted = false; + if (cxl_is_power8()) + slb_invalid(ctx); + pr_devel("%s Remove pe: %i finished\n", __func__, ctx->pe); + mutex_unlock(&ctx->afu->native->spa_mutex); + + return rc; +} + +void cxl_assign_psn_space(struct cxl_context *ctx) +{ + if (!ctx->afu->pp_size || ctx->master) { + ctx->psn_phys = ctx->afu->psn_phys; + ctx->psn_size = ctx->afu->adapter->ps_size; + } else { + ctx->psn_phys = ctx->afu->psn_phys + + (ctx->afu->native->pp_offset + ctx->afu->pp_size * ctx->pe); + ctx->psn_size = ctx->afu->pp_size; + } +} + +static int activate_afu_directed(struct cxl_afu *afu) +{ + int rc; + + dev_info(&afu->dev, "Activating AFU directed mode\n"); + + afu->num_procs = afu->max_procs_virtualised; + if (afu->native->spa == NULL) { + if (cxl_alloc_spa(afu, CXL_MODE_DIRECTED)) + return -ENOMEM; + } + attach_spa(afu); + + cxl_p1n_write(afu, CXL_PSL_SCNTL_An, CXL_PSL_SCNTL_An_PM_AFU); + if (cxl_is_power8()) + cxl_p1n_write(afu, CXL_PSL_AMOR_An, 0xFFFFFFFFFFFFFFFFULL); + cxl_p1n_write(afu, CXL_PSL_ID_An, CXL_PSL_ID_An_F | CXL_PSL_ID_An_L); + + afu->current_mode = CXL_MODE_DIRECTED; + + if ((rc = cxl_chardev_m_afu_add(afu))) + return rc; + + if ((rc = cxl_sysfs_afu_m_add(afu))) + goto err; + + if ((rc = cxl_chardev_s_afu_add(afu))) + goto err1; + + return 0; +err1: + cxl_sysfs_afu_m_remove(afu); +err: + cxl_chardev_afu_remove(afu); + return rc; +} + +#ifdef CONFIG_CPU_LITTLE_ENDIAN +#define set_endian(sr) ((sr) |= CXL_PSL_SR_An_LE) +#else +#define set_endian(sr) ((sr) &= ~(CXL_PSL_SR_An_LE)) +#endif + +u64 cxl_calculate_sr(bool master, bool kernel, bool real_mode, bool p9) +{ + u64 sr = 0; + + set_endian(sr); + if (master) + sr |= CXL_PSL_SR_An_MP; + if (mfspr(SPRN_LPCR) & LPCR_TC) + sr |= CXL_PSL_SR_An_TC; + + if (kernel) { + if (!real_mode) + sr |= CXL_PSL_SR_An_R; + sr |= (mfmsr() & MSR_SF) | CXL_PSL_SR_An_HV; + } else { + sr |= CXL_PSL_SR_An_PR | CXL_PSL_SR_An_R; + if (radix_enabled()) + sr |= CXL_PSL_SR_An_HV; + else + sr &= ~(CXL_PSL_SR_An_HV); + if (!test_tsk_thread_flag(current, TIF_32BIT)) + sr |= CXL_PSL_SR_An_SF; + } + if (p9) { + if (radix_enabled()) + sr |= CXL_PSL_SR_An_XLAT_ror; + else + sr |= CXL_PSL_SR_An_XLAT_hpt; + } + return sr; +} + +static u64 calculate_sr(struct cxl_context *ctx) +{ + return cxl_calculate_sr(ctx->master, ctx->kernel, false, + cxl_is_power9()); +} + +static void update_ivtes_directed(struct cxl_context *ctx) +{ + bool need_update = (ctx->status == STARTED); + int r; + + if (need_update) { + WARN_ON(terminate_process_element(ctx)); + WARN_ON(remove_process_element(ctx)); + } + + for (r = 0; r < CXL_IRQ_RANGES; r++) { + ctx->elem->ivte_offsets[r] = cpu_to_be16(ctx->irqs.offset[r]); + ctx->elem->ivte_ranges[r] = cpu_to_be16(ctx->irqs.range[r]); + } + + /* + * Theoretically we could use the update llcmd, instead of a + * terminate/remove/add (or if an atomic update was required we could + * do a suspend/update/resume), however it seems there might be issues + * with the update llcmd on some cards (including those using an XSL on + * an ASIC) so for now it's safest to go with the commands that are + * known to work. In the future if we come across a situation where the + * card may be performing transactions using the same PE while we are + * doing this update we might need to revisit this. + */ + if (need_update) + WARN_ON(add_process_element(ctx)); +} + +static int process_element_entry_psl9(struct cxl_context *ctx, u64 wed, u64 amr) +{ + u32 pid; + int rc; + + cxl_assign_psn_space(ctx); + + ctx->elem->ctxtime = 0; /* disable */ + ctx->elem->lpid = cpu_to_be32(mfspr(SPRN_LPID)); + ctx->elem->haurp = 0; /* disable */ + + if (ctx->kernel) + pid = 0; + else { + if (ctx->mm == NULL) { + pr_devel("%s: unable to get mm for pe=%d pid=%i\n", + __func__, ctx->pe, pid_nr(ctx->pid)); + return -EINVAL; + } + pid = ctx->mm->context.id; + } + + /* Assign a unique TIDR (thread id) for the current thread */ + if (!(ctx->tidr) && (ctx->assign_tidr)) { + rc = set_thread_tidr(current); + if (rc) + return -ENODEV; + ctx->tidr = current->thread.tidr; + pr_devel("%s: current tidr: %d\n", __func__, ctx->tidr); + } + + ctx->elem->common.tid = cpu_to_be32(ctx->tidr); + ctx->elem->common.pid = cpu_to_be32(pid); + + ctx->elem->sr = cpu_to_be64(calculate_sr(ctx)); + + ctx->elem->common.csrp = 0; /* disable */ + + cxl_prefault(ctx, wed); + + /* + * Ensure we have the multiplexed PSL interrupt set up to take faults + * for kernel contexts that may not have allocated any AFU IRQs at all: + */ + if (ctx->irqs.range[0] == 0) { + ctx->irqs.offset[0] = ctx->afu->native->psl_hwirq; + ctx->irqs.range[0] = 1; + } + + ctx->elem->common.amr = cpu_to_be64(amr); + ctx->elem->common.wed = cpu_to_be64(wed); + + return 0; +} + +int cxl_attach_afu_directed_psl9(struct cxl_context *ctx, u64 wed, u64 amr) +{ + int result; + + /* fill the process element entry */ + result = process_element_entry_psl9(ctx, wed, amr); + if (result) + return result; + + update_ivtes_directed(ctx); + + /* first guy needs to enable */ + result = cxl_ops->afu_check_and_enable(ctx->afu); + if (result) + return result; + + return add_process_element(ctx); +} + +int cxl_attach_afu_directed_psl8(struct cxl_context *ctx, u64 wed, u64 amr) +{ + u32 pid; + int result; + + cxl_assign_psn_space(ctx); + + ctx->elem->ctxtime = 0; /* disable */ + ctx->elem->lpid = cpu_to_be32(mfspr(SPRN_LPID)); + ctx->elem->haurp = 0; /* disable */ + ctx->elem->u.sdr = cpu_to_be64(mfspr(SPRN_SDR1)); + + pid = current->pid; + if (ctx->kernel) + pid = 0; + ctx->elem->common.tid = 0; + ctx->elem->common.pid = cpu_to_be32(pid); + + ctx->elem->sr = cpu_to_be64(calculate_sr(ctx)); + + ctx->elem->common.csrp = 0; /* disable */ + ctx->elem->common.u.psl8.aurp0 = 0; /* disable */ + ctx->elem->common.u.psl8.aurp1 = 0; /* disable */ + + cxl_prefault(ctx, wed); + + ctx->elem->common.u.psl8.sstp0 = cpu_to_be64(ctx->sstp0); + ctx->elem->common.u.psl8.sstp1 = cpu_to_be64(ctx->sstp1); + + /* + * Ensure we have the multiplexed PSL interrupt set up to take faults + * for kernel contexts that may not have allocated any AFU IRQs at all: + */ + if (ctx->irqs.range[0] == 0) { + ctx->irqs.offset[0] = ctx->afu->native->psl_hwirq; + ctx->irqs.range[0] = 1; + } + + update_ivtes_directed(ctx); + + ctx->elem->common.amr = cpu_to_be64(amr); + ctx->elem->common.wed = cpu_to_be64(wed); + + /* first guy needs to enable */ + if ((result = cxl_ops->afu_check_and_enable(ctx->afu))) + return result; + + return add_process_element(ctx); +} + +static int deactivate_afu_directed(struct cxl_afu *afu) +{ + dev_info(&afu->dev, "Deactivating AFU directed mode\n"); + + afu->current_mode = 0; + afu->num_procs = 0; + + cxl_sysfs_afu_m_remove(afu); + cxl_chardev_afu_remove(afu); + + /* + * The CAIA section 2.2.1 indicates that the procedure for starting and + * stopping an AFU in AFU directed mode is AFU specific, which is not + * ideal since this code is generic and with one exception has no + * knowledge of the AFU. This is in contrast to the procedure for + * disabling a dedicated process AFU, which is documented to just + * require a reset. The architecture does indicate that both an AFU + * reset and an AFU disable should result in the AFU being disabled and + * we do both followed by a PSL purge for safety. + * + * Notably we used to have some issues with the disable sequence on PSL + * cards, which is why we ended up using this heavy weight procedure in + * the first place, however a bug was discovered that had rendered the + * disable operation ineffective, so it is conceivable that was the + * sole explanation for those difficulties. Careful regression testing + * is recommended if anyone attempts to remove or reorder these + * operations. + * + * The XSL on the Mellanox CX4 behaves a little differently from the + * PSL based cards and will time out an AFU reset if the AFU is still + * enabled. That card is special in that we do have a means to identify + * it from this code, so in that case we skip the reset and just use a + * disable/purge to avoid the timeout and corresponding noise in the + * kernel log. + */ + if (afu->adapter->native->sl_ops->needs_reset_before_disable) + cxl_ops->afu_reset(afu); + cxl_afu_disable(afu); + cxl_psl_purge(afu); + + return 0; +} + +int cxl_activate_dedicated_process_psl9(struct cxl_afu *afu) +{ + dev_info(&afu->dev, "Activating dedicated process mode\n"); + + /* + * If XSL is set to dedicated mode (Set in PSL_SCNTL reg), the + * XSL and AFU are programmed to work with a single context. + * The context information should be configured in the SPA area + * index 0 (so PSL_SPAP must be configured before enabling the + * AFU). + */ + afu->num_procs = 1; + if (afu->native->spa == NULL) { + if (cxl_alloc_spa(afu, CXL_MODE_DEDICATED)) + return -ENOMEM; + } + attach_spa(afu); + + cxl_p1n_write(afu, CXL_PSL_SCNTL_An, CXL_PSL_SCNTL_An_PM_Process); + cxl_p1n_write(afu, CXL_PSL_ID_An, CXL_PSL_ID_An_F | CXL_PSL_ID_An_L); + + afu->current_mode = CXL_MODE_DEDICATED; + + return cxl_chardev_d_afu_add(afu); +} + +int cxl_activate_dedicated_process_psl8(struct cxl_afu *afu) +{ + dev_info(&afu->dev, "Activating dedicated process mode\n"); + + cxl_p1n_write(afu, CXL_PSL_SCNTL_An, CXL_PSL_SCNTL_An_PM_Process); + + cxl_p1n_write(afu, CXL_PSL_CtxTime_An, 0); /* disable */ + cxl_p1n_write(afu, CXL_PSL_SPAP_An, 0); /* disable */ + cxl_p1n_write(afu, CXL_PSL_AMOR_An, 0xFFFFFFFFFFFFFFFFULL); + cxl_p1n_write(afu, CXL_PSL_LPID_An, mfspr(SPRN_LPID)); + cxl_p1n_write(afu, CXL_HAURP_An, 0); /* disable */ + cxl_p1n_write(afu, CXL_PSL_SDR_An, mfspr(SPRN_SDR1)); + + cxl_p2n_write(afu, CXL_CSRP_An, 0); /* disable */ + cxl_p2n_write(afu, CXL_AURP0_An, 0); /* disable */ + cxl_p2n_write(afu, CXL_AURP1_An, 0); /* disable */ + + afu->current_mode = CXL_MODE_DEDICATED; + afu->num_procs = 1; + + return cxl_chardev_d_afu_add(afu); +} + +void cxl_update_dedicated_ivtes_psl9(struct cxl_context *ctx) +{ + int r; + + for (r = 0; r < CXL_IRQ_RANGES; r++) { + ctx->elem->ivte_offsets[r] = cpu_to_be16(ctx->irqs.offset[r]); + ctx->elem->ivte_ranges[r] = cpu_to_be16(ctx->irqs.range[r]); + } +} + +void cxl_update_dedicated_ivtes_psl8(struct cxl_context *ctx) +{ + struct cxl_afu *afu = ctx->afu; + + cxl_p1n_write(afu, CXL_PSL_IVTE_Offset_An, + (((u64)ctx->irqs.offset[0] & 0xffff) << 48) | + (((u64)ctx->irqs.offset[1] & 0xffff) << 32) | + (((u64)ctx->irqs.offset[2] & 0xffff) << 16) | + ((u64)ctx->irqs.offset[3] & 0xffff)); + cxl_p1n_write(afu, CXL_PSL_IVTE_Limit_An, (u64) + (((u64)ctx->irqs.range[0] & 0xffff) << 48) | + (((u64)ctx->irqs.range[1] & 0xffff) << 32) | + (((u64)ctx->irqs.range[2] & 0xffff) << 16) | + ((u64)ctx->irqs.range[3] & 0xffff)); +} + +int cxl_attach_dedicated_process_psl9(struct cxl_context *ctx, u64 wed, u64 amr) +{ + struct cxl_afu *afu = ctx->afu; + int result; + + /* fill the process element entry */ + result = process_element_entry_psl9(ctx, wed, amr); + if (result) + return result; + + if (ctx->afu->adapter->native->sl_ops->update_dedicated_ivtes) + afu->adapter->native->sl_ops->update_dedicated_ivtes(ctx); + + ctx->elem->software_state = cpu_to_be32(CXL_PE_SOFTWARE_STATE_V); + /* + * Ideally we should do a wmb() here to make sure the changes to the + * PE are visible to the card before we call afu_enable. + * On ppc64 though all mmios are preceded by a 'sync' instruction hence + * we dont dont need one here. + */ + + result = cxl_ops->afu_reset(afu); + if (result) + return result; + + return afu_enable(afu); +} + +int cxl_attach_dedicated_process_psl8(struct cxl_context *ctx, u64 wed, u64 amr) +{ + struct cxl_afu *afu = ctx->afu; + u64 pid; + int rc; + + pid = (u64)current->pid << 32; + if (ctx->kernel) + pid = 0; + cxl_p2n_write(afu, CXL_PSL_PID_TID_An, pid); + + cxl_p1n_write(afu, CXL_PSL_SR_An, calculate_sr(ctx)); + + if ((rc = cxl_write_sstp(afu, ctx->sstp0, ctx->sstp1))) + return rc; + + cxl_prefault(ctx, wed); + + if (ctx->afu->adapter->native->sl_ops->update_dedicated_ivtes) + afu->adapter->native->sl_ops->update_dedicated_ivtes(ctx); + + cxl_p2n_write(afu, CXL_PSL_AMR_An, amr); + + /* master only context for dedicated */ + cxl_assign_psn_space(ctx); + + if ((rc = cxl_ops->afu_reset(afu))) + return rc; + + cxl_p2n_write(afu, CXL_PSL_WED_An, wed); + + return afu_enable(afu); +} + +static int deactivate_dedicated_process(struct cxl_afu *afu) +{ + dev_info(&afu->dev, "Deactivating dedicated process mode\n"); + + afu->current_mode = 0; + afu->num_procs = 0; + + cxl_chardev_afu_remove(afu); + + return 0; +} + +static int native_afu_deactivate_mode(struct cxl_afu *afu, int mode) +{ + if (mode == CXL_MODE_DIRECTED) + return deactivate_afu_directed(afu); + if (mode == CXL_MODE_DEDICATED) + return deactivate_dedicated_process(afu); + return 0; +} + +static int native_afu_activate_mode(struct cxl_afu *afu, int mode) +{ + if (!mode) + return 0; + if (!(mode & afu->modes_supported)) + return -EINVAL; + + if (!cxl_ops->link_ok(afu->adapter, afu)) { + WARN(1, "Device link is down, refusing to activate!\n"); + return -EIO; + } + + if (mode == CXL_MODE_DIRECTED) + return activate_afu_directed(afu); + if ((mode == CXL_MODE_DEDICATED) && + (afu->adapter->native->sl_ops->activate_dedicated_process)) + return afu->adapter->native->sl_ops->activate_dedicated_process(afu); + + return -EINVAL; +} + +static int native_attach_process(struct cxl_context *ctx, bool kernel, + u64 wed, u64 amr) +{ + if (!cxl_ops->link_ok(ctx->afu->adapter, ctx->afu)) { + WARN(1, "Device link is down, refusing to attach process!\n"); + return -EIO; + } + + ctx->kernel = kernel; + if ((ctx->afu->current_mode == CXL_MODE_DIRECTED) && + (ctx->afu->adapter->native->sl_ops->attach_afu_directed)) + return ctx->afu->adapter->native->sl_ops->attach_afu_directed(ctx, wed, amr); + + if ((ctx->afu->current_mode == CXL_MODE_DEDICATED) && + (ctx->afu->adapter->native->sl_ops->attach_dedicated_process)) + return ctx->afu->adapter->native->sl_ops->attach_dedicated_process(ctx, wed, amr); + + return -EINVAL; +} + +static inline int detach_process_native_dedicated(struct cxl_context *ctx) +{ + /* + * The CAIA section 2.1.1 indicates that we need to do an AFU reset to + * stop the AFU in dedicated mode (we therefore do not make that + * optional like we do in the afu directed path). It does not indicate + * that we need to do an explicit disable (which should occur + * implicitly as part of the reset) or purge, but we do these as well + * to be on the safe side. + * + * Notably we used to have some issues with the disable sequence + * (before the sequence was spelled out in the architecture) which is + * why we were so heavy weight in the first place, however a bug was + * discovered that had rendered the disable operation ineffective, so + * it is conceivable that was the sole explanation for those + * difficulties. Point is, we should be careful and do some regression + * testing if we ever attempt to remove any part of this procedure. + */ + cxl_ops->afu_reset(ctx->afu); + cxl_afu_disable(ctx->afu); + cxl_psl_purge(ctx->afu); + return 0; +} + +static void native_update_ivtes(struct cxl_context *ctx) +{ + if (ctx->afu->current_mode == CXL_MODE_DIRECTED) + return update_ivtes_directed(ctx); + if ((ctx->afu->current_mode == CXL_MODE_DEDICATED) && + (ctx->afu->adapter->native->sl_ops->update_dedicated_ivtes)) + return ctx->afu->adapter->native->sl_ops->update_dedicated_ivtes(ctx); + WARN(1, "native_update_ivtes: Bad mode\n"); +} + +static inline int detach_process_native_afu_directed(struct cxl_context *ctx) +{ + if (!ctx->pe_inserted) + return 0; + if (terminate_process_element(ctx)) + return -1; + if (remove_process_element(ctx)) + return -1; + + return 0; +} + +static int native_detach_process(struct cxl_context *ctx) +{ + trace_cxl_detach(ctx); + + if (ctx->afu->current_mode == CXL_MODE_DEDICATED) + return detach_process_native_dedicated(ctx); + + return detach_process_native_afu_directed(ctx); +} + +static int native_get_irq_info(struct cxl_afu *afu, struct cxl_irq_info *info) +{ + /* If the adapter has gone away, we can't get any meaningful + * information. + */ + if (!cxl_ops->link_ok(afu->adapter, afu)) + return -EIO; + + info->dsisr = cxl_p2n_read(afu, CXL_PSL_DSISR_An); + info->dar = cxl_p2n_read(afu, CXL_PSL_DAR_An); + if (cxl_is_power8()) + info->dsr = cxl_p2n_read(afu, CXL_PSL_DSR_An); + info->afu_err = cxl_p2n_read(afu, CXL_AFU_ERR_An); + info->errstat = cxl_p2n_read(afu, CXL_PSL_ErrStat_An); + info->proc_handle = 0; + + return 0; +} + +void cxl_native_irq_dump_regs_psl9(struct cxl_context *ctx) +{ + u64 fir1, serr; + + fir1 = cxl_p1_read(ctx->afu->adapter, CXL_PSL9_FIR1); + + dev_crit(&ctx->afu->dev, "PSL_FIR1: 0x%016llx\n", fir1); + if (ctx->afu->adapter->native->sl_ops->register_serr_irq) { + serr = cxl_p1n_read(ctx->afu, CXL_PSL_SERR_An); + cxl_afu_decode_psl_serr(ctx->afu, serr); + } +} + +void cxl_native_irq_dump_regs_psl8(struct cxl_context *ctx) +{ + u64 fir1, fir2, fir_slice, serr, afu_debug; + + fir1 = cxl_p1_read(ctx->afu->adapter, CXL_PSL_FIR1); + fir2 = cxl_p1_read(ctx->afu->adapter, CXL_PSL_FIR2); + fir_slice = cxl_p1n_read(ctx->afu, CXL_PSL_FIR_SLICE_An); + afu_debug = cxl_p1n_read(ctx->afu, CXL_AFU_DEBUG_An); + + dev_crit(&ctx->afu->dev, "PSL_FIR1: 0x%016llx\n", fir1); + dev_crit(&ctx->afu->dev, "PSL_FIR2: 0x%016llx\n", fir2); + if (ctx->afu->adapter->native->sl_ops->register_serr_irq) { + serr = cxl_p1n_read(ctx->afu, CXL_PSL_SERR_An); + cxl_afu_decode_psl_serr(ctx->afu, serr); + } + dev_crit(&ctx->afu->dev, "PSL_FIR_SLICE_An: 0x%016llx\n", fir_slice); + dev_crit(&ctx->afu->dev, "CXL_PSL_AFU_DEBUG_An: 0x%016llx\n", afu_debug); +} + +static irqreturn_t native_handle_psl_slice_error(struct cxl_context *ctx, + u64 dsisr, u64 errstat) +{ + + dev_crit(&ctx->afu->dev, "PSL ERROR STATUS: 0x%016llx\n", errstat); + + if (ctx->afu->adapter->native->sl_ops->psl_irq_dump_registers) + ctx->afu->adapter->native->sl_ops->psl_irq_dump_registers(ctx); + + if (ctx->afu->adapter->native->sl_ops->debugfs_stop_trace) { + dev_crit(&ctx->afu->dev, "STOPPING CXL TRACE\n"); + ctx->afu->adapter->native->sl_ops->debugfs_stop_trace(ctx->afu->adapter); + } + + return cxl_ops->ack_irq(ctx, 0, errstat); +} + +static bool cxl_is_translation_fault(struct cxl_afu *afu, u64 dsisr) +{ + if ((cxl_is_power8()) && (dsisr & CXL_PSL_DSISR_TRANS)) + return true; + + if ((cxl_is_power9()) && (dsisr & CXL_PSL9_DSISR_An_TF)) + return true; + + return false; +} + +irqreturn_t cxl_fail_irq_psl(struct cxl_afu *afu, struct cxl_irq_info *irq_info) +{ + if (cxl_is_translation_fault(afu, irq_info->dsisr)) + cxl_p2n_write(afu, CXL_PSL_TFC_An, CXL_PSL_TFC_An_AE); + else + cxl_p2n_write(afu, CXL_PSL_TFC_An, CXL_PSL_TFC_An_A); + + return IRQ_HANDLED; +} + +static irqreturn_t native_irq_multiplexed(int irq, void *data) +{ + struct cxl_afu *afu = data; + struct cxl_context *ctx; + struct cxl_irq_info irq_info; + u64 phreg = cxl_p2n_read(afu, CXL_PSL_PEHandle_An); + int ph, ret = IRQ_HANDLED, res; + + /* check if eeh kicked in while the interrupt was in flight */ + if (unlikely(phreg == ~0ULL)) { + dev_warn(&afu->dev, + "Ignoring slice interrupt(%d) due to fenced card", + irq); + return IRQ_HANDLED; + } + /* Mask the pe-handle from register value */ + ph = phreg & 0xffff; + if ((res = native_get_irq_info(afu, &irq_info))) { + WARN(1, "Unable to get CXL IRQ Info: %i\n", res); + if (afu->adapter->native->sl_ops->fail_irq) + return afu->adapter->native->sl_ops->fail_irq(afu, &irq_info); + return ret; + } + + rcu_read_lock(); + ctx = idr_find(&afu->contexts_idr, ph); + if (ctx) { + if (afu->adapter->native->sl_ops->handle_interrupt) + ret = afu->adapter->native->sl_ops->handle_interrupt(irq, ctx, &irq_info); + rcu_read_unlock(); + return ret; + } + rcu_read_unlock(); + + WARN(1, "Unable to demultiplex CXL PSL IRQ for PE %i DSISR %016llx DAR" + " %016llx\n(Possible AFU HW issue - was a term/remove acked" + " with outstanding transactions?)\n", ph, irq_info.dsisr, + irq_info.dar); + if (afu->adapter->native->sl_ops->fail_irq) + ret = afu->adapter->native->sl_ops->fail_irq(afu, &irq_info); + return ret; +} + +static void native_irq_wait(struct cxl_context *ctx) +{ + u64 dsisr; + int timeout = 1000; + int ph; + + /* + * Wait until no further interrupts are presented by the PSL + * for this context. + */ + while (timeout--) { + ph = cxl_p2n_read(ctx->afu, CXL_PSL_PEHandle_An) & 0xffff; + if (ph != ctx->pe) + return; + dsisr = cxl_p2n_read(ctx->afu, CXL_PSL_DSISR_An); + if (cxl_is_power8() && + ((dsisr & CXL_PSL_DSISR_PENDING) == 0)) + return; + if (cxl_is_power9() && + ((dsisr & CXL_PSL9_DSISR_PENDING) == 0)) + return; + /* + * We are waiting for the workqueue to process our + * irq, so need to let that run here. + */ + msleep(1); + } + + dev_warn(&ctx->afu->dev, "WARNING: waiting on DSI for PE %i" + " DSISR %016llx!\n", ph, dsisr); + return; +} + +static irqreturn_t native_slice_irq_err(int irq, void *data) +{ + struct cxl_afu *afu = data; + u64 errstat, serr, afu_error, dsisr; + u64 fir_slice, afu_debug, irq_mask; + + /* + * slice err interrupt is only used with full PSL (no XSL) + */ + serr = cxl_p1n_read(afu, CXL_PSL_SERR_An); + errstat = cxl_p2n_read(afu, CXL_PSL_ErrStat_An); + afu_error = cxl_p2n_read(afu, CXL_AFU_ERR_An); + dsisr = cxl_p2n_read(afu, CXL_PSL_DSISR_An); + cxl_afu_decode_psl_serr(afu, serr); + + if (cxl_is_power8()) { + fir_slice = cxl_p1n_read(afu, CXL_PSL_FIR_SLICE_An); + afu_debug = cxl_p1n_read(afu, CXL_AFU_DEBUG_An); + dev_crit(&afu->dev, "PSL_FIR_SLICE_An: 0x%016llx\n", fir_slice); + dev_crit(&afu->dev, "CXL_PSL_AFU_DEBUG_An: 0x%016llx\n", afu_debug); + } + dev_crit(&afu->dev, "CXL_PSL_ErrStat_An: 0x%016llx\n", errstat); + dev_crit(&afu->dev, "AFU_ERR_An: 0x%.16llx\n", afu_error); + dev_crit(&afu->dev, "PSL_DSISR_An: 0x%.16llx\n", dsisr); + + /* mask off the IRQ so it won't retrigger until the AFU is reset */ + irq_mask = (serr & CXL_PSL_SERR_An_IRQS) >> 32; + serr |= irq_mask; + cxl_p1n_write(afu, CXL_PSL_SERR_An, serr); + dev_info(&afu->dev, "Further such interrupts will be masked until the AFU is reset\n"); + + return IRQ_HANDLED; +} + +void cxl_native_err_irq_dump_regs_psl9(struct cxl *adapter) +{ + u64 fir1; + + fir1 = cxl_p1_read(adapter, CXL_PSL9_FIR1); + dev_crit(&adapter->dev, "PSL_FIR: 0x%016llx\n", fir1); +} + +void cxl_native_err_irq_dump_regs_psl8(struct cxl *adapter) +{ + u64 fir1, fir2; + + fir1 = cxl_p1_read(adapter, CXL_PSL_FIR1); + fir2 = cxl_p1_read(adapter, CXL_PSL_FIR2); + dev_crit(&adapter->dev, + "PSL_FIR1: 0x%016llx\nPSL_FIR2: 0x%016llx\n", + fir1, fir2); +} + +static irqreturn_t native_irq_err(int irq, void *data) +{ + struct cxl *adapter = data; + u64 err_ivte; + + WARN(1, "CXL ERROR interrupt %i\n", irq); + + err_ivte = cxl_p1_read(adapter, CXL_PSL_ErrIVTE); + dev_crit(&adapter->dev, "PSL_ErrIVTE: 0x%016llx\n", err_ivte); + + if (adapter->native->sl_ops->debugfs_stop_trace) { + dev_crit(&adapter->dev, "STOPPING CXL TRACE\n"); + adapter->native->sl_ops->debugfs_stop_trace(adapter); + } + + if (adapter->native->sl_ops->err_irq_dump_registers) + adapter->native->sl_ops->err_irq_dump_registers(adapter); + + return IRQ_HANDLED; +} + +int cxl_native_register_psl_err_irq(struct cxl *adapter) +{ + int rc; + + adapter->irq_name = kasprintf(GFP_KERNEL, "cxl-%s-err", + dev_name(&adapter->dev)); + if (!adapter->irq_name) + return -ENOMEM; + + if ((rc = cxl_register_one_irq(adapter, native_irq_err, adapter, + &adapter->native->err_hwirq, + &adapter->native->err_virq, + adapter->irq_name))) { + kfree(adapter->irq_name); + adapter->irq_name = NULL; + return rc; + } + + cxl_p1_write(adapter, CXL_PSL_ErrIVTE, adapter->native->err_hwirq & 0xffff); + + return 0; +} + +void cxl_native_release_psl_err_irq(struct cxl *adapter) +{ + if (adapter->native->err_virq == 0 || + adapter->native->err_virq != + irq_find_mapping(NULL, adapter->native->err_hwirq)) + return; + + cxl_p1_write(adapter, CXL_PSL_ErrIVTE, 0x0000000000000000); + cxl_unmap_irq(adapter->native->err_virq, adapter); + cxl_ops->release_one_irq(adapter, adapter->native->err_hwirq); + kfree(adapter->irq_name); + adapter->native->err_virq = 0; +} + +int cxl_native_register_serr_irq(struct cxl_afu *afu) +{ + u64 serr; + int rc; + + afu->err_irq_name = kasprintf(GFP_KERNEL, "cxl-%s-err", + dev_name(&afu->dev)); + if (!afu->err_irq_name) + return -ENOMEM; + + if ((rc = cxl_register_one_irq(afu->adapter, native_slice_irq_err, afu, + &afu->serr_hwirq, + &afu->serr_virq, afu->err_irq_name))) { + kfree(afu->err_irq_name); + afu->err_irq_name = NULL; + return rc; + } + + serr = cxl_p1n_read(afu, CXL_PSL_SERR_An); + if (cxl_is_power8()) + serr = (serr & 0x00ffffffffff0000ULL) | (afu->serr_hwirq & 0xffff); + if (cxl_is_power9()) { + /* + * By default, all errors are masked. So don't set all masks. + * Slice errors will be transfered. + */ + serr = (serr & ~0xff0000007fffffffULL) | (afu->serr_hwirq & 0xffff); + } + cxl_p1n_write(afu, CXL_PSL_SERR_An, serr); + + return 0; +} + +void cxl_native_release_serr_irq(struct cxl_afu *afu) +{ + if (afu->serr_virq == 0 || + afu->serr_virq != irq_find_mapping(NULL, afu->serr_hwirq)) + return; + + cxl_p1n_write(afu, CXL_PSL_SERR_An, 0x0000000000000000); + cxl_unmap_irq(afu->serr_virq, afu); + cxl_ops->release_one_irq(afu->adapter, afu->serr_hwirq); + kfree(afu->err_irq_name); + afu->serr_virq = 0; +} + +int cxl_native_register_psl_irq(struct cxl_afu *afu) +{ + int rc; + + afu->psl_irq_name = kasprintf(GFP_KERNEL, "cxl-%s", + dev_name(&afu->dev)); + if (!afu->psl_irq_name) + return -ENOMEM; + + if ((rc = cxl_register_one_irq(afu->adapter, native_irq_multiplexed, + afu, &afu->native->psl_hwirq, &afu->native->psl_virq, + afu->psl_irq_name))) { + kfree(afu->psl_irq_name); + afu->psl_irq_name = NULL; + } + return rc; +} + +void cxl_native_release_psl_irq(struct cxl_afu *afu) +{ + if (afu->native->psl_virq == 0 || + afu->native->psl_virq != + irq_find_mapping(NULL, afu->native->psl_hwirq)) + return; + + cxl_unmap_irq(afu->native->psl_virq, afu); + cxl_ops->release_one_irq(afu->adapter, afu->native->psl_hwirq); + kfree(afu->psl_irq_name); + afu->native->psl_virq = 0; +} + +static void recover_psl_err(struct cxl_afu *afu, u64 errstat) +{ + u64 dsisr; + + pr_devel("RECOVERING FROM PSL ERROR... (0x%016llx)\n", errstat); + + /* Clear PSL_DSISR[PE] */ + dsisr = cxl_p2n_read(afu, CXL_PSL_DSISR_An); + cxl_p2n_write(afu, CXL_PSL_DSISR_An, dsisr & ~CXL_PSL_DSISR_An_PE); + + /* Write 1s to clear error status bits */ + cxl_p2n_write(afu, CXL_PSL_ErrStat_An, errstat); +} + +static int native_ack_irq(struct cxl_context *ctx, u64 tfc, u64 psl_reset_mask) +{ + trace_cxl_psl_irq_ack(ctx, tfc); + if (tfc) + cxl_p2n_write(ctx->afu, CXL_PSL_TFC_An, tfc); + if (psl_reset_mask) + recover_psl_err(ctx->afu, psl_reset_mask); + + return 0; +} + +int cxl_check_error(struct cxl_afu *afu) +{ + return (cxl_p1n_read(afu, CXL_PSL_SCNTL_An) == ~0ULL); +} + +static bool native_support_attributes(const char *attr_name, + enum cxl_attrs type) +{ + return true; +} + +static int native_afu_cr_read64(struct cxl_afu *afu, int cr, u64 off, u64 *out) +{ + if (unlikely(!cxl_ops->link_ok(afu->adapter, afu))) + return -EIO; + if (unlikely(off >= afu->crs_len)) + return -ERANGE; + *out = in_le64(afu->native->afu_desc_mmio + afu->crs_offset + + (cr * afu->crs_len) + off); + return 0; +} + +static int native_afu_cr_read32(struct cxl_afu *afu, int cr, u64 off, u32 *out) +{ + if (unlikely(!cxl_ops->link_ok(afu->adapter, afu))) + return -EIO; + if (unlikely(off >= afu->crs_len)) + return -ERANGE; + *out = in_le32(afu->native->afu_desc_mmio + afu->crs_offset + + (cr * afu->crs_len) + off); + return 0; +} + +static int native_afu_cr_read16(struct cxl_afu *afu, int cr, u64 off, u16 *out) +{ + u64 aligned_off = off & ~0x3L; + u32 val; + int rc; + + rc = native_afu_cr_read32(afu, cr, aligned_off, &val); + if (!rc) + *out = (val >> ((off & 0x3) * 8)) & 0xffff; + return rc; +} + +static int native_afu_cr_read8(struct cxl_afu *afu, int cr, u64 off, u8 *out) +{ + u64 aligned_off = off & ~0x3L; + u32 val; + int rc; + + rc = native_afu_cr_read32(afu, cr, aligned_off, &val); + if (!rc) + *out = (val >> ((off & 0x3) * 8)) & 0xff; + return rc; +} + +static int native_afu_cr_write32(struct cxl_afu *afu, int cr, u64 off, u32 in) +{ + if (unlikely(!cxl_ops->link_ok(afu->adapter, afu))) + return -EIO; + if (unlikely(off >= afu->crs_len)) + return -ERANGE; + out_le32(afu->native->afu_desc_mmio + afu->crs_offset + + (cr * afu->crs_len) + off, in); + return 0; +} + +static int native_afu_cr_write16(struct cxl_afu *afu, int cr, u64 off, u16 in) +{ + u64 aligned_off = off & ~0x3L; + u32 val32, mask, shift; + int rc; + + rc = native_afu_cr_read32(afu, cr, aligned_off, &val32); + if (rc) + return rc; + shift = (off & 0x3) * 8; + WARN_ON(shift == 24); + mask = 0xffff << shift; + val32 = (val32 & ~mask) | (in << shift); + + rc = native_afu_cr_write32(afu, cr, aligned_off, val32); + return rc; +} + +static int native_afu_cr_write8(struct cxl_afu *afu, int cr, u64 off, u8 in) +{ + u64 aligned_off = off & ~0x3L; + u32 val32, mask, shift; + int rc; + + rc = native_afu_cr_read32(afu, cr, aligned_off, &val32); + if (rc) + return rc; + shift = (off & 0x3) * 8; + mask = 0xff << shift; + val32 = (val32 & ~mask) | (in << shift); + + rc = native_afu_cr_write32(afu, cr, aligned_off, val32); + return rc; +} + +const struct cxl_backend_ops cxl_native_ops = { + .module = THIS_MODULE, + .adapter_reset = cxl_pci_reset, + .alloc_one_irq = cxl_pci_alloc_one_irq, + .release_one_irq = cxl_pci_release_one_irq, + .alloc_irq_ranges = cxl_pci_alloc_irq_ranges, + .release_irq_ranges = cxl_pci_release_irq_ranges, + .setup_irq = cxl_pci_setup_irq, + .handle_psl_slice_error = native_handle_psl_slice_error, + .psl_interrupt = NULL, + .ack_irq = native_ack_irq, + .irq_wait = native_irq_wait, + .attach_process = native_attach_process, + .detach_process = native_detach_process, + .update_ivtes = native_update_ivtes, + .support_attributes = native_support_attributes, + .link_ok = cxl_adapter_link_ok, + .release_afu = cxl_pci_release_afu, + .afu_read_err_buffer = cxl_pci_afu_read_err_buffer, + .afu_check_and_enable = native_afu_check_and_enable, + .afu_activate_mode = native_afu_activate_mode, + .afu_deactivate_mode = native_afu_deactivate_mode, + .afu_reset = native_afu_reset, + .afu_cr_read8 = native_afu_cr_read8, + .afu_cr_read16 = native_afu_cr_read16, + .afu_cr_read32 = native_afu_cr_read32, + .afu_cr_read64 = native_afu_cr_read64, + .afu_cr_write8 = native_afu_cr_write8, + .afu_cr_write16 = native_afu_cr_write16, + .afu_cr_write32 = native_afu_cr_write32, + .read_adapter_vpd = cxl_pci_read_adapter_vpd, +}; diff --git a/drivers/misc/cxl/of.c b/drivers/misc/cxl/of.c new file mode 100644 index 000000000..aff181cd0 --- /dev/null +++ b/drivers/misc/cxl/of.c @@ -0,0 +1,511 @@ +/* + * Copyright 2015 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include + +#include "cxl.h" + + +static const __be32 *read_prop_string(const struct device_node *np, + const char *prop_name) +{ + const __be32 *prop; + + prop = of_get_property(np, prop_name, NULL); + if (cxl_verbose && prop) + pr_info("%s: %s\n", prop_name, (char *) prop); + return prop; +} + +static const __be32 *read_prop_dword(const struct device_node *np, + const char *prop_name, u32 *val) +{ + const __be32 *prop; + + prop = of_get_property(np, prop_name, NULL); + if (prop) + *val = be32_to_cpu(prop[0]); + if (cxl_verbose && prop) + pr_info("%s: %#x (%u)\n", prop_name, *val, *val); + return prop; +} + +static const __be64 *read_prop64_dword(const struct device_node *np, + const char *prop_name, u64 *val) +{ + const __be64 *prop; + + prop = of_get_property(np, prop_name, NULL); + if (prop) + *val = be64_to_cpu(prop[0]); + if (cxl_verbose && prop) + pr_info("%s: %#llx (%llu)\n", prop_name, *val, *val); + return prop; +} + + +static int read_handle(struct device_node *np, u64 *handle) +{ + const __be32 *prop; + u64 size; + + /* Get address and size of the node */ + prop = of_get_address(np, 0, &size, NULL); + if (size) + return -EINVAL; + + /* Helper to read a big number; size is in cells (not bytes) */ + *handle = of_read_number(prop, of_n_addr_cells(np)); + return 0; +} + +static int read_phys_addr(struct device_node *np, char *prop_name, + struct cxl_afu *afu) +{ + int i, len, entry_size, naddr, nsize, type; + u64 addr, size; + const __be32 *prop; + + naddr = of_n_addr_cells(np); + nsize = of_n_size_cells(np); + + prop = of_get_property(np, prop_name, &len); + if (prop) { + entry_size = naddr + nsize; + for (i = 0; i < (len / 4); i += entry_size, prop += entry_size) { + type = be32_to_cpu(prop[0]); + addr = of_read_number(prop, naddr); + size = of_read_number(&prop[naddr], nsize); + switch (type) { + case 0: /* unit address */ + afu->guest->handle = addr; + break; + case 1: /* p2 area */ + afu->guest->p2n_phys += addr; + afu->guest->p2n_size = size; + break; + case 2: /* problem state area */ + afu->psn_phys += addr; + afu->adapter->ps_size = size; + break; + default: + pr_err("Invalid address type %d found in %s property of AFU\n", + type, prop_name); + return -EINVAL; + } + if (cxl_verbose) + pr_info("%s: %#x %#llx (size %#llx)\n", + prop_name, type, addr, size); + } + } + return 0; +} + +static int read_vpd(struct cxl *adapter, struct cxl_afu *afu) +{ + char vpd[256]; + int rc; + size_t len = sizeof(vpd); + + memset(vpd, 0, len); + + if (adapter) + rc = cxl_guest_read_adapter_vpd(adapter, vpd, len); + else + rc = cxl_guest_read_afu_vpd(afu, vpd, len); + + if (rc > 0) { + cxl_dump_debug_buffer(vpd, rc); + rc = 0; + } + return rc; +} + +int cxl_of_read_afu_handle(struct cxl_afu *afu, struct device_node *afu_np) +{ + if (read_handle(afu_np, &afu->guest->handle)) + return -EINVAL; + pr_devel("AFU handle: 0x%.16llx\n", afu->guest->handle); + + return 0; +} + +int cxl_of_read_afu_properties(struct cxl_afu *afu, struct device_node *np) +{ + int i, len, rc; + char *p; + const __be32 *prop; + u16 device_id, vendor_id; + u32 val = 0, class_code; + + /* Properties are read in the same order as listed in PAPR */ + + if (cxl_verbose) { + pr_info("Dump of the 'ibm,coherent-platform-function' node properties:\n"); + + prop = of_get_property(np, "compatible", &len); + i = 0; + while (i < len) { + p = (char *) prop + i; + pr_info("compatible: %s\n", p); + i += strlen(p) + 1; + } + read_prop_string(np, "name"); + } + + rc = read_phys_addr(np, "reg", afu); + if (rc) + return rc; + + rc = read_phys_addr(np, "assigned-addresses", afu); + if (rc) + return rc; + + if (afu->psn_phys == 0) + afu->psa = false; + else + afu->psa = true; + + if (cxl_verbose) { + read_prop_string(np, "ibm,loc-code"); + read_prop_string(np, "device_type"); + } + + read_prop_dword(np, "ibm,#processes", &afu->max_procs_virtualised); + + if (cxl_verbose) { + read_prop_dword(np, "ibm,scratchpad-size", &val); + read_prop_dword(np, "ibm,programmable", &val); + read_prop_string(np, "ibm,phandle"); + read_vpd(NULL, afu); + } + + read_prop_dword(np, "ibm,max-ints-per-process", &afu->guest->max_ints); + afu->irqs_max = afu->guest->max_ints; + + prop = read_prop_dword(np, "ibm,min-ints-per-process", &afu->pp_irqs); + if (prop) { + /* One extra interrupt for the PSL interrupt is already + * included. Remove it now to keep only AFU interrupts and + * match the native case. + */ + afu->pp_irqs--; + } + + if (cxl_verbose) { + read_prop_dword(np, "ibm,max-ints", &val); + read_prop_dword(np, "ibm,vpd-size", &val); + } + + read_prop64_dword(np, "ibm,error-buffer-size", &afu->eb_len); + afu->eb_offset = 0; + + if (cxl_verbose) + read_prop_dword(np, "ibm,config-record-type", &val); + + read_prop64_dword(np, "ibm,config-record-size", &afu->crs_len); + afu->crs_offset = 0; + + read_prop_dword(np, "ibm,#config-records", &afu->crs_num); + + if (cxl_verbose) { + for (i = 0; i < afu->crs_num; i++) { + rc = cxl_ops->afu_cr_read16(afu, i, PCI_DEVICE_ID, + &device_id); + if (!rc) + pr_info("record %d - device-id: %#x\n", + i, device_id); + rc = cxl_ops->afu_cr_read16(afu, i, PCI_VENDOR_ID, + &vendor_id); + if (!rc) + pr_info("record %d - vendor-id: %#x\n", + i, vendor_id); + rc = cxl_ops->afu_cr_read32(afu, i, PCI_CLASS_REVISION, + &class_code); + if (!rc) { + class_code >>= 8; + pr_info("record %d - class-code: %#x\n", + i, class_code); + } + } + + read_prop_dword(np, "ibm,function-number", &val); + read_prop_dword(np, "ibm,privileged-function", &val); + read_prop_dword(np, "vendor-id", &val); + read_prop_dword(np, "device-id", &val); + read_prop_dword(np, "revision-id", &val); + read_prop_dword(np, "class-code", &val); + read_prop_dword(np, "subsystem-vendor-id", &val); + read_prop_dword(np, "subsystem-id", &val); + } + /* + * if "ibm,process-mmio" doesn't exist then per-process mmio is + * not supported + */ + val = 0; + prop = read_prop_dword(np, "ibm,process-mmio", &val); + if (prop && val == 1) + afu->pp_psa = true; + else + afu->pp_psa = false; + + if (cxl_verbose) { + read_prop_dword(np, "ibm,supports-aur", &val); + read_prop_dword(np, "ibm,supports-csrp", &val); + read_prop_dword(np, "ibm,supports-prr", &val); + } + + prop = read_prop_dword(np, "ibm,function-error-interrupt", &val); + if (prop) + afu->serr_hwirq = val; + + pr_devel("AFU handle: %#llx\n", afu->guest->handle); + pr_devel("p2n_phys: %#llx (size %#llx)\n", + afu->guest->p2n_phys, afu->guest->p2n_size); + pr_devel("psn_phys: %#llx (size %#llx)\n", + afu->psn_phys, afu->adapter->ps_size); + pr_devel("Max number of processes virtualised=%i\n", + afu->max_procs_virtualised); + pr_devel("Per-process irqs min=%i, max=%i\n", afu->pp_irqs, + afu->irqs_max); + pr_devel("Slice error interrupt=%#lx\n", afu->serr_hwirq); + + return 0; +} + +static int read_adapter_irq_config(struct cxl *adapter, struct device_node *np) +{ + const __be32 *ranges; + int len, nranges, i; + struct irq_avail *cur; + + ranges = of_get_property(np, "interrupt-ranges", &len); + if (ranges == NULL || len < (2 * sizeof(int))) + return -EINVAL; + + /* + * encoded array of two cells per entry, each cell encoded as + * with encode-int + */ + nranges = len / (2 * sizeof(int)); + if (nranges == 0 || (nranges * 2 * sizeof(int)) != len) + return -EINVAL; + + adapter->guest->irq_avail = kcalloc(nranges, sizeof(struct irq_avail), + GFP_KERNEL); + if (adapter->guest->irq_avail == NULL) + return -ENOMEM; + + adapter->guest->irq_base_offset = be32_to_cpu(ranges[0]); + for (i = 0; i < nranges; i++) { + cur = &adapter->guest->irq_avail[i]; + cur->offset = be32_to_cpu(ranges[i * 2]); + cur->range = be32_to_cpu(ranges[i * 2 + 1]); + cur->bitmap = kcalloc(BITS_TO_LONGS(cur->range), + sizeof(*cur->bitmap), GFP_KERNEL); + if (cur->bitmap == NULL) + goto err; + if (cur->offset < adapter->guest->irq_base_offset) + adapter->guest->irq_base_offset = cur->offset; + if (cxl_verbose) + pr_info("available IRQ range: %#lx-%#lx (%lu)\n", + cur->offset, cur->offset + cur->range - 1, + cur->range); + } + adapter->guest->irq_nranges = nranges; + spin_lock_init(&adapter->guest->irq_alloc_lock); + + return 0; +err: + for (i--; i >= 0; i--) { + cur = &adapter->guest->irq_avail[i]; + kfree(cur->bitmap); + } + kfree(adapter->guest->irq_avail); + adapter->guest->irq_avail = NULL; + return -ENOMEM; +} + +int cxl_of_read_adapter_handle(struct cxl *adapter, struct device_node *np) +{ + if (read_handle(np, &adapter->guest->handle)) + return -EINVAL; + pr_devel("Adapter handle: 0x%.16llx\n", adapter->guest->handle); + + return 0; +} + +int cxl_of_read_adapter_properties(struct cxl *adapter, struct device_node *np) +{ + int rc, len, naddr, i; + char *p; + const __be32 *prop; + u32 val = 0; + + /* Properties are read in the same order as listed in PAPR */ + + naddr = of_n_addr_cells(np); + + if (cxl_verbose) { + pr_info("Dump of the 'ibm,coherent-platform-facility' node properties:\n"); + + read_prop_dword(np, "#address-cells", &val); + read_prop_dword(np, "#size-cells", &val); + + prop = of_get_property(np, "compatible", &len); + i = 0; + while (i < len) { + p = (char *) prop + i; + pr_info("compatible: %s\n", p); + i += strlen(p) + 1; + } + read_prop_string(np, "name"); + read_prop_string(np, "model"); + + prop = of_get_property(np, "reg", NULL); + if (prop) { + pr_info("reg: addr:%#llx size:%#x\n", + of_read_number(prop, naddr), + be32_to_cpu(prop[naddr])); + } + + read_prop_string(np, "ibm,loc-code"); + } + + if ((rc = read_adapter_irq_config(adapter, np))) + return rc; + + if (cxl_verbose) { + read_prop_string(np, "device_type"); + read_prop_string(np, "ibm,phandle"); + } + + prop = read_prop_dword(np, "ibm,caia-version", &val); + if (prop) { + adapter->caia_major = (val & 0xFF00) >> 8; + adapter->caia_minor = val & 0xFF; + } + + prop = read_prop_dword(np, "ibm,psl-revision", &val); + if (prop) + adapter->psl_rev = val; + + prop = read_prop_string(np, "status"); + if (prop) { + adapter->guest->status = kasprintf(GFP_KERNEL, "%s", (char *) prop); + if (adapter->guest->status == NULL) + return -ENOMEM; + } + + prop = read_prop_dword(np, "vendor-id", &val); + if (prop) + adapter->guest->vendor = val; + + prop = read_prop_dword(np, "device-id", &val); + if (prop) + adapter->guest->device = val; + + if (cxl_verbose) { + read_prop_dword(np, "ibm,privileged-facility", &val); + read_prop_dword(np, "revision-id", &val); + read_prop_dword(np, "class-code", &val); + } + + prop = read_prop_dword(np, "subsystem-vendor-id", &val); + if (prop) + adapter->guest->subsystem_vendor = val; + + prop = read_prop_dword(np, "subsystem-id", &val); + if (prop) + adapter->guest->subsystem = val; + + if (cxl_verbose) + read_vpd(adapter, NULL); + + return 0; +} + +static int cxl_of_remove(struct platform_device *pdev) +{ + struct cxl *adapter; + int afu; + + adapter = dev_get_drvdata(&pdev->dev); + for (afu = 0; afu < adapter->slices; afu++) + cxl_guest_remove_afu(adapter->afu[afu]); + + cxl_guest_remove_adapter(adapter); + return 0; +} + +static void cxl_of_shutdown(struct platform_device *pdev) +{ + cxl_of_remove(pdev); +} + +int cxl_of_probe(struct platform_device *pdev) +{ + struct device_node *np = NULL; + struct device_node *afu_np = NULL; + struct cxl *adapter = NULL; + int ret; + int slice = 0, slice_ok = 0; + + pr_devel("in %s\n", __func__); + + np = pdev->dev.of_node; + if (np == NULL) + return -ENODEV; + + /* init adapter */ + adapter = cxl_guest_init_adapter(np, pdev); + if (IS_ERR(adapter)) { + dev_err(&pdev->dev, "guest_init_adapter failed: %li\n", PTR_ERR(adapter)); + return PTR_ERR(adapter); + } + + /* init afu */ + for_each_child_of_node(np, afu_np) { + if ((ret = cxl_guest_init_afu(adapter, slice, afu_np))) + dev_err(&pdev->dev, "AFU %i failed to initialise: %i\n", + slice, ret); + else + slice_ok++; + slice++; + } + + if (slice_ok == 0) { + dev_info(&pdev->dev, "No active AFU"); + adapter->slices = 0; + } + + return 0; +} + +static const struct of_device_id cxl_of_match[] = { + { .compatible = "ibm,coherent-platform-facility",}, + {}, +}; +MODULE_DEVICE_TABLE(of, cxl_of_match); + +struct platform_driver cxl_of_driver = { + .driver = { + .name = "cxl_of", + .of_match_table = cxl_of_match, + .owner = THIS_MODULE + }, + .probe = cxl_of_probe, + .remove = cxl_of_remove, + .shutdown = cxl_of_shutdown, +}; diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c new file mode 100644 index 000000000..b9d3c87e9 --- /dev/null +++ b/drivers/misc/cxl/pci.c @@ -0,0 +1,2104 @@ +/* + * Copyright 2014 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "cxl.h" +#include + + +#define CXL_PCI_VSEC_ID 0x1280 +#define CXL_VSEC_MIN_SIZE 0x80 + +#define CXL_READ_VSEC_LENGTH(dev, vsec, dest) \ + { \ + pci_read_config_word(dev, vsec + 0x6, dest); \ + *dest >>= 4; \ + } +#define CXL_READ_VSEC_NAFUS(dev, vsec, dest) \ + pci_read_config_byte(dev, vsec + 0x8, dest) + +#define CXL_READ_VSEC_STATUS(dev, vsec, dest) \ + pci_read_config_byte(dev, vsec + 0x9, dest) +#define CXL_STATUS_SECOND_PORT 0x80 +#define CXL_STATUS_MSI_X_FULL 0x40 +#define CXL_STATUS_MSI_X_SINGLE 0x20 +#define CXL_STATUS_FLASH_RW 0x08 +#define CXL_STATUS_FLASH_RO 0x04 +#define CXL_STATUS_LOADABLE_AFU 0x02 +#define CXL_STATUS_LOADABLE_PSL 0x01 +/* If we see these features we won't try to use the card */ +#define CXL_UNSUPPORTED_FEATURES \ + (CXL_STATUS_MSI_X_FULL | CXL_STATUS_MSI_X_SINGLE) + +#define CXL_READ_VSEC_MODE_CONTROL(dev, vsec, dest) \ + pci_read_config_byte(dev, vsec + 0xa, dest) +#define CXL_WRITE_VSEC_MODE_CONTROL(dev, vsec, val) \ + pci_write_config_byte(dev, vsec + 0xa, val) +#define CXL_VSEC_PROTOCOL_MASK 0xe0 +#define CXL_VSEC_PROTOCOL_1024TB 0x80 +#define CXL_VSEC_PROTOCOL_512TB 0x40 +#define CXL_VSEC_PROTOCOL_256TB 0x20 /* Power 8/9 uses this */ +#define CXL_VSEC_PROTOCOL_ENABLE 0x01 + +#define CXL_READ_VSEC_PSL_REVISION(dev, vsec, dest) \ + pci_read_config_word(dev, vsec + 0xc, dest) +#define CXL_READ_VSEC_CAIA_MINOR(dev, vsec, dest) \ + pci_read_config_byte(dev, vsec + 0xe, dest) +#define CXL_READ_VSEC_CAIA_MAJOR(dev, vsec, dest) \ + pci_read_config_byte(dev, vsec + 0xf, dest) +#define CXL_READ_VSEC_BASE_IMAGE(dev, vsec, dest) \ + pci_read_config_word(dev, vsec + 0x10, dest) + +#define CXL_READ_VSEC_IMAGE_STATE(dev, vsec, dest) \ + pci_read_config_byte(dev, vsec + 0x13, dest) +#define CXL_WRITE_VSEC_IMAGE_STATE(dev, vsec, val) \ + pci_write_config_byte(dev, vsec + 0x13, val) +#define CXL_VSEC_USER_IMAGE_LOADED 0x80 /* RO */ +#define CXL_VSEC_PERST_LOADS_IMAGE 0x20 /* RW */ +#define CXL_VSEC_PERST_SELECT_USER 0x10 /* RW */ + +#define CXL_READ_VSEC_AFU_DESC_OFF(dev, vsec, dest) \ + pci_read_config_dword(dev, vsec + 0x20, dest) +#define CXL_READ_VSEC_AFU_DESC_SIZE(dev, vsec, dest) \ + pci_read_config_dword(dev, vsec + 0x24, dest) +#define CXL_READ_VSEC_PS_OFF(dev, vsec, dest) \ + pci_read_config_dword(dev, vsec + 0x28, dest) +#define CXL_READ_VSEC_PS_SIZE(dev, vsec, dest) \ + pci_read_config_dword(dev, vsec + 0x2c, dest) + + +/* This works a little different than the p1/p2 register accesses to make it + * easier to pull out individual fields */ +#define AFUD_READ(afu, off) in_be64(afu->native->afu_desc_mmio + off) +#define AFUD_READ_LE(afu, off) in_le64(afu->native->afu_desc_mmio + off) +#define EXTRACT_PPC_BIT(val, bit) (!!(val & PPC_BIT(bit))) +#define EXTRACT_PPC_BITS(val, bs, be) ((val & PPC_BITMASK(bs, be)) >> PPC_BITLSHIFT(be)) + +#define AFUD_READ_INFO(afu) AFUD_READ(afu, 0x0) +#define AFUD_NUM_INTS_PER_PROC(val) EXTRACT_PPC_BITS(val, 0, 15) +#define AFUD_NUM_PROCS(val) EXTRACT_PPC_BITS(val, 16, 31) +#define AFUD_NUM_CRS(val) EXTRACT_PPC_BITS(val, 32, 47) +#define AFUD_MULTIMODE(val) EXTRACT_PPC_BIT(val, 48) +#define AFUD_PUSH_BLOCK_TRANSFER(val) EXTRACT_PPC_BIT(val, 55) +#define AFUD_DEDICATED_PROCESS(val) EXTRACT_PPC_BIT(val, 59) +#define AFUD_AFU_DIRECTED(val) EXTRACT_PPC_BIT(val, 61) +#define AFUD_TIME_SLICED(val) EXTRACT_PPC_BIT(val, 63) +#define AFUD_READ_CR(afu) AFUD_READ(afu, 0x20) +#define AFUD_CR_LEN(val) EXTRACT_PPC_BITS(val, 8, 63) +#define AFUD_READ_CR_OFF(afu) AFUD_READ(afu, 0x28) +#define AFUD_READ_PPPSA(afu) AFUD_READ(afu, 0x30) +#define AFUD_PPPSA_PP(val) EXTRACT_PPC_BIT(val, 6) +#define AFUD_PPPSA_PSA(val) EXTRACT_PPC_BIT(val, 7) +#define AFUD_PPPSA_LEN(val) EXTRACT_PPC_BITS(val, 8, 63) +#define AFUD_READ_PPPSA_OFF(afu) AFUD_READ(afu, 0x38) +#define AFUD_READ_EB(afu) AFUD_READ(afu, 0x40) +#define AFUD_EB_LEN(val) EXTRACT_PPC_BITS(val, 8, 63) +#define AFUD_READ_EB_OFF(afu) AFUD_READ(afu, 0x48) + +static const struct pci_device_id cxl_pci_tbl[] = { + { PCI_DEVICE(PCI_VENDOR_ID_IBM, 0x0477), }, + { PCI_DEVICE(PCI_VENDOR_ID_IBM, 0x044b), }, + { PCI_DEVICE(PCI_VENDOR_ID_IBM, 0x04cf), }, + { PCI_DEVICE(PCI_VENDOR_ID_IBM, 0x0601), }, + { PCI_DEVICE(PCI_VENDOR_ID_IBM, 0x0623), }, + { PCI_DEVICE(PCI_VENDOR_ID_IBM, 0x0628), }, + { } +}; +MODULE_DEVICE_TABLE(pci, cxl_pci_tbl); + + +/* + * Mostly using these wrappers to avoid confusion: + * priv 1 is BAR2, while priv 2 is BAR0 + */ +static inline resource_size_t p1_base(struct pci_dev *dev) +{ + return pci_resource_start(dev, 2); +} + +static inline resource_size_t p1_size(struct pci_dev *dev) +{ + return pci_resource_len(dev, 2); +} + +static inline resource_size_t p2_base(struct pci_dev *dev) +{ + return pci_resource_start(dev, 0); +} + +static inline resource_size_t p2_size(struct pci_dev *dev) +{ + return pci_resource_len(dev, 0); +} + +static int find_cxl_vsec(struct pci_dev *dev) +{ + int vsec = 0; + u16 val; + + while ((vsec = pci_find_next_ext_capability(dev, vsec, PCI_EXT_CAP_ID_VNDR))) { + pci_read_config_word(dev, vsec + 0x4, &val); + if (val == CXL_PCI_VSEC_ID) + return vsec; + } + return 0; + +} + +static void dump_cxl_config_space(struct pci_dev *dev) +{ + int vsec; + u32 val; + + dev_info(&dev->dev, "dump_cxl_config_space\n"); + + pci_read_config_dword(dev, PCI_BASE_ADDRESS_0, &val); + dev_info(&dev->dev, "BAR0: %#.8x\n", val); + pci_read_config_dword(dev, PCI_BASE_ADDRESS_1, &val); + dev_info(&dev->dev, "BAR1: %#.8x\n", val); + pci_read_config_dword(dev, PCI_BASE_ADDRESS_2, &val); + dev_info(&dev->dev, "BAR2: %#.8x\n", val); + pci_read_config_dword(dev, PCI_BASE_ADDRESS_3, &val); + dev_info(&dev->dev, "BAR3: %#.8x\n", val); + pci_read_config_dword(dev, PCI_BASE_ADDRESS_4, &val); + dev_info(&dev->dev, "BAR4: %#.8x\n", val); + pci_read_config_dword(dev, PCI_BASE_ADDRESS_5, &val); + dev_info(&dev->dev, "BAR5: %#.8x\n", val); + + dev_info(&dev->dev, "p1 regs: %#llx, len: %#llx\n", + p1_base(dev), p1_size(dev)); + dev_info(&dev->dev, "p2 regs: %#llx, len: %#llx\n", + p2_base(dev), p2_size(dev)); + dev_info(&dev->dev, "BAR 4/5: %#llx, len: %#llx\n", + pci_resource_start(dev, 4), pci_resource_len(dev, 4)); + + if (!(vsec = find_cxl_vsec(dev))) + return; + +#define show_reg(name, what) \ + dev_info(&dev->dev, "cxl vsec: %30s: %#x\n", name, what) + + pci_read_config_dword(dev, vsec + 0x0, &val); + show_reg("Cap ID", (val >> 0) & 0xffff); + show_reg("Cap Ver", (val >> 16) & 0xf); + show_reg("Next Cap Ptr", (val >> 20) & 0xfff); + pci_read_config_dword(dev, vsec + 0x4, &val); + show_reg("VSEC ID", (val >> 0) & 0xffff); + show_reg("VSEC Rev", (val >> 16) & 0xf); + show_reg("VSEC Length", (val >> 20) & 0xfff); + pci_read_config_dword(dev, vsec + 0x8, &val); + show_reg("Num AFUs", (val >> 0) & 0xff); + show_reg("Status", (val >> 8) & 0xff); + show_reg("Mode Control", (val >> 16) & 0xff); + show_reg("Reserved", (val >> 24) & 0xff); + pci_read_config_dword(dev, vsec + 0xc, &val); + show_reg("PSL Rev", (val >> 0) & 0xffff); + show_reg("CAIA Ver", (val >> 16) & 0xffff); + pci_read_config_dword(dev, vsec + 0x10, &val); + show_reg("Base Image Rev", (val >> 0) & 0xffff); + show_reg("Reserved", (val >> 16) & 0x0fff); + show_reg("Image Control", (val >> 28) & 0x3); + show_reg("Reserved", (val >> 30) & 0x1); + show_reg("Image Loaded", (val >> 31) & 0x1); + + pci_read_config_dword(dev, vsec + 0x14, &val); + show_reg("Reserved", val); + pci_read_config_dword(dev, vsec + 0x18, &val); + show_reg("Reserved", val); + pci_read_config_dword(dev, vsec + 0x1c, &val); + show_reg("Reserved", val); + + pci_read_config_dword(dev, vsec + 0x20, &val); + show_reg("AFU Descriptor Offset", val); + pci_read_config_dword(dev, vsec + 0x24, &val); + show_reg("AFU Descriptor Size", val); + pci_read_config_dword(dev, vsec + 0x28, &val); + show_reg("Problem State Offset", val); + pci_read_config_dword(dev, vsec + 0x2c, &val); + show_reg("Problem State Size", val); + + pci_read_config_dword(dev, vsec + 0x30, &val); + show_reg("Reserved", val); + pci_read_config_dword(dev, vsec + 0x34, &val); + show_reg("Reserved", val); + pci_read_config_dword(dev, vsec + 0x38, &val); + show_reg("Reserved", val); + pci_read_config_dword(dev, vsec + 0x3c, &val); + show_reg("Reserved", val); + + pci_read_config_dword(dev, vsec + 0x40, &val); + show_reg("PSL Programming Port", val); + pci_read_config_dword(dev, vsec + 0x44, &val); + show_reg("PSL Programming Control", val); + + pci_read_config_dword(dev, vsec + 0x48, &val); + show_reg("Reserved", val); + pci_read_config_dword(dev, vsec + 0x4c, &val); + show_reg("Reserved", val); + + pci_read_config_dword(dev, vsec + 0x50, &val); + show_reg("Flash Address Register", val); + pci_read_config_dword(dev, vsec + 0x54, &val); + show_reg("Flash Size Register", val); + pci_read_config_dword(dev, vsec + 0x58, &val); + show_reg("Flash Status/Control Register", val); + pci_read_config_dword(dev, vsec + 0x58, &val); + show_reg("Flash Data Port", val); + +#undef show_reg +} + +static void dump_afu_descriptor(struct cxl_afu *afu) +{ + u64 val, afu_cr_num, afu_cr_off, afu_cr_len; + int i; + +#define show_reg(name, what) \ + dev_info(&afu->dev, "afu desc: %30s: %#llx\n", name, what) + + val = AFUD_READ_INFO(afu); + show_reg("num_ints_per_process", AFUD_NUM_INTS_PER_PROC(val)); + show_reg("num_of_processes", AFUD_NUM_PROCS(val)); + show_reg("num_of_afu_CRs", AFUD_NUM_CRS(val)); + show_reg("req_prog_mode", val & 0xffffULL); + afu_cr_num = AFUD_NUM_CRS(val); + + val = AFUD_READ(afu, 0x8); + show_reg("Reserved", val); + val = AFUD_READ(afu, 0x10); + show_reg("Reserved", val); + val = AFUD_READ(afu, 0x18); + show_reg("Reserved", val); + + val = AFUD_READ_CR(afu); + show_reg("Reserved", (val >> (63-7)) & 0xff); + show_reg("AFU_CR_len", AFUD_CR_LEN(val)); + afu_cr_len = AFUD_CR_LEN(val) * 256; + + val = AFUD_READ_CR_OFF(afu); + afu_cr_off = val; + show_reg("AFU_CR_offset", val); + + val = AFUD_READ_PPPSA(afu); + show_reg("PerProcessPSA_control", (val >> (63-7)) & 0xff); + show_reg("PerProcessPSA Length", AFUD_PPPSA_LEN(val)); + + val = AFUD_READ_PPPSA_OFF(afu); + show_reg("PerProcessPSA_offset", val); + + val = AFUD_READ_EB(afu); + show_reg("Reserved", (val >> (63-7)) & 0xff); + show_reg("AFU_EB_len", AFUD_EB_LEN(val)); + + val = AFUD_READ_EB_OFF(afu); + show_reg("AFU_EB_offset", val); + + for (i = 0; i < afu_cr_num; i++) { + val = AFUD_READ_LE(afu, afu_cr_off + i * afu_cr_len); + show_reg("CR Vendor", val & 0xffff); + show_reg("CR Device", (val >> 16) & 0xffff); + } +#undef show_reg +} + +#define P8_CAPP_UNIT0_ID 0xBA +#define P8_CAPP_UNIT1_ID 0XBE +#define P9_CAPP_UNIT0_ID 0xC0 +#define P9_CAPP_UNIT1_ID 0xE0 + +static int get_phb_index(struct device_node *np, u32 *phb_index) +{ + if (of_property_read_u32(np, "ibm,phb-index", phb_index)) + return -ENODEV; + return 0; +} + +static u64 get_capp_unit_id(struct device_node *np, u32 phb_index) +{ + /* + * POWER 8: + * - For chips other than POWER8NVL, we only have CAPP 0, + * irrespective of which PHB is used. + * - For POWER8NVL, assume CAPP 0 is attached to PHB0 and + * CAPP 1 is attached to PHB1. + */ + if (cxl_is_power8()) { + if (!pvr_version_is(PVR_POWER8NVL)) + return P8_CAPP_UNIT0_ID; + + if (phb_index == 0) + return P8_CAPP_UNIT0_ID; + + if (phb_index == 1) + return P8_CAPP_UNIT1_ID; + } + + /* + * POWER 9: + * PEC0 (PHB0). Capp ID = CAPP0 (0b1100_0000) + * PEC1 (PHB1 - PHB2). No capi mode + * PEC2 (PHB3 - PHB4 - PHB5): Capi mode on PHB3 only. Capp ID = CAPP1 (0b1110_0000) + */ + if (cxl_is_power9()) { + if (phb_index == 0) + return P9_CAPP_UNIT0_ID; + + if (phb_index == 3) + return P9_CAPP_UNIT1_ID; + } + + return 0; +} + +int cxl_calc_capp_routing(struct pci_dev *dev, u64 *chipid, + u32 *phb_index, u64 *capp_unit_id) +{ + int rc; + struct device_node *np; + const __be32 *prop; + + if (!(np = pnv_pci_get_phb_node(dev))) + return -ENODEV; + + while (np && !(prop = of_get_property(np, "ibm,chip-id", NULL))) + np = of_get_next_parent(np); + if (!np) + return -ENODEV; + + *chipid = be32_to_cpup(prop); + + rc = get_phb_index(np, phb_index); + if (rc) { + pr_err("cxl: invalid phb index\n"); + return rc; + } + + *capp_unit_id = get_capp_unit_id(np, *phb_index); + of_node_put(np); + if (!*capp_unit_id) { + pr_err("cxl: No capp unit found for PHB[%lld,%d]. Make sure the adapter is on a capi-compatible slot\n", + *chipid, *phb_index); + return -ENODEV; + } + + return 0; +} + +static DEFINE_MUTEX(indications_mutex); + +static int get_phb_indications(struct pci_dev *dev, u64 *capiind, u64 *asnind, + u64 *nbwind) +{ + static u64 nbw, asn, capi = 0; + struct device_node *np; + const __be32 *prop; + + mutex_lock(&indications_mutex); + if (!capi) { + if (!(np = pnv_pci_get_phb_node(dev))) { + mutex_unlock(&indications_mutex); + return -ENODEV; + } + + prop = of_get_property(np, "ibm,phb-indications", NULL); + if (!prop) { + nbw = 0x0300UL; /* legacy values */ + asn = 0x0400UL; + capi = 0x0200UL; + } else { + nbw = (u64)be32_to_cpu(prop[2]); + asn = (u64)be32_to_cpu(prop[1]); + capi = (u64)be32_to_cpu(prop[0]); + } + of_node_put(np); + } + *capiind = capi; + *asnind = asn; + *nbwind = nbw; + mutex_unlock(&indications_mutex); + return 0; +} + +int cxl_get_xsl9_dsnctl(struct pci_dev *dev, u64 capp_unit_id, u64 *reg) +{ + u64 xsl_dsnctl; + u64 capiind, asnind, nbwind; + + /* + * CAPI Identifier bits [0:7] + * bit 61:60 MSI bits --> 0 + * bit 59 TVT selector --> 0 + */ + if (get_phb_indications(dev, &capiind, &asnind, &nbwind)) + return -ENODEV; + + /* + * Tell XSL where to route data to. + * The field chipid should match the PHB CAPI_CMPM register + */ + xsl_dsnctl = (capiind << (63-15)); /* Bit 57 */ + xsl_dsnctl |= (capp_unit_id << (63-15)); + + /* nMMU_ID Defaults to: b’000001001’*/ + xsl_dsnctl |= ((u64)0x09 << (63-28)); + + /* + * Used to identify CAPI packets which should be sorted into + * the Non-Blocking queues by the PHB. This field should match + * the PHB PBL_NBW_CMPM register + * nbwind=0x03, bits [57:58], must include capi indicator. + * Not supported on P9 DD1. + */ + xsl_dsnctl |= (nbwind << (63-55)); + + /* + * Upper 16b address bits of ASB_Notify messages sent to the + * system. Need to match the PHB’s ASN Compare/Mask Register. + * Not supported on P9 DD1. + */ + xsl_dsnctl |= asnind; + + *reg = xsl_dsnctl; + return 0; +} + +static int init_implementation_adapter_regs_psl9(struct cxl *adapter, + struct pci_dev *dev) +{ + u64 xsl_dsnctl, psl_fircntl; + u64 chipid; + u32 phb_index; + u64 capp_unit_id; + u64 psl_debug; + int rc; + + rc = cxl_calc_capp_routing(dev, &chipid, &phb_index, &capp_unit_id); + if (rc) + return rc; + + rc = cxl_get_xsl9_dsnctl(dev, capp_unit_id, &xsl_dsnctl); + if (rc) + return rc; + + cxl_p1_write(adapter, CXL_XSL9_DSNCTL, xsl_dsnctl); + + /* Set fir_cntl to recommended value for production env */ + psl_fircntl = (0x2ULL << (63-3)); /* ce_report */ + psl_fircntl |= (0x1ULL << (63-6)); /* FIR_report */ + psl_fircntl |= 0x1ULL; /* ce_thresh */ + cxl_p1_write(adapter, CXL_PSL9_FIR_CNTL, psl_fircntl); + + /* Setup the PSL to transmit packets on the PCIe before the + * CAPP is enabled. Make sure that CAPP virtual machines are disabled + */ + cxl_p1_write(adapter, CXL_PSL9_DSNDCTL, 0x0001001000012A10ULL); + + /* + * A response to an ASB_Notify request is returned by the + * system as an MMIO write to the address defined in + * the PSL_TNR_ADDR register. + * keep the Reset Value: 0x00020000E0000000 + */ + + /* Enable XSL rty limit */ + cxl_p1_write(adapter, CXL_XSL9_DEF, 0x51F8000000000005ULL); + + /* Change XSL_INV dummy read threshold */ + cxl_p1_write(adapter, CXL_XSL9_INV, 0x0000040007FFC200ULL); + + if (phb_index == 3) { + /* disable machines 31-47 and 20-27 for DMA */ + cxl_p1_write(adapter, CXL_PSL9_APCDEDTYPE, 0x40000FF3FFFF0000ULL); + } + + /* Snoop machines */ + cxl_p1_write(adapter, CXL_PSL9_APCDEDALLOC, 0x800F000200000000ULL); + + /* Enable NORST and DD2 features */ + cxl_p1_write(adapter, CXL_PSL9_DEBUG, 0xC000000000000000ULL); + + /* + * Check if PSL has data-cache. We need to flush adapter datacache + * when as its about to be removed. + */ + psl_debug = cxl_p1_read(adapter, CXL_PSL9_DEBUG); + if (psl_debug & CXL_PSL_DEBUG_CDC) { + dev_dbg(&dev->dev, "No data-cache present\n"); + adapter->native->no_data_cache = true; + } + + return 0; +} + +static int init_implementation_adapter_regs_psl8(struct cxl *adapter, struct pci_dev *dev) +{ + u64 psl_dsnctl, psl_fircntl; + u64 chipid; + u32 phb_index; + u64 capp_unit_id; + int rc; + + rc = cxl_calc_capp_routing(dev, &chipid, &phb_index, &capp_unit_id); + if (rc) + return rc; + + psl_dsnctl = 0x0000900000000000ULL; /* pteupd ttype, scdone */ + psl_dsnctl |= (0x2ULL << (63-38)); /* MMIO hang pulse: 256 us */ + /* Tell PSL where to route data to */ + psl_dsnctl |= (chipid << (63-5)); + psl_dsnctl |= (capp_unit_id << (63-13)); + + cxl_p1_write(adapter, CXL_PSL_DSNDCTL, psl_dsnctl); + cxl_p1_write(adapter, CXL_PSL_RESLCKTO, 0x20000000200ULL); + /* snoop write mask */ + cxl_p1_write(adapter, CXL_PSL_SNWRALLOC, 0x00000000FFFFFFFFULL); + /* set fir_cntl to recommended value for production env */ + psl_fircntl = (0x2ULL << (63-3)); /* ce_report */ + psl_fircntl |= (0x1ULL << (63-6)); /* FIR_report */ + psl_fircntl |= 0x1ULL; /* ce_thresh */ + cxl_p1_write(adapter, CXL_PSL_FIR_CNTL, psl_fircntl); + /* for debugging with trace arrays */ + cxl_p1_write(adapter, CXL_PSL_TRACE, 0x0000FF7C00000000ULL); + + return 0; +} + +/* PSL */ +#define TBSYNC_CAL(n) (((u64)n & 0x7) << (63-3)) +#define TBSYNC_CNT(n) (((u64)n & 0x7) << (63-6)) +/* For the PSL this is a multiple for 0 < n <= 7: */ +#define PSL_2048_250MHZ_CYCLES 1 + +static void write_timebase_ctrl_psl8(struct cxl *adapter) +{ + cxl_p1_write(adapter, CXL_PSL_TB_CTLSTAT, + TBSYNC_CNT(2 * PSL_2048_250MHZ_CYCLES)); +} + +static u64 timebase_read_psl9(struct cxl *adapter) +{ + return cxl_p1_read(adapter, CXL_PSL9_Timebase); +} + +static u64 timebase_read_psl8(struct cxl *adapter) +{ + return cxl_p1_read(adapter, CXL_PSL_Timebase); +} + +static void cxl_setup_psl_timebase(struct cxl *adapter, struct pci_dev *dev) +{ + struct device_node *np; + + adapter->psl_timebase_synced = false; + + if (!(np = pnv_pci_get_phb_node(dev))) + return; + + /* Do not fail when CAPP timebase sync is not supported by OPAL */ + of_node_get(np); + if (! of_get_property(np, "ibm,capp-timebase-sync", NULL)) { + of_node_put(np); + dev_info(&dev->dev, "PSL timebase inactive: OPAL support missing\n"); + return; + } + of_node_put(np); + + /* + * Setup PSL Timebase Control and Status register + * with the recommended Timebase Sync Count value + */ + if (adapter->native->sl_ops->write_timebase_ctrl) + adapter->native->sl_ops->write_timebase_ctrl(adapter); + + /* Enable PSL Timebase */ + cxl_p1_write(adapter, CXL_PSL_Control, 0x0000000000000000); + cxl_p1_write(adapter, CXL_PSL_Control, CXL_PSL_Control_tb); + + return; +} + +static int init_implementation_afu_regs_psl9(struct cxl_afu *afu) +{ + return 0; +} + +static int init_implementation_afu_regs_psl8(struct cxl_afu *afu) +{ + /* read/write masks for this slice */ + cxl_p1n_write(afu, CXL_PSL_APCALLOC_A, 0xFFFFFFFEFEFEFEFEULL); + /* APC read/write masks for this slice */ + cxl_p1n_write(afu, CXL_PSL_COALLOC_A, 0xFF000000FEFEFEFEULL); + /* for debugging with trace arrays */ + cxl_p1n_write(afu, CXL_PSL_SLICE_TRACE, 0x0000FFFF00000000ULL); + cxl_p1n_write(afu, CXL_PSL_RXCTL_A, CXL_PSL_RXCTL_AFUHP_4S); + + return 0; +} + +int cxl_pci_setup_irq(struct cxl *adapter, unsigned int hwirq, + unsigned int virq) +{ + struct pci_dev *dev = to_pci_dev(adapter->dev.parent); + + return pnv_cxl_ioda_msi_setup(dev, hwirq, virq); +} + +int cxl_update_image_control(struct cxl *adapter) +{ + struct pci_dev *dev = to_pci_dev(adapter->dev.parent); + int rc; + int vsec; + u8 image_state; + + if (!(vsec = find_cxl_vsec(dev))) { + dev_err(&dev->dev, "ABORTING: CXL VSEC not found!\n"); + return -ENODEV; + } + + if ((rc = CXL_READ_VSEC_IMAGE_STATE(dev, vsec, &image_state))) { + dev_err(&dev->dev, "failed to read image state: %i\n", rc); + return rc; + } + + if (adapter->perst_loads_image) + image_state |= CXL_VSEC_PERST_LOADS_IMAGE; + else + image_state &= ~CXL_VSEC_PERST_LOADS_IMAGE; + + if (adapter->perst_select_user) + image_state |= CXL_VSEC_PERST_SELECT_USER; + else + image_state &= ~CXL_VSEC_PERST_SELECT_USER; + + if ((rc = CXL_WRITE_VSEC_IMAGE_STATE(dev, vsec, image_state))) { + dev_err(&dev->dev, "failed to update image control: %i\n", rc); + return rc; + } + + return 0; +} + +int cxl_pci_alloc_one_irq(struct cxl *adapter) +{ + struct pci_dev *dev = to_pci_dev(adapter->dev.parent); + + return pnv_cxl_alloc_hwirqs(dev, 1); +} + +void cxl_pci_release_one_irq(struct cxl *adapter, int hwirq) +{ + struct pci_dev *dev = to_pci_dev(adapter->dev.parent); + + return pnv_cxl_release_hwirqs(dev, hwirq, 1); +} + +int cxl_pci_alloc_irq_ranges(struct cxl_irq_ranges *irqs, + struct cxl *adapter, unsigned int num) +{ + struct pci_dev *dev = to_pci_dev(adapter->dev.parent); + + return pnv_cxl_alloc_hwirq_ranges(irqs, dev, num); +} + +void cxl_pci_release_irq_ranges(struct cxl_irq_ranges *irqs, + struct cxl *adapter) +{ + struct pci_dev *dev = to_pci_dev(adapter->dev.parent); + + pnv_cxl_release_hwirq_ranges(irqs, dev); +} + +static int setup_cxl_bars(struct pci_dev *dev) +{ + /* Safety check in case we get backported to < 3.17 without M64 */ + if ((p1_base(dev) < 0x100000000ULL) || + (p2_base(dev) < 0x100000000ULL)) { + dev_err(&dev->dev, "ABORTING: M32 BAR assignment incompatible with CXL\n"); + return -ENODEV; + } + + /* + * BAR 4/5 has a special meaning for CXL and must be programmed with a + * special value corresponding to the CXL protocol address range. + * For POWER 8/9 that means bits 48:49 must be set to 10 + */ + pci_write_config_dword(dev, PCI_BASE_ADDRESS_4, 0x00000000); + pci_write_config_dword(dev, PCI_BASE_ADDRESS_5, 0x00020000); + + return 0; +} + +/* pciex node: ibm,opal-m64-window = <0x3d058 0x0 0x3d058 0x0 0x8 0x0>; */ +static int switch_card_to_cxl(struct pci_dev *dev) +{ + int vsec; + u8 val; + int rc; + + dev_info(&dev->dev, "switch card to CXL\n"); + + if (!(vsec = find_cxl_vsec(dev))) { + dev_err(&dev->dev, "ABORTING: CXL VSEC not found!\n"); + return -ENODEV; + } + + if ((rc = CXL_READ_VSEC_MODE_CONTROL(dev, vsec, &val))) { + dev_err(&dev->dev, "failed to read current mode control: %i", rc); + return rc; + } + val &= ~CXL_VSEC_PROTOCOL_MASK; + val |= CXL_VSEC_PROTOCOL_256TB | CXL_VSEC_PROTOCOL_ENABLE; + if ((rc = CXL_WRITE_VSEC_MODE_CONTROL(dev, vsec, val))) { + dev_err(&dev->dev, "failed to enable CXL protocol: %i", rc); + return rc; + } + /* + * The CAIA spec (v0.12 11.6 Bi-modal Device Support) states + * we must wait 100ms after this mode switch before touching + * PCIe config space. + */ + msleep(100); + + return 0; +} + +static int pci_map_slice_regs(struct cxl_afu *afu, struct cxl *adapter, struct pci_dev *dev) +{ + u64 p1n_base, p2n_base, afu_desc; + const u64 p1n_size = 0x100; + const u64 p2n_size = 0x1000; + + p1n_base = p1_base(dev) + 0x10000 + (afu->slice * p1n_size); + p2n_base = p2_base(dev) + (afu->slice * p2n_size); + afu->psn_phys = p2_base(dev) + (adapter->native->ps_off + (afu->slice * adapter->ps_size)); + afu_desc = p2_base(dev) + adapter->native->afu_desc_off + (afu->slice * adapter->native->afu_desc_size); + + if (!(afu->native->p1n_mmio = ioremap(p1n_base, p1n_size))) + goto err; + if (!(afu->p2n_mmio = ioremap(p2n_base, p2n_size))) + goto err1; + if (afu_desc) { + if (!(afu->native->afu_desc_mmio = ioremap(afu_desc, adapter->native->afu_desc_size))) + goto err2; + } + + return 0; +err2: + iounmap(afu->p2n_mmio); +err1: + iounmap(afu->native->p1n_mmio); +err: + dev_err(&afu->dev, "Error mapping AFU MMIO regions\n"); + return -ENOMEM; +} + +static void pci_unmap_slice_regs(struct cxl_afu *afu) +{ + if (afu->p2n_mmio) { + iounmap(afu->p2n_mmio); + afu->p2n_mmio = NULL; + } + if (afu->native->p1n_mmio) { + iounmap(afu->native->p1n_mmio); + afu->native->p1n_mmio = NULL; + } + if (afu->native->afu_desc_mmio) { + iounmap(afu->native->afu_desc_mmio); + afu->native->afu_desc_mmio = NULL; + } +} + +void cxl_pci_release_afu(struct device *dev) +{ + struct cxl_afu *afu = to_cxl_afu(dev); + + pr_devel("%s\n", __func__); + + idr_destroy(&afu->contexts_idr); + cxl_release_spa(afu); + + kfree(afu->native); + kfree(afu); +} + +/* Expects AFU struct to have recently been zeroed out */ +static int cxl_read_afu_descriptor(struct cxl_afu *afu) +{ + u64 val; + + val = AFUD_READ_INFO(afu); + afu->pp_irqs = AFUD_NUM_INTS_PER_PROC(val); + afu->max_procs_virtualised = AFUD_NUM_PROCS(val); + afu->crs_num = AFUD_NUM_CRS(val); + + if (AFUD_AFU_DIRECTED(val)) + afu->modes_supported |= CXL_MODE_DIRECTED; + if (AFUD_DEDICATED_PROCESS(val)) + afu->modes_supported |= CXL_MODE_DEDICATED; + if (AFUD_TIME_SLICED(val)) + afu->modes_supported |= CXL_MODE_TIME_SLICED; + + val = AFUD_READ_PPPSA(afu); + afu->pp_size = AFUD_PPPSA_LEN(val) * 4096; + afu->psa = AFUD_PPPSA_PSA(val); + if ((afu->pp_psa = AFUD_PPPSA_PP(val))) + afu->native->pp_offset = AFUD_READ_PPPSA_OFF(afu); + + val = AFUD_READ_CR(afu); + afu->crs_len = AFUD_CR_LEN(val) * 256; + afu->crs_offset = AFUD_READ_CR_OFF(afu); + + + /* eb_len is in multiple of 4K */ + afu->eb_len = AFUD_EB_LEN(AFUD_READ_EB(afu)) * 4096; + afu->eb_offset = AFUD_READ_EB_OFF(afu); + + /* eb_off is 4K aligned so lower 12 bits are always zero */ + if (EXTRACT_PPC_BITS(afu->eb_offset, 0, 11) != 0) { + dev_warn(&afu->dev, + "Invalid AFU error buffer offset %Lx\n", + afu->eb_offset); + dev_info(&afu->dev, + "Ignoring AFU error buffer in the descriptor\n"); + /* indicate that no afu buffer exists */ + afu->eb_len = 0; + } + + return 0; +} + +static int cxl_afu_descriptor_looks_ok(struct cxl_afu *afu) +{ + int i, rc; + u32 val; + + if (afu->psa && afu->adapter->ps_size < + (afu->native->pp_offset + afu->pp_size*afu->max_procs_virtualised)) { + dev_err(&afu->dev, "per-process PSA can't fit inside the PSA!\n"); + return -ENODEV; + } + + if (afu->pp_psa && (afu->pp_size < PAGE_SIZE)) + dev_warn(&afu->dev, "AFU uses pp_size(%#016llx) < PAGE_SIZE per-process PSA!\n", afu->pp_size); + + for (i = 0; i < afu->crs_num; i++) { + rc = cxl_ops->afu_cr_read32(afu, i, 0, &val); + if (rc || val == 0) { + dev_err(&afu->dev, "ABORTING: AFU configuration record %i is invalid\n", i); + return -EINVAL; + } + } + + if ((afu->modes_supported & ~CXL_MODE_DEDICATED) && afu->max_procs_virtualised == 0) { + /* + * We could also check this for the dedicated process model + * since the architecture indicates it should be set to 1, but + * in that case we ignore the value and I'd rather not risk + * breaking any existing dedicated process AFUs that left it as + * 0 (not that I'm aware of any). It is clearly an error for an + * AFU directed AFU to set this to 0, and would have previously + * triggered a bug resulting in the maximum not being enforced + * at all since idr_alloc treats 0 as no maximum. + */ + dev_err(&afu->dev, "AFU does not support any processes\n"); + return -EINVAL; + } + + return 0; +} + +static int sanitise_afu_regs_psl9(struct cxl_afu *afu) +{ + u64 reg; + + /* + * Clear out any regs that contain either an IVTE or address or may be + * waiting on an acknowledgment to try to be a bit safer as we bring + * it online + */ + reg = cxl_p2n_read(afu, CXL_AFU_Cntl_An); + if ((reg & CXL_AFU_Cntl_An_ES_MASK) != CXL_AFU_Cntl_An_ES_Disabled) { + dev_warn(&afu->dev, "WARNING: AFU was not disabled: %#016llx\n", reg); + if (cxl_ops->afu_reset(afu)) + return -EIO; + if (cxl_afu_disable(afu)) + return -EIO; + if (cxl_psl_purge(afu)) + return -EIO; + } + cxl_p1n_write(afu, CXL_PSL_SPAP_An, 0x0000000000000000); + cxl_p1n_write(afu, CXL_PSL_AMBAR_An, 0x0000000000000000); + reg = cxl_p2n_read(afu, CXL_PSL_DSISR_An); + if (reg) { + dev_warn(&afu->dev, "AFU had pending DSISR: %#016llx\n", reg); + if (reg & CXL_PSL9_DSISR_An_TF) + cxl_p2n_write(afu, CXL_PSL_TFC_An, CXL_PSL_TFC_An_AE); + else + cxl_p2n_write(afu, CXL_PSL_TFC_An, CXL_PSL_TFC_An_A); + } + if (afu->adapter->native->sl_ops->register_serr_irq) { + reg = cxl_p1n_read(afu, CXL_PSL_SERR_An); + if (reg) { + if (reg & ~0x000000007fffffff) + dev_warn(&afu->dev, "AFU had pending SERR: %#016llx\n", reg); + cxl_p1n_write(afu, CXL_PSL_SERR_An, reg & ~0xffff); + } + } + reg = cxl_p2n_read(afu, CXL_PSL_ErrStat_An); + if (reg) { + dev_warn(&afu->dev, "AFU had pending error status: %#016llx\n", reg); + cxl_p2n_write(afu, CXL_PSL_ErrStat_An, reg); + } + + return 0; +} + +static int sanitise_afu_regs_psl8(struct cxl_afu *afu) +{ + u64 reg; + + /* + * Clear out any regs that contain either an IVTE or address or may be + * waiting on an acknowledgement to try to be a bit safer as we bring + * it online + */ + reg = cxl_p2n_read(afu, CXL_AFU_Cntl_An); + if ((reg & CXL_AFU_Cntl_An_ES_MASK) != CXL_AFU_Cntl_An_ES_Disabled) { + dev_warn(&afu->dev, "WARNING: AFU was not disabled: %#016llx\n", reg); + if (cxl_ops->afu_reset(afu)) + return -EIO; + if (cxl_afu_disable(afu)) + return -EIO; + if (cxl_psl_purge(afu)) + return -EIO; + } + cxl_p1n_write(afu, CXL_PSL_SPAP_An, 0x0000000000000000); + cxl_p1n_write(afu, CXL_PSL_IVTE_Limit_An, 0x0000000000000000); + cxl_p1n_write(afu, CXL_PSL_IVTE_Offset_An, 0x0000000000000000); + cxl_p1n_write(afu, CXL_PSL_AMBAR_An, 0x0000000000000000); + cxl_p1n_write(afu, CXL_PSL_SPOffset_An, 0x0000000000000000); + cxl_p1n_write(afu, CXL_HAURP_An, 0x0000000000000000); + cxl_p2n_write(afu, CXL_CSRP_An, 0x0000000000000000); + cxl_p2n_write(afu, CXL_AURP1_An, 0x0000000000000000); + cxl_p2n_write(afu, CXL_AURP0_An, 0x0000000000000000); + cxl_p2n_write(afu, CXL_SSTP1_An, 0x0000000000000000); + cxl_p2n_write(afu, CXL_SSTP0_An, 0x0000000000000000); + reg = cxl_p2n_read(afu, CXL_PSL_DSISR_An); + if (reg) { + dev_warn(&afu->dev, "AFU had pending DSISR: %#016llx\n", reg); + if (reg & CXL_PSL_DSISR_TRANS) + cxl_p2n_write(afu, CXL_PSL_TFC_An, CXL_PSL_TFC_An_AE); + else + cxl_p2n_write(afu, CXL_PSL_TFC_An, CXL_PSL_TFC_An_A); + } + if (afu->adapter->native->sl_ops->register_serr_irq) { + reg = cxl_p1n_read(afu, CXL_PSL_SERR_An); + if (reg) { + if (reg & ~0xffff) + dev_warn(&afu->dev, "AFU had pending SERR: %#016llx\n", reg); + cxl_p1n_write(afu, CXL_PSL_SERR_An, reg & ~0xffff); + } + } + reg = cxl_p2n_read(afu, CXL_PSL_ErrStat_An); + if (reg) { + dev_warn(&afu->dev, "AFU had pending error status: %#016llx\n", reg); + cxl_p2n_write(afu, CXL_PSL_ErrStat_An, reg); + } + + return 0; +} + +#define ERR_BUFF_MAX_COPY_SIZE PAGE_SIZE +/* + * afu_eb_read: + * Called from sysfs and reads the afu error info buffer. The h/w only supports + * 4/8 bytes aligned access. So in case the requested offset/count arent 8 byte + * aligned the function uses a bounce buffer which can be max PAGE_SIZE. + */ +ssize_t cxl_pci_afu_read_err_buffer(struct cxl_afu *afu, char *buf, + loff_t off, size_t count) +{ + loff_t aligned_start, aligned_end; + size_t aligned_length; + void *tbuf; + const void __iomem *ebuf = afu->native->afu_desc_mmio + afu->eb_offset; + + if (count == 0 || off < 0 || (size_t)off >= afu->eb_len) + return 0; + + /* calculate aligned read window */ + count = min((size_t)(afu->eb_len - off), count); + aligned_start = round_down(off, 8); + aligned_end = round_up(off + count, 8); + aligned_length = aligned_end - aligned_start; + + /* max we can copy in one read is PAGE_SIZE */ + if (aligned_length > ERR_BUFF_MAX_COPY_SIZE) { + aligned_length = ERR_BUFF_MAX_COPY_SIZE; + count = ERR_BUFF_MAX_COPY_SIZE - (off & 0x7); + } + + /* use bounce buffer for copy */ + tbuf = (void *)__get_free_page(GFP_KERNEL); + if (!tbuf) + return -ENOMEM; + + /* perform aligned read from the mmio region */ + memcpy_fromio(tbuf, ebuf + aligned_start, aligned_length); + memcpy(buf, tbuf + (off & 0x7), count); + + free_page((unsigned long)tbuf); + + return count; +} + +static int pci_configure_afu(struct cxl_afu *afu, struct cxl *adapter, struct pci_dev *dev) +{ + int rc; + + if ((rc = pci_map_slice_regs(afu, adapter, dev))) + return rc; + + if (adapter->native->sl_ops->sanitise_afu_regs) { + rc = adapter->native->sl_ops->sanitise_afu_regs(afu); + if (rc) + goto err1; + } + + /* We need to reset the AFU before we can read the AFU descriptor */ + if ((rc = cxl_ops->afu_reset(afu))) + goto err1; + + if (cxl_verbose) + dump_afu_descriptor(afu); + + if ((rc = cxl_read_afu_descriptor(afu))) + goto err1; + + if ((rc = cxl_afu_descriptor_looks_ok(afu))) + goto err1; + + if (adapter->native->sl_ops->afu_regs_init) + if ((rc = adapter->native->sl_ops->afu_regs_init(afu))) + goto err1; + + if (adapter->native->sl_ops->register_serr_irq) + if ((rc = adapter->native->sl_ops->register_serr_irq(afu))) + goto err1; + + if ((rc = cxl_native_register_psl_irq(afu))) + goto err2; + + atomic_set(&afu->configured_state, 0); + return 0; + +err2: + if (adapter->native->sl_ops->release_serr_irq) + adapter->native->sl_ops->release_serr_irq(afu); +err1: + pci_unmap_slice_regs(afu); + return rc; +} + +static void pci_deconfigure_afu(struct cxl_afu *afu) +{ + /* + * It's okay to deconfigure when AFU is already locked, otherwise wait + * until there are no readers + */ + if (atomic_read(&afu->configured_state) != -1) { + while (atomic_cmpxchg(&afu->configured_state, 0, -1) != -1) + schedule(); + } + cxl_native_release_psl_irq(afu); + if (afu->adapter->native->sl_ops->release_serr_irq) + afu->adapter->native->sl_ops->release_serr_irq(afu); + pci_unmap_slice_regs(afu); +} + +static int pci_init_afu(struct cxl *adapter, int slice, struct pci_dev *dev) +{ + struct cxl_afu *afu; + int rc = -ENOMEM; + + afu = cxl_alloc_afu(adapter, slice); + if (!afu) + return -ENOMEM; + + afu->native = kzalloc(sizeof(struct cxl_afu_native), GFP_KERNEL); + if (!afu->native) + goto err_free_afu; + + mutex_init(&afu->native->spa_mutex); + + rc = dev_set_name(&afu->dev, "afu%i.%i", adapter->adapter_num, slice); + if (rc) + goto err_free_native; + + rc = pci_configure_afu(afu, adapter, dev); + if (rc) + goto err_free_native; + + /* Don't care if this fails */ + cxl_debugfs_afu_add(afu); + + /* + * After we call this function we must not free the afu directly, even + * if it returns an error! + */ + if ((rc = cxl_register_afu(afu))) + goto err_put1; + + if ((rc = cxl_sysfs_afu_add(afu))) + goto err_put1; + + adapter->afu[afu->slice] = afu; + + if ((rc = cxl_pci_vphb_add(afu))) + dev_info(&afu->dev, "Can't register vPHB\n"); + + return 0; + +err_put1: + pci_deconfigure_afu(afu); + cxl_debugfs_afu_remove(afu); + device_unregister(&afu->dev); + return rc; + +err_free_native: + kfree(afu->native); +err_free_afu: + kfree(afu); + return rc; + +} + +static void cxl_pci_remove_afu(struct cxl_afu *afu) +{ + pr_devel("%s\n", __func__); + + if (!afu) + return; + + cxl_pci_vphb_remove(afu); + cxl_sysfs_afu_remove(afu); + cxl_debugfs_afu_remove(afu); + + spin_lock(&afu->adapter->afu_list_lock); + afu->adapter->afu[afu->slice] = NULL; + spin_unlock(&afu->adapter->afu_list_lock); + + cxl_context_detach_all(afu); + cxl_ops->afu_deactivate_mode(afu, afu->current_mode); + + pci_deconfigure_afu(afu); + device_unregister(&afu->dev); +} + +int cxl_pci_reset(struct cxl *adapter) +{ + struct pci_dev *dev = to_pci_dev(adapter->dev.parent); + int rc; + + if (adapter->perst_same_image) { + dev_warn(&dev->dev, + "cxl: refusing to reset/reflash when perst_reloads_same_image is set.\n"); + return -EINVAL; + } + + dev_info(&dev->dev, "CXL reset\n"); + + /* + * The adapter is about to be reset, so ignore errors. + */ + cxl_data_cache_flush(adapter); + + /* pcie_warm_reset requests a fundamental pci reset which includes a + * PERST assert/deassert. PERST triggers a loading of the image + * if "user" or "factory" is selected in sysfs */ + if ((rc = pci_set_pcie_reset_state(dev, pcie_warm_reset))) { + dev_err(&dev->dev, "cxl: pcie_warm_reset failed\n"); + return rc; + } + + return rc; +} + +static int cxl_map_adapter_regs(struct cxl *adapter, struct pci_dev *dev) +{ + if (pci_request_region(dev, 2, "priv 2 regs")) + goto err1; + if (pci_request_region(dev, 0, "priv 1 regs")) + goto err2; + + pr_devel("cxl_map_adapter_regs: p1: %#016llx %#llx, p2: %#016llx %#llx", + p1_base(dev), p1_size(dev), p2_base(dev), p2_size(dev)); + + if (!(adapter->native->p1_mmio = ioremap(p1_base(dev), p1_size(dev)))) + goto err3; + + if (!(adapter->native->p2_mmio = ioremap(p2_base(dev), p2_size(dev)))) + goto err4; + + return 0; + +err4: + iounmap(adapter->native->p1_mmio); + adapter->native->p1_mmio = NULL; +err3: + pci_release_region(dev, 0); +err2: + pci_release_region(dev, 2); +err1: + return -ENOMEM; +} + +static void cxl_unmap_adapter_regs(struct cxl *adapter) +{ + if (adapter->native->p1_mmio) { + iounmap(adapter->native->p1_mmio); + adapter->native->p1_mmio = NULL; + pci_release_region(to_pci_dev(adapter->dev.parent), 2); + } + if (adapter->native->p2_mmio) { + iounmap(adapter->native->p2_mmio); + adapter->native->p2_mmio = NULL; + pci_release_region(to_pci_dev(adapter->dev.parent), 0); + } +} + +static int cxl_read_vsec(struct cxl *adapter, struct pci_dev *dev) +{ + int vsec; + u32 afu_desc_off, afu_desc_size; + u32 ps_off, ps_size; + u16 vseclen; + u8 image_state; + + if (!(vsec = find_cxl_vsec(dev))) { + dev_err(&dev->dev, "ABORTING: CXL VSEC not found!\n"); + return -ENODEV; + } + + CXL_READ_VSEC_LENGTH(dev, vsec, &vseclen); + if (vseclen < CXL_VSEC_MIN_SIZE) { + dev_err(&dev->dev, "ABORTING: CXL VSEC too short\n"); + return -EINVAL; + } + + CXL_READ_VSEC_STATUS(dev, vsec, &adapter->vsec_status); + CXL_READ_VSEC_PSL_REVISION(dev, vsec, &adapter->psl_rev); + CXL_READ_VSEC_CAIA_MAJOR(dev, vsec, &adapter->caia_major); + CXL_READ_VSEC_CAIA_MINOR(dev, vsec, &adapter->caia_minor); + CXL_READ_VSEC_BASE_IMAGE(dev, vsec, &adapter->base_image); + CXL_READ_VSEC_IMAGE_STATE(dev, vsec, &image_state); + adapter->user_image_loaded = !!(image_state & CXL_VSEC_USER_IMAGE_LOADED); + adapter->perst_select_user = !!(image_state & CXL_VSEC_USER_IMAGE_LOADED); + adapter->perst_loads_image = !!(image_state & CXL_VSEC_PERST_LOADS_IMAGE); + + CXL_READ_VSEC_NAFUS(dev, vsec, &adapter->slices); + CXL_READ_VSEC_AFU_DESC_OFF(dev, vsec, &afu_desc_off); + CXL_READ_VSEC_AFU_DESC_SIZE(dev, vsec, &afu_desc_size); + CXL_READ_VSEC_PS_OFF(dev, vsec, &ps_off); + CXL_READ_VSEC_PS_SIZE(dev, vsec, &ps_size); + + /* Convert everything to bytes, because there is NO WAY I'd look at the + * code a month later and forget what units these are in ;-) */ + adapter->native->ps_off = ps_off * 64 * 1024; + adapter->ps_size = ps_size * 64 * 1024; + adapter->native->afu_desc_off = afu_desc_off * 64 * 1024; + adapter->native->afu_desc_size = afu_desc_size * 64 * 1024; + + /* Total IRQs - 1 PSL ERROR - #AFU*(1 slice error + 1 DSI) */ + adapter->user_irqs = pnv_cxl_get_irq_count(dev) - 1 - 2*adapter->slices; + + return 0; +} + +/* + * Workaround a PCIe Host Bridge defect on some cards, that can cause + * malformed Transaction Layer Packet (TLP) errors to be erroneously + * reported. Mask this error in the Uncorrectable Error Mask Register. + * + * The upper nibble of the PSL revision is used to distinguish between + * different cards. The affected ones have it set to 0. + */ +static void cxl_fixup_malformed_tlp(struct cxl *adapter, struct pci_dev *dev) +{ + int aer; + u32 data; + + if (adapter->psl_rev & 0xf000) + return; + if (!(aer = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR))) + return; + pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_MASK, &data); + if (data & PCI_ERR_UNC_MALF_TLP) + if (data & PCI_ERR_UNC_INTN) + return; + data |= PCI_ERR_UNC_MALF_TLP; + data |= PCI_ERR_UNC_INTN; + pci_write_config_dword(dev, aer + PCI_ERR_UNCOR_MASK, data); +} + +static bool cxl_compatible_caia_version(struct cxl *adapter) +{ + if (cxl_is_power8() && (adapter->caia_major == 1)) + return true; + + if (cxl_is_power9() && (adapter->caia_major == 2)) + return true; + + return false; +} + +static int cxl_vsec_looks_ok(struct cxl *adapter, struct pci_dev *dev) +{ + if (adapter->vsec_status & CXL_STATUS_SECOND_PORT) + return -EBUSY; + + if (adapter->vsec_status & CXL_UNSUPPORTED_FEATURES) { + dev_err(&dev->dev, "ABORTING: CXL requires unsupported features\n"); + return -EINVAL; + } + + if (!cxl_compatible_caia_version(adapter)) { + dev_info(&dev->dev, "Ignoring card. PSL type is not supported (caia version: %d)\n", + adapter->caia_major); + return -ENODEV; + } + + if (!adapter->slices) { + /* Once we support dynamic reprogramming we can use the card if + * it supports loadable AFUs */ + dev_err(&dev->dev, "ABORTING: Device has no AFUs\n"); + return -EINVAL; + } + + if (!adapter->native->afu_desc_off || !adapter->native->afu_desc_size) { + dev_err(&dev->dev, "ABORTING: VSEC shows no AFU descriptors\n"); + return -EINVAL; + } + + if (adapter->ps_size > p2_size(dev) - adapter->native->ps_off) { + dev_err(&dev->dev, "ABORTING: Problem state size larger than " + "available in BAR2: 0x%llx > 0x%llx\n", + adapter->ps_size, p2_size(dev) - adapter->native->ps_off); + return -EINVAL; + } + + return 0; +} + +ssize_t cxl_pci_read_adapter_vpd(struct cxl *adapter, void *buf, size_t len) +{ + return pci_read_vpd(to_pci_dev(adapter->dev.parent), 0, len, buf); +} + +static void cxl_release_adapter(struct device *dev) +{ + struct cxl *adapter = to_cxl_adapter(dev); + + pr_devel("cxl_release_adapter\n"); + + cxl_remove_adapter_nr(adapter); + + kfree(adapter->native); + kfree(adapter); +} + +#define CXL_PSL_ErrIVTE_tberror (0x1ull << (63-31)) + +static int sanitise_adapter_regs(struct cxl *adapter) +{ + int rc = 0; + + /* Clear PSL tberror bit by writing 1 to it */ + cxl_p1_write(adapter, CXL_PSL_ErrIVTE, CXL_PSL_ErrIVTE_tberror); + + if (adapter->native->sl_ops->invalidate_all) { + /* do not invalidate ERAT entries when not reloading on PERST */ + if (cxl_is_power9() && (adapter->perst_loads_image)) + return 0; + rc = adapter->native->sl_ops->invalidate_all(adapter); + } + + return rc; +} + +/* This should contain *only* operations that can safely be done in + * both creation and recovery. + */ +static int cxl_configure_adapter(struct cxl *adapter, struct pci_dev *dev) +{ + int rc; + + adapter->dev.parent = &dev->dev; + adapter->dev.release = cxl_release_adapter; + pci_set_drvdata(dev, adapter); + + rc = pci_enable_device(dev); + if (rc) { + dev_err(&dev->dev, "pci_enable_device failed: %i\n", rc); + return rc; + } + + if ((rc = cxl_read_vsec(adapter, dev))) + return rc; + + if ((rc = cxl_vsec_looks_ok(adapter, dev))) + return rc; + + cxl_fixup_malformed_tlp(adapter, dev); + + if ((rc = setup_cxl_bars(dev))) + return rc; + + if ((rc = switch_card_to_cxl(dev))) + return rc; + + if ((rc = cxl_update_image_control(adapter))) + return rc; + + if ((rc = cxl_map_adapter_regs(adapter, dev))) + return rc; + + if ((rc = sanitise_adapter_regs(adapter))) + goto err; + + if ((rc = adapter->native->sl_ops->adapter_regs_init(adapter, dev))) + goto err; + + /* Required for devices using CAPP DMA mode, harmless for others */ + pci_set_master(dev); + + adapter->tunneled_ops_supported = false; + + if (cxl_is_power9()) { + if (pnv_pci_set_tunnel_bar(dev, 0x00020000E0000000ull, 1)) + dev_info(&dev->dev, "Tunneled operations unsupported\n"); + else + adapter->tunneled_ops_supported = true; + } + + if ((rc = pnv_phb_to_cxl_mode(dev, adapter->native->sl_ops->capi_mode))) + goto err; + + /* If recovery happened, the last step is to turn on snooping. + * In the non-recovery case this has no effect */ + if ((rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_SNOOP_ON))) + goto err; + + /* Ignore error, adapter init is not dependant on timebase sync */ + cxl_setup_psl_timebase(adapter, dev); + + if ((rc = cxl_native_register_psl_err_irq(adapter))) + goto err; + + return 0; + +err: + cxl_unmap_adapter_regs(adapter); + return rc; + +} + +static void cxl_deconfigure_adapter(struct cxl *adapter) +{ + struct pci_dev *pdev = to_pci_dev(adapter->dev.parent); + + if (cxl_is_power9()) + pnv_pci_set_tunnel_bar(pdev, 0x00020000E0000000ull, 0); + + cxl_native_release_psl_err_irq(adapter); + cxl_unmap_adapter_regs(adapter); + + pci_disable_device(pdev); +} + +static void cxl_stop_trace_psl9(struct cxl *adapter) +{ + int traceid; + u64 trace_state, trace_mask; + struct pci_dev *dev = to_pci_dev(adapter->dev.parent); + + /* read each tracearray state and issue mmio to stop them is needed */ + for (traceid = 0; traceid <= CXL_PSL9_TRACEID_MAX; ++traceid) { + trace_state = cxl_p1_read(adapter, CXL_PSL9_CTCCFG); + trace_mask = (0x3ULL << (62 - traceid * 2)); + trace_state = (trace_state & trace_mask) >> (62 - traceid * 2); + dev_dbg(&dev->dev, "cxl: Traceid-%d trace_state=0x%0llX\n", + traceid, trace_state); + + /* issue mmio if the trace array isn't in FIN state */ + if (trace_state != CXL_PSL9_TRACESTATE_FIN) + cxl_p1_write(adapter, CXL_PSL9_TRACECFG, + 0x8400000000000000ULL | traceid); + } +} + +static void cxl_stop_trace_psl8(struct cxl *adapter) +{ + int slice; + + /* Stop the trace */ + cxl_p1_write(adapter, CXL_PSL_TRACE, 0x8000000000000017LL); + + /* Stop the slice traces */ + spin_lock(&adapter->afu_list_lock); + for (slice = 0; slice < adapter->slices; slice++) { + if (adapter->afu[slice]) + cxl_p1n_write(adapter->afu[slice], CXL_PSL_SLICE_TRACE, + 0x8000000000000000LL); + } + spin_unlock(&adapter->afu_list_lock); +} + +static const struct cxl_service_layer_ops psl9_ops = { + .adapter_regs_init = init_implementation_adapter_regs_psl9, + .invalidate_all = cxl_invalidate_all_psl9, + .afu_regs_init = init_implementation_afu_regs_psl9, + .sanitise_afu_regs = sanitise_afu_regs_psl9, + .register_serr_irq = cxl_native_register_serr_irq, + .release_serr_irq = cxl_native_release_serr_irq, + .handle_interrupt = cxl_irq_psl9, + .fail_irq = cxl_fail_irq_psl, + .activate_dedicated_process = cxl_activate_dedicated_process_psl9, + .attach_afu_directed = cxl_attach_afu_directed_psl9, + .attach_dedicated_process = cxl_attach_dedicated_process_psl9, + .update_dedicated_ivtes = cxl_update_dedicated_ivtes_psl9, + .debugfs_add_adapter_regs = cxl_debugfs_add_adapter_regs_psl9, + .debugfs_add_afu_regs = cxl_debugfs_add_afu_regs_psl9, + .psl_irq_dump_registers = cxl_native_irq_dump_regs_psl9, + .err_irq_dump_registers = cxl_native_err_irq_dump_regs_psl9, + .debugfs_stop_trace = cxl_stop_trace_psl9, + .timebase_read = timebase_read_psl9, + .capi_mode = OPAL_PHB_CAPI_MODE_CAPI, + .needs_reset_before_disable = true, +}; + +static const struct cxl_service_layer_ops psl8_ops = { + .adapter_regs_init = init_implementation_adapter_regs_psl8, + .invalidate_all = cxl_invalidate_all_psl8, + .afu_regs_init = init_implementation_afu_regs_psl8, + .sanitise_afu_regs = sanitise_afu_regs_psl8, + .register_serr_irq = cxl_native_register_serr_irq, + .release_serr_irq = cxl_native_release_serr_irq, + .handle_interrupt = cxl_irq_psl8, + .fail_irq = cxl_fail_irq_psl, + .activate_dedicated_process = cxl_activate_dedicated_process_psl8, + .attach_afu_directed = cxl_attach_afu_directed_psl8, + .attach_dedicated_process = cxl_attach_dedicated_process_psl8, + .update_dedicated_ivtes = cxl_update_dedicated_ivtes_psl8, + .debugfs_add_adapter_regs = cxl_debugfs_add_adapter_regs_psl8, + .debugfs_add_afu_regs = cxl_debugfs_add_afu_regs_psl8, + .psl_irq_dump_registers = cxl_native_irq_dump_regs_psl8, + .err_irq_dump_registers = cxl_native_err_irq_dump_regs_psl8, + .debugfs_stop_trace = cxl_stop_trace_psl8, + .write_timebase_ctrl = write_timebase_ctrl_psl8, + .timebase_read = timebase_read_psl8, + .capi_mode = OPAL_PHB_CAPI_MODE_CAPI, + .needs_reset_before_disable = true, +}; + +static void set_sl_ops(struct cxl *adapter, struct pci_dev *dev) +{ + if (cxl_is_power8()) { + dev_info(&dev->dev, "Device uses a PSL8\n"); + adapter->native->sl_ops = &psl8_ops; + } else { + dev_info(&dev->dev, "Device uses a PSL9\n"); + adapter->native->sl_ops = &psl9_ops; + } +} + + +static struct cxl *cxl_pci_init_adapter(struct pci_dev *dev) +{ + struct cxl *adapter; + int rc; + + adapter = cxl_alloc_adapter(); + if (!adapter) + return ERR_PTR(-ENOMEM); + + adapter->native = kzalloc(sizeof(struct cxl_native), GFP_KERNEL); + if (!adapter->native) { + rc = -ENOMEM; + goto err_release; + } + + set_sl_ops(adapter, dev); + + /* Set defaults for parameters which need to persist over + * configure/reconfigure + */ + adapter->perst_loads_image = true; + adapter->perst_same_image = false; + + rc = cxl_configure_adapter(adapter, dev); + if (rc) { + pci_disable_device(dev); + goto err_release; + } + + /* Don't care if this one fails: */ + cxl_debugfs_adapter_add(adapter); + + /* + * After we call this function we must not free the adapter directly, + * even if it returns an error! + */ + if ((rc = cxl_register_adapter(adapter))) + goto err_put1; + + if ((rc = cxl_sysfs_adapter_add(adapter))) + goto err_put1; + + /* Release the context lock as adapter is configured */ + cxl_adapter_context_unlock(adapter); + + return adapter; + +err_put1: + /* This should mirror cxl_remove_adapter, except without the + * sysfs parts + */ + cxl_debugfs_adapter_remove(adapter); + cxl_deconfigure_adapter(adapter); + device_unregister(&adapter->dev); + return ERR_PTR(rc); + +err_release: + cxl_release_adapter(&adapter->dev); + return ERR_PTR(rc); +} + +static void cxl_pci_remove_adapter(struct cxl *adapter) +{ + pr_devel("cxl_remove_adapter\n"); + + cxl_sysfs_adapter_remove(adapter); + cxl_debugfs_adapter_remove(adapter); + + /* + * Flush adapter datacache as its about to be removed. + */ + cxl_data_cache_flush(adapter); + + cxl_deconfigure_adapter(adapter); + + device_unregister(&adapter->dev); +} + +#define CXL_MAX_PCIEX_PARENT 2 + +int cxl_slot_is_switched(struct pci_dev *dev) +{ + struct device_node *np; + int depth = 0; + const __be32 *prop; + + if (!(np = pci_device_to_OF_node(dev))) { + pr_err("cxl: np = NULL\n"); + return -ENODEV; + } + of_node_get(np); + while (np) { + np = of_get_next_parent(np); + prop = of_get_property(np, "device_type", NULL); + if (!prop || strcmp((char *)prop, "pciex")) + break; + depth++; + } + of_node_put(np); + return (depth > CXL_MAX_PCIEX_PARENT); +} + +static int cxl_probe(struct pci_dev *dev, const struct pci_device_id *id) +{ + struct cxl *adapter; + int slice; + int rc; + + if (cxl_pci_is_vphb_device(dev)) { + dev_dbg(&dev->dev, "cxl_init_adapter: Ignoring cxl vphb device\n"); + return -ENODEV; + } + + if (cxl_slot_is_switched(dev)) { + dev_info(&dev->dev, "Ignoring card on incompatible PCI slot\n"); + return -ENODEV; + } + + if (cxl_is_power9() && !radix_enabled()) { + dev_info(&dev->dev, "Only Radix mode supported\n"); + return -ENODEV; + } + + if (cxl_verbose) + dump_cxl_config_space(dev); + + adapter = cxl_pci_init_adapter(dev); + if (IS_ERR(adapter)) { + dev_err(&dev->dev, "cxl_init_adapter failed: %li\n", PTR_ERR(adapter)); + return PTR_ERR(adapter); + } + + for (slice = 0; slice < adapter->slices; slice++) { + if ((rc = pci_init_afu(adapter, slice, dev))) { + dev_err(&dev->dev, "AFU %i failed to initialise: %i\n", slice, rc); + continue; + } + + rc = cxl_afu_select_best_mode(adapter->afu[slice]); + if (rc) + dev_err(&dev->dev, "AFU %i failed to start: %i\n", slice, rc); + } + + return 0; +} + +static void cxl_remove(struct pci_dev *dev) +{ + struct cxl *adapter = pci_get_drvdata(dev); + struct cxl_afu *afu; + int i; + + /* + * Lock to prevent someone grabbing a ref through the adapter list as + * we are removing it + */ + for (i = 0; i < adapter->slices; i++) { + afu = adapter->afu[i]; + cxl_pci_remove_afu(afu); + } + cxl_pci_remove_adapter(adapter); +} + +static pci_ers_result_t cxl_vphb_error_detected(struct cxl_afu *afu, + pci_channel_state_t state) +{ + struct pci_dev *afu_dev; + pci_ers_result_t result = PCI_ERS_RESULT_NEED_RESET; + pci_ers_result_t afu_result = PCI_ERS_RESULT_NEED_RESET; + + /* There should only be one entry, but go through the list + * anyway + */ + if (afu == NULL || afu->phb == NULL) + return result; + + list_for_each_entry(afu_dev, &afu->phb->bus->devices, bus_list) { + if (!afu_dev->driver) + continue; + + afu_dev->error_state = state; + + if (afu_dev->driver->err_handler) + afu_result = afu_dev->driver->err_handler->error_detected(afu_dev, + state); + /* Disconnect trumps all, NONE trumps NEED_RESET */ + if (afu_result == PCI_ERS_RESULT_DISCONNECT) + result = PCI_ERS_RESULT_DISCONNECT; + else if ((afu_result == PCI_ERS_RESULT_NONE) && + (result == PCI_ERS_RESULT_NEED_RESET)) + result = PCI_ERS_RESULT_NONE; + } + return result; +} + +static pci_ers_result_t cxl_pci_error_detected(struct pci_dev *pdev, + pci_channel_state_t state) +{ + struct cxl *adapter = pci_get_drvdata(pdev); + struct cxl_afu *afu; + pci_ers_result_t result = PCI_ERS_RESULT_NEED_RESET; + pci_ers_result_t afu_result = PCI_ERS_RESULT_NEED_RESET; + int i; + + /* At this point, we could still have an interrupt pending. + * Let's try to get them out of the way before they do + * anything we don't like. + */ + schedule(); + + /* If we're permanently dead, give up. */ + if (state == pci_channel_io_perm_failure) { + spin_lock(&adapter->afu_list_lock); + for (i = 0; i < adapter->slices; i++) { + afu = adapter->afu[i]; + /* + * Tell the AFU drivers; but we don't care what they + * say, we're going away. + */ + cxl_vphb_error_detected(afu, state); + } + spin_unlock(&adapter->afu_list_lock); + return PCI_ERS_RESULT_DISCONNECT; + } + + /* Are we reflashing? + * + * If we reflash, we could come back as something entirely + * different, including a non-CAPI card. As such, by default + * we don't participate in the process. We'll be unbound and + * the slot re-probed. (TODO: check EEH doesn't blindly rebind + * us!) + * + * However, this isn't the entire story: for reliablity + * reasons, we usually want to reflash the FPGA on PERST in + * order to get back to a more reliable known-good state. + * + * This causes us a bit of a problem: if we reflash we can't + * trust that we'll come back the same - we could have a new + * image and been PERSTed in order to load that + * image. However, most of the time we actually *will* come + * back the same - for example a regular EEH event. + * + * Therefore, we allow the user to assert that the image is + * indeed the same and that we should continue on into EEH + * anyway. + */ + if (adapter->perst_loads_image && !adapter->perst_same_image) { + /* TODO take the PHB out of CXL mode */ + dev_info(&pdev->dev, "reflashing, so opting out of EEH!\n"); + return PCI_ERS_RESULT_NONE; + } + + /* + * At this point, we want to try to recover. We'll always + * need a complete slot reset: we don't trust any other reset. + * + * Now, we go through each AFU: + * - We send the driver, if bound, an error_detected callback. + * We expect it to clean up, but it can also tell us to give + * up and permanently detach the card. To simplify things, if + * any bound AFU driver doesn't support EEH, we give up on EEH. + * + * - We detach all contexts associated with the AFU. This + * does not free them, but puts them into a CLOSED state + * which causes any the associated files to return useful + * errors to userland. It also unmaps, but does not free, + * any IRQs. + * + * - We clean up our side: releasing and unmapping resources we hold + * so we can wire them up again when the hardware comes back up. + * + * Driver authors should note: + * + * - Any contexts you create in your kernel driver (except + * those associated with anonymous file descriptors) are + * your responsibility to free and recreate. Likewise with + * any attached resources. + * + * - We will take responsibility for re-initialising the + * device context (the one set up for you in + * cxl_pci_enable_device_hook and accessed through + * cxl_get_context). If you've attached IRQs or other + * resources to it, they remains yours to free. + * + * You can call the same functions to release resources as you + * normally would: we make sure that these functions continue + * to work when the hardware is down. + * + * Two examples: + * + * 1) If you normally free all your resources at the end of + * each request, or if you use anonymous FDs, your + * error_detected callback can simply set a flag to tell + * your driver not to start any new calls. You can then + * clear the flag in the resume callback. + * + * 2) If you normally allocate your resources on startup: + * * Set a flag in error_detected as above. + * * Let CXL detach your contexts. + * * In slot_reset, free the old resources and allocate new ones. + * * In resume, clear the flag to allow things to start. + */ + + /* Make sure no one else changes the afu list */ + spin_lock(&adapter->afu_list_lock); + + for (i = 0; i < adapter->slices; i++) { + afu = adapter->afu[i]; + + if (afu == NULL) + continue; + + afu_result = cxl_vphb_error_detected(afu, state); + cxl_context_detach_all(afu); + cxl_ops->afu_deactivate_mode(afu, afu->current_mode); + pci_deconfigure_afu(afu); + + /* Disconnect trumps all, NONE trumps NEED_RESET */ + if (afu_result == PCI_ERS_RESULT_DISCONNECT) + result = PCI_ERS_RESULT_DISCONNECT; + else if ((afu_result == PCI_ERS_RESULT_NONE) && + (result == PCI_ERS_RESULT_NEED_RESET)) + result = PCI_ERS_RESULT_NONE; + } + spin_unlock(&adapter->afu_list_lock); + + /* should take the context lock here */ + if (cxl_adapter_context_lock(adapter) != 0) + dev_warn(&adapter->dev, + "Couldn't take context lock with %d active-contexts\n", + atomic_read(&adapter->contexts_num)); + + cxl_deconfigure_adapter(adapter); + + return result; +} + +static pci_ers_result_t cxl_pci_slot_reset(struct pci_dev *pdev) +{ + struct cxl *adapter = pci_get_drvdata(pdev); + struct cxl_afu *afu; + struct cxl_context *ctx; + struct pci_dev *afu_dev; + pci_ers_result_t afu_result = PCI_ERS_RESULT_RECOVERED; + pci_ers_result_t result = PCI_ERS_RESULT_RECOVERED; + int i; + + if (cxl_configure_adapter(adapter, pdev)) + goto err; + + /* + * Unlock context activation for the adapter. Ideally this should be + * done in cxl_pci_resume but cxlflash module tries to activate the + * master context as part of slot_reset callback. + */ + cxl_adapter_context_unlock(adapter); + + spin_lock(&adapter->afu_list_lock); + for (i = 0; i < adapter->slices; i++) { + afu = adapter->afu[i]; + + if (afu == NULL) + continue; + + if (pci_configure_afu(afu, adapter, pdev)) + goto err_unlock; + + if (cxl_afu_select_best_mode(afu)) + goto err_unlock; + + if (afu->phb == NULL) + continue; + + list_for_each_entry(afu_dev, &afu->phb->bus->devices, bus_list) { + /* Reset the device context. + * TODO: make this less disruptive + */ + ctx = cxl_get_context(afu_dev); + + if (ctx && cxl_release_context(ctx)) + goto err_unlock; + + ctx = cxl_dev_context_init(afu_dev); + if (IS_ERR(ctx)) + goto err_unlock; + + afu_dev->dev.archdata.cxl_ctx = ctx; + + if (cxl_ops->afu_check_and_enable(afu)) + goto err_unlock; + + afu_dev->error_state = pci_channel_io_normal; + + /* If there's a driver attached, allow it to + * chime in on recovery. Drivers should check + * if everything has come back OK, but + * shouldn't start new work until we call + * their resume function. + */ + if (!afu_dev->driver) + continue; + + if (afu_dev->driver->err_handler && + afu_dev->driver->err_handler->slot_reset) + afu_result = afu_dev->driver->err_handler->slot_reset(afu_dev); + + if (afu_result == PCI_ERS_RESULT_DISCONNECT) + result = PCI_ERS_RESULT_DISCONNECT; + } + } + + spin_unlock(&adapter->afu_list_lock); + return result; + +err_unlock: + spin_unlock(&adapter->afu_list_lock); + +err: + /* All the bits that happen in both error_detected and cxl_remove + * should be idempotent, so we don't need to worry about leaving a mix + * of unconfigured and reconfigured resources. + */ + dev_err(&pdev->dev, "EEH recovery failed. Asking to be disconnected.\n"); + return PCI_ERS_RESULT_DISCONNECT; +} + +static void cxl_pci_resume(struct pci_dev *pdev) +{ + struct cxl *adapter = pci_get_drvdata(pdev); + struct cxl_afu *afu; + struct pci_dev *afu_dev; + int i; + + /* Everything is back now. Drivers should restart work now. + * This is not the place to be checking if everything came back up + * properly, because there's no return value: do that in slot_reset. + */ + spin_lock(&adapter->afu_list_lock); + for (i = 0; i < adapter->slices; i++) { + afu = adapter->afu[i]; + + if (afu == NULL || afu->phb == NULL) + continue; + + list_for_each_entry(afu_dev, &afu->phb->bus->devices, bus_list) { + if (afu_dev->driver && afu_dev->driver->err_handler && + afu_dev->driver->err_handler->resume) + afu_dev->driver->err_handler->resume(afu_dev); + } + } + spin_unlock(&adapter->afu_list_lock); +} + +static const struct pci_error_handlers cxl_err_handler = { + .error_detected = cxl_pci_error_detected, + .slot_reset = cxl_pci_slot_reset, + .resume = cxl_pci_resume, +}; + +struct pci_driver cxl_pci_driver = { + .name = "cxl-pci", + .id_table = cxl_pci_tbl, + .probe = cxl_probe, + .remove = cxl_remove, + .shutdown = cxl_remove, + .err_handler = &cxl_err_handler, +}; diff --git a/drivers/misc/cxl/sysfs.c b/drivers/misc/cxl/sysfs.c new file mode 100644 index 000000000..0baa229d2 --- /dev/null +++ b/drivers/misc/cxl/sysfs.c @@ -0,0 +1,774 @@ +/* + * Copyright 2014 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include + +#include "cxl.h" + +#define to_afu_chardev_m(d) dev_get_drvdata(d) + +/********* Adapter attributes **********************************************/ + +static ssize_t caia_version_show(struct device *device, + struct device_attribute *attr, + char *buf) +{ + struct cxl *adapter = to_cxl_adapter(device); + + return scnprintf(buf, PAGE_SIZE, "%i.%i\n", adapter->caia_major, + adapter->caia_minor); +} + +static ssize_t psl_revision_show(struct device *device, + struct device_attribute *attr, + char *buf) +{ + struct cxl *adapter = to_cxl_adapter(device); + + return scnprintf(buf, PAGE_SIZE, "%i\n", adapter->psl_rev); +} + +static ssize_t base_image_show(struct device *device, + struct device_attribute *attr, + char *buf) +{ + struct cxl *adapter = to_cxl_adapter(device); + + return scnprintf(buf, PAGE_SIZE, "%i\n", adapter->base_image); +} + +static ssize_t image_loaded_show(struct device *device, + struct device_attribute *attr, + char *buf) +{ + struct cxl *adapter = to_cxl_adapter(device); + + if (adapter->user_image_loaded) + return scnprintf(buf, PAGE_SIZE, "user\n"); + return scnprintf(buf, PAGE_SIZE, "factory\n"); +} + +static ssize_t psl_timebase_synced_show(struct device *device, + struct device_attribute *attr, + char *buf) +{ + struct cxl *adapter = to_cxl_adapter(device); + u64 psl_tb, delta; + + /* Recompute the status only in native mode */ + if (cpu_has_feature(CPU_FTR_HVMODE)) { + psl_tb = adapter->native->sl_ops->timebase_read(adapter); + delta = abs(mftb() - psl_tb); + + /* CORE TB and PSL TB difference <= 16usecs ? */ + adapter->psl_timebase_synced = (tb_to_ns(delta) < 16000) ? true : false; + pr_devel("PSL timebase %s - delta: 0x%016llx\n", + (tb_to_ns(delta) < 16000) ? "synchronized" : + "not synchronized", tb_to_ns(delta)); + } + return scnprintf(buf, PAGE_SIZE, "%i\n", adapter->psl_timebase_synced); +} + +static ssize_t tunneled_ops_supported_show(struct device *device, + struct device_attribute *attr, + char *buf) +{ + struct cxl *adapter = to_cxl_adapter(device); + + return scnprintf(buf, PAGE_SIZE, "%i\n", adapter->tunneled_ops_supported); +} + +static ssize_t reset_adapter_store(struct device *device, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct cxl *adapter = to_cxl_adapter(device); + int rc; + int val; + + rc = sscanf(buf, "%i", &val); + if ((rc != 1) || (val != 1 && val != -1)) + return -EINVAL; + + /* + * See if we can lock the context mapping that's only allowed + * when there are no contexts attached to the adapter. Once + * taken this will also prevent any context from getting activated. + */ + if (val == 1) { + rc = cxl_adapter_context_lock(adapter); + if (rc) + goto out; + + rc = cxl_ops->adapter_reset(adapter); + /* In case reset failed release context lock */ + if (rc) + cxl_adapter_context_unlock(adapter); + + } else if (val == -1) { + /* Perform a forced adapter reset */ + rc = cxl_ops->adapter_reset(adapter); + } + +out: + return rc ? rc : count; +} + +static ssize_t load_image_on_perst_show(struct device *device, + struct device_attribute *attr, + char *buf) +{ + struct cxl *adapter = to_cxl_adapter(device); + + if (!adapter->perst_loads_image) + return scnprintf(buf, PAGE_SIZE, "none\n"); + + if (adapter->perst_select_user) + return scnprintf(buf, PAGE_SIZE, "user\n"); + return scnprintf(buf, PAGE_SIZE, "factory\n"); +} + +static ssize_t load_image_on_perst_store(struct device *device, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct cxl *adapter = to_cxl_adapter(device); + int rc; + + if (!strncmp(buf, "none", 4)) + adapter->perst_loads_image = false; + else if (!strncmp(buf, "user", 4)) { + adapter->perst_select_user = true; + adapter->perst_loads_image = true; + } else if (!strncmp(buf, "factory", 7)) { + adapter->perst_select_user = false; + adapter->perst_loads_image = true; + } else + return -EINVAL; + + if ((rc = cxl_update_image_control(adapter))) + return rc; + + return count; +} + +static ssize_t perst_reloads_same_image_show(struct device *device, + struct device_attribute *attr, + char *buf) +{ + struct cxl *adapter = to_cxl_adapter(device); + + return scnprintf(buf, PAGE_SIZE, "%i\n", adapter->perst_same_image); +} + +static ssize_t perst_reloads_same_image_store(struct device *device, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct cxl *adapter = to_cxl_adapter(device); + int rc; + int val; + + rc = sscanf(buf, "%i", &val); + if ((rc != 1) || !(val == 1 || val == 0)) + return -EINVAL; + + adapter->perst_same_image = (val == 1 ? true : false); + return count; +} + +static struct device_attribute adapter_attrs[] = { + __ATTR_RO(caia_version), + __ATTR_RO(psl_revision), + __ATTR_RO(base_image), + __ATTR_RO(image_loaded), + __ATTR_RO(psl_timebase_synced), + __ATTR_RO(tunneled_ops_supported), + __ATTR_RW(load_image_on_perst), + __ATTR_RW(perst_reloads_same_image), + __ATTR(reset, S_IWUSR, NULL, reset_adapter_store), +}; + + +/********* AFU master specific attributes **********************************/ + +static ssize_t mmio_size_show_master(struct device *device, + struct device_attribute *attr, + char *buf) +{ + struct cxl_afu *afu = to_afu_chardev_m(device); + + return scnprintf(buf, PAGE_SIZE, "%llu\n", afu->adapter->ps_size); +} + +static ssize_t pp_mmio_off_show(struct device *device, + struct device_attribute *attr, + char *buf) +{ + struct cxl_afu *afu = to_afu_chardev_m(device); + + return scnprintf(buf, PAGE_SIZE, "%llu\n", afu->native->pp_offset); +} + +static ssize_t pp_mmio_len_show(struct device *device, + struct device_attribute *attr, + char *buf) +{ + struct cxl_afu *afu = to_afu_chardev_m(device); + + return scnprintf(buf, PAGE_SIZE, "%llu\n", afu->pp_size); +} + +static struct device_attribute afu_master_attrs[] = { + __ATTR(mmio_size, S_IRUGO, mmio_size_show_master, NULL), + __ATTR_RO(pp_mmio_off), + __ATTR_RO(pp_mmio_len), +}; + + +/********* AFU attributes **************************************************/ + +static ssize_t mmio_size_show(struct device *device, + struct device_attribute *attr, + char *buf) +{ + struct cxl_afu *afu = to_cxl_afu(device); + + if (afu->pp_size) + return scnprintf(buf, PAGE_SIZE, "%llu\n", afu->pp_size); + return scnprintf(buf, PAGE_SIZE, "%llu\n", afu->adapter->ps_size); +} + +static ssize_t reset_store_afu(struct device *device, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct cxl_afu *afu = to_cxl_afu(device); + int rc; + + /* Not safe to reset if it is currently in use */ + mutex_lock(&afu->contexts_lock); + if (!idr_is_empty(&afu->contexts_idr)) { + rc = -EBUSY; + goto err; + } + + if ((rc = cxl_ops->afu_reset(afu))) + goto err; + + rc = count; +err: + mutex_unlock(&afu->contexts_lock); + return rc; +} + +static ssize_t irqs_min_show(struct device *device, + struct device_attribute *attr, + char *buf) +{ + struct cxl_afu *afu = to_cxl_afu(device); + + return scnprintf(buf, PAGE_SIZE, "%i\n", afu->pp_irqs); +} + +static ssize_t irqs_max_show(struct device *device, + struct device_attribute *attr, + char *buf) +{ + struct cxl_afu *afu = to_cxl_afu(device); + + return scnprintf(buf, PAGE_SIZE, "%i\n", afu->irqs_max); +} + +static ssize_t irqs_max_store(struct device *device, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct cxl_afu *afu = to_cxl_afu(device); + ssize_t ret; + int irqs_max; + + ret = sscanf(buf, "%i", &irqs_max); + if (ret != 1) + return -EINVAL; + + if (irqs_max < afu->pp_irqs) + return -EINVAL; + + if (cpu_has_feature(CPU_FTR_HVMODE)) { + if (irqs_max > afu->adapter->user_irqs) + return -EINVAL; + } else { + /* pHyp sets a per-AFU limit */ + if (irqs_max > afu->guest->max_ints) + return -EINVAL; + } + + afu->irqs_max = irqs_max; + return count; +} + +static ssize_t modes_supported_show(struct device *device, + struct device_attribute *attr, char *buf) +{ + struct cxl_afu *afu = to_cxl_afu(device); + char *p = buf, *end = buf + PAGE_SIZE; + + if (afu->modes_supported & CXL_MODE_DEDICATED) + p += scnprintf(p, end - p, "dedicated_process\n"); + if (afu->modes_supported & CXL_MODE_DIRECTED) + p += scnprintf(p, end - p, "afu_directed\n"); + return (p - buf); +} + +static ssize_t prefault_mode_show(struct device *device, + struct device_attribute *attr, + char *buf) +{ + struct cxl_afu *afu = to_cxl_afu(device); + + switch (afu->prefault_mode) { + case CXL_PREFAULT_WED: + return scnprintf(buf, PAGE_SIZE, "work_element_descriptor\n"); + case CXL_PREFAULT_ALL: + return scnprintf(buf, PAGE_SIZE, "all\n"); + default: + return scnprintf(buf, PAGE_SIZE, "none\n"); + } +} + +static ssize_t prefault_mode_store(struct device *device, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct cxl_afu *afu = to_cxl_afu(device); + enum prefault_modes mode = -1; + + if (!strncmp(buf, "none", 4)) + mode = CXL_PREFAULT_NONE; + else { + if (!radix_enabled()) { + + /* only allowed when not in radix mode */ + if (!strncmp(buf, "work_element_descriptor", 23)) + mode = CXL_PREFAULT_WED; + if (!strncmp(buf, "all", 3)) + mode = CXL_PREFAULT_ALL; + } else { + dev_err(device, "Cannot prefault with radix enabled\n"); + } + } + + if (mode == -1) + return -EINVAL; + + afu->prefault_mode = mode; + return count; +} + +static ssize_t mode_show(struct device *device, + struct device_attribute *attr, + char *buf) +{ + struct cxl_afu *afu = to_cxl_afu(device); + + if (afu->current_mode == CXL_MODE_DEDICATED) + return scnprintf(buf, PAGE_SIZE, "dedicated_process\n"); + if (afu->current_mode == CXL_MODE_DIRECTED) + return scnprintf(buf, PAGE_SIZE, "afu_directed\n"); + return scnprintf(buf, PAGE_SIZE, "none\n"); +} + +static ssize_t mode_store(struct device *device, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct cxl_afu *afu = to_cxl_afu(device); + int old_mode, mode = -1; + int rc = -EBUSY; + + /* can't change this if we have a user */ + mutex_lock(&afu->contexts_lock); + if (!idr_is_empty(&afu->contexts_idr)) + goto err; + + if (!strncmp(buf, "dedicated_process", 17)) + mode = CXL_MODE_DEDICATED; + if (!strncmp(buf, "afu_directed", 12)) + mode = CXL_MODE_DIRECTED; + if (!strncmp(buf, "none", 4)) + mode = 0; + + if (mode == -1) { + rc = -EINVAL; + goto err; + } + + /* + * afu_deactivate_mode needs to be done outside the lock, prevent + * other contexts coming in before we are ready: + */ + old_mode = afu->current_mode; + afu->current_mode = 0; + afu->num_procs = 0; + + mutex_unlock(&afu->contexts_lock); + + if ((rc = cxl_ops->afu_deactivate_mode(afu, old_mode))) + return rc; + if ((rc = cxl_ops->afu_activate_mode(afu, mode))) + return rc; + + return count; +err: + mutex_unlock(&afu->contexts_lock); + return rc; +} + +static ssize_t api_version_show(struct device *device, + struct device_attribute *attr, + char *buf) +{ + return scnprintf(buf, PAGE_SIZE, "%i\n", CXL_API_VERSION); +} + +static ssize_t api_version_compatible_show(struct device *device, + struct device_attribute *attr, + char *buf) +{ + return scnprintf(buf, PAGE_SIZE, "%i\n", CXL_API_VERSION_COMPATIBLE); +} + +static ssize_t afu_eb_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *bin_attr, char *buf, + loff_t off, size_t count) +{ + struct cxl_afu *afu = to_cxl_afu(kobj_to_dev(kobj)); + + return cxl_ops->afu_read_err_buffer(afu, buf, off, count); +} + +static struct device_attribute afu_attrs[] = { + __ATTR_RO(mmio_size), + __ATTR_RO(irqs_min), + __ATTR_RW(irqs_max), + __ATTR_RO(modes_supported), + __ATTR_RW(mode), + __ATTR_RW(prefault_mode), + __ATTR_RO(api_version), + __ATTR_RO(api_version_compatible), + __ATTR(reset, S_IWUSR, NULL, reset_store_afu), +}; + +int cxl_sysfs_adapter_add(struct cxl *adapter) +{ + struct device_attribute *dev_attr; + int i, rc; + + for (i = 0; i < ARRAY_SIZE(adapter_attrs); i++) { + dev_attr = &adapter_attrs[i]; + if (cxl_ops->support_attributes(dev_attr->attr.name, + CXL_ADAPTER_ATTRS)) { + if ((rc = device_create_file(&adapter->dev, dev_attr))) + goto err; + } + } + return 0; +err: + for (i--; i >= 0; i--) { + dev_attr = &adapter_attrs[i]; + if (cxl_ops->support_attributes(dev_attr->attr.name, + CXL_ADAPTER_ATTRS)) + device_remove_file(&adapter->dev, dev_attr); + } + return rc; +} + +void cxl_sysfs_adapter_remove(struct cxl *adapter) +{ + struct device_attribute *dev_attr; + int i; + + for (i = 0; i < ARRAY_SIZE(adapter_attrs); i++) { + dev_attr = &adapter_attrs[i]; + if (cxl_ops->support_attributes(dev_attr->attr.name, + CXL_ADAPTER_ATTRS)) + device_remove_file(&adapter->dev, dev_attr); + } +} + +struct afu_config_record { + struct kobject kobj; + struct bin_attribute config_attr; + struct list_head list; + int cr; + u16 device; + u16 vendor; + u32 class; +}; + +#define to_cr(obj) container_of(obj, struct afu_config_record, kobj) + +static ssize_t vendor_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct afu_config_record *cr = to_cr(kobj); + + return scnprintf(buf, PAGE_SIZE, "0x%.4x\n", cr->vendor); +} + +static ssize_t device_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct afu_config_record *cr = to_cr(kobj); + + return scnprintf(buf, PAGE_SIZE, "0x%.4x\n", cr->device); +} + +static ssize_t class_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct afu_config_record *cr = to_cr(kobj); + + return scnprintf(buf, PAGE_SIZE, "0x%.6x\n", cr->class); +} + +static ssize_t afu_read_config(struct file *filp, struct kobject *kobj, + struct bin_attribute *bin_attr, char *buf, + loff_t off, size_t count) +{ + struct afu_config_record *cr = to_cr(kobj); + struct cxl_afu *afu = to_cxl_afu(kobj_to_dev(kobj->parent)); + + u64 i, j, val, rc; + + for (i = 0; i < count;) { + rc = cxl_ops->afu_cr_read64(afu, cr->cr, off & ~0x7, &val); + if (rc) + val = ~0ULL; + for (j = off & 0x7; j < 8 && i < count; i++, j++, off++) + buf[i] = (val >> (j * 8)) & 0xff; + } + + return count; +} + +static struct kobj_attribute vendor_attribute = + __ATTR_RO(vendor); +static struct kobj_attribute device_attribute = + __ATTR_RO(device); +static struct kobj_attribute class_attribute = + __ATTR_RO(class); + +static struct attribute *afu_cr_attrs[] = { + &vendor_attribute.attr, + &device_attribute.attr, + &class_attribute.attr, + NULL, +}; + +static void release_afu_config_record(struct kobject *kobj) +{ + struct afu_config_record *cr = to_cr(kobj); + + kfree(cr); +} + +static struct kobj_type afu_config_record_type = { + .sysfs_ops = &kobj_sysfs_ops, + .release = release_afu_config_record, + .default_attrs = afu_cr_attrs, +}; + +static struct afu_config_record *cxl_sysfs_afu_new_cr(struct cxl_afu *afu, int cr_idx) +{ + struct afu_config_record *cr; + int rc; + + cr = kzalloc(sizeof(struct afu_config_record), GFP_KERNEL); + if (!cr) + return ERR_PTR(-ENOMEM); + + cr->cr = cr_idx; + + rc = cxl_ops->afu_cr_read16(afu, cr_idx, PCI_DEVICE_ID, &cr->device); + if (rc) + goto err; + rc = cxl_ops->afu_cr_read16(afu, cr_idx, PCI_VENDOR_ID, &cr->vendor); + if (rc) + goto err; + rc = cxl_ops->afu_cr_read32(afu, cr_idx, PCI_CLASS_REVISION, &cr->class); + if (rc) + goto err; + cr->class >>= 8; + + /* + * Export raw AFU PCIe like config record. For now this is read only by + * root - we can expand that later to be readable by non-root and maybe + * even writable provided we have a good use-case. Once we support + * exposing AFUs through a virtual PHB they will get that for free from + * Linux' PCI infrastructure, but until then it's not clear that we + * need it for anything since the main use case is just identifying + * AFUs, which can be done via the vendor, device and class attributes. + */ + sysfs_bin_attr_init(&cr->config_attr); + cr->config_attr.attr.name = "config"; + cr->config_attr.attr.mode = S_IRUSR; + cr->config_attr.size = afu->crs_len; + cr->config_attr.read = afu_read_config; + + rc = kobject_init_and_add(&cr->kobj, &afu_config_record_type, + &afu->dev.kobj, "cr%i", cr->cr); + if (rc) + goto err1; + + rc = sysfs_create_bin_file(&cr->kobj, &cr->config_attr); + if (rc) + goto err1; + + rc = kobject_uevent(&cr->kobj, KOBJ_ADD); + if (rc) + goto err2; + + return cr; +err2: + sysfs_remove_bin_file(&cr->kobj, &cr->config_attr); +err1: + kobject_put(&cr->kobj); + return ERR_PTR(rc); +err: + kfree(cr); + return ERR_PTR(rc); +} + +void cxl_sysfs_afu_remove(struct cxl_afu *afu) +{ + struct device_attribute *dev_attr; + struct afu_config_record *cr, *tmp; + int i; + + /* remove the err buffer bin attribute */ + if (afu->eb_len) + device_remove_bin_file(&afu->dev, &afu->attr_eb); + + for (i = 0; i < ARRAY_SIZE(afu_attrs); i++) { + dev_attr = &afu_attrs[i]; + if (cxl_ops->support_attributes(dev_attr->attr.name, + CXL_AFU_ATTRS)) + device_remove_file(&afu->dev, &afu_attrs[i]); + } + + list_for_each_entry_safe(cr, tmp, &afu->crs, list) { + sysfs_remove_bin_file(&cr->kobj, &cr->config_attr); + kobject_put(&cr->kobj); + } +} + +int cxl_sysfs_afu_add(struct cxl_afu *afu) +{ + struct device_attribute *dev_attr; + struct afu_config_record *cr; + int i, rc; + + INIT_LIST_HEAD(&afu->crs); + + for (i = 0; i < ARRAY_SIZE(afu_attrs); i++) { + dev_attr = &afu_attrs[i]; + if (cxl_ops->support_attributes(dev_attr->attr.name, + CXL_AFU_ATTRS)) { + if ((rc = device_create_file(&afu->dev, &afu_attrs[i]))) + goto err; + } + } + + /* conditionally create the add the binary file for error info buffer */ + if (afu->eb_len) { + sysfs_attr_init(&afu->attr_eb.attr); + + afu->attr_eb.attr.name = "afu_err_buff"; + afu->attr_eb.attr.mode = S_IRUGO; + afu->attr_eb.size = afu->eb_len; + afu->attr_eb.read = afu_eb_read; + + rc = device_create_bin_file(&afu->dev, &afu->attr_eb); + if (rc) { + dev_err(&afu->dev, + "Unable to create eb attr for the afu. Err(%d)\n", + rc); + goto err; + } + } + + for (i = 0; i < afu->crs_num; i++) { + cr = cxl_sysfs_afu_new_cr(afu, i); + if (IS_ERR(cr)) { + rc = PTR_ERR(cr); + goto err1; + } + list_add(&cr->list, &afu->crs); + } + + return 0; + +err1: + cxl_sysfs_afu_remove(afu); + return rc; +err: + /* reset the eb_len as we havent created the bin attr */ + afu->eb_len = 0; + + for (i--; i >= 0; i--) { + dev_attr = &afu_attrs[i]; + if (cxl_ops->support_attributes(dev_attr->attr.name, + CXL_AFU_ATTRS)) + device_remove_file(&afu->dev, &afu_attrs[i]); + } + return rc; +} + +int cxl_sysfs_afu_m_add(struct cxl_afu *afu) +{ + struct device_attribute *dev_attr; + int i, rc; + + for (i = 0; i < ARRAY_SIZE(afu_master_attrs); i++) { + dev_attr = &afu_master_attrs[i]; + if (cxl_ops->support_attributes(dev_attr->attr.name, + CXL_AFU_MASTER_ATTRS)) { + if ((rc = device_create_file(afu->chardev_m, &afu_master_attrs[i]))) + goto err; + } + } + + return 0; + +err: + for (i--; i >= 0; i--) { + dev_attr = &afu_master_attrs[i]; + if (cxl_ops->support_attributes(dev_attr->attr.name, + CXL_AFU_MASTER_ATTRS)) + device_remove_file(afu->chardev_m, &afu_master_attrs[i]); + } + return rc; +} + +void cxl_sysfs_afu_m_remove(struct cxl_afu *afu) +{ + struct device_attribute *dev_attr; + int i; + + for (i = 0; i < ARRAY_SIZE(afu_master_attrs); i++) { + dev_attr = &afu_master_attrs[i]; + if (cxl_ops->support_attributes(dev_attr->attr.name, + CXL_AFU_MASTER_ATTRS)) + device_remove_file(afu->chardev_m, &afu_master_attrs[i]); + } +} diff --git a/drivers/misc/cxl/trace.c b/drivers/misc/cxl/trace.c new file mode 100644 index 000000000..c2b06d319 --- /dev/null +++ b/drivers/misc/cxl/trace.c @@ -0,0 +1,13 @@ +/* + * Copyright 2015 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef __CHECKER__ +#define CREATE_TRACE_POINTS +#include "trace.h" +#endif diff --git a/drivers/misc/cxl/trace.h b/drivers/misc/cxl/trace.h new file mode 100644 index 000000000..b8e300af0 --- /dev/null +++ b/drivers/misc/cxl/trace.h @@ -0,0 +1,695 @@ +/* + * Copyright 2015 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM cxl + +#if !defined(_CXL_TRACE_H) || defined(TRACE_HEADER_MULTI_READ) +#define _CXL_TRACE_H + +#include + +#include "cxl.h" + +#define dsisr_psl9_flags(flags) \ + __print_flags(flags, "|", \ + { CXL_PSL9_DSISR_An_CO_MASK, "FR" }, \ + { CXL_PSL9_DSISR_An_TF, "TF" }, \ + { CXL_PSL9_DSISR_An_PE, "PE" }, \ + { CXL_PSL9_DSISR_An_AE, "AE" }, \ + { CXL_PSL9_DSISR_An_OC, "OC" }, \ + { CXL_PSL9_DSISR_An_S, "S" }) + +#define DSISR_FLAGS \ + { CXL_PSL_DSISR_An_DS, "DS" }, \ + { CXL_PSL_DSISR_An_DM, "DM" }, \ + { CXL_PSL_DSISR_An_ST, "ST" }, \ + { CXL_PSL_DSISR_An_UR, "UR" }, \ + { CXL_PSL_DSISR_An_PE, "PE" }, \ + { CXL_PSL_DSISR_An_AE, "AE" }, \ + { CXL_PSL_DSISR_An_OC, "OC" }, \ + { CXL_PSL_DSISR_An_M, "M" }, \ + { CXL_PSL_DSISR_An_P, "P" }, \ + { CXL_PSL_DSISR_An_A, "A" }, \ + { CXL_PSL_DSISR_An_S, "S" }, \ + { CXL_PSL_DSISR_An_K, "K" } + +#define TFC_FLAGS \ + { CXL_PSL_TFC_An_A, "A" }, \ + { CXL_PSL_TFC_An_C, "C" }, \ + { CXL_PSL_TFC_An_AE, "AE" }, \ + { CXL_PSL_TFC_An_R, "R" } + +#define LLCMD_NAMES \ + { CXL_SPA_SW_CMD_TERMINATE, "TERMINATE" }, \ + { CXL_SPA_SW_CMD_REMOVE, "REMOVE" }, \ + { CXL_SPA_SW_CMD_SUSPEND, "SUSPEND" }, \ + { CXL_SPA_SW_CMD_RESUME, "RESUME" }, \ + { CXL_SPA_SW_CMD_ADD, "ADD" }, \ + { CXL_SPA_SW_CMD_UPDATE, "UPDATE" } + +#define AFU_COMMANDS \ + { 0, "DISABLE" }, \ + { CXL_AFU_Cntl_An_E, "ENABLE" }, \ + { CXL_AFU_Cntl_An_RA, "RESET" } + +#define PSL_COMMANDS \ + { CXL_PSL_SCNTL_An_Pc, "PURGE" }, \ + { CXL_PSL_SCNTL_An_Sc, "SUSPEND" } + + +DECLARE_EVENT_CLASS(cxl_pe_class, + TP_PROTO(struct cxl_context *ctx), + + TP_ARGS(ctx), + + TP_STRUCT__entry( + __field(u8, card) + __field(u8, afu) + __field(u16, pe) + ), + + TP_fast_assign( + __entry->card = ctx->afu->adapter->adapter_num; + __entry->afu = ctx->afu->slice; + __entry->pe = ctx->pe; + ), + + TP_printk("afu%i.%i pe=%i", + __entry->card, + __entry->afu, + __entry->pe + ) +); + + +TRACE_EVENT(cxl_attach, + TP_PROTO(struct cxl_context *ctx, u64 wed, s16 num_interrupts, u64 amr), + + TP_ARGS(ctx, wed, num_interrupts, amr), + + TP_STRUCT__entry( + __field(u8, card) + __field(u8, afu) + __field(u16, pe) + __field(pid_t, pid) + __field(u64, wed) + __field(u64, amr) + __field(s16, num_interrupts) + ), + + TP_fast_assign( + __entry->card = ctx->afu->adapter->adapter_num; + __entry->afu = ctx->afu->slice; + __entry->pe = ctx->pe; + __entry->pid = pid_nr(ctx->pid); + __entry->wed = wed; + __entry->amr = amr; + __entry->num_interrupts = num_interrupts; + ), + + TP_printk("afu%i.%i pid=%i pe=%i wed=0x%016llx irqs=%i amr=0x%llx", + __entry->card, + __entry->afu, + __entry->pid, + __entry->pe, + __entry->wed, + __entry->num_interrupts, + __entry->amr + ) +); + +DEFINE_EVENT(cxl_pe_class, cxl_detach, + TP_PROTO(struct cxl_context *ctx), + TP_ARGS(ctx) +); + +TRACE_EVENT(cxl_afu_irq, + TP_PROTO(struct cxl_context *ctx, int afu_irq, int virq, irq_hw_number_t hwirq), + + TP_ARGS(ctx, afu_irq, virq, hwirq), + + TP_STRUCT__entry( + __field(u8, card) + __field(u8, afu) + __field(u16, pe) + __field(u16, afu_irq) + __field(int, virq) + __field(irq_hw_number_t, hwirq) + ), + + TP_fast_assign( + __entry->card = ctx->afu->adapter->adapter_num; + __entry->afu = ctx->afu->slice; + __entry->pe = ctx->pe; + __entry->afu_irq = afu_irq; + __entry->virq = virq; + __entry->hwirq = hwirq; + ), + + TP_printk("afu%i.%i pe=%i afu_irq=%i virq=%i hwirq=0x%lx", + __entry->card, + __entry->afu, + __entry->pe, + __entry->afu_irq, + __entry->virq, + __entry->hwirq + ) +); + +TRACE_EVENT(cxl_psl9_irq, + TP_PROTO(struct cxl_context *ctx, int irq, u64 dsisr, u64 dar), + + TP_ARGS(ctx, irq, dsisr, dar), + + TP_STRUCT__entry( + __field(u8, card) + __field(u8, afu) + __field(u16, pe) + __field(int, irq) + __field(u64, dsisr) + __field(u64, dar) + ), + + TP_fast_assign( + __entry->card = ctx->afu->adapter->adapter_num; + __entry->afu = ctx->afu->slice; + __entry->pe = ctx->pe; + __entry->irq = irq; + __entry->dsisr = dsisr; + __entry->dar = dar; + ), + + TP_printk("afu%i.%i pe=%i irq=%i dsisr=0x%016llx dsisr=%s dar=0x%016llx", + __entry->card, + __entry->afu, + __entry->pe, + __entry->irq, + __entry->dsisr, + dsisr_psl9_flags(__entry->dsisr), + __entry->dar + ) +); + +TRACE_EVENT(cxl_psl_irq, + TP_PROTO(struct cxl_context *ctx, int irq, u64 dsisr, u64 dar), + + TP_ARGS(ctx, irq, dsisr, dar), + + TP_STRUCT__entry( + __field(u8, card) + __field(u8, afu) + __field(u16, pe) + __field(int, irq) + __field(u64, dsisr) + __field(u64, dar) + ), + + TP_fast_assign( + __entry->card = ctx->afu->adapter->adapter_num; + __entry->afu = ctx->afu->slice; + __entry->pe = ctx->pe; + __entry->irq = irq; + __entry->dsisr = dsisr; + __entry->dar = dar; + ), + + TP_printk("afu%i.%i pe=%i irq=%i dsisr=%s dar=0x%016llx", + __entry->card, + __entry->afu, + __entry->pe, + __entry->irq, + __print_flags(__entry->dsisr, "|", DSISR_FLAGS), + __entry->dar + ) +); + +TRACE_EVENT(cxl_psl_irq_ack, + TP_PROTO(struct cxl_context *ctx, u64 tfc), + + TP_ARGS(ctx, tfc), + + TP_STRUCT__entry( + __field(u8, card) + __field(u8, afu) + __field(u16, pe) + __field(u64, tfc) + ), + + TP_fast_assign( + __entry->card = ctx->afu->adapter->adapter_num; + __entry->afu = ctx->afu->slice; + __entry->pe = ctx->pe; + __entry->tfc = tfc; + ), + + TP_printk("afu%i.%i pe=%i tfc=%s", + __entry->card, + __entry->afu, + __entry->pe, + __print_flags(__entry->tfc, "|", TFC_FLAGS) + ) +); + +TRACE_EVENT(cxl_ste_miss, + TP_PROTO(struct cxl_context *ctx, u64 dar), + + TP_ARGS(ctx, dar), + + TP_STRUCT__entry( + __field(u8, card) + __field(u8, afu) + __field(u16, pe) + __field(u64, dar) + ), + + TP_fast_assign( + __entry->card = ctx->afu->adapter->adapter_num; + __entry->afu = ctx->afu->slice; + __entry->pe = ctx->pe; + __entry->dar = dar; + ), + + TP_printk("afu%i.%i pe=%i dar=0x%016llx", + __entry->card, + __entry->afu, + __entry->pe, + __entry->dar + ) +); + +TRACE_EVENT(cxl_ste_write, + TP_PROTO(struct cxl_context *ctx, unsigned int idx, u64 e, u64 v), + + TP_ARGS(ctx, idx, e, v), + + TP_STRUCT__entry( + __field(u8, card) + __field(u8, afu) + __field(u16, pe) + __field(unsigned int, idx) + __field(u64, e) + __field(u64, v) + ), + + TP_fast_assign( + __entry->card = ctx->afu->adapter->adapter_num; + __entry->afu = ctx->afu->slice; + __entry->pe = ctx->pe; + __entry->idx = idx; + __entry->e = e; + __entry->v = v; + ), + + TP_printk("afu%i.%i pe=%i SSTE[%i] E=0x%016llx V=0x%016llx", + __entry->card, + __entry->afu, + __entry->pe, + __entry->idx, + __entry->e, + __entry->v + ) +); + +TRACE_EVENT(cxl_pte_miss, + TP_PROTO(struct cxl_context *ctx, u64 dsisr, u64 dar), + + TP_ARGS(ctx, dsisr, dar), + + TP_STRUCT__entry( + __field(u8, card) + __field(u8, afu) + __field(u16, pe) + __field(u64, dsisr) + __field(u64, dar) + ), + + TP_fast_assign( + __entry->card = ctx->afu->adapter->adapter_num; + __entry->afu = ctx->afu->slice; + __entry->pe = ctx->pe; + __entry->dsisr = dsisr; + __entry->dar = dar; + ), + + TP_printk("afu%i.%i pe=%i dsisr=%s dar=0x%016llx", + __entry->card, + __entry->afu, + __entry->pe, + __print_flags(__entry->dsisr, "|", DSISR_FLAGS), + __entry->dar + ) +); + +TRACE_EVENT(cxl_llcmd, + TP_PROTO(struct cxl_context *ctx, u64 cmd), + + TP_ARGS(ctx, cmd), + + TP_STRUCT__entry( + __field(u8, card) + __field(u8, afu) + __field(u16, pe) + __field(u64, cmd) + ), + + TP_fast_assign( + __entry->card = ctx->afu->adapter->adapter_num; + __entry->afu = ctx->afu->slice; + __entry->pe = ctx->pe; + __entry->cmd = cmd; + ), + + TP_printk("afu%i.%i pe=%i cmd=%s", + __entry->card, + __entry->afu, + __entry->pe, + __print_symbolic_u64(__entry->cmd, LLCMD_NAMES) + ) +); + +TRACE_EVENT(cxl_llcmd_done, + TP_PROTO(struct cxl_context *ctx, u64 cmd, int rc), + + TP_ARGS(ctx, cmd, rc), + + TP_STRUCT__entry( + __field(u8, card) + __field(u8, afu) + __field(u16, pe) + __field(u64, cmd) + __field(int, rc) + ), + + TP_fast_assign( + __entry->card = ctx->afu->adapter->adapter_num; + __entry->afu = ctx->afu->slice; + __entry->pe = ctx->pe; + __entry->rc = rc; + __entry->cmd = cmd; + ), + + TP_printk("afu%i.%i pe=%i cmd=%s rc=%i", + __entry->card, + __entry->afu, + __entry->pe, + __print_symbolic_u64(__entry->cmd, LLCMD_NAMES), + __entry->rc + ) +); + +DECLARE_EVENT_CLASS(cxl_afu_psl_ctrl, + TP_PROTO(struct cxl_afu *afu, u64 cmd), + + TP_ARGS(afu, cmd), + + TP_STRUCT__entry( + __field(u8, card) + __field(u8, afu) + __field(u64, cmd) + ), + + TP_fast_assign( + __entry->card = afu->adapter->adapter_num; + __entry->afu = afu->slice; + __entry->cmd = cmd; + ), + + TP_printk("afu%i.%i cmd=%s", + __entry->card, + __entry->afu, + __print_symbolic_u64(__entry->cmd, AFU_COMMANDS) + ) +); + +DECLARE_EVENT_CLASS(cxl_afu_psl_ctrl_done, + TP_PROTO(struct cxl_afu *afu, u64 cmd, int rc), + + TP_ARGS(afu, cmd, rc), + + TP_STRUCT__entry( + __field(u8, card) + __field(u8, afu) + __field(u64, cmd) + __field(int, rc) + ), + + TP_fast_assign( + __entry->card = afu->adapter->adapter_num; + __entry->afu = afu->slice; + __entry->rc = rc; + __entry->cmd = cmd; + ), + + TP_printk("afu%i.%i cmd=%s rc=%i", + __entry->card, + __entry->afu, + __print_symbolic_u64(__entry->cmd, AFU_COMMANDS), + __entry->rc + ) +); + +DEFINE_EVENT(cxl_afu_psl_ctrl, cxl_afu_ctrl, + TP_PROTO(struct cxl_afu *afu, u64 cmd), + TP_ARGS(afu, cmd) +); + +DEFINE_EVENT(cxl_afu_psl_ctrl_done, cxl_afu_ctrl_done, + TP_PROTO(struct cxl_afu *afu, u64 cmd, int rc), + TP_ARGS(afu, cmd, rc) +); + +DEFINE_EVENT_PRINT(cxl_afu_psl_ctrl, cxl_psl_ctrl, + TP_PROTO(struct cxl_afu *afu, u64 cmd), + TP_ARGS(afu, cmd), + + TP_printk("psl%i.%i cmd=%s", + __entry->card, + __entry->afu, + __print_symbolic_u64(__entry->cmd, PSL_COMMANDS) + ) +); + +DEFINE_EVENT_PRINT(cxl_afu_psl_ctrl_done, cxl_psl_ctrl_done, + TP_PROTO(struct cxl_afu *afu, u64 cmd, int rc), + TP_ARGS(afu, cmd, rc), + + TP_printk("psl%i.%i cmd=%s rc=%i", + __entry->card, + __entry->afu, + __print_symbolic_u64(__entry->cmd, PSL_COMMANDS), + __entry->rc + ) +); + +DEFINE_EVENT(cxl_pe_class, cxl_slbia, + TP_PROTO(struct cxl_context *ctx), + TP_ARGS(ctx) +); + +TRACE_EVENT(cxl_hcall, + TP_PROTO(u64 unit_address, u64 process_token, long rc), + + TP_ARGS(unit_address, process_token, rc), + + TP_STRUCT__entry( + __field(u64, unit_address) + __field(u64, process_token) + __field(long, rc) + ), + + TP_fast_assign( + __entry->unit_address = unit_address; + __entry->process_token = process_token; + __entry->rc = rc; + ), + + TP_printk("unit_address=0x%016llx process_token=0x%016llx rc=%li", + __entry->unit_address, + __entry->process_token, + __entry->rc + ) +); + +TRACE_EVENT(cxl_hcall_control, + TP_PROTO(u64 unit_address, char *fct, u64 p1, u64 p2, u64 p3, + u64 p4, unsigned long r4, long rc), + + TP_ARGS(unit_address, fct, p1, p2, p3, p4, r4, rc), + + TP_STRUCT__entry( + __field(u64, unit_address) + __field(char *, fct) + __field(u64, p1) + __field(u64, p2) + __field(u64, p3) + __field(u64, p4) + __field(unsigned long, r4) + __field(long, rc) + ), + + TP_fast_assign( + __entry->unit_address = unit_address; + __entry->fct = fct; + __entry->p1 = p1; + __entry->p2 = p2; + __entry->p3 = p3; + __entry->p4 = p4; + __entry->r4 = r4; + __entry->rc = rc; + ), + + TP_printk("unit_address=%#.16llx %s(%#llx, %#llx, %#llx, %#llx, R4: %#lx)): %li", + __entry->unit_address, + __entry->fct, + __entry->p1, + __entry->p2, + __entry->p3, + __entry->p4, + __entry->r4, + __entry->rc + ) +); + +TRACE_EVENT(cxl_hcall_attach, + TP_PROTO(u64 unit_address, u64 phys_addr, unsigned long process_token, + unsigned long mmio_addr, unsigned long mmio_size, long rc), + + TP_ARGS(unit_address, phys_addr, process_token, + mmio_addr, mmio_size, rc), + + TP_STRUCT__entry( + __field(u64, unit_address) + __field(u64, phys_addr) + __field(unsigned long, process_token) + __field(unsigned long, mmio_addr) + __field(unsigned long, mmio_size) + __field(long, rc) + ), + + TP_fast_assign( + __entry->unit_address = unit_address; + __entry->phys_addr = phys_addr; + __entry->process_token = process_token; + __entry->mmio_addr = mmio_addr; + __entry->mmio_size = mmio_size; + __entry->rc = rc; + ), + + TP_printk("unit_address=0x%016llx phys_addr=0x%016llx " + "token=0x%.8lx mmio_addr=0x%lx mmio_size=0x%lx rc=%li", + __entry->unit_address, + __entry->phys_addr, + __entry->process_token, + __entry->mmio_addr, + __entry->mmio_size, + __entry->rc + ) +); + +DEFINE_EVENT(cxl_hcall, cxl_hcall_detach, + TP_PROTO(u64 unit_address, u64 process_token, long rc), + TP_ARGS(unit_address, process_token, rc) +); + +DEFINE_EVENT(cxl_hcall_control, cxl_hcall_control_function, + TP_PROTO(u64 unit_address, char *fct, u64 p1, u64 p2, u64 p3, + u64 p4, unsigned long r4, long rc), + TP_ARGS(unit_address, fct, p1, p2, p3, p4, r4, rc) +); + +DEFINE_EVENT(cxl_hcall, cxl_hcall_collect_int_info, + TP_PROTO(u64 unit_address, u64 process_token, long rc), + TP_ARGS(unit_address, process_token, rc) +); + +TRACE_EVENT(cxl_hcall_control_faults, + TP_PROTO(u64 unit_address, u64 process_token, + u64 control_mask, u64 reset_mask, unsigned long r4, + long rc), + + TP_ARGS(unit_address, process_token, + control_mask, reset_mask, r4, rc), + + TP_STRUCT__entry( + __field(u64, unit_address) + __field(u64, process_token) + __field(u64, control_mask) + __field(u64, reset_mask) + __field(unsigned long, r4) + __field(long, rc) + ), + + TP_fast_assign( + __entry->unit_address = unit_address; + __entry->process_token = process_token; + __entry->control_mask = control_mask; + __entry->reset_mask = reset_mask; + __entry->r4 = r4; + __entry->rc = rc; + ), + + TP_printk("unit_address=0x%016llx process_token=0x%llx " + "control_mask=%#llx reset_mask=%#llx r4=%#lx rc=%li", + __entry->unit_address, + __entry->process_token, + __entry->control_mask, + __entry->reset_mask, + __entry->r4, + __entry->rc + ) +); + +DEFINE_EVENT(cxl_hcall_control, cxl_hcall_control_facility, + TP_PROTO(u64 unit_address, char *fct, u64 p1, u64 p2, u64 p3, + u64 p4, unsigned long r4, long rc), + TP_ARGS(unit_address, fct, p1, p2, p3, p4, r4, rc) +); + +TRACE_EVENT(cxl_hcall_download_facility, + TP_PROTO(u64 unit_address, char *fct, u64 list_address, u64 num, + unsigned long r4, long rc), + + TP_ARGS(unit_address, fct, list_address, num, r4, rc), + + TP_STRUCT__entry( + __field(u64, unit_address) + __field(char *, fct) + __field(u64, list_address) + __field(u64, num) + __field(unsigned long, r4) + __field(long, rc) + ), + + TP_fast_assign( + __entry->unit_address = unit_address; + __entry->fct = fct; + __entry->list_address = list_address; + __entry->num = num; + __entry->r4 = r4; + __entry->rc = rc; + ), + + TP_printk("%#.16llx, %s(%#llx, %#llx), %#lx): %li", + __entry->unit_address, + __entry->fct, + __entry->list_address, + __entry->num, + __entry->r4, + __entry->rc + ) +); + +#endif /* _CXL_TRACE_H */ + +/* This part must be outside protection */ +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE trace +#include diff --git a/drivers/misc/cxl/vphb.c b/drivers/misc/cxl/vphb.c new file mode 100644 index 000000000..7908633d9 --- /dev/null +++ b/drivers/misc/cxl/vphb.c @@ -0,0 +1,333 @@ +/* + * Copyright 2014 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include "cxl.h" + +static int cxl_dma_set_mask(struct pci_dev *pdev, u64 dma_mask) +{ + if (dma_mask < DMA_BIT_MASK(64)) { + pr_info("%s only 64bit DMA supported on CXL", __func__); + return -EIO; + } + + *(pdev->dev.dma_mask) = dma_mask; + return 0; +} + +static int cxl_pci_probe_mode(struct pci_bus *bus) +{ + return PCI_PROBE_NORMAL; +} + +static int cxl_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) +{ + return -ENODEV; +} + +static void cxl_teardown_msi_irqs(struct pci_dev *pdev) +{ + /* + * MSI should never be set but need still need to provide this call + * back. + */ +} + +static bool cxl_pci_enable_device_hook(struct pci_dev *dev) +{ + struct pci_controller *phb; + struct cxl_afu *afu; + struct cxl_context *ctx; + + phb = pci_bus_to_host(dev->bus); + afu = (struct cxl_afu *)phb->private_data; + + if (!cxl_ops->link_ok(afu->adapter, afu)) { + dev_warn(&dev->dev, "%s: Device link is down, refusing to enable AFU\n", __func__); + return false; + } + + set_dma_ops(&dev->dev, &dma_nommu_ops); + set_dma_offset(&dev->dev, PAGE_OFFSET); + + /* + * Allocate a context to do cxl things too. If we eventually do real + * DMA ops, we'll need a default context to attach them to + */ + ctx = cxl_dev_context_init(dev); + if (IS_ERR(ctx)) + return false; + dev->dev.archdata.cxl_ctx = ctx; + + return (cxl_ops->afu_check_and_enable(afu) == 0); +} + +static void cxl_pci_disable_device(struct pci_dev *dev) +{ + struct cxl_context *ctx = cxl_get_context(dev); + + if (ctx) { + if (ctx->status == STARTED) { + dev_err(&dev->dev, "Default context started\n"); + return; + } + dev->dev.archdata.cxl_ctx = NULL; + cxl_release_context(ctx); + } +} + +static resource_size_t cxl_pci_window_alignment(struct pci_bus *bus, + unsigned long type) +{ + return 1; +} + +static void cxl_pci_reset_secondary_bus(struct pci_dev *dev) +{ + /* Should we do an AFU reset here ? */ +} + +static int cxl_pcie_cfg_record(u8 bus, u8 devfn) +{ + return (bus << 8) + devfn; +} + +static inline struct cxl_afu *pci_bus_to_afu(struct pci_bus *bus) +{ + struct pci_controller *phb = bus ? pci_bus_to_host(bus) : NULL; + + return phb ? phb->private_data : NULL; +} + +static void cxl_afu_configured_put(struct cxl_afu *afu) +{ + atomic_dec_if_positive(&afu->configured_state); +} + +static bool cxl_afu_configured_get(struct cxl_afu *afu) +{ + return atomic_inc_unless_negative(&afu->configured_state); +} + +static inline int cxl_pcie_config_info(struct pci_bus *bus, unsigned int devfn, + struct cxl_afu *afu, int *_record) +{ + int record; + + record = cxl_pcie_cfg_record(bus->number, devfn); + if (record > afu->crs_num) + return PCIBIOS_DEVICE_NOT_FOUND; + + *_record = record; + return 0; +} + +static int cxl_pcie_read_config(struct pci_bus *bus, unsigned int devfn, + int offset, int len, u32 *val) +{ + int rc, record; + struct cxl_afu *afu; + u8 val8; + u16 val16; + u32 val32; + + afu = pci_bus_to_afu(bus); + /* Grab a reader lock on afu. */ + if (afu == NULL || !cxl_afu_configured_get(afu)) + return PCIBIOS_DEVICE_NOT_FOUND; + + rc = cxl_pcie_config_info(bus, devfn, afu, &record); + if (rc) + goto out; + + switch (len) { + case 1: + rc = cxl_ops->afu_cr_read8(afu, record, offset, &val8); + *val = val8; + break; + case 2: + rc = cxl_ops->afu_cr_read16(afu, record, offset, &val16); + *val = val16; + break; + case 4: + rc = cxl_ops->afu_cr_read32(afu, record, offset, &val32); + *val = val32; + break; + default: + WARN_ON(1); + } + +out: + cxl_afu_configured_put(afu); + return rc ? PCIBIOS_DEVICE_NOT_FOUND : PCIBIOS_SUCCESSFUL; +} + +static int cxl_pcie_write_config(struct pci_bus *bus, unsigned int devfn, + int offset, int len, u32 val) +{ + int rc, record; + struct cxl_afu *afu; + + afu = pci_bus_to_afu(bus); + /* Grab a reader lock on afu. */ + if (afu == NULL || !cxl_afu_configured_get(afu)) + return PCIBIOS_DEVICE_NOT_FOUND; + + rc = cxl_pcie_config_info(bus, devfn, afu, &record); + if (rc) + goto out; + + switch (len) { + case 1: + rc = cxl_ops->afu_cr_write8(afu, record, offset, val & 0xff); + break; + case 2: + rc = cxl_ops->afu_cr_write16(afu, record, offset, val & 0xffff); + break; + case 4: + rc = cxl_ops->afu_cr_write32(afu, record, offset, val); + break; + default: + WARN_ON(1); + } + +out: + cxl_afu_configured_put(afu); + return rc ? PCIBIOS_SET_FAILED : PCIBIOS_SUCCESSFUL; +} + +static struct pci_ops cxl_pcie_pci_ops = +{ + .read = cxl_pcie_read_config, + .write = cxl_pcie_write_config, +}; + + +static struct pci_controller_ops cxl_pci_controller_ops = +{ + .probe_mode = cxl_pci_probe_mode, + .enable_device_hook = cxl_pci_enable_device_hook, + .disable_device = cxl_pci_disable_device, + .release_device = cxl_pci_disable_device, + .window_alignment = cxl_pci_window_alignment, + .reset_secondary_bus = cxl_pci_reset_secondary_bus, + .setup_msi_irqs = cxl_setup_msi_irqs, + .teardown_msi_irqs = cxl_teardown_msi_irqs, + .dma_set_mask = cxl_dma_set_mask, +}; + +int cxl_pci_vphb_add(struct cxl_afu *afu) +{ + struct pci_controller *phb; + struct device_node *vphb_dn; + struct device *parent; + + /* + * If there are no AFU configuration records we won't have anything to + * expose under the vPHB, so skip creating one, returning success since + * this is still a valid case. This will also opt us out of EEH + * handling since we won't have anything special to do if there are no + * kernel drivers attached to the vPHB, and EEH handling is not yet + * supported in the peer model. + */ + if (!afu->crs_num) + return 0; + + /* The parent device is the adapter. Reuse the device node of + * the adapter. + * We don't seem to care what device node is used for the vPHB, + * but tools such as lsvpd walk up the device parents looking + * for a valid location code, so we might as well show devices + * attached to the adapter as being located on that adapter. + */ + parent = afu->adapter->dev.parent; + vphb_dn = parent->of_node; + + /* Alloc and setup PHB data structure */ + phb = pcibios_alloc_controller(vphb_dn); + if (!phb) + return -ENODEV; + + /* Setup parent in sysfs */ + phb->parent = parent; + + /* Setup the PHB using arch provided callback */ + phb->ops = &cxl_pcie_pci_ops; + phb->cfg_addr = NULL; + phb->cfg_data = NULL; + phb->private_data = afu; + phb->controller_ops = cxl_pci_controller_ops; + + /* Scan the bus */ + pcibios_scan_phb(phb); + if (phb->bus == NULL) + return -ENXIO; + + /* Set release hook on root bus */ + pci_set_host_bridge_release(to_pci_host_bridge(phb->bus->bridge), + pcibios_free_controller_deferred, + (void *) phb); + + /* Claim resources. This might need some rework as well depending + * whether we are doing probe-only or not, like assigning unassigned + * resources etc... + */ + pcibios_claim_one_bus(phb->bus); + + /* Add probed PCI devices to the device model */ + pci_bus_add_devices(phb->bus); + + afu->phb = phb; + + return 0; +} + +void cxl_pci_vphb_remove(struct cxl_afu *afu) +{ + struct pci_controller *phb; + + /* If there is no configuration record we won't have one of these */ + if (!afu || !afu->phb) + return; + + phb = afu->phb; + afu->phb = NULL; + + pci_remove_root_bus(phb->bus); + /* + * We don't free phb here - that's handled by + * pcibios_free_controller_deferred() + */ +} + +bool cxl_pci_is_vphb_device(struct pci_dev *dev) +{ + struct pci_controller *phb; + + phb = pci_bus_to_host(dev->bus); + + return (phb->ops == &cxl_pcie_pci_ops); +} + +struct cxl_afu *cxl_pci_to_afu(struct pci_dev *dev) +{ + struct pci_controller *phb; + + phb = pci_bus_to_host(dev->bus); + + return (struct cxl_afu *)phb->private_data; +} +EXPORT_SYMBOL_GPL(cxl_pci_to_afu); + +unsigned int cxl_pci_to_cfg_record(struct pci_dev *dev) +{ + return cxl_pcie_cfg_record(dev->bus->number, dev->devfn); +} +EXPORT_SYMBOL_GPL(cxl_pci_to_cfg_record); diff --git a/drivers/misc/ds1682.c b/drivers/misc/ds1682.c new file mode 100644 index 000000000..98a921ea9 --- /dev/null +++ b/drivers/misc/ds1682.c @@ -0,0 +1,267 @@ +/* + * Dallas Semiconductor DS1682 Elapsed Time Recorder device driver + * + * Written by: Grant Likely + * + * Copyright (C) 2007 Secret Lab Technologies Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* + * The DS1682 elapsed timer recorder is a simple device that implements + * one elapsed time counter, one event counter, an alarm signal and 10 + * bytes of general purpose EEPROM. + * + * This driver provides access to the DS1682 counters and user data via + * the sysfs. The following attributes are added to the device node: + * elapsed_time (u32): Total elapsed event time in ms resolution + * alarm_time (u32): When elapsed time exceeds the value in alarm_time, + * then the alarm pin is asserted. + * event_count (u16): number of times the event pin has gone low. + * eeprom (u8[10]): general purpose EEPROM + * + * Counter registers and user data are both read/write unless the device + * has been write protected. This driver does not support turning off write + * protection. Once write protection is turned on, it is impossible to + * turn it off again, so I have left the feature out of this driver to avoid + * accidental enabling, but it is trivial to add write protect support. + * + */ + +#include +#include +#include +#include +#include +#include +#include + +/* Device registers */ +#define DS1682_REG_CONFIG 0x00 +#define DS1682_REG_ALARM 0x01 +#define DS1682_REG_ELAPSED 0x05 +#define DS1682_REG_EVT_CNTR 0x09 +#define DS1682_REG_EEPROM 0x0b +#define DS1682_REG_RESET 0x1d +#define DS1682_REG_WRITE_DISABLE 0x1e +#define DS1682_REG_WRITE_MEM_DISABLE 0x1f + +#define DS1682_EEPROM_SIZE 10 + +/* + * Generic counter attributes + */ +static ssize_t ds1682_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct sensor_device_attribute_2 *sattr = to_sensor_dev_attr_2(attr); + struct i2c_client *client = to_i2c_client(dev); + unsigned long long val, check; + __le32 val_le = 0; + int rc; + + dev_dbg(dev, "ds1682_show() called on %s\n", attr->attr.name); + + /* Read the register */ + rc = i2c_smbus_read_i2c_block_data(client, sattr->index, sattr->nr, + (u8 *)&val_le); + if (rc < 0) + return -EIO; + + val = le32_to_cpu(val_le); + + if (sattr->index == DS1682_REG_ELAPSED) { + int retries = 5; + + /* Detect and retry when a tick occurs mid-read */ + do { + rc = i2c_smbus_read_i2c_block_data(client, sattr->index, + sattr->nr, + (u8 *)&val_le); + if (rc < 0 || retries <= 0) + return -EIO; + + check = val; + val = le32_to_cpu(val_le); + retries--; + } while (val != check && val != (check + 1)); + } + + /* Format the output string and return # of bytes + * Special case: the 32 bit regs are time values with 1/4s + * resolution, scale them up to milliseconds + */ + return sprintf(buf, "%llu\n", (sattr->nr == 4) ? (val * 250) : val); +} + +static ssize_t ds1682_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct sensor_device_attribute_2 *sattr = to_sensor_dev_attr_2(attr); + struct i2c_client *client = to_i2c_client(dev); + u64 val; + __le32 val_le; + int rc; + + dev_dbg(dev, "ds1682_store() called on %s\n", attr->attr.name); + + /* Decode input */ + rc = kstrtoull(buf, 0, &val); + if (rc < 0) { + dev_dbg(dev, "input string not a number\n"); + return -EINVAL; + } + + /* Special case: the 32 bit regs are time values with 1/4s + * resolution, scale input down to quarter-seconds */ + if (sattr->nr == 4) + do_div(val, 250); + + /* write out the value */ + val_le = cpu_to_le32(val); + rc = i2c_smbus_write_i2c_block_data(client, sattr->index, sattr->nr, + (u8 *) & val_le); + if (rc < 0) { + dev_err(dev, "register write failed; reg=0x%x, size=%i\n", + sattr->index, sattr->nr); + return -EIO; + } + + return count; +} + +/* + * Simple register attributes + */ +static SENSOR_DEVICE_ATTR_2(elapsed_time, S_IRUGO | S_IWUSR, ds1682_show, + ds1682_store, 4, DS1682_REG_ELAPSED); +static SENSOR_DEVICE_ATTR_2(alarm_time, S_IRUGO | S_IWUSR, ds1682_show, + ds1682_store, 4, DS1682_REG_ALARM); +static SENSOR_DEVICE_ATTR_2(event_count, S_IRUGO | S_IWUSR, ds1682_show, + ds1682_store, 2, DS1682_REG_EVT_CNTR); + +static const struct attribute_group ds1682_group = { + .attrs = (struct attribute *[]) { + &sensor_dev_attr_elapsed_time.dev_attr.attr, + &sensor_dev_attr_alarm_time.dev_attr.attr, + &sensor_dev_attr_event_count.dev_attr.attr, + NULL, + }, +}; + +/* + * User data attribute + */ +static ssize_t ds1682_eeprom_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, + char *buf, loff_t off, size_t count) +{ + struct i2c_client *client = kobj_to_i2c_client(kobj); + int rc; + + dev_dbg(&client->dev, "ds1682_eeprom_read(p=%p, off=%lli, c=%zi)\n", + buf, off, count); + + rc = i2c_smbus_read_i2c_block_data(client, DS1682_REG_EEPROM + off, + count, buf); + if (rc < 0) + return -EIO; + + return count; +} + +static ssize_t ds1682_eeprom_write(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, + char *buf, loff_t off, size_t count) +{ + struct i2c_client *client = kobj_to_i2c_client(kobj); + + dev_dbg(&client->dev, "ds1682_eeprom_write(p=%p, off=%lli, c=%zi)\n", + buf, off, count); + + /* Write out to the device */ + if (i2c_smbus_write_i2c_block_data(client, DS1682_REG_EEPROM + off, + count, buf) < 0) + return -EIO; + + return count; +} + +static const struct bin_attribute ds1682_eeprom_attr = { + .attr = { + .name = "eeprom", + .mode = S_IRUGO | S_IWUSR, + }, + .size = DS1682_EEPROM_SIZE, + .read = ds1682_eeprom_read, + .write = ds1682_eeprom_write, +}; + +/* + * Called when a ds1682 device is matched with this driver + */ +static int ds1682_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + int rc; + + if (!i2c_check_functionality(client->adapter, + I2C_FUNC_SMBUS_I2C_BLOCK)) { + dev_err(&client->dev, "i2c bus does not support the ds1682\n"); + rc = -ENODEV; + goto exit; + } + + rc = sysfs_create_group(&client->dev.kobj, &ds1682_group); + if (rc) + goto exit; + + rc = sysfs_create_bin_file(&client->dev.kobj, &ds1682_eeprom_attr); + if (rc) + goto exit_bin_attr; + + return 0; + + exit_bin_attr: + sysfs_remove_group(&client->dev.kobj, &ds1682_group); + exit: + return rc; +} + +static int ds1682_remove(struct i2c_client *client) +{ + sysfs_remove_bin_file(&client->dev.kobj, &ds1682_eeprom_attr); + sysfs_remove_group(&client->dev.kobj, &ds1682_group); + return 0; +} + +static const struct i2c_device_id ds1682_id[] = { + { "ds1682", 0 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, ds1682_id); + +static const struct of_device_id ds1682_of_match[] = { + { .compatible = "dallas,ds1682", }, + {} +}; +MODULE_DEVICE_TABLE(of, ds1682_of_match); + +static struct i2c_driver ds1682_driver = { + .driver = { + .name = "ds1682", + .of_match_table = ds1682_of_match, + }, + .probe = ds1682_probe, + .remove = ds1682_remove, + .id_table = ds1682_id, +}; + +module_i2c_driver(ds1682_driver); + +MODULE_AUTHOR("Grant Likely "); +MODULE_DESCRIPTION("DS1682 Elapsed Time Indicator driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/misc/dummy-irq.c b/drivers/misc/dummy-irq.c new file mode 100644 index 000000000..76a1015d5 --- /dev/null +++ b/drivers/misc/dummy-irq.c @@ -0,0 +1,64 @@ +/* + * Dummy IRQ handler driver. + * + * This module only registers itself as a handler that is specified to it + * by the 'irq' parameter. + * + * The sole purpose of this module is to help with debugging of systems on + * which spurious IRQs would happen on disabled IRQ vector. + * + * Copyright (C) 2013 Jiri Kosina + */ + +/* + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + */ +#include +#include +#include + +static int irq = -1; + +static irqreturn_t dummy_interrupt(int irq, void *dev_id) +{ + static int count = 0; + + if (count == 0) { + printk(KERN_INFO "dummy-irq: interrupt occurred on IRQ %d\n", + irq); + count++; + } + + return IRQ_NONE; +} + +static int __init dummy_irq_init(void) +{ + if (irq < 0) { + printk(KERN_ERR "dummy-irq: no IRQ given. Use irq=N\n"); + return -EIO; + } + if (request_irq(irq, &dummy_interrupt, IRQF_SHARED, "dummy_irq", &irq)) { + printk(KERN_ERR "dummy-irq: cannot register IRQ %d\n", irq); + return -EIO; + } + printk(KERN_INFO "dummy-irq: registered for IRQ %d\n", irq); + return 0; +} + +static void __exit dummy_irq_exit(void) +{ + printk(KERN_INFO "dummy-irq unloaded\n"); + free_irq(irq, &irq); +} + +module_init(dummy_irq_init); +module_exit(dummy_irq_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Jiri Kosina"); +module_param_hw(irq, uint, irq, 0444); +MODULE_PARM_DESC(irq, "The IRQ to register for"); +MODULE_DESCRIPTION("Dummy IRQ handler driver"); diff --git a/drivers/misc/echo/Kconfig b/drivers/misc/echo/Kconfig new file mode 100644 index 000000000..f1d41ea9c --- /dev/null +++ b/drivers/misc/echo/Kconfig @@ -0,0 +1,9 @@ +config ECHO + tristate "Line Echo Canceller support" + default n + ---help--- + This driver provides line echo cancelling support for mISDN and + Zaptel drivers. + + To compile this driver as a module, choose M here. The module + will be called echo. diff --git a/drivers/misc/echo/Makefile b/drivers/misc/echo/Makefile new file mode 100644 index 000000000..7d4caac12 --- /dev/null +++ b/drivers/misc/echo/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_ECHO) += echo.o diff --git a/drivers/misc/echo/echo.c b/drivers/misc/echo/echo.c new file mode 100644 index 000000000..3ebe5d75a --- /dev/null +++ b/drivers/misc/echo/echo.c @@ -0,0 +1,601 @@ +/* + * SpanDSP - a series of DSP components for telephony + * + * echo.c - A line echo canceller. This code is being developed + * against and partially complies with G168. + * + * Written by Steve Underwood + * and David Rowe + * + * Copyright (C) 2001, 2003 Steve Underwood, 2007 David Rowe + * + * Based on a bit from here, a bit from there, eye of toad, ear of + * bat, 15 years of failed attempts by David and a few fried brain + * cells. + * + * All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +/*! \file */ + +/* Implementation Notes + David Rowe + April 2007 + + This code started life as Steve's NLMS algorithm with a tap + rotation algorithm to handle divergence during double talk. I + added a Geigel Double Talk Detector (DTD) [2] and performed some + G168 tests. However I had trouble meeting the G168 requirements, + especially for double talk - there were always cases where my DTD + failed, for example where near end speech was under the 6dB + threshold required for declaring double talk. + + So I tried a two path algorithm [1], which has so far given better + results. The original tap rotation/Geigel algorithm is available + in SVN http://svn.rowetel.com/software/oslec/tags/before_16bit. + It's probably possible to make it work if some one wants to put some + serious work into it. + + At present no special treatment is provided for tones, which + generally cause NLMS algorithms to diverge. Initial runs of a + subset of the G168 tests for tones (e.g ./echo_test 6) show the + current algorithm is passing OK, which is kind of surprising. The + full set of tests needs to be performed to confirm this result. + + One other interesting change is that I have managed to get the NLMS + code to work with 16 bit coefficients, rather than the original 32 + bit coefficents. This reduces the MIPs and storage required. + I evaulated the 16 bit port using g168_tests.sh and listening tests + on 4 real-world samples. + + I also attempted the implementation of a block based NLMS update + [2] but although this passes g168_tests.sh it didn't converge well + on the real-world samples. I have no idea why, perhaps a scaling + problem. The block based code is also available in SVN + http://svn.rowetel.com/software/oslec/tags/before_16bit. If this + code can be debugged, it will lead to further reduction in MIPS, as + the block update code maps nicely onto DSP instruction sets (it's a + dot product) compared to the current sample-by-sample update. + + Steve also has some nice notes on echo cancellers in echo.h + + References: + + [1] Ochiai, Areseki, and Ogihara, "Echo Canceller with Two Echo + Path Models", IEEE Transactions on communications, COM-25, + No. 6, June + 1977. + http://www.rowetel.com/images/echo/dual_path_paper.pdf + + [2] The classic, very useful paper that tells you how to + actually build a real world echo canceller: + Messerschmitt, Hedberg, Cole, Haoui, Winship, "Digital Voice + Echo Canceller with a TMS320020, + http://www.rowetel.com/images/echo/spra129.pdf + + [3] I have written a series of blog posts on this work, here is + Part 1: http://www.rowetel.com/blog/?p=18 + + [4] The source code http://svn.rowetel.com/software/oslec/ + + [5] A nice reference on LMS filters: + http://en.wikipedia.org/wiki/Least_mean_squares_filter + + Credits: + + Thanks to Steve Underwood, Jean-Marc Valin, and Ramakrishnan + Muthukrishnan for their suggestions and email discussions. Thanks + also to those people who collected echo samples for me such as + Mark, Pawel, and Pavel. +*/ + +#include +#include +#include + +#include "echo.h" + +#define MIN_TX_POWER_FOR_ADAPTION 64 +#define MIN_RX_POWER_FOR_ADAPTION 64 +#define DTD_HANGOVER 600 /* 600 samples, or 75ms */ +#define DC_LOG2BETA 3 /* log2() of DC filter Beta */ + +/* adapting coeffs using the traditional stochastic descent (N)LMS algorithm */ + +static inline void lms_adapt_bg(struct oslec_state *ec, int clean, int shift) +{ + int i; + + int offset1; + int offset2; + int factor; + int exp; + + if (shift > 0) + factor = clean << shift; + else + factor = clean >> -shift; + + /* Update the FIR taps */ + + offset2 = ec->curr_pos; + offset1 = ec->taps - offset2; + + for (i = ec->taps - 1; i >= offset1; i--) { + exp = (ec->fir_state_bg.history[i - offset1] * factor); + ec->fir_taps16[1][i] += (int16_t) ((exp + (1 << 14)) >> 15); + } + for (; i >= 0; i--) { + exp = (ec->fir_state_bg.history[i + offset2] * factor); + ec->fir_taps16[1][i] += (int16_t) ((exp + (1 << 14)) >> 15); + } +} + +static inline int top_bit(unsigned int bits) +{ + if (bits == 0) + return -1; + else + return (int)fls((int32_t) bits) - 1; +} + +struct oslec_state *oslec_create(int len, int adaption_mode) +{ + struct oslec_state *ec; + int i; + const int16_t *history; + + ec = kzalloc(sizeof(*ec), GFP_KERNEL); + if (!ec) + return NULL; + + ec->taps = len; + ec->log2taps = top_bit(len); + ec->curr_pos = ec->taps - 1; + + ec->fir_taps16[0] = + kcalloc(ec->taps, sizeof(int16_t), GFP_KERNEL); + if (!ec->fir_taps16[0]) + goto error_oom_0; + + ec->fir_taps16[1] = + kcalloc(ec->taps, sizeof(int16_t), GFP_KERNEL); + if (!ec->fir_taps16[1]) + goto error_oom_1; + + history = fir16_create(&ec->fir_state, ec->fir_taps16[0], ec->taps); + if (!history) + goto error_state; + history = fir16_create(&ec->fir_state_bg, ec->fir_taps16[1], ec->taps); + if (!history) + goto error_state_bg; + + for (i = 0; i < 5; i++) + ec->xvtx[i] = ec->yvtx[i] = ec->xvrx[i] = ec->yvrx[i] = 0; + + ec->cng_level = 1000; + oslec_adaption_mode(ec, adaption_mode); + + ec->snapshot = kcalloc(ec->taps, sizeof(int16_t), GFP_KERNEL); + if (!ec->snapshot) + goto error_snap; + + ec->cond_met = 0; + ec->pstates = 0; + ec->ltxacc = ec->lrxacc = ec->lcleanacc = ec->lclean_bgacc = 0; + ec->ltx = ec->lrx = ec->lclean = ec->lclean_bg = 0; + ec->tx_1 = ec->tx_2 = ec->rx_1 = ec->rx_2 = 0; + ec->lbgn = ec->lbgn_acc = 0; + ec->lbgn_upper = 200; + ec->lbgn_upper_acc = ec->lbgn_upper << 13; + + return ec; + +error_snap: + fir16_free(&ec->fir_state_bg); +error_state_bg: + fir16_free(&ec->fir_state); +error_state: + kfree(ec->fir_taps16[1]); +error_oom_1: + kfree(ec->fir_taps16[0]); +error_oom_0: + kfree(ec); + return NULL; +} +EXPORT_SYMBOL_GPL(oslec_create); + +void oslec_free(struct oslec_state *ec) +{ + int i; + + fir16_free(&ec->fir_state); + fir16_free(&ec->fir_state_bg); + for (i = 0; i < 2; i++) + kfree(ec->fir_taps16[i]); + kfree(ec->snapshot); + kfree(ec); +} +EXPORT_SYMBOL_GPL(oslec_free); + +void oslec_adaption_mode(struct oslec_state *ec, int adaption_mode) +{ + ec->adaption_mode = adaption_mode; +} +EXPORT_SYMBOL_GPL(oslec_adaption_mode); + +void oslec_flush(struct oslec_state *ec) +{ + int i; + + ec->ltxacc = ec->lrxacc = ec->lcleanacc = ec->lclean_bgacc = 0; + ec->ltx = ec->lrx = ec->lclean = ec->lclean_bg = 0; + ec->tx_1 = ec->tx_2 = ec->rx_1 = ec->rx_2 = 0; + + ec->lbgn = ec->lbgn_acc = 0; + ec->lbgn_upper = 200; + ec->lbgn_upper_acc = ec->lbgn_upper << 13; + + ec->nonupdate_dwell = 0; + + fir16_flush(&ec->fir_state); + fir16_flush(&ec->fir_state_bg); + ec->fir_state.curr_pos = ec->taps - 1; + ec->fir_state_bg.curr_pos = ec->taps - 1; + for (i = 0; i < 2; i++) + memset(ec->fir_taps16[i], 0, ec->taps * sizeof(int16_t)); + + ec->curr_pos = ec->taps - 1; + ec->pstates = 0; +} +EXPORT_SYMBOL_GPL(oslec_flush); + +void oslec_snapshot(struct oslec_state *ec) +{ + memcpy(ec->snapshot, ec->fir_taps16[0], ec->taps * sizeof(int16_t)); +} +EXPORT_SYMBOL_GPL(oslec_snapshot); + +/* Dual Path Echo Canceller */ + +int16_t oslec_update(struct oslec_state *ec, int16_t tx, int16_t rx) +{ + int32_t echo_value; + int clean_bg; + int tmp; + int tmp1; + + /* + * Input scaling was found be required to prevent problems when tx + * starts clipping. Another possible way to handle this would be the + * filter coefficent scaling. + */ + + ec->tx = tx; + ec->rx = rx; + tx >>= 1; + rx >>= 1; + + /* + * Filter DC, 3dB point is 160Hz (I think), note 32 bit precision + * required otherwise values do not track down to 0. Zero at DC, Pole + * at (1-Beta) on real axis. Some chip sets (like Si labs) don't + * need this, but something like a $10 X100P card does. Any DC really + * slows down convergence. + * + * Note: removes some low frequency from the signal, this reduces the + * speech quality when listening to samples through headphones but may + * not be obvious through a telephone handset. + * + * Note that the 3dB frequency in radians is approx Beta, e.g. for Beta + * = 2^(-3) = 0.125, 3dB freq is 0.125 rads = 159Hz. + */ + + if (ec->adaption_mode & ECHO_CAN_USE_RX_HPF) { + tmp = rx << 15; + + /* + * Make sure the gain of the HPF is 1.0. This can still + * saturate a little under impulse conditions, and it might + * roll to 32768 and need clipping on sustained peak level + * signals. However, the scale of such clipping is small, and + * the error due to any saturation should not markedly affect + * the downstream processing. + */ + tmp -= (tmp >> 4); + + ec->rx_1 += -(ec->rx_1 >> DC_LOG2BETA) + tmp - ec->rx_2; + + /* + * hard limit filter to prevent clipping. Note that at this + * stage rx should be limited to +/- 16383 due to right shift + * above + */ + tmp1 = ec->rx_1 >> 15; + if (tmp1 > 16383) + tmp1 = 16383; + if (tmp1 < -16383) + tmp1 = -16383; + rx = tmp1; + ec->rx_2 = tmp; + } + + /* Block average of power in the filter states. Used for + adaption power calculation. */ + + { + int new, old; + + /* efficient "out with the old and in with the new" algorithm so + we don't have to recalculate over the whole block of + samples. */ + new = (int)tx * (int)tx; + old = (int)ec->fir_state.history[ec->fir_state.curr_pos] * + (int)ec->fir_state.history[ec->fir_state.curr_pos]; + ec->pstates += + ((new - old) + (1 << (ec->log2taps - 1))) >> ec->log2taps; + if (ec->pstates < 0) + ec->pstates = 0; + } + + /* Calculate short term average levels using simple single pole IIRs */ + + ec->ltxacc += abs(tx) - ec->ltx; + ec->ltx = (ec->ltxacc + (1 << 4)) >> 5; + ec->lrxacc += abs(rx) - ec->lrx; + ec->lrx = (ec->lrxacc + (1 << 4)) >> 5; + + /* Foreground filter */ + + ec->fir_state.coeffs = ec->fir_taps16[0]; + echo_value = fir16(&ec->fir_state, tx); + ec->clean = rx - echo_value; + ec->lcleanacc += abs(ec->clean) - ec->lclean; + ec->lclean = (ec->lcleanacc + (1 << 4)) >> 5; + + /* Background filter */ + + echo_value = fir16(&ec->fir_state_bg, tx); + clean_bg = rx - echo_value; + ec->lclean_bgacc += abs(clean_bg) - ec->lclean_bg; + ec->lclean_bg = (ec->lclean_bgacc + (1 << 4)) >> 5; + + /* Background Filter adaption */ + + /* Almost always adap bg filter, just simple DT and energy + detection to minimise adaption in cases of strong double talk. + However this is not critical for the dual path algorithm. + */ + ec->factor = 0; + ec->shift = 0; + if (!ec->nonupdate_dwell) { + int p, logp, shift; + + /* Determine: + + f = Beta * clean_bg_rx/P ------ (1) + + where P is the total power in the filter states. + + The Boffins have shown that if we obey (1) we converge + quickly and avoid instability. + + The correct factor f must be in Q30, as this is the fixed + point format required by the lms_adapt_bg() function, + therefore the scaled version of (1) is: + + (2^30) * f = (2^30) * Beta * clean_bg_rx/P + factor = (2^30) * Beta * clean_bg_rx/P ----- (2) + + We have chosen Beta = 0.25 by experiment, so: + + factor = (2^30) * (2^-2) * clean_bg_rx/P + + (30 - 2 - log2(P)) + factor = clean_bg_rx 2 ----- (3) + + To avoid a divide we approximate log2(P) as top_bit(P), + which returns the position of the highest non-zero bit in + P. This approximation introduces an error as large as a + factor of 2, but the algorithm seems to handle it OK. + + Come to think of it a divide may not be a big deal on a + modern DSP, so its probably worth checking out the cycles + for a divide versus a top_bit() implementation. + */ + + p = MIN_TX_POWER_FOR_ADAPTION + ec->pstates; + logp = top_bit(p) + ec->log2taps; + shift = 30 - 2 - logp; + ec->shift = shift; + + lms_adapt_bg(ec, clean_bg, shift); + } + + /* very simple DTD to make sure we dont try and adapt with strong + near end speech */ + + ec->adapt = 0; + if ((ec->lrx > MIN_RX_POWER_FOR_ADAPTION) && (ec->lrx > ec->ltx)) + ec->nonupdate_dwell = DTD_HANGOVER; + if (ec->nonupdate_dwell) + ec->nonupdate_dwell--; + + /* Transfer logic */ + + /* These conditions are from the dual path paper [1], I messed with + them a bit to improve performance. */ + + if ((ec->adaption_mode & ECHO_CAN_USE_ADAPTION) && + (ec->nonupdate_dwell == 0) && + /* (ec->Lclean_bg < 0.875*ec->Lclean) */ + (8 * ec->lclean_bg < 7 * ec->lclean) && + /* (ec->Lclean_bg < 0.125*ec->Ltx) */ + (8 * ec->lclean_bg < ec->ltx)) { + if (ec->cond_met == 6) { + /* + * BG filter has had better results for 6 consecutive + * samples + */ + ec->adapt = 1; + memcpy(ec->fir_taps16[0], ec->fir_taps16[1], + ec->taps * sizeof(int16_t)); + } else + ec->cond_met++; + } else + ec->cond_met = 0; + + /* Non-Linear Processing */ + + ec->clean_nlp = ec->clean; + if (ec->adaption_mode & ECHO_CAN_USE_NLP) { + /* + * Non-linear processor - a fancy way to say "zap small + * signals, to avoid residual echo due to (uLaw/ALaw) + * non-linearity in the channel.". + */ + + if ((16 * ec->lclean < ec->ltx)) { + /* + * Our e/c has improved echo by at least 24 dB (each + * factor of 2 is 6dB, so 2*2*2*2=16 is the same as + * 6+6+6+6=24dB) + */ + if (ec->adaption_mode & ECHO_CAN_USE_CNG) { + ec->cng_level = ec->lbgn; + + /* + * Very elementary comfort noise generation. + * Just random numbers rolled off very vaguely + * Hoth-like. DR: This noise doesn't sound + * quite right to me - I suspect there are some + * overflow issues in the filtering as it's too + * "crackly". + * TODO: debug this, maybe just play noise at + * high level or look at spectrum. + */ + + ec->cng_rndnum = + 1664525U * ec->cng_rndnum + 1013904223U; + ec->cng_filter = + ((ec->cng_rndnum & 0xFFFF) - 32768 + + 5 * ec->cng_filter) >> 3; + ec->clean_nlp = + (ec->cng_filter * ec->cng_level * 8) >> 14; + + } else if (ec->adaption_mode & ECHO_CAN_USE_CLIP) { + /* This sounds much better than CNG */ + if (ec->clean_nlp > ec->lbgn) + ec->clean_nlp = ec->lbgn; + if (ec->clean_nlp < -ec->lbgn) + ec->clean_nlp = -ec->lbgn; + } else { + /* + * just mute the residual, doesn't sound very + * good, used mainly in G168 tests + */ + ec->clean_nlp = 0; + } + } else { + /* + * Background noise estimator. I tried a few + * algorithms here without much luck. This very simple + * one seems to work best, we just average the level + * using a slow (1 sec time const) filter if the + * current level is less than a (experimentally + * derived) constant. This means we dont include high + * level signals like near end speech. When combined + * with CNG or especially CLIP seems to work OK. + */ + if (ec->lclean < 40) { + ec->lbgn_acc += abs(ec->clean) - ec->lbgn; + ec->lbgn = (ec->lbgn_acc + (1 << 11)) >> 12; + } + } + } + + /* Roll around the taps buffer */ + if (ec->curr_pos <= 0) + ec->curr_pos = ec->taps; + ec->curr_pos--; + + if (ec->adaption_mode & ECHO_CAN_DISABLE) + ec->clean_nlp = rx; + + /* Output scaled back up again to match input scaling */ + + return (int16_t) ec->clean_nlp << 1; +} +EXPORT_SYMBOL_GPL(oslec_update); + +/* This function is separated from the echo canceller is it is usually called + as part of the tx process. See rx HP (DC blocking) filter above, it's + the same design. + + Some soft phones send speech signals with a lot of low frequency + energy, e.g. down to 20Hz. This can make the hybrid non-linear + which causes the echo canceller to fall over. This filter can help + by removing any low frequency before it gets to the tx port of the + hybrid. + + It can also help by removing and DC in the tx signal. DC is bad + for LMS algorithms. + + This is one of the classic DC removal filters, adjusted to provide + sufficient bass rolloff to meet the above requirement to protect hybrids + from things that upset them. The difference between successive samples + produces a lousy HPF, and then a suitably placed pole flattens things out. + The final result is a nicely rolled off bass end. The filtering is + implemented with extended fractional precision, which noise shapes things, + giving very clean DC removal. +*/ + +int16_t oslec_hpf_tx(struct oslec_state *ec, int16_t tx) +{ + int tmp; + int tmp1; + + if (ec->adaption_mode & ECHO_CAN_USE_TX_HPF) { + tmp = tx << 15; + + /* + * Make sure the gain of the HPF is 1.0. The first can still + * saturate a little under impulse conditions, and it might + * roll to 32768 and need clipping on sustained peak level + * signals. However, the scale of such clipping is small, and + * the error due to any saturation should not markedly affect + * the downstream processing. + */ + tmp -= (tmp >> 4); + + ec->tx_1 += -(ec->tx_1 >> DC_LOG2BETA) + tmp - ec->tx_2; + tmp1 = ec->tx_1 >> 15; + if (tmp1 > 32767) + tmp1 = 32767; + if (tmp1 < -32767) + tmp1 = -32767; + tx = tmp1; + ec->tx_2 = tmp; + } + + return tx; +} +EXPORT_SYMBOL_GPL(oslec_hpf_tx); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("David Rowe"); +MODULE_DESCRIPTION("Open Source Line Echo Canceller"); +MODULE_VERSION("0.3.0"); diff --git a/drivers/misc/echo/echo.h b/drivers/misc/echo/echo.h new file mode 100644 index 000000000..9b08c63e6 --- /dev/null +++ b/drivers/misc/echo/echo.h @@ -0,0 +1,187 @@ +/* + * SpanDSP - a series of DSP components for telephony + * + * echo.c - A line echo canceller. This code is being developed + * against and partially complies with G168. + * + * Written by Steve Underwood + * and David Rowe + * + * Copyright (C) 2001 Steve Underwood and 2007 David Rowe + * + * All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef __ECHO_H +#define __ECHO_H + +/* +Line echo cancellation for voice + +What does it do? + +This module aims to provide G.168-2002 compliant echo cancellation, to remove +electrical echoes (e.g. from 2-4 wire hybrids) from voice calls. + +How does it work? + +The heart of the echo cancellor is FIR filter. This is adapted to match the +echo impulse response of the telephone line. It must be long enough to +adequately cover the duration of that impulse response. The signal transmitted +to the telephone line is passed through the FIR filter. Once the FIR is +properly adapted, the resulting output is an estimate of the echo signal +received from the line. This is subtracted from the received signal. The result +is an estimate of the signal which originated at the far end of the line, free +from echos of our own transmitted signal. + +The least mean squares (LMS) algorithm is attributed to Widrow and Hoff, and +was introduced in 1960. It is the commonest form of filter adaption used in +things like modem line equalisers and line echo cancellers. There it works very +well. However, it only works well for signals of constant amplitude. It works +very poorly for things like speech echo cancellation, where the signal level +varies widely. This is quite easy to fix. If the signal level is normalised - +similar to applying AGC - LMS can work as well for a signal of varying +amplitude as it does for a modem signal. This normalised least mean squares +(NLMS) algorithm is the commonest one used for speech echo cancellation. Many +other algorithms exist - e.g. RLS (essentially the same as Kalman filtering), +FAP, etc. Some perform significantly better than NLMS. However, factors such +as computational complexity and patents favour the use of NLMS. + +A simple refinement to NLMS can improve its performance with speech. NLMS tends +to adapt best to the strongest parts of a signal. If the signal is white noise, +the NLMS algorithm works very well. However, speech has more low frequency than +high frequency content. Pre-whitening (i.e. filtering the signal to flatten its +spectrum) the echo signal improves the adapt rate for speech, and ensures the +final residual signal is not heavily biased towards high frequencies. A very +low complexity filter is adequate for this, so pre-whitening adds little to the +compute requirements of the echo canceller. + +An FIR filter adapted using pre-whitened NLMS performs well, provided certain +conditions are met: + + - The transmitted signal has poor self-correlation. + - There is no signal being generated within the environment being + cancelled. + +The difficulty is that neither of these can be guaranteed. + +If the adaption is performed while transmitting noise (or something fairly +noise like, such as voice) the adaption works very well. If the adaption is +performed while transmitting something highly correlative (typically narrow +band energy such as signalling tones or DTMF), the adaption can go seriously +wrong. The reason is there is only one solution for the adaption on a near +random signal - the impulse response of the line. For a repetitive signal, +there are any number of solutions which converge the adaption, and nothing +guides the adaption to choose the generalised one. Allowing an untrained +canceller to converge on this kind of narrowband energy probably a good thing, +since at least it cancels the tones. Allowing a well converged canceller to +continue converging on such energy is just a way to ruin its generalised +adaption. A narrowband detector is needed, so adapation can be suspended at +appropriate times. + +The adaption process is based on trying to eliminate the received signal. When +there is any signal from within the environment being cancelled it may upset +the adaption process. Similarly, if the signal we are transmitting is small, +noise may dominate and disturb the adaption process. If we can ensure that the +adaption is only performed when we are transmitting a significant signal level, +and the environment is not, things will be OK. Clearly, it is easy to tell when +we are sending a significant signal. Telling, if the environment is generating +a significant signal, and doing it with sufficient speed that the adaption will +not have diverged too much more we stop it, is a little harder. + +The key problem in detecting when the environment is sourcing significant +energy is that we must do this very quickly. Given a reasonably long sample of +the received signal, there are a number of strategies which may be used to +assess whether that signal contains a strong far end component. However, by the +time that assessment is complete the far end signal will have already caused +major mis-convergence in the adaption process. An assessment algorithm is +needed which produces a fairly accurate result from a very short burst of far +end energy. + +How do I use it? + +The echo cancellor processes both the transmit and receive streams sample by +sample. The processing function is not declared inline. Unfortunately, +cancellation requires many operations per sample, so the call overhead is only +a minor burden. +*/ + +#include "fir.h" +#include "oslec.h" + +/* + G.168 echo canceller descriptor. This defines the working state for a line + echo canceller. +*/ +struct oslec_state { + int16_t tx; + int16_t rx; + int16_t clean; + int16_t clean_nlp; + + int nonupdate_dwell; + int curr_pos; + int taps; + int log2taps; + int adaption_mode; + + int cond_met; + int32_t pstates; + int16_t adapt; + int32_t factor; + int16_t shift; + + /* Average levels and averaging filter states */ + int ltxacc; + int lrxacc; + int lcleanacc; + int lclean_bgacc; + int ltx; + int lrx; + int lclean; + int lclean_bg; + int lbgn; + int lbgn_acc; + int lbgn_upper; + int lbgn_upper_acc; + + /* foreground and background filter states */ + struct fir16_state_t fir_state; + struct fir16_state_t fir_state_bg; + int16_t *fir_taps16[2]; + + /* DC blocking filter states */ + int tx_1; + int tx_2; + int rx_1; + int rx_2; + + /* optional High Pass Filter states */ + int32_t xvtx[5]; + int32_t yvtx[5]; + int32_t xvrx[5]; + int32_t yvrx[5]; + + /* Parameters for the optional Hoth noise generator */ + int cng_level; + int cng_rndnum; + int cng_filter; + + /* snapshot sample of coeffs used for development */ + int16_t *snapshot; +}; + +#endif /* __ECHO_H */ diff --git a/drivers/misc/echo/fir.h b/drivers/misc/echo/fir.h new file mode 100644 index 000000000..4e0f365f0 --- /dev/null +++ b/drivers/misc/echo/fir.h @@ -0,0 +1,166 @@ +/* + * SpanDSP - a series of DSP components for telephony + * + * fir.h - General telephony FIR routines + * + * Written by Steve Underwood + * + * Copyright (C) 2002 Steve Underwood + * + * All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#if !defined(_FIR_H_) +#define _FIR_H_ + +/* + Ideas for improvement: + + 1/ Rewrite filter for dual MAC inner loop. The issue here is handling + history sample offsets that are 16 bit aligned - the dual MAC needs + 32 bit aligmnent. There are some good examples in libbfdsp. + + 2/ Use the hardware circular buffer facility tohalve memory usage. + + 3/ Consider using internal memory. + + Using less memory might also improve speed as cache misses will be + reduced. A drop in MIPs and memory approaching 50% should be + possible. + + The foreground and background filters currenlty use a total of + about 10 MIPs/ch as measured with speedtest.c on a 256 TAP echo + can. +*/ + +/* + * 16 bit integer FIR descriptor. This defines the working state for a single + * instance of an FIR filter using 16 bit integer coefficients. + */ +struct fir16_state_t { + int taps; + int curr_pos; + const int16_t *coeffs; + int16_t *history; +}; + +/* + * 32 bit integer FIR descriptor. This defines the working state for a single + * instance of an FIR filter using 32 bit integer coefficients, and filtering + * 16 bit integer data. + */ +struct fir32_state_t { + int taps; + int curr_pos; + const int32_t *coeffs; + int16_t *history; +}; + +/* + * Floating point FIR descriptor. This defines the working state for a single + * instance of an FIR filter using floating point coefficients and data. + */ +struct fir_float_state_t { + int taps; + int curr_pos; + const float *coeffs; + float *history; +}; + +static inline const int16_t *fir16_create(struct fir16_state_t *fir, + const int16_t *coeffs, int taps) +{ + fir->taps = taps; + fir->curr_pos = taps - 1; + fir->coeffs = coeffs; + fir->history = kcalloc(taps, sizeof(int16_t), GFP_KERNEL); + return fir->history; +} + +static inline void fir16_flush(struct fir16_state_t *fir) +{ + memset(fir->history, 0, fir->taps * sizeof(int16_t)); +} + +static inline void fir16_free(struct fir16_state_t *fir) +{ + kfree(fir->history); +} + +static inline int16_t fir16(struct fir16_state_t *fir, int16_t sample) +{ + int32_t y; + int i; + int offset1; + int offset2; + + fir->history[fir->curr_pos] = sample; + + offset2 = fir->curr_pos; + offset1 = fir->taps - offset2; + y = 0; + for (i = fir->taps - 1; i >= offset1; i--) + y += fir->coeffs[i] * fir->history[i - offset1]; + for (; i >= 0; i--) + y += fir->coeffs[i] * fir->history[i + offset2]; + if (fir->curr_pos <= 0) + fir->curr_pos = fir->taps; + fir->curr_pos--; + return (int16_t) (y >> 15); +} + +static inline const int16_t *fir32_create(struct fir32_state_t *fir, + const int32_t *coeffs, int taps) +{ + fir->taps = taps; + fir->curr_pos = taps - 1; + fir->coeffs = coeffs; + fir->history = kcalloc(taps, sizeof(int16_t), GFP_KERNEL); + return fir->history; +} + +static inline void fir32_flush(struct fir32_state_t *fir) +{ + memset(fir->history, 0, fir->taps * sizeof(int16_t)); +} + +static inline void fir32_free(struct fir32_state_t *fir) +{ + kfree(fir->history); +} + +static inline int16_t fir32(struct fir32_state_t *fir, int16_t sample) +{ + int i; + int32_t y; + int offset1; + int offset2; + + fir->history[fir->curr_pos] = sample; + offset2 = fir->curr_pos; + offset1 = fir->taps - offset2; + y = 0; + for (i = fir->taps - 1; i >= offset1; i--) + y += fir->coeffs[i] * fir->history[i - offset1]; + for (; i >= 0; i--) + y += fir->coeffs[i] * fir->history[i + offset2]; + if (fir->curr_pos <= 0) + fir->curr_pos = fir->taps; + fir->curr_pos--; + return (int16_t) (y >> 15); +} + +#endif diff --git a/drivers/misc/echo/oslec.h b/drivers/misc/echo/oslec.h new file mode 100644 index 000000000..f4175360c --- /dev/null +++ b/drivers/misc/echo/oslec.h @@ -0,0 +1,94 @@ +/* + * OSLEC - A line echo canceller. This code is being developed + * against and partially complies with G168. Using code from SpanDSP + * + * Written by Steve Underwood + * and David Rowe + * + * Copyright (C) 2001 Steve Underwood and 2007-2008 David Rowe + * + * All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + */ + +#ifndef __OSLEC_H +#define __OSLEC_H + +/* Mask bits for the adaption mode */ +#define ECHO_CAN_USE_ADAPTION 0x01 +#define ECHO_CAN_USE_NLP 0x02 +#define ECHO_CAN_USE_CNG 0x04 +#define ECHO_CAN_USE_CLIP 0x08 +#define ECHO_CAN_USE_TX_HPF 0x10 +#define ECHO_CAN_USE_RX_HPF 0x20 +#define ECHO_CAN_DISABLE 0x40 + +/** + * oslec_state: G.168 echo canceller descriptor. + * + * This defines the working state for a line echo canceller. + */ +struct oslec_state; + +/** + * oslec_create - Create a voice echo canceller context. + * @len: The length of the canceller, in samples. + * @return: The new canceller context, or NULL if the canceller could not be + * created. + */ +struct oslec_state *oslec_create(int len, int adaption_mode); + +/** + * oslec_free - Free a voice echo canceller context. + * @ec: The echo canceller context. + */ +void oslec_free(struct oslec_state *ec); + +/** + * oslec_flush - Flush (reinitialise) a voice echo canceller context. + * @ec: The echo canceller context. + */ +void oslec_flush(struct oslec_state *ec); + +/** + * oslec_adaption_mode - set the adaption mode of a voice echo canceller context. + * @ec The echo canceller context. + * @adaption_mode: The mode. + */ +void oslec_adaption_mode(struct oslec_state *ec, int adaption_mode); + +void oslec_snapshot(struct oslec_state *ec); + +/** + * oslec_update: Process a sample through a voice echo canceller. + * @ec: The echo canceller context. + * @tx: The transmitted audio sample. + * @rx: The received audio sample. + * + * The return value is the clean (echo cancelled) received sample. + */ +int16_t oslec_update(struct oslec_state *ec, int16_t tx, int16_t rx); + +/** + * oslec_hpf_tx: Process to high pass filter the tx signal. + * @ec: The echo canceller context. + * @tx: The transmitted auio sample. + * + * The return value is the HP filtered transmit sample, send this to your D/A. + */ +int16_t oslec_hpf_tx(struct oslec_state *ec, int16_t tx); + +#endif /* __OSLEC_H */ diff --git a/drivers/misc/eeprom/Kconfig b/drivers/misc/eeprom/Kconfig new file mode 100644 index 000000000..d382b13c2 --- /dev/null +++ b/drivers/misc/eeprom/Kconfig @@ -0,0 +1,114 @@ +menu "EEPROM support" + +config EEPROM_AT24 + tristate "I2C EEPROMs / RAMs / ROMs from most vendors" + depends on I2C && SYSFS + select NVMEM + select REGMAP_I2C + help + Enable this driver to get read/write support to most I2C EEPROMs + and compatible devices like FRAMs, SRAMs, ROMs etc. After you + configure the driver to know about each chip on your target + board. Use these generic chip names, instead of vendor-specific + ones like at24c64, 24lc02 or fm24c04: + + 24c00, 24c01, 24c02, spd (readonly 24c02), 24c04, 24c08, + 24c16, 24c32, 24c64, 24c128, 24c256, 24c512, 24c1024, 24c2048 + + Unless you like data loss puzzles, always be sure that any chip + you configure as a 24c32 (32 kbit) or larger is NOT really a + 24c16 (16 kbit) or smaller, and vice versa. Marking the chip + as read-only won't help recover from this. Also, if your chip + has any software write-protect mechanism you may want to review the + code to make sure this driver won't turn it on by accident. + + If you use this with an SMBus adapter instead of an I2C adapter, + full functionality is not available. Only smaller devices are + supported (24c16 and below, max 4 kByte). + + This driver can also be built as a module. If so, the module + will be called at24. + +config EEPROM_AT25 + tristate "SPI EEPROMs from most vendors" + depends on SPI && SYSFS + select NVMEM + help + Enable this driver to get read/write support to most SPI EEPROMs, + after you configure the board init code to know about each eeprom + on your target board. + + This driver can also be built as a module. If so, the module + will be called at25. + +config EEPROM_LEGACY + tristate "Old I2C EEPROM reader" + depends on I2C && SYSFS + help + If you say yes here you get read-only access to the EEPROM data + available on modern memory DIMMs and Sony Vaio laptops via I2C. Such + EEPROMs could theoretically be available on other devices as well. + + This driver can also be built as a module. If so, the module + will be called eeprom. + +config EEPROM_MAX6875 + tristate "Maxim MAX6874/5 power supply supervisor" + depends on I2C + help + If you say yes here you get read-only support for the user EEPROM of + the Maxim MAX6874/5 EEPROM-programmable, quad power-supply + sequencer/supervisor. + + All other features of this chip should be accessed via i2c-dev. + + This driver can also be built as a module. If so, the module + will be called max6875. + + +config EEPROM_93CX6 + tristate "EEPROM 93CX6 support" + help + This is a driver for the EEPROM chipsets 93c46 and 93c66. + The driver supports both read as well as write commands. + + If unsure, say N. + +config EEPROM_93XX46 + tristate "Microwire EEPROM 93XX46 support" + depends on SPI && SYSFS + select REGMAP + select NVMEM + help + Driver for the microwire EEPROM chipsets 93xx46x. The driver + supports both read and write commands and also the command to + erase the whole EEPROM. + + This driver can also be built as a module. If so, the module + will be called eeprom_93xx46. + + If unsure, say N. + +config EEPROM_DIGSY_MTC_CFG + bool "DigsyMTC display configuration EEPROMs device" + depends on GPIO_MPC5200 && SPI_GPIO + help + This option enables access to display configuration EEPROMs + on digsy_mtc board. You have to additionally select Microwire + EEPROM 93XX46 driver. sysfs entries will be created for that + EEPROM allowing to read/write the configuration data or to + erase the whole EEPROM. + + If unsure, say N. + +config EEPROM_IDT_89HPESX + tristate "IDT 89HPESx PCIe-swtiches EEPROM / CSR support" + depends on I2C && SYSFS + help + Enable this driver to get read/write access to EEPROM / CSRs + over IDT PCIe-swtich i2c-slave interface. + + This driver can also be built as a module. If so, the module + will be called idt_89hpesx. + +endmenu diff --git a/drivers/misc/eeprom/Makefile b/drivers/misc/eeprom/Makefile new file mode 100644 index 000000000..2aab60ef3 --- /dev/null +++ b/drivers/misc/eeprom/Makefile @@ -0,0 +1,9 @@ +# SPDX-License-Identifier: GPL-2.0 +obj-$(CONFIG_EEPROM_AT24) += at24.o +obj-$(CONFIG_EEPROM_AT25) += at25.o +obj-$(CONFIG_EEPROM_LEGACY) += eeprom.o +obj-$(CONFIG_EEPROM_MAX6875) += max6875.o +obj-$(CONFIG_EEPROM_93CX6) += eeprom_93cx6.o +obj-$(CONFIG_EEPROM_93XX46) += eeprom_93xx46.o +obj-$(CONFIG_EEPROM_DIGSY_MTC_CFG) += digsy_mtc_eeprom.o +obj-$(CONFIG_EEPROM_IDT_89HPESX) += idt_89hpesx.o diff --git a/drivers/misc/eeprom/at24.c b/drivers/misc/eeprom/at24.c new file mode 100644 index 000000000..dc3537651 --- /dev/null +++ b/drivers/misc/eeprom/at24.c @@ -0,0 +1,805 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * at24.c - handle most I2C EEPROMs + * + * Copyright (C) 2005-2007 David Brownell + * Copyright (C) 2008 Wolfram Sang, Pengutronix + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * I2C EEPROMs from most vendors are inexpensive and mostly interchangeable. + * Differences between different vendor product lines (like Atmel AT24C or + * MicroChip 24LC, etc) won't much matter for typical read/write access. + * There are also I2C RAM chips, likewise interchangeable. One example + * would be the PCF8570, which acts like a 24c02 EEPROM (256 bytes). + * + * However, misconfiguration can lose data. "Set 16-bit memory address" + * to a part with 8-bit addressing will overwrite data. Writing with too + * big a page size also loses data. And it's not safe to assume that the + * conventional addresses 0x50..0x57 only hold eeproms; a PCF8563 RTC + * uses 0x51, for just one example. + * + * Accordingly, explicit board-specific configuration data should be used + * in almost all cases. (One partial exception is an SMBus used to access + * "SPD" data for DRAM sticks. Those only use 24c02 EEPROMs.) + * + * So this driver uses "new style" I2C driver binding, expecting to be + * told what devices exist. That may be in arch/X/mach-Y/board-Z.c or + * similar kernel-resident tables; or, configuration data coming from + * a bootloader. + * + * Other than binding model, current differences from "eeprom" driver are + * that this one handles write access and isn't restricted to 24c02 devices. + * It also handles larger devices (32 kbit and up) with two-byte addresses, + * which won't work on pure SMBus systems. + */ + +struct at24_client { + struct i2c_client *client; + struct regmap *regmap; +}; + +struct at24_data { + /* + * Lock protects against activities from other Linux tasks, + * but not from changes by other I2C masters. + */ + struct mutex lock; + + unsigned int write_max; + unsigned int num_addresses; + unsigned int offset_adj; + + u32 byte_len; + u16 page_size; + u8 flags; + + struct nvmem_device *nvmem; + + struct gpio_desc *wp_gpio; + + /* + * Some chips tie up multiple I2C addresses; dummy devices reserve + * them for us, and we'll use them with SMBus calls. + */ + struct at24_client client[]; +}; + +/* + * This parameter is to help this driver avoid blocking other drivers out + * of I2C for potentially troublesome amounts of time. With a 100 kHz I2C + * clock, one 256 byte read takes about 1/43 second which is excessive; + * but the 1/170 second it takes at 400 kHz may be quite reasonable; and + * at 1 MHz (Fm+) a 1/430 second delay could easily be invisible. + * + * This value is forced to be a power of two so that writes align on pages. + */ +static unsigned int at24_io_limit = 128; +module_param_named(io_limit, at24_io_limit, uint, 0); +MODULE_PARM_DESC(at24_io_limit, "Maximum bytes per I/O (default 128)"); + +/* + * Specs often allow 5 msec for a page write, sometimes 20 msec; + * it's important to recover from write timeouts. + */ +static unsigned int at24_write_timeout = 25; +module_param_named(write_timeout, at24_write_timeout, uint, 0); +MODULE_PARM_DESC(at24_write_timeout, "Time (in ms) to try writes (default 25)"); + +struct at24_chip_data { + /* + * these fields mirror their equivalents in + * struct at24_platform_data + */ + u32 byte_len; + u8 flags; +}; + +#define AT24_CHIP_DATA(_name, _len, _flags) \ + static const struct at24_chip_data _name = { \ + .byte_len = _len, .flags = _flags, \ + } + +/* needs 8 addresses as A0-A2 are ignored */ +AT24_CHIP_DATA(at24_data_24c00, 128 / 8, AT24_FLAG_TAKE8ADDR); +/* old variants can't be handled with this generic entry! */ +AT24_CHIP_DATA(at24_data_24c01, 1024 / 8, 0); +AT24_CHIP_DATA(at24_data_24cs01, 16, + AT24_FLAG_SERIAL | AT24_FLAG_READONLY); +AT24_CHIP_DATA(at24_data_24c02, 2048 / 8, 0); +AT24_CHIP_DATA(at24_data_24cs02, 16, + AT24_FLAG_SERIAL | AT24_FLAG_READONLY); +AT24_CHIP_DATA(at24_data_24mac402, 48 / 8, + AT24_FLAG_MAC | AT24_FLAG_READONLY); +AT24_CHIP_DATA(at24_data_24mac602, 64 / 8, + AT24_FLAG_MAC | AT24_FLAG_READONLY); +/* spd is a 24c02 in memory DIMMs */ +AT24_CHIP_DATA(at24_data_spd, 2048 / 8, + AT24_FLAG_READONLY | AT24_FLAG_IRUGO); +AT24_CHIP_DATA(at24_data_24c04, 4096 / 8, 0); +AT24_CHIP_DATA(at24_data_24cs04, 16, + AT24_FLAG_SERIAL | AT24_FLAG_READONLY); +/* 24rf08 quirk is handled at i2c-core */ +AT24_CHIP_DATA(at24_data_24c08, 8192 / 8, 0); +AT24_CHIP_DATA(at24_data_24cs08, 16, + AT24_FLAG_SERIAL | AT24_FLAG_READONLY); +AT24_CHIP_DATA(at24_data_24c16, 16384 / 8, 0); +AT24_CHIP_DATA(at24_data_24cs16, 16, + AT24_FLAG_SERIAL | AT24_FLAG_READONLY); +AT24_CHIP_DATA(at24_data_24c32, 32768 / 8, AT24_FLAG_ADDR16); +AT24_CHIP_DATA(at24_data_24cs32, 16, + AT24_FLAG_ADDR16 | AT24_FLAG_SERIAL | AT24_FLAG_READONLY); +AT24_CHIP_DATA(at24_data_24c64, 65536 / 8, AT24_FLAG_ADDR16); +AT24_CHIP_DATA(at24_data_24cs64, 16, + AT24_FLAG_ADDR16 | AT24_FLAG_SERIAL | AT24_FLAG_READONLY); +AT24_CHIP_DATA(at24_data_24c128, 131072 / 8, AT24_FLAG_ADDR16); +AT24_CHIP_DATA(at24_data_24c256, 262144 / 8, AT24_FLAG_ADDR16); +AT24_CHIP_DATA(at24_data_24c512, 524288 / 8, AT24_FLAG_ADDR16); +AT24_CHIP_DATA(at24_data_24c1024, 1048576 / 8, AT24_FLAG_ADDR16); +AT24_CHIP_DATA(at24_data_24c2048, 2097152 / 8, AT24_FLAG_ADDR16); +/* identical to 24c08 ? */ +AT24_CHIP_DATA(at24_data_INT3499, 8192 / 8, 0); + +static const struct i2c_device_id at24_ids[] = { + { "24c00", (kernel_ulong_t)&at24_data_24c00 }, + { "24c01", (kernel_ulong_t)&at24_data_24c01 }, + { "24cs01", (kernel_ulong_t)&at24_data_24cs01 }, + { "24c02", (kernel_ulong_t)&at24_data_24c02 }, + { "24cs02", (kernel_ulong_t)&at24_data_24cs02 }, + { "24mac402", (kernel_ulong_t)&at24_data_24mac402 }, + { "24mac602", (kernel_ulong_t)&at24_data_24mac602 }, + { "spd", (kernel_ulong_t)&at24_data_spd }, + { "24c04", (kernel_ulong_t)&at24_data_24c04 }, + { "24cs04", (kernel_ulong_t)&at24_data_24cs04 }, + { "24c08", (kernel_ulong_t)&at24_data_24c08 }, + { "24cs08", (kernel_ulong_t)&at24_data_24cs08 }, + { "24c16", (kernel_ulong_t)&at24_data_24c16 }, + { "24cs16", (kernel_ulong_t)&at24_data_24cs16 }, + { "24c32", (kernel_ulong_t)&at24_data_24c32 }, + { "24cs32", (kernel_ulong_t)&at24_data_24cs32 }, + { "24c64", (kernel_ulong_t)&at24_data_24c64 }, + { "24cs64", (kernel_ulong_t)&at24_data_24cs64 }, + { "24c128", (kernel_ulong_t)&at24_data_24c128 }, + { "24c256", (kernel_ulong_t)&at24_data_24c256 }, + { "24c512", (kernel_ulong_t)&at24_data_24c512 }, + { "24c1024", (kernel_ulong_t)&at24_data_24c1024 }, + { "24c2048", (kernel_ulong_t)&at24_data_24c2048 }, + { "at24", 0 }, + { /* END OF LIST */ } +}; +MODULE_DEVICE_TABLE(i2c, at24_ids); + +static const struct of_device_id at24_of_match[] = { + { .compatible = "atmel,24c00", .data = &at24_data_24c00 }, + { .compatible = "atmel,24c01", .data = &at24_data_24c01 }, + { .compatible = "atmel,24cs01", .data = &at24_data_24cs01 }, + { .compatible = "atmel,24c02", .data = &at24_data_24c02 }, + { .compatible = "atmel,24cs02", .data = &at24_data_24cs02 }, + { .compatible = "atmel,24mac402", .data = &at24_data_24mac402 }, + { .compatible = "atmel,24mac602", .data = &at24_data_24mac602 }, + { .compatible = "atmel,spd", .data = &at24_data_spd }, + { .compatible = "atmel,24c04", .data = &at24_data_24c04 }, + { .compatible = "atmel,24cs04", .data = &at24_data_24cs04 }, + { .compatible = "atmel,24c08", .data = &at24_data_24c08 }, + { .compatible = "atmel,24cs08", .data = &at24_data_24cs08 }, + { .compatible = "atmel,24c16", .data = &at24_data_24c16 }, + { .compatible = "atmel,24cs16", .data = &at24_data_24cs16 }, + { .compatible = "atmel,24c32", .data = &at24_data_24c32 }, + { .compatible = "atmel,24cs32", .data = &at24_data_24cs32 }, + { .compatible = "atmel,24c64", .data = &at24_data_24c64 }, + { .compatible = "atmel,24cs64", .data = &at24_data_24cs64 }, + { .compatible = "atmel,24c128", .data = &at24_data_24c128 }, + { .compatible = "atmel,24c256", .data = &at24_data_24c256 }, + { .compatible = "atmel,24c512", .data = &at24_data_24c512 }, + { .compatible = "atmel,24c1024", .data = &at24_data_24c1024 }, + { .compatible = "atmel,24c2048", .data = &at24_data_24c2048 }, + { /* END OF LIST */ }, +}; +MODULE_DEVICE_TABLE(of, at24_of_match); + +static const struct acpi_device_id at24_acpi_ids[] = { + { "INT3499", (kernel_ulong_t)&at24_data_INT3499 }, + { /* END OF LIST */ } +}; +MODULE_DEVICE_TABLE(acpi, at24_acpi_ids); + +/* + * This routine supports chips which consume multiple I2C addresses. It + * computes the addressing information to be used for a given r/w request. + * Assumes that sanity checks for offset happened at sysfs-layer. + * + * Slave address and byte offset derive from the offset. Always + * set the byte address; on a multi-master board, another master + * may have changed the chip's "current" address pointer. + */ +static struct at24_client *at24_translate_offset(struct at24_data *at24, + unsigned int *offset) +{ + unsigned int i; + + if (at24->flags & AT24_FLAG_ADDR16) { + i = *offset >> 16; + *offset &= 0xffff; + } else { + i = *offset >> 8; + *offset &= 0xff; + } + + return &at24->client[i]; +} + +static struct device *at24_base_client_dev(struct at24_data *at24) +{ + return &at24->client[0].client->dev; +} + +static size_t at24_adjust_read_count(struct at24_data *at24, + unsigned int offset, size_t count) +{ + unsigned int bits; + size_t remainder; + + /* + * In case of multi-address chips that don't rollover reads to + * the next slave address: truncate the count to the slave boundary, + * so that the read never straddles slaves. + */ + if (at24->flags & AT24_FLAG_NO_RDROL) { + bits = (at24->flags & AT24_FLAG_ADDR16) ? 16 : 8; + remainder = BIT(bits) - offset; + if (count > remainder) + count = remainder; + } + + if (count > at24_io_limit) + count = at24_io_limit; + + return count; +} + +static ssize_t at24_regmap_read(struct at24_data *at24, char *buf, + unsigned int offset, size_t count) +{ + unsigned long timeout, read_time; + struct at24_client *at24_client; + struct i2c_client *client; + struct regmap *regmap; + int ret; + + at24_client = at24_translate_offset(at24, &offset); + regmap = at24_client->regmap; + client = at24_client->client; + count = at24_adjust_read_count(at24, offset, count); + + /* adjust offset for mac and serial read ops */ + offset += at24->offset_adj; + + timeout = jiffies + msecs_to_jiffies(at24_write_timeout); + do { + /* + * The timestamp shall be taken before the actual operation + * to avoid a premature timeout in case of high CPU load. + */ + read_time = jiffies; + + ret = regmap_bulk_read(regmap, offset, buf, count); + dev_dbg(&client->dev, "read %zu@%d --> %d (%ld)\n", + count, offset, ret, jiffies); + if (!ret) + return count; + + usleep_range(1000, 1500); + } while (time_before(read_time, timeout)); + + return -ETIMEDOUT; +} + +/* + * Note that if the hardware write-protect pin is pulled high, the whole + * chip is normally write protected. But there are plenty of product + * variants here, including OTP fuses and partial chip protect. + * + * We only use page mode writes; the alternative is sloooow. These routines + * write at most one page. + */ + +static size_t at24_adjust_write_count(struct at24_data *at24, + unsigned int offset, size_t count) +{ + unsigned int next_page; + + /* write_max is at most a page */ + if (count > at24->write_max) + count = at24->write_max; + + /* Never roll over backwards, to the start of this page */ + next_page = roundup(offset + 1, at24->page_size); + if (offset + count > next_page) + count = next_page - offset; + + return count; +} + +static ssize_t at24_regmap_write(struct at24_data *at24, const char *buf, + unsigned int offset, size_t count) +{ + unsigned long timeout, write_time; + struct at24_client *at24_client; + struct i2c_client *client; + struct regmap *regmap; + int ret; + + at24_client = at24_translate_offset(at24, &offset); + regmap = at24_client->regmap; + client = at24_client->client; + count = at24_adjust_write_count(at24, offset, count); + timeout = jiffies + msecs_to_jiffies(at24_write_timeout); + + do { + /* + * The timestamp shall be taken before the actual operation + * to avoid a premature timeout in case of high CPU load. + */ + write_time = jiffies; + + ret = regmap_bulk_write(regmap, offset, buf, count); + dev_dbg(&client->dev, "write %zu@%d --> %d (%ld)\n", + count, offset, ret, jiffies); + if (!ret) + return count; + + usleep_range(1000, 1500); + } while (time_before(write_time, timeout)); + + return -ETIMEDOUT; +} + +static int at24_read(void *priv, unsigned int off, void *val, size_t count) +{ + struct at24_data *at24; + struct device *dev; + char *buf = val; + int ret; + + at24 = priv; + dev = at24_base_client_dev(at24); + + if (unlikely(!count)) + return count; + + if (off + count > at24->byte_len) + return -EINVAL; + + ret = pm_runtime_get_sync(dev); + if (ret < 0) { + pm_runtime_put_noidle(dev); + return ret; + } + + /* + * Read data from chip, protecting against concurrent updates + * from this host, but not from other I2C masters. + */ + mutex_lock(&at24->lock); + + while (count) { + ret = at24_regmap_read(at24, buf, off, count); + if (ret < 0) { + mutex_unlock(&at24->lock); + pm_runtime_put(dev); + return ret; + } + buf += ret; + off += ret; + count -= ret; + } + + mutex_unlock(&at24->lock); + + pm_runtime_put(dev); + + return 0; +} + +static int at24_write(void *priv, unsigned int off, void *val, size_t count) +{ + struct at24_data *at24; + struct device *dev; + char *buf = val; + int ret; + + at24 = priv; + dev = at24_base_client_dev(at24); + + if (unlikely(!count)) + return -EINVAL; + + if (off + count > at24->byte_len) + return -EINVAL; + + ret = pm_runtime_get_sync(dev); + if (ret < 0) { + pm_runtime_put_noidle(dev); + return ret; + } + + /* + * Write data to chip, protecting against concurrent updates + * from this host, but not from other I2C masters. + */ + mutex_lock(&at24->lock); + gpiod_set_value_cansleep(at24->wp_gpio, 0); + + while (count) { + ret = at24_regmap_write(at24, buf, off, count); + if (ret < 0) { + gpiod_set_value_cansleep(at24->wp_gpio, 1); + mutex_unlock(&at24->lock); + pm_runtime_put(dev); + return ret; + } + buf += ret; + off += ret; + count -= ret; + } + + gpiod_set_value_cansleep(at24->wp_gpio, 1); + mutex_unlock(&at24->lock); + + pm_runtime_put(dev); + + return 0; +} + +static void at24_properties_to_pdata(struct device *dev, + struct at24_platform_data *chip) +{ + int err; + u32 val; + + if (device_property_present(dev, "read-only")) + chip->flags |= AT24_FLAG_READONLY; + if (device_property_present(dev, "no-read-rollover")) + chip->flags |= AT24_FLAG_NO_RDROL; + + err = device_property_read_u32(dev, "address-width", &val); + if (!err) { + switch (val) { + case 8: + if (chip->flags & AT24_FLAG_ADDR16) + dev_warn(dev, "Override address width to be 8, while default is 16\n"); + chip->flags &= ~AT24_FLAG_ADDR16; + break; + case 16: + chip->flags |= AT24_FLAG_ADDR16; + break; + default: + dev_warn(dev, "Bad \"address-width\" property: %u\n", + val); + } + } + + err = device_property_read_u32(dev, "size", &val); + if (!err) + chip->byte_len = val; + + err = device_property_read_u32(dev, "pagesize", &val); + if (!err) { + chip->page_size = val; + } else { + /* + * This is slow, but we can't know all eeproms, so we better + * play safe. Specifying custom eeprom-types via platform_data + * is recommended anyhow. + */ + chip->page_size = 1; + } +} + +static int at24_get_pdata(struct device *dev, struct at24_platform_data *pdata) +{ + struct device_node *of_node = dev->of_node; + const struct at24_chip_data *cdata; + const struct i2c_device_id *id; + struct at24_platform_data *pd; + + pd = dev_get_platdata(dev); + if (pd) { + memcpy(pdata, pd, sizeof(*pdata)); + return 0; + } + + id = i2c_match_id(at24_ids, to_i2c_client(dev)); + + /* + * The I2C core allows OF nodes compatibles to match against the + * I2C device ID table as a fallback, so check not only if an OF + * node is present but also if it matches an OF device ID entry. + */ + if (of_node && of_match_device(at24_of_match, dev)) + cdata = of_device_get_match_data(dev); + else if (id) + cdata = (void *)id->driver_data; + else + cdata = acpi_device_get_match_data(dev); + + if (!cdata) + return -ENODEV; + + pdata->byte_len = cdata->byte_len; + pdata->flags = cdata->flags; + at24_properties_to_pdata(dev, pdata); + + return 0; +} + +static void at24_remove_dummy_clients(struct at24_data *at24) +{ + int i; + + for (i = 1; i < at24->num_addresses; i++) + i2c_unregister_device(at24->client[i].client); +} + +static int at24_make_dummy_client(struct at24_data *at24, unsigned int index, + struct regmap_config *regmap_config) +{ + struct i2c_client *base_client, *dummy_client; + unsigned short int addr; + struct regmap *regmap; + struct device *dev; + + base_client = at24->client[0].client; + dev = &base_client->dev; + addr = base_client->addr + index; + + dummy_client = i2c_new_dummy(base_client->adapter, + base_client->addr + index); + if (!dummy_client) { + dev_err(dev, "address 0x%02x unavailable\n", addr); + return -EADDRINUSE; + } + + regmap = devm_regmap_init_i2c(dummy_client, regmap_config); + if (IS_ERR(regmap)) { + i2c_unregister_device(dummy_client); + return PTR_ERR(regmap); + } + + at24->client[index].client = dummy_client; + at24->client[index].regmap = regmap; + + return 0; +} + +static unsigned int at24_get_offset_adj(u8 flags, unsigned int byte_len) +{ + if (flags & AT24_FLAG_MAC) { + /* EUI-48 starts from 0x9a, EUI-64 from 0x98 */ + return 0xa0 - byte_len; + } else if (flags & AT24_FLAG_SERIAL && flags & AT24_FLAG_ADDR16) { + /* + * For 16 bit address pointers, the word address must contain + * a '10' sequence in bits 11 and 10 regardless of the + * intended position of the address pointer. + */ + return 0x0800; + } else if (flags & AT24_FLAG_SERIAL) { + /* + * Otherwise the word address must begin with a '10' sequence, + * regardless of the intended address. + */ + return 0x0080; + } else { + return 0; + } +} + +static int at24_probe(struct i2c_client *client) +{ + struct regmap_config regmap_config = { }; + struct nvmem_config nvmem_config = { }; + struct at24_platform_data pdata = { }; + struct device *dev = &client->dev; + bool i2c_fn_i2c, i2c_fn_block; + unsigned int i, num_addresses; + struct at24_data *at24; + struct regmap *regmap; + size_t at24_size; + bool writable; + u8 test_byte; + int err; + + i2c_fn_i2c = i2c_check_functionality(client->adapter, I2C_FUNC_I2C); + i2c_fn_block = i2c_check_functionality(client->adapter, + I2C_FUNC_SMBUS_WRITE_I2C_BLOCK); + + err = at24_get_pdata(dev, &pdata); + if (err) + return err; + + if (!i2c_fn_i2c && !i2c_fn_block) + pdata.page_size = 1; + + if (!pdata.page_size) { + dev_err(dev, "page_size must not be 0!\n"); + return -EINVAL; + } + + if (!is_power_of_2(pdata.page_size)) + dev_warn(dev, "page_size looks suspicious (no power of 2)!\n"); + + if (pdata.flags & AT24_FLAG_TAKE8ADDR) + num_addresses = 8; + else + num_addresses = DIV_ROUND_UP(pdata.byte_len, + (pdata.flags & AT24_FLAG_ADDR16) ? 65536 : 256); + + if ((pdata.flags & AT24_FLAG_SERIAL) && (pdata.flags & AT24_FLAG_MAC)) { + dev_err(dev, + "invalid device data - cannot have both AT24_FLAG_SERIAL & AT24_FLAG_MAC."); + return -EINVAL; + } + + regmap_config.val_bits = 8; + regmap_config.reg_bits = (pdata.flags & AT24_FLAG_ADDR16) ? 16 : 8; + regmap_config.disable_locking = true; + + regmap = devm_regmap_init_i2c(client, ®map_config); + if (IS_ERR(regmap)) + return PTR_ERR(regmap); + + at24_size = sizeof(*at24) + num_addresses * sizeof(struct at24_client); + at24 = devm_kzalloc(dev, at24_size, GFP_KERNEL); + if (!at24) + return -ENOMEM; + + mutex_init(&at24->lock); + at24->byte_len = pdata.byte_len; + at24->page_size = pdata.page_size; + at24->flags = pdata.flags; + at24->num_addresses = num_addresses; + at24->offset_adj = at24_get_offset_adj(pdata.flags, pdata.byte_len); + at24->client[0].client = client; + at24->client[0].regmap = regmap; + + at24->wp_gpio = devm_gpiod_get_optional(dev, "wp", GPIOD_OUT_HIGH); + if (IS_ERR(at24->wp_gpio)) + return PTR_ERR(at24->wp_gpio); + + writable = !(pdata.flags & AT24_FLAG_READONLY); + if (writable) { + at24->write_max = min_t(unsigned int, + pdata.page_size, at24_io_limit); + if (!i2c_fn_i2c && at24->write_max > I2C_SMBUS_BLOCK_MAX) + at24->write_max = I2C_SMBUS_BLOCK_MAX; + } + + /* use dummy devices for multiple-address chips */ + for (i = 1; i < num_addresses; i++) { + err = at24_make_dummy_client(at24, i, ®map_config); + if (err) { + at24_remove_dummy_clients(at24); + return err; + } + } + + i2c_set_clientdata(client, at24); + + /* enable runtime pm */ + pm_runtime_set_active(dev); + pm_runtime_enable(dev); + + /* + * Perform a one-byte test read to verify that the + * chip is functional. + */ + err = at24_read(at24, 0, &test_byte, 1); + pm_runtime_idle(dev); + if (err) { + err = -ENODEV; + goto err_clients; + } + + nvmem_config.name = dev_name(dev); + nvmem_config.dev = dev; + nvmem_config.read_only = !writable; + nvmem_config.root_only = !(pdata.flags & AT24_FLAG_IRUGO); + nvmem_config.owner = THIS_MODULE; + nvmem_config.compat = true; + nvmem_config.base_dev = dev; + nvmem_config.reg_read = at24_read; + nvmem_config.reg_write = at24_write; + nvmem_config.priv = at24; + nvmem_config.stride = 1; + nvmem_config.word_size = 1; + nvmem_config.size = pdata.byte_len; + + at24->nvmem = devm_nvmem_register(dev, &nvmem_config); + if (IS_ERR(at24->nvmem)) { + err = PTR_ERR(at24->nvmem); + goto err_clients; + } + + dev_info(dev, "%u byte %s EEPROM, %s, %u bytes/write\n", + pdata.byte_len, client->name, + writable ? "writable" : "read-only", at24->write_max); + + /* export data to kernel code */ + if (pdata.setup) + pdata.setup(at24->nvmem, pdata.context); + + return 0; + +err_clients: + at24_remove_dummy_clients(at24); + pm_runtime_disable(dev); + + return err; +} + +static int at24_remove(struct i2c_client *client) +{ + struct at24_data *at24; + + at24 = i2c_get_clientdata(client); + + at24_remove_dummy_clients(at24); + pm_runtime_disable(&client->dev); + pm_runtime_set_suspended(&client->dev); + + return 0; +} + +static struct i2c_driver at24_driver = { + .driver = { + .name = "at24", + .of_match_table = at24_of_match, + .acpi_match_table = ACPI_PTR(at24_acpi_ids), + }, + .probe_new = at24_probe, + .remove = at24_remove, + .id_table = at24_ids, +}; + +static int __init at24_init(void) +{ + if (!at24_io_limit) { + pr_err("at24: at24_io_limit must not be 0!\n"); + return -EINVAL; + } + + at24_io_limit = rounddown_pow_of_two(at24_io_limit); + return i2c_add_driver(&at24_driver); +} +module_init(at24_init); + +static void __exit at24_exit(void) +{ + i2c_del_driver(&at24_driver); +} +module_exit(at24_exit); + +MODULE_DESCRIPTION("Driver for most I2C EEPROMs"); +MODULE_AUTHOR("David Brownell and Wolfram Sang"); +MODULE_LICENSE("GPL"); diff --git a/drivers/misc/eeprom/at25.c b/drivers/misc/eeprom/at25.c new file mode 100644 index 000000000..8dd5c610c --- /dev/null +++ b/drivers/misc/eeprom/at25.c @@ -0,0 +1,414 @@ +/* + * at25.c -- support most SPI EEPROMs, such as Atmel AT25 models + * + * Copyright (C) 2006 David Brownell + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +/* + * NOTE: this is an *EEPROM* driver. The vagaries of product naming + * mean that some AT25 products are EEPROMs, and others are FLASH. + * Handle FLASH chips with the drivers/mtd/devices/m25p80.c driver, + * not this one! + */ + +struct at25_data { + struct spi_device *spi; + struct mutex lock; + struct spi_eeprom chip; + unsigned addrlen; + struct nvmem_config nvmem_config; + struct nvmem_device *nvmem; +}; + +#define AT25_WREN 0x06 /* latch the write enable */ +#define AT25_WRDI 0x04 /* reset the write enable */ +#define AT25_RDSR 0x05 /* read status register */ +#define AT25_WRSR 0x01 /* write status register */ +#define AT25_READ 0x03 /* read byte(s) */ +#define AT25_WRITE 0x02 /* write byte(s)/sector */ + +#define AT25_SR_nRDY 0x01 /* nRDY = write-in-progress */ +#define AT25_SR_WEN 0x02 /* write enable (latched) */ +#define AT25_SR_BP0 0x04 /* BP for software writeprotect */ +#define AT25_SR_BP1 0x08 +#define AT25_SR_WPEN 0x80 /* writeprotect enable */ + +#define AT25_INSTR_BIT3 0x08 /* Additional address bit in instr */ + +#define EE_MAXADDRLEN 3 /* 24 bit addresses, up to 2 MBytes */ + +/* Specs often allow 5 msec for a page write, sometimes 20 msec; + * it's important to recover from write timeouts. + */ +#define EE_TIMEOUT 25 + +/*-------------------------------------------------------------------------*/ + +#define io_limit PAGE_SIZE /* bytes */ + +static int at25_ee_read(void *priv, unsigned int offset, + void *val, size_t count) +{ + struct at25_data *at25 = priv; + char *buf = val; + u8 command[EE_MAXADDRLEN + 1]; + u8 *cp; + ssize_t status; + struct spi_transfer t[2]; + struct spi_message m; + u8 instr; + + if (unlikely(offset >= at25->chip.byte_len)) + return -EINVAL; + if ((offset + count) > at25->chip.byte_len) + count = at25->chip.byte_len - offset; + if (unlikely(!count)) + return -EINVAL; + + cp = command; + + instr = AT25_READ; + if (at25->chip.flags & EE_INSTR_BIT3_IS_ADDR) + if (offset >= (1U << (at25->addrlen * 8))) + instr |= AT25_INSTR_BIT3; + *cp++ = instr; + + /* 8/16/24-bit address is written MSB first */ + switch (at25->addrlen) { + default: /* case 3 */ + *cp++ = offset >> 16; + /* fall through */ + case 2: + *cp++ = offset >> 8; + /* fall through */ + case 1: + case 0: /* can't happen: for better codegen */ + *cp++ = offset >> 0; + } + + spi_message_init(&m); + memset(t, 0, sizeof(t)); + + t[0].tx_buf = command; + t[0].len = at25->addrlen + 1; + spi_message_add_tail(&t[0], &m); + + t[1].rx_buf = buf; + t[1].len = count; + spi_message_add_tail(&t[1], &m); + + mutex_lock(&at25->lock); + + /* Read it all at once. + * + * REVISIT that's potentially a problem with large chips, if + * other devices on the bus need to be accessed regularly or + * this chip is clocked very slowly + */ + status = spi_sync(at25->spi, &m); + dev_dbg(&at25->spi->dev, "read %zu bytes at %d --> %zd\n", + count, offset, status); + + mutex_unlock(&at25->lock); + return status; +} + +static int at25_ee_write(void *priv, unsigned int off, void *val, size_t count) +{ + struct at25_data *at25 = priv; + const char *buf = val; + int status = 0; + unsigned buf_size; + u8 *bounce; + + if (unlikely(off >= at25->chip.byte_len)) + return -EFBIG; + if ((off + count) > at25->chip.byte_len) + count = at25->chip.byte_len - off; + if (unlikely(!count)) + return -EINVAL; + + /* Temp buffer starts with command and address */ + buf_size = at25->chip.page_size; + if (buf_size > io_limit) + buf_size = io_limit; + bounce = kmalloc(buf_size + at25->addrlen + 1, GFP_KERNEL); + if (!bounce) + return -ENOMEM; + + /* For write, rollover is within the page ... so we write at + * most one page, then manually roll over to the next page. + */ + mutex_lock(&at25->lock); + do { + unsigned long timeout, retries; + unsigned segment; + unsigned offset = (unsigned) off; + u8 *cp = bounce; + int sr; + u8 instr; + + *cp = AT25_WREN; + status = spi_write(at25->spi, cp, 1); + if (status < 0) { + dev_dbg(&at25->spi->dev, "WREN --> %d\n", status); + break; + } + + instr = AT25_WRITE; + if (at25->chip.flags & EE_INSTR_BIT3_IS_ADDR) + if (offset >= (1U << (at25->addrlen * 8))) + instr |= AT25_INSTR_BIT3; + *cp++ = instr; + + /* 8/16/24-bit address is written MSB first */ + switch (at25->addrlen) { + default: /* case 3 */ + *cp++ = offset >> 16; + /* fall through */ + case 2: + *cp++ = offset >> 8; + /* fall through */ + case 1: + case 0: /* can't happen: for better codegen */ + *cp++ = offset >> 0; + } + + /* Write as much of a page as we can */ + segment = buf_size - (offset % buf_size); + if (segment > count) + segment = count; + memcpy(cp, buf, segment); + status = spi_write(at25->spi, bounce, + segment + at25->addrlen + 1); + dev_dbg(&at25->spi->dev, "write %u bytes at %u --> %d\n", + segment, offset, status); + if (status < 0) + break; + + /* REVISIT this should detect (or prevent) failed writes + * to readonly sections of the EEPROM... + */ + + /* Wait for non-busy status */ + timeout = jiffies + msecs_to_jiffies(EE_TIMEOUT); + retries = 0; + do { + + sr = spi_w8r8(at25->spi, AT25_RDSR); + if (sr < 0 || (sr & AT25_SR_nRDY)) { + dev_dbg(&at25->spi->dev, + "rdsr --> %d (%02x)\n", sr, sr); + /* at HZ=100, this is sloooow */ + msleep(1); + continue; + } + if (!(sr & AT25_SR_nRDY)) + break; + } while (retries++ < 3 || time_before_eq(jiffies, timeout)); + + if ((sr < 0) || (sr & AT25_SR_nRDY)) { + dev_err(&at25->spi->dev, + "write %u bytes offset %u, timeout after %u msecs\n", + segment, offset, + jiffies_to_msecs(jiffies - + (timeout - EE_TIMEOUT))); + status = -ETIMEDOUT; + break; + } + + off += segment; + buf += segment; + count -= segment; + + } while (count > 0); + + mutex_unlock(&at25->lock); + + kfree(bounce); + return status; +} + +/*-------------------------------------------------------------------------*/ + +static int at25_fw_to_chip(struct device *dev, struct spi_eeprom *chip) +{ + u32 val; + + memset(chip, 0, sizeof(*chip)); + strncpy(chip->name, "at25", sizeof(chip->name)); + + if (device_property_read_u32(dev, "size", &val) == 0 || + device_property_read_u32(dev, "at25,byte-len", &val) == 0) { + chip->byte_len = val; + } else { + dev_err(dev, "Error: missing \"size\" property\n"); + return -ENODEV; + } + + if (device_property_read_u32(dev, "pagesize", &val) == 0 || + device_property_read_u32(dev, "at25,page-size", &val) == 0) { + chip->page_size = (u16)val; + } else { + dev_err(dev, "Error: missing \"pagesize\" property\n"); + return -ENODEV; + } + + if (device_property_read_u32(dev, "at25,addr-mode", &val) == 0) { + chip->flags = (u16)val; + } else { + if (device_property_read_u32(dev, "address-width", &val)) { + dev_err(dev, + "Error: missing \"address-width\" property\n"); + return -ENODEV; + } + switch (val) { + case 9: + chip->flags |= EE_INSTR_BIT3_IS_ADDR; + /* fall through */ + case 8: + chip->flags |= EE_ADDR1; + break; + case 16: + chip->flags |= EE_ADDR2; + break; + case 24: + chip->flags |= EE_ADDR3; + break; + default: + dev_err(dev, + "Error: bad \"address-width\" property: %u\n", + val); + return -ENODEV; + } + if (device_property_present(dev, "read-only")) + chip->flags |= EE_READONLY; + } + return 0; +} + +static int at25_probe(struct spi_device *spi) +{ + struct at25_data *at25 = NULL; + struct spi_eeprom chip; + int err; + int sr; + int addrlen; + + /* Chip description */ + if (!spi->dev.platform_data) { + err = at25_fw_to_chip(&spi->dev, &chip); + if (err) + return err; + } else + chip = *(struct spi_eeprom *)spi->dev.platform_data; + + /* For now we only support 8/16/24 bit addressing */ + if (chip.flags & EE_ADDR1) + addrlen = 1; + else if (chip.flags & EE_ADDR2) + addrlen = 2; + else if (chip.flags & EE_ADDR3) + addrlen = 3; + else { + dev_dbg(&spi->dev, "unsupported address type\n"); + return -EINVAL; + } + + /* Ping the chip ... the status register is pretty portable, + * unlike probing manufacturer IDs. We do expect that system + * firmware didn't write it in the past few milliseconds! + */ + sr = spi_w8r8(spi, AT25_RDSR); + if (sr < 0 || sr & AT25_SR_nRDY) { + dev_dbg(&spi->dev, "rdsr --> %d (%02x)\n", sr, sr); + return -ENXIO; + } + + at25 = devm_kzalloc(&spi->dev, sizeof(struct at25_data), GFP_KERNEL); + if (!at25) + return -ENOMEM; + + mutex_init(&at25->lock); + at25->chip = chip; + at25->spi = spi; + spi_set_drvdata(spi, at25); + at25->addrlen = addrlen; + + at25->nvmem_config.name = dev_name(&spi->dev); + at25->nvmem_config.dev = &spi->dev; + at25->nvmem_config.read_only = chip.flags & EE_READONLY; + at25->nvmem_config.root_only = true; + at25->nvmem_config.owner = THIS_MODULE; + at25->nvmem_config.compat = true; + at25->nvmem_config.base_dev = &spi->dev; + at25->nvmem_config.reg_read = at25_ee_read; + at25->nvmem_config.reg_write = at25_ee_write; + at25->nvmem_config.priv = at25; + at25->nvmem_config.stride = 1; + at25->nvmem_config.word_size = 1; + at25->nvmem_config.size = chip.byte_len; + + at25->nvmem = nvmem_register(&at25->nvmem_config); + if (IS_ERR(at25->nvmem)) + return PTR_ERR(at25->nvmem); + + dev_info(&spi->dev, "%d %s %s eeprom%s, pagesize %u\n", + (chip.byte_len < 1024) ? chip.byte_len : (chip.byte_len / 1024), + (chip.byte_len < 1024) ? "Byte" : "KByte", + at25->chip.name, + (chip.flags & EE_READONLY) ? " (readonly)" : "", + at25->chip.page_size); + return 0; +} + +static int at25_remove(struct spi_device *spi) +{ + struct at25_data *at25; + + at25 = spi_get_drvdata(spi); + nvmem_unregister(at25->nvmem); + + return 0; +} + +/*-------------------------------------------------------------------------*/ + +static const struct of_device_id at25_of_match[] = { + { .compatible = "atmel,at25", }, + { } +}; +MODULE_DEVICE_TABLE(of, at25_of_match); + +static struct spi_driver at25_driver = { + .driver = { + .name = "at25", + .of_match_table = at25_of_match, + }, + .probe = at25_probe, + .remove = at25_remove, +}; + +module_spi_driver(at25_driver); + +MODULE_DESCRIPTION("Driver for most SPI EEPROMs"); +MODULE_AUTHOR("David Brownell"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("spi:at25"); diff --git a/drivers/misc/eeprom/digsy_mtc_eeprom.c b/drivers/misc/eeprom/digsy_mtc_eeprom.c new file mode 100644 index 000000000..fbde2516c --- /dev/null +++ b/drivers/misc/eeprom/digsy_mtc_eeprom.c @@ -0,0 +1,106 @@ +/* + * EEPROMs access control driver for display configuration EEPROMs + * on DigsyMTC board. + * + * (C) 2011 DENX Software Engineering, Anatolij Gustschin + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * FIXME: this driver is used on a device-tree probed platform: it + * should be defined as a bit-banged SPI device and probed from the device + * tree and not like this with static grabbing of a few numbered GPIO + * lines at random. + * + * Add proper SPI and EEPROM in arch/powerpc/boot/dts/digsy_mtc.dts + * and delete this driver. + */ + +#include +#include +#include +#include +#include +#include +#include + +#define GPIO_EEPROM_CLK 216 +#define GPIO_EEPROM_CS 210 +#define GPIO_EEPROM_DI 217 +#define GPIO_EEPROM_DO 249 +#define GPIO_EEPROM_OE 255 +#define EE_SPI_BUS_NUM 1 + +static void digsy_mtc_op_prepare(void *p) +{ + /* enable */ + gpio_set_value(GPIO_EEPROM_OE, 0); +} + +static void digsy_mtc_op_finish(void *p) +{ + /* disable */ + gpio_set_value(GPIO_EEPROM_OE, 1); +} + +struct eeprom_93xx46_platform_data digsy_mtc_eeprom_data = { + .flags = EE_ADDR8, + .prepare = digsy_mtc_op_prepare, + .finish = digsy_mtc_op_finish, +}; + +static struct spi_gpio_platform_data eeprom_spi_gpio_data = { + .num_chipselect = 1, +}; + +static struct platform_device digsy_mtc_eeprom = { + .name = "spi_gpio", + .id = EE_SPI_BUS_NUM, + .dev = { + .platform_data = &eeprom_spi_gpio_data, + }, +}; + +static struct gpiod_lookup_table eeprom_spi_gpiod_table = { + .dev_id = "spi_gpio", + .table = { + GPIO_LOOKUP("gpio@b00", GPIO_EEPROM_CLK, + "sck", GPIO_ACTIVE_HIGH), + GPIO_LOOKUP("gpio@b00", GPIO_EEPROM_DI, + "mosi", GPIO_ACTIVE_HIGH), + GPIO_LOOKUP("gpio@b00", GPIO_EEPROM_DO, + "miso", GPIO_ACTIVE_HIGH), + GPIO_LOOKUP("gpio@b00", GPIO_EEPROM_CS, + "cs", GPIO_ACTIVE_HIGH), + { }, + }, +}; + +static struct spi_board_info digsy_mtc_eeprom_info[] __initdata = { + { + .modalias = "93xx46", + .max_speed_hz = 1000000, + .bus_num = EE_SPI_BUS_NUM, + .chip_select = 0, + .mode = SPI_MODE_0, + .platform_data = &digsy_mtc_eeprom_data, + }, +}; + +static int __init digsy_mtc_eeprom_devices_init(void) +{ + int ret; + + ret = gpio_request_one(GPIO_EEPROM_OE, GPIOF_OUT_INIT_HIGH, + "93xx46 EEPROMs OE"); + if (ret) { + pr_err("can't request gpio %d\n", GPIO_EEPROM_OE); + return ret; + } + gpiod_add_lookup_table(&eeprom_spi_gpiod_table); + spi_register_board_info(digsy_mtc_eeprom_info, + ARRAY_SIZE(digsy_mtc_eeprom_info)); + return platform_device_register(&digsy_mtc_eeprom); +} +device_initcall(digsy_mtc_eeprom_devices_init); diff --git a/drivers/misc/eeprom/eeprom.c b/drivers/misc/eeprom/eeprom.c new file mode 100644 index 000000000..60e3d91b5 --- /dev/null +++ b/drivers/misc/eeprom/eeprom.c @@ -0,0 +1,222 @@ +/* + * Copyright (C) 1998, 1999 Frodo Looijaard and + * Philip Edelbrock + * Copyright (C) 2003 Greg Kroah-Hartman + * Copyright (C) 2003 IBM Corp. + * Copyright (C) 2004 Jean Delvare + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include +#include +#include +#include +#include +#include + +/* Addresses to scan */ +static const unsigned short normal_i2c[] = { 0x50, 0x51, 0x52, 0x53, 0x54, + 0x55, 0x56, 0x57, I2C_CLIENT_END }; + + +/* Size of EEPROM in bytes */ +#define EEPROM_SIZE 256 + +/* possible types of eeprom devices */ +enum eeprom_nature { + UNKNOWN, + VAIO, +}; + +/* Each client has this additional data */ +struct eeprom_data { + struct mutex update_lock; + u8 valid; /* bitfield, bit!=0 if slice is valid */ + unsigned long last_updated[8]; /* In jiffies, 8 slices */ + u8 data[EEPROM_SIZE]; /* Register values */ + enum eeprom_nature nature; +}; + + +static void eeprom_update_client(struct i2c_client *client, u8 slice) +{ + struct eeprom_data *data = i2c_get_clientdata(client); + int i; + + mutex_lock(&data->update_lock); + + if (!(data->valid & (1 << slice)) || + time_after(jiffies, data->last_updated[slice] + 300 * HZ)) { + dev_dbg(&client->dev, "Starting eeprom update, slice %u\n", slice); + + if (i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_READ_I2C_BLOCK)) { + for (i = slice << 5; i < (slice + 1) << 5; i += 32) + if (i2c_smbus_read_i2c_block_data(client, i, + 32, data->data + i) + != 32) + goto exit; + } else { + for (i = slice << 5; i < (slice + 1) << 5; i += 2) { + int word = i2c_smbus_read_word_data(client, i); + if (word < 0) + goto exit; + data->data[i] = word & 0xff; + data->data[i + 1] = word >> 8; + } + } + data->last_updated[slice] = jiffies; + data->valid |= (1 << slice); + } +exit: + mutex_unlock(&data->update_lock); +} + +static ssize_t eeprom_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) +{ + struct i2c_client *client = to_i2c_client(kobj_to_dev(kobj)); + struct eeprom_data *data = i2c_get_clientdata(client); + u8 slice; + + /* Only refresh slices which contain requested bytes */ + for (slice = off >> 5; slice <= (off + count - 1) >> 5; slice++) + eeprom_update_client(client, slice); + + /* Hide Vaio private settings to regular users: + - BIOS passwords: bytes 0x00 to 0x0f + - UUID: bytes 0x10 to 0x1f + - Serial number: 0xc0 to 0xdf */ + if (data->nature == VAIO && !capable(CAP_SYS_ADMIN)) { + int i; + + for (i = 0; i < count; i++) { + if ((off + i <= 0x1f) || + (off + i >= 0xc0 && off + i <= 0xdf)) + buf[i] = 0; + else + buf[i] = data->data[off + i]; + } + } else { + memcpy(buf, &data->data[off], count); + } + + return count; +} + +static const struct bin_attribute eeprom_attr = { + .attr = { + .name = "eeprom", + .mode = S_IRUGO, + }, + .size = EEPROM_SIZE, + .read = eeprom_read, +}; + +/* Return 0 if detection is successful, -ENODEV otherwise */ +static int eeprom_detect(struct i2c_client *client, struct i2c_board_info *info) +{ + struct i2c_adapter *adapter = client->adapter; + + /* EDID EEPROMs are often 24C00 EEPROMs, which answer to all + addresses 0x50-0x57, but we only care about 0x50. So decline + attaching to addresses >= 0x51 on DDC buses */ + if (!(adapter->class & I2C_CLASS_SPD) && client->addr >= 0x51) + return -ENODEV; + + /* There are four ways we can read the EEPROM data: + (1) I2C block reads (faster, but unsupported by most adapters) + (2) Word reads (128% overhead) + (3) Consecutive byte reads (88% overhead, unsafe) + (4) Regular byte data reads (265% overhead) + The third and fourth methods are not implemented by this driver + because all known adapters support one of the first two. */ + if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_READ_WORD_DATA) + && !i2c_check_functionality(adapter, I2C_FUNC_SMBUS_READ_I2C_BLOCK)) + return -ENODEV; + + strlcpy(info->type, "eeprom", I2C_NAME_SIZE); + + return 0; +} + +static int eeprom_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct i2c_adapter *adapter = client->adapter; + struct eeprom_data *data; + + data = devm_kzalloc(&client->dev, sizeof(struct eeprom_data), + GFP_KERNEL); + if (!data) + return -ENOMEM; + + memset(data->data, 0xff, EEPROM_SIZE); + i2c_set_clientdata(client, data); + mutex_init(&data->update_lock); + data->nature = UNKNOWN; + + /* Detect the Vaio nature of EEPROMs. + We use the "PCG-" or "VGN-" prefix as the signature. */ + if (client->addr == 0x57 + && i2c_check_functionality(adapter, I2C_FUNC_SMBUS_READ_BYTE_DATA)) { + char name[4]; + + name[0] = i2c_smbus_read_byte_data(client, 0x80); + name[1] = i2c_smbus_read_byte_data(client, 0x81); + name[2] = i2c_smbus_read_byte_data(client, 0x82); + name[3] = i2c_smbus_read_byte_data(client, 0x83); + + if (!memcmp(name, "PCG-", 4) || !memcmp(name, "VGN-", 4)) { + dev_info(&client->dev, "Vaio EEPROM detected, " + "enabling privacy protection\n"); + data->nature = VAIO; + } + } + + /* create the sysfs eeprom file */ + return sysfs_create_bin_file(&client->dev.kobj, &eeprom_attr); +} + +static int eeprom_remove(struct i2c_client *client) +{ + sysfs_remove_bin_file(&client->dev.kobj, &eeprom_attr); + + return 0; +} + +static const struct i2c_device_id eeprom_id[] = { + { "eeprom", 0 }, + { } +}; + +static struct i2c_driver eeprom_driver = { + .driver = { + .name = "eeprom", + }, + .probe = eeprom_probe, + .remove = eeprom_remove, + .id_table = eeprom_id, + + .class = I2C_CLASS_DDC | I2C_CLASS_SPD, + .detect = eeprom_detect, + .address_list = normal_i2c, +}; + +module_i2c_driver(eeprom_driver); + +MODULE_AUTHOR("Frodo Looijaard and " + "Philip Edelbrock and " + "Greg Kroah-Hartman "); +MODULE_DESCRIPTION("I2C EEPROM driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/misc/eeprom/eeprom_93cx6.c b/drivers/misc/eeprom/eeprom_93cx6.c new file mode 100644 index 000000000..0cf2c9d67 --- /dev/null +++ b/drivers/misc/eeprom/eeprom_93cx6.c @@ -0,0 +1,381 @@ +/* + * Copyright (C) 2004 - 2006 rt2x00 SourceForge Project + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * Module: eeprom_93cx6 + * Abstract: EEPROM reader routines for 93cx6 chipsets. + * Supported chipsets: 93c46 & 93c66. + */ + +#include +#include +#include +#include + +MODULE_AUTHOR("http://rt2x00.serialmonkey.com"); +MODULE_VERSION("1.0"); +MODULE_DESCRIPTION("EEPROM 93cx6 chip driver"); +MODULE_LICENSE("GPL"); + +static inline void eeprom_93cx6_pulse_high(struct eeprom_93cx6 *eeprom) +{ + eeprom->reg_data_clock = 1; + eeprom->register_write(eeprom); + + /* + * Add a short delay for the pulse to work. + * According to the specifications the "maximum minimum" + * time should be 450ns. + */ + ndelay(450); +} + +static inline void eeprom_93cx6_pulse_low(struct eeprom_93cx6 *eeprom) +{ + eeprom->reg_data_clock = 0; + eeprom->register_write(eeprom); + + /* + * Add a short delay for the pulse to work. + * According to the specifications the "maximum minimum" + * time should be 450ns. + */ + ndelay(450); +} + +static void eeprom_93cx6_startup(struct eeprom_93cx6 *eeprom) +{ + /* + * Clear all flags, and enable chip select. + */ + eeprom->register_read(eeprom); + eeprom->reg_data_in = 0; + eeprom->reg_data_out = 0; + eeprom->reg_data_clock = 0; + eeprom->reg_chip_select = 1; + eeprom->drive_data = 1; + eeprom->register_write(eeprom); + + /* + * kick a pulse. + */ + eeprom_93cx6_pulse_high(eeprom); + eeprom_93cx6_pulse_low(eeprom); +} + +static void eeprom_93cx6_cleanup(struct eeprom_93cx6 *eeprom) +{ + /* + * Clear chip_select and data_in flags. + */ + eeprom->register_read(eeprom); + eeprom->reg_data_in = 0; + eeprom->reg_chip_select = 0; + eeprom->register_write(eeprom); + + /* + * kick a pulse. + */ + eeprom_93cx6_pulse_high(eeprom); + eeprom_93cx6_pulse_low(eeprom); +} + +static void eeprom_93cx6_write_bits(struct eeprom_93cx6 *eeprom, + const u16 data, const u16 count) +{ + unsigned int i; + + eeprom->register_read(eeprom); + + /* + * Clear data flags. + */ + eeprom->reg_data_in = 0; + eeprom->reg_data_out = 0; + eeprom->drive_data = 1; + + /* + * Start writing all bits. + */ + for (i = count; i > 0; i--) { + /* + * Check if this bit needs to be set. + */ + eeprom->reg_data_in = !!(data & (1 << (i - 1))); + + /* + * Write the bit to the eeprom register. + */ + eeprom->register_write(eeprom); + + /* + * Kick a pulse. + */ + eeprom_93cx6_pulse_high(eeprom); + eeprom_93cx6_pulse_low(eeprom); + } + + eeprom->reg_data_in = 0; + eeprom->register_write(eeprom); +} + +static void eeprom_93cx6_read_bits(struct eeprom_93cx6 *eeprom, + u16 *data, const u16 count) +{ + unsigned int i; + u16 buf = 0; + + eeprom->register_read(eeprom); + + /* + * Clear data flags. + */ + eeprom->reg_data_in = 0; + eeprom->reg_data_out = 0; + eeprom->drive_data = 0; + + /* + * Start reading all bits. + */ + for (i = count; i > 0; i--) { + eeprom_93cx6_pulse_high(eeprom); + + eeprom->register_read(eeprom); + + /* + * Clear data_in flag. + */ + eeprom->reg_data_in = 0; + + /* + * Read if the bit has been set. + */ + if (eeprom->reg_data_out) + buf |= (1 << (i - 1)); + + eeprom_93cx6_pulse_low(eeprom); + } + + *data = buf; +} + +/** + * eeprom_93cx6_read - Read a word from eeprom + * @eeprom: Pointer to eeprom structure + * @word: Word index from where we should start reading + * @data: target pointer where the information will have to be stored + * + * This function will read the eeprom data as host-endian word + * into the given data pointer. + */ +void eeprom_93cx6_read(struct eeprom_93cx6 *eeprom, const u8 word, + u16 *data) +{ + u16 command; + + /* + * Initialize the eeprom register + */ + eeprom_93cx6_startup(eeprom); + + /* + * Select the read opcode and the word to be read. + */ + command = (PCI_EEPROM_READ_OPCODE << eeprom->width) | word; + eeprom_93cx6_write_bits(eeprom, command, + PCI_EEPROM_WIDTH_OPCODE + eeprom->width); + + /* + * Read the requested 16 bits. + */ + eeprom_93cx6_read_bits(eeprom, data, 16); + + /* + * Cleanup eeprom register. + */ + eeprom_93cx6_cleanup(eeprom); +} +EXPORT_SYMBOL_GPL(eeprom_93cx6_read); + +/** + * eeprom_93cx6_multiread - Read multiple words from eeprom + * @eeprom: Pointer to eeprom structure + * @word: Word index from where we should start reading + * @data: target pointer where the information will have to be stored + * @words: Number of words that should be read. + * + * This function will read all requested words from the eeprom, + * this is done by calling eeprom_93cx6_read() multiple times. + * But with the additional change that while the eeprom_93cx6_read + * will return host ordered bytes, this method will return little + * endian words. + */ +void eeprom_93cx6_multiread(struct eeprom_93cx6 *eeprom, const u8 word, + __le16 *data, const u16 words) +{ + unsigned int i; + u16 tmp; + + for (i = 0; i < words; i++) { + tmp = 0; + eeprom_93cx6_read(eeprom, word + i, &tmp); + data[i] = cpu_to_le16(tmp); + } +} +EXPORT_SYMBOL_GPL(eeprom_93cx6_multiread); + +/** + * eeprom_93cx6_readb - Read a byte from eeprom + * @eeprom: Pointer to eeprom structure + * @word: Byte index from where we should start reading + * @data: target pointer where the information will have to be stored + * + * This function will read a byte of the eeprom data + * into the given data pointer. + */ +void eeprom_93cx6_readb(struct eeprom_93cx6 *eeprom, const u8 byte, + u8 *data) +{ + u16 command; + u16 tmp; + + /* + * Initialize the eeprom register + */ + eeprom_93cx6_startup(eeprom); + + /* + * Select the read opcode and the byte to be read. + */ + command = (PCI_EEPROM_READ_OPCODE << (eeprom->width + 1)) | byte; + eeprom_93cx6_write_bits(eeprom, command, + PCI_EEPROM_WIDTH_OPCODE + eeprom->width + 1); + + /* + * Read the requested 8 bits. + */ + eeprom_93cx6_read_bits(eeprom, &tmp, 8); + *data = tmp & 0xff; + + /* + * Cleanup eeprom register. + */ + eeprom_93cx6_cleanup(eeprom); +} +EXPORT_SYMBOL_GPL(eeprom_93cx6_readb); + +/** + * eeprom_93cx6_multireadb - Read multiple bytes from eeprom + * @eeprom: Pointer to eeprom structure + * @byte: Index from where we should start reading + * @data: target pointer where the information will have to be stored + * @words: Number of bytes that should be read. + * + * This function will read all requested bytes from the eeprom, + * this is done by calling eeprom_93cx6_readb() multiple times. + */ +void eeprom_93cx6_multireadb(struct eeprom_93cx6 *eeprom, const u8 byte, + u8 *data, const u16 bytes) +{ + unsigned int i; + + for (i = 0; i < bytes; i++) + eeprom_93cx6_readb(eeprom, byte + i, &data[i]); +} +EXPORT_SYMBOL_GPL(eeprom_93cx6_multireadb); + +/** + * eeprom_93cx6_wren - set the write enable state + * @eeprom: Pointer to eeprom structure + * @enable: true to enable writes, otherwise disable writes + * + * Set the EEPROM write enable state to either allow or deny + * writes depending on the @enable value. + */ +void eeprom_93cx6_wren(struct eeprom_93cx6 *eeprom, bool enable) +{ + u16 command; + + /* start the command */ + eeprom_93cx6_startup(eeprom); + + /* create command to enable/disable */ + + command = enable ? PCI_EEPROM_EWEN_OPCODE : PCI_EEPROM_EWDS_OPCODE; + command <<= (eeprom->width - 2); + + eeprom_93cx6_write_bits(eeprom, command, + PCI_EEPROM_WIDTH_OPCODE + eeprom->width); + + eeprom_93cx6_cleanup(eeprom); +} +EXPORT_SYMBOL_GPL(eeprom_93cx6_wren); + +/** + * eeprom_93cx6_write - write data to the EEPROM + * @eeprom: Pointer to eeprom structure + * @addr: Address to write data to. + * @data: The data to write to address @addr. + * + * Write the @data to the specified @addr in the EEPROM and + * waiting for the device to finish writing. + * + * Note, since we do not expect large number of write operations + * we delay in between parts of the operation to avoid using excessive + * amounts of CPU time busy waiting. + */ +void eeprom_93cx6_write(struct eeprom_93cx6 *eeprom, u8 addr, u16 data) +{ + int timeout = 100; + u16 command; + + /* start the command */ + eeprom_93cx6_startup(eeprom); + + command = PCI_EEPROM_WRITE_OPCODE << eeprom->width; + command |= addr; + + /* send write command */ + eeprom_93cx6_write_bits(eeprom, command, + PCI_EEPROM_WIDTH_OPCODE + eeprom->width); + + /* send data */ + eeprom_93cx6_write_bits(eeprom, data, 16); + + /* get ready to check for busy */ + eeprom->drive_data = 0; + eeprom->reg_chip_select = 1; + eeprom->register_write(eeprom); + + /* wait at-least 250ns to get DO to be the busy signal */ + usleep_range(1000, 2000); + + /* wait for DO to go high to signify finish */ + + while (true) { + eeprom->register_read(eeprom); + + if (eeprom->reg_data_out) + break; + + usleep_range(1000, 2000); + + if (--timeout <= 0) { + printk(KERN_ERR "%s: timeout\n", __func__); + break; + } + } + + eeprom_93cx6_cleanup(eeprom); +} +EXPORT_SYMBOL_GPL(eeprom_93cx6_write); diff --git a/drivers/misc/eeprom/eeprom_93xx46.c b/drivers/misc/eeprom/eeprom_93xx46.c new file mode 100644 index 000000000..182feab6d --- /dev/null +++ b/drivers/misc/eeprom/eeprom_93xx46.c @@ -0,0 +1,541 @@ +/* + * Driver for 93xx46 EEPROMs + * + * (C) 2011 DENX Software Engineering, Anatolij Gustschin + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define OP_START 0x4 +#define OP_WRITE (OP_START | 0x1) +#define OP_READ (OP_START | 0x2) +#define ADDR_EWDS 0x00 +#define ADDR_ERAL 0x20 +#define ADDR_EWEN 0x30 + +struct eeprom_93xx46_devtype_data { + unsigned int quirks; +}; + +static const struct eeprom_93xx46_devtype_data atmel_at93c46d_data = { + .quirks = EEPROM_93XX46_QUIRK_SINGLE_WORD_READ | + EEPROM_93XX46_QUIRK_INSTRUCTION_LENGTH, +}; + +static const struct eeprom_93xx46_devtype_data microchip_93lc46b_data = { + .quirks = EEPROM_93XX46_QUIRK_EXTRA_READ_CYCLE, +}; + +struct eeprom_93xx46_dev { + struct spi_device *spi; + struct eeprom_93xx46_platform_data *pdata; + struct mutex lock; + struct nvmem_config nvmem_config; + struct nvmem_device *nvmem; + int addrlen; + int size; +}; + +static inline bool has_quirk_single_word_read(struct eeprom_93xx46_dev *edev) +{ + return edev->pdata->quirks & EEPROM_93XX46_QUIRK_SINGLE_WORD_READ; +} + +static inline bool has_quirk_instruction_length(struct eeprom_93xx46_dev *edev) +{ + return edev->pdata->quirks & EEPROM_93XX46_QUIRK_INSTRUCTION_LENGTH; +} + +static inline bool has_quirk_extra_read_cycle(struct eeprom_93xx46_dev *edev) +{ + return edev->pdata->quirks & EEPROM_93XX46_QUIRK_EXTRA_READ_CYCLE; +} + +static int eeprom_93xx46_read(void *priv, unsigned int off, + void *val, size_t count) +{ + struct eeprom_93xx46_dev *edev = priv; + char *buf = val; + int err = 0; + + if (unlikely(off >= edev->size)) + return 0; + if ((off + count) > edev->size) + count = edev->size - off; + if (unlikely(!count)) + return count; + + mutex_lock(&edev->lock); + + if (edev->pdata->prepare) + edev->pdata->prepare(edev); + + while (count) { + struct spi_message m; + struct spi_transfer t[2] = { { 0 } }; + u16 cmd_addr = OP_READ << edev->addrlen; + size_t nbytes = count; + int bits; + + if (edev->addrlen == 7) { + cmd_addr |= off & 0x7f; + bits = 10; + if (has_quirk_single_word_read(edev)) + nbytes = 1; + } else { + cmd_addr |= (off >> 1) & 0x3f; + bits = 9; + if (has_quirk_single_word_read(edev)) + nbytes = 2; + } + + dev_dbg(&edev->spi->dev, "read cmd 0x%x, %d Hz\n", + cmd_addr, edev->spi->max_speed_hz); + + if (has_quirk_extra_read_cycle(edev)) { + cmd_addr <<= 1; + bits += 1; + } + + spi_message_init(&m); + + t[0].tx_buf = (char *)&cmd_addr; + t[0].len = 2; + t[0].bits_per_word = bits; + spi_message_add_tail(&t[0], &m); + + t[1].rx_buf = buf; + t[1].len = count; + t[1].bits_per_word = 8; + spi_message_add_tail(&t[1], &m); + + err = spi_sync(edev->spi, &m); + /* have to wait at least Tcsl ns */ + ndelay(250); + + if (err) { + dev_err(&edev->spi->dev, "read %zu bytes at %d: err. %d\n", + nbytes, (int)off, err); + break; + } + + buf += nbytes; + off += nbytes; + count -= nbytes; + } + + if (edev->pdata->finish) + edev->pdata->finish(edev); + + mutex_unlock(&edev->lock); + + return err; +} + +static int eeprom_93xx46_ew(struct eeprom_93xx46_dev *edev, int is_on) +{ + struct spi_message m; + struct spi_transfer t; + int bits, ret; + u16 cmd_addr; + + cmd_addr = OP_START << edev->addrlen; + if (edev->addrlen == 7) { + cmd_addr |= (is_on ? ADDR_EWEN : ADDR_EWDS) << 1; + bits = 10; + } else { + cmd_addr |= (is_on ? ADDR_EWEN : ADDR_EWDS); + bits = 9; + } + + if (has_quirk_instruction_length(edev)) { + cmd_addr <<= 2; + bits += 2; + } + + dev_dbg(&edev->spi->dev, "ew%s cmd 0x%04x, %d bits\n", + is_on ? "en" : "ds", cmd_addr, bits); + + spi_message_init(&m); + memset(&t, 0, sizeof(t)); + + t.tx_buf = &cmd_addr; + t.len = 2; + t.bits_per_word = bits; + spi_message_add_tail(&t, &m); + + mutex_lock(&edev->lock); + + if (edev->pdata->prepare) + edev->pdata->prepare(edev); + + ret = spi_sync(edev->spi, &m); + /* have to wait at least Tcsl ns */ + ndelay(250); + if (ret) + dev_err(&edev->spi->dev, "erase/write %sable error %d\n", + is_on ? "en" : "dis", ret); + + if (edev->pdata->finish) + edev->pdata->finish(edev); + + mutex_unlock(&edev->lock); + return ret; +} + +static ssize_t +eeprom_93xx46_write_word(struct eeprom_93xx46_dev *edev, + const char *buf, unsigned off) +{ + struct spi_message m; + struct spi_transfer t[2]; + int bits, data_len, ret; + u16 cmd_addr; + + cmd_addr = OP_WRITE << edev->addrlen; + + if (edev->addrlen == 7) { + cmd_addr |= off & 0x7f; + bits = 10; + data_len = 1; + } else { + cmd_addr |= (off >> 1) & 0x3f; + bits = 9; + data_len = 2; + } + + dev_dbg(&edev->spi->dev, "write cmd 0x%x\n", cmd_addr); + + spi_message_init(&m); + memset(t, 0, sizeof(t)); + + t[0].tx_buf = (char *)&cmd_addr; + t[0].len = 2; + t[0].bits_per_word = bits; + spi_message_add_tail(&t[0], &m); + + t[1].tx_buf = buf; + t[1].len = data_len; + t[1].bits_per_word = 8; + spi_message_add_tail(&t[1], &m); + + ret = spi_sync(edev->spi, &m); + /* have to wait program cycle time Twc ms */ + mdelay(6); + return ret; +} + +static int eeprom_93xx46_write(void *priv, unsigned int off, + void *val, size_t count) +{ + struct eeprom_93xx46_dev *edev = priv; + char *buf = val; + int i, ret, step = 1; + + if (unlikely(off >= edev->size)) + return -EFBIG; + if ((off + count) > edev->size) + count = edev->size - off; + if (unlikely(!count)) + return count; + + /* only write even number of bytes on 16-bit devices */ + if (edev->addrlen == 6) { + step = 2; + count &= ~1; + } + + /* erase/write enable */ + ret = eeprom_93xx46_ew(edev, 1); + if (ret) + return ret; + + mutex_lock(&edev->lock); + + if (edev->pdata->prepare) + edev->pdata->prepare(edev); + + for (i = 0; i < count; i += step) { + ret = eeprom_93xx46_write_word(edev, &buf[i], off + i); + if (ret) { + dev_err(&edev->spi->dev, "write failed at %d: %d\n", + (int)off + i, ret); + break; + } + } + + if (edev->pdata->finish) + edev->pdata->finish(edev); + + mutex_unlock(&edev->lock); + + /* erase/write disable */ + eeprom_93xx46_ew(edev, 0); + return ret; +} + +static int eeprom_93xx46_eral(struct eeprom_93xx46_dev *edev) +{ + struct eeprom_93xx46_platform_data *pd = edev->pdata; + struct spi_message m; + struct spi_transfer t; + int bits, ret; + u16 cmd_addr; + + cmd_addr = OP_START << edev->addrlen; + if (edev->addrlen == 7) { + cmd_addr |= ADDR_ERAL << 1; + bits = 10; + } else { + cmd_addr |= ADDR_ERAL; + bits = 9; + } + + if (has_quirk_instruction_length(edev)) { + cmd_addr <<= 2; + bits += 2; + } + + dev_dbg(&edev->spi->dev, "eral cmd 0x%04x, %d bits\n", cmd_addr, bits); + + spi_message_init(&m); + memset(&t, 0, sizeof(t)); + + t.tx_buf = &cmd_addr; + t.len = 2; + t.bits_per_word = bits; + spi_message_add_tail(&t, &m); + + mutex_lock(&edev->lock); + + if (edev->pdata->prepare) + edev->pdata->prepare(edev); + + ret = spi_sync(edev->spi, &m); + if (ret) + dev_err(&edev->spi->dev, "erase error %d\n", ret); + /* have to wait erase cycle time Tec ms */ + mdelay(6); + + if (pd->finish) + pd->finish(edev); + + mutex_unlock(&edev->lock); + return ret; +} + +static ssize_t eeprom_93xx46_store_erase(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct eeprom_93xx46_dev *edev = dev_get_drvdata(dev); + int erase = 0, ret; + + sscanf(buf, "%d", &erase); + if (erase) { + ret = eeprom_93xx46_ew(edev, 1); + if (ret) + return ret; + ret = eeprom_93xx46_eral(edev); + if (ret) + return ret; + ret = eeprom_93xx46_ew(edev, 0); + if (ret) + return ret; + } + return count; +} +static DEVICE_ATTR(erase, S_IWUSR, NULL, eeprom_93xx46_store_erase); + +static void select_assert(void *context) +{ + struct eeprom_93xx46_dev *edev = context; + + gpiod_set_value_cansleep(edev->pdata->select, 1); +} + +static void select_deassert(void *context) +{ + struct eeprom_93xx46_dev *edev = context; + + gpiod_set_value_cansleep(edev->pdata->select, 0); +} + +static const struct of_device_id eeprom_93xx46_of_table[] = { + { .compatible = "eeprom-93xx46", }, + { .compatible = "atmel,at93c46d", .data = &atmel_at93c46d_data, }, + { .compatible = "microchip,93lc46b", .data = µchip_93lc46b_data, }, + {} +}; +MODULE_DEVICE_TABLE(of, eeprom_93xx46_of_table); + +static int eeprom_93xx46_probe_dt(struct spi_device *spi) +{ + const struct of_device_id *of_id = + of_match_device(eeprom_93xx46_of_table, &spi->dev); + struct device_node *np = spi->dev.of_node; + struct eeprom_93xx46_platform_data *pd; + u32 tmp; + int ret; + + pd = devm_kzalloc(&spi->dev, sizeof(*pd), GFP_KERNEL); + if (!pd) + return -ENOMEM; + + ret = of_property_read_u32(np, "data-size", &tmp); + if (ret < 0) { + dev_err(&spi->dev, "data-size property not found\n"); + return ret; + } + + if (tmp == 8) { + pd->flags |= EE_ADDR8; + } else if (tmp == 16) { + pd->flags |= EE_ADDR16; + } else { + dev_err(&spi->dev, "invalid data-size (%d)\n", tmp); + return -EINVAL; + } + + if (of_property_read_bool(np, "read-only")) + pd->flags |= EE_READONLY; + + pd->select = devm_gpiod_get_optional(&spi->dev, "select", + GPIOD_OUT_LOW); + if (IS_ERR(pd->select)) + return PTR_ERR(pd->select); + + pd->prepare = select_assert; + pd->finish = select_deassert; + gpiod_direction_output(pd->select, 0); + + if (of_id->data) { + const struct eeprom_93xx46_devtype_data *data = of_id->data; + + pd->quirks = data->quirks; + } + + spi->dev.platform_data = pd; + + return 0; +} + +static int eeprom_93xx46_probe(struct spi_device *spi) +{ + struct eeprom_93xx46_platform_data *pd; + struct eeprom_93xx46_dev *edev; + int err; + + if (spi->dev.of_node) { + err = eeprom_93xx46_probe_dt(spi); + if (err < 0) + return err; + } + + pd = spi->dev.platform_data; + if (!pd) { + dev_err(&spi->dev, "missing platform data\n"); + return -ENODEV; + } + + edev = kzalloc(sizeof(*edev), GFP_KERNEL); + if (!edev) + return -ENOMEM; + + if (pd->flags & EE_ADDR8) + edev->addrlen = 7; + else if (pd->flags & EE_ADDR16) + edev->addrlen = 6; + else { + dev_err(&spi->dev, "unspecified address type\n"); + err = -EINVAL; + goto fail; + } + + mutex_init(&edev->lock); + + edev->spi = spi; + edev->pdata = pd; + + edev->size = 128; + edev->nvmem_config.name = dev_name(&spi->dev); + edev->nvmem_config.dev = &spi->dev; + edev->nvmem_config.read_only = pd->flags & EE_READONLY; + edev->nvmem_config.root_only = true; + edev->nvmem_config.owner = THIS_MODULE; + edev->nvmem_config.compat = true; + edev->nvmem_config.base_dev = &spi->dev; + edev->nvmem_config.reg_read = eeprom_93xx46_read; + edev->nvmem_config.reg_write = eeprom_93xx46_write; + edev->nvmem_config.priv = edev; + edev->nvmem_config.stride = 4; + edev->nvmem_config.word_size = 1; + edev->nvmem_config.size = edev->size; + + edev->nvmem = nvmem_register(&edev->nvmem_config); + if (IS_ERR(edev->nvmem)) { + err = PTR_ERR(edev->nvmem); + goto fail; + } + + dev_info(&spi->dev, "%d-bit eeprom %s\n", + (pd->flags & EE_ADDR8) ? 8 : 16, + (pd->flags & EE_READONLY) ? "(readonly)" : ""); + + if (!(pd->flags & EE_READONLY)) { + if (device_create_file(&spi->dev, &dev_attr_erase)) + dev_err(&spi->dev, "can't create erase interface\n"); + } + + spi_set_drvdata(spi, edev); + return 0; +fail: + kfree(edev); + return err; +} + +static int eeprom_93xx46_remove(struct spi_device *spi) +{ + struct eeprom_93xx46_dev *edev = spi_get_drvdata(spi); + + nvmem_unregister(edev->nvmem); + + if (!(edev->pdata->flags & EE_READONLY)) + device_remove_file(&spi->dev, &dev_attr_erase); + + kfree(edev); + return 0; +} + +static struct spi_driver eeprom_93xx46_driver = { + .driver = { + .name = "93xx46", + .of_match_table = of_match_ptr(eeprom_93xx46_of_table), + }, + .probe = eeprom_93xx46_probe, + .remove = eeprom_93xx46_remove, +}; + +module_spi_driver(eeprom_93xx46_driver); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Driver for 93xx46 EEPROMs"); +MODULE_AUTHOR("Anatolij Gustschin "); +MODULE_ALIAS("spi:93xx46"); +MODULE_ALIAS("spi:eeprom-93xx46"); diff --git a/drivers/misc/eeprom/idt_89hpesx.c b/drivers/misc/eeprom/idt_89hpesx.c new file mode 100644 index 000000000..5879ba82c --- /dev/null +++ b/drivers/misc/eeprom/idt_89hpesx.c @@ -0,0 +1,1620 @@ +/* + * This file is provided under a GPLv2 license. When using or + * redistributing this file, you may do so under that license. + * + * GPL LICENSE SUMMARY + * + * Copyright (C) 2016 T-Platforms. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, it can be found . + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * IDT PCIe-switch NTB Linux driver + * + * Contact Information: + * Serge Semin , + */ +/* + * NOTE of the IDT 89HPESx SMBus-slave interface driver + * This driver primarily is developed to have an access to EEPROM device of + * IDT PCIe-switches. IDT provides a simple SMBus interface to perform IO- + * operations from/to EEPROM, which is located at private (so called Master) + * SMBus of switches. Using that interface this the driver creates a simple + * binary sysfs-file in the device directory: + * /sys/bus/i2c/devices/-/eeprom + * In case if read-only flag is specified in the dts-node of device desription, + * User-space applications won't be able to write to the EEPROM sysfs-node. + * Additionally IDT 89HPESx SMBus interface has an ability to write/read + * data of device CSRs. This driver exposes debugf-file to perform simple IO + * operations using that ability for just basic debug purpose. Particularly + * next file is created in the specific debugfs-directory: + * /sys/kernel/debug/idt_csr/ + * Format of the debugfs-node is: + * $ cat /sys/kernel/debug/idt_csr/-/; + * : + * So reading the content of the file gives current CSR address and it value. + * If User-space application wishes to change current CSR address, + * it can just write a proper value to the sysfs-file: + * $ echo "" > /sys/kernel/debug/idt_csr/-/ + * If it wants to change the CSR value as well, the format of the write + * operation is: + * $ echo ":" > \ + * /sys/kernel/debug/idt_csr/-/; + * CSR address and value can be any of hexadecimal, decimal or octal format. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define IDT_NAME "89hpesx" +#define IDT_89HPESX_DESC "IDT 89HPESx SMBus-slave interface driver" +#define IDT_89HPESX_VER "1.0" + +MODULE_DESCRIPTION(IDT_89HPESX_DESC); +MODULE_VERSION(IDT_89HPESX_VER); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("T-platforms"); + +/* + * csr_dbgdir - CSR read/write operations Debugfs directory + */ +static struct dentry *csr_dbgdir; + +/* + * struct idt_89hpesx_dev - IDT 89HPESx device data structure + * @eesize: Size of EEPROM in bytes (calculated from "idt,eecompatible") + * @eero: EEPROM Read-only flag + * @eeaddr: EEPROM custom address + * + * @inieecmd: Initial cmd value for EEPROM read/write operations + * @inicsrcmd: Initial cmd value for CSR read/write operations + * @iniccode: Initialial command code value for IO-operations + * + * @csr: CSR address to perform read operation + * + * @smb_write: SMBus write method + * @smb_read: SMBus read method + * @smb_mtx: SMBus mutex + * + * @client: i2c client used to perform IO operations + * + * @ee_file: EEPROM read/write sysfs-file + * @csr_file: CSR read/write debugfs-node + */ +struct idt_smb_seq; +struct idt_89hpesx_dev { + u32 eesize; + bool eero; + u8 eeaddr; + + u8 inieecmd; + u8 inicsrcmd; + u8 iniccode; + + u16 csr; + + int (*smb_write)(struct idt_89hpesx_dev *, const struct idt_smb_seq *); + int (*smb_read)(struct idt_89hpesx_dev *, struct idt_smb_seq *); + struct mutex smb_mtx; + + struct i2c_client *client; + + struct bin_attribute *ee_file; + struct dentry *csr_dir; + struct dentry *csr_file; +}; + +/* + * struct idt_smb_seq - sequence of data to be read/written from/to IDT 89HPESx + * @ccode: SMBus command code + * @bytecnt: Byte count of operation + * @data: Data to by written + */ +struct idt_smb_seq { + u8 ccode; + u8 bytecnt; + u8 *data; +}; + +/* + * struct idt_eeprom_seq - sequence of data to be read/written from/to EEPROM + * @cmd: Transaction CMD + * @eeaddr: EEPROM custom address + * @memaddr: Internal memory address of EEPROM + * @data: Data to be written at the memory address + */ +struct idt_eeprom_seq { + u8 cmd; + u8 eeaddr; + u16 memaddr; + u8 data; +} __packed; + +/* + * struct idt_csr_seq - sequence of data to be read/written from/to CSR + * @cmd: Transaction CMD + * @csraddr: Internal IDT device CSR address + * @data: Data to be read/written from/to the CSR address + */ +struct idt_csr_seq { + u8 cmd; + u16 csraddr; + u32 data; +} __packed; + +/* + * SMBus command code macros + * @CCODE_END: Indicates the end of transaction + * @CCODE_START: Indicates the start of transaction + * @CCODE_CSR: CSR read/write transaction + * @CCODE_EEPROM: EEPROM read/write transaction + * @CCODE_BYTE: Supplied data has BYTE length + * @CCODE_WORD: Supplied data has WORD length + * @CCODE_BLOCK: Supplied data has variable length passed in bytecnt + * byte right following CCODE byte + */ +#define CCODE_END ((u8)0x01) +#define CCODE_START ((u8)0x02) +#define CCODE_CSR ((u8)0x00) +#define CCODE_EEPROM ((u8)0x04) +#define CCODE_BYTE ((u8)0x00) +#define CCODE_WORD ((u8)0x20) +#define CCODE_BLOCK ((u8)0x40) +#define CCODE_PEC ((u8)0x80) + +/* + * EEPROM command macros + * @EEPROM_OP_WRITE: EEPROM write operation + * @EEPROM_OP_READ: EEPROM read operation + * @EEPROM_USA: Use specified address of EEPROM + * @EEPROM_NAERR: EEPROM device is not ready to respond + * @EEPROM_LAERR: EEPROM arbitration loss error + * @EEPROM_MSS: EEPROM misplace start & stop bits error + * @EEPROM_WR_CNT: Bytes count to perform write operation + * @EEPROM_WRRD_CNT: Bytes count to write before reading + * @EEPROM_RD_CNT: Bytes count to perform read operation + * @EEPROM_DEF_SIZE: Fall back size of EEPROM + * @EEPROM_DEF_ADDR: Defatul EEPROM address + * @EEPROM_TOUT: Timeout before retry read operation if eeprom is busy + */ +#define EEPROM_OP_WRITE ((u8)0x00) +#define EEPROM_OP_READ ((u8)0x01) +#define EEPROM_USA ((u8)0x02) +#define EEPROM_NAERR ((u8)0x08) +#define EEPROM_LAERR ((u8)0x10) +#define EEPROM_MSS ((u8)0x20) +#define EEPROM_WR_CNT ((u8)5) +#define EEPROM_WRRD_CNT ((u8)4) +#define EEPROM_RD_CNT ((u8)5) +#define EEPROM_DEF_SIZE ((u16)4096) +#define EEPROM_DEF_ADDR ((u8)0x50) +#define EEPROM_TOUT (100) + +/* + * CSR command macros + * @CSR_DWE: Enable all four bytes of the operation + * @CSR_OP_WRITE: CSR write operation + * @CSR_OP_READ: CSR read operation + * @CSR_RERR: Read operation error + * @CSR_WERR: Write operation error + * @CSR_WR_CNT: Bytes count to perform write operation + * @CSR_WRRD_CNT: Bytes count to write before reading + * @CSR_RD_CNT: Bytes count to perform read operation + * @CSR_MAX: Maximum CSR address + * @CSR_DEF: Default CSR address + * @CSR_REAL_ADDR: CSR real unshifted address + */ +#define CSR_DWE ((u8)0x0F) +#define CSR_OP_WRITE ((u8)0x00) +#define CSR_OP_READ ((u8)0x10) +#define CSR_RERR ((u8)0x40) +#define CSR_WERR ((u8)0x80) +#define CSR_WR_CNT ((u8)7) +#define CSR_WRRD_CNT ((u8)3) +#define CSR_RD_CNT ((u8)7) +#define CSR_MAX ((u32)0x3FFFF) +#define CSR_DEF ((u16)0x0000) +#define CSR_REAL_ADDR(val) ((unsigned int)val << 2) + +/* + * IDT 89HPESx basic register + * @IDT_VIDDID_CSR: PCIe VID and DID of IDT 89HPESx + * @IDT_VID_MASK: Mask of VID + */ +#define IDT_VIDDID_CSR ((u32)0x0000) +#define IDT_VID_MASK ((u32)0xFFFF) + +/* + * IDT 89HPESx can send NACK when new command is sent before previous one + * fininshed execution. In this case driver retries operation + * certain times. + * @RETRY_CNT: Number of retries before giving up and fail + * @idt_smb_safe: Generate a retry loop on corresponding SMBus method + */ +#define RETRY_CNT (128) +#define idt_smb_safe(ops, args...) ({ \ + int __retry = RETRY_CNT; \ + s32 __sts; \ + do { \ + __sts = i2c_smbus_ ## ops ## _data(args); \ + } while (__retry-- && __sts < 0); \ + __sts; \ +}) + +/*=========================================================================== + * i2c bus level IO-operations + *=========================================================================== + */ + +/* + * idt_smb_write_byte() - SMBus write method when I2C_SMBUS_BYTE_DATA operation + * is only available + * @pdev: Pointer to the driver data + * @seq: Sequence of data to be written + */ +static int idt_smb_write_byte(struct idt_89hpesx_dev *pdev, + const struct idt_smb_seq *seq) +{ + s32 sts; + u8 ccode; + int idx; + + /* Loop over the supplied data sending byte one-by-one */ + for (idx = 0; idx < seq->bytecnt; idx++) { + /* Collect the command code byte */ + ccode = seq->ccode | CCODE_BYTE; + if (idx == 0) + ccode |= CCODE_START; + if (idx == seq->bytecnt - 1) + ccode |= CCODE_END; + + /* Send data to the device */ + sts = idt_smb_safe(write_byte, pdev->client, ccode, + seq->data[idx]); + if (sts != 0) + return (int)sts; + } + + return 0; +} + +/* + * idt_smb_read_byte() - SMBus read method when I2C_SMBUS_BYTE_DATA operation + * is only available + * @pdev: Pointer to the driver data + * @seq: Buffer to read data to + */ +static int idt_smb_read_byte(struct idt_89hpesx_dev *pdev, + struct idt_smb_seq *seq) +{ + s32 sts; + u8 ccode; + int idx; + + /* Loop over the supplied buffer receiving byte one-by-one */ + for (idx = 0; idx < seq->bytecnt; idx++) { + /* Collect the command code byte */ + ccode = seq->ccode | CCODE_BYTE; + if (idx == 0) + ccode |= CCODE_START; + if (idx == seq->bytecnt - 1) + ccode |= CCODE_END; + + /* Read data from the device */ + sts = idt_smb_safe(read_byte, pdev->client, ccode); + if (sts < 0) + return (int)sts; + + seq->data[idx] = (u8)sts; + } + + return 0; +} + +/* + * idt_smb_write_word() - SMBus write method when I2C_SMBUS_BYTE_DATA and + * I2C_FUNC_SMBUS_WORD_DATA operations are available + * @pdev: Pointer to the driver data + * @seq: Sequence of data to be written + */ +static int idt_smb_write_word(struct idt_89hpesx_dev *pdev, + const struct idt_smb_seq *seq) +{ + s32 sts; + u8 ccode; + int idx, evencnt; + + /* Calculate the even count of data to send */ + evencnt = seq->bytecnt - (seq->bytecnt % 2); + + /* Loop over the supplied data sending two bytes at a time */ + for (idx = 0; idx < evencnt; idx += 2) { + /* Collect the command code byte */ + ccode = seq->ccode | CCODE_WORD; + if (idx == 0) + ccode |= CCODE_START; + if (idx == evencnt - 2) + ccode |= CCODE_END; + + /* Send word data to the device */ + sts = idt_smb_safe(write_word, pdev->client, ccode, + *(u16 *)&seq->data[idx]); + if (sts != 0) + return (int)sts; + } + + /* If there is odd number of bytes then send just one last byte */ + if (seq->bytecnt != evencnt) { + /* Collect the command code byte */ + ccode = seq->ccode | CCODE_BYTE | CCODE_END; + if (idx == 0) + ccode |= CCODE_START; + + /* Send byte data to the device */ + sts = idt_smb_safe(write_byte, pdev->client, ccode, + seq->data[idx]); + if (sts != 0) + return (int)sts; + } + + return 0; +} + +/* + * idt_smb_read_word() - SMBus read method when I2C_SMBUS_BYTE_DATA and + * I2C_FUNC_SMBUS_WORD_DATA operations are available + * @pdev: Pointer to the driver data + * @seq: Buffer to read data to + */ +static int idt_smb_read_word(struct idt_89hpesx_dev *pdev, + struct idt_smb_seq *seq) +{ + s32 sts; + u8 ccode; + int idx, evencnt; + + /* Calculate the even count of data to send */ + evencnt = seq->bytecnt - (seq->bytecnt % 2); + + /* Loop over the supplied data reading two bytes at a time */ + for (idx = 0; idx < evencnt; idx += 2) { + /* Collect the command code byte */ + ccode = seq->ccode | CCODE_WORD; + if (idx == 0) + ccode |= CCODE_START; + if (idx == evencnt - 2) + ccode |= CCODE_END; + + /* Read word data from the device */ + sts = idt_smb_safe(read_word, pdev->client, ccode); + if (sts < 0) + return (int)sts; + + *(u16 *)&seq->data[idx] = (u16)sts; + } + + /* If there is odd number of bytes then receive just one last byte */ + if (seq->bytecnt != evencnt) { + /* Collect the command code byte */ + ccode = seq->ccode | CCODE_BYTE | CCODE_END; + if (idx == 0) + ccode |= CCODE_START; + + /* Read last data byte from the device */ + sts = idt_smb_safe(read_byte, pdev->client, ccode); + if (sts < 0) + return (int)sts; + + seq->data[idx] = (u8)sts; + } + + return 0; +} + +/* + * idt_smb_write_block() - SMBus write method when I2C_SMBUS_BLOCK_DATA + * operation is available + * @pdev: Pointer to the driver data + * @seq: Sequence of data to be written + */ +static int idt_smb_write_block(struct idt_89hpesx_dev *pdev, + const struct idt_smb_seq *seq) +{ + u8 ccode; + + /* Return error if too much data passed to send */ + if (seq->bytecnt > I2C_SMBUS_BLOCK_MAX) + return -EINVAL; + + /* Collect the command code byte */ + ccode = seq->ccode | CCODE_BLOCK | CCODE_START | CCODE_END; + + /* Send block of data to the device */ + return idt_smb_safe(write_block, pdev->client, ccode, seq->bytecnt, + seq->data); +} + +/* + * idt_smb_read_block() - SMBus read method when I2C_SMBUS_BLOCK_DATA + * operation is available + * @pdev: Pointer to the driver data + * @seq: Buffer to read data to + */ +static int idt_smb_read_block(struct idt_89hpesx_dev *pdev, + struct idt_smb_seq *seq) +{ + s32 sts; + u8 ccode; + + /* Return error if too much data passed to send */ + if (seq->bytecnt > I2C_SMBUS_BLOCK_MAX) + return -EINVAL; + + /* Collect the command code byte */ + ccode = seq->ccode | CCODE_BLOCK | CCODE_START | CCODE_END; + + /* Read block of data from the device */ + sts = idt_smb_safe(read_block, pdev->client, ccode, seq->data); + if (sts != seq->bytecnt) + return (sts < 0 ? sts : -ENODATA); + + return 0; +} + +/* + * idt_smb_write_i2c_block() - SMBus write method when I2C_SMBUS_I2C_BLOCK_DATA + * operation is available + * @pdev: Pointer to the driver data + * @seq: Sequence of data to be written + * + * NOTE It's usual SMBus write block operation, except the actual data length is + * sent as first byte of data + */ +static int idt_smb_write_i2c_block(struct idt_89hpesx_dev *pdev, + const struct idt_smb_seq *seq) +{ + u8 ccode, buf[I2C_SMBUS_BLOCK_MAX + 1]; + + /* Return error if too much data passed to send */ + if (seq->bytecnt > I2C_SMBUS_BLOCK_MAX) + return -EINVAL; + + /* Collect the data to send. Length byte must be added prior the data */ + buf[0] = seq->bytecnt; + memcpy(&buf[1], seq->data, seq->bytecnt); + + /* Collect the command code byte */ + ccode = seq->ccode | CCODE_BLOCK | CCODE_START | CCODE_END; + + /* Send length and block of data to the device */ + return idt_smb_safe(write_i2c_block, pdev->client, ccode, + seq->bytecnt + 1, buf); +} + +/* + * idt_smb_read_i2c_block() - SMBus read method when I2C_SMBUS_I2C_BLOCK_DATA + * operation is available + * @pdev: Pointer to the driver data + * @seq: Buffer to read data to + * + * NOTE It's usual SMBus read block operation, except the actual data length is + * retrieved as first byte of data + */ +static int idt_smb_read_i2c_block(struct idt_89hpesx_dev *pdev, + struct idt_smb_seq *seq) +{ + u8 ccode, buf[I2C_SMBUS_BLOCK_MAX + 1]; + s32 sts; + + /* Return error if too much data passed to send */ + if (seq->bytecnt > I2C_SMBUS_BLOCK_MAX) + return -EINVAL; + + /* Collect the command code byte */ + ccode = seq->ccode | CCODE_BLOCK | CCODE_START | CCODE_END; + + /* Read length and block of data from the device */ + sts = idt_smb_safe(read_i2c_block, pdev->client, ccode, + seq->bytecnt + 1, buf); + if (sts != seq->bytecnt + 1) + return (sts < 0 ? sts : -ENODATA); + if (buf[0] != seq->bytecnt) + return -ENODATA; + + /* Copy retrieved data to the output data buffer */ + memcpy(seq->data, &buf[1], seq->bytecnt); + + return 0; +} + +/*=========================================================================== + * EEPROM IO-operations + *=========================================================================== + */ + +/* + * idt_eeprom_read_byte() - read just one byte from EEPROM + * @pdev: Pointer to the driver data + * @memaddr: Start EEPROM memory address + * @data: Data to be written to EEPROM + */ +static int idt_eeprom_read_byte(struct idt_89hpesx_dev *pdev, u16 memaddr, + u8 *data) +{ + struct device *dev = &pdev->client->dev; + struct idt_eeprom_seq eeseq; + struct idt_smb_seq smbseq; + int ret, retry; + + /* Initialize SMBus sequence fields */ + smbseq.ccode = pdev->iniccode | CCODE_EEPROM; + smbseq.data = (u8 *)&eeseq; + + /* + * Sometimes EEPROM may respond with NACK if it's busy with previous + * operation, so we need to perform a few attempts of read cycle + */ + retry = RETRY_CNT; + do { + /* Send EEPROM memory address to read data from */ + smbseq.bytecnt = EEPROM_WRRD_CNT; + eeseq.cmd = pdev->inieecmd | EEPROM_OP_READ; + eeseq.eeaddr = pdev->eeaddr; + eeseq.memaddr = cpu_to_le16(memaddr); + ret = pdev->smb_write(pdev, &smbseq); + if (ret != 0) { + dev_err(dev, "Failed to init eeprom addr 0x%02hhx", + memaddr); + break; + } + + /* Perform read operation */ + smbseq.bytecnt = EEPROM_RD_CNT; + ret = pdev->smb_read(pdev, &smbseq); + if (ret != 0) { + dev_err(dev, "Failed to read eeprom data 0x%02hhx", + memaddr); + break; + } + + /* Restart read operation if the device is busy */ + if (retry && (eeseq.cmd & EEPROM_NAERR)) { + dev_dbg(dev, "EEPROM busy, retry reading after %d ms", + EEPROM_TOUT); + msleep(EEPROM_TOUT); + continue; + } + + /* Check whether IDT successfully read data from EEPROM */ + if (eeseq.cmd & (EEPROM_NAERR | EEPROM_LAERR | EEPROM_MSS)) { + dev_err(dev, + "Communication with eeprom failed, cmd 0x%hhx", + eeseq.cmd); + ret = -EREMOTEIO; + break; + } + + /* Save retrieved data and exit the loop */ + *data = eeseq.data; + break; + } while (retry--); + + /* Return the status of operation */ + return ret; +} + +/* + * idt_eeprom_write() - EEPROM write operation + * @pdev: Pointer to the driver data + * @memaddr: Start EEPROM memory address + * @len: Length of data to be written + * @data: Data to be written to EEPROM + */ +static int idt_eeprom_write(struct idt_89hpesx_dev *pdev, u16 memaddr, u16 len, + const u8 *data) +{ + struct device *dev = &pdev->client->dev; + struct idt_eeprom_seq eeseq; + struct idt_smb_seq smbseq; + int ret; + u16 idx; + + /* Initialize SMBus sequence fields */ + smbseq.ccode = pdev->iniccode | CCODE_EEPROM; + smbseq.data = (u8 *)&eeseq; + + /* Send data byte-by-byte, checking if it is successfully written */ + for (idx = 0; idx < len; idx++, memaddr++) { + /* Lock IDT SMBus device */ + mutex_lock(&pdev->smb_mtx); + + /* Perform write operation */ + smbseq.bytecnt = EEPROM_WR_CNT; + eeseq.cmd = pdev->inieecmd | EEPROM_OP_WRITE; + eeseq.eeaddr = pdev->eeaddr; + eeseq.memaddr = cpu_to_le16(memaddr); + eeseq.data = data[idx]; + ret = pdev->smb_write(pdev, &smbseq); + if (ret != 0) { + dev_err(dev, + "Failed to write 0x%04hx:0x%02hhx to eeprom", + memaddr, data[idx]); + goto err_mutex_unlock; + } + + /* + * Check whether the data is successfully written by reading + * from the same EEPROM memory address. + */ + eeseq.data = ~data[idx]; + ret = idt_eeprom_read_byte(pdev, memaddr, &eeseq.data); + if (ret != 0) + goto err_mutex_unlock; + + /* Check whether the read byte is the same as written one */ + if (eeseq.data != data[idx]) { + dev_err(dev, "Values don't match 0x%02hhx != 0x%02hhx", + eeseq.data, data[idx]); + ret = -EREMOTEIO; + goto err_mutex_unlock; + } + + /* Unlock IDT SMBus device */ +err_mutex_unlock: + mutex_unlock(&pdev->smb_mtx); + if (ret != 0) + return ret; + } + + return 0; +} + +/* + * idt_eeprom_read() - EEPROM read operation + * @pdev: Pointer to the driver data + * @memaddr: Start EEPROM memory address + * @len: Length of data to read + * @buf: Buffer to read data to + */ +static int idt_eeprom_read(struct idt_89hpesx_dev *pdev, u16 memaddr, u16 len, + u8 *buf) +{ + int ret; + u16 idx; + + /* Read data byte-by-byte, retrying if it wasn't successful */ + for (idx = 0; idx < len; idx++, memaddr++) { + /* Lock IDT SMBus device */ + mutex_lock(&pdev->smb_mtx); + + /* Just read the byte to the buffer */ + ret = idt_eeprom_read_byte(pdev, memaddr, &buf[idx]); + + /* Unlock IDT SMBus device */ + mutex_unlock(&pdev->smb_mtx); + + /* Return error if read operation failed */ + if (ret != 0) + return ret; + } + + return 0; +} + +/*=========================================================================== + * CSR IO-operations + *=========================================================================== + */ + +/* + * idt_csr_write() - CSR write operation + * @pdev: Pointer to the driver data + * @csraddr: CSR address (with no two LS bits) + * @data: Data to be written to CSR + */ +static int idt_csr_write(struct idt_89hpesx_dev *pdev, u16 csraddr, + const u32 data) +{ + struct device *dev = &pdev->client->dev; + struct idt_csr_seq csrseq; + struct idt_smb_seq smbseq; + int ret; + + /* Initialize SMBus sequence fields */ + smbseq.ccode = pdev->iniccode | CCODE_CSR; + smbseq.data = (u8 *)&csrseq; + + /* Lock IDT SMBus device */ + mutex_lock(&pdev->smb_mtx); + + /* Perform write operation */ + smbseq.bytecnt = CSR_WR_CNT; + csrseq.cmd = pdev->inicsrcmd | CSR_OP_WRITE; + csrseq.csraddr = cpu_to_le16(csraddr); + csrseq.data = cpu_to_le32(data); + ret = pdev->smb_write(pdev, &smbseq); + if (ret != 0) { + dev_err(dev, "Failed to write 0x%04x: 0x%04x to csr", + CSR_REAL_ADDR(csraddr), data); + goto err_mutex_unlock; + } + + /* Send CSR address to read data from */ + smbseq.bytecnt = CSR_WRRD_CNT; + csrseq.cmd = pdev->inicsrcmd | CSR_OP_READ; + ret = pdev->smb_write(pdev, &smbseq); + if (ret != 0) { + dev_err(dev, "Failed to init csr address 0x%04x", + CSR_REAL_ADDR(csraddr)); + goto err_mutex_unlock; + } + + /* Perform read operation */ + smbseq.bytecnt = CSR_RD_CNT; + ret = pdev->smb_read(pdev, &smbseq); + if (ret != 0) { + dev_err(dev, "Failed to read csr 0x%04x", + CSR_REAL_ADDR(csraddr)); + goto err_mutex_unlock; + } + + /* Check whether IDT successfully retrieved CSR data */ + if (csrseq.cmd & (CSR_RERR | CSR_WERR)) { + dev_err(dev, "IDT failed to perform CSR r/w"); + ret = -EREMOTEIO; + goto err_mutex_unlock; + } + + /* Unlock IDT SMBus device */ +err_mutex_unlock: + mutex_unlock(&pdev->smb_mtx); + + return ret; +} + +/* + * idt_csr_read() - CSR read operation + * @pdev: Pointer to the driver data + * @csraddr: CSR address (with no two LS bits) + * @data: Data to be written to CSR + */ +static int idt_csr_read(struct idt_89hpesx_dev *pdev, u16 csraddr, u32 *data) +{ + struct device *dev = &pdev->client->dev; + struct idt_csr_seq csrseq; + struct idt_smb_seq smbseq; + int ret; + + /* Initialize SMBus sequence fields */ + smbseq.ccode = pdev->iniccode | CCODE_CSR; + smbseq.data = (u8 *)&csrseq; + + /* Lock IDT SMBus device */ + mutex_lock(&pdev->smb_mtx); + + /* Send CSR register address before reading it */ + smbseq.bytecnt = CSR_WRRD_CNT; + csrseq.cmd = pdev->inicsrcmd | CSR_OP_READ; + csrseq.csraddr = cpu_to_le16(csraddr); + ret = pdev->smb_write(pdev, &smbseq); + if (ret != 0) { + dev_err(dev, "Failed to init csr address 0x%04x", + CSR_REAL_ADDR(csraddr)); + goto err_mutex_unlock; + } + + /* Perform read operation */ + smbseq.bytecnt = CSR_RD_CNT; + ret = pdev->smb_read(pdev, &smbseq); + if (ret != 0) { + dev_err(dev, "Failed to read csr 0x%04hx", + CSR_REAL_ADDR(csraddr)); + goto err_mutex_unlock; + } + + /* Check whether IDT successfully retrieved CSR data */ + if (csrseq.cmd & (CSR_RERR | CSR_WERR)) { + dev_err(dev, "IDT failed to perform CSR r/w"); + ret = -EREMOTEIO; + goto err_mutex_unlock; + } + + /* Save data retrieved from IDT */ + *data = le32_to_cpu(csrseq.data); + + /* Unlock IDT SMBus device */ +err_mutex_unlock: + mutex_unlock(&pdev->smb_mtx); + + return ret; +} + +/*=========================================================================== + * Sysfs/debugfs-nodes IO-operations + *=========================================================================== + */ + +/* + * eeprom_write() - EEPROM sysfs-node write callback + * @filep: Pointer to the file system node + * @kobj: Pointer to the kernel object related to the sysfs-node + * @attr: Attributes of the file + * @buf: Buffer to write data to + * @off: Offset at which data should be written to + * @count: Number of bytes to write + */ +static ssize_t eeprom_write(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, + char *buf, loff_t off, size_t count) +{ + struct idt_89hpesx_dev *pdev; + int ret; + + /* Retrieve driver data */ + pdev = dev_get_drvdata(kobj_to_dev(kobj)); + + /* Perform EEPROM write operation */ + ret = idt_eeprom_write(pdev, (u16)off, (u16)count, (u8 *)buf); + return (ret != 0 ? ret : count); +} + +/* + * eeprom_read() - EEPROM sysfs-node read callback + * @filep: Pointer to the file system node + * @kobj: Pointer to the kernel object related to the sysfs-node + * @attr: Attributes of the file + * @buf: Buffer to write data to + * @off: Offset at which data should be written to + * @count: Number of bytes to write + */ +static ssize_t eeprom_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, + char *buf, loff_t off, size_t count) +{ + struct idt_89hpesx_dev *pdev; + int ret; + + /* Retrieve driver data */ + pdev = dev_get_drvdata(kobj_to_dev(kobj)); + + /* Perform EEPROM read operation */ + ret = idt_eeprom_read(pdev, (u16)off, (u16)count, (u8 *)buf); + return (ret != 0 ? ret : count); +} + +/* + * idt_dbgfs_csr_write() - CSR debugfs-node write callback + * @filep: Pointer to the file system file descriptor + * @buf: Buffer to read data from + * @count: Size of the buffer + * @offp: Offset within the file + * + * It accepts either "0x:0x" for saving register address + * and writing value to specified DWORD register or "0x" for + * just saving register address in order to perform next read operation. + * + * WARNING No spaces are allowed. Incoming string must be strictly formated as: + * ":". Register address must be aligned within 4 bytes + * (one DWORD). + */ +static ssize_t idt_dbgfs_csr_write(struct file *filep, const char __user *ubuf, + size_t count, loff_t *offp) +{ + struct idt_89hpesx_dev *pdev = filep->private_data; + char *colon_ch, *csraddr_str, *csrval_str; + int ret, csraddr_len; + u32 csraddr, csrval; + char *buf; + + /* Copy data from User-space */ + buf = kmalloc(count + 1, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + ret = simple_write_to_buffer(buf, count, offp, ubuf, count); + if (ret < 0) + goto free_buf; + buf[count] = 0; + + /* Find position of colon in the buffer */ + colon_ch = strnchr(buf, count, ':'); + + /* + * If there is colon passed then new CSR value should be parsed as + * well, so allocate buffer for CSR address substring. + * If no colon is found, then string must have just one number with + * no new CSR value + */ + if (colon_ch != NULL) { + csraddr_len = colon_ch - buf; + csraddr_str = + kmalloc(csraddr_len + 1, GFP_KERNEL); + if (csraddr_str == NULL) { + ret = -ENOMEM; + goto free_buf; + } + /* Copy the register address to the substring buffer */ + strncpy(csraddr_str, buf, csraddr_len); + csraddr_str[csraddr_len] = '\0'; + /* Register value must follow the colon */ + csrval_str = colon_ch + 1; + } else /* if (str_colon == NULL) */ { + csraddr_str = (char *)buf; /* Just to shut warning up */ + csraddr_len = strnlen(csraddr_str, count); + csrval_str = NULL; + } + + /* Convert CSR address to u32 value */ + ret = kstrtou32(csraddr_str, 0, &csraddr); + if (ret != 0) + goto free_csraddr_str; + + /* Check whether passed register address is valid */ + if (csraddr > CSR_MAX || !IS_ALIGNED(csraddr, SZ_4)) { + ret = -EINVAL; + goto free_csraddr_str; + } + + /* Shift register address to the right so to have u16 address */ + pdev->csr = (csraddr >> 2); + + /* Parse new CSR value and send it to IDT, if colon has been found */ + if (colon_ch != NULL) { + ret = kstrtou32(csrval_str, 0, &csrval); + if (ret != 0) + goto free_csraddr_str; + + ret = idt_csr_write(pdev, pdev->csr, csrval); + if (ret != 0) + goto free_csraddr_str; + } + + /* Free memory only if colon has been found */ +free_csraddr_str: + if (colon_ch != NULL) + kfree(csraddr_str); + + /* Free buffer allocated for data retrieved from User-space */ +free_buf: + kfree(buf); + + return (ret != 0 ? ret : count); +} + +/* + * idt_dbgfs_csr_read() - CSR debugfs-node read callback + * @filep: Pointer to the file system file descriptor + * @buf: Buffer to write data to + * @count: Size of the buffer + * @offp: Offset within the file + * + * It just prints the pair "0x:0x" to passed buffer. + */ +#define CSRBUF_SIZE ((size_t)32) +static ssize_t idt_dbgfs_csr_read(struct file *filep, char __user *ubuf, + size_t count, loff_t *offp) +{ + struct idt_89hpesx_dev *pdev = filep->private_data; + u32 csraddr, csrval; + char buf[CSRBUF_SIZE]; + int ret, size; + + /* Perform CSR read operation */ + ret = idt_csr_read(pdev, pdev->csr, &csrval); + if (ret != 0) + return ret; + + /* Shift register address to the left so to have real address */ + csraddr = ((u32)pdev->csr << 2); + + /* Print the "0x:0x" to buffer */ + size = snprintf(buf, CSRBUF_SIZE, "0x%05x:0x%08x\n", + (unsigned int)csraddr, (unsigned int)csrval); + + /* Copy data to User-space */ + return simple_read_from_buffer(ubuf, count, offp, buf, size); +} + +/* + * eeprom_attribute - EEPROM sysfs-node attributes + * + * NOTE Size will be changed in compliance with OF node. EEPROM attribute will + * be read-only as well if the corresponding flag is specified in OF node. + */ +static BIN_ATTR_RW(eeprom, EEPROM_DEF_SIZE); + +/* + * csr_dbgfs_ops - CSR debugfs-node read/write operations + */ +static const struct file_operations csr_dbgfs_ops = { + .owner = THIS_MODULE, + .open = simple_open, + .write = idt_dbgfs_csr_write, + .read = idt_dbgfs_csr_read +}; + +/*=========================================================================== + * Driver init/deinit methods + *=========================================================================== + */ + +/* + * idt_set_defval() - disable EEPROM access by default + * @pdev: Pointer to the driver data + */ +static void idt_set_defval(struct idt_89hpesx_dev *pdev) +{ + /* If OF info is missing then use next values */ + pdev->eesize = 0; + pdev->eero = true; + pdev->inieecmd = 0; + pdev->eeaddr = 0; +} + +static const struct i2c_device_id ee_ids[]; + +/* + * idt_ee_match_id() - check whether the node belongs to compatible EEPROMs + */ +static const struct i2c_device_id *idt_ee_match_id(struct fwnode_handle *fwnode) +{ + const struct i2c_device_id *id = ee_ids; + const char *compatible, *p; + char devname[I2C_NAME_SIZE]; + int ret; + + ret = fwnode_property_read_string(fwnode, "compatible", &compatible); + if (ret) + return NULL; + + p = strchr(compatible, ','); + strlcpy(devname, p ? p + 1 : compatible, sizeof(devname)); + /* Search through the device name */ + while (id->name[0]) { + if (strcmp(devname, id->name) == 0) + return id; + id++; + } + return NULL; +} + +/* + * idt_get_fw_data() - get IDT i2c-device parameters from device tree + * @pdev: Pointer to the driver data + */ +static void idt_get_fw_data(struct idt_89hpesx_dev *pdev) +{ + struct device *dev = &pdev->client->dev; + struct fwnode_handle *fwnode; + const struct i2c_device_id *ee_id = NULL; + u32 eeprom_addr; + int ret; + + device_for_each_child_node(dev, fwnode) { + ee_id = idt_ee_match_id(fwnode); + if (ee_id) + break; + + dev_warn(dev, "Skip unsupported EEPROM device %pfw\n", fwnode); + } + + /* If there is no fwnode EEPROM device, then set zero size */ + if (!ee_id) { + dev_warn(dev, "No fwnode, EEPROM access disabled"); + idt_set_defval(pdev); + return; + } + + /* Retrieve EEPROM size */ + pdev->eesize = (u32)ee_id->driver_data; + + /* Get custom EEPROM address from 'reg' attribute */ + ret = fwnode_property_read_u32(fwnode, "reg", &eeprom_addr); + if (ret || (eeprom_addr == 0)) { + dev_warn(dev, "No EEPROM reg found, use default address 0x%x", + EEPROM_DEF_ADDR); + pdev->inieecmd = 0; + pdev->eeaddr = EEPROM_DEF_ADDR << 1; + } else { + pdev->inieecmd = EEPROM_USA; + pdev->eeaddr = eeprom_addr << 1; + } + + /* Check EEPROM 'read-only' flag */ + if (fwnode_property_read_bool(fwnode, "read-only")) + pdev->eero = true; + else /* if (!fwnode_property_read_bool(node, "read-only")) */ + pdev->eero = false; + + fwnode_handle_put(fwnode); + dev_info(dev, "EEPROM of %d bytes found by 0x%x", + pdev->eesize, pdev->eeaddr); +} + +/* + * idt_create_pdev() - create and init data structure of the driver + * @client: i2c client of IDT PCIe-switch device + */ +static struct idt_89hpesx_dev *idt_create_pdev(struct i2c_client *client) +{ + struct idt_89hpesx_dev *pdev; + + /* Allocate memory for driver data */ + pdev = devm_kmalloc(&client->dev, sizeof(struct idt_89hpesx_dev), + GFP_KERNEL); + if (pdev == NULL) + return ERR_PTR(-ENOMEM); + + /* Initialize basic fields of the data */ + pdev->client = client; + i2c_set_clientdata(client, pdev); + + /* Read firmware nodes information */ + idt_get_fw_data(pdev); + + /* Initialize basic CSR CMD field - use full DWORD-sized r/w ops */ + pdev->inicsrcmd = CSR_DWE; + pdev->csr = CSR_DEF; + + /* Enable Packet Error Checking if it's supported by adapter */ + if (i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_PEC)) { + pdev->iniccode = CCODE_PEC; + client->flags |= I2C_CLIENT_PEC; + } else /* PEC is unsupported */ { + pdev->iniccode = 0; + } + + return pdev; +} + +/* + * idt_free_pdev() - free data structure of the driver + * @pdev: Pointer to the driver data + */ +static void idt_free_pdev(struct idt_89hpesx_dev *pdev) +{ + /* Clear driver data from device private field */ + i2c_set_clientdata(pdev->client, NULL); +} + +/* + * idt_set_smbus_ops() - set supported SMBus operations + * @pdev: Pointer to the driver data + * Return status of smbus check operations + */ +static int idt_set_smbus_ops(struct idt_89hpesx_dev *pdev) +{ + struct i2c_adapter *adapter = pdev->client->adapter; + struct device *dev = &pdev->client->dev; + + /* Check i2c adapter read functionality */ + if (i2c_check_functionality(adapter, + I2C_FUNC_SMBUS_READ_BLOCK_DATA)) { + pdev->smb_read = idt_smb_read_block; + dev_dbg(dev, "SMBus block-read op chosen"); + } else if (i2c_check_functionality(adapter, + I2C_FUNC_SMBUS_READ_I2C_BLOCK)) { + pdev->smb_read = idt_smb_read_i2c_block; + dev_dbg(dev, "SMBus i2c-block-read op chosen"); + } else if (i2c_check_functionality(adapter, + I2C_FUNC_SMBUS_READ_WORD_DATA) && + i2c_check_functionality(adapter, + I2C_FUNC_SMBUS_READ_BYTE_DATA)) { + pdev->smb_read = idt_smb_read_word; + dev_warn(dev, "Use slow word/byte SMBus read ops"); + } else if (i2c_check_functionality(adapter, + I2C_FUNC_SMBUS_READ_BYTE_DATA)) { + pdev->smb_read = idt_smb_read_byte; + dev_warn(dev, "Use slow byte SMBus read op"); + } else /* no supported smbus read operations */ { + dev_err(dev, "No supported SMBus read op"); + return -EPFNOSUPPORT; + } + + /* Check i2c adapter write functionality */ + if (i2c_check_functionality(adapter, + I2C_FUNC_SMBUS_WRITE_BLOCK_DATA)) { + pdev->smb_write = idt_smb_write_block; + dev_dbg(dev, "SMBus block-write op chosen"); + } else if (i2c_check_functionality(adapter, + I2C_FUNC_SMBUS_WRITE_I2C_BLOCK)) { + pdev->smb_write = idt_smb_write_i2c_block; + dev_dbg(dev, "SMBus i2c-block-write op chosen"); + } else if (i2c_check_functionality(adapter, + I2C_FUNC_SMBUS_WRITE_WORD_DATA) && + i2c_check_functionality(adapter, + I2C_FUNC_SMBUS_WRITE_BYTE_DATA)) { + pdev->smb_write = idt_smb_write_word; + dev_warn(dev, "Use slow word/byte SMBus write op"); + } else if (i2c_check_functionality(adapter, + I2C_FUNC_SMBUS_WRITE_BYTE_DATA)) { + pdev->smb_write = idt_smb_write_byte; + dev_warn(dev, "Use slow byte SMBus write op"); + } else /* no supported smbus write operations */ { + dev_err(dev, "No supported SMBus write op"); + return -EPFNOSUPPORT; + } + + /* Initialize IDT SMBus slave interface mutex */ + mutex_init(&pdev->smb_mtx); + + return 0; +} + +/* + * idt_check_dev() - check whether it's really IDT 89HPESx device + * @pdev: Pointer to the driver data + * Return status of i2c adapter check operation + */ +static int idt_check_dev(struct idt_89hpesx_dev *pdev) +{ + struct device *dev = &pdev->client->dev; + u32 viddid; + int ret; + + /* Read VID and DID directly from IDT memory space */ + ret = idt_csr_read(pdev, IDT_VIDDID_CSR, &viddid); + if (ret != 0) { + dev_err(dev, "Failed to read VID/DID"); + return ret; + } + + /* Check whether it's IDT device */ + if ((viddid & IDT_VID_MASK) != PCI_VENDOR_ID_IDT) { + dev_err(dev, "Got unsupported VID/DID: 0x%08x", viddid); + return -ENODEV; + } + + dev_info(dev, "Found IDT 89HPES device VID:0x%04x, DID:0x%04x", + (viddid & IDT_VID_MASK), (viddid >> 16)); + + return 0; +} + +/* + * idt_create_sysfs_files() - create sysfs attribute files + * @pdev: Pointer to the driver data + * Return status of operation + */ +static int idt_create_sysfs_files(struct idt_89hpesx_dev *pdev) +{ + struct device *dev = &pdev->client->dev; + int ret; + + /* Don't do anything if EEPROM isn't accessible */ + if (pdev->eesize == 0) { + dev_dbg(dev, "Skip creating sysfs-files"); + return 0; + } + + /* Allocate memory for attribute file */ + pdev->ee_file = devm_kmalloc(dev, sizeof(*pdev->ee_file), GFP_KERNEL); + if (!pdev->ee_file) + return -ENOMEM; + + /* Copy the declared EEPROM attr structure to change some of fields */ + memcpy(pdev->ee_file, &bin_attr_eeprom, sizeof(*pdev->ee_file)); + + /* In case of read-only EEPROM get rid of write ability */ + if (pdev->eero) { + pdev->ee_file->attr.mode &= ~0200; + pdev->ee_file->write = NULL; + } + /* Create EEPROM sysfs file */ + pdev->ee_file->size = pdev->eesize; + ret = sysfs_create_bin_file(&dev->kobj, pdev->ee_file); + if (ret != 0) { + dev_err(dev, "Failed to create EEPROM sysfs-node"); + return ret; + } + + return 0; +} + +/* + * idt_remove_sysfs_files() - remove sysfs attribute files + * @pdev: Pointer to the driver data + */ +static void idt_remove_sysfs_files(struct idt_89hpesx_dev *pdev) +{ + struct device *dev = &pdev->client->dev; + + /* Don't do anything if EEPROM wasn't accessible */ + if (pdev->eesize == 0) + return; + + /* Remove EEPROM sysfs file */ + sysfs_remove_bin_file(&dev->kobj, pdev->ee_file); +} + +/* + * idt_create_dbgfs_files() - create debugfs files + * @pdev: Pointer to the driver data + */ +#define CSRNAME_LEN ((size_t)32) +static void idt_create_dbgfs_files(struct idt_89hpesx_dev *pdev) +{ + struct i2c_client *cli = pdev->client; + char fname[CSRNAME_LEN]; + + /* Create Debugfs directory for CSR file */ + snprintf(fname, CSRNAME_LEN, "%d-%04hx", cli->adapter->nr, cli->addr); + pdev->csr_dir = debugfs_create_dir(fname, csr_dbgdir); + + /* Create Debugfs file for CSR read/write operations */ + pdev->csr_file = debugfs_create_file(cli->name, 0600, + pdev->csr_dir, pdev, &csr_dbgfs_ops); +} + +/* + * idt_remove_dbgfs_files() - remove debugfs files + * @pdev: Pointer to the driver data + */ +static void idt_remove_dbgfs_files(struct idt_89hpesx_dev *pdev) +{ + /* Remove CSR directory and it sysfs-node */ + debugfs_remove_recursive(pdev->csr_dir); +} + +/* + * idt_probe() - IDT 89HPESx driver probe() callback method + */ +static int idt_probe(struct i2c_client *client, const struct i2c_device_id *id) +{ + struct idt_89hpesx_dev *pdev; + int ret; + + /* Create driver data */ + pdev = idt_create_pdev(client); + if (IS_ERR(pdev)) + return PTR_ERR(pdev); + + /* Set SMBus operations */ + ret = idt_set_smbus_ops(pdev); + if (ret != 0) + goto err_free_pdev; + + /* Check whether it is truly IDT 89HPESx device */ + ret = idt_check_dev(pdev); + if (ret != 0) + goto err_free_pdev; + + /* Create sysfs files */ + ret = idt_create_sysfs_files(pdev); + if (ret != 0) + goto err_free_pdev; + + /* Create debugfs files */ + idt_create_dbgfs_files(pdev); + + return 0; + +err_free_pdev: + idt_free_pdev(pdev); + + return ret; +} + +/* + * idt_remove() - IDT 89HPESx driver remove() callback method + */ +static int idt_remove(struct i2c_client *client) +{ + struct idt_89hpesx_dev *pdev = i2c_get_clientdata(client); + + /* Remove debugfs files first */ + idt_remove_dbgfs_files(pdev); + + /* Remove sysfs files */ + idt_remove_sysfs_files(pdev); + + /* Discard driver data structure */ + idt_free_pdev(pdev); + + return 0; +} + +/* + * ee_ids - array of supported EEPROMs + */ +static const struct i2c_device_id ee_ids[] = { + { "24c32", 4096}, + { "24c64", 8192}, + { "24c128", 16384}, + { "24c256", 32768}, + { "24c512", 65536}, + {} +}; +MODULE_DEVICE_TABLE(i2c, ee_ids); + +/* + * idt_ids - supported IDT 89HPESx devices + */ +static const struct i2c_device_id idt_ids[] = { + { "89hpes8nt2", 0 }, + { "89hpes12nt3", 0 }, + + { "89hpes24nt6ag2", 0 }, + { "89hpes32nt8ag2", 0 }, + { "89hpes32nt8bg2", 0 }, + { "89hpes12nt12g2", 0 }, + { "89hpes16nt16g2", 0 }, + { "89hpes24nt24g2", 0 }, + { "89hpes32nt24ag2", 0 }, + { "89hpes32nt24bg2", 0 }, + + { "89hpes12n3", 0 }, + { "89hpes12n3a", 0 }, + { "89hpes24n3", 0 }, + { "89hpes24n3a", 0 }, + + { "89hpes32h8", 0 }, + { "89hpes32h8g2", 0 }, + { "89hpes48h12", 0 }, + { "89hpes48h12g2", 0 }, + { "89hpes48h12ag2", 0 }, + { "89hpes16h16", 0 }, + { "89hpes22h16", 0 }, + { "89hpes22h16g2", 0 }, + { "89hpes34h16", 0 }, + { "89hpes34h16g2", 0 }, + { "89hpes64h16", 0 }, + { "89hpes64h16g2", 0 }, + { "89hpes64h16ag2", 0 }, + + /* { "89hpes3t3", 0 }, // No SMBus-slave iface */ + { "89hpes12t3g2", 0 }, + { "89hpes24t3g2", 0 }, + /* { "89hpes4t4", 0 }, // No SMBus-slave iface */ + { "89hpes16t4", 0 }, + { "89hpes4t4g2", 0 }, + { "89hpes10t4g2", 0 }, + { "89hpes16t4g2", 0 }, + { "89hpes16t4ag2", 0 }, + { "89hpes5t5", 0 }, + { "89hpes6t5", 0 }, + { "89hpes8t5", 0 }, + { "89hpes8t5a", 0 }, + { "89hpes24t6", 0 }, + { "89hpes6t6g2", 0 }, + { "89hpes24t6g2", 0 }, + { "89hpes16t7", 0 }, + { "89hpes32t8", 0 }, + { "89hpes32t8g2", 0 }, + { "89hpes48t12", 0 }, + { "89hpes48t12g2", 0 }, + { /* END OF LIST */ } +}; +MODULE_DEVICE_TABLE(i2c, idt_ids); + +static const struct of_device_id idt_of_match[] = { + { .compatible = "idt,89hpes8nt2", }, + { .compatible = "idt,89hpes12nt3", }, + + { .compatible = "idt,89hpes24nt6ag2", }, + { .compatible = "idt,89hpes32nt8ag2", }, + { .compatible = "idt,89hpes32nt8bg2", }, + { .compatible = "idt,89hpes12nt12g2", }, + { .compatible = "idt,89hpes16nt16g2", }, + { .compatible = "idt,89hpes24nt24g2", }, + { .compatible = "idt,89hpes32nt24ag2", }, + { .compatible = "idt,89hpes32nt24bg2", }, + + { .compatible = "idt,89hpes12n3", }, + { .compatible = "idt,89hpes12n3a", }, + { .compatible = "idt,89hpes24n3", }, + { .compatible = "idt,89hpes24n3a", }, + + { .compatible = "idt,89hpes32h8", }, + { .compatible = "idt,89hpes32h8g2", }, + { .compatible = "idt,89hpes48h12", }, + { .compatible = "idt,89hpes48h12g2", }, + { .compatible = "idt,89hpes48h12ag2", }, + { .compatible = "idt,89hpes16h16", }, + { .compatible = "idt,89hpes22h16", }, + { .compatible = "idt,89hpes22h16g2", }, + { .compatible = "idt,89hpes34h16", }, + { .compatible = "idt,89hpes34h16g2", }, + { .compatible = "idt,89hpes64h16", }, + { .compatible = "idt,89hpes64h16g2", }, + { .compatible = "idt,89hpes64h16ag2", }, + + { .compatible = "idt,89hpes12t3g2", }, + { .compatible = "idt,89hpes24t3g2", }, + + { .compatible = "idt,89hpes16t4", }, + { .compatible = "idt,89hpes4t4g2", }, + { .compatible = "idt,89hpes10t4g2", }, + { .compatible = "idt,89hpes16t4g2", }, + { .compatible = "idt,89hpes16t4ag2", }, + { .compatible = "idt,89hpes5t5", }, + { .compatible = "idt,89hpes6t5", }, + { .compatible = "idt,89hpes8t5", }, + { .compatible = "idt,89hpes8t5a", }, + { .compatible = "idt,89hpes24t6", }, + { .compatible = "idt,89hpes6t6g2", }, + { .compatible = "idt,89hpes24t6g2", }, + { .compatible = "idt,89hpes16t7", }, + { .compatible = "idt,89hpes32t8", }, + { .compatible = "idt,89hpes32t8g2", }, + { .compatible = "idt,89hpes48t12", }, + { .compatible = "idt,89hpes48t12g2", }, + { }, +}; +MODULE_DEVICE_TABLE(of, idt_of_match); + +/* + * idt_driver - IDT 89HPESx driver structure + */ +static struct i2c_driver idt_driver = { + .driver = { + .name = IDT_NAME, + .of_match_table = idt_of_match, + }, + .probe = idt_probe, + .remove = idt_remove, + .id_table = idt_ids, +}; + +/* + * idt_init() - IDT 89HPESx driver init() callback method + */ +static int __init idt_init(void) +{ + /* Create Debugfs directory first */ + if (debugfs_initialized()) + csr_dbgdir = debugfs_create_dir("idt_csr", NULL); + + /* Add new i2c-device driver */ + return i2c_add_driver(&idt_driver); +} +module_init(idt_init); + +/* + * idt_exit() - IDT 89HPESx driver exit() callback method + */ +static void __exit idt_exit(void) +{ + /* Discard debugfs directory and all files if any */ + debugfs_remove_recursive(csr_dbgdir); + + /* Unregister i2c-device driver */ + i2c_del_driver(&idt_driver); +} +module_exit(idt_exit); diff --git a/drivers/misc/eeprom/max6875.c b/drivers/misc/eeprom/max6875.c new file mode 100644 index 000000000..fc0cf9a74 --- /dev/null +++ b/drivers/misc/eeprom/max6875.c @@ -0,0 +1,210 @@ +/* + * max6875.c - driver for MAX6874/MAX6875 + * + * Copyright (C) 2005 Ben Gardner + * + * Based on eeprom.c + * + * The MAX6875 has a bank of registers and two banks of EEPROM. + * Address ranges are defined as follows: + * * 0x0000 - 0x0046 = configuration registers + * * 0x8000 - 0x8046 = configuration EEPROM + * * 0x8100 - 0x82FF = user EEPROM + * + * This driver makes the user EEPROM available for read. + * + * The registers & config EEPROM should be accessed via i2c-dev. + * + * The MAX6875 ignores the lowest address bit, so each chip responds to + * two addresses - 0x50/0x51 and 0x52/0x53. + * + * Note that the MAX6875 uses i2c_smbus_write_byte_data() to set the read + * address, so this driver is destructive if loaded for the wrong EEPROM chip. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + */ + +#include +#include +#include +#include +#include + +/* The MAX6875 can only read/write 16 bytes at a time */ +#define SLICE_SIZE 16 +#define SLICE_BITS 4 + +/* USER EEPROM is at addresses 0x8100 - 0x82FF */ +#define USER_EEPROM_BASE 0x8100 +#define USER_EEPROM_SIZE 0x0200 +#define USER_EEPROM_SLICES 32 + +/* MAX6875 commands */ +#define MAX6875_CMD_BLK_READ 0x84 + +/* Each client has this additional data */ +struct max6875_data { + struct i2c_client *fake_client; + struct mutex update_lock; + + u32 valid; + u8 data[USER_EEPROM_SIZE]; + unsigned long last_updated[USER_EEPROM_SLICES]; +}; + +static void max6875_update_slice(struct i2c_client *client, int slice) +{ + struct max6875_data *data = i2c_get_clientdata(client); + int i, j, addr; + u8 *buf; + + if (slice >= USER_EEPROM_SLICES) + return; + + mutex_lock(&data->update_lock); + + buf = &data->data[slice << SLICE_BITS]; + + if (!(data->valid & (1 << slice)) || + time_after(jiffies, data->last_updated[slice])) { + + dev_dbg(&client->dev, "Starting update of slice %u\n", slice); + + data->valid &= ~(1 << slice); + + addr = USER_EEPROM_BASE + (slice << SLICE_BITS); + + /* select the eeprom address */ + if (i2c_smbus_write_byte_data(client, addr >> 8, addr & 0xFF)) { + dev_err(&client->dev, "address set failed\n"); + goto exit_up; + } + + if (i2c_check_functionality(client->adapter, + I2C_FUNC_SMBUS_READ_I2C_BLOCK)) { + if (i2c_smbus_read_i2c_block_data(client, + MAX6875_CMD_BLK_READ, + SLICE_SIZE, + buf) != SLICE_SIZE) { + goto exit_up; + } + } else { + for (i = 0; i < SLICE_SIZE; i++) { + j = i2c_smbus_read_byte(client); + if (j < 0) { + goto exit_up; + } + buf[i] = j; + } + } + data->last_updated[slice] = jiffies; + data->valid |= (1 << slice); + } +exit_up: + mutex_unlock(&data->update_lock); +} + +static ssize_t max6875_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) +{ + struct i2c_client *client = kobj_to_i2c_client(kobj); + struct max6875_data *data = i2c_get_clientdata(client); + int slice, max_slice; + + /* refresh slices which contain requested bytes */ + max_slice = (off + count - 1) >> SLICE_BITS; + for (slice = (off >> SLICE_BITS); slice <= max_slice; slice++) + max6875_update_slice(client, slice); + + memcpy(buf, &data->data[off], count); + + return count; +} + +static const struct bin_attribute user_eeprom_attr = { + .attr = { + .name = "eeprom", + .mode = S_IRUGO, + }, + .size = USER_EEPROM_SIZE, + .read = max6875_read, +}; + +static int max6875_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct i2c_adapter *adapter = client->adapter; + struct max6875_data *data; + int err; + + if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_WRITE_BYTE_DATA + | I2C_FUNC_SMBUS_READ_BYTE)) + return -ENODEV; + + /* Only bind to even addresses */ + if (client->addr & 1) + return -ENODEV; + + data = kzalloc(sizeof(struct max6875_data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + /* A fake client is created on the odd address */ + data->fake_client = i2c_new_dummy(client->adapter, client->addr + 1); + if (!data->fake_client) { + err = -ENOMEM; + goto exit_kfree; + } + + /* Init real i2c_client */ + i2c_set_clientdata(client, data); + mutex_init(&data->update_lock); + + err = sysfs_create_bin_file(&client->dev.kobj, &user_eeprom_attr); + if (err) + goto exit_remove_fake; + + return 0; + +exit_remove_fake: + i2c_unregister_device(data->fake_client); +exit_kfree: + kfree(data); + return err; +} + +static int max6875_remove(struct i2c_client *client) +{ + struct max6875_data *data = i2c_get_clientdata(client); + + i2c_unregister_device(data->fake_client); + + sysfs_remove_bin_file(&client->dev.kobj, &user_eeprom_attr); + kfree(data); + + return 0; +} + +static const struct i2c_device_id max6875_id[] = { + { "max6875", 0 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, max6875_id); + +static struct i2c_driver max6875_driver = { + .driver = { + .name = "max6875", + }, + .probe = max6875_probe, + .remove = max6875_remove, + .id_table = max6875_id, +}; + +module_i2c_driver(max6875_driver); + +MODULE_AUTHOR("Ben Gardner "); +MODULE_DESCRIPTION("MAX6875 driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/misc/enclosure.c b/drivers/misc/enclosure.c new file mode 100644 index 000000000..471263ffd --- /dev/null +++ b/drivers/misc/enclosure.c @@ -0,0 +1,695 @@ +/* + * Enclosure Services + * + * Copyright (C) 2008 James Bottomley + * +**----------------------------------------------------------------------------- +** +** This program is free software; you can redistribute it and/or +** modify it under the terms of the GNU General Public License +** version 2 as published by the Free Software Foundation. +** +** This program is distributed in the hope that it will be useful, +** but WITHOUT ANY WARRANTY; without even the implied warranty of +** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +** GNU General Public License for more details. +** +** You should have received a copy of the GNU General Public License +** along with this program; if not, write to the Free Software +** Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +** +**----------------------------------------------------------------------------- +*/ +#include +#include +#include +#include +#include +#include +#include +#include + +static LIST_HEAD(container_list); +static DEFINE_MUTEX(container_list_lock); +static struct class enclosure_class; + +/** + * enclosure_find - find an enclosure given a parent device + * @dev: the parent to match against + * @start: Optional enclosure device to start from (NULL if none) + * + * Looks through the list of registered enclosures to find all those + * with @dev as a parent. Returns NULL if no enclosure is + * found. @start can be used as a starting point to obtain multiple + * enclosures per parent (should begin with NULL and then be set to + * each returned enclosure device). Obtains a reference to the + * enclosure class device which must be released with device_put(). + * If @start is not NULL, a reference must be taken on it which is + * released before returning (this allows a loop through all + * enclosures to exit with only the reference on the enclosure of + * interest held). Note that the @dev may correspond to the actual + * device housing the enclosure, in which case no iteration via @start + * is required. + */ +struct enclosure_device *enclosure_find(struct device *dev, + struct enclosure_device *start) +{ + struct enclosure_device *edev; + + mutex_lock(&container_list_lock); + edev = list_prepare_entry(start, &container_list, node); + if (start) + put_device(&start->edev); + + list_for_each_entry_continue(edev, &container_list, node) { + struct device *parent = edev->edev.parent; + /* parent might not be immediate, so iterate up to + * the root of the tree if necessary */ + while (parent) { + if (parent == dev) { + get_device(&edev->edev); + mutex_unlock(&container_list_lock); + return edev; + } + parent = parent->parent; + } + } + mutex_unlock(&container_list_lock); + + return NULL; +} +EXPORT_SYMBOL_GPL(enclosure_find); + +/** + * enclosure_for_each_device - calls a function for each enclosure + * @fn: the function to call + * @data: the data to pass to each call + * + * Loops over all the enclosures calling the function. + * + * Note, this function uses a mutex which will be held across calls to + * @fn, so it must have non atomic context, and @fn may (although it + * should not) sleep or otherwise cause the mutex to be held for + * indefinite periods + */ +int enclosure_for_each_device(int (*fn)(struct enclosure_device *, void *), + void *data) +{ + int error = 0; + struct enclosure_device *edev; + + mutex_lock(&container_list_lock); + list_for_each_entry(edev, &container_list, node) { + error = fn(edev, data); + if (error) + break; + } + mutex_unlock(&container_list_lock); + + return error; +} +EXPORT_SYMBOL_GPL(enclosure_for_each_device); + +/** + * enclosure_register - register device as an enclosure + * + * @dev: device containing the enclosure + * @components: number of components in the enclosure + * + * This sets up the device for being an enclosure. Note that @dev does + * not have to be a dedicated enclosure device. It may be some other type + * of device that additionally responds to enclosure services + */ +struct enclosure_device * +enclosure_register(struct device *dev, const char *name, int components, + struct enclosure_component_callbacks *cb) +{ + struct enclosure_device *edev = + kzalloc(sizeof(struct enclosure_device) + + sizeof(struct enclosure_component)*components, + GFP_KERNEL); + int err, i; + + BUG_ON(!cb); + + if (!edev) + return ERR_PTR(-ENOMEM); + + edev->components = components; + + edev->edev.class = &enclosure_class; + edev->edev.parent = get_device(dev); + edev->cb = cb; + dev_set_name(&edev->edev, "%s", name); + err = device_register(&edev->edev); + if (err) + goto err; + + for (i = 0; i < components; i++) { + edev->component[i].number = -1; + edev->component[i].slot = -1; + edev->component[i].power_status = -1; + } + + mutex_lock(&container_list_lock); + list_add_tail(&edev->node, &container_list); + mutex_unlock(&container_list_lock); + + return edev; + + err: + put_device(edev->edev.parent); + kfree(edev); + return ERR_PTR(err); +} +EXPORT_SYMBOL_GPL(enclosure_register); + +static struct enclosure_component_callbacks enclosure_null_callbacks; + +/** + * enclosure_unregister - remove an enclosure + * + * @edev: the registered enclosure to remove; + */ +void enclosure_unregister(struct enclosure_device *edev) +{ + int i; + + mutex_lock(&container_list_lock); + list_del(&edev->node); + mutex_unlock(&container_list_lock); + + for (i = 0; i < edev->components; i++) + if (edev->component[i].number != -1) + device_unregister(&edev->component[i].cdev); + + /* prevent any callbacks into service user */ + edev->cb = &enclosure_null_callbacks; + device_unregister(&edev->edev); +} +EXPORT_SYMBOL_GPL(enclosure_unregister); + +#define ENCLOSURE_NAME_SIZE 64 +#define COMPONENT_NAME_SIZE 64 + +static void enclosure_link_name(struct enclosure_component *cdev, char *name) +{ + strcpy(name, "enclosure_device:"); + strcat(name, dev_name(&cdev->cdev)); +} + +static void enclosure_remove_links(struct enclosure_component *cdev) +{ + char name[ENCLOSURE_NAME_SIZE]; + + enclosure_link_name(cdev, name); + + /* + * In odd circumstances, like multipath devices, something else may + * already have removed the links, so check for this condition first. + */ + if (cdev->dev->kobj.sd) + sysfs_remove_link(&cdev->dev->kobj, name); + + if (cdev->cdev.kobj.sd) + sysfs_remove_link(&cdev->cdev.kobj, "device"); +} + +static int enclosure_add_links(struct enclosure_component *cdev) +{ + int error; + char name[ENCLOSURE_NAME_SIZE]; + + error = sysfs_create_link(&cdev->cdev.kobj, &cdev->dev->kobj, "device"); + if (error) + return error; + + enclosure_link_name(cdev, name); + error = sysfs_create_link(&cdev->dev->kobj, &cdev->cdev.kobj, name); + if (error) + sysfs_remove_link(&cdev->cdev.kobj, "device"); + + return error; +} + +static void enclosure_release(struct device *cdev) +{ + struct enclosure_device *edev = to_enclosure_device(cdev); + + put_device(cdev->parent); + kfree(edev); +} + +static void enclosure_component_release(struct device *dev) +{ + struct enclosure_component *cdev = to_enclosure_component(dev); + + if (cdev->dev) { + enclosure_remove_links(cdev); + put_device(cdev->dev); + } + put_device(dev->parent); +} + +static struct enclosure_component * +enclosure_component_find_by_name(struct enclosure_device *edev, + const char *name) +{ + int i; + const char *cname; + struct enclosure_component *ecomp; + + if (!edev || !name || !name[0]) + return NULL; + + for (i = 0; i < edev->components; i++) { + ecomp = &edev->component[i]; + cname = dev_name(&ecomp->cdev); + if (ecomp->number != -1 && + cname && cname[0] && + !strcmp(cname, name)) + return ecomp; + } + + return NULL; +} + +static const struct attribute_group *enclosure_component_groups[]; + +/** + * enclosure_component_alloc - prepare a new enclosure component + * @edev: the enclosure to add the component + * @num: the device number + * @type: the type of component being added + * @name: an optional name to appear in sysfs (leave NULL if none) + * + * The name is optional for enclosures that give their components a unique + * name. If not, leave the field NULL and a name will be assigned. + * + * Returns a pointer to the enclosure component or an error. + */ +struct enclosure_component * +enclosure_component_alloc(struct enclosure_device *edev, + unsigned int number, + enum enclosure_component_type type, + const char *name) +{ + struct enclosure_component *ecomp; + struct device *cdev; + int i; + char newname[COMPONENT_NAME_SIZE]; + + if (number >= edev->components) + return ERR_PTR(-EINVAL); + + ecomp = &edev->component[number]; + + if (ecomp->number != -1) + return ERR_PTR(-EINVAL); + + ecomp->type = type; + ecomp->number = number; + cdev = &ecomp->cdev; + cdev->parent = get_device(&edev->edev); + + if (name && name[0]) { + /* Some hardware (e.g. enclosure in RX300 S6) has components + * with non unique names. Registering duplicates in sysfs + * will lead to warnings during bootup. So make the names + * unique by appending consecutive numbers -1, -2, ... */ + i = 1; + snprintf(newname, COMPONENT_NAME_SIZE, + "%s", name); + while (enclosure_component_find_by_name(edev, newname)) + snprintf(newname, COMPONENT_NAME_SIZE, + "%s-%i", name, i++); + dev_set_name(cdev, "%s", newname); + } else + dev_set_name(cdev, "%u", number); + + cdev->release = enclosure_component_release; + cdev->groups = enclosure_component_groups; + + return ecomp; +} +EXPORT_SYMBOL_GPL(enclosure_component_alloc); + +/** + * enclosure_component_register - publishes an initialized enclosure component + * @ecomp: component to add + * + * Returns 0 on successful registration, releases the component otherwise + */ +int enclosure_component_register(struct enclosure_component *ecomp) +{ + struct device *cdev; + int err; + + cdev = &ecomp->cdev; + err = device_register(cdev); + if (err) { + ecomp->number = -1; + put_device(cdev); + return err; + } + + return 0; +} +EXPORT_SYMBOL_GPL(enclosure_component_register); + +/** + * enclosure_add_device - add a device as being part of an enclosure + * @edev: the enclosure device being added to. + * @num: the number of the component + * @dev: the device being added + * + * Declares a real device to reside in slot (or identifier) @num of an + * enclosure. This will cause the relevant sysfs links to appear. + * This function may also be used to change a device associated with + * an enclosure without having to call enclosure_remove_device() in + * between. + * + * Returns zero on success or an error. + */ +int enclosure_add_device(struct enclosure_device *edev, int component, + struct device *dev) +{ + struct enclosure_component *cdev; + int err; + + if (!edev || component >= edev->components) + return -EINVAL; + + cdev = &edev->component[component]; + + if (cdev->dev == dev) + return -EEXIST; + + if (cdev->dev) { + enclosure_remove_links(cdev); + put_device(cdev->dev); + } + cdev->dev = get_device(dev); + err = enclosure_add_links(cdev); + if (err) { + put_device(cdev->dev); + cdev->dev = NULL; + } + return err; +} +EXPORT_SYMBOL_GPL(enclosure_add_device); + +/** + * enclosure_remove_device - remove a device from an enclosure + * @edev: the enclosure device + * @num: the number of the component to remove + * + * Returns zero on success or an error. + * + */ +int enclosure_remove_device(struct enclosure_device *edev, struct device *dev) +{ + struct enclosure_component *cdev; + int i; + + if (!edev || !dev) + return -EINVAL; + + for (i = 0; i < edev->components; i++) { + cdev = &edev->component[i]; + if (cdev->dev == dev) { + enclosure_remove_links(cdev); + put_device(dev); + cdev->dev = NULL; + return 0; + } + } + return -ENODEV; +} +EXPORT_SYMBOL_GPL(enclosure_remove_device); + +/* + * sysfs pieces below + */ + +static ssize_t components_show(struct device *cdev, + struct device_attribute *attr, char *buf) +{ + struct enclosure_device *edev = to_enclosure_device(cdev); + + return snprintf(buf, 40, "%d\n", edev->components); +} +static DEVICE_ATTR_RO(components); + +static ssize_t id_show(struct device *cdev, + struct device_attribute *attr, + char *buf) +{ + struct enclosure_device *edev = to_enclosure_device(cdev); + + if (edev->cb->show_id) + return edev->cb->show_id(edev, buf); + return -EINVAL; +} +static DEVICE_ATTR_RO(id); + +static struct attribute *enclosure_class_attrs[] = { + &dev_attr_components.attr, + &dev_attr_id.attr, + NULL, +}; +ATTRIBUTE_GROUPS(enclosure_class); + +static struct class enclosure_class = { + .name = "enclosure", + .owner = THIS_MODULE, + .dev_release = enclosure_release, + .dev_groups = enclosure_class_groups, +}; + +static const char *const enclosure_status[] = { + [ENCLOSURE_STATUS_UNSUPPORTED] = "unsupported", + [ENCLOSURE_STATUS_OK] = "OK", + [ENCLOSURE_STATUS_CRITICAL] = "critical", + [ENCLOSURE_STATUS_NON_CRITICAL] = "non-critical", + [ENCLOSURE_STATUS_UNRECOVERABLE] = "unrecoverable", + [ENCLOSURE_STATUS_NOT_INSTALLED] = "not installed", + [ENCLOSURE_STATUS_UNKNOWN] = "unknown", + [ENCLOSURE_STATUS_UNAVAILABLE] = "unavailable", + [ENCLOSURE_STATUS_MAX] = NULL, +}; + +static const char *const enclosure_type[] = { + [ENCLOSURE_COMPONENT_DEVICE] = "device", + [ENCLOSURE_COMPONENT_ARRAY_DEVICE] = "array device", +}; + +static ssize_t get_component_fault(struct device *cdev, + struct device_attribute *attr, char *buf) +{ + struct enclosure_device *edev = to_enclosure_device(cdev->parent); + struct enclosure_component *ecomp = to_enclosure_component(cdev); + + if (edev->cb->get_fault) + edev->cb->get_fault(edev, ecomp); + return snprintf(buf, 40, "%d\n", ecomp->fault); +} + +static ssize_t set_component_fault(struct device *cdev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct enclosure_device *edev = to_enclosure_device(cdev->parent); + struct enclosure_component *ecomp = to_enclosure_component(cdev); + int val = simple_strtoul(buf, NULL, 0); + + if (edev->cb->set_fault) + edev->cb->set_fault(edev, ecomp, val); + return count; +} + +static ssize_t get_component_status(struct device *cdev, + struct device_attribute *attr,char *buf) +{ + struct enclosure_device *edev = to_enclosure_device(cdev->parent); + struct enclosure_component *ecomp = to_enclosure_component(cdev); + + if (edev->cb->get_status) + edev->cb->get_status(edev, ecomp); + return snprintf(buf, 40, "%s\n", enclosure_status[ecomp->status]); +} + +static ssize_t set_component_status(struct device *cdev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct enclosure_device *edev = to_enclosure_device(cdev->parent); + struct enclosure_component *ecomp = to_enclosure_component(cdev); + int i; + + for (i = 0; enclosure_status[i]; i++) { + if (strncmp(buf, enclosure_status[i], + strlen(enclosure_status[i])) == 0 && + (buf[strlen(enclosure_status[i])] == '\n' || + buf[strlen(enclosure_status[i])] == '\0')) + break; + } + + if (enclosure_status[i] && edev->cb->set_status) { + edev->cb->set_status(edev, ecomp, i); + return count; + } else + return -EINVAL; +} + +static ssize_t get_component_active(struct device *cdev, + struct device_attribute *attr, char *buf) +{ + struct enclosure_device *edev = to_enclosure_device(cdev->parent); + struct enclosure_component *ecomp = to_enclosure_component(cdev); + + if (edev->cb->get_active) + edev->cb->get_active(edev, ecomp); + return snprintf(buf, 40, "%d\n", ecomp->active); +} + +static ssize_t set_component_active(struct device *cdev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct enclosure_device *edev = to_enclosure_device(cdev->parent); + struct enclosure_component *ecomp = to_enclosure_component(cdev); + int val = simple_strtoul(buf, NULL, 0); + + if (edev->cb->set_active) + edev->cb->set_active(edev, ecomp, val); + return count; +} + +static ssize_t get_component_locate(struct device *cdev, + struct device_attribute *attr, char *buf) +{ + struct enclosure_device *edev = to_enclosure_device(cdev->parent); + struct enclosure_component *ecomp = to_enclosure_component(cdev); + + if (edev->cb->get_locate) + edev->cb->get_locate(edev, ecomp); + return snprintf(buf, 40, "%d\n", ecomp->locate); +} + +static ssize_t set_component_locate(struct device *cdev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct enclosure_device *edev = to_enclosure_device(cdev->parent); + struct enclosure_component *ecomp = to_enclosure_component(cdev); + int val = simple_strtoul(buf, NULL, 0); + + if (edev->cb->set_locate) + edev->cb->set_locate(edev, ecomp, val); + return count; +} + +static ssize_t get_component_power_status(struct device *cdev, + struct device_attribute *attr, + char *buf) +{ + struct enclosure_device *edev = to_enclosure_device(cdev->parent); + struct enclosure_component *ecomp = to_enclosure_component(cdev); + + if (edev->cb->get_power_status) + edev->cb->get_power_status(edev, ecomp); + + /* If still uninitialized, the callback failed or does not exist. */ + if (ecomp->power_status == -1) + return (edev->cb->get_power_status) ? -EIO : -ENOTTY; + + return snprintf(buf, 40, "%s\n", ecomp->power_status ? "on" : "off"); +} + +static ssize_t set_component_power_status(struct device *cdev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct enclosure_device *edev = to_enclosure_device(cdev->parent); + struct enclosure_component *ecomp = to_enclosure_component(cdev); + int val; + + if (strncmp(buf, "on", 2) == 0 && + (buf[2] == '\n' || buf[2] == '\0')) + val = 1; + else if (strncmp(buf, "off", 3) == 0 && + (buf[3] == '\n' || buf[3] == '\0')) + val = 0; + else + return -EINVAL; + + if (edev->cb->set_power_status) + edev->cb->set_power_status(edev, ecomp, val); + return count; +} + +static ssize_t get_component_type(struct device *cdev, + struct device_attribute *attr, char *buf) +{ + struct enclosure_component *ecomp = to_enclosure_component(cdev); + + return snprintf(buf, 40, "%s\n", enclosure_type[ecomp->type]); +} + +static ssize_t get_component_slot(struct device *cdev, + struct device_attribute *attr, char *buf) +{ + struct enclosure_component *ecomp = to_enclosure_component(cdev); + int slot; + + /* if the enclosure does not override then use 'number' as a stand-in */ + if (ecomp->slot >= 0) + slot = ecomp->slot; + else + slot = ecomp->number; + + return snprintf(buf, 40, "%d\n", slot); +} + +static DEVICE_ATTR(fault, S_IRUGO | S_IWUSR, get_component_fault, + set_component_fault); +static DEVICE_ATTR(status, S_IRUGO | S_IWUSR, get_component_status, + set_component_status); +static DEVICE_ATTR(active, S_IRUGO | S_IWUSR, get_component_active, + set_component_active); +static DEVICE_ATTR(locate, S_IRUGO | S_IWUSR, get_component_locate, + set_component_locate); +static DEVICE_ATTR(power_status, S_IRUGO | S_IWUSR, get_component_power_status, + set_component_power_status); +static DEVICE_ATTR(type, S_IRUGO, get_component_type, NULL); +static DEVICE_ATTR(slot, S_IRUGO, get_component_slot, NULL); + +static struct attribute *enclosure_component_attrs[] = { + &dev_attr_fault.attr, + &dev_attr_status.attr, + &dev_attr_active.attr, + &dev_attr_locate.attr, + &dev_attr_power_status.attr, + &dev_attr_type.attr, + &dev_attr_slot.attr, + NULL +}; +ATTRIBUTE_GROUPS(enclosure_component); + +static int __init enclosure_init(void) +{ + return class_register(&enclosure_class); +} + +static void __exit enclosure_exit(void) +{ + class_unregister(&enclosure_class); +} + +module_init(enclosure_init); +module_exit(enclosure_exit); + +MODULE_AUTHOR("James Bottomley"); +MODULE_DESCRIPTION("Enclosure Services"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/misc/fsa9480.c b/drivers/misc/fsa9480.c new file mode 100644 index 000000000..607b489a6 --- /dev/null +++ b/drivers/misc/fsa9480.c @@ -0,0 +1,550 @@ +/* + * fsa9480.c - FSA9480 micro USB switch device driver + * + * Copyright (C) 2010 Samsung Electronics + * Minkyu Kang + * Wonguk Jeong + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* FSA9480 I2C registers */ +#define FSA9480_REG_DEVID 0x01 +#define FSA9480_REG_CTRL 0x02 +#define FSA9480_REG_INT1 0x03 +#define FSA9480_REG_INT2 0x04 +#define FSA9480_REG_INT1_MASK 0x05 +#define FSA9480_REG_INT2_MASK 0x06 +#define FSA9480_REG_ADC 0x07 +#define FSA9480_REG_TIMING1 0x08 +#define FSA9480_REG_TIMING2 0x09 +#define FSA9480_REG_DEV_T1 0x0a +#define FSA9480_REG_DEV_T2 0x0b +#define FSA9480_REG_BTN1 0x0c +#define FSA9480_REG_BTN2 0x0d +#define FSA9480_REG_CK 0x0e +#define FSA9480_REG_CK_INT1 0x0f +#define FSA9480_REG_CK_INT2 0x10 +#define FSA9480_REG_CK_INTMASK1 0x11 +#define FSA9480_REG_CK_INTMASK2 0x12 +#define FSA9480_REG_MANSW1 0x13 +#define FSA9480_REG_MANSW2 0x14 + +/* Control */ +#define CON_SWITCH_OPEN (1 << 4) +#define CON_RAW_DATA (1 << 3) +#define CON_MANUAL_SW (1 << 2) +#define CON_WAIT (1 << 1) +#define CON_INT_MASK (1 << 0) +#define CON_MASK (CON_SWITCH_OPEN | CON_RAW_DATA | \ + CON_MANUAL_SW | CON_WAIT) + +/* Device Type 1 */ +#define DEV_USB_OTG (1 << 7) +#define DEV_DEDICATED_CHG (1 << 6) +#define DEV_USB_CHG (1 << 5) +#define DEV_CAR_KIT (1 << 4) +#define DEV_UART (1 << 3) +#define DEV_USB (1 << 2) +#define DEV_AUDIO_2 (1 << 1) +#define DEV_AUDIO_1 (1 << 0) + +#define DEV_T1_USB_MASK (DEV_USB_OTG | DEV_USB) +#define DEV_T1_UART_MASK (DEV_UART) +#define DEV_T1_CHARGER_MASK (DEV_DEDICATED_CHG | DEV_USB_CHG) + +/* Device Type 2 */ +#define DEV_AV (1 << 6) +#define DEV_TTY (1 << 5) +#define DEV_PPD (1 << 4) +#define DEV_JIG_UART_OFF (1 << 3) +#define DEV_JIG_UART_ON (1 << 2) +#define DEV_JIG_USB_OFF (1 << 1) +#define DEV_JIG_USB_ON (1 << 0) + +#define DEV_T2_USB_MASK (DEV_JIG_USB_OFF | DEV_JIG_USB_ON) +#define DEV_T2_UART_MASK (DEV_JIG_UART_OFF | DEV_JIG_UART_ON) +#define DEV_T2_JIG_MASK (DEV_JIG_USB_OFF | DEV_JIG_USB_ON | \ + DEV_JIG_UART_OFF | DEV_JIG_UART_ON) + +/* + * Manual Switch + * D- [7:5] / D+ [4:2] + * 000: Open all / 001: USB / 010: AUDIO / 011: UART / 100: V_AUDIO + */ +#define SW_VAUDIO ((4 << 5) | (4 << 2)) +#define SW_UART ((3 << 5) | (3 << 2)) +#define SW_AUDIO ((2 << 5) | (2 << 2)) +#define SW_DHOST ((1 << 5) | (1 << 2)) +#define SW_AUTO ((0 << 5) | (0 << 2)) + +/* Interrupt 1 */ +#define INT_DETACH (1 << 1) +#define INT_ATTACH (1 << 0) + +struct fsa9480_usbsw { + struct i2c_client *client; + struct fsa9480_platform_data *pdata; + int dev1; + int dev2; + int mansw; +}; + +static struct fsa9480_usbsw *chip; + +static int fsa9480_write_reg(struct i2c_client *client, + int reg, int value) +{ + int ret; + + ret = i2c_smbus_write_byte_data(client, reg, value); + + if (ret < 0) + dev_err(&client->dev, "%s: err %d\n", __func__, ret); + + return ret; +} + +static int fsa9480_read_reg(struct i2c_client *client, int reg) +{ + int ret; + + ret = i2c_smbus_read_byte_data(client, reg); + + if (ret < 0) + dev_err(&client->dev, "%s: err %d\n", __func__, ret); + + return ret; +} + +static int fsa9480_read_irq(struct i2c_client *client, int *value) +{ + int ret; + + ret = i2c_smbus_read_i2c_block_data(client, + FSA9480_REG_INT1, 2, (u8 *)value); + *value &= 0xffff; + + if (ret < 0) + dev_err(&client->dev, "%s: err %d\n", __func__, ret); + + return ret; +} + +static void fsa9480_set_switch(const char *buf) +{ + struct fsa9480_usbsw *usbsw = chip; + struct i2c_client *client = usbsw->client; + unsigned int value; + unsigned int path = 0; + + value = fsa9480_read_reg(client, FSA9480_REG_CTRL); + + if (!strncmp(buf, "VAUDIO", 6)) { + path = SW_VAUDIO; + value &= ~CON_MANUAL_SW; + } else if (!strncmp(buf, "UART", 4)) { + path = SW_UART; + value &= ~CON_MANUAL_SW; + } else if (!strncmp(buf, "AUDIO", 5)) { + path = SW_AUDIO; + value &= ~CON_MANUAL_SW; + } else if (!strncmp(buf, "DHOST", 5)) { + path = SW_DHOST; + value &= ~CON_MANUAL_SW; + } else if (!strncmp(buf, "AUTO", 4)) { + path = SW_AUTO; + value |= CON_MANUAL_SW; + } else { + printk(KERN_ERR "Wrong command\n"); + return; + } + + usbsw->mansw = path; + fsa9480_write_reg(client, FSA9480_REG_MANSW1, path); + fsa9480_write_reg(client, FSA9480_REG_CTRL, value); +} + +static ssize_t fsa9480_get_switch(char *buf) +{ + struct fsa9480_usbsw *usbsw = chip; + struct i2c_client *client = usbsw->client; + unsigned int value; + + value = fsa9480_read_reg(client, FSA9480_REG_MANSW1); + + if (value == SW_VAUDIO) + return sprintf(buf, "VAUDIO\n"); + else if (value == SW_UART) + return sprintf(buf, "UART\n"); + else if (value == SW_AUDIO) + return sprintf(buf, "AUDIO\n"); + else if (value == SW_DHOST) + return sprintf(buf, "DHOST\n"); + else if (value == SW_AUTO) + return sprintf(buf, "AUTO\n"); + else + return sprintf(buf, "%x", value); +} + +static ssize_t fsa9480_show_device(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct fsa9480_usbsw *usbsw = dev_get_drvdata(dev); + struct i2c_client *client = usbsw->client; + int dev1, dev2; + + dev1 = fsa9480_read_reg(client, FSA9480_REG_DEV_T1); + dev2 = fsa9480_read_reg(client, FSA9480_REG_DEV_T2); + + if (!dev1 && !dev2) + return sprintf(buf, "NONE\n"); + + /* USB */ + if (dev1 & DEV_T1_USB_MASK || dev2 & DEV_T2_USB_MASK) + return sprintf(buf, "USB\n"); + + /* UART */ + if (dev1 & DEV_T1_UART_MASK || dev2 & DEV_T2_UART_MASK) + return sprintf(buf, "UART\n"); + + /* CHARGER */ + if (dev1 & DEV_T1_CHARGER_MASK) + return sprintf(buf, "CHARGER\n"); + + /* JIG */ + if (dev2 & DEV_T2_JIG_MASK) + return sprintf(buf, "JIG\n"); + + return sprintf(buf, "UNKNOWN\n"); +} + +static ssize_t fsa9480_show_manualsw(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return fsa9480_get_switch(buf); + +} + +static ssize_t fsa9480_set_manualsw(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + fsa9480_set_switch(buf); + + return count; +} + +static DEVICE_ATTR(device, S_IRUGO, fsa9480_show_device, NULL); +static DEVICE_ATTR(switch, S_IRUGO | S_IWUSR, + fsa9480_show_manualsw, fsa9480_set_manualsw); + +static struct attribute *fsa9480_attributes[] = { + &dev_attr_device.attr, + &dev_attr_switch.attr, + NULL +}; + +static const struct attribute_group fsa9480_group = { + .attrs = fsa9480_attributes, +}; + +static void fsa9480_detect_dev(struct fsa9480_usbsw *usbsw, int intr) +{ + int val1, val2, ctrl; + struct fsa9480_platform_data *pdata = usbsw->pdata; + struct i2c_client *client = usbsw->client; + + val1 = fsa9480_read_reg(client, FSA9480_REG_DEV_T1); + val2 = fsa9480_read_reg(client, FSA9480_REG_DEV_T2); + ctrl = fsa9480_read_reg(client, FSA9480_REG_CTRL); + + dev_info(&client->dev, "intr: 0x%x, dev1: 0x%x, dev2: 0x%x\n", + intr, val1, val2); + + if (!intr) + goto out; + + if (intr & INT_ATTACH) { /* Attached */ + /* USB */ + if (val1 & DEV_T1_USB_MASK || val2 & DEV_T2_USB_MASK) { + if (pdata->usb_cb) + pdata->usb_cb(FSA9480_ATTACHED); + + if (usbsw->mansw) { + fsa9480_write_reg(client, + FSA9480_REG_MANSW1, usbsw->mansw); + } + } + + /* UART */ + if (val1 & DEV_T1_UART_MASK || val2 & DEV_T2_UART_MASK) { + if (pdata->uart_cb) + pdata->uart_cb(FSA9480_ATTACHED); + + if (!(ctrl & CON_MANUAL_SW)) { + fsa9480_write_reg(client, + FSA9480_REG_MANSW1, SW_UART); + } + } + + /* CHARGER */ + if (val1 & DEV_T1_CHARGER_MASK) { + if (pdata->charger_cb) + pdata->charger_cb(FSA9480_ATTACHED); + } + + /* JIG */ + if (val2 & DEV_T2_JIG_MASK) { + if (pdata->jig_cb) + pdata->jig_cb(FSA9480_ATTACHED); + } + } else if (intr & INT_DETACH) { /* Detached */ + /* USB */ + if (usbsw->dev1 & DEV_T1_USB_MASK || + usbsw->dev2 & DEV_T2_USB_MASK) { + if (pdata->usb_cb) + pdata->usb_cb(FSA9480_DETACHED); + } + + /* UART */ + if (usbsw->dev1 & DEV_T1_UART_MASK || + usbsw->dev2 & DEV_T2_UART_MASK) { + if (pdata->uart_cb) + pdata->uart_cb(FSA9480_DETACHED); + } + + /* CHARGER */ + if (usbsw->dev1 & DEV_T1_CHARGER_MASK) { + if (pdata->charger_cb) + pdata->charger_cb(FSA9480_DETACHED); + } + + /* JIG */ + if (usbsw->dev2 & DEV_T2_JIG_MASK) { + if (pdata->jig_cb) + pdata->jig_cb(FSA9480_DETACHED); + } + } + + usbsw->dev1 = val1; + usbsw->dev2 = val2; + +out: + ctrl &= ~CON_INT_MASK; + fsa9480_write_reg(client, FSA9480_REG_CTRL, ctrl); +} + +static irqreturn_t fsa9480_irq_handler(int irq, void *data) +{ + struct fsa9480_usbsw *usbsw = data; + struct i2c_client *client = usbsw->client; + int intr; + + /* clear interrupt */ + fsa9480_read_irq(client, &intr); + + /* device detection */ + fsa9480_detect_dev(usbsw, intr); + + return IRQ_HANDLED; +} + +static int fsa9480_irq_init(struct fsa9480_usbsw *usbsw) +{ + struct fsa9480_platform_data *pdata = usbsw->pdata; + struct i2c_client *client = usbsw->client; + int ret; + int intr; + unsigned int ctrl = CON_MASK; + + /* clear interrupt */ + fsa9480_read_irq(client, &intr); + + /* unmask interrupt (attach/detach only) */ + fsa9480_write_reg(client, FSA9480_REG_INT1_MASK, 0xfc); + fsa9480_write_reg(client, FSA9480_REG_INT2_MASK, 0x1f); + + usbsw->mansw = fsa9480_read_reg(client, FSA9480_REG_MANSW1); + + if (usbsw->mansw) + ctrl &= ~CON_MANUAL_SW; /* Manual Switching Mode */ + + fsa9480_write_reg(client, FSA9480_REG_CTRL, ctrl); + + if (pdata && pdata->cfg_gpio) + pdata->cfg_gpio(); + + if (client->irq) { + ret = request_threaded_irq(client->irq, NULL, + fsa9480_irq_handler, + IRQF_TRIGGER_FALLING | IRQF_ONESHOT, + "fsa9480 micro USB", usbsw); + if (ret) { + dev_err(&client->dev, "failed to request IRQ\n"); + return ret; + } + + if (pdata) + device_init_wakeup(&client->dev, pdata->wakeup); + } + + return 0; +} + +static int fsa9480_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct i2c_adapter *adapter = to_i2c_adapter(client->dev.parent); + struct fsa9480_usbsw *usbsw; + int ret = 0; + + if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA)) + return -EIO; + + usbsw = kzalloc(sizeof(struct fsa9480_usbsw), GFP_KERNEL); + if (!usbsw) { + dev_err(&client->dev, "failed to allocate driver data\n"); + return -ENOMEM; + } + + usbsw->client = client; + usbsw->pdata = client->dev.platform_data; + + chip = usbsw; + + i2c_set_clientdata(client, usbsw); + + ret = fsa9480_irq_init(usbsw); + if (ret) + goto fail1; + + ret = sysfs_create_group(&client->dev.kobj, &fsa9480_group); + if (ret) { + dev_err(&client->dev, + "failed to create fsa9480 attribute group\n"); + goto fail2; + } + + /* ADC Detect Time: 500ms */ + fsa9480_write_reg(client, FSA9480_REG_TIMING1, 0x6); + + if (chip->pdata->reset_cb) + chip->pdata->reset_cb(); + + /* device detection */ + fsa9480_detect_dev(usbsw, INT_ATTACH); + + pm_runtime_set_active(&client->dev); + + return 0; + +fail2: + if (client->irq) + free_irq(client->irq, usbsw); +fail1: + kfree(usbsw); + return ret; +} + +static int fsa9480_remove(struct i2c_client *client) +{ + struct fsa9480_usbsw *usbsw = i2c_get_clientdata(client); + + if (client->irq) + free_irq(client->irq, usbsw); + + sysfs_remove_group(&client->dev.kobj, &fsa9480_group); + device_init_wakeup(&client->dev, 0); + kfree(usbsw); + return 0; +} + +#ifdef CONFIG_PM_SLEEP + +static int fsa9480_suspend(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + struct fsa9480_usbsw *usbsw = i2c_get_clientdata(client); + struct fsa9480_platform_data *pdata = usbsw->pdata; + + if (device_may_wakeup(&client->dev) && client->irq) + enable_irq_wake(client->irq); + + if (pdata->usb_power) + pdata->usb_power(0); + + return 0; +} + +static int fsa9480_resume(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + struct fsa9480_usbsw *usbsw = i2c_get_clientdata(client); + int dev1, dev2; + + if (device_may_wakeup(&client->dev) && client->irq) + disable_irq_wake(client->irq); + + /* + * Clear Pending interrupt. Note that detect_dev does what + * the interrupt handler does. So, we don't miss pending and + * we reenable interrupt if there is one. + */ + fsa9480_read_reg(client, FSA9480_REG_INT1); + fsa9480_read_reg(client, FSA9480_REG_INT2); + + dev1 = fsa9480_read_reg(client, FSA9480_REG_DEV_T1); + dev2 = fsa9480_read_reg(client, FSA9480_REG_DEV_T2); + + /* device detection */ + fsa9480_detect_dev(usbsw, (dev1 || dev2) ? INT_ATTACH : INT_DETACH); + + return 0; +} + +static SIMPLE_DEV_PM_OPS(fsa9480_pm_ops, fsa9480_suspend, fsa9480_resume); +#define FSA9480_PM_OPS (&fsa9480_pm_ops) + +#else + +#define FSA9480_PM_OPS NULL + +#endif /* CONFIG_PM_SLEEP */ + +static const struct i2c_device_id fsa9480_id[] = { + {"fsa9480", 0}, + {} +}; +MODULE_DEVICE_TABLE(i2c, fsa9480_id); + +static struct i2c_driver fsa9480_i2c_driver = { + .driver = { + .name = "fsa9480", + .pm = FSA9480_PM_OPS, + }, + .probe = fsa9480_probe, + .remove = fsa9480_remove, + .id_table = fsa9480_id, +}; + +module_i2c_driver(fsa9480_i2c_driver); + +MODULE_AUTHOR("Minkyu Kang "); +MODULE_DESCRIPTION("FSA9480 USB Switch driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/misc/genwqe/Kconfig b/drivers/misc/genwqe/Kconfig new file mode 100644 index 000000000..4c0a033cb --- /dev/null +++ b/drivers/misc/genwqe/Kconfig @@ -0,0 +1,19 @@ +# +# IBM Accelerator Family 'GenWQE' +# + +menuconfig GENWQE + tristate "GenWQE PCIe Accelerator" + depends on PCI && 64BIT + select CRC_ITU_T + default n + help + Enables PCIe card driver for IBM GenWQE accelerators. + The user-space interface is described in + include/linux/genwqe/genwqe_card.h. + +config GENWQE_PLATFORM_ERROR_RECOVERY + int "Use platform recovery procedures (0=off, 1=on)" + depends on GENWQE + default 1 if PPC64 + default 0 diff --git a/drivers/misc/genwqe/Makefile b/drivers/misc/genwqe/Makefile new file mode 100644 index 000000000..98a2b4f0b --- /dev/null +++ b/drivers/misc/genwqe/Makefile @@ -0,0 +1,7 @@ +# +# Makefile for GenWQE driver +# + +obj-$(CONFIG_GENWQE) := genwqe_card.o +genwqe_card-objs := card_base.o card_dev.o card_ddcb.o card_sysfs.o \ + card_debugfs.o card_utils.o diff --git a/drivers/misc/genwqe/card_base.c b/drivers/misc/genwqe/card_base.c new file mode 100644 index 000000000..c7cd3675b --- /dev/null +++ b/drivers/misc/genwqe/card_base.c @@ -0,0 +1,1412 @@ +/** + * IBM Accelerator Family 'GenWQE' + * + * (C) Copyright IBM Corp. 2013 + * + * Author: Frank Haverkamp + * Author: Joerg-Stephan Vogt + * Author: Michael Jung + * Author: Michael Ruettger + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +/* + * Module initialization and PCIe setup. Card health monitoring and + * recovery functionality. Character device creation and deletion are + * controlled from here. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "card_base.h" +#include "card_ddcb.h" + +MODULE_AUTHOR("Frank Haverkamp "); +MODULE_AUTHOR("Michael Ruettger "); +MODULE_AUTHOR("Joerg-Stephan Vogt "); +MODULE_AUTHOR("Michael Jung "); + +MODULE_DESCRIPTION("GenWQE Card"); +MODULE_VERSION(DRV_VERSION); +MODULE_LICENSE("GPL"); + +static char genwqe_driver_name[] = GENWQE_DEVNAME; +static struct class *class_genwqe; +static struct dentry *debugfs_genwqe; +static struct genwqe_dev *genwqe_devices[GENWQE_CARD_NO_MAX]; + +/* PCI structure for identifying device by PCI vendor and device ID */ +static const struct pci_device_id genwqe_device_table[] = { + { .vendor = PCI_VENDOR_ID_IBM, + .device = PCI_DEVICE_GENWQE, + .subvendor = PCI_SUBVENDOR_ID_IBM, + .subdevice = PCI_SUBSYSTEM_ID_GENWQE5, + .class = (PCI_CLASSCODE_GENWQE5 << 8), + .class_mask = ~0, + .driver_data = 0 }, + + /* Initial SR-IOV bring-up image */ + { .vendor = PCI_VENDOR_ID_IBM, + .device = PCI_DEVICE_GENWQE, + .subvendor = PCI_SUBVENDOR_ID_IBM_SRIOV, + .subdevice = PCI_SUBSYSTEM_ID_GENWQE5_SRIOV, + .class = (PCI_CLASSCODE_GENWQE5_SRIOV << 8), + .class_mask = ~0, + .driver_data = 0 }, + + { .vendor = PCI_VENDOR_ID_IBM, /* VF Vendor ID */ + .device = 0x0000, /* VF Device ID */ + .subvendor = PCI_SUBVENDOR_ID_IBM_SRIOV, + .subdevice = PCI_SUBSYSTEM_ID_GENWQE5_SRIOV, + .class = (PCI_CLASSCODE_GENWQE5_SRIOV << 8), + .class_mask = ~0, + .driver_data = 0 }, + + /* Fixed up image */ + { .vendor = PCI_VENDOR_ID_IBM, + .device = PCI_DEVICE_GENWQE, + .subvendor = PCI_SUBVENDOR_ID_IBM_SRIOV, + .subdevice = PCI_SUBSYSTEM_ID_GENWQE5, + .class = (PCI_CLASSCODE_GENWQE5_SRIOV << 8), + .class_mask = ~0, + .driver_data = 0 }, + + { .vendor = PCI_VENDOR_ID_IBM, /* VF Vendor ID */ + .device = 0x0000, /* VF Device ID */ + .subvendor = PCI_SUBVENDOR_ID_IBM_SRIOV, + .subdevice = PCI_SUBSYSTEM_ID_GENWQE5, + .class = (PCI_CLASSCODE_GENWQE5_SRIOV << 8), + .class_mask = ~0, + .driver_data = 0 }, + + /* Even one more ... */ + { .vendor = PCI_VENDOR_ID_IBM, + .device = PCI_DEVICE_GENWQE, + .subvendor = PCI_SUBVENDOR_ID_IBM, + .subdevice = PCI_SUBSYSTEM_ID_GENWQE5_NEW, + .class = (PCI_CLASSCODE_GENWQE5 << 8), + .class_mask = ~0, + .driver_data = 0 }, + + { 0, } /* 0 terminated list. */ +}; + +MODULE_DEVICE_TABLE(pci, genwqe_device_table); + +/** + * genwqe_dev_alloc() - Create and prepare a new card descriptor + * + * Return: Pointer to card descriptor, or ERR_PTR(err) on error + */ +static struct genwqe_dev *genwqe_dev_alloc(void) +{ + unsigned int i = 0, j; + struct genwqe_dev *cd; + + for (i = 0; i < GENWQE_CARD_NO_MAX; i++) { + if (genwqe_devices[i] == NULL) + break; + } + if (i >= GENWQE_CARD_NO_MAX) + return ERR_PTR(-ENODEV); + + cd = kzalloc(sizeof(struct genwqe_dev), GFP_KERNEL); + if (!cd) + return ERR_PTR(-ENOMEM); + + cd->card_idx = i; + cd->class_genwqe = class_genwqe; + cd->debugfs_genwqe = debugfs_genwqe; + + /* + * This comes from kernel config option and can be overritten via + * debugfs. + */ + cd->use_platform_recovery = CONFIG_GENWQE_PLATFORM_ERROR_RECOVERY; + + init_waitqueue_head(&cd->queue_waitq); + + spin_lock_init(&cd->file_lock); + INIT_LIST_HEAD(&cd->file_list); + + cd->card_state = GENWQE_CARD_UNUSED; + spin_lock_init(&cd->print_lock); + + cd->ddcb_software_timeout = GENWQE_DDCB_SOFTWARE_TIMEOUT; + cd->kill_timeout = GENWQE_KILL_TIMEOUT; + + for (j = 0; j < GENWQE_MAX_VFS; j++) + cd->vf_jobtimeout_msec[j] = GENWQE_VF_JOBTIMEOUT_MSEC; + + genwqe_devices[i] = cd; + return cd; +} + +static void genwqe_dev_free(struct genwqe_dev *cd) +{ + if (!cd) + return; + + genwqe_devices[cd->card_idx] = NULL; + kfree(cd); +} + +/** + * genwqe_bus_reset() - Card recovery + * + * pci_reset_function() will recover the device and ensure that the + * registers are accessible again when it completes with success. If + * not, the card will stay dead and registers will be unaccessible + * still. + */ +static int genwqe_bus_reset(struct genwqe_dev *cd) +{ + int rc = 0; + struct pci_dev *pci_dev = cd->pci_dev; + void __iomem *mmio; + + if (cd->err_inject & GENWQE_INJECT_BUS_RESET_FAILURE) + return -EIO; + + mmio = cd->mmio; + cd->mmio = NULL; + pci_iounmap(pci_dev, mmio); + + pci_release_mem_regions(pci_dev); + + /* + * Firmware/BIOS might change memory mapping during bus reset. + * Settings like enable bus-mastering, ... are backuped and + * restored by the pci_reset_function(). + */ + dev_dbg(&pci_dev->dev, "[%s] pci_reset function ...\n", __func__); + rc = pci_reset_function(pci_dev); + if (rc) { + dev_err(&pci_dev->dev, + "[%s] err: failed reset func (rc %d)\n", __func__, rc); + return rc; + } + dev_dbg(&pci_dev->dev, "[%s] done with rc=%d\n", __func__, rc); + + /* + * Here is the right spot to clear the register read + * failure. pci_bus_reset() does this job in real systems. + */ + cd->err_inject &= ~(GENWQE_INJECT_HARDWARE_FAILURE | + GENWQE_INJECT_GFIR_FATAL | + GENWQE_INJECT_GFIR_INFO); + + rc = pci_request_mem_regions(pci_dev, genwqe_driver_name); + if (rc) { + dev_err(&pci_dev->dev, + "[%s] err: request bars failed (%d)\n", __func__, rc); + return -EIO; + } + + cd->mmio = pci_iomap(pci_dev, 0, 0); + if (cd->mmio == NULL) { + dev_err(&pci_dev->dev, + "[%s] err: mapping BAR0 failed\n", __func__); + return -ENOMEM; + } + return 0; +} + +/* + * Hardware circumvention section. Certain bitstreams in our test-lab + * had different kinds of problems. Here is where we adjust those + * bitstreams to function will with this version of our device driver. + * + * Thise circumventions are applied to the physical function only. + * The magical numbers below are identifying development/manufacturing + * versions of the bitstream used on the card. + * + * Turn off error reporting for old/manufacturing images. + */ + +bool genwqe_need_err_masking(struct genwqe_dev *cd) +{ + return (cd->slu_unitcfg & 0xFFFF0ull) < 0x32170ull; +} + +static void genwqe_tweak_hardware(struct genwqe_dev *cd) +{ + struct pci_dev *pci_dev = cd->pci_dev; + + /* Mask FIRs for development images */ + if (((cd->slu_unitcfg & 0xFFFF0ull) >= 0x32000ull) && + ((cd->slu_unitcfg & 0xFFFF0ull) <= 0x33250ull)) { + dev_warn(&pci_dev->dev, + "FIRs masked due to bitstream %016llx.%016llx\n", + cd->slu_unitcfg, cd->app_unitcfg); + + __genwqe_writeq(cd, IO_APP_SEC_LEM_DEBUG_OVR, + 0xFFFFFFFFFFFFFFFFull); + + __genwqe_writeq(cd, IO_APP_ERR_ACT_MASK, + 0x0000000000000000ull); + } +} + +/** + * genwqe_recovery_on_fatal_gfir_required() - Version depended actions + * + * Bitstreams older than 2013-02-17 have a bug where fatal GFIRs must + * be ignored. This is e.g. true for the bitstream we gave to the card + * manufacturer, but also for some old bitstreams we released to our + * test-lab. + */ +int genwqe_recovery_on_fatal_gfir_required(struct genwqe_dev *cd) +{ + return (cd->slu_unitcfg & 0xFFFF0ull) >= 0x32170ull; +} + +int genwqe_flash_readback_fails(struct genwqe_dev *cd) +{ + return (cd->slu_unitcfg & 0xFFFF0ull) < 0x32170ull; +} + +/** + * genwqe_T_psec() - Calculate PF/VF timeout register content + * + * Note: From a design perspective it turned out to be a bad idea to + * use codes here to specifiy the frequency/speed values. An old + * driver cannot understand new codes and is therefore always a + * problem. Better is to measure out the value or put the + * speed/frequency directly into a register which is always a valid + * value for old as well as for new software. + */ +/* T = 1/f */ +static int genwqe_T_psec(struct genwqe_dev *cd) +{ + u16 speed; /* 1/f -> 250, 200, 166, 175 */ + static const int T[] = { 4000, 5000, 6000, 5714 }; + + speed = (u16)((cd->slu_unitcfg >> 28) & 0x0full); + if (speed >= ARRAY_SIZE(T)) + return -1; /* illegal value */ + + return T[speed]; +} + +/** + * genwqe_setup_pf_jtimer() - Setup PF hardware timeouts for DDCB execution + * + * Do this _after_ card_reset() is called. Otherwise the values will + * vanish. The settings need to be done when the queues are inactive. + * + * The max. timeout value is 2^(10+x) * T (6ns for 166MHz) * 15/16. + * The min. timeout value is 2^(10+x) * T (6ns for 166MHz) * 14/16. + */ +static bool genwqe_setup_pf_jtimer(struct genwqe_dev *cd) +{ + u32 T = genwqe_T_psec(cd); + u64 x; + + if (GENWQE_PF_JOBTIMEOUT_MSEC == 0) + return false; + + /* PF: large value needed, flash update 2sec per block */ + x = ilog2(GENWQE_PF_JOBTIMEOUT_MSEC * + 16000000000uL/(T * 15)) - 10; + + genwqe_write_vreg(cd, IO_SLC_VF_APPJOB_TIMEOUT, + 0xff00 | (x & 0xff), 0); + return true; +} + +/** + * genwqe_setup_vf_jtimer() - Setup VF hardware timeouts for DDCB execution + */ +static bool genwqe_setup_vf_jtimer(struct genwqe_dev *cd) +{ + struct pci_dev *pci_dev = cd->pci_dev; + unsigned int vf; + u32 T = genwqe_T_psec(cd); + u64 x; + int totalvfs; + + totalvfs = pci_sriov_get_totalvfs(pci_dev); + if (totalvfs <= 0) + return false; + + for (vf = 0; vf < totalvfs; vf++) { + + if (cd->vf_jobtimeout_msec[vf] == 0) + continue; + + x = ilog2(cd->vf_jobtimeout_msec[vf] * + 16000000000uL/(T * 15)) - 10; + + genwqe_write_vreg(cd, IO_SLC_VF_APPJOB_TIMEOUT, + 0xff00 | (x & 0xff), vf + 1); + } + return true; +} + +static int genwqe_ffdc_buffs_alloc(struct genwqe_dev *cd) +{ + unsigned int type, e = 0; + + for (type = 0; type < GENWQE_DBG_UNITS; type++) { + switch (type) { + case GENWQE_DBG_UNIT0: + e = genwqe_ffdc_buff_size(cd, 0); + break; + case GENWQE_DBG_UNIT1: + e = genwqe_ffdc_buff_size(cd, 1); + break; + case GENWQE_DBG_UNIT2: + e = genwqe_ffdc_buff_size(cd, 2); + break; + case GENWQE_DBG_REGS: + e = GENWQE_FFDC_REGS; + break; + } + + /* currently support only the debug units mentioned here */ + cd->ffdc[type].entries = e; + cd->ffdc[type].regs = + kmalloc_array(e, sizeof(struct genwqe_reg), + GFP_KERNEL); + /* + * regs == NULL is ok, the using code treats this as no regs, + * Printing warning is ok in this case. + */ + } + return 0; +} + +static void genwqe_ffdc_buffs_free(struct genwqe_dev *cd) +{ + unsigned int type; + + for (type = 0; type < GENWQE_DBG_UNITS; type++) { + kfree(cd->ffdc[type].regs); + cd->ffdc[type].regs = NULL; + } +} + +static int genwqe_read_ids(struct genwqe_dev *cd) +{ + int err = 0; + int slu_id; + struct pci_dev *pci_dev = cd->pci_dev; + + cd->slu_unitcfg = __genwqe_readq(cd, IO_SLU_UNITCFG); + if (cd->slu_unitcfg == IO_ILLEGAL_VALUE) { + dev_err(&pci_dev->dev, + "err: SLUID=%016llx\n", cd->slu_unitcfg); + err = -EIO; + goto out_err; + } + + slu_id = genwqe_get_slu_id(cd); + if (slu_id < GENWQE_SLU_ARCH_REQ || slu_id == 0xff) { + dev_err(&pci_dev->dev, + "err: incompatible SLU Architecture %u\n", slu_id); + err = -ENOENT; + goto out_err; + } + + cd->app_unitcfg = __genwqe_readq(cd, IO_APP_UNITCFG); + if (cd->app_unitcfg == IO_ILLEGAL_VALUE) { + dev_err(&pci_dev->dev, + "err: APPID=%016llx\n", cd->app_unitcfg); + err = -EIO; + goto out_err; + } + genwqe_read_app_id(cd, cd->app_name, sizeof(cd->app_name)); + + /* + * Is access to all registers possible? If we are a VF the + * answer is obvious. If we run fully virtualized, we need to + * check if we can access all registers. If we do not have + * full access we will cause an UR and some informational FIRs + * in the PF, but that should not harm. + */ + if (pci_dev->is_virtfn) + cd->is_privileged = 0; + else + cd->is_privileged = (__genwqe_readq(cd, IO_SLU_BITSTREAM) + != IO_ILLEGAL_VALUE); + + out_err: + return err; +} + +static int genwqe_start(struct genwqe_dev *cd) +{ + int err; + struct pci_dev *pci_dev = cd->pci_dev; + + err = genwqe_read_ids(cd); + if (err) + return err; + + if (genwqe_is_privileged(cd)) { + /* do this after the tweaks. alloc fail is acceptable */ + genwqe_ffdc_buffs_alloc(cd); + genwqe_stop_traps(cd); + + /* Collect registers e.g. FIRs, UNITIDs, traces ... */ + genwqe_read_ffdc_regs(cd, cd->ffdc[GENWQE_DBG_REGS].regs, + cd->ffdc[GENWQE_DBG_REGS].entries, 0); + + genwqe_ffdc_buff_read(cd, GENWQE_DBG_UNIT0, + cd->ffdc[GENWQE_DBG_UNIT0].regs, + cd->ffdc[GENWQE_DBG_UNIT0].entries); + + genwqe_ffdc_buff_read(cd, GENWQE_DBG_UNIT1, + cd->ffdc[GENWQE_DBG_UNIT1].regs, + cd->ffdc[GENWQE_DBG_UNIT1].entries); + + genwqe_ffdc_buff_read(cd, GENWQE_DBG_UNIT2, + cd->ffdc[GENWQE_DBG_UNIT2].regs, + cd->ffdc[GENWQE_DBG_UNIT2].entries); + + genwqe_start_traps(cd); + + if (cd->card_state == GENWQE_CARD_FATAL_ERROR) { + dev_warn(&pci_dev->dev, + "[%s] chip reload/recovery!\n", __func__); + + /* + * Stealth Mode: Reload chip on either hot + * reset or PERST. + */ + cd->softreset = 0x7Cull; + __genwqe_writeq(cd, IO_SLC_CFGREG_SOFTRESET, + cd->softreset); + + err = genwqe_bus_reset(cd); + if (err != 0) { + dev_err(&pci_dev->dev, + "[%s] err: bus reset failed!\n", + __func__); + goto out; + } + + /* + * Re-read the IDs because + * it could happen that the bitstream load + * failed! + */ + err = genwqe_read_ids(cd); + if (err) + goto out; + } + } + + err = genwqe_setup_service_layer(cd); /* does a reset to the card */ + if (err != 0) { + dev_err(&pci_dev->dev, + "[%s] err: could not setup servicelayer!\n", __func__); + err = -ENODEV; + goto out; + } + + if (genwqe_is_privileged(cd)) { /* code is running _after_ reset */ + genwqe_tweak_hardware(cd); + + genwqe_setup_pf_jtimer(cd); + genwqe_setup_vf_jtimer(cd); + } + + err = genwqe_device_create(cd); + if (err < 0) { + dev_err(&pci_dev->dev, + "err: chdev init failed! (err=%d)\n", err); + goto out_release_service_layer; + } + return 0; + + out_release_service_layer: + genwqe_release_service_layer(cd); + out: + if (genwqe_is_privileged(cd)) + genwqe_ffdc_buffs_free(cd); + return -EIO; +} + +/** + * genwqe_stop() - Stop card operation + * + * Recovery notes: + * As long as genwqe_thread runs we might access registers during + * error data capture. Same is with the genwqe_health_thread. + * When genwqe_bus_reset() fails this function might called two times: + * first by the genwqe_health_thread() and later by genwqe_remove() to + * unbind the device. We must be able to survive that. + * + * This function must be robust enough to be called twice. + */ +static int genwqe_stop(struct genwqe_dev *cd) +{ + genwqe_finish_queue(cd); /* no register access */ + genwqe_device_remove(cd); /* device removed, procs killed */ + genwqe_release_service_layer(cd); /* here genwqe_thread is stopped */ + + if (genwqe_is_privileged(cd)) { + pci_disable_sriov(cd->pci_dev); /* access pci config space */ + genwqe_ffdc_buffs_free(cd); + } + + return 0; +} + +/** + * genwqe_recover_card() - Try to recover the card if it is possible + * + * If fatal_err is set no register access is possible anymore. It is + * likely that genwqe_start fails in that situation. Proper error + * handling is required in this case. + * + * genwqe_bus_reset() will cause the pci code to call genwqe_remove() + * and later genwqe_probe() for all virtual functions. + */ +static int genwqe_recover_card(struct genwqe_dev *cd, int fatal_err) +{ + int rc; + struct pci_dev *pci_dev = cd->pci_dev; + + genwqe_stop(cd); + + /* + * Make sure chip is not reloaded to maintain FFDC. Write SLU + * Reset Register, CPLDReset field to 0. + */ + if (!fatal_err) { + cd->softreset = 0x70ull; + __genwqe_writeq(cd, IO_SLC_CFGREG_SOFTRESET, cd->softreset); + } + + rc = genwqe_bus_reset(cd); + if (rc != 0) { + dev_err(&pci_dev->dev, + "[%s] err: card recovery impossible!\n", __func__); + return rc; + } + + rc = genwqe_start(cd); + if (rc < 0) { + dev_err(&pci_dev->dev, + "[%s] err: failed to launch device!\n", __func__); + return rc; + } + return 0; +} + +static int genwqe_health_check_cond(struct genwqe_dev *cd, u64 *gfir) +{ + *gfir = __genwqe_readq(cd, IO_SLC_CFGREG_GFIR); + return (*gfir & GFIR_ERR_TRIGGER) && + genwqe_recovery_on_fatal_gfir_required(cd); +} + +/** + * genwqe_fir_checking() - Check the fault isolation registers of the card + * + * If this code works ok, can be tried out with help of the genwqe_poke tool: + * sudo ./tools/genwqe_poke 0x8 0xfefefefefef + * + * Now the relevant FIRs/sFIRs should be printed out and the driver should + * invoke recovery (devices are removed and readded). + */ +static u64 genwqe_fir_checking(struct genwqe_dev *cd) +{ + int j, iterations = 0; + u64 mask, fir, fec, uid, gfir, gfir_masked, sfir, sfec; + u32 fir_addr, fir_clr_addr, fec_addr, sfir_addr, sfec_addr; + struct pci_dev *pci_dev = cd->pci_dev; + + healthMonitor: + iterations++; + if (iterations > 16) { + dev_err(&pci_dev->dev, "* exit looping after %d times\n", + iterations); + goto fatal_error; + } + + gfir = __genwqe_readq(cd, IO_SLC_CFGREG_GFIR); + if (gfir != 0x0) + dev_err(&pci_dev->dev, "* 0x%08x 0x%016llx\n", + IO_SLC_CFGREG_GFIR, gfir); + if (gfir == IO_ILLEGAL_VALUE) + goto fatal_error; + + /* + * Avoid printing when to GFIR bit is on prevents contignous + * printout e.g. for the following bug: + * FIR set without a 2ndary FIR/FIR cannot be cleared + * Comment out the following if to get the prints: + */ + if (gfir == 0) + return 0; + + gfir_masked = gfir & GFIR_ERR_TRIGGER; /* fatal errors */ + + for (uid = 0; uid < GENWQE_MAX_UNITS; uid++) { /* 0..2 in zEDC */ + + /* read the primary FIR (pfir) */ + fir_addr = (uid << 24) + 0x08; + fir = __genwqe_readq(cd, fir_addr); + if (fir == 0x0) + continue; /* no error in this unit */ + + dev_err(&pci_dev->dev, "* 0x%08x 0x%016llx\n", fir_addr, fir); + if (fir == IO_ILLEGAL_VALUE) + goto fatal_error; + + /* read primary FEC */ + fec_addr = (uid << 24) + 0x18; + fec = __genwqe_readq(cd, fec_addr); + + dev_err(&pci_dev->dev, "* 0x%08x 0x%016llx\n", fec_addr, fec); + if (fec == IO_ILLEGAL_VALUE) + goto fatal_error; + + for (j = 0, mask = 1ULL; j < 64; j++, mask <<= 1) { + + /* secondary fir empty, skip it */ + if ((fir & mask) == 0x0) + continue; + + sfir_addr = (uid << 24) + 0x100 + 0x08 * j; + sfir = __genwqe_readq(cd, sfir_addr); + + if (sfir == IO_ILLEGAL_VALUE) + goto fatal_error; + dev_err(&pci_dev->dev, + "* 0x%08x 0x%016llx\n", sfir_addr, sfir); + + sfec_addr = (uid << 24) + 0x300 + 0x08 * j; + sfec = __genwqe_readq(cd, sfec_addr); + + if (sfec == IO_ILLEGAL_VALUE) + goto fatal_error; + dev_err(&pci_dev->dev, + "* 0x%08x 0x%016llx\n", sfec_addr, sfec); + + gfir = __genwqe_readq(cd, IO_SLC_CFGREG_GFIR); + if (gfir == IO_ILLEGAL_VALUE) + goto fatal_error; + + /* gfir turned on during routine! get out and + start over. */ + if ((gfir_masked == 0x0) && + (gfir & GFIR_ERR_TRIGGER)) { + goto healthMonitor; + } + + /* do not clear if we entered with a fatal gfir */ + if (gfir_masked == 0x0) { + + /* NEW clear by mask the logged bits */ + sfir_addr = (uid << 24) + 0x100 + 0x08 * j; + __genwqe_writeq(cd, sfir_addr, sfir); + + dev_dbg(&pci_dev->dev, + "[HM] Clearing 2ndary FIR 0x%08x with 0x%016llx\n", + sfir_addr, sfir); + + /* + * note, these cannot be error-Firs + * since gfir_masked is 0 after sfir + * was read. Also, it is safe to do + * this write if sfir=0. Still need to + * clear the primary. This just means + * there is no secondary FIR. + */ + + /* clear by mask the logged bit. */ + fir_clr_addr = (uid << 24) + 0x10; + __genwqe_writeq(cd, fir_clr_addr, mask); + + dev_dbg(&pci_dev->dev, + "[HM] Clearing primary FIR 0x%08x with 0x%016llx\n", + fir_clr_addr, mask); + } + } + } + gfir = __genwqe_readq(cd, IO_SLC_CFGREG_GFIR); + if (gfir == IO_ILLEGAL_VALUE) + goto fatal_error; + + if ((gfir_masked == 0x0) && (gfir & GFIR_ERR_TRIGGER)) { + /* + * Check once more that it didn't go on after all the + * FIRS were cleared. + */ + dev_dbg(&pci_dev->dev, "ACK! Another FIR! Recursing %d!\n", + iterations); + goto healthMonitor; + } + return gfir_masked; + + fatal_error: + return IO_ILLEGAL_VALUE; +} + +/** + * genwqe_pci_fundamental_reset() - trigger a PCIe fundamental reset on the slot + * + * Note: pci_set_pcie_reset_state() is not implemented on all archs, so this + * reset method will not work in all cases. + * + * Return: 0 on success or error code from pci_set_pcie_reset_state() + */ +static int genwqe_pci_fundamental_reset(struct pci_dev *pci_dev) +{ + int rc; + + /* + * lock pci config space access from userspace, + * save state and issue PCIe fundamental reset + */ + pci_cfg_access_lock(pci_dev); + pci_save_state(pci_dev); + rc = pci_set_pcie_reset_state(pci_dev, pcie_warm_reset); + if (!rc) { + /* keep PCIe reset asserted for 250ms */ + msleep(250); + pci_set_pcie_reset_state(pci_dev, pcie_deassert_reset); + /* Wait for 2s to reload flash and train the link */ + msleep(2000); + } + pci_restore_state(pci_dev); + pci_cfg_access_unlock(pci_dev); + return rc; +} + + +static int genwqe_platform_recovery(struct genwqe_dev *cd) +{ + struct pci_dev *pci_dev = cd->pci_dev; + int rc; + + dev_info(&pci_dev->dev, + "[%s] resetting card for error recovery\n", __func__); + + /* Clear out error injection flags */ + cd->err_inject &= ~(GENWQE_INJECT_HARDWARE_FAILURE | + GENWQE_INJECT_GFIR_FATAL | + GENWQE_INJECT_GFIR_INFO); + + genwqe_stop(cd); + + /* Try recoverying the card with fundamental reset */ + rc = genwqe_pci_fundamental_reset(pci_dev); + if (!rc) { + rc = genwqe_start(cd); + if (!rc) + dev_info(&pci_dev->dev, + "[%s] card recovered\n", __func__); + else + dev_err(&pci_dev->dev, + "[%s] err: cannot start card services! (err=%d)\n", + __func__, rc); + } else { + dev_err(&pci_dev->dev, + "[%s] card reset failed\n", __func__); + } + + return rc; +} + +/* + * genwqe_reload_bistream() - reload card bitstream + * + * Set the appropriate register and call fundamental reset to reaload the card + * bitstream. + * + * Return: 0 on success, error code otherwise + */ +static int genwqe_reload_bistream(struct genwqe_dev *cd) +{ + struct pci_dev *pci_dev = cd->pci_dev; + int rc; + + dev_info(&pci_dev->dev, + "[%s] resetting card for bitstream reload\n", + __func__); + + genwqe_stop(cd); + + /* + * Cause a CPLD reprogram with the 'next_bitstream' + * partition on PCIe hot or fundamental reset + */ + __genwqe_writeq(cd, IO_SLC_CFGREG_SOFTRESET, + (cd->softreset & 0xcull) | 0x70ull); + + rc = genwqe_pci_fundamental_reset(pci_dev); + if (rc) { + /* + * A fundamental reset failure can be caused + * by lack of support on the arch, so we just + * log the error and try to start the card + * again. + */ + dev_err(&pci_dev->dev, + "[%s] err: failed to reset card for bitstream reload\n", + __func__); + } + + rc = genwqe_start(cd); + if (rc) { + dev_err(&pci_dev->dev, + "[%s] err: cannot start card services! (err=%d)\n", + __func__, rc); + return rc; + } + dev_info(&pci_dev->dev, + "[%s] card reloaded\n", __func__); + return 0; +} + + +/** + * genwqe_health_thread() - Health checking thread + * + * This thread is only started for the PF of the card. + * + * This thread monitors the health of the card. A critical situation + * is when we read registers which contain -1 (IO_ILLEGAL_VALUE). In + * this case we need to be recovered from outside. Writing to + * registers will very likely not work either. + * + * This thread must only exit if kthread_should_stop() becomes true. + * + * Condition for the health-thread to trigger: + * a) when a kthread_stop() request comes in or + * b) a critical GFIR occured + * + * Informational GFIRs are checked and potentially printed in + * GENWQE_HEALTH_CHECK_INTERVAL seconds. + */ +static int genwqe_health_thread(void *data) +{ + int rc, should_stop = 0; + struct genwqe_dev *cd = data; + struct pci_dev *pci_dev = cd->pci_dev; + u64 gfir, gfir_masked, slu_unitcfg, app_unitcfg; + + health_thread_begin: + while (!kthread_should_stop()) { + rc = wait_event_interruptible_timeout(cd->health_waitq, + (genwqe_health_check_cond(cd, &gfir) || + (should_stop = kthread_should_stop())), + GENWQE_HEALTH_CHECK_INTERVAL * HZ); + + if (should_stop) + break; + + if (gfir == IO_ILLEGAL_VALUE) { + dev_err(&pci_dev->dev, + "[%s] GFIR=%016llx\n", __func__, gfir); + goto fatal_error; + } + + slu_unitcfg = __genwqe_readq(cd, IO_SLU_UNITCFG); + if (slu_unitcfg == IO_ILLEGAL_VALUE) { + dev_err(&pci_dev->dev, + "[%s] SLU_UNITCFG=%016llx\n", + __func__, slu_unitcfg); + goto fatal_error; + } + + app_unitcfg = __genwqe_readq(cd, IO_APP_UNITCFG); + if (app_unitcfg == IO_ILLEGAL_VALUE) { + dev_err(&pci_dev->dev, + "[%s] APP_UNITCFG=%016llx\n", + __func__, app_unitcfg); + goto fatal_error; + } + + gfir = __genwqe_readq(cd, IO_SLC_CFGREG_GFIR); + if (gfir == IO_ILLEGAL_VALUE) { + dev_err(&pci_dev->dev, + "[%s] %s: GFIR=%016llx\n", __func__, + (gfir & GFIR_ERR_TRIGGER) ? "err" : "info", + gfir); + goto fatal_error; + } + + gfir_masked = genwqe_fir_checking(cd); + if (gfir_masked == IO_ILLEGAL_VALUE) + goto fatal_error; + + /* + * GFIR ErrorTrigger bits set => reset the card! + * Never do this for old/manufacturing images! + */ + if ((gfir_masked) && !cd->skip_recovery && + genwqe_recovery_on_fatal_gfir_required(cd)) { + + cd->card_state = GENWQE_CARD_FATAL_ERROR; + + rc = genwqe_recover_card(cd, 0); + if (rc < 0) { + /* FIXME Card is unusable and needs unbind! */ + goto fatal_error; + } + } + + if (cd->card_state == GENWQE_CARD_RELOAD_BITSTREAM) { + /* Userspace requested card bitstream reload */ + rc = genwqe_reload_bistream(cd); + if (rc) + goto fatal_error; + } + + cd->last_gfir = gfir; + cond_resched(); + } + + return 0; + + fatal_error: + if (cd->use_platform_recovery) { + /* + * Since we use raw accessors, EEH errors won't be detected + * by the platform until we do a non-raw MMIO or config space + * read + */ + readq(cd->mmio + IO_SLC_CFGREG_GFIR); + + /* We do nothing if the card is going over PCI recovery */ + if (pci_channel_offline(pci_dev)) + return -EIO; + + /* + * If it's supported by the platform, we try a fundamental reset + * to recover from a fatal error. Otherwise, we continue to wait + * for an external recovery procedure to take care of it. + */ + rc = genwqe_platform_recovery(cd); + if (!rc) + goto health_thread_begin; + } + + dev_err(&pci_dev->dev, + "[%s] card unusable. Please trigger unbind!\n", __func__); + + /* Bring down logical devices to inform user space via udev remove. */ + cd->card_state = GENWQE_CARD_FATAL_ERROR; + genwqe_stop(cd); + + /* genwqe_bus_reset failed(). Now wait for genwqe_remove(). */ + while (!kthread_should_stop()) + cond_resched(); + + return -EIO; +} + +static int genwqe_health_check_start(struct genwqe_dev *cd) +{ + int rc; + + if (GENWQE_HEALTH_CHECK_INTERVAL <= 0) + return 0; /* valid for disabling the service */ + + /* moved before request_irq() */ + /* init_waitqueue_head(&cd->health_waitq); */ + + cd->health_thread = kthread_run(genwqe_health_thread, cd, + GENWQE_DEVNAME "%d_health", + cd->card_idx); + if (IS_ERR(cd->health_thread)) { + rc = PTR_ERR(cd->health_thread); + cd->health_thread = NULL; + return rc; + } + return 0; +} + +static int genwqe_health_thread_running(struct genwqe_dev *cd) +{ + return cd->health_thread != NULL; +} + +static int genwqe_health_check_stop(struct genwqe_dev *cd) +{ + int rc; + + if (!genwqe_health_thread_running(cd)) + return -EIO; + + rc = kthread_stop(cd->health_thread); + cd->health_thread = NULL; + return 0; +} + +/** + * genwqe_pci_setup() - Allocate PCIe related resources for our card + */ +static int genwqe_pci_setup(struct genwqe_dev *cd) +{ + int err; + struct pci_dev *pci_dev = cd->pci_dev; + + err = pci_enable_device_mem(pci_dev); + if (err) { + dev_err(&pci_dev->dev, + "err: failed to enable pci memory (err=%d)\n", err); + goto err_out; + } + + /* Reserve PCI I/O and memory resources */ + err = pci_request_mem_regions(pci_dev, genwqe_driver_name); + if (err) { + dev_err(&pci_dev->dev, + "[%s] err: request bars failed (%d)\n", __func__, err); + err = -EIO; + goto err_disable_device; + } + + /* check for 64-bit DMA address supported (DAC) */ + if (!pci_set_dma_mask(pci_dev, DMA_BIT_MASK(64))) { + err = pci_set_consistent_dma_mask(pci_dev, DMA_BIT_MASK(64)); + if (err) { + dev_err(&pci_dev->dev, + "err: DMA64 consistent mask error\n"); + err = -EIO; + goto out_release_resources; + } + /* check for 32-bit DMA address supported (SAC) */ + } else if (!pci_set_dma_mask(pci_dev, DMA_BIT_MASK(32))) { + err = pci_set_consistent_dma_mask(pci_dev, DMA_BIT_MASK(32)); + if (err) { + dev_err(&pci_dev->dev, + "err: DMA32 consistent mask error\n"); + err = -EIO; + goto out_release_resources; + } + } else { + dev_err(&pci_dev->dev, + "err: neither DMA32 nor DMA64 supported\n"); + err = -EIO; + goto out_release_resources; + } + + pci_set_master(pci_dev); + pci_enable_pcie_error_reporting(pci_dev); + + /* EEH recovery requires PCIe fundamental reset */ + pci_dev->needs_freset = 1; + + /* request complete BAR-0 space (length = 0) */ + cd->mmio_len = pci_resource_len(pci_dev, 0); + cd->mmio = pci_iomap(pci_dev, 0, 0); + if (cd->mmio == NULL) { + dev_err(&pci_dev->dev, + "[%s] err: mapping BAR0 failed\n", __func__); + err = -ENOMEM; + goto out_release_resources; + } + + cd->num_vfs = pci_sriov_get_totalvfs(pci_dev); + if (cd->num_vfs < 0) + cd->num_vfs = 0; + + err = genwqe_read_ids(cd); + if (err) + goto out_iounmap; + + return 0; + + out_iounmap: + pci_iounmap(pci_dev, cd->mmio); + out_release_resources: + pci_release_mem_regions(pci_dev); + err_disable_device: + pci_disable_device(pci_dev); + err_out: + return err; +} + +/** + * genwqe_pci_remove() - Free PCIe related resources for our card + */ +static void genwqe_pci_remove(struct genwqe_dev *cd) +{ + struct pci_dev *pci_dev = cd->pci_dev; + + if (cd->mmio) + pci_iounmap(pci_dev, cd->mmio); + + pci_release_mem_regions(pci_dev); + pci_disable_device(pci_dev); +} + +/** + * genwqe_probe() - Device initialization + * @pdev: PCI device information struct + * + * Callable for multiple cards. This function is called on bind. + * + * Return: 0 if succeeded, < 0 when failed + */ +static int genwqe_probe(struct pci_dev *pci_dev, + const struct pci_device_id *id) +{ + int err; + struct genwqe_dev *cd; + + genwqe_init_crc32(); + + cd = genwqe_dev_alloc(); + if (IS_ERR(cd)) { + dev_err(&pci_dev->dev, "err: could not alloc mem (err=%d)!\n", + (int)PTR_ERR(cd)); + return PTR_ERR(cd); + } + + dev_set_drvdata(&pci_dev->dev, cd); + cd->pci_dev = pci_dev; + + err = genwqe_pci_setup(cd); + if (err < 0) { + dev_err(&pci_dev->dev, + "err: problems with PCI setup (err=%d)\n", err); + goto out_free_dev; + } + + err = genwqe_start(cd); + if (err < 0) { + dev_err(&pci_dev->dev, + "err: cannot start card services! (err=%d)\n", err); + goto out_pci_remove; + } + + if (genwqe_is_privileged(cd)) { + err = genwqe_health_check_start(cd); + if (err < 0) { + dev_err(&pci_dev->dev, + "err: cannot start health checking! (err=%d)\n", + err); + goto out_stop_services; + } + } + return 0; + + out_stop_services: + genwqe_stop(cd); + out_pci_remove: + genwqe_pci_remove(cd); + out_free_dev: + genwqe_dev_free(cd); + return err; +} + +/** + * genwqe_remove() - Called when device is removed (hot-plugable) + * + * Or when driver is unloaded respecitively when unbind is done. + */ +static void genwqe_remove(struct pci_dev *pci_dev) +{ + struct genwqe_dev *cd = dev_get_drvdata(&pci_dev->dev); + + genwqe_health_check_stop(cd); + + /* + * genwqe_stop() must survive if it is called twice + * sequentially. This happens when the health thread calls it + * and fails on genwqe_bus_reset(). + */ + genwqe_stop(cd); + genwqe_pci_remove(cd); + genwqe_dev_free(cd); +} + +/* + * genwqe_err_error_detected() - Error detection callback + * + * This callback is called by the PCI subsystem whenever a PCI bus + * error is detected. + */ +static pci_ers_result_t genwqe_err_error_detected(struct pci_dev *pci_dev, + enum pci_channel_state state) +{ + struct genwqe_dev *cd; + + dev_err(&pci_dev->dev, "[%s] state=%d\n", __func__, state); + + cd = dev_get_drvdata(&pci_dev->dev); + if (cd == NULL) + return PCI_ERS_RESULT_DISCONNECT; + + /* Stop the card */ + genwqe_health_check_stop(cd); + genwqe_stop(cd); + + /* + * On permanent failure, the PCI code will call device remove + * after the return of this function. + * genwqe_stop() can be called twice. + */ + if (state == pci_channel_io_perm_failure) { + return PCI_ERS_RESULT_DISCONNECT; + } else { + genwqe_pci_remove(cd); + return PCI_ERS_RESULT_NEED_RESET; + } +} + +static pci_ers_result_t genwqe_err_slot_reset(struct pci_dev *pci_dev) +{ + int rc; + struct genwqe_dev *cd = dev_get_drvdata(&pci_dev->dev); + + rc = genwqe_pci_setup(cd); + if (!rc) { + return PCI_ERS_RESULT_RECOVERED; + } else { + dev_err(&pci_dev->dev, + "err: problems with PCI setup (err=%d)\n", rc); + return PCI_ERS_RESULT_DISCONNECT; + } +} + +static pci_ers_result_t genwqe_err_result_none(struct pci_dev *dev) +{ + return PCI_ERS_RESULT_NONE; +} + +static void genwqe_err_resume(struct pci_dev *pci_dev) +{ + int rc; + struct genwqe_dev *cd = dev_get_drvdata(&pci_dev->dev); + + rc = genwqe_start(cd); + if (!rc) { + rc = genwqe_health_check_start(cd); + if (rc) + dev_err(&pci_dev->dev, + "err: cannot start health checking! (err=%d)\n", + rc); + } else { + dev_err(&pci_dev->dev, + "err: cannot start card services! (err=%d)\n", rc); + } +} + +static int genwqe_sriov_configure(struct pci_dev *dev, int numvfs) +{ + int rc; + struct genwqe_dev *cd = dev_get_drvdata(&dev->dev); + + if (numvfs > 0) { + genwqe_setup_vf_jtimer(cd); + rc = pci_enable_sriov(dev, numvfs); + if (rc < 0) + return rc; + return numvfs; + } + if (numvfs == 0) { + pci_disable_sriov(dev); + return 0; + } + return 0; +} + +static struct pci_error_handlers genwqe_err_handler = { + .error_detected = genwqe_err_error_detected, + .mmio_enabled = genwqe_err_result_none, + .slot_reset = genwqe_err_slot_reset, + .resume = genwqe_err_resume, +}; + +static struct pci_driver genwqe_driver = { + .name = genwqe_driver_name, + .id_table = genwqe_device_table, + .probe = genwqe_probe, + .remove = genwqe_remove, + .sriov_configure = genwqe_sriov_configure, + .err_handler = &genwqe_err_handler, +}; + +/** + * genwqe_devnode() - Set default access mode for genwqe devices. + * + * Default mode should be rw for everybody. Do not change default + * device name. + */ +static char *genwqe_devnode(struct device *dev, umode_t *mode) +{ + if (mode) + *mode = 0666; + return NULL; +} + +/** + * genwqe_init_module() - Driver registration and initialization + */ +static int __init genwqe_init_module(void) +{ + int rc; + + class_genwqe = class_create(THIS_MODULE, GENWQE_DEVNAME); + if (IS_ERR(class_genwqe)) { + pr_err("[%s] create class failed\n", __func__); + return -ENOMEM; + } + + class_genwqe->devnode = genwqe_devnode; + + debugfs_genwqe = debugfs_create_dir(GENWQE_DEVNAME, NULL); + if (!debugfs_genwqe) { + rc = -ENOMEM; + goto err_out; + } + + rc = pci_register_driver(&genwqe_driver); + if (rc != 0) { + pr_err("[%s] pci_reg_driver (rc=%d)\n", __func__, rc); + goto err_out0; + } + + return rc; + + err_out0: + debugfs_remove(debugfs_genwqe); + err_out: + class_destroy(class_genwqe); + return rc; +} + +/** + * genwqe_exit_module() - Driver exit + */ +static void __exit genwqe_exit_module(void) +{ + pci_unregister_driver(&genwqe_driver); + debugfs_remove(debugfs_genwqe); + class_destroy(class_genwqe); +} + +module_init(genwqe_init_module); +module_exit(genwqe_exit_module); diff --git a/drivers/misc/genwqe/card_base.h b/drivers/misc/genwqe/card_base.h new file mode 100644 index 000000000..77ed3967c --- /dev/null +++ b/drivers/misc/genwqe/card_base.h @@ -0,0 +1,585 @@ +#ifndef __CARD_BASE_H__ +#define __CARD_BASE_H__ + +/** + * IBM Accelerator Family 'GenWQE' + * + * (C) Copyright IBM Corp. 2013 + * + * Author: Frank Haverkamp + * Author: Joerg-Stephan Vogt + * Author: Michael Jung + * Author: Michael Ruettger + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +/* + * Interfaces within the GenWQE module. Defines genwqe_card and + * ddcb_queue as well as ddcb_requ. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include "genwqe_driver.h" + +#define GENWQE_MSI_IRQS 4 /* Just one supported, no MSIx */ + +#define GENWQE_MAX_VFS 15 /* maximum 15 VFs are possible */ +#define GENWQE_MAX_FUNCS 16 /* 1 PF and 15 VFs */ +#define GENWQE_CARD_NO_MAX (16 * GENWQE_MAX_FUNCS) + +/* Compile parameters, some of them appear in debugfs for later adjustment */ +#define GENWQE_DDCB_MAX 32 /* DDCBs on the work-queue */ +#define GENWQE_POLLING_ENABLED 0 /* in case of irqs not working */ +#define GENWQE_DDCB_SOFTWARE_TIMEOUT 10 /* timeout per DDCB in seconds */ +#define GENWQE_KILL_TIMEOUT 8 /* time until process gets killed */ +#define GENWQE_VF_JOBTIMEOUT_MSEC 250 /* 250 msec */ +#define GENWQE_PF_JOBTIMEOUT_MSEC 8000 /* 8 sec should be ok */ +#define GENWQE_HEALTH_CHECK_INTERVAL 4 /* <= 0: disabled */ + +/* Sysfs attribute groups used when we create the genwqe device */ +extern const struct attribute_group *genwqe_attribute_groups[]; + +/* + * Config space for Genwqe5 A7: + * 00:[14 10 4b 04]40 00 10 00[00 00 00 12]00 00 00 00 + * 10: 0c 00 00 f0 07 3c 00 00 00 00 00 00 00 00 00 00 + * 20: 00 00 00 00 00 00 00 00 00 00 00 00[14 10 4b 04] + * 30: 00 00 00 00 50 00 00 00 00 00 00 00 00 00 00 00 + */ +#define PCI_DEVICE_GENWQE 0x044b /* Genwqe DeviceID */ + +#define PCI_SUBSYSTEM_ID_GENWQE5 0x035f /* Genwqe A5 Subsystem-ID */ +#define PCI_SUBSYSTEM_ID_GENWQE5_NEW 0x044b /* Genwqe A5 Subsystem-ID */ +#define PCI_CLASSCODE_GENWQE5 0x1200 /* UNKNOWN */ + +#define PCI_SUBVENDOR_ID_IBM_SRIOV 0x0000 +#define PCI_SUBSYSTEM_ID_GENWQE5_SRIOV 0x0000 /* Genwqe A5 Subsystem-ID */ +#define PCI_CLASSCODE_GENWQE5_SRIOV 0x1200 /* UNKNOWN */ + +#define GENWQE_SLU_ARCH_REQ 2 /* Required SLU architecture level */ + +/** + * struct genwqe_reg - Genwqe data dump functionality + */ +struct genwqe_reg { + u32 addr; + u32 idx; + u64 val; +}; + +/* + * enum genwqe_dbg_type - Specify chip unit to dump/debug + */ +enum genwqe_dbg_type { + GENWQE_DBG_UNIT0 = 0, /* captured before prev errs cleared */ + GENWQE_DBG_UNIT1 = 1, + GENWQE_DBG_UNIT2 = 2, + GENWQE_DBG_UNIT3 = 3, + GENWQE_DBG_UNIT4 = 4, + GENWQE_DBG_UNIT5 = 5, + GENWQE_DBG_UNIT6 = 6, + GENWQE_DBG_UNIT7 = 7, + GENWQE_DBG_REGS = 8, + GENWQE_DBG_DMA = 9, + GENWQE_DBG_UNITS = 10, /* max number of possible debug units */ +}; + +/* Software error injection to simulate card failures */ +#define GENWQE_INJECT_HARDWARE_FAILURE 0x00000001 /* injects -1 reg reads */ +#define GENWQE_INJECT_BUS_RESET_FAILURE 0x00000002 /* pci_bus_reset fail */ +#define GENWQE_INJECT_GFIR_FATAL 0x00000004 /* GFIR = 0x0000ffff */ +#define GENWQE_INJECT_GFIR_INFO 0x00000008 /* GFIR = 0xffff0000 */ + +/* + * Genwqe card description and management data. + * + * Error-handling in case of card malfunction + * ------------------------------------------ + * + * If the card is detected to be defective the outside environment + * will cause the PCI layer to call deinit (the cleanup function for + * probe). This is the same effect like doing a unbind/bind operation + * on the card. + * + * The genwqe card driver implements a health checking thread which + * verifies the card function. If this detects a problem the cards + * device is being shutdown and restarted again, along with a reset of + * the card and queue. + * + * All functions accessing the card device return either -EIO or -ENODEV + * code to indicate the malfunction to the user. The user has to close + * the file descriptor and open a new one, once the card becomes + * available again. + * + * If the open file descriptor is setup to receive SIGIO, the signal is + * genereated for the application which has to provide a handler to + * react on it. If the application does not close the open + * file descriptor a SIGKILL is send to enforce freeing the cards + * resources. + * + * I did not find a different way to prevent kernel problems due to + * reference counters for the cards character devices getting out of + * sync. The character device deallocation does not block, even if + * there is still an open file descriptor pending. If this pending + * descriptor is closed, the data structures used by the character + * device is reinstantiated, which will lead to the reference counter + * dropping below the allowed values. + * + * Card recovery + * ------------- + * + * To test the internal driver recovery the following command can be used: + * sudo sh -c 'echo 0xfffff > /sys/class/genwqe/genwqe0_card/err_inject' + */ + + +/** + * struct dma_mapping_type - Mapping type definition + * + * To avoid memcpying data arround we use user memory directly. To do + * this we need to pin/swap-in the memory and request a DMA address + * for it. + */ +enum dma_mapping_type { + GENWQE_MAPPING_RAW = 0, /* contignous memory buffer */ + GENWQE_MAPPING_SGL_TEMP, /* sglist dynamically used */ + GENWQE_MAPPING_SGL_PINNED, /* sglist used with pinning */ +}; + +/** + * struct dma_mapping - Information about memory mappings done by the driver + */ +struct dma_mapping { + enum dma_mapping_type type; + + void *u_vaddr; /* user-space vaddr/non-aligned */ + void *k_vaddr; /* kernel-space vaddr/non-aligned */ + dma_addr_t dma_addr; /* physical DMA address */ + + struct page **page_list; /* list of pages used by user buff */ + dma_addr_t *dma_list; /* list of dma addresses per page */ + unsigned int nr_pages; /* number of pages */ + unsigned int size; /* size in bytes */ + + struct list_head card_list; /* list of usr_maps for card */ + struct list_head pin_list; /* list of pinned memory for dev */ + int write; /* writable map? useful in unmapping */ +}; + +static inline void genwqe_mapping_init(struct dma_mapping *m, + enum dma_mapping_type type) +{ + memset(m, 0, sizeof(*m)); + m->type = type; + m->write = 1; /* Assume the maps we create are R/W */ +} + +/** + * struct ddcb_queue - DDCB queue data + * @ddcb_max: Number of DDCBs on the queue + * @ddcb_next: Next free DDCB + * @ddcb_act: Next DDCB supposed to finish + * @ddcb_seq: Sequence number of last DDCB + * @ddcbs_in_flight: Currently enqueued DDCBs + * @ddcbs_completed: Number of already completed DDCBs + * @return_on_busy: Number of -EBUSY returns on full queue + * @wait_on_busy: Number of waits on full queue + * @ddcb_daddr: DMA address of first DDCB in the queue + * @ddcb_vaddr: Kernel virtual address of first DDCB in the queue + * @ddcb_req: Associated requests (one per DDCB) + * @ddcb_waitqs: Associated wait queues (one per DDCB) + * @ddcb_lock: Lock to protect queuing operations + * @ddcb_waitq: Wait on next DDCB finishing + */ + +struct ddcb_queue { + int ddcb_max; /* amount of DDCBs */ + int ddcb_next; /* next available DDCB num */ + int ddcb_act; /* DDCB to be processed */ + u16 ddcb_seq; /* slc seq num */ + unsigned int ddcbs_in_flight; /* number of ddcbs in processing */ + unsigned int ddcbs_completed; + unsigned int ddcbs_max_in_flight; + unsigned int return_on_busy; /* how many times -EBUSY? */ + unsigned int wait_on_busy; + + dma_addr_t ddcb_daddr; /* DMA address */ + struct ddcb *ddcb_vaddr; /* kernel virtual addr for DDCBs */ + struct ddcb_requ **ddcb_req; /* ddcb processing parameter */ + wait_queue_head_t *ddcb_waitqs; /* waitqueue per ddcb */ + + spinlock_t ddcb_lock; /* exclusive access to queue */ + wait_queue_head_t busy_waitq; /* wait for ddcb processing */ + + /* registers or the respective queue to be used */ + u32 IO_QUEUE_CONFIG; + u32 IO_QUEUE_STATUS; + u32 IO_QUEUE_SEGMENT; + u32 IO_QUEUE_INITSQN; + u32 IO_QUEUE_WRAP; + u32 IO_QUEUE_OFFSET; + u32 IO_QUEUE_WTIME; + u32 IO_QUEUE_ERRCNTS; + u32 IO_QUEUE_LRW; +}; + +/* + * GFIR, SLU_UNITCFG, APP_UNITCFG + * 8 Units with FIR/FEC + 64 * 2ndary FIRS/FEC. + */ +#define GENWQE_FFDC_REGS (3 + (8 * (2 + 2 * 64))) + +struct genwqe_ffdc { + unsigned int entries; + struct genwqe_reg *regs; +}; + +/** + * struct genwqe_dev - GenWQE device information + * @card_state: Card operation state, see above + * @ffdc: First Failure Data Capture buffers for each unit + * @card_thread: Working thread to operate the DDCB queue + * @card_waitq: Wait queue used in card_thread + * @queue: DDCB queue + * @health_thread: Card monitoring thread (only for PFs) + * @health_waitq: Wait queue used in health_thread + * @pci_dev: Associated PCI device (function) + * @mmio: Base address of 64-bit register space + * @mmio_len: Length of register area + * @file_lock: Lock to protect access to file_list + * @file_list: List of all processes with open GenWQE file descriptors + * + * This struct contains all information needed to communicate with a + * GenWQE card. It is initialized when a GenWQE device is found and + * destroyed when it goes away. It holds data to maintain the queue as + * well as data needed to feed the user interfaces. + */ +struct genwqe_dev { + enum genwqe_card_state card_state; + spinlock_t print_lock; + + int card_idx; /* card index 0..CARD_NO_MAX-1 */ + u64 flags; /* general flags */ + + /* FFDC data gathering */ + struct genwqe_ffdc ffdc[GENWQE_DBG_UNITS]; + + /* DDCB workqueue */ + struct task_struct *card_thread; + wait_queue_head_t queue_waitq; + struct ddcb_queue queue; /* genwqe DDCB queue */ + unsigned int irqs_processed; + + /* Card health checking thread */ + struct task_struct *health_thread; + wait_queue_head_t health_waitq; + + int use_platform_recovery; /* use platform recovery mechanisms */ + + /* char device */ + dev_t devnum_genwqe; /* major/minor num card */ + struct class *class_genwqe; /* reference to class object */ + struct device *dev; /* for device creation */ + struct cdev cdev_genwqe; /* char device for card */ + + struct dentry *debugfs_root; /* debugfs card root directory */ + struct dentry *debugfs_genwqe; /* debugfs driver root directory */ + + /* pci resources */ + struct pci_dev *pci_dev; /* PCI device */ + void __iomem *mmio; /* BAR-0 MMIO start */ + unsigned long mmio_len; + int num_vfs; + u32 vf_jobtimeout_msec[GENWQE_MAX_VFS]; + int is_privileged; /* access to all regs possible */ + + /* config regs which we need often */ + u64 slu_unitcfg; + u64 app_unitcfg; + u64 softreset; + u64 err_inject; + u64 last_gfir; + char app_name[5]; + + spinlock_t file_lock; /* lock for open files */ + struct list_head file_list; /* list of open files */ + + /* debugfs parameters */ + int ddcb_software_timeout; /* wait until DDCB times out */ + int skip_recovery; /* circumvention if recovery fails */ + int kill_timeout; /* wait after sending SIGKILL */ +}; + +/** + * enum genwqe_requ_state - State of a DDCB execution request + */ +enum genwqe_requ_state { + GENWQE_REQU_NEW = 0, + GENWQE_REQU_ENQUEUED = 1, + GENWQE_REQU_TAPPED = 2, + GENWQE_REQU_FINISHED = 3, + GENWQE_REQU_STATE_MAX, +}; + +/** + * struct genwqe_sgl - Scatter gather list describing user-space memory + * @sgl: scatter gather list needs to be 128 byte aligned + * @sgl_dma_addr: dma address of sgl + * @sgl_size: size of area used for sgl + * @user_addr: user-space address of memory area + * @user_size: size of user-space memory area + * @page: buffer for partial pages if needed + * @page_dma_addr: dma address partial pages + * @write: should we write it back to userspace? + */ +struct genwqe_sgl { + dma_addr_t sgl_dma_addr; + struct sg_entry *sgl; + size_t sgl_size; /* size of sgl */ + + void __user *user_addr; /* user-space base-address */ + size_t user_size; /* size of memory area */ + + int write; + + unsigned long nr_pages; + unsigned long fpage_offs; + size_t fpage_size; + size_t lpage_size; + + void *fpage; + dma_addr_t fpage_dma_addr; + + void *lpage; + dma_addr_t lpage_dma_addr; +}; + +int genwqe_alloc_sync_sgl(struct genwqe_dev *cd, struct genwqe_sgl *sgl, + void __user *user_addr, size_t user_size, int write); + +int genwqe_setup_sgl(struct genwqe_dev *cd, struct genwqe_sgl *sgl, + dma_addr_t *dma_list); + +int genwqe_free_sync_sgl(struct genwqe_dev *cd, struct genwqe_sgl *sgl); + +/** + * struct ddcb_requ - Kernel internal representation of the DDCB request + * @cmd: User space representation of the DDCB execution request + */ +struct ddcb_requ { + /* kernel specific content */ + enum genwqe_requ_state req_state; /* request status */ + int num; /* ddcb_no for this request */ + struct ddcb_queue *queue; /* associated queue */ + + struct dma_mapping dma_mappings[DDCB_FIXUPS]; + struct genwqe_sgl sgls[DDCB_FIXUPS]; + + /* kernel/user shared content */ + struct genwqe_ddcb_cmd cmd; /* ddcb_no for this request */ + struct genwqe_debug_data debug_data; +}; + +/** + * struct genwqe_file - Information for open GenWQE devices + */ +struct genwqe_file { + struct genwqe_dev *cd; + struct genwqe_driver *client; + struct file *filp; + + struct fasync_struct *async_queue; + struct pid *opener; + struct list_head list; /* entry in list of open files */ + + spinlock_t map_lock; /* lock for dma_mappings */ + struct list_head map_list; /* list of dma_mappings */ + + spinlock_t pin_lock; /* lock for pinned memory */ + struct list_head pin_list; /* list of pinned memory */ +}; + +int genwqe_setup_service_layer(struct genwqe_dev *cd); /* for PF only */ +int genwqe_finish_queue(struct genwqe_dev *cd); +int genwqe_release_service_layer(struct genwqe_dev *cd); + +/** + * genwqe_get_slu_id() - Read Service Layer Unit Id + * Return: 0x00: Development code + * 0x01: SLC1 (old) + * 0x02: SLC2 (sept2012) + * 0x03: SLC2 (feb2013, generic driver) + */ +static inline int genwqe_get_slu_id(struct genwqe_dev *cd) +{ + return (int)((cd->slu_unitcfg >> 32) & 0xff); +} + +int genwqe_ddcbs_in_flight(struct genwqe_dev *cd); + +u8 genwqe_card_type(struct genwqe_dev *cd); +int genwqe_card_reset(struct genwqe_dev *cd); +int genwqe_set_interrupt_capability(struct genwqe_dev *cd, int count); +void genwqe_reset_interrupt_capability(struct genwqe_dev *cd); + +int genwqe_device_create(struct genwqe_dev *cd); +int genwqe_device_remove(struct genwqe_dev *cd); + +/* debugfs */ +int genwqe_init_debugfs(struct genwqe_dev *cd); +void genqwe_exit_debugfs(struct genwqe_dev *cd); + +int genwqe_read_softreset(struct genwqe_dev *cd); + +/* Hardware Circumventions */ +int genwqe_recovery_on_fatal_gfir_required(struct genwqe_dev *cd); +int genwqe_flash_readback_fails(struct genwqe_dev *cd); + +/** + * genwqe_write_vreg() - Write register in VF window + * @cd: genwqe device + * @reg: register address + * @val: value to write + * @func: 0: PF, 1: VF0, ..., 15: VF14 + */ +int genwqe_write_vreg(struct genwqe_dev *cd, u32 reg, u64 val, int func); + +/** + * genwqe_read_vreg() - Read register in VF window + * @cd: genwqe device + * @reg: register address + * @func: 0: PF, 1: VF0, ..., 15: VF14 + * + * Return: content of the register + */ +u64 genwqe_read_vreg(struct genwqe_dev *cd, u32 reg, int func); + +/* FFDC Buffer Management */ +int genwqe_ffdc_buff_size(struct genwqe_dev *cd, int unit_id); +int genwqe_ffdc_buff_read(struct genwqe_dev *cd, int unit_id, + struct genwqe_reg *regs, unsigned int max_regs); +int genwqe_read_ffdc_regs(struct genwqe_dev *cd, struct genwqe_reg *regs, + unsigned int max_regs, int all); +int genwqe_ffdc_dump_dma(struct genwqe_dev *cd, + struct genwqe_reg *regs, unsigned int max_regs); + +int genwqe_init_debug_data(struct genwqe_dev *cd, + struct genwqe_debug_data *d); + +void genwqe_init_crc32(void); +int genwqe_read_app_id(struct genwqe_dev *cd, char *app_name, int len); + +/* Memory allocation/deallocation; dma address handling */ +int genwqe_user_vmap(struct genwqe_dev *cd, struct dma_mapping *m, + void *uaddr, unsigned long size); + +int genwqe_user_vunmap(struct genwqe_dev *cd, struct dma_mapping *m); + +static inline bool dma_mapping_used(struct dma_mapping *m) +{ + if (!m) + return false; + return m->size != 0; +} + +/** + * __genwqe_execute_ddcb() - Execute DDCB request with addr translation + * + * This function will do the address translation changes to the DDCBs + * according to the definitions required by the ATS field. It looks up + * the memory allocation buffer or does vmap/vunmap for the respective + * user-space buffers, inclusive page pinning and scatter gather list + * buildup and teardown. + */ +int __genwqe_execute_ddcb(struct genwqe_dev *cd, + struct genwqe_ddcb_cmd *cmd, unsigned int f_flags); + +/** + * __genwqe_execute_raw_ddcb() - Execute DDCB request without addr translation + * + * This version will not do address translation or any modification of + * the DDCB data. It is used e.g. for the MoveFlash DDCB which is + * entirely prepared by the driver itself. That means the appropriate + * DMA addresses are already in the DDCB and do not need any + * modification. + */ +int __genwqe_execute_raw_ddcb(struct genwqe_dev *cd, + struct genwqe_ddcb_cmd *cmd, + unsigned int f_flags); +int __genwqe_enqueue_ddcb(struct genwqe_dev *cd, + struct ddcb_requ *req, + unsigned int f_flags); + +int __genwqe_wait_ddcb(struct genwqe_dev *cd, struct ddcb_requ *req); +int __genwqe_purge_ddcb(struct genwqe_dev *cd, struct ddcb_requ *req); + +/* register access */ +int __genwqe_writeq(struct genwqe_dev *cd, u64 byte_offs, u64 val); +u64 __genwqe_readq(struct genwqe_dev *cd, u64 byte_offs); +int __genwqe_writel(struct genwqe_dev *cd, u64 byte_offs, u32 val); +u32 __genwqe_readl(struct genwqe_dev *cd, u64 byte_offs); + +void *__genwqe_alloc_consistent(struct genwqe_dev *cd, size_t size, + dma_addr_t *dma_handle); +void __genwqe_free_consistent(struct genwqe_dev *cd, size_t size, + void *vaddr, dma_addr_t dma_handle); + +/* Base clock frequency in MHz */ +int genwqe_base_clock_frequency(struct genwqe_dev *cd); + +/* Before FFDC is captured the traps should be stopped. */ +void genwqe_stop_traps(struct genwqe_dev *cd); +void genwqe_start_traps(struct genwqe_dev *cd); + +/* Hardware circumvention */ +bool genwqe_need_err_masking(struct genwqe_dev *cd); + +/** + * genwqe_is_privileged() - Determine operation mode for PCI function + * + * On Intel with SRIOV support we see: + * PF: is_physfn = 1 is_virtfn = 0 + * VF: is_physfn = 0 is_virtfn = 1 + * + * On Systems with no SRIOV support _and_ virtualized systems we get: + * is_physfn = 0 is_virtfn = 0 + * + * Other vendors have individual pci device ids to distinguish between + * virtual function drivers and physical function drivers. GenWQE + * unfortunately has just on pci device id for both, VFs and PF. + * + * The following code is used to distinguish if the card is running in + * privileged mode, either as true PF or in a virtualized system with + * full register access e.g. currently on PowerPC. + * + * if (pci_dev->is_virtfn) + * cd->is_privileged = 0; + * else + * cd->is_privileged = (__genwqe_readq(cd, IO_SLU_BITSTREAM) + * != IO_ILLEGAL_VALUE); + */ +static inline int genwqe_is_privileged(struct genwqe_dev *cd) +{ + return cd->is_privileged; +} + +#endif /* __CARD_BASE_H__ */ diff --git a/drivers/misc/genwqe/card_ddcb.c b/drivers/misc/genwqe/card_ddcb.c new file mode 100644 index 000000000..656449cb4 --- /dev/null +++ b/drivers/misc/genwqe/card_ddcb.c @@ -0,0 +1,1410 @@ +/** + * IBM Accelerator Family 'GenWQE' + * + * (C) Copyright IBM Corp. 2013 + * + * Author: Frank Haverkamp + * Author: Joerg-Stephan Vogt + * Author: Michael Jung + * Author: Michael Ruettger + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +/* + * Device Driver Control Block (DDCB) queue support. Definition of + * interrupt handlers for queue support as well as triggering the + * health monitor code in case of problems. The current hardware uses + * an MSI interrupt which is shared between error handling and + * functional code. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "card_base.h" +#include "card_ddcb.h" + +/* + * N: next DDCB, this is where the next DDCB will be put. + * A: active DDCB, this is where the code will look for the next completion. + * x: DDCB is enqueued, we are waiting for its completion. + + * Situation (1): Empty queue + * +---+---+---+---+---+---+---+---+ + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | + * | | | | | | | | | + * +---+---+---+---+---+---+---+---+ + * A/N + * enqueued_ddcbs = A - N = 2 - 2 = 0 + * + * Situation (2): Wrapped, N > A + * +---+---+---+---+---+---+---+---+ + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | + * | | | x | x | | | | | + * +---+---+---+---+---+---+---+---+ + * A N + * enqueued_ddcbs = N - A = 4 - 2 = 2 + * + * Situation (3): Queue wrapped, A > N + * +---+---+---+---+---+---+---+---+ + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | + * | x | x | | | x | x | x | x | + * +---+---+---+---+---+---+---+---+ + * N A + * enqueued_ddcbs = queue_max - (A - N) = 8 - (4 - 2) = 6 + * + * Situation (4a): Queue full N > A + * +---+---+---+---+---+---+---+---+ + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | + * | x | x | x | x | x | x | x | | + * +---+---+---+---+---+---+---+---+ + * A N + * + * enqueued_ddcbs = N - A = 7 - 0 = 7 + * + * Situation (4a): Queue full A > N + * +---+---+---+---+---+---+---+---+ + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | + * | x | x | x | | x | x | x | x | + * +---+---+---+---+---+---+---+---+ + * N A + * enqueued_ddcbs = queue_max - (A - N) = 8 - (4 - 3) = 7 + */ + +static int queue_empty(struct ddcb_queue *queue) +{ + return queue->ddcb_next == queue->ddcb_act; +} + +static int queue_enqueued_ddcbs(struct ddcb_queue *queue) +{ + if (queue->ddcb_next >= queue->ddcb_act) + return queue->ddcb_next - queue->ddcb_act; + + return queue->ddcb_max - (queue->ddcb_act - queue->ddcb_next); +} + +static int queue_free_ddcbs(struct ddcb_queue *queue) +{ + int free_ddcbs = queue->ddcb_max - queue_enqueued_ddcbs(queue) - 1; + + if (WARN_ON_ONCE(free_ddcbs < 0)) { /* must never ever happen! */ + return 0; + } + return free_ddcbs; +} + +/* + * Use of the PRIV field in the DDCB for queue debugging: + * + * (1) Trying to get rid of a DDCB which saw a timeout: + * pddcb->priv[6] = 0xcc; # cleared + * + * (2) Append a DDCB via NEXT bit: + * pddcb->priv[7] = 0xaa; # appended + * + * (3) DDCB needed tapping: + * pddcb->priv[7] = 0xbb; # tapped + * + * (4) DDCB marked as correctly finished: + * pddcb->priv[6] = 0xff; # finished + */ + +static inline void ddcb_mark_tapped(struct ddcb *pddcb) +{ + pddcb->priv[7] = 0xbb; /* tapped */ +} + +static inline void ddcb_mark_appended(struct ddcb *pddcb) +{ + pddcb->priv[7] = 0xaa; /* appended */ +} + +static inline void ddcb_mark_cleared(struct ddcb *pddcb) +{ + pddcb->priv[6] = 0xcc; /* cleared */ +} + +static inline void ddcb_mark_finished(struct ddcb *pddcb) +{ + pddcb->priv[6] = 0xff; /* finished */ +} + +static inline void ddcb_mark_unused(struct ddcb *pddcb) +{ + pddcb->priv_64 = cpu_to_be64(0); /* not tapped */ +} + +/** + * genwqe_crc16() - Generate 16-bit crc as required for DDCBs + * @buff: pointer to data buffer + * @len: length of data for calculation + * @init: initial crc (0xffff at start) + * + * Polynomial = x^16 + x^12 + x^5 + 1 (0x1021) + * Example: 4 bytes 0x01 0x02 0x03 0x04 with init = 0xffff + * should result in a crc16 of 0x89c3 + * + * Return: crc16 checksum in big endian format ! + */ +static inline u16 genwqe_crc16(const u8 *buff, size_t len, u16 init) +{ + return crc_itu_t(init, buff, len); +} + +static void print_ddcb_info(struct genwqe_dev *cd, struct ddcb_queue *queue) +{ + int i; + struct ddcb *pddcb; + unsigned long flags; + struct pci_dev *pci_dev = cd->pci_dev; + + spin_lock_irqsave(&cd->print_lock, flags); + + dev_info(&pci_dev->dev, + "DDCB list for card #%d (ddcb_act=%d / ddcb_next=%d):\n", + cd->card_idx, queue->ddcb_act, queue->ddcb_next); + + pddcb = queue->ddcb_vaddr; + for (i = 0; i < queue->ddcb_max; i++) { + dev_err(&pci_dev->dev, + " %c %-3d: RETC=%03x SEQ=%04x HSI=%02X SHI=%02x PRIV=%06llx CMD=%03x\n", + i == queue->ddcb_act ? '>' : ' ', + i, + be16_to_cpu(pddcb->retc_16), + be16_to_cpu(pddcb->seqnum_16), + pddcb->hsi, + pddcb->shi, + be64_to_cpu(pddcb->priv_64), + pddcb->cmd); + pddcb++; + } + spin_unlock_irqrestore(&cd->print_lock, flags); +} + +struct genwqe_ddcb_cmd *ddcb_requ_alloc(void) +{ + struct ddcb_requ *req; + + req = kzalloc(sizeof(*req), GFP_KERNEL); + if (!req) + return NULL; + + return &req->cmd; +} + +void ddcb_requ_free(struct genwqe_ddcb_cmd *cmd) +{ + struct ddcb_requ *req = container_of(cmd, struct ddcb_requ, cmd); + + kfree(req); +} + +static inline enum genwqe_requ_state ddcb_requ_get_state(struct ddcb_requ *req) +{ + return req->req_state; +} + +static inline void ddcb_requ_set_state(struct ddcb_requ *req, + enum genwqe_requ_state new_state) +{ + req->req_state = new_state; +} + +static inline int ddcb_requ_collect_debug_data(struct ddcb_requ *req) +{ + return req->cmd.ddata_addr != 0x0; +} + +/** + * ddcb_requ_finished() - Returns the hardware state of the associated DDCB + * @cd: pointer to genwqe device descriptor + * @req: DDCB work request + * + * Status of ddcb_requ mirrors this hardware state, but is copied in + * the ddcb_requ on interrupt/polling function. The lowlevel code + * should check the hardware state directly, the higher level code + * should check the copy. + * + * This function will also return true if the state of the queue is + * not GENWQE_CARD_USED. This enables us to purge all DDCBs in the + * shutdown case. + */ +static int ddcb_requ_finished(struct genwqe_dev *cd, struct ddcb_requ *req) +{ + return (ddcb_requ_get_state(req) == GENWQE_REQU_FINISHED) || + (cd->card_state != GENWQE_CARD_USED); +} + +/** + * enqueue_ddcb() - Enqueue a DDCB + * @cd: pointer to genwqe device descriptor + * @queue: queue this operation should be done on + * @ddcb_no: pointer to ddcb number being tapped + * + * Start execution of DDCB by tapping or append to queue via NEXT + * bit. This is done by an atomic 'compare and swap' instruction and + * checking SHI and HSI of the previous DDCB. + * + * This function must only be called with ddcb_lock held. + * + * Return: 1 if new DDCB is appended to previous + * 2 if DDCB queue is tapped via register/simulation + */ +#define RET_DDCB_APPENDED 1 +#define RET_DDCB_TAPPED 2 + +static int enqueue_ddcb(struct genwqe_dev *cd, struct ddcb_queue *queue, + struct ddcb *pddcb, int ddcb_no) +{ + unsigned int try; + int prev_no; + struct ddcb *prev_ddcb; + __be32 old, new, icrc_hsi_shi; + u64 num; + + /* + * For performance checks a Dispatch Timestamp can be put into + * DDCB It is supposed to use the SLU's free running counter, + * but this requires PCIe cycles. + */ + ddcb_mark_unused(pddcb); + + /* check previous DDCB if already fetched */ + prev_no = (ddcb_no == 0) ? queue->ddcb_max - 1 : ddcb_no - 1; + prev_ddcb = &queue->ddcb_vaddr[prev_no]; + + /* + * It might have happened that the HSI.FETCHED bit is + * set. Retry in this case. Therefore I expect maximum 2 times + * trying. + */ + ddcb_mark_appended(pddcb); + for (try = 0; try < 2; try++) { + old = prev_ddcb->icrc_hsi_shi_32; /* read SHI/HSI in BE32 */ + + /* try to append via NEXT bit if prev DDCB is not completed */ + if ((old & DDCB_COMPLETED_BE32) != 0x00000000) + break; + + new = (old | DDCB_NEXT_BE32); + + wmb(); /* need to ensure write ordering */ + icrc_hsi_shi = cmpxchg(&prev_ddcb->icrc_hsi_shi_32, old, new); + + if (icrc_hsi_shi == old) + return RET_DDCB_APPENDED; /* appended to queue */ + } + + /* Queue must be re-started by updating QUEUE_OFFSET */ + ddcb_mark_tapped(pddcb); + num = (u64)ddcb_no << 8; + + wmb(); /* need to ensure write ordering */ + __genwqe_writeq(cd, queue->IO_QUEUE_OFFSET, num); /* start queue */ + + return RET_DDCB_TAPPED; +} + +/** + * copy_ddcb_results() - Copy output state from real DDCB to request + * + * Copy DDCB ASV to request struct. There is no endian + * conversion made, since data structure in ASV is still + * unknown here. + * + * This is needed by: + * - genwqe_purge_ddcb() + * - genwqe_check_ddcb_queue() + */ +static void copy_ddcb_results(struct ddcb_requ *req, int ddcb_no) +{ + struct ddcb_queue *queue = req->queue; + struct ddcb *pddcb = &queue->ddcb_vaddr[req->num]; + + memcpy(&req->cmd.asv[0], &pddcb->asv[0], DDCB_ASV_LENGTH); + + /* copy status flags of the variant part */ + req->cmd.vcrc = be16_to_cpu(pddcb->vcrc_16); + req->cmd.deque_ts = be64_to_cpu(pddcb->deque_ts_64); + req->cmd.cmplt_ts = be64_to_cpu(pddcb->cmplt_ts_64); + + req->cmd.attn = be16_to_cpu(pddcb->attn_16); + req->cmd.progress = be32_to_cpu(pddcb->progress_32); + req->cmd.retc = be16_to_cpu(pddcb->retc_16); + + if (ddcb_requ_collect_debug_data(req)) { + int prev_no = (ddcb_no == 0) ? + queue->ddcb_max - 1 : ddcb_no - 1; + struct ddcb *prev_pddcb = &queue->ddcb_vaddr[prev_no]; + + memcpy(&req->debug_data.ddcb_finished, pddcb, + sizeof(req->debug_data.ddcb_finished)); + memcpy(&req->debug_data.ddcb_prev, prev_pddcb, + sizeof(req->debug_data.ddcb_prev)); + } +} + +/** + * genwqe_check_ddcb_queue() - Checks DDCB queue for completed work equests. + * @cd: pointer to genwqe device descriptor + * + * Return: Number of DDCBs which were finished + */ +static int genwqe_check_ddcb_queue(struct genwqe_dev *cd, + struct ddcb_queue *queue) +{ + unsigned long flags; + int ddcbs_finished = 0; + struct pci_dev *pci_dev = cd->pci_dev; + + spin_lock_irqsave(&queue->ddcb_lock, flags); + + /* FIXME avoid soft locking CPU */ + while (!queue_empty(queue) && (ddcbs_finished < queue->ddcb_max)) { + + struct ddcb *pddcb; + struct ddcb_requ *req; + u16 vcrc, vcrc_16, retc_16; + + pddcb = &queue->ddcb_vaddr[queue->ddcb_act]; + + if ((pddcb->icrc_hsi_shi_32 & DDCB_COMPLETED_BE32) == + 0x00000000) + goto go_home; /* not completed, continue waiting */ + + wmb(); /* Add sync to decouple prev. read operations */ + + /* Note: DDCB could be purged */ + req = queue->ddcb_req[queue->ddcb_act]; + if (req == NULL) { + /* this occurs if DDCB is purged, not an error */ + /* Move active DDCB further; Nothing to do anymore. */ + goto pick_next_one; + } + + /* + * HSI=0x44 (fetched and completed), but RETC is + * 0x101, or even worse 0x000. + * + * In case of seeing the queue in inconsistent state + * we read the errcnts and the queue status to provide + * a trigger for our PCIe analyzer stop capturing. + */ + retc_16 = be16_to_cpu(pddcb->retc_16); + if ((pddcb->hsi == 0x44) && (retc_16 <= 0x101)) { + u64 errcnts, status; + u64 ddcb_offs = (u64)pddcb - (u64)queue->ddcb_vaddr; + + errcnts = __genwqe_readq(cd, queue->IO_QUEUE_ERRCNTS); + status = __genwqe_readq(cd, queue->IO_QUEUE_STATUS); + + dev_err(&pci_dev->dev, + "[%s] SEQN=%04x HSI=%02x RETC=%03x Q_ERRCNTS=%016llx Q_STATUS=%016llx DDCB_DMA_ADDR=%016llx\n", + __func__, be16_to_cpu(pddcb->seqnum_16), + pddcb->hsi, retc_16, errcnts, status, + queue->ddcb_daddr + ddcb_offs); + } + + copy_ddcb_results(req, queue->ddcb_act); + queue->ddcb_req[queue->ddcb_act] = NULL; /* take from queue */ + + dev_dbg(&pci_dev->dev, "FINISHED DDCB#%d\n", req->num); + genwqe_hexdump(pci_dev, pddcb, sizeof(*pddcb)); + + ddcb_mark_finished(pddcb); + + /* calculate CRC_16 to see if VCRC is correct */ + vcrc = genwqe_crc16(pddcb->asv, + VCRC_LENGTH(req->cmd.asv_length), + 0xffff); + vcrc_16 = be16_to_cpu(pddcb->vcrc_16); + if (vcrc != vcrc_16) { + printk_ratelimited(KERN_ERR + "%s %s: err: wrong VCRC pre=%02x vcrc_len=%d bytes vcrc_data=%04x is not vcrc_card=%04x\n", + GENWQE_DEVNAME, dev_name(&pci_dev->dev), + pddcb->pre, VCRC_LENGTH(req->cmd.asv_length), + vcrc, vcrc_16); + } + + ddcb_requ_set_state(req, GENWQE_REQU_FINISHED); + queue->ddcbs_completed++; + queue->ddcbs_in_flight--; + + /* wake up process waiting for this DDCB, and + processes on the busy queue */ + wake_up_interruptible(&queue->ddcb_waitqs[queue->ddcb_act]); + wake_up_interruptible(&queue->busy_waitq); + +pick_next_one: + queue->ddcb_act = (queue->ddcb_act + 1) % queue->ddcb_max; + ddcbs_finished++; + } + + go_home: + spin_unlock_irqrestore(&queue->ddcb_lock, flags); + return ddcbs_finished; +} + +/** + * __genwqe_wait_ddcb(): Waits until DDCB is completed + * @cd: pointer to genwqe device descriptor + * @req: pointer to requsted DDCB parameters + * + * The Service Layer will update the RETC in DDCB when processing is + * pending or done. + * + * Return: > 0 remaining jiffies, DDCB completed + * -ETIMEDOUT when timeout + * -ERESTARTSYS when ^C + * -EINVAL when unknown error condition + * + * When an error is returned the called needs to ensure that + * purge_ddcb() is being called to get the &req removed from the + * queue. + */ +int __genwqe_wait_ddcb(struct genwqe_dev *cd, struct ddcb_requ *req) +{ + int rc; + unsigned int ddcb_no; + struct ddcb_queue *queue; + struct pci_dev *pci_dev = cd->pci_dev; + + if (req == NULL) + return -EINVAL; + + queue = req->queue; + if (queue == NULL) + return -EINVAL; + + ddcb_no = req->num; + if (ddcb_no >= queue->ddcb_max) + return -EINVAL; + + rc = wait_event_interruptible_timeout(queue->ddcb_waitqs[ddcb_no], + ddcb_requ_finished(cd, req), + GENWQE_DDCB_SOFTWARE_TIMEOUT * HZ); + + /* + * We need to distinguish 3 cases here: + * 1. rc == 0 timeout occured + * 2. rc == -ERESTARTSYS signal received + * 3. rc > 0 remaining jiffies condition is true + */ + if (rc == 0) { + struct ddcb_queue *queue = req->queue; + struct ddcb *pddcb; + + /* + * Timeout may be caused by long task switching time. + * When timeout happens, check if the request has + * meanwhile completed. + */ + genwqe_check_ddcb_queue(cd, req->queue); + if (ddcb_requ_finished(cd, req)) + return rc; + + dev_err(&pci_dev->dev, + "[%s] err: DDCB#%d timeout rc=%d state=%d req @ %p\n", + __func__, req->num, rc, ddcb_requ_get_state(req), + req); + dev_err(&pci_dev->dev, + "[%s] IO_QUEUE_STATUS=0x%016llx\n", __func__, + __genwqe_readq(cd, queue->IO_QUEUE_STATUS)); + + pddcb = &queue->ddcb_vaddr[req->num]; + genwqe_hexdump(pci_dev, pddcb, sizeof(*pddcb)); + + print_ddcb_info(cd, req->queue); + return -ETIMEDOUT; + + } else if (rc == -ERESTARTSYS) { + return rc; + /* + * EINTR: Stops the application + * ERESTARTSYS: Restartable systemcall; called again + */ + + } else if (rc < 0) { + dev_err(&pci_dev->dev, + "[%s] err: DDCB#%d unknown result (rc=%d) %d!\n", + __func__, req->num, rc, ddcb_requ_get_state(req)); + return -EINVAL; + } + + /* Severe error occured. Driver is forced to stop operation */ + if (cd->card_state != GENWQE_CARD_USED) { + dev_err(&pci_dev->dev, + "[%s] err: DDCB#%d forced to stop (rc=%d)\n", + __func__, req->num, rc); + return -EIO; + } + return rc; +} + +/** + * get_next_ddcb() - Get next available DDCB + * @cd: pointer to genwqe device descriptor + * + * DDCB's content is completely cleared but presets for PRE and + * SEQNUM. This function must only be called when ddcb_lock is held. + * + * Return: NULL if no empty DDCB available otherwise ptr to next DDCB. + */ +static struct ddcb *get_next_ddcb(struct genwqe_dev *cd, + struct ddcb_queue *queue, + int *num) +{ + u64 *pu64; + struct ddcb *pddcb; + + if (queue_free_ddcbs(queue) == 0) /* queue is full */ + return NULL; + + /* find new ddcb */ + pddcb = &queue->ddcb_vaddr[queue->ddcb_next]; + + /* if it is not completed, we are not allowed to use it */ + /* barrier(); */ + if ((pddcb->icrc_hsi_shi_32 & DDCB_COMPLETED_BE32) == 0x00000000) + return NULL; + + *num = queue->ddcb_next; /* internal DDCB number */ + queue->ddcb_next = (queue->ddcb_next + 1) % queue->ddcb_max; + + /* clear important DDCB fields */ + pu64 = (u64 *)pddcb; + pu64[0] = 0ULL; /* offs 0x00 (ICRC,HSI,SHI,...) */ + pu64[1] = 0ULL; /* offs 0x01 (ACFUNC,CMD...) */ + + /* destroy previous results in ASV */ + pu64[0x80/8] = 0ULL; /* offs 0x80 (ASV + 0) */ + pu64[0x88/8] = 0ULL; /* offs 0x88 (ASV + 0x08) */ + pu64[0x90/8] = 0ULL; /* offs 0x90 (ASV + 0x10) */ + pu64[0x98/8] = 0ULL; /* offs 0x98 (ASV + 0x18) */ + pu64[0xd0/8] = 0ULL; /* offs 0xd0 (RETC,ATTN...) */ + + pddcb->pre = DDCB_PRESET_PRE; /* 128 */ + pddcb->seqnum_16 = cpu_to_be16(queue->ddcb_seq++); + return pddcb; +} + +/** + * __genwqe_purge_ddcb() - Remove a DDCB from the workqueue + * @cd: genwqe device descriptor + * @req: DDCB request + * + * This will fail when the request was already FETCHED. In this case + * we need to wait until it is finished. Else the DDCB can be + * reused. This function also ensures that the request data structure + * is removed from ddcb_req[]. + * + * Do not forget to call this function when genwqe_wait_ddcb() fails, + * such that the request gets really removed from ddcb_req[]. + * + * Return: 0 success + */ +int __genwqe_purge_ddcb(struct genwqe_dev *cd, struct ddcb_requ *req) +{ + struct ddcb *pddcb = NULL; + unsigned int t; + unsigned long flags; + struct ddcb_queue *queue = req->queue; + struct pci_dev *pci_dev = cd->pci_dev; + u64 queue_status; + __be32 icrc_hsi_shi = 0x0000; + __be32 old, new; + + /* unsigned long flags; */ + if (GENWQE_DDCB_SOFTWARE_TIMEOUT <= 0) { + dev_err(&pci_dev->dev, + "[%s] err: software timeout is not set!\n", __func__); + return -EFAULT; + } + + pddcb = &queue->ddcb_vaddr[req->num]; + + for (t = 0; t < GENWQE_DDCB_SOFTWARE_TIMEOUT * 10; t++) { + + spin_lock_irqsave(&queue->ddcb_lock, flags); + + /* Check if req was meanwhile finished */ + if (ddcb_requ_get_state(req) == GENWQE_REQU_FINISHED) + goto go_home; + + /* try to set PURGE bit if FETCHED/COMPLETED are not set */ + old = pddcb->icrc_hsi_shi_32; /* read SHI/HSI in BE32 */ + if ((old & DDCB_FETCHED_BE32) == 0x00000000) { + + new = (old | DDCB_PURGE_BE32); + icrc_hsi_shi = cmpxchg(&pddcb->icrc_hsi_shi_32, + old, new); + if (icrc_hsi_shi == old) + goto finish_ddcb; + } + + /* normal finish with HSI bit */ + barrier(); + icrc_hsi_shi = pddcb->icrc_hsi_shi_32; + if (icrc_hsi_shi & DDCB_COMPLETED_BE32) + goto finish_ddcb; + + spin_unlock_irqrestore(&queue->ddcb_lock, flags); + + /* + * Here the check_ddcb() function will most likely + * discover this DDCB to be finished some point in + * time. It will mark the req finished and free it up + * in the list. + */ + + copy_ddcb_results(req, req->num); /* for the failing case */ + msleep(100); /* sleep for 1/10 second and try again */ + continue; + +finish_ddcb: + copy_ddcb_results(req, req->num); + ddcb_requ_set_state(req, GENWQE_REQU_FINISHED); + queue->ddcbs_in_flight--; + queue->ddcb_req[req->num] = NULL; /* delete from array */ + ddcb_mark_cleared(pddcb); + + /* Move active DDCB further; Nothing to do here anymore. */ + + /* + * We need to ensure that there is at least one free + * DDCB in the queue. To do that, we must update + * ddcb_act only if the COMPLETED bit is set for the + * DDCB we are working on else we treat that DDCB even + * if we PURGED it as occupied (hardware is supposed + * to set the COMPLETED bit yet!). + */ + icrc_hsi_shi = pddcb->icrc_hsi_shi_32; + if ((icrc_hsi_shi & DDCB_COMPLETED_BE32) && + (queue->ddcb_act == req->num)) { + queue->ddcb_act = ((queue->ddcb_act + 1) % + queue->ddcb_max); + } +go_home: + spin_unlock_irqrestore(&queue->ddcb_lock, flags); + return 0; + } + + /* + * If the card is dead and the queue is forced to stop, we + * might see this in the queue status register. + */ + queue_status = __genwqe_readq(cd, queue->IO_QUEUE_STATUS); + + dev_dbg(&pci_dev->dev, "UN/FINISHED DDCB#%d\n", req->num); + genwqe_hexdump(pci_dev, pddcb, sizeof(*pddcb)); + + dev_err(&pci_dev->dev, + "[%s] err: DDCB#%d not purged and not completed after %d seconds QSTAT=%016llx!!\n", + __func__, req->num, GENWQE_DDCB_SOFTWARE_TIMEOUT, + queue_status); + + print_ddcb_info(cd, req->queue); + + return -EFAULT; +} + +int genwqe_init_debug_data(struct genwqe_dev *cd, struct genwqe_debug_data *d) +{ + int len; + struct pci_dev *pci_dev = cd->pci_dev; + + if (d == NULL) { + dev_err(&pci_dev->dev, + "[%s] err: invalid memory for debug data!\n", + __func__); + return -EFAULT; + } + + len = sizeof(d->driver_version); + snprintf(d->driver_version, len, "%s", DRV_VERSION); + d->slu_unitcfg = cd->slu_unitcfg; + d->app_unitcfg = cd->app_unitcfg; + return 0; +} + +/** + * __genwqe_enqueue_ddcb() - Enqueue a DDCB + * @cd: pointer to genwqe device descriptor + * @req: pointer to DDCB execution request + * @f_flags: file mode: blocking, non-blocking + * + * Return: 0 if enqueuing succeeded + * -EIO if card is unusable/PCIe problems + * -EBUSY if enqueuing failed + */ +int __genwqe_enqueue_ddcb(struct genwqe_dev *cd, struct ddcb_requ *req, + unsigned int f_flags) +{ + struct ddcb *pddcb; + unsigned long flags; + struct ddcb_queue *queue; + struct pci_dev *pci_dev = cd->pci_dev; + u16 icrc; + + retry: + if (cd->card_state != GENWQE_CARD_USED) { + printk_ratelimited(KERN_ERR + "%s %s: [%s] Card is unusable/PCIe problem Req#%d\n", + GENWQE_DEVNAME, dev_name(&pci_dev->dev), + __func__, req->num); + return -EIO; + } + + queue = req->queue = &cd->queue; + + /* FIXME circumvention to improve performance when no irq is + * there. + */ + if (GENWQE_POLLING_ENABLED) + genwqe_check_ddcb_queue(cd, queue); + + /* + * It must be ensured to process all DDCBs in successive + * order. Use a lock here in order to prevent nested DDCB + * enqueuing. + */ + spin_lock_irqsave(&queue->ddcb_lock, flags); + + pddcb = get_next_ddcb(cd, queue, &req->num); /* get ptr and num */ + if (pddcb == NULL) { + int rc; + + spin_unlock_irqrestore(&queue->ddcb_lock, flags); + + if (f_flags & O_NONBLOCK) { + queue->return_on_busy++; + return -EBUSY; + } + + queue->wait_on_busy++; + rc = wait_event_interruptible(queue->busy_waitq, + queue_free_ddcbs(queue) != 0); + dev_dbg(&pci_dev->dev, "[%s] waiting for free DDCB: rc=%d\n", + __func__, rc); + if (rc == -ERESTARTSYS) + return rc; /* interrupted by a signal */ + + goto retry; + } + + if (queue->ddcb_req[req->num] != NULL) { + spin_unlock_irqrestore(&queue->ddcb_lock, flags); + + dev_err(&pci_dev->dev, + "[%s] picked DDCB %d with req=%p still in use!!\n", + __func__, req->num, req); + return -EFAULT; + } + ddcb_requ_set_state(req, GENWQE_REQU_ENQUEUED); + queue->ddcb_req[req->num] = req; + + pddcb->cmdopts_16 = cpu_to_be16(req->cmd.cmdopts); + pddcb->cmd = req->cmd.cmd; + pddcb->acfunc = req->cmd.acfunc; /* functional unit */ + + /* + * We know that we can get retc 0x104 with CRC error, do not + * stop the queue in those cases for this command. XDIR = 1 + * does not work for old SLU versions. + * + * Last bitstream with the old XDIR behavior had SLU_ID + * 0x34199. + */ + if ((cd->slu_unitcfg & 0xFFFF0ull) > 0x34199ull) + pddcb->xdir = 0x1; + else + pddcb->xdir = 0x0; + + + pddcb->psp = (((req->cmd.asiv_length / 8) << 4) | + ((req->cmd.asv_length / 8))); + pddcb->disp_ts_64 = cpu_to_be64(req->cmd.disp_ts); + + /* + * If copying the whole DDCB_ASIV_LENGTH is impacting + * performance we need to change it to + * req->cmd.asiv_length. But simulation benefits from some + * non-architectured bits behind the architectured content. + * + * How much data is copied depends on the availability of the + * ATS field, which was introduced late. If the ATS field is + * supported ASIV is 8 bytes shorter than it used to be. Since + * the ATS field is copied too, the code should do exactly + * what it did before, but I wanted to make copying of the ATS + * field very explicit. + */ + if (genwqe_get_slu_id(cd) <= 0x2) { + memcpy(&pddcb->__asiv[0], /* destination */ + &req->cmd.__asiv[0], /* source */ + DDCB_ASIV_LENGTH); /* req->cmd.asiv_length */ + } else { + pddcb->n.ats_64 = cpu_to_be64(req->cmd.ats); + memcpy(&pddcb->n.asiv[0], /* destination */ + &req->cmd.asiv[0], /* source */ + DDCB_ASIV_LENGTH_ATS); /* req->cmd.asiv_length */ + } + + pddcb->icrc_hsi_shi_32 = cpu_to_be32(0x00000000); /* for crc */ + + /* + * Calculate CRC_16 for corresponding range PSP(7:4). Include + * empty 4 bytes prior to the data. + */ + icrc = genwqe_crc16((const u8 *)pddcb, + ICRC_LENGTH(req->cmd.asiv_length), 0xffff); + pddcb->icrc_hsi_shi_32 = cpu_to_be32((u32)icrc << 16); + + /* enable DDCB completion irq */ + if (!GENWQE_POLLING_ENABLED) + pddcb->icrc_hsi_shi_32 |= DDCB_INTR_BE32; + + dev_dbg(&pci_dev->dev, "INPUT DDCB#%d\n", req->num); + genwqe_hexdump(pci_dev, pddcb, sizeof(*pddcb)); + + if (ddcb_requ_collect_debug_data(req)) { + /* use the kernel copy of debug data. copying back to + user buffer happens later */ + + genwqe_init_debug_data(cd, &req->debug_data); + memcpy(&req->debug_data.ddcb_before, pddcb, + sizeof(req->debug_data.ddcb_before)); + } + + enqueue_ddcb(cd, queue, pddcb, req->num); + queue->ddcbs_in_flight++; + + if (queue->ddcbs_in_flight > queue->ddcbs_max_in_flight) + queue->ddcbs_max_in_flight = queue->ddcbs_in_flight; + + ddcb_requ_set_state(req, GENWQE_REQU_TAPPED); + spin_unlock_irqrestore(&queue->ddcb_lock, flags); + wake_up_interruptible(&cd->queue_waitq); + + return 0; +} + +/** + * __genwqe_execute_raw_ddcb() - Setup and execute DDCB + * @cd: pointer to genwqe device descriptor + * @req: user provided DDCB request + * @f_flags: file mode: blocking, non-blocking + */ +int __genwqe_execute_raw_ddcb(struct genwqe_dev *cd, + struct genwqe_ddcb_cmd *cmd, + unsigned int f_flags) +{ + int rc = 0; + struct pci_dev *pci_dev = cd->pci_dev; + struct ddcb_requ *req = container_of(cmd, struct ddcb_requ, cmd); + + if (cmd->asiv_length > DDCB_ASIV_LENGTH) { + dev_err(&pci_dev->dev, "[%s] err: wrong asiv_length of %d\n", + __func__, cmd->asiv_length); + return -EINVAL; + } + if (cmd->asv_length > DDCB_ASV_LENGTH) { + dev_err(&pci_dev->dev, "[%s] err: wrong asv_length of %d\n", + __func__, cmd->asiv_length); + return -EINVAL; + } + rc = __genwqe_enqueue_ddcb(cd, req, f_flags); + if (rc != 0) + return rc; + + rc = __genwqe_wait_ddcb(cd, req); + if (rc < 0) /* error or signal interrupt */ + goto err_exit; + + if (ddcb_requ_collect_debug_data(req)) { + if (copy_to_user((struct genwqe_debug_data __user *) + (unsigned long)cmd->ddata_addr, + &req->debug_data, + sizeof(struct genwqe_debug_data))) + return -EFAULT; + } + + /* + * Higher values than 0x102 indicate completion with faults, + * lower values than 0x102 indicate processing faults. Note + * that DDCB might have been purged. E.g. Cntl+C. + */ + if (cmd->retc != DDCB_RETC_COMPLETE) { + /* This might happen e.g. flash read, and needs to be + handled by the upper layer code. */ + rc = -EBADMSG; /* not processed/error retc */ + } + + return rc; + + err_exit: + __genwqe_purge_ddcb(cd, req); + + if (ddcb_requ_collect_debug_data(req)) { + if (copy_to_user((struct genwqe_debug_data __user *) + (unsigned long)cmd->ddata_addr, + &req->debug_data, + sizeof(struct genwqe_debug_data))) + return -EFAULT; + } + return rc; +} + +/** + * genwqe_next_ddcb_ready() - Figure out if the next DDCB is already finished + * + * We use this as condition for our wait-queue code. + */ +static int genwqe_next_ddcb_ready(struct genwqe_dev *cd) +{ + unsigned long flags; + struct ddcb *pddcb; + struct ddcb_queue *queue = &cd->queue; + + spin_lock_irqsave(&queue->ddcb_lock, flags); + + if (queue_empty(queue)) { /* emtpy queue */ + spin_unlock_irqrestore(&queue->ddcb_lock, flags); + return 0; + } + + pddcb = &queue->ddcb_vaddr[queue->ddcb_act]; + if (pddcb->icrc_hsi_shi_32 & DDCB_COMPLETED_BE32) { /* ddcb ready */ + spin_unlock_irqrestore(&queue->ddcb_lock, flags); + return 1; + } + + spin_unlock_irqrestore(&queue->ddcb_lock, flags); + return 0; +} + +/** + * genwqe_ddcbs_in_flight() - Check how many DDCBs are in flight + * + * Keep track on the number of DDCBs which ware currently in the + * queue. This is needed for statistics as well as conditon if we want + * to wait or better do polling in case of no interrupts available. + */ +int genwqe_ddcbs_in_flight(struct genwqe_dev *cd) +{ + unsigned long flags; + int ddcbs_in_flight = 0; + struct ddcb_queue *queue = &cd->queue; + + spin_lock_irqsave(&queue->ddcb_lock, flags); + ddcbs_in_flight += queue->ddcbs_in_flight; + spin_unlock_irqrestore(&queue->ddcb_lock, flags); + + return ddcbs_in_flight; +} + +static int setup_ddcb_queue(struct genwqe_dev *cd, struct ddcb_queue *queue) +{ + int rc, i; + struct ddcb *pddcb; + u64 val64; + unsigned int queue_size; + struct pci_dev *pci_dev = cd->pci_dev; + + if (GENWQE_DDCB_MAX < 2) + return -EINVAL; + + queue_size = roundup(GENWQE_DDCB_MAX * sizeof(struct ddcb), PAGE_SIZE); + + queue->ddcbs_in_flight = 0; /* statistics */ + queue->ddcbs_max_in_flight = 0; + queue->ddcbs_completed = 0; + queue->return_on_busy = 0; + queue->wait_on_busy = 0; + + queue->ddcb_seq = 0x100; /* start sequence number */ + queue->ddcb_max = GENWQE_DDCB_MAX; + queue->ddcb_vaddr = __genwqe_alloc_consistent(cd, queue_size, + &queue->ddcb_daddr); + if (queue->ddcb_vaddr == NULL) { + dev_err(&pci_dev->dev, + "[%s] **err: could not allocate DDCB **\n", __func__); + return -ENOMEM; + } + queue->ddcb_req = kcalloc(queue->ddcb_max, sizeof(struct ddcb_requ *), + GFP_KERNEL); + if (!queue->ddcb_req) { + rc = -ENOMEM; + goto free_ddcbs; + } + + queue->ddcb_waitqs = kcalloc(queue->ddcb_max, + sizeof(wait_queue_head_t), + GFP_KERNEL); + if (!queue->ddcb_waitqs) { + rc = -ENOMEM; + goto free_requs; + } + + for (i = 0; i < queue->ddcb_max; i++) { + pddcb = &queue->ddcb_vaddr[i]; /* DDCBs */ + pddcb->icrc_hsi_shi_32 = DDCB_COMPLETED_BE32; + pddcb->retc_16 = cpu_to_be16(0xfff); + + queue->ddcb_req[i] = NULL; /* requests */ + init_waitqueue_head(&queue->ddcb_waitqs[i]); /* waitqueues */ + } + + queue->ddcb_act = 0; + queue->ddcb_next = 0; /* queue is empty */ + + spin_lock_init(&queue->ddcb_lock); + init_waitqueue_head(&queue->busy_waitq); + + val64 = ((u64)(queue->ddcb_max - 1) << 8); /* lastptr */ + __genwqe_writeq(cd, queue->IO_QUEUE_CONFIG, 0x07); /* iCRC/vCRC */ + __genwqe_writeq(cd, queue->IO_QUEUE_SEGMENT, queue->ddcb_daddr); + __genwqe_writeq(cd, queue->IO_QUEUE_INITSQN, queue->ddcb_seq); + __genwqe_writeq(cd, queue->IO_QUEUE_WRAP, val64); + return 0; + + free_requs: + kfree(queue->ddcb_req); + queue->ddcb_req = NULL; + free_ddcbs: + __genwqe_free_consistent(cd, queue_size, queue->ddcb_vaddr, + queue->ddcb_daddr); + queue->ddcb_vaddr = NULL; + queue->ddcb_daddr = 0ull; + return -ENODEV; + +} + +static int ddcb_queue_initialized(struct ddcb_queue *queue) +{ + return queue->ddcb_vaddr != NULL; +} + +static void free_ddcb_queue(struct genwqe_dev *cd, struct ddcb_queue *queue) +{ + unsigned int queue_size; + + queue_size = roundup(queue->ddcb_max * sizeof(struct ddcb), PAGE_SIZE); + + kfree(queue->ddcb_req); + queue->ddcb_req = NULL; + + if (queue->ddcb_vaddr) { + __genwqe_free_consistent(cd, queue_size, queue->ddcb_vaddr, + queue->ddcb_daddr); + queue->ddcb_vaddr = NULL; + queue->ddcb_daddr = 0ull; + } +} + +static irqreturn_t genwqe_pf_isr(int irq, void *dev_id) +{ + u64 gfir; + struct genwqe_dev *cd = (struct genwqe_dev *)dev_id; + struct pci_dev *pci_dev = cd->pci_dev; + + /* + * In case of fatal FIR error the queue is stopped, such that + * we can safely check it without risking anything. + */ + cd->irqs_processed++; + wake_up_interruptible(&cd->queue_waitq); + + /* + * Checking for errors before kicking the queue might be + * safer, but slower for the good-case ... See above. + */ + gfir = __genwqe_readq(cd, IO_SLC_CFGREG_GFIR); + if (((gfir & GFIR_ERR_TRIGGER) != 0x0) && + !pci_channel_offline(pci_dev)) { + + if (cd->use_platform_recovery) { + /* + * Since we use raw accessors, EEH errors won't be + * detected by the platform until we do a non-raw + * MMIO or config space read + */ + readq(cd->mmio + IO_SLC_CFGREG_GFIR); + + /* Don't do anything if the PCI channel is frozen */ + if (pci_channel_offline(pci_dev)) + goto exit; + } + + wake_up_interruptible(&cd->health_waitq); + + /* + * By default GFIRs causes recovery actions. This + * count is just for debug when recovery is masked. + */ + dev_err_ratelimited(&pci_dev->dev, + "[%s] GFIR=%016llx\n", + __func__, gfir); + } + + exit: + return IRQ_HANDLED; +} + +static irqreturn_t genwqe_vf_isr(int irq, void *dev_id) +{ + struct genwqe_dev *cd = (struct genwqe_dev *)dev_id; + + cd->irqs_processed++; + wake_up_interruptible(&cd->queue_waitq); + + return IRQ_HANDLED; +} + +/** + * genwqe_card_thread() - Work thread for the DDCB queue + * + * The idea is to check if there are DDCBs in processing. If there are + * some finished DDCBs, we process them and wakeup the + * requestors. Otherwise we give other processes time using + * cond_resched(). + */ +static int genwqe_card_thread(void *data) +{ + int should_stop = 0, rc = 0; + struct genwqe_dev *cd = (struct genwqe_dev *)data; + + while (!kthread_should_stop()) { + + genwqe_check_ddcb_queue(cd, &cd->queue); + + if (GENWQE_POLLING_ENABLED) { + rc = wait_event_interruptible_timeout( + cd->queue_waitq, + genwqe_ddcbs_in_flight(cd) || + (should_stop = kthread_should_stop()), 1); + } else { + rc = wait_event_interruptible_timeout( + cd->queue_waitq, + genwqe_next_ddcb_ready(cd) || + (should_stop = kthread_should_stop()), HZ); + } + if (should_stop) + break; + + /* + * Avoid soft lockups on heavy loads; we do not want + * to disable our interrupts. + */ + cond_resched(); + } + return 0; +} + +/** + * genwqe_setup_service_layer() - Setup DDCB queue + * @cd: pointer to genwqe device descriptor + * + * Allocate DDCBs. Configure Service Layer Controller (SLC). + * + * Return: 0 success + */ +int genwqe_setup_service_layer(struct genwqe_dev *cd) +{ + int rc; + struct ddcb_queue *queue; + struct pci_dev *pci_dev = cd->pci_dev; + + if (genwqe_is_privileged(cd)) { + rc = genwqe_card_reset(cd); + if (rc < 0) { + dev_err(&pci_dev->dev, + "[%s] err: reset failed.\n", __func__); + return rc; + } + genwqe_read_softreset(cd); + } + + queue = &cd->queue; + queue->IO_QUEUE_CONFIG = IO_SLC_QUEUE_CONFIG; + queue->IO_QUEUE_STATUS = IO_SLC_QUEUE_STATUS; + queue->IO_QUEUE_SEGMENT = IO_SLC_QUEUE_SEGMENT; + queue->IO_QUEUE_INITSQN = IO_SLC_QUEUE_INITSQN; + queue->IO_QUEUE_OFFSET = IO_SLC_QUEUE_OFFSET; + queue->IO_QUEUE_WRAP = IO_SLC_QUEUE_WRAP; + queue->IO_QUEUE_WTIME = IO_SLC_QUEUE_WTIME; + queue->IO_QUEUE_ERRCNTS = IO_SLC_QUEUE_ERRCNTS; + queue->IO_QUEUE_LRW = IO_SLC_QUEUE_LRW; + + rc = setup_ddcb_queue(cd, queue); + if (rc != 0) { + rc = -ENODEV; + goto err_out; + } + + init_waitqueue_head(&cd->queue_waitq); + cd->card_thread = kthread_run(genwqe_card_thread, cd, + GENWQE_DEVNAME "%d_thread", + cd->card_idx); + if (IS_ERR(cd->card_thread)) { + rc = PTR_ERR(cd->card_thread); + cd->card_thread = NULL; + goto stop_free_queue; + } + + rc = genwqe_set_interrupt_capability(cd, GENWQE_MSI_IRQS); + if (rc) + goto stop_kthread; + + /* + * We must have all wait-queues initialized when we enable the + * interrupts. Otherwise we might crash if we get an early + * irq. + */ + init_waitqueue_head(&cd->health_waitq); + + if (genwqe_is_privileged(cd)) { + rc = request_irq(pci_dev->irq, genwqe_pf_isr, IRQF_SHARED, + GENWQE_DEVNAME, cd); + } else { + rc = request_irq(pci_dev->irq, genwqe_vf_isr, IRQF_SHARED, + GENWQE_DEVNAME, cd); + } + if (rc < 0) { + dev_err(&pci_dev->dev, "irq %d not free.\n", pci_dev->irq); + goto stop_irq_cap; + } + + cd->card_state = GENWQE_CARD_USED; + return 0; + + stop_irq_cap: + genwqe_reset_interrupt_capability(cd); + stop_kthread: + kthread_stop(cd->card_thread); + cd->card_thread = NULL; + stop_free_queue: + free_ddcb_queue(cd, queue); + err_out: + return rc; +} + +/** + * queue_wake_up_all() - Handles fatal error case + * + * The PCI device got unusable and we have to stop all pending + * requests as fast as we can. The code after this must purge the + * DDCBs in question and ensure that all mappings are freed. + */ +static int queue_wake_up_all(struct genwqe_dev *cd) +{ + unsigned int i; + unsigned long flags; + struct ddcb_queue *queue = &cd->queue; + + spin_lock_irqsave(&queue->ddcb_lock, flags); + + for (i = 0; i < queue->ddcb_max; i++) + wake_up_interruptible(&queue->ddcb_waitqs[queue->ddcb_act]); + + wake_up_interruptible(&queue->busy_waitq); + spin_unlock_irqrestore(&queue->ddcb_lock, flags); + + return 0; +} + +/** + * genwqe_finish_queue() - Remove any genwqe devices and user-interfaces + * + * Relies on the pre-condition that there are no users of the card + * device anymore e.g. with open file-descriptors. + * + * This function must be robust enough to be called twice. + */ +int genwqe_finish_queue(struct genwqe_dev *cd) +{ + int i, rc = 0, in_flight; + int waitmax = GENWQE_DDCB_SOFTWARE_TIMEOUT; + struct pci_dev *pci_dev = cd->pci_dev; + struct ddcb_queue *queue = &cd->queue; + + if (!ddcb_queue_initialized(queue)) + return 0; + + /* Do not wipe out the error state. */ + if (cd->card_state == GENWQE_CARD_USED) + cd->card_state = GENWQE_CARD_UNUSED; + + /* Wake up all requests in the DDCB queue such that they + should be removed nicely. */ + queue_wake_up_all(cd); + + /* We must wait to get rid of the DDCBs in flight */ + for (i = 0; i < waitmax; i++) { + in_flight = genwqe_ddcbs_in_flight(cd); + + if (in_flight == 0) + break; + + dev_dbg(&pci_dev->dev, + " DEBUG [%d/%d] waiting for queue to get empty: %d requests!\n", + i, waitmax, in_flight); + + /* + * Severe severe error situation: The card itself has + * 16 DDCB queues, each queue has e.g. 32 entries, + * each DDBC has a hardware timeout of currently 250 + * msec but the PFs have a hardware timeout of 8 sec + * ... so I take something large. + */ + msleep(1000); + } + if (i == waitmax) { + dev_err(&pci_dev->dev, " [%s] err: queue is not empty!!\n", + __func__); + rc = -EIO; + } + return rc; +} + +/** + * genwqe_release_service_layer() - Shutdown DDCB queue + * @cd: genwqe device descriptor + * + * This function must be robust enough to be called twice. + */ +int genwqe_release_service_layer(struct genwqe_dev *cd) +{ + struct pci_dev *pci_dev = cd->pci_dev; + + if (!ddcb_queue_initialized(&cd->queue)) + return 1; + + free_irq(pci_dev->irq, cd); + genwqe_reset_interrupt_capability(cd); + + if (cd->card_thread != NULL) { + kthread_stop(cd->card_thread); + cd->card_thread = NULL; + } + + free_ddcb_queue(cd, &cd->queue); + return 0; +} diff --git a/drivers/misc/genwqe/card_ddcb.h b/drivers/misc/genwqe/card_ddcb.h new file mode 100644 index 000000000..0361a68d7 --- /dev/null +++ b/drivers/misc/genwqe/card_ddcb.h @@ -0,0 +1,188 @@ +#ifndef __CARD_DDCB_H__ +#define __CARD_DDCB_H__ + +/** + * IBM Accelerator Family 'GenWQE' + * + * (C) Copyright IBM Corp. 2013 + * + * Author: Frank Haverkamp + * Author: Joerg-Stephan Vogt + * Author: Michael Jung + * Author: Michael Ruettger + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include + +#include "genwqe_driver.h" +#include "card_base.h" + +/** + * struct ddcb - Device Driver Control Block DDCB + * @hsi: Hardware software interlock + * @shi: Software hardware interlock. Hsi and shi are used to interlock + * software and hardware activities. We are using a compare and + * swap operation to ensure that there are no races when + * activating new DDCBs on the queue, or when we need to + * purge a DDCB from a running queue. + * @acfunc: Accelerator function addresses a unit within the chip + * @cmd: Command to work on + * @cmdopts_16: Options for the command + * @asiv: Input data + * @asv: Output data + * + * The DDCB data format is big endian. Multiple consequtive DDBCs form + * a DDCB queue. + */ +#define ASIV_LENGTH 104 /* Old specification without ATS field */ +#define ASIV_LENGTH_ATS 96 /* New specification with ATS field */ +#define ASV_LENGTH 64 + +struct ddcb { + union { + __be32 icrc_hsi_shi_32; /* iCRC, Hardware/SW interlock */ + struct { + __be16 icrc_16; + u8 hsi; + u8 shi; + }; + }; + u8 pre; /* Preamble */ + u8 xdir; /* Execution Directives */ + __be16 seqnum_16; /* Sequence Number */ + + u8 acfunc; /* Accelerator Function.. */ + u8 cmd; /* Command. */ + __be16 cmdopts_16; /* Command Options */ + u8 sur; /* Status Update Rate */ + u8 psp; /* Protection Section Pointer */ + __be16 rsvd_0e_16; /* Reserved invariant */ + + __be64 fwiv_64; /* Firmware Invariant. */ + + union { + struct { + __be64 ats_64; /* Address Translation Spec */ + u8 asiv[ASIV_LENGTH_ATS]; /* New ASIV */ + } n; + u8 __asiv[ASIV_LENGTH]; /* obsolete */ + }; + u8 asv[ASV_LENGTH]; /* Appl Spec Variant */ + + __be16 rsvd_c0_16; /* Reserved Variant */ + __be16 vcrc_16; /* Variant CRC */ + __be32 rsvd_32; /* Reserved unprotected */ + + __be64 deque_ts_64; /* Deque Time Stamp. */ + + __be16 retc_16; /* Return Code */ + __be16 attn_16; /* Attention/Extended Error Codes */ + __be32 progress_32; /* Progress indicator. */ + + __be64 cmplt_ts_64; /* Completion Time Stamp. */ + + /* The following layout matches the new service layer format */ + __be32 ibdc_32; /* Inbound Data Count (* 256) */ + __be32 obdc_32; /* Outbound Data Count (* 256) */ + + __be64 rsvd_SLH_64; /* Reserved for hardware */ + union { /* private data for driver */ + u8 priv[8]; + __be64 priv_64; + }; + __be64 disp_ts_64; /* Dispatch TimeStamp */ +} __attribute__((__packed__)); + +/* CRC polynomials for DDCB */ +#define CRC16_POLYNOMIAL 0x1021 + +/* + * SHI: Software to Hardware Interlock + * This 1 byte field is written by software to interlock the + * movement of one queue entry to another with the hardware in the + * chip. + */ +#define DDCB_SHI_INTR 0x04 /* Bit 2 */ +#define DDCB_SHI_PURGE 0x02 /* Bit 1 */ +#define DDCB_SHI_NEXT 0x01 /* Bit 0 */ + +/* + * HSI: Hardware to Software interlock + * This 1 byte field is written by hardware to interlock the movement + * of one queue entry to another with the software in the chip. + */ +#define DDCB_HSI_COMPLETED 0x40 /* Bit 6 */ +#define DDCB_HSI_FETCHED 0x04 /* Bit 2 */ + +/* + * Accessing HSI/SHI is done 32-bit wide + * Normally 16-bit access would work too, but on some platforms the + * 16 compare and swap operation is not supported. Therefore + * switching to 32-bit such that those platforms will work too. + * + * iCRC HSI/SHI + */ +#define DDCB_INTR_BE32 cpu_to_be32(0x00000004) +#define DDCB_PURGE_BE32 cpu_to_be32(0x00000002) +#define DDCB_NEXT_BE32 cpu_to_be32(0x00000001) +#define DDCB_COMPLETED_BE32 cpu_to_be32(0x00004000) +#define DDCB_FETCHED_BE32 cpu_to_be32(0x00000400) + +/* Definitions of DDCB presets */ +#define DDCB_PRESET_PRE 0x80 +#define ICRC_LENGTH(n) ((n) + 8 + 8 + 8) /* used ASIV + hdr fields */ +#define VCRC_LENGTH(n) ((n)) /* used ASV */ + +/* + * Genwqe Scatter Gather list + * Each element has up to 8 entries. + * The chaining element is element 0 cause of prefetching needs. + */ + +/* + * 0b0110 Chained descriptor. The descriptor is describing the next + * descriptor list. + */ +#define SG_CHAINED (0x6) + +/* + * 0b0010 First entry of a descriptor list. Start from a Buffer-Empty + * condition. + */ +#define SG_DATA (0x2) + +/* + * 0b0000 Early terminator. This is the last entry on the list + * irregardless of the length indicated. + */ +#define SG_END_LIST (0x0) + +/** + * struct sglist - Scatter gather list + * @target_addr: Either a dma addr of memory to work on or a + * dma addr or a subsequent sglist block. + * @len: Length of the data block. + * @flags: See above. + * + * Depending on the command the GenWQE card can use a scatter gather + * list to describe the memory it works on. Always 8 sg_entry's form + * a block. + */ +struct sg_entry { + __be64 target_addr; + __be32 len; + __be32 flags; +}; + +#endif /* __CARD_DDCB_H__ */ diff --git a/drivers/misc/genwqe/card_debugfs.c b/drivers/misc/genwqe/card_debugfs.c new file mode 100644 index 000000000..c6b82f09b --- /dev/null +++ b/drivers/misc/genwqe/card_debugfs.c @@ -0,0 +1,504 @@ +/** + * IBM Accelerator Family 'GenWQE' + * + * (C) Copyright IBM Corp. 2013 + * + * Author: Frank Haverkamp + * Author: Joerg-Stephan Vogt + * Author: Michael Jung + * Author: Michael Ruettger + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +/* + * Debugfs interfaces for the GenWQE card. Help to debug potential + * problems. Dump internal chip state for debugging and failure + * determination. + */ + +#include +#include +#include +#include +#include + +#include "card_base.h" +#include "card_ddcb.h" + +#define GENWQE_DEBUGFS_RO(_name, _showfn) \ + static int genwqe_debugfs_##_name##_open(struct inode *inode, \ + struct file *file) \ + { \ + return single_open(file, _showfn, inode->i_private); \ + } \ + static const struct file_operations genwqe_##_name##_fops = { \ + .open = genwqe_debugfs_##_name##_open, \ + .read = seq_read, \ + .llseek = seq_lseek, \ + .release = single_release, \ + } + +static void dbg_uidn_show(struct seq_file *s, struct genwqe_reg *regs, + int entries) +{ + unsigned int i; + u32 v_hi, v_lo; + + for (i = 0; i < entries; i++) { + v_hi = (regs[i].val >> 32) & 0xffffffff; + v_lo = (regs[i].val) & 0xffffffff; + + seq_printf(s, " 0x%08x 0x%08x 0x%08x 0x%08x EXT_ERR_REC\n", + regs[i].addr, regs[i].idx, v_hi, v_lo); + } +} + +static int curr_dbg_uidn_show(struct seq_file *s, void *unused, int uid) +{ + struct genwqe_dev *cd = s->private; + int entries; + struct genwqe_reg *regs; + + entries = genwqe_ffdc_buff_size(cd, uid); + if (entries < 0) + return -EINVAL; + + if (entries == 0) + return 0; + + regs = kcalloc(entries, sizeof(*regs), GFP_KERNEL); + if (regs == NULL) + return -ENOMEM; + + genwqe_stop_traps(cd); /* halt the traps while dumping data */ + genwqe_ffdc_buff_read(cd, uid, regs, entries); + genwqe_start_traps(cd); + + dbg_uidn_show(s, regs, entries); + kfree(regs); + return 0; +} + +static int genwqe_curr_dbg_uid0_show(struct seq_file *s, void *unused) +{ + return curr_dbg_uidn_show(s, unused, 0); +} + +GENWQE_DEBUGFS_RO(curr_dbg_uid0, genwqe_curr_dbg_uid0_show); + +static int genwqe_curr_dbg_uid1_show(struct seq_file *s, void *unused) +{ + return curr_dbg_uidn_show(s, unused, 1); +} + +GENWQE_DEBUGFS_RO(curr_dbg_uid1, genwqe_curr_dbg_uid1_show); + +static int genwqe_curr_dbg_uid2_show(struct seq_file *s, void *unused) +{ + return curr_dbg_uidn_show(s, unused, 2); +} + +GENWQE_DEBUGFS_RO(curr_dbg_uid2, genwqe_curr_dbg_uid2_show); + +static int prev_dbg_uidn_show(struct seq_file *s, void *unused, int uid) +{ + struct genwqe_dev *cd = s->private; + + dbg_uidn_show(s, cd->ffdc[uid].regs, cd->ffdc[uid].entries); + return 0; +} + +static int genwqe_prev_dbg_uid0_show(struct seq_file *s, void *unused) +{ + return prev_dbg_uidn_show(s, unused, 0); +} + +GENWQE_DEBUGFS_RO(prev_dbg_uid0, genwqe_prev_dbg_uid0_show); + +static int genwqe_prev_dbg_uid1_show(struct seq_file *s, void *unused) +{ + return prev_dbg_uidn_show(s, unused, 1); +} + +GENWQE_DEBUGFS_RO(prev_dbg_uid1, genwqe_prev_dbg_uid1_show); + +static int genwqe_prev_dbg_uid2_show(struct seq_file *s, void *unused) +{ + return prev_dbg_uidn_show(s, unused, 2); +} + +GENWQE_DEBUGFS_RO(prev_dbg_uid2, genwqe_prev_dbg_uid2_show); + +static int genwqe_curr_regs_show(struct seq_file *s, void *unused) +{ + struct genwqe_dev *cd = s->private; + unsigned int i; + struct genwqe_reg *regs; + + regs = kcalloc(GENWQE_FFDC_REGS, sizeof(*regs), GFP_KERNEL); + if (regs == NULL) + return -ENOMEM; + + genwqe_stop_traps(cd); + genwqe_read_ffdc_regs(cd, regs, GENWQE_FFDC_REGS, 1); + genwqe_start_traps(cd); + + for (i = 0; i < GENWQE_FFDC_REGS; i++) { + if (regs[i].addr == 0xffffffff) + break; /* invalid entries */ + + if (regs[i].val == 0x0ull) + continue; /* do not print 0x0 FIRs */ + + seq_printf(s, " 0x%08x 0x%016llx\n", + regs[i].addr, regs[i].val); + } + return 0; +} + +GENWQE_DEBUGFS_RO(curr_regs, genwqe_curr_regs_show); + +static int genwqe_prev_regs_show(struct seq_file *s, void *unused) +{ + struct genwqe_dev *cd = s->private; + unsigned int i; + struct genwqe_reg *regs = cd->ffdc[GENWQE_DBG_REGS].regs; + + if (regs == NULL) + return -EINVAL; + + for (i = 0; i < GENWQE_FFDC_REGS; i++) { + if (regs[i].addr == 0xffffffff) + break; /* invalid entries */ + + if (regs[i].val == 0x0ull) + continue; /* do not print 0x0 FIRs */ + + seq_printf(s, " 0x%08x 0x%016llx\n", + regs[i].addr, regs[i].val); + } + return 0; +} + +GENWQE_DEBUGFS_RO(prev_regs, genwqe_prev_regs_show); + +static int genwqe_jtimer_show(struct seq_file *s, void *unused) +{ + struct genwqe_dev *cd = s->private; + unsigned int vf_num; + u64 jtimer; + + jtimer = genwqe_read_vreg(cd, IO_SLC_VF_APPJOB_TIMEOUT, 0); + seq_printf(s, " PF 0x%016llx %d msec\n", jtimer, + GENWQE_PF_JOBTIMEOUT_MSEC); + + for (vf_num = 0; vf_num < cd->num_vfs; vf_num++) { + jtimer = genwqe_read_vreg(cd, IO_SLC_VF_APPJOB_TIMEOUT, + vf_num + 1); + seq_printf(s, " VF%-2d 0x%016llx %d msec\n", vf_num, jtimer, + cd->vf_jobtimeout_msec[vf_num]); + } + return 0; +} + +GENWQE_DEBUGFS_RO(jtimer, genwqe_jtimer_show); + +static int genwqe_queue_working_time_show(struct seq_file *s, void *unused) +{ + struct genwqe_dev *cd = s->private; + unsigned int vf_num; + u64 t; + + t = genwqe_read_vreg(cd, IO_SLC_VF_QUEUE_WTIME, 0); + seq_printf(s, " PF 0x%016llx\n", t); + + for (vf_num = 0; vf_num < cd->num_vfs; vf_num++) { + t = genwqe_read_vreg(cd, IO_SLC_VF_QUEUE_WTIME, vf_num + 1); + seq_printf(s, " VF%-2d 0x%016llx\n", vf_num, t); + } + return 0; +} + +GENWQE_DEBUGFS_RO(queue_working_time, genwqe_queue_working_time_show); + +static int genwqe_ddcb_info_show(struct seq_file *s, void *unused) +{ + struct genwqe_dev *cd = s->private; + unsigned int i; + struct ddcb_queue *queue; + struct ddcb *pddcb; + + queue = &cd->queue; + seq_puts(s, "DDCB QUEUE:\n"); + seq_printf(s, " ddcb_max: %d\n" + " ddcb_daddr: %016llx - %016llx\n" + " ddcb_vaddr: %016llx\n" + " ddcbs_in_flight: %u\n" + " ddcbs_max_in_flight: %u\n" + " ddcbs_completed: %u\n" + " return_on_busy: %u\n" + " wait_on_busy: %u\n" + " irqs_processed: %u\n", + queue->ddcb_max, (long long)queue->ddcb_daddr, + (long long)queue->ddcb_daddr + + (queue->ddcb_max * DDCB_LENGTH), + (long long)queue->ddcb_vaddr, queue->ddcbs_in_flight, + queue->ddcbs_max_in_flight, queue->ddcbs_completed, + queue->return_on_busy, queue->wait_on_busy, + cd->irqs_processed); + + /* Hardware State */ + seq_printf(s, " 0x%08x 0x%016llx IO_QUEUE_CONFIG\n" + " 0x%08x 0x%016llx IO_QUEUE_STATUS\n" + " 0x%08x 0x%016llx IO_QUEUE_SEGMENT\n" + " 0x%08x 0x%016llx IO_QUEUE_INITSQN\n" + " 0x%08x 0x%016llx IO_QUEUE_WRAP\n" + " 0x%08x 0x%016llx IO_QUEUE_OFFSET\n" + " 0x%08x 0x%016llx IO_QUEUE_WTIME\n" + " 0x%08x 0x%016llx IO_QUEUE_ERRCNTS\n" + " 0x%08x 0x%016llx IO_QUEUE_LRW\n", + queue->IO_QUEUE_CONFIG, + __genwqe_readq(cd, queue->IO_QUEUE_CONFIG), + queue->IO_QUEUE_STATUS, + __genwqe_readq(cd, queue->IO_QUEUE_STATUS), + queue->IO_QUEUE_SEGMENT, + __genwqe_readq(cd, queue->IO_QUEUE_SEGMENT), + queue->IO_QUEUE_INITSQN, + __genwqe_readq(cd, queue->IO_QUEUE_INITSQN), + queue->IO_QUEUE_WRAP, + __genwqe_readq(cd, queue->IO_QUEUE_WRAP), + queue->IO_QUEUE_OFFSET, + __genwqe_readq(cd, queue->IO_QUEUE_OFFSET), + queue->IO_QUEUE_WTIME, + __genwqe_readq(cd, queue->IO_QUEUE_WTIME), + queue->IO_QUEUE_ERRCNTS, + __genwqe_readq(cd, queue->IO_QUEUE_ERRCNTS), + queue->IO_QUEUE_LRW, + __genwqe_readq(cd, queue->IO_QUEUE_LRW)); + + seq_printf(s, "DDCB list (ddcb_act=%d/ddcb_next=%d):\n", + queue->ddcb_act, queue->ddcb_next); + + pddcb = queue->ddcb_vaddr; + for (i = 0; i < queue->ddcb_max; i++) { + seq_printf(s, " %-3d: RETC=%03x SEQ=%04x HSI/SHI=%02x/%02x ", + i, be16_to_cpu(pddcb->retc_16), + be16_to_cpu(pddcb->seqnum_16), + pddcb->hsi, pddcb->shi); + seq_printf(s, "PRIV=%06llx CMD=%02x\n", + be64_to_cpu(pddcb->priv_64), pddcb->cmd); + pddcb++; + } + return 0; +} + +GENWQE_DEBUGFS_RO(ddcb_info, genwqe_ddcb_info_show); + +static int genwqe_info_show(struct seq_file *s, void *unused) +{ + struct genwqe_dev *cd = s->private; + u64 app_id, slu_id, bitstream = -1; + struct pci_dev *pci_dev = cd->pci_dev; + + slu_id = __genwqe_readq(cd, IO_SLU_UNITCFG); + app_id = __genwqe_readq(cd, IO_APP_UNITCFG); + + if (genwqe_is_privileged(cd)) + bitstream = __genwqe_readq(cd, IO_SLU_BITSTREAM); + + seq_printf(s, "%s driver version: %s\n" + " Device Name/Type: %s %s CardIdx: %d\n" + " SLU/APP Config : 0x%016llx/0x%016llx\n" + " Build Date : %u/%x/%u\n" + " Base Clock : %u MHz\n" + " Arch/SVN Release: %u/%llx\n" + " Bitstream : %llx\n", + GENWQE_DEVNAME, DRV_VERSION, dev_name(&pci_dev->dev), + genwqe_is_privileged(cd) ? + "Physical" : "Virtual or no SR-IOV", + cd->card_idx, slu_id, app_id, + (u16)((slu_id >> 12) & 0x0fLLU), /* month */ + (u16)((slu_id >> 4) & 0xffLLU), /* day */ + (u16)((slu_id >> 16) & 0x0fLLU) + 2010, /* year */ + genwqe_base_clock_frequency(cd), + (u16)((slu_id >> 32) & 0xffLLU), slu_id >> 40, + bitstream); + + return 0; +} + +GENWQE_DEBUGFS_RO(info, genwqe_info_show); + +int genwqe_init_debugfs(struct genwqe_dev *cd) +{ + struct dentry *root; + struct dentry *file; + int ret; + char card_name[64]; + char name[64]; + unsigned int i; + + sprintf(card_name, "%s%d_card", GENWQE_DEVNAME, cd->card_idx); + + root = debugfs_create_dir(card_name, cd->debugfs_genwqe); + if (!root) { + ret = -ENOMEM; + goto err0; + } + + /* non privileged interfaces are done here */ + file = debugfs_create_file("ddcb_info", S_IRUGO, root, cd, + &genwqe_ddcb_info_fops); + if (!file) { + ret = -ENOMEM; + goto err1; + } + + file = debugfs_create_file("info", S_IRUGO, root, cd, + &genwqe_info_fops); + if (!file) { + ret = -ENOMEM; + goto err1; + } + + file = debugfs_create_x64("err_inject", 0666, root, &cd->err_inject); + if (!file) { + ret = -ENOMEM; + goto err1; + } + + file = debugfs_create_u32("ddcb_software_timeout", 0666, root, + &cd->ddcb_software_timeout); + if (!file) { + ret = -ENOMEM; + goto err1; + } + + file = debugfs_create_u32("kill_timeout", 0666, root, + &cd->kill_timeout); + if (!file) { + ret = -ENOMEM; + goto err1; + } + + /* privileged interfaces follow here */ + if (!genwqe_is_privileged(cd)) { + cd->debugfs_root = root; + return 0; + } + + file = debugfs_create_file("curr_regs", S_IRUGO, root, cd, + &genwqe_curr_regs_fops); + if (!file) { + ret = -ENOMEM; + goto err1; + } + + file = debugfs_create_file("curr_dbg_uid0", S_IRUGO, root, cd, + &genwqe_curr_dbg_uid0_fops); + if (!file) { + ret = -ENOMEM; + goto err1; + } + + file = debugfs_create_file("curr_dbg_uid1", S_IRUGO, root, cd, + &genwqe_curr_dbg_uid1_fops); + if (!file) { + ret = -ENOMEM; + goto err1; + } + + file = debugfs_create_file("curr_dbg_uid2", S_IRUGO, root, cd, + &genwqe_curr_dbg_uid2_fops); + if (!file) { + ret = -ENOMEM; + goto err1; + } + + file = debugfs_create_file("prev_regs", S_IRUGO, root, cd, + &genwqe_prev_regs_fops); + if (!file) { + ret = -ENOMEM; + goto err1; + } + + file = debugfs_create_file("prev_dbg_uid0", S_IRUGO, root, cd, + &genwqe_prev_dbg_uid0_fops); + if (!file) { + ret = -ENOMEM; + goto err1; + } + + file = debugfs_create_file("prev_dbg_uid1", S_IRUGO, root, cd, + &genwqe_prev_dbg_uid1_fops); + if (!file) { + ret = -ENOMEM; + goto err1; + } + + file = debugfs_create_file("prev_dbg_uid2", S_IRUGO, root, cd, + &genwqe_prev_dbg_uid2_fops); + if (!file) { + ret = -ENOMEM; + goto err1; + } + + for (i = 0; i < GENWQE_MAX_VFS; i++) { + sprintf(name, "vf%u_jobtimeout_msec", i); + + file = debugfs_create_u32(name, 0666, root, + &cd->vf_jobtimeout_msec[i]); + if (!file) { + ret = -ENOMEM; + goto err1; + } + } + + file = debugfs_create_file("jobtimer", S_IRUGO, root, cd, + &genwqe_jtimer_fops); + if (!file) { + ret = -ENOMEM; + goto err1; + } + + file = debugfs_create_file("queue_working_time", S_IRUGO, root, cd, + &genwqe_queue_working_time_fops); + if (!file) { + ret = -ENOMEM; + goto err1; + } + + file = debugfs_create_u32("skip_recovery", 0666, root, + &cd->skip_recovery); + if (!file) { + ret = -ENOMEM; + goto err1; + } + + file = debugfs_create_u32("use_platform_recovery", 0666, root, + &cd->use_platform_recovery); + if (!file) { + ret = -ENOMEM; + goto err1; + } + + cd->debugfs_root = root; + return 0; +err1: + debugfs_remove_recursive(root); +err0: + return ret; +} + +void genqwe_exit_debugfs(struct genwqe_dev *cd) +{ + debugfs_remove_recursive(cd->debugfs_root); +} diff --git a/drivers/misc/genwqe/card_dev.c b/drivers/misc/genwqe/card_dev.c new file mode 100644 index 000000000..d2098b4d2 --- /dev/null +++ b/drivers/misc/genwqe/card_dev.c @@ -0,0 +1,1412 @@ +/** + * IBM Accelerator Family 'GenWQE' + * + * (C) Copyright IBM Corp. 2013 + * + * Author: Frank Haverkamp + * Author: Joerg-Stephan Vogt + * Author: Michael Jung + * Author: Michael Ruettger + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +/* + * Character device representation of the GenWQE device. This allows + * user-space applications to communicate with the card. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "card_base.h" +#include "card_ddcb.h" + +static int genwqe_open_files(struct genwqe_dev *cd) +{ + int rc; + unsigned long flags; + + spin_lock_irqsave(&cd->file_lock, flags); + rc = list_empty(&cd->file_list); + spin_unlock_irqrestore(&cd->file_lock, flags); + return !rc; +} + +static void genwqe_add_file(struct genwqe_dev *cd, struct genwqe_file *cfile) +{ + unsigned long flags; + + cfile->opener = get_pid(task_tgid(current)); + spin_lock_irqsave(&cd->file_lock, flags); + list_add(&cfile->list, &cd->file_list); + spin_unlock_irqrestore(&cd->file_lock, flags); +} + +static int genwqe_del_file(struct genwqe_dev *cd, struct genwqe_file *cfile) +{ + unsigned long flags; + + spin_lock_irqsave(&cd->file_lock, flags); + list_del(&cfile->list); + spin_unlock_irqrestore(&cd->file_lock, flags); + put_pid(cfile->opener); + + return 0; +} + +static void genwqe_add_pin(struct genwqe_file *cfile, struct dma_mapping *m) +{ + unsigned long flags; + + spin_lock_irqsave(&cfile->pin_lock, flags); + list_add(&m->pin_list, &cfile->pin_list); + spin_unlock_irqrestore(&cfile->pin_lock, flags); +} + +static int genwqe_del_pin(struct genwqe_file *cfile, struct dma_mapping *m) +{ + unsigned long flags; + + spin_lock_irqsave(&cfile->pin_lock, flags); + list_del(&m->pin_list); + spin_unlock_irqrestore(&cfile->pin_lock, flags); + + return 0; +} + +/** + * genwqe_search_pin() - Search for the mapping for a userspace address + * @cfile: Descriptor of opened file + * @u_addr: User virtual address + * @size: Size of buffer + * @dma_addr: DMA address to be updated + * + * Return: Pointer to the corresponding mapping NULL if not found + */ +static struct dma_mapping *genwqe_search_pin(struct genwqe_file *cfile, + unsigned long u_addr, + unsigned int size, + void **virt_addr) +{ + unsigned long flags; + struct dma_mapping *m; + + spin_lock_irqsave(&cfile->pin_lock, flags); + + list_for_each_entry(m, &cfile->pin_list, pin_list) { + if ((((u64)m->u_vaddr) <= (u_addr)) && + (((u64)m->u_vaddr + m->size) >= (u_addr + size))) { + + if (virt_addr) + *virt_addr = m->k_vaddr + + (u_addr - (u64)m->u_vaddr); + + spin_unlock_irqrestore(&cfile->pin_lock, flags); + return m; + } + } + spin_unlock_irqrestore(&cfile->pin_lock, flags); + return NULL; +} + +static void __genwqe_add_mapping(struct genwqe_file *cfile, + struct dma_mapping *dma_map) +{ + unsigned long flags; + + spin_lock_irqsave(&cfile->map_lock, flags); + list_add(&dma_map->card_list, &cfile->map_list); + spin_unlock_irqrestore(&cfile->map_lock, flags); +} + +static void __genwqe_del_mapping(struct genwqe_file *cfile, + struct dma_mapping *dma_map) +{ + unsigned long flags; + + spin_lock_irqsave(&cfile->map_lock, flags); + list_del(&dma_map->card_list); + spin_unlock_irqrestore(&cfile->map_lock, flags); +} + + +/** + * __genwqe_search_mapping() - Search for the mapping for a userspace address + * @cfile: descriptor of opened file + * @u_addr: user virtual address + * @size: size of buffer + * @dma_addr: DMA address to be updated + * Return: Pointer to the corresponding mapping NULL if not found + */ +static struct dma_mapping *__genwqe_search_mapping(struct genwqe_file *cfile, + unsigned long u_addr, + unsigned int size, + dma_addr_t *dma_addr, + void **virt_addr) +{ + unsigned long flags; + struct dma_mapping *m; + struct pci_dev *pci_dev = cfile->cd->pci_dev; + + spin_lock_irqsave(&cfile->map_lock, flags); + list_for_each_entry(m, &cfile->map_list, card_list) { + + if ((((u64)m->u_vaddr) <= (u_addr)) && + (((u64)m->u_vaddr + m->size) >= (u_addr + size))) { + + /* match found: current is as expected and + addr is in range */ + if (dma_addr) + *dma_addr = m->dma_addr + + (u_addr - (u64)m->u_vaddr); + + if (virt_addr) + *virt_addr = m->k_vaddr + + (u_addr - (u64)m->u_vaddr); + + spin_unlock_irqrestore(&cfile->map_lock, flags); + return m; + } + } + spin_unlock_irqrestore(&cfile->map_lock, flags); + + dev_err(&pci_dev->dev, + "[%s] Entry not found: u_addr=%lx, size=%x\n", + __func__, u_addr, size); + + return NULL; +} + +static void genwqe_remove_mappings(struct genwqe_file *cfile) +{ + int i = 0; + struct list_head *node, *next; + struct dma_mapping *dma_map; + struct genwqe_dev *cd = cfile->cd; + struct pci_dev *pci_dev = cfile->cd->pci_dev; + + list_for_each_safe(node, next, &cfile->map_list) { + dma_map = list_entry(node, struct dma_mapping, card_list); + + list_del_init(&dma_map->card_list); + + /* + * This is really a bug, because those things should + * have been already tidied up. + * + * GENWQE_MAPPING_RAW should have been removed via mmunmap(). + * GENWQE_MAPPING_SGL_TEMP should be removed by tidy up code. + */ + dev_err(&pci_dev->dev, + "[%s] %d. cleanup mapping: u_vaddr=%p u_kaddr=%016lx dma_addr=%lx\n", + __func__, i++, dma_map->u_vaddr, + (unsigned long)dma_map->k_vaddr, + (unsigned long)dma_map->dma_addr); + + if (dma_map->type == GENWQE_MAPPING_RAW) { + /* we allocated this dynamically */ + __genwqe_free_consistent(cd, dma_map->size, + dma_map->k_vaddr, + dma_map->dma_addr); + kfree(dma_map); + } else if (dma_map->type == GENWQE_MAPPING_SGL_TEMP) { + /* we use dma_map statically from the request */ + genwqe_user_vunmap(cd, dma_map); + } + } +} + +static void genwqe_remove_pinnings(struct genwqe_file *cfile) +{ + struct list_head *node, *next; + struct dma_mapping *dma_map; + struct genwqe_dev *cd = cfile->cd; + + list_for_each_safe(node, next, &cfile->pin_list) { + dma_map = list_entry(node, struct dma_mapping, pin_list); + + /* + * This is not a bug, because a killed processed might + * not call the unpin ioctl, which is supposed to free + * the resources. + * + * Pinnings are dymically allocated and need to be + * deleted. + */ + list_del_init(&dma_map->pin_list); + genwqe_user_vunmap(cd, dma_map); + kfree(dma_map); + } +} + +/** + * genwqe_kill_fasync() - Send signal to all processes with open GenWQE files + * + * E.g. genwqe_send_signal(cd, SIGIO); + */ +static int genwqe_kill_fasync(struct genwqe_dev *cd, int sig) +{ + unsigned int files = 0; + unsigned long flags; + struct genwqe_file *cfile; + + spin_lock_irqsave(&cd->file_lock, flags); + list_for_each_entry(cfile, &cd->file_list, list) { + if (cfile->async_queue) + kill_fasync(&cfile->async_queue, sig, POLL_HUP); + files++; + } + spin_unlock_irqrestore(&cd->file_lock, flags); + return files; +} + +static int genwqe_terminate(struct genwqe_dev *cd) +{ + unsigned int files = 0; + unsigned long flags; + struct genwqe_file *cfile; + + spin_lock_irqsave(&cd->file_lock, flags); + list_for_each_entry(cfile, &cd->file_list, list) { + kill_pid(cfile->opener, SIGKILL, 1); + files++; + } + spin_unlock_irqrestore(&cd->file_lock, flags); + return files; +} + +/** + * genwqe_open() - file open + * @inode: file system information + * @filp: file handle + * + * This function is executed whenever an application calls + * open("/dev/genwqe",..). + * + * Return: 0 if successful or <0 if errors + */ +static int genwqe_open(struct inode *inode, struct file *filp) +{ + struct genwqe_dev *cd; + struct genwqe_file *cfile; + + cfile = kzalloc(sizeof(*cfile), GFP_KERNEL); + if (cfile == NULL) + return -ENOMEM; + + cd = container_of(inode->i_cdev, struct genwqe_dev, cdev_genwqe); + cfile->cd = cd; + cfile->filp = filp; + cfile->client = NULL; + + spin_lock_init(&cfile->map_lock); /* list of raw memory allocations */ + INIT_LIST_HEAD(&cfile->map_list); + + spin_lock_init(&cfile->pin_lock); /* list of user pinned memory */ + INIT_LIST_HEAD(&cfile->pin_list); + + filp->private_data = cfile; + + genwqe_add_file(cd, cfile); + return 0; +} + +/** + * genwqe_fasync() - Setup process to receive SIGIO. + * @fd: file descriptor + * @filp: file handle + * @mode: file mode + * + * Sending a signal is working as following: + * + * if (cdev->async_queue) + * kill_fasync(&cdev->async_queue, SIGIO, POLL_IN); + * + * Some devices also implement asynchronous notification to indicate + * when the device can be written; in this case, of course, + * kill_fasync must be called with a mode of POLL_OUT. + */ +static int genwqe_fasync(int fd, struct file *filp, int mode) +{ + struct genwqe_file *cdev = (struct genwqe_file *)filp->private_data; + + return fasync_helper(fd, filp, mode, &cdev->async_queue); +} + + +/** + * genwqe_release() - file close + * @inode: file system information + * @filp: file handle + * + * This function is executed whenever an application calls 'close(fd_genwqe)' + * + * Return: always 0 + */ +static int genwqe_release(struct inode *inode, struct file *filp) +{ + struct genwqe_file *cfile = (struct genwqe_file *)filp->private_data; + struct genwqe_dev *cd = cfile->cd; + + /* there must be no entries in these lists! */ + genwqe_remove_mappings(cfile); + genwqe_remove_pinnings(cfile); + + /* remove this filp from the asynchronously notified filp's */ + genwqe_fasync(-1, filp, 0); + + /* + * For this to work we must not release cd when this cfile is + * not yet released, otherwise the list entry is invalid, + * because the list itself gets reinstantiated! + */ + genwqe_del_file(cd, cfile); + kfree(cfile); + return 0; +} + +static void genwqe_vma_open(struct vm_area_struct *vma) +{ + /* nothing ... */ +} + +/** + * genwqe_vma_close() - Called each time when vma is unmapped + * + * Free memory which got allocated by GenWQE mmap(). + */ +static void genwqe_vma_close(struct vm_area_struct *vma) +{ + unsigned long vsize = vma->vm_end - vma->vm_start; + struct inode *inode = file_inode(vma->vm_file); + struct dma_mapping *dma_map; + struct genwqe_dev *cd = container_of(inode->i_cdev, struct genwqe_dev, + cdev_genwqe); + struct pci_dev *pci_dev = cd->pci_dev; + dma_addr_t d_addr = 0; + struct genwqe_file *cfile = vma->vm_private_data; + + dma_map = __genwqe_search_mapping(cfile, vma->vm_start, vsize, + &d_addr, NULL); + if (dma_map == NULL) { + dev_err(&pci_dev->dev, + " [%s] err: mapping not found: v=%lx, p=%lx s=%lx\n", + __func__, vma->vm_start, vma->vm_pgoff << PAGE_SHIFT, + vsize); + return; + } + __genwqe_del_mapping(cfile, dma_map); + __genwqe_free_consistent(cd, dma_map->size, dma_map->k_vaddr, + dma_map->dma_addr); + kfree(dma_map); +} + +static const struct vm_operations_struct genwqe_vma_ops = { + .open = genwqe_vma_open, + .close = genwqe_vma_close, +}; + +/** + * genwqe_mmap() - Provide contignous buffers to userspace + * + * We use mmap() to allocate contignous buffers used for DMA + * transfers. After the buffer is allocated we remap it to user-space + * and remember a reference to our dma_mapping data structure, where + * we store the associated DMA address and allocated size. + * + * When we receive a DDCB execution request with the ATS bits set to + * plain buffer, we lookup our dma_mapping list to find the + * corresponding DMA address for the associated user-space address. + */ +static int genwqe_mmap(struct file *filp, struct vm_area_struct *vma) +{ + int rc; + unsigned long pfn, vsize = vma->vm_end - vma->vm_start; + struct genwqe_file *cfile = (struct genwqe_file *)filp->private_data; + struct genwqe_dev *cd = cfile->cd; + struct dma_mapping *dma_map; + + if (vsize == 0) + return -EINVAL; + + if (get_order(vsize) > MAX_ORDER) + return -ENOMEM; + + dma_map = kzalloc(sizeof(struct dma_mapping), GFP_KERNEL); + if (dma_map == NULL) + return -ENOMEM; + + genwqe_mapping_init(dma_map, GENWQE_MAPPING_RAW); + dma_map->u_vaddr = (void *)vma->vm_start; + dma_map->size = vsize; + dma_map->nr_pages = DIV_ROUND_UP(vsize, PAGE_SIZE); + dma_map->k_vaddr = __genwqe_alloc_consistent(cd, vsize, + &dma_map->dma_addr); + if (dma_map->k_vaddr == NULL) { + rc = -ENOMEM; + goto free_dma_map; + } + + if (capable(CAP_SYS_ADMIN) && (vsize > sizeof(dma_addr_t))) + *(dma_addr_t *)dma_map->k_vaddr = dma_map->dma_addr; + + pfn = virt_to_phys(dma_map->k_vaddr) >> PAGE_SHIFT; + rc = remap_pfn_range(vma, + vma->vm_start, + pfn, + vsize, + vma->vm_page_prot); + if (rc != 0) { + rc = -EFAULT; + goto free_dma_mem; + } + + vma->vm_private_data = cfile; + vma->vm_ops = &genwqe_vma_ops; + __genwqe_add_mapping(cfile, dma_map); + + return 0; + + free_dma_mem: + __genwqe_free_consistent(cd, dma_map->size, + dma_map->k_vaddr, + dma_map->dma_addr); + free_dma_map: + kfree(dma_map); + return rc; +} + +/** + * do_flash_update() - Excute flash update (write image or CVPD) + * @cd: genwqe device + * @load: details about image load + * + * Return: 0 if successful + */ + +#define FLASH_BLOCK 0x40000 /* we use 256k blocks */ + +static int do_flash_update(struct genwqe_file *cfile, + struct genwqe_bitstream *load) +{ + int rc = 0; + int blocks_to_flash; + dma_addr_t dma_addr; + u64 flash = 0; + size_t tocopy = 0; + u8 __user *buf; + u8 *xbuf; + u32 crc; + u8 cmdopts; + struct genwqe_dev *cd = cfile->cd; + struct file *filp = cfile->filp; + struct pci_dev *pci_dev = cd->pci_dev; + + if ((load->size & 0x3) != 0) + return -EINVAL; + + if (((unsigned long)(load->data_addr) & ~PAGE_MASK) != 0) + return -EINVAL; + + /* FIXME Bits have changed for new service layer! */ + switch ((char)load->partition) { + case '0': + cmdopts = 0x14; + break; /* download/erase_first/part_0 */ + case '1': + cmdopts = 0x1C; + break; /* download/erase_first/part_1 */ + case 'v': + cmdopts = 0x0C; + break; /* download/erase_first/vpd */ + default: + return -EINVAL; + } + + buf = (u8 __user *)load->data_addr; + xbuf = __genwqe_alloc_consistent(cd, FLASH_BLOCK, &dma_addr); + if (xbuf == NULL) + return -ENOMEM; + + blocks_to_flash = load->size / FLASH_BLOCK; + while (load->size) { + struct genwqe_ddcb_cmd *req; + + /* + * We must be 4 byte aligned. Buffer must be 0 appened + * to have defined values when calculating CRC. + */ + tocopy = min_t(size_t, load->size, FLASH_BLOCK); + + rc = copy_from_user(xbuf, buf, tocopy); + if (rc) { + rc = -EFAULT; + goto free_buffer; + } + crc = genwqe_crc32(xbuf, tocopy, 0xffffffff); + + dev_dbg(&pci_dev->dev, + "[%s] DMA: %lx CRC: %08x SZ: %ld %d\n", + __func__, (unsigned long)dma_addr, crc, tocopy, + blocks_to_flash); + + /* prepare DDCB for SLU process */ + req = ddcb_requ_alloc(); + if (req == NULL) { + rc = -ENOMEM; + goto free_buffer; + } + + req->cmd = SLCMD_MOVE_FLASH; + req->cmdopts = cmdopts; + + /* prepare invariant values */ + if (genwqe_get_slu_id(cd) <= 0x2) { + *(__be64 *)&req->__asiv[0] = cpu_to_be64(dma_addr); + *(__be64 *)&req->__asiv[8] = cpu_to_be64(tocopy); + *(__be64 *)&req->__asiv[16] = cpu_to_be64(flash); + *(__be32 *)&req->__asiv[24] = cpu_to_be32(0); + req->__asiv[24] = load->uid; + *(__be32 *)&req->__asiv[28] = cpu_to_be32(crc); + + /* for simulation only */ + *(__be64 *)&req->__asiv[88] = cpu_to_be64(load->slu_id); + *(__be64 *)&req->__asiv[96] = cpu_to_be64(load->app_id); + req->asiv_length = 32; /* bytes included in crc calc */ + } else { /* setup DDCB for ATS architecture */ + *(__be64 *)&req->asiv[0] = cpu_to_be64(dma_addr); + *(__be32 *)&req->asiv[8] = cpu_to_be32(tocopy); + *(__be32 *)&req->asiv[12] = cpu_to_be32(0); /* resvd */ + *(__be64 *)&req->asiv[16] = cpu_to_be64(flash); + *(__be32 *)&req->asiv[24] = cpu_to_be32(load->uid<<24); + *(__be32 *)&req->asiv[28] = cpu_to_be32(crc); + + /* for simulation only */ + *(__be64 *)&req->asiv[80] = cpu_to_be64(load->slu_id); + *(__be64 *)&req->asiv[88] = cpu_to_be64(load->app_id); + + /* Rd only */ + req->ats = 0x4ULL << 44; + req->asiv_length = 40; /* bytes included in crc calc */ + } + req->asv_length = 8; + + /* For Genwqe5 we get back the calculated CRC */ + *(u64 *)&req->asv[0] = 0ULL; /* 0x80 */ + + rc = __genwqe_execute_raw_ddcb(cd, req, filp->f_flags); + + load->retc = req->retc; + load->attn = req->attn; + load->progress = req->progress; + + if (rc < 0) { + ddcb_requ_free(req); + goto free_buffer; + } + + if (req->retc != DDCB_RETC_COMPLETE) { + rc = -EIO; + ddcb_requ_free(req); + goto free_buffer; + } + + load->size -= tocopy; + flash += tocopy; + buf += tocopy; + blocks_to_flash--; + ddcb_requ_free(req); + } + + free_buffer: + __genwqe_free_consistent(cd, FLASH_BLOCK, xbuf, dma_addr); + return rc; +} + +static int do_flash_read(struct genwqe_file *cfile, + struct genwqe_bitstream *load) +{ + int rc, blocks_to_flash; + dma_addr_t dma_addr; + u64 flash = 0; + size_t tocopy = 0; + u8 __user *buf; + u8 *xbuf; + u8 cmdopts; + struct genwqe_dev *cd = cfile->cd; + struct file *filp = cfile->filp; + struct pci_dev *pci_dev = cd->pci_dev; + struct genwqe_ddcb_cmd *cmd; + + if ((load->size & 0x3) != 0) + return -EINVAL; + + if (((unsigned long)(load->data_addr) & ~PAGE_MASK) != 0) + return -EINVAL; + + /* FIXME Bits have changed for new service layer! */ + switch ((char)load->partition) { + case '0': + cmdopts = 0x12; + break; /* upload/part_0 */ + case '1': + cmdopts = 0x1A; + break; /* upload/part_1 */ + case 'v': + cmdopts = 0x0A; + break; /* upload/vpd */ + default: + return -EINVAL; + } + + buf = (u8 __user *)load->data_addr; + xbuf = __genwqe_alloc_consistent(cd, FLASH_BLOCK, &dma_addr); + if (xbuf == NULL) + return -ENOMEM; + + blocks_to_flash = load->size / FLASH_BLOCK; + while (load->size) { + /* + * We must be 4 byte aligned. Buffer must be 0 appened + * to have defined values when calculating CRC. + */ + tocopy = min_t(size_t, load->size, FLASH_BLOCK); + + dev_dbg(&pci_dev->dev, + "[%s] DMA: %lx SZ: %ld %d\n", + __func__, (unsigned long)dma_addr, tocopy, + blocks_to_flash); + + /* prepare DDCB for SLU process */ + cmd = ddcb_requ_alloc(); + if (cmd == NULL) { + rc = -ENOMEM; + goto free_buffer; + } + cmd->cmd = SLCMD_MOVE_FLASH; + cmd->cmdopts = cmdopts; + + /* prepare invariant values */ + if (genwqe_get_slu_id(cd) <= 0x2) { + *(__be64 *)&cmd->__asiv[0] = cpu_to_be64(dma_addr); + *(__be64 *)&cmd->__asiv[8] = cpu_to_be64(tocopy); + *(__be64 *)&cmd->__asiv[16] = cpu_to_be64(flash); + *(__be32 *)&cmd->__asiv[24] = cpu_to_be32(0); + cmd->__asiv[24] = load->uid; + *(__be32 *)&cmd->__asiv[28] = cpu_to_be32(0) /* CRC */; + cmd->asiv_length = 32; /* bytes included in crc calc */ + } else { /* setup DDCB for ATS architecture */ + *(__be64 *)&cmd->asiv[0] = cpu_to_be64(dma_addr); + *(__be32 *)&cmd->asiv[8] = cpu_to_be32(tocopy); + *(__be32 *)&cmd->asiv[12] = cpu_to_be32(0); /* resvd */ + *(__be64 *)&cmd->asiv[16] = cpu_to_be64(flash); + *(__be32 *)&cmd->asiv[24] = cpu_to_be32(load->uid<<24); + *(__be32 *)&cmd->asiv[28] = cpu_to_be32(0); /* CRC */ + + /* rd/wr */ + cmd->ats = 0x5ULL << 44; + cmd->asiv_length = 40; /* bytes included in crc calc */ + } + cmd->asv_length = 8; + + /* we only get back the calculated CRC */ + *(u64 *)&cmd->asv[0] = 0ULL; /* 0x80 */ + + rc = __genwqe_execute_raw_ddcb(cd, cmd, filp->f_flags); + + load->retc = cmd->retc; + load->attn = cmd->attn; + load->progress = cmd->progress; + + if ((rc < 0) && (rc != -EBADMSG)) { + ddcb_requ_free(cmd); + goto free_buffer; + } + + rc = copy_to_user(buf, xbuf, tocopy); + if (rc) { + rc = -EFAULT; + ddcb_requ_free(cmd); + goto free_buffer; + } + + /* We know that we can get retc 0x104 with CRC err */ + if (((cmd->retc == DDCB_RETC_FAULT) && + (cmd->attn != 0x02)) || /* Normally ignore CRC error */ + ((cmd->retc == DDCB_RETC_COMPLETE) && + (cmd->attn != 0x00))) { /* Everything was fine */ + rc = -EIO; + ddcb_requ_free(cmd); + goto free_buffer; + } + + load->size -= tocopy; + flash += tocopy; + buf += tocopy; + blocks_to_flash--; + ddcb_requ_free(cmd); + } + rc = 0; + + free_buffer: + __genwqe_free_consistent(cd, FLASH_BLOCK, xbuf, dma_addr); + return rc; +} + +static int genwqe_pin_mem(struct genwqe_file *cfile, struct genwqe_mem *m) +{ + int rc; + struct genwqe_dev *cd = cfile->cd; + struct pci_dev *pci_dev = cfile->cd->pci_dev; + struct dma_mapping *dma_map; + unsigned long map_addr; + unsigned long map_size; + + if ((m->addr == 0x0) || (m->size == 0)) + return -EINVAL; + if (m->size > ULONG_MAX - PAGE_SIZE - (m->addr & ~PAGE_MASK)) + return -EINVAL; + + map_addr = (m->addr & PAGE_MASK); + map_size = round_up(m->size + (m->addr & ~PAGE_MASK), PAGE_SIZE); + + dma_map = kzalloc(sizeof(struct dma_mapping), GFP_KERNEL); + if (dma_map == NULL) + return -ENOMEM; + + genwqe_mapping_init(dma_map, GENWQE_MAPPING_SGL_PINNED); + rc = genwqe_user_vmap(cd, dma_map, (void *)map_addr, map_size); + if (rc != 0) { + dev_err(&pci_dev->dev, + "[%s] genwqe_user_vmap rc=%d\n", __func__, rc); + kfree(dma_map); + return rc; + } + + genwqe_add_pin(cfile, dma_map); + return 0; +} + +static int genwqe_unpin_mem(struct genwqe_file *cfile, struct genwqe_mem *m) +{ + struct genwqe_dev *cd = cfile->cd; + struct dma_mapping *dma_map; + unsigned long map_addr; + unsigned long map_size; + + if (m->addr == 0x0) + return -EINVAL; + + map_addr = (m->addr & PAGE_MASK); + map_size = round_up(m->size + (m->addr & ~PAGE_MASK), PAGE_SIZE); + + dma_map = genwqe_search_pin(cfile, map_addr, map_size, NULL); + if (dma_map == NULL) + return -ENOENT; + + genwqe_del_pin(cfile, dma_map); + genwqe_user_vunmap(cd, dma_map); + kfree(dma_map); + return 0; +} + +/** + * ddcb_cmd_cleanup() - Remove dynamically created fixup entries + * + * Only if there are any. Pinnings are not removed. + */ +static int ddcb_cmd_cleanup(struct genwqe_file *cfile, struct ddcb_requ *req) +{ + unsigned int i; + struct dma_mapping *dma_map; + struct genwqe_dev *cd = cfile->cd; + + for (i = 0; i < DDCB_FIXUPS; i++) { + dma_map = &req->dma_mappings[i]; + + if (dma_mapping_used(dma_map)) { + __genwqe_del_mapping(cfile, dma_map); + genwqe_user_vunmap(cd, dma_map); + } + if (req->sgls[i].sgl != NULL) + genwqe_free_sync_sgl(cd, &req->sgls[i]); + } + return 0; +} + +/** + * ddcb_cmd_fixups() - Establish DMA fixups/sglists for user memory references + * + * Before the DDCB gets executed we need to handle the fixups. We + * replace the user-space addresses with DMA addresses or do + * additional setup work e.g. generating a scatter-gather list which + * is used to describe the memory referred to in the fixup. + */ +static int ddcb_cmd_fixups(struct genwqe_file *cfile, struct ddcb_requ *req) +{ + int rc; + unsigned int asiv_offs, i; + struct genwqe_dev *cd = cfile->cd; + struct genwqe_ddcb_cmd *cmd = &req->cmd; + struct dma_mapping *m; + + for (i = 0, asiv_offs = 0x00; asiv_offs <= 0x58; + i++, asiv_offs += 0x08) { + + u64 u_addr; + dma_addr_t d_addr; + u32 u_size = 0; + u64 ats_flags; + + ats_flags = ATS_GET_FLAGS(cmd->ats, asiv_offs); + + switch (ats_flags) { + + case ATS_TYPE_DATA: + break; /* nothing to do here */ + + case ATS_TYPE_FLAT_RDWR: + case ATS_TYPE_FLAT_RD: { + u_addr = be64_to_cpu(*((__be64 *)&cmd-> + asiv[asiv_offs])); + u_size = be32_to_cpu(*((__be32 *)&cmd-> + asiv[asiv_offs + 0x08])); + + /* + * No data available. Ignore u_addr in this + * case and set addr to 0. Hardware must not + * fetch the buffer. + */ + if (u_size == 0x0) { + *((__be64 *)&cmd->asiv[asiv_offs]) = + cpu_to_be64(0x0); + break; + } + + m = __genwqe_search_mapping(cfile, u_addr, u_size, + &d_addr, NULL); + if (m == NULL) { + rc = -EFAULT; + goto err_out; + } + + *((__be64 *)&cmd->asiv[asiv_offs]) = + cpu_to_be64(d_addr); + break; + } + + case ATS_TYPE_SGL_RDWR: + case ATS_TYPE_SGL_RD: { + int page_offs; + + u_addr = be64_to_cpu(*((__be64 *) + &cmd->asiv[asiv_offs])); + u_size = be32_to_cpu(*((__be32 *) + &cmd->asiv[asiv_offs + 0x08])); + + /* + * No data available. Ignore u_addr in this + * case and set addr to 0. Hardware must not + * fetch the empty sgl. + */ + if (u_size == 0x0) { + *((__be64 *)&cmd->asiv[asiv_offs]) = + cpu_to_be64(0x0); + break; + } + + m = genwqe_search_pin(cfile, u_addr, u_size, NULL); + if (m != NULL) { + page_offs = (u_addr - + (u64)m->u_vaddr)/PAGE_SIZE; + } else { + m = &req->dma_mappings[i]; + + genwqe_mapping_init(m, + GENWQE_MAPPING_SGL_TEMP); + + if (ats_flags == ATS_TYPE_SGL_RD) + m->write = 0; + + rc = genwqe_user_vmap(cd, m, (void *)u_addr, + u_size); + if (rc != 0) + goto err_out; + + __genwqe_add_mapping(cfile, m); + page_offs = 0; + } + + /* create genwqe style scatter gather list */ + rc = genwqe_alloc_sync_sgl(cd, &req->sgls[i], + (void __user *)u_addr, + u_size, m->write); + if (rc != 0) + goto err_out; + + genwqe_setup_sgl(cd, &req->sgls[i], + &m->dma_list[page_offs]); + + *((__be64 *)&cmd->asiv[asiv_offs]) = + cpu_to_be64(req->sgls[i].sgl_dma_addr); + + break; + } + default: + rc = -EINVAL; + goto err_out; + } + } + return 0; + + err_out: + ddcb_cmd_cleanup(cfile, req); + return rc; +} + +/** + * genwqe_execute_ddcb() - Execute DDCB using userspace address fixups + * + * The code will build up the translation tables or lookup the + * contignous memory allocation table to find the right translations + * and DMA addresses. + */ +static int genwqe_execute_ddcb(struct genwqe_file *cfile, + struct genwqe_ddcb_cmd *cmd) +{ + int rc; + struct genwqe_dev *cd = cfile->cd; + struct file *filp = cfile->filp; + struct ddcb_requ *req = container_of(cmd, struct ddcb_requ, cmd); + + rc = ddcb_cmd_fixups(cfile, req); + if (rc != 0) + return rc; + + rc = __genwqe_execute_raw_ddcb(cd, cmd, filp->f_flags); + ddcb_cmd_cleanup(cfile, req); + return rc; +} + +static int do_execute_ddcb(struct genwqe_file *cfile, + unsigned long arg, int raw) +{ + int rc; + struct genwqe_ddcb_cmd *cmd; + struct genwqe_dev *cd = cfile->cd; + struct file *filp = cfile->filp; + + cmd = ddcb_requ_alloc(); + if (cmd == NULL) + return -ENOMEM; + + if (copy_from_user(cmd, (void __user *)arg, sizeof(*cmd))) { + ddcb_requ_free(cmd); + return -EFAULT; + } + + if (!raw) + rc = genwqe_execute_ddcb(cfile, cmd); + else + rc = __genwqe_execute_raw_ddcb(cd, cmd, filp->f_flags); + + /* Copy back only the modifed fields. Do not copy ASIV + back since the copy got modified by the driver. */ + if (copy_to_user((void __user *)arg, cmd, + sizeof(*cmd) - DDCB_ASIV_LENGTH)) { + ddcb_requ_free(cmd); + return -EFAULT; + } + + ddcb_requ_free(cmd); + return rc; +} + +/** + * genwqe_ioctl() - IO control + * @filp: file handle + * @cmd: command identifier (passed from user) + * @arg: argument (passed from user) + * + * Return: 0 success + */ +static long genwqe_ioctl(struct file *filp, unsigned int cmd, + unsigned long arg) +{ + int rc = 0; + struct genwqe_file *cfile = (struct genwqe_file *)filp->private_data; + struct genwqe_dev *cd = cfile->cd; + struct pci_dev *pci_dev = cd->pci_dev; + struct genwqe_reg_io __user *io; + u64 val; + u32 reg_offs; + + /* Return -EIO if card hit EEH */ + if (pci_channel_offline(pci_dev)) + return -EIO; + + if (_IOC_TYPE(cmd) != GENWQE_IOC_CODE) + return -EINVAL; + + switch (cmd) { + + case GENWQE_GET_CARD_STATE: + put_user(cd->card_state, (enum genwqe_card_state __user *)arg); + return 0; + + /* Register access */ + case GENWQE_READ_REG64: { + io = (struct genwqe_reg_io __user *)arg; + + if (get_user(reg_offs, &io->num)) + return -EFAULT; + + if ((reg_offs >= cd->mmio_len) || (reg_offs & 0x7)) + return -EINVAL; + + val = __genwqe_readq(cd, reg_offs); + put_user(val, &io->val64); + return 0; + } + + case GENWQE_WRITE_REG64: { + io = (struct genwqe_reg_io __user *)arg; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if ((filp->f_flags & O_ACCMODE) == O_RDONLY) + return -EPERM; + + if (get_user(reg_offs, &io->num)) + return -EFAULT; + + if ((reg_offs >= cd->mmio_len) || (reg_offs & 0x7)) + return -EINVAL; + + if (get_user(val, &io->val64)) + return -EFAULT; + + __genwqe_writeq(cd, reg_offs, val); + return 0; + } + + case GENWQE_READ_REG32: { + io = (struct genwqe_reg_io __user *)arg; + + if (get_user(reg_offs, &io->num)) + return -EFAULT; + + if ((reg_offs >= cd->mmio_len) || (reg_offs & 0x3)) + return -EINVAL; + + val = __genwqe_readl(cd, reg_offs); + put_user(val, &io->val64); + return 0; + } + + case GENWQE_WRITE_REG32: { + io = (struct genwqe_reg_io __user *)arg; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if ((filp->f_flags & O_ACCMODE) == O_RDONLY) + return -EPERM; + + if (get_user(reg_offs, &io->num)) + return -EFAULT; + + if ((reg_offs >= cd->mmio_len) || (reg_offs & 0x3)) + return -EINVAL; + + if (get_user(val, &io->val64)) + return -EFAULT; + + __genwqe_writel(cd, reg_offs, val); + return 0; + } + + /* Flash update/reading */ + case GENWQE_SLU_UPDATE: { + struct genwqe_bitstream load; + + if (!genwqe_is_privileged(cd)) + return -EPERM; + + if ((filp->f_flags & O_ACCMODE) == O_RDONLY) + return -EPERM; + + if (copy_from_user(&load, (void __user *)arg, + sizeof(load))) + return -EFAULT; + + rc = do_flash_update(cfile, &load); + + if (copy_to_user((void __user *)arg, &load, sizeof(load))) + return -EFAULT; + + return rc; + } + + case GENWQE_SLU_READ: { + struct genwqe_bitstream load; + + if (!genwqe_is_privileged(cd)) + return -EPERM; + + if (genwqe_flash_readback_fails(cd)) + return -ENOSPC; /* known to fail for old versions */ + + if (copy_from_user(&load, (void __user *)arg, sizeof(load))) + return -EFAULT; + + rc = do_flash_read(cfile, &load); + + if (copy_to_user((void __user *)arg, &load, sizeof(load))) + return -EFAULT; + + return rc; + } + + /* memory pinning and unpinning */ + case GENWQE_PIN_MEM: { + struct genwqe_mem m; + + if (copy_from_user(&m, (void __user *)arg, sizeof(m))) + return -EFAULT; + + return genwqe_pin_mem(cfile, &m); + } + + case GENWQE_UNPIN_MEM: { + struct genwqe_mem m; + + if (copy_from_user(&m, (void __user *)arg, sizeof(m))) + return -EFAULT; + + return genwqe_unpin_mem(cfile, &m); + } + + /* launch an DDCB and wait for completion */ + case GENWQE_EXECUTE_DDCB: + return do_execute_ddcb(cfile, arg, 0); + + case GENWQE_EXECUTE_RAW_DDCB: { + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + return do_execute_ddcb(cfile, arg, 1); + } + + default: + return -EINVAL; + } + + return rc; +} + +#if defined(CONFIG_COMPAT) +/** + * genwqe_compat_ioctl() - Compatibility ioctl + * + * Called whenever a 32-bit process running under a 64-bit kernel + * performs an ioctl on /dev/genwqe_card. + * + * @filp: file pointer. + * @cmd: command. + * @arg: user argument. + * Return: zero on success or negative number on failure. + */ +static long genwqe_compat_ioctl(struct file *filp, unsigned int cmd, + unsigned long arg) +{ + return genwqe_ioctl(filp, cmd, arg); +} +#endif /* defined(CONFIG_COMPAT) */ + +static const struct file_operations genwqe_fops = { + .owner = THIS_MODULE, + .open = genwqe_open, + .fasync = genwqe_fasync, + .mmap = genwqe_mmap, + .unlocked_ioctl = genwqe_ioctl, +#if defined(CONFIG_COMPAT) + .compat_ioctl = genwqe_compat_ioctl, +#endif + .release = genwqe_release, +}; + +static int genwqe_device_initialized(struct genwqe_dev *cd) +{ + return cd->dev != NULL; +} + +/** + * genwqe_device_create() - Create and configure genwqe char device + * @cd: genwqe device descriptor + * + * This function must be called before we create any more genwqe + * character devices, because it is allocating the major and minor + * number which are supposed to be used by the client drivers. + */ +int genwqe_device_create(struct genwqe_dev *cd) +{ + int rc; + struct pci_dev *pci_dev = cd->pci_dev; + + /* + * Here starts the individual setup per client. It must + * initialize its own cdev data structure with its own fops. + * The appropriate devnum needs to be created. The ranges must + * not overlap. + */ + rc = alloc_chrdev_region(&cd->devnum_genwqe, 0, + GENWQE_MAX_MINOR, GENWQE_DEVNAME); + if (rc < 0) { + dev_err(&pci_dev->dev, "err: alloc_chrdev_region failed\n"); + goto err_dev; + } + + cdev_init(&cd->cdev_genwqe, &genwqe_fops); + cd->cdev_genwqe.owner = THIS_MODULE; + + rc = cdev_add(&cd->cdev_genwqe, cd->devnum_genwqe, 1); + if (rc < 0) { + dev_err(&pci_dev->dev, "err: cdev_add failed\n"); + goto err_add; + } + + /* + * Finally the device in /dev/... must be created. The rule is + * to use card%d_clientname for each created device. + */ + cd->dev = device_create_with_groups(cd->class_genwqe, + &cd->pci_dev->dev, + cd->devnum_genwqe, cd, + genwqe_attribute_groups, + GENWQE_DEVNAME "%u_card", + cd->card_idx); + if (IS_ERR(cd->dev)) { + rc = PTR_ERR(cd->dev); + goto err_cdev; + } + + rc = genwqe_init_debugfs(cd); + if (rc != 0) + goto err_debugfs; + + return 0; + + err_debugfs: + device_destroy(cd->class_genwqe, cd->devnum_genwqe); + err_cdev: + cdev_del(&cd->cdev_genwqe); + err_add: + unregister_chrdev_region(cd->devnum_genwqe, GENWQE_MAX_MINOR); + err_dev: + cd->dev = NULL; + return rc; +} + +static int genwqe_inform_and_stop_processes(struct genwqe_dev *cd) +{ + int rc; + unsigned int i; + struct pci_dev *pci_dev = cd->pci_dev; + + if (!genwqe_open_files(cd)) + return 0; + + dev_warn(&pci_dev->dev, "[%s] send SIGIO and wait ...\n", __func__); + + rc = genwqe_kill_fasync(cd, SIGIO); + if (rc > 0) { + /* give kill_timeout seconds to close file descriptors ... */ + for (i = 0; (i < GENWQE_KILL_TIMEOUT) && + genwqe_open_files(cd); i++) { + dev_info(&pci_dev->dev, " %d sec ...", i); + + cond_resched(); + msleep(1000); + } + + /* if no open files we can safely continue, else ... */ + if (!genwqe_open_files(cd)) + return 0; + + dev_warn(&pci_dev->dev, + "[%s] send SIGKILL and wait ...\n", __func__); + + rc = genwqe_terminate(cd); + if (rc) { + /* Give kill_timout more seconds to end processes */ + for (i = 0; (i < GENWQE_KILL_TIMEOUT) && + genwqe_open_files(cd); i++) { + dev_warn(&pci_dev->dev, " %d sec ...", i); + + cond_resched(); + msleep(1000); + } + } + } + return 0; +} + +/** + * genwqe_device_remove() - Remove genwqe's char device + * + * This function must be called after the client devices are removed + * because it will free the major/minor number range for the genwqe + * drivers. + * + * This function must be robust enough to be called twice. + */ +int genwqe_device_remove(struct genwqe_dev *cd) +{ + int rc; + struct pci_dev *pci_dev = cd->pci_dev; + + if (!genwqe_device_initialized(cd)) + return 1; + + genwqe_inform_and_stop_processes(cd); + + /* + * We currently do wait until all filedescriptors are + * closed. This leads to a problem when we abort the + * application which will decrease this reference from + * 1/unused to 0/illegal and not from 2/used 1/empty. + */ + rc = kref_read(&cd->cdev_genwqe.kobj.kref); + if (rc != 1) { + dev_err(&pci_dev->dev, + "[%s] err: cdev_genwqe...refcount=%d\n", __func__, rc); + panic("Fatal err: cannot free resources with pending references!"); + } + + genqwe_exit_debugfs(cd); + device_destroy(cd->class_genwqe, cd->devnum_genwqe); + cdev_del(&cd->cdev_genwqe); + unregister_chrdev_region(cd->devnum_genwqe, GENWQE_MAX_MINOR); + cd->dev = NULL; + + return 0; +} diff --git a/drivers/misc/genwqe/card_sysfs.c b/drivers/misc/genwqe/card_sysfs.c new file mode 100644 index 000000000..c24c9b7c1 --- /dev/null +++ b/drivers/misc/genwqe/card_sysfs.c @@ -0,0 +1,303 @@ +/** + * IBM Accelerator Family 'GenWQE' + * + * (C) Copyright IBM Corp. 2013 + * + * Author: Frank Haverkamp + * Author: Joerg-Stephan Vogt + * Author: Michael Jung + * Author: Michael Ruettger + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +/* + * Sysfs interfaces for the GenWQE card. There are attributes to query + * the version of the bitstream as well as some for the driver. For + * debugging, please also see the debugfs interfaces of this driver. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "card_base.h" +#include "card_ddcb.h" + +static const char * const genwqe_types[] = { + [GENWQE_TYPE_ALTERA_230] = "GenWQE4-230", + [GENWQE_TYPE_ALTERA_530] = "GenWQE4-530", + [GENWQE_TYPE_ALTERA_A4] = "GenWQE5-A4", + [GENWQE_TYPE_ALTERA_A7] = "GenWQE5-A7", +}; + +static ssize_t status_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct genwqe_dev *cd = dev_get_drvdata(dev); + const char *cs[GENWQE_CARD_STATE_MAX] = { "unused", "used", "error" }; + + return sprintf(buf, "%s\n", cs[cd->card_state]); +} +static DEVICE_ATTR_RO(status); + +static ssize_t appid_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + char app_name[5]; + struct genwqe_dev *cd = dev_get_drvdata(dev); + + genwqe_read_app_id(cd, app_name, sizeof(app_name)); + return sprintf(buf, "%s\n", app_name); +} +static DEVICE_ATTR_RO(appid); + +static ssize_t version_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + u64 slu_id, app_id; + struct genwqe_dev *cd = dev_get_drvdata(dev); + + slu_id = __genwqe_readq(cd, IO_SLU_UNITCFG); + app_id = __genwqe_readq(cd, IO_APP_UNITCFG); + + return sprintf(buf, "%016llx.%016llx\n", slu_id, app_id); +} +static DEVICE_ATTR_RO(version); + +static ssize_t type_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + u8 card_type; + struct genwqe_dev *cd = dev_get_drvdata(dev); + + card_type = genwqe_card_type(cd); + return sprintf(buf, "%s\n", (card_type >= ARRAY_SIZE(genwqe_types)) ? + "invalid" : genwqe_types[card_type]); +} +static DEVICE_ATTR_RO(type); + +static ssize_t tempsens_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + u64 tempsens; + struct genwqe_dev *cd = dev_get_drvdata(dev); + + tempsens = __genwqe_readq(cd, IO_SLU_TEMPERATURE_SENSOR); + return sprintf(buf, "%016llx\n", tempsens); +} +static DEVICE_ATTR_RO(tempsens); + +static ssize_t freerunning_timer_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + u64 t; + struct genwqe_dev *cd = dev_get_drvdata(dev); + + t = __genwqe_readq(cd, IO_SLC_FREE_RUNNING_TIMER); + return sprintf(buf, "%016llx\n", t); +} +static DEVICE_ATTR_RO(freerunning_timer); + +static ssize_t queue_working_time_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + u64 t; + struct genwqe_dev *cd = dev_get_drvdata(dev); + + t = __genwqe_readq(cd, IO_SLC_QUEUE_WTIME); + return sprintf(buf, "%016llx\n", t); +} +static DEVICE_ATTR_RO(queue_working_time); + +static ssize_t base_clock_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + u64 base_clock; + struct genwqe_dev *cd = dev_get_drvdata(dev); + + base_clock = genwqe_base_clock_frequency(cd); + return sprintf(buf, "%lld\n", base_clock); +} +static DEVICE_ATTR_RO(base_clock); + +/** + * curr_bitstream_show() - Show the current bitstream id + * + * There is a bug in some old versions of the CPLD which selects the + * bitstream, which causes the IO_SLU_BITSTREAM register to report + * unreliable data in very rare cases. This makes this sysfs + * unreliable up to the point were a new CPLD version is being used. + * + * Unfortunately there is no automatic way yet to query the CPLD + * version, such that you need to manually ensure via programming + * tools that you have a recent version of the CPLD software. + * + * The proposed circumvention is to use a special recovery bitstream + * on the backup partition (0) to identify problems while loading the + * image. + */ +static ssize_t curr_bitstream_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + int curr_bitstream; + struct genwqe_dev *cd = dev_get_drvdata(dev); + + curr_bitstream = __genwqe_readq(cd, IO_SLU_BITSTREAM) & 0x1; + return sprintf(buf, "%d\n", curr_bitstream); +} +static DEVICE_ATTR_RO(curr_bitstream); + +/** + * next_bitstream_show() - Show the next activated bitstream + * + * IO_SLC_CFGREG_SOFTRESET: This register can only be accessed by the PF. + */ +static ssize_t next_bitstream_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + int next_bitstream; + struct genwqe_dev *cd = dev_get_drvdata(dev); + + switch ((cd->softreset & 0xc) >> 2) { + case 0x2: + next_bitstream = 0; + break; + case 0x3: + next_bitstream = 1; + break; + default: + next_bitstream = -1; + break; /* error */ + } + return sprintf(buf, "%d\n", next_bitstream); +} + +static ssize_t next_bitstream_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + int partition; + struct genwqe_dev *cd = dev_get_drvdata(dev); + + if (kstrtoint(buf, 0, &partition) < 0) + return -EINVAL; + + switch (partition) { + case 0x0: + cd->softreset = 0x78; + break; + case 0x1: + cd->softreset = 0x7c; + break; + default: + return -EINVAL; + } + + __genwqe_writeq(cd, IO_SLC_CFGREG_SOFTRESET, cd->softreset); + return count; +} +static DEVICE_ATTR_RW(next_bitstream); + +static ssize_t reload_bitstream_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + int reload; + struct genwqe_dev *cd = dev_get_drvdata(dev); + + if (kstrtoint(buf, 0, &reload) < 0) + return -EINVAL; + + if (reload == 0x1) { + if (cd->card_state == GENWQE_CARD_UNUSED || + cd->card_state == GENWQE_CARD_USED) + cd->card_state = GENWQE_CARD_RELOAD_BITSTREAM; + else + return -EIO; + } else { + return -EINVAL; + } + + return count; +} +static DEVICE_ATTR_WO(reload_bitstream); + +/* + * Create device_attribute structures / params: name, mode, show, store + * additional flag if valid in VF + */ +static struct attribute *genwqe_attributes[] = { + &dev_attr_tempsens.attr, + &dev_attr_next_bitstream.attr, + &dev_attr_curr_bitstream.attr, + &dev_attr_base_clock.attr, + &dev_attr_type.attr, + &dev_attr_version.attr, + &dev_attr_appid.attr, + &dev_attr_status.attr, + &dev_attr_freerunning_timer.attr, + &dev_attr_queue_working_time.attr, + &dev_attr_reload_bitstream.attr, + NULL, +}; + +static struct attribute *genwqe_normal_attributes[] = { + &dev_attr_type.attr, + &dev_attr_version.attr, + &dev_attr_appid.attr, + &dev_attr_status.attr, + &dev_attr_freerunning_timer.attr, + &dev_attr_queue_working_time.attr, + NULL, +}; + +/** + * genwqe_is_visible() - Determine if sysfs attribute should be visible or not + * + * VFs have restricted mmio capabilities, so not all sysfs entries + * are allowed in VFs. + */ +static umode_t genwqe_is_visible(struct kobject *kobj, + struct attribute *attr, int n) +{ + unsigned int j; + struct device *dev = kobj_to_dev(kobj); + struct genwqe_dev *cd = dev_get_drvdata(dev); + umode_t mode = attr->mode; + + if (genwqe_is_privileged(cd)) + return mode; + + for (j = 0; genwqe_normal_attributes[j] != NULL; j++) + if (genwqe_normal_attributes[j] == attr) + return mode; + + return 0; +} + +static struct attribute_group genwqe_attribute_group = { + .is_visible = genwqe_is_visible, + .attrs = genwqe_attributes, +}; + +const struct attribute_group *genwqe_attribute_groups[] = { + &genwqe_attribute_group, + NULL, +}; diff --git a/drivers/misc/genwqe/card_utils.c b/drivers/misc/genwqe/card_utils.c new file mode 100644 index 000000000..22301bba8 --- /dev/null +++ b/drivers/misc/genwqe/card_utils.c @@ -0,0 +1,1061 @@ +/** + * IBM Accelerator Family 'GenWQE' + * + * (C) Copyright IBM Corp. 2013 + * + * Author: Frank Haverkamp + * Author: Joerg-Stephan Vogt + * Author: Michael Jung + * Author: Michael Ruettger + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +/* + * Miscelanous functionality used in the other GenWQE driver parts. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "genwqe_driver.h" +#include "card_base.h" +#include "card_ddcb.h" + +/** + * __genwqe_writeq() - Write 64-bit register + * @cd: genwqe device descriptor + * @byte_offs: byte offset within BAR + * @val: 64-bit value + * + * Return: 0 if success; < 0 if error + */ +int __genwqe_writeq(struct genwqe_dev *cd, u64 byte_offs, u64 val) +{ + struct pci_dev *pci_dev = cd->pci_dev; + + if (cd->err_inject & GENWQE_INJECT_HARDWARE_FAILURE) + return -EIO; + + if (cd->mmio == NULL) + return -EIO; + + if (pci_channel_offline(pci_dev)) + return -EIO; + + __raw_writeq((__force u64)cpu_to_be64(val), cd->mmio + byte_offs); + return 0; +} + +/** + * __genwqe_readq() - Read 64-bit register + * @cd: genwqe device descriptor + * @byte_offs: offset within BAR + * + * Return: value from register + */ +u64 __genwqe_readq(struct genwqe_dev *cd, u64 byte_offs) +{ + if (cd->err_inject & GENWQE_INJECT_HARDWARE_FAILURE) + return 0xffffffffffffffffull; + + if ((cd->err_inject & GENWQE_INJECT_GFIR_FATAL) && + (byte_offs == IO_SLC_CFGREG_GFIR)) + return 0x000000000000ffffull; + + if ((cd->err_inject & GENWQE_INJECT_GFIR_INFO) && + (byte_offs == IO_SLC_CFGREG_GFIR)) + return 0x00000000ffff0000ull; + + if (cd->mmio == NULL) + return 0xffffffffffffffffull; + + return be64_to_cpu((__force __be64)__raw_readq(cd->mmio + byte_offs)); +} + +/** + * __genwqe_writel() - Write 32-bit register + * @cd: genwqe device descriptor + * @byte_offs: byte offset within BAR + * @val: 32-bit value + * + * Return: 0 if success; < 0 if error + */ +int __genwqe_writel(struct genwqe_dev *cd, u64 byte_offs, u32 val) +{ + struct pci_dev *pci_dev = cd->pci_dev; + + if (cd->err_inject & GENWQE_INJECT_HARDWARE_FAILURE) + return -EIO; + + if (cd->mmio == NULL) + return -EIO; + + if (pci_channel_offline(pci_dev)) + return -EIO; + + __raw_writel((__force u32)cpu_to_be32(val), cd->mmio + byte_offs); + return 0; +} + +/** + * __genwqe_readl() - Read 32-bit register + * @cd: genwqe device descriptor + * @byte_offs: offset within BAR + * + * Return: Value from register + */ +u32 __genwqe_readl(struct genwqe_dev *cd, u64 byte_offs) +{ + if (cd->err_inject & GENWQE_INJECT_HARDWARE_FAILURE) + return 0xffffffff; + + if (cd->mmio == NULL) + return 0xffffffff; + + return be32_to_cpu((__force __be32)__raw_readl(cd->mmio + byte_offs)); +} + +/** + * genwqe_read_app_id() - Extract app_id + * + * app_unitcfg need to be filled with valid data first + */ +int genwqe_read_app_id(struct genwqe_dev *cd, char *app_name, int len) +{ + int i, j; + u32 app_id = (u32)cd->app_unitcfg; + + memset(app_name, 0, len); + for (i = 0, j = 0; j < min(len, 4); j++) { + char ch = (char)((app_id >> (24 - j*8)) & 0xff); + + if (ch == ' ') + continue; + app_name[i++] = isprint(ch) ? ch : 'X'; + } + return i; +} + +/** + * genwqe_init_crc32() - Prepare a lookup table for fast crc32 calculations + * + * Existing kernel functions seem to use a different polynom, + * therefore we could not use them here. + * + * Genwqe's Polynomial = 0x20044009 + */ +#define CRC32_POLYNOMIAL 0x20044009 +static u32 crc32_tab[256]; /* crc32 lookup table */ + +void genwqe_init_crc32(void) +{ + int i, j; + u32 crc; + + for (i = 0; i < 256; i++) { + crc = i << 24; + for (j = 0; j < 8; j++) { + if (crc & 0x80000000) + crc = (crc << 1) ^ CRC32_POLYNOMIAL; + else + crc = (crc << 1); + } + crc32_tab[i] = crc; + } +} + +/** + * genwqe_crc32() - Generate 32-bit crc as required for DDCBs + * @buff: pointer to data buffer + * @len: length of data for calculation + * @init: initial crc (0xffffffff at start) + * + * polynomial = x^32 * + x^29 + x^18 + x^14 + x^3 + 1 (0x20044009) + + * Example: 4 bytes 0x01 0x02 0x03 0x04 with init=0xffffffff should + * result in a crc32 of 0xf33cb7d3. + * + * The existing kernel crc functions did not cover this polynom yet. + * + * Return: crc32 checksum. + */ +u32 genwqe_crc32(u8 *buff, size_t len, u32 init) +{ + int i; + u32 crc; + + crc = init; + while (len--) { + i = ((crc >> 24) ^ *buff++) & 0xFF; + crc = (crc << 8) ^ crc32_tab[i]; + } + return crc; +} + +void *__genwqe_alloc_consistent(struct genwqe_dev *cd, size_t size, + dma_addr_t *dma_handle) +{ + if (get_order(size) >= MAX_ORDER) + return NULL; + + return dma_zalloc_coherent(&cd->pci_dev->dev, size, dma_handle, + GFP_KERNEL); +} + +void __genwqe_free_consistent(struct genwqe_dev *cd, size_t size, + void *vaddr, dma_addr_t dma_handle) +{ + if (vaddr == NULL) + return; + + dma_free_coherent(&cd->pci_dev->dev, size, vaddr, dma_handle); +} + +static void genwqe_unmap_pages(struct genwqe_dev *cd, dma_addr_t *dma_list, + int num_pages) +{ + int i; + struct pci_dev *pci_dev = cd->pci_dev; + + for (i = 0; (i < num_pages) && (dma_list[i] != 0x0); i++) { + pci_unmap_page(pci_dev, dma_list[i], + PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); + dma_list[i] = 0x0; + } +} + +static int genwqe_map_pages(struct genwqe_dev *cd, + struct page **page_list, int num_pages, + dma_addr_t *dma_list) +{ + int i; + struct pci_dev *pci_dev = cd->pci_dev; + + /* establish DMA mapping for requested pages */ + for (i = 0; i < num_pages; i++) { + dma_addr_t daddr; + + dma_list[i] = 0x0; + daddr = pci_map_page(pci_dev, page_list[i], + 0, /* map_offs */ + PAGE_SIZE, + PCI_DMA_BIDIRECTIONAL); /* FIXME rd/rw */ + + if (pci_dma_mapping_error(pci_dev, daddr)) { + dev_err(&pci_dev->dev, + "[%s] err: no dma addr daddr=%016llx!\n", + __func__, (long long)daddr); + goto err; + } + + dma_list[i] = daddr; + } + return 0; + + err: + genwqe_unmap_pages(cd, dma_list, num_pages); + return -EIO; +} + +static int genwqe_sgl_size(int num_pages) +{ + int len, num_tlb = num_pages / 7; + + len = sizeof(struct sg_entry) * (num_pages+num_tlb + 1); + return roundup(len, PAGE_SIZE); +} + +/** + * genwqe_alloc_sync_sgl() - Allocate memory for sgl and overlapping pages + * + * Allocates memory for sgl and overlapping pages. Pages which might + * overlap other user-space memory blocks are being cached for DMAs, + * such that we do not run into syncronization issues. Data is copied + * from user-space into the cached pages. + */ +int genwqe_alloc_sync_sgl(struct genwqe_dev *cd, struct genwqe_sgl *sgl, + void __user *user_addr, size_t user_size, int write) +{ + int ret = -ENOMEM; + struct pci_dev *pci_dev = cd->pci_dev; + + sgl->fpage_offs = offset_in_page((unsigned long)user_addr); + sgl->fpage_size = min_t(size_t, PAGE_SIZE-sgl->fpage_offs, user_size); + sgl->nr_pages = DIV_ROUND_UP(sgl->fpage_offs + user_size, PAGE_SIZE); + sgl->lpage_size = (user_size - sgl->fpage_size) % PAGE_SIZE; + + dev_dbg(&pci_dev->dev, "[%s] uaddr=%p usize=%8ld nr_pages=%ld fpage_offs=%lx fpage_size=%ld lpage_size=%ld\n", + __func__, user_addr, user_size, sgl->nr_pages, + sgl->fpage_offs, sgl->fpage_size, sgl->lpage_size); + + sgl->user_addr = user_addr; + sgl->user_size = user_size; + sgl->write = write; + sgl->sgl_size = genwqe_sgl_size(sgl->nr_pages); + + if (get_order(sgl->sgl_size) > MAX_ORDER) { + dev_err(&pci_dev->dev, + "[%s] err: too much memory requested!\n", __func__); + return ret; + } + + sgl->sgl = __genwqe_alloc_consistent(cd, sgl->sgl_size, + &sgl->sgl_dma_addr); + if (sgl->sgl == NULL) { + dev_err(&pci_dev->dev, + "[%s] err: no memory available!\n", __func__); + return ret; + } + + /* Only use buffering on incomplete pages */ + if ((sgl->fpage_size != 0) && (sgl->fpage_size != PAGE_SIZE)) { + sgl->fpage = __genwqe_alloc_consistent(cd, PAGE_SIZE, + &sgl->fpage_dma_addr); + if (sgl->fpage == NULL) + goto err_out; + + /* Sync with user memory */ + if (copy_from_user(sgl->fpage + sgl->fpage_offs, + user_addr, sgl->fpage_size)) { + ret = -EFAULT; + goto err_out; + } + } + if (sgl->lpage_size != 0) { + sgl->lpage = __genwqe_alloc_consistent(cd, PAGE_SIZE, + &sgl->lpage_dma_addr); + if (sgl->lpage == NULL) + goto err_out1; + + /* Sync with user memory */ + if (copy_from_user(sgl->lpage, user_addr + user_size - + sgl->lpage_size, sgl->lpage_size)) { + ret = -EFAULT; + goto err_out2; + } + } + return 0; + + err_out2: + __genwqe_free_consistent(cd, PAGE_SIZE, sgl->lpage, + sgl->lpage_dma_addr); + sgl->lpage = NULL; + sgl->lpage_dma_addr = 0; + err_out1: + __genwqe_free_consistent(cd, PAGE_SIZE, sgl->fpage, + sgl->fpage_dma_addr); + sgl->fpage = NULL; + sgl->fpage_dma_addr = 0; + err_out: + __genwqe_free_consistent(cd, sgl->sgl_size, sgl->sgl, + sgl->sgl_dma_addr); + sgl->sgl = NULL; + sgl->sgl_dma_addr = 0; + sgl->sgl_size = 0; + + return ret; +} + +int genwqe_setup_sgl(struct genwqe_dev *cd, struct genwqe_sgl *sgl, + dma_addr_t *dma_list) +{ + int i = 0, j = 0, p; + unsigned long dma_offs, map_offs; + dma_addr_t prev_daddr = 0; + struct sg_entry *s, *last_s = NULL; + size_t size = sgl->user_size; + + dma_offs = 128; /* next block if needed/dma_offset */ + map_offs = sgl->fpage_offs; /* offset in first page */ + + s = &sgl->sgl[0]; /* first set of 8 entries */ + p = 0; /* page */ + while (p < sgl->nr_pages) { + dma_addr_t daddr; + unsigned int size_to_map; + + /* always write the chaining entry, cleanup is done later */ + j = 0; + s[j].target_addr = cpu_to_be64(sgl->sgl_dma_addr + dma_offs); + s[j].len = cpu_to_be32(128); + s[j].flags = cpu_to_be32(SG_CHAINED); + j++; + + while (j < 8) { + /* DMA mapping for requested page, offs, size */ + size_to_map = min(size, PAGE_SIZE - map_offs); + + if ((p == 0) && (sgl->fpage != NULL)) { + daddr = sgl->fpage_dma_addr + map_offs; + + } else if ((p == sgl->nr_pages - 1) && + (sgl->lpage != NULL)) { + daddr = sgl->lpage_dma_addr; + } else { + daddr = dma_list[p] + map_offs; + } + + size -= size_to_map; + map_offs = 0; + + if (prev_daddr == daddr) { + u32 prev_len = be32_to_cpu(last_s->len); + + /* pr_info("daddr combining: " + "%016llx/%08x -> %016llx\n", + prev_daddr, prev_len, daddr); */ + + last_s->len = cpu_to_be32(prev_len + + size_to_map); + + p++; /* process next page */ + if (p == sgl->nr_pages) + goto fixup; /* nothing to do */ + + prev_daddr = daddr + size_to_map; + continue; + } + + /* start new entry */ + s[j].target_addr = cpu_to_be64(daddr); + s[j].len = cpu_to_be32(size_to_map); + s[j].flags = cpu_to_be32(SG_DATA); + prev_daddr = daddr + size_to_map; + last_s = &s[j]; + j++; + + p++; /* process next page */ + if (p == sgl->nr_pages) + goto fixup; /* nothing to do */ + } + dma_offs += 128; + s += 8; /* continue 8 elements further */ + } + fixup: + if (j == 1) { /* combining happened on last entry! */ + s -= 8; /* full shift needed on previous sgl block */ + j = 7; /* shift all elements */ + } + + for (i = 0; i < j; i++) /* move elements 1 up */ + s[i] = s[i + 1]; + + s[i].target_addr = cpu_to_be64(0); + s[i].len = cpu_to_be32(0); + s[i].flags = cpu_to_be32(SG_END_LIST); + return 0; +} + +/** + * genwqe_free_sync_sgl() - Free memory for sgl and overlapping pages + * + * After the DMA transfer has been completed we free the memory for + * the sgl and the cached pages. Data is being transferred from cached + * pages into user-space buffers. + */ +int genwqe_free_sync_sgl(struct genwqe_dev *cd, struct genwqe_sgl *sgl) +{ + int rc = 0; + size_t offset; + unsigned long res; + struct pci_dev *pci_dev = cd->pci_dev; + + if (sgl->fpage) { + if (sgl->write) { + res = copy_to_user(sgl->user_addr, + sgl->fpage + sgl->fpage_offs, sgl->fpage_size); + if (res) { + dev_err(&pci_dev->dev, + "[%s] err: copying fpage! (res=%lu)\n", + __func__, res); + rc = -EFAULT; + } + } + __genwqe_free_consistent(cd, PAGE_SIZE, sgl->fpage, + sgl->fpage_dma_addr); + sgl->fpage = NULL; + sgl->fpage_dma_addr = 0; + } + if (sgl->lpage) { + if (sgl->write) { + offset = sgl->user_size - sgl->lpage_size; + res = copy_to_user(sgl->user_addr + offset, sgl->lpage, + sgl->lpage_size); + if (res) { + dev_err(&pci_dev->dev, + "[%s] err: copying lpage! (res=%lu)\n", + __func__, res); + rc = -EFAULT; + } + } + __genwqe_free_consistent(cd, PAGE_SIZE, sgl->lpage, + sgl->lpage_dma_addr); + sgl->lpage = NULL; + sgl->lpage_dma_addr = 0; + } + __genwqe_free_consistent(cd, sgl->sgl_size, sgl->sgl, + sgl->sgl_dma_addr); + + sgl->sgl = NULL; + sgl->sgl_dma_addr = 0x0; + sgl->sgl_size = 0; + return rc; +} + +/** + * genwqe_free_user_pages() - Give pinned pages back + * + * Documentation of get_user_pages is in mm/gup.c: + * + * If the page is written to, set_page_dirty (or set_page_dirty_lock, + * as appropriate) must be called after the page is finished with, and + * before put_page is called. + */ +static int genwqe_free_user_pages(struct page **page_list, + unsigned int nr_pages, int dirty) +{ + unsigned int i; + + for (i = 0; i < nr_pages; i++) { + if (page_list[i] != NULL) { + if (dirty) + set_page_dirty_lock(page_list[i]); + put_page(page_list[i]); + } + } + return 0; +} + +/** + * genwqe_user_vmap() - Map user-space memory to virtual kernel memory + * @cd: pointer to genwqe device + * @m: mapping params + * @uaddr: user virtual address + * @size: size of memory to be mapped + * + * We need to think about how we could speed this up. Of course it is + * not a good idea to do this over and over again, like we are + * currently doing it. Nevertheless, I am curious where on the path + * the performance is spend. Most probably within the memory + * allocation functions, but maybe also in the DMA mapping code. + * + * Restrictions: The maximum size of the possible mapping currently depends + * on the amount of memory we can get using kzalloc() for the + * page_list and pci_alloc_consistent for the sg_list. + * The sg_list is currently itself not scattered, which could + * be fixed with some effort. The page_list must be split into + * PAGE_SIZE chunks too. All that will make the complicated + * code more complicated. + * + * Return: 0 if success + */ +int genwqe_user_vmap(struct genwqe_dev *cd, struct dma_mapping *m, void *uaddr, + unsigned long size) +{ + int rc = -EINVAL; + unsigned long data, offs; + struct pci_dev *pci_dev = cd->pci_dev; + + if ((uaddr == NULL) || (size == 0)) { + m->size = 0; /* mark unused and not added */ + return -EINVAL; + } + m->u_vaddr = uaddr; + m->size = size; + + /* determine space needed for page_list. */ + data = (unsigned long)uaddr; + offs = offset_in_page(data); + if (size > ULONG_MAX - PAGE_SIZE - offs) { + m->size = 0; /* mark unused and not added */ + return -EINVAL; + } + m->nr_pages = DIV_ROUND_UP(offs + size, PAGE_SIZE); + + m->page_list = kcalloc(m->nr_pages, + sizeof(struct page *) + sizeof(dma_addr_t), + GFP_KERNEL); + if (!m->page_list) { + dev_err(&pci_dev->dev, "err: alloc page_list failed\n"); + m->nr_pages = 0; + m->u_vaddr = NULL; + m->size = 0; /* mark unused and not added */ + return -ENOMEM; + } + m->dma_list = (dma_addr_t *)(m->page_list + m->nr_pages); + + /* pin user pages in memory */ + rc = get_user_pages_fast(data & PAGE_MASK, /* page aligned addr */ + m->nr_pages, + m->write, /* readable/writable */ + m->page_list); /* ptrs to pages */ + if (rc < 0) + goto fail_get_user_pages; + + /* assumption: get_user_pages can be killed by signals. */ + if (rc < m->nr_pages) { + genwqe_free_user_pages(m->page_list, rc, m->write); + rc = -EFAULT; + goto fail_get_user_pages; + } + + rc = genwqe_map_pages(cd, m->page_list, m->nr_pages, m->dma_list); + if (rc != 0) + goto fail_free_user_pages; + + return 0; + + fail_free_user_pages: + genwqe_free_user_pages(m->page_list, m->nr_pages, m->write); + + fail_get_user_pages: + kfree(m->page_list); + m->page_list = NULL; + m->dma_list = NULL; + m->nr_pages = 0; + m->u_vaddr = NULL; + m->size = 0; /* mark unused and not added */ + return rc; +} + +/** + * genwqe_user_vunmap() - Undo mapping of user-space mem to virtual kernel + * memory + * @cd: pointer to genwqe device + * @m: mapping params + */ +int genwqe_user_vunmap(struct genwqe_dev *cd, struct dma_mapping *m) +{ + struct pci_dev *pci_dev = cd->pci_dev; + + if (!dma_mapping_used(m)) { + dev_err(&pci_dev->dev, "[%s] err: mapping %p not used!\n", + __func__, m); + return -EINVAL; + } + + if (m->dma_list) + genwqe_unmap_pages(cd, m->dma_list, m->nr_pages); + + if (m->page_list) { + genwqe_free_user_pages(m->page_list, m->nr_pages, m->write); + + kfree(m->page_list); + m->page_list = NULL; + m->dma_list = NULL; + m->nr_pages = 0; + } + + m->u_vaddr = NULL; + m->size = 0; /* mark as unused and not added */ + return 0; +} + +/** + * genwqe_card_type() - Get chip type SLU Configuration Register + * @cd: pointer to the genwqe device descriptor + * Return: 0: Altera Stratix-IV 230 + * 1: Altera Stratix-IV 530 + * 2: Altera Stratix-V A4 + * 3: Altera Stratix-V A7 + */ +u8 genwqe_card_type(struct genwqe_dev *cd) +{ + u64 card_type = cd->slu_unitcfg; + + return (u8)((card_type & IO_SLU_UNITCFG_TYPE_MASK) >> 20); +} + +/** + * genwqe_card_reset() - Reset the card + * @cd: pointer to the genwqe device descriptor + */ +int genwqe_card_reset(struct genwqe_dev *cd) +{ + u64 softrst; + struct pci_dev *pci_dev = cd->pci_dev; + + if (!genwqe_is_privileged(cd)) + return -ENODEV; + + /* new SL */ + __genwqe_writeq(cd, IO_SLC_CFGREG_SOFTRESET, 0x1ull); + msleep(1000); + __genwqe_readq(cd, IO_HSU_FIR_CLR); + __genwqe_readq(cd, IO_APP_FIR_CLR); + __genwqe_readq(cd, IO_SLU_FIR_CLR); + + /* + * Read-modify-write to preserve the stealth bits + * + * For SL >= 039, Stealth WE bit allows removing + * the read-modify-wrote. + * r-m-w may require a mask 0x3C to avoid hitting hard + * reset again for error reset (should be 0, chicken). + */ + softrst = __genwqe_readq(cd, IO_SLC_CFGREG_SOFTRESET) & 0x3cull; + __genwqe_writeq(cd, IO_SLC_CFGREG_SOFTRESET, softrst | 0x2ull); + + /* give ERRORRESET some time to finish */ + msleep(50); + + if (genwqe_need_err_masking(cd)) { + dev_info(&pci_dev->dev, + "[%s] masking errors for old bitstreams\n", __func__); + __genwqe_writeq(cd, IO_SLC_MISC_DEBUG, 0x0aull); + } + return 0; +} + +int genwqe_read_softreset(struct genwqe_dev *cd) +{ + u64 bitstream; + + if (!genwqe_is_privileged(cd)) + return -ENODEV; + + bitstream = __genwqe_readq(cd, IO_SLU_BITSTREAM) & 0x1; + cd->softreset = (bitstream == 0) ? 0x8ull : 0xcull; + return 0; +} + +/** + * genwqe_set_interrupt_capability() - Configure MSI capability structure + * @cd: pointer to the device + * Return: 0 if no error + */ +int genwqe_set_interrupt_capability(struct genwqe_dev *cd, int count) +{ + int rc; + + rc = pci_alloc_irq_vectors(cd->pci_dev, 1, count, PCI_IRQ_MSI); + if (rc < 0) + return rc; + return 0; +} + +/** + * genwqe_reset_interrupt_capability() - Undo genwqe_set_interrupt_capability() + * @cd: pointer to the device + */ +void genwqe_reset_interrupt_capability(struct genwqe_dev *cd) +{ + pci_free_irq_vectors(cd->pci_dev); +} + +/** + * set_reg_idx() - Fill array with data. Ignore illegal offsets. + * @cd: card device + * @r: debug register array + * @i: index to desired entry + * @m: maximum possible entries + * @addr: addr which is read + * @index: index in debug array + * @val: read value + */ +static int set_reg_idx(struct genwqe_dev *cd, struct genwqe_reg *r, + unsigned int *i, unsigned int m, u32 addr, u32 idx, + u64 val) +{ + if (WARN_ON_ONCE(*i >= m)) + return -EFAULT; + + r[*i].addr = addr; + r[*i].idx = idx; + r[*i].val = val; + ++*i; + return 0; +} + +static int set_reg(struct genwqe_dev *cd, struct genwqe_reg *r, + unsigned int *i, unsigned int m, u32 addr, u64 val) +{ + return set_reg_idx(cd, r, i, m, addr, 0, val); +} + +int genwqe_read_ffdc_regs(struct genwqe_dev *cd, struct genwqe_reg *regs, + unsigned int max_regs, int all) +{ + unsigned int i, j, idx = 0; + u32 ufir_addr, ufec_addr, sfir_addr, sfec_addr; + u64 gfir, sluid, appid, ufir, ufec, sfir, sfec; + + /* Global FIR */ + gfir = __genwqe_readq(cd, IO_SLC_CFGREG_GFIR); + set_reg(cd, regs, &idx, max_regs, IO_SLC_CFGREG_GFIR, gfir); + + /* UnitCfg for SLU */ + sluid = __genwqe_readq(cd, IO_SLU_UNITCFG); /* 0x00000000 */ + set_reg(cd, regs, &idx, max_regs, IO_SLU_UNITCFG, sluid); + + /* UnitCfg for APP */ + appid = __genwqe_readq(cd, IO_APP_UNITCFG); /* 0x02000000 */ + set_reg(cd, regs, &idx, max_regs, IO_APP_UNITCFG, appid); + + /* Check all chip Units */ + for (i = 0; i < GENWQE_MAX_UNITS; i++) { + + /* Unit FIR */ + ufir_addr = (i << 24) | 0x008; + ufir = __genwqe_readq(cd, ufir_addr); + set_reg(cd, regs, &idx, max_regs, ufir_addr, ufir); + + /* Unit FEC */ + ufec_addr = (i << 24) | 0x018; + ufec = __genwqe_readq(cd, ufec_addr); + set_reg(cd, regs, &idx, max_regs, ufec_addr, ufec); + + for (j = 0; j < 64; j++) { + /* wherever there is a primary 1, read the 2ndary */ + if (!all && (!(ufir & (1ull << j)))) + continue; + + sfir_addr = (i << 24) | (0x100 + 8 * j); + sfir = __genwqe_readq(cd, sfir_addr); + set_reg(cd, regs, &idx, max_regs, sfir_addr, sfir); + + sfec_addr = (i << 24) | (0x300 + 8 * j); + sfec = __genwqe_readq(cd, sfec_addr); + set_reg(cd, regs, &idx, max_regs, sfec_addr, sfec); + } + } + + /* fill with invalid data until end */ + for (i = idx; i < max_regs; i++) { + regs[i].addr = 0xffffffff; + regs[i].val = 0xffffffffffffffffull; + } + return idx; +} + +/** + * genwqe_ffdc_buff_size() - Calculates the number of dump registers + */ +int genwqe_ffdc_buff_size(struct genwqe_dev *cd, int uid) +{ + int entries = 0, ring, traps, traces, trace_entries; + u32 eevptr_addr, l_addr, d_len, d_type; + u64 eevptr, val, addr; + + eevptr_addr = GENWQE_UID_OFFS(uid) | IO_EXTENDED_ERROR_POINTER; + eevptr = __genwqe_readq(cd, eevptr_addr); + + if ((eevptr != 0x0) && (eevptr != -1ull)) { + l_addr = GENWQE_UID_OFFS(uid) | eevptr; + + while (1) { + val = __genwqe_readq(cd, l_addr); + + if ((val == 0x0) || (val == -1ull)) + break; + + /* 38:24 */ + d_len = (val & 0x0000007fff000000ull) >> 24; + + /* 39 */ + d_type = (val & 0x0000008000000000ull) >> 36; + + if (d_type) { /* repeat */ + entries += d_len; + } else { /* size in bytes! */ + entries += d_len >> 3; + } + + l_addr += 8; + } + } + + for (ring = 0; ring < 8; ring++) { + addr = GENWQE_UID_OFFS(uid) | IO_EXTENDED_DIAG_MAP(ring); + val = __genwqe_readq(cd, addr); + + if ((val == 0x0ull) || (val == -1ull)) + continue; + + traps = (val >> 24) & 0xff; + traces = (val >> 16) & 0xff; + trace_entries = val & 0xffff; + + entries += traps + (traces * trace_entries); + } + return entries; +} + +/** + * genwqe_ffdc_buff_read() - Implements LogoutExtendedErrorRegisters procedure + */ +int genwqe_ffdc_buff_read(struct genwqe_dev *cd, int uid, + struct genwqe_reg *regs, unsigned int max_regs) +{ + int i, traps, traces, trace, trace_entries, trace_entry, ring; + unsigned int idx = 0; + u32 eevptr_addr, l_addr, d_addr, d_len, d_type; + u64 eevptr, e, val, addr; + + eevptr_addr = GENWQE_UID_OFFS(uid) | IO_EXTENDED_ERROR_POINTER; + eevptr = __genwqe_readq(cd, eevptr_addr); + + if ((eevptr != 0x0) && (eevptr != 0xffffffffffffffffull)) { + l_addr = GENWQE_UID_OFFS(uid) | eevptr; + while (1) { + e = __genwqe_readq(cd, l_addr); + if ((e == 0x0) || (e == 0xffffffffffffffffull)) + break; + + d_addr = (e & 0x0000000000ffffffull); /* 23:0 */ + d_len = (e & 0x0000007fff000000ull) >> 24; /* 38:24 */ + d_type = (e & 0x0000008000000000ull) >> 36; /* 39 */ + d_addr |= GENWQE_UID_OFFS(uid); + + if (d_type) { + for (i = 0; i < (int)d_len; i++) { + val = __genwqe_readq(cd, d_addr); + set_reg_idx(cd, regs, &idx, max_regs, + d_addr, i, val); + } + } else { + d_len >>= 3; /* Size in bytes! */ + for (i = 0; i < (int)d_len; i++, d_addr += 8) { + val = __genwqe_readq(cd, d_addr); + set_reg_idx(cd, regs, &idx, max_regs, + d_addr, 0, val); + } + } + l_addr += 8; + } + } + + /* + * To save time, there are only 6 traces poplulated on Uid=2, + * Ring=1. each with iters=512. + */ + for (ring = 0; ring < 8; ring++) { /* 0 is fls, 1 is fds, + 2...7 are ASI rings */ + addr = GENWQE_UID_OFFS(uid) | IO_EXTENDED_DIAG_MAP(ring); + val = __genwqe_readq(cd, addr); + + if ((val == 0x0ull) || (val == -1ull)) + continue; + + traps = (val >> 24) & 0xff; /* Number of Traps */ + traces = (val >> 16) & 0xff; /* Number of Traces */ + trace_entries = val & 0xffff; /* Entries per trace */ + + /* Note: This is a combined loop that dumps both the traps */ + /* (for the trace == 0 case) as well as the traces 1 to */ + /* 'traces'. */ + for (trace = 0; trace <= traces; trace++) { + u32 diag_sel = + GENWQE_EXTENDED_DIAG_SELECTOR(ring, trace); + + addr = (GENWQE_UID_OFFS(uid) | + IO_EXTENDED_DIAG_SELECTOR); + __genwqe_writeq(cd, addr, diag_sel); + + for (trace_entry = 0; + trace_entry < (trace ? trace_entries : traps); + trace_entry++) { + addr = (GENWQE_UID_OFFS(uid) | + IO_EXTENDED_DIAG_READ_MBX); + val = __genwqe_readq(cd, addr); + set_reg_idx(cd, regs, &idx, max_regs, addr, + (diag_sel<<16) | trace_entry, val); + } + } + } + return 0; +} + +/** + * genwqe_write_vreg() - Write register in virtual window + * + * Note, these registers are only accessible to the PF through the + * VF-window. It is not intended for the VF to access. + */ +int genwqe_write_vreg(struct genwqe_dev *cd, u32 reg, u64 val, int func) +{ + __genwqe_writeq(cd, IO_PF_SLC_VIRTUAL_WINDOW, func & 0xf); + __genwqe_writeq(cd, reg, val); + return 0; +} + +/** + * genwqe_read_vreg() - Read register in virtual window + * + * Note, these registers are only accessible to the PF through the + * VF-window. It is not intended for the VF to access. + */ +u64 genwqe_read_vreg(struct genwqe_dev *cd, u32 reg, int func) +{ + __genwqe_writeq(cd, IO_PF_SLC_VIRTUAL_WINDOW, func & 0xf); + return __genwqe_readq(cd, reg); +} + +/** + * genwqe_base_clock_frequency() - Deteremine base clock frequency of the card + * + * Note: From a design perspective it turned out to be a bad idea to + * use codes here to specifiy the frequency/speed values. An old + * driver cannot understand new codes and is therefore always a + * problem. Better is to measure out the value or put the + * speed/frequency directly into a register which is always a valid + * value for old as well as for new software. + * + * Return: Card clock in MHz + */ +int genwqe_base_clock_frequency(struct genwqe_dev *cd) +{ + u16 speed; /* MHz MHz MHz MHz */ + static const int speed_grade[] = { 250, 200, 166, 175 }; + + speed = (u16)((cd->slu_unitcfg >> 28) & 0x0full); + if (speed >= ARRAY_SIZE(speed_grade)) + return 0; /* illegal value */ + + return speed_grade[speed]; +} + +/** + * genwqe_stop_traps() - Stop traps + * + * Before reading out the analysis data, we need to stop the traps. + */ +void genwqe_stop_traps(struct genwqe_dev *cd) +{ + __genwqe_writeq(cd, IO_SLC_MISC_DEBUG_SET, 0xcull); +} + +/** + * genwqe_start_traps() - Start traps + * + * After having read the data, we can/must enable the traps again. + */ +void genwqe_start_traps(struct genwqe_dev *cd) +{ + __genwqe_writeq(cd, IO_SLC_MISC_DEBUG_CLR, 0xcull); + + if (genwqe_need_err_masking(cd)) + __genwqe_writeq(cd, IO_SLC_MISC_DEBUG, 0x0aull); +} diff --git a/drivers/misc/genwqe/genwqe_driver.h b/drivers/misc/genwqe/genwqe_driver.h new file mode 100644 index 000000000..15355350e --- /dev/null +++ b/drivers/misc/genwqe/genwqe_driver.h @@ -0,0 +1,77 @@ +#ifndef __GENWQE_DRIVER_H__ +#define __GENWQE_DRIVER_H__ + +/** + * IBM Accelerator Family 'GenWQE' + * + * (C) Copyright IBM Corp. 2013 + * + * Author: Frank Haverkamp + * Author: Joerg-Stephan Vogt + * Author: Michael Jung + * Author: Michael Ruettger + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#define DRV_VERSION "2.0.25" + +/* + * Static minor number assignement, until we decide/implement + * something dynamic. + */ +#define GENWQE_MAX_MINOR 128 /* up to 128 possible genwqe devices */ + +/** + * genwqe_requ_alloc() - Allocate a new DDCB execution request + * + * This data structure contains the user visiable fields of the DDCB + * to be executed. + * + * Return: ptr to genwqe_ddcb_cmd data structure + */ +struct genwqe_ddcb_cmd *ddcb_requ_alloc(void); + +/** + * ddcb_requ_free() - Free DDCB execution request. + * @req: ptr to genwqe_ddcb_cmd data structure. + */ +void ddcb_requ_free(struct genwqe_ddcb_cmd *req); + +u32 genwqe_crc32(u8 *buff, size_t len, u32 init); + +static inline void genwqe_hexdump(struct pci_dev *pci_dev, + const void *buff, unsigned int size) +{ + char prefix[32]; + + scnprintf(prefix, sizeof(prefix), "%s %s: ", + GENWQE_DEVNAME, pci_name(pci_dev)); + + print_hex_dump_debug(prefix, DUMP_PREFIX_OFFSET, 16, 1, buff, + size, true); +} + +#endif /* __GENWQE_DRIVER_H__ */ diff --git a/drivers/misc/hmc6352.c b/drivers/misc/hmc6352.c new file mode 100644 index 000000000..38f90e179 --- /dev/null +++ b/drivers/misc/hmc6352.c @@ -0,0 +1,157 @@ +/* + * hmc6352.c - Honeywell Compass Driver + * + * Copyright (C) 2009 Intel Corp + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + */ + +#include +#include +#include +#include +#include +#include +#include + +static DEFINE_MUTEX(compass_mutex); + +static int compass_command(struct i2c_client *c, u8 cmd) +{ + int ret = i2c_master_send(c, &cmd, 1); + if (ret < 0) + dev_warn(&c->dev, "command '%c' failed.\n", cmd); + return ret; +} + +static int compass_store(struct device *dev, const char *buf, size_t count, + const char *map) +{ + struct i2c_client *c = to_i2c_client(dev); + int ret; + unsigned long val; + + ret = kstrtoul(buf, 10, &val); + if (ret) + return ret; + if (val >= strlen(map)) + return -EINVAL; + val = array_index_nospec(val, strlen(map)); + mutex_lock(&compass_mutex); + ret = compass_command(c, map[val]); + mutex_unlock(&compass_mutex); + if (ret < 0) + return ret; + return count; +} + +static ssize_t compass_calibration_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + return compass_store(dev, buf, count, "EC"); +} + +static ssize_t compass_power_mode_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + return compass_store(dev, buf, count, "SW"); +} + +static ssize_t compass_heading_data_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct i2c_client *client = to_i2c_client(dev); + unsigned char i2c_data[2]; + int ret; + + mutex_lock(&compass_mutex); + ret = compass_command(client, 'A'); + if (ret != 1) { + mutex_unlock(&compass_mutex); + return ret; + } + msleep(10); /* sending 'A' cmd we need to wait for 7-10 millisecs */ + ret = i2c_master_recv(client, i2c_data, 2); + mutex_unlock(&compass_mutex); + if (ret < 0) { + dev_warn(dev, "i2c read data cmd failed\n"); + return ret; + } + ret = (i2c_data[0] << 8) | i2c_data[1]; + return sprintf(buf, "%d.%d\n", ret/10, ret%10); +} + + +static DEVICE_ATTR(heading0_input, S_IRUGO, compass_heading_data_show, NULL); +static DEVICE_ATTR(calibration, S_IWUSR, NULL, compass_calibration_store); +static DEVICE_ATTR(power_state, S_IWUSR, NULL, compass_power_mode_store); + +static struct attribute *mid_att_compass[] = { + &dev_attr_heading0_input.attr, + &dev_attr_calibration.attr, + &dev_attr_power_state.attr, + NULL +}; + +static const struct attribute_group m_compass_gr = { + .name = "hmc6352", + .attrs = mid_att_compass +}; + +static int hmc6352_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + int res; + + res = sysfs_create_group(&client->dev.kobj, &m_compass_gr); + if (res) { + dev_err(&client->dev, "device_create_file failed\n"); + return res; + } + dev_info(&client->dev, "%s HMC6352 compass chip found\n", + client->name); + return 0; +} + +static int hmc6352_remove(struct i2c_client *client) +{ + sysfs_remove_group(&client->dev.kobj, &m_compass_gr); + return 0; +} + +static const struct i2c_device_id hmc6352_id[] = { + { "hmc6352", 0 }, + { } +}; + +MODULE_DEVICE_TABLE(i2c, hmc6352_id); + +static struct i2c_driver hmc6352_driver = { + .driver = { + .name = "hmc6352", + }, + .probe = hmc6352_probe, + .remove = hmc6352_remove, + .id_table = hmc6352_id, +}; + +module_i2c_driver(hmc6352_driver); + +MODULE_AUTHOR("Kalhan Trisal + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "hpilo.h" + +static struct class *ilo_class; +static unsigned int ilo_major; +static unsigned int max_ccb = 16; +static char ilo_hwdev[MAX_ILO_DEV]; + +static inline int get_entry_id(int entry) +{ + return (entry & ENTRY_MASK_DESCRIPTOR) >> ENTRY_BITPOS_DESCRIPTOR; +} + +static inline int get_entry_len(int entry) +{ + return ((entry & ENTRY_MASK_QWORDS) >> ENTRY_BITPOS_QWORDS) << 3; +} + +static inline int mk_entry(int id, int len) +{ + int qlen = len & 7 ? (len >> 3) + 1 : len >> 3; + return id << ENTRY_BITPOS_DESCRIPTOR | qlen << ENTRY_BITPOS_QWORDS; +} + +static inline int desc_mem_sz(int nr_entry) +{ + return nr_entry << L2_QENTRY_SZ; +} + +/* + * FIFO queues, shared with hardware. + * + * If a queue has empty slots, an entry is added to the queue tail, + * and that entry is marked as occupied. + * Entries can be dequeued from the head of the list, when the device + * has marked the entry as consumed. + * + * Returns true on successful queue/dequeue, false on failure. + */ +static int fifo_enqueue(struct ilo_hwinfo *hw, char *fifobar, int entry) +{ + struct fifo *fifo_q = FIFOBARTOHANDLE(fifobar); + unsigned long flags; + int ret = 0; + + spin_lock_irqsave(&hw->fifo_lock, flags); + if (!(fifo_q->fifobar[(fifo_q->tail + 1) & fifo_q->imask] + & ENTRY_MASK_O)) { + fifo_q->fifobar[fifo_q->tail & fifo_q->imask] |= + (entry & ENTRY_MASK_NOSTATE) | fifo_q->merge; + fifo_q->tail += 1; + ret = 1; + } + spin_unlock_irqrestore(&hw->fifo_lock, flags); + + return ret; +} + +static int fifo_dequeue(struct ilo_hwinfo *hw, char *fifobar, int *entry) +{ + struct fifo *fifo_q = FIFOBARTOHANDLE(fifobar); + unsigned long flags; + int ret = 0; + u64 c; + + spin_lock_irqsave(&hw->fifo_lock, flags); + c = fifo_q->fifobar[fifo_q->head & fifo_q->imask]; + if (c & ENTRY_MASK_C) { + if (entry) + *entry = c & ENTRY_MASK_NOSTATE; + + fifo_q->fifobar[fifo_q->head & fifo_q->imask] = + (c | ENTRY_MASK) + 1; + fifo_q->head += 1; + ret = 1; + } + spin_unlock_irqrestore(&hw->fifo_lock, flags); + + return ret; +} + +static int fifo_check_recv(struct ilo_hwinfo *hw, char *fifobar) +{ + struct fifo *fifo_q = FIFOBARTOHANDLE(fifobar); + unsigned long flags; + int ret = 0; + u64 c; + + spin_lock_irqsave(&hw->fifo_lock, flags); + c = fifo_q->fifobar[fifo_q->head & fifo_q->imask]; + if (c & ENTRY_MASK_C) + ret = 1; + spin_unlock_irqrestore(&hw->fifo_lock, flags); + + return ret; +} + +static int ilo_pkt_enqueue(struct ilo_hwinfo *hw, struct ccb *ccb, + int dir, int id, int len) +{ + char *fifobar; + int entry; + + if (dir == SENDQ) + fifobar = ccb->ccb_u1.send_fifobar; + else + fifobar = ccb->ccb_u3.recv_fifobar; + + entry = mk_entry(id, len); + return fifo_enqueue(hw, fifobar, entry); +} + +static int ilo_pkt_dequeue(struct ilo_hwinfo *hw, struct ccb *ccb, + int dir, int *id, int *len, void **pkt) +{ + char *fifobar, *desc; + int entry = 0, pkt_id = 0; + int ret; + + if (dir == SENDQ) { + fifobar = ccb->ccb_u1.send_fifobar; + desc = ccb->ccb_u2.send_desc; + } else { + fifobar = ccb->ccb_u3.recv_fifobar; + desc = ccb->ccb_u4.recv_desc; + } + + ret = fifo_dequeue(hw, fifobar, &entry); + if (ret) { + pkt_id = get_entry_id(entry); + if (id) + *id = pkt_id; + if (len) + *len = get_entry_len(entry); + if (pkt) + *pkt = (void *)(desc + desc_mem_sz(pkt_id)); + } + + return ret; +} + +static int ilo_pkt_recv(struct ilo_hwinfo *hw, struct ccb *ccb) +{ + char *fifobar = ccb->ccb_u3.recv_fifobar; + + return fifo_check_recv(hw, fifobar); +} + +static inline void doorbell_set(struct ccb *ccb) +{ + iowrite8(1, ccb->ccb_u5.db_base); +} + +static inline void doorbell_clr(struct ccb *ccb) +{ + iowrite8(2, ccb->ccb_u5.db_base); +} + +static inline int ctrl_set(int l2sz, int idxmask, int desclim) +{ + int active = 0, go = 1; + return l2sz << CTRL_BITPOS_L2SZ | + idxmask << CTRL_BITPOS_FIFOINDEXMASK | + desclim << CTRL_BITPOS_DESCLIMIT | + active << CTRL_BITPOS_A | + go << CTRL_BITPOS_G; +} + +static void ctrl_setup(struct ccb *ccb, int nr_desc, int l2desc_sz) +{ + /* for simplicity, use the same parameters for send and recv ctrls */ + ccb->send_ctrl = ctrl_set(l2desc_sz, nr_desc-1, nr_desc-1); + ccb->recv_ctrl = ctrl_set(l2desc_sz, nr_desc-1, nr_desc-1); +} + +static inline int fifo_sz(int nr_entry) +{ + /* size of a fifo is determined by the number of entries it contains */ + return (nr_entry * sizeof(u64)) + FIFOHANDLESIZE; +} + +static void fifo_setup(void *base_addr, int nr_entry) +{ + struct fifo *fifo_q = base_addr; + int i; + + /* set up an empty fifo */ + fifo_q->head = 0; + fifo_q->tail = 0; + fifo_q->reset = 0; + fifo_q->nrents = nr_entry; + fifo_q->imask = nr_entry - 1; + fifo_q->merge = ENTRY_MASK_O; + + for (i = 0; i < nr_entry; i++) + fifo_q->fifobar[i] = 0; +} + +static void ilo_ccb_close(struct pci_dev *pdev, struct ccb_data *data) +{ + struct ccb *driver_ccb = &data->driver_ccb; + struct ccb __iomem *device_ccb = data->mapped_ccb; + int retries; + + /* complicated dance to tell the hw we are stopping */ + doorbell_clr(driver_ccb); + iowrite32(ioread32(&device_ccb->send_ctrl) & ~(1 << CTRL_BITPOS_G), + &device_ccb->send_ctrl); + iowrite32(ioread32(&device_ccb->recv_ctrl) & ~(1 << CTRL_BITPOS_G), + &device_ccb->recv_ctrl); + + /* give iLO some time to process stop request */ + for (retries = MAX_WAIT; retries > 0; retries--) { + doorbell_set(driver_ccb); + udelay(WAIT_TIME); + if (!(ioread32(&device_ccb->send_ctrl) & (1 << CTRL_BITPOS_A)) + && + !(ioread32(&device_ccb->recv_ctrl) & (1 << CTRL_BITPOS_A))) + break; + } + if (retries == 0) + dev_err(&pdev->dev, "Closing, but controller still active\n"); + + /* clear the hw ccb */ + memset_io(device_ccb, 0, sizeof(struct ccb)); + + /* free resources used to back send/recv queues */ + pci_free_consistent(pdev, data->dma_size, data->dma_va, data->dma_pa); +} + +static int ilo_ccb_setup(struct ilo_hwinfo *hw, struct ccb_data *data, int slot) +{ + char *dma_va; + dma_addr_t dma_pa; + struct ccb *driver_ccb, *ilo_ccb; + + driver_ccb = &data->driver_ccb; + ilo_ccb = &data->ilo_ccb; + + data->dma_size = 2 * fifo_sz(NR_QENTRY) + + 2 * desc_mem_sz(NR_QENTRY) + + ILO_START_ALIGN + ILO_CACHE_SZ; + + data->dma_va = pci_alloc_consistent(hw->ilo_dev, data->dma_size, + &data->dma_pa); + if (!data->dma_va) + return -ENOMEM; + + dma_va = (char *)data->dma_va; + dma_pa = data->dma_pa; + + memset(dma_va, 0, data->dma_size); + + dma_va = (char *)roundup((unsigned long)dma_va, ILO_START_ALIGN); + dma_pa = roundup(dma_pa, ILO_START_ALIGN); + + /* + * Create two ccb's, one with virt addrs, one with phys addrs. + * Copy the phys addr ccb to device shared mem. + */ + ctrl_setup(driver_ccb, NR_QENTRY, L2_QENTRY_SZ); + ctrl_setup(ilo_ccb, NR_QENTRY, L2_QENTRY_SZ); + + fifo_setup(dma_va, NR_QENTRY); + driver_ccb->ccb_u1.send_fifobar = dma_va + FIFOHANDLESIZE; + ilo_ccb->ccb_u1.send_fifobar_pa = dma_pa + FIFOHANDLESIZE; + dma_va += fifo_sz(NR_QENTRY); + dma_pa += fifo_sz(NR_QENTRY); + + dma_va = (char *)roundup((unsigned long)dma_va, ILO_CACHE_SZ); + dma_pa = roundup(dma_pa, ILO_CACHE_SZ); + + fifo_setup(dma_va, NR_QENTRY); + driver_ccb->ccb_u3.recv_fifobar = dma_va + FIFOHANDLESIZE; + ilo_ccb->ccb_u3.recv_fifobar_pa = dma_pa + FIFOHANDLESIZE; + dma_va += fifo_sz(NR_QENTRY); + dma_pa += fifo_sz(NR_QENTRY); + + driver_ccb->ccb_u2.send_desc = dma_va; + ilo_ccb->ccb_u2.send_desc_pa = dma_pa; + dma_pa += desc_mem_sz(NR_QENTRY); + dma_va += desc_mem_sz(NR_QENTRY); + + driver_ccb->ccb_u4.recv_desc = dma_va; + ilo_ccb->ccb_u4.recv_desc_pa = dma_pa; + + driver_ccb->channel = slot; + ilo_ccb->channel = slot; + + driver_ccb->ccb_u5.db_base = hw->db_vaddr + (slot << L2_DB_SIZE); + ilo_ccb->ccb_u5.db_base = NULL; /* hw ccb's doorbell is not used */ + + return 0; +} + +static void ilo_ccb_open(struct ilo_hwinfo *hw, struct ccb_data *data, int slot) +{ + int pkt_id, pkt_sz; + struct ccb *driver_ccb = &data->driver_ccb; + + /* copy the ccb with physical addrs to device memory */ + data->mapped_ccb = (struct ccb __iomem *) + (hw->ram_vaddr + (slot * ILOHW_CCB_SZ)); + memcpy_toio(data->mapped_ccb, &data->ilo_ccb, sizeof(struct ccb)); + + /* put packets on the send and receive queues */ + pkt_sz = 0; + for (pkt_id = 0; pkt_id < NR_QENTRY; pkt_id++) { + ilo_pkt_enqueue(hw, driver_ccb, SENDQ, pkt_id, pkt_sz); + doorbell_set(driver_ccb); + } + + pkt_sz = desc_mem_sz(1); + for (pkt_id = 0; pkt_id < NR_QENTRY; pkt_id++) + ilo_pkt_enqueue(hw, driver_ccb, RECVQ, pkt_id, pkt_sz); + + /* the ccb is ready to use */ + doorbell_clr(driver_ccb); +} + +static int ilo_ccb_verify(struct ilo_hwinfo *hw, struct ccb_data *data) +{ + int pkt_id, i; + struct ccb *driver_ccb = &data->driver_ccb; + + /* make sure iLO is really handling requests */ + for (i = MAX_WAIT; i > 0; i--) { + if (ilo_pkt_dequeue(hw, driver_ccb, SENDQ, &pkt_id, NULL, NULL)) + break; + udelay(WAIT_TIME); + } + + if (i == 0) { + dev_err(&hw->ilo_dev->dev, "Open could not dequeue a packet\n"); + return -EBUSY; + } + + ilo_pkt_enqueue(hw, driver_ccb, SENDQ, pkt_id, 0); + doorbell_set(driver_ccb); + return 0; +} + +static inline int is_channel_reset(struct ccb *ccb) +{ + /* check for this particular channel needing a reset */ + return FIFOBARTOHANDLE(ccb->ccb_u1.send_fifobar)->reset; +} + +static inline void set_channel_reset(struct ccb *ccb) +{ + /* set a flag indicating this channel needs a reset */ + FIFOBARTOHANDLE(ccb->ccb_u1.send_fifobar)->reset = 1; +} + +static inline int get_device_outbound(struct ilo_hwinfo *hw) +{ + return ioread32(&hw->mmio_vaddr[DB_OUT]); +} + +static inline int is_db_reset(int db_out) +{ + return db_out & (1 << DB_RESET); +} + +static inline int is_device_reset(struct ilo_hwinfo *hw) +{ + /* check for global reset condition */ + return is_db_reset(get_device_outbound(hw)); +} + +static inline void clear_pending_db(struct ilo_hwinfo *hw, int clr) +{ + iowrite32(clr, &hw->mmio_vaddr[DB_OUT]); +} + +static inline void clear_device(struct ilo_hwinfo *hw) +{ + /* clear the device (reset bits, pending channel entries) */ + clear_pending_db(hw, -1); +} + +static inline void ilo_enable_interrupts(struct ilo_hwinfo *hw) +{ + iowrite8(ioread8(&hw->mmio_vaddr[DB_IRQ]) | 1, &hw->mmio_vaddr[DB_IRQ]); +} + +static inline void ilo_disable_interrupts(struct ilo_hwinfo *hw) +{ + iowrite8(ioread8(&hw->mmio_vaddr[DB_IRQ]) & ~1, + &hw->mmio_vaddr[DB_IRQ]); +} + +static void ilo_set_reset(struct ilo_hwinfo *hw) +{ + int slot; + + /* + * Mapped memory is zeroed on ilo reset, so set a per ccb flag + * to indicate that this ccb needs to be closed and reopened. + */ + for (slot = 0; slot < max_ccb; slot++) { + if (!hw->ccb_alloc[slot]) + continue; + set_channel_reset(&hw->ccb_alloc[slot]->driver_ccb); + } +} + +static ssize_t ilo_read(struct file *fp, char __user *buf, + size_t len, loff_t *off) +{ + int err, found, cnt, pkt_id, pkt_len; + struct ccb_data *data = fp->private_data; + struct ccb *driver_ccb = &data->driver_ccb; + struct ilo_hwinfo *hw = data->ilo_hw; + void *pkt; + + if (is_channel_reset(driver_ccb)) { + /* + * If the device has been reset, applications + * need to close and reopen all ccbs. + */ + return -ENODEV; + } + + /* + * This function is to be called when data is expected + * in the channel, and will return an error if no packet is found + * during the loop below. The sleep/retry logic is to allow + * applications to call read() immediately post write(), + * and give iLO some time to process the sent packet. + */ + cnt = 20; + do { + /* look for a received packet */ + found = ilo_pkt_dequeue(hw, driver_ccb, RECVQ, &pkt_id, + &pkt_len, &pkt); + if (found) + break; + cnt--; + msleep(100); + } while (!found && cnt); + + if (!found) + return -EAGAIN; + + /* only copy the length of the received packet */ + if (pkt_len < len) + len = pkt_len; + + err = copy_to_user(buf, pkt, len); + + /* return the received packet to the queue */ + ilo_pkt_enqueue(hw, driver_ccb, RECVQ, pkt_id, desc_mem_sz(1)); + + return err ? -EFAULT : len; +} + +static ssize_t ilo_write(struct file *fp, const char __user *buf, + size_t len, loff_t *off) +{ + int err, pkt_id, pkt_len; + struct ccb_data *data = fp->private_data; + struct ccb *driver_ccb = &data->driver_ccb; + struct ilo_hwinfo *hw = data->ilo_hw; + void *pkt; + + if (is_channel_reset(driver_ccb)) + return -ENODEV; + + /* get a packet to send the user command */ + if (!ilo_pkt_dequeue(hw, driver_ccb, SENDQ, &pkt_id, &pkt_len, &pkt)) + return -EBUSY; + + /* limit the length to the length of the packet */ + if (pkt_len < len) + len = pkt_len; + + /* on failure, set the len to 0 to return empty packet to the device */ + err = copy_from_user(pkt, buf, len); + if (err) + len = 0; + + /* send the packet */ + ilo_pkt_enqueue(hw, driver_ccb, SENDQ, pkt_id, len); + doorbell_set(driver_ccb); + + return err ? -EFAULT : len; +} + +static __poll_t ilo_poll(struct file *fp, poll_table *wait) +{ + struct ccb_data *data = fp->private_data; + struct ccb *driver_ccb = &data->driver_ccb; + + poll_wait(fp, &data->ccb_waitq, wait); + + if (is_channel_reset(driver_ccb)) + return EPOLLERR; + else if (ilo_pkt_recv(data->ilo_hw, driver_ccb)) + return EPOLLIN | EPOLLRDNORM; + + return 0; +} + +static int ilo_close(struct inode *ip, struct file *fp) +{ + int slot; + struct ccb_data *data; + struct ilo_hwinfo *hw; + unsigned long flags; + + slot = iminor(ip) % max_ccb; + hw = container_of(ip->i_cdev, struct ilo_hwinfo, cdev); + + spin_lock(&hw->open_lock); + + if (hw->ccb_alloc[slot]->ccb_cnt == 1) { + + data = fp->private_data; + + spin_lock_irqsave(&hw->alloc_lock, flags); + hw->ccb_alloc[slot] = NULL; + spin_unlock_irqrestore(&hw->alloc_lock, flags); + + ilo_ccb_close(hw->ilo_dev, data); + + kfree(data); + } else + hw->ccb_alloc[slot]->ccb_cnt--; + + spin_unlock(&hw->open_lock); + + return 0; +} + +static int ilo_open(struct inode *ip, struct file *fp) +{ + int slot, error; + struct ccb_data *data; + struct ilo_hwinfo *hw; + unsigned long flags; + + slot = iminor(ip) % max_ccb; + hw = container_of(ip->i_cdev, struct ilo_hwinfo, cdev); + + /* new ccb allocation */ + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + spin_lock(&hw->open_lock); + + /* each fd private_data holds sw/hw view of ccb */ + if (hw->ccb_alloc[slot] == NULL) { + /* create a channel control block for this minor */ + error = ilo_ccb_setup(hw, data, slot); + if (error) { + kfree(data); + goto out; + } + + data->ccb_cnt = 1; + data->ccb_excl = fp->f_flags & O_EXCL; + data->ilo_hw = hw; + init_waitqueue_head(&data->ccb_waitq); + + /* write the ccb to hw */ + spin_lock_irqsave(&hw->alloc_lock, flags); + ilo_ccb_open(hw, data, slot); + hw->ccb_alloc[slot] = data; + spin_unlock_irqrestore(&hw->alloc_lock, flags); + + /* make sure the channel is functional */ + error = ilo_ccb_verify(hw, data); + if (error) { + + spin_lock_irqsave(&hw->alloc_lock, flags); + hw->ccb_alloc[slot] = NULL; + spin_unlock_irqrestore(&hw->alloc_lock, flags); + + ilo_ccb_close(hw->ilo_dev, data); + + kfree(data); + goto out; + } + + } else { + kfree(data); + if (fp->f_flags & O_EXCL || hw->ccb_alloc[slot]->ccb_excl) { + /* + * The channel exists, and either this open + * or a previous open of this channel wants + * exclusive access. + */ + error = -EBUSY; + } else { + hw->ccb_alloc[slot]->ccb_cnt++; + error = 0; + } + } +out: + spin_unlock(&hw->open_lock); + + if (!error) + fp->private_data = hw->ccb_alloc[slot]; + + return error; +} + +static const struct file_operations ilo_fops = { + .owner = THIS_MODULE, + .read = ilo_read, + .write = ilo_write, + .poll = ilo_poll, + .open = ilo_open, + .release = ilo_close, + .llseek = noop_llseek, +}; + +static irqreturn_t ilo_isr(int irq, void *data) +{ + struct ilo_hwinfo *hw = data; + int pending, i; + + spin_lock(&hw->alloc_lock); + + /* check for ccbs which have data */ + pending = get_device_outbound(hw); + if (!pending) { + spin_unlock(&hw->alloc_lock); + return IRQ_NONE; + } + + if (is_db_reset(pending)) { + /* wake up all ccbs if the device was reset */ + pending = -1; + ilo_set_reset(hw); + } + + for (i = 0; i < max_ccb; i++) { + if (!hw->ccb_alloc[i]) + continue; + if (pending & (1 << i)) + wake_up_interruptible(&hw->ccb_alloc[i]->ccb_waitq); + } + + /* clear the device of the channels that have been handled */ + clear_pending_db(hw, pending); + + spin_unlock(&hw->alloc_lock); + + return IRQ_HANDLED; +} + +static void ilo_unmap_device(struct pci_dev *pdev, struct ilo_hwinfo *hw) +{ + pci_iounmap(pdev, hw->db_vaddr); + pci_iounmap(pdev, hw->ram_vaddr); + pci_iounmap(pdev, hw->mmio_vaddr); +} + +static int ilo_map_device(struct pci_dev *pdev, struct ilo_hwinfo *hw) +{ + int bar; + unsigned long off; + + /* map the memory mapped i/o registers */ + hw->mmio_vaddr = pci_iomap(pdev, 1, 0); + if (hw->mmio_vaddr == NULL) { + dev_err(&pdev->dev, "Error mapping mmio\n"); + goto out; + } + + /* map the adapter shared memory region */ + if (pdev->subsystem_device == 0x00E4) { + bar = 5; + /* Last 8k is reserved for CCBs */ + off = pci_resource_len(pdev, bar) - 0x2000; + } else { + bar = 2; + off = 0; + } + hw->ram_vaddr = pci_iomap_range(pdev, bar, off, max_ccb * ILOHW_CCB_SZ); + if (hw->ram_vaddr == NULL) { + dev_err(&pdev->dev, "Error mapping shared mem\n"); + goto mmio_free; + } + + /* map the doorbell aperture */ + hw->db_vaddr = pci_iomap(pdev, 3, max_ccb * ONE_DB_SIZE); + if (hw->db_vaddr == NULL) { + dev_err(&pdev->dev, "Error mapping doorbell\n"); + goto ram_free; + } + + return 0; +ram_free: + pci_iounmap(pdev, hw->ram_vaddr); +mmio_free: + pci_iounmap(pdev, hw->mmio_vaddr); +out: + return -ENOMEM; +} + +static void ilo_remove(struct pci_dev *pdev) +{ + int i, minor; + struct ilo_hwinfo *ilo_hw = pci_get_drvdata(pdev); + + if (!ilo_hw) + return; + + clear_device(ilo_hw); + + minor = MINOR(ilo_hw->cdev.dev); + for (i = minor; i < minor + max_ccb; i++) + device_destroy(ilo_class, MKDEV(ilo_major, i)); + + cdev_del(&ilo_hw->cdev); + ilo_disable_interrupts(ilo_hw); + free_irq(pdev->irq, ilo_hw); + ilo_unmap_device(pdev, ilo_hw); + pci_release_regions(pdev); + /* + * pci_disable_device(pdev) used to be here. But this PCI device has + * two functions with interrupt lines connected to a single pin. The + * other one is a USB host controller. So when we disable the PIN here + * e.g. by rmmod hpilo, the controller stops working. It is because + * the interrupt link is disabled in ACPI since it is not refcounted + * yet. See acpi_pci_link_free_irq called from acpi_pci_irq_disable. + */ + kfree(ilo_hw); + ilo_hwdev[(minor / max_ccb)] = 0; +} + +static int ilo_probe(struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + int devnum, minor, start, error = 0; + struct ilo_hwinfo *ilo_hw; + + /* Ignore subsystem_device = 0x1979 (set by BIOS) */ + if (pdev->subsystem_device == 0x1979) + return 0; + + if (max_ccb > MAX_CCB) + max_ccb = MAX_CCB; + else if (max_ccb < MIN_CCB) + max_ccb = MIN_CCB; + + /* find a free range for device files */ + for (devnum = 0; devnum < MAX_ILO_DEV; devnum++) { + if (ilo_hwdev[devnum] == 0) { + ilo_hwdev[devnum] = 1; + break; + } + } + + if (devnum == MAX_ILO_DEV) { + dev_err(&pdev->dev, "Error finding free device\n"); + return -ENODEV; + } + + /* track global allocations for this device */ + error = -ENOMEM; + ilo_hw = kzalloc(sizeof(*ilo_hw), GFP_KERNEL); + if (!ilo_hw) + goto out; + + ilo_hw->ilo_dev = pdev; + spin_lock_init(&ilo_hw->alloc_lock); + spin_lock_init(&ilo_hw->fifo_lock); + spin_lock_init(&ilo_hw->open_lock); + + error = pci_enable_device(pdev); + if (error) + goto free; + + pci_set_master(pdev); + + error = pci_request_regions(pdev, ILO_NAME); + if (error) + goto disable; + + error = ilo_map_device(pdev, ilo_hw); + if (error) + goto free_regions; + + pci_set_drvdata(pdev, ilo_hw); + clear_device(ilo_hw); + + error = request_irq(pdev->irq, ilo_isr, IRQF_SHARED, "hpilo", ilo_hw); + if (error) + goto unmap; + + ilo_enable_interrupts(ilo_hw); + + cdev_init(&ilo_hw->cdev, &ilo_fops); + ilo_hw->cdev.owner = THIS_MODULE; + start = devnum * max_ccb; + error = cdev_add(&ilo_hw->cdev, MKDEV(ilo_major, start), max_ccb); + if (error) { + dev_err(&pdev->dev, "Could not add cdev\n"); + goto remove_isr; + } + + for (minor = 0 ; minor < max_ccb; minor++) { + struct device *dev; + dev = device_create(ilo_class, &pdev->dev, + MKDEV(ilo_major, minor), NULL, + "hpilo!d%dccb%d", devnum, minor); + if (IS_ERR(dev)) + dev_err(&pdev->dev, "Could not create files\n"); + } + + return 0; +remove_isr: + ilo_disable_interrupts(ilo_hw); + free_irq(pdev->irq, ilo_hw); +unmap: + ilo_unmap_device(pdev, ilo_hw); +free_regions: + pci_release_regions(pdev); +disable: +/* pci_disable_device(pdev); see comment in ilo_remove */ +free: + kfree(ilo_hw); +out: + ilo_hwdev[devnum] = 0; + return error; +} + +static const struct pci_device_id ilo_devices[] = { + { PCI_DEVICE(PCI_VENDOR_ID_COMPAQ, 0xB204) }, + { PCI_DEVICE(PCI_VENDOR_ID_HP, 0x3307) }, + { } +}; +MODULE_DEVICE_TABLE(pci, ilo_devices); + +static struct pci_driver ilo_driver = { + .name = ILO_NAME, + .id_table = ilo_devices, + .probe = ilo_probe, + .remove = ilo_remove, +}; + +static int __init ilo_init(void) +{ + int error; + dev_t dev; + + ilo_class = class_create(THIS_MODULE, "iLO"); + if (IS_ERR(ilo_class)) { + error = PTR_ERR(ilo_class); + goto out; + } + + error = alloc_chrdev_region(&dev, 0, MAX_OPEN, ILO_NAME); + if (error) + goto class_destroy; + + ilo_major = MAJOR(dev); + + error = pci_register_driver(&ilo_driver); + if (error) + goto chr_remove; + + return 0; +chr_remove: + unregister_chrdev_region(dev, MAX_OPEN); +class_destroy: + class_destroy(ilo_class); +out: + return error; +} + +static void __exit ilo_exit(void) +{ + pci_unregister_driver(&ilo_driver); + unregister_chrdev_region(MKDEV(ilo_major, 0), MAX_OPEN); + class_destroy(ilo_class); +} + +MODULE_VERSION("1.5.0"); +MODULE_ALIAS(ILO_NAME); +MODULE_DESCRIPTION(ILO_NAME); +MODULE_AUTHOR("David Altobelli "); +MODULE_LICENSE("GPL v2"); + +module_param(max_ccb, uint, 0444); +MODULE_PARM_DESC(max_ccb, "Maximum number of HP iLO channels to attach (8-24)(default=16)"); + +module_init(ilo_init); +module_exit(ilo_exit); diff --git a/drivers/misc/hpilo.h b/drivers/misc/hpilo.h new file mode 100644 index 000000000..94dfb9e40 --- /dev/null +++ b/drivers/misc/hpilo.h @@ -0,0 +1,211 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * linux/drivers/char/hpilo.h + * + * Copyright (C) 2008 Hewlett-Packard Development Company, L.P. + * David Altobelli + */ +#ifndef __HPILO_H +#define __HPILO_H + +#define ILO_NAME "hpilo" + +/* max number of open channel control blocks per device, hw limited to 32 */ +#define MAX_CCB 24 +/* min number of open channel control blocks per device, hw limited to 32 */ +#define MIN_CCB 8 +/* max number of supported devices */ +#define MAX_ILO_DEV 1 +/* max number of files */ +#define MAX_OPEN (MAX_CCB * MAX_ILO_DEV) +/* total wait time in usec */ +#define MAX_WAIT_TIME 10000 +/* per spin wait time in usec */ +#define WAIT_TIME 10 +/* spin counter for open/close delay */ +#define MAX_WAIT (MAX_WAIT_TIME / WAIT_TIME) + +/* + * Per device, used to track global memory allocations. + */ +struct ilo_hwinfo { + /* mmio registers on device */ + char __iomem *mmio_vaddr; + + /* doorbell registers on device */ + char __iomem *db_vaddr; + + /* shared memory on device used for channel control blocks */ + char __iomem *ram_vaddr; + + /* files corresponding to this device */ + struct ccb_data *ccb_alloc[MAX_CCB]; + + struct pci_dev *ilo_dev; + + /* + * open_lock serializes ccb_cnt during open and close + * [ irq disabled ] + * -> alloc_lock used when adding/removing/searching ccb_alloc, + * which represents all ccbs open on the device + * --> fifo_lock controls access to fifo queues shared with hw + * + * Locks must be taken in this order, but open_lock and alloc_lock + * are optional, they do not need to be held in order to take a + * lower level lock. + */ + spinlock_t open_lock; + spinlock_t alloc_lock; + spinlock_t fifo_lock; + + struct cdev cdev; +}; + +/* offset from mmio_vaddr for enabling doorbell interrupts */ +#define DB_IRQ 0xB2 +/* offset from mmio_vaddr for outbound communications */ +#define DB_OUT 0xD4 +/* DB_OUT reset bit */ +#define DB_RESET 26 + +/* + * Channel control block. Used to manage hardware queues. + * The format must match hw's version. The hw ccb is 128 bytes, + * but the context area shouldn't be touched by the driver. + */ +#define ILOSW_CCB_SZ 64 +#define ILOHW_CCB_SZ 128 +struct ccb { + union { + char *send_fifobar; + u64 send_fifobar_pa; + } ccb_u1; + union { + char *send_desc; + u64 send_desc_pa; + } ccb_u2; + u64 send_ctrl; + + union { + char *recv_fifobar; + u64 recv_fifobar_pa; + } ccb_u3; + union { + char *recv_desc; + u64 recv_desc_pa; + } ccb_u4; + u64 recv_ctrl; + + union { + char __iomem *db_base; + u64 padding5; + } ccb_u5; + + u64 channel; + + /* unused context area (64 bytes) */ +}; + +/* ccb queue parameters */ +#define SENDQ 1 +#define RECVQ 2 +#define NR_QENTRY 4 +#define L2_QENTRY_SZ 12 + +/* ccb ctrl bitfields */ +#define CTRL_BITPOS_L2SZ 0 +#define CTRL_BITPOS_FIFOINDEXMASK 4 +#define CTRL_BITPOS_DESCLIMIT 18 +#define CTRL_BITPOS_A 30 +#define CTRL_BITPOS_G 31 + +/* ccb doorbell macros */ +#define L2_DB_SIZE 14 +#define ONE_DB_SIZE (1 << L2_DB_SIZE) + +/* + * Per fd structure used to track the ccb allocated to that dev file. + */ +struct ccb_data { + /* software version of ccb, using virtual addrs */ + struct ccb driver_ccb; + + /* hardware version of ccb, using physical addrs */ + struct ccb ilo_ccb; + + /* hardware ccb is written to this shared mapped device memory */ + struct ccb __iomem *mapped_ccb; + + /* dma'able memory used for send/recv queues */ + void *dma_va; + dma_addr_t dma_pa; + size_t dma_size; + + /* pointer to hardware device info */ + struct ilo_hwinfo *ilo_hw; + + /* queue for this ccb to wait for recv data */ + wait_queue_head_t ccb_waitq; + + /* usage count, to allow for shared ccb's */ + int ccb_cnt; + + /* open wanted exclusive access to this ccb */ + int ccb_excl; +}; + +/* + * FIFO queue structure, shared with hw. + */ +#define ILO_START_ALIGN 4096 +#define ILO_CACHE_SZ 128 +struct fifo { + u64 nrents; /* user requested number of fifo entries */ + u64 imask; /* mask to extract valid fifo index */ + u64 merge; /* O/C bits to merge in during enqueue operation */ + u64 reset; /* set to non-zero when the target device resets */ + u8 pad_0[ILO_CACHE_SZ - (sizeof(u64) * 4)]; + + u64 head; + u8 pad_1[ILO_CACHE_SZ - (sizeof(u64))]; + + u64 tail; + u8 pad_2[ILO_CACHE_SZ - (sizeof(u64))]; + + u64 fifobar[1]; +}; + +/* convert between struct fifo, and the fifobar, which is saved in the ccb */ +#define FIFOHANDLESIZE (sizeof(struct fifo) - sizeof(u64)) +#define FIFOBARTOHANDLE(_fifo) \ + ((struct fifo *)(((char *)(_fifo)) - FIFOHANDLESIZE)) + +/* the number of qwords to consume from the entry descriptor */ +#define ENTRY_BITPOS_QWORDS 0 +/* descriptor index number (within a specified queue) */ +#define ENTRY_BITPOS_DESCRIPTOR 10 +/* state bit, fifo entry consumed by consumer */ +#define ENTRY_BITPOS_C 22 +/* state bit, fifo entry is occupied */ +#define ENTRY_BITPOS_O 23 + +#define ENTRY_BITS_QWORDS 10 +#define ENTRY_BITS_DESCRIPTOR 12 +#define ENTRY_BITS_C 1 +#define ENTRY_BITS_O 1 +#define ENTRY_BITS_TOTAL \ + (ENTRY_BITS_C + ENTRY_BITS_O + \ + ENTRY_BITS_QWORDS + ENTRY_BITS_DESCRIPTOR) + +/* extract various entry fields */ +#define ENTRY_MASK ((1 << ENTRY_BITS_TOTAL) - 1) +#define ENTRY_MASK_C (((1 << ENTRY_BITS_C) - 1) << ENTRY_BITPOS_C) +#define ENTRY_MASK_O (((1 << ENTRY_BITS_O) - 1) << ENTRY_BITPOS_O) +#define ENTRY_MASK_QWORDS \ + (((1 << ENTRY_BITS_QWORDS) - 1) << ENTRY_BITPOS_QWORDS) +#define ENTRY_MASK_DESCRIPTOR \ + (((1 << ENTRY_BITS_DESCRIPTOR) - 1) << ENTRY_BITPOS_DESCRIPTOR) + +#define ENTRY_MASK_NOSTATE (ENTRY_MASK >> (ENTRY_BITS_C + ENTRY_BITS_O)) + +#endif /* __HPILO_H */ diff --git a/drivers/misc/ibmasm/Makefile b/drivers/misc/ibmasm/Makefile new file mode 100644 index 000000000..1b9dd0f44 --- /dev/null +++ b/drivers/misc/ibmasm/Makefile @@ -0,0 +1,16 @@ +# SPDX-License-Identifier: GPL-2.0 + +obj-$(CONFIG_IBM_ASM) := ibmasm.o + +ibmasm-y := module.o \ + ibmasmfs.o \ + event.o \ + command.o \ + remote.o \ + heartbeat.o \ + r_heartbeat.o \ + dot_command.o \ + lowlevel.o + +ibmasm-$(CONFIG_SERIAL_8250) += uart.o + diff --git a/drivers/misc/ibmasm/command.c b/drivers/misc/ibmasm/command.c new file mode 100644 index 000000000..7d56f45de --- /dev/null +++ b/drivers/misc/ibmasm/command.c @@ -0,0 +1,187 @@ + +/* + * IBM ASM Service Processor Device Driver + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2004 + * + * Author: Max Asböck + * + */ + +#include +#include +#include "ibmasm.h" +#include "lowlevel.h" + +static void exec_next_command(struct service_processor *sp); + +static atomic_t command_count = ATOMIC_INIT(0); + +struct command *ibmasm_new_command(struct service_processor *sp, size_t buffer_size) +{ + struct command *cmd; + + if (buffer_size > IBMASM_CMD_MAX_BUFFER_SIZE) + return NULL; + + cmd = kzalloc(sizeof(struct command), GFP_KERNEL); + if (cmd == NULL) + return NULL; + + + cmd->buffer = kzalloc(buffer_size, GFP_KERNEL); + if (cmd->buffer == NULL) { + kfree(cmd); + return NULL; + } + cmd->buffer_size = buffer_size; + + kref_init(&cmd->kref); + cmd->lock = &sp->lock; + + cmd->status = IBMASM_CMD_PENDING; + init_waitqueue_head(&cmd->wait); + INIT_LIST_HEAD(&cmd->queue_node); + + atomic_inc(&command_count); + dbg("command count: %d\n", atomic_read(&command_count)); + + return cmd; +} + +void ibmasm_free_command(struct kref *kref) +{ + struct command *cmd = to_command(kref); + + list_del(&cmd->queue_node); + atomic_dec(&command_count); + dbg("command count: %d\n", atomic_read(&command_count)); + kfree(cmd->buffer); + kfree(cmd); +} + +static void enqueue_command(struct service_processor *sp, struct command *cmd) +{ + list_add_tail(&cmd->queue_node, &sp->command_queue); +} + +static struct command *dequeue_command(struct service_processor *sp) +{ + struct command *cmd; + struct list_head *next; + + if (list_empty(&sp->command_queue)) + return NULL; + + next = sp->command_queue.next; + list_del_init(next); + cmd = list_entry(next, struct command, queue_node); + + return cmd; +} + +static inline void do_exec_command(struct service_processor *sp) +{ + char tsbuf[32]; + + dbg("%s:%d at %s\n", __func__, __LINE__, get_timestamp(tsbuf)); + + if (ibmasm_send_i2o_message(sp)) { + sp->current_command->status = IBMASM_CMD_FAILED; + wake_up(&sp->current_command->wait); + command_put(sp->current_command); + exec_next_command(sp); + } +} + +/** + * exec_command + * send a command to a service processor + * Commands are executed sequentially. One command (sp->current_command) + * is sent to the service processor. Once the interrupt handler gets a + * message of type command_response, the message is copied into + * the current commands buffer, + */ +void ibmasm_exec_command(struct service_processor *sp, struct command *cmd) +{ + unsigned long flags; + char tsbuf[32]; + + dbg("%s:%d at %s\n", __func__, __LINE__, get_timestamp(tsbuf)); + + spin_lock_irqsave(&sp->lock, flags); + + if (!sp->current_command) { + sp->current_command = cmd; + command_get(sp->current_command); + spin_unlock_irqrestore(&sp->lock, flags); + do_exec_command(sp); + } else { + enqueue_command(sp, cmd); + spin_unlock_irqrestore(&sp->lock, flags); + } +} + +static void exec_next_command(struct service_processor *sp) +{ + unsigned long flags; + char tsbuf[32]; + + dbg("%s:%d at %s\n", __func__, __LINE__, get_timestamp(tsbuf)); + + spin_lock_irqsave(&sp->lock, flags); + sp->current_command = dequeue_command(sp); + if (sp->current_command) { + command_get(sp->current_command); + spin_unlock_irqrestore(&sp->lock, flags); + do_exec_command(sp); + } else { + spin_unlock_irqrestore(&sp->lock, flags); + } +} + +/** + * Sleep until a command has failed or a response has been received + * and the command status been updated by the interrupt handler. + * (see receive_response). + */ +void ibmasm_wait_for_response(struct command *cmd, int timeout) +{ + wait_event_interruptible_timeout(cmd->wait, + cmd->status == IBMASM_CMD_COMPLETE || + cmd->status == IBMASM_CMD_FAILED, + timeout * HZ); +} + +/** + * receive_command_response + * called by the interrupt handler when a dot command of type command_response + * was received. + */ +void ibmasm_receive_command_response(struct service_processor *sp, void *response, size_t size) +{ + struct command *cmd = sp->current_command; + + if (!sp->current_command) + return; + + memcpy_fromio(cmd->buffer, response, min(size, cmd->buffer_size)); + cmd->status = IBMASM_CMD_COMPLETE; + wake_up(&sp->current_command->wait); + command_put(sp->current_command); + exec_next_command(sp); +} diff --git a/drivers/misc/ibmasm/dot_command.c b/drivers/misc/ibmasm/dot_command.c new file mode 100644 index 000000000..d7b2ca358 --- /dev/null +++ b/drivers/misc/ibmasm/dot_command.c @@ -0,0 +1,152 @@ +/* + * IBM ASM Service Processor Device Driver + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2004 + * + * Author: Max Asböck + * + */ + +#include "ibmasm.h" +#include "dot_command.h" + +/** + * Dispatch an incoming message to the specific handler for the message. + * Called from interrupt context. + */ +void ibmasm_receive_message(struct service_processor *sp, void *message, int message_size) +{ + u32 size; + struct dot_command_header *header = (struct dot_command_header *)message; + + if (message_size == 0) + return; + + size = get_dot_command_size(message); + if (size == 0) + return; + + if (size > message_size) + size = message_size; + + switch (header->type) { + case sp_event: + ibmasm_receive_event(sp, message, size); + break; + case sp_command_response: + ibmasm_receive_command_response(sp, message, size); + break; + case sp_heartbeat: + ibmasm_receive_heartbeat(sp, message, size); + break; + default: + dev_err(sp->dev, "Received unknown message from service processor\n"); + } +} + + +#define INIT_BUFFER_SIZE 32 + + +/** + * send the 4.3.5.10 dot command (driver VPD) to the service processor + */ +int ibmasm_send_driver_vpd(struct service_processor *sp) +{ + struct command *command; + struct dot_command_header *header; + u8 *vpd_command; + u8 *vpd_data; + int result = 0; + + command = ibmasm_new_command(sp, INIT_BUFFER_SIZE); + if (command == NULL) + return -ENOMEM; + + header = (struct dot_command_header *)command->buffer; + header->type = sp_write; + header->command_size = 4; + header->data_size = 16; + header->status = 0; + header->reserved = 0; + + vpd_command = command->buffer + sizeof(struct dot_command_header); + vpd_command[0] = 0x4; + vpd_command[1] = 0x3; + vpd_command[2] = 0x5; + vpd_command[3] = 0xa; + + vpd_data = vpd_command + header->command_size; + vpd_data[0] = 0; + strcat(vpd_data, IBMASM_DRIVER_VPD); + vpd_data[10] = 0; + vpd_data[15] = 0; + + ibmasm_exec_command(sp, command); + ibmasm_wait_for_response(command, IBMASM_CMD_TIMEOUT_NORMAL); + + if (command->status != IBMASM_CMD_COMPLETE) + result = -ENODEV; + + command_put(command); + + return result; +} + +struct os_state_command { + struct dot_command_header header; + unsigned char command[3]; + unsigned char data; +}; + +/** + * send the 4.3.6 dot command (os state) to the service processor + * During driver init this function is called with os state "up". + * This causes the service processor to start sending heartbeats the + * driver. + * During driver exit the function is called with os state "down", + * causing the service processor to stop the heartbeats. + */ +int ibmasm_send_os_state(struct service_processor *sp, int os_state) +{ + struct command *cmd; + struct os_state_command *os_state_cmd; + int result = 0; + + cmd = ibmasm_new_command(sp, sizeof(struct os_state_command)); + if (cmd == NULL) + return -ENOMEM; + + os_state_cmd = (struct os_state_command *)cmd->buffer; + os_state_cmd->header.type = sp_write; + os_state_cmd->header.command_size = 3; + os_state_cmd->header.data_size = 1; + os_state_cmd->header.status = 0; + os_state_cmd->command[0] = 4; + os_state_cmd->command[1] = 3; + os_state_cmd->command[2] = 6; + os_state_cmd->data = os_state; + + ibmasm_exec_command(sp, cmd); + ibmasm_wait_for_response(cmd, IBMASM_CMD_TIMEOUT_NORMAL); + + if (cmd->status != IBMASM_CMD_COMPLETE) + result = -ENODEV; + + command_put(cmd); + return result; +} diff --git a/drivers/misc/ibmasm/dot_command.h b/drivers/misc/ibmasm/dot_command.h new file mode 100644 index 000000000..fc9fc9d4e --- /dev/null +++ b/drivers/misc/ibmasm/dot_command.h @@ -0,0 +1,78 @@ +/* + * IBM ASM Service Processor Device Driver + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2004 + * + * Author: Max Asböck + * + */ + +#ifndef __DOT_COMMAND_H__ +#define __DOT_COMMAND_H__ + +/* + * dot commands are the protocol used to communicate with the service + * processor. + * They consist of header, a command of variable length and data of + * variable length. + */ + +/* dot command types */ +#define sp_write 0 +#define sp_write_next 1 +#define sp_read 2 +#define sp_read_next 3 +#define sp_command_response 4 +#define sp_event 5 +#define sp_heartbeat 6 + +#pragma pack(1) +struct dot_command_header { + u8 type; + u8 command_size; + u16 data_size; + u8 status; + u8 reserved; +}; +#pragma pack() + +static inline size_t get_dot_command_size(void *buffer) +{ + struct dot_command_header *cmd = (struct dot_command_header *)buffer; + return sizeof(struct dot_command_header) + cmd->command_size + cmd->data_size; +} + +static inline unsigned int get_dot_command_timeout(void *buffer) +{ + struct dot_command_header *header = (struct dot_command_header *)buffer; + unsigned char *cmd = buffer + sizeof(struct dot_command_header); + + /* dot commands 6.3.1, 7.1 and 8.x need a longer timeout */ + + if (header->command_size == 3) { + if ((cmd[0] == 6) && (cmd[1] == 3) && (cmd[2] == 1)) + return IBMASM_CMD_TIMEOUT_EXTRA; + } else if (header->command_size == 2) { + if ((cmd[0] == 7) && (cmd[1] == 1)) + return IBMASM_CMD_TIMEOUT_EXTRA; + if (cmd[0] == 8) + return IBMASM_CMD_TIMEOUT_EXTRA; + } + return IBMASM_CMD_TIMEOUT_NORMAL; +} + +#endif /* __DOT_COMMAND_H__ */ diff --git a/drivers/misc/ibmasm/event.c b/drivers/misc/ibmasm/event.c new file mode 100644 index 000000000..7e33025b4 --- /dev/null +++ b/drivers/misc/ibmasm/event.c @@ -0,0 +1,177 @@ + +/* + * IBM ASM Service Processor Device Driver + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2004 + * + * Author: Max Asböck + * + */ + +#include +#include +#include "ibmasm.h" +#include "lowlevel.h" + +/* + * ASM service processor event handling routines. + * + * Events are signalled to the device drivers through interrupts. + * They have the format of dot commands, with the type field set to + * sp_event. + * The driver does not interpret the events, it simply stores them in a + * circular buffer. + */ + +static void wake_up_event_readers(struct service_processor *sp) +{ + struct event_reader *reader; + + list_for_each_entry(reader, &sp->event_buffer->readers, node) + wake_up_interruptible(&reader->wait); +} + +/** + * receive_event + * Called by the interrupt handler when a dot command of type sp_event is + * received. + * Store the event in the circular event buffer, wake up any sleeping + * event readers. + * There is no reader marker in the buffer, therefore readers are + * responsible for keeping up with the writer, or they will lose events. + */ +void ibmasm_receive_event(struct service_processor *sp, void *data, unsigned int data_size) +{ + struct event_buffer *buffer = sp->event_buffer; + struct ibmasm_event *event; + unsigned long flags; + + data_size = min(data_size, IBMASM_EVENT_MAX_SIZE); + + spin_lock_irqsave(&sp->lock, flags); + /* copy the event into the next slot in the circular buffer */ + event = &buffer->events[buffer->next_index]; + memcpy_fromio(event->data, data, data_size); + event->data_size = data_size; + event->serial_number = buffer->next_serial_number; + + /* advance indices in the buffer */ + buffer->next_index = (buffer->next_index + 1) % IBMASM_NUM_EVENTS; + buffer->next_serial_number++; + spin_unlock_irqrestore(&sp->lock, flags); + + wake_up_event_readers(sp); +} + +static inline int event_available(struct event_buffer *b, struct event_reader *r) +{ + return (r->next_serial_number < b->next_serial_number); +} + +/** + * get_next_event + * Called by event readers (initiated from user space through the file + * system). + * Sleeps until a new event is available. + */ +int ibmasm_get_next_event(struct service_processor *sp, struct event_reader *reader) +{ + struct event_buffer *buffer = sp->event_buffer; + struct ibmasm_event *event; + unsigned int index; + unsigned long flags; + + reader->cancelled = 0; + + if (wait_event_interruptible(reader->wait, + event_available(buffer, reader) || reader->cancelled)) + return -ERESTARTSYS; + + if (!event_available(buffer, reader)) + return 0; + + spin_lock_irqsave(&sp->lock, flags); + + index = buffer->next_index; + event = &buffer->events[index]; + while (event->serial_number < reader->next_serial_number) { + index = (index + 1) % IBMASM_NUM_EVENTS; + event = &buffer->events[index]; + } + memcpy(reader->data, event->data, event->data_size); + reader->data_size = event->data_size; + reader->next_serial_number = event->serial_number + 1; + + spin_unlock_irqrestore(&sp->lock, flags); + + return event->data_size; +} + +void ibmasm_cancel_next_event(struct event_reader *reader) +{ + reader->cancelled = 1; + wake_up_interruptible(&reader->wait); +} + +void ibmasm_event_reader_register(struct service_processor *sp, struct event_reader *reader) +{ + unsigned long flags; + + reader->next_serial_number = sp->event_buffer->next_serial_number; + init_waitqueue_head(&reader->wait); + spin_lock_irqsave(&sp->lock, flags); + list_add(&reader->node, &sp->event_buffer->readers); + spin_unlock_irqrestore(&sp->lock, flags); +} + +void ibmasm_event_reader_unregister(struct service_processor *sp, struct event_reader *reader) +{ + unsigned long flags; + + spin_lock_irqsave(&sp->lock, flags); + list_del(&reader->node); + spin_unlock_irqrestore(&sp->lock, flags); +} + +int ibmasm_event_buffer_init(struct service_processor *sp) +{ + struct event_buffer *buffer; + struct ibmasm_event *event; + int i; + + buffer = kmalloc(sizeof(struct event_buffer), GFP_KERNEL); + if (!buffer) + return -ENOMEM; + + buffer->next_index = 0; + buffer->next_serial_number = 1; + + event = buffer->events; + for (i=0; iserial_number = 0; + + INIT_LIST_HEAD(&buffer->readers); + + sp->event_buffer = buffer; + + return 0; +} + +void ibmasm_event_buffer_exit(struct service_processor *sp) +{ + kfree(sp->event_buffer); +} diff --git a/drivers/misc/ibmasm/heartbeat.c b/drivers/misc/ibmasm/heartbeat.c new file mode 100644 index 000000000..90746378f --- /dev/null +++ b/drivers/misc/ibmasm/heartbeat.c @@ -0,0 +1,101 @@ + +/* + * IBM ASM Service Processor Device Driver + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2004 + * + * Author: Max Asböck + * + */ + +#include +#include "ibmasm.h" +#include "dot_command.h" +#include "lowlevel.h" + +static int suspend_heartbeats = 0; + +/* + * Once the driver indicates to the service processor that it is running + * - see send_os_state() - the service processor sends periodic heartbeats + * to the driver. The driver must respond to the heartbeats or else the OS + * will be rebooted. + * In the case of a panic the interrupt handler continues to work and thus + * continues to respond to heartbeats, making the service processor believe + * the OS is still running and thus preventing a reboot. + * To prevent this from happening a callback is added the panic_notifier_list. + * Before responding to a heartbeat the driver checks if a panic has happened, + * if yes it suspends heartbeat, causing the service processor to reboot as + * expected. + */ +static int panic_happened(struct notifier_block *n, unsigned long val, void *v) +{ + suspend_heartbeats = 1; + return 0; +} + +static struct notifier_block panic_notifier = { panic_happened, NULL, 1 }; + +void ibmasm_register_panic_notifier(void) +{ + atomic_notifier_chain_register(&panic_notifier_list, &panic_notifier); +} + +void ibmasm_unregister_panic_notifier(void) +{ + atomic_notifier_chain_unregister(&panic_notifier_list, + &panic_notifier); +} + + +int ibmasm_heartbeat_init(struct service_processor *sp) +{ + sp->heartbeat = ibmasm_new_command(sp, HEARTBEAT_BUFFER_SIZE); + if (sp->heartbeat == NULL) + return -ENOMEM; + + return 0; +} + +void ibmasm_heartbeat_exit(struct service_processor *sp) +{ + char tsbuf[32]; + + dbg("%s:%d at %s\n", __func__, __LINE__, get_timestamp(tsbuf)); + ibmasm_wait_for_response(sp->heartbeat, IBMASM_CMD_TIMEOUT_NORMAL); + dbg("%s:%d at %s\n", __func__, __LINE__, get_timestamp(tsbuf)); + suspend_heartbeats = 1; + command_put(sp->heartbeat); +} + +void ibmasm_receive_heartbeat(struct service_processor *sp, void *message, size_t size) +{ + struct command *cmd = sp->heartbeat; + struct dot_command_header *header = (struct dot_command_header *)cmd->buffer; + char tsbuf[32]; + + dbg("%s:%d at %s\n", __func__, __LINE__, get_timestamp(tsbuf)); + if (suspend_heartbeats) + return; + + /* return the received dot command to sender */ + cmd->status = IBMASM_CMD_PENDING; + size = min(size, cmd->buffer_size); + memcpy_fromio(cmd->buffer, message, size); + header->type = sp_write; + ibmasm_exec_command(sp, cmd); +} diff --git a/drivers/misc/ibmasm/i2o.h b/drivers/misc/ibmasm/i2o.h new file mode 100644 index 000000000..2e9566dab --- /dev/null +++ b/drivers/misc/ibmasm/i2o.h @@ -0,0 +1,77 @@ +/* + * IBM ASM Service Processor Device Driver + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2004 + * + * Author: Max Asböck + * + */ + +#pragma pack(1) +struct i2o_header { + u8 version; + u8 message_flags; + u16 message_size; + u8 target; + u8 initiator_and_target; + u8 initiator; + u8 function; + u32 initiator_context; +}; +#pragma pack() + +#define I2O_HEADER_TEMPLATE \ + { .version = 0x01, \ + .message_flags = 0x00, \ + .function = 0xFF, \ + .initiator = 0x00, \ + .initiator_and_target = 0x40, \ + .target = 0x00, \ + .initiator_context = 0x0 } + +#define I2O_MESSAGE_SIZE 0x1000 +#define I2O_COMMAND_SIZE (I2O_MESSAGE_SIZE - sizeof(struct i2o_header)) + +#pragma pack(1) +struct i2o_message { + struct i2o_header header; + void *data; +}; +#pragma pack() + +static inline unsigned short outgoing_message_size(unsigned int data_size) +{ + unsigned int size; + unsigned short i2o_size; + + if (data_size > I2O_COMMAND_SIZE) + data_size = I2O_COMMAND_SIZE; + + size = sizeof(struct i2o_header) + data_size; + + i2o_size = size / sizeof(u32); + + if (size % sizeof(u32)) + i2o_size++; + + return i2o_size; +} + +static inline u32 incoming_data_size(struct i2o_message *i2o_message) +{ + return (sizeof(u32) * i2o_message->header.message_size); +} diff --git a/drivers/misc/ibmasm/ibmasm.h b/drivers/misc/ibmasm/ibmasm.h new file mode 100644 index 000000000..9fea49d2e --- /dev/null +++ b/drivers/misc/ibmasm/ibmasm.h @@ -0,0 +1,223 @@ + +/* + * IBM ASM Service Processor Device Driver + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2004 + * + * Author: Max Asböck + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Driver identification */ +#define DRIVER_NAME "ibmasm" +#define DRIVER_VERSION "1.0" +#define DRIVER_AUTHOR "Max Asbock , Vernon Mauery " +#define DRIVER_DESC "IBM ASM Service Processor Driver" + +#define err(msg) printk(KERN_ERR "%s: " msg "\n", DRIVER_NAME) +#define info(msg) printk(KERN_INFO "%s: " msg "\n", DRIVER_NAME) + +extern int ibmasm_debug; +#define dbg(STR, ARGS...) \ + do { \ + if (ibmasm_debug) \ + printk(KERN_DEBUG STR , ##ARGS); \ + } while (0) + +static inline char *get_timestamp(char *buf) +{ + struct timespec64 now; + + ktime_get_real_ts64(&now); + sprintf(buf, "%llu.%.08lu", (long long)now.tv_sec, + now.tv_nsec / NSEC_PER_USEC); + return buf; +} + +#define IBMASM_CMD_PENDING 0 +#define IBMASM_CMD_COMPLETE 1 +#define IBMASM_CMD_FAILED 2 + +#define IBMASM_CMD_TIMEOUT_NORMAL 45 +#define IBMASM_CMD_TIMEOUT_EXTRA 240 + +#define IBMASM_CMD_MAX_BUFFER_SIZE 0x8000 + +#define REVERSE_HEARTBEAT_TIMEOUT 120 + +#define HEARTBEAT_BUFFER_SIZE 0x400 + +#ifdef IA64 +#define IBMASM_DRIVER_VPD "Lin64 6.08 " +#else +#define IBMASM_DRIVER_VPD "Lin32 6.08 " +#endif + +#define SYSTEM_STATE_OS_UP 5 +#define SYSTEM_STATE_OS_DOWN 4 + +#define IBMASM_NAME_SIZE 16 + +#define IBMASM_NUM_EVENTS 10 +#define IBMASM_EVENT_MAX_SIZE 2048u + + +struct command { + struct list_head queue_node; + wait_queue_head_t wait; + unsigned char *buffer; + size_t buffer_size; + int status; + struct kref kref; + spinlock_t *lock; +}; +#define to_command(c) container_of(c, struct command, kref) + +void ibmasm_free_command(struct kref *kref); +static inline void command_put(struct command *cmd) +{ + unsigned long flags; + spinlock_t *lock = cmd->lock; + + spin_lock_irqsave(lock, flags); + kref_put(&cmd->kref, ibmasm_free_command); + spin_unlock_irqrestore(lock, flags); +} + +static inline void command_get(struct command *cmd) +{ + kref_get(&cmd->kref); +} + + +struct ibmasm_event { + unsigned int serial_number; + unsigned int data_size; + unsigned char data[IBMASM_EVENT_MAX_SIZE]; +}; + +struct event_buffer { + struct ibmasm_event events[IBMASM_NUM_EVENTS]; + unsigned int next_serial_number; + unsigned int next_index; + struct list_head readers; +}; + +struct event_reader { + int cancelled; + unsigned int next_serial_number; + wait_queue_head_t wait; + struct list_head node; + unsigned int data_size; + unsigned char data[IBMASM_EVENT_MAX_SIZE]; +}; + +struct reverse_heartbeat { + wait_queue_head_t wait; + unsigned int stopped; +}; + +struct ibmasm_remote { + struct input_dev *keybd_dev; + struct input_dev *mouse_dev; +}; + +struct service_processor { + struct list_head node; + spinlock_t lock; + void __iomem *base_address; + unsigned int irq; + struct command *current_command; + struct command *heartbeat; + struct list_head command_queue; + struct event_buffer *event_buffer; + char dirname[IBMASM_NAME_SIZE]; + char devname[IBMASM_NAME_SIZE]; + unsigned int number; + struct ibmasm_remote remote; + int serial_line; + struct device *dev; +}; + +/* command processing */ +struct command *ibmasm_new_command(struct service_processor *sp, size_t buffer_size); +void ibmasm_exec_command(struct service_processor *sp, struct command *cmd); +void ibmasm_wait_for_response(struct command *cmd, int timeout); +void ibmasm_receive_command_response(struct service_processor *sp, void *response, size_t size); + +/* event processing */ +int ibmasm_event_buffer_init(struct service_processor *sp); +void ibmasm_event_buffer_exit(struct service_processor *sp); +void ibmasm_receive_event(struct service_processor *sp, void *data, unsigned int data_size); +void ibmasm_event_reader_register(struct service_processor *sp, struct event_reader *reader); +void ibmasm_event_reader_unregister(struct service_processor *sp, struct event_reader *reader); +int ibmasm_get_next_event(struct service_processor *sp, struct event_reader *reader); +void ibmasm_cancel_next_event(struct event_reader *reader); + +/* heartbeat - from SP to OS */ +void ibmasm_register_panic_notifier(void); +void ibmasm_unregister_panic_notifier(void); +int ibmasm_heartbeat_init(struct service_processor *sp); +void ibmasm_heartbeat_exit(struct service_processor *sp); +void ibmasm_receive_heartbeat(struct service_processor *sp, void *message, size_t size); + +/* reverse heartbeat - from OS to SP */ +void ibmasm_init_reverse_heartbeat(struct service_processor *sp, struct reverse_heartbeat *rhb); +int ibmasm_start_reverse_heartbeat(struct service_processor *sp, struct reverse_heartbeat *rhb); +void ibmasm_stop_reverse_heartbeat(struct reverse_heartbeat *rhb); + +/* dot commands */ +void ibmasm_receive_message(struct service_processor *sp, void *data, int data_size); +int ibmasm_send_driver_vpd(struct service_processor *sp); +int ibmasm_send_os_state(struct service_processor *sp, int os_state); + +/* low level message processing */ +int ibmasm_send_i2o_message(struct service_processor *sp); +irqreturn_t ibmasm_interrupt_handler(int irq, void * dev_id); + +/* remote console */ +void ibmasm_handle_mouse_interrupt(struct service_processor *sp); +int ibmasm_init_remote_input_dev(struct service_processor *sp); +void ibmasm_free_remote_input_dev(struct service_processor *sp); + +/* file system */ +int ibmasmfs_register(void); +void ibmasmfs_unregister(void); +void ibmasmfs_add_sp(struct service_processor *sp); + +/* uart */ +#if IS_ENABLED(CONFIG_SERIAL_8250) +void ibmasm_register_uart(struct service_processor *sp); +void ibmasm_unregister_uart(struct service_processor *sp); +#else +#define ibmasm_register_uart(sp) do { } while(0) +#define ibmasm_unregister_uart(sp) do { } while(0) +#endif diff --git a/drivers/misc/ibmasm/ibmasmfs.c b/drivers/misc/ibmasm/ibmasmfs.c new file mode 100644 index 000000000..fa840666b --- /dev/null +++ b/drivers/misc/ibmasm/ibmasmfs.c @@ -0,0 +1,608 @@ +/* + * IBM ASM Service Processor Device Driver + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2004 + * + * Author: Max Asböck + * + */ + +/* + * Parts of this code are based on an article by Jonathan Corbet + * that appeared in Linux Weekly News. + */ + + +/* + * The IBMASM file virtual filesystem. It creates the following hierarchy + * dynamically when mounted from user space: + * + * /ibmasm + * |-- 0 + * | |-- command + * | |-- event + * | |-- reverse_heartbeat + * | `-- remote_video + * | |-- depth + * | |-- height + * | `-- width + * . + * . + * . + * `-- n + * |-- command + * |-- event + * |-- reverse_heartbeat + * `-- remote_video + * |-- depth + * |-- height + * `-- width + * + * For each service processor the following files are created: + * + * command: execute dot commands + * write: execute a dot command on the service processor + * read: return the result of a previously executed dot command + * + * events: listen for service processor events + * read: sleep (interruptible) until an event occurs + * write: wakeup sleeping event listener + * + * reverse_heartbeat: send a heartbeat to the service processor + * read: sleep (interruptible) until the reverse heartbeat fails + * write: wakeup sleeping heartbeat listener + * + * remote_video/width + * remote_video/height + * remote_video/width: control remote display settings + * write: set value + * read: read value + */ + +#include +#include +#include +#include +#include +#include "ibmasm.h" +#include "remote.h" +#include "dot_command.h" + +#define IBMASMFS_MAGIC 0x66726f67 + +static LIST_HEAD(service_processors); + +static struct inode *ibmasmfs_make_inode(struct super_block *sb, int mode); +static void ibmasmfs_create_files (struct super_block *sb); +static int ibmasmfs_fill_super (struct super_block *sb, void *data, int silent); + + +static struct dentry *ibmasmfs_mount(struct file_system_type *fst, + int flags, const char *name, void *data) +{ + return mount_single(fst, flags, data, ibmasmfs_fill_super); +} + +static const struct super_operations ibmasmfs_s_ops = { + .statfs = simple_statfs, + .drop_inode = generic_delete_inode, +}; + +static const struct file_operations *ibmasmfs_dir_ops = &simple_dir_operations; + +static struct file_system_type ibmasmfs_type = { + .owner = THIS_MODULE, + .name = "ibmasmfs", + .mount = ibmasmfs_mount, + .kill_sb = kill_litter_super, +}; +MODULE_ALIAS_FS("ibmasmfs"); + +static int ibmasmfs_fill_super (struct super_block *sb, void *data, int silent) +{ + struct inode *root; + + sb->s_blocksize = PAGE_SIZE; + sb->s_blocksize_bits = PAGE_SHIFT; + sb->s_magic = IBMASMFS_MAGIC; + sb->s_op = &ibmasmfs_s_ops; + sb->s_time_gran = 1; + + root = ibmasmfs_make_inode (sb, S_IFDIR | 0500); + if (!root) + return -ENOMEM; + + root->i_op = &simple_dir_inode_operations; + root->i_fop = ibmasmfs_dir_ops; + + sb->s_root = d_make_root(root); + if (!sb->s_root) + return -ENOMEM; + + ibmasmfs_create_files(sb); + return 0; +} + +static struct inode *ibmasmfs_make_inode(struct super_block *sb, int mode) +{ + struct inode *ret = new_inode(sb); + + if (ret) { + ret->i_ino = get_next_ino(); + ret->i_mode = mode; + ret->i_atime = ret->i_mtime = ret->i_ctime = current_time(ret); + } + return ret; +} + +static struct dentry *ibmasmfs_create_file(struct dentry *parent, + const char *name, + const struct file_operations *fops, + void *data, + int mode) +{ + struct dentry *dentry; + struct inode *inode; + + dentry = d_alloc_name(parent, name); + if (!dentry) + return NULL; + + inode = ibmasmfs_make_inode(parent->d_sb, S_IFREG | mode); + if (!inode) { + dput(dentry); + return NULL; + } + + inode->i_fop = fops; + inode->i_private = data; + + d_add(dentry, inode); + return dentry; +} + +static struct dentry *ibmasmfs_create_dir(struct dentry *parent, + const char *name) +{ + struct dentry *dentry; + struct inode *inode; + + dentry = d_alloc_name(parent, name); + if (!dentry) + return NULL; + + inode = ibmasmfs_make_inode(parent->d_sb, S_IFDIR | 0500); + if (!inode) { + dput(dentry); + return NULL; + } + + inode->i_op = &simple_dir_inode_operations; + inode->i_fop = ibmasmfs_dir_ops; + + d_add(dentry, inode); + return dentry; +} + +int ibmasmfs_register(void) +{ + return register_filesystem(&ibmasmfs_type); +} + +void ibmasmfs_unregister(void) +{ + unregister_filesystem(&ibmasmfs_type); +} + +void ibmasmfs_add_sp(struct service_processor *sp) +{ + list_add(&sp->node, &service_processors); +} + +/* struct to save state between command file operations */ +struct ibmasmfs_command_data { + struct service_processor *sp; + struct command *command; +}; + +/* struct to save state between event file operations */ +struct ibmasmfs_event_data { + struct service_processor *sp; + struct event_reader reader; + int active; +}; + +/* struct to save state between reverse heartbeat file operations */ +struct ibmasmfs_heartbeat_data { + struct service_processor *sp; + struct reverse_heartbeat heartbeat; + int active; +}; + +static int command_file_open(struct inode *inode, struct file *file) +{ + struct ibmasmfs_command_data *command_data; + + if (!inode->i_private) + return -ENODEV; + + command_data = kmalloc(sizeof(struct ibmasmfs_command_data), GFP_KERNEL); + if (!command_data) + return -ENOMEM; + + command_data->command = NULL; + command_data->sp = inode->i_private; + file->private_data = command_data; + return 0; +} + +static int command_file_close(struct inode *inode, struct file *file) +{ + struct ibmasmfs_command_data *command_data = file->private_data; + + if (command_data->command) + command_put(command_data->command); + + kfree(command_data); + return 0; +} + +static ssize_t command_file_read(struct file *file, char __user *buf, size_t count, loff_t *offset) +{ + struct ibmasmfs_command_data *command_data = file->private_data; + struct command *cmd; + int len; + unsigned long flags; + + if (*offset < 0) + return -EINVAL; + if (count == 0 || count > IBMASM_CMD_MAX_BUFFER_SIZE) + return 0; + if (*offset != 0) + return 0; + + spin_lock_irqsave(&command_data->sp->lock, flags); + cmd = command_data->command; + if (cmd == NULL) { + spin_unlock_irqrestore(&command_data->sp->lock, flags); + return 0; + } + command_data->command = NULL; + spin_unlock_irqrestore(&command_data->sp->lock, flags); + + if (cmd->status != IBMASM_CMD_COMPLETE) { + command_put(cmd); + return -EIO; + } + len = min(count, cmd->buffer_size); + if (copy_to_user(buf, cmd->buffer, len)) { + command_put(cmd); + return -EFAULT; + } + command_put(cmd); + + return len; +} + +static ssize_t command_file_write(struct file *file, const char __user *ubuff, size_t count, loff_t *offset) +{ + struct ibmasmfs_command_data *command_data = file->private_data; + struct command *cmd; + unsigned long flags; + + if (*offset < 0) + return -EINVAL; + if (count == 0 || count > IBMASM_CMD_MAX_BUFFER_SIZE) + return 0; + if (*offset != 0) + return 0; + + /* commands are executed sequentially, only one command at a time */ + if (command_data->command) + return -EAGAIN; + + cmd = ibmasm_new_command(command_data->sp, count); + if (!cmd) + return -ENOMEM; + + if (copy_from_user(cmd->buffer, ubuff, count)) { + command_put(cmd); + return -EFAULT; + } + + spin_lock_irqsave(&command_data->sp->lock, flags); + if (command_data->command) { + spin_unlock_irqrestore(&command_data->sp->lock, flags); + command_put(cmd); + return -EAGAIN; + } + command_data->command = cmd; + spin_unlock_irqrestore(&command_data->sp->lock, flags); + + ibmasm_exec_command(command_data->sp, cmd); + ibmasm_wait_for_response(cmd, get_dot_command_timeout(cmd->buffer)); + + return count; +} + +static int event_file_open(struct inode *inode, struct file *file) +{ + struct ibmasmfs_event_data *event_data; + struct service_processor *sp; + + if (!inode->i_private) + return -ENODEV; + + sp = inode->i_private; + + event_data = kmalloc(sizeof(struct ibmasmfs_event_data), GFP_KERNEL); + if (!event_data) + return -ENOMEM; + + ibmasm_event_reader_register(sp, &event_data->reader); + + event_data->sp = sp; + event_data->active = 0; + file->private_data = event_data; + return 0; +} + +static int event_file_close(struct inode *inode, struct file *file) +{ + struct ibmasmfs_event_data *event_data = file->private_data; + + ibmasm_event_reader_unregister(event_data->sp, &event_data->reader); + kfree(event_data); + return 0; +} + +static ssize_t event_file_read(struct file *file, char __user *buf, size_t count, loff_t *offset) +{ + struct ibmasmfs_event_data *event_data = file->private_data; + struct event_reader *reader = &event_data->reader; + struct service_processor *sp = event_data->sp; + int ret; + unsigned long flags; + + if (*offset < 0) + return -EINVAL; + if (count == 0 || count > IBMASM_EVENT_MAX_SIZE) + return 0; + if (*offset != 0) + return 0; + + spin_lock_irqsave(&sp->lock, flags); + if (event_data->active) { + spin_unlock_irqrestore(&sp->lock, flags); + return -EBUSY; + } + event_data->active = 1; + spin_unlock_irqrestore(&sp->lock, flags); + + ret = ibmasm_get_next_event(sp, reader); + if (ret <= 0) + goto out; + + if (count < reader->data_size) { + ret = -EINVAL; + goto out; + } + + if (copy_to_user(buf, reader->data, reader->data_size)) { + ret = -EFAULT; + goto out; + } + ret = reader->data_size; + +out: + event_data->active = 0; + return ret; +} + +static ssize_t event_file_write(struct file *file, const char __user *buf, size_t count, loff_t *offset) +{ + struct ibmasmfs_event_data *event_data = file->private_data; + + if (*offset < 0) + return -EINVAL; + if (count != 1) + return 0; + if (*offset != 0) + return 0; + + ibmasm_cancel_next_event(&event_data->reader); + return 0; +} + +static int r_heartbeat_file_open(struct inode *inode, struct file *file) +{ + struct ibmasmfs_heartbeat_data *rhbeat; + + if (!inode->i_private) + return -ENODEV; + + rhbeat = kmalloc(sizeof(struct ibmasmfs_heartbeat_data), GFP_KERNEL); + if (!rhbeat) + return -ENOMEM; + + rhbeat->sp = inode->i_private; + rhbeat->active = 0; + ibmasm_init_reverse_heartbeat(rhbeat->sp, &rhbeat->heartbeat); + file->private_data = rhbeat; + return 0; +} + +static int r_heartbeat_file_close(struct inode *inode, struct file *file) +{ + struct ibmasmfs_heartbeat_data *rhbeat = file->private_data; + + kfree(rhbeat); + return 0; +} + +static ssize_t r_heartbeat_file_read(struct file *file, char __user *buf, size_t count, loff_t *offset) +{ + struct ibmasmfs_heartbeat_data *rhbeat = file->private_data; + unsigned long flags; + int result; + + if (*offset < 0) + return -EINVAL; + if (count == 0 || count > 1024) + return 0; + if (*offset != 0) + return 0; + + /* allow only one reverse heartbeat per process */ + spin_lock_irqsave(&rhbeat->sp->lock, flags); + if (rhbeat->active) { + spin_unlock_irqrestore(&rhbeat->sp->lock, flags); + return -EBUSY; + } + rhbeat->active = 1; + spin_unlock_irqrestore(&rhbeat->sp->lock, flags); + + result = ibmasm_start_reverse_heartbeat(rhbeat->sp, &rhbeat->heartbeat); + rhbeat->active = 0; + + return result; +} + +static ssize_t r_heartbeat_file_write(struct file *file, const char __user *buf, size_t count, loff_t *offset) +{ + struct ibmasmfs_heartbeat_data *rhbeat = file->private_data; + + if (*offset < 0) + return -EINVAL; + if (count != 1) + return 0; + if (*offset != 0) + return 0; + + if (rhbeat->active) + ibmasm_stop_reverse_heartbeat(&rhbeat->heartbeat); + + return 1; +} + +static int remote_settings_file_close(struct inode *inode, struct file *file) +{ + return 0; +} + +static ssize_t remote_settings_file_read(struct file *file, char __user *buf, size_t count, loff_t *offset) +{ + void __iomem *address = (void __iomem *)file->private_data; + int len = 0; + unsigned int value; + char lbuf[20]; + + value = readl(address); + len = snprintf(lbuf, sizeof(lbuf), "%d\n", value); + + return simple_read_from_buffer(buf, count, offset, lbuf, len); +} + +static ssize_t remote_settings_file_write(struct file *file, const char __user *ubuff, size_t count, loff_t *offset) +{ + void __iomem *address = (void __iomem *)file->private_data; + char *buff; + unsigned int value; + + if (*offset < 0) + return -EINVAL; + if (count == 0 || count > 1024) + return 0; + if (*offset != 0) + return 0; + + buff = kzalloc (count + 1, GFP_KERNEL); + if (!buff) + return -ENOMEM; + + + if (copy_from_user(buff, ubuff, count)) { + kfree(buff); + return -EFAULT; + } + + value = simple_strtoul(buff, NULL, 10); + writel(value, address); + kfree(buff); + + return count; +} + +static const struct file_operations command_fops = { + .open = command_file_open, + .release = command_file_close, + .read = command_file_read, + .write = command_file_write, + .llseek = generic_file_llseek, +}; + +static const struct file_operations event_fops = { + .open = event_file_open, + .release = event_file_close, + .read = event_file_read, + .write = event_file_write, + .llseek = generic_file_llseek, +}; + +static const struct file_operations r_heartbeat_fops = { + .open = r_heartbeat_file_open, + .release = r_heartbeat_file_close, + .read = r_heartbeat_file_read, + .write = r_heartbeat_file_write, + .llseek = generic_file_llseek, +}; + +static const struct file_operations remote_settings_fops = { + .open = simple_open, + .release = remote_settings_file_close, + .read = remote_settings_file_read, + .write = remote_settings_file_write, + .llseek = generic_file_llseek, +}; + + +static void ibmasmfs_create_files (struct super_block *sb) +{ + struct list_head *entry; + struct service_processor *sp; + + list_for_each(entry, &service_processors) { + struct dentry *dir; + struct dentry *remote_dir; + sp = list_entry(entry, struct service_processor, node); + dir = ibmasmfs_create_dir(sb->s_root, sp->dirname); + if (!dir) + continue; + + ibmasmfs_create_file(dir, "command", &command_fops, sp, S_IRUSR|S_IWUSR); + ibmasmfs_create_file(dir, "event", &event_fops, sp, S_IRUSR|S_IWUSR); + ibmasmfs_create_file(dir, "reverse_heartbeat", &r_heartbeat_fops, sp, S_IRUSR|S_IWUSR); + + remote_dir = ibmasmfs_create_dir(dir, "remote_video"); + if (!remote_dir) + continue; + + ibmasmfs_create_file(remote_dir, "width", &remote_settings_fops, (void *)display_width(sp), S_IRUSR|S_IWUSR); + ibmasmfs_create_file(remote_dir, "height", &remote_settings_fops, (void *)display_height(sp), S_IRUSR|S_IWUSR); + ibmasmfs_create_file(remote_dir, "depth", &remote_settings_fops, (void *)display_depth(sp), S_IRUSR|S_IWUSR); + } +} diff --git a/drivers/misc/ibmasm/lowlevel.c b/drivers/misc/ibmasm/lowlevel.c new file mode 100644 index 000000000..5319ea261 --- /dev/null +++ b/drivers/misc/ibmasm/lowlevel.c @@ -0,0 +1,85 @@ +/* + * IBM ASM Service Processor Device Driver + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2004 + * + * Author: Max Asböck + * + */ + +#include "ibmasm.h" +#include "lowlevel.h" +#include "i2o.h" +#include "dot_command.h" +#include "remote.h" + +static struct i2o_header header = I2O_HEADER_TEMPLATE; + + +int ibmasm_send_i2o_message(struct service_processor *sp) +{ + u32 mfa; + unsigned int command_size; + struct i2o_message *message; + struct command *command = sp->current_command; + + mfa = get_mfa_inbound(sp->base_address); + if (!mfa) + return 1; + + command_size = get_dot_command_size(command->buffer); + header.message_size = outgoing_message_size(command_size); + + message = get_i2o_message(sp->base_address, mfa); + + memcpy_toio(&message->header, &header, sizeof(struct i2o_header)); + memcpy_toio(&message->data, command->buffer, command_size); + + set_mfa_inbound(sp->base_address, mfa); + + return 0; +} + +irqreturn_t ibmasm_interrupt_handler(int irq, void * dev_id) +{ + u32 mfa; + struct service_processor *sp = (struct service_processor *)dev_id; + void __iomem *base_address = sp->base_address; + char tsbuf[32]; + + if (!sp_interrupt_pending(base_address)) + return IRQ_NONE; + + dbg("respond to interrupt at %s\n", get_timestamp(tsbuf)); + + if (mouse_interrupt_pending(sp)) { + ibmasm_handle_mouse_interrupt(sp); + clear_mouse_interrupt(sp); + } + + mfa = get_mfa_outbound(base_address); + if (valid_mfa(mfa)) { + struct i2o_message *msg = get_i2o_message(base_address, mfa); + ibmasm_receive_message(sp, &msg->data, incoming_data_size(msg)); + } else + dbg("didn't get a valid MFA\n"); + + set_mfa_outbound(base_address, mfa); + dbg("finished interrupt at %s\n", get_timestamp(tsbuf)); + + return IRQ_HANDLED; +} diff --git a/drivers/misc/ibmasm/lowlevel.h b/drivers/misc/ibmasm/lowlevel.h new file mode 100644 index 000000000..e97848f51 --- /dev/null +++ b/drivers/misc/ibmasm/lowlevel.h @@ -0,0 +1,137 @@ +/* + * IBM ASM Service Processor Device Driver + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2004 + * + * Author: Max Asböck + * + */ + +/* Condor service processor specific hardware definitions */ + +#ifndef __IBMASM_CONDOR_H__ +#define __IBMASM_CONDOR_H__ + +#include + +#define VENDORID_IBM 0x1014 +#define DEVICEID_RSA 0x010F + +#define GET_MFA_ADDR(x) (x & 0xFFFFFF00) + +#define MAILBOX_FULL(x) (x & 0x00000001) + +#define NO_MFAS_AVAILABLE 0xFFFFFFFF + + +#define INBOUND_QUEUE_PORT 0x40 /* contains address of next free MFA */ +#define OUTBOUND_QUEUE_PORT 0x44 /* contains address of posted MFA */ + +#define SP_INTR_MASK 0x00000008 +#define UART_INTR_MASK 0x00000010 + +#define INTR_STATUS_REGISTER 0x13A0 +#define INTR_CONTROL_REGISTER 0x13A4 + +#define SCOUT_COM_A_BASE 0x0000 +#define SCOUT_COM_B_BASE 0x0100 +#define SCOUT_COM_C_BASE 0x0200 +#define SCOUT_COM_D_BASE 0x0300 + +static inline int sp_interrupt_pending(void __iomem *base_address) +{ + return SP_INTR_MASK & readl(base_address + INTR_STATUS_REGISTER); +} + +static inline int uart_interrupt_pending(void __iomem *base_address) +{ + return UART_INTR_MASK & readl(base_address + INTR_STATUS_REGISTER); +} + +static inline void ibmasm_enable_interrupts(void __iomem *base_address, int mask) +{ + void __iomem *ctrl_reg = base_address + INTR_CONTROL_REGISTER; + writel( readl(ctrl_reg) & ~mask, ctrl_reg); +} + +static inline void ibmasm_disable_interrupts(void __iomem *base_address, int mask) +{ + void __iomem *ctrl_reg = base_address + INTR_CONTROL_REGISTER; + writel( readl(ctrl_reg) | mask, ctrl_reg); +} + +static inline void enable_sp_interrupts(void __iomem *base_address) +{ + ibmasm_enable_interrupts(base_address, SP_INTR_MASK); +} + +static inline void disable_sp_interrupts(void __iomem *base_address) +{ + ibmasm_disable_interrupts(base_address, SP_INTR_MASK); +} + +static inline void enable_uart_interrupts(void __iomem *base_address) +{ + ibmasm_enable_interrupts(base_address, UART_INTR_MASK); +} + +static inline void disable_uart_interrupts(void __iomem *base_address) +{ + ibmasm_disable_interrupts(base_address, UART_INTR_MASK); +} + +#define valid_mfa(mfa) ( (mfa) != NO_MFAS_AVAILABLE ) + +static inline u32 get_mfa_outbound(void __iomem *base_address) +{ + int retry; + u32 mfa; + + for (retry=0; retry<=10; retry++) { + mfa = readl(base_address + OUTBOUND_QUEUE_PORT); + if (valid_mfa(mfa)) + break; + } + return mfa; +} + +static inline void set_mfa_outbound(void __iomem *base_address, u32 mfa) +{ + writel(mfa, base_address + OUTBOUND_QUEUE_PORT); +} + +static inline u32 get_mfa_inbound(void __iomem *base_address) +{ + u32 mfa = readl(base_address + INBOUND_QUEUE_PORT); + + if (MAILBOX_FULL(mfa)) + return 0; + + return mfa; +} + +static inline void set_mfa_inbound(void __iomem *base_address, u32 mfa) +{ + writel(mfa, base_address + INBOUND_QUEUE_PORT); +} + +static inline struct i2o_message *get_i2o_message(void __iomem *base_address, u32 mfa) +{ + return (struct i2o_message *)(GET_MFA_ADDR(mfa) + base_address); +} + +#endif /* __IBMASM_CONDOR_H__ */ diff --git a/drivers/misc/ibmasm/module.c b/drivers/misc/ibmasm/module.c new file mode 100644 index 000000000..9f8344169 --- /dev/null +++ b/drivers/misc/ibmasm/module.c @@ -0,0 +1,238 @@ + +/* + * IBM ASM Service Processor Device Driver + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2004 + * + * Author: Max Asböck + * + * This driver is based on code originally written by Pete Reynolds + * and others. + * + */ + +/* + * The ASM device driver does the following things: + * + * 1) When loaded it sends a message to the service processor, + * indicating that an OS is * running. This causes the service processor + * to send periodic heartbeats to the OS. + * + * 2) Answers the periodic heartbeats sent by the service processor. + * Failure to do so would result in system reboot. + * + * 3) Acts as a pass through for dot commands sent from user applications. + * The interface for this is the ibmasmfs file system. + * + * 4) Allows user applications to register for event notification. Events + * are sent to the driver through interrupts. They can be read from user + * space through the ibmasmfs file system. + * + * 5) Allows user space applications to send heartbeats to the service + * processor (aka reverse heartbeats). Again this happens through ibmasmfs. + * + * 6) Handles remote mouse and keyboard event interrupts and makes them + * available to user applications through ibmasmfs. + * + */ + +#include +#include +#include +#include "ibmasm.h" +#include "lowlevel.h" +#include "remote.h" + +int ibmasm_debug = 0; +module_param(ibmasm_debug, int , S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(ibmasm_debug, " Set debug mode on or off"); + + +static int ibmasm_init_one(struct pci_dev *pdev, const struct pci_device_id *id) +{ + int result; + struct service_processor *sp; + + if ((result = pci_enable_device(pdev))) { + dev_err(&pdev->dev, "Failed to enable PCI device\n"); + return result; + } + if ((result = pci_request_regions(pdev, DRIVER_NAME))) { + dev_err(&pdev->dev, "Failed to allocate PCI resources\n"); + goto error_resources; + } + /* vnc client won't work without bus-mastering */ + pci_set_master(pdev); + + sp = kzalloc(sizeof(struct service_processor), GFP_KERNEL); + if (sp == NULL) { + dev_err(&pdev->dev, "Failed to allocate memory\n"); + result = -ENOMEM; + goto error_kmalloc; + } + + spin_lock_init(&sp->lock); + INIT_LIST_HEAD(&sp->command_queue); + + pci_set_drvdata(pdev, (void *)sp); + sp->dev = &pdev->dev; + sp->number = pdev->bus->number; + snprintf(sp->dirname, IBMASM_NAME_SIZE, "%d", sp->number); + snprintf(sp->devname, IBMASM_NAME_SIZE, "%s%d", DRIVER_NAME, sp->number); + + result = ibmasm_event_buffer_init(sp); + if (result) { + dev_err(sp->dev, "Failed to allocate event buffer\n"); + goto error_eventbuffer; + } + + result = ibmasm_heartbeat_init(sp); + if (result) { + dev_err(sp->dev, "Failed to allocate heartbeat command\n"); + goto error_heartbeat; + } + + sp->irq = pdev->irq; + sp->base_address = pci_ioremap_bar(pdev, 0); + if (!sp->base_address) { + dev_err(sp->dev, "Failed to ioremap pci memory\n"); + result = -ENODEV; + goto error_ioremap; + } + + result = request_irq(sp->irq, ibmasm_interrupt_handler, IRQF_SHARED, sp->devname, (void*)sp); + if (result) { + dev_err(sp->dev, "Failed to register interrupt handler\n"); + goto error_request_irq; + } + + enable_sp_interrupts(sp->base_address); + + result = ibmasm_init_remote_input_dev(sp); + if (result) { + dev_err(sp->dev, "Failed to initialize remote queue\n"); + goto error_init_remote; + } + + result = ibmasm_send_driver_vpd(sp); + if (result) { + dev_err(sp->dev, "Failed to send driver VPD to service processor\n"); + goto error_send_message; + } + result = ibmasm_send_os_state(sp, SYSTEM_STATE_OS_UP); + if (result) { + dev_err(sp->dev, "Failed to send OS state to service processor\n"); + goto error_send_message; + } + ibmasmfs_add_sp(sp); + + ibmasm_register_uart(sp); + + return 0; + +error_send_message: + ibmasm_free_remote_input_dev(sp); +error_init_remote: + disable_sp_interrupts(sp->base_address); + free_irq(sp->irq, (void *)sp); +error_request_irq: + iounmap(sp->base_address); +error_ioremap: + ibmasm_heartbeat_exit(sp); +error_heartbeat: + ibmasm_event_buffer_exit(sp); +error_eventbuffer: + kfree(sp); +error_kmalloc: + pci_release_regions(pdev); +error_resources: + pci_disable_device(pdev); + + return result; +} + +static void ibmasm_remove_one(struct pci_dev *pdev) +{ + struct service_processor *sp = pci_get_drvdata(pdev); + + dbg("Unregistering UART\n"); + ibmasm_unregister_uart(sp); + dbg("Sending OS down message\n"); + if (ibmasm_send_os_state(sp, SYSTEM_STATE_OS_DOWN)) + err("failed to get response to 'Send OS State' command\n"); + dbg("Disabling heartbeats\n"); + ibmasm_heartbeat_exit(sp); + dbg("Disabling interrupts\n"); + disable_sp_interrupts(sp->base_address); + dbg("Freeing SP irq\n"); + free_irq(sp->irq, (void *)sp); + dbg("Cleaning up\n"); + ibmasm_free_remote_input_dev(sp); + iounmap(sp->base_address); + ibmasm_event_buffer_exit(sp); + kfree(sp); + pci_release_regions(pdev); + pci_disable_device(pdev); +} + +static struct pci_device_id ibmasm_pci_table[] = +{ + { PCI_DEVICE(VENDORID_IBM, DEVICEID_RSA) }, + {}, +}; + +static struct pci_driver ibmasm_driver = { + .name = DRIVER_NAME, + .id_table = ibmasm_pci_table, + .probe = ibmasm_init_one, + .remove = ibmasm_remove_one, +}; + +static void __exit ibmasm_exit (void) +{ + ibmasm_unregister_panic_notifier(); + ibmasmfs_unregister(); + pci_unregister_driver(&ibmasm_driver); + info(DRIVER_DESC " version " DRIVER_VERSION " unloaded"); +} + +static int __init ibmasm_init(void) +{ + int result = pci_register_driver(&ibmasm_driver); + if (result) + return result; + + result = ibmasmfs_register(); + if (result) { + pci_unregister_driver(&ibmasm_driver); + err("Failed to register ibmasmfs file system"); + return result; + } + + ibmasm_register_panic_notifier(); + info(DRIVER_DESC " version " DRIVER_VERSION " loaded"); + return 0; +} + +module_init(ibmasm_init); +module_exit(ibmasm_exit); + +MODULE_AUTHOR(DRIVER_AUTHOR); +MODULE_DESCRIPTION(DRIVER_DESC); +MODULE_LICENSE("GPL"); +MODULE_DEVICE_TABLE(pci, ibmasm_pci_table); + diff --git a/drivers/misc/ibmasm/r_heartbeat.c b/drivers/misc/ibmasm/r_heartbeat.c new file mode 100644 index 000000000..5c7dd26db --- /dev/null +++ b/drivers/misc/ibmasm/r_heartbeat.c @@ -0,0 +1,99 @@ + +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2004 + * + * Author: Max Asböck + * + */ + +#include +#include "ibmasm.h" +#include "dot_command.h" + +/* + * Reverse Heartbeat, i.e. heartbeats sent from the driver to the + * service processor. + * These heartbeats are initiated by user level programs. + */ + +/* the reverse heartbeat dot command */ +#pragma pack(1) +static struct { + struct dot_command_header header; + unsigned char command[3]; +} rhb_dot_cmd = { + .header = { + .type = sp_read, + .command_size = 3, + .data_size = 0, + .status = 0 + }, + .command = { 4, 3, 6 } +}; +#pragma pack() + +void ibmasm_init_reverse_heartbeat(struct service_processor *sp, struct reverse_heartbeat *rhb) +{ + init_waitqueue_head(&rhb->wait); + rhb->stopped = 0; +} + +/** + * start_reverse_heartbeat + * Loop forever, sending a reverse heartbeat dot command to the service + * processor, then sleeping. The loop comes to an end if the service + * processor fails to respond 3 times or we were interrupted. + */ +int ibmasm_start_reverse_heartbeat(struct service_processor *sp, struct reverse_heartbeat *rhb) +{ + struct command *cmd; + int times_failed = 0; + int result = 1; + + cmd = ibmasm_new_command(sp, sizeof rhb_dot_cmd); + if (!cmd) + return -ENOMEM; + + while (times_failed < 3) { + memcpy(cmd->buffer, (void *)&rhb_dot_cmd, sizeof rhb_dot_cmd); + cmd->status = IBMASM_CMD_PENDING; + ibmasm_exec_command(sp, cmd); + ibmasm_wait_for_response(cmd, IBMASM_CMD_TIMEOUT_NORMAL); + + if (cmd->status != IBMASM_CMD_COMPLETE) + times_failed++; + + wait_event_interruptible_timeout(rhb->wait, + rhb->stopped, + REVERSE_HEARTBEAT_TIMEOUT * HZ); + + if (signal_pending(current) || rhb->stopped) { + result = -EINTR; + break; + } + } + command_put(cmd); + rhb->stopped = 0; + + return result; +} + +void ibmasm_stop_reverse_heartbeat(struct reverse_heartbeat *rhb) +{ + rhb->stopped = 1; + wake_up_interruptible(&rhb->wait); +} diff --git a/drivers/misc/ibmasm/remote.c b/drivers/misc/ibmasm/remote.c new file mode 100644 index 000000000..477bb43c8 --- /dev/null +++ b/drivers/misc/ibmasm/remote.c @@ -0,0 +1,282 @@ +/* + * IBM ASM Service Processor Device Driver + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2004 + * + * Authors: Max Asböck + * Vernon Mauery + * + */ + +/* Remote mouse and keyboard event handling functions */ + +#include +#include "ibmasm.h" +#include "remote.h" + +#define MOUSE_X_MAX 1600 +#define MOUSE_Y_MAX 1200 + +static const unsigned short xlate_high[XLATE_SIZE] = { + [KEY_SYM_ENTER & 0xff] = KEY_ENTER, + [KEY_SYM_KPSLASH & 0xff] = KEY_KPSLASH, + [KEY_SYM_KPSTAR & 0xff] = KEY_KPASTERISK, + [KEY_SYM_KPMINUS & 0xff] = KEY_KPMINUS, + [KEY_SYM_KPDOT & 0xff] = KEY_KPDOT, + [KEY_SYM_KPPLUS & 0xff] = KEY_KPPLUS, + [KEY_SYM_KP0 & 0xff] = KEY_KP0, + [KEY_SYM_KP1 & 0xff] = KEY_KP1, + [KEY_SYM_KP2 & 0xff] = KEY_KP2, [KEY_SYM_KPDOWN & 0xff] = KEY_KP2, + [KEY_SYM_KP3 & 0xff] = KEY_KP3, + [KEY_SYM_KP4 & 0xff] = KEY_KP4, [KEY_SYM_KPLEFT & 0xff] = KEY_KP4, + [KEY_SYM_KP5 & 0xff] = KEY_KP5, + [KEY_SYM_KP6 & 0xff] = KEY_KP6, [KEY_SYM_KPRIGHT & 0xff] = KEY_KP6, + [KEY_SYM_KP7 & 0xff] = KEY_KP7, + [KEY_SYM_KP8 & 0xff] = KEY_KP8, [KEY_SYM_KPUP & 0xff] = KEY_KP8, + [KEY_SYM_KP9 & 0xff] = KEY_KP9, + [KEY_SYM_BK_SPC & 0xff] = KEY_BACKSPACE, + [KEY_SYM_TAB & 0xff] = KEY_TAB, + [KEY_SYM_CTRL & 0xff] = KEY_LEFTCTRL, + [KEY_SYM_ALT & 0xff] = KEY_LEFTALT, + [KEY_SYM_INSERT & 0xff] = KEY_INSERT, + [KEY_SYM_DELETE & 0xff] = KEY_DELETE, + [KEY_SYM_SHIFT & 0xff] = KEY_LEFTSHIFT, + [KEY_SYM_UARROW & 0xff] = KEY_UP, + [KEY_SYM_DARROW & 0xff] = KEY_DOWN, + [KEY_SYM_LARROW & 0xff] = KEY_LEFT, + [KEY_SYM_RARROW & 0xff] = KEY_RIGHT, + [KEY_SYM_ESCAPE & 0xff] = KEY_ESC, + [KEY_SYM_PAGEUP & 0xff] = KEY_PAGEUP, + [KEY_SYM_PAGEDOWN & 0xff] = KEY_PAGEDOWN, + [KEY_SYM_HOME & 0xff] = KEY_HOME, + [KEY_SYM_END & 0xff] = KEY_END, + [KEY_SYM_F1 & 0xff] = KEY_F1, + [KEY_SYM_F2 & 0xff] = KEY_F2, + [KEY_SYM_F3 & 0xff] = KEY_F3, + [KEY_SYM_F4 & 0xff] = KEY_F4, + [KEY_SYM_F5 & 0xff] = KEY_F5, + [KEY_SYM_F6 & 0xff] = KEY_F6, + [KEY_SYM_F7 & 0xff] = KEY_F7, + [KEY_SYM_F8 & 0xff] = KEY_F8, + [KEY_SYM_F9 & 0xff] = KEY_F9, + [KEY_SYM_F10 & 0xff] = KEY_F10, + [KEY_SYM_F11 & 0xff] = KEY_F11, + [KEY_SYM_F12 & 0xff] = KEY_F12, + [KEY_SYM_CAP_LOCK & 0xff] = KEY_CAPSLOCK, + [KEY_SYM_NUM_LOCK & 0xff] = KEY_NUMLOCK, + [KEY_SYM_SCR_LOCK & 0xff] = KEY_SCROLLLOCK, +}; + +static const unsigned short xlate[XLATE_SIZE] = { + [NO_KEYCODE] = KEY_RESERVED, + [KEY_SYM_SPACE] = KEY_SPACE, + [KEY_SYM_TILDE] = KEY_GRAVE, [KEY_SYM_BKTIC] = KEY_GRAVE, + [KEY_SYM_ONE] = KEY_1, [KEY_SYM_BANG] = KEY_1, + [KEY_SYM_TWO] = KEY_2, [KEY_SYM_AT] = KEY_2, + [KEY_SYM_THREE] = KEY_3, [KEY_SYM_POUND] = KEY_3, + [KEY_SYM_FOUR] = KEY_4, [KEY_SYM_DOLLAR] = KEY_4, + [KEY_SYM_FIVE] = KEY_5, [KEY_SYM_PERCENT] = KEY_5, + [KEY_SYM_SIX] = KEY_6, [KEY_SYM_CARAT] = KEY_6, + [KEY_SYM_SEVEN] = KEY_7, [KEY_SYM_AMPER] = KEY_7, + [KEY_SYM_EIGHT] = KEY_8, [KEY_SYM_STAR] = KEY_8, + [KEY_SYM_NINE] = KEY_9, [KEY_SYM_LPAREN] = KEY_9, + [KEY_SYM_ZERO] = KEY_0, [KEY_SYM_RPAREN] = KEY_0, + [KEY_SYM_MINUS] = KEY_MINUS, [KEY_SYM_USCORE] = KEY_MINUS, + [KEY_SYM_EQUAL] = KEY_EQUAL, [KEY_SYM_PLUS] = KEY_EQUAL, + [KEY_SYM_LBRKT] = KEY_LEFTBRACE, [KEY_SYM_LCURLY] = KEY_LEFTBRACE, + [KEY_SYM_RBRKT] = KEY_RIGHTBRACE, [KEY_SYM_RCURLY] = KEY_RIGHTBRACE, + [KEY_SYM_SLASH] = KEY_BACKSLASH, [KEY_SYM_PIPE] = KEY_BACKSLASH, + [KEY_SYM_TIC] = KEY_APOSTROPHE, [KEY_SYM_QUOTE] = KEY_APOSTROPHE, + [KEY_SYM_SEMIC] = KEY_SEMICOLON, [KEY_SYM_COLON] = KEY_SEMICOLON, + [KEY_SYM_COMMA] = KEY_COMMA, [KEY_SYM_LT] = KEY_COMMA, + [KEY_SYM_PERIOD] = KEY_DOT, [KEY_SYM_GT] = KEY_DOT, + [KEY_SYM_BSLASH] = KEY_SLASH, [KEY_SYM_QMARK] = KEY_SLASH, + [KEY_SYM_A] = KEY_A, [KEY_SYM_a] = KEY_A, + [KEY_SYM_B] = KEY_B, [KEY_SYM_b] = KEY_B, + [KEY_SYM_C] = KEY_C, [KEY_SYM_c] = KEY_C, + [KEY_SYM_D] = KEY_D, [KEY_SYM_d] = KEY_D, + [KEY_SYM_E] = KEY_E, [KEY_SYM_e] = KEY_E, + [KEY_SYM_F] = KEY_F, [KEY_SYM_f] = KEY_F, + [KEY_SYM_G] = KEY_G, [KEY_SYM_g] = KEY_G, + [KEY_SYM_H] = KEY_H, [KEY_SYM_h] = KEY_H, + [KEY_SYM_I] = KEY_I, [KEY_SYM_i] = KEY_I, + [KEY_SYM_J] = KEY_J, [KEY_SYM_j] = KEY_J, + [KEY_SYM_K] = KEY_K, [KEY_SYM_k] = KEY_K, + [KEY_SYM_L] = KEY_L, [KEY_SYM_l] = KEY_L, + [KEY_SYM_M] = KEY_M, [KEY_SYM_m] = KEY_M, + [KEY_SYM_N] = KEY_N, [KEY_SYM_n] = KEY_N, + [KEY_SYM_O] = KEY_O, [KEY_SYM_o] = KEY_O, + [KEY_SYM_P] = KEY_P, [KEY_SYM_p] = KEY_P, + [KEY_SYM_Q] = KEY_Q, [KEY_SYM_q] = KEY_Q, + [KEY_SYM_R] = KEY_R, [KEY_SYM_r] = KEY_R, + [KEY_SYM_S] = KEY_S, [KEY_SYM_s] = KEY_S, + [KEY_SYM_T] = KEY_T, [KEY_SYM_t] = KEY_T, + [KEY_SYM_U] = KEY_U, [KEY_SYM_u] = KEY_U, + [KEY_SYM_V] = KEY_V, [KEY_SYM_v] = KEY_V, + [KEY_SYM_W] = KEY_W, [KEY_SYM_w] = KEY_W, + [KEY_SYM_X] = KEY_X, [KEY_SYM_x] = KEY_X, + [KEY_SYM_Y] = KEY_Y, [KEY_SYM_y] = KEY_Y, + [KEY_SYM_Z] = KEY_Z, [KEY_SYM_z] = KEY_Z, +}; + +static void print_input(struct remote_input *input) +{ + if (input->type == INPUT_TYPE_MOUSE) { + unsigned char buttons = input->mouse_buttons; + dbg("remote mouse movement: (x,y)=(%d,%d)%s%s%s%s\n", + input->data.mouse.x, input->data.mouse.y, + (buttons) ? " -- buttons:" : "", + (buttons & REMOTE_BUTTON_LEFT) ? "left " : "", + (buttons & REMOTE_BUTTON_MIDDLE) ? "middle " : "", + (buttons & REMOTE_BUTTON_RIGHT) ? "right" : "" + ); + } else { + dbg("remote keypress (code, flag, down):" + "%d (0x%x) [0x%x] [0x%x]\n", + input->data.keyboard.key_code, + input->data.keyboard.key_code, + input->data.keyboard.key_flag, + input->data.keyboard.key_down + ); + } +} + +static void send_mouse_event(struct input_dev *dev, struct remote_input *input) +{ + unsigned char buttons = input->mouse_buttons; + + input_report_abs(dev, ABS_X, input->data.mouse.x); + input_report_abs(dev, ABS_Y, input->data.mouse.y); + input_report_key(dev, BTN_LEFT, buttons & REMOTE_BUTTON_LEFT); + input_report_key(dev, BTN_MIDDLE, buttons & REMOTE_BUTTON_MIDDLE); + input_report_key(dev, BTN_RIGHT, buttons & REMOTE_BUTTON_RIGHT); + input_sync(dev); +} + +static void send_keyboard_event(struct input_dev *dev, + struct remote_input *input) +{ + unsigned int key; + unsigned short code = input->data.keyboard.key_code; + + if (code & 0xff00) + key = xlate_high[code & 0xff]; + else + key = xlate[code]; + input_report_key(dev, key, input->data.keyboard.key_down); + input_sync(dev); +} + +void ibmasm_handle_mouse_interrupt(struct service_processor *sp) +{ + unsigned long reader; + unsigned long writer; + struct remote_input input; + + reader = get_queue_reader(sp); + writer = get_queue_writer(sp); + + while (reader != writer) { + memcpy_fromio(&input, get_queue_entry(sp, reader), + sizeof(struct remote_input)); + + print_input(&input); + if (input.type == INPUT_TYPE_MOUSE) { + send_mouse_event(sp->remote.mouse_dev, &input); + } else if (input.type == INPUT_TYPE_KEYBOARD) { + send_keyboard_event(sp->remote.keybd_dev, &input); + } else + break; + + reader = advance_queue_reader(sp, reader); + writer = get_queue_writer(sp); + } +} + +int ibmasm_init_remote_input_dev(struct service_processor *sp) +{ + /* set up the mouse input device */ + struct input_dev *mouse_dev, *keybd_dev; + struct pci_dev *pdev = to_pci_dev(sp->dev); + int error = -ENOMEM; + int i; + + sp->remote.mouse_dev = mouse_dev = input_allocate_device(); + sp->remote.keybd_dev = keybd_dev = input_allocate_device(); + + if (!mouse_dev || !keybd_dev) + goto err_free_devices; + + mouse_dev->id.bustype = BUS_PCI; + mouse_dev->id.vendor = pdev->vendor; + mouse_dev->id.product = pdev->device; + mouse_dev->id.version = 1; + mouse_dev->dev.parent = sp->dev; + mouse_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS); + mouse_dev->keybit[BIT_WORD(BTN_MOUSE)] = BIT_MASK(BTN_LEFT) | + BIT_MASK(BTN_RIGHT) | BIT_MASK(BTN_MIDDLE); + set_bit(BTN_TOUCH, mouse_dev->keybit); + mouse_dev->name = "ibmasm RSA I remote mouse"; + input_set_abs_params(mouse_dev, ABS_X, 0, MOUSE_X_MAX, 0, 0); + input_set_abs_params(mouse_dev, ABS_Y, 0, MOUSE_Y_MAX, 0, 0); + + keybd_dev->id.bustype = BUS_PCI; + keybd_dev->id.vendor = pdev->vendor; + keybd_dev->id.product = pdev->device; + keybd_dev->id.version = 2; + keybd_dev->dev.parent = sp->dev; + keybd_dev->evbit[0] = BIT_MASK(EV_KEY); + keybd_dev->name = "ibmasm RSA I remote keyboard"; + + for (i = 0; i < XLATE_SIZE; i++) { + if (xlate_high[i]) + set_bit(xlate_high[i], keybd_dev->keybit); + if (xlate[i]) + set_bit(xlate[i], keybd_dev->keybit); + } + + error = input_register_device(mouse_dev); + if (error) + goto err_free_devices; + + error = input_register_device(keybd_dev); + if (error) + goto err_unregister_mouse_dev; + + enable_mouse_interrupts(sp); + + printk(KERN_INFO "ibmasm remote responding to events on RSA card %d\n", sp->number); + + return 0; + + err_unregister_mouse_dev: + input_unregister_device(mouse_dev); + mouse_dev = NULL; /* so we don't try to free it again below */ + err_free_devices: + input_free_device(mouse_dev); + input_free_device(keybd_dev); + + return error; +} + +void ibmasm_free_remote_input_dev(struct service_processor *sp) +{ + disable_mouse_interrupts(sp); + input_unregister_device(sp->remote.mouse_dev); + input_unregister_device(sp->remote.keybd_dev); +} + diff --git a/drivers/misc/ibmasm/remote.h b/drivers/misc/ibmasm/remote.h new file mode 100644 index 000000000..a7729ef76 --- /dev/null +++ b/drivers/misc/ibmasm/remote.h @@ -0,0 +1,270 @@ + +/* + * IBM ASM Service Processor Device Driver + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2004 + * + * Author: Max Asböck + * + * Originally written by Pete Reynolds + */ + +#ifndef _IBMASM_REMOTE_H_ +#define _IBMASM_REMOTE_H_ + +#include + +/* pci offsets */ +#define CONDOR_MOUSE_DATA 0x000AC000 +#define CONDOR_MOUSE_ISR_CONTROL 0x00 +#define CONDOR_MOUSE_ISR_STATUS 0x04 +#define CONDOR_MOUSE_Q_READER 0x08 +#define CONDOR_MOUSE_Q_WRITER 0x0C +#define CONDOR_MOUSE_Q_BEGIN 0x10 +#define CONDOR_MOUSE_MAX_X 0x14 +#define CONDOR_MOUSE_MAX_Y 0x18 + +#define CONDOR_INPUT_DESKTOP_INFO 0x1F0 +#define CONDOR_INPUT_DISPLAY_RESX 0x1F4 +#define CONDOR_INPUT_DISPLAY_RESY 0x1F8 +#define CONDOR_INPUT_DISPLAY_BITS 0x1FC +#define CONDOR_OUTPUT_VNC_STATUS 0x200 + +#define CONDOR_MOUSE_INTR_STATUS_MASK 0x00000001 + +#define INPUT_TYPE_MOUSE 0x1 +#define INPUT_TYPE_KEYBOARD 0x2 + + +/* mouse button states received from SP */ +#define REMOTE_DOUBLE_CLICK 0xF0 +#define REMOTE_BUTTON_LEFT 0x01 +#define REMOTE_BUTTON_MIDDLE 0x02 +#define REMOTE_BUTTON_RIGHT 0x04 + +/* size of keysym/keycode translation matricies */ +#define XLATE_SIZE 256 + +struct mouse_input { + unsigned short y; + unsigned short x; +}; + + +struct keyboard_input { + unsigned short key_code; + unsigned char key_flag; + unsigned char key_down; +}; + + + +struct remote_input { + union { + struct mouse_input mouse; + struct keyboard_input keyboard; + } data; + + unsigned char type; + unsigned char pad1; + unsigned char mouse_buttons; + unsigned char pad3; +}; + +#define mouse_addr(sp) (sp->base_address + CONDOR_MOUSE_DATA) +#define display_width(sp) (mouse_addr(sp) + CONDOR_INPUT_DISPLAY_RESX) +#define display_height(sp) (mouse_addr(sp) + CONDOR_INPUT_DISPLAY_RESY) +#define display_depth(sp) (mouse_addr(sp) + CONDOR_INPUT_DISPLAY_BITS) +#define desktop_info(sp) (mouse_addr(sp) + CONDOR_INPUT_DESKTOP_INFO) +#define vnc_status(sp) (mouse_addr(sp) + CONDOR_OUTPUT_VNC_STATUS) +#define isr_control(sp) (mouse_addr(sp) + CONDOR_MOUSE_ISR_CONTROL) + +#define mouse_interrupt_pending(sp) readl(mouse_addr(sp) + CONDOR_MOUSE_ISR_STATUS) +#define clear_mouse_interrupt(sp) writel(0, mouse_addr(sp) + CONDOR_MOUSE_ISR_STATUS) +#define enable_mouse_interrupts(sp) writel(1, mouse_addr(sp) + CONDOR_MOUSE_ISR_CONTROL) +#define disable_mouse_interrupts(sp) writel(0, mouse_addr(sp) + CONDOR_MOUSE_ISR_CONTROL) + +/* remote input queue operations */ +#define REMOTE_QUEUE_SIZE 60 + +#define get_queue_writer(sp) readl(mouse_addr(sp) + CONDOR_MOUSE_Q_WRITER) +#define get_queue_reader(sp) readl(mouse_addr(sp) + CONDOR_MOUSE_Q_READER) +#define set_queue_reader(sp, reader) writel(reader, mouse_addr(sp) + CONDOR_MOUSE_Q_READER) + +#define queue_begin (mouse_addr(sp) + CONDOR_MOUSE_Q_BEGIN) + +#define get_queue_entry(sp, read_index) \ + ((void*)(queue_begin + read_index * sizeof(struct remote_input))) + +static inline int advance_queue_reader(struct service_processor *sp, unsigned long reader) +{ + reader++; + if (reader == REMOTE_QUEUE_SIZE) + reader = 0; + + set_queue_reader(sp, reader); + return reader; +} + +#define NO_KEYCODE 0 +#define KEY_SYM_BK_SPC 0xFF08 +#define KEY_SYM_TAB 0xFF09 +#define KEY_SYM_ENTER 0xFF0D +#define KEY_SYM_SCR_LOCK 0xFF14 +#define KEY_SYM_ESCAPE 0xFF1B +#define KEY_SYM_HOME 0xFF50 +#define KEY_SYM_LARROW 0xFF51 +#define KEY_SYM_UARROW 0xFF52 +#define KEY_SYM_RARROW 0xFF53 +#define KEY_SYM_DARROW 0xFF54 +#define KEY_SYM_PAGEUP 0xFF55 +#define KEY_SYM_PAGEDOWN 0xFF56 +#define KEY_SYM_END 0xFF57 +#define KEY_SYM_INSERT 0xFF63 +#define KEY_SYM_NUM_LOCK 0xFF7F +#define KEY_SYM_KPSTAR 0xFFAA +#define KEY_SYM_KPPLUS 0xFFAB +#define KEY_SYM_KPMINUS 0xFFAD +#define KEY_SYM_KPDOT 0xFFAE +#define KEY_SYM_KPSLASH 0xFFAF +#define KEY_SYM_KPRIGHT 0xFF96 +#define KEY_SYM_KPUP 0xFF97 +#define KEY_SYM_KPLEFT 0xFF98 +#define KEY_SYM_KPDOWN 0xFF99 +#define KEY_SYM_KP0 0xFFB0 +#define KEY_SYM_KP1 0xFFB1 +#define KEY_SYM_KP2 0xFFB2 +#define KEY_SYM_KP3 0xFFB3 +#define KEY_SYM_KP4 0xFFB4 +#define KEY_SYM_KP5 0xFFB5 +#define KEY_SYM_KP6 0xFFB6 +#define KEY_SYM_KP7 0xFFB7 +#define KEY_SYM_KP8 0xFFB8 +#define KEY_SYM_KP9 0xFFB9 +#define KEY_SYM_F1 0xFFBE // 1B 5B 5B 41 +#define KEY_SYM_F2 0xFFBF // 1B 5B 5B 42 +#define KEY_SYM_F3 0xFFC0 // 1B 5B 5B 43 +#define KEY_SYM_F4 0xFFC1 // 1B 5B 5B 44 +#define KEY_SYM_F5 0xFFC2 // 1B 5B 5B 45 +#define KEY_SYM_F6 0xFFC3 // 1B 5B 31 37 7E +#define KEY_SYM_F7 0xFFC4 // 1B 5B 31 38 7E +#define KEY_SYM_F8 0xFFC5 // 1B 5B 31 39 7E +#define KEY_SYM_F9 0xFFC6 // 1B 5B 32 30 7E +#define KEY_SYM_F10 0xFFC7 // 1B 5B 32 31 7E +#define KEY_SYM_F11 0xFFC8 // 1B 5B 32 33 7E +#define KEY_SYM_F12 0xFFC9 // 1B 5B 32 34 7E +#define KEY_SYM_SHIFT 0xFFE1 +#define KEY_SYM_CTRL 0xFFE3 +#define KEY_SYM_ALT 0xFFE9 +#define KEY_SYM_CAP_LOCK 0xFFE5 +#define KEY_SYM_DELETE 0xFFFF +#define KEY_SYM_TILDE 0x60 +#define KEY_SYM_BKTIC 0x7E +#define KEY_SYM_ONE 0x31 +#define KEY_SYM_BANG 0x21 +#define KEY_SYM_TWO 0x32 +#define KEY_SYM_AT 0x40 +#define KEY_SYM_THREE 0x33 +#define KEY_SYM_POUND 0x23 +#define KEY_SYM_FOUR 0x34 +#define KEY_SYM_DOLLAR 0x24 +#define KEY_SYM_FIVE 0x35 +#define KEY_SYM_PERCENT 0x25 +#define KEY_SYM_SIX 0x36 +#define KEY_SYM_CARAT 0x5E +#define KEY_SYM_SEVEN 0x37 +#define KEY_SYM_AMPER 0x26 +#define KEY_SYM_EIGHT 0x38 +#define KEY_SYM_STAR 0x2A +#define KEY_SYM_NINE 0x39 +#define KEY_SYM_LPAREN 0x28 +#define KEY_SYM_ZERO 0x30 +#define KEY_SYM_RPAREN 0x29 +#define KEY_SYM_MINUS 0x2D +#define KEY_SYM_USCORE 0x5F +#define KEY_SYM_EQUAL 0x2B +#define KEY_SYM_PLUS 0x3D +#define KEY_SYM_LBRKT 0x5B +#define KEY_SYM_LCURLY 0x7B +#define KEY_SYM_RBRKT 0x5D +#define KEY_SYM_RCURLY 0x7D +#define KEY_SYM_SLASH 0x5C +#define KEY_SYM_PIPE 0x7C +#define KEY_SYM_TIC 0x27 +#define KEY_SYM_QUOTE 0x22 +#define KEY_SYM_SEMIC 0x3B +#define KEY_SYM_COLON 0x3A +#define KEY_SYM_COMMA 0x2C +#define KEY_SYM_LT 0x3C +#define KEY_SYM_PERIOD 0x2E +#define KEY_SYM_GT 0x3E +#define KEY_SYM_BSLASH 0x2F +#define KEY_SYM_QMARK 0x3F +#define KEY_SYM_A 0x41 +#define KEY_SYM_B 0x42 +#define KEY_SYM_C 0x43 +#define KEY_SYM_D 0x44 +#define KEY_SYM_E 0x45 +#define KEY_SYM_F 0x46 +#define KEY_SYM_G 0x47 +#define KEY_SYM_H 0x48 +#define KEY_SYM_I 0x49 +#define KEY_SYM_J 0x4A +#define KEY_SYM_K 0x4B +#define KEY_SYM_L 0x4C +#define KEY_SYM_M 0x4D +#define KEY_SYM_N 0x4E +#define KEY_SYM_O 0x4F +#define KEY_SYM_P 0x50 +#define KEY_SYM_Q 0x51 +#define KEY_SYM_R 0x52 +#define KEY_SYM_S 0x53 +#define KEY_SYM_T 0x54 +#define KEY_SYM_U 0x55 +#define KEY_SYM_V 0x56 +#define KEY_SYM_W 0x57 +#define KEY_SYM_X 0x58 +#define KEY_SYM_Y 0x59 +#define KEY_SYM_Z 0x5A +#define KEY_SYM_a 0x61 +#define KEY_SYM_b 0x62 +#define KEY_SYM_c 0x63 +#define KEY_SYM_d 0x64 +#define KEY_SYM_e 0x65 +#define KEY_SYM_f 0x66 +#define KEY_SYM_g 0x67 +#define KEY_SYM_h 0x68 +#define KEY_SYM_i 0x69 +#define KEY_SYM_j 0x6A +#define KEY_SYM_k 0x6B +#define KEY_SYM_l 0x6C +#define KEY_SYM_m 0x6D +#define KEY_SYM_n 0x6E +#define KEY_SYM_o 0x6F +#define KEY_SYM_p 0x70 +#define KEY_SYM_q 0x71 +#define KEY_SYM_r 0x72 +#define KEY_SYM_s 0x73 +#define KEY_SYM_t 0x74 +#define KEY_SYM_u 0x75 +#define KEY_SYM_v 0x76 +#define KEY_SYM_w 0x77 +#define KEY_SYM_x 0x78 +#define KEY_SYM_y 0x79 +#define KEY_SYM_z 0x7A +#define KEY_SYM_SPACE 0x20 +#endif /* _IBMASM_REMOTE_H_ */ diff --git a/drivers/misc/ibmasm/uart.c b/drivers/misc/ibmasm/uart.c new file mode 100644 index 000000000..01e2b0d7e --- /dev/null +++ b/drivers/misc/ibmasm/uart.c @@ -0,0 +1,72 @@ + +/* + * IBM ASM Service Processor Device Driver + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2004 + * + * Author: Max Asböck + * + */ + +#include +#include +#include +#include +#include +#include "ibmasm.h" +#include "lowlevel.h" + + +void ibmasm_register_uart(struct service_processor *sp) +{ + struct uart_8250_port uart; + void __iomem *iomem_base; + + iomem_base = sp->base_address + SCOUT_COM_B_BASE; + + /* read the uart scratch register to determine if the UART + * is dedicated to the service processor or if the OS can use it + */ + if (0 == readl(iomem_base + UART_SCR)) { + dev_info(sp->dev, "IBM SP UART not registered, owned by service processor\n"); + sp->serial_line = -1; + return; + } + + memset(&uart, 0, sizeof(uart)); + uart.port.irq = sp->irq; + uart.port.uartclk = 3686400; + uart.port.flags = UPF_SHARE_IRQ; + uart.port.iotype = UPIO_MEM; + uart.port.membase = iomem_base; + + sp->serial_line = serial8250_register_8250_port(&uart); + if (sp->serial_line < 0) { + dev_err(sp->dev, "Failed to register serial port\n"); + return; + } + enable_uart_interrupts(sp->base_address); +} + +void ibmasm_unregister_uart(struct service_processor *sp) +{ + if (sp->serial_line < 0) + return; + + disable_uart_interrupts(sp->base_address); + serial8250_unregister_port(sp->serial_line); +} diff --git a/drivers/misc/ibmvmc.c b/drivers/misc/ibmvmc.c new file mode 100644 index 000000000..2ed23c99f --- /dev/null +++ b/drivers/misc/ibmvmc.c @@ -0,0 +1,2421 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * IBM Power Systems Virtual Management Channel Support. + * + * Copyright (c) 2004, 2018 IBM Corp. + * Dave Engebretsen engebret@us.ibm.com + * Steven Royer seroyer@linux.vnet.ibm.com + * Adam Reznechek adreznec@linux.vnet.ibm.com + * Bryant G. Ly + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "ibmvmc.h" + +#define IBMVMC_DRIVER_VERSION "1.0" + +/* + * Static global variables + */ +static DECLARE_WAIT_QUEUE_HEAD(ibmvmc_read_wait); + +static const char ibmvmc_driver_name[] = "ibmvmc"; + +static struct ibmvmc_struct ibmvmc; +static struct ibmvmc_hmc hmcs[MAX_HMCS]; +static struct crq_server_adapter ibmvmc_adapter; + +static int ibmvmc_max_buf_pool_size = DEFAULT_BUF_POOL_SIZE; +static int ibmvmc_max_hmcs = DEFAULT_HMCS; +static int ibmvmc_max_mtu = DEFAULT_MTU; + +static inline long h_copy_rdma(s64 length, u64 sliobn, u64 slioba, + u64 dliobn, u64 dlioba) +{ + long rc = 0; + + /* Ensure all writes to source memory are visible before hcall */ + dma_wmb(); + pr_debug("ibmvmc: h_copy_rdma(0x%llx, 0x%llx, 0x%llx, 0x%llx, 0x%llx\n", + length, sliobn, slioba, dliobn, dlioba); + rc = plpar_hcall_norets(H_COPY_RDMA, length, sliobn, slioba, + dliobn, dlioba); + pr_debug("ibmvmc: h_copy_rdma rc = 0x%lx\n", rc); + + return rc; +} + +static inline void h_free_crq(uint32_t unit_address) +{ + long rc = 0; + + do { + if (H_IS_LONG_BUSY(rc)) + msleep(get_longbusy_msecs(rc)); + + rc = plpar_hcall_norets(H_FREE_CRQ, unit_address); + } while ((rc == H_BUSY) || (H_IS_LONG_BUSY(rc))); +} + +/** + * h_request_vmc: - request a hypervisor virtual management channel device + * @vmc_index: drc index of the vmc device created + * + * Requests the hypervisor create a new virtual management channel device, + * allowing this partition to send hypervisor virtualization control + * commands. + * + * Return: + * 0 - Success + * Non-zero - Failure + */ +static inline long h_request_vmc(u32 *vmc_index) +{ + long rc = 0; + unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; + + do { + if (H_IS_LONG_BUSY(rc)) + msleep(get_longbusy_msecs(rc)); + + /* Call to request the VMC device from phyp */ + rc = plpar_hcall(H_REQUEST_VMC, retbuf); + pr_debug("ibmvmc: %s rc = 0x%lx\n", __func__, rc); + *vmc_index = retbuf[0]; + } while ((rc == H_BUSY) || (H_IS_LONG_BUSY(rc))); + + return rc; +} + +/* routines for managing a command/response queue */ +/** + * ibmvmc_handle_event: - Interrupt handler for crq events + * @irq: number of irq to handle, not used + * @dev_instance: crq_server_adapter that received interrupt + * + * Disables interrupts and schedules ibmvmc_task + * + * Always returns IRQ_HANDLED + */ +static irqreturn_t ibmvmc_handle_event(int irq, void *dev_instance) +{ + struct crq_server_adapter *adapter = + (struct crq_server_adapter *)dev_instance; + + vio_disable_interrupts(to_vio_dev(adapter->dev)); + tasklet_schedule(&adapter->work_task); + + return IRQ_HANDLED; +} + +/** + * ibmvmc_release_crq_queue - Release CRQ Queue + * + * @adapter: crq_server_adapter struct + * + * Return: + * 0 - Success + * Non-Zero - Failure + */ +static void ibmvmc_release_crq_queue(struct crq_server_adapter *adapter) +{ + struct vio_dev *vdev = to_vio_dev(adapter->dev); + struct crq_queue *queue = &adapter->queue; + + free_irq(vdev->irq, (void *)adapter); + tasklet_kill(&adapter->work_task); + + if (adapter->reset_task) + kthread_stop(adapter->reset_task); + + h_free_crq(vdev->unit_address); + dma_unmap_single(adapter->dev, + queue->msg_token, + queue->size * sizeof(*queue->msgs), DMA_BIDIRECTIONAL); + free_page((unsigned long)queue->msgs); +} + +/** + * ibmvmc_reset_crq_queue - Reset CRQ Queue + * + * @adapter: crq_server_adapter struct + * + * This function calls h_free_crq and then calls H_REG_CRQ and does all the + * bookkeeping to get us back to where we can communicate. + * + * Return: + * 0 - Success + * Non-Zero - Failure + */ +static int ibmvmc_reset_crq_queue(struct crq_server_adapter *adapter) +{ + struct vio_dev *vdev = to_vio_dev(adapter->dev); + struct crq_queue *queue = &adapter->queue; + int rc = 0; + + /* Close the CRQ */ + h_free_crq(vdev->unit_address); + + /* Clean out the queue */ + memset(queue->msgs, 0x00, PAGE_SIZE); + queue->cur = 0; + + /* And re-open it again */ + rc = plpar_hcall_norets(H_REG_CRQ, + vdev->unit_address, + queue->msg_token, PAGE_SIZE); + if (rc == 2) + /* Adapter is good, but other end is not ready */ + dev_warn(adapter->dev, "Partner adapter not ready\n"); + else if (rc != 0) + dev_err(adapter->dev, "couldn't register crq--rc 0x%x\n", rc); + + return rc; +} + +/** + * crq_queue_next_crq: - Returns the next entry in message queue + * @queue: crq_queue to use + * + * Returns pointer to next entry in queue, or NULL if there are no new + * entried in the CRQ. + */ +static struct ibmvmc_crq_msg *crq_queue_next_crq(struct crq_queue *queue) +{ + struct ibmvmc_crq_msg *crq; + unsigned long flags; + + spin_lock_irqsave(&queue->lock, flags); + crq = &queue->msgs[queue->cur]; + if (crq->valid & 0x80) { + if (++queue->cur == queue->size) + queue->cur = 0; + + /* Ensure the read of the valid bit occurs before reading any + * other bits of the CRQ entry + */ + dma_rmb(); + } else { + crq = NULL; + } + + spin_unlock_irqrestore(&queue->lock, flags); + + return crq; +} + +/** + * ibmvmc_send_crq - Send CRQ + * + * @adapter: crq_server_adapter struct + * @word1: Word1 Data field + * @word2: Word2 Data field + * + * Return: + * 0 - Success + * Non-Zero - Failure + */ +static long ibmvmc_send_crq(struct crq_server_adapter *adapter, + u64 word1, u64 word2) +{ + struct vio_dev *vdev = to_vio_dev(adapter->dev); + long rc = 0; + + dev_dbg(adapter->dev, "(0x%x, 0x%016llx, 0x%016llx)\n", + vdev->unit_address, word1, word2); + + /* + * Ensure the command buffer is flushed to memory before handing it + * over to the other side to prevent it from fetching any stale data. + */ + dma_wmb(); + rc = plpar_hcall_norets(H_SEND_CRQ, vdev->unit_address, word1, word2); + dev_dbg(adapter->dev, "rc = 0x%lx\n", rc); + + return rc; +} + +/** + * alloc_dma_buffer - Create DMA Buffer + * + * @vdev: vio_dev struct + * @size: Size field + * @dma_handle: DMA address field + * + * Allocates memory for the command queue and maps remote memory into an + * ioba. + * + * Returns a pointer to the buffer + */ +static void *alloc_dma_buffer(struct vio_dev *vdev, size_t size, + dma_addr_t *dma_handle) +{ + /* allocate memory */ + void *buffer = kzalloc(size, GFP_ATOMIC); + + if (!buffer) { + *dma_handle = 0; + return NULL; + } + + /* DMA map */ + *dma_handle = dma_map_single(&vdev->dev, buffer, size, + DMA_BIDIRECTIONAL); + + if (dma_mapping_error(&vdev->dev, *dma_handle)) { + *dma_handle = 0; + kzfree(buffer); + return NULL; + } + + return buffer; +} + +/** + * free_dma_buffer - Free DMA Buffer + * + * @vdev: vio_dev struct + * @size: Size field + * @vaddr: Address field + * @dma_handle: DMA address field + * + * Releases memory for a command queue and unmaps mapped remote memory. + */ +static void free_dma_buffer(struct vio_dev *vdev, size_t size, void *vaddr, + dma_addr_t dma_handle) +{ + /* DMA unmap */ + dma_unmap_single(&vdev->dev, dma_handle, size, DMA_BIDIRECTIONAL); + + /* deallocate memory */ + kzfree(vaddr); +} + +/** + * ibmvmc_get_valid_hmc_buffer - Retrieve Valid HMC Buffer + * + * @hmc_index: HMC Index Field + * + * Return: + * Pointer to ibmvmc_buffer + */ +static struct ibmvmc_buffer *ibmvmc_get_valid_hmc_buffer(u8 hmc_index) +{ + struct ibmvmc_buffer *buffer; + struct ibmvmc_buffer *ret_buf = NULL; + unsigned long i; + + if (hmc_index > ibmvmc.max_hmc_index) + return NULL; + + buffer = hmcs[hmc_index].buffer; + + for (i = 0; i < ibmvmc_max_buf_pool_size; i++) { + if (buffer[i].valid && buffer[i].free && + buffer[i].owner == VMC_BUF_OWNER_ALPHA) { + buffer[i].free = 0; + ret_buf = &buffer[i]; + break; + } + } + + return ret_buf; +} + +/** + * ibmvmc_get_free_hmc_buffer - Get Free HMC Buffer + * + * @adapter: crq_server_adapter struct + * @hmc_index: Hmc Index field + * + * Return: + * Pointer to ibmvmc_buffer + */ +static struct ibmvmc_buffer *ibmvmc_get_free_hmc_buffer(struct crq_server_adapter *adapter, + u8 hmc_index) +{ + struct ibmvmc_buffer *buffer; + struct ibmvmc_buffer *ret_buf = NULL; + unsigned long i; + + if (hmc_index > ibmvmc.max_hmc_index) { + dev_info(adapter->dev, "get_free_hmc_buffer: invalid hmc_index=0x%x\n", + hmc_index); + return NULL; + } + + buffer = hmcs[hmc_index].buffer; + + for (i = 0; i < ibmvmc_max_buf_pool_size; i++) { + if (buffer[i].free && + buffer[i].owner == VMC_BUF_OWNER_ALPHA) { + buffer[i].free = 0; + ret_buf = &buffer[i]; + break; + } + } + + return ret_buf; +} + +/** + * ibmvmc_free_hmc_buffer - Free an HMC Buffer + * + * @hmc: ibmvmc_hmc struct + * @buffer: ibmvmc_buffer struct + * + */ +static void ibmvmc_free_hmc_buffer(struct ibmvmc_hmc *hmc, + struct ibmvmc_buffer *buffer) +{ + unsigned long flags; + + spin_lock_irqsave(&hmc->lock, flags); + buffer->free = 1; + spin_unlock_irqrestore(&hmc->lock, flags); +} + +/** + * ibmvmc_count_hmc_buffers - Count HMC Buffers + * + * @hmc_index: HMC Index field + * @valid: Valid number of buffers field + * @free: Free number of buffers field + * + */ +static void ibmvmc_count_hmc_buffers(u8 hmc_index, unsigned int *valid, + unsigned int *free) +{ + struct ibmvmc_buffer *buffer; + unsigned long i; + unsigned long flags; + + if (hmc_index > ibmvmc.max_hmc_index) + return; + + if (!valid || !free) + return; + + *valid = 0; *free = 0; + + buffer = hmcs[hmc_index].buffer; + spin_lock_irqsave(&hmcs[hmc_index].lock, flags); + + for (i = 0; i < ibmvmc_max_buf_pool_size; i++) { + if (buffer[i].valid) { + *valid = *valid + 1; + if (buffer[i].free) + *free = *free + 1; + } + } + + spin_unlock_irqrestore(&hmcs[hmc_index].lock, flags); +} + +/** + * ibmvmc_get_free_hmc - Get Free HMC + * + * Return: + * Pointer to an available HMC Connection + * Null otherwise + */ +static struct ibmvmc_hmc *ibmvmc_get_free_hmc(void) +{ + unsigned long i; + unsigned long flags; + + /* + * Find an available HMC connection. + */ + for (i = 0; i <= ibmvmc.max_hmc_index; i++) { + spin_lock_irqsave(&hmcs[i].lock, flags); + if (hmcs[i].state == ibmhmc_state_free) { + hmcs[i].index = i; + hmcs[i].state = ibmhmc_state_initial; + spin_unlock_irqrestore(&hmcs[i].lock, flags); + return &hmcs[i]; + } + spin_unlock_irqrestore(&hmcs[i].lock, flags); + } + + return NULL; +} + +/** + * ibmvmc_return_hmc - Return an HMC Connection + * + * @hmc: ibmvmc_hmc struct + * @release_readers: Number of readers connected to session + * + * This function releases the HMC connections back into the pool. + * + * Return: + * 0 - Success + * Non-zero - Failure + */ +static int ibmvmc_return_hmc(struct ibmvmc_hmc *hmc, bool release_readers) +{ + struct ibmvmc_buffer *buffer; + struct crq_server_adapter *adapter; + struct vio_dev *vdev; + unsigned long i; + unsigned long flags; + + if (!hmc || !hmc->adapter) + return -EIO; + + if (release_readers) { + if (hmc->file_session) { + struct ibmvmc_file_session *session = hmc->file_session; + + session->valid = 0; + wake_up_interruptible(&ibmvmc_read_wait); + } + } + + adapter = hmc->adapter; + vdev = to_vio_dev(adapter->dev); + + spin_lock_irqsave(&hmc->lock, flags); + hmc->index = 0; + hmc->state = ibmhmc_state_free; + hmc->queue_head = 0; + hmc->queue_tail = 0; + buffer = hmc->buffer; + for (i = 0; i < ibmvmc_max_buf_pool_size; i++) { + if (buffer[i].valid) { + free_dma_buffer(vdev, + ibmvmc.max_mtu, + buffer[i].real_addr_local, + buffer[i].dma_addr_local); + dev_dbg(adapter->dev, "Forgot buffer id 0x%lx\n", i); + } + memset(&buffer[i], 0, sizeof(struct ibmvmc_buffer)); + + hmc->queue_outbound_msgs[i] = VMC_INVALID_BUFFER_ID; + } + + spin_unlock_irqrestore(&hmc->lock, flags); + + return 0; +} + +/** + * ibmvmc_send_open - Interface Open + * @buffer: Pointer to ibmvmc_buffer struct + * @hmc: Pointer to ibmvmc_hmc struct + * + * This command is sent by the management partition as the result of a + * management partition device request. It causes the hypervisor to + * prepare a set of data buffers for the management application connection + * indicated HMC idx. A unique HMC Idx would be used if multiple management + * applications running concurrently were desired. Before responding to this + * command, the hypervisor must provide the management partition with at + * least one of these new buffers via the Add Buffer. This indicates whether + * the messages are inbound or outbound from the hypervisor. + * + * Return: + * 0 - Success + * Non-zero - Failure + */ +static int ibmvmc_send_open(struct ibmvmc_buffer *buffer, + struct ibmvmc_hmc *hmc) +{ + struct ibmvmc_crq_msg crq_msg; + struct crq_server_adapter *adapter; + __be64 *crq_as_u64 = (__be64 *)&crq_msg; + int rc = 0; + + if (!hmc || !hmc->adapter) + return -EIO; + + adapter = hmc->adapter; + + dev_dbg(adapter->dev, "send_open: 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx\n", + (unsigned long)buffer->size, (unsigned long)adapter->liobn, + (unsigned long)buffer->dma_addr_local, + (unsigned long)adapter->riobn, + (unsigned long)buffer->dma_addr_remote); + + rc = h_copy_rdma(buffer->size, + adapter->liobn, + buffer->dma_addr_local, + adapter->riobn, + buffer->dma_addr_remote); + if (rc) { + dev_err(adapter->dev, "Error: In send_open, h_copy_rdma rc 0x%x\n", + rc); + return -EIO; + } + + hmc->state = ibmhmc_state_opening; + + crq_msg.valid = 0x80; + crq_msg.type = VMC_MSG_OPEN; + crq_msg.status = 0; + crq_msg.var1.rsvd = 0; + crq_msg.hmc_session = hmc->session; + crq_msg.hmc_index = hmc->index; + crq_msg.var2.buffer_id = cpu_to_be16(buffer->id); + crq_msg.rsvd = 0; + crq_msg.var3.rsvd = 0; + + ibmvmc_send_crq(adapter, be64_to_cpu(crq_as_u64[0]), + be64_to_cpu(crq_as_u64[1])); + + return rc; +} + +/** + * ibmvmc_send_close - Interface Close + * @hmc: Pointer to ibmvmc_hmc struct + * + * This command is sent by the management partition to terminate a + * management application to hypervisor connection. When this command is + * sent, the management partition has quiesced all I/O operations to all + * buffers associated with this management application connection, and + * has freed any storage for these buffers. + * + * Return: + * 0 - Success + * Non-zero - Failure + */ +static int ibmvmc_send_close(struct ibmvmc_hmc *hmc) +{ + struct ibmvmc_crq_msg crq_msg; + struct crq_server_adapter *adapter; + __be64 *crq_as_u64 = (__be64 *)&crq_msg; + int rc = 0; + + if (!hmc || !hmc->adapter) + return -EIO; + + adapter = hmc->adapter; + + dev_info(adapter->dev, "CRQ send: close\n"); + + crq_msg.valid = 0x80; + crq_msg.type = VMC_MSG_CLOSE; + crq_msg.status = 0; + crq_msg.var1.rsvd = 0; + crq_msg.hmc_session = hmc->session; + crq_msg.hmc_index = hmc->index; + crq_msg.var2.rsvd = 0; + crq_msg.rsvd = 0; + crq_msg.var3.rsvd = 0; + + ibmvmc_send_crq(adapter, be64_to_cpu(crq_as_u64[0]), + be64_to_cpu(crq_as_u64[1])); + + return rc; +} + +/** + * ibmvmc_send_capabilities - Send VMC Capabilities + * + * @adapter: crq_server_adapter struct + * + * The capabilities message is an administrative message sent after the CRQ + * initialization sequence of messages and is used to exchange VMC capabilities + * between the management partition and the hypervisor. The management + * partition must send this message and the hypervisor must respond with VMC + * capabilities Response message before HMC interface message can begin. Any + * HMC interface messages received before the exchange of capabilities has + * complete are dropped. + * + * Return: + * 0 - Success + */ +static int ibmvmc_send_capabilities(struct crq_server_adapter *adapter) +{ + struct ibmvmc_admin_crq_msg crq_msg; + __be64 *crq_as_u64 = (__be64 *)&crq_msg; + + dev_dbg(adapter->dev, "ibmvmc: CRQ send: capabilities\n"); + crq_msg.valid = 0x80; + crq_msg.type = VMC_MSG_CAP; + crq_msg.status = 0; + crq_msg.rsvd[0] = 0; + crq_msg.rsvd[1] = 0; + crq_msg.max_hmc = ibmvmc_max_hmcs; + crq_msg.max_mtu = cpu_to_be32(ibmvmc_max_mtu); + crq_msg.pool_size = cpu_to_be16(ibmvmc_max_buf_pool_size); + crq_msg.crq_size = cpu_to_be16(adapter->queue.size); + crq_msg.version = cpu_to_be16(IBMVMC_PROTOCOL_VERSION); + + ibmvmc_send_crq(adapter, be64_to_cpu(crq_as_u64[0]), + be64_to_cpu(crq_as_u64[1])); + + ibmvmc.state = ibmvmc_state_capabilities; + + return 0; +} + +/** + * ibmvmc_send_add_buffer_resp - Add Buffer Response + * + * @adapter: crq_server_adapter struct + * @status: Status field + * @hmc_session: HMC Session field + * @hmc_index: HMC Index field + * @buffer_id: Buffer Id field + * + * This command is sent by the management partition to the hypervisor in + * response to the Add Buffer message. The Status field indicates the result of + * the command. + * + * Return: + * 0 - Success + */ +static int ibmvmc_send_add_buffer_resp(struct crq_server_adapter *adapter, + u8 status, u8 hmc_session, + u8 hmc_index, u16 buffer_id) +{ + struct ibmvmc_crq_msg crq_msg; + __be64 *crq_as_u64 = (__be64 *)&crq_msg; + + dev_dbg(adapter->dev, "CRQ send: add_buffer_resp\n"); + crq_msg.valid = 0x80; + crq_msg.type = VMC_MSG_ADD_BUF_RESP; + crq_msg.status = status; + crq_msg.var1.rsvd = 0; + crq_msg.hmc_session = hmc_session; + crq_msg.hmc_index = hmc_index; + crq_msg.var2.buffer_id = cpu_to_be16(buffer_id); + crq_msg.rsvd = 0; + crq_msg.var3.rsvd = 0; + + ibmvmc_send_crq(adapter, be64_to_cpu(crq_as_u64[0]), + be64_to_cpu(crq_as_u64[1])); + + return 0; +} + +/** + * ibmvmc_send_rem_buffer_resp - Remove Buffer Response + * + * @adapter: crq_server_adapter struct + * @status: Status field + * @hmc_session: HMC Session field + * @hmc_index: HMC Index field + * @buffer_id: Buffer Id field + * + * This command is sent by the management partition to the hypervisor in + * response to the Remove Buffer message. The Buffer ID field indicates + * which buffer the management partition selected to remove. The Status + * field indicates the result of the command. + * + * Return: + * 0 - Success + */ +static int ibmvmc_send_rem_buffer_resp(struct crq_server_adapter *adapter, + u8 status, u8 hmc_session, + u8 hmc_index, u16 buffer_id) +{ + struct ibmvmc_crq_msg crq_msg; + __be64 *crq_as_u64 = (__be64 *)&crq_msg; + + dev_dbg(adapter->dev, "CRQ send: rem_buffer_resp\n"); + crq_msg.valid = 0x80; + crq_msg.type = VMC_MSG_REM_BUF_RESP; + crq_msg.status = status; + crq_msg.var1.rsvd = 0; + crq_msg.hmc_session = hmc_session; + crq_msg.hmc_index = hmc_index; + crq_msg.var2.buffer_id = cpu_to_be16(buffer_id); + crq_msg.rsvd = 0; + crq_msg.var3.rsvd = 0; + + ibmvmc_send_crq(adapter, be64_to_cpu(crq_as_u64[0]), + be64_to_cpu(crq_as_u64[1])); + + return 0; +} + +/** + * ibmvmc_send_msg - Signal Message + * + * @adapter: crq_server_adapter struct + * @buffer: ibmvmc_buffer struct + * @hmc: ibmvmc_hmc struct + * @msg_length: message length field + * + * This command is sent between the management partition and the hypervisor + * in order to signal the arrival of an HMC protocol message. The command + * can be sent by both the management partition and the hypervisor. It is + * used for all traffic between the management application and the hypervisor, + * regardless of who initiated the communication. + * + * There is no response to this message. + * + * Return: + * 0 - Success + * Non-zero - Failure + */ +static int ibmvmc_send_msg(struct crq_server_adapter *adapter, + struct ibmvmc_buffer *buffer, + struct ibmvmc_hmc *hmc, int msg_len) +{ + struct ibmvmc_crq_msg crq_msg; + __be64 *crq_as_u64 = (__be64 *)&crq_msg; + int rc = 0; + + dev_dbg(adapter->dev, "CRQ send: rdma to HV\n"); + rc = h_copy_rdma(msg_len, + adapter->liobn, + buffer->dma_addr_local, + adapter->riobn, + buffer->dma_addr_remote); + if (rc) { + dev_err(adapter->dev, "Error in send_msg, h_copy_rdma rc 0x%x\n", + rc); + return rc; + } + + crq_msg.valid = 0x80; + crq_msg.type = VMC_MSG_SIGNAL; + crq_msg.status = 0; + crq_msg.var1.rsvd = 0; + crq_msg.hmc_session = hmc->session; + crq_msg.hmc_index = hmc->index; + crq_msg.var2.buffer_id = cpu_to_be16(buffer->id); + crq_msg.var3.msg_len = cpu_to_be32(msg_len); + dev_dbg(adapter->dev, "CRQ send: msg to HV 0x%llx 0x%llx\n", + be64_to_cpu(crq_as_u64[0]), be64_to_cpu(crq_as_u64[1])); + + buffer->owner = VMC_BUF_OWNER_HV; + ibmvmc_send_crq(adapter, be64_to_cpu(crq_as_u64[0]), + be64_to_cpu(crq_as_u64[1])); + + return rc; +} + +/** + * ibmvmc_open - Open Session + * + * @inode: inode struct + * @file: file struct + * + * Return: + * 0 - Success + * Non-zero - Failure + */ +static int ibmvmc_open(struct inode *inode, struct file *file) +{ + struct ibmvmc_file_session *session; + + pr_debug("%s: inode = 0x%lx, file = 0x%lx, state = 0x%x\n", __func__, + (unsigned long)inode, (unsigned long)file, + ibmvmc.state); + + session = kzalloc(sizeof(*session), GFP_KERNEL); + if (!session) + return -ENOMEM; + + session->file = file; + file->private_data = session; + + return 0; +} + +/** + * ibmvmc_close - Close Session + * + * @inode: inode struct + * @file: file struct + * + * Return: + * 0 - Success + * Non-zero - Failure + */ +static int ibmvmc_close(struct inode *inode, struct file *file) +{ + struct ibmvmc_file_session *session; + struct ibmvmc_hmc *hmc; + int rc = 0; + unsigned long flags; + + pr_debug("%s: file = 0x%lx, state = 0x%x\n", __func__, + (unsigned long)file, ibmvmc.state); + + session = file->private_data; + if (!session) + return -EIO; + + hmc = session->hmc; + if (hmc) { + if (!hmc->adapter) + return -EIO; + + if (ibmvmc.state == ibmvmc_state_failed) { + dev_warn(hmc->adapter->dev, "close: state_failed\n"); + return -EIO; + } + + spin_lock_irqsave(&hmc->lock, flags); + if (hmc->state >= ibmhmc_state_opening) { + rc = ibmvmc_send_close(hmc); + if (rc) + dev_warn(hmc->adapter->dev, "close: send_close failed.\n"); + } + spin_unlock_irqrestore(&hmc->lock, flags); + } + + kzfree(session); + + return rc; +} + +/** + * ibmvmc_read - Read + * + * @file: file struct + * @buf: Character buffer + * @nbytes: Size in bytes + * @ppos: Offset + * + * Return: + * 0 - Success + * Non-zero - Failure + */ +static ssize_t ibmvmc_read(struct file *file, char *buf, size_t nbytes, + loff_t *ppos) +{ + struct ibmvmc_file_session *session; + struct ibmvmc_hmc *hmc; + struct crq_server_adapter *adapter; + struct ibmvmc_buffer *buffer; + ssize_t n; + ssize_t retval = 0; + unsigned long flags; + DEFINE_WAIT(wait); + + pr_debug("ibmvmc: read: file = 0x%lx, buf = 0x%lx, nbytes = 0x%lx\n", + (unsigned long)file, (unsigned long)buf, + (unsigned long)nbytes); + + if (nbytes == 0) + return 0; + + if (nbytes > ibmvmc.max_mtu) { + pr_warn("ibmvmc: read: nbytes invalid 0x%x\n", + (unsigned int)nbytes); + return -EINVAL; + } + + session = file->private_data; + if (!session) { + pr_warn("ibmvmc: read: no session\n"); + return -EIO; + } + + hmc = session->hmc; + if (!hmc) { + pr_warn("ibmvmc: read: no hmc\n"); + return -EIO; + } + + adapter = hmc->adapter; + if (!adapter) { + pr_warn("ibmvmc: read: no adapter\n"); + return -EIO; + } + + do { + prepare_to_wait(&ibmvmc_read_wait, &wait, TASK_INTERRUPTIBLE); + + spin_lock_irqsave(&hmc->lock, flags); + if (hmc->queue_tail != hmc->queue_head) + /* Data is available */ + break; + + spin_unlock_irqrestore(&hmc->lock, flags); + + if (!session->valid) { + retval = -EBADFD; + goto out; + } + if (file->f_flags & O_NONBLOCK) { + retval = -EAGAIN; + goto out; + } + + schedule(); + + if (signal_pending(current)) { + retval = -ERESTARTSYS; + goto out; + } + } while (1); + + buffer = &(hmc->buffer[hmc->queue_outbound_msgs[hmc->queue_tail]]); + hmc->queue_tail++; + if (hmc->queue_tail == ibmvmc_max_buf_pool_size) + hmc->queue_tail = 0; + spin_unlock_irqrestore(&hmc->lock, flags); + + nbytes = min_t(size_t, nbytes, buffer->msg_len); + n = copy_to_user((void *)buf, buffer->real_addr_local, nbytes); + dev_dbg(adapter->dev, "read: copy to user nbytes = 0x%lx.\n", nbytes); + ibmvmc_free_hmc_buffer(hmc, buffer); + retval = nbytes; + + if (n) { + dev_warn(adapter->dev, "read: copy to user failed.\n"); + retval = -EFAULT; + } + + out: + finish_wait(&ibmvmc_read_wait, &wait); + dev_dbg(adapter->dev, "read: out %ld\n", retval); + return retval; +} + +/** + * ibmvmc_poll - Poll + * + * @file: file struct + * @wait: Poll Table + * + * Return: + * poll.h return values + */ +static unsigned int ibmvmc_poll(struct file *file, poll_table *wait) +{ + struct ibmvmc_file_session *session; + struct ibmvmc_hmc *hmc; + unsigned int mask = 0; + + session = file->private_data; + if (!session) + return 0; + + hmc = session->hmc; + if (!hmc) + return 0; + + poll_wait(file, &ibmvmc_read_wait, wait); + + if (hmc->queue_head != hmc->queue_tail) + mask |= POLLIN | POLLRDNORM; + + return mask; +} + +/** + * ibmvmc_write - Write + * + * @file: file struct + * @buf: Character buffer + * @count: Count field + * @ppos: Offset + * + * Return: + * 0 - Success + * Non-zero - Failure + */ +static ssize_t ibmvmc_write(struct file *file, const char *buffer, + size_t count, loff_t *ppos) +{ + struct ibmvmc_buffer *vmc_buffer; + struct ibmvmc_file_session *session; + struct crq_server_adapter *adapter; + struct ibmvmc_hmc *hmc; + unsigned char *buf; + unsigned long flags; + size_t bytes; + const char *p = buffer; + size_t c = count; + int ret = 0; + + session = file->private_data; + if (!session) + return -EIO; + + hmc = session->hmc; + if (!hmc) + return -EIO; + + spin_lock_irqsave(&hmc->lock, flags); + if (hmc->state == ibmhmc_state_free) { + /* HMC connection is not valid (possibly was reset under us). */ + ret = -EIO; + goto out; + } + + adapter = hmc->adapter; + if (!adapter) { + ret = -EIO; + goto out; + } + + if (count > ibmvmc.max_mtu) { + dev_warn(adapter->dev, "invalid buffer size 0x%lx\n", + (unsigned long)count); + ret = -EIO; + goto out; + } + + /* Waiting for the open resp message to the ioctl(1) - retry */ + if (hmc->state == ibmhmc_state_opening) { + ret = -EBUSY; + goto out; + } + + /* Make sure the ioctl() was called & the open msg sent, and that + * the HMC connection has not failed. + */ + if (hmc->state != ibmhmc_state_ready) { + ret = -EIO; + goto out; + } + + vmc_buffer = ibmvmc_get_valid_hmc_buffer(hmc->index); + if (!vmc_buffer) { + /* No buffer available for the msg send, or we have not yet + * completed the open/open_resp sequence. Retry until this is + * complete. + */ + ret = -EBUSY; + goto out; + } + if (!vmc_buffer->real_addr_local) { + dev_err(adapter->dev, "no buffer storage assigned\n"); + ret = -EIO; + goto out; + } + buf = vmc_buffer->real_addr_local; + + while (c > 0) { + bytes = min_t(size_t, c, vmc_buffer->size); + + bytes -= copy_from_user(buf, p, bytes); + if (!bytes) { + ret = -EFAULT; + goto out; + } + c -= bytes; + p += bytes; + } + if (p == buffer) + goto out; + + file->f_path.dentry->d_inode->i_mtime = current_time(file_inode(file)); + mark_inode_dirty(file->f_path.dentry->d_inode); + + dev_dbg(adapter->dev, "write: file = 0x%lx, count = 0x%lx\n", + (unsigned long)file, (unsigned long)count); + + ibmvmc_send_msg(adapter, vmc_buffer, hmc, count); + ret = p - buffer; + out: + spin_unlock_irqrestore(&hmc->lock, flags); + return (ssize_t)(ret); +} + +/** + * ibmvmc_setup_hmc - Setup the HMC + * + * @session: ibmvmc_file_session struct + * + * Return: + * 0 - Success + * Non-zero - Failure + */ +static long ibmvmc_setup_hmc(struct ibmvmc_file_session *session) +{ + struct ibmvmc_hmc *hmc; + unsigned int valid, free, index; + + if (ibmvmc.state == ibmvmc_state_failed) { + pr_warn("ibmvmc: Reserve HMC: state_failed\n"); + return -EIO; + } + + if (ibmvmc.state < ibmvmc_state_ready) { + pr_warn("ibmvmc: Reserve HMC: not state_ready\n"); + return -EAGAIN; + } + + /* Device is busy until capabilities have been exchanged and we + * have a generic buffer for each possible HMC connection. + */ + for (index = 0; index <= ibmvmc.max_hmc_index; index++) { + valid = 0; + ibmvmc_count_hmc_buffers(index, &valid, &free); + if (valid == 0) { + pr_warn("ibmvmc: buffers not ready for index %d\n", + index); + return -ENOBUFS; + } + } + + /* Get an hmc object, and transition to ibmhmc_state_initial */ + hmc = ibmvmc_get_free_hmc(); + if (!hmc) { + pr_warn("%s: free hmc not found\n", __func__); + return -EBUSY; + } + + hmc->session = hmc->session + 1; + if (hmc->session == 0xff) + hmc->session = 1; + + session->hmc = hmc; + hmc->adapter = &ibmvmc_adapter; + hmc->file_session = session; + session->valid = 1; + + return 0; +} + +/** + * ibmvmc_ioctl_sethmcid - IOCTL Set HMC ID + * + * @session: ibmvmc_file_session struct + * @new_hmc_id: HMC id field + * + * IOCTL command to setup the hmc id + * + * Return: + * 0 - Success + * Non-zero - Failure + */ +static long ibmvmc_ioctl_sethmcid(struct ibmvmc_file_session *session, + unsigned char __user *new_hmc_id) +{ + struct ibmvmc_hmc *hmc; + struct ibmvmc_buffer *buffer; + size_t bytes; + char print_buffer[HMC_ID_LEN + 1]; + unsigned long flags; + long rc = 0; + + /* Reserve HMC session */ + hmc = session->hmc; + if (!hmc) { + rc = ibmvmc_setup_hmc(session); + if (rc) + return rc; + + hmc = session->hmc; + if (!hmc) { + pr_err("ibmvmc: setup_hmc success but no hmc\n"); + return -EIO; + } + } + + if (hmc->state != ibmhmc_state_initial) { + pr_warn("ibmvmc: sethmcid: invalid state to send open 0x%x\n", + hmc->state); + return -EIO; + } + + bytes = copy_from_user(hmc->hmc_id, new_hmc_id, HMC_ID_LEN); + if (bytes) + return -EFAULT; + + /* Send Open Session command */ + spin_lock_irqsave(&hmc->lock, flags); + buffer = ibmvmc_get_valid_hmc_buffer(hmc->index); + spin_unlock_irqrestore(&hmc->lock, flags); + + if (!buffer || !buffer->real_addr_local) { + pr_warn("ibmvmc: sethmcid: no buffer available\n"); + return -EIO; + } + + /* Make sure buffer is NULL terminated before trying to print it */ + memset(print_buffer, 0, HMC_ID_LEN + 1); + strncpy(print_buffer, hmc->hmc_id, HMC_ID_LEN); + pr_info("ibmvmc: sethmcid: Set HMC ID: \"%s\"\n", print_buffer); + + memcpy(buffer->real_addr_local, hmc->hmc_id, HMC_ID_LEN); + /* RDMA over ID, send open msg, change state to ibmhmc_state_opening */ + rc = ibmvmc_send_open(buffer, hmc); + + return rc; +} + +/** + * ibmvmc_ioctl_query - IOCTL Query + * + * @session: ibmvmc_file_session struct + * @ret_struct: ibmvmc_query_struct + * + * Return: + * 0 - Success + * Non-zero - Failure + */ +static long ibmvmc_ioctl_query(struct ibmvmc_file_session *session, + struct ibmvmc_query_struct __user *ret_struct) +{ + struct ibmvmc_query_struct query_struct; + size_t bytes; + + memset(&query_struct, 0, sizeof(query_struct)); + query_struct.have_vmc = (ibmvmc.state > ibmvmc_state_initial); + query_struct.state = ibmvmc.state; + query_struct.vmc_drc_index = ibmvmc.vmc_drc_index; + + bytes = copy_to_user(ret_struct, &query_struct, + sizeof(query_struct)); + if (bytes) + return -EFAULT; + + return 0; +} + +/** + * ibmvmc_ioctl_requestvmc - IOCTL Request VMC + * + * @session: ibmvmc_file_session struct + * @ret_vmc_index: VMC Index + * + * Return: + * 0 - Success + * Non-zero - Failure + */ +static long ibmvmc_ioctl_requestvmc(struct ibmvmc_file_session *session, + u32 __user *ret_vmc_index) +{ + /* TODO: (adreznec) Add locking to control multiple process access */ + size_t bytes; + long rc; + u32 vmc_drc_index; + + /* Call to request the VMC device from phyp*/ + rc = h_request_vmc(&vmc_drc_index); + pr_debug("ibmvmc: requestvmc: H_REQUEST_VMC rc = 0x%lx\n", rc); + + if (rc == H_SUCCESS) { + rc = 0; + } else if (rc == H_FUNCTION) { + pr_err("ibmvmc: requestvmc: h_request_vmc not supported\n"); + return -EPERM; + } else if (rc == H_AUTHORITY) { + pr_err("ibmvmc: requestvmc: hypervisor denied vmc request\n"); + return -EPERM; + } else if (rc == H_HARDWARE) { + pr_err("ibmvmc: requestvmc: hypervisor hardware fault\n"); + return -EIO; + } else if (rc == H_RESOURCE) { + pr_err("ibmvmc: requestvmc: vmc resource unavailable\n"); + return -ENODEV; + } else if (rc == H_NOT_AVAILABLE) { + pr_err("ibmvmc: requestvmc: system cannot be vmc managed\n"); + return -EPERM; + } else if (rc == H_PARAMETER) { + pr_err("ibmvmc: requestvmc: invalid parameter\n"); + return -EINVAL; + } + + /* Success, set the vmc index in global struct */ + ibmvmc.vmc_drc_index = vmc_drc_index; + + bytes = copy_to_user(ret_vmc_index, &vmc_drc_index, + sizeof(*ret_vmc_index)); + if (bytes) { + pr_warn("ibmvmc: requestvmc: copy to user failed.\n"); + return -EFAULT; + } + return rc; +} + +/** + * ibmvmc_ioctl - IOCTL + * + * @session: ibmvmc_file_session struct + * @cmd: cmd field + * @arg: Argument field + * + * Return: + * 0 - Success + * Non-zero - Failure + */ +static long ibmvmc_ioctl(struct file *file, + unsigned int cmd, unsigned long arg) +{ + struct ibmvmc_file_session *session = file->private_data; + + pr_debug("ibmvmc: ioctl file=0x%lx, cmd=0x%x, arg=0x%lx, ses=0x%lx\n", + (unsigned long)file, cmd, arg, + (unsigned long)session); + + if (!session) { + pr_warn("ibmvmc: ioctl: no session\n"); + return -EIO; + } + + switch (cmd) { + case VMC_IOCTL_SETHMCID: + return ibmvmc_ioctl_sethmcid(session, + (unsigned char __user *)arg); + case VMC_IOCTL_QUERY: + return ibmvmc_ioctl_query(session, + (struct ibmvmc_query_struct __user *)arg); + case VMC_IOCTL_REQUESTVMC: + return ibmvmc_ioctl_requestvmc(session, + (unsigned int __user *)arg); + default: + pr_warn("ibmvmc: unknown ioctl 0x%x\n", cmd); + return -EINVAL; + } +} + +static const struct file_operations ibmvmc_fops = { + .owner = THIS_MODULE, + .read = ibmvmc_read, + .write = ibmvmc_write, + .poll = ibmvmc_poll, + .unlocked_ioctl = ibmvmc_ioctl, + .open = ibmvmc_open, + .release = ibmvmc_close, +}; + +/** + * ibmvmc_add_buffer - Add Buffer + * + * @adapter: crq_server_adapter struct + * @crq: ibmvmc_crq_msg struct + * + * This message transfers a buffer from hypervisor ownership to management + * partition ownership. The LIOBA is obtained from the virtual TCE table + * associated with the hypervisor side of the VMC device, and points to a + * buffer of size MTU (as established in the capabilities exchange). + * + * Typical flow for ading buffers: + * 1. A new management application connection is opened by the management + * partition. + * 2. The hypervisor assigns new buffers for the traffic associated with + * that connection. + * 3. The hypervisor sends VMC Add Buffer messages to the management + * partition, informing it of the new buffers. + * 4. The hypervisor sends an HMC protocol message (to the management + * application) notifying it of the new buffers. This informs the + * application that it has buffers available for sending HMC + * commands. + * + * Return: + * 0 - Success + * Non-zero - Failure + */ +static int ibmvmc_add_buffer(struct crq_server_adapter *adapter, + struct ibmvmc_crq_msg *crq) +{ + struct ibmvmc_buffer *buffer; + u8 hmc_index; + u8 hmc_session; + u16 buffer_id; + unsigned long flags; + int rc = 0; + + if (!crq) + return -1; + + hmc_session = crq->hmc_session; + hmc_index = crq->hmc_index; + buffer_id = be16_to_cpu(crq->var2.buffer_id); + + if (hmc_index > ibmvmc.max_hmc_index) { + dev_err(adapter->dev, "add_buffer: invalid hmc_index = 0x%x\n", + hmc_index); + ibmvmc_send_add_buffer_resp(adapter, VMC_MSG_INVALID_HMC_INDEX, + hmc_session, hmc_index, buffer_id); + return -1; + } + + if (buffer_id >= ibmvmc.max_buffer_pool_size) { + dev_err(adapter->dev, "add_buffer: invalid buffer_id = 0x%x\n", + buffer_id); + ibmvmc_send_add_buffer_resp(adapter, VMC_MSG_INVALID_BUFFER_ID, + hmc_session, hmc_index, buffer_id); + return -1; + } + + spin_lock_irqsave(&hmcs[hmc_index].lock, flags); + buffer = &hmcs[hmc_index].buffer[buffer_id]; + + if (buffer->real_addr_local || buffer->dma_addr_local) { + dev_warn(adapter->dev, "add_buffer: already allocated id = 0x%lx\n", + (unsigned long)buffer_id); + spin_unlock_irqrestore(&hmcs[hmc_index].lock, flags); + ibmvmc_send_add_buffer_resp(adapter, VMC_MSG_INVALID_BUFFER_ID, + hmc_session, hmc_index, buffer_id); + return -1; + } + + buffer->real_addr_local = alloc_dma_buffer(to_vio_dev(adapter->dev), + ibmvmc.max_mtu, + &buffer->dma_addr_local); + + if (!buffer->real_addr_local) { + dev_err(adapter->dev, "add_buffer: alloc_dma_buffer failed.\n"); + spin_unlock_irqrestore(&hmcs[hmc_index].lock, flags); + ibmvmc_send_add_buffer_resp(adapter, VMC_MSG_INTERFACE_FAILURE, + hmc_session, hmc_index, buffer_id); + return -1; + } + + buffer->dma_addr_remote = be32_to_cpu(crq->var3.lioba); + buffer->size = ibmvmc.max_mtu; + buffer->owner = crq->var1.owner; + buffer->free = 1; + /* Must ensure valid==1 is observable only after all other fields are */ + dma_wmb(); + buffer->valid = 1; + buffer->id = buffer_id; + + dev_dbg(adapter->dev, "add_buffer: successfully added a buffer:\n"); + dev_dbg(adapter->dev, " index: %d, session: %d, buffer: 0x%x, owner: %d\n", + hmc_index, hmc_session, buffer_id, buffer->owner); + dev_dbg(adapter->dev, " local: 0x%x, remote: 0x%x\n", + (u32)buffer->dma_addr_local, + (u32)buffer->dma_addr_remote); + spin_unlock_irqrestore(&hmcs[hmc_index].lock, flags); + + ibmvmc_send_add_buffer_resp(adapter, VMC_MSG_SUCCESS, hmc_session, + hmc_index, buffer_id); + + return rc; +} + +/** + * ibmvmc_rem_buffer - Remove Buffer + * + * @adapter: crq_server_adapter struct + * @crq: ibmvmc_crq_msg struct + * + * This message requests an HMC buffer to be transferred from management + * partition ownership to hypervisor ownership. The management partition may + * not be able to satisfy the request at a particular point in time if all its + * buffers are in use. The management partition requires a depth of at least + * one inbound buffer to allow management application commands to flow to the + * hypervisor. It is, therefore, an interface error for the hypervisor to + * attempt to remove the management partition's last buffer. + * + * The hypervisor is expected to manage buffer usage with the management + * application directly and inform the management partition when buffers may be + * removed. The typical flow for removing buffers: + * + * 1. The management application no longer needs a communication path to a + * particular hypervisor function. That function is closed. + * 2. The hypervisor and the management application quiesce all traffic to that + * function. The hypervisor requests a reduction in buffer pool size. + * 3. The management application acknowledges the reduction in buffer pool size. + * 4. The hypervisor sends a Remove Buffer message to the management partition, + * informing it of the reduction in buffers. + * 5. The management partition verifies it can remove the buffer. This is + * possible if buffers have been quiesced. + * + * Return: + * 0 - Success + * Non-zero - Failure + */ +/* + * The hypervisor requested that we pick an unused buffer, and return it. + * Before sending the buffer back, we free any storage associated with the + * buffer. + */ +static int ibmvmc_rem_buffer(struct crq_server_adapter *adapter, + struct ibmvmc_crq_msg *crq) +{ + struct ibmvmc_buffer *buffer; + u8 hmc_index; + u8 hmc_session; + u16 buffer_id = 0; + unsigned long flags; + int rc = 0; + + if (!crq) + return -1; + + hmc_session = crq->hmc_session; + hmc_index = crq->hmc_index; + + if (hmc_index > ibmvmc.max_hmc_index) { + dev_warn(adapter->dev, "rem_buffer: invalid hmc_index = 0x%x\n", + hmc_index); + ibmvmc_send_rem_buffer_resp(adapter, VMC_MSG_INVALID_HMC_INDEX, + hmc_session, hmc_index, buffer_id); + return -1; + } + + spin_lock_irqsave(&hmcs[hmc_index].lock, flags); + buffer = ibmvmc_get_free_hmc_buffer(adapter, hmc_index); + if (!buffer) { + dev_info(adapter->dev, "rem_buffer: no buffer to remove\n"); + spin_unlock_irqrestore(&hmcs[hmc_index].lock, flags); + ibmvmc_send_rem_buffer_resp(adapter, VMC_MSG_NO_BUFFER, + hmc_session, hmc_index, + VMC_INVALID_BUFFER_ID); + return -1; + } + + buffer_id = buffer->id; + + if (buffer->valid) + free_dma_buffer(to_vio_dev(adapter->dev), + ibmvmc.max_mtu, + buffer->real_addr_local, + buffer->dma_addr_local); + + memset(buffer, 0, sizeof(struct ibmvmc_buffer)); + spin_unlock_irqrestore(&hmcs[hmc_index].lock, flags); + + dev_dbg(adapter->dev, "rem_buffer: removed buffer 0x%x.\n", buffer_id); + ibmvmc_send_rem_buffer_resp(adapter, VMC_MSG_SUCCESS, hmc_session, + hmc_index, buffer_id); + + return rc; +} + +static int ibmvmc_recv_msg(struct crq_server_adapter *adapter, + struct ibmvmc_crq_msg *crq) +{ + struct ibmvmc_buffer *buffer; + struct ibmvmc_hmc *hmc; + unsigned long msg_len; + u8 hmc_index; + u8 hmc_session; + u16 buffer_id; + unsigned long flags; + int rc = 0; + + if (!crq) + return -1; + + /* Hypervisor writes CRQs directly into our memory in big endian */ + dev_dbg(adapter->dev, "Recv_msg: msg from HV 0x%016llx 0x%016llx\n", + be64_to_cpu(*((unsigned long *)crq)), + be64_to_cpu(*(((unsigned long *)crq) + 1))); + + hmc_session = crq->hmc_session; + hmc_index = crq->hmc_index; + buffer_id = be16_to_cpu(crq->var2.buffer_id); + msg_len = be32_to_cpu(crq->var3.msg_len); + + if (hmc_index > ibmvmc.max_hmc_index) { + dev_err(adapter->dev, "Recv_msg: invalid hmc_index = 0x%x\n", + hmc_index); + ibmvmc_send_add_buffer_resp(adapter, VMC_MSG_INVALID_HMC_INDEX, + hmc_session, hmc_index, buffer_id); + return -1; + } + + if (buffer_id >= ibmvmc.max_buffer_pool_size) { + dev_err(adapter->dev, "Recv_msg: invalid buffer_id = 0x%x\n", + buffer_id); + ibmvmc_send_add_buffer_resp(adapter, VMC_MSG_INVALID_BUFFER_ID, + hmc_session, hmc_index, buffer_id); + return -1; + } + + hmc = &hmcs[hmc_index]; + spin_lock_irqsave(&hmc->lock, flags); + + if (hmc->state == ibmhmc_state_free) { + dev_err(adapter->dev, "Recv_msg: invalid hmc state = 0x%x\n", + hmc->state); + /* HMC connection is not valid (possibly was reset under us). */ + spin_unlock_irqrestore(&hmc->lock, flags); + return -1; + } + + buffer = &hmc->buffer[buffer_id]; + + if (buffer->valid == 0 || buffer->owner == VMC_BUF_OWNER_ALPHA) { + dev_err(adapter->dev, "Recv_msg: not valid, or not HV. 0x%x 0x%x\n", + buffer->valid, buffer->owner); + spin_unlock_irqrestore(&hmc->lock, flags); + return -1; + } + + /* RDMA the data into the partition. */ + rc = h_copy_rdma(msg_len, + adapter->riobn, + buffer->dma_addr_remote, + adapter->liobn, + buffer->dma_addr_local); + + dev_dbg(adapter->dev, "Recv_msg: msg_len = 0x%x, buffer_id = 0x%x, queue_head = 0x%x, hmc_idx = 0x%x\n", + (unsigned int)msg_len, (unsigned int)buffer_id, + (unsigned int)hmc->queue_head, (unsigned int)hmc_index); + buffer->msg_len = msg_len; + buffer->free = 0; + buffer->owner = VMC_BUF_OWNER_ALPHA; + + if (rc) { + dev_err(adapter->dev, "Failure in recv_msg: h_copy_rdma = 0x%x\n", + rc); + spin_unlock_irqrestore(&hmc->lock, flags); + return -1; + } + + /* Must be locked because read operates on the same data */ + hmc->queue_outbound_msgs[hmc->queue_head] = buffer_id; + hmc->queue_head++; + if (hmc->queue_head == ibmvmc_max_buf_pool_size) + hmc->queue_head = 0; + + if (hmc->queue_head == hmc->queue_tail) + dev_err(adapter->dev, "outbound buffer queue wrapped.\n"); + + spin_unlock_irqrestore(&hmc->lock, flags); + + wake_up_interruptible(&ibmvmc_read_wait); + + return 0; +} + +/** + * ibmvmc_process_capabilities - Process Capabilities + * + * @adapter: crq_server_adapter struct + * @crqp: ibmvmc_crq_msg struct + * + */ +static void ibmvmc_process_capabilities(struct crq_server_adapter *adapter, + struct ibmvmc_crq_msg *crqp) +{ + struct ibmvmc_admin_crq_msg *crq = (struct ibmvmc_admin_crq_msg *)crqp; + + if ((be16_to_cpu(crq->version) >> 8) != + (IBMVMC_PROTOCOL_VERSION >> 8)) { + dev_err(adapter->dev, "init failed, incompatible versions 0x%x 0x%x\n", + be16_to_cpu(crq->version), + IBMVMC_PROTOCOL_VERSION); + ibmvmc.state = ibmvmc_state_failed; + return; + } + + ibmvmc.max_mtu = min_t(u32, ibmvmc_max_mtu, be32_to_cpu(crq->max_mtu)); + ibmvmc.max_buffer_pool_size = min_t(u16, ibmvmc_max_buf_pool_size, + be16_to_cpu(crq->pool_size)); + ibmvmc.max_hmc_index = min_t(u8, ibmvmc_max_hmcs, crq->max_hmc) - 1; + ibmvmc.state = ibmvmc_state_ready; + + dev_info(adapter->dev, "Capabilities: mtu=0x%x, pool_size=0x%x, max_hmc=0x%x\n", + ibmvmc.max_mtu, ibmvmc.max_buffer_pool_size, + ibmvmc.max_hmc_index); +} + +/** + * ibmvmc_validate_hmc_session - Validate HMC Session + * + * @adapter: crq_server_adapter struct + * @crq: ibmvmc_crq_msg struct + * + * Return: + * 0 - Success + * Non-zero - Failure + */ +static int ibmvmc_validate_hmc_session(struct crq_server_adapter *adapter, + struct ibmvmc_crq_msg *crq) +{ + unsigned char hmc_index; + + hmc_index = crq->hmc_index; + + if (crq->hmc_session == 0) + return 0; + + if (hmc_index > ibmvmc.max_hmc_index) + return -1; + + if (hmcs[hmc_index].session != crq->hmc_session) { + dev_warn(adapter->dev, "Drop, bad session: expected 0x%x, recv 0x%x\n", + hmcs[hmc_index].session, crq->hmc_session); + return -1; + } + + return 0; +} + +/** + * ibmvmc_reset - Reset + * + * @adapter: crq_server_adapter struct + * @xport_event: export_event field + * + * Closes all HMC sessions and conditionally schedules a CRQ reset. + * @xport_event: If true, the partner closed their CRQ; we don't need to reset. + * If false, we need to schedule a CRQ reset. + */ +static void ibmvmc_reset(struct crq_server_adapter *adapter, bool xport_event) +{ + int i; + + if (ibmvmc.state != ibmvmc_state_sched_reset) { + dev_info(adapter->dev, "*** Reset to initial state.\n"); + for (i = 0; i < ibmvmc_max_hmcs; i++) + ibmvmc_return_hmc(&hmcs[i], xport_event); + + if (xport_event) { + /* CRQ was closed by the partner. We don't need to do + * anything except set ourself to the correct state to + * handle init msgs. + */ + ibmvmc.state = ibmvmc_state_crqinit; + } else { + /* The partner did not close their CRQ - instead, we're + * closing the CRQ on our end. Need to schedule this + * for process context, because CRQ reset may require a + * sleep. + * + * Setting ibmvmc.state here immediately prevents + * ibmvmc_open from completing until the reset + * completes in process context. + */ + ibmvmc.state = ibmvmc_state_sched_reset; + dev_dbg(adapter->dev, "Device reset scheduled"); + wake_up_interruptible(&adapter->reset_wait_queue); + } + } +} + +/** + * ibmvmc_reset_task - Reset Task + * + * @data: Data field + * + * Performs a CRQ reset of the VMC device in process context. + * NOTE: This function should not be called directly, use ibmvmc_reset. + */ +static int ibmvmc_reset_task(void *data) +{ + struct crq_server_adapter *adapter = data; + int rc; + + set_user_nice(current, -20); + + while (!kthread_should_stop()) { + wait_event_interruptible(adapter->reset_wait_queue, + (ibmvmc.state == ibmvmc_state_sched_reset) || + kthread_should_stop()); + + if (kthread_should_stop()) + break; + + dev_dbg(adapter->dev, "CRQ resetting in process context"); + tasklet_disable(&adapter->work_task); + + rc = ibmvmc_reset_crq_queue(adapter); + + if (rc != H_SUCCESS && rc != H_RESOURCE) { + dev_err(adapter->dev, "Error initializing CRQ. rc = 0x%x\n", + rc); + ibmvmc.state = ibmvmc_state_failed; + } else { + ibmvmc.state = ibmvmc_state_crqinit; + + if (ibmvmc_send_crq(adapter, 0xC001000000000000LL, 0) + != 0 && rc != H_RESOURCE) + dev_warn(adapter->dev, "Failed to send initialize CRQ message\n"); + } + + vio_enable_interrupts(to_vio_dev(adapter->dev)); + tasklet_enable(&adapter->work_task); + } + + return 0; +} + +/** + * ibmvmc_process_open_resp - Process Open Response + * + * @crq: ibmvmc_crq_msg struct + * @adapter: crq_server_adapter struct + * + * This command is sent by the hypervisor in response to the Interface + * Open message. When this message is received, the indicated buffer is + * again available for management partition use. + */ +static void ibmvmc_process_open_resp(struct ibmvmc_crq_msg *crq, + struct crq_server_adapter *adapter) +{ + unsigned char hmc_index; + unsigned short buffer_id; + + hmc_index = crq->hmc_index; + if (hmc_index > ibmvmc.max_hmc_index) { + /* Why would PHYP give an index > max negotiated? */ + ibmvmc_reset(adapter, false); + return; + } + + if (crq->status) { + dev_warn(adapter->dev, "open_resp: failed - status 0x%x\n", + crq->status); + ibmvmc_return_hmc(&hmcs[hmc_index], false); + return; + } + + if (hmcs[hmc_index].state == ibmhmc_state_opening) { + buffer_id = be16_to_cpu(crq->var2.buffer_id); + if (buffer_id >= ibmvmc.max_buffer_pool_size) { + dev_err(adapter->dev, "open_resp: invalid buffer_id = 0x%x\n", + buffer_id); + hmcs[hmc_index].state = ibmhmc_state_failed; + } else { + ibmvmc_free_hmc_buffer(&hmcs[hmc_index], + &hmcs[hmc_index].buffer[buffer_id]); + hmcs[hmc_index].state = ibmhmc_state_ready; + dev_dbg(adapter->dev, "open_resp: set hmc state = ready\n"); + } + } else { + dev_warn(adapter->dev, "open_resp: invalid hmc state (0x%x)\n", + hmcs[hmc_index].state); + } +} + +/** + * ibmvmc_process_close_resp - Process Close Response + * + * @crq: ibmvmc_crq_msg struct + * @adapter: crq_server_adapter struct + * + * This command is sent by the hypervisor in response to the managemant + * application Interface Close message. + * + * If the close fails, simply reset the entire driver as the state of the VMC + * must be in tough shape. + */ +static void ibmvmc_process_close_resp(struct ibmvmc_crq_msg *crq, + struct crq_server_adapter *adapter) +{ + unsigned char hmc_index; + + hmc_index = crq->hmc_index; + if (hmc_index > ibmvmc.max_hmc_index) { + ibmvmc_reset(adapter, false); + return; + } + + if (crq->status) { + dev_warn(adapter->dev, "close_resp: failed - status 0x%x\n", + crq->status); + ibmvmc_reset(adapter, false); + return; + } + + ibmvmc_return_hmc(&hmcs[hmc_index], false); +} + +/** + * ibmvmc_crq_process - Process CRQ + * + * @adapter: crq_server_adapter struct + * @crq: ibmvmc_crq_msg struct + * + * Process the CRQ message based upon the type of message received. + * + */ +static void ibmvmc_crq_process(struct crq_server_adapter *adapter, + struct ibmvmc_crq_msg *crq) +{ + switch (crq->type) { + case VMC_MSG_CAP_RESP: + dev_dbg(adapter->dev, "CRQ recv: capabilities resp (0x%x)\n", + crq->type); + if (ibmvmc.state == ibmvmc_state_capabilities) + ibmvmc_process_capabilities(adapter, crq); + else + dev_warn(adapter->dev, "caps msg invalid in state 0x%x\n", + ibmvmc.state); + break; + case VMC_MSG_OPEN_RESP: + dev_dbg(adapter->dev, "CRQ recv: open resp (0x%x)\n", + crq->type); + if (ibmvmc_validate_hmc_session(adapter, crq) == 0) + ibmvmc_process_open_resp(crq, adapter); + break; + case VMC_MSG_ADD_BUF: + dev_dbg(adapter->dev, "CRQ recv: add buf (0x%x)\n", + crq->type); + if (ibmvmc_validate_hmc_session(adapter, crq) == 0) + ibmvmc_add_buffer(adapter, crq); + break; + case VMC_MSG_REM_BUF: + dev_dbg(adapter->dev, "CRQ recv: rem buf (0x%x)\n", + crq->type); + if (ibmvmc_validate_hmc_session(adapter, crq) == 0) + ibmvmc_rem_buffer(adapter, crq); + break; + case VMC_MSG_SIGNAL: + dev_dbg(adapter->dev, "CRQ recv: signal msg (0x%x)\n", + crq->type); + if (ibmvmc_validate_hmc_session(adapter, crq) == 0) + ibmvmc_recv_msg(adapter, crq); + break; + case VMC_MSG_CLOSE_RESP: + dev_dbg(adapter->dev, "CRQ recv: close resp (0x%x)\n", + crq->type); + if (ibmvmc_validate_hmc_session(adapter, crq) == 0) + ibmvmc_process_close_resp(crq, adapter); + break; + case VMC_MSG_CAP: + case VMC_MSG_OPEN: + case VMC_MSG_CLOSE: + case VMC_MSG_ADD_BUF_RESP: + case VMC_MSG_REM_BUF_RESP: + dev_warn(adapter->dev, "CRQ recv: unexpected msg (0x%x)\n", + crq->type); + break; + default: + dev_warn(adapter->dev, "CRQ recv: unknown msg (0x%x)\n", + crq->type); + break; + } +} + +/** + * ibmvmc_handle_crq_init - Handle CRQ Init + * + * @crq: ibmvmc_crq_msg struct + * @adapter: crq_server_adapter struct + * + * Handle the type of crq initialization based on whether + * it is a message or a response. + * + */ +static void ibmvmc_handle_crq_init(struct ibmvmc_crq_msg *crq, + struct crq_server_adapter *adapter) +{ + switch (crq->type) { + case 0x01: /* Initialization message */ + dev_dbg(adapter->dev, "CRQ recv: CRQ init msg - state 0x%x\n", + ibmvmc.state); + if (ibmvmc.state == ibmvmc_state_crqinit) { + /* Send back a response */ + if (ibmvmc_send_crq(adapter, 0xC002000000000000, + 0) == 0) + ibmvmc_send_capabilities(adapter); + else + dev_err(adapter->dev, " Unable to send init rsp\n"); + } else { + dev_err(adapter->dev, "Invalid state 0x%x mtu = 0x%x\n", + ibmvmc.state, ibmvmc.max_mtu); + } + + break; + case 0x02: /* Initialization response */ + dev_dbg(adapter->dev, "CRQ recv: initialization resp msg - state 0x%x\n", + ibmvmc.state); + if (ibmvmc.state == ibmvmc_state_crqinit) + ibmvmc_send_capabilities(adapter); + break; + default: + dev_warn(adapter->dev, "Unknown crq message type 0x%lx\n", + (unsigned long)crq->type); + } +} + +/** + * ibmvmc_handle_crq - Handle CRQ + * + * @crq: ibmvmc_crq_msg struct + * @adapter: crq_server_adapter struct + * + * Read the command elements from the command queue and execute the + * requests based upon the type of crq message. + * + */ +static void ibmvmc_handle_crq(struct ibmvmc_crq_msg *crq, + struct crq_server_adapter *adapter) +{ + switch (crq->valid) { + case 0xC0: /* initialization */ + ibmvmc_handle_crq_init(crq, adapter); + break; + case 0xFF: /* Hypervisor telling us the connection is closed */ + dev_warn(adapter->dev, "CRQ recv: virtual adapter failed - resetting.\n"); + ibmvmc_reset(adapter, true); + break; + case 0x80: /* real payload */ + ibmvmc_crq_process(adapter, crq); + break; + default: + dev_warn(adapter->dev, "CRQ recv: unknown msg 0x%02x.\n", + crq->valid); + break; + } +} + +static void ibmvmc_task(unsigned long data) +{ + struct crq_server_adapter *adapter = + (struct crq_server_adapter *)data; + struct vio_dev *vdev = to_vio_dev(adapter->dev); + struct ibmvmc_crq_msg *crq; + int done = 0; + + while (!done) { + /* Pull all the valid messages off the CRQ */ + while ((crq = crq_queue_next_crq(&adapter->queue)) != NULL) { + ibmvmc_handle_crq(crq, adapter); + crq->valid = 0x00; + /* CRQ reset was requested, stop processing CRQs. + * Interrupts will be re-enabled by the reset task. + */ + if (ibmvmc.state == ibmvmc_state_sched_reset) + return; + } + + vio_enable_interrupts(vdev); + crq = crq_queue_next_crq(&adapter->queue); + if (crq) { + vio_disable_interrupts(vdev); + ibmvmc_handle_crq(crq, adapter); + crq->valid = 0x00; + /* CRQ reset was requested, stop processing CRQs. + * Interrupts will be re-enabled by the reset task. + */ + if (ibmvmc.state == ibmvmc_state_sched_reset) + return; + } else { + done = 1; + } + } +} + +/** + * ibmvmc_init_crq_queue - Init CRQ Queue + * + * @adapter: crq_server_adapter struct + * + * Return: + * 0 - Success + * Non-zero - Failure + */ +static int ibmvmc_init_crq_queue(struct crq_server_adapter *adapter) +{ + struct vio_dev *vdev = to_vio_dev(adapter->dev); + struct crq_queue *queue = &adapter->queue; + int rc = 0; + int retrc = 0; + + queue->msgs = (struct ibmvmc_crq_msg *)get_zeroed_page(GFP_KERNEL); + + if (!queue->msgs) + goto malloc_failed; + + queue->size = PAGE_SIZE / sizeof(*queue->msgs); + + queue->msg_token = dma_map_single(adapter->dev, queue->msgs, + queue->size * sizeof(*queue->msgs), + DMA_BIDIRECTIONAL); + + if (dma_mapping_error(adapter->dev, queue->msg_token)) + goto map_failed; + + retrc = plpar_hcall_norets(H_REG_CRQ, + vdev->unit_address, + queue->msg_token, PAGE_SIZE); + rc = retrc; + + if (rc == H_RESOURCE) + rc = ibmvmc_reset_crq_queue(adapter); + + if (rc == 2) { + dev_warn(adapter->dev, "Partner adapter not ready\n"); + retrc = 0; + } else if (rc != 0) { + dev_err(adapter->dev, "Error %d opening adapter\n", rc); + goto reg_crq_failed; + } + + queue->cur = 0; + spin_lock_init(&queue->lock); + + tasklet_init(&adapter->work_task, ibmvmc_task, (unsigned long)adapter); + + if (request_irq(vdev->irq, + ibmvmc_handle_event, + 0, "ibmvmc", (void *)adapter) != 0) { + dev_err(adapter->dev, "couldn't register irq 0x%x\n", + vdev->irq); + goto req_irq_failed; + } + + rc = vio_enable_interrupts(vdev); + if (rc != 0) { + dev_err(adapter->dev, "Error %d enabling interrupts!!!\n", rc); + goto req_irq_failed; + } + + return retrc; + +req_irq_failed: + /* Cannot have any work since we either never got our IRQ registered, + * or never got interrupts enabled + */ + tasklet_kill(&adapter->work_task); + h_free_crq(vdev->unit_address); +reg_crq_failed: + dma_unmap_single(adapter->dev, + queue->msg_token, + queue->size * sizeof(*queue->msgs), DMA_BIDIRECTIONAL); +map_failed: + free_page((unsigned long)queue->msgs); +malloc_failed: + return -ENOMEM; +} + +/* Fill in the liobn and riobn fields on the adapter */ +static int read_dma_window(struct vio_dev *vdev, + struct crq_server_adapter *adapter) +{ + const __be32 *dma_window; + const __be32 *prop; + + /* TODO Using of_parse_dma_window would be better, but it doesn't give + * a way to read multiple windows without already knowing the size of + * a window or the number of windows + */ + dma_window = + (const __be32 *)vio_get_attribute(vdev, "ibm,my-dma-window", + NULL); + if (!dma_window) { + dev_warn(adapter->dev, "Couldn't find ibm,my-dma-window property\n"); + return -1; + } + + adapter->liobn = be32_to_cpu(*dma_window); + dma_window++; + + prop = (const __be32 *)vio_get_attribute(vdev, "ibm,#dma-address-cells", + NULL); + if (!prop) { + dev_warn(adapter->dev, "Couldn't find ibm,#dma-address-cells property\n"); + dma_window++; + } else { + dma_window += be32_to_cpu(*prop); + } + + prop = (const __be32 *)vio_get_attribute(vdev, "ibm,#dma-size-cells", + NULL); + if (!prop) { + dev_warn(adapter->dev, "Couldn't find ibm,#dma-size-cells property\n"); + dma_window++; + } else { + dma_window += be32_to_cpu(*prop); + } + + /* dma_window should point to the second window now */ + adapter->riobn = be32_to_cpu(*dma_window); + + return 0; +} + +static int ibmvmc_probe(struct vio_dev *vdev, const struct vio_device_id *id) +{ + struct crq_server_adapter *adapter = &ibmvmc_adapter; + int rc; + + dev_set_drvdata(&vdev->dev, NULL); + memset(adapter, 0, sizeof(*adapter)); + adapter->dev = &vdev->dev; + + dev_info(adapter->dev, "Probe for UA 0x%x\n", vdev->unit_address); + + rc = read_dma_window(vdev, adapter); + if (rc != 0) { + ibmvmc.state = ibmvmc_state_failed; + return -1; + } + + dev_dbg(adapter->dev, "Probe: liobn 0x%x, riobn 0x%x\n", + adapter->liobn, adapter->riobn); + + init_waitqueue_head(&adapter->reset_wait_queue); + adapter->reset_task = kthread_run(ibmvmc_reset_task, adapter, "ibmvmc"); + if (IS_ERR(adapter->reset_task)) { + dev_err(adapter->dev, "Failed to start reset thread\n"); + ibmvmc.state = ibmvmc_state_failed; + rc = PTR_ERR(adapter->reset_task); + adapter->reset_task = NULL; + return rc; + } + + rc = ibmvmc_init_crq_queue(adapter); + if (rc != 0 && rc != H_RESOURCE) { + dev_err(adapter->dev, "Error initializing CRQ. rc = 0x%x\n", + rc); + ibmvmc.state = ibmvmc_state_failed; + goto crq_failed; + } + + ibmvmc.state = ibmvmc_state_crqinit; + + /* Try to send an initialization message. Note that this is allowed + * to fail if the other end is not acive. In that case we just wait + * for the other side to initialize. + */ + if (ibmvmc_send_crq(adapter, 0xC001000000000000LL, 0) != 0 && + rc != H_RESOURCE) + dev_warn(adapter->dev, "Failed to send initialize CRQ message\n"); + + dev_set_drvdata(&vdev->dev, adapter); + + return 0; + +crq_failed: + kthread_stop(adapter->reset_task); + adapter->reset_task = NULL; + return -EPERM; +} + +static int ibmvmc_remove(struct vio_dev *vdev) +{ + struct crq_server_adapter *adapter = dev_get_drvdata(&vdev->dev); + + dev_info(adapter->dev, "Entering remove for UA 0x%x\n", + vdev->unit_address); + ibmvmc_release_crq_queue(adapter); + + return 0; +} + +static struct vio_device_id ibmvmc_device_table[] = { + { "ibm,vmc", "IBM,vmc" }, + { "", "" } +}; +MODULE_DEVICE_TABLE(vio, ibmvmc_device_table); + +static struct vio_driver ibmvmc_driver = { + .name = ibmvmc_driver_name, + .id_table = ibmvmc_device_table, + .probe = ibmvmc_probe, + .remove = ibmvmc_remove, +}; + +static void __init ibmvmc_scrub_module_parms(void) +{ + if (ibmvmc_max_mtu > MAX_MTU) { + pr_warn("ibmvmc: Max MTU reduced to %d\n", MAX_MTU); + ibmvmc_max_mtu = MAX_MTU; + } else if (ibmvmc_max_mtu < MIN_MTU) { + pr_warn("ibmvmc: Max MTU increased to %d\n", MIN_MTU); + ibmvmc_max_mtu = MIN_MTU; + } + + if (ibmvmc_max_buf_pool_size > MAX_BUF_POOL_SIZE) { + pr_warn("ibmvmc: Max buffer pool size reduced to %d\n", + MAX_BUF_POOL_SIZE); + ibmvmc_max_buf_pool_size = MAX_BUF_POOL_SIZE; + } else if (ibmvmc_max_buf_pool_size < MIN_BUF_POOL_SIZE) { + pr_warn("ibmvmc: Max buffer pool size increased to %d\n", + MIN_BUF_POOL_SIZE); + ibmvmc_max_buf_pool_size = MIN_BUF_POOL_SIZE; + } + + if (ibmvmc_max_hmcs > MAX_HMCS) { + pr_warn("ibmvmc: Max HMCs reduced to %d\n", MAX_HMCS); + ibmvmc_max_hmcs = MAX_HMCS; + } else if (ibmvmc_max_hmcs < MIN_HMCS) { + pr_warn("ibmvmc: Max HMCs increased to %d\n", MIN_HMCS); + ibmvmc_max_hmcs = MIN_HMCS; + } +} + +static struct miscdevice ibmvmc_miscdev = { + .name = ibmvmc_driver_name, + .minor = MISC_DYNAMIC_MINOR, + .fops = &ibmvmc_fops, +}; + +static int __init ibmvmc_module_init(void) +{ + int rc, i, j; + + ibmvmc.state = ibmvmc_state_initial; + pr_info("ibmvmc: version %s\n", IBMVMC_DRIVER_VERSION); + + rc = misc_register(&ibmvmc_miscdev); + if (rc) { + pr_err("ibmvmc: misc registration failed\n"); + goto misc_register_failed; + } + pr_info("ibmvmc: node %d:%d\n", MISC_MAJOR, + ibmvmc_miscdev.minor); + + /* Initialize data structures */ + memset(hmcs, 0, sizeof(struct ibmvmc_hmc) * MAX_HMCS); + for (i = 0; i < MAX_HMCS; i++) { + spin_lock_init(&hmcs[i].lock); + hmcs[i].state = ibmhmc_state_free; + for (j = 0; j < MAX_BUF_POOL_SIZE; j++) + hmcs[i].queue_outbound_msgs[j] = VMC_INVALID_BUFFER_ID; + } + + /* Sanity check module parms */ + ibmvmc_scrub_module_parms(); + + /* + * Initialize some reasonable values. Might be negotiated smaller + * values during the capabilities exchange. + */ + ibmvmc.max_mtu = ibmvmc_max_mtu; + ibmvmc.max_buffer_pool_size = ibmvmc_max_buf_pool_size; + ibmvmc.max_hmc_index = ibmvmc_max_hmcs - 1; + + rc = vio_register_driver(&ibmvmc_driver); + + if (rc) { + pr_err("ibmvmc: rc %d from vio_register_driver\n", rc); + goto vio_reg_failed; + } + + return 0; + +vio_reg_failed: + misc_deregister(&ibmvmc_miscdev); +misc_register_failed: + return rc; +} + +static void __exit ibmvmc_module_exit(void) +{ + pr_info("ibmvmc: module exit\n"); + vio_unregister_driver(&ibmvmc_driver); + misc_deregister(&ibmvmc_miscdev); +} + +module_init(ibmvmc_module_init); +module_exit(ibmvmc_module_exit); + +module_param_named(buf_pool_size, ibmvmc_max_buf_pool_size, + int, 0644); +MODULE_PARM_DESC(buf_pool_size, "Buffer pool size"); +module_param_named(max_hmcs, ibmvmc_max_hmcs, int, 0644); +MODULE_PARM_DESC(max_hmcs, "Max HMCs"); +module_param_named(max_mtu, ibmvmc_max_mtu, int, 0644); +MODULE_PARM_DESC(max_mtu, "Max MTU"); + +MODULE_AUTHOR("Steven Royer "); +MODULE_DESCRIPTION("IBM VMC"); +MODULE_VERSION(IBMVMC_DRIVER_VERSION); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/misc/ibmvmc.h b/drivers/misc/ibmvmc.h new file mode 100644 index 000000000..e140ada8f --- /dev/null +++ b/drivers/misc/ibmvmc.h @@ -0,0 +1,209 @@ +/* SPDX-License-Identifier: GPL-2.0+ + * + * linux/drivers/misc/ibmvmc.h + * + * IBM Power Systems Virtual Management Channel Support. + * + * Copyright (c) 2004, 2018 IBM Corp. + * Dave Engebretsen engebret@us.ibm.com + * Steven Royer seroyer@linux.vnet.ibm.com + * Adam Reznechek adreznec@linux.vnet.ibm.com + * Bryant G. Ly + */ +#ifndef IBMVMC_H +#define IBMVMC_H + +#include +#include + +#include + +#define IBMVMC_PROTOCOL_VERSION 0x0101 + +#define MIN_BUF_POOL_SIZE 16 +#define MIN_HMCS 1 +#define MIN_MTU 4096 +#define MAX_BUF_POOL_SIZE 64 +#define MAX_HMCS 2 +#define MAX_MTU (4 * 4096) +#define DEFAULT_BUF_POOL_SIZE 32 +#define DEFAULT_HMCS 1 +#define DEFAULT_MTU 4096 +#define HMC_ID_LEN 32 + +#define VMC_INVALID_BUFFER_ID 0xFFFF + +/* ioctl numbers */ +#define VMC_BASE 0xCC +#define VMC_IOCTL_SETHMCID _IOW(VMC_BASE, 0x00, unsigned char *) +#define VMC_IOCTL_QUERY _IOR(VMC_BASE, 0x01, struct ibmvmc_query_struct) +#define VMC_IOCTL_REQUESTVMC _IOR(VMC_BASE, 0x02, u32) + +#define VMC_MSG_CAP 0x01 +#define VMC_MSG_CAP_RESP 0x81 +#define VMC_MSG_OPEN 0x02 +#define VMC_MSG_OPEN_RESP 0x82 +#define VMC_MSG_CLOSE 0x03 +#define VMC_MSG_CLOSE_RESP 0x83 +#define VMC_MSG_ADD_BUF 0x04 +#define VMC_MSG_ADD_BUF_RESP 0x84 +#define VMC_MSG_REM_BUF 0x05 +#define VMC_MSG_REM_BUF_RESP 0x85 +#define VMC_MSG_SIGNAL 0x06 + +#define VMC_MSG_SUCCESS 0 +#define VMC_MSG_INVALID_HMC_INDEX 1 +#define VMC_MSG_INVALID_BUFFER_ID 2 +#define VMC_MSG_CLOSED_HMC 3 +#define VMC_MSG_INTERFACE_FAILURE 4 +#define VMC_MSG_NO_BUFFER 5 + +#define VMC_BUF_OWNER_ALPHA 0 +#define VMC_BUF_OWNER_HV 1 + +enum ibmvmc_states { + ibmvmc_state_sched_reset = -1, + ibmvmc_state_initial = 0, + ibmvmc_state_crqinit = 1, + ibmvmc_state_capabilities = 2, + ibmvmc_state_ready = 3, + ibmvmc_state_failed = 4, +}; + +enum ibmhmc_states { + /* HMC connection not established */ + ibmhmc_state_free = 0, + + /* HMC connection established (open called) */ + ibmhmc_state_initial = 1, + + /* open msg sent to HV, due to ioctl(1) call */ + ibmhmc_state_opening = 2, + + /* HMC connection ready, open resp msg from HV */ + ibmhmc_state_ready = 3, + + /* HMC connection failure */ + ibmhmc_state_failed = 4, +}; + +struct ibmvmc_buffer { + u8 valid; /* 1 when DMA storage allocated to buffer */ + u8 free; /* 1 when buffer available for the Alpha Partition */ + u8 owner; + u16 id; + u32 size; + u32 msg_len; + dma_addr_t dma_addr_local; + dma_addr_t dma_addr_remote; + void *real_addr_local; +}; + +struct ibmvmc_admin_crq_msg { + u8 valid; /* RPA Defined */ + u8 type; /* ibmvmc msg type */ + u8 status; /* Response msg status. Zero is success and on failure, + * either 1 - General Failure, or 2 - Invalid Version is + * returned. + */ + u8 rsvd[2]; + u8 max_hmc; /* Max # of independent HMC connections supported */ + __be16 pool_size; /* Maximum number of buffers supported per HMC + * connection + */ + __be32 max_mtu; /* Maximum message size supported (bytes) */ + __be16 crq_size; /* # of entries available in the CRQ for the + * source partition. The target partition must + * limit the number of outstanding messages to + * one half or less. + */ + __be16 version; /* Indicates the code level of the management partition + * or the hypervisor with the high-order byte + * indicating a major version and the low-order byte + * indicating a minor version. + */ +}; + +struct ibmvmc_crq_msg { + u8 valid; /* RPA Defined */ + u8 type; /* ibmvmc msg type */ + u8 status; /* Response msg status */ + union { + u8 rsvd; /* Reserved */ + u8 owner; + } var1; + u8 hmc_session; /* Session Identifier for the current VMC connection */ + u8 hmc_index; /* A unique HMC Idx would be used if multiple management + * applications running concurrently were desired + */ + union { + __be16 rsvd; + __be16 buffer_id; + } var2; + __be32 rsvd; + union { + __be32 rsvd; + __be32 lioba; + __be32 msg_len; + } var3; +}; + +/* an RPA command/response transport queue */ +struct crq_queue { + struct ibmvmc_crq_msg *msgs; + int size, cur; + dma_addr_t msg_token; + spinlock_t lock; +}; + +/* VMC server adapter settings */ +struct crq_server_adapter { + struct device *dev; + struct crq_queue queue; + u32 liobn; + u32 riobn; + struct tasklet_struct work_task; + wait_queue_head_t reset_wait_queue; + struct task_struct *reset_task; +}; + +/* Driver wide settings */ +struct ibmvmc_struct { + u32 state; + u32 max_mtu; + u32 max_buffer_pool_size; + u32 max_hmc_index; + struct crq_server_adapter *adapter; + struct cdev cdev; + u32 vmc_drc_index; +}; + +struct ibmvmc_file_session; + +/* Connection specific settings */ +struct ibmvmc_hmc { + u8 session; + u8 index; + u32 state; + struct crq_server_adapter *adapter; + spinlock_t lock; + unsigned char hmc_id[HMC_ID_LEN]; + struct ibmvmc_buffer buffer[MAX_BUF_POOL_SIZE]; + unsigned short queue_outbound_msgs[MAX_BUF_POOL_SIZE]; + int queue_head, queue_tail; + struct ibmvmc_file_session *file_session; +}; + +struct ibmvmc_file_session { + struct file *file; + struct ibmvmc_hmc *hmc; + bool valid; +}; + +struct ibmvmc_query_struct { + int have_vmc; + int state; + int vmc_drc_index; +}; + +#endif /* __IBMVMC_H */ diff --git a/drivers/misc/ics932s401.c b/drivers/misc/ics932s401.c new file mode 100644 index 000000000..81a0541ef --- /dev/null +++ b/drivers/misc/ics932s401.c @@ -0,0 +1,495 @@ +/* + * A driver for the Integrated Circuits ICS932S401 + * Copyright (C) 2008 IBM + * + * Author: Darrick J. Wong + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +/* Addresses to scan */ +static const unsigned short normal_i2c[] = { 0x69, I2C_CLIENT_END }; + +/* ICS932S401 registers */ +#define ICS932S401_REG_CFG2 0x01 +#define ICS932S401_CFG1_SPREAD 0x01 +#define ICS932S401_REG_CFG7 0x06 +#define ICS932S401_FS_MASK 0x07 +#define ICS932S401_REG_VENDOR_REV 0x07 +#define ICS932S401_VENDOR 1 +#define ICS932S401_VENDOR_MASK 0x0F +#define ICS932S401_REV 4 +#define ICS932S401_REV_SHIFT 4 +#define ICS932S401_REG_DEVICE 0x09 +#define ICS932S401_DEVICE 11 +#define ICS932S401_REG_CTRL 0x0A +#define ICS932S401_MN_ENABLED 0x80 +#define ICS932S401_CPU_ALT 0x04 +#define ICS932S401_SRC_ALT 0x08 +#define ICS932S401_REG_CPU_M_CTRL 0x0B +#define ICS932S401_M_MASK 0x3F +#define ICS932S401_REG_CPU_N_CTRL 0x0C +#define ICS932S401_REG_CPU_SPREAD1 0x0D +#define ICS932S401_REG_CPU_SPREAD2 0x0E +#define ICS932S401_SPREAD_MASK 0x7FFF +#define ICS932S401_REG_SRC_M_CTRL 0x0F +#define ICS932S401_REG_SRC_N_CTRL 0x10 +#define ICS932S401_REG_SRC_SPREAD1 0x11 +#define ICS932S401_REG_SRC_SPREAD2 0x12 +#define ICS932S401_REG_CPU_DIVISOR 0x13 +#define ICS932S401_CPU_DIVISOR_SHIFT 4 +#define ICS932S401_REG_PCISRC_DIVISOR 0x14 +#define ICS932S401_SRC_DIVISOR_MASK 0x0F +#define ICS932S401_PCI_DIVISOR_SHIFT 4 + +/* Base clock is 14.318MHz */ +#define BASE_CLOCK 14318 + +#define NUM_REGS 21 +#define NUM_MIRRORED_REGS 15 + +static int regs_to_copy[NUM_MIRRORED_REGS] = { + ICS932S401_REG_CFG2, + ICS932S401_REG_CFG7, + ICS932S401_REG_VENDOR_REV, + ICS932S401_REG_DEVICE, + ICS932S401_REG_CTRL, + ICS932S401_REG_CPU_M_CTRL, + ICS932S401_REG_CPU_N_CTRL, + ICS932S401_REG_CPU_SPREAD1, + ICS932S401_REG_CPU_SPREAD2, + ICS932S401_REG_SRC_M_CTRL, + ICS932S401_REG_SRC_N_CTRL, + ICS932S401_REG_SRC_SPREAD1, + ICS932S401_REG_SRC_SPREAD2, + ICS932S401_REG_CPU_DIVISOR, + ICS932S401_REG_PCISRC_DIVISOR, +}; + +/* How often do we reread sensors values? (In jiffies) */ +#define SENSOR_REFRESH_INTERVAL (2 * HZ) + +/* How often do we reread sensor limit values? (In jiffies) */ +#define LIMIT_REFRESH_INTERVAL (60 * HZ) + +struct ics932s401_data { + struct attribute_group attrs; + struct mutex lock; + char sensors_valid; + unsigned long sensors_last_updated; /* In jiffies */ + + u8 regs[NUM_REGS]; +}; + +static int ics932s401_probe(struct i2c_client *client, + const struct i2c_device_id *id); +static int ics932s401_detect(struct i2c_client *client, + struct i2c_board_info *info); +static int ics932s401_remove(struct i2c_client *client); + +static const struct i2c_device_id ics932s401_id[] = { + { "ics932s401", 0 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, ics932s401_id); + +static struct i2c_driver ics932s401_driver = { + .class = I2C_CLASS_HWMON, + .driver = { + .name = "ics932s401", + }, + .probe = ics932s401_probe, + .remove = ics932s401_remove, + .id_table = ics932s401_id, + .detect = ics932s401_detect, + .address_list = normal_i2c, +}; + +static struct ics932s401_data *ics932s401_update_device(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + struct ics932s401_data *data = i2c_get_clientdata(client); + unsigned long local_jiffies = jiffies; + int i, temp; + + mutex_lock(&data->lock); + if (time_before(local_jiffies, data->sensors_last_updated + + SENSOR_REFRESH_INTERVAL) + && data->sensors_valid) + goto out; + + /* + * Each register must be read as a word and then right shifted 8 bits. + * Not really sure why this is; setting the "byte count programming" + * register to 1 does not fix this problem. + */ + for (i = 0; i < NUM_MIRRORED_REGS; i++) { + temp = i2c_smbus_read_word_data(client, regs_to_copy[i]); + data->regs[regs_to_copy[i]] = temp >> 8; + } + + data->sensors_last_updated = local_jiffies; + data->sensors_valid = 1; + +out: + mutex_unlock(&data->lock); + return data; +} + +static ssize_t show_spread_enabled(struct device *dev, + struct device_attribute *devattr, + char *buf) +{ + struct ics932s401_data *data = ics932s401_update_device(dev); + + if (data->regs[ICS932S401_REG_CFG2] & ICS932S401_CFG1_SPREAD) + return sprintf(buf, "1\n"); + + return sprintf(buf, "0\n"); +} + +/* bit to cpu khz map */ +static const int fs_speeds[] = { + 266666, + 133333, + 200000, + 166666, + 333333, + 100000, + 400000, + 0, +}; + +/* clock divisor map */ +static const int divisors[] = {2, 3, 5, 15, 4, 6, 10, 30, 8, 12, 20, 60, 16, + 24, 40, 120}; + +/* Calculate CPU frequency from the M/N registers. */ +static int calculate_cpu_freq(struct ics932s401_data *data) +{ + int m, n, freq; + + m = data->regs[ICS932S401_REG_CPU_M_CTRL] & ICS932S401_M_MASK; + n = data->regs[ICS932S401_REG_CPU_N_CTRL]; + + /* Pull in bits 8 & 9 from the M register */ + n |= ((int)data->regs[ICS932S401_REG_CPU_M_CTRL] & 0x80) << 1; + n |= ((int)data->regs[ICS932S401_REG_CPU_M_CTRL] & 0x40) << 3; + + freq = BASE_CLOCK * (n + 8) / (m + 2); + freq /= divisors[data->regs[ICS932S401_REG_CPU_DIVISOR] >> + ICS932S401_CPU_DIVISOR_SHIFT]; + + return freq; +} + +static ssize_t show_cpu_clock(struct device *dev, + struct device_attribute *devattr, + char *buf) +{ + struct ics932s401_data *data = ics932s401_update_device(dev); + + return sprintf(buf, "%d\n", calculate_cpu_freq(data)); +} + +static ssize_t show_cpu_clock_sel(struct device *dev, + struct device_attribute *devattr, + char *buf) +{ + struct ics932s401_data *data = ics932s401_update_device(dev); + int freq; + + if (data->regs[ICS932S401_REG_CTRL] & ICS932S401_MN_ENABLED) + freq = calculate_cpu_freq(data); + else { + /* Freq is neatly wrapped up for us */ + int fid = data->regs[ICS932S401_REG_CFG7] & ICS932S401_FS_MASK; + + freq = fs_speeds[fid]; + if (data->regs[ICS932S401_REG_CTRL] & ICS932S401_CPU_ALT) { + switch (freq) { + case 166666: + freq = 160000; + break; + case 333333: + freq = 320000; + break; + } + } + } + + return sprintf(buf, "%d\n", freq); +} + +/* Calculate SRC frequency from the M/N registers. */ +static int calculate_src_freq(struct ics932s401_data *data) +{ + int m, n, freq; + + m = data->regs[ICS932S401_REG_SRC_M_CTRL] & ICS932S401_M_MASK; + n = data->regs[ICS932S401_REG_SRC_N_CTRL]; + + /* Pull in bits 8 & 9 from the M register */ + n |= ((int)data->regs[ICS932S401_REG_SRC_M_CTRL] & 0x80) << 1; + n |= ((int)data->regs[ICS932S401_REG_SRC_M_CTRL] & 0x40) << 3; + + freq = BASE_CLOCK * (n + 8) / (m + 2); + freq /= divisors[data->regs[ICS932S401_REG_PCISRC_DIVISOR] & + ICS932S401_SRC_DIVISOR_MASK]; + + return freq; +} + +static ssize_t show_src_clock(struct device *dev, + struct device_attribute *devattr, + char *buf) +{ + struct ics932s401_data *data = ics932s401_update_device(dev); + + return sprintf(buf, "%d\n", calculate_src_freq(data)); +} + +static ssize_t show_src_clock_sel(struct device *dev, + struct device_attribute *devattr, + char *buf) +{ + struct ics932s401_data *data = ics932s401_update_device(dev); + int freq; + + if (data->regs[ICS932S401_REG_CTRL] & ICS932S401_MN_ENABLED) + freq = calculate_src_freq(data); + else + /* Freq is neatly wrapped up for us */ + if (data->regs[ICS932S401_REG_CTRL] & ICS932S401_CPU_ALT && + data->regs[ICS932S401_REG_CTRL] & ICS932S401_SRC_ALT) + freq = 96000; + else + freq = 100000; + + return sprintf(buf, "%d\n", freq); +} + +/* Calculate PCI frequency from the SRC M/N registers. */ +static int calculate_pci_freq(struct ics932s401_data *data) +{ + int m, n, freq; + + m = data->regs[ICS932S401_REG_SRC_M_CTRL] & ICS932S401_M_MASK; + n = data->regs[ICS932S401_REG_SRC_N_CTRL]; + + /* Pull in bits 8 & 9 from the M register */ + n |= ((int)data->regs[ICS932S401_REG_SRC_M_CTRL] & 0x80) << 1; + n |= ((int)data->regs[ICS932S401_REG_SRC_M_CTRL] & 0x40) << 3; + + freq = BASE_CLOCK * (n + 8) / (m + 2); + freq /= divisors[data->regs[ICS932S401_REG_PCISRC_DIVISOR] >> + ICS932S401_PCI_DIVISOR_SHIFT]; + + return freq; +} + +static ssize_t show_pci_clock(struct device *dev, + struct device_attribute *devattr, + char *buf) +{ + struct ics932s401_data *data = ics932s401_update_device(dev); + + return sprintf(buf, "%d\n", calculate_pci_freq(data)); +} + +static ssize_t show_pci_clock_sel(struct device *dev, + struct device_attribute *devattr, + char *buf) +{ + struct ics932s401_data *data = ics932s401_update_device(dev); + int freq; + + if (data->regs[ICS932S401_REG_CTRL] & ICS932S401_MN_ENABLED) + freq = calculate_pci_freq(data); + else + freq = 33333; + + return sprintf(buf, "%d\n", freq); +} + +static ssize_t show_value(struct device *dev, + struct device_attribute *devattr, + char *buf); + +static ssize_t show_spread(struct device *dev, + struct device_attribute *devattr, + char *buf); + +static DEVICE_ATTR(spread_enabled, S_IRUGO, show_spread_enabled, NULL); +static DEVICE_ATTR(cpu_clock_selection, S_IRUGO, show_cpu_clock_sel, NULL); +static DEVICE_ATTR(cpu_clock, S_IRUGO, show_cpu_clock, NULL); +static DEVICE_ATTR(src_clock_selection, S_IRUGO, show_src_clock_sel, NULL); +static DEVICE_ATTR(src_clock, S_IRUGO, show_src_clock, NULL); +static DEVICE_ATTR(pci_clock_selection, S_IRUGO, show_pci_clock_sel, NULL); +static DEVICE_ATTR(pci_clock, S_IRUGO, show_pci_clock, NULL); +static DEVICE_ATTR(usb_clock, S_IRUGO, show_value, NULL); +static DEVICE_ATTR(ref_clock, S_IRUGO, show_value, NULL); +static DEVICE_ATTR(cpu_spread, S_IRUGO, show_spread, NULL); +static DEVICE_ATTR(src_spread, S_IRUGO, show_spread, NULL); + +static struct attribute *ics932s401_attr[] = { + &dev_attr_spread_enabled.attr, + &dev_attr_cpu_clock_selection.attr, + &dev_attr_cpu_clock.attr, + &dev_attr_src_clock_selection.attr, + &dev_attr_src_clock.attr, + &dev_attr_pci_clock_selection.attr, + &dev_attr_pci_clock.attr, + &dev_attr_usb_clock.attr, + &dev_attr_ref_clock.attr, + &dev_attr_cpu_spread.attr, + &dev_attr_src_spread.attr, + NULL +}; + +static ssize_t show_value(struct device *dev, + struct device_attribute *devattr, + char *buf) +{ + int x; + + if (devattr == &dev_attr_usb_clock) + x = 48000; + else if (devattr == &dev_attr_ref_clock) + x = BASE_CLOCK; + else + BUG(); + + return sprintf(buf, "%d\n", x); +} + +static ssize_t show_spread(struct device *dev, + struct device_attribute *devattr, + char *buf) +{ + struct ics932s401_data *data = ics932s401_update_device(dev); + int reg; + unsigned long val; + + if (!(data->regs[ICS932S401_REG_CFG2] & ICS932S401_CFG1_SPREAD)) + return sprintf(buf, "0%%\n"); + + if (devattr == &dev_attr_src_spread) + reg = ICS932S401_REG_SRC_SPREAD1; + else if (devattr == &dev_attr_cpu_spread) + reg = ICS932S401_REG_CPU_SPREAD1; + else + BUG(); + + val = data->regs[reg] | (data->regs[reg + 1] << 8); + val &= ICS932S401_SPREAD_MASK; + + /* Scale 0..2^14 to -0.5. */ + val = 500000 * val / 16384; + return sprintf(buf, "-0.%lu%%\n", val); +} + +/* Return 0 if detection is successful, -ENODEV otherwise */ +static int ics932s401_detect(struct i2c_client *client, + struct i2c_board_info *info) +{ + struct i2c_adapter *adapter = client->adapter; + int vendor, device, revision; + + if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA)) + return -ENODEV; + + vendor = i2c_smbus_read_word_data(client, ICS932S401_REG_VENDOR_REV); + vendor >>= 8; + revision = vendor >> ICS932S401_REV_SHIFT; + vendor &= ICS932S401_VENDOR_MASK; + if (vendor != ICS932S401_VENDOR) + return -ENODEV; + + device = i2c_smbus_read_word_data(client, ICS932S401_REG_DEVICE); + device >>= 8; + if (device != ICS932S401_DEVICE) + return -ENODEV; + + if (revision != ICS932S401_REV) + dev_info(&adapter->dev, "Unknown revision %d\n", revision); + + strlcpy(info->type, "ics932s401", I2C_NAME_SIZE); + + return 0; +} + +static int ics932s401_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct ics932s401_data *data; + int err; + + data = kzalloc(sizeof(struct ics932s401_data), GFP_KERNEL); + if (!data) { + err = -ENOMEM; + goto exit; + } + + i2c_set_clientdata(client, data); + mutex_init(&data->lock); + + dev_info(&client->dev, "%s chip found\n", client->name); + + /* Register sysfs hooks */ + data->attrs.attrs = ics932s401_attr; + err = sysfs_create_group(&client->dev.kobj, &data->attrs); + if (err) + goto exit_free; + + return 0; + +exit_free: + kfree(data); +exit: + return err; +} + +static int ics932s401_remove(struct i2c_client *client) +{ + struct ics932s401_data *data = i2c_get_clientdata(client); + + sysfs_remove_group(&client->dev.kobj, &data->attrs); + kfree(data); + return 0; +} + +module_i2c_driver(ics932s401_driver); + +MODULE_AUTHOR("Darrick J. Wong "); +MODULE_DESCRIPTION("ICS932S401 driver"); +MODULE_LICENSE("GPL"); + +/* IBM IntelliStation Z30 */ +MODULE_ALIAS("dmi:bvnIBM:*:rn9228:*"); +MODULE_ALIAS("dmi:bvnIBM:*:rn9232:*"); + +/* IBM x3650/x3550 */ +MODULE_ALIAS("dmi:bvnIBM:*:pnIBMSystemx3650*"); +MODULE_ALIAS("dmi:bvnIBM:*:pnIBMSystemx3550*"); diff --git a/drivers/misc/ioc4.c b/drivers/misc/ioc4.c new file mode 100644 index 000000000..ec0832278 --- /dev/null +++ b/drivers/misc/ioc4.c @@ -0,0 +1,500 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2005-2006 Silicon Graphics, Inc. All Rights Reserved. + */ + +/* This file contains the master driver module for use by SGI IOC4 subdrivers. + * + * It allocates any resources shared between multiple subdevices, and + * provides accessor functions (where needed) and the like for those + * resources. It also provides a mechanism for the subdevice modules + * to support loading and unloading. + * + * Non-shared resources (e.g. external interrupt A_INT_OUT register page + * alias, serial port and UART registers) are handled by the subdevice + * modules themselves. + * + * This is all necessary because IOC4 is not implemented as a multi-function + * PCI device, but an amalgamation of disparate registers for several + * types of device (ATA, serial, external interrupts). The normal + * resource management in the kernel doesn't have quite the right interfaces + * to handle this situation (e.g. multiple modules can't claim the same + * PCI ID), thus this IOC4 master module. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/*************** + * Definitions * + ***************/ + +/* Tweakable values */ + +/* PCI bus speed detection/calibration */ +#define IOC4_CALIBRATE_COUNT 63 /* Calibration cycle period */ +#define IOC4_CALIBRATE_CYCLES 256 /* Average over this many cycles */ +#define IOC4_CALIBRATE_DISCARD 2 /* Discard first few cycles */ +#define IOC4_CALIBRATE_LOW_MHZ 25 /* Lower bound on bus speed sanity */ +#define IOC4_CALIBRATE_HIGH_MHZ 75 /* Upper bound on bus speed sanity */ +#define IOC4_CALIBRATE_DEFAULT_MHZ 66 /* Assumed if sanity check fails */ + +/************************ + * Submodule management * + ************************/ + +static DEFINE_MUTEX(ioc4_mutex); + +static LIST_HEAD(ioc4_devices); +static LIST_HEAD(ioc4_submodules); + +/* Register an IOC4 submodule */ +int +ioc4_register_submodule(struct ioc4_submodule *is) +{ + struct ioc4_driver_data *idd; + + mutex_lock(&ioc4_mutex); + list_add(&is->is_list, &ioc4_submodules); + + /* Initialize submodule for each IOC4 */ + if (!is->is_probe) + goto out; + + list_for_each_entry(idd, &ioc4_devices, idd_list) { + if (is->is_probe(idd)) { + printk(KERN_WARNING + "%s: IOC4 submodule %s probe failed " + "for pci_dev %s", + __func__, module_name(is->is_owner), + pci_name(idd->idd_pdev)); + } + } + out: + mutex_unlock(&ioc4_mutex); + return 0; +} + +/* Unregister an IOC4 submodule */ +void +ioc4_unregister_submodule(struct ioc4_submodule *is) +{ + struct ioc4_driver_data *idd; + + mutex_lock(&ioc4_mutex); + list_del(&is->is_list); + + /* Remove submodule for each IOC4 */ + if (!is->is_remove) + goto out; + + list_for_each_entry(idd, &ioc4_devices, idd_list) { + if (is->is_remove(idd)) { + printk(KERN_WARNING + "%s: IOC4 submodule %s remove failed " + "for pci_dev %s.\n", + __func__, module_name(is->is_owner), + pci_name(idd->idd_pdev)); + } + } + out: + mutex_unlock(&ioc4_mutex); +} + +/********************* + * Device management * + *********************/ + +#define IOC4_CALIBRATE_LOW_LIMIT \ + (1000*IOC4_EXTINT_COUNT_DIVISOR/IOC4_CALIBRATE_LOW_MHZ) +#define IOC4_CALIBRATE_HIGH_LIMIT \ + (1000*IOC4_EXTINT_COUNT_DIVISOR/IOC4_CALIBRATE_HIGH_MHZ) +#define IOC4_CALIBRATE_DEFAULT \ + (1000*IOC4_EXTINT_COUNT_DIVISOR/IOC4_CALIBRATE_DEFAULT_MHZ) + +#define IOC4_CALIBRATE_END \ + (IOC4_CALIBRATE_CYCLES + IOC4_CALIBRATE_DISCARD) + +#define IOC4_INT_OUT_MODE_TOGGLE 0x7 /* Toggle INT_OUT every COUNT+1 ticks */ + +/* Determines external interrupt output clock period of the PCI bus an + * IOC4 is attached to. This value can be used to determine the PCI + * bus speed. + * + * IOC4 has a design feature that various internal timers are derived from + * the PCI bus clock. This causes IOC4 device drivers to need to take the + * bus speed into account when setting various register values (e.g. INT_OUT + * register COUNT field, UART divisors, etc). Since this information is + * needed by several subdrivers, it is determined by the main IOC4 driver, + * even though the following code utilizes external interrupt registers + * to perform the speed calculation. + */ +static void +ioc4_clock_calibrate(struct ioc4_driver_data *idd) +{ + union ioc4_int_out int_out; + union ioc4_gpcr gpcr; + unsigned int state, last_state; + uint64_t start, end, period; + unsigned int count; + + /* Enable output */ + gpcr.raw = 0; + gpcr.fields.dir = IOC4_GPCR_DIR_0; + gpcr.fields.int_out_en = 1; + writel(gpcr.raw, &idd->idd_misc_regs->gpcr_s.raw); + + /* Reset to power-on state */ + writel(0, &idd->idd_misc_regs->int_out.raw); + mmiowb(); + + /* Set up square wave */ + int_out.raw = 0; + int_out.fields.count = IOC4_CALIBRATE_COUNT; + int_out.fields.mode = IOC4_INT_OUT_MODE_TOGGLE; + int_out.fields.diag = 0; + writel(int_out.raw, &idd->idd_misc_regs->int_out.raw); + mmiowb(); + + /* Check square wave period averaged over some number of cycles */ + start = ktime_get_ns(); + state = 1; /* make sure the first read isn't a rising edge */ + for (count = 0; count <= IOC4_CALIBRATE_END; count++) { + do { /* wait for a rising edge */ + last_state = state; + int_out.raw = readl(&idd->idd_misc_regs->int_out.raw); + state = int_out.fields.int_out; + } while (last_state || !state); + + /* discard the first few cycles */ + if (count == IOC4_CALIBRATE_DISCARD) + start = ktime_get_ns(); + } + end = ktime_get_ns(); + + /* Calculation rearranged to preserve intermediate precision. + * Logically: + * 1. "end - start" gives us the measurement period over all + * the square wave cycles. + * 2. Divide by number of square wave cycles to get the period + * of a square wave cycle. + * 3. Divide by 2*(int_out.fields.count+1), which is the formula + * by which the IOC4 generates the square wave, to get the + * period of an IOC4 INT_OUT count. + */ + period = (end - start) / + (IOC4_CALIBRATE_CYCLES * 2 * (IOC4_CALIBRATE_COUNT + 1)); + + /* Bounds check the result. */ + if (period > IOC4_CALIBRATE_LOW_LIMIT || + period < IOC4_CALIBRATE_HIGH_LIMIT) { + printk(KERN_INFO + "IOC4 %s: Clock calibration failed. Assuming" + "PCI clock is %d ns.\n", + pci_name(idd->idd_pdev), + IOC4_CALIBRATE_DEFAULT / IOC4_EXTINT_COUNT_DIVISOR); + period = IOC4_CALIBRATE_DEFAULT; + } else { + u64 ns = period; + + do_div(ns, IOC4_EXTINT_COUNT_DIVISOR); + printk(KERN_DEBUG + "IOC4 %s: PCI clock is %llu ns.\n", + pci_name(idd->idd_pdev), (unsigned long long)ns); + } + + /* Remember results. We store the extint clock period rather + * than the PCI clock period so that greater precision is + * retained. Divide by IOC4_EXTINT_COUNT_DIVISOR to get + * PCI clock period. + */ + idd->count_period = period; +} + +/* There are three variants of IOC4 cards: IO9, IO10, and PCI-RT. + * Each brings out different combinations of IOC4 signals, thus. + * the IOC4 subdrivers need to know to which we're attached. + * + * We look for the presence of a SCSI (IO9) or SATA (IO10) controller + * on the same PCI bus at slot number 3 to differentiate IO9 from IO10. + * If neither is present, it's a PCI-RT. + */ +static unsigned int +ioc4_variant(struct ioc4_driver_data *idd) +{ + struct pci_dev *pdev = NULL; + int found = 0; + + /* IO9: Look for a QLogic ISP 12160 at the same bus and slot 3. */ + do { + pdev = pci_get_device(PCI_VENDOR_ID_QLOGIC, + PCI_DEVICE_ID_QLOGIC_ISP12160, pdev); + if (pdev && + idd->idd_pdev->bus->number == pdev->bus->number && + 3 == PCI_SLOT(pdev->devfn)) + found = 1; + } while (pdev && !found); + if (NULL != pdev) { + pci_dev_put(pdev); + return IOC4_VARIANT_IO9; + } + + /* IO10: Look for a Vitesse VSC 7174 at the same bus and slot 3. */ + pdev = NULL; + do { + pdev = pci_get_device(PCI_VENDOR_ID_VITESSE, + PCI_DEVICE_ID_VITESSE_VSC7174, pdev); + if (pdev && + idd->idd_pdev->bus->number == pdev->bus->number && + 3 == PCI_SLOT(pdev->devfn)) + found = 1; + } while (pdev && !found); + if (NULL != pdev) { + pci_dev_put(pdev); + return IOC4_VARIANT_IO10; + } + + /* PCI-RT: No SCSI/SATA controller will be present */ + return IOC4_VARIANT_PCI_RT; +} + +static void +ioc4_load_modules(struct work_struct *work) +{ + request_module("sgiioc4"); +} + +static DECLARE_WORK(ioc4_load_modules_work, ioc4_load_modules); + +/* Adds a new instance of an IOC4 card */ +static int +ioc4_probe(struct pci_dev *pdev, const struct pci_device_id *pci_id) +{ + struct ioc4_driver_data *idd; + struct ioc4_submodule *is; + uint32_t pcmd; + int ret; + + /* Enable IOC4 and take ownership of it */ + if ((ret = pci_enable_device(pdev))) { + printk(KERN_WARNING + "%s: Failed to enable IOC4 device for pci_dev %s.\n", + __func__, pci_name(pdev)); + goto out; + } + pci_set_master(pdev); + + /* Set up per-IOC4 data */ + idd = kmalloc(sizeof(struct ioc4_driver_data), GFP_KERNEL); + if (!idd) { + printk(KERN_WARNING + "%s: Failed to allocate IOC4 data for pci_dev %s.\n", + __func__, pci_name(pdev)); + ret = -ENODEV; + goto out_idd; + } + idd->idd_pdev = pdev; + idd->idd_pci_id = pci_id; + + /* Map IOC4 misc registers. These are shared between subdevices + * so the main IOC4 module manages them. + */ + idd->idd_bar0 = pci_resource_start(idd->idd_pdev, 0); + if (!idd->idd_bar0) { + printk(KERN_WARNING + "%s: Unable to find IOC4 misc resource " + "for pci_dev %s.\n", + __func__, pci_name(idd->idd_pdev)); + ret = -ENODEV; + goto out_pci; + } + if (!request_mem_region(idd->idd_bar0, sizeof(struct ioc4_misc_regs), + "ioc4_misc")) { + printk(KERN_WARNING + "%s: Unable to request IOC4 misc region " + "for pci_dev %s.\n", + __func__, pci_name(idd->idd_pdev)); + ret = -ENODEV; + goto out_pci; + } + idd->idd_misc_regs = ioremap(idd->idd_bar0, + sizeof(struct ioc4_misc_regs)); + if (!idd->idd_misc_regs) { + printk(KERN_WARNING + "%s: Unable to remap IOC4 misc region " + "for pci_dev %s.\n", + __func__, pci_name(idd->idd_pdev)); + ret = -ENODEV; + goto out_misc_region; + } + + /* Failsafe portion of per-IOC4 initialization */ + + /* Detect card variant */ + idd->idd_variant = ioc4_variant(idd); + printk(KERN_INFO "IOC4 %s: %s card detected.\n", pci_name(pdev), + idd->idd_variant == IOC4_VARIANT_IO9 ? "IO9" : + idd->idd_variant == IOC4_VARIANT_PCI_RT ? "PCI-RT" : + idd->idd_variant == IOC4_VARIANT_IO10 ? "IO10" : "unknown"); + + /* Initialize IOC4 */ + pci_read_config_dword(idd->idd_pdev, PCI_COMMAND, &pcmd); + pci_write_config_dword(idd->idd_pdev, PCI_COMMAND, + pcmd | PCI_COMMAND_PARITY | PCI_COMMAND_SERR); + + /* Determine PCI clock */ + ioc4_clock_calibrate(idd); + + /* Disable/clear all interrupts. Need to do this here lest + * one submodule request the shared IOC4 IRQ, but interrupt + * is generated by a different subdevice. + */ + /* Disable */ + writel(~0, &idd->idd_misc_regs->other_iec.raw); + writel(~0, &idd->idd_misc_regs->sio_iec); + /* Clear (i.e. acknowledge) */ + writel(~0, &idd->idd_misc_regs->other_ir.raw); + writel(~0, &idd->idd_misc_regs->sio_ir); + + /* Track PCI-device specific data */ + idd->idd_serial_data = NULL; + pci_set_drvdata(idd->idd_pdev, idd); + + mutex_lock(&ioc4_mutex); + list_add_tail(&idd->idd_list, &ioc4_devices); + + /* Add this IOC4 to all submodules */ + list_for_each_entry(is, &ioc4_submodules, is_list) { + if (is->is_probe && is->is_probe(idd)) { + printk(KERN_WARNING + "%s: IOC4 submodule 0x%s probe failed " + "for pci_dev %s.\n", + __func__, module_name(is->is_owner), + pci_name(idd->idd_pdev)); + } + } + mutex_unlock(&ioc4_mutex); + + /* Request sgiioc4 IDE driver on boards that bring that functionality + * off of IOC4. The root filesystem may be hosted on a drive connected + * to IOC4, so we need to make sure the sgiioc4 driver is loaded as it + * won't be picked up by modprobes due to the ioc4 module owning the + * PCI device. + */ + if (idd->idd_variant != IOC4_VARIANT_PCI_RT) { + /* Request the module from a work procedure as the modprobe + * goes out to a userland helper and that will hang if done + * directly from ioc4_probe(). + */ + printk(KERN_INFO "IOC4 loading sgiioc4 submodule\n"); + schedule_work(&ioc4_load_modules_work); + } + + return 0; + +out_misc_region: + release_mem_region(idd->idd_bar0, sizeof(struct ioc4_misc_regs)); +out_pci: + kfree(idd); +out_idd: + pci_disable_device(pdev); +out: + return ret; +} + +/* Removes a particular instance of an IOC4 card. */ +static void +ioc4_remove(struct pci_dev *pdev) +{ + struct ioc4_submodule *is; + struct ioc4_driver_data *idd; + + idd = pci_get_drvdata(pdev); + + /* Remove this IOC4 from all submodules */ + mutex_lock(&ioc4_mutex); + list_for_each_entry(is, &ioc4_submodules, is_list) { + if (is->is_remove && is->is_remove(idd)) { + printk(KERN_WARNING + "%s: IOC4 submodule 0x%s remove failed " + "for pci_dev %s.\n", + __func__, module_name(is->is_owner), + pci_name(idd->idd_pdev)); + } + } + mutex_unlock(&ioc4_mutex); + + /* Release resources */ + iounmap(idd->idd_misc_regs); + if (!idd->idd_bar0) { + printk(KERN_WARNING + "%s: Unable to get IOC4 misc mapping for pci_dev %s. " + "Device removal may be incomplete.\n", + __func__, pci_name(idd->idd_pdev)); + } + release_mem_region(idd->idd_bar0, sizeof(struct ioc4_misc_regs)); + + /* Disable IOC4 and relinquish */ + pci_disable_device(pdev); + + /* Remove and free driver data */ + mutex_lock(&ioc4_mutex); + list_del(&idd->idd_list); + mutex_unlock(&ioc4_mutex); + kfree(idd); +} + +static const struct pci_device_id ioc4_id_table[] = { + {PCI_VENDOR_ID_SGI, PCI_DEVICE_ID_SGI_IOC4, PCI_ANY_ID, + PCI_ANY_ID, 0x0b4000, 0xFFFFFF}, + {0} +}; + +static struct pci_driver ioc4_driver = { + .name = "IOC4", + .id_table = ioc4_id_table, + .probe = ioc4_probe, + .remove = ioc4_remove, +}; + +MODULE_DEVICE_TABLE(pci, ioc4_id_table); + +/********************* + * Module management * + *********************/ + +/* Module load */ +static int __init +ioc4_init(void) +{ + return pci_register_driver(&ioc4_driver); +} + +/* Module unload */ +static void __exit +ioc4_exit(void) +{ + /* Ensure ioc4_load_modules() has completed before exiting */ + flush_work(&ioc4_load_modules_work); + pci_unregister_driver(&ioc4_driver); +} + +module_init(ioc4_init); +module_exit(ioc4_exit); + +MODULE_AUTHOR("Brent Casavant - Silicon Graphics, Inc. "); +MODULE_DESCRIPTION("PCI driver master module for SGI IOC4 Base-IO Card"); +MODULE_LICENSE("GPL"); + +EXPORT_SYMBOL(ioc4_register_submodule); +EXPORT_SYMBOL(ioc4_unregister_submodule); diff --git a/drivers/misc/isl29003.c b/drivers/misc/isl29003.c new file mode 100644 index 000000000..b8032882c --- /dev/null +++ b/drivers/misc/isl29003.c @@ -0,0 +1,487 @@ +/* + * isl29003.c - Linux kernel module for + * Intersil ISL29003 ambient light sensor + * + * See file:Documentation/misc-devices/isl29003 + * + * Copyright (c) 2009 Daniel Mack + * + * Based on code written by + * Rodolfo Giometti + * Eurotech S.p.A. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include +#include +#include +#include +#include + +#define ISL29003_DRV_NAME "isl29003" +#define DRIVER_VERSION "1.0" + +#define ISL29003_REG_COMMAND 0x00 +#define ISL29003_ADC_ENABLED (1 << 7) +#define ISL29003_ADC_PD (1 << 6) +#define ISL29003_TIMING_INT (1 << 5) +#define ISL29003_MODE_SHIFT (2) +#define ISL29003_MODE_MASK (0x3 << ISL29003_MODE_SHIFT) +#define ISL29003_RES_SHIFT (0) +#define ISL29003_RES_MASK (0x3 << ISL29003_RES_SHIFT) + +#define ISL29003_REG_CONTROL 0x01 +#define ISL29003_INT_FLG (1 << 5) +#define ISL29003_RANGE_SHIFT (2) +#define ISL29003_RANGE_MASK (0x3 << ISL29003_RANGE_SHIFT) +#define ISL29003_INT_PERSISTS_SHIFT (0) +#define ISL29003_INT_PERSISTS_MASK (0xf << ISL29003_INT_PERSISTS_SHIFT) + +#define ISL29003_REG_IRQ_THRESH_HI 0x02 +#define ISL29003_REG_IRQ_THRESH_LO 0x03 +#define ISL29003_REG_LSB_SENSOR 0x04 +#define ISL29003_REG_MSB_SENSOR 0x05 +#define ISL29003_REG_LSB_TIMER 0x06 +#define ISL29003_REG_MSB_TIMER 0x07 + +#define ISL29003_NUM_CACHABLE_REGS 4 + +struct isl29003_data { + struct i2c_client *client; + struct mutex lock; + u8 reg_cache[ISL29003_NUM_CACHABLE_REGS]; + u8 power_state_before_suspend; +}; + +static int gain_range[] = { + 1000, 4000, 16000, 64000 +}; + +/* + * register access helpers + */ + +static int __isl29003_read_reg(struct i2c_client *client, + u32 reg, u8 mask, u8 shift) +{ + struct isl29003_data *data = i2c_get_clientdata(client); + + return (data->reg_cache[reg] & mask) >> shift; +} + +static int __isl29003_write_reg(struct i2c_client *client, + u32 reg, u8 mask, u8 shift, u8 val) +{ + struct isl29003_data *data = i2c_get_clientdata(client); + int ret = 0; + u8 tmp; + + if (reg >= ISL29003_NUM_CACHABLE_REGS) + return -EINVAL; + + mutex_lock(&data->lock); + + tmp = data->reg_cache[reg]; + tmp &= ~mask; + tmp |= val << shift; + + ret = i2c_smbus_write_byte_data(client, reg, tmp); + if (!ret) + data->reg_cache[reg] = tmp; + + mutex_unlock(&data->lock); + return ret; +} + +/* + * internally used functions + */ + +/* range */ +static int isl29003_get_range(struct i2c_client *client) +{ + return __isl29003_read_reg(client, ISL29003_REG_CONTROL, + ISL29003_RANGE_MASK, ISL29003_RANGE_SHIFT); +} + +static int isl29003_set_range(struct i2c_client *client, int range) +{ + return __isl29003_write_reg(client, ISL29003_REG_CONTROL, + ISL29003_RANGE_MASK, ISL29003_RANGE_SHIFT, range); +} + +/* resolution */ +static int isl29003_get_resolution(struct i2c_client *client) +{ + return __isl29003_read_reg(client, ISL29003_REG_COMMAND, + ISL29003_RES_MASK, ISL29003_RES_SHIFT); +} + +static int isl29003_set_resolution(struct i2c_client *client, int res) +{ + return __isl29003_write_reg(client, ISL29003_REG_COMMAND, + ISL29003_RES_MASK, ISL29003_RES_SHIFT, res); +} + +/* mode */ +static int isl29003_get_mode(struct i2c_client *client) +{ + return __isl29003_read_reg(client, ISL29003_REG_COMMAND, + ISL29003_RES_MASK, ISL29003_RES_SHIFT); +} + +static int isl29003_set_mode(struct i2c_client *client, int mode) +{ + return __isl29003_write_reg(client, ISL29003_REG_COMMAND, + ISL29003_RES_MASK, ISL29003_RES_SHIFT, mode); +} + +/* power_state */ +static int isl29003_set_power_state(struct i2c_client *client, int state) +{ + return __isl29003_write_reg(client, ISL29003_REG_COMMAND, + ISL29003_ADC_ENABLED | ISL29003_ADC_PD, 0, + state ? ISL29003_ADC_ENABLED : ISL29003_ADC_PD); +} + +static int isl29003_get_power_state(struct i2c_client *client) +{ + struct isl29003_data *data = i2c_get_clientdata(client); + u8 cmdreg = data->reg_cache[ISL29003_REG_COMMAND]; + + return ~cmdreg & ISL29003_ADC_PD; +} + +static int isl29003_get_adc_value(struct i2c_client *client) +{ + struct isl29003_data *data = i2c_get_clientdata(client); + int lsb, msb, range, bitdepth; + + mutex_lock(&data->lock); + lsb = i2c_smbus_read_byte_data(client, ISL29003_REG_LSB_SENSOR); + + if (lsb < 0) { + mutex_unlock(&data->lock); + return lsb; + } + + msb = i2c_smbus_read_byte_data(client, ISL29003_REG_MSB_SENSOR); + mutex_unlock(&data->lock); + + if (msb < 0) + return msb; + + range = isl29003_get_range(client); + bitdepth = (4 - isl29003_get_resolution(client)) * 4; + return (((msb << 8) | lsb) * gain_range[range]) >> bitdepth; +} + +/* + * sysfs layer + */ + +/* range */ +static ssize_t isl29003_show_range(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct i2c_client *client = to_i2c_client(dev); + + return sprintf(buf, "%i\n", isl29003_get_range(client)); +} + +static ssize_t isl29003_store_range(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct i2c_client *client = to_i2c_client(dev); + unsigned long val; + int ret; + + ret = kstrtoul(buf, 10, &val); + if (ret) + return ret; + + if (val > 3) + return -EINVAL; + + ret = isl29003_set_range(client, val); + if (ret < 0) + return ret; + + return count; +} + +static DEVICE_ATTR(range, S_IWUSR | S_IRUGO, + isl29003_show_range, isl29003_store_range); + + +/* resolution */ +static ssize_t isl29003_show_resolution(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct i2c_client *client = to_i2c_client(dev); + + return sprintf(buf, "%d\n", isl29003_get_resolution(client)); +} + +static ssize_t isl29003_store_resolution(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct i2c_client *client = to_i2c_client(dev); + unsigned long val; + int ret; + + ret = kstrtoul(buf, 10, &val); + if (ret) + return ret; + + if (val > 3) + return -EINVAL; + + ret = isl29003_set_resolution(client, val); + if (ret < 0) + return ret; + + return count; +} + +static DEVICE_ATTR(resolution, S_IWUSR | S_IRUGO, + isl29003_show_resolution, isl29003_store_resolution); + +/* mode */ +static ssize_t isl29003_show_mode(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct i2c_client *client = to_i2c_client(dev); + + return sprintf(buf, "%d\n", isl29003_get_mode(client)); +} + +static ssize_t isl29003_store_mode(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct i2c_client *client = to_i2c_client(dev); + unsigned long val; + int ret; + + ret = kstrtoul(buf, 10, &val); + if (ret) + return ret; + + if (val > 2) + return -EINVAL; + + ret = isl29003_set_mode(client, val); + if (ret < 0) + return ret; + + return count; +} + +static DEVICE_ATTR(mode, S_IWUSR | S_IRUGO, + isl29003_show_mode, isl29003_store_mode); + + +/* power state */ +static ssize_t isl29003_show_power_state(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct i2c_client *client = to_i2c_client(dev); + + return sprintf(buf, "%d\n", isl29003_get_power_state(client)); +} + +static ssize_t isl29003_store_power_state(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct i2c_client *client = to_i2c_client(dev); + unsigned long val; + int ret; + + ret = kstrtoul(buf, 10, &val); + if (ret) + return ret; + + if (val > 1) + return -EINVAL; + + ret = isl29003_set_power_state(client, val); + return ret ? ret : count; +} + +static DEVICE_ATTR(power_state, S_IWUSR | S_IRUGO, + isl29003_show_power_state, isl29003_store_power_state); + + +/* lux */ +static ssize_t isl29003_show_lux(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct i2c_client *client = to_i2c_client(dev); + + /* No LUX data if not operational */ + if (!isl29003_get_power_state(client)) + return -EBUSY; + + return sprintf(buf, "%d\n", isl29003_get_adc_value(client)); +} + +static DEVICE_ATTR(lux, S_IRUGO, isl29003_show_lux, NULL); + +static struct attribute *isl29003_attributes[] = { + &dev_attr_range.attr, + &dev_attr_resolution.attr, + &dev_attr_mode.attr, + &dev_attr_power_state.attr, + &dev_attr_lux.attr, + NULL +}; + +static const struct attribute_group isl29003_attr_group = { + .attrs = isl29003_attributes, +}; + +static int isl29003_init_client(struct i2c_client *client) +{ + struct isl29003_data *data = i2c_get_clientdata(client); + int i; + + /* read all the registers once to fill the cache. + * if one of the reads fails, we consider the init failed */ + for (i = 0; i < ARRAY_SIZE(data->reg_cache); i++) { + int v = i2c_smbus_read_byte_data(client, i); + + if (v < 0) + return -ENODEV; + + data->reg_cache[i] = v; + } + + /* set defaults */ + isl29003_set_range(client, 0); + isl29003_set_resolution(client, 0); + isl29003_set_mode(client, 0); + isl29003_set_power_state(client, 0); + + return 0; +} + +/* + * I2C layer + */ + +static int isl29003_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct i2c_adapter *adapter = to_i2c_adapter(client->dev.parent); + struct isl29003_data *data; + int err = 0; + + if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE)) + return -EIO; + + data = kzalloc(sizeof(struct isl29003_data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + data->client = client; + i2c_set_clientdata(client, data); + mutex_init(&data->lock); + + /* initialize the ISL29003 chip */ + err = isl29003_init_client(client); + if (err) + goto exit_kfree; + + /* register sysfs hooks */ + err = sysfs_create_group(&client->dev.kobj, &isl29003_attr_group); + if (err) + goto exit_kfree; + + dev_info(&client->dev, "driver version %s enabled\n", DRIVER_VERSION); + return 0; + +exit_kfree: + kfree(data); + return err; +} + +static int isl29003_remove(struct i2c_client *client) +{ + sysfs_remove_group(&client->dev.kobj, &isl29003_attr_group); + isl29003_set_power_state(client, 0); + kfree(i2c_get_clientdata(client)); + return 0; +} + +#ifdef CONFIG_PM_SLEEP +static int isl29003_suspend(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + struct isl29003_data *data = i2c_get_clientdata(client); + + data->power_state_before_suspend = isl29003_get_power_state(client); + return isl29003_set_power_state(client, 0); +} + +static int isl29003_resume(struct device *dev) +{ + int i; + struct i2c_client *client = to_i2c_client(dev); + struct isl29003_data *data = i2c_get_clientdata(client); + + /* restore registers from cache */ + for (i = 0; i < ARRAY_SIZE(data->reg_cache); i++) + if (i2c_smbus_write_byte_data(client, i, data->reg_cache[i])) + return -EIO; + + return isl29003_set_power_state(client, + data->power_state_before_suspend); +} + +static SIMPLE_DEV_PM_OPS(isl29003_pm_ops, isl29003_suspend, isl29003_resume); +#define ISL29003_PM_OPS (&isl29003_pm_ops) + +#else +#define ISL29003_PM_OPS NULL +#endif /* CONFIG_PM_SLEEP */ + +static const struct i2c_device_id isl29003_id[] = { + { "isl29003", 0 }, + {} +}; +MODULE_DEVICE_TABLE(i2c, isl29003_id); + +static struct i2c_driver isl29003_driver = { + .driver = { + .name = ISL29003_DRV_NAME, + .pm = ISL29003_PM_OPS, + }, + .probe = isl29003_probe, + .remove = isl29003_remove, + .id_table = isl29003_id, +}; + +module_i2c_driver(isl29003_driver); + +MODULE_AUTHOR("Daniel Mack "); +MODULE_DESCRIPTION("ISL29003 ambient light sensor driver"); +MODULE_LICENSE("GPL v2"); +MODULE_VERSION(DRIVER_VERSION); diff --git a/drivers/misc/isl29020.c b/drivers/misc/isl29020.c new file mode 100644 index 000000000..e3bd3c1d2 --- /dev/null +++ b/drivers/misc/isl29020.c @@ -0,0 +1,238 @@ +/* + * isl29020.c - Intersil ALS Driver + * + * Copyright (C) 2008 Intel Corp + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * Data sheet at: http://www.intersil.com/data/fn/fn6505.pdf + */ + +#include +#include +#include +#include +#include +#include +#include + +static DEFINE_MUTEX(mutex); + +static ssize_t als_sensing_range_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct i2c_client *client = to_i2c_client(dev); + int val; + + val = i2c_smbus_read_byte_data(client, 0x00); + + if (val < 0) + return val; + return sprintf(buf, "%d000\n", 1 << (2 * (val & 3))); + +} + +static ssize_t als_lux_input_data_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct i2c_client *client = to_i2c_client(dev); + int ret_val, val; + unsigned long int lux; + int temp; + + pm_runtime_get_sync(dev); + msleep(100); + + mutex_lock(&mutex); + temp = i2c_smbus_read_byte_data(client, 0x02); /* MSB data */ + if (temp < 0) { + pm_runtime_put_sync(dev); + mutex_unlock(&mutex); + return temp; + } + + ret_val = i2c_smbus_read_byte_data(client, 0x01); /* LSB data */ + mutex_unlock(&mutex); + + if (ret_val < 0) { + pm_runtime_put_sync(dev); + return ret_val; + } + + ret_val |= temp << 8; + val = i2c_smbus_read_byte_data(client, 0x00); + pm_runtime_put_sync(dev); + if (val < 0) + return val; + lux = ((((1 << (2 * (val & 3))))*1000) * ret_val) / 65536; + return sprintf(buf, "%ld\n", lux); +} + +static ssize_t als_sensing_range_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct i2c_client *client = to_i2c_client(dev); + int ret_val; + unsigned long val; + + ret_val = kstrtoul(buf, 10, &val); + if (ret_val) + return ret_val; + + if (val < 1 || val > 64000) + return -EINVAL; + + /* Pick the smallest sensor range that will meet our requirements */ + if (val <= 1000) + val = 1; + else if (val <= 4000) + val = 2; + else if (val <= 16000) + val = 3; + else + val = 4; + + ret_val = i2c_smbus_read_byte_data(client, 0x00); + if (ret_val < 0) + return ret_val; + + ret_val &= 0xFC; /*reset the bit before setting them */ + ret_val |= val - 1; + ret_val = i2c_smbus_write_byte_data(client, 0x00, ret_val); + + if (ret_val < 0) + return ret_val; + return count; +} + +static void als_set_power_state(struct i2c_client *client, int enable) +{ + int ret_val; + + ret_val = i2c_smbus_read_byte_data(client, 0x00); + if (ret_val < 0) + return; + + if (enable) + ret_val |= 0x80; + else + ret_val &= 0x7F; + + i2c_smbus_write_byte_data(client, 0x00, ret_val); +} + +static DEVICE_ATTR(lux0_sensor_range, S_IRUGO | S_IWUSR, + als_sensing_range_show, als_sensing_range_store); +static DEVICE_ATTR(lux0_input, S_IRUGO, als_lux_input_data_show, NULL); + +static struct attribute *mid_att_als[] = { + &dev_attr_lux0_sensor_range.attr, + &dev_attr_lux0_input.attr, + NULL +}; + +static const struct attribute_group m_als_gr = { + .name = "isl29020", + .attrs = mid_att_als +}; + +static int als_set_default_config(struct i2c_client *client) +{ + int retval; + + retval = i2c_smbus_write_byte_data(client, 0x00, 0xc0); + if (retval < 0) { + dev_err(&client->dev, "default write failed."); + return retval; + } + return 0; +} + +static int isl29020_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + int res; + + res = als_set_default_config(client); + if (res < 0) + return res; + + res = sysfs_create_group(&client->dev.kobj, &m_als_gr); + if (res) { + dev_err(&client->dev, "isl29020: device create file failed\n"); + return res; + } + dev_info(&client->dev, "%s isl29020: ALS chip found\n", client->name); + als_set_power_state(client, 0); + pm_runtime_enable(&client->dev); + return res; +} + +static int isl29020_remove(struct i2c_client *client) +{ + sysfs_remove_group(&client->dev.kobj, &m_als_gr); + return 0; +} + +static const struct i2c_device_id isl29020_id[] = { + { "isl29020", 0 }, + { } +}; + +MODULE_DEVICE_TABLE(i2c, isl29020_id); + +#ifdef CONFIG_PM + +static int isl29020_runtime_suspend(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + als_set_power_state(client, 0); + return 0; +} + +static int isl29020_runtime_resume(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + als_set_power_state(client, 1); + return 0; +} + +static const struct dev_pm_ops isl29020_pm_ops = { + .runtime_suspend = isl29020_runtime_suspend, + .runtime_resume = isl29020_runtime_resume, +}; + +#define ISL29020_PM_OPS (&isl29020_pm_ops) +#else /* CONFIG_PM */ +#define ISL29020_PM_OPS NULL +#endif /* CONFIG_PM */ + +static struct i2c_driver isl29020_driver = { + .driver = { + .name = "isl29020", + .pm = ISL29020_PM_OPS, + }, + .probe = isl29020_probe, + .remove = isl29020_remove, + .id_table = isl29020_id, +}; + +module_i2c_driver(isl29020_driver); + +MODULE_AUTHOR("Kalhan Trisal "); +MODULE_DESCRIPTION("Intersil isl29020 ALS Driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/misc/kgdbts.c b/drivers/misc/kgdbts.c new file mode 100644 index 000000000..bc4dc92af --- /dev/null +++ b/drivers/misc/kgdbts.c @@ -0,0 +1,1193 @@ +/* + * kgdbts is a test suite for kgdb for the sole purpose of validating + * that key pieces of the kgdb internals are working properly such as + * HW/SW breakpoints, single stepping, and NMI. + * + * Created by: Jason Wessel + * + * Copyright (c) 2008 Wind River Systems, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +/* Information about the kgdb test suite. + * ------------------------------------- + * + * The kgdb test suite is designed as a KGDB I/O module which + * simulates the communications that a debugger would have with kgdb. + * The tests are broken up in to a line by line and referenced here as + * a "get" which is kgdb requesting input and "put" which is kgdb + * sending a response. + * + * The kgdb suite can be invoked from the kernel command line + * arguments system or executed dynamically at run time. The test + * suite uses the variable "kgdbts" to obtain the information about + * which tests to run and to configure the verbosity level. The + * following are the various characters you can use with the kgdbts= + * line: + * + * When using the "kgdbts=" you only choose one of the following core + * test types: + * A = Run all the core tests silently + * V1 = Run all the core tests with minimal output + * V2 = Run all the core tests in debug mode + * + * You can also specify optional tests: + * N## = Go to sleep with interrupts of for ## seconds + * to test the HW NMI watchdog + * F## = Break at do_fork for ## iterations + * S## = Break at sys_open for ## iterations + * I## = Run the single step test ## iterations + * + * NOTE: that the do_fork and sys_open tests are mutually exclusive. + * + * To invoke the kgdb test suite from boot you use a kernel start + * argument as follows: + * kgdbts=V1 kgdbwait + * Or if you wanted to perform the NMI test for 6 seconds and do_fork + * test for 100 forks, you could use: + * kgdbts=V1N6F100 kgdbwait + * + * The test suite can also be invoked at run time with: + * echo kgdbts=V1N6F100 > /sys/module/kgdbts/parameters/kgdbts + * Or as another example: + * echo kgdbts=V2 > /sys/module/kgdbts/parameters/kgdbts + * + * When developing a new kgdb arch specific implementation or + * using these tests for the purpose of regression testing, + * several invocations are required. + * + * 1) Boot with the test suite enabled by using the kernel arguments + * "kgdbts=V1F100 kgdbwait" + * ## If kgdb arch specific implementation has NMI use + * "kgdbts=V1N6F100 + * + * 2) After the system boot run the basic test. + * echo kgdbts=V1 > /sys/module/kgdbts/parameters/kgdbts + * + * 3) Run the concurrency tests. It is best to use n+1 + * while loops where n is the number of cpus you have + * in your system. The example below uses only two + * loops. + * + * ## This tests break points on sys_open + * while [ 1 ] ; do find / > /dev/null 2>&1 ; done & + * while [ 1 ] ; do find / > /dev/null 2>&1 ; done & + * echo kgdbts=V1S10000 > /sys/module/kgdbts/parameters/kgdbts + * fg # and hit control-c + * fg # and hit control-c + * ## This tests break points on do_fork + * while [ 1 ] ; do date > /dev/null ; done & + * while [ 1 ] ; do date > /dev/null ; done & + * echo kgdbts=V1F1000 > /sys/module/kgdbts/parameters/kgdbts + * fg # and hit control-c + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#define v1printk(a...) do { \ + if (verbose) \ + printk(KERN_INFO a); \ +} while (0) +#define v2printk(a...) do { \ + if (verbose > 1) { \ + printk(KERN_INFO a); \ + } \ + touch_nmi_watchdog(); \ +} while (0) +#define eprintk(a...) do { \ + printk(KERN_ERR a); \ + WARN_ON(1); \ +} while (0) +#define MAX_CONFIG_LEN 40 + +static struct kgdb_io kgdbts_io_ops; +static char get_buf[BUFMAX]; +static int get_buf_cnt; +static char put_buf[BUFMAX]; +static int put_buf_cnt; +static char scratch_buf[BUFMAX]; +static int verbose; +static int repeat_test; +static int test_complete; +static int send_ack; +static int final_ack; +static int force_hwbrks; +static int hwbreaks_ok; +static int hw_break_val; +static int hw_break_val2; +static int cont_instead_of_sstep; +static unsigned long cont_thread_id; +static unsigned long sstep_thread_id; +#if defined(CONFIG_ARM) || defined(CONFIG_MIPS) || defined(CONFIG_SPARC) +static int arch_needs_sstep_emulation = 1; +#else +static int arch_needs_sstep_emulation; +#endif +static unsigned long cont_addr; +static unsigned long sstep_addr; +static int restart_from_top_after_write; +static int sstep_state; + +/* Storage for the registers, in GDB format. */ +static unsigned long kgdbts_gdb_regs[(NUMREGBYTES + + sizeof(unsigned long) - 1) / + sizeof(unsigned long)]; +static struct pt_regs kgdbts_regs; + +/* -1 = init not run yet, 0 = unconfigured, 1 = configured. */ +static int configured = -1; + +#ifdef CONFIG_KGDB_TESTS_BOOT_STRING +static char config[MAX_CONFIG_LEN] = CONFIG_KGDB_TESTS_BOOT_STRING; +#else +static char config[MAX_CONFIG_LEN]; +#endif +static struct kparam_string kps = { + .string = config, + .maxlen = MAX_CONFIG_LEN, +}; + +static void fill_get_buf(char *buf); + +struct test_struct { + char *get; + char *put; + void (*get_handler)(char *); + int (*put_handler)(char *, char *); +}; + +struct test_state { + char *name; + struct test_struct *tst; + int idx; + int (*run_test) (int, int); + int (*validate_put) (char *); +}; + +static struct test_state ts; + +static int kgdbts_unreg_thread(void *ptr) +{ + /* Wait until the tests are complete and then ungresiter the I/O + * driver. + */ + while (!final_ack) + msleep_interruptible(1500); + /* Pause for any other threads to exit after final ack. */ + msleep_interruptible(1000); + if (configured) + kgdb_unregister_io_module(&kgdbts_io_ops); + configured = 0; + + return 0; +} + +/* This is noinline such that it can be used for a single location to + * place a breakpoint + */ +static noinline void kgdbts_break_test(void) +{ + v2printk("kgdbts: breakpoint complete\n"); +} + +/* Lookup symbol info in the kernel */ +static unsigned long lookup_addr(char *arg) +{ + unsigned long addr = 0; + + if (!strcmp(arg, "kgdbts_break_test")) + addr = (unsigned long)kgdbts_break_test; + else if (!strcmp(arg, "sys_open")) + addr = (unsigned long)do_sys_open; + else if (!strcmp(arg, "do_fork")) + addr = (unsigned long)_do_fork; + else if (!strcmp(arg, "hw_break_val")) + addr = (unsigned long)&hw_break_val; + addr = (unsigned long) dereference_function_descriptor((void *)addr); + return addr; +} + +static void break_helper(char *bp_type, char *arg, unsigned long vaddr) +{ + unsigned long addr; + + if (arg) + addr = lookup_addr(arg); + else + addr = vaddr; + + sprintf(scratch_buf, "%s,%lx,%i", bp_type, addr, + BREAK_INSTR_SIZE); + fill_get_buf(scratch_buf); +} + +static void sw_break(char *arg) +{ + break_helper(force_hwbrks ? "Z1" : "Z0", arg, 0); +} + +static void sw_rem_break(char *arg) +{ + break_helper(force_hwbrks ? "z1" : "z0", arg, 0); +} + +static void hw_break(char *arg) +{ + break_helper("Z1", arg, 0); +} + +static void hw_rem_break(char *arg) +{ + break_helper("z1", arg, 0); +} + +static void hw_write_break(char *arg) +{ + break_helper("Z2", arg, 0); +} + +static void hw_rem_write_break(char *arg) +{ + break_helper("z2", arg, 0); +} + +static void hw_access_break(char *arg) +{ + break_helper("Z4", arg, 0); +} + +static void hw_rem_access_break(char *arg) +{ + break_helper("z4", arg, 0); +} + +static void hw_break_val_access(void) +{ + hw_break_val2 = hw_break_val; +} + +static void hw_break_val_write(void) +{ + hw_break_val++; +} + +static int get_thread_id_continue(char *put_str, char *arg) +{ + char *ptr = &put_str[11]; + + if (put_str[1] != 'T' || put_str[2] != '0') + return 1; + kgdb_hex2long(&ptr, &cont_thread_id); + return 0; +} + +static int check_and_rewind_pc(char *put_str, char *arg) +{ + unsigned long addr = lookup_addr(arg); + unsigned long ip; + int offset = 0; + + kgdb_hex2mem(&put_str[1], (char *)kgdbts_gdb_regs, + NUMREGBYTES); + gdb_regs_to_pt_regs(kgdbts_gdb_regs, &kgdbts_regs); + ip = instruction_pointer(&kgdbts_regs); + v2printk("Stopped at IP: %lx\n", ip); +#ifdef GDB_ADJUSTS_BREAK_OFFSET + /* On some arches, a breakpoint stop requires it to be decremented */ + if (addr + BREAK_INSTR_SIZE == ip) + offset = -BREAK_INSTR_SIZE; +#endif + + if (arch_needs_sstep_emulation && sstep_addr && + ip + offset == sstep_addr && + ((!strcmp(arg, "sys_open") || !strcmp(arg, "do_fork")))) { + /* This is special case for emulated single step */ + v2printk("Emul: rewind hit single step bp\n"); + restart_from_top_after_write = 1; + } else if (strcmp(arg, "silent") && ip + offset != addr) { + eprintk("kgdbts: BP mismatch %lx expected %lx\n", + ip + offset, addr); + return 1; + } + /* Readjust the instruction pointer if needed */ + ip += offset; + cont_addr = ip; +#ifdef GDB_ADJUSTS_BREAK_OFFSET + instruction_pointer_set(&kgdbts_regs, ip); +#endif + return 0; +} + +static int check_single_step(char *put_str, char *arg) +{ + unsigned long addr = lookup_addr(arg); + static int matched_id; + + /* + * From an arch indepent point of view the instruction pointer + * should be on a different instruction + */ + kgdb_hex2mem(&put_str[1], (char *)kgdbts_gdb_regs, + NUMREGBYTES); + gdb_regs_to_pt_regs(kgdbts_gdb_regs, &kgdbts_regs); + v2printk("Singlestep stopped at IP: %lx\n", + instruction_pointer(&kgdbts_regs)); + + if (sstep_thread_id != cont_thread_id) { + /* + * Ensure we stopped in the same thread id as before, else the + * debugger should continue until the original thread that was + * single stepped is scheduled again, emulating gdb's behavior. + */ + v2printk("ThrID does not match: %lx\n", cont_thread_id); + if (arch_needs_sstep_emulation) { + if (matched_id && + instruction_pointer(&kgdbts_regs) != addr) + goto continue_test; + matched_id++; + ts.idx -= 2; + sstep_state = 0; + return 0; + } + cont_instead_of_sstep = 1; + ts.idx -= 4; + return 0; + } +continue_test: + matched_id = 0; + if (instruction_pointer(&kgdbts_regs) == addr) { + eprintk("kgdbts: SingleStep failed at %lx\n", + instruction_pointer(&kgdbts_regs)); + return 1; + } + + return 0; +} + +static void write_regs(char *arg) +{ + memset(scratch_buf, 0, sizeof(scratch_buf)); + scratch_buf[0] = 'G'; + pt_regs_to_gdb_regs(kgdbts_gdb_regs, &kgdbts_regs); + kgdb_mem2hex((char *)kgdbts_gdb_regs, &scratch_buf[1], NUMREGBYTES); + fill_get_buf(scratch_buf); +} + +static void skip_back_repeat_test(char *arg) +{ + int go_back = simple_strtol(arg, NULL, 10); + + repeat_test--; + if (repeat_test <= 0) { + ts.idx++; + } else { + if (repeat_test % 100 == 0) + v1printk("kgdbts:RUN ... %d remaining\n", repeat_test); + + ts.idx -= go_back; + } + fill_get_buf(ts.tst[ts.idx].get); +} + +static int got_break(char *put_str, char *arg) +{ + test_complete = 1; + if (!strncmp(put_str+1, arg, 2)) { + if (!strncmp(arg, "T0", 2)) + test_complete = 2; + return 0; + } + return 1; +} + +static void get_cont_catch(char *arg) +{ + /* Always send detach because the test is completed at this point */ + fill_get_buf("D"); +} + +static int put_cont_catch(char *put_str, char *arg) +{ + /* This is at the end of the test and we catch any and all input */ + v2printk("kgdbts: cleanup task: %lx\n", sstep_thread_id); + ts.idx--; + return 0; +} + +static int emul_reset(char *put_str, char *arg) +{ + if (strncmp(put_str, "$OK", 3)) + return 1; + if (restart_from_top_after_write) { + restart_from_top_after_write = 0; + ts.idx = -1; + } + return 0; +} + +static void emul_sstep_get(char *arg) +{ + if (!arch_needs_sstep_emulation) { + if (cont_instead_of_sstep) { + cont_instead_of_sstep = 0; + fill_get_buf("c"); + } else { + fill_get_buf(arg); + } + return; + } + switch (sstep_state) { + case 0: + v2printk("Emulate single step\n"); + /* Start by looking at the current PC */ + fill_get_buf("g"); + break; + case 1: + /* set breakpoint */ + break_helper("Z0", NULL, sstep_addr); + break; + case 2: + /* Continue */ + fill_get_buf("c"); + break; + case 3: + /* Clear breakpoint */ + break_helper("z0", NULL, sstep_addr); + break; + default: + eprintk("kgdbts: ERROR failed sstep get emulation\n"); + } + sstep_state++; +} + +static int emul_sstep_put(char *put_str, char *arg) +{ + if (!arch_needs_sstep_emulation) { + char *ptr = &put_str[11]; + if (put_str[1] != 'T' || put_str[2] != '0') + return 1; + kgdb_hex2long(&ptr, &sstep_thread_id); + return 0; + } + switch (sstep_state) { + case 1: + /* validate the "g" packet to get the IP */ + kgdb_hex2mem(&put_str[1], (char *)kgdbts_gdb_regs, + NUMREGBYTES); + gdb_regs_to_pt_regs(kgdbts_gdb_regs, &kgdbts_regs); + v2printk("Stopped at IP: %lx\n", + instruction_pointer(&kgdbts_regs)); + /* Want to stop at IP + break instruction size by default */ + sstep_addr = cont_addr + BREAK_INSTR_SIZE; + break; + case 2: + if (strncmp(put_str, "$OK", 3)) { + eprintk("kgdbts: failed sstep break set\n"); + return 1; + } + break; + case 3: + if (strncmp(put_str, "$T0", 3)) { + eprintk("kgdbts: failed continue sstep\n"); + return 1; + } else { + char *ptr = &put_str[11]; + kgdb_hex2long(&ptr, &sstep_thread_id); + } + break; + case 4: + if (strncmp(put_str, "$OK", 3)) { + eprintk("kgdbts: failed sstep break unset\n"); + return 1; + } + /* Single step is complete so continue on! */ + sstep_state = 0; + return 0; + default: + eprintk("kgdbts: ERROR failed sstep put emulation\n"); + } + + /* Continue on the same test line until emulation is complete */ + ts.idx--; + return 0; +} + +static int final_ack_set(char *put_str, char *arg) +{ + if (strncmp(put_str+1, arg, 2)) + return 1; + final_ack = 1; + return 0; +} +/* + * Test to plant a breakpoint and detach, which should clear out the + * breakpoint and restore the original instruction. + */ +static struct test_struct plant_and_detach_test[] = { + { "?", "S0*" }, /* Clear break points */ + { "kgdbts_break_test", "OK", sw_break, }, /* set sw breakpoint */ + { "D", "OK" }, /* Detach */ + { "", "" }, +}; + +/* + * Simple test to write in a software breakpoint, check for the + * correct stop location and detach. + */ +static struct test_struct sw_breakpoint_test[] = { + { "?", "S0*" }, /* Clear break points */ + { "kgdbts_break_test", "OK", sw_break, }, /* set sw breakpoint */ + { "c", "T0*", }, /* Continue */ + { "g", "kgdbts_break_test", NULL, check_and_rewind_pc }, + { "write", "OK", write_regs }, + { "kgdbts_break_test", "OK", sw_rem_break }, /*remove breakpoint */ + { "D", "OK" }, /* Detach */ + { "D", "OK", NULL, got_break }, /* On success we made it here */ + { "", "" }, +}; + +/* + * Test a known bad memory read location to test the fault handler and + * read bytes 1-8 at the bad address + */ +static struct test_struct bad_read_test[] = { + { "?", "S0*" }, /* Clear break points */ + { "m0,1", "E*" }, /* read 1 byte at address 1 */ + { "m0,2", "E*" }, /* read 1 byte at address 2 */ + { "m0,3", "E*" }, /* read 1 byte at address 3 */ + { "m0,4", "E*" }, /* read 1 byte at address 4 */ + { "m0,5", "E*" }, /* read 1 byte at address 5 */ + { "m0,6", "E*" }, /* read 1 byte at address 6 */ + { "m0,7", "E*" }, /* read 1 byte at address 7 */ + { "m0,8", "E*" }, /* read 1 byte at address 8 */ + { "D", "OK" }, /* Detach which removes all breakpoints and continues */ + { "", "" }, +}; + +/* + * Test for hitting a breakpoint, remove it, single step, plant it + * again and detach. + */ +static struct test_struct singlestep_break_test[] = { + { "?", "S0*" }, /* Clear break points */ + { "kgdbts_break_test", "OK", sw_break, }, /* set sw breakpoint */ + { "c", "T0*", NULL, get_thread_id_continue }, /* Continue */ + { "kgdbts_break_test", "OK", sw_rem_break }, /*remove breakpoint */ + { "g", "kgdbts_break_test", NULL, check_and_rewind_pc }, + { "write", "OK", write_regs }, /* Write registers */ + { "s", "T0*", emul_sstep_get, emul_sstep_put }, /* Single step */ + { "g", "kgdbts_break_test", NULL, check_single_step }, + { "kgdbts_break_test", "OK", sw_break, }, /* set sw breakpoint */ + { "c", "T0*", }, /* Continue */ + { "g", "kgdbts_break_test", NULL, check_and_rewind_pc }, + { "write", "OK", write_regs }, /* Write registers */ + { "D", "OK" }, /* Remove all breakpoints and continues */ + { "", "" }, +}; + +/* + * Test for hitting a breakpoint at do_fork for what ever the number + * of iterations required by the variable repeat_test. + */ +static struct test_struct do_fork_test[] = { + { "?", "S0*" }, /* Clear break points */ + { "do_fork", "OK", sw_break, }, /* set sw breakpoint */ + { "c", "T0*", NULL, get_thread_id_continue }, /* Continue */ + { "do_fork", "OK", sw_rem_break }, /*remove breakpoint */ + { "g", "do_fork", NULL, check_and_rewind_pc }, /* check location */ + { "write", "OK", write_regs, emul_reset }, /* Write registers */ + { "s", "T0*", emul_sstep_get, emul_sstep_put }, /* Single step */ + { "g", "do_fork", NULL, check_single_step }, + { "do_fork", "OK", sw_break, }, /* set sw breakpoint */ + { "7", "T0*", skip_back_repeat_test }, /* Loop based on repeat_test */ + { "D", "OK", NULL, final_ack_set }, /* detach and unregister I/O */ + { "", "", get_cont_catch, put_cont_catch }, +}; + +/* Test for hitting a breakpoint at sys_open for what ever the number + * of iterations required by the variable repeat_test. + */ +static struct test_struct sys_open_test[] = { + { "?", "S0*" }, /* Clear break points */ + { "sys_open", "OK", sw_break, }, /* set sw breakpoint */ + { "c", "T0*", NULL, get_thread_id_continue }, /* Continue */ + { "sys_open", "OK", sw_rem_break }, /*remove breakpoint */ + { "g", "sys_open", NULL, check_and_rewind_pc }, /* check location */ + { "write", "OK", write_regs, emul_reset }, /* Write registers */ + { "s", "T0*", emul_sstep_get, emul_sstep_put }, /* Single step */ + { "g", "sys_open", NULL, check_single_step }, + { "sys_open", "OK", sw_break, }, /* set sw breakpoint */ + { "7", "T0*", skip_back_repeat_test }, /* Loop based on repeat_test */ + { "D", "OK", NULL, final_ack_set }, /* detach and unregister I/O */ + { "", "", get_cont_catch, put_cont_catch }, +}; + +/* + * Test for hitting a simple hw breakpoint + */ +static struct test_struct hw_breakpoint_test[] = { + { "?", "S0*" }, /* Clear break points */ + { "kgdbts_break_test", "OK", hw_break, }, /* set hw breakpoint */ + { "c", "T0*", }, /* Continue */ + { "g", "kgdbts_break_test", NULL, check_and_rewind_pc }, + { "write", "OK", write_regs }, + { "kgdbts_break_test", "OK", hw_rem_break }, /*remove breakpoint */ + { "D", "OK" }, /* Detach */ + { "D", "OK", NULL, got_break }, /* On success we made it here */ + { "", "" }, +}; + +/* + * Test for hitting a hw write breakpoint + */ +static struct test_struct hw_write_break_test[] = { + { "?", "S0*" }, /* Clear break points */ + { "hw_break_val", "OK", hw_write_break, }, /* set hw breakpoint */ + { "c", "T0*", NULL, got_break }, /* Continue */ + { "g", "silent", NULL, check_and_rewind_pc }, + { "write", "OK", write_regs }, + { "hw_break_val", "OK", hw_rem_write_break }, /*remove breakpoint */ + { "D", "OK" }, /* Detach */ + { "D", "OK", NULL, got_break }, /* On success we made it here */ + { "", "" }, +}; + +/* + * Test for hitting a hw access breakpoint + */ +static struct test_struct hw_access_break_test[] = { + { "?", "S0*" }, /* Clear break points */ + { "hw_break_val", "OK", hw_access_break, }, /* set hw breakpoint */ + { "c", "T0*", NULL, got_break }, /* Continue */ + { "g", "silent", NULL, check_and_rewind_pc }, + { "write", "OK", write_regs }, + { "hw_break_val", "OK", hw_rem_access_break }, /*remove breakpoint */ + { "D", "OK" }, /* Detach */ + { "D", "OK", NULL, got_break }, /* On success we made it here */ + { "", "" }, +}; + +/* + * Test for hitting a hw access breakpoint + */ +static struct test_struct nmi_sleep_test[] = { + { "?", "S0*" }, /* Clear break points */ + { "c", "T0*", NULL, got_break }, /* Continue */ + { "D", "OK" }, /* Detach */ + { "D", "OK", NULL, got_break }, /* On success we made it here */ + { "", "" }, +}; + +static void fill_get_buf(char *buf) +{ + unsigned char checksum = 0; + int count = 0; + char ch; + + strcpy(get_buf, "$"); + strcat(get_buf, buf); + while ((ch = buf[count])) { + checksum += ch; + count++; + } + strcat(get_buf, "#"); + get_buf[count + 2] = hex_asc_hi(checksum); + get_buf[count + 3] = hex_asc_lo(checksum); + get_buf[count + 4] = '\0'; + v2printk("get%i: %s\n", ts.idx, get_buf); +} + +static int validate_simple_test(char *put_str) +{ + char *chk_str; + + if (ts.tst[ts.idx].put_handler) + return ts.tst[ts.idx].put_handler(put_str, + ts.tst[ts.idx].put); + + chk_str = ts.tst[ts.idx].put; + if (*put_str == '$') + put_str++; + + while (*chk_str != '\0' && *put_str != '\0') { + /* If someone does a * to match the rest of the string, allow + * it, or stop if the received string is complete. + */ + if (*put_str == '#' || *chk_str == '*') + return 0; + if (*put_str != *chk_str) + return 1; + + chk_str++; + put_str++; + } + if (*chk_str == '\0' && (*put_str == '\0' || *put_str == '#')) + return 0; + + return 1; +} + +static int run_simple_test(int is_get_char, int chr) +{ + int ret = 0; + if (is_get_char) { + /* Send an ACK on the get if a prior put completed and set the + * send ack variable + */ + if (send_ack) { + send_ack = 0; + return '+'; + } + /* On the first get char, fill the transmit buffer and then + * take from the get_string. + */ + if (get_buf_cnt == 0) { + if (ts.tst[ts.idx].get_handler) + ts.tst[ts.idx].get_handler(ts.tst[ts.idx].get); + else + fill_get_buf(ts.tst[ts.idx].get); + } + + if (get_buf[get_buf_cnt] == '\0') { + eprintk("kgdbts: ERROR GET: EOB on '%s' at %i\n", + ts.name, ts.idx); + get_buf_cnt = 0; + fill_get_buf("D"); + } + ret = get_buf[get_buf_cnt]; + get_buf_cnt++; + return ret; + } + + /* This callback is a put char which is when kgdb sends data to + * this I/O module. + */ + if (ts.tst[ts.idx].get[0] == '\0' && ts.tst[ts.idx].put[0] == '\0' && + !ts.tst[ts.idx].get_handler) { + eprintk("kgdbts: ERROR: beyond end of test on" + " '%s' line %i\n", ts.name, ts.idx); + return 0; + } + + if (put_buf_cnt >= BUFMAX) { + eprintk("kgdbts: ERROR: put buffer overflow on" + " '%s' line %i\n", ts.name, ts.idx); + put_buf_cnt = 0; + return 0; + } + /* Ignore everything until the first valid packet start '$' */ + if (put_buf_cnt == 0 && chr != '$') + return 0; + + put_buf[put_buf_cnt] = chr; + put_buf_cnt++; + + /* End of packet == #XX so look for the '#' */ + if (put_buf_cnt > 3 && put_buf[put_buf_cnt - 3] == '#') { + if (put_buf_cnt >= BUFMAX) { + eprintk("kgdbts: ERROR: put buffer overflow on" + " '%s' line %i\n", ts.name, ts.idx); + put_buf_cnt = 0; + return 0; + } + put_buf[put_buf_cnt] = '\0'; + v2printk("put%i: %s\n", ts.idx, put_buf); + /* Trigger check here */ + if (ts.validate_put && ts.validate_put(put_buf)) { + eprintk("kgdbts: ERROR PUT: end of test " + "buffer on '%s' line %i expected %s got %s\n", + ts.name, ts.idx, ts.tst[ts.idx].put, put_buf); + } + ts.idx++; + put_buf_cnt = 0; + get_buf_cnt = 0; + send_ack = 1; + } + return 0; +} + +static void init_simple_test(void) +{ + memset(&ts, 0, sizeof(ts)); + ts.run_test = run_simple_test; + ts.validate_put = validate_simple_test; +} + +static void run_plant_and_detach_test(int is_early) +{ + char before[BREAK_INSTR_SIZE]; + char after[BREAK_INSTR_SIZE]; + + probe_kernel_read(before, (char *)kgdbts_break_test, + BREAK_INSTR_SIZE); + init_simple_test(); + ts.tst = plant_and_detach_test; + ts.name = "plant_and_detach_test"; + /* Activate test with initial breakpoint */ + if (!is_early) + kgdb_breakpoint(); + probe_kernel_read(after, (char *)kgdbts_break_test, + BREAK_INSTR_SIZE); + if (memcmp(before, after, BREAK_INSTR_SIZE)) { + printk(KERN_CRIT "kgdbts: ERROR kgdb corrupted memory\n"); + panic("kgdb memory corruption"); + } + + /* complete the detach test */ + if (!is_early) + kgdbts_break_test(); +} + +static void run_breakpoint_test(int is_hw_breakpoint) +{ + test_complete = 0; + init_simple_test(); + if (is_hw_breakpoint) { + ts.tst = hw_breakpoint_test; + ts.name = "hw_breakpoint_test"; + } else { + ts.tst = sw_breakpoint_test; + ts.name = "sw_breakpoint_test"; + } + /* Activate test with initial breakpoint */ + kgdb_breakpoint(); + /* run code with the break point in it */ + kgdbts_break_test(); + kgdb_breakpoint(); + + if (test_complete) + return; + + eprintk("kgdbts: ERROR %s test failed\n", ts.name); + if (is_hw_breakpoint) + hwbreaks_ok = 0; +} + +static void run_hw_break_test(int is_write_test) +{ + test_complete = 0; + init_simple_test(); + if (is_write_test) { + ts.tst = hw_write_break_test; + ts.name = "hw_write_break_test"; + } else { + ts.tst = hw_access_break_test; + ts.name = "hw_access_break_test"; + } + /* Activate test with initial breakpoint */ + kgdb_breakpoint(); + hw_break_val_access(); + if (is_write_test) { + if (test_complete == 2) { + eprintk("kgdbts: ERROR %s broke on access\n", + ts.name); + hwbreaks_ok = 0; + } + hw_break_val_write(); + } + kgdb_breakpoint(); + + if (test_complete == 1) + return; + + eprintk("kgdbts: ERROR %s test failed\n", ts.name); + hwbreaks_ok = 0; +} + +static void run_nmi_sleep_test(int nmi_sleep) +{ + unsigned long flags; + + init_simple_test(); + ts.tst = nmi_sleep_test; + ts.name = "nmi_sleep_test"; + /* Activate test with initial breakpoint */ + kgdb_breakpoint(); + local_irq_save(flags); + mdelay(nmi_sleep*1000); + touch_nmi_watchdog(); + local_irq_restore(flags); + if (test_complete != 2) + eprintk("kgdbts: ERROR nmi_test did not hit nmi\n"); + kgdb_breakpoint(); + if (test_complete == 1) + return; + + eprintk("kgdbts: ERROR %s test failed\n", ts.name); +} + +static void run_bad_read_test(void) +{ + init_simple_test(); + ts.tst = bad_read_test; + ts.name = "bad_read_test"; + /* Activate test with initial breakpoint */ + kgdb_breakpoint(); +} + +static void run_do_fork_test(void) +{ + init_simple_test(); + ts.tst = do_fork_test; + ts.name = "do_fork_test"; + /* Activate test with initial breakpoint */ + kgdb_breakpoint(); +} + +static void run_sys_open_test(void) +{ + init_simple_test(); + ts.tst = sys_open_test; + ts.name = "sys_open_test"; + /* Activate test with initial breakpoint */ + kgdb_breakpoint(); +} + +static void run_singlestep_break_test(void) +{ + init_simple_test(); + ts.tst = singlestep_break_test; + ts.name = "singlestep_breakpoint_test"; + /* Activate test with initial breakpoint */ + kgdb_breakpoint(); + kgdbts_break_test(); + kgdbts_break_test(); +} + +static void kgdbts_run_tests(void) +{ + char *ptr; + int fork_test = 0; + int do_sys_open_test = 0; + int sstep_test = 1000; + int nmi_sleep = 0; + int i; + + verbose = 0; + if (strstr(config, "V1")) + verbose = 1; + if (strstr(config, "V2")) + verbose = 2; + + ptr = strchr(config, 'F'); + if (ptr) + fork_test = simple_strtol(ptr + 1, NULL, 10); + ptr = strchr(config, 'S'); + if (ptr) + do_sys_open_test = simple_strtol(ptr + 1, NULL, 10); + ptr = strchr(config, 'N'); + if (ptr) + nmi_sleep = simple_strtol(ptr+1, NULL, 10); + ptr = strchr(config, 'I'); + if (ptr) + sstep_test = simple_strtol(ptr+1, NULL, 10); + + /* All HW break point tests */ + if (arch_kgdb_ops.flags & KGDB_HW_BREAKPOINT) { + hwbreaks_ok = 1; + v1printk("kgdbts:RUN hw breakpoint test\n"); + run_breakpoint_test(1); + v1printk("kgdbts:RUN hw write breakpoint test\n"); + run_hw_break_test(1); + v1printk("kgdbts:RUN access write breakpoint test\n"); + run_hw_break_test(0); + } + + /* required internal KGDB tests */ + v1printk("kgdbts:RUN plant and detach test\n"); + run_plant_and_detach_test(0); + v1printk("kgdbts:RUN sw breakpoint test\n"); + run_breakpoint_test(0); + v1printk("kgdbts:RUN bad memory access test\n"); + run_bad_read_test(); + v1printk("kgdbts:RUN singlestep test %i iterations\n", sstep_test); + for (i = 0; i < sstep_test; i++) { + run_singlestep_break_test(); + if (i % 100 == 0) + v1printk("kgdbts:RUN singlestep [%i/%i]\n", + i, sstep_test); + } + + /* ===Optional tests=== */ + + if (nmi_sleep) { + v1printk("kgdbts:RUN NMI sleep %i seconds test\n", nmi_sleep); + run_nmi_sleep_test(nmi_sleep); + } + + /* If the do_fork test is run it will be the last test that is + * executed because a kernel thread will be spawned at the very + * end to unregister the debug hooks. + */ + if (fork_test) { + repeat_test = fork_test; + printk(KERN_INFO "kgdbts:RUN do_fork for %i breakpoints\n", + repeat_test); + kthread_run(kgdbts_unreg_thread, NULL, "kgdbts_unreg"); + run_do_fork_test(); + return; + } + + /* If the sys_open test is run it will be the last test that is + * executed because a kernel thread will be spawned at the very + * end to unregister the debug hooks. + */ + if (do_sys_open_test) { + repeat_test = do_sys_open_test; + printk(KERN_INFO "kgdbts:RUN sys_open for %i breakpoints\n", + repeat_test); + kthread_run(kgdbts_unreg_thread, NULL, "kgdbts_unreg"); + run_sys_open_test(); + return; + } + /* Shutdown and unregister */ + kgdb_unregister_io_module(&kgdbts_io_ops); + configured = 0; +} + +static int kgdbts_option_setup(char *opt) +{ + if (strlen(opt) >= MAX_CONFIG_LEN) { + printk(KERN_ERR "kgdbts: config string too long\n"); + return 1; + } + strcpy(config, opt); + return 1; +} + +__setup("kgdbts=", kgdbts_option_setup); + +static int configure_kgdbts(void) +{ + int err = 0; + + if (!strlen(config) || isspace(config[0])) + goto noconfig; + + final_ack = 0; + run_plant_and_detach_test(1); + + err = kgdb_register_io_module(&kgdbts_io_ops); + if (err) { + configured = 0; + return err; + } + configured = 1; + kgdbts_run_tests(); + + return err; + +noconfig: + config[0] = 0; + configured = 0; + + return err; +} + +static int __init init_kgdbts(void) +{ + /* Already configured? */ + if (configured == 1) + return 0; + + return configure_kgdbts(); +} +device_initcall(init_kgdbts); + +static int kgdbts_get_char(void) +{ + int val = 0; + + if (ts.run_test) + val = ts.run_test(1, 0); + + return val; +} + +static void kgdbts_put_char(u8 chr) +{ + if (ts.run_test) + ts.run_test(0, chr); +} + +static int param_set_kgdbts_var(const char *kmessage, + const struct kernel_param *kp) +{ + size_t len = strlen(kmessage); + + if (len >= MAX_CONFIG_LEN) { + printk(KERN_ERR "kgdbts: config string too long\n"); + return -ENOSPC; + } + + /* Only copy in the string if the init function has not run yet */ + if (configured < 0) { + strcpy(config, kmessage); + return 0; + } + + if (configured == 1) { + printk(KERN_ERR "kgdbts: ERROR: Already configured and running.\n"); + return -EBUSY; + } + + strcpy(config, kmessage); + /* Chop out \n char as a result of echo */ + if (len && config[len - 1] == '\n') + config[len - 1] = '\0'; + + /* Go and configure with the new params. */ + return configure_kgdbts(); +} + +static void kgdbts_pre_exp_handler(void) +{ + /* Increment the module count when the debugger is active */ + if (!kgdb_connected) + try_module_get(THIS_MODULE); +} + +static void kgdbts_post_exp_handler(void) +{ + /* decrement the module count when the debugger detaches */ + if (!kgdb_connected) + module_put(THIS_MODULE); +} + +static struct kgdb_io kgdbts_io_ops = { + .name = "kgdbts", + .read_char = kgdbts_get_char, + .write_char = kgdbts_put_char, + .pre_exception = kgdbts_pre_exp_handler, + .post_exception = kgdbts_post_exp_handler, +}; + +/* + * not really modular, but the easiest way to keep compat with existing + * bootargs behaviour is to continue using module_param here. + */ +module_param_call(kgdbts, param_set_kgdbts_var, param_get_string, &kps, 0644); +MODULE_PARM_DESC(kgdbts, "[F#|S#][N#]"); diff --git a/drivers/misc/lattice-ecp3-config.c b/drivers/misc/lattice-ecp3-config.c new file mode 100644 index 000000000..645d26536 --- /dev/null +++ b/drivers/misc/lattice-ecp3-config.c @@ -0,0 +1,249 @@ +/* + * Copyright (C) 2012 Stefan Roese + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define FIRMWARE_NAME "lattice-ecp3.bit" + +/* + * The JTAG ID's of the supported FPGA's. The ID is 32bit wide + * reversed as noted in the manual. + */ +#define ID_ECP3_17 0xc2088080 +#define ID_ECP3_35 0xc2048080 + +/* FPGA commands */ +#define FPGA_CMD_READ_ID 0x07 /* plus 24 bits */ +#define FPGA_CMD_READ_STATUS 0x09 /* plus 24 bits */ +#define FPGA_CMD_CLEAR 0x70 +#define FPGA_CMD_REFRESH 0x71 +#define FPGA_CMD_WRITE_EN 0x4a /* plus 2 bits */ +#define FPGA_CMD_WRITE_DIS 0x4f /* plus 8 bits */ +#define FPGA_CMD_WRITE_INC 0x41 /* plus 0 bits */ + +/* + * The status register is 32bit revered, DONE is bit 17 from the TN1222.pdf + * (LatticeECP3 Slave SPI Port User's Guide) + */ +#define FPGA_STATUS_DONE 0x00004000 +#define FPGA_STATUS_CLEARED 0x00010000 + +#define FPGA_CLEAR_TIMEOUT 5000 /* max. 5000ms for FPGA clear */ +#define FPGA_CLEAR_MSLEEP 10 +#define FPGA_CLEAR_LOOP_COUNT (FPGA_CLEAR_TIMEOUT / FPGA_CLEAR_MSLEEP) + +struct fpga_data { + struct completion fw_loaded; +}; + +struct ecp3_dev { + u32 jedec_id; + char *name; +}; + +static const struct ecp3_dev ecp3_dev[] = { + { + .jedec_id = ID_ECP3_17, + .name = "Lattice ECP3-17", + }, + { + .jedec_id = ID_ECP3_35, + .name = "Lattice ECP3-35", + }, +}; + +static void firmware_load(const struct firmware *fw, void *context) +{ + struct spi_device *spi = (struct spi_device *)context; + struct fpga_data *data = spi_get_drvdata(spi); + u8 *buffer; + int ret; + u8 txbuf[8]; + u8 rxbuf[8]; + int rx_len = 8; + int i; + u32 jedec_id; + u32 status; + + if (fw == NULL) { + dev_err(&spi->dev, "Cannot load firmware, aborting\n"); + goto out; + } + + if (fw->size == 0) { + dev_err(&spi->dev, "Error: Firmware size is 0!\n"); + goto out; + } + + /* Fill dummy data (24 stuffing bits for commands) */ + txbuf[1] = 0x00; + txbuf[2] = 0x00; + txbuf[3] = 0x00; + + /* Trying to speak with the FPGA via SPI... */ + txbuf[0] = FPGA_CMD_READ_ID; + ret = spi_write_then_read(spi, txbuf, 8, rxbuf, rx_len); + jedec_id = get_unaligned_be32(&rxbuf[4]); + dev_dbg(&spi->dev, "FPGA JTAG ID=%08x\n", jedec_id); + + for (i = 0; i < ARRAY_SIZE(ecp3_dev); i++) { + if (jedec_id == ecp3_dev[i].jedec_id) + break; + } + if (i == ARRAY_SIZE(ecp3_dev)) { + dev_err(&spi->dev, + "Error: No supported FPGA detected (JEDEC_ID=%08x)!\n", + jedec_id); + goto out; + } + + dev_info(&spi->dev, "FPGA %s detected\n", ecp3_dev[i].name); + + txbuf[0] = FPGA_CMD_READ_STATUS; + ret = spi_write_then_read(spi, txbuf, 8, rxbuf, rx_len); + status = get_unaligned_be32(&rxbuf[4]); + dev_dbg(&spi->dev, "FPGA Status=%08x\n", status); + + buffer = kzalloc(fw->size + 8, GFP_KERNEL); + if (!buffer) { + dev_err(&spi->dev, "Error: Can't allocate memory!\n"); + goto out; + } + + /* + * Insert WRITE_INC command into stream (one SPI frame) + */ + buffer[0] = FPGA_CMD_WRITE_INC; + buffer[1] = 0xff; + buffer[2] = 0xff; + buffer[3] = 0xff; + memcpy(buffer + 4, fw->data, fw->size); + + txbuf[0] = FPGA_CMD_REFRESH; + ret = spi_write(spi, txbuf, 4); + + txbuf[0] = FPGA_CMD_WRITE_EN; + ret = spi_write(spi, txbuf, 4); + + txbuf[0] = FPGA_CMD_CLEAR; + ret = spi_write(spi, txbuf, 4); + + /* + * Wait for FPGA memory to become cleared + */ + for (i = 0; i < FPGA_CLEAR_LOOP_COUNT; i++) { + txbuf[0] = FPGA_CMD_READ_STATUS; + ret = spi_write_then_read(spi, txbuf, 8, rxbuf, rx_len); + status = get_unaligned_be32(&rxbuf[4]); + if (status == FPGA_STATUS_CLEARED) + break; + + msleep(FPGA_CLEAR_MSLEEP); + } + + if (i == FPGA_CLEAR_LOOP_COUNT) { + dev_err(&spi->dev, + "Error: Timeout waiting for FPGA to clear (status=%08x)!\n", + status); + kfree(buffer); + goto out; + } + + dev_info(&spi->dev, "Configuring the FPGA...\n"); + ret = spi_write(spi, buffer, fw->size + 8); + + txbuf[0] = FPGA_CMD_WRITE_DIS; + ret = spi_write(spi, txbuf, 4); + + txbuf[0] = FPGA_CMD_READ_STATUS; + ret = spi_write_then_read(spi, txbuf, 8, rxbuf, rx_len); + status = get_unaligned_be32(&rxbuf[4]); + dev_dbg(&spi->dev, "FPGA Status=%08x\n", status); + + /* Check result */ + if (status & FPGA_STATUS_DONE) + dev_info(&spi->dev, "FPGA successfully configured!\n"); + else + dev_info(&spi->dev, "FPGA not configured (DONE not set)\n"); + + /* + * Don't forget to release the firmware again + */ + release_firmware(fw); + + kfree(buffer); +out: + complete(&data->fw_loaded); +} + +static int lattice_ecp3_probe(struct spi_device *spi) +{ + struct fpga_data *data; + int err; + + data = devm_kzalloc(&spi->dev, sizeof(*data), GFP_KERNEL); + if (!data) { + dev_err(&spi->dev, "Memory allocation for fpga_data failed\n"); + return -ENOMEM; + } + spi_set_drvdata(spi, data); + + init_completion(&data->fw_loaded); + err = request_firmware_nowait(THIS_MODULE, FW_ACTION_HOTPLUG, + FIRMWARE_NAME, &spi->dev, + GFP_KERNEL, spi, firmware_load); + if (err) { + dev_err(&spi->dev, "Firmware loading failed with %d!\n", err); + return err; + } + + dev_info(&spi->dev, "FPGA bitstream configuration driver registered\n"); + + return 0; +} + +static int lattice_ecp3_remove(struct spi_device *spi) +{ + struct fpga_data *data = spi_get_drvdata(spi); + + wait_for_completion(&data->fw_loaded); + + return 0; +} + +static const struct spi_device_id lattice_ecp3_id[] = { + { "ecp3-17", 0 }, + { "ecp3-35", 0 }, + { } +}; +MODULE_DEVICE_TABLE(spi, lattice_ecp3_id); + +static struct spi_driver lattice_ecp3_driver = { + .driver = { + .name = "lattice-ecp3", + }, + .probe = lattice_ecp3_probe, + .remove = lattice_ecp3_remove, + .id_table = lattice_ecp3_id, +}; + +module_spi_driver(lattice_ecp3_driver); + +MODULE_AUTHOR("Stefan Roese "); +MODULE_DESCRIPTION("Lattice ECP3 FPGA configuration via SPI"); +MODULE_LICENSE("GPL"); +MODULE_FIRMWARE(FIRMWARE_NAME); diff --git a/drivers/misc/lis3lv02d/Kconfig b/drivers/misc/lis3lv02d/Kconfig new file mode 100644 index 000000000..8f474e6fc --- /dev/null +++ b/drivers/misc/lis3lv02d/Kconfig @@ -0,0 +1,37 @@ +# +# STMicroelectonics LIS3LV02D and similar accelerometers +# + +config SENSORS_LIS3_SPI + tristate "STMicroeletronics LIS3LV02Dx three-axis digital accelerometer (SPI)" + depends on !ACPI && SPI_MASTER && INPUT + select SENSORS_LIS3LV02D + default n + help + This driver provides support for the LIS3LV02Dx accelerometer connected + via SPI. The accelerometer data is readable via + /sys/devices/platform/lis3lv02d. + + This driver also provides an absolute input class device, allowing + the laptop to act as a pinball machine-esque joystick. + + This driver can also be built as modules. If so, the core module + will be called lis3lv02d and a specific module for the SPI transport + is called lis3lv02d_spi. + +config SENSORS_LIS3_I2C + tristate "STMicroeletronics LIS3LV02Dx three-axis digital accelerometer (I2C)" + depends on I2C && INPUT + select SENSORS_LIS3LV02D + default n + help + This driver provides support for the LIS3LV02Dx accelerometer connected + via I2C. The accelerometer data is readable via + /sys/devices/platform/lis3lv02d. + + This driver also provides an absolute input class device, allowing + the device to act as a pinball machine-esque joystick. + + This driver can also be built as modules. If so, the core module + will be called lis3lv02d and a specific module for the I2C transport + is called lis3lv02d_i2c. diff --git a/drivers/misc/lis3lv02d/Makefile b/drivers/misc/lis3lv02d/Makefile new file mode 100644 index 000000000..4bf58b16f --- /dev/null +++ b/drivers/misc/lis3lv02d/Makefile @@ -0,0 +1,7 @@ +# +# STMicroelectonics LIS3LV02D and similar accelerometers +# + +obj-$(CONFIG_SENSORS_LIS3LV02D) += lis3lv02d.o +obj-$(CONFIG_SENSORS_LIS3_SPI) += lis3lv02d_spi.o +obj-$(CONFIG_SENSORS_LIS3_I2C) += lis3lv02d_i2c.o diff --git a/drivers/misc/lis3lv02d/lis3lv02d.c b/drivers/misc/lis3lv02d/lis3lv02d.c new file mode 100644 index 000000000..21ac34b38 --- /dev/null +++ b/drivers/misc/lis3lv02d/lis3lv02d.c @@ -0,0 +1,1274 @@ +/* + * lis3lv02d.c - ST LIS3LV02DL accelerometer driver + * + * Copyright (C) 2007-2008 Yan Burman + * Copyright (C) 2008 Eric Piel + * Copyright (C) 2008-2009 Pavel Machek + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "lis3lv02d.h" + +#define DRIVER_NAME "lis3lv02d" + +/* joystick device poll interval in milliseconds */ +#define MDPS_POLL_INTERVAL 50 +#define MDPS_POLL_MIN 0 +#define MDPS_POLL_MAX 2000 + +#define LIS3_SYSFS_POWERDOWN_DELAY 5000 /* In milliseconds */ + +#define SELFTEST_OK 0 +#define SELFTEST_FAIL -1 +#define SELFTEST_IRQ -2 + +#define IRQ_LINE0 0 +#define IRQ_LINE1 1 + +/* + * The sensor can also generate interrupts (DRDY) but it's pretty pointless + * because they are generated even if the data do not change. So it's better + * to keep the interrupt for the free-fall event. The values are updated at + * 40Hz (at the lowest frequency), but as it can be pretty time consuming on + * some low processor, we poll the sensor only at 20Hz... enough for the + * joystick. + */ + +#define LIS3_PWRON_DELAY_WAI_12B (5000) +#define LIS3_PWRON_DELAY_WAI_8B (3000) + +/* + * LIS3LV02D spec says 1024 LSBs corresponds 1 G -> 1LSB is 1000/1024 mG + * LIS302D spec says: 18 mG / digit + * LIS3_ACCURACY is used to increase accuracy of the intermediate + * calculation results. + */ +#define LIS3_ACCURACY 1024 +/* Sensitivity values for -2G +2G scale */ +#define LIS3_SENSITIVITY_12B ((LIS3_ACCURACY * 1000) / 1024) +#define LIS3_SENSITIVITY_8B (18 * LIS3_ACCURACY) + +/* + * LIS331DLH spec says 1LSBs corresponds 4G/4096 -> 1LSB is 1000/1024 mG. + * Below macros defines sensitivity values for +/-2G. Dataout bits for + * +/-2G range is 12 bits so 4 bits adjustment must be done to get 12bit + * data from 16bit value. Currently this driver supports only 2G range. + */ +#define LIS3DLH_SENSITIVITY_2G ((LIS3_ACCURACY * 1000) / 1024) +#define SHIFT_ADJ_2G 4 + +#define LIS3_DEFAULT_FUZZ_12B 3 +#define LIS3_DEFAULT_FLAT_12B 3 +#define LIS3_DEFAULT_FUZZ_8B 1 +#define LIS3_DEFAULT_FLAT_8B 1 + +struct lis3lv02d lis3_dev = { + .misc_wait = __WAIT_QUEUE_HEAD_INITIALIZER(lis3_dev.misc_wait), +}; +EXPORT_SYMBOL_GPL(lis3_dev); + +/* just like param_set_int() but does sanity-check so that it won't point + * over the axis array size + */ +static int param_set_axis(const char *val, const struct kernel_param *kp) +{ + int ret = param_set_int(val, kp); + if (!ret) { + int val = *(int *)kp->arg; + if (val < 0) + val = -val; + if (!val || val > 3) + return -EINVAL; + } + return ret; +} + +static const struct kernel_param_ops param_ops_axis = { + .set = param_set_axis, + .get = param_get_int, +}; + +#define param_check_axis(name, p) param_check_int(name, p) + +module_param_array_named(axes, lis3_dev.ac.as_array, axis, NULL, 0644); +MODULE_PARM_DESC(axes, "Axis-mapping for x,y,z directions"); + +static s16 lis3lv02d_read_8(struct lis3lv02d *lis3, int reg) +{ + s8 lo; + if (lis3->read(lis3, reg, &lo) < 0) + return 0; + + return lo; +} + +static s16 lis3lv02d_read_12(struct lis3lv02d *lis3, int reg) +{ + u8 lo, hi; + + lis3->read(lis3, reg - 1, &lo); + lis3->read(lis3, reg, &hi); + /* In "12 bit right justified" mode, bit 6, bit 7, bit 8 = bit 5 */ + return (s16)((hi << 8) | lo); +} + +/* 12bits for 2G range, 13 bits for 4G range and 14 bits for 8G range */ +static s16 lis331dlh_read_data(struct lis3lv02d *lis3, int reg) +{ + u8 lo, hi; + int v; + + lis3->read(lis3, reg - 1, &lo); + lis3->read(lis3, reg, &hi); + v = (int) ((hi << 8) | lo); + + return (s16) v >> lis3->shift_adj; +} + +/** + * lis3lv02d_get_axis - For the given axis, give the value converted + * @axis: 1,2,3 - can also be negative + * @hw_values: raw values returned by the hardware + * + * Returns the converted value. + */ +static inline int lis3lv02d_get_axis(s8 axis, int hw_values[3]) +{ + if (axis > 0) + return hw_values[axis - 1]; + else + return -hw_values[-axis - 1]; +} + +/** + * lis3lv02d_get_xyz - Get X, Y and Z axis values from the accelerometer + * @lis3: pointer to the device struct + * @x: where to store the X axis value + * @y: where to store the Y axis value + * @z: where to store the Z axis value + * + * Note that 40Hz input device can eat up about 10% CPU at 800MHZ + */ +static void lis3lv02d_get_xyz(struct lis3lv02d *lis3, int *x, int *y, int *z) +{ + int position[3]; + int i; + + if (lis3->blkread) { + if (lis3->whoami == WAI_12B) { + u16 data[3]; + lis3->blkread(lis3, OUTX_L, 6, (u8 *)data); + for (i = 0; i < 3; i++) + position[i] = (s16)le16_to_cpu(data[i]); + } else { + u8 data[5]; + /* Data: x, dummy, y, dummy, z */ + lis3->blkread(lis3, OUTX, 5, data); + for (i = 0; i < 3; i++) + position[i] = (s8)data[i * 2]; + } + } else { + position[0] = lis3->read_data(lis3, OUTX); + position[1] = lis3->read_data(lis3, OUTY); + position[2] = lis3->read_data(lis3, OUTZ); + } + + for (i = 0; i < 3; i++) + position[i] = (position[i] * lis3->scale) / LIS3_ACCURACY; + + *x = lis3lv02d_get_axis(lis3->ac.x, position); + *y = lis3lv02d_get_axis(lis3->ac.y, position); + *z = lis3lv02d_get_axis(lis3->ac.z, position); +} + +/* conversion btw sampling rate and the register values */ +static int lis3_12_rates[4] = {40, 160, 640, 2560}; +static int lis3_8_rates[2] = {100, 400}; +static int lis3_3dc_rates[16] = {0, 1, 10, 25, 50, 100, 200, 400, 1600, 5000}; +static int lis3_3dlh_rates[4] = {50, 100, 400, 1000}; + +/* ODR is Output Data Rate */ +static int lis3lv02d_get_odr_index(struct lis3lv02d *lis3) +{ + u8 ctrl; + int shift; + + lis3->read(lis3, CTRL_REG1, &ctrl); + ctrl &= lis3->odr_mask; + shift = ffs(lis3->odr_mask) - 1; + return (ctrl >> shift); +} + +static int lis3lv02d_get_pwron_wait(struct lis3lv02d *lis3) +{ + int odr_idx = lis3lv02d_get_odr_index(lis3); + int div = lis3->odrs[odr_idx]; + + if (div == 0) { + if (odr_idx == 0) { + /* Power-down mode, not sampling no need to sleep */ + return 0; + } + + dev_err(&lis3->pdev->dev, "Error unknown odrs-index: %d\n", odr_idx); + return -ENXIO; + } + + /* LIS3 power on delay is quite long */ + msleep(lis3->pwron_delay / div); + return 0; +} + +static int lis3lv02d_set_odr(struct lis3lv02d *lis3, int rate) +{ + u8 ctrl; + int i, len, shift; + + if (!rate) + return -EINVAL; + + lis3->read(lis3, CTRL_REG1, &ctrl); + ctrl &= ~lis3->odr_mask; + len = 1 << hweight_long(lis3->odr_mask); /* # of possible values */ + shift = ffs(lis3->odr_mask) - 1; + + for (i = 0; i < len; i++) + if (lis3->odrs[i] == rate) { + lis3->write(lis3, CTRL_REG1, + ctrl | (i << shift)); + return 0; + } + return -EINVAL; +} + +static int lis3lv02d_selftest(struct lis3lv02d *lis3, s16 results[3]) +{ + u8 ctlreg, reg; + s16 x, y, z; + u8 selftest; + int ret; + u8 ctrl_reg_data; + unsigned char irq_cfg; + + mutex_lock(&lis3->mutex); + + irq_cfg = lis3->irq_cfg; + if (lis3->whoami == WAI_8B) { + lis3->data_ready_count[IRQ_LINE0] = 0; + lis3->data_ready_count[IRQ_LINE1] = 0; + + /* Change interrupt cfg to data ready for selftest */ + atomic_inc(&lis3->wake_thread); + lis3->irq_cfg = LIS3_IRQ1_DATA_READY | LIS3_IRQ2_DATA_READY; + lis3->read(lis3, CTRL_REG3, &ctrl_reg_data); + lis3->write(lis3, CTRL_REG3, (ctrl_reg_data & + ~(LIS3_IRQ1_MASK | LIS3_IRQ2_MASK)) | + (LIS3_IRQ1_DATA_READY | LIS3_IRQ2_DATA_READY)); + } + + if ((lis3->whoami == WAI_3DC) || (lis3->whoami == WAI_3DLH)) { + ctlreg = CTRL_REG4; + selftest = CTRL4_ST0; + } else { + ctlreg = CTRL_REG1; + if (lis3->whoami == WAI_12B) + selftest = CTRL1_ST; + else + selftest = CTRL1_STP; + } + + lis3->read(lis3, ctlreg, ®); + lis3->write(lis3, ctlreg, (reg | selftest)); + ret = lis3lv02d_get_pwron_wait(lis3); + if (ret) + goto fail; + + /* Read directly to avoid axis remap */ + x = lis3->read_data(lis3, OUTX); + y = lis3->read_data(lis3, OUTY); + z = lis3->read_data(lis3, OUTZ); + + /* back to normal settings */ + lis3->write(lis3, ctlreg, reg); + ret = lis3lv02d_get_pwron_wait(lis3); + if (ret) + goto fail; + + results[0] = x - lis3->read_data(lis3, OUTX); + results[1] = y - lis3->read_data(lis3, OUTY); + results[2] = z - lis3->read_data(lis3, OUTZ); + + ret = 0; + + if (lis3->whoami == WAI_8B) { + /* Restore original interrupt configuration */ + atomic_dec(&lis3->wake_thread); + lis3->write(lis3, CTRL_REG3, ctrl_reg_data); + lis3->irq_cfg = irq_cfg; + + if ((irq_cfg & LIS3_IRQ1_MASK) && + lis3->data_ready_count[IRQ_LINE0] < 2) { + ret = SELFTEST_IRQ; + goto fail; + } + + if ((irq_cfg & LIS3_IRQ2_MASK) && + lis3->data_ready_count[IRQ_LINE1] < 2) { + ret = SELFTEST_IRQ; + goto fail; + } + } + + if (lis3->pdata) { + int i; + for (i = 0; i < 3; i++) { + /* Check against selftest acceptance limits */ + if ((results[i] < lis3->pdata->st_min_limits[i]) || + (results[i] > lis3->pdata->st_max_limits[i])) { + ret = SELFTEST_FAIL; + goto fail; + } + } + } + + /* test passed */ +fail: + mutex_unlock(&lis3->mutex); + return ret; +} + +/* + * Order of registers in the list affects to order of the restore process. + * Perhaps it is a good idea to set interrupt enable register as a last one + * after all other configurations + */ +static u8 lis3_wai8_regs[] = { FF_WU_CFG_1, FF_WU_THS_1, FF_WU_DURATION_1, + FF_WU_CFG_2, FF_WU_THS_2, FF_WU_DURATION_2, + CLICK_CFG, CLICK_SRC, CLICK_THSY_X, CLICK_THSZ, + CLICK_TIMELIMIT, CLICK_LATENCY, CLICK_WINDOW, + CTRL_REG1, CTRL_REG2, CTRL_REG3}; + +static u8 lis3_wai12_regs[] = {FF_WU_CFG, FF_WU_THS_L, FF_WU_THS_H, + FF_WU_DURATION, DD_CFG, DD_THSI_L, DD_THSI_H, + DD_THSE_L, DD_THSE_H, + CTRL_REG1, CTRL_REG3, CTRL_REG2}; + +static inline void lis3_context_save(struct lis3lv02d *lis3) +{ + int i; + for (i = 0; i < lis3->regs_size; i++) + lis3->read(lis3, lis3->regs[i], &lis3->reg_cache[i]); + lis3->regs_stored = true; +} + +static inline void lis3_context_restore(struct lis3lv02d *lis3) +{ + int i; + if (lis3->regs_stored) + for (i = 0; i < lis3->regs_size; i++) + lis3->write(lis3, lis3->regs[i], lis3->reg_cache[i]); +} + +void lis3lv02d_poweroff(struct lis3lv02d *lis3) +{ + if (lis3->reg_ctrl) + lis3_context_save(lis3); + /* disable X,Y,Z axis and power down */ + lis3->write(lis3, CTRL_REG1, 0x00); + if (lis3->reg_ctrl) + lis3->reg_ctrl(lis3, LIS3_REG_OFF); +} +EXPORT_SYMBOL_GPL(lis3lv02d_poweroff); + +int lis3lv02d_poweron(struct lis3lv02d *lis3) +{ + int err; + u8 reg; + + lis3->init(lis3); + + /* + * Common configuration + * BDU: (12 bits sensors only) LSB and MSB values are not updated until + * both have been read. So the value read will always be correct. + * Set BOOT bit to refresh factory tuning values. + */ + if (lis3->pdata) { + lis3->read(lis3, CTRL_REG2, ®); + if (lis3->whoami == WAI_12B) + reg |= CTRL2_BDU | CTRL2_BOOT; + else if (lis3->whoami == WAI_3DLH) + reg |= CTRL2_BOOT_3DLH; + else + reg |= CTRL2_BOOT_8B; + lis3->write(lis3, CTRL_REG2, reg); + + if (lis3->whoami == WAI_3DLH) { + lis3->read(lis3, CTRL_REG4, ®); + reg |= CTRL4_BDU; + lis3->write(lis3, CTRL_REG4, reg); + } + } + + err = lis3lv02d_get_pwron_wait(lis3); + if (err) + return err; + + if (lis3->reg_ctrl) + lis3_context_restore(lis3); + + return 0; +} +EXPORT_SYMBOL_GPL(lis3lv02d_poweron); + + +static void lis3lv02d_joystick_poll(struct input_polled_dev *pidev) +{ + struct lis3lv02d *lis3 = pidev->private; + int x, y, z; + + mutex_lock(&lis3->mutex); + lis3lv02d_get_xyz(lis3, &x, &y, &z); + input_report_abs(pidev->input, ABS_X, x); + input_report_abs(pidev->input, ABS_Y, y); + input_report_abs(pidev->input, ABS_Z, z); + input_sync(pidev->input); + mutex_unlock(&lis3->mutex); +} + +static void lis3lv02d_joystick_open(struct input_polled_dev *pidev) +{ + struct lis3lv02d *lis3 = pidev->private; + + if (lis3->pm_dev) + pm_runtime_get_sync(lis3->pm_dev); + + if (lis3->pdata && lis3->whoami == WAI_8B && lis3->idev) + atomic_set(&lis3->wake_thread, 1); + /* + * Update coordinates for the case where poll interval is 0 and + * the chip in running purely under interrupt control + */ + lis3lv02d_joystick_poll(pidev); +} + +static void lis3lv02d_joystick_close(struct input_polled_dev *pidev) +{ + struct lis3lv02d *lis3 = pidev->private; + + atomic_set(&lis3->wake_thread, 0); + if (lis3->pm_dev) + pm_runtime_put(lis3->pm_dev); +} + +static irqreturn_t lis302dl_interrupt(int irq, void *data) +{ + struct lis3lv02d *lis3 = data; + + if (!test_bit(0, &lis3->misc_opened)) + goto out; + + /* + * Be careful: on some HP laptops the bios force DD when on battery and + * the lid is closed. This leads to interrupts as soon as a little move + * is done. + */ + atomic_inc(&lis3->count); + + wake_up_interruptible(&lis3->misc_wait); + kill_fasync(&lis3->async_queue, SIGIO, POLL_IN); +out: + if (atomic_read(&lis3->wake_thread)) + return IRQ_WAKE_THREAD; + return IRQ_HANDLED; +} + +static void lis302dl_interrupt_handle_click(struct lis3lv02d *lis3) +{ + struct input_dev *dev = lis3->idev->input; + u8 click_src; + + mutex_lock(&lis3->mutex); + lis3->read(lis3, CLICK_SRC, &click_src); + + if (click_src & CLICK_SINGLE_X) { + input_report_key(dev, lis3->mapped_btns[0], 1); + input_report_key(dev, lis3->mapped_btns[0], 0); + } + + if (click_src & CLICK_SINGLE_Y) { + input_report_key(dev, lis3->mapped_btns[1], 1); + input_report_key(dev, lis3->mapped_btns[1], 0); + } + + if (click_src & CLICK_SINGLE_Z) { + input_report_key(dev, lis3->mapped_btns[2], 1); + input_report_key(dev, lis3->mapped_btns[2], 0); + } + input_sync(dev); + mutex_unlock(&lis3->mutex); +} + +static inline void lis302dl_data_ready(struct lis3lv02d *lis3, int index) +{ + int dummy; + + /* Dummy read to ack interrupt */ + lis3lv02d_get_xyz(lis3, &dummy, &dummy, &dummy); + lis3->data_ready_count[index]++; +} + +static irqreturn_t lis302dl_interrupt_thread1_8b(int irq, void *data) +{ + struct lis3lv02d *lis3 = data; + u8 irq_cfg = lis3->irq_cfg & LIS3_IRQ1_MASK; + + if (irq_cfg == LIS3_IRQ1_CLICK) + lis302dl_interrupt_handle_click(lis3); + else if (unlikely(irq_cfg == LIS3_IRQ1_DATA_READY)) + lis302dl_data_ready(lis3, IRQ_LINE0); + else + lis3lv02d_joystick_poll(lis3->idev); + + return IRQ_HANDLED; +} + +static irqreturn_t lis302dl_interrupt_thread2_8b(int irq, void *data) +{ + struct lis3lv02d *lis3 = data; + u8 irq_cfg = lis3->irq_cfg & LIS3_IRQ2_MASK; + + if (irq_cfg == LIS3_IRQ2_CLICK) + lis302dl_interrupt_handle_click(lis3); + else if (unlikely(irq_cfg == LIS3_IRQ2_DATA_READY)) + lis302dl_data_ready(lis3, IRQ_LINE1); + else + lis3lv02d_joystick_poll(lis3->idev); + + return IRQ_HANDLED; +} + +static int lis3lv02d_misc_open(struct inode *inode, struct file *file) +{ + struct lis3lv02d *lis3 = container_of(file->private_data, + struct lis3lv02d, miscdev); + + if (test_and_set_bit(0, &lis3->misc_opened)) + return -EBUSY; /* already open */ + + if (lis3->pm_dev) + pm_runtime_get_sync(lis3->pm_dev); + + atomic_set(&lis3->count, 0); + return 0; +} + +static int lis3lv02d_misc_release(struct inode *inode, struct file *file) +{ + struct lis3lv02d *lis3 = container_of(file->private_data, + struct lis3lv02d, miscdev); + + clear_bit(0, &lis3->misc_opened); /* release the device */ + if (lis3->pm_dev) + pm_runtime_put(lis3->pm_dev); + return 0; +} + +static ssize_t lis3lv02d_misc_read(struct file *file, char __user *buf, + size_t count, loff_t *pos) +{ + struct lis3lv02d *lis3 = container_of(file->private_data, + struct lis3lv02d, miscdev); + + DECLARE_WAITQUEUE(wait, current); + u32 data; + unsigned char byte_data; + ssize_t retval = 1; + + if (count < 1) + return -EINVAL; + + add_wait_queue(&lis3->misc_wait, &wait); + while (true) { + set_current_state(TASK_INTERRUPTIBLE); + data = atomic_xchg(&lis3->count, 0); + if (data) + break; + + if (file->f_flags & O_NONBLOCK) { + retval = -EAGAIN; + goto out; + } + + if (signal_pending(current)) { + retval = -ERESTARTSYS; + goto out; + } + + schedule(); + } + + if (data < 255) + byte_data = data; + else + byte_data = 255; + + /* make sure we are not going into copy_to_user() with + * TASK_INTERRUPTIBLE state */ + set_current_state(TASK_RUNNING); + if (copy_to_user(buf, &byte_data, sizeof(byte_data))) + retval = -EFAULT; + +out: + __set_current_state(TASK_RUNNING); + remove_wait_queue(&lis3->misc_wait, &wait); + + return retval; +} + +static __poll_t lis3lv02d_misc_poll(struct file *file, poll_table *wait) +{ + struct lis3lv02d *lis3 = container_of(file->private_data, + struct lis3lv02d, miscdev); + + poll_wait(file, &lis3->misc_wait, wait); + if (atomic_read(&lis3->count)) + return EPOLLIN | EPOLLRDNORM; + return 0; +} + +static int lis3lv02d_misc_fasync(int fd, struct file *file, int on) +{ + struct lis3lv02d *lis3 = container_of(file->private_data, + struct lis3lv02d, miscdev); + + return fasync_helper(fd, file, on, &lis3->async_queue); +} + +static const struct file_operations lis3lv02d_misc_fops = { + .owner = THIS_MODULE, + .llseek = no_llseek, + .read = lis3lv02d_misc_read, + .open = lis3lv02d_misc_open, + .release = lis3lv02d_misc_release, + .poll = lis3lv02d_misc_poll, + .fasync = lis3lv02d_misc_fasync, +}; + +int lis3lv02d_joystick_enable(struct lis3lv02d *lis3) +{ + struct input_dev *input_dev; + int err; + int max_val, fuzz, flat; + int btns[] = {BTN_X, BTN_Y, BTN_Z}; + + if (lis3->idev) + return -EINVAL; + + lis3->idev = input_allocate_polled_device(); + if (!lis3->idev) + return -ENOMEM; + + lis3->idev->poll = lis3lv02d_joystick_poll; + lis3->idev->open = lis3lv02d_joystick_open; + lis3->idev->close = lis3lv02d_joystick_close; + lis3->idev->poll_interval = MDPS_POLL_INTERVAL; + lis3->idev->poll_interval_min = MDPS_POLL_MIN; + lis3->idev->poll_interval_max = MDPS_POLL_MAX; + lis3->idev->private = lis3; + input_dev = lis3->idev->input; + + input_dev->name = "ST LIS3LV02DL Accelerometer"; + input_dev->phys = DRIVER_NAME "/input0"; + input_dev->id.bustype = BUS_HOST; + input_dev->id.vendor = 0; + input_dev->dev.parent = &lis3->pdev->dev; + + set_bit(EV_ABS, input_dev->evbit); + max_val = (lis3->mdps_max_val * lis3->scale) / LIS3_ACCURACY; + if (lis3->whoami == WAI_12B) { + fuzz = LIS3_DEFAULT_FUZZ_12B; + flat = LIS3_DEFAULT_FLAT_12B; + } else { + fuzz = LIS3_DEFAULT_FUZZ_8B; + flat = LIS3_DEFAULT_FLAT_8B; + } + fuzz = (fuzz * lis3->scale) / LIS3_ACCURACY; + flat = (flat * lis3->scale) / LIS3_ACCURACY; + + input_set_abs_params(input_dev, ABS_X, -max_val, max_val, fuzz, flat); + input_set_abs_params(input_dev, ABS_Y, -max_val, max_val, fuzz, flat); + input_set_abs_params(input_dev, ABS_Z, -max_val, max_val, fuzz, flat); + + lis3->mapped_btns[0] = lis3lv02d_get_axis(abs(lis3->ac.x), btns); + lis3->mapped_btns[1] = lis3lv02d_get_axis(abs(lis3->ac.y), btns); + lis3->mapped_btns[2] = lis3lv02d_get_axis(abs(lis3->ac.z), btns); + + err = input_register_polled_device(lis3->idev); + if (err) { + input_free_polled_device(lis3->idev); + lis3->idev = NULL; + } + + return err; +} +EXPORT_SYMBOL_GPL(lis3lv02d_joystick_enable); + +void lis3lv02d_joystick_disable(struct lis3lv02d *lis3) +{ + if (lis3->irq) + free_irq(lis3->irq, lis3); + if (lis3->pdata && lis3->pdata->irq2) + free_irq(lis3->pdata->irq2, lis3); + + if (!lis3->idev) + return; + + if (lis3->irq) + misc_deregister(&lis3->miscdev); + input_unregister_polled_device(lis3->idev); + input_free_polled_device(lis3->idev); + lis3->idev = NULL; +} +EXPORT_SYMBOL_GPL(lis3lv02d_joystick_disable); + +/* Sysfs stuff */ +static void lis3lv02d_sysfs_poweron(struct lis3lv02d *lis3) +{ + /* + * SYSFS functions are fast visitors so put-call + * immediately after the get-call. However, keep + * chip running for a while and schedule delayed + * suspend. This way periodic sysfs calls doesn't + * suffer from relatively long power up time. + */ + + if (lis3->pm_dev) { + pm_runtime_get_sync(lis3->pm_dev); + pm_runtime_put_noidle(lis3->pm_dev); + pm_schedule_suspend(lis3->pm_dev, LIS3_SYSFS_POWERDOWN_DELAY); + } +} + +static ssize_t lis3lv02d_selftest_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct lis3lv02d *lis3 = dev_get_drvdata(dev); + s16 values[3]; + + static const char ok[] = "OK"; + static const char fail[] = "FAIL"; + static const char irq[] = "FAIL_IRQ"; + const char *res; + + lis3lv02d_sysfs_poweron(lis3); + switch (lis3lv02d_selftest(lis3, values)) { + case SELFTEST_FAIL: + res = fail; + break; + case SELFTEST_IRQ: + res = irq; + break; + case SELFTEST_OK: + default: + res = ok; + break; + } + return sprintf(buf, "%s %d %d %d\n", res, + values[0], values[1], values[2]); +} + +static ssize_t lis3lv02d_position_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct lis3lv02d *lis3 = dev_get_drvdata(dev); + int x, y, z; + + lis3lv02d_sysfs_poweron(lis3); + mutex_lock(&lis3->mutex); + lis3lv02d_get_xyz(lis3, &x, &y, &z); + mutex_unlock(&lis3->mutex); + return sprintf(buf, "(%d,%d,%d)\n", x, y, z); +} + +static ssize_t lis3lv02d_rate_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct lis3lv02d *lis3 = dev_get_drvdata(dev); + int odr_idx; + + lis3lv02d_sysfs_poweron(lis3); + + odr_idx = lis3lv02d_get_odr_index(lis3); + return sprintf(buf, "%d\n", lis3->odrs[odr_idx]); +} + +static ssize_t lis3lv02d_rate_set(struct device *dev, + struct device_attribute *attr, const char *buf, + size_t count) +{ + struct lis3lv02d *lis3 = dev_get_drvdata(dev); + unsigned long rate; + int ret; + + ret = kstrtoul(buf, 0, &rate); + if (ret) + return ret; + + lis3lv02d_sysfs_poweron(lis3); + if (lis3lv02d_set_odr(lis3, rate)) + return -EINVAL; + + return count; +} + +static DEVICE_ATTR(selftest, S_IRUSR, lis3lv02d_selftest_show, NULL); +static DEVICE_ATTR(position, S_IRUGO, lis3lv02d_position_show, NULL); +static DEVICE_ATTR(rate, S_IRUGO | S_IWUSR, lis3lv02d_rate_show, + lis3lv02d_rate_set); + +static struct attribute *lis3lv02d_attributes[] = { + &dev_attr_selftest.attr, + &dev_attr_position.attr, + &dev_attr_rate.attr, + NULL +}; + +static const struct attribute_group lis3lv02d_attribute_group = { + .attrs = lis3lv02d_attributes +}; + + +static int lis3lv02d_add_fs(struct lis3lv02d *lis3) +{ + lis3->pdev = platform_device_register_simple(DRIVER_NAME, -1, NULL, 0); + if (IS_ERR(lis3->pdev)) + return PTR_ERR(lis3->pdev); + + platform_set_drvdata(lis3->pdev, lis3); + return sysfs_create_group(&lis3->pdev->dev.kobj, &lis3lv02d_attribute_group); +} + +int lis3lv02d_remove_fs(struct lis3lv02d *lis3) +{ + sysfs_remove_group(&lis3->pdev->dev.kobj, &lis3lv02d_attribute_group); + platform_device_unregister(lis3->pdev); + if (lis3->pm_dev) { + /* Barrier after the sysfs remove */ + pm_runtime_barrier(lis3->pm_dev); + + /* SYSFS may have left chip running. Turn off if necessary */ + if (!pm_runtime_suspended(lis3->pm_dev)) + lis3lv02d_poweroff(lis3); + + pm_runtime_disable(lis3->pm_dev); + pm_runtime_set_suspended(lis3->pm_dev); + } + kfree(lis3->reg_cache); + return 0; +} +EXPORT_SYMBOL_GPL(lis3lv02d_remove_fs); + +static void lis3lv02d_8b_configure(struct lis3lv02d *lis3, + struct lis3lv02d_platform_data *p) +{ + int err; + int ctrl2 = p->hipass_ctrl; + + if (p->click_flags) { + lis3->write(lis3, CLICK_CFG, p->click_flags); + lis3->write(lis3, CLICK_TIMELIMIT, p->click_time_limit); + lis3->write(lis3, CLICK_LATENCY, p->click_latency); + lis3->write(lis3, CLICK_WINDOW, p->click_window); + lis3->write(lis3, CLICK_THSZ, p->click_thresh_z & 0xf); + lis3->write(lis3, CLICK_THSY_X, + (p->click_thresh_x & 0xf) | + (p->click_thresh_y << 4)); + + if (lis3->idev) { + struct input_dev *input_dev = lis3->idev->input; + input_set_capability(input_dev, EV_KEY, BTN_X); + input_set_capability(input_dev, EV_KEY, BTN_Y); + input_set_capability(input_dev, EV_KEY, BTN_Z); + } + } + + if (p->wakeup_flags) { + lis3->write(lis3, FF_WU_CFG_1, p->wakeup_flags); + lis3->write(lis3, FF_WU_THS_1, p->wakeup_thresh & 0x7f); + /* pdata value + 1 to keep this backward compatible*/ + lis3->write(lis3, FF_WU_DURATION_1, p->duration1 + 1); + ctrl2 ^= HP_FF_WU1; /* Xor to keep compatible with old pdata*/ + } + + if (p->wakeup_flags2) { + lis3->write(lis3, FF_WU_CFG_2, p->wakeup_flags2); + lis3->write(lis3, FF_WU_THS_2, p->wakeup_thresh2 & 0x7f); + /* pdata value + 1 to keep this backward compatible*/ + lis3->write(lis3, FF_WU_DURATION_2, p->duration2 + 1); + ctrl2 ^= HP_FF_WU2; /* Xor to keep compatible with old pdata*/ + } + /* Configure hipass filters */ + lis3->write(lis3, CTRL_REG2, ctrl2); + + if (p->irq2) { + err = request_threaded_irq(p->irq2, + NULL, + lis302dl_interrupt_thread2_8b, + IRQF_TRIGGER_RISING | IRQF_ONESHOT | + (p->irq_flags2 & IRQF_TRIGGER_MASK), + DRIVER_NAME, lis3); + if (err < 0) + pr_err("No second IRQ. Limited functionality\n"); + } +} + +#ifdef CONFIG_OF +int lis3lv02d_init_dt(struct lis3lv02d *lis3) +{ + struct lis3lv02d_platform_data *pdata; + struct device_node *np = lis3->of_node; + u32 val; + s32 sval; + + if (!lis3->of_node) + return 0; + + pdata = kzalloc(sizeof(*pdata), GFP_KERNEL); + if (!pdata) + return -ENOMEM; + + if (of_get_property(np, "st,click-single-x", NULL)) + pdata->click_flags |= LIS3_CLICK_SINGLE_X; + if (of_get_property(np, "st,click-double-x", NULL)) + pdata->click_flags |= LIS3_CLICK_DOUBLE_X; + + if (of_get_property(np, "st,click-single-y", NULL)) + pdata->click_flags |= LIS3_CLICK_SINGLE_Y; + if (of_get_property(np, "st,click-double-y", NULL)) + pdata->click_flags |= LIS3_CLICK_DOUBLE_Y; + + if (of_get_property(np, "st,click-single-z", NULL)) + pdata->click_flags |= LIS3_CLICK_SINGLE_Z; + if (of_get_property(np, "st,click-double-z", NULL)) + pdata->click_flags |= LIS3_CLICK_DOUBLE_Z; + + if (!of_property_read_u32(np, "st,click-threshold-x", &val)) + pdata->click_thresh_x = val; + if (!of_property_read_u32(np, "st,click-threshold-y", &val)) + pdata->click_thresh_y = val; + if (!of_property_read_u32(np, "st,click-threshold-z", &val)) + pdata->click_thresh_z = val; + + if (!of_property_read_u32(np, "st,click-time-limit", &val)) + pdata->click_time_limit = val; + if (!of_property_read_u32(np, "st,click-latency", &val)) + pdata->click_latency = val; + if (!of_property_read_u32(np, "st,click-window", &val)) + pdata->click_window = val; + + if (of_get_property(np, "st,irq1-disable", NULL)) + pdata->irq_cfg |= LIS3_IRQ1_DISABLE; + if (of_get_property(np, "st,irq1-ff-wu-1", NULL)) + pdata->irq_cfg |= LIS3_IRQ1_FF_WU_1; + if (of_get_property(np, "st,irq1-ff-wu-2", NULL)) + pdata->irq_cfg |= LIS3_IRQ1_FF_WU_2; + if (of_get_property(np, "st,irq1-data-ready", NULL)) + pdata->irq_cfg |= LIS3_IRQ1_DATA_READY; + if (of_get_property(np, "st,irq1-click", NULL)) + pdata->irq_cfg |= LIS3_IRQ1_CLICK; + + if (of_get_property(np, "st,irq2-disable", NULL)) + pdata->irq_cfg |= LIS3_IRQ2_DISABLE; + if (of_get_property(np, "st,irq2-ff-wu-1", NULL)) + pdata->irq_cfg |= LIS3_IRQ2_FF_WU_1; + if (of_get_property(np, "st,irq2-ff-wu-2", NULL)) + pdata->irq_cfg |= LIS3_IRQ2_FF_WU_2; + if (of_get_property(np, "st,irq2-data-ready", NULL)) + pdata->irq_cfg |= LIS3_IRQ2_DATA_READY; + if (of_get_property(np, "st,irq2-click", NULL)) + pdata->irq_cfg |= LIS3_IRQ2_CLICK; + + if (of_get_property(np, "st,irq-open-drain", NULL)) + pdata->irq_cfg |= LIS3_IRQ_OPEN_DRAIN; + if (of_get_property(np, "st,irq-active-low", NULL)) + pdata->irq_cfg |= LIS3_IRQ_ACTIVE_LOW; + + if (!of_property_read_u32(np, "st,wu-duration-1", &val)) + pdata->duration1 = val; + if (!of_property_read_u32(np, "st,wu-duration-2", &val)) + pdata->duration2 = val; + + if (of_get_property(np, "st,wakeup-x-lo", NULL)) + pdata->wakeup_flags |= LIS3_WAKEUP_X_LO; + if (of_get_property(np, "st,wakeup-x-hi", NULL)) + pdata->wakeup_flags |= LIS3_WAKEUP_X_HI; + if (of_get_property(np, "st,wakeup-y-lo", NULL)) + pdata->wakeup_flags |= LIS3_WAKEUP_Y_LO; + if (of_get_property(np, "st,wakeup-y-hi", NULL)) + pdata->wakeup_flags |= LIS3_WAKEUP_Y_HI; + if (of_get_property(np, "st,wakeup-z-lo", NULL)) + pdata->wakeup_flags |= LIS3_WAKEUP_Z_LO; + if (of_get_property(np, "st,wakeup-z-hi", NULL)) + pdata->wakeup_flags |= LIS3_WAKEUP_Z_HI; + if (of_get_property(np, "st,wakeup-threshold", &val)) + pdata->wakeup_thresh = val; + + if (of_get_property(np, "st,wakeup2-x-lo", NULL)) + pdata->wakeup_flags2 |= LIS3_WAKEUP_X_LO; + if (of_get_property(np, "st,wakeup2-x-hi", NULL)) + pdata->wakeup_flags2 |= LIS3_WAKEUP_X_HI; + if (of_get_property(np, "st,wakeup2-y-lo", NULL)) + pdata->wakeup_flags2 |= LIS3_WAKEUP_Y_LO; + if (of_get_property(np, "st,wakeup2-y-hi", NULL)) + pdata->wakeup_flags2 |= LIS3_WAKEUP_Y_HI; + if (of_get_property(np, "st,wakeup2-z-lo", NULL)) + pdata->wakeup_flags2 |= LIS3_WAKEUP_Z_LO; + if (of_get_property(np, "st,wakeup2-z-hi", NULL)) + pdata->wakeup_flags2 |= LIS3_WAKEUP_Z_HI; + if (of_get_property(np, "st,wakeup2-threshold", &val)) + pdata->wakeup_thresh2 = val; + + if (!of_property_read_u32(np, "st,highpass-cutoff-hz", &val)) { + switch (val) { + case 1: + pdata->hipass_ctrl = LIS3_HIPASS_CUTFF_1HZ; + break; + case 2: + pdata->hipass_ctrl = LIS3_HIPASS_CUTFF_2HZ; + break; + case 4: + pdata->hipass_ctrl = LIS3_HIPASS_CUTFF_4HZ; + break; + case 8: + pdata->hipass_ctrl = LIS3_HIPASS_CUTFF_8HZ; + break; + } + } + + if (of_get_property(np, "st,hipass1-disable", NULL)) + pdata->hipass_ctrl |= LIS3_HIPASS1_DISABLE; + if (of_get_property(np, "st,hipass2-disable", NULL)) + pdata->hipass_ctrl |= LIS3_HIPASS2_DISABLE; + + if (of_property_read_s32(np, "st,axis-x", &sval) == 0) + pdata->axis_x = sval; + if (of_property_read_s32(np, "st,axis-y", &sval) == 0) + pdata->axis_y = sval; + if (of_property_read_s32(np, "st,axis-z", &sval) == 0) + pdata->axis_z = sval; + + if (of_get_property(np, "st,default-rate", NULL)) + pdata->default_rate = val; + + if (of_property_read_s32(np, "st,min-limit-x", &sval) == 0) + pdata->st_min_limits[0] = sval; + if (of_property_read_s32(np, "st,min-limit-y", &sval) == 0) + pdata->st_min_limits[1] = sval; + if (of_property_read_s32(np, "st,min-limit-z", &sval) == 0) + pdata->st_min_limits[2] = sval; + + if (of_property_read_s32(np, "st,max-limit-x", &sval) == 0) + pdata->st_max_limits[0] = sval; + if (of_property_read_s32(np, "st,max-limit-y", &sval) == 0) + pdata->st_max_limits[1] = sval; + if (of_property_read_s32(np, "st,max-limit-z", &sval) == 0) + pdata->st_max_limits[2] = sval; + + + lis3->pdata = pdata; + + return 0; +} + +#else +int lis3lv02d_init_dt(struct lis3lv02d *lis3) +{ + return 0; +} +#endif +EXPORT_SYMBOL_GPL(lis3lv02d_init_dt); + +/* + * Initialise the accelerometer and the various subsystems. + * Should be rather independent of the bus system. + */ +int lis3lv02d_init_device(struct lis3lv02d *lis3) +{ + int err; + irq_handler_t thread_fn; + int irq_flags = 0; + + lis3->whoami = lis3lv02d_read_8(lis3, WHO_AM_I); + + switch (lis3->whoami) { + case WAI_12B: + pr_info("12 bits sensor found\n"); + lis3->read_data = lis3lv02d_read_12; + lis3->mdps_max_val = 2048; + lis3->pwron_delay = LIS3_PWRON_DELAY_WAI_12B; + lis3->odrs = lis3_12_rates; + lis3->odr_mask = CTRL1_DF0 | CTRL1_DF1; + lis3->scale = LIS3_SENSITIVITY_12B; + lis3->regs = lis3_wai12_regs; + lis3->regs_size = ARRAY_SIZE(lis3_wai12_regs); + break; + case WAI_8B: + pr_info("8 bits sensor found\n"); + lis3->read_data = lis3lv02d_read_8; + lis3->mdps_max_val = 128; + lis3->pwron_delay = LIS3_PWRON_DELAY_WAI_8B; + lis3->odrs = lis3_8_rates; + lis3->odr_mask = CTRL1_DR; + lis3->scale = LIS3_SENSITIVITY_8B; + lis3->regs = lis3_wai8_regs; + lis3->regs_size = ARRAY_SIZE(lis3_wai8_regs); + break; + case WAI_3DC: + pr_info("8 bits 3DC sensor found\n"); + lis3->read_data = lis3lv02d_read_8; + lis3->mdps_max_val = 128; + lis3->pwron_delay = LIS3_PWRON_DELAY_WAI_8B; + lis3->odrs = lis3_3dc_rates; + lis3->odr_mask = CTRL1_ODR0|CTRL1_ODR1|CTRL1_ODR2|CTRL1_ODR3; + lis3->scale = LIS3_SENSITIVITY_8B; + break; + case WAI_3DLH: + pr_info("16 bits lis331dlh sensor found\n"); + lis3->read_data = lis331dlh_read_data; + lis3->mdps_max_val = 2048; /* 12 bits for 2G */ + lis3->shift_adj = SHIFT_ADJ_2G; + lis3->pwron_delay = LIS3_PWRON_DELAY_WAI_8B; + lis3->odrs = lis3_3dlh_rates; + lis3->odr_mask = CTRL1_DR0 | CTRL1_DR1; + lis3->scale = LIS3DLH_SENSITIVITY_2G; + break; + default: + pr_err("unknown sensor type 0x%X\n", lis3->whoami); + return -EINVAL; + } + + lis3->reg_cache = kzalloc(max(sizeof(lis3_wai8_regs), + sizeof(lis3_wai12_regs)), GFP_KERNEL); + + if (lis3->reg_cache == NULL) { + printk(KERN_ERR DRIVER_NAME "out of memory\n"); + return -ENOMEM; + } + + mutex_init(&lis3->mutex); + atomic_set(&lis3->wake_thread, 0); + + lis3lv02d_add_fs(lis3); + err = lis3lv02d_poweron(lis3); + if (err) { + lis3lv02d_remove_fs(lis3); + return err; + } + + if (lis3->pm_dev) { + pm_runtime_set_active(lis3->pm_dev); + pm_runtime_enable(lis3->pm_dev); + } + + if (lis3lv02d_joystick_enable(lis3)) + pr_err("joystick initialization failed\n"); + + /* passing in platform specific data is purely optional and only + * used by the SPI transport layer at the moment */ + if (lis3->pdata) { + struct lis3lv02d_platform_data *p = lis3->pdata; + + if (lis3->whoami == WAI_8B) + lis3lv02d_8b_configure(lis3, p); + + irq_flags = p->irq_flags1 & IRQF_TRIGGER_MASK; + + lis3->irq_cfg = p->irq_cfg; + if (p->irq_cfg) + lis3->write(lis3, CTRL_REG3, p->irq_cfg); + + if (p->default_rate) + lis3lv02d_set_odr(lis3, p->default_rate); + } + + /* bail if we did not get an IRQ from the bus layer */ + if (!lis3->irq) { + pr_debug("No IRQ. Disabling /dev/freefall\n"); + goto out; + } + + /* + * The sensor can generate interrupts for free-fall and direction + * detection (distinguishable with FF_WU_SRC and DD_SRC) but to keep + * the things simple and _fast_ we activate it only for free-fall, so + * no need to read register (very slow with ACPI). For the same reason, + * we forbid shared interrupts. + * + * IRQF_TRIGGER_RISING seems pointless on HP laptops because the + * io-apic is not configurable (and generates a warning) but I keep it + * in case of support for other hardware. + */ + if (lis3->pdata && lis3->whoami == WAI_8B) + thread_fn = lis302dl_interrupt_thread1_8b; + else + thread_fn = NULL; + + err = request_threaded_irq(lis3->irq, lis302dl_interrupt, + thread_fn, + IRQF_TRIGGER_RISING | IRQF_ONESHOT | + irq_flags, + DRIVER_NAME, lis3); + + if (err < 0) { + pr_err("Cannot get IRQ\n"); + goto out; + } + + lis3->miscdev.minor = MISC_DYNAMIC_MINOR; + lis3->miscdev.name = "freefall"; + lis3->miscdev.fops = &lis3lv02d_misc_fops; + + if (misc_register(&lis3->miscdev)) + pr_err("misc_register failed\n"); +out: + return 0; +} +EXPORT_SYMBOL_GPL(lis3lv02d_init_device); + +MODULE_DESCRIPTION("ST LIS3LV02Dx three-axis digital accelerometer driver"); +MODULE_AUTHOR("Yan Burman, Eric Piel, Pavel Machek"); +MODULE_LICENSE("GPL"); diff --git a/drivers/misc/lis3lv02d/lis3lv02d.h b/drivers/misc/lis3lv02d/lis3lv02d.h new file mode 100644 index 000000000..0ef759671 --- /dev/null +++ b/drivers/misc/lis3lv02d/lis3lv02d.h @@ -0,0 +1,332 @@ +/* + * lis3lv02d.h - ST LIS3LV02DL accelerometer driver + * + * Copyright (C) 2007-2008 Yan Burman + * Copyright (C) 2008-2009 Eric Piel + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#include +#include +#include +#include + +/* + * This driver tries to support the "digital" accelerometer chips from + * STMicroelectronics such as LIS3LV02DL, LIS302DL, LIS3L02DQ, LIS331DL, + * LIS331DLH, LIS35DE, or LIS202DL. They are very similar in terms of + * programming, with almost the same registers. In addition to differing + * on physical properties, they differ on the number of axes (2/3), + * precision (8/12 bits), and special features (freefall detection, + * click...). Unfortunately, not all the differences can be probed via + * a register. They can be connected either via I²C or SPI. + */ + +#include + +enum lis3_reg { + WHO_AM_I = 0x0F, + OFFSET_X = 0x16, + OFFSET_Y = 0x17, + OFFSET_Z = 0x18, + GAIN_X = 0x19, + GAIN_Y = 0x1A, + GAIN_Z = 0x1B, + CTRL_REG1 = 0x20, + CTRL_REG2 = 0x21, + CTRL_REG3 = 0x22, + CTRL_REG4 = 0x23, + HP_FILTER_RESET = 0x23, + STATUS_REG = 0x27, + OUTX_L = 0x28, + OUTX_H = 0x29, + OUTX = 0x29, + OUTY_L = 0x2A, + OUTY_H = 0x2B, + OUTY = 0x2B, + OUTZ_L = 0x2C, + OUTZ_H = 0x2D, + OUTZ = 0x2D, +}; + +enum lis302d_reg { + FF_WU_CFG_1 = 0x30, + FF_WU_SRC_1 = 0x31, + FF_WU_THS_1 = 0x32, + FF_WU_DURATION_1 = 0x33, + FF_WU_CFG_2 = 0x34, + FF_WU_SRC_2 = 0x35, + FF_WU_THS_2 = 0x36, + FF_WU_DURATION_2 = 0x37, + CLICK_CFG = 0x38, + CLICK_SRC = 0x39, + CLICK_THSY_X = 0x3B, + CLICK_THSZ = 0x3C, + CLICK_TIMELIMIT = 0x3D, + CLICK_LATENCY = 0x3E, + CLICK_WINDOW = 0x3F, +}; + +enum lis3lv02d_reg { + FF_WU_CFG = 0x30, + FF_WU_SRC = 0x31, + FF_WU_ACK = 0x32, + FF_WU_THS_L = 0x34, + FF_WU_THS_H = 0x35, + FF_WU_DURATION = 0x36, + DD_CFG = 0x38, + DD_SRC = 0x39, + DD_ACK = 0x3A, + DD_THSI_L = 0x3C, + DD_THSI_H = 0x3D, + DD_THSE_L = 0x3E, + DD_THSE_H = 0x3F, +}; + +enum lis3_who_am_i { + WAI_3DLH = 0x32, /* 16 bits: LIS331DLH */ + WAI_3DC = 0x33, /* 8 bits: LIS3DC, HP3DC */ + WAI_12B = 0x3A, /* 12 bits: LIS3LV02D[LQ]... */ + WAI_8B = 0x3B, /* 8 bits: LIS[23]02D[LQ]... */ + WAI_6B = 0x52, /* 6 bits: LIS331DLF - not supported */ +}; + +enum lis3_type { + LIS3LV02D, + LIS3DC, + HP3DC, + LIS2302D, + LIS331DLF, + LIS331DLH, +}; + +enum lis3lv02d_ctrl1_12b { + CTRL1_Xen = 0x01, + CTRL1_Yen = 0x02, + CTRL1_Zen = 0x04, + CTRL1_ST = 0x08, + CTRL1_DF0 = 0x10, + CTRL1_DF1 = 0x20, + CTRL1_PD0 = 0x40, + CTRL1_PD1 = 0x80, +}; + +/* Delta to ctrl1_12b version */ +enum lis3lv02d_ctrl1_8b { + CTRL1_STM = 0x08, + CTRL1_STP = 0x10, + CTRL1_FS = 0x20, + CTRL1_PD = 0x40, + CTRL1_DR = 0x80, +}; + +enum lis3lv02d_ctrl1_3dc { + CTRL1_ODR0 = 0x10, + CTRL1_ODR1 = 0x20, + CTRL1_ODR2 = 0x40, + CTRL1_ODR3 = 0x80, +}; + +enum lis331dlh_ctrl1 { + CTRL1_DR0 = 0x08, + CTRL1_DR1 = 0x10, + CTRL1_PM0 = 0x20, + CTRL1_PM1 = 0x40, + CTRL1_PM2 = 0x80, +}; + +enum lis331dlh_ctrl2 { + CTRL2_HPEN1 = 0x04, + CTRL2_HPEN2 = 0x08, + CTRL2_FDS_3DLH = 0x10, + CTRL2_BOOT_3DLH = 0x80, +}; + +enum lis331dlh_ctrl4 { + CTRL4_STSIGN = 0x08, + CTRL4_BLE = 0x40, + CTRL4_BDU = 0x80, +}; + +enum lis3lv02d_ctrl2 { + CTRL2_DAS = 0x01, + CTRL2_SIM = 0x02, + CTRL2_DRDY = 0x04, + CTRL2_IEN = 0x08, + CTRL2_BOOT = 0x10, + CTRL2_BLE = 0x20, + CTRL2_BDU = 0x40, /* Block Data Update */ + CTRL2_FS = 0x80, /* Full Scale selection */ +}; + +enum lis3lv02d_ctrl4_3dc { + CTRL4_SIM = 0x01, + CTRL4_ST0 = 0x02, + CTRL4_ST1 = 0x04, + CTRL4_FS0 = 0x10, + CTRL4_FS1 = 0x20, +}; + +enum lis302d_ctrl2 { + HP_FF_WU2 = 0x08, + HP_FF_WU1 = 0x04, + CTRL2_BOOT_8B = 0x40, +}; + +enum lis3lv02d_ctrl3 { + CTRL3_CFS0 = 0x01, + CTRL3_CFS1 = 0x02, + CTRL3_FDS = 0x10, + CTRL3_HPFF = 0x20, + CTRL3_HPDD = 0x40, + CTRL3_ECK = 0x80, +}; + +enum lis3lv02d_status_reg { + STATUS_XDA = 0x01, + STATUS_YDA = 0x02, + STATUS_ZDA = 0x04, + STATUS_XYZDA = 0x08, + STATUS_XOR = 0x10, + STATUS_YOR = 0x20, + STATUS_ZOR = 0x40, + STATUS_XYZOR = 0x80, +}; + +enum lis3lv02d_ff_wu_cfg { + FF_WU_CFG_XLIE = 0x01, + FF_WU_CFG_XHIE = 0x02, + FF_WU_CFG_YLIE = 0x04, + FF_WU_CFG_YHIE = 0x08, + FF_WU_CFG_ZLIE = 0x10, + FF_WU_CFG_ZHIE = 0x20, + FF_WU_CFG_LIR = 0x40, + FF_WU_CFG_AOI = 0x80, +}; + +enum lis3lv02d_ff_wu_src { + FF_WU_SRC_XL = 0x01, + FF_WU_SRC_XH = 0x02, + FF_WU_SRC_YL = 0x04, + FF_WU_SRC_YH = 0x08, + FF_WU_SRC_ZL = 0x10, + FF_WU_SRC_ZH = 0x20, + FF_WU_SRC_IA = 0x40, +}; + +enum lis3lv02d_dd_cfg { + DD_CFG_XLIE = 0x01, + DD_CFG_XHIE = 0x02, + DD_CFG_YLIE = 0x04, + DD_CFG_YHIE = 0x08, + DD_CFG_ZLIE = 0x10, + DD_CFG_ZHIE = 0x20, + DD_CFG_LIR = 0x40, + DD_CFG_IEND = 0x80, +}; + +enum lis3lv02d_dd_src { + DD_SRC_XL = 0x01, + DD_SRC_XH = 0x02, + DD_SRC_YL = 0x04, + DD_SRC_YH = 0x08, + DD_SRC_ZL = 0x10, + DD_SRC_ZH = 0x20, + DD_SRC_IA = 0x40, +}; + +enum lis3lv02d_click_src_8b { + CLICK_SINGLE_X = 0x01, + CLICK_DOUBLE_X = 0x02, + CLICK_SINGLE_Y = 0x04, + CLICK_DOUBLE_Y = 0x08, + CLICK_SINGLE_Z = 0x10, + CLICK_DOUBLE_Z = 0x20, + CLICK_IA = 0x40, +}; + +enum lis3lv02d_reg_state { + LIS3_REG_OFF = 0x00, + LIS3_REG_ON = 0x01, +}; + +union axis_conversion { + struct { + int x, y, z; + }; + int as_array[3]; + +}; + +struct lis3lv02d { + void *bus_priv; /* used by the bus layer only */ + struct device *pm_dev; /* for pm_runtime purposes */ + int (*init) (struct lis3lv02d *lis3); + int (*write) (struct lis3lv02d *lis3, int reg, u8 val); + int (*read) (struct lis3lv02d *lis3, int reg, u8 *ret); + int (*blkread) (struct lis3lv02d *lis3, int reg, int len, u8 *ret); + int (*reg_ctrl) (struct lis3lv02d *lis3, bool state); + + int *odrs; /* Supported output data rates */ + u8 *regs; /* Regs to store / restore */ + int regs_size; + u8 *reg_cache; + bool regs_stored; + bool init_required; + u8 odr_mask; /* ODR bit mask */ + u8 whoami; /* indicates measurement precision */ + s16 (*read_data) (struct lis3lv02d *lis3, int reg); + int mdps_max_val; + int pwron_delay; + int scale; /* + * relationship between 1 LBS and mG + * (1/1000th of earth gravity) + */ + + struct input_polled_dev *idev; /* input device */ + struct platform_device *pdev; /* platform device */ + struct regulator_bulk_data regulators[2]; + atomic_t count; /* interrupt count after last read */ + union axis_conversion ac; /* hw -> logical axis */ + int mapped_btns[3]; + + u32 irq; /* IRQ number */ + struct fasync_struct *async_queue; /* queue for the misc device */ + wait_queue_head_t misc_wait; /* Wait queue for the misc device */ + unsigned long misc_opened; /* bit0: whether the device is open */ + struct miscdevice miscdev; + + int data_ready_count[2]; + atomic_t wake_thread; + unsigned char irq_cfg; + unsigned int shift_adj; + + struct lis3lv02d_platform_data *pdata; /* for passing board config */ + struct mutex mutex; /* Serialize poll and selftest */ + +#ifdef CONFIG_OF + struct device_node *of_node; +#endif +}; + +int lis3lv02d_init_device(struct lis3lv02d *lis3); +int lis3lv02d_joystick_enable(struct lis3lv02d *lis3); +void lis3lv02d_joystick_disable(struct lis3lv02d *lis3); +void lis3lv02d_poweroff(struct lis3lv02d *lis3); +int lis3lv02d_poweron(struct lis3lv02d *lis3); +int lis3lv02d_remove_fs(struct lis3lv02d *lis3); +int lis3lv02d_init_dt(struct lis3lv02d *lis3); + +extern struct lis3lv02d lis3_dev; diff --git a/drivers/misc/lis3lv02d/lis3lv02d_i2c.c b/drivers/misc/lis3lv02d/lis3lv02d_i2c.c new file mode 100644 index 000000000..14b7d539f --- /dev/null +++ b/drivers/misc/lis3lv02d/lis3lv02d_i2c.c @@ -0,0 +1,289 @@ +/* + * drivers/hwmon/lis3lv02d_i2c.c + * + * Implements I2C interface for lis3lv02d (STMicroelectronics) accelerometer. + * Driver is based on corresponding SPI driver written by Daniel Mack + * (lis3lv02d_spi.c (C) 2009 Daniel Mack ). + * + * Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies). + * + * Contact: Samu Onkalo + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "lis3lv02d.h" + +#define DRV_NAME "lis3lv02d_i2c" + +static const char reg_vdd[] = "Vdd"; +static const char reg_vdd_io[] = "Vdd_IO"; + +static int lis3_reg_ctrl(struct lis3lv02d *lis3, bool state) +{ + int ret; + if (state == LIS3_REG_OFF) { + ret = regulator_bulk_disable(ARRAY_SIZE(lis3->regulators), + lis3->regulators); + } else { + ret = regulator_bulk_enable(ARRAY_SIZE(lis3->regulators), + lis3->regulators); + /* Chip needs time to wakeup. Not mentioned in datasheet */ + usleep_range(10000, 20000); + } + return ret; +} + +static inline s32 lis3_i2c_write(struct lis3lv02d *lis3, int reg, u8 value) +{ + struct i2c_client *c = lis3->bus_priv; + return i2c_smbus_write_byte_data(c, reg, value); +} + +static inline s32 lis3_i2c_read(struct lis3lv02d *lis3, int reg, u8 *v) +{ + struct i2c_client *c = lis3->bus_priv; + *v = i2c_smbus_read_byte_data(c, reg); + return 0; +} + +static inline s32 lis3_i2c_blockread(struct lis3lv02d *lis3, int reg, int len, + u8 *v) +{ + struct i2c_client *c = lis3->bus_priv; + reg |= (1 << 7); /* 7th bit enables address auto incrementation */ + return i2c_smbus_read_i2c_block_data(c, reg, len, v); +} + +static int lis3_i2c_init(struct lis3lv02d *lis3) +{ + u8 reg; + int ret; + + lis3_reg_ctrl(lis3, LIS3_REG_ON); + + lis3->read(lis3, WHO_AM_I, ®); + if (reg != lis3->whoami) + printk(KERN_ERR "lis3: power on failure\n"); + + /* power up the device */ + ret = lis3->read(lis3, CTRL_REG1, ®); + if (ret < 0) + return ret; + + if (lis3->whoami == WAI_3DLH) + reg |= CTRL1_PM0 | CTRL1_Xen | CTRL1_Yen | CTRL1_Zen; + else + reg |= CTRL1_PD0 | CTRL1_Xen | CTRL1_Yen | CTRL1_Zen; + + return lis3->write(lis3, CTRL_REG1, reg); +} + +/* Default axis mapping but it can be overwritten by platform data */ +static union axis_conversion lis3lv02d_axis_map = + { .as_array = { LIS3_DEV_X, LIS3_DEV_Y, LIS3_DEV_Z } }; + +#ifdef CONFIG_OF +static const struct of_device_id lis3lv02d_i2c_dt_ids[] = { + { .compatible = "st,lis3lv02d" }, + {} +}; +MODULE_DEVICE_TABLE(of, lis3lv02d_i2c_dt_ids); +#endif + +static int lis3lv02d_i2c_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + int ret = 0; + struct lis3lv02d_platform_data *pdata = client->dev.platform_data; + +#ifdef CONFIG_OF + if (of_match_device(lis3lv02d_i2c_dt_ids, &client->dev)) { + lis3_dev.of_node = client->dev.of_node; + ret = lis3lv02d_init_dt(&lis3_dev); + if (ret) + return ret; + pdata = lis3_dev.pdata; + } +#endif + + if (pdata) { + if ((pdata->driver_features & LIS3_USE_BLOCK_READ) && + (i2c_check_functionality(client->adapter, + I2C_FUNC_SMBUS_I2C_BLOCK))) + lis3_dev.blkread = lis3_i2c_blockread; + + if (pdata->axis_x) + lis3lv02d_axis_map.x = pdata->axis_x; + + if (pdata->axis_y) + lis3lv02d_axis_map.y = pdata->axis_y; + + if (pdata->axis_z) + lis3lv02d_axis_map.z = pdata->axis_z; + + if (pdata->setup_resources) + ret = pdata->setup_resources(); + + if (ret) + goto fail; + } + + lis3_dev.regulators[0].supply = reg_vdd; + lis3_dev.regulators[1].supply = reg_vdd_io; + ret = regulator_bulk_get(&client->dev, + ARRAY_SIZE(lis3_dev.regulators), + lis3_dev.regulators); + if (ret < 0) + goto fail; + + lis3_dev.pdata = pdata; + lis3_dev.bus_priv = client; + lis3_dev.init = lis3_i2c_init; + lis3_dev.read = lis3_i2c_read; + lis3_dev.write = lis3_i2c_write; + lis3_dev.irq = client->irq; + lis3_dev.ac = lis3lv02d_axis_map; + lis3_dev.pm_dev = &client->dev; + + i2c_set_clientdata(client, &lis3_dev); + + /* Provide power over the init call */ + lis3_reg_ctrl(&lis3_dev, LIS3_REG_ON); + + ret = lis3lv02d_init_device(&lis3_dev); + + lis3_reg_ctrl(&lis3_dev, LIS3_REG_OFF); + + if (ret) + goto fail2; + return 0; + +fail2: + regulator_bulk_free(ARRAY_SIZE(lis3_dev.regulators), + lis3_dev.regulators); +fail: + if (pdata && pdata->release_resources) + pdata->release_resources(); + return ret; +} + +static int lis3lv02d_i2c_remove(struct i2c_client *client) +{ + struct lis3lv02d *lis3 = i2c_get_clientdata(client); + struct lis3lv02d_platform_data *pdata = client->dev.platform_data; + + if (pdata && pdata->release_resources) + pdata->release_resources(); + + lis3lv02d_joystick_disable(lis3); + lis3lv02d_remove_fs(&lis3_dev); + + regulator_bulk_free(ARRAY_SIZE(lis3->regulators), + lis3_dev.regulators); + return 0; +} + +#ifdef CONFIG_PM_SLEEP +static int lis3lv02d_i2c_suspend(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + struct lis3lv02d *lis3 = i2c_get_clientdata(client); + + if (!lis3->pdata || !lis3->pdata->wakeup_flags) + lis3lv02d_poweroff(lis3); + return 0; +} + +static int lis3lv02d_i2c_resume(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + struct lis3lv02d *lis3 = i2c_get_clientdata(client); + + /* + * pm_runtime documentation says that devices should always + * be powered on at resume. Pm_runtime turns them off after system + * wide resume is complete. + */ + if (!lis3->pdata || !lis3->pdata->wakeup_flags || + pm_runtime_suspended(dev)) + lis3lv02d_poweron(lis3); + + return 0; +} +#endif /* CONFIG_PM_SLEEP */ + +#ifdef CONFIG_PM +static int lis3_i2c_runtime_suspend(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + struct lis3lv02d *lis3 = i2c_get_clientdata(client); + + lis3lv02d_poweroff(lis3); + return 0; +} + +static int lis3_i2c_runtime_resume(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + struct lis3lv02d *lis3 = i2c_get_clientdata(client); + + lis3lv02d_poweron(lis3); + return 0; +} +#endif /* CONFIG_PM */ + +static const struct i2c_device_id lis3lv02d_id[] = { + {"lis3lv02d", LIS3LV02D}, + {"lis331dlh", LIS331DLH}, + {} +}; + +MODULE_DEVICE_TABLE(i2c, lis3lv02d_id); + +static const struct dev_pm_ops lis3_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(lis3lv02d_i2c_suspend, + lis3lv02d_i2c_resume) + SET_RUNTIME_PM_OPS(lis3_i2c_runtime_suspend, + lis3_i2c_runtime_resume, + NULL) +}; + +static struct i2c_driver lis3lv02d_i2c_driver = { + .driver = { + .name = DRV_NAME, + .pm = &lis3_pm_ops, + .of_match_table = of_match_ptr(lis3lv02d_i2c_dt_ids), + }, + .probe = lis3lv02d_i2c_probe, + .remove = lis3lv02d_i2c_remove, + .id_table = lis3lv02d_id, +}; + +module_i2c_driver(lis3lv02d_i2c_driver); + +MODULE_AUTHOR("Nokia Corporation"); +MODULE_DESCRIPTION("lis3lv02d I2C interface"); +MODULE_LICENSE("GPL"); diff --git a/drivers/misc/lis3lv02d/lis3lv02d_spi.c b/drivers/misc/lis3lv02d/lis3lv02d_spi.c new file mode 100644 index 000000000..e57547512 --- /dev/null +++ b/drivers/misc/lis3lv02d/lis3lv02d_spi.c @@ -0,0 +1,153 @@ +/* + * lis3lv02d_spi - SPI glue layer for lis3lv02d + * + * Copyright (c) 2009 Daniel Mack + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * publishhed by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "lis3lv02d.h" + +#define DRV_NAME "lis3lv02d_spi" +#define LIS3_SPI_READ 0x80 + +static int lis3_spi_read(struct lis3lv02d *lis3, int reg, u8 *v) +{ + struct spi_device *spi = lis3->bus_priv; + int ret = spi_w8r8(spi, reg | LIS3_SPI_READ); + if (ret < 0) + return -EINVAL; + + *v = (u8) ret; + return 0; +} + +static int lis3_spi_write(struct lis3lv02d *lis3, int reg, u8 val) +{ + u8 tmp[2] = { reg, val }; + struct spi_device *spi = lis3->bus_priv; + return spi_write(spi, tmp, sizeof(tmp)); +} + +static int lis3_spi_init(struct lis3lv02d *lis3) +{ + u8 reg; + int ret; + + /* power up the device */ + ret = lis3->read(lis3, CTRL_REG1, ®); + if (ret < 0) + return ret; + + reg |= CTRL1_PD0 | CTRL1_Xen | CTRL1_Yen | CTRL1_Zen; + return lis3->write(lis3, CTRL_REG1, reg); +} + +static union axis_conversion lis3lv02d_axis_normal = + { .as_array = { 1, 2, 3 } }; + +#ifdef CONFIG_OF +static const struct of_device_id lis302dl_spi_dt_ids[] = { + { .compatible = "st,lis302dl-spi" }, + {} +}; +MODULE_DEVICE_TABLE(of, lis302dl_spi_dt_ids); +#endif + +static int lis302dl_spi_probe(struct spi_device *spi) +{ + int ret; + + spi->bits_per_word = 8; + spi->mode = SPI_MODE_0; + ret = spi_setup(spi); + if (ret < 0) + return ret; + + lis3_dev.bus_priv = spi; + lis3_dev.init = lis3_spi_init; + lis3_dev.read = lis3_spi_read; + lis3_dev.write = lis3_spi_write; + lis3_dev.irq = spi->irq; + lis3_dev.ac = lis3lv02d_axis_normal; + lis3_dev.pdata = spi->dev.platform_data; + +#ifdef CONFIG_OF + if (of_match_device(lis302dl_spi_dt_ids, &spi->dev)) { + lis3_dev.of_node = spi->dev.of_node; + ret = lis3lv02d_init_dt(&lis3_dev); + if (ret) + return ret; + } +#endif + spi_set_drvdata(spi, &lis3_dev); + + return lis3lv02d_init_device(&lis3_dev); +} + +static int lis302dl_spi_remove(struct spi_device *spi) +{ + struct lis3lv02d *lis3 = spi_get_drvdata(spi); + lis3lv02d_joystick_disable(lis3); + lis3lv02d_poweroff(lis3); + + return lis3lv02d_remove_fs(&lis3_dev); +} + +#ifdef CONFIG_PM_SLEEP +static int lis3lv02d_spi_suspend(struct device *dev) +{ + struct spi_device *spi = to_spi_device(dev); + struct lis3lv02d *lis3 = spi_get_drvdata(spi); + + if (!lis3->pdata || !lis3->pdata->wakeup_flags) + lis3lv02d_poweroff(&lis3_dev); + + return 0; +} + +static int lis3lv02d_spi_resume(struct device *dev) +{ + struct spi_device *spi = to_spi_device(dev); + struct lis3lv02d *lis3 = spi_get_drvdata(spi); + + if (!lis3->pdata || !lis3->pdata->wakeup_flags) + lis3lv02d_poweron(lis3); + + return 0; +} +#endif + +static SIMPLE_DEV_PM_OPS(lis3lv02d_spi_pm, lis3lv02d_spi_suspend, + lis3lv02d_spi_resume); + +static struct spi_driver lis302dl_spi_driver = { + .driver = { + .name = DRV_NAME, + .pm = &lis3lv02d_spi_pm, + .of_match_table = of_match_ptr(lis302dl_spi_dt_ids), + }, + .probe = lis302dl_spi_probe, + .remove = lis302dl_spi_remove, +}; + +module_spi_driver(lis302dl_spi_driver); + +MODULE_AUTHOR("Daniel Mack "); +MODULE_DESCRIPTION("lis3lv02d SPI glue layer"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("spi:" DRV_NAME); diff --git a/drivers/misc/lkdtm/Makefile b/drivers/misc/lkdtm/Makefile new file mode 100644 index 000000000..b7ceb5f22 --- /dev/null +++ b/drivers/misc/lkdtm/Makefile @@ -0,0 +1,19 @@ +# SPDX-License-Identifier: GPL-2.0 +obj-$(CONFIG_LKDTM) += lkdtm.o + +lkdtm-$(CONFIG_LKDTM) += core.o +lkdtm-$(CONFIG_LKDTM) += bugs.o +lkdtm-$(CONFIG_LKDTM) += heap.o +lkdtm-$(CONFIG_LKDTM) += perms.o +lkdtm-$(CONFIG_LKDTM) += refcount.o +lkdtm-$(CONFIG_LKDTM) += rodata_objcopy.o +lkdtm-$(CONFIG_LKDTM) += usercopy.o + +KCOV_INSTRUMENT_rodata.o := n + +OBJCOPYFLAGS := +OBJCOPYFLAGS_rodata_objcopy.o := \ + --rename-section .noinstr.text=.rodata,alloc,readonly,load,contents +targets += rodata.o rodata_objcopy.o +$(obj)/rodata_objcopy.o: $(obj)/rodata.o FORCE + $(call if_changed,objcopy) diff --git a/drivers/misc/lkdtm/bugs.c b/drivers/misc/lkdtm/bugs.c new file mode 100644 index 000000000..7eebbdfbc --- /dev/null +++ b/drivers/misc/lkdtm/bugs.c @@ -0,0 +1,257 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * This is for all the tests related to logic bugs (e.g. bad dereferences, + * bad alignment, bad loops, bad locking, bad scheduling, deep stacks, and + * lockups) along with other things that don't fit well into existing LKDTM + * test source files. + */ +#include "lkdtm.h" +#include +#include +#include +#include +#include + +struct lkdtm_list { + struct list_head node; +}; + +/* + * Make sure our attempts to over run the kernel stack doesn't trigger + * a compiler warning when CONFIG_FRAME_WARN is set. Then make sure we + * recurse past the end of THREAD_SIZE by default. + */ +#if defined(CONFIG_FRAME_WARN) && (CONFIG_FRAME_WARN > 0) +#define REC_STACK_SIZE (CONFIG_FRAME_WARN / 2) +#else +#define REC_STACK_SIZE (THREAD_SIZE / 8) +#endif +#define REC_NUM_DEFAULT ((THREAD_SIZE / REC_STACK_SIZE) * 2) + +static int recur_count = REC_NUM_DEFAULT; + +static DEFINE_SPINLOCK(lock_me_up); + +static int recursive_loop(int remaining) +{ + char buf[REC_STACK_SIZE]; + + /* Make sure compiler does not optimize this away. */ + memset(buf, (remaining & 0xff) | 0x1, REC_STACK_SIZE); + if (!remaining) + return 0; + else + return recursive_loop(remaining - 1); +} + +/* If the depth is negative, use the default, otherwise keep parameter. */ +void __init lkdtm_bugs_init(int *recur_param) +{ + if (*recur_param < 0) + *recur_param = recur_count; + else + recur_count = *recur_param; +} + +void lkdtm_PANIC(void) +{ + panic("dumptest"); +} + +void lkdtm_BUG(void) +{ + BUG(); +} + +static int warn_counter; + +void lkdtm_WARNING(void) +{ + WARN(1, "Warning message trigger count: %d\n", warn_counter++); +} + +void lkdtm_EXCEPTION(void) +{ + *((volatile int *) 0) = 0; +} + +void lkdtm_LOOP(void) +{ + for (;;) + ; +} + +void lkdtm_OVERFLOW(void) +{ + (void) recursive_loop(recur_count); +} + +static noinline void __lkdtm_CORRUPT_STACK(void *stack) +{ + memset(stack, '\xff', 64); +} + +/* This should trip the stack canary, not corrupt the return address. */ +noinline void lkdtm_CORRUPT_STACK(void) +{ + /* Use default char array length that triggers stack protection. */ + char data[8] __aligned(sizeof(void *)); + + __lkdtm_CORRUPT_STACK(&data); + + pr_info("Corrupted stack containing char array ...\n"); +} + +/* Same as above but will only get a canary with -fstack-protector-strong */ +noinline void lkdtm_CORRUPT_STACK_STRONG(void) +{ + union { + unsigned short shorts[4]; + unsigned long *ptr; + } data __aligned(sizeof(void *)); + + __lkdtm_CORRUPT_STACK(&data); + + pr_info("Corrupted stack containing union ...\n"); +} + +void lkdtm_UNALIGNED_LOAD_STORE_WRITE(void) +{ + static u8 data[5] __attribute__((aligned(4))) = {1, 2, 3, 4, 5}; + u32 *p; + u32 val = 0x12345678; + + p = (u32 *)(data + 1); + if (*p == 0) + val = 0x87654321; + *p = val; +} + +void lkdtm_SOFTLOCKUP(void) +{ + preempt_disable(); + for (;;) + cpu_relax(); +} + +void lkdtm_HARDLOCKUP(void) +{ + local_irq_disable(); + for (;;) + cpu_relax(); +} + +void lkdtm_SPINLOCKUP(void) +{ + /* Must be called twice to trigger. */ + spin_lock(&lock_me_up); + /* Let sparse know we intended to exit holding the lock. */ + __release(&lock_me_up); +} + +void lkdtm_HUNG_TASK(void) +{ + set_current_state(TASK_UNINTERRUPTIBLE); + schedule(); +} + +void lkdtm_CORRUPT_LIST_ADD(void) +{ + /* + * Initially, an empty list via LIST_HEAD: + * test_head.next = &test_head + * test_head.prev = &test_head + */ + LIST_HEAD(test_head); + struct lkdtm_list good, bad; + void *target[2] = { }; + void *redirection = ⌖ + + pr_info("attempting good list addition\n"); + + /* + * Adding to the list performs these actions: + * test_head.next->prev = &good.node + * good.node.next = test_head.next + * good.node.prev = test_head + * test_head.next = good.node + */ + list_add(&good.node, &test_head); + + pr_info("attempting corrupted list addition\n"); + /* + * In simulating this "write what where" primitive, the "what" is + * the address of &bad.node, and the "where" is the address held + * by "redirection". + */ + test_head.next = redirection; + list_add(&bad.node, &test_head); + + if (target[0] == NULL && target[1] == NULL) + pr_err("Overwrite did not happen, but no BUG?!\n"); + else + pr_err("list_add() corruption not detected!\n"); +} + +void lkdtm_CORRUPT_LIST_DEL(void) +{ + LIST_HEAD(test_head); + struct lkdtm_list item; + void *target[2] = { }; + void *redirection = ⌖ + + list_add(&item.node, &test_head); + + pr_info("attempting good list removal\n"); + list_del(&item.node); + + pr_info("attempting corrupted list removal\n"); + list_add(&item.node, &test_head); + + /* As with the list_add() test above, this corrupts "next". */ + item.node.next = redirection; + list_del(&item.node); + + if (target[0] == NULL && target[1] == NULL) + pr_err("Overwrite did not happen, but no BUG?!\n"); + else + pr_err("list_del() corruption not detected!\n"); +} + +/* Test if unbalanced set_fs(KERNEL_DS)/set_fs(USER_DS) check exists. */ +void lkdtm_CORRUPT_USER_DS(void) +{ + pr_info("setting bad task size limit\n"); + set_fs(KERNEL_DS); + + /* Make sure we do not keep running with a KERNEL_DS! */ + force_sig(SIGKILL, current); +} + +/* Test that VMAP_STACK is actually allocating with a leading guard page */ +void lkdtm_STACK_GUARD_PAGE_LEADING(void) +{ + const unsigned char *stack = task_stack_page(current); + const unsigned char *ptr = stack - 1; + volatile unsigned char byte; + + pr_info("attempting bad read from page below current stack\n"); + + byte = *ptr; + + pr_err("FAIL: accessed page before stack!\n"); +} + +/* Test that VMAP_STACK is actually allocating with a trailing guard page */ +void lkdtm_STACK_GUARD_PAGE_TRAILING(void) +{ + const unsigned char *stack = task_stack_page(current); + const unsigned char *ptr = stack + THREAD_SIZE; + volatile unsigned char byte; + + pr_info("attempting bad read from page above current stack\n"); + + byte = *ptr; + + pr_err("FAIL: accessed page after stack!\n"); +} diff --git a/drivers/misc/lkdtm/core.c b/drivers/misc/lkdtm/core.c new file mode 100644 index 000000000..07caaa2cf --- /dev/null +++ b/drivers/misc/lkdtm/core.c @@ -0,0 +1,507 @@ +/* + * Linux Kernel Dump Test Module for testing kernel crashes conditions: + * induces system failures at predefined crashpoints and under predefined + * operational conditions in order to evaluate the reliability of kernel + * sanity checking and crash dumps obtained using different dumping + * solutions. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2006 + * + * Author: Ankita Garg + * + * It is adapted from the Linux Kernel Dump Test Tool by + * Fernando Luis Vazquez Cao + * + * Debugfs support added by Simon Kagstrom + * + * See Documentation/fault-injection/provoke-crashes.txt for instructions + */ +#include "lkdtm.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_IDE +#include +#endif + +#define DEFAULT_COUNT 10 + +static int lkdtm_debugfs_open(struct inode *inode, struct file *file); +static ssize_t lkdtm_debugfs_read(struct file *f, char __user *user_buf, + size_t count, loff_t *off); +static ssize_t direct_entry(struct file *f, const char __user *user_buf, + size_t count, loff_t *off); + +#ifdef CONFIG_KPROBES +static int lkdtm_kprobe_handler(struct kprobe *kp, struct pt_regs *regs); +static ssize_t lkdtm_debugfs_entry(struct file *f, + const char __user *user_buf, + size_t count, loff_t *off); +# define CRASHPOINT_KPROBE(_symbol) \ + .kprobe = { \ + .symbol_name = (_symbol), \ + .pre_handler = lkdtm_kprobe_handler, \ + }, +# define CRASHPOINT_WRITE(_symbol) \ + (_symbol) ? lkdtm_debugfs_entry : direct_entry +#else +# define CRASHPOINT_KPROBE(_symbol) +# define CRASHPOINT_WRITE(_symbol) direct_entry +#endif + +/* Crash points */ +struct crashpoint { + const char *name; + const struct file_operations fops; + struct kprobe kprobe; +}; + +#define CRASHPOINT(_name, _symbol) \ + { \ + .name = _name, \ + .fops = { \ + .read = lkdtm_debugfs_read, \ + .llseek = generic_file_llseek, \ + .open = lkdtm_debugfs_open, \ + .write = CRASHPOINT_WRITE(_symbol) \ + }, \ + CRASHPOINT_KPROBE(_symbol) \ + } + +/* Define the possible places where we can trigger a crash point. */ +static struct crashpoint crashpoints[] = { + CRASHPOINT("DIRECT", NULL), +#ifdef CONFIG_KPROBES + CRASHPOINT("INT_HARDWARE_ENTRY", "do_IRQ"), + CRASHPOINT("INT_HW_IRQ_EN", "handle_irq_event"), + CRASHPOINT("INT_TASKLET_ENTRY", "tasklet_action"), + CRASHPOINT("FS_DEVRW", "ll_rw_block"), + CRASHPOINT("MEM_SWAPOUT", "shrink_inactive_list"), + CRASHPOINT("TIMERADD", "hrtimer_start"), + CRASHPOINT("SCSI_DISPATCH_CMD", "scsi_dispatch_cmd"), +# ifdef CONFIG_IDE + CRASHPOINT("IDE_CORE_CP", "generic_ide_ioctl"), +# endif +#endif +}; + + +/* Crash types. */ +struct crashtype { + const char *name; + void (*func)(void); +}; + +#define CRASHTYPE(_name) \ + { \ + .name = __stringify(_name), \ + .func = lkdtm_ ## _name, \ + } + +/* Define the possible types of crashes that can be triggered. */ +static const struct crashtype crashtypes[] = { + CRASHTYPE(PANIC), + CRASHTYPE(BUG), + CRASHTYPE(WARNING), + CRASHTYPE(EXCEPTION), + CRASHTYPE(LOOP), + CRASHTYPE(OVERFLOW), + CRASHTYPE(CORRUPT_LIST_ADD), + CRASHTYPE(CORRUPT_LIST_DEL), + CRASHTYPE(CORRUPT_USER_DS), + CRASHTYPE(CORRUPT_STACK), + CRASHTYPE(CORRUPT_STACK_STRONG), + CRASHTYPE(STACK_GUARD_PAGE_LEADING), + CRASHTYPE(STACK_GUARD_PAGE_TRAILING), + CRASHTYPE(UNALIGNED_LOAD_STORE_WRITE), + CRASHTYPE(OVERWRITE_ALLOCATION), + CRASHTYPE(WRITE_AFTER_FREE), + CRASHTYPE(READ_AFTER_FREE), + CRASHTYPE(WRITE_BUDDY_AFTER_FREE), + CRASHTYPE(READ_BUDDY_AFTER_FREE), + CRASHTYPE(SOFTLOCKUP), + CRASHTYPE(HARDLOCKUP), + CRASHTYPE(SPINLOCKUP), + CRASHTYPE(HUNG_TASK), + CRASHTYPE(EXEC_DATA), + CRASHTYPE(EXEC_STACK), + CRASHTYPE(EXEC_KMALLOC), + CRASHTYPE(EXEC_VMALLOC), + CRASHTYPE(EXEC_RODATA), + CRASHTYPE(EXEC_USERSPACE), + CRASHTYPE(EXEC_NULL), + CRASHTYPE(ACCESS_USERSPACE), + CRASHTYPE(ACCESS_NULL), + CRASHTYPE(WRITE_RO), + CRASHTYPE(WRITE_RO_AFTER_INIT), + CRASHTYPE(WRITE_KERN), + CRASHTYPE(REFCOUNT_INC_OVERFLOW), + CRASHTYPE(REFCOUNT_ADD_OVERFLOW), + CRASHTYPE(REFCOUNT_INC_NOT_ZERO_OVERFLOW), + CRASHTYPE(REFCOUNT_ADD_NOT_ZERO_OVERFLOW), + CRASHTYPE(REFCOUNT_DEC_ZERO), + CRASHTYPE(REFCOUNT_DEC_NEGATIVE), + CRASHTYPE(REFCOUNT_DEC_AND_TEST_NEGATIVE), + CRASHTYPE(REFCOUNT_SUB_AND_TEST_NEGATIVE), + CRASHTYPE(REFCOUNT_INC_ZERO), + CRASHTYPE(REFCOUNT_ADD_ZERO), + CRASHTYPE(REFCOUNT_INC_SATURATED), + CRASHTYPE(REFCOUNT_DEC_SATURATED), + CRASHTYPE(REFCOUNT_ADD_SATURATED), + CRASHTYPE(REFCOUNT_INC_NOT_ZERO_SATURATED), + CRASHTYPE(REFCOUNT_ADD_NOT_ZERO_SATURATED), + CRASHTYPE(REFCOUNT_DEC_AND_TEST_SATURATED), + CRASHTYPE(REFCOUNT_SUB_AND_TEST_SATURATED), + CRASHTYPE(REFCOUNT_TIMING), + CRASHTYPE(ATOMIC_TIMING), + CRASHTYPE(USERCOPY_HEAP_SIZE_TO), + CRASHTYPE(USERCOPY_HEAP_SIZE_FROM), + CRASHTYPE(USERCOPY_HEAP_WHITELIST_TO), + CRASHTYPE(USERCOPY_HEAP_WHITELIST_FROM), + CRASHTYPE(USERCOPY_STACK_FRAME_TO), + CRASHTYPE(USERCOPY_STACK_FRAME_FROM), + CRASHTYPE(USERCOPY_STACK_BEYOND), + CRASHTYPE(USERCOPY_KERNEL), +}; + + +/* Global kprobe entry and crashtype. */ +static struct kprobe *lkdtm_kprobe; +static struct crashpoint *lkdtm_crashpoint; +static const struct crashtype *lkdtm_crashtype; + +/* Module parameters */ +static int recur_count = -1; +module_param(recur_count, int, 0644); +MODULE_PARM_DESC(recur_count, " Recursion level for the stack overflow test"); + +static char* cpoint_name; +module_param(cpoint_name, charp, 0444); +MODULE_PARM_DESC(cpoint_name, " Crash Point, where kernel is to be crashed"); + +static char* cpoint_type; +module_param(cpoint_type, charp, 0444); +MODULE_PARM_DESC(cpoint_type, " Crash Point Type, action to be taken on "\ + "hitting the crash point"); + +static int cpoint_count = DEFAULT_COUNT; +module_param(cpoint_count, int, 0644); +MODULE_PARM_DESC(cpoint_count, " Crash Point Count, number of times the "\ + "crash point is to be hit to trigger action"); + + +/* Return the crashtype number or NULL if the name is invalid */ +static const struct crashtype *find_crashtype(const char *name) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(crashtypes); i++) { + if (!strcmp(name, crashtypes[i].name)) + return &crashtypes[i]; + } + + return NULL; +} + +/* + * This is forced noinline just so it distinctly shows up in the stackdump + * which makes validation of expected lkdtm crashes easier. + */ +static noinline void lkdtm_do_action(const struct crashtype *crashtype) +{ + if (WARN_ON(!crashtype || !crashtype->func)) + return; + crashtype->func(); +} + +static int lkdtm_register_cpoint(struct crashpoint *crashpoint, + const struct crashtype *crashtype) +{ + int ret; + + /* If this doesn't have a symbol, just call immediately. */ + if (!crashpoint->kprobe.symbol_name) { + lkdtm_do_action(crashtype); + return 0; + } + + if (lkdtm_kprobe != NULL) + unregister_kprobe(lkdtm_kprobe); + + lkdtm_crashpoint = crashpoint; + lkdtm_crashtype = crashtype; + lkdtm_kprobe = &crashpoint->kprobe; + ret = register_kprobe(lkdtm_kprobe); + if (ret < 0) { + pr_info("Couldn't register kprobe %s\n", + crashpoint->kprobe.symbol_name); + lkdtm_kprobe = NULL; + lkdtm_crashpoint = NULL; + lkdtm_crashtype = NULL; + } + + return ret; +} + +#ifdef CONFIG_KPROBES +/* Global crash counter and spinlock. */ +static int crash_count = DEFAULT_COUNT; +static DEFINE_SPINLOCK(crash_count_lock); + +/* Called by kprobe entry points. */ +static int lkdtm_kprobe_handler(struct kprobe *kp, struct pt_regs *regs) +{ + unsigned long flags; + bool do_it = false; + + if (WARN_ON(!lkdtm_crashpoint || !lkdtm_crashtype)) + return 0; + + spin_lock_irqsave(&crash_count_lock, flags); + crash_count--; + pr_info("Crash point %s of type %s hit, trigger in %d rounds\n", + lkdtm_crashpoint->name, lkdtm_crashtype->name, crash_count); + + if (crash_count == 0) { + do_it = true; + crash_count = cpoint_count; + } + spin_unlock_irqrestore(&crash_count_lock, flags); + + if (do_it) + lkdtm_do_action(lkdtm_crashtype); + + return 0; +} + +static ssize_t lkdtm_debugfs_entry(struct file *f, + const char __user *user_buf, + size_t count, loff_t *off) +{ + struct crashpoint *crashpoint = file_inode(f)->i_private; + const struct crashtype *crashtype = NULL; + char *buf; + int err; + + if (count >= PAGE_SIZE) + return -EINVAL; + + buf = (char *)__get_free_page(GFP_KERNEL); + if (!buf) + return -ENOMEM; + if (copy_from_user(buf, user_buf, count)) { + free_page((unsigned long) buf); + return -EFAULT; + } + /* NULL-terminate and remove enter */ + buf[count] = '\0'; + strim(buf); + + crashtype = find_crashtype(buf); + free_page((unsigned long)buf); + + if (!crashtype) + return -EINVAL; + + err = lkdtm_register_cpoint(crashpoint, crashtype); + if (err < 0) + return err; + + *off += count; + + return count; +} +#endif + +/* Generic read callback that just prints out the available crash types */ +static ssize_t lkdtm_debugfs_read(struct file *f, char __user *user_buf, + size_t count, loff_t *off) +{ + char *buf; + int i, n, out; + + buf = (char *)__get_free_page(GFP_KERNEL); + if (buf == NULL) + return -ENOMEM; + + n = snprintf(buf, PAGE_SIZE, "Available crash types:\n"); + for (i = 0; i < ARRAY_SIZE(crashtypes); i++) { + n += snprintf(buf + n, PAGE_SIZE - n, "%s\n", + crashtypes[i].name); + } + buf[n] = '\0'; + + out = simple_read_from_buffer(user_buf, count, off, + buf, n); + free_page((unsigned long) buf); + + return out; +} + +static int lkdtm_debugfs_open(struct inode *inode, struct file *file) +{ + return 0; +} + +/* Special entry to just crash directly. Available without KPROBEs */ +static ssize_t direct_entry(struct file *f, const char __user *user_buf, + size_t count, loff_t *off) +{ + const struct crashtype *crashtype; + char *buf; + + if (count >= PAGE_SIZE) + return -EINVAL; + if (count < 1) + return -EINVAL; + + buf = (char *)__get_free_page(GFP_KERNEL); + if (!buf) + return -ENOMEM; + if (copy_from_user(buf, user_buf, count)) { + free_page((unsigned long) buf); + return -EFAULT; + } + /* NULL-terminate and remove enter */ + buf[count] = '\0'; + strim(buf); + + crashtype = find_crashtype(buf); + free_page((unsigned long) buf); + if (!crashtype) + return -EINVAL; + + pr_info("Performing direct entry %s\n", crashtype->name); + lkdtm_do_action(crashtype); + *off += count; + + return count; +} + +static struct dentry *lkdtm_debugfs_root; + +static int __init lkdtm_module_init(void) +{ + struct crashpoint *crashpoint = NULL; + const struct crashtype *crashtype = NULL; + int ret = -EINVAL; + int i; + + /* Neither or both of these need to be set */ + if ((cpoint_type || cpoint_name) && !(cpoint_type && cpoint_name)) { + pr_err("Need both cpoint_type and cpoint_name or neither\n"); + return -EINVAL; + } + + if (cpoint_type) { + crashtype = find_crashtype(cpoint_type); + if (!crashtype) { + pr_err("Unknown crashtype '%s'\n", cpoint_type); + return -EINVAL; + } + } + + if (cpoint_name) { + for (i = 0; i < ARRAY_SIZE(crashpoints); i++) { + if (!strcmp(cpoint_name, crashpoints[i].name)) + crashpoint = &crashpoints[i]; + } + + /* Refuse unknown crashpoints. */ + if (!crashpoint) { + pr_err("Invalid crashpoint %s\n", cpoint_name); + return -EINVAL; + } + } + +#ifdef CONFIG_KPROBES + /* Set crash count. */ + crash_count = cpoint_count; +#endif + + /* Handle test-specific initialization. */ + lkdtm_bugs_init(&recur_count); + lkdtm_perms_init(); + lkdtm_usercopy_init(); + + /* Register debugfs interface */ + lkdtm_debugfs_root = debugfs_create_dir("provoke-crash", NULL); + if (!lkdtm_debugfs_root) { + pr_err("creating root dir failed\n"); + return -ENODEV; + } + + /* Install debugfs trigger files. */ + for (i = 0; i < ARRAY_SIZE(crashpoints); i++) { + struct crashpoint *cur = &crashpoints[i]; + struct dentry *de; + + de = debugfs_create_file(cur->name, 0644, lkdtm_debugfs_root, + cur, &cur->fops); + if (de == NULL) { + pr_err("could not create crashpoint %s\n", cur->name); + goto out_err; + } + } + + /* Install crashpoint if one was selected. */ + if (crashpoint) { + ret = lkdtm_register_cpoint(crashpoint, crashtype); + if (ret < 0) { + pr_info("Invalid crashpoint %s\n", crashpoint->name); + goto out_err; + } + pr_info("Crash point %s of type %s registered\n", + crashpoint->name, cpoint_type); + } else { + pr_info("No crash points registered, enable through debugfs\n"); + } + + return 0; + +out_err: + debugfs_remove_recursive(lkdtm_debugfs_root); + return ret; +} + +static void __exit lkdtm_module_exit(void) +{ + debugfs_remove_recursive(lkdtm_debugfs_root); + + /* Handle test-specific clean-up. */ + lkdtm_usercopy_exit(); + + if (lkdtm_kprobe != NULL) + unregister_kprobe(lkdtm_kprobe); + + pr_info("Crash point unregistered\n"); +} + +module_init(lkdtm_module_init); +module_exit(lkdtm_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Kernel crash testing module"); diff --git a/drivers/misc/lkdtm/heap.c b/drivers/misc/lkdtm/heap.c new file mode 100644 index 000000000..65026d7de --- /dev/null +++ b/drivers/misc/lkdtm/heap.c @@ -0,0 +1,148 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * This is for all the tests relating directly to heap memory, including + * page allocation and slab allocations. + */ +#include "lkdtm.h" +#include +#include + +/* + * This tries to stay within the next largest power-of-2 kmalloc cache + * to avoid actually overwriting anything important if it's not detected + * correctly. + */ +void lkdtm_OVERWRITE_ALLOCATION(void) +{ + size_t len = 1020; + u32 *data = kmalloc(len, GFP_KERNEL); + if (!data) + return; + + data[1024 / sizeof(u32)] = 0x12345678; + kfree(data); +} + +void lkdtm_WRITE_AFTER_FREE(void) +{ + int *base, *again; + size_t len = 1024; + /* + * The slub allocator uses the first word to store the free + * pointer in some configurations. Use the middle of the + * allocation to avoid running into the freelist + */ + size_t offset = (len / sizeof(*base)) / 2; + + base = kmalloc(len, GFP_KERNEL); + if (!base) + return; + pr_info("Allocated memory %p-%p\n", base, &base[offset * 2]); + pr_info("Attempting bad write to freed memory at %p\n", + &base[offset]); + kfree(base); + base[offset] = 0x0abcdef0; + /* Attempt to notice the overwrite. */ + again = kmalloc(len, GFP_KERNEL); + kfree(again); + if (again != base) + pr_info("Hmm, didn't get the same memory range.\n"); +} + +void lkdtm_READ_AFTER_FREE(void) +{ + int *base, *val, saw; + size_t len = 1024; + /* + * The slub allocator uses the first word to store the free + * pointer in some configurations. Use the middle of the + * allocation to avoid running into the freelist + */ + size_t offset = (len / sizeof(*base)) / 2; + + base = kmalloc(len, GFP_KERNEL); + if (!base) { + pr_info("Unable to allocate base memory.\n"); + return; + } + + val = kmalloc(len, GFP_KERNEL); + if (!val) { + pr_info("Unable to allocate val memory.\n"); + kfree(base); + return; + } + + *val = 0x12345678; + base[offset] = *val; + pr_info("Value in memory before free: %x\n", base[offset]); + + kfree(base); + + pr_info("Attempting bad read from freed memory\n"); + saw = base[offset]; + if (saw != *val) { + /* Good! Poisoning happened, so declare a win. */ + pr_info("Memory correctly poisoned (%x)\n", saw); + BUG(); + } + pr_info("Memory was not poisoned\n"); + + kfree(val); +} + +void lkdtm_WRITE_BUDDY_AFTER_FREE(void) +{ + unsigned long p = __get_free_page(GFP_KERNEL); + if (!p) { + pr_info("Unable to allocate free page\n"); + return; + } + + pr_info("Writing to the buddy page before free\n"); + memset((void *)p, 0x3, PAGE_SIZE); + free_page(p); + schedule(); + pr_info("Attempting bad write to the buddy page after free\n"); + memset((void *)p, 0x78, PAGE_SIZE); + /* Attempt to notice the overwrite. */ + p = __get_free_page(GFP_KERNEL); + free_page(p); + schedule(); +} + +void lkdtm_READ_BUDDY_AFTER_FREE(void) +{ + unsigned long p = __get_free_page(GFP_KERNEL); + int saw, *val; + int *base; + + if (!p) { + pr_info("Unable to allocate free page\n"); + return; + } + + val = kmalloc(1024, GFP_KERNEL); + if (!val) { + pr_info("Unable to allocate val memory.\n"); + free_page(p); + return; + } + + base = (int *)p; + + *val = 0x12345678; + base[0] = *val; + pr_info("Value in memory before free: %x\n", base[0]); + free_page(p); + pr_info("Attempting to read from freed memory\n"); + saw = base[0]; + if (saw != *val) { + /* Good! Poisoning happened, so declare a win. */ + pr_info("Memory correctly poisoned (%x)\n", saw); + BUG(); + } + pr_info("Buddy page was not poisoned\n"); + + kfree(val); +} diff --git a/drivers/misc/lkdtm/lkdtm.h b/drivers/misc/lkdtm/lkdtm.h new file mode 100644 index 000000000..8c3f2e6af --- /dev/null +++ b/drivers/misc/lkdtm/lkdtm.h @@ -0,0 +1,88 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __LKDTM_H +#define __LKDTM_H + +#define pr_fmt(fmt) "lkdtm: " fmt + +#include + +/* lkdtm_bugs.c */ +void __init lkdtm_bugs_init(int *recur_param); +void lkdtm_PANIC(void); +void lkdtm_BUG(void); +void lkdtm_WARNING(void); +void lkdtm_EXCEPTION(void); +void lkdtm_LOOP(void); +void lkdtm_OVERFLOW(void); +void lkdtm_CORRUPT_STACK(void); +void lkdtm_CORRUPT_STACK_STRONG(void); +void lkdtm_UNALIGNED_LOAD_STORE_WRITE(void); +void lkdtm_SOFTLOCKUP(void); +void lkdtm_HARDLOCKUP(void); +void lkdtm_SPINLOCKUP(void); +void lkdtm_HUNG_TASK(void); +void lkdtm_CORRUPT_LIST_ADD(void); +void lkdtm_CORRUPT_LIST_DEL(void); +void lkdtm_CORRUPT_USER_DS(void); +void lkdtm_STACK_GUARD_PAGE_LEADING(void); +void lkdtm_STACK_GUARD_PAGE_TRAILING(void); + +/* lkdtm_heap.c */ +void lkdtm_OVERWRITE_ALLOCATION(void); +void lkdtm_WRITE_AFTER_FREE(void); +void lkdtm_READ_AFTER_FREE(void); +void lkdtm_WRITE_BUDDY_AFTER_FREE(void); +void lkdtm_READ_BUDDY_AFTER_FREE(void); + +/* lkdtm_perms.c */ +void __init lkdtm_perms_init(void); +void lkdtm_WRITE_RO(void); +void lkdtm_WRITE_RO_AFTER_INIT(void); +void lkdtm_WRITE_KERN(void); +void lkdtm_EXEC_DATA(void); +void lkdtm_EXEC_STACK(void); +void lkdtm_EXEC_KMALLOC(void); +void lkdtm_EXEC_VMALLOC(void); +void lkdtm_EXEC_RODATA(void); +void lkdtm_EXEC_USERSPACE(void); +void lkdtm_EXEC_NULL(void); +void lkdtm_ACCESS_USERSPACE(void); +void lkdtm_ACCESS_NULL(void); + +/* lkdtm_refcount.c */ +void lkdtm_REFCOUNT_INC_OVERFLOW(void); +void lkdtm_REFCOUNT_ADD_OVERFLOW(void); +void lkdtm_REFCOUNT_INC_NOT_ZERO_OVERFLOW(void); +void lkdtm_REFCOUNT_ADD_NOT_ZERO_OVERFLOW(void); +void lkdtm_REFCOUNT_DEC_ZERO(void); +void lkdtm_REFCOUNT_DEC_NEGATIVE(void); +void lkdtm_REFCOUNT_DEC_AND_TEST_NEGATIVE(void); +void lkdtm_REFCOUNT_SUB_AND_TEST_NEGATIVE(void); +void lkdtm_REFCOUNT_INC_ZERO(void); +void lkdtm_REFCOUNT_ADD_ZERO(void); +void lkdtm_REFCOUNT_INC_SATURATED(void); +void lkdtm_REFCOUNT_DEC_SATURATED(void); +void lkdtm_REFCOUNT_ADD_SATURATED(void); +void lkdtm_REFCOUNT_INC_NOT_ZERO_SATURATED(void); +void lkdtm_REFCOUNT_ADD_NOT_ZERO_SATURATED(void); +void lkdtm_REFCOUNT_DEC_AND_TEST_SATURATED(void); +void lkdtm_REFCOUNT_SUB_AND_TEST_SATURATED(void); +void lkdtm_REFCOUNT_TIMING(void); +void lkdtm_ATOMIC_TIMING(void); + +/* lkdtm_rodata.c */ +void lkdtm_rodata_do_nothing(void); + +/* lkdtm_usercopy.c */ +void __init lkdtm_usercopy_init(void); +void __exit lkdtm_usercopy_exit(void); +void lkdtm_USERCOPY_HEAP_SIZE_TO(void); +void lkdtm_USERCOPY_HEAP_SIZE_FROM(void); +void lkdtm_USERCOPY_HEAP_WHITELIST_TO(void); +void lkdtm_USERCOPY_HEAP_WHITELIST_FROM(void); +void lkdtm_USERCOPY_STACK_FRAME_TO(void); +void lkdtm_USERCOPY_STACK_FRAME_FROM(void); +void lkdtm_USERCOPY_STACK_BEYOND(void); +void lkdtm_USERCOPY_KERNEL(void); + +#endif diff --git a/drivers/misc/lkdtm/perms.c b/drivers/misc/lkdtm/perms.c new file mode 100644 index 000000000..62f76d506 --- /dev/null +++ b/drivers/misc/lkdtm/perms.c @@ -0,0 +1,221 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * This is for all the tests related to validating kernel memory + * permissions: non-executable regions, non-writable regions, and + * even non-readable regions. + */ +#include "lkdtm.h" +#include +#include +#include +#include +#include + +/* Whether or not to fill the target memory area with do_nothing(). */ +#define CODE_WRITE true +#define CODE_AS_IS false + +/* How many bytes to copy to be sure we've copied enough of do_nothing(). */ +#define EXEC_SIZE 64 + +/* This is non-const, so it will end up in the .data section. */ +static u8 data_area[EXEC_SIZE]; + +/* This is cost, so it will end up in the .rodata section. */ +static const unsigned long rodata = 0xAA55AA55; + +/* This is marked __ro_after_init, so it should ultimately be .rodata. */ +static unsigned long ro_after_init __ro_after_init = 0x55AA5500; + +/* + * This just returns to the caller. It is designed to be copied into + * non-executable memory regions. + */ +static void do_nothing(void) +{ + return; +} + +/* Must immediately follow do_nothing for size calculuations to work out. */ +static void do_overwritten(void) +{ + pr_info("do_overwritten wasn't overwritten!\n"); + return; +} + +static noinline void execute_location(void *dst, bool write) +{ + void (*func)(void) = dst; + + pr_info("attempting ok execution at %px\n", do_nothing); + do_nothing(); + + if (write == CODE_WRITE) { + memcpy(dst, do_nothing, EXEC_SIZE); + flush_icache_range((unsigned long)dst, + (unsigned long)dst + EXEC_SIZE); + } + pr_info("attempting bad execution at %px\n", func); + func(); +} + +static void execute_user_location(void *dst) +{ + int copied; + + /* Intentionally crossing kernel/user memory boundary. */ + void (*func)(void) = dst; + + pr_info("attempting ok execution at %px\n", do_nothing); + do_nothing(); + + copied = access_process_vm(current, (unsigned long)dst, do_nothing, + EXEC_SIZE, FOLL_WRITE); + if (copied < EXEC_SIZE) + return; + pr_info("attempting bad execution at %px\n", func); + func(); +} + +void lkdtm_WRITE_RO(void) +{ + /* Explicitly cast away "const" for the test. */ + unsigned long *ptr = (unsigned long *)&rodata; + + pr_info("attempting bad rodata write at %px\n", ptr); + *ptr ^= 0xabcd1234; +} + +void lkdtm_WRITE_RO_AFTER_INIT(void) +{ + unsigned long *ptr = &ro_after_init; + + /* + * Verify we were written to during init. Since an Oops + * is considered a "success", a failure is to just skip the + * real test. + */ + if ((*ptr & 0xAA) != 0xAA) { + pr_info("%p was NOT written during init!?\n", ptr); + return; + } + + pr_info("attempting bad ro_after_init write at %px\n", ptr); + *ptr ^= 0xabcd1234; +} + +void lkdtm_WRITE_KERN(void) +{ + size_t size; + unsigned char *ptr; + + size = (unsigned long)do_overwritten - (unsigned long)do_nothing; + ptr = (unsigned char *)do_overwritten; + + pr_info("attempting bad %zu byte write at %px\n", size, ptr); + memcpy(ptr, (unsigned char *)do_nothing, size); + flush_icache_range((unsigned long)ptr, (unsigned long)(ptr + size)); + + do_overwritten(); +} + +void lkdtm_EXEC_DATA(void) +{ + execute_location(data_area, CODE_WRITE); +} + +void lkdtm_EXEC_STACK(void) +{ + u8 stack_area[EXEC_SIZE]; + execute_location(stack_area, CODE_WRITE); +} + +void lkdtm_EXEC_KMALLOC(void) +{ + u32 *kmalloc_area = kmalloc(EXEC_SIZE, GFP_KERNEL); + execute_location(kmalloc_area, CODE_WRITE); + kfree(kmalloc_area); +} + +void lkdtm_EXEC_VMALLOC(void) +{ + u32 *vmalloc_area = vmalloc(EXEC_SIZE); + execute_location(vmalloc_area, CODE_WRITE); + vfree(vmalloc_area); +} + +void lkdtm_EXEC_RODATA(void) +{ + execute_location(lkdtm_rodata_do_nothing, CODE_AS_IS); +} + +void lkdtm_EXEC_USERSPACE(void) +{ + unsigned long user_addr; + + user_addr = vm_mmap(NULL, 0, PAGE_SIZE, + PROT_READ | PROT_WRITE | PROT_EXEC, + MAP_ANONYMOUS | MAP_PRIVATE, 0); + if (user_addr >= TASK_SIZE) { + pr_warn("Failed to allocate user memory\n"); + return; + } + execute_user_location((void *)user_addr); + vm_munmap(user_addr, PAGE_SIZE); +} + +void lkdtm_EXEC_NULL(void) +{ + execute_location(NULL, CODE_AS_IS); +} + +void lkdtm_ACCESS_USERSPACE(void) +{ + unsigned long user_addr, tmp = 0; + unsigned long *ptr; + + user_addr = vm_mmap(NULL, 0, PAGE_SIZE, + PROT_READ | PROT_WRITE | PROT_EXEC, + MAP_ANONYMOUS | MAP_PRIVATE, 0); + if (user_addr >= TASK_SIZE) { + pr_warn("Failed to allocate user memory\n"); + return; + } + + if (copy_to_user((void __user *)user_addr, &tmp, sizeof(tmp))) { + pr_warn("copy_to_user failed\n"); + vm_munmap(user_addr, PAGE_SIZE); + return; + } + + ptr = (unsigned long *)user_addr; + + pr_info("attempting bad read at %px\n", ptr); + tmp = *ptr; + tmp += 0xc0dec0de; + + pr_info("attempting bad write at %px\n", ptr); + *ptr = tmp; + + vm_munmap(user_addr, PAGE_SIZE); +} + +void lkdtm_ACCESS_NULL(void) +{ + unsigned long tmp; + unsigned long *ptr = (unsigned long *)NULL; + + pr_info("attempting bad read at %px\n", ptr); + tmp = *ptr; + tmp += 0xc0dec0de; + + pr_info("attempting bad write at %px\n", ptr); + *ptr = tmp; +} + +void __init lkdtm_perms_init(void) +{ + /* Make sure we can write to __ro_after_init values during __init */ + ro_after_init |= 0xAA; + +} diff --git a/drivers/misc/lkdtm/refcount.c b/drivers/misc/lkdtm/refcount.c new file mode 100644 index 000000000..0a146b32d --- /dev/null +++ b/drivers/misc/lkdtm/refcount.c @@ -0,0 +1,401 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * This is for all the tests related to refcount bugs (e.g. overflow, + * underflow, reaching zero untested, etc). + */ +#include "lkdtm.h" +#include + +#ifdef CONFIG_REFCOUNT_FULL +#define REFCOUNT_MAX (UINT_MAX - 1) +#define REFCOUNT_SATURATED UINT_MAX +#else +#define REFCOUNT_MAX INT_MAX +#define REFCOUNT_SATURATED (INT_MIN / 2) +#endif + +static void overflow_check(refcount_t *ref) +{ + switch (refcount_read(ref)) { + case REFCOUNT_SATURATED: + pr_info("Overflow detected: saturated\n"); + break; + case REFCOUNT_MAX: + pr_warn("Overflow detected: unsafely reset to max\n"); + break; + default: + pr_err("Fail: refcount wrapped to %d\n", refcount_read(ref)); + } +} + +/* + * A refcount_inc() above the maximum value of the refcount implementation, + * should at least saturate, and at most also WARN. + */ +void lkdtm_REFCOUNT_INC_OVERFLOW(void) +{ + refcount_t over = REFCOUNT_INIT(REFCOUNT_MAX - 1); + + pr_info("attempting good refcount_inc() without overflow\n"); + refcount_dec(&over); + refcount_inc(&over); + + pr_info("attempting bad refcount_inc() overflow\n"); + refcount_inc(&over); + refcount_inc(&over); + + overflow_check(&over); +} + +/* refcount_add() should behave just like refcount_inc() above. */ +void lkdtm_REFCOUNT_ADD_OVERFLOW(void) +{ + refcount_t over = REFCOUNT_INIT(REFCOUNT_MAX - 1); + + pr_info("attempting good refcount_add() without overflow\n"); + refcount_dec(&over); + refcount_dec(&over); + refcount_dec(&over); + refcount_dec(&over); + refcount_add(4, &over); + + pr_info("attempting bad refcount_add() overflow\n"); + refcount_add(4, &over); + + overflow_check(&over); +} + +/* refcount_inc_not_zero() should behave just like refcount_inc() above. */ +void lkdtm_REFCOUNT_INC_NOT_ZERO_OVERFLOW(void) +{ + refcount_t over = REFCOUNT_INIT(REFCOUNT_MAX); + + pr_info("attempting bad refcount_inc_not_zero() overflow\n"); + if (!refcount_inc_not_zero(&over)) + pr_warn("Weird: refcount_inc_not_zero() reported zero\n"); + + overflow_check(&over); +} + +/* refcount_add_not_zero() should behave just like refcount_inc() above. */ +void lkdtm_REFCOUNT_ADD_NOT_ZERO_OVERFLOW(void) +{ + refcount_t over = REFCOUNT_INIT(REFCOUNT_MAX); + + pr_info("attempting bad refcount_add_not_zero() overflow\n"); + if (!refcount_add_not_zero(6, &over)) + pr_warn("Weird: refcount_add_not_zero() reported zero\n"); + + overflow_check(&over); +} + +static void check_zero(refcount_t *ref) +{ + switch (refcount_read(ref)) { + case REFCOUNT_SATURATED: + pr_info("Zero detected: saturated\n"); + break; + case REFCOUNT_MAX: + pr_warn("Zero detected: unsafely reset to max\n"); + break; + case 0: + pr_warn("Still at zero: refcount_inc/add() must not inc-from-0\n"); + break; + default: + pr_err("Fail: refcount went crazy: %d\n", refcount_read(ref)); + } +} + +/* + * A refcount_dec(), as opposed to a refcount_dec_and_test(), when it hits + * zero it should either saturate (when inc-from-zero isn't protected) + * or stay at zero (when inc-from-zero is protected) and should WARN for both. + */ +void lkdtm_REFCOUNT_DEC_ZERO(void) +{ + refcount_t zero = REFCOUNT_INIT(2); + + pr_info("attempting good refcount_dec()\n"); + refcount_dec(&zero); + + pr_info("attempting bad refcount_dec() to zero\n"); + refcount_dec(&zero); + + check_zero(&zero); +} + +static void check_negative(refcount_t *ref, int start) +{ + /* + * CONFIG_REFCOUNT_FULL refuses to move a refcount at all on an + * over-sub, so we have to track our starting position instead of + * looking only at zero-pinning. + */ + if (refcount_read(ref) == start) { + pr_warn("Still at %d: refcount_inc/add() must not inc-from-0\n", + start); + return; + } + + switch (refcount_read(ref)) { + case REFCOUNT_SATURATED: + pr_info("Negative detected: saturated\n"); + break; + case REFCOUNT_MAX: + pr_warn("Negative detected: unsafely reset to max\n"); + break; + default: + pr_err("Fail: refcount went crazy: %d\n", refcount_read(ref)); + } +} + +/* A refcount_dec() going negative should saturate and may WARN. */ +void lkdtm_REFCOUNT_DEC_NEGATIVE(void) +{ + refcount_t neg = REFCOUNT_INIT(0); + + pr_info("attempting bad refcount_dec() below zero\n"); + refcount_dec(&neg); + + check_negative(&neg, 0); +} + +/* + * A refcount_dec_and_test() should act like refcount_dec() above when + * going negative. + */ +void lkdtm_REFCOUNT_DEC_AND_TEST_NEGATIVE(void) +{ + refcount_t neg = REFCOUNT_INIT(0); + + pr_info("attempting bad refcount_dec_and_test() below zero\n"); + if (refcount_dec_and_test(&neg)) + pr_warn("Weird: refcount_dec_and_test() reported zero\n"); + + check_negative(&neg, 0); +} + +/* + * A refcount_sub_and_test() should act like refcount_dec_and_test() + * above when going negative. + */ +void lkdtm_REFCOUNT_SUB_AND_TEST_NEGATIVE(void) +{ + refcount_t neg = REFCOUNT_INIT(3); + + pr_info("attempting bad refcount_sub_and_test() below zero\n"); + if (refcount_sub_and_test(5, &neg)) + pr_warn("Weird: refcount_sub_and_test() reported zero\n"); + + check_negative(&neg, 3); +} + +static void check_from_zero(refcount_t *ref) +{ + switch (refcount_read(ref)) { + case 0: + pr_info("Zero detected: stayed at zero\n"); + break; + case REFCOUNT_SATURATED: + pr_info("Zero detected: saturated\n"); + break; + case REFCOUNT_MAX: + pr_warn("Zero detected: unsafely reset to max\n"); + break; + default: + pr_info("Fail: zero not detected, incremented to %d\n", + refcount_read(ref)); + } +} + +/* + * A refcount_inc() from zero should pin to zero or saturate and may WARN. + * Only CONFIG_REFCOUNT_FULL provides this protection currently. + */ +void lkdtm_REFCOUNT_INC_ZERO(void) +{ + refcount_t zero = REFCOUNT_INIT(0); + + pr_info("attempting safe refcount_inc_not_zero() from zero\n"); + if (!refcount_inc_not_zero(&zero)) { + pr_info("Good: zero detected\n"); + if (refcount_read(&zero) == 0) + pr_info("Correctly stayed at zero\n"); + else + pr_err("Fail: refcount went past zero!\n"); + } else { + pr_err("Fail: Zero not detected!?\n"); + } + + pr_info("attempting bad refcount_inc() from zero\n"); + refcount_inc(&zero); + + check_from_zero(&zero); +} + +/* + * A refcount_add() should act like refcount_inc() above when starting + * at zero. + */ +void lkdtm_REFCOUNT_ADD_ZERO(void) +{ + refcount_t zero = REFCOUNT_INIT(0); + + pr_info("attempting safe refcount_add_not_zero() from zero\n"); + if (!refcount_add_not_zero(3, &zero)) { + pr_info("Good: zero detected\n"); + if (refcount_read(&zero) == 0) + pr_info("Correctly stayed at zero\n"); + else + pr_err("Fail: refcount went past zero\n"); + } else { + pr_err("Fail: Zero not detected!?\n"); + } + + pr_info("attempting bad refcount_add() from zero\n"); + refcount_add(3, &zero); + + check_from_zero(&zero); +} + +static void check_saturated(refcount_t *ref) +{ + switch (refcount_read(ref)) { + case REFCOUNT_SATURATED: + pr_info("Saturation detected: still saturated\n"); + break; + case REFCOUNT_MAX: + pr_warn("Saturation detected: unsafely reset to max\n"); + break; + default: + pr_err("Fail: refcount went crazy: %d\n", refcount_read(ref)); + } +} + +/* + * A refcount_inc() from a saturated value should at most warn about + * being saturated already. + */ +void lkdtm_REFCOUNT_INC_SATURATED(void) +{ + refcount_t sat = REFCOUNT_INIT(REFCOUNT_SATURATED); + + pr_info("attempting bad refcount_inc() from saturated\n"); + refcount_inc(&sat); + + check_saturated(&sat); +} + +/* Should act like refcount_inc() above from saturated. */ +void lkdtm_REFCOUNT_DEC_SATURATED(void) +{ + refcount_t sat = REFCOUNT_INIT(REFCOUNT_SATURATED); + + pr_info("attempting bad refcount_dec() from saturated\n"); + refcount_dec(&sat); + + check_saturated(&sat); +} + +/* Should act like refcount_inc() above from saturated. */ +void lkdtm_REFCOUNT_ADD_SATURATED(void) +{ + refcount_t sat = REFCOUNT_INIT(REFCOUNT_SATURATED); + + pr_info("attempting bad refcount_dec() from saturated\n"); + refcount_add(8, &sat); + + check_saturated(&sat); +} + +/* Should act like refcount_inc() above from saturated. */ +void lkdtm_REFCOUNT_INC_NOT_ZERO_SATURATED(void) +{ + refcount_t sat = REFCOUNT_INIT(REFCOUNT_SATURATED); + + pr_info("attempting bad refcount_inc_not_zero() from saturated\n"); + if (!refcount_inc_not_zero(&sat)) + pr_warn("Weird: refcount_inc_not_zero() reported zero\n"); + + check_saturated(&sat); +} + +/* Should act like refcount_inc() above from saturated. */ +void lkdtm_REFCOUNT_ADD_NOT_ZERO_SATURATED(void) +{ + refcount_t sat = REFCOUNT_INIT(REFCOUNT_SATURATED); + + pr_info("attempting bad refcount_add_not_zero() from saturated\n"); + if (!refcount_add_not_zero(7, &sat)) + pr_warn("Weird: refcount_add_not_zero() reported zero\n"); + + check_saturated(&sat); +} + +/* Should act like refcount_inc() above from saturated. */ +void lkdtm_REFCOUNT_DEC_AND_TEST_SATURATED(void) +{ + refcount_t sat = REFCOUNT_INIT(REFCOUNT_SATURATED); + + pr_info("attempting bad refcount_dec_and_test() from saturated\n"); + if (refcount_dec_and_test(&sat)) + pr_warn("Weird: refcount_dec_and_test() reported zero\n"); + + check_saturated(&sat); +} + +/* Should act like refcount_inc() above from saturated. */ +void lkdtm_REFCOUNT_SUB_AND_TEST_SATURATED(void) +{ + refcount_t sat = REFCOUNT_INIT(REFCOUNT_SATURATED); + + pr_info("attempting bad refcount_sub_and_test() from saturated\n"); + if (refcount_sub_and_test(8, &sat)) + pr_warn("Weird: refcount_sub_and_test() reported zero\n"); + + check_saturated(&sat); +} + +/* Used to time the existing atomic_t when used for reference counting */ +void lkdtm_ATOMIC_TIMING(void) +{ + unsigned int i; + atomic_t count = ATOMIC_INIT(1); + + for (i = 0; i < INT_MAX - 1; i++) + atomic_inc(&count); + + for (i = INT_MAX; i > 0; i--) + if (atomic_dec_and_test(&count)) + break; + + if (i != 1) + pr_err("atomic timing: out of sync up/down cycle: %u\n", i - 1); + else + pr_info("atomic timing: done\n"); +} + +/* + * This can be compared to ATOMIC_TIMING when implementing fast refcount + * protections. Looking at the number of CPU cycles tells the real story + * about performance. For example: + * cd /sys/kernel/debug/provoke-crash + * perf stat -B -- cat <(echo REFCOUNT_TIMING) > DIRECT + */ +void lkdtm_REFCOUNT_TIMING(void) +{ + unsigned int i; + refcount_t count = REFCOUNT_INIT(1); + + for (i = 0; i < INT_MAX - 1; i++) + refcount_inc(&count); + + for (i = INT_MAX; i > 0; i--) + if (refcount_dec_and_test(&count)) + break; + + if (i != 1) + pr_err("refcount: out of sync up/down cycle: %u\n", i - 1); + else + pr_info("refcount timing: done\n"); +} diff --git a/drivers/misc/lkdtm/rodata.c b/drivers/misc/lkdtm/rodata.c new file mode 100644 index 000000000..baacb876d --- /dev/null +++ b/drivers/misc/lkdtm/rodata.c @@ -0,0 +1,11 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * This includes functions that are meant to live entirely in .rodata + * (via objcopy tricks), to validate the non-executability of .rodata. + */ +#include "lkdtm.h" + +void noinstr lkdtm_rodata_do_nothing(void) +{ + /* Does nothing. We just want an architecture agnostic "return". */ +} diff --git a/drivers/misc/lkdtm/usercopy.c b/drivers/misc/lkdtm/usercopy.c new file mode 100644 index 000000000..b0e020372 --- /dev/null +++ b/drivers/misc/lkdtm/usercopy.c @@ -0,0 +1,350 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * This is for all the tests related to copy_to_user() and copy_from_user() + * hardening. + */ +#include "lkdtm.h" +#include +#include +#include +#include +#include +#include + +/* + * Many of the tests here end up using const sizes, but those would + * normally be ignored by hardened usercopy, so force the compiler + * into choosing the non-const path to make sure we trigger the + * hardened usercopy checks by added "unconst" to all the const copies, + * and making sure "cache_size" isn't optimized into a const. + */ +static volatile size_t unconst = 0; +static volatile size_t cache_size = 1024; +static struct kmem_cache *whitelist_cache; + +static const unsigned char test_text[] = "This is a test.\n"; + +/* + * Instead of adding -Wno-return-local-addr, just pass the stack address + * through a function to obfuscate it from the compiler. + */ +static noinline unsigned char *trick_compiler(unsigned char *stack) +{ + return stack + unconst; +} + +static noinline unsigned char *do_usercopy_stack_callee(int value) +{ + unsigned char buf[128]; + int i; + + /* Exercise stack to avoid everything living in registers. */ + for (i = 0; i < sizeof(buf); i++) { + buf[i] = value & 0xff; + } + + /* + * Put the target buffer in the middle of stack allocation + * so that we don't step on future stack users regardless + * of stack growth direction. + */ + return trick_compiler(&buf[(128/2)-32]); +} + +static noinline void do_usercopy_stack(bool to_user, bool bad_frame) +{ + unsigned long user_addr; + unsigned char good_stack[32]; + unsigned char *bad_stack; + int i; + + /* Exercise stack to avoid everything living in registers. */ + for (i = 0; i < sizeof(good_stack); i++) + good_stack[i] = test_text[i % sizeof(test_text)]; + + /* This is a pointer to outside our current stack frame. */ + if (bad_frame) { + bad_stack = do_usercopy_stack_callee((uintptr_t)&bad_stack); + } else { + /* Put start address just inside stack. */ + bad_stack = task_stack_page(current) + THREAD_SIZE; + bad_stack -= sizeof(unsigned long); + } + +#ifdef ARCH_HAS_CURRENT_STACK_POINTER + pr_info("stack : %px\n", (void *)current_stack_pointer); +#endif + pr_info("good_stack: %px-%px\n", good_stack, good_stack + sizeof(good_stack)); + pr_info("bad_stack : %px-%px\n", bad_stack, bad_stack + sizeof(good_stack)); + + user_addr = vm_mmap(NULL, 0, PAGE_SIZE, + PROT_READ | PROT_WRITE | PROT_EXEC, + MAP_ANONYMOUS | MAP_PRIVATE, 0); + if (user_addr >= TASK_SIZE) { + pr_warn("Failed to allocate user memory\n"); + return; + } + + if (to_user) { + pr_info("attempting good copy_to_user of local stack\n"); + if (copy_to_user((void __user *)user_addr, good_stack, + unconst + sizeof(good_stack))) { + pr_warn("copy_to_user failed unexpectedly?!\n"); + goto free_user; + } + + pr_info("attempting bad copy_to_user of distant stack\n"); + if (copy_to_user((void __user *)user_addr, bad_stack, + unconst + sizeof(good_stack))) { + pr_warn("copy_to_user failed, but lacked Oops\n"); + goto free_user; + } + } else { + /* + * There isn't a safe way to not be protected by usercopy + * if we're going to write to another thread's stack. + */ + if (!bad_frame) + goto free_user; + + pr_info("attempting good copy_from_user of local stack\n"); + if (copy_from_user(good_stack, (void __user *)user_addr, + unconst + sizeof(good_stack))) { + pr_warn("copy_from_user failed unexpectedly?!\n"); + goto free_user; + } + + pr_info("attempting bad copy_from_user of distant stack\n"); + if (copy_from_user(bad_stack, (void __user *)user_addr, + unconst + sizeof(good_stack))) { + pr_warn("copy_from_user failed, but lacked Oops\n"); + goto free_user; + } + } + +free_user: + vm_munmap(user_addr, PAGE_SIZE); +} + +/* + * This checks for whole-object size validation with hardened usercopy, + * with or without usercopy whitelisting. + */ +static void do_usercopy_heap_size(bool to_user) +{ + unsigned long user_addr; + unsigned char *one, *two; + void __user *test_user_addr; + void *test_kern_addr; + size_t size = unconst + 1024; + + one = kmalloc(size, GFP_KERNEL); + two = kmalloc(size, GFP_KERNEL); + if (!one || !two) { + pr_warn("Failed to allocate kernel memory\n"); + goto free_kernel; + } + + user_addr = vm_mmap(NULL, 0, PAGE_SIZE, + PROT_READ | PROT_WRITE | PROT_EXEC, + MAP_ANONYMOUS | MAP_PRIVATE, 0); + if (user_addr >= TASK_SIZE) { + pr_warn("Failed to allocate user memory\n"); + goto free_kernel; + } + + memset(one, 'A', size); + memset(two, 'B', size); + + test_user_addr = (void __user *)(user_addr + 16); + test_kern_addr = one + 16; + + if (to_user) { + pr_info("attempting good copy_to_user of correct size\n"); + if (copy_to_user(test_user_addr, test_kern_addr, size / 2)) { + pr_warn("copy_to_user failed unexpectedly?!\n"); + goto free_user; + } + + pr_info("attempting bad copy_to_user of too large size\n"); + if (copy_to_user(test_user_addr, test_kern_addr, size)) { + pr_warn("copy_to_user failed, but lacked Oops\n"); + goto free_user; + } + } else { + pr_info("attempting good copy_from_user of correct size\n"); + if (copy_from_user(test_kern_addr, test_user_addr, size / 2)) { + pr_warn("copy_from_user failed unexpectedly?!\n"); + goto free_user; + } + + pr_info("attempting bad copy_from_user of too large size\n"); + if (copy_from_user(test_kern_addr, test_user_addr, size)) { + pr_warn("copy_from_user failed, but lacked Oops\n"); + goto free_user; + } + } + +free_user: + vm_munmap(user_addr, PAGE_SIZE); +free_kernel: + kfree(one); + kfree(two); +} + +/* + * This checks for the specific whitelist window within an object. If this + * test passes, then do_usercopy_heap_size() tests will pass too. + */ +static void do_usercopy_heap_whitelist(bool to_user) +{ + unsigned long user_alloc; + unsigned char *buf = NULL; + unsigned char __user *user_addr; + size_t offset, size; + + /* Make sure cache was prepared. */ + if (!whitelist_cache) { + pr_warn("Failed to allocate kernel cache\n"); + return; + } + + /* + * Allocate a buffer with a whitelisted window in the buffer. + */ + buf = kmem_cache_alloc(whitelist_cache, GFP_KERNEL); + if (!buf) { + pr_warn("Failed to allocate buffer from whitelist cache\n"); + goto free_alloc; + } + + /* Allocate user memory we'll poke at. */ + user_alloc = vm_mmap(NULL, 0, PAGE_SIZE, + PROT_READ | PROT_WRITE | PROT_EXEC, + MAP_ANONYMOUS | MAP_PRIVATE, 0); + if (user_alloc >= TASK_SIZE) { + pr_warn("Failed to allocate user memory\n"); + goto free_alloc; + } + user_addr = (void __user *)user_alloc; + + memset(buf, 'B', cache_size); + + /* Whitelisted window in buffer, from kmem_cache_create_usercopy. */ + offset = (cache_size / 4) + unconst; + size = (cache_size / 16) + unconst; + + if (to_user) { + pr_info("attempting good copy_to_user inside whitelist\n"); + if (copy_to_user(user_addr, buf + offset, size)) { + pr_warn("copy_to_user failed unexpectedly?!\n"); + goto free_user; + } + + pr_info("attempting bad copy_to_user outside whitelist\n"); + if (copy_to_user(user_addr, buf + offset - 1, size)) { + pr_warn("copy_to_user failed, but lacked Oops\n"); + goto free_user; + } + } else { + pr_info("attempting good copy_from_user inside whitelist\n"); + if (copy_from_user(buf + offset, user_addr, size)) { + pr_warn("copy_from_user failed unexpectedly?!\n"); + goto free_user; + } + + pr_info("attempting bad copy_from_user outside whitelist\n"); + if (copy_from_user(buf + offset - 1, user_addr, size)) { + pr_warn("copy_from_user failed, but lacked Oops\n"); + goto free_user; + } + } + +free_user: + vm_munmap(user_alloc, PAGE_SIZE); +free_alloc: + if (buf) + kmem_cache_free(whitelist_cache, buf); +} + +/* Callable tests. */ +void lkdtm_USERCOPY_HEAP_SIZE_TO(void) +{ + do_usercopy_heap_size(true); +} + +void lkdtm_USERCOPY_HEAP_SIZE_FROM(void) +{ + do_usercopy_heap_size(false); +} + +void lkdtm_USERCOPY_HEAP_WHITELIST_TO(void) +{ + do_usercopy_heap_whitelist(true); +} + +void lkdtm_USERCOPY_HEAP_WHITELIST_FROM(void) +{ + do_usercopy_heap_whitelist(false); +} + +void lkdtm_USERCOPY_STACK_FRAME_TO(void) +{ + do_usercopy_stack(true, true); +} + +void lkdtm_USERCOPY_STACK_FRAME_FROM(void) +{ + do_usercopy_stack(false, true); +} + +void lkdtm_USERCOPY_STACK_BEYOND(void) +{ + do_usercopy_stack(true, false); +} + +void lkdtm_USERCOPY_KERNEL(void) +{ + unsigned long user_addr; + + user_addr = vm_mmap(NULL, 0, PAGE_SIZE, + PROT_READ | PROT_WRITE | PROT_EXEC, + MAP_ANONYMOUS | MAP_PRIVATE, 0); + if (user_addr >= TASK_SIZE) { + pr_warn("Failed to allocate user memory\n"); + return; + } + + pr_info("attempting good copy_to_user from kernel rodata\n"); + if (copy_to_user((void __user *)user_addr, test_text, + unconst + sizeof(test_text))) { + pr_warn("copy_to_user failed unexpectedly?!\n"); + goto free_user; + } + + pr_info("attempting bad copy_to_user from kernel text\n"); + if (copy_to_user((void __user *)user_addr, vm_mmap, + unconst + PAGE_SIZE)) { + pr_warn("copy_to_user failed, but lacked Oops\n"); + goto free_user; + } + +free_user: + vm_munmap(user_addr, PAGE_SIZE); +} + +void __init lkdtm_usercopy_init(void) +{ + /* Prepare cache that lacks SLAB_USERCOPY flag. */ + whitelist_cache = + kmem_cache_create_usercopy("lkdtm-usercopy", cache_size, + 0, 0, + cache_size / 4, + cache_size / 16, + NULL); +} + +void __exit lkdtm_usercopy_exit(void) +{ + kmem_cache_destroy(whitelist_cache); +} diff --git a/drivers/misc/mei/Kconfig b/drivers/misc/mei/Kconfig new file mode 100644 index 000000000..c49e1d226 --- /dev/null +++ b/drivers/misc/mei/Kconfig @@ -0,0 +1,45 @@ +config INTEL_MEI + tristate "Intel Management Engine Interface" + depends on X86 && PCI + help + The Intel Management Engine (Intel ME) provides Manageability, + Security and Media services for system containing Intel chipsets. + if selected /dev/mei misc device will be created. + + For more information see + + +config INTEL_MEI_ME + tristate "ME Enabled Intel Chipsets" + select INTEL_MEI + depends on X86 && PCI + help + MEI support for ME Enabled Intel chipsets. + + Supported Chipsets are: + 7 Series Chipset Family + 6 Series Chipset Family + 5 Series Chipset Family + 4 Series Chipset Family + Mobile 4 Series Chipset Family + ICH9 + 82946GZ/GL + 82G35 Express + 82Q963/Q965 + 82P965/G965 + Mobile PM965/GM965 + Mobile GME965/GLE960 + 82Q35 Express + 82G33/G31/P35/P31 Express + 82Q33 Express + 82X38/X48 Express + +config INTEL_MEI_TXE + tristate "Intel Trusted Execution Environment with ME Interface" + select INTEL_MEI + depends on X86 && PCI + help + MEI Support for Trusted Execution Environment device on Intel SoCs + + Supported SoCs: + Intel Bay Trail diff --git a/drivers/misc/mei/Makefile b/drivers/misc/mei/Makefile new file mode 100644 index 000000000..cd6825afa --- /dev/null +++ b/drivers/misc/mei/Makefile @@ -0,0 +1,25 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile - Intel Management Engine Interface (Intel MEI) Linux driver +# Copyright (c) 2010-2014, Intel Corporation. +# +obj-$(CONFIG_INTEL_MEI) += mei.o +mei-objs := init.o +mei-objs += hbm.o +mei-objs += interrupt.o +mei-objs += client.o +mei-objs += main.o +mei-objs += bus.o +mei-objs += bus-fixup.o +mei-$(CONFIG_DEBUG_FS) += debugfs.o + +obj-$(CONFIG_INTEL_MEI_ME) += mei-me.o +mei-me-objs := pci-me.o +mei-me-objs += hw-me.o + +obj-$(CONFIG_INTEL_MEI_TXE) += mei-txe.o +mei-txe-objs := pci-txe.o +mei-txe-objs += hw-txe.o + +mei-$(CONFIG_EVENT_TRACING) += mei-trace.o +CFLAGS_mei-trace.o = -I$(src) diff --git a/drivers/misc/mei/bus-fixup.c b/drivers/misc/mei/bus-fixup.c new file mode 100644 index 000000000..198e030e5 --- /dev/null +++ b/drivers/misc/mei/bus-fixup.c @@ -0,0 +1,505 @@ +/* + * + * Intel Management Engine Interface (Intel MEI) Linux driver + * Copyright (c) 2003-2018, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "mei_dev.h" +#include "client.h" + +#define MEI_UUID_NFC_INFO UUID_LE(0xd2de1625, 0x382d, 0x417d, \ + 0x48, 0xa4, 0xef, 0xab, 0xba, 0x8a, 0x12, 0x06) + +static const uuid_le mei_nfc_info_guid = MEI_UUID_NFC_INFO; + +#define MEI_UUID_NFC_HCI UUID_LE(0x0bb17a78, 0x2a8e, 0x4c50, \ + 0x94, 0xd4, 0x50, 0x26, 0x67, 0x23, 0x77, 0x5c) + +#define MEI_UUID_WD UUID_LE(0x05B79A6F, 0x4628, 0x4D7F, \ + 0x89, 0x9D, 0xA9, 0x15, 0x14, 0xCB, 0x32, 0xAB) + +#define MEI_UUID_MKHIF_FIX UUID_LE(0x55213584, 0x9a29, 0x4916, \ + 0xba, 0xdf, 0xf, 0xb7, 0xed, 0x68, 0x2a, 0xeb) + +#define MEI_UUID_ANY NULL_UUID_LE + +/** + * number_of_connections - determine whether an client be on the bus + * according number of connections + * We support only clients: + * 1. with single connection + * 2. and fixed clients (max_number_of_connections == 0) + * + * @cldev: me clients device + */ +static void number_of_connections(struct mei_cl_device *cldev) +{ + dev_dbg(&cldev->dev, "running hook %s\n", __func__); + + if (cldev->me_cl->props.max_number_of_connections > 1) + cldev->do_match = 0; +} + +/** + * blacklist - blacklist a client from the bus + * + * @cldev: me clients device + */ +static void blacklist(struct mei_cl_device *cldev) +{ + dev_dbg(&cldev->dev, "running hook %s\n", __func__); + + cldev->do_match = 0; +} + +#define OSTYPE_LINUX 2 +struct mei_os_ver { + __le16 build; + __le16 reserved1; + u8 os_type; + u8 major; + u8 minor; + u8 reserved2; +} __packed; + +#define MKHI_FEATURE_PTT 0x10 + +struct mkhi_rule_id { + __le16 rule_type; + u8 feature_id; + u8 reserved; +} __packed; + +struct mkhi_fwcaps { + struct mkhi_rule_id id; + u8 len; + u8 data[0]; +} __packed; + +struct mkhi_fw_ver_block { + u16 minor; + u8 major; + u8 platform; + u16 buildno; + u16 hotfix; +} __packed; + +struct mkhi_fw_ver { + struct mkhi_fw_ver_block ver[MEI_MAX_FW_VER_BLOCKS]; +} __packed; + +#define MKHI_FWCAPS_GROUP_ID 0x3 +#define MKHI_FWCAPS_SET_OS_VER_APP_RULE_CMD 6 +#define MKHI_GEN_GROUP_ID 0xFF +#define MKHI_GEN_GET_FW_VERSION_CMD 0x2 +struct mkhi_msg_hdr { + u8 group_id; + u8 command; + u8 reserved; + u8 result; +} __packed; + +struct mkhi_msg { + struct mkhi_msg_hdr hdr; + u8 data[0]; +} __packed; + +#define MKHI_OSVER_BUF_LEN (sizeof(struct mkhi_msg_hdr) + \ + sizeof(struct mkhi_fwcaps) + \ + sizeof(struct mei_os_ver)) +static int mei_osver(struct mei_cl_device *cldev) +{ + const size_t size = MKHI_OSVER_BUF_LEN; + char buf[MKHI_OSVER_BUF_LEN]; + struct mkhi_msg *req; + struct mkhi_fwcaps *fwcaps; + struct mei_os_ver *os_ver; + unsigned int mode = MEI_CL_IO_TX_BLOCKING | MEI_CL_IO_TX_INTERNAL; + + memset(buf, 0, size); + + req = (struct mkhi_msg *)buf; + req->hdr.group_id = MKHI_FWCAPS_GROUP_ID; + req->hdr.command = MKHI_FWCAPS_SET_OS_VER_APP_RULE_CMD; + + fwcaps = (struct mkhi_fwcaps *)req->data; + + fwcaps->id.rule_type = 0x0; + fwcaps->id.feature_id = MKHI_FEATURE_PTT; + fwcaps->len = sizeof(*os_ver); + os_ver = (struct mei_os_ver *)fwcaps->data; + os_ver->os_type = OSTYPE_LINUX; + + return __mei_cl_send(cldev->cl, buf, size, mode); +} + +#define MKHI_FWVER_BUF_LEN (sizeof(struct mkhi_msg_hdr) + \ + sizeof(struct mkhi_fw_ver)) +#define MKHI_FWVER_LEN(__num) (sizeof(struct mkhi_msg_hdr) + \ + sizeof(struct mkhi_fw_ver_block) * (__num)) +#define MKHI_RCV_TIMEOUT 500 /* receive timeout in msec */ +static int mei_fwver(struct mei_cl_device *cldev) +{ + char buf[MKHI_FWVER_BUF_LEN]; + struct mkhi_msg *req; + struct mkhi_fw_ver *fwver; + int bytes_recv, ret, i; + + memset(buf, 0, sizeof(buf)); + + req = (struct mkhi_msg *)buf; + req->hdr.group_id = MKHI_GEN_GROUP_ID; + req->hdr.command = MKHI_GEN_GET_FW_VERSION_CMD; + + ret = __mei_cl_send(cldev->cl, buf, sizeof(struct mkhi_msg_hdr), + MEI_CL_IO_TX_BLOCKING); + if (ret < 0) { + dev_err(&cldev->dev, "Could not send ReqFWVersion cmd\n"); + return ret; + } + + ret = 0; + bytes_recv = __mei_cl_recv(cldev->cl, buf, sizeof(buf), 0, + MKHI_RCV_TIMEOUT); + if (bytes_recv < 0 || (size_t)bytes_recv < MKHI_FWVER_LEN(1)) { + /* + * Should be at least one version block, + * error out if nothing found + */ + dev_err(&cldev->dev, "Could not read FW version\n"); + return -EIO; + } + + fwver = (struct mkhi_fw_ver *)req->data; + memset(cldev->bus->fw_ver, 0, sizeof(cldev->bus->fw_ver)); + for (i = 0; i < MEI_MAX_FW_VER_BLOCKS; i++) { + if ((size_t)bytes_recv < MKHI_FWVER_LEN(i + 1)) + break; + dev_dbg(&cldev->dev, "FW version%d %d:%d.%d.%d.%d\n", + i, fwver->ver[i].platform, + fwver->ver[i].major, fwver->ver[i].minor, + fwver->ver[i].hotfix, fwver->ver[i].buildno); + + cldev->bus->fw_ver[i].platform = fwver->ver[i].platform; + cldev->bus->fw_ver[i].major = fwver->ver[i].major; + cldev->bus->fw_ver[i].minor = fwver->ver[i].minor; + cldev->bus->fw_ver[i].hotfix = fwver->ver[i].hotfix; + cldev->bus->fw_ver[i].buildno = fwver->ver[i].buildno; + } + + return ret; +} + +static void mei_mkhi_fix(struct mei_cl_device *cldev) +{ + int ret; + + /* No need to enable the client if nothing is needed from it */ + if (!cldev->bus->fw_f_fw_ver_supported && + !cldev->bus->hbm_f_os_supported) + return; + + ret = mei_cldev_enable(cldev); + if (ret) + return; + + if (cldev->bus->fw_f_fw_ver_supported) { + ret = mei_fwver(cldev); + if (ret < 0) + dev_err(&cldev->dev, "FW version command failed %d\n", + ret); + } + + if (cldev->bus->hbm_f_os_supported) { + ret = mei_osver(cldev); + if (ret < 0) + dev_err(&cldev->dev, "OS version command failed %d\n", + ret); + } + mei_cldev_disable(cldev); +} + +/** + * mei_wd - wd client on the bus, change protocol version + * as the API has changed. + * + * @cldev: me clients device + */ +#if IS_ENABLED(CONFIG_INTEL_MEI_ME) +#include +#include "hw-me-regs.h" +static void mei_wd(struct mei_cl_device *cldev) +{ + struct pci_dev *pdev = to_pci_dev(cldev->dev.parent); + + dev_dbg(&cldev->dev, "running hook %s\n", __func__); + if (pdev->device == MEI_DEV_ID_WPT_LP || + pdev->device == MEI_DEV_ID_SPT || + pdev->device == MEI_DEV_ID_SPT_H) + cldev->me_cl->props.protocol_version = 0x2; + + cldev->do_match = 1; +} +#else +static inline void mei_wd(struct mei_cl_device *cldev) {} +#endif /* CONFIG_INTEL_MEI_ME */ + +struct mei_nfc_cmd { + u8 command; + u8 status; + u16 req_id; + u32 reserved; + u16 data_size; + u8 sub_command; + u8 data[]; +} __packed; + +struct mei_nfc_reply { + u8 command; + u8 status; + u16 req_id; + u32 reserved; + u16 data_size; + u8 sub_command; + u8 reply_status; + u8 data[]; +} __packed; + +struct mei_nfc_if_version { + u8 radio_version_sw[3]; + u8 reserved[3]; + u8 radio_version_hw[3]; + u8 i2c_addr; + u8 fw_ivn; + u8 vendor_id; + u8 radio_type; +} __packed; + + +#define MEI_NFC_CMD_MAINTENANCE 0x00 +#define MEI_NFC_SUBCMD_IF_VERSION 0x01 + +/* Vendors */ +#define MEI_NFC_VENDOR_INSIDE 0x00 +#define MEI_NFC_VENDOR_NXP 0x01 + +/* Radio types */ +#define MEI_NFC_VENDOR_INSIDE_UREAD 0x00 +#define MEI_NFC_VENDOR_NXP_PN544 0x01 + +/** + * mei_nfc_if_version - get NFC interface version + * + * @cl: host client (nfc info) + * @ver: NFC interface version to be filled in + * + * Return: 0 on success; < 0 otherwise + */ +static int mei_nfc_if_version(struct mei_cl *cl, + struct mei_nfc_if_version *ver) +{ + struct mei_device *bus; + struct mei_nfc_cmd cmd = { + .command = MEI_NFC_CMD_MAINTENANCE, + .data_size = 1, + .sub_command = MEI_NFC_SUBCMD_IF_VERSION, + }; + struct mei_nfc_reply *reply = NULL; + size_t if_version_length; + int bytes_recv, ret; + + bus = cl->dev; + + WARN_ON(mutex_is_locked(&bus->device_lock)); + + ret = __mei_cl_send(cl, (u8 *)&cmd, sizeof(struct mei_nfc_cmd), + MEI_CL_IO_TX_BLOCKING); + if (ret < 0) { + dev_err(bus->dev, "Could not send IF version cmd\n"); + return ret; + } + + /* to be sure on the stack we alloc memory */ + if_version_length = sizeof(struct mei_nfc_reply) + + sizeof(struct mei_nfc_if_version); + + reply = kzalloc(if_version_length, GFP_KERNEL); + if (!reply) + return -ENOMEM; + + ret = 0; + bytes_recv = __mei_cl_recv(cl, (u8 *)reply, if_version_length, 0, 0); + if (bytes_recv < 0 || (size_t)bytes_recv < if_version_length) { + dev_err(bus->dev, "Could not read IF version\n"); + ret = -EIO; + goto err; + } + + memcpy(ver, reply->data, sizeof(struct mei_nfc_if_version)); + + dev_info(bus->dev, "NFC MEI VERSION: IVN 0x%x Vendor ID 0x%x Type 0x%x\n", + ver->fw_ivn, ver->vendor_id, ver->radio_type); + +err: + kfree(reply); + return ret; +} + +/** + * mei_nfc_radio_name - derive nfc radio name from the interface version + * + * @ver: NFC radio version + * + * Return: radio name string + */ +static const char *mei_nfc_radio_name(struct mei_nfc_if_version *ver) +{ + + if (ver->vendor_id == MEI_NFC_VENDOR_INSIDE) { + if (ver->radio_type == MEI_NFC_VENDOR_INSIDE_UREAD) + return "microread"; + } + + if (ver->vendor_id == MEI_NFC_VENDOR_NXP) { + if (ver->radio_type == MEI_NFC_VENDOR_NXP_PN544) + return "pn544"; + } + + return NULL; +} + +/** + * mei_nfc - The nfc fixup function. The function retrieves nfc radio + * name and set is as device attribute so we can load + * the proper device driver for it + * + * @cldev: me client device (nfc) + */ +static void mei_nfc(struct mei_cl_device *cldev) +{ + struct mei_device *bus; + struct mei_cl *cl; + struct mei_me_client *me_cl = NULL; + struct mei_nfc_if_version ver; + const char *radio_name = NULL; + int ret; + + bus = cldev->bus; + + dev_dbg(&cldev->dev, "running hook %s\n", __func__); + + mutex_lock(&bus->device_lock); + /* we need to connect to INFO GUID */ + cl = mei_cl_alloc_linked(bus); + if (IS_ERR(cl)) { + ret = PTR_ERR(cl); + cl = NULL; + dev_err(bus->dev, "nfc hook alloc failed %d\n", ret); + goto out; + } + + me_cl = mei_me_cl_by_uuid(bus, &mei_nfc_info_guid); + if (!me_cl) { + ret = -ENOTTY; + dev_err(bus->dev, "Cannot find nfc info %d\n", ret); + goto out; + } + + ret = mei_cl_connect(cl, me_cl, NULL); + if (ret < 0) { + dev_err(&cldev->dev, "Can't connect to the NFC INFO ME ret = %d\n", + ret); + goto out; + } + + mutex_unlock(&bus->device_lock); + + ret = mei_nfc_if_version(cl, &ver); + if (ret) + goto disconnect; + + radio_name = mei_nfc_radio_name(&ver); + + if (!radio_name) { + ret = -ENOENT; + dev_err(&cldev->dev, "Can't get the NFC interface version ret = %d\n", + ret); + goto disconnect; + } + + dev_dbg(bus->dev, "nfc radio %s\n", radio_name); + strlcpy(cldev->name, radio_name, sizeof(cldev->name)); + +disconnect: + mutex_lock(&bus->device_lock); + if (mei_cl_disconnect(cl) < 0) + dev_err(bus->dev, "Can't disconnect the NFC INFO ME\n"); + + mei_cl_flush_queues(cl, NULL); + +out: + mei_cl_unlink(cl); + mutex_unlock(&bus->device_lock); + mei_me_cl_put(me_cl); + kfree(cl); + + if (ret) + cldev->do_match = 0; + + dev_dbg(bus->dev, "end of fixup match = %d\n", cldev->do_match); +} + +#define MEI_FIXUP(_uuid, _hook) { _uuid, _hook } + +static struct mei_fixup { + + const uuid_le uuid; + void (*hook)(struct mei_cl_device *cldev); +} mei_fixups[] = { + MEI_FIXUP(MEI_UUID_ANY, number_of_connections), + MEI_FIXUP(MEI_UUID_NFC_INFO, blacklist), + MEI_FIXUP(MEI_UUID_NFC_HCI, mei_nfc), + MEI_FIXUP(MEI_UUID_WD, mei_wd), + MEI_FIXUP(MEI_UUID_MKHIF_FIX, mei_mkhi_fix), +}; + +/** + * mei_cldev_fixup - run fixup handlers + * + * @cldev: me client device + */ +void mei_cl_bus_dev_fixup(struct mei_cl_device *cldev) +{ + struct mei_fixup *f; + const uuid_le *uuid = mei_me_cl_uuid(cldev->me_cl); + size_t i; + + for (i = 0; i < ARRAY_SIZE(mei_fixups); i++) { + + f = &mei_fixups[i]; + if (uuid_le_cmp(f->uuid, MEI_UUID_ANY) == 0 || + uuid_le_cmp(f->uuid, *uuid) == 0) + f->hook(cldev); + } +} + diff --git a/drivers/misc/mei/bus.c b/drivers/misc/mei/bus.c new file mode 100644 index 000000000..57b5868c9 --- /dev/null +++ b/drivers/misc/mei/bus.c @@ -0,0 +1,1153 @@ +/* + * Intel Management Engine Interface (Intel MEI) Linux driver + * Copyright (c) 2012-2013, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "mei_dev.h" +#include "client.h" + +#define to_mei_cl_driver(d) container_of(d, struct mei_cl_driver, driver) +#define to_mei_cl_device(d) container_of(d, struct mei_cl_device, dev) + +/** + * __mei_cl_send - internal client send (write) + * + * @cl: host client + * @buf: buffer to send + * @length: buffer length + * @mode: sending mode + * + * Return: written size bytes or < 0 on error + */ +ssize_t __mei_cl_send(struct mei_cl *cl, u8 *buf, size_t length, + unsigned int mode) +{ + struct mei_device *bus; + struct mei_cl_cb *cb; + ssize_t rets; + + if (WARN_ON(!cl || !cl->dev)) + return -ENODEV; + + bus = cl->dev; + + mutex_lock(&bus->device_lock); + if (bus->dev_state != MEI_DEV_ENABLED) { + rets = -ENODEV; + goto out; + } + + if (!mei_cl_is_connected(cl)) { + rets = -ENODEV; + goto out; + } + + /* Check if we have an ME client device */ + if (!mei_me_cl_is_active(cl->me_cl)) { + rets = -ENOTTY; + goto out; + } + + if (length > mei_cl_mtu(cl)) { + rets = -EFBIG; + goto out; + } + + while (cl->tx_cb_queued >= bus->tx_queue_limit) { + mutex_unlock(&bus->device_lock); + rets = wait_event_interruptible(cl->tx_wait, + cl->writing_state == MEI_WRITE_COMPLETE || + (!mei_cl_is_connected(cl))); + mutex_lock(&bus->device_lock); + if (rets) { + if (signal_pending(current)) + rets = -EINTR; + goto out; + } + if (!mei_cl_is_connected(cl)) { + rets = -ENODEV; + goto out; + } + } + + cb = mei_cl_alloc_cb(cl, length, MEI_FOP_WRITE, NULL); + if (!cb) { + rets = -ENOMEM; + goto out; + } + + cb->internal = !!(mode & MEI_CL_IO_TX_INTERNAL); + cb->blocking = !!(mode & MEI_CL_IO_TX_BLOCKING); + memcpy(cb->buf.data, buf, length); + + rets = mei_cl_write(cl, cb); + +out: + mutex_unlock(&bus->device_lock); + + return rets; +} + +/** + * __mei_cl_recv - internal client receive (read) + * + * @cl: host client + * @buf: buffer to receive + * @length: buffer length + * @mode: io mode + * @timeout: recv timeout, 0 for infinite timeout + * + * Return: read size in bytes of < 0 on error + */ +ssize_t __mei_cl_recv(struct mei_cl *cl, u8 *buf, size_t length, + unsigned int mode, unsigned long timeout) +{ + struct mei_device *bus; + struct mei_cl_cb *cb; + size_t r_length; + ssize_t rets; + bool nonblock = !!(mode & MEI_CL_IO_RX_NONBLOCK); + + if (WARN_ON(!cl || !cl->dev)) + return -ENODEV; + + bus = cl->dev; + + mutex_lock(&bus->device_lock); + if (bus->dev_state != MEI_DEV_ENABLED) { + rets = -ENODEV; + goto out; + } + + cb = mei_cl_read_cb(cl, NULL); + if (cb) + goto copy; + + rets = mei_cl_read_start(cl, length, NULL); + if (rets && rets != -EBUSY) + goto out; + + if (nonblock) { + rets = -EAGAIN; + goto out; + } + + /* wait on event only if there is no other waiter */ + /* synchronized under device mutex */ + if (!waitqueue_active(&cl->rx_wait)) { + + mutex_unlock(&bus->device_lock); + + if (timeout) { + rets = wait_event_interruptible_timeout + (cl->rx_wait, + (!list_empty(&cl->rd_completed)) || + (!mei_cl_is_connected(cl)), + msecs_to_jiffies(timeout)); + if (rets == 0) + return -ETIME; + if (rets < 0) { + if (signal_pending(current)) + return -EINTR; + return -ERESTARTSYS; + } + } else { + if (wait_event_interruptible + (cl->rx_wait, + (!list_empty(&cl->rd_completed)) || + (!mei_cl_is_connected(cl)))) { + if (signal_pending(current)) + return -EINTR; + return -ERESTARTSYS; + } + } + + mutex_lock(&bus->device_lock); + + if (!mei_cl_is_connected(cl)) { + rets = -ENODEV; + goto out; + } + } + + cb = mei_cl_read_cb(cl, NULL); + if (!cb) { + rets = 0; + goto out; + } + +copy: + if (cb->status) { + rets = cb->status; + goto free; + } + + r_length = min_t(size_t, length, cb->buf_idx); + memcpy(buf, cb->buf.data, r_length); + rets = r_length; + +free: + mei_io_cb_free(cb); +out: + mutex_unlock(&bus->device_lock); + + return rets; +} + +/** + * mei_cldev_send - me device send (write) + * + * @cldev: me client device + * @buf: buffer to send + * @length: buffer length + * + * Return: written size in bytes or < 0 on error + */ +ssize_t mei_cldev_send(struct mei_cl_device *cldev, u8 *buf, size_t length) +{ + struct mei_cl *cl = cldev->cl; + + return __mei_cl_send(cl, buf, length, MEI_CL_IO_TX_BLOCKING); +} +EXPORT_SYMBOL_GPL(mei_cldev_send); + +/** + * mei_cldev_recv_nonblock - non block client receive (read) + * + * @cldev: me client device + * @buf: buffer to receive + * @length: buffer length + * + * Return: read size in bytes of < 0 on error + * -EAGAIN if function will block. + */ +ssize_t mei_cldev_recv_nonblock(struct mei_cl_device *cldev, u8 *buf, + size_t length) +{ + struct mei_cl *cl = cldev->cl; + + return __mei_cl_recv(cl, buf, length, MEI_CL_IO_RX_NONBLOCK, 0); +} +EXPORT_SYMBOL_GPL(mei_cldev_recv_nonblock); + +/** + * mei_cldev_recv - client receive (read) + * + * @cldev: me client device + * @buf: buffer to receive + * @length: buffer length + * + * Return: read size in bytes of < 0 on error + */ +ssize_t mei_cldev_recv(struct mei_cl_device *cldev, u8 *buf, size_t length) +{ + struct mei_cl *cl = cldev->cl; + + return __mei_cl_recv(cl, buf, length, 0, 0); +} +EXPORT_SYMBOL_GPL(mei_cldev_recv); + +/** + * mei_cl_bus_rx_work - dispatch rx event for a bus device + * + * @work: work + */ +static void mei_cl_bus_rx_work(struct work_struct *work) +{ + struct mei_cl_device *cldev; + struct mei_device *bus; + + cldev = container_of(work, struct mei_cl_device, rx_work); + + bus = cldev->bus; + + if (cldev->rx_cb) + cldev->rx_cb(cldev); + + mutex_lock(&bus->device_lock); + mei_cl_read_start(cldev->cl, mei_cl_mtu(cldev->cl), NULL); + mutex_unlock(&bus->device_lock); +} + +/** + * mei_cl_bus_notif_work - dispatch FW notif event for a bus device + * + * @work: work + */ +static void mei_cl_bus_notif_work(struct work_struct *work) +{ + struct mei_cl_device *cldev; + + cldev = container_of(work, struct mei_cl_device, notif_work); + + if (cldev->notif_cb) + cldev->notif_cb(cldev); +} + +/** + * mei_cl_bus_notify_event - schedule notify cb on bus client + * + * @cl: host client + * + * Return: true if event was scheduled + * false if the client is not waiting for event + */ +bool mei_cl_bus_notify_event(struct mei_cl *cl) +{ + struct mei_cl_device *cldev = cl->cldev; + + if (!cldev || !cldev->notif_cb) + return false; + + if (!cl->notify_ev) + return false; + + schedule_work(&cldev->notif_work); + + cl->notify_ev = false; + + return true; +} + +/** + * mei_cl_bus_rx_event - schedule rx event + * + * @cl: host client + * + * Return: true if event was scheduled + * false if the client is not waiting for event + */ +bool mei_cl_bus_rx_event(struct mei_cl *cl) +{ + struct mei_cl_device *cldev = cl->cldev; + + if (!cldev || !cldev->rx_cb) + return false; + + schedule_work(&cldev->rx_work); + + return true; +} + +/** + * mei_cldev_register_rx_cb - register Rx event callback + * + * @cldev: me client devices + * @rx_cb: callback function + * + * Return: 0 on success + * -EALREADY if an callback is already registered + * <0 on other errors + */ +int mei_cldev_register_rx_cb(struct mei_cl_device *cldev, mei_cldev_cb_t rx_cb) +{ + struct mei_device *bus = cldev->bus; + int ret; + + if (!rx_cb) + return -EINVAL; + if (cldev->rx_cb) + return -EALREADY; + + cldev->rx_cb = rx_cb; + INIT_WORK(&cldev->rx_work, mei_cl_bus_rx_work); + + mutex_lock(&bus->device_lock); + ret = mei_cl_read_start(cldev->cl, mei_cl_mtu(cldev->cl), NULL); + mutex_unlock(&bus->device_lock); + if (ret && ret != -EBUSY) + return ret; + + return 0; +} +EXPORT_SYMBOL_GPL(mei_cldev_register_rx_cb); + +/** + * mei_cldev_register_notif_cb - register FW notification event callback + * + * @cldev: me client devices + * @notif_cb: callback function + * + * Return: 0 on success + * -EALREADY if an callback is already registered + * <0 on other errors + */ +int mei_cldev_register_notif_cb(struct mei_cl_device *cldev, + mei_cldev_cb_t notif_cb) +{ + struct mei_device *bus = cldev->bus; + int ret; + + if (!notif_cb) + return -EINVAL; + + if (cldev->notif_cb) + return -EALREADY; + + cldev->notif_cb = notif_cb; + INIT_WORK(&cldev->notif_work, mei_cl_bus_notif_work); + + mutex_lock(&bus->device_lock); + ret = mei_cl_notify_request(cldev->cl, NULL, 1); + mutex_unlock(&bus->device_lock); + if (ret) + return ret; + + return 0; +} +EXPORT_SYMBOL_GPL(mei_cldev_register_notif_cb); + +/** + * mei_cldev_get_drvdata - driver data getter + * + * @cldev: mei client device + * + * Return: driver private data + */ +void *mei_cldev_get_drvdata(const struct mei_cl_device *cldev) +{ + return dev_get_drvdata(&cldev->dev); +} +EXPORT_SYMBOL_GPL(mei_cldev_get_drvdata); + +/** + * mei_cldev_set_drvdata - driver data setter + * + * @cldev: mei client device + * @data: data to store + */ +void mei_cldev_set_drvdata(struct mei_cl_device *cldev, void *data) +{ + dev_set_drvdata(&cldev->dev, data); +} +EXPORT_SYMBOL_GPL(mei_cldev_set_drvdata); + +/** + * mei_cldev_uuid - return uuid of the underlying me client + * + * @cldev: mei client device + * + * Return: me client uuid + */ +const uuid_le *mei_cldev_uuid(const struct mei_cl_device *cldev) +{ + return mei_me_cl_uuid(cldev->me_cl); +} +EXPORT_SYMBOL_GPL(mei_cldev_uuid); + +/** + * mei_cldev_ver - return protocol version of the underlying me client + * + * @cldev: mei client device + * + * Return: me client protocol version + */ +u8 mei_cldev_ver(const struct mei_cl_device *cldev) +{ + return mei_me_cl_ver(cldev->me_cl); +} +EXPORT_SYMBOL_GPL(mei_cldev_ver); + +/** + * mei_cldev_enabled - check whether the device is enabled + * + * @cldev: mei client device + * + * Return: true if me client is initialized and connected + */ +bool mei_cldev_enabled(struct mei_cl_device *cldev) +{ + return mei_cl_is_connected(cldev->cl); +} +EXPORT_SYMBOL_GPL(mei_cldev_enabled); + +/** + * mei_cl_bus_module_get - acquire module of the underlying + * hw driver. + * + * @cldev: mei client device + * + * Return: true on success; false if the module was removed. + */ +static bool mei_cl_bus_module_get(struct mei_cl_device *cldev) +{ + return try_module_get(cldev->bus->dev->driver->owner); +} + +/** + * mei_cl_bus_module_put - release the underlying hw module. + * + * @cldev: mei client device + */ +static void mei_cl_bus_module_put(struct mei_cl_device *cldev) +{ + module_put(cldev->bus->dev->driver->owner); +} + +/** + * mei_cldev_enable - enable me client device + * create connection with me client + * + * @cldev: me client device + * + * Return: 0 on success and < 0 on error + */ +int mei_cldev_enable(struct mei_cl_device *cldev) +{ + struct mei_device *bus = cldev->bus; + struct mei_cl *cl; + int ret; + + cl = cldev->cl; + + mutex_lock(&bus->device_lock); + if (cl->state == MEI_FILE_UNINITIALIZED) { + ret = mei_cl_link(cl); + if (ret) + goto out; + /* update pointers */ + cl->cldev = cldev; + } + + if (mei_cl_is_connected(cl)) { + ret = 0; + goto out; + } + + if (!mei_me_cl_is_active(cldev->me_cl)) { + dev_err(&cldev->dev, "me client is not active\n"); + ret = -ENOTTY; + goto out; + } + + ret = mei_cl_connect(cl, cldev->me_cl, NULL); + if (ret < 0) + dev_err(&cldev->dev, "cannot connect\n"); + +out: + mutex_unlock(&bus->device_lock); + + return ret; +} +EXPORT_SYMBOL_GPL(mei_cldev_enable); + +/** + * mei_cldev_unregister_callbacks - internal wrapper for unregistering + * callbacks. + * + * @cldev: client device + */ +static void mei_cldev_unregister_callbacks(struct mei_cl_device *cldev) +{ + if (cldev->rx_cb) { + cancel_work_sync(&cldev->rx_work); + cldev->rx_cb = NULL; + } + + if (cldev->notif_cb) { + cancel_work_sync(&cldev->notif_work); + cldev->notif_cb = NULL; + } +} + +/** + * mei_cldev_disable - disable me client device + * disconnect form the me client + * + * @cldev: me client device + * + * Return: 0 on success and < 0 on error + */ +int mei_cldev_disable(struct mei_cl_device *cldev) +{ + struct mei_device *bus; + struct mei_cl *cl; + int err; + + if (!cldev) + return -ENODEV; + + cl = cldev->cl; + + bus = cldev->bus; + + mei_cldev_unregister_callbacks(cldev); + + mutex_lock(&bus->device_lock); + + if (!mei_cl_is_connected(cl)) { + dev_dbg(bus->dev, "Already disconnected\n"); + err = 0; + goto out; + } + + err = mei_cl_disconnect(cl); + if (err < 0) + dev_err(bus->dev, "Could not disconnect from the ME client\n"); + +out: + /* Flush queues and remove any pending read */ + mei_cl_flush_queues(cl, NULL); + mei_cl_unlink(cl); + + mutex_unlock(&bus->device_lock); + return err; +} +EXPORT_SYMBOL_GPL(mei_cldev_disable); + +/** + * mei_cl_device_find - find matching entry in the driver id table + * + * @cldev: me client device + * @cldrv: me client driver + * + * Return: id on success; NULL if no id is matching + */ +static const +struct mei_cl_device_id *mei_cl_device_find(struct mei_cl_device *cldev, + struct mei_cl_driver *cldrv) +{ + const struct mei_cl_device_id *id; + const uuid_le *uuid; + u8 version; + bool match; + + uuid = mei_me_cl_uuid(cldev->me_cl); + version = mei_me_cl_ver(cldev->me_cl); + + id = cldrv->id_table; + while (uuid_le_cmp(NULL_UUID_LE, id->uuid)) { + if (!uuid_le_cmp(*uuid, id->uuid)) { + match = true; + + if (cldev->name[0]) + if (strncmp(cldev->name, id->name, + sizeof(id->name))) + match = false; + + if (id->version != MEI_CL_VERSION_ANY) + if (id->version != version) + match = false; + if (match) + return id; + } + + id++; + } + + return NULL; +} + +/** + * mei_cl_device_match - device match function + * + * @dev: device + * @drv: driver + * + * Return: 1 if matching device was found 0 otherwise + */ +static int mei_cl_device_match(struct device *dev, struct device_driver *drv) +{ + struct mei_cl_device *cldev = to_mei_cl_device(dev); + struct mei_cl_driver *cldrv = to_mei_cl_driver(drv); + const struct mei_cl_device_id *found_id; + + if (!cldev) + return 0; + + if (!cldev->do_match) + return 0; + + if (!cldrv || !cldrv->id_table) + return 0; + + found_id = mei_cl_device_find(cldev, cldrv); + if (found_id) + return 1; + + return 0; +} + +/** + * mei_cl_device_probe - bus probe function + * + * @dev: device + * + * Return: 0 on success; < 0 otherwise + */ +static int mei_cl_device_probe(struct device *dev) +{ + struct mei_cl_device *cldev; + struct mei_cl_driver *cldrv; + const struct mei_cl_device_id *id; + int ret; + + cldev = to_mei_cl_device(dev); + cldrv = to_mei_cl_driver(dev->driver); + + if (!cldev) + return 0; + + if (!cldrv || !cldrv->probe) + return -ENODEV; + + id = mei_cl_device_find(cldev, cldrv); + if (!id) + return -ENODEV; + + if (!mei_cl_bus_module_get(cldev)) { + dev_err(&cldev->dev, "get hw module failed"); + return -ENODEV; + } + + ret = cldrv->probe(cldev, id); + if (ret) { + mei_cl_bus_module_put(cldev); + return ret; + } + + __module_get(THIS_MODULE); + return 0; +} + +/** + * mei_cl_device_remove - remove device from the bus + * + * @dev: device + * + * Return: 0 on success; < 0 otherwise + */ +static int mei_cl_device_remove(struct device *dev) +{ + struct mei_cl_device *cldev = to_mei_cl_device(dev); + struct mei_cl_driver *cldrv; + int ret = 0; + + if (!cldev || !dev->driver) + return 0; + + cldrv = to_mei_cl_driver(dev->driver); + if (cldrv->remove) + ret = cldrv->remove(cldev); + + mei_cldev_unregister_callbacks(cldev); + + mei_cl_bus_module_put(cldev); + module_put(THIS_MODULE); + + return ret; +} + +static ssize_t name_show(struct device *dev, struct device_attribute *a, + char *buf) +{ + struct mei_cl_device *cldev = to_mei_cl_device(dev); + + return scnprintf(buf, PAGE_SIZE, "%s", cldev->name); +} +static DEVICE_ATTR_RO(name); + +static ssize_t uuid_show(struct device *dev, struct device_attribute *a, + char *buf) +{ + struct mei_cl_device *cldev = to_mei_cl_device(dev); + const uuid_le *uuid = mei_me_cl_uuid(cldev->me_cl); + + return scnprintf(buf, PAGE_SIZE, "%pUl", uuid); +} +static DEVICE_ATTR_RO(uuid); + +static ssize_t version_show(struct device *dev, struct device_attribute *a, + char *buf) +{ + struct mei_cl_device *cldev = to_mei_cl_device(dev); + u8 version = mei_me_cl_ver(cldev->me_cl); + + return scnprintf(buf, PAGE_SIZE, "%02X", version); +} +static DEVICE_ATTR_RO(version); + +static ssize_t modalias_show(struct device *dev, struct device_attribute *a, + char *buf) +{ + struct mei_cl_device *cldev = to_mei_cl_device(dev); + const uuid_le *uuid = mei_me_cl_uuid(cldev->me_cl); + u8 version = mei_me_cl_ver(cldev->me_cl); + + return scnprintf(buf, PAGE_SIZE, "mei:%s:%pUl:%02X:", + cldev->name, uuid, version); +} +static DEVICE_ATTR_RO(modalias); + +static struct attribute *mei_cldev_attrs[] = { + &dev_attr_name.attr, + &dev_attr_uuid.attr, + &dev_attr_version.attr, + &dev_attr_modalias.attr, + NULL, +}; +ATTRIBUTE_GROUPS(mei_cldev); + +/** + * mei_cl_device_uevent - me client bus uevent handler + * + * @dev: device + * @env: uevent kobject + * + * Return: 0 on success -ENOMEM on when add_uevent_var fails + */ +static int mei_cl_device_uevent(struct device *dev, struct kobj_uevent_env *env) +{ + struct mei_cl_device *cldev = to_mei_cl_device(dev); + const uuid_le *uuid = mei_me_cl_uuid(cldev->me_cl); + u8 version = mei_me_cl_ver(cldev->me_cl); + + if (add_uevent_var(env, "MEI_CL_VERSION=%d", version)) + return -ENOMEM; + + if (add_uevent_var(env, "MEI_CL_UUID=%pUl", uuid)) + return -ENOMEM; + + if (add_uevent_var(env, "MEI_CL_NAME=%s", cldev->name)) + return -ENOMEM; + + if (add_uevent_var(env, "MODALIAS=mei:%s:%pUl:%02X:", + cldev->name, uuid, version)) + return -ENOMEM; + + return 0; +} + +static struct bus_type mei_cl_bus_type = { + .name = "mei", + .dev_groups = mei_cldev_groups, + .match = mei_cl_device_match, + .probe = mei_cl_device_probe, + .remove = mei_cl_device_remove, + .uevent = mei_cl_device_uevent, +}; + +static struct mei_device *mei_dev_bus_get(struct mei_device *bus) +{ + if (bus) + get_device(bus->dev); + + return bus; +} + +static void mei_dev_bus_put(struct mei_device *bus) +{ + if (bus) + put_device(bus->dev); +} + +static void mei_cl_bus_dev_release(struct device *dev) +{ + struct mei_cl_device *cldev = to_mei_cl_device(dev); + + if (!cldev) + return; + + mei_me_cl_put(cldev->me_cl); + mei_dev_bus_put(cldev->bus); + mei_cl_unlink(cldev->cl); + kfree(cldev->cl); + kfree(cldev); +} + +static const struct device_type mei_cl_device_type = { + .release = mei_cl_bus_dev_release, +}; + +/** + * mei_cl_bus_set_name - set device name for me client device + * - + * Example: 0000:00:16.0-55213584-9a29-4916-badf-0fb7ed682aeb + * + * @cldev: me client device + */ +static inline void mei_cl_bus_set_name(struct mei_cl_device *cldev) +{ + dev_set_name(&cldev->dev, "%s-%pUl", + dev_name(cldev->bus->dev), + mei_me_cl_uuid(cldev->me_cl)); +} + +/** + * mei_cl_bus_dev_alloc - initialize and allocate mei client device + * + * @bus: mei device + * @me_cl: me client + * + * Return: allocated device structur or NULL on allocation failure + */ +static struct mei_cl_device *mei_cl_bus_dev_alloc(struct mei_device *bus, + struct mei_me_client *me_cl) +{ + struct mei_cl_device *cldev; + struct mei_cl *cl; + + cldev = kzalloc(sizeof(struct mei_cl_device), GFP_KERNEL); + if (!cldev) + return NULL; + + cl = mei_cl_allocate(bus); + if (!cl) { + kfree(cldev); + return NULL; + } + + device_initialize(&cldev->dev); + cldev->dev.parent = bus->dev; + cldev->dev.bus = &mei_cl_bus_type; + cldev->dev.type = &mei_cl_device_type; + cldev->bus = mei_dev_bus_get(bus); + cldev->me_cl = mei_me_cl_get(me_cl); + cldev->cl = cl; + mei_cl_bus_set_name(cldev); + cldev->is_added = 0; + INIT_LIST_HEAD(&cldev->bus_list); + + return cldev; +} + +/** + * mei_cl_dev_setup - setup me client device + * run fix up routines and set the device name + * + * @bus: mei device + * @cldev: me client device + * + * Return: true if the device is eligible for enumeration + */ +static bool mei_cl_bus_dev_setup(struct mei_device *bus, + struct mei_cl_device *cldev) +{ + cldev->do_match = 1; + mei_cl_bus_dev_fixup(cldev); + + /* the device name can change during fix up */ + if (cldev->do_match) + mei_cl_bus_set_name(cldev); + + return cldev->do_match == 1; +} + +/** + * mei_cl_bus_dev_add - add me client devices + * + * @cldev: me client device + * + * Return: 0 on success; < 0 on failre + */ +static int mei_cl_bus_dev_add(struct mei_cl_device *cldev) +{ + int ret; + + dev_dbg(cldev->bus->dev, "adding %pUL:%02X\n", + mei_me_cl_uuid(cldev->me_cl), + mei_me_cl_ver(cldev->me_cl)); + ret = device_add(&cldev->dev); + if (!ret) + cldev->is_added = 1; + + return ret; +} + +/** + * mei_cl_bus_dev_stop - stop the driver + * + * @cldev: me client device + */ +static void mei_cl_bus_dev_stop(struct mei_cl_device *cldev) +{ + if (cldev->is_added) + device_release_driver(&cldev->dev); +} + +/** + * mei_cl_bus_dev_destroy - destroy me client devices object + * + * @cldev: me client device + * + * Locking: called under "dev->cl_bus_lock" lock + */ +static void mei_cl_bus_dev_destroy(struct mei_cl_device *cldev) +{ + + WARN_ON(!mutex_is_locked(&cldev->bus->cl_bus_lock)); + + if (!cldev->is_added) + return; + + device_del(&cldev->dev); + + list_del_init(&cldev->bus_list); + + cldev->is_added = 0; + put_device(&cldev->dev); +} + +/** + * mei_cl_bus_remove_device - remove a devices form the bus + * + * @cldev: me client device + */ +static void mei_cl_bus_remove_device(struct mei_cl_device *cldev) +{ + mei_cl_bus_dev_stop(cldev); + mei_cl_bus_dev_destroy(cldev); +} + +/** + * mei_cl_bus_remove_devices - remove all devices form the bus + * + * @bus: mei device + */ +void mei_cl_bus_remove_devices(struct mei_device *bus) +{ + struct mei_cl_device *cldev, *next; + + mutex_lock(&bus->cl_bus_lock); + list_for_each_entry_safe(cldev, next, &bus->device_list, bus_list) + mei_cl_bus_remove_device(cldev); + mutex_unlock(&bus->cl_bus_lock); +} + + +/** + * mei_cl_bus_dev_init - allocate and initializes an mei client devices + * based on me client + * + * @bus: mei device + * @me_cl: me client + * + * Locking: called under "dev->cl_bus_lock" lock + */ +static void mei_cl_bus_dev_init(struct mei_device *bus, + struct mei_me_client *me_cl) +{ + struct mei_cl_device *cldev; + + WARN_ON(!mutex_is_locked(&bus->cl_bus_lock)); + + dev_dbg(bus->dev, "initializing %pUl", mei_me_cl_uuid(me_cl)); + + if (me_cl->bus_added) + return; + + cldev = mei_cl_bus_dev_alloc(bus, me_cl); + if (!cldev) + return; + + me_cl->bus_added = true; + list_add_tail(&cldev->bus_list, &bus->device_list); + +} + +/** + * mei_cl_bus_rescan - scan me clients list and add create + * devices for eligible clients + * + * @bus: mei device + */ +static void mei_cl_bus_rescan(struct mei_device *bus) +{ + struct mei_cl_device *cldev, *n; + struct mei_me_client *me_cl; + + mutex_lock(&bus->cl_bus_lock); + + down_read(&bus->me_clients_rwsem); + list_for_each_entry(me_cl, &bus->me_clients, list) + mei_cl_bus_dev_init(bus, me_cl); + up_read(&bus->me_clients_rwsem); + + list_for_each_entry_safe(cldev, n, &bus->device_list, bus_list) { + + if (!mei_me_cl_is_active(cldev->me_cl)) { + mei_cl_bus_remove_device(cldev); + continue; + } + + if (cldev->is_added) + continue; + + if (mei_cl_bus_dev_setup(bus, cldev)) + mei_cl_bus_dev_add(cldev); + else { + list_del_init(&cldev->bus_list); + put_device(&cldev->dev); + } + } + mutex_unlock(&bus->cl_bus_lock); + + dev_dbg(bus->dev, "rescan end"); +} + +void mei_cl_bus_rescan_work(struct work_struct *work) +{ + struct mei_device *bus = + container_of(work, struct mei_device, bus_rescan_work); + + mei_cl_bus_rescan(bus); +} + +int __mei_cldev_driver_register(struct mei_cl_driver *cldrv, + struct module *owner) +{ + int err; + + cldrv->driver.name = cldrv->name; + cldrv->driver.owner = owner; + cldrv->driver.bus = &mei_cl_bus_type; + + err = driver_register(&cldrv->driver); + if (err) + return err; + + pr_debug("mei: driver [%s] registered\n", cldrv->driver.name); + + return 0; +} +EXPORT_SYMBOL_GPL(__mei_cldev_driver_register); + +void mei_cldev_driver_unregister(struct mei_cl_driver *cldrv) +{ + driver_unregister(&cldrv->driver); + + pr_debug("mei: driver [%s] unregistered\n", cldrv->driver.name); +} +EXPORT_SYMBOL_GPL(mei_cldev_driver_unregister); + + +int __init mei_cl_bus_init(void) +{ + return bus_register(&mei_cl_bus_type); +} + +void __exit mei_cl_bus_exit(void) +{ + bus_unregister(&mei_cl_bus_type); +} diff --git a/drivers/misc/mei/client.c b/drivers/misc/mei/client.c new file mode 100644 index 000000000..524b8c0d3 --- /dev/null +++ b/drivers/misc/mei/client.c @@ -0,0 +1,1844 @@ +/* + * + * Intel Management Engine Interface (Intel MEI) Linux driver + * Copyright (c) 2003-2012, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#include +#include +#include +#include +#include + +#include + +#include "mei_dev.h" +#include "hbm.h" +#include "client.h" + +/** + * mei_me_cl_init - initialize me client + * + * @me_cl: me client + */ +void mei_me_cl_init(struct mei_me_client *me_cl) +{ + INIT_LIST_HEAD(&me_cl->list); + kref_init(&me_cl->refcnt); +} + +/** + * mei_me_cl_get - increases me client refcount + * + * @me_cl: me client + * + * Locking: called under "dev->device_lock" lock + * + * Return: me client or NULL + */ +struct mei_me_client *mei_me_cl_get(struct mei_me_client *me_cl) +{ + if (me_cl && kref_get_unless_zero(&me_cl->refcnt)) + return me_cl; + + return NULL; +} + +/** + * mei_me_cl_release - free me client + * + * Locking: called under "dev->device_lock" lock + * + * @ref: me_client refcount + */ +static void mei_me_cl_release(struct kref *ref) +{ + struct mei_me_client *me_cl = + container_of(ref, struct mei_me_client, refcnt); + + kfree(me_cl); +} + +/** + * mei_me_cl_put - decrease me client refcount and free client if necessary + * + * Locking: called under "dev->device_lock" lock + * + * @me_cl: me client + */ +void mei_me_cl_put(struct mei_me_client *me_cl) +{ + if (me_cl) + kref_put(&me_cl->refcnt, mei_me_cl_release); +} + +/** + * __mei_me_cl_del - delete me client from the list and decrease + * reference counter + * + * @dev: mei device + * @me_cl: me client + * + * Locking: dev->me_clients_rwsem + */ +static void __mei_me_cl_del(struct mei_device *dev, struct mei_me_client *me_cl) +{ + if (!me_cl) + return; + + list_del_init(&me_cl->list); + mei_me_cl_put(me_cl); +} + +/** + * mei_me_cl_del - delete me client from the list and decrease + * reference counter + * + * @dev: mei device + * @me_cl: me client + */ +void mei_me_cl_del(struct mei_device *dev, struct mei_me_client *me_cl) +{ + down_write(&dev->me_clients_rwsem); + __mei_me_cl_del(dev, me_cl); + up_write(&dev->me_clients_rwsem); +} + +/** + * mei_me_cl_add - add me client to the list + * + * @dev: mei device + * @me_cl: me client + */ +void mei_me_cl_add(struct mei_device *dev, struct mei_me_client *me_cl) +{ + down_write(&dev->me_clients_rwsem); + list_add(&me_cl->list, &dev->me_clients); + up_write(&dev->me_clients_rwsem); +} + +/** + * __mei_me_cl_by_uuid - locate me client by uuid + * increases ref count + * + * @dev: mei device + * @uuid: me client uuid + * + * Return: me client or NULL if not found + * + * Locking: dev->me_clients_rwsem + */ +static struct mei_me_client *__mei_me_cl_by_uuid(struct mei_device *dev, + const uuid_le *uuid) +{ + struct mei_me_client *me_cl; + const uuid_le *pn; + + WARN_ON(!rwsem_is_locked(&dev->me_clients_rwsem)); + + list_for_each_entry(me_cl, &dev->me_clients, list) { + pn = &me_cl->props.protocol_name; + if (uuid_le_cmp(*uuid, *pn) == 0) + return mei_me_cl_get(me_cl); + } + + return NULL; +} + +/** + * mei_me_cl_by_uuid - locate me client by uuid + * increases ref count + * + * @dev: mei device + * @uuid: me client uuid + * + * Return: me client or NULL if not found + * + * Locking: dev->me_clients_rwsem + */ +struct mei_me_client *mei_me_cl_by_uuid(struct mei_device *dev, + const uuid_le *uuid) +{ + struct mei_me_client *me_cl; + + down_read(&dev->me_clients_rwsem); + me_cl = __mei_me_cl_by_uuid(dev, uuid); + up_read(&dev->me_clients_rwsem); + + return me_cl; +} + +/** + * mei_me_cl_by_id - locate me client by client id + * increases ref count + * + * @dev: the device structure + * @client_id: me client id + * + * Return: me client or NULL if not found + * + * Locking: dev->me_clients_rwsem + */ +struct mei_me_client *mei_me_cl_by_id(struct mei_device *dev, u8 client_id) +{ + + struct mei_me_client *__me_cl, *me_cl = NULL; + + down_read(&dev->me_clients_rwsem); + list_for_each_entry(__me_cl, &dev->me_clients, list) { + if (__me_cl->client_id == client_id) { + me_cl = mei_me_cl_get(__me_cl); + break; + } + } + up_read(&dev->me_clients_rwsem); + + return me_cl; +} + +/** + * __mei_me_cl_by_uuid_id - locate me client by client id and uuid + * increases ref count + * + * @dev: the device structure + * @uuid: me client uuid + * @client_id: me client id + * + * Return: me client or null if not found + * + * Locking: dev->me_clients_rwsem + */ +static struct mei_me_client *__mei_me_cl_by_uuid_id(struct mei_device *dev, + const uuid_le *uuid, u8 client_id) +{ + struct mei_me_client *me_cl; + const uuid_le *pn; + + WARN_ON(!rwsem_is_locked(&dev->me_clients_rwsem)); + + list_for_each_entry(me_cl, &dev->me_clients, list) { + pn = &me_cl->props.protocol_name; + if (uuid_le_cmp(*uuid, *pn) == 0 && + me_cl->client_id == client_id) + return mei_me_cl_get(me_cl); + } + + return NULL; +} + + +/** + * mei_me_cl_by_uuid_id - locate me client by client id and uuid + * increases ref count + * + * @dev: the device structure + * @uuid: me client uuid + * @client_id: me client id + * + * Return: me client or null if not found + */ +struct mei_me_client *mei_me_cl_by_uuid_id(struct mei_device *dev, + const uuid_le *uuid, u8 client_id) +{ + struct mei_me_client *me_cl; + + down_read(&dev->me_clients_rwsem); + me_cl = __mei_me_cl_by_uuid_id(dev, uuid, client_id); + up_read(&dev->me_clients_rwsem); + + return me_cl; +} + +/** + * mei_me_cl_rm_by_uuid - remove all me clients matching uuid + * + * @dev: the device structure + * @uuid: me client uuid + * + * Locking: called under "dev->device_lock" lock + */ +void mei_me_cl_rm_by_uuid(struct mei_device *dev, const uuid_le *uuid) +{ + struct mei_me_client *me_cl; + + dev_dbg(dev->dev, "remove %pUl\n", uuid); + + down_write(&dev->me_clients_rwsem); + me_cl = __mei_me_cl_by_uuid(dev, uuid); + __mei_me_cl_del(dev, me_cl); + mei_me_cl_put(me_cl); + up_write(&dev->me_clients_rwsem); +} + +/** + * mei_me_cl_rm_by_uuid_id - remove all me clients matching client id + * + * @dev: the device structure + * @uuid: me client uuid + * @id: me client id + * + * Locking: called under "dev->device_lock" lock + */ +void mei_me_cl_rm_by_uuid_id(struct mei_device *dev, const uuid_le *uuid, u8 id) +{ + struct mei_me_client *me_cl; + + dev_dbg(dev->dev, "remove %pUl %d\n", uuid, id); + + down_write(&dev->me_clients_rwsem); + me_cl = __mei_me_cl_by_uuid_id(dev, uuid, id); + __mei_me_cl_del(dev, me_cl); + mei_me_cl_put(me_cl); + up_write(&dev->me_clients_rwsem); +} + +/** + * mei_me_cl_rm_all - remove all me clients + * + * @dev: the device structure + * + * Locking: called under "dev->device_lock" lock + */ +void mei_me_cl_rm_all(struct mei_device *dev) +{ + struct mei_me_client *me_cl, *next; + + down_write(&dev->me_clients_rwsem); + list_for_each_entry_safe(me_cl, next, &dev->me_clients, list) + __mei_me_cl_del(dev, me_cl); + up_write(&dev->me_clients_rwsem); +} + +/** + * mei_cl_cmp_id - tells if the clients are the same + * + * @cl1: host client 1 + * @cl2: host client 2 + * + * Return: true - if the clients has same host and me ids + * false - otherwise + */ +static inline bool mei_cl_cmp_id(const struct mei_cl *cl1, + const struct mei_cl *cl2) +{ + return cl1 && cl2 && + (cl1->host_client_id == cl2->host_client_id) && + (mei_cl_me_id(cl1) == mei_cl_me_id(cl2)); +} + +/** + * mei_io_cb_free - free mei_cb_private related memory + * + * @cb: mei callback struct + */ +void mei_io_cb_free(struct mei_cl_cb *cb) +{ + if (cb == NULL) + return; + + list_del(&cb->list); + kfree(cb->buf.data); + kfree(cb); +} + +/** + * mei_tx_cb_queue - queue tx callback + * + * Locking: called under "dev->device_lock" lock + * + * @cb: mei callback struct + * @head: an instance of list to queue on + */ +static inline void mei_tx_cb_enqueue(struct mei_cl_cb *cb, + struct list_head *head) +{ + list_add_tail(&cb->list, head); + cb->cl->tx_cb_queued++; +} + +/** + * mei_tx_cb_dequeue - dequeue tx callback + * + * Locking: called under "dev->device_lock" lock + * + * @cb: mei callback struct to dequeue and free + */ +static inline void mei_tx_cb_dequeue(struct mei_cl_cb *cb) +{ + if (!WARN_ON(cb->cl->tx_cb_queued == 0)) + cb->cl->tx_cb_queued--; + + mei_io_cb_free(cb); +} + +/** + * mei_io_cb_init - allocate and initialize io callback + * + * @cl: mei client + * @type: operation type + * @fp: pointer to file structure + * + * Return: mei_cl_cb pointer or NULL; + */ +static struct mei_cl_cb *mei_io_cb_init(struct mei_cl *cl, + enum mei_cb_file_ops type, + const struct file *fp) +{ + struct mei_cl_cb *cb; + + cb = kzalloc(sizeof(struct mei_cl_cb), GFP_KERNEL); + if (!cb) + return NULL; + + INIT_LIST_HEAD(&cb->list); + cb->fp = fp; + cb->cl = cl; + cb->buf_idx = 0; + cb->fop_type = type; + return cb; +} + +/** + * mei_io_list_flush_cl - removes cbs belonging to the cl. + * + * @head: an instance of our list structure + * @cl: host client + */ +static void mei_io_list_flush_cl(struct list_head *head, + const struct mei_cl *cl) +{ + struct mei_cl_cb *cb, *next; + + list_for_each_entry_safe(cb, next, head, list) { + if (mei_cl_cmp_id(cl, cb->cl)) + list_del_init(&cb->list); + } +} + +/** + * mei_io_tx_list_free_cl - removes cb belonging to the cl and free them + * + * @head: An instance of our list structure + * @cl: host client + */ +static void mei_io_tx_list_free_cl(struct list_head *head, + const struct mei_cl *cl) +{ + struct mei_cl_cb *cb, *next; + + list_for_each_entry_safe(cb, next, head, list) { + if (mei_cl_cmp_id(cl, cb->cl)) + mei_tx_cb_dequeue(cb); + } +} + +/** + * mei_io_list_free_fp - free cb from a list that matches file pointer + * + * @head: io list + * @fp: file pointer (matching cb file object), may be NULL + */ +static void mei_io_list_free_fp(struct list_head *head, const struct file *fp) +{ + struct mei_cl_cb *cb, *next; + + list_for_each_entry_safe(cb, next, head, list) + if (!fp || fp == cb->fp) + mei_io_cb_free(cb); +} + +/** + * mei_cl_alloc_cb - a convenient wrapper for allocating read cb + * + * @cl: host client + * @length: size of the buffer + * @fop_type: operation type + * @fp: associated file pointer (might be NULL) + * + * Return: cb on success and NULL on failure + */ +struct mei_cl_cb *mei_cl_alloc_cb(struct mei_cl *cl, size_t length, + enum mei_cb_file_ops fop_type, + const struct file *fp) +{ + struct mei_cl_cb *cb; + + cb = mei_io_cb_init(cl, fop_type, fp); + if (!cb) + return NULL; + + if (length == 0) + return cb; + + cb->buf.data = kmalloc(length, GFP_KERNEL); + if (!cb->buf.data) { + mei_io_cb_free(cb); + return NULL; + } + cb->buf.size = length; + + return cb; +} + +/** + * mei_cl_enqueue_ctrl_wr_cb - a convenient wrapper for allocating + * and enqueuing of the control commands cb + * + * @cl: host client + * @length: size of the buffer + * @fop_type: operation type + * @fp: associated file pointer (might be NULL) + * + * Return: cb on success and NULL on failure + * Locking: called under "dev->device_lock" lock + */ +struct mei_cl_cb *mei_cl_enqueue_ctrl_wr_cb(struct mei_cl *cl, size_t length, + enum mei_cb_file_ops fop_type, + const struct file *fp) +{ + struct mei_cl_cb *cb; + + /* for RX always allocate at least client's mtu */ + if (length) + length = max_t(size_t, length, mei_cl_mtu(cl)); + + cb = mei_cl_alloc_cb(cl, length, fop_type, fp); + if (!cb) + return NULL; + + list_add_tail(&cb->list, &cl->dev->ctrl_wr_list); + return cb; +} + +/** + * mei_cl_read_cb - find this cl's callback in the read list + * for a specific file + * + * @cl: host client + * @fp: file pointer (matching cb file object), may be NULL + * + * Return: cb on success, NULL if cb is not found + */ +struct mei_cl_cb *mei_cl_read_cb(const struct mei_cl *cl, const struct file *fp) +{ + struct mei_cl_cb *cb; + + list_for_each_entry(cb, &cl->rd_completed, list) + if (!fp || fp == cb->fp) + return cb; + + return NULL; +} + +/** + * mei_cl_flush_queues - flushes queue lists belonging to cl. + * + * @cl: host client + * @fp: file pointer (matching cb file object), may be NULL + * + * Return: 0 on success, -EINVAL if cl or cl->dev is NULL. + */ +int mei_cl_flush_queues(struct mei_cl *cl, const struct file *fp) +{ + struct mei_device *dev; + + if (WARN_ON(!cl || !cl->dev)) + return -EINVAL; + + dev = cl->dev; + + cl_dbg(dev, cl, "remove list entry belonging to cl\n"); + mei_io_tx_list_free_cl(&cl->dev->write_list, cl); + mei_io_tx_list_free_cl(&cl->dev->write_waiting_list, cl); + mei_io_list_flush_cl(&cl->dev->ctrl_wr_list, cl); + mei_io_list_flush_cl(&cl->dev->ctrl_rd_list, cl); + mei_io_list_free_fp(&cl->rd_pending, fp); + mei_io_list_free_fp(&cl->rd_completed, fp); + + return 0; +} + +/** + * mei_cl_init - initializes cl. + * + * @cl: host client to be initialized + * @dev: mei device + */ +static void mei_cl_init(struct mei_cl *cl, struct mei_device *dev) +{ + memset(cl, 0, sizeof(struct mei_cl)); + init_waitqueue_head(&cl->wait); + init_waitqueue_head(&cl->rx_wait); + init_waitqueue_head(&cl->tx_wait); + init_waitqueue_head(&cl->ev_wait); + INIT_LIST_HEAD(&cl->rd_completed); + INIT_LIST_HEAD(&cl->rd_pending); + INIT_LIST_HEAD(&cl->link); + cl->writing_state = MEI_IDLE; + cl->state = MEI_FILE_UNINITIALIZED; + cl->dev = dev; +} + +/** + * mei_cl_allocate - allocates cl structure and sets it up. + * + * @dev: mei device + * Return: The allocated file or NULL on failure + */ +struct mei_cl *mei_cl_allocate(struct mei_device *dev) +{ + struct mei_cl *cl; + + cl = kmalloc(sizeof(struct mei_cl), GFP_KERNEL); + if (!cl) + return NULL; + + mei_cl_init(cl, dev); + + return cl; +} + +/** + * mei_cl_link - allocate host id in the host map + * + * @cl: host client + * + * Return: 0 on success + * -EINVAL on incorrect values + * -EMFILE if open count exceeded. + */ +int mei_cl_link(struct mei_cl *cl) +{ + struct mei_device *dev; + int id; + + if (WARN_ON(!cl || !cl->dev)) + return -EINVAL; + + dev = cl->dev; + + id = find_first_zero_bit(dev->host_clients_map, MEI_CLIENTS_MAX); + if (id >= MEI_CLIENTS_MAX) { + dev_err(dev->dev, "id exceeded %d", MEI_CLIENTS_MAX); + return -EMFILE; + } + + if (dev->open_handle_count >= MEI_MAX_OPEN_HANDLE_COUNT) { + dev_err(dev->dev, "open_handle_count exceeded %d", + MEI_MAX_OPEN_HANDLE_COUNT); + return -EMFILE; + } + + dev->open_handle_count++; + + cl->host_client_id = id; + list_add_tail(&cl->link, &dev->file_list); + + set_bit(id, dev->host_clients_map); + + cl->state = MEI_FILE_INITIALIZING; + + cl_dbg(dev, cl, "link cl\n"); + return 0; +} + +/** + * mei_cl_unlink - remove host client from the list + * + * @cl: host client + * + * Return: always 0 + */ +int mei_cl_unlink(struct mei_cl *cl) +{ + struct mei_device *dev; + + /* don't shout on error exit path */ + if (!cl) + return 0; + + if (WARN_ON(!cl->dev)) + return 0; + + dev = cl->dev; + + cl_dbg(dev, cl, "unlink client"); + + if (dev->open_handle_count > 0) + dev->open_handle_count--; + + /* never clear the 0 bit */ + if (cl->host_client_id) + clear_bit(cl->host_client_id, dev->host_clients_map); + + list_del_init(&cl->link); + + cl->state = MEI_FILE_UNINITIALIZED; + cl->writing_state = MEI_IDLE; + + WARN_ON(!list_empty(&cl->rd_completed) || + !list_empty(&cl->rd_pending) || + !list_empty(&cl->link)); + + return 0; +} + +void mei_host_client_init(struct mei_device *dev) +{ + dev->dev_state = MEI_DEV_ENABLED; + dev->reset_count = 0; + + schedule_work(&dev->bus_rescan_work); + + pm_runtime_mark_last_busy(dev->dev); + dev_dbg(dev->dev, "rpm: autosuspend\n"); + pm_request_autosuspend(dev->dev); +} + +/** + * mei_hbuf_acquire - try to acquire host buffer + * + * @dev: the device structure + * Return: true if host buffer was acquired + */ +bool mei_hbuf_acquire(struct mei_device *dev) +{ + if (mei_pg_state(dev) == MEI_PG_ON || + mei_pg_in_transition(dev)) { + dev_dbg(dev->dev, "device is in pg\n"); + return false; + } + + if (!dev->hbuf_is_ready) { + dev_dbg(dev->dev, "hbuf is not ready\n"); + return false; + } + + dev->hbuf_is_ready = false; + + return true; +} + +/** + * mei_cl_wake_all - wake up readers, writers and event waiters so + * they can be interrupted + * + * @cl: host client + */ +static void mei_cl_wake_all(struct mei_cl *cl) +{ + struct mei_device *dev = cl->dev; + + /* synchronized under device mutex */ + if (waitqueue_active(&cl->rx_wait)) { + cl_dbg(dev, cl, "Waking up reading client!\n"); + wake_up_interruptible(&cl->rx_wait); + } + /* synchronized under device mutex */ + if (waitqueue_active(&cl->tx_wait)) { + cl_dbg(dev, cl, "Waking up writing client!\n"); + wake_up_interruptible(&cl->tx_wait); + } + /* synchronized under device mutex */ + if (waitqueue_active(&cl->ev_wait)) { + cl_dbg(dev, cl, "Waking up waiting for event clients!\n"); + wake_up_interruptible(&cl->ev_wait); + } + /* synchronized under device mutex */ + if (waitqueue_active(&cl->wait)) { + cl_dbg(dev, cl, "Waking up ctrl write clients!\n"); + wake_up(&cl->wait); + } +} + +/** + * mei_cl_set_disconnected - set disconnected state and clear + * associated states and resources + * + * @cl: host client + */ +static void mei_cl_set_disconnected(struct mei_cl *cl) +{ + struct mei_device *dev = cl->dev; + + if (cl->state == MEI_FILE_DISCONNECTED || + cl->state <= MEI_FILE_INITIALIZING) + return; + + cl->state = MEI_FILE_DISCONNECTED; + mei_io_tx_list_free_cl(&dev->write_list, cl); + mei_io_tx_list_free_cl(&dev->write_waiting_list, cl); + mei_io_list_flush_cl(&dev->ctrl_rd_list, cl); + mei_io_list_flush_cl(&dev->ctrl_wr_list, cl); + mei_cl_wake_all(cl); + cl->rx_flow_ctrl_creds = 0; + cl->tx_flow_ctrl_creds = 0; + cl->timer_count = 0; + + if (!cl->me_cl) + return; + + if (!WARN_ON(cl->me_cl->connect_count == 0)) + cl->me_cl->connect_count--; + + if (cl->me_cl->connect_count == 0) + cl->me_cl->tx_flow_ctrl_creds = 0; + + mei_me_cl_put(cl->me_cl); + cl->me_cl = NULL; +} + +static int mei_cl_set_connecting(struct mei_cl *cl, struct mei_me_client *me_cl) +{ + if (!mei_me_cl_get(me_cl)) + return -ENOENT; + + /* only one connection is allowed for fixed address clients */ + if (me_cl->props.fixed_address) { + if (me_cl->connect_count) { + mei_me_cl_put(me_cl); + return -EBUSY; + } + } + + cl->me_cl = me_cl; + cl->state = MEI_FILE_CONNECTING; + cl->me_cl->connect_count++; + + return 0; +} + +/* + * mei_cl_send_disconnect - send disconnect request + * + * @cl: host client + * @cb: callback block + * + * Return: 0, OK; otherwise, error. + */ +static int mei_cl_send_disconnect(struct mei_cl *cl, struct mei_cl_cb *cb) +{ + struct mei_device *dev; + int ret; + + dev = cl->dev; + + ret = mei_hbm_cl_disconnect_req(dev, cl); + cl->status = ret; + if (ret) { + cl->state = MEI_FILE_DISCONNECT_REPLY; + return ret; + } + + list_move_tail(&cb->list, &dev->ctrl_rd_list); + cl->timer_count = MEI_CONNECT_TIMEOUT; + mei_schedule_stall_timer(dev); + + return 0; +} + +/** + * mei_cl_irq_disconnect - processes close related operation from + * interrupt thread context - send disconnect request + * + * @cl: client + * @cb: callback block. + * @cmpl_list: complete list. + * + * Return: 0, OK; otherwise, error. + */ +int mei_cl_irq_disconnect(struct mei_cl *cl, struct mei_cl_cb *cb, + struct list_head *cmpl_list) +{ + struct mei_device *dev = cl->dev; + u32 msg_slots; + int slots; + int ret; + + msg_slots = mei_hbm2slots(sizeof(struct hbm_client_connect_request)); + slots = mei_hbuf_empty_slots(dev); + if (slots < 0) + return -EOVERFLOW; + + if ((u32)slots < msg_slots) + return -EMSGSIZE; + + ret = mei_cl_send_disconnect(cl, cb); + if (ret) + list_move_tail(&cb->list, cmpl_list); + + return ret; +} + +/** + * __mei_cl_disconnect - disconnect host client from the me one + * internal function runtime pm has to be already acquired + * + * @cl: host client + * + * Return: 0 on success, <0 on failure. + */ +static int __mei_cl_disconnect(struct mei_cl *cl) +{ + struct mei_device *dev; + struct mei_cl_cb *cb; + int rets; + + dev = cl->dev; + + cl->state = MEI_FILE_DISCONNECTING; + + cb = mei_cl_enqueue_ctrl_wr_cb(cl, 0, MEI_FOP_DISCONNECT, NULL); + if (!cb) { + rets = -ENOMEM; + goto out; + } + + if (mei_hbuf_acquire(dev)) { + rets = mei_cl_send_disconnect(cl, cb); + if (rets) { + cl_err(dev, cl, "failed to disconnect.\n"); + goto out; + } + } + + mutex_unlock(&dev->device_lock); + wait_event_timeout(cl->wait, + cl->state == MEI_FILE_DISCONNECT_REPLY || + cl->state == MEI_FILE_DISCONNECTED, + mei_secs_to_jiffies(MEI_CL_CONNECT_TIMEOUT)); + mutex_lock(&dev->device_lock); + + rets = cl->status; + if (cl->state != MEI_FILE_DISCONNECT_REPLY && + cl->state != MEI_FILE_DISCONNECTED) { + cl_dbg(dev, cl, "timeout on disconnect from FW client.\n"); + rets = -ETIME; + } + +out: + /* we disconnect also on error */ + mei_cl_set_disconnected(cl); + if (!rets) + cl_dbg(dev, cl, "successfully disconnected from FW client.\n"); + + mei_io_cb_free(cb); + return rets; +} + +/** + * mei_cl_disconnect - disconnect host client from the me one + * + * @cl: host client + * + * Locking: called under "dev->device_lock" lock + * + * Return: 0 on success, <0 on failure. + */ +int mei_cl_disconnect(struct mei_cl *cl) +{ + struct mei_device *dev; + int rets; + + if (WARN_ON(!cl || !cl->dev)) + return -ENODEV; + + dev = cl->dev; + + cl_dbg(dev, cl, "disconnecting"); + + if (!mei_cl_is_connected(cl)) + return 0; + + if (mei_cl_is_fixed_address(cl)) { + mei_cl_set_disconnected(cl); + return 0; + } + + if (dev->dev_state == MEI_DEV_POWER_DOWN) { + cl_dbg(dev, cl, "Device is powering down, don't bother with disconnection\n"); + mei_cl_set_disconnected(cl); + return 0; + } + + rets = pm_runtime_get(dev->dev); + if (rets < 0 && rets != -EINPROGRESS) { + pm_runtime_put_noidle(dev->dev); + cl_err(dev, cl, "rpm: get failed %d\n", rets); + return rets; + } + + rets = __mei_cl_disconnect(cl); + + cl_dbg(dev, cl, "rpm: autosuspend\n"); + pm_runtime_mark_last_busy(dev->dev); + pm_runtime_put_autosuspend(dev->dev); + + return rets; +} + + +/** + * mei_cl_is_other_connecting - checks if other + * client with the same me client id is connecting + * + * @cl: private data of the file object + * + * Return: true if other client is connected, false - otherwise. + */ +static bool mei_cl_is_other_connecting(struct mei_cl *cl) +{ + struct mei_device *dev; + struct mei_cl_cb *cb; + + dev = cl->dev; + + list_for_each_entry(cb, &dev->ctrl_rd_list, list) { + if (cb->fop_type == MEI_FOP_CONNECT && + mei_cl_me_id(cl) == mei_cl_me_id(cb->cl)) + return true; + } + + return false; +} + +/** + * mei_cl_send_connect - send connect request + * + * @cl: host client + * @cb: callback block + * + * Return: 0, OK; otherwise, error. + */ +static int mei_cl_send_connect(struct mei_cl *cl, struct mei_cl_cb *cb) +{ + struct mei_device *dev; + int ret; + + dev = cl->dev; + + ret = mei_hbm_cl_connect_req(dev, cl); + cl->status = ret; + if (ret) { + cl->state = MEI_FILE_DISCONNECT_REPLY; + return ret; + } + + list_move_tail(&cb->list, &dev->ctrl_rd_list); + cl->timer_count = MEI_CONNECT_TIMEOUT; + mei_schedule_stall_timer(dev); + return 0; +} + +/** + * mei_cl_irq_connect - send connect request in irq_thread context + * + * @cl: host client + * @cb: callback block + * @cmpl_list: complete list + * + * Return: 0, OK; otherwise, error. + */ +int mei_cl_irq_connect(struct mei_cl *cl, struct mei_cl_cb *cb, + struct list_head *cmpl_list) +{ + struct mei_device *dev = cl->dev; + u32 msg_slots; + int slots; + int rets; + + if (mei_cl_is_other_connecting(cl)) + return 0; + + msg_slots = mei_hbm2slots(sizeof(struct hbm_client_connect_request)); + slots = mei_hbuf_empty_slots(dev); + if (slots < 0) + return -EOVERFLOW; + + if ((u32)slots < msg_slots) + return -EMSGSIZE; + + rets = mei_cl_send_connect(cl, cb); + if (rets) + list_move_tail(&cb->list, cmpl_list); + + return rets; +} + +/** + * mei_cl_connect - connect host client to the me one + * + * @cl: host client + * @me_cl: me client + * @fp: pointer to file structure + * + * Locking: called under "dev->device_lock" lock + * + * Return: 0 on success, <0 on failure. + */ +int mei_cl_connect(struct mei_cl *cl, struct mei_me_client *me_cl, + const struct file *fp) +{ + struct mei_device *dev; + struct mei_cl_cb *cb; + int rets; + + if (WARN_ON(!cl || !cl->dev || !me_cl)) + return -ENODEV; + + dev = cl->dev; + + rets = mei_cl_set_connecting(cl, me_cl); + if (rets) + goto nortpm; + + if (mei_cl_is_fixed_address(cl)) { + cl->state = MEI_FILE_CONNECTED; + rets = 0; + goto nortpm; + } + + rets = pm_runtime_get(dev->dev); + if (rets < 0 && rets != -EINPROGRESS) { + pm_runtime_put_noidle(dev->dev); + cl_err(dev, cl, "rpm: get failed %d\n", rets); + goto nortpm; + } + + cb = mei_cl_enqueue_ctrl_wr_cb(cl, 0, MEI_FOP_CONNECT, fp); + if (!cb) { + rets = -ENOMEM; + goto out; + } + + /* run hbuf acquire last so we don't have to undo */ + if (!mei_cl_is_other_connecting(cl) && mei_hbuf_acquire(dev)) { + rets = mei_cl_send_connect(cl, cb); + if (rets) + goto out; + } + + mutex_unlock(&dev->device_lock); + wait_event_timeout(cl->wait, + (cl->state == MEI_FILE_CONNECTED || + cl->state == MEI_FILE_DISCONNECTED || + cl->state == MEI_FILE_DISCONNECT_REQUIRED || + cl->state == MEI_FILE_DISCONNECT_REPLY), + mei_secs_to_jiffies(MEI_CL_CONNECT_TIMEOUT)); + mutex_lock(&dev->device_lock); + + if (!mei_cl_is_connected(cl)) { + if (cl->state == MEI_FILE_DISCONNECT_REQUIRED) { + mei_io_list_flush_cl(&dev->ctrl_rd_list, cl); + mei_io_list_flush_cl(&dev->ctrl_wr_list, cl); + /* ignore disconnect return valuue; + * in case of failure reset will be invoked + */ + __mei_cl_disconnect(cl); + rets = -EFAULT; + goto out; + } + + /* timeout or something went really wrong */ + if (!cl->status) + cl->status = -EFAULT; + } + + rets = cl->status; +out: + cl_dbg(dev, cl, "rpm: autosuspend\n"); + pm_runtime_mark_last_busy(dev->dev); + pm_runtime_put_autosuspend(dev->dev); + + mei_io_cb_free(cb); + +nortpm: + if (!mei_cl_is_connected(cl)) + mei_cl_set_disconnected(cl); + + return rets; +} + +/** + * mei_cl_alloc_linked - allocate and link host client + * + * @dev: the device structure + * + * Return: cl on success ERR_PTR on failure + */ +struct mei_cl *mei_cl_alloc_linked(struct mei_device *dev) +{ + struct mei_cl *cl; + int ret; + + cl = mei_cl_allocate(dev); + if (!cl) { + ret = -ENOMEM; + goto err; + } + + ret = mei_cl_link(cl); + if (ret) + goto err; + + return cl; +err: + kfree(cl); + return ERR_PTR(ret); +} + +/** + * mei_cl_tx_flow_ctrl_creds - checks flow_control credits for cl. + * + * @cl: host client + * + * Return: 1 if tx_flow_ctrl_creds >0, 0 - otherwise. + */ +static int mei_cl_tx_flow_ctrl_creds(struct mei_cl *cl) +{ + if (WARN_ON(!cl || !cl->me_cl)) + return -EINVAL; + + if (cl->tx_flow_ctrl_creds > 0) + return 1; + + if (mei_cl_is_fixed_address(cl)) + return 1; + + if (mei_cl_is_single_recv_buf(cl)) { + if (cl->me_cl->tx_flow_ctrl_creds > 0) + return 1; + } + return 0; +} + +/** + * mei_cl_tx_flow_ctrl_creds_reduce - reduces transmit flow control credits + * for a client + * + * @cl: host client + * + * Return: + * 0 on success + * -EINVAL when ctrl credits are <= 0 + */ +static int mei_cl_tx_flow_ctrl_creds_reduce(struct mei_cl *cl) +{ + if (WARN_ON(!cl || !cl->me_cl)) + return -EINVAL; + + if (mei_cl_is_fixed_address(cl)) + return 0; + + if (mei_cl_is_single_recv_buf(cl)) { + if (WARN_ON(cl->me_cl->tx_flow_ctrl_creds <= 0)) + return -EINVAL; + cl->me_cl->tx_flow_ctrl_creds--; + } else { + if (WARN_ON(cl->tx_flow_ctrl_creds <= 0)) + return -EINVAL; + cl->tx_flow_ctrl_creds--; + } + return 0; +} + +/** + * mei_cl_notify_fop2req - convert fop to proper request + * + * @fop: client notification start response command + * + * Return: MEI_HBM_NOTIFICATION_START/STOP + */ +u8 mei_cl_notify_fop2req(enum mei_cb_file_ops fop) +{ + if (fop == MEI_FOP_NOTIFY_START) + return MEI_HBM_NOTIFICATION_START; + else + return MEI_HBM_NOTIFICATION_STOP; +} + +/** + * mei_cl_notify_req2fop - convert notification request top file operation type + * + * @req: hbm notification request type + * + * Return: MEI_FOP_NOTIFY_START/STOP + */ +enum mei_cb_file_ops mei_cl_notify_req2fop(u8 req) +{ + if (req == MEI_HBM_NOTIFICATION_START) + return MEI_FOP_NOTIFY_START; + else + return MEI_FOP_NOTIFY_STOP; +} + +/** + * mei_cl_irq_notify - send notification request in irq_thread context + * + * @cl: client + * @cb: callback block. + * @cmpl_list: complete list. + * + * Return: 0 on such and error otherwise. + */ +int mei_cl_irq_notify(struct mei_cl *cl, struct mei_cl_cb *cb, + struct list_head *cmpl_list) +{ + struct mei_device *dev = cl->dev; + u32 msg_slots; + int slots; + int ret; + bool request; + + msg_slots = mei_hbm2slots(sizeof(struct hbm_client_connect_request)); + slots = mei_hbuf_empty_slots(dev); + if (slots < 0) + return -EOVERFLOW; + + if ((u32)slots < msg_slots) + return -EMSGSIZE; + + request = mei_cl_notify_fop2req(cb->fop_type); + ret = mei_hbm_cl_notify_req(dev, cl, request); + if (ret) { + cl->status = ret; + list_move_tail(&cb->list, cmpl_list); + return ret; + } + + list_move_tail(&cb->list, &dev->ctrl_rd_list); + return 0; +} + +/** + * mei_cl_notify_request - send notification stop/start request + * + * @cl: host client + * @fp: associate request with file + * @request: 1 for start or 0 for stop + * + * Locking: called under "dev->device_lock" lock + * + * Return: 0 on such and error otherwise. + */ +int mei_cl_notify_request(struct mei_cl *cl, + const struct file *fp, u8 request) +{ + struct mei_device *dev; + struct mei_cl_cb *cb; + enum mei_cb_file_ops fop_type; + int rets; + + if (WARN_ON(!cl || !cl->dev)) + return -ENODEV; + + dev = cl->dev; + + if (!dev->hbm_f_ev_supported) { + cl_dbg(dev, cl, "notifications not supported\n"); + return -EOPNOTSUPP; + } + + if (!mei_cl_is_connected(cl)) + return -ENODEV; + + rets = pm_runtime_get(dev->dev); + if (rets < 0 && rets != -EINPROGRESS) { + pm_runtime_put_noidle(dev->dev); + cl_err(dev, cl, "rpm: get failed %d\n", rets); + return rets; + } + + fop_type = mei_cl_notify_req2fop(request); + cb = mei_cl_enqueue_ctrl_wr_cb(cl, 0, fop_type, fp); + if (!cb) { + rets = -ENOMEM; + goto out; + } + + if (mei_hbuf_acquire(dev)) { + if (mei_hbm_cl_notify_req(dev, cl, request)) { + rets = -ENODEV; + goto out; + } + list_move_tail(&cb->list, &dev->ctrl_rd_list); + } + + mutex_unlock(&dev->device_lock); + wait_event_timeout(cl->wait, + cl->notify_en == request || !mei_cl_is_connected(cl), + mei_secs_to_jiffies(MEI_CL_CONNECT_TIMEOUT)); + mutex_lock(&dev->device_lock); + + if (cl->notify_en != request && !cl->status) + cl->status = -EFAULT; + + rets = cl->status; + +out: + cl_dbg(dev, cl, "rpm: autosuspend\n"); + pm_runtime_mark_last_busy(dev->dev); + pm_runtime_put_autosuspend(dev->dev); + + mei_io_cb_free(cb); + return rets; +} + +/** + * mei_cl_notify - raise notification + * + * @cl: host client + * + * Locking: called under "dev->device_lock" lock + */ +void mei_cl_notify(struct mei_cl *cl) +{ + struct mei_device *dev; + + if (!cl || !cl->dev) + return; + + dev = cl->dev; + + if (!cl->notify_en) + return; + + cl_dbg(dev, cl, "notify event"); + cl->notify_ev = true; + if (!mei_cl_bus_notify_event(cl)) + wake_up_interruptible(&cl->ev_wait); + + if (cl->ev_async) + kill_fasync(&cl->ev_async, SIGIO, POLL_PRI); + +} + +/** + * mei_cl_notify_get - get or wait for notification event + * + * @cl: host client + * @block: this request is blocking + * @notify_ev: true if notification event was received + * + * Locking: called under "dev->device_lock" lock + * + * Return: 0 on such and error otherwise. + */ +int mei_cl_notify_get(struct mei_cl *cl, bool block, bool *notify_ev) +{ + struct mei_device *dev; + int rets; + + *notify_ev = false; + + if (WARN_ON(!cl || !cl->dev)) + return -ENODEV; + + dev = cl->dev; + + if (!dev->hbm_f_ev_supported) { + cl_dbg(dev, cl, "notifications not supported\n"); + return -EOPNOTSUPP; + } + + if (!mei_cl_is_connected(cl)) + return -ENODEV; + + if (cl->notify_ev) + goto out; + + if (!block) + return -EAGAIN; + + mutex_unlock(&dev->device_lock); + rets = wait_event_interruptible(cl->ev_wait, cl->notify_ev); + mutex_lock(&dev->device_lock); + + if (rets < 0) + return rets; + +out: + *notify_ev = cl->notify_ev; + cl->notify_ev = false; + return 0; +} + +/** + * mei_cl_read_start - the start read client message function. + * + * @cl: host client + * @length: number of bytes to read + * @fp: pointer to file structure + * + * Return: 0 on success, <0 on failure. + */ +int mei_cl_read_start(struct mei_cl *cl, size_t length, const struct file *fp) +{ + struct mei_device *dev; + struct mei_cl_cb *cb; + int rets; + + if (WARN_ON(!cl || !cl->dev)) + return -ENODEV; + + dev = cl->dev; + + if (!mei_cl_is_connected(cl)) + return -ENODEV; + + if (!mei_me_cl_is_active(cl->me_cl)) { + cl_err(dev, cl, "no such me client\n"); + return -ENOTTY; + } + + if (mei_cl_is_fixed_address(cl)) + return 0; + + /* HW currently supports only one pending read */ + if (cl->rx_flow_ctrl_creds) + return -EBUSY; + + cb = mei_cl_enqueue_ctrl_wr_cb(cl, length, MEI_FOP_READ, fp); + if (!cb) + return -ENOMEM; + + rets = pm_runtime_get(dev->dev); + if (rets < 0 && rets != -EINPROGRESS) { + pm_runtime_put_noidle(dev->dev); + cl_err(dev, cl, "rpm: get failed %d\n", rets); + goto nortpm; + } + + rets = 0; + if (mei_hbuf_acquire(dev)) { + rets = mei_hbm_cl_flow_control_req(dev, cl); + if (rets < 0) + goto out; + + list_move_tail(&cb->list, &cl->rd_pending); + } + cl->rx_flow_ctrl_creds++; + +out: + cl_dbg(dev, cl, "rpm: autosuspend\n"); + pm_runtime_mark_last_busy(dev->dev); + pm_runtime_put_autosuspend(dev->dev); +nortpm: + if (rets) + mei_io_cb_free(cb); + + return rets; +} + +/** + * mei_msg_hdr_init - initialize mei message header + * + * @mei_hdr: mei message header + * @cb: message callback structure + */ +static void mei_msg_hdr_init(struct mei_msg_hdr *mei_hdr, struct mei_cl_cb *cb) +{ + mei_hdr->host_addr = mei_cl_host_addr(cb->cl); + mei_hdr->me_addr = mei_cl_me_id(cb->cl); + mei_hdr->length = 0; + mei_hdr->reserved = 0; + mei_hdr->msg_complete = 0; + mei_hdr->dma_ring = 0; + mei_hdr->internal = cb->internal; +} + +/** + * mei_cl_irq_write - write a message to device + * from the interrupt thread context + * + * @cl: client + * @cb: callback block. + * @cmpl_list: complete list. + * + * Return: 0, OK; otherwise error. + */ +int mei_cl_irq_write(struct mei_cl *cl, struct mei_cl_cb *cb, + struct list_head *cmpl_list) +{ + struct mei_device *dev; + struct mei_msg_data *buf; + struct mei_msg_hdr mei_hdr; + size_t hdr_len = sizeof(mei_hdr); + size_t len; + size_t hbuf_len; + int hbuf_slots; + int rets; + bool first_chunk; + + if (WARN_ON(!cl || !cl->dev)) + return -ENODEV; + + dev = cl->dev; + + buf = &cb->buf; + + first_chunk = cb->buf_idx == 0; + + rets = first_chunk ? mei_cl_tx_flow_ctrl_creds(cl) : 1; + if (rets < 0) + goto err; + + if (rets == 0) { + cl_dbg(dev, cl, "No flow control credentials: not sending.\n"); + return 0; + } + + len = buf->size - cb->buf_idx; + hbuf_slots = mei_hbuf_empty_slots(dev); + if (hbuf_slots < 0) { + rets = -EOVERFLOW; + goto err; + } + + hbuf_len = mei_slots2data(hbuf_slots); + + mei_msg_hdr_init(&mei_hdr, cb); + + /** + * Split the message only if we can write the whole host buffer + * otherwise wait for next time the host buffer is empty. + */ + if (len + hdr_len <= hbuf_len) { + mei_hdr.length = len; + mei_hdr.msg_complete = 1; + } else if ((u32)hbuf_slots == mei_hbuf_depth(dev)) { + mei_hdr.length = hbuf_len - hdr_len; + } else { + return 0; + } + + cl_dbg(dev, cl, "buf: size = %zu idx = %zu\n", + cb->buf.size, cb->buf_idx); + + rets = mei_write_message(dev, &mei_hdr, hdr_len, + buf->data + cb->buf_idx, mei_hdr.length); + if (rets) + goto err; + + cl->status = 0; + cl->writing_state = MEI_WRITING; + cb->buf_idx += mei_hdr.length; + + if (first_chunk) { + if (mei_cl_tx_flow_ctrl_creds_reduce(cl)) { + rets = -EIO; + goto err; + } + } + + if (mei_hdr.msg_complete) + list_move_tail(&cb->list, &dev->write_waiting_list); + + return 0; + +err: + cl->status = rets; + list_move_tail(&cb->list, cmpl_list); + return rets; +} + +/** + * mei_cl_write - submit a write cb to mei device + * assumes device_lock is locked + * + * @cl: host client + * @cb: write callback with filled data + * + * Return: number of bytes sent on success, <0 on failure. + */ +ssize_t mei_cl_write(struct mei_cl *cl, struct mei_cl_cb *cb) +{ + struct mei_device *dev; + struct mei_msg_data *buf; + struct mei_msg_hdr mei_hdr; + size_t hdr_len = sizeof(mei_hdr); + size_t len; + size_t hbuf_len; + int hbuf_slots; + ssize_t rets; + bool blocking; + + if (WARN_ON(!cl || !cl->dev)) + return -ENODEV; + + if (WARN_ON(!cb)) + return -EINVAL; + + dev = cl->dev; + + buf = &cb->buf; + len = buf->size; + blocking = cb->blocking; + + cl_dbg(dev, cl, "len=%zd\n", len); + + rets = pm_runtime_get(dev->dev); + if (rets < 0 && rets != -EINPROGRESS) { + pm_runtime_put_noidle(dev->dev); + cl_err(dev, cl, "rpm: get failed %zd\n", rets); + goto free; + } + + cb->buf_idx = 0; + cl->writing_state = MEI_IDLE; + + + rets = mei_cl_tx_flow_ctrl_creds(cl); + if (rets < 0) + goto err; + + mei_msg_hdr_init(&mei_hdr, cb); + + if (rets == 0) { + cl_dbg(dev, cl, "No flow control credentials: not sending.\n"); + rets = len; + goto out; + } + + if (!mei_hbuf_acquire(dev)) { + cl_dbg(dev, cl, "Cannot acquire the host buffer: not sending.\n"); + rets = len; + goto out; + } + + hbuf_slots = mei_hbuf_empty_slots(dev); + if (hbuf_slots < 0) { + rets = -EOVERFLOW; + goto out; + } + + hbuf_len = mei_slots2data(hbuf_slots); + + if (len + hdr_len <= hbuf_len) { + mei_hdr.length = len; + mei_hdr.msg_complete = 1; + } else { + mei_hdr.length = hbuf_len - hdr_len; + } + + rets = mei_write_message(dev, &mei_hdr, hdr_len, + buf->data, mei_hdr.length); + if (rets) + goto err; + + rets = mei_cl_tx_flow_ctrl_creds_reduce(cl); + if (rets) + goto err; + + cl->writing_state = MEI_WRITING; + cb->buf_idx = mei_hdr.length; + +out: + if (mei_hdr.msg_complete) + mei_tx_cb_enqueue(cb, &dev->write_waiting_list); + else + mei_tx_cb_enqueue(cb, &dev->write_list); + + cb = NULL; + if (blocking && cl->writing_state != MEI_WRITE_COMPLETE) { + + mutex_unlock(&dev->device_lock); + rets = wait_event_interruptible(cl->tx_wait, + cl->writing_state == MEI_WRITE_COMPLETE || + (!mei_cl_is_connected(cl))); + mutex_lock(&dev->device_lock); + /* wait_event_interruptible returns -ERESTARTSYS */ + if (rets) { + if (signal_pending(current)) + rets = -EINTR; + goto err; + } + if (cl->writing_state != MEI_WRITE_COMPLETE) { + rets = -EFAULT; + goto err; + } + } + + rets = len; +err: + cl_dbg(dev, cl, "rpm: autosuspend\n"); + pm_runtime_mark_last_busy(dev->dev); + pm_runtime_put_autosuspend(dev->dev); +free: + mei_io_cb_free(cb); + + return rets; +} + + +/** + * mei_cl_complete - processes completed operation for a client + * + * @cl: private data of the file object. + * @cb: callback block. + */ +void mei_cl_complete(struct mei_cl *cl, struct mei_cl_cb *cb) +{ + struct mei_device *dev = cl->dev; + + switch (cb->fop_type) { + case MEI_FOP_WRITE: + mei_tx_cb_dequeue(cb); + cl->writing_state = MEI_WRITE_COMPLETE; + if (waitqueue_active(&cl->tx_wait)) { + wake_up_interruptible(&cl->tx_wait); + } else { + pm_runtime_mark_last_busy(dev->dev); + pm_request_autosuspend(dev->dev); + } + break; + + case MEI_FOP_READ: + list_add_tail(&cb->list, &cl->rd_completed); + if (!mei_cl_is_fixed_address(cl) && + !WARN_ON(!cl->rx_flow_ctrl_creds)) + cl->rx_flow_ctrl_creds--; + if (!mei_cl_bus_rx_event(cl)) + wake_up_interruptible(&cl->rx_wait); + break; + + case MEI_FOP_CONNECT: + case MEI_FOP_DISCONNECT: + case MEI_FOP_NOTIFY_STOP: + case MEI_FOP_NOTIFY_START: + if (waitqueue_active(&cl->wait)) + wake_up(&cl->wait); + + break; + case MEI_FOP_DISCONNECT_RSP: + mei_io_cb_free(cb); + mei_cl_set_disconnected(cl); + break; + default: + BUG_ON(0); + } +} + + +/** + * mei_cl_all_disconnect - disconnect forcefully all connected clients + * + * @dev: mei device + */ +void mei_cl_all_disconnect(struct mei_device *dev) +{ + struct mei_cl *cl; + + list_for_each_entry(cl, &dev->file_list, link) + mei_cl_set_disconnected(cl); +} diff --git a/drivers/misc/mei/client.h b/drivers/misc/mei/client.h new file mode 100644 index 000000000..0d23efcc7 --- /dev/null +++ b/drivers/misc/mei/client.h @@ -0,0 +1,236 @@ +/* + * + * Intel Management Engine Interface (Intel MEI) Linux driver + * Copyright (c) 2003-2012, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#ifndef _MEI_CLIENT_H_ +#define _MEI_CLIENT_H_ + +#include +#include +#include + +#include "mei_dev.h" + +/* + * reference counting base function + */ +void mei_me_cl_init(struct mei_me_client *me_cl); +void mei_me_cl_put(struct mei_me_client *me_cl); +struct mei_me_client *mei_me_cl_get(struct mei_me_client *me_cl); + +void mei_me_cl_add(struct mei_device *dev, struct mei_me_client *me_cl); +void mei_me_cl_del(struct mei_device *dev, struct mei_me_client *me_cl); + +struct mei_me_client *mei_me_cl_by_uuid(struct mei_device *dev, + const uuid_le *uuid); +struct mei_me_client *mei_me_cl_by_id(struct mei_device *dev, u8 client_id); +struct mei_me_client *mei_me_cl_by_uuid_id(struct mei_device *dev, + const uuid_le *uuid, u8 client_id); +void mei_me_cl_rm_by_uuid(struct mei_device *dev, const uuid_le *uuid); +void mei_me_cl_rm_by_uuid_id(struct mei_device *dev, + const uuid_le *uuid, u8 id); +void mei_me_cl_rm_all(struct mei_device *dev); + +/** + * mei_me_cl_is_active - check whether me client is active in the fw + * + * @me_cl: me client + * + * Return: true if the me client is active in the firmware + */ +static inline bool mei_me_cl_is_active(const struct mei_me_client *me_cl) +{ + return !list_empty_careful(&me_cl->list); +} + +/** + * mei_me_cl_uuid - return me client protocol name (uuid) + * + * @me_cl: me client + * + * Return: me client protocol name + */ +static inline const uuid_le *mei_me_cl_uuid(const struct mei_me_client *me_cl) +{ + return &me_cl->props.protocol_name; +} + +/** + * mei_me_cl_ver - return me client protocol version + * + * @me_cl: me client + * + * Return: me client protocol version + */ +static inline u8 mei_me_cl_ver(const struct mei_me_client *me_cl) +{ + return me_cl->props.protocol_version; +} + +/* + * MEI IO Functions + */ +void mei_io_cb_free(struct mei_cl_cb *priv_cb); + +/* + * MEI Host Client Functions + */ + +struct mei_cl *mei_cl_allocate(struct mei_device *dev); + +int mei_cl_link(struct mei_cl *cl); +int mei_cl_unlink(struct mei_cl *cl); + +struct mei_cl *mei_cl_alloc_linked(struct mei_device *dev); + +struct mei_cl_cb *mei_cl_read_cb(const struct mei_cl *cl, + const struct file *fp); +struct mei_cl_cb *mei_cl_alloc_cb(struct mei_cl *cl, size_t length, + enum mei_cb_file_ops type, + const struct file *fp); +struct mei_cl_cb *mei_cl_enqueue_ctrl_wr_cb(struct mei_cl *cl, size_t length, + enum mei_cb_file_ops type, + const struct file *fp); +int mei_cl_flush_queues(struct mei_cl *cl, const struct file *fp); + +/* + * MEI input output function prototype + */ + +/** + * mei_cl_is_connected - host client is connected + * + * @cl: host client + * + * Return: true if the host client is connected + */ +static inline bool mei_cl_is_connected(struct mei_cl *cl) +{ + return cl->state == MEI_FILE_CONNECTED; +} + +/** + * mei_cl_me_id - me client id + * + * @cl: host client + * + * Return: me client id or 0 if client is not connected + */ +static inline u8 mei_cl_me_id(const struct mei_cl *cl) +{ + return cl->me_cl ? cl->me_cl->client_id : 0; +} + +/** + * mei_cl_mtu - maximal message that client can send and receive + * + * @cl: host client + * + * Return: mtu or 0 if client is not connected + */ +static inline size_t mei_cl_mtu(const struct mei_cl *cl) +{ + return cl->me_cl ? cl->me_cl->props.max_msg_length : 0; +} + +/** + * mei_cl_is_fixed_address - check whether the me client uses fixed address + * + * @cl: host client + * + * Return: true if the client is connected and it has fixed me address + */ +static inline bool mei_cl_is_fixed_address(const struct mei_cl *cl) +{ + return cl->me_cl && cl->me_cl->props.fixed_address; +} + +/** + * mei_cl_is_single_recv_buf- check whether the me client + * uses single receiving buffer + * + * @cl: host client + * + * Return: true if single_recv_buf == 1; 0 otherwise + */ +static inline bool mei_cl_is_single_recv_buf(const struct mei_cl *cl) +{ + return cl->me_cl->props.single_recv_buf; +} + +/** + * mei_cl_uuid - client's uuid + * + * @cl: host client + * + * Return: return uuid of connected me client + */ +static inline const uuid_le *mei_cl_uuid(const struct mei_cl *cl) +{ + return mei_me_cl_uuid(cl->me_cl); +} + +/** + * mei_cl_host_addr - client's host address + * + * @cl: host client + * + * Return: 0 for fixed address client, host address for dynamic client + */ +static inline u8 mei_cl_host_addr(const struct mei_cl *cl) +{ + return mei_cl_is_fixed_address(cl) ? 0 : cl->host_client_id; +} + +int mei_cl_disconnect(struct mei_cl *cl); +int mei_cl_irq_disconnect(struct mei_cl *cl, struct mei_cl_cb *cb, + struct list_head *cmpl_list); +int mei_cl_connect(struct mei_cl *cl, struct mei_me_client *me_cl, + const struct file *file); +int mei_cl_irq_connect(struct mei_cl *cl, struct mei_cl_cb *cb, + struct list_head *cmpl_list); +int mei_cl_read_start(struct mei_cl *cl, size_t length, const struct file *fp); +ssize_t mei_cl_write(struct mei_cl *cl, struct mei_cl_cb *cb); +int mei_cl_irq_write(struct mei_cl *cl, struct mei_cl_cb *cb, + struct list_head *cmpl_list); + +void mei_cl_complete(struct mei_cl *cl, struct mei_cl_cb *cb); + +void mei_host_client_init(struct mei_device *dev); + +u8 mei_cl_notify_fop2req(enum mei_cb_file_ops fop); +enum mei_cb_file_ops mei_cl_notify_req2fop(u8 request); +int mei_cl_notify_request(struct mei_cl *cl, + const struct file *file, u8 request); +int mei_cl_irq_notify(struct mei_cl *cl, struct mei_cl_cb *cb, + struct list_head *cmpl_list); +int mei_cl_notify_get(struct mei_cl *cl, bool block, bool *notify_ev); +void mei_cl_notify(struct mei_cl *cl); + +void mei_cl_all_disconnect(struct mei_device *dev); + +#define MEI_CL_FMT "cl:host=%02d me=%02d " +#define MEI_CL_PRM(cl) (cl)->host_client_id, mei_cl_me_id(cl) + +#define cl_dbg(dev, cl, format, arg...) \ + dev_dbg((dev)->dev, MEI_CL_FMT format, MEI_CL_PRM(cl), ##arg) + +#define cl_warn(dev, cl, format, arg...) \ + dev_warn((dev)->dev, MEI_CL_FMT format, MEI_CL_PRM(cl), ##arg) + +#define cl_err(dev, cl, format, arg...) \ + dev_err((dev)->dev, MEI_CL_FMT format, MEI_CL_PRM(cl), ##arg) + +#endif /* _MEI_CLIENT_H_ */ diff --git a/drivers/misc/mei/debugfs.c b/drivers/misc/mei/debugfs.c new file mode 100644 index 000000000..7b5df8fd6 --- /dev/null +++ b/drivers/misc/mei/debugfs.c @@ -0,0 +1,288 @@ +/* + * + * Intel Management Engine Interface (Intel MEI) Linux driver + * Copyright (c) 2012-2013, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ +#include +#include +#include +#include + +#include + +#include "mei_dev.h" +#include "client.h" +#include "hw.h" + +static ssize_t mei_dbgfs_read_meclients(struct file *fp, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + struct mei_device *dev = fp->private_data; + struct mei_me_client *me_cl; + size_t bufsz = 1; + char *buf; + int i = 0; + int pos = 0; + int ret; + +#define HDR \ +" |id|fix| UUID |con|msg len|sb|refc|\n" + + down_read(&dev->me_clients_rwsem); + list_for_each_entry(me_cl, &dev->me_clients, list) + bufsz++; + + bufsz *= sizeof(HDR) + 1; + buf = kzalloc(bufsz, GFP_KERNEL); + if (!buf) { + up_read(&dev->me_clients_rwsem); + return -ENOMEM; + } + + pos += scnprintf(buf + pos, bufsz - pos, HDR); +#undef HDR + + /* if the driver is not enabled the list won't be consistent */ + if (dev->dev_state != MEI_DEV_ENABLED) + goto out; + + list_for_each_entry(me_cl, &dev->me_clients, list) { + + if (mei_me_cl_get(me_cl)) { + pos += scnprintf(buf + pos, bufsz - pos, + "%2d|%2d|%3d|%pUl|%3d|%7d|%2d|%4d|\n", + i++, me_cl->client_id, + me_cl->props.fixed_address, + &me_cl->props.protocol_name, + me_cl->props.max_number_of_connections, + me_cl->props.max_msg_length, + me_cl->props.single_recv_buf, + kref_read(&me_cl->refcnt)); + + mei_me_cl_put(me_cl); + } + } + +out: + up_read(&dev->me_clients_rwsem); + ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, pos); + kfree(buf); + return ret; +} + +static const struct file_operations mei_dbgfs_fops_meclients = { + .open = simple_open, + .read = mei_dbgfs_read_meclients, + .llseek = generic_file_llseek, +}; + +static ssize_t mei_dbgfs_read_active(struct file *fp, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + struct mei_device *dev = fp->private_data; + struct mei_cl *cl; + size_t bufsz = 1; + char *buf; + int i = 0; + int pos = 0; + int ret; + +#define HDR " |me|host|state|rd|wr|wrq\n" + + if (!dev) + return -ENODEV; + + mutex_lock(&dev->device_lock); + + /* + * if the driver is not enabled the list won't be consistent, + * we output empty table + */ + if (dev->dev_state == MEI_DEV_ENABLED) + list_for_each_entry(cl, &dev->file_list, link) + bufsz++; + + bufsz *= sizeof(HDR) + 1; + + buf = kzalloc(bufsz, GFP_KERNEL); + if (!buf) { + mutex_unlock(&dev->device_lock); + return -ENOMEM; + } + + pos += scnprintf(buf + pos, bufsz - pos, HDR); +#undef HDR + + /* if the driver is not enabled the list won't be consistent */ + if (dev->dev_state != MEI_DEV_ENABLED) + goto out; + + list_for_each_entry(cl, &dev->file_list, link) { + + pos += scnprintf(buf + pos, bufsz - pos, + "%3d|%2d|%4d|%5d|%2d|%2d|%3u\n", + i, mei_cl_me_id(cl), cl->host_client_id, cl->state, + !list_empty(&cl->rd_completed), cl->writing_state, + cl->tx_cb_queued); + i++; + } +out: + mutex_unlock(&dev->device_lock); + ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, pos); + kfree(buf); + return ret; +} + +static const struct file_operations mei_dbgfs_fops_active = { + .open = simple_open, + .read = mei_dbgfs_read_active, + .llseek = generic_file_llseek, +}; + +static ssize_t mei_dbgfs_read_devstate(struct file *fp, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + struct mei_device *dev = fp->private_data; + const size_t bufsz = 1024; + char *buf = kzalloc(bufsz, GFP_KERNEL); + int pos = 0; + int ret; + + if (!buf) + return -ENOMEM; + + pos += scnprintf(buf + pos, bufsz - pos, "dev: %s\n", + mei_dev_state_str(dev->dev_state)); + pos += scnprintf(buf + pos, bufsz - pos, "hbm: %s\n", + mei_hbm_state_str(dev->hbm_state)); + + if (dev->hbm_state >= MEI_HBM_ENUM_CLIENTS && + dev->hbm_state <= MEI_HBM_STARTED) { + pos += scnprintf(buf + pos, bufsz - pos, "hbm features:\n"); + pos += scnprintf(buf + pos, bufsz - pos, "\tPG: %01d\n", + dev->hbm_f_pg_supported); + pos += scnprintf(buf + pos, bufsz - pos, "\tDC: %01d\n", + dev->hbm_f_dc_supported); + pos += scnprintf(buf + pos, bufsz - pos, "\tIE: %01d\n", + dev->hbm_f_ie_supported); + pos += scnprintf(buf + pos, bufsz - pos, "\tDOT: %01d\n", + dev->hbm_f_dot_supported); + pos += scnprintf(buf + pos, bufsz - pos, "\tEV: %01d\n", + dev->hbm_f_ev_supported); + pos += scnprintf(buf + pos, bufsz - pos, "\tFA: %01d\n", + dev->hbm_f_fa_supported); + pos += scnprintf(buf + pos, bufsz - pos, "\tOS: %01d\n", + dev->hbm_f_os_supported); + pos += scnprintf(buf + pos, bufsz - pos, "\tDR: %01d\n", + dev->hbm_f_dr_supported); + } + + pos += scnprintf(buf + pos, bufsz - pos, "pg: %s, %s\n", + mei_pg_is_enabled(dev) ? "ENABLED" : "DISABLED", + mei_pg_state_str(mei_pg_state(dev))); + ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, pos); + kfree(buf); + return ret; +} +static const struct file_operations mei_dbgfs_fops_devstate = { + .open = simple_open, + .read = mei_dbgfs_read_devstate, + .llseek = generic_file_llseek, +}; + +static ssize_t mei_dbgfs_write_allow_fa(struct file *file, + const char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct mei_device *dev; + int ret; + + dev = container_of(file->private_data, + struct mei_device, allow_fixed_address); + + ret = debugfs_write_file_bool(file, user_buf, count, ppos); + if (ret < 0) + return ret; + dev->override_fixed_address = true; + return ret; +} + +static const struct file_operations mei_dbgfs_fops_allow_fa = { + .open = simple_open, + .read = debugfs_read_file_bool, + .write = mei_dbgfs_write_allow_fa, + .llseek = generic_file_llseek, +}; + +/** + * mei_dbgfs_deregister - Remove the debugfs files and directories + * + * @dev: the mei device structure + */ +void mei_dbgfs_deregister(struct mei_device *dev) +{ + if (!dev->dbgfs_dir) + return; + debugfs_remove_recursive(dev->dbgfs_dir); + dev->dbgfs_dir = NULL; +} + +/** + * mei_dbgfs_register - Add the debugfs files + * + * @dev: the mei device structure + * @name: the mei device name + * + * Return: 0 on success, <0 on failure. + */ +int mei_dbgfs_register(struct mei_device *dev, const char *name) +{ + struct dentry *dir, *f; + + dir = debugfs_create_dir(name, NULL); + if (!dir) + return -ENOMEM; + + dev->dbgfs_dir = dir; + + f = debugfs_create_file("meclients", S_IRUSR, dir, + dev, &mei_dbgfs_fops_meclients); + if (!f) { + dev_err(dev->dev, "meclients: registration failed\n"); + goto err; + } + f = debugfs_create_file("active", S_IRUSR, dir, + dev, &mei_dbgfs_fops_active); + if (!f) { + dev_err(dev->dev, "active: registration failed\n"); + goto err; + } + f = debugfs_create_file("devstate", S_IRUSR, dir, + dev, &mei_dbgfs_fops_devstate); + if (!f) { + dev_err(dev->dev, "devstate: registration failed\n"); + goto err; + } + f = debugfs_create_file("allow_fixed_address", S_IRUSR | S_IWUSR, dir, + &dev->allow_fixed_address, + &mei_dbgfs_fops_allow_fa); + if (!f) { + dev_err(dev->dev, "allow_fixed_address: registration failed\n"); + goto err; + } + return 0; +err: + mei_dbgfs_deregister(dev); + return -ENODEV; +} + diff --git a/drivers/misc/mei/hbm.c b/drivers/misc/mei/hbm.c new file mode 100644 index 000000000..d39cc2909 --- /dev/null +++ b/drivers/misc/mei/hbm.c @@ -0,0 +1,1287 @@ +/* + * + * Intel Management Engine Interface (Intel MEI) Linux driver + * Copyright (c) 2003-2012, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#include +#include +#include +#include +#include + +#include + +#include "mei_dev.h" +#include "hbm.h" +#include "client.h" + +static const char *mei_hbm_status_str(enum mei_hbm_status status) +{ +#define MEI_HBM_STATUS(status) case MEI_HBMS_##status: return #status + switch (status) { + MEI_HBM_STATUS(SUCCESS); + MEI_HBM_STATUS(CLIENT_NOT_FOUND); + MEI_HBM_STATUS(ALREADY_EXISTS); + MEI_HBM_STATUS(REJECTED); + MEI_HBM_STATUS(INVALID_PARAMETER); + MEI_HBM_STATUS(NOT_ALLOWED); + MEI_HBM_STATUS(ALREADY_STARTED); + MEI_HBM_STATUS(NOT_STARTED); + default: return "unknown"; + } +#undef MEI_HBM_STATUS +}; + +static const char *mei_cl_conn_status_str(enum mei_cl_connect_status status) +{ +#define MEI_CL_CS(status) case MEI_CL_CONN_##status: return #status + switch (status) { + MEI_CL_CS(SUCCESS); + MEI_CL_CS(NOT_FOUND); + MEI_CL_CS(ALREADY_STARTED); + MEI_CL_CS(OUT_OF_RESOURCES); + MEI_CL_CS(MESSAGE_SMALL); + MEI_CL_CS(NOT_ALLOWED); + default: return "unknown"; + } +#undef MEI_CL_CCS +} + +const char *mei_hbm_state_str(enum mei_hbm_state state) +{ +#define MEI_HBM_STATE(state) case MEI_HBM_##state: return #state + switch (state) { + MEI_HBM_STATE(IDLE); + MEI_HBM_STATE(STARTING); + MEI_HBM_STATE(STARTED); + MEI_HBM_STATE(ENUM_CLIENTS); + MEI_HBM_STATE(CLIENT_PROPERTIES); + MEI_HBM_STATE(STOPPED); + default: + return "unknown"; + } +#undef MEI_HBM_STATE +} + +/** + * mei_cl_conn_status_to_errno - convert client connect response + * status to error code + * + * @status: client connect response status + * + * Return: corresponding error code + */ +static int mei_cl_conn_status_to_errno(enum mei_cl_connect_status status) +{ + switch (status) { + case MEI_CL_CONN_SUCCESS: return 0; + case MEI_CL_CONN_NOT_FOUND: return -ENOTTY; + case MEI_CL_CONN_ALREADY_STARTED: return -EBUSY; + case MEI_CL_CONN_OUT_OF_RESOURCES: return -EBUSY; + case MEI_CL_CONN_MESSAGE_SMALL: return -EINVAL; + case MEI_CL_CONN_NOT_ALLOWED: return -EBUSY; + default: return -EINVAL; + } +} + +/** + * mei_hbm_write_message - wrapper for sending hbm messages. + * + * @dev: mei device + * @hdr: mei header + * @data: payload + */ +static inline int mei_hbm_write_message(struct mei_device *dev, + struct mei_msg_hdr *hdr, + const void *data) +{ + return mei_write_message(dev, hdr, sizeof(*hdr), data, hdr->length); +} + +/** + * mei_hbm_idle - set hbm to idle state + * + * @dev: the device structure + */ +void mei_hbm_idle(struct mei_device *dev) +{ + dev->init_clients_timer = 0; + dev->hbm_state = MEI_HBM_IDLE; +} + +/** + * mei_hbm_reset - reset hbm counters and book keeping data structurs + * + * @dev: the device structure + */ +void mei_hbm_reset(struct mei_device *dev) +{ + mei_me_cl_rm_all(dev); + + mei_hbm_idle(dev); +} + +/** + * mei_hbm_hdr - construct hbm header + * + * @hdr: hbm header + * @length: payload length + */ + +static inline void mei_hbm_hdr(struct mei_msg_hdr *hdr, size_t length) +{ + hdr->host_addr = 0; + hdr->me_addr = 0; + hdr->length = length; + hdr->msg_complete = 1; + hdr->dma_ring = 0; + hdr->reserved = 0; + hdr->internal = 0; +} + +/** + * mei_hbm_cl_hdr - construct client hbm header + * + * @cl: client + * @hbm_cmd: host bus message command + * @buf: buffer for cl header + * @len: buffer length + */ +static inline +void mei_hbm_cl_hdr(struct mei_cl *cl, u8 hbm_cmd, void *buf, size_t len) +{ + struct mei_hbm_cl_cmd *cmd = buf; + + memset(cmd, 0, len); + + cmd->hbm_cmd = hbm_cmd; + cmd->host_addr = mei_cl_host_addr(cl); + cmd->me_addr = mei_cl_me_id(cl); +} + +/** + * mei_hbm_cl_write - write simple hbm client message + * + * @dev: the device structure + * @cl: client + * @hbm_cmd: host bus message command + * @buf: message buffer + * @len: buffer length + * + * Return: 0 on success, <0 on failure. + */ +static inline int mei_hbm_cl_write(struct mei_device *dev, struct mei_cl *cl, + u8 hbm_cmd, void *buf, size_t len) +{ + struct mei_msg_hdr mei_hdr; + + mei_hbm_hdr(&mei_hdr, len); + mei_hbm_cl_hdr(cl, hbm_cmd, buf, len); + + return mei_hbm_write_message(dev, &mei_hdr, buf); +} + +/** + * mei_hbm_cl_addr_equal - check if the client's and + * the message address match + * + * @cl: client + * @cmd: hbm client message + * + * Return: true if addresses are the same + */ +static inline +bool mei_hbm_cl_addr_equal(struct mei_cl *cl, struct mei_hbm_cl_cmd *cmd) +{ + return mei_cl_host_addr(cl) == cmd->host_addr && + mei_cl_me_id(cl) == cmd->me_addr; +} + +/** + * mei_hbm_cl_find_by_cmd - find recipient client + * + * @dev: the device structure + * @buf: a buffer with hbm cl command + * + * Return: the recipient client or NULL if not found + */ +static inline +struct mei_cl *mei_hbm_cl_find_by_cmd(struct mei_device *dev, void *buf) +{ + struct mei_hbm_cl_cmd *cmd = (struct mei_hbm_cl_cmd *)buf; + struct mei_cl *cl; + + list_for_each_entry(cl, &dev->file_list, link) + if (mei_hbm_cl_addr_equal(cl, cmd)) + return cl; + return NULL; +} + + +/** + * mei_hbm_start_wait - wait for start response message. + * + * @dev: the device structure + * + * Return: 0 on success and < 0 on failure + */ +int mei_hbm_start_wait(struct mei_device *dev) +{ + int ret; + + if (dev->hbm_state > MEI_HBM_STARTING) + return 0; + + mutex_unlock(&dev->device_lock); + ret = wait_event_timeout(dev->wait_hbm_start, + dev->hbm_state != MEI_HBM_STARTING, + mei_secs_to_jiffies(MEI_HBM_TIMEOUT)); + mutex_lock(&dev->device_lock); + + if (ret == 0 && (dev->hbm_state <= MEI_HBM_STARTING)) { + dev->hbm_state = MEI_HBM_IDLE; + dev_err(dev->dev, "waiting for mei start failed\n"); + return -ETIME; + } + return 0; +} + +/** + * mei_hbm_start_req - sends start request message. + * + * @dev: the device structure + * + * Return: 0 on success and < 0 on failure + */ +int mei_hbm_start_req(struct mei_device *dev) +{ + struct mei_msg_hdr mei_hdr; + struct hbm_host_version_request start_req; + const size_t len = sizeof(struct hbm_host_version_request); + int ret; + + mei_hbm_reset(dev); + + mei_hbm_hdr(&mei_hdr, len); + + /* host start message */ + memset(&start_req, 0, len); + start_req.hbm_cmd = HOST_START_REQ_CMD; + start_req.host_version.major_version = HBM_MAJOR_VERSION; + start_req.host_version.minor_version = HBM_MINOR_VERSION; + + dev->hbm_state = MEI_HBM_IDLE; + ret = mei_hbm_write_message(dev, &mei_hdr, &start_req); + if (ret) { + dev_err(dev->dev, "version message write failed: ret = %d\n", + ret); + return ret; + } + + dev->hbm_state = MEI_HBM_STARTING; + dev->init_clients_timer = MEI_CLIENTS_INIT_TIMEOUT; + mei_schedule_stall_timer(dev); + return 0; +} + +/** + * mei_hbm_enum_clients_req - sends enumeration client request message. + * + * @dev: the device structure + * + * Return: 0 on success and < 0 on failure + */ +static int mei_hbm_enum_clients_req(struct mei_device *dev) +{ + struct mei_msg_hdr mei_hdr; + struct hbm_host_enum_request enum_req; + const size_t len = sizeof(struct hbm_host_enum_request); + int ret; + + /* enumerate clients */ + mei_hbm_hdr(&mei_hdr, len); + + memset(&enum_req, 0, len); + enum_req.hbm_cmd = HOST_ENUM_REQ_CMD; + enum_req.flags |= dev->hbm_f_dc_supported ? + MEI_HBM_ENUM_F_ALLOW_ADD : 0; + enum_req.flags |= dev->hbm_f_ie_supported ? + MEI_HBM_ENUM_F_IMMEDIATE_ENUM : 0; + + ret = mei_hbm_write_message(dev, &mei_hdr, &enum_req); + if (ret) { + dev_err(dev->dev, "enumeration request write failed: ret = %d.\n", + ret); + return ret; + } + dev->hbm_state = MEI_HBM_ENUM_CLIENTS; + dev->init_clients_timer = MEI_CLIENTS_INIT_TIMEOUT; + mei_schedule_stall_timer(dev); + return 0; +} + +/** + * mei_hbm_me_cl_add - add new me client to the list + * + * @dev: the device structure + * @res: hbm property response + * + * Return: 0 on success and -ENOMEM on allocation failure + */ + +static int mei_hbm_me_cl_add(struct mei_device *dev, + struct hbm_props_response *res) +{ + struct mei_me_client *me_cl; + const uuid_le *uuid = &res->client_properties.protocol_name; + + mei_me_cl_rm_by_uuid(dev, uuid); + + me_cl = kzalloc(sizeof(struct mei_me_client), GFP_KERNEL); + if (!me_cl) + return -ENOMEM; + + mei_me_cl_init(me_cl); + + me_cl->props = res->client_properties; + me_cl->client_id = res->me_addr; + me_cl->tx_flow_ctrl_creds = 0; + + mei_me_cl_add(dev, me_cl); + + return 0; +} + +/** + * mei_hbm_add_cl_resp - send response to fw on client add request + * + * @dev: the device structure + * @addr: me address + * @status: response status + * + * Return: 0 on success and < 0 on failure + */ +static int mei_hbm_add_cl_resp(struct mei_device *dev, u8 addr, u8 status) +{ + struct mei_msg_hdr mei_hdr; + struct hbm_add_client_response resp; + const size_t len = sizeof(struct hbm_add_client_response); + int ret; + + dev_dbg(dev->dev, "adding client response\n"); + + mei_hbm_hdr(&mei_hdr, len); + + memset(&resp, 0, sizeof(struct hbm_add_client_response)); + resp.hbm_cmd = MEI_HBM_ADD_CLIENT_RES_CMD; + resp.me_addr = addr; + resp.status = status; + + ret = mei_hbm_write_message(dev, &mei_hdr, &resp); + if (ret) + dev_err(dev->dev, "add client response write failed: ret = %d\n", + ret); + return ret; +} + +/** + * mei_hbm_fw_add_cl_req - request from the fw to add a client + * + * @dev: the device structure + * @req: add client request + * + * Return: 0 on success and < 0 on failure + */ +static int mei_hbm_fw_add_cl_req(struct mei_device *dev, + struct hbm_add_client_request *req) +{ + int ret; + u8 status = MEI_HBMS_SUCCESS; + + BUILD_BUG_ON(sizeof(struct hbm_add_client_request) != + sizeof(struct hbm_props_response)); + + ret = mei_hbm_me_cl_add(dev, (struct hbm_props_response *)req); + if (ret) + status = !MEI_HBMS_SUCCESS; + + if (dev->dev_state == MEI_DEV_ENABLED) + schedule_work(&dev->bus_rescan_work); + + return mei_hbm_add_cl_resp(dev, req->me_addr, status); +} + +/** + * mei_hbm_cl_notify_req - send notification request + * + * @dev: the device structure + * @cl: a client to disconnect from + * @start: true for start false for stop + * + * Return: 0 on success and -EIO on write failure + */ +int mei_hbm_cl_notify_req(struct mei_device *dev, + struct mei_cl *cl, u8 start) +{ + + struct mei_msg_hdr mei_hdr; + struct hbm_notification_request req; + const size_t len = sizeof(struct hbm_notification_request); + int ret; + + mei_hbm_hdr(&mei_hdr, len); + mei_hbm_cl_hdr(cl, MEI_HBM_NOTIFY_REQ_CMD, &req, len); + + req.start = start; + + ret = mei_hbm_write_message(dev, &mei_hdr, &req); + if (ret) + dev_err(dev->dev, "notify request failed: ret = %d\n", ret); + + return ret; +} + +/** + * notify_res_to_fop - convert notification response to the proper + * notification FOP + * + * @cmd: client notification start response command + * + * Return: MEI_FOP_NOTIFY_START or MEI_FOP_NOTIFY_STOP; + */ +static inline enum mei_cb_file_ops notify_res_to_fop(struct mei_hbm_cl_cmd *cmd) +{ + struct hbm_notification_response *rs = + (struct hbm_notification_response *)cmd; + + return mei_cl_notify_req2fop(rs->start); +} + +/** + * mei_hbm_cl_notify_start_res - update the client state according + * notify start response + * + * @dev: the device structure + * @cl: mei host client + * @cmd: client notification start response command + */ +static void mei_hbm_cl_notify_start_res(struct mei_device *dev, + struct mei_cl *cl, + struct mei_hbm_cl_cmd *cmd) +{ + struct hbm_notification_response *rs = + (struct hbm_notification_response *)cmd; + + cl_dbg(dev, cl, "hbm: notify start response status=%d\n", rs->status); + + if (rs->status == MEI_HBMS_SUCCESS || + rs->status == MEI_HBMS_ALREADY_STARTED) { + cl->notify_en = true; + cl->status = 0; + } else { + cl->status = -EINVAL; + } +} + +/** + * mei_hbm_cl_notify_stop_res - update the client state according + * notify stop response + * + * @dev: the device structure + * @cl: mei host client + * @cmd: client notification stop response command + */ +static void mei_hbm_cl_notify_stop_res(struct mei_device *dev, + struct mei_cl *cl, + struct mei_hbm_cl_cmd *cmd) +{ + struct hbm_notification_response *rs = + (struct hbm_notification_response *)cmd; + + cl_dbg(dev, cl, "hbm: notify stop response status=%d\n", rs->status); + + if (rs->status == MEI_HBMS_SUCCESS || + rs->status == MEI_HBMS_NOT_STARTED) { + cl->notify_en = false; + cl->status = 0; + } else { + /* TODO: spec is not clear yet about other possible issues */ + cl->status = -EINVAL; + } +} + +/** + * mei_hbm_cl_notify - signal notification event + * + * @dev: the device structure + * @cmd: notification client message + */ +static void mei_hbm_cl_notify(struct mei_device *dev, + struct mei_hbm_cl_cmd *cmd) +{ + struct mei_cl *cl; + + cl = mei_hbm_cl_find_by_cmd(dev, cmd); + if (cl) + mei_cl_notify(cl); +} + +/** + * mei_hbm_prop_req - request property for a single client + * + * @dev: the device structure + * @start_idx: client index to start search + * + * Return: 0 on success and < 0 on failure + */ +static int mei_hbm_prop_req(struct mei_device *dev, unsigned long start_idx) +{ + struct mei_msg_hdr mei_hdr; + struct hbm_props_request prop_req; + const size_t len = sizeof(struct hbm_props_request); + unsigned long addr; + int ret; + + addr = find_next_bit(dev->me_clients_map, MEI_CLIENTS_MAX, start_idx); + + /* We got all client properties */ + if (addr == MEI_CLIENTS_MAX) { + dev->hbm_state = MEI_HBM_STARTED; + mei_host_client_init(dev); + + return 0; + } + + mei_hbm_hdr(&mei_hdr, len); + + memset(&prop_req, 0, sizeof(struct hbm_props_request)); + + prop_req.hbm_cmd = HOST_CLIENT_PROPERTIES_REQ_CMD; + prop_req.me_addr = addr; + + ret = mei_hbm_write_message(dev, &mei_hdr, &prop_req); + if (ret) { + dev_err(dev->dev, "properties request write failed: ret = %d\n", + ret); + return ret; + } + + dev->init_clients_timer = MEI_CLIENTS_INIT_TIMEOUT; + mei_schedule_stall_timer(dev); + + return 0; +} + +/** + * mei_hbm_pg - sends pg command + * + * @dev: the device structure + * @pg_cmd: the pg command code + * + * Return: -EIO on write failure + * -EOPNOTSUPP if the operation is not supported by the protocol + */ +int mei_hbm_pg(struct mei_device *dev, u8 pg_cmd) +{ + struct mei_msg_hdr mei_hdr; + struct hbm_power_gate req; + const size_t len = sizeof(struct hbm_power_gate); + int ret; + + if (!dev->hbm_f_pg_supported) + return -EOPNOTSUPP; + + mei_hbm_hdr(&mei_hdr, len); + + memset(&req, 0, len); + req.hbm_cmd = pg_cmd; + + ret = mei_hbm_write_message(dev, &mei_hdr, &req); + if (ret) + dev_err(dev->dev, "power gate command write failed.\n"); + return ret; +} +EXPORT_SYMBOL_GPL(mei_hbm_pg); + +/** + * mei_hbm_stop_req - send stop request message + * + * @dev: mei device + * + * Return: -EIO on write failure + */ +static int mei_hbm_stop_req(struct mei_device *dev) +{ + struct mei_msg_hdr mei_hdr; + struct hbm_host_stop_request req; + const size_t len = sizeof(struct hbm_host_stop_request); + + mei_hbm_hdr(&mei_hdr, len); + + memset(&req, 0, len); + req.hbm_cmd = HOST_STOP_REQ_CMD; + req.reason = DRIVER_STOP_REQUEST; + + return mei_hbm_write_message(dev, &mei_hdr, &req); +} + +/** + * mei_hbm_cl_flow_control_req - sends flow control request. + * + * @dev: the device structure + * @cl: client info + * + * Return: -EIO on write failure + */ +int mei_hbm_cl_flow_control_req(struct mei_device *dev, struct mei_cl *cl) +{ + struct hbm_flow_control req; + + cl_dbg(dev, cl, "sending flow control\n"); + return mei_hbm_cl_write(dev, cl, MEI_FLOW_CONTROL_CMD, + &req, sizeof(req)); +} + +/** + * mei_hbm_add_single_tx_flow_ctrl_creds - adds single buffer credentials. + * + * @dev: the device structure + * @fctrl: flow control response bus message + * + * Return: 0 on success, < 0 otherwise + */ +static int mei_hbm_add_single_tx_flow_ctrl_creds(struct mei_device *dev, + struct hbm_flow_control *fctrl) +{ + struct mei_me_client *me_cl; + int rets; + + me_cl = mei_me_cl_by_id(dev, fctrl->me_addr); + if (!me_cl) { + dev_err(dev->dev, "no such me client %d\n", fctrl->me_addr); + return -ENOENT; + } + + if (WARN_ON(me_cl->props.single_recv_buf == 0)) { + rets = -EINVAL; + goto out; + } + + me_cl->tx_flow_ctrl_creds++; + dev_dbg(dev->dev, "recv flow ctrl msg ME %d (single) creds = %d.\n", + fctrl->me_addr, me_cl->tx_flow_ctrl_creds); + + rets = 0; +out: + mei_me_cl_put(me_cl); + return rets; +} + +/** + * mei_hbm_cl_flow_control_res - flow control response from me + * + * @dev: the device structure + * @fctrl: flow control response bus message + */ +static void mei_hbm_cl_tx_flow_ctrl_creds_res(struct mei_device *dev, + struct hbm_flow_control *fctrl) +{ + struct mei_cl *cl; + + if (!fctrl->host_addr) { + /* single receive buffer */ + mei_hbm_add_single_tx_flow_ctrl_creds(dev, fctrl); + return; + } + + cl = mei_hbm_cl_find_by_cmd(dev, fctrl); + if (cl) { + cl->tx_flow_ctrl_creds++; + cl_dbg(dev, cl, "flow control creds = %d.\n", + cl->tx_flow_ctrl_creds); + } +} + + +/** + * mei_hbm_cl_disconnect_req - sends disconnect message to fw. + * + * @dev: the device structure + * @cl: a client to disconnect from + * + * Return: -EIO on write failure + */ +int mei_hbm_cl_disconnect_req(struct mei_device *dev, struct mei_cl *cl) +{ + struct hbm_client_connect_request req; + + return mei_hbm_cl_write(dev, cl, CLIENT_DISCONNECT_REQ_CMD, + &req, sizeof(req)); +} + +/** + * mei_hbm_cl_disconnect_rsp - sends disconnect respose to the FW + * + * @dev: the device structure + * @cl: a client to disconnect from + * + * Return: -EIO on write failure + */ +int mei_hbm_cl_disconnect_rsp(struct mei_device *dev, struct mei_cl *cl) +{ + struct hbm_client_connect_response resp; + + return mei_hbm_cl_write(dev, cl, CLIENT_DISCONNECT_RES_CMD, + &resp, sizeof(resp)); +} + +/** + * mei_hbm_cl_disconnect_res - update the client state according + * disconnect response + * + * @dev: the device structure + * @cl: mei host client + * @cmd: disconnect client response host bus message + */ +static void mei_hbm_cl_disconnect_res(struct mei_device *dev, struct mei_cl *cl, + struct mei_hbm_cl_cmd *cmd) +{ + struct hbm_client_connect_response *rs = + (struct hbm_client_connect_response *)cmd; + + cl_dbg(dev, cl, "hbm: disconnect response status=%d\n", rs->status); + + if (rs->status == MEI_CL_DISCONN_SUCCESS) + cl->state = MEI_FILE_DISCONNECT_REPLY; + cl->status = 0; +} + +/** + * mei_hbm_cl_connect_req - send connection request to specific me client + * + * @dev: the device structure + * @cl: a client to connect to + * + * Return: -EIO on write failure + */ +int mei_hbm_cl_connect_req(struct mei_device *dev, struct mei_cl *cl) +{ + struct hbm_client_connect_request req; + + return mei_hbm_cl_write(dev, cl, CLIENT_CONNECT_REQ_CMD, + &req, sizeof(req)); +} + +/** + * mei_hbm_cl_connect_res - update the client state according + * connection response + * + * @dev: the device structure + * @cl: mei host client + * @cmd: connect client response host bus message + */ +static void mei_hbm_cl_connect_res(struct mei_device *dev, struct mei_cl *cl, + struct mei_hbm_cl_cmd *cmd) +{ + struct hbm_client_connect_response *rs = + (struct hbm_client_connect_response *)cmd; + + cl_dbg(dev, cl, "hbm: connect response status=%s\n", + mei_cl_conn_status_str(rs->status)); + + if (rs->status == MEI_CL_CONN_SUCCESS) + cl->state = MEI_FILE_CONNECTED; + else { + cl->state = MEI_FILE_DISCONNECT_REPLY; + if (rs->status == MEI_CL_CONN_NOT_FOUND) { + mei_me_cl_del(dev, cl->me_cl); + if (dev->dev_state == MEI_DEV_ENABLED) + schedule_work(&dev->bus_rescan_work); + } + } + cl->status = mei_cl_conn_status_to_errno(rs->status); +} + +/** + * mei_hbm_cl_res - process hbm response received on behalf + * an client + * + * @dev: the device structure + * @rs: hbm client message + * @fop_type: file operation type + */ +static void mei_hbm_cl_res(struct mei_device *dev, + struct mei_hbm_cl_cmd *rs, + enum mei_cb_file_ops fop_type) +{ + struct mei_cl *cl; + struct mei_cl_cb *cb, *next; + + cl = NULL; + list_for_each_entry_safe(cb, next, &dev->ctrl_rd_list, list) { + + cl = cb->cl; + + if (cb->fop_type != fop_type) + continue; + + if (mei_hbm_cl_addr_equal(cl, rs)) { + list_del_init(&cb->list); + break; + } + } + + if (!cl) + return; + + switch (fop_type) { + case MEI_FOP_CONNECT: + mei_hbm_cl_connect_res(dev, cl, rs); + break; + case MEI_FOP_DISCONNECT: + mei_hbm_cl_disconnect_res(dev, cl, rs); + break; + case MEI_FOP_NOTIFY_START: + mei_hbm_cl_notify_start_res(dev, cl, rs); + break; + case MEI_FOP_NOTIFY_STOP: + mei_hbm_cl_notify_stop_res(dev, cl, rs); + break; + default: + return; + } + + cl->timer_count = 0; + wake_up(&cl->wait); +} + + +/** + * mei_hbm_fw_disconnect_req - disconnect request initiated by ME firmware + * host sends disconnect response + * + * @dev: the device structure. + * @disconnect_req: disconnect request bus message from the me + * + * Return: -ENOMEM on allocation failure + */ +static int mei_hbm_fw_disconnect_req(struct mei_device *dev, + struct hbm_client_connect_request *disconnect_req) +{ + struct mei_cl *cl; + struct mei_cl_cb *cb; + + cl = mei_hbm_cl_find_by_cmd(dev, disconnect_req); + if (cl) { + cl_warn(dev, cl, "fw disconnect request received\n"); + cl->state = MEI_FILE_DISCONNECTING; + cl->timer_count = 0; + + cb = mei_cl_enqueue_ctrl_wr_cb(cl, 0, MEI_FOP_DISCONNECT_RSP, + NULL); + if (!cb) + return -ENOMEM; + } + return 0; +} + +/** + * mei_hbm_pg_enter_res - PG enter response received + * + * @dev: the device structure. + * + * Return: 0 on success, -EPROTO on state mismatch + */ +static int mei_hbm_pg_enter_res(struct mei_device *dev) +{ + if (mei_pg_state(dev) != MEI_PG_OFF || + dev->pg_event != MEI_PG_EVENT_WAIT) { + dev_err(dev->dev, "hbm: pg entry response: state mismatch [%s, %d]\n", + mei_pg_state_str(mei_pg_state(dev)), dev->pg_event); + return -EPROTO; + } + + dev->pg_event = MEI_PG_EVENT_RECEIVED; + wake_up(&dev->wait_pg); + + return 0; +} + +/** + * mei_hbm_pg_resume - process with PG resume + * + * @dev: the device structure. + */ +void mei_hbm_pg_resume(struct mei_device *dev) +{ + pm_request_resume(dev->dev); +} +EXPORT_SYMBOL_GPL(mei_hbm_pg_resume); + +/** + * mei_hbm_pg_exit_res - PG exit response received + * + * @dev: the device structure. + * + * Return: 0 on success, -EPROTO on state mismatch + */ +static int mei_hbm_pg_exit_res(struct mei_device *dev) +{ + if (mei_pg_state(dev) != MEI_PG_ON || + (dev->pg_event != MEI_PG_EVENT_WAIT && + dev->pg_event != MEI_PG_EVENT_IDLE)) { + dev_err(dev->dev, "hbm: pg exit response: state mismatch [%s, %d]\n", + mei_pg_state_str(mei_pg_state(dev)), dev->pg_event); + return -EPROTO; + } + + switch (dev->pg_event) { + case MEI_PG_EVENT_WAIT: + dev->pg_event = MEI_PG_EVENT_RECEIVED; + wake_up(&dev->wait_pg); + break; + case MEI_PG_EVENT_IDLE: + /* + * If the driver is not waiting on this then + * this is HW initiated exit from PG. + * Start runtime pm resume sequence to exit from PG. + */ + dev->pg_event = MEI_PG_EVENT_RECEIVED; + mei_hbm_pg_resume(dev); + break; + default: + WARN(1, "hbm: pg exit response: unexpected pg event = %d\n", + dev->pg_event); + return -EPROTO; + } + + return 0; +} + +/** + * mei_hbm_config_features - check what hbm features and commands + * are supported by the fw + * + * @dev: the device structure + */ +static void mei_hbm_config_features(struct mei_device *dev) +{ + /* Power Gating Isolation Support */ + dev->hbm_f_pg_supported = 0; + if (dev->version.major_version > HBM_MAJOR_VERSION_PGI) + dev->hbm_f_pg_supported = 1; + + if (dev->version.major_version == HBM_MAJOR_VERSION_PGI && + dev->version.minor_version >= HBM_MINOR_VERSION_PGI) + dev->hbm_f_pg_supported = 1; + + dev->hbm_f_dc_supported = 0; + if (dev->version.major_version >= HBM_MAJOR_VERSION_DC) + dev->hbm_f_dc_supported = 1; + + dev->hbm_f_ie_supported = 0; + if (dev->version.major_version >= HBM_MAJOR_VERSION_IE) + dev->hbm_f_ie_supported = 1; + + /* disconnect on connect timeout instead of link reset */ + dev->hbm_f_dot_supported = 0; + if (dev->version.major_version >= HBM_MAJOR_VERSION_DOT) + dev->hbm_f_dot_supported = 1; + + /* Notification Event Support */ + dev->hbm_f_ev_supported = 0; + if (dev->version.major_version >= HBM_MAJOR_VERSION_EV) + dev->hbm_f_ev_supported = 1; + + /* Fixed Address Client Support */ + dev->hbm_f_fa_supported = 0; + if (dev->version.major_version >= HBM_MAJOR_VERSION_FA) + dev->hbm_f_fa_supported = 1; + + /* OS ver message Support */ + dev->hbm_f_os_supported = 0; + if (dev->version.major_version >= HBM_MAJOR_VERSION_OS) + dev->hbm_f_os_supported = 1; + + /* DMA Ring Support */ + dev->hbm_f_dr_supported = 0; + if (dev->version.major_version > HBM_MAJOR_VERSION_DR || + (dev->version.major_version == HBM_MAJOR_VERSION_DR && + dev->version.minor_version >= HBM_MINOR_VERSION_DR)) + dev->hbm_f_dr_supported = 1; +} + +/** + * mei_hbm_version_is_supported - checks whether the driver can + * support the hbm version of the device + * + * @dev: the device structure + * Return: true if driver can support hbm version of the device + */ +bool mei_hbm_version_is_supported(struct mei_device *dev) +{ + return (dev->version.major_version < HBM_MAJOR_VERSION) || + (dev->version.major_version == HBM_MAJOR_VERSION && + dev->version.minor_version <= HBM_MINOR_VERSION); +} + +/** + * mei_hbm_dispatch - bottom half read routine after ISR to + * handle the read bus message cmd processing. + * + * @dev: the device structure + * @hdr: header of bus message + * + * Return: 0 on success and < 0 on failure + */ +int mei_hbm_dispatch(struct mei_device *dev, struct mei_msg_hdr *hdr) +{ + struct mei_bus_message *mei_msg; + struct hbm_host_version_response *version_res; + struct hbm_props_response *props_res; + struct hbm_host_enum_response *enum_res; + struct hbm_add_client_request *add_cl_req; + int ret; + + struct mei_hbm_cl_cmd *cl_cmd; + struct hbm_client_connect_request *disconnect_req; + struct hbm_flow_control *fctrl; + + /* read the message to our buffer */ + BUG_ON(hdr->length >= sizeof(dev->rd_msg_buf)); + mei_read_slots(dev, dev->rd_msg_buf, hdr->length); + mei_msg = (struct mei_bus_message *)dev->rd_msg_buf; + cl_cmd = (struct mei_hbm_cl_cmd *)mei_msg; + + /* ignore spurious message and prevent reset nesting + * hbm is put to idle during system reset + */ + if (dev->hbm_state == MEI_HBM_IDLE) { + dev_dbg(dev->dev, "hbm: state is idle ignore spurious messages\n"); + return 0; + } + + switch (mei_msg->hbm_cmd) { + case HOST_START_RES_CMD: + dev_dbg(dev->dev, "hbm: start: response message received.\n"); + + dev->init_clients_timer = 0; + + version_res = (struct hbm_host_version_response *)mei_msg; + + dev_dbg(dev->dev, "HBM VERSION: DRIVER=%02d:%02d DEVICE=%02d:%02d\n", + HBM_MAJOR_VERSION, HBM_MINOR_VERSION, + version_res->me_max_version.major_version, + version_res->me_max_version.minor_version); + + if (version_res->host_version_supported) { + dev->version.major_version = HBM_MAJOR_VERSION; + dev->version.minor_version = HBM_MINOR_VERSION; + } else { + dev->version.major_version = + version_res->me_max_version.major_version; + dev->version.minor_version = + version_res->me_max_version.minor_version; + } + + if (!mei_hbm_version_is_supported(dev)) { + dev_warn(dev->dev, "hbm: start: version mismatch - stopping the driver.\n"); + + dev->hbm_state = MEI_HBM_STOPPED; + if (mei_hbm_stop_req(dev)) { + dev_err(dev->dev, "hbm: start: failed to send stop request\n"); + return -EIO; + } + break; + } + + mei_hbm_config_features(dev); + + if (dev->dev_state != MEI_DEV_INIT_CLIENTS || + dev->hbm_state != MEI_HBM_STARTING) { + dev_err(dev->dev, "hbm: start: state mismatch, [%d, %d]\n", + dev->dev_state, dev->hbm_state); + return -EPROTO; + } + + if (mei_hbm_enum_clients_req(dev)) { + dev_err(dev->dev, "hbm: start: failed to send enumeration request\n"); + return -EIO; + } + + wake_up(&dev->wait_hbm_start); + break; + + case CLIENT_CONNECT_RES_CMD: + dev_dbg(dev->dev, "hbm: client connect response: message received.\n"); + mei_hbm_cl_res(dev, cl_cmd, MEI_FOP_CONNECT); + break; + + case CLIENT_DISCONNECT_RES_CMD: + dev_dbg(dev->dev, "hbm: client disconnect response: message received.\n"); + mei_hbm_cl_res(dev, cl_cmd, MEI_FOP_DISCONNECT); + break; + + case MEI_FLOW_CONTROL_CMD: + dev_dbg(dev->dev, "hbm: client flow control response: message received.\n"); + + fctrl = (struct hbm_flow_control *)mei_msg; + mei_hbm_cl_tx_flow_ctrl_creds_res(dev, fctrl); + break; + + case MEI_PG_ISOLATION_ENTRY_RES_CMD: + dev_dbg(dev->dev, "hbm: power gate isolation entry response received\n"); + ret = mei_hbm_pg_enter_res(dev); + if (ret) + return ret; + break; + + case MEI_PG_ISOLATION_EXIT_REQ_CMD: + dev_dbg(dev->dev, "hbm: power gate isolation exit request received\n"); + ret = mei_hbm_pg_exit_res(dev); + if (ret) + return ret; + break; + + case HOST_CLIENT_PROPERTIES_RES_CMD: + dev_dbg(dev->dev, "hbm: properties response: message received.\n"); + + dev->init_clients_timer = 0; + + if (dev->dev_state != MEI_DEV_INIT_CLIENTS || + dev->hbm_state != MEI_HBM_CLIENT_PROPERTIES) { + dev_err(dev->dev, "hbm: properties response: state mismatch, [%d, %d]\n", + dev->dev_state, dev->hbm_state); + return -EPROTO; + } + + props_res = (struct hbm_props_response *)mei_msg; + + if (props_res->status == MEI_HBMS_CLIENT_NOT_FOUND) { + dev_dbg(dev->dev, "hbm: properties response: %d CLIENT_NOT_FOUND\n", + props_res->me_addr); + } else if (props_res->status) { + dev_err(dev->dev, "hbm: properties response: wrong status = %d %s\n", + props_res->status, + mei_hbm_status_str(props_res->status)); + return -EPROTO; + } else { + mei_hbm_me_cl_add(dev, props_res); + } + + /* request property for the next client */ + if (mei_hbm_prop_req(dev, props_res->me_addr + 1)) + return -EIO; + + break; + + case HOST_ENUM_RES_CMD: + dev_dbg(dev->dev, "hbm: enumeration response: message received\n"); + + dev->init_clients_timer = 0; + + enum_res = (struct hbm_host_enum_response *) mei_msg; + BUILD_BUG_ON(sizeof(dev->me_clients_map) + < sizeof(enum_res->valid_addresses)); + memcpy(dev->me_clients_map, enum_res->valid_addresses, + sizeof(enum_res->valid_addresses)); + + if (dev->dev_state != MEI_DEV_INIT_CLIENTS || + dev->hbm_state != MEI_HBM_ENUM_CLIENTS) { + dev_err(dev->dev, "hbm: enumeration response: state mismatch, [%d, %d]\n", + dev->dev_state, dev->hbm_state); + return -EPROTO; + } + + dev->hbm_state = MEI_HBM_CLIENT_PROPERTIES; + + /* first property request */ + if (mei_hbm_prop_req(dev, 0)) + return -EIO; + + break; + + case HOST_STOP_RES_CMD: + dev_dbg(dev->dev, "hbm: stop response: message received\n"); + + dev->init_clients_timer = 0; + + if (dev->hbm_state != MEI_HBM_STOPPED) { + dev_err(dev->dev, "hbm: stop response: state mismatch, [%d, %d]\n", + dev->dev_state, dev->hbm_state); + return -EPROTO; + } + + dev->dev_state = MEI_DEV_POWER_DOWN; + dev_info(dev->dev, "hbm: stop response: resetting.\n"); + /* force the reset */ + return -EPROTO; + break; + + case CLIENT_DISCONNECT_REQ_CMD: + dev_dbg(dev->dev, "hbm: disconnect request: message received\n"); + + disconnect_req = (struct hbm_client_connect_request *)mei_msg; + mei_hbm_fw_disconnect_req(dev, disconnect_req); + break; + + case ME_STOP_REQ_CMD: + dev_dbg(dev->dev, "hbm: stop request: message received\n"); + dev->hbm_state = MEI_HBM_STOPPED; + if (mei_hbm_stop_req(dev)) { + dev_err(dev->dev, "hbm: stop request: failed to send stop request\n"); + return -EIO; + } + break; + + case MEI_HBM_ADD_CLIENT_REQ_CMD: + dev_dbg(dev->dev, "hbm: add client request received\n"); + /* + * after the host receives the enum_resp + * message clients may be added or removed + */ + if (dev->hbm_state <= MEI_HBM_ENUM_CLIENTS || + dev->hbm_state >= MEI_HBM_STOPPED) { + dev_err(dev->dev, "hbm: add client: state mismatch, [%d, %d]\n", + dev->dev_state, dev->hbm_state); + return -EPROTO; + } + add_cl_req = (struct hbm_add_client_request *)mei_msg; + ret = mei_hbm_fw_add_cl_req(dev, add_cl_req); + if (ret) { + dev_err(dev->dev, "hbm: add client: failed to send response %d\n", + ret); + return -EIO; + } + dev_dbg(dev->dev, "hbm: add client request processed\n"); + break; + + case MEI_HBM_NOTIFY_RES_CMD: + dev_dbg(dev->dev, "hbm: notify response received\n"); + mei_hbm_cl_res(dev, cl_cmd, notify_res_to_fop(cl_cmd)); + break; + + case MEI_HBM_NOTIFICATION_CMD: + dev_dbg(dev->dev, "hbm: notification\n"); + mei_hbm_cl_notify(dev, cl_cmd); + break; + + default: + BUG(); + break; + + } + return 0; +} + diff --git a/drivers/misc/mei/hbm.h b/drivers/misc/mei/hbm.h new file mode 100644 index 000000000..a2025a508 --- /dev/null +++ b/drivers/misc/mei/hbm.h @@ -0,0 +1,62 @@ +/* + * + * Intel Management Engine Interface (Intel MEI) Linux driver + * Copyright (c) 2003-2012, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#ifndef _MEI_HBM_H_ +#define _MEI_HBM_H_ + +struct mei_device; +struct mei_msg_hdr; +struct mei_cl; + +/** + * enum mei_hbm_state - host bus message protocol state + * + * @MEI_HBM_IDLE : protocol not started + * @MEI_HBM_STARTING : start request message was sent + * @MEI_HBM_ENUM_CLIENTS : enumeration request was sent + * @MEI_HBM_CLIENT_PROPERTIES : acquiring clients properties + * @MEI_HBM_STARTED : enumeration was completed + * @MEI_HBM_STOPPED : stopping exchange + */ +enum mei_hbm_state { + MEI_HBM_IDLE = 0, + MEI_HBM_STARTING, + MEI_HBM_ENUM_CLIENTS, + MEI_HBM_CLIENT_PROPERTIES, + MEI_HBM_STARTED, + MEI_HBM_STOPPED, +}; + +const char *mei_hbm_state_str(enum mei_hbm_state state); + +int mei_hbm_dispatch(struct mei_device *dev, struct mei_msg_hdr *hdr); + +void mei_hbm_idle(struct mei_device *dev); +void mei_hbm_reset(struct mei_device *dev); +int mei_hbm_start_req(struct mei_device *dev); +int mei_hbm_start_wait(struct mei_device *dev); +int mei_hbm_cl_flow_control_req(struct mei_device *dev, struct mei_cl *cl); +int mei_hbm_cl_disconnect_req(struct mei_device *dev, struct mei_cl *cl); +int mei_hbm_cl_disconnect_rsp(struct mei_device *dev, struct mei_cl *cl); +int mei_hbm_cl_connect_req(struct mei_device *dev, struct mei_cl *cl); +bool mei_hbm_version_is_supported(struct mei_device *dev); +int mei_hbm_pg(struct mei_device *dev, u8 pg_cmd); +void mei_hbm_pg_resume(struct mei_device *dev); +int mei_hbm_cl_notify_req(struct mei_device *dev, + struct mei_cl *cl, u8 request); + +#endif /* _MEI_HBM_H_ */ + diff --git a/drivers/misc/mei/hw-me-regs.h b/drivers/misc/mei/hw-me-regs.h new file mode 100644 index 000000000..6bbc78698 --- /dev/null +++ b/drivers/misc/mei/hw-me-regs.h @@ -0,0 +1,245 @@ +/****************************************************************************** + * Intel Management Engine Interface (Intel MEI) Linux driver + * Intel MEI Interface Header + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2003 - 2012 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110, + * USA + * + * The full GNU General Public License is included in this distribution + * in the file called LICENSE.GPL. + * + * Contact Information: + * Intel Corporation. + * linux-mei@linux.intel.com + * http://www.intel.com + * + * BSD LICENSE + * + * Copyright(c) 2003 - 2012 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *****************************************************************************/ +#ifndef _MEI_HW_MEI_REGS_H_ +#define _MEI_HW_MEI_REGS_H_ + +/* + * MEI device IDs + */ +#define MEI_DEV_ID_82946GZ 0x2974 /* 82946GZ/GL */ +#define MEI_DEV_ID_82G35 0x2984 /* 82G35 Express */ +#define MEI_DEV_ID_82Q965 0x2994 /* 82Q963/Q965 */ +#define MEI_DEV_ID_82G965 0x29A4 /* 82P965/G965 */ + +#define MEI_DEV_ID_82GM965 0x2A04 /* Mobile PM965/GM965 */ +#define MEI_DEV_ID_82GME965 0x2A14 /* Mobile GME965/GLE960 */ + +#define MEI_DEV_ID_ICH9_82Q35 0x29B4 /* 82Q35 Express */ +#define MEI_DEV_ID_ICH9_82G33 0x29C4 /* 82G33/G31/P35/P31 Express */ +#define MEI_DEV_ID_ICH9_82Q33 0x29D4 /* 82Q33 Express */ +#define MEI_DEV_ID_ICH9_82X38 0x29E4 /* 82X38/X48 Express */ +#define MEI_DEV_ID_ICH9_3200 0x29F4 /* 3200/3210 Server */ + +#define MEI_DEV_ID_ICH9_6 0x28B4 /* Bearlake */ +#define MEI_DEV_ID_ICH9_7 0x28C4 /* Bearlake */ +#define MEI_DEV_ID_ICH9_8 0x28D4 /* Bearlake */ +#define MEI_DEV_ID_ICH9_9 0x28E4 /* Bearlake */ +#define MEI_DEV_ID_ICH9_10 0x28F4 /* Bearlake */ + +#define MEI_DEV_ID_ICH9M_1 0x2A44 /* Cantiga */ +#define MEI_DEV_ID_ICH9M_2 0x2A54 /* Cantiga */ +#define MEI_DEV_ID_ICH9M_3 0x2A64 /* Cantiga */ +#define MEI_DEV_ID_ICH9M_4 0x2A74 /* Cantiga */ + +#define MEI_DEV_ID_ICH10_1 0x2E04 /* Eaglelake */ +#define MEI_DEV_ID_ICH10_2 0x2E14 /* Eaglelake */ +#define MEI_DEV_ID_ICH10_3 0x2E24 /* Eaglelake */ +#define MEI_DEV_ID_ICH10_4 0x2E34 /* Eaglelake */ + +#define MEI_DEV_ID_IBXPK_1 0x3B64 /* Calpella */ +#define MEI_DEV_ID_IBXPK_2 0x3B65 /* Calpella */ + +#define MEI_DEV_ID_CPT_1 0x1C3A /* Couger Point */ +#define MEI_DEV_ID_PBG_1 0x1D3A /* C600/X79 Patsburg */ + +#define MEI_DEV_ID_PPT_1 0x1E3A /* Panther Point */ +#define MEI_DEV_ID_PPT_2 0x1CBA /* Panther Point */ +#define MEI_DEV_ID_PPT_3 0x1DBA /* Panther Point */ + +#define MEI_DEV_ID_LPT_H 0x8C3A /* Lynx Point H */ +#define MEI_DEV_ID_LPT_W 0x8D3A /* Lynx Point - Wellsburg */ +#define MEI_DEV_ID_LPT_LP 0x9C3A /* Lynx Point LP */ +#define MEI_DEV_ID_LPT_HR 0x8CBA /* Lynx Point H Refresh */ + +#define MEI_DEV_ID_WPT_LP 0x9CBA /* Wildcat Point LP */ +#define MEI_DEV_ID_WPT_LP_2 0x9CBB /* Wildcat Point LP 2 */ + +#define MEI_DEV_ID_SPT 0x9D3A /* Sunrise Point */ +#define MEI_DEV_ID_SPT_2 0x9D3B /* Sunrise Point 2 */ +#define MEI_DEV_ID_SPT_H 0xA13A /* Sunrise Point H */ +#define MEI_DEV_ID_SPT_H_2 0xA13B /* Sunrise Point H 2 */ + +#define MEI_DEV_ID_LBG 0xA1BA /* Lewisburg (SPT) */ + +#define MEI_DEV_ID_BXT_M 0x1A9A /* Broxton M */ +#define MEI_DEV_ID_APL_I 0x5A9A /* Apollo Lake I */ + +#define MEI_DEV_ID_DNV_IE 0x19E5 /* Denverton IE */ + +#define MEI_DEV_ID_GLK 0x319A /* Gemini Lake */ + +#define MEI_DEV_ID_KBP 0xA2BA /* Kaby Point */ +#define MEI_DEV_ID_KBP_2 0xA2BB /* Kaby Point 2 */ + +#define MEI_DEV_ID_CNP_LP 0x9DE0 /* Cannon Point LP */ +#define MEI_DEV_ID_CNP_LP_4 0x9DE4 /* Cannon Point LP 4 (iTouch) */ +#define MEI_DEV_ID_CNP_H 0xA360 /* Cannon Point H */ +#define MEI_DEV_ID_CNP_H_4 0xA364 /* Cannon Point H 4 (iTouch) */ + +#define MEI_DEV_ID_CMP_LP 0x02e0 /* Comet Point LP */ +#define MEI_DEV_ID_CMP_LP_3 0x02e4 /* Comet Point LP 3 (iTouch) */ + +#define MEI_DEV_ID_CMP_V 0xA3BA /* Comet Point Lake V */ + +#define MEI_DEV_ID_CMP_H 0x06e0 /* Comet Lake H */ +#define MEI_DEV_ID_CMP_H_3 0x06e4 /* Comet Lake H 3 (iTouch) */ + +#define MEI_DEV_ID_CDF 0x18D3 /* Cedar Fork */ + +#define MEI_DEV_ID_ICP_LP 0x34E0 /* Ice Lake Point LP */ +#define MEI_DEV_ID_ICP_N 0x38E0 /* Ice Lake Point N */ + +#define MEI_DEV_ID_TGP_LP 0xA0E0 /* Tiger Lake Point LP */ + +#define MEI_DEV_ID_MCC 0x4B70 /* Mule Creek Canyon (EHL) */ +#define MEI_DEV_ID_MCC_4 0x4B75 /* Mule Creek Canyon 4 (EHL) */ + +/* + * MEI HW Section + */ + +/* Host Firmware Status Registers in PCI Config Space */ +#define PCI_CFG_HFS_1 0x40 +# define PCI_CFG_HFS_1_D0I3_MSK 0x80000000 +#define PCI_CFG_HFS_2 0x48 +#define PCI_CFG_HFS_3 0x60 +#define PCI_CFG_HFS_4 0x64 +#define PCI_CFG_HFS_5 0x68 +#define PCI_CFG_HFS_6 0x6C + +/* MEI registers */ +/* H_CB_WW - Host Circular Buffer (CB) Write Window register */ +#define H_CB_WW 0 +/* H_CSR - Host Control Status register */ +#define H_CSR 4 +/* ME_CB_RW - ME Circular Buffer Read Window register (read only) */ +#define ME_CB_RW 8 +/* ME_CSR_HA - ME Control Status Host Access register (read only) */ +#define ME_CSR_HA 0xC +/* H_HGC_CSR - PGI register */ +#define H_HPG_CSR 0x10 +/* H_D0I3C - D0I3 Control */ +#define H_D0I3C 0x800 + +/* register bits of H_CSR (Host Control Status register) */ +/* Host Circular Buffer Depth - maximum number of 32-bit entries in CB */ +#define H_CBD 0xFF000000 +/* Host Circular Buffer Write Pointer */ +#define H_CBWP 0x00FF0000 +/* Host Circular Buffer Read Pointer */ +#define H_CBRP 0x0000FF00 +/* Host Reset */ +#define H_RST 0x00000010 +/* Host Ready */ +#define H_RDY 0x00000008 +/* Host Interrupt Generate */ +#define H_IG 0x00000004 +/* Host Interrupt Status */ +#define H_IS 0x00000002 +/* Host Interrupt Enable */ +#define H_IE 0x00000001 +/* Host D0I3 Interrupt Enable */ +#define H_D0I3C_IE 0x00000020 +/* Host D0I3 Interrupt Status */ +#define H_D0I3C_IS 0x00000040 + +/* H_CSR masks */ +#define H_CSR_IE_MASK (H_IE | H_D0I3C_IE) +#define H_CSR_IS_MASK (H_IS | H_D0I3C_IS) + +/* register bits of ME_CSR_HA (ME Control Status Host Access register) */ +/* ME CB (Circular Buffer) Depth HRA (Host Read Access) - host read only +access to ME_CBD */ +#define ME_CBD_HRA 0xFF000000 +/* ME CB Write Pointer HRA - host read only access to ME_CBWP */ +#define ME_CBWP_HRA 0x00FF0000 +/* ME CB Read Pointer HRA - host read only access to ME_CBRP */ +#define ME_CBRP_HRA 0x0000FF00 +/* ME Power Gate Isolation Capability HRA - host ready only access */ +#define ME_PGIC_HRA 0x00000040 +/* ME Reset HRA - host read only access to ME_RST */ +#define ME_RST_HRA 0x00000010 +/* ME Ready HRA - host read only access to ME_RDY */ +#define ME_RDY_HRA 0x00000008 +/* ME Interrupt Generate HRA - host read only access to ME_IG */ +#define ME_IG_HRA 0x00000004 +/* ME Interrupt Status HRA - host read only access to ME_IS */ +#define ME_IS_HRA 0x00000002 +/* ME Interrupt Enable HRA - host read only access to ME_IE */ +#define ME_IE_HRA 0x00000001 + + +/* H_HPG_CSR register bits */ +#define H_HPG_CSR_PGIHEXR 0x00000001 +#define H_HPG_CSR_PGI 0x00000002 + +/* H_D0I3C register bits */ +#define H_D0I3C_CIP 0x00000001 +#define H_D0I3C_IR 0x00000002 +#define H_D0I3C_I3 0x00000004 +#define H_D0I3C_RR 0x00000008 + +#endif /* _MEI_HW_MEI_REGS_H_ */ diff --git a/drivers/misc/mei/hw-me.c b/drivers/misc/mei/hw-me.c new file mode 100644 index 000000000..60c8c8418 --- /dev/null +++ b/drivers/misc/mei/hw-me.c @@ -0,0 +1,1500 @@ +/* + * + * Intel Management Engine Interface (Intel MEI) Linux driver + * Copyright (c) 2003-2012, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#include + +#include +#include +#include +#include + +#include "mei_dev.h" +#include "hbm.h" + +#include "hw-me.h" +#include "hw-me-regs.h" + +#include "mei-trace.h" + +/** + * mei_me_reg_read - Reads 32bit data from the mei device + * + * @hw: the me hardware structure + * @offset: offset from which to read the data + * + * Return: register value (u32) + */ +static inline u32 mei_me_reg_read(const struct mei_me_hw *hw, + unsigned long offset) +{ + return ioread32(hw->mem_addr + offset); +} + + +/** + * mei_me_reg_write - Writes 32bit data to the mei device + * + * @hw: the me hardware structure + * @offset: offset from which to write the data + * @value: register value to write (u32) + */ +static inline void mei_me_reg_write(const struct mei_me_hw *hw, + unsigned long offset, u32 value) +{ + iowrite32(value, hw->mem_addr + offset); +} + +/** + * mei_me_mecbrw_read - Reads 32bit data from ME circular buffer + * read window register + * + * @dev: the device structure + * + * Return: ME_CB_RW register value (u32) + */ +static inline u32 mei_me_mecbrw_read(const struct mei_device *dev) +{ + return mei_me_reg_read(to_me_hw(dev), ME_CB_RW); +} + +/** + * mei_me_hcbww_write - write 32bit data to the host circular buffer + * + * @dev: the device structure + * @data: 32bit data to be written to the host circular buffer + */ +static inline void mei_me_hcbww_write(struct mei_device *dev, u32 data) +{ + mei_me_reg_write(to_me_hw(dev), H_CB_WW, data); +} + +/** + * mei_me_mecsr_read - Reads 32bit data from the ME CSR + * + * @dev: the device structure + * + * Return: ME_CSR_HA register value (u32) + */ +static inline u32 mei_me_mecsr_read(const struct mei_device *dev) +{ + u32 reg; + + reg = mei_me_reg_read(to_me_hw(dev), ME_CSR_HA); + trace_mei_reg_read(dev->dev, "ME_CSR_HA", ME_CSR_HA, reg); + + return reg; +} + +/** + * mei_hcsr_read - Reads 32bit data from the host CSR + * + * @dev: the device structure + * + * Return: H_CSR register value (u32) + */ +static inline u32 mei_hcsr_read(const struct mei_device *dev) +{ + u32 reg; + + reg = mei_me_reg_read(to_me_hw(dev), H_CSR); + trace_mei_reg_read(dev->dev, "H_CSR", H_CSR, reg); + + return reg; +} + +/** + * mei_hcsr_write - writes H_CSR register to the mei device + * + * @dev: the device structure + * @reg: new register value + */ +static inline void mei_hcsr_write(struct mei_device *dev, u32 reg) +{ + trace_mei_reg_write(dev->dev, "H_CSR", H_CSR, reg); + mei_me_reg_write(to_me_hw(dev), H_CSR, reg); +} + +/** + * mei_hcsr_set - writes H_CSR register to the mei device, + * and ignores the H_IS bit for it is write-one-to-zero. + * + * @dev: the device structure + * @reg: new register value + */ +static inline void mei_hcsr_set(struct mei_device *dev, u32 reg) +{ + reg &= ~H_CSR_IS_MASK; + mei_hcsr_write(dev, reg); +} + +/** + * mei_hcsr_set_hig - set host interrupt (set H_IG) + * + * @dev: the device structure + */ +static inline void mei_hcsr_set_hig(struct mei_device *dev) +{ + u32 hcsr; + + hcsr = mei_hcsr_read(dev) | H_IG; + mei_hcsr_set(dev, hcsr); +} + +/** + * mei_me_d0i3c_read - Reads 32bit data from the D0I3C register + * + * @dev: the device structure + * + * Return: H_D0I3C register value (u32) + */ +static inline u32 mei_me_d0i3c_read(const struct mei_device *dev) +{ + u32 reg; + + reg = mei_me_reg_read(to_me_hw(dev), H_D0I3C); + trace_mei_reg_read(dev->dev, "H_D0I3C", H_D0I3C, reg); + + return reg; +} + +/** + * mei_me_d0i3c_write - writes H_D0I3C register to device + * + * @dev: the device structure + * @reg: new register value + */ +static inline void mei_me_d0i3c_write(struct mei_device *dev, u32 reg) +{ + trace_mei_reg_write(dev->dev, "H_D0I3C", H_D0I3C, reg); + mei_me_reg_write(to_me_hw(dev), H_D0I3C, reg); +} + +/** + * mei_me_fw_status - read fw status register from pci config space + * + * @dev: mei device + * @fw_status: fw status register values + * + * Return: 0 on success, error otherwise + */ +static int mei_me_fw_status(struct mei_device *dev, + struct mei_fw_status *fw_status) +{ + struct pci_dev *pdev = to_pci_dev(dev->dev); + struct mei_me_hw *hw = to_me_hw(dev); + const struct mei_fw_status *fw_src = &hw->cfg->fw_status; + int ret; + int i; + + if (!fw_status) + return -EINVAL; + + fw_status->count = fw_src->count; + for (i = 0; i < fw_src->count && i < MEI_FW_STATUS_MAX; i++) { + ret = pci_read_config_dword(pdev, fw_src->status[i], + &fw_status->status[i]); + trace_mei_pci_cfg_read(dev->dev, "PCI_CFG_HSF_X", + fw_src->status[i], + fw_status->status[i]); + if (ret) + return ret; + } + + return 0; +} + +/** + * mei_me_hw_config - configure hw dependent settings + * + * @dev: mei device + */ +static void mei_me_hw_config(struct mei_device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev->dev); + struct mei_me_hw *hw = to_me_hw(dev); + u32 hcsr, reg; + + /* Doesn't change in runtime */ + hcsr = mei_hcsr_read(dev); + hw->hbuf_depth = (hcsr & H_CBD) >> 24; + + reg = 0; + pci_read_config_dword(pdev, PCI_CFG_HFS_1, ®); + trace_mei_pci_cfg_read(dev->dev, "PCI_CFG_HFS_1", PCI_CFG_HFS_1, reg); + hw->d0i3_supported = + ((reg & PCI_CFG_HFS_1_D0I3_MSK) == PCI_CFG_HFS_1_D0I3_MSK); + + hw->pg_state = MEI_PG_OFF; + if (hw->d0i3_supported) { + reg = mei_me_d0i3c_read(dev); + if (reg & H_D0I3C_I3) + hw->pg_state = MEI_PG_ON; + } +} + +/** + * mei_me_pg_state - translate internal pg state + * to the mei power gating state + * + * @dev: mei device + * + * Return: MEI_PG_OFF if aliveness is on and MEI_PG_ON otherwise + */ +static inline enum mei_pg_state mei_me_pg_state(struct mei_device *dev) +{ + struct mei_me_hw *hw = to_me_hw(dev); + + return hw->pg_state; +} + +static inline u32 me_intr_src(u32 hcsr) +{ + return hcsr & H_CSR_IS_MASK; +} + +/** + * me_intr_disable - disables mei device interrupts + * using supplied hcsr register value. + * + * @dev: the device structure + * @hcsr: supplied hcsr register value + */ +static inline void me_intr_disable(struct mei_device *dev, u32 hcsr) +{ + hcsr &= ~H_CSR_IE_MASK; + mei_hcsr_set(dev, hcsr); +} + +/** + * mei_me_intr_clear - clear and stop interrupts + * + * @dev: the device structure + * @hcsr: supplied hcsr register value + */ +static inline void me_intr_clear(struct mei_device *dev, u32 hcsr) +{ + if (me_intr_src(hcsr)) + mei_hcsr_write(dev, hcsr); +} + +/** + * mei_me_intr_clear - clear and stop interrupts + * + * @dev: the device structure + */ +static void mei_me_intr_clear(struct mei_device *dev) +{ + u32 hcsr = mei_hcsr_read(dev); + + me_intr_clear(dev, hcsr); +} +/** + * mei_me_intr_enable - enables mei device interrupts + * + * @dev: the device structure + */ +static void mei_me_intr_enable(struct mei_device *dev) +{ + u32 hcsr = mei_hcsr_read(dev); + + hcsr |= H_CSR_IE_MASK; + mei_hcsr_set(dev, hcsr); +} + +/** + * mei_me_intr_disable - disables mei device interrupts + * + * @dev: the device structure + */ +static void mei_me_intr_disable(struct mei_device *dev) +{ + u32 hcsr = mei_hcsr_read(dev); + + me_intr_disable(dev, hcsr); +} + +/** + * mei_me_synchronize_irq - wait for pending IRQ handlers + * + * @dev: the device structure + */ +static void mei_me_synchronize_irq(struct mei_device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev->dev); + + synchronize_irq(pdev->irq); +} + +/** + * mei_me_hw_reset_release - release device from the reset + * + * @dev: the device structure + */ +static void mei_me_hw_reset_release(struct mei_device *dev) +{ + u32 hcsr = mei_hcsr_read(dev); + + hcsr |= H_IG; + hcsr &= ~H_RST; + mei_hcsr_set(dev, hcsr); + + /* complete this write before we set host ready on another CPU */ + mmiowb(); +} + +/** + * mei_me_host_set_ready - enable device + * + * @dev: mei device + */ +static void mei_me_host_set_ready(struct mei_device *dev) +{ + u32 hcsr = mei_hcsr_read(dev); + + hcsr |= H_CSR_IE_MASK | H_IG | H_RDY; + mei_hcsr_set(dev, hcsr); +} + +/** + * mei_me_host_is_ready - check whether the host has turned ready + * + * @dev: mei device + * Return: bool + */ +static bool mei_me_host_is_ready(struct mei_device *dev) +{ + u32 hcsr = mei_hcsr_read(dev); + + return (hcsr & H_RDY) == H_RDY; +} + +/** + * mei_me_hw_is_ready - check whether the me(hw) has turned ready + * + * @dev: mei device + * Return: bool + */ +static bool mei_me_hw_is_ready(struct mei_device *dev) +{ + u32 mecsr = mei_me_mecsr_read(dev); + + return (mecsr & ME_RDY_HRA) == ME_RDY_HRA; +} + +/** + * mei_me_hw_is_resetting - check whether the me(hw) is in reset + * + * @dev: mei device + * Return: bool + */ +static bool mei_me_hw_is_resetting(struct mei_device *dev) +{ + u32 mecsr = mei_me_mecsr_read(dev); + + return (mecsr & ME_RST_HRA) == ME_RST_HRA; +} + +/** + * mei_me_hw_ready_wait - wait until the me(hw) has turned ready + * or timeout is reached + * + * @dev: mei device + * Return: 0 on success, error otherwise + */ +static int mei_me_hw_ready_wait(struct mei_device *dev) +{ + mutex_unlock(&dev->device_lock); + wait_event_timeout(dev->wait_hw_ready, + dev->recvd_hw_ready, + mei_secs_to_jiffies(MEI_HW_READY_TIMEOUT)); + mutex_lock(&dev->device_lock); + if (!dev->recvd_hw_ready) { + dev_err(dev->dev, "wait hw ready failed\n"); + return -ETIME; + } + + mei_me_hw_reset_release(dev); + dev->recvd_hw_ready = false; + return 0; +} + +/** + * mei_me_hw_start - hw start routine + * + * @dev: mei device + * Return: 0 on success, error otherwise + */ +static int mei_me_hw_start(struct mei_device *dev) +{ + int ret = mei_me_hw_ready_wait(dev); + + if (ret) + return ret; + dev_dbg(dev->dev, "hw is ready\n"); + + mei_me_host_set_ready(dev); + return ret; +} + + +/** + * mei_hbuf_filled_slots - gets number of device filled buffer slots + * + * @dev: the device structure + * + * Return: number of filled slots + */ +static unsigned char mei_hbuf_filled_slots(struct mei_device *dev) +{ + u32 hcsr; + char read_ptr, write_ptr; + + hcsr = mei_hcsr_read(dev); + + read_ptr = (char) ((hcsr & H_CBRP) >> 8); + write_ptr = (char) ((hcsr & H_CBWP) >> 16); + + return (unsigned char) (write_ptr - read_ptr); +} + +/** + * mei_me_hbuf_is_empty - checks if host buffer is empty. + * + * @dev: the device structure + * + * Return: true if empty, false - otherwise. + */ +static bool mei_me_hbuf_is_empty(struct mei_device *dev) +{ + return mei_hbuf_filled_slots(dev) == 0; +} + +/** + * mei_me_hbuf_empty_slots - counts write empty slots. + * + * @dev: the device structure + * + * Return: -EOVERFLOW if overflow, otherwise empty slots count + */ +static int mei_me_hbuf_empty_slots(struct mei_device *dev) +{ + struct mei_me_hw *hw = to_me_hw(dev); + unsigned char filled_slots, empty_slots; + + filled_slots = mei_hbuf_filled_slots(dev); + empty_slots = hw->hbuf_depth - filled_slots; + + /* check for overflow */ + if (filled_slots > hw->hbuf_depth) + return -EOVERFLOW; + + return empty_slots; +} + +/** + * mei_me_hbuf_depth - returns depth of the hw buffer. + * + * @dev: the device structure + * + * Return: size of hw buffer in slots + */ +static u32 mei_me_hbuf_depth(const struct mei_device *dev) +{ + struct mei_me_hw *hw = to_me_hw(dev); + + return hw->hbuf_depth; +} + +/** + * mei_me_hbuf_write - writes a message to host hw buffer. + * + * @dev: the device structure + * @hdr: header of message + * @hdr_len: header length in bytes: must be multiplication of a slot (4bytes) + * @data: payload + * @data_len: payload length in bytes + * + * Return: 0 if success, < 0 - otherwise. + */ +static int mei_me_hbuf_write(struct mei_device *dev, + const void *hdr, size_t hdr_len, + const void *data, size_t data_len) +{ + unsigned long rem; + unsigned long i; + const u32 *reg_buf; + u32 dw_cnt; + int empty_slots; + + if (WARN_ON(!hdr || !data || hdr_len & 0x3)) + return -EINVAL; + + dev_dbg(dev->dev, MEI_HDR_FMT, MEI_HDR_PRM((struct mei_msg_hdr *)hdr)); + + empty_slots = mei_hbuf_empty_slots(dev); + dev_dbg(dev->dev, "empty slots = %hu.\n", empty_slots); + + if (empty_slots < 0) + return -EOVERFLOW; + + dw_cnt = mei_data2slots(hdr_len + data_len); + if (dw_cnt > (u32)empty_slots) + return -EMSGSIZE; + + reg_buf = hdr; + for (i = 0; i < hdr_len / MEI_SLOT_SIZE; i++) + mei_me_hcbww_write(dev, reg_buf[i]); + + reg_buf = data; + for (i = 0; i < data_len / MEI_SLOT_SIZE; i++) + mei_me_hcbww_write(dev, reg_buf[i]); + + rem = data_len & 0x3; + if (rem > 0) { + u32 reg = 0; + + memcpy(®, (const u8 *)data + data_len - rem, rem); + mei_me_hcbww_write(dev, reg); + } + + mei_hcsr_set_hig(dev); + if (!mei_me_hw_is_ready(dev)) + return -EIO; + + return 0; +} + +/** + * mei_me_count_full_read_slots - counts read full slots. + * + * @dev: the device structure + * + * Return: -EOVERFLOW if overflow, otherwise filled slots count + */ +static int mei_me_count_full_read_slots(struct mei_device *dev) +{ + u32 me_csr; + char read_ptr, write_ptr; + unsigned char buffer_depth, filled_slots; + + me_csr = mei_me_mecsr_read(dev); + buffer_depth = (unsigned char)((me_csr & ME_CBD_HRA) >> 24); + read_ptr = (char) ((me_csr & ME_CBRP_HRA) >> 8); + write_ptr = (char) ((me_csr & ME_CBWP_HRA) >> 16); + filled_slots = (unsigned char) (write_ptr - read_ptr); + + /* check for overflow */ + if (filled_slots > buffer_depth) + return -EOVERFLOW; + + dev_dbg(dev->dev, "filled_slots =%08x\n", filled_slots); + return (int)filled_slots; +} + +/** + * mei_me_read_slots - reads a message from mei device. + * + * @dev: the device structure + * @buffer: message buffer will be written + * @buffer_length: message size will be read + * + * Return: always 0 + */ +static int mei_me_read_slots(struct mei_device *dev, unsigned char *buffer, + unsigned long buffer_length) +{ + u32 *reg_buf = (u32 *)buffer; + + for (; buffer_length >= MEI_SLOT_SIZE; buffer_length -= MEI_SLOT_SIZE) + *reg_buf++ = mei_me_mecbrw_read(dev); + + if (buffer_length > 0) { + u32 reg = mei_me_mecbrw_read(dev); + + memcpy(reg_buf, ®, buffer_length); + } + + mei_hcsr_set_hig(dev); + return 0; +} + +/** + * mei_me_pg_set - write pg enter register + * + * @dev: the device structure + */ +static void mei_me_pg_set(struct mei_device *dev) +{ + struct mei_me_hw *hw = to_me_hw(dev); + u32 reg; + + reg = mei_me_reg_read(hw, H_HPG_CSR); + trace_mei_reg_read(dev->dev, "H_HPG_CSR", H_HPG_CSR, reg); + + reg |= H_HPG_CSR_PGI; + + trace_mei_reg_write(dev->dev, "H_HPG_CSR", H_HPG_CSR, reg); + mei_me_reg_write(hw, H_HPG_CSR, reg); +} + +/** + * mei_me_pg_unset - write pg exit register + * + * @dev: the device structure + */ +static void mei_me_pg_unset(struct mei_device *dev) +{ + struct mei_me_hw *hw = to_me_hw(dev); + u32 reg; + + reg = mei_me_reg_read(hw, H_HPG_CSR); + trace_mei_reg_read(dev->dev, "H_HPG_CSR", H_HPG_CSR, reg); + + WARN(!(reg & H_HPG_CSR_PGI), "PGI is not set\n"); + + reg |= H_HPG_CSR_PGIHEXR; + + trace_mei_reg_write(dev->dev, "H_HPG_CSR", H_HPG_CSR, reg); + mei_me_reg_write(hw, H_HPG_CSR, reg); +} + +/** + * mei_me_pg_legacy_enter_sync - perform legacy pg entry procedure + * + * @dev: the device structure + * + * Return: 0 on success an error code otherwise + */ +static int mei_me_pg_legacy_enter_sync(struct mei_device *dev) +{ + struct mei_me_hw *hw = to_me_hw(dev); + unsigned long timeout = mei_secs_to_jiffies(MEI_PGI_TIMEOUT); + int ret; + + dev->pg_event = MEI_PG_EVENT_WAIT; + + ret = mei_hbm_pg(dev, MEI_PG_ISOLATION_ENTRY_REQ_CMD); + if (ret) + return ret; + + mutex_unlock(&dev->device_lock); + wait_event_timeout(dev->wait_pg, + dev->pg_event == MEI_PG_EVENT_RECEIVED, timeout); + mutex_lock(&dev->device_lock); + + if (dev->pg_event == MEI_PG_EVENT_RECEIVED) { + mei_me_pg_set(dev); + ret = 0; + } else { + ret = -ETIME; + } + + dev->pg_event = MEI_PG_EVENT_IDLE; + hw->pg_state = MEI_PG_ON; + + return ret; +} + +/** + * mei_me_pg_legacy_exit_sync - perform legacy pg exit procedure + * + * @dev: the device structure + * + * Return: 0 on success an error code otherwise + */ +static int mei_me_pg_legacy_exit_sync(struct mei_device *dev) +{ + struct mei_me_hw *hw = to_me_hw(dev); + unsigned long timeout = mei_secs_to_jiffies(MEI_PGI_TIMEOUT); + int ret; + + if (dev->pg_event == MEI_PG_EVENT_RECEIVED) + goto reply; + + dev->pg_event = MEI_PG_EVENT_WAIT; + + mei_me_pg_unset(dev); + + mutex_unlock(&dev->device_lock); + wait_event_timeout(dev->wait_pg, + dev->pg_event == MEI_PG_EVENT_RECEIVED, timeout); + mutex_lock(&dev->device_lock); + +reply: + if (dev->pg_event != MEI_PG_EVENT_RECEIVED) { + ret = -ETIME; + goto out; + } + + dev->pg_event = MEI_PG_EVENT_INTR_WAIT; + ret = mei_hbm_pg(dev, MEI_PG_ISOLATION_EXIT_RES_CMD); + if (ret) + return ret; + + mutex_unlock(&dev->device_lock); + wait_event_timeout(dev->wait_pg, + dev->pg_event == MEI_PG_EVENT_INTR_RECEIVED, timeout); + mutex_lock(&dev->device_lock); + + if (dev->pg_event == MEI_PG_EVENT_INTR_RECEIVED) + ret = 0; + else + ret = -ETIME; + +out: + dev->pg_event = MEI_PG_EVENT_IDLE; + hw->pg_state = MEI_PG_OFF; + + return ret; +} + +/** + * mei_me_pg_in_transition - is device now in pg transition + * + * @dev: the device structure + * + * Return: true if in pg transition, false otherwise + */ +static bool mei_me_pg_in_transition(struct mei_device *dev) +{ + return dev->pg_event >= MEI_PG_EVENT_WAIT && + dev->pg_event <= MEI_PG_EVENT_INTR_WAIT; +} + +/** + * mei_me_pg_is_enabled - detect if PG is supported by HW + * + * @dev: the device structure + * + * Return: true is pg supported, false otherwise + */ +static bool mei_me_pg_is_enabled(struct mei_device *dev) +{ + struct mei_me_hw *hw = to_me_hw(dev); + u32 reg = mei_me_mecsr_read(dev); + + if (hw->d0i3_supported) + return true; + + if ((reg & ME_PGIC_HRA) == 0) + goto notsupported; + + if (!dev->hbm_f_pg_supported) + goto notsupported; + + return true; + +notsupported: + dev_dbg(dev->dev, "pg: not supported: d0i3 = %d HGP = %d hbm version %d.%d ?= %d.%d\n", + hw->d0i3_supported, + !!(reg & ME_PGIC_HRA), + dev->version.major_version, + dev->version.minor_version, + HBM_MAJOR_VERSION_PGI, + HBM_MINOR_VERSION_PGI); + + return false; +} + +/** + * mei_me_d0i3_set - write d0i3 register bit on mei device. + * + * @dev: the device structure + * @intr: ask for interrupt + * + * Return: D0I3C register value + */ +static u32 mei_me_d0i3_set(struct mei_device *dev, bool intr) +{ + u32 reg = mei_me_d0i3c_read(dev); + + reg |= H_D0I3C_I3; + if (intr) + reg |= H_D0I3C_IR; + else + reg &= ~H_D0I3C_IR; + mei_me_d0i3c_write(dev, reg); + /* read it to ensure HW consistency */ + reg = mei_me_d0i3c_read(dev); + return reg; +} + +/** + * mei_me_d0i3_unset - clean d0i3 register bit on mei device. + * + * @dev: the device structure + * + * Return: D0I3C register value + */ +static u32 mei_me_d0i3_unset(struct mei_device *dev) +{ + u32 reg = mei_me_d0i3c_read(dev); + + reg &= ~H_D0I3C_I3; + reg |= H_D0I3C_IR; + mei_me_d0i3c_write(dev, reg); + /* read it to ensure HW consistency */ + reg = mei_me_d0i3c_read(dev); + return reg; +} + +/** + * mei_me_d0i3_enter_sync - perform d0i3 entry procedure + * + * @dev: the device structure + * + * Return: 0 on success an error code otherwise + */ +static int mei_me_d0i3_enter_sync(struct mei_device *dev) +{ + struct mei_me_hw *hw = to_me_hw(dev); + unsigned long d0i3_timeout = mei_secs_to_jiffies(MEI_D0I3_TIMEOUT); + unsigned long pgi_timeout = mei_secs_to_jiffies(MEI_PGI_TIMEOUT); + int ret; + u32 reg; + + reg = mei_me_d0i3c_read(dev); + if (reg & H_D0I3C_I3) { + /* we are in d0i3, nothing to do */ + dev_dbg(dev->dev, "d0i3 set not needed\n"); + ret = 0; + goto on; + } + + /* PGI entry procedure */ + dev->pg_event = MEI_PG_EVENT_WAIT; + + ret = mei_hbm_pg(dev, MEI_PG_ISOLATION_ENTRY_REQ_CMD); + if (ret) + /* FIXME: should we reset here? */ + goto out; + + mutex_unlock(&dev->device_lock); + wait_event_timeout(dev->wait_pg, + dev->pg_event == MEI_PG_EVENT_RECEIVED, pgi_timeout); + mutex_lock(&dev->device_lock); + + if (dev->pg_event != MEI_PG_EVENT_RECEIVED) { + ret = -ETIME; + goto out; + } + /* end PGI entry procedure */ + + dev->pg_event = MEI_PG_EVENT_INTR_WAIT; + + reg = mei_me_d0i3_set(dev, true); + if (!(reg & H_D0I3C_CIP)) { + dev_dbg(dev->dev, "d0i3 enter wait not needed\n"); + ret = 0; + goto on; + } + + mutex_unlock(&dev->device_lock); + wait_event_timeout(dev->wait_pg, + dev->pg_event == MEI_PG_EVENT_INTR_RECEIVED, d0i3_timeout); + mutex_lock(&dev->device_lock); + + if (dev->pg_event != MEI_PG_EVENT_INTR_RECEIVED) { + reg = mei_me_d0i3c_read(dev); + if (!(reg & H_D0I3C_I3)) { + ret = -ETIME; + goto out; + } + } + + ret = 0; +on: + hw->pg_state = MEI_PG_ON; +out: + dev->pg_event = MEI_PG_EVENT_IDLE; + dev_dbg(dev->dev, "d0i3 enter ret = %d\n", ret); + return ret; +} + +/** + * mei_me_d0i3_enter - perform d0i3 entry procedure + * no hbm PG handshake + * no waiting for confirmation; runs with interrupts + * disabled + * + * @dev: the device structure + * + * Return: 0 on success an error code otherwise + */ +static int mei_me_d0i3_enter(struct mei_device *dev) +{ + struct mei_me_hw *hw = to_me_hw(dev); + u32 reg; + + reg = mei_me_d0i3c_read(dev); + if (reg & H_D0I3C_I3) { + /* we are in d0i3, nothing to do */ + dev_dbg(dev->dev, "already d0i3 : set not needed\n"); + goto on; + } + + mei_me_d0i3_set(dev, false); +on: + hw->pg_state = MEI_PG_ON; + dev->pg_event = MEI_PG_EVENT_IDLE; + dev_dbg(dev->dev, "d0i3 enter\n"); + return 0; +} + +/** + * mei_me_d0i3_exit_sync - perform d0i3 exit procedure + * + * @dev: the device structure + * + * Return: 0 on success an error code otherwise + */ +static int mei_me_d0i3_exit_sync(struct mei_device *dev) +{ + struct mei_me_hw *hw = to_me_hw(dev); + unsigned long timeout = mei_secs_to_jiffies(MEI_D0I3_TIMEOUT); + int ret; + u32 reg; + + dev->pg_event = MEI_PG_EVENT_INTR_WAIT; + + reg = mei_me_d0i3c_read(dev); + if (!(reg & H_D0I3C_I3)) { + /* we are not in d0i3, nothing to do */ + dev_dbg(dev->dev, "d0i3 exit not needed\n"); + ret = 0; + goto off; + } + + reg = mei_me_d0i3_unset(dev); + if (!(reg & H_D0I3C_CIP)) { + dev_dbg(dev->dev, "d0i3 exit wait not needed\n"); + ret = 0; + goto off; + } + + mutex_unlock(&dev->device_lock); + wait_event_timeout(dev->wait_pg, + dev->pg_event == MEI_PG_EVENT_INTR_RECEIVED, timeout); + mutex_lock(&dev->device_lock); + + if (dev->pg_event != MEI_PG_EVENT_INTR_RECEIVED) { + reg = mei_me_d0i3c_read(dev); + if (reg & H_D0I3C_I3) { + ret = -ETIME; + goto out; + } + } + + ret = 0; +off: + hw->pg_state = MEI_PG_OFF; +out: + dev->pg_event = MEI_PG_EVENT_IDLE; + + dev_dbg(dev->dev, "d0i3 exit ret = %d\n", ret); + return ret; +} + +/** + * mei_me_pg_legacy_intr - perform legacy pg processing + * in interrupt thread handler + * + * @dev: the device structure + */ +static void mei_me_pg_legacy_intr(struct mei_device *dev) +{ + struct mei_me_hw *hw = to_me_hw(dev); + + if (dev->pg_event != MEI_PG_EVENT_INTR_WAIT) + return; + + dev->pg_event = MEI_PG_EVENT_INTR_RECEIVED; + hw->pg_state = MEI_PG_OFF; + if (waitqueue_active(&dev->wait_pg)) + wake_up(&dev->wait_pg); +} + +/** + * mei_me_d0i3_intr - perform d0i3 processing in interrupt thread handler + * + * @dev: the device structure + * @intr_source: interrupt source + */ +static void mei_me_d0i3_intr(struct mei_device *dev, u32 intr_source) +{ + struct mei_me_hw *hw = to_me_hw(dev); + + if (dev->pg_event == MEI_PG_EVENT_INTR_WAIT && + (intr_source & H_D0I3C_IS)) { + dev->pg_event = MEI_PG_EVENT_INTR_RECEIVED; + if (hw->pg_state == MEI_PG_ON) { + hw->pg_state = MEI_PG_OFF; + if (dev->hbm_state != MEI_HBM_IDLE) { + /* + * force H_RDY because it could be + * wiped off during PG + */ + dev_dbg(dev->dev, "d0i3 set host ready\n"); + mei_me_host_set_ready(dev); + } + } else { + hw->pg_state = MEI_PG_ON; + } + + wake_up(&dev->wait_pg); + } + + if (hw->pg_state == MEI_PG_ON && (intr_source & H_IS)) { + /* + * HW sent some data and we are in D0i3, so + * we got here because of HW initiated exit from D0i3. + * Start runtime pm resume sequence to exit low power state. + */ + dev_dbg(dev->dev, "d0i3 want resume\n"); + mei_hbm_pg_resume(dev); + } +} + +/** + * mei_me_pg_intr - perform pg processing in interrupt thread handler + * + * @dev: the device structure + * @intr_source: interrupt source + */ +static void mei_me_pg_intr(struct mei_device *dev, u32 intr_source) +{ + struct mei_me_hw *hw = to_me_hw(dev); + + if (hw->d0i3_supported) + mei_me_d0i3_intr(dev, intr_source); + else + mei_me_pg_legacy_intr(dev); +} + +/** + * mei_me_pg_enter_sync - perform runtime pm entry procedure + * + * @dev: the device structure + * + * Return: 0 on success an error code otherwise + */ +int mei_me_pg_enter_sync(struct mei_device *dev) +{ + struct mei_me_hw *hw = to_me_hw(dev); + + if (hw->d0i3_supported) + return mei_me_d0i3_enter_sync(dev); + else + return mei_me_pg_legacy_enter_sync(dev); +} + +/** + * mei_me_pg_exit_sync - perform runtime pm exit procedure + * + * @dev: the device structure + * + * Return: 0 on success an error code otherwise + */ +int mei_me_pg_exit_sync(struct mei_device *dev) +{ + struct mei_me_hw *hw = to_me_hw(dev); + + if (hw->d0i3_supported) + return mei_me_d0i3_exit_sync(dev); + else + return mei_me_pg_legacy_exit_sync(dev); +} + +/** + * mei_me_hw_reset - resets fw via mei csr register. + * + * @dev: the device structure + * @intr_enable: if interrupt should be enabled after reset. + * + * Return: 0 on success an error code otherwise + */ +static int mei_me_hw_reset(struct mei_device *dev, bool intr_enable) +{ + struct mei_me_hw *hw = to_me_hw(dev); + int ret; + u32 hcsr; + + if (intr_enable) { + mei_me_intr_enable(dev); + if (hw->d0i3_supported) { + ret = mei_me_d0i3_exit_sync(dev); + if (ret) + return ret; + } + } + + pm_runtime_set_active(dev->dev); + + hcsr = mei_hcsr_read(dev); + /* H_RST may be found lit before reset is started, + * for example if preceding reset flow hasn't completed. + * In that case asserting H_RST will be ignored, therefore + * we need to clean H_RST bit to start a successful reset sequence. + */ + if ((hcsr & H_RST) == H_RST) { + dev_warn(dev->dev, "H_RST is set = 0x%08X", hcsr); + hcsr &= ~H_RST; + mei_hcsr_set(dev, hcsr); + hcsr = mei_hcsr_read(dev); + } + + hcsr |= H_RST | H_IG | H_CSR_IS_MASK; + + if (!intr_enable) + hcsr &= ~H_CSR_IE_MASK; + + dev->recvd_hw_ready = false; + mei_hcsr_write(dev, hcsr); + + /* + * Host reads the H_CSR once to ensure that the + * posted write to H_CSR completes. + */ + hcsr = mei_hcsr_read(dev); + + if ((hcsr & H_RST) == 0) + dev_warn(dev->dev, "H_RST is not set = 0x%08X", hcsr); + + if ((hcsr & H_RDY) == H_RDY) + dev_warn(dev->dev, "H_RDY is not cleared 0x%08X", hcsr); + + if (!intr_enable) { + mei_me_hw_reset_release(dev); + if (hw->d0i3_supported) { + ret = mei_me_d0i3_enter(dev); + if (ret) + return ret; + } + } + return 0; +} + +/** + * mei_me_irq_quick_handler - The ISR of the MEI device + * + * @irq: The irq number + * @dev_id: pointer to the device structure + * + * Return: irqreturn_t + */ +irqreturn_t mei_me_irq_quick_handler(int irq, void *dev_id) +{ + struct mei_device *dev = (struct mei_device *)dev_id; + u32 hcsr; + + hcsr = mei_hcsr_read(dev); + if (!me_intr_src(hcsr)) + return IRQ_NONE; + + dev_dbg(dev->dev, "interrupt source 0x%08X\n", me_intr_src(hcsr)); + + /* disable interrupts on device */ + me_intr_disable(dev, hcsr); + return IRQ_WAKE_THREAD; +} + +/** + * mei_me_irq_thread_handler - function called after ISR to handle the interrupt + * processing. + * + * @irq: The irq number + * @dev_id: pointer to the device structure + * + * Return: irqreturn_t + * + */ +irqreturn_t mei_me_irq_thread_handler(int irq, void *dev_id) +{ + struct mei_device *dev = (struct mei_device *) dev_id; + struct list_head cmpl_list; + s32 slots; + u32 hcsr; + int rets = 0; + + dev_dbg(dev->dev, "function called after ISR to handle the interrupt processing.\n"); + /* initialize our complete list */ + mutex_lock(&dev->device_lock); + + hcsr = mei_hcsr_read(dev); + me_intr_clear(dev, hcsr); + + INIT_LIST_HEAD(&cmpl_list); + + /* check if ME wants a reset */ + if (!mei_hw_is_ready(dev) && dev->dev_state != MEI_DEV_RESETTING) { + dev_warn(dev->dev, "FW not ready: resetting.\n"); + schedule_work(&dev->reset_work); + goto end; + } + + if (mei_me_hw_is_resetting(dev)) + mei_hcsr_set_hig(dev); + + mei_me_pg_intr(dev, me_intr_src(hcsr)); + + /* check if we need to start the dev */ + if (!mei_host_is_ready(dev)) { + if (mei_hw_is_ready(dev)) { + dev_dbg(dev->dev, "we need to start the dev.\n"); + dev->recvd_hw_ready = true; + wake_up(&dev->wait_hw_ready); + } else { + dev_dbg(dev->dev, "Spurious Interrupt\n"); + } + goto end; + } + /* check slots available for reading */ + slots = mei_count_full_read_slots(dev); + while (slots > 0) { + dev_dbg(dev->dev, "slots to read = %08x\n", slots); + rets = mei_irq_read_handler(dev, &cmpl_list, &slots); + /* There is a race between ME write and interrupt delivery: + * Not all data is always available immediately after the + * interrupt, so try to read again on the next interrupt. + */ + if (rets == -ENODATA) + break; + + if (rets && + (dev->dev_state != MEI_DEV_RESETTING && + dev->dev_state != MEI_DEV_POWER_DOWN)) { + dev_err(dev->dev, "mei_irq_read_handler ret = %d.\n", + rets); + schedule_work(&dev->reset_work); + goto end; + } + } + + dev->hbuf_is_ready = mei_hbuf_is_ready(dev); + + /* + * During PG handshake only allowed write is the replay to the + * PG exit message, so block calling write function + * if the pg event is in PG handshake + */ + if (dev->pg_event != MEI_PG_EVENT_WAIT && + dev->pg_event != MEI_PG_EVENT_RECEIVED) { + rets = mei_irq_write_handler(dev, &cmpl_list); + dev->hbuf_is_ready = mei_hbuf_is_ready(dev); + } + + mei_irq_compl_handler(dev, &cmpl_list); + +end: + dev_dbg(dev->dev, "interrupt thread end ret = %d\n", rets); + mei_me_intr_enable(dev); + mutex_unlock(&dev->device_lock); + return IRQ_HANDLED; +} + +static const struct mei_hw_ops mei_me_hw_ops = { + + .fw_status = mei_me_fw_status, + .pg_state = mei_me_pg_state, + + .host_is_ready = mei_me_host_is_ready, + + .hw_is_ready = mei_me_hw_is_ready, + .hw_reset = mei_me_hw_reset, + .hw_config = mei_me_hw_config, + .hw_start = mei_me_hw_start, + + .pg_in_transition = mei_me_pg_in_transition, + .pg_is_enabled = mei_me_pg_is_enabled, + + .intr_clear = mei_me_intr_clear, + .intr_enable = mei_me_intr_enable, + .intr_disable = mei_me_intr_disable, + .synchronize_irq = mei_me_synchronize_irq, + + .hbuf_free_slots = mei_me_hbuf_empty_slots, + .hbuf_is_ready = mei_me_hbuf_is_empty, + .hbuf_depth = mei_me_hbuf_depth, + + .write = mei_me_hbuf_write, + + .rdbuf_full_slots = mei_me_count_full_read_slots, + .read_hdr = mei_me_mecbrw_read, + .read = mei_me_read_slots +}; + +static bool mei_me_fw_type_nm(struct pci_dev *pdev) +{ + u32 reg; + + pci_read_config_dword(pdev, PCI_CFG_HFS_2, ®); + trace_mei_pci_cfg_read(&pdev->dev, "PCI_CFG_HFS_2", PCI_CFG_HFS_2, reg); + /* make sure that bit 9 (NM) is up and bit 10 (DM) is down */ + return (reg & 0x600) == 0x200; +} + +#define MEI_CFG_FW_NM \ + .quirk_probe = mei_me_fw_type_nm + +static bool mei_me_fw_type_sps(struct pci_dev *pdev) +{ + u32 reg; + unsigned int devfn; + + /* + * Read ME FW Status register to check for SPS Firmware + * The SPS FW is only signaled in pci function 0 + */ + devfn = PCI_DEVFN(PCI_SLOT(pdev->devfn), 0); + pci_bus_read_config_dword(pdev->bus, devfn, PCI_CFG_HFS_1, ®); + trace_mei_pci_cfg_read(&pdev->dev, "PCI_CFG_HFS_1", PCI_CFG_HFS_1, reg); + /* if bits [19:16] = 15, running SPS Firmware */ + return (reg & 0xf0000) == 0xf0000; +} + +#define MEI_CFG_FW_SPS \ + .quirk_probe = mei_me_fw_type_sps + +#define MEI_CFG_FW_VER_SUPP \ + .fw_ver_supported = 1 + +#define MEI_CFG_ICH_HFS \ + .fw_status.count = 0 + +#define MEI_CFG_ICH10_HFS \ + .fw_status.count = 1, \ + .fw_status.status[0] = PCI_CFG_HFS_1 + +#define MEI_CFG_PCH_HFS \ + .fw_status.count = 2, \ + .fw_status.status[0] = PCI_CFG_HFS_1, \ + .fw_status.status[1] = PCI_CFG_HFS_2 + +#define MEI_CFG_PCH8_HFS \ + .fw_status.count = 6, \ + .fw_status.status[0] = PCI_CFG_HFS_1, \ + .fw_status.status[1] = PCI_CFG_HFS_2, \ + .fw_status.status[2] = PCI_CFG_HFS_3, \ + .fw_status.status[3] = PCI_CFG_HFS_4, \ + .fw_status.status[4] = PCI_CFG_HFS_5, \ + .fw_status.status[5] = PCI_CFG_HFS_6 + +#define MEI_CFG_DMA_128 \ + .dma_size[DMA_DSCR_HOST] = SZ_128K, \ + .dma_size[DMA_DSCR_DEVICE] = SZ_128K, \ + .dma_size[DMA_DSCR_CTRL] = PAGE_SIZE + +/* ICH Legacy devices */ +static const struct mei_cfg mei_me_ich_cfg = { + MEI_CFG_ICH_HFS, +}; + +/* ICH devices */ +static const struct mei_cfg mei_me_ich10_cfg = { + MEI_CFG_ICH10_HFS, +}; + +/* PCH6 devices */ +static const struct mei_cfg mei_me_pch6_cfg = { + MEI_CFG_PCH_HFS, +}; + +/* PCH7 devices */ +static const struct mei_cfg mei_me_pch7_cfg = { + MEI_CFG_PCH_HFS, + MEI_CFG_FW_VER_SUPP, +}; + +/* PCH Cougar Point and Patsburg with quirk for Node Manager exclusion */ +static const struct mei_cfg mei_me_pch_cpt_pbg_cfg = { + MEI_CFG_PCH_HFS, + MEI_CFG_FW_VER_SUPP, + MEI_CFG_FW_NM, +}; + +/* PCH8 Lynx Point and newer devices */ +static const struct mei_cfg mei_me_pch8_cfg = { + MEI_CFG_PCH8_HFS, + MEI_CFG_FW_VER_SUPP, +}; + +/* PCH8 Lynx Point with quirk for SPS Firmware exclusion */ +static const struct mei_cfg mei_me_pch8_sps_cfg = { + MEI_CFG_PCH8_HFS, + MEI_CFG_FW_VER_SUPP, + MEI_CFG_FW_SPS, +}; + +/* Cannon Lake and newer devices */ +static const struct mei_cfg mei_me_pch12_cfg = { + MEI_CFG_PCH8_HFS, + MEI_CFG_FW_VER_SUPP, + MEI_CFG_DMA_128, +}; + +/* + * mei_cfg_list - A list of platform platform specific configurations. + * Note: has to be synchronized with enum mei_cfg_idx. + */ +static const struct mei_cfg *const mei_cfg_list[] = { + [MEI_ME_UNDEF_CFG] = NULL, + [MEI_ME_ICH_CFG] = &mei_me_ich_cfg, + [MEI_ME_ICH10_CFG] = &mei_me_ich10_cfg, + [MEI_ME_PCH6_CFG] = &mei_me_pch6_cfg, + [MEI_ME_PCH7_CFG] = &mei_me_pch7_cfg, + [MEI_ME_PCH_CPT_PBG_CFG] = &mei_me_pch_cpt_pbg_cfg, + [MEI_ME_PCH8_CFG] = &mei_me_pch8_cfg, + [MEI_ME_PCH8_SPS_CFG] = &mei_me_pch8_sps_cfg, + [MEI_ME_PCH12_CFG] = &mei_me_pch12_cfg, +}; + +const struct mei_cfg *mei_me_get_cfg(kernel_ulong_t idx) +{ + BUILD_BUG_ON(ARRAY_SIZE(mei_cfg_list) != MEI_ME_NUM_CFG); + + if (idx >= MEI_ME_NUM_CFG) + return NULL; + + return mei_cfg_list[idx]; +}; + +/** + * mei_me_dev_init - allocates and initializes the mei device structure + * + * @pdev: The pci device structure + * @cfg: per device generation config + * + * Return: The mei_device pointer on success, NULL on failure. + */ +struct mei_device *mei_me_dev_init(struct pci_dev *pdev, + const struct mei_cfg *cfg) +{ + struct mei_device *dev; + struct mei_me_hw *hw; + + dev = devm_kzalloc(&pdev->dev, sizeof(struct mei_device) + + sizeof(struct mei_me_hw), GFP_KERNEL); + if (!dev) + return NULL; + hw = to_me_hw(dev); + + mei_device_init(dev, &pdev->dev, &mei_me_hw_ops); + hw->cfg = cfg; + dev->fw_f_fw_ver_supported = cfg->fw_ver_supported; + + return dev; +} + diff --git a/drivers/misc/mei/hw-me.h b/drivers/misc/mei/hw-me.h new file mode 100644 index 000000000..775971339 --- /dev/null +++ b/drivers/misc/mei/hw-me.h @@ -0,0 +1,115 @@ +/* + * + * Intel Management Engine Interface (Intel MEI) Linux driver + * Copyright (c) 2003-2012, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + + + +#ifndef _MEI_INTERFACE_H_ +#define _MEI_INTERFACE_H_ + +#include +#include +#include + +#include "mei_dev.h" +#include "client.h" + +/* + * mei_cfg - mei device configuration + * + * @fw_status: FW status + * @quirk_probe: device exclusion quirk + * @dma_size: device DMA buffers size + * @fw_ver_supported: is fw version retrievable from FW + */ +struct mei_cfg { + const struct mei_fw_status fw_status; + bool (*quirk_probe)(struct pci_dev *pdev); + size_t dma_size[DMA_DSCR_NUM]; + u32 fw_ver_supported:1; +}; + + +#define MEI_PCI_DEVICE(dev, cfg) \ + .vendor = PCI_VENDOR_ID_INTEL, .device = (dev), \ + .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID, \ + .driver_data = (kernel_ulong_t)(cfg), + +#define MEI_ME_RPM_TIMEOUT 500 /* ms */ + +/** + * struct mei_me_hw - me hw specific data + * + * @cfg: per device generation config and ops + * @mem_addr: io memory address + * @pg_state: power gating state + * @d0i3_supported: di03 support + * @hbuf_depth: depth of hardware host/write buffer in slots + */ +struct mei_me_hw { + const struct mei_cfg *cfg; + void __iomem *mem_addr; + enum mei_pg_state pg_state; + bool d0i3_supported; + u8 hbuf_depth; +}; + +#define to_me_hw(dev) (struct mei_me_hw *)((dev)->hw) + +/** + * enum mei_cfg_idx - indices to platform specific configurations. + * + * Note: has to be synchronized with mei_cfg_list[] + * + * @MEI_ME_UNDEF_CFG: Lower sentinel. + * @MEI_ME_ICH_CFG: I/O Controller Hub legacy devices. + * @MEI_ME_ICH10_CFG: I/O Controller Hub platforms Gen10 + * @MEI_ME_PCH6_CFG: Platform Controller Hub platforms (Gen6). + * @MEI_ME_PCH7_CFG: Platform Controller Hub platforms (Gen7). + * @MEI_ME_PCH_CPT_PBG_CFG:Platform Controller Hub workstations + * with quirk for Node Manager exclusion. + * @MEI_ME_PCH8_CFG: Platform Controller Hub Gen8 and newer + * client platforms. + * @MEI_ME_PCH8_SPS_CFG: Platform Controller Hub Gen8 and newer + * servers platforms with quirk for + * SPS firmware exclusion. + * @MEI_ME_PCH12_CFG: Platform Controller Hub Gen12 and newer + * @MEI_ME_NUM_CFG: Upper Sentinel. + */ +enum mei_cfg_idx { + MEI_ME_UNDEF_CFG, + MEI_ME_ICH_CFG, + MEI_ME_ICH10_CFG, + MEI_ME_PCH6_CFG, + MEI_ME_PCH7_CFG, + MEI_ME_PCH_CPT_PBG_CFG, + MEI_ME_PCH8_CFG, + MEI_ME_PCH8_SPS_CFG, + MEI_ME_PCH12_CFG, + MEI_ME_NUM_CFG, +}; + +const struct mei_cfg *mei_me_get_cfg(kernel_ulong_t idx); + +struct mei_device *mei_me_dev_init(struct pci_dev *pdev, + const struct mei_cfg *cfg); + +int mei_me_pg_enter_sync(struct mei_device *dev); +int mei_me_pg_exit_sync(struct mei_device *dev); + +irqreturn_t mei_me_irq_quick_handler(int irq, void *dev_id); +irqreturn_t mei_me_irq_thread_handler(int irq, void *dev_id); + +#endif /* _MEI_INTERFACE_H_ */ diff --git a/drivers/misc/mei/hw-txe-regs.h b/drivers/misc/mei/hw-txe-regs.h new file mode 100644 index 000000000..f19229c4e --- /dev/null +++ b/drivers/misc/mei/hw-txe-regs.h @@ -0,0 +1,294 @@ +/****************************************************************************** + * Intel Management Engine Interface (Intel MEI) Linux driver + * Intel MEI Interface Header + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2013 - 2014 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * The full GNU General Public License is included in this distribution + * in the file called COPYING + * + * Contact Information: + * Intel Corporation. + * linux-mei@linux.intel.com + * http://www.intel.com + * + * BSD LICENSE + * + * Copyright(c) 2013 - 2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + *****************************************************************************/ +#ifndef _MEI_HW_TXE_REGS_H_ +#define _MEI_HW_TXE_REGS_H_ + +#include "hw.h" + +#define SEC_ALIVENESS_TIMER_TIMEOUT (5 * MSEC_PER_SEC) +#define SEC_ALIVENESS_WAIT_TIMEOUT (1 * MSEC_PER_SEC) +#define SEC_RESET_WAIT_TIMEOUT (1 * MSEC_PER_SEC) +#define SEC_READY_WAIT_TIMEOUT (5 * MSEC_PER_SEC) +#define START_MESSAGE_RESPONSE_WAIT_TIMEOUT (5 * MSEC_PER_SEC) +#define RESET_CANCEL_WAIT_TIMEOUT (1 * MSEC_PER_SEC) + +enum { + SEC_BAR, + BRIDGE_BAR, + + NUM_OF_MEM_BARS +}; + +/* SeC FW Status Register + * + * FW uses this register in order to report its status to host. + * This register resides in PCI-E config space. + */ +#define PCI_CFG_TXE_FW_STS0 0x40 +# define PCI_CFG_TXE_FW_STS0_WRK_ST_MSK 0x0000000F +# define PCI_CFG_TXE_FW_STS0_OP_ST_MSK 0x000001C0 +# define PCI_CFG_TXE_FW_STS0_FW_INIT_CMPLT 0x00000200 +# define PCI_CFG_TXE_FW_STS0_ERR_CODE_MSK 0x0000F000 +# define PCI_CFG_TXE_FW_STS0_OP_MODE_MSK 0x000F0000 +# define PCI_CFG_TXE_FW_STS0_RST_CNT_MSK 0x00F00000 +#define PCI_CFG_TXE_FW_STS1 0x48 + +#define IPC_BASE_ADDR 0x80400 /* SeC IPC Base Address */ + +/* IPC Input Doorbell Register */ +#define SEC_IPC_INPUT_DOORBELL_REG (0x0000 + IPC_BASE_ADDR) + +/* IPC Input Status Register + * This register indicates whether or not processing of + * the most recent command has been completed by the SEC + * New commands and payloads should not be written by the Host + * until this indicates that the previous command has been processed. + */ +#define SEC_IPC_INPUT_STATUS_REG (0x0008 + IPC_BASE_ADDR) +# define SEC_IPC_INPUT_STATUS_RDY BIT(0) + +/* IPC Host Interrupt Status Register */ +#define SEC_IPC_HOST_INT_STATUS_REG (0x0010 + IPC_BASE_ADDR) +#define SEC_IPC_HOST_INT_STATUS_OUT_DB BIT(0) +#define SEC_IPC_HOST_INT_STATUS_IN_RDY BIT(1) +#define SEC_IPC_HOST_INT_STATUS_HDCP_M0_RCVD BIT(5) +#define SEC_IPC_HOST_INT_STATUS_ILL_MEM_ACCESS BIT(17) +#define SEC_IPC_HOST_INT_STATUS_AES_HKEY_ERR BIT(18) +#define SEC_IPC_HOST_INT_STATUS_DES_HKEY_ERR BIT(19) +#define SEC_IPC_HOST_INT_STATUS_TMRMTB_OVERFLOW BIT(21) + +/* Convenient mask for pending interrupts */ +#define SEC_IPC_HOST_INT_STATUS_PENDING \ + (SEC_IPC_HOST_INT_STATUS_OUT_DB| \ + SEC_IPC_HOST_INT_STATUS_IN_RDY) + +/* IPC Host Interrupt Mask Register */ +#define SEC_IPC_HOST_INT_MASK_REG (0x0014 + IPC_BASE_ADDR) + +# define SEC_IPC_HOST_INT_MASK_OUT_DB BIT(0) /* Output Doorbell Int Mask */ +# define SEC_IPC_HOST_INT_MASK_IN_RDY BIT(1) /* Input Ready Int Mask */ + +/* IPC Input Payload RAM */ +#define SEC_IPC_INPUT_PAYLOAD_REG (0x0100 + IPC_BASE_ADDR) +/* IPC Shared Payload RAM */ +#define IPC_SHARED_PAYLOAD_REG (0x0200 + IPC_BASE_ADDR) + +/* SeC Address Translation Table Entry 2 - Ctrl + * + * This register resides also in SeC's PCI-E Memory space. + */ +#define SATT2_CTRL_REG 0x1040 +# define SATT2_CTRL_VALID_MSK BIT(0) +# define SATT2_CTRL_BR_BASE_ADDR_REG_SHIFT 8 +# define SATT2_CTRL_BRIDGE_HOST_EN_MSK BIT(12) + +/* SATT Table Entry 2 SAP Base Address Register */ +#define SATT2_SAP_BA_REG 0x1044 +/* SATT Table Entry 2 SAP Size Register. */ +#define SATT2_SAP_SIZE_REG 0x1048 + /* SATT Table Entry 2 SAP Bridge Address - LSB Register */ +#define SATT2_BRG_BA_LSB_REG 0x104C + +/* Host High-level Interrupt Status Register */ +#define HHISR_REG 0x2020 +/* Host High-level Interrupt Enable Register + * + * Resides in PCI memory space. This is the top hierarchy for + * interrupts from SeC to host, aggregating both interrupts that + * arrive through HICR registers as well as interrupts + * that arrive via IPC. + */ +#define HHIER_REG 0x2024 +#define IPC_HHIER_SEC BIT(0) +#define IPC_HHIER_BRIDGE BIT(1) +#define IPC_HHIER_MSK (IPC_HHIER_SEC | IPC_HHIER_BRIDGE) + +/* Host High-level Interrupt Mask Register. + * + * Resides in PCI memory space. + * This is the top hierarchy for masking interrupts from SeC to host. + */ +#define HHIMR_REG 0x2028 +#define IPC_HHIMR_SEC BIT(0) +#define IPC_HHIMR_BRIDGE BIT(1) + +/* Host High-level IRQ Status Register */ +#define HHIRQSR_REG 0x202C + +/* Host Interrupt Cause Register 0 - SeC IPC Readiness + * + * This register is both an ICR to Host from PCI Memory Space + * and it is also exposed in the SeC memory space. + * This register is used by SeC's IPC driver in order + * to synchronize with host about IPC interface state. + */ +#define HICR_SEC_IPC_READINESS_REG 0x2040 +#define HICR_SEC_IPC_READINESS_HOST_RDY BIT(0) +#define HICR_SEC_IPC_READINESS_SEC_RDY BIT(1) +#define HICR_SEC_IPC_READINESS_SYS_RDY \ + (HICR_SEC_IPC_READINESS_HOST_RDY | \ + HICR_SEC_IPC_READINESS_SEC_RDY) +#define HICR_SEC_IPC_READINESS_RDY_CLR BIT(2) + +/* Host Interrupt Cause Register 1 - Aliveness Response */ +/* This register is both an ICR to Host from PCI Memory Space + * and it is also exposed in the SeC memory space. + * The register may be used by SeC to ACK a host request for aliveness. + */ +#define HICR_HOST_ALIVENESS_RESP_REG 0x2044 +#define HICR_HOST_ALIVENESS_RESP_ACK BIT(0) + +/* Host Interrupt Cause Register 2 - SeC IPC Output Doorbell */ +#define HICR_SEC_IPC_OUTPUT_DOORBELL_REG 0x2048 + +/* Host Interrupt Status Register. + * + * Resides in PCI memory space. + * This is the main register involved in generating interrupts + * from SeC to host via HICRs. + * The interrupt generation rules are as follows: + * An interrupt will be generated whenever for any i, + * there is a transition from a state where at least one of + * the following conditions did not hold, to a state where + * ALL the following conditions hold: + * A) HISR.INT[i]_STS == 1. + * B) HIER.INT[i]_EN == 1. + */ +#define HISR_REG 0x2060 +#define HISR_INT_0_STS BIT(0) +#define HISR_INT_1_STS BIT(1) +#define HISR_INT_2_STS BIT(2) +#define HISR_INT_3_STS BIT(3) +#define HISR_INT_4_STS BIT(4) +#define HISR_INT_5_STS BIT(5) +#define HISR_INT_6_STS BIT(6) +#define HISR_INT_7_STS BIT(7) +#define HISR_INT_STS_MSK \ + (HISR_INT_0_STS | HISR_INT_1_STS | HISR_INT_2_STS) + +/* Host Interrupt Enable Register. Resides in PCI memory space. */ +#define HIER_REG 0x2064 +#define HIER_INT_0_EN BIT(0) +#define HIER_INT_1_EN BIT(1) +#define HIER_INT_2_EN BIT(2) +#define HIER_INT_3_EN BIT(3) +#define HIER_INT_4_EN BIT(4) +#define HIER_INT_5_EN BIT(5) +#define HIER_INT_6_EN BIT(6) +#define HIER_INT_7_EN BIT(7) + +#define HIER_INT_EN_MSK \ + (HIER_INT_0_EN | HIER_INT_1_EN | HIER_INT_2_EN) + + +/* SEC Memory Space IPC output payload. + * + * This register is part of the output payload which SEC provides to host. + */ +#define BRIDGE_IPC_OUTPUT_PAYLOAD_REG 0x20C0 + +/* SeC Interrupt Cause Register - Host Aliveness Request + * This register is both an ICR to SeC and it is also exposed + * in the host-visible PCI memory space. + * The register is used by host to request SeC aliveness. + */ +#define SICR_HOST_ALIVENESS_REQ_REG 0x214C +#define SICR_HOST_ALIVENESS_REQ_REQUESTED BIT(0) + + +/* SeC Interrupt Cause Register - Host IPC Readiness + * + * This register is both an ICR to SeC and it is also exposed + * in the host-visible PCI memory space. + * This register is used by the host's SeC driver uses in order + * to synchronize with SeC about IPC interface state. + */ +#define SICR_HOST_IPC_READINESS_REQ_REG 0x2150 + + +#define SICR_HOST_IPC_READINESS_HOST_RDY BIT(0) +#define SICR_HOST_IPC_READINESS_SEC_RDY BIT(1) +#define SICR_HOST_IPC_READINESS_SYS_RDY \ + (SICR_HOST_IPC_READINESS_HOST_RDY | \ + SICR_HOST_IPC_READINESS_SEC_RDY) +#define SICR_HOST_IPC_READINESS_RDY_CLR BIT(2) + +/* SeC Interrupt Cause Register - SeC IPC Output Status + * + * This register indicates whether or not processing of the most recent + * command has been completed by the Host. + * New commands and payloads should not be written by SeC until this + * register indicates that the previous command has been processed. + */ +#define SICR_SEC_IPC_OUTPUT_STATUS_REG 0x2154 +# define SEC_IPC_OUTPUT_STATUS_RDY BIT(0) + + + +/* MEI IPC Message payload size 64 bytes */ +#define PAYLOAD_SIZE 64 + +/* MAX size for SATT range 32MB */ +#define SATT_RANGE_MAX (32 << 20) + + +#endif /* _MEI_HW_TXE_REGS_H_ */ + diff --git a/drivers/misc/mei/hw-txe.c b/drivers/misc/mei/hw-txe.c new file mode 100644 index 000000000..8449fe036 --- /dev/null +++ b/drivers/misc/mei/hw-txe.c @@ -0,0 +1,1267 @@ +/* + * + * Intel Management Engine Interface (Intel MEI) Linux driver + * Copyright (c) 2013-2014, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "mei_dev.h" +#include "hw-txe.h" +#include "client.h" +#include "hbm.h" + +#include "mei-trace.h" + +#define TXE_HBUF_DEPTH (PAYLOAD_SIZE / MEI_SLOT_SIZE) + +/** + * mei_txe_reg_read - Reads 32bit data from the txe device + * + * @base_addr: registers base address + * @offset: register offset + * + * Return: register value + */ +static inline u32 mei_txe_reg_read(void __iomem *base_addr, + unsigned long offset) +{ + return ioread32(base_addr + offset); +} + +/** + * mei_txe_reg_write - Writes 32bit data to the txe device + * + * @base_addr: registers base address + * @offset: register offset + * @value: the value to write + */ +static inline void mei_txe_reg_write(void __iomem *base_addr, + unsigned long offset, u32 value) +{ + iowrite32(value, base_addr + offset); +} + +/** + * mei_txe_sec_reg_read_silent - Reads 32bit data from the SeC BAR + * + * @hw: the txe hardware structure + * @offset: register offset + * + * Doesn't check for aliveness while Reads 32bit data from the SeC BAR + * + * Return: register value + */ +static inline u32 mei_txe_sec_reg_read_silent(struct mei_txe_hw *hw, + unsigned long offset) +{ + return mei_txe_reg_read(hw->mem_addr[SEC_BAR], offset); +} + +/** + * mei_txe_sec_reg_read - Reads 32bit data from the SeC BAR + * + * @hw: the txe hardware structure + * @offset: register offset + * + * Reads 32bit data from the SeC BAR and shout loud if aliveness is not set + * + * Return: register value + */ +static inline u32 mei_txe_sec_reg_read(struct mei_txe_hw *hw, + unsigned long offset) +{ + WARN(!hw->aliveness, "sec read: aliveness not asserted\n"); + return mei_txe_sec_reg_read_silent(hw, offset); +} +/** + * mei_txe_sec_reg_write_silent - Writes 32bit data to the SeC BAR + * doesn't check for aliveness + * + * @hw: the txe hardware structure + * @offset: register offset + * @value: value to write + * + * Doesn't check for aliveness while writes 32bit data from to the SeC BAR + */ +static inline void mei_txe_sec_reg_write_silent(struct mei_txe_hw *hw, + unsigned long offset, u32 value) +{ + mei_txe_reg_write(hw->mem_addr[SEC_BAR], offset, value); +} + +/** + * mei_txe_sec_reg_write - Writes 32bit data to the SeC BAR + * + * @hw: the txe hardware structure + * @offset: register offset + * @value: value to write + * + * Writes 32bit data from the SeC BAR and shout loud if aliveness is not set + */ +static inline void mei_txe_sec_reg_write(struct mei_txe_hw *hw, + unsigned long offset, u32 value) +{ + WARN(!hw->aliveness, "sec write: aliveness not asserted\n"); + mei_txe_sec_reg_write_silent(hw, offset, value); +} +/** + * mei_txe_br_reg_read - Reads 32bit data from the Bridge BAR + * + * @hw: the txe hardware structure + * @offset: offset from which to read the data + * + * Return: the byte read. + */ +static inline u32 mei_txe_br_reg_read(struct mei_txe_hw *hw, + unsigned long offset) +{ + return mei_txe_reg_read(hw->mem_addr[BRIDGE_BAR], offset); +} + +/** + * mei_txe_br_reg_write - Writes 32bit data to the Bridge BAR + * + * @hw: the txe hardware structure + * @offset: offset from which to write the data + * @value: the byte to write + */ +static inline void mei_txe_br_reg_write(struct mei_txe_hw *hw, + unsigned long offset, u32 value) +{ + mei_txe_reg_write(hw->mem_addr[BRIDGE_BAR], offset, value); +} + +/** + * mei_txe_aliveness_set - request for aliveness change + * + * @dev: the device structure + * @req: requested aliveness value + * + * Request for aliveness change and returns true if the change is + * really needed and false if aliveness is already + * in the requested state + * + * Locking: called under "dev->device_lock" lock + * + * Return: true if request was send + */ +static bool mei_txe_aliveness_set(struct mei_device *dev, u32 req) +{ + + struct mei_txe_hw *hw = to_txe_hw(dev); + bool do_req = hw->aliveness != req; + + dev_dbg(dev->dev, "Aliveness current=%d request=%d\n", + hw->aliveness, req); + if (do_req) { + dev->pg_event = MEI_PG_EVENT_WAIT; + mei_txe_br_reg_write(hw, SICR_HOST_ALIVENESS_REQ_REG, req); + } + return do_req; +} + + +/** + * mei_txe_aliveness_req_get - get aliveness requested register value + * + * @dev: the device structure + * + * Extract HICR_HOST_ALIVENESS_RESP_ACK bit from + * from HICR_HOST_ALIVENESS_REQ register value + * + * Return: SICR_HOST_ALIVENESS_REQ_REQUESTED bit value + */ +static u32 mei_txe_aliveness_req_get(struct mei_device *dev) +{ + struct mei_txe_hw *hw = to_txe_hw(dev); + u32 reg; + + reg = mei_txe_br_reg_read(hw, SICR_HOST_ALIVENESS_REQ_REG); + return reg & SICR_HOST_ALIVENESS_REQ_REQUESTED; +} + +/** + * mei_txe_aliveness_get - get aliveness response register value + * + * @dev: the device structure + * + * Return: HICR_HOST_ALIVENESS_RESP_ACK bit from HICR_HOST_ALIVENESS_RESP + * register + */ +static u32 mei_txe_aliveness_get(struct mei_device *dev) +{ + struct mei_txe_hw *hw = to_txe_hw(dev); + u32 reg; + + reg = mei_txe_br_reg_read(hw, HICR_HOST_ALIVENESS_RESP_REG); + return reg & HICR_HOST_ALIVENESS_RESP_ACK; +} + +/** + * mei_txe_aliveness_poll - waits for aliveness to settle + * + * @dev: the device structure + * @expected: expected aliveness value + * + * Polls for HICR_HOST_ALIVENESS_RESP.ALIVENESS_RESP to be set + * + * Return: 0 if the expected value was received, -ETIME otherwise + */ +static int mei_txe_aliveness_poll(struct mei_device *dev, u32 expected) +{ + struct mei_txe_hw *hw = to_txe_hw(dev); + ktime_t stop, start; + + start = ktime_get(); + stop = ktime_add(start, ms_to_ktime(SEC_ALIVENESS_WAIT_TIMEOUT)); + do { + hw->aliveness = mei_txe_aliveness_get(dev); + if (hw->aliveness == expected) { + dev->pg_event = MEI_PG_EVENT_IDLE; + dev_dbg(dev->dev, "aliveness settled after %lld usecs\n", + ktime_to_us(ktime_sub(ktime_get(), start))); + return 0; + } + usleep_range(20, 50); + } while (ktime_before(ktime_get(), stop)); + + dev->pg_event = MEI_PG_EVENT_IDLE; + dev_err(dev->dev, "aliveness timed out\n"); + return -ETIME; +} + +/** + * mei_txe_aliveness_wait - waits for aliveness to settle + * + * @dev: the device structure + * @expected: expected aliveness value + * + * Waits for HICR_HOST_ALIVENESS_RESP.ALIVENESS_RESP to be set + * + * Return: 0 on success and < 0 otherwise + */ +static int mei_txe_aliveness_wait(struct mei_device *dev, u32 expected) +{ + struct mei_txe_hw *hw = to_txe_hw(dev); + const unsigned long timeout = + msecs_to_jiffies(SEC_ALIVENESS_WAIT_TIMEOUT); + long err; + int ret; + + hw->aliveness = mei_txe_aliveness_get(dev); + if (hw->aliveness == expected) + return 0; + + mutex_unlock(&dev->device_lock); + err = wait_event_timeout(hw->wait_aliveness_resp, + dev->pg_event == MEI_PG_EVENT_RECEIVED, timeout); + mutex_lock(&dev->device_lock); + + hw->aliveness = mei_txe_aliveness_get(dev); + ret = hw->aliveness == expected ? 0 : -ETIME; + + if (ret) + dev_warn(dev->dev, "aliveness timed out = %ld aliveness = %d event = %d\n", + err, hw->aliveness, dev->pg_event); + else + dev_dbg(dev->dev, "aliveness settled after = %d msec aliveness = %d event = %d\n", + jiffies_to_msecs(timeout - err), + hw->aliveness, dev->pg_event); + + dev->pg_event = MEI_PG_EVENT_IDLE; + return ret; +} + +/** + * mei_txe_aliveness_set_sync - sets an wait for aliveness to complete + * + * @dev: the device structure + * @req: requested aliveness value + * + * Return: 0 on success and < 0 otherwise + */ +int mei_txe_aliveness_set_sync(struct mei_device *dev, u32 req) +{ + if (mei_txe_aliveness_set(dev, req)) + return mei_txe_aliveness_wait(dev, req); + return 0; +} + +/** + * mei_txe_pg_in_transition - is device now in pg transition + * + * @dev: the device structure + * + * Return: true if in pg transition, false otherwise + */ +static bool mei_txe_pg_in_transition(struct mei_device *dev) +{ + return dev->pg_event == MEI_PG_EVENT_WAIT; +} + +/** + * mei_txe_pg_is_enabled - detect if PG is supported by HW + * + * @dev: the device structure + * + * Return: true is pg supported, false otherwise + */ +static bool mei_txe_pg_is_enabled(struct mei_device *dev) +{ + return true; +} + +/** + * mei_txe_pg_state - translate aliveness register value + * to the mei power gating state + * + * @dev: the device structure + * + * Return: MEI_PG_OFF if aliveness is on and MEI_PG_ON otherwise + */ +static inline enum mei_pg_state mei_txe_pg_state(struct mei_device *dev) +{ + struct mei_txe_hw *hw = to_txe_hw(dev); + + return hw->aliveness ? MEI_PG_OFF : MEI_PG_ON; +} + +/** + * mei_txe_input_ready_interrupt_enable - sets the Input Ready Interrupt + * + * @dev: the device structure + */ +static void mei_txe_input_ready_interrupt_enable(struct mei_device *dev) +{ + struct mei_txe_hw *hw = to_txe_hw(dev); + u32 hintmsk; + /* Enable the SEC_IPC_HOST_INT_MASK_IN_RDY interrupt */ + hintmsk = mei_txe_sec_reg_read(hw, SEC_IPC_HOST_INT_MASK_REG); + hintmsk |= SEC_IPC_HOST_INT_MASK_IN_RDY; + mei_txe_sec_reg_write(hw, SEC_IPC_HOST_INT_MASK_REG, hintmsk); +} + +/** + * mei_txe_input_doorbell_set - sets bit 0 in + * SEC_IPC_INPUT_DOORBELL.IPC_INPUT_DOORBELL. + * + * @hw: the txe hardware structure + */ +static void mei_txe_input_doorbell_set(struct mei_txe_hw *hw) +{ + /* Clear the interrupt cause */ + clear_bit(TXE_INTR_IN_READY_BIT, &hw->intr_cause); + mei_txe_sec_reg_write(hw, SEC_IPC_INPUT_DOORBELL_REG, 1); +} + +/** + * mei_txe_output_ready_set - Sets the SICR_SEC_IPC_OUTPUT_STATUS bit to 1 + * + * @hw: the txe hardware structure + */ +static void mei_txe_output_ready_set(struct mei_txe_hw *hw) +{ + mei_txe_br_reg_write(hw, + SICR_SEC_IPC_OUTPUT_STATUS_REG, + SEC_IPC_OUTPUT_STATUS_RDY); +} + +/** + * mei_txe_is_input_ready - check if TXE is ready for receiving data + * + * @dev: the device structure + * + * Return: true if INPUT STATUS READY bit is set + */ +static bool mei_txe_is_input_ready(struct mei_device *dev) +{ + struct mei_txe_hw *hw = to_txe_hw(dev); + u32 status; + + status = mei_txe_sec_reg_read(hw, SEC_IPC_INPUT_STATUS_REG); + return !!(SEC_IPC_INPUT_STATUS_RDY & status); +} + +/** + * mei_txe_intr_clear - clear all interrupts + * + * @dev: the device structure + */ +static inline void mei_txe_intr_clear(struct mei_device *dev) +{ + struct mei_txe_hw *hw = to_txe_hw(dev); + + mei_txe_sec_reg_write_silent(hw, SEC_IPC_HOST_INT_STATUS_REG, + SEC_IPC_HOST_INT_STATUS_PENDING); + mei_txe_br_reg_write(hw, HISR_REG, HISR_INT_STS_MSK); + mei_txe_br_reg_write(hw, HHISR_REG, IPC_HHIER_MSK); +} + +/** + * mei_txe_intr_disable - disable all interrupts + * + * @dev: the device structure + */ +static void mei_txe_intr_disable(struct mei_device *dev) +{ + struct mei_txe_hw *hw = to_txe_hw(dev); + + mei_txe_br_reg_write(hw, HHIER_REG, 0); + mei_txe_br_reg_write(hw, HIER_REG, 0); +} +/** + * mei_txe_intr_enable - enable all interrupts + * + * @dev: the device structure + */ +static void mei_txe_intr_enable(struct mei_device *dev) +{ + struct mei_txe_hw *hw = to_txe_hw(dev); + + mei_txe_br_reg_write(hw, HHIER_REG, IPC_HHIER_MSK); + mei_txe_br_reg_write(hw, HIER_REG, HIER_INT_EN_MSK); +} + +/** + * mei_txe_synchronize_irq - wait for pending IRQ handlers + * + * @dev: the device structure + */ +static void mei_txe_synchronize_irq(struct mei_device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev->dev); + + synchronize_irq(pdev->irq); +} + +/** + * mei_txe_pending_interrupts - check if there are pending interrupts + * only Aliveness, Input ready, and output doorbell are of relevance + * + * @dev: the device structure + * + * Checks if there are pending interrupts + * only Aliveness, Readiness, Input ready, and Output doorbell are relevant + * + * Return: true if there are pending interrupts + */ +static bool mei_txe_pending_interrupts(struct mei_device *dev) +{ + + struct mei_txe_hw *hw = to_txe_hw(dev); + bool ret = (hw->intr_cause & (TXE_INTR_READINESS | + TXE_INTR_ALIVENESS | + TXE_INTR_IN_READY | + TXE_INTR_OUT_DB)); + + if (ret) { + dev_dbg(dev->dev, + "Pending Interrupts InReady=%01d Readiness=%01d, Aliveness=%01d, OutDoor=%01d\n", + !!(hw->intr_cause & TXE_INTR_IN_READY), + !!(hw->intr_cause & TXE_INTR_READINESS), + !!(hw->intr_cause & TXE_INTR_ALIVENESS), + !!(hw->intr_cause & TXE_INTR_OUT_DB)); + } + return ret; +} + +/** + * mei_txe_input_payload_write - write a dword to the host buffer + * at offset idx + * + * @dev: the device structure + * @idx: index in the host buffer + * @value: value + */ +static void mei_txe_input_payload_write(struct mei_device *dev, + unsigned long idx, u32 value) +{ + struct mei_txe_hw *hw = to_txe_hw(dev); + + mei_txe_sec_reg_write(hw, SEC_IPC_INPUT_PAYLOAD_REG + + (idx * sizeof(u32)), value); +} + +/** + * mei_txe_out_data_read - read dword from the device buffer + * at offset idx + * + * @dev: the device structure + * @idx: index in the device buffer + * + * Return: register value at index + */ +static u32 mei_txe_out_data_read(const struct mei_device *dev, + unsigned long idx) +{ + struct mei_txe_hw *hw = to_txe_hw(dev); + + return mei_txe_br_reg_read(hw, + BRIDGE_IPC_OUTPUT_PAYLOAD_REG + (idx * sizeof(u32))); +} + +/* Readiness */ + +/** + * mei_txe_readiness_set_host_rdy - set host readiness bit + * + * @dev: the device structure + */ +static void mei_txe_readiness_set_host_rdy(struct mei_device *dev) +{ + struct mei_txe_hw *hw = to_txe_hw(dev); + + mei_txe_br_reg_write(hw, + SICR_HOST_IPC_READINESS_REQ_REG, + SICR_HOST_IPC_READINESS_HOST_RDY); +} + +/** + * mei_txe_readiness_clear - clear host readiness bit + * + * @dev: the device structure + */ +static void mei_txe_readiness_clear(struct mei_device *dev) +{ + struct mei_txe_hw *hw = to_txe_hw(dev); + + mei_txe_br_reg_write(hw, SICR_HOST_IPC_READINESS_REQ_REG, + SICR_HOST_IPC_READINESS_RDY_CLR); +} +/** + * mei_txe_readiness_get - Reads and returns + * the HICR_SEC_IPC_READINESS register value + * + * @dev: the device structure + * + * Return: the HICR_SEC_IPC_READINESS register value + */ +static u32 mei_txe_readiness_get(struct mei_device *dev) +{ + struct mei_txe_hw *hw = to_txe_hw(dev); + + return mei_txe_br_reg_read(hw, HICR_SEC_IPC_READINESS_REG); +} + + +/** + * mei_txe_readiness_is_sec_rdy - check readiness + * for HICR_SEC_IPC_READINESS_SEC_RDY + * + * @readiness: cached readiness state + * + * Return: true if readiness bit is set + */ +static inline bool mei_txe_readiness_is_sec_rdy(u32 readiness) +{ + return !!(readiness & HICR_SEC_IPC_READINESS_SEC_RDY); +} + +/** + * mei_txe_hw_is_ready - check if the hw is ready + * + * @dev: the device structure + * + * Return: true if sec is ready + */ +static bool mei_txe_hw_is_ready(struct mei_device *dev) +{ + u32 readiness = mei_txe_readiness_get(dev); + + return mei_txe_readiness_is_sec_rdy(readiness); +} + +/** + * mei_txe_host_is_ready - check if the host is ready + * + * @dev: the device structure + * + * Return: true if host is ready + */ +static inline bool mei_txe_host_is_ready(struct mei_device *dev) +{ + struct mei_txe_hw *hw = to_txe_hw(dev); + u32 reg = mei_txe_br_reg_read(hw, HICR_SEC_IPC_READINESS_REG); + + return !!(reg & HICR_SEC_IPC_READINESS_HOST_RDY); +} + +/** + * mei_txe_readiness_wait - wait till readiness settles + * + * @dev: the device structure + * + * Return: 0 on success and -ETIME on timeout + */ +static int mei_txe_readiness_wait(struct mei_device *dev) +{ + if (mei_txe_hw_is_ready(dev)) + return 0; + + mutex_unlock(&dev->device_lock); + wait_event_timeout(dev->wait_hw_ready, dev->recvd_hw_ready, + msecs_to_jiffies(SEC_RESET_WAIT_TIMEOUT)); + mutex_lock(&dev->device_lock); + if (!dev->recvd_hw_ready) { + dev_err(dev->dev, "wait for readiness failed\n"); + return -ETIME; + } + + dev->recvd_hw_ready = false; + return 0; +} + +static const struct mei_fw_status mei_txe_fw_sts = { + .count = 2, + .status[0] = PCI_CFG_TXE_FW_STS0, + .status[1] = PCI_CFG_TXE_FW_STS1 +}; + +/** + * mei_txe_fw_status - read fw status register from pci config space + * + * @dev: mei device + * @fw_status: fw status register values + * + * Return: 0 on success, error otherwise + */ +static int mei_txe_fw_status(struct mei_device *dev, + struct mei_fw_status *fw_status) +{ + const struct mei_fw_status *fw_src = &mei_txe_fw_sts; + struct pci_dev *pdev = to_pci_dev(dev->dev); + int ret; + int i; + + if (!fw_status) + return -EINVAL; + + fw_status->count = fw_src->count; + for (i = 0; i < fw_src->count && i < MEI_FW_STATUS_MAX; i++) { + ret = pci_read_config_dword(pdev, fw_src->status[i], + &fw_status->status[i]); + trace_mei_pci_cfg_read(dev->dev, "PCI_CFG_HSF_X", + fw_src->status[i], + fw_status->status[i]); + if (ret) + return ret; + } + + return 0; +} + +/** + * mei_txe_hw_config - configure hardware at the start of the devices + * + * @dev: the device structure + * + * Configure hardware at the start of the device should be done only + * once at the device probe time + */ +static void mei_txe_hw_config(struct mei_device *dev) +{ + + struct mei_txe_hw *hw = to_txe_hw(dev); + + hw->aliveness = mei_txe_aliveness_get(dev); + hw->readiness = mei_txe_readiness_get(dev); + + dev_dbg(dev->dev, "aliveness_resp = 0x%08x, readiness = 0x%08x.\n", + hw->aliveness, hw->readiness); +} + +/** + * mei_txe_write - writes a message to device. + * + * @dev: the device structure + * @hdr: header of message + * @hdr_len: header length in bytes - must multiplication of a slot (4bytes) + * @data: payload + * @data_len: paylead length in bytes + * + * Return: 0 if success, < 0 - otherwise. + */ +static int mei_txe_write(struct mei_device *dev, + const void *hdr, size_t hdr_len, + const void *data, size_t data_len) +{ + struct mei_txe_hw *hw = to_txe_hw(dev); + unsigned long rem; + const u32 *reg_buf; + u32 slots = TXE_HBUF_DEPTH; + u32 dw_cnt; + unsigned long i, j; + + if (WARN_ON(!hdr || !data || hdr_len & 0x3)) + return -EINVAL; + + dev_dbg(dev->dev, MEI_HDR_FMT, MEI_HDR_PRM((struct mei_msg_hdr *)hdr)); + + dw_cnt = mei_data2slots(hdr_len + data_len); + if (dw_cnt > slots) + return -EMSGSIZE; + + if (WARN(!hw->aliveness, "txe write: aliveness not asserted\n")) + return -EAGAIN; + + /* Enable Input Ready Interrupt. */ + mei_txe_input_ready_interrupt_enable(dev); + + if (!mei_txe_is_input_ready(dev)) { + char fw_sts_str[MEI_FW_STATUS_STR_SZ]; + + mei_fw_status_str(dev, fw_sts_str, MEI_FW_STATUS_STR_SZ); + dev_err(dev->dev, "Input is not ready %s\n", fw_sts_str); + return -EAGAIN; + } + + reg_buf = hdr; + for (i = 0; i < hdr_len / MEI_SLOT_SIZE; i++) + mei_txe_input_payload_write(dev, i, reg_buf[i]); + + reg_buf = data; + for (j = 0; j < data_len / MEI_SLOT_SIZE; j++) + mei_txe_input_payload_write(dev, i + j, reg_buf[j]); + + rem = data_len & 0x3; + if (rem > 0) { + u32 reg = 0; + + memcpy(®, (const u8 *)data + data_len - rem, rem); + mei_txe_input_payload_write(dev, i + j, reg); + } + + /* after each write the whole buffer is consumed */ + hw->slots = 0; + + /* Set Input-Doorbell */ + mei_txe_input_doorbell_set(hw); + + return 0; +} + +/** + * mei_txe_hbuf_depth - mimics the me hbuf circular buffer + * + * @dev: the device structure + * + * Return: the TXE_HBUF_DEPTH + */ +static u32 mei_txe_hbuf_depth(const struct mei_device *dev) +{ + return TXE_HBUF_DEPTH; +} + +/** + * mei_txe_hbuf_empty_slots - mimics the me hbuf circular buffer + * + * @dev: the device structure + * + * Return: always TXE_HBUF_DEPTH + */ +static int mei_txe_hbuf_empty_slots(struct mei_device *dev) +{ + struct mei_txe_hw *hw = to_txe_hw(dev); + + return hw->slots; +} + +/** + * mei_txe_count_full_read_slots - mimics the me device circular buffer + * + * @dev: the device structure + * + * Return: always buffer size in dwords count + */ +static int mei_txe_count_full_read_slots(struct mei_device *dev) +{ + /* read buffers has static size */ + return TXE_HBUF_DEPTH; +} + +/** + * mei_txe_read_hdr - read message header which is always in 4 first bytes + * + * @dev: the device structure + * + * Return: mei message header + */ + +static u32 mei_txe_read_hdr(const struct mei_device *dev) +{ + return mei_txe_out_data_read(dev, 0); +} +/** + * mei_txe_read - reads a message from the txe device. + * + * @dev: the device structure + * @buf: message buffer will be written + * @len: message size will be read + * + * Return: -EINVAL on error wrong argument and 0 on success + */ +static int mei_txe_read(struct mei_device *dev, + unsigned char *buf, unsigned long len) +{ + + struct mei_txe_hw *hw = to_txe_hw(dev); + u32 *reg_buf, reg; + u32 rem; + u32 i; + + if (WARN_ON(!buf || !len)) + return -EINVAL; + + reg_buf = (u32 *)buf; + rem = len & 0x3; + + dev_dbg(dev->dev, "buffer-length = %lu buf[0]0x%08X\n", + len, mei_txe_out_data_read(dev, 0)); + + for (i = 0; i < len / MEI_SLOT_SIZE; i++) { + /* skip header: index starts from 1 */ + reg = mei_txe_out_data_read(dev, i + 1); + dev_dbg(dev->dev, "buf[%d] = 0x%08X\n", i, reg); + *reg_buf++ = reg; + } + + if (rem) { + reg = mei_txe_out_data_read(dev, i + 1); + memcpy(reg_buf, ®, rem); + } + + mei_txe_output_ready_set(hw); + return 0; +} + +/** + * mei_txe_hw_reset - resets host and fw. + * + * @dev: the device structure + * @intr_enable: if interrupt should be enabled after reset. + * + * Return: 0 on success and < 0 in case of error + */ +static int mei_txe_hw_reset(struct mei_device *dev, bool intr_enable) +{ + struct mei_txe_hw *hw = to_txe_hw(dev); + + u32 aliveness_req; + /* + * read input doorbell to ensure consistency between Bridge and SeC + * return value might be garbage return + */ + (void)mei_txe_sec_reg_read_silent(hw, SEC_IPC_INPUT_DOORBELL_REG); + + aliveness_req = mei_txe_aliveness_req_get(dev); + hw->aliveness = mei_txe_aliveness_get(dev); + + /* Disable interrupts in this stage we will poll */ + mei_txe_intr_disable(dev); + + /* + * If Aliveness Request and Aliveness Response are not equal then + * wait for them to be equal + * Since we might have interrupts disabled - poll for it + */ + if (aliveness_req != hw->aliveness) + if (mei_txe_aliveness_poll(dev, aliveness_req) < 0) { + dev_err(dev->dev, "wait for aliveness settle failed ... bailing out\n"); + return -EIO; + } + + /* + * If Aliveness Request and Aliveness Response are set then clear them + */ + if (aliveness_req) { + mei_txe_aliveness_set(dev, 0); + if (mei_txe_aliveness_poll(dev, 0) < 0) { + dev_err(dev->dev, "wait for aliveness failed ... bailing out\n"); + return -EIO; + } + } + + /* + * Set readiness RDY_CLR bit + */ + mei_txe_readiness_clear(dev); + + return 0; +} + +/** + * mei_txe_hw_start - start the hardware after reset + * + * @dev: the device structure + * + * Return: 0 on success an error code otherwise + */ +static int mei_txe_hw_start(struct mei_device *dev) +{ + struct mei_txe_hw *hw = to_txe_hw(dev); + int ret; + + u32 hisr; + + /* bring back interrupts */ + mei_txe_intr_enable(dev); + + ret = mei_txe_readiness_wait(dev); + if (ret < 0) { + dev_err(dev->dev, "waiting for readiness failed\n"); + return ret; + } + + /* + * If HISR.INT2_STS interrupt status bit is set then clear it. + */ + hisr = mei_txe_br_reg_read(hw, HISR_REG); + if (hisr & HISR_INT_2_STS) + mei_txe_br_reg_write(hw, HISR_REG, HISR_INT_2_STS); + + /* Clear the interrupt cause of OutputDoorbell */ + clear_bit(TXE_INTR_OUT_DB_BIT, &hw->intr_cause); + + ret = mei_txe_aliveness_set_sync(dev, 1); + if (ret < 0) { + dev_err(dev->dev, "wait for aliveness failed ... bailing out\n"); + return ret; + } + + pm_runtime_set_active(dev->dev); + + /* enable input ready interrupts: + * SEC_IPC_HOST_INT_MASK.IPC_INPUT_READY_INT_MASK + */ + mei_txe_input_ready_interrupt_enable(dev); + + + /* Set the SICR_SEC_IPC_OUTPUT_STATUS.IPC_OUTPUT_READY bit */ + mei_txe_output_ready_set(hw); + + /* Set bit SICR_HOST_IPC_READINESS.HOST_RDY + */ + mei_txe_readiness_set_host_rdy(dev); + + return 0; +} + +/** + * mei_txe_check_and_ack_intrs - translate multi BAR interrupt into + * single bit mask and acknowledge the interrupts + * + * @dev: the device structure + * @do_ack: acknowledge interrupts + * + * Return: true if found interrupts to process. + */ +static bool mei_txe_check_and_ack_intrs(struct mei_device *dev, bool do_ack) +{ + struct mei_txe_hw *hw = to_txe_hw(dev); + u32 hisr; + u32 hhisr; + u32 ipc_isr; + u32 aliveness; + bool generated; + + /* read interrupt registers */ + hhisr = mei_txe_br_reg_read(hw, HHISR_REG); + generated = (hhisr & IPC_HHIER_MSK); + if (!generated) + goto out; + + hisr = mei_txe_br_reg_read(hw, HISR_REG); + + aliveness = mei_txe_aliveness_get(dev); + if (hhisr & IPC_HHIER_SEC && aliveness) { + ipc_isr = mei_txe_sec_reg_read_silent(hw, + SEC_IPC_HOST_INT_STATUS_REG); + } else { + ipc_isr = 0; + hhisr &= ~IPC_HHIER_SEC; + } + + generated = generated || + (hisr & HISR_INT_STS_MSK) || + (ipc_isr & SEC_IPC_HOST_INT_STATUS_PENDING); + + if (generated && do_ack) { + /* Save the interrupt causes */ + hw->intr_cause |= hisr & HISR_INT_STS_MSK; + if (ipc_isr & SEC_IPC_HOST_INT_STATUS_IN_RDY) + hw->intr_cause |= TXE_INTR_IN_READY; + + + mei_txe_intr_disable(dev); + /* Clear the interrupts in hierarchy: + * IPC and Bridge, than the High Level */ + mei_txe_sec_reg_write_silent(hw, + SEC_IPC_HOST_INT_STATUS_REG, ipc_isr); + mei_txe_br_reg_write(hw, HISR_REG, hisr); + mei_txe_br_reg_write(hw, HHISR_REG, hhisr); + } + +out: + return generated; +} + +/** + * mei_txe_irq_quick_handler - The ISR of the MEI device + * + * @irq: The irq number + * @dev_id: pointer to the device structure + * + * Return: IRQ_WAKE_THREAD if interrupt is designed for the device + * IRQ_NONE otherwise + */ +irqreturn_t mei_txe_irq_quick_handler(int irq, void *dev_id) +{ + struct mei_device *dev = dev_id; + + if (mei_txe_check_and_ack_intrs(dev, true)) + return IRQ_WAKE_THREAD; + return IRQ_NONE; +} + + +/** + * mei_txe_irq_thread_handler - txe interrupt thread + * + * @irq: The irq number + * @dev_id: pointer to the device structure + * + * Return: IRQ_HANDLED + */ +irqreturn_t mei_txe_irq_thread_handler(int irq, void *dev_id) +{ + struct mei_device *dev = (struct mei_device *) dev_id; + struct mei_txe_hw *hw = to_txe_hw(dev); + struct list_head cmpl_list; + s32 slots; + int rets = 0; + + dev_dbg(dev->dev, "irq thread: Interrupt Registers HHISR|HISR|SEC=%02X|%04X|%02X\n", + mei_txe_br_reg_read(hw, HHISR_REG), + mei_txe_br_reg_read(hw, HISR_REG), + mei_txe_sec_reg_read_silent(hw, SEC_IPC_HOST_INT_STATUS_REG)); + + + /* initialize our complete list */ + mutex_lock(&dev->device_lock); + INIT_LIST_HEAD(&cmpl_list); + + if (pci_dev_msi_enabled(to_pci_dev(dev->dev))) + mei_txe_check_and_ack_intrs(dev, true); + + /* show irq events */ + mei_txe_pending_interrupts(dev); + + hw->aliveness = mei_txe_aliveness_get(dev); + hw->readiness = mei_txe_readiness_get(dev); + + /* Readiness: + * Detection of TXE driver going through reset + * or TXE driver resetting the HECI interface. + */ + if (test_and_clear_bit(TXE_INTR_READINESS_BIT, &hw->intr_cause)) { + dev_dbg(dev->dev, "Readiness Interrupt was received...\n"); + + /* Check if SeC is going through reset */ + if (mei_txe_readiness_is_sec_rdy(hw->readiness)) { + dev_dbg(dev->dev, "we need to start the dev.\n"); + dev->recvd_hw_ready = true; + } else { + dev->recvd_hw_ready = false; + if (dev->dev_state != MEI_DEV_RESETTING) { + + dev_warn(dev->dev, "FW not ready: resetting.\n"); + schedule_work(&dev->reset_work); + goto end; + + } + } + wake_up(&dev->wait_hw_ready); + } + + /************************************************************/ + /* Check interrupt cause: + * Aliveness: Detection of SeC acknowledge of host request that + * it remain alive or host cancellation of that request. + */ + + if (test_and_clear_bit(TXE_INTR_ALIVENESS_BIT, &hw->intr_cause)) { + /* Clear the interrupt cause */ + dev_dbg(dev->dev, + "Aliveness Interrupt: Status: %d\n", hw->aliveness); + dev->pg_event = MEI_PG_EVENT_RECEIVED; + if (waitqueue_active(&hw->wait_aliveness_resp)) + wake_up(&hw->wait_aliveness_resp); + } + + + /* Output Doorbell: + * Detection of SeC having sent output to host + */ + slots = mei_count_full_read_slots(dev); + if (test_and_clear_bit(TXE_INTR_OUT_DB_BIT, &hw->intr_cause)) { + /* Read from TXE */ + rets = mei_irq_read_handler(dev, &cmpl_list, &slots); + if (rets && + (dev->dev_state != MEI_DEV_RESETTING && + dev->dev_state != MEI_DEV_POWER_DOWN)) { + dev_err(dev->dev, + "mei_irq_read_handler ret = %d.\n", rets); + + schedule_work(&dev->reset_work); + goto end; + } + } + /* Input Ready: Detection if host can write to SeC */ + if (test_and_clear_bit(TXE_INTR_IN_READY_BIT, &hw->intr_cause)) { + dev->hbuf_is_ready = true; + hw->slots = TXE_HBUF_DEPTH; + } + + if (hw->aliveness && dev->hbuf_is_ready) { + /* get the real register value */ + dev->hbuf_is_ready = mei_hbuf_is_ready(dev); + rets = mei_irq_write_handler(dev, &cmpl_list); + if (rets && rets != -EMSGSIZE) + dev_err(dev->dev, "mei_irq_write_handler ret = %d.\n", + rets); + dev->hbuf_is_ready = mei_hbuf_is_ready(dev); + } + + mei_irq_compl_handler(dev, &cmpl_list); + +end: + dev_dbg(dev->dev, "interrupt thread end ret = %d\n", rets); + + mutex_unlock(&dev->device_lock); + + mei_enable_interrupts(dev); + return IRQ_HANDLED; +} + +static const struct mei_hw_ops mei_txe_hw_ops = { + + .host_is_ready = mei_txe_host_is_ready, + + .fw_status = mei_txe_fw_status, + .pg_state = mei_txe_pg_state, + + .hw_is_ready = mei_txe_hw_is_ready, + .hw_reset = mei_txe_hw_reset, + .hw_config = mei_txe_hw_config, + .hw_start = mei_txe_hw_start, + + .pg_in_transition = mei_txe_pg_in_transition, + .pg_is_enabled = mei_txe_pg_is_enabled, + + .intr_clear = mei_txe_intr_clear, + .intr_enable = mei_txe_intr_enable, + .intr_disable = mei_txe_intr_disable, + .synchronize_irq = mei_txe_synchronize_irq, + + .hbuf_free_slots = mei_txe_hbuf_empty_slots, + .hbuf_is_ready = mei_txe_is_input_ready, + .hbuf_depth = mei_txe_hbuf_depth, + + .write = mei_txe_write, + + .rdbuf_full_slots = mei_txe_count_full_read_slots, + .read_hdr = mei_txe_read_hdr, + + .read = mei_txe_read, + +}; + +/** + * mei_txe_dev_init - allocates and initializes txe hardware specific structure + * + * @pdev: pci device + * + * Return: struct mei_device * on success or NULL + */ +struct mei_device *mei_txe_dev_init(struct pci_dev *pdev) +{ + struct mei_device *dev; + struct mei_txe_hw *hw; + + dev = devm_kzalloc(&pdev->dev, sizeof(struct mei_device) + + sizeof(struct mei_txe_hw), GFP_KERNEL); + if (!dev) + return NULL; + + mei_device_init(dev, &pdev->dev, &mei_txe_hw_ops); + + hw = to_txe_hw(dev); + + init_waitqueue_head(&hw->wait_aliveness_resp); + + return dev; +} + +/** + * mei_txe_setup_satt2 - SATT2 configuration for DMA support. + * + * @dev: the device structure + * @addr: physical address start of the range + * @range: physical range size + * + * Return: 0 on success an error code otherwise + */ +int mei_txe_setup_satt2(struct mei_device *dev, phys_addr_t addr, u32 range) +{ + struct mei_txe_hw *hw = to_txe_hw(dev); + + u32 lo32 = lower_32_bits(addr); + u32 hi32 = upper_32_bits(addr); + u32 ctrl; + + /* SATT is limited to 36 Bits */ + if (hi32 & ~0xF) + return -EINVAL; + + /* SATT has to be 16Byte aligned */ + if (lo32 & 0xF) + return -EINVAL; + + /* SATT range has to be 4Bytes aligned */ + if (range & 0x4) + return -EINVAL; + + /* SATT is limited to 32 MB range*/ + if (range > SATT_RANGE_MAX) + return -EINVAL; + + ctrl = SATT2_CTRL_VALID_MSK; + ctrl |= hi32 << SATT2_CTRL_BR_BASE_ADDR_REG_SHIFT; + + mei_txe_br_reg_write(hw, SATT2_SAP_SIZE_REG, range); + mei_txe_br_reg_write(hw, SATT2_BRG_BA_LSB_REG, lo32); + mei_txe_br_reg_write(hw, SATT2_CTRL_REG, ctrl); + dev_dbg(dev->dev, "SATT2: SAP_SIZE_OFFSET=0x%08X, BRG_BA_LSB_OFFSET=0x%08X, CTRL_OFFSET=0x%08X\n", + range, lo32, ctrl); + + return 0; +} diff --git a/drivers/misc/mei/hw-txe.h b/drivers/misc/mei/hw-txe.h new file mode 100644 index 000000000..e1e8b66d7 --- /dev/null +++ b/drivers/misc/mei/hw-txe.h @@ -0,0 +1,75 @@ +/* + * + * Intel Management Engine Interface (Intel MEI) Linux driver + * Copyright (c) 2013-2014, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#ifndef _MEI_HW_TXE_H_ +#define _MEI_HW_TXE_H_ + +#include + +#include "hw.h" +#include "hw-txe-regs.h" + +#define MEI_TXI_RPM_TIMEOUT 500 /* ms */ + +/* Flatten Hierarchy interrupt cause */ +#define TXE_INTR_READINESS_BIT 0 /* HISR_INT_0_STS */ +#define TXE_INTR_READINESS HISR_INT_0_STS +#define TXE_INTR_ALIVENESS_BIT 1 /* HISR_INT_1_STS */ +#define TXE_INTR_ALIVENESS HISR_INT_1_STS +#define TXE_INTR_OUT_DB_BIT 2 /* HISR_INT_2_STS */ +#define TXE_INTR_OUT_DB HISR_INT_2_STS +#define TXE_INTR_IN_READY_BIT 8 /* beyond HISR */ +#define TXE_INTR_IN_READY BIT(8) + +/** + * struct mei_txe_hw - txe hardware specifics + * + * @mem_addr: SeC and BRIDGE bars + * @aliveness: aliveness (power gating) state of the hardware + * @readiness: readiness state of the hardware + * @slots: number of empty slots + * @wait_aliveness_resp: aliveness wait queue + * @intr_cause: translated interrupt cause + */ +struct mei_txe_hw { + void __iomem * const *mem_addr; + u32 aliveness; + u32 readiness; + u32 slots; + + wait_queue_head_t wait_aliveness_resp; + + unsigned long intr_cause; +}; + +#define to_txe_hw(dev) (struct mei_txe_hw *)((dev)->hw) + +static inline struct mei_device *hw_txe_to_mei(struct mei_txe_hw *hw) +{ + return container_of((void *)hw, struct mei_device, hw); +} + +struct mei_device *mei_txe_dev_init(struct pci_dev *pdev); + +irqreturn_t mei_txe_irq_quick_handler(int irq, void *dev_id); +irqreturn_t mei_txe_irq_thread_handler(int irq, void *dev_id); + +int mei_txe_aliveness_set_sync(struct mei_device *dev, u32 req); + +int mei_txe_setup_satt2(struct mei_device *dev, phys_addr_t addr, u32 range); + + +#endif /* _MEI_HW_TXE_H_ */ diff --git a/drivers/misc/mei/hw.h b/drivers/misc/mei/hw.h new file mode 100644 index 000000000..656559257 --- /dev/null +++ b/drivers/misc/mei/hw.h @@ -0,0 +1,515 @@ +/* + * + * Intel Management Engine Interface (Intel MEI) Linux driver + * Copyright (c) 2003-2012, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#ifndef _MEI_HW_TYPES_H_ +#define _MEI_HW_TYPES_H_ + +#include + +/* + * Timeouts in Seconds + */ +#define MEI_HW_READY_TIMEOUT 2 /* Timeout on ready message */ +#define MEI_CONNECT_TIMEOUT 3 /* HPS: at least 2 seconds */ + +#define MEI_CL_CONNECT_TIMEOUT 15 /* HPS: Client Connect Timeout */ +#define MEI_CLIENTS_INIT_TIMEOUT 15 /* HPS: Clients Enumeration Timeout */ + +#define MEI_PGI_TIMEOUT 1 /* PG Isolation time response 1 sec */ +#define MEI_D0I3_TIMEOUT 5 /* D0i3 set/unset max response time */ +#define MEI_HBM_TIMEOUT 1 /* 1 second */ + +/* + * MEI Version + */ +#define HBM_MINOR_VERSION 0 +#define HBM_MAJOR_VERSION 2 + +/* + * MEI version with PGI support + */ +#define HBM_MINOR_VERSION_PGI 1 +#define HBM_MAJOR_VERSION_PGI 1 + +/* + * MEI version with Dynamic clients support + */ +#define HBM_MINOR_VERSION_DC 0 +#define HBM_MAJOR_VERSION_DC 2 + +/* + * MEI version with immediate reply to enum request support + */ +#define HBM_MINOR_VERSION_IE 0 +#define HBM_MAJOR_VERSION_IE 2 + +/* + * MEI version with disconnect on connection timeout support + */ +#define HBM_MINOR_VERSION_DOT 0 +#define HBM_MAJOR_VERSION_DOT 2 + +/* + * MEI version with notification support + */ +#define HBM_MINOR_VERSION_EV 0 +#define HBM_MAJOR_VERSION_EV 2 + +/* + * MEI version with fixed address client support + */ +#define HBM_MINOR_VERSION_FA 0 +#define HBM_MAJOR_VERSION_FA 2 + +/* + * MEI version with OS ver message support + */ +#define HBM_MINOR_VERSION_OS 0 +#define HBM_MAJOR_VERSION_OS 2 + +/* + * MEI version with dma ring support + */ +#define HBM_MINOR_VERSION_DR 1 +#define HBM_MAJOR_VERSION_DR 2 + +/* Host bus message command opcode */ +#define MEI_HBM_CMD_OP_MSK 0x7f +/* Host bus message command RESPONSE */ +#define MEI_HBM_CMD_RES_MSK 0x80 + +/* + * MEI Bus Message Command IDs + */ +#define HOST_START_REQ_CMD 0x01 +#define HOST_START_RES_CMD 0x81 + +#define HOST_STOP_REQ_CMD 0x02 +#define HOST_STOP_RES_CMD 0x82 + +#define ME_STOP_REQ_CMD 0x03 + +#define HOST_ENUM_REQ_CMD 0x04 +#define HOST_ENUM_RES_CMD 0x84 + +#define HOST_CLIENT_PROPERTIES_REQ_CMD 0x05 +#define HOST_CLIENT_PROPERTIES_RES_CMD 0x85 + +#define CLIENT_CONNECT_REQ_CMD 0x06 +#define CLIENT_CONNECT_RES_CMD 0x86 + +#define CLIENT_DISCONNECT_REQ_CMD 0x07 +#define CLIENT_DISCONNECT_RES_CMD 0x87 + +#define MEI_FLOW_CONTROL_CMD 0x08 + +#define MEI_PG_ISOLATION_ENTRY_REQ_CMD 0x0a +#define MEI_PG_ISOLATION_ENTRY_RES_CMD 0x8a +#define MEI_PG_ISOLATION_EXIT_REQ_CMD 0x0b +#define MEI_PG_ISOLATION_EXIT_RES_CMD 0x8b + +#define MEI_HBM_ADD_CLIENT_REQ_CMD 0x0f +#define MEI_HBM_ADD_CLIENT_RES_CMD 0x8f + +#define MEI_HBM_NOTIFY_REQ_CMD 0x10 +#define MEI_HBM_NOTIFY_RES_CMD 0x90 +#define MEI_HBM_NOTIFICATION_CMD 0x11 + +#define MEI_HBM_DMA_SETUP_REQ_CMD 0x12 +#define MEI_HBM_DMA_SETUP_RES_CMD 0x92 + +/* + * MEI Stop Reason + * used by hbm_host_stop_request.reason + */ +enum mei_stop_reason_types { + DRIVER_STOP_REQUEST = 0x00, + DEVICE_D1_ENTRY = 0x01, + DEVICE_D2_ENTRY = 0x02, + DEVICE_D3_ENTRY = 0x03, + SYSTEM_S1_ENTRY = 0x04, + SYSTEM_S2_ENTRY = 0x05, + SYSTEM_S3_ENTRY = 0x06, + SYSTEM_S4_ENTRY = 0x07, + SYSTEM_S5_ENTRY = 0x08 +}; + + +/** + * enum mei_hbm_status - mei host bus messages return values + * + * @MEI_HBMS_SUCCESS : status success + * @MEI_HBMS_CLIENT_NOT_FOUND : client not found + * @MEI_HBMS_ALREADY_EXISTS : connection already established + * @MEI_HBMS_REJECTED : connection is rejected + * @MEI_HBMS_INVALID_PARAMETER : invalid parameter + * @MEI_HBMS_NOT_ALLOWED : operation not allowed + * @MEI_HBMS_ALREADY_STARTED : system is already started + * @MEI_HBMS_NOT_STARTED : system not started + * + * @MEI_HBMS_MAX : sentinel + */ +enum mei_hbm_status { + MEI_HBMS_SUCCESS = 0, + MEI_HBMS_CLIENT_NOT_FOUND = 1, + MEI_HBMS_ALREADY_EXISTS = 2, + MEI_HBMS_REJECTED = 3, + MEI_HBMS_INVALID_PARAMETER = 4, + MEI_HBMS_NOT_ALLOWED = 5, + MEI_HBMS_ALREADY_STARTED = 6, + MEI_HBMS_NOT_STARTED = 7, + + MEI_HBMS_MAX +}; + + +/* + * Client Connect Status + * used by hbm_client_connect_response.status + */ +enum mei_cl_connect_status { + MEI_CL_CONN_SUCCESS = MEI_HBMS_SUCCESS, + MEI_CL_CONN_NOT_FOUND = MEI_HBMS_CLIENT_NOT_FOUND, + MEI_CL_CONN_ALREADY_STARTED = MEI_HBMS_ALREADY_EXISTS, + MEI_CL_CONN_OUT_OF_RESOURCES = MEI_HBMS_REJECTED, + MEI_CL_CONN_MESSAGE_SMALL = MEI_HBMS_INVALID_PARAMETER, + MEI_CL_CONN_NOT_ALLOWED = MEI_HBMS_NOT_ALLOWED, +}; + +/* + * Client Disconnect Status + */ +enum mei_cl_disconnect_status { + MEI_CL_DISCONN_SUCCESS = MEI_HBMS_SUCCESS +}; + +/** + * struct mei_msg_hdr - MEI BUS Interface Section + * + * @me_addr: device address + * @host_addr: host address + * @length: message length + * @reserved: reserved + * @dma_ring: message is on dma ring + * @internal: message is internal + * @msg_complete: last packet of the message + */ +struct mei_msg_hdr { + u32 me_addr:8; + u32 host_addr:8; + u32 length:9; + u32 reserved:4; + u32 dma_ring:1; + u32 internal:1; + u32 msg_complete:1; +} __packed; + +struct mei_bus_message { + u8 hbm_cmd; + u8 data[0]; +} __packed; + +/** + * struct hbm_cl_cmd - client specific host bus command + * CONNECT, DISCONNECT, and FlOW CONTROL + * + * @hbm_cmd: bus message command header + * @me_addr: address of the client in ME + * @host_addr: address of the client in the driver + * @data: generic data + */ +struct mei_hbm_cl_cmd { + u8 hbm_cmd; + u8 me_addr; + u8 host_addr; + u8 data; +}; + +struct hbm_version { + u8 minor_version; + u8 major_version; +} __packed; + +struct hbm_host_version_request { + u8 hbm_cmd; + u8 reserved; + struct hbm_version host_version; +} __packed; + +struct hbm_host_version_response { + u8 hbm_cmd; + u8 host_version_supported; + struct hbm_version me_max_version; +} __packed; + +struct hbm_host_stop_request { + u8 hbm_cmd; + u8 reason; + u8 reserved[2]; +} __packed; + +struct hbm_host_stop_response { + u8 hbm_cmd; + u8 reserved[3]; +} __packed; + +struct hbm_me_stop_request { + u8 hbm_cmd; + u8 reason; + u8 reserved[2]; +} __packed; + +/** + * enum hbm_host_enum_flags - enumeration request flags (HBM version >= 2.0) + * + * @MEI_HBM_ENUM_F_ALLOW_ADD: allow dynamic clients add + * @MEI_HBM_ENUM_F_IMMEDIATE_ENUM: allow FW to send answer immediately + */ +enum hbm_host_enum_flags { + MEI_HBM_ENUM_F_ALLOW_ADD = BIT(0), + MEI_HBM_ENUM_F_IMMEDIATE_ENUM = BIT(1), +}; + +/** + * struct hbm_host_enum_request - enumeration request from host to fw + * + * @hbm_cmd : bus message command header + * @flags : request flags + * @reserved: reserved + */ +struct hbm_host_enum_request { + u8 hbm_cmd; + u8 flags; + u8 reserved[2]; +} __packed; + +struct hbm_host_enum_response { + u8 hbm_cmd; + u8 reserved[3]; + u8 valid_addresses[32]; +} __packed; + +struct mei_client_properties { + uuid_le protocol_name; + u8 protocol_version; + u8 max_number_of_connections; + u8 fixed_address; + u8 single_recv_buf; + u32 max_msg_length; +} __packed; + +struct hbm_props_request { + u8 hbm_cmd; + u8 me_addr; + u8 reserved[2]; +} __packed; + +struct hbm_props_response { + u8 hbm_cmd; + u8 me_addr; + u8 status; + u8 reserved[1]; + struct mei_client_properties client_properties; +} __packed; + +/** + * struct hbm_add_client_request - request to add a client + * might be sent by fw after enumeration has already completed + * + * @hbm_cmd: bus message command header + * @me_addr: address of the client in ME + * @reserved: reserved + * @client_properties: client properties + */ +struct hbm_add_client_request { + u8 hbm_cmd; + u8 me_addr; + u8 reserved[2]; + struct mei_client_properties client_properties; +} __packed; + +/** + * struct hbm_add_client_response - response to add a client + * sent by the host to report client addition status to fw + * + * @hbm_cmd: bus message command header + * @me_addr: address of the client in ME + * @status: if HBMS_SUCCESS then the client can now accept connections. + * @reserved: reserved + */ +struct hbm_add_client_response { + u8 hbm_cmd; + u8 me_addr; + u8 status; + u8 reserved[1]; +} __packed; + +/** + * struct hbm_power_gate - power gate request/response + * + * @hbm_cmd: bus message command header + * @reserved: reserved + */ +struct hbm_power_gate { + u8 hbm_cmd; + u8 reserved[3]; +} __packed; + +/** + * struct hbm_client_connect_request - connect/disconnect request + * + * @hbm_cmd: bus message command header + * @me_addr: address of the client in ME + * @host_addr: address of the client in the driver + * @reserved: reserved + */ +struct hbm_client_connect_request { + u8 hbm_cmd; + u8 me_addr; + u8 host_addr; + u8 reserved; +} __packed; + +/** + * struct hbm_client_connect_response - connect/disconnect response + * + * @hbm_cmd: bus message command header + * @me_addr: address of the client in ME + * @host_addr: address of the client in the driver + * @status: status of the request + */ +struct hbm_client_connect_response { + u8 hbm_cmd; + u8 me_addr; + u8 host_addr; + u8 status; +} __packed; + + +#define MEI_FC_MESSAGE_RESERVED_LENGTH 5 + +struct hbm_flow_control { + u8 hbm_cmd; + u8 me_addr; + u8 host_addr; + u8 reserved[MEI_FC_MESSAGE_RESERVED_LENGTH]; +} __packed; + +#define MEI_HBM_NOTIFICATION_START 1 +#define MEI_HBM_NOTIFICATION_STOP 0 +/** + * struct hbm_notification_request - start/stop notification request + * + * @hbm_cmd: bus message command header + * @me_addr: address of the client in ME + * @host_addr: address of the client in the driver + * @start: start = 1 or stop = 0 asynchronous notifications + */ +struct hbm_notification_request { + u8 hbm_cmd; + u8 me_addr; + u8 host_addr; + u8 start; +} __packed; + +/** + * struct hbm_notification_response - start/stop notification response + * + * @hbm_cmd: bus message command header + * @me_addr: address of the client in ME + * @host_addr: - address of the client in the driver + * @status: (mei_hbm_status) response status for the request + * - MEI_HBMS_SUCCESS: successful stop/start + * - MEI_HBMS_CLIENT_NOT_FOUND: if the connection could not be found. + * - MEI_HBMS_ALREADY_STARTED: for start requests for a previously + * started notification. + * - MEI_HBMS_NOT_STARTED: for stop request for a connected client for whom + * asynchronous notifications are currently disabled. + * + * @start: start = 1 or stop = 0 asynchronous notifications + * @reserved: reserved + */ +struct hbm_notification_response { + u8 hbm_cmd; + u8 me_addr; + u8 host_addr; + u8 status; + u8 start; + u8 reserved[3]; +} __packed; + +/** + * struct hbm_notification - notification event + * + * @hbm_cmd: bus message command header + * @me_addr: address of the client in ME + * @host_addr: address of the client in the driver + * @reserved: reserved for alignment + */ +struct hbm_notification { + u8 hbm_cmd; + u8 me_addr; + u8 host_addr; + u8 reserved[1]; +} __packed; + +/** + * struct hbm_dma_mem_dscr - dma ring + * + * @addr_hi: the high 32bits of 64 bit address + * @addr_lo: the low 32bits of 64 bit address + * @size : size in bytes (must be power of 2) + */ +struct hbm_dma_mem_dscr { + u32 addr_hi; + u32 addr_lo; + u32 size; +} __packed; + +enum { + DMA_DSCR_HOST = 0, + DMA_DSCR_DEVICE = 1, + DMA_DSCR_CTRL = 2, + DMA_DSCR_NUM, +}; + +/** + * struct hbm_dma_setup_request - dma setup request + * + * @hbm_cmd: bus message command header + * @reserved: reserved for alignment + * @dma_dscr: dma descriptor for HOST, DEVICE, and CTRL + */ +struct hbm_dma_setup_request { + u8 hbm_cmd; + u8 reserved[3]; + struct hbm_dma_mem_dscr dma_dscr[DMA_DSCR_NUM]; +} __packed; + +/** + * struct hbm_dma_setup_response - dma setup response + * + * @hbm_cmd: bus message command header + * @status: 0 on success; otherwise DMA setup failed. + * @reserved: reserved for alignment + */ +struct hbm_dma_setup_response { + u8 hbm_cmd; + u8 status; + u8 reserved[2]; +} __packed; + +#endif diff --git a/drivers/misc/mei/init.c b/drivers/misc/mei/init.c new file mode 100644 index 000000000..4888ebc07 --- /dev/null +++ b/drivers/misc/mei/init.c @@ -0,0 +1,406 @@ +/* + * + * Intel Management Engine Interface (Intel MEI) Linux driver + * Copyright (c) 2003-2012, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#include +#include +#include +#include + +#include + +#include "mei_dev.h" +#include "hbm.h" +#include "client.h" + +const char *mei_dev_state_str(int state) +{ +#define MEI_DEV_STATE(state) case MEI_DEV_##state: return #state + switch (state) { + MEI_DEV_STATE(INITIALIZING); + MEI_DEV_STATE(INIT_CLIENTS); + MEI_DEV_STATE(ENABLED); + MEI_DEV_STATE(RESETTING); + MEI_DEV_STATE(DISABLED); + MEI_DEV_STATE(POWER_DOWN); + MEI_DEV_STATE(POWER_UP); + default: + return "unknown"; + } +#undef MEI_DEV_STATE +} + +const char *mei_pg_state_str(enum mei_pg_state state) +{ +#define MEI_PG_STATE(state) case MEI_PG_##state: return #state + switch (state) { + MEI_PG_STATE(OFF); + MEI_PG_STATE(ON); + default: + return "unknown"; + } +#undef MEI_PG_STATE +} + +/** + * mei_fw_status2str - convert fw status registers to printable string + * + * @fw_status: firmware status + * @buf: string buffer at minimal size MEI_FW_STATUS_STR_SZ + * @len: buffer len must be >= MEI_FW_STATUS_STR_SZ + * + * Return: number of bytes written or -EINVAL if buffer is to small + */ +ssize_t mei_fw_status2str(struct mei_fw_status *fw_status, + char *buf, size_t len) +{ + ssize_t cnt = 0; + int i; + + buf[0] = '\0'; + + if (len < MEI_FW_STATUS_STR_SZ) + return -EINVAL; + + for (i = 0; i < fw_status->count; i++) + cnt += scnprintf(buf + cnt, len - cnt, "%08X ", + fw_status->status[i]); + + /* drop last space */ + buf[cnt] = '\0'; + return cnt; +} +EXPORT_SYMBOL_GPL(mei_fw_status2str); + +/** + * mei_cancel_work - Cancel mei background jobs + * + * @dev: the device structure + */ +void mei_cancel_work(struct mei_device *dev) +{ + cancel_work_sync(&dev->reset_work); + cancel_work_sync(&dev->bus_rescan_work); + + cancel_delayed_work_sync(&dev->timer_work); +} +EXPORT_SYMBOL_GPL(mei_cancel_work); + +/** + * mei_reset - resets host and fw. + * + * @dev: the device structure + * + * Return: 0 on success or < 0 if the reset hasn't succeeded + */ +int mei_reset(struct mei_device *dev) +{ + enum mei_dev_state state = dev->dev_state; + bool interrupts_enabled; + int ret; + + if (state != MEI_DEV_INITIALIZING && + state != MEI_DEV_DISABLED && + state != MEI_DEV_POWER_DOWN && + state != MEI_DEV_POWER_UP) { + char fw_sts_str[MEI_FW_STATUS_STR_SZ]; + + mei_fw_status_str(dev, fw_sts_str, MEI_FW_STATUS_STR_SZ); + dev_warn(dev->dev, "unexpected reset: dev_state = %s fw status = %s\n", + mei_dev_state_str(state), fw_sts_str); + } + + mei_clear_interrupts(dev); + + /* we're already in reset, cancel the init timer + * if the reset was called due the hbm protocol error + * we need to call it before hw start + * so the hbm watchdog won't kick in + */ + mei_hbm_idle(dev); + + /* enter reset flow */ + interrupts_enabled = state != MEI_DEV_POWER_DOWN; + dev->dev_state = MEI_DEV_RESETTING; + + dev->reset_count++; + if (dev->reset_count > MEI_MAX_CONSEC_RESET) { + dev_err(dev->dev, "reset: reached maximal consecutive resets: disabling the device\n"); + dev->dev_state = MEI_DEV_DISABLED; + return -ENODEV; + } + + ret = mei_hw_reset(dev, interrupts_enabled); + /* fall through and remove the sw state even if hw reset has failed */ + + /* no need to clean up software state in case of power up */ + if (state != MEI_DEV_INITIALIZING && state != MEI_DEV_POWER_UP) + mei_cl_all_disconnect(dev); + + mei_hbm_reset(dev); + + dev->rd_msg_hdr = 0; + + if (ret) { + dev_err(dev->dev, "hw_reset failed ret = %d\n", ret); + return ret; + } + + if (state == MEI_DEV_POWER_DOWN) { + dev_dbg(dev->dev, "powering down: end of reset\n"); + dev->dev_state = MEI_DEV_DISABLED; + return 0; + } + + ret = mei_hw_start(dev); + if (ret) { + dev_err(dev->dev, "hw_start failed ret = %d\n", ret); + return ret; + } + + dev_dbg(dev->dev, "link is established start sending messages.\n"); + + dev->dev_state = MEI_DEV_INIT_CLIENTS; + ret = mei_hbm_start_req(dev); + if (ret) { + dev_err(dev->dev, "hbm_start failed ret = %d\n", ret); + dev->dev_state = MEI_DEV_RESETTING; + return ret; + } + + return 0; +} +EXPORT_SYMBOL_GPL(mei_reset); + +/** + * mei_start - initializes host and fw to start work. + * + * @dev: the device structure + * + * Return: 0 on success, <0 on failure. + */ +int mei_start(struct mei_device *dev) +{ + int ret; + + mutex_lock(&dev->device_lock); + + /* acknowledge interrupt and stop interrupts */ + mei_clear_interrupts(dev); + + mei_hw_config(dev); + + dev_dbg(dev->dev, "reset in start the mei device.\n"); + + dev->reset_count = 0; + do { + dev->dev_state = MEI_DEV_INITIALIZING; + ret = mei_reset(dev); + + if (ret == -ENODEV || dev->dev_state == MEI_DEV_DISABLED) { + dev_err(dev->dev, "reset failed ret = %d", ret); + goto err; + } + } while (ret); + + if (mei_hbm_start_wait(dev)) { + dev_err(dev->dev, "HBM haven't started"); + goto err; + } + + if (!mei_host_is_ready(dev)) { + dev_err(dev->dev, "host is not ready.\n"); + goto err; + } + + if (!mei_hw_is_ready(dev)) { + dev_err(dev->dev, "ME is not ready.\n"); + goto err; + } + + if (!mei_hbm_version_is_supported(dev)) { + dev_dbg(dev->dev, "MEI start failed.\n"); + goto err; + } + + dev_dbg(dev->dev, "link layer has been established.\n"); + + mutex_unlock(&dev->device_lock); + return 0; +err: + dev_err(dev->dev, "link layer initialization failed.\n"); + dev->dev_state = MEI_DEV_DISABLED; + mutex_unlock(&dev->device_lock); + return -ENODEV; +} +EXPORT_SYMBOL_GPL(mei_start); + +/** + * mei_restart - restart device after suspend + * + * @dev: the device structure + * + * Return: 0 on success or -ENODEV if the restart hasn't succeeded + */ +int mei_restart(struct mei_device *dev) +{ + int err; + + mutex_lock(&dev->device_lock); + + dev->dev_state = MEI_DEV_POWER_UP; + dev->reset_count = 0; + + err = mei_reset(dev); + + mutex_unlock(&dev->device_lock); + + if (err == -ENODEV || dev->dev_state == MEI_DEV_DISABLED) { + dev_err(dev->dev, "device disabled = %d\n", err); + return -ENODEV; + } + + /* try to start again */ + if (err) + schedule_work(&dev->reset_work); + + + return 0; +} +EXPORT_SYMBOL_GPL(mei_restart); + +static void mei_reset_work(struct work_struct *work) +{ + struct mei_device *dev = + container_of(work, struct mei_device, reset_work); + int ret; + + mei_clear_interrupts(dev); + mei_synchronize_irq(dev); + + mutex_lock(&dev->device_lock); + + ret = mei_reset(dev); + + mutex_unlock(&dev->device_lock); + + if (dev->dev_state == MEI_DEV_DISABLED) { + dev_err(dev->dev, "device disabled = %d\n", ret); + return; + } + + /* retry reset in case of failure */ + if (ret) + schedule_work(&dev->reset_work); +} + +void mei_stop(struct mei_device *dev) +{ + dev_dbg(dev->dev, "stopping the device.\n"); + + mutex_lock(&dev->device_lock); + dev->dev_state = MEI_DEV_POWER_DOWN; + mutex_unlock(&dev->device_lock); + mei_cl_bus_remove_devices(dev); + + mei_cancel_work(dev); + + mei_clear_interrupts(dev); + mei_synchronize_irq(dev); + + mutex_lock(&dev->device_lock); + + mei_reset(dev); + /* move device to disabled state unconditionally */ + dev->dev_state = MEI_DEV_DISABLED; + + mutex_unlock(&dev->device_lock); +} +EXPORT_SYMBOL_GPL(mei_stop); + +/** + * mei_write_is_idle - check if the write queues are idle + * + * @dev: the device structure + * + * Return: true of there is no pending write + */ +bool mei_write_is_idle(struct mei_device *dev) +{ + bool idle = (dev->dev_state == MEI_DEV_ENABLED && + list_empty(&dev->ctrl_wr_list) && + list_empty(&dev->write_list) && + list_empty(&dev->write_waiting_list)); + + dev_dbg(dev->dev, "write pg: is idle[%d] state=%s ctrl=%01d write=%01d wwait=%01d\n", + idle, + mei_dev_state_str(dev->dev_state), + list_empty(&dev->ctrl_wr_list), + list_empty(&dev->write_list), + list_empty(&dev->write_waiting_list)); + + return idle; +} +EXPORT_SYMBOL_GPL(mei_write_is_idle); + +/** + * mei_device_init -- initialize mei_device structure + * + * @dev: the mei device + * @device: the device structure + * @hw_ops: hw operations + */ +void mei_device_init(struct mei_device *dev, + struct device *device, + const struct mei_hw_ops *hw_ops) +{ + /* setup our list array */ + INIT_LIST_HEAD(&dev->file_list); + INIT_LIST_HEAD(&dev->device_list); + INIT_LIST_HEAD(&dev->me_clients); + mutex_init(&dev->device_lock); + init_rwsem(&dev->me_clients_rwsem); + mutex_init(&dev->cl_bus_lock); + init_waitqueue_head(&dev->wait_hw_ready); + init_waitqueue_head(&dev->wait_pg); + init_waitqueue_head(&dev->wait_hbm_start); + dev->dev_state = MEI_DEV_INITIALIZING; + dev->reset_count = 0; + + INIT_LIST_HEAD(&dev->write_list); + INIT_LIST_HEAD(&dev->write_waiting_list); + INIT_LIST_HEAD(&dev->ctrl_wr_list); + INIT_LIST_HEAD(&dev->ctrl_rd_list); + dev->tx_queue_limit = MEI_TX_QUEUE_LIMIT_DEFAULT; + + INIT_DELAYED_WORK(&dev->timer_work, mei_timer); + INIT_WORK(&dev->reset_work, mei_reset_work); + INIT_WORK(&dev->bus_rescan_work, mei_cl_bus_rescan_work); + + bitmap_zero(dev->host_clients_map, MEI_CLIENTS_MAX); + dev->open_handle_count = 0; + + /* + * Reserving the first client ID + * 0: Reserved for MEI Bus Message communications + */ + bitmap_set(dev->host_clients_map, 0, 1); + + dev->pg_event = MEI_PG_EVENT_IDLE; + dev->ops = hw_ops; + dev->dev = device; +} +EXPORT_SYMBOL_GPL(mei_device_init); + diff --git a/drivers/misc/mei/interrupt.c b/drivers/misc/mei/interrupt.c new file mode 100644 index 000000000..66f4d12d0 --- /dev/null +++ b/drivers/misc/mei/interrupt.c @@ -0,0 +1,535 @@ +/* + * + * Intel Management Engine Interface (Intel MEI) Linux driver + * Copyright (c) 2003-2012, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "mei_dev.h" +#include "hbm.h" +#include "client.h" + + +/** + * mei_irq_compl_handler - dispatch complete handlers + * for the completed callbacks + * + * @dev: mei device + * @cmpl_list: list of completed cbs + */ +void mei_irq_compl_handler(struct mei_device *dev, struct list_head *cmpl_list) +{ + struct mei_cl_cb *cb, *next; + struct mei_cl *cl; + + list_for_each_entry_safe(cb, next, cmpl_list, list) { + cl = cb->cl; + list_del_init(&cb->list); + + dev_dbg(dev->dev, "completing call back.\n"); + mei_cl_complete(cl, cb); + } +} +EXPORT_SYMBOL_GPL(mei_irq_compl_handler); + +/** + * mei_cl_hbm_equal - check if hbm is addressed to the client + * + * @cl: host client + * @mei_hdr: header of mei client message + * + * Return: true if matches, false otherwise + */ +static inline int mei_cl_hbm_equal(struct mei_cl *cl, + struct mei_msg_hdr *mei_hdr) +{ + return mei_cl_host_addr(cl) == mei_hdr->host_addr && + mei_cl_me_id(cl) == mei_hdr->me_addr; +} + +/** + * mei_irq_discard_msg - discard received message + * + * @dev: mei device + * @hdr: message header + */ +static void mei_irq_discard_msg(struct mei_device *dev, struct mei_msg_hdr *hdr) +{ + /* + * no need to check for size as it is guarantied + * that length fits into rd_msg_buf + */ + mei_read_slots(dev, dev->rd_msg_buf, hdr->length); + dev_dbg(dev->dev, "discarding message " MEI_HDR_FMT "\n", + MEI_HDR_PRM(hdr)); +} + +/** + * mei_cl_irq_read_msg - process client message + * + * @cl: reading client + * @mei_hdr: header of mei client message + * @cmpl_list: completion list + * + * Return: always 0 + */ +static int mei_cl_irq_read_msg(struct mei_cl *cl, + struct mei_msg_hdr *mei_hdr, + struct list_head *cmpl_list) +{ + struct mei_device *dev = cl->dev; + struct mei_cl_cb *cb; + size_t buf_sz; + + cb = list_first_entry_or_null(&cl->rd_pending, struct mei_cl_cb, list); + if (!cb) { + if (!mei_cl_is_fixed_address(cl)) { + cl_err(dev, cl, "pending read cb not found\n"); + goto discard; + } + cb = mei_cl_alloc_cb(cl, mei_cl_mtu(cl), MEI_FOP_READ, cl->fp); + if (!cb) + goto discard; + list_add_tail(&cb->list, &cl->rd_pending); + } + + if (!mei_cl_is_connected(cl)) { + cl_dbg(dev, cl, "not connected\n"); + cb->status = -ENODEV; + goto discard; + } + + buf_sz = mei_hdr->length + cb->buf_idx; + /* catch for integer overflow */ + if (buf_sz < cb->buf_idx) { + cl_err(dev, cl, "message is too big len %d idx %zu\n", + mei_hdr->length, cb->buf_idx); + cb->status = -EMSGSIZE; + goto discard; + } + + if (cb->buf.size < buf_sz) { + cl_dbg(dev, cl, "message overflow. size %zu len %d idx %zu\n", + cb->buf.size, mei_hdr->length, cb->buf_idx); + cb->status = -EMSGSIZE; + goto discard; + } + + mei_read_slots(dev, cb->buf.data + cb->buf_idx, mei_hdr->length); + + cb->buf_idx += mei_hdr->length; + + if (mei_hdr->msg_complete) { + cl_dbg(dev, cl, "completed read length = %zu\n", cb->buf_idx); + list_move_tail(&cb->list, cmpl_list); + } else { + pm_runtime_mark_last_busy(dev->dev); + pm_request_autosuspend(dev->dev); + } + + return 0; + +discard: + if (cb) + list_move_tail(&cb->list, cmpl_list); + mei_irq_discard_msg(dev, mei_hdr); + return 0; +} + +/** + * mei_cl_irq_disconnect_rsp - send disconnection response message + * + * @cl: client + * @cb: callback block. + * @cmpl_list: complete list. + * + * Return: 0, OK; otherwise, error. + */ +static int mei_cl_irq_disconnect_rsp(struct mei_cl *cl, struct mei_cl_cb *cb, + struct list_head *cmpl_list) +{ + struct mei_device *dev = cl->dev; + u32 msg_slots; + int slots; + int ret; + + msg_slots = mei_hbm2slots(sizeof(struct hbm_client_connect_response)); + slots = mei_hbuf_empty_slots(dev); + if (slots < 0) + return -EOVERFLOW; + + if ((u32)slots < msg_slots) + return -EMSGSIZE; + + ret = mei_hbm_cl_disconnect_rsp(dev, cl); + list_move_tail(&cb->list, cmpl_list); + + return ret; +} + +/** + * mei_cl_irq_read - processes client read related operation from the + * interrupt thread context - request for flow control credits + * + * @cl: client + * @cb: callback block. + * @cmpl_list: complete list. + * + * Return: 0, OK; otherwise, error. + */ +static int mei_cl_irq_read(struct mei_cl *cl, struct mei_cl_cb *cb, + struct list_head *cmpl_list) +{ + struct mei_device *dev = cl->dev; + u32 msg_slots; + int slots; + int ret; + + if (!list_empty(&cl->rd_pending)) + return 0; + + msg_slots = mei_hbm2slots(sizeof(struct hbm_flow_control)); + slots = mei_hbuf_empty_slots(dev); + if (slots < 0) + return -EOVERFLOW; + + if ((u32)slots < msg_slots) + return -EMSGSIZE; + + ret = mei_hbm_cl_flow_control_req(dev, cl); + if (ret) { + cl->status = ret; + cb->buf_idx = 0; + list_move_tail(&cb->list, cmpl_list); + return ret; + } + + pm_runtime_mark_last_busy(dev->dev); + pm_request_autosuspend(dev->dev); + + list_move_tail(&cb->list, &cl->rd_pending); + + return 0; +} + +static inline bool hdr_is_hbm(struct mei_msg_hdr *mei_hdr) +{ + return mei_hdr->host_addr == 0 && mei_hdr->me_addr == 0; +} + +static inline bool hdr_is_fixed(struct mei_msg_hdr *mei_hdr) +{ + return mei_hdr->host_addr == 0 && mei_hdr->me_addr != 0; +} + +static inline int hdr_is_valid(u32 msg_hdr) +{ + struct mei_msg_hdr *mei_hdr; + + mei_hdr = (struct mei_msg_hdr *)&msg_hdr; + if (!msg_hdr || mei_hdr->reserved) + return -EBADMSG; + + return 0; +} + +/** + * mei_irq_read_handler - bottom half read routine after ISR to + * handle the read processing. + * + * @dev: the device structure + * @cmpl_list: An instance of our list structure + * @slots: slots to read. + * + * Return: 0 on success, <0 on failure. + */ +int mei_irq_read_handler(struct mei_device *dev, + struct list_head *cmpl_list, s32 *slots) +{ + struct mei_msg_hdr *mei_hdr; + struct mei_cl *cl; + int ret; + + if (!dev->rd_msg_hdr) { + dev->rd_msg_hdr = mei_read_hdr(dev); + (*slots)--; + dev_dbg(dev->dev, "slots =%08x.\n", *slots); + + ret = hdr_is_valid(dev->rd_msg_hdr); + if (ret) { + dev_err(dev->dev, "corrupted message header 0x%08X\n", + dev->rd_msg_hdr); + goto end; + } + } + + mei_hdr = (struct mei_msg_hdr *)&dev->rd_msg_hdr; + dev_dbg(dev->dev, MEI_HDR_FMT, MEI_HDR_PRM(mei_hdr)); + + if (mei_slots2data(*slots) < mei_hdr->length) { + dev_err(dev->dev, "less data available than length=%08x.\n", + *slots); + /* we can't read the message */ + ret = -ENODATA; + goto end; + } + + /* HBM message */ + if (hdr_is_hbm(mei_hdr)) { + ret = mei_hbm_dispatch(dev, mei_hdr); + if (ret) { + dev_dbg(dev->dev, "mei_hbm_dispatch failed ret = %d\n", + ret); + goto end; + } + goto reset_slots; + } + + /* find recipient cl */ + list_for_each_entry(cl, &dev->file_list, link) { + if (mei_cl_hbm_equal(cl, mei_hdr)) { + cl_dbg(dev, cl, "got a message\n"); + break; + } + } + + /* if no recipient cl was found we assume corrupted header */ + if (&cl->link == &dev->file_list) { + /* A message for not connected fixed address clients + * should be silently discarded + * On power down client may be force cleaned, + * silently discard such messages + */ + if (hdr_is_fixed(mei_hdr) || + dev->dev_state == MEI_DEV_POWER_DOWN) { + mei_irq_discard_msg(dev, mei_hdr); + ret = 0; + goto reset_slots; + } + dev_err(dev->dev, "no destination client found 0x%08X\n", + dev->rd_msg_hdr); + ret = -EBADMSG; + goto end; + } + + ret = mei_cl_irq_read_msg(cl, mei_hdr, cmpl_list); + + +reset_slots: + /* reset the number of slots and header */ + *slots = mei_count_full_read_slots(dev); + dev->rd_msg_hdr = 0; + + if (*slots == -EOVERFLOW) { + /* overflow - reset */ + dev_err(dev->dev, "resetting due to slots overflow.\n"); + /* set the event since message has been read */ + ret = -ERANGE; + goto end; + } +end: + return ret; +} +EXPORT_SYMBOL_GPL(mei_irq_read_handler); + + +/** + * mei_irq_write_handler - dispatch write requests + * after irq received + * + * @dev: the device structure + * @cmpl_list: An instance of our list structure + * + * Return: 0 on success, <0 on failure. + */ +int mei_irq_write_handler(struct mei_device *dev, struct list_head *cmpl_list) +{ + + struct mei_cl *cl; + struct mei_cl_cb *cb, *next; + s32 slots; + int ret; + + + if (!mei_hbuf_acquire(dev)) + return 0; + + slots = mei_hbuf_empty_slots(dev); + if (slots < 0) + return -EOVERFLOW; + + if (slots == 0) + return -EMSGSIZE; + + /* complete all waiting for write CB */ + dev_dbg(dev->dev, "complete all waiting for write cb.\n"); + + list_for_each_entry_safe(cb, next, &dev->write_waiting_list, list) { + cl = cb->cl; + + cl->status = 0; + cl_dbg(dev, cl, "MEI WRITE COMPLETE\n"); + cl->writing_state = MEI_WRITE_COMPLETE; + list_move_tail(&cb->list, cmpl_list); + } + + /* complete control write list CB */ + dev_dbg(dev->dev, "complete control write list cb.\n"); + list_for_each_entry_safe(cb, next, &dev->ctrl_wr_list, list) { + cl = cb->cl; + switch (cb->fop_type) { + case MEI_FOP_DISCONNECT: + /* send disconnect message */ + ret = mei_cl_irq_disconnect(cl, cb, cmpl_list); + if (ret) + return ret; + + break; + case MEI_FOP_READ: + /* send flow control message */ + ret = mei_cl_irq_read(cl, cb, cmpl_list); + if (ret) + return ret; + + break; + case MEI_FOP_CONNECT: + /* connect message */ + ret = mei_cl_irq_connect(cl, cb, cmpl_list); + if (ret) + return ret; + + break; + case MEI_FOP_DISCONNECT_RSP: + /* send disconnect resp */ + ret = mei_cl_irq_disconnect_rsp(cl, cb, cmpl_list); + if (ret) + return ret; + break; + + case MEI_FOP_NOTIFY_START: + case MEI_FOP_NOTIFY_STOP: + ret = mei_cl_irq_notify(cl, cb, cmpl_list); + if (ret) + return ret; + break; + default: + BUG(); + } + + } + /* complete write list CB */ + dev_dbg(dev->dev, "complete write list cb.\n"); + list_for_each_entry_safe(cb, next, &dev->write_list, list) { + cl = cb->cl; + ret = mei_cl_irq_write(cl, cb, cmpl_list); + if (ret) + return ret; + } + return 0; +} +EXPORT_SYMBOL_GPL(mei_irq_write_handler); + + +/** + * mei_connect_timeout - connect/disconnect timeouts + * + * @cl: host client + */ +static void mei_connect_timeout(struct mei_cl *cl) +{ + struct mei_device *dev = cl->dev; + + if (cl->state == MEI_FILE_CONNECTING) { + if (dev->hbm_f_dot_supported) { + cl->state = MEI_FILE_DISCONNECT_REQUIRED; + wake_up(&cl->wait); + return; + } + } + mei_reset(dev); +} + +#define MEI_STALL_TIMER_FREQ (2 * HZ) +/** + * mei_schedule_stall_timer - re-arm stall_timer work + * + * Schedule stall timer + * + * @dev: the device structure + */ +void mei_schedule_stall_timer(struct mei_device *dev) +{ + schedule_delayed_work(&dev->timer_work, MEI_STALL_TIMER_FREQ); +} + +/** + * mei_timer - timer function. + * + * @work: pointer to the work_struct structure + * + */ +void mei_timer(struct work_struct *work) +{ + struct mei_cl *cl; + struct mei_device *dev = container_of(work, + struct mei_device, timer_work.work); + bool reschedule_timer = false; + + mutex_lock(&dev->device_lock); + + /* Catch interrupt stalls during HBM init handshake */ + if (dev->dev_state == MEI_DEV_INIT_CLIENTS && + dev->hbm_state != MEI_HBM_IDLE) { + + if (dev->init_clients_timer) { + if (--dev->init_clients_timer == 0) { + dev_err(dev->dev, "timer: init clients timeout hbm_state = %d.\n", + dev->hbm_state); + mei_reset(dev); + goto out; + } + reschedule_timer = true; + } + } + + if (dev->dev_state != MEI_DEV_ENABLED) + goto out; + + /*** connect/disconnect timeouts ***/ + list_for_each_entry(cl, &dev->file_list, link) { + if (cl->timer_count) { + if (--cl->timer_count == 0) { + dev_err(dev->dev, "timer: connect/disconnect timeout.\n"); + mei_connect_timeout(cl); + goto out; + } + reschedule_timer = true; + } + } + +out: + if (dev->dev_state != MEI_DEV_DISABLED && reschedule_timer) + mei_schedule_stall_timer(dev); + + mutex_unlock(&dev->device_lock); +} diff --git a/drivers/misc/mei/main.c b/drivers/misc/mei/main.c new file mode 100644 index 000000000..87281b369 --- /dev/null +++ b/drivers/misc/mei/main.c @@ -0,0 +1,1022 @@ +/* + * + * Intel Management Engine Interface (Intel MEI) Linux driver + * Copyright (c) 2003-2018, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "mei_dev.h" +#include "client.h" + +/** + * mei_open - the open function + * + * @inode: pointer to inode structure + * @file: pointer to file structure + * + * Return: 0 on success, <0 on error + */ +static int mei_open(struct inode *inode, struct file *file) +{ + struct mei_device *dev; + struct mei_cl *cl; + + int err; + + dev = container_of(inode->i_cdev, struct mei_device, cdev); + if (!dev) + return -ENODEV; + + mutex_lock(&dev->device_lock); + + if (dev->dev_state != MEI_DEV_ENABLED) { + dev_dbg(dev->dev, "dev_state != MEI_ENABLED dev_state = %s\n", + mei_dev_state_str(dev->dev_state)); + err = -ENODEV; + goto err_unlock; + } + + cl = mei_cl_alloc_linked(dev); + if (IS_ERR(cl)) { + err = PTR_ERR(cl); + goto err_unlock; + } + + cl->fp = file; + file->private_data = cl; + + mutex_unlock(&dev->device_lock); + + return nonseekable_open(inode, file); + +err_unlock: + mutex_unlock(&dev->device_lock); + return err; +} + +/** + * mei_release - the release function + * + * @inode: pointer to inode structure + * @file: pointer to file structure + * + * Return: 0 on success, <0 on error + */ +static int mei_release(struct inode *inode, struct file *file) +{ + struct mei_cl *cl = file->private_data; + struct mei_device *dev; + int rets; + + if (WARN_ON(!cl || !cl->dev)) + return -ENODEV; + + dev = cl->dev; + + mutex_lock(&dev->device_lock); + + rets = mei_cl_disconnect(cl); + + mei_cl_flush_queues(cl, file); + cl_dbg(dev, cl, "removing\n"); + + mei_cl_unlink(cl); + + file->private_data = NULL; + + kfree(cl); + + mutex_unlock(&dev->device_lock); + return rets; +} + + +/** + * mei_read - the read function. + * + * @file: pointer to file structure + * @ubuf: pointer to user buffer + * @length: buffer length + * @offset: data offset in buffer + * + * Return: >=0 data length on success , <0 on error + */ +static ssize_t mei_read(struct file *file, char __user *ubuf, + size_t length, loff_t *offset) +{ + struct mei_cl *cl = file->private_data; + struct mei_device *dev; + struct mei_cl_cb *cb = NULL; + bool nonblock = !!(file->f_flags & O_NONBLOCK); + ssize_t rets; + + if (WARN_ON(!cl || !cl->dev)) + return -ENODEV; + + dev = cl->dev; + + + mutex_lock(&dev->device_lock); + if (dev->dev_state != MEI_DEV_ENABLED) { + rets = -ENODEV; + goto out; + } + + if (length == 0) { + rets = 0; + goto out; + } + + if (ubuf == NULL) { + rets = -EMSGSIZE; + goto out; + } + + cb = mei_cl_read_cb(cl, file); + if (cb) + goto copy_buffer; + + if (*offset > 0) + *offset = 0; + + rets = mei_cl_read_start(cl, length, file); + if (rets && rets != -EBUSY) { + cl_dbg(dev, cl, "mei start read failure status = %zd\n", rets); + goto out; + } + + if (nonblock) { + rets = -EAGAIN; + goto out; + } + + mutex_unlock(&dev->device_lock); + if (wait_event_interruptible(cl->rx_wait, + !list_empty(&cl->rd_completed) || + !mei_cl_is_connected(cl))) { + if (signal_pending(current)) + return -EINTR; + return -ERESTARTSYS; + } + mutex_lock(&dev->device_lock); + + if (!mei_cl_is_connected(cl)) { + rets = -ENODEV; + goto out; + } + + cb = mei_cl_read_cb(cl, file); + if (!cb) { + rets = 0; + goto out; + } + +copy_buffer: + /* now copy the data to user space */ + if (cb->status) { + rets = cb->status; + cl_dbg(dev, cl, "read operation failed %zd\n", rets); + goto free; + } + + cl_dbg(dev, cl, "buf.size = %zu buf.idx = %zu offset = %lld\n", + cb->buf.size, cb->buf_idx, *offset); + if (*offset >= cb->buf_idx) { + rets = 0; + goto free; + } + + /* length is being truncated to PAGE_SIZE, + * however buf_idx may point beyond that */ + length = min_t(size_t, length, cb->buf_idx - *offset); + + if (copy_to_user(ubuf, cb->buf.data + *offset, length)) { + dev_dbg(dev->dev, "failed to copy data to userland\n"); + rets = -EFAULT; + goto free; + } + + rets = length; + *offset += length; + /* not all data was read, keep the cb */ + if (*offset < cb->buf_idx) + goto out; + +free: + mei_io_cb_free(cb); + *offset = 0; + +out: + cl_dbg(dev, cl, "end mei read rets = %zd\n", rets); + mutex_unlock(&dev->device_lock); + return rets; +} +/** + * mei_write - the write function. + * + * @file: pointer to file structure + * @ubuf: pointer to user buffer + * @length: buffer length + * @offset: data offset in buffer + * + * Return: >=0 data length on success , <0 on error + */ +static ssize_t mei_write(struct file *file, const char __user *ubuf, + size_t length, loff_t *offset) +{ + struct mei_cl *cl = file->private_data; + struct mei_cl_cb *cb; + struct mei_device *dev; + ssize_t rets; + + if (WARN_ON(!cl || !cl->dev)) + return -ENODEV; + + dev = cl->dev; + + mutex_lock(&dev->device_lock); + + if (dev->dev_state != MEI_DEV_ENABLED) { + rets = -ENODEV; + goto out; + } + + if (!mei_cl_is_connected(cl)) { + cl_err(dev, cl, "is not connected"); + rets = -ENODEV; + goto out; + } + + if (!mei_me_cl_is_active(cl->me_cl)) { + rets = -ENOTTY; + goto out; + } + + if (length > mei_cl_mtu(cl)) { + rets = -EFBIG; + goto out; + } + + if (length == 0) { + rets = 0; + goto out; + } + + while (cl->tx_cb_queued >= dev->tx_queue_limit) { + if (file->f_flags & O_NONBLOCK) { + rets = -EAGAIN; + goto out; + } + mutex_unlock(&dev->device_lock); + rets = wait_event_interruptible(cl->tx_wait, + cl->writing_state == MEI_WRITE_COMPLETE || + (!mei_cl_is_connected(cl))); + mutex_lock(&dev->device_lock); + if (rets) { + if (signal_pending(current)) + rets = -EINTR; + goto out; + } + if (!mei_cl_is_connected(cl)) { + rets = -ENODEV; + goto out; + } + } + + cb = mei_cl_alloc_cb(cl, length, MEI_FOP_WRITE, file); + if (!cb) { + rets = -ENOMEM; + goto out; + } + + rets = copy_from_user(cb->buf.data, ubuf, length); + if (rets) { + dev_dbg(dev->dev, "failed to copy data from userland\n"); + rets = -EFAULT; + mei_io_cb_free(cb); + goto out; + } + + rets = mei_cl_write(cl, cb); +out: + mutex_unlock(&dev->device_lock); + return rets; +} + +/** + * mei_ioctl_connect_client - the connect to fw client IOCTL function + * + * @file: private data of the file object + * @data: IOCTL connect data, input and output parameters + * + * Locking: called under "dev->device_lock" lock + * + * Return: 0 on success, <0 on failure. + */ +static int mei_ioctl_connect_client(struct file *file, + struct mei_connect_client_data *data) +{ + struct mei_device *dev; + struct mei_client *client; + struct mei_me_client *me_cl; + struct mei_cl *cl; + int rets; + + cl = file->private_data; + dev = cl->dev; + + if (dev->dev_state != MEI_DEV_ENABLED) + return -ENODEV; + + if (cl->state != MEI_FILE_INITIALIZING && + cl->state != MEI_FILE_DISCONNECTED) + return -EBUSY; + + /* find ME client we're trying to connect to */ + me_cl = mei_me_cl_by_uuid(dev, &data->in_client_uuid); + if (!me_cl) { + dev_dbg(dev->dev, "Cannot connect to FW Client UUID = %pUl\n", + &data->in_client_uuid); + rets = -ENOTTY; + goto end; + } + + if (me_cl->props.fixed_address) { + bool forbidden = dev->override_fixed_address ? + !dev->allow_fixed_address : !dev->hbm_f_fa_supported; + if (forbidden) { + dev_dbg(dev->dev, "Connection forbidden to FW Client UUID = %pUl\n", + &data->in_client_uuid); + rets = -ENOTTY; + goto end; + } + } + + dev_dbg(dev->dev, "Connect to FW Client ID = %d\n", + me_cl->client_id); + dev_dbg(dev->dev, "FW Client - Protocol Version = %d\n", + me_cl->props.protocol_version); + dev_dbg(dev->dev, "FW Client - Max Msg Len = %d\n", + me_cl->props.max_msg_length); + + /* prepare the output buffer */ + client = &data->out_client_properties; + client->max_msg_length = me_cl->props.max_msg_length; + client->protocol_version = me_cl->props.protocol_version; + dev_dbg(dev->dev, "Can connect?\n"); + + rets = mei_cl_connect(cl, me_cl, file); + +end: + mei_me_cl_put(me_cl); + return rets; +} + +/** + * mei_ioctl_client_notify_request - + * propagate event notification request to client + * + * @file: pointer to file structure + * @request: 0 - disable, 1 - enable + * + * Return: 0 on success , <0 on error + */ +static int mei_ioctl_client_notify_request(const struct file *file, u32 request) +{ + struct mei_cl *cl = file->private_data; + + if (request != MEI_HBM_NOTIFICATION_START && + request != MEI_HBM_NOTIFICATION_STOP) + return -EINVAL; + + return mei_cl_notify_request(cl, file, (u8)request); +} + +/** + * mei_ioctl_client_notify_get - wait for notification request + * + * @file: pointer to file structure + * @notify_get: 0 - disable, 1 - enable + * + * Return: 0 on success , <0 on error + */ +static int mei_ioctl_client_notify_get(const struct file *file, u32 *notify_get) +{ + struct mei_cl *cl = file->private_data; + bool notify_ev; + bool block = (file->f_flags & O_NONBLOCK) == 0; + int rets; + + rets = mei_cl_notify_get(cl, block, ¬ify_ev); + if (rets) + return rets; + + *notify_get = notify_ev ? 1 : 0; + return 0; +} + +/** + * mei_ioctl - the IOCTL function + * + * @file: pointer to file structure + * @cmd: ioctl command + * @data: pointer to mei message structure + * + * Return: 0 on success , <0 on error + */ +static long mei_ioctl(struct file *file, unsigned int cmd, unsigned long data) +{ + struct mei_device *dev; + struct mei_cl *cl = file->private_data; + struct mei_connect_client_data connect_data; + u32 notify_get, notify_req; + int rets; + + + if (WARN_ON(!cl || !cl->dev)) + return -ENODEV; + + dev = cl->dev; + + dev_dbg(dev->dev, "IOCTL cmd = 0x%x", cmd); + + mutex_lock(&dev->device_lock); + if (dev->dev_state != MEI_DEV_ENABLED) { + rets = -ENODEV; + goto out; + } + + switch (cmd) { + case IOCTL_MEI_CONNECT_CLIENT: + dev_dbg(dev->dev, ": IOCTL_MEI_CONNECT_CLIENT.\n"); + if (copy_from_user(&connect_data, (char __user *)data, + sizeof(struct mei_connect_client_data))) { + dev_dbg(dev->dev, "failed to copy data from userland\n"); + rets = -EFAULT; + goto out; + } + + rets = mei_ioctl_connect_client(file, &connect_data); + if (rets) + goto out; + + /* if all is ok, copying the data back to user. */ + if (copy_to_user((char __user *)data, &connect_data, + sizeof(struct mei_connect_client_data))) { + dev_dbg(dev->dev, "failed to copy data to userland\n"); + rets = -EFAULT; + goto out; + } + + break; + + case IOCTL_MEI_NOTIFY_SET: + dev_dbg(dev->dev, ": IOCTL_MEI_NOTIFY_SET.\n"); + if (copy_from_user(¬ify_req, + (char __user *)data, sizeof(notify_req))) { + dev_dbg(dev->dev, "failed to copy data from userland\n"); + rets = -EFAULT; + goto out; + } + rets = mei_ioctl_client_notify_request(file, notify_req); + break; + + case IOCTL_MEI_NOTIFY_GET: + dev_dbg(dev->dev, ": IOCTL_MEI_NOTIFY_GET.\n"); + rets = mei_ioctl_client_notify_get(file, ¬ify_get); + if (rets) + goto out; + + dev_dbg(dev->dev, "copy connect data to user\n"); + if (copy_to_user((char __user *)data, + ¬ify_get, sizeof(notify_get))) { + dev_dbg(dev->dev, "failed to copy data to userland\n"); + rets = -EFAULT; + goto out; + + } + break; + + default: + rets = -ENOIOCTLCMD; + } + +out: + mutex_unlock(&dev->device_lock); + return rets; +} + +/** + * mei_compat_ioctl - the compat IOCTL function + * + * @file: pointer to file structure + * @cmd: ioctl command + * @data: pointer to mei message structure + * + * Return: 0 on success , <0 on error + */ +#ifdef CONFIG_COMPAT +static long mei_compat_ioctl(struct file *file, + unsigned int cmd, unsigned long data) +{ + return mei_ioctl(file, cmd, (unsigned long)compat_ptr(data)); +} +#endif + + +/** + * mei_poll - the poll function + * + * @file: pointer to file structure + * @wait: pointer to poll_table structure + * + * Return: poll mask + */ +static __poll_t mei_poll(struct file *file, poll_table *wait) +{ + __poll_t req_events = poll_requested_events(wait); + struct mei_cl *cl = file->private_data; + struct mei_device *dev; + __poll_t mask = 0; + bool notify_en; + + if (WARN_ON(!cl || !cl->dev)) + return EPOLLERR; + + dev = cl->dev; + + mutex_lock(&dev->device_lock); + + notify_en = cl->notify_en && (req_events & EPOLLPRI); + + if (dev->dev_state != MEI_DEV_ENABLED || + !mei_cl_is_connected(cl)) { + mask = EPOLLERR; + goto out; + } + + if (notify_en) { + poll_wait(file, &cl->ev_wait, wait); + if (cl->notify_ev) + mask |= EPOLLPRI; + } + + if (req_events & (EPOLLIN | EPOLLRDNORM)) { + poll_wait(file, &cl->rx_wait, wait); + + if (!list_empty(&cl->rd_completed)) + mask |= EPOLLIN | EPOLLRDNORM; + else + mei_cl_read_start(cl, mei_cl_mtu(cl), file); + } + + if (req_events & (EPOLLOUT | EPOLLWRNORM)) { + poll_wait(file, &cl->tx_wait, wait); + if (cl->tx_cb_queued < dev->tx_queue_limit) + mask |= EPOLLOUT | EPOLLWRNORM; + } + +out: + mutex_unlock(&dev->device_lock); + return mask; +} + +/** + * mei_cl_is_write_queued - check if the client has pending writes. + * + * @cl: writing host client + * + * Return: true if client is writing, false otherwise. + */ +static bool mei_cl_is_write_queued(struct mei_cl *cl) +{ + struct mei_device *dev = cl->dev; + struct mei_cl_cb *cb; + + list_for_each_entry(cb, &dev->write_list, list) + if (cb->cl == cl) + return true; + list_for_each_entry(cb, &dev->write_waiting_list, list) + if (cb->cl == cl) + return true; + return false; +} + +/** + * mei_fsync - the fsync handler + * + * @fp: pointer to file structure + * @start: unused + * @end: unused + * @datasync: unused + * + * Return: 0 on success, -ENODEV if client is not connected + */ +static int mei_fsync(struct file *fp, loff_t start, loff_t end, int datasync) +{ + struct mei_cl *cl = fp->private_data; + struct mei_device *dev; + int rets; + + if (WARN_ON(!cl || !cl->dev)) + return -ENODEV; + + dev = cl->dev; + + mutex_lock(&dev->device_lock); + + if (dev->dev_state != MEI_DEV_ENABLED || !mei_cl_is_connected(cl)) { + rets = -ENODEV; + goto out; + } + + while (mei_cl_is_write_queued(cl)) { + mutex_unlock(&dev->device_lock); + rets = wait_event_interruptible(cl->tx_wait, + cl->writing_state == MEI_WRITE_COMPLETE || + !mei_cl_is_connected(cl)); + mutex_lock(&dev->device_lock); + if (rets) { + if (signal_pending(current)) + rets = -EINTR; + goto out; + } + if (!mei_cl_is_connected(cl)) { + rets = -ENODEV; + goto out; + } + } + rets = 0; +out: + mutex_unlock(&dev->device_lock); + return rets; +} + +/** + * mei_fasync - asynchronous io support + * + * @fd: file descriptor + * @file: pointer to file structure + * @band: band bitmap + * + * Return: negative on error, + * 0 if it did no changes, + * and positive a process was added or deleted + */ +static int mei_fasync(int fd, struct file *file, int band) +{ + + struct mei_cl *cl = file->private_data; + + if (!mei_cl_is_connected(cl)) + return -ENODEV; + + return fasync_helper(fd, file, band, &cl->ev_async); +} + +/** + * fw_status_show - mei device fw_status attribute show method + * + * @device: device pointer + * @attr: attribute pointer + * @buf: char out buffer + * + * Return: number of the bytes printed into buf or error + */ +static ssize_t fw_status_show(struct device *device, + struct device_attribute *attr, char *buf) +{ + struct mei_device *dev = dev_get_drvdata(device); + struct mei_fw_status fw_status; + int err, i; + ssize_t cnt = 0; + + mutex_lock(&dev->device_lock); + err = mei_fw_status(dev, &fw_status); + mutex_unlock(&dev->device_lock); + if (err) { + dev_err(device, "read fw_status error = %d\n", err); + return err; + } + + for (i = 0; i < fw_status.count; i++) + cnt += scnprintf(buf + cnt, PAGE_SIZE - cnt, "%08X\n", + fw_status.status[i]); + return cnt; +} +static DEVICE_ATTR_RO(fw_status); + +/** + * hbm_ver_show - display HBM protocol version negotiated with FW + * + * @device: device pointer + * @attr: attribute pointer + * @buf: char out buffer + * + * Return: number of the bytes printed into buf or error + */ +static ssize_t hbm_ver_show(struct device *device, + struct device_attribute *attr, char *buf) +{ + struct mei_device *dev = dev_get_drvdata(device); + struct hbm_version ver; + + mutex_lock(&dev->device_lock); + ver = dev->version; + mutex_unlock(&dev->device_lock); + + return sprintf(buf, "%u.%u\n", ver.major_version, ver.minor_version); +} +static DEVICE_ATTR_RO(hbm_ver); + +/** + * hbm_ver_drv_show - display HBM protocol version advertised by driver + * + * @device: device pointer + * @attr: attribute pointer + * @buf: char out buffer + * + * Return: number of the bytes printed into buf or error + */ +static ssize_t hbm_ver_drv_show(struct device *device, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%u.%u\n", HBM_MAJOR_VERSION, HBM_MINOR_VERSION); +} +static DEVICE_ATTR_RO(hbm_ver_drv); + +static ssize_t tx_queue_limit_show(struct device *device, + struct device_attribute *attr, char *buf) +{ + struct mei_device *dev = dev_get_drvdata(device); + u8 size = 0; + + mutex_lock(&dev->device_lock); + size = dev->tx_queue_limit; + mutex_unlock(&dev->device_lock); + + return snprintf(buf, PAGE_SIZE, "%u\n", size); +} + +static ssize_t tx_queue_limit_store(struct device *device, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct mei_device *dev = dev_get_drvdata(device); + u8 limit; + unsigned int inp; + int err; + + err = kstrtouint(buf, 10, &inp); + if (err) + return err; + if (inp > MEI_TX_QUEUE_LIMIT_MAX || inp < MEI_TX_QUEUE_LIMIT_MIN) + return -EINVAL; + limit = inp; + + mutex_lock(&dev->device_lock); + dev->tx_queue_limit = limit; + mutex_unlock(&dev->device_lock); + + return count; +} +static DEVICE_ATTR_RW(tx_queue_limit); + +/** + * fw_ver_show - display ME FW version + * + * @device: device pointer + * @attr: attribute pointer + * @buf: char out buffer + * + * Return: number of the bytes printed into buf or error + */ +static ssize_t fw_ver_show(struct device *device, + struct device_attribute *attr, char *buf) +{ + struct mei_device *dev = dev_get_drvdata(device); + struct mei_fw_version *ver; + ssize_t cnt = 0; + int i; + + ver = dev->fw_ver; + + for (i = 0; i < MEI_MAX_FW_VER_BLOCKS; i++) + cnt += scnprintf(buf + cnt, PAGE_SIZE - cnt, "%u:%u.%u.%u.%u\n", + ver[i].platform, ver[i].major, ver[i].minor, + ver[i].hotfix, ver[i].buildno); + return cnt; +} +static DEVICE_ATTR_RO(fw_ver); + +static struct attribute *mei_attrs[] = { + &dev_attr_fw_status.attr, + &dev_attr_hbm_ver.attr, + &dev_attr_hbm_ver_drv.attr, + &dev_attr_tx_queue_limit.attr, + &dev_attr_fw_ver.attr, + NULL +}; +ATTRIBUTE_GROUPS(mei); + +/* + * file operations structure will be used for mei char device. + */ +static const struct file_operations mei_fops = { + .owner = THIS_MODULE, + .read = mei_read, + .unlocked_ioctl = mei_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = mei_compat_ioctl, +#endif + .open = mei_open, + .release = mei_release, + .write = mei_write, + .poll = mei_poll, + .fsync = mei_fsync, + .fasync = mei_fasync, + .llseek = no_llseek +}; + +static struct class *mei_class; +static dev_t mei_devt; +#define MEI_MAX_DEVS MINORMASK +static DEFINE_MUTEX(mei_minor_lock); +static DEFINE_IDR(mei_idr); + +/** + * mei_minor_get - obtain next free device minor number + * + * @dev: device pointer + * + * Return: allocated minor, or -ENOSPC if no free minor left + */ +static int mei_minor_get(struct mei_device *dev) +{ + int ret; + + mutex_lock(&mei_minor_lock); + ret = idr_alloc(&mei_idr, dev, 0, MEI_MAX_DEVS, GFP_KERNEL); + if (ret >= 0) + dev->minor = ret; + else if (ret == -ENOSPC) + dev_err(dev->dev, "too many mei devices\n"); + + mutex_unlock(&mei_minor_lock); + return ret; +} + +/** + * mei_minor_free - mark device minor number as free + * + * @dev: device pointer + */ +static void mei_minor_free(struct mei_device *dev) +{ + mutex_lock(&mei_minor_lock); + idr_remove(&mei_idr, dev->minor); + mutex_unlock(&mei_minor_lock); +} + +int mei_register(struct mei_device *dev, struct device *parent) +{ + struct device *clsdev; /* class device */ + int ret, devno; + + ret = mei_minor_get(dev); + if (ret < 0) + return ret; + + /* Fill in the data structures */ + devno = MKDEV(MAJOR(mei_devt), dev->minor); + cdev_init(&dev->cdev, &mei_fops); + dev->cdev.owner = parent->driver->owner; + + /* Add the device */ + ret = cdev_add(&dev->cdev, devno, 1); + if (ret) { + dev_err(parent, "unable to add device %d:%d\n", + MAJOR(mei_devt), dev->minor); + goto err_dev_add; + } + + clsdev = device_create_with_groups(mei_class, parent, devno, + dev, mei_groups, + "mei%d", dev->minor); + + if (IS_ERR(clsdev)) { + dev_err(parent, "unable to create device %d:%d\n", + MAJOR(mei_devt), dev->minor); + ret = PTR_ERR(clsdev); + goto err_dev_create; + } + + ret = mei_dbgfs_register(dev, dev_name(clsdev)); + if (ret) { + dev_err(clsdev, "cannot register debugfs ret = %d\n", ret); + goto err_dev_dbgfs; + } + + return 0; + +err_dev_dbgfs: + device_destroy(mei_class, devno); +err_dev_create: + cdev_del(&dev->cdev); +err_dev_add: + mei_minor_free(dev); + return ret; +} +EXPORT_SYMBOL_GPL(mei_register); + +void mei_deregister(struct mei_device *dev) +{ + int devno; + + devno = dev->cdev.dev; + cdev_del(&dev->cdev); + + mei_dbgfs_deregister(dev); + + device_destroy(mei_class, devno); + + mei_minor_free(dev); +} +EXPORT_SYMBOL_GPL(mei_deregister); + +static int __init mei_init(void) +{ + int ret; + + mei_class = class_create(THIS_MODULE, "mei"); + if (IS_ERR(mei_class)) { + pr_err("couldn't create class\n"); + ret = PTR_ERR(mei_class); + goto err; + } + + ret = alloc_chrdev_region(&mei_devt, 0, MEI_MAX_DEVS, "mei"); + if (ret < 0) { + pr_err("unable to allocate char dev region\n"); + goto err_class; + } + + ret = mei_cl_bus_init(); + if (ret < 0) { + pr_err("unable to initialize bus\n"); + goto err_chrdev; + } + + return 0; + +err_chrdev: + unregister_chrdev_region(mei_devt, MEI_MAX_DEVS); +err_class: + class_destroy(mei_class); +err: + return ret; +} + +static void __exit mei_exit(void) +{ + unregister_chrdev_region(mei_devt, MEI_MAX_DEVS); + class_destroy(mei_class); + mei_cl_bus_exit(); +} + +module_init(mei_init); +module_exit(mei_exit); + +MODULE_AUTHOR("Intel Corporation"); +MODULE_DESCRIPTION("Intel(R) Management Engine Interface"); +MODULE_LICENSE("GPL v2"); + diff --git a/drivers/misc/mei/mei-trace.c b/drivers/misc/mei/mei-trace.c new file mode 100644 index 000000000..374edde72 --- /dev/null +++ b/drivers/misc/mei/mei-trace.c @@ -0,0 +1,26 @@ +/* + * + * Intel Management Engine Interface (Intel MEI) Linux driver + * Copyright (c) 2015, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ +#include + +/* sparse doesn't like tracepoint macros */ +#ifndef __CHECKER__ +#define CREATE_TRACE_POINTS +#include "mei-trace.h" + +EXPORT_TRACEPOINT_SYMBOL(mei_reg_read); +EXPORT_TRACEPOINT_SYMBOL(mei_reg_write); +EXPORT_TRACEPOINT_SYMBOL(mei_pci_cfg_read); +#endif /* __CHECKER__ */ diff --git a/drivers/misc/mei/mei-trace.h b/drivers/misc/mei/mei-trace.h new file mode 100644 index 000000000..b52e9b97a --- /dev/null +++ b/drivers/misc/mei/mei-trace.h @@ -0,0 +1,93 @@ +/* + * + * Intel Management Engine Interface (Intel MEI) Linux driver + * Copyright (c) 2015, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#if !defined(_MEI_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ) +#define _MEI_TRACE_H_ + +#include +#include +#include + +#include + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM mei + +TRACE_EVENT(mei_reg_read, + TP_PROTO(const struct device *dev, const char *reg, u32 offs, u32 val), + TP_ARGS(dev, reg, offs, val), + TP_STRUCT__entry( + __string(dev, dev_name(dev)) + __field(const char *, reg) + __field(u32, offs) + __field(u32, val) + ), + TP_fast_assign( + __assign_str(dev, dev_name(dev)) + __entry->reg = reg; + __entry->offs = offs; + __entry->val = val; + ), + TP_printk("[%s] read %s:[%#x] = %#x", + __get_str(dev), __entry->reg, __entry->offs, __entry->val) +); + +TRACE_EVENT(mei_reg_write, + TP_PROTO(const struct device *dev, const char *reg, u32 offs, u32 val), + TP_ARGS(dev, reg, offs, val), + TP_STRUCT__entry( + __string(dev, dev_name(dev)) + __field(const char *, reg) + __field(u32, offs) + __field(u32, val) + ), + TP_fast_assign( + __assign_str(dev, dev_name(dev)) + __entry->reg = reg; + __entry->offs = offs; + __entry->val = val; + ), + TP_printk("[%s] write %s[%#x] = %#x", + __get_str(dev), __entry->reg, __entry->offs, __entry->val) +); + +TRACE_EVENT(mei_pci_cfg_read, + TP_PROTO(const struct device *dev, const char *reg, u32 offs, u32 val), + TP_ARGS(dev, reg, offs, val), + TP_STRUCT__entry( + __string(dev, dev_name(dev)) + __field(const char *, reg) + __field(u32, offs) + __field(u32, val) + ), + TP_fast_assign( + __assign_str(dev, dev_name(dev)) + __entry->reg = reg; + __entry->offs = offs; + __entry->val = val; + ), + TP_printk("[%s] pci cfg read %s:[%#x] = %#x", + __get_str(dev), __entry->reg, __entry->offs, __entry->val) +); + +#endif /* _MEI_TRACE_H_ */ + +/* This part must be outside protection */ +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE mei-trace +#include diff --git a/drivers/misc/mei/mei_dev.h b/drivers/misc/mei/mei_dev.h new file mode 100644 index 000000000..fc7a5e3fb --- /dev/null +++ b/drivers/misc/mei/mei_dev.h @@ -0,0 +1,756 @@ +/* + * + * Intel Management Engine Interface (Intel MEI) Linux driver + * Copyright (c) 2003-2018, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#ifndef _MEI_DEV_H_ +#define _MEI_DEV_H_ + +#include +#include +#include +#include +#include + +#include "hw.h" +#include "hbm.h" + +#define MEI_SLOT_SIZE sizeof(u32) +#define MEI_RD_MSG_BUF_SIZE (128 * MEI_SLOT_SIZE) + +/* + * Number of Maximum MEI Clients + */ +#define MEI_CLIENTS_MAX 256 + +/* + * maximum number of consecutive resets + */ +#define MEI_MAX_CONSEC_RESET 3 + +/* + * Number of File descriptors/handles + * that can be opened to the driver. + * + * Limit to 255: 256 Total Clients + * minus internal client for MEI Bus Messages + */ +#define MEI_MAX_OPEN_HANDLE_COUNT (MEI_CLIENTS_MAX - 1) + +/* File state */ +enum file_state { + MEI_FILE_UNINITIALIZED = 0, + MEI_FILE_INITIALIZING, + MEI_FILE_CONNECTING, + MEI_FILE_CONNECTED, + MEI_FILE_DISCONNECTING, + MEI_FILE_DISCONNECT_REPLY, + MEI_FILE_DISCONNECT_REQUIRED, + MEI_FILE_DISCONNECTED, +}; + +/* MEI device states */ +enum mei_dev_state { + MEI_DEV_INITIALIZING = 0, + MEI_DEV_INIT_CLIENTS, + MEI_DEV_ENABLED, + MEI_DEV_RESETTING, + MEI_DEV_DISABLED, + MEI_DEV_POWER_DOWN, + MEI_DEV_POWER_UP +}; + +const char *mei_dev_state_str(int state); + +enum mei_file_transaction_states { + MEI_IDLE, + MEI_WRITING, + MEI_WRITE_COMPLETE, +}; + +/** + * enum mei_cb_file_ops - file operation associated with the callback + * @MEI_FOP_READ: read + * @MEI_FOP_WRITE: write + * @MEI_FOP_CONNECT: connect + * @MEI_FOP_DISCONNECT: disconnect + * @MEI_FOP_DISCONNECT_RSP: disconnect response + * @MEI_FOP_NOTIFY_START: start notification + * @MEI_FOP_NOTIFY_STOP: stop notification + */ +enum mei_cb_file_ops { + MEI_FOP_READ = 0, + MEI_FOP_WRITE, + MEI_FOP_CONNECT, + MEI_FOP_DISCONNECT, + MEI_FOP_DISCONNECT_RSP, + MEI_FOP_NOTIFY_START, + MEI_FOP_NOTIFY_STOP, +}; + +/** + * enum mei_cl_io_mode - io mode between driver and fw + * + * @MEI_CL_IO_TX_BLOCKING: send is blocking + * @MEI_CL_IO_TX_INTERNAL: internal communication between driver and FW + * + * @MEI_CL_IO_RX_NONBLOCK: recv is non-blocking + */ +enum mei_cl_io_mode { + MEI_CL_IO_TX_BLOCKING = BIT(0), + MEI_CL_IO_TX_INTERNAL = BIT(1), + + MEI_CL_IO_RX_NONBLOCK = BIT(2), +}; + +/* + * Intel MEI message data struct + */ +struct mei_msg_data { + size_t size; + unsigned char *data; +}; + +/* Maximum number of processed FW status registers */ +#define MEI_FW_STATUS_MAX 6 +/* Minimal buffer for FW status string (8 bytes in dw + space or '\0') */ +#define MEI_FW_STATUS_STR_SZ (MEI_FW_STATUS_MAX * (8 + 1)) + + +/* + * struct mei_fw_status - storage of FW status data + * + * @count: number of actually available elements in array + * @status: FW status registers + */ +struct mei_fw_status { + int count; + u32 status[MEI_FW_STATUS_MAX]; +}; + +/** + * struct mei_me_client - representation of me (fw) client + * + * @list: link in me client list + * @refcnt: struct reference count + * @props: client properties + * @client_id: me client id + * @tx_flow_ctrl_creds: flow control credits + * @connect_count: number connections to this client + * @bus_added: added to bus + */ +struct mei_me_client { + struct list_head list; + struct kref refcnt; + struct mei_client_properties props; + u8 client_id; + u8 tx_flow_ctrl_creds; + u8 connect_count; + u8 bus_added; +}; + + +struct mei_cl; + +/** + * struct mei_cl_cb - file operation callback structure + * + * @list: link in callback queue + * @cl: file client who is running this operation + * @fop_type: file operation type + * @buf: buffer for data associated with the callback + * @buf_idx: last read index + * @fp: pointer to file structure + * @status: io status of the cb + * @internal: communication between driver and FW flag + * @blocking: transmission blocking mode + */ +struct mei_cl_cb { + struct list_head list; + struct mei_cl *cl; + enum mei_cb_file_ops fop_type; + struct mei_msg_data buf; + size_t buf_idx; + const struct file *fp; + int status; + u32 internal:1; + u32 blocking:1; +}; + +/** + * struct mei_cl - me client host representation + * carried in file->private_data + * + * @link: link in the clients list + * @dev: mei parent device + * @state: file operation state + * @tx_wait: wait queue for tx completion + * @rx_wait: wait queue for rx completion + * @wait: wait queue for management operation + * @ev_wait: notification wait queue + * @ev_async: event async notification + * @status: connection status + * @me_cl: fw client connected + * @fp: file associated with client + * @host_client_id: host id + * @tx_flow_ctrl_creds: transmit flow credentials + * @rx_flow_ctrl_creds: receive flow credentials + * @timer_count: watchdog timer for operation completion + * @notify_en: notification - enabled/disabled + * @notify_ev: pending notification event + * @tx_cb_queued: number of tx callbacks in queue + * @writing_state: state of the tx + * @rd_pending: pending read credits + * @rd_completed: completed read + * + * @cldev: device on the mei client bus + */ +struct mei_cl { + struct list_head link; + struct mei_device *dev; + enum file_state state; + wait_queue_head_t tx_wait; + wait_queue_head_t rx_wait; + wait_queue_head_t wait; + wait_queue_head_t ev_wait; + struct fasync_struct *ev_async; + int status; + struct mei_me_client *me_cl; + const struct file *fp; + u8 host_client_id; + u8 tx_flow_ctrl_creds; + u8 rx_flow_ctrl_creds; + u8 timer_count; + u8 notify_en; + u8 notify_ev; + u8 tx_cb_queued; + enum mei_file_transaction_states writing_state; + struct list_head rd_pending; + struct list_head rd_completed; + + struct mei_cl_device *cldev; +}; + +#define MEI_TX_QUEUE_LIMIT_DEFAULT 50 +#define MEI_TX_QUEUE_LIMIT_MAX 255 +#define MEI_TX_QUEUE_LIMIT_MIN 30 + +/** + * struct mei_hw_ops - hw specific ops + * + * @host_is_ready : query for host readiness + * + * @hw_is_ready : query if hw is ready + * @hw_reset : reset hw + * @hw_start : start hw after reset + * @hw_config : configure hw + * + * @fw_status : get fw status registers + * @pg_state : power gating state of the device + * @pg_in_transition : is device now in pg transition + * @pg_is_enabled : is power gating enabled + * + * @intr_clear : clear pending interrupts + * @intr_enable : enable interrupts + * @intr_disable : disable interrupts + * @synchronize_irq : synchronize irqs + * + * @hbuf_free_slots : query for write buffer empty slots + * @hbuf_is_ready : query if write buffer is empty + * @hbuf_depth : query for write buffer depth + * + * @write : write a message to FW + * + * @rdbuf_full_slots : query how many slots are filled + * + * @read_hdr : get first 4 bytes (header) + * @read : read a buffer from the FW + */ +struct mei_hw_ops { + + bool (*host_is_ready)(struct mei_device *dev); + + bool (*hw_is_ready)(struct mei_device *dev); + int (*hw_reset)(struct mei_device *dev, bool enable); + int (*hw_start)(struct mei_device *dev); + void (*hw_config)(struct mei_device *dev); + + int (*fw_status)(struct mei_device *dev, struct mei_fw_status *fw_sts); + enum mei_pg_state (*pg_state)(struct mei_device *dev); + bool (*pg_in_transition)(struct mei_device *dev); + bool (*pg_is_enabled)(struct mei_device *dev); + + void (*intr_clear)(struct mei_device *dev); + void (*intr_enable)(struct mei_device *dev); + void (*intr_disable)(struct mei_device *dev); + void (*synchronize_irq)(struct mei_device *dev); + + int (*hbuf_free_slots)(struct mei_device *dev); + bool (*hbuf_is_ready)(struct mei_device *dev); + u32 (*hbuf_depth)(const struct mei_device *dev); + int (*write)(struct mei_device *dev, + const void *hdr, size_t hdr_len, + const void *data, size_t data_len); + + int (*rdbuf_full_slots)(struct mei_device *dev); + + u32 (*read_hdr)(const struct mei_device *dev); + int (*read)(struct mei_device *dev, + unsigned char *buf, unsigned long len); +}; + +/* MEI bus API*/ +void mei_cl_bus_rescan_work(struct work_struct *work); +void mei_cl_bus_dev_fixup(struct mei_cl_device *dev); +ssize_t __mei_cl_send(struct mei_cl *cl, u8 *buf, size_t length, + unsigned int mode); +ssize_t __mei_cl_recv(struct mei_cl *cl, u8 *buf, size_t length, + unsigned int mode, unsigned long timeout); +bool mei_cl_bus_rx_event(struct mei_cl *cl); +bool mei_cl_bus_notify_event(struct mei_cl *cl); +void mei_cl_bus_remove_devices(struct mei_device *bus); +int mei_cl_bus_init(void); +void mei_cl_bus_exit(void); + +/** + * enum mei_pg_event - power gating transition events + * + * @MEI_PG_EVENT_IDLE: the driver is not in power gating transition + * @MEI_PG_EVENT_WAIT: the driver is waiting for a pg event to complete + * @MEI_PG_EVENT_RECEIVED: the driver received pg event + * @MEI_PG_EVENT_INTR_WAIT: the driver is waiting for a pg event interrupt + * @MEI_PG_EVENT_INTR_RECEIVED: the driver received pg event interrupt + */ +enum mei_pg_event { + MEI_PG_EVENT_IDLE, + MEI_PG_EVENT_WAIT, + MEI_PG_EVENT_RECEIVED, + MEI_PG_EVENT_INTR_WAIT, + MEI_PG_EVENT_INTR_RECEIVED, +}; + +/** + * enum mei_pg_state - device internal power gating state + * + * @MEI_PG_OFF: device is not power gated - it is active + * @MEI_PG_ON: device is power gated - it is in lower power state + */ +enum mei_pg_state { + MEI_PG_OFF = 0, + MEI_PG_ON = 1, +}; + +const char *mei_pg_state_str(enum mei_pg_state state); + +/** + * struct mei_fw_version - MEI FW version struct + * + * @platform: platform identifier + * @major: major version field + * @minor: minor version field + * @buildno: build number version field + * @hotfix: hotfix number version field + */ +struct mei_fw_version { + u8 platform; + u8 major; + u16 minor; + u16 buildno; + u16 hotfix; +}; + +#define MEI_MAX_FW_VER_BLOCKS 3 + +/** + * struct mei_device - MEI private device struct + * + * @dev : device on a bus + * @cdev : character device + * @minor : minor number allocated for device + * + * @write_list : write pending list + * @write_waiting_list : write completion list + * @ctrl_wr_list : pending control write list + * @ctrl_rd_list : pending control read list + * @tx_queue_limit: tx queues per client linit + * + * @file_list : list of opened handles + * @open_handle_count: number of opened handles + * + * @device_lock : big device lock + * @timer_work : MEI timer delayed work (timeouts) + * + * @recvd_hw_ready : hw ready message received flag + * + * @wait_hw_ready : wait queue for receive HW ready message form FW + * @wait_pg : wait queue for receive PG message from FW + * @wait_hbm_start : wait queue for receive HBM start message from FW + * + * @reset_count : number of consecutive resets + * @dev_state : device state + * @hbm_state : state of host bus message protocol + * @init_clients_timer : HBM init handshake timeout + * + * @pg_event : power gating event + * @pg_domain : runtime PM domain + * + * @rd_msg_buf : control messages buffer + * @rd_msg_hdr : read message header storage + * + * @hbuf_is_ready : query if the host host/write buffer is ready + * + * @version : HBM protocol version in use + * @hbm_f_pg_supported : hbm feature pgi protocol + * @hbm_f_dc_supported : hbm feature dynamic clients + * @hbm_f_dot_supported : hbm feature disconnect on timeout + * @hbm_f_ev_supported : hbm feature event notification + * @hbm_f_fa_supported : hbm feature fixed address client + * @hbm_f_ie_supported : hbm feature immediate reply to enum request + * @hbm_f_os_supported : hbm feature support OS ver message + * @hbm_f_dr_supported : hbm feature dma ring supported + * + * @fw_ver : FW versions + * + * @fw_f_fw_ver_supported : fw feature: fw version supported + * + * @me_clients_rwsem: rw lock over me_clients list + * @me_clients : list of FW clients + * @me_clients_map : FW clients bit map + * @host_clients_map : host clients id pool + * + * @allow_fixed_address: allow user space to connect a fixed client + * @override_fixed_address: force allow fixed address behavior + * + * @reset_work : work item for the device reset + * @bus_rescan_work : work item for the bus rescan + * + * @device_list : mei client bus list + * @cl_bus_lock : client bus list lock + * + * @dbgfs_dir : debugfs mei root directory + * + * @ops: : hw specific operations + * @hw : hw specific data + */ +struct mei_device { + struct device *dev; + struct cdev cdev; + int minor; + + struct list_head write_list; + struct list_head write_waiting_list; + struct list_head ctrl_wr_list; + struct list_head ctrl_rd_list; + u8 tx_queue_limit; + + struct list_head file_list; + long open_handle_count; + + struct mutex device_lock; + struct delayed_work timer_work; + + bool recvd_hw_ready; + /* + * waiting queue for receive message from FW + */ + wait_queue_head_t wait_hw_ready; + wait_queue_head_t wait_pg; + wait_queue_head_t wait_hbm_start; + + /* + * mei device states + */ + unsigned long reset_count; + enum mei_dev_state dev_state; + enum mei_hbm_state hbm_state; + u16 init_clients_timer; + + /* + * Power Gating support + */ + enum mei_pg_event pg_event; +#ifdef CONFIG_PM + struct dev_pm_domain pg_domain; +#endif /* CONFIG_PM */ + + unsigned char rd_msg_buf[MEI_RD_MSG_BUF_SIZE]; + u32 rd_msg_hdr; + + /* write buffer */ + bool hbuf_is_ready; + + struct hbm_version version; + unsigned int hbm_f_pg_supported:1; + unsigned int hbm_f_dc_supported:1; + unsigned int hbm_f_dot_supported:1; + unsigned int hbm_f_ev_supported:1; + unsigned int hbm_f_fa_supported:1; + unsigned int hbm_f_ie_supported:1; + unsigned int hbm_f_os_supported:1; + unsigned int hbm_f_dr_supported:1; + + struct mei_fw_version fw_ver[MEI_MAX_FW_VER_BLOCKS]; + + unsigned int fw_f_fw_ver_supported:1; + + struct rw_semaphore me_clients_rwsem; + struct list_head me_clients; + DECLARE_BITMAP(me_clients_map, MEI_CLIENTS_MAX); + DECLARE_BITMAP(host_clients_map, MEI_CLIENTS_MAX); + + bool allow_fixed_address; + bool override_fixed_address; + + struct work_struct reset_work; + struct work_struct bus_rescan_work; + + /* List of bus devices */ + struct list_head device_list; + struct mutex cl_bus_lock; + +#if IS_ENABLED(CONFIG_DEBUG_FS) + struct dentry *dbgfs_dir; +#endif /* CONFIG_DEBUG_FS */ + + + const struct mei_hw_ops *ops; + char hw[0] __aligned(sizeof(void *)); +}; + +static inline unsigned long mei_secs_to_jiffies(unsigned long sec) +{ + return msecs_to_jiffies(sec * MSEC_PER_SEC); +} + +/** + * mei_data2slots - get slots number from a message length + * + * @length: size of the messages in bytes + * + * Return: number of slots + */ +static inline u32 mei_data2slots(size_t length) +{ + return DIV_ROUND_UP(length, MEI_SLOT_SIZE); +} + +/** + * mei_hbm2slots - get slots number from a hbm message length + * length + size of the mei message header + * + * @length: size of the messages in bytes + * + * Return: number of slots + */ +static inline u32 mei_hbm2slots(size_t length) +{ + return DIV_ROUND_UP(sizeof(struct mei_msg_hdr) + length, MEI_SLOT_SIZE); +} + +/** + * mei_slots2data - get data in slots - bytes from slots + * + * @slots: number of available slots + * + * Return: number of bytes in slots + */ +static inline u32 mei_slots2data(int slots) +{ + return slots * MEI_SLOT_SIZE; +} + +/* + * mei init function prototypes + */ +void mei_device_init(struct mei_device *dev, + struct device *device, + const struct mei_hw_ops *hw_ops); +int mei_reset(struct mei_device *dev); +int mei_start(struct mei_device *dev); +int mei_restart(struct mei_device *dev); +void mei_stop(struct mei_device *dev); +void mei_cancel_work(struct mei_device *dev); + +/* + * MEI interrupt functions prototype + */ + +void mei_timer(struct work_struct *work); +void mei_schedule_stall_timer(struct mei_device *dev); +int mei_irq_read_handler(struct mei_device *dev, + struct list_head *cmpl_list, s32 *slots); + +int mei_irq_write_handler(struct mei_device *dev, struct list_head *cmpl_list); +void mei_irq_compl_handler(struct mei_device *dev, struct list_head *cmpl_list); + +/* + * Register Access Function + */ + + +static inline void mei_hw_config(struct mei_device *dev) +{ + dev->ops->hw_config(dev); +} + +static inline enum mei_pg_state mei_pg_state(struct mei_device *dev) +{ + return dev->ops->pg_state(dev); +} + +static inline bool mei_pg_in_transition(struct mei_device *dev) +{ + return dev->ops->pg_in_transition(dev); +} + +static inline bool mei_pg_is_enabled(struct mei_device *dev) +{ + return dev->ops->pg_is_enabled(dev); +} + +static inline int mei_hw_reset(struct mei_device *dev, bool enable) +{ + return dev->ops->hw_reset(dev, enable); +} + +static inline int mei_hw_start(struct mei_device *dev) +{ + return dev->ops->hw_start(dev); +} + +static inline void mei_clear_interrupts(struct mei_device *dev) +{ + dev->ops->intr_clear(dev); +} + +static inline void mei_enable_interrupts(struct mei_device *dev) +{ + dev->ops->intr_enable(dev); +} + +static inline void mei_disable_interrupts(struct mei_device *dev) +{ + dev->ops->intr_disable(dev); +} + +static inline void mei_synchronize_irq(struct mei_device *dev) +{ + dev->ops->synchronize_irq(dev); +} + +static inline bool mei_host_is_ready(struct mei_device *dev) +{ + return dev->ops->host_is_ready(dev); +} +static inline bool mei_hw_is_ready(struct mei_device *dev) +{ + return dev->ops->hw_is_ready(dev); +} + +static inline bool mei_hbuf_is_ready(struct mei_device *dev) +{ + return dev->ops->hbuf_is_ready(dev); +} + +static inline int mei_hbuf_empty_slots(struct mei_device *dev) +{ + return dev->ops->hbuf_free_slots(dev); +} + +static inline u32 mei_hbuf_depth(const struct mei_device *dev) +{ + return dev->ops->hbuf_depth(dev); +} + +static inline int mei_write_message(struct mei_device *dev, + const void *hdr, size_t hdr_len, + const void *data, size_t data_len) +{ + return dev->ops->write(dev, hdr, hdr_len, data, data_len); +} + +static inline u32 mei_read_hdr(const struct mei_device *dev) +{ + return dev->ops->read_hdr(dev); +} + +static inline void mei_read_slots(struct mei_device *dev, + unsigned char *buf, unsigned long len) +{ + dev->ops->read(dev, buf, len); +} + +static inline int mei_count_full_read_slots(struct mei_device *dev) +{ + return dev->ops->rdbuf_full_slots(dev); +} + +static inline int mei_fw_status(struct mei_device *dev, + struct mei_fw_status *fw_status) +{ + return dev->ops->fw_status(dev, fw_status); +} + +bool mei_hbuf_acquire(struct mei_device *dev); + +bool mei_write_is_idle(struct mei_device *dev); + +#if IS_ENABLED(CONFIG_DEBUG_FS) +int mei_dbgfs_register(struct mei_device *dev, const char *name); +void mei_dbgfs_deregister(struct mei_device *dev); +#else +static inline int mei_dbgfs_register(struct mei_device *dev, const char *name) +{ + return 0; +} +static inline void mei_dbgfs_deregister(struct mei_device *dev) {} +#endif /* CONFIG_DEBUG_FS */ + +int mei_register(struct mei_device *dev, struct device *parent); +void mei_deregister(struct mei_device *dev); + +#define MEI_HDR_FMT "hdr:host=%02d me=%02d len=%d dma=%1d internal=%1d comp=%1d" +#define MEI_HDR_PRM(hdr) \ + (hdr)->host_addr, (hdr)->me_addr, \ + (hdr)->length, (hdr)->dma_ring, (hdr)->internal, (hdr)->msg_complete + +ssize_t mei_fw_status2str(struct mei_fw_status *fw_sts, char *buf, size_t len); +/** + * mei_fw_status_str - fetch and convert fw status registers to printable string + * + * @dev: the device structure + * @buf: string buffer at minimal size MEI_FW_STATUS_STR_SZ + * @len: buffer len must be >= MEI_FW_STATUS_STR_SZ + * + * Return: number of bytes written or < 0 on failure + */ +static inline ssize_t mei_fw_status_str(struct mei_device *dev, + char *buf, size_t len) +{ + struct mei_fw_status fw_status; + int ret; + + buf[0] = '\0'; + + ret = mei_fw_status(dev, &fw_status); + if (ret) + return ret; + + ret = mei_fw_status2str(&fw_status, buf, MEI_FW_STATUS_STR_SZ); + + return ret; +} + + +#endif diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c new file mode 100644 index 000000000..35086f67d --- /dev/null +++ b/drivers/misc/mei/pci-me.c @@ -0,0 +1,534 @@ +/* + * + * Intel Management Engine Interface (Intel MEI) Linux driver + * Copyright (c) 2003-2012, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +#include "mei_dev.h" +#include "client.h" +#include "hw-me-regs.h" +#include "hw-me.h" + +/* mei_pci_tbl - PCI Device ID Table */ +static const struct pci_device_id mei_me_pci_tbl[] = { + {MEI_PCI_DEVICE(MEI_DEV_ID_82946GZ, MEI_ME_ICH_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_82G35, MEI_ME_ICH_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_82Q965, MEI_ME_ICH_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_82G965, MEI_ME_ICH_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_82GM965, MEI_ME_ICH_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_82GME965, MEI_ME_ICH_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_ICH9_82Q35, MEI_ME_ICH_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_ICH9_82G33, MEI_ME_ICH_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_ICH9_82Q33, MEI_ME_ICH_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_ICH9_82X38, MEI_ME_ICH_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_ICH9_3200, MEI_ME_ICH_CFG)}, + + {MEI_PCI_DEVICE(MEI_DEV_ID_ICH9_6, MEI_ME_ICH_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_ICH9_7, MEI_ME_ICH_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_ICH9_8, MEI_ME_ICH_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_ICH9_9, MEI_ME_ICH_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_ICH9_10, MEI_ME_ICH_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_ICH9M_1, MEI_ME_ICH_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_ICH9M_2, MEI_ME_ICH_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_ICH9M_3, MEI_ME_ICH_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_ICH9M_4, MEI_ME_ICH_CFG)}, + + {MEI_PCI_DEVICE(MEI_DEV_ID_ICH10_1, MEI_ME_ICH10_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_ICH10_2, MEI_ME_ICH10_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_ICH10_3, MEI_ME_ICH10_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_ICH10_4, MEI_ME_ICH10_CFG)}, + + {MEI_PCI_DEVICE(MEI_DEV_ID_IBXPK_1, MEI_ME_PCH6_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_IBXPK_2, MEI_ME_PCH6_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_CPT_1, MEI_ME_PCH_CPT_PBG_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_PBG_1, MEI_ME_PCH_CPT_PBG_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_PPT_1, MEI_ME_PCH7_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_PPT_2, MEI_ME_PCH7_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_PPT_3, MEI_ME_PCH7_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_LPT_H, MEI_ME_PCH8_SPS_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_LPT_W, MEI_ME_PCH8_SPS_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_LPT_LP, MEI_ME_PCH8_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_LPT_HR, MEI_ME_PCH8_SPS_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_WPT_LP, MEI_ME_PCH8_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_WPT_LP_2, MEI_ME_PCH8_CFG)}, + + {MEI_PCI_DEVICE(MEI_DEV_ID_SPT, MEI_ME_PCH8_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_SPT_2, MEI_ME_PCH8_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_SPT_H, MEI_ME_PCH8_SPS_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_SPT_H_2, MEI_ME_PCH8_SPS_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_LBG, MEI_ME_PCH12_CFG)}, + + {MEI_PCI_DEVICE(MEI_DEV_ID_BXT_M, MEI_ME_PCH8_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_APL_I, MEI_ME_PCH8_CFG)}, + + {MEI_PCI_DEVICE(MEI_DEV_ID_DNV_IE, MEI_ME_PCH8_CFG)}, + + {MEI_PCI_DEVICE(MEI_DEV_ID_GLK, MEI_ME_PCH8_CFG)}, + + {MEI_PCI_DEVICE(MEI_DEV_ID_KBP, MEI_ME_PCH8_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_KBP_2, MEI_ME_PCH8_CFG)}, + + {MEI_PCI_DEVICE(MEI_DEV_ID_CNP_LP, MEI_ME_PCH8_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_CNP_LP_4, MEI_ME_PCH8_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_CNP_H, MEI_ME_PCH8_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_CNP_H_4, MEI_ME_PCH8_CFG)}, + + {MEI_PCI_DEVICE(MEI_DEV_ID_CMP_LP, MEI_ME_PCH12_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_CMP_LP_3, MEI_ME_PCH8_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_CMP_V, MEI_ME_PCH12_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_CMP_H, MEI_ME_PCH12_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_CMP_H_3, MEI_ME_PCH8_CFG)}, + + {MEI_PCI_DEVICE(MEI_DEV_ID_ICP_LP, MEI_ME_PCH12_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_ICP_N, MEI_ME_PCH12_CFG)}, + + {MEI_PCI_DEVICE(MEI_DEV_ID_TGP_LP, MEI_ME_PCH12_CFG)}, + + {MEI_PCI_DEVICE(MEI_DEV_ID_MCC, MEI_ME_PCH12_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_MCC_4, MEI_ME_PCH8_CFG)}, + + {MEI_PCI_DEVICE(MEI_DEV_ID_CDF, MEI_ME_PCH8_CFG)}, + + /* required last entry */ + {0, } +}; + +MODULE_DEVICE_TABLE(pci, mei_me_pci_tbl); + +#ifdef CONFIG_PM +static inline void mei_me_set_pm_domain(struct mei_device *dev); +static inline void mei_me_unset_pm_domain(struct mei_device *dev); +#else +static inline void mei_me_set_pm_domain(struct mei_device *dev) {} +static inline void mei_me_unset_pm_domain(struct mei_device *dev) {} +#endif /* CONFIG_PM */ + +/** + * mei_me_quirk_probe - probe for devices that doesn't valid ME interface + * + * @pdev: PCI device structure + * @cfg: per generation config + * + * Return: true if ME Interface is valid, false otherwise + */ +static bool mei_me_quirk_probe(struct pci_dev *pdev, + const struct mei_cfg *cfg) +{ + if (cfg->quirk_probe && cfg->quirk_probe(pdev)) { + dev_info(&pdev->dev, "Device doesn't have valid ME Interface\n"); + return false; + } + + return true; +} + +/** + * mei_me_probe - Device Initialization Routine + * + * @pdev: PCI device structure + * @ent: entry in kcs_pci_tbl + * + * Return: 0 on success, <0 on failure. + */ +static int mei_me_probe(struct pci_dev *pdev, const struct pci_device_id *ent) +{ + const struct mei_cfg *cfg; + struct mei_device *dev; + struct mei_me_hw *hw; + unsigned int irqflags; + int err; + + cfg = mei_me_get_cfg(ent->driver_data); + if (!cfg) + return -ENODEV; + + if (!mei_me_quirk_probe(pdev, cfg)) + return -ENODEV; + + /* enable pci dev */ + err = pcim_enable_device(pdev); + if (err) { + dev_err(&pdev->dev, "failed to enable pci device.\n"); + goto end; + } + /* set PCI host mastering */ + pci_set_master(pdev); + /* pci request regions and mapping IO device memory for mei driver */ + err = pcim_iomap_regions(pdev, BIT(0), KBUILD_MODNAME); + if (err) { + dev_err(&pdev->dev, "failed to get pci regions.\n"); + goto end; + } + + if (dma_set_mask(&pdev->dev, DMA_BIT_MASK(64)) || + dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64))) { + + err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32)); + if (err) + err = dma_set_coherent_mask(&pdev->dev, + DMA_BIT_MASK(32)); + } + if (err) { + dev_err(&pdev->dev, "No usable DMA configuration, aborting\n"); + goto end; + } + + /* allocates and initializes the mei dev structure */ + dev = mei_me_dev_init(pdev, cfg); + if (!dev) { + err = -ENOMEM; + goto end; + } + hw = to_me_hw(dev); + hw->mem_addr = pcim_iomap_table(pdev)[0]; + + pci_enable_msi(pdev); + + /* request and enable interrupt */ + irqflags = pci_dev_msi_enabled(pdev) ? IRQF_ONESHOT : IRQF_SHARED; + + err = request_threaded_irq(pdev->irq, + mei_me_irq_quick_handler, + mei_me_irq_thread_handler, + irqflags, KBUILD_MODNAME, dev); + if (err) { + dev_err(&pdev->dev, "request_threaded_irq failure. irq = %d\n", + pdev->irq); + goto end; + } + + if (mei_start(dev)) { + dev_err(&pdev->dev, "init hw failure.\n"); + err = -ENODEV; + goto release_irq; + } + + pm_runtime_set_autosuspend_delay(&pdev->dev, MEI_ME_RPM_TIMEOUT); + pm_runtime_use_autosuspend(&pdev->dev); + + err = mei_register(dev, &pdev->dev); + if (err) + goto stop; + + pci_set_drvdata(pdev, dev); + + /* + * MEI requires to resume from runtime suspend mode + * in order to perform link reset flow upon system suspend. + */ + dev_pm_set_driver_flags(&pdev->dev, DPM_FLAG_NEVER_SKIP); + + /* + * ME maps runtime suspend/resume to D0i states, + * hence we need to go around native PCI runtime service which + * eventually brings the device into D3cold/hot state, + * but the mei device cannot wake up from D3 unlike from D0i3. + * To get around the PCI device native runtime pm, + * ME uses runtime pm domain handlers which take precedence + * over the driver's pm handlers. + */ + mei_me_set_pm_domain(dev); + + if (mei_pg_is_enabled(dev)) { + pm_runtime_put_noidle(&pdev->dev); + if (hw->d0i3_supported) + pm_runtime_allow(&pdev->dev); + } + + dev_dbg(&pdev->dev, "initialization successful.\n"); + + return 0; + +stop: + mei_stop(dev); +release_irq: + mei_cancel_work(dev); + mei_disable_interrupts(dev); + free_irq(pdev->irq, dev); +end: + dev_err(&pdev->dev, "initialization failed.\n"); + return err; +} + +/** + * mei_me_shutdown - Device Removal Routine + * + * @pdev: PCI device structure + * + * mei_me_shutdown is called from the reboot notifier + * it's a simplified version of remove so we go down + * faster. + */ +static void mei_me_shutdown(struct pci_dev *pdev) +{ + struct mei_device *dev; + + dev = pci_get_drvdata(pdev); + if (!dev) + return; + + dev_dbg(&pdev->dev, "shutdown\n"); + mei_stop(dev); + + mei_me_unset_pm_domain(dev); + + mei_disable_interrupts(dev); + free_irq(pdev->irq, dev); +} + +/** + * mei_me_remove - Device Removal Routine + * + * @pdev: PCI device structure + * + * mei_me_remove is called by the PCI subsystem to alert the driver + * that it should release a PCI device. + */ +static void mei_me_remove(struct pci_dev *pdev) +{ + struct mei_device *dev; + + dev = pci_get_drvdata(pdev); + if (!dev) + return; + + if (mei_pg_is_enabled(dev)) + pm_runtime_get_noresume(&pdev->dev); + + dev_dbg(&pdev->dev, "stop\n"); + mei_stop(dev); + + mei_me_unset_pm_domain(dev); + + mei_disable_interrupts(dev); + + free_irq(pdev->irq, dev); + + mei_deregister(dev); +} + +#ifdef CONFIG_PM_SLEEP +static int mei_me_pci_suspend(struct device *device) +{ + struct pci_dev *pdev = to_pci_dev(device); + struct mei_device *dev = pci_get_drvdata(pdev); + + if (!dev) + return -ENODEV; + + dev_dbg(&pdev->dev, "suspend\n"); + + mei_stop(dev); + + mei_disable_interrupts(dev); + + free_irq(pdev->irq, dev); + pci_disable_msi(pdev); + + return 0; +} + +static int mei_me_pci_resume(struct device *device) +{ + struct pci_dev *pdev = to_pci_dev(device); + struct mei_device *dev; + unsigned int irqflags; + int err; + + dev = pci_get_drvdata(pdev); + if (!dev) + return -ENODEV; + + pci_enable_msi(pdev); + + irqflags = pci_dev_msi_enabled(pdev) ? IRQF_ONESHOT : IRQF_SHARED; + + /* request and enable interrupt */ + err = request_threaded_irq(pdev->irq, + mei_me_irq_quick_handler, + mei_me_irq_thread_handler, + irqflags, KBUILD_MODNAME, dev); + + if (err) { + dev_err(&pdev->dev, "request_threaded_irq failed: irq = %d.\n", + pdev->irq); + return err; + } + + err = mei_restart(dev); + if (err) + return err; + + /* Start timer if stopped in suspend */ + schedule_delayed_work(&dev->timer_work, HZ); + + return 0; +} +#endif /* CONFIG_PM_SLEEP */ + +#ifdef CONFIG_PM +static int mei_me_pm_runtime_idle(struct device *device) +{ + struct pci_dev *pdev = to_pci_dev(device); + struct mei_device *dev; + + dev_dbg(&pdev->dev, "rpm: me: runtime_idle\n"); + + dev = pci_get_drvdata(pdev); + if (!dev) + return -ENODEV; + if (mei_write_is_idle(dev)) + pm_runtime_autosuspend(device); + + return -EBUSY; +} + +static int mei_me_pm_runtime_suspend(struct device *device) +{ + struct pci_dev *pdev = to_pci_dev(device); + struct mei_device *dev; + int ret; + + dev_dbg(&pdev->dev, "rpm: me: runtime suspend\n"); + + dev = pci_get_drvdata(pdev); + if (!dev) + return -ENODEV; + + mutex_lock(&dev->device_lock); + + if (mei_write_is_idle(dev)) + ret = mei_me_pg_enter_sync(dev); + else + ret = -EAGAIN; + + mutex_unlock(&dev->device_lock); + + dev_dbg(&pdev->dev, "rpm: me: runtime suspend ret=%d\n", ret); + + if (ret && ret != -EAGAIN) + schedule_work(&dev->reset_work); + + return ret; +} + +static int mei_me_pm_runtime_resume(struct device *device) +{ + struct pci_dev *pdev = to_pci_dev(device); + struct mei_device *dev; + int ret; + + dev_dbg(&pdev->dev, "rpm: me: runtime resume\n"); + + dev = pci_get_drvdata(pdev); + if (!dev) + return -ENODEV; + + mutex_lock(&dev->device_lock); + + ret = mei_me_pg_exit_sync(dev); + + mutex_unlock(&dev->device_lock); + + dev_dbg(&pdev->dev, "rpm: me: runtime resume ret = %d\n", ret); + + if (ret) + schedule_work(&dev->reset_work); + + return ret; +} + +/** + * mei_me_set_pm_domain - fill and set pm domain structure for device + * + * @dev: mei_device + */ +static inline void mei_me_set_pm_domain(struct mei_device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev->dev); + + if (pdev->dev.bus && pdev->dev.bus->pm) { + dev->pg_domain.ops = *pdev->dev.bus->pm; + + dev->pg_domain.ops.runtime_suspend = mei_me_pm_runtime_suspend; + dev->pg_domain.ops.runtime_resume = mei_me_pm_runtime_resume; + dev->pg_domain.ops.runtime_idle = mei_me_pm_runtime_idle; + + dev_pm_domain_set(&pdev->dev, &dev->pg_domain); + } +} + +/** + * mei_me_unset_pm_domain - clean pm domain structure for device + * + * @dev: mei_device + */ +static inline void mei_me_unset_pm_domain(struct mei_device *dev) +{ + /* stop using pm callbacks if any */ + dev_pm_domain_set(dev->dev, NULL); +} + +static const struct dev_pm_ops mei_me_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(mei_me_pci_suspend, + mei_me_pci_resume) + SET_RUNTIME_PM_OPS( + mei_me_pm_runtime_suspend, + mei_me_pm_runtime_resume, + mei_me_pm_runtime_idle) +}; + +#define MEI_ME_PM_OPS (&mei_me_pm_ops) +#else +#define MEI_ME_PM_OPS NULL +#endif /* CONFIG_PM */ +/* + * PCI driver structure + */ +static struct pci_driver mei_me_driver = { + .name = KBUILD_MODNAME, + .id_table = mei_me_pci_tbl, + .probe = mei_me_probe, + .remove = mei_me_remove, + .shutdown = mei_me_shutdown, + .driver.pm = MEI_ME_PM_OPS, + .driver.probe_type = PROBE_PREFER_ASYNCHRONOUS, +}; + +module_pci_driver(mei_me_driver); + +MODULE_AUTHOR("Intel Corporation"); +MODULE_DESCRIPTION("Intel(R) Management Engine Interface"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/misc/mei/pci-txe.c b/drivers/misc/mei/pci-txe.c new file mode 100644 index 000000000..e1b909123 --- /dev/null +++ b/drivers/misc/mei/pci-txe.c @@ -0,0 +1,422 @@ +/* + * + * Intel Management Engine Interface (Intel MEI) Linux driver + * Copyright (c) 2013-2014, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + + +#include "mei_dev.h" +#include "hw-txe.h" + +static const struct pci_device_id mei_txe_pci_tbl[] = { + {PCI_VDEVICE(INTEL, 0x0F18)}, /* Baytrail */ + {PCI_VDEVICE(INTEL, 0x2298)}, /* Cherrytrail */ + + {0, } +}; +MODULE_DEVICE_TABLE(pci, mei_txe_pci_tbl); + +#ifdef CONFIG_PM +static inline void mei_txe_set_pm_domain(struct mei_device *dev); +static inline void mei_txe_unset_pm_domain(struct mei_device *dev); +#else +static inline void mei_txe_set_pm_domain(struct mei_device *dev) {} +static inline void mei_txe_unset_pm_domain(struct mei_device *dev) {} +#endif /* CONFIG_PM */ + +/** + * mei_txe_probe - Device Initialization Routine + * + * @pdev: PCI device structure + * @ent: entry in mei_txe_pci_tbl + * + * Return: 0 on success, <0 on failure. + */ +static int mei_txe_probe(struct pci_dev *pdev, const struct pci_device_id *ent) +{ + struct mei_device *dev; + struct mei_txe_hw *hw; + const int mask = BIT(SEC_BAR) | BIT(BRIDGE_BAR); + int err; + + /* enable pci dev */ + err = pcim_enable_device(pdev); + if (err) { + dev_err(&pdev->dev, "failed to enable pci device.\n"); + goto end; + } + /* set PCI host mastering */ + pci_set_master(pdev); + /* pci request regions and mapping IO device memory for mei driver */ + err = pcim_iomap_regions(pdev, mask, KBUILD_MODNAME); + if (err) { + dev_err(&pdev->dev, "failed to get pci regions.\n"); + goto end; + } + + err = pci_set_dma_mask(pdev, DMA_BIT_MASK(36)); + if (err) { + err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); + if (err) { + dev_err(&pdev->dev, "No suitable DMA available.\n"); + goto end; + } + } + + /* allocates and initializes the mei dev structure */ + dev = mei_txe_dev_init(pdev); + if (!dev) { + err = -ENOMEM; + goto end; + } + hw = to_txe_hw(dev); + hw->mem_addr = pcim_iomap_table(pdev); + + pci_enable_msi(pdev); + + /* clear spurious interrupts */ + mei_clear_interrupts(dev); + + /* request and enable interrupt */ + if (pci_dev_msi_enabled(pdev)) + err = request_threaded_irq(pdev->irq, + NULL, + mei_txe_irq_thread_handler, + IRQF_ONESHOT, KBUILD_MODNAME, dev); + else + err = request_threaded_irq(pdev->irq, + mei_txe_irq_quick_handler, + mei_txe_irq_thread_handler, + IRQF_SHARED, KBUILD_MODNAME, dev); + if (err) { + dev_err(&pdev->dev, "mei: request_threaded_irq failure. irq = %d\n", + pdev->irq); + goto end; + } + + if (mei_start(dev)) { + dev_err(&pdev->dev, "init hw failure.\n"); + err = -ENODEV; + goto release_irq; + } + + pm_runtime_set_autosuspend_delay(&pdev->dev, MEI_TXI_RPM_TIMEOUT); + pm_runtime_use_autosuspend(&pdev->dev); + + err = mei_register(dev, &pdev->dev); + if (err) + goto stop; + + pci_set_drvdata(pdev, dev); + + /* + * MEI requires to resume from runtime suspend mode + * in order to perform link reset flow upon system suspend. + */ + dev_pm_set_driver_flags(&pdev->dev, DPM_FLAG_NEVER_SKIP); + + /* + * TXE maps runtime suspend/resume to own power gating states, + * hence we need to go around native PCI runtime service which + * eventually brings the device into D3cold/hot state. + * But the TXE device cannot wake up from D3 unlike from own + * power gating. To get around PCI device native runtime pm, + * TXE uses runtime pm domain handlers which take precedence. + */ + mei_txe_set_pm_domain(dev); + + pm_runtime_put_noidle(&pdev->dev); + + return 0; + +stop: + mei_stop(dev); +release_irq: + mei_cancel_work(dev); + mei_disable_interrupts(dev); + free_irq(pdev->irq, dev); +end: + dev_err(&pdev->dev, "initialization failed.\n"); + return err; +} + +/** + * mei_txe_remove - Device Shutdown Routine + * + * @pdev: PCI device structure + * + * mei_txe_shutdown is called from the reboot notifier + * it's a simplified version of remove so we go down + * faster. + */ +static void mei_txe_shutdown(struct pci_dev *pdev) +{ + struct mei_device *dev; + + dev = pci_get_drvdata(pdev); + if (!dev) + return; + + dev_dbg(&pdev->dev, "shutdown\n"); + mei_stop(dev); + + mei_txe_unset_pm_domain(dev); + + mei_disable_interrupts(dev); + free_irq(pdev->irq, dev); +} + +/** + * mei_txe_remove - Device Removal Routine + * + * @pdev: PCI device structure + * + * mei_remove is called by the PCI subsystem to alert the driver + * that it should release a PCI device. + */ +static void mei_txe_remove(struct pci_dev *pdev) +{ + struct mei_device *dev; + + dev = pci_get_drvdata(pdev); + if (!dev) { + dev_err(&pdev->dev, "mei: dev == NULL\n"); + return; + } + + pm_runtime_get_noresume(&pdev->dev); + + mei_stop(dev); + + mei_txe_unset_pm_domain(dev); + + mei_disable_interrupts(dev); + free_irq(pdev->irq, dev); + + mei_deregister(dev); +} + + +#ifdef CONFIG_PM_SLEEP +static int mei_txe_pci_suspend(struct device *device) +{ + struct pci_dev *pdev = to_pci_dev(device); + struct mei_device *dev = pci_get_drvdata(pdev); + + if (!dev) + return -ENODEV; + + dev_dbg(&pdev->dev, "suspend\n"); + + mei_stop(dev); + + mei_disable_interrupts(dev); + + free_irq(pdev->irq, dev); + pci_disable_msi(pdev); + + return 0; +} + +static int mei_txe_pci_resume(struct device *device) +{ + struct pci_dev *pdev = to_pci_dev(device); + struct mei_device *dev; + int err; + + dev = pci_get_drvdata(pdev); + if (!dev) + return -ENODEV; + + pci_enable_msi(pdev); + + mei_clear_interrupts(dev); + + /* request and enable interrupt */ + if (pci_dev_msi_enabled(pdev)) + err = request_threaded_irq(pdev->irq, + NULL, + mei_txe_irq_thread_handler, + IRQF_ONESHOT, KBUILD_MODNAME, dev); + else + err = request_threaded_irq(pdev->irq, + mei_txe_irq_quick_handler, + mei_txe_irq_thread_handler, + IRQF_SHARED, KBUILD_MODNAME, dev); + if (err) { + dev_err(&pdev->dev, "request_threaded_irq failed: irq = %d.\n", + pdev->irq); + return err; + } + + err = mei_restart(dev); + + return err; +} +#endif /* CONFIG_PM_SLEEP */ + +#ifdef CONFIG_PM +static int mei_txe_pm_runtime_idle(struct device *device) +{ + struct pci_dev *pdev = to_pci_dev(device); + struct mei_device *dev; + + dev_dbg(&pdev->dev, "rpm: txe: runtime_idle\n"); + + dev = pci_get_drvdata(pdev); + if (!dev) + return -ENODEV; + if (mei_write_is_idle(dev)) + pm_runtime_autosuspend(device); + + return -EBUSY; +} +static int mei_txe_pm_runtime_suspend(struct device *device) +{ + struct pci_dev *pdev = to_pci_dev(device); + struct mei_device *dev; + int ret; + + dev_dbg(&pdev->dev, "rpm: txe: runtime suspend\n"); + + dev = pci_get_drvdata(pdev); + if (!dev) + return -ENODEV; + + mutex_lock(&dev->device_lock); + + if (mei_write_is_idle(dev)) + ret = mei_txe_aliveness_set_sync(dev, 0); + else + ret = -EAGAIN; + + /* keep irq on we are staying in D0 */ + + dev_dbg(&pdev->dev, "rpm: txe: runtime suspend ret=%d\n", ret); + + mutex_unlock(&dev->device_lock); + + if (ret && ret != -EAGAIN) + schedule_work(&dev->reset_work); + + return ret; +} + +static int mei_txe_pm_runtime_resume(struct device *device) +{ + struct pci_dev *pdev = to_pci_dev(device); + struct mei_device *dev; + int ret; + + dev_dbg(&pdev->dev, "rpm: txe: runtime resume\n"); + + dev = pci_get_drvdata(pdev); + if (!dev) + return -ENODEV; + + mutex_lock(&dev->device_lock); + + mei_enable_interrupts(dev); + + ret = mei_txe_aliveness_set_sync(dev, 1); + + mutex_unlock(&dev->device_lock); + + dev_dbg(&pdev->dev, "rpm: txe: runtime resume ret = %d\n", ret); + + if (ret) + schedule_work(&dev->reset_work); + + return ret; +} + +/** + * mei_txe_set_pm_domain - fill and set pm domain structure for device + * + * @dev: mei_device + */ +static inline void mei_txe_set_pm_domain(struct mei_device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev->dev); + + if (pdev->dev.bus && pdev->dev.bus->pm) { + dev->pg_domain.ops = *pdev->dev.bus->pm; + + dev->pg_domain.ops.runtime_suspend = mei_txe_pm_runtime_suspend; + dev->pg_domain.ops.runtime_resume = mei_txe_pm_runtime_resume; + dev->pg_domain.ops.runtime_idle = mei_txe_pm_runtime_idle; + + dev_pm_domain_set(&pdev->dev, &dev->pg_domain); + } +} + +/** + * mei_txe_unset_pm_domain - clean pm domain structure for device + * + * @dev: mei_device + */ +static inline void mei_txe_unset_pm_domain(struct mei_device *dev) +{ + /* stop using pm callbacks if any */ + dev_pm_domain_set(dev->dev, NULL); +} + +static const struct dev_pm_ops mei_txe_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(mei_txe_pci_suspend, + mei_txe_pci_resume) + SET_RUNTIME_PM_OPS( + mei_txe_pm_runtime_suspend, + mei_txe_pm_runtime_resume, + mei_txe_pm_runtime_idle) +}; + +#define MEI_TXE_PM_OPS (&mei_txe_pm_ops) +#else +#define MEI_TXE_PM_OPS NULL +#endif /* CONFIG_PM */ + +/* + * PCI driver structure + */ +static struct pci_driver mei_txe_driver = { + .name = KBUILD_MODNAME, + .id_table = mei_txe_pci_tbl, + .probe = mei_txe_probe, + .remove = mei_txe_remove, + .shutdown = mei_txe_shutdown, + .driver.pm = MEI_TXE_PM_OPS, +}; + +module_pci_driver(mei_txe_driver); + +MODULE_AUTHOR("Intel Corporation"); +MODULE_DESCRIPTION("Intel(R) Trusted Execution Environment Interface"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/misc/mic/Kconfig b/drivers/misc/mic/Kconfig new file mode 100644 index 000000000..227cc7443 --- /dev/null +++ b/drivers/misc/mic/Kconfig @@ -0,0 +1,156 @@ +menu "Intel MIC & related support" + +comment "Intel MIC Bus Driver" + +config INTEL_MIC_BUS + tristate "Intel MIC Bus Driver" + depends on 64BIT && PCI && X86 && X86_DEV_DMA_OPS + help + This option is selected by any driver which registers a + device or driver on the MIC Bus, such as CONFIG_INTEL_MIC_HOST, + CONFIG_INTEL_MIC_CARD, CONFIG_INTEL_MIC_X100_DMA etc. + + If you are building a host/card kernel with an Intel MIC device + then say M (recommended) or Y, else say N. If unsure say N. + + More information about the Intel MIC family as well as the Linux + OS and tools for MIC to use with this driver are available from + . + +comment "SCIF Bus Driver" + +config SCIF_BUS + tristate "SCIF Bus Driver" + depends on 64BIT && PCI && X86 && X86_DEV_DMA_OPS + help + This option is selected by any driver which registers a + device or driver on the SCIF Bus, such as CONFIG_INTEL_MIC_HOST + and CONFIG_INTEL_MIC_CARD. + + If you are building a host/card kernel with an Intel MIC device + then say M (recommended) or Y, else say N. If unsure say N. + + More information about the Intel MIC family as well as the Linux + OS and tools for MIC to use with this driver are available from + . + +comment "VOP Bus Driver" + +config VOP_BUS + tristate "VOP Bus Driver" + depends on 64BIT && PCI && X86 && X86_DEV_DMA_OPS + help + This option is selected by any driver which registers a + device or driver on the VOP Bus, such as CONFIG_INTEL_MIC_HOST + and CONFIG_INTEL_MIC_CARD. + + If you are building a host/card kernel with an Intel MIC device + then say M (recommended) or Y, else say N. If unsure say N. + + More information about the Intel MIC family as well as the Linux + OS and tools for MIC to use with this driver are available from + . + +comment "Intel MIC Host Driver" + +config INTEL_MIC_HOST + tristate "Intel MIC Host Driver" + depends on 64BIT && PCI && X86 + depends on INTEL_MIC_BUS && SCIF_BUS && MIC_COSM && VOP_BUS + help + This enables Host Driver support for the Intel Many Integrated + Core (MIC) family of PCIe form factor coprocessor devices that + run a 64 bit Linux OS. The driver manages card OS state and + enables communication between host and card. Intel MIC X100 + devices are currently supported. + + If you are building a host kernel with an Intel MIC device then + say M (recommended) or Y, else say N. If unsure say N. + + More information about the Intel MIC family as well as the Linux + OS and tools for MIC to use with this driver are available from + . + +comment "Intel MIC Card Driver" + +config INTEL_MIC_CARD + tristate "Intel MIC Card Driver" + depends on 64BIT && X86 + depends on INTEL_MIC_BUS && SCIF_BUS && MIC_COSM && VOP_BUS + select VIRTIO + help + This enables card driver support for the Intel Many Integrated + Core (MIC) device family. The card driver communicates shutdown/ + crash events to the host and allows registration/configuration of + virtio devices. Intel MIC X100 devices are currently supported. + + If you are building a card kernel for an Intel MIC device then + say M (recommended) or Y, else say N. If unsure say N. + + For more information see + . + +comment "SCIF Driver" + +config SCIF + tristate "SCIF Driver" + depends on 64BIT && PCI && X86 && SCIF_BUS && IOMMU_SUPPORT + select IOMMU_IOVA + help + This enables SCIF Driver support for the Intel Many Integrated + Core (MIC) family of PCIe form factor coprocessor devices that + run a 64 bit Linux OS. The Symmetric Communication Interface + (SCIF (pronounced as skiff)) is a low level communications API + across PCIe currently implemented for MIC. + + If you are building a host kernel with an Intel MIC device then + say M (recommended) or Y, else say N. If unsure say N. + + More information about the Intel MIC family as well as the Linux + OS and tools for MIC to use with this driver are available from + . + +comment "Intel MIC Coprocessor State Management (COSM) Drivers" + +config MIC_COSM + tristate "Intel MIC Coprocessor State Management (COSM) Drivers" + depends on 64BIT && PCI && X86 && SCIF + help + This enables COSM driver support for the Intel Many + Integrated Core (MIC) family of PCIe form factor coprocessor + devices. COSM drivers implement functions such as boot, + shutdown, reset and reboot of MIC devices. + + If you are building a host kernel with an Intel MIC device then + say M (recommended) or Y, else say N. If unsure say N. + + More information about the Intel MIC family as well as the Linux + OS and tools for MIC to use with this driver are available from + . + +comment "VOP Driver" + +config VOP + tristate "VOP Driver" + depends on 64BIT && PCI && X86 && VOP_BUS + select VHOST_RING + select VIRTIO + help + This enables VOP (Virtio over PCIe) Driver support for the Intel + Many Integrated Core (MIC) family of PCIe form factor coprocessor + devices. The VOP driver allows virtio drivers, e.g. net, console + and block drivers, on the card connect to user space virtio + devices on the host. + + If you are building a host kernel with an Intel MIC device then + say M (recommended) or Y, else say N. If unsure say N. + + More information about the Intel MIC family as well as the Linux + OS and tools for MIC to use with this driver are available from + . + +if VOP +source "drivers/vhost/Kconfig.vringh" +endif + +endmenu diff --git a/drivers/misc/mic/Makefile b/drivers/misc/mic/Makefile new file mode 100644 index 000000000..1a43622b1 --- /dev/null +++ b/drivers/misc/mic/Makefile @@ -0,0 +1,12 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile - Intel MIC Linux driver. +# Copyright(c) 2013, Intel Corporation. +# +obj-$(CONFIG_INTEL_MIC_HOST) += host/ +obj-$(CONFIG_INTEL_MIC_CARD) += card/ +obj-y += bus/ +obj-$(CONFIG_SCIF) += scif/ +obj-$(CONFIG_MIC_COSM) += cosm/ +obj-$(CONFIG_MIC_COSM) += cosm_client/ +obj-$(CONFIG_VOP) += vop/ diff --git a/drivers/misc/mic/bus/Makefile b/drivers/misc/mic/bus/Makefile new file mode 100644 index 000000000..8758a7daa --- /dev/null +++ b/drivers/misc/mic/bus/Makefile @@ -0,0 +1,8 @@ +# +# Makefile - Intel MIC Linux driver. +# Copyright(c) 2014, Intel Corporation. +# +obj-$(CONFIG_INTEL_MIC_BUS) += mic_bus.o +obj-$(CONFIG_SCIF_BUS) += scif_bus.o +obj-$(CONFIG_MIC_COSM) += cosm_bus.o +obj-$(CONFIG_VOP_BUS) += vop_bus.o diff --git a/drivers/misc/mic/bus/cosm_bus.c b/drivers/misc/mic/bus/cosm_bus.c new file mode 100644 index 000000000..d31d6c6e6 --- /dev/null +++ b/drivers/misc/mic/bus/cosm_bus.c @@ -0,0 +1,141 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2015 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Intel MIC COSM Bus Driver + */ +#include +#include +#include +#include "cosm_bus.h" + +/* Unique numbering for cosm devices. */ +static DEFINE_IDA(cosm_index_ida); + +static int cosm_dev_probe(struct device *d) +{ + struct cosm_device *dev = dev_to_cosm(d); + struct cosm_driver *drv = drv_to_cosm(dev->dev.driver); + + return drv->probe(dev); +} + +static int cosm_dev_remove(struct device *d) +{ + struct cosm_device *dev = dev_to_cosm(d); + struct cosm_driver *drv = drv_to_cosm(dev->dev.driver); + + drv->remove(dev); + return 0; +} + +static struct bus_type cosm_bus = { + .name = "cosm_bus", + .probe = cosm_dev_probe, + .remove = cosm_dev_remove, +}; + +int cosm_register_driver(struct cosm_driver *driver) +{ + driver->driver.bus = &cosm_bus; + return driver_register(&driver->driver); +} +EXPORT_SYMBOL_GPL(cosm_register_driver); + +void cosm_unregister_driver(struct cosm_driver *driver) +{ + driver_unregister(&driver->driver); +} +EXPORT_SYMBOL_GPL(cosm_unregister_driver); + +static inline void cosm_release_dev(struct device *d) +{ + struct cosm_device *cdev = dev_to_cosm(d); + + kfree(cdev); +} + +struct cosm_device * +cosm_register_device(struct device *pdev, struct cosm_hw_ops *hw_ops) +{ + struct cosm_device *cdev; + int ret; + + cdev = kzalloc(sizeof(*cdev), GFP_KERNEL); + if (!cdev) + return ERR_PTR(-ENOMEM); + + cdev->dev.parent = pdev; + cdev->dev.release = cosm_release_dev; + cdev->hw_ops = hw_ops; + dev_set_drvdata(&cdev->dev, cdev); + cdev->dev.bus = &cosm_bus; + + /* Assign a unique device index and hence name */ + ret = ida_simple_get(&cosm_index_ida, 0, 0, GFP_KERNEL); + if (ret < 0) + goto free_cdev; + + cdev->index = ret; + cdev->dev.id = ret; + dev_set_name(&cdev->dev, "cosm-dev%u", cdev->index); + + ret = device_register(&cdev->dev); + if (ret) + goto ida_remove; + return cdev; +ida_remove: + ida_simple_remove(&cosm_index_ida, cdev->index); +free_cdev: + put_device(&cdev->dev); + return ERR_PTR(ret); +} +EXPORT_SYMBOL_GPL(cosm_register_device); + +void cosm_unregister_device(struct cosm_device *dev) +{ + int index = dev->index; /* save for after device release */ + + device_unregister(&dev->dev); + ida_simple_remove(&cosm_index_ida, index); +} +EXPORT_SYMBOL_GPL(cosm_unregister_device); + +struct cosm_device *cosm_find_cdev_by_id(int id) +{ + struct device *dev = subsys_find_device_by_id(&cosm_bus, id, NULL); + + return dev ? container_of(dev, struct cosm_device, dev) : NULL; +} +EXPORT_SYMBOL_GPL(cosm_find_cdev_by_id); + +static int __init cosm_init(void) +{ + return bus_register(&cosm_bus); +} + +static void __exit cosm_exit(void) +{ + bus_unregister(&cosm_bus); + ida_destroy(&cosm_index_ida); +} + +core_initcall(cosm_init); +module_exit(cosm_exit); + +MODULE_AUTHOR("Intel Corporation"); +MODULE_DESCRIPTION("Intel(R) MIC card OS state management bus driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/misc/mic/bus/cosm_bus.h b/drivers/misc/mic/bus/cosm_bus.h new file mode 100644 index 000000000..8b6341855 --- /dev/null +++ b/drivers/misc/mic/bus/cosm_bus.h @@ -0,0 +1,136 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2015 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Intel MIC COSM Bus Driver + */ +#ifndef _COSM_BUS_H_ +#define _COSM_BUS_H_ + +#include +#include +#include "../common/mic_dev.h" + +/** + * cosm_device - representation of a cosm device + * + * @attr_group: Pointer to list of sysfs attribute groups. + * @sdev: Device for sysfs entries. + * @state: MIC state. + * @prev_state: MIC state previous to MIC_RESETTING + * @shutdown_status: MIC status reported by card for shutdown/crashes. + * @shutdown_status_int: Internal shutdown status maintained by the driver + * @cosm_mutex: Mutex for synchronizing access to data structures. + * @reset_trigger_work: Work for triggering reset requests. + * @scif_work: Work for handling per device SCIF connections + * @cmdline: Kernel command line. + * @firmware: Firmware file name. + * @ramdisk: Ramdisk file name. + * @bootmode: Boot mode i.e. "linux" or "elf" for flash updates. + * @log_buf_addr: Log buffer address for MIC. + * @log_buf_len: Log buffer length address for MIC. + * @state_sysfs: Sysfs dirent for notifying ring 3 about MIC state changes. + * @hw_ops: the hardware bus ops for this device. + * @dev: underlying device. + * @index: unique position on the cosm bus + * @dbg_dir: debug fs directory + * @newepd: new endpoint from scif accept to be assigned to this cdev + * @epd: SCIF endpoint for this cdev + * @heartbeat_watchdog_enable: if heartbeat watchdog is enabled for this cdev + * @sysfs_heartbeat_enable: sysfs setting for disabling heartbeat notification + */ +struct cosm_device { + const struct attribute_group **attr_group; + struct device *sdev; + u8 state; + u8 prev_state; + u8 shutdown_status; + u8 shutdown_status_int; + struct mutex cosm_mutex; + struct work_struct reset_trigger_work; + struct work_struct scif_work; + char *cmdline; + char *firmware; + char *ramdisk; + char *bootmode; + void *log_buf_addr; + int *log_buf_len; + struct kernfs_node *state_sysfs; + struct cosm_hw_ops *hw_ops; + struct device dev; + int index; + struct dentry *dbg_dir; + scif_epd_t newepd; + scif_epd_t epd; + bool heartbeat_watchdog_enable; + bool sysfs_heartbeat_enable; +}; + +/** + * cosm_driver - operations for a cosm driver + * + * @driver: underlying device driver (populate name and owner). + * @probe: the function to call when a device is found. Returns 0 or -errno. + * @remove: the function to call when a device is removed. + */ +struct cosm_driver { + struct device_driver driver; + int (*probe)(struct cosm_device *dev); + void (*remove)(struct cosm_device *dev); +}; + +/** + * cosm_hw_ops - cosm bus ops + * + * @reset: trigger MIC reset + * @force_reset: force MIC reset + * @post_reset: inform MIC reset is complete + * @ready: is MIC ready for OS download + * @start: boot MIC + * @stop: prepare MIC for reset + * @family: return MIC HW family string + * @stepping: return MIC HW stepping string + * @aper: return MIC PCIe aperture + */ +struct cosm_hw_ops { + void (*reset)(struct cosm_device *cdev); + void (*force_reset)(struct cosm_device *cdev); + void (*post_reset)(struct cosm_device *cdev, enum mic_states state); + bool (*ready)(struct cosm_device *cdev); + int (*start)(struct cosm_device *cdev, int id); + void (*stop)(struct cosm_device *cdev, bool force); + ssize_t (*family)(struct cosm_device *cdev, char *buf); + ssize_t (*stepping)(struct cosm_device *cdev, char *buf); + struct mic_mw *(*aper)(struct cosm_device *cdev); +}; + +struct cosm_device * +cosm_register_device(struct device *pdev, struct cosm_hw_ops *hw_ops); +void cosm_unregister_device(struct cosm_device *dev); +int cosm_register_driver(struct cosm_driver *drv); +void cosm_unregister_driver(struct cosm_driver *drv); +struct cosm_device *cosm_find_cdev_by_id(int id); + +static inline struct cosm_device *dev_to_cosm(struct device *dev) +{ + return container_of(dev, struct cosm_device, dev); +} + +static inline struct cosm_driver *drv_to_cosm(struct device_driver *drv) +{ + return container_of(drv, struct cosm_driver, driver); +} +#endif /* _COSM_BUS_H */ diff --git a/drivers/misc/mic/bus/mic_bus.c b/drivers/misc/mic/bus/mic_bus.c new file mode 100644 index 000000000..77b16ca66 --- /dev/null +++ b/drivers/misc/mic/bus/mic_bus.c @@ -0,0 +1,204 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2014 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Intel MIC Bus driver. + * + * This implementation is very similar to the the virtio bus driver + * implementation @ drivers/virtio/virtio.c + */ +#include +#include +#include +#include + +static ssize_t device_show(struct device *d, + struct device_attribute *attr, char *buf) +{ + struct mbus_device *dev = dev_to_mbus(d); + return sprintf(buf, "0x%04x\n", dev->id.device); +} +static DEVICE_ATTR_RO(device); + +static ssize_t vendor_show(struct device *d, + struct device_attribute *attr, char *buf) +{ + struct mbus_device *dev = dev_to_mbus(d); + return sprintf(buf, "0x%04x\n", dev->id.vendor); +} +static DEVICE_ATTR_RO(vendor); + +static ssize_t modalias_show(struct device *d, + struct device_attribute *attr, char *buf) +{ + struct mbus_device *dev = dev_to_mbus(d); + return sprintf(buf, "mbus:d%08Xv%08X\n", + dev->id.device, dev->id.vendor); +} +static DEVICE_ATTR_RO(modalias); + +static struct attribute *mbus_dev_attrs[] = { + &dev_attr_device.attr, + &dev_attr_vendor.attr, + &dev_attr_modalias.attr, + NULL, +}; +ATTRIBUTE_GROUPS(mbus_dev); + +static inline int mbus_id_match(const struct mbus_device *dev, + const struct mbus_device_id *id) +{ + if (id->device != dev->id.device && id->device != MBUS_DEV_ANY_ID) + return 0; + + return id->vendor == MBUS_DEV_ANY_ID || id->vendor == dev->id.vendor; +} + +/* + * This looks through all the IDs a driver claims to support. If any of them + * match, we return 1 and the kernel will call mbus_dev_probe(). + */ +static int mbus_dev_match(struct device *dv, struct device_driver *dr) +{ + unsigned int i; + struct mbus_device *dev = dev_to_mbus(dv); + const struct mbus_device_id *ids; + + ids = drv_to_mbus(dr)->id_table; + for (i = 0; ids[i].device; i++) + if (mbus_id_match(dev, &ids[i])) + return 1; + return 0; +} + +static int mbus_uevent(struct device *dv, struct kobj_uevent_env *env) +{ + struct mbus_device *dev = dev_to_mbus(dv); + + return add_uevent_var(env, "MODALIAS=mbus:d%08Xv%08X", + dev->id.device, dev->id.vendor); +} + +static int mbus_dev_probe(struct device *d) +{ + int err; + struct mbus_device *dev = dev_to_mbus(d); + struct mbus_driver *drv = drv_to_mbus(dev->dev.driver); + + err = drv->probe(dev); + if (!err) + if (drv->scan) + drv->scan(dev); + return err; +} + +static int mbus_dev_remove(struct device *d) +{ + struct mbus_device *dev = dev_to_mbus(d); + struct mbus_driver *drv = drv_to_mbus(dev->dev.driver); + + drv->remove(dev); + return 0; +} + +static struct bus_type mic_bus = { + .name = "mic_bus", + .match = mbus_dev_match, + .dev_groups = mbus_dev_groups, + .uevent = mbus_uevent, + .probe = mbus_dev_probe, + .remove = mbus_dev_remove, +}; + +int mbus_register_driver(struct mbus_driver *driver) +{ + driver->driver.bus = &mic_bus; + return driver_register(&driver->driver); +} +EXPORT_SYMBOL_GPL(mbus_register_driver); + +void mbus_unregister_driver(struct mbus_driver *driver) +{ + driver_unregister(&driver->driver); +} +EXPORT_SYMBOL_GPL(mbus_unregister_driver); + +static void mbus_release_dev(struct device *d) +{ + struct mbus_device *mbdev = dev_to_mbus(d); + kfree(mbdev); +} + +struct mbus_device * +mbus_register_device(struct device *pdev, int id, const struct dma_map_ops *dma_ops, + struct mbus_hw_ops *hw_ops, int index, + void __iomem *mmio_va) +{ + int ret; + struct mbus_device *mbdev; + + mbdev = kzalloc(sizeof(*mbdev), GFP_KERNEL); + if (!mbdev) + return ERR_PTR(-ENOMEM); + + mbdev->mmio_va = mmio_va; + mbdev->dev.parent = pdev; + mbdev->id.device = id; + mbdev->id.vendor = MBUS_DEV_ANY_ID; + mbdev->dev.dma_ops = dma_ops; + mbdev->dev.dma_mask = &mbdev->dev.coherent_dma_mask; + dma_set_mask(&mbdev->dev, DMA_BIT_MASK(64)); + mbdev->dev.release = mbus_release_dev; + mbdev->hw_ops = hw_ops; + mbdev->dev.bus = &mic_bus; + mbdev->index = index; + dev_set_name(&mbdev->dev, "mbus-dev%u", mbdev->index); + /* + * device_register() causes the bus infrastructure to look for a + * matching driver. + */ + ret = device_register(&mbdev->dev); + if (ret) + goto free_mbdev; + return mbdev; +free_mbdev: + put_device(&mbdev->dev); + return ERR_PTR(ret); +} +EXPORT_SYMBOL_GPL(mbus_register_device); + +void mbus_unregister_device(struct mbus_device *mbdev) +{ + device_unregister(&mbdev->dev); +} +EXPORT_SYMBOL_GPL(mbus_unregister_device); + +static int __init mbus_init(void) +{ + return bus_register(&mic_bus); +} + +static void __exit mbus_exit(void) +{ + bus_unregister(&mic_bus); +} + +core_initcall(mbus_init); +module_exit(mbus_exit); + +MODULE_AUTHOR("Intel Corporation"); +MODULE_DESCRIPTION("Intel(R) MIC Bus driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/misc/mic/bus/scif_bus.c b/drivers/misc/mic/bus/scif_bus.c new file mode 100644 index 000000000..a444db5f6 --- /dev/null +++ b/drivers/misc/mic/bus/scif_bus.c @@ -0,0 +1,209 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2014 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Intel Symmetric Communications Interface Bus driver. + */ +#include +#include +#include +#include + +#include "scif_bus.h" + +static ssize_t device_show(struct device *d, + struct device_attribute *attr, char *buf) +{ + struct scif_hw_dev *dev = dev_to_scif(d); + + return sprintf(buf, "0x%04x\n", dev->id.device); +} +static DEVICE_ATTR_RO(device); + +static ssize_t vendor_show(struct device *d, + struct device_attribute *attr, char *buf) +{ + struct scif_hw_dev *dev = dev_to_scif(d); + + return sprintf(buf, "0x%04x\n", dev->id.vendor); +} +static DEVICE_ATTR_RO(vendor); + +static ssize_t modalias_show(struct device *d, + struct device_attribute *attr, char *buf) +{ + struct scif_hw_dev *dev = dev_to_scif(d); + + return sprintf(buf, "scif:d%08Xv%08X\n", + dev->id.device, dev->id.vendor); +} +static DEVICE_ATTR_RO(modalias); + +static struct attribute *scif_dev_attrs[] = { + &dev_attr_device.attr, + &dev_attr_vendor.attr, + &dev_attr_modalias.attr, + NULL, +}; +ATTRIBUTE_GROUPS(scif_dev); + +static inline int scif_id_match(const struct scif_hw_dev *dev, + const struct scif_hw_dev_id *id) +{ + if (id->device != dev->id.device && id->device != SCIF_DEV_ANY_ID) + return 0; + + return id->vendor == SCIF_DEV_ANY_ID || id->vendor == dev->id.vendor; +} + +/* + * This looks through all the IDs a driver claims to support. If any of them + * match, we return 1 and the kernel will call scif_dev_probe(). + */ +static int scif_dev_match(struct device *dv, struct device_driver *dr) +{ + unsigned int i; + struct scif_hw_dev *dev = dev_to_scif(dv); + const struct scif_hw_dev_id *ids; + + ids = drv_to_scif(dr)->id_table; + for (i = 0; ids[i].device; i++) + if (scif_id_match(dev, &ids[i])) + return 1; + return 0; +} + +static int scif_uevent(struct device *dv, struct kobj_uevent_env *env) +{ + struct scif_hw_dev *dev = dev_to_scif(dv); + + return add_uevent_var(env, "MODALIAS=scif:d%08Xv%08X", + dev->id.device, dev->id.vendor); +} + +static int scif_dev_probe(struct device *d) +{ + struct scif_hw_dev *dev = dev_to_scif(d); + struct scif_driver *drv = drv_to_scif(dev->dev.driver); + + return drv->probe(dev); +} + +static int scif_dev_remove(struct device *d) +{ + struct scif_hw_dev *dev = dev_to_scif(d); + struct scif_driver *drv = drv_to_scif(dev->dev.driver); + + drv->remove(dev); + return 0; +} + +static struct bus_type scif_bus = { + .name = "scif_bus", + .match = scif_dev_match, + .dev_groups = scif_dev_groups, + .uevent = scif_uevent, + .probe = scif_dev_probe, + .remove = scif_dev_remove, +}; + +int scif_register_driver(struct scif_driver *driver) +{ + driver->driver.bus = &scif_bus; + return driver_register(&driver->driver); +} +EXPORT_SYMBOL_GPL(scif_register_driver); + +void scif_unregister_driver(struct scif_driver *driver) +{ + driver_unregister(&driver->driver); +} +EXPORT_SYMBOL_GPL(scif_unregister_driver); + +static void scif_release_dev(struct device *d) +{ + struct scif_hw_dev *sdev = dev_to_scif(d); + + kfree(sdev); +} + +struct scif_hw_dev * +scif_register_device(struct device *pdev, int id, const struct dma_map_ops *dma_ops, + struct scif_hw_ops *hw_ops, u8 dnode, u8 snode, + struct mic_mw *mmio, struct mic_mw *aper, void *dp, + void __iomem *rdp, struct dma_chan **chan, int num_chan, + bool card_rel_da) +{ + int ret; + struct scif_hw_dev *sdev; + + sdev = kzalloc(sizeof(*sdev), GFP_KERNEL); + if (!sdev) + return ERR_PTR(-ENOMEM); + + sdev->dev.parent = pdev; + sdev->id.device = id; + sdev->id.vendor = SCIF_DEV_ANY_ID; + sdev->dev.dma_ops = dma_ops; + sdev->dev.release = scif_release_dev; + sdev->hw_ops = hw_ops; + sdev->dnode = dnode; + sdev->snode = snode; + dev_set_drvdata(&sdev->dev, sdev); + sdev->dev.bus = &scif_bus; + sdev->mmio = mmio; + sdev->aper = aper; + sdev->dp = dp; + sdev->rdp = rdp; + sdev->dev.dma_mask = &sdev->dev.coherent_dma_mask; + dma_set_mask(&sdev->dev, DMA_BIT_MASK(64)); + sdev->dma_ch = chan; + sdev->num_dma_ch = num_chan; + sdev->card_rel_da = card_rel_da; + dev_set_name(&sdev->dev, "scif-dev%u", sdev->dnode); + /* + * device_register() causes the bus infrastructure to look for a + * matching driver. + */ + ret = device_register(&sdev->dev); + if (ret) + goto free_sdev; + return sdev; +free_sdev: + put_device(&sdev->dev); + return ERR_PTR(ret); +} +EXPORT_SYMBOL_GPL(scif_register_device); + +void scif_unregister_device(struct scif_hw_dev *sdev) +{ + device_unregister(&sdev->dev); +} +EXPORT_SYMBOL_GPL(scif_unregister_device); + +static int __init scif_init(void) +{ + return bus_register(&scif_bus); +} + +static void __exit scif_exit(void) +{ + bus_unregister(&scif_bus); +} + +core_initcall(scif_init); +module_exit(scif_exit); + +MODULE_AUTHOR("Intel Corporation"); +MODULE_DESCRIPTION("Intel(R) SCIF Bus driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/misc/mic/bus/scif_bus.h b/drivers/misc/mic/bus/scif_bus.h new file mode 100644 index 000000000..ff5956821 --- /dev/null +++ b/drivers/misc/mic/bus/scif_bus.h @@ -0,0 +1,133 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2014 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Intel Symmetric Communications Interface Bus driver. + */ +#ifndef _SCIF_BUS_H_ +#define _SCIF_BUS_H_ +/* + * Everything a scif driver needs to work with any particular scif + * hardware abstraction layer. + */ +#include + +#include +#include "../common/mic_dev.h" + +struct scif_hw_dev_id { + u32 device; + u32 vendor; +}; + +#define MIC_SCIF_DEV 1 +#define SCIF_DEV_ANY_ID 0xffffffff + +/** + * scif_hw_dev - representation of a hardware device abstracted for scif + * @hw_ops: the hardware ops supported by this device + * @id: the device type identification (used to match it with a driver) + * @mmio: MMIO memory window + * @aper: Aperture memory window + * @dev: underlying device + * @dnode - The destination node which this device will communicate with. + * @snode - The source node for this device. + * @dp - Self device page + * @rdp - Remote device page + * @dma_ch - Array of DMA channels + * @num_dma_ch - Number of DMA channels available + * @card_rel_da - Set to true if DMA addresses programmed in the DMA engine + * are relative to the card point of view + */ +struct scif_hw_dev { + struct scif_hw_ops *hw_ops; + struct scif_hw_dev_id id; + struct mic_mw *mmio; + struct mic_mw *aper; + struct device dev; + u8 dnode; + u8 snode; + void *dp; + void __iomem *rdp; + struct dma_chan **dma_ch; + int num_dma_ch; + bool card_rel_da; +}; + +/** + * scif_driver - operations for a scif I/O driver + * @driver: underlying device driver (populate name and owner). + * @id_table: the ids serviced by this driver. + * @probe: the function to call when a device is found. Returns 0 or -errno. + * @remove: the function to call when a device is removed. + */ +struct scif_driver { + struct device_driver driver; + const struct scif_hw_dev_id *id_table; + int (*probe)(struct scif_hw_dev *dev); + void (*remove)(struct scif_hw_dev *dev); +}; + +/** + * scif_hw_ops - Hardware operations for accessing a SCIF device on the SCIF bus. + * + * @next_db: Obtain the next available doorbell. + * @request_irq: Request an interrupt on a particular doorbell. + * @free_irq: Free an interrupt requested previously. + * @ack_interrupt: acknowledge an interrupt in the ISR. + * @send_intr: Send an interrupt to the remote node on a specified doorbell. + * @send_p2p_intr: Send an interrupt to the peer node on a specified doorbell + * which is specifically targeted for a peer to peer node. + * @ioremap: Map a buffer with the specified physical address and length. + * @iounmap: Unmap a buffer previously mapped. + */ +struct scif_hw_ops { + int (*next_db)(struct scif_hw_dev *sdev); + struct mic_irq * (*request_irq)(struct scif_hw_dev *sdev, + irqreturn_t (*func)(int irq, + void *data), + const char *name, void *data, + int db); + void (*free_irq)(struct scif_hw_dev *sdev, + struct mic_irq *cookie, void *data); + void (*ack_interrupt)(struct scif_hw_dev *sdev, int num); + void (*send_intr)(struct scif_hw_dev *sdev, int db); + void (*send_p2p_intr)(struct scif_hw_dev *sdev, int db, + struct mic_mw *mw); + void __iomem * (*ioremap)(struct scif_hw_dev *sdev, + phys_addr_t pa, size_t len); + void (*iounmap)(struct scif_hw_dev *sdev, void __iomem *va); +}; + +int scif_register_driver(struct scif_driver *driver); +void scif_unregister_driver(struct scif_driver *driver); +struct scif_hw_dev * +scif_register_device(struct device *pdev, int id, + const struct dma_map_ops *dma_ops, + struct scif_hw_ops *hw_ops, u8 dnode, u8 snode, + struct mic_mw *mmio, struct mic_mw *aper, + void *dp, void __iomem *rdp, + struct dma_chan **chan, int num_chan, + bool card_rel_da); +void scif_unregister_device(struct scif_hw_dev *sdev); + +static inline struct scif_hw_dev *dev_to_scif(struct device *dev) +{ + return container_of(dev, struct scif_hw_dev, dev); +} + +static inline struct scif_driver *drv_to_scif(struct device_driver *drv) +{ + return container_of(drv, struct scif_driver, driver); +} +#endif /* _SCIF_BUS_H */ diff --git a/drivers/misc/mic/bus/vop_bus.c b/drivers/misc/mic/bus/vop_bus.c new file mode 100644 index 000000000..e5bb9c749 --- /dev/null +++ b/drivers/misc/mic/bus/vop_bus.c @@ -0,0 +1,205 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2016 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Intel Virtio Over PCIe (VOP) Bus driver. + */ +#include +#include +#include +#include + +#include "vop_bus.h" + +static ssize_t device_show(struct device *d, + struct device_attribute *attr, char *buf) +{ + struct vop_device *dev = dev_to_vop(d); + + return sprintf(buf, "0x%04x\n", dev->id.device); +} +static DEVICE_ATTR_RO(device); + +static ssize_t vendor_show(struct device *d, + struct device_attribute *attr, char *buf) +{ + struct vop_device *dev = dev_to_vop(d); + + return sprintf(buf, "0x%04x\n", dev->id.vendor); +} +static DEVICE_ATTR_RO(vendor); + +static ssize_t modalias_show(struct device *d, + struct device_attribute *attr, char *buf) +{ + struct vop_device *dev = dev_to_vop(d); + + return sprintf(buf, "vop:d%08Xv%08X\n", + dev->id.device, dev->id.vendor); +} +static DEVICE_ATTR_RO(modalias); + +static struct attribute *vop_dev_attrs[] = { + &dev_attr_device.attr, + &dev_attr_vendor.attr, + &dev_attr_modalias.attr, + NULL, +}; +ATTRIBUTE_GROUPS(vop_dev); + +static inline int vop_id_match(const struct vop_device *dev, + const struct vop_device_id *id) +{ + if (id->device != dev->id.device && id->device != VOP_DEV_ANY_ID) + return 0; + + return id->vendor == VOP_DEV_ANY_ID || id->vendor == dev->id.vendor; +} + +/* + * This looks through all the IDs a driver claims to support. If any of them + * match, we return 1 and the kernel will call vop_dev_probe(). + */ +static int vop_dev_match(struct device *dv, struct device_driver *dr) +{ + unsigned int i; + struct vop_device *dev = dev_to_vop(dv); + const struct vop_device_id *ids; + + ids = drv_to_vop(dr)->id_table; + for (i = 0; ids[i].device; i++) + if (vop_id_match(dev, &ids[i])) + return 1; + return 0; +} + +static int vop_uevent(struct device *dv, struct kobj_uevent_env *env) +{ + struct vop_device *dev = dev_to_vop(dv); + + return add_uevent_var(env, "MODALIAS=vop:d%08Xv%08X", + dev->id.device, dev->id.vendor); +} + +static int vop_dev_probe(struct device *d) +{ + struct vop_device *dev = dev_to_vop(d); + struct vop_driver *drv = drv_to_vop(dev->dev.driver); + + return drv->probe(dev); +} + +static int vop_dev_remove(struct device *d) +{ + struct vop_device *dev = dev_to_vop(d); + struct vop_driver *drv = drv_to_vop(dev->dev.driver); + + drv->remove(dev); + return 0; +} + +static struct bus_type vop_bus = { + .name = "vop_bus", + .match = vop_dev_match, + .dev_groups = vop_dev_groups, + .uevent = vop_uevent, + .probe = vop_dev_probe, + .remove = vop_dev_remove, +}; + +int vop_register_driver(struct vop_driver *driver) +{ + driver->driver.bus = &vop_bus; + return driver_register(&driver->driver); +} +EXPORT_SYMBOL_GPL(vop_register_driver); + +void vop_unregister_driver(struct vop_driver *driver) +{ + driver_unregister(&driver->driver); +} +EXPORT_SYMBOL_GPL(vop_unregister_driver); + +static void vop_release_dev(struct device *d) +{ + struct vop_device *dev = dev_to_vop(d); + + kfree(dev); +} + +struct vop_device * +vop_register_device(struct device *pdev, int id, + const struct dma_map_ops *dma_ops, + struct vop_hw_ops *hw_ops, u8 dnode, struct mic_mw *aper, + struct dma_chan *chan) +{ + int ret; + struct vop_device *vdev; + + vdev = kzalloc(sizeof(*vdev), GFP_KERNEL); + if (!vdev) + return ERR_PTR(-ENOMEM); + + vdev->dev.parent = pdev; + vdev->id.device = id; + vdev->id.vendor = VOP_DEV_ANY_ID; + vdev->dev.dma_ops = dma_ops; + vdev->dev.dma_mask = &vdev->dev.coherent_dma_mask; + dma_set_mask(&vdev->dev, DMA_BIT_MASK(64)); + vdev->dev.release = vop_release_dev; + vdev->hw_ops = hw_ops; + vdev->dev.bus = &vop_bus; + vdev->dnode = dnode; + vdev->aper = aper; + vdev->dma_ch = chan; + vdev->index = dnode - 1; + dev_set_name(&vdev->dev, "vop-dev%u", vdev->index); + /* + * device_register() causes the bus infrastructure to look for a + * matching driver. + */ + ret = device_register(&vdev->dev); + if (ret) + goto free_vdev; + return vdev; +free_vdev: + put_device(&vdev->dev); + return ERR_PTR(ret); +} +EXPORT_SYMBOL_GPL(vop_register_device); + +void vop_unregister_device(struct vop_device *dev) +{ + device_unregister(&dev->dev); +} +EXPORT_SYMBOL_GPL(vop_unregister_device); + +static int __init vop_init(void) +{ + return bus_register(&vop_bus); +} + +static void __exit vop_exit(void) +{ + bus_unregister(&vop_bus); +} + +core_initcall(vop_init); +module_exit(vop_exit); + +MODULE_AUTHOR("Intel Corporation"); +MODULE_DESCRIPTION("Intel(R) VOP Bus driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/misc/mic/bus/vop_bus.h b/drivers/misc/mic/bus/vop_bus.h new file mode 100644 index 000000000..fff7a865d --- /dev/null +++ b/drivers/misc/mic/bus/vop_bus.h @@ -0,0 +1,140 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2016 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Intel Virtio over PCIe Bus driver. + */ +#ifndef _VOP_BUS_H_ +#define _VOP_BUS_H_ +/* + * Everything a vop driver needs to work with any particular vop + * implementation. + */ +#include +#include + +#include "../common/mic_dev.h" + +struct vop_device_id { + u32 device; + u32 vendor; +}; + +#define VOP_DEV_TRNSP 1 +#define VOP_DEV_ANY_ID 0xffffffff +/* + * Size of the internal buffer used during DMA's as an intermediate buffer + * for copy to/from user. Must be an integral number of pages. + */ +#define VOP_INT_DMA_BUF_SIZE PAGE_ALIGN(64 * 1024ULL) + +/** + * vop_device - representation of a device using vop + * @hw_ops: the hardware ops supported by this device. + * @id: the device type identification (used to match it with a driver). + * @dev: underlying device. + * @dnode - The destination node which this device will communicate with. + * @aper: Aperture memory window + * @dma_ch - DMA channel + * @index: unique position on the vop bus + */ +struct vop_device { + struct vop_hw_ops *hw_ops; + struct vop_device_id id; + struct device dev; + u8 dnode; + struct mic_mw *aper; + struct dma_chan *dma_ch; + int index; +}; + +/** + * vop_driver - operations for a vop I/O driver + * @driver: underlying device driver (populate name and owner). + * @id_table: the ids serviced by this driver. + * @probe: the function to call when a device is found. Returns 0 or -errno. + * @remove: the function to call when a device is removed. + */ +struct vop_driver { + struct device_driver driver; + const struct vop_device_id *id_table; + int (*probe)(struct vop_device *dev); + void (*remove)(struct vop_device *dev); +}; + +/** + * vop_hw_ops - Hardware operations for accessing a VOP device on the VOP bus. + * + * @next_db: Obtain the next available doorbell. + * @request_irq: Request an interrupt on a particular doorbell. + * @free_irq: Free an interrupt requested previously. + * @ack_interrupt: acknowledge an interrupt in the ISR. + * @get_remote_dp: Get access to the virtio device page used by the remote + * node to add/remove/configure virtio devices. + * @get_dp: Get access to the virtio device page used by the self + * node to add/remove/configure virtio devices. + * @send_intr: Send an interrupt to the peer node on a specified doorbell. + * @ioremap: Map a buffer with the specified DMA address and length. + * @iounmap: Unmap a buffer previously mapped. + * @dma_filter: The DMA filter function to use for obtaining access to + * a DMA channel on the peer node. + */ +struct vop_hw_ops { + int (*next_db)(struct vop_device *vpdev); + struct mic_irq *(*request_irq)(struct vop_device *vpdev, + irqreturn_t (*func)(int irq, void *data), + const char *name, void *data, + int intr_src); + void (*free_irq)(struct vop_device *vpdev, + struct mic_irq *cookie, void *data); + void (*ack_interrupt)(struct vop_device *vpdev, int num); + void __iomem * (*get_remote_dp)(struct vop_device *vpdev); + void * (*get_dp)(struct vop_device *vpdev); + void (*send_intr)(struct vop_device *vpdev, int db); + void __iomem * (*ioremap)(struct vop_device *vpdev, + dma_addr_t pa, size_t len); + void (*iounmap)(struct vop_device *vpdev, void __iomem *va); +}; + +struct vop_device * +vop_register_device(struct device *pdev, int id, + const struct dma_map_ops *dma_ops, + struct vop_hw_ops *hw_ops, u8 dnode, struct mic_mw *aper, + struct dma_chan *chan); +void vop_unregister_device(struct vop_device *dev); +int vop_register_driver(struct vop_driver *drv); +void vop_unregister_driver(struct vop_driver *drv); + +/* + * module_vop_driver() - Helper macro for drivers that don't do + * anything special in module init/exit. This eliminates a lot of + * boilerplate. Each module may only use this macro once, and + * calling it replaces module_init() and module_exit() + */ +#define module_vop_driver(__vop_driver) \ + module_driver(__vop_driver, vop_register_driver, \ + vop_unregister_driver) + +static inline struct vop_device *dev_to_vop(struct device *dev) +{ + return container_of(dev, struct vop_device, dev); +} + +static inline struct vop_driver *drv_to_vop(struct device_driver *drv) +{ + return container_of(drv, struct vop_driver, driver); +} +#endif /* _VOP_BUS_H */ diff --git a/drivers/misc/mic/card/Makefile b/drivers/misc/mic/card/Makefile new file mode 100644 index 000000000..921a7e7e0 --- /dev/null +++ b/drivers/misc/mic/card/Makefile @@ -0,0 +1,11 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile - Intel MIC Linux driver. +# Copyright(c) 2013, Intel Corporation. +# +ccflags-y += -DINTEL_MIC_CARD + +obj-$(CONFIG_INTEL_MIC_CARD) += mic_card.o +mic_card-y += mic_x100.o +mic_card-y += mic_device.o +mic_card-y += mic_debugfs.o diff --git a/drivers/misc/mic/card/mic_debugfs.c b/drivers/misc/mic/card/mic_debugfs.c new file mode 100644 index 000000000..421b3d791 --- /dev/null +++ b/drivers/misc/mic/card/mic_debugfs.c @@ -0,0 +1,130 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2013 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed: Knights Ferry, and + * the Intel product codenamed: Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel MIC Card driver. + * + */ +#include +#include +#include +#include +#include + +#include "../common/mic_dev.h" +#include "mic_device.h" + +/* Debugfs parent dir */ +static struct dentry *mic_dbg; + +/** + * mic_intr_test - Send interrupts to host. + */ +static int mic_intr_test(struct seq_file *s, void *unused) +{ + struct mic_driver *mdrv = s->private; + struct mic_device *mdev = &mdrv->mdev; + + mic_send_intr(mdev, 0); + msleep(1000); + mic_send_intr(mdev, 1); + msleep(1000); + mic_send_intr(mdev, 2); + msleep(1000); + mic_send_intr(mdev, 3); + msleep(1000); + + return 0; +} + +static int mic_intr_test_open(struct inode *inode, struct file *file) +{ + return single_open(file, mic_intr_test, inode->i_private); +} + +static int mic_intr_test_release(struct inode *inode, struct file *file) +{ + return single_release(inode, file); +} + +static const struct file_operations intr_test_ops = { + .owner = THIS_MODULE, + .open = mic_intr_test_open, + .read = seq_read, + .llseek = seq_lseek, + .release = mic_intr_test_release +}; + +/** + * mic_create_card_debug_dir - Initialize MIC debugfs entries. + */ +void __init mic_create_card_debug_dir(struct mic_driver *mdrv) +{ + struct dentry *d; + + if (!mic_dbg) + return; + + mdrv->dbg_dir = debugfs_create_dir(mdrv->name, mic_dbg); + if (!mdrv->dbg_dir) { + dev_err(mdrv->dev, "Cant create dbg_dir %s\n", mdrv->name); + return; + } + + d = debugfs_create_file("intr_test", 0444, mdrv->dbg_dir, + mdrv, &intr_test_ops); + + if (!d) { + dev_err(mdrv->dev, + "Cant create dbg intr_test %s\n", mdrv->name); + return; + } +} + +/** + * mic_delete_card_debug_dir - Uninitialize MIC debugfs entries. + */ +void mic_delete_card_debug_dir(struct mic_driver *mdrv) +{ + if (!mdrv->dbg_dir) + return; + + debugfs_remove_recursive(mdrv->dbg_dir); +} + +/** + * mic_init_card_debugfs - Initialize global debugfs entry. + */ +void __init mic_init_card_debugfs(void) +{ + mic_dbg = debugfs_create_dir(KBUILD_MODNAME, NULL); + if (!mic_dbg) + pr_err("can't create debugfs dir\n"); +} + +/** + * mic_exit_card_debugfs - Uninitialize global debugfs entry + */ +void mic_exit_card_debugfs(void) +{ + debugfs_remove(mic_dbg); +} diff --git a/drivers/misc/mic/card/mic_device.c b/drivers/misc/mic/card/mic_device.c new file mode 100644 index 000000000..e749af48f --- /dev/null +++ b/drivers/misc/mic/card/mic_device.c @@ -0,0 +1,429 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2013 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed: Knights Ferry, and + * the Intel product codenamed: Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel MIC Card driver. + * + */ +#include +#include +#include +#include +#include +#include + +#include +#include "../common/mic_dev.h" +#include "mic_device.h" + +static struct mic_driver *g_drv; + +static int __init mic_dp_init(void) +{ + struct mic_driver *mdrv = g_drv; + struct mic_device *mdev = &mdrv->mdev; + struct mic_bootparam __iomem *bootparam; + u64 lo, hi, dp_dma_addr; + u32 magic; + + lo = mic_read_spad(&mdrv->mdev, MIC_DPLO_SPAD); + hi = mic_read_spad(&mdrv->mdev, MIC_DPHI_SPAD); + + dp_dma_addr = lo | (hi << 32); + mdrv->dp = mic_card_map(mdev, dp_dma_addr, MIC_DP_SIZE); + if (!mdrv->dp) { + dev_err(mdrv->dev, "Cannot remap Aperture BAR\n"); + return -ENOMEM; + } + bootparam = mdrv->dp; + magic = ioread32(&bootparam->magic); + if (MIC_MAGIC != magic) { + dev_err(mdrv->dev, "bootparam magic mismatch 0x%x\n", magic); + return -EIO; + } + return 0; +} + +/* Uninitialize the device page */ +static void mic_dp_uninit(void) +{ + mic_card_unmap(&g_drv->mdev, g_drv->dp); +} + +/** + * mic_request_card_irq - request an irq. + * + * @handler: interrupt handler passed to request_threaded_irq. + * @thread_fn: thread fn. passed to request_threaded_irq. + * @name: The ASCII name of the callee requesting the irq. + * @data: private data that is returned back when calling the + * function handler. + * @index: The doorbell index of the requester. + * + * returns: The cookie that is transparent to the caller. Passed + * back when calling mic_free_irq. An appropriate error code + * is returned on failure. Caller needs to use IS_ERR(return_val) + * to check for failure and PTR_ERR(return_val) to obtained the + * error code. + * + */ +struct mic_irq * +mic_request_card_irq(irq_handler_t handler, + irq_handler_t thread_fn, const char *name, + void *data, int index) +{ + int rc = 0; + unsigned long cookie; + struct mic_driver *mdrv = g_drv; + + rc = request_threaded_irq(mic_db_to_irq(mdrv, index), handler, + thread_fn, 0, name, data); + if (rc) { + dev_err(mdrv->dev, "request_threaded_irq failed rc = %d\n", rc); + goto err; + } + mdrv->irq_info.irq_usage_count[index]++; + cookie = index; + return (struct mic_irq *)cookie; +err: + return ERR_PTR(rc); +} + +/** + * mic_free_card_irq - free irq. + * + * @cookie: cookie obtained during a successful call to mic_request_threaded_irq + * @data: private data specified by the calling function during the + * mic_request_threaded_irq + * + * returns: none. + */ +void mic_free_card_irq(struct mic_irq *cookie, void *data) +{ + int index; + struct mic_driver *mdrv = g_drv; + + index = (unsigned long)cookie & 0xFFFFU; + free_irq(mic_db_to_irq(mdrv, index), data); + mdrv->irq_info.irq_usage_count[index]--; +} + +/** + * mic_next_card_db - Get the doorbell with minimum usage count. + * + * Returns the irq index. + */ +int mic_next_card_db(void) +{ + int i; + int index = 0; + struct mic_driver *mdrv = g_drv; + + for (i = 0; i < mdrv->intr_info.num_intr; i++) { + if (mdrv->irq_info.irq_usage_count[i] < + mdrv->irq_info.irq_usage_count[index]) + index = i; + } + + return index; +} + +/** + * mic_init_irq - Initialize irq information. + * + * Returns 0 in success. Appropriate error code on failure. + */ +static int mic_init_irq(void) +{ + struct mic_driver *mdrv = g_drv; + + mdrv->irq_info.irq_usage_count = kzalloc((sizeof(u32) * + mdrv->intr_info.num_intr), + GFP_KERNEL); + if (!mdrv->irq_info.irq_usage_count) + return -ENOMEM; + return 0; +} + +/** + * mic_uninit_irq - Uninitialize irq information. + * + * None. + */ +static void mic_uninit_irq(void) +{ + struct mic_driver *mdrv = g_drv; + + kfree(mdrv->irq_info.irq_usage_count); +} + +static inline struct mic_driver *scdev_to_mdrv(struct scif_hw_dev *scdev) +{ + return dev_get_drvdata(scdev->dev.parent); +} + +static struct mic_irq * +___mic_request_irq(struct scif_hw_dev *scdev, + irqreturn_t (*func)(int irq, void *data), + const char *name, void *data, + int db) +{ + return mic_request_card_irq(func, NULL, name, data, db); +} + +static void +___mic_free_irq(struct scif_hw_dev *scdev, + struct mic_irq *cookie, void *data) +{ + return mic_free_card_irq(cookie, data); +} + +static void ___mic_ack_interrupt(struct scif_hw_dev *scdev, int num) +{ + struct mic_driver *mdrv = scdev_to_mdrv(scdev); + + mic_ack_interrupt(&mdrv->mdev); +} + +static int ___mic_next_db(struct scif_hw_dev *scdev) +{ + return mic_next_card_db(); +} + +static void ___mic_send_intr(struct scif_hw_dev *scdev, int db) +{ + struct mic_driver *mdrv = scdev_to_mdrv(scdev); + + mic_send_intr(&mdrv->mdev, db); +} + +static void ___mic_send_p2p_intr(struct scif_hw_dev *scdev, int db, + struct mic_mw *mw) +{ + mic_send_p2p_intr(db, mw); +} + +static void __iomem * +___mic_ioremap(struct scif_hw_dev *scdev, + phys_addr_t pa, size_t len) +{ + struct mic_driver *mdrv = scdev_to_mdrv(scdev); + + return mic_card_map(&mdrv->mdev, pa, len); +} + +static void ___mic_iounmap(struct scif_hw_dev *scdev, void __iomem *va) +{ + struct mic_driver *mdrv = scdev_to_mdrv(scdev); + + mic_card_unmap(&mdrv->mdev, va); +} + +static struct scif_hw_ops scif_hw_ops = { + .request_irq = ___mic_request_irq, + .free_irq = ___mic_free_irq, + .ack_interrupt = ___mic_ack_interrupt, + .next_db = ___mic_next_db, + .send_intr = ___mic_send_intr, + .send_p2p_intr = ___mic_send_p2p_intr, + .ioremap = ___mic_ioremap, + .iounmap = ___mic_iounmap, +}; + +static inline struct mic_driver *vpdev_to_mdrv(struct vop_device *vpdev) +{ + return dev_get_drvdata(vpdev->dev.parent); +} + +static struct mic_irq * +__mic_request_irq(struct vop_device *vpdev, + irqreturn_t (*func)(int irq, void *data), + const char *name, void *data, int intr_src) +{ + return mic_request_card_irq(func, NULL, name, data, intr_src); +} + +static void __mic_free_irq(struct vop_device *vpdev, + struct mic_irq *cookie, void *data) +{ + return mic_free_card_irq(cookie, data); +} + +static void __mic_ack_interrupt(struct vop_device *vpdev, int num) +{ + struct mic_driver *mdrv = vpdev_to_mdrv(vpdev); + + mic_ack_interrupt(&mdrv->mdev); +} + +static int __mic_next_db(struct vop_device *vpdev) +{ + return mic_next_card_db(); +} + +static void __iomem *__mic_get_remote_dp(struct vop_device *vpdev) +{ + struct mic_driver *mdrv = vpdev_to_mdrv(vpdev); + + return mdrv->dp; +} + +static void __mic_send_intr(struct vop_device *vpdev, int db) +{ + struct mic_driver *mdrv = vpdev_to_mdrv(vpdev); + + mic_send_intr(&mdrv->mdev, db); +} + +static void __iomem *__mic_ioremap(struct vop_device *vpdev, + dma_addr_t pa, size_t len) +{ + struct mic_driver *mdrv = vpdev_to_mdrv(vpdev); + + return mic_card_map(&mdrv->mdev, pa, len); +} + +static void __mic_iounmap(struct vop_device *vpdev, void __iomem *va) +{ + struct mic_driver *mdrv = vpdev_to_mdrv(vpdev); + + mic_card_unmap(&mdrv->mdev, va); +} + +static struct vop_hw_ops vop_hw_ops = { + .request_irq = __mic_request_irq, + .free_irq = __mic_free_irq, + .ack_interrupt = __mic_ack_interrupt, + .next_db = __mic_next_db, + .get_remote_dp = __mic_get_remote_dp, + .send_intr = __mic_send_intr, + .ioremap = __mic_ioremap, + .iounmap = __mic_iounmap, +}; + +static int mic_request_dma_chans(struct mic_driver *mdrv) +{ + dma_cap_mask_t mask; + struct dma_chan *chan; + + dma_cap_zero(mask); + dma_cap_set(DMA_MEMCPY, mask); + + do { + chan = dma_request_channel(mask, NULL, NULL); + if (chan) { + mdrv->dma_ch[mdrv->num_dma_ch++] = chan; + if (mdrv->num_dma_ch >= MIC_MAX_DMA_CHAN) + break; + } + } while (chan); + dev_info(mdrv->dev, "DMA channels # %d\n", mdrv->num_dma_ch); + return mdrv->num_dma_ch; +} + +static void mic_free_dma_chans(struct mic_driver *mdrv) +{ + int i = 0; + + for (i = 0; i < mdrv->num_dma_ch; i++) { + dma_release_channel(mdrv->dma_ch[i]); + mdrv->dma_ch[i] = NULL; + } + mdrv->num_dma_ch = 0; +} + +/* + * mic_driver_init - MIC driver initialization tasks. + * + * Returns 0 in success. Appropriate error code on failure. + */ +int __init mic_driver_init(struct mic_driver *mdrv) +{ + int rc; + struct mic_bootparam __iomem *bootparam; + u8 node_id; + + g_drv = mdrv; + /* Unloading the card module is not supported. */ + if (!try_module_get(mdrv->dev->driver->owner)) { + rc = -ENODEV; + goto done; + } + rc = mic_dp_init(); + if (rc) + goto put; + rc = mic_init_irq(); + if (rc) + goto dp_uninit; + if (!mic_request_dma_chans(mdrv)) { + rc = -ENODEV; + goto irq_uninit; + } + mdrv->vpdev = vop_register_device(mdrv->dev, VOP_DEV_TRNSP, + NULL, &vop_hw_ops, 0, + NULL, mdrv->dma_ch[0]); + if (IS_ERR(mdrv->vpdev)) { + rc = PTR_ERR(mdrv->vpdev); + goto dma_free; + } + bootparam = mdrv->dp; + node_id = ioread8(&bootparam->node_id); + mdrv->scdev = scif_register_device(mdrv->dev, MIC_SCIF_DEV, + NULL, &scif_hw_ops, + 0, node_id, &mdrv->mdev.mmio, NULL, + NULL, mdrv->dp, mdrv->dma_ch, + mdrv->num_dma_ch, true); + if (IS_ERR(mdrv->scdev)) { + rc = PTR_ERR(mdrv->scdev); + goto vop_remove; + } + mic_create_card_debug_dir(mdrv); +done: + return rc; +vop_remove: + vop_unregister_device(mdrv->vpdev); +dma_free: + mic_free_dma_chans(mdrv); +irq_uninit: + mic_uninit_irq(); +dp_uninit: + mic_dp_uninit(); +put: + module_put(mdrv->dev->driver->owner); + return rc; +} + +/* + * mic_driver_uninit - MIC driver uninitialization tasks. + * + * Returns None + */ +void mic_driver_uninit(struct mic_driver *mdrv) +{ + mic_delete_card_debug_dir(mdrv); + scif_unregister_device(mdrv->scdev); + vop_unregister_device(mdrv->vpdev); + mic_free_dma_chans(mdrv); + mic_uninit_irq(); + mic_dp_uninit(); + module_put(mdrv->dev->driver->owner); +} diff --git a/drivers/misc/mic/card/mic_device.h b/drivers/misc/mic/card/mic_device.h new file mode 100644 index 000000000..333dbed97 --- /dev/null +++ b/drivers/misc/mic/card/mic_device.h @@ -0,0 +1,149 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2013 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed: Knights Ferry, and + * the Intel product codenamed: Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel MIC Card driver. + * + */ +#ifndef _MIC_CARD_DEVICE_H_ +#define _MIC_CARD_DEVICE_H_ + +#include +#include +#include +#include +#include "../bus/scif_bus.h" +#include "../bus/vop_bus.h" + +/** + * struct mic_intr_info - Contains h/w specific interrupt sources info + * + * @num_intr: The number of irqs available + */ +struct mic_intr_info { + u32 num_intr; +}; + +/** + * struct mic_irq_info - OS specific irq information + * + * @irq_usage_count: usage count array tracking the number of sources + * assigned for each irq. + */ +struct mic_irq_info { + int *irq_usage_count; +}; + +/** + * struct mic_device - MIC device information. + * + * @mmio: MMIO bar information. + */ +struct mic_device { + struct mic_mw mmio; +}; + +/** + * struct mic_driver - MIC card driver information. + * + * @name: Name for MIC driver. + * @dbg_dir: debugfs directory of this MIC device. + * @dev: The device backing this MIC. + * @dp: The pointer to the virtio device page. + * @mdev: MIC device information for the host. + * @hotplug_work: Hot plug work for adding/removing virtio devices. + * @irq_info: The OS specific irq information + * @intr_info: H/W specific interrupt information. + * @dma_mbdev: dma device on the MIC virtual bus. + * @dma_ch - Array of DMA channels + * @num_dma_ch - Number of DMA channels available + * @scdev: SCIF device on the SCIF virtual bus. + * @vpdev: Virtio over PCIe device on the VOP virtual bus. + */ +struct mic_driver { + char name[20]; + struct dentry *dbg_dir; + struct device *dev; + void __iomem *dp; + struct mic_device mdev; + struct work_struct hotplug_work; + struct mic_irq_info irq_info; + struct mic_intr_info intr_info; + struct mbus_device *dma_mbdev; + struct dma_chan *dma_ch[MIC_MAX_DMA_CHAN]; + int num_dma_ch; + struct scif_hw_dev *scdev; + struct vop_device *vpdev; +}; + +/** + * struct mic_irq - opaque pointer used as cookie + */ +struct mic_irq; + +/** + * mic_mmio_read - read from an MMIO register. + * @mw: MMIO register base virtual address. + * @offset: register offset. + * + * RETURNS: register value. + */ +static inline u32 mic_mmio_read(struct mic_mw *mw, u32 offset) +{ + return ioread32(mw->va + offset); +} + +/** + * mic_mmio_write - write to an MMIO register. + * @mw: MMIO register base virtual address. + * @val: the data value to put into the register + * @offset: register offset. + * + * RETURNS: none. + */ +static inline void +mic_mmio_write(struct mic_mw *mw, u32 val, u32 offset) +{ + iowrite32(val, mw->va + offset); +} + +int mic_driver_init(struct mic_driver *mdrv); +void mic_driver_uninit(struct mic_driver *mdrv); +int mic_next_card_db(void); +struct mic_irq * +mic_request_card_irq(irq_handler_t handler, irq_handler_t thread_fn, + const char *name, void *data, int db); +void mic_free_card_irq(struct mic_irq *cookie, void *data); +u32 mic_read_spad(struct mic_device *mdev, unsigned int idx); +void mic_send_intr(struct mic_device *mdev, int doorbell); +void mic_send_p2p_intr(int doorbell, struct mic_mw *mw); +int mic_db_to_irq(struct mic_driver *mdrv, int db); +u32 mic_ack_interrupt(struct mic_device *mdev); +void mic_hw_intr_init(struct mic_driver *mdrv); +void __iomem * +mic_card_map(struct mic_device *mdev, dma_addr_t addr, size_t size); +void mic_card_unmap(struct mic_device *mdev, void __iomem *addr); +void __init mic_create_card_debug_dir(struct mic_driver *mdrv); +void mic_delete_card_debug_dir(struct mic_driver *mdrv); +void __init mic_init_card_debugfs(void); +void mic_exit_card_debugfs(void); +#endif diff --git a/drivers/misc/mic/card/mic_x100.c b/drivers/misc/mic/card/mic_x100.c new file mode 100644 index 000000000..4007adc66 --- /dev/null +++ b/drivers/misc/mic/card/mic_x100.c @@ -0,0 +1,359 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2013 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed: Knights Ferry, and + * the Intel product codenamed: Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel MIC Card driver. + * + */ +#include +#include +#include + +#include "../common/mic_dev.h" +#include "mic_device.h" +#include "mic_x100.h" + +static const char mic_driver_name[] = "mic"; + +static struct mic_driver g_drv; + +/** + * mic_read_spad - read from the scratchpad register + * @mdev: pointer to mic_device instance + * @idx: index to scratchpad register, 0 based + * + * This function allows reading of the 32bit scratchpad register. + * + * RETURNS: An appropriate -ERRNO error value on error, or zero for success. + */ +u32 mic_read_spad(struct mic_device *mdev, unsigned int idx) +{ + return mic_mmio_read(&mdev->mmio, + MIC_X100_SBOX_BASE_ADDRESS + + MIC_X100_SBOX_SPAD0 + idx * 4); +} + +/** + * __mic_send_intr - Send interrupt to Host. + * @mdev: pointer to mic_device instance + * @doorbell: Doorbell number. + */ +void mic_send_intr(struct mic_device *mdev, int doorbell) +{ + struct mic_mw *mw = &mdev->mmio; + + if (doorbell > MIC_X100_MAX_DOORBELL_IDX) + return; + /* Ensure that the interrupt is ordered w.r.t previous stores. */ + wmb(); + mic_mmio_write(mw, MIC_X100_SBOX_SDBIC0_DBREQ_BIT, + MIC_X100_SBOX_BASE_ADDRESS + + (MIC_X100_SBOX_SDBIC0 + (4 * doorbell))); +} + +/* + * mic_x100_send_sbox_intr - Send an MIC_X100_SBOX interrupt to MIC. + */ +static void mic_x100_send_sbox_intr(struct mic_mw *mw, int doorbell) +{ + u64 apic_icr_offset = MIC_X100_SBOX_APICICR0 + doorbell * 8; + u32 apicicr_low = mic_mmio_read(mw, MIC_X100_SBOX_BASE_ADDRESS + + apic_icr_offset); + + /* for MIC we need to make sure we "hit" the send_icr bit (13) */ + apicicr_low = (apicicr_low | (1 << 13)); + /* + * Ensure that the interrupt is ordered w.r.t. previous stores + * to main memory. Fence instructions are not implemented in X100 + * since execution is in order but a compiler barrier is still + * required. + */ + wmb(); + mic_mmio_write(mw, apicicr_low, + MIC_X100_SBOX_BASE_ADDRESS + apic_icr_offset); +} + +static void mic_x100_send_rdmasr_intr(struct mic_mw *mw, int doorbell) +{ + int rdmasr_offset = MIC_X100_SBOX_RDMASR0 + (doorbell << 2); + /* + * Ensure that the interrupt is ordered w.r.t. previous stores + * to main memory. Fence instructions are not implemented in X100 + * since execution is in order but a compiler barrier is still + * required. + */ + wmb(); + mic_mmio_write(mw, 0, MIC_X100_SBOX_BASE_ADDRESS + rdmasr_offset); +} + +/** + * mic_ack_interrupt - Device specific interrupt handling. + * @mdev: pointer to mic_device instance + * + * Returns: bitmask of doorbell events triggered. + */ +u32 mic_ack_interrupt(struct mic_device *mdev) +{ + return 0; +} + +static inline int mic_get_sbox_irq(int db) +{ + return MIC_X100_IRQ_BASE + db; +} + +static inline int mic_get_rdmasr_irq(int index) +{ + return MIC_X100_RDMASR_IRQ_BASE + index; +} + +void mic_send_p2p_intr(int db, struct mic_mw *mw) +{ + int rdmasr_index; + + if (db < MIC_X100_NUM_SBOX_IRQ) { + mic_x100_send_sbox_intr(mw, db); + } else { + rdmasr_index = db - MIC_X100_NUM_SBOX_IRQ; + mic_x100_send_rdmasr_intr(mw, rdmasr_index); + } +} + +/** + * mic_hw_intr_init - Initialize h/w specific interrupt + * information. + * @mdrv: pointer to mic_driver + */ +void mic_hw_intr_init(struct mic_driver *mdrv) +{ + mdrv->intr_info.num_intr = MIC_X100_NUM_SBOX_IRQ + + MIC_X100_NUM_RDMASR_IRQ; +} + +/** + * mic_db_to_irq - Retrieve irq number corresponding to a doorbell. + * @mdrv: pointer to mic_driver + * @db: The doorbell obtained for which the irq is needed. Doorbell + * may correspond to an sbox doorbell or an rdmasr index. + * + * Returns the irq corresponding to the doorbell. + */ +int mic_db_to_irq(struct mic_driver *mdrv, int db) +{ + int rdmasr_index; + + /* + * The total number of doorbell interrupts on the card are 16. Indices + * 0-8 falls in the SBOX category and 8-15 fall in the RDMASR category. + */ + if (db < MIC_X100_NUM_SBOX_IRQ) { + return mic_get_sbox_irq(db); + } else { + rdmasr_index = db - MIC_X100_NUM_SBOX_IRQ; + return mic_get_rdmasr_irq(rdmasr_index); + } +} + +/* + * mic_card_map - Allocate virtual address for a remote memory region. + * @mdev: pointer to mic_device instance. + * @addr: Remote DMA address. + * @size: Size of the region. + * + * Returns: Virtual address backing the remote memory region. + */ +void __iomem * +mic_card_map(struct mic_device *mdev, dma_addr_t addr, size_t size) +{ + return ioremap(addr, size); +} + +/* + * mic_card_unmap - Unmap the virtual address for a remote memory region. + * @mdev: pointer to mic_device instance. + * @addr: Virtual address for remote memory region. + * + * Returns: None. + */ +void mic_card_unmap(struct mic_device *mdev, void __iomem *addr) +{ + iounmap(addr); +} + +static inline struct mic_driver *mbdev_to_mdrv(struct mbus_device *mbdev) +{ + return dev_get_drvdata(mbdev->dev.parent); +} + +static struct mic_irq * +_mic_request_threaded_irq(struct mbus_device *mbdev, + irq_handler_t handler, irq_handler_t thread_fn, + const char *name, void *data, int intr_src) +{ + int rc = 0; + unsigned int irq = intr_src; + unsigned long cookie = irq; + + rc = request_threaded_irq(irq, handler, thread_fn, 0, name, data); + if (rc) { + dev_err(mbdev_to_mdrv(mbdev)->dev, + "request_threaded_irq failed rc = %d\n", rc); + return ERR_PTR(rc); + } + return (struct mic_irq *)cookie; +} + +static void _mic_free_irq(struct mbus_device *mbdev, + struct mic_irq *cookie, void *data) +{ + unsigned long irq = (unsigned long)cookie; + free_irq(irq, data); +} + +static void _mic_ack_interrupt(struct mbus_device *mbdev, int num) +{ + mic_ack_interrupt(&mbdev_to_mdrv(mbdev)->mdev); +} + +static struct mbus_hw_ops mbus_hw_ops = { + .request_threaded_irq = _mic_request_threaded_irq, + .free_irq = _mic_free_irq, + .ack_interrupt = _mic_ack_interrupt, +}; + +static int __init mic_probe(struct platform_device *pdev) +{ + struct mic_driver *mdrv = &g_drv; + struct mic_device *mdev = &mdrv->mdev; + int rc = 0; + + mdrv->dev = &pdev->dev; + snprintf(mdrv->name, sizeof(mic_driver_name), mic_driver_name); + + /* FIXME: use dma_set_mask_and_coherent() and check result */ + dma_coerce_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + + mdev->mmio.pa = MIC_X100_MMIO_BASE; + mdev->mmio.len = MIC_X100_MMIO_LEN; + mdev->mmio.va = devm_ioremap(&pdev->dev, MIC_X100_MMIO_BASE, + MIC_X100_MMIO_LEN); + if (!mdev->mmio.va) { + dev_err(&pdev->dev, "Cannot remap MMIO BAR\n"); + rc = -EIO; + goto done; + } + mic_hw_intr_init(mdrv); + platform_set_drvdata(pdev, mdrv); + mdrv->dma_mbdev = mbus_register_device(mdrv->dev, MBUS_DEV_DMA_MIC, + NULL, &mbus_hw_ops, 0, + mdrv->mdev.mmio.va); + if (IS_ERR(mdrv->dma_mbdev)) { + rc = PTR_ERR(mdrv->dma_mbdev); + dev_err(&pdev->dev, "mbus_add_device failed rc %d\n", rc); + goto done; + } + rc = mic_driver_init(mdrv); + if (rc) { + dev_err(&pdev->dev, "mic_driver_init failed rc %d\n", rc); + goto remove_dma; + } +done: + return rc; +remove_dma: + mbus_unregister_device(mdrv->dma_mbdev); + return rc; +} + +static int mic_remove(struct platform_device *pdev) +{ + struct mic_driver *mdrv = &g_drv; + + mic_driver_uninit(mdrv); + mbus_unregister_device(mdrv->dma_mbdev); + return 0; +} + +static void mic_platform_shutdown(struct platform_device *pdev) +{ + mic_remove(pdev); +} + +static struct platform_driver __refdata mic_platform_driver = { + .probe = mic_probe, + .remove = mic_remove, + .shutdown = mic_platform_shutdown, + .driver = { + .name = mic_driver_name, + }, +}; + +static struct platform_device *mic_platform_dev; + +static int __init mic_init(void) +{ + int ret; + struct cpuinfo_x86 *c = &cpu_data(0); + + if (!(c->x86 == 11 && c->x86_model == 1)) { + ret = -ENODEV; + pr_err("%s not running on X100 ret %d\n", __func__, ret); + goto done; + } + + request_module("mic_x100_dma"); + mic_init_card_debugfs(); + + mic_platform_dev = platform_device_register_simple(mic_driver_name, + 0, NULL, 0); + ret = PTR_ERR_OR_ZERO(mic_platform_dev); + if (ret) { + pr_err("platform_device_register_full ret %d\n", ret); + goto cleanup_debugfs; + } + ret = platform_driver_register(&mic_platform_driver); + if (ret) { + pr_err("platform_driver_register ret %d\n", ret); + goto device_unregister; + } + return ret; + +device_unregister: + platform_device_unregister(mic_platform_dev); +cleanup_debugfs: + mic_exit_card_debugfs(); +done: + return ret; +} + +static void __exit mic_exit(void) +{ + platform_driver_unregister(&mic_platform_driver); + platform_device_unregister(mic_platform_dev); + mic_exit_card_debugfs(); +} + +module_init(mic_init); +module_exit(mic_exit); + +MODULE_AUTHOR("Intel Corporation"); +MODULE_DESCRIPTION("Intel(R) MIC X100 Card driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/misc/mic/card/mic_x100.h b/drivers/misc/mic/card/mic_x100.h new file mode 100644 index 000000000..7e2224934 --- /dev/null +++ b/drivers/misc/mic/card/mic_x100.h @@ -0,0 +1,49 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2013 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed: Knights Ferry, and + * the Intel product codenamed: Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel MIC Card driver. + * + */ +#ifndef _MIC_X100_CARD_H_ +#define _MIC_X100_CARD_H_ + +#define MIC_X100_MMIO_BASE 0x08007C0000ULL +#define MIC_X100_MMIO_LEN 0x00020000ULL +#define MIC_X100_SBOX_BASE_ADDRESS 0x00010000ULL + +#define MIC_X100_SBOX_SPAD0 0x0000AB20 +#define MIC_X100_SBOX_SDBIC0 0x0000CC90 +#define MIC_X100_SBOX_SDBIC0_DBREQ_BIT 0x80000000 +#define MIC_X100_SBOX_RDMASR0 0x0000B180 +#define MIC_X100_SBOX_APICICR0 0x0000A9D0 + +#define MIC_X100_MAX_DOORBELL_IDX 8 + +#define MIC_X100_NUM_SBOX_IRQ 8 +#define MIC_X100_NUM_RDMASR_IRQ 8 +#define MIC_X100_SBOX_IRQ_BASE 0 +#define MIC_X100_RDMASR_IRQ_BASE 17 + +#define MIC_X100_IRQ_BASE 26 + +#endif diff --git a/drivers/misc/mic/common/mic_dev.h b/drivers/misc/mic/common/mic_dev.h new file mode 100644 index 000000000..50776772e --- /dev/null +++ b/drivers/misc/mic/common/mic_dev.h @@ -0,0 +1,67 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2013 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Intel MIC driver. + * + */ +#ifndef __MIC_DEV_H__ +#define __MIC_DEV_H__ + +/* The maximum number of MIC devices supported in a single host system. */ +#define MIC_MAX_NUM_DEVS 128 + +/** + * enum mic_hw_family - The hardware family to which a device belongs. + */ +enum mic_hw_family { + MIC_FAMILY_X100 = 0, + MIC_FAMILY_X200, + MIC_FAMILY_UNKNOWN, + MIC_FAMILY_LAST +}; + +/** + * struct mic_mw - MIC memory window + * + * @pa: Base physical address. + * @va: Base ioremap'd virtual address. + * @len: Size of the memory window. + */ +struct mic_mw { + phys_addr_t pa; + void __iomem *va; + resource_size_t len; +}; + +/* + * Scratch pad register offsets used by the host to communicate + * device page DMA address to the card. + */ +#define MIC_DPLO_SPAD 14 +#define MIC_DPHI_SPAD 15 + +/* + * These values are supposed to be in the config_change field of the + * device page when the host sends a config change interrupt to the card. + */ +#define MIC_VIRTIO_PARAM_DEV_REMOVE 0x1 +#define MIC_VIRTIO_PARAM_CONFIG_CHANGED 0x2 + +/* Maximum number of DMA channels */ +#define MIC_MAX_DMA_CHAN 4 + +#endif diff --git a/drivers/misc/mic/cosm/Makefile b/drivers/misc/mic/cosm/Makefile new file mode 100644 index 000000000..97d74cb12 --- /dev/null +++ b/drivers/misc/mic/cosm/Makefile @@ -0,0 +1,11 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile - Intel MIC Coprocessor State Management (COSM) Driver +# Copyright(c) 2015, Intel Corporation. +# +obj-$(CONFIG_MIC_COSM) += mic_cosm.o + +mic_cosm-objs := cosm_main.o +mic_cosm-objs += cosm_debugfs.o +mic_cosm-objs += cosm_sysfs.o +mic_cosm-objs += cosm_scif_server.o diff --git a/drivers/misc/mic/cosm/cosm_debugfs.c b/drivers/misc/mic/cosm/cosm_debugfs.c new file mode 100644 index 000000000..216cb3cd2 --- /dev/null +++ b/drivers/misc/mic/cosm/cosm_debugfs.c @@ -0,0 +1,156 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2015 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Intel MIC Coprocessor State Management (COSM) Driver + * + */ + +#include +#include +#include +#include "cosm_main.h" + +/* Debugfs parent dir */ +static struct dentry *cosm_dbg; + +/** + * cosm_log_buf_show - Display MIC kernel log buffer + * + * log_buf addr/len is read from System.map by user space + * and populated in sysfs entries. + */ +static int cosm_log_buf_show(struct seq_file *s, void *unused) +{ + void __iomem *log_buf_va; + int __iomem *log_buf_len_va; + struct cosm_device *cdev = s->private; + void *kva; + int size; + u64 aper_offset; + + if (!cdev || !cdev->log_buf_addr || !cdev->log_buf_len) + goto done; + + mutex_lock(&cdev->cosm_mutex); + switch (cdev->state) { + case MIC_BOOTING: + case MIC_ONLINE: + case MIC_SHUTTING_DOWN: + break; + default: + goto unlock; + } + + /* + * Card kernel will never be relocated and any kernel text/data mapping + * can be translated to phys address by subtracting __START_KERNEL_map. + */ + aper_offset = (u64)cdev->log_buf_len - __START_KERNEL_map; + log_buf_len_va = cdev->hw_ops->aper(cdev)->va + aper_offset; + aper_offset = (u64)cdev->log_buf_addr - __START_KERNEL_map; + log_buf_va = cdev->hw_ops->aper(cdev)->va + aper_offset; + + size = ioread32(log_buf_len_va); + kva = kmalloc(size, GFP_KERNEL); + if (!kva) + goto unlock; + + memcpy_fromio(kva, log_buf_va, size); + seq_write(s, kva, size); + kfree(kva); +unlock: + mutex_unlock(&cdev->cosm_mutex); +done: + return 0; +} + +static int cosm_log_buf_open(struct inode *inode, struct file *file) +{ + return single_open(file, cosm_log_buf_show, inode->i_private); +} + +static const struct file_operations log_buf_ops = { + .owner = THIS_MODULE, + .open = cosm_log_buf_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release +}; + +/** + * cosm_force_reset_show - Force MIC reset + * + * Invokes the force_reset COSM bus op instead of the standard reset + * op in case a force reset of the MIC device is required + */ +static int cosm_force_reset_show(struct seq_file *s, void *pos) +{ + struct cosm_device *cdev = s->private; + + cosm_stop(cdev, true); + return 0; +} + +static int cosm_force_reset_debug_open(struct inode *inode, struct file *file) +{ + return single_open(file, cosm_force_reset_show, inode->i_private); +} + +static const struct file_operations force_reset_ops = { + .owner = THIS_MODULE, + .open = cosm_force_reset_debug_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release +}; + +void cosm_create_debug_dir(struct cosm_device *cdev) +{ + char name[16]; + + if (!cosm_dbg) + return; + + scnprintf(name, sizeof(name), "mic%d", cdev->index); + cdev->dbg_dir = debugfs_create_dir(name, cosm_dbg); + if (!cdev->dbg_dir) + return; + + debugfs_create_file("log_buf", 0444, cdev->dbg_dir, cdev, &log_buf_ops); + debugfs_create_file("force_reset", 0444, cdev->dbg_dir, cdev, + &force_reset_ops); +} + +void cosm_delete_debug_dir(struct cosm_device *cdev) +{ + if (!cdev->dbg_dir) + return; + + debugfs_remove_recursive(cdev->dbg_dir); +} + +void cosm_init_debugfs(void) +{ + cosm_dbg = debugfs_create_dir(KBUILD_MODNAME, NULL); + if (!cosm_dbg) + pr_err("can't create debugfs dir\n"); +} + +void cosm_exit_debugfs(void) +{ + debugfs_remove(cosm_dbg); +} diff --git a/drivers/misc/mic/cosm/cosm_main.c b/drivers/misc/mic/cosm/cosm_main.c new file mode 100644 index 000000000..7005cb1e0 --- /dev/null +++ b/drivers/misc/mic/cosm/cosm_main.c @@ -0,0 +1,393 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2015 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Intel MIC Coprocessor State Management (COSM) Driver + * + */ + +#include +#include +#include +#include +#include +#include "cosm_main.h" + +static const char cosm_driver_name[] = "mic"; + +/* COSM ID allocator */ +static struct ida g_cosm_ida; +/* Class of MIC devices for sysfs accessibility. */ +static struct class *g_cosm_class; +/* Number of MIC devices */ +static atomic_t g_num_dev; + +/** + * cosm_hw_reset - Issue a HW reset for the MIC device + * @cdev: pointer to cosm_device instance + */ +static void cosm_hw_reset(struct cosm_device *cdev, bool force) +{ + int i; + +#define MIC_RESET_TO (45) + if (force && cdev->hw_ops->force_reset) + cdev->hw_ops->force_reset(cdev); + else + cdev->hw_ops->reset(cdev); + + for (i = 0; i < MIC_RESET_TO; i++) { + if (cdev->hw_ops->ready(cdev)) { + cosm_set_state(cdev, MIC_READY); + return; + } + /* + * Resets typically take 10s of seconds to complete. + * Since an MMIO read is required to check if the + * firmware is ready or not, a 1 second delay works nicely. + */ + msleep(1000); + } + cosm_set_state(cdev, MIC_RESET_FAILED); +} + +/** + * cosm_start - Start the MIC + * @cdev: pointer to cosm_device instance + * + * This function prepares an MIC for boot and initiates boot. + * RETURNS: An appropriate -ERRNO error value on error, or 0 for success. + */ +int cosm_start(struct cosm_device *cdev) +{ + const struct cred *orig_cred; + struct cred *override_cred; + int rc; + + mutex_lock(&cdev->cosm_mutex); + if (!cdev->bootmode) { + dev_err(&cdev->dev, "%s %d bootmode not set\n", + __func__, __LINE__); + rc = -EINVAL; + goto unlock_ret; + } +retry: + if (cdev->state != MIC_READY) { + dev_err(&cdev->dev, "%s %d MIC state not READY\n", + __func__, __LINE__); + rc = -EINVAL; + goto unlock_ret; + } + if (!cdev->hw_ops->ready(cdev)) { + cosm_hw_reset(cdev, false); + /* + * The state will either be MIC_READY if the reset succeeded + * or MIC_RESET_FAILED if the firmware reset failed. + */ + goto retry; + } + + /* + * Set credentials to root to allow non-root user to download initramsfs + * with 600 permissions + */ + override_cred = prepare_creds(); + if (!override_cred) { + dev_err(&cdev->dev, "%s %d prepare_creds failed\n", + __func__, __LINE__); + rc = -ENOMEM; + goto unlock_ret; + } + override_cred->fsuid = GLOBAL_ROOT_UID; + orig_cred = override_creds(override_cred); + + rc = cdev->hw_ops->start(cdev, cdev->index); + + revert_creds(orig_cred); + put_cred(override_cred); + if (rc) + goto unlock_ret; + + /* + * If linux is being booted, card is treated 'online' only + * when the scif interface in the card is up. If anything else + * is booted, we set card to 'online' immediately. + */ + if (!strcmp(cdev->bootmode, "linux")) + cosm_set_state(cdev, MIC_BOOTING); + else + cosm_set_state(cdev, MIC_ONLINE); +unlock_ret: + mutex_unlock(&cdev->cosm_mutex); + if (rc) + dev_err(&cdev->dev, "cosm_start failed rc %d\n", rc); + return rc; +} + +/** + * cosm_stop - Prepare the MIC for reset and trigger reset + * @cdev: pointer to cosm_device instance + * @force: force a MIC to reset even if it is already reset and ready. + * + * RETURNS: None + */ +void cosm_stop(struct cosm_device *cdev, bool force) +{ + mutex_lock(&cdev->cosm_mutex); + if (cdev->state != MIC_READY || force) { + /* + * Don't call hw_ops if they have been called previously. + * stop(..) calls device_unregister and will crash the system if + * called multiple times. + */ + u8 state = cdev->state == MIC_RESETTING ? + cdev->prev_state : cdev->state; + bool call_hw_ops = state != MIC_RESET_FAILED && + state != MIC_READY; + + if (cdev->state != MIC_RESETTING) + cosm_set_state(cdev, MIC_RESETTING); + cdev->heartbeat_watchdog_enable = false; + if (call_hw_ops) + cdev->hw_ops->stop(cdev, force); + cosm_hw_reset(cdev, force); + cosm_set_shutdown_status(cdev, MIC_NOP); + if (call_hw_ops && cdev->hw_ops->post_reset) + cdev->hw_ops->post_reset(cdev, cdev->state); + } + mutex_unlock(&cdev->cosm_mutex); + flush_work(&cdev->scif_work); +} + +/** + * cosm_reset_trigger_work - Trigger MIC reset + * @work: The work structure + * + * This work is scheduled whenever the host wants to reset the MIC. + */ +static void cosm_reset_trigger_work(struct work_struct *work) +{ + struct cosm_device *cdev = container_of(work, struct cosm_device, + reset_trigger_work); + cosm_stop(cdev, false); +} + +/** + * cosm_reset - Schedule MIC reset + * @cdev: pointer to cosm_device instance + * + * RETURNS: An -EINVAL if the card is already READY or 0 for success. + */ +int cosm_reset(struct cosm_device *cdev) +{ + int rc = 0; + + mutex_lock(&cdev->cosm_mutex); + if (cdev->state != MIC_READY) { + if (cdev->state != MIC_RESETTING) { + cdev->prev_state = cdev->state; + cosm_set_state(cdev, MIC_RESETTING); + schedule_work(&cdev->reset_trigger_work); + } + } else { + dev_err(&cdev->dev, "%s %d MIC is READY\n", __func__, __LINE__); + rc = -EINVAL; + } + mutex_unlock(&cdev->cosm_mutex); + return rc; +} + +/** + * cosm_shutdown - Initiate MIC shutdown. + * @cdev: pointer to cosm_device instance + * + * RETURNS: None + */ +int cosm_shutdown(struct cosm_device *cdev) +{ + struct cosm_msg msg = { .id = COSM_MSG_SHUTDOWN }; + int rc = 0; + + mutex_lock(&cdev->cosm_mutex); + if (cdev->state != MIC_ONLINE) { + rc = -EINVAL; + dev_err(&cdev->dev, "%s %d skipping shutdown in state: %s\n", + __func__, __LINE__, cosm_state_string[cdev->state]); + goto err; + } + + if (!cdev->epd) { + rc = -ENOTCONN; + dev_err(&cdev->dev, "%s %d scif endpoint not connected rc %d\n", + __func__, __LINE__, rc); + goto err; + } + + rc = scif_send(cdev->epd, &msg, sizeof(msg), SCIF_SEND_BLOCK); + if (rc < 0) { + dev_err(&cdev->dev, "%s %d scif_send failed rc %d\n", + __func__, __LINE__, rc); + goto err; + } + cdev->heartbeat_watchdog_enable = false; + cosm_set_state(cdev, MIC_SHUTTING_DOWN); + rc = 0; +err: + mutex_unlock(&cdev->cosm_mutex); + return rc; +} + +static int cosm_driver_probe(struct cosm_device *cdev) +{ + int rc; + + /* Initialize SCIF server at first probe */ + if (atomic_add_return(1, &g_num_dev) == 1) { + rc = cosm_scif_init(); + if (rc) + goto scif_exit; + } + mutex_init(&cdev->cosm_mutex); + INIT_WORK(&cdev->reset_trigger_work, cosm_reset_trigger_work); + INIT_WORK(&cdev->scif_work, cosm_scif_work); + cdev->sysfs_heartbeat_enable = true; + cosm_sysfs_init(cdev); + cdev->sdev = device_create_with_groups(g_cosm_class, cdev->dev.parent, + MKDEV(0, cdev->index), cdev, cdev->attr_group, + "mic%d", cdev->index); + if (IS_ERR(cdev->sdev)) { + rc = PTR_ERR(cdev->sdev); + dev_err(&cdev->dev, "device_create_with_groups failed rc %d\n", + rc); + goto scif_exit; + } + + cdev->state_sysfs = sysfs_get_dirent(cdev->sdev->kobj.sd, + "state"); + if (!cdev->state_sysfs) { + rc = -ENODEV; + dev_err(&cdev->dev, "sysfs_get_dirent failed rc %d\n", rc); + goto destroy_device; + } + cosm_create_debug_dir(cdev); + return 0; +destroy_device: + device_destroy(g_cosm_class, MKDEV(0, cdev->index)); +scif_exit: + if (atomic_dec_and_test(&g_num_dev)) + cosm_scif_exit(); + return rc; +} + +static void cosm_driver_remove(struct cosm_device *cdev) +{ + cosm_delete_debug_dir(cdev); + sysfs_put(cdev->state_sysfs); + device_destroy(g_cosm_class, MKDEV(0, cdev->index)); + flush_work(&cdev->reset_trigger_work); + cosm_stop(cdev, false); + if (atomic_dec_and_test(&g_num_dev)) + cosm_scif_exit(); + + /* These sysfs entries might have allocated */ + kfree(cdev->cmdline); + kfree(cdev->firmware); + kfree(cdev->ramdisk); + kfree(cdev->bootmode); +} + +static int cosm_suspend(struct device *dev) +{ + struct cosm_device *cdev = dev_to_cosm(dev); + + mutex_lock(&cdev->cosm_mutex); + switch (cdev->state) { + /** + * Suspend/freeze hooks in userspace have already shutdown the card. + * Card should be 'ready' in most cases. It is however possible that + * some userspace application initiated a boot. In those cases, we + * simply reset the card. + */ + case MIC_ONLINE: + case MIC_BOOTING: + case MIC_SHUTTING_DOWN: + mutex_unlock(&cdev->cosm_mutex); + cosm_stop(cdev, false); + break; + default: + mutex_unlock(&cdev->cosm_mutex); + break; + } + return 0; +} + +static const struct dev_pm_ops cosm_pm_ops = { + .suspend = cosm_suspend, + .freeze = cosm_suspend +}; + +static struct cosm_driver cosm_driver = { + .driver = { + .name = KBUILD_MODNAME, + .owner = THIS_MODULE, + .pm = &cosm_pm_ops, + }, + .probe = cosm_driver_probe, + .remove = cosm_driver_remove +}; + +static int __init cosm_init(void) +{ + int ret; + + cosm_init_debugfs(); + + g_cosm_class = class_create(THIS_MODULE, cosm_driver_name); + if (IS_ERR(g_cosm_class)) { + ret = PTR_ERR(g_cosm_class); + pr_err("class_create failed ret %d\n", ret); + goto cleanup_debugfs; + } + + ida_init(&g_cosm_ida); + ret = cosm_register_driver(&cosm_driver); + if (ret) { + pr_err("cosm_register_driver failed ret %d\n", ret); + goto ida_destroy; + } + return 0; +ida_destroy: + ida_destroy(&g_cosm_ida); + class_destroy(g_cosm_class); +cleanup_debugfs: + cosm_exit_debugfs(); + return ret; +} + +static void __exit cosm_exit(void) +{ + cosm_unregister_driver(&cosm_driver); + ida_destroy(&g_cosm_ida); + class_destroy(g_cosm_class); + cosm_exit_debugfs(); +} + +module_init(cosm_init); +module_exit(cosm_exit); + +MODULE_AUTHOR("Intel Corporation"); +MODULE_DESCRIPTION("Intel(R) MIC Coprocessor State Management (COSM) Driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/misc/mic/cosm/cosm_main.h b/drivers/misc/mic/cosm/cosm_main.h new file mode 100644 index 000000000..aa78cdf25 --- /dev/null +++ b/drivers/misc/mic/cosm/cosm_main.h @@ -0,0 +1,73 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2015 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Intel MIC Coprocessor State Management (COSM) Driver + * + */ +#ifndef _COSM_COSM_H_ +#define _COSM_COSM_H_ + +#include +#include "../bus/cosm_bus.h" + +#define COSM_HEARTBEAT_SEND_SEC 30 +#define SCIF_COSM_LISTEN_PORT 201 + +/** + * enum COSM msg id's + * @COSM_MSG_SHUTDOWN: host->card trigger shutdown + * @COSM_MSG_SYNC_TIME: host->card send host time to card to sync time + * @COSM_MSG_HEARTBEAT: card->host heartbeat + * @COSM_MSG_SHUTDOWN_STATUS: card->host with shutdown status as payload + */ +enum cosm_msg_id { + COSM_MSG_SHUTDOWN, + COSM_MSG_SYNC_TIME, + COSM_MSG_HEARTBEAT, + COSM_MSG_SHUTDOWN_STATUS, +}; + +struct cosm_msg { + u64 id; + union { + u64 shutdown_status; + struct { + u64 tv_sec; + u64 tv_nsec; + } timespec; + }; +}; + +extern const char * const cosm_state_string[]; +extern const char * const cosm_shutdown_status_string[]; + +void cosm_sysfs_init(struct cosm_device *cdev); +int cosm_start(struct cosm_device *cdev); +void cosm_stop(struct cosm_device *cdev, bool force); +int cosm_reset(struct cosm_device *cdev); +int cosm_shutdown(struct cosm_device *cdev); +void cosm_set_state(struct cosm_device *cdev, u8 state); +void cosm_set_shutdown_status(struct cosm_device *cdev, u8 status); +void cosm_init_debugfs(void); +void cosm_exit_debugfs(void); +void cosm_create_debug_dir(struct cosm_device *cdev); +void cosm_delete_debug_dir(struct cosm_device *cdev); +int cosm_scif_init(void); +void cosm_scif_exit(void); +void cosm_scif_work(struct work_struct *work); + +#endif diff --git a/drivers/misc/mic/cosm/cosm_scif_server.c b/drivers/misc/mic/cosm/cosm_scif_server.c new file mode 100644 index 000000000..e94b7eac4 --- /dev/null +++ b/drivers/misc/mic/cosm/cosm_scif_server.c @@ -0,0 +1,411 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2015 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Intel MIC Coprocessor State Management (COSM) Driver + * + */ +#include +#include + +#include "cosm_main.h" + +/* + * The COSM driver uses SCIF to communicate between the management node and the + * MIC cards. SCIF is used to (a) Send a shutdown command to the card (b) + * receive a shutdown status back from the card upon completion of shutdown and + * (c) receive periodic heartbeat messages from the card used to deduce if the + * card has crashed. + * + * A COSM server consisting of a SCIF listening endpoint waits for incoming + * connections from the card. Upon acceptance of the connection, a separate + * work-item is scheduled to handle SCIF message processing for that card. The + * life-time of this work-item is therefore the time from which the connection + * from a card is accepted to the time at which the connection is closed. A new + * work-item starts each time the card boots and is alive till the card (a) + * shuts down (b) is reset (c) crashes (d) cosm_client driver on the card is + * unloaded. + * + * From the point of view of COSM interactions with SCIF during card + * shutdown, reset and crash are as follows: + * + * Card shutdown + * ------------- + * 1. COSM client on the card invokes orderly_poweroff() in response to SHUTDOWN + * message from the host. + * 2. Card driver shutdown callback invokes scif_unregister_device(..) resulting + * in scif_remove(..) getting called on the card + * 3. scif_remove -> scif_stop -> scif_handle_remove_node -> + * scif_peer_unregister_device -> device_unregister for the host peer device + * 4. During device_unregister remove(..) method of cosm_client is invoked which + * closes the COSM SCIF endpoint on the card. This results in a SCIF_DISCNCT + * message being sent to host SCIF. SCIF_DISCNCT message processing on the + * host SCIF sets the host COSM SCIF endpoint state to DISCONNECTED and wakes + * up the host COSM thread blocked in scif_poll(..) resulting in + * scif_poll(..) returning EPOLLHUP. + * 5. On the card, scif_peer_release_dev is next called which results in an + * SCIF_EXIT message being sent to the host and after receiving the + * SCIF_EXIT_ACK from the host the peer device teardown on the card is + * complete. + * 6. As part of the SCIF_EXIT message processing on the host, host sends a + * SCIF_REMOVE_NODE to itself corresponding to the card being removed. This + * starts a similar SCIF peer device teardown sequence on the host + * corresponding to the card being shut down. + * + * Card reset + * ---------- + * The case of interest here is when the card has not been previously shut down + * since most of the steps below are skipped in that case: + + * 1. cosm_stop(..) invokes hw_ops->stop(..) method of the base PCIe driver + * which unregisters the SCIF HW device resulting in scif_remove(..) being + * called on the host. + * 2. scif_remove(..) calls scif_disconnect_node(..) which results in a + * SCIF_EXIT message being sent to the card. + * 3. The card executes scif_stop() as part of SCIF_EXIT message + * processing. This results in the COSM endpoint on the card being closed and + * the SCIF host peer device on the card getting unregistered similar to + * steps 3, 4 and 5 for the card shutdown case above. scif_poll(..) on the + * host returns EPOLLHUP as a result. + * 4. On the host, card peer device unregister and SCIF HW remove(..) also + * subsequently complete. + * + * Card crash + * ---------- + * If a reset is issued after the card has crashed, there is no SCIF_DISCNT + * message from the card which would result in scif_poll(..) returning + * EPOLLHUP. In this case when the host SCIF driver sends a SCIF_REMOVE_NODE + * message to itself resulting in the card SCIF peer device being unregistered, + * this results in a scif_peer_release_dev -> scif_cleanup_scifdev-> + * scif_invalidate_ep call sequence which sets the endpoint state to + * DISCONNECTED and results in scif_poll(..) returning EPOLLHUP. + */ + +#define COSM_SCIF_BACKLOG 16 +#define COSM_HEARTBEAT_CHECK_DELTA_SEC 10 +#define COSM_HEARTBEAT_TIMEOUT_SEC \ + (COSM_HEARTBEAT_SEND_SEC + COSM_HEARTBEAT_CHECK_DELTA_SEC) +#define COSM_HEARTBEAT_TIMEOUT_MSEC (COSM_HEARTBEAT_TIMEOUT_SEC * MSEC_PER_SEC) + +static struct task_struct *server_thread; +static scif_epd_t listen_epd; + +/* Publish MIC card's shutdown status to user space MIC daemon */ +static void cosm_update_mic_status(struct cosm_device *cdev) +{ + if (cdev->shutdown_status_int != MIC_NOP) { + cosm_set_shutdown_status(cdev, cdev->shutdown_status_int); + cdev->shutdown_status_int = MIC_NOP; + } +} + +/* Store MIC card's shutdown status internally when it is received */ +static void cosm_shutdown_status_int(struct cosm_device *cdev, + enum mic_status shutdown_status) +{ + switch (shutdown_status) { + case MIC_HALTED: + case MIC_POWER_OFF: + case MIC_RESTART: + case MIC_CRASHED: + break; + default: + dev_err(&cdev->dev, "%s %d Unexpected shutdown_status %d\n", + __func__, __LINE__, shutdown_status); + return; + }; + cdev->shutdown_status_int = shutdown_status; + cdev->heartbeat_watchdog_enable = false; + + if (cdev->state != MIC_SHUTTING_DOWN) + cosm_set_state(cdev, MIC_SHUTTING_DOWN); +} + +/* Non-blocking recv. Read and process all available messages */ +static void cosm_scif_recv(struct cosm_device *cdev) +{ + struct cosm_msg msg; + int rc; + + while (1) { + rc = scif_recv(cdev->epd, &msg, sizeof(msg), 0); + if (!rc) { + break; + } else if (rc < 0) { + dev_dbg(&cdev->dev, "%s: %d rc %d\n", + __func__, __LINE__, rc); + break; + } + dev_dbg(&cdev->dev, "%s: %d rc %d id 0x%llx\n", + __func__, __LINE__, rc, msg.id); + + switch (msg.id) { + case COSM_MSG_SHUTDOWN_STATUS: + cosm_shutdown_status_int(cdev, msg.shutdown_status); + break; + case COSM_MSG_HEARTBEAT: + /* Nothing to do, heartbeat only unblocks scif_poll */ + break; + default: + dev_err(&cdev->dev, "%s: %d unknown msg.id %lld\n", + __func__, __LINE__, msg.id); + break; + } + } +} + +/* Publish crashed status for this MIC card */ +static void cosm_set_crashed(struct cosm_device *cdev) +{ + dev_err(&cdev->dev, "node alive timeout\n"); + cosm_shutdown_status_int(cdev, MIC_CRASHED); + cosm_update_mic_status(cdev); +} + +/* Send host time to the MIC card to sync system time between host and MIC */ +static void cosm_send_time(struct cosm_device *cdev) +{ + struct cosm_msg msg = { .id = COSM_MSG_SYNC_TIME }; + struct timespec64 ts; + int rc; + + ktime_get_real_ts64(&ts); + msg.timespec.tv_sec = ts.tv_sec; + msg.timespec.tv_nsec = ts.tv_nsec; + + rc = scif_send(cdev->epd, &msg, sizeof(msg), SCIF_SEND_BLOCK); + if (rc < 0) + dev_err(&cdev->dev, "%s %d scif_send failed rc %d\n", + __func__, __LINE__, rc); +} + +/* + * Close this cosm_device's endpoint after its peer endpoint on the card has + * been closed. In all cases except MIC card crash EPOLLHUP on the host is + * triggered by the client's endpoint being closed. + */ +static void cosm_scif_close(struct cosm_device *cdev) +{ + /* + * Because SHUTDOWN_STATUS message is sent by the MIC cards in the + * reboot notifier when shutdown is still not complete, we notify mpssd + * to reset the card when SCIF endpoint is closed. + */ + cosm_update_mic_status(cdev); + scif_close(cdev->epd); + cdev->epd = NULL; + dev_dbg(&cdev->dev, "%s %d\n", __func__, __LINE__); +} + +/* + * Set card state to ONLINE when a new SCIF connection from a MIC card is + * received. Normally the state is BOOTING when the connection comes in, but can + * be ONLINE if cosm_client driver on the card was unloaded and then reloaded. + */ +static int cosm_set_online(struct cosm_device *cdev) +{ + int rc = 0; + + if (MIC_BOOTING == cdev->state || MIC_ONLINE == cdev->state) { + cdev->heartbeat_watchdog_enable = cdev->sysfs_heartbeat_enable; + cdev->epd = cdev->newepd; + if (cdev->state == MIC_BOOTING) + cosm_set_state(cdev, MIC_ONLINE); + cosm_send_time(cdev); + dev_dbg(&cdev->dev, "%s %d\n", __func__, __LINE__); + } else { + dev_warn(&cdev->dev, "%s %d not going online in state: %s\n", + __func__, __LINE__, cosm_state_string[cdev->state]); + rc = -EINVAL; + } + /* Drop reference acquired by bus_find_device in the server thread */ + put_device(&cdev->dev); + return rc; +} + +/* + * Work function for handling work for a SCIF connection from a particular MIC + * card. It first sets the card state to ONLINE and then calls scif_poll to + * block on activity such as incoming messages on the SCIF endpoint. When the + * endpoint is closed, the work function exits, completing its life cycle, from + * MIC card boot to card shutdown/reset/crash. + */ +void cosm_scif_work(struct work_struct *work) +{ + struct cosm_device *cdev = container_of(work, struct cosm_device, + scif_work); + struct scif_pollepd pollepd; + int rc; + + mutex_lock(&cdev->cosm_mutex); + if (cosm_set_online(cdev)) + goto exit; + + while (1) { + pollepd.epd = cdev->epd; + pollepd.events = EPOLLIN; + + /* Drop the mutex before blocking in scif_poll(..) */ + mutex_unlock(&cdev->cosm_mutex); + /* poll(..) with timeout on our endpoint */ + rc = scif_poll(&pollepd, 1, COSM_HEARTBEAT_TIMEOUT_MSEC); + mutex_lock(&cdev->cosm_mutex); + if (rc < 0) { + dev_err(&cdev->dev, "%s %d scif_poll rc %d\n", + __func__, __LINE__, rc); + continue; + } + + /* There is a message from the card */ + if (pollepd.revents & EPOLLIN) + cosm_scif_recv(cdev); + + /* The peer endpoint is closed or this endpoint disconnected */ + if (pollepd.revents & EPOLLHUP) { + cosm_scif_close(cdev); + break; + } + + /* Did we timeout from poll? */ + if (!rc && cdev->heartbeat_watchdog_enable) + cosm_set_crashed(cdev); + } +exit: + dev_dbg(&cdev->dev, "%s %d exiting\n", __func__, __LINE__); + mutex_unlock(&cdev->cosm_mutex); +} + +/* + * COSM SCIF server thread function. Accepts incoming SCIF connections from MIC + * cards, finds the correct cosm_device to associate that connection with and + * schedules individual work items for each MIC card. + */ +static int cosm_scif_server(void *unused) +{ + struct cosm_device *cdev; + scif_epd_t newepd; + struct scif_port_id port_id; + int rc; + + allow_signal(SIGKILL); + + while (!kthread_should_stop()) { + rc = scif_accept(listen_epd, &port_id, &newepd, + SCIF_ACCEPT_SYNC); + if (rc < 0) { + if (-ERESTARTSYS != rc) + pr_err("%s %d rc %d\n", __func__, __LINE__, rc); + continue; + } + + /* + * Associate the incoming connection with a particular + * cosm_device, COSM device ID == SCIF node ID - 1 + */ + cdev = cosm_find_cdev_by_id(port_id.node - 1); + if (!cdev) + continue; + cdev->newepd = newepd; + schedule_work(&cdev->scif_work); + } + + pr_debug("%s %d Server thread stopped\n", __func__, __LINE__); + return 0; +} + +static int cosm_scif_listen(void) +{ + int rc; + + listen_epd = scif_open(); + if (!listen_epd) { + pr_err("%s %d scif_open failed\n", __func__, __LINE__); + return -ENOMEM; + } + + rc = scif_bind(listen_epd, SCIF_COSM_LISTEN_PORT); + if (rc < 0) { + pr_err("%s %d scif_bind failed rc %d\n", + __func__, __LINE__, rc); + goto err; + } + + rc = scif_listen(listen_epd, COSM_SCIF_BACKLOG); + if (rc < 0) { + pr_err("%s %d scif_listen rc %d\n", __func__, __LINE__, rc); + goto err; + } + pr_debug("%s %d listen_epd set up\n", __func__, __LINE__); + return 0; +err: + scif_close(listen_epd); + listen_epd = NULL; + return rc; +} + +static void cosm_scif_listen_exit(void) +{ + pr_debug("%s %d closing listen_epd\n", __func__, __LINE__); + if (listen_epd) { + scif_close(listen_epd); + listen_epd = NULL; + } +} + +/* + * Create a listening SCIF endpoint and a server kthread which accepts incoming + * SCIF connections from MIC cards + */ +int cosm_scif_init(void) +{ + int rc = cosm_scif_listen(); + + if (rc) { + pr_err("%s %d cosm_scif_listen rc %d\n", + __func__, __LINE__, rc); + goto err; + } + + server_thread = kthread_run(cosm_scif_server, NULL, "cosm_server"); + if (IS_ERR(server_thread)) { + rc = PTR_ERR(server_thread); + pr_err("%s %d kthread_run rc %d\n", __func__, __LINE__, rc); + goto listen_exit; + } + return 0; +listen_exit: + cosm_scif_listen_exit(); +err: + return rc; +} + +/* Stop the running server thread and close the listening SCIF endpoint */ +void cosm_scif_exit(void) +{ + int rc; + + if (!IS_ERR_OR_NULL(server_thread)) { + rc = send_sig(SIGKILL, server_thread, 0); + if (rc) { + pr_err("%s %d send_sig rc %d\n", + __func__, __LINE__, rc); + return; + } + kthread_stop(server_thread); + } + + cosm_scif_listen_exit(); +} diff --git a/drivers/misc/mic/cosm/cosm_sysfs.c b/drivers/misc/mic/cosm/cosm_sysfs.c new file mode 100644 index 000000000..29d6863b6 --- /dev/null +++ b/drivers/misc/mic/cosm/cosm_sysfs.c @@ -0,0 +1,461 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2015 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Intel MIC Coprocessor State Management (COSM) Driver + * + */ +#include +#include "cosm_main.h" + +/* + * A state-to-string lookup table, for exposing a human readable state + * via sysfs. Always keep in sync with enum cosm_states + */ +const char * const cosm_state_string[] = { + [MIC_READY] = "ready", + [MIC_BOOTING] = "booting", + [MIC_ONLINE] = "online", + [MIC_SHUTTING_DOWN] = "shutting_down", + [MIC_RESETTING] = "resetting", + [MIC_RESET_FAILED] = "reset_failed", +}; + +/* + * A shutdown-status-to-string lookup table, for exposing a human + * readable state via sysfs. Always keep in sync with enum cosm_shutdown_status + */ +const char * const cosm_shutdown_status_string[] = { + [MIC_NOP] = "nop", + [MIC_CRASHED] = "crashed", + [MIC_HALTED] = "halted", + [MIC_POWER_OFF] = "poweroff", + [MIC_RESTART] = "restart", +}; + +void cosm_set_shutdown_status(struct cosm_device *cdev, u8 shutdown_status) +{ + dev_dbg(&cdev->dev, "Shutdown Status %s -> %s\n", + cosm_shutdown_status_string[cdev->shutdown_status], + cosm_shutdown_status_string[shutdown_status]); + cdev->shutdown_status = shutdown_status; +} + +void cosm_set_state(struct cosm_device *cdev, u8 state) +{ + dev_dbg(&cdev->dev, "State %s -> %s\n", + cosm_state_string[cdev->state], + cosm_state_string[state]); + cdev->state = state; + sysfs_notify_dirent(cdev->state_sysfs); +} + +static ssize_t +family_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct cosm_device *cdev = dev_get_drvdata(dev); + + if (!cdev) + return -EINVAL; + + return cdev->hw_ops->family(cdev, buf); +} +static DEVICE_ATTR_RO(family); + +static ssize_t +stepping_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct cosm_device *cdev = dev_get_drvdata(dev); + + if (!cdev) + return -EINVAL; + + return cdev->hw_ops->stepping(cdev, buf); +} +static DEVICE_ATTR_RO(stepping); + +static ssize_t +state_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct cosm_device *cdev = dev_get_drvdata(dev); + + if (!cdev || cdev->state >= MIC_LAST) + return -EINVAL; + + return scnprintf(buf, PAGE_SIZE, "%s\n", + cosm_state_string[cdev->state]); +} + +static ssize_t +state_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct cosm_device *cdev = dev_get_drvdata(dev); + int rc; + + if (!cdev) + return -EINVAL; + + if (sysfs_streq(buf, "boot")) { + rc = cosm_start(cdev); + goto done; + } + if (sysfs_streq(buf, "reset")) { + rc = cosm_reset(cdev); + goto done; + } + + if (sysfs_streq(buf, "shutdown")) { + rc = cosm_shutdown(cdev); + goto done; + } + rc = -EINVAL; +done: + if (rc) + count = rc; + return count; +} +static DEVICE_ATTR_RW(state); + +static ssize_t shutdown_status_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct cosm_device *cdev = dev_get_drvdata(dev); + + if (!cdev || cdev->shutdown_status >= MIC_STATUS_LAST) + return -EINVAL; + + return scnprintf(buf, PAGE_SIZE, "%s\n", + cosm_shutdown_status_string[cdev->shutdown_status]); +} +static DEVICE_ATTR_RO(shutdown_status); + +static ssize_t +heartbeat_enable_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct cosm_device *cdev = dev_get_drvdata(dev); + + if (!cdev) + return -EINVAL; + + return scnprintf(buf, PAGE_SIZE, "%d\n", cdev->sysfs_heartbeat_enable); +} + +static ssize_t +heartbeat_enable_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct cosm_device *cdev = dev_get_drvdata(dev); + int enable; + int ret; + + if (!cdev) + return -EINVAL; + + mutex_lock(&cdev->cosm_mutex); + ret = kstrtoint(buf, 10, &enable); + if (ret) + goto unlock; + + cdev->sysfs_heartbeat_enable = enable; + /* if state is not online, cdev->heartbeat_watchdog_enable is 0 */ + if (cdev->state == MIC_ONLINE) + cdev->heartbeat_watchdog_enable = enable; + ret = count; +unlock: + mutex_unlock(&cdev->cosm_mutex); + return ret; +} +static DEVICE_ATTR_RW(heartbeat_enable); + +static ssize_t +cmdline_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct cosm_device *cdev = dev_get_drvdata(dev); + char *cmdline; + + if (!cdev) + return -EINVAL; + + cmdline = cdev->cmdline; + + if (cmdline) + return scnprintf(buf, PAGE_SIZE, "%s\n", cmdline); + return 0; +} + +static ssize_t +cmdline_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct cosm_device *cdev = dev_get_drvdata(dev); + + if (!cdev) + return -EINVAL; + + mutex_lock(&cdev->cosm_mutex); + kfree(cdev->cmdline); + + cdev->cmdline = kmalloc(count + 1, GFP_KERNEL); + if (!cdev->cmdline) { + count = -ENOMEM; + goto unlock; + } + + strncpy(cdev->cmdline, buf, count); + + if (cdev->cmdline[count - 1] == '\n') + cdev->cmdline[count - 1] = '\0'; + else + cdev->cmdline[count] = '\0'; +unlock: + mutex_unlock(&cdev->cosm_mutex); + return count; +} +static DEVICE_ATTR_RW(cmdline); + +static ssize_t +firmware_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct cosm_device *cdev = dev_get_drvdata(dev); + char *firmware; + + if (!cdev) + return -EINVAL; + + firmware = cdev->firmware; + + if (firmware) + return scnprintf(buf, PAGE_SIZE, "%s\n", firmware); + return 0; +} + +static ssize_t +firmware_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct cosm_device *cdev = dev_get_drvdata(dev); + + if (!cdev) + return -EINVAL; + + mutex_lock(&cdev->cosm_mutex); + kfree(cdev->firmware); + + cdev->firmware = kmalloc(count + 1, GFP_KERNEL); + if (!cdev->firmware) { + count = -ENOMEM; + goto unlock; + } + strncpy(cdev->firmware, buf, count); + + if (cdev->firmware[count - 1] == '\n') + cdev->firmware[count - 1] = '\0'; + else + cdev->firmware[count] = '\0'; +unlock: + mutex_unlock(&cdev->cosm_mutex); + return count; +} +static DEVICE_ATTR_RW(firmware); + +static ssize_t +ramdisk_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct cosm_device *cdev = dev_get_drvdata(dev); + char *ramdisk; + + if (!cdev) + return -EINVAL; + + ramdisk = cdev->ramdisk; + + if (ramdisk) + return scnprintf(buf, PAGE_SIZE, "%s\n", ramdisk); + return 0; +} + +static ssize_t +ramdisk_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct cosm_device *cdev = dev_get_drvdata(dev); + + if (!cdev) + return -EINVAL; + + mutex_lock(&cdev->cosm_mutex); + kfree(cdev->ramdisk); + + cdev->ramdisk = kmalloc(count + 1, GFP_KERNEL); + if (!cdev->ramdisk) { + count = -ENOMEM; + goto unlock; + } + + strncpy(cdev->ramdisk, buf, count); + + if (cdev->ramdisk[count - 1] == '\n') + cdev->ramdisk[count - 1] = '\0'; + else + cdev->ramdisk[count] = '\0'; +unlock: + mutex_unlock(&cdev->cosm_mutex); + return count; +} +static DEVICE_ATTR_RW(ramdisk); + +static ssize_t +bootmode_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct cosm_device *cdev = dev_get_drvdata(dev); + char *bootmode; + + if (!cdev) + return -EINVAL; + + bootmode = cdev->bootmode; + + if (bootmode) + return scnprintf(buf, PAGE_SIZE, "%s\n", bootmode); + return 0; +} + +static ssize_t +bootmode_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct cosm_device *cdev = dev_get_drvdata(dev); + + if (!cdev) + return -EINVAL; + + if (!sysfs_streq(buf, "linux") && !sysfs_streq(buf, "flash")) + return -EINVAL; + + mutex_lock(&cdev->cosm_mutex); + kfree(cdev->bootmode); + + cdev->bootmode = kmalloc(count + 1, GFP_KERNEL); + if (!cdev->bootmode) { + count = -ENOMEM; + goto unlock; + } + + strncpy(cdev->bootmode, buf, count); + + if (cdev->bootmode[count - 1] == '\n') + cdev->bootmode[count - 1] = '\0'; + else + cdev->bootmode[count] = '\0'; +unlock: + mutex_unlock(&cdev->cosm_mutex); + return count; +} +static DEVICE_ATTR_RW(bootmode); + +static ssize_t +log_buf_addr_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct cosm_device *cdev = dev_get_drvdata(dev); + + if (!cdev) + return -EINVAL; + + return scnprintf(buf, PAGE_SIZE, "%p\n", cdev->log_buf_addr); +} + +static ssize_t +log_buf_addr_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct cosm_device *cdev = dev_get_drvdata(dev); + int ret; + unsigned long addr; + + if (!cdev) + return -EINVAL; + + ret = kstrtoul(buf, 16, &addr); + if (ret) + goto exit; + + cdev->log_buf_addr = (void *)addr; + ret = count; +exit: + return ret; +} +static DEVICE_ATTR_RW(log_buf_addr); + +static ssize_t +log_buf_len_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct cosm_device *cdev = dev_get_drvdata(dev); + + if (!cdev) + return -EINVAL; + + return scnprintf(buf, PAGE_SIZE, "%p\n", cdev->log_buf_len); +} + +static ssize_t +log_buf_len_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct cosm_device *cdev = dev_get_drvdata(dev); + int ret; + unsigned long addr; + + if (!cdev) + return -EINVAL; + + ret = kstrtoul(buf, 16, &addr); + if (ret) + goto exit; + + cdev->log_buf_len = (int *)addr; + ret = count; +exit: + return ret; +} +static DEVICE_ATTR_RW(log_buf_len); + +static struct attribute *cosm_default_attrs[] = { + &dev_attr_family.attr, + &dev_attr_stepping.attr, + &dev_attr_state.attr, + &dev_attr_shutdown_status.attr, + &dev_attr_heartbeat_enable.attr, + &dev_attr_cmdline.attr, + &dev_attr_firmware.attr, + &dev_attr_ramdisk.attr, + &dev_attr_bootmode.attr, + &dev_attr_log_buf_addr.attr, + &dev_attr_log_buf_len.attr, + + NULL +}; + +ATTRIBUTE_GROUPS(cosm_default); + +void cosm_sysfs_init(struct cosm_device *cdev) +{ + cdev->attr_group = cosm_default_groups; +} diff --git a/drivers/misc/mic/cosm_client/Makefile b/drivers/misc/mic/cosm_client/Makefile new file mode 100644 index 000000000..6f751a519 --- /dev/null +++ b/drivers/misc/mic/cosm_client/Makefile @@ -0,0 +1,7 @@ +# +# Makefile - Intel MIC COSM Client Driver +# Copyright(c) 2015, Intel Corporation. +# +obj-$(CONFIG_MIC_COSM) += cosm_client.o + +cosm_client-objs += cosm_scif_client.o diff --git a/drivers/misc/mic/cosm_client/cosm_scif_client.c b/drivers/misc/mic/cosm_client/cosm_scif_client.c new file mode 100644 index 000000000..225078cb5 --- /dev/null +++ b/drivers/misc/mic/cosm_client/cosm_scif_client.c @@ -0,0 +1,281 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2015 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Intel MIC COSM Client Driver + * + */ +#include +#include +#include +#include +#include + +#include "../cosm/cosm_main.h" + +#define COSM_SCIF_MAX_RETRIES 10 +#define COSM_HEARTBEAT_SEND_MSEC (COSM_HEARTBEAT_SEND_SEC * MSEC_PER_SEC) + +static struct task_struct *client_thread; +static scif_epd_t client_epd; +static struct scif_peer_dev *client_spdev; + +/* + * Reboot notifier: receives shutdown status from the OS and communicates it + * back to the COSM process on the host + */ +static int cosm_reboot_event(struct notifier_block *this, unsigned long event, + void *ptr) +{ + struct cosm_msg msg = { .id = COSM_MSG_SHUTDOWN_STATUS }; + int rc; + + event = (event == SYS_RESTART) ? SYSTEM_RESTART : event; + dev_info(&client_spdev->dev, "%s %d received event %ld\n", + __func__, __LINE__, event); + + msg.shutdown_status = event; + rc = scif_send(client_epd, &msg, sizeof(msg), SCIF_SEND_BLOCK); + if (rc < 0) + dev_err(&client_spdev->dev, "%s %d scif_send rc %d\n", + __func__, __LINE__, rc); + + return NOTIFY_DONE; +} + +static struct notifier_block cosm_reboot = { + .notifier_call = cosm_reboot_event, +}; + +/* Set system time from timespec value received from the host */ +static void cosm_set_time(struct cosm_msg *msg) +{ + struct timespec64 ts = { + .tv_sec = msg->timespec.tv_sec, + .tv_nsec = msg->timespec.tv_nsec, + }; + int rc = do_settimeofday64(&ts); + + if (rc) + dev_err(&client_spdev->dev, "%s: %d settimeofday rc %d\n", + __func__, __LINE__, rc); +} + +/* COSM client receive message processing */ +static void cosm_client_recv(void) +{ + struct cosm_msg msg; + int rc; + + while (1) { + rc = scif_recv(client_epd, &msg, sizeof(msg), 0); + if (!rc) { + return; + } else if (rc < 0) { + dev_err(&client_spdev->dev, "%s: %d rc %d\n", + __func__, __LINE__, rc); + return; + } + + dev_dbg(&client_spdev->dev, "%s: %d rc %d id 0x%llx\n", + __func__, __LINE__, rc, msg.id); + + switch (msg.id) { + case COSM_MSG_SYNC_TIME: + cosm_set_time(&msg); + break; + case COSM_MSG_SHUTDOWN: + orderly_poweroff(true); + break; + default: + dev_err(&client_spdev->dev, "%s: %d unknown id %lld\n", + __func__, __LINE__, msg.id); + break; + } + } +} + +/* Initiate connection to the COSM server on the host */ +static int cosm_scif_connect(void) +{ + struct scif_port_id port_id; + int i, rc; + + client_epd = scif_open(); + if (!client_epd) { + dev_err(&client_spdev->dev, "%s %d scif_open failed\n", + __func__, __LINE__); + return -ENOMEM; + } + + port_id.node = 0; + port_id.port = SCIF_COSM_LISTEN_PORT; + + for (i = 0; i < COSM_SCIF_MAX_RETRIES; i++) { + rc = scif_connect(client_epd, &port_id); + if (rc < 0) + msleep(1000); + else + break; + } + + if (rc < 0) { + dev_err(&client_spdev->dev, "%s %d scif_connect rc %d\n", + __func__, __LINE__, rc); + scif_close(client_epd); + client_epd = NULL; + } + return rc < 0 ? rc : 0; +} + +/* Close host SCIF connection */ +static void cosm_scif_connect_exit(void) +{ + if (client_epd) { + scif_close(client_epd); + client_epd = NULL; + } +} + +/* + * COSM SCIF client thread function: waits for messages from the host and sends + * a heartbeat to the host + */ +static int cosm_scif_client(void *unused) +{ + struct cosm_msg msg = { .id = COSM_MSG_HEARTBEAT }; + struct scif_pollepd pollepd; + int rc; + + allow_signal(SIGKILL); + + while (!kthread_should_stop()) { + pollepd.epd = client_epd; + pollepd.events = EPOLLIN; + + rc = scif_poll(&pollepd, 1, COSM_HEARTBEAT_SEND_MSEC); + if (rc < 0) { + if (-EINTR != rc) + dev_err(&client_spdev->dev, + "%s %d scif_poll rc %d\n", + __func__, __LINE__, rc); + continue; + } + + if (pollepd.revents & EPOLLIN) + cosm_client_recv(); + + msg.id = COSM_MSG_HEARTBEAT; + rc = scif_send(client_epd, &msg, sizeof(msg), SCIF_SEND_BLOCK); + if (rc < 0) + dev_err(&client_spdev->dev, "%s %d scif_send rc %d\n", + __func__, __LINE__, rc); + } + + dev_dbg(&client_spdev->dev, "%s %d Client thread stopped\n", + __func__, __LINE__); + return 0; +} + +static void cosm_scif_probe(struct scif_peer_dev *spdev) +{ + int rc; + + dev_dbg(&spdev->dev, "%s %d: dnode %d\n", + __func__, __LINE__, spdev->dnode); + + /* We are only interested in the host with spdev->dnode == 0 */ + if (spdev->dnode) + return; + + client_spdev = spdev; + rc = cosm_scif_connect(); + if (rc) + goto exit; + + rc = register_reboot_notifier(&cosm_reboot); + if (rc) { + dev_err(&spdev->dev, + "reboot notifier registration failed rc %d\n", rc); + goto connect_exit; + } + + client_thread = kthread_run(cosm_scif_client, NULL, "cosm_client"); + if (IS_ERR(client_thread)) { + rc = PTR_ERR(client_thread); + dev_err(&spdev->dev, "%s %d kthread_run rc %d\n", + __func__, __LINE__, rc); + goto unreg_reboot; + } + return; +unreg_reboot: + unregister_reboot_notifier(&cosm_reboot); +connect_exit: + cosm_scif_connect_exit(); +exit: + client_spdev = NULL; +} + +static void cosm_scif_remove(struct scif_peer_dev *spdev) +{ + int rc; + + dev_dbg(&spdev->dev, "%s %d: dnode %d\n", + __func__, __LINE__, spdev->dnode); + + if (spdev->dnode) + return; + + if (!IS_ERR_OR_NULL(client_thread)) { + rc = send_sig(SIGKILL, client_thread, 0); + if (rc) { + pr_err("%s %d send_sig rc %d\n", + __func__, __LINE__, rc); + return; + } + kthread_stop(client_thread); + } + unregister_reboot_notifier(&cosm_reboot); + cosm_scif_connect_exit(); + client_spdev = NULL; +} + +static struct scif_client scif_client_cosm = { + .name = KBUILD_MODNAME, + .probe = cosm_scif_probe, + .remove = cosm_scif_remove, +}; + +static int __init cosm_client_init(void) +{ + int rc = scif_client_register(&scif_client_cosm); + + if (rc) + pr_err("scif_client_register failed rc %d\n", rc); + return rc; +} + +static void __exit cosm_client_exit(void) +{ + scif_client_unregister(&scif_client_cosm); +} + +module_init(cosm_client_init); +module_exit(cosm_client_exit); + +MODULE_AUTHOR("Intel Corporation"); +MODULE_DESCRIPTION("Intel(R) MIC card OS state management client driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/misc/mic/host/Makefile b/drivers/misc/mic/host/Makefile new file mode 100644 index 000000000..25f153367 --- /dev/null +++ b/drivers/misc/mic/host/Makefile @@ -0,0 +1,12 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile - Intel MIC Linux driver. +# Copyright(c) 2013, Intel Corporation. +# +obj-$(CONFIG_INTEL_MIC_HOST) += mic_host.o +mic_host-objs := mic_main.o +mic_host-objs += mic_x100.o +mic_host-objs += mic_smpt.o +mic_host-objs += mic_intr.o +mic_host-objs += mic_boot.o +mic_host-objs += mic_debugfs.o diff --git a/drivers/misc/mic/host/mic_boot.c b/drivers/misc/mic/host/mic_boot.c new file mode 100644 index 000000000..c327985c9 --- /dev/null +++ b/drivers/misc/mic/host/mic_boot.c @@ -0,0 +1,599 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2013 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Intel MIC Host driver. + * + */ +#include +#include +#include +#include +#include +#include +#include "../bus/scif_bus.h" +#include "../bus/vop_bus.h" +#include "../common/mic_dev.h" +#include "mic_device.h" +#include "mic_smpt.h" + +static inline struct mic_device *vpdev_to_mdev(struct device *dev) +{ + return dev_get_drvdata(dev->parent); +} + +static dma_addr_t +_mic_dma_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction dir, unsigned long attrs) +{ + void *va = phys_to_virt(page_to_phys(page)) + offset; + struct mic_device *mdev = vpdev_to_mdev(dev); + + return mic_map_single(mdev, va, size); +} + +static void _mic_dma_unmap_page(struct device *dev, dma_addr_t dma_addr, + size_t size, enum dma_data_direction dir, + unsigned long attrs) +{ + struct mic_device *mdev = vpdev_to_mdev(dev); + + mic_unmap_single(mdev, dma_addr, size); +} + +static const struct dma_map_ops _mic_dma_ops = { + .map_page = _mic_dma_map_page, + .unmap_page = _mic_dma_unmap_page, +}; + +static struct mic_irq * +__mic_request_irq(struct vop_device *vpdev, + irqreturn_t (*func)(int irq, void *data), + const char *name, void *data, int intr_src) +{ + struct mic_device *mdev = vpdev_to_mdev(&vpdev->dev); + + return mic_request_threaded_irq(mdev, func, NULL, name, data, + intr_src, MIC_INTR_DB); +} + +static void __mic_free_irq(struct vop_device *vpdev, + struct mic_irq *cookie, void *data) +{ + struct mic_device *mdev = vpdev_to_mdev(&vpdev->dev); + + mic_free_irq(mdev, cookie, data); +} + +static void __mic_ack_interrupt(struct vop_device *vpdev, int num) +{ + struct mic_device *mdev = vpdev_to_mdev(&vpdev->dev); + + mdev->ops->intr_workarounds(mdev); +} + +static int __mic_next_db(struct vop_device *vpdev) +{ + struct mic_device *mdev = vpdev_to_mdev(&vpdev->dev); + + return mic_next_db(mdev); +} + +static void *__mic_get_dp(struct vop_device *vpdev) +{ + struct mic_device *mdev = vpdev_to_mdev(&vpdev->dev); + + return mdev->dp; +} + +static void __iomem *__mic_get_remote_dp(struct vop_device *vpdev) +{ + return NULL; +} + +static void __mic_send_intr(struct vop_device *vpdev, int db) +{ + struct mic_device *mdev = vpdev_to_mdev(&vpdev->dev); + + mdev->ops->send_intr(mdev, db); +} + +static void __iomem *__mic_ioremap(struct vop_device *vpdev, + dma_addr_t pa, size_t len) +{ + struct mic_device *mdev = vpdev_to_mdev(&vpdev->dev); + + return mdev->aper.va + pa; +} + +static void __mic_iounmap(struct vop_device *vpdev, void __iomem *va) +{ + /* nothing to do */ +} + +static struct vop_hw_ops vop_hw_ops = { + .request_irq = __mic_request_irq, + .free_irq = __mic_free_irq, + .ack_interrupt = __mic_ack_interrupt, + .next_db = __mic_next_db, + .get_dp = __mic_get_dp, + .get_remote_dp = __mic_get_remote_dp, + .send_intr = __mic_send_intr, + .ioremap = __mic_ioremap, + .iounmap = __mic_iounmap, +}; + +static inline struct mic_device *scdev_to_mdev(struct scif_hw_dev *scdev) +{ + return dev_get_drvdata(scdev->dev.parent); +} + +static void *__mic_dma_alloc(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t gfp, + unsigned long attrs) +{ + struct scif_hw_dev *scdev = dev_get_drvdata(dev); + struct mic_device *mdev = scdev_to_mdev(scdev); + dma_addr_t tmp; + void *va = kmalloc(size, gfp); + + if (va) { + tmp = mic_map_single(mdev, va, size); + if (dma_mapping_error(dev, tmp)) { + kfree(va); + va = NULL; + } else { + *dma_handle = tmp; + } + } + return va; +} + +static void __mic_dma_free(struct device *dev, size_t size, void *vaddr, + dma_addr_t dma_handle, unsigned long attrs) +{ + struct scif_hw_dev *scdev = dev_get_drvdata(dev); + struct mic_device *mdev = scdev_to_mdev(scdev); + + mic_unmap_single(mdev, dma_handle, size); + kfree(vaddr); +} + +static dma_addr_t +__mic_dma_map_page(struct device *dev, struct page *page, unsigned long offset, + size_t size, enum dma_data_direction dir, + unsigned long attrs) +{ + void *va = phys_to_virt(page_to_phys(page)) + offset; + struct scif_hw_dev *scdev = dev_get_drvdata(dev); + struct mic_device *mdev = scdev_to_mdev(scdev); + + return mic_map_single(mdev, va, size); +} + +static void +__mic_dma_unmap_page(struct device *dev, dma_addr_t dma_addr, + size_t size, enum dma_data_direction dir, + unsigned long attrs) +{ + struct scif_hw_dev *scdev = dev_get_drvdata(dev); + struct mic_device *mdev = scdev_to_mdev(scdev); + + mic_unmap_single(mdev, dma_addr, size); +} + +static int __mic_dma_map_sg(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir, + unsigned long attrs) +{ + struct scif_hw_dev *scdev = dev_get_drvdata(dev); + struct mic_device *mdev = scdev_to_mdev(scdev); + struct scatterlist *s; + int i, j, ret; + dma_addr_t da; + + ret = dma_map_sg(&mdev->pdev->dev, sg, nents, dir); + if (ret <= 0) + return 0; + + for_each_sg(sg, s, nents, i) { + da = mic_map(mdev, sg_dma_address(s) + s->offset, s->length); + if (!da) + goto err; + sg_dma_address(s) = da; + } + return nents; +err: + for_each_sg(sg, s, i, j) { + mic_unmap(mdev, sg_dma_address(s), s->length); + sg_dma_address(s) = mic_to_dma_addr(mdev, sg_dma_address(s)); + } + dma_unmap_sg(&mdev->pdev->dev, sg, nents, dir); + return 0; +} + +static void __mic_dma_unmap_sg(struct device *dev, + struct scatterlist *sg, int nents, + enum dma_data_direction dir, + unsigned long attrs) +{ + struct scif_hw_dev *scdev = dev_get_drvdata(dev); + struct mic_device *mdev = scdev_to_mdev(scdev); + struct scatterlist *s; + dma_addr_t da; + int i; + + for_each_sg(sg, s, nents, i) { + da = mic_to_dma_addr(mdev, sg_dma_address(s)); + mic_unmap(mdev, sg_dma_address(s), s->length); + sg_dma_address(s) = da; + } + dma_unmap_sg(&mdev->pdev->dev, sg, nents, dir); +} + +static const struct dma_map_ops __mic_dma_ops = { + .alloc = __mic_dma_alloc, + .free = __mic_dma_free, + .map_page = __mic_dma_map_page, + .unmap_page = __mic_dma_unmap_page, + .map_sg = __mic_dma_map_sg, + .unmap_sg = __mic_dma_unmap_sg, +}; + +static struct mic_irq * +___mic_request_irq(struct scif_hw_dev *scdev, + irqreturn_t (*func)(int irq, void *data), + const char *name, + void *data, int db) +{ + struct mic_device *mdev = scdev_to_mdev(scdev); + + return mic_request_threaded_irq(mdev, func, NULL, name, data, + db, MIC_INTR_DB); +} + +static void +___mic_free_irq(struct scif_hw_dev *scdev, + struct mic_irq *cookie, void *data) +{ + struct mic_device *mdev = scdev_to_mdev(scdev); + + mic_free_irq(mdev, cookie, data); +} + +static void ___mic_ack_interrupt(struct scif_hw_dev *scdev, int num) +{ + struct mic_device *mdev = scdev_to_mdev(scdev); + + mdev->ops->intr_workarounds(mdev); +} + +static int ___mic_next_db(struct scif_hw_dev *scdev) +{ + struct mic_device *mdev = scdev_to_mdev(scdev); + + return mic_next_db(mdev); +} + +static void ___mic_send_intr(struct scif_hw_dev *scdev, int db) +{ + struct mic_device *mdev = scdev_to_mdev(scdev); + + mdev->ops->send_intr(mdev, db); +} + +static void __iomem *___mic_ioremap(struct scif_hw_dev *scdev, + phys_addr_t pa, size_t len) +{ + struct mic_device *mdev = scdev_to_mdev(scdev); + + return mdev->aper.va + pa; +} + +static void ___mic_iounmap(struct scif_hw_dev *scdev, void __iomem *va) +{ + /* nothing to do */ +} + +static struct scif_hw_ops scif_hw_ops = { + .request_irq = ___mic_request_irq, + .free_irq = ___mic_free_irq, + .ack_interrupt = ___mic_ack_interrupt, + .next_db = ___mic_next_db, + .send_intr = ___mic_send_intr, + .ioremap = ___mic_ioremap, + .iounmap = ___mic_iounmap, +}; + +static inline struct mic_device *mbdev_to_mdev(struct mbus_device *mbdev) +{ + return dev_get_drvdata(mbdev->dev.parent); +} + +static dma_addr_t +mic_dma_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, enum dma_data_direction dir, + unsigned long attrs) +{ + void *va = phys_to_virt(page_to_phys(page)) + offset; + struct mic_device *mdev = dev_get_drvdata(dev->parent); + + return mic_map_single(mdev, va, size); +} + +static void +mic_dma_unmap_page(struct device *dev, dma_addr_t dma_addr, + size_t size, enum dma_data_direction dir, + unsigned long attrs) +{ + struct mic_device *mdev = dev_get_drvdata(dev->parent); + mic_unmap_single(mdev, dma_addr, size); +} + +static const struct dma_map_ops mic_dma_ops = { + .map_page = mic_dma_map_page, + .unmap_page = mic_dma_unmap_page, +}; + +static struct mic_irq * +_mic_request_threaded_irq(struct mbus_device *mbdev, + irq_handler_t handler, irq_handler_t thread_fn, + const char *name, void *data, int intr_src) +{ + return mic_request_threaded_irq(mbdev_to_mdev(mbdev), handler, + thread_fn, name, data, + intr_src, MIC_INTR_DMA); +} + +static void _mic_free_irq(struct mbus_device *mbdev, + struct mic_irq *cookie, void *data) +{ + mic_free_irq(mbdev_to_mdev(mbdev), cookie, data); +} + +static void _mic_ack_interrupt(struct mbus_device *mbdev, int num) +{ + struct mic_device *mdev = mbdev_to_mdev(mbdev); + mdev->ops->intr_workarounds(mdev); +} + +static struct mbus_hw_ops mbus_hw_ops = { + .request_threaded_irq = _mic_request_threaded_irq, + .free_irq = _mic_free_irq, + .ack_interrupt = _mic_ack_interrupt, +}; + +/* Initialize the MIC bootparams */ +void mic_bootparam_init(struct mic_device *mdev) +{ + struct mic_bootparam *bootparam = mdev->dp; + + bootparam->magic = cpu_to_le32(MIC_MAGIC); + bootparam->h2c_config_db = -1; + bootparam->node_id = mdev->id + 1; + bootparam->scif_host_dma_addr = 0x0; + bootparam->scif_card_dma_addr = 0x0; + bootparam->c2h_scif_db = -1; + bootparam->h2c_scif_db = -1; +} + +static inline struct mic_device *cosmdev_to_mdev(struct cosm_device *cdev) +{ + return dev_get_drvdata(cdev->dev.parent); +} + +static void _mic_reset(struct cosm_device *cdev) +{ + struct mic_device *mdev = cosmdev_to_mdev(cdev); + + mdev->ops->reset_fw_ready(mdev); + mdev->ops->reset(mdev); +} + +static bool _mic_ready(struct cosm_device *cdev) +{ + struct mic_device *mdev = cosmdev_to_mdev(cdev); + + return mdev->ops->is_fw_ready(mdev); +} + +/** + * mic_request_dma_chans - Request DMA channels + * @mdev: pointer to mic_device instance + * + * returns number of DMA channels acquired + */ +static int mic_request_dma_chans(struct mic_device *mdev) +{ + dma_cap_mask_t mask; + struct dma_chan *chan; + + dma_cap_zero(mask); + dma_cap_set(DMA_MEMCPY, mask); + + do { + chan = dma_request_channel(mask, mdev->ops->dma_filter, + &mdev->pdev->dev); + if (chan) { + mdev->dma_ch[mdev->num_dma_ch++] = chan; + if (mdev->num_dma_ch >= MIC_MAX_DMA_CHAN) + break; + } + } while (chan); + dev_info(&mdev->pdev->dev, "DMA channels # %d\n", mdev->num_dma_ch); + return mdev->num_dma_ch; +} + +/** + * mic_free_dma_chans - release DMA channels + * @mdev: pointer to mic_device instance + * + * returns none + */ +static void mic_free_dma_chans(struct mic_device *mdev) +{ + int i = 0; + + for (i = 0; i < mdev->num_dma_ch; i++) { + dma_release_channel(mdev->dma_ch[i]); + mdev->dma_ch[i] = NULL; + } + mdev->num_dma_ch = 0; +} + +/** + * _mic_start - Start the MIC. + * @cdev: pointer to cosm_device instance + * @id: MIC device id/index provided by COSM used in other drivers like SCIF + * + * This function prepares an MIC for boot and initiates boot. + * RETURNS: An appropriate -ERRNO error value on error, or zero for success. + * + * For all cosm_hw_ops the caller holds a mutex to ensure serialization. + */ +static int _mic_start(struct cosm_device *cdev, int id) +{ + struct mic_device *mdev = cosmdev_to_mdev(cdev); + int rc; + + mic_bootparam_init(mdev); + mdev->dma_mbdev = mbus_register_device(&mdev->pdev->dev, + MBUS_DEV_DMA_HOST, &mic_dma_ops, + &mbus_hw_ops, id, mdev->mmio.va); + if (IS_ERR(mdev->dma_mbdev)) { + rc = PTR_ERR(mdev->dma_mbdev); + goto unlock_ret; + } + if (!mic_request_dma_chans(mdev)) { + rc = -ENODEV; + goto dma_remove; + } + mdev->scdev = scif_register_device(&mdev->pdev->dev, MIC_SCIF_DEV, + &__mic_dma_ops, &scif_hw_ops, + id + 1, 0, &mdev->mmio, + &mdev->aper, mdev->dp, NULL, + mdev->dma_ch, mdev->num_dma_ch, + true); + if (IS_ERR(mdev->scdev)) { + rc = PTR_ERR(mdev->scdev); + goto dma_free; + } + + mdev->vpdev = vop_register_device(&mdev->pdev->dev, + VOP_DEV_TRNSP, &_mic_dma_ops, + &vop_hw_ops, id + 1, &mdev->aper, + mdev->dma_ch[0]); + if (IS_ERR(mdev->vpdev)) { + rc = PTR_ERR(mdev->vpdev); + goto scif_remove; + } + + rc = mdev->ops->load_mic_fw(mdev, NULL); + if (rc) + goto vop_remove; + mic_smpt_restore(mdev); + mic_intr_restore(mdev); + mdev->intr_ops->enable_interrupts(mdev); + mdev->ops->write_spad(mdev, MIC_DPLO_SPAD, mdev->dp_dma_addr); + mdev->ops->write_spad(mdev, MIC_DPHI_SPAD, mdev->dp_dma_addr >> 32); + mdev->ops->send_firmware_intr(mdev); + goto unlock_ret; +vop_remove: + vop_unregister_device(mdev->vpdev); +scif_remove: + scif_unregister_device(mdev->scdev); +dma_free: + mic_free_dma_chans(mdev); +dma_remove: + mbus_unregister_device(mdev->dma_mbdev); +unlock_ret: + return rc; +} + +/** + * _mic_stop - Prepare the MIC for reset and trigger reset. + * @cdev: pointer to cosm_device instance + * @force: force a MIC to reset even if it is already offline. + * + * RETURNS: None. + */ +static void _mic_stop(struct cosm_device *cdev, bool force) +{ + struct mic_device *mdev = cosmdev_to_mdev(cdev); + + /* + * Since SCIF handles card shutdown and reset (using COSM), it will + * will be the first to be registered and the last to be + * unregistered. + */ + vop_unregister_device(mdev->vpdev); + scif_unregister_device(mdev->scdev); + mic_free_dma_chans(mdev); + mbus_unregister_device(mdev->dma_mbdev); + mic_bootparam_init(mdev); +} + +static ssize_t _mic_family(struct cosm_device *cdev, char *buf) +{ + struct mic_device *mdev = cosmdev_to_mdev(cdev); + static const char *family[MIC_FAMILY_LAST] = { "x100", "Unknown" }; + + return scnprintf(buf, PAGE_SIZE, "%s\n", family[mdev->family]); +} + +static ssize_t _mic_stepping(struct cosm_device *cdev, char *buf) +{ + struct mic_device *mdev = cosmdev_to_mdev(cdev); + const char *string = "??"; + + switch (mdev->stepping) { + case MIC_A0_STEP: + string = "A0"; + break; + case MIC_B0_STEP: + string = "B0"; + break; + case MIC_B1_STEP: + string = "B1"; + break; + case MIC_C0_STEP: + string = "C0"; + break; + default: + break; + } + return scnprintf(buf, PAGE_SIZE, "%s\n", string); +} + +static struct mic_mw *_mic_aper(struct cosm_device *cdev) +{ + struct mic_device *mdev = cosmdev_to_mdev(cdev); + + return &mdev->aper; +} + +struct cosm_hw_ops cosm_hw_ops = { + .reset = _mic_reset, + .force_reset = _mic_reset, + .post_reset = NULL, + .ready = _mic_ready, + .start = _mic_start, + .stop = _mic_stop, + .family = _mic_family, + .stepping = _mic_stepping, + .aper = _mic_aper, +}; diff --git a/drivers/misc/mic/host/mic_debugfs.c b/drivers/misc/mic/host/mic_debugfs.c new file mode 100644 index 000000000..0a9daba8b --- /dev/null +++ b/drivers/misc/mic/host/mic_debugfs.c @@ -0,0 +1,216 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2013 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Intel MIC Host driver. + * + */ +#include +#include +#include + +#include +#include "../common/mic_dev.h" +#include "mic_device.h" +#include "mic_smpt.h" + +/* Debugfs parent dir */ +static struct dentry *mic_dbg; + +static int mic_smpt_show(struct seq_file *s, void *pos) +{ + int i; + struct mic_device *mdev = s->private; + unsigned long flags; + + seq_printf(s, "MIC %-2d |%-10s| %-14s %-10s\n", + mdev->id, "SMPT entry", "SW DMA addr", "RefCount"); + seq_puts(s, "====================================================\n"); + + if (mdev->smpt) { + struct mic_smpt_info *smpt_info = mdev->smpt; + spin_lock_irqsave(&smpt_info->smpt_lock, flags); + for (i = 0; i < smpt_info->info.num_reg; i++) { + seq_printf(s, "%9s|%-10d| %-#14llx %-10lld\n", + " ", i, smpt_info->entry[i].dma_addr, + smpt_info->entry[i].ref_count); + } + spin_unlock_irqrestore(&smpt_info->smpt_lock, flags); + } + seq_puts(s, "====================================================\n"); + return 0; +} + +static int mic_smpt_debug_open(struct inode *inode, struct file *file) +{ + return single_open(file, mic_smpt_show, inode->i_private); +} + +static int mic_smpt_debug_release(struct inode *inode, struct file *file) +{ + return single_release(inode, file); +} + +static const struct file_operations smpt_file_ops = { + .owner = THIS_MODULE, + .open = mic_smpt_debug_open, + .read = seq_read, + .llseek = seq_lseek, + .release = mic_smpt_debug_release +}; + +static int mic_post_code_show(struct seq_file *s, void *pos) +{ + struct mic_device *mdev = s->private; + u32 reg = mdev->ops->get_postcode(mdev); + + seq_printf(s, "%c%c", reg & 0xff, (reg >> 8) & 0xff); + return 0; +} + +static int mic_post_code_debug_open(struct inode *inode, struct file *file) +{ + return single_open(file, mic_post_code_show, inode->i_private); +} + +static int mic_post_code_debug_release(struct inode *inode, struct file *file) +{ + return single_release(inode, file); +} + +static const struct file_operations post_code_ops = { + .owner = THIS_MODULE, + .open = mic_post_code_debug_open, + .read = seq_read, + .llseek = seq_lseek, + .release = mic_post_code_debug_release +}; + +static int mic_msi_irq_info_show(struct seq_file *s, void *pos) +{ + struct mic_device *mdev = s->private; + int reg; + int i, j; + u16 entry; + u16 vector; + struct pci_dev *pdev = mdev->pdev; + + if (pci_dev_msi_enabled(pdev)) { + for (i = 0; i < mdev->irq_info.num_vectors; i++) { + if (pdev->msix_enabled) { + entry = mdev->irq_info.msix_entries[i].entry; + vector = mdev->irq_info.msix_entries[i].vector; + } else { + entry = 0; + vector = pdev->irq; + } + + reg = mdev->intr_ops->read_msi_to_src_map(mdev, entry); + + seq_printf(s, "%s %-10d %s %-10d MXAR[%d]: %08X\n", + "IRQ:", vector, "Entry:", entry, i, reg); + + seq_printf(s, "%-10s", "offset:"); + for (j = (MIC_NUM_OFFSETS - 1); j >= 0; j--) + seq_printf(s, "%4d ", j); + seq_puts(s, "\n"); + + + seq_printf(s, "%-10s", "count:"); + for (j = (MIC_NUM_OFFSETS - 1); j >= 0; j--) + seq_printf(s, "%4d ", + (mdev->irq_info.mic_msi_map[i] & + BIT(j)) ? 1 : 0); + seq_puts(s, "\n\n"); + } + } else { + seq_puts(s, "MSI/MSIx interrupts not enabled\n"); + } + + return 0; +} + +static int mic_msi_irq_info_debug_open(struct inode *inode, struct file *file) +{ + return single_open(file, mic_msi_irq_info_show, inode->i_private); +} + +static int +mic_msi_irq_info_debug_release(struct inode *inode, struct file *file) +{ + return single_release(inode, file); +} + +static const struct file_operations msi_irq_info_ops = { + .owner = THIS_MODULE, + .open = mic_msi_irq_info_debug_open, + .read = seq_read, + .llseek = seq_lseek, + .release = mic_msi_irq_info_debug_release +}; + +/** + * mic_create_debug_dir - Initialize MIC debugfs entries. + */ +void mic_create_debug_dir(struct mic_device *mdev) +{ + char name[16]; + + if (!mic_dbg) + return; + + scnprintf(name, sizeof(name), "mic%d", mdev->id); + mdev->dbg_dir = debugfs_create_dir(name, mic_dbg); + if (!mdev->dbg_dir) + return; + + debugfs_create_file("smpt", 0444, mdev->dbg_dir, mdev, &smpt_file_ops); + + debugfs_create_file("post_code", 0444, mdev->dbg_dir, mdev, + &post_code_ops); + + debugfs_create_file("msi_irq_info", 0444, mdev->dbg_dir, mdev, + &msi_irq_info_ops); +} + +/** + * mic_delete_debug_dir - Uninitialize MIC debugfs entries. + */ +void mic_delete_debug_dir(struct mic_device *mdev) +{ + if (!mdev->dbg_dir) + return; + + debugfs_remove_recursive(mdev->dbg_dir); +} + +/** + * mic_init_debugfs - Initialize global debugfs entry. + */ +void __init mic_init_debugfs(void) +{ + mic_dbg = debugfs_create_dir(KBUILD_MODNAME, NULL); + if (!mic_dbg) + pr_err("can't create debugfs dir\n"); +} + +/** + * mic_exit_debugfs - Uninitialize global debugfs entry + */ +void mic_exit_debugfs(void) +{ + debugfs_remove(mic_dbg); +} diff --git a/drivers/misc/mic/host/mic_device.h b/drivers/misc/mic/host/mic_device.h new file mode 100644 index 000000000..52b12b22f --- /dev/null +++ b/drivers/misc/mic/host/mic_device.h @@ -0,0 +1,169 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2013 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Intel MIC Host driver. + * + */ +#ifndef _MIC_DEVICE_H_ +#define _MIC_DEVICE_H_ + +#include +#include +#include +#include +#include +#include +#include +#include "../bus/scif_bus.h" +#include "../bus/vop_bus.h" +#include "../bus/cosm_bus.h" +#include "mic_intr.h" + +/** + * enum mic_stepping - MIC stepping ids. + */ +enum mic_stepping { + MIC_A0_STEP = 0x0, + MIC_B0_STEP = 0x10, + MIC_B1_STEP = 0x11, + MIC_C0_STEP = 0x20, +}; + +extern struct cosm_hw_ops cosm_hw_ops; + +/** + * struct mic_device - MIC device information for each card. + * + * @mmio: MMIO bar information. + * @aper: Aperture bar information. + * @family: The MIC family to which this device belongs. + * @ops: MIC HW specific operations. + * @id: The unique device id for this MIC device. + * @stepping: Stepping ID. + * @pdev: Underlying PCI device. + * @mic_mutex: Mutex for synchronizing access to mic_device. + * @intr_ops: HW specific interrupt operations. + * @smpt_ops: Hardware specific SMPT operations. + * @smpt: MIC SMPT information. + * @intr_info: H/W specific interrupt information. + * @irq_info: The OS specific irq information + * @dbg_dir: debugfs directory of this MIC device. + * @bootaddr: MIC boot address. + * @dp: virtio device page + * @dp_dma_addr: virtio device page DMA address. + * @dma_mbdev: MIC BUS DMA device. + * @dma_ch - Array of DMA channels + * @num_dma_ch - Number of DMA channels available + * @scdev: SCIF device on the SCIF virtual bus. + * @vpdev: Virtio over PCIe device on the VOP virtual bus. + * @cosm_dev: COSM device + */ +struct mic_device { + struct mic_mw mmio; + struct mic_mw aper; + enum mic_hw_family family; + struct mic_hw_ops *ops; + int id; + enum mic_stepping stepping; + struct pci_dev *pdev; + struct mutex mic_mutex; + struct mic_hw_intr_ops *intr_ops; + struct mic_smpt_ops *smpt_ops; + struct mic_smpt_info *smpt; + struct mic_intr_info *intr_info; + struct mic_irq_info irq_info; + struct dentry *dbg_dir; + u32 bootaddr; + void *dp; + dma_addr_t dp_dma_addr; + struct mbus_device *dma_mbdev; + struct dma_chan *dma_ch[MIC_MAX_DMA_CHAN]; + int num_dma_ch; + struct scif_hw_dev *scdev; + struct vop_device *vpdev; + struct cosm_device *cosm_dev; +}; + +/** + * struct mic_hw_ops - MIC HW specific operations. + * @aper_bar: Aperture bar resource number. + * @mmio_bar: MMIO bar resource number. + * @read_spad: Read from scratch pad register. + * @write_spad: Write to scratch pad register. + * @send_intr: Send an interrupt for a particular doorbell on the card. + * @ack_interrupt: Hardware specific operations to ack the h/w on + * receipt of an interrupt. + * @intr_workarounds: Hardware specific workarounds needed after + * handling an interrupt. + * @reset: Reset the remote processor. + * @reset_fw_ready: Reset firmware ready field. + * @is_fw_ready: Check if firmware is ready for OS download. + * @send_firmware_intr: Send an interrupt to the card firmware. + * @load_mic_fw: Load firmware segments required to boot the card + * into card memory. This includes the kernel, command line, ramdisk etc. + * @get_postcode: Get post code status from firmware. + * @dma_filter: DMA filter function to be used. + */ +struct mic_hw_ops { + u8 aper_bar; + u8 mmio_bar; + u32 (*read_spad)(struct mic_device *mdev, unsigned int idx); + void (*write_spad)(struct mic_device *mdev, unsigned int idx, u32 val); + void (*send_intr)(struct mic_device *mdev, int doorbell); + u32 (*ack_interrupt)(struct mic_device *mdev); + void (*intr_workarounds)(struct mic_device *mdev); + void (*reset)(struct mic_device *mdev); + void (*reset_fw_ready)(struct mic_device *mdev); + bool (*is_fw_ready)(struct mic_device *mdev); + void (*send_firmware_intr)(struct mic_device *mdev); + int (*load_mic_fw)(struct mic_device *mdev, const char *buf); + u32 (*get_postcode)(struct mic_device *mdev); + bool (*dma_filter)(struct dma_chan *chan, void *param); +}; + +/** + * mic_mmio_read - read from an MMIO register. + * @mw: MMIO register base virtual address. + * @offset: register offset. + * + * RETURNS: register value. + */ +static inline u32 mic_mmio_read(struct mic_mw *mw, u32 offset) +{ + return ioread32(mw->va + offset); +} + +/** + * mic_mmio_write - write to an MMIO register. + * @mw: MMIO register base virtual address. + * @val: the data value to put into the register + * @offset: register offset. + * + * RETURNS: none. + */ +static inline void +mic_mmio_write(struct mic_mw *mw, u32 val, u32 offset) +{ + iowrite32(val, mw->va + offset); +} + +void mic_bootparam_init(struct mic_device *mdev); +void mic_create_debug_dir(struct mic_device *dev); +void mic_delete_debug_dir(struct mic_device *dev); +void __init mic_init_debugfs(void); +void mic_exit_debugfs(void); +#endif diff --git a/drivers/misc/mic/host/mic_intr.c b/drivers/misc/mic/host/mic_intr.c new file mode 100644 index 000000000..08ca3e372 --- /dev/null +++ b/drivers/misc/mic/host/mic_intr.c @@ -0,0 +1,645 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2013 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Intel MIC Host driver. + * + */ +#include +#include + +#include "../common/mic_dev.h" +#include "mic_device.h" + +static irqreturn_t mic_thread_fn(int irq, void *dev) +{ + struct mic_device *mdev = dev; + struct mic_intr_info *intr_info = mdev->intr_info; + struct mic_irq_info *irq_info = &mdev->irq_info; + struct mic_intr_cb *intr_cb; + struct pci_dev *pdev = mdev->pdev; + int i; + + spin_lock(&irq_info->mic_thread_lock); + for (i = intr_info->intr_start_idx[MIC_INTR_DB]; + i < intr_info->intr_len[MIC_INTR_DB]; i++) + if (test_and_clear_bit(i, &irq_info->mask)) { + list_for_each_entry(intr_cb, &irq_info->cb_list[i], + list) + if (intr_cb->thread_fn) + intr_cb->thread_fn(pdev->irq, + intr_cb->data); + } + spin_unlock(&irq_info->mic_thread_lock); + return IRQ_HANDLED; +} +/** + * mic_interrupt - Generic interrupt handler for + * MSI and INTx based interrupts. + */ +static irqreturn_t mic_interrupt(int irq, void *dev) +{ + struct mic_device *mdev = dev; + struct mic_intr_info *intr_info = mdev->intr_info; + struct mic_irq_info *irq_info = &mdev->irq_info; + struct mic_intr_cb *intr_cb; + struct pci_dev *pdev = mdev->pdev; + u32 mask; + int i; + + mask = mdev->ops->ack_interrupt(mdev); + if (!mask) + return IRQ_NONE; + + spin_lock(&irq_info->mic_intr_lock); + for (i = intr_info->intr_start_idx[MIC_INTR_DB]; + i < intr_info->intr_len[MIC_INTR_DB]; i++) + if (mask & BIT(i)) { + list_for_each_entry(intr_cb, &irq_info->cb_list[i], + list) + if (intr_cb->handler) + intr_cb->handler(pdev->irq, + intr_cb->data); + set_bit(i, &irq_info->mask); + } + spin_unlock(&irq_info->mic_intr_lock); + return IRQ_WAKE_THREAD; +} + +/* Return the interrupt offset from the index. Index is 0 based. */ +static u16 mic_map_src_to_offset(struct mic_device *mdev, + int intr_src, enum mic_intr_type type) +{ + if (type >= MIC_NUM_INTR_TYPES) + return MIC_NUM_OFFSETS; + if (intr_src >= mdev->intr_info->intr_len[type]) + return MIC_NUM_OFFSETS; + + return mdev->intr_info->intr_start_idx[type] + intr_src; +} + +/* Return next available msix_entry. */ +static struct msix_entry *mic_get_available_vector(struct mic_device *mdev) +{ + int i; + struct mic_irq_info *info = &mdev->irq_info; + + for (i = 0; i < info->num_vectors; i++) + if (!info->mic_msi_map[i]) + return &info->msix_entries[i]; + return NULL; +} + +/** + * mic_register_intr_callback - Register a callback handler for the + * given source id. + * + * @mdev: pointer to the mic_device instance + * @idx: The source id to be registered. + * @handler: The function to be called when the source id receives + * the interrupt. + * @thread_fn: thread fn. corresponding to the handler + * @data: Private data of the requester. + * Return the callback structure that was registered or an + * appropriate error on failure. + */ +static struct mic_intr_cb *mic_register_intr_callback(struct mic_device *mdev, + u8 idx, irq_handler_t handler, irq_handler_t thread_fn, + void *data) +{ + struct mic_intr_cb *intr_cb; + unsigned long flags; + int rc; + intr_cb = kmalloc(sizeof(*intr_cb), GFP_KERNEL); + + if (!intr_cb) + return ERR_PTR(-ENOMEM); + + intr_cb->handler = handler; + intr_cb->thread_fn = thread_fn; + intr_cb->data = data; + intr_cb->cb_id = ida_simple_get(&mdev->irq_info.cb_ida, + 0, 0, GFP_KERNEL); + if (intr_cb->cb_id < 0) { + rc = intr_cb->cb_id; + goto ida_fail; + } + + spin_lock(&mdev->irq_info.mic_thread_lock); + spin_lock_irqsave(&mdev->irq_info.mic_intr_lock, flags); + list_add_tail(&intr_cb->list, &mdev->irq_info.cb_list[idx]); + spin_unlock_irqrestore(&mdev->irq_info.mic_intr_lock, flags); + spin_unlock(&mdev->irq_info.mic_thread_lock); + + return intr_cb; +ida_fail: + kfree(intr_cb); + return ERR_PTR(rc); +} + +/** + * mic_unregister_intr_callback - Unregister the callback handler + * identified by its callback id. + * + * @mdev: pointer to the mic_device instance + * @idx: The callback structure id to be unregistered. + * Return the source id that was unregistered or MIC_NUM_OFFSETS if no + * such callback handler was found. + */ +static u8 mic_unregister_intr_callback(struct mic_device *mdev, u32 idx) +{ + struct list_head *pos, *tmp; + struct mic_intr_cb *intr_cb; + unsigned long flags; + int i; + + spin_lock(&mdev->irq_info.mic_thread_lock); + spin_lock_irqsave(&mdev->irq_info.mic_intr_lock, flags); + for (i = 0; i < MIC_NUM_OFFSETS; i++) { + list_for_each_safe(pos, tmp, &mdev->irq_info.cb_list[i]) { + intr_cb = list_entry(pos, struct mic_intr_cb, list); + if (intr_cb->cb_id == idx) { + list_del(pos); + ida_simple_remove(&mdev->irq_info.cb_ida, + intr_cb->cb_id); + kfree(intr_cb); + spin_unlock_irqrestore( + &mdev->irq_info.mic_intr_lock, flags); + spin_unlock(&mdev->irq_info.mic_thread_lock); + return i; + } + } + } + spin_unlock_irqrestore(&mdev->irq_info.mic_intr_lock, flags); + spin_unlock(&mdev->irq_info.mic_thread_lock); + return MIC_NUM_OFFSETS; +} + +/** + * mic_setup_msix - Initializes MSIx interrupts. + * + * @mdev: pointer to mic_device instance + * + * + * RETURNS: An appropriate -ERRNO error value on error, or zero for success. + */ +static int mic_setup_msix(struct mic_device *mdev, struct pci_dev *pdev) +{ + int rc, i; + int entry_size = sizeof(*mdev->irq_info.msix_entries); + + mdev->irq_info.msix_entries = kmalloc_array(MIC_MIN_MSIX, + entry_size, GFP_KERNEL); + if (!mdev->irq_info.msix_entries) { + rc = -ENOMEM; + goto err_nomem1; + } + + for (i = 0; i < MIC_MIN_MSIX; i++) + mdev->irq_info.msix_entries[i].entry = i; + + rc = pci_enable_msix_exact(pdev, mdev->irq_info.msix_entries, + MIC_MIN_MSIX); + if (rc) { + dev_dbg(&pdev->dev, "Error enabling MSIx. rc = %d\n", rc); + goto err_enable_msix; + } + + mdev->irq_info.num_vectors = MIC_MIN_MSIX; + mdev->irq_info.mic_msi_map = kzalloc((sizeof(u32) * + mdev->irq_info.num_vectors), GFP_KERNEL); + + if (!mdev->irq_info.mic_msi_map) { + rc = -ENOMEM; + goto err_nomem2; + } + + dev_dbg(&mdev->pdev->dev, + "%d MSIx irqs setup\n", mdev->irq_info.num_vectors); + return 0; +err_nomem2: + pci_disable_msix(pdev); +err_enable_msix: + kfree(mdev->irq_info.msix_entries); +err_nomem1: + mdev->irq_info.num_vectors = 0; + return rc; +} + +/** + * mic_setup_callbacks - Initialize data structures needed + * to handle callbacks. + * + * @mdev: pointer to mic_device instance + */ +static int mic_setup_callbacks(struct mic_device *mdev) +{ + int i; + + mdev->irq_info.cb_list = kmalloc_array(MIC_NUM_OFFSETS, + sizeof(*mdev->irq_info.cb_list), + GFP_KERNEL); + if (!mdev->irq_info.cb_list) + return -ENOMEM; + + for (i = 0; i < MIC_NUM_OFFSETS; i++) + INIT_LIST_HEAD(&mdev->irq_info.cb_list[i]); + ida_init(&mdev->irq_info.cb_ida); + spin_lock_init(&mdev->irq_info.mic_intr_lock); + spin_lock_init(&mdev->irq_info.mic_thread_lock); + return 0; +} + +/** + * mic_release_callbacks - Uninitialize data structures needed + * to handle callbacks. + * + * @mdev: pointer to mic_device instance + */ +static void mic_release_callbacks(struct mic_device *mdev) +{ + unsigned long flags; + struct list_head *pos, *tmp; + struct mic_intr_cb *intr_cb; + int i; + + spin_lock(&mdev->irq_info.mic_thread_lock); + spin_lock_irqsave(&mdev->irq_info.mic_intr_lock, flags); + for (i = 0; i < MIC_NUM_OFFSETS; i++) { + if (list_empty(&mdev->irq_info.cb_list[i])) + break; + + list_for_each_safe(pos, tmp, &mdev->irq_info.cb_list[i]) { + intr_cb = list_entry(pos, struct mic_intr_cb, list); + list_del(pos); + ida_simple_remove(&mdev->irq_info.cb_ida, + intr_cb->cb_id); + kfree(intr_cb); + } + } + spin_unlock_irqrestore(&mdev->irq_info.mic_intr_lock, flags); + spin_unlock(&mdev->irq_info.mic_thread_lock); + ida_destroy(&mdev->irq_info.cb_ida); + kfree(mdev->irq_info.cb_list); +} + +/** + * mic_setup_msi - Initializes MSI interrupts. + * + * @mdev: pointer to mic_device instance + * @pdev: PCI device structure + * + * RETURNS: An appropriate -ERRNO error value on error, or zero for success. + */ +static int mic_setup_msi(struct mic_device *mdev, struct pci_dev *pdev) +{ + int rc; + + rc = pci_enable_msi(pdev); + if (rc) { + dev_dbg(&pdev->dev, "Error enabling MSI. rc = %d\n", rc); + return rc; + } + + mdev->irq_info.num_vectors = 1; + mdev->irq_info.mic_msi_map = kzalloc((sizeof(u32) * + mdev->irq_info.num_vectors), GFP_KERNEL); + + if (!mdev->irq_info.mic_msi_map) { + rc = -ENOMEM; + goto err_nomem1; + } + + rc = mic_setup_callbacks(mdev); + if (rc) { + dev_err(&pdev->dev, "Error setting up callbacks\n"); + goto err_nomem2; + } + + rc = request_threaded_irq(pdev->irq, mic_interrupt, mic_thread_fn, + 0, "mic-msi", mdev); + if (rc) { + dev_err(&pdev->dev, "Error allocating MSI interrupt\n"); + goto err_irq_req_fail; + } + + dev_dbg(&pdev->dev, "%d MSI irqs setup\n", mdev->irq_info.num_vectors); + return 0; +err_irq_req_fail: + mic_release_callbacks(mdev); +err_nomem2: + kfree(mdev->irq_info.mic_msi_map); +err_nomem1: + pci_disable_msi(pdev); + mdev->irq_info.num_vectors = 0; + return rc; +} + +/** + * mic_setup_intx - Initializes legacy interrupts. + * + * @mdev: pointer to mic_device instance + * @pdev: PCI device structure + * + * RETURNS: An appropriate -ERRNO error value on error, or zero for success. + */ +static int mic_setup_intx(struct mic_device *mdev, struct pci_dev *pdev) +{ + int rc; + + /* Enable intx */ + pci_intx(pdev, 1); + rc = mic_setup_callbacks(mdev); + if (rc) { + dev_err(&pdev->dev, "Error setting up callbacks\n"); + goto err_nomem; + } + + rc = request_threaded_irq(pdev->irq, mic_interrupt, mic_thread_fn, + IRQF_SHARED, "mic-intx", mdev); + if (rc) + goto err; + + dev_dbg(&pdev->dev, "intx irq setup\n"); + return 0; +err: + mic_release_callbacks(mdev); +err_nomem: + return rc; +} + +/** + * mic_next_db - Retrieve the next doorbell interrupt source id. + * The id is picked sequentially from the available pool of + * doorlbell ids. + * + * @mdev: pointer to the mic_device instance. + * + * Returns the next doorbell interrupt source. + */ +int mic_next_db(struct mic_device *mdev) +{ + int next_db; + + next_db = mdev->irq_info.next_avail_src % + mdev->intr_info->intr_len[MIC_INTR_DB]; + mdev->irq_info.next_avail_src++; + return next_db; +} + +#define COOKIE_ID_SHIFT 16 +#define GET_ENTRY(cookie) ((cookie) & 0xFFFF) +#define GET_OFFSET(cookie) ((cookie) >> COOKIE_ID_SHIFT) +#define MK_COOKIE(x, y) ((x) | (y) << COOKIE_ID_SHIFT) + +/** + * mic_request_threaded_irq - request an irq. mic_mutex needs + * to be held before calling this function. + * + * @mdev: pointer to mic_device instance + * @handler: The callback function that handles the interrupt. + * The function needs to call ack_interrupts + * (mdev->ops->ack_interrupt(mdev)) when handling the interrupts. + * @thread_fn: thread fn required by request_threaded_irq. + * @name: The ASCII name of the callee requesting the irq. + * @data: private data that is returned back when calling the + * function handler. + * @intr_src: The source id of the requester. Its the doorbell id + * for Doorbell interrupts and DMA channel id for DMA interrupts. + * @type: The type of interrupt. Values defined in mic_intr_type + * + * returns: The cookie that is transparent to the caller. Passed + * back when calling mic_free_irq. An appropriate error code + * is returned on failure. Caller needs to use IS_ERR(return_val) + * to check for failure and PTR_ERR(return_val) to obtained the + * error code. + * + */ +struct mic_irq * +mic_request_threaded_irq(struct mic_device *mdev, + irq_handler_t handler, irq_handler_t thread_fn, + const char *name, void *data, int intr_src, + enum mic_intr_type type) +{ + u16 offset; + int rc = 0; + struct msix_entry *msix = NULL; + unsigned long cookie = 0; + u16 entry; + struct mic_intr_cb *intr_cb; + struct pci_dev *pdev = mdev->pdev; + + offset = mic_map_src_to_offset(mdev, intr_src, type); + if (offset >= MIC_NUM_OFFSETS) { + dev_err(&mdev->pdev->dev, + "Error mapping index %d to a valid source id.\n", + intr_src); + rc = -EINVAL; + goto err; + } + + if (mdev->irq_info.num_vectors > 1) { + msix = mic_get_available_vector(mdev); + if (!msix) { + dev_err(&mdev->pdev->dev, + "No MSIx vectors available for use.\n"); + rc = -ENOSPC; + goto err; + } + + rc = request_threaded_irq(msix->vector, handler, thread_fn, + 0, name, data); + if (rc) { + dev_dbg(&mdev->pdev->dev, + "request irq failed rc = %d\n", rc); + goto err; + } + entry = msix->entry; + mdev->irq_info.mic_msi_map[entry] |= BIT(offset); + mdev->intr_ops->program_msi_to_src_map(mdev, + entry, offset, true); + cookie = MK_COOKIE(entry, offset); + dev_dbg(&mdev->pdev->dev, "irq: %d assigned for src: %d\n", + msix->vector, intr_src); + } else { + intr_cb = mic_register_intr_callback(mdev, offset, handler, + thread_fn, data); + if (IS_ERR(intr_cb)) { + dev_err(&mdev->pdev->dev, + "No available callback entries for use\n"); + rc = PTR_ERR(intr_cb); + goto err; + } + + entry = 0; + if (pci_dev_msi_enabled(pdev)) { + mdev->irq_info.mic_msi_map[entry] |= (1 << offset); + mdev->intr_ops->program_msi_to_src_map(mdev, + entry, offset, true); + } + cookie = MK_COOKIE(entry, intr_cb->cb_id); + dev_dbg(&mdev->pdev->dev, "callback %d registered for src: %d\n", + intr_cb->cb_id, intr_src); + } + return (struct mic_irq *)cookie; +err: + return ERR_PTR(rc); +} + +/** + * mic_free_irq - free irq. mic_mutex + * needs to be held before calling this function. + * + * @mdev: pointer to mic_device instance + * @cookie: cookie obtained during a successful call to mic_request_threaded_irq + * @data: private data specified by the calling function during the + * mic_request_threaded_irq + * + * returns: none. + */ +void mic_free_irq(struct mic_device *mdev, + struct mic_irq *cookie, void *data) +{ + u32 offset; + u32 entry; + u8 src_id; + unsigned int irq; + struct pci_dev *pdev = mdev->pdev; + + entry = GET_ENTRY((unsigned long)cookie); + offset = GET_OFFSET((unsigned long)cookie); + if (mdev->irq_info.num_vectors > 1) { + if (entry >= mdev->irq_info.num_vectors) { + dev_warn(&mdev->pdev->dev, + "entry %d should be < num_irq %d\n", + entry, mdev->irq_info.num_vectors); + return; + } + irq = mdev->irq_info.msix_entries[entry].vector; + free_irq(irq, data); + mdev->irq_info.mic_msi_map[entry] &= ~(BIT(offset)); + mdev->intr_ops->program_msi_to_src_map(mdev, + entry, offset, false); + + dev_dbg(&mdev->pdev->dev, "irq: %d freed\n", irq); + } else { + irq = pdev->irq; + src_id = mic_unregister_intr_callback(mdev, offset); + if (src_id >= MIC_NUM_OFFSETS) { + dev_warn(&mdev->pdev->dev, "Error unregistering callback\n"); + return; + } + if (pci_dev_msi_enabled(pdev)) { + mdev->irq_info.mic_msi_map[entry] &= ~(BIT(src_id)); + mdev->intr_ops->program_msi_to_src_map(mdev, + entry, src_id, false); + } + dev_dbg(&mdev->pdev->dev, "callback %d unregistered for src: %d\n", + offset, src_id); + } +} + +/** + * mic_setup_interrupts - Initializes interrupts. + * + * @mdev: pointer to mic_device instance + * @pdev: PCI device structure + * + * RETURNS: An appropriate -ERRNO error value on error, or zero for success. + */ +int mic_setup_interrupts(struct mic_device *mdev, struct pci_dev *pdev) +{ + int rc; + + rc = mic_setup_msix(mdev, pdev); + if (!rc) + goto done; + + rc = mic_setup_msi(mdev, pdev); + if (!rc) + goto done; + + rc = mic_setup_intx(mdev, pdev); + if (rc) { + dev_err(&mdev->pdev->dev, "no usable interrupts\n"); + return rc; + } +done: + mdev->intr_ops->enable_interrupts(mdev); + return 0; +} + +/** + * mic_free_interrupts - Frees interrupts setup by mic_setup_interrupts + * + * @mdev: pointer to mic_device instance + * @pdev: PCI device structure + * + * returns none. + */ +void mic_free_interrupts(struct mic_device *mdev, struct pci_dev *pdev) +{ + int i; + + mdev->intr_ops->disable_interrupts(mdev); + if (mdev->irq_info.num_vectors > 1) { + for (i = 0; i < mdev->irq_info.num_vectors; i++) { + if (mdev->irq_info.mic_msi_map[i]) + dev_warn(&pdev->dev, "irq %d may still be in use.\n", + mdev->irq_info.msix_entries[i].vector); + } + kfree(mdev->irq_info.mic_msi_map); + kfree(mdev->irq_info.msix_entries); + pci_disable_msix(pdev); + } else { + if (pci_dev_msi_enabled(pdev)) { + free_irq(pdev->irq, mdev); + kfree(mdev->irq_info.mic_msi_map); + pci_disable_msi(pdev); + } else { + free_irq(pdev->irq, mdev); + } + mic_release_callbacks(mdev); + } +} + +/** + * mic_intr_restore - Restore MIC interrupt registers. + * + * @mdev: pointer to mic_device instance. + * + * Restore the interrupt registers to values previously + * stored in the SW data structures. mic_mutex needs to + * be held before calling this function. + * + * returns None. + */ +void mic_intr_restore(struct mic_device *mdev) +{ + int entry, offset; + struct pci_dev *pdev = mdev->pdev; + + if (!pci_dev_msi_enabled(pdev)) + return; + + for (entry = 0; entry < mdev->irq_info.num_vectors; entry++) { + for (offset = 0; offset < MIC_NUM_OFFSETS; offset++) { + if (mdev->irq_info.mic_msi_map[entry] & BIT(offset)) + mdev->intr_ops->program_msi_to_src_map(mdev, + entry, offset, true); + } + } +} diff --git a/drivers/misc/mic/host/mic_intr.h b/drivers/misc/mic/host/mic_intr.h new file mode 100644 index 000000000..cce28824d --- /dev/null +++ b/drivers/misc/mic/host/mic_intr.h @@ -0,0 +1,149 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2013 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Intel MIC Host driver. + * + */ +#ifndef _MIC_INTR_H_ +#define _MIC_INTR_H_ + +#include +#include +/* + * The minimum number of msix vectors required for normal operation. + * 3 for virtio network, console and block devices. + * 1 for card shutdown notifications. + * 4 for host owned DMA channels. + * 1 for SCIF + */ +#define MIC_MIN_MSIX 9 +#define MIC_NUM_OFFSETS 32 + +/** + * mic_intr_source - The type of source that will generate + * the interrupt.The number of types needs to be in sync with + * MIC_NUM_INTR_TYPES + * + * MIC_INTR_DB: The source is a doorbell + * MIC_INTR_DMA: The source is a DMA channel + * MIC_INTR_ERR: The source is an error interrupt e.g. SBOX ERR + * MIC_NUM_INTR_TYPES: Total number of interrupt sources. + */ +enum mic_intr_type { + MIC_INTR_DB = 0, + MIC_INTR_DMA, + MIC_INTR_ERR, + MIC_NUM_INTR_TYPES +}; + +/** + * struct mic_intr_info - Contains h/w specific interrupt sources + * information. + * + * @intr_start_idx: Contains the starting indexes of the + * interrupt types. + * @intr_len: Contains the length of the interrupt types. + */ +struct mic_intr_info { + u16 intr_start_idx[MIC_NUM_INTR_TYPES]; + u16 intr_len[MIC_NUM_INTR_TYPES]; +}; + +/** + * struct mic_irq_info - OS specific irq information + * + * @next_avail_src: next available doorbell that can be assigned. + * @msix_entries: msix entries allocated while setting up MSI-x + * @mic_msi_map: The MSI/MSI-x mapping information. + * @num_vectors: The number of MSI/MSI-x vectors that have been allocated. + * @cb_ida: callback ID allocator to track the callbacks registered. + * @mic_intr_lock: spinlock to protect the interrupt callback list. + * @mic_thread_lock: spinlock to protect the thread callback list. + * This lock is used to protect against thread_fn while + * mic_intr_lock is used to protect against interrupt handler. + * @cb_list: Array of callback lists one for each source. + * @mask: Mask used by the main thread fn to call the underlying thread fns. + */ +struct mic_irq_info { + int next_avail_src; + struct msix_entry *msix_entries; + u32 *mic_msi_map; + u16 num_vectors; + struct ida cb_ida; + spinlock_t mic_intr_lock; + spinlock_t mic_thread_lock; + struct list_head *cb_list; + unsigned long mask; +}; + +/** + * struct mic_intr_cb - Interrupt callback structure. + * + * @handler: The callback function + * @thread_fn: The thread_fn. + * @data: Private data of the requester. + * @cb_id: The callback id. Identifies this callback. + * @list: list head pointing to the next callback structure. + */ +struct mic_intr_cb { + irq_handler_t handler; + irq_handler_t thread_fn; + void *data; + int cb_id; + struct list_head list; +}; + +/** + * struct mic_irq - opaque pointer used as cookie + */ +struct mic_irq; + +/* Forward declaration */ +struct mic_device; + +/** + * struct mic_hw_intr_ops: MIC HW specific interrupt operations + * @intr_init: Initialize H/W specific interrupt information. + * @enable_interrupts: Enable interrupts from the hardware. + * @disable_interrupts: Disable interrupts from the hardware. + * @program_msi_to_src_map: Update MSI mapping registers with + * irq information. + * @read_msi_to_src_map: Read MSI mapping registers containing + * irq information. + */ +struct mic_hw_intr_ops { + void (*intr_init)(struct mic_device *mdev); + void (*enable_interrupts)(struct mic_device *mdev); + void (*disable_interrupts)(struct mic_device *mdev); + void (*program_msi_to_src_map) (struct mic_device *mdev, + int idx, int intr_src, bool set); + u32 (*read_msi_to_src_map) (struct mic_device *mdev, + int idx); +}; + +int mic_next_db(struct mic_device *mdev); +struct mic_irq * +mic_request_threaded_irq(struct mic_device *mdev, + irq_handler_t handler, irq_handler_t thread_fn, + const char *name, void *data, int intr_src, + enum mic_intr_type type); +void mic_free_irq(struct mic_device *mdev, + struct mic_irq *cookie, void *data); +int mic_setup_interrupts(struct mic_device *mdev, struct pci_dev *pdev); +void mic_free_interrupts(struct mic_device *mdev, struct pci_dev *pdev); +void mic_intr_restore(struct mic_device *mdev); +#endif diff --git a/drivers/misc/mic/host/mic_main.c b/drivers/misc/mic/host/mic_main.c new file mode 100644 index 000000000..035be3e9c --- /dev/null +++ b/drivers/misc/mic/host/mic_main.c @@ -0,0 +1,347 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2013 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Intel MIC Host driver. + */ +#include +#include +#include +#include + +#include +#include "../common/mic_dev.h" +#include "mic_device.h" +#include "mic_x100.h" +#include "mic_smpt.h" + +static const char mic_driver_name[] = "mic"; + +static const struct pci_device_id mic_pci_tbl[] = { + {PCI_DEVICE(PCI_VENDOR_ID_INTEL, MIC_X100_PCI_DEVICE_2250)}, + {PCI_DEVICE(PCI_VENDOR_ID_INTEL, MIC_X100_PCI_DEVICE_2251)}, + {PCI_DEVICE(PCI_VENDOR_ID_INTEL, MIC_X100_PCI_DEVICE_2252)}, + {PCI_DEVICE(PCI_VENDOR_ID_INTEL, MIC_X100_PCI_DEVICE_2253)}, + {PCI_DEVICE(PCI_VENDOR_ID_INTEL, MIC_X100_PCI_DEVICE_2254)}, + {PCI_DEVICE(PCI_VENDOR_ID_INTEL, MIC_X100_PCI_DEVICE_2255)}, + {PCI_DEVICE(PCI_VENDOR_ID_INTEL, MIC_X100_PCI_DEVICE_2256)}, + {PCI_DEVICE(PCI_VENDOR_ID_INTEL, MIC_X100_PCI_DEVICE_2257)}, + {PCI_DEVICE(PCI_VENDOR_ID_INTEL, MIC_X100_PCI_DEVICE_2258)}, + {PCI_DEVICE(PCI_VENDOR_ID_INTEL, MIC_X100_PCI_DEVICE_2259)}, + {PCI_DEVICE(PCI_VENDOR_ID_INTEL, MIC_X100_PCI_DEVICE_225a)}, + {PCI_DEVICE(PCI_VENDOR_ID_INTEL, MIC_X100_PCI_DEVICE_225b)}, + {PCI_DEVICE(PCI_VENDOR_ID_INTEL, MIC_X100_PCI_DEVICE_225c)}, + {PCI_DEVICE(PCI_VENDOR_ID_INTEL, MIC_X100_PCI_DEVICE_225d)}, + {PCI_DEVICE(PCI_VENDOR_ID_INTEL, MIC_X100_PCI_DEVICE_225e)}, + + /* required last entry */ + { 0, } +}; + +MODULE_DEVICE_TABLE(pci, mic_pci_tbl); + +/* ID allocator for MIC devices */ +static struct ida g_mic_ida; + +/* Initialize the device page */ +static int mic_dp_init(struct mic_device *mdev) +{ + mdev->dp = kzalloc(MIC_DP_SIZE, GFP_KERNEL); + if (!mdev->dp) + return -ENOMEM; + + mdev->dp_dma_addr = mic_map_single(mdev, + mdev->dp, MIC_DP_SIZE); + if (mic_map_error(mdev->dp_dma_addr)) { + kfree(mdev->dp); + dev_err(&mdev->pdev->dev, "%s %d err %d\n", + __func__, __LINE__, -ENOMEM); + return -ENOMEM; + } + mdev->ops->write_spad(mdev, MIC_DPLO_SPAD, mdev->dp_dma_addr); + mdev->ops->write_spad(mdev, MIC_DPHI_SPAD, mdev->dp_dma_addr >> 32); + return 0; +} + +/* Uninitialize the device page */ +static void mic_dp_uninit(struct mic_device *mdev) +{ + mic_unmap_single(mdev, mdev->dp_dma_addr, MIC_DP_SIZE); + kfree(mdev->dp); +} + +/** + * mic_ops_init: Initialize HW specific operation tables. + * + * @mdev: pointer to mic_device instance + * + * returns none. + */ +static void mic_ops_init(struct mic_device *mdev) +{ + switch (mdev->family) { + case MIC_FAMILY_X100: + mdev->ops = &mic_x100_ops; + mdev->intr_ops = &mic_x100_intr_ops; + mdev->smpt_ops = &mic_x100_smpt_ops; + break; + default: + break; + } +} + +/** + * mic_get_family - Determine hardware family to which this MIC belongs. + * + * @pdev: The pci device structure + * + * returns family. + */ +static enum mic_hw_family mic_get_family(struct pci_dev *pdev) +{ + enum mic_hw_family family; + + switch (pdev->device) { + case MIC_X100_PCI_DEVICE_2250: + case MIC_X100_PCI_DEVICE_2251: + case MIC_X100_PCI_DEVICE_2252: + case MIC_X100_PCI_DEVICE_2253: + case MIC_X100_PCI_DEVICE_2254: + case MIC_X100_PCI_DEVICE_2255: + case MIC_X100_PCI_DEVICE_2256: + case MIC_X100_PCI_DEVICE_2257: + case MIC_X100_PCI_DEVICE_2258: + case MIC_X100_PCI_DEVICE_2259: + case MIC_X100_PCI_DEVICE_225a: + case MIC_X100_PCI_DEVICE_225b: + case MIC_X100_PCI_DEVICE_225c: + case MIC_X100_PCI_DEVICE_225d: + case MIC_X100_PCI_DEVICE_225e: + family = MIC_FAMILY_X100; + break; + default: + family = MIC_FAMILY_UNKNOWN; + break; + } + return family; +} + +/** + * mic_device_init - Allocates and initializes the MIC device structure + * + * @mdev: pointer to mic_device instance + * @pdev: The pci device structure + * + * returns none. + */ +static void +mic_device_init(struct mic_device *mdev, struct pci_dev *pdev) +{ + mdev->pdev = pdev; + mdev->family = mic_get_family(pdev); + mdev->stepping = pdev->revision; + mic_ops_init(mdev); + mutex_init(&mdev->mic_mutex); + mdev->irq_info.next_avail_src = 0; +} + +/** + * mic_probe - Device Initialization Routine + * + * @pdev: PCI device structure + * @ent: entry in mic_pci_tbl + * + * returns 0 on success, < 0 on failure. + */ +static int mic_probe(struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + int rc; + struct mic_device *mdev; + + mdev = kzalloc(sizeof(*mdev), GFP_KERNEL); + if (!mdev) { + rc = -ENOMEM; + dev_err(&pdev->dev, "mdev kmalloc failed rc %d\n", rc); + goto mdev_alloc_fail; + } + mdev->id = ida_simple_get(&g_mic_ida, 0, MIC_MAX_NUM_DEVS, GFP_KERNEL); + if (mdev->id < 0) { + rc = mdev->id; + dev_err(&pdev->dev, "ida_simple_get failed rc %d\n", rc); + goto ida_fail; + } + + mic_device_init(mdev, pdev); + + rc = pci_enable_device(pdev); + if (rc) { + dev_err(&pdev->dev, "failed to enable pci device.\n"); + goto ida_remove; + } + + pci_set_master(pdev); + + rc = pci_request_regions(pdev, mic_driver_name); + if (rc) { + dev_err(&pdev->dev, "failed to get pci regions.\n"); + goto disable_device; + } + + rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); + if (rc) { + dev_err(&pdev->dev, "Cannot set DMA mask\n"); + goto release_regions; + } + + mdev->mmio.pa = pci_resource_start(pdev, mdev->ops->mmio_bar); + mdev->mmio.len = pci_resource_len(pdev, mdev->ops->mmio_bar); + mdev->mmio.va = pci_ioremap_bar(pdev, mdev->ops->mmio_bar); + if (!mdev->mmio.va) { + dev_err(&pdev->dev, "Cannot remap MMIO BAR\n"); + rc = -EIO; + goto release_regions; + } + + mdev->aper.pa = pci_resource_start(pdev, mdev->ops->aper_bar); + mdev->aper.len = pci_resource_len(pdev, mdev->ops->aper_bar); + mdev->aper.va = ioremap_wc(mdev->aper.pa, mdev->aper.len); + if (!mdev->aper.va) { + dev_err(&pdev->dev, "Cannot remap Aperture BAR\n"); + rc = -EIO; + goto unmap_mmio; + } + + mdev->intr_ops->intr_init(mdev); + rc = mic_setup_interrupts(mdev, pdev); + if (rc) { + dev_err(&pdev->dev, "mic_setup_interrupts failed %d\n", rc); + goto unmap_aper; + } + rc = mic_smpt_init(mdev); + if (rc) { + dev_err(&pdev->dev, "smpt_init failed %d\n", rc); + goto free_interrupts; + } + + pci_set_drvdata(pdev, mdev); + + rc = mic_dp_init(mdev); + if (rc) { + dev_err(&pdev->dev, "mic_dp_init failed rc %d\n", rc); + goto smpt_uninit; + } + mic_bootparam_init(mdev); + mic_create_debug_dir(mdev); + + mdev->cosm_dev = cosm_register_device(&mdev->pdev->dev, &cosm_hw_ops); + if (IS_ERR(mdev->cosm_dev)) { + rc = PTR_ERR(mdev->cosm_dev); + dev_err(&pdev->dev, "cosm_add_device failed rc %d\n", rc); + goto cleanup_debug_dir; + } + return 0; +cleanup_debug_dir: + mic_delete_debug_dir(mdev); + mic_dp_uninit(mdev); +smpt_uninit: + mic_smpt_uninit(mdev); +free_interrupts: + mic_free_interrupts(mdev, pdev); +unmap_aper: + iounmap(mdev->aper.va); +unmap_mmio: + iounmap(mdev->mmio.va); +release_regions: + pci_release_regions(pdev); +disable_device: + pci_disable_device(pdev); +ida_remove: + ida_simple_remove(&g_mic_ida, mdev->id); +ida_fail: + kfree(mdev); +mdev_alloc_fail: + dev_err(&pdev->dev, "Probe failed rc %d\n", rc); + return rc; +} + +/** + * mic_remove - Device Removal Routine + * mic_remove is called by the PCI subsystem to alert the driver + * that it should release a PCI device. + * + * @pdev: PCI device structure + */ +static void mic_remove(struct pci_dev *pdev) +{ + struct mic_device *mdev; + + mdev = pci_get_drvdata(pdev); + if (!mdev) + return; + + cosm_unregister_device(mdev->cosm_dev); + mic_delete_debug_dir(mdev); + mic_dp_uninit(mdev); + mic_smpt_uninit(mdev); + mic_free_interrupts(mdev, pdev); + iounmap(mdev->aper.va); + iounmap(mdev->mmio.va); + pci_release_regions(pdev); + pci_disable_device(pdev); + ida_simple_remove(&g_mic_ida, mdev->id); + kfree(mdev); +} + +static struct pci_driver mic_driver = { + .name = mic_driver_name, + .id_table = mic_pci_tbl, + .probe = mic_probe, + .remove = mic_remove +}; + +static int __init mic_init(void) +{ + int ret; + + request_module("mic_x100_dma"); + mic_init_debugfs(); + ida_init(&g_mic_ida); + ret = pci_register_driver(&mic_driver); + if (ret) { + pr_err("pci_register_driver failed ret %d\n", ret); + goto cleanup_debugfs; + } + return 0; +cleanup_debugfs: + ida_destroy(&g_mic_ida); + mic_exit_debugfs(); + return ret; +} + +static void __exit mic_exit(void) +{ + pci_unregister_driver(&mic_driver); + ida_destroy(&g_mic_ida); + mic_exit_debugfs(); +} + +module_init(mic_init); +module_exit(mic_exit); + +MODULE_AUTHOR("Intel Corporation"); +MODULE_DESCRIPTION("Intel(R) MIC X100 Host driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/misc/mic/host/mic_smpt.c b/drivers/misc/mic/host/mic_smpt.c new file mode 100644 index 000000000..c3f958580 --- /dev/null +++ b/drivers/misc/mic/host/mic_smpt.c @@ -0,0 +1,439 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2013 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Intel MIC Host driver. + * + */ +#include + +#include "../common/mic_dev.h" +#include "mic_device.h" +#include "mic_smpt.h" + +static inline u64 mic_system_page_mask(struct mic_device *mdev) +{ + return (1ULL << mdev->smpt->info.page_shift) - 1ULL; +} + +static inline u8 mic_sys_addr_to_smpt(struct mic_device *mdev, dma_addr_t pa) +{ + return (pa - mdev->smpt->info.base) >> mdev->smpt->info.page_shift; +} + +static inline u64 mic_smpt_to_pa(struct mic_device *mdev, u8 index) +{ + return mdev->smpt->info.base + (index * mdev->smpt->info.page_size); +} + +static inline u64 mic_smpt_offset(struct mic_device *mdev, dma_addr_t pa) +{ + return pa & mic_system_page_mask(mdev); +} + +static inline u64 mic_smpt_align_low(struct mic_device *mdev, dma_addr_t pa) +{ + return ALIGN(pa - mic_system_page_mask(mdev), + mdev->smpt->info.page_size); +} + +static inline u64 mic_smpt_align_high(struct mic_device *mdev, dma_addr_t pa) +{ + return ALIGN(pa, mdev->smpt->info.page_size); +} + +/* Total Cumulative system memory accessible by MIC across all SMPT entries */ +static inline u64 mic_max_system_memory(struct mic_device *mdev) +{ + return mdev->smpt->info.num_reg * mdev->smpt->info.page_size; +} + +/* Maximum system memory address accessible by MIC */ +static inline u64 mic_max_system_addr(struct mic_device *mdev) +{ + return mdev->smpt->info.base + mic_max_system_memory(mdev) - 1ULL; +} + +/* Check if the DMA address is a MIC system memory address */ +static inline bool +mic_is_system_addr(struct mic_device *mdev, dma_addr_t pa) +{ + return pa >= mdev->smpt->info.base && pa <= mic_max_system_addr(mdev); +} + +/* Populate an SMPT entry and update the reference counts. */ +static void mic_add_smpt_entry(int spt, s64 *ref, u64 addr, + int entries, struct mic_device *mdev) +{ + struct mic_smpt_info *smpt_info = mdev->smpt; + int i; + + for (i = spt; i < spt + entries; i++, + addr += smpt_info->info.page_size) { + if (!smpt_info->entry[i].ref_count && + (smpt_info->entry[i].dma_addr != addr)) { + mdev->smpt_ops->set(mdev, addr, i); + smpt_info->entry[i].dma_addr = addr; + } + smpt_info->entry[i].ref_count += ref[i - spt]; + } +} + +/* + * Find an available MIC address in MIC SMPT address space + * for a given DMA address and size. + */ +static dma_addr_t mic_smpt_op(struct mic_device *mdev, u64 dma_addr, + int entries, s64 *ref, size_t size) +{ + int spt; + int ae = 0; + int i; + unsigned long flags; + dma_addr_t mic_addr = 0; + dma_addr_t addr = dma_addr; + struct mic_smpt_info *smpt_info = mdev->smpt; + + spin_lock_irqsave(&smpt_info->smpt_lock, flags); + + /* find existing entries */ + for (i = 0; i < smpt_info->info.num_reg; i++) { + if (smpt_info->entry[i].dma_addr == addr) { + ae++; + addr += smpt_info->info.page_size; + } else if (ae) /* cannot find contiguous entries */ + goto not_found; + + if (ae == entries) + goto found; + } + + /* find free entry */ + for (ae = 0, i = 0; i < smpt_info->info.num_reg; i++) { + ae = (smpt_info->entry[i].ref_count == 0) ? ae + 1 : 0; + if (ae == entries) + goto found; + } + +not_found: + spin_unlock_irqrestore(&smpt_info->smpt_lock, flags); + return mic_addr; + +found: + spt = i - entries + 1; + mic_addr = mic_smpt_to_pa(mdev, spt); + mic_add_smpt_entry(spt, ref, dma_addr, entries, mdev); + smpt_info->map_count++; + smpt_info->ref_count += (s64)size; + spin_unlock_irqrestore(&smpt_info->smpt_lock, flags); + return mic_addr; +} + +/* + * Returns number of smpt entries needed for dma_addr to dma_addr + size + * also returns the reference count array for each of those entries + * and the starting smpt address + */ +static int mic_get_smpt_ref_count(struct mic_device *mdev, dma_addr_t dma_addr, + size_t size, s64 *ref, u64 *smpt_start) +{ + u64 start = dma_addr; + u64 end = dma_addr + size; + int i = 0; + + while (start < end) { + ref[i++] = min(mic_smpt_align_high(mdev, start + 1), + end) - start; + start = mic_smpt_align_high(mdev, start + 1); + } + + if (smpt_start) + *smpt_start = mic_smpt_align_low(mdev, dma_addr); + + return i; +} + +/* + * mic_to_dma_addr - Converts a MIC address to a DMA address. + * + * @mdev: pointer to mic_device instance. + * @mic_addr: MIC address. + * + * returns a DMA address. + */ +dma_addr_t mic_to_dma_addr(struct mic_device *mdev, dma_addr_t mic_addr) +{ + struct mic_smpt_info *smpt_info = mdev->smpt; + int spt; + dma_addr_t dma_addr; + + if (!mic_is_system_addr(mdev, mic_addr)) { + dev_err(&mdev->pdev->dev, + "mic_addr is invalid. mic_addr = 0x%llx\n", mic_addr); + return -EINVAL; + } + spt = mic_sys_addr_to_smpt(mdev, mic_addr); + dma_addr = smpt_info->entry[spt].dma_addr + + mic_smpt_offset(mdev, mic_addr); + return dma_addr; +} + +/** + * mic_map - Maps a DMA address to a MIC physical address. + * + * @mdev: pointer to mic_device instance. + * @dma_addr: DMA address. + * @size: Size of the region to be mapped. + * + * This API converts the DMA address provided to a DMA address understood + * by MIC. Caller should check for errors by calling mic_map_error(..). + * + * returns DMA address as required by MIC. + */ +dma_addr_t mic_map(struct mic_device *mdev, dma_addr_t dma_addr, size_t size) +{ + dma_addr_t mic_addr = 0; + int num_entries; + s64 *ref; + u64 smpt_start; + + if (!size || size > mic_max_system_memory(mdev)) + return mic_addr; + + ref = kmalloc_array(mdev->smpt->info.num_reg, sizeof(s64), GFP_ATOMIC); + if (!ref) + return mic_addr; + + num_entries = mic_get_smpt_ref_count(mdev, dma_addr, size, + ref, &smpt_start); + + /* Set the smpt table appropriately and get 16G aligned mic address */ + mic_addr = mic_smpt_op(mdev, smpt_start, num_entries, ref, size); + + kfree(ref); + + /* + * If mic_addr is zero then its an error case + * since mic_addr can never be zero. + * else generate mic_addr by adding the 16G offset in dma_addr + */ + if (!mic_addr && MIC_FAMILY_X100 == mdev->family) { + dev_err(&mdev->pdev->dev, + "mic_map failed dma_addr 0x%llx size 0x%lx\n", + dma_addr, size); + return mic_addr; + } else { + return mic_addr + mic_smpt_offset(mdev, dma_addr); + } +} + +/** + * mic_unmap - Unmaps a MIC physical address. + * + * @mdev: pointer to mic_device instance. + * @mic_addr: MIC physical address. + * @size: Size of the region to be unmapped. + * + * This API unmaps the mappings created by mic_map(..). + * + * returns None. + */ +void mic_unmap(struct mic_device *mdev, dma_addr_t mic_addr, size_t size) +{ + struct mic_smpt_info *smpt_info = mdev->smpt; + s64 *ref; + int num_smpt; + int spt; + int i; + unsigned long flags; + + if (!size) + return; + + if (!mic_is_system_addr(mdev, mic_addr)) { + dev_err(&mdev->pdev->dev, + "invalid address: 0x%llx\n", mic_addr); + return; + } + + spt = mic_sys_addr_to_smpt(mdev, mic_addr); + ref = kmalloc_array(mdev->smpt->info.num_reg, sizeof(s64), GFP_ATOMIC); + if (!ref) + return; + + /* Get number of smpt entries to be mapped, ref count array */ + num_smpt = mic_get_smpt_ref_count(mdev, mic_addr, size, ref, NULL); + + spin_lock_irqsave(&smpt_info->smpt_lock, flags); + smpt_info->unmap_count++; + smpt_info->ref_count -= (s64)size; + + for (i = spt; i < spt + num_smpt; i++) { + smpt_info->entry[i].ref_count -= ref[i - spt]; + if (smpt_info->entry[i].ref_count < 0) + dev_warn(&mdev->pdev->dev, + "ref count for entry %d is negative\n", i); + } + spin_unlock_irqrestore(&smpt_info->smpt_lock, flags); + kfree(ref); +} + +/** + * mic_map_single - Maps a virtual address to a MIC physical address. + * + * @mdev: pointer to mic_device instance. + * @va: Kernel direct mapped virtual address. + * @size: Size of the region to be mapped. + * + * This API calls pci_map_single(..) for the direct mapped virtual address + * and then converts the DMA address provided to a DMA address understood + * by MIC. Caller should check for errors by calling mic_map_error(..). + * + * returns DMA address as required by MIC. + */ +dma_addr_t mic_map_single(struct mic_device *mdev, void *va, size_t size) +{ + dma_addr_t mic_addr = 0; + struct pci_dev *pdev = mdev->pdev; + dma_addr_t dma_addr = + pci_map_single(pdev, va, size, PCI_DMA_BIDIRECTIONAL); + + if (!pci_dma_mapping_error(pdev, dma_addr)) { + mic_addr = mic_map(mdev, dma_addr, size); + if (!mic_addr) { + dev_err(&mdev->pdev->dev, + "mic_map failed dma_addr 0x%llx size 0x%lx\n", + dma_addr, size); + pci_unmap_single(pdev, dma_addr, + size, PCI_DMA_BIDIRECTIONAL); + } + } + return mic_addr; +} + +/** + * mic_unmap_single - Unmaps a MIC physical address. + * + * @mdev: pointer to mic_device instance. + * @mic_addr: MIC physical address. + * @size: Size of the region to be unmapped. + * + * This API unmaps the mappings created by mic_map_single(..). + * + * returns None. + */ +void +mic_unmap_single(struct mic_device *mdev, dma_addr_t mic_addr, size_t size) +{ + struct pci_dev *pdev = mdev->pdev; + dma_addr_t dma_addr = mic_to_dma_addr(mdev, mic_addr); + mic_unmap(mdev, mic_addr, size); + pci_unmap_single(pdev, dma_addr, size, PCI_DMA_BIDIRECTIONAL); +} + +/** + * mic_smpt_init - Initialize MIC System Memory Page Tables. + * + * @mdev: pointer to mic_device instance. + * + * returns 0 for success and -errno for error. + */ +int mic_smpt_init(struct mic_device *mdev) +{ + int i, err = 0; + dma_addr_t dma_addr; + struct mic_smpt_info *smpt_info; + + mdev->smpt = kmalloc(sizeof(*mdev->smpt), GFP_KERNEL); + if (!mdev->smpt) + return -ENOMEM; + + smpt_info = mdev->smpt; + mdev->smpt_ops->init(mdev); + smpt_info->entry = kmalloc_array(smpt_info->info.num_reg, + sizeof(*smpt_info->entry), GFP_KERNEL); + if (!smpt_info->entry) { + err = -ENOMEM; + goto free_smpt; + } + spin_lock_init(&smpt_info->smpt_lock); + for (i = 0; i < smpt_info->info.num_reg; i++) { + dma_addr = i * smpt_info->info.page_size; + smpt_info->entry[i].dma_addr = dma_addr; + smpt_info->entry[i].ref_count = 0; + mdev->smpt_ops->set(mdev, dma_addr, i); + } + smpt_info->ref_count = 0; + smpt_info->map_count = 0; + smpt_info->unmap_count = 0; + return 0; +free_smpt: + kfree(smpt_info); + return err; +} + +/** + * mic_smpt_uninit - UnInitialize MIC System Memory Page Tables. + * + * @mdev: pointer to mic_device instance. + * + * returns None. + */ +void mic_smpt_uninit(struct mic_device *mdev) +{ + struct mic_smpt_info *smpt_info = mdev->smpt; + int i; + + dev_dbg(&mdev->pdev->dev, + "nodeid %d SMPT ref count %lld map %lld unmap %lld\n", + mdev->id, smpt_info->ref_count, + smpt_info->map_count, smpt_info->unmap_count); + + for (i = 0; i < smpt_info->info.num_reg; i++) { + dev_dbg(&mdev->pdev->dev, + "SMPT entry[%d] dma_addr = 0x%llx ref_count = %lld\n", + i, smpt_info->entry[i].dma_addr, + smpt_info->entry[i].ref_count); + if (smpt_info->entry[i].ref_count) + dev_warn(&mdev->pdev->dev, + "ref count for entry %d is not zero\n", i); + } + kfree(smpt_info->entry); + kfree(smpt_info); +} + +/** + * mic_smpt_restore - Restore MIC System Memory Page Tables. + * + * @mdev: pointer to mic_device instance. + * + * Restore the SMPT registers to values previously stored in the + * SW data structures. Some MIC steppings lose register state + * across resets and this API should be called for performing + * a restore operation if required. + * + * returns None. + */ +void mic_smpt_restore(struct mic_device *mdev) +{ + int i; + dma_addr_t dma_addr; + + for (i = 0; i < mdev->smpt->info.num_reg; i++) { + dma_addr = mdev->smpt->entry[i].dma_addr; + mdev->smpt_ops->set(mdev, dma_addr, i); + } +} diff --git a/drivers/misc/mic/host/mic_smpt.h b/drivers/misc/mic/host/mic_smpt.h new file mode 100644 index 000000000..68721c6e7 --- /dev/null +++ b/drivers/misc/mic/host/mic_smpt.h @@ -0,0 +1,99 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2013 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Intel MIC Host driver. + * + */ +#ifndef MIC_SMPT_H +#define MIC_SMPT_H +/** + * struct mic_smpt_ops - MIC HW specific SMPT operations. + * @init: Initialize hardware specific SMPT information in mic_smpt_hw_info. + * @set: Set the value for a particular SMPT entry. + */ +struct mic_smpt_ops { + void (*init)(struct mic_device *mdev); + void (*set)(struct mic_device *mdev, dma_addr_t dma_addr, u8 index); +}; + +/** + * struct mic_smpt - MIC SMPT entry information. + * @dma_addr: Base DMA address for this SMPT entry. + * @ref_count: Number of active mappings for this SMPT entry in bytes. + */ +struct mic_smpt { + dma_addr_t dma_addr; + s64 ref_count; +}; + +/** + * struct mic_smpt_hw_info - MIC SMPT hardware specific information. + * @num_reg: Number of SMPT registers. + * @page_shift: System memory page shift. + * @page_size: System memory page size. + * @base: System address base. + */ +struct mic_smpt_hw_info { + u8 num_reg; + u8 page_shift; + u64 page_size; + u64 base; +}; + +/** + * struct mic_smpt_info - MIC SMPT information. + * @entry: Array of SMPT entries. + * @smpt_lock: Spin lock protecting access to SMPT data structures. + * @info: Hardware specific SMPT information. + * @ref_count: Number of active SMPT mappings (for debug). + * @map_count: Number of SMPT mappings created (for debug). + * @unmap_count: Number of SMPT mappings destroyed (for debug). + */ +struct mic_smpt_info { + struct mic_smpt *entry; + spinlock_t smpt_lock; + struct mic_smpt_hw_info info; + s64 ref_count; + s64 map_count; + s64 unmap_count; +}; + +dma_addr_t mic_map_single(struct mic_device *mdev, void *va, size_t size); +void mic_unmap_single(struct mic_device *mdev, + dma_addr_t mic_addr, size_t size); +dma_addr_t mic_map(struct mic_device *mdev, + dma_addr_t dma_addr, size_t size); +void mic_unmap(struct mic_device *mdev, dma_addr_t mic_addr, size_t size); +dma_addr_t mic_to_dma_addr(struct mic_device *mdev, dma_addr_t mic_addr); + +/** + * mic_map_error - Check a MIC address for errors. + * + * @mdev: pointer to mic_device instance. + * + * returns Whether there was an error during mic_map..(..) APIs. + */ +static inline bool mic_map_error(dma_addr_t mic_addr) +{ + return !mic_addr; +} + +int mic_smpt_init(struct mic_device *mdev); +void mic_smpt_uninit(struct mic_device *mdev); +void mic_smpt_restore(struct mic_device *mdev); + +#endif diff --git a/drivers/misc/mic/host/mic_x100.c b/drivers/misc/mic/host/mic_x100.c new file mode 100644 index 000000000..82a973c85 --- /dev/null +++ b/drivers/misc/mic/host/mic_x100.c @@ -0,0 +1,584 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2013 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Intel MIC Host driver. + * + */ +#include +#include +#include +#include +#include + +#include "../common/mic_dev.h" +#include "mic_device.h" +#include "mic_x100.h" +#include "mic_smpt.h" + +/** + * mic_x100_write_spad - write to the scratchpad register + * @mdev: pointer to mic_device instance + * @idx: index to the scratchpad register, 0 based + * @val: the data value to put into the register + * + * This function allows writing of a 32bit value to the indexed scratchpad + * register. + * + * RETURNS: none. + */ +static void +mic_x100_write_spad(struct mic_device *mdev, unsigned int idx, u32 val) +{ + dev_dbg(&mdev->pdev->dev, "Writing 0x%x to scratch pad index %d\n", + val, idx); + mic_mmio_write(&mdev->mmio, val, + MIC_X100_SBOX_BASE_ADDRESS + + MIC_X100_SBOX_SPAD0 + idx * 4); +} + +/** + * mic_x100_read_spad - read from the scratchpad register + * @mdev: pointer to mic_device instance + * @idx: index to scratchpad register, 0 based + * + * This function allows reading of the 32bit scratchpad register. + * + * RETURNS: An appropriate -ERRNO error value on error, or zero for success. + */ +static u32 +mic_x100_read_spad(struct mic_device *mdev, unsigned int idx) +{ + u32 val = mic_mmio_read(&mdev->mmio, + MIC_X100_SBOX_BASE_ADDRESS + + MIC_X100_SBOX_SPAD0 + idx * 4); + + dev_dbg(&mdev->pdev->dev, + "Reading 0x%x from scratch pad index %d\n", val, idx); + return val; +} + +/** + * mic_x100_enable_interrupts - Enable interrupts. + * @mdev: pointer to mic_device instance + */ +static void mic_x100_enable_interrupts(struct mic_device *mdev) +{ + u32 reg; + struct mic_mw *mw = &mdev->mmio; + u32 sice0 = MIC_X100_SBOX_BASE_ADDRESS + MIC_X100_SBOX_SICE0; + u32 siac0 = MIC_X100_SBOX_BASE_ADDRESS + MIC_X100_SBOX_SIAC0; + + reg = mic_mmio_read(mw, sice0); + reg |= MIC_X100_SBOX_DBR_BITS(0xf) | MIC_X100_SBOX_DMA_BITS(0xff); + mic_mmio_write(mw, reg, sice0); + + /* + * Enable auto-clear when enabling interrupts. Applicable only for + * MSI-x. Legacy and MSI mode cannot have auto-clear enabled. + */ + if (mdev->irq_info.num_vectors > 1) { + reg = mic_mmio_read(mw, siac0); + reg |= MIC_X100_SBOX_DBR_BITS(0xf) | + MIC_X100_SBOX_DMA_BITS(0xff); + mic_mmio_write(mw, reg, siac0); + } +} + +/** + * mic_x100_disable_interrupts - Disable interrupts. + * @mdev: pointer to mic_device instance + */ +static void mic_x100_disable_interrupts(struct mic_device *mdev) +{ + u32 reg; + struct mic_mw *mw = &mdev->mmio; + u32 sice0 = MIC_X100_SBOX_BASE_ADDRESS + MIC_X100_SBOX_SICE0; + u32 siac0 = MIC_X100_SBOX_BASE_ADDRESS + MIC_X100_SBOX_SIAC0; + u32 sicc0 = MIC_X100_SBOX_BASE_ADDRESS + MIC_X100_SBOX_SICC0; + + reg = mic_mmio_read(mw, sice0); + mic_mmio_write(mw, reg, sicc0); + + if (mdev->irq_info.num_vectors > 1) { + reg = mic_mmio_read(mw, siac0); + reg &= ~(MIC_X100_SBOX_DBR_BITS(0xf) | + MIC_X100_SBOX_DMA_BITS(0xff)); + mic_mmio_write(mw, reg, siac0); + } +} + +/** + * mic_x100_send_sbox_intr - Send an MIC_X100_SBOX interrupt to MIC. + * @mdev: pointer to mic_device instance + */ +static void mic_x100_send_sbox_intr(struct mic_device *mdev, + int doorbell) +{ + struct mic_mw *mw = &mdev->mmio; + u64 apic_icr_offset = MIC_X100_SBOX_APICICR0 + doorbell * 8; + u32 apicicr_low = mic_mmio_read(mw, MIC_X100_SBOX_BASE_ADDRESS + + apic_icr_offset); + + /* for MIC we need to make sure we "hit" the send_icr bit (13) */ + apicicr_low = (apicicr_low | (1 << 13)); + + /* Ensure that the interrupt is ordered w.r.t. previous stores. */ + wmb(); + mic_mmio_write(mw, apicicr_low, + MIC_X100_SBOX_BASE_ADDRESS + apic_icr_offset); +} + +/** + * mic_x100_send_rdmasr_intr - Send an RDMASR interrupt to MIC. + * @mdev: pointer to mic_device instance + */ +static void mic_x100_send_rdmasr_intr(struct mic_device *mdev, + int doorbell) +{ + int rdmasr_offset = MIC_X100_SBOX_RDMASR0 + (doorbell << 2); + /* Ensure that the interrupt is ordered w.r.t. previous stores. */ + wmb(); + mic_mmio_write(&mdev->mmio, 0, + MIC_X100_SBOX_BASE_ADDRESS + rdmasr_offset); +} + +/** + * __mic_x100_send_intr - Send interrupt to MIC. + * @mdev: pointer to mic_device instance + * @doorbell: doorbell number. + */ +static void mic_x100_send_intr(struct mic_device *mdev, int doorbell) +{ + int rdmasr_db; + if (doorbell < MIC_X100_NUM_SBOX_IRQ) { + mic_x100_send_sbox_intr(mdev, doorbell); + } else { + rdmasr_db = doorbell - MIC_X100_NUM_SBOX_IRQ; + mic_x100_send_rdmasr_intr(mdev, rdmasr_db); + } +} + +/** + * mic_x100_ack_interrupt - Read the interrupt sources register and + * clear it. This function will be called in the MSI/INTx case. + * @mdev: Pointer to mic_device instance. + * + * Returns: bitmask of interrupt sources triggered. + */ +static u32 mic_x100_ack_interrupt(struct mic_device *mdev) +{ + u32 sicr0 = MIC_X100_SBOX_BASE_ADDRESS + MIC_X100_SBOX_SICR0; + u32 reg = mic_mmio_read(&mdev->mmio, sicr0); + mic_mmio_write(&mdev->mmio, reg, sicr0); + return reg; +} + +/** + * mic_x100_intr_workarounds - These hardware specific workarounds are + * to be invoked everytime an interrupt is handled. + * @mdev: Pointer to mic_device instance. + * + * Returns: none + */ +static void mic_x100_intr_workarounds(struct mic_device *mdev) +{ + struct mic_mw *mw = &mdev->mmio; + + /* Clear pending bit array. */ + if (MIC_A0_STEP == mdev->stepping) + mic_mmio_write(mw, 1, MIC_X100_SBOX_BASE_ADDRESS + + MIC_X100_SBOX_MSIXPBACR); + + if (mdev->stepping >= MIC_B0_STEP) + mdev->intr_ops->enable_interrupts(mdev); +} + +/** + * mic_x100_hw_intr_init - Initialize h/w specific interrupt + * information. + * @mdev: pointer to mic_device instance + */ +static void mic_x100_hw_intr_init(struct mic_device *mdev) +{ + mdev->intr_info = (struct mic_intr_info *)mic_x100_intr_init; +} + +/** + * mic_x100_read_msi_to_src_map - read from the MSI mapping registers + * @mdev: pointer to mic_device instance + * @idx: index to the mapping register, 0 based + * + * This function allows reading of the 32bit MSI mapping register. + * + * RETURNS: The value in the register. + */ +static u32 +mic_x100_read_msi_to_src_map(struct mic_device *mdev, int idx) +{ + return mic_mmio_read(&mdev->mmio, + MIC_X100_SBOX_BASE_ADDRESS + + MIC_X100_SBOX_MXAR0 + idx * 4); +} + +/** + * mic_x100_program_msi_to_src_map - program the MSI mapping registers + * @mdev: pointer to mic_device instance + * @idx: index to the mapping register, 0 based + * @offset: The bit offset in the register that needs to be updated. + * @set: boolean specifying if the bit in the specified offset needs + * to be set or cleared. + * + * RETURNS: None. + */ +static void +mic_x100_program_msi_to_src_map(struct mic_device *mdev, + int idx, int offset, bool set) +{ + unsigned long reg; + struct mic_mw *mw = &mdev->mmio; + u32 mxar = MIC_X100_SBOX_BASE_ADDRESS + + MIC_X100_SBOX_MXAR0 + idx * 4; + + reg = mic_mmio_read(mw, mxar); + if (set) + __set_bit(offset, ®); + else + __clear_bit(offset, ®); + mic_mmio_write(mw, reg, mxar); +} + +/* + * mic_x100_reset_fw_ready - Reset Firmware ready status field. + * @mdev: pointer to mic_device instance + */ +static void mic_x100_reset_fw_ready(struct mic_device *mdev) +{ + mdev->ops->write_spad(mdev, MIC_X100_DOWNLOAD_INFO, 0); +} + +/* + * mic_x100_is_fw_ready - Check if firmware is ready. + * @mdev: pointer to mic_device instance + */ +static bool mic_x100_is_fw_ready(struct mic_device *mdev) +{ + u32 scratch2 = mdev->ops->read_spad(mdev, MIC_X100_DOWNLOAD_INFO); + return MIC_X100_SPAD2_DOWNLOAD_STATUS(scratch2) ? true : false; +} + +/** + * mic_x100_get_apic_id - Get bootstrap APIC ID. + * @mdev: pointer to mic_device instance + */ +static u32 mic_x100_get_apic_id(struct mic_device *mdev) +{ + u32 scratch2 = 0; + + scratch2 = mdev->ops->read_spad(mdev, MIC_X100_DOWNLOAD_INFO); + return MIC_X100_SPAD2_APIC_ID(scratch2); +} + +/** + * mic_x100_send_firmware_intr - Send an interrupt to the firmware on MIC. + * @mdev: pointer to mic_device instance + */ +static void mic_x100_send_firmware_intr(struct mic_device *mdev) +{ + u32 apicicr_low; + u64 apic_icr_offset = MIC_X100_SBOX_APICICR7; + int vector = MIC_X100_BSP_INTERRUPT_VECTOR; + struct mic_mw *mw = &mdev->mmio; + + /* + * For MIC we need to make sure we "hit" + * the send_icr bit (13). + */ + apicicr_low = (vector | (1 << 13)); + + mic_mmio_write(mw, mic_x100_get_apic_id(mdev), + MIC_X100_SBOX_BASE_ADDRESS + apic_icr_offset + 4); + + /* Ensure that the interrupt is ordered w.r.t. previous stores. */ + wmb(); + mic_mmio_write(mw, apicicr_low, + MIC_X100_SBOX_BASE_ADDRESS + apic_icr_offset); +} + +/** + * mic_x100_hw_reset - Reset the MIC device. + * @mdev: pointer to mic_device instance + */ +static void mic_x100_hw_reset(struct mic_device *mdev) +{ + u32 reset_reg; + u32 rgcr = MIC_X100_SBOX_BASE_ADDRESS + MIC_X100_SBOX_RGCR; + struct mic_mw *mw = &mdev->mmio; + + /* Ensure that the reset is ordered w.r.t. previous loads and stores */ + mb(); + /* Trigger reset */ + reset_reg = mic_mmio_read(mw, rgcr); + reset_reg |= 0x1; + mic_mmio_write(mw, reset_reg, rgcr); + /* + * It seems we really want to delay at least 1 second + * after touching reset to prevent a lot of problems. + */ + msleep(1000); +} + +/** + * mic_x100_load_command_line - Load command line to MIC. + * @mdev: pointer to mic_device instance + * @fw: the firmware image + * + * RETURNS: An appropriate -ERRNO error value on error, or zero for success. + */ +static int +mic_x100_load_command_line(struct mic_device *mdev, const struct firmware *fw) +{ + u32 len = 0; + u32 boot_mem; + char *buf; + void __iomem *cmd_line_va = mdev->aper.va + mdev->bootaddr + fw->size; +#define CMDLINE_SIZE 2048 + + boot_mem = mdev->aper.len >> 20; + buf = kzalloc(CMDLINE_SIZE, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + len += snprintf(buf, CMDLINE_SIZE - len, + " mem=%dM", boot_mem); + if (mdev->cosm_dev->cmdline) + snprintf(buf + len, CMDLINE_SIZE - len, " %s", + mdev->cosm_dev->cmdline); + memcpy_toio(cmd_line_va, buf, strlen(buf) + 1); + kfree(buf); + return 0; +} + +/** + * mic_x100_load_ramdisk - Load ramdisk to MIC. + * @mdev: pointer to mic_device instance + * + * RETURNS: An appropriate -ERRNO error value on error, or zero for success. + */ +static int +mic_x100_load_ramdisk(struct mic_device *mdev) +{ + const struct firmware *fw; + int rc; + struct boot_params __iomem *bp = mdev->aper.va + mdev->bootaddr; + + rc = request_firmware(&fw, mdev->cosm_dev->ramdisk, &mdev->pdev->dev); + if (rc < 0) { + dev_err(&mdev->pdev->dev, + "ramdisk request_firmware failed: %d %s\n", + rc, mdev->cosm_dev->ramdisk); + goto error; + } + /* + * Typically the bootaddr for card OS is 64M + * so copy over the ramdisk @ 128M. + */ + memcpy_toio(mdev->aper.va + (mdev->bootaddr << 1), fw->data, fw->size); + iowrite32(mdev->bootaddr << 1, &bp->hdr.ramdisk_image); + iowrite32(fw->size, &bp->hdr.ramdisk_size); + release_firmware(fw); +error: + return rc; +} + +/** + * mic_x100_get_boot_addr - Get MIC boot address. + * @mdev: pointer to mic_device instance + * + * This function is called during firmware load to determine + * the address at which the OS should be downloaded in card + * memory i.e. GDDR. + * RETURNS: An appropriate -ERRNO error value on error, or zero for success. + */ +static int +mic_x100_get_boot_addr(struct mic_device *mdev) +{ + u32 scratch2, boot_addr; + int rc = 0; + + scratch2 = mdev->ops->read_spad(mdev, MIC_X100_DOWNLOAD_INFO); + boot_addr = MIC_X100_SPAD2_DOWNLOAD_ADDR(scratch2); + dev_dbg(&mdev->pdev->dev, "%s %d boot_addr 0x%x\n", + __func__, __LINE__, boot_addr); + if (boot_addr > (1 << 31)) { + dev_err(&mdev->pdev->dev, + "incorrect bootaddr 0x%x\n", + boot_addr); + rc = -EINVAL; + goto error; + } + mdev->bootaddr = boot_addr; +error: + return rc; +} + +/** + * mic_x100_load_firmware - Load firmware to MIC. + * @mdev: pointer to mic_device instance + * @buf: buffer containing boot string including firmware/ramdisk path. + * + * RETURNS: An appropriate -ERRNO error value on error, or zero for success. + */ +static int +mic_x100_load_firmware(struct mic_device *mdev, const char *buf) +{ + int rc; + const struct firmware *fw; + + rc = mic_x100_get_boot_addr(mdev); + if (rc) + return rc; + /* load OS */ + rc = request_firmware(&fw, mdev->cosm_dev->firmware, &mdev->pdev->dev); + if (rc < 0) { + dev_err(&mdev->pdev->dev, + "ramdisk request_firmware failed: %d %s\n", + rc, mdev->cosm_dev->firmware); + return rc; + } + if (mdev->bootaddr > mdev->aper.len - fw->size) { + rc = -EINVAL; + dev_err(&mdev->pdev->dev, "%s %d rc %d bootaddr 0x%x\n", + __func__, __LINE__, rc, mdev->bootaddr); + goto error; + } + memcpy_toio(mdev->aper.va + mdev->bootaddr, fw->data, fw->size); + mdev->ops->write_spad(mdev, MIC_X100_FW_SIZE, fw->size); + if (!strcmp(mdev->cosm_dev->bootmode, "flash")) { + rc = -EINVAL; + dev_err(&mdev->pdev->dev, "%s %d rc %d\n", + __func__, __LINE__, rc); + goto error; + } + /* load command line */ + rc = mic_x100_load_command_line(mdev, fw); + if (rc) { + dev_err(&mdev->pdev->dev, "%s %d rc %d\n", + __func__, __LINE__, rc); + goto error; + } + release_firmware(fw); + /* load ramdisk */ + if (mdev->cosm_dev->ramdisk) + rc = mic_x100_load_ramdisk(mdev); + + return rc; + +error: + release_firmware(fw); + return rc; +} + +/** + * mic_x100_get_postcode - Get postcode status from firmware. + * @mdev: pointer to mic_device instance + * + * RETURNS: postcode. + */ +static u32 mic_x100_get_postcode(struct mic_device *mdev) +{ + return mic_mmio_read(&mdev->mmio, MIC_X100_POSTCODE); +} + +/** + * mic_x100_smpt_set - Update an SMPT entry with a DMA address. + * @mdev: pointer to mic_device instance + * + * RETURNS: none. + */ +static void +mic_x100_smpt_set(struct mic_device *mdev, dma_addr_t dma_addr, u8 index) +{ +#define SNOOP_ON (0 << 0) +#define SNOOP_OFF (1 << 0) +/* + * Sbox Smpt Reg Bits: + * Bits 31:2 Host address + * Bits 1 RSVD + * Bits 0 No snoop + */ +#define BUILD_SMPT(NO_SNOOP, HOST_ADDR) \ + (u32)(((HOST_ADDR) << 2) | ((NO_SNOOP) & 0x01)) + + uint32_t smpt_reg_val = BUILD_SMPT(SNOOP_ON, + dma_addr >> mdev->smpt->info.page_shift); + mic_mmio_write(&mdev->mmio, smpt_reg_val, + MIC_X100_SBOX_BASE_ADDRESS + + MIC_X100_SBOX_SMPT00 + (4 * index)); +} + +/** + * mic_x100_smpt_hw_init - Initialize SMPT X100 specific fields. + * @mdev: pointer to mic_device instance + * + * RETURNS: none. + */ +static void mic_x100_smpt_hw_init(struct mic_device *mdev) +{ + struct mic_smpt_hw_info *info = &mdev->smpt->info; + + info->num_reg = 32; + info->page_shift = 34; + info->page_size = (1ULL << info->page_shift); + info->base = 0x8000000000ULL; +} + +struct mic_smpt_ops mic_x100_smpt_ops = { + .init = mic_x100_smpt_hw_init, + .set = mic_x100_smpt_set, +}; + +static bool mic_x100_dma_filter(struct dma_chan *chan, void *param) +{ + if (chan->device->dev->parent == (struct device *)param) + return true; + return false; +} + +struct mic_hw_ops mic_x100_ops = { + .aper_bar = MIC_X100_APER_BAR, + .mmio_bar = MIC_X100_MMIO_BAR, + .read_spad = mic_x100_read_spad, + .write_spad = mic_x100_write_spad, + .send_intr = mic_x100_send_intr, + .ack_interrupt = mic_x100_ack_interrupt, + .intr_workarounds = mic_x100_intr_workarounds, + .reset = mic_x100_hw_reset, + .reset_fw_ready = mic_x100_reset_fw_ready, + .is_fw_ready = mic_x100_is_fw_ready, + .send_firmware_intr = mic_x100_send_firmware_intr, + .load_mic_fw = mic_x100_load_firmware, + .get_postcode = mic_x100_get_postcode, + .dma_filter = mic_x100_dma_filter, +}; + +struct mic_hw_intr_ops mic_x100_intr_ops = { + .intr_init = mic_x100_hw_intr_init, + .enable_interrupts = mic_x100_enable_interrupts, + .disable_interrupts = mic_x100_disable_interrupts, + .program_msi_to_src_map = mic_x100_program_msi_to_src_map, + .read_msi_to_src_map = mic_x100_read_msi_to_src_map, +}; diff --git a/drivers/misc/mic/host/mic_x100.h b/drivers/misc/mic/host/mic_x100.h new file mode 100644 index 000000000..8b7daa182 --- /dev/null +++ b/drivers/misc/mic/host/mic_x100.h @@ -0,0 +1,98 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2013 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Intel MIC Host driver. + * + */ +#ifndef _MIC_X100_HW_H_ +#define _MIC_X100_HW_H_ + +#define MIC_X100_PCI_DEVICE_2250 0x2250 +#define MIC_X100_PCI_DEVICE_2251 0x2251 +#define MIC_X100_PCI_DEVICE_2252 0x2252 +#define MIC_X100_PCI_DEVICE_2253 0x2253 +#define MIC_X100_PCI_DEVICE_2254 0x2254 +#define MIC_X100_PCI_DEVICE_2255 0x2255 +#define MIC_X100_PCI_DEVICE_2256 0x2256 +#define MIC_X100_PCI_DEVICE_2257 0x2257 +#define MIC_X100_PCI_DEVICE_2258 0x2258 +#define MIC_X100_PCI_DEVICE_2259 0x2259 +#define MIC_X100_PCI_DEVICE_225a 0x225a +#define MIC_X100_PCI_DEVICE_225b 0x225b +#define MIC_X100_PCI_DEVICE_225c 0x225c +#define MIC_X100_PCI_DEVICE_225d 0x225d +#define MIC_X100_PCI_DEVICE_225e 0x225e + +#define MIC_X100_APER_BAR 0 +#define MIC_X100_MMIO_BAR 4 + +#define MIC_X100_SBOX_BASE_ADDRESS 0x00010000 +#define MIC_X100_SBOX_SPAD0 0x0000AB20 +#define MIC_X100_SBOX_SICR0_DBR(x) ((x) & 0xf) +#define MIC_X100_SBOX_SICR0_DMA(x) (((x) >> 8) & 0xff) +#define MIC_X100_SBOX_SICE0_DBR(x) ((x) & 0xf) +#define MIC_X100_SBOX_DBR_BITS(x) ((x) & 0xf) +#define MIC_X100_SBOX_SICE0_DMA(x) (((x) >> 8) & 0xff) +#define MIC_X100_SBOX_DMA_BITS(x) (((x) & 0xff) << 8) + +#define MIC_X100_SBOX_APICICR0 0x0000A9D0 +#define MIC_X100_SBOX_SICR0 0x00009004 +#define MIC_X100_SBOX_SICE0 0x0000900C +#define MIC_X100_SBOX_SICC0 0x00009010 +#define MIC_X100_SBOX_SIAC0 0x00009014 +#define MIC_X100_SBOX_MSIXPBACR 0x00009084 +#define MIC_X100_SBOX_MXAR0 0x00009044 +#define MIC_X100_SBOX_SMPT00 0x00003100 +#define MIC_X100_SBOX_RDMASR0 0x0000B180 + +#define MIC_X100_DOORBELL_IDX_START 0 +#define MIC_X100_NUM_DOORBELL 4 +#define MIC_X100_DMA_IDX_START 8 +#define MIC_X100_NUM_DMA 8 +#define MIC_X100_ERR_IDX_START 30 +#define MIC_X100_NUM_ERR 1 + +#define MIC_X100_NUM_SBOX_IRQ 8 +#define MIC_X100_NUM_RDMASR_IRQ 8 +#define MIC_X100_RDMASR_IRQ_BASE 17 +#define MIC_X100_SPAD2_DOWNLOAD_STATUS(x) ((x) & 0x1) +#define MIC_X100_SPAD2_APIC_ID(x) (((x) >> 1) & 0x1ff) +#define MIC_X100_SPAD2_DOWNLOAD_ADDR(x) ((x) & 0xfffff000) +#define MIC_X100_SBOX_APICICR7 0x0000AA08 +#define MIC_X100_SBOX_RGCR 0x00004010 +#define MIC_X100_SBOX_SDBIC0 0x0000CC90 +#define MIC_X100_DOWNLOAD_INFO 2 +#define MIC_X100_FW_SIZE 5 +#define MIC_X100_POSTCODE 0x242c + +static const u16 mic_x100_intr_init[] = { + MIC_X100_DOORBELL_IDX_START, + MIC_X100_DMA_IDX_START, + MIC_X100_ERR_IDX_START, + MIC_X100_NUM_DOORBELL, + MIC_X100_NUM_DMA, + MIC_X100_NUM_ERR, +}; + +/* Host->Card(bootstrap) Interrupt Vector */ +#define MIC_X100_BSP_INTERRUPT_VECTOR 229 + +extern struct mic_hw_ops mic_x100_ops; +extern struct mic_smpt_ops mic_x100_smpt_ops; +extern struct mic_hw_intr_ops mic_x100_intr_ops; + +#endif diff --git a/drivers/misc/mic/scif/Makefile b/drivers/misc/mic/scif/Makefile new file mode 100644 index 000000000..ff372555d --- /dev/null +++ b/drivers/misc/mic/scif/Makefile @@ -0,0 +1,21 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile - SCIF driver. +# Copyright(c) 2014, Intel Corporation. +# +obj-$(CONFIG_SCIF) += scif.o +scif-objs := scif_main.o +scif-objs += scif_peer_bus.o +scif-objs += scif_ports.o +scif-objs += scif_debugfs.o +scif-objs += scif_fd.o +scif-objs += scif_api.o +scif-objs += scif_epd.o +scif-objs += scif_rb.o +scif-objs += scif_nodeqp.o +scif-objs += scif_nm.o +scif-objs += scif_dma.o +scif-objs += scif_fence.o +scif-objs += scif_mmap.o +scif-objs += scif_rma.o +scif-objs += scif_rma_list.o diff --git a/drivers/misc/mic/scif/scif_api.c b/drivers/misc/mic/scif/scif_api.c new file mode 100644 index 000000000..8dd0ccede --- /dev/null +++ b/drivers/misc/mic/scif/scif_api.c @@ -0,0 +1,1496 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2014 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Intel SCIF driver. + * + */ +#include +#include "scif_main.h" +#include "scif_map.h" + +static const char * const scif_ep_states[] = { + "Unbound", + "Bound", + "Listening", + "Connected", + "Connecting", + "Mapping", + "Closing", + "Close Listening", + "Disconnected", + "Zombie"}; + +enum conn_async_state { + ASYNC_CONN_IDLE = 1, /* ep setup for async connect */ + ASYNC_CONN_INPROGRESS, /* async connect in progress */ + ASYNC_CONN_FLUSH_WORK /* async work flush in progress */ +}; + +/* + * File operations for anonymous inode file associated with a SCIF endpoint, + * used in kernel mode SCIF poll. Kernel mode SCIF poll calls portions of the + * poll API in the kernel and these take in a struct file *. Since a struct + * file is not available to kernel mode SCIF, it uses an anonymous file for + * this purpose. + */ +const struct file_operations scif_anon_fops = { + .owner = THIS_MODULE, +}; + +scif_epd_t scif_open(void) +{ + struct scif_endpt *ep; + int err; + + might_sleep(); + ep = kzalloc(sizeof(*ep), GFP_KERNEL); + if (!ep) + goto err_ep_alloc; + + ep->qp_info.qp = kzalloc(sizeof(*ep->qp_info.qp), GFP_KERNEL); + if (!ep->qp_info.qp) + goto err_qp_alloc; + + err = scif_anon_inode_getfile(ep); + if (err) + goto err_anon_inode; + + spin_lock_init(&ep->lock); + mutex_init(&ep->sendlock); + mutex_init(&ep->recvlock); + + scif_rma_ep_init(ep); + ep->state = SCIFEP_UNBOUND; + dev_dbg(scif_info.mdev.this_device, + "SCIFAPI open: ep %p success\n", ep); + return ep; + +err_anon_inode: + kfree(ep->qp_info.qp); +err_qp_alloc: + kfree(ep); +err_ep_alloc: + return NULL; +} +EXPORT_SYMBOL_GPL(scif_open); + +/* + * scif_disconnect_ep - Disconnects the endpoint if found + * @epd: The end point returned from scif_open() + */ +static struct scif_endpt *scif_disconnect_ep(struct scif_endpt *ep) +{ + struct scifmsg msg; + struct scif_endpt *fep = NULL; + struct scif_endpt *tmpep; + struct list_head *pos, *tmpq; + int err; + + /* + * Wake up any threads blocked in send()/recv() before closing + * out the connection. Grabbing and releasing the send/recv lock + * will ensure that any blocked senders/receivers have exited for + * Ring 0 endpoints. It is a Ring 0 bug to call send/recv after + * close. Ring 3 endpoints are not affected since close will not + * be called while there are IOCTLs executing. + */ + wake_up_interruptible(&ep->sendwq); + wake_up_interruptible(&ep->recvwq); + mutex_lock(&ep->sendlock); + mutex_unlock(&ep->sendlock); + mutex_lock(&ep->recvlock); + mutex_unlock(&ep->recvlock); + + /* Remove from the connected list */ + mutex_lock(&scif_info.connlock); + list_for_each_safe(pos, tmpq, &scif_info.connected) { + tmpep = list_entry(pos, struct scif_endpt, list); + if (tmpep == ep) { + list_del(pos); + fep = tmpep; + spin_lock(&ep->lock); + break; + } + } + + if (!fep) { + /* + * The other side has completed the disconnect before + * the end point can be removed from the list. Therefore + * the ep lock is not locked, traverse the disconnected + * list to find the endpoint and release the conn lock. + */ + list_for_each_safe(pos, tmpq, &scif_info.disconnected) { + tmpep = list_entry(pos, struct scif_endpt, list); + if (tmpep == ep) { + list_del(pos); + break; + } + } + mutex_unlock(&scif_info.connlock); + return NULL; + } + + init_completion(&ep->discon); + msg.uop = SCIF_DISCNCT; + msg.src = ep->port; + msg.dst = ep->peer; + msg.payload[0] = (u64)ep; + msg.payload[1] = ep->remote_ep; + + err = scif_nodeqp_send(ep->remote_dev, &msg); + spin_unlock(&ep->lock); + mutex_unlock(&scif_info.connlock); + + if (!err) + /* Wait for the remote node to respond with SCIF_DISCNT_ACK */ + wait_for_completion_timeout(&ep->discon, + SCIF_NODE_ALIVE_TIMEOUT); + return ep; +} + +int scif_close(scif_epd_t epd) +{ + struct scif_endpt *ep = (struct scif_endpt *)epd; + struct scif_endpt *tmpep; + struct list_head *pos, *tmpq; + enum scif_epd_state oldstate; + bool flush_conn; + + dev_dbg(scif_info.mdev.this_device, "SCIFAPI close: ep %p %s\n", + ep, scif_ep_states[ep->state]); + might_sleep(); + spin_lock(&ep->lock); + flush_conn = (ep->conn_async_state == ASYNC_CONN_INPROGRESS); + spin_unlock(&ep->lock); + + if (flush_conn) + flush_work(&scif_info.conn_work); + + spin_lock(&ep->lock); + oldstate = ep->state; + + ep->state = SCIFEP_CLOSING; + + switch (oldstate) { + case SCIFEP_ZOMBIE: + dev_err(scif_info.mdev.this_device, + "SCIFAPI close: zombie state unexpected\n"); + /* fall through */ + case SCIFEP_DISCONNECTED: + spin_unlock(&ep->lock); + scif_unregister_all_windows(epd); + /* Remove from the disconnected list */ + mutex_lock(&scif_info.connlock); + list_for_each_safe(pos, tmpq, &scif_info.disconnected) { + tmpep = list_entry(pos, struct scif_endpt, list); + if (tmpep == ep) { + list_del(pos); + break; + } + } + mutex_unlock(&scif_info.connlock); + break; + case SCIFEP_UNBOUND: + case SCIFEP_BOUND: + case SCIFEP_CONNECTING: + spin_unlock(&ep->lock); + break; + case SCIFEP_MAPPING: + case SCIFEP_CONNECTED: + case SCIFEP_CLOSING: + { + spin_unlock(&ep->lock); + scif_unregister_all_windows(epd); + scif_disconnect_ep(ep); + break; + } + case SCIFEP_LISTENING: + case SCIFEP_CLLISTEN: + { + struct scif_conreq *conreq; + struct scifmsg msg; + struct scif_endpt *aep; + + spin_unlock(&ep->lock); + mutex_lock(&scif_info.eplock); + + /* remove from listen list */ + list_for_each_safe(pos, tmpq, &scif_info.listen) { + tmpep = list_entry(pos, struct scif_endpt, list); + if (tmpep == ep) + list_del(pos); + } + /* Remove any dangling accepts */ + while (ep->acceptcnt) { + aep = list_first_entry(&ep->li_accept, + struct scif_endpt, liacceptlist); + list_del(&aep->liacceptlist); + scif_put_port(aep->port.port); + list_for_each_safe(pos, tmpq, &scif_info.uaccept) { + tmpep = list_entry(pos, struct scif_endpt, + miacceptlist); + if (tmpep == aep) { + list_del(pos); + break; + } + } + mutex_unlock(&scif_info.eplock); + mutex_lock(&scif_info.connlock); + list_for_each_safe(pos, tmpq, &scif_info.connected) { + tmpep = list_entry(pos, + struct scif_endpt, list); + if (tmpep == aep) { + list_del(pos); + break; + } + } + list_for_each_safe(pos, tmpq, &scif_info.disconnected) { + tmpep = list_entry(pos, + struct scif_endpt, list); + if (tmpep == aep) { + list_del(pos); + break; + } + } + mutex_unlock(&scif_info.connlock); + scif_teardown_ep(aep); + mutex_lock(&scif_info.eplock); + scif_add_epd_to_zombie_list(aep, SCIF_EPLOCK_HELD); + ep->acceptcnt--; + } + + spin_lock(&ep->lock); + mutex_unlock(&scif_info.eplock); + + /* Remove and reject any pending connection requests. */ + while (ep->conreqcnt) { + conreq = list_first_entry(&ep->conlist, + struct scif_conreq, list); + list_del(&conreq->list); + + msg.uop = SCIF_CNCT_REJ; + msg.dst.node = conreq->msg.src.node; + msg.dst.port = conreq->msg.src.port; + msg.payload[0] = conreq->msg.payload[0]; + msg.payload[1] = conreq->msg.payload[1]; + /* + * No Error Handling on purpose for scif_nodeqp_send(). + * If the remote node is lost we still want free the + * connection requests on the self node. + */ + scif_nodeqp_send(&scif_dev[conreq->msg.src.node], + &msg); + ep->conreqcnt--; + kfree(conreq); + } + + spin_unlock(&ep->lock); + /* If a kSCIF accept is waiting wake it up */ + wake_up_interruptible(&ep->conwq); + break; + } + } + scif_put_port(ep->port.port); + scif_anon_inode_fput(ep); + scif_teardown_ep(ep); + scif_add_epd_to_zombie_list(ep, !SCIF_EPLOCK_HELD); + return 0; +} +EXPORT_SYMBOL_GPL(scif_close); + +/** + * scif_flush() - Wakes up any blocking accepts. The endpoint will no longer + * accept new connections. + * @epd: The end point returned from scif_open() + */ +int __scif_flush(scif_epd_t epd) +{ + struct scif_endpt *ep = (struct scif_endpt *)epd; + + switch (ep->state) { + case SCIFEP_LISTENING: + { + ep->state = SCIFEP_CLLISTEN; + + /* If an accept is waiting wake it up */ + wake_up_interruptible(&ep->conwq); + break; + } + default: + break; + } + return 0; +} + +int scif_bind(scif_epd_t epd, u16 pn) +{ + struct scif_endpt *ep = (struct scif_endpt *)epd; + int ret = 0; + int tmp; + + dev_dbg(scif_info.mdev.this_device, + "SCIFAPI bind: ep %p %s requested port number %d\n", + ep, scif_ep_states[ep->state], pn); + if (pn) { + /* + * Similar to IETF RFC 1700, SCIF ports below + * SCIF_ADMIN_PORT_END can only be bound by system (or root) + * processes or by processes executed by privileged users. + */ + if (pn < SCIF_ADMIN_PORT_END && !capable(CAP_SYS_ADMIN)) { + ret = -EACCES; + goto scif_bind_admin_exit; + } + } + + spin_lock(&ep->lock); + if (ep->state == SCIFEP_BOUND) { + ret = -EINVAL; + goto scif_bind_exit; + } else if (ep->state != SCIFEP_UNBOUND) { + ret = -EISCONN; + goto scif_bind_exit; + } + + if (pn) { + tmp = scif_rsrv_port(pn); + if (tmp != pn) { + ret = -EINVAL; + goto scif_bind_exit; + } + } else { + ret = scif_get_new_port(); + if (ret < 0) + goto scif_bind_exit; + pn = ret; + } + + ep->state = SCIFEP_BOUND; + ep->port.node = scif_info.nodeid; + ep->port.port = pn; + ep->conn_async_state = ASYNC_CONN_IDLE; + ret = pn; + dev_dbg(scif_info.mdev.this_device, + "SCIFAPI bind: bound to port number %d\n", pn); +scif_bind_exit: + spin_unlock(&ep->lock); +scif_bind_admin_exit: + return ret; +} +EXPORT_SYMBOL_GPL(scif_bind); + +int scif_listen(scif_epd_t epd, int backlog) +{ + struct scif_endpt *ep = (struct scif_endpt *)epd; + + dev_dbg(scif_info.mdev.this_device, + "SCIFAPI listen: ep %p %s\n", ep, scif_ep_states[ep->state]); + spin_lock(&ep->lock); + switch (ep->state) { + case SCIFEP_ZOMBIE: + case SCIFEP_CLOSING: + case SCIFEP_CLLISTEN: + case SCIFEP_UNBOUND: + case SCIFEP_DISCONNECTED: + spin_unlock(&ep->lock); + return -EINVAL; + case SCIFEP_LISTENING: + case SCIFEP_CONNECTED: + case SCIFEP_CONNECTING: + case SCIFEP_MAPPING: + spin_unlock(&ep->lock); + return -EISCONN; + case SCIFEP_BOUND: + break; + } + + ep->state = SCIFEP_LISTENING; + ep->backlog = backlog; + + ep->conreqcnt = 0; + ep->acceptcnt = 0; + INIT_LIST_HEAD(&ep->conlist); + init_waitqueue_head(&ep->conwq); + INIT_LIST_HEAD(&ep->li_accept); + spin_unlock(&ep->lock); + + /* + * Listen status is complete so delete the qp information not needed + * on a listen before placing on the list of listening ep's + */ + scif_teardown_ep(ep); + ep->qp_info.qp = NULL; + + mutex_lock(&scif_info.eplock); + list_add_tail(&ep->list, &scif_info.listen); + mutex_unlock(&scif_info.eplock); + return 0; +} +EXPORT_SYMBOL_GPL(scif_listen); + +/* + ************************************************************************ + * SCIF connection flow: + * + * 1) A SCIF listening endpoint can call scif_accept(..) to wait for SCIF + * connections via a SCIF_CNCT_REQ message + * 2) A SCIF endpoint can initiate a SCIF connection by calling + * scif_connect(..) which calls scif_setup_qp_connect(..) which + * allocates the local qp for the endpoint ring buffer and then sends + * a SCIF_CNCT_REQ to the remote node and waits for a SCIF_CNCT_GNT or + * a SCIF_CNCT_REJ message + * 3) The peer node handles a SCIF_CNCT_REQ via scif_cnctreq_resp(..) which + * wakes up any threads blocked in step 1 or sends a SCIF_CNCT_REJ + * message otherwise + * 4) A thread blocked waiting for incoming connections allocates its local + * endpoint QP and ring buffer following which it sends a SCIF_CNCT_GNT + * and waits for a SCIF_CNCT_GNT(N)ACK. If the allocation fails then + * the node sends a SCIF_CNCT_REJ message + * 5) Upon receipt of a SCIF_CNCT_GNT or a SCIF_CNCT_REJ message the + * connecting endpoint is woken up as part of handling + * scif_cnctgnt_resp(..) following which it maps the remote endpoints' + * QP, updates its outbound QP and sends a SCIF_CNCT_GNTACK message on + * success or a SCIF_CNCT_GNTNACK message on failure and completes + * the scif_connect(..) API + * 6) Upon receipt of a SCIF_CNCT_GNT(N)ACK the accepting endpoint blocked + * in step 4 is woken up and completes the scif_accept(..) API + * 7) The SCIF connection is now established between the two SCIF endpoints. + */ +static int scif_conn_func(struct scif_endpt *ep) +{ + int err = 0; + struct scifmsg msg; + struct device *spdev; + + err = scif_reserve_dma_chan(ep); + if (err) { + dev_err(&ep->remote_dev->sdev->dev, + "%s %d err %d\n", __func__, __LINE__, err); + ep->state = SCIFEP_BOUND; + goto connect_error_simple; + } + /* Initiate the first part of the endpoint QP setup */ + err = scif_setup_qp_connect(ep->qp_info.qp, &ep->qp_info.qp_offset, + SCIF_ENDPT_QP_SIZE, ep->remote_dev); + if (err) { + dev_err(&ep->remote_dev->sdev->dev, + "%s err %d qp_offset 0x%llx\n", + __func__, err, ep->qp_info.qp_offset); + ep->state = SCIFEP_BOUND; + goto connect_error_simple; + } + + spdev = scif_get_peer_dev(ep->remote_dev); + if (IS_ERR(spdev)) { + err = PTR_ERR(spdev); + goto cleanup_qp; + } + /* Format connect message and send it */ + msg.src = ep->port; + msg.dst = ep->conn_port; + msg.uop = SCIF_CNCT_REQ; + msg.payload[0] = (u64)ep; + msg.payload[1] = ep->qp_info.qp_offset; + err = _scif_nodeqp_send(ep->remote_dev, &msg); + if (err) + goto connect_error_dec; + scif_put_peer_dev(spdev); + /* + * Wait for the remote node to respond with SCIF_CNCT_GNT or + * SCIF_CNCT_REJ message. + */ + err = wait_event_timeout(ep->conwq, ep->state != SCIFEP_CONNECTING, + SCIF_NODE_ALIVE_TIMEOUT); + if (!err) { + dev_err(&ep->remote_dev->sdev->dev, + "%s %d timeout\n", __func__, __LINE__); + ep->state = SCIFEP_BOUND; + } + spdev = scif_get_peer_dev(ep->remote_dev); + if (IS_ERR(spdev)) { + err = PTR_ERR(spdev); + goto cleanup_qp; + } + if (ep->state == SCIFEP_MAPPING) { + err = scif_setup_qp_connect_response(ep->remote_dev, + ep->qp_info.qp, + ep->qp_info.gnt_pld); + /* + * If the resource to map the queue are not available then + * we need to tell the other side to terminate the accept + */ + if (err) { + dev_err(&ep->remote_dev->sdev->dev, + "%s %d err %d\n", __func__, __LINE__, err); + msg.uop = SCIF_CNCT_GNTNACK; + msg.payload[0] = ep->remote_ep; + _scif_nodeqp_send(ep->remote_dev, &msg); + ep->state = SCIFEP_BOUND; + goto connect_error_dec; + } + + msg.uop = SCIF_CNCT_GNTACK; + msg.payload[0] = ep->remote_ep; + err = _scif_nodeqp_send(ep->remote_dev, &msg); + if (err) { + ep->state = SCIFEP_BOUND; + goto connect_error_dec; + } + ep->state = SCIFEP_CONNECTED; + mutex_lock(&scif_info.connlock); + list_add_tail(&ep->list, &scif_info.connected); + mutex_unlock(&scif_info.connlock); + dev_dbg(&ep->remote_dev->sdev->dev, + "SCIFAPI connect: ep %p connected\n", ep); + } else if (ep->state == SCIFEP_BOUND) { + dev_dbg(&ep->remote_dev->sdev->dev, + "SCIFAPI connect: ep %p connection refused\n", ep); + err = -ECONNREFUSED; + goto connect_error_dec; + } + scif_put_peer_dev(spdev); + return err; +connect_error_dec: + scif_put_peer_dev(spdev); +cleanup_qp: + scif_cleanup_ep_qp(ep); +connect_error_simple: + return err; +} + +/* + * scif_conn_handler: + * + * Workqueue handler for servicing non-blocking SCIF connect + * + */ +void scif_conn_handler(struct work_struct *work) +{ + struct scif_endpt *ep; + + do { + ep = NULL; + spin_lock(&scif_info.nb_connect_lock); + if (!list_empty(&scif_info.nb_connect_list)) { + ep = list_first_entry(&scif_info.nb_connect_list, + struct scif_endpt, conn_list); + list_del(&ep->conn_list); + } + spin_unlock(&scif_info.nb_connect_lock); + if (ep) { + ep->conn_err = scif_conn_func(ep); + wake_up_interruptible(&ep->conn_pend_wq); + } + } while (ep); +} + +int __scif_connect(scif_epd_t epd, struct scif_port_id *dst, bool non_block) +{ + struct scif_endpt *ep = (struct scif_endpt *)epd; + int err = 0; + struct scif_dev *remote_dev; + struct device *spdev; + + dev_dbg(scif_info.mdev.this_device, "SCIFAPI connect: ep %p %s\n", ep, + scif_ep_states[ep->state]); + + if (!scif_dev || dst->node > scif_info.maxid) + return -ENODEV; + + might_sleep(); + + remote_dev = &scif_dev[dst->node]; + spdev = scif_get_peer_dev(remote_dev); + if (IS_ERR(spdev)) { + err = PTR_ERR(spdev); + return err; + } + + spin_lock(&ep->lock); + switch (ep->state) { + case SCIFEP_ZOMBIE: + case SCIFEP_CLOSING: + err = -EINVAL; + break; + case SCIFEP_DISCONNECTED: + if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) + ep->conn_async_state = ASYNC_CONN_FLUSH_WORK; + else + err = -EINVAL; + break; + case SCIFEP_LISTENING: + case SCIFEP_CLLISTEN: + err = -EOPNOTSUPP; + break; + case SCIFEP_CONNECTING: + case SCIFEP_MAPPING: + if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) + err = -EINPROGRESS; + else + err = -EISCONN; + break; + case SCIFEP_CONNECTED: + if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) + ep->conn_async_state = ASYNC_CONN_FLUSH_WORK; + else + err = -EISCONN; + break; + case SCIFEP_UNBOUND: + err = scif_get_new_port(); + if (err < 0) + break; + ep->port.port = err; + ep->port.node = scif_info.nodeid; + ep->conn_async_state = ASYNC_CONN_IDLE; + /* Fall through */ + case SCIFEP_BOUND: + /* + * If a non-blocking connect has been already initiated + * (conn_async_state is either ASYNC_CONN_INPROGRESS or + * ASYNC_CONN_FLUSH_WORK), the end point could end up in + * SCIF_BOUND due an error in the connection process + * (e.g., connection refused) If conn_async_state is + * ASYNC_CONN_INPROGRESS - transition to ASYNC_CONN_FLUSH_WORK + * so that the error status can be collected. If the state is + * already ASYNC_CONN_FLUSH_WORK - then set the error to + * EINPROGRESS since some other thread is waiting to collect + * error status. + */ + if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) { + ep->conn_async_state = ASYNC_CONN_FLUSH_WORK; + } else if (ep->conn_async_state == ASYNC_CONN_FLUSH_WORK) { + err = -EINPROGRESS; + } else { + ep->conn_port = *dst; + init_waitqueue_head(&ep->sendwq); + init_waitqueue_head(&ep->recvwq); + init_waitqueue_head(&ep->conwq); + ep->conn_async_state = 0; + + if (unlikely(non_block)) + ep->conn_async_state = ASYNC_CONN_INPROGRESS; + } + break; + } + + if (err || ep->conn_async_state == ASYNC_CONN_FLUSH_WORK) + goto connect_simple_unlock1; + + ep->state = SCIFEP_CONNECTING; + ep->remote_dev = &scif_dev[dst->node]; + ep->qp_info.qp->magic = SCIFEP_MAGIC; + if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) { + init_waitqueue_head(&ep->conn_pend_wq); + spin_lock(&scif_info.nb_connect_lock); + list_add_tail(&ep->conn_list, &scif_info.nb_connect_list); + spin_unlock(&scif_info.nb_connect_lock); + err = -EINPROGRESS; + schedule_work(&scif_info.conn_work); + } +connect_simple_unlock1: + spin_unlock(&ep->lock); + scif_put_peer_dev(spdev); + if (err) { + return err; + } else if (ep->conn_async_state == ASYNC_CONN_FLUSH_WORK) { + flush_work(&scif_info.conn_work); + err = ep->conn_err; + spin_lock(&ep->lock); + ep->conn_async_state = ASYNC_CONN_IDLE; + spin_unlock(&ep->lock); + } else { + err = scif_conn_func(ep); + } + return err; +} + +int scif_connect(scif_epd_t epd, struct scif_port_id *dst) +{ + return __scif_connect(epd, dst, false); +} +EXPORT_SYMBOL_GPL(scif_connect); + +/** + * scif_accept() - Accept a connection request from the remote node + * + * The function accepts a connection request from the remote node. Successful + * complete is indicate by a new end point being created and passed back + * to the caller for future reference. + * + * Upon successful complete a zero will be returned and the peer information + * will be filled in. + * + * If the end point is not in the listening state -EINVAL will be returned. + * + * If during the connection sequence resource allocation fails the -ENOMEM + * will be returned. + * + * If the function is called with the ASYNC flag set and no connection requests + * are pending it will return -EAGAIN. + * + * If the remote side is not sending any connection requests the caller may + * terminate this function with a signal. If so a -EINTR will be returned. + */ +int scif_accept(scif_epd_t epd, struct scif_port_id *peer, + scif_epd_t *newepd, int flags) +{ + struct scif_endpt *lep = (struct scif_endpt *)epd; + struct scif_endpt *cep; + struct scif_conreq *conreq; + struct scifmsg msg; + int err; + struct device *spdev; + + dev_dbg(scif_info.mdev.this_device, + "SCIFAPI accept: ep %p %s\n", lep, scif_ep_states[lep->state]); + + if (flags & ~SCIF_ACCEPT_SYNC) + return -EINVAL; + + if (!peer || !newepd) + return -EINVAL; + + might_sleep(); + spin_lock(&lep->lock); + if (lep->state != SCIFEP_LISTENING) { + spin_unlock(&lep->lock); + return -EINVAL; + } + + if (!lep->conreqcnt && !(flags & SCIF_ACCEPT_SYNC)) { + /* No connection request present and we do not want to wait */ + spin_unlock(&lep->lock); + return -EAGAIN; + } + + lep->files = current->files; +retry_connection: + spin_unlock(&lep->lock); + /* Wait for the remote node to send us a SCIF_CNCT_REQ */ + err = wait_event_interruptible(lep->conwq, + (lep->conreqcnt || + (lep->state != SCIFEP_LISTENING))); + if (err) + return err; + + if (lep->state != SCIFEP_LISTENING) + return -EINTR; + + spin_lock(&lep->lock); + + if (!lep->conreqcnt) + goto retry_connection; + + /* Get the first connect request off the list */ + conreq = list_first_entry(&lep->conlist, struct scif_conreq, list); + list_del(&conreq->list); + lep->conreqcnt--; + spin_unlock(&lep->lock); + + /* Fill in the peer information */ + peer->node = conreq->msg.src.node; + peer->port = conreq->msg.src.port; + + cep = kzalloc(sizeof(*cep), GFP_KERNEL); + if (!cep) { + err = -ENOMEM; + goto scif_accept_error_epalloc; + } + spin_lock_init(&cep->lock); + mutex_init(&cep->sendlock); + mutex_init(&cep->recvlock); + cep->state = SCIFEP_CONNECTING; + cep->remote_dev = &scif_dev[peer->node]; + cep->remote_ep = conreq->msg.payload[0]; + + scif_rma_ep_init(cep); + + err = scif_reserve_dma_chan(cep); + if (err) { + dev_err(scif_info.mdev.this_device, + "%s %d err %d\n", __func__, __LINE__, err); + goto scif_accept_error_qpalloc; + } + + cep->qp_info.qp = kzalloc(sizeof(*cep->qp_info.qp), GFP_KERNEL); + if (!cep->qp_info.qp) { + err = -ENOMEM; + goto scif_accept_error_qpalloc; + } + + err = scif_anon_inode_getfile(cep); + if (err) + goto scif_accept_error_anon_inode; + + cep->qp_info.qp->magic = SCIFEP_MAGIC; + spdev = scif_get_peer_dev(cep->remote_dev); + if (IS_ERR(spdev)) { + err = PTR_ERR(spdev); + goto scif_accept_error_map; + } + err = scif_setup_qp_accept(cep->qp_info.qp, &cep->qp_info.qp_offset, + conreq->msg.payload[1], SCIF_ENDPT_QP_SIZE, + cep->remote_dev); + if (err) { + dev_dbg(&cep->remote_dev->sdev->dev, + "SCIFAPI accept: ep %p new %p scif_setup_qp_accept %d qp_offset 0x%llx\n", + lep, cep, err, cep->qp_info.qp_offset); + scif_put_peer_dev(spdev); + goto scif_accept_error_map; + } + + cep->port.node = lep->port.node; + cep->port.port = lep->port.port; + cep->peer.node = peer->node; + cep->peer.port = peer->port; + init_waitqueue_head(&cep->sendwq); + init_waitqueue_head(&cep->recvwq); + init_waitqueue_head(&cep->conwq); + + msg.uop = SCIF_CNCT_GNT; + msg.src = cep->port; + msg.payload[0] = cep->remote_ep; + msg.payload[1] = cep->qp_info.qp_offset; + msg.payload[2] = (u64)cep; + + err = _scif_nodeqp_send(cep->remote_dev, &msg); + scif_put_peer_dev(spdev); + if (err) + goto scif_accept_error_map; +retry: + /* Wait for the remote node to respond with SCIF_CNCT_GNT(N)ACK */ + err = wait_event_timeout(cep->conwq, cep->state != SCIFEP_CONNECTING, + SCIF_NODE_ACCEPT_TIMEOUT); + if (!err && scifdev_alive(cep)) + goto retry; + err = !err ? -ENODEV : 0; + if (err) + goto scif_accept_error_map; + kfree(conreq); + + spin_lock(&cep->lock); + + if (cep->state == SCIFEP_CLOSING) { + /* + * Remote failed to allocate resources and NAKed the grant. + * There is at this point nothing referencing the new end point. + */ + spin_unlock(&cep->lock); + scif_teardown_ep(cep); + kfree(cep); + + /* If call with sync flag then go back and wait. */ + if (flags & SCIF_ACCEPT_SYNC) { + spin_lock(&lep->lock); + goto retry_connection; + } + return -EAGAIN; + } + + scif_get_port(cep->port.port); + *newepd = (scif_epd_t)cep; + spin_unlock(&cep->lock); + return 0; +scif_accept_error_map: + scif_anon_inode_fput(cep); +scif_accept_error_anon_inode: + scif_teardown_ep(cep); +scif_accept_error_qpalloc: + kfree(cep); +scif_accept_error_epalloc: + msg.uop = SCIF_CNCT_REJ; + msg.dst.node = conreq->msg.src.node; + msg.dst.port = conreq->msg.src.port; + msg.payload[0] = conreq->msg.payload[0]; + msg.payload[1] = conreq->msg.payload[1]; + scif_nodeqp_send(&scif_dev[conreq->msg.src.node], &msg); + kfree(conreq); + return err; +} +EXPORT_SYMBOL_GPL(scif_accept); + +/* + * scif_msg_param_check: + * @epd: The end point returned from scif_open() + * @len: Length to receive + * @flags: blocking or non blocking + * + * Validate parameters for messaging APIs scif_send(..)/scif_recv(..). + */ +static inline int scif_msg_param_check(scif_epd_t epd, int len, int flags) +{ + int ret = -EINVAL; + + if (len < 0) + goto err_ret; + if (flags && (!(flags & SCIF_RECV_BLOCK))) + goto err_ret; + ret = 0; +err_ret: + return ret; +} + +static int _scif_send(scif_epd_t epd, void *msg, int len, int flags) +{ + struct scif_endpt *ep = (struct scif_endpt *)epd; + struct scifmsg notif_msg; + int curr_xfer_len = 0, sent_len = 0, write_count; + int ret = 0; + struct scif_qp *qp = ep->qp_info.qp; + + if (flags & SCIF_SEND_BLOCK) + might_sleep(); + + spin_lock(&ep->lock); + while (sent_len != len && SCIFEP_CONNECTED == ep->state) { + write_count = scif_rb_space(&qp->outbound_q); + if (write_count) { + /* Best effort to send as much data as possible */ + curr_xfer_len = min(len - sent_len, write_count); + ret = scif_rb_write(&qp->outbound_q, msg, + curr_xfer_len); + if (ret < 0) + break; + /* Success. Update write pointer */ + scif_rb_commit(&qp->outbound_q); + /* + * Send a notification to the peer about the + * produced data message. + */ + notif_msg.src = ep->port; + notif_msg.uop = SCIF_CLIENT_SENT; + notif_msg.payload[0] = ep->remote_ep; + ret = _scif_nodeqp_send(ep->remote_dev, ¬if_msg); + if (ret) + break; + sent_len += curr_xfer_len; + msg = msg + curr_xfer_len; + continue; + } + curr_xfer_len = min(len - sent_len, SCIF_ENDPT_QP_SIZE - 1); + /* Not enough RB space. return for the Non Blocking case */ + if (!(flags & SCIF_SEND_BLOCK)) + break; + + spin_unlock(&ep->lock); + /* Wait for a SCIF_CLIENT_RCVD message in the Blocking case */ + ret = + wait_event_interruptible(ep->sendwq, + (SCIFEP_CONNECTED != ep->state) || + (scif_rb_space(&qp->outbound_q) >= + curr_xfer_len)); + spin_lock(&ep->lock); + if (ret) + break; + } + if (sent_len) + ret = sent_len; + else if (!ret && SCIFEP_CONNECTED != ep->state) + ret = SCIFEP_DISCONNECTED == ep->state ? + -ECONNRESET : -ENOTCONN; + spin_unlock(&ep->lock); + return ret; +} + +static int _scif_recv(scif_epd_t epd, void *msg, int len, int flags) +{ + int read_size; + struct scif_endpt *ep = (struct scif_endpt *)epd; + struct scifmsg notif_msg; + int curr_recv_len = 0, remaining_len = len, read_count; + int ret = 0; + struct scif_qp *qp = ep->qp_info.qp; + + if (flags & SCIF_RECV_BLOCK) + might_sleep(); + spin_lock(&ep->lock); + while (remaining_len && (SCIFEP_CONNECTED == ep->state || + SCIFEP_DISCONNECTED == ep->state)) { + read_count = scif_rb_count(&qp->inbound_q, remaining_len); + if (read_count) { + /* + * Best effort to recv as much data as there + * are bytes to read in the RB particularly + * important for the Non Blocking case. + */ + curr_recv_len = min(remaining_len, read_count); + read_size = scif_rb_get_next(&qp->inbound_q, + msg, curr_recv_len); + if (ep->state == SCIFEP_CONNECTED) { + /* + * Update the read pointer only if the endpoint + * is still connected else the read pointer + * might no longer exist since the peer has + * freed resources! + */ + scif_rb_update_read_ptr(&qp->inbound_q); + /* + * Send a notification to the peer about the + * consumed data message only if the EP is in + * SCIFEP_CONNECTED state. + */ + notif_msg.src = ep->port; + notif_msg.uop = SCIF_CLIENT_RCVD; + notif_msg.payload[0] = ep->remote_ep; + ret = _scif_nodeqp_send(ep->remote_dev, + ¬if_msg); + if (ret) + break; + } + remaining_len -= curr_recv_len; + msg = msg + curr_recv_len; + continue; + } + /* + * Bail out now if the EP is in SCIFEP_DISCONNECTED state else + * we will keep looping forever. + */ + if (ep->state == SCIFEP_DISCONNECTED) + break; + /* + * Return in the Non Blocking case if there is no data + * to read in this iteration. + */ + if (!(flags & SCIF_RECV_BLOCK)) + break; + curr_recv_len = min(remaining_len, SCIF_ENDPT_QP_SIZE - 1); + spin_unlock(&ep->lock); + /* + * Wait for a SCIF_CLIENT_SEND message in the blocking case + * or until other side disconnects. + */ + ret = + wait_event_interruptible(ep->recvwq, + SCIFEP_CONNECTED != ep->state || + scif_rb_count(&qp->inbound_q, + curr_recv_len) + >= curr_recv_len); + spin_lock(&ep->lock); + if (ret) + break; + } + if (len - remaining_len) + ret = len - remaining_len; + else if (!ret && ep->state != SCIFEP_CONNECTED) + ret = ep->state == SCIFEP_DISCONNECTED ? + -ECONNRESET : -ENOTCONN; + spin_unlock(&ep->lock); + return ret; +} + +/** + * scif_user_send() - Send data to connection queue + * @epd: The end point returned from scif_open() + * @msg: Address to place data + * @len: Length to receive + * @flags: blocking or non blocking + * + * This function is called from the driver IOCTL entry point + * only and is a wrapper for _scif_send(). + */ +int scif_user_send(scif_epd_t epd, void __user *msg, int len, int flags) +{ + struct scif_endpt *ep = (struct scif_endpt *)epd; + int err = 0; + int sent_len = 0; + char *tmp; + int loop_len; + int chunk_len = min(len, (1 << (MAX_ORDER + PAGE_SHIFT - 1))); + + dev_dbg(scif_info.mdev.this_device, + "SCIFAPI send (U): ep %p %s\n", ep, scif_ep_states[ep->state]); + if (!len) + return 0; + + err = scif_msg_param_check(epd, len, flags); + if (err) + goto send_err; + + tmp = kmalloc(chunk_len, GFP_KERNEL); + if (!tmp) { + err = -ENOMEM; + goto send_err; + } + /* + * Grabbing the lock before breaking up the transfer in + * multiple chunks is required to ensure that messages do + * not get fragmented and reordered. + */ + mutex_lock(&ep->sendlock); + while (sent_len != len) { + loop_len = len - sent_len; + loop_len = min(chunk_len, loop_len); + if (copy_from_user(tmp, msg, loop_len)) { + err = -EFAULT; + goto send_free_err; + } + err = _scif_send(epd, tmp, loop_len, flags); + if (err < 0) + goto send_free_err; + sent_len += err; + msg += err; + if (err != loop_len) + goto send_free_err; + } +send_free_err: + mutex_unlock(&ep->sendlock); + kfree(tmp); +send_err: + return err < 0 ? err : sent_len; +} + +/** + * scif_user_recv() - Receive data from connection queue + * @epd: The end point returned from scif_open() + * @msg: Address to place data + * @len: Length to receive + * @flags: blocking or non blocking + * + * This function is called from the driver IOCTL entry point + * only and is a wrapper for _scif_recv(). + */ +int scif_user_recv(scif_epd_t epd, void __user *msg, int len, int flags) +{ + struct scif_endpt *ep = (struct scif_endpt *)epd; + int err = 0; + int recv_len = 0; + char *tmp; + int loop_len; + int chunk_len = min(len, (1 << (MAX_ORDER + PAGE_SHIFT - 1))); + + dev_dbg(scif_info.mdev.this_device, + "SCIFAPI recv (U): ep %p %s\n", ep, scif_ep_states[ep->state]); + if (!len) + return 0; + + err = scif_msg_param_check(epd, len, flags); + if (err) + goto recv_err; + + tmp = kmalloc(chunk_len, GFP_KERNEL); + if (!tmp) { + err = -ENOMEM; + goto recv_err; + } + /* + * Grabbing the lock before breaking up the transfer in + * multiple chunks is required to ensure that messages do + * not get fragmented and reordered. + */ + mutex_lock(&ep->recvlock); + while (recv_len != len) { + loop_len = len - recv_len; + loop_len = min(chunk_len, loop_len); + err = _scif_recv(epd, tmp, loop_len, flags); + if (err < 0) + goto recv_free_err; + if (copy_to_user(msg, tmp, err)) { + err = -EFAULT; + goto recv_free_err; + } + recv_len += err; + msg += err; + if (err != loop_len) + goto recv_free_err; + } +recv_free_err: + mutex_unlock(&ep->recvlock); + kfree(tmp); +recv_err: + return err < 0 ? err : recv_len; +} + +/** + * scif_send() - Send data to connection queue + * @epd: The end point returned from scif_open() + * @msg: Address to place data + * @len: Length to receive + * @flags: blocking or non blocking + * + * This function is called from the kernel mode only and is + * a wrapper for _scif_send(). + */ +int scif_send(scif_epd_t epd, void *msg, int len, int flags) +{ + struct scif_endpt *ep = (struct scif_endpt *)epd; + int ret; + + dev_dbg(scif_info.mdev.this_device, + "SCIFAPI send (K): ep %p %s\n", ep, scif_ep_states[ep->state]); + if (!len) + return 0; + + ret = scif_msg_param_check(epd, len, flags); + if (ret) + return ret; + if (!ep->remote_dev) + return -ENOTCONN; + /* + * Grab the mutex lock in the blocking case only + * to ensure messages do not get fragmented/reordered. + * The non blocking mode is protected using spin locks + * in _scif_send(). + */ + if (flags & SCIF_SEND_BLOCK) + mutex_lock(&ep->sendlock); + + ret = _scif_send(epd, msg, len, flags); + + if (flags & SCIF_SEND_BLOCK) + mutex_unlock(&ep->sendlock); + return ret; +} +EXPORT_SYMBOL_GPL(scif_send); + +/** + * scif_recv() - Receive data from connection queue + * @epd: The end point returned from scif_open() + * @msg: Address to place data + * @len: Length to receive + * @flags: blocking or non blocking + * + * This function is called from the kernel mode only and is + * a wrapper for _scif_recv(). + */ +int scif_recv(scif_epd_t epd, void *msg, int len, int flags) +{ + struct scif_endpt *ep = (struct scif_endpt *)epd; + int ret; + + dev_dbg(scif_info.mdev.this_device, + "SCIFAPI recv (K): ep %p %s\n", ep, scif_ep_states[ep->state]); + if (!len) + return 0; + + ret = scif_msg_param_check(epd, len, flags); + if (ret) + return ret; + /* + * Grab the mutex lock in the blocking case only + * to ensure messages do not get fragmented/reordered. + * The non blocking mode is protected using spin locks + * in _scif_send(). + */ + if (flags & SCIF_RECV_BLOCK) + mutex_lock(&ep->recvlock); + + ret = _scif_recv(epd, msg, len, flags); + + if (flags & SCIF_RECV_BLOCK) + mutex_unlock(&ep->recvlock); + + return ret; +} +EXPORT_SYMBOL_GPL(scif_recv); + +static inline void _scif_poll_wait(struct file *f, wait_queue_head_t *wq, + poll_table *p, struct scif_endpt *ep) +{ + /* + * Because poll_wait makes a GFP_KERNEL allocation, give up the lock + * and regrab it afterwards. Because the endpoint state might have + * changed while the lock was given up, the state must be checked + * again after re-acquiring the lock. The code in __scif_pollfd(..) + * does this. + */ + spin_unlock(&ep->lock); + poll_wait(f, wq, p); + spin_lock(&ep->lock); +} + +__poll_t +__scif_pollfd(struct file *f, poll_table *wait, struct scif_endpt *ep) +{ + __poll_t mask = 0; + + dev_dbg(scif_info.mdev.this_device, + "SCIFAPI pollfd: ep %p %s\n", ep, scif_ep_states[ep->state]); + + spin_lock(&ep->lock); + + /* Endpoint is waiting for a non-blocking connect to complete */ + if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) { + _scif_poll_wait(f, &ep->conn_pend_wq, wait, ep); + if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) { + if (ep->state == SCIFEP_CONNECTED || + ep->state == SCIFEP_DISCONNECTED || + ep->conn_err) + mask |= EPOLLOUT; + goto exit; + } + } + + /* Endpoint is listening for incoming connection requests */ + if (ep->state == SCIFEP_LISTENING) { + _scif_poll_wait(f, &ep->conwq, wait, ep); + if (ep->state == SCIFEP_LISTENING) { + if (ep->conreqcnt) + mask |= EPOLLIN; + goto exit; + } + } + + /* Endpoint is connected or disconnected */ + if (ep->state == SCIFEP_CONNECTED || ep->state == SCIFEP_DISCONNECTED) { + if (poll_requested_events(wait) & EPOLLIN) + _scif_poll_wait(f, &ep->recvwq, wait, ep); + if (poll_requested_events(wait) & EPOLLOUT) + _scif_poll_wait(f, &ep->sendwq, wait, ep); + if (ep->state == SCIFEP_CONNECTED || + ep->state == SCIFEP_DISCONNECTED) { + /* Data can be read without blocking */ + if (scif_rb_count(&ep->qp_info.qp->inbound_q, 1)) + mask |= EPOLLIN; + /* Data can be written without blocking */ + if (scif_rb_space(&ep->qp_info.qp->outbound_q)) + mask |= EPOLLOUT; + /* Return EPOLLHUP if endpoint is disconnected */ + if (ep->state == SCIFEP_DISCONNECTED) + mask |= EPOLLHUP; + goto exit; + } + } + + /* Return EPOLLERR if the endpoint is in none of the above states */ + mask |= EPOLLERR; +exit: + spin_unlock(&ep->lock); + return mask; +} + +/** + * scif_poll() - Kernel mode SCIF poll + * @ufds: Array of scif_pollepd structures containing the end points + * and events to poll on + * @nfds: Size of the ufds array + * @timeout_msecs: Timeout in msecs, -ve implies infinite timeout + * + * The code flow in this function is based on do_poll(..) in select.c + * + * Returns the number of endpoints which have pending events or 0 in + * the event of a timeout. If a signal is used for wake up, -EINTR is + * returned. + */ +int +scif_poll(struct scif_pollepd *ufds, unsigned int nfds, long timeout_msecs) +{ + struct poll_wqueues table; + poll_table *pt; + int i, count = 0, timed_out = timeout_msecs == 0; + __poll_t mask; + u64 timeout = timeout_msecs < 0 ? MAX_SCHEDULE_TIMEOUT + : msecs_to_jiffies(timeout_msecs); + + poll_initwait(&table); + pt = &table.pt; + while (1) { + for (i = 0; i < nfds; i++) { + pt->_key = ufds[i].events | EPOLLERR | EPOLLHUP; + mask = __scif_pollfd(ufds[i].epd->anon, + pt, ufds[i].epd); + mask &= ufds[i].events | EPOLLERR | EPOLLHUP; + if (mask) { + count++; + pt->_qproc = NULL; + } + ufds[i].revents = mask; + } + pt->_qproc = NULL; + if (!count) { + count = table.error; + if (signal_pending(current)) + count = -EINTR; + } + if (count || timed_out) + break; + + if (!schedule_timeout_interruptible(timeout)) + timed_out = 1; + } + poll_freewait(&table); + return count; +} +EXPORT_SYMBOL_GPL(scif_poll); + +int scif_get_node_ids(u16 *nodes, int len, u16 *self) +{ + int online = 0; + int offset = 0; + int node; + + if (!scif_is_mgmt_node()) + scif_get_node_info(); + + *self = scif_info.nodeid; + mutex_lock(&scif_info.conflock); + len = min_t(int, len, scif_info.total); + for (node = 0; node <= scif_info.maxid; node++) { + if (_scifdev_alive(&scif_dev[node])) { + online++; + if (offset < len) + nodes[offset++] = node; + } + } + dev_dbg(scif_info.mdev.this_device, + "SCIFAPI get_node_ids total %d online %d filled in %d nodes\n", + scif_info.total, online, offset); + mutex_unlock(&scif_info.conflock); + + return online; +} +EXPORT_SYMBOL_GPL(scif_get_node_ids); + +static int scif_add_client_dev(struct device *dev, struct subsys_interface *si) +{ + struct scif_client *client = + container_of(si, struct scif_client, si); + struct scif_peer_dev *spdev = + container_of(dev, struct scif_peer_dev, dev); + + if (client->probe) + client->probe(spdev); + return 0; +} + +static void scif_remove_client_dev(struct device *dev, + struct subsys_interface *si) +{ + struct scif_client *client = + container_of(si, struct scif_client, si); + struct scif_peer_dev *spdev = + container_of(dev, struct scif_peer_dev, dev); + + if (client->remove) + client->remove(spdev); +} + +void scif_client_unregister(struct scif_client *client) +{ + subsys_interface_unregister(&client->si); +} +EXPORT_SYMBOL_GPL(scif_client_unregister); + +int scif_client_register(struct scif_client *client) +{ + struct subsys_interface *si = &client->si; + + si->name = client->name; + si->subsys = &scif_peer_bus; + si->add_dev = scif_add_client_dev; + si->remove_dev = scif_remove_client_dev; + + return subsys_interface_register(&client->si); +} +EXPORT_SYMBOL_GPL(scif_client_register); diff --git a/drivers/misc/mic/scif/scif_debugfs.c b/drivers/misc/mic/scif/scif_debugfs.c new file mode 100644 index 000000000..6884dad97 --- /dev/null +++ b/drivers/misc/mic/scif/scif_debugfs.c @@ -0,0 +1,162 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2014 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Intel SCIF driver. + * + */ +#include +#include + +#include "../common/mic_dev.h" +#include "scif_main.h" + +/* Debugfs parent dir */ +static struct dentry *scif_dbg; + +static int scif_dev_test(struct seq_file *s, void *unused) +{ + int node; + + seq_printf(s, "Total Nodes %d Self Node Id %d Maxid %d\n", + scif_info.total, scif_info.nodeid, + scif_info.maxid); + + if (!scif_dev) + return 0; + + seq_printf(s, "%-16s\t%-16s\n", "node_id", "state"); + + for (node = 0; node <= scif_info.maxid; node++) + seq_printf(s, "%-16d\t%-16s\n", scif_dev[node].node, + _scifdev_alive(&scif_dev[node]) ? + "Running" : "Offline"); + return 0; +} + +static int scif_dev_test_open(struct inode *inode, struct file *file) +{ + return single_open(file, scif_dev_test, inode->i_private); +} + +static int scif_dev_test_release(struct inode *inode, struct file *file) +{ + return single_release(inode, file); +} + +static const struct file_operations scif_dev_ops = { + .owner = THIS_MODULE, + .open = scif_dev_test_open, + .read = seq_read, + .llseek = seq_lseek, + .release = scif_dev_test_release +}; + +static void scif_display_window(struct scif_window *window, struct seq_file *s) +{ + int j; + struct scatterlist *sg; + scif_pinned_pages_t pin = window->pinned_pages; + + seq_printf(s, "window %p type %d temp %d offset 0x%llx ", + window, window->type, window->temp, window->offset); + seq_printf(s, "nr_pages 0x%llx nr_contig_chunks 0x%x prot %d ", + window->nr_pages, window->nr_contig_chunks, window->prot); + seq_printf(s, "ref_count %d magic 0x%llx peer_window 0x%llx ", + window->ref_count, window->magic, window->peer_window); + seq_printf(s, "unreg_state 0x%x va_for_temp 0x%lx\n", + window->unreg_state, window->va_for_temp); + + for (j = 0; j < window->nr_contig_chunks; j++) + seq_printf(s, "page[%d] dma_addr 0x%llx num_pages 0x%llx\n", j, + window->dma_addr[j], window->num_pages[j]); + + if (window->type == SCIF_WINDOW_SELF && pin) + for (j = 0; j < window->nr_pages; j++) + seq_printf(s, "page[%d] = pinned_pages %p address %p\n", + j, pin->pages[j], + page_address(pin->pages[j])); + + if (window->st) + for_each_sg(window->st->sgl, sg, window->st->nents, j) + seq_printf(s, "sg[%d] dma addr 0x%llx length 0x%x\n", + j, sg_dma_address(sg), sg_dma_len(sg)); +} + +static void scif_display_all_windows(struct list_head *head, struct seq_file *s) +{ + struct list_head *item; + struct scif_window *window; + + list_for_each(item, head) { + window = list_entry(item, struct scif_window, list); + scif_display_window(window, s); + } +} + +static int scif_rma_test(struct seq_file *s, void *unused) +{ + struct scif_endpt *ep; + struct list_head *pos; + + mutex_lock(&scif_info.connlock); + list_for_each(pos, &scif_info.connected) { + ep = list_entry(pos, struct scif_endpt, list); + seq_printf(s, "ep %p self windows\n", ep); + mutex_lock(&ep->rma_info.rma_lock); + scif_display_all_windows(&ep->rma_info.reg_list, s); + seq_printf(s, "ep %p remote windows\n", ep); + scif_display_all_windows(&ep->rma_info.remote_reg_list, s); + mutex_unlock(&ep->rma_info.rma_lock); + } + mutex_unlock(&scif_info.connlock); + return 0; +} + +static int scif_rma_test_open(struct inode *inode, struct file *file) +{ + return single_open(file, scif_rma_test, inode->i_private); +} + +static int scif_rma_test_release(struct inode *inode, struct file *file) +{ + return single_release(inode, file); +} + +static const struct file_operations scif_rma_ops = { + .owner = THIS_MODULE, + .open = scif_rma_test_open, + .read = seq_read, + .llseek = seq_lseek, + .release = scif_rma_test_release +}; + +void __init scif_init_debugfs(void) +{ + scif_dbg = debugfs_create_dir(KBUILD_MODNAME, NULL); + if (!scif_dbg) { + dev_err(scif_info.mdev.this_device, + "can't create debugfs dir scif\n"); + return; + } + + debugfs_create_file("scif_dev", 0444, scif_dbg, NULL, &scif_dev_ops); + debugfs_create_file("scif_rma", 0444, scif_dbg, NULL, &scif_rma_ops); + debugfs_create_u8("en_msg_log", 0666, scif_dbg, &scif_info.en_msg_log); + debugfs_create_u8("p2p_enable", 0666, scif_dbg, &scif_info.p2p_enable); +} + +void scif_exit_debugfs(void) +{ + debugfs_remove_recursive(scif_dbg); +} diff --git a/drivers/misc/mic/scif/scif_dma.c b/drivers/misc/mic/scif/scif_dma.c new file mode 100644 index 000000000..6369aeaa7 --- /dev/null +++ b/drivers/misc/mic/scif/scif_dma.c @@ -0,0 +1,1960 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2015 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Intel SCIF driver. + * + */ +#include "scif_main.h" +#include "scif_map.h" + +/* + * struct scif_dma_comp_cb - SCIF DMA completion callback + * + * @dma_completion_func: DMA completion callback + * @cb_cookie: DMA completion callback cookie + * @temp_buf: Temporary buffer + * @temp_buf_to_free: Temporary buffer to be freed + * @is_cache: Is a kmem_cache allocated buffer + * @dst_offset: Destination registration offset + * @dst_window: Destination registration window + * @len: Length of the temp buffer + * @temp_phys: DMA address of the temp buffer + * @sdev: The SCIF device + * @header_padding: padding for cache line alignment + */ +struct scif_dma_comp_cb { + void (*dma_completion_func)(void *cookie); + void *cb_cookie; + u8 *temp_buf; + u8 *temp_buf_to_free; + bool is_cache; + s64 dst_offset; + struct scif_window *dst_window; + size_t len; + dma_addr_t temp_phys; + struct scif_dev *sdev; + int header_padding; +}; + +/** + * struct scif_copy_work - Work for DMA copy + * + * @src_offset: Starting source offset + * @dst_offset: Starting destination offset + * @src_window: Starting src registered window + * @dst_window: Starting dst registered window + * @loopback: true if this is a loopback DMA transfer + * @len: Length of the transfer + * @comp_cb: DMA copy completion callback + * @remote_dev: The remote SCIF peer device + * @fence_type: polling or interrupt based + * @ordered: is this a tail byte ordered DMA transfer + */ +struct scif_copy_work { + s64 src_offset; + s64 dst_offset; + struct scif_window *src_window; + struct scif_window *dst_window; + int loopback; + size_t len; + struct scif_dma_comp_cb *comp_cb; + struct scif_dev *remote_dev; + int fence_type; + bool ordered; +}; + +/** + * scif_reserve_dma_chan: + * @ep: Endpoint Descriptor. + * + * This routine reserves a DMA channel for a particular + * endpoint. All DMA transfers for an endpoint are always + * programmed on the same DMA channel. + */ +int scif_reserve_dma_chan(struct scif_endpt *ep) +{ + int err = 0; + struct scif_dev *scifdev; + struct scif_hw_dev *sdev; + struct dma_chan *chan; + + /* Loopback DMAs are not supported on the management node */ + if (!scif_info.nodeid && scifdev_self(ep->remote_dev)) + return 0; + if (scif_info.nodeid) + scifdev = &scif_dev[0]; + else + scifdev = ep->remote_dev; + sdev = scifdev->sdev; + if (!sdev->num_dma_ch) + return -ENODEV; + chan = sdev->dma_ch[scifdev->dma_ch_idx]; + scifdev->dma_ch_idx = (scifdev->dma_ch_idx + 1) % sdev->num_dma_ch; + mutex_lock(&ep->rma_info.rma_lock); + ep->rma_info.dma_chan = chan; + mutex_unlock(&ep->rma_info.rma_lock); + return err; +} + +#ifdef CONFIG_MMU_NOTIFIER +/** + * scif_rma_destroy_tcw: + * + * This routine destroys temporary cached windows + */ +static +void __scif_rma_destroy_tcw(struct scif_mmu_notif *mmn, + u64 start, u64 len) +{ + struct list_head *item, *tmp; + struct scif_window *window; + u64 start_va, end_va; + u64 end = start + len; + + if (end <= start) + return; + + list_for_each_safe(item, tmp, &mmn->tc_reg_list) { + window = list_entry(item, struct scif_window, list); + if (!len) + break; + start_va = window->va_for_temp; + end_va = start_va + (window->nr_pages << PAGE_SHIFT); + if (start < start_va && end <= start_va) + break; + if (start >= end_va) + continue; + __scif_rma_destroy_tcw_helper(window); + } +} + +static void scif_rma_destroy_tcw(struct scif_mmu_notif *mmn, u64 start, u64 len) +{ + struct scif_endpt *ep = mmn->ep; + + spin_lock(&ep->rma_info.tc_lock); + __scif_rma_destroy_tcw(mmn, start, len); + spin_unlock(&ep->rma_info.tc_lock); +} + +static void scif_rma_destroy_tcw_ep(struct scif_endpt *ep) +{ + struct list_head *item, *tmp; + struct scif_mmu_notif *mmn; + + list_for_each_safe(item, tmp, &ep->rma_info.mmn_list) { + mmn = list_entry(item, struct scif_mmu_notif, list); + scif_rma_destroy_tcw(mmn, 0, ULONG_MAX); + } +} + +static void __scif_rma_destroy_tcw_ep(struct scif_endpt *ep) +{ + struct list_head *item, *tmp; + struct scif_mmu_notif *mmn; + + spin_lock(&ep->rma_info.tc_lock); + list_for_each_safe(item, tmp, &ep->rma_info.mmn_list) { + mmn = list_entry(item, struct scif_mmu_notif, list); + __scif_rma_destroy_tcw(mmn, 0, ULONG_MAX); + } + spin_unlock(&ep->rma_info.tc_lock); +} + +static bool scif_rma_tc_can_cache(struct scif_endpt *ep, size_t cur_bytes) +{ + if ((cur_bytes >> PAGE_SHIFT) > scif_info.rma_tc_limit) + return false; + if ((atomic_read(&ep->rma_info.tcw_total_pages) + + (cur_bytes >> PAGE_SHIFT)) > + scif_info.rma_tc_limit) { + dev_info(scif_info.mdev.this_device, + "%s %d total=%d, current=%zu reached max\n", + __func__, __LINE__, + atomic_read(&ep->rma_info.tcw_total_pages), + (1 + (cur_bytes >> PAGE_SHIFT))); + scif_rma_destroy_tcw_invalid(); + __scif_rma_destroy_tcw_ep(ep); + } + return true; +} + +static void scif_mmu_notifier_release(struct mmu_notifier *mn, + struct mm_struct *mm) +{ + struct scif_mmu_notif *mmn; + + mmn = container_of(mn, struct scif_mmu_notif, ep_mmu_notifier); + scif_rma_destroy_tcw(mmn, 0, ULONG_MAX); + schedule_work(&scif_info.misc_work); +} + +static int scif_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long start, + unsigned long end, + bool blockable) +{ + struct scif_mmu_notif *mmn; + + mmn = container_of(mn, struct scif_mmu_notif, ep_mmu_notifier); + scif_rma_destroy_tcw(mmn, start, end - start); + + return 0; +} + +static void scif_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long start, + unsigned long end) +{ + /* + * Nothing to do here, everything needed was done in + * invalidate_range_start. + */ +} + +static const struct mmu_notifier_ops scif_mmu_notifier_ops = { + .release = scif_mmu_notifier_release, + .clear_flush_young = NULL, + .invalidate_range_start = scif_mmu_notifier_invalidate_range_start, + .invalidate_range_end = scif_mmu_notifier_invalidate_range_end}; + +static void scif_ep_unregister_mmu_notifier(struct scif_endpt *ep) +{ + struct scif_endpt_rma_info *rma = &ep->rma_info; + struct scif_mmu_notif *mmn = NULL; + struct list_head *item, *tmp; + + mutex_lock(&ep->rma_info.mmn_lock); + list_for_each_safe(item, tmp, &rma->mmn_list) { + mmn = list_entry(item, struct scif_mmu_notif, list); + mmu_notifier_unregister(&mmn->ep_mmu_notifier, mmn->mm); + list_del(item); + kfree(mmn); + } + mutex_unlock(&ep->rma_info.mmn_lock); +} + +static void scif_init_mmu_notifier(struct scif_mmu_notif *mmn, + struct mm_struct *mm, struct scif_endpt *ep) +{ + mmn->ep = ep; + mmn->mm = mm; + mmn->ep_mmu_notifier.ops = &scif_mmu_notifier_ops; + INIT_LIST_HEAD(&mmn->list); + INIT_LIST_HEAD(&mmn->tc_reg_list); +} + +static struct scif_mmu_notif * +scif_find_mmu_notifier(struct mm_struct *mm, struct scif_endpt_rma_info *rma) +{ + struct scif_mmu_notif *mmn; + + list_for_each_entry(mmn, &rma->mmn_list, list) + if (mmn->mm == mm) + return mmn; + return NULL; +} + +static struct scif_mmu_notif * +scif_add_mmu_notifier(struct mm_struct *mm, struct scif_endpt *ep) +{ + struct scif_mmu_notif *mmn + = kzalloc(sizeof(*mmn), GFP_KERNEL); + + if (!mmn) + return ERR_PTR(-ENOMEM); + + scif_init_mmu_notifier(mmn, current->mm, ep); + if (mmu_notifier_register(&mmn->ep_mmu_notifier, current->mm)) { + kfree(mmn); + return ERR_PTR(-EBUSY); + } + list_add(&mmn->list, &ep->rma_info.mmn_list); + return mmn; +} + +/* + * Called from the misc thread to destroy temporary cached windows and + * unregister the MMU notifier for the SCIF endpoint. + */ +void scif_mmu_notif_handler(struct work_struct *work) +{ + struct list_head *pos, *tmpq; + struct scif_endpt *ep; +restart: + scif_rma_destroy_tcw_invalid(); + spin_lock(&scif_info.rmalock); + list_for_each_safe(pos, tmpq, &scif_info.mmu_notif_cleanup) { + ep = list_entry(pos, struct scif_endpt, mmu_list); + list_del(&ep->mmu_list); + spin_unlock(&scif_info.rmalock); + scif_rma_destroy_tcw_ep(ep); + scif_ep_unregister_mmu_notifier(ep); + goto restart; + } + spin_unlock(&scif_info.rmalock); +} + +static bool scif_is_set_reg_cache(int flags) +{ + return !!(flags & SCIF_RMA_USECACHE); +} +#else +static struct scif_mmu_notif * +scif_find_mmu_notifier(struct mm_struct *mm, + struct scif_endpt_rma_info *rma) +{ + return NULL; +} + +static struct scif_mmu_notif * +scif_add_mmu_notifier(struct mm_struct *mm, struct scif_endpt *ep) +{ + return NULL; +} + +void scif_mmu_notif_handler(struct work_struct *work) +{ +} + +static bool scif_is_set_reg_cache(int flags) +{ + return false; +} + +static bool scif_rma_tc_can_cache(struct scif_endpt *ep, size_t cur_bytes) +{ + return false; +} +#endif + +/** + * scif_register_temp: + * @epd: End Point Descriptor. + * @addr: virtual address to/from which to copy + * @len: length of range to copy + * @out_offset: computed offset returned by reference. + * @out_window: allocated registered window returned by reference. + * + * Create a temporary registered window. The peer will not know about this + * window. This API is used for scif_vreadfrom()/scif_vwriteto() API's. + */ +static int +scif_register_temp(scif_epd_t epd, unsigned long addr, size_t len, int prot, + off_t *out_offset, struct scif_window **out_window) +{ + struct scif_endpt *ep = (struct scif_endpt *)epd; + int err; + scif_pinned_pages_t pinned_pages; + size_t aligned_len; + + aligned_len = ALIGN(len, PAGE_SIZE); + + err = __scif_pin_pages((void *)(addr & PAGE_MASK), + aligned_len, &prot, 0, &pinned_pages); + if (err) + return err; + + pinned_pages->prot = prot; + + /* Compute the offset for this registration */ + err = scif_get_window_offset(ep, 0, 0, + aligned_len >> PAGE_SHIFT, + (s64 *)out_offset); + if (err) + goto error_unpin; + + /* Allocate and prepare self registration window */ + *out_window = scif_create_window(ep, aligned_len >> PAGE_SHIFT, + *out_offset, true); + if (!*out_window) { + scif_free_window_offset(ep, NULL, *out_offset); + err = -ENOMEM; + goto error_unpin; + } + + (*out_window)->pinned_pages = pinned_pages; + (*out_window)->nr_pages = pinned_pages->nr_pages; + (*out_window)->prot = pinned_pages->prot; + + (*out_window)->va_for_temp = addr & PAGE_MASK; + err = scif_map_window(ep->remote_dev, *out_window); + if (err) { + /* Something went wrong! Rollback */ + scif_destroy_window(ep, *out_window); + *out_window = NULL; + } else { + *out_offset |= (addr - (*out_window)->va_for_temp); + } + return err; +error_unpin: + if (err) + dev_err(&ep->remote_dev->sdev->dev, + "%s %d err %d\n", __func__, __LINE__, err); + scif_unpin_pages(pinned_pages); + return err; +} + +#define SCIF_DMA_TO (3 * HZ) + +/* + * scif_sync_dma - Program a DMA without an interrupt descriptor + * + * @dev - The address of the pointer to the device instance used + * for DMA registration. + * @chan - DMA channel to be used. + * @sync_wait: Wait for DMA to complete? + * + * Return 0 on success and -errno on error. + */ +static int scif_sync_dma(struct scif_hw_dev *sdev, struct dma_chan *chan, + bool sync_wait) +{ + int err = 0; + struct dma_async_tx_descriptor *tx = NULL; + enum dma_ctrl_flags flags = DMA_PREP_FENCE; + dma_cookie_t cookie; + struct dma_device *ddev; + + if (!chan) { + err = -EIO; + dev_err(&sdev->dev, "%s %d err %d\n", + __func__, __LINE__, err); + return err; + } + ddev = chan->device; + + tx = ddev->device_prep_dma_memcpy(chan, 0, 0, 0, flags); + if (!tx) { + err = -ENOMEM; + dev_err(&sdev->dev, "%s %d err %d\n", + __func__, __LINE__, err); + goto release; + } + cookie = tx->tx_submit(tx); + + if (dma_submit_error(cookie)) { + err = -ENOMEM; + dev_err(&sdev->dev, "%s %d err %d\n", + __func__, __LINE__, err); + goto release; + } + if (!sync_wait) { + dma_async_issue_pending(chan); + } else { + if (dma_sync_wait(chan, cookie) == DMA_COMPLETE) { + err = 0; + } else { + err = -EIO; + dev_err(&sdev->dev, "%s %d err %d\n", + __func__, __LINE__, err); + } + } +release: + return err; +} + +static void scif_dma_callback(void *arg) +{ + struct completion *done = (struct completion *)arg; + + complete(done); +} + +#define SCIF_DMA_SYNC_WAIT true +#define SCIF_DMA_POLL BIT(0) +#define SCIF_DMA_INTR BIT(1) + +/* + * scif_async_dma - Program a DMA with an interrupt descriptor + * + * @dev - The address of the pointer to the device instance used + * for DMA registration. + * @chan - DMA channel to be used. + * Return 0 on success and -errno on error. + */ +static int scif_async_dma(struct scif_hw_dev *sdev, struct dma_chan *chan) +{ + int err = 0; + struct dma_device *ddev; + struct dma_async_tx_descriptor *tx = NULL; + enum dma_ctrl_flags flags = DMA_PREP_INTERRUPT | DMA_PREP_FENCE; + DECLARE_COMPLETION_ONSTACK(done_wait); + dma_cookie_t cookie; + enum dma_status status; + + if (!chan) { + err = -EIO; + dev_err(&sdev->dev, "%s %d err %d\n", + __func__, __LINE__, err); + return err; + } + ddev = chan->device; + + tx = ddev->device_prep_dma_memcpy(chan, 0, 0, 0, flags); + if (!tx) { + err = -ENOMEM; + dev_err(&sdev->dev, "%s %d err %d\n", + __func__, __LINE__, err); + goto release; + } + reinit_completion(&done_wait); + tx->callback = scif_dma_callback; + tx->callback_param = &done_wait; + cookie = tx->tx_submit(tx); + + if (dma_submit_error(cookie)) { + err = -ENOMEM; + dev_err(&sdev->dev, "%s %d err %d\n", + __func__, __LINE__, err); + goto release; + } + dma_async_issue_pending(chan); + + err = wait_for_completion_timeout(&done_wait, SCIF_DMA_TO); + if (!err) { + err = -EIO; + dev_err(&sdev->dev, "%s %d err %d\n", + __func__, __LINE__, err); + goto release; + } + err = 0; + status = dma_async_is_tx_complete(chan, cookie, NULL, NULL); + if (status != DMA_COMPLETE) { + err = -EIO; + dev_err(&sdev->dev, "%s %d err %d\n", + __func__, __LINE__, err); + goto release; + } +release: + return err; +} + +/* + * scif_drain_dma_poll - Drain all outstanding DMA operations for a particular + * DMA channel via polling. + * + * @sdev - The SCIF device + * @chan - DMA channel + * Return 0 on success and -errno on error. + */ +static int scif_drain_dma_poll(struct scif_hw_dev *sdev, struct dma_chan *chan) +{ + if (!chan) + return -EINVAL; + return scif_sync_dma(sdev, chan, SCIF_DMA_SYNC_WAIT); +} + +/* + * scif_drain_dma_intr - Drain all outstanding DMA operations for a particular + * DMA channel via interrupt based blocking wait. + * + * @sdev - The SCIF device + * @chan - DMA channel + * Return 0 on success and -errno on error. + */ +int scif_drain_dma_intr(struct scif_hw_dev *sdev, struct dma_chan *chan) +{ + if (!chan) + return -EINVAL; + return scif_async_dma(sdev, chan); +} + +/** + * scif_rma_destroy_windows: + * + * This routine destroys all windows queued for cleanup + */ +void scif_rma_destroy_windows(void) +{ + struct list_head *item, *tmp; + struct scif_window *window; + struct scif_endpt *ep; + struct dma_chan *chan; + + might_sleep(); +restart: + spin_lock(&scif_info.rmalock); + list_for_each_safe(item, tmp, &scif_info.rma) { + window = list_entry(item, struct scif_window, + list); + ep = (struct scif_endpt *)window->ep; + chan = ep->rma_info.dma_chan; + + list_del_init(&window->list); + spin_unlock(&scif_info.rmalock); + if (!chan || !scifdev_alive(ep) || + !scif_drain_dma_intr(ep->remote_dev->sdev, + ep->rma_info.dma_chan)) + /* Remove window from global list */ + window->unreg_state = OP_COMPLETED; + else + dev_warn(&ep->remote_dev->sdev->dev, + "DMA engine hung?\n"); + if (window->unreg_state == OP_COMPLETED) { + if (window->type == SCIF_WINDOW_SELF) + scif_destroy_window(ep, window); + else + scif_destroy_remote_window(window); + atomic_dec(&ep->rma_info.tw_refcount); + } + goto restart; + } + spin_unlock(&scif_info.rmalock); +} + +/** + * scif_rma_destroy_tcw: + * + * This routine destroys temporary cached registered windows + * which have been queued for cleanup. + */ +void scif_rma_destroy_tcw_invalid(void) +{ + struct list_head *item, *tmp; + struct scif_window *window; + struct scif_endpt *ep; + struct dma_chan *chan; + + might_sleep(); +restart: + spin_lock(&scif_info.rmalock); + list_for_each_safe(item, tmp, &scif_info.rma_tc) { + window = list_entry(item, struct scif_window, list); + ep = (struct scif_endpt *)window->ep; + chan = ep->rma_info.dma_chan; + list_del_init(&window->list); + spin_unlock(&scif_info.rmalock); + mutex_lock(&ep->rma_info.rma_lock); + if (!chan || !scifdev_alive(ep) || + !scif_drain_dma_intr(ep->remote_dev->sdev, + ep->rma_info.dma_chan)) { + atomic_sub(window->nr_pages, + &ep->rma_info.tcw_total_pages); + scif_destroy_window(ep, window); + atomic_dec(&ep->rma_info.tcw_refcount); + } else { + dev_warn(&ep->remote_dev->sdev->dev, + "DMA engine hung?\n"); + } + mutex_unlock(&ep->rma_info.rma_lock); + goto restart; + } + spin_unlock(&scif_info.rmalock); +} + +static inline +void *_get_local_va(off_t off, struct scif_window *window, size_t len) +{ + int page_nr = (off - window->offset) >> PAGE_SHIFT; + off_t page_off = off & ~PAGE_MASK; + void *va = NULL; + + if (window->type == SCIF_WINDOW_SELF) { + struct page **pages = window->pinned_pages->pages; + + va = page_address(pages[page_nr]) + page_off; + } + return va; +} + +static inline +void *ioremap_remote(off_t off, struct scif_window *window, + size_t len, struct scif_dev *dev, + struct scif_window_iter *iter) +{ + dma_addr_t phys = scif_off_to_dma_addr(window, off, NULL, iter); + + /* + * If the DMA address is not card relative then we need the DMA + * addresses to be an offset into the bar. The aperture base was already + * added so subtract it here since scif_ioremap is going to add it again + */ + if (!scifdev_self(dev) && window->type == SCIF_WINDOW_PEER && + dev->sdev->aper && !dev->sdev->card_rel_da) + phys = phys - dev->sdev->aper->pa; + return scif_ioremap(phys, len, dev); +} + +static inline void +iounmap_remote(void *virt, size_t size, struct scif_copy_work *work) +{ + scif_iounmap(virt, size, work->remote_dev); +} + +/* + * Takes care of ordering issue caused by + * 1. Hardware: Only in the case of cpu copy from mgmt node to card + * because of WC memory. + * 2. Software: If memcpy reorders copy instructions for optimization. + * This could happen at both mgmt node and card. + */ +static inline void +scif_ordered_memcpy_toio(char *dst, const char *src, size_t count) +{ + if (!count) + return; + + memcpy_toio((void __iomem __force *)dst, src, --count); + /* Order the last byte with the previous stores */ + wmb(); + *(dst + count) = *(src + count); +} + +static inline void scif_unaligned_cpy_toio(char *dst, const char *src, + size_t count, bool ordered) +{ + if (ordered) + scif_ordered_memcpy_toio(dst, src, count); + else + memcpy_toio((void __iomem __force *)dst, src, count); +} + +static inline +void scif_ordered_memcpy_fromio(char *dst, const char *src, size_t count) +{ + if (!count) + return; + + memcpy_fromio(dst, (void __iomem __force *)src, --count); + /* Order the last byte with the previous loads */ + rmb(); + *(dst + count) = *(src + count); +} + +static inline void scif_unaligned_cpy_fromio(char *dst, const char *src, + size_t count, bool ordered) +{ + if (ordered) + scif_ordered_memcpy_fromio(dst, src, count); + else + memcpy_fromio(dst, (void __iomem __force *)src, count); +} + +#define SCIF_RMA_ERROR_CODE (~(dma_addr_t)0x0) + +/* + * scif_off_to_dma_addr: + * Obtain the dma_addr given the window and the offset. + * @window: Registered window. + * @off: Window offset. + * @nr_bytes: Return the number of contiguous bytes till next DMA addr index. + * @index: Return the index of the dma_addr array found. + * @start_off: start offset of index of the dma addr array found. + * The nr_bytes provides the callee an estimate of the maximum possible + * DMA xfer possible while the index/start_off provide faster lookups + * for the next iteration. + */ +dma_addr_t scif_off_to_dma_addr(struct scif_window *window, s64 off, + size_t *nr_bytes, struct scif_window_iter *iter) +{ + int i, page_nr; + s64 start, end; + off_t page_off; + + if (window->nr_pages == window->nr_contig_chunks) { + page_nr = (off - window->offset) >> PAGE_SHIFT; + page_off = off & ~PAGE_MASK; + + if (nr_bytes) + *nr_bytes = PAGE_SIZE - page_off; + return window->dma_addr[page_nr] | page_off; + } + if (iter) { + i = iter->index; + start = iter->offset; + } else { + i = 0; + start = window->offset; + } + for (; i < window->nr_contig_chunks; i++) { + end = start + (window->num_pages[i] << PAGE_SHIFT); + if (off >= start && off < end) { + if (iter) { + iter->index = i; + iter->offset = start; + } + if (nr_bytes) + *nr_bytes = end - off; + return (window->dma_addr[i] + (off - start)); + } + start += (window->num_pages[i] << PAGE_SHIFT); + } + dev_err(scif_info.mdev.this_device, + "%s %d BUG. Addr not found? window %p off 0x%llx\n", + __func__, __LINE__, window, off); + return SCIF_RMA_ERROR_CODE; +} + +/* + * Copy between rma window and temporary buffer + */ +static void scif_rma_local_cpu_copy(s64 offset, struct scif_window *window, + u8 *temp, size_t rem_len, bool to_temp) +{ + void *window_virt; + size_t loop_len; + int offset_in_page; + s64 end_offset; + + offset_in_page = offset & ~PAGE_MASK; + loop_len = PAGE_SIZE - offset_in_page; + + if (rem_len < loop_len) + loop_len = rem_len; + + window_virt = _get_local_va(offset, window, loop_len); + if (!window_virt) + return; + if (to_temp) + memcpy(temp, window_virt, loop_len); + else + memcpy(window_virt, temp, loop_len); + + offset += loop_len; + temp += loop_len; + rem_len -= loop_len; + + end_offset = window->offset + + (window->nr_pages << PAGE_SHIFT); + while (rem_len) { + if (offset == end_offset) { + window = list_next_entry(window, list); + end_offset = window->offset + + (window->nr_pages << PAGE_SHIFT); + } + loop_len = min(PAGE_SIZE, rem_len); + window_virt = _get_local_va(offset, window, loop_len); + if (!window_virt) + return; + if (to_temp) + memcpy(temp, window_virt, loop_len); + else + memcpy(window_virt, temp, loop_len); + offset += loop_len; + temp += loop_len; + rem_len -= loop_len; + } +} + +/** + * scif_rma_completion_cb: + * @data: RMA cookie + * + * RMA interrupt completion callback. + */ +static void scif_rma_completion_cb(void *data) +{ + struct scif_dma_comp_cb *comp_cb = data; + + /* Free DMA Completion CB. */ + if (comp_cb->dst_window) + scif_rma_local_cpu_copy(comp_cb->dst_offset, + comp_cb->dst_window, + comp_cb->temp_buf + + comp_cb->header_padding, + comp_cb->len, false); + scif_unmap_single(comp_cb->temp_phys, comp_cb->sdev, + SCIF_KMEM_UNALIGNED_BUF_SIZE); + if (comp_cb->is_cache) + kmem_cache_free(unaligned_cache, + comp_cb->temp_buf_to_free); + else + kfree(comp_cb->temp_buf_to_free); +} + +/* Copies between temporary buffer and offsets provided in work */ +static int +scif_rma_list_dma_copy_unaligned(struct scif_copy_work *work, + u8 *temp, struct dma_chan *chan, + bool src_local) +{ + struct scif_dma_comp_cb *comp_cb = work->comp_cb; + dma_addr_t window_dma_addr, temp_dma_addr; + dma_addr_t temp_phys = comp_cb->temp_phys; + size_t loop_len, nr_contig_bytes = 0, remaining_len = work->len; + int offset_in_ca, ret = 0; + s64 end_offset, offset; + struct scif_window *window; + void *window_virt_addr; + size_t tail_len; + struct dma_async_tx_descriptor *tx; + struct dma_device *dev = chan->device; + dma_cookie_t cookie; + + if (src_local) { + offset = work->dst_offset; + window = work->dst_window; + } else { + offset = work->src_offset; + window = work->src_window; + } + + offset_in_ca = offset & (L1_CACHE_BYTES - 1); + if (offset_in_ca) { + loop_len = L1_CACHE_BYTES - offset_in_ca; + loop_len = min(loop_len, remaining_len); + window_virt_addr = ioremap_remote(offset, window, + loop_len, + work->remote_dev, + NULL); + if (!window_virt_addr) + return -ENOMEM; + if (src_local) + scif_unaligned_cpy_toio(window_virt_addr, temp, + loop_len, + work->ordered && + !(remaining_len - loop_len)); + else + scif_unaligned_cpy_fromio(temp, window_virt_addr, + loop_len, work->ordered && + !(remaining_len - loop_len)); + iounmap_remote(window_virt_addr, loop_len, work); + + offset += loop_len; + temp += loop_len; + temp_phys += loop_len; + remaining_len -= loop_len; + } + + offset_in_ca = offset & ~PAGE_MASK; + end_offset = window->offset + + (window->nr_pages << PAGE_SHIFT); + + tail_len = remaining_len & (L1_CACHE_BYTES - 1); + remaining_len -= tail_len; + while (remaining_len) { + if (offset == end_offset) { + window = list_next_entry(window, list); + end_offset = window->offset + + (window->nr_pages << PAGE_SHIFT); + } + if (scif_is_mgmt_node()) + temp_dma_addr = temp_phys; + else + /* Fix if we ever enable IOMMU on the card */ + temp_dma_addr = (dma_addr_t)virt_to_phys(temp); + window_dma_addr = scif_off_to_dma_addr(window, offset, + &nr_contig_bytes, + NULL); + loop_len = min(nr_contig_bytes, remaining_len); + if (src_local) { + if (work->ordered && !tail_len && + !(remaining_len - loop_len) && + loop_len != L1_CACHE_BYTES) { + /* + * Break up the last chunk of the transfer into + * two steps. if there is no tail to guarantee + * DMA ordering. SCIF_DMA_POLLING inserts + * a status update descriptor in step 1 which + * acts as a double sided synchronization fence + * for the DMA engine to ensure that the last + * cache line in step 2 is updated last. + */ + /* Step 1) DMA: Body Length - L1_CACHE_BYTES. */ + tx = + dev->device_prep_dma_memcpy(chan, + window_dma_addr, + temp_dma_addr, + loop_len - + L1_CACHE_BYTES, + DMA_PREP_FENCE); + if (!tx) { + ret = -ENOMEM; + goto err; + } + cookie = tx->tx_submit(tx); + if (dma_submit_error(cookie)) { + ret = -ENOMEM; + goto err; + } + dma_async_issue_pending(chan); + offset += (loop_len - L1_CACHE_BYTES); + temp_dma_addr += (loop_len - L1_CACHE_BYTES); + window_dma_addr += (loop_len - L1_CACHE_BYTES); + remaining_len -= (loop_len - L1_CACHE_BYTES); + loop_len = remaining_len; + + /* Step 2) DMA: L1_CACHE_BYTES */ + tx = + dev->device_prep_dma_memcpy(chan, + window_dma_addr, + temp_dma_addr, + loop_len, 0); + if (!tx) { + ret = -ENOMEM; + goto err; + } + cookie = tx->tx_submit(tx); + if (dma_submit_error(cookie)) { + ret = -ENOMEM; + goto err; + } + dma_async_issue_pending(chan); + } else { + tx = + dev->device_prep_dma_memcpy(chan, + window_dma_addr, + temp_dma_addr, + loop_len, 0); + if (!tx) { + ret = -ENOMEM; + goto err; + } + cookie = tx->tx_submit(tx); + if (dma_submit_error(cookie)) { + ret = -ENOMEM; + goto err; + } + dma_async_issue_pending(chan); + } + } else { + tx = dev->device_prep_dma_memcpy(chan, temp_dma_addr, + window_dma_addr, loop_len, 0); + if (!tx) { + ret = -ENOMEM; + goto err; + } + cookie = tx->tx_submit(tx); + if (dma_submit_error(cookie)) { + ret = -ENOMEM; + goto err; + } + dma_async_issue_pending(chan); + } + if (ret < 0) + goto err; + offset += loop_len; + temp += loop_len; + temp_phys += loop_len; + remaining_len -= loop_len; + offset_in_ca = 0; + } + if (tail_len) { + if (offset == end_offset) { + window = list_next_entry(window, list); + end_offset = window->offset + + (window->nr_pages << PAGE_SHIFT); + } + window_virt_addr = ioremap_remote(offset, window, tail_len, + work->remote_dev, + NULL); + if (!window_virt_addr) + return -ENOMEM; + /* + * The CPU copy for the tail bytes must be initiated only once + * previous DMA transfers for this endpoint have completed + * to guarantee ordering. + */ + if (work->ordered) { + struct scif_dev *rdev = work->remote_dev; + + ret = scif_drain_dma_intr(rdev->sdev, chan); + if (ret) + return ret; + } + if (src_local) + scif_unaligned_cpy_toio(window_virt_addr, temp, + tail_len, work->ordered); + else + scif_unaligned_cpy_fromio(temp, window_virt_addr, + tail_len, work->ordered); + iounmap_remote(window_virt_addr, tail_len, work); + } + tx = dev->device_prep_dma_memcpy(chan, 0, 0, 0, DMA_PREP_INTERRUPT); + if (!tx) { + ret = -ENOMEM; + return ret; + } + tx->callback = &scif_rma_completion_cb; + tx->callback_param = comp_cb; + cookie = tx->tx_submit(tx); + + if (dma_submit_error(cookie)) { + ret = -ENOMEM; + return ret; + } + dma_async_issue_pending(chan); + return 0; +err: + dev_err(scif_info.mdev.this_device, + "%s %d Desc Prog Failed ret %d\n", + __func__, __LINE__, ret); + return ret; +} + +/* + * _scif_rma_list_dma_copy_aligned: + * + * Traverse all the windows and perform DMA copy. + */ +static int _scif_rma_list_dma_copy_aligned(struct scif_copy_work *work, + struct dma_chan *chan) +{ + dma_addr_t src_dma_addr, dst_dma_addr; + size_t loop_len, remaining_len, src_contig_bytes = 0; + size_t dst_contig_bytes = 0; + struct scif_window_iter src_win_iter; + struct scif_window_iter dst_win_iter; + s64 end_src_offset, end_dst_offset; + struct scif_window *src_window = work->src_window; + struct scif_window *dst_window = work->dst_window; + s64 src_offset = work->src_offset, dst_offset = work->dst_offset; + int ret = 0; + struct dma_async_tx_descriptor *tx; + struct dma_device *dev = chan->device; + dma_cookie_t cookie; + + remaining_len = work->len; + + scif_init_window_iter(src_window, &src_win_iter); + scif_init_window_iter(dst_window, &dst_win_iter); + end_src_offset = src_window->offset + + (src_window->nr_pages << PAGE_SHIFT); + end_dst_offset = dst_window->offset + + (dst_window->nr_pages << PAGE_SHIFT); + while (remaining_len) { + if (src_offset == end_src_offset) { + src_window = list_next_entry(src_window, list); + end_src_offset = src_window->offset + + (src_window->nr_pages << PAGE_SHIFT); + scif_init_window_iter(src_window, &src_win_iter); + } + if (dst_offset == end_dst_offset) { + dst_window = list_next_entry(dst_window, list); + end_dst_offset = dst_window->offset + + (dst_window->nr_pages << PAGE_SHIFT); + scif_init_window_iter(dst_window, &dst_win_iter); + } + + /* compute dma addresses for transfer */ + src_dma_addr = scif_off_to_dma_addr(src_window, src_offset, + &src_contig_bytes, + &src_win_iter); + dst_dma_addr = scif_off_to_dma_addr(dst_window, dst_offset, + &dst_contig_bytes, + &dst_win_iter); + loop_len = min(src_contig_bytes, dst_contig_bytes); + loop_len = min(loop_len, remaining_len); + if (work->ordered && !(remaining_len - loop_len)) { + /* + * Break up the last chunk of the transfer into two + * steps to ensure that the last byte in step 2 is + * updated last. + */ + /* Step 1) DMA: Body Length - 1 */ + tx = dev->device_prep_dma_memcpy(chan, dst_dma_addr, + src_dma_addr, + loop_len - 1, + DMA_PREP_FENCE); + if (!tx) { + ret = -ENOMEM; + goto err; + } + cookie = tx->tx_submit(tx); + if (dma_submit_error(cookie)) { + ret = -ENOMEM; + goto err; + } + src_offset += (loop_len - 1); + dst_offset += (loop_len - 1); + src_dma_addr += (loop_len - 1); + dst_dma_addr += (loop_len - 1); + remaining_len -= (loop_len - 1); + loop_len = remaining_len; + + /* Step 2) DMA: 1 BYTES */ + tx = dev->device_prep_dma_memcpy(chan, dst_dma_addr, + src_dma_addr, loop_len, 0); + if (!tx) { + ret = -ENOMEM; + goto err; + } + cookie = tx->tx_submit(tx); + if (dma_submit_error(cookie)) { + ret = -ENOMEM; + goto err; + } + dma_async_issue_pending(chan); + } else { + tx = dev->device_prep_dma_memcpy(chan, dst_dma_addr, + src_dma_addr, loop_len, 0); + if (!tx) { + ret = -ENOMEM; + goto err; + } + cookie = tx->tx_submit(tx); + if (dma_submit_error(cookie)) { + ret = -ENOMEM; + goto err; + } + } + src_offset += loop_len; + dst_offset += loop_len; + remaining_len -= loop_len; + } + return ret; +err: + dev_err(scif_info.mdev.this_device, + "%s %d Desc Prog Failed ret %d\n", + __func__, __LINE__, ret); + return ret; +} + +/* + * scif_rma_list_dma_copy_aligned: + * + * Traverse all the windows and perform DMA copy. + */ +static int scif_rma_list_dma_copy_aligned(struct scif_copy_work *work, + struct dma_chan *chan) +{ + dma_addr_t src_dma_addr, dst_dma_addr; + size_t loop_len, remaining_len, tail_len, src_contig_bytes = 0; + size_t dst_contig_bytes = 0; + int src_cache_off; + s64 end_src_offset, end_dst_offset; + struct scif_window_iter src_win_iter; + struct scif_window_iter dst_win_iter; + void *src_virt, *dst_virt; + struct scif_window *src_window = work->src_window; + struct scif_window *dst_window = work->dst_window; + s64 src_offset = work->src_offset, dst_offset = work->dst_offset; + int ret = 0; + struct dma_async_tx_descriptor *tx; + struct dma_device *dev = chan->device; + dma_cookie_t cookie; + + remaining_len = work->len; + scif_init_window_iter(src_window, &src_win_iter); + scif_init_window_iter(dst_window, &dst_win_iter); + + src_cache_off = src_offset & (L1_CACHE_BYTES - 1); + if (src_cache_off != 0) { + /* Head */ + loop_len = L1_CACHE_BYTES - src_cache_off; + loop_len = min(loop_len, remaining_len); + src_dma_addr = __scif_off_to_dma_addr(src_window, src_offset); + dst_dma_addr = __scif_off_to_dma_addr(dst_window, dst_offset); + if (src_window->type == SCIF_WINDOW_SELF) + src_virt = _get_local_va(src_offset, src_window, + loop_len); + else + src_virt = ioremap_remote(src_offset, src_window, + loop_len, + work->remote_dev, NULL); + if (!src_virt) + return -ENOMEM; + if (dst_window->type == SCIF_WINDOW_SELF) + dst_virt = _get_local_va(dst_offset, dst_window, + loop_len); + else + dst_virt = ioremap_remote(dst_offset, dst_window, + loop_len, + work->remote_dev, NULL); + if (!dst_virt) { + if (src_window->type != SCIF_WINDOW_SELF) + iounmap_remote(src_virt, loop_len, work); + return -ENOMEM; + } + if (src_window->type == SCIF_WINDOW_SELF) + scif_unaligned_cpy_toio(dst_virt, src_virt, loop_len, + remaining_len == loop_len ? + work->ordered : false); + else + scif_unaligned_cpy_fromio(dst_virt, src_virt, loop_len, + remaining_len == loop_len ? + work->ordered : false); + if (src_window->type != SCIF_WINDOW_SELF) + iounmap_remote(src_virt, loop_len, work); + if (dst_window->type != SCIF_WINDOW_SELF) + iounmap_remote(dst_virt, loop_len, work); + src_offset += loop_len; + dst_offset += loop_len; + remaining_len -= loop_len; + } + + end_src_offset = src_window->offset + + (src_window->nr_pages << PAGE_SHIFT); + end_dst_offset = dst_window->offset + + (dst_window->nr_pages << PAGE_SHIFT); + tail_len = remaining_len & (L1_CACHE_BYTES - 1); + remaining_len -= tail_len; + while (remaining_len) { + if (src_offset == end_src_offset) { + src_window = list_next_entry(src_window, list); + end_src_offset = src_window->offset + + (src_window->nr_pages << PAGE_SHIFT); + scif_init_window_iter(src_window, &src_win_iter); + } + if (dst_offset == end_dst_offset) { + dst_window = list_next_entry(dst_window, list); + end_dst_offset = dst_window->offset + + (dst_window->nr_pages << PAGE_SHIFT); + scif_init_window_iter(dst_window, &dst_win_iter); + } + + /* compute dma addresses for transfer */ + src_dma_addr = scif_off_to_dma_addr(src_window, src_offset, + &src_contig_bytes, + &src_win_iter); + dst_dma_addr = scif_off_to_dma_addr(dst_window, dst_offset, + &dst_contig_bytes, + &dst_win_iter); + loop_len = min(src_contig_bytes, dst_contig_bytes); + loop_len = min(loop_len, remaining_len); + if (work->ordered && !tail_len && + !(remaining_len - loop_len)) { + /* + * Break up the last chunk of the transfer into two + * steps. if there is no tail to gurantee DMA ordering. + * Passing SCIF_DMA_POLLING inserts a status update + * descriptor in step 1 which acts as a double sided + * synchronization fence for the DMA engine to ensure + * that the last cache line in step 2 is updated last. + */ + /* Step 1) DMA: Body Length - L1_CACHE_BYTES. */ + tx = dev->device_prep_dma_memcpy(chan, dst_dma_addr, + src_dma_addr, + loop_len - + L1_CACHE_BYTES, + DMA_PREP_FENCE); + if (!tx) { + ret = -ENOMEM; + goto err; + } + cookie = tx->tx_submit(tx); + if (dma_submit_error(cookie)) { + ret = -ENOMEM; + goto err; + } + dma_async_issue_pending(chan); + src_offset += (loop_len - L1_CACHE_BYTES); + dst_offset += (loop_len - L1_CACHE_BYTES); + src_dma_addr += (loop_len - L1_CACHE_BYTES); + dst_dma_addr += (loop_len - L1_CACHE_BYTES); + remaining_len -= (loop_len - L1_CACHE_BYTES); + loop_len = remaining_len; + + /* Step 2) DMA: L1_CACHE_BYTES */ + tx = dev->device_prep_dma_memcpy(chan, dst_dma_addr, + src_dma_addr, + loop_len, 0); + if (!tx) { + ret = -ENOMEM; + goto err; + } + cookie = tx->tx_submit(tx); + if (dma_submit_error(cookie)) { + ret = -ENOMEM; + goto err; + } + dma_async_issue_pending(chan); + } else { + tx = dev->device_prep_dma_memcpy(chan, dst_dma_addr, + src_dma_addr, + loop_len, 0); + if (!tx) { + ret = -ENOMEM; + goto err; + } + cookie = tx->tx_submit(tx); + if (dma_submit_error(cookie)) { + ret = -ENOMEM; + goto err; + } + dma_async_issue_pending(chan); + } + src_offset += loop_len; + dst_offset += loop_len; + remaining_len -= loop_len; + } + remaining_len = tail_len; + if (remaining_len) { + loop_len = remaining_len; + if (src_offset == end_src_offset) + src_window = list_next_entry(src_window, list); + if (dst_offset == end_dst_offset) + dst_window = list_next_entry(dst_window, list); + + src_dma_addr = __scif_off_to_dma_addr(src_window, src_offset); + dst_dma_addr = __scif_off_to_dma_addr(dst_window, dst_offset); + /* + * The CPU copy for the tail bytes must be initiated only once + * previous DMA transfers for this endpoint have completed to + * guarantee ordering. + */ + if (work->ordered) { + struct scif_dev *rdev = work->remote_dev; + + ret = scif_drain_dma_poll(rdev->sdev, chan); + if (ret) + return ret; + } + if (src_window->type == SCIF_WINDOW_SELF) + src_virt = _get_local_va(src_offset, src_window, + loop_len); + else + src_virt = ioremap_remote(src_offset, src_window, + loop_len, + work->remote_dev, NULL); + if (!src_virt) + return -ENOMEM; + + if (dst_window->type == SCIF_WINDOW_SELF) + dst_virt = _get_local_va(dst_offset, dst_window, + loop_len); + else + dst_virt = ioremap_remote(dst_offset, dst_window, + loop_len, + work->remote_dev, NULL); + if (!dst_virt) { + if (src_window->type != SCIF_WINDOW_SELF) + iounmap_remote(src_virt, loop_len, work); + return -ENOMEM; + } + + if (src_window->type == SCIF_WINDOW_SELF) + scif_unaligned_cpy_toio(dst_virt, src_virt, loop_len, + work->ordered); + else + scif_unaligned_cpy_fromio(dst_virt, src_virt, + loop_len, work->ordered); + if (src_window->type != SCIF_WINDOW_SELF) + iounmap_remote(src_virt, loop_len, work); + + if (dst_window->type != SCIF_WINDOW_SELF) + iounmap_remote(dst_virt, loop_len, work); + remaining_len -= loop_len; + } + return ret; +err: + dev_err(scif_info.mdev.this_device, + "%s %d Desc Prog Failed ret %d\n", + __func__, __LINE__, ret); + return ret; +} + +/* + * scif_rma_list_cpu_copy: + * + * Traverse all the windows and perform CPU copy. + */ +static int scif_rma_list_cpu_copy(struct scif_copy_work *work) +{ + void *src_virt, *dst_virt; + size_t loop_len, remaining_len; + int src_page_off, dst_page_off; + s64 src_offset = work->src_offset, dst_offset = work->dst_offset; + struct scif_window *src_window = work->src_window; + struct scif_window *dst_window = work->dst_window; + s64 end_src_offset, end_dst_offset; + int ret = 0; + struct scif_window_iter src_win_iter; + struct scif_window_iter dst_win_iter; + + remaining_len = work->len; + + scif_init_window_iter(src_window, &src_win_iter); + scif_init_window_iter(dst_window, &dst_win_iter); + while (remaining_len) { + src_page_off = src_offset & ~PAGE_MASK; + dst_page_off = dst_offset & ~PAGE_MASK; + loop_len = min(PAGE_SIZE - + max(src_page_off, dst_page_off), + remaining_len); + + if (src_window->type == SCIF_WINDOW_SELF) + src_virt = _get_local_va(src_offset, src_window, + loop_len); + else + src_virt = ioremap_remote(src_offset, src_window, + loop_len, + work->remote_dev, + &src_win_iter); + if (!src_virt) { + ret = -ENOMEM; + goto error; + } + + if (dst_window->type == SCIF_WINDOW_SELF) + dst_virt = _get_local_va(dst_offset, dst_window, + loop_len); + else + dst_virt = ioremap_remote(dst_offset, dst_window, + loop_len, + work->remote_dev, + &dst_win_iter); + if (!dst_virt) { + if (src_window->type == SCIF_WINDOW_PEER) + iounmap_remote(src_virt, loop_len, work); + ret = -ENOMEM; + goto error; + } + + if (work->loopback) { + memcpy(dst_virt, src_virt, loop_len); + } else { + if (src_window->type == SCIF_WINDOW_SELF) + memcpy_toio((void __iomem __force *)dst_virt, + src_virt, loop_len); + else + memcpy_fromio(dst_virt, + (void __iomem __force *)src_virt, + loop_len); + } + if (src_window->type == SCIF_WINDOW_PEER) + iounmap_remote(src_virt, loop_len, work); + + if (dst_window->type == SCIF_WINDOW_PEER) + iounmap_remote(dst_virt, loop_len, work); + + src_offset += loop_len; + dst_offset += loop_len; + remaining_len -= loop_len; + if (remaining_len) { + end_src_offset = src_window->offset + + (src_window->nr_pages << PAGE_SHIFT); + end_dst_offset = dst_window->offset + + (dst_window->nr_pages << PAGE_SHIFT); + if (src_offset == end_src_offset) { + src_window = list_next_entry(src_window, list); + scif_init_window_iter(src_window, + &src_win_iter); + } + if (dst_offset == end_dst_offset) { + dst_window = list_next_entry(dst_window, list); + scif_init_window_iter(dst_window, + &dst_win_iter); + } + } + } +error: + return ret; +} + +static int scif_rma_list_dma_copy_wrapper(struct scif_endpt *epd, + struct scif_copy_work *work, + struct dma_chan *chan, off_t loffset) +{ + int src_cache_off, dst_cache_off; + s64 src_offset = work->src_offset, dst_offset = work->dst_offset; + u8 *temp = NULL; + bool src_local = true, dst_local = false; + struct scif_dma_comp_cb *comp_cb; + dma_addr_t src_dma_addr, dst_dma_addr; + int err; + + if (is_dma_copy_aligned(chan->device, 1, 1, 1)) + return _scif_rma_list_dma_copy_aligned(work, chan); + + src_cache_off = src_offset & (L1_CACHE_BYTES - 1); + dst_cache_off = dst_offset & (L1_CACHE_BYTES - 1); + + if (dst_cache_off == src_cache_off) + return scif_rma_list_dma_copy_aligned(work, chan); + + if (work->loopback) + return scif_rma_list_cpu_copy(work); + src_dma_addr = __scif_off_to_dma_addr(work->src_window, src_offset); + dst_dma_addr = __scif_off_to_dma_addr(work->dst_window, dst_offset); + src_local = work->src_window->type == SCIF_WINDOW_SELF; + dst_local = work->dst_window->type == SCIF_WINDOW_SELF; + + dst_local = dst_local; + /* Allocate dma_completion cb */ + comp_cb = kzalloc(sizeof(*comp_cb), GFP_KERNEL); + if (!comp_cb) + goto error; + + work->comp_cb = comp_cb; + comp_cb->cb_cookie = comp_cb; + comp_cb->dma_completion_func = &scif_rma_completion_cb; + + if (work->len + (L1_CACHE_BYTES << 1) < SCIF_KMEM_UNALIGNED_BUF_SIZE) { + comp_cb->is_cache = false; + /* Allocate padding bytes to align to a cache line */ + temp = kmalloc(work->len + (L1_CACHE_BYTES << 1), + GFP_KERNEL); + if (!temp) + goto free_comp_cb; + comp_cb->temp_buf_to_free = temp; + /* kmalloc(..) does not guarantee cache line alignment */ + if (!IS_ALIGNED((u64)temp, L1_CACHE_BYTES)) + temp = PTR_ALIGN(temp, L1_CACHE_BYTES); + } else { + comp_cb->is_cache = true; + temp = kmem_cache_alloc(unaligned_cache, GFP_KERNEL); + if (!temp) + goto free_comp_cb; + comp_cb->temp_buf_to_free = temp; + } + + if (src_local) { + temp += dst_cache_off; + scif_rma_local_cpu_copy(work->src_offset, work->src_window, + temp, work->len, true); + } else { + comp_cb->dst_window = work->dst_window; + comp_cb->dst_offset = work->dst_offset; + work->src_offset = work->src_offset - src_cache_off; + comp_cb->len = work->len; + work->len = ALIGN(work->len + src_cache_off, L1_CACHE_BYTES); + comp_cb->header_padding = src_cache_off; + } + comp_cb->temp_buf = temp; + + err = scif_map_single(&comp_cb->temp_phys, temp, + work->remote_dev, SCIF_KMEM_UNALIGNED_BUF_SIZE); + if (err) + goto free_temp_buf; + comp_cb->sdev = work->remote_dev; + if (scif_rma_list_dma_copy_unaligned(work, temp, chan, src_local) < 0) + goto free_temp_buf; + if (!src_local) + work->fence_type = SCIF_DMA_INTR; + return 0; +free_temp_buf: + if (comp_cb->is_cache) + kmem_cache_free(unaligned_cache, comp_cb->temp_buf_to_free); + else + kfree(comp_cb->temp_buf_to_free); +free_comp_cb: + kfree(comp_cb); +error: + return -ENOMEM; +} + +/** + * scif_rma_copy: + * @epd: end point descriptor. + * @loffset: offset in local registered address space to/from which to copy + * @addr: user virtual address to/from which to copy + * @len: length of range to copy + * @roffset: offset in remote registered address space to/from which to copy + * @flags: flags + * @dir: LOCAL->REMOTE or vice versa. + * @last_chunk: true if this is the last chunk of a larger transfer + * + * Validate parameters, check if src/dst registered ranges requested for copy + * are valid and initiate either CPU or DMA copy. + */ +static int scif_rma_copy(scif_epd_t epd, off_t loffset, unsigned long addr, + size_t len, off_t roffset, int flags, + enum scif_rma_dir dir, bool last_chunk) +{ + struct scif_endpt *ep = (struct scif_endpt *)epd; + struct scif_rma_req remote_req; + struct scif_rma_req req; + struct scif_window *local_window = NULL; + struct scif_window *remote_window = NULL; + struct scif_copy_work copy_work; + bool loopback; + int err = 0; + struct dma_chan *chan; + struct scif_mmu_notif *mmn = NULL; + bool cache = false; + struct device *spdev; + + err = scif_verify_epd(ep); + if (err) + return err; + + if (flags && !(flags & (SCIF_RMA_USECPU | SCIF_RMA_USECACHE | + SCIF_RMA_SYNC | SCIF_RMA_ORDERED))) + return -EINVAL; + + loopback = scifdev_self(ep->remote_dev) ? true : false; + copy_work.fence_type = ((flags & SCIF_RMA_SYNC) && last_chunk) ? + SCIF_DMA_POLL : 0; + copy_work.ordered = !!((flags & SCIF_RMA_ORDERED) && last_chunk); + + /* Use CPU for Mgmt node <-> Mgmt node copies */ + if (loopback && scif_is_mgmt_node()) { + flags |= SCIF_RMA_USECPU; + copy_work.fence_type = 0x0; + } + + cache = scif_is_set_reg_cache(flags); + + remote_req.out_window = &remote_window; + remote_req.offset = roffset; + remote_req.nr_bytes = len; + /* + * If transfer is from local to remote then the remote window + * must be writeable and vice versa. + */ + remote_req.prot = dir == SCIF_LOCAL_TO_REMOTE ? VM_WRITE : VM_READ; + remote_req.type = SCIF_WINDOW_PARTIAL; + remote_req.head = &ep->rma_info.remote_reg_list; + + spdev = scif_get_peer_dev(ep->remote_dev); + if (IS_ERR(spdev)) { + err = PTR_ERR(spdev); + return err; + } + + if (addr && cache) { + mutex_lock(&ep->rma_info.mmn_lock); + mmn = scif_find_mmu_notifier(current->mm, &ep->rma_info); + if (!mmn) + mmn = scif_add_mmu_notifier(current->mm, ep); + mutex_unlock(&ep->rma_info.mmn_lock); + if (IS_ERR(mmn)) { + scif_put_peer_dev(spdev); + return PTR_ERR(mmn); + } + cache = cache && !scif_rma_tc_can_cache(ep, len); + } + mutex_lock(&ep->rma_info.rma_lock); + if (addr) { + req.out_window = &local_window; + req.nr_bytes = ALIGN(len + (addr & ~PAGE_MASK), + PAGE_SIZE); + req.va_for_temp = addr & PAGE_MASK; + req.prot = (dir == SCIF_LOCAL_TO_REMOTE ? + VM_READ : VM_WRITE | VM_READ); + /* Does a valid local window exist? */ + if (mmn) { + spin_lock(&ep->rma_info.tc_lock); + req.head = &mmn->tc_reg_list; + err = scif_query_tcw(ep, &req); + spin_unlock(&ep->rma_info.tc_lock); + } + if (!mmn || err) { + err = scif_register_temp(epd, req.va_for_temp, + req.nr_bytes, req.prot, + &loffset, &local_window); + if (err) { + mutex_unlock(&ep->rma_info.rma_lock); + goto error; + } + if (!cache) + goto skip_cache; + atomic_inc(&ep->rma_info.tcw_refcount); + atomic_add_return(local_window->nr_pages, + &ep->rma_info.tcw_total_pages); + if (mmn) { + spin_lock(&ep->rma_info.tc_lock); + scif_insert_tcw(local_window, + &mmn->tc_reg_list); + spin_unlock(&ep->rma_info.tc_lock); + } + } +skip_cache: + loffset = local_window->offset + + (addr - local_window->va_for_temp); + } else { + req.out_window = &local_window; + req.offset = loffset; + /* + * If transfer is from local to remote then the self window + * must be readable and vice versa. + */ + req.prot = dir == SCIF_LOCAL_TO_REMOTE ? VM_READ : VM_WRITE; + req.nr_bytes = len; + req.type = SCIF_WINDOW_PARTIAL; + req.head = &ep->rma_info.reg_list; + /* Does a valid local window exist? */ + err = scif_query_window(&req); + if (err) { + mutex_unlock(&ep->rma_info.rma_lock); + goto error; + } + } + + /* Does a valid remote window exist? */ + err = scif_query_window(&remote_req); + if (err) { + mutex_unlock(&ep->rma_info.rma_lock); + goto error; + } + + /* + * Prepare copy_work for submitting work to the DMA kernel thread + * or CPU copy routine. + */ + copy_work.len = len; + copy_work.loopback = loopback; + copy_work.remote_dev = ep->remote_dev; + if (dir == SCIF_LOCAL_TO_REMOTE) { + copy_work.src_offset = loffset; + copy_work.src_window = local_window; + copy_work.dst_offset = roffset; + copy_work.dst_window = remote_window; + } else { + copy_work.src_offset = roffset; + copy_work.src_window = remote_window; + copy_work.dst_offset = loffset; + copy_work.dst_window = local_window; + } + + if (flags & SCIF_RMA_USECPU) { + scif_rma_list_cpu_copy(©_work); + } else { + chan = ep->rma_info.dma_chan; + err = scif_rma_list_dma_copy_wrapper(epd, ©_work, + chan, loffset); + } + if (addr && !cache) + atomic_inc(&ep->rma_info.tw_refcount); + + mutex_unlock(&ep->rma_info.rma_lock); + + if (last_chunk) { + struct scif_dev *rdev = ep->remote_dev; + + if (copy_work.fence_type == SCIF_DMA_POLL) + err = scif_drain_dma_poll(rdev->sdev, + ep->rma_info.dma_chan); + else if (copy_work.fence_type == SCIF_DMA_INTR) + err = scif_drain_dma_intr(rdev->sdev, + ep->rma_info.dma_chan); + } + + if (addr && !cache) + scif_queue_for_cleanup(local_window, &scif_info.rma); + scif_put_peer_dev(spdev); + return err; +error: + if (err) { + if (addr && local_window && !cache) + scif_destroy_window(ep, local_window); + dev_err(scif_info.mdev.this_device, + "%s %d err %d len 0x%lx\n", + __func__, __LINE__, err, len); + } + scif_put_peer_dev(spdev); + return err; +} + +int scif_readfrom(scif_epd_t epd, off_t loffset, size_t len, + off_t roffset, int flags) +{ + int err; + + dev_dbg(scif_info.mdev.this_device, + "SCIFAPI readfrom: ep %p loffset 0x%lx len 0x%lx offset 0x%lx flags 0x%x\n", + epd, loffset, len, roffset, flags); + if (scif_unaligned(loffset, roffset)) { + while (len > SCIF_MAX_UNALIGNED_BUF_SIZE) { + err = scif_rma_copy(epd, loffset, 0x0, + SCIF_MAX_UNALIGNED_BUF_SIZE, + roffset, flags, + SCIF_REMOTE_TO_LOCAL, false); + if (err) + goto readfrom_err; + loffset += SCIF_MAX_UNALIGNED_BUF_SIZE; + roffset += SCIF_MAX_UNALIGNED_BUF_SIZE; + len -= SCIF_MAX_UNALIGNED_BUF_SIZE; + } + } + err = scif_rma_copy(epd, loffset, 0x0, len, + roffset, flags, SCIF_REMOTE_TO_LOCAL, true); +readfrom_err: + return err; +} +EXPORT_SYMBOL_GPL(scif_readfrom); + +int scif_writeto(scif_epd_t epd, off_t loffset, size_t len, + off_t roffset, int flags) +{ + int err; + + dev_dbg(scif_info.mdev.this_device, + "SCIFAPI writeto: ep %p loffset 0x%lx len 0x%lx roffset 0x%lx flags 0x%x\n", + epd, loffset, len, roffset, flags); + if (scif_unaligned(loffset, roffset)) { + while (len > SCIF_MAX_UNALIGNED_BUF_SIZE) { + err = scif_rma_copy(epd, loffset, 0x0, + SCIF_MAX_UNALIGNED_BUF_SIZE, + roffset, flags, + SCIF_LOCAL_TO_REMOTE, false); + if (err) + goto writeto_err; + loffset += SCIF_MAX_UNALIGNED_BUF_SIZE; + roffset += SCIF_MAX_UNALIGNED_BUF_SIZE; + len -= SCIF_MAX_UNALIGNED_BUF_SIZE; + } + } + err = scif_rma_copy(epd, loffset, 0x0, len, + roffset, flags, SCIF_LOCAL_TO_REMOTE, true); +writeto_err: + return err; +} +EXPORT_SYMBOL_GPL(scif_writeto); + +int scif_vreadfrom(scif_epd_t epd, void *addr, size_t len, + off_t roffset, int flags) +{ + int err; + + dev_dbg(scif_info.mdev.this_device, + "SCIFAPI vreadfrom: ep %p addr %p len 0x%lx roffset 0x%lx flags 0x%x\n", + epd, addr, len, roffset, flags); + if (scif_unaligned((off_t __force)addr, roffset)) { + if (len > SCIF_MAX_UNALIGNED_BUF_SIZE) + flags &= ~SCIF_RMA_USECACHE; + + while (len > SCIF_MAX_UNALIGNED_BUF_SIZE) { + err = scif_rma_copy(epd, 0, (u64)addr, + SCIF_MAX_UNALIGNED_BUF_SIZE, + roffset, flags, + SCIF_REMOTE_TO_LOCAL, false); + if (err) + goto vreadfrom_err; + addr += SCIF_MAX_UNALIGNED_BUF_SIZE; + roffset += SCIF_MAX_UNALIGNED_BUF_SIZE; + len -= SCIF_MAX_UNALIGNED_BUF_SIZE; + } + } + err = scif_rma_copy(epd, 0, (u64)addr, len, + roffset, flags, SCIF_REMOTE_TO_LOCAL, true); +vreadfrom_err: + return err; +} +EXPORT_SYMBOL_GPL(scif_vreadfrom); + +int scif_vwriteto(scif_epd_t epd, void *addr, size_t len, + off_t roffset, int flags) +{ + int err; + + dev_dbg(scif_info.mdev.this_device, + "SCIFAPI vwriteto: ep %p addr %p len 0x%lx roffset 0x%lx flags 0x%x\n", + epd, addr, len, roffset, flags); + if (scif_unaligned((off_t __force)addr, roffset)) { + if (len > SCIF_MAX_UNALIGNED_BUF_SIZE) + flags &= ~SCIF_RMA_USECACHE; + + while (len > SCIF_MAX_UNALIGNED_BUF_SIZE) { + err = scif_rma_copy(epd, 0, (u64)addr, + SCIF_MAX_UNALIGNED_BUF_SIZE, + roffset, flags, + SCIF_LOCAL_TO_REMOTE, false); + if (err) + goto vwriteto_err; + addr += SCIF_MAX_UNALIGNED_BUF_SIZE; + roffset += SCIF_MAX_UNALIGNED_BUF_SIZE; + len -= SCIF_MAX_UNALIGNED_BUF_SIZE; + } + } + err = scif_rma_copy(epd, 0, (u64)addr, len, + roffset, flags, SCIF_LOCAL_TO_REMOTE, true); +vwriteto_err: + return err; +} +EXPORT_SYMBOL_GPL(scif_vwriteto); diff --git a/drivers/misc/mic/scif/scif_epd.c b/drivers/misc/mic/scif/scif_epd.c new file mode 100644 index 000000000..00e5d6d66 --- /dev/null +++ b/drivers/misc/mic/scif/scif_epd.c @@ -0,0 +1,357 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2014 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Intel SCIF driver. + * + */ +#include "scif_main.h" +#include "scif_map.h" + +void scif_cleanup_ep_qp(struct scif_endpt *ep) +{ + struct scif_qp *qp = ep->qp_info.qp; + + if (qp->outbound_q.rb_base) { + scif_iounmap((void *)qp->outbound_q.rb_base, + qp->outbound_q.size, ep->remote_dev); + qp->outbound_q.rb_base = NULL; + } + if (qp->remote_qp) { + scif_iounmap((void *)qp->remote_qp, + sizeof(struct scif_qp), ep->remote_dev); + qp->remote_qp = NULL; + } + if (qp->local_qp) { + scif_unmap_single(qp->local_qp, ep->remote_dev, + sizeof(struct scif_qp)); + qp->local_qp = 0x0; + } + if (qp->local_buf) { + scif_unmap_single(qp->local_buf, ep->remote_dev, + SCIF_ENDPT_QP_SIZE); + qp->local_buf = 0; + } +} + +void scif_teardown_ep(void *endpt) +{ + struct scif_endpt *ep = endpt; + struct scif_qp *qp = ep->qp_info.qp; + + if (qp) { + spin_lock(&ep->lock); + scif_cleanup_ep_qp(ep); + spin_unlock(&ep->lock); + kfree(qp->inbound_q.rb_base); + kfree(qp); + } +} + +/* + * Enqueue the endpoint to the zombie list for cleanup. + * The endpoint should not be accessed once this API returns. + */ +void scif_add_epd_to_zombie_list(struct scif_endpt *ep, bool eplock_held) +{ + if (!eplock_held) + mutex_lock(&scif_info.eplock); + spin_lock(&ep->lock); + ep->state = SCIFEP_ZOMBIE; + spin_unlock(&ep->lock); + list_add_tail(&ep->list, &scif_info.zombie); + scif_info.nr_zombies++; + if (!eplock_held) + mutex_unlock(&scif_info.eplock); + schedule_work(&scif_info.misc_work); +} + +static struct scif_endpt *scif_find_listen_ep(u16 port) +{ + struct scif_endpt *ep = NULL; + struct list_head *pos, *tmpq; + + mutex_lock(&scif_info.eplock); + list_for_each_safe(pos, tmpq, &scif_info.listen) { + ep = list_entry(pos, struct scif_endpt, list); + if (ep->port.port == port) { + mutex_unlock(&scif_info.eplock); + return ep; + } + } + mutex_unlock(&scif_info.eplock); + return NULL; +} + +void scif_cleanup_zombie_epd(void) +{ + struct list_head *pos, *tmpq; + struct scif_endpt *ep; + + mutex_lock(&scif_info.eplock); + list_for_each_safe(pos, tmpq, &scif_info.zombie) { + ep = list_entry(pos, struct scif_endpt, list); + if (scif_rma_ep_can_uninit(ep)) { + list_del(pos); + scif_info.nr_zombies--; + put_iova_domain(&ep->rma_info.iovad); + kfree(ep); + } + } + mutex_unlock(&scif_info.eplock); +} + +/** + * scif_cnctreq() - Respond to SCIF_CNCT_REQ interrupt message + * @msg: Interrupt message + * + * This message is initiated by the remote node to request a connection + * to the local node. This function looks for an end point in the + * listen state on the requested port id. + * + * If it finds a listening port it places the connect request on the + * listening end points queue and wakes up any pending accept calls. + * + * If it does not find a listening end point it sends a connection + * reject message to the remote node. + */ +void scif_cnctreq(struct scif_dev *scifdev, struct scifmsg *msg) +{ + struct scif_endpt *ep = NULL; + struct scif_conreq *conreq; + + conreq = kmalloc(sizeof(*conreq), GFP_KERNEL); + if (!conreq) + /* Lack of resources so reject the request. */ + goto conreq_sendrej; + + ep = scif_find_listen_ep(msg->dst.port); + if (!ep) + /* Send reject due to no listening ports */ + goto conreq_sendrej_free; + else + spin_lock(&ep->lock); + + if (ep->backlog <= ep->conreqcnt) { + /* Send reject due to too many pending requests */ + spin_unlock(&ep->lock); + goto conreq_sendrej_free; + } + + conreq->msg = *msg; + list_add_tail(&conreq->list, &ep->conlist); + ep->conreqcnt++; + wake_up_interruptible(&ep->conwq); + spin_unlock(&ep->lock); + return; + +conreq_sendrej_free: + kfree(conreq); +conreq_sendrej: + msg->uop = SCIF_CNCT_REJ; + scif_nodeqp_send(&scif_dev[msg->src.node], msg); +} + +/** + * scif_cnctgnt() - Respond to SCIF_CNCT_GNT interrupt message + * @msg: Interrupt message + * + * An accept() on the remote node has occurred and sent this message + * to indicate success. Place the end point in the MAPPING state and + * save the remote nodes memory information. Then wake up the connect + * request so it can finish. + */ +void scif_cnctgnt(struct scif_dev *scifdev, struct scifmsg *msg) +{ + struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0]; + + spin_lock(&ep->lock); + if (SCIFEP_CONNECTING == ep->state) { + ep->peer.node = msg->src.node; + ep->peer.port = msg->src.port; + ep->qp_info.gnt_pld = msg->payload[1]; + ep->remote_ep = msg->payload[2]; + ep->state = SCIFEP_MAPPING; + + wake_up(&ep->conwq); + } + spin_unlock(&ep->lock); +} + +/** + * scif_cnctgnt_ack() - Respond to SCIF_CNCT_GNTACK interrupt message + * @msg: Interrupt message + * + * The remote connection request has finished mapping the local memory. + * Place the connection in the connected state and wake up the pending + * accept() call. + */ +void scif_cnctgnt_ack(struct scif_dev *scifdev, struct scifmsg *msg) +{ + struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0]; + + mutex_lock(&scif_info.connlock); + spin_lock(&ep->lock); + /* New ep is now connected with all resources set. */ + ep->state = SCIFEP_CONNECTED; + list_add_tail(&ep->list, &scif_info.connected); + wake_up(&ep->conwq); + spin_unlock(&ep->lock); + mutex_unlock(&scif_info.connlock); +} + +/** + * scif_cnctgnt_nack() - Respond to SCIF_CNCT_GNTNACK interrupt message + * @msg: Interrupt message + * + * The remote connection request failed to map the local memory it was sent. + * Place the end point in the CLOSING state to indicate it and wake up + * the pending accept(); + */ +void scif_cnctgnt_nack(struct scif_dev *scifdev, struct scifmsg *msg) +{ + struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0]; + + spin_lock(&ep->lock); + ep->state = SCIFEP_CLOSING; + wake_up(&ep->conwq); + spin_unlock(&ep->lock); +} + +/** + * scif_cnctrej() - Respond to SCIF_CNCT_REJ interrupt message + * @msg: Interrupt message + * + * The remote end has rejected the connection request. Set the end + * point back to the bound state and wake up the pending connect(). + */ +void scif_cnctrej(struct scif_dev *scifdev, struct scifmsg *msg) +{ + struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0]; + + spin_lock(&ep->lock); + if (SCIFEP_CONNECTING == ep->state) { + ep->state = SCIFEP_BOUND; + wake_up(&ep->conwq); + } + spin_unlock(&ep->lock); +} + +/** + * scif_discnct() - Respond to SCIF_DISCNCT interrupt message + * @msg: Interrupt message + * + * The remote node has indicated close() has been called on its end + * point. Remove the local end point from the connected list, set its + * state to disconnected and ensure accesses to the remote node are + * shutdown. + * + * When all accesses to the remote end have completed then send a + * DISCNT_ACK to indicate it can remove its resources and complete + * the close routine. + */ +void scif_discnct(struct scif_dev *scifdev, struct scifmsg *msg) +{ + struct scif_endpt *ep = NULL; + struct scif_endpt *tmpep; + struct list_head *pos, *tmpq; + + mutex_lock(&scif_info.connlock); + list_for_each_safe(pos, tmpq, &scif_info.connected) { + tmpep = list_entry(pos, struct scif_endpt, list); + /* + * The local ep may have sent a disconnect and and been closed + * due to a message response time out. It may have been + * allocated again and formed a new connection so we want to + * check if the remote ep matches + */ + if (((u64)tmpep == msg->payload[1]) && + ((u64)tmpep->remote_ep == msg->payload[0])) { + list_del(pos); + ep = tmpep; + spin_lock(&ep->lock); + break; + } + } + + /* + * If the terminated end is not found then this side started closing + * before the other side sent the disconnect. If so the ep will no + * longer be on the connected list. Regardless the other side + * needs to be acked to let it know close is complete. + */ + if (!ep) { + mutex_unlock(&scif_info.connlock); + goto discnct_ack; + } + + ep->state = SCIFEP_DISCONNECTED; + list_add_tail(&ep->list, &scif_info.disconnected); + + wake_up_interruptible(&ep->sendwq); + wake_up_interruptible(&ep->recvwq); + spin_unlock(&ep->lock); + mutex_unlock(&scif_info.connlock); + +discnct_ack: + msg->uop = SCIF_DISCNT_ACK; + scif_nodeqp_send(&scif_dev[msg->src.node], msg); +} + +/** + * scif_discnct_ack() - Respond to SCIF_DISCNT_ACK interrupt message + * @msg: Interrupt message + * + * Remote side has indicated it has not more references to local resources + */ +void scif_discnt_ack(struct scif_dev *scifdev, struct scifmsg *msg) +{ + struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0]; + + spin_lock(&ep->lock); + ep->state = SCIFEP_DISCONNECTED; + spin_unlock(&ep->lock); + complete(&ep->discon); +} + +/** + * scif_clientsend() - Respond to SCIF_CLIENT_SEND interrupt message + * @msg: Interrupt message + * + * Remote side is confirming send or receive interrupt handling is complete. + */ +void scif_clientsend(struct scif_dev *scifdev, struct scifmsg *msg) +{ + struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0]; + + spin_lock(&ep->lock); + if (SCIFEP_CONNECTED == ep->state) + wake_up_interruptible(&ep->recvwq); + spin_unlock(&ep->lock); +} + +/** + * scif_clientrcvd() - Respond to SCIF_CLIENT_RCVD interrupt message + * @msg: Interrupt message + * + * Remote side is confirming send or receive interrupt handling is complete. + */ +void scif_clientrcvd(struct scif_dev *scifdev, struct scifmsg *msg) +{ + struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0]; + + spin_lock(&ep->lock); + if (SCIFEP_CONNECTED == ep->state) + wake_up_interruptible(&ep->sendwq); + spin_unlock(&ep->lock); +} diff --git a/drivers/misc/mic/scif/scif_epd.h b/drivers/misc/mic/scif/scif_epd.h new file mode 100644 index 000000000..f39b663da --- /dev/null +++ b/drivers/misc/mic/scif/scif_epd.h @@ -0,0 +1,210 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2014 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Intel SCIF driver. + * + */ +#ifndef SCIF_EPD_H +#define SCIF_EPD_H + +#include +#include +#include + +#define SCIF_EPLOCK_HELD true + +enum scif_epd_state { + SCIFEP_UNBOUND, + SCIFEP_BOUND, + SCIFEP_LISTENING, + SCIFEP_CONNECTED, + SCIFEP_CONNECTING, + SCIFEP_MAPPING, + SCIFEP_CLOSING, + SCIFEP_CLLISTEN, + SCIFEP_DISCONNECTED, + SCIFEP_ZOMBIE +}; + +/* + * struct scif_conreq - Data structure added to the connection list. + * + * @msg: connection request message received + * @list: link to list of connection requests + */ +struct scif_conreq { + struct scifmsg msg; + struct list_head list; +}; + +/* Size of the RB for the Endpoint QP */ +#define SCIF_ENDPT_QP_SIZE 0x1000 + +/* + * scif_endpt_qp_info - SCIF endpoint queue pair + * + * @qp - Qpair for this endpoint + * @qp_offset - DMA address of the QP + * @gnt_pld - Payload in a SCIF_CNCT_GNT message containing the + * physical address of the remote_qp. + */ +struct scif_endpt_qp_info { + struct scif_qp *qp; + dma_addr_t qp_offset; + dma_addr_t gnt_pld; +}; + +/* + * struct scif_endpt - The SCIF endpoint data structure + * + * @state: end point state + * @lock: lock synchronizing access to endpoint fields like state etc + * @port: self port information + * @peer: peer port information + * @backlog: maximum pending connection requests + * @qp_info: Endpoint QP information for SCIF messaging + * @remote_dev: scifdev used by this endpt to communicate with remote node. + * @remote_ep: remote endpoint + * @conreqcnt: Keep track of number of connection requests. + * @files: Open file information used to match the id passed in with + * the flush routine. + * @conlist: list of connection requests + * @conwq: waitqueue for connection processing + * @discon: completion used during disconnection + * @sendwq: waitqueue used during sending messages + * @recvwq: waitqueue used during message receipt + * @sendlock: Synchronize ordering of messages sent + * @recvlock: Synchronize ordering of messages received + * @list: link to list of various endpoints like connected, listening etc + * @li_accept: pending ACCEPTREG + * @acceptcnt: pending ACCEPTREG cnt + * @liacceptlist: link to listen accept + * @miacceptlist: link to uaccept + * @listenep: associated listen ep + * @conn_work: Non blocking connect work + * @conn_port: Connection port + * @conn_err: Errors during connection + * @conn_async_state: Async connection + * @conn_pend_wq: Used by poll while waiting for incoming connections + * @conn_list: List of async connection requests + * @rma_info: Information for triggering SCIF RMA and DMA operations + * @mmu_list: link to list of MMU notifier cleanup work + * @anon: anonymous file for use in kernel mode scif poll + */ +struct scif_endpt { + enum scif_epd_state state; + spinlock_t lock; + struct scif_port_id port; + struct scif_port_id peer; + int backlog; + struct scif_endpt_qp_info qp_info; + struct scif_dev *remote_dev; + u64 remote_ep; + int conreqcnt; + struct files_struct *files; + struct list_head conlist; + wait_queue_head_t conwq; + struct completion discon; + wait_queue_head_t sendwq; + wait_queue_head_t recvwq; + struct mutex sendlock; + struct mutex recvlock; + struct list_head list; + struct list_head li_accept; + int acceptcnt; + struct list_head liacceptlist; + struct list_head miacceptlist; + struct scif_endpt *listenep; + struct scif_port_id conn_port; + int conn_err; + int conn_async_state; + wait_queue_head_t conn_pend_wq; + struct list_head conn_list; + struct scif_endpt_rma_info rma_info; + struct list_head mmu_list; + struct file *anon; +}; + +static inline int scifdev_alive(struct scif_endpt *ep) +{ + return _scifdev_alive(ep->remote_dev); +} + +/* + * scif_verify_epd: + * ep: SCIF endpoint + * + * Checks several generic error conditions and returns the + * appropriate error. + */ +static inline int scif_verify_epd(struct scif_endpt *ep) +{ + if (ep->state == SCIFEP_DISCONNECTED) + return -ECONNRESET; + + if (ep->state != SCIFEP_CONNECTED) + return -ENOTCONN; + + if (!scifdev_alive(ep)) + return -ENODEV; + + return 0; +} + +static inline int scif_anon_inode_getfile(scif_epd_t epd) +{ + epd->anon = anon_inode_getfile("scif", &scif_anon_fops, NULL, 0); + if (IS_ERR(epd->anon)) + return PTR_ERR(epd->anon); + return 0; +} + +static inline void scif_anon_inode_fput(scif_epd_t epd) +{ + if (epd->anon) { + fput(epd->anon); + epd->anon = NULL; + } +} + +void scif_cleanup_zombie_epd(void); +void scif_teardown_ep(void *endpt); +void scif_cleanup_ep_qp(struct scif_endpt *ep); +void scif_add_epd_to_zombie_list(struct scif_endpt *ep, bool eplock_held); +void scif_get_node_info(void); +void scif_send_acks(struct scif_dev *dev); +void scif_conn_handler(struct work_struct *work); +int scif_rsrv_port(u16 port); +void scif_get_port(u16 port); +int scif_get_new_port(void); +void scif_put_port(u16 port); +int scif_user_send(scif_epd_t epd, void __user *msg, int len, int flags); +int scif_user_recv(scif_epd_t epd, void __user *msg, int len, int flags); +void scif_cnctreq(struct scif_dev *scifdev, struct scifmsg *msg); +void scif_cnctgnt(struct scif_dev *scifdev, struct scifmsg *msg); +void scif_cnctgnt_ack(struct scif_dev *scifdev, struct scifmsg *msg); +void scif_cnctgnt_nack(struct scif_dev *scifdev, struct scifmsg *msg); +void scif_cnctrej(struct scif_dev *scifdev, struct scifmsg *msg); +void scif_discnct(struct scif_dev *scifdev, struct scifmsg *msg); +void scif_discnt_ack(struct scif_dev *scifdev, struct scifmsg *msg); +void scif_clientsend(struct scif_dev *scifdev, struct scifmsg *msg); +void scif_clientrcvd(struct scif_dev *scifdev, struct scifmsg *msg); +int __scif_connect(scif_epd_t epd, struct scif_port_id *dst, bool non_block); +int __scif_flush(scif_epd_t epd); +int scif_mmap(struct vm_area_struct *vma, scif_epd_t epd); +__poll_t __scif_pollfd(struct file *f, poll_table *wait, + struct scif_endpt *ep); +int __scif_pin_pages(void *addr, size_t len, int *out_prot, + int map_flags, scif_pinned_pages_t *pages); +#endif /* SCIF_EPD_H */ diff --git a/drivers/misc/mic/scif/scif_fd.c b/drivers/misc/mic/scif/scif_fd.c new file mode 100644 index 000000000..5c2a57ae4 --- /dev/null +++ b/drivers/misc/mic/scif/scif_fd.c @@ -0,0 +1,471 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2014 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Intel SCIF driver. + * + */ +#include "scif_main.h" + +static int scif_fdopen(struct inode *inode, struct file *f) +{ + struct scif_endpt *priv = scif_open(); + + if (!priv) + return -ENOMEM; + f->private_data = priv; + return 0; +} + +static int scif_fdclose(struct inode *inode, struct file *f) +{ + struct scif_endpt *priv = f->private_data; + + return scif_close(priv); +} + +static int scif_fdmmap(struct file *f, struct vm_area_struct *vma) +{ + struct scif_endpt *priv = f->private_data; + + return scif_mmap(vma, priv); +} + +static __poll_t scif_fdpoll(struct file *f, poll_table *wait) +{ + struct scif_endpt *priv = f->private_data; + + return __scif_pollfd(f, wait, priv); +} + +static int scif_fdflush(struct file *f, fl_owner_t id) +{ + struct scif_endpt *ep = f->private_data; + + spin_lock(&ep->lock); + /* + * The listening endpoint stashes the open file information before + * waiting for incoming connections. The release callback would never be + * called if the application closed the endpoint, while waiting for + * incoming connections from a separate thread since the file descriptor + * reference count is bumped up in the accept IOCTL. Call the flush + * routine if the id matches the endpoint open file information so that + * the listening endpoint can be woken up and the fd released. + */ + if (ep->files == id) + __scif_flush(ep); + spin_unlock(&ep->lock); + return 0; +} + +static __always_inline void scif_err_debug(int err, const char *str) +{ + /* + * ENOTCONN is a common uninteresting error which is + * flooding debug messages to the console unnecessarily. + */ + if (err < 0 && err != -ENOTCONN) + dev_dbg(scif_info.mdev.this_device, "%s err %d\n", str, err); +} + +static long scif_fdioctl(struct file *f, unsigned int cmd, unsigned long arg) +{ + struct scif_endpt *priv = f->private_data; + void __user *argp = (void __user *)arg; + int err = 0; + struct scifioctl_msg request; + bool non_block = false; + + non_block = !!(f->f_flags & O_NONBLOCK); + + switch (cmd) { + case SCIF_BIND: + { + int pn; + + if (copy_from_user(&pn, argp, sizeof(pn))) + return -EFAULT; + + pn = scif_bind(priv, pn); + if (pn < 0) + return pn; + + if (copy_to_user(argp, &pn, sizeof(pn))) + return -EFAULT; + + return 0; + } + case SCIF_LISTEN: + return scif_listen(priv, arg); + case SCIF_CONNECT: + { + struct scifioctl_connect req; + struct scif_endpt *ep = (struct scif_endpt *)priv; + + if (copy_from_user(&req, argp, sizeof(req))) + return -EFAULT; + + err = __scif_connect(priv, &req.peer, non_block); + if (err < 0) + return err; + + req.self.node = ep->port.node; + req.self.port = ep->port.port; + + if (copy_to_user(argp, &req, sizeof(req))) + return -EFAULT; + + return 0; + } + /* + * Accept is done in two halves. The request ioctl does the basic + * functionality of accepting the request and returning the information + * about it including the internal ID of the end point. The register + * is done with the internal ID on a new file descriptor opened by the + * requesting process. + */ + case SCIF_ACCEPTREQ: + { + struct scifioctl_accept request; + scif_epd_t *ep = (scif_epd_t *)&request.endpt; + + if (copy_from_user(&request, argp, sizeof(request))) + return -EFAULT; + + err = scif_accept(priv, &request.peer, ep, request.flags); + if (err < 0) + return err; + + if (copy_to_user(argp, &request, sizeof(request))) { + scif_close(*ep); + return -EFAULT; + } + /* + * Add to the list of user mode eps where the second half + * of the accept is not yet completed. + */ + mutex_lock(&scif_info.eplock); + list_add_tail(&((*ep)->miacceptlist), &scif_info.uaccept); + list_add_tail(&((*ep)->liacceptlist), &priv->li_accept); + (*ep)->listenep = priv; + priv->acceptcnt++; + mutex_unlock(&scif_info.eplock); + + return 0; + } + case SCIF_ACCEPTREG: + { + struct scif_endpt *priv = f->private_data; + struct scif_endpt *newep; + struct scif_endpt *lisep; + struct scif_endpt *fep = NULL; + struct scif_endpt *tmpep; + struct list_head *pos, *tmpq; + + /* Finally replace the pointer to the accepted endpoint */ + if (copy_from_user(&newep, argp, sizeof(void *))) + return -EFAULT; + + /* Remove form the user accept queue */ + mutex_lock(&scif_info.eplock); + list_for_each_safe(pos, tmpq, &scif_info.uaccept) { + tmpep = list_entry(pos, + struct scif_endpt, miacceptlist); + if (tmpep == newep) { + list_del(pos); + fep = tmpep; + break; + } + } + + if (!fep) { + mutex_unlock(&scif_info.eplock); + return -ENOENT; + } + + lisep = newep->listenep; + list_for_each_safe(pos, tmpq, &lisep->li_accept) { + tmpep = list_entry(pos, + struct scif_endpt, liacceptlist); + if (tmpep == newep) { + list_del(pos); + lisep->acceptcnt--; + break; + } + } + + mutex_unlock(&scif_info.eplock); + + /* Free the resources automatically created from the open. */ + scif_anon_inode_fput(priv); + scif_teardown_ep(priv); + scif_add_epd_to_zombie_list(priv, !SCIF_EPLOCK_HELD); + f->private_data = newep; + return 0; + } + case SCIF_SEND: + { + struct scif_endpt *priv = f->private_data; + + if (copy_from_user(&request, argp, + sizeof(struct scifioctl_msg))) { + err = -EFAULT; + goto send_err; + } + err = scif_user_send(priv, (void __user *)request.msg, + request.len, request.flags); + if (err < 0) + goto send_err; + if (copy_to_user(& + ((struct scifioctl_msg __user *)argp)->out_len, + &err, sizeof(err))) { + err = -EFAULT; + goto send_err; + } + err = 0; +send_err: + scif_err_debug(err, "scif_send"); + return err; + } + case SCIF_RECV: + { + struct scif_endpt *priv = f->private_data; + + if (copy_from_user(&request, argp, + sizeof(struct scifioctl_msg))) { + err = -EFAULT; + goto recv_err; + } + + err = scif_user_recv(priv, (void __user *)request.msg, + request.len, request.flags); + if (err < 0) + goto recv_err; + + if (copy_to_user(& + ((struct scifioctl_msg __user *)argp)->out_len, + &err, sizeof(err))) { + err = -EFAULT; + goto recv_err; + } + err = 0; +recv_err: + scif_err_debug(err, "scif_recv"); + return err; + } + case SCIF_GET_NODEIDS: + { + struct scifioctl_node_ids node_ids; + int entries; + u16 *nodes; + void __user *unodes, *uself; + u16 self; + + if (copy_from_user(&node_ids, argp, sizeof(node_ids))) { + err = -EFAULT; + goto getnodes_err2; + } + + entries = min_t(int, scif_info.maxid, node_ids.len); + nodes = kmalloc_array(entries, sizeof(u16), GFP_KERNEL); + if (entries && !nodes) { + err = -ENOMEM; + goto getnodes_err2; + } + node_ids.len = scif_get_node_ids(nodes, entries, &self); + + unodes = (void __user *)node_ids.nodes; + if (copy_to_user(unodes, nodes, sizeof(u16) * entries)) { + err = -EFAULT; + goto getnodes_err1; + } + + uself = (void __user *)node_ids.self; + if (copy_to_user(uself, &self, sizeof(u16))) { + err = -EFAULT; + goto getnodes_err1; + } + + if (copy_to_user(argp, &node_ids, sizeof(node_ids))) { + err = -EFAULT; + goto getnodes_err1; + } +getnodes_err1: + kfree(nodes); +getnodes_err2: + return err; + } + case SCIF_REG: + { + struct scif_endpt *priv = f->private_data; + struct scifioctl_reg reg; + off_t ret; + + if (copy_from_user(®, argp, sizeof(reg))) { + err = -EFAULT; + goto reg_err; + } + if (reg.flags & SCIF_MAP_KERNEL) { + err = -EINVAL; + goto reg_err; + } + ret = scif_register(priv, (void *)reg.addr, reg.len, + reg.offset, reg.prot, reg.flags); + if (ret < 0) { + err = (int)ret; + goto reg_err; + } + + if (copy_to_user(&((struct scifioctl_reg __user *)argp) + ->out_offset, &ret, sizeof(reg.out_offset))) { + err = -EFAULT; + goto reg_err; + } + err = 0; +reg_err: + scif_err_debug(err, "scif_register"); + return err; + } + case SCIF_UNREG: + { + struct scif_endpt *priv = f->private_data; + struct scifioctl_unreg unreg; + + if (copy_from_user(&unreg, argp, sizeof(unreg))) { + err = -EFAULT; + goto unreg_err; + } + err = scif_unregister(priv, unreg.offset, unreg.len); +unreg_err: + scif_err_debug(err, "scif_unregister"); + return err; + } + case SCIF_READFROM: + { + struct scif_endpt *priv = f->private_data; + struct scifioctl_copy copy; + + if (copy_from_user(©, argp, sizeof(copy))) { + err = -EFAULT; + goto readfrom_err; + } + err = scif_readfrom(priv, copy.loffset, copy.len, copy.roffset, + copy.flags); +readfrom_err: + scif_err_debug(err, "scif_readfrom"); + return err; + } + case SCIF_WRITETO: + { + struct scif_endpt *priv = f->private_data; + struct scifioctl_copy copy; + + if (copy_from_user(©, argp, sizeof(copy))) { + err = -EFAULT; + goto writeto_err; + } + err = scif_writeto(priv, copy.loffset, copy.len, copy.roffset, + copy.flags); +writeto_err: + scif_err_debug(err, "scif_writeto"); + return err; + } + case SCIF_VREADFROM: + { + struct scif_endpt *priv = f->private_data; + struct scifioctl_copy copy; + + if (copy_from_user(©, argp, sizeof(copy))) { + err = -EFAULT; + goto vreadfrom_err; + } + err = scif_vreadfrom(priv, (void __force *)copy.addr, copy.len, + copy.roffset, copy.flags); +vreadfrom_err: + scif_err_debug(err, "scif_vreadfrom"); + return err; + } + case SCIF_VWRITETO: + { + struct scif_endpt *priv = f->private_data; + struct scifioctl_copy copy; + + if (copy_from_user(©, argp, sizeof(copy))) { + err = -EFAULT; + goto vwriteto_err; + } + err = scif_vwriteto(priv, (void __force *)copy.addr, copy.len, + copy.roffset, copy.flags); +vwriteto_err: + scif_err_debug(err, "scif_vwriteto"); + return err; + } + case SCIF_FENCE_MARK: + { + struct scif_endpt *priv = f->private_data; + struct scifioctl_fence_mark mark; + int tmp_mark = 0; + + if (copy_from_user(&mark, argp, sizeof(mark))) { + err = -EFAULT; + goto fence_mark_err; + } + err = scif_fence_mark(priv, mark.flags, &tmp_mark); + if (err) + goto fence_mark_err; + if (copy_to_user((void __user *)mark.mark, &tmp_mark, + sizeof(tmp_mark))) { + err = -EFAULT; + goto fence_mark_err; + } +fence_mark_err: + scif_err_debug(err, "scif_fence_mark"); + return err; + } + case SCIF_FENCE_WAIT: + { + struct scif_endpt *priv = f->private_data; + + err = scif_fence_wait(priv, arg); + scif_err_debug(err, "scif_fence_wait"); + return err; + } + case SCIF_FENCE_SIGNAL: + { + struct scif_endpt *priv = f->private_data; + struct scifioctl_fence_signal signal; + + if (copy_from_user(&signal, argp, sizeof(signal))) { + err = -EFAULT; + goto fence_signal_err; + } + + err = scif_fence_signal(priv, signal.loff, signal.lval, + signal.roff, signal.rval, signal.flags); +fence_signal_err: + scif_err_debug(err, "scif_fence_signal"); + return err; + } + } + return -EINVAL; +} + +const struct file_operations scif_fops = { + .open = scif_fdopen, + .release = scif_fdclose, + .unlocked_ioctl = scif_fdioctl, + .mmap = scif_fdmmap, + .poll = scif_fdpoll, + .flush = scif_fdflush, + .owner = THIS_MODULE, +}; diff --git a/drivers/misc/mic/scif/scif_fence.c b/drivers/misc/mic/scif/scif_fence.c new file mode 100644 index 000000000..7bb929f05 --- /dev/null +++ b/drivers/misc/mic/scif/scif_fence.c @@ -0,0 +1,772 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2015 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Intel SCIF driver. + * + */ + +#include "scif_main.h" + +/** + * scif_recv_mark: Handle SCIF_MARK request + * @msg: Interrupt message + * + * The peer has requested a mark. + */ +void scif_recv_mark(struct scif_dev *scifdev, struct scifmsg *msg) +{ + struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0]; + int mark = 0; + int err; + + err = _scif_fence_mark(ep, &mark); + if (err) + msg->uop = SCIF_MARK_NACK; + else + msg->uop = SCIF_MARK_ACK; + msg->payload[0] = ep->remote_ep; + msg->payload[2] = mark; + scif_nodeqp_send(ep->remote_dev, msg); +} + +/** + * scif_recv_mark_resp: Handle SCIF_MARK_(N)ACK messages. + * @msg: Interrupt message + * + * The peer has responded to a SCIF_MARK message. + */ +void scif_recv_mark_resp(struct scif_dev *scifdev, struct scifmsg *msg) +{ + struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0]; + struct scif_fence_info *fence_req = + (struct scif_fence_info *)msg->payload[1]; + + mutex_lock(&ep->rma_info.rma_lock); + if (msg->uop == SCIF_MARK_ACK) { + fence_req->state = OP_COMPLETED; + fence_req->dma_mark = (int)msg->payload[2]; + } else { + fence_req->state = OP_FAILED; + } + mutex_unlock(&ep->rma_info.rma_lock); + complete(&fence_req->comp); +} + +/** + * scif_recv_wait: Handle SCIF_WAIT request + * @msg: Interrupt message + * + * The peer has requested waiting on a fence. + */ +void scif_recv_wait(struct scif_dev *scifdev, struct scifmsg *msg) +{ + struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0]; + struct scif_remote_fence_info *fence; + + /* + * Allocate structure for remote fence information and + * send a NACK if the allocation failed. The peer will + * return ENOMEM upon receiving a NACK. + */ + fence = kmalloc(sizeof(*fence), GFP_KERNEL); + if (!fence) { + msg->payload[0] = ep->remote_ep; + msg->uop = SCIF_WAIT_NACK; + scif_nodeqp_send(ep->remote_dev, msg); + return; + } + + /* Prepare the fence request */ + memcpy(&fence->msg, msg, sizeof(struct scifmsg)); + INIT_LIST_HEAD(&fence->list); + + /* Insert to the global remote fence request list */ + mutex_lock(&scif_info.fencelock); + atomic_inc(&ep->rma_info.fence_refcount); + list_add_tail(&fence->list, &scif_info.fence); + mutex_unlock(&scif_info.fencelock); + + schedule_work(&scif_info.misc_work); +} + +/** + * scif_recv_wait_resp: Handle SCIF_WAIT_(N)ACK messages. + * @msg: Interrupt message + * + * The peer has responded to a SCIF_WAIT message. + */ +void scif_recv_wait_resp(struct scif_dev *scifdev, struct scifmsg *msg) +{ + struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0]; + struct scif_fence_info *fence_req = + (struct scif_fence_info *)msg->payload[1]; + + mutex_lock(&ep->rma_info.rma_lock); + if (msg->uop == SCIF_WAIT_ACK) + fence_req->state = OP_COMPLETED; + else + fence_req->state = OP_FAILED; + mutex_unlock(&ep->rma_info.rma_lock); + complete(&fence_req->comp); +} + +/** + * scif_recv_sig_local: Handle SCIF_SIG_LOCAL request + * @msg: Interrupt message + * + * The peer has requested a signal on a local offset. + */ +void scif_recv_sig_local(struct scif_dev *scifdev, struct scifmsg *msg) +{ + struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0]; + int err; + + err = scif_prog_signal(ep, msg->payload[1], msg->payload[2], + SCIF_WINDOW_SELF); + if (err) + msg->uop = SCIF_SIG_NACK; + else + msg->uop = SCIF_SIG_ACK; + msg->payload[0] = ep->remote_ep; + scif_nodeqp_send(ep->remote_dev, msg); +} + +/** + * scif_recv_sig_remote: Handle SCIF_SIGNAL_REMOTE request + * @msg: Interrupt message + * + * The peer has requested a signal on a remote offset. + */ +void scif_recv_sig_remote(struct scif_dev *scifdev, struct scifmsg *msg) +{ + struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0]; + int err; + + err = scif_prog_signal(ep, msg->payload[1], msg->payload[2], + SCIF_WINDOW_PEER); + if (err) + msg->uop = SCIF_SIG_NACK; + else + msg->uop = SCIF_SIG_ACK; + msg->payload[0] = ep->remote_ep; + scif_nodeqp_send(ep->remote_dev, msg); +} + +/** + * scif_recv_sig_resp: Handle SCIF_SIG_(N)ACK messages. + * @msg: Interrupt message + * + * The peer has responded to a signal request. + */ +void scif_recv_sig_resp(struct scif_dev *scifdev, struct scifmsg *msg) +{ + struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0]; + struct scif_fence_info *fence_req = + (struct scif_fence_info *)msg->payload[3]; + + mutex_lock(&ep->rma_info.rma_lock); + if (msg->uop == SCIF_SIG_ACK) + fence_req->state = OP_COMPLETED; + else + fence_req->state = OP_FAILED; + mutex_unlock(&ep->rma_info.rma_lock); + complete(&fence_req->comp); +} + +static inline void *scif_get_local_va(off_t off, struct scif_window *window) +{ + struct page **pages = window->pinned_pages->pages; + int page_nr = (off - window->offset) >> PAGE_SHIFT; + off_t page_off = off & ~PAGE_MASK; + + return page_address(pages[page_nr]) + page_off; +} + +static void scif_prog_signal_cb(void *arg) +{ + struct scif_status *status = arg; + + dma_pool_free(status->ep->remote_dev->signal_pool, status, + status->src_dma_addr); +} + +static int _scif_prog_signal(scif_epd_t epd, dma_addr_t dst, u64 val) +{ + struct scif_endpt *ep = (struct scif_endpt *)epd; + struct dma_chan *chan = ep->rma_info.dma_chan; + struct dma_device *ddev = chan->device; + bool x100 = !is_dma_copy_aligned(chan->device, 1, 1, 1); + struct dma_async_tx_descriptor *tx; + struct scif_status *status = NULL; + dma_addr_t src; + dma_cookie_t cookie; + int err; + + tx = ddev->device_prep_dma_memcpy(chan, 0, 0, 0, DMA_PREP_FENCE); + if (!tx) { + err = -ENOMEM; + dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n", + __func__, __LINE__, err); + goto alloc_fail; + } + cookie = tx->tx_submit(tx); + if (dma_submit_error(cookie)) { + err = (int)cookie; + dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n", + __func__, __LINE__, err); + goto alloc_fail; + } + dma_async_issue_pending(chan); + if (x100) { + /* + * For X100 use the status descriptor to write the value to + * the destination. + */ + tx = ddev->device_prep_dma_imm_data(chan, dst, val, 0); + } else { + status = dma_pool_alloc(ep->remote_dev->signal_pool, GFP_KERNEL, + &src); + if (!status) { + err = -ENOMEM; + dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n", + __func__, __LINE__, err); + goto alloc_fail; + } + status->val = val; + status->src_dma_addr = src; + status->ep = ep; + src += offsetof(struct scif_status, val); + tx = ddev->device_prep_dma_memcpy(chan, dst, src, sizeof(val), + DMA_PREP_INTERRUPT); + } + if (!tx) { + err = -ENOMEM; + dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n", + __func__, __LINE__, err); + goto dma_fail; + } + if (!x100) { + tx->callback = scif_prog_signal_cb; + tx->callback_param = status; + } + cookie = tx->tx_submit(tx); + if (dma_submit_error(cookie)) { + err = -EIO; + dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n", + __func__, __LINE__, err); + goto dma_fail; + } + dma_async_issue_pending(chan); + return 0; +dma_fail: + if (!x100) + dma_pool_free(ep->remote_dev->signal_pool, status, + src - offsetof(struct scif_status, val)); +alloc_fail: + return err; +} + +/* + * scif_prog_signal: + * @epd - Endpoint Descriptor + * @offset - registered address to write @val to + * @val - Value to be written at @offset + * @type - Type of the window. + * + * Arrange to write a value to the registered offset after ensuring that the + * offset provided is indeed valid. + */ +int scif_prog_signal(scif_epd_t epd, off_t offset, u64 val, + enum scif_window_type type) +{ + struct scif_endpt *ep = (struct scif_endpt *)epd; + struct scif_window *window = NULL; + struct scif_rma_req req; + dma_addr_t dst_dma_addr; + int err; + + mutex_lock(&ep->rma_info.rma_lock); + req.out_window = &window; + req.offset = offset; + req.nr_bytes = sizeof(u64); + req.prot = SCIF_PROT_WRITE; + req.type = SCIF_WINDOW_SINGLE; + if (type == SCIF_WINDOW_SELF) + req.head = &ep->rma_info.reg_list; + else + req.head = &ep->rma_info.remote_reg_list; + /* Does a valid window exist? */ + err = scif_query_window(&req); + if (err) { + dev_err(scif_info.mdev.this_device, + "%s %d err %d\n", __func__, __LINE__, err); + goto unlock_ret; + } + + if (scif_is_mgmt_node() && scifdev_self(ep->remote_dev)) { + u64 *dst_virt; + + if (type == SCIF_WINDOW_SELF) + dst_virt = scif_get_local_va(offset, window); + else + dst_virt = + scif_get_local_va(offset, (struct scif_window *) + window->peer_window); + *dst_virt = val; + } else { + dst_dma_addr = __scif_off_to_dma_addr(window, offset); + err = _scif_prog_signal(epd, dst_dma_addr, val); + } +unlock_ret: + mutex_unlock(&ep->rma_info.rma_lock); + return err; +} + +static int _scif_fence_wait(scif_epd_t epd, int mark) +{ + struct scif_endpt *ep = (struct scif_endpt *)epd; + dma_cookie_t cookie = mark & ~SCIF_REMOTE_FENCE; + int err; + + /* Wait for DMA callback in scif_fence_mark_cb(..) */ + err = wait_event_interruptible_timeout(ep->rma_info.markwq, + dma_async_is_tx_complete( + ep->rma_info.dma_chan, + cookie, NULL, NULL) == + DMA_COMPLETE, + SCIF_NODE_ALIVE_TIMEOUT); + if (!err) + err = -ETIMEDOUT; + else if (err > 0) + err = 0; + return err; +} + +/** + * scif_rma_handle_remote_fences: + * + * This routine services remote fence requests. + */ +void scif_rma_handle_remote_fences(void) +{ + struct list_head *item, *tmp; + struct scif_remote_fence_info *fence; + struct scif_endpt *ep; + int mark, err; + + might_sleep(); + mutex_lock(&scif_info.fencelock); + list_for_each_safe(item, tmp, &scif_info.fence) { + fence = list_entry(item, struct scif_remote_fence_info, + list); + /* Remove fence from global list */ + list_del(&fence->list); + + /* Initiate the fence operation */ + ep = (struct scif_endpt *)fence->msg.payload[0]; + mark = fence->msg.payload[2]; + err = _scif_fence_wait(ep, mark); + if (err) + fence->msg.uop = SCIF_WAIT_NACK; + else + fence->msg.uop = SCIF_WAIT_ACK; + fence->msg.payload[0] = ep->remote_ep; + scif_nodeqp_send(ep->remote_dev, &fence->msg); + kfree(fence); + if (!atomic_sub_return(1, &ep->rma_info.fence_refcount)) + schedule_work(&scif_info.misc_work); + } + mutex_unlock(&scif_info.fencelock); +} + +static int _scif_send_fence(scif_epd_t epd, int uop, int mark, int *out_mark) +{ + int err; + struct scifmsg msg; + struct scif_fence_info *fence_req; + struct scif_endpt *ep = (struct scif_endpt *)epd; + + fence_req = kmalloc(sizeof(*fence_req), GFP_KERNEL); + if (!fence_req) { + err = -ENOMEM; + goto error; + } + + fence_req->state = OP_IN_PROGRESS; + init_completion(&fence_req->comp); + + msg.src = ep->port; + msg.uop = uop; + msg.payload[0] = ep->remote_ep; + msg.payload[1] = (u64)fence_req; + if (uop == SCIF_WAIT) + msg.payload[2] = mark; + spin_lock(&ep->lock); + if (ep->state == SCIFEP_CONNECTED) + err = scif_nodeqp_send(ep->remote_dev, &msg); + else + err = -ENOTCONN; + spin_unlock(&ep->lock); + if (err) + goto error_free; +retry: + /* Wait for a SCIF_WAIT_(N)ACK message */ + err = wait_for_completion_timeout(&fence_req->comp, + SCIF_NODE_ALIVE_TIMEOUT); + if (!err && scifdev_alive(ep)) + goto retry; + if (!err) + err = -ENODEV; + if (err > 0) + err = 0; + mutex_lock(&ep->rma_info.rma_lock); + if (err < 0) { + if (fence_req->state == OP_IN_PROGRESS) + fence_req->state = OP_FAILED; + } + if (fence_req->state == OP_FAILED && !err) + err = -ENOMEM; + if (uop == SCIF_MARK && fence_req->state == OP_COMPLETED) + *out_mark = SCIF_REMOTE_FENCE | fence_req->dma_mark; + mutex_unlock(&ep->rma_info.rma_lock); +error_free: + kfree(fence_req); +error: + return err; +} + +/** + * scif_send_fence_mark: + * @epd: end point descriptor. + * @out_mark: Output DMA mark reported by peer. + * + * Send a remote fence mark request. + */ +static int scif_send_fence_mark(scif_epd_t epd, int *out_mark) +{ + return _scif_send_fence(epd, SCIF_MARK, 0, out_mark); +} + +/** + * scif_send_fence_wait: + * @epd: end point descriptor. + * @mark: DMA mark to wait for. + * + * Send a remote fence wait request. + */ +static int scif_send_fence_wait(scif_epd_t epd, int mark) +{ + return _scif_send_fence(epd, SCIF_WAIT, mark, NULL); +} + +static int _scif_send_fence_signal_wait(struct scif_endpt *ep, + struct scif_fence_info *fence_req) +{ + int err; + +retry: + /* Wait for a SCIF_SIG_(N)ACK message */ + err = wait_for_completion_timeout(&fence_req->comp, + SCIF_NODE_ALIVE_TIMEOUT); + if (!err && scifdev_alive(ep)) + goto retry; + if (!err) + err = -ENODEV; + if (err > 0) + err = 0; + if (err < 0) { + mutex_lock(&ep->rma_info.rma_lock); + if (fence_req->state == OP_IN_PROGRESS) + fence_req->state = OP_FAILED; + mutex_unlock(&ep->rma_info.rma_lock); + } + if (fence_req->state == OP_FAILED && !err) + err = -ENXIO; + return err; +} + +/** + * scif_send_fence_signal: + * @epd - endpoint descriptor + * @loff - local offset + * @lval - local value to write to loffset + * @roff - remote offset + * @rval - remote value to write to roffset + * @flags - flags + * + * Sends a remote fence signal request + */ +static int scif_send_fence_signal(scif_epd_t epd, off_t roff, u64 rval, + off_t loff, u64 lval, int flags) +{ + int err = 0; + struct scifmsg msg; + struct scif_fence_info *fence_req; + struct scif_endpt *ep = (struct scif_endpt *)epd; + + fence_req = kmalloc(sizeof(*fence_req), GFP_KERNEL); + if (!fence_req) { + err = -ENOMEM; + goto error; + } + + fence_req->state = OP_IN_PROGRESS; + init_completion(&fence_req->comp); + msg.src = ep->port; + if (flags & SCIF_SIGNAL_LOCAL) { + msg.uop = SCIF_SIG_LOCAL; + msg.payload[0] = ep->remote_ep; + msg.payload[1] = roff; + msg.payload[2] = rval; + msg.payload[3] = (u64)fence_req; + spin_lock(&ep->lock); + if (ep->state == SCIFEP_CONNECTED) + err = scif_nodeqp_send(ep->remote_dev, &msg); + else + err = -ENOTCONN; + spin_unlock(&ep->lock); + if (err) + goto error_free; + err = _scif_send_fence_signal_wait(ep, fence_req); + if (err) + goto error_free; + } + fence_req->state = OP_IN_PROGRESS; + + if (flags & SCIF_SIGNAL_REMOTE) { + msg.uop = SCIF_SIG_REMOTE; + msg.payload[0] = ep->remote_ep; + msg.payload[1] = loff; + msg.payload[2] = lval; + msg.payload[3] = (u64)fence_req; + spin_lock(&ep->lock); + if (ep->state == SCIFEP_CONNECTED) + err = scif_nodeqp_send(ep->remote_dev, &msg); + else + err = -ENOTCONN; + spin_unlock(&ep->lock); + if (err) + goto error_free; + err = _scif_send_fence_signal_wait(ep, fence_req); + } +error_free: + kfree(fence_req); +error: + return err; +} + +static void scif_fence_mark_cb(void *arg) +{ + struct scif_endpt *ep = (struct scif_endpt *)arg; + + wake_up_interruptible(&ep->rma_info.markwq); + atomic_dec(&ep->rma_info.fence_refcount); +} + +/* + * _scif_fence_mark: + * + * @epd - endpoint descriptor + * Set up a mark for this endpoint and return the value of the mark. + */ +int _scif_fence_mark(scif_epd_t epd, int *mark) +{ + struct scif_endpt *ep = (struct scif_endpt *)epd; + struct dma_chan *chan = ep->rma_info.dma_chan; + struct dma_device *ddev = chan->device; + struct dma_async_tx_descriptor *tx; + dma_cookie_t cookie; + int err; + + tx = ddev->device_prep_dma_memcpy(chan, 0, 0, 0, DMA_PREP_FENCE); + if (!tx) { + err = -ENOMEM; + dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n", + __func__, __LINE__, err); + return err; + } + cookie = tx->tx_submit(tx); + if (dma_submit_error(cookie)) { + err = (int)cookie; + dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n", + __func__, __LINE__, err); + return err; + } + dma_async_issue_pending(chan); + tx = ddev->device_prep_dma_interrupt(chan, DMA_PREP_INTERRUPT); + if (!tx) { + err = -ENOMEM; + dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n", + __func__, __LINE__, err); + return err; + } + tx->callback = scif_fence_mark_cb; + tx->callback_param = ep; + *mark = cookie = tx->tx_submit(tx); + if (dma_submit_error(cookie)) { + err = (int)cookie; + dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n", + __func__, __LINE__, err); + return err; + } + atomic_inc(&ep->rma_info.fence_refcount); + dma_async_issue_pending(chan); + return 0; +} + +#define SCIF_LOOPB_MAGIC_MARK 0xdead + +int scif_fence_mark(scif_epd_t epd, int flags, int *mark) +{ + struct scif_endpt *ep = (struct scif_endpt *)epd; + int err = 0; + + dev_dbg(scif_info.mdev.this_device, + "SCIFAPI fence_mark: ep %p flags 0x%x mark 0x%x\n", + ep, flags, *mark); + err = scif_verify_epd(ep); + if (err) + return err; + + /* Invalid flags? */ + if (flags & ~(SCIF_FENCE_INIT_SELF | SCIF_FENCE_INIT_PEER)) + return -EINVAL; + + /* At least one of init self or peer RMA should be set */ + if (!(flags & (SCIF_FENCE_INIT_SELF | SCIF_FENCE_INIT_PEER))) + return -EINVAL; + + /* Exactly one of init self or peer RMA should be set but not both */ + if ((flags & SCIF_FENCE_INIT_SELF) && (flags & SCIF_FENCE_INIT_PEER)) + return -EINVAL; + + /* + * Management node loopback does not need to use DMA. + * Return a valid mark to be symmetric. + */ + if (scifdev_self(ep->remote_dev) && scif_is_mgmt_node()) { + *mark = SCIF_LOOPB_MAGIC_MARK; + return 0; + } + + if (flags & SCIF_FENCE_INIT_SELF) + err = _scif_fence_mark(epd, mark); + else + err = scif_send_fence_mark(ep, mark); + + if (err) + dev_err(scif_info.mdev.this_device, + "%s %d err %d\n", __func__, __LINE__, err); + dev_dbg(scif_info.mdev.this_device, + "SCIFAPI fence_mark: ep %p flags 0x%x mark 0x%x err %d\n", + ep, flags, *mark, err); + return err; +} +EXPORT_SYMBOL_GPL(scif_fence_mark); + +int scif_fence_wait(scif_epd_t epd, int mark) +{ + struct scif_endpt *ep = (struct scif_endpt *)epd; + int err = 0; + + dev_dbg(scif_info.mdev.this_device, + "SCIFAPI fence_wait: ep %p mark 0x%x\n", + ep, mark); + err = scif_verify_epd(ep); + if (err) + return err; + /* + * Management node loopback does not need to use DMA. + * The only valid mark provided is 0 so simply + * return success if the mark is valid. + */ + if (scifdev_self(ep->remote_dev) && scif_is_mgmt_node()) { + if (mark == SCIF_LOOPB_MAGIC_MARK) + return 0; + else + return -EINVAL; + } + if (mark & SCIF_REMOTE_FENCE) + err = scif_send_fence_wait(epd, mark); + else + err = _scif_fence_wait(epd, mark); + if (err < 0) + dev_err(scif_info.mdev.this_device, + "%s %d err %d\n", __func__, __LINE__, err); + return err; +} +EXPORT_SYMBOL_GPL(scif_fence_wait); + +int scif_fence_signal(scif_epd_t epd, off_t loff, u64 lval, + off_t roff, u64 rval, int flags) +{ + struct scif_endpt *ep = (struct scif_endpt *)epd; + int err = 0; + + dev_dbg(scif_info.mdev.this_device, + "SCIFAPI fence_signal: ep %p loff 0x%lx lval 0x%llx roff 0x%lx rval 0x%llx flags 0x%x\n", + ep, loff, lval, roff, rval, flags); + err = scif_verify_epd(ep); + if (err) + return err; + + /* Invalid flags? */ + if (flags & ~(SCIF_FENCE_INIT_SELF | SCIF_FENCE_INIT_PEER | + SCIF_SIGNAL_LOCAL | SCIF_SIGNAL_REMOTE)) + return -EINVAL; + + /* At least one of init self or peer RMA should be set */ + if (!(flags & (SCIF_FENCE_INIT_SELF | SCIF_FENCE_INIT_PEER))) + return -EINVAL; + + /* Exactly one of init self or peer RMA should be set but not both */ + if ((flags & SCIF_FENCE_INIT_SELF) && (flags & SCIF_FENCE_INIT_PEER)) + return -EINVAL; + + /* At least one of SCIF_SIGNAL_LOCAL or SCIF_SIGNAL_REMOTE required */ + if (!(flags & (SCIF_SIGNAL_LOCAL | SCIF_SIGNAL_REMOTE))) + return -EINVAL; + + /* Only Dword offsets allowed */ + if ((flags & SCIF_SIGNAL_LOCAL) && (loff & (sizeof(u32) - 1))) + return -EINVAL; + + /* Only Dword aligned offsets allowed */ + if ((flags & SCIF_SIGNAL_REMOTE) && (roff & (sizeof(u32) - 1))) + return -EINVAL; + + if (flags & SCIF_FENCE_INIT_PEER) { + err = scif_send_fence_signal(epd, roff, rval, loff, + lval, flags); + } else { + /* Local Signal in Local RAS */ + if (flags & SCIF_SIGNAL_LOCAL) { + err = scif_prog_signal(epd, loff, lval, + SCIF_WINDOW_SELF); + if (err) + goto error_ret; + } + + /* Signal in Remote RAS */ + if (flags & SCIF_SIGNAL_REMOTE) + err = scif_prog_signal(epd, roff, + rval, SCIF_WINDOW_PEER); + } +error_ret: + if (err) + dev_err(scif_info.mdev.this_device, + "%s %d err %d\n", __func__, __LINE__, err); + return err; +} +EXPORT_SYMBOL_GPL(scif_fence_signal); diff --git a/drivers/misc/mic/scif/scif_main.c b/drivers/misc/mic/scif/scif_main.c new file mode 100644 index 000000000..36d847af1 --- /dev/null +++ b/drivers/misc/mic/scif/scif_main.c @@ -0,0 +1,359 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2014 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Intel SCIF driver. + * + */ +#include +#include + +#include +#include "../common/mic_dev.h" +#include "../bus/scif_bus.h" +#include "scif_peer_bus.h" +#include "scif_main.h" +#include "scif_map.h" + +struct scif_info scif_info = { + .mdev = { + .minor = MISC_DYNAMIC_MINOR, + .name = "scif", + .fops = &scif_fops, + } +}; + +struct scif_dev *scif_dev; +struct kmem_cache *unaligned_cache; +static atomic_t g_loopb_cnt; + +/* Runs in the context of intr_wq */ +static void scif_intr_bh_handler(struct work_struct *work) +{ + struct scif_dev *scifdev = + container_of(work, struct scif_dev, intr_bh); + + if (scifdev_self(scifdev)) + scif_loopb_msg_handler(scifdev, scifdev->qpairs); + else + scif_nodeqp_intrhandler(scifdev, scifdev->qpairs); +} + +int scif_setup_intr_wq(struct scif_dev *scifdev) +{ + if (!scifdev->intr_wq) { + snprintf(scifdev->intr_wqname, sizeof(scifdev->intr_wqname), + "SCIF INTR %d", scifdev->node); + scifdev->intr_wq = + alloc_ordered_workqueue(scifdev->intr_wqname, 0); + if (!scifdev->intr_wq) + return -ENOMEM; + INIT_WORK(&scifdev->intr_bh, scif_intr_bh_handler); + } + return 0; +} + +void scif_destroy_intr_wq(struct scif_dev *scifdev) +{ + if (scifdev->intr_wq) { + destroy_workqueue(scifdev->intr_wq); + scifdev->intr_wq = NULL; + } +} + +irqreturn_t scif_intr_handler(int irq, void *data) +{ + struct scif_dev *scifdev = data; + struct scif_hw_dev *sdev = scifdev->sdev; + + sdev->hw_ops->ack_interrupt(sdev, scifdev->db); + queue_work(scifdev->intr_wq, &scifdev->intr_bh); + return IRQ_HANDLED; +} + +static void scif_qp_setup_handler(struct work_struct *work) +{ + struct scif_dev *scifdev = container_of(work, struct scif_dev, + qp_dwork.work); + struct scif_hw_dev *sdev = scifdev->sdev; + dma_addr_t da = 0; + int err; + + if (scif_is_mgmt_node()) { + struct mic_bootparam *bp = sdev->dp; + + da = bp->scif_card_dma_addr; + scifdev->rdb = bp->h2c_scif_db; + } else { + struct mic_bootparam __iomem *bp = sdev->rdp; + + da = readq(&bp->scif_host_dma_addr); + scifdev->rdb = ioread8(&bp->c2h_scif_db); + } + if (da) { + err = scif_qp_response(da, scifdev); + if (err) + dev_err(&scifdev->sdev->dev, + "scif_qp_response err %d\n", err); + } else { + schedule_delayed_work(&scifdev->qp_dwork, + msecs_to_jiffies(1000)); + } +} + +static int scif_setup_scifdev(void) +{ + /* We support a maximum of 129 SCIF nodes including the mgmt node */ +#define MAX_SCIF_NODES 129 + int i; + u8 num_nodes = MAX_SCIF_NODES; + + scif_dev = kcalloc(num_nodes, sizeof(*scif_dev), GFP_KERNEL); + if (!scif_dev) + return -ENOMEM; + for (i = 0; i < num_nodes; i++) { + struct scif_dev *scifdev = &scif_dev[i]; + + scifdev->node = i; + scifdev->exit = OP_IDLE; + init_waitqueue_head(&scifdev->disconn_wq); + mutex_init(&scifdev->lock); + INIT_WORK(&scifdev->peer_add_work, scif_add_peer_device); + INIT_DELAYED_WORK(&scifdev->p2p_dwork, + scif_poll_qp_state); + INIT_DELAYED_WORK(&scifdev->qp_dwork, + scif_qp_setup_handler); + INIT_LIST_HEAD(&scifdev->p2p); + RCU_INIT_POINTER(scifdev->spdev, NULL); + } + return 0; +} + +static void scif_destroy_scifdev(void) +{ + kfree(scif_dev); +} + +static int scif_probe(struct scif_hw_dev *sdev) +{ + struct scif_dev *scifdev = &scif_dev[sdev->dnode]; + int rc; + + dev_set_drvdata(&sdev->dev, sdev); + scifdev->sdev = sdev; + + if (1 == atomic_add_return(1, &g_loopb_cnt)) { + struct scif_dev *loopb_dev = &scif_dev[sdev->snode]; + + loopb_dev->sdev = sdev; + rc = scif_setup_loopback_qp(loopb_dev); + if (rc) + goto exit; + } + + rc = scif_setup_intr_wq(scifdev); + if (rc) + goto destroy_loopb; + rc = scif_setup_qp(scifdev); + if (rc) + goto destroy_intr; + scifdev->db = sdev->hw_ops->next_db(sdev); + scifdev->cookie = sdev->hw_ops->request_irq(sdev, scif_intr_handler, + "SCIF_INTR", scifdev, + scifdev->db); + if (IS_ERR(scifdev->cookie)) { + rc = PTR_ERR(scifdev->cookie); + goto free_qp; + } + if (scif_is_mgmt_node()) { + struct mic_bootparam *bp = sdev->dp; + + bp->c2h_scif_db = scifdev->db; + bp->scif_host_dma_addr = scifdev->qp_dma_addr; + } else { + struct mic_bootparam __iomem *bp = sdev->rdp; + + iowrite8(scifdev->db, &bp->h2c_scif_db); + writeq(scifdev->qp_dma_addr, &bp->scif_card_dma_addr); + } + schedule_delayed_work(&scifdev->qp_dwork, + msecs_to_jiffies(1000)); + return rc; +free_qp: + scif_free_qp(scifdev); +destroy_intr: + scif_destroy_intr_wq(scifdev); +destroy_loopb: + if (atomic_dec_and_test(&g_loopb_cnt)) + scif_destroy_loopback_qp(&scif_dev[sdev->snode]); +exit: + return rc; +} + +void scif_stop(struct scif_dev *scifdev) +{ + struct scif_dev *dev; + int i; + + for (i = scif_info.maxid; i >= 0; i--) { + dev = &scif_dev[i]; + if (scifdev_self(dev)) + continue; + scif_handle_remove_node(i); + } +} + +static void scif_remove(struct scif_hw_dev *sdev) +{ + struct scif_dev *scifdev = &scif_dev[sdev->dnode]; + + if (scif_is_mgmt_node()) { + struct mic_bootparam *bp = sdev->dp; + + bp->c2h_scif_db = -1; + bp->scif_host_dma_addr = 0x0; + } else { + struct mic_bootparam __iomem *bp = sdev->rdp; + + iowrite8(-1, &bp->h2c_scif_db); + writeq(0x0, &bp->scif_card_dma_addr); + } + if (scif_is_mgmt_node()) { + scif_disconnect_node(scifdev->node, true); + } else { + scif_info.card_initiated_exit = true; + scif_stop(scifdev); + } + if (atomic_dec_and_test(&g_loopb_cnt)) + scif_destroy_loopback_qp(&scif_dev[sdev->snode]); + if (scifdev->cookie) { + sdev->hw_ops->free_irq(sdev, scifdev->cookie, scifdev); + scifdev->cookie = NULL; + } + scif_destroy_intr_wq(scifdev); + cancel_delayed_work(&scifdev->qp_dwork); + scif_free_qp(scifdev); + scifdev->rdb = -1; + scifdev->sdev = NULL; +} + +static struct scif_hw_dev_id id_table[] = { + { MIC_SCIF_DEV, SCIF_DEV_ANY_ID }, + { 0 }, +}; + +static struct scif_driver scif_driver = { + .driver.name = KBUILD_MODNAME, + .driver.owner = THIS_MODULE, + .id_table = id_table, + .probe = scif_probe, + .remove = scif_remove, +}; + +static int _scif_init(void) +{ + int rc; + + mutex_init(&scif_info.eplock); + spin_lock_init(&scif_info.rmalock); + spin_lock_init(&scif_info.nb_connect_lock); + spin_lock_init(&scif_info.port_lock); + mutex_init(&scif_info.conflock); + mutex_init(&scif_info.connlock); + mutex_init(&scif_info.fencelock); + INIT_LIST_HEAD(&scif_info.uaccept); + INIT_LIST_HEAD(&scif_info.listen); + INIT_LIST_HEAD(&scif_info.zombie); + INIT_LIST_HEAD(&scif_info.connected); + INIT_LIST_HEAD(&scif_info.disconnected); + INIT_LIST_HEAD(&scif_info.rma); + INIT_LIST_HEAD(&scif_info.rma_tc); + INIT_LIST_HEAD(&scif_info.mmu_notif_cleanup); + INIT_LIST_HEAD(&scif_info.fence); + INIT_LIST_HEAD(&scif_info.nb_connect_list); + init_waitqueue_head(&scif_info.exitwq); + scif_info.rma_tc_limit = SCIF_RMA_TEMP_CACHE_LIMIT; + scif_info.en_msg_log = 0; + scif_info.p2p_enable = 1; + rc = scif_setup_scifdev(); + if (rc) + goto error; + unaligned_cache = kmem_cache_create("Unaligned_DMA", + SCIF_KMEM_UNALIGNED_BUF_SIZE, + 0, SLAB_HWCACHE_ALIGN, NULL); + if (!unaligned_cache) { + rc = -ENOMEM; + goto free_sdev; + } + INIT_WORK(&scif_info.misc_work, scif_misc_handler); + INIT_WORK(&scif_info.mmu_notif_work, scif_mmu_notif_handler); + INIT_WORK(&scif_info.conn_work, scif_conn_handler); + idr_init(&scif_ports); + return 0; +free_sdev: + scif_destroy_scifdev(); +error: + return rc; +} + +static void _scif_exit(void) +{ + idr_destroy(&scif_ports); + kmem_cache_destroy(unaligned_cache); + scif_destroy_scifdev(); +} + +static int __init scif_init(void) +{ + struct miscdevice *mdev = &scif_info.mdev; + int rc; + + _scif_init(); + iova_cache_get(); + rc = scif_peer_bus_init(); + if (rc) + goto exit; + rc = scif_register_driver(&scif_driver); + if (rc) + goto peer_bus_exit; + rc = misc_register(mdev); + if (rc) + goto unreg_scif; + scif_init_debugfs(); + return 0; +unreg_scif: + scif_unregister_driver(&scif_driver); +peer_bus_exit: + scif_peer_bus_exit(); +exit: + _scif_exit(); + return rc; +} + +static void __exit scif_exit(void) +{ + scif_exit_debugfs(); + misc_deregister(&scif_info.mdev); + scif_unregister_driver(&scif_driver); + scif_peer_bus_exit(); + iova_cache_put(); + _scif_exit(); +} + +module_init(scif_init); +module_exit(scif_exit); + +MODULE_DEVICE_TABLE(scif, id_table); +MODULE_AUTHOR("Intel Corporation"); +MODULE_DESCRIPTION("Intel(R) SCIF driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/misc/mic/scif/scif_main.h b/drivers/misc/mic/scif/scif_main.h new file mode 100644 index 000000000..0e5eff9ad --- /dev/null +++ b/drivers/misc/mic/scif/scif_main.h @@ -0,0 +1,283 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2014 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Intel SCIF driver. + * + */ +#ifndef SCIF_MAIN_H +#define SCIF_MAIN_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "../common/mic_dev.h" + +#define SCIF_MGMT_NODE 0 +#define SCIF_DEFAULT_WATCHDOG_TO 30 +#define SCIF_NODE_ACCEPT_TIMEOUT (3 * HZ) +#define SCIF_NODE_ALIVE_TIMEOUT (SCIF_DEFAULT_WATCHDOG_TO * HZ) +#define SCIF_RMA_TEMP_CACHE_LIMIT 0x20000 + +/* + * Generic state used for certain node QP message exchanges + * like Unregister, Alloc etc. + */ +enum scif_msg_state { + OP_IDLE = 1, + OP_IN_PROGRESS, + OP_COMPLETED, + OP_FAILED +}; + +/* + * struct scif_info - Global SCIF information + * + * @nodeid: Node ID this node is to others + * @maxid: Max known node ID + * @total: Total number of SCIF nodes + * @nr_zombies: number of zombie endpoints + * @eplock: Lock to synchronize listening, zombie endpoint lists + * @connlock: Lock to synchronize connected and disconnected lists + * @nb_connect_lock: Synchronize non blocking connect operations + * @port_lock: Synchronize access to SCIF ports + * @uaccept: List of user acceptreq waiting for acceptreg + * @listen: List of listening end points + * @zombie: List of zombie end points with pending RMA's + * @connected: List of end points in connected state + * @disconnected: List of end points in disconnected state + * @nb_connect_list: List for non blocking connections + * @misc_work: miscellaneous SCIF tasks + * @conflock: Lock to synchronize SCIF node configuration changes + * @en_msg_log: Enable debug message logging + * @p2p_enable: Enable P2P SCIF network + * @mdev: The MISC device + * @conn_work: Work for workqueue handling all connections + * @exitwq: Wait queue for waiting for an EXIT node QP message response + * @loopb_dev: Dummy SCIF device used for loopback + * @loopb_wq: Workqueue used for handling loopback messages + * @loopb_wqname[16]: Name of loopback workqueue + * @loopb_work: Used for submitting work to loopb_wq + * @loopb_recv_q: List of messages received on the loopb_wq + * @card_initiated_exit: set when the card has initiated the exit + * @rmalock: Synchronize access to RMA operations + * @fencelock: Synchronize access to list of remote fences requested. + * @rma: List of temporary registered windows to be destroyed. + * @rma_tc: List of temporary registered & cached Windows to be destroyed + * @fence: List of remote fence requests + * @mmu_notif_work: Work for registration caching MMU notifier workqueue + * @mmu_notif_cleanup: List of temporary cached windows for reg cache + * @rma_tc_limit: RMA temporary cache limit + */ +struct scif_info { + u8 nodeid; + u8 maxid; + u8 total; + u32 nr_zombies; + struct mutex eplock; + struct mutex connlock; + spinlock_t nb_connect_lock; + spinlock_t port_lock; + struct list_head uaccept; + struct list_head listen; + struct list_head zombie; + struct list_head connected; + struct list_head disconnected; + struct list_head nb_connect_list; + struct work_struct misc_work; + struct mutex conflock; + u8 en_msg_log; + u8 p2p_enable; + struct miscdevice mdev; + struct work_struct conn_work; + wait_queue_head_t exitwq; + struct scif_dev *loopb_dev; + struct workqueue_struct *loopb_wq; + char loopb_wqname[16]; + struct work_struct loopb_work; + struct list_head loopb_recv_q; + bool card_initiated_exit; + spinlock_t rmalock; + struct mutex fencelock; + struct list_head rma; + struct list_head rma_tc; + struct list_head fence; + struct work_struct mmu_notif_work; + struct list_head mmu_notif_cleanup; + unsigned long rma_tc_limit; +}; + +/* + * struct scif_p2p_info - SCIF mapping information used for P2P + * + * @ppi_peer_id - SCIF peer node id + * @ppi_sg - Scatter list for bar information (One for mmio and one for aper) + * @sg_nentries - Number of entries in the scatterlist + * @ppi_da: DMA address for MMIO and APER bars + * @ppi_len: Length of MMIO and APER bars + * @ppi_list: Link in list of mapping information + */ +struct scif_p2p_info { + u8 ppi_peer_id; + struct scatterlist *ppi_sg[2]; + u64 sg_nentries[2]; + dma_addr_t ppi_da[2]; + u64 ppi_len[2]; +#define SCIF_PPI_MMIO 0 +#define SCIF_PPI_APER 1 + struct list_head ppi_list; +}; + +/* + * struct scif_dev - SCIF remote device specific fields + * + * @node: Node id + * @p2p: List of P2P mapping information + * @qpairs: The node queue pair for exchanging control messages + * @intr_wq: Workqueue for handling Node QP messages + * @intr_wqname: Name of node QP workqueue for handling interrupts + * @intr_bh: Used for submitting work to intr_wq + * @lock: Lock used for synchronizing access to the scif device + * @sdev: SCIF hardware device on the SCIF hardware bus + * @db: doorbell the peer will trigger to generate an interrupt on self + * @rdb: Doorbell to trigger on the peer to generate an interrupt on the peer + * @cookie: Cookie received while registering the interrupt handler + * @peer_add_work: Work for handling device_add for peer devices + * @p2p_dwork: Delayed work to enable polling for P2P state + * @qp_dwork: Delayed work for enabling polling for remote QP information + * @p2p_retry: Number of times to retry polling of P2P state + * @base_addr: P2P aperture bar base address + * @mic_mw mmio: The peer MMIO information used for P2P + * @spdev: SCIF peer device on the SCIF peer bus + * @node_remove_ack_pending: True if a node_remove_ack is pending + * @exit_ack_pending: true if an exit_ack is pending + * @disconn_wq: Used while waiting for a node remove response + * @disconn_rescnt: Keeps track of number of node remove requests sent + * @exit: Status of exit message + * @qp_dma_addr: Queue pair DMA address passed to the peer + * @dma_ch_idx: Round robin index for DMA channels + * @signal_pool: DMA pool used for scheduling scif_fence_signal DMA's +*/ +struct scif_dev { + u8 node; + struct list_head p2p; + struct scif_qp *qpairs; + struct workqueue_struct *intr_wq; + char intr_wqname[16]; + struct work_struct intr_bh; + struct mutex lock; + struct scif_hw_dev *sdev; + int db; + int rdb; + struct mic_irq *cookie; + struct work_struct peer_add_work; + struct delayed_work p2p_dwork; + struct delayed_work qp_dwork; + int p2p_retry; + dma_addr_t base_addr; + struct mic_mw mmio; + struct scif_peer_dev __rcu *spdev; + bool node_remove_ack_pending; + bool exit_ack_pending; + wait_queue_head_t disconn_wq; + atomic_t disconn_rescnt; + enum scif_msg_state exit; + dma_addr_t qp_dma_addr; + int dma_ch_idx; + struct dma_pool *signal_pool; +}; + +extern bool scif_reg_cache_enable; +extern bool scif_ulimit_check; +extern struct scif_info scif_info; +extern struct idr scif_ports; +extern struct bus_type scif_peer_bus; +extern struct scif_dev *scif_dev; +extern const struct file_operations scif_fops; +extern const struct file_operations scif_anon_fops; + +/* Size of the RB for the Node QP */ +#define SCIF_NODE_QP_SIZE 0x10000 + +#include "scif_nodeqp.h" +#include "scif_rma.h" +#include "scif_rma_list.h" + +/* + * scifdev_self: + * @dev: The remote SCIF Device + * + * Returns true if the SCIF Device passed is the self aka Loopback SCIF device. + */ +static inline int scifdev_self(struct scif_dev *dev) +{ + return dev->node == scif_info.nodeid; +} + +static inline bool scif_is_mgmt_node(void) +{ + return !scif_info.nodeid; +} + +/* + * scifdev_is_p2p: + * @dev: The remote SCIF Device + * + * Returns true if the SCIF Device is a MIC Peer to Peer SCIF device. + */ +static inline bool scifdev_is_p2p(struct scif_dev *dev) +{ + if (scif_is_mgmt_node()) + return false; + else + return dev != &scif_dev[SCIF_MGMT_NODE] && + !scifdev_self(dev); +} + +/* + * scifdev_alive: + * @scifdev: The remote SCIF Device + * + * Returns true if the remote SCIF Device is running or sleeping for + * this endpoint. + */ +static inline int _scifdev_alive(struct scif_dev *scifdev) +{ + struct scif_peer_dev *spdev; + + rcu_read_lock(); + spdev = rcu_dereference(scifdev->spdev); + rcu_read_unlock(); + return !!spdev; +} + +#include "scif_epd.h" + +void __init scif_init_debugfs(void); +void scif_exit_debugfs(void); +int scif_setup_intr_wq(struct scif_dev *scifdev); +void scif_destroy_intr_wq(struct scif_dev *scifdev); +void scif_cleanup_scifdev(struct scif_dev *dev); +void scif_handle_remove_node(int node); +void scif_disconnect_node(u32 node_id, bool mgmt_initiated); +void scif_free_qp(struct scif_dev *dev); +void scif_misc_handler(struct work_struct *work); +void scif_stop(struct scif_dev *scifdev); +irqreturn_t scif_intr_handler(int irq, void *data); +#endif /* SCIF_MAIN_H */ diff --git a/drivers/misc/mic/scif/scif_map.h b/drivers/misc/mic/scif/scif_map.h new file mode 100644 index 000000000..3e86360ba --- /dev/null +++ b/drivers/misc/mic/scif/scif_map.h @@ -0,0 +1,136 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2014 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Intel SCIF driver. + * + */ +#ifndef SCIF_MAP_H +#define SCIF_MAP_H + +#include "../bus/scif_bus.h" + +static __always_inline void * +scif_alloc_coherent(dma_addr_t *dma_handle, + struct scif_dev *scifdev, size_t size, + gfp_t gfp) +{ + void *va; + + if (scifdev_self(scifdev)) { + va = kmalloc(size, gfp); + if (va) + *dma_handle = virt_to_phys(va); + } else { + va = dma_alloc_coherent(&scifdev->sdev->dev, + size, dma_handle, gfp); + if (va && scifdev_is_p2p(scifdev)) + *dma_handle = *dma_handle + scifdev->base_addr; + } + return va; +} + +static __always_inline void +scif_free_coherent(void *va, dma_addr_t local, + struct scif_dev *scifdev, size_t size) +{ + if (scifdev_self(scifdev)) { + kfree(va); + } else { + if (scifdev_is_p2p(scifdev) && local > scifdev->base_addr) + local = local - scifdev->base_addr; + dma_free_coherent(&scifdev->sdev->dev, + size, va, local); + } +} + +static __always_inline int +scif_map_single(dma_addr_t *dma_handle, + void *local, struct scif_dev *scifdev, size_t size) +{ + int err = 0; + + if (scifdev_self(scifdev)) { + *dma_handle = virt_to_phys((local)); + } else { + *dma_handle = dma_map_single(&scifdev->sdev->dev, + local, size, DMA_BIDIRECTIONAL); + if (dma_mapping_error(&scifdev->sdev->dev, *dma_handle)) + err = -ENOMEM; + else if (scifdev_is_p2p(scifdev)) + *dma_handle = *dma_handle + scifdev->base_addr; + } + if (err) + *dma_handle = 0; + return err; +} + +static __always_inline void +scif_unmap_single(dma_addr_t local, struct scif_dev *scifdev, + size_t size) +{ + if (!scifdev_self(scifdev)) { + if (scifdev_is_p2p(scifdev)) + local = local - scifdev->base_addr; + dma_unmap_single(&scifdev->sdev->dev, local, + size, DMA_BIDIRECTIONAL); + } +} + +static __always_inline void * +scif_ioremap(dma_addr_t phys, size_t size, struct scif_dev *scifdev) +{ + void *out_virt; + struct scif_hw_dev *sdev = scifdev->sdev; + + if (scifdev_self(scifdev)) + out_virt = phys_to_virt(phys); + else + out_virt = (void __force *) + sdev->hw_ops->ioremap(sdev, phys, size); + return out_virt; +} + +static __always_inline void +scif_iounmap(void *virt, size_t len, struct scif_dev *scifdev) +{ + if (!scifdev_self(scifdev)) { + struct scif_hw_dev *sdev = scifdev->sdev; + + sdev->hw_ops->iounmap(sdev, (void __force __iomem *)virt); + } +} + +static __always_inline int +scif_map_page(dma_addr_t *dma_handle, struct page *page, + struct scif_dev *scifdev) +{ + int err = 0; + + if (scifdev_self(scifdev)) { + *dma_handle = page_to_phys(page); + } else { + struct scif_hw_dev *sdev = scifdev->sdev; + *dma_handle = dma_map_page(&sdev->dev, + page, 0x0, PAGE_SIZE, + DMA_BIDIRECTIONAL); + if (dma_mapping_error(&sdev->dev, *dma_handle)) + err = -ENOMEM; + else if (scifdev_is_p2p(scifdev)) + *dma_handle = *dma_handle + scifdev->base_addr; + } + if (err) + *dma_handle = 0; + return err; +} +#endif /* SCIF_MAP_H */ diff --git a/drivers/misc/mic/scif/scif_mmap.c b/drivers/misc/mic/scif/scif_mmap.c new file mode 100644 index 000000000..928211677 --- /dev/null +++ b/drivers/misc/mic/scif/scif_mmap.c @@ -0,0 +1,699 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2015 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Intel SCIF driver. + * + */ +#include "scif_main.h" + +/* + * struct scif_vma_info - Information about a remote memory mapping + * created via scif_mmap(..) + * @vma: VM area struct + * @list: link to list of active vmas + */ +struct scif_vma_info { + struct vm_area_struct *vma; + struct list_head list; +}; + +void scif_recv_munmap(struct scif_dev *scifdev, struct scifmsg *msg) +{ + struct scif_rma_req req; + struct scif_window *window = NULL; + struct scif_window *recv_window = + (struct scif_window *)msg->payload[0]; + struct scif_endpt *ep; + + ep = (struct scif_endpt *)recv_window->ep; + req.out_window = &window; + req.offset = recv_window->offset; + req.prot = recv_window->prot; + req.nr_bytes = recv_window->nr_pages << PAGE_SHIFT; + req.type = SCIF_WINDOW_FULL; + req.head = &ep->rma_info.reg_list; + msg->payload[0] = ep->remote_ep; + + mutex_lock(&ep->rma_info.rma_lock); + /* Does a valid window exist? */ + if (scif_query_window(&req)) { + dev_err(&scifdev->sdev->dev, + "%s %d -ENXIO\n", __func__, __LINE__); + msg->uop = SCIF_UNREGISTER_ACK; + goto error; + } + + scif_put_window(window, window->nr_pages); + + if (!window->ref_count) { + atomic_inc(&ep->rma_info.tw_refcount); + ep->rma_info.async_list_del = 1; + list_del_init(&window->list); + scif_free_window_offset(ep, window, window->offset); + } +error: + mutex_unlock(&ep->rma_info.rma_lock); + if (window && !window->ref_count) + scif_queue_for_cleanup(window, &scif_info.rma); +} + +/* + * Remove valid remote memory mappings created via scif_mmap(..) from the + * process address space since the remote node is lost + */ +static void __scif_zap_mmaps(struct scif_endpt *ep) +{ + struct list_head *item; + struct scif_vma_info *info; + struct vm_area_struct *vma; + unsigned long size; + + spin_lock(&ep->lock); + list_for_each(item, &ep->rma_info.vma_list) { + info = list_entry(item, struct scif_vma_info, list); + vma = info->vma; + size = vma->vm_end - vma->vm_start; + zap_vma_ptes(vma, vma->vm_start, size); + dev_dbg(scif_info.mdev.this_device, + "%s ep %p zap vma %p size 0x%lx\n", + __func__, ep, info->vma, size); + } + spin_unlock(&ep->lock); +} + +/* + * Traverse the list of endpoints for a particular remote node and + * zap valid remote memory mappings since the remote node is lost + */ +static void _scif_zap_mmaps(int node, struct list_head *head) +{ + struct scif_endpt *ep; + struct list_head *item; + + mutex_lock(&scif_info.connlock); + list_for_each(item, head) { + ep = list_entry(item, struct scif_endpt, list); + if (ep->remote_dev->node == node) + __scif_zap_mmaps(ep); + } + mutex_unlock(&scif_info.connlock); +} + +/* + * Wrapper for removing remote memory mappings for a particular node. This API + * is called by peer nodes as part of handling a lost node. + */ +void scif_zap_mmaps(int node) +{ + _scif_zap_mmaps(node, &scif_info.connected); + _scif_zap_mmaps(node, &scif_info.disconnected); +} + +/* + * This API is only called while handling a lost node: + * a) Remote node is dead. + * b) Remote memory mappings have been zapped + * So we can traverse the remote_reg_list without any locks. Since + * the window has not yet been unregistered we can drop the ref count + * and queue it to the cleanup thread. + */ +static void __scif_cleanup_rma_for_zombies(struct scif_endpt *ep) +{ + struct list_head *pos, *tmp; + struct scif_window *window; + + list_for_each_safe(pos, tmp, &ep->rma_info.remote_reg_list) { + window = list_entry(pos, struct scif_window, list); + if (window->ref_count) + scif_put_window(window, window->nr_pages); + else + dev_err(scif_info.mdev.this_device, + "%s %d unexpected\n", + __func__, __LINE__); + if (!window->ref_count) { + atomic_inc(&ep->rma_info.tw_refcount); + list_del_init(&window->list); + scif_queue_for_cleanup(window, &scif_info.rma); + } + } +} + +/* Cleanup remote registration lists for zombie endpoints */ +void scif_cleanup_rma_for_zombies(int node) +{ + struct scif_endpt *ep; + struct list_head *item; + + mutex_lock(&scif_info.eplock); + list_for_each(item, &scif_info.zombie) { + ep = list_entry(item, struct scif_endpt, list); + if (ep->remote_dev && ep->remote_dev->node == node) + __scif_cleanup_rma_for_zombies(ep); + } + mutex_unlock(&scif_info.eplock); + flush_work(&scif_info.misc_work); +} + +/* Insert the VMA into the per endpoint VMA list */ +static int scif_insert_vma(struct scif_endpt *ep, struct vm_area_struct *vma) +{ + struct scif_vma_info *info; + int err = 0; + + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (!info) { + err = -ENOMEM; + goto done; + } + info->vma = vma; + spin_lock(&ep->lock); + list_add_tail(&info->list, &ep->rma_info.vma_list); + spin_unlock(&ep->lock); +done: + return err; +} + +/* Delete the VMA from the per endpoint VMA list */ +static void scif_delete_vma(struct scif_endpt *ep, struct vm_area_struct *vma) +{ + struct list_head *item; + struct scif_vma_info *info; + + spin_lock(&ep->lock); + list_for_each(item, &ep->rma_info.vma_list) { + info = list_entry(item, struct scif_vma_info, list); + if (info->vma == vma) { + list_del(&info->list); + kfree(info); + break; + } + } + spin_unlock(&ep->lock); +} + +static phys_addr_t scif_get_phys(phys_addr_t phys, struct scif_endpt *ep) +{ + struct scif_dev *scifdev = (struct scif_dev *)ep->remote_dev; + struct scif_hw_dev *sdev = scifdev->sdev; + phys_addr_t out_phys, apt_base = 0; + + /* + * If the DMA address is card relative then we need to add the + * aperture base for mmap to work correctly + */ + if (!scifdev_self(scifdev) && sdev->aper && sdev->card_rel_da) + apt_base = sdev->aper->pa; + out_phys = apt_base + phys; + return out_phys; +} + +int scif_get_pages(scif_epd_t epd, off_t offset, size_t len, + struct scif_range **pages) +{ + struct scif_endpt *ep = (struct scif_endpt *)epd; + struct scif_rma_req req; + struct scif_window *window = NULL; + int nr_pages, err, i; + + dev_dbg(scif_info.mdev.this_device, + "SCIFAPI get_pinned_pages: ep %p offset 0x%lx len 0x%lx\n", + ep, offset, len); + err = scif_verify_epd(ep); + if (err) + return err; + + if (!len || (offset < 0) || + (offset + len < offset) || + (ALIGN(offset, PAGE_SIZE) != offset) || + (ALIGN(len, PAGE_SIZE) != len)) + return -EINVAL; + + nr_pages = len >> PAGE_SHIFT; + + req.out_window = &window; + req.offset = offset; + req.prot = 0; + req.nr_bytes = len; + req.type = SCIF_WINDOW_SINGLE; + req.head = &ep->rma_info.remote_reg_list; + + mutex_lock(&ep->rma_info.rma_lock); + /* Does a valid window exist? */ + err = scif_query_window(&req); + if (err) { + dev_err(&ep->remote_dev->sdev->dev, + "%s %d err %d\n", __func__, __LINE__, err); + goto error; + } + + /* Allocate scif_range */ + *pages = kzalloc(sizeof(**pages), GFP_KERNEL); + if (!*pages) { + err = -ENOMEM; + goto error; + } + + /* Allocate phys addr array */ + (*pages)->phys_addr = scif_zalloc(nr_pages * sizeof(dma_addr_t)); + if (!((*pages)->phys_addr)) { + err = -ENOMEM; + goto error; + } + + if (scif_is_mgmt_node() && !scifdev_self(ep->remote_dev)) { + /* Allocate virtual address array */ + ((*pages)->va = scif_zalloc(nr_pages * sizeof(void *))); + if (!(*pages)->va) { + err = -ENOMEM; + goto error; + } + } + /* Populate the values */ + (*pages)->cookie = window; + (*pages)->nr_pages = nr_pages; + (*pages)->prot_flags = window->prot; + + for (i = 0; i < nr_pages; i++) { + (*pages)->phys_addr[i] = + __scif_off_to_dma_addr(window, offset + + (i * PAGE_SIZE)); + (*pages)->phys_addr[i] = scif_get_phys((*pages)->phys_addr[i], + ep); + if (scif_is_mgmt_node() && !scifdev_self(ep->remote_dev)) + (*pages)->va[i] = + ep->remote_dev->sdev->aper->va + + (*pages)->phys_addr[i] - + ep->remote_dev->sdev->aper->pa; + } + + scif_get_window(window, nr_pages); +error: + mutex_unlock(&ep->rma_info.rma_lock); + if (err) { + if (*pages) { + scif_free((*pages)->phys_addr, + nr_pages * sizeof(dma_addr_t)); + scif_free((*pages)->va, + nr_pages * sizeof(void *)); + kfree(*pages); + *pages = NULL; + } + dev_err(&ep->remote_dev->sdev->dev, + "%s %d err %d\n", __func__, __LINE__, err); + } + return err; +} +EXPORT_SYMBOL_GPL(scif_get_pages); + +int scif_put_pages(struct scif_range *pages) +{ + struct scif_endpt *ep; + struct scif_window *window; + struct scifmsg msg; + + if (!pages || !pages->cookie) + return -EINVAL; + + window = pages->cookie; + + if (!window || window->magic != SCIFEP_MAGIC) + return -EINVAL; + + ep = (struct scif_endpt *)window->ep; + /* + * If the state is SCIFEP_CONNECTED or SCIFEP_DISCONNECTED then the + * callee should be allowed to release references to the pages, + * else the endpoint was not connected in the first place, + * hence the ENOTCONN. + */ + if (ep->state != SCIFEP_CONNECTED && ep->state != SCIFEP_DISCONNECTED) + return -ENOTCONN; + + mutex_lock(&ep->rma_info.rma_lock); + + scif_put_window(window, pages->nr_pages); + + /* Initiate window destruction if ref count is zero */ + if (!window->ref_count) { + list_del(&window->list); + mutex_unlock(&ep->rma_info.rma_lock); + scif_drain_dma_intr(ep->remote_dev->sdev, + ep->rma_info.dma_chan); + /* Inform the peer about this window being destroyed. */ + msg.uop = SCIF_MUNMAP; + msg.src = ep->port; + msg.payload[0] = window->peer_window; + /* No error handling for notification messages */ + scif_nodeqp_send(ep->remote_dev, &msg); + /* Destroy this window from the peer's registered AS */ + scif_destroy_remote_window(window); + } else { + mutex_unlock(&ep->rma_info.rma_lock); + } + + scif_free(pages->phys_addr, pages->nr_pages * sizeof(dma_addr_t)); + scif_free(pages->va, pages->nr_pages * sizeof(void *)); + kfree(pages); + return 0; +} +EXPORT_SYMBOL_GPL(scif_put_pages); + +/* + * scif_rma_list_mmap: + * + * Traverse the remote registration list starting from start_window: + * 1) Create VtoP mappings via remap_pfn_range(..) + * 2) Once step 1) and 2) complete successfully then traverse the range of + * windows again and bump the reference count. + * RMA lock must be held. + */ +static int scif_rma_list_mmap(struct scif_window *start_window, s64 offset, + int nr_pages, struct vm_area_struct *vma) +{ + s64 end_offset, loop_offset = offset; + struct scif_window *window = start_window; + int loop_nr_pages, nr_pages_left = nr_pages; + struct scif_endpt *ep = (struct scif_endpt *)start_window->ep; + struct list_head *head = &ep->rma_info.remote_reg_list; + int i, err = 0; + dma_addr_t phys_addr; + struct scif_window_iter src_win_iter; + size_t contig_bytes = 0; + + might_sleep(); + list_for_each_entry_from(window, head, list) { + end_offset = window->offset + + (window->nr_pages << PAGE_SHIFT); + loop_nr_pages = min_t(int, + (end_offset - loop_offset) >> PAGE_SHIFT, + nr_pages_left); + scif_init_window_iter(window, &src_win_iter); + for (i = 0; i < loop_nr_pages; i++) { + phys_addr = scif_off_to_dma_addr(window, loop_offset, + &contig_bytes, + &src_win_iter); + phys_addr = scif_get_phys(phys_addr, ep); + err = remap_pfn_range(vma, + vma->vm_start + + loop_offset - offset, + phys_addr >> PAGE_SHIFT, + PAGE_SIZE, + vma->vm_page_prot); + if (err) + goto error; + loop_offset += PAGE_SIZE; + } + nr_pages_left -= loop_nr_pages; + if (!nr_pages_left) + break; + } + /* + * No more failures expected. Bump up the ref count for all + * the windows. Another traversal from start_window required + * for handling errors encountered across windows during + * remap_pfn_range(..). + */ + loop_offset = offset; + nr_pages_left = nr_pages; + window = start_window; + head = &ep->rma_info.remote_reg_list; + list_for_each_entry_from(window, head, list) { + end_offset = window->offset + + (window->nr_pages << PAGE_SHIFT); + loop_nr_pages = min_t(int, + (end_offset - loop_offset) >> PAGE_SHIFT, + nr_pages_left); + scif_get_window(window, loop_nr_pages); + nr_pages_left -= loop_nr_pages; + loop_offset += (loop_nr_pages << PAGE_SHIFT); + if (!nr_pages_left) + break; + } +error: + if (err) + dev_err(scif_info.mdev.this_device, + "%s %d err %d\n", __func__, __LINE__, err); + return err; +} + +/* + * scif_rma_list_munmap: + * + * Traverse the remote registration list starting from window: + * 1) Decrement ref count. + * 2) If the ref count drops to zero then send a SCIF_MUNMAP message to peer. + * RMA lock must be held. + */ +static void scif_rma_list_munmap(struct scif_window *start_window, + s64 offset, int nr_pages) +{ + struct scifmsg msg; + s64 loop_offset = offset, end_offset; + int loop_nr_pages, nr_pages_left = nr_pages; + struct scif_endpt *ep = (struct scif_endpt *)start_window->ep; + struct list_head *head = &ep->rma_info.remote_reg_list; + struct scif_window *window = start_window, *_window; + + msg.uop = SCIF_MUNMAP; + msg.src = ep->port; + loop_offset = offset; + nr_pages_left = nr_pages; + list_for_each_entry_safe_from(window, _window, head, list) { + end_offset = window->offset + + (window->nr_pages << PAGE_SHIFT); + loop_nr_pages = min_t(int, + (end_offset - loop_offset) >> PAGE_SHIFT, + nr_pages_left); + scif_put_window(window, loop_nr_pages); + if (!window->ref_count) { + struct scif_dev *rdev = ep->remote_dev; + + scif_drain_dma_intr(rdev->sdev, + ep->rma_info.dma_chan); + /* Inform the peer about this munmap */ + msg.payload[0] = window->peer_window; + /* No error handling for Notification messages. */ + scif_nodeqp_send(ep->remote_dev, &msg); + list_del(&window->list); + /* Destroy this window from the peer's registered AS */ + scif_destroy_remote_window(window); + } + nr_pages_left -= loop_nr_pages; + loop_offset += (loop_nr_pages << PAGE_SHIFT); + if (!nr_pages_left) + break; + } +} + +/* + * The private data field of each VMA used to mmap a remote window + * points to an instance of struct vma_pvt + */ +struct vma_pvt { + struct scif_endpt *ep; /* End point for remote window */ + s64 offset; /* offset within remote window */ + bool valid_offset; /* offset is valid only if the original + * mmap request was for a single page + * else the offset within the vma is + * the correct offset + */ + struct kref ref; +}; + +static void vma_pvt_release(struct kref *ref) +{ + struct vma_pvt *vmapvt = container_of(ref, struct vma_pvt, ref); + + kfree(vmapvt); +} + +/** + * scif_vma_open - VMA open driver callback + * @vma: VMM memory area. + * The open method is called by the kernel to allow the subsystem implementing + * the VMA to initialize the area. This method is invoked any time a new + * reference to the VMA is made (when a process forks, for example). + * The one exception happens when the VMA is first created by mmap; + * in this case, the driver's mmap method is called instead. + * This function is also invoked when an existing VMA is split by the kernel + * due to a call to munmap on a subset of the VMA resulting in two VMAs. + * The kernel invokes this function only on one of the two VMAs. + */ +static void scif_vma_open(struct vm_area_struct *vma) +{ + struct vma_pvt *vmapvt = vma->vm_private_data; + + dev_dbg(scif_info.mdev.this_device, + "SCIFAPI vma open: vma_start 0x%lx vma_end 0x%lx\n", + vma->vm_start, vma->vm_end); + scif_insert_vma(vmapvt->ep, vma); + kref_get(&vmapvt->ref); +} + +/** + * scif_munmap - VMA close driver callback. + * @vma: VMM memory area. + * When an area is destroyed, the kernel calls its close operation. + * Note that there's no usage count associated with VMA's; the area + * is opened and closed exactly once by each process that uses it. + */ +static void scif_munmap(struct vm_area_struct *vma) +{ + struct scif_endpt *ep; + struct vma_pvt *vmapvt = vma->vm_private_data; + int nr_pages = vma_pages(vma); + s64 offset; + struct scif_rma_req req; + struct scif_window *window = NULL; + int err; + + might_sleep(); + dev_dbg(scif_info.mdev.this_device, + "SCIFAPI munmap: vma_start 0x%lx vma_end 0x%lx\n", + vma->vm_start, vma->vm_end); + ep = vmapvt->ep; + offset = vmapvt->valid_offset ? vmapvt->offset : + (vma->vm_pgoff) << PAGE_SHIFT; + dev_dbg(scif_info.mdev.this_device, + "SCIFAPI munmap: ep %p nr_pages 0x%x offset 0x%llx\n", + ep, nr_pages, offset); + req.out_window = &window; + req.offset = offset; + req.nr_bytes = vma->vm_end - vma->vm_start; + req.prot = vma->vm_flags & (VM_READ | VM_WRITE); + req.type = SCIF_WINDOW_PARTIAL; + req.head = &ep->rma_info.remote_reg_list; + + mutex_lock(&ep->rma_info.rma_lock); + + err = scif_query_window(&req); + if (err) + dev_err(scif_info.mdev.this_device, + "%s %d err %d\n", __func__, __LINE__, err); + else + scif_rma_list_munmap(window, offset, nr_pages); + + mutex_unlock(&ep->rma_info.rma_lock); + /* + * The kernel probably zeroes these out but we still want + * to clean up our own mess just in case. + */ + vma->vm_ops = NULL; + vma->vm_private_data = NULL; + kref_put(&vmapvt->ref, vma_pvt_release); + scif_delete_vma(ep, vma); +} + +static const struct vm_operations_struct scif_vm_ops = { + .open = scif_vma_open, + .close = scif_munmap, +}; + +/** + * scif_mmap - Map pages in virtual address space to a remote window. + * @vma: VMM memory area. + * @epd: endpoint descriptor + * + * Return: Upon successful completion, scif_mmap() returns zero + * else an apt error is returned as documented in scif.h + */ +int scif_mmap(struct vm_area_struct *vma, scif_epd_t epd) +{ + struct scif_rma_req req; + struct scif_window *window = NULL; + struct scif_endpt *ep = (struct scif_endpt *)epd; + s64 start_offset = vma->vm_pgoff << PAGE_SHIFT; + int nr_pages = vma_pages(vma); + int err; + struct vma_pvt *vmapvt; + + dev_dbg(scif_info.mdev.this_device, + "SCIFAPI mmap: ep %p start_offset 0x%llx nr_pages 0x%x\n", + ep, start_offset, nr_pages); + err = scif_verify_epd(ep); + if (err) + return err; + + might_sleep(); + + err = scif_insert_vma(ep, vma); + if (err) + return err; + + vmapvt = kzalloc(sizeof(*vmapvt), GFP_KERNEL); + if (!vmapvt) { + scif_delete_vma(ep, vma); + return -ENOMEM; + } + + vmapvt->ep = ep; + kref_init(&vmapvt->ref); + + req.out_window = &window; + req.offset = start_offset; + req.nr_bytes = vma->vm_end - vma->vm_start; + req.prot = vma->vm_flags & (VM_READ | VM_WRITE); + req.type = SCIF_WINDOW_PARTIAL; + req.head = &ep->rma_info.remote_reg_list; + + mutex_lock(&ep->rma_info.rma_lock); + /* Does a valid window exist? */ + err = scif_query_window(&req); + if (err) { + dev_err(&ep->remote_dev->sdev->dev, + "%s %d err %d\n", __func__, __LINE__, err); + goto error_unlock; + } + + /* Default prot for loopback */ + if (!scifdev_self(ep->remote_dev)) + vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); + + /* + * VM_DONTCOPY - Do not copy this vma on fork + * VM_DONTEXPAND - Cannot expand with mremap() + * VM_RESERVED - Count as reserved_vm like IO + * VM_PFNMAP - Page-ranges managed without "struct page" + * VM_IO - Memory mapped I/O or similar + * + * We do not want to copy this VMA automatically on a fork(), + * expand this VMA due to mremap() or swap out these pages since + * the VMA is actually backed by physical pages in the remote + * node's physical memory and not via a struct page. + */ + vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP; + + if (!scifdev_self(ep->remote_dev)) + vma->vm_flags |= VM_IO | VM_PFNMAP; + + /* Map this range of windows */ + err = scif_rma_list_mmap(window, start_offset, nr_pages, vma); + if (err) { + dev_err(&ep->remote_dev->sdev->dev, + "%s %d err %d\n", __func__, __LINE__, err); + goto error_unlock; + } + /* Set up the driver call back */ + vma->vm_ops = &scif_vm_ops; + vma->vm_private_data = vmapvt; +error_unlock: + mutex_unlock(&ep->rma_info.rma_lock); + if (err) { + kfree(vmapvt); + dev_err(&ep->remote_dev->sdev->dev, + "%s %d err %d\n", __func__, __LINE__, err); + scif_delete_vma(ep, vma); + } + return err; +} diff --git a/drivers/misc/mic/scif/scif_nm.c b/drivers/misc/mic/scif/scif_nm.c new file mode 100644 index 000000000..79f26a02a --- /dev/null +++ b/drivers/misc/mic/scif/scif_nm.c @@ -0,0 +1,237 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2014 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Intel SCIF driver. + * + */ +#include "scif_peer_bus.h" + +#include "scif_main.h" +#include "scif_map.h" + +/** + * scif_invalidate_ep() - Set state for all connected endpoints + * to disconnected and wake up all send/recv waitqueues + */ +static void scif_invalidate_ep(int node) +{ + struct scif_endpt *ep; + struct list_head *pos, *tmpq; + + flush_work(&scif_info.conn_work); + mutex_lock(&scif_info.connlock); + list_for_each_safe(pos, tmpq, &scif_info.disconnected) { + ep = list_entry(pos, struct scif_endpt, list); + if (ep->remote_dev->node == node) { + scif_unmap_all_windows(ep); + spin_lock(&ep->lock); + scif_cleanup_ep_qp(ep); + spin_unlock(&ep->lock); + } + } + list_for_each_safe(pos, tmpq, &scif_info.connected) { + ep = list_entry(pos, struct scif_endpt, list); + if (ep->remote_dev->node == node) { + list_del(pos); + spin_lock(&ep->lock); + ep->state = SCIFEP_DISCONNECTED; + list_add_tail(&ep->list, &scif_info.disconnected); + scif_cleanup_ep_qp(ep); + wake_up_interruptible(&ep->sendwq); + wake_up_interruptible(&ep->recvwq); + spin_unlock(&ep->lock); + scif_unmap_all_windows(ep); + } + } + mutex_unlock(&scif_info.connlock); +} + +void scif_free_qp(struct scif_dev *scifdev) +{ + struct scif_qp *qp = scifdev->qpairs; + + if (!qp) + return; + scif_unmap_single(qp->local_buf, scifdev, qp->inbound_q.size); + kfree(qp->inbound_q.rb_base); + scif_unmap_single(qp->local_qp, scifdev, sizeof(struct scif_qp)); + kfree(scifdev->qpairs); + scifdev->qpairs = NULL; +} + +static void scif_cleanup_qp(struct scif_dev *dev) +{ + struct scif_qp *qp = &dev->qpairs[0]; + + if (!qp) + return; + scif_iounmap((void *)qp->remote_qp, sizeof(struct scif_qp), dev); + scif_iounmap((void *)qp->outbound_q.rb_base, + sizeof(struct scif_qp), dev); + qp->remote_qp = NULL; + qp->local_write = 0; + qp->inbound_q.current_write_offset = 0; + qp->inbound_q.current_read_offset = 0; + if (scifdev_is_p2p(dev)) + scif_free_qp(dev); +} + +void scif_send_acks(struct scif_dev *dev) +{ + struct scifmsg msg; + + if (dev->node_remove_ack_pending) { + msg.uop = SCIF_NODE_REMOVE_ACK; + msg.src.node = scif_info.nodeid; + msg.dst.node = SCIF_MGMT_NODE; + msg.payload[0] = dev->node; + scif_nodeqp_send(&scif_dev[SCIF_MGMT_NODE], &msg); + dev->node_remove_ack_pending = false; + } + if (dev->exit_ack_pending) { + msg.uop = SCIF_EXIT_ACK; + msg.src.node = scif_info.nodeid; + msg.dst.node = dev->node; + scif_nodeqp_send(dev, &msg); + dev->exit_ack_pending = false; + } +} + +/* + * scif_cleanup_scifdev + * + * @dev: Remote SCIF device. + * Uninitialize SCIF data structures for remote SCIF device. + */ +void scif_cleanup_scifdev(struct scif_dev *dev) +{ + struct scif_hw_dev *sdev = dev->sdev; + + if (!dev->sdev) + return; + if (scifdev_is_p2p(dev)) { + if (dev->cookie) { + sdev->hw_ops->free_irq(sdev, dev->cookie, dev); + dev->cookie = NULL; + } + scif_destroy_intr_wq(dev); + } + flush_work(&scif_info.misc_work); + scif_destroy_p2p(dev); + scif_invalidate_ep(dev->node); + scif_zap_mmaps(dev->node); + scif_cleanup_rma_for_zombies(dev->node); + flush_work(&scif_info.misc_work); + scif_send_acks(dev); + if (!dev->node && scif_info.card_initiated_exit) { + /* + * Send an SCIF_EXIT message which is the last message from MIC + * to the Host and wait for a SCIF_EXIT_ACK + */ + scif_send_exit(dev); + scif_info.card_initiated_exit = false; + } + scif_cleanup_qp(dev); +} + +/* + * scif_remove_node: + * + * @node: Node to remove + */ +void scif_handle_remove_node(int node) +{ + struct scif_dev *scifdev = &scif_dev[node]; + + if (scif_peer_unregister_device(scifdev)) + scif_send_acks(scifdev); +} + +static int scif_send_rmnode_msg(int node, int remove_node) +{ + struct scifmsg notif_msg; + struct scif_dev *dev = &scif_dev[node]; + + notif_msg.uop = SCIF_NODE_REMOVE; + notif_msg.src.node = scif_info.nodeid; + notif_msg.dst.node = node; + notif_msg.payload[0] = remove_node; + return scif_nodeqp_send(dev, ¬if_msg); +} + +/** + * scif_node_disconnect: + * + * @node_id[in]: source node id. + * @mgmt_initiated: Disconnection initiated from the mgmt node + * + * Disconnect a node from the scif network. + */ +void scif_disconnect_node(u32 node_id, bool mgmt_initiated) +{ + int ret; + int msg_cnt = 0; + u32 i = 0; + struct scif_dev *scifdev = &scif_dev[node_id]; + + if (!node_id) + return; + + atomic_set(&scifdev->disconn_rescnt, 0); + + /* Destroy p2p network */ + for (i = 1; i <= scif_info.maxid; i++) { + if (i == node_id) + continue; + ret = scif_send_rmnode_msg(i, node_id); + if (!ret) + msg_cnt++; + } + /* Wait for the remote nodes to respond with SCIF_NODE_REMOVE_ACK */ + ret = wait_event_timeout(scifdev->disconn_wq, + (atomic_read(&scifdev->disconn_rescnt) + == msg_cnt), SCIF_NODE_ALIVE_TIMEOUT); + /* Tell the card to clean up */ + if (mgmt_initiated && _scifdev_alive(scifdev)) + /* + * Send an SCIF_EXIT message which is the last message from Host + * to the MIC and wait for a SCIF_EXIT_ACK + */ + scif_send_exit(scifdev); + atomic_set(&scifdev->disconn_rescnt, 0); + /* Tell the mgmt node to clean up */ + ret = scif_send_rmnode_msg(SCIF_MGMT_NODE, node_id); + if (!ret) + /* Wait for mgmt node to respond with SCIF_NODE_REMOVE_ACK */ + wait_event_timeout(scifdev->disconn_wq, + (atomic_read(&scifdev->disconn_rescnt) == 1), + SCIF_NODE_ALIVE_TIMEOUT); +} + +void scif_get_node_info(void) +{ + struct scifmsg msg; + DECLARE_COMPLETION_ONSTACK(node_info); + + msg.uop = SCIF_GET_NODE_INFO; + msg.src.node = scif_info.nodeid; + msg.dst.node = SCIF_MGMT_NODE; + msg.payload[3] = (u64)&node_info; + + if ((scif_nodeqp_send(&scif_dev[SCIF_MGMT_NODE], &msg))) + return; + + /* Wait for a response with SCIF_GET_NODE_INFO */ + wait_for_completion(&node_info); +} diff --git a/drivers/misc/mic/scif/scif_nodeqp.c b/drivers/misc/mic/scif/scif_nodeqp.c new file mode 100644 index 000000000..c66ca1a58 --- /dev/null +++ b/drivers/misc/mic/scif/scif_nodeqp.c @@ -0,0 +1,1354 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2014 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Intel SCIF driver. + * + */ +#include "../bus/scif_bus.h" +#include "scif_peer_bus.h" +#include "scif_main.h" +#include "scif_nodeqp.h" +#include "scif_map.h" + +/* + ************************************************************************ + * SCIF node Queue Pair (QP) setup flow: + * + * 1) SCIF driver gets probed with a scif_hw_dev via the scif_hw_bus + * 2) scif_setup_qp(..) allocates the local qp and calls + * scif_setup_qp_connect(..) which allocates and maps the local + * buffer for the inbound QP + * 3) The local node updates the device page with the DMA address of the QP + * 4) A delayed work is scheduled (qp_dwork) which periodically reads if + * the peer node has updated its QP DMA address + * 5) Once a valid non zero address is found in the QP DMA address field + * in the device page, the local node maps the remote node's QP, + * updates its outbound QP and sends a SCIF_INIT message to the peer + * 6) The SCIF_INIT message is received by the peer node QP interrupt bottom + * half handler by calling scif_init(..) + * 7) scif_init(..) registers a new SCIF peer node by calling + * scif_peer_register_device(..) which signifies the addition of a new + * SCIF node + * 8) On the mgmt node, P2P network setup/teardown is initiated if all the + * remote nodes are online via scif_p2p_setup(..) + * 9) For P2P setup, the host maps the remote nodes' aperture and memory + * bars and sends a SCIF_NODE_ADD message to both nodes + * 10) As part of scif_nodeadd, both nodes set up their local inbound + * QPs and send a SCIF_NODE_ADD_ACK to the mgmt node + * 11) As part of scif_node_add_ack(..) the mgmt node forwards the + * SCIF_NODE_ADD_ACK to the remote nodes + * 12) As part of scif_node_add_ack(..) the remote nodes update their + * outbound QPs, make sure they can access memory on the remote node + * and then add a new SCIF peer node by calling + * scif_peer_register_device(..) which signifies the addition of a new + * SCIF node. + * 13) The SCIF network is now established across all nodes. + * + ************************************************************************ + * SCIF node QP teardown flow (initiated by non mgmt node): + * + * 1) SCIF driver gets a remove callback with a scif_hw_dev via the scif_hw_bus + * 2) The device page QP DMA address field is updated with 0x0 + * 3) A non mgmt node now cleans up all local data structures and sends a + * SCIF_EXIT message to the peer and waits for a SCIF_EXIT_ACK + * 4) As part of scif_exit(..) handling scif_disconnect_node(..) is called + * 5) scif_disconnect_node(..) sends a SCIF_NODE_REMOVE message to all the + * peers and waits for a SCIF_NODE_REMOVE_ACK + * 6) As part of scif_node_remove(..) a remote node unregisters the peer + * node from the SCIF network and sends a SCIF_NODE_REMOVE_ACK + * 7) When the mgmt node has received all the SCIF_NODE_REMOVE_ACKs + * it sends itself a node remove message whose handling cleans up local + * data structures and unregisters the peer node from the SCIF network + * 8) The mgmt node sends a SCIF_EXIT_ACK + * 9) Upon receipt of the SCIF_EXIT_ACK the node initiating the teardown + * completes the SCIF remove routine + * 10) The SCIF network is now torn down for the node initiating the + * teardown sequence + * + ************************************************************************ + * SCIF node QP teardown flow (initiated by mgmt node): + * + * 1) SCIF driver gets a remove callback with a scif_hw_dev via the scif_hw_bus + * 2) The device page QP DMA address field is updated with 0x0 + * 3) The mgmt node calls scif_disconnect_node(..) + * 4) scif_disconnect_node(..) sends a SCIF_NODE_REMOVE message to all the peers + * and waits for a SCIF_NODE_REMOVE_ACK + * 5) As part of scif_node_remove(..) a remote node unregisters the peer + * node from the SCIF network and sends a SCIF_NODE_REMOVE_ACK + * 6) When the mgmt node has received all the SCIF_NODE_REMOVE_ACKs + * it unregisters the peer node from the SCIF network + * 7) The mgmt node sends a SCIF_EXIT message and waits for a SCIF_EXIT_ACK. + * 8) A non mgmt node upon receipt of a SCIF_EXIT message calls scif_stop(..) + * which would clean up local data structures for all SCIF nodes and + * then send a SCIF_EXIT_ACK back to the mgmt node + * 9) Upon receipt of the SCIF_EXIT_ACK the the mgmt node sends itself a node + * remove message whose handling cleans up local data structures and + * destroys any P2P mappings. + * 10) The SCIF hardware device for which a remove callback was received is now + * disconnected from the SCIF network. + */ +/* + * Initializes "local" data structures for the QP. Allocates the QP + * ring buffer (rb) and initializes the "in bound" queue. + */ +int scif_setup_qp_connect(struct scif_qp *qp, dma_addr_t *qp_offset, + int local_size, struct scif_dev *scifdev) +{ + void *local_q = qp->inbound_q.rb_base; + int err = 0; + u32 tmp_rd = 0; + + spin_lock_init(&qp->send_lock); + spin_lock_init(&qp->recv_lock); + + /* Allocate rb only if not already allocated */ + if (!local_q) { + local_q = kzalloc(local_size, GFP_KERNEL); + if (!local_q) { + err = -ENOMEM; + return err; + } + } + + err = scif_map_single(&qp->local_buf, local_q, scifdev, local_size); + if (err) + goto kfree; + /* + * To setup the inbound_q, the buffer lives locally, the read pointer + * is remote and the write pointer is local. + */ + scif_rb_init(&qp->inbound_q, + &tmp_rd, + &qp->local_write, + local_q, get_count_order(local_size)); + /* + * The read pointer is NULL initially and it is unsafe to use the ring + * buffer til this changes! + */ + qp->inbound_q.read_ptr = NULL; + err = scif_map_single(qp_offset, qp, + scifdev, sizeof(struct scif_qp)); + if (err) + goto unmap; + qp->local_qp = *qp_offset; + return err; +unmap: + scif_unmap_single(qp->local_buf, scifdev, local_size); + qp->local_buf = 0; +kfree: + kfree(local_q); + return err; +} + +/* When the other side has already done it's allocation, this is called */ +int scif_setup_qp_accept(struct scif_qp *qp, dma_addr_t *qp_offset, + dma_addr_t phys, int local_size, + struct scif_dev *scifdev) +{ + void *local_q; + void *remote_q; + struct scif_qp *remote_qp; + int remote_size; + int err = 0; + + spin_lock_init(&qp->send_lock); + spin_lock_init(&qp->recv_lock); + /* Start by figuring out where we need to point */ + remote_qp = scif_ioremap(phys, sizeof(struct scif_qp), scifdev); + if (!remote_qp) + return -EIO; + qp->remote_qp = remote_qp; + if (qp->remote_qp->magic != SCIFEP_MAGIC) { + err = -EIO; + goto iounmap; + } + qp->remote_buf = remote_qp->local_buf; + remote_size = qp->remote_qp->inbound_q.size; + remote_q = scif_ioremap(qp->remote_buf, remote_size, scifdev); + if (!remote_q) { + err = -EIO; + goto iounmap; + } + qp->remote_qp->local_write = 0; + /* + * To setup the outbound_q, the buffer lives in remote memory, + * the read pointer is local, the write pointer is remote + */ + scif_rb_init(&qp->outbound_q, + &qp->local_read, + &qp->remote_qp->local_write, + remote_q, + get_count_order(remote_size)); + local_q = kzalloc(local_size, GFP_KERNEL); + if (!local_q) { + err = -ENOMEM; + goto iounmap_1; + } + err = scif_map_single(&qp->local_buf, local_q, scifdev, local_size); + if (err) + goto kfree; + qp->remote_qp->local_read = 0; + /* + * To setup the inbound_q, the buffer lives locally, the read pointer + * is remote and the write pointer is local + */ + scif_rb_init(&qp->inbound_q, + &qp->remote_qp->local_read, + &qp->local_write, + local_q, get_count_order(local_size)); + err = scif_map_single(qp_offset, qp, scifdev, + sizeof(struct scif_qp)); + if (err) + goto unmap; + qp->local_qp = *qp_offset; + return err; +unmap: + scif_unmap_single(qp->local_buf, scifdev, local_size); + qp->local_buf = 0; +kfree: + kfree(local_q); +iounmap_1: + scif_iounmap(remote_q, remote_size, scifdev); + qp->outbound_q.rb_base = NULL; +iounmap: + scif_iounmap(qp->remote_qp, sizeof(struct scif_qp), scifdev); + qp->remote_qp = NULL; + return err; +} + +int scif_setup_qp_connect_response(struct scif_dev *scifdev, + struct scif_qp *qp, u64 payload) +{ + int err = 0; + void *r_buf; + int remote_size; + phys_addr_t tmp_phys; + + qp->remote_qp = scif_ioremap(payload, sizeof(struct scif_qp), scifdev); + + if (!qp->remote_qp) { + err = -ENOMEM; + goto error; + } + + if (qp->remote_qp->magic != SCIFEP_MAGIC) { + dev_err(&scifdev->sdev->dev, + "SCIFEP_MAGIC mismatch between self %d remote %d\n", + scif_dev[scif_info.nodeid].node, scifdev->node); + err = -ENODEV; + goto error; + } + + tmp_phys = qp->remote_qp->local_buf; + remote_size = qp->remote_qp->inbound_q.size; + r_buf = scif_ioremap(tmp_phys, remote_size, scifdev); + + if (!r_buf) + return -EIO; + + qp->local_read = 0; + scif_rb_init(&qp->outbound_q, + &qp->local_read, + &qp->remote_qp->local_write, + r_buf, + get_count_order(remote_size)); + /* + * Because the node QP may already be processing an INIT message, set + * the read pointer so the cached read offset isn't lost + */ + qp->remote_qp->local_read = qp->inbound_q.current_read_offset; + /* + * resetup the inbound_q now that we know where the + * inbound_read really is. + */ + scif_rb_init(&qp->inbound_q, + &qp->remote_qp->local_read, + &qp->local_write, + qp->inbound_q.rb_base, + get_count_order(qp->inbound_q.size)); +error: + return err; +} + +static __always_inline void +scif_send_msg_intr(struct scif_dev *scifdev) +{ + struct scif_hw_dev *sdev = scifdev->sdev; + + if (scifdev_is_p2p(scifdev)) + sdev->hw_ops->send_p2p_intr(sdev, scifdev->rdb, &scifdev->mmio); + else + sdev->hw_ops->send_intr(sdev, scifdev->rdb); +} + +int scif_qp_response(phys_addr_t phys, struct scif_dev *scifdev) +{ + int err = 0; + struct scifmsg msg; + + err = scif_setup_qp_connect_response(scifdev, scifdev->qpairs, phys); + if (!err) { + /* + * Now that everything is setup and mapped, we're ready + * to tell the peer about our queue's location + */ + msg.uop = SCIF_INIT; + msg.dst.node = scifdev->node; + err = scif_nodeqp_send(scifdev, &msg); + } + return err; +} + +void scif_send_exit(struct scif_dev *scifdev) +{ + struct scifmsg msg; + int ret; + + scifdev->exit = OP_IN_PROGRESS; + msg.uop = SCIF_EXIT; + msg.src.node = scif_info.nodeid; + msg.dst.node = scifdev->node; + ret = scif_nodeqp_send(scifdev, &msg); + if (ret) + goto done; + /* Wait for a SCIF_EXIT_ACK message */ + wait_event_timeout(scif_info.exitwq, scifdev->exit == OP_COMPLETED, + SCIF_NODE_ALIVE_TIMEOUT); +done: + scifdev->exit = OP_IDLE; +} + +int scif_setup_qp(struct scif_dev *scifdev) +{ + int err = 0; + int local_size; + struct scif_qp *qp; + + local_size = SCIF_NODE_QP_SIZE; + + qp = kzalloc(sizeof(*qp), GFP_KERNEL); + if (!qp) { + err = -ENOMEM; + return err; + } + qp->magic = SCIFEP_MAGIC; + scifdev->qpairs = qp; + err = scif_setup_qp_connect(qp, &scifdev->qp_dma_addr, + local_size, scifdev); + if (err) + goto free_qp; + /* + * We're as setup as we can be. The inbound_q is setup, w/o a usable + * outbound q. When we get a message, the read_ptr will be updated, + * and we will pull the message. + */ + return err; +free_qp: + kfree(scifdev->qpairs); + scifdev->qpairs = NULL; + return err; +} + +static void scif_p2p_freesg(struct scatterlist *sg) +{ + kfree(sg); +} + +static struct scatterlist * +scif_p2p_setsg(phys_addr_t pa, int page_size, int page_cnt) +{ + struct scatterlist *sg; + struct page *page; + int i; + + sg = kcalloc(page_cnt, sizeof(struct scatterlist), GFP_KERNEL); + if (!sg) + return NULL; + sg_init_table(sg, page_cnt); + for (i = 0; i < page_cnt; i++) { + page = pfn_to_page(pa >> PAGE_SHIFT); + sg_set_page(&sg[i], page, page_size, 0); + pa += page_size; + } + return sg; +} + +/* Init p2p mappings required to access peerdev from scifdev */ +static struct scif_p2p_info * +scif_init_p2p_info(struct scif_dev *scifdev, struct scif_dev *peerdev) +{ + struct scif_p2p_info *p2p; + int num_mmio_pages, num_aper_pages, sg_page_shift, err, num_aper_chunks; + struct scif_hw_dev *psdev = peerdev->sdev; + struct scif_hw_dev *sdev = scifdev->sdev; + + num_mmio_pages = psdev->mmio->len >> PAGE_SHIFT; + num_aper_pages = psdev->aper->len >> PAGE_SHIFT; + + p2p = kzalloc(sizeof(*p2p), GFP_KERNEL); + if (!p2p) + return NULL; + p2p->ppi_sg[SCIF_PPI_MMIO] = scif_p2p_setsg(psdev->mmio->pa, + PAGE_SIZE, num_mmio_pages); + if (!p2p->ppi_sg[SCIF_PPI_MMIO]) + goto free_p2p; + p2p->sg_nentries[SCIF_PPI_MMIO] = num_mmio_pages; + sg_page_shift = get_order(min(psdev->aper->len, (u64)(1 << 30))); + num_aper_chunks = num_aper_pages >> (sg_page_shift - PAGE_SHIFT); + p2p->ppi_sg[SCIF_PPI_APER] = scif_p2p_setsg(psdev->aper->pa, + 1 << sg_page_shift, + num_aper_chunks); + p2p->sg_nentries[SCIF_PPI_APER] = num_aper_chunks; + err = dma_map_sg(&sdev->dev, p2p->ppi_sg[SCIF_PPI_MMIO], + num_mmio_pages, PCI_DMA_BIDIRECTIONAL); + if (err != num_mmio_pages) + goto scif_p2p_free; + err = dma_map_sg(&sdev->dev, p2p->ppi_sg[SCIF_PPI_APER], + num_aper_chunks, PCI_DMA_BIDIRECTIONAL); + if (err != num_aper_chunks) + goto dma_unmap; + p2p->ppi_da[SCIF_PPI_MMIO] = sg_dma_address(p2p->ppi_sg[SCIF_PPI_MMIO]); + p2p->ppi_da[SCIF_PPI_APER] = sg_dma_address(p2p->ppi_sg[SCIF_PPI_APER]); + p2p->ppi_len[SCIF_PPI_MMIO] = num_mmio_pages; + p2p->ppi_len[SCIF_PPI_APER] = num_aper_pages; + p2p->ppi_peer_id = peerdev->node; + return p2p; +dma_unmap: + dma_unmap_sg(&sdev->dev, p2p->ppi_sg[SCIF_PPI_MMIO], + p2p->sg_nentries[SCIF_PPI_MMIO], DMA_BIDIRECTIONAL); +scif_p2p_free: + scif_p2p_freesg(p2p->ppi_sg[SCIF_PPI_MMIO]); + scif_p2p_freesg(p2p->ppi_sg[SCIF_PPI_APER]); +free_p2p: + kfree(p2p); + return NULL; +} + +/* Uninitialize and release resources from a p2p mapping */ +static void scif_deinit_p2p_info(struct scif_dev *scifdev, + struct scif_p2p_info *p2p) +{ + struct scif_hw_dev *sdev = scifdev->sdev; + + dma_unmap_sg(&sdev->dev, p2p->ppi_sg[SCIF_PPI_MMIO], + p2p->sg_nentries[SCIF_PPI_MMIO], DMA_BIDIRECTIONAL); + dma_unmap_sg(&sdev->dev, p2p->ppi_sg[SCIF_PPI_APER], + p2p->sg_nentries[SCIF_PPI_APER], DMA_BIDIRECTIONAL); + scif_p2p_freesg(p2p->ppi_sg[SCIF_PPI_MMIO]); + scif_p2p_freesg(p2p->ppi_sg[SCIF_PPI_APER]); + kfree(p2p); +} + +/** + * scif_node_connect: Respond to SCIF_NODE_CONNECT interrupt message + * @dst: Destination node + * + * Connect the src and dst node by setting up the p2p connection + * between them. Management node here acts like a proxy. + */ +static void scif_node_connect(struct scif_dev *scifdev, int dst) +{ + struct scif_dev *dev_j = scifdev; + struct scif_dev *dev_i = NULL; + struct scif_p2p_info *p2p_ij = NULL; /* bus addr for j from i */ + struct scif_p2p_info *p2p_ji = NULL; /* bus addr for i from j */ + struct scif_p2p_info *p2p; + struct list_head *pos, *tmp; + struct scifmsg msg; + int err; + u64 tmppayload; + + if (dst < 1 || dst > scif_info.maxid) + return; + + dev_i = &scif_dev[dst]; + + if (!_scifdev_alive(dev_i)) + return; + /* + * If the p2p connection is already setup or in the process of setting + * up then just ignore this request. The requested node will get + * informed by SCIF_NODE_ADD_ACK or SCIF_NODE_ADD_NACK + */ + if (!list_empty(&dev_i->p2p)) { + list_for_each_safe(pos, tmp, &dev_i->p2p) { + p2p = list_entry(pos, struct scif_p2p_info, ppi_list); + if (p2p->ppi_peer_id == dev_j->node) + return; + } + } + p2p_ij = scif_init_p2p_info(dev_i, dev_j); + if (!p2p_ij) + return; + p2p_ji = scif_init_p2p_info(dev_j, dev_i); + if (!p2p_ji) { + scif_deinit_p2p_info(dev_i, p2p_ij); + return; + } + list_add_tail(&p2p_ij->ppi_list, &dev_i->p2p); + list_add_tail(&p2p_ji->ppi_list, &dev_j->p2p); + + /* + * Send a SCIF_NODE_ADD to dev_i, pass it its bus address + * as seen from dev_j + */ + msg.uop = SCIF_NODE_ADD; + msg.src.node = dev_j->node; + msg.dst.node = dev_i->node; + + msg.payload[0] = p2p_ji->ppi_da[SCIF_PPI_APER]; + msg.payload[1] = p2p_ij->ppi_da[SCIF_PPI_MMIO]; + msg.payload[2] = p2p_ij->ppi_da[SCIF_PPI_APER]; + msg.payload[3] = p2p_ij->ppi_len[SCIF_PPI_APER] << PAGE_SHIFT; + + err = scif_nodeqp_send(dev_i, &msg); + if (err) { + dev_err(&scifdev->sdev->dev, + "%s %d error %d\n", __func__, __LINE__, err); + return; + } + + /* Same as above but to dev_j */ + msg.uop = SCIF_NODE_ADD; + msg.src.node = dev_i->node; + msg.dst.node = dev_j->node; + + tmppayload = msg.payload[0]; + msg.payload[0] = msg.payload[2]; + msg.payload[2] = tmppayload; + msg.payload[1] = p2p_ji->ppi_da[SCIF_PPI_MMIO]; + msg.payload[3] = p2p_ji->ppi_len[SCIF_PPI_APER] << PAGE_SHIFT; + + scif_nodeqp_send(dev_j, &msg); +} + +static void scif_p2p_setup(void) +{ + int i, j; + + if (!scif_info.p2p_enable) + return; + + for (i = 1; i <= scif_info.maxid; i++) + if (!_scifdev_alive(&scif_dev[i])) + return; + + for (i = 1; i <= scif_info.maxid; i++) { + for (j = 1; j <= scif_info.maxid; j++) { + struct scif_dev *scifdev = &scif_dev[i]; + + if (i == j) + continue; + scif_node_connect(scifdev, j); + } + } +} + +static char *message_types[] = {"BAD", + "INIT", + "EXIT", + "SCIF_EXIT_ACK", + "SCIF_NODE_ADD", + "SCIF_NODE_ADD_ACK", + "SCIF_NODE_ADD_NACK", + "REMOVE_NODE", + "REMOVE_NODE_ACK", + "CNCT_REQ", + "CNCT_GNT", + "CNCT_GNTACK", + "CNCT_GNTNACK", + "CNCT_REJ", + "DISCNCT", + "DISCNT_ACK", + "CLIENT_SENT", + "CLIENT_RCVD", + "SCIF_GET_NODE_INFO", + "REGISTER", + "REGISTER_ACK", + "REGISTER_NACK", + "UNREGISTER", + "UNREGISTER_ACK", + "UNREGISTER_NACK", + "ALLOC_REQ", + "ALLOC_GNT", + "ALLOC_REJ", + "FREE_PHYS", + "FREE_VIRT", + "MUNMAP", + "MARK", + "MARK_ACK", + "MARK_NACK", + "WAIT", + "WAIT_ACK", + "WAIT_NACK", + "SIGNAL_LOCAL", + "SIGNAL_REMOTE", + "SIG_ACK", + "SIG_NACK"}; + +static void +scif_display_message(struct scif_dev *scifdev, struct scifmsg *msg, + const char *label) +{ + if (!scif_info.en_msg_log) + return; + if (msg->uop > SCIF_MAX_MSG) { + dev_err(&scifdev->sdev->dev, + "%s: unknown msg type %d\n", label, msg->uop); + return; + } + dev_info(&scifdev->sdev->dev, + "%s: msg type %s, src %d:%d, dest %d:%d payload 0x%llx:0x%llx:0x%llx:0x%llx\n", + label, message_types[msg->uop], msg->src.node, msg->src.port, + msg->dst.node, msg->dst.port, msg->payload[0], msg->payload[1], + msg->payload[2], msg->payload[3]); +} + +int _scif_nodeqp_send(struct scif_dev *scifdev, struct scifmsg *msg) +{ + struct scif_qp *qp = scifdev->qpairs; + int err = -ENOMEM, loop_cnt = 0; + + scif_display_message(scifdev, msg, "Sent"); + if (!qp) { + err = -EINVAL; + goto error; + } + spin_lock(&qp->send_lock); + + while ((err = scif_rb_write(&qp->outbound_q, + msg, sizeof(struct scifmsg)))) { + mdelay(1); +#define SCIF_NODEQP_SEND_TO_MSEC (3 * 1000) + if (loop_cnt++ > (SCIF_NODEQP_SEND_TO_MSEC)) { + err = -ENODEV; + break; + } + } + if (!err) + scif_rb_commit(&qp->outbound_q); + spin_unlock(&qp->send_lock); + if (!err) { + if (scifdev_self(scifdev)) + /* + * For loopback we need to emulate an interrupt by + * queuing work for the queue handling real node + * Qp interrupts. + */ + queue_work(scifdev->intr_wq, &scifdev->intr_bh); + else + scif_send_msg_intr(scifdev); + } +error: + if (err) + dev_dbg(&scifdev->sdev->dev, + "%s %d error %d uop %d\n", + __func__, __LINE__, err, msg->uop); + return err; +} + +/** + * scif_nodeqp_send - Send a message on the node queue pair + * @scifdev: Scif Device. + * @msg: The message to be sent. + */ +int scif_nodeqp_send(struct scif_dev *scifdev, struct scifmsg *msg) +{ + int err; + struct device *spdev = NULL; + + if (msg->uop > SCIF_EXIT_ACK) { + /* Dont send messages once the exit flow has begun */ + if (OP_IDLE != scifdev->exit) + return -ENODEV; + spdev = scif_get_peer_dev(scifdev); + if (IS_ERR(spdev)) { + err = PTR_ERR(spdev); + return err; + } + } + err = _scif_nodeqp_send(scifdev, msg); + if (msg->uop > SCIF_EXIT_ACK) + scif_put_peer_dev(spdev); + return err; +} + +/* + * scif_misc_handler: + * + * Work queue handler for servicing miscellaneous SCIF tasks. + * Examples include: + * 1) Remote fence requests. + * 2) Destruction of temporary registered windows + * created during scif_vreadfrom()/scif_vwriteto(). + * 3) Cleanup of zombie endpoints. + */ +void scif_misc_handler(struct work_struct *work) +{ + scif_rma_handle_remote_fences(); + scif_rma_destroy_windows(); + scif_rma_destroy_tcw_invalid(); + scif_cleanup_zombie_epd(); +} + +/** + * scif_init() - Respond to SCIF_INIT interrupt message + * @scifdev: Remote SCIF device node + * @msg: Interrupt message + */ +static __always_inline void +scif_init(struct scif_dev *scifdev, struct scifmsg *msg) +{ + /* + * Allow the thread waiting for device page updates for the peer QP DMA + * address to complete initializing the inbound_q. + */ + flush_delayed_work(&scifdev->qp_dwork); + + scif_peer_register_device(scifdev); + + if (scif_is_mgmt_node()) { + mutex_lock(&scif_info.conflock); + scif_p2p_setup(); + mutex_unlock(&scif_info.conflock); + } +} + +/** + * scif_exit() - Respond to SCIF_EXIT interrupt message + * @scifdev: Remote SCIF device node + * @msg: Interrupt message + * + * This function stops the SCIF interface for the node which sent + * the SCIF_EXIT message and starts waiting for that node to + * resetup the queue pair again. + */ +static __always_inline void +scif_exit(struct scif_dev *scifdev, struct scifmsg *unused) +{ + scifdev->exit_ack_pending = true; + if (scif_is_mgmt_node()) + scif_disconnect_node(scifdev->node, false); + else + scif_stop(scifdev); + schedule_delayed_work(&scifdev->qp_dwork, + msecs_to_jiffies(1000)); +} + +/** + * scif_exitack() - Respond to SCIF_EXIT_ACK interrupt message + * @scifdev: Remote SCIF device node + * @msg: Interrupt message + * + */ +static __always_inline void +scif_exit_ack(struct scif_dev *scifdev, struct scifmsg *unused) +{ + scifdev->exit = OP_COMPLETED; + wake_up(&scif_info.exitwq); +} + +/** + * scif_node_add() - Respond to SCIF_NODE_ADD interrupt message + * @scifdev: Remote SCIF device node + * @msg: Interrupt message + * + * When the mgmt node driver has finished initializing a MIC node queue pair it + * marks the node as online. It then looks for all currently online MIC cards + * and send a SCIF_NODE_ADD message to identify the ID of the new card for + * peer to peer initialization + * + * The local node allocates its incoming queue and sends its address in the + * SCIF_NODE_ADD_ACK message back to the mgmt node, the mgmt node "reflects" + * this message to the new node + */ +static __always_inline void +scif_node_add(struct scif_dev *scifdev, struct scifmsg *msg) +{ + struct scif_dev *newdev; + dma_addr_t qp_offset; + int qp_connect; + struct scif_hw_dev *sdev; + + dev_dbg(&scifdev->sdev->dev, + "Scifdev %d:%d received NODE_ADD msg for node %d\n", + scifdev->node, msg->dst.node, msg->src.node); + dev_dbg(&scifdev->sdev->dev, + "Remote address for this node's aperture %llx\n", + msg->payload[0]); + newdev = &scif_dev[msg->src.node]; + newdev->node = msg->src.node; + newdev->sdev = scif_dev[SCIF_MGMT_NODE].sdev; + sdev = newdev->sdev; + + if (scif_setup_intr_wq(newdev)) { + dev_err(&scifdev->sdev->dev, + "failed to setup interrupts for %d\n", msg->src.node); + goto interrupt_setup_error; + } + newdev->mmio.va = ioremap_nocache(msg->payload[1], sdev->mmio->len); + if (!newdev->mmio.va) { + dev_err(&scifdev->sdev->dev, + "failed to map mmio for %d\n", msg->src.node); + goto mmio_map_error; + } + newdev->qpairs = kzalloc(sizeof(*newdev->qpairs), GFP_KERNEL); + if (!newdev->qpairs) + goto qp_alloc_error; + /* + * Set the base address of the remote node's memory since it gets + * added to qp_offset + */ + newdev->base_addr = msg->payload[0]; + + qp_connect = scif_setup_qp_connect(newdev->qpairs, &qp_offset, + SCIF_NODE_QP_SIZE, newdev); + if (qp_connect) { + dev_err(&scifdev->sdev->dev, + "failed to setup qp_connect %d\n", qp_connect); + goto qp_connect_error; + } + + newdev->db = sdev->hw_ops->next_db(sdev); + newdev->cookie = sdev->hw_ops->request_irq(sdev, scif_intr_handler, + "SCIF_INTR", newdev, + newdev->db); + if (IS_ERR(newdev->cookie)) + goto qp_connect_error; + newdev->qpairs->magic = SCIFEP_MAGIC; + newdev->qpairs->qp_state = SCIF_QP_OFFLINE; + + msg->uop = SCIF_NODE_ADD_ACK; + msg->dst.node = msg->src.node; + msg->src.node = scif_info.nodeid; + msg->payload[0] = qp_offset; + msg->payload[2] = newdev->db; + scif_nodeqp_send(&scif_dev[SCIF_MGMT_NODE], msg); + return; +qp_connect_error: + kfree(newdev->qpairs); + newdev->qpairs = NULL; +qp_alloc_error: + iounmap(newdev->mmio.va); + newdev->mmio.va = NULL; +mmio_map_error: +interrupt_setup_error: + dev_err(&scifdev->sdev->dev, + "node add failed for node %d\n", msg->src.node); + msg->uop = SCIF_NODE_ADD_NACK; + msg->dst.node = msg->src.node; + msg->src.node = scif_info.nodeid; + scif_nodeqp_send(&scif_dev[SCIF_MGMT_NODE], msg); +} + +void scif_poll_qp_state(struct work_struct *work) +{ +#define SCIF_NODE_QP_RETRY 100 +#define SCIF_NODE_QP_TIMEOUT 100 + struct scif_dev *peerdev = container_of(work, struct scif_dev, + p2p_dwork.work); + struct scif_qp *qp = &peerdev->qpairs[0]; + + if (qp->qp_state != SCIF_QP_ONLINE || + qp->remote_qp->qp_state != SCIF_QP_ONLINE) { + if (peerdev->p2p_retry++ == SCIF_NODE_QP_RETRY) { + dev_err(&peerdev->sdev->dev, + "Warning: QP check timeout with state %d\n", + qp->qp_state); + goto timeout; + } + schedule_delayed_work(&peerdev->p2p_dwork, + msecs_to_jiffies(SCIF_NODE_QP_TIMEOUT)); + return; + } + return; +timeout: + dev_err(&peerdev->sdev->dev, + "%s %d remote node %d offline, state = 0x%x\n", + __func__, __LINE__, peerdev->node, qp->qp_state); + qp->remote_qp->qp_state = SCIF_QP_OFFLINE; + scif_peer_unregister_device(peerdev); + scif_cleanup_scifdev(peerdev); +} + +/** + * scif_node_add_ack() - Respond to SCIF_NODE_ADD_ACK interrupt message + * @scifdev: Remote SCIF device node + * @msg: Interrupt message + * + * After a MIC node receives the SCIF_NODE_ADD_ACK message it send this + * message to the mgmt node to confirm the sequence is finished. + * + */ +static __always_inline void +scif_node_add_ack(struct scif_dev *scifdev, struct scifmsg *msg) +{ + struct scif_dev *peerdev; + struct scif_qp *qp; + struct scif_dev *dst_dev = &scif_dev[msg->dst.node]; + + dev_dbg(&scifdev->sdev->dev, + "Scifdev %d received SCIF_NODE_ADD_ACK msg src %d dst %d\n", + scifdev->node, msg->src.node, msg->dst.node); + dev_dbg(&scifdev->sdev->dev, + "payload %llx %llx %llx %llx\n", msg->payload[0], + msg->payload[1], msg->payload[2], msg->payload[3]); + if (scif_is_mgmt_node()) { + /* + * the lock serializes with scif_qp_response_ack. The mgmt node + * is forwarding the NODE_ADD_ACK message from src to dst we + * need to make sure that the dst has already received a + * NODE_ADD for src and setup its end of the qp to dst + */ + mutex_lock(&scif_info.conflock); + msg->payload[1] = scif_info.maxid; + scif_nodeqp_send(dst_dev, msg); + mutex_unlock(&scif_info.conflock); + return; + } + peerdev = &scif_dev[msg->src.node]; + peerdev->sdev = scif_dev[SCIF_MGMT_NODE].sdev; + peerdev->node = msg->src.node; + + qp = &peerdev->qpairs[0]; + + if ((scif_setup_qp_connect_response(peerdev, &peerdev->qpairs[0], + msg->payload[0]))) + goto local_error; + peerdev->rdb = msg->payload[2]; + qp->remote_qp->qp_state = SCIF_QP_ONLINE; + + scif_peer_register_device(peerdev); + + schedule_delayed_work(&peerdev->p2p_dwork, 0); + return; +local_error: + scif_cleanup_scifdev(peerdev); +} + +/** + * scif_node_add_nack: Respond to SCIF_NODE_ADD_NACK interrupt message + * @msg: Interrupt message + * + * SCIF_NODE_ADD failed, so inform the waiting wq. + */ +static __always_inline void +scif_node_add_nack(struct scif_dev *scifdev, struct scifmsg *msg) +{ + if (scif_is_mgmt_node()) { + struct scif_dev *dst_dev = &scif_dev[msg->dst.node]; + + dev_dbg(&scifdev->sdev->dev, + "SCIF_NODE_ADD_NACK received from %d\n", scifdev->node); + scif_nodeqp_send(dst_dev, msg); + } +} + +/* + * scif_node_remove: Handle SCIF_NODE_REMOVE message + * @msg: Interrupt message + * + * Handle node removal. + */ +static __always_inline void +scif_node_remove(struct scif_dev *scifdev, struct scifmsg *msg) +{ + int node = msg->payload[0]; + struct scif_dev *scdev = &scif_dev[node]; + + scdev->node_remove_ack_pending = true; + scif_handle_remove_node(node); +} + +/* + * scif_node_remove_ack: Handle SCIF_NODE_REMOVE_ACK message + * @msg: Interrupt message + * + * The peer has acked a SCIF_NODE_REMOVE message. + */ +static __always_inline void +scif_node_remove_ack(struct scif_dev *scifdev, struct scifmsg *msg) +{ + struct scif_dev *sdev = &scif_dev[msg->payload[0]]; + + atomic_inc(&sdev->disconn_rescnt); + wake_up(&sdev->disconn_wq); +} + +/** + * scif_get_node_info: Respond to SCIF_GET_NODE_INFO interrupt message + * @msg: Interrupt message + * + * Retrieve node info i.e maxid and total from the mgmt node. + */ +static __always_inline void +scif_get_node_info_resp(struct scif_dev *scifdev, struct scifmsg *msg) +{ + if (scif_is_mgmt_node()) { + swap(msg->dst.node, msg->src.node); + mutex_lock(&scif_info.conflock); + msg->payload[1] = scif_info.maxid; + msg->payload[2] = scif_info.total; + mutex_unlock(&scif_info.conflock); + scif_nodeqp_send(scifdev, msg); + } else { + struct completion *node_info = + (struct completion *)msg->payload[3]; + + mutex_lock(&scif_info.conflock); + scif_info.maxid = msg->payload[1]; + scif_info.total = msg->payload[2]; + complete_all(node_info); + mutex_unlock(&scif_info.conflock); + } +} + +static void +scif_msg_unknown(struct scif_dev *scifdev, struct scifmsg *msg) +{ + /* Bogus Node Qp Message? */ + dev_err(&scifdev->sdev->dev, + "Unknown message 0x%xn scifdev->node 0x%x\n", + msg->uop, scifdev->node); +} + +static void (*scif_intr_func[SCIF_MAX_MSG + 1]) + (struct scif_dev *, struct scifmsg *msg) = { + scif_msg_unknown, /* Error */ + scif_init, /* SCIF_INIT */ + scif_exit, /* SCIF_EXIT */ + scif_exit_ack, /* SCIF_EXIT_ACK */ + scif_node_add, /* SCIF_NODE_ADD */ + scif_node_add_ack, /* SCIF_NODE_ADD_ACK */ + scif_node_add_nack, /* SCIF_NODE_ADD_NACK */ + scif_node_remove, /* SCIF_NODE_REMOVE */ + scif_node_remove_ack, /* SCIF_NODE_REMOVE_ACK */ + scif_cnctreq, /* SCIF_CNCT_REQ */ + scif_cnctgnt, /* SCIF_CNCT_GNT */ + scif_cnctgnt_ack, /* SCIF_CNCT_GNTACK */ + scif_cnctgnt_nack, /* SCIF_CNCT_GNTNACK */ + scif_cnctrej, /* SCIF_CNCT_REJ */ + scif_discnct, /* SCIF_DISCNCT */ + scif_discnt_ack, /* SCIF_DISCNT_ACK */ + scif_clientsend, /* SCIF_CLIENT_SENT */ + scif_clientrcvd, /* SCIF_CLIENT_RCVD */ + scif_get_node_info_resp,/* SCIF_GET_NODE_INFO */ + scif_recv_reg, /* SCIF_REGISTER */ + scif_recv_reg_ack, /* SCIF_REGISTER_ACK */ + scif_recv_reg_nack, /* SCIF_REGISTER_NACK */ + scif_recv_unreg, /* SCIF_UNREGISTER */ + scif_recv_unreg_ack, /* SCIF_UNREGISTER_ACK */ + scif_recv_unreg_nack, /* SCIF_UNREGISTER_NACK */ + scif_alloc_req, /* SCIF_ALLOC_REQ */ + scif_alloc_gnt_rej, /* SCIF_ALLOC_GNT */ + scif_alloc_gnt_rej, /* SCIF_ALLOC_REJ */ + scif_free_virt, /* SCIF_FREE_VIRT */ + scif_recv_munmap, /* SCIF_MUNMAP */ + scif_recv_mark, /* SCIF_MARK */ + scif_recv_mark_resp, /* SCIF_MARK_ACK */ + scif_recv_mark_resp, /* SCIF_MARK_NACK */ + scif_recv_wait, /* SCIF_WAIT */ + scif_recv_wait_resp, /* SCIF_WAIT_ACK */ + scif_recv_wait_resp, /* SCIF_WAIT_NACK */ + scif_recv_sig_local, /* SCIF_SIG_LOCAL */ + scif_recv_sig_remote, /* SCIF_SIG_REMOTE */ + scif_recv_sig_resp, /* SCIF_SIG_ACK */ + scif_recv_sig_resp, /* SCIF_SIG_NACK */ +}; + +/** + * scif_nodeqp_msg_handler() - Common handler for node messages + * @scifdev: Remote device to respond to + * @qp: Remote memory pointer + * @msg: The message to be handled. + * + * This routine calls the appropriate routine to handle a Node Qp + * message receipt + */ +static int scif_max_msg_id = SCIF_MAX_MSG; + +static void +scif_nodeqp_msg_handler(struct scif_dev *scifdev, + struct scif_qp *qp, struct scifmsg *msg) +{ + scif_display_message(scifdev, msg, "Rcvd"); + + if (msg->uop > (u32)scif_max_msg_id) { + /* Bogus Node Qp Message? */ + dev_err(&scifdev->sdev->dev, + "Unknown message 0x%xn scifdev->node 0x%x\n", + msg->uop, scifdev->node); + return; + } + + scif_intr_func[msg->uop](scifdev, msg); +} + +/** + * scif_nodeqp_intrhandler() - Interrupt handler for node messages + * @scifdev: Remote device to respond to + * @qp: Remote memory pointer + * + * This routine is triggered by the interrupt mechanism. It reads + * messages from the node queue RB and calls the Node QP Message handling + * routine. + */ +void scif_nodeqp_intrhandler(struct scif_dev *scifdev, struct scif_qp *qp) +{ + struct scifmsg msg; + int read_size; + + do { + read_size = scif_rb_get_next(&qp->inbound_q, &msg, sizeof(msg)); + if (!read_size) + break; + scif_nodeqp_msg_handler(scifdev, qp, &msg); + /* + * The node queue pair is unmapped so skip the read pointer + * update after receipt of a SCIF_EXIT_ACK + */ + if (SCIF_EXIT_ACK == msg.uop) + break; + scif_rb_update_read_ptr(&qp->inbound_q); + } while (1); +} + +/** + * scif_loopb_wq_handler - Loopback Workqueue Handler. + * @work: loop back work + * + * This work queue routine is invoked by the loopback work queue handler. + * It grabs the recv lock, dequeues any available messages from the head + * of the loopback message list, calls the node QP message handler, + * waits for it to return, then frees up this message and dequeues more + * elements of the list if available. + */ +static void scif_loopb_wq_handler(struct work_struct *unused) +{ + struct scif_dev *scifdev = scif_info.loopb_dev; + struct scif_qp *qp = scifdev->qpairs; + struct scif_loopb_msg *msg; + + do { + msg = NULL; + spin_lock(&qp->recv_lock); + if (!list_empty(&scif_info.loopb_recv_q)) { + msg = list_first_entry(&scif_info.loopb_recv_q, + struct scif_loopb_msg, + list); + list_del(&msg->list); + } + spin_unlock(&qp->recv_lock); + + if (msg) { + scif_nodeqp_msg_handler(scifdev, qp, &msg->msg); + kfree(msg); + } + } while (msg); +} + +/** + * scif_loopb_msg_handler() - Workqueue handler for loopback messages. + * @scifdev: SCIF device + * @qp: Queue pair. + * + * This work queue routine is triggered when a loopback message is received. + * + * We need special handling for receiving Node Qp messages on a loopback SCIF + * device via two workqueues for receiving messages. + * + * The reason we need the extra workqueue which is not required with *normal* + * non-loopback SCIF devices is the potential classic deadlock described below: + * + * Thread A tries to send a message on a loopback SCIF device and blocks since + * there is no space in the RB while it has the send_lock held or another + * lock called lock X for example. + * + * Thread B: The Loopback Node QP message receive workqueue receives the message + * and tries to send a message (eg an ACK) to the loopback SCIF device. It tries + * to grab the send lock again or lock X and deadlocks with Thread A. The RB + * cannot be drained any further due to this classic deadlock. + * + * In order to avoid deadlocks as mentioned above we have an extra level of + * indirection achieved by having two workqueues. + * 1) The first workqueue whose handler is scif_loopb_msg_handler reads + * messages from the Node QP RB, adds them to a list and queues work for the + * second workqueue. + * + * 2) The second workqueue whose handler is scif_loopb_wq_handler dequeues + * messages from the list, handles them, frees up the memory and dequeues + * more elements from the list if possible. + */ +int +scif_loopb_msg_handler(struct scif_dev *scifdev, struct scif_qp *qp) +{ + int read_size; + struct scif_loopb_msg *msg; + + do { + msg = kmalloc(sizeof(*msg), GFP_KERNEL); + if (!msg) + return -ENOMEM; + read_size = scif_rb_get_next(&qp->inbound_q, &msg->msg, + sizeof(struct scifmsg)); + if (read_size != sizeof(struct scifmsg)) { + kfree(msg); + scif_rb_update_read_ptr(&qp->inbound_q); + break; + } + spin_lock(&qp->recv_lock); + list_add_tail(&msg->list, &scif_info.loopb_recv_q); + spin_unlock(&qp->recv_lock); + queue_work(scif_info.loopb_wq, &scif_info.loopb_work); + scif_rb_update_read_ptr(&qp->inbound_q); + } while (read_size == sizeof(struct scifmsg)); + return read_size; +} + +/** + * scif_setup_loopback_qp - One time setup work for Loopback Node Qp. + * @scifdev: SCIF device + * + * Sets up the required loopback workqueues, queue pairs and ring buffers + */ +int scif_setup_loopback_qp(struct scif_dev *scifdev) +{ + int err = 0; + void *local_q; + struct scif_qp *qp; + + err = scif_setup_intr_wq(scifdev); + if (err) + goto exit; + INIT_LIST_HEAD(&scif_info.loopb_recv_q); + snprintf(scif_info.loopb_wqname, sizeof(scif_info.loopb_wqname), + "SCIF LOOPB %d", scifdev->node); + scif_info.loopb_wq = + alloc_ordered_workqueue(scif_info.loopb_wqname, 0); + if (!scif_info.loopb_wq) { + err = -ENOMEM; + goto destroy_intr; + } + INIT_WORK(&scif_info.loopb_work, scif_loopb_wq_handler); + /* Allocate Self Qpair */ + scifdev->qpairs = kzalloc(sizeof(*scifdev->qpairs), GFP_KERNEL); + if (!scifdev->qpairs) { + err = -ENOMEM; + goto destroy_loopb_wq; + } + + qp = scifdev->qpairs; + qp->magic = SCIFEP_MAGIC; + spin_lock_init(&qp->send_lock); + spin_lock_init(&qp->recv_lock); + + local_q = kzalloc(SCIF_NODE_QP_SIZE, GFP_KERNEL); + if (!local_q) { + err = -ENOMEM; + goto free_qpairs; + } + /* + * For loopback the inbound_q and outbound_q are essentially the same + * since the Node sends a message on the loopback interface to the + * outbound_q which is then received on the inbound_q. + */ + scif_rb_init(&qp->outbound_q, + &qp->local_read, + &qp->local_write, + local_q, get_count_order(SCIF_NODE_QP_SIZE)); + + scif_rb_init(&qp->inbound_q, + &qp->local_read, + &qp->local_write, + local_q, get_count_order(SCIF_NODE_QP_SIZE)); + scif_info.nodeid = scifdev->node; + + scif_peer_register_device(scifdev); + + scif_info.loopb_dev = scifdev; + return err; +free_qpairs: + kfree(scifdev->qpairs); +destroy_loopb_wq: + destroy_workqueue(scif_info.loopb_wq); +destroy_intr: + scif_destroy_intr_wq(scifdev); +exit: + return err; +} + +/** + * scif_destroy_loopback_qp - One time uninit work for Loopback Node Qp + * @scifdev: SCIF device + * + * Destroys the workqueues and frees up the Ring Buffer and Queue Pair memory. + */ +int scif_destroy_loopback_qp(struct scif_dev *scifdev) +{ + scif_peer_unregister_device(scifdev); + destroy_workqueue(scif_info.loopb_wq); + scif_destroy_intr_wq(scifdev); + kfree(scifdev->qpairs->outbound_q.rb_base); + kfree(scifdev->qpairs); + scifdev->sdev = NULL; + scif_info.loopb_dev = NULL; + return 0; +} + +void scif_destroy_p2p(struct scif_dev *scifdev) +{ + struct scif_dev *peer_dev; + struct scif_p2p_info *p2p; + struct list_head *pos, *tmp; + int bd; + + mutex_lock(&scif_info.conflock); + /* Free P2P mappings in the given node for all its peer nodes */ + list_for_each_safe(pos, tmp, &scifdev->p2p) { + p2p = list_entry(pos, struct scif_p2p_info, ppi_list); + dma_unmap_sg(&scifdev->sdev->dev, p2p->ppi_sg[SCIF_PPI_MMIO], + p2p->sg_nentries[SCIF_PPI_MMIO], + DMA_BIDIRECTIONAL); + dma_unmap_sg(&scifdev->sdev->dev, p2p->ppi_sg[SCIF_PPI_APER], + p2p->sg_nentries[SCIF_PPI_APER], + DMA_BIDIRECTIONAL); + scif_p2p_freesg(p2p->ppi_sg[SCIF_PPI_MMIO]); + scif_p2p_freesg(p2p->ppi_sg[SCIF_PPI_APER]); + list_del(pos); + kfree(p2p); + } + + /* Free P2P mapping created in the peer nodes for the given node */ + for (bd = SCIF_MGMT_NODE + 1; bd <= scif_info.maxid; bd++) { + peer_dev = &scif_dev[bd]; + list_for_each_safe(pos, tmp, &peer_dev->p2p) { + p2p = list_entry(pos, struct scif_p2p_info, ppi_list); + if (p2p->ppi_peer_id == scifdev->node) { + dma_unmap_sg(&peer_dev->sdev->dev, + p2p->ppi_sg[SCIF_PPI_MMIO], + p2p->sg_nentries[SCIF_PPI_MMIO], + DMA_BIDIRECTIONAL); + dma_unmap_sg(&peer_dev->sdev->dev, + p2p->ppi_sg[SCIF_PPI_APER], + p2p->sg_nentries[SCIF_PPI_APER], + DMA_BIDIRECTIONAL); + scif_p2p_freesg(p2p->ppi_sg[SCIF_PPI_MMIO]); + scif_p2p_freesg(p2p->ppi_sg[SCIF_PPI_APER]); + list_del(pos); + kfree(p2p); + } + } + } + mutex_unlock(&scif_info.conflock); +} diff --git a/drivers/misc/mic/scif/scif_nodeqp.h b/drivers/misc/mic/scif/scif_nodeqp.h new file mode 100644 index 000000000..958962731 --- /dev/null +++ b/drivers/misc/mic/scif/scif_nodeqp.h @@ -0,0 +1,221 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2014 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Copyright(c) 2014 Intel Corporation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Intel SCIF driver. + * + */ +#ifndef SCIF_NODEQP +#define SCIF_NODEQP + +#include "scif_rb.h" +#include "scif_peer_bus.h" + +#define SCIF_INIT 1 /* First message sent to the peer node for discovery */ +#define SCIF_EXIT 2 /* Last message from the peer informing intent to exit */ +#define SCIF_EXIT_ACK 3 /* Response to SCIF_EXIT message */ +#define SCIF_NODE_ADD 4 /* Tell Online nodes a new node exits */ +#define SCIF_NODE_ADD_ACK 5 /* Confirm to mgmt node sequence is finished */ +#define SCIF_NODE_ADD_NACK 6 /* SCIF_NODE_ADD failed */ +#define SCIF_NODE_REMOVE 7 /* Request to deactivate a SCIF node */ +#define SCIF_NODE_REMOVE_ACK 8 /* Response to a SCIF_NODE_REMOVE message */ +#define SCIF_CNCT_REQ 9 /* Phys addr of Request connection to a port */ +#define SCIF_CNCT_GNT 10 /* Phys addr of new Grant connection request */ +#define SCIF_CNCT_GNTACK 11 /* Error type Reject a connection request */ +#define SCIF_CNCT_GNTNACK 12 /* Error type Reject a connection request */ +#define SCIF_CNCT_REJ 13 /* Error type Reject a connection request */ +#define SCIF_DISCNCT 14 /* Notify peer that connection is being terminated */ +#define SCIF_DISCNT_ACK 15 /* Notify peer that connection is being terminated */ +#define SCIF_CLIENT_SENT 16 /* Notify the peer that data has been written */ +#define SCIF_CLIENT_RCVD 17 /* Notify the peer that data has been read */ +#define SCIF_GET_NODE_INFO 18 /* Get current node mask from the mgmt node*/ +#define SCIF_REGISTER 19 /* Tell peer about a new registered window */ +#define SCIF_REGISTER_ACK 20 /* Notify peer about unregistration success */ +#define SCIF_REGISTER_NACK 21 /* Notify peer about registration success */ +#define SCIF_UNREGISTER 22 /* Tell peer about unregistering a window */ +#define SCIF_UNREGISTER_ACK 23 /* Notify peer about registration failure */ +#define SCIF_UNREGISTER_NACK 24 /* Notify peer about unregistration failure */ +#define SCIF_ALLOC_REQ 25 /* Request a mapped buffer */ +#define SCIF_ALLOC_GNT 26 /* Notify peer about allocation success */ +#define SCIF_ALLOC_REJ 27 /* Notify peer about allocation failure */ +#define SCIF_FREE_VIRT 28 /* Free previously allocated virtual memory */ +#define SCIF_MUNMAP 29 /* Acknowledgment for a SCIF_MMAP request */ +#define SCIF_MARK 30 /* SCIF Remote Fence Mark Request */ +#define SCIF_MARK_ACK 31 /* SCIF Remote Fence Mark Success */ +#define SCIF_MARK_NACK 32 /* SCIF Remote Fence Mark Failure */ +#define SCIF_WAIT 33 /* SCIF Remote Fence Wait Request */ +#define SCIF_WAIT_ACK 34 /* SCIF Remote Fence Wait Success */ +#define SCIF_WAIT_NACK 35 /* SCIF Remote Fence Wait Failure */ +#define SCIF_SIG_LOCAL 36 /* SCIF Remote Fence Local Signal Request */ +#define SCIF_SIG_REMOTE 37 /* SCIF Remote Fence Remote Signal Request */ +#define SCIF_SIG_ACK 38 /* SCIF Remote Fence Remote Signal Success */ +#define SCIF_SIG_NACK 39 /* SCIF Remote Fence Remote Signal Failure */ +#define SCIF_MAX_MSG SCIF_SIG_NACK + +/* + * struct scifmsg - Node QP message format + * + * @src: Source information + * @dst: Destination information + * @uop: The message opcode + * @payload: Unique payload format for each message + */ +struct scifmsg { + struct scif_port_id src; + struct scif_port_id dst; + u32 uop; + u64 payload[4]; +} __packed; + +/* + * struct scif_allocmsg - Used with SCIF_ALLOC_REQ to request + * the remote note to allocate memory + * + * phys_addr: Physical address of the buffer + * vaddr: Virtual address of the buffer + * size: Size of the buffer + * state: Current state + * allocwq: wait queue for status + */ +struct scif_allocmsg { + dma_addr_t phys_addr; + unsigned long vaddr; + size_t size; + enum scif_msg_state state; + wait_queue_head_t allocwq; +}; + +/* + * struct scif_qp - Node Queue Pair + * + * Interesting structure -- a little difficult because we can only + * write across the PCIe, so any r/w pointer we need to read is + * local. We only need to read the read pointer on the inbound_q + * and read the write pointer in the outbound_q + * + * @magic: Magic value to ensure the peer sees the QP correctly + * @outbound_q: The outbound ring buffer for sending messages + * @inbound_q: The inbound ring buffer for receiving messages + * @local_write: Local write index + * @local_read: Local read index + * @remote_qp: The remote queue pair + * @local_buf: DMA address of local ring buffer + * @local_qp: DMA address of the local queue pair data structure + * @remote_buf: DMA address of remote ring buffer + * @qp_state: QP state i.e. online or offline used for P2P + * @send_lock: synchronize access to outbound queue + * @recv_lock: Synchronize access to inbound queue + */ +struct scif_qp { + u64 magic; +#define SCIFEP_MAGIC 0x5c1f000000005c1fULL + struct scif_rb outbound_q; + struct scif_rb inbound_q; + + u32 local_write __aligned(64); + u32 local_read __aligned(64); + struct scif_qp *remote_qp; + dma_addr_t local_buf; + dma_addr_t local_qp; + dma_addr_t remote_buf; + u32 qp_state; +#define SCIF_QP_OFFLINE 0xdead +#define SCIF_QP_ONLINE 0xc0de + spinlock_t send_lock; + spinlock_t recv_lock; +}; + +/* + * struct scif_loopb_msg - An element in the loopback Node QP message list. + * + * @msg - The SCIF node QP message + * @list - link in the list of messages + */ +struct scif_loopb_msg { + struct scifmsg msg; + struct list_head list; +}; + +int scif_nodeqp_send(struct scif_dev *scifdev, struct scifmsg *msg); +int _scif_nodeqp_send(struct scif_dev *scifdev, struct scifmsg *msg); +void scif_nodeqp_intrhandler(struct scif_dev *scifdev, struct scif_qp *qp); +int scif_loopb_msg_handler(struct scif_dev *scifdev, struct scif_qp *qp); +int scif_setup_qp(struct scif_dev *scifdev); +int scif_qp_response(phys_addr_t phys, struct scif_dev *dev); +int scif_setup_qp_connect(struct scif_qp *qp, dma_addr_t *qp_offset, + int local_size, struct scif_dev *scifdev); +int scif_setup_qp_accept(struct scif_qp *qp, dma_addr_t *qp_offset, + dma_addr_t phys, int local_size, + struct scif_dev *scifdev); +int scif_setup_qp_connect_response(struct scif_dev *scifdev, + struct scif_qp *qp, u64 payload); +int scif_setup_loopback_qp(struct scif_dev *scifdev); +int scif_destroy_loopback_qp(struct scif_dev *scifdev); +void scif_poll_qp_state(struct work_struct *work); +void scif_destroy_p2p(struct scif_dev *scifdev); +void scif_send_exit(struct scif_dev *scifdev); +static inline struct device *scif_get_peer_dev(struct scif_dev *scifdev) +{ + struct scif_peer_dev *spdev; + struct device *spdev_ret; + + rcu_read_lock(); + spdev = rcu_dereference(scifdev->spdev); + if (spdev) + spdev_ret = get_device(&spdev->dev); + else + spdev_ret = ERR_PTR(-ENODEV); + rcu_read_unlock(); + return spdev_ret; +} + +static inline void scif_put_peer_dev(struct device *dev) +{ + put_device(dev); +} +#endif /* SCIF_NODEQP */ diff --git a/drivers/misc/mic/scif/scif_peer_bus.c b/drivers/misc/mic/scif/scif_peer_bus.c new file mode 100644 index 000000000..6ffa3bdbd --- /dev/null +++ b/drivers/misc/mic/scif/scif_peer_bus.c @@ -0,0 +1,183 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2014 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Intel SCIF driver. + */ +#include "scif_main.h" +#include "../bus/scif_bus.h" +#include "scif_peer_bus.h" + +static inline struct scif_peer_dev * +dev_to_scif_peer(struct device *dev) +{ + return container_of(dev, struct scif_peer_dev, dev); +} + +struct bus_type scif_peer_bus = { + .name = "scif_peer_bus", +}; + +static void scif_peer_release_dev(struct device *d) +{ + struct scif_peer_dev *sdev = dev_to_scif_peer(d); + struct scif_dev *scifdev = &scif_dev[sdev->dnode]; + + scif_cleanup_scifdev(scifdev); + kfree(sdev); +} + +static int scif_peer_initialize_device(struct scif_dev *scifdev) +{ + struct scif_peer_dev *spdev; + int ret; + + spdev = kzalloc(sizeof(*spdev), GFP_KERNEL); + if (!spdev) { + ret = -ENOMEM; + goto err; + } + + spdev->dev.parent = scifdev->sdev->dev.parent; + spdev->dev.release = scif_peer_release_dev; + spdev->dnode = scifdev->node; + spdev->dev.bus = &scif_peer_bus; + dev_set_name(&spdev->dev, "scif_peer-dev%u", spdev->dnode); + + device_initialize(&spdev->dev); + get_device(&spdev->dev); + rcu_assign_pointer(scifdev->spdev, spdev); + + mutex_lock(&scif_info.conflock); + scif_info.total++; + scif_info.maxid = max_t(u32, spdev->dnode, scif_info.maxid); + mutex_unlock(&scif_info.conflock); + return 0; +err: + dev_err(&scifdev->sdev->dev, + "dnode %d: initialize_device rc %d\n", scifdev->node, ret); + return ret; +} + +static int scif_peer_add_device(struct scif_dev *scifdev) +{ + struct scif_peer_dev *spdev = rcu_dereference(scifdev->spdev); + char pool_name[16]; + int ret; + + ret = device_add(&spdev->dev); + put_device(&spdev->dev); + if (ret) { + dev_err(&scifdev->sdev->dev, + "dnode %d: peer device_add failed\n", scifdev->node); + goto put_spdev; + } + + scnprintf(pool_name, sizeof(pool_name), "scif-%d", spdev->dnode); + scifdev->signal_pool = dmam_pool_create(pool_name, &scifdev->sdev->dev, + sizeof(struct scif_status), 1, + 0); + if (!scifdev->signal_pool) { + dev_err(&scifdev->sdev->dev, + "dnode %d: dmam_pool_create failed\n", scifdev->node); + ret = -ENOMEM; + goto del_spdev; + } + dev_dbg(&spdev->dev, "Added peer dnode %d\n", spdev->dnode); + return 0; +del_spdev: + device_del(&spdev->dev); +put_spdev: + RCU_INIT_POINTER(scifdev->spdev, NULL); + synchronize_rcu(); + put_device(&spdev->dev); + + mutex_lock(&scif_info.conflock); + scif_info.total--; + mutex_unlock(&scif_info.conflock); + return ret; +} + +void scif_add_peer_device(struct work_struct *work) +{ + struct scif_dev *scifdev = container_of(work, struct scif_dev, + peer_add_work); + + scif_peer_add_device(scifdev); +} + +/* + * Peer device registration is split into a device_initialize and a device_add. + * The reason for doing this is as follows: First, peer device registration + * itself cannot be done in the message processing thread and must be delegated + * to another workqueue, otherwise if SCIF client probe, called during peer + * device registration, calls scif_connect(..), it will block the message + * processing thread causing a deadlock. Next, device_initialize is done in the + * "top-half" message processing thread and device_add in the "bottom-half" + * workqueue. If this is not done, SCIF_CNCT_REQ message processing executing + * concurrently with SCIF_INIT message processing is unable to get a reference + * on the peer device, thereby failing the connect request. + */ +void scif_peer_register_device(struct scif_dev *scifdev) +{ + int ret; + + mutex_lock(&scifdev->lock); + ret = scif_peer_initialize_device(scifdev); + if (ret) + goto exit; + schedule_work(&scifdev->peer_add_work); +exit: + mutex_unlock(&scifdev->lock); +} + +int scif_peer_unregister_device(struct scif_dev *scifdev) +{ + struct scif_peer_dev *spdev; + + mutex_lock(&scifdev->lock); + /* Flush work to ensure device register is complete */ + flush_work(&scifdev->peer_add_work); + + /* + * Continue holding scifdev->lock since theoretically unregister_device + * can be called simultaneously from multiple threads + */ + spdev = rcu_dereference(scifdev->spdev); + if (!spdev) { + mutex_unlock(&scifdev->lock); + return -ENODEV; + } + + RCU_INIT_POINTER(scifdev->spdev, NULL); + synchronize_rcu(); + mutex_unlock(&scifdev->lock); + + dev_dbg(&spdev->dev, "Removing peer dnode %d\n", spdev->dnode); + device_unregister(&spdev->dev); + + mutex_lock(&scif_info.conflock); + scif_info.total--; + mutex_unlock(&scif_info.conflock); + return 0; +} + +int scif_peer_bus_init(void) +{ + return bus_register(&scif_peer_bus); +} + +void scif_peer_bus_exit(void) +{ + bus_unregister(&scif_peer_bus); +} diff --git a/drivers/misc/mic/scif/scif_peer_bus.h b/drivers/misc/mic/scif/scif_peer_bus.h new file mode 100644 index 000000000..a3b8dd2ed --- /dev/null +++ b/drivers/misc/mic/scif/scif_peer_bus.h @@ -0,0 +1,31 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2014 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Intel SCIF driver. + */ +#ifndef _SCIF_PEER_BUS_H_ +#define _SCIF_PEER_BUS_H_ + +#include +#include +#include + +struct scif_dev; + +void scif_add_peer_device(struct work_struct *work); +void scif_peer_register_device(struct scif_dev *sdev); +int scif_peer_unregister_device(struct scif_dev *scifdev); +int scif_peer_bus_init(void); +void scif_peer_bus_exit(void); +#endif /* _SCIF_PEER_BUS_H */ diff --git a/drivers/misc/mic/scif/scif_ports.c b/drivers/misc/mic/scif/scif_ports.c new file mode 100644 index 000000000..594e18d27 --- /dev/null +++ b/drivers/misc/mic/scif/scif_ports.c @@ -0,0 +1,124 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2014 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Intel SCIF driver. + * + */ +#include + +#include "scif_main.h" + +#define SCIF_PORT_COUNT 0x10000 /* Ports available */ + +struct idr scif_ports; + +/* + * struct scif_port - SCIF port information + * + * @ref_cnt - Reference count since there can be multiple endpoints + * created via scif_accept(..) simultaneously using a port. + */ +struct scif_port { + int ref_cnt; +}; + +/** + * __scif_get_port - Reserve a specified port # for SCIF and add it + * to the global list. + * @port : port # to be reserved. + * + * @return : Allocated SCIF port #, or -ENOSPC if port unavailable. + * On memory allocation failure, returns -ENOMEM. + */ +static int __scif_get_port(int start, int end) +{ + int id; + struct scif_port *port = kzalloc(sizeof(*port), GFP_ATOMIC); + + if (!port) + return -ENOMEM; + spin_lock(&scif_info.port_lock); + id = idr_alloc(&scif_ports, port, start, end, GFP_ATOMIC); + if (id >= 0) + port->ref_cnt++; + spin_unlock(&scif_info.port_lock); + return id; +} + +/** + * scif_rsrv_port - Reserve a specified port # for SCIF. + * @port : port # to be reserved. + * + * @return : Allocated SCIF port #, or -ENOSPC if port unavailable. + * On memory allocation failure, returns -ENOMEM. + */ +int scif_rsrv_port(u16 port) +{ + return __scif_get_port(port, port + 1); +} + +/** + * scif_get_new_port - Get and reserve any port # for SCIF in the range + * SCIF_PORT_RSVD + 1 to SCIF_PORT_COUNT - 1. + * + * @return : Allocated SCIF port #, or -ENOSPC if no ports available. + * On memory allocation failure, returns -ENOMEM. + */ +int scif_get_new_port(void) +{ + return __scif_get_port(SCIF_PORT_RSVD + 1, SCIF_PORT_COUNT); +} + +/** + * scif_get_port - Increment the reference count for a SCIF port + * @id : SCIF port + * + * @return : None + */ +void scif_get_port(u16 id) +{ + struct scif_port *port; + + if (!id) + return; + spin_lock(&scif_info.port_lock); + port = idr_find(&scif_ports, id); + if (port) + port->ref_cnt++; + spin_unlock(&scif_info.port_lock); +} + +/** + * scif_put_port - Release a reserved SCIF port + * @id : SCIF port to be released. + * + * @return : None + */ +void scif_put_port(u16 id) +{ + struct scif_port *port; + + if (!id) + return; + spin_lock(&scif_info.port_lock); + port = idr_find(&scif_ports, id); + if (port) { + port->ref_cnt--; + if (!port->ref_cnt) { + idr_remove(&scif_ports, id); + kfree(port); + } + } + spin_unlock(&scif_info.port_lock); +} diff --git a/drivers/misc/mic/scif/scif_rb.c b/drivers/misc/mic/scif/scif_rb.c new file mode 100644 index 000000000..b665757ca --- /dev/null +++ b/drivers/misc/mic/scif/scif_rb.c @@ -0,0 +1,249 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2014 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Intel SCIF driver. + * + */ +#include +#include +#include +#include + +#include "scif_rb.h" + +#define scif_rb_ring_cnt(head, tail, size) CIRC_CNT(head, tail, size) +#define scif_rb_ring_space(head, tail, size) CIRC_SPACE(head, tail, size) + +/** + * scif_rb_init - Initializes the ring buffer + * @rb: ring buffer + * @read_ptr: A pointer to the read offset + * @write_ptr: A pointer to the write offset + * @rb_base: A pointer to the base of the ring buffer + * @size: The size of the ring buffer in powers of two + */ +void scif_rb_init(struct scif_rb *rb, u32 *read_ptr, u32 *write_ptr, + void *rb_base, u8 size) +{ + rb->rb_base = rb_base; + rb->size = (1 << size); + rb->read_ptr = read_ptr; + rb->write_ptr = write_ptr; + rb->current_read_offset = *read_ptr; + rb->current_write_offset = *write_ptr; +} + +/* Copies a message to the ring buffer -- handles the wrap around case */ +static void memcpy_torb(struct scif_rb *rb, void *header, + void *msg, u32 size) +{ + u32 size1, size2; + + if (header + size >= rb->rb_base + rb->size) { + /* Need to call two copies if it wraps around */ + size1 = (u32)(rb->rb_base + rb->size - header); + size2 = size - size1; + memcpy_toio((void __iomem __force *)header, msg, size1); + memcpy_toio((void __iomem __force *)rb->rb_base, + msg + size1, size2); + } else { + memcpy_toio((void __iomem __force *)header, msg, size); + } +} + +/* Copies a message from the ring buffer -- handles the wrap around case */ +static void memcpy_fromrb(struct scif_rb *rb, void *header, + void *msg, u32 size) +{ + u32 size1, size2; + + if (header + size >= rb->rb_base + rb->size) { + /* Need to call two copies if it wraps around */ + size1 = (u32)(rb->rb_base + rb->size - header); + size2 = size - size1; + memcpy_fromio(msg, (void __iomem __force *)header, size1); + memcpy_fromio(msg + size1, + (void __iomem __force *)rb->rb_base, size2); + } else { + memcpy_fromio(msg, (void __iomem __force *)header, size); + } +} + +/** + * scif_rb_space - Query space available for writing to the RB + * @rb: ring buffer + * + * Return: size available for writing to RB in bytes. + */ +u32 scif_rb_space(struct scif_rb *rb) +{ + rb->current_read_offset = *rb->read_ptr; + /* + * Update from the HW read pointer only once the peer has exposed the + * new empty slot. This barrier is paired with the memory barrier + * scif_rb_update_read_ptr() + */ + mb(); + return scif_rb_ring_space(rb->current_write_offset, + rb->current_read_offset, rb->size); +} + +/** + * scif_rb_write - Write a message to the RB + * @rb: ring buffer + * @msg: buffer to send the message. Must be at least size bytes long + * @size: the size (in bytes) to be copied to the RB + * + * This API does not block if there isn't enough space in the RB. + * Returns: 0 on success or -ENOMEM on failure + */ +int scif_rb_write(struct scif_rb *rb, void *msg, u32 size) +{ + void *header; + + if (scif_rb_space(rb) < size) + return -ENOMEM; + header = rb->rb_base + rb->current_write_offset; + memcpy_torb(rb, header, msg, size); + /* + * Wait until scif_rb_commit(). Update the local ring + * buffer data, not the shared data until commit. + */ + rb->current_write_offset = + (rb->current_write_offset + size) & (rb->size - 1); + return 0; +} + +/** + * scif_rb_commit - To submit the message to let the peer fetch it + * @rb: ring buffer + */ +void scif_rb_commit(struct scif_rb *rb) +{ + /* + * We must ensure ordering between the all the data committed + * previously before we expose the new message to the peer by + * updating the write_ptr. This write barrier is paired with + * the read barrier in scif_rb_count(..) + */ + wmb(); + WRITE_ONCE(*rb->write_ptr, rb->current_write_offset); +#ifdef CONFIG_INTEL_MIC_CARD + /* + * X100 Si bug: For the case where a Core is performing an EXT_WR + * followed by a Doorbell Write, the Core must perform two EXT_WR to the + * same address with the same data before it does the Doorbell Write. + * This way, if ordering is violated for the Interrupt Message, it will + * fall just behind the first Posted associated with the first EXT_WR. + */ + WRITE_ONCE(*rb->write_ptr, rb->current_write_offset); +#endif +} + +/** + * scif_rb_get - To get next message from the ring buffer + * @rb: ring buffer + * @size: Number of bytes to be read + * + * Return: NULL if no bytes to be read from the ring buffer, otherwise the + * pointer to the next byte + */ +static void *scif_rb_get(struct scif_rb *rb, u32 size) +{ + void *header = NULL; + + if (scif_rb_count(rb, size) >= size) + header = rb->rb_base + rb->current_read_offset; + return header; +} + +/* + * scif_rb_get_next - Read from ring buffer. + * @rb: ring buffer + * @msg: buffer to hold the message. Must be at least size bytes long + * @size: Number of bytes to be read + * + * Return: number of bytes read if available bytes are >= size, otherwise + * returns zero. + */ +u32 scif_rb_get_next(struct scif_rb *rb, void *msg, u32 size) +{ + void *header = NULL; + int read_size = 0; + + header = scif_rb_get(rb, size); + if (header) { + u32 next_cmd_offset = + (rb->current_read_offset + size) & (rb->size - 1); + + read_size = size; + rb->current_read_offset = next_cmd_offset; + memcpy_fromrb(rb, header, msg, size); + } + return read_size; +} + +/** + * scif_rb_update_read_ptr + * @rb: ring buffer + */ +void scif_rb_update_read_ptr(struct scif_rb *rb) +{ + u32 new_offset; + + new_offset = rb->current_read_offset; + /* + * We must ensure ordering between the all the data committed or read + * previously before we expose the empty slot to the peer by updating + * the read_ptr. This barrier is paired with the memory barrier in + * scif_rb_space(..) + */ + mb(); + WRITE_ONCE(*rb->read_ptr, new_offset); +#ifdef CONFIG_INTEL_MIC_CARD + /* + * X100 Si Bug: For the case where a Core is performing an EXT_WR + * followed by a Doorbell Write, the Core must perform two EXT_WR to the + * same address with the same data before it does the Doorbell Write. + * This way, if ordering is violated for the Interrupt Message, it will + * fall just behind the first Posted associated with the first EXT_WR. + */ + WRITE_ONCE(*rb->read_ptr, new_offset); +#endif +} + +/** + * scif_rb_count + * @rb: ring buffer + * @size: Number of bytes expected to be read + * + * Return: number of bytes that can be read from the RB + */ +u32 scif_rb_count(struct scif_rb *rb, u32 size) +{ + if (scif_rb_ring_cnt(rb->current_write_offset, + rb->current_read_offset, + rb->size) < size) { + rb->current_write_offset = *rb->write_ptr; + /* + * Update from the HW write pointer if empty only once the peer + * has exposed the new message. This read barrier is paired + * with the write barrier in scif_rb_commit(..) + */ + smp_rmb(); + } + return scif_rb_ring_cnt(rb->current_write_offset, + rb->current_read_offset, + rb->size); +} diff --git a/drivers/misc/mic/scif/scif_rb.h b/drivers/misc/mic/scif/scif_rb.h new file mode 100644 index 000000000..166dffe30 --- /dev/null +++ b/drivers/misc/mic/scif/scif_rb.h @@ -0,0 +1,100 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2014 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Copyright(c) 2014 Intel Corporation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Intel SCIF driver. + */ +#ifndef SCIF_RB_H +#define SCIF_RB_H +/* + * This file describes a general purpose, byte based ring buffer. Writers to the + * ring buffer need to synchronize using a lock. The same is true for readers, + * although in practice, the ring buffer has a single reader. It is lockless + * between producer and consumer so it can handle being used across the PCIe + * bus. The ring buffer ensures that there are no reads across the PCIe bus for + * performance reasons. Two of these are used to form a single bidirectional + * queue-pair across PCIe. + */ +/* + * struct scif_rb - SCIF Ring Buffer + * + * @rb_base: The base of the memory used for storing RB messages + * @read_ptr: Pointer to the read offset + * @write_ptr: Pointer to the write offset + * @size: Size of the memory in rb_base + * @current_read_offset: Cached read offset for performance + * @current_write_offset: Cached write offset for performance + */ +struct scif_rb { + void *rb_base; + u32 *read_ptr; + u32 *write_ptr; + u32 size; + u32 current_read_offset; + u32 current_write_offset; +}; + +/* methods used by both */ +void scif_rb_init(struct scif_rb *rb, u32 *read_ptr, u32 *write_ptr, + void *rb_base, u8 size); +/* writer only methods */ +/* write a new command, then scif_rb_commit() */ +int scif_rb_write(struct scif_rb *rb, void *msg, u32 size); +/* after write(), then scif_rb_commit() */ +void scif_rb_commit(struct scif_rb *rb); +/* query space available for writing to a RB. */ +u32 scif_rb_space(struct scif_rb *rb); + +/* reader only methods */ +/* read a new message from the ring buffer of size bytes */ +u32 scif_rb_get_next(struct scif_rb *rb, void *msg, u32 size); +/* update the read pointer so that the space can be reused */ +void scif_rb_update_read_ptr(struct scif_rb *rb); +/* count the number of bytes that can be read */ +u32 scif_rb_count(struct scif_rb *rb, u32 size); +#endif diff --git a/drivers/misc/mic/scif/scif_rma.c b/drivers/misc/mic/scif/scif_rma.c new file mode 100644 index 000000000..e1f59b177 --- /dev/null +++ b/drivers/misc/mic/scif/scif_rma.c @@ -0,0 +1,1775 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2015 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Intel SCIF driver. + * + */ +#include +#include +#include +#include + +#include "scif_main.h" +#include "scif_map.h" + +/* Used to skip ulimit checks for registrations with SCIF_MAP_KERNEL flag */ +#define SCIF_MAP_ULIMIT 0x40 + +bool scif_ulimit_check = 1; + +/** + * scif_rma_ep_init: + * @ep: end point + * + * Initialize RMA per EP data structures. + */ +void scif_rma_ep_init(struct scif_endpt *ep) +{ + struct scif_endpt_rma_info *rma = &ep->rma_info; + + mutex_init(&rma->rma_lock); + init_iova_domain(&rma->iovad, PAGE_SIZE, SCIF_IOVA_START_PFN); + spin_lock_init(&rma->tc_lock); + mutex_init(&rma->mmn_lock); + INIT_LIST_HEAD(&rma->reg_list); + INIT_LIST_HEAD(&rma->remote_reg_list); + atomic_set(&rma->tw_refcount, 0); + atomic_set(&rma->tcw_refcount, 0); + atomic_set(&rma->tcw_total_pages, 0); + atomic_set(&rma->fence_refcount, 0); + + rma->async_list_del = 0; + rma->dma_chan = NULL; + INIT_LIST_HEAD(&rma->mmn_list); + INIT_LIST_HEAD(&rma->vma_list); + init_waitqueue_head(&rma->markwq); +} + +/** + * scif_rma_ep_can_uninit: + * @ep: end point + * + * Returns 1 if an endpoint can be uninitialized and 0 otherwise. + */ +int scif_rma_ep_can_uninit(struct scif_endpt *ep) +{ + int ret = 0; + + mutex_lock(&ep->rma_info.rma_lock); + /* Destroy RMA Info only if both lists are empty */ + if (list_empty(&ep->rma_info.reg_list) && + list_empty(&ep->rma_info.remote_reg_list) && + list_empty(&ep->rma_info.mmn_list) && + !atomic_read(&ep->rma_info.tw_refcount) && + !atomic_read(&ep->rma_info.tcw_refcount) && + !atomic_read(&ep->rma_info.fence_refcount)) + ret = 1; + mutex_unlock(&ep->rma_info.rma_lock); + return ret; +} + +/** + * scif_create_pinned_pages: + * @nr_pages: number of pages in window + * @prot: read/write protection + * + * Allocate and prepare a set of pinned pages. + */ +static struct scif_pinned_pages * +scif_create_pinned_pages(int nr_pages, int prot) +{ + struct scif_pinned_pages *pin; + + might_sleep(); + pin = scif_zalloc(sizeof(*pin)); + if (!pin) + goto error; + + pin->pages = scif_zalloc(nr_pages * sizeof(*pin->pages)); + if (!pin->pages) + goto error_free_pinned_pages; + + pin->prot = prot; + pin->magic = SCIFEP_MAGIC; + return pin; + +error_free_pinned_pages: + scif_free(pin, sizeof(*pin)); +error: + return NULL; +} + +/** + * scif_destroy_pinned_pages: + * @pin: A set of pinned pages. + * + * Deallocate resources for pinned pages. + */ +static int scif_destroy_pinned_pages(struct scif_pinned_pages *pin) +{ + int j; + int writeable = pin->prot & SCIF_PROT_WRITE; + int kernel = SCIF_MAP_KERNEL & pin->map_flags; + + for (j = 0; j < pin->nr_pages; j++) { + if (pin->pages[j] && !kernel) { + if (writeable) + SetPageDirty(pin->pages[j]); + put_page(pin->pages[j]); + } + } + + scif_free(pin->pages, + pin->nr_pages * sizeof(*pin->pages)); + scif_free(pin, sizeof(*pin)); + return 0; +} + +/* + * scif_create_window: + * @ep: end point + * @nr_pages: number of pages + * @offset: registration offset + * @temp: true if a temporary window is being created + * + * Allocate and prepare a self registration window. + */ +struct scif_window *scif_create_window(struct scif_endpt *ep, int nr_pages, + s64 offset, bool temp) +{ + struct scif_window *window; + + might_sleep(); + window = scif_zalloc(sizeof(*window)); + if (!window) + goto error; + + window->dma_addr = scif_zalloc(nr_pages * sizeof(*window->dma_addr)); + if (!window->dma_addr) + goto error_free_window; + + window->num_pages = scif_zalloc(nr_pages * sizeof(*window->num_pages)); + if (!window->num_pages) + goto error_free_window; + + window->offset = offset; + window->ep = (u64)ep; + window->magic = SCIFEP_MAGIC; + window->reg_state = OP_IDLE; + init_waitqueue_head(&window->regwq); + window->unreg_state = OP_IDLE; + init_waitqueue_head(&window->unregwq); + INIT_LIST_HEAD(&window->list); + window->type = SCIF_WINDOW_SELF; + window->temp = temp; + return window; + +error_free_window: + scif_free(window->dma_addr, + nr_pages * sizeof(*window->dma_addr)); + scif_free(window, sizeof(*window)); +error: + return NULL; +} + +/** + * scif_destroy_incomplete_window: + * @ep: end point + * @window: registration window + * + * Deallocate resources for self window. + */ +static void scif_destroy_incomplete_window(struct scif_endpt *ep, + struct scif_window *window) +{ + int err; + int nr_pages = window->nr_pages; + struct scif_allocmsg *alloc = &window->alloc_handle; + struct scifmsg msg; + +retry: + /* Wait for a SCIF_ALLOC_GNT/REJ message */ + err = wait_event_timeout(alloc->allocwq, + alloc->state != OP_IN_PROGRESS, + SCIF_NODE_ALIVE_TIMEOUT); + if (!err && scifdev_alive(ep)) + goto retry; + + mutex_lock(&ep->rma_info.rma_lock); + if (alloc->state == OP_COMPLETED) { + msg.uop = SCIF_FREE_VIRT; + msg.src = ep->port; + msg.payload[0] = ep->remote_ep; + msg.payload[1] = window->alloc_handle.vaddr; + msg.payload[2] = (u64)window; + msg.payload[3] = SCIF_REGISTER; + _scif_nodeqp_send(ep->remote_dev, &msg); + } + mutex_unlock(&ep->rma_info.rma_lock); + + scif_free_window_offset(ep, window, window->offset); + scif_free(window->dma_addr, nr_pages * sizeof(*window->dma_addr)); + scif_free(window->num_pages, nr_pages * sizeof(*window->num_pages)); + scif_free(window, sizeof(*window)); +} + +/** + * scif_unmap_window: + * @remote_dev: SCIF remote device + * @window: registration window + * + * Delete any DMA mappings created for a registered self window + */ +void scif_unmap_window(struct scif_dev *remote_dev, struct scif_window *window) +{ + int j; + + if (scif_is_iommu_enabled() && !scifdev_self(remote_dev)) { + if (window->st) { + dma_unmap_sg(&remote_dev->sdev->dev, + window->st->sgl, window->st->nents, + DMA_BIDIRECTIONAL); + sg_free_table(window->st); + kfree(window->st); + window->st = NULL; + } + } else { + for (j = 0; j < window->nr_contig_chunks; j++) { + if (window->dma_addr[j]) { + scif_unmap_single(window->dma_addr[j], + remote_dev, + window->num_pages[j] << + PAGE_SHIFT); + window->dma_addr[j] = 0x0; + } + } + } +} + +static inline struct mm_struct *__scif_acquire_mm(void) +{ + if (scif_ulimit_check) + return get_task_mm(current); + return NULL; +} + +static inline void __scif_release_mm(struct mm_struct *mm) +{ + if (mm) + mmput(mm); +} + +static inline int +__scif_dec_pinned_vm_lock(struct mm_struct *mm, + int nr_pages, bool try_lock) +{ + if (!mm || !nr_pages || !scif_ulimit_check) + return 0; + if (try_lock) { + if (!down_write_trylock(&mm->mmap_sem)) { + dev_err(scif_info.mdev.this_device, + "%s %d err\n", __func__, __LINE__); + return -1; + } + } else { + down_write(&mm->mmap_sem); + } + mm->pinned_vm -= nr_pages; + up_write(&mm->mmap_sem); + return 0; +} + +static inline int __scif_check_inc_pinned_vm(struct mm_struct *mm, + int nr_pages) +{ + unsigned long locked, lock_limit; + + if (!mm || !nr_pages || !scif_ulimit_check) + return 0; + + locked = nr_pages; + locked += mm->pinned_vm; + lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; + if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) { + dev_err(scif_info.mdev.this_device, + "locked(%lu) > lock_limit(%lu)\n", + locked, lock_limit); + return -ENOMEM; + } + mm->pinned_vm = locked; + return 0; +} + +/** + * scif_destroy_window: + * @ep: end point + * @window: registration window + * + * Deallocate resources for self window. + */ +int scif_destroy_window(struct scif_endpt *ep, struct scif_window *window) +{ + int j; + struct scif_pinned_pages *pinned_pages = window->pinned_pages; + int nr_pages = window->nr_pages; + + might_sleep(); + if (!window->temp && window->mm) { + __scif_dec_pinned_vm_lock(window->mm, window->nr_pages, 0); + __scif_release_mm(window->mm); + window->mm = NULL; + } + + scif_free_window_offset(ep, window, window->offset); + scif_unmap_window(ep->remote_dev, window); + /* + * Decrement references for this set of pinned pages from + * this window. + */ + j = atomic_sub_return(1, &pinned_pages->ref_count); + if (j < 0) + dev_err(scif_info.mdev.this_device, + "%s %d incorrect ref count %d\n", + __func__, __LINE__, j); + /* + * If the ref count for pinned_pages is zero then someone + * has already called scif_unpin_pages() for it and we should + * destroy the page cache. + */ + if (!j) + scif_destroy_pinned_pages(window->pinned_pages); + scif_free(window->dma_addr, nr_pages * sizeof(*window->dma_addr)); + scif_free(window->num_pages, nr_pages * sizeof(*window->num_pages)); + window->magic = 0; + scif_free(window, sizeof(*window)); + return 0; +} + +/** + * scif_create_remote_lookup: + * @remote_dev: SCIF remote device + * @window: remote window + * + * Allocate and prepare lookup entries for the remote + * end to copy over the physical addresses. + * Returns 0 on success and appropriate errno on failure. + */ +static int scif_create_remote_lookup(struct scif_dev *remote_dev, + struct scif_window *window) +{ + int i, j, err = 0; + int nr_pages = window->nr_pages; + bool vmalloc_dma_phys, vmalloc_num_pages; + + might_sleep(); + /* Map window */ + err = scif_map_single(&window->mapped_offset, + window, remote_dev, sizeof(*window)); + if (err) + goto error_window; + + /* Compute the number of lookup entries. 21 == 2MB Shift */ + window->nr_lookup = ALIGN(nr_pages * PAGE_SIZE, + ((2) * 1024 * 1024)) >> 21; + + window->dma_addr_lookup.lookup = + scif_alloc_coherent(&window->dma_addr_lookup.offset, + remote_dev, window->nr_lookup * + sizeof(*window->dma_addr_lookup.lookup), + GFP_KERNEL | __GFP_ZERO); + if (!window->dma_addr_lookup.lookup) { + err = -ENOMEM; + goto error_window; + } + + window->num_pages_lookup.lookup = + scif_alloc_coherent(&window->num_pages_lookup.offset, + remote_dev, window->nr_lookup * + sizeof(*window->num_pages_lookup.lookup), + GFP_KERNEL | __GFP_ZERO); + if (!window->num_pages_lookup.lookup) { + err = -ENOMEM; + goto error_window; + } + + vmalloc_dma_phys = is_vmalloc_addr(&window->dma_addr[0]); + vmalloc_num_pages = is_vmalloc_addr(&window->num_pages[0]); + + /* Now map each of the pages containing physical addresses */ + for (i = 0, j = 0; i < nr_pages; i += SCIF_NR_ADDR_IN_PAGE, j++) { + err = scif_map_page(&window->dma_addr_lookup.lookup[j], + vmalloc_dma_phys ? + vmalloc_to_page(&window->dma_addr[i]) : + virt_to_page(&window->dma_addr[i]), + remote_dev); + if (err) + goto error_window; + err = scif_map_page(&window->num_pages_lookup.lookup[j], + vmalloc_num_pages ? + vmalloc_to_page(&window->num_pages[i]) : + virt_to_page(&window->num_pages[i]), + remote_dev); + if (err) + goto error_window; + } + return 0; +error_window: + return err; +} + +/** + * scif_destroy_remote_lookup: + * @remote_dev: SCIF remote device + * @window: remote window + * + * Destroy lookup entries used for the remote + * end to copy over the physical addresses. + */ +static void scif_destroy_remote_lookup(struct scif_dev *remote_dev, + struct scif_window *window) +{ + int i, j; + + if (window->nr_lookup) { + struct scif_rma_lookup *lup = &window->dma_addr_lookup; + struct scif_rma_lookup *npup = &window->num_pages_lookup; + + for (i = 0, j = 0; i < window->nr_pages; + i += SCIF_NR_ADDR_IN_PAGE, j++) { + if (lup->lookup && lup->lookup[j]) + scif_unmap_single(lup->lookup[j], + remote_dev, + PAGE_SIZE); + if (npup->lookup && npup->lookup[j]) + scif_unmap_single(npup->lookup[j], + remote_dev, + PAGE_SIZE); + } + if (lup->lookup) + scif_free_coherent(lup->lookup, lup->offset, + remote_dev, window->nr_lookup * + sizeof(*lup->lookup)); + if (npup->lookup) + scif_free_coherent(npup->lookup, npup->offset, + remote_dev, window->nr_lookup * + sizeof(*npup->lookup)); + if (window->mapped_offset) + scif_unmap_single(window->mapped_offset, + remote_dev, sizeof(*window)); + window->nr_lookup = 0; + } +} + +/** + * scif_create_remote_window: + * @ep: end point + * @nr_pages: number of pages in window + * + * Allocate and prepare a remote registration window. + */ +static struct scif_window * +scif_create_remote_window(struct scif_dev *scifdev, int nr_pages) +{ + struct scif_window *window; + + might_sleep(); + window = scif_zalloc(sizeof(*window)); + if (!window) + goto error_ret; + + window->magic = SCIFEP_MAGIC; + window->nr_pages = nr_pages; + + window->dma_addr = scif_zalloc(nr_pages * sizeof(*window->dma_addr)); + if (!window->dma_addr) + goto error_window; + + window->num_pages = scif_zalloc(nr_pages * + sizeof(*window->num_pages)); + if (!window->num_pages) + goto error_window; + + if (scif_create_remote_lookup(scifdev, window)) + goto error_window; + + window->type = SCIF_WINDOW_PEER; + window->unreg_state = OP_IDLE; + INIT_LIST_HEAD(&window->list); + return window; +error_window: + scif_destroy_remote_window(window); +error_ret: + return NULL; +} + +/** + * scif_destroy_remote_window: + * @ep: end point + * @window: remote registration window + * + * Deallocate resources for remote window. + */ +void +scif_destroy_remote_window(struct scif_window *window) +{ + scif_free(window->dma_addr, window->nr_pages * + sizeof(*window->dma_addr)); + scif_free(window->num_pages, window->nr_pages * + sizeof(*window->num_pages)); + window->magic = 0; + scif_free(window, sizeof(*window)); +} + +/** + * scif_iommu_map: create DMA mappings if the IOMMU is enabled + * @remote_dev: SCIF remote device + * @window: remote registration window + * + * Map the physical pages using dma_map_sg(..) and then detect the number + * of contiguous DMA mappings allocated + */ +static int scif_iommu_map(struct scif_dev *remote_dev, + struct scif_window *window) +{ + struct scatterlist *sg; + int i, err; + scif_pinned_pages_t pin = window->pinned_pages; + + window->st = kzalloc(sizeof(*window->st), GFP_KERNEL); + if (!window->st) + return -ENOMEM; + + err = sg_alloc_table(window->st, window->nr_pages, GFP_KERNEL); + if (err) + return err; + + for_each_sg(window->st->sgl, sg, window->st->nents, i) + sg_set_page(sg, pin->pages[i], PAGE_SIZE, 0x0); + + err = dma_map_sg(&remote_dev->sdev->dev, window->st->sgl, + window->st->nents, DMA_BIDIRECTIONAL); + if (!err) + return -ENOMEM; + /* Detect contiguous ranges of DMA mappings */ + sg = window->st->sgl; + for (i = 0; sg; i++) { + dma_addr_t last_da; + + window->dma_addr[i] = sg_dma_address(sg); + window->num_pages[i] = sg_dma_len(sg) >> PAGE_SHIFT; + last_da = sg_dma_address(sg) + sg_dma_len(sg); + while ((sg = sg_next(sg)) && sg_dma_address(sg) == last_da) { + window->num_pages[i] += + (sg_dma_len(sg) >> PAGE_SHIFT); + last_da = window->dma_addr[i] + + sg_dma_len(sg); + } + window->nr_contig_chunks++; + } + return 0; +} + +/** + * scif_map_window: + * @remote_dev: SCIF remote device + * @window: self registration window + * + * Map pages of a window into the aperture/PCI. + * Also determine addresses required for DMA. + */ +int +scif_map_window(struct scif_dev *remote_dev, struct scif_window *window) +{ + int i, j, k, err = 0, nr_contig_pages; + scif_pinned_pages_t pin; + phys_addr_t phys_prev, phys_curr; + + might_sleep(); + + pin = window->pinned_pages; + + if (intel_iommu_enabled && !scifdev_self(remote_dev)) + return scif_iommu_map(remote_dev, window); + + for (i = 0, j = 0; i < window->nr_pages; i += nr_contig_pages, j++) { + phys_prev = page_to_phys(pin->pages[i]); + nr_contig_pages = 1; + + /* Detect physically contiguous chunks */ + for (k = i + 1; k < window->nr_pages; k++) { + phys_curr = page_to_phys(pin->pages[k]); + if (phys_curr != (phys_prev + PAGE_SIZE)) + break; + phys_prev = phys_curr; + nr_contig_pages++; + } + window->num_pages[j] = nr_contig_pages; + window->nr_contig_chunks++; + if (scif_is_mgmt_node()) { + /* + * Management node has to deal with SMPT on X100 and + * hence the DMA mapping is required + */ + err = scif_map_single(&window->dma_addr[j], + phys_to_virt(page_to_phys( + pin->pages[i])), + remote_dev, + nr_contig_pages << PAGE_SHIFT); + if (err) + return err; + } else { + window->dma_addr[j] = page_to_phys(pin->pages[i]); + } + } + return err; +} + +/** + * scif_send_scif_unregister: + * @ep: end point + * @window: self registration window + * + * Send a SCIF_UNREGISTER message. + */ +static int scif_send_scif_unregister(struct scif_endpt *ep, + struct scif_window *window) +{ + struct scifmsg msg; + + msg.uop = SCIF_UNREGISTER; + msg.src = ep->port; + msg.payload[0] = window->alloc_handle.vaddr; + msg.payload[1] = (u64)window; + return scif_nodeqp_send(ep->remote_dev, &msg); +} + +/** + * scif_unregister_window: + * @window: self registration window + * + * Send an unregistration request and wait for a response. + */ +int scif_unregister_window(struct scif_window *window) +{ + int err = 0; + struct scif_endpt *ep = (struct scif_endpt *)window->ep; + bool send_msg = false; + + might_sleep(); + switch (window->unreg_state) { + case OP_IDLE: + { + window->unreg_state = OP_IN_PROGRESS; + send_msg = true; + /* fall through */ + } + case OP_IN_PROGRESS: + { + scif_get_window(window, 1); + mutex_unlock(&ep->rma_info.rma_lock); + if (send_msg) { + err = scif_send_scif_unregister(ep, window); + if (err) { + window->unreg_state = OP_COMPLETED; + goto done; + } + } else { + /* Return ENXIO since unregistration is in progress */ + mutex_lock(&ep->rma_info.rma_lock); + return -ENXIO; + } +retry: + /* Wait for a SCIF_UNREGISTER_(N)ACK message */ + err = wait_event_timeout(window->unregwq, + window->unreg_state != OP_IN_PROGRESS, + SCIF_NODE_ALIVE_TIMEOUT); + if (!err && scifdev_alive(ep)) + goto retry; + if (!err) { + err = -ENODEV; + window->unreg_state = OP_COMPLETED; + dev_err(scif_info.mdev.this_device, + "%s %d err %d\n", __func__, __LINE__, err); + } + if (err > 0) + err = 0; +done: + mutex_lock(&ep->rma_info.rma_lock); + scif_put_window(window, 1); + break; + } + case OP_FAILED: + { + if (!scifdev_alive(ep)) { + err = -ENODEV; + window->unreg_state = OP_COMPLETED; + } + break; + } + case OP_COMPLETED: + break; + default: + err = -ENODEV; + } + + if (window->unreg_state == OP_COMPLETED && window->ref_count) + scif_put_window(window, window->nr_pages); + + if (!window->ref_count) { + atomic_inc(&ep->rma_info.tw_refcount); + list_del_init(&window->list); + scif_free_window_offset(ep, window, window->offset); + mutex_unlock(&ep->rma_info.rma_lock); + if ((!!(window->pinned_pages->map_flags & SCIF_MAP_KERNEL)) && + scifdev_alive(ep)) { + scif_drain_dma_intr(ep->remote_dev->sdev, + ep->rma_info.dma_chan); + } else { + if (!__scif_dec_pinned_vm_lock(window->mm, + window->nr_pages, 1)) { + __scif_release_mm(window->mm); + window->mm = NULL; + } + } + scif_queue_for_cleanup(window, &scif_info.rma); + mutex_lock(&ep->rma_info.rma_lock); + } + return err; +} + +/** + * scif_send_alloc_request: + * @ep: end point + * @window: self registration window + * + * Send a remote window allocation request + */ +static int scif_send_alloc_request(struct scif_endpt *ep, + struct scif_window *window) +{ + struct scifmsg msg; + struct scif_allocmsg *alloc = &window->alloc_handle; + + /* Set up the Alloc Handle */ + alloc->state = OP_IN_PROGRESS; + init_waitqueue_head(&alloc->allocwq); + + /* Send out an allocation request */ + msg.uop = SCIF_ALLOC_REQ; + msg.payload[1] = window->nr_pages; + msg.payload[2] = (u64)&window->alloc_handle; + return _scif_nodeqp_send(ep->remote_dev, &msg); +} + +/** + * scif_prep_remote_window: + * @ep: end point + * @window: self registration window + * + * Send a remote window allocation request, wait for an allocation response, + * and prepares the remote window by copying over the page lists + */ +static int scif_prep_remote_window(struct scif_endpt *ep, + struct scif_window *window) +{ + struct scifmsg msg; + struct scif_window *remote_window; + struct scif_allocmsg *alloc = &window->alloc_handle; + dma_addr_t *dma_phys_lookup, *tmp, *num_pages_lookup, *tmp1; + int i = 0, j = 0; + int nr_contig_chunks, loop_nr_contig_chunks; + int remaining_nr_contig_chunks, nr_lookup; + int err, map_err; + + map_err = scif_map_window(ep->remote_dev, window); + if (map_err) + dev_err(&ep->remote_dev->sdev->dev, + "%s %d map_err %d\n", __func__, __LINE__, map_err); + remaining_nr_contig_chunks = window->nr_contig_chunks; + nr_contig_chunks = window->nr_contig_chunks; +retry: + /* Wait for a SCIF_ALLOC_GNT/REJ message */ + err = wait_event_timeout(alloc->allocwq, + alloc->state != OP_IN_PROGRESS, + SCIF_NODE_ALIVE_TIMEOUT); + mutex_lock(&ep->rma_info.rma_lock); + /* Synchronize with the thread waking up allocwq */ + mutex_unlock(&ep->rma_info.rma_lock); + if (!err && scifdev_alive(ep)) + goto retry; + + if (!err) + err = -ENODEV; + + if (err > 0) + err = 0; + else + return err; + + /* Bail out. The remote end rejected this request */ + if (alloc->state == OP_FAILED) + return -ENOMEM; + + if (map_err) { + dev_err(&ep->remote_dev->sdev->dev, + "%s %d err %d\n", __func__, __LINE__, map_err); + msg.uop = SCIF_FREE_VIRT; + msg.src = ep->port; + msg.payload[0] = ep->remote_ep; + msg.payload[1] = window->alloc_handle.vaddr; + msg.payload[2] = (u64)window; + msg.payload[3] = SCIF_REGISTER; + spin_lock(&ep->lock); + if (ep->state == SCIFEP_CONNECTED) + err = _scif_nodeqp_send(ep->remote_dev, &msg); + else + err = -ENOTCONN; + spin_unlock(&ep->lock); + return err; + } + + remote_window = scif_ioremap(alloc->phys_addr, sizeof(*window), + ep->remote_dev); + + /* Compute the number of lookup entries. 21 == 2MB Shift */ + nr_lookup = ALIGN(nr_contig_chunks, SCIF_NR_ADDR_IN_PAGE) + >> ilog2(SCIF_NR_ADDR_IN_PAGE); + + dma_phys_lookup = + scif_ioremap(remote_window->dma_addr_lookup.offset, + nr_lookup * + sizeof(*remote_window->dma_addr_lookup.lookup), + ep->remote_dev); + num_pages_lookup = + scif_ioremap(remote_window->num_pages_lookup.offset, + nr_lookup * + sizeof(*remote_window->num_pages_lookup.lookup), + ep->remote_dev); + + while (remaining_nr_contig_chunks) { + loop_nr_contig_chunks = min_t(int, remaining_nr_contig_chunks, + (int)SCIF_NR_ADDR_IN_PAGE); + /* #1/2 - Copy physical addresses over to the remote side */ + + /* #2/2 - Copy DMA addresses (addresses that are fed into the + * DMA engine) We transfer bus addresses which are then + * converted into a MIC physical address on the remote + * side if it is a MIC, if the remote node is a mgmt node we + * transfer the MIC physical address + */ + tmp = scif_ioremap(dma_phys_lookup[j], + loop_nr_contig_chunks * + sizeof(*window->dma_addr), + ep->remote_dev); + tmp1 = scif_ioremap(num_pages_lookup[j], + loop_nr_contig_chunks * + sizeof(*window->num_pages), + ep->remote_dev); + if (scif_is_mgmt_node()) { + memcpy_toio((void __force __iomem *)tmp, + &window->dma_addr[i], loop_nr_contig_chunks + * sizeof(*window->dma_addr)); + memcpy_toio((void __force __iomem *)tmp1, + &window->num_pages[i], loop_nr_contig_chunks + * sizeof(*window->num_pages)); + } else { + if (scifdev_is_p2p(ep->remote_dev)) { + /* + * add remote node's base address for this node + * to convert it into a MIC address + */ + int m; + dma_addr_t dma_addr; + + for (m = 0; m < loop_nr_contig_chunks; m++) { + dma_addr = window->dma_addr[i + m] + + ep->remote_dev->base_addr; + writeq(dma_addr, + (void __force __iomem *)&tmp[m]); + } + memcpy_toio((void __force __iomem *)tmp1, + &window->num_pages[i], + loop_nr_contig_chunks + * sizeof(*window->num_pages)); + } else { + /* Mgmt node or loopback - transfer DMA + * addresses as is, this is the same as a + * MIC physical address (we use the dma_addr + * and not the phys_addr array since the + * phys_addr is only setup if there is a mmap() + * request from the mgmt node) + */ + memcpy_toio((void __force __iomem *)tmp, + &window->dma_addr[i], + loop_nr_contig_chunks * + sizeof(*window->dma_addr)); + memcpy_toio((void __force __iomem *)tmp1, + &window->num_pages[i], + loop_nr_contig_chunks * + sizeof(*window->num_pages)); + } + } + remaining_nr_contig_chunks -= loop_nr_contig_chunks; + i += loop_nr_contig_chunks; + j++; + scif_iounmap(tmp, loop_nr_contig_chunks * + sizeof(*window->dma_addr), ep->remote_dev); + scif_iounmap(tmp1, loop_nr_contig_chunks * + sizeof(*window->num_pages), ep->remote_dev); + } + + /* Prepare the remote window for the peer */ + remote_window->peer_window = (u64)window; + remote_window->offset = window->offset; + remote_window->prot = window->prot; + remote_window->nr_contig_chunks = nr_contig_chunks; + remote_window->ep = ep->remote_ep; + scif_iounmap(num_pages_lookup, + nr_lookup * + sizeof(*remote_window->num_pages_lookup.lookup), + ep->remote_dev); + scif_iounmap(dma_phys_lookup, + nr_lookup * + sizeof(*remote_window->dma_addr_lookup.lookup), + ep->remote_dev); + scif_iounmap(remote_window, sizeof(*remote_window), ep->remote_dev); + window->peer_window = alloc->vaddr; + return err; +} + +/** + * scif_send_scif_register: + * @ep: end point + * @window: self registration window + * + * Send a SCIF_REGISTER message if EP is connected and wait for a + * SCIF_REGISTER_(N)ACK message else send a SCIF_FREE_VIRT + * message so that the peer can free its remote window allocated earlier. + */ +static int scif_send_scif_register(struct scif_endpt *ep, + struct scif_window *window) +{ + int err = 0; + struct scifmsg msg; + + msg.src = ep->port; + msg.payload[0] = ep->remote_ep; + msg.payload[1] = window->alloc_handle.vaddr; + msg.payload[2] = (u64)window; + spin_lock(&ep->lock); + if (ep->state == SCIFEP_CONNECTED) { + msg.uop = SCIF_REGISTER; + window->reg_state = OP_IN_PROGRESS; + err = _scif_nodeqp_send(ep->remote_dev, &msg); + spin_unlock(&ep->lock); + if (!err) { +retry: + /* Wait for a SCIF_REGISTER_(N)ACK message */ + err = wait_event_timeout(window->regwq, + window->reg_state != + OP_IN_PROGRESS, + SCIF_NODE_ALIVE_TIMEOUT); + if (!err && scifdev_alive(ep)) + goto retry; + err = !err ? -ENODEV : 0; + if (window->reg_state == OP_FAILED) + err = -ENOTCONN; + } + } else { + msg.uop = SCIF_FREE_VIRT; + msg.payload[3] = SCIF_REGISTER; + err = _scif_nodeqp_send(ep->remote_dev, &msg); + spin_unlock(&ep->lock); + if (!err) + err = -ENOTCONN; + } + return err; +} + +/** + * scif_get_window_offset: + * @ep: end point descriptor + * @flags: flags + * @offset: offset hint + * @num_pages: number of pages + * @out_offset: computed offset returned by reference. + * + * Compute/Claim a new offset for this EP. + */ +int scif_get_window_offset(struct scif_endpt *ep, int flags, s64 offset, + int num_pages, s64 *out_offset) +{ + s64 page_index; + struct iova *iova_ptr; + int err = 0; + + if (flags & SCIF_MAP_FIXED) { + page_index = SCIF_IOVA_PFN(offset); + iova_ptr = reserve_iova(&ep->rma_info.iovad, page_index, + page_index + num_pages - 1); + if (!iova_ptr) + err = -EADDRINUSE; + } else { + iova_ptr = alloc_iova(&ep->rma_info.iovad, num_pages, + SCIF_DMA_63BIT_PFN - 1, 0); + if (!iova_ptr) + err = -ENOMEM; + } + if (!err) + *out_offset = (iova_ptr->pfn_lo) << PAGE_SHIFT; + return err; +} + +/** + * scif_free_window_offset: + * @ep: end point descriptor + * @window: registration window + * @offset: Offset to be freed + * + * Free offset for this EP. The callee is supposed to grab + * the RMA mutex before calling this API. + */ +void scif_free_window_offset(struct scif_endpt *ep, + struct scif_window *window, s64 offset) +{ + if ((window && !window->offset_freed) || !window) { + free_iova(&ep->rma_info.iovad, offset >> PAGE_SHIFT); + if (window) + window->offset_freed = true; + } +} + +/** + * scif_alloc_req: Respond to SCIF_ALLOC_REQ interrupt message + * @msg: Interrupt message + * + * Remote side is requesting a memory allocation. + */ +void scif_alloc_req(struct scif_dev *scifdev, struct scifmsg *msg) +{ + int err; + struct scif_window *window = NULL; + int nr_pages = msg->payload[1]; + + window = scif_create_remote_window(scifdev, nr_pages); + if (!window) { + err = -ENOMEM; + goto error; + } + + /* The peer's allocation request is granted */ + msg->uop = SCIF_ALLOC_GNT; + msg->payload[0] = (u64)window; + msg->payload[1] = window->mapped_offset; + err = scif_nodeqp_send(scifdev, msg); + if (err) + scif_destroy_remote_window(window); + return; +error: + /* The peer's allocation request is rejected */ + dev_err(&scifdev->sdev->dev, + "%s %d error %d alloc_ptr %p nr_pages 0x%x\n", + __func__, __LINE__, err, window, nr_pages); + msg->uop = SCIF_ALLOC_REJ; + scif_nodeqp_send(scifdev, msg); +} + +/** + * scif_alloc_gnt_rej: Respond to SCIF_ALLOC_GNT/REJ interrupt message + * @msg: Interrupt message + * + * Remote side responded to a memory allocation. + */ +void scif_alloc_gnt_rej(struct scif_dev *scifdev, struct scifmsg *msg) +{ + struct scif_allocmsg *handle = (struct scif_allocmsg *)msg->payload[2]; + struct scif_window *window = container_of(handle, struct scif_window, + alloc_handle); + struct scif_endpt *ep = (struct scif_endpt *)window->ep; + + mutex_lock(&ep->rma_info.rma_lock); + handle->vaddr = msg->payload[0]; + handle->phys_addr = msg->payload[1]; + if (msg->uop == SCIF_ALLOC_GNT) + handle->state = OP_COMPLETED; + else + handle->state = OP_FAILED; + wake_up(&handle->allocwq); + mutex_unlock(&ep->rma_info.rma_lock); +} + +/** + * scif_free_virt: Respond to SCIF_FREE_VIRT interrupt message + * @msg: Interrupt message + * + * Free up memory kmalloc'd earlier. + */ +void scif_free_virt(struct scif_dev *scifdev, struct scifmsg *msg) +{ + struct scif_window *window = (struct scif_window *)msg->payload[1]; + + scif_destroy_remote_window(window); +} + +static void +scif_fixup_aper_base(struct scif_dev *dev, struct scif_window *window) +{ + int j; + struct scif_hw_dev *sdev = dev->sdev; + phys_addr_t apt_base = 0; + + /* + * Add the aperture base if the DMA address is not card relative + * since the DMA addresses need to be an offset into the bar + */ + if (!scifdev_self(dev) && window->type == SCIF_WINDOW_PEER && + sdev->aper && !sdev->card_rel_da) + apt_base = sdev->aper->pa; + else + return; + + for (j = 0; j < window->nr_contig_chunks; j++) { + if (window->num_pages[j]) + window->dma_addr[j] += apt_base; + else + break; + } +} + +/** + * scif_recv_reg: Respond to SCIF_REGISTER interrupt message + * @msg: Interrupt message + * + * Update remote window list with a new registered window. + */ +void scif_recv_reg(struct scif_dev *scifdev, struct scifmsg *msg) +{ + struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0]; + struct scif_window *window = + (struct scif_window *)msg->payload[1]; + + mutex_lock(&ep->rma_info.rma_lock); + spin_lock(&ep->lock); + if (ep->state == SCIFEP_CONNECTED) { + msg->uop = SCIF_REGISTER_ACK; + scif_nodeqp_send(ep->remote_dev, msg); + scif_fixup_aper_base(ep->remote_dev, window); + /* No further failures expected. Insert new window */ + scif_insert_window(window, &ep->rma_info.remote_reg_list); + } else { + msg->uop = SCIF_REGISTER_NACK; + scif_nodeqp_send(ep->remote_dev, msg); + } + spin_unlock(&ep->lock); + mutex_unlock(&ep->rma_info.rma_lock); + /* free up any lookup resources now that page lists are transferred */ + scif_destroy_remote_lookup(ep->remote_dev, window); + /* + * We could not insert the window but we need to + * destroy the window. + */ + if (msg->uop == SCIF_REGISTER_NACK) + scif_destroy_remote_window(window); +} + +/** + * scif_recv_unreg: Respond to SCIF_UNREGISTER interrupt message + * @msg: Interrupt message + * + * Remove window from remote registration list; + */ +void scif_recv_unreg(struct scif_dev *scifdev, struct scifmsg *msg) +{ + struct scif_rma_req req; + struct scif_window *window = NULL; + struct scif_window *recv_window = + (struct scif_window *)msg->payload[0]; + struct scif_endpt *ep; + int del_window = 0; + + ep = (struct scif_endpt *)recv_window->ep; + req.out_window = &window; + req.offset = recv_window->offset; + req.prot = 0; + req.nr_bytes = recv_window->nr_pages << PAGE_SHIFT; + req.type = SCIF_WINDOW_FULL; + req.head = &ep->rma_info.remote_reg_list; + msg->payload[0] = ep->remote_ep; + + mutex_lock(&ep->rma_info.rma_lock); + /* Does a valid window exist? */ + if (scif_query_window(&req)) { + dev_err(&scifdev->sdev->dev, + "%s %d -ENXIO\n", __func__, __LINE__); + msg->uop = SCIF_UNREGISTER_ACK; + goto error; + } + if (window) { + if (window->ref_count) + scif_put_window(window, window->nr_pages); + else + dev_err(&scifdev->sdev->dev, + "%s %d ref count should be +ve\n", + __func__, __LINE__); + window->unreg_state = OP_COMPLETED; + if (!window->ref_count) { + msg->uop = SCIF_UNREGISTER_ACK; + atomic_inc(&ep->rma_info.tw_refcount); + ep->rma_info.async_list_del = 1; + list_del_init(&window->list); + del_window = 1; + } else { + /* NACK! There are valid references to this window */ + msg->uop = SCIF_UNREGISTER_NACK; + } + } else { + /* The window did not make its way to the list at all. ACK */ + msg->uop = SCIF_UNREGISTER_ACK; + scif_destroy_remote_window(recv_window); + } +error: + mutex_unlock(&ep->rma_info.rma_lock); + if (del_window) + scif_drain_dma_intr(ep->remote_dev->sdev, + ep->rma_info.dma_chan); + scif_nodeqp_send(ep->remote_dev, msg); + if (del_window) + scif_queue_for_cleanup(window, &scif_info.rma); +} + +/** + * scif_recv_reg_ack: Respond to SCIF_REGISTER_ACK interrupt message + * @msg: Interrupt message + * + * Wake up the window waiting to complete registration. + */ +void scif_recv_reg_ack(struct scif_dev *scifdev, struct scifmsg *msg) +{ + struct scif_window *window = + (struct scif_window *)msg->payload[2]; + struct scif_endpt *ep = (struct scif_endpt *)window->ep; + + mutex_lock(&ep->rma_info.rma_lock); + window->reg_state = OP_COMPLETED; + wake_up(&window->regwq); + mutex_unlock(&ep->rma_info.rma_lock); +} + +/** + * scif_recv_reg_nack: Respond to SCIF_REGISTER_NACK interrupt message + * @msg: Interrupt message + * + * Wake up the window waiting to inform it that registration + * cannot be completed. + */ +void scif_recv_reg_nack(struct scif_dev *scifdev, struct scifmsg *msg) +{ + struct scif_window *window = + (struct scif_window *)msg->payload[2]; + struct scif_endpt *ep = (struct scif_endpt *)window->ep; + + mutex_lock(&ep->rma_info.rma_lock); + window->reg_state = OP_FAILED; + wake_up(&window->regwq); + mutex_unlock(&ep->rma_info.rma_lock); +} + +/** + * scif_recv_unreg_ack: Respond to SCIF_UNREGISTER_ACK interrupt message + * @msg: Interrupt message + * + * Wake up the window waiting to complete unregistration. + */ +void scif_recv_unreg_ack(struct scif_dev *scifdev, struct scifmsg *msg) +{ + struct scif_window *window = + (struct scif_window *)msg->payload[1]; + struct scif_endpt *ep = (struct scif_endpt *)window->ep; + + mutex_lock(&ep->rma_info.rma_lock); + window->unreg_state = OP_COMPLETED; + wake_up(&window->unregwq); + mutex_unlock(&ep->rma_info.rma_lock); +} + +/** + * scif_recv_unreg_nack: Respond to SCIF_UNREGISTER_NACK interrupt message + * @msg: Interrupt message + * + * Wake up the window waiting to inform it that unregistration + * cannot be completed immediately. + */ +void scif_recv_unreg_nack(struct scif_dev *scifdev, struct scifmsg *msg) +{ + struct scif_window *window = + (struct scif_window *)msg->payload[1]; + struct scif_endpt *ep = (struct scif_endpt *)window->ep; + + mutex_lock(&ep->rma_info.rma_lock); + window->unreg_state = OP_FAILED; + wake_up(&window->unregwq); + mutex_unlock(&ep->rma_info.rma_lock); +} + +int __scif_pin_pages(void *addr, size_t len, int *out_prot, + int map_flags, scif_pinned_pages_t *pages) +{ + struct scif_pinned_pages *pinned_pages; + int nr_pages, err = 0, i; + bool vmalloc_addr = false; + bool try_upgrade = false; + int prot = *out_prot; + int ulimit = 0; + struct mm_struct *mm = NULL; + + /* Unsupported flags */ + if (map_flags & ~(SCIF_MAP_KERNEL | SCIF_MAP_ULIMIT)) + return -EINVAL; + ulimit = !!(map_flags & SCIF_MAP_ULIMIT); + + /* Unsupported protection requested */ + if (prot & ~(SCIF_PROT_READ | SCIF_PROT_WRITE)) + return -EINVAL; + + /* addr/len must be page aligned. len should be non zero */ + if (!len || + (ALIGN((u64)addr, PAGE_SIZE) != (u64)addr) || + (ALIGN((u64)len, PAGE_SIZE) != (u64)len)) + return -EINVAL; + + might_sleep(); + + nr_pages = len >> PAGE_SHIFT; + + /* Allocate a set of pinned pages */ + pinned_pages = scif_create_pinned_pages(nr_pages, prot); + if (!pinned_pages) + return -ENOMEM; + + if (map_flags & SCIF_MAP_KERNEL) { + if (is_vmalloc_addr(addr)) + vmalloc_addr = true; + + for (i = 0; i < nr_pages; i++) { + if (vmalloc_addr) + pinned_pages->pages[i] = + vmalloc_to_page(addr + (i * PAGE_SIZE)); + else + pinned_pages->pages[i] = + virt_to_page(addr + (i * PAGE_SIZE)); + } + pinned_pages->nr_pages = nr_pages; + pinned_pages->map_flags = SCIF_MAP_KERNEL; + } else { + /* + * SCIF supports registration caching. If a registration has + * been requested with read only permissions, then we try + * to pin the pages with RW permissions so that a subsequent + * transfer with RW permission can hit the cache instead of + * invalidating it. If the upgrade fails with RW then we + * revert back to R permission and retry + */ + if (prot == SCIF_PROT_READ) + try_upgrade = true; + prot |= SCIF_PROT_WRITE; +retry: + mm = current->mm; + down_write(&mm->mmap_sem); + if (ulimit) { + err = __scif_check_inc_pinned_vm(mm, nr_pages); + if (err) { + up_write(&mm->mmap_sem); + pinned_pages->nr_pages = 0; + goto error_unmap; + } + } + + pinned_pages->nr_pages = get_user_pages( + (u64)addr, + nr_pages, + (prot & SCIF_PROT_WRITE) ? FOLL_WRITE : 0, + pinned_pages->pages, + NULL); + up_write(&mm->mmap_sem); + if (nr_pages != pinned_pages->nr_pages) { + if (pinned_pages->nr_pages < 0) + pinned_pages->nr_pages = 0; + if (try_upgrade) { + if (ulimit) + __scif_dec_pinned_vm_lock(mm, + nr_pages, 0); + /* Roll back any pinned pages */ + for (i = 0; i < pinned_pages->nr_pages; i++) { + if (pinned_pages->pages[i]) + put_page( + pinned_pages->pages[i]); + } + prot &= ~SCIF_PROT_WRITE; + try_upgrade = false; + goto retry; + } + } + pinned_pages->map_flags = 0; + } + + if (pinned_pages->nr_pages < nr_pages) { + err = -EFAULT; + goto dec_pinned; + } + + *out_prot = prot; + atomic_set(&pinned_pages->ref_count, 1); + *pages = pinned_pages; + return err; +dec_pinned: + if (ulimit) + __scif_dec_pinned_vm_lock(mm, nr_pages, 0); + /* Something went wrong! Rollback */ +error_unmap: + scif_destroy_pinned_pages(pinned_pages); + *pages = NULL; + dev_dbg(scif_info.mdev.this_device, + "%s %d err %d len 0x%lx\n", __func__, __LINE__, err, len); + return err; +} + +int scif_pin_pages(void *addr, size_t len, int prot, + int map_flags, scif_pinned_pages_t *pages) +{ + return __scif_pin_pages(addr, len, &prot, map_flags, pages); +} +EXPORT_SYMBOL_GPL(scif_pin_pages); + +int scif_unpin_pages(scif_pinned_pages_t pinned_pages) +{ + int err = 0, ret; + + if (!pinned_pages || SCIFEP_MAGIC != pinned_pages->magic) + return -EINVAL; + + ret = atomic_sub_return(1, &pinned_pages->ref_count); + if (ret < 0) { + dev_err(scif_info.mdev.this_device, + "%s %d scif_unpin_pages called without pinning? rc %d\n", + __func__, __LINE__, ret); + return -EINVAL; + } + /* + * Destroy the window if the ref count for this set of pinned + * pages has dropped to zero. If it is positive then there is + * a valid registered window which is backed by these pages and + * it will be destroyed once all such windows are unregistered. + */ + if (!ret) + err = scif_destroy_pinned_pages(pinned_pages); + + return err; +} +EXPORT_SYMBOL_GPL(scif_unpin_pages); + +static inline void +scif_insert_local_window(struct scif_window *window, struct scif_endpt *ep) +{ + mutex_lock(&ep->rma_info.rma_lock); + scif_insert_window(window, &ep->rma_info.reg_list); + mutex_unlock(&ep->rma_info.rma_lock); +} + +off_t scif_register_pinned_pages(scif_epd_t epd, + scif_pinned_pages_t pinned_pages, + off_t offset, int map_flags) +{ + struct scif_endpt *ep = (struct scif_endpt *)epd; + s64 computed_offset; + struct scif_window *window; + int err; + size_t len; + struct device *spdev; + + /* Unsupported flags */ + if (map_flags & ~SCIF_MAP_FIXED) + return -EINVAL; + + len = pinned_pages->nr_pages << PAGE_SHIFT; + + /* + * Offset is not page aligned/negative or offset+len + * wraps around with SCIF_MAP_FIXED. + */ + if ((map_flags & SCIF_MAP_FIXED) && + ((ALIGN(offset, PAGE_SIZE) != offset) || + (offset < 0) || + (len > LONG_MAX - offset))) + return -EINVAL; + + might_sleep(); + + err = scif_verify_epd(ep); + if (err) + return err; + /* + * It is an error to pass pinned_pages to scif_register_pinned_pages() + * after calling scif_unpin_pages(). + */ + if (!atomic_add_unless(&pinned_pages->ref_count, 1, 0)) + return -EINVAL; + + /* Compute the offset for this registration */ + err = scif_get_window_offset(ep, map_flags, offset, + len, &computed_offset); + if (err) { + atomic_sub(1, &pinned_pages->ref_count); + return err; + } + + /* Allocate and prepare self registration window */ + window = scif_create_window(ep, pinned_pages->nr_pages, + computed_offset, false); + if (!window) { + atomic_sub(1, &pinned_pages->ref_count); + scif_free_window_offset(ep, NULL, computed_offset); + return -ENOMEM; + } + + window->pinned_pages = pinned_pages; + window->nr_pages = pinned_pages->nr_pages; + window->prot = pinned_pages->prot; + + spdev = scif_get_peer_dev(ep->remote_dev); + if (IS_ERR(spdev)) { + err = PTR_ERR(spdev); + scif_destroy_window(ep, window); + return err; + } + err = scif_send_alloc_request(ep, window); + if (err) { + dev_err(&ep->remote_dev->sdev->dev, + "%s %d err %d\n", __func__, __LINE__, err); + goto error_unmap; + } + + /* Prepare the remote registration window */ + err = scif_prep_remote_window(ep, window); + if (err) { + dev_err(&ep->remote_dev->sdev->dev, + "%s %d err %d\n", __func__, __LINE__, err); + goto error_unmap; + } + + /* Tell the peer about the new window */ + err = scif_send_scif_register(ep, window); + if (err) { + dev_err(&ep->remote_dev->sdev->dev, + "%s %d err %d\n", __func__, __LINE__, err); + goto error_unmap; + } + + scif_put_peer_dev(spdev); + /* No further failures expected. Insert new window */ + scif_insert_local_window(window, ep); + return computed_offset; +error_unmap: + scif_destroy_window(ep, window); + scif_put_peer_dev(spdev); + dev_err(&ep->remote_dev->sdev->dev, + "%s %d err %d\n", __func__, __LINE__, err); + return err; +} +EXPORT_SYMBOL_GPL(scif_register_pinned_pages); + +off_t scif_register(scif_epd_t epd, void *addr, size_t len, off_t offset, + int prot, int map_flags) +{ + scif_pinned_pages_t pinned_pages; + off_t err; + struct scif_endpt *ep = (struct scif_endpt *)epd; + s64 computed_offset; + struct scif_window *window; + struct mm_struct *mm = NULL; + struct device *spdev; + + dev_dbg(scif_info.mdev.this_device, + "SCIFAPI register: ep %p addr %p len 0x%lx offset 0x%lx prot 0x%x map_flags 0x%x\n", + epd, addr, len, offset, prot, map_flags); + /* Unsupported flags */ + if (map_flags & ~(SCIF_MAP_FIXED | SCIF_MAP_KERNEL)) + return -EINVAL; + + /* + * Offset is not page aligned/negative or offset+len + * wraps around with SCIF_MAP_FIXED. + */ + if ((map_flags & SCIF_MAP_FIXED) && + ((ALIGN(offset, PAGE_SIZE) != offset) || + (offset < 0) || + (len > LONG_MAX - offset))) + return -EINVAL; + + /* Unsupported protection requested */ + if (prot & ~(SCIF_PROT_READ | SCIF_PROT_WRITE)) + return -EINVAL; + + /* addr/len must be page aligned. len should be non zero */ + if (!len || (ALIGN((u64)addr, PAGE_SIZE) != (u64)addr) || + (ALIGN(len, PAGE_SIZE) != len)) + return -EINVAL; + + might_sleep(); + + err = scif_verify_epd(ep); + if (err) + return err; + + /* Compute the offset for this registration */ + err = scif_get_window_offset(ep, map_flags, offset, + len >> PAGE_SHIFT, &computed_offset); + if (err) + return err; + + spdev = scif_get_peer_dev(ep->remote_dev); + if (IS_ERR(spdev)) { + err = PTR_ERR(spdev); + scif_free_window_offset(ep, NULL, computed_offset); + return err; + } + /* Allocate and prepare self registration window */ + window = scif_create_window(ep, len >> PAGE_SHIFT, + computed_offset, false); + if (!window) { + scif_free_window_offset(ep, NULL, computed_offset); + scif_put_peer_dev(spdev); + return -ENOMEM; + } + + window->nr_pages = len >> PAGE_SHIFT; + + err = scif_send_alloc_request(ep, window); + if (err) { + scif_destroy_incomplete_window(ep, window); + scif_put_peer_dev(spdev); + return err; + } + + if (!(map_flags & SCIF_MAP_KERNEL)) { + mm = __scif_acquire_mm(); + map_flags |= SCIF_MAP_ULIMIT; + } + /* Pin down the pages */ + err = __scif_pin_pages(addr, len, &prot, + map_flags & (SCIF_MAP_KERNEL | SCIF_MAP_ULIMIT), + &pinned_pages); + if (err) { + scif_destroy_incomplete_window(ep, window); + __scif_release_mm(mm); + goto error; + } + + window->pinned_pages = pinned_pages; + window->prot = pinned_pages->prot; + window->mm = mm; + + /* Prepare the remote registration window */ + err = scif_prep_remote_window(ep, window); + if (err) { + dev_err(&ep->remote_dev->sdev->dev, + "%s %d err %ld\n", __func__, __LINE__, err); + goto error_unmap; + } + + /* Tell the peer about the new window */ + err = scif_send_scif_register(ep, window); + if (err) { + dev_err(&ep->remote_dev->sdev->dev, + "%s %d err %ld\n", __func__, __LINE__, err); + goto error_unmap; + } + + scif_put_peer_dev(spdev); + /* No further failures expected. Insert new window */ + scif_insert_local_window(window, ep); + dev_dbg(&ep->remote_dev->sdev->dev, + "SCIFAPI register: ep %p addr %p len 0x%lx computed_offset 0x%llx\n", + epd, addr, len, computed_offset); + return computed_offset; +error_unmap: + scif_destroy_window(ep, window); +error: + scif_put_peer_dev(spdev); + dev_err(&ep->remote_dev->sdev->dev, + "%s %d err %ld\n", __func__, __LINE__, err); + return err; +} +EXPORT_SYMBOL_GPL(scif_register); + +int +scif_unregister(scif_epd_t epd, off_t offset, size_t len) +{ + struct scif_endpt *ep = (struct scif_endpt *)epd; + struct scif_window *window = NULL; + struct scif_rma_req req; + int nr_pages, err; + struct device *spdev; + + dev_dbg(scif_info.mdev.this_device, + "SCIFAPI unregister: ep %p offset 0x%lx len 0x%lx\n", + ep, offset, len); + /* len must be page aligned. len should be non zero */ + if (!len || + (ALIGN((u64)len, PAGE_SIZE) != (u64)len)) + return -EINVAL; + + /* Offset is not page aligned or offset+len wraps around */ + if ((ALIGN(offset, PAGE_SIZE) != offset) || + (offset < 0) || + (len > LONG_MAX - offset)) + return -EINVAL; + + err = scif_verify_epd(ep); + if (err) + return err; + + might_sleep(); + nr_pages = len >> PAGE_SHIFT; + + req.out_window = &window; + req.offset = offset; + req.prot = 0; + req.nr_bytes = len; + req.type = SCIF_WINDOW_FULL; + req.head = &ep->rma_info.reg_list; + + spdev = scif_get_peer_dev(ep->remote_dev); + if (IS_ERR(spdev)) { + err = PTR_ERR(spdev); + return err; + } + mutex_lock(&ep->rma_info.rma_lock); + /* Does a valid window exist? */ + err = scif_query_window(&req); + if (err) { + dev_err(&ep->remote_dev->sdev->dev, + "%s %d err %d\n", __func__, __LINE__, err); + goto error; + } + /* Unregister all the windows in this range */ + err = scif_rma_list_unregister(window, offset, nr_pages); + if (err) + dev_err(&ep->remote_dev->sdev->dev, + "%s %d err %d\n", __func__, __LINE__, err); +error: + mutex_unlock(&ep->rma_info.rma_lock); + scif_put_peer_dev(spdev); + return err; +} +EXPORT_SYMBOL_GPL(scif_unregister); diff --git a/drivers/misc/mic/scif/scif_rma.h b/drivers/misc/mic/scif/scif_rma.h new file mode 100644 index 000000000..fa6722279 --- /dev/null +++ b/drivers/misc/mic/scif/scif_rma.h @@ -0,0 +1,464 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2015 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Copyright(c) 2015 Intel Corporation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Intel SCIF driver. + * + */ +#ifndef SCIF_RMA_H +#define SCIF_RMA_H + +#include +#include + +#include "../bus/scif_bus.h" + +/* If this bit is set then the mark is a remote fence mark */ +#define SCIF_REMOTE_FENCE_BIT 31 +/* Magic value used to indicate a remote fence request */ +#define SCIF_REMOTE_FENCE BIT_ULL(SCIF_REMOTE_FENCE_BIT) + +#define SCIF_MAX_UNALIGNED_BUF_SIZE (1024 * 1024ULL) +#define SCIF_KMEM_UNALIGNED_BUF_SIZE (SCIF_MAX_UNALIGNED_BUF_SIZE + \ + (L1_CACHE_BYTES << 1)) + +#define SCIF_IOVA_START_PFN (1) +#define SCIF_IOVA_PFN(addr) ((addr) >> PAGE_SHIFT) +#define SCIF_DMA_64BIT_PFN SCIF_IOVA_PFN(DMA_BIT_MASK(64)) +#define SCIF_DMA_63BIT_PFN SCIF_IOVA_PFN(DMA_BIT_MASK(63)) + +/* + * struct scif_endpt_rma_info - Per Endpoint Remote Memory Access Information + * + * @reg_list: List of registration windows for self + * @remote_reg_list: List of registration windows for peer + * @iovad: Offset generator + * @rma_lock: Synchronizes access to self/remote list and also protects the + * window from being destroyed while RMAs are in progress. + * @tc_lock: Synchronizes access to temporary cached windows list + * for SCIF Registration Caching. + * @mmn_lock: Synchronizes access to the list of MMU notifiers registered + * @tw_refcount: Keeps track of number of outstanding temporary registered + * windows created by scif_vreadfrom/scif_vwriteto which have + * not been destroyed. + * @tcw_refcount: Same as tw_refcount but for temporary cached windows + * @tcw_total_pages: Same as tcw_refcount but in terms of pages pinned + * @mmn_list: MMU notifier so that we can destroy the windows when required + * @fence_refcount: Keeps track of number of outstanding remote fence + * requests which have been received by the peer. + * @dma_chan: DMA channel used for all DMA transfers for this endpoint. + * @async_list_del: Detect asynchronous list entry deletion + * @vma_list: List of vmas with remote memory mappings + * @markwq: Wait queue used for scif_fence_mark/scif_fence_wait +*/ +struct scif_endpt_rma_info { + struct list_head reg_list; + struct list_head remote_reg_list; + struct iova_domain iovad; + struct mutex rma_lock; + spinlock_t tc_lock; + struct mutex mmn_lock; + atomic_t tw_refcount; + atomic_t tcw_refcount; + atomic_t tcw_total_pages; + struct list_head mmn_list; + atomic_t fence_refcount; + struct dma_chan *dma_chan; + int async_list_del; + struct list_head vma_list; + wait_queue_head_t markwq; +}; + +/* + * struct scif_fence_info - used for tracking fence requests + * + * @state: State of this transfer + * @wq: Fences wait on this queue + * @dma_mark: Used for storing the DMA mark + */ +struct scif_fence_info { + enum scif_msg_state state; + struct completion comp; + int dma_mark; +}; + +/* + * struct scif_remote_fence_info - used for tracking remote fence requests + * + * @msg: List of SCIF node QP fence messages + * @list: Link to list of remote fence requests + */ +struct scif_remote_fence_info { + struct scifmsg msg; + struct list_head list; +}; + +/* + * Specifies whether an RMA operation can span across partial windows, a single + * window or multiple contiguous windows. Mmaps can span across partial windows. + * Unregistration can span across complete windows. scif_get_pages() can span a + * single window. A window can also be of type self or peer. + */ +enum scif_window_type { + SCIF_WINDOW_PARTIAL, + SCIF_WINDOW_SINGLE, + SCIF_WINDOW_FULL, + SCIF_WINDOW_SELF, + SCIF_WINDOW_PEER +}; + +/* The number of physical addresses that can be stored in a PAGE. */ +#define SCIF_NR_ADDR_IN_PAGE (0x1000 >> 3) + +/* + * struct scif_rma_lookup - RMA lookup data structure for page list transfers + * + * Store an array of lookup offsets. Each offset in this array maps + * one 4K page containing 512 physical addresses i.e. 2MB. 512 such + * offsets in a 4K page will correspond to 1GB of registered address space. + + * @lookup: Array of offsets + * @offset: DMA offset of lookup array + */ +struct scif_rma_lookup { + dma_addr_t *lookup; + dma_addr_t offset; +}; + +/* + * struct scif_pinned_pages - A set of pinned pages obtained with + * scif_pin_pages() which could be part of multiple registered + * windows across different end points. + * + * @nr_pages: Number of pages which is defined as a s64 instead of an int + * to avoid sign extension with buffers >= 2GB + * @prot: read/write protections + * @map_flags: Flags specified during the pin operation + * @ref_count: Reference count bumped in terms of number of pages + * @magic: A magic value + * @pages: Array of pointers to struct pages populated with get_user_pages(..) + */ +struct scif_pinned_pages { + s64 nr_pages; + int prot; + int map_flags; + atomic_t ref_count; + u64 magic; + struct page **pages; +}; + +/* + * struct scif_status - Stores DMA status update information + * + * @src_dma_addr: Source buffer DMA address + * @val: src location for value to be written to the destination + * @ep: SCIF endpoint + */ +struct scif_status { + dma_addr_t src_dma_addr; + u64 val; + struct scif_endpt *ep; +}; + +/* + * struct scif_window - Registration Window for Self and Remote + * + * @nr_pages: Number of pages which is defined as a s64 instead of an int + * to avoid sign extension with buffers >= 2GB + * @nr_contig_chunks: Number of contiguous physical chunks + * @prot: read/write protections + * @ref_count: reference count in terms of number of pages + * @magic: Cookie to detect corruption + * @offset: registered offset + * @va_for_temp: va address that this window represents + * @dma_mark: Used to determine if all DMAs against the window are done + * @ep: Pointer to EP. Useful for passing EP around with messages to + avoid expensive list traversals. + * @list: link to list of windows for the endpoint + * @type: self or peer window + * @peer_window: Pointer to peer window. Useful for sending messages to peer + * without requiring an extra list traversal + * @unreg_state: unregistration state + * @offset_freed: True if the offset has been freed + * @temp: True for temporary windows created via scif_vreadfrom/scif_vwriteto + * @mm: memory descriptor for the task_struct which initiated the RMA + * @st: scatter gather table for DMA mappings with IOMMU enabled + * @pinned_pages: The set of pinned_pages backing this window + * @alloc_handle: Handle for sending ALLOC_REQ + * @regwq: Wait Queue for an registration (N)ACK + * @reg_state: Registration state + * @unregwq: Wait Queue for an unregistration (N)ACK + * @dma_addr_lookup: Lookup for physical addresses used for DMA + * @nr_lookup: Number of entries in lookup + * @mapped_offset: Offset used to map the window by the peer + * @dma_addr: Array of physical addresses used for Mgmt node & MIC initiated DMA + * @num_pages: Array specifying number of pages for each physical address + */ +struct scif_window { + s64 nr_pages; + int nr_contig_chunks; + int prot; + int ref_count; + u64 magic; + s64 offset; + unsigned long va_for_temp; + int dma_mark; + u64 ep; + struct list_head list; + enum scif_window_type type; + u64 peer_window; + enum scif_msg_state unreg_state; + bool offset_freed; + bool temp; + struct mm_struct *mm; + struct sg_table *st; + union { + struct { + struct scif_pinned_pages *pinned_pages; + struct scif_allocmsg alloc_handle; + wait_queue_head_t regwq; + enum scif_msg_state reg_state; + wait_queue_head_t unregwq; + }; + struct { + struct scif_rma_lookup dma_addr_lookup; + struct scif_rma_lookup num_pages_lookup; + int nr_lookup; + dma_addr_t mapped_offset; + }; + }; + dma_addr_t *dma_addr; + u64 *num_pages; +} __packed; + +/* + * scif_mmu_notif - SCIF mmu notifier information + * + * @mmu_notifier ep_mmu_notifier: MMU notifier operations + * @tc_reg_list: List of temp registration windows for self + * @mm: memory descriptor for the task_struct which initiated the RMA + * @ep: SCIF endpoint + * @list: link to list of MMU notifier information + */ +struct scif_mmu_notif { +#ifdef CONFIG_MMU_NOTIFIER + struct mmu_notifier ep_mmu_notifier; +#endif + struct list_head tc_reg_list; + struct mm_struct *mm; + struct scif_endpt *ep; + struct list_head list; +}; + +enum scif_rma_dir { + SCIF_LOCAL_TO_REMOTE, + SCIF_REMOTE_TO_LOCAL +}; + +extern struct kmem_cache *unaligned_cache; +/* Initialize RMA for this EP */ +void scif_rma_ep_init(struct scif_endpt *ep); +/* Check if epd can be uninitialized */ +int scif_rma_ep_can_uninit(struct scif_endpt *ep); +/* Obtain a new offset. Callee must grab RMA lock */ +int scif_get_window_offset(struct scif_endpt *ep, int flags, + s64 offset, int nr_pages, s64 *out_offset); +/* Free offset. Callee must grab RMA lock */ +void scif_free_window_offset(struct scif_endpt *ep, + struct scif_window *window, s64 offset); +/* Create self registration window */ +struct scif_window *scif_create_window(struct scif_endpt *ep, int nr_pages, + s64 offset, bool temp); +/* Destroy self registration window.*/ +int scif_destroy_window(struct scif_endpt *ep, struct scif_window *window); +void scif_unmap_window(struct scif_dev *remote_dev, struct scif_window *window); +/* Map pages of self window to Aperture/PCI */ +int scif_map_window(struct scif_dev *remote_dev, + struct scif_window *window); +/* Unregister a self window */ +int scif_unregister_window(struct scif_window *window); +/* Destroy remote registration window */ +void +scif_destroy_remote_window(struct scif_window *window); +/* remove valid remote memory mappings from process address space */ +void scif_zap_mmaps(int node); +/* Query if any applications have remote memory mappings */ +bool scif_rma_do_apps_have_mmaps(int node); +/* Cleanup remote registration lists for zombie endpoints */ +void scif_cleanup_rma_for_zombies(int node); +/* Reserve a DMA channel for a particular endpoint */ +int scif_reserve_dma_chan(struct scif_endpt *ep); +/* Setup a DMA mark for an endpoint */ +int _scif_fence_mark(scif_epd_t epd, int *mark); +int scif_prog_signal(scif_epd_t epd, off_t offset, u64 val, + enum scif_window_type type); +void scif_alloc_req(struct scif_dev *scifdev, struct scifmsg *msg); +void scif_alloc_gnt_rej(struct scif_dev *scifdev, struct scifmsg *msg); +void scif_free_virt(struct scif_dev *scifdev, struct scifmsg *msg); +void scif_recv_reg(struct scif_dev *scifdev, struct scifmsg *msg); +void scif_recv_unreg(struct scif_dev *scifdev, struct scifmsg *msg); +void scif_recv_reg_ack(struct scif_dev *scifdev, struct scifmsg *msg); +void scif_recv_reg_nack(struct scif_dev *scifdev, struct scifmsg *msg); +void scif_recv_unreg_ack(struct scif_dev *scifdev, struct scifmsg *msg); +void scif_recv_unreg_nack(struct scif_dev *scifdev, struct scifmsg *msg); +void scif_recv_munmap(struct scif_dev *scifdev, struct scifmsg *msg); +void scif_recv_mark(struct scif_dev *scifdev, struct scifmsg *msg); +void scif_recv_mark_resp(struct scif_dev *scifdev, struct scifmsg *msg); +void scif_recv_wait(struct scif_dev *scifdev, struct scifmsg *msg); +void scif_recv_wait_resp(struct scif_dev *scifdev, struct scifmsg *msg); +void scif_recv_sig_local(struct scif_dev *scifdev, struct scifmsg *msg); +void scif_recv_sig_remote(struct scif_dev *scifdev, struct scifmsg *msg); +void scif_recv_sig_resp(struct scif_dev *scifdev, struct scifmsg *msg); +void scif_mmu_notif_handler(struct work_struct *work); +void scif_rma_handle_remote_fences(void); +void scif_rma_destroy_windows(void); +void scif_rma_destroy_tcw_invalid(void); +int scif_drain_dma_intr(struct scif_hw_dev *sdev, struct dma_chan *chan); + +struct scif_window_iter { + s64 offset; + int index; +}; + +static inline void +scif_init_window_iter(struct scif_window *window, struct scif_window_iter *iter) +{ + iter->offset = window->offset; + iter->index = 0; +} + +dma_addr_t scif_off_to_dma_addr(struct scif_window *window, s64 off, + size_t *nr_bytes, + struct scif_window_iter *iter); +static inline +dma_addr_t __scif_off_to_dma_addr(struct scif_window *window, s64 off) +{ + return scif_off_to_dma_addr(window, off, NULL, NULL); +} + +static inline bool scif_unaligned(off_t src_offset, off_t dst_offset) +{ + src_offset = src_offset & (L1_CACHE_BYTES - 1); + dst_offset = dst_offset & (L1_CACHE_BYTES - 1); + return !(src_offset == dst_offset); +} + +/* + * scif_zalloc: + * @size: Size of the allocation request. + * + * Helper API which attempts to allocate zeroed pages via + * __get_free_pages(..) first and then falls back on + * vzalloc(..) if that fails. + */ +static inline void *scif_zalloc(size_t size) +{ + void *ret = NULL; + size_t align = ALIGN(size, PAGE_SIZE); + + if (align && get_order(align) < MAX_ORDER) + ret = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, + get_order(align)); + return ret ? ret : vzalloc(align); +} + +/* + * scif_free: + * @addr: Address to be freed. + * @size: Size of the allocation. + * Helper API which frees memory allocated via scif_zalloc(). + */ +static inline void scif_free(void *addr, size_t size) +{ + size_t align = ALIGN(size, PAGE_SIZE); + + if (is_vmalloc_addr(addr)) + vfree(addr); + else + free_pages((unsigned long)addr, get_order(align)); +} + +static inline void scif_get_window(struct scif_window *window, int nr_pages) +{ + window->ref_count += nr_pages; +} + +static inline void scif_put_window(struct scif_window *window, int nr_pages) +{ + window->ref_count -= nr_pages; +} + +static inline void scif_set_window_ref(struct scif_window *window, int nr_pages) +{ + window->ref_count = nr_pages; +} + +static inline void +scif_queue_for_cleanup(struct scif_window *window, struct list_head *list) +{ + spin_lock(&scif_info.rmalock); + list_add_tail(&window->list, list); + spin_unlock(&scif_info.rmalock); + schedule_work(&scif_info.misc_work); +} + +static inline void __scif_rma_destroy_tcw_helper(struct scif_window *window) +{ + list_del_init(&window->list); + scif_queue_for_cleanup(window, &scif_info.rma_tc); +} + +static inline bool scif_is_iommu_enabled(void) +{ +#ifdef CONFIG_INTEL_IOMMU + return intel_iommu_enabled; +#else + return false; +#endif +} +#endif /* SCIF_RMA_H */ diff --git a/drivers/misc/mic/scif/scif_rma_list.c b/drivers/misc/mic/scif/scif_rma_list.c new file mode 100644 index 000000000..a036dbb41 --- /dev/null +++ b/drivers/misc/mic/scif/scif_rma_list.c @@ -0,0 +1,291 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2015 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Intel SCIF driver. + * + */ +#include "scif_main.h" +#include +#include + +/* + * scif_insert_tcw: + * + * Insert a temp window to the temp registration list sorted by va_for_temp. + * RMA lock must be held. + */ +void scif_insert_tcw(struct scif_window *window, struct list_head *head) +{ + struct scif_window *curr = NULL; + struct scif_window *prev = list_entry(head, struct scif_window, list); + struct list_head *item; + + INIT_LIST_HEAD(&window->list); + /* Compare with tail and if the entry is new tail add it to the end */ + if (!list_empty(head)) { + curr = list_entry(head->prev, struct scif_window, list); + if (curr->va_for_temp < window->va_for_temp) { + list_add_tail(&window->list, head); + return; + } + } + list_for_each(item, head) { + curr = list_entry(item, struct scif_window, list); + if (curr->va_for_temp > window->va_for_temp) + break; + prev = curr; + } + list_add(&window->list, &prev->list); +} + +/* + * scif_insert_window: + * + * Insert a window to the self registration list sorted by offset. + * RMA lock must be held. + */ +void scif_insert_window(struct scif_window *window, struct list_head *head) +{ + struct scif_window *curr = NULL, *prev = NULL; + struct list_head *item; + + INIT_LIST_HEAD(&window->list); + list_for_each(item, head) { + curr = list_entry(item, struct scif_window, list); + if (curr->offset > window->offset) + break; + prev = curr; + } + if (!prev) + list_add(&window->list, head); + else + list_add(&window->list, &prev->list); + scif_set_window_ref(window, window->nr_pages); +} + +/* + * scif_query_tcw: + * + * Query the temp cached registration list of ep for an overlapping window + * in case of permission mismatch, destroy the previous window. if permissions + * match and overlap is partial, destroy the window but return the new range + * RMA lock must be held. + */ +int scif_query_tcw(struct scif_endpt *ep, struct scif_rma_req *req) +{ + struct list_head *item, *temp, *head = req->head; + struct scif_window *window; + u64 start_va_window, start_va_req = req->va_for_temp; + u64 end_va_window, end_va_req = start_va_req + req->nr_bytes; + + if (!req->nr_bytes) + return -EINVAL; + /* + * Avoid traversing the entire list to find out that there + * is no entry that matches + */ + if (!list_empty(head)) { + window = list_last_entry(head, struct scif_window, list); + end_va_window = window->va_for_temp + + (window->nr_pages << PAGE_SHIFT); + if (start_va_req > end_va_window) + return -ENXIO; + } + list_for_each_safe(item, temp, head) { + window = list_entry(item, struct scif_window, list); + start_va_window = window->va_for_temp; + end_va_window = window->va_for_temp + + (window->nr_pages << PAGE_SHIFT); + if (start_va_req < start_va_window && + end_va_req < start_va_window) + break; + if (start_va_req >= end_va_window) + continue; + if ((window->prot & req->prot) == req->prot) { + if (start_va_req >= start_va_window && + end_va_req <= end_va_window) { + *req->out_window = window; + return 0; + } + /* expand window */ + if (start_va_req < start_va_window) { + req->nr_bytes += + start_va_window - start_va_req; + req->va_for_temp = start_va_window; + } + if (end_va_req >= end_va_window) + req->nr_bytes += end_va_window - end_va_req; + } + /* Destroy the old window to create a new one */ + __scif_rma_destroy_tcw_helper(window); + break; + } + return -ENXIO; +} + +/* + * scif_query_window: + * + * Query the registration list and check if a valid contiguous + * range of windows exist. + * RMA lock must be held. + */ +int scif_query_window(struct scif_rma_req *req) +{ + struct list_head *item; + struct scif_window *window; + s64 end_offset, offset = req->offset; + u64 tmp_min, nr_bytes_left = req->nr_bytes; + + if (!req->nr_bytes) + return -EINVAL; + + list_for_each(item, req->head) { + window = list_entry(item, struct scif_window, list); + end_offset = window->offset + + (window->nr_pages << PAGE_SHIFT); + if (offset < window->offset) + /* Offset not found! */ + return -ENXIO; + if (offset >= end_offset) + continue; + /* Check read/write protections. */ + if ((window->prot & req->prot) != req->prot) + return -EPERM; + if (nr_bytes_left == req->nr_bytes) + /* Store the first window */ + *req->out_window = window; + tmp_min = min((u64)end_offset - offset, nr_bytes_left); + nr_bytes_left -= tmp_min; + offset += tmp_min; + /* + * Range requested encompasses + * multiple windows contiguously. + */ + if (!nr_bytes_left) { + /* Done for partial window */ + if (req->type == SCIF_WINDOW_PARTIAL || + req->type == SCIF_WINDOW_SINGLE) + return 0; + /* Extra logic for full windows */ + if (offset == end_offset) + /* Spanning multiple whole windows */ + return 0; + /* Not spanning multiple whole windows */ + return -ENXIO; + } + if (req->type == SCIF_WINDOW_SINGLE) + break; + } + dev_err(scif_info.mdev.this_device, + "%s %d ENXIO\n", __func__, __LINE__); + return -ENXIO; +} + +/* + * scif_rma_list_unregister: + * + * Traverse the self registration list starting from window: + * 1) Call scif_unregister_window(..) + * RMA lock must be held. + */ +int scif_rma_list_unregister(struct scif_window *window, + s64 offset, int nr_pages) +{ + struct scif_endpt *ep = (struct scif_endpt *)window->ep; + struct list_head *head = &ep->rma_info.reg_list; + s64 end_offset; + int err = 0; + int loop_nr_pages; + struct scif_window *_window; + + list_for_each_entry_safe_from(window, _window, head, list) { + end_offset = window->offset + (window->nr_pages << PAGE_SHIFT); + loop_nr_pages = min((int)((end_offset - offset) >> PAGE_SHIFT), + nr_pages); + err = scif_unregister_window(window); + if (err) + return err; + nr_pages -= loop_nr_pages; + offset += (loop_nr_pages << PAGE_SHIFT); + if (!nr_pages) + break; + } + return 0; +} + +/* + * scif_unmap_all_window: + * + * Traverse all the windows in the self registration list and: + * 1) Delete any DMA mappings created + */ +void scif_unmap_all_windows(scif_epd_t epd) +{ + struct list_head *item, *tmp; + struct scif_window *window; + struct scif_endpt *ep = (struct scif_endpt *)epd; + struct list_head *head = &ep->rma_info.reg_list; + + mutex_lock(&ep->rma_info.rma_lock); + list_for_each_safe(item, tmp, head) { + window = list_entry(item, struct scif_window, list); + scif_unmap_window(ep->remote_dev, window); + } + mutex_unlock(&ep->rma_info.rma_lock); +} + +/* + * scif_unregister_all_window: + * + * Traverse all the windows in the self registration list and: + * 1) Call scif_unregister_window(..) + * RMA lock must be held. + */ +int scif_unregister_all_windows(scif_epd_t epd) +{ + struct list_head *item, *tmp; + struct scif_window *window; + struct scif_endpt *ep = (struct scif_endpt *)epd; + struct list_head *head = &ep->rma_info.reg_list; + int err = 0; + + mutex_lock(&ep->rma_info.rma_lock); +retry: + item = NULL; + tmp = NULL; + list_for_each_safe(item, tmp, head) { + window = list_entry(item, struct scif_window, list); + ep->rma_info.async_list_del = 0; + err = scif_unregister_window(window); + if (err) + dev_err(scif_info.mdev.this_device, + "%s %d err %d\n", + __func__, __LINE__, err); + /* + * Need to restart list traversal if there has been + * an asynchronous list entry deletion. + */ + if (READ_ONCE(ep->rma_info.async_list_del)) + goto retry; + } + mutex_unlock(&ep->rma_info.rma_lock); + if (!list_empty(&ep->rma_info.mmn_list)) { + spin_lock(&scif_info.rmalock); + list_add_tail(&ep->mmu_list, &scif_info.mmu_notif_cleanup); + spin_unlock(&scif_info.rmalock); + schedule_work(&scif_info.mmu_notif_work); + } + return err; +} diff --git a/drivers/misc/mic/scif/scif_rma_list.h b/drivers/misc/mic/scif/scif_rma_list.h new file mode 100644 index 000000000..7d58d1d55 --- /dev/null +++ b/drivers/misc/mic/scif/scif_rma_list.h @@ -0,0 +1,57 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2015 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Intel SCIF driver. + * + */ +#ifndef SCIF_RMA_LIST_H +#define SCIF_RMA_LIST_H + +/* + * struct scif_rma_req - Self Registration list RMA Request query + * + * @out_window - Returns the window if found + * @offset: Starting offset + * @nr_bytes: number of bytes + * @prot: protection requested i.e. read or write or both + * @type: Specify single, partial or multiple windows + * @head: Head of list on which to search + * @va_for_temp: VA for searching temporary cached windows + */ +struct scif_rma_req { + struct scif_window **out_window; + union { + s64 offset; + unsigned long va_for_temp; + }; + size_t nr_bytes; + int prot; + enum scif_window_type type; + struct list_head *head; +}; + +/* Insert */ +void scif_insert_window(struct scif_window *window, struct list_head *head); +void scif_insert_tcw(struct scif_window *window, + struct list_head *head); +/* Query */ +int scif_query_window(struct scif_rma_req *request); +int scif_query_tcw(struct scif_endpt *ep, struct scif_rma_req *request); +/* Called from close to unregister all self windows */ +int scif_unregister_all_windows(scif_epd_t epd); +void scif_unmap_all_windows(scif_epd_t epd); +/* Traverse list and unregister */ +int scif_rma_list_unregister(struct scif_window *window, s64 offset, + int nr_pages); +#endif /* SCIF_RMA_LIST_H */ diff --git a/drivers/misc/mic/vop/Makefile b/drivers/misc/mic/vop/Makefile new file mode 100644 index 000000000..78819c899 --- /dev/null +++ b/drivers/misc/mic/vop/Makefile @@ -0,0 +1,9 @@ +# +# Makefile - Intel MIC Linux driver. +# Copyright(c) 2016, Intel Corporation. +# +obj-m := vop.o + +vop-objs += vop_main.o +vop-objs += vop_debugfs.o +vop-objs += vop_vringh.o diff --git a/drivers/misc/mic/vop/vop_debugfs.c b/drivers/misc/mic/vop/vop_debugfs.c new file mode 100644 index 000000000..ab43884e5 --- /dev/null +++ b/drivers/misc/mic/vop/vop_debugfs.c @@ -0,0 +1,232 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2016 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Intel Virtio Over PCIe (VOP) driver. + * + */ +#include +#include + +#include "vop_main.h" + +static int vop_dp_show(struct seq_file *s, void *pos) +{ + struct mic_device_desc *d; + struct mic_device_ctrl *dc; + struct mic_vqconfig *vqconfig; + __u32 *features; + __u8 *config; + struct vop_info *vi = s->private; + struct vop_device *vpdev = vi->vpdev; + struct mic_bootparam *bootparam = vpdev->hw_ops->get_dp(vpdev); + int j, k; + + seq_printf(s, "Bootparam: magic 0x%x\n", + bootparam->magic); + seq_printf(s, "Bootparam: h2c_config_db %d\n", + bootparam->h2c_config_db); + seq_printf(s, "Bootparam: node_id %d\n", + bootparam->node_id); + seq_printf(s, "Bootparam: c2h_scif_db %d\n", + bootparam->c2h_scif_db); + seq_printf(s, "Bootparam: h2c_scif_db %d\n", + bootparam->h2c_scif_db); + seq_printf(s, "Bootparam: scif_host_dma_addr 0x%llx\n", + bootparam->scif_host_dma_addr); + seq_printf(s, "Bootparam: scif_card_dma_addr 0x%llx\n", + bootparam->scif_card_dma_addr); + + for (j = sizeof(*bootparam); + j < MIC_DP_SIZE; j += mic_total_desc_size(d)) { + d = (void *)bootparam + j; + dc = (void *)d + mic_aligned_desc_size(d); + + /* end of list */ + if (d->type == 0) + break; + + if (d->type == -1) + continue; + + seq_printf(s, "Type %d ", d->type); + seq_printf(s, "Num VQ %d ", d->num_vq); + seq_printf(s, "Feature Len %d\n", d->feature_len); + seq_printf(s, "Config Len %d ", d->config_len); + seq_printf(s, "Shutdown Status %d\n", d->status); + + for (k = 0; k < d->num_vq; k++) { + vqconfig = mic_vq_config(d) + k; + seq_printf(s, "vqconfig[%d]: ", k); + seq_printf(s, "address 0x%llx ", + vqconfig->address); + seq_printf(s, "num %d ", vqconfig->num); + seq_printf(s, "used address 0x%llx\n", + vqconfig->used_address); + } + + features = (__u32 *)mic_vq_features(d); + seq_printf(s, "Features: Host 0x%x ", features[0]); + seq_printf(s, "Guest 0x%x\n", features[1]); + + config = mic_vq_configspace(d); + for (k = 0; k < d->config_len; k++) + seq_printf(s, "config[%d]=%d\n", k, config[k]); + + seq_puts(s, "Device control:\n"); + seq_printf(s, "Config Change %d ", dc->config_change); + seq_printf(s, "Vdev reset %d\n", dc->vdev_reset); + seq_printf(s, "Guest Ack %d ", dc->guest_ack); + seq_printf(s, "Host ack %d\n", dc->host_ack); + seq_printf(s, "Used address updated %d ", + dc->used_address_updated); + seq_printf(s, "Vdev 0x%llx\n", dc->vdev); + seq_printf(s, "c2h doorbell %d ", dc->c2h_vdev_db); + seq_printf(s, "h2c doorbell %d\n", dc->h2c_vdev_db); + } + schedule_work(&vi->hotplug_work); + return 0; +} + +static int vop_dp_debug_open(struct inode *inode, struct file *file) +{ + return single_open(file, vop_dp_show, inode->i_private); +} + +static int vop_dp_debug_release(struct inode *inode, struct file *file) +{ + return single_release(inode, file); +} + +static const struct file_operations dp_ops = { + .owner = THIS_MODULE, + .open = vop_dp_debug_open, + .read = seq_read, + .llseek = seq_lseek, + .release = vop_dp_debug_release +}; + +static int vop_vdev_info_show(struct seq_file *s, void *unused) +{ + struct vop_info *vi = s->private; + struct list_head *pos, *tmp; + struct vop_vdev *vdev; + int i, j; + + mutex_lock(&vi->vop_mutex); + list_for_each_safe(pos, tmp, &vi->vdev_list) { + vdev = list_entry(pos, struct vop_vdev, list); + seq_printf(s, "VDEV type %d state %s in %ld out %ld in_dma %ld out_dma %ld\n", + vdev->virtio_id, + vop_vdevup(vdev) ? "UP" : "DOWN", + vdev->in_bytes, + vdev->out_bytes, + vdev->in_bytes_dma, + vdev->out_bytes_dma); + for (i = 0; i < MIC_MAX_VRINGS; i++) { + struct vring_desc *desc; + struct vring_avail *avail; + struct vring_used *used; + struct vop_vringh *vvr = &vdev->vvr[i]; + struct vringh *vrh = &vvr->vrh; + int num = vrh->vring.num; + + if (!num) + continue; + desc = vrh->vring.desc; + seq_printf(s, "vring i %d avail_idx %d", + i, vvr->vring.info->avail_idx & (num - 1)); + seq_printf(s, " vring i %d avail_idx %d\n", + i, vvr->vring.info->avail_idx); + seq_printf(s, "vrh i %d weak_barriers %d", + i, vrh->weak_barriers); + seq_printf(s, " last_avail_idx %d last_used_idx %d", + vrh->last_avail_idx, vrh->last_used_idx); + seq_printf(s, " completed %d\n", vrh->completed); + for (j = 0; j < num; j++) { + seq_printf(s, "desc[%d] addr 0x%llx len %d", + j, desc->addr, desc->len); + seq_printf(s, " flags 0x%x next %d\n", + desc->flags, desc->next); + desc++; + } + avail = vrh->vring.avail; + seq_printf(s, "avail flags 0x%x idx %d\n", + vringh16_to_cpu(vrh, avail->flags), + vringh16_to_cpu(vrh, + avail->idx) & (num - 1)); + seq_printf(s, "avail flags 0x%x idx %d\n", + vringh16_to_cpu(vrh, avail->flags), + vringh16_to_cpu(vrh, avail->idx)); + for (j = 0; j < num; j++) + seq_printf(s, "avail ring[%d] %d\n", + j, avail->ring[j]); + used = vrh->vring.used; + seq_printf(s, "used flags 0x%x idx %d\n", + vringh16_to_cpu(vrh, used->flags), + vringh16_to_cpu(vrh, used->idx) & (num - 1)); + seq_printf(s, "used flags 0x%x idx %d\n", + vringh16_to_cpu(vrh, used->flags), + vringh16_to_cpu(vrh, used->idx)); + for (j = 0; j < num; j++) + seq_printf(s, "used ring[%d] id %d len %d\n", + j, vringh32_to_cpu(vrh, + used->ring[j].id), + vringh32_to_cpu(vrh, + used->ring[j].len)); + } + } + mutex_unlock(&vi->vop_mutex); + + return 0; +} + +static int vop_vdev_info_debug_open(struct inode *inode, struct file *file) +{ + return single_open(file, vop_vdev_info_show, inode->i_private); +} + +static int vop_vdev_info_debug_release(struct inode *inode, struct file *file) +{ + return single_release(inode, file); +} + +static const struct file_operations vdev_info_ops = { + .owner = THIS_MODULE, + .open = vop_vdev_info_debug_open, + .read = seq_read, + .llseek = seq_lseek, + .release = vop_vdev_info_debug_release +}; + +void vop_init_debugfs(struct vop_info *vi) +{ + char name[16]; + + snprintf(name, sizeof(name), "%s%d", KBUILD_MODNAME, vi->vpdev->dnode); + vi->dbg = debugfs_create_dir(name, NULL); + if (!vi->dbg) { + pr_err("can't create debugfs dir vop\n"); + return; + } + debugfs_create_file("dp", 0444, vi->dbg, vi, &dp_ops); + debugfs_create_file("vdev_info", 0444, vi->dbg, vi, &vdev_info_ops); +} + +void vop_exit_debugfs(struct vop_info *vi) +{ + debugfs_remove_recursive(vi->dbg); +} diff --git a/drivers/misc/mic/vop/vop_main.c b/drivers/misc/mic/vop/vop_main.c new file mode 100644 index 000000000..f4332a97c --- /dev/null +++ b/drivers/misc/mic/vop/vop_main.c @@ -0,0 +1,766 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2016 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Adapted from: + * + * virtio for kvm on s390 + * + * Copyright IBM Corp. 2008 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + * + * Author(s): Christian Borntraeger + * + * Intel Virtio Over PCIe (VOP) driver. + * + */ +#include +#include +#include +#include + +#include "vop_main.h" + +#define VOP_MAX_VRINGS 4 + +/* + * _vop_vdev - Allocated per virtio device instance injected by the peer. + * + * @vdev: Virtio device + * @desc: Virtio device page descriptor + * @dc: Virtio device control + * @vpdev: VOP device which is the parent for this virtio device + * @vr: Buffer for accessing the VRING + * @used: Buffer for used + * @used_size: Size of the used buffer + * @reset_done: Track whether VOP reset is complete + * @virtio_cookie: Cookie returned upon requesting a interrupt + * @c2h_vdev_db: The doorbell used by the guest to interrupt the host + * @h2c_vdev_db: The doorbell used by the host to interrupt the guest + * @dnode: The destination node + */ +struct _vop_vdev { + struct virtio_device vdev; + struct mic_device_desc __iomem *desc; + struct mic_device_ctrl __iomem *dc; + struct vop_device *vpdev; + void __iomem *vr[VOP_MAX_VRINGS]; + dma_addr_t used[VOP_MAX_VRINGS]; + int used_size[VOP_MAX_VRINGS]; + struct completion reset_done; + struct mic_irq *virtio_cookie; + int c2h_vdev_db; + int h2c_vdev_db; + int dnode; +}; + +#define to_vopvdev(vd) container_of(vd, struct _vop_vdev, vdev) + +#define _vop_aligned_desc_size(d) __mic_align(_vop_desc_size(d), 8) + +/* Helper API to obtain the parent of the virtio device */ +static inline struct device *_vop_dev(struct _vop_vdev *vdev) +{ + return vdev->vdev.dev.parent; +} + +static inline unsigned _vop_desc_size(struct mic_device_desc __iomem *desc) +{ + return sizeof(*desc) + + ioread8(&desc->num_vq) * sizeof(struct mic_vqconfig) + + ioread8(&desc->feature_len) * 2 + + ioread8(&desc->config_len); +} + +static inline struct mic_vqconfig __iomem * +_vop_vq_config(struct mic_device_desc __iomem *desc) +{ + return (struct mic_vqconfig __iomem *)(desc + 1); +} + +static inline u8 __iomem * +_vop_vq_features(struct mic_device_desc __iomem *desc) +{ + return (u8 __iomem *)(_vop_vq_config(desc) + ioread8(&desc->num_vq)); +} + +static inline u8 __iomem * +_vop_vq_configspace(struct mic_device_desc __iomem *desc) +{ + return _vop_vq_features(desc) + ioread8(&desc->feature_len) * 2; +} + +static inline unsigned +_vop_total_desc_size(struct mic_device_desc __iomem *desc) +{ + return _vop_aligned_desc_size(desc) + sizeof(struct mic_device_ctrl); +} + +/* This gets the device's feature bits. */ +static u64 vop_get_features(struct virtio_device *vdev) +{ + unsigned int i, bits; + u32 features = 0; + struct mic_device_desc __iomem *desc = to_vopvdev(vdev)->desc; + u8 __iomem *in_features = _vop_vq_features(desc); + int feature_len = ioread8(&desc->feature_len); + + bits = min_t(unsigned, feature_len, sizeof(vdev->features)) * 8; + for (i = 0; i < bits; i++) + if (ioread8(&in_features[i / 8]) & (BIT(i % 8))) + features |= BIT(i); + + return features; +} + +static int vop_finalize_features(struct virtio_device *vdev) +{ + unsigned int i, bits; + struct mic_device_desc __iomem *desc = to_vopvdev(vdev)->desc; + u8 feature_len = ioread8(&desc->feature_len); + /* Second half of bitmap is features we accept. */ + u8 __iomem *out_features = + _vop_vq_features(desc) + feature_len; + + /* Give virtio_ring a chance to accept features. */ + vring_transport_features(vdev); + + memset_io(out_features, 0, feature_len); + bits = min_t(unsigned, feature_len, + sizeof(vdev->features)) * 8; + for (i = 0; i < bits; i++) { + if (__virtio_test_bit(vdev, i)) + iowrite8(ioread8(&out_features[i / 8]) | (1 << (i % 8)), + &out_features[i / 8]); + } + return 0; +} + +/* + * Reading and writing elements in config space + */ +static void vop_get(struct virtio_device *vdev, unsigned int offset, + void *buf, unsigned len) +{ + struct mic_device_desc __iomem *desc = to_vopvdev(vdev)->desc; + + if (offset + len > ioread8(&desc->config_len)) + return; + memcpy_fromio(buf, _vop_vq_configspace(desc) + offset, len); +} + +static void vop_set(struct virtio_device *vdev, unsigned int offset, + const void *buf, unsigned len) +{ + struct mic_device_desc __iomem *desc = to_vopvdev(vdev)->desc; + + if (offset + len > ioread8(&desc->config_len)) + return; + memcpy_toio(_vop_vq_configspace(desc) + offset, buf, len); +} + +/* + * The operations to get and set the status word just access the status + * field of the device descriptor. set_status also interrupts the host + * to tell about status changes. + */ +static u8 vop_get_status(struct virtio_device *vdev) +{ + return ioread8(&to_vopvdev(vdev)->desc->status); +} + +static void vop_set_status(struct virtio_device *dev, u8 status) +{ + struct _vop_vdev *vdev = to_vopvdev(dev); + struct vop_device *vpdev = vdev->vpdev; + + if (!status) + return; + iowrite8(status, &vdev->desc->status); + vpdev->hw_ops->send_intr(vpdev, vdev->c2h_vdev_db); +} + +/* Inform host on a virtio device reset and wait for ack from host */ +static void vop_reset_inform_host(struct virtio_device *dev) +{ + struct _vop_vdev *vdev = to_vopvdev(dev); + struct mic_device_ctrl __iomem *dc = vdev->dc; + struct vop_device *vpdev = vdev->vpdev; + int retry; + + iowrite8(0, &dc->host_ack); + iowrite8(1, &dc->vdev_reset); + vpdev->hw_ops->send_intr(vpdev, vdev->c2h_vdev_db); + + /* Wait till host completes all card accesses and acks the reset */ + for (retry = 100; retry--;) { + if (ioread8(&dc->host_ack)) + break; + msleep(100); + }; + + dev_dbg(_vop_dev(vdev), "%s: retry: %d\n", __func__, retry); + + /* Reset status to 0 in case we timed out */ + iowrite8(0, &vdev->desc->status); +} + +static void vop_reset(struct virtio_device *dev) +{ + struct _vop_vdev *vdev = to_vopvdev(dev); + + dev_dbg(_vop_dev(vdev), "%s: virtio id %d\n", + __func__, dev->id.device); + + vop_reset_inform_host(dev); + complete_all(&vdev->reset_done); +} + +/* + * The virtio_ring code calls this API when it wants to notify the Host. + */ +static bool vop_notify(struct virtqueue *vq) +{ + struct _vop_vdev *vdev = vq->priv; + struct vop_device *vpdev = vdev->vpdev; + + vpdev->hw_ops->send_intr(vpdev, vdev->c2h_vdev_db); + return true; +} + +static void vop_del_vq(struct virtqueue *vq, int n) +{ + struct _vop_vdev *vdev = to_vopvdev(vq->vdev); + struct vring *vr = (struct vring *)(vq + 1); + struct vop_device *vpdev = vdev->vpdev; + + dma_unmap_single(&vpdev->dev, vdev->used[n], + vdev->used_size[n], DMA_BIDIRECTIONAL); + free_pages((unsigned long)vr->used, get_order(vdev->used_size[n])); + vring_del_virtqueue(vq); + vpdev->hw_ops->iounmap(vpdev, vdev->vr[n]); + vdev->vr[n] = NULL; +} + +static void vop_del_vqs(struct virtio_device *dev) +{ + struct _vop_vdev *vdev = to_vopvdev(dev); + struct virtqueue *vq, *n; + int idx = 0; + + dev_dbg(_vop_dev(vdev), "%s\n", __func__); + + list_for_each_entry_safe(vq, n, &dev->vqs, list) + vop_del_vq(vq, idx++); +} + +/* + * This routine will assign vring's allocated in host/io memory. Code in + * virtio_ring.c however continues to access this io memory as if it were local + * memory without io accessors. + */ +static struct virtqueue *vop_find_vq(struct virtio_device *dev, + unsigned index, + void (*callback)(struct virtqueue *vq), + const char *name, bool ctx) +{ + struct _vop_vdev *vdev = to_vopvdev(dev); + struct vop_device *vpdev = vdev->vpdev; + struct mic_vqconfig __iomem *vqconfig; + struct mic_vqconfig config; + struct virtqueue *vq; + void __iomem *va; + struct _mic_vring_info __iomem *info; + void *used; + int vr_size, _vr_size, err, magic; + struct vring *vr; + u8 type = ioread8(&vdev->desc->type); + + if (index >= ioread8(&vdev->desc->num_vq)) + return ERR_PTR(-ENOENT); + + if (!name) + return ERR_PTR(-ENOENT); + + /* First assign the vring's allocated in host memory */ + vqconfig = _vop_vq_config(vdev->desc) + index; + memcpy_fromio(&config, vqconfig, sizeof(config)); + _vr_size = round_up(vring_size(le16_to_cpu(config.num), MIC_VIRTIO_RING_ALIGN), 4); + vr_size = PAGE_ALIGN(_vr_size + sizeof(struct _mic_vring_info)); + va = vpdev->hw_ops->ioremap(vpdev, le64_to_cpu(config.address), + vr_size); + if (!va) + return ERR_PTR(-ENOMEM); + vdev->vr[index] = va; + memset_io(va, 0x0, _vr_size); + vq = vring_new_virtqueue( + index, + le16_to_cpu(config.num), MIC_VIRTIO_RING_ALIGN, + dev, + false, + ctx, + (void __force *)va, vop_notify, callback, name); + if (!vq) { + err = -ENOMEM; + goto unmap; + } + info = va + _vr_size; + magic = ioread32(&info->magic); + + if (WARN(magic != MIC_MAGIC + type + index, "magic mismatch")) { + err = -EIO; + goto unmap; + } + + /* Allocate and reassign used ring now */ + vdev->used_size[index] = PAGE_ALIGN(sizeof(__u16) * 3 + + sizeof(struct vring_used_elem) * + le16_to_cpu(config.num)); + used = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, + get_order(vdev->used_size[index])); + if (!used) { + err = -ENOMEM; + dev_err(_vop_dev(vdev), "%s %d err %d\n", + __func__, __LINE__, err); + goto del_vq; + } + vdev->used[index] = dma_map_single(&vpdev->dev, used, + vdev->used_size[index], + DMA_BIDIRECTIONAL); + if (dma_mapping_error(&vpdev->dev, vdev->used[index])) { + err = -ENOMEM; + dev_err(_vop_dev(vdev), "%s %d err %d\n", + __func__, __LINE__, err); + goto free_used; + } + writeq(vdev->used[index], &vqconfig->used_address); + /* + * To reassign the used ring here we are directly accessing + * struct vring_virtqueue which is a private data structure + * in virtio_ring.c. At the minimum, a BUILD_BUG_ON() in + * vring_new_virtqueue() would ensure that + * (&vq->vring == (struct vring *) (&vq->vq + 1)); + */ + vr = (struct vring *)(vq + 1); + vr->used = used; + + vq->priv = vdev; + return vq; +free_used: + free_pages((unsigned long)used, + get_order(vdev->used_size[index])); +del_vq: + vring_del_virtqueue(vq); +unmap: + vpdev->hw_ops->iounmap(vpdev, vdev->vr[index]); + return ERR_PTR(err); +} + +static int vop_find_vqs(struct virtio_device *dev, unsigned nvqs, + struct virtqueue *vqs[], + vq_callback_t *callbacks[], + const char * const names[], const bool *ctx, + struct irq_affinity *desc) +{ + struct _vop_vdev *vdev = to_vopvdev(dev); + struct vop_device *vpdev = vdev->vpdev; + struct mic_device_ctrl __iomem *dc = vdev->dc; + int i, err, retry; + + /* We must have this many virtqueues. */ + if (nvqs > ioread8(&vdev->desc->num_vq)) + return -ENOENT; + + for (i = 0; i < nvqs; ++i) { + dev_dbg(_vop_dev(vdev), "%s: %d: %s\n", + __func__, i, names[i]); + vqs[i] = vop_find_vq(dev, i, callbacks[i], names[i], + ctx ? ctx[i] : false); + if (IS_ERR(vqs[i])) { + err = PTR_ERR(vqs[i]); + goto error; + } + } + + iowrite8(1, &dc->used_address_updated); + /* + * Send an interrupt to the host to inform it that used + * rings have been re-assigned. + */ + vpdev->hw_ops->send_intr(vpdev, vdev->c2h_vdev_db); + for (retry = 100; --retry;) { + if (!ioread8(&dc->used_address_updated)) + break; + msleep(100); + }; + + dev_dbg(_vop_dev(vdev), "%s: retry: %d\n", __func__, retry); + if (!retry) { + err = -ENODEV; + goto error; + } + + return 0; +error: + vop_del_vqs(dev); + return err; +} + +/* + * The config ops structure as defined by virtio config + */ +static struct virtio_config_ops vop_vq_config_ops = { + .get_features = vop_get_features, + .finalize_features = vop_finalize_features, + .get = vop_get, + .set = vop_set, + .get_status = vop_get_status, + .set_status = vop_set_status, + .reset = vop_reset, + .find_vqs = vop_find_vqs, + .del_vqs = vop_del_vqs, +}; + +static irqreturn_t vop_virtio_intr_handler(int irq, void *data) +{ + struct _vop_vdev *vdev = data; + struct vop_device *vpdev = vdev->vpdev; + struct virtqueue *vq; + + vpdev->hw_ops->ack_interrupt(vpdev, vdev->h2c_vdev_db); + list_for_each_entry(vq, &vdev->vdev.vqs, list) + vring_interrupt(0, vq); + + return IRQ_HANDLED; +} + +static void vop_virtio_release_dev(struct device *_d) +{ + struct virtio_device *vdev = + container_of(_d, struct virtio_device, dev); + struct _vop_vdev *vop_vdev = + container_of(vdev, struct _vop_vdev, vdev); + + kfree(vop_vdev); +} + +/* + * adds a new device and register it with virtio + * appropriate drivers are loaded by the device model + */ +static int _vop_add_device(struct mic_device_desc __iomem *d, + unsigned int offset, struct vop_device *vpdev, + int dnode) +{ + struct _vop_vdev *vdev, *reg_dev = NULL; + int ret; + u8 type = ioread8(&d->type); + + vdev = kzalloc(sizeof(*vdev), GFP_KERNEL); + if (!vdev) + return -ENOMEM; + + vdev->vpdev = vpdev; + vdev->vdev.dev.parent = &vpdev->dev; + vdev->vdev.dev.release = vop_virtio_release_dev; + vdev->vdev.id.device = type; + vdev->vdev.config = &vop_vq_config_ops; + vdev->desc = d; + vdev->dc = (void __iomem *)d + _vop_aligned_desc_size(d); + vdev->dnode = dnode; + vdev->vdev.priv = (void *)(u64)dnode; + init_completion(&vdev->reset_done); + + vdev->h2c_vdev_db = vpdev->hw_ops->next_db(vpdev); + vdev->virtio_cookie = vpdev->hw_ops->request_irq(vpdev, + vop_virtio_intr_handler, "virtio intr", + vdev, vdev->h2c_vdev_db); + if (IS_ERR(vdev->virtio_cookie)) { + ret = PTR_ERR(vdev->virtio_cookie); + goto kfree; + } + iowrite8((u8)vdev->h2c_vdev_db, &vdev->dc->h2c_vdev_db); + vdev->c2h_vdev_db = ioread8(&vdev->dc->c2h_vdev_db); + + ret = register_virtio_device(&vdev->vdev); + reg_dev = vdev; + if (ret) { + dev_err(_vop_dev(vdev), + "Failed to register vop device %u type %u\n", + offset, type); + goto free_irq; + } + writeq((u64)vdev, &vdev->dc->vdev); + dev_dbg(_vop_dev(vdev), "%s: registered vop device %u type %u vdev %p\n", + __func__, offset, type, vdev); + + return 0; + +free_irq: + vpdev->hw_ops->free_irq(vpdev, vdev->virtio_cookie, vdev); +kfree: + if (reg_dev) + put_device(&vdev->vdev.dev); + else + kfree(vdev); + return ret; +} + +/* + * match for a vop device with a specific desc pointer + */ +static int vop_match_desc(struct device *dev, void *data) +{ + struct virtio_device *_dev = dev_to_virtio(dev); + struct _vop_vdev *vdev = to_vopvdev(_dev); + + return vdev->desc == (void __iomem *)data; +} + +static void _vop_handle_config_change(struct mic_device_desc __iomem *d, + unsigned int offset, + struct vop_device *vpdev) +{ + struct mic_device_ctrl __iomem *dc + = (void __iomem *)d + _vop_aligned_desc_size(d); + struct _vop_vdev *vdev = (struct _vop_vdev *)readq(&dc->vdev); + + if (ioread8(&dc->config_change) != MIC_VIRTIO_PARAM_CONFIG_CHANGED) + return; + + dev_dbg(&vpdev->dev, "%s %d\n", __func__, __LINE__); + virtio_config_changed(&vdev->vdev); + iowrite8(1, &dc->guest_ack); +} + +/* + * removes a virtio device if a hot remove event has been + * requested by the host. + */ +static int _vop_remove_device(struct mic_device_desc __iomem *d, + unsigned int offset, struct vop_device *vpdev) +{ + struct mic_device_ctrl __iomem *dc + = (void __iomem *)d + _vop_aligned_desc_size(d); + struct _vop_vdev *vdev = (struct _vop_vdev *)readq(&dc->vdev); + u8 status; + int ret = -1; + + if (ioread8(&dc->config_change) == MIC_VIRTIO_PARAM_DEV_REMOVE) { + struct device *dev = get_device(&vdev->vdev.dev); + + dev_dbg(&vpdev->dev, + "%s %d config_change %d type %d vdev %p\n", + __func__, __LINE__, + ioread8(&dc->config_change), ioread8(&d->type), vdev); + status = ioread8(&d->status); + reinit_completion(&vdev->reset_done); + unregister_virtio_device(&vdev->vdev); + vpdev->hw_ops->free_irq(vpdev, vdev->virtio_cookie, vdev); + iowrite8(-1, &dc->h2c_vdev_db); + if (status & VIRTIO_CONFIG_S_DRIVER_OK) + wait_for_completion(&vdev->reset_done); + put_device(dev); + iowrite8(1, &dc->guest_ack); + dev_dbg(&vpdev->dev, "%s %d guest_ack %d\n", + __func__, __LINE__, ioread8(&dc->guest_ack)); + iowrite8(-1, &d->type); + ret = 0; + } + return ret; +} + +#define REMOVE_DEVICES true + +static void _vop_scan_devices(void __iomem *dp, struct vop_device *vpdev, + bool remove, int dnode) +{ + s8 type; + unsigned int i; + struct mic_device_desc __iomem *d; + struct mic_device_ctrl __iomem *dc; + struct device *dev; + int ret; + + for (i = sizeof(struct mic_bootparam); + i < MIC_DP_SIZE; i += _vop_total_desc_size(d)) { + d = dp + i; + dc = (void __iomem *)d + _vop_aligned_desc_size(d); + /* + * This read barrier is paired with the corresponding write + * barrier on the host which is inserted before adding or + * removing a virtio device descriptor, by updating the type. + */ + rmb(); + type = ioread8(&d->type); + + /* end of list */ + if (type == 0) + break; + + if (type == -1) + continue; + + /* device already exists */ + dev = device_find_child(&vpdev->dev, (void __force *)d, + vop_match_desc); + if (dev) { + if (remove) + iowrite8(MIC_VIRTIO_PARAM_DEV_REMOVE, + &dc->config_change); + put_device(dev); + _vop_handle_config_change(d, i, vpdev); + ret = _vop_remove_device(d, i, vpdev); + if (remove) { + iowrite8(0, &dc->config_change); + iowrite8(0, &dc->guest_ack); + } + continue; + } + + /* new device */ + dev_dbg(&vpdev->dev, "%s %d Adding new virtio device %p\n", + __func__, __LINE__, d); + if (!remove) + _vop_add_device(d, i, vpdev, dnode); + } +} + +static void vop_scan_devices(struct vop_info *vi, + struct vop_device *vpdev, bool remove) +{ + void __iomem *dp = vpdev->hw_ops->get_remote_dp(vpdev); + + if (!dp) + return; + mutex_lock(&vi->vop_mutex); + _vop_scan_devices(dp, vpdev, remove, vpdev->dnode); + mutex_unlock(&vi->vop_mutex); +} + +/* + * vop_hotplug_device tries to find changes in the device page. + */ +static void vop_hotplug_devices(struct work_struct *work) +{ + struct vop_info *vi = container_of(work, struct vop_info, + hotplug_work); + + vop_scan_devices(vi, vi->vpdev, !REMOVE_DEVICES); +} + +/* + * Interrupt handler for hot plug/config changes etc. + */ +static irqreturn_t vop_extint_handler(int irq, void *data) +{ + struct vop_info *vi = data; + struct mic_bootparam __iomem *bp; + struct vop_device *vpdev = vi->vpdev; + + bp = vpdev->hw_ops->get_remote_dp(vpdev); + dev_dbg(&vpdev->dev, "%s %d hotplug work\n", + __func__, __LINE__); + vpdev->hw_ops->ack_interrupt(vpdev, ioread8(&bp->h2c_config_db)); + schedule_work(&vi->hotplug_work); + return IRQ_HANDLED; +} + +static int vop_driver_probe(struct vop_device *vpdev) +{ + struct vop_info *vi; + int rc; + + vi = kzalloc(sizeof(*vi), GFP_KERNEL); + if (!vi) { + rc = -ENOMEM; + goto exit; + } + dev_set_drvdata(&vpdev->dev, vi); + vi->vpdev = vpdev; + + mutex_init(&vi->vop_mutex); + INIT_WORK(&vi->hotplug_work, vop_hotplug_devices); + if (vpdev->dnode) { + rc = vop_host_init(vi); + if (rc < 0) + goto free; + } else { + struct mic_bootparam __iomem *bootparam; + + vop_scan_devices(vi, vpdev, !REMOVE_DEVICES); + + vi->h2c_config_db = vpdev->hw_ops->next_db(vpdev); + vi->cookie = vpdev->hw_ops->request_irq(vpdev, + vop_extint_handler, + "virtio_config_intr", + vi, vi->h2c_config_db); + if (IS_ERR(vi->cookie)) { + rc = PTR_ERR(vi->cookie); + goto free; + } + bootparam = vpdev->hw_ops->get_remote_dp(vpdev); + iowrite8(vi->h2c_config_db, &bootparam->h2c_config_db); + } + vop_init_debugfs(vi); + return 0; +free: + kfree(vi); +exit: + return rc; +} + +static void vop_driver_remove(struct vop_device *vpdev) +{ + struct vop_info *vi = dev_get_drvdata(&vpdev->dev); + + if (vpdev->dnode) { + vop_host_uninit(vi); + } else { + struct mic_bootparam __iomem *bootparam = + vpdev->hw_ops->get_remote_dp(vpdev); + if (bootparam) + iowrite8(-1, &bootparam->h2c_config_db); + vpdev->hw_ops->free_irq(vpdev, vi->cookie, vi); + flush_work(&vi->hotplug_work); + vop_scan_devices(vi, vpdev, REMOVE_DEVICES); + } + vop_exit_debugfs(vi); + kfree(vi); +} + +static struct vop_device_id id_table[] = { + { VOP_DEV_TRNSP, VOP_DEV_ANY_ID }, + { 0 }, +}; + +static struct vop_driver vop_driver = { + .driver.name = KBUILD_MODNAME, + .driver.owner = THIS_MODULE, + .id_table = id_table, + .probe = vop_driver_probe, + .remove = vop_driver_remove, +}; + +module_vop_driver(vop_driver); + +MODULE_DEVICE_TABLE(mbus, id_table); +MODULE_AUTHOR("Intel Corporation"); +MODULE_DESCRIPTION("Intel(R) Virtio Over PCIe (VOP) driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/misc/mic/vop/vop_main.h b/drivers/misc/mic/vop/vop_main.h new file mode 100644 index 000000000..ba47ec7a6 --- /dev/null +++ b/drivers/misc/mic/vop/vop_main.h @@ -0,0 +1,170 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2016 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Intel Virtio Over PCIe (VOP) driver. + * + */ +#ifndef _VOP_MAIN_H_ +#define _VOP_MAIN_H_ + +#include +#include +#include +#include + +#include +#include "../common/mic_dev.h" + +#include "../bus/vop_bus.h" + +/* + * Note on endianness. + * 1. Host can be both BE or LE + * 2. Guest/card is LE. Host uses le_to_cpu to access desc/avail + * rings and ioreadXX/iowriteXX to access used ring. + * 3. Device page exposed by host to guest contains LE values. Guest + * accesses these using ioreadXX/iowriteXX etc. This way in general we + * obey the virtio spec according to which guest works with native + * endianness and host is aware of guest endianness and does all + * required endianness conversion. + * 4. Data provided from user space to guest (in ADD_DEVICE and + * CONFIG_CHANGE ioctl's) is not interpreted by the driver and should be + * in guest endianness. + */ + +/* + * vop_info - Allocated per invocation of VOP probe + * + * @vpdev: VOP device + * @hotplug_work: Handle virtio device creation, deletion and configuration + * @cookie: Cookie received upon requesting a virtio configuration interrupt + * @h2c_config_db: The doorbell used by the peer to indicate a config change + * @vdev_list: List of "active" virtio devices injected in the peer node + * @vop_mutex: Synchronize access to the device page as well as serialize + * creation/deletion of virtio devices on the peer node + * @dp: Peer device page information + * @dbg: Debugfs entry + * @dma_ch: The DMA channel used by this transport for data transfers. + * @name: Name for this transport used in misc device creation. + * @miscdev: The misc device registered. + */ +struct vop_info { + struct vop_device *vpdev; + struct work_struct hotplug_work; + struct mic_irq *cookie; + int h2c_config_db; + struct list_head vdev_list; + struct mutex vop_mutex; + void __iomem *dp; + struct dentry *dbg; + struct dma_chan *dma_ch; + char name[16]; + struct miscdevice miscdev; +}; + +/** + * struct vop_vringh - Virtio ring host information. + * + * @vring: The VOP vring used for setting up user space mappings. + * @vrh: The host VRINGH used for accessing the card vrings. + * @riov: The VRINGH read kernel IOV. + * @wiov: The VRINGH write kernel IOV. + * @head: The VRINGH head index address passed to vringh_getdesc_kern(..). + * @vr_mutex: Mutex for synchronizing access to the VRING. + * @buf: Temporary kernel buffer used to copy in/out data + * from/to the card via DMA. + * @buf_da: dma address of buf. + * @vdev: Back pointer to VOP virtio device for vringh_notify(..). + */ +struct vop_vringh { + struct mic_vring vring; + struct vringh vrh; + struct vringh_kiov riov; + struct vringh_kiov wiov; + u16 head; + struct mutex vr_mutex; + void *buf; + dma_addr_t buf_da; + struct vop_vdev *vdev; +}; + +/** + * struct vop_vdev - Host information for a card Virtio device. + * + * @virtio_id - Virtio device id. + * @waitq - Waitqueue to allow ring3 apps to poll. + * @vpdev - pointer to VOP bus device. + * @poll_wake - Used for waking up threads blocked in poll. + * @out_bytes - Debug stats for number of bytes copied from host to card. + * @in_bytes - Debug stats for number of bytes copied from card to host. + * @out_bytes_dma - Debug stats for number of bytes copied from host to card + * using DMA. + * @in_bytes_dma - Debug stats for number of bytes copied from card to host + * using DMA. + * @tx_len_unaligned - Debug stats for number of bytes copied to the card where + * the transfer length did not have the required DMA alignment. + * @tx_dst_unaligned - Debug stats for number of bytes copied where the + * destination address on the card did not have the required DMA alignment. + * @vvr - Store per VRING data structures. + * @virtio_bh_work - Work struct used to schedule virtio bottom half handling. + * @dd - Virtio device descriptor. + * @dc - Virtio device control fields. + * @list - List of Virtio devices. + * @virtio_db - The doorbell used by the card to interrupt the host. + * @virtio_cookie - The cookie returned while requesting interrupts. + * @vi: Transport information. + * @vdev_mutex: Mutex synchronizing virtio device injection, + * removal and data transfers. + * @destroy: Track if a virtio device is being destroyed. + * @deleted: The virtio device has been deleted. + */ +struct vop_vdev { + int virtio_id; + wait_queue_head_t waitq; + struct vop_device *vpdev; + int poll_wake; + unsigned long out_bytes; + unsigned long in_bytes; + unsigned long out_bytes_dma; + unsigned long in_bytes_dma; + unsigned long tx_len_unaligned; + unsigned long tx_dst_unaligned; + unsigned long rx_dst_unaligned; + struct vop_vringh vvr[MIC_MAX_VRINGS]; + struct work_struct virtio_bh_work; + struct mic_device_desc *dd; + struct mic_device_ctrl *dc; + struct list_head list; + int virtio_db; + struct mic_irq *virtio_cookie; + struct vop_info *vi; + struct mutex vdev_mutex; + struct completion destroy; + bool deleted; +}; + +/* Helper API to check if a virtio device is running */ +static inline bool vop_vdevup(struct vop_vdev *vdev) +{ + return !!vdev->dd->status; +} + +void vop_init_debugfs(struct vop_info *vi); +void vop_exit_debugfs(struct vop_info *vi); +int vop_host_init(struct vop_info *vi); +void vop_host_uninit(struct vop_info *vi); +#endif diff --git a/drivers/misc/mic/vop/vop_vringh.c b/drivers/misc/mic/vop/vop_vringh.c new file mode 100644 index 000000000..a252c2199 --- /dev/null +++ b/drivers/misc/mic/vop/vop_vringh.c @@ -0,0 +1,1169 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2016 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Intel Virtio Over PCIe (VOP) driver. + * + */ +#include +#include +#include + +#include +#include "../common/mic_dev.h" + +#include +#include "vop_main.h" + +/* Helper API to obtain the VOP PCIe device */ +static inline struct device *vop_dev(struct vop_vdev *vdev) +{ + return vdev->vpdev->dev.parent; +} + +/* Helper API to check if a virtio device is initialized */ +static inline int vop_vdev_inited(struct vop_vdev *vdev) +{ + if (!vdev) + return -EINVAL; + /* Device has not been created yet */ + if (!vdev->dd || !vdev->dd->type) { + dev_err(vop_dev(vdev), "%s %d err %d\n", + __func__, __LINE__, -EINVAL); + return -EINVAL; + } + /* Device has been removed/deleted */ + if (vdev->dd->type == -1) { + dev_dbg(vop_dev(vdev), "%s %d err %d\n", + __func__, __LINE__, -ENODEV); + return -ENODEV; + } + return 0; +} + +static void _vop_notify(struct vringh *vrh) +{ + struct vop_vringh *vvrh = container_of(vrh, struct vop_vringh, vrh); + struct vop_vdev *vdev = vvrh->vdev; + struct vop_device *vpdev = vdev->vpdev; + s8 db = vdev->dc->h2c_vdev_db; + + if (db != -1) + vpdev->hw_ops->send_intr(vpdev, db); +} + +static void vop_virtio_init_post(struct vop_vdev *vdev) +{ + struct mic_vqconfig *vqconfig = mic_vq_config(vdev->dd); + struct vop_device *vpdev = vdev->vpdev; + int i, used_size; + + for (i = 0; i < vdev->dd->num_vq; i++) { + used_size = PAGE_ALIGN(sizeof(u16) * 3 + + sizeof(struct vring_used_elem) * + le16_to_cpu(vqconfig->num)); + if (!le64_to_cpu(vqconfig[i].used_address)) { + dev_warn(vop_dev(vdev), "used_address zero??\n"); + continue; + } + vdev->vvr[i].vrh.vring.used = + (void __force *)vpdev->hw_ops->ioremap( + vpdev, + le64_to_cpu(vqconfig[i].used_address), + used_size); + } + + vdev->dc->used_address_updated = 0; + + dev_info(vop_dev(vdev), "%s: device type %d LINKUP\n", + __func__, vdev->virtio_id); +} + +static inline void vop_virtio_device_reset(struct vop_vdev *vdev) +{ + int i; + + dev_dbg(vop_dev(vdev), "%s: status %d device type %d RESET\n", + __func__, vdev->dd->status, vdev->virtio_id); + + for (i = 0; i < vdev->dd->num_vq; i++) + /* + * Avoid lockdep false positive. The + 1 is for the vop + * mutex which is held in the reset devices code path. + */ + mutex_lock_nested(&vdev->vvr[i].vr_mutex, i + 1); + + /* 0 status means "reset" */ + vdev->dd->status = 0; + vdev->dc->vdev_reset = 0; + vdev->dc->host_ack = 1; + + for (i = 0; i < vdev->dd->num_vq; i++) { + struct vringh *vrh = &vdev->vvr[i].vrh; + + vdev->vvr[i].vring.info->avail_idx = 0; + vrh->completed = 0; + vrh->last_avail_idx = 0; + vrh->last_used_idx = 0; + } + + for (i = 0; i < vdev->dd->num_vq; i++) + mutex_unlock(&vdev->vvr[i].vr_mutex); +} + +static void vop_virtio_reset_devices(struct vop_info *vi) +{ + struct list_head *pos, *tmp; + struct vop_vdev *vdev; + + list_for_each_safe(pos, tmp, &vi->vdev_list) { + vdev = list_entry(pos, struct vop_vdev, list); + vop_virtio_device_reset(vdev); + vdev->poll_wake = 1; + wake_up(&vdev->waitq); + } +} + +static void vop_bh_handler(struct work_struct *work) +{ + struct vop_vdev *vdev = container_of(work, struct vop_vdev, + virtio_bh_work); + + if (vdev->dc->used_address_updated) + vop_virtio_init_post(vdev); + + if (vdev->dc->vdev_reset) + vop_virtio_device_reset(vdev); + + vdev->poll_wake = 1; + wake_up(&vdev->waitq); +} + +static irqreturn_t _vop_virtio_intr_handler(int irq, void *data) +{ + struct vop_vdev *vdev = data; + struct vop_device *vpdev = vdev->vpdev; + + vpdev->hw_ops->ack_interrupt(vpdev, vdev->virtio_db); + schedule_work(&vdev->virtio_bh_work); + return IRQ_HANDLED; +} + +static int vop_virtio_config_change(struct vop_vdev *vdev, void *argp) +{ + DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wake); + int ret = 0, retry, i; + struct vop_device *vpdev = vdev->vpdev; + struct vop_info *vi = dev_get_drvdata(&vpdev->dev); + struct mic_bootparam *bootparam = vpdev->hw_ops->get_dp(vpdev); + s8 db = bootparam->h2c_config_db; + + mutex_lock(&vi->vop_mutex); + for (i = 0; i < vdev->dd->num_vq; i++) + mutex_lock_nested(&vdev->vvr[i].vr_mutex, i + 1); + + if (db == -1 || vdev->dd->type == -1) { + ret = -EIO; + goto exit; + } + + memcpy(mic_vq_configspace(vdev->dd), argp, vdev->dd->config_len); + vdev->dc->config_change = MIC_VIRTIO_PARAM_CONFIG_CHANGED; + vpdev->hw_ops->send_intr(vpdev, db); + + for (retry = 100; retry--;) { + ret = wait_event_timeout(wake, vdev->dc->guest_ack, + msecs_to_jiffies(100)); + if (ret) + break; + } + + dev_dbg(vop_dev(vdev), + "%s %d retry: %d\n", __func__, __LINE__, retry); + vdev->dc->config_change = 0; + vdev->dc->guest_ack = 0; +exit: + for (i = 0; i < vdev->dd->num_vq; i++) + mutex_unlock(&vdev->vvr[i].vr_mutex); + mutex_unlock(&vi->vop_mutex); + return ret; +} + +static int vop_copy_dp_entry(struct vop_vdev *vdev, + struct mic_device_desc *argp, __u8 *type, + struct mic_device_desc **devpage) +{ + struct vop_device *vpdev = vdev->vpdev; + struct mic_device_desc *devp; + struct mic_vqconfig *vqconfig; + int ret = 0, i; + bool slot_found = false; + + vqconfig = mic_vq_config(argp); + for (i = 0; i < argp->num_vq; i++) { + if (le16_to_cpu(vqconfig[i].num) > MIC_MAX_VRING_ENTRIES) { + ret = -EINVAL; + dev_err(vop_dev(vdev), "%s %d err %d\n", + __func__, __LINE__, ret); + goto exit; + } + } + + /* Find the first free device page entry */ + for (i = sizeof(struct mic_bootparam); + i < MIC_DP_SIZE - mic_total_desc_size(argp); + i += mic_total_desc_size(devp)) { + devp = vpdev->hw_ops->get_dp(vpdev) + i; + if (devp->type == 0 || devp->type == -1) { + slot_found = true; + break; + } + } + if (!slot_found) { + ret = -EINVAL; + dev_err(vop_dev(vdev), "%s %d err %d\n", + __func__, __LINE__, ret); + goto exit; + } + /* + * Save off the type before doing the memcpy. Type will be set in the + * end after completing all initialization for the new device. + */ + *type = argp->type; + argp->type = 0; + memcpy(devp, argp, mic_desc_size(argp)); + + *devpage = devp; +exit: + return ret; +} + +static void vop_init_device_ctrl(struct vop_vdev *vdev, + struct mic_device_desc *devpage) +{ + struct mic_device_ctrl *dc; + + dc = (void *)devpage + mic_aligned_desc_size(devpage); + + dc->config_change = 0; + dc->guest_ack = 0; + dc->vdev_reset = 0; + dc->host_ack = 0; + dc->used_address_updated = 0; + dc->c2h_vdev_db = -1; + dc->h2c_vdev_db = -1; + vdev->dc = dc; +} + +static int vop_virtio_add_device(struct vop_vdev *vdev, + struct mic_device_desc *argp) +{ + struct vop_info *vi = vdev->vi; + struct vop_device *vpdev = vi->vpdev; + struct mic_device_desc *dd = NULL; + struct mic_vqconfig *vqconfig; + int vr_size, i, j, ret; + u8 type = 0; + s8 db = -1; + char irqname[16]; + struct mic_bootparam *bootparam; + u16 num; + dma_addr_t vr_addr; + + bootparam = vpdev->hw_ops->get_dp(vpdev); + init_waitqueue_head(&vdev->waitq); + INIT_LIST_HEAD(&vdev->list); + vdev->vpdev = vpdev; + + ret = vop_copy_dp_entry(vdev, argp, &type, &dd); + if (ret) { + dev_err(vop_dev(vdev), "%s %d err %d\n", + __func__, __LINE__, ret); + return ret; + } + + vop_init_device_ctrl(vdev, dd); + + vdev->dd = dd; + vdev->virtio_id = type; + vqconfig = mic_vq_config(dd); + INIT_WORK(&vdev->virtio_bh_work, vop_bh_handler); + + for (i = 0; i < dd->num_vq; i++) { + struct vop_vringh *vvr = &vdev->vvr[i]; + struct mic_vring *vr = &vdev->vvr[i].vring; + + num = le16_to_cpu(vqconfig[i].num); + mutex_init(&vvr->vr_mutex); + vr_size = PAGE_ALIGN(round_up(vring_size(num, MIC_VIRTIO_RING_ALIGN), 4) + + sizeof(struct _mic_vring_info)); + vr->va = (void *) + __get_free_pages(GFP_KERNEL | __GFP_ZERO, + get_order(vr_size)); + if (!vr->va) { + ret = -ENOMEM; + dev_err(vop_dev(vdev), "%s %d err %d\n", + __func__, __LINE__, ret); + goto err; + } + vr->len = vr_size; + vr->info = vr->va + round_up(vring_size(num, MIC_VIRTIO_RING_ALIGN), 4); + vr->info->magic = cpu_to_le32(MIC_MAGIC + vdev->virtio_id + i); + vr_addr = dma_map_single(&vpdev->dev, vr->va, vr_size, + DMA_BIDIRECTIONAL); + if (dma_mapping_error(&vpdev->dev, vr_addr)) { + free_pages((unsigned long)vr->va, get_order(vr_size)); + ret = -ENOMEM; + dev_err(vop_dev(vdev), "%s %d err %d\n", + __func__, __LINE__, ret); + goto err; + } + vqconfig[i].address = cpu_to_le64(vr_addr); + + vring_init(&vr->vr, num, vr->va, MIC_VIRTIO_RING_ALIGN); + ret = vringh_init_kern(&vvr->vrh, + *(u32 *)mic_vq_features(vdev->dd), + num, false, vr->vr.desc, vr->vr.avail, + vr->vr.used); + if (ret) { + dev_err(vop_dev(vdev), "%s %d err %d\n", + __func__, __LINE__, ret); + goto err; + } + vringh_kiov_init(&vvr->riov, NULL, 0); + vringh_kiov_init(&vvr->wiov, NULL, 0); + vvr->head = USHRT_MAX; + vvr->vdev = vdev; + vvr->vrh.notify = _vop_notify; + dev_dbg(&vpdev->dev, + "%s %d index %d va %p info %p vr_size 0x%x\n", + __func__, __LINE__, i, vr->va, vr->info, vr_size); + vvr->buf = (void *)__get_free_pages(GFP_KERNEL, + get_order(VOP_INT_DMA_BUF_SIZE)); + vvr->buf_da = dma_map_single(&vpdev->dev, + vvr->buf, VOP_INT_DMA_BUF_SIZE, + DMA_BIDIRECTIONAL); + } + + snprintf(irqname, sizeof(irqname), "vop%dvirtio%d", vpdev->index, + vdev->virtio_id); + vdev->virtio_db = vpdev->hw_ops->next_db(vpdev); + vdev->virtio_cookie = vpdev->hw_ops->request_irq(vpdev, + _vop_virtio_intr_handler, irqname, vdev, + vdev->virtio_db); + if (IS_ERR(vdev->virtio_cookie)) { + ret = PTR_ERR(vdev->virtio_cookie); + dev_dbg(&vpdev->dev, "request irq failed\n"); + goto err; + } + + vdev->dc->c2h_vdev_db = vdev->virtio_db; + + /* + * Order the type update with previous stores. This write barrier + * is paired with the corresponding read barrier before the uncached + * system memory read of the type, on the card while scanning the + * device page. + */ + smp_wmb(); + dd->type = type; + argp->type = type; + + if (bootparam) { + db = bootparam->h2c_config_db; + if (db != -1) + vpdev->hw_ops->send_intr(vpdev, db); + } + dev_dbg(&vpdev->dev, "Added virtio id %d db %d\n", dd->type, db); + return 0; +err: + vqconfig = mic_vq_config(dd); + for (j = 0; j < i; j++) { + struct vop_vringh *vvr = &vdev->vvr[j]; + + dma_unmap_single(&vpdev->dev, le64_to_cpu(vqconfig[j].address), + vvr->vring.len, DMA_BIDIRECTIONAL); + free_pages((unsigned long)vvr->vring.va, + get_order(vvr->vring.len)); + } + return ret; +} + +static void vop_dev_remove(struct vop_info *pvi, struct mic_device_ctrl *devp, + struct vop_device *vpdev) +{ + struct mic_bootparam *bootparam = vpdev->hw_ops->get_dp(vpdev); + s8 db; + int ret, retry; + DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wake); + + devp->config_change = MIC_VIRTIO_PARAM_DEV_REMOVE; + db = bootparam->h2c_config_db; + if (db != -1) + vpdev->hw_ops->send_intr(vpdev, db); + else + goto done; + for (retry = 15; retry--;) { + ret = wait_event_timeout(wake, devp->guest_ack, + msecs_to_jiffies(1000)); + if (ret) + break; + } +done: + devp->config_change = 0; + devp->guest_ack = 0; +} + +static void vop_virtio_del_device(struct vop_vdev *vdev) +{ + struct vop_info *vi = vdev->vi; + struct vop_device *vpdev = vdev->vpdev; + int i; + struct mic_vqconfig *vqconfig; + struct mic_bootparam *bootparam = vpdev->hw_ops->get_dp(vpdev); + + if (!bootparam) + goto skip_hot_remove; + vop_dev_remove(vi, vdev->dc, vpdev); +skip_hot_remove: + vpdev->hw_ops->free_irq(vpdev, vdev->virtio_cookie, vdev); + flush_work(&vdev->virtio_bh_work); + vqconfig = mic_vq_config(vdev->dd); + for (i = 0; i < vdev->dd->num_vq; i++) { + struct vop_vringh *vvr = &vdev->vvr[i]; + + dma_unmap_single(&vpdev->dev, + vvr->buf_da, VOP_INT_DMA_BUF_SIZE, + DMA_BIDIRECTIONAL); + free_pages((unsigned long)vvr->buf, + get_order(VOP_INT_DMA_BUF_SIZE)); + vringh_kiov_cleanup(&vvr->riov); + vringh_kiov_cleanup(&vvr->wiov); + dma_unmap_single(&vpdev->dev, le64_to_cpu(vqconfig[i].address), + vvr->vring.len, DMA_BIDIRECTIONAL); + free_pages((unsigned long)vvr->vring.va, + get_order(vvr->vring.len)); + } + /* + * Order the type update with previous stores. This write barrier + * is paired with the corresponding read barrier before the uncached + * system memory read of the type, on the card while scanning the + * device page. + */ + smp_wmb(); + vdev->dd->type = -1; +} + +/* + * vop_sync_dma - Wrapper for synchronous DMAs. + * + * @dev - The address of the pointer to the device instance used + * for DMA registration. + * @dst - destination DMA address. + * @src - source DMA address. + * @len - size of the transfer. + * + * Return DMA_SUCCESS on success + */ +static int vop_sync_dma(struct vop_vdev *vdev, dma_addr_t dst, dma_addr_t src, + size_t len) +{ + int err = 0; + struct dma_device *ddev; + struct dma_async_tx_descriptor *tx; + struct vop_info *vi = dev_get_drvdata(&vdev->vpdev->dev); + struct dma_chan *vop_ch = vi->dma_ch; + + if (!vop_ch) { + err = -EBUSY; + goto error; + } + ddev = vop_ch->device; + tx = ddev->device_prep_dma_memcpy(vop_ch, dst, src, len, + DMA_PREP_FENCE); + if (!tx) { + err = -ENOMEM; + goto error; + } else { + dma_cookie_t cookie; + + cookie = tx->tx_submit(tx); + if (dma_submit_error(cookie)) { + err = -ENOMEM; + goto error; + } + dma_async_issue_pending(vop_ch); + err = dma_sync_wait(vop_ch, cookie); + } +error: + if (err) + dev_err(&vi->vpdev->dev, "%s %d err %d\n", + __func__, __LINE__, err); + return err; +} + +#define VOP_USE_DMA true + +/* + * Initiates the copies across the PCIe bus from card memory to a user + * space buffer. When transfers are done using DMA, source/destination + * addresses and transfer length must follow the alignment requirements of + * the MIC DMA engine. + */ +static int vop_virtio_copy_to_user(struct vop_vdev *vdev, void __user *ubuf, + size_t len, u64 daddr, size_t dlen, + int vr_idx) +{ + struct vop_device *vpdev = vdev->vpdev; + void __iomem *dbuf = vpdev->hw_ops->ioremap(vpdev, daddr, len); + struct vop_vringh *vvr = &vdev->vvr[vr_idx]; + struct vop_info *vi = dev_get_drvdata(&vpdev->dev); + size_t dma_alignment = 1 << vi->dma_ch->device->copy_align; + bool x200 = is_dma_copy_aligned(vi->dma_ch->device, 1, 1, 1); + size_t dma_offset, partlen; + int err; + + if (!VOP_USE_DMA) { + if (copy_to_user(ubuf, (void __force *)dbuf, len)) { + err = -EFAULT; + dev_err(vop_dev(vdev), "%s %d err %d\n", + __func__, __LINE__, err); + goto err; + } + vdev->in_bytes += len; + err = 0; + goto err; + } + + dma_offset = daddr - round_down(daddr, dma_alignment); + daddr -= dma_offset; + len += dma_offset; + /* + * X100 uses DMA addresses as seen by the card so adding + * the aperture base is not required for DMA. However x200 + * requires DMA addresses to be an offset into the bar so + * add the aperture base for x200. + */ + if (x200) + daddr += vpdev->aper->pa; + while (len) { + partlen = min_t(size_t, len, VOP_INT_DMA_BUF_SIZE); + err = vop_sync_dma(vdev, vvr->buf_da, daddr, + ALIGN(partlen, dma_alignment)); + if (err) { + dev_err(vop_dev(vdev), "%s %d err %d\n", + __func__, __LINE__, err); + goto err; + } + if (copy_to_user(ubuf, vvr->buf + dma_offset, + partlen - dma_offset)) { + err = -EFAULT; + dev_err(vop_dev(vdev), "%s %d err %d\n", + __func__, __LINE__, err); + goto err; + } + daddr += partlen; + ubuf += partlen; + dbuf += partlen; + vdev->in_bytes_dma += partlen; + vdev->in_bytes += partlen; + len -= partlen; + dma_offset = 0; + } + err = 0; +err: + vpdev->hw_ops->iounmap(vpdev, dbuf); + dev_dbg(vop_dev(vdev), + "%s: ubuf %p dbuf %p len 0x%lx vr_idx 0x%x\n", + __func__, ubuf, dbuf, len, vr_idx); + return err; +} + +/* + * Initiates copies across the PCIe bus from a user space buffer to card + * memory. When transfers are done using DMA, source/destination addresses + * and transfer length must follow the alignment requirements of the MIC + * DMA engine. + */ +static int vop_virtio_copy_from_user(struct vop_vdev *vdev, void __user *ubuf, + size_t len, u64 daddr, size_t dlen, + int vr_idx) +{ + struct vop_device *vpdev = vdev->vpdev; + void __iomem *dbuf = vpdev->hw_ops->ioremap(vpdev, daddr, len); + struct vop_vringh *vvr = &vdev->vvr[vr_idx]; + struct vop_info *vi = dev_get_drvdata(&vdev->vpdev->dev); + size_t dma_alignment = 1 << vi->dma_ch->device->copy_align; + bool x200 = is_dma_copy_aligned(vi->dma_ch->device, 1, 1, 1); + size_t partlen; + bool dma = VOP_USE_DMA; + int err = 0; + size_t offset = 0; + + if (daddr & (dma_alignment - 1)) { + vdev->tx_dst_unaligned += len; + dma = false; + } else if (ALIGN(len, dma_alignment) > dlen) { + vdev->tx_len_unaligned += len; + dma = false; + } + + if (!dma) + goto memcpy; + + /* + * X100 uses DMA addresses as seen by the card so adding + * the aperture base is not required for DMA. However x200 + * requires DMA addresses to be an offset into the bar so + * add the aperture base for x200. + */ + if (x200) + daddr += vpdev->aper->pa; + while (len) { + partlen = min_t(size_t, len, VOP_INT_DMA_BUF_SIZE); + + if (copy_from_user(vvr->buf, ubuf, partlen)) { + err = -EFAULT; + dev_err(vop_dev(vdev), "%s %d err %d\n", + __func__, __LINE__, err); + goto err; + } + err = vop_sync_dma(vdev, daddr, vvr->buf_da, + ALIGN(partlen, dma_alignment)); + if (err) { + dev_err(vop_dev(vdev), "%s %d err %d\n", + __func__, __LINE__, err); + goto err; + } + daddr += partlen; + ubuf += partlen; + dbuf += partlen; + vdev->out_bytes_dma += partlen; + vdev->out_bytes += partlen; + len -= partlen; + } +memcpy: + /* + * We are copying to IO below and should ideally use something + * like copy_from_user_toio(..) if it existed. + */ + while (len) { + partlen = min_t(size_t, len, VOP_INT_DMA_BUF_SIZE); + + if (copy_from_user(vvr->buf, ubuf + offset, partlen)) { + err = -EFAULT; + dev_err(vop_dev(vdev), "%s %d err %d\n", + __func__, __LINE__, err); + goto err; + } + memcpy_toio(dbuf + offset, vvr->buf, partlen); + offset += partlen; + vdev->out_bytes += partlen; + len -= partlen; + } + err = 0; +err: + vpdev->hw_ops->iounmap(vpdev, dbuf); + dev_dbg(vop_dev(vdev), + "%s: ubuf %p dbuf %p len 0x%lx vr_idx 0x%x\n", + __func__, ubuf, dbuf, len, vr_idx); + return err; +} + +#define MIC_VRINGH_READ true + +/* Determine the total number of bytes consumed in a VRINGH KIOV */ +static inline u32 vop_vringh_iov_consumed(struct vringh_kiov *iov) +{ + int i; + u32 total = iov->consumed; + + for (i = 0; i < iov->i; i++) + total += iov->iov[i].iov_len; + return total; +} + +/* + * Traverse the VRINGH KIOV and issue the APIs to trigger the copies. + * This API is heavily based on the vringh_iov_xfer(..) implementation + * in vringh.c. The reason we cannot reuse vringh_iov_pull_kern(..) + * and vringh_iov_push_kern(..) directly is because there is no + * way to override the VRINGH xfer(..) routines as of v3.10. + */ +static int vop_vringh_copy(struct vop_vdev *vdev, struct vringh_kiov *iov, + void __user *ubuf, size_t len, bool read, int vr_idx, + size_t *out_len) +{ + int ret = 0; + size_t partlen, tot_len = 0; + + while (len && iov->i < iov->used) { + struct kvec *kiov = &iov->iov[iov->i]; + + partlen = min(kiov->iov_len, len); + if (read) + ret = vop_virtio_copy_to_user(vdev, ubuf, partlen, + (u64)kiov->iov_base, + kiov->iov_len, + vr_idx); + else + ret = vop_virtio_copy_from_user(vdev, ubuf, partlen, + (u64)kiov->iov_base, + kiov->iov_len, + vr_idx); + if (ret) { + dev_err(vop_dev(vdev), "%s %d err %d\n", + __func__, __LINE__, ret); + break; + } + len -= partlen; + ubuf += partlen; + tot_len += partlen; + iov->consumed += partlen; + kiov->iov_len -= partlen; + kiov->iov_base += partlen; + if (!kiov->iov_len) { + /* Fix up old iov element then increment. */ + kiov->iov_len = iov->consumed; + kiov->iov_base -= iov->consumed; + + iov->consumed = 0; + iov->i++; + } + } + *out_len = tot_len; + return ret; +} + +/* + * Use the standard VRINGH infrastructure in the kernel to fetch new + * descriptors, initiate the copies and update the used ring. + */ +static int _vop_virtio_copy(struct vop_vdev *vdev, struct mic_copy_desc *copy) +{ + int ret = 0; + u32 iovcnt = copy->iovcnt; + struct iovec iov; + struct iovec __user *u_iov = copy->iov; + void __user *ubuf = NULL; + struct vop_vringh *vvr = &vdev->vvr[copy->vr_idx]; + struct vringh_kiov *riov = &vvr->riov; + struct vringh_kiov *wiov = &vvr->wiov; + struct vringh *vrh = &vvr->vrh; + u16 *head = &vvr->head; + struct mic_vring *vr = &vvr->vring; + size_t len = 0, out_len; + + copy->out_len = 0; + /* Fetch a new IOVEC if all previous elements have been processed */ + if (riov->i == riov->used && wiov->i == wiov->used) { + ret = vringh_getdesc_kern(vrh, riov, wiov, + head, GFP_KERNEL); + /* Check if there are available descriptors */ + if (ret <= 0) + return ret; + } + while (iovcnt) { + if (!len) { + /* Copy over a new iovec from user space. */ + ret = copy_from_user(&iov, u_iov, sizeof(*u_iov)); + if (ret) { + ret = -EINVAL; + dev_err(vop_dev(vdev), "%s %d err %d\n", + __func__, __LINE__, ret); + break; + } + len = iov.iov_len; + ubuf = iov.iov_base; + } + /* Issue all the read descriptors first */ + ret = vop_vringh_copy(vdev, riov, ubuf, len, + MIC_VRINGH_READ, copy->vr_idx, &out_len); + if (ret) { + dev_err(vop_dev(vdev), "%s %d err %d\n", + __func__, __LINE__, ret); + break; + } + len -= out_len; + ubuf += out_len; + copy->out_len += out_len; + /* Issue the write descriptors next */ + ret = vop_vringh_copy(vdev, wiov, ubuf, len, + !MIC_VRINGH_READ, copy->vr_idx, &out_len); + if (ret) { + dev_err(vop_dev(vdev), "%s %d err %d\n", + __func__, __LINE__, ret); + break; + } + len -= out_len; + ubuf += out_len; + copy->out_len += out_len; + if (!len) { + /* One user space iovec is now completed */ + iovcnt--; + u_iov++; + } + /* Exit loop if all elements in KIOVs have been processed. */ + if (riov->i == riov->used && wiov->i == wiov->used) + break; + } + /* + * Update the used ring if a descriptor was available and some data was + * copied in/out and the user asked for a used ring update. + */ + if (*head != USHRT_MAX && copy->out_len && copy->update_used) { + u32 total = 0; + + /* Determine the total data consumed */ + total += vop_vringh_iov_consumed(riov); + total += vop_vringh_iov_consumed(wiov); + vringh_complete_kern(vrh, *head, total); + *head = USHRT_MAX; + if (vringh_need_notify_kern(vrh) > 0) + vringh_notify(vrh); + vringh_kiov_cleanup(riov); + vringh_kiov_cleanup(wiov); + /* Update avail idx for user space */ + vr->info->avail_idx = vrh->last_avail_idx; + } + return ret; +} + +static inline int vop_verify_copy_args(struct vop_vdev *vdev, + struct mic_copy_desc *copy) +{ + if (!vdev || copy->vr_idx >= vdev->dd->num_vq) + return -EINVAL; + return 0; +} + +/* Copy a specified number of virtio descriptors in a chain */ +static int vop_virtio_copy_desc(struct vop_vdev *vdev, + struct mic_copy_desc *copy) +{ + int err; + struct vop_vringh *vvr; + + err = vop_verify_copy_args(vdev, copy); + if (err) + return err; + + vvr = &vdev->vvr[copy->vr_idx]; + mutex_lock(&vvr->vr_mutex); + if (!vop_vdevup(vdev)) { + err = -ENODEV; + dev_err(vop_dev(vdev), "%s %d err %d\n", + __func__, __LINE__, err); + goto err; + } + err = _vop_virtio_copy(vdev, copy); + if (err) { + dev_err(vop_dev(vdev), "%s %d err %d\n", + __func__, __LINE__, err); + } +err: + mutex_unlock(&vvr->vr_mutex); + return err; +} + +static int vop_open(struct inode *inode, struct file *f) +{ + struct vop_vdev *vdev; + struct vop_info *vi = container_of(f->private_data, + struct vop_info, miscdev); + + vdev = kzalloc(sizeof(*vdev), GFP_KERNEL); + if (!vdev) + return -ENOMEM; + vdev->vi = vi; + mutex_init(&vdev->vdev_mutex); + f->private_data = vdev; + init_completion(&vdev->destroy); + complete(&vdev->destroy); + return 0; +} + +static int vop_release(struct inode *inode, struct file *f) +{ + struct vop_vdev *vdev = f->private_data, *vdev_tmp; + struct vop_info *vi = vdev->vi; + struct list_head *pos, *tmp; + bool found = false; + + mutex_lock(&vdev->vdev_mutex); + if (vdev->deleted) + goto unlock; + mutex_lock(&vi->vop_mutex); + list_for_each_safe(pos, tmp, &vi->vdev_list) { + vdev_tmp = list_entry(pos, struct vop_vdev, list); + if (vdev == vdev_tmp) { + vop_virtio_del_device(vdev); + list_del(pos); + found = true; + break; + } + } + mutex_unlock(&vi->vop_mutex); +unlock: + mutex_unlock(&vdev->vdev_mutex); + if (!found) + wait_for_completion(&vdev->destroy); + f->private_data = NULL; + kfree(vdev); + return 0; +} + +static long vop_ioctl(struct file *f, unsigned int cmd, unsigned long arg) +{ + struct vop_vdev *vdev = f->private_data; + struct vop_info *vi = vdev->vi; + void __user *argp = (void __user *)arg; + int ret; + + switch (cmd) { + case MIC_VIRTIO_ADD_DEVICE: + { + struct mic_device_desc dd, *dd_config; + + if (copy_from_user(&dd, argp, sizeof(dd))) + return -EFAULT; + + if (mic_aligned_desc_size(&dd) > MIC_MAX_DESC_BLK_SIZE || + dd.num_vq > MIC_MAX_VRINGS) + return -EINVAL; + + dd_config = memdup_user(argp, mic_desc_size(&dd)); + if (IS_ERR(dd_config)) + return PTR_ERR(dd_config); + + /* Ensure desc has not changed between the two reads */ + if (memcmp(&dd, dd_config, sizeof(dd))) { + ret = -EINVAL; + goto free_ret; + } + mutex_lock(&vdev->vdev_mutex); + mutex_lock(&vi->vop_mutex); + ret = vop_virtio_add_device(vdev, dd_config); + if (ret) + goto unlock_ret; + list_add_tail(&vdev->list, &vi->vdev_list); +unlock_ret: + mutex_unlock(&vi->vop_mutex); + mutex_unlock(&vdev->vdev_mutex); +free_ret: + kfree(dd_config); + return ret; + } + case MIC_VIRTIO_COPY_DESC: + { + struct mic_copy_desc copy; + + mutex_lock(&vdev->vdev_mutex); + ret = vop_vdev_inited(vdev); + if (ret) + goto _unlock_ret; + + if (copy_from_user(©, argp, sizeof(copy))) { + ret = -EFAULT; + goto _unlock_ret; + } + + ret = vop_virtio_copy_desc(vdev, ©); + if (ret < 0) + goto _unlock_ret; + if (copy_to_user( + &((struct mic_copy_desc __user *)argp)->out_len, + ©.out_len, sizeof(copy.out_len))) + ret = -EFAULT; +_unlock_ret: + mutex_unlock(&vdev->vdev_mutex); + return ret; + } + case MIC_VIRTIO_CONFIG_CHANGE: + { + void *buf; + + mutex_lock(&vdev->vdev_mutex); + ret = vop_vdev_inited(vdev); + if (ret) + goto __unlock_ret; + buf = memdup_user(argp, vdev->dd->config_len); + if (IS_ERR(buf)) { + ret = PTR_ERR(buf); + goto __unlock_ret; + } + ret = vop_virtio_config_change(vdev, buf); + kfree(buf); +__unlock_ret: + mutex_unlock(&vdev->vdev_mutex); + return ret; + } + default: + return -ENOIOCTLCMD; + }; + return 0; +} + +/* + * We return EPOLLIN | EPOLLOUT from poll when new buffers are enqueued, and + * not when previously enqueued buffers may be available. This means that + * in the card->host (TX) path, when userspace is unblocked by poll it + * must drain all available descriptors or it can stall. + */ +static __poll_t vop_poll(struct file *f, poll_table *wait) +{ + struct vop_vdev *vdev = f->private_data; + __poll_t mask = 0; + + mutex_lock(&vdev->vdev_mutex); + if (vop_vdev_inited(vdev)) { + mask = EPOLLERR; + goto done; + } + poll_wait(f, &vdev->waitq, wait); + if (vop_vdev_inited(vdev)) { + mask = EPOLLERR; + } else if (vdev->poll_wake) { + vdev->poll_wake = 0; + mask = EPOLLIN | EPOLLOUT; + } +done: + mutex_unlock(&vdev->vdev_mutex); + return mask; +} + +static inline int +vop_query_offset(struct vop_vdev *vdev, unsigned long offset, + unsigned long *size, unsigned long *pa) +{ + struct vop_device *vpdev = vdev->vpdev; + unsigned long start = MIC_DP_SIZE; + int i; + + /* + * MMAP interface is as follows: + * offset region + * 0x0 virtio device_page + * 0x1000 first vring + * 0x1000 + size of 1st vring second vring + * .... + */ + if (!offset) { + *pa = virt_to_phys(vpdev->hw_ops->get_dp(vpdev)); + *size = MIC_DP_SIZE; + return 0; + } + + for (i = 0; i < vdev->dd->num_vq; i++) { + struct vop_vringh *vvr = &vdev->vvr[i]; + + if (offset == start) { + *pa = virt_to_phys(vvr->vring.va); + *size = vvr->vring.len; + return 0; + } + start += vvr->vring.len; + } + return -1; +} + +/* + * Maps the device page and virtio rings to user space for readonly access. + */ +static int vop_mmap(struct file *f, struct vm_area_struct *vma) +{ + struct vop_vdev *vdev = f->private_data; + unsigned long offset = vma->vm_pgoff << PAGE_SHIFT; + unsigned long pa, size = vma->vm_end - vma->vm_start, size_rem = size; + int i, err; + + err = vop_vdev_inited(vdev); + if (err) + goto ret; + if (vma->vm_flags & VM_WRITE) { + err = -EACCES; + goto ret; + } + while (size_rem) { + i = vop_query_offset(vdev, offset, &size, &pa); + if (i < 0) { + err = -EINVAL; + goto ret; + } + err = remap_pfn_range(vma, vma->vm_start + offset, + pa >> PAGE_SHIFT, size, + vma->vm_page_prot); + if (err) + goto ret; + size_rem -= size; + offset += size; + } +ret: + return err; +} + +static const struct file_operations vop_fops = { + .open = vop_open, + .release = vop_release, + .unlocked_ioctl = vop_ioctl, + .poll = vop_poll, + .mmap = vop_mmap, + .owner = THIS_MODULE, +}; + +int vop_host_init(struct vop_info *vi) +{ + int rc; + struct miscdevice *mdev; + struct vop_device *vpdev = vi->vpdev; + + INIT_LIST_HEAD(&vi->vdev_list); + vi->dma_ch = vpdev->dma_ch; + mdev = &vi->miscdev; + mdev->minor = MISC_DYNAMIC_MINOR; + snprintf(vi->name, sizeof(vi->name), "vop_virtio%d", vpdev->index); + mdev->name = vi->name; + mdev->fops = &vop_fops; + mdev->parent = &vpdev->dev; + + rc = misc_register(mdev); + if (rc) + dev_err(&vpdev->dev, "%s failed rc %d\n", __func__, rc); + return rc; +} + +void vop_host_uninit(struct vop_info *vi) +{ + struct list_head *pos, *tmp; + struct vop_vdev *vdev; + + mutex_lock(&vi->vop_mutex); + vop_virtio_reset_devices(vi); + list_for_each_safe(pos, tmp, &vi->vdev_list) { + vdev = list_entry(pos, struct vop_vdev, list); + list_del(pos); + reinit_completion(&vdev->destroy); + mutex_unlock(&vi->vop_mutex); + mutex_lock(&vdev->vdev_mutex); + vop_virtio_del_device(vdev); + vdev->deleted = true; + mutex_unlock(&vdev->vdev_mutex); + complete(&vdev->destroy); + mutex_lock(&vi->vop_mutex); + } + mutex_unlock(&vi->vop_mutex); + misc_deregister(&vi->miscdev); +} diff --git a/drivers/misc/ocxl/Kconfig b/drivers/misc/ocxl/Kconfig new file mode 100644 index 000000000..4bbdb0d3c --- /dev/null +++ b/drivers/misc/ocxl/Kconfig @@ -0,0 +1,31 @@ +# +# Open Coherent Accelerator (OCXL) compatible devices +# + +config OCXL_BASE + bool + default n + select PPC_COPRO_BASE + +config OCXL + tristate "OpenCAPI coherent accelerator support" + depends on PPC_POWERNV && PCI && EEH + select OCXL_BASE + default m + help + Select this option to enable the ocxl driver for Open + Coherent Accelerator Processor Interface (OpenCAPI) devices. + + OpenCAPI allows FPGA and ASIC accelerators to be coherently + attached to a CPU over an OpenCAPI link. + + The ocxl driver enables userspace programs to access these + accelerators through devices in /dev/ocxl/. + + For more information, see http://opencapi.org. + + This is not to be confused with the support for IBM CAPI + accelerators (CONFIG_CXL), which are PCI-based instead of a + dedicated OpenCAPI link, and don't follow the same protocol. + + If unsure, say N. diff --git a/drivers/misc/ocxl/Makefile b/drivers/misc/ocxl/Makefile new file mode 100644 index 000000000..5229dcda8 --- /dev/null +++ b/drivers/misc/ocxl/Makefile @@ -0,0 +1,11 @@ +# SPDX-License-Identifier: GPL-2.0+ +ccflags-$(CONFIG_PPC_WERROR) += -Werror + +ocxl-y += main.o pci.o config.o file.o pasid.o +ocxl-y += link.o context.o afu_irq.o sysfs.o trace.o +obj-$(CONFIG_OCXL) += ocxl.o + +# For tracepoints to include our trace.h from tracepoint infrastructure: +CFLAGS_trace.o := -I$(src) + +# ccflags-y += -DDEBUG diff --git a/drivers/misc/ocxl/afu_irq.c b/drivers/misc/ocxl/afu_irq.c new file mode 100644 index 000000000..e70cfa245 --- /dev/null +++ b/drivers/misc/ocxl/afu_irq.c @@ -0,0 +1,202 @@ +// SPDX-License-Identifier: GPL-2.0+ +// Copyright 2017 IBM Corp. +#include +#include +#include +#include "ocxl_internal.h" +#include "trace.h" + +struct afu_irq { + int id; + int hw_irq; + unsigned int virq; + char *name; + u64 trigger_page; + struct eventfd_ctx *ev_ctx; +}; + +static int irq_offset_to_id(struct ocxl_context *ctx, u64 offset) +{ + return (offset - ctx->afu->irq_base_offset) >> PAGE_SHIFT; +} + +static u64 irq_id_to_offset(struct ocxl_context *ctx, int id) +{ + return ctx->afu->irq_base_offset + (id << PAGE_SHIFT); +} + +static irqreturn_t afu_irq_handler(int virq, void *data) +{ + struct afu_irq *irq = (struct afu_irq *) data; + + trace_ocxl_afu_irq_receive(virq); + if (irq->ev_ctx) + eventfd_signal(irq->ev_ctx, 1); + return IRQ_HANDLED; +} + +static int setup_afu_irq(struct ocxl_context *ctx, struct afu_irq *irq) +{ + int rc; + + irq->virq = irq_create_mapping(NULL, irq->hw_irq); + if (!irq->virq) { + pr_err("irq_create_mapping failed\n"); + return -ENOMEM; + } + pr_debug("hw_irq %d mapped to virq %u\n", irq->hw_irq, irq->virq); + + irq->name = kasprintf(GFP_KERNEL, "ocxl-afu-%u", irq->virq); + if (!irq->name) { + irq_dispose_mapping(irq->virq); + return -ENOMEM; + } + + rc = request_irq(irq->virq, afu_irq_handler, 0, irq->name, irq); + if (rc) { + kfree(irq->name); + irq->name = NULL; + irq_dispose_mapping(irq->virq); + pr_err("request_irq failed: %d\n", rc); + return rc; + } + return 0; +} + +static void release_afu_irq(struct afu_irq *irq) +{ + free_irq(irq->virq, irq); + irq_dispose_mapping(irq->virq); + kfree(irq->name); +} + +int ocxl_afu_irq_alloc(struct ocxl_context *ctx, u64 *irq_offset) +{ + struct afu_irq *irq; + int rc; + + irq = kzalloc(sizeof(struct afu_irq), GFP_KERNEL); + if (!irq) + return -ENOMEM; + + /* + * We limit the number of afu irqs per context and per link to + * avoid a single process or user depleting the pool of IPIs + */ + + mutex_lock(&ctx->irq_lock); + + irq->id = idr_alloc(&ctx->irq_idr, irq, 0, MAX_IRQ_PER_CONTEXT, + GFP_KERNEL); + if (irq->id < 0) { + rc = -ENOSPC; + goto err_unlock; + } + + rc = ocxl_link_irq_alloc(ctx->afu->fn->link, &irq->hw_irq, + &irq->trigger_page); + if (rc) + goto err_idr; + + rc = setup_afu_irq(ctx, irq); + if (rc) + goto err_alloc; + + *irq_offset = irq_id_to_offset(ctx, irq->id); + + trace_ocxl_afu_irq_alloc(ctx->pasid, irq->id, irq->virq, irq->hw_irq, + *irq_offset); + mutex_unlock(&ctx->irq_lock); + return 0; + +err_alloc: + ocxl_link_free_irq(ctx->afu->fn->link, irq->hw_irq); +err_idr: + idr_remove(&ctx->irq_idr, irq->id); +err_unlock: + mutex_unlock(&ctx->irq_lock); + kfree(irq); + return rc; +} + +static void afu_irq_free(struct afu_irq *irq, struct ocxl_context *ctx) +{ + trace_ocxl_afu_irq_free(ctx->pasid, irq->id); + if (ctx->mapping) + unmap_mapping_range(ctx->mapping, + irq_id_to_offset(ctx, irq->id), + 1 << PAGE_SHIFT, 1); + release_afu_irq(irq); + if (irq->ev_ctx) + eventfd_ctx_put(irq->ev_ctx); + ocxl_link_free_irq(ctx->afu->fn->link, irq->hw_irq); + kfree(irq); +} + +int ocxl_afu_irq_free(struct ocxl_context *ctx, u64 irq_offset) +{ + struct afu_irq *irq; + int id = irq_offset_to_id(ctx, irq_offset); + + mutex_lock(&ctx->irq_lock); + + irq = idr_find(&ctx->irq_idr, id); + if (!irq) { + mutex_unlock(&ctx->irq_lock); + return -EINVAL; + } + idr_remove(&ctx->irq_idr, irq->id); + afu_irq_free(irq, ctx); + mutex_unlock(&ctx->irq_lock); + return 0; +} + +void ocxl_afu_irq_free_all(struct ocxl_context *ctx) +{ + struct afu_irq *irq; + int id; + + mutex_lock(&ctx->irq_lock); + idr_for_each_entry(&ctx->irq_idr, irq, id) + afu_irq_free(irq, ctx); + mutex_unlock(&ctx->irq_lock); +} + +int ocxl_afu_irq_set_fd(struct ocxl_context *ctx, u64 irq_offset, int eventfd) +{ + struct afu_irq *irq; + struct eventfd_ctx *ev_ctx; + int rc = 0, id = irq_offset_to_id(ctx, irq_offset); + + mutex_lock(&ctx->irq_lock); + irq = idr_find(&ctx->irq_idr, id); + if (!irq) { + rc = -EINVAL; + goto unlock; + } + + ev_ctx = eventfd_ctx_fdget(eventfd); + if (IS_ERR(ev_ctx)) { + rc = -EINVAL; + goto unlock; + } + + irq->ev_ctx = ev_ctx; +unlock: + mutex_unlock(&ctx->irq_lock); + return rc; +} + +u64 ocxl_afu_irq_get_addr(struct ocxl_context *ctx, u64 irq_offset) +{ + struct afu_irq *irq; + int id = irq_offset_to_id(ctx, irq_offset); + u64 addr = 0; + + mutex_lock(&ctx->irq_lock); + irq = idr_find(&ctx->irq_idr, id); + if (irq) + addr = irq->trigger_page; + mutex_unlock(&ctx->irq_lock); + return addr; +} diff --git a/drivers/misc/ocxl/config.c b/drivers/misc/ocxl/config.c new file mode 100644 index 000000000..8f2c5d8bd --- /dev/null +++ b/drivers/misc/ocxl/config.c @@ -0,0 +1,725 @@ +// SPDX-License-Identifier: GPL-2.0+ +// Copyright 2017 IBM Corp. +#include +#include +#include +#include + +#define EXTRACT_BIT(val, bit) (!!(val & BIT(bit))) +#define EXTRACT_BITS(val, s, e) ((val & GENMASK(e, s)) >> s) + +#define OCXL_DVSEC_AFU_IDX_MASK GENMASK(5, 0) +#define OCXL_DVSEC_ACTAG_MASK GENMASK(11, 0) +#define OCXL_DVSEC_PASID_MASK GENMASK(19, 0) +#define OCXL_DVSEC_PASID_LOG_MASK GENMASK(4, 0) + +#define OCXL_DVSEC_TEMPL_VERSION 0x0 +#define OCXL_DVSEC_TEMPL_NAME 0x4 +#define OCXL_DVSEC_TEMPL_AFU_VERSION 0x1C +#define OCXL_DVSEC_TEMPL_MMIO_GLOBAL 0x20 +#define OCXL_DVSEC_TEMPL_MMIO_GLOBAL_SZ 0x28 +#define OCXL_DVSEC_TEMPL_MMIO_PP 0x30 +#define OCXL_DVSEC_TEMPL_MMIO_PP_SZ 0x38 +#define OCXL_DVSEC_TEMPL_MEM_SZ 0x3C +#define OCXL_DVSEC_TEMPL_WWID 0x40 + +#define OCXL_MAX_AFU_PER_FUNCTION 64 +#define OCXL_TEMPL_LEN 0x58 +#define OCXL_TEMPL_NAME_LEN 24 +#define OCXL_CFG_TIMEOUT 3 + +static int find_dvsec(struct pci_dev *dev, int dvsec_id) +{ + int vsec = 0; + u16 vendor, id; + + while ((vsec = pci_find_next_ext_capability(dev, vsec, + OCXL_EXT_CAP_ID_DVSEC))) { + pci_read_config_word(dev, vsec + OCXL_DVSEC_VENDOR_OFFSET, + &vendor); + pci_read_config_word(dev, vsec + OCXL_DVSEC_ID_OFFSET, &id); + if (vendor == PCI_VENDOR_ID_IBM && id == dvsec_id) + return vsec; + } + return 0; +} + +static int find_dvsec_afu_ctrl(struct pci_dev *dev, u8 afu_idx) +{ + int vsec = 0; + u16 vendor, id; + u8 idx; + + while ((vsec = pci_find_next_ext_capability(dev, vsec, + OCXL_EXT_CAP_ID_DVSEC))) { + pci_read_config_word(dev, vsec + OCXL_DVSEC_VENDOR_OFFSET, + &vendor); + pci_read_config_word(dev, vsec + OCXL_DVSEC_ID_OFFSET, &id); + + if (vendor == PCI_VENDOR_ID_IBM && + id == OCXL_DVSEC_AFU_CTRL_ID) { + pci_read_config_byte(dev, + vsec + OCXL_DVSEC_AFU_CTRL_AFU_IDX, + &idx); + if (idx == afu_idx) + return vsec; + } + } + return 0; +} + +static int read_pasid(struct pci_dev *dev, struct ocxl_fn_config *fn) +{ + u16 val; + int pos; + + pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_PASID); + if (!pos) { + /* + * PASID capability is not mandatory, but there + * shouldn't be any AFU + */ + dev_dbg(&dev->dev, "Function doesn't require any PASID\n"); + fn->max_pasid_log = -1; + goto out; + } + pci_read_config_word(dev, pos + PCI_PASID_CAP, &val); + fn->max_pasid_log = EXTRACT_BITS(val, 8, 12); + +out: + dev_dbg(&dev->dev, "PASID capability:\n"); + dev_dbg(&dev->dev, " Max PASID log = %d\n", fn->max_pasid_log); + return 0; +} + +static int read_dvsec_tl(struct pci_dev *dev, struct ocxl_fn_config *fn) +{ + int pos; + + pos = find_dvsec(dev, OCXL_DVSEC_TL_ID); + if (!pos && PCI_FUNC(dev->devfn) == 0) { + dev_err(&dev->dev, "Can't find TL DVSEC\n"); + return -ENODEV; + } + if (pos && PCI_FUNC(dev->devfn) != 0) { + dev_err(&dev->dev, "TL DVSEC is only allowed on function 0\n"); + return -ENODEV; + } + fn->dvsec_tl_pos = pos; + return 0; +} + +static int read_dvsec_function(struct pci_dev *dev, struct ocxl_fn_config *fn) +{ + int pos, afu_present; + u32 val; + + pos = find_dvsec(dev, OCXL_DVSEC_FUNC_ID); + if (!pos) { + dev_err(&dev->dev, "Can't find function DVSEC\n"); + return -ENODEV; + } + fn->dvsec_function_pos = pos; + + pci_read_config_dword(dev, pos + OCXL_DVSEC_FUNC_OFF_INDEX, &val); + afu_present = EXTRACT_BIT(val, 31); + if (!afu_present) { + fn->max_afu_index = -1; + dev_dbg(&dev->dev, "Function doesn't define any AFU\n"); + goto out; + } + fn->max_afu_index = EXTRACT_BITS(val, 24, 29); + +out: + dev_dbg(&dev->dev, "Function DVSEC:\n"); + dev_dbg(&dev->dev, " Max AFU index = %d\n", fn->max_afu_index); + return 0; +} + +static int read_dvsec_afu_info(struct pci_dev *dev, struct ocxl_fn_config *fn) +{ + int pos; + + if (fn->max_afu_index < 0) { + fn->dvsec_afu_info_pos = -1; + return 0; + } + + pos = find_dvsec(dev, OCXL_DVSEC_AFU_INFO_ID); + if (!pos) { + dev_err(&dev->dev, "Can't find AFU information DVSEC\n"); + return -ENODEV; + } + fn->dvsec_afu_info_pos = pos; + return 0; +} + +static int read_dvsec_vendor(struct pci_dev *dev) +{ + int pos; + u32 cfg, tlx, dlx; + + /* + * vendor specific DVSEC is optional + * + * It's currently only used on function 0 to specify the + * version of some logic blocks. Some older images may not + * even have it so we ignore any errors + */ + if (PCI_FUNC(dev->devfn) != 0) + return 0; + + pos = find_dvsec(dev, OCXL_DVSEC_VENDOR_ID); + if (!pos) + return 0; + + pci_read_config_dword(dev, pos + OCXL_DVSEC_VENDOR_CFG_VERS, &cfg); + pci_read_config_dword(dev, pos + OCXL_DVSEC_VENDOR_TLX_VERS, &tlx); + pci_read_config_dword(dev, pos + OCXL_DVSEC_VENDOR_DLX_VERS, &dlx); + + dev_dbg(&dev->dev, "Vendor specific DVSEC:\n"); + dev_dbg(&dev->dev, " CFG version = 0x%x\n", cfg); + dev_dbg(&dev->dev, " TLX version = 0x%x\n", tlx); + dev_dbg(&dev->dev, " DLX version = 0x%x\n", dlx); + return 0; +} + +static int validate_function(struct pci_dev *dev, struct ocxl_fn_config *fn) +{ + if (fn->max_pasid_log == -1 && fn->max_afu_index >= 0) { + dev_err(&dev->dev, + "AFUs are defined but no PASIDs are requested\n"); + return -EINVAL; + } + + if (fn->max_afu_index > OCXL_MAX_AFU_PER_FUNCTION) { + dev_err(&dev->dev, + "Max AFU index out of architectural limit (%d vs %d)\n", + fn->max_afu_index, OCXL_MAX_AFU_PER_FUNCTION); + return -EINVAL; + } + return 0; +} + +int ocxl_config_read_function(struct pci_dev *dev, struct ocxl_fn_config *fn) +{ + int rc; + + rc = read_pasid(dev, fn); + if (rc) { + dev_err(&dev->dev, "Invalid PASID configuration: %d\n", rc); + return -ENODEV; + } + + rc = read_dvsec_tl(dev, fn); + if (rc) { + dev_err(&dev->dev, + "Invalid Transaction Layer DVSEC configuration: %d\n", + rc); + return -ENODEV; + } + + rc = read_dvsec_function(dev, fn); + if (rc) { + dev_err(&dev->dev, + "Invalid Function DVSEC configuration: %d\n", rc); + return -ENODEV; + } + + rc = read_dvsec_afu_info(dev, fn); + if (rc) { + dev_err(&dev->dev, "Invalid AFU configuration: %d\n", rc); + return -ENODEV; + } + + rc = read_dvsec_vendor(dev); + if (rc) { + dev_err(&dev->dev, + "Invalid vendor specific DVSEC configuration: %d\n", + rc); + return -ENODEV; + } + + rc = validate_function(dev, fn); + return rc; +} +EXPORT_SYMBOL_GPL(ocxl_config_read_function); + +static int read_afu_info(struct pci_dev *dev, struct ocxl_fn_config *fn, + int offset, u32 *data) +{ + u32 val; + unsigned long timeout = jiffies + (HZ * OCXL_CFG_TIMEOUT); + int pos = fn->dvsec_afu_info_pos; + + /* Protect 'data valid' bit */ + if (EXTRACT_BIT(offset, 31)) { + dev_err(&dev->dev, "Invalid offset in AFU info DVSEC\n"); + return -EINVAL; + } + + pci_write_config_dword(dev, pos + OCXL_DVSEC_AFU_INFO_OFF, offset); + pci_read_config_dword(dev, pos + OCXL_DVSEC_AFU_INFO_OFF, &val); + while (!EXTRACT_BIT(val, 31)) { + if (time_after_eq(jiffies, timeout)) { + dev_err(&dev->dev, + "Timeout while reading AFU info DVSEC (offset=%d)\n", + offset); + return -EBUSY; + } + cpu_relax(); + pci_read_config_dword(dev, pos + OCXL_DVSEC_AFU_INFO_OFF, &val); + } + pci_read_config_dword(dev, pos + OCXL_DVSEC_AFU_INFO_DATA, data); + return 0; +} + +int ocxl_config_check_afu_index(struct pci_dev *dev, + struct ocxl_fn_config *fn, int afu_idx) +{ + u32 val; + int rc, templ_major, templ_minor, len; + + pci_write_config_byte(dev, + fn->dvsec_afu_info_pos + OCXL_DVSEC_AFU_INFO_AFU_IDX, + afu_idx); + rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_VERSION, &val); + if (rc) + return rc; + + /* AFU index map can have holes */ + if (!val) + return 0; + + templ_major = EXTRACT_BITS(val, 8, 15); + templ_minor = EXTRACT_BITS(val, 0, 7); + dev_dbg(&dev->dev, "AFU descriptor template version %d.%d\n", + templ_major, templ_minor); + + len = EXTRACT_BITS(val, 16, 31); + if (len != OCXL_TEMPL_LEN) { + dev_warn(&dev->dev, + "Unexpected template length in AFU information (%#x)\n", + len); + } + return 1; +} +EXPORT_SYMBOL_GPL(ocxl_config_check_afu_index); + +static int read_afu_name(struct pci_dev *dev, struct ocxl_fn_config *fn, + struct ocxl_afu_config *afu) +{ + int i, rc; + u32 val, *ptr; + + BUILD_BUG_ON(OCXL_AFU_NAME_SZ < OCXL_TEMPL_NAME_LEN); + for (i = 0; i < OCXL_TEMPL_NAME_LEN; i += 4) { + rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_NAME + i, &val); + if (rc) + return rc; + ptr = (u32 *) &afu->name[i]; + *ptr = le32_to_cpu((__force __le32) val); + } + afu->name[OCXL_AFU_NAME_SZ - 1] = '\0'; /* play safe */ + return 0; +} + +static int read_afu_mmio(struct pci_dev *dev, struct ocxl_fn_config *fn, + struct ocxl_afu_config *afu) +{ + int rc; + u32 val; + + /* + * Global MMIO + */ + rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_MMIO_GLOBAL, &val); + if (rc) + return rc; + afu->global_mmio_bar = EXTRACT_BITS(val, 0, 2); + afu->global_mmio_offset = EXTRACT_BITS(val, 16, 31) << 16; + + rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_MMIO_GLOBAL + 4, &val); + if (rc) + return rc; + afu->global_mmio_offset += (u64) val << 32; + + rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_MMIO_GLOBAL_SZ, &val); + if (rc) + return rc; + afu->global_mmio_size = val; + + /* + * Per-process MMIO + */ + rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_MMIO_PP, &val); + if (rc) + return rc; + afu->pp_mmio_bar = EXTRACT_BITS(val, 0, 2); + afu->pp_mmio_offset = EXTRACT_BITS(val, 16, 31) << 16; + + rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_MMIO_PP + 4, &val); + if (rc) + return rc; + afu->pp_mmio_offset += (u64) val << 32; + + rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_MMIO_PP_SZ, &val); + if (rc) + return rc; + afu->pp_mmio_stride = val; + + return 0; +} + +static int read_afu_control(struct pci_dev *dev, struct ocxl_afu_config *afu) +{ + int pos; + u8 val8; + u16 val16; + + pos = find_dvsec_afu_ctrl(dev, afu->idx); + if (!pos) { + dev_err(&dev->dev, "Can't find AFU control DVSEC for AFU %d\n", + afu->idx); + return -ENODEV; + } + afu->dvsec_afu_control_pos = pos; + + pci_read_config_byte(dev, pos + OCXL_DVSEC_AFU_CTRL_PASID_SUP, &val8); + afu->pasid_supported_log = EXTRACT_BITS(val8, 0, 4); + + pci_read_config_word(dev, pos + OCXL_DVSEC_AFU_CTRL_ACTAG_SUP, &val16); + afu->actag_supported = EXTRACT_BITS(val16, 0, 11); + return 0; +} + +static bool char_allowed(int c) +{ + /* + * Permitted Characters : Alphanumeric, hyphen, underscore, comma + */ + if ((c >= 0x30 && c <= 0x39) /* digits */ || + (c >= 0x41 && c <= 0x5A) /* upper case */ || + (c >= 0x61 && c <= 0x7A) /* lower case */ || + c == 0 /* NULL */ || + c == 0x2D /* - */ || + c == 0x5F /* _ */ || + c == 0x2C /* , */) + return true; + return false; +} + +static int validate_afu(struct pci_dev *dev, struct ocxl_afu_config *afu) +{ + int i; + + if (!afu->name[0]) { + dev_err(&dev->dev, "Empty AFU name\n"); + return -EINVAL; + } + for (i = 0; i < OCXL_TEMPL_NAME_LEN; i++) { + if (!char_allowed(afu->name[i])) { + dev_err(&dev->dev, + "Invalid character in AFU name\n"); + return -EINVAL; + } + } + + if (afu->global_mmio_bar != 0 && + afu->global_mmio_bar != 2 && + afu->global_mmio_bar != 4) { + dev_err(&dev->dev, "Invalid global MMIO bar number\n"); + return -EINVAL; + } + if (afu->pp_mmio_bar != 0 && + afu->pp_mmio_bar != 2 && + afu->pp_mmio_bar != 4) { + dev_err(&dev->dev, "Invalid per-process MMIO bar number\n"); + return -EINVAL; + } + return 0; +} + +int ocxl_config_read_afu(struct pci_dev *dev, struct ocxl_fn_config *fn, + struct ocxl_afu_config *afu, u8 afu_idx) +{ + int rc; + u32 val32; + + /* + * First, we need to write the AFU idx for the AFU we want to + * access. + */ + WARN_ON((afu_idx & OCXL_DVSEC_AFU_IDX_MASK) != afu_idx); + afu->idx = afu_idx; + pci_write_config_byte(dev, + fn->dvsec_afu_info_pos + OCXL_DVSEC_AFU_INFO_AFU_IDX, + afu->idx); + + rc = read_afu_name(dev, fn, afu); + if (rc) + return rc; + + rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_AFU_VERSION, &val32); + if (rc) + return rc; + afu->version_major = EXTRACT_BITS(val32, 24, 31); + afu->version_minor = EXTRACT_BITS(val32, 16, 23); + afu->afuc_type = EXTRACT_BITS(val32, 14, 15); + afu->afum_type = EXTRACT_BITS(val32, 12, 13); + afu->profile = EXTRACT_BITS(val32, 0, 7); + + rc = read_afu_mmio(dev, fn, afu); + if (rc) + return rc; + + rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_MEM_SZ, &val32); + if (rc) + return rc; + afu->log_mem_size = EXTRACT_BITS(val32, 0, 7); + + rc = read_afu_control(dev, afu); + if (rc) + return rc; + + dev_dbg(&dev->dev, "AFU configuration:\n"); + dev_dbg(&dev->dev, " name = %s\n", afu->name); + dev_dbg(&dev->dev, " version = %d.%d\n", afu->version_major, + afu->version_minor); + dev_dbg(&dev->dev, " global mmio bar = %hhu\n", afu->global_mmio_bar); + dev_dbg(&dev->dev, " global mmio offset = %#llx\n", + afu->global_mmio_offset); + dev_dbg(&dev->dev, " global mmio size = %#x\n", afu->global_mmio_size); + dev_dbg(&dev->dev, " pp mmio bar = %hhu\n", afu->pp_mmio_bar); + dev_dbg(&dev->dev, " pp mmio offset = %#llx\n", afu->pp_mmio_offset); + dev_dbg(&dev->dev, " pp mmio stride = %#x\n", afu->pp_mmio_stride); + dev_dbg(&dev->dev, " mem size (log) = %hhu\n", afu->log_mem_size); + dev_dbg(&dev->dev, " pasid supported (log) = %u\n", + afu->pasid_supported_log); + dev_dbg(&dev->dev, " actag supported = %u\n", + afu->actag_supported); + + rc = validate_afu(dev, afu); + return rc; +} +EXPORT_SYMBOL_GPL(ocxl_config_read_afu); + +int ocxl_config_get_actag_info(struct pci_dev *dev, u16 *base, u16 *enabled, + u16 *supported) +{ + int rc; + + /* + * This is really a simple wrapper for the kernel API, to + * avoid an external driver using ocxl as a library to call + * platform-dependent code + */ + rc = pnv_ocxl_get_actag(dev, base, enabled, supported); + if (rc) { + dev_err(&dev->dev, "Can't get actag for device: %d\n", rc); + return rc; + } + return 0; +} +EXPORT_SYMBOL_GPL(ocxl_config_get_actag_info); + +void ocxl_config_set_afu_actag(struct pci_dev *dev, int pos, int actag_base, + int actag_count) +{ + u16 val; + + val = actag_count & OCXL_DVSEC_ACTAG_MASK; + pci_write_config_byte(dev, pos + OCXL_DVSEC_AFU_CTRL_ACTAG_EN, val); + + val = actag_base & OCXL_DVSEC_ACTAG_MASK; + pci_write_config_dword(dev, pos + OCXL_DVSEC_AFU_CTRL_ACTAG_BASE, val); +} +EXPORT_SYMBOL_GPL(ocxl_config_set_afu_actag); + +int ocxl_config_get_pasid_info(struct pci_dev *dev, int *count) +{ + return pnv_ocxl_get_pasid_count(dev, count); +} +EXPORT_SYMBOL_GPL(ocxl_config_get_pasid_info); + +void ocxl_config_set_afu_pasid(struct pci_dev *dev, int pos, int pasid_base, + u32 pasid_count_log) +{ + u8 val8; + u32 val32; + + val8 = pasid_count_log & OCXL_DVSEC_PASID_LOG_MASK; + pci_write_config_byte(dev, pos + OCXL_DVSEC_AFU_CTRL_PASID_EN, val8); + + pci_read_config_dword(dev, pos + OCXL_DVSEC_AFU_CTRL_PASID_BASE, + &val32); + val32 &= ~OCXL_DVSEC_PASID_MASK; + val32 |= pasid_base & OCXL_DVSEC_PASID_MASK; + pci_write_config_dword(dev, pos + OCXL_DVSEC_AFU_CTRL_PASID_BASE, + val32); +} +EXPORT_SYMBOL_GPL(ocxl_config_set_afu_pasid); + +void ocxl_config_set_afu_state(struct pci_dev *dev, int pos, int enable) +{ + u8 val; + + pci_read_config_byte(dev, pos + OCXL_DVSEC_AFU_CTRL_ENABLE, &val); + if (enable) + val |= 1; + else + val &= 0xFE; + pci_write_config_byte(dev, pos + OCXL_DVSEC_AFU_CTRL_ENABLE, val); +} +EXPORT_SYMBOL_GPL(ocxl_config_set_afu_state); + +int ocxl_config_set_TL(struct pci_dev *dev, int tl_dvsec) +{ + u32 val; + __be32 *be32ptr; + u8 timers; + int i, rc; + long recv_cap; + char *recv_rate; + + /* + * Skip on function != 0, as the TL can only be defined on 0 + */ + if (PCI_FUNC(dev->devfn) != 0) + return 0; + + recv_rate = kzalloc(PNV_OCXL_TL_RATE_BUF_SIZE, GFP_KERNEL); + if (!recv_rate) + return -ENOMEM; + /* + * The spec defines 64 templates for messages in the + * Transaction Layer (TL). + * + * The host and device each support a subset, so we need to + * configure the transmitters on each side to send only + * templates the receiver understands, at a rate the receiver + * can process. Per the spec, template 0 must be supported by + * everybody. That's the template which has been used by the + * host and device so far. + * + * The sending rate limit must be set before the template is + * enabled. + */ + + /* + * Device -> host + */ + rc = pnv_ocxl_get_tl_cap(dev, &recv_cap, recv_rate, + PNV_OCXL_TL_RATE_BUF_SIZE); + if (rc) + goto out; + + for (i = 0; i < PNV_OCXL_TL_RATE_BUF_SIZE; i += 4) { + be32ptr = (__be32 *) &recv_rate[i]; + pci_write_config_dword(dev, + tl_dvsec + OCXL_DVSEC_TL_SEND_RATE + i, + be32_to_cpu(*be32ptr)); + } + val = recv_cap >> 32; + pci_write_config_dword(dev, tl_dvsec + OCXL_DVSEC_TL_SEND_CAP, val); + val = recv_cap & GENMASK(31, 0); + pci_write_config_dword(dev, tl_dvsec + OCXL_DVSEC_TL_SEND_CAP + 4, val); + + /* + * Host -> device + */ + for (i = 0; i < PNV_OCXL_TL_RATE_BUF_SIZE; i += 4) { + pci_read_config_dword(dev, + tl_dvsec + OCXL_DVSEC_TL_RECV_RATE + i, + &val); + be32ptr = (__be32 *) &recv_rate[i]; + *be32ptr = cpu_to_be32(val); + } + pci_read_config_dword(dev, tl_dvsec + OCXL_DVSEC_TL_RECV_CAP, &val); + recv_cap = (long) val << 32; + pci_read_config_dword(dev, tl_dvsec + OCXL_DVSEC_TL_RECV_CAP + 4, &val); + recv_cap |= val; + + rc = pnv_ocxl_set_tl_conf(dev, recv_cap, __pa(recv_rate), + PNV_OCXL_TL_RATE_BUF_SIZE); + if (rc) + goto out; + + /* + * Opencapi commands needing to be retried are classified per + * the TL in 2 groups: short and long commands. + * + * The short back off timer it not used for now. It will be + * for opencapi 4.0. + * + * The long back off timer is typically used when an AFU hits + * a page fault but the NPU is already processing one. So the + * AFU needs to wait before it can resubmit. Having a value + * too low doesn't break anything, but can generate extra + * traffic on the link. + * We set it to 1.6 us for now. It's shorter than, but in the + * same order of magnitude as the time spent to process a page + * fault. + */ + timers = 0x2 << 4; /* long timer = 1.6 us */ + pci_write_config_byte(dev, tl_dvsec + OCXL_DVSEC_TL_BACKOFF_TIMERS, + timers); + + rc = 0; +out: + kfree(recv_rate); + return rc; +} +EXPORT_SYMBOL_GPL(ocxl_config_set_TL); + +int ocxl_config_terminate_pasid(struct pci_dev *dev, int afu_control, int pasid) +{ + u32 val; + unsigned long timeout; + + pci_read_config_dword(dev, afu_control + OCXL_DVSEC_AFU_CTRL_TERM_PASID, + &val); + if (EXTRACT_BIT(val, 20)) { + dev_err(&dev->dev, + "Can't terminate PASID %#x, previous termination didn't complete\n", + pasid); + return -EBUSY; + } + + val &= ~OCXL_DVSEC_PASID_MASK; + val |= pasid & OCXL_DVSEC_PASID_MASK; + val |= BIT(20); + pci_write_config_dword(dev, + afu_control + OCXL_DVSEC_AFU_CTRL_TERM_PASID, + val); + + timeout = jiffies + (HZ * OCXL_CFG_TIMEOUT); + pci_read_config_dword(dev, afu_control + OCXL_DVSEC_AFU_CTRL_TERM_PASID, + &val); + while (EXTRACT_BIT(val, 20)) { + if (time_after_eq(jiffies, timeout)) { + dev_err(&dev->dev, + "Timeout while waiting for AFU to terminate PASID %#x\n", + pasid); + return -EBUSY; + } + cpu_relax(); + pci_read_config_dword(dev, + afu_control + OCXL_DVSEC_AFU_CTRL_TERM_PASID, + &val); + } + return 0; +} +EXPORT_SYMBOL_GPL(ocxl_config_terminate_pasid); + +void ocxl_config_set_actag(struct pci_dev *dev, int func_dvsec, u32 tag_first, + u32 tag_count) +{ + u32 val; + + val = (tag_first & OCXL_DVSEC_ACTAG_MASK) << 16; + val |= tag_count & OCXL_DVSEC_ACTAG_MASK; + pci_write_config_dword(dev, func_dvsec + OCXL_DVSEC_FUNC_OFF_ACTAG, + val); +} +EXPORT_SYMBOL_GPL(ocxl_config_set_actag); diff --git a/drivers/misc/ocxl/context.c b/drivers/misc/ocxl/context.c new file mode 100644 index 000000000..c10a940e3 --- /dev/null +++ b/drivers/misc/ocxl/context.c @@ -0,0 +1,282 @@ +// SPDX-License-Identifier: GPL-2.0+ +// Copyright 2017 IBM Corp. +#include +#include "trace.h" +#include "ocxl_internal.h" + +struct ocxl_context *ocxl_context_alloc(void) +{ + return kzalloc(sizeof(struct ocxl_context), GFP_KERNEL); +} + +int ocxl_context_init(struct ocxl_context *ctx, struct ocxl_afu *afu, + struct address_space *mapping) +{ + int pasid; + + ctx->afu = afu; + mutex_lock(&afu->contexts_lock); + pasid = idr_alloc(&afu->contexts_idr, ctx, afu->pasid_base, + afu->pasid_base + afu->pasid_max, GFP_KERNEL); + if (pasid < 0) { + mutex_unlock(&afu->contexts_lock); + return pasid; + } + afu->pasid_count++; + mutex_unlock(&afu->contexts_lock); + + ctx->pasid = pasid; + ctx->status = OPENED; + mutex_init(&ctx->status_mutex); + ctx->mapping = mapping; + mutex_init(&ctx->mapping_lock); + init_waitqueue_head(&ctx->events_wq); + mutex_init(&ctx->xsl_error_lock); + mutex_init(&ctx->irq_lock); + idr_init(&ctx->irq_idr); + ctx->tidr = 0; + + /* + * Keep a reference on the AFU to make sure it's valid for the + * duration of the life of the context + */ + ocxl_afu_get(afu); + return 0; +} + +/* + * Callback for when a translation fault triggers an error + * data: a pointer to the context which triggered the fault + * addr: the address that triggered the error + * dsisr: the value of the PPC64 dsisr register + */ +static void xsl_fault_error(void *data, u64 addr, u64 dsisr) +{ + struct ocxl_context *ctx = (struct ocxl_context *) data; + + mutex_lock(&ctx->xsl_error_lock); + ctx->xsl_error.addr = addr; + ctx->xsl_error.dsisr = dsisr; + ctx->xsl_error.count++; + mutex_unlock(&ctx->xsl_error_lock); + + wake_up_all(&ctx->events_wq); +} + +int ocxl_context_attach(struct ocxl_context *ctx, u64 amr) +{ + int rc; + + // Locks both status & tidr + mutex_lock(&ctx->status_mutex); + if (ctx->status != OPENED) { + rc = -EIO; + goto out; + } + + rc = ocxl_link_add_pe(ctx->afu->fn->link, ctx->pasid, + current->mm->context.id, ctx->tidr, amr, current->mm, + xsl_fault_error, ctx); + if (rc) + goto out; + + ctx->status = ATTACHED; +out: + mutex_unlock(&ctx->status_mutex); + return rc; +} + +static vm_fault_t map_afu_irq(struct vm_area_struct *vma, unsigned long address, + u64 offset, struct ocxl_context *ctx) +{ + u64 trigger_addr; + + trigger_addr = ocxl_afu_irq_get_addr(ctx, offset); + if (!trigger_addr) + return VM_FAULT_SIGBUS; + + return vmf_insert_pfn(vma, address, trigger_addr >> PAGE_SHIFT); +} + +static vm_fault_t map_pp_mmio(struct vm_area_struct *vma, unsigned long address, + u64 offset, struct ocxl_context *ctx) +{ + u64 pp_mmio_addr; + int pasid_off; + vm_fault_t ret; + + if (offset >= ctx->afu->config.pp_mmio_stride) + return VM_FAULT_SIGBUS; + + mutex_lock(&ctx->status_mutex); + if (ctx->status != ATTACHED) { + mutex_unlock(&ctx->status_mutex); + pr_debug("%s: Context not attached, failing mmio mmap\n", + __func__); + return VM_FAULT_SIGBUS; + } + + pasid_off = ctx->pasid - ctx->afu->pasid_base; + pp_mmio_addr = ctx->afu->pp_mmio_start + + pasid_off * ctx->afu->config.pp_mmio_stride + + offset; + + ret = vmf_insert_pfn(vma, address, pp_mmio_addr >> PAGE_SHIFT); + mutex_unlock(&ctx->status_mutex); + return ret; +} + +static vm_fault_t ocxl_mmap_fault(struct vm_fault *vmf) +{ + struct vm_area_struct *vma = vmf->vma; + struct ocxl_context *ctx = vma->vm_file->private_data; + u64 offset; + vm_fault_t ret; + + offset = vmf->pgoff << PAGE_SHIFT; + pr_debug("%s: pasid %d address 0x%lx offset 0x%llx\n", __func__, + ctx->pasid, vmf->address, offset); + + if (offset < ctx->afu->irq_base_offset) + ret = map_pp_mmio(vma, vmf->address, offset, ctx); + else + ret = map_afu_irq(vma, vmf->address, offset, ctx); + return ret; +} + +static const struct vm_operations_struct ocxl_vmops = { + .fault = ocxl_mmap_fault, +}; + +static int check_mmap_afu_irq(struct ocxl_context *ctx, + struct vm_area_struct *vma) +{ + /* only one page */ + if (vma_pages(vma) != 1) + return -EINVAL; + + /* check offset validty */ + if (!ocxl_afu_irq_get_addr(ctx, vma->vm_pgoff << PAGE_SHIFT)) + return -EINVAL; + + /* + * trigger page should only be accessible in write mode. + * + * It's a bit theoretical, as a page mmaped with only + * PROT_WRITE is currently readable, but it doesn't hurt. + */ + if ((vma->vm_flags & VM_READ) || (vma->vm_flags & VM_EXEC) || + !(vma->vm_flags & VM_WRITE)) + return -EINVAL; + vma->vm_flags &= ~(VM_MAYREAD | VM_MAYEXEC); + return 0; +} + +static int check_mmap_mmio(struct ocxl_context *ctx, + struct vm_area_struct *vma) +{ + if ((vma_pages(vma) + vma->vm_pgoff) > + (ctx->afu->config.pp_mmio_stride >> PAGE_SHIFT)) + return -EINVAL; + return 0; +} + +int ocxl_context_mmap(struct ocxl_context *ctx, struct vm_area_struct *vma) +{ + int rc; + + if ((vma->vm_pgoff << PAGE_SHIFT) < ctx->afu->irq_base_offset) + rc = check_mmap_mmio(ctx, vma); + else + rc = check_mmap_afu_irq(ctx, vma); + if (rc) + return rc; + + vma->vm_flags |= VM_IO | VM_PFNMAP; + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + vma->vm_ops = &ocxl_vmops; + return 0; +} + +int ocxl_context_detach(struct ocxl_context *ctx) +{ + struct pci_dev *dev; + int afu_control_pos; + enum ocxl_context_status status; + int rc; + + mutex_lock(&ctx->status_mutex); + status = ctx->status; + ctx->status = CLOSED; + mutex_unlock(&ctx->status_mutex); + if (status != ATTACHED) + return 0; + + dev = to_pci_dev(ctx->afu->fn->dev.parent); + afu_control_pos = ctx->afu->config.dvsec_afu_control_pos; + + mutex_lock(&ctx->afu->afu_control_lock); + rc = ocxl_config_terminate_pasid(dev, afu_control_pos, ctx->pasid); + mutex_unlock(&ctx->afu->afu_control_lock); + trace_ocxl_terminate_pasid(ctx->pasid, rc); + if (rc) { + /* + * If we timeout waiting for the AFU to terminate the + * pasid, then it's dangerous to clean up the Process + * Element entry in the SPA, as it may be referenced + * in the future by the AFU. In which case, we would + * checkstop because of an invalid PE access (FIR + * register 2, bit 42). So leave the PE + * defined. Caller shouldn't free the context so that + * PASID remains allocated. + * + * A link reset will be required to cleanup the AFU + * and the SPA. + */ + if (rc == -EBUSY) + return rc; + } + rc = ocxl_link_remove_pe(ctx->afu->fn->link, ctx->pasid); + if (rc) { + dev_warn(&ctx->afu->dev, + "Couldn't remove PE entry cleanly: %d\n", rc); + } + return 0; +} + +void ocxl_context_detach_all(struct ocxl_afu *afu) +{ + struct ocxl_context *ctx; + int tmp; + + mutex_lock(&afu->contexts_lock); + idr_for_each_entry(&afu->contexts_idr, ctx, tmp) { + ocxl_context_detach(ctx); + /* + * We are force detaching - remove any active mmio + * mappings so userspace cannot interfere with the + * card if it comes back. Easiest way to exercise + * this is to unbind and rebind the driver via sysfs + * while it is in use. + */ + mutex_lock(&ctx->mapping_lock); + if (ctx->mapping) + unmap_mapping_range(ctx->mapping, 0, 0, 1); + mutex_unlock(&ctx->mapping_lock); + } + mutex_unlock(&afu->contexts_lock); +} + +void ocxl_context_free(struct ocxl_context *ctx) +{ + mutex_lock(&ctx->afu->contexts_lock); + ctx->afu->pasid_count--; + idr_remove(&ctx->afu->contexts_idr, ctx->pasid); + mutex_unlock(&ctx->afu->contexts_lock); + + ocxl_afu_irq_free_all(ctx); + idr_destroy(&ctx->irq_idr); + /* reference to the AFU taken in ocxl_context_init */ + ocxl_afu_put(ctx->afu); + kfree(ctx); +} diff --git a/drivers/misc/ocxl/file.c b/drivers/misc/ocxl/file.c new file mode 100644 index 000000000..e6a607488 --- /dev/null +++ b/drivers/misc/ocxl/file.c @@ -0,0 +1,541 @@ +// SPDX-License-Identifier: GPL-2.0+ +// Copyright 2017 IBM Corp. +#include +#include +#include +#include +#include +#include +#include +#include "ocxl_internal.h" + + +#define OCXL_NUM_MINORS 256 /* Total to reserve */ + +static dev_t ocxl_dev; +static struct class *ocxl_class; +static struct mutex minors_idr_lock; +static struct idr minors_idr; + +static struct ocxl_afu *find_and_get_afu(dev_t devno) +{ + struct ocxl_afu *afu; + int afu_minor; + + afu_minor = MINOR(devno); + /* + * We don't declare an RCU critical section here, as our AFU + * is protected by a reference counter on the device. By the time the + * minor number of a device is removed from the idr, the ref count of + * the device is already at 0, so no user API will access that AFU and + * this function can't return it. + */ + afu = idr_find(&minors_idr, afu_minor); + if (afu) + ocxl_afu_get(afu); + return afu; +} + +static int allocate_afu_minor(struct ocxl_afu *afu) +{ + int minor; + + mutex_lock(&minors_idr_lock); + minor = idr_alloc(&minors_idr, afu, 0, OCXL_NUM_MINORS, GFP_KERNEL); + mutex_unlock(&minors_idr_lock); + return minor; +} + +static void free_afu_minor(struct ocxl_afu *afu) +{ + mutex_lock(&minors_idr_lock); + idr_remove(&minors_idr, MINOR(afu->dev.devt)); + mutex_unlock(&minors_idr_lock); +} + +static int afu_open(struct inode *inode, struct file *file) +{ + struct ocxl_afu *afu; + struct ocxl_context *ctx; + int rc; + + pr_debug("%s for device %x\n", __func__, inode->i_rdev); + + afu = find_and_get_afu(inode->i_rdev); + if (!afu) + return -ENODEV; + + ctx = ocxl_context_alloc(); + if (!ctx) { + rc = -ENOMEM; + goto put_afu; + } + + rc = ocxl_context_init(ctx, afu, inode->i_mapping); + if (rc) + goto put_afu; + file->private_data = ctx; + ocxl_afu_put(afu); + return 0; + +put_afu: + ocxl_afu_put(afu); + return rc; +} + +static long afu_ioctl_attach(struct ocxl_context *ctx, + struct ocxl_ioctl_attach __user *uarg) +{ + struct ocxl_ioctl_attach arg; + u64 amr = 0; + int rc; + + pr_debug("%s for context %d\n", __func__, ctx->pasid); + + if (copy_from_user(&arg, uarg, sizeof(arg))) + return -EFAULT; + + /* Make sure reserved fields are not set for forward compatibility */ + if (arg.reserved1 || arg.reserved2 || arg.reserved3) + return -EINVAL; + + amr = arg.amr & mfspr(SPRN_UAMOR); + rc = ocxl_context_attach(ctx, amr); + return rc; +} + +static long afu_ioctl_get_metadata(struct ocxl_context *ctx, + struct ocxl_ioctl_metadata __user *uarg) +{ + struct ocxl_ioctl_metadata arg; + + memset(&arg, 0, sizeof(arg)); + + arg.version = 0; + + arg.afu_version_major = ctx->afu->config.version_major; + arg.afu_version_minor = ctx->afu->config.version_minor; + arg.pasid = ctx->pasid; + arg.pp_mmio_size = ctx->afu->config.pp_mmio_stride; + arg.global_mmio_size = ctx->afu->config.global_mmio_size; + + if (copy_to_user(uarg, &arg, sizeof(arg))) + return -EFAULT; + + return 0; +} + +#ifdef CONFIG_PPC64 +static long afu_ioctl_enable_p9_wait(struct ocxl_context *ctx, + struct ocxl_ioctl_p9_wait __user *uarg) +{ + struct ocxl_ioctl_p9_wait arg; + + memset(&arg, 0, sizeof(arg)); + + if (cpu_has_feature(CPU_FTR_P9_TIDR)) { + enum ocxl_context_status status; + + // Locks both status & tidr + mutex_lock(&ctx->status_mutex); + if (!ctx->tidr) { + if (set_thread_tidr(current)) { + mutex_unlock(&ctx->status_mutex); + return -ENOENT; + } + + ctx->tidr = current->thread.tidr; + } + + status = ctx->status; + mutex_unlock(&ctx->status_mutex); + + if (status == ATTACHED) { + int rc; + struct link *link = ctx->afu->fn->link; + + rc = ocxl_link_update_pe(link, ctx->pasid, ctx->tidr); + if (rc) + return rc; + } + + arg.thread_id = ctx->tidr; + } else + return -ENOENT; + + if (copy_to_user(uarg, &arg, sizeof(arg))) + return -EFAULT; + + return 0; +} +#endif + + +static long afu_ioctl_get_features(struct ocxl_context *ctx, + struct ocxl_ioctl_features __user *uarg) +{ + struct ocxl_ioctl_features arg; + + memset(&arg, 0, sizeof(arg)); + +#ifdef CONFIG_PPC64 + if (cpu_has_feature(CPU_FTR_P9_TIDR)) + arg.flags[0] |= OCXL_IOCTL_FEATURES_FLAGS0_P9_WAIT; +#endif + + if (copy_to_user(uarg, &arg, sizeof(arg))) + return -EFAULT; + + return 0; +} + +#define CMD_STR(x) (x == OCXL_IOCTL_ATTACH ? "ATTACH" : \ + x == OCXL_IOCTL_IRQ_ALLOC ? "IRQ_ALLOC" : \ + x == OCXL_IOCTL_IRQ_FREE ? "IRQ_FREE" : \ + x == OCXL_IOCTL_IRQ_SET_FD ? "IRQ_SET_FD" : \ + x == OCXL_IOCTL_GET_METADATA ? "GET_METADATA" : \ + x == OCXL_IOCTL_ENABLE_P9_WAIT ? "ENABLE_P9_WAIT" : \ + x == OCXL_IOCTL_GET_FEATURES ? "GET_FEATURES" : \ + "UNKNOWN") + +static long afu_ioctl(struct file *file, unsigned int cmd, + unsigned long args) +{ + struct ocxl_context *ctx = file->private_data; + struct ocxl_ioctl_irq_fd irq_fd; + u64 irq_offset; + long rc; + + pr_debug("%s for context %d, command %s\n", __func__, ctx->pasid, + CMD_STR(cmd)); + + if (ctx->status == CLOSED) + return -EIO; + + switch (cmd) { + case OCXL_IOCTL_ATTACH: + rc = afu_ioctl_attach(ctx, + (struct ocxl_ioctl_attach __user *) args); + break; + + case OCXL_IOCTL_IRQ_ALLOC: + rc = ocxl_afu_irq_alloc(ctx, &irq_offset); + if (!rc) { + rc = copy_to_user((u64 __user *) args, &irq_offset, + sizeof(irq_offset)); + if (rc) { + ocxl_afu_irq_free(ctx, irq_offset); + return -EFAULT; + } + } + break; + + case OCXL_IOCTL_IRQ_FREE: + rc = copy_from_user(&irq_offset, (u64 __user *) args, + sizeof(irq_offset)); + if (rc) + return -EFAULT; + rc = ocxl_afu_irq_free(ctx, irq_offset); + break; + + case OCXL_IOCTL_IRQ_SET_FD: + rc = copy_from_user(&irq_fd, (u64 __user *) args, + sizeof(irq_fd)); + if (rc) + return -EFAULT; + if (irq_fd.reserved) + return -EINVAL; + rc = ocxl_afu_irq_set_fd(ctx, irq_fd.irq_offset, + irq_fd.eventfd); + break; + + case OCXL_IOCTL_GET_METADATA: + rc = afu_ioctl_get_metadata(ctx, + (struct ocxl_ioctl_metadata __user *) args); + break; + +#ifdef CONFIG_PPC64 + case OCXL_IOCTL_ENABLE_P9_WAIT: + rc = afu_ioctl_enable_p9_wait(ctx, + (struct ocxl_ioctl_p9_wait __user *) args); + break; +#endif + + case OCXL_IOCTL_GET_FEATURES: + rc = afu_ioctl_get_features(ctx, + (struct ocxl_ioctl_features __user *) args); + break; + + default: + rc = -EINVAL; + } + return rc; +} + +static long afu_compat_ioctl(struct file *file, unsigned int cmd, + unsigned long args) +{ + return afu_ioctl(file, cmd, args); +} + +static int afu_mmap(struct file *file, struct vm_area_struct *vma) +{ + struct ocxl_context *ctx = file->private_data; + + pr_debug("%s for context %d\n", __func__, ctx->pasid); + return ocxl_context_mmap(ctx, vma); +} + +static bool has_xsl_error(struct ocxl_context *ctx) +{ + bool ret; + + mutex_lock(&ctx->xsl_error_lock); + ret = !!ctx->xsl_error.addr; + mutex_unlock(&ctx->xsl_error_lock); + + return ret; +} + +/* + * Are there any events pending on the AFU + * ctx: The AFU context + * Returns: true if there are events pending + */ +static bool afu_events_pending(struct ocxl_context *ctx) +{ + if (has_xsl_error(ctx)) + return true; + return false; +} + +static unsigned int afu_poll(struct file *file, struct poll_table_struct *wait) +{ + struct ocxl_context *ctx = file->private_data; + unsigned int mask = 0; + bool closed; + + pr_debug("%s for context %d\n", __func__, ctx->pasid); + + poll_wait(file, &ctx->events_wq, wait); + + mutex_lock(&ctx->status_mutex); + closed = (ctx->status == CLOSED); + mutex_unlock(&ctx->status_mutex); + + if (afu_events_pending(ctx)) + mask = EPOLLIN | EPOLLRDNORM; + else if (closed) + mask = EPOLLERR; + + return mask; +} + +/* + * Populate the supplied buffer with a single XSL error + * ctx: The AFU context to report the error from + * header: the event header to populate + * buf: The buffer to write the body into (should be at least + * AFU_EVENT_BODY_XSL_ERROR_SIZE) + * Return: the amount of buffer that was populated + */ +static ssize_t append_xsl_error(struct ocxl_context *ctx, + struct ocxl_kernel_event_header *header, + char __user *buf) +{ + struct ocxl_kernel_event_xsl_fault_error body; + + memset(&body, 0, sizeof(body)); + + mutex_lock(&ctx->xsl_error_lock); + if (!ctx->xsl_error.addr) { + mutex_unlock(&ctx->xsl_error_lock); + return 0; + } + + body.addr = ctx->xsl_error.addr; + body.dsisr = ctx->xsl_error.dsisr; + body.count = ctx->xsl_error.count; + + ctx->xsl_error.addr = 0; + ctx->xsl_error.dsisr = 0; + ctx->xsl_error.count = 0; + + mutex_unlock(&ctx->xsl_error_lock); + + header->type = OCXL_AFU_EVENT_XSL_FAULT_ERROR; + + if (copy_to_user(buf, &body, sizeof(body))) + return -EFAULT; + + return sizeof(body); +} + +#define AFU_EVENT_BODY_MAX_SIZE sizeof(struct ocxl_kernel_event_xsl_fault_error) + +/* + * Reports events on the AFU + * Format: + * Header (struct ocxl_kernel_event_header) + * Body (struct ocxl_kernel_event_*) + * Header... + */ +static ssize_t afu_read(struct file *file, char __user *buf, size_t count, + loff_t *off) +{ + struct ocxl_context *ctx = file->private_data; + struct ocxl_kernel_event_header header; + ssize_t rc; + ssize_t used = 0; + DEFINE_WAIT(event_wait); + + memset(&header, 0, sizeof(header)); + + /* Require offset to be 0 */ + if (*off != 0) + return -EINVAL; + + if (count < (sizeof(struct ocxl_kernel_event_header) + + AFU_EVENT_BODY_MAX_SIZE)) + return -EINVAL; + + for (;;) { + prepare_to_wait(&ctx->events_wq, &event_wait, + TASK_INTERRUPTIBLE); + + if (afu_events_pending(ctx)) + break; + + if (ctx->status == CLOSED) + break; + + if (file->f_flags & O_NONBLOCK) { + finish_wait(&ctx->events_wq, &event_wait); + return -EAGAIN; + } + + if (signal_pending(current)) { + finish_wait(&ctx->events_wq, &event_wait); + return -ERESTARTSYS; + } + + schedule(); + } + + finish_wait(&ctx->events_wq, &event_wait); + + if (has_xsl_error(ctx)) { + used = append_xsl_error(ctx, &header, buf + sizeof(header)); + if (used < 0) + return used; + } + + if (!afu_events_pending(ctx)) + header.flags |= OCXL_KERNEL_EVENT_FLAG_LAST; + + if (copy_to_user(buf, &header, sizeof(header))) + return -EFAULT; + + used += sizeof(header); + + rc = used; + return rc; +} + +static int afu_release(struct inode *inode, struct file *file) +{ + struct ocxl_context *ctx = file->private_data; + int rc; + + pr_debug("%s for device %x\n", __func__, inode->i_rdev); + rc = ocxl_context_detach(ctx); + mutex_lock(&ctx->mapping_lock); + ctx->mapping = NULL; + mutex_unlock(&ctx->mapping_lock); + wake_up_all(&ctx->events_wq); + if (rc != -EBUSY) + ocxl_context_free(ctx); + return 0; +} + +static const struct file_operations ocxl_afu_fops = { + .owner = THIS_MODULE, + .open = afu_open, + .unlocked_ioctl = afu_ioctl, + .compat_ioctl = afu_compat_ioctl, + .mmap = afu_mmap, + .poll = afu_poll, + .read = afu_read, + .release = afu_release, +}; + +int ocxl_create_cdev(struct ocxl_afu *afu) +{ + int rc; + + cdev_init(&afu->cdev, &ocxl_afu_fops); + rc = cdev_add(&afu->cdev, afu->dev.devt, 1); + if (rc) { + dev_err(&afu->dev, "Unable to add afu char device: %d\n", rc); + return rc; + } + return 0; +} + +void ocxl_destroy_cdev(struct ocxl_afu *afu) +{ + cdev_del(&afu->cdev); +} + +int ocxl_register_afu(struct ocxl_afu *afu) +{ + int minor; + + minor = allocate_afu_minor(afu); + if (minor < 0) + return minor; + afu->dev.devt = MKDEV(MAJOR(ocxl_dev), minor); + afu->dev.class = ocxl_class; + return device_register(&afu->dev); +} + +void ocxl_unregister_afu(struct ocxl_afu *afu) +{ + free_afu_minor(afu); +} + +static char *ocxl_devnode(struct device *dev, umode_t *mode) +{ + return kasprintf(GFP_KERNEL, "ocxl/%s", dev_name(dev)); +} + +int ocxl_file_init(void) +{ + int rc; + + mutex_init(&minors_idr_lock); + idr_init(&minors_idr); + + rc = alloc_chrdev_region(&ocxl_dev, 0, OCXL_NUM_MINORS, "ocxl"); + if (rc) { + pr_err("Unable to allocate ocxl major number: %d\n", rc); + return rc; + } + + ocxl_class = class_create(THIS_MODULE, "ocxl"); + if (IS_ERR(ocxl_class)) { + pr_err("Unable to create ocxl class\n"); + unregister_chrdev_region(ocxl_dev, OCXL_NUM_MINORS); + return PTR_ERR(ocxl_class); + } + + ocxl_class->devnode = ocxl_devnode; + return 0; +} + +void ocxl_file_exit(void) +{ + class_destroy(ocxl_class); + unregister_chrdev_region(ocxl_dev, OCXL_NUM_MINORS); + idr_destroy(&minors_idr); +} diff --git a/drivers/misc/ocxl/link.c b/drivers/misc/ocxl/link.c new file mode 100644 index 000000000..646d16450 --- /dev/null +++ b/drivers/misc/ocxl/link.c @@ -0,0 +1,690 @@ +// SPDX-License-Identifier: GPL-2.0+ +// Copyright 2017 IBM Corp. +#include +#include +#include +#include +#include +#include +#include +#include "ocxl_internal.h" +#include "trace.h" + + +#define SPA_PASID_BITS 15 +#define SPA_PASID_MAX ((1 << SPA_PASID_BITS) - 1) +#define SPA_PE_MASK SPA_PASID_MAX +#define SPA_SPA_SIZE_LOG 22 /* Each SPA is 4 Mb */ + +#define SPA_CFG_SF (1ull << (63-0)) +#define SPA_CFG_TA (1ull << (63-1)) +#define SPA_CFG_HV (1ull << (63-3)) +#define SPA_CFG_UV (1ull << (63-4)) +#define SPA_CFG_XLAT_hpt (0ull << (63-6)) /* Hashed page table (HPT) mode */ +#define SPA_CFG_XLAT_roh (2ull << (63-6)) /* Radix on HPT mode */ +#define SPA_CFG_XLAT_ror (3ull << (63-6)) /* Radix on Radix mode */ +#define SPA_CFG_PR (1ull << (63-49)) +#define SPA_CFG_TC (1ull << (63-54)) +#define SPA_CFG_DR (1ull << (63-59)) + +#define SPA_XSL_TF (1ull << (63-3)) /* Translation fault */ +#define SPA_XSL_S (1ull << (63-38)) /* Store operation */ + +#define SPA_PE_VALID 0x80000000 + + +struct pe_data { + struct mm_struct *mm; + /* callback to trigger when a translation fault occurs */ + void (*xsl_err_cb)(void *data, u64 addr, u64 dsisr); + /* opaque pointer to be passed to the above callback */ + void *xsl_err_data; + struct rcu_head rcu; +}; + +struct spa { + struct ocxl_process_element *spa_mem; + int spa_order; + struct mutex spa_lock; + struct radix_tree_root pe_tree; /* Maps PE handles to pe_data */ + char *irq_name; + int virq; + void __iomem *reg_dsisr; + void __iomem *reg_dar; + void __iomem *reg_tfc; + void __iomem *reg_pe_handle; + /* + * The following field are used by the memory fault + * interrupt handler. We can only have one interrupt at a + * time. The NPU won't raise another interrupt until the + * previous one has been ack'd by writing to the TFC register + */ + struct xsl_fault { + struct work_struct fault_work; + u64 pe; + u64 dsisr; + u64 dar; + struct pe_data pe_data; + } xsl_fault; +}; + +/* + * A opencapi link can be used be by several PCI functions. We have + * one link per device slot. + * + * A linked list of opencapi links should suffice, as there's a + * limited number of opencapi slots on a system and lookup is only + * done when the device is probed + */ +struct link { + struct list_head list; + struct kref ref; + int domain; + int bus; + int dev; + atomic_t irq_available; + struct spa *spa; + void *platform_data; +}; +static struct list_head links_list = LIST_HEAD_INIT(links_list); +static DEFINE_MUTEX(links_list_lock); + +enum xsl_response { + CONTINUE, + ADDRESS_ERROR, + RESTART, +}; + + +static void read_irq(struct spa *spa, u64 *dsisr, u64 *dar, u64 *pe) +{ + u64 reg; + + *dsisr = in_be64(spa->reg_dsisr); + *dar = in_be64(spa->reg_dar); + reg = in_be64(spa->reg_pe_handle); + *pe = reg & SPA_PE_MASK; +} + +static void ack_irq(struct spa *spa, enum xsl_response r) +{ + u64 reg = 0; + + /* continue is not supported */ + if (r == RESTART) + reg = PPC_BIT(31); + else if (r == ADDRESS_ERROR) + reg = PPC_BIT(30); + else + WARN(1, "Invalid irq response %d\n", r); + + if (reg) { + trace_ocxl_fault_ack(spa->spa_mem, spa->xsl_fault.pe, + spa->xsl_fault.dsisr, spa->xsl_fault.dar, reg); + out_be64(spa->reg_tfc, reg); + } +} + +static void xsl_fault_handler_bh(struct work_struct *fault_work) +{ + vm_fault_t flt = 0; + unsigned long access, flags, inv_flags = 0; + enum xsl_response r; + struct xsl_fault *fault = container_of(fault_work, struct xsl_fault, + fault_work); + struct spa *spa = container_of(fault, struct spa, xsl_fault); + + int rc; + + /* + * We must release a reference on mm_users whenever exiting this + * function (taken in the memory fault interrupt handler) + */ + rc = copro_handle_mm_fault(fault->pe_data.mm, fault->dar, fault->dsisr, + &flt); + if (rc) { + pr_debug("copro_handle_mm_fault failed: %d\n", rc); + if (fault->pe_data.xsl_err_cb) { + fault->pe_data.xsl_err_cb( + fault->pe_data.xsl_err_data, + fault->dar, fault->dsisr); + } + r = ADDRESS_ERROR; + goto ack; + } + + if (!radix_enabled()) { + /* + * update_mmu_cache() will not have loaded the hash + * since current->trap is not a 0x400 or 0x300, so + * just call hash_page_mm() here. + */ + access = _PAGE_PRESENT | _PAGE_READ; + if (fault->dsisr & SPA_XSL_S) + access |= _PAGE_WRITE; + + if (REGION_ID(fault->dar) != USER_REGION_ID) + access |= _PAGE_PRIVILEGED; + + local_irq_save(flags); + hash_page_mm(fault->pe_data.mm, fault->dar, access, 0x300, + inv_flags); + local_irq_restore(flags); + } + r = RESTART; +ack: + mmput(fault->pe_data.mm); + ack_irq(spa, r); +} + +static irqreturn_t xsl_fault_handler(int irq, void *data) +{ + struct link *link = (struct link *) data; + struct spa *spa = link->spa; + u64 dsisr, dar, pe_handle; + struct pe_data *pe_data; + struct ocxl_process_element *pe; + int lpid, pid, tid; + bool schedule = false; + + read_irq(spa, &dsisr, &dar, &pe_handle); + trace_ocxl_fault(spa->spa_mem, pe_handle, dsisr, dar, -1); + + WARN_ON(pe_handle > SPA_PE_MASK); + pe = spa->spa_mem + pe_handle; + lpid = be32_to_cpu(pe->lpid); + pid = be32_to_cpu(pe->pid); + tid = be32_to_cpu(pe->tid); + /* We could be reading all null values here if the PE is being + * removed while an interrupt kicks in. It's not supposed to + * happen if the driver notified the AFU to terminate the + * PASID, and the AFU waited for pending operations before + * acknowledging. But even if it happens, we won't find a + * memory context below and fail silently, so it should be ok. + */ + if (!(dsisr & SPA_XSL_TF)) { + WARN(1, "Invalid xsl interrupt fault register %#llx\n", dsisr); + ack_irq(spa, ADDRESS_ERROR); + return IRQ_HANDLED; + } + + rcu_read_lock(); + pe_data = radix_tree_lookup(&spa->pe_tree, pe_handle); + if (!pe_data) { + /* + * Could only happen if the driver didn't notify the + * AFU about PASID termination before removing the PE, + * or the AFU didn't wait for all memory access to + * have completed. + * + * Either way, we fail early, but we shouldn't log an + * error message, as it is a valid (if unexpected) + * scenario + */ + rcu_read_unlock(); + pr_debug("Unknown mm context for xsl interrupt\n"); + ack_irq(spa, ADDRESS_ERROR); + return IRQ_HANDLED; + } + WARN_ON(pe_data->mm->context.id != pid); + + if (mmget_not_zero(pe_data->mm)) { + spa->xsl_fault.pe = pe_handle; + spa->xsl_fault.dar = dar; + spa->xsl_fault.dsisr = dsisr; + spa->xsl_fault.pe_data = *pe_data; + schedule = true; + /* mm_users count released by bottom half */ + } + rcu_read_unlock(); + if (schedule) + schedule_work(&spa->xsl_fault.fault_work); + else + ack_irq(spa, ADDRESS_ERROR); + return IRQ_HANDLED; +} + +static void unmap_irq_registers(struct spa *spa) +{ + pnv_ocxl_unmap_xsl_regs(spa->reg_dsisr, spa->reg_dar, spa->reg_tfc, + spa->reg_pe_handle); +} + +static int map_irq_registers(struct pci_dev *dev, struct spa *spa) +{ + return pnv_ocxl_map_xsl_regs(dev, &spa->reg_dsisr, &spa->reg_dar, + &spa->reg_tfc, &spa->reg_pe_handle); +} + +static int setup_xsl_irq(struct pci_dev *dev, struct link *link) +{ + struct spa *spa = link->spa; + int rc; + int hwirq; + + rc = pnv_ocxl_get_xsl_irq(dev, &hwirq); + if (rc) + return rc; + + rc = map_irq_registers(dev, spa); + if (rc) + return rc; + + spa->irq_name = kasprintf(GFP_KERNEL, "ocxl-xsl-%x-%x-%x", + link->domain, link->bus, link->dev); + if (!spa->irq_name) { + unmap_irq_registers(spa); + dev_err(&dev->dev, "Can't allocate name for xsl interrupt\n"); + return -ENOMEM; + } + /* + * At some point, we'll need to look into allowing a higher + * number of interrupts. Could we have an IRQ domain per link? + */ + spa->virq = irq_create_mapping(NULL, hwirq); + if (!spa->virq) { + kfree(spa->irq_name); + unmap_irq_registers(spa); + dev_err(&dev->dev, + "irq_create_mapping failed for translation interrupt\n"); + return -EINVAL; + } + + dev_dbg(&dev->dev, "hwirq %d mapped to virq %d\n", hwirq, spa->virq); + + rc = request_irq(spa->virq, xsl_fault_handler, 0, spa->irq_name, + link); + if (rc) { + irq_dispose_mapping(spa->virq); + kfree(spa->irq_name); + unmap_irq_registers(spa); + dev_err(&dev->dev, + "request_irq failed for translation interrupt: %d\n", + rc); + return -EINVAL; + } + return 0; +} + +static void release_xsl_irq(struct link *link) +{ + struct spa *spa = link->spa; + + if (spa->virq) { + free_irq(spa->virq, link); + irq_dispose_mapping(spa->virq); + } + kfree(spa->irq_name); + unmap_irq_registers(spa); +} + +static int alloc_spa(struct pci_dev *dev, struct link *link) +{ + struct spa *spa; + + spa = kzalloc(sizeof(struct spa), GFP_KERNEL); + if (!spa) + return -ENOMEM; + + mutex_init(&spa->spa_lock); + INIT_RADIX_TREE(&spa->pe_tree, GFP_KERNEL); + INIT_WORK(&spa->xsl_fault.fault_work, xsl_fault_handler_bh); + + spa->spa_order = SPA_SPA_SIZE_LOG - PAGE_SHIFT; + spa->spa_mem = (struct ocxl_process_element *) + __get_free_pages(GFP_KERNEL | __GFP_ZERO, spa->spa_order); + if (!spa->spa_mem) { + dev_err(&dev->dev, "Can't allocate Shared Process Area\n"); + kfree(spa); + return -ENOMEM; + } + pr_debug("Allocated SPA for %x:%x:%x at %p\n", link->domain, link->bus, + link->dev, spa->spa_mem); + + link->spa = spa; + return 0; +} + +static void free_spa(struct link *link) +{ + struct spa *spa = link->spa; + + pr_debug("Freeing SPA for %x:%x:%x\n", link->domain, link->bus, + link->dev); + + if (spa && spa->spa_mem) { + free_pages((unsigned long) spa->spa_mem, spa->spa_order); + kfree(spa); + link->spa = NULL; + } +} + +static int alloc_link(struct pci_dev *dev, int PE_mask, struct link **out_link) +{ + struct link *link; + int rc; + + link = kzalloc(sizeof(struct link), GFP_KERNEL); + if (!link) + return -ENOMEM; + + kref_init(&link->ref); + link->domain = pci_domain_nr(dev->bus); + link->bus = dev->bus->number; + link->dev = PCI_SLOT(dev->devfn); + atomic_set(&link->irq_available, MAX_IRQ_PER_LINK); + + rc = alloc_spa(dev, link); + if (rc) + goto err_free; + + rc = setup_xsl_irq(dev, link); + if (rc) + goto err_spa; + + /* platform specific hook */ + rc = pnv_ocxl_spa_setup(dev, link->spa->spa_mem, PE_mask, + &link->platform_data); + if (rc) + goto err_xsl_irq; + + *out_link = link; + return 0; + +err_xsl_irq: + release_xsl_irq(link); +err_spa: + free_spa(link); +err_free: + kfree(link); + return rc; +} + +static void free_link(struct link *link) +{ + release_xsl_irq(link); + free_spa(link); + kfree(link); +} + +int ocxl_link_setup(struct pci_dev *dev, int PE_mask, void **link_handle) +{ + int rc = 0; + struct link *link; + + mutex_lock(&links_list_lock); + list_for_each_entry(link, &links_list, list) { + /* The functions of a device all share the same link */ + if (link->domain == pci_domain_nr(dev->bus) && + link->bus == dev->bus->number && + link->dev == PCI_SLOT(dev->devfn)) { + kref_get(&link->ref); + *link_handle = link; + goto unlock; + } + } + rc = alloc_link(dev, PE_mask, &link); + if (rc) + goto unlock; + + list_add(&link->list, &links_list); + *link_handle = link; +unlock: + mutex_unlock(&links_list_lock); + return rc; +} +EXPORT_SYMBOL_GPL(ocxl_link_setup); + +static void release_xsl(struct kref *ref) +{ + struct link *link = container_of(ref, struct link, ref); + + list_del(&link->list); + /* call platform code before releasing data */ + pnv_ocxl_spa_release(link->platform_data); + free_link(link); +} + +void ocxl_link_release(struct pci_dev *dev, void *link_handle) +{ + struct link *link = (struct link *) link_handle; + + mutex_lock(&links_list_lock); + kref_put(&link->ref, release_xsl); + mutex_unlock(&links_list_lock); +} +EXPORT_SYMBOL_GPL(ocxl_link_release); + +static u64 calculate_cfg_state(bool kernel) +{ + u64 state; + + state = SPA_CFG_DR; + if (mfspr(SPRN_LPCR) & LPCR_TC) + state |= SPA_CFG_TC; + if (radix_enabled()) + state |= SPA_CFG_XLAT_ror; + else + state |= SPA_CFG_XLAT_hpt; + state |= SPA_CFG_HV; + if (kernel) { + if (mfmsr() & MSR_SF) + state |= SPA_CFG_SF; + } else { + state |= SPA_CFG_PR; + if (!test_tsk_thread_flag(current, TIF_32BIT)) + state |= SPA_CFG_SF; + } + return state; +} + +int ocxl_link_add_pe(void *link_handle, int pasid, u32 pidr, u32 tidr, + u64 amr, struct mm_struct *mm, + void (*xsl_err_cb)(void *data, u64 addr, u64 dsisr), + void *xsl_err_data) +{ + struct link *link = (struct link *) link_handle; + struct spa *spa = link->spa; + struct ocxl_process_element *pe; + int pe_handle, rc = 0; + struct pe_data *pe_data; + + BUILD_BUG_ON(sizeof(struct ocxl_process_element) != 128); + if (pasid > SPA_PASID_MAX) + return -EINVAL; + + mutex_lock(&spa->spa_lock); + pe_handle = pasid & SPA_PE_MASK; + pe = spa->spa_mem + pe_handle; + + if (pe->software_state) { + rc = -EBUSY; + goto unlock; + } + + pe_data = kmalloc(sizeof(*pe_data), GFP_KERNEL); + if (!pe_data) { + rc = -ENOMEM; + goto unlock; + } + + pe_data->mm = mm; + pe_data->xsl_err_cb = xsl_err_cb; + pe_data->xsl_err_data = xsl_err_data; + + memset(pe, 0, sizeof(struct ocxl_process_element)); + pe->config_state = cpu_to_be64(calculate_cfg_state(pidr == 0)); + pe->lpid = cpu_to_be32(mfspr(SPRN_LPID)); + pe->pid = cpu_to_be32(pidr); + pe->tid = cpu_to_be32(tidr); + pe->amr = cpu_to_be64(amr); + pe->software_state = cpu_to_be32(SPA_PE_VALID); + + mm_context_add_copro(mm); + /* + * Barrier is to make sure PE is visible in the SPA before it + * is used by the device. It also helps with the global TLBI + * invalidation + */ + mb(); + radix_tree_insert(&spa->pe_tree, pe_handle, pe_data); + + /* + * The mm must stay valid for as long as the device uses it. We + * lower the count when the context is removed from the SPA. + * + * We grab mm_count (and not mm_users), as we don't want to + * end up in a circular dependency if a process mmaps its + * mmio, therefore incrementing the file ref count when + * calling mmap(), and forgets to unmap before exiting. In + * that scenario, when the kernel handles the death of the + * process, the file is not cleaned because unmap was not + * called, and the mm wouldn't be freed because we would still + * have a reference on mm_users. Incrementing mm_count solves + * the problem. + */ + mmgrab(mm); + trace_ocxl_context_add(current->pid, spa->spa_mem, pasid, pidr, tidr); +unlock: + mutex_unlock(&spa->spa_lock); + return rc; +} +EXPORT_SYMBOL_GPL(ocxl_link_add_pe); + +int ocxl_link_update_pe(void *link_handle, int pasid, __u16 tid) +{ + struct link *link = (struct link *) link_handle; + struct spa *spa = link->spa; + struct ocxl_process_element *pe; + int pe_handle, rc; + + if (pasid > SPA_PASID_MAX) + return -EINVAL; + + pe_handle = pasid & SPA_PE_MASK; + pe = spa->spa_mem + pe_handle; + + mutex_lock(&spa->spa_lock); + + pe->tid = cpu_to_be32(tid); + + /* + * The barrier makes sure the PE is updated + * before we clear the NPU context cache below, so that the + * old PE cannot be reloaded erroneously. + */ + mb(); + + /* + * hook to platform code + * On powerpc, the entry needs to be cleared from the context + * cache of the NPU. + */ + rc = pnv_ocxl_spa_remove_pe_from_cache(link->platform_data, pe_handle); + WARN_ON(rc); + + mutex_unlock(&spa->spa_lock); + return rc; +} + +int ocxl_link_remove_pe(void *link_handle, int pasid) +{ + struct link *link = (struct link *) link_handle; + struct spa *spa = link->spa; + struct ocxl_process_element *pe; + struct pe_data *pe_data; + int pe_handle, rc; + + if (pasid > SPA_PASID_MAX) + return -EINVAL; + + /* + * About synchronization with our memory fault handler: + * + * Before removing the PE, the driver is supposed to have + * notified the AFU, which should have cleaned up and make + * sure the PASID is no longer in use, including pending + * interrupts. However, there's no way to be sure... + * + * We clear the PE and remove the context from our radix + * tree. From that point on, any new interrupt for that + * context will fail silently, which is ok. As mentioned + * above, that's not expected, but it could happen if the + * driver or AFU didn't do the right thing. + * + * There could still be a bottom half running, but we don't + * need to wait/flush, as it is managing a reference count on + * the mm it reads from the radix tree. + */ + pe_handle = pasid & SPA_PE_MASK; + pe = spa->spa_mem + pe_handle; + + mutex_lock(&spa->spa_lock); + + if (!(be32_to_cpu(pe->software_state) & SPA_PE_VALID)) { + rc = -EINVAL; + goto unlock; + } + + trace_ocxl_context_remove(current->pid, spa->spa_mem, pasid, + be32_to_cpu(pe->pid), be32_to_cpu(pe->tid)); + + memset(pe, 0, sizeof(struct ocxl_process_element)); + /* + * The barrier makes sure the PE is removed from the SPA + * before we clear the NPU context cache below, so that the + * old PE cannot be reloaded erroneously. + */ + mb(); + + /* + * hook to platform code + * On powerpc, the entry needs to be cleared from the context + * cache of the NPU. + */ + rc = pnv_ocxl_spa_remove_pe_from_cache(link->platform_data, pe_handle); + WARN_ON(rc); + + pe_data = radix_tree_delete(&spa->pe_tree, pe_handle); + if (!pe_data) { + WARN(1, "Couldn't find pe data when removing PE\n"); + } else { + mm_context_remove_copro(pe_data->mm); + mmdrop(pe_data->mm); + kfree_rcu(pe_data, rcu); + } +unlock: + mutex_unlock(&spa->spa_lock); + return rc; +} +EXPORT_SYMBOL_GPL(ocxl_link_remove_pe); + +int ocxl_link_irq_alloc(void *link_handle, int *hw_irq, u64 *trigger_addr) +{ + struct link *link = (struct link *) link_handle; + int rc, irq; + u64 addr; + + if (atomic_dec_if_positive(&link->irq_available) < 0) + return -ENOSPC; + + rc = pnv_ocxl_alloc_xive_irq(&irq, &addr); + if (rc) { + atomic_inc(&link->irq_available); + return rc; + } + + *hw_irq = irq; + *trigger_addr = addr; + return 0; +} +EXPORT_SYMBOL_GPL(ocxl_link_irq_alloc); + +void ocxl_link_free_irq(void *link_handle, int hw_irq) +{ + struct link *link = (struct link *) link_handle; + + pnv_ocxl_free_xive_irq(hw_irq); + atomic_inc(&link->irq_available); +} +EXPORT_SYMBOL_GPL(ocxl_link_free_irq); diff --git a/drivers/misc/ocxl/main.c b/drivers/misc/ocxl/main.c new file mode 100644 index 000000000..7210d9e05 --- /dev/null +++ b/drivers/misc/ocxl/main.c @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: GPL-2.0+ +// Copyright 2017 IBM Corp. +#include +#include +#include "ocxl_internal.h" + +static int __init init_ocxl(void) +{ + int rc = 0; + + rc = ocxl_file_init(); + if (rc) + return rc; + + rc = pci_register_driver(&ocxl_pci_driver); + if (rc) { + ocxl_file_exit(); + return rc; + } + return 0; +} + +static void exit_ocxl(void) +{ + pci_unregister_driver(&ocxl_pci_driver); + ocxl_file_exit(); +} + +module_init(init_ocxl); +module_exit(exit_ocxl); + +MODULE_DESCRIPTION("Open Coherent Accelerator"); +MODULE_LICENSE("GPL"); diff --git a/drivers/misc/ocxl/ocxl_internal.h b/drivers/misc/ocxl/ocxl_internal.h new file mode 100644 index 000000000..a32f21510 --- /dev/null +++ b/drivers/misc/ocxl/ocxl_internal.h @@ -0,0 +1,132 @@ +// SPDX-License-Identifier: GPL-2.0+ +// Copyright 2017 IBM Corp. +#ifndef _OCXL_INTERNAL_H_ +#define _OCXL_INTERNAL_H_ + +#include +#include +#include +#include + +#define MAX_IRQ_PER_LINK 2000 +#define MAX_IRQ_PER_CONTEXT MAX_IRQ_PER_LINK + +#define to_ocxl_function(d) container_of(d, struct ocxl_fn, dev) +#define to_ocxl_afu(d) container_of(d, struct ocxl_afu, dev) + +extern struct pci_driver ocxl_pci_driver; + + +struct ocxl_fn { + struct device dev; + int bar_used[3]; + struct ocxl_fn_config config; + struct list_head afu_list; + int pasid_base; + int actag_base; + int actag_enabled; + int actag_supported; + struct list_head pasid_list; + struct list_head actag_list; + void *link; +}; + +struct ocxl_afu { + struct ocxl_fn *fn; + struct list_head list; + struct device dev; + struct cdev cdev; + struct ocxl_afu_config config; + int pasid_base; + int pasid_count; /* opened contexts */ + int pasid_max; /* maximum number of contexts */ + int actag_base; + int actag_enabled; + struct mutex contexts_lock; + struct idr contexts_idr; + struct mutex afu_control_lock; + u64 global_mmio_start; + u64 irq_base_offset; + void __iomem *global_mmio_ptr; + u64 pp_mmio_start; + struct bin_attribute attr_global_mmio; +}; + +enum ocxl_context_status { + CLOSED, + OPENED, + ATTACHED, +}; + +// Contains metadata about a translation fault +struct ocxl_xsl_error { + u64 addr; // The address that triggered the fault + u64 dsisr; // the value of the dsisr register + u64 count; // The number of times this fault has been triggered +}; + +struct ocxl_context { + struct ocxl_afu *afu; + int pasid; + struct mutex status_mutex; + enum ocxl_context_status status; + struct address_space *mapping; + struct mutex mapping_lock; + wait_queue_head_t events_wq; + struct mutex xsl_error_lock; + struct ocxl_xsl_error xsl_error; + struct mutex irq_lock; + struct idr irq_idr; + u16 tidr; // Thread ID used for P9 wait implementation +}; + +struct ocxl_process_element { + __be64 config_state; + __be32 reserved1[11]; + __be32 lpid; + __be32 tid; + __be32 pid; + __be32 reserved2[10]; + __be64 amr; + __be32 reserved3[3]; + __be32 software_state; +}; + + +extern struct ocxl_afu *ocxl_afu_get(struct ocxl_afu *afu); +extern void ocxl_afu_put(struct ocxl_afu *afu); + +extern int ocxl_create_cdev(struct ocxl_afu *afu); +extern void ocxl_destroy_cdev(struct ocxl_afu *afu); +extern int ocxl_register_afu(struct ocxl_afu *afu); +extern void ocxl_unregister_afu(struct ocxl_afu *afu); + +extern int ocxl_file_init(void); +extern void ocxl_file_exit(void); + +extern int ocxl_pasid_afu_alloc(struct ocxl_fn *fn, u32 size); +extern void ocxl_pasid_afu_free(struct ocxl_fn *fn, u32 start, u32 size); +extern int ocxl_actag_afu_alloc(struct ocxl_fn *fn, u32 size); +extern void ocxl_actag_afu_free(struct ocxl_fn *fn, u32 start, u32 size); + +extern struct ocxl_context *ocxl_context_alloc(void); +extern int ocxl_context_init(struct ocxl_context *ctx, struct ocxl_afu *afu, + struct address_space *mapping); +extern int ocxl_context_attach(struct ocxl_context *ctx, u64 amr); +extern int ocxl_context_mmap(struct ocxl_context *ctx, + struct vm_area_struct *vma); +extern int ocxl_context_detach(struct ocxl_context *ctx); +extern void ocxl_context_detach_all(struct ocxl_afu *afu); +extern void ocxl_context_free(struct ocxl_context *ctx); + +extern int ocxl_sysfs_add_afu(struct ocxl_afu *afu); +extern void ocxl_sysfs_remove_afu(struct ocxl_afu *afu); + +extern int ocxl_afu_irq_alloc(struct ocxl_context *ctx, u64 *irq_offset); +extern int ocxl_afu_irq_free(struct ocxl_context *ctx, u64 irq_offset); +extern void ocxl_afu_irq_free_all(struct ocxl_context *ctx); +extern int ocxl_afu_irq_set_fd(struct ocxl_context *ctx, u64 irq_offset, + int eventfd); +extern u64 ocxl_afu_irq_get_addr(struct ocxl_context *ctx, u64 irq_offset); + +#endif /* _OCXL_INTERNAL_H_ */ diff --git a/drivers/misc/ocxl/pasid.c b/drivers/misc/ocxl/pasid.c new file mode 100644 index 000000000..d14cb56e6 --- /dev/null +++ b/drivers/misc/ocxl/pasid.c @@ -0,0 +1,107 @@ +// SPDX-License-Identifier: GPL-2.0+ +// Copyright 2017 IBM Corp. +#include "ocxl_internal.h" + + +struct id_range { + struct list_head list; + u32 start; + u32 end; +}; + +#ifdef DEBUG +static void dump_list(struct list_head *head, char *type_str) +{ + struct id_range *cur; + + pr_debug("%s ranges allocated:\n", type_str); + list_for_each_entry(cur, head, list) { + pr_debug("Range %d->%d\n", cur->start, cur->end); + } +} +#endif + +static int range_alloc(struct list_head *head, u32 size, int max_id, + char *type_str) +{ + struct list_head *pos; + struct id_range *cur, *new; + int rc, last_end; + + new = kmalloc(sizeof(struct id_range), GFP_KERNEL); + if (!new) + return -ENOMEM; + + pos = head; + last_end = -1; + list_for_each_entry(cur, head, list) { + if ((cur->start - last_end) > size) + break; + last_end = cur->end; + pos = &cur->list; + } + + new->start = last_end + 1; + new->end = new->start + size - 1; + + if (new->end > max_id) { + kfree(new); + rc = -ENOSPC; + } else { + list_add(&new->list, pos); + rc = new->start; + } + +#ifdef DEBUG + dump_list(head, type_str); +#endif + return rc; +} + +static void range_free(struct list_head *head, u32 start, u32 size, + char *type_str) +{ + bool found = false; + struct id_range *cur, *tmp; + + list_for_each_entry_safe(cur, tmp, head, list) { + if (cur->start == start && cur->end == (start + size - 1)) { + found = true; + list_del(&cur->list); + kfree(cur); + break; + } + } + WARN_ON(!found); +#ifdef DEBUG + dump_list(head, type_str); +#endif +} + +int ocxl_pasid_afu_alloc(struct ocxl_fn *fn, u32 size) +{ + int max_pasid; + + if (fn->config.max_pasid_log < 0) + return -ENOSPC; + max_pasid = 1 << fn->config.max_pasid_log; + return range_alloc(&fn->pasid_list, size, max_pasid, "afu pasid"); +} + +void ocxl_pasid_afu_free(struct ocxl_fn *fn, u32 start, u32 size) +{ + return range_free(&fn->pasid_list, start, size, "afu pasid"); +} + +int ocxl_actag_afu_alloc(struct ocxl_fn *fn, u32 size) +{ + int max_actag; + + max_actag = fn->actag_enabled; + return range_alloc(&fn->actag_list, size, max_actag, "afu actag"); +} + +void ocxl_actag_afu_free(struct ocxl_fn *fn, u32 start, u32 size) +{ + return range_free(&fn->actag_list, start, size, "afu actag"); +} diff --git a/drivers/misc/ocxl/pci.c b/drivers/misc/ocxl/pci.c new file mode 100644 index 000000000..21f425472 --- /dev/null +++ b/drivers/misc/ocxl/pci.c @@ -0,0 +1,585 @@ +// SPDX-License-Identifier: GPL-2.0+ +// Copyright 2017 IBM Corp. +#include +#include +#include +#include +#include "ocxl_internal.h" + +/* + * Any opencapi device which wants to use this 'generic' driver should + * use the 0x062B device ID. Vendors should define the subsystem + * vendor/device ID to help differentiate devices. + */ +static const struct pci_device_id ocxl_pci_tbl[] = { + { PCI_DEVICE(PCI_VENDOR_ID_IBM, 0x062B), }, + { } +}; +MODULE_DEVICE_TABLE(pci, ocxl_pci_tbl); + + +static struct ocxl_fn *ocxl_fn_get(struct ocxl_fn *fn) +{ + return (get_device(&fn->dev) == NULL) ? NULL : fn; +} + +static void ocxl_fn_put(struct ocxl_fn *fn) +{ + put_device(&fn->dev); +} + +struct ocxl_afu *ocxl_afu_get(struct ocxl_afu *afu) +{ + return (get_device(&afu->dev) == NULL) ? NULL : afu; +} + +void ocxl_afu_put(struct ocxl_afu *afu) +{ + put_device(&afu->dev); +} + +static struct ocxl_afu *alloc_afu(struct ocxl_fn *fn) +{ + struct ocxl_afu *afu; + + afu = kzalloc(sizeof(struct ocxl_afu), GFP_KERNEL); + if (!afu) + return NULL; + + mutex_init(&afu->contexts_lock); + mutex_init(&afu->afu_control_lock); + idr_init(&afu->contexts_idr); + afu->fn = fn; + ocxl_fn_get(fn); + return afu; +} + +static void free_afu(struct ocxl_afu *afu) +{ + idr_destroy(&afu->contexts_idr); + ocxl_fn_put(afu->fn); + kfree(afu); +} + +static void free_afu_dev(struct device *dev) +{ + struct ocxl_afu *afu = to_ocxl_afu(dev); + + ocxl_unregister_afu(afu); + free_afu(afu); +} + +static int set_afu_device(struct ocxl_afu *afu, const char *location) +{ + struct ocxl_fn *fn = afu->fn; + int rc; + + afu->dev.parent = &fn->dev; + afu->dev.release = free_afu_dev; + rc = dev_set_name(&afu->dev, "%s.%s.%hhu", afu->config.name, location, + afu->config.idx); + return rc; +} + +static int assign_afu_actag(struct ocxl_afu *afu, struct pci_dev *dev) +{ + struct ocxl_fn *fn = afu->fn; + int actag_count, actag_offset; + + /* + * if there were not enough actags for the function, each afu + * reduces its count as well + */ + actag_count = afu->config.actag_supported * + fn->actag_enabled / fn->actag_supported; + actag_offset = ocxl_actag_afu_alloc(fn, actag_count); + if (actag_offset < 0) { + dev_err(&afu->dev, "Can't allocate %d actags for AFU: %d\n", + actag_count, actag_offset); + return actag_offset; + } + afu->actag_base = fn->actag_base + actag_offset; + afu->actag_enabled = actag_count; + + ocxl_config_set_afu_actag(dev, afu->config.dvsec_afu_control_pos, + afu->actag_base, afu->actag_enabled); + dev_dbg(&afu->dev, "actag base=%d enabled=%d\n", + afu->actag_base, afu->actag_enabled); + return 0; +} + +static void reclaim_afu_actag(struct ocxl_afu *afu) +{ + struct ocxl_fn *fn = afu->fn; + int start_offset, size; + + start_offset = afu->actag_base - fn->actag_base; + size = afu->actag_enabled; + ocxl_actag_afu_free(afu->fn, start_offset, size); +} + +static int assign_afu_pasid(struct ocxl_afu *afu, struct pci_dev *dev) +{ + struct ocxl_fn *fn = afu->fn; + int pasid_count, pasid_offset; + + /* + * We only support the case where the function configuration + * requested enough PASIDs to cover all AFUs. + */ + pasid_count = 1 << afu->config.pasid_supported_log; + pasid_offset = ocxl_pasid_afu_alloc(fn, pasid_count); + if (pasid_offset < 0) { + dev_err(&afu->dev, "Can't allocate %d PASIDs for AFU: %d\n", + pasid_count, pasid_offset); + return pasid_offset; + } + afu->pasid_base = fn->pasid_base + pasid_offset; + afu->pasid_count = 0; + afu->pasid_max = pasid_count; + + ocxl_config_set_afu_pasid(dev, afu->config.dvsec_afu_control_pos, + afu->pasid_base, + afu->config.pasid_supported_log); + dev_dbg(&afu->dev, "PASID base=%d, enabled=%d\n", + afu->pasid_base, pasid_count); + return 0; +} + +static void reclaim_afu_pasid(struct ocxl_afu *afu) +{ + struct ocxl_fn *fn = afu->fn; + int start_offset, size; + + start_offset = afu->pasid_base - fn->pasid_base; + size = 1 << afu->config.pasid_supported_log; + ocxl_pasid_afu_free(afu->fn, start_offset, size); +} + +static int reserve_fn_bar(struct ocxl_fn *fn, int bar) +{ + struct pci_dev *dev = to_pci_dev(fn->dev.parent); + int rc, idx; + + if (bar != 0 && bar != 2 && bar != 4) + return -EINVAL; + + idx = bar >> 1; + if (fn->bar_used[idx]++ == 0) { + rc = pci_request_region(dev, bar, "ocxl"); + if (rc) + return rc; + } + return 0; +} + +static void release_fn_bar(struct ocxl_fn *fn, int bar) +{ + struct pci_dev *dev = to_pci_dev(fn->dev.parent); + int idx; + + if (bar != 0 && bar != 2 && bar != 4) + return; + + idx = bar >> 1; + if (--fn->bar_used[idx] == 0) + pci_release_region(dev, bar); + WARN_ON(fn->bar_used[idx] < 0); +} + +static int map_mmio_areas(struct ocxl_afu *afu, struct pci_dev *dev) +{ + int rc; + + rc = reserve_fn_bar(afu->fn, afu->config.global_mmio_bar); + if (rc) + return rc; + + rc = reserve_fn_bar(afu->fn, afu->config.pp_mmio_bar); + if (rc) { + release_fn_bar(afu->fn, afu->config.global_mmio_bar); + return rc; + } + + afu->global_mmio_start = + pci_resource_start(dev, afu->config.global_mmio_bar) + + afu->config.global_mmio_offset; + afu->pp_mmio_start = + pci_resource_start(dev, afu->config.pp_mmio_bar) + + afu->config.pp_mmio_offset; + + afu->global_mmio_ptr = ioremap(afu->global_mmio_start, + afu->config.global_mmio_size); + if (!afu->global_mmio_ptr) { + release_fn_bar(afu->fn, afu->config.pp_mmio_bar); + release_fn_bar(afu->fn, afu->config.global_mmio_bar); + dev_err(&dev->dev, "Error mapping global mmio area\n"); + return -ENOMEM; + } + + /* + * Leave an empty page between the per-process mmio area and + * the AFU interrupt mappings + */ + afu->irq_base_offset = afu->config.pp_mmio_stride + PAGE_SIZE; + return 0; +} + +static void unmap_mmio_areas(struct ocxl_afu *afu) +{ + if (afu->global_mmio_ptr) { + iounmap(afu->global_mmio_ptr); + afu->global_mmio_ptr = NULL; + } + afu->global_mmio_start = 0; + afu->pp_mmio_start = 0; + release_fn_bar(afu->fn, afu->config.pp_mmio_bar); + release_fn_bar(afu->fn, afu->config.global_mmio_bar); +} + +static int configure_afu(struct ocxl_afu *afu, u8 afu_idx, struct pci_dev *dev) +{ + int rc; + + rc = ocxl_config_read_afu(dev, &afu->fn->config, &afu->config, afu_idx); + if (rc) + return rc; + + rc = set_afu_device(afu, dev_name(&dev->dev)); + if (rc) + return rc; + + rc = assign_afu_actag(afu, dev); + if (rc) + return rc; + + rc = assign_afu_pasid(afu, dev); + if (rc) { + reclaim_afu_actag(afu); + return rc; + } + + rc = map_mmio_areas(afu, dev); + if (rc) { + reclaim_afu_pasid(afu); + reclaim_afu_actag(afu); + return rc; + } + return 0; +} + +static void deconfigure_afu(struct ocxl_afu *afu) +{ + unmap_mmio_areas(afu); + reclaim_afu_pasid(afu); + reclaim_afu_actag(afu); +} + +static int activate_afu(struct pci_dev *dev, struct ocxl_afu *afu) +{ + int rc; + + ocxl_config_set_afu_state(dev, afu->config.dvsec_afu_control_pos, 1); + /* + * Char device creation is the last step, as processes can + * call our driver immediately, so all our inits must be finished. + */ + rc = ocxl_create_cdev(afu); + if (rc) + return rc; + return 0; +} + +static void deactivate_afu(struct ocxl_afu *afu) +{ + struct pci_dev *dev = to_pci_dev(afu->fn->dev.parent); + + ocxl_destroy_cdev(afu); + ocxl_config_set_afu_state(dev, afu->config.dvsec_afu_control_pos, 0); +} + +static int init_afu(struct pci_dev *dev, struct ocxl_fn *fn, u8 afu_idx) +{ + int rc; + struct ocxl_afu *afu; + + afu = alloc_afu(fn); + if (!afu) + return -ENOMEM; + + rc = configure_afu(afu, afu_idx, dev); + if (rc) { + free_afu(afu); + return rc; + } + + rc = ocxl_register_afu(afu); + if (rc) + goto err; + + rc = ocxl_sysfs_add_afu(afu); + if (rc) + goto err; + + rc = activate_afu(dev, afu); + if (rc) + goto err_sys; + + list_add_tail(&afu->list, &fn->afu_list); + return 0; + +err_sys: + ocxl_sysfs_remove_afu(afu); +err: + deconfigure_afu(afu); + device_unregister(&afu->dev); + return rc; +} + +static void remove_afu(struct ocxl_afu *afu) +{ + list_del(&afu->list); + ocxl_context_detach_all(afu); + deactivate_afu(afu); + ocxl_sysfs_remove_afu(afu); + deconfigure_afu(afu); + device_unregister(&afu->dev); +} + +static struct ocxl_fn *alloc_function(struct pci_dev *dev) +{ + struct ocxl_fn *fn; + + fn = kzalloc(sizeof(struct ocxl_fn), GFP_KERNEL); + if (!fn) + return NULL; + + INIT_LIST_HEAD(&fn->afu_list); + INIT_LIST_HEAD(&fn->pasid_list); + INIT_LIST_HEAD(&fn->actag_list); + return fn; +} + +static void free_function(struct ocxl_fn *fn) +{ + WARN_ON(!list_empty(&fn->afu_list)); + WARN_ON(!list_empty(&fn->pasid_list)); + kfree(fn); +} + +static void free_function_dev(struct device *dev) +{ + struct ocxl_fn *fn = to_ocxl_function(dev); + + free_function(fn); +} + +static int set_function_device(struct ocxl_fn *fn, struct pci_dev *dev) +{ + int rc; + + fn->dev.parent = &dev->dev; + fn->dev.release = free_function_dev; + rc = dev_set_name(&fn->dev, "ocxlfn.%s", dev_name(&dev->dev)); + if (rc) + return rc; + pci_set_drvdata(dev, fn); + return 0; +} + +static int assign_function_actag(struct ocxl_fn *fn) +{ + struct pci_dev *dev = to_pci_dev(fn->dev.parent); + u16 base, enabled, supported; + int rc; + + rc = ocxl_config_get_actag_info(dev, &base, &enabled, &supported); + if (rc) + return rc; + + fn->actag_base = base; + fn->actag_enabled = enabled; + fn->actag_supported = supported; + + ocxl_config_set_actag(dev, fn->config.dvsec_function_pos, + fn->actag_base, fn->actag_enabled); + dev_dbg(&fn->dev, "actag range starting at %d, enabled %d\n", + fn->actag_base, fn->actag_enabled); + return 0; +} + +static int set_function_pasid(struct ocxl_fn *fn) +{ + struct pci_dev *dev = to_pci_dev(fn->dev.parent); + int rc, desired_count, max_count; + + /* A function may not require any PASID */ + if (fn->config.max_pasid_log < 0) + return 0; + + rc = ocxl_config_get_pasid_info(dev, &max_count); + if (rc) + return rc; + + desired_count = 1 << fn->config.max_pasid_log; + + if (desired_count > max_count) { + dev_err(&fn->dev, + "Function requires more PASIDs than is available (%d vs. %d)\n", + desired_count, max_count); + return -ENOSPC; + } + + fn->pasid_base = 0; + return 0; +} + +static int configure_function(struct ocxl_fn *fn, struct pci_dev *dev) +{ + int rc; + + rc = pci_enable_device(dev); + if (rc) { + dev_err(&dev->dev, "pci_enable_device failed: %d\n", rc); + return rc; + } + + /* + * Once it has been confirmed to work on our hardware, we + * should reset the function, to force the adapter to restart + * from scratch. + * A function reset would also reset all its AFUs. + * + * Some hints for implementation: + * + * - there's not status bit to know when the reset is done. We + * should try reading the config space to know when it's + * done. + * - probably something like: + * Reset + * wait 100ms + * issue config read + * allow device up to 1 sec to return success on config + * read before declaring it broken + * + * Some shared logic on the card (CFG, TLX) won't be reset, so + * there's no guarantee that it will be enough. + */ + rc = ocxl_config_read_function(dev, &fn->config); + if (rc) + return rc; + + rc = set_function_device(fn, dev); + if (rc) + return rc; + + rc = assign_function_actag(fn); + if (rc) + return rc; + + rc = set_function_pasid(fn); + if (rc) + return rc; + + rc = ocxl_link_setup(dev, 0, &fn->link); + if (rc) + return rc; + + rc = ocxl_config_set_TL(dev, fn->config.dvsec_tl_pos); + if (rc) { + ocxl_link_release(dev, fn->link); + return rc; + } + return 0; +} + +static void deconfigure_function(struct ocxl_fn *fn) +{ + struct pci_dev *dev = to_pci_dev(fn->dev.parent); + + ocxl_link_release(dev, fn->link); + pci_disable_device(dev); +} + +static struct ocxl_fn *init_function(struct pci_dev *dev) +{ + struct ocxl_fn *fn; + int rc; + + fn = alloc_function(dev); + if (!fn) + return ERR_PTR(-ENOMEM); + + rc = configure_function(fn, dev); + if (rc) { + free_function(fn); + return ERR_PTR(rc); + } + + rc = device_register(&fn->dev); + if (rc) { + deconfigure_function(fn); + put_device(&fn->dev); + return ERR_PTR(rc); + } + return fn; +} + +static void remove_function(struct ocxl_fn *fn) +{ + deconfigure_function(fn); + device_unregister(&fn->dev); +} + +static int ocxl_probe(struct pci_dev *dev, const struct pci_device_id *id) +{ + int rc, afu_count = 0; + u8 afu; + struct ocxl_fn *fn; + + if (!radix_enabled()) { + dev_err(&dev->dev, "Unsupported memory model (hash)\n"); + return -ENODEV; + } + + fn = init_function(dev); + if (IS_ERR(fn)) { + dev_err(&dev->dev, "function init failed: %li\n", + PTR_ERR(fn)); + return PTR_ERR(fn); + } + + for (afu = 0; afu <= fn->config.max_afu_index; afu++) { + rc = ocxl_config_check_afu_index(dev, &fn->config, afu); + if (rc > 0) { + rc = init_afu(dev, fn, afu); + if (rc) { + dev_err(&dev->dev, + "Can't initialize AFU index %d\n", afu); + continue; + } + afu_count++; + } + } + dev_info(&dev->dev, "%d AFU(s) configured\n", afu_count); + return 0; +} + +static void ocxl_remove(struct pci_dev *dev) +{ + struct ocxl_afu *afu, *tmp; + struct ocxl_fn *fn = pci_get_drvdata(dev); + + list_for_each_entry_safe(afu, tmp, &fn->afu_list, list) { + remove_afu(afu); + } + remove_function(fn); +} + +struct pci_driver ocxl_pci_driver = { + .name = "ocxl", + .id_table = ocxl_pci_tbl, + .probe = ocxl_probe, + .remove = ocxl_remove, + .shutdown = ocxl_remove, +}; diff --git a/drivers/misc/ocxl/sysfs.c b/drivers/misc/ocxl/sysfs.c new file mode 100644 index 000000000..0ab1fd1b2 --- /dev/null +++ b/drivers/misc/ocxl/sysfs.c @@ -0,0 +1,141 @@ +// SPDX-License-Identifier: GPL-2.0+ +// Copyright 2017 IBM Corp. +#include +#include "ocxl_internal.h" + +static ssize_t global_mmio_size_show(struct device *device, + struct device_attribute *attr, + char *buf) +{ + struct ocxl_afu *afu = to_ocxl_afu(device); + + return scnprintf(buf, PAGE_SIZE, "%d\n", + afu->config.global_mmio_size); +} + +static ssize_t pp_mmio_size_show(struct device *device, + struct device_attribute *attr, + char *buf) +{ + struct ocxl_afu *afu = to_ocxl_afu(device); + + return scnprintf(buf, PAGE_SIZE, "%d\n", + afu->config.pp_mmio_stride); +} + +static ssize_t afu_version_show(struct device *device, + struct device_attribute *attr, + char *buf) +{ + struct ocxl_afu *afu = to_ocxl_afu(device); + + return scnprintf(buf, PAGE_SIZE, "%hhu:%hhu\n", + afu->config.version_major, + afu->config.version_minor); +} + +static ssize_t contexts_show(struct device *device, + struct device_attribute *attr, + char *buf) +{ + struct ocxl_afu *afu = to_ocxl_afu(device); + + return scnprintf(buf, PAGE_SIZE, "%d/%d\n", + afu->pasid_count, afu->pasid_max); +} + +static struct device_attribute afu_attrs[] = { + __ATTR_RO(global_mmio_size), + __ATTR_RO(pp_mmio_size), + __ATTR_RO(afu_version), + __ATTR_RO(contexts), +}; + +static ssize_t global_mmio_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *bin_attr, char *buf, + loff_t off, size_t count) +{ + struct ocxl_afu *afu = to_ocxl_afu(kobj_to_dev(kobj)); + + if (count == 0 || off < 0 || + off >= afu->config.global_mmio_size) + return 0; + memcpy_fromio(buf, afu->global_mmio_ptr + off, count); + return count; +} + +static vm_fault_t global_mmio_fault(struct vm_fault *vmf) +{ + struct vm_area_struct *vma = vmf->vma; + struct ocxl_afu *afu = vma->vm_private_data; + unsigned long offset; + + if (vmf->pgoff >= (afu->config.global_mmio_size >> PAGE_SHIFT)) + return VM_FAULT_SIGBUS; + + offset = vmf->pgoff; + offset += (afu->global_mmio_start >> PAGE_SHIFT); + return vmf_insert_pfn(vma, vmf->address, offset); +} + +static const struct vm_operations_struct global_mmio_vmops = { + .fault = global_mmio_fault, +}; + +static int global_mmio_mmap(struct file *filp, struct kobject *kobj, + struct bin_attribute *bin_attr, + struct vm_area_struct *vma) +{ + struct ocxl_afu *afu = to_ocxl_afu(kobj_to_dev(kobj)); + + if ((vma_pages(vma) + vma->vm_pgoff) > + (afu->config.global_mmio_size >> PAGE_SHIFT)) + return -EINVAL; + + vma->vm_flags |= VM_IO | VM_PFNMAP; + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + vma->vm_ops = &global_mmio_vmops; + vma->vm_private_data = afu; + return 0; +} + +int ocxl_sysfs_add_afu(struct ocxl_afu *afu) +{ + int i, rc; + + for (i = 0; i < ARRAY_SIZE(afu_attrs); i++) { + rc = device_create_file(&afu->dev, &afu_attrs[i]); + if (rc) + goto err; + } + + sysfs_attr_init(&afu->attr_global_mmio.attr); + afu->attr_global_mmio.attr.name = "global_mmio_area"; + afu->attr_global_mmio.attr.mode = 0600; + afu->attr_global_mmio.size = afu->config.global_mmio_size; + afu->attr_global_mmio.read = global_mmio_read; + afu->attr_global_mmio.mmap = global_mmio_mmap; + rc = device_create_bin_file(&afu->dev, &afu->attr_global_mmio); + if (rc) { + dev_err(&afu->dev, + "Unable to create global mmio attr for afu: %d\n", + rc); + goto err; + } + + return 0; + +err: + for (i--; i >= 0; i--) + device_remove_file(&afu->dev, &afu_attrs[i]); + return rc; +} + +void ocxl_sysfs_remove_afu(struct ocxl_afu *afu) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(afu_attrs); i++) + device_remove_file(&afu->dev, &afu_attrs[i]); + device_remove_bin_file(&afu->dev, &afu->attr_global_mmio); +} diff --git a/drivers/misc/ocxl/trace.c b/drivers/misc/ocxl/trace.c new file mode 100644 index 000000000..1e6947049 --- /dev/null +++ b/drivers/misc/ocxl/trace.c @@ -0,0 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0+ +// Copyright 2017 IBM Corp. +#ifndef __CHECKER__ +#define CREATE_TRACE_POINTS +#include "trace.h" +#endif diff --git a/drivers/misc/ocxl/trace.h b/drivers/misc/ocxl/trace.h new file mode 100644 index 000000000..bcb7ff330 --- /dev/null +++ b/drivers/misc/ocxl/trace.h @@ -0,0 +1,182 @@ +// SPDX-License-Identifier: GPL-2.0+ +// Copyright 2017 IBM Corp. +#undef TRACE_SYSTEM +#define TRACE_SYSTEM ocxl + +#if !defined(_TRACE_OCXL_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_OCXL_H + +#include + +DECLARE_EVENT_CLASS(ocxl_context, + TP_PROTO(pid_t pid, void *spa, int pasid, u32 pidr, u32 tidr), + TP_ARGS(pid, spa, pasid, pidr, tidr), + + TP_STRUCT__entry( + __field(pid_t, pid) + __field(void*, spa) + __field(int, pasid) + __field(u32, pidr) + __field(u32, tidr) + ), + + TP_fast_assign( + __entry->pid = pid; + __entry->spa = spa; + __entry->pasid = pasid; + __entry->pidr = pidr; + __entry->tidr = tidr; + ), + + TP_printk("linux pid=%d spa=0x%p pasid=0x%x pidr=0x%x tidr=0x%x", + __entry->pid, + __entry->spa, + __entry->pasid, + __entry->pidr, + __entry->tidr + ) +); + +DEFINE_EVENT(ocxl_context, ocxl_context_add, + TP_PROTO(pid_t pid, void *spa, int pasid, u32 pidr, u32 tidr), + TP_ARGS(pid, spa, pasid, pidr, tidr) +); + +DEFINE_EVENT(ocxl_context, ocxl_context_remove, + TP_PROTO(pid_t pid, void *spa, int pasid, u32 pidr, u32 tidr), + TP_ARGS(pid, spa, pasid, pidr, tidr) +); + +TRACE_EVENT(ocxl_terminate_pasid, + TP_PROTO(int pasid, int rc), + TP_ARGS(pasid, rc), + + TP_STRUCT__entry( + __field(int, pasid) + __field(int, rc) + ), + + TP_fast_assign( + __entry->pasid = pasid; + __entry->rc = rc; + ), + + TP_printk("pasid=0x%x rc=%d", + __entry->pasid, + __entry->rc + ) +); + +DECLARE_EVENT_CLASS(ocxl_fault_handler, + TP_PROTO(void *spa, u64 pe, u64 dsisr, u64 dar, u64 tfc), + TP_ARGS(spa, pe, dsisr, dar, tfc), + + TP_STRUCT__entry( + __field(void *, spa) + __field(u64, pe) + __field(u64, dsisr) + __field(u64, dar) + __field(u64, tfc) + ), + + TP_fast_assign( + __entry->spa = spa; + __entry->pe = pe; + __entry->dsisr = dsisr; + __entry->dar = dar; + __entry->tfc = tfc; + ), + + TP_printk("spa=%p pe=0x%llx dsisr=0x%llx dar=0x%llx tfc=0x%llx", + __entry->spa, + __entry->pe, + __entry->dsisr, + __entry->dar, + __entry->tfc + ) +); + +DEFINE_EVENT(ocxl_fault_handler, ocxl_fault, + TP_PROTO(void *spa, u64 pe, u64 dsisr, u64 dar, u64 tfc), + TP_ARGS(spa, pe, dsisr, dar, tfc) +); + +DEFINE_EVENT(ocxl_fault_handler, ocxl_fault_ack, + TP_PROTO(void *spa, u64 pe, u64 dsisr, u64 dar, u64 tfc), + TP_ARGS(spa, pe, dsisr, dar, tfc) +); + +TRACE_EVENT(ocxl_afu_irq_alloc, + TP_PROTO(int pasid, int irq_id, unsigned int virq, int hw_irq, + u64 irq_offset), + TP_ARGS(pasid, irq_id, virq, hw_irq, irq_offset), + + TP_STRUCT__entry( + __field(int, pasid) + __field(int, irq_id) + __field(unsigned int, virq) + __field(int, hw_irq) + __field(u64, irq_offset) + ), + + TP_fast_assign( + __entry->pasid = pasid; + __entry->irq_id = irq_id; + __entry->virq = virq; + __entry->hw_irq = hw_irq; + __entry->irq_offset = irq_offset; + ), + + TP_printk("pasid=0x%x irq_id=%d virq=%u hw_irq=%d irq_offset=0x%llx", + __entry->pasid, + __entry->irq_id, + __entry->virq, + __entry->hw_irq, + __entry->irq_offset + ) +); + +TRACE_EVENT(ocxl_afu_irq_free, + TP_PROTO(int pasid, int irq_id), + TP_ARGS(pasid, irq_id), + + TP_STRUCT__entry( + __field(int, pasid) + __field(int, irq_id) + ), + + TP_fast_assign( + __entry->pasid = pasid; + __entry->irq_id = irq_id; + ), + + TP_printk("pasid=0x%x irq_id=%d", + __entry->pasid, + __entry->irq_id + ) +); + +TRACE_EVENT(ocxl_afu_irq_receive, + TP_PROTO(int virq), + TP_ARGS(virq), + + TP_STRUCT__entry( + __field(int, virq) + ), + + TP_fast_assign( + __entry->virq = virq; + ), + + TP_printk("virq=%d", + __entry->virq + ) +); + +#endif /* _TRACE_OCXL_H */ + +/* This part must be outside protection */ +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE trace +#include diff --git a/drivers/misc/pch_phub.c b/drivers/misc/pch_phub.c new file mode 100644 index 000000000..309703e9c --- /dev/null +++ b/drivers/misc/pch_phub.c @@ -0,0 +1,915 @@ +/* + * Copyright (C) 2011 LAPIS Semiconductor Co., Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define PHUB_STATUS 0x00 /* Status Register offset */ +#define PHUB_CONTROL 0x04 /* Control Register offset */ +#define PHUB_TIMEOUT 0x05 /* Time out value for Status Register */ +#define PCH_PHUB_ROM_WRITE_ENABLE 0x01 /* Enabling for writing ROM */ +#define PCH_PHUB_ROM_WRITE_DISABLE 0x00 /* Disabling for writing ROM */ +#define PCH_PHUB_MAC_START_ADDR_EG20T 0x14 /* MAC data area start address + offset */ +#define PCH_PHUB_MAC_START_ADDR_ML7223 0x20C /* MAC data area start address + offset */ +#define PCH_PHUB_ROM_START_ADDR_EG20T 0x80 /* ROM data area start address offset + (Intel EG20T PCH)*/ +#define PCH_PHUB_ROM_START_ADDR_ML7213 0x400 /* ROM data area start address + offset(LAPIS Semicon ML7213) + */ +#define PCH_PHUB_ROM_START_ADDR_ML7223 0x400 /* ROM data area start address + offset(LAPIS Semicon ML7223) + */ + +/* MAX number of INT_REDUCE_CONTROL registers */ +#define MAX_NUM_INT_REDUCE_CONTROL_REG 128 +#define PCI_DEVICE_ID_PCH1_PHUB 0x8801 +#define PCH_MINOR_NOS 1 +#define CLKCFG_CAN_50MHZ 0x12000000 +#define CLKCFG_CANCLK_MASK 0xFF000000 +#define CLKCFG_UART_MASK 0xFFFFFF + +/* CM-iTC */ +#define CLKCFG_UART_48MHZ (1 << 16) +#define CLKCFG_UART_25MHZ (2 << 16) +#define CLKCFG_BAUDDIV (2 << 20) +#define CLKCFG_PLL2VCO (8 << 9) +#define CLKCFG_UARTCLKSEL (1 << 18) + +/* Macros for ML7213 */ +#define PCI_DEVICE_ID_ROHM_ML7213_PHUB 0x801A + +/* Macros for ML7223 */ +#define PCI_DEVICE_ID_ROHM_ML7223_mPHUB 0x8012 /* for Bus-m */ +#define PCI_DEVICE_ID_ROHM_ML7223_nPHUB 0x8002 /* for Bus-n */ + +/* Macros for ML7831 */ +#define PCI_DEVICE_ID_ROHM_ML7831_PHUB 0x8801 + +/* SROM ACCESS Macro */ +#define PCH_WORD_ADDR_MASK (~((1 << 2) - 1)) + +/* Registers address offset */ +#define PCH_PHUB_ID_REG 0x0000 +#define PCH_PHUB_QUEUE_PRI_VAL_REG 0x0004 +#define PCH_PHUB_RC_QUEUE_MAXSIZE_REG 0x0008 +#define PCH_PHUB_BRI_QUEUE_MAXSIZE_REG 0x000C +#define PCH_PHUB_COMP_RESP_TIMEOUT_REG 0x0010 +#define PCH_PHUB_BUS_SLAVE_CONTROL_REG 0x0014 +#define PCH_PHUB_DEADLOCK_AVOID_TYPE_REG 0x0018 +#define PCH_PHUB_INTPIN_REG_WPERMIT_REG0 0x0020 +#define PCH_PHUB_INTPIN_REG_WPERMIT_REG1 0x0024 +#define PCH_PHUB_INTPIN_REG_WPERMIT_REG2 0x0028 +#define PCH_PHUB_INTPIN_REG_WPERMIT_REG3 0x002C +#define PCH_PHUB_INT_REDUCE_CONTROL_REG_BASE 0x0040 +#define CLKCFG_REG_OFFSET 0x500 +#define FUNCSEL_REG_OFFSET 0x508 + +#define PCH_PHUB_OROM_SIZE 15360 + +/** + * struct pch_phub_reg - PHUB register structure + * @phub_id_reg: PHUB_ID register val + * @q_pri_val_reg: QUEUE_PRI_VAL register val + * @rc_q_maxsize_reg: RC_QUEUE_MAXSIZE register val + * @bri_q_maxsize_reg: BRI_QUEUE_MAXSIZE register val + * @comp_resp_timeout_reg: COMP_RESP_TIMEOUT register val + * @bus_slave_control_reg: BUS_SLAVE_CONTROL_REG register val + * @deadlock_avoid_type_reg: DEADLOCK_AVOID_TYPE register val + * @intpin_reg_wpermit_reg0: INTPIN_REG_WPERMIT register 0 val + * @intpin_reg_wpermit_reg1: INTPIN_REG_WPERMIT register 1 val + * @intpin_reg_wpermit_reg2: INTPIN_REG_WPERMIT register 2 val + * @intpin_reg_wpermit_reg3: INTPIN_REG_WPERMIT register 3 val + * @int_reduce_control_reg: INT_REDUCE_CONTROL registers val + * @clkcfg_reg: CLK CFG register val + * @funcsel_reg: Function select register value + * @pch_phub_base_address: Register base address + * @pch_phub_extrom_base_address: external rom base address + * @pch_mac_start_address: MAC address area start address + * @pch_opt_rom_start_address: Option ROM start address + * @ioh_type: Save IOH type + * @pdev: pointer to pci device struct + */ +struct pch_phub_reg { + u32 phub_id_reg; + u32 q_pri_val_reg; + u32 rc_q_maxsize_reg; + u32 bri_q_maxsize_reg; + u32 comp_resp_timeout_reg; + u32 bus_slave_control_reg; + u32 deadlock_avoid_type_reg; + u32 intpin_reg_wpermit_reg0; + u32 intpin_reg_wpermit_reg1; + u32 intpin_reg_wpermit_reg2; + u32 intpin_reg_wpermit_reg3; + u32 int_reduce_control_reg[MAX_NUM_INT_REDUCE_CONTROL_REG]; + u32 clkcfg_reg; + u32 funcsel_reg; + void __iomem *pch_phub_base_address; + void __iomem *pch_phub_extrom_base_address; + u32 pch_mac_start_address; + u32 pch_opt_rom_start_address; + int ioh_type; + struct pci_dev *pdev; +}; + +/* SROM SPEC for MAC address assignment offset */ +static const int pch_phub_mac_offset[ETH_ALEN] = {0x3, 0x2, 0x1, 0x0, 0xb, 0xa}; + +static DEFINE_MUTEX(pch_phub_mutex); + +/** + * pch_phub_read_modify_write_reg() - Reading modifying and writing register + * @reg_addr_offset: Register offset address value. + * @data: Writing value. + * @mask: Mask value. + */ +static void pch_phub_read_modify_write_reg(struct pch_phub_reg *chip, + unsigned int reg_addr_offset, + unsigned int data, unsigned int mask) +{ + void __iomem *reg_addr = chip->pch_phub_base_address + reg_addr_offset; + iowrite32(((ioread32(reg_addr) & ~mask)) | data, reg_addr); +} + +#ifdef CONFIG_PM +/* pch_phub_save_reg_conf - saves register configuration */ +static void pch_phub_save_reg_conf(struct pci_dev *pdev) +{ + unsigned int i; + struct pch_phub_reg *chip = pci_get_drvdata(pdev); + + void __iomem *p = chip->pch_phub_base_address; + + chip->phub_id_reg = ioread32(p + PCH_PHUB_ID_REG); + chip->q_pri_val_reg = ioread32(p + PCH_PHUB_QUEUE_PRI_VAL_REG); + chip->rc_q_maxsize_reg = ioread32(p + PCH_PHUB_RC_QUEUE_MAXSIZE_REG); + chip->bri_q_maxsize_reg = ioread32(p + PCH_PHUB_BRI_QUEUE_MAXSIZE_REG); + chip->comp_resp_timeout_reg = + ioread32(p + PCH_PHUB_COMP_RESP_TIMEOUT_REG); + chip->bus_slave_control_reg = + ioread32(p + PCH_PHUB_BUS_SLAVE_CONTROL_REG); + chip->deadlock_avoid_type_reg = + ioread32(p + PCH_PHUB_DEADLOCK_AVOID_TYPE_REG); + chip->intpin_reg_wpermit_reg0 = + ioread32(p + PCH_PHUB_INTPIN_REG_WPERMIT_REG0); + chip->intpin_reg_wpermit_reg1 = + ioread32(p + PCH_PHUB_INTPIN_REG_WPERMIT_REG1); + chip->intpin_reg_wpermit_reg2 = + ioread32(p + PCH_PHUB_INTPIN_REG_WPERMIT_REG2); + chip->intpin_reg_wpermit_reg3 = + ioread32(p + PCH_PHUB_INTPIN_REG_WPERMIT_REG3); + dev_dbg(&pdev->dev, "%s : " + "chip->phub_id_reg=%x, " + "chip->q_pri_val_reg=%x, " + "chip->rc_q_maxsize_reg=%x, " + "chip->bri_q_maxsize_reg=%x, " + "chip->comp_resp_timeout_reg=%x, " + "chip->bus_slave_control_reg=%x, " + "chip->deadlock_avoid_type_reg=%x, " + "chip->intpin_reg_wpermit_reg0=%x, " + "chip->intpin_reg_wpermit_reg1=%x, " + "chip->intpin_reg_wpermit_reg2=%x, " + "chip->intpin_reg_wpermit_reg3=%x\n", __func__, + chip->phub_id_reg, + chip->q_pri_val_reg, + chip->rc_q_maxsize_reg, + chip->bri_q_maxsize_reg, + chip->comp_resp_timeout_reg, + chip->bus_slave_control_reg, + chip->deadlock_avoid_type_reg, + chip->intpin_reg_wpermit_reg0, + chip->intpin_reg_wpermit_reg1, + chip->intpin_reg_wpermit_reg2, + chip->intpin_reg_wpermit_reg3); + for (i = 0; i < MAX_NUM_INT_REDUCE_CONTROL_REG; i++) { + chip->int_reduce_control_reg[i] = + ioread32(p + PCH_PHUB_INT_REDUCE_CONTROL_REG_BASE + 4 * i); + dev_dbg(&pdev->dev, "%s : " + "chip->int_reduce_control_reg[%d]=%x\n", + __func__, i, chip->int_reduce_control_reg[i]); + } + chip->clkcfg_reg = ioread32(p + CLKCFG_REG_OFFSET); + if ((chip->ioh_type == 2) || (chip->ioh_type == 4)) + chip->funcsel_reg = ioread32(p + FUNCSEL_REG_OFFSET); +} + +/* pch_phub_restore_reg_conf - restore register configuration */ +static void pch_phub_restore_reg_conf(struct pci_dev *pdev) +{ + unsigned int i; + struct pch_phub_reg *chip = pci_get_drvdata(pdev); + void __iomem *p; + p = chip->pch_phub_base_address; + + iowrite32(chip->phub_id_reg, p + PCH_PHUB_ID_REG); + iowrite32(chip->q_pri_val_reg, p + PCH_PHUB_QUEUE_PRI_VAL_REG); + iowrite32(chip->rc_q_maxsize_reg, p + PCH_PHUB_RC_QUEUE_MAXSIZE_REG); + iowrite32(chip->bri_q_maxsize_reg, p + PCH_PHUB_BRI_QUEUE_MAXSIZE_REG); + iowrite32(chip->comp_resp_timeout_reg, + p + PCH_PHUB_COMP_RESP_TIMEOUT_REG); + iowrite32(chip->bus_slave_control_reg, + p + PCH_PHUB_BUS_SLAVE_CONTROL_REG); + iowrite32(chip->deadlock_avoid_type_reg, + p + PCH_PHUB_DEADLOCK_AVOID_TYPE_REG); + iowrite32(chip->intpin_reg_wpermit_reg0, + p + PCH_PHUB_INTPIN_REG_WPERMIT_REG0); + iowrite32(chip->intpin_reg_wpermit_reg1, + p + PCH_PHUB_INTPIN_REG_WPERMIT_REG1); + iowrite32(chip->intpin_reg_wpermit_reg2, + p + PCH_PHUB_INTPIN_REG_WPERMIT_REG2); + iowrite32(chip->intpin_reg_wpermit_reg3, + p + PCH_PHUB_INTPIN_REG_WPERMIT_REG3); + dev_dbg(&pdev->dev, "%s : " + "chip->phub_id_reg=%x, " + "chip->q_pri_val_reg=%x, " + "chip->rc_q_maxsize_reg=%x, " + "chip->bri_q_maxsize_reg=%x, " + "chip->comp_resp_timeout_reg=%x, " + "chip->bus_slave_control_reg=%x, " + "chip->deadlock_avoid_type_reg=%x, " + "chip->intpin_reg_wpermit_reg0=%x, " + "chip->intpin_reg_wpermit_reg1=%x, " + "chip->intpin_reg_wpermit_reg2=%x, " + "chip->intpin_reg_wpermit_reg3=%x\n", __func__, + chip->phub_id_reg, + chip->q_pri_val_reg, + chip->rc_q_maxsize_reg, + chip->bri_q_maxsize_reg, + chip->comp_resp_timeout_reg, + chip->bus_slave_control_reg, + chip->deadlock_avoid_type_reg, + chip->intpin_reg_wpermit_reg0, + chip->intpin_reg_wpermit_reg1, + chip->intpin_reg_wpermit_reg2, + chip->intpin_reg_wpermit_reg3); + for (i = 0; i < MAX_NUM_INT_REDUCE_CONTROL_REG; i++) { + iowrite32(chip->int_reduce_control_reg[i], + p + PCH_PHUB_INT_REDUCE_CONTROL_REG_BASE + 4 * i); + dev_dbg(&pdev->dev, "%s : " + "chip->int_reduce_control_reg[%d]=%x\n", + __func__, i, chip->int_reduce_control_reg[i]); + } + + iowrite32(chip->clkcfg_reg, p + CLKCFG_REG_OFFSET); + if ((chip->ioh_type == 2) || (chip->ioh_type == 4)) + iowrite32(chip->funcsel_reg, p + FUNCSEL_REG_OFFSET); +} +#endif + +/** + * pch_phub_read_serial_rom() - Reading Serial ROM + * @offset_address: Serial ROM offset address to read. + * @data: Read buffer for specified Serial ROM value. + */ +static void pch_phub_read_serial_rom(struct pch_phub_reg *chip, + unsigned int offset_address, u8 *data) +{ + void __iomem *mem_addr = chip->pch_phub_extrom_base_address + + offset_address; + + *data = ioread8(mem_addr); +} + +/** + * pch_phub_write_serial_rom() - Writing Serial ROM + * @offset_address: Serial ROM offset address. + * @data: Serial ROM value to write. + */ +static int pch_phub_write_serial_rom(struct pch_phub_reg *chip, + unsigned int offset_address, u8 data) +{ + void __iomem *mem_addr = chip->pch_phub_extrom_base_address + + (offset_address & PCH_WORD_ADDR_MASK); + int i; + unsigned int word_data; + unsigned int pos; + unsigned int mask; + pos = (offset_address % 4) * 8; + mask = ~(0xFF << pos); + + iowrite32(PCH_PHUB_ROM_WRITE_ENABLE, + chip->pch_phub_extrom_base_address + PHUB_CONTROL); + + word_data = ioread32(mem_addr); + iowrite32((word_data & mask) | (u32)data << pos, mem_addr); + + i = 0; + while (ioread8(chip->pch_phub_extrom_base_address + + PHUB_STATUS) != 0x00) { + msleep(1); + if (i == PHUB_TIMEOUT) + return -ETIMEDOUT; + i++; + } + + iowrite32(PCH_PHUB_ROM_WRITE_DISABLE, + chip->pch_phub_extrom_base_address + PHUB_CONTROL); + + return 0; +} + +/** + * pch_phub_read_serial_rom_val() - Read Serial ROM value + * @offset_address: Serial ROM address offset value. + * @data: Serial ROM value to read. + */ +static void pch_phub_read_serial_rom_val(struct pch_phub_reg *chip, + unsigned int offset_address, u8 *data) +{ + unsigned int mem_addr; + + mem_addr = chip->pch_mac_start_address + + pch_phub_mac_offset[offset_address]; + + pch_phub_read_serial_rom(chip, mem_addr, data); +} + +/** + * pch_phub_write_serial_rom_val() - writing Serial ROM value + * @offset_address: Serial ROM address offset value. + * @data: Serial ROM value. + */ +static int pch_phub_write_serial_rom_val(struct pch_phub_reg *chip, + unsigned int offset_address, u8 data) +{ + int retval; + unsigned int mem_addr; + + mem_addr = chip->pch_mac_start_address + + pch_phub_mac_offset[offset_address]; + + retval = pch_phub_write_serial_rom(chip, mem_addr, data); + + return retval; +} + +/* pch_phub_gbe_serial_rom_conf - makes Serial ROM header format configuration + * for Gigabit Ethernet MAC address + */ +static int pch_phub_gbe_serial_rom_conf(struct pch_phub_reg *chip) +{ + int retval; + + retval = pch_phub_write_serial_rom(chip, 0x0b, 0xbc); + retval |= pch_phub_write_serial_rom(chip, 0x0a, 0x10); + retval |= pch_phub_write_serial_rom(chip, 0x09, 0x01); + retval |= pch_phub_write_serial_rom(chip, 0x08, 0x02); + + retval |= pch_phub_write_serial_rom(chip, 0x0f, 0x00); + retval |= pch_phub_write_serial_rom(chip, 0x0e, 0x00); + retval |= pch_phub_write_serial_rom(chip, 0x0d, 0x00); + retval |= pch_phub_write_serial_rom(chip, 0x0c, 0x80); + + retval |= pch_phub_write_serial_rom(chip, 0x13, 0xbc); + retval |= pch_phub_write_serial_rom(chip, 0x12, 0x10); + retval |= pch_phub_write_serial_rom(chip, 0x11, 0x01); + retval |= pch_phub_write_serial_rom(chip, 0x10, 0x18); + + retval |= pch_phub_write_serial_rom(chip, 0x1b, 0xbc); + retval |= pch_phub_write_serial_rom(chip, 0x1a, 0x10); + retval |= pch_phub_write_serial_rom(chip, 0x19, 0x01); + retval |= pch_phub_write_serial_rom(chip, 0x18, 0x19); + + retval |= pch_phub_write_serial_rom(chip, 0x23, 0xbc); + retval |= pch_phub_write_serial_rom(chip, 0x22, 0x10); + retval |= pch_phub_write_serial_rom(chip, 0x21, 0x01); + retval |= pch_phub_write_serial_rom(chip, 0x20, 0x3a); + + retval |= pch_phub_write_serial_rom(chip, 0x27, 0x01); + retval |= pch_phub_write_serial_rom(chip, 0x26, 0x00); + retval |= pch_phub_write_serial_rom(chip, 0x25, 0x00); + retval |= pch_phub_write_serial_rom(chip, 0x24, 0x00); + + return retval; +} + +/* pch_phub_gbe_serial_rom_conf_mp - makes SerialROM header format configuration + * for Gigabit Ethernet MAC address + */ +static int pch_phub_gbe_serial_rom_conf_mp(struct pch_phub_reg *chip) +{ + int retval; + u32 offset_addr; + + offset_addr = 0x200; + retval = pch_phub_write_serial_rom(chip, 0x03 + offset_addr, 0xbc); + retval |= pch_phub_write_serial_rom(chip, 0x02 + offset_addr, 0x00); + retval |= pch_phub_write_serial_rom(chip, 0x01 + offset_addr, 0x40); + retval |= pch_phub_write_serial_rom(chip, 0x00 + offset_addr, 0x02); + + retval |= pch_phub_write_serial_rom(chip, 0x07 + offset_addr, 0x00); + retval |= pch_phub_write_serial_rom(chip, 0x06 + offset_addr, 0x00); + retval |= pch_phub_write_serial_rom(chip, 0x05 + offset_addr, 0x00); + retval |= pch_phub_write_serial_rom(chip, 0x04 + offset_addr, 0x80); + + retval |= pch_phub_write_serial_rom(chip, 0x0b + offset_addr, 0xbc); + retval |= pch_phub_write_serial_rom(chip, 0x0a + offset_addr, 0x00); + retval |= pch_phub_write_serial_rom(chip, 0x09 + offset_addr, 0x40); + retval |= pch_phub_write_serial_rom(chip, 0x08 + offset_addr, 0x18); + + retval |= pch_phub_write_serial_rom(chip, 0x13 + offset_addr, 0xbc); + retval |= pch_phub_write_serial_rom(chip, 0x12 + offset_addr, 0x00); + retval |= pch_phub_write_serial_rom(chip, 0x11 + offset_addr, 0x40); + retval |= pch_phub_write_serial_rom(chip, 0x10 + offset_addr, 0x19); + + retval |= pch_phub_write_serial_rom(chip, 0x1b + offset_addr, 0xbc); + retval |= pch_phub_write_serial_rom(chip, 0x1a + offset_addr, 0x00); + retval |= pch_phub_write_serial_rom(chip, 0x19 + offset_addr, 0x40); + retval |= pch_phub_write_serial_rom(chip, 0x18 + offset_addr, 0x3a); + + retval |= pch_phub_write_serial_rom(chip, 0x1f + offset_addr, 0x01); + retval |= pch_phub_write_serial_rom(chip, 0x1e + offset_addr, 0x00); + retval |= pch_phub_write_serial_rom(chip, 0x1d + offset_addr, 0x00); + retval |= pch_phub_write_serial_rom(chip, 0x1c + offset_addr, 0x00); + + return retval; +} + +/** + * pch_phub_read_gbe_mac_addr() - Read Gigabit Ethernet MAC address + * @offset_address: Gigabit Ethernet MAC address offset value. + * @data: Buffer of the Gigabit Ethernet MAC address value. + */ +static void pch_phub_read_gbe_mac_addr(struct pch_phub_reg *chip, u8 *data) +{ + int i; + for (i = 0; i < ETH_ALEN; i++) + pch_phub_read_serial_rom_val(chip, i, &data[i]); +} + +/** + * pch_phub_write_gbe_mac_addr() - Write MAC address + * @offset_address: Gigabit Ethernet MAC address offset value. + * @data: Gigabit Ethernet MAC address value. + */ +static int pch_phub_write_gbe_mac_addr(struct pch_phub_reg *chip, u8 *data) +{ + int retval; + int i; + + if ((chip->ioh_type == 1) || (chip->ioh_type == 5)) /* EG20T or ML7831*/ + retval = pch_phub_gbe_serial_rom_conf(chip); + else /* ML7223 */ + retval = pch_phub_gbe_serial_rom_conf_mp(chip); + if (retval) + return retval; + + for (i = 0; i < ETH_ALEN; i++) { + retval = pch_phub_write_serial_rom_val(chip, i, data[i]); + if (retval) + return retval; + } + + return retval; +} + +static ssize_t pch_phub_bin_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, char *buf, + loff_t off, size_t count) +{ + unsigned int rom_signature; + unsigned char rom_length; + unsigned int tmp; + unsigned int addr_offset; + unsigned int orom_size; + int ret; + int err; + ssize_t rom_size; + + struct pch_phub_reg *chip = dev_get_drvdata(kobj_to_dev(kobj)); + + ret = mutex_lock_interruptible(&pch_phub_mutex); + if (ret) { + err = -ERESTARTSYS; + goto return_err_nomutex; + } + + /* Get Rom signature */ + chip->pch_phub_extrom_base_address = pci_map_rom(chip->pdev, &rom_size); + if (!chip->pch_phub_extrom_base_address) { + err = -ENODATA; + goto exrom_map_err; + } + + pch_phub_read_serial_rom(chip, chip->pch_opt_rom_start_address, + (unsigned char *)&rom_signature); + rom_signature &= 0xff; + pch_phub_read_serial_rom(chip, chip->pch_opt_rom_start_address + 1, + (unsigned char *)&tmp); + rom_signature |= (tmp & 0xff) << 8; + if (rom_signature == 0xAA55) { + pch_phub_read_serial_rom(chip, + chip->pch_opt_rom_start_address + 2, + &rom_length); + orom_size = rom_length * 512; + if (orom_size < off) { + addr_offset = 0; + goto return_ok; + } + if (orom_size < count) { + addr_offset = 0; + goto return_ok; + } + + for (addr_offset = 0; addr_offset < count; addr_offset++) { + pch_phub_read_serial_rom(chip, + chip->pch_opt_rom_start_address + addr_offset + off, + &buf[addr_offset]); + } + } else { + err = -ENODATA; + goto return_err; + } +return_ok: + pci_unmap_rom(chip->pdev, chip->pch_phub_extrom_base_address); + mutex_unlock(&pch_phub_mutex); + return addr_offset; + +return_err: + pci_unmap_rom(chip->pdev, chip->pch_phub_extrom_base_address); +exrom_map_err: + mutex_unlock(&pch_phub_mutex); +return_err_nomutex: + return err; +} + +static ssize_t pch_phub_bin_write(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, + char *buf, loff_t off, size_t count) +{ + int err; + unsigned int addr_offset; + int ret; + ssize_t rom_size; + struct pch_phub_reg *chip = dev_get_drvdata(kobj_to_dev(kobj)); + + ret = mutex_lock_interruptible(&pch_phub_mutex); + if (ret) + return -ERESTARTSYS; + + if (off > PCH_PHUB_OROM_SIZE) { + addr_offset = 0; + goto return_ok; + } + if (count > PCH_PHUB_OROM_SIZE) { + addr_offset = 0; + goto return_ok; + } + + chip->pch_phub_extrom_base_address = pci_map_rom(chip->pdev, &rom_size); + if (!chip->pch_phub_extrom_base_address) { + err = -ENOMEM; + goto exrom_map_err; + } + + for (addr_offset = 0; addr_offset < count; addr_offset++) { + if (PCH_PHUB_OROM_SIZE < off + addr_offset) + goto return_ok; + + ret = pch_phub_write_serial_rom(chip, + chip->pch_opt_rom_start_address + addr_offset + off, + buf[addr_offset]); + if (ret) { + err = ret; + goto return_err; + } + } + +return_ok: + pci_unmap_rom(chip->pdev, chip->pch_phub_extrom_base_address); + mutex_unlock(&pch_phub_mutex); + return addr_offset; + +return_err: + pci_unmap_rom(chip->pdev, chip->pch_phub_extrom_base_address); + +exrom_map_err: + mutex_unlock(&pch_phub_mutex); + return err; +} + +static ssize_t show_pch_mac(struct device *dev, struct device_attribute *attr, + char *buf) +{ + u8 mac[8]; + struct pch_phub_reg *chip = dev_get_drvdata(dev); + ssize_t rom_size; + + chip->pch_phub_extrom_base_address = pci_map_rom(chip->pdev, &rom_size); + if (!chip->pch_phub_extrom_base_address) + return -ENOMEM; + + pch_phub_read_gbe_mac_addr(chip, mac); + pci_unmap_rom(chip->pdev, chip->pch_phub_extrom_base_address); + + return sprintf(buf, "%pM\n", mac); +} + +static ssize_t store_pch_mac(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + u8 mac[ETH_ALEN]; + ssize_t rom_size; + struct pch_phub_reg *chip = dev_get_drvdata(dev); + int ret; + + if (!mac_pton(buf, mac)) + return -EINVAL; + + chip->pch_phub_extrom_base_address = pci_map_rom(chip->pdev, &rom_size); + if (!chip->pch_phub_extrom_base_address) + return -ENOMEM; + + ret = pch_phub_write_gbe_mac_addr(chip, mac); + pci_unmap_rom(chip->pdev, chip->pch_phub_extrom_base_address); + if (ret) + return ret; + + return count; +} + +static DEVICE_ATTR(pch_mac, S_IRUGO | S_IWUSR, show_pch_mac, store_pch_mac); + +static const struct bin_attribute pch_bin_attr = { + .attr = { + .name = "pch_firmware", + .mode = S_IRUGO | S_IWUSR, + }, + .size = PCH_PHUB_OROM_SIZE + 1, + .read = pch_phub_bin_read, + .write = pch_phub_bin_write, +}; + +static int pch_phub_probe(struct pci_dev *pdev, + const struct pci_device_id *id) +{ + int ret; + struct pch_phub_reg *chip; + + chip = kzalloc(sizeof(struct pch_phub_reg), GFP_KERNEL); + if (chip == NULL) + return -ENOMEM; + + ret = pci_enable_device(pdev); + if (ret) { + dev_err(&pdev->dev, + "%s : pci_enable_device FAILED(ret=%d)", __func__, ret); + goto err_pci_enable_dev; + } + dev_dbg(&pdev->dev, "%s : pci_enable_device returns %d\n", __func__, + ret); + + ret = pci_request_regions(pdev, KBUILD_MODNAME); + if (ret) { + dev_err(&pdev->dev, + "%s : pci_request_regions FAILED(ret=%d)", __func__, ret); + goto err_req_regions; + } + dev_dbg(&pdev->dev, "%s : " + "pci_request_regions returns %d\n", __func__, ret); + + chip->pch_phub_base_address = pci_iomap(pdev, 1, 0); + + + if (chip->pch_phub_base_address == NULL) { + dev_err(&pdev->dev, "%s : pci_iomap FAILED", __func__); + ret = -ENOMEM; + goto err_pci_iomap; + } + dev_dbg(&pdev->dev, "%s : pci_iomap SUCCESS and value " + "in pch_phub_base_address variable is %p\n", __func__, + chip->pch_phub_base_address); + + chip->pdev = pdev; /* Save pci device struct */ + + if (id->driver_data == 1) { /* EG20T PCH */ + const char *board_name; + unsigned int prefetch = 0x000affaa; + + if (pdev->dev.of_node) + of_property_read_u32(pdev->dev.of_node, + "intel,eg20t-prefetch", + &prefetch); + + ret = sysfs_create_file(&pdev->dev.kobj, + &dev_attr_pch_mac.attr); + if (ret) + goto err_sysfs_create; + + ret = sysfs_create_bin_file(&pdev->dev.kobj, &pch_bin_attr); + if (ret) + goto exit_bin_attr; + + pch_phub_read_modify_write_reg(chip, + (unsigned int)CLKCFG_REG_OFFSET, + CLKCFG_CAN_50MHZ, + CLKCFG_CANCLK_MASK); + + /* quirk for CM-iTC board */ + board_name = dmi_get_system_info(DMI_BOARD_NAME); + if (board_name && strstr(board_name, "CM-iTC")) + pch_phub_read_modify_write_reg(chip, + (unsigned int)CLKCFG_REG_OFFSET, + CLKCFG_UART_48MHZ | CLKCFG_BAUDDIV | + CLKCFG_PLL2VCO | CLKCFG_UARTCLKSEL, + CLKCFG_UART_MASK); + + /* set the prefech value */ + iowrite32(prefetch, chip->pch_phub_base_address + 0x14); + /* set the interrupt delay value */ + iowrite32(0x25, chip->pch_phub_base_address + 0x44); + chip->pch_opt_rom_start_address = PCH_PHUB_ROM_START_ADDR_EG20T; + chip->pch_mac_start_address = PCH_PHUB_MAC_START_ADDR_EG20T; + + /* quirk for MIPS Boston platform */ + if (pdev->dev.of_node) { + if (of_machine_is_compatible("img,boston")) { + pch_phub_read_modify_write_reg(chip, + (unsigned int)CLKCFG_REG_OFFSET, + CLKCFG_UART_25MHZ, + CLKCFG_UART_MASK); + } + } + } else if (id->driver_data == 2) { /* ML7213 IOH */ + ret = sysfs_create_bin_file(&pdev->dev.kobj, &pch_bin_attr); + if (ret) + goto err_sysfs_create; + /* set the prefech value + * Device2(USB OHCI #1/ USB EHCI #1/ USB Device):a + * Device4(SDIO #0,1,2):f + * Device6(SATA 2):f + * Device8(USB OHCI #0/ USB EHCI #0):a + */ + iowrite32(0x000affa0, chip->pch_phub_base_address + 0x14); + chip->pch_opt_rom_start_address =\ + PCH_PHUB_ROM_START_ADDR_ML7213; + } else if (id->driver_data == 3) { /* ML7223 IOH Bus-m*/ + /* set the prefech value + * Device8(GbE) + */ + iowrite32(0x000a0000, chip->pch_phub_base_address + 0x14); + /* set the interrupt delay value */ + iowrite32(0x25, chip->pch_phub_base_address + 0x140); + chip->pch_opt_rom_start_address =\ + PCH_PHUB_ROM_START_ADDR_ML7223; + chip->pch_mac_start_address = PCH_PHUB_MAC_START_ADDR_ML7223; + } else if (id->driver_data == 4) { /* ML7223 IOH Bus-n*/ + ret = sysfs_create_file(&pdev->dev.kobj, + &dev_attr_pch_mac.attr); + if (ret) + goto err_sysfs_create; + ret = sysfs_create_bin_file(&pdev->dev.kobj, &pch_bin_attr); + if (ret) + goto exit_bin_attr; + /* set the prefech value + * Device2(USB OHCI #0,1,2,3/ USB EHCI #0):a + * Device4(SDIO #0,1):f + * Device6(SATA 2):f + */ + iowrite32(0x0000ffa0, chip->pch_phub_base_address + 0x14); + chip->pch_opt_rom_start_address =\ + PCH_PHUB_ROM_START_ADDR_ML7223; + chip->pch_mac_start_address = PCH_PHUB_MAC_START_ADDR_ML7223; + } else if (id->driver_data == 5) { /* ML7831 */ + ret = sysfs_create_file(&pdev->dev.kobj, + &dev_attr_pch_mac.attr); + if (ret) + goto err_sysfs_create; + + ret = sysfs_create_bin_file(&pdev->dev.kobj, &pch_bin_attr); + if (ret) + goto exit_bin_attr; + + /* set the prefech value */ + iowrite32(0x000affaa, chip->pch_phub_base_address + 0x14); + /* set the interrupt delay value */ + iowrite32(0x25, chip->pch_phub_base_address + 0x44); + chip->pch_opt_rom_start_address = PCH_PHUB_ROM_START_ADDR_EG20T; + chip->pch_mac_start_address = PCH_PHUB_MAC_START_ADDR_EG20T; + } + + chip->ioh_type = id->driver_data; + pci_set_drvdata(pdev, chip); + + return 0; +exit_bin_attr: + sysfs_remove_file(&pdev->dev.kobj, &dev_attr_pch_mac.attr); + +err_sysfs_create: + pci_iounmap(pdev, chip->pch_phub_base_address); +err_pci_iomap: + pci_release_regions(pdev); +err_req_regions: + pci_disable_device(pdev); +err_pci_enable_dev: + kfree(chip); + dev_err(&pdev->dev, "%s returns %d\n", __func__, ret); + return ret; +} + +static void pch_phub_remove(struct pci_dev *pdev) +{ + struct pch_phub_reg *chip = pci_get_drvdata(pdev); + + sysfs_remove_file(&pdev->dev.kobj, &dev_attr_pch_mac.attr); + sysfs_remove_bin_file(&pdev->dev.kobj, &pch_bin_attr); + pci_iounmap(pdev, chip->pch_phub_base_address); + pci_release_regions(pdev); + pci_disable_device(pdev); + kfree(chip); +} + +#ifdef CONFIG_PM + +static int pch_phub_suspend(struct pci_dev *pdev, pm_message_t state) +{ + int ret; + + pch_phub_save_reg_conf(pdev); + ret = pci_save_state(pdev); + if (ret) { + dev_err(&pdev->dev, + " %s -pci_save_state returns %d\n", __func__, ret); + return ret; + } + pci_enable_wake(pdev, PCI_D3hot, 0); + pci_disable_device(pdev); + pci_set_power_state(pdev, pci_choose_state(pdev, state)); + + return 0; +} + +static int pch_phub_resume(struct pci_dev *pdev) +{ + int ret; + + pci_set_power_state(pdev, PCI_D0); + pci_restore_state(pdev); + ret = pci_enable_device(pdev); + if (ret) { + dev_err(&pdev->dev, + "%s-pci_enable_device failed(ret=%d) ", __func__, ret); + return ret; + } + + pci_enable_wake(pdev, PCI_D3hot, 0); + pch_phub_restore_reg_conf(pdev); + + return 0; +} +#else +#define pch_phub_suspend NULL +#define pch_phub_resume NULL +#endif /* CONFIG_PM */ + +static const struct pci_device_id pch_phub_pcidev_id[] = { + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_PCH1_PHUB), 1, }, + { PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ROHM_ML7213_PHUB), 2, }, + { PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ROHM_ML7223_mPHUB), 3, }, + { PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ROHM_ML7223_nPHUB), 4, }, + { PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ROHM_ML7831_PHUB), 5, }, + { } +}; +MODULE_DEVICE_TABLE(pci, pch_phub_pcidev_id); + +static struct pci_driver pch_phub_driver = { + .name = "pch_phub", + .id_table = pch_phub_pcidev_id, + .probe = pch_phub_probe, + .remove = pch_phub_remove, + .suspend = pch_phub_suspend, + .resume = pch_phub_resume +}; + +module_pci_driver(pch_phub_driver); + +MODULE_DESCRIPTION("Intel EG20T PCH/LAPIS Semiconductor IOH(ML7213/ML7223) PHUB"); +MODULE_LICENSE("GPL"); diff --git a/drivers/misc/pci_endpoint_test.c b/drivers/misc/pci_endpoint_test.c new file mode 100644 index 000000000..7d166f57f --- /dev/null +++ b/drivers/misc/pci_endpoint_test.c @@ -0,0 +1,831 @@ +/** + * Host side test driver to test endpoint functionality + * + * Copyright (C) 2017 Texas Instruments + * Author: Kishon Vijay Abraham I + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 of + * the License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +#define DRV_MODULE_NAME "pci-endpoint-test" + +#define IRQ_TYPE_UNDEFINED -1 +#define IRQ_TYPE_LEGACY 0 +#define IRQ_TYPE_MSI 1 +#define IRQ_TYPE_MSIX 2 + +#define PCI_ENDPOINT_TEST_MAGIC 0x0 + +#define PCI_ENDPOINT_TEST_COMMAND 0x4 +#define COMMAND_RAISE_LEGACY_IRQ BIT(0) +#define COMMAND_RAISE_MSI_IRQ BIT(1) +#define COMMAND_RAISE_MSIX_IRQ BIT(2) +#define COMMAND_READ BIT(3) +#define COMMAND_WRITE BIT(4) +#define COMMAND_COPY BIT(5) + +#define PCI_ENDPOINT_TEST_STATUS 0x8 +#define STATUS_READ_SUCCESS BIT(0) +#define STATUS_READ_FAIL BIT(1) +#define STATUS_WRITE_SUCCESS BIT(2) +#define STATUS_WRITE_FAIL BIT(3) +#define STATUS_COPY_SUCCESS BIT(4) +#define STATUS_COPY_FAIL BIT(5) +#define STATUS_IRQ_RAISED BIT(6) +#define STATUS_SRC_ADDR_INVALID BIT(7) +#define STATUS_DST_ADDR_INVALID BIT(8) + +#define PCI_ENDPOINT_TEST_LOWER_SRC_ADDR 0x0c +#define PCI_ENDPOINT_TEST_UPPER_SRC_ADDR 0x10 + +#define PCI_ENDPOINT_TEST_LOWER_DST_ADDR 0x14 +#define PCI_ENDPOINT_TEST_UPPER_DST_ADDR 0x18 + +#define PCI_ENDPOINT_TEST_SIZE 0x1c +#define PCI_ENDPOINT_TEST_CHECKSUM 0x20 + +#define PCI_ENDPOINT_TEST_IRQ_TYPE 0x24 +#define PCI_ENDPOINT_TEST_IRQ_NUMBER 0x28 + +#define PCI_DEVICE_ID_TI_AM654 0xb00c + +#define is_am654_pci_dev(pdev) \ + ((pdev)->device == PCI_DEVICE_ID_TI_AM654) + +static DEFINE_IDA(pci_endpoint_test_ida); + +#define to_endpoint_test(priv) container_of((priv), struct pci_endpoint_test, \ + miscdev) + +static bool no_msi; +module_param(no_msi, bool, 0444); +MODULE_PARM_DESC(no_msi, "Disable MSI interrupt in pci_endpoint_test"); + +static int irq_type = IRQ_TYPE_MSI; +module_param(irq_type, int, 0444); +MODULE_PARM_DESC(irq_type, "IRQ mode selection in pci_endpoint_test (0 - Legacy, 1 - MSI, 2 - MSI-X)"); + +enum pci_barno { + BAR_0, + BAR_1, + BAR_2, + BAR_3, + BAR_4, + BAR_5, +}; + +struct pci_endpoint_test { + struct pci_dev *pdev; + void __iomem *base; + void __iomem *bar[6]; + struct completion irq_raised; + int last_irq; + int num_irqs; + int irq_type; + /* mutex to protect the ioctls */ + struct mutex mutex; + struct miscdevice miscdev; + enum pci_barno test_reg_bar; + size_t alignment; +}; + +struct pci_endpoint_test_data { + enum pci_barno test_reg_bar; + size_t alignment; + int irq_type; +}; + +static inline u32 pci_endpoint_test_readl(struct pci_endpoint_test *test, + u32 offset) +{ + return readl(test->base + offset); +} + +static inline void pci_endpoint_test_writel(struct pci_endpoint_test *test, + u32 offset, u32 value) +{ + writel(value, test->base + offset); +} + +static inline u32 pci_endpoint_test_bar_readl(struct pci_endpoint_test *test, + int bar, int offset) +{ + return readl(test->bar[bar] + offset); +} + +static inline void pci_endpoint_test_bar_writel(struct pci_endpoint_test *test, + int bar, u32 offset, u32 value) +{ + writel(value, test->bar[bar] + offset); +} + +static irqreturn_t pci_endpoint_test_irqhandler(int irq, void *dev_id) +{ + struct pci_endpoint_test *test = dev_id; + u32 reg; + + reg = pci_endpoint_test_readl(test, PCI_ENDPOINT_TEST_STATUS); + if (reg & STATUS_IRQ_RAISED) { + test->last_irq = irq; + complete(&test->irq_raised); + reg &= ~STATUS_IRQ_RAISED; + } + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_STATUS, + reg); + + return IRQ_HANDLED; +} + +static void pci_endpoint_test_free_irq_vectors(struct pci_endpoint_test *test) +{ + struct pci_dev *pdev = test->pdev; + + pci_free_irq_vectors(pdev); + test->irq_type = IRQ_TYPE_UNDEFINED; +} + +static bool pci_endpoint_test_alloc_irq_vectors(struct pci_endpoint_test *test, + int type) +{ + int irq = -1; + struct pci_dev *pdev = test->pdev; + struct device *dev = &pdev->dev; + bool res = true; + + switch (type) { + case IRQ_TYPE_LEGACY: + irq = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_LEGACY); + if (irq < 0) + dev_err(dev, "Failed to get Legacy interrupt\n"); + break; + case IRQ_TYPE_MSI: + irq = pci_alloc_irq_vectors(pdev, 1, 32, PCI_IRQ_MSI); + if (irq < 0) + dev_err(dev, "Failed to get MSI interrupts\n"); + break; + case IRQ_TYPE_MSIX: + irq = pci_alloc_irq_vectors(pdev, 1, 2048, PCI_IRQ_MSIX); + if (irq < 0) + dev_err(dev, "Failed to get MSI-X interrupts\n"); + break; + default: + dev_err(dev, "Invalid IRQ type selected\n"); + } + + if (irq < 0) { + irq = 0; + res = false; + } + + test->irq_type = type; + test->num_irqs = irq; + + return res; +} + +static void pci_endpoint_test_release_irq(struct pci_endpoint_test *test) +{ + int i; + struct pci_dev *pdev = test->pdev; + struct device *dev = &pdev->dev; + + for (i = 0; i < test->num_irqs; i++) + devm_free_irq(dev, pci_irq_vector(pdev, i), test); + + test->num_irqs = 0; +} + +static bool pci_endpoint_test_request_irq(struct pci_endpoint_test *test) +{ + int i; + int err; + struct pci_dev *pdev = test->pdev; + struct device *dev = &pdev->dev; + + for (i = 0; i < test->num_irqs; i++) { + err = devm_request_irq(dev, pci_irq_vector(pdev, i), + pci_endpoint_test_irqhandler, + IRQF_SHARED, DRV_MODULE_NAME, test); + if (err) + goto fail; + } + + return true; + +fail: + switch (irq_type) { + case IRQ_TYPE_LEGACY: + dev_err(dev, "Failed to request IRQ %d for Legacy\n", + pci_irq_vector(pdev, i)); + break; + case IRQ_TYPE_MSI: + dev_err(dev, "Failed to request IRQ %d for MSI %d\n", + pci_irq_vector(pdev, i), + i + 1); + break; + case IRQ_TYPE_MSIX: + dev_err(dev, "Failed to request IRQ %d for MSI-X %d\n", + pci_irq_vector(pdev, i), + i + 1); + break; + } + + return false; +} + +static bool pci_endpoint_test_bar(struct pci_endpoint_test *test, + enum pci_barno barno) +{ + int j; + u32 val; + int size; + struct pci_dev *pdev = test->pdev; + + if (!test->bar[barno]) + return false; + + size = pci_resource_len(pdev, barno); + + if (barno == test->test_reg_bar) + size = 0x4; + + for (j = 0; j < size; j += 4) + pci_endpoint_test_bar_writel(test, barno, j, 0xA0A0A0A0); + + for (j = 0; j < size; j += 4) { + val = pci_endpoint_test_bar_readl(test, barno, j); + if (val != 0xA0A0A0A0) + return false; + } + + return true; +} + +static bool pci_endpoint_test_legacy_irq(struct pci_endpoint_test *test) +{ + u32 val; + + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_IRQ_TYPE, + IRQ_TYPE_LEGACY); + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_IRQ_NUMBER, 0); + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_COMMAND, + COMMAND_RAISE_LEGACY_IRQ); + val = wait_for_completion_timeout(&test->irq_raised, + msecs_to_jiffies(1000)); + if (!val) + return false; + + return true; +} + +static bool pci_endpoint_test_msi_irq(struct pci_endpoint_test *test, + u16 msi_num, bool msix) +{ + u32 val; + struct pci_dev *pdev = test->pdev; + + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_IRQ_TYPE, + msix == false ? IRQ_TYPE_MSI : + IRQ_TYPE_MSIX); + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_IRQ_NUMBER, msi_num); + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_COMMAND, + msix == false ? COMMAND_RAISE_MSI_IRQ : + COMMAND_RAISE_MSIX_IRQ); + val = wait_for_completion_timeout(&test->irq_raised, + msecs_to_jiffies(1000)); + if (!val) + return false; + + if (pci_irq_vector(pdev, msi_num - 1) == test->last_irq) + return true; + + return false; +} + +static bool pci_endpoint_test_copy(struct pci_endpoint_test *test, size_t size) +{ + bool ret = false; + void *src_addr; + void *dst_addr; + dma_addr_t src_phys_addr; + dma_addr_t dst_phys_addr; + struct pci_dev *pdev = test->pdev; + struct device *dev = &pdev->dev; + void *orig_src_addr; + dma_addr_t orig_src_phys_addr; + void *orig_dst_addr; + dma_addr_t orig_dst_phys_addr; + size_t offset; + size_t alignment = test->alignment; + int irq_type = test->irq_type; + u32 src_crc32; + u32 dst_crc32; + + if (size > SIZE_MAX - alignment) + goto err; + + if (irq_type < IRQ_TYPE_LEGACY || irq_type > IRQ_TYPE_MSIX) { + dev_err(dev, "Invalid IRQ type option\n"); + goto err; + } + + orig_src_addr = dma_alloc_coherent(dev, size + alignment, + &orig_src_phys_addr, GFP_KERNEL); + if (!orig_src_addr) { + dev_err(dev, "Failed to allocate source buffer\n"); + ret = false; + goto err; + } + + if (alignment && !IS_ALIGNED(orig_src_phys_addr, alignment)) { + src_phys_addr = PTR_ALIGN(orig_src_phys_addr, alignment); + offset = src_phys_addr - orig_src_phys_addr; + src_addr = orig_src_addr + offset; + } else { + src_phys_addr = orig_src_phys_addr; + src_addr = orig_src_addr; + } + + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_LOWER_SRC_ADDR, + lower_32_bits(src_phys_addr)); + + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_UPPER_SRC_ADDR, + upper_32_bits(src_phys_addr)); + + get_random_bytes(src_addr, size); + src_crc32 = crc32_le(~0, src_addr, size); + + orig_dst_addr = dma_alloc_coherent(dev, size + alignment, + &orig_dst_phys_addr, GFP_KERNEL); + if (!orig_dst_addr) { + dev_err(dev, "Failed to allocate destination address\n"); + ret = false; + goto err_orig_src_addr; + } + + if (alignment && !IS_ALIGNED(orig_dst_phys_addr, alignment)) { + dst_phys_addr = PTR_ALIGN(orig_dst_phys_addr, alignment); + offset = dst_phys_addr - orig_dst_phys_addr; + dst_addr = orig_dst_addr + offset; + } else { + dst_phys_addr = orig_dst_phys_addr; + dst_addr = orig_dst_addr; + } + + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_LOWER_DST_ADDR, + lower_32_bits(dst_phys_addr)); + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_UPPER_DST_ADDR, + upper_32_bits(dst_phys_addr)); + + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_SIZE, + size); + + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_IRQ_TYPE, irq_type); + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_IRQ_NUMBER, 1); + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_COMMAND, + COMMAND_COPY); + + wait_for_completion(&test->irq_raised); + + dst_crc32 = crc32_le(~0, dst_addr, size); + if (dst_crc32 == src_crc32) + ret = true; + + dma_free_coherent(dev, size + alignment, orig_dst_addr, + orig_dst_phys_addr); + +err_orig_src_addr: + dma_free_coherent(dev, size + alignment, orig_src_addr, + orig_src_phys_addr); + +err: + return ret; +} + +static bool pci_endpoint_test_write(struct pci_endpoint_test *test, size_t size) +{ + bool ret = false; + u32 reg; + void *addr; + dma_addr_t phys_addr; + struct pci_dev *pdev = test->pdev; + struct device *dev = &pdev->dev; + void *orig_addr; + dma_addr_t orig_phys_addr; + size_t offset; + size_t alignment = test->alignment; + int irq_type = test->irq_type; + u32 crc32; + + if (size > SIZE_MAX - alignment) + goto err; + + if (irq_type < IRQ_TYPE_LEGACY || irq_type > IRQ_TYPE_MSIX) { + dev_err(dev, "Invalid IRQ type option\n"); + goto err; + } + + orig_addr = dma_alloc_coherent(dev, size + alignment, &orig_phys_addr, + GFP_KERNEL); + if (!orig_addr) { + dev_err(dev, "Failed to allocate address\n"); + ret = false; + goto err; + } + + if (alignment && !IS_ALIGNED(orig_phys_addr, alignment)) { + phys_addr = PTR_ALIGN(orig_phys_addr, alignment); + offset = phys_addr - orig_phys_addr; + addr = orig_addr + offset; + } else { + phys_addr = orig_phys_addr; + addr = orig_addr; + } + + get_random_bytes(addr, size); + + crc32 = crc32_le(~0, addr, size); + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_CHECKSUM, + crc32); + + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_LOWER_SRC_ADDR, + lower_32_bits(phys_addr)); + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_UPPER_SRC_ADDR, + upper_32_bits(phys_addr)); + + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_SIZE, size); + + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_IRQ_TYPE, irq_type); + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_IRQ_NUMBER, 1); + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_COMMAND, + COMMAND_READ); + + wait_for_completion(&test->irq_raised); + + reg = pci_endpoint_test_readl(test, PCI_ENDPOINT_TEST_STATUS); + if (reg & STATUS_READ_SUCCESS) + ret = true; + + dma_free_coherent(dev, size + alignment, orig_addr, orig_phys_addr); + +err: + return ret; +} + +static bool pci_endpoint_test_read(struct pci_endpoint_test *test, size_t size) +{ + bool ret = false; + void *addr; + dma_addr_t phys_addr; + struct pci_dev *pdev = test->pdev; + struct device *dev = &pdev->dev; + void *orig_addr; + dma_addr_t orig_phys_addr; + size_t offset; + size_t alignment = test->alignment; + int irq_type = test->irq_type; + u32 crc32; + + if (size > SIZE_MAX - alignment) + goto err; + + if (irq_type < IRQ_TYPE_LEGACY || irq_type > IRQ_TYPE_MSIX) { + dev_err(dev, "Invalid IRQ type option\n"); + goto err; + } + + orig_addr = dma_alloc_coherent(dev, size + alignment, &orig_phys_addr, + GFP_KERNEL); + if (!orig_addr) { + dev_err(dev, "Failed to allocate destination address\n"); + ret = false; + goto err; + } + + if (alignment && !IS_ALIGNED(orig_phys_addr, alignment)) { + phys_addr = PTR_ALIGN(orig_phys_addr, alignment); + offset = phys_addr - orig_phys_addr; + addr = orig_addr + offset; + } else { + phys_addr = orig_phys_addr; + addr = orig_addr; + } + + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_LOWER_DST_ADDR, + lower_32_bits(phys_addr)); + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_UPPER_DST_ADDR, + upper_32_bits(phys_addr)); + + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_SIZE, size); + + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_IRQ_TYPE, irq_type); + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_IRQ_NUMBER, 1); + pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_COMMAND, + COMMAND_WRITE); + + wait_for_completion(&test->irq_raised); + + crc32 = crc32_le(~0, addr, size); + if (crc32 == pci_endpoint_test_readl(test, PCI_ENDPOINT_TEST_CHECKSUM)) + ret = true; + + dma_free_coherent(dev, size + alignment, orig_addr, orig_phys_addr); +err: + return ret; +} + +static bool pci_endpoint_test_set_irq(struct pci_endpoint_test *test, + int req_irq_type) +{ + struct pci_dev *pdev = test->pdev; + struct device *dev = &pdev->dev; + + if (req_irq_type < IRQ_TYPE_LEGACY || req_irq_type > IRQ_TYPE_MSIX) { + dev_err(dev, "Invalid IRQ type option\n"); + return false; + } + + if (test->irq_type == req_irq_type) + return true; + + pci_endpoint_test_release_irq(test); + pci_endpoint_test_free_irq_vectors(test); + + if (!pci_endpoint_test_alloc_irq_vectors(test, req_irq_type)) + goto err; + + if (!pci_endpoint_test_request_irq(test)) + goto err; + + return true; + +err: + pci_endpoint_test_free_irq_vectors(test); + return false; +} + +static long pci_endpoint_test_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + int ret = -EINVAL; + enum pci_barno bar; + struct pci_endpoint_test *test = to_endpoint_test(file->private_data); + struct pci_dev *pdev = test->pdev; + + mutex_lock(&test->mutex); + switch (cmd) { + case PCITEST_BAR: + bar = arg; + if (bar < 0 || bar > 5) + goto ret; + if (is_am654_pci_dev(pdev) && bar == BAR_0) + goto ret; + ret = pci_endpoint_test_bar(test, bar); + break; + case PCITEST_LEGACY_IRQ: + ret = pci_endpoint_test_legacy_irq(test); + break; + case PCITEST_MSI: + case PCITEST_MSIX: + ret = pci_endpoint_test_msi_irq(test, arg, cmd == PCITEST_MSIX); + break; + case PCITEST_WRITE: + ret = pci_endpoint_test_write(test, arg); + break; + case PCITEST_READ: + ret = pci_endpoint_test_read(test, arg); + break; + case PCITEST_COPY: + ret = pci_endpoint_test_copy(test, arg); + break; + case PCITEST_SET_IRQTYPE: + ret = pci_endpoint_test_set_irq(test, arg); + break; + case PCITEST_GET_IRQTYPE: + ret = irq_type; + break; + } + +ret: + mutex_unlock(&test->mutex); + return ret; +} + +static const struct file_operations pci_endpoint_test_fops = { + .owner = THIS_MODULE, + .unlocked_ioctl = pci_endpoint_test_ioctl, +}; + +static int pci_endpoint_test_probe(struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + int err; + int id; + char name[24]; + enum pci_barno bar; + void __iomem *base; + struct device *dev = &pdev->dev; + struct pci_endpoint_test *test; + struct pci_endpoint_test_data *data; + enum pci_barno test_reg_bar = BAR_0; + struct miscdevice *misc_device; + + if (pci_is_bridge(pdev)) + return -ENODEV; + + test = devm_kzalloc(dev, sizeof(*test), GFP_KERNEL); + if (!test) + return -ENOMEM; + + test->test_reg_bar = 0; + test->alignment = 0; + test->pdev = pdev; + test->irq_type = IRQ_TYPE_UNDEFINED; + + if (no_msi) + irq_type = IRQ_TYPE_LEGACY; + + data = (struct pci_endpoint_test_data *)ent->driver_data; + if (data) { + test_reg_bar = data->test_reg_bar; + test->test_reg_bar = test_reg_bar; + test->alignment = data->alignment; + irq_type = data->irq_type; + } + + init_completion(&test->irq_raised); + mutex_init(&test->mutex); + + err = pci_enable_device(pdev); + if (err) { + dev_err(dev, "Cannot enable PCI device\n"); + return err; + } + + err = pci_request_regions(pdev, DRV_MODULE_NAME); + if (err) { + dev_err(dev, "Cannot obtain PCI resources\n"); + goto err_disable_pdev; + } + + pci_set_master(pdev); + + if (!pci_endpoint_test_alloc_irq_vectors(test, irq_type)) + goto err_disable_irq; + + if (!pci_endpoint_test_request_irq(test)) + goto err_disable_irq; + + for (bar = BAR_0; bar <= BAR_5; bar++) { + if (pci_resource_flags(pdev, bar) & IORESOURCE_MEM) { + base = pci_ioremap_bar(pdev, bar); + if (!base) { + dev_err(dev, "Failed to read BAR%d\n", bar); + WARN_ON(bar == test_reg_bar); + } + test->bar[bar] = base; + } + } + + test->base = test->bar[test_reg_bar]; + if (!test->base) { + err = -ENOMEM; + dev_err(dev, "Cannot perform PCI test without BAR%d\n", + test_reg_bar); + goto err_iounmap; + } + + pci_set_drvdata(pdev, test); + + id = ida_simple_get(&pci_endpoint_test_ida, 0, 0, GFP_KERNEL); + if (id < 0) { + err = id; + dev_err(dev, "Unable to get id\n"); + goto err_iounmap; + } + + snprintf(name, sizeof(name), DRV_MODULE_NAME ".%d", id); + misc_device = &test->miscdev; + misc_device->minor = MISC_DYNAMIC_MINOR; + misc_device->name = kstrdup(name, GFP_KERNEL); + if (!misc_device->name) { + err = -ENOMEM; + goto err_ida_remove; + } + misc_device->fops = &pci_endpoint_test_fops, + + err = misc_register(misc_device); + if (err) { + dev_err(dev, "Failed to register device\n"); + goto err_kfree_name; + } + + return 0; + +err_kfree_name: + kfree(misc_device->name); + +err_ida_remove: + ida_simple_remove(&pci_endpoint_test_ida, id); + +err_iounmap: + for (bar = BAR_0; bar <= BAR_5; bar++) { + if (test->bar[bar]) + pci_iounmap(pdev, test->bar[bar]); + } + pci_endpoint_test_release_irq(test); + +err_disable_irq: + pci_endpoint_test_free_irq_vectors(test); + pci_release_regions(pdev); + +err_disable_pdev: + pci_disable_device(pdev); + + return err; +} + +static void pci_endpoint_test_remove(struct pci_dev *pdev) +{ + int id; + enum pci_barno bar; + struct pci_endpoint_test *test = pci_get_drvdata(pdev); + struct miscdevice *misc_device = &test->miscdev; + + if (sscanf(misc_device->name, DRV_MODULE_NAME ".%d", &id) != 1) + return; + if (id < 0) + return; + + misc_deregister(&test->miscdev); + kfree(misc_device->name); + ida_simple_remove(&pci_endpoint_test_ida, id); + for (bar = BAR_0; bar <= BAR_5; bar++) { + if (test->bar[bar]) + pci_iounmap(pdev, test->bar[bar]); + } + + pci_endpoint_test_release_irq(test); + pci_endpoint_test_free_irq_vectors(test); + + pci_release_regions(pdev); + pci_disable_device(pdev); +} + +static const struct pci_endpoint_test_data am654_data = { + .test_reg_bar = BAR_2, + .alignment = SZ_64K, + .irq_type = IRQ_TYPE_MSI, +}; + +static const struct pci_device_id pci_endpoint_test_tbl[] = { + { PCI_DEVICE(PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_DRA74x) }, + { PCI_DEVICE(PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_DRA72x) }, + { PCI_DEVICE(PCI_VENDOR_ID_FREESCALE, 0x81c0) }, + { PCI_DEVICE_DATA(SYNOPSYS, EDDA, NULL) }, + { PCI_DEVICE(PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_AM654), + .driver_data = (kernel_ulong_t)&am654_data + }, + { } +}; +MODULE_DEVICE_TABLE(pci, pci_endpoint_test_tbl); + +static struct pci_driver pci_endpoint_test_driver = { + .name = DRV_MODULE_NAME, + .id_table = pci_endpoint_test_tbl, + .probe = pci_endpoint_test_probe, + .remove = pci_endpoint_test_remove, +}; +module_pci_driver(pci_endpoint_test_driver); + +MODULE_DESCRIPTION("PCI ENDPOINT TEST HOST DRIVER"); +MODULE_AUTHOR("Kishon Vijay Abraham I "); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/misc/phantom.c b/drivers/misc/phantom.c new file mode 100644 index 000000000..b084245f6 --- /dev/null +++ b/drivers/misc/phantom.c @@ -0,0 +1,571 @@ +/* + * Copyright (C) 2005-2007 Jiri Slaby + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * You need a userspace library to cooperate with this driver. It (and other + * info) may be obtained here: + * http://www.fi.muni.cz/~xslaby/phantom.html + * or alternatively, you might use OpenHaptics provided by Sensable. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#define PHANTOM_VERSION "n0.9.8" + +#define PHANTOM_MAX_MINORS 8 + +#define PHN_IRQCTL 0x4c /* irq control in caddr space */ + +#define PHB_RUNNING 1 +#define PHB_NOT_OH 2 + +static DEFINE_MUTEX(phantom_mutex); +static struct class *phantom_class; +static int phantom_major; + +struct phantom_device { + unsigned int opened; + void __iomem *caddr; + u32 __iomem *iaddr; + u32 __iomem *oaddr; + unsigned long status; + atomic_t counter; + + wait_queue_head_t wait; + struct cdev cdev; + + struct mutex open_lock; + spinlock_t regs_lock; + + /* used in NOT_OH mode */ + struct phm_regs oregs; + u32 ctl_reg; +}; + +static unsigned char phantom_devices[PHANTOM_MAX_MINORS]; + +static int phantom_status(struct phantom_device *dev, unsigned long newstat) +{ + pr_debug("phantom_status %lx %lx\n", dev->status, newstat); + + if (!(dev->status & PHB_RUNNING) && (newstat & PHB_RUNNING)) { + atomic_set(&dev->counter, 0); + iowrite32(PHN_CTL_IRQ, dev->iaddr + PHN_CONTROL); + iowrite32(0x43, dev->caddr + PHN_IRQCTL); + ioread32(dev->caddr + PHN_IRQCTL); /* PCI posting */ + } else if ((dev->status & PHB_RUNNING) && !(newstat & PHB_RUNNING)) { + iowrite32(0, dev->caddr + PHN_IRQCTL); + ioread32(dev->caddr + PHN_IRQCTL); /* PCI posting */ + } + + dev->status = newstat; + + return 0; +} + +/* + * File ops + */ + +static long phantom_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + struct phantom_device *dev = file->private_data; + struct phm_regs rs; + struct phm_reg r; + void __user *argp = (void __user *)arg; + unsigned long flags; + unsigned int i; + + switch (cmd) { + case PHN_SETREG: + case PHN_SET_REG: + if (copy_from_user(&r, argp, sizeof(r))) + return -EFAULT; + + if (r.reg > 7) + return -EINVAL; + + spin_lock_irqsave(&dev->regs_lock, flags); + if (r.reg == PHN_CONTROL && (r.value & PHN_CTL_IRQ) && + phantom_status(dev, dev->status | PHB_RUNNING)){ + spin_unlock_irqrestore(&dev->regs_lock, flags); + return -ENODEV; + } + + pr_debug("phantom: writing %x to %u\n", r.value, r.reg); + + /* preserve amp bit (don't allow to change it when in NOT_OH) */ + if (r.reg == PHN_CONTROL && (dev->status & PHB_NOT_OH)) { + r.value &= ~PHN_CTL_AMP; + r.value |= dev->ctl_reg & PHN_CTL_AMP; + dev->ctl_reg = r.value; + } + + iowrite32(r.value, dev->iaddr + r.reg); + ioread32(dev->iaddr); /* PCI posting */ + + if (r.reg == PHN_CONTROL && !(r.value & PHN_CTL_IRQ)) + phantom_status(dev, dev->status & ~PHB_RUNNING); + spin_unlock_irqrestore(&dev->regs_lock, flags); + break; + case PHN_SETREGS: + case PHN_SET_REGS: + if (copy_from_user(&rs, argp, sizeof(rs))) + return -EFAULT; + + pr_debug("phantom: SRS %u regs %x\n", rs.count, rs.mask); + spin_lock_irqsave(&dev->regs_lock, flags); + if (dev->status & PHB_NOT_OH) + memcpy(&dev->oregs, &rs, sizeof(rs)); + else { + u32 m = min(rs.count, 8U); + for (i = 0; i < m; i++) + if (rs.mask & BIT(i)) + iowrite32(rs.values[i], dev->oaddr + i); + ioread32(dev->iaddr); /* PCI posting */ + } + spin_unlock_irqrestore(&dev->regs_lock, flags); + break; + case PHN_GETREG: + case PHN_GET_REG: + if (copy_from_user(&r, argp, sizeof(r))) + return -EFAULT; + + if (r.reg > 7) + return -EINVAL; + + r.value = ioread32(dev->iaddr + r.reg); + + if (copy_to_user(argp, &r, sizeof(r))) + return -EFAULT; + break; + case PHN_GETREGS: + case PHN_GET_REGS: { + u32 m; + + if (copy_from_user(&rs, argp, sizeof(rs))) + return -EFAULT; + + m = min(rs.count, 8U); + + pr_debug("phantom: GRS %u regs %x\n", rs.count, rs.mask); + spin_lock_irqsave(&dev->regs_lock, flags); + for (i = 0; i < m; i++) + if (rs.mask & BIT(i)) + rs.values[i] = ioread32(dev->iaddr + i); + atomic_set(&dev->counter, 0); + spin_unlock_irqrestore(&dev->regs_lock, flags); + + if (copy_to_user(argp, &rs, sizeof(rs))) + return -EFAULT; + break; + } case PHN_NOT_OH: + spin_lock_irqsave(&dev->regs_lock, flags); + if (dev->status & PHB_RUNNING) { + printk(KERN_ERR "phantom: you need to set NOT_OH " + "before you start the device!\n"); + spin_unlock_irqrestore(&dev->regs_lock, flags); + return -EINVAL; + } + dev->status |= PHB_NOT_OH; + spin_unlock_irqrestore(&dev->regs_lock, flags); + break; + default: + return -ENOTTY; + } + + return 0; +} + +#ifdef CONFIG_COMPAT +static long phantom_compat_ioctl(struct file *filp, unsigned int cmd, + unsigned long arg) +{ + if (_IOC_NR(cmd) <= 3 && _IOC_SIZE(cmd) == sizeof(compat_uptr_t)) { + cmd &= ~(_IOC_SIZEMASK << _IOC_SIZESHIFT); + cmd |= sizeof(void *) << _IOC_SIZESHIFT; + } + return phantom_ioctl(filp, cmd, (unsigned long)compat_ptr(arg)); +} +#else +#define phantom_compat_ioctl NULL +#endif + +static int phantom_open(struct inode *inode, struct file *file) +{ + struct phantom_device *dev = container_of(inode->i_cdev, + struct phantom_device, cdev); + + mutex_lock(&phantom_mutex); + nonseekable_open(inode, file); + + if (mutex_lock_interruptible(&dev->open_lock)) { + mutex_unlock(&phantom_mutex); + return -ERESTARTSYS; + } + + if (dev->opened) { + mutex_unlock(&dev->open_lock); + mutex_unlock(&phantom_mutex); + return -EINVAL; + } + + WARN_ON(dev->status & PHB_NOT_OH); + + file->private_data = dev; + + atomic_set(&dev->counter, 0); + dev->opened++; + mutex_unlock(&dev->open_lock); + mutex_unlock(&phantom_mutex); + return 0; +} + +static int phantom_release(struct inode *inode, struct file *file) +{ + struct phantom_device *dev = file->private_data; + + mutex_lock(&dev->open_lock); + + dev->opened = 0; + phantom_status(dev, dev->status & ~PHB_RUNNING); + dev->status &= ~PHB_NOT_OH; + + mutex_unlock(&dev->open_lock); + + return 0; +} + +static __poll_t phantom_poll(struct file *file, poll_table *wait) +{ + struct phantom_device *dev = file->private_data; + __poll_t mask = 0; + + pr_debug("phantom_poll: %d\n", atomic_read(&dev->counter)); + poll_wait(file, &dev->wait, wait); + + if (!(dev->status & PHB_RUNNING)) + mask = EPOLLERR; + else if (atomic_read(&dev->counter)) + mask = EPOLLIN | EPOLLRDNORM; + + pr_debug("phantom_poll end: %x/%d\n", mask, atomic_read(&dev->counter)); + + return mask; +} + +static const struct file_operations phantom_file_ops = { + .open = phantom_open, + .release = phantom_release, + .unlocked_ioctl = phantom_ioctl, + .compat_ioctl = phantom_compat_ioctl, + .poll = phantom_poll, + .llseek = no_llseek, +}; + +static irqreturn_t phantom_isr(int irq, void *data) +{ + struct phantom_device *dev = data; + unsigned int i; + u32 ctl; + + spin_lock(&dev->regs_lock); + ctl = ioread32(dev->iaddr + PHN_CONTROL); + if (!(ctl & PHN_CTL_IRQ)) { + spin_unlock(&dev->regs_lock); + return IRQ_NONE; + } + + iowrite32(0, dev->iaddr); + iowrite32(0xc0, dev->iaddr); + + if (dev->status & PHB_NOT_OH) { + struct phm_regs *r = &dev->oregs; + u32 m = min(r->count, 8U); + + for (i = 0; i < m; i++) + if (r->mask & BIT(i)) + iowrite32(r->values[i], dev->oaddr + i); + + dev->ctl_reg ^= PHN_CTL_AMP; + iowrite32(dev->ctl_reg, dev->iaddr + PHN_CONTROL); + } + spin_unlock(&dev->regs_lock); + + ioread32(dev->iaddr); /* PCI posting */ + + atomic_inc(&dev->counter); + wake_up_interruptible(&dev->wait); + + return IRQ_HANDLED; +} + +/* + * Init and deinit driver + */ + +static unsigned int phantom_get_free(void) +{ + unsigned int i; + + for (i = 0; i < PHANTOM_MAX_MINORS; i++) + if (phantom_devices[i] == 0) + break; + + return i; +} + +static int phantom_probe(struct pci_dev *pdev, + const struct pci_device_id *pci_id) +{ + struct phantom_device *pht; + unsigned int minor; + int retval; + + retval = pci_enable_device(pdev); + if (retval) { + dev_err(&pdev->dev, "pci_enable_device failed!\n"); + goto err; + } + + minor = phantom_get_free(); + if (minor == PHANTOM_MAX_MINORS) { + dev_err(&pdev->dev, "too many devices found!\n"); + retval = -EIO; + goto err_dis; + } + + phantom_devices[minor] = 1; + + retval = pci_request_regions(pdev, "phantom"); + if (retval) { + dev_err(&pdev->dev, "pci_request_regions failed!\n"); + goto err_null; + } + + retval = -ENOMEM; + pht = kzalloc(sizeof(*pht), GFP_KERNEL); + if (pht == NULL) { + dev_err(&pdev->dev, "unable to allocate device\n"); + goto err_reg; + } + + pht->caddr = pci_iomap(pdev, 0, 0); + if (pht->caddr == NULL) { + dev_err(&pdev->dev, "can't remap conf space\n"); + goto err_fr; + } + pht->iaddr = pci_iomap(pdev, 2, 0); + if (pht->iaddr == NULL) { + dev_err(&pdev->dev, "can't remap input space\n"); + goto err_unmc; + } + pht->oaddr = pci_iomap(pdev, 3, 0); + if (pht->oaddr == NULL) { + dev_err(&pdev->dev, "can't remap output space\n"); + goto err_unmi; + } + + mutex_init(&pht->open_lock); + spin_lock_init(&pht->regs_lock); + init_waitqueue_head(&pht->wait); + cdev_init(&pht->cdev, &phantom_file_ops); + pht->cdev.owner = THIS_MODULE; + + iowrite32(0, pht->caddr + PHN_IRQCTL); + ioread32(pht->caddr + PHN_IRQCTL); /* PCI posting */ + retval = request_irq(pdev->irq, phantom_isr, + IRQF_SHARED, "phantom", pht); + if (retval) { + dev_err(&pdev->dev, "can't establish ISR\n"); + goto err_unmo; + } + + retval = cdev_add(&pht->cdev, MKDEV(phantom_major, minor), 1); + if (retval) { + dev_err(&pdev->dev, "chardev registration failed\n"); + goto err_irq; + } + + if (IS_ERR(device_create(phantom_class, &pdev->dev, + MKDEV(phantom_major, minor), NULL, + "phantom%u", minor))) + dev_err(&pdev->dev, "can't create device\n"); + + pci_set_drvdata(pdev, pht); + + return 0; +err_irq: + free_irq(pdev->irq, pht); +err_unmo: + pci_iounmap(pdev, pht->oaddr); +err_unmi: + pci_iounmap(pdev, pht->iaddr); +err_unmc: + pci_iounmap(pdev, pht->caddr); +err_fr: + kfree(pht); +err_reg: + pci_release_regions(pdev); +err_null: + phantom_devices[minor] = 0; +err_dis: + pci_disable_device(pdev); +err: + return retval; +} + +static void phantom_remove(struct pci_dev *pdev) +{ + struct phantom_device *pht = pci_get_drvdata(pdev); + unsigned int minor = MINOR(pht->cdev.dev); + + device_destroy(phantom_class, MKDEV(phantom_major, minor)); + + cdev_del(&pht->cdev); + + iowrite32(0, pht->caddr + PHN_IRQCTL); + ioread32(pht->caddr + PHN_IRQCTL); /* PCI posting */ + free_irq(pdev->irq, pht); + + pci_iounmap(pdev, pht->oaddr); + pci_iounmap(pdev, pht->iaddr); + pci_iounmap(pdev, pht->caddr); + + kfree(pht); + + pci_release_regions(pdev); + + phantom_devices[minor] = 0; + + pci_disable_device(pdev); +} + +#ifdef CONFIG_PM +static int phantom_suspend(struct pci_dev *pdev, pm_message_t state) +{ + struct phantom_device *dev = pci_get_drvdata(pdev); + + iowrite32(0, dev->caddr + PHN_IRQCTL); + ioread32(dev->caddr + PHN_IRQCTL); /* PCI posting */ + + synchronize_irq(pdev->irq); + + return 0; +} + +static int phantom_resume(struct pci_dev *pdev) +{ + struct phantom_device *dev = pci_get_drvdata(pdev); + + iowrite32(0, dev->caddr + PHN_IRQCTL); + + return 0; +} +#else +#define phantom_suspend NULL +#define phantom_resume NULL +#endif + +static struct pci_device_id phantom_pci_tbl[] = { + { .vendor = PCI_VENDOR_ID_PLX, .device = PCI_DEVICE_ID_PLX_9050, + .subvendor = PCI_VENDOR_ID_PLX, .subdevice = PCI_DEVICE_ID_PLX_9050, + .class = PCI_CLASS_BRIDGE_OTHER << 8, .class_mask = 0xffff00 }, + { 0, } +}; +MODULE_DEVICE_TABLE(pci, phantom_pci_tbl); + +static struct pci_driver phantom_pci_driver = { + .name = "phantom", + .id_table = phantom_pci_tbl, + .probe = phantom_probe, + .remove = phantom_remove, + .suspend = phantom_suspend, + .resume = phantom_resume +}; + +static CLASS_ATTR_STRING(version, 0444, PHANTOM_VERSION); + +static int __init phantom_init(void) +{ + int retval; + dev_t dev; + + phantom_class = class_create(THIS_MODULE, "phantom"); + if (IS_ERR(phantom_class)) { + retval = PTR_ERR(phantom_class); + printk(KERN_ERR "phantom: can't register phantom class\n"); + goto err; + } + retval = class_create_file(phantom_class, &class_attr_version.attr); + if (retval) { + printk(KERN_ERR "phantom: can't create sysfs version file\n"); + goto err_class; + } + + retval = alloc_chrdev_region(&dev, 0, PHANTOM_MAX_MINORS, "phantom"); + if (retval) { + printk(KERN_ERR "phantom: can't register character device\n"); + goto err_attr; + } + phantom_major = MAJOR(dev); + + retval = pci_register_driver(&phantom_pci_driver); + if (retval) { + printk(KERN_ERR "phantom: can't register pci driver\n"); + goto err_unchr; + } + + printk(KERN_INFO "Phantom Linux Driver, version " PHANTOM_VERSION ", " + "init OK\n"); + + return 0; +err_unchr: + unregister_chrdev_region(dev, PHANTOM_MAX_MINORS); +err_attr: + class_remove_file(phantom_class, &class_attr_version.attr); +err_class: + class_destroy(phantom_class); +err: + return retval; +} + +static void __exit phantom_exit(void) +{ + pci_unregister_driver(&phantom_pci_driver); + + unregister_chrdev_region(MKDEV(phantom_major, 0), PHANTOM_MAX_MINORS); + + class_remove_file(phantom_class, &class_attr_version.attr); + class_destroy(phantom_class); + + pr_debug("phantom: module successfully removed\n"); +} + +module_init(phantom_init); +module_exit(phantom_exit); + +MODULE_AUTHOR("Jiri Slaby "); +MODULE_DESCRIPTION("Sensable Phantom driver (PCI devices)"); +MODULE_LICENSE("GPL"); +MODULE_VERSION(PHANTOM_VERSION); diff --git a/drivers/misc/pti.c b/drivers/misc/pti.c new file mode 100644 index 000000000..41f2a9f68 --- /dev/null +++ b/drivers/misc/pti.c @@ -0,0 +1,988 @@ +/* + * pti.c - PTI driver for cJTAG data extration + * + * Copyright (C) Intel 2010 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * The PTI (Parallel Trace Interface) driver directs trace data routed from + * various parts in the system out through the Intel Penwell PTI port and + * out of the mobile device for analysis with a debugging tool + * (Lauterbach, Fido). This is part of a solution for the MIPI P1149.7, + * compact JTAG, standard. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DRIVERNAME "pti" +#define PCINAME "pciPTI" +#define TTYNAME "ttyPTI" +#define CHARNAME "pti" +#define PTITTY_MINOR_START 0 +#define PTITTY_MINOR_NUM 2 +#define MAX_APP_IDS 16 /* 128 channel ids / u8 bit size */ +#define MAX_OS_IDS 16 /* 128 channel ids / u8 bit size */ +#define MAX_MODEM_IDS 16 /* 128 channel ids / u8 bit size */ +#define MODEM_BASE_ID 71 /* modem master ID address */ +#define CONTROL_ID 72 /* control master ID address */ +#define CONSOLE_ID 73 /* console master ID address */ +#define OS_BASE_ID 74 /* base OS master ID address */ +#define APP_BASE_ID 80 /* base App master ID address */ +#define CONTROL_FRAME_LEN 32 /* PTI control frame maximum size */ +#define USER_COPY_SIZE 8192 /* 8Kb buffer for user space copy */ +#define APERTURE_14 0x3800000 /* offset to first OS write addr */ +#define APERTURE_LEN 0x400000 /* address length */ + +struct pti_tty { + struct pti_masterchannel *mc; +}; + +struct pti_dev { + struct tty_port port[PTITTY_MINOR_NUM]; + unsigned long pti_addr; + unsigned long aperture_base; + void __iomem *pti_ioaddr; + u8 ia_app[MAX_APP_IDS]; + u8 ia_os[MAX_OS_IDS]; + u8 ia_modem[MAX_MODEM_IDS]; +}; + +/* + * This protects access to ia_app, ia_os, and ia_modem, + * which keeps track of channels allocated in + * an aperture write id. + */ +static DEFINE_MUTEX(alloclock); + +static const struct pci_device_id pci_ids[] = { + {PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x82B)}, + {0} +}; + +static struct tty_driver *pti_tty_driver; +static struct pti_dev *drv_data; + +static unsigned int pti_console_channel; +static unsigned int pti_control_channel; + +/** + * pti_write_to_aperture()- The private write function to PTI HW. + * + * @mc: The 'aperture'. It's part of a write address that holds + * a master and channel ID. + * @buf: Data being written to the HW that will ultimately be seen + * in a debugging tool (Fido, Lauterbach). + * @len: Size of buffer. + * + * Since each aperture is specified by a unique + * master/channel ID, no two processes will be writing + * to the same aperture at the same time so no lock is required. The + * PTI-Output agent will send these out in the order that they arrived, and + * thus, it will intermix these messages. The debug tool can then later + * regroup the appropriate message segments together reconstituting each + * message. + */ +static void pti_write_to_aperture(struct pti_masterchannel *mc, + u8 *buf, + int len) +{ + int dwordcnt; + int final; + int i; + u32 ptiword; + u32 __iomem *aperture; + u8 *p = buf; + + /* + * calculate the aperture offset from the base using the master and + * channel id's. + */ + aperture = drv_data->pti_ioaddr + (mc->master << 15) + + (mc->channel << 8); + + dwordcnt = len >> 2; + final = len - (dwordcnt << 2); /* final = trailing bytes */ + if (final == 0 && dwordcnt != 0) { /* always need a final dword */ + final += 4; + dwordcnt--; + } + + for (i = 0; i < dwordcnt; i++) { + ptiword = be32_to_cpu(*(u32 *)p); + p += 4; + iowrite32(ptiword, aperture); + } + + aperture += PTI_LASTDWORD_DTS; /* adding DTS signals that is EOM */ + + ptiword = 0; + for (i = 0; i < final; i++) + ptiword |= *p++ << (24-(8*i)); + + iowrite32(ptiword, aperture); + return; +} + +/** + * pti_control_frame_built_and_sent()- control frame build and send function. + * + * @mc: The master / channel structure on which the function + * built a control frame. + * @thread_name: The thread name associated with the master / channel or + * 'NULL' if using the 'current' global variable. + * + * To be able to post process the PTI contents on host side, a control frame + * is added before sending any PTI content. So the host side knows on + * each PTI frame the name of the thread using a dedicated master / channel. + * The thread name is retrieved from 'current' global variable if 'thread_name' + * is 'NULL', else it is retrieved from 'thread_name' parameter. + * This function builds this frame and sends it to a master ID CONTROL_ID. + * The overhead is only 32 bytes since the driver only writes to HW + * in 32 byte chunks. + */ +static void pti_control_frame_built_and_sent(struct pti_masterchannel *mc, + const char *thread_name) +{ + /* + * Since we access the comm member in current's task_struct, we only + * need to be as large as what 'comm' in that structure is. + */ + char comm[TASK_COMM_LEN]; + struct pti_masterchannel mccontrol = {.master = CONTROL_ID, + .channel = 0}; + const char *thread_name_p; + const char *control_format = "%3d %3d %s"; + u8 control_frame[CONTROL_FRAME_LEN]; + + if (!thread_name) { + if (!in_interrupt()) + get_task_comm(comm, current); + else + strncpy(comm, "Interrupt", TASK_COMM_LEN); + + /* Absolutely ensure our buffer is zero terminated. */ + comm[TASK_COMM_LEN-1] = 0; + thread_name_p = comm; + } else { + thread_name_p = thread_name; + } + + mccontrol.channel = pti_control_channel; + pti_control_channel = (pti_control_channel + 1) & 0x7f; + + snprintf(control_frame, CONTROL_FRAME_LEN, control_format, mc->master, + mc->channel, thread_name_p); + pti_write_to_aperture(&mccontrol, control_frame, strlen(control_frame)); +} + +/** + * pti_write_full_frame_to_aperture()- high level function to + * write to PTI. + * + * @mc: The 'aperture'. It's part of a write address that holds + * a master and channel ID. + * @buf: Data being written to the HW that will ultimately be seen + * in a debugging tool (Fido, Lauterbach). + * @len: Size of buffer. + * + * All threads sending data (either console, user space application, ...) + * are calling the high level function to write to PTI meaning that it is + * possible to add a control frame before sending the content. + */ +static void pti_write_full_frame_to_aperture(struct pti_masterchannel *mc, + const unsigned char *buf, + int len) +{ + pti_control_frame_built_and_sent(mc, NULL); + pti_write_to_aperture(mc, (u8 *)buf, len); +} + +/** + * get_id()- Allocate a master and channel ID. + * + * @id_array: an array of bits representing what channel + * id's are allocated for writing. + * @max_ids: The max amount of available write IDs to use. + * @base_id: The starting SW channel ID, based on the Intel + * PTI arch. + * @thread_name: The thread name associated with the master / channel or + * 'NULL' if using the 'current' global variable. + * + * Returns: + * pti_masterchannel struct with master, channel ID address + * 0 for error + * + * Each bit in the arrays ia_app and ia_os correspond to a master and + * channel id. The bit is one if the id is taken and 0 if free. For + * every master there are 128 channel id's. + */ +static struct pti_masterchannel *get_id(u8 *id_array, + int max_ids, + int base_id, + const char *thread_name) +{ + struct pti_masterchannel *mc; + int i, j, mask; + + mc = kmalloc(sizeof(struct pti_masterchannel), GFP_KERNEL); + if (mc == NULL) + return NULL; + + /* look for a byte with a free bit */ + for (i = 0; i < max_ids; i++) + if (id_array[i] != 0xff) + break; + if (i == max_ids) { + kfree(mc); + return NULL; + } + /* find the bit in the 128 possible channel opportunities */ + mask = 0x80; + for (j = 0; j < 8; j++) { + if ((id_array[i] & mask) == 0) + break; + mask >>= 1; + } + + /* grab it */ + id_array[i] |= mask; + mc->master = base_id; + mc->channel = ((i & 0xf)<<3) + j; + /* write new master Id / channel Id allocation to channel control */ + pti_control_frame_built_and_sent(mc, thread_name); + return mc; +} + +/* + * The following three functions: + * pti_request_mastercahannel(), mipi_release_masterchannel() + * and pti_writedata() are an API for other kernel drivers to + * access PTI. + */ + +/** + * pti_request_masterchannel()- Kernel API function used to allocate + * a master, channel ID address + * to write to PTI HW. + * + * @type: 0- request Application master, channel aperture ID + * write address. + * 1- request OS master, channel aperture ID write + * address. + * 2- request Modem master, channel aperture ID + * write address. + * Other values, error. + * @thread_name: The thread name associated with the master / channel or + * 'NULL' if using the 'current' global variable. + * + * Returns: + * pti_masterchannel struct + * 0 for error + */ +struct pti_masterchannel *pti_request_masterchannel(u8 type, + const char *thread_name) +{ + struct pti_masterchannel *mc; + + mutex_lock(&alloclock); + + switch (type) { + + case 0: + mc = get_id(drv_data->ia_app, MAX_APP_IDS, + APP_BASE_ID, thread_name); + break; + + case 1: + mc = get_id(drv_data->ia_os, MAX_OS_IDS, + OS_BASE_ID, thread_name); + break; + + case 2: + mc = get_id(drv_data->ia_modem, MAX_MODEM_IDS, + MODEM_BASE_ID, thread_name); + break; + default: + mc = NULL; + } + + mutex_unlock(&alloclock); + return mc; +} +EXPORT_SYMBOL_GPL(pti_request_masterchannel); + +/** + * pti_release_masterchannel()- Kernel API function used to release + * a master, channel ID address + * used to write to PTI HW. + * + * @mc: master, channel apeture ID address to be released. This + * will de-allocate the structure via kfree(). + */ +void pti_release_masterchannel(struct pti_masterchannel *mc) +{ + u8 master, channel, i; + + mutex_lock(&alloclock); + + if (mc) { + master = mc->master; + channel = mc->channel; + + if (master == APP_BASE_ID) { + i = channel >> 3; + drv_data->ia_app[i] &= ~(0x80>>(channel & 0x7)); + } else if (master == OS_BASE_ID) { + i = channel >> 3; + drv_data->ia_os[i] &= ~(0x80>>(channel & 0x7)); + } else { + i = channel >> 3; + drv_data->ia_modem[i] &= ~(0x80>>(channel & 0x7)); + } + + kfree(mc); + } + + mutex_unlock(&alloclock); +} +EXPORT_SYMBOL_GPL(pti_release_masterchannel); + +/** + * pti_writedata()- Kernel API function used to write trace + * debugging data to PTI HW. + * + * @mc: Master, channel aperture ID address to write to. + * Null value will return with no write occurring. + * @buf: Trace debuging data to write to the PTI HW. + * Null value will return with no write occurring. + * @count: Size of buf. Value of 0 or a negative number will + * return with no write occuring. + */ +void pti_writedata(struct pti_masterchannel *mc, u8 *buf, int count) +{ + /* + * since this function is exported, this is treated like an + * API function, thus, all parameters should + * be checked for validity. + */ + if ((mc != NULL) && (buf != NULL) && (count > 0)) + pti_write_to_aperture(mc, buf, count); + return; +} +EXPORT_SYMBOL_GPL(pti_writedata); + +/* + * for the tty_driver_*() basic function descriptions, see tty_driver.h. + * Specific header comments made for PTI-related specifics. + */ + +/** + * pti_tty_driver_open()- Open an Application master, channel aperture + * ID to the PTI device via tty device. + * + * @tty: tty interface. + * @filp: filp interface pased to tty_port_open() call. + * + * Returns: + * int, 0 for success + * otherwise, fail value + * + * The main purpose of using the tty device interface is for + * each tty port to have a unique PTI write aperture. In an + * example use case, ttyPTI0 gets syslogd and an APP aperture + * ID and ttyPTI1 is where the n_tracesink ldisc hooks to route + * modem messages into PTI. Modem trace data does not have to + * go to ttyPTI1, but ttyPTI0 and ttyPTI1 do need to be distinct + * master IDs. These messages go through the PTI HW and out of + * the handheld platform and to the Fido/Lauterbach device. + */ +static int pti_tty_driver_open(struct tty_struct *tty, struct file *filp) +{ + /* + * we actually want to allocate a new channel per open, per + * system arch. HW gives more than plenty channels for a single + * system task to have its own channel to write trace data. This + * also removes a locking requirement for the actual write + * procedure. + */ + return tty_port_open(tty->port, tty, filp); +} + +/** + * pti_tty_driver_close()- close tty device and release Application + * master, channel aperture ID to the PTI device via tty device. + * + * @tty: tty interface. + * @filp: filp interface pased to tty_port_close() call. + * + * The main purpose of using the tty device interface is to route + * syslog daemon messages to the PTI HW and out of the handheld platform + * and to the Fido/Lauterbach device. + */ +static void pti_tty_driver_close(struct tty_struct *tty, struct file *filp) +{ + tty_port_close(tty->port, tty, filp); +} + +/** + * pti_tty_install()- Used to set up specific master-channels + * to tty ports for organizational purposes when + * tracing viewed from debuging tools. + * + * @driver: tty driver information. + * @tty: tty struct containing pti information. + * + * Returns: + * 0 for success + * otherwise, error + */ +static int pti_tty_install(struct tty_driver *driver, struct tty_struct *tty) +{ + int idx = tty->index; + struct pti_tty *pti_tty_data; + int ret = tty_standard_install(driver, tty); + + if (ret == 0) { + pti_tty_data = kmalloc(sizeof(struct pti_tty), GFP_KERNEL); + if (pti_tty_data == NULL) + return -ENOMEM; + + if (idx == PTITTY_MINOR_START) + pti_tty_data->mc = pti_request_masterchannel(0, NULL); + else + pti_tty_data->mc = pti_request_masterchannel(2, NULL); + + if (pti_tty_data->mc == NULL) { + kfree(pti_tty_data); + return -ENXIO; + } + tty->driver_data = pti_tty_data; + } + + return ret; +} + +/** + * pti_tty_cleanup()- Used to de-allocate master-channel resources + * tied to tty's of this driver. + * + * @tty: tty struct containing pti information. + */ +static void pti_tty_cleanup(struct tty_struct *tty) +{ + struct pti_tty *pti_tty_data = tty->driver_data; + if (pti_tty_data == NULL) + return; + pti_release_masterchannel(pti_tty_data->mc); + kfree(pti_tty_data); + tty->driver_data = NULL; +} + +/** + * pti_tty_driver_write()- Write trace debugging data through the char + * interface to the PTI HW. Part of the misc device implementation. + * + * @filp: Contains private data which is used to obtain + * master, channel write ID. + * @data: trace data to be written. + * @len: # of byte to write. + * + * Returns: + * int, # of bytes written + * otherwise, error + */ +static int pti_tty_driver_write(struct tty_struct *tty, + const unsigned char *buf, int len) +{ + struct pti_tty *pti_tty_data = tty->driver_data; + if ((pti_tty_data != NULL) && (pti_tty_data->mc != NULL)) { + pti_write_to_aperture(pti_tty_data->mc, (u8 *)buf, len); + return len; + } + /* + * we can't write to the pti hardware if the private driver_data + * and the mc address is not there. + */ + else + return -EFAULT; +} + +/** + * pti_tty_write_room()- Always returns 2048. + * + * @tty: contains tty info of the pti driver. + */ +static int pti_tty_write_room(struct tty_struct *tty) +{ + return 2048; +} + +/** + * pti_char_open()- Open an Application master, channel aperture + * ID to the PTI device. Part of the misc device implementation. + * + * @inode: not used. + * @filp: Output- will have a masterchannel struct set containing + * the allocated application PTI aperture write address. + * + * Returns: + * int, 0 for success + * otherwise, a fail value + */ +static int pti_char_open(struct inode *inode, struct file *filp) +{ + struct pti_masterchannel *mc; + + /* + * We really do want to fail immediately if + * pti_request_masterchannel() fails, + * before assigning the value to filp->private_data. + * Slightly easier to debug if this driver needs debugging. + */ + mc = pti_request_masterchannel(0, NULL); + if (mc == NULL) + return -ENOMEM; + filp->private_data = mc; + return 0; +} + +/** + * pti_char_release()- Close a char channel to the PTI device. Part + * of the misc device implementation. + * + * @inode: Not used in this implementaiton. + * @filp: Contains private_data that contains the master, channel + * ID to be released by the PTI device. + * + * Returns: + * always 0 + */ +static int pti_char_release(struct inode *inode, struct file *filp) +{ + pti_release_masterchannel(filp->private_data); + filp->private_data = NULL; + return 0; +} + +/** + * pti_char_write()- Write trace debugging data through the char + * interface to the PTI HW. Part of the misc device implementation. + * + * @filp: Contains private data which is used to obtain + * master, channel write ID. + * @data: trace data to be written. + * @len: # of byte to write. + * @ppose: Not used in this function implementation. + * + * Returns: + * int, # of bytes written + * otherwise, error value + * + * Notes: From side discussions with Alan Cox and experimenting + * with PTI debug HW like Nokia's Fido box and Lauterbach + * devices, 8192 byte write buffer used by USER_COPY_SIZE was + * deemed an appropriate size for this type of usage with + * debugging HW. + */ +static ssize_t pti_char_write(struct file *filp, const char __user *data, + size_t len, loff_t *ppose) +{ + struct pti_masterchannel *mc; + void *kbuf; + const char __user *tmp; + size_t size = USER_COPY_SIZE; + size_t n = 0; + + tmp = data; + mc = filp->private_data; + + kbuf = kmalloc(size, GFP_KERNEL); + if (kbuf == NULL) { + pr_err("%s(%d): buf allocation failed\n", + __func__, __LINE__); + return -ENOMEM; + } + + do { + if (len - n > USER_COPY_SIZE) + size = USER_COPY_SIZE; + else + size = len - n; + + if (copy_from_user(kbuf, tmp, size)) { + kfree(kbuf); + return n ? n : -EFAULT; + } + + pti_write_to_aperture(mc, kbuf, size); + n += size; + tmp += size; + + } while (len > n); + + kfree(kbuf); + return len; +} + +static const struct tty_operations pti_tty_driver_ops = { + .open = pti_tty_driver_open, + .close = pti_tty_driver_close, + .write = pti_tty_driver_write, + .write_room = pti_tty_write_room, + .install = pti_tty_install, + .cleanup = pti_tty_cleanup +}; + +static const struct file_operations pti_char_driver_ops = { + .owner = THIS_MODULE, + .write = pti_char_write, + .open = pti_char_open, + .release = pti_char_release, +}; + +static struct miscdevice pti_char_driver = { + .minor = MISC_DYNAMIC_MINOR, + .name = CHARNAME, + .fops = &pti_char_driver_ops +}; + +/** + * pti_console_write()- Write to the console that has been acquired. + * + * @c: Not used in this implementaiton. + * @buf: Data to be written. + * @len: Length of buf. + */ +static void pti_console_write(struct console *c, const char *buf, unsigned len) +{ + static struct pti_masterchannel mc = {.master = CONSOLE_ID, + .channel = 0}; + + mc.channel = pti_console_channel; + pti_console_channel = (pti_console_channel + 1) & 0x7f; + + pti_write_full_frame_to_aperture(&mc, buf, len); +} + +/** + * pti_console_device()- Return the driver tty structure and set the + * associated index implementation. + * + * @c: Console device of the driver. + * @index: index associated with c. + * + * Returns: + * always value of pti_tty_driver structure when this function + * is called. + */ +static struct tty_driver *pti_console_device(struct console *c, int *index) +{ + *index = c->index; + return pti_tty_driver; +} + +/** + * pti_console_setup()- Initialize console variables used by the driver. + * + * @c: Not used. + * @opts: Not used. + * + * Returns: + * always 0. + */ +static int pti_console_setup(struct console *c, char *opts) +{ + pti_console_channel = 0; + pti_control_channel = 0; + return 0; +} + +/* + * pti_console struct, used to capture OS printk()'s and shift + * out to the PTI device for debugging. This cannot be + * enabled upon boot because of the possibility of eating + * any serial console printk's (race condition discovered). + * The console should be enabled upon when the tty port is + * used for the first time. Since the primary purpose for + * the tty port is to hook up syslog to it, the tty port + * will be open for a really long time. + */ +static struct console pti_console = { + .name = TTYNAME, + .write = pti_console_write, + .device = pti_console_device, + .setup = pti_console_setup, + .flags = CON_PRINTBUFFER, + .index = 0, +}; + +/** + * pti_port_activate()- Used to start/initialize any items upon + * first opening of tty_port(). + * + * @port- The tty port number of the PTI device. + * @tty- The tty struct associated with this device. + * + * Returns: + * always returns 0 + * + * Notes: The primary purpose of the PTI tty port 0 is to hook + * the syslog daemon to it; thus this port will be open for a + * very long time. + */ +static int pti_port_activate(struct tty_port *port, struct tty_struct *tty) +{ + if (port->tty->index == PTITTY_MINOR_START) + console_start(&pti_console); + return 0; +} + +/** + * pti_port_shutdown()- Used to stop/shutdown any items upon the + * last tty port close. + * + * @port- The tty port number of the PTI device. + * + * Notes: The primary purpose of the PTI tty port 0 is to hook + * the syslog daemon to it; thus this port will be open for a + * very long time. + */ +static void pti_port_shutdown(struct tty_port *port) +{ + if (port->tty->index == PTITTY_MINOR_START) + console_stop(&pti_console); +} + +static const struct tty_port_operations tty_port_ops = { + .activate = pti_port_activate, + .shutdown = pti_port_shutdown, +}; + +/* + * Note the _probe() call sets everything up and ties the char and tty + * to successfully detecting the PTI device on the pci bus. + */ + +/** + * pti_pci_probe()- Used to detect pti on the pci bus and set + * things up in the driver. + * + * @pdev- pci_dev struct values for pti. + * @ent- pci_device_id struct for pti driver. + * + * Returns: + * 0 for success + * otherwise, error + */ +static int pti_pci_probe(struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + unsigned int a; + int retval = -EINVAL; + int pci_bar = 1; + + dev_dbg(&pdev->dev, "%s %s(%d): PTI PCI ID %04x:%04x\n", __FILE__, + __func__, __LINE__, pdev->vendor, pdev->device); + + retval = misc_register(&pti_char_driver); + if (retval) { + pr_err("%s(%d): CHAR registration failed of pti driver\n", + __func__, __LINE__); + pr_err("%s(%d): Error value returned: %d\n", + __func__, __LINE__, retval); + goto err; + } + + retval = pci_enable_device(pdev); + if (retval != 0) { + dev_err(&pdev->dev, + "%s: pci_enable_device() returned error %d\n", + __func__, retval); + goto err_unreg_misc; + } + + drv_data = kzalloc(sizeof(*drv_data), GFP_KERNEL); + if (drv_data == NULL) { + retval = -ENOMEM; + dev_err(&pdev->dev, + "%s(%d): kmalloc() returned NULL memory.\n", + __func__, __LINE__); + goto err_disable_pci; + } + drv_data->pti_addr = pci_resource_start(pdev, pci_bar); + + retval = pci_request_region(pdev, pci_bar, dev_name(&pdev->dev)); + if (retval != 0) { + dev_err(&pdev->dev, + "%s(%d): pci_request_region() returned error %d\n", + __func__, __LINE__, retval); + goto err_free_dd; + } + drv_data->aperture_base = drv_data->pti_addr+APERTURE_14; + drv_data->pti_ioaddr = + ioremap_nocache((u32)drv_data->aperture_base, + APERTURE_LEN); + if (!drv_data->pti_ioaddr) { + retval = -ENOMEM; + goto err_rel_reg; + } + + pci_set_drvdata(pdev, drv_data); + + for (a = 0; a < PTITTY_MINOR_NUM; a++) { + struct tty_port *port = &drv_data->port[a]; + tty_port_init(port); + port->ops = &tty_port_ops; + + tty_port_register_device(port, pti_tty_driver, a, &pdev->dev); + } + + register_console(&pti_console); + + return 0; +err_rel_reg: + pci_release_region(pdev, pci_bar); +err_free_dd: + kfree(drv_data); +err_disable_pci: + pci_disable_device(pdev); +err_unreg_misc: + misc_deregister(&pti_char_driver); +err: + return retval; +} + +/** + * pti_pci_remove()- Driver exit method to remove PTI from + * PCI bus. + * @pdev: variable containing pci info of PTI. + */ +static void pti_pci_remove(struct pci_dev *pdev) +{ + struct pti_dev *drv_data = pci_get_drvdata(pdev); + unsigned int a; + + unregister_console(&pti_console); + + for (a = 0; a < PTITTY_MINOR_NUM; a++) { + tty_unregister_device(pti_tty_driver, a); + tty_port_destroy(&drv_data->port[a]); + } + + iounmap(drv_data->pti_ioaddr); + kfree(drv_data); + pci_release_region(pdev, 1); + pci_disable_device(pdev); + + misc_deregister(&pti_char_driver); +} + +static struct pci_driver pti_pci_driver = { + .name = PCINAME, + .id_table = pci_ids, + .probe = pti_pci_probe, + .remove = pti_pci_remove, +}; + +/** + * + * pti_init()- Overall entry/init call to the pti driver. + * It starts the registration process with the kernel. + * + * Returns: + * int __init, 0 for success + * otherwise value is an error + * + */ +static int __init pti_init(void) +{ + int retval = -EINVAL; + + /* First register module as tty device */ + + pti_tty_driver = alloc_tty_driver(PTITTY_MINOR_NUM); + if (pti_tty_driver == NULL) { + pr_err("%s(%d): Memory allocation failed for ptiTTY driver\n", + __func__, __LINE__); + return -ENOMEM; + } + + pti_tty_driver->driver_name = DRIVERNAME; + pti_tty_driver->name = TTYNAME; + pti_tty_driver->major = 0; + pti_tty_driver->minor_start = PTITTY_MINOR_START; + pti_tty_driver->type = TTY_DRIVER_TYPE_SYSTEM; + pti_tty_driver->subtype = SYSTEM_TYPE_SYSCONS; + pti_tty_driver->flags = TTY_DRIVER_REAL_RAW | + TTY_DRIVER_DYNAMIC_DEV; + pti_tty_driver->init_termios = tty_std_termios; + + tty_set_operations(pti_tty_driver, &pti_tty_driver_ops); + + retval = tty_register_driver(pti_tty_driver); + if (retval) { + pr_err("%s(%d): TTY registration failed of pti driver\n", + __func__, __LINE__); + pr_err("%s(%d): Error value returned: %d\n", + __func__, __LINE__, retval); + + goto put_tty; + } + + retval = pci_register_driver(&pti_pci_driver); + if (retval) { + pr_err("%s(%d): PCI registration failed of pti driver\n", + __func__, __LINE__); + pr_err("%s(%d): Error value returned: %d\n", + __func__, __LINE__, retval); + goto unreg_tty; + } + + return 0; +unreg_tty: + tty_unregister_driver(pti_tty_driver); +put_tty: + put_tty_driver(pti_tty_driver); + pti_tty_driver = NULL; + return retval; +} + +/** + * pti_exit()- Unregisters this module as a tty and pci driver. + */ +static void __exit pti_exit(void) +{ + tty_unregister_driver(pti_tty_driver); + pci_unregister_driver(&pti_pci_driver); + put_tty_driver(pti_tty_driver); +} + +module_init(pti_init); +module_exit(pti_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Ken Mills, Jay Freyensee"); +MODULE_DESCRIPTION("PTI Driver"); + diff --git a/drivers/misc/qcom-coincell.c b/drivers/misc/qcom-coincell.c new file mode 100644 index 000000000..829a61dbd --- /dev/null +++ b/drivers/misc/qcom-coincell.c @@ -0,0 +1,153 @@ +/* Copyright (c) 2013, The Linux Foundation. All rights reserved. + * Copyright (c) 2015, Sony Mobile Communications Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include +#include +#include +#include +#include +#include + +struct qcom_coincell { + struct device *dev; + struct regmap *regmap; + u32 base_addr; +}; + +#define QCOM_COINCELL_REG_RSET 0x44 +#define QCOM_COINCELL_REG_VSET 0x45 +#define QCOM_COINCELL_REG_ENABLE 0x46 + +#define QCOM_COINCELL_ENABLE BIT(7) + +static const int qcom_rset_map[] = { 2100, 1700, 1200, 800 }; +static const int qcom_vset_map[] = { 2500, 3200, 3100, 3000 }; +/* NOTE: for pm8921 and others, voltage of 2500 is 16 (10000b), not 0 */ + +/* if enable==0, rset and vset are ignored */ +static int qcom_coincell_chgr_config(struct qcom_coincell *chgr, int rset, + int vset, bool enable) +{ + int i, j, rc; + + /* if disabling, just do that and skip other operations */ + if (!enable) + return regmap_write(chgr->regmap, + chgr->base_addr + QCOM_COINCELL_REG_ENABLE, 0); + + /* find index for current-limiting resistor */ + for (i = 0; i < ARRAY_SIZE(qcom_rset_map); i++) + if (rset == qcom_rset_map[i]) + break; + + if (i >= ARRAY_SIZE(qcom_rset_map)) { + dev_err(chgr->dev, "invalid rset-ohms value %d\n", rset); + return -EINVAL; + } + + /* find index for charge voltage */ + for (j = 0; j < ARRAY_SIZE(qcom_vset_map); j++) + if (vset == qcom_vset_map[j]) + break; + + if (j >= ARRAY_SIZE(qcom_vset_map)) { + dev_err(chgr->dev, "invalid vset-millivolts value %d\n", vset); + return -EINVAL; + } + + rc = regmap_write(chgr->regmap, + chgr->base_addr + QCOM_COINCELL_REG_RSET, i); + if (rc) { + /* + * This is mainly to flag a bad base_addr (reg) from dts. + * Other failures writing to the registers should be + * extremely rare, or indicative of problems that + * should be reported elsewhere (eg. spmi failure). + */ + dev_err(chgr->dev, "could not write to RSET register\n"); + return rc; + } + + rc = regmap_write(chgr->regmap, + chgr->base_addr + QCOM_COINCELL_REG_VSET, j); + if (rc) + return rc; + + /* set 'enable' register */ + return regmap_write(chgr->regmap, + chgr->base_addr + QCOM_COINCELL_REG_ENABLE, + QCOM_COINCELL_ENABLE); +} + +static int qcom_coincell_probe(struct platform_device *pdev) +{ + struct device_node *node = pdev->dev.of_node; + struct qcom_coincell chgr; + u32 rset = 0; + u32 vset = 0; + bool enable; + int rc; + + chgr.dev = &pdev->dev; + + chgr.regmap = dev_get_regmap(pdev->dev.parent, NULL); + if (!chgr.regmap) { + dev_err(chgr.dev, "Unable to get regmap\n"); + return -EINVAL; + } + + rc = of_property_read_u32(node, "reg", &chgr.base_addr); + if (rc) + return rc; + + enable = !of_property_read_bool(node, "qcom,charger-disable"); + + if (enable) { + rc = of_property_read_u32(node, "qcom,rset-ohms", &rset); + if (rc) { + dev_err(chgr.dev, + "can't find 'qcom,rset-ohms' in DT block"); + return rc; + } + + rc = of_property_read_u32(node, "qcom,vset-millivolts", &vset); + if (rc) { + dev_err(chgr.dev, + "can't find 'qcom,vset-millivolts' in DT block"); + return rc; + } + } + + return qcom_coincell_chgr_config(&chgr, rset, vset, enable); +} + +static const struct of_device_id qcom_coincell_match_table[] = { + { .compatible = "qcom,pm8941-coincell", }, + {} +}; + +MODULE_DEVICE_TABLE(of, qcom_coincell_match_table); + +static struct platform_driver qcom_coincell_driver = { + .driver = { + .name = "qcom-spmi-coincell", + .of_match_table = qcom_coincell_match_table, + }, + .probe = qcom_coincell_probe, +}; + +module_platform_driver(qcom_coincell_driver); + +MODULE_DESCRIPTION("Qualcomm PMIC coincell charger driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/misc/sgi-gru/Makefile b/drivers/misc/sgi-gru/Makefile new file mode 100644 index 000000000..0003a1d56 --- /dev/null +++ b/drivers/misc/sgi-gru/Makefile @@ -0,0 +1,5 @@ +ccflags-$(CONFIG_SGI_GRU_DEBUG) := -DDEBUG + +obj-$(CONFIG_SGI_GRU) := gru.o +gru-y := grufile.o grumain.o grufault.o grutlbpurge.o gruprocfs.o grukservices.o gruhandles.o grukdump.o + diff --git a/drivers/misc/sgi-gru/gru.h b/drivers/misc/sgi-gru/gru.h new file mode 100644 index 000000000..3ad76cd18 --- /dev/null +++ b/drivers/misc/sgi-gru/gru.h @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef __GRU_H__ +#define __GRU_H__ + +/* + * GRU architectural definitions + */ +#define GRU_CACHE_LINE_BYTES 64 +#define GRU_HANDLE_STRIDE 256 +#define GRU_CB_BASE 0 +#define GRU_DS_BASE 0x20000 + +/* + * Size used to map GRU GSeg + */ +#if defined(CONFIG_IA64) +#define GRU_GSEG_PAGESIZE (256 * 1024UL) +#elif defined(CONFIG_X86_64) +#define GRU_GSEG_PAGESIZE (256 * 1024UL) /* ZZZ 2MB ??? */ +#else +#error "Unsupported architecture" +#endif + +/* + * Structure for obtaining GRU resource information + */ +struct gru_chiplet_info { + int node; + int chiplet; + int blade; + int total_dsr_bytes; + int total_cbr; + int total_user_dsr_bytes; + int total_user_cbr; + int free_user_dsr_bytes; + int free_user_cbr; +}; + +/* + * Statictics kept for each context. + */ +struct gru_gseg_statistics { + unsigned long fmm_tlbmiss; + unsigned long upm_tlbmiss; + unsigned long tlbdropin; + unsigned long context_stolen; + unsigned long reserved[10]; +}; + +/* Flags for GRU options on the gru_create_context() call */ +/* Select one of the follow 4 options to specify how TLB misses are handled */ +#define GRU_OPT_MISS_DEFAULT 0x0000 /* Use default mode */ +#define GRU_OPT_MISS_USER_POLL 0x0001 /* User will poll CB for faults */ +#define GRU_OPT_MISS_FMM_INTR 0x0002 /* Send interrupt to cpu to + handle fault */ +#define GRU_OPT_MISS_FMM_POLL 0x0003 /* Use system polling thread */ +#define GRU_OPT_MISS_MASK 0x0003 /* Mask for TLB MISS option */ + + + +#endif /* __GRU_H__ */ diff --git a/drivers/misc/sgi-gru/gru_instructions.h b/drivers/misc/sgi-gru/gru_instructions.h new file mode 100644 index 000000000..04d5170ac --- /dev/null +++ b/drivers/misc/sgi-gru/gru_instructions.h @@ -0,0 +1,736 @@ +/* + * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef __GRU_INSTRUCTIONS_H__ +#define __GRU_INSTRUCTIONS_H__ + +extern int gru_check_status_proc(void *cb); +extern int gru_wait_proc(void *cb); +extern void gru_wait_abort_proc(void *cb); + + + +/* + * Architecture dependent functions + */ + +#if defined(CONFIG_IA64) +#include +#include +#define __flush_cache(p) ia64_fc((unsigned long)p) +/* Use volatile on IA64 to ensure ordering via st4.rel */ +#define gru_ordered_store_ulong(p, v) \ + do { \ + barrier(); \ + *((volatile unsigned long *)(p)) = v; /* force st.rel */ \ + } while (0) +#elif defined(CONFIG_X86_64) +#include +#define __flush_cache(p) clflush(p) +#define gru_ordered_store_ulong(p, v) \ + do { \ + barrier(); \ + *(unsigned long *)p = v; \ + } while (0) +#else +#error "Unsupported architecture" +#endif + +/* + * Control block status and exception codes + */ +#define CBS_IDLE 0 +#define CBS_EXCEPTION 1 +#define CBS_ACTIVE 2 +#define CBS_CALL_OS 3 + +/* CB substatus bitmasks */ +#define CBSS_MSG_QUEUE_MASK 7 +#define CBSS_IMPLICIT_ABORT_ACTIVE_MASK 8 + +/* CB substatus message queue values (low 3 bits of substatus) */ +#define CBSS_NO_ERROR 0 +#define CBSS_LB_OVERFLOWED 1 +#define CBSS_QLIMIT_REACHED 2 +#define CBSS_PAGE_OVERFLOW 3 +#define CBSS_AMO_NACKED 4 +#define CBSS_PUT_NACKED 5 + +/* + * Structure used to fetch exception detail for CBs that terminate with + * CBS_EXCEPTION + */ +struct control_block_extended_exc_detail { + unsigned long cb; + int opc; + int ecause; + int exopc; + long exceptdet0; + int exceptdet1; + int cbrstate; + int cbrexecstatus; +}; + +/* + * Instruction formats + */ + +/* + * Generic instruction format. + * This definition has precise bit field definitions. + */ +struct gru_instruction_bits { + /* DW 0 - low */ + unsigned int icmd: 1; + unsigned char ima: 3; /* CB_DelRep, unmapped mode */ + unsigned char reserved0: 4; + unsigned int xtype: 3; + unsigned int iaa0: 2; + unsigned int iaa1: 2; + unsigned char reserved1: 1; + unsigned char opc: 8; /* opcode */ + unsigned char exopc: 8; /* extended opcode */ + /* DW 0 - high */ + unsigned int idef2: 22; /* TRi0 */ + unsigned char reserved2: 2; + unsigned char istatus: 2; + unsigned char isubstatus:4; + unsigned char reserved3: 1; + unsigned char tlb_fault_color: 1; + /* DW 1 */ + unsigned long idef4; /* 42 bits: TRi1, BufSize */ + /* DW 2-6 */ + unsigned long idef1; /* BAddr0 */ + unsigned long idef5; /* Nelem */ + unsigned long idef6; /* Stride, Operand1 */ + unsigned long idef3; /* BAddr1, Value, Operand2 */ + unsigned long reserved4; + /* DW 7 */ + unsigned long avalue; /* AValue */ +}; + +/* + * Generic instruction with friendlier names. This format is used + * for inline instructions. + */ +struct gru_instruction { + /* DW 0 */ + union { + unsigned long op64; /* icmd,xtype,iaa0,ima,opc,tri0 */ + struct { + unsigned int op32; + unsigned int tri0; + }; + }; + unsigned long tri1_bufsize; /* DW 1 */ + unsigned long baddr0; /* DW 2 */ + unsigned long nelem; /* DW 3 */ + unsigned long op1_stride; /* DW 4 */ + unsigned long op2_value_baddr1; /* DW 5 */ + unsigned long reserved0; /* DW 6 */ + unsigned long avalue; /* DW 7 */ +}; + +/* Some shifts and masks for the low 64 bits of a GRU command */ +#define GRU_CB_ICMD_SHFT 0 +#define GRU_CB_ICMD_MASK 0x1 +#define GRU_CB_XTYPE_SHFT 8 +#define GRU_CB_XTYPE_MASK 0x7 +#define GRU_CB_IAA0_SHFT 11 +#define GRU_CB_IAA0_MASK 0x3 +#define GRU_CB_IAA1_SHFT 13 +#define GRU_CB_IAA1_MASK 0x3 +#define GRU_CB_IMA_SHFT 1 +#define GRU_CB_IMA_MASK 0x3 +#define GRU_CB_OPC_SHFT 16 +#define GRU_CB_OPC_MASK 0xff +#define GRU_CB_EXOPC_SHFT 24 +#define GRU_CB_EXOPC_MASK 0xff +#define GRU_IDEF2_SHFT 32 +#define GRU_IDEF2_MASK 0x3ffff +#define GRU_ISTATUS_SHFT 56 +#define GRU_ISTATUS_MASK 0x3 + +/* GRU instruction opcodes (opc field) */ +#define OP_NOP 0x00 +#define OP_BCOPY 0x01 +#define OP_VLOAD 0x02 +#define OP_IVLOAD 0x03 +#define OP_VSTORE 0x04 +#define OP_IVSTORE 0x05 +#define OP_VSET 0x06 +#define OP_IVSET 0x07 +#define OP_MESQ 0x08 +#define OP_GAMXR 0x09 +#define OP_GAMIR 0x0a +#define OP_GAMIRR 0x0b +#define OP_GAMER 0x0c +#define OP_GAMERR 0x0d +#define OP_BSTORE 0x0e +#define OP_VFLUSH 0x0f + + +/* Extended opcodes values (exopc field) */ + +/* GAMIR - AMOs with implicit operands */ +#define EOP_IR_FETCH 0x01 /* Plain fetch of memory */ +#define EOP_IR_CLR 0x02 /* Fetch and clear */ +#define EOP_IR_INC 0x05 /* Fetch and increment */ +#define EOP_IR_DEC 0x07 /* Fetch and decrement */ +#define EOP_IR_QCHK1 0x0d /* Queue check, 64 byte msg */ +#define EOP_IR_QCHK2 0x0e /* Queue check, 128 byte msg */ + +/* GAMIRR - Registered AMOs with implicit operands */ +#define EOP_IRR_FETCH 0x01 /* Registered fetch of memory */ +#define EOP_IRR_CLR 0x02 /* Registered fetch and clear */ +#define EOP_IRR_INC 0x05 /* Registered fetch and increment */ +#define EOP_IRR_DEC 0x07 /* Registered fetch and decrement */ +#define EOP_IRR_DECZ 0x0f /* Registered fetch and decrement, update on zero*/ + +/* GAMER - AMOs with explicit operands */ +#define EOP_ER_SWAP 0x00 /* Exchange argument and memory */ +#define EOP_ER_OR 0x01 /* Logical OR with memory */ +#define EOP_ER_AND 0x02 /* Logical AND with memory */ +#define EOP_ER_XOR 0x03 /* Logical XOR with memory */ +#define EOP_ER_ADD 0x04 /* Add value to memory */ +#define EOP_ER_CSWAP 0x08 /* Compare with operand2, write operand1 if match*/ +#define EOP_ER_CADD 0x0c /* Queue check, operand1*64 byte msg */ + +/* GAMERR - Registered AMOs with explicit operands */ +#define EOP_ERR_SWAP 0x00 /* Exchange argument and memory */ +#define EOP_ERR_OR 0x01 /* Logical OR with memory */ +#define EOP_ERR_AND 0x02 /* Logical AND with memory */ +#define EOP_ERR_XOR 0x03 /* Logical XOR with memory */ +#define EOP_ERR_ADD 0x04 /* Add value to memory */ +#define EOP_ERR_CSWAP 0x08 /* Compare with operand2, write operand1 if match*/ +#define EOP_ERR_EPOLL 0x09 /* Poll for equality */ +#define EOP_ERR_NPOLL 0x0a /* Poll for inequality */ + +/* GAMXR - SGI Arithmetic unit */ +#define EOP_XR_CSWAP 0x0b /* Masked compare exchange */ + + +/* Transfer types (xtype field) */ +#define XTYPE_B 0x0 /* byte */ +#define XTYPE_S 0x1 /* short (2-byte) */ +#define XTYPE_W 0x2 /* word (4-byte) */ +#define XTYPE_DW 0x3 /* doubleword (8-byte) */ +#define XTYPE_CL 0x6 /* cacheline (64-byte) */ + + +/* Instruction access attributes (iaa0, iaa1 fields) */ +#define IAA_RAM 0x0 /* normal cached RAM access */ +#define IAA_NCRAM 0x2 /* noncoherent RAM access */ +#define IAA_MMIO 0x1 /* noncoherent memory-mapped I/O space */ +#define IAA_REGISTER 0x3 /* memory-mapped registers, etc. */ + + +/* Instruction mode attributes (ima field) */ +#define IMA_MAPPED 0x0 /* Virtual mode */ +#define IMA_CB_DELAY 0x1 /* hold read responses until status changes */ +#define IMA_UNMAPPED 0x2 /* bypass the TLBs (OS only) */ +#define IMA_INTERRUPT 0x4 /* Interrupt when instruction completes */ + +/* CBE ecause bits */ +#define CBE_CAUSE_RI (1 << 0) +#define CBE_CAUSE_INVALID_INSTRUCTION (1 << 1) +#define CBE_CAUSE_UNMAPPED_MODE_FORBIDDEN (1 << 2) +#define CBE_CAUSE_PE_CHECK_DATA_ERROR (1 << 3) +#define CBE_CAUSE_IAA_GAA_MISMATCH (1 << 4) +#define CBE_CAUSE_DATA_SEGMENT_LIMIT_EXCEPTION (1 << 5) +#define CBE_CAUSE_OS_FATAL_TLB_FAULT (1 << 6) +#define CBE_CAUSE_EXECUTION_HW_ERROR (1 << 7) +#define CBE_CAUSE_TLBHW_ERROR (1 << 8) +#define CBE_CAUSE_RA_REQUEST_TIMEOUT (1 << 9) +#define CBE_CAUSE_HA_REQUEST_TIMEOUT (1 << 10) +#define CBE_CAUSE_RA_RESPONSE_FATAL (1 << 11) +#define CBE_CAUSE_RA_RESPONSE_NON_FATAL (1 << 12) +#define CBE_CAUSE_HA_RESPONSE_FATAL (1 << 13) +#define CBE_CAUSE_HA_RESPONSE_NON_FATAL (1 << 14) +#define CBE_CAUSE_ADDRESS_SPACE_DECODE_ERROR (1 << 15) +#define CBE_CAUSE_PROTOCOL_STATE_DATA_ERROR (1 << 16) +#define CBE_CAUSE_RA_RESPONSE_DATA_ERROR (1 << 17) +#define CBE_CAUSE_HA_RESPONSE_DATA_ERROR (1 << 18) +#define CBE_CAUSE_FORCED_ERROR (1 << 19) + +/* CBE cbrexecstatus bits */ +#define CBR_EXS_ABORT_OCC_BIT 0 +#define CBR_EXS_INT_OCC_BIT 1 +#define CBR_EXS_PENDING_BIT 2 +#define CBR_EXS_QUEUED_BIT 3 +#define CBR_EXS_TLB_INVAL_BIT 4 +#define CBR_EXS_EXCEPTION_BIT 5 +#define CBR_EXS_CB_INT_PENDING_BIT 6 + +#define CBR_EXS_ABORT_OCC (1 << CBR_EXS_ABORT_OCC_BIT) +#define CBR_EXS_INT_OCC (1 << CBR_EXS_INT_OCC_BIT) +#define CBR_EXS_PENDING (1 << CBR_EXS_PENDING_BIT) +#define CBR_EXS_QUEUED (1 << CBR_EXS_QUEUED_BIT) +#define CBR_EXS_TLB_INVAL (1 << CBR_EXS_TLB_INVAL_BIT) +#define CBR_EXS_EXCEPTION (1 << CBR_EXS_EXCEPTION_BIT) +#define CBR_EXS_CB_INT_PENDING (1 << CBR_EXS_CB_INT_PENDING_BIT) + +/* + * Exceptions are retried for the following cases. If any OTHER bits are set + * in ecause, the exception is not retryable. + */ +#define EXCEPTION_RETRY_BITS (CBE_CAUSE_EXECUTION_HW_ERROR | \ + CBE_CAUSE_TLBHW_ERROR | \ + CBE_CAUSE_RA_REQUEST_TIMEOUT | \ + CBE_CAUSE_RA_RESPONSE_NON_FATAL | \ + CBE_CAUSE_HA_RESPONSE_NON_FATAL | \ + CBE_CAUSE_RA_RESPONSE_DATA_ERROR | \ + CBE_CAUSE_HA_RESPONSE_DATA_ERROR \ + ) + +/* Message queue head structure */ +union gru_mesqhead { + unsigned long val; + struct { + unsigned int head; + unsigned int limit; + }; +}; + + +/* Generate the low word of a GRU instruction */ +static inline unsigned long +__opdword(unsigned char opcode, unsigned char exopc, unsigned char xtype, + unsigned char iaa0, unsigned char iaa1, + unsigned long idef2, unsigned char ima) +{ + return (1 << GRU_CB_ICMD_SHFT) | + ((unsigned long)CBS_ACTIVE << GRU_ISTATUS_SHFT) | + (idef2<< GRU_IDEF2_SHFT) | + (iaa0 << GRU_CB_IAA0_SHFT) | + (iaa1 << GRU_CB_IAA1_SHFT) | + (ima << GRU_CB_IMA_SHFT) | + (xtype << GRU_CB_XTYPE_SHFT) | + (opcode << GRU_CB_OPC_SHFT) | + (exopc << GRU_CB_EXOPC_SHFT); +} + +/* + * Architecture specific intrinsics + */ +static inline void gru_flush_cache(void *p) +{ + __flush_cache(p); +} + +/* + * Store the lower 64 bits of the command including the "start" bit. Then + * start the instruction executing. + */ +static inline void gru_start_instruction(struct gru_instruction *ins, unsigned long op64) +{ + gru_ordered_store_ulong(ins, op64); + mb(); + gru_flush_cache(ins); +} + + +/* Convert "hints" to IMA */ +#define CB_IMA(h) ((h) | IMA_UNMAPPED) + +/* Convert data segment cache line index into TRI0 / TRI1 value */ +#define GRU_DINDEX(i) ((i) * GRU_CACHE_LINE_BYTES) + +/* Inline functions for GRU instructions. + * Note: + * - nelem and stride are in elements + * - tri0/tri1 is in bytes for the beginning of the data segment. + */ +static inline void gru_vload_phys(void *cb, unsigned long gpa, + unsigned int tri0, int iaa, unsigned long hints) +{ + struct gru_instruction *ins = (struct gru_instruction *)cb; + + ins->baddr0 = (long)gpa | ((unsigned long)iaa << 62); + ins->nelem = 1; + ins->op1_stride = 1; + gru_start_instruction(ins, __opdword(OP_VLOAD, 0, XTYPE_DW, iaa, 0, + (unsigned long)tri0, CB_IMA(hints))); +} + +static inline void gru_vstore_phys(void *cb, unsigned long gpa, + unsigned int tri0, int iaa, unsigned long hints) +{ + struct gru_instruction *ins = (struct gru_instruction *)cb; + + ins->baddr0 = (long)gpa | ((unsigned long)iaa << 62); + ins->nelem = 1; + ins->op1_stride = 1; + gru_start_instruction(ins, __opdword(OP_VSTORE, 0, XTYPE_DW, iaa, 0, + (unsigned long)tri0, CB_IMA(hints))); +} + +static inline void gru_vload(void *cb, unsigned long mem_addr, + unsigned int tri0, unsigned char xtype, unsigned long nelem, + unsigned long stride, unsigned long hints) +{ + struct gru_instruction *ins = (struct gru_instruction *)cb; + + ins->baddr0 = (long)mem_addr; + ins->nelem = nelem; + ins->op1_stride = stride; + gru_start_instruction(ins, __opdword(OP_VLOAD, 0, xtype, IAA_RAM, 0, + (unsigned long)tri0, CB_IMA(hints))); +} + +static inline void gru_vstore(void *cb, unsigned long mem_addr, + unsigned int tri0, unsigned char xtype, unsigned long nelem, + unsigned long stride, unsigned long hints) +{ + struct gru_instruction *ins = (void *)cb; + + ins->baddr0 = (long)mem_addr; + ins->nelem = nelem; + ins->op1_stride = stride; + gru_start_instruction(ins, __opdword(OP_VSTORE, 0, xtype, IAA_RAM, 0, + tri0, CB_IMA(hints))); +} + +static inline void gru_ivload(void *cb, unsigned long mem_addr, + unsigned int tri0, unsigned int tri1, unsigned char xtype, + unsigned long nelem, unsigned long hints) +{ + struct gru_instruction *ins = (void *)cb; + + ins->baddr0 = (long)mem_addr; + ins->nelem = nelem; + ins->tri1_bufsize = tri1; + gru_start_instruction(ins, __opdword(OP_IVLOAD, 0, xtype, IAA_RAM, 0, + tri0, CB_IMA(hints))); +} + +static inline void gru_ivstore(void *cb, unsigned long mem_addr, + unsigned int tri0, unsigned int tri1, + unsigned char xtype, unsigned long nelem, unsigned long hints) +{ + struct gru_instruction *ins = (void *)cb; + + ins->baddr0 = (long)mem_addr; + ins->nelem = nelem; + ins->tri1_bufsize = tri1; + gru_start_instruction(ins, __opdword(OP_IVSTORE, 0, xtype, IAA_RAM, 0, + tri0, CB_IMA(hints))); +} + +static inline void gru_vset(void *cb, unsigned long mem_addr, + unsigned long value, unsigned char xtype, unsigned long nelem, + unsigned long stride, unsigned long hints) +{ + struct gru_instruction *ins = (void *)cb; + + ins->baddr0 = (long)mem_addr; + ins->op2_value_baddr1 = value; + ins->nelem = nelem; + ins->op1_stride = stride; + gru_start_instruction(ins, __opdword(OP_VSET, 0, xtype, IAA_RAM, 0, + 0, CB_IMA(hints))); +} + +static inline void gru_ivset(void *cb, unsigned long mem_addr, + unsigned int tri1, unsigned long value, unsigned char xtype, + unsigned long nelem, unsigned long hints) +{ + struct gru_instruction *ins = (void *)cb; + + ins->baddr0 = (long)mem_addr; + ins->op2_value_baddr1 = value; + ins->nelem = nelem; + ins->tri1_bufsize = tri1; + gru_start_instruction(ins, __opdword(OP_IVSET, 0, xtype, IAA_RAM, 0, + 0, CB_IMA(hints))); +} + +static inline void gru_vflush(void *cb, unsigned long mem_addr, + unsigned long nelem, unsigned char xtype, unsigned long stride, + unsigned long hints) +{ + struct gru_instruction *ins = (void *)cb; + + ins->baddr0 = (long)mem_addr; + ins->op1_stride = stride; + ins->nelem = nelem; + gru_start_instruction(ins, __opdword(OP_VFLUSH, 0, xtype, IAA_RAM, 0, + 0, CB_IMA(hints))); +} + +static inline void gru_nop(void *cb, int hints) +{ + struct gru_instruction *ins = (void *)cb; + + gru_start_instruction(ins, __opdword(OP_NOP, 0, 0, 0, 0, 0, CB_IMA(hints))); +} + + +static inline void gru_bcopy(void *cb, const unsigned long src, + unsigned long dest, + unsigned int tri0, unsigned int xtype, unsigned long nelem, + unsigned int bufsize, unsigned long hints) +{ + struct gru_instruction *ins = (void *)cb; + + ins->baddr0 = (long)src; + ins->op2_value_baddr1 = (long)dest; + ins->nelem = nelem; + ins->tri1_bufsize = bufsize; + gru_start_instruction(ins, __opdword(OP_BCOPY, 0, xtype, IAA_RAM, + IAA_RAM, tri0, CB_IMA(hints))); +} + +static inline void gru_bstore(void *cb, const unsigned long src, + unsigned long dest, unsigned int tri0, unsigned int xtype, + unsigned long nelem, unsigned long hints) +{ + struct gru_instruction *ins = (void *)cb; + + ins->baddr0 = (long)src; + ins->op2_value_baddr1 = (long)dest; + ins->nelem = nelem; + gru_start_instruction(ins, __opdword(OP_BSTORE, 0, xtype, 0, IAA_RAM, + tri0, CB_IMA(hints))); +} + +static inline void gru_gamir(void *cb, int exopc, unsigned long src, + unsigned int xtype, unsigned long hints) +{ + struct gru_instruction *ins = (void *)cb; + + ins->baddr0 = (long)src; + gru_start_instruction(ins, __opdword(OP_GAMIR, exopc, xtype, IAA_RAM, 0, + 0, CB_IMA(hints))); +} + +static inline void gru_gamirr(void *cb, int exopc, unsigned long src, + unsigned int xtype, unsigned long hints) +{ + struct gru_instruction *ins = (void *)cb; + + ins->baddr0 = (long)src; + gru_start_instruction(ins, __opdword(OP_GAMIRR, exopc, xtype, IAA_RAM, 0, + 0, CB_IMA(hints))); +} + +static inline void gru_gamer(void *cb, int exopc, unsigned long src, + unsigned int xtype, + unsigned long operand1, unsigned long operand2, + unsigned long hints) +{ + struct gru_instruction *ins = (void *)cb; + + ins->baddr0 = (long)src; + ins->op1_stride = operand1; + ins->op2_value_baddr1 = operand2; + gru_start_instruction(ins, __opdword(OP_GAMER, exopc, xtype, IAA_RAM, 0, + 0, CB_IMA(hints))); +} + +static inline void gru_gamerr(void *cb, int exopc, unsigned long src, + unsigned int xtype, unsigned long operand1, + unsigned long operand2, unsigned long hints) +{ + struct gru_instruction *ins = (void *)cb; + + ins->baddr0 = (long)src; + ins->op1_stride = operand1; + ins->op2_value_baddr1 = operand2; + gru_start_instruction(ins, __opdword(OP_GAMERR, exopc, xtype, IAA_RAM, 0, + 0, CB_IMA(hints))); +} + +static inline void gru_gamxr(void *cb, unsigned long src, + unsigned int tri0, unsigned long hints) +{ + struct gru_instruction *ins = (void *)cb; + + ins->baddr0 = (long)src; + ins->nelem = 4; + gru_start_instruction(ins, __opdword(OP_GAMXR, EOP_XR_CSWAP, XTYPE_DW, + IAA_RAM, 0, 0, CB_IMA(hints))); +} + +static inline void gru_mesq(void *cb, unsigned long queue, + unsigned long tri0, unsigned long nelem, + unsigned long hints) +{ + struct gru_instruction *ins = (void *)cb; + + ins->baddr0 = (long)queue; + ins->nelem = nelem; + gru_start_instruction(ins, __opdword(OP_MESQ, 0, XTYPE_CL, IAA_RAM, 0, + tri0, CB_IMA(hints))); +} + +static inline unsigned long gru_get_amo_value(void *cb) +{ + struct gru_instruction *ins = (void *)cb; + + return ins->avalue; +} + +static inline int gru_get_amo_value_head(void *cb) +{ + struct gru_instruction *ins = (void *)cb; + + return ins->avalue & 0xffffffff; +} + +static inline int gru_get_amo_value_limit(void *cb) +{ + struct gru_instruction *ins = (void *)cb; + + return ins->avalue >> 32; +} + +static inline union gru_mesqhead gru_mesq_head(int head, int limit) +{ + union gru_mesqhead mqh; + + mqh.head = head; + mqh.limit = limit; + return mqh; +} + +/* + * Get struct control_block_extended_exc_detail for CB. + */ +extern int gru_get_cb_exception_detail(void *cb, + struct control_block_extended_exc_detail *excdet); + +#define GRU_EXC_STR_SIZE 256 + + +/* + * Control block definition for checking status + */ +struct gru_control_block_status { + unsigned int icmd :1; + unsigned int ima :3; + unsigned int reserved0 :4; + unsigned int unused1 :24; + unsigned int unused2 :24; + unsigned int istatus :2; + unsigned int isubstatus :4; + unsigned int unused3 :2; +}; + +/* Get CB status */ +static inline int gru_get_cb_status(void *cb) +{ + struct gru_control_block_status *cbs = (void *)cb; + + return cbs->istatus; +} + +/* Get CB message queue substatus */ +static inline int gru_get_cb_message_queue_substatus(void *cb) +{ + struct gru_control_block_status *cbs = (void *)cb; + + return cbs->isubstatus & CBSS_MSG_QUEUE_MASK; +} + +/* Get CB substatus */ +static inline int gru_get_cb_substatus(void *cb) +{ + struct gru_control_block_status *cbs = (void *)cb; + + return cbs->isubstatus; +} + +/* + * User interface to check an instruction status. UPM and exceptions + * are handled automatically. However, this function does NOT wait + * for an active instruction to complete. + * + */ +static inline int gru_check_status(void *cb) +{ + struct gru_control_block_status *cbs = (void *)cb; + int ret; + + ret = cbs->istatus; + if (ret != CBS_ACTIVE) + ret = gru_check_status_proc(cb); + return ret; +} + +/* + * User interface (via inline function) to wait for an instruction + * to complete. Completion status (IDLE or EXCEPTION is returned + * to the user. Exception due to hardware errors are automatically + * retried before returning an exception. + * + */ +static inline int gru_wait(void *cb) +{ + return gru_wait_proc(cb); +} + +/* + * Wait for CB to complete. Aborts program if error. (Note: error does NOT + * mean TLB mis - only fatal errors such as memory parity error or user + * bugs will cause termination. + */ +static inline void gru_wait_abort(void *cb) +{ + gru_wait_abort_proc(cb); +} + +/* + * Get a pointer to the start of a gseg + * p - Any valid pointer within the gseg + */ +static inline void *gru_get_gseg_pointer (void *p) +{ + return (void *)((unsigned long)p & ~(GRU_GSEG_PAGESIZE - 1)); +} + +/* + * Get a pointer to a control block + * gseg - GSeg address returned from gru_get_thread_gru_segment() + * index - index of desired CB + */ +static inline void *gru_get_cb_pointer(void *gseg, + int index) +{ + return gseg + GRU_CB_BASE + index * GRU_HANDLE_STRIDE; +} + +/* + * Get a pointer to a cacheline in the data segment portion of a GSeg + * gseg - GSeg address returned from gru_get_thread_gru_segment() + * index - index of desired cache line + */ +static inline void *gru_get_data_pointer(void *gseg, int index) +{ + return gseg + GRU_DS_BASE + index * GRU_CACHE_LINE_BYTES; +} + +/* + * Convert a vaddr into the tri index within the GSEG + * vaddr - virtual address of within gseg + */ +static inline int gru_get_tri(void *vaddr) +{ + return ((unsigned long)vaddr & (GRU_GSEG_PAGESIZE - 1)) - GRU_DS_BASE; +} +#endif /* __GRU_INSTRUCTIONS_H__ */ diff --git a/drivers/misc/sgi-gru/grufault.c b/drivers/misc/sgi-gru/grufault.c new file mode 100644 index 000000000..93be82fc3 --- /dev/null +++ b/drivers/misc/sgi-gru/grufault.c @@ -0,0 +1,907 @@ +/* + * SN Platform GRU Driver + * + * FAULT HANDLER FOR GRU DETECTED TLB MISSES + * + * This file contains code that handles TLB misses within the GRU. + * These misses are reported either via interrupts or user polling of + * the user CB. + * + * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "gru.h" +#include "grutables.h" +#include "grulib.h" +#include "gru_instructions.h" +#include + +/* Return codes for vtop functions */ +#define VTOP_SUCCESS 0 +#define VTOP_INVALID -1 +#define VTOP_RETRY -2 + + +/* + * Test if a physical address is a valid GRU GSEG address + */ +static inline int is_gru_paddr(unsigned long paddr) +{ + return paddr >= gru_start_paddr && paddr < gru_end_paddr; +} + +/* + * Find the vma of a GRU segment. Caller must hold mmap_sem. + */ +struct vm_area_struct *gru_find_vma(unsigned long vaddr) +{ + struct vm_area_struct *vma; + + vma = find_vma(current->mm, vaddr); + if (vma && vma->vm_start <= vaddr && vma->vm_ops == &gru_vm_ops) + return vma; + return NULL; +} + +/* + * Find and lock the gts that contains the specified user vaddr. + * + * Returns: + * - *gts with the mmap_sem locked for read and the GTS locked. + * - NULL if vaddr invalid OR is not a valid GSEG vaddr. + */ + +static struct gru_thread_state *gru_find_lock_gts(unsigned long vaddr) +{ + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + struct gru_thread_state *gts = NULL; + + down_read(&mm->mmap_sem); + vma = gru_find_vma(vaddr); + if (vma) + gts = gru_find_thread_state(vma, TSID(vaddr, vma)); + if (gts) + mutex_lock(>s->ts_ctxlock); + else + up_read(&mm->mmap_sem); + return gts; +} + +static struct gru_thread_state *gru_alloc_locked_gts(unsigned long vaddr) +{ + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + struct gru_thread_state *gts = ERR_PTR(-EINVAL); + + down_write(&mm->mmap_sem); + vma = gru_find_vma(vaddr); + if (!vma) + goto err; + + gts = gru_alloc_thread_state(vma, TSID(vaddr, vma)); + if (IS_ERR(gts)) + goto err; + mutex_lock(>s->ts_ctxlock); + downgrade_write(&mm->mmap_sem); + return gts; + +err: + up_write(&mm->mmap_sem); + return gts; +} + +/* + * Unlock a GTS that was previously locked with gru_find_lock_gts(). + */ +static void gru_unlock_gts(struct gru_thread_state *gts) +{ + mutex_unlock(>s->ts_ctxlock); + up_read(¤t->mm->mmap_sem); +} + +/* + * Set a CB.istatus to active using a user virtual address. This must be done + * just prior to a TFH RESTART. The new cb.istatus is an in-cache status ONLY. + * If the line is evicted, the status may be lost. The in-cache update + * is necessary to prevent the user from seeing a stale cb.istatus that will + * change as soon as the TFH restart is complete. Races may cause an + * occasional failure to clear the cb.istatus, but that is ok. + */ +static void gru_cb_set_istatus_active(struct gru_instruction_bits *cbk) +{ + if (cbk) { + cbk->istatus = CBS_ACTIVE; + } +} + +/* + * Read & clear a TFM + * + * The GRU has an array of fault maps. A map is private to a cpu + * Only one cpu will be accessing a cpu's fault map. + * + * This function scans the cpu-private fault map & clears all bits that + * are set. The function returns a bitmap that indicates the bits that + * were cleared. Note that sense the maps may be updated asynchronously by + * the GRU, atomic operations must be used to clear bits. + */ +static void get_clear_fault_map(struct gru_state *gru, + struct gru_tlb_fault_map *imap, + struct gru_tlb_fault_map *dmap) +{ + unsigned long i, k; + struct gru_tlb_fault_map *tfm; + + tfm = get_tfm_for_cpu(gru, gru_cpu_fault_map_id()); + prefetchw(tfm); /* Helps on hardware, required for emulator */ + for (i = 0; i < BITS_TO_LONGS(GRU_NUM_CBE); i++) { + k = tfm->fault_bits[i]; + if (k) + k = xchg(&tfm->fault_bits[i], 0UL); + imap->fault_bits[i] = k; + k = tfm->done_bits[i]; + if (k) + k = xchg(&tfm->done_bits[i], 0UL); + dmap->fault_bits[i] = k; + } + + /* + * Not functionally required but helps performance. (Required + * on emulator) + */ + gru_flush_cache(tfm); +} + +/* + * Atomic (interrupt context) & non-atomic (user context) functions to + * convert a vaddr into a physical address. The size of the page + * is returned in pageshift. + * returns: + * 0 - successful + * < 0 - error code + * 1 - (atomic only) try again in non-atomic context + */ +static int non_atomic_pte_lookup(struct vm_area_struct *vma, + unsigned long vaddr, int write, + unsigned long *paddr, int *pageshift) +{ + struct page *page; + +#ifdef CONFIG_HUGETLB_PAGE + *pageshift = is_vm_hugetlb_page(vma) ? HPAGE_SHIFT : PAGE_SHIFT; +#else + *pageshift = PAGE_SHIFT; +#endif + if (get_user_pages(vaddr, 1, write ? FOLL_WRITE : 0, &page, NULL) <= 0) + return -EFAULT; + *paddr = page_to_phys(page); + put_page(page); + return 0; +} + +/* + * atomic_pte_lookup + * + * Convert a user virtual address to a physical address + * Only supports Intel large pages (2MB only) on x86_64. + * ZZZ - hugepage support is incomplete + * + * NOTE: mmap_sem is already held on entry to this function. This + * guarantees existence of the page tables. + */ +static int atomic_pte_lookup(struct vm_area_struct *vma, unsigned long vaddr, + int write, unsigned long *paddr, int *pageshift) +{ + pgd_t *pgdp; + p4d_t *p4dp; + pud_t *pudp; + pmd_t *pmdp; + pte_t pte; + + pgdp = pgd_offset(vma->vm_mm, vaddr); + if (unlikely(pgd_none(*pgdp))) + goto err; + + p4dp = p4d_offset(pgdp, vaddr); + if (unlikely(p4d_none(*p4dp))) + goto err; + + pudp = pud_offset(p4dp, vaddr); + if (unlikely(pud_none(*pudp))) + goto err; + + pmdp = pmd_offset(pudp, vaddr); + if (unlikely(pmd_none(*pmdp))) + goto err; +#ifdef CONFIG_X86_64 + if (unlikely(pmd_large(*pmdp))) + pte = *(pte_t *) pmdp; + else +#endif + pte = *pte_offset_kernel(pmdp, vaddr); + + if (unlikely(!pte_present(pte) || + (write && (!pte_write(pte) || !pte_dirty(pte))))) + return 1; + + *paddr = pte_pfn(pte) << PAGE_SHIFT; +#ifdef CONFIG_HUGETLB_PAGE + *pageshift = is_vm_hugetlb_page(vma) ? HPAGE_SHIFT : PAGE_SHIFT; +#else + *pageshift = PAGE_SHIFT; +#endif + return 0; + +err: + return 1; +} + +static int gru_vtop(struct gru_thread_state *gts, unsigned long vaddr, + int write, int atomic, unsigned long *gpa, int *pageshift) +{ + struct mm_struct *mm = gts->ts_mm; + struct vm_area_struct *vma; + unsigned long paddr; + int ret, ps; + + vma = find_vma(mm, vaddr); + if (!vma) + goto inval; + + /* + * Atomic lookup is faster & usually works even if called in non-atomic + * context. + */ + rmb(); /* Must/check ms_range_active before loading PTEs */ + ret = atomic_pte_lookup(vma, vaddr, write, &paddr, &ps); + if (ret) { + if (atomic) + goto upm; + if (non_atomic_pte_lookup(vma, vaddr, write, &paddr, &ps)) + goto inval; + } + if (is_gru_paddr(paddr)) + goto inval; + paddr = paddr & ~((1UL << ps) - 1); + *gpa = uv_soc_phys_ram_to_gpa(paddr); + *pageshift = ps; + return VTOP_SUCCESS; + +inval: + return VTOP_INVALID; +upm: + return VTOP_RETRY; +} + + +/* + * Flush a CBE from cache. The CBE is clean in the cache. Dirty the + * CBE cacheline so that the line will be written back to home agent. + * Otherwise the line may be silently dropped. This has no impact + * except on performance. + */ +static void gru_flush_cache_cbe(struct gru_control_block_extended *cbe) +{ + if (unlikely(cbe)) { + cbe->cbrexecstatus = 0; /* make CL dirty */ + gru_flush_cache(cbe); + } +} + +/* + * Preload the TLB with entries that may be required. Currently, preloading + * is implemented only for BCOPY. Preload pages OR to + * the end of the bcopy tranfer, whichever is smaller. + */ +static void gru_preload_tlb(struct gru_state *gru, + struct gru_thread_state *gts, int atomic, + unsigned long fault_vaddr, int asid, int write, + unsigned char tlb_preload_count, + struct gru_tlb_fault_handle *tfh, + struct gru_control_block_extended *cbe) +{ + unsigned long vaddr = 0, gpa; + int ret, pageshift; + + if (cbe->opccpy != OP_BCOPY) + return; + + if (fault_vaddr == cbe->cbe_baddr0) + vaddr = fault_vaddr + GRU_CACHE_LINE_BYTES * cbe->cbe_src_cl - 1; + else if (fault_vaddr == cbe->cbe_baddr1) + vaddr = fault_vaddr + (1 << cbe->xtypecpy) * cbe->cbe_nelemcur - 1; + + fault_vaddr &= PAGE_MASK; + vaddr &= PAGE_MASK; + vaddr = min(vaddr, fault_vaddr + tlb_preload_count * PAGE_SIZE); + + while (vaddr > fault_vaddr) { + ret = gru_vtop(gts, vaddr, write, atomic, &gpa, &pageshift); + if (ret || tfh_write_only(tfh, gpa, GAA_RAM, vaddr, asid, write, + GRU_PAGESIZE(pageshift))) + return; + gru_dbg(grudev, + "%s: gid %d, gts 0x%p, tfh 0x%p, vaddr 0x%lx, asid 0x%x, rw %d, ps %d, gpa 0x%lx\n", + atomic ? "atomic" : "non-atomic", gru->gs_gid, gts, tfh, + vaddr, asid, write, pageshift, gpa); + vaddr -= PAGE_SIZE; + STAT(tlb_preload_page); + } +} + +/* + * Drop a TLB entry into the GRU. The fault is described by info in an TFH. + * Input: + * cb Address of user CBR. Null if not running in user context + * Return: + * 0 = dropin, exception, or switch to UPM successful + * 1 = range invalidate active + * < 0 = error code + * + */ +static int gru_try_dropin(struct gru_state *gru, + struct gru_thread_state *gts, + struct gru_tlb_fault_handle *tfh, + struct gru_instruction_bits *cbk) +{ + struct gru_control_block_extended *cbe = NULL; + unsigned char tlb_preload_count = gts->ts_tlb_preload_count; + int pageshift = 0, asid, write, ret, atomic = !cbk, indexway; + unsigned long gpa = 0, vaddr = 0; + + /* + * NOTE: The GRU contains magic hardware that eliminates races between + * TLB invalidates and TLB dropins. If an invalidate occurs + * in the window between reading the TFH and the subsequent TLB dropin, + * the dropin is ignored. This eliminates the need for additional locks. + */ + + /* + * Prefetch the CBE if doing TLB preloading + */ + if (unlikely(tlb_preload_count)) { + cbe = gru_tfh_to_cbe(tfh); + prefetchw(cbe); + } + + /* + * Error if TFH state is IDLE or FMM mode & the user issuing a UPM call. + * Might be a hardware race OR a stupid user. Ignore FMM because FMM + * is a transient state. + */ + if (tfh->status != TFHSTATUS_EXCEPTION) { + gru_flush_cache(tfh); + sync_core(); + if (tfh->status != TFHSTATUS_EXCEPTION) + goto failnoexception; + STAT(tfh_stale_on_fault); + } + if (tfh->state == TFHSTATE_IDLE) + goto failidle; + if (tfh->state == TFHSTATE_MISS_FMM && cbk) + goto failfmm; + + write = (tfh->cause & TFHCAUSE_TLB_MOD) != 0; + vaddr = tfh->missvaddr; + asid = tfh->missasid; + indexway = tfh->indexway; + if (asid == 0) + goto failnoasid; + + rmb(); /* TFH must be cache resident before reading ms_range_active */ + + /* + * TFH is cache resident - at least briefly. Fail the dropin + * if a range invalidate is active. + */ + if (atomic_read(>s->ts_gms->ms_range_active)) + goto failactive; + + ret = gru_vtop(gts, vaddr, write, atomic, &gpa, &pageshift); + if (ret == VTOP_INVALID) + goto failinval; + if (ret == VTOP_RETRY) + goto failupm; + + if (!(gts->ts_sizeavail & GRU_SIZEAVAIL(pageshift))) { + gts->ts_sizeavail |= GRU_SIZEAVAIL(pageshift); + if (atomic || !gru_update_cch(gts)) { + gts->ts_force_cch_reload = 1; + goto failupm; + } + } + + if (unlikely(cbe) && pageshift == PAGE_SHIFT) { + gru_preload_tlb(gru, gts, atomic, vaddr, asid, write, tlb_preload_count, tfh, cbe); + gru_flush_cache_cbe(cbe); + } + + gru_cb_set_istatus_active(cbk); + gts->ustats.tlbdropin++; + tfh_write_restart(tfh, gpa, GAA_RAM, vaddr, asid, write, + GRU_PAGESIZE(pageshift)); + gru_dbg(grudev, + "%s: gid %d, gts 0x%p, tfh 0x%p, vaddr 0x%lx, asid 0x%x, indexway 0x%x," + " rw %d, ps %d, gpa 0x%lx\n", + atomic ? "atomic" : "non-atomic", gru->gs_gid, gts, tfh, vaddr, asid, + indexway, write, pageshift, gpa); + STAT(tlb_dropin); + return 0; + +failnoasid: + /* No asid (delayed unload). */ + STAT(tlb_dropin_fail_no_asid); + gru_dbg(grudev, "FAILED no_asid tfh: 0x%p, vaddr 0x%lx\n", tfh, vaddr); + if (!cbk) + tfh_user_polling_mode(tfh); + else + gru_flush_cache(tfh); + gru_flush_cache_cbe(cbe); + return -EAGAIN; + +failupm: + /* Atomic failure switch CBR to UPM */ + tfh_user_polling_mode(tfh); + gru_flush_cache_cbe(cbe); + STAT(tlb_dropin_fail_upm); + gru_dbg(grudev, "FAILED upm tfh: 0x%p, vaddr 0x%lx\n", tfh, vaddr); + return 1; + +failfmm: + /* FMM state on UPM call */ + gru_flush_cache(tfh); + gru_flush_cache_cbe(cbe); + STAT(tlb_dropin_fail_fmm); + gru_dbg(grudev, "FAILED fmm tfh: 0x%p, state %d\n", tfh, tfh->state); + return 0; + +failnoexception: + /* TFH status did not show exception pending */ + gru_flush_cache(tfh); + gru_flush_cache_cbe(cbe); + if (cbk) + gru_flush_cache(cbk); + STAT(tlb_dropin_fail_no_exception); + gru_dbg(grudev, "FAILED non-exception tfh: 0x%p, status %d, state %d\n", + tfh, tfh->status, tfh->state); + return 0; + +failidle: + /* TFH state was idle - no miss pending */ + gru_flush_cache(tfh); + gru_flush_cache_cbe(cbe); + if (cbk) + gru_flush_cache(cbk); + STAT(tlb_dropin_fail_idle); + gru_dbg(grudev, "FAILED idle tfh: 0x%p, state %d\n", tfh, tfh->state); + return 0; + +failinval: + /* All errors (atomic & non-atomic) switch CBR to EXCEPTION state */ + tfh_exception(tfh); + gru_flush_cache_cbe(cbe); + STAT(tlb_dropin_fail_invalid); + gru_dbg(grudev, "FAILED inval tfh: 0x%p, vaddr 0x%lx\n", tfh, vaddr); + return -EFAULT; + +failactive: + /* Range invalidate active. Switch to UPM iff atomic */ + if (!cbk) + tfh_user_polling_mode(tfh); + else + gru_flush_cache(tfh); + gru_flush_cache_cbe(cbe); + STAT(tlb_dropin_fail_range_active); + gru_dbg(grudev, "FAILED range active: tfh 0x%p, vaddr 0x%lx\n", + tfh, vaddr); + return 1; +} + +/* + * Process an external interrupt from the GRU. This interrupt is + * caused by a TLB miss. + * Note that this is the interrupt handler that is registered with linux + * interrupt handlers. + */ +static irqreturn_t gru_intr(int chiplet, int blade) +{ + struct gru_state *gru; + struct gru_tlb_fault_map imap, dmap; + struct gru_thread_state *gts; + struct gru_tlb_fault_handle *tfh = NULL; + struct completion *cmp; + int cbrnum, ctxnum; + + STAT(intr); + + gru = &gru_base[blade]->bs_grus[chiplet]; + if (!gru) { + dev_err(grudev, "GRU: invalid interrupt: cpu %d, chiplet %d\n", + raw_smp_processor_id(), chiplet); + return IRQ_NONE; + } + get_clear_fault_map(gru, &imap, &dmap); + gru_dbg(grudev, + "cpu %d, chiplet %d, gid %d, imap %016lx %016lx, dmap %016lx %016lx\n", + smp_processor_id(), chiplet, gru->gs_gid, + imap.fault_bits[0], imap.fault_bits[1], + dmap.fault_bits[0], dmap.fault_bits[1]); + + for_each_cbr_in_tfm(cbrnum, dmap.fault_bits) { + STAT(intr_cbr); + cmp = gru->gs_blade->bs_async_wq; + if (cmp) + complete(cmp); + gru_dbg(grudev, "gid %d, cbr_done %d, done %d\n", + gru->gs_gid, cbrnum, cmp ? cmp->done : -1); + } + + for_each_cbr_in_tfm(cbrnum, imap.fault_bits) { + STAT(intr_tfh); + tfh = get_tfh_by_index(gru, cbrnum); + prefetchw(tfh); /* Helps on hdw, required for emulator */ + + /* + * When hardware sets a bit in the faultmap, it implicitly + * locks the GRU context so that it cannot be unloaded. + * The gts cannot change until a TFH start/writestart command + * is issued. + */ + ctxnum = tfh->ctxnum; + gts = gru->gs_gts[ctxnum]; + + /* Spurious interrupts can cause this. Ignore. */ + if (!gts) { + STAT(intr_spurious); + continue; + } + + /* + * This is running in interrupt context. Trylock the mmap_sem. + * If it fails, retry the fault in user context. + */ + gts->ustats.fmm_tlbmiss++; + if (!gts->ts_force_cch_reload && + down_read_trylock(>s->ts_mm->mmap_sem)) { + gru_try_dropin(gru, gts, tfh, NULL); + up_read(>s->ts_mm->mmap_sem); + } else { + tfh_user_polling_mode(tfh); + STAT(intr_mm_lock_failed); + } + } + return IRQ_HANDLED; +} + +irqreturn_t gru0_intr(int irq, void *dev_id) +{ + return gru_intr(0, uv_numa_blade_id()); +} + +irqreturn_t gru1_intr(int irq, void *dev_id) +{ + return gru_intr(1, uv_numa_blade_id()); +} + +irqreturn_t gru_intr_mblade(int irq, void *dev_id) +{ + int blade; + + for_each_possible_blade(blade) { + if (uv_blade_nr_possible_cpus(blade)) + continue; + gru_intr(0, blade); + gru_intr(1, blade); + } + return IRQ_HANDLED; +} + + +static int gru_user_dropin(struct gru_thread_state *gts, + struct gru_tlb_fault_handle *tfh, + void *cb) +{ + struct gru_mm_struct *gms = gts->ts_gms; + int ret; + + gts->ustats.upm_tlbmiss++; + while (1) { + wait_event(gms->ms_wait_queue, + atomic_read(&gms->ms_range_active) == 0); + prefetchw(tfh); /* Helps on hdw, required for emulator */ + ret = gru_try_dropin(gts->ts_gru, gts, tfh, cb); + if (ret <= 0) + return ret; + STAT(call_os_wait_queue); + } +} + +/* + * This interface is called as a result of a user detecting a "call OS" bit + * in a user CB. Normally means that a TLB fault has occurred. + * cb - user virtual address of the CB + */ +int gru_handle_user_call_os(unsigned long cb) +{ + struct gru_tlb_fault_handle *tfh; + struct gru_thread_state *gts; + void *cbk; + int ucbnum, cbrnum, ret = -EINVAL; + + STAT(call_os); + + /* sanity check the cb pointer */ + ucbnum = get_cb_number((void *)cb); + if ((cb & (GRU_HANDLE_STRIDE - 1)) || ucbnum >= GRU_NUM_CB) + return -EINVAL; + + gts = gru_find_lock_gts(cb); + if (!gts) + return -EINVAL; + gru_dbg(grudev, "address 0x%lx, gid %d, gts 0x%p\n", cb, gts->ts_gru ? gts->ts_gru->gs_gid : -1, gts); + + if (ucbnum >= gts->ts_cbr_au_count * GRU_CBR_AU_SIZE) + goto exit; + + gru_check_context_placement(gts); + + /* + * CCH may contain stale data if ts_force_cch_reload is set. + */ + if (gts->ts_gru && gts->ts_force_cch_reload) { + gts->ts_force_cch_reload = 0; + gru_update_cch(gts); + } + + ret = -EAGAIN; + cbrnum = thread_cbr_number(gts, ucbnum); + if (gts->ts_gru) { + tfh = get_tfh_by_index(gts->ts_gru, cbrnum); + cbk = get_gseg_base_address_cb(gts->ts_gru->gs_gru_base_vaddr, + gts->ts_ctxnum, ucbnum); + ret = gru_user_dropin(gts, tfh, cbk); + } +exit: + gru_unlock_gts(gts); + return ret; +} + +/* + * Fetch the exception detail information for a CB that terminated with + * an exception. + */ +int gru_get_exception_detail(unsigned long arg) +{ + struct control_block_extended_exc_detail excdet; + struct gru_control_block_extended *cbe; + struct gru_thread_state *gts; + int ucbnum, cbrnum, ret; + + STAT(user_exception); + if (copy_from_user(&excdet, (void __user *)arg, sizeof(excdet))) + return -EFAULT; + + gts = gru_find_lock_gts(excdet.cb); + if (!gts) + return -EINVAL; + + gru_dbg(grudev, "address 0x%lx, gid %d, gts 0x%p\n", excdet.cb, gts->ts_gru ? gts->ts_gru->gs_gid : -1, gts); + ucbnum = get_cb_number((void *)excdet.cb); + if (ucbnum >= gts->ts_cbr_au_count * GRU_CBR_AU_SIZE) { + ret = -EINVAL; + } else if (gts->ts_gru) { + cbrnum = thread_cbr_number(gts, ucbnum); + cbe = get_cbe_by_index(gts->ts_gru, cbrnum); + gru_flush_cache(cbe); /* CBE not coherent */ + sync_core(); /* make sure we are have current data */ + excdet.opc = cbe->opccpy; + excdet.exopc = cbe->exopccpy; + excdet.ecause = cbe->ecause; + excdet.exceptdet0 = cbe->idef1upd; + excdet.exceptdet1 = cbe->idef3upd; + excdet.cbrstate = cbe->cbrstate; + excdet.cbrexecstatus = cbe->cbrexecstatus; + gru_flush_cache_cbe(cbe); + ret = 0; + } else { + ret = -EAGAIN; + } + gru_unlock_gts(gts); + + gru_dbg(grudev, + "cb 0x%lx, op %d, exopc %d, cbrstate %d, cbrexecstatus 0x%x, ecause 0x%x, " + "exdet0 0x%lx, exdet1 0x%x\n", + excdet.cb, excdet.opc, excdet.exopc, excdet.cbrstate, excdet.cbrexecstatus, + excdet.ecause, excdet.exceptdet0, excdet.exceptdet1); + if (!ret && copy_to_user((void __user *)arg, &excdet, sizeof(excdet))) + ret = -EFAULT; + return ret; +} + +/* + * User request to unload a context. Content is saved for possible reload. + */ +static int gru_unload_all_contexts(void) +{ + struct gru_thread_state *gts; + struct gru_state *gru; + int gid, ctxnum; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + foreach_gid(gid) { + gru = GID_TO_GRU(gid); + spin_lock(&gru->gs_lock); + for (ctxnum = 0; ctxnum < GRU_NUM_CCH; ctxnum++) { + gts = gru->gs_gts[ctxnum]; + if (gts && mutex_trylock(>s->ts_ctxlock)) { + spin_unlock(&gru->gs_lock); + gru_unload_context(gts, 1); + mutex_unlock(>s->ts_ctxlock); + spin_lock(&gru->gs_lock); + } + } + spin_unlock(&gru->gs_lock); + } + return 0; +} + +int gru_user_unload_context(unsigned long arg) +{ + struct gru_thread_state *gts; + struct gru_unload_context_req req; + + STAT(user_unload_context); + if (copy_from_user(&req, (void __user *)arg, sizeof(req))) + return -EFAULT; + + gru_dbg(grudev, "gseg 0x%lx\n", req.gseg); + + if (!req.gseg) + return gru_unload_all_contexts(); + + gts = gru_find_lock_gts(req.gseg); + if (!gts) + return -EINVAL; + + if (gts->ts_gru) + gru_unload_context(gts, 1); + gru_unlock_gts(gts); + + return 0; +} + +/* + * User request to flush a range of virtual addresses from the GRU TLB + * (Mainly for testing). + */ +int gru_user_flush_tlb(unsigned long arg) +{ + struct gru_thread_state *gts; + struct gru_flush_tlb_req req; + struct gru_mm_struct *gms; + + STAT(user_flush_tlb); + if (copy_from_user(&req, (void __user *)arg, sizeof(req))) + return -EFAULT; + + gru_dbg(grudev, "gseg 0x%lx, vaddr 0x%lx, len 0x%lx\n", req.gseg, + req.vaddr, req.len); + + gts = gru_find_lock_gts(req.gseg); + if (!gts) + return -EINVAL; + + gms = gts->ts_gms; + gru_unlock_gts(gts); + gru_flush_tlb_range(gms, req.vaddr, req.len); + + return 0; +} + +/* + * Fetch GSEG statisticss + */ +long gru_get_gseg_statistics(unsigned long arg) +{ + struct gru_thread_state *gts; + struct gru_get_gseg_statistics_req req; + + if (copy_from_user(&req, (void __user *)arg, sizeof(req))) + return -EFAULT; + + /* + * The library creates arrays of contexts for threaded programs. + * If no gts exists in the array, the context has never been used & all + * statistics are implicitly 0. + */ + gts = gru_find_lock_gts(req.gseg); + if (gts) { + memcpy(&req.stats, >s->ustats, sizeof(gts->ustats)); + gru_unlock_gts(gts); + } else { + memset(&req.stats, 0, sizeof(gts->ustats)); + } + + if (copy_to_user((void __user *)arg, &req, sizeof(req))) + return -EFAULT; + + return 0; +} + +/* + * Register the current task as the user of the GSEG slice. + * Needed for TLB fault interrupt targeting. + */ +int gru_set_context_option(unsigned long arg) +{ + struct gru_thread_state *gts; + struct gru_set_context_option_req req; + int ret = 0; + + STAT(set_context_option); + if (copy_from_user(&req, (void __user *)arg, sizeof(req))) + return -EFAULT; + gru_dbg(grudev, "op %d, gseg 0x%lx, value1 0x%lx\n", req.op, req.gseg, req.val1); + + gts = gru_find_lock_gts(req.gseg); + if (!gts) { + gts = gru_alloc_locked_gts(req.gseg); + if (IS_ERR(gts)) + return PTR_ERR(gts); + } + + switch (req.op) { + case sco_blade_chiplet: + /* Select blade/chiplet for GRU context */ + if (req.val0 < -1 || req.val0 >= GRU_CHIPLETS_PER_HUB || + req.val1 < -1 || req.val1 >= GRU_MAX_BLADES || + (req.val1 >= 0 && !gru_base[req.val1])) { + ret = -EINVAL; + } else { + gts->ts_user_blade_id = req.val1; + gts->ts_user_chiplet_id = req.val0; + gru_check_context_placement(gts); + } + break; + case sco_gseg_owner: + /* Register the current task as the GSEG owner */ + gts->ts_tgid_owner = current->tgid; + break; + case sco_cch_req_slice: + /* Set the CCH slice option */ + gts->ts_cch_req_slice = req.val1 & 3; + break; + default: + ret = -EINVAL; + } + gru_unlock_gts(gts); + + return ret; +} diff --git a/drivers/misc/sgi-gru/grufile.c b/drivers/misc/sgi-gru/grufile.c new file mode 100644 index 000000000..104a05f6b --- /dev/null +++ b/drivers/misc/sgi-gru/grufile.c @@ -0,0 +1,623 @@ +/* + * SN Platform GRU Driver + * + * FILE OPERATIONS & DRIVER INITIALIZATION + * + * This file supports the user system call for file open, close, mmap, etc. + * This also incudes the driver initialization code. + * + * Copyright (c) 2008-2014 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef CONFIG_X86_64 +#include +#endif +#include +#include "gru.h" +#include "grulib.h" +#include "grutables.h" + +#include +#include + +struct gru_blade_state *gru_base[GRU_MAX_BLADES] __read_mostly; +unsigned long gru_start_paddr __read_mostly; +void *gru_start_vaddr __read_mostly; +unsigned long gru_end_paddr __read_mostly; +unsigned int gru_max_gids __read_mostly; +struct gru_stats_s gru_stats; + +/* Guaranteed user available resources on each node */ +static int max_user_cbrs, max_user_dsr_bytes; + +static struct miscdevice gru_miscdev; + +static int gru_supported(void) +{ + return is_uv_system() && + (uv_hub_info->hub_revision < UV3_HUB_REVISION_BASE); +} + +/* + * gru_vma_close + * + * Called when unmapping a device mapping. Frees all gru resources + * and tables belonging to the vma. + */ +static void gru_vma_close(struct vm_area_struct *vma) +{ + struct gru_vma_data *vdata; + struct gru_thread_state *gts; + struct list_head *entry, *next; + + if (!vma->vm_private_data) + return; + + vdata = vma->vm_private_data; + vma->vm_private_data = NULL; + gru_dbg(grudev, "vma %p, file %p, vdata %p\n", vma, vma->vm_file, + vdata); + list_for_each_safe(entry, next, &vdata->vd_head) { + gts = + list_entry(entry, struct gru_thread_state, ts_next); + list_del(>s->ts_next); + mutex_lock(>s->ts_ctxlock); + if (gts->ts_gru) + gru_unload_context(gts, 0); + mutex_unlock(>s->ts_ctxlock); + gts_drop(gts); + } + kfree(vdata); + STAT(vdata_free); +} + +/* + * gru_file_mmap + * + * Called when mmapping the device. Initializes the vma with a fault handler + * and private data structure necessary to allocate, track, and free the + * underlying pages. + */ +static int gru_file_mmap(struct file *file, struct vm_area_struct *vma) +{ + if ((vma->vm_flags & (VM_SHARED | VM_WRITE)) != (VM_SHARED | VM_WRITE)) + return -EPERM; + + if (vma->vm_start & (GRU_GSEG_PAGESIZE - 1) || + vma->vm_end & (GRU_GSEG_PAGESIZE - 1)) + return -EINVAL; + + vma->vm_flags |= VM_IO | VM_PFNMAP | VM_LOCKED | + VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP; + vma->vm_page_prot = PAGE_SHARED; + vma->vm_ops = &gru_vm_ops; + + vma->vm_private_data = gru_alloc_vma_data(vma, 0); + if (!vma->vm_private_data) + return -ENOMEM; + + gru_dbg(grudev, "file %p, vaddr 0x%lx, vma %p, vdata %p\n", + file, vma->vm_start, vma, vma->vm_private_data); + return 0; +} + +/* + * Create a new GRU context + */ +static int gru_create_new_context(unsigned long arg) +{ + struct gru_create_context_req req; + struct vm_area_struct *vma; + struct gru_vma_data *vdata; + int ret = -EINVAL; + + if (copy_from_user(&req, (void __user *)arg, sizeof(req))) + return -EFAULT; + + if (req.data_segment_bytes > max_user_dsr_bytes) + return -EINVAL; + if (req.control_blocks > max_user_cbrs || !req.maximum_thread_count) + return -EINVAL; + + if (!(req.options & GRU_OPT_MISS_MASK)) + req.options |= GRU_OPT_MISS_FMM_INTR; + + down_write(¤t->mm->mmap_sem); + vma = gru_find_vma(req.gseg); + if (vma) { + vdata = vma->vm_private_data; + vdata->vd_user_options = req.options; + vdata->vd_dsr_au_count = + GRU_DS_BYTES_TO_AU(req.data_segment_bytes); + vdata->vd_cbr_au_count = GRU_CB_COUNT_TO_AU(req.control_blocks); + vdata->vd_tlb_preload_count = req.tlb_preload_count; + ret = 0; + } + up_write(¤t->mm->mmap_sem); + + return ret; +} + +/* + * Get GRU configuration info (temp - for emulator testing) + */ +static long gru_get_config_info(unsigned long arg) +{ + struct gru_config_info info; + int nodesperblade; + + if (num_online_nodes() > 1 && + (uv_node_to_blade_id(1) == uv_node_to_blade_id(0))) + nodesperblade = 2; + else + nodesperblade = 1; + memset(&info, 0, sizeof(info)); + info.cpus = num_online_cpus(); + info.nodes = num_online_nodes(); + info.blades = info.nodes / nodesperblade; + info.chiplets = GRU_CHIPLETS_PER_BLADE * info.blades; + + if (copy_to_user((void __user *)arg, &info, sizeof(info))) + return -EFAULT; + return 0; +} + +/* + * gru_file_unlocked_ioctl + * + * Called to update file attributes via IOCTL calls. + */ +static long gru_file_unlocked_ioctl(struct file *file, unsigned int req, + unsigned long arg) +{ + int err = -EBADRQC; + + gru_dbg(grudev, "file %p, req 0x%x, 0x%lx\n", file, req, arg); + + switch (req) { + case GRU_CREATE_CONTEXT: + err = gru_create_new_context(arg); + break; + case GRU_SET_CONTEXT_OPTION: + err = gru_set_context_option(arg); + break; + case GRU_USER_GET_EXCEPTION_DETAIL: + err = gru_get_exception_detail(arg); + break; + case GRU_USER_UNLOAD_CONTEXT: + err = gru_user_unload_context(arg); + break; + case GRU_USER_FLUSH_TLB: + err = gru_user_flush_tlb(arg); + break; + case GRU_USER_CALL_OS: + err = gru_handle_user_call_os(arg); + break; + case GRU_GET_GSEG_STATISTICS: + err = gru_get_gseg_statistics(arg); + break; + case GRU_KTEST: + err = gru_ktest(arg); + break; + case GRU_GET_CONFIG_INFO: + err = gru_get_config_info(arg); + break; + case GRU_DUMP_CHIPLET_STATE: + err = gru_dump_chiplet_request(arg); + break; + } + return err; +} + +/* + * Called at init time to build tables for all GRUs that are present in the + * system. + */ +static void gru_init_chiplet(struct gru_state *gru, unsigned long paddr, + void *vaddr, int blade_id, int chiplet_id) +{ + spin_lock_init(&gru->gs_lock); + spin_lock_init(&gru->gs_asid_lock); + gru->gs_gru_base_paddr = paddr; + gru->gs_gru_base_vaddr = vaddr; + gru->gs_gid = blade_id * GRU_CHIPLETS_PER_BLADE + chiplet_id; + gru->gs_blade = gru_base[blade_id]; + gru->gs_blade_id = blade_id; + gru->gs_chiplet_id = chiplet_id; + gru->gs_cbr_map = (GRU_CBR_AU == 64) ? ~0 : (1UL << GRU_CBR_AU) - 1; + gru->gs_dsr_map = (1UL << GRU_DSR_AU) - 1; + gru->gs_asid_limit = MAX_ASID; + gru_tgh_flush_init(gru); + if (gru->gs_gid >= gru_max_gids) + gru_max_gids = gru->gs_gid + 1; + gru_dbg(grudev, "bid %d, gid %d, vaddr %p (0x%lx)\n", + blade_id, gru->gs_gid, gru->gs_gru_base_vaddr, + gru->gs_gru_base_paddr); +} + +static int gru_init_tables(unsigned long gru_base_paddr, void *gru_base_vaddr) +{ + int pnode, nid, bid, chip; + int cbrs, dsrbytes, n; + int order = get_order(sizeof(struct gru_blade_state)); + struct page *page; + struct gru_state *gru; + unsigned long paddr; + void *vaddr; + + max_user_cbrs = GRU_NUM_CB; + max_user_dsr_bytes = GRU_NUM_DSR_BYTES; + for_each_possible_blade(bid) { + pnode = uv_blade_to_pnode(bid); + nid = uv_blade_to_memory_nid(bid);/* -1 if no memory on blade */ + page = alloc_pages_node(nid, GFP_KERNEL, order); + if (!page) + goto fail; + gru_base[bid] = page_address(page); + memset(gru_base[bid], 0, sizeof(struct gru_blade_state)); + gru_base[bid]->bs_lru_gru = &gru_base[bid]->bs_grus[0]; + spin_lock_init(&gru_base[bid]->bs_lock); + init_rwsem(&gru_base[bid]->bs_kgts_sema); + + dsrbytes = 0; + cbrs = 0; + for (gru = gru_base[bid]->bs_grus, chip = 0; + chip < GRU_CHIPLETS_PER_BLADE; + chip++, gru++) { + paddr = gru_chiplet_paddr(gru_base_paddr, pnode, chip); + vaddr = gru_chiplet_vaddr(gru_base_vaddr, pnode, chip); + gru_init_chiplet(gru, paddr, vaddr, bid, chip); + n = hweight64(gru->gs_cbr_map) * GRU_CBR_AU_SIZE; + cbrs = max(cbrs, n); + n = hweight64(gru->gs_dsr_map) * GRU_DSR_AU_BYTES; + dsrbytes = max(dsrbytes, n); + } + max_user_cbrs = min(max_user_cbrs, cbrs); + max_user_dsr_bytes = min(max_user_dsr_bytes, dsrbytes); + } + + return 0; + +fail: + for (bid--; bid >= 0; bid--) + free_pages((unsigned long)gru_base[bid], order); + return -ENOMEM; +} + +static void gru_free_tables(void) +{ + int bid; + int order = get_order(sizeof(struct gru_state) * + GRU_CHIPLETS_PER_BLADE); + + for (bid = 0; bid < GRU_MAX_BLADES; bid++) + free_pages((unsigned long)gru_base[bid], order); +} + +static unsigned long gru_chiplet_cpu_to_mmr(int chiplet, int cpu, int *corep) +{ + unsigned long mmr = 0; + int core; + + /* + * We target the cores of a blade and not the hyperthreads themselves. + * There is a max of 8 cores per socket and 2 sockets per blade, + * making for a max total of 16 cores (i.e., 16 CPUs without + * hyperthreading and 32 CPUs with hyperthreading). + */ + core = uv_cpu_core_number(cpu) + UV_MAX_INT_CORES * uv_cpu_socket_number(cpu); + if (core >= GRU_NUM_TFM || uv_cpu_ht_number(cpu)) + return 0; + + if (chiplet == 0) { + mmr = UVH_GR0_TLB_INT0_CONFIG + + core * (UVH_GR0_TLB_INT1_CONFIG - UVH_GR0_TLB_INT0_CONFIG); + } else if (chiplet == 1) { + mmr = UVH_GR1_TLB_INT0_CONFIG + + core * (UVH_GR1_TLB_INT1_CONFIG - UVH_GR1_TLB_INT0_CONFIG); + } else { + BUG(); + } + + *corep = core; + return mmr; +} + +#ifdef CONFIG_IA64 + +static int gru_irq_count[GRU_CHIPLETS_PER_BLADE]; + +static void gru_noop(struct irq_data *d) +{ +} + +static struct irq_chip gru_chip[GRU_CHIPLETS_PER_BLADE] = { + [0 ... GRU_CHIPLETS_PER_BLADE - 1] { + .irq_mask = gru_noop, + .irq_unmask = gru_noop, + .irq_ack = gru_noop + } +}; + +static int gru_chiplet_setup_tlb_irq(int chiplet, char *irq_name, + irq_handler_t irq_handler, int cpu, int blade) +{ + unsigned long mmr; + int irq = IRQ_GRU + chiplet; + int ret, core; + + mmr = gru_chiplet_cpu_to_mmr(chiplet, cpu, &core); + if (mmr == 0) + return 0; + + if (gru_irq_count[chiplet] == 0) { + gru_chip[chiplet].name = irq_name; + ret = irq_set_chip(irq, &gru_chip[chiplet]); + if (ret) { + printk(KERN_ERR "%s: set_irq_chip failed, errno=%d\n", + GRU_DRIVER_ID_STR, -ret); + return ret; + } + + ret = request_irq(irq, irq_handler, 0, irq_name, NULL); + if (ret) { + printk(KERN_ERR "%s: request_irq failed, errno=%d\n", + GRU_DRIVER_ID_STR, -ret); + return ret; + } + } + gru_irq_count[chiplet]++; + + return 0; +} + +static void gru_chiplet_teardown_tlb_irq(int chiplet, int cpu, int blade) +{ + unsigned long mmr; + int core, irq = IRQ_GRU + chiplet; + + if (gru_irq_count[chiplet] == 0) + return; + + mmr = gru_chiplet_cpu_to_mmr(chiplet, cpu, &core); + if (mmr == 0) + return; + + if (--gru_irq_count[chiplet] == 0) + free_irq(irq, NULL); +} + +#elif defined CONFIG_X86_64 + +static int gru_chiplet_setup_tlb_irq(int chiplet, char *irq_name, + irq_handler_t irq_handler, int cpu, int blade) +{ + unsigned long mmr; + int irq, core; + int ret; + + mmr = gru_chiplet_cpu_to_mmr(chiplet, cpu, &core); + if (mmr == 0) + return 0; + + irq = uv_setup_irq(irq_name, cpu, blade, mmr, UV_AFFINITY_CPU); + if (irq < 0) { + printk(KERN_ERR "%s: uv_setup_irq failed, errno=%d\n", + GRU_DRIVER_ID_STR, -irq); + return irq; + } + + ret = request_irq(irq, irq_handler, 0, irq_name, NULL); + if (ret) { + uv_teardown_irq(irq); + printk(KERN_ERR "%s: request_irq failed, errno=%d\n", + GRU_DRIVER_ID_STR, -ret); + return ret; + } + gru_base[blade]->bs_grus[chiplet].gs_irq[core] = irq; + return 0; +} + +static void gru_chiplet_teardown_tlb_irq(int chiplet, int cpu, int blade) +{ + int irq, core; + unsigned long mmr; + + mmr = gru_chiplet_cpu_to_mmr(chiplet, cpu, &core); + if (mmr) { + irq = gru_base[blade]->bs_grus[chiplet].gs_irq[core]; + if (irq) { + free_irq(irq, NULL); + uv_teardown_irq(irq); + } + } +} + +#endif + +static void gru_teardown_tlb_irqs(void) +{ + int blade; + int cpu; + + for_each_online_cpu(cpu) { + blade = uv_cpu_to_blade_id(cpu); + gru_chiplet_teardown_tlb_irq(0, cpu, blade); + gru_chiplet_teardown_tlb_irq(1, cpu, blade); + } + for_each_possible_blade(blade) { + if (uv_blade_nr_possible_cpus(blade)) + continue; + gru_chiplet_teardown_tlb_irq(0, 0, blade); + gru_chiplet_teardown_tlb_irq(1, 0, blade); + } +} + +static int gru_setup_tlb_irqs(void) +{ + int blade; + int cpu; + int ret; + + for_each_online_cpu(cpu) { + blade = uv_cpu_to_blade_id(cpu); + ret = gru_chiplet_setup_tlb_irq(0, "GRU0_TLB", gru0_intr, cpu, blade); + if (ret != 0) + goto exit1; + + ret = gru_chiplet_setup_tlb_irq(1, "GRU1_TLB", gru1_intr, cpu, blade); + if (ret != 0) + goto exit1; + } + for_each_possible_blade(blade) { + if (uv_blade_nr_possible_cpus(blade)) + continue; + ret = gru_chiplet_setup_tlb_irq(0, "GRU0_TLB", gru_intr_mblade, 0, blade); + if (ret != 0) + goto exit1; + + ret = gru_chiplet_setup_tlb_irq(1, "GRU1_TLB", gru_intr_mblade, 0, blade); + if (ret != 0) + goto exit1; + } + + return 0; + +exit1: + gru_teardown_tlb_irqs(); + return ret; +} + +/* + * gru_init + * + * Called at boot or module load time to initialize the GRUs. + */ +static int __init gru_init(void) +{ + int ret; + + if (!gru_supported()) + return 0; + +#if defined CONFIG_IA64 + gru_start_paddr = 0xd000000000UL; /* ZZZZZZZZZZZZZZZZZZZ fixme */ +#else + gru_start_paddr = uv_read_local_mmr(UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR) & + 0x7fffffffffffUL; +#endif + gru_start_vaddr = __va(gru_start_paddr); + gru_end_paddr = gru_start_paddr + GRU_MAX_BLADES * GRU_SIZE; + printk(KERN_INFO "GRU space: 0x%lx - 0x%lx\n", + gru_start_paddr, gru_end_paddr); + ret = misc_register(&gru_miscdev); + if (ret) { + printk(KERN_ERR "%s: misc_register failed\n", + GRU_DRIVER_ID_STR); + goto exit0; + } + + ret = gru_proc_init(); + if (ret) { + printk(KERN_ERR "%s: proc init failed\n", GRU_DRIVER_ID_STR); + goto exit1; + } + + ret = gru_init_tables(gru_start_paddr, gru_start_vaddr); + if (ret) { + printk(KERN_ERR "%s: init tables failed\n", GRU_DRIVER_ID_STR); + goto exit2; + } + + ret = gru_setup_tlb_irqs(); + if (ret != 0) + goto exit3; + + gru_kservices_init(); + + printk(KERN_INFO "%s: v%s\n", GRU_DRIVER_ID_STR, + GRU_DRIVER_VERSION_STR); + return 0; + +exit3: + gru_free_tables(); +exit2: + gru_proc_exit(); +exit1: + misc_deregister(&gru_miscdev); +exit0: + return ret; + +} + +static void __exit gru_exit(void) +{ + if (!gru_supported()) + return; + + gru_teardown_tlb_irqs(); + gru_kservices_exit(); + gru_free_tables(); + misc_deregister(&gru_miscdev); + gru_proc_exit(); +} + +static const struct file_operations gru_fops = { + .owner = THIS_MODULE, + .unlocked_ioctl = gru_file_unlocked_ioctl, + .mmap = gru_file_mmap, + .llseek = noop_llseek, +}; + +static struct miscdevice gru_miscdev = { + .minor = MISC_DYNAMIC_MINOR, + .name = "gru", + .fops = &gru_fops, +}; + +const struct vm_operations_struct gru_vm_ops = { + .close = gru_vma_close, + .fault = gru_fault, +}; + +#ifndef MODULE +fs_initcall(gru_init); +#else +module_init(gru_init); +#endif +module_exit(gru_exit); + +module_param(gru_options, ulong, 0644); +MODULE_PARM_DESC(gru_options, "Various debug options"); + +MODULE_AUTHOR("Silicon Graphics, Inc."); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION(GRU_DRIVER_ID_STR GRU_DRIVER_VERSION_STR); +MODULE_VERSION(GRU_DRIVER_VERSION_STR); + diff --git a/drivers/misc/sgi-gru/gruhandles.c b/drivers/misc/sgi-gru/gruhandles.c new file mode 100644 index 000000000..1ee8e82ba --- /dev/null +++ b/drivers/misc/sgi-gru/gruhandles.c @@ -0,0 +1,210 @@ +/* + * GRU KERNEL MCS INSTRUCTIONS + * + * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include "gru.h" +#include "grulib.h" +#include "grutables.h" + +/* 10 sec */ +#ifdef CONFIG_IA64 +#include +#define GRU_OPERATION_TIMEOUT (((cycles_t) local_cpu_data->itc_freq)*10) +#define CLKS2NSEC(c) ((c) *1000000000 / local_cpu_data->itc_freq) +#else +#include +#define GRU_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) +#define CLKS2NSEC(c) ((c) * 1000000 / tsc_khz) +#endif + +/* Extract the status field from a kernel handle */ +#define GET_MSEG_HANDLE_STATUS(h) (((*(unsigned long *)(h)) >> 16) & 3) + +struct mcs_op_statistic mcs_op_statistics[mcsop_last]; + +static void update_mcs_stats(enum mcs_op op, unsigned long clks) +{ + unsigned long nsec; + + nsec = CLKS2NSEC(clks); + atomic_long_inc(&mcs_op_statistics[op].count); + atomic_long_add(nsec, &mcs_op_statistics[op].total); + if (mcs_op_statistics[op].max < nsec) + mcs_op_statistics[op].max = nsec; +} + +static void start_instruction(void *h) +{ + unsigned long *w0 = h; + + wmb(); /* setting CMD/STATUS bits must be last */ + *w0 = *w0 | 0x20001; + gru_flush_cache(h); +} + +static void report_instruction_timeout(void *h) +{ + unsigned long goff = GSEGPOFF((unsigned long)h); + char *id = "???"; + + if (TYPE_IS(CCH, goff)) + id = "CCH"; + else if (TYPE_IS(TGH, goff)) + id = "TGH"; + else if (TYPE_IS(TFH, goff)) + id = "TFH"; + + panic(KERN_ALERT "GRU %p (%s) is malfunctioning\n", h, id); +} + +static int wait_instruction_complete(void *h, enum mcs_op opc) +{ + int status; + unsigned long start_time = get_cycles(); + + while (1) { + cpu_relax(); + status = GET_MSEG_HANDLE_STATUS(h); + if (status != CCHSTATUS_ACTIVE) + break; + if (GRU_OPERATION_TIMEOUT < (get_cycles() - start_time)) { + report_instruction_timeout(h); + start_time = get_cycles(); + } + } + if (gru_options & OPT_STATS) + update_mcs_stats(opc, get_cycles() - start_time); + return status; +} + +int cch_allocate(struct gru_context_configuration_handle *cch) +{ + int ret; + + cch->opc = CCHOP_ALLOCATE; + start_instruction(cch); + ret = wait_instruction_complete(cch, cchop_allocate); + + /* + * Stop speculation into the GSEG being mapped by the previous ALLOCATE. + * The GSEG memory does not exist until the ALLOCATE completes. + */ + sync_core(); + return ret; +} + +int cch_start(struct gru_context_configuration_handle *cch) +{ + cch->opc = CCHOP_START; + start_instruction(cch); + return wait_instruction_complete(cch, cchop_start); +} + +int cch_interrupt(struct gru_context_configuration_handle *cch) +{ + cch->opc = CCHOP_INTERRUPT; + start_instruction(cch); + return wait_instruction_complete(cch, cchop_interrupt); +} + +int cch_deallocate(struct gru_context_configuration_handle *cch) +{ + int ret; + + cch->opc = CCHOP_DEALLOCATE; + start_instruction(cch); + ret = wait_instruction_complete(cch, cchop_deallocate); + + /* + * Stop speculation into the GSEG being unmapped by the previous + * DEALLOCATE. + */ + sync_core(); + return ret; +} + +int cch_interrupt_sync(struct gru_context_configuration_handle + *cch) +{ + cch->opc = CCHOP_INTERRUPT_SYNC; + start_instruction(cch); + return wait_instruction_complete(cch, cchop_interrupt_sync); +} + +int tgh_invalidate(struct gru_tlb_global_handle *tgh, + unsigned long vaddr, unsigned long vaddrmask, + int asid, int pagesize, int global, int n, + unsigned short ctxbitmap) +{ + tgh->vaddr = vaddr; + tgh->asid = asid; + tgh->pagesize = pagesize; + tgh->n = n; + tgh->global = global; + tgh->vaddrmask = vaddrmask; + tgh->ctxbitmap = ctxbitmap; + tgh->opc = TGHOP_TLBINV; + start_instruction(tgh); + return wait_instruction_complete(tgh, tghop_invalidate); +} + +int tfh_write_only(struct gru_tlb_fault_handle *tfh, + unsigned long paddr, int gaa, + unsigned long vaddr, int asid, int dirty, + int pagesize) +{ + tfh->fillasid = asid; + tfh->fillvaddr = vaddr; + tfh->pfn = paddr >> GRU_PADDR_SHIFT; + tfh->gaa = gaa; + tfh->dirty = dirty; + tfh->pagesize = pagesize; + tfh->opc = TFHOP_WRITE_ONLY; + start_instruction(tfh); + return wait_instruction_complete(tfh, tfhop_write_only); +} + +void tfh_write_restart(struct gru_tlb_fault_handle *tfh, + unsigned long paddr, int gaa, + unsigned long vaddr, int asid, int dirty, + int pagesize) +{ + tfh->fillasid = asid; + tfh->fillvaddr = vaddr; + tfh->pfn = paddr >> GRU_PADDR_SHIFT; + tfh->gaa = gaa; + tfh->dirty = dirty; + tfh->pagesize = pagesize; + tfh->opc = TFHOP_WRITE_RESTART; + start_instruction(tfh); +} + +void tfh_user_polling_mode(struct gru_tlb_fault_handle *tfh) +{ + tfh->opc = TFHOP_USER_POLLING_MODE; + start_instruction(tfh); +} + +void tfh_exception(struct gru_tlb_fault_handle *tfh) +{ + tfh->opc = TFHOP_EXCEPTION; + start_instruction(tfh); +} + diff --git a/drivers/misc/sgi-gru/gruhandles.h b/drivers/misc/sgi-gru/gruhandles.h new file mode 100644 index 000000000..3d7bd36a1 --- /dev/null +++ b/drivers/misc/sgi-gru/gruhandles.h @@ -0,0 +1,530 @@ +/* + * SN Platform GRU Driver + * + * GRU HANDLE DEFINITION + * + * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef __GRUHANDLES_H__ +#define __GRUHANDLES_H__ +#include "gru_instructions.h" + +/* + * Manifest constants for GRU Memory Map + */ +#define GRU_GSEG0_BASE 0 +#define GRU_MCS_BASE (64 * 1024 * 1024) +#define GRU_SIZE (128UL * 1024 * 1024) + +/* Handle & resource counts */ +#define GRU_NUM_CB 128 +#define GRU_NUM_DSR_BYTES (32 * 1024) +#define GRU_NUM_TFM 16 +#define GRU_NUM_TGH 24 +#define GRU_NUM_CBE 128 +#define GRU_NUM_TFH 128 +#define GRU_NUM_CCH 16 + +/* Maximum resource counts that can be reserved by user programs */ +#define GRU_NUM_USER_CBR GRU_NUM_CBE +#define GRU_NUM_USER_DSR_BYTES GRU_NUM_DSR_BYTES + +/* Bytes per handle & handle stride. Code assumes all cb, tfh, cbe handles + * are the same */ +#define GRU_HANDLE_BYTES 64 +#define GRU_HANDLE_STRIDE 256 + +/* Base addresses of handles */ +#define GRU_TFM_BASE (GRU_MCS_BASE + 0x00000) +#define GRU_TGH_BASE (GRU_MCS_BASE + 0x08000) +#define GRU_CBE_BASE (GRU_MCS_BASE + 0x10000) +#define GRU_TFH_BASE (GRU_MCS_BASE + 0x18000) +#define GRU_CCH_BASE (GRU_MCS_BASE + 0x20000) + +/* User gseg constants */ +#define GRU_GSEG_STRIDE (4 * 1024 * 1024) +#define GSEG_BASE(a) ((a) & ~(GRU_GSEG_PAGESIZE - 1)) + +/* Data segment constants */ +#define GRU_DSR_AU_BYTES 1024 +#define GRU_DSR_CL (GRU_NUM_DSR_BYTES / GRU_CACHE_LINE_BYTES) +#define GRU_DSR_AU_CL (GRU_DSR_AU_BYTES / GRU_CACHE_LINE_BYTES) +#define GRU_DSR_AU (GRU_NUM_DSR_BYTES / GRU_DSR_AU_BYTES) + +/* Control block constants */ +#define GRU_CBR_AU_SIZE 2 +#define GRU_CBR_AU (GRU_NUM_CBE / GRU_CBR_AU_SIZE) + +/* Convert resource counts to the number of AU */ +#define GRU_DS_BYTES_TO_AU(n) DIV_ROUND_UP(n, GRU_DSR_AU_BYTES) +#define GRU_CB_COUNT_TO_AU(n) DIV_ROUND_UP(n, GRU_CBR_AU_SIZE) + +/* UV limits */ +#define GRU_CHIPLETS_PER_HUB 2 +#define GRU_HUBS_PER_BLADE 1 +#define GRU_CHIPLETS_PER_BLADE (GRU_HUBS_PER_BLADE * GRU_CHIPLETS_PER_HUB) + +/* User GRU Gseg offsets */ +#define GRU_CB_BASE 0 +#define GRU_CB_LIMIT (GRU_CB_BASE + GRU_HANDLE_STRIDE * GRU_NUM_CBE) +#define GRU_DS_BASE 0x20000 +#define GRU_DS_LIMIT (GRU_DS_BASE + GRU_NUM_DSR_BYTES) + +/* Convert a GRU physical address to the chiplet offset */ +#define GSEGPOFF(h) ((h) & (GRU_SIZE - 1)) + +/* Convert an arbitrary handle address to the beginning of the GRU segment */ +#define GRUBASE(h) ((void *)((unsigned long)(h) & ~(GRU_SIZE - 1))) + +/* Test a valid handle address to determine the type */ +#define TYPE_IS(hn, h) ((h) >= GRU_##hn##_BASE && (h) < \ + GRU_##hn##_BASE + GRU_NUM_##hn * GRU_HANDLE_STRIDE && \ + (((h) & (GRU_HANDLE_STRIDE - 1)) == 0)) + + +/* General addressing macros. */ +static inline void *get_gseg_base_address(void *base, int ctxnum) +{ + return (void *)(base + GRU_GSEG0_BASE + GRU_GSEG_STRIDE * ctxnum); +} + +static inline void *get_gseg_base_address_cb(void *base, int ctxnum, int line) +{ + return (void *)(get_gseg_base_address(base, ctxnum) + + GRU_CB_BASE + GRU_HANDLE_STRIDE * line); +} + +static inline void *get_gseg_base_address_ds(void *base, int ctxnum, int line) +{ + return (void *)(get_gseg_base_address(base, ctxnum) + GRU_DS_BASE + + GRU_CACHE_LINE_BYTES * line); +} + +static inline struct gru_tlb_fault_map *get_tfm(void *base, int ctxnum) +{ + return (struct gru_tlb_fault_map *)(base + GRU_TFM_BASE + + ctxnum * GRU_HANDLE_STRIDE); +} + +static inline struct gru_tlb_global_handle *get_tgh(void *base, int ctxnum) +{ + return (struct gru_tlb_global_handle *)(base + GRU_TGH_BASE + + ctxnum * GRU_HANDLE_STRIDE); +} + +static inline struct gru_control_block_extended *get_cbe(void *base, int ctxnum) +{ + return (struct gru_control_block_extended *)(base + GRU_CBE_BASE + + ctxnum * GRU_HANDLE_STRIDE); +} + +static inline struct gru_tlb_fault_handle *get_tfh(void *base, int ctxnum) +{ + return (struct gru_tlb_fault_handle *)(base + GRU_TFH_BASE + + ctxnum * GRU_HANDLE_STRIDE); +} + +static inline struct gru_context_configuration_handle *get_cch(void *base, + int ctxnum) +{ + return (struct gru_context_configuration_handle *)(base + + GRU_CCH_BASE + ctxnum * GRU_HANDLE_STRIDE); +} + +static inline unsigned long get_cb_number(void *cb) +{ + return (((unsigned long)cb - GRU_CB_BASE) % GRU_GSEG_PAGESIZE) / + GRU_HANDLE_STRIDE; +} + +/* byte offset to a specific GRU chiplet. (p=pnode, c=chiplet (0 or 1)*/ +static inline unsigned long gru_chiplet_paddr(unsigned long paddr, int pnode, + int chiplet) +{ + return paddr + GRU_SIZE * (2 * pnode + chiplet); +} + +static inline void *gru_chiplet_vaddr(void *vaddr, int pnode, int chiplet) +{ + return vaddr + GRU_SIZE * (2 * pnode + chiplet); +} + +static inline struct gru_control_block_extended *gru_tfh_to_cbe( + struct gru_tlb_fault_handle *tfh) +{ + unsigned long cbe; + + cbe = (unsigned long)tfh - GRU_TFH_BASE + GRU_CBE_BASE; + return (struct gru_control_block_extended*)cbe; +} + + + + +/* + * Global TLB Fault Map + * Bitmap of outstanding TLB misses needing interrupt/polling service. + * + */ +struct gru_tlb_fault_map { + unsigned long fault_bits[BITS_TO_LONGS(GRU_NUM_CBE)]; + unsigned long fill0[2]; + unsigned long done_bits[BITS_TO_LONGS(GRU_NUM_CBE)]; + unsigned long fill1[2]; +}; + +/* + * TGH - TLB Global Handle + * Used for TLB flushing. + * + */ +struct gru_tlb_global_handle { + unsigned int cmd:1; /* DW 0 */ + unsigned int delresp:1; + unsigned int opc:1; + unsigned int fill1:5; + + unsigned int fill2:8; + + unsigned int status:2; + unsigned long fill3:2; + unsigned int state:3; + unsigned long fill4:1; + + unsigned int cause:3; + unsigned long fill5:37; + + unsigned long vaddr:64; /* DW 1 */ + + unsigned int asid:24; /* DW 2 */ + unsigned int fill6:8; + + unsigned int pagesize:5; + unsigned int fill7:11; + + unsigned int global:1; + unsigned int fill8:15; + + unsigned long vaddrmask:39; /* DW 3 */ + unsigned int fill9:9; + unsigned int n:10; + unsigned int fill10:6; + + unsigned int ctxbitmap:16; /* DW4 */ + unsigned long fill11[3]; +}; + +enum gru_tgh_cmd { + TGHCMD_START +}; + +enum gru_tgh_opc { + TGHOP_TLBNOP, + TGHOP_TLBINV +}; + +enum gru_tgh_status { + TGHSTATUS_IDLE, + TGHSTATUS_EXCEPTION, + TGHSTATUS_ACTIVE +}; + +enum gru_tgh_state { + TGHSTATE_IDLE, + TGHSTATE_PE_INVAL, + TGHSTATE_INTERRUPT_INVAL, + TGHSTATE_WAITDONE, + TGHSTATE_RESTART_CTX, +}; + +enum gru_tgh_cause { + TGHCAUSE_RR_ECC, + TGHCAUSE_TLB_ECC, + TGHCAUSE_LRU_ECC, + TGHCAUSE_PS_ECC, + TGHCAUSE_MUL_ERR, + TGHCAUSE_DATA_ERR, + TGHCAUSE_SW_FORCE +}; + + +/* + * TFH - TLB Global Handle + * Used for TLB dropins into the GRU TLB. + * + */ +struct gru_tlb_fault_handle { + unsigned int cmd:1; /* DW 0 - low 32*/ + unsigned int delresp:1; + unsigned int fill0:2; + unsigned int opc:3; + unsigned int fill1:9; + + unsigned int status:2; + unsigned int fill2:2; + unsigned int state:3; + unsigned int fill3:1; + + unsigned int cause:6; + unsigned int cb_int:1; + unsigned int fill4:1; + + unsigned int indexway:12; /* DW 0 - high 32 */ + unsigned int fill5:4; + + unsigned int ctxnum:4; + unsigned int fill6:12; + + unsigned long missvaddr:64; /* DW 1 */ + + unsigned int missasid:24; /* DW 2 */ + unsigned int fill7:8; + unsigned int fillasid:24; + unsigned int dirty:1; + unsigned int gaa:2; + unsigned long fill8:5; + + unsigned long pfn:41; /* DW 3 */ + unsigned int fill9:7; + unsigned int pagesize:5; + unsigned int fill10:11; + + unsigned long fillvaddr:64; /* DW 4 */ + + unsigned long fill11[3]; +}; + +enum gru_tfh_opc { + TFHOP_NOOP, + TFHOP_RESTART, + TFHOP_WRITE_ONLY, + TFHOP_WRITE_RESTART, + TFHOP_EXCEPTION, + TFHOP_USER_POLLING_MODE = 7, +}; + +enum tfh_status { + TFHSTATUS_IDLE, + TFHSTATUS_EXCEPTION, + TFHSTATUS_ACTIVE, +}; + +enum tfh_state { + TFHSTATE_INACTIVE, + TFHSTATE_IDLE, + TFHSTATE_MISS_UPM, + TFHSTATE_MISS_FMM, + TFHSTATE_HW_ERR, + TFHSTATE_WRITE_TLB, + TFHSTATE_RESTART_CBR, +}; + +/* TFH cause bits */ +enum tfh_cause { + TFHCAUSE_NONE, + TFHCAUSE_TLB_MISS, + TFHCAUSE_TLB_MOD, + TFHCAUSE_HW_ERROR_RR, + TFHCAUSE_HW_ERROR_MAIN_ARRAY, + TFHCAUSE_HW_ERROR_VALID, + TFHCAUSE_HW_ERROR_PAGESIZE, + TFHCAUSE_INSTRUCTION_EXCEPTION, + TFHCAUSE_UNCORRECTIBLE_ERROR, +}; + +/* GAA values */ +#define GAA_RAM 0x0 +#define GAA_NCRAM 0x2 +#define GAA_MMIO 0x1 +#define GAA_REGISTER 0x3 + +/* GRU paddr shift for pfn. (NOTE: shift is NOT by actual pagesize) */ +#define GRU_PADDR_SHIFT 12 + +/* + * Context Configuration handle + * Used to allocate resources to a GSEG context. + * + */ +struct gru_context_configuration_handle { + unsigned int cmd:1; /* DW0 */ + unsigned int delresp:1; + unsigned int opc:3; + unsigned int unmap_enable:1; + unsigned int req_slice_set_enable:1; + unsigned int req_slice:2; + unsigned int cb_int_enable:1; + unsigned int tlb_int_enable:1; + unsigned int tfm_fault_bit_enable:1; + unsigned int tlb_int_select:4; + + unsigned int status:2; + unsigned int state:2; + unsigned int reserved2:4; + + unsigned int cause:4; + unsigned int tfm_done_bit_enable:1; + unsigned int unused:3; + + unsigned int dsr_allocation_map; + + unsigned long cbr_allocation_map; /* DW1 */ + + unsigned int asid[8]; /* DW 2 - 5 */ + unsigned short sizeavail[8]; /* DW 6 - 7 */ +} __attribute__ ((packed)); + +enum gru_cch_opc { + CCHOP_START = 1, + CCHOP_ALLOCATE, + CCHOP_INTERRUPT, + CCHOP_DEALLOCATE, + CCHOP_INTERRUPT_SYNC, +}; + +enum gru_cch_status { + CCHSTATUS_IDLE, + CCHSTATUS_EXCEPTION, + CCHSTATUS_ACTIVE, +}; + +enum gru_cch_state { + CCHSTATE_INACTIVE, + CCHSTATE_MAPPED, + CCHSTATE_ACTIVE, + CCHSTATE_INTERRUPTED, +}; + +/* CCH Exception cause */ +enum gru_cch_cause { + CCHCAUSE_REGION_REGISTER_WRITE_ERROR = 1, + CCHCAUSE_ILLEGAL_OPCODE = 2, + CCHCAUSE_INVALID_START_REQUEST = 3, + CCHCAUSE_INVALID_ALLOCATION_REQUEST = 4, + CCHCAUSE_INVALID_DEALLOCATION_REQUEST = 5, + CCHCAUSE_INVALID_INTERRUPT_REQUEST = 6, + CCHCAUSE_CCH_BUSY = 7, + CCHCAUSE_NO_CBRS_TO_ALLOCATE = 8, + CCHCAUSE_BAD_TFM_CONFIG = 9, + CCHCAUSE_CBR_RESOURCES_OVERSUBSCRIPED = 10, + CCHCAUSE_DSR_RESOURCES_OVERSUBSCRIPED = 11, + CCHCAUSE_CBR_DEALLOCATION_ERROR = 12, +}; +/* + * CBE - Control Block Extended + * Maintains internal GRU state for active CBs. + * + */ +struct gru_control_block_extended { + unsigned int reserved0:1; /* DW 0 - low */ + unsigned int imacpy:3; + unsigned int reserved1:4; + unsigned int xtypecpy:3; + unsigned int iaa0cpy:2; + unsigned int iaa1cpy:2; + unsigned int reserved2:1; + unsigned int opccpy:8; + unsigned int exopccpy:8; + + unsigned int idef2cpy:22; /* DW 0 - high */ + unsigned int reserved3:10; + + unsigned int idef4cpy:22; /* DW 1 */ + unsigned int reserved4:10; + unsigned int idef4upd:22; + unsigned int reserved5:10; + + unsigned long idef1upd:64; /* DW 2 */ + + unsigned long idef5cpy:64; /* DW 3 */ + + unsigned long idef6cpy:64; /* DW 4 */ + + unsigned long idef3upd:64; /* DW 5 */ + + unsigned long idef5upd:64; /* DW 6 */ + + unsigned int idef2upd:22; /* DW 7 */ + unsigned int reserved6:10; + + unsigned int ecause:20; + unsigned int cbrstate:4; + unsigned int cbrexecstatus:8; +}; + +/* CBE fields for active BCOPY instructions */ +#define cbe_baddr0 idef1upd +#define cbe_baddr1 idef3upd +#define cbe_src_cl idef6cpy +#define cbe_nelemcur idef5upd + +enum gru_cbr_state { + CBRSTATE_INACTIVE, + CBRSTATE_IDLE, + CBRSTATE_PE_CHECK, + CBRSTATE_QUEUED, + CBRSTATE_WAIT_RESPONSE, + CBRSTATE_INTERRUPTED, + CBRSTATE_INTERRUPTED_MISS_FMM, + CBRSTATE_BUSY_INTERRUPT_MISS_FMM, + CBRSTATE_INTERRUPTED_MISS_UPM, + CBRSTATE_BUSY_INTERRUPTED_MISS_UPM, + CBRSTATE_REQUEST_ISSUE, + CBRSTATE_BUSY_INTERRUPT, +}; + +/* CBE cbrexecstatus bits - defined in gru_instructions.h*/ +/* CBE ecause bits - defined in gru_instructions.h */ + +/* + * Convert a processor pagesize into the strange encoded pagesize used by the + * GRU. Processor pagesize is encoded as log of bytes per page. (or PAGE_SHIFT) + * pagesize log pagesize grupagesize + * 4k 12 0 + * 16k 14 1 + * 64k 16 2 + * 256k 18 3 + * 1m 20 4 + * 2m 21 5 + * 4m 22 6 + * 16m 24 7 + * 64m 26 8 + * ... + */ +#define GRU_PAGESIZE(sh) ((((sh) > 20 ? (sh) + 2 : (sh)) >> 1) - 6) +#define GRU_SIZEAVAIL(sh) (1UL << GRU_PAGESIZE(sh)) + +/* minimum TLB purge count to ensure a full purge */ +#define GRUMAXINVAL 1024UL + +int cch_allocate(struct gru_context_configuration_handle *cch); +int cch_start(struct gru_context_configuration_handle *cch); +int cch_interrupt(struct gru_context_configuration_handle *cch); +int cch_deallocate(struct gru_context_configuration_handle *cch); +int cch_interrupt_sync(struct gru_context_configuration_handle *cch); +int tgh_invalidate(struct gru_tlb_global_handle *tgh, unsigned long vaddr, + unsigned long vaddrmask, int asid, int pagesize, int global, int n, + unsigned short ctxbitmap); +int tfh_write_only(struct gru_tlb_fault_handle *tfh, unsigned long paddr, + int gaa, unsigned long vaddr, int asid, int dirty, int pagesize); +void tfh_write_restart(struct gru_tlb_fault_handle *tfh, unsigned long paddr, + int gaa, unsigned long vaddr, int asid, int dirty, int pagesize); +void tfh_user_polling_mode(struct gru_tlb_fault_handle *tfh); +void tfh_exception(struct gru_tlb_fault_handle *tfh); + +#endif /* __GRUHANDLES_H__ */ diff --git a/drivers/misc/sgi-gru/grukdump.c b/drivers/misc/sgi-gru/grukdump.c new file mode 100644 index 000000000..1540a7785 --- /dev/null +++ b/drivers/misc/sgi-gru/grukdump.c @@ -0,0 +1,236 @@ +/* + * SN Platform GRU Driver + * + * Dump GRU State + * + * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "gru.h" +#include "grutables.h" +#include "gruhandles.h" +#include "grulib.h" + +#define CCH_LOCK_ATTEMPTS 10 + +static int gru_user_copy_handle(void __user **dp, void *s) +{ + if (copy_to_user(*dp, s, GRU_HANDLE_BYTES)) + return -1; + *dp += GRU_HANDLE_BYTES; + return 0; +} + +static int gru_dump_context_data(void *grubase, + struct gru_context_configuration_handle *cch, + void __user *ubuf, int ctxnum, int dsrcnt, + int flush_cbrs) +{ + void *cb, *cbe, *tfh, *gseg; + int i, scr; + + gseg = grubase + ctxnum * GRU_GSEG_STRIDE; + cb = gseg + GRU_CB_BASE; + cbe = grubase + GRU_CBE_BASE; + tfh = grubase + GRU_TFH_BASE; + + for_each_cbr_in_allocation_map(i, &cch->cbr_allocation_map, scr) { + if (flush_cbrs) + gru_flush_cache(cb); + if (gru_user_copy_handle(&ubuf, cb)) + goto fail; + if (gru_user_copy_handle(&ubuf, tfh + i * GRU_HANDLE_STRIDE)) + goto fail; + if (gru_user_copy_handle(&ubuf, cbe + i * GRU_HANDLE_STRIDE)) + goto fail; + cb += GRU_HANDLE_STRIDE; + } + if (dsrcnt) + memcpy(ubuf, gseg + GRU_DS_BASE, dsrcnt * GRU_HANDLE_STRIDE); + return 0; + +fail: + return -EFAULT; +} + +static int gru_dump_tfm(struct gru_state *gru, + void __user *ubuf, void __user *ubufend) +{ + struct gru_tlb_fault_map *tfm; + int i; + + if (GRU_NUM_TFM * GRU_CACHE_LINE_BYTES > ubufend - ubuf) + return -EFBIG; + + for (i = 0; i < GRU_NUM_TFM; i++) { + tfm = get_tfm(gru->gs_gru_base_vaddr, i); + if (gru_user_copy_handle(&ubuf, tfm)) + goto fail; + } + return GRU_NUM_TFM * GRU_CACHE_LINE_BYTES; + +fail: + return -EFAULT; +} + +static int gru_dump_tgh(struct gru_state *gru, + void __user *ubuf, void __user *ubufend) +{ + struct gru_tlb_global_handle *tgh; + int i; + + if (GRU_NUM_TGH * GRU_CACHE_LINE_BYTES > ubufend - ubuf) + return -EFBIG; + + for (i = 0; i < GRU_NUM_TGH; i++) { + tgh = get_tgh(gru->gs_gru_base_vaddr, i); + if (gru_user_copy_handle(&ubuf, tgh)) + goto fail; + } + return GRU_NUM_TGH * GRU_CACHE_LINE_BYTES; + +fail: + return -EFAULT; +} + +static int gru_dump_context(struct gru_state *gru, int ctxnum, + void __user *ubuf, void __user *ubufend, char data_opt, + char lock_cch, char flush_cbrs) +{ + struct gru_dump_context_header hdr; + struct gru_dump_context_header __user *uhdr = ubuf; + struct gru_context_configuration_handle *cch, *ubufcch; + struct gru_thread_state *gts; + int try, cch_locked, cbrcnt = 0, dsrcnt = 0, bytes = 0, ret = 0; + void *grubase; + + memset(&hdr, 0, sizeof(hdr)); + grubase = gru->gs_gru_base_vaddr; + cch = get_cch(grubase, ctxnum); + for (try = 0; try < CCH_LOCK_ATTEMPTS; try++) { + cch_locked = trylock_cch_handle(cch); + if (cch_locked) + break; + msleep(1); + } + + ubuf += sizeof(hdr); + ubufcch = ubuf; + if (gru_user_copy_handle(&ubuf, cch)) { + if (cch_locked) + unlock_cch_handle(cch); + return -EFAULT; + } + if (cch_locked) + ubufcch->delresp = 0; + bytes = sizeof(hdr) + GRU_CACHE_LINE_BYTES; + + if (cch_locked || !lock_cch) { + gts = gru->gs_gts[ctxnum]; + if (gts && gts->ts_vma) { + hdr.pid = gts->ts_tgid_owner; + hdr.vaddr = gts->ts_vma->vm_start; + } + if (cch->state != CCHSTATE_INACTIVE) { + cbrcnt = hweight64(cch->cbr_allocation_map) * + GRU_CBR_AU_SIZE; + dsrcnt = data_opt ? hweight32(cch->dsr_allocation_map) * + GRU_DSR_AU_CL : 0; + } + bytes += (3 * cbrcnt + dsrcnt) * GRU_CACHE_LINE_BYTES; + if (bytes > ubufend - ubuf) + ret = -EFBIG; + else + ret = gru_dump_context_data(grubase, cch, ubuf, ctxnum, + dsrcnt, flush_cbrs); + } + if (cch_locked) + unlock_cch_handle(cch); + if (ret) + return ret; + + hdr.magic = GRU_DUMP_MAGIC; + hdr.gid = gru->gs_gid; + hdr.ctxnum = ctxnum; + hdr.cbrcnt = cbrcnt; + hdr.dsrcnt = dsrcnt; + hdr.cch_locked = cch_locked; + if (copy_to_user(uhdr, &hdr, sizeof(hdr))) + return -EFAULT; + + return bytes; +} + +int gru_dump_chiplet_request(unsigned long arg) +{ + struct gru_state *gru; + struct gru_dump_chiplet_state_req req; + void __user *ubuf; + void __user *ubufend; + int ctxnum, ret, cnt = 0; + + if (copy_from_user(&req, (void __user *)arg, sizeof(req))) + return -EFAULT; + + /* Currently, only dump by gid is implemented */ + if (req.gid >= gru_max_gids) + return -EINVAL; + req.gid = array_index_nospec(req.gid, gru_max_gids); + + gru = GID_TO_GRU(req.gid); + ubuf = req.buf; + ubufend = req.buf + req.buflen; + + ret = gru_dump_tfm(gru, ubuf, ubufend); + if (ret < 0) + goto fail; + ubuf += ret; + + ret = gru_dump_tgh(gru, ubuf, ubufend); + if (ret < 0) + goto fail; + ubuf += ret; + + for (ctxnum = 0; ctxnum < GRU_NUM_CCH; ctxnum++) { + if (req.ctxnum == ctxnum || req.ctxnum < 0) { + ret = gru_dump_context(gru, ctxnum, ubuf, ubufend, + req.data_opt, req.lock_cch, + req.flush_cbrs); + if (ret < 0) + goto fail; + ubuf += ret; + cnt++; + } + } + + if (copy_to_user((void __user *)arg, &req, sizeof(req))) + return -EFAULT; + return cnt; + +fail: + return ret; +} diff --git a/drivers/misc/sgi-gru/grukservices.c b/drivers/misc/sgi-gru/grukservices.c new file mode 100644 index 000000000..030769018 --- /dev/null +++ b/drivers/misc/sgi-gru/grukservices.c @@ -0,0 +1,1171 @@ +/* + * SN Platform GRU Driver + * + * KERNEL SERVICES THAT USE THE GRU + * + * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "gru.h" +#include "grulib.h" +#include "grutables.h" +#include "grukservices.h" +#include "gru_instructions.h" +#include + +/* + * Kernel GRU Usage + * + * The following is an interim algorithm for management of kernel GRU + * resources. This will likely be replaced when we better understand the + * kernel/user requirements. + * + * Blade percpu resources reserved for kernel use. These resources are + * reserved whenever the the kernel context for the blade is loaded. Note + * that the kernel context is not guaranteed to be always available. It is + * loaded on demand & can be stolen by a user if the user demand exceeds the + * kernel demand. The kernel can always reload the kernel context but + * a SLEEP may be required!!!. + * + * Async Overview: + * + * Each blade has one "kernel context" that owns GRU kernel resources + * located on the blade. Kernel drivers use GRU resources in this context + * for sending messages, zeroing memory, etc. + * + * The kernel context is dynamically loaded on demand. If it is not in + * use by the kernel, the kernel context can be unloaded & given to a user. + * The kernel context will be reloaded when needed. This may require that + * a context be stolen from a user. + * NOTE: frequent unloading/reloading of the kernel context is + * expensive. We are depending on batch schedulers, cpusets, sane + * drivers or some other mechanism to prevent the need for frequent + * stealing/reloading. + * + * The kernel context consists of two parts: + * - 1 CB & a few DSRs that are reserved for each cpu on the blade. + * Each cpu has it's own private resources & does not share them + * with other cpus. These resources are used serially, ie, + * locked, used & unlocked on each call to a function in + * grukservices. + * (Now that we have dynamic loading of kernel contexts, I + * may rethink this & allow sharing between cpus....) + * + * - Additional resources can be reserved long term & used directly + * by UV drivers located in the kernel. Drivers using these GRU + * resources can use asynchronous GRU instructions that send + * interrupts on completion. + * - these resources must be explicitly locked/unlocked + * - locked resources prevent (obviously) the kernel + * context from being unloaded. + * - drivers using these resource directly issue their own + * GRU instruction and must wait/check completion. + * + * When these resources are reserved, the caller can optionally + * associate a wait_queue with the resources and use asynchronous + * GRU instructions. When an async GRU instruction completes, the + * driver will do a wakeup on the event. + * + */ + + +#define ASYNC_HAN_TO_BID(h) ((h) - 1) +#define ASYNC_BID_TO_HAN(b) ((b) + 1) +#define ASYNC_HAN_TO_BS(h) gru_base[ASYNC_HAN_TO_BID(h)] + +#define GRU_NUM_KERNEL_CBR 1 +#define GRU_NUM_KERNEL_DSR_BYTES 256 +#define GRU_NUM_KERNEL_DSR_CL (GRU_NUM_KERNEL_DSR_BYTES / \ + GRU_CACHE_LINE_BYTES) + +/* GRU instruction attributes for all instructions */ +#define IMA IMA_CB_DELAY + +/* GRU cacheline size is always 64 bytes - even on arches with 128 byte lines */ +#define __gru_cacheline_aligned__ \ + __attribute__((__aligned__(GRU_CACHE_LINE_BYTES))) + +#define MAGIC 0x1234567887654321UL + +/* Default retry count for GRU errors on kernel instructions */ +#define EXCEPTION_RETRY_LIMIT 3 + +/* Status of message queue sections */ +#define MQS_EMPTY 0 +#define MQS_FULL 1 +#define MQS_NOOP 2 + +/*----------------- RESOURCE MANAGEMENT -------------------------------------*/ +/* optimized for x86_64 */ +struct message_queue { + union gru_mesqhead head __gru_cacheline_aligned__; /* CL 0 */ + int qlines; /* DW 1 */ + long hstatus[2]; + void *next __gru_cacheline_aligned__;/* CL 1 */ + void *limit; + void *start; + void *start2; + char data ____cacheline_aligned; /* CL 2 */ +}; + +/* First word in every message - used by mesq interface */ +struct message_header { + char present; + char present2; + char lines; + char fill; +}; + +#define HSTATUS(mq, h) ((mq) + offsetof(struct message_queue, hstatus[h])) + +/* + * Reload the blade's kernel context into a GRU chiplet. Called holding + * the bs_kgts_sema for READ. Will steal user contexts if necessary. + */ +static void gru_load_kernel_context(struct gru_blade_state *bs, int blade_id) +{ + struct gru_state *gru; + struct gru_thread_state *kgts; + void *vaddr; + int ctxnum, ncpus; + + up_read(&bs->bs_kgts_sema); + down_write(&bs->bs_kgts_sema); + + if (!bs->bs_kgts) { + do { + bs->bs_kgts = gru_alloc_gts(NULL, 0, 0, 0, 0, 0); + if (!IS_ERR(bs->bs_kgts)) + break; + msleep(1); + } while (true); + bs->bs_kgts->ts_user_blade_id = blade_id; + } + kgts = bs->bs_kgts; + + if (!kgts->ts_gru) { + STAT(load_kernel_context); + ncpus = uv_blade_nr_possible_cpus(blade_id); + kgts->ts_cbr_au_count = GRU_CB_COUNT_TO_AU( + GRU_NUM_KERNEL_CBR * ncpus + bs->bs_async_cbrs); + kgts->ts_dsr_au_count = GRU_DS_BYTES_TO_AU( + GRU_NUM_KERNEL_DSR_BYTES * ncpus + + bs->bs_async_dsr_bytes); + while (!gru_assign_gru_context(kgts)) { + msleep(1); + gru_steal_context(kgts); + } + gru_load_context(kgts); + gru = bs->bs_kgts->ts_gru; + vaddr = gru->gs_gru_base_vaddr; + ctxnum = kgts->ts_ctxnum; + bs->kernel_cb = get_gseg_base_address_cb(vaddr, ctxnum, 0); + bs->kernel_dsr = get_gseg_base_address_ds(vaddr, ctxnum, 0); + } + downgrade_write(&bs->bs_kgts_sema); +} + +/* + * Free all kernel contexts that are not currently in use. + * Returns 0 if all freed, else number of inuse context. + */ +static int gru_free_kernel_contexts(void) +{ + struct gru_blade_state *bs; + struct gru_thread_state *kgts; + int bid, ret = 0; + + for (bid = 0; bid < GRU_MAX_BLADES; bid++) { + bs = gru_base[bid]; + if (!bs) + continue; + + /* Ignore busy contexts. Don't want to block here. */ + if (down_write_trylock(&bs->bs_kgts_sema)) { + kgts = bs->bs_kgts; + if (kgts && kgts->ts_gru) + gru_unload_context(kgts, 0); + bs->bs_kgts = NULL; + up_write(&bs->bs_kgts_sema); + kfree(kgts); + } else { + ret++; + } + } + return ret; +} + +/* + * Lock & load the kernel context for the specified blade. + */ +static struct gru_blade_state *gru_lock_kernel_context(int blade_id) +{ + struct gru_blade_state *bs; + int bid; + + STAT(lock_kernel_context); +again: + bid = blade_id < 0 ? uv_numa_blade_id() : blade_id; + bs = gru_base[bid]; + + /* Handle the case where migration occurred while waiting for the sema */ + down_read(&bs->bs_kgts_sema); + if (blade_id < 0 && bid != uv_numa_blade_id()) { + up_read(&bs->bs_kgts_sema); + goto again; + } + if (!bs->bs_kgts || !bs->bs_kgts->ts_gru) + gru_load_kernel_context(bs, bid); + return bs; + +} + +/* + * Unlock the kernel context for the specified blade. Context is not + * unloaded but may be stolen before next use. + */ +static void gru_unlock_kernel_context(int blade_id) +{ + struct gru_blade_state *bs; + + bs = gru_base[blade_id]; + up_read(&bs->bs_kgts_sema); + STAT(unlock_kernel_context); +} + +/* + * Reserve & get pointers to the DSR/CBRs reserved for the current cpu. + * - returns with preemption disabled + */ +static int gru_get_cpu_resources(int dsr_bytes, void **cb, void **dsr) +{ + struct gru_blade_state *bs; + int lcpu; + + BUG_ON(dsr_bytes > GRU_NUM_KERNEL_DSR_BYTES); + preempt_disable(); + bs = gru_lock_kernel_context(-1); + lcpu = uv_blade_processor_id(); + *cb = bs->kernel_cb + lcpu * GRU_HANDLE_STRIDE; + *dsr = bs->kernel_dsr + lcpu * GRU_NUM_KERNEL_DSR_BYTES; + return 0; +} + +/* + * Free the current cpus reserved DSR/CBR resources. + */ +static void gru_free_cpu_resources(void *cb, void *dsr) +{ + gru_unlock_kernel_context(uv_numa_blade_id()); + preempt_enable(); +} + +/* + * Reserve GRU resources to be used asynchronously. + * Note: currently supports only 1 reservation per blade. + * + * input: + * blade_id - blade on which resources should be reserved + * cbrs - number of CBRs + * dsr_bytes - number of DSR bytes needed + * output: + * handle to identify resource + * (0 = async resources already reserved) + */ +unsigned long gru_reserve_async_resources(int blade_id, int cbrs, int dsr_bytes, + struct completion *cmp) +{ + struct gru_blade_state *bs; + struct gru_thread_state *kgts; + int ret = 0; + + bs = gru_base[blade_id]; + + down_write(&bs->bs_kgts_sema); + + /* Verify no resources already reserved */ + if (bs->bs_async_dsr_bytes + bs->bs_async_cbrs) + goto done; + bs->bs_async_dsr_bytes = dsr_bytes; + bs->bs_async_cbrs = cbrs; + bs->bs_async_wq = cmp; + kgts = bs->bs_kgts; + + /* Resources changed. Unload context if already loaded */ + if (kgts && kgts->ts_gru) + gru_unload_context(kgts, 0); + ret = ASYNC_BID_TO_HAN(blade_id); + +done: + up_write(&bs->bs_kgts_sema); + return ret; +} + +/* + * Release async resources previously reserved. + * + * input: + * han - handle to identify resources + */ +void gru_release_async_resources(unsigned long han) +{ + struct gru_blade_state *bs = ASYNC_HAN_TO_BS(han); + + down_write(&bs->bs_kgts_sema); + bs->bs_async_dsr_bytes = 0; + bs->bs_async_cbrs = 0; + bs->bs_async_wq = NULL; + up_write(&bs->bs_kgts_sema); +} + +/* + * Wait for async GRU instructions to complete. + * + * input: + * han - handle to identify resources + */ +void gru_wait_async_cbr(unsigned long han) +{ + struct gru_blade_state *bs = ASYNC_HAN_TO_BS(han); + + wait_for_completion(bs->bs_async_wq); + mb(); +} + +/* + * Lock previous reserved async GRU resources + * + * input: + * han - handle to identify resources + * output: + * cb - pointer to first CBR + * dsr - pointer to first DSR + */ +void gru_lock_async_resource(unsigned long han, void **cb, void **dsr) +{ + struct gru_blade_state *bs = ASYNC_HAN_TO_BS(han); + int blade_id = ASYNC_HAN_TO_BID(han); + int ncpus; + + gru_lock_kernel_context(blade_id); + ncpus = uv_blade_nr_possible_cpus(blade_id); + if (cb) + *cb = bs->kernel_cb + ncpus * GRU_HANDLE_STRIDE; + if (dsr) + *dsr = bs->kernel_dsr + ncpus * GRU_NUM_KERNEL_DSR_BYTES; +} + +/* + * Unlock previous reserved async GRU resources + * + * input: + * han - handle to identify resources + */ +void gru_unlock_async_resource(unsigned long han) +{ + int blade_id = ASYNC_HAN_TO_BID(han); + + gru_unlock_kernel_context(blade_id); +} + +/*----------------------------------------------------------------------*/ +int gru_get_cb_exception_detail(void *cb, + struct control_block_extended_exc_detail *excdet) +{ + struct gru_control_block_extended *cbe; + struct gru_thread_state *kgts = NULL; + unsigned long off; + int cbrnum, bid; + + /* + * Locate kgts for cb. This algorithm is SLOW but + * this function is rarely called (ie., almost never). + * Performance does not matter. + */ + for_each_possible_blade(bid) { + if (!gru_base[bid]) + break; + kgts = gru_base[bid]->bs_kgts; + if (!kgts || !kgts->ts_gru) + continue; + off = cb - kgts->ts_gru->gs_gru_base_vaddr; + if (off < GRU_SIZE) + break; + kgts = NULL; + } + BUG_ON(!kgts); + cbrnum = thread_cbr_number(kgts, get_cb_number(cb)); + cbe = get_cbe(GRUBASE(cb), cbrnum); + gru_flush_cache(cbe); /* CBE not coherent */ + sync_core(); + excdet->opc = cbe->opccpy; + excdet->exopc = cbe->exopccpy; + excdet->ecause = cbe->ecause; + excdet->exceptdet0 = cbe->idef1upd; + excdet->exceptdet1 = cbe->idef3upd; + gru_flush_cache(cbe); + return 0; +} + +static char *gru_get_cb_exception_detail_str(int ret, void *cb, + char *buf, int size) +{ + struct gru_control_block_status *gen = (void *)cb; + struct control_block_extended_exc_detail excdet; + + if (ret > 0 && gen->istatus == CBS_EXCEPTION) { + gru_get_cb_exception_detail(cb, &excdet); + snprintf(buf, size, + "GRU:%d exception: cb %p, opc %d, exopc %d, ecause 0x%x," + "excdet0 0x%lx, excdet1 0x%x", smp_processor_id(), + gen, excdet.opc, excdet.exopc, excdet.ecause, + excdet.exceptdet0, excdet.exceptdet1); + } else { + snprintf(buf, size, "No exception"); + } + return buf; +} + +static int gru_wait_idle_or_exception(struct gru_control_block_status *gen) +{ + while (gen->istatus >= CBS_ACTIVE) { + cpu_relax(); + barrier(); + } + return gen->istatus; +} + +static int gru_retry_exception(void *cb) +{ + struct gru_control_block_status *gen = (void *)cb; + struct control_block_extended_exc_detail excdet; + int retry = EXCEPTION_RETRY_LIMIT; + + while (1) { + if (gru_wait_idle_or_exception(gen) == CBS_IDLE) + return CBS_IDLE; + if (gru_get_cb_message_queue_substatus(cb)) + return CBS_EXCEPTION; + gru_get_cb_exception_detail(cb, &excdet); + if ((excdet.ecause & ~EXCEPTION_RETRY_BITS) || + (excdet.cbrexecstatus & CBR_EXS_ABORT_OCC)) + break; + if (retry-- == 0) + break; + gen->icmd = 1; + gru_flush_cache(gen); + } + return CBS_EXCEPTION; +} + +int gru_check_status_proc(void *cb) +{ + struct gru_control_block_status *gen = (void *)cb; + int ret; + + ret = gen->istatus; + if (ret == CBS_EXCEPTION) + ret = gru_retry_exception(cb); + rmb(); + return ret; + +} + +int gru_wait_proc(void *cb) +{ + struct gru_control_block_status *gen = (void *)cb; + int ret; + + ret = gru_wait_idle_or_exception(gen); + if (ret == CBS_EXCEPTION) + ret = gru_retry_exception(cb); + rmb(); + return ret; +} + +static void gru_abort(int ret, void *cb, char *str) +{ + char buf[GRU_EXC_STR_SIZE]; + + panic("GRU FATAL ERROR: %s - %s\n", str, + gru_get_cb_exception_detail_str(ret, cb, buf, sizeof(buf))); +} + +void gru_wait_abort_proc(void *cb) +{ + int ret; + + ret = gru_wait_proc(cb); + if (ret) + gru_abort(ret, cb, "gru_wait_abort"); +} + + +/*------------------------------ MESSAGE QUEUES -----------------------------*/ + +/* Internal status . These are NOT returned to the user. */ +#define MQIE_AGAIN -1 /* try again */ + + +/* + * Save/restore the "present" flag that is in the second line of 2-line + * messages + */ +static inline int get_present2(void *p) +{ + struct message_header *mhdr = p + GRU_CACHE_LINE_BYTES; + return mhdr->present; +} + +static inline void restore_present2(void *p, int val) +{ + struct message_header *mhdr = p + GRU_CACHE_LINE_BYTES; + mhdr->present = val; +} + +/* + * Create a message queue. + * qlines - message queue size in cache lines. Includes 2-line header. + */ +int gru_create_message_queue(struct gru_message_queue_desc *mqd, + void *p, unsigned int bytes, int nasid, int vector, int apicid) +{ + struct message_queue *mq = p; + unsigned int qlines; + + qlines = bytes / GRU_CACHE_LINE_BYTES - 2; + memset(mq, 0, bytes); + mq->start = &mq->data; + mq->start2 = &mq->data + (qlines / 2 - 1) * GRU_CACHE_LINE_BYTES; + mq->next = &mq->data; + mq->limit = &mq->data + (qlines - 2) * GRU_CACHE_LINE_BYTES; + mq->qlines = qlines; + mq->hstatus[0] = 0; + mq->hstatus[1] = 1; + mq->head = gru_mesq_head(2, qlines / 2 + 1); + mqd->mq = mq; + mqd->mq_gpa = uv_gpa(mq); + mqd->qlines = qlines; + mqd->interrupt_pnode = nasid >> 1; + mqd->interrupt_vector = vector; + mqd->interrupt_apicid = apicid; + return 0; +} +EXPORT_SYMBOL_GPL(gru_create_message_queue); + +/* + * Send a NOOP message to a message queue + * Returns: + * 0 - if queue is full after the send. This is the normal case + * but various races can change this. + * -1 - if mesq sent successfully but queue not full + * >0 - unexpected error. MQE_xxx returned + */ +static int send_noop_message(void *cb, struct gru_message_queue_desc *mqd, + void *mesg) +{ + const struct message_header noop_header = { + .present = MQS_NOOP, .lines = 1}; + unsigned long m; + int substatus, ret; + struct message_header save_mhdr, *mhdr = mesg; + + STAT(mesq_noop); + save_mhdr = *mhdr; + *mhdr = noop_header; + gru_mesq(cb, mqd->mq_gpa, gru_get_tri(mhdr), 1, IMA); + ret = gru_wait(cb); + + if (ret) { + substatus = gru_get_cb_message_queue_substatus(cb); + switch (substatus) { + case CBSS_NO_ERROR: + STAT(mesq_noop_unexpected_error); + ret = MQE_UNEXPECTED_CB_ERR; + break; + case CBSS_LB_OVERFLOWED: + STAT(mesq_noop_lb_overflow); + ret = MQE_CONGESTION; + break; + case CBSS_QLIMIT_REACHED: + STAT(mesq_noop_qlimit_reached); + ret = 0; + break; + case CBSS_AMO_NACKED: + STAT(mesq_noop_amo_nacked); + ret = MQE_CONGESTION; + break; + case CBSS_PUT_NACKED: + STAT(mesq_noop_put_nacked); + m = mqd->mq_gpa + (gru_get_amo_value_head(cb) << 6); + gru_vstore(cb, m, gru_get_tri(mesg), XTYPE_CL, 1, 1, + IMA); + if (gru_wait(cb) == CBS_IDLE) + ret = MQIE_AGAIN; + else + ret = MQE_UNEXPECTED_CB_ERR; + break; + case CBSS_PAGE_OVERFLOW: + STAT(mesq_noop_page_overflow); + /* fallthru */ + default: + BUG(); + } + } + *mhdr = save_mhdr; + return ret; +} + +/* + * Handle a gru_mesq full. + */ +static int send_message_queue_full(void *cb, struct gru_message_queue_desc *mqd, + void *mesg, int lines) +{ + union gru_mesqhead mqh; + unsigned int limit, head; + unsigned long avalue; + int half, qlines; + + /* Determine if switching to first/second half of q */ + avalue = gru_get_amo_value(cb); + head = gru_get_amo_value_head(cb); + limit = gru_get_amo_value_limit(cb); + + qlines = mqd->qlines; + half = (limit != qlines); + + if (half) + mqh = gru_mesq_head(qlines / 2 + 1, qlines); + else + mqh = gru_mesq_head(2, qlines / 2 + 1); + + /* Try to get lock for switching head pointer */ + gru_gamir(cb, EOP_IR_CLR, HSTATUS(mqd->mq_gpa, half), XTYPE_DW, IMA); + if (gru_wait(cb) != CBS_IDLE) + goto cberr; + if (!gru_get_amo_value(cb)) { + STAT(mesq_qf_locked); + return MQE_QUEUE_FULL; + } + + /* Got the lock. Send optional NOP if queue not full, */ + if (head != limit) { + if (send_noop_message(cb, mqd, mesg)) { + gru_gamir(cb, EOP_IR_INC, HSTATUS(mqd->mq_gpa, half), + XTYPE_DW, IMA); + if (gru_wait(cb) != CBS_IDLE) + goto cberr; + STAT(mesq_qf_noop_not_full); + return MQIE_AGAIN; + } + avalue++; + } + + /* Then flip queuehead to other half of queue. */ + gru_gamer(cb, EOP_ERR_CSWAP, mqd->mq_gpa, XTYPE_DW, mqh.val, avalue, + IMA); + if (gru_wait(cb) != CBS_IDLE) + goto cberr; + + /* If not successfully in swapping queue head, clear the hstatus lock */ + if (gru_get_amo_value(cb) != avalue) { + STAT(mesq_qf_switch_head_failed); + gru_gamir(cb, EOP_IR_INC, HSTATUS(mqd->mq_gpa, half), XTYPE_DW, + IMA); + if (gru_wait(cb) != CBS_IDLE) + goto cberr; + } + return MQIE_AGAIN; +cberr: + STAT(mesq_qf_unexpected_error); + return MQE_UNEXPECTED_CB_ERR; +} + +/* + * Handle a PUT failure. Note: if message was a 2-line message, one of the + * lines might have successfully have been written. Before sending the + * message, "present" must be cleared in BOTH lines to prevent the receiver + * from prematurely seeing the full message. + */ +static int send_message_put_nacked(void *cb, struct gru_message_queue_desc *mqd, + void *mesg, int lines) +{ + unsigned long m; + int ret, loops = 200; /* experimentally determined */ + + m = mqd->mq_gpa + (gru_get_amo_value_head(cb) << 6); + if (lines == 2) { + gru_vset(cb, m, 0, XTYPE_CL, lines, 1, IMA); + if (gru_wait(cb) != CBS_IDLE) + return MQE_UNEXPECTED_CB_ERR; + } + gru_vstore(cb, m, gru_get_tri(mesg), XTYPE_CL, lines, 1, IMA); + if (gru_wait(cb) != CBS_IDLE) + return MQE_UNEXPECTED_CB_ERR; + + if (!mqd->interrupt_vector) + return MQE_OK; + + /* + * Send a noop message in order to deliver a cross-partition interrupt + * to the SSI that contains the target message queue. Normally, the + * interrupt is automatically delivered by hardware following mesq + * operations, but some error conditions require explicit delivery. + * The noop message will trigger delivery. Otherwise partition failures + * could cause unrecovered errors. + */ + do { + ret = send_noop_message(cb, mqd, mesg); + } while ((ret == MQIE_AGAIN || ret == MQE_CONGESTION) && (loops-- > 0)); + + if (ret == MQIE_AGAIN || ret == MQE_CONGESTION) { + /* + * Don't indicate to the app to resend the message, as it's + * already been successfully sent. We simply send an OK + * (rather than fail the send with MQE_UNEXPECTED_CB_ERR), + * assuming that the other side is receiving enough + * interrupts to get this message processed anyway. + */ + ret = MQE_OK; + } + return ret; +} + +/* + * Handle a gru_mesq failure. Some of these failures are software recoverable + * or retryable. + */ +static int send_message_failure(void *cb, struct gru_message_queue_desc *mqd, + void *mesg, int lines) +{ + int substatus, ret = 0; + + substatus = gru_get_cb_message_queue_substatus(cb); + switch (substatus) { + case CBSS_NO_ERROR: + STAT(mesq_send_unexpected_error); + ret = MQE_UNEXPECTED_CB_ERR; + break; + case CBSS_LB_OVERFLOWED: + STAT(mesq_send_lb_overflow); + ret = MQE_CONGESTION; + break; + case CBSS_QLIMIT_REACHED: + STAT(mesq_send_qlimit_reached); + ret = send_message_queue_full(cb, mqd, mesg, lines); + break; + case CBSS_AMO_NACKED: + STAT(mesq_send_amo_nacked); + ret = MQE_CONGESTION; + break; + case CBSS_PUT_NACKED: + STAT(mesq_send_put_nacked); + ret = send_message_put_nacked(cb, mqd, mesg, lines); + break; + case CBSS_PAGE_OVERFLOW: + STAT(mesq_page_overflow); + /* fallthru */ + default: + BUG(); + } + return ret; +} + +/* + * Send a message to a message queue + * mqd message queue descriptor + * mesg message. ust be vaddr within a GSEG + * bytes message size (<= 2 CL) + */ +int gru_send_message_gpa(struct gru_message_queue_desc *mqd, void *mesg, + unsigned int bytes) +{ + struct message_header *mhdr; + void *cb; + void *dsr; + int istatus, clines, ret; + + STAT(mesq_send); + BUG_ON(bytes < sizeof(int) || bytes > 2 * GRU_CACHE_LINE_BYTES); + + clines = DIV_ROUND_UP(bytes, GRU_CACHE_LINE_BYTES); + if (gru_get_cpu_resources(bytes, &cb, &dsr)) + return MQE_BUG_NO_RESOURCES; + memcpy(dsr, mesg, bytes); + mhdr = dsr; + mhdr->present = MQS_FULL; + mhdr->lines = clines; + if (clines == 2) { + mhdr->present2 = get_present2(mhdr); + restore_present2(mhdr, MQS_FULL); + } + + do { + ret = MQE_OK; + gru_mesq(cb, mqd->mq_gpa, gru_get_tri(mhdr), clines, IMA); + istatus = gru_wait(cb); + if (istatus != CBS_IDLE) + ret = send_message_failure(cb, mqd, dsr, clines); + } while (ret == MQIE_AGAIN); + gru_free_cpu_resources(cb, dsr); + + if (ret) + STAT(mesq_send_failed); + return ret; +} +EXPORT_SYMBOL_GPL(gru_send_message_gpa); + +/* + * Advance the receive pointer for the queue to the next message. + */ +void gru_free_message(struct gru_message_queue_desc *mqd, void *mesg) +{ + struct message_queue *mq = mqd->mq; + struct message_header *mhdr = mq->next; + void *next, *pnext; + int half = -1; + int lines = mhdr->lines; + + if (lines == 2) + restore_present2(mhdr, MQS_EMPTY); + mhdr->present = MQS_EMPTY; + + pnext = mq->next; + next = pnext + GRU_CACHE_LINE_BYTES * lines; + if (next == mq->limit) { + next = mq->start; + half = 1; + } else if (pnext < mq->start2 && next >= mq->start2) { + half = 0; + } + + if (half >= 0) + mq->hstatus[half] = 1; + mq->next = next; +} +EXPORT_SYMBOL_GPL(gru_free_message); + +/* + * Get next message from message queue. Return NULL if no message + * present. User must call next_message() to move to next message. + * rmq message queue + */ +void *gru_get_next_message(struct gru_message_queue_desc *mqd) +{ + struct message_queue *mq = mqd->mq; + struct message_header *mhdr = mq->next; + int present = mhdr->present; + + /* skip NOOP messages */ + while (present == MQS_NOOP) { + gru_free_message(mqd, mhdr); + mhdr = mq->next; + present = mhdr->present; + } + + /* Wait for both halves of 2 line messages */ + if (present == MQS_FULL && mhdr->lines == 2 && + get_present2(mhdr) == MQS_EMPTY) + present = MQS_EMPTY; + + if (!present) { + STAT(mesq_receive_none); + return NULL; + } + + if (mhdr->lines == 2) + restore_present2(mhdr, mhdr->present2); + + STAT(mesq_receive); + return mhdr; +} +EXPORT_SYMBOL_GPL(gru_get_next_message); + +/* ---------------------- GRU DATA COPY FUNCTIONS ---------------------------*/ + +/* + * Load a DW from a global GPA. The GPA can be a memory or MMR address. + */ +int gru_read_gpa(unsigned long *value, unsigned long gpa) +{ + void *cb; + void *dsr; + int ret, iaa; + + STAT(read_gpa); + if (gru_get_cpu_resources(GRU_NUM_KERNEL_DSR_BYTES, &cb, &dsr)) + return MQE_BUG_NO_RESOURCES; + iaa = gpa >> 62; + gru_vload_phys(cb, gpa, gru_get_tri(dsr), iaa, IMA); + ret = gru_wait(cb); + if (ret == CBS_IDLE) + *value = *(unsigned long *)dsr; + gru_free_cpu_resources(cb, dsr); + return ret; +} +EXPORT_SYMBOL_GPL(gru_read_gpa); + + +/* + * Copy a block of data using the GRU resources + */ +int gru_copy_gpa(unsigned long dest_gpa, unsigned long src_gpa, + unsigned int bytes) +{ + void *cb; + void *dsr; + int ret; + + STAT(copy_gpa); + if (gru_get_cpu_resources(GRU_NUM_KERNEL_DSR_BYTES, &cb, &dsr)) + return MQE_BUG_NO_RESOURCES; + gru_bcopy(cb, src_gpa, dest_gpa, gru_get_tri(dsr), + XTYPE_B, bytes, GRU_NUM_KERNEL_DSR_CL, IMA); + ret = gru_wait(cb); + gru_free_cpu_resources(cb, dsr); + return ret; +} +EXPORT_SYMBOL_GPL(gru_copy_gpa); + +/* ------------------- KERNEL QUICKTESTS RUN AT STARTUP ----------------*/ +/* Temp - will delete after we gain confidence in the GRU */ + +static int quicktest0(unsigned long arg) +{ + unsigned long word0; + unsigned long word1; + void *cb; + void *dsr; + unsigned long *p; + int ret = -EIO; + + if (gru_get_cpu_resources(GRU_CACHE_LINE_BYTES, &cb, &dsr)) + return MQE_BUG_NO_RESOURCES; + p = dsr; + word0 = MAGIC; + word1 = 0; + + gru_vload(cb, uv_gpa(&word0), gru_get_tri(dsr), XTYPE_DW, 1, 1, IMA); + if (gru_wait(cb) != CBS_IDLE) { + printk(KERN_DEBUG "GRU:%d quicktest0: CBR failure 1\n", smp_processor_id()); + goto done; + } + + if (*p != MAGIC) { + printk(KERN_DEBUG "GRU:%d quicktest0 bad magic 0x%lx\n", smp_processor_id(), *p); + goto done; + } + gru_vstore(cb, uv_gpa(&word1), gru_get_tri(dsr), XTYPE_DW, 1, 1, IMA); + if (gru_wait(cb) != CBS_IDLE) { + printk(KERN_DEBUG "GRU:%d quicktest0: CBR failure 2\n", smp_processor_id()); + goto done; + } + + if (word0 != word1 || word1 != MAGIC) { + printk(KERN_DEBUG + "GRU:%d quicktest0 err: found 0x%lx, expected 0x%lx\n", + smp_processor_id(), word1, MAGIC); + goto done; + } + ret = 0; + +done: + gru_free_cpu_resources(cb, dsr); + return ret; +} + +#define ALIGNUP(p, q) ((void *)(((unsigned long)(p) + (q) - 1) & ~(q - 1))) + +static int quicktest1(unsigned long arg) +{ + struct gru_message_queue_desc mqd; + void *p, *mq; + int i, ret = -EIO; + char mes[GRU_CACHE_LINE_BYTES], *m; + + /* Need 1K cacheline aligned that does not cross page boundary */ + p = kmalloc(4096, 0); + if (p == NULL) + return -ENOMEM; + mq = ALIGNUP(p, 1024); + memset(mes, 0xee, sizeof(mes)); + + gru_create_message_queue(&mqd, mq, 8 * GRU_CACHE_LINE_BYTES, 0, 0, 0); + for (i = 0; i < 6; i++) { + mes[8] = i; + do { + ret = gru_send_message_gpa(&mqd, mes, sizeof(mes)); + } while (ret == MQE_CONGESTION); + if (ret) + break; + } + if (ret != MQE_QUEUE_FULL || i != 4) { + printk(KERN_DEBUG "GRU:%d quicktest1: unexpect status %d, i %d\n", + smp_processor_id(), ret, i); + goto done; + } + + for (i = 0; i < 6; i++) { + m = gru_get_next_message(&mqd); + if (!m || m[8] != i) + break; + gru_free_message(&mqd, m); + } + if (i != 4) { + printk(KERN_DEBUG "GRU:%d quicktest2: bad message, i %d, m %p, m8 %d\n", + smp_processor_id(), i, m, m ? m[8] : -1); + goto done; + } + ret = 0; + +done: + kfree(p); + return ret; +} + +static int quicktest2(unsigned long arg) +{ + static DECLARE_COMPLETION(cmp); + unsigned long han; + int blade_id = 0; + int numcb = 4; + int ret = 0; + unsigned long *buf; + void *cb0, *cb; + struct gru_control_block_status *gen; + int i, k, istatus, bytes; + + bytes = numcb * 4 * 8; + buf = kmalloc(bytes, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + ret = -EBUSY; + han = gru_reserve_async_resources(blade_id, numcb, 0, &cmp); + if (!han) + goto done; + + gru_lock_async_resource(han, &cb0, NULL); + memset(buf, 0xee, bytes); + for (i = 0; i < numcb; i++) + gru_vset(cb0 + i * GRU_HANDLE_STRIDE, uv_gpa(&buf[i * 4]), 0, + XTYPE_DW, 4, 1, IMA_INTERRUPT); + + ret = 0; + k = numcb; + do { + gru_wait_async_cbr(han); + for (i = 0; i < numcb; i++) { + cb = cb0 + i * GRU_HANDLE_STRIDE; + istatus = gru_check_status(cb); + if (istatus != CBS_ACTIVE && istatus != CBS_CALL_OS) + break; + } + if (i == numcb) + continue; + if (istatus != CBS_IDLE) { + printk(KERN_DEBUG "GRU:%d quicktest2: cb %d, exception\n", smp_processor_id(), i); + ret = -EFAULT; + } else if (buf[4 * i] || buf[4 * i + 1] || buf[4 * i + 2] || + buf[4 * i + 3]) { + printk(KERN_DEBUG "GRU:%d quicktest2:cb %d, buf 0x%lx, 0x%lx, 0x%lx, 0x%lx\n", + smp_processor_id(), i, buf[4 * i], buf[4 * i + 1], buf[4 * i + 2], buf[4 * i + 3]); + ret = -EIO; + } + k--; + gen = cb; + gen->istatus = CBS_CALL_OS; /* don't handle this CBR again */ + } while (k); + BUG_ON(cmp.done); + + gru_unlock_async_resource(han); + gru_release_async_resources(han); +done: + kfree(buf); + return ret; +} + +#define BUFSIZE 200 +static int quicktest3(unsigned long arg) +{ + char buf1[BUFSIZE], buf2[BUFSIZE]; + int ret = 0; + + memset(buf2, 0, sizeof(buf2)); + memset(buf1, get_cycles() & 255, sizeof(buf1)); + gru_copy_gpa(uv_gpa(buf2), uv_gpa(buf1), BUFSIZE); + if (memcmp(buf1, buf2, BUFSIZE)) { + printk(KERN_DEBUG "GRU:%d quicktest3 error\n", smp_processor_id()); + ret = -EIO; + } + return ret; +} + +/* + * Debugging only. User hook for various kernel tests + * of driver & gru. + */ +int gru_ktest(unsigned long arg) +{ + int ret = -EINVAL; + + switch (arg & 0xff) { + case 0: + ret = quicktest0(arg); + break; + case 1: + ret = quicktest1(arg); + break; + case 2: + ret = quicktest2(arg); + break; + case 3: + ret = quicktest3(arg); + break; + case 99: + ret = gru_free_kernel_contexts(); + break; + } + return ret; + +} + +int gru_kservices_init(void) +{ + return 0; +} + +void gru_kservices_exit(void) +{ + if (gru_free_kernel_contexts()) + BUG(); +} + diff --git a/drivers/misc/sgi-gru/grukservices.h b/drivers/misc/sgi-gru/grukservices.h new file mode 100644 index 000000000..02aa94d84 --- /dev/null +++ b/drivers/misc/sgi-gru/grukservices.h @@ -0,0 +1,214 @@ + +/* + * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#ifndef __GRU_KSERVICES_H_ +#define __GRU_KSERVICES_H_ + + +/* + * Message queues using the GRU to send/receive messages. + * + * These function allow the user to create a message queue for + * sending/receiving 1 or 2 cacheline messages using the GRU. + * + * Processes SENDING messages will use a kernel CBR/DSR to send + * the message. This is transparent to the caller. + * + * The receiver does not use any GRU resources. + * + * The functions support: + * - single receiver + * - multiple senders + * - cross partition message + * + * Missing features ZZZ: + * - user options for dealing with timeouts, queue full, etc. + * - gru_create_message_queue() needs interrupt vector info + */ + +struct gru_message_queue_desc { + void *mq; /* message queue vaddress */ + unsigned long mq_gpa; /* global address of mq */ + int qlines; /* queue size in CL */ + int interrupt_vector; /* interrupt vector */ + int interrupt_pnode; /* pnode for interrupt */ + int interrupt_apicid; /* lapicid for interrupt */ +}; + +/* + * Initialize a user allocated chunk of memory to be used as + * a message queue. The caller must ensure that the queue is + * in contiguous physical memory and is cacheline aligned. + * + * Message queue size is the total number of bytes allocated + * to the queue including a 2 cacheline header that is used + * to manage the queue. + * + * Input: + * mqd pointer to message queue descriptor + * p pointer to user allocated mesq memory. + * bytes size of message queue in bytes + * vector interrupt vector (zero if no interrupts) + * nasid nasid of blade where interrupt is delivered + * apicid apicid of cpu for interrupt + * + * Errors: + * 0 OK + * >0 error + */ +extern int gru_create_message_queue(struct gru_message_queue_desc *mqd, + void *p, unsigned int bytes, int nasid, int vector, int apicid); + +/* + * Send a message to a message queue. + * + * Note: The message queue transport mechanism uses the first 32 + * bits of the message. Users should avoid using these bits. + * + * + * Input: + * mqd pointer to message queue descriptor + * mesg pointer to message. Must be 64-bit aligned + * bytes size of message in bytes + * + * Output: + * 0 message sent + * >0 Send failure - see error codes below + * + */ +extern int gru_send_message_gpa(struct gru_message_queue_desc *mqd, + void *mesg, unsigned int bytes); + +/* Status values for gru_send_message() */ +#define MQE_OK 0 /* message sent successfully */ +#define MQE_CONGESTION 1 /* temporary congestion, try again */ +#define MQE_QUEUE_FULL 2 /* queue is full */ +#define MQE_UNEXPECTED_CB_ERR 3 /* unexpected CB error */ +#define MQE_PAGE_OVERFLOW 10 /* BUG - queue overflowed a page */ +#define MQE_BUG_NO_RESOURCES 11 /* BUG - could not alloc GRU cb/dsr */ + +/* + * Advance the receive pointer for the message queue to the next message. + * Note: current API requires messages to be gotten & freed in order. Future + * API extensions may allow for out-of-order freeing. + * + * Input + * mqd pointer to message queue descriptor + * mesq message being freed + */ +extern void gru_free_message(struct gru_message_queue_desc *mqd, + void *mesq); + +/* + * Get next message from message queue. Returns pointer to + * message OR NULL if no message present. + * User must call gru_free_message() after message is processed + * in order to move the queue pointers to next message. + * + * Input + * mqd pointer to message queue descriptor + * + * Output: + * p pointer to message + * NULL no message available + */ +extern void *gru_get_next_message(struct gru_message_queue_desc *mqd); + + +/* + * Read a GRU global GPA. Source can be located in a remote partition. + * + * Input: + * value memory address where MMR value is returned + * gpa source numalink physical address of GPA + * + * Output: + * 0 OK + * >0 error + */ +int gru_read_gpa(unsigned long *value, unsigned long gpa); + + +/* + * Copy data using the GRU. Source or destination can be located in a remote + * partition. + * + * Input: + * dest_gpa destination global physical address + * src_gpa source global physical address + * bytes number of bytes to copy + * + * Output: + * 0 OK + * >0 error + */ +extern int gru_copy_gpa(unsigned long dest_gpa, unsigned long src_gpa, + unsigned int bytes); + +/* + * Reserve GRU resources to be used asynchronously. + * + * input: + * blade_id - blade on which resources should be reserved + * cbrs - number of CBRs + * dsr_bytes - number of DSR bytes needed + * cmp - completion structure for waiting for + * async completions + * output: + * handle to identify resource + * (0 = no resources) + */ +extern unsigned long gru_reserve_async_resources(int blade_id, int cbrs, int dsr_bytes, + struct completion *cmp); + +/* + * Release async resources previously reserved. + * + * input: + * han - handle to identify resources + */ +extern void gru_release_async_resources(unsigned long han); + +/* + * Wait for async GRU instructions to complete. + * + * input: + * han - handle to identify resources + */ +extern void gru_wait_async_cbr(unsigned long han); + +/* + * Lock previous reserved async GRU resources + * + * input: + * han - handle to identify resources + * output: + * cb - pointer to first CBR + * dsr - pointer to first DSR + */ +extern void gru_lock_async_resource(unsigned long han, void **cb, void **dsr); + +/* + * Unlock previous reserved async GRU resources + * + * input: + * han - handle to identify resources + */ +extern void gru_unlock_async_resource(unsigned long han); + +#endif /* __GRU_KSERVICES_H_ */ diff --git a/drivers/misc/sgi-gru/grulib.h b/drivers/misc/sgi-gru/grulib.h new file mode 100644 index 000000000..e77d1b1f9 --- /dev/null +++ b/drivers/misc/sgi-gru/grulib.h @@ -0,0 +1,153 @@ +/* + * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef __GRULIB_H__ +#define __GRULIB_H__ + +#define GRU_BASENAME "gru" +#define GRU_FULLNAME "/dev/gru" +#define GRU_IOCTL_NUM 'G' + +/* + * Maximum number of GRU segments that a user can have open + * ZZZ temp - set high for testing. Revisit. + */ +#define GRU_MAX_OPEN_CONTEXTS 32 + +/* Set Number of Request Blocks */ +#define GRU_CREATE_CONTEXT _IOWR(GRU_IOCTL_NUM, 1, void *) + +/* Set Context Options */ +#define GRU_SET_CONTEXT_OPTION _IOWR(GRU_IOCTL_NUM, 4, void *) + +/* Fetch exception detail */ +#define GRU_USER_GET_EXCEPTION_DETAIL _IOWR(GRU_IOCTL_NUM, 6, void *) + +/* For user call_os handling - normally a TLB fault */ +#define GRU_USER_CALL_OS _IOWR(GRU_IOCTL_NUM, 8, void *) + +/* For user unload context */ +#define GRU_USER_UNLOAD_CONTEXT _IOWR(GRU_IOCTL_NUM, 9, void *) + +/* For dumpping GRU chiplet state */ +#define GRU_DUMP_CHIPLET_STATE _IOWR(GRU_IOCTL_NUM, 11, void *) + +/* For getting gseg statistics */ +#define GRU_GET_GSEG_STATISTICS _IOWR(GRU_IOCTL_NUM, 12, void *) + +/* For user TLB flushing (primarily for tests) */ +#define GRU_USER_FLUSH_TLB _IOWR(GRU_IOCTL_NUM, 50, void *) + +/* Get some config options (primarily for tests & emulator) */ +#define GRU_GET_CONFIG_INFO _IOWR(GRU_IOCTL_NUM, 51, void *) + +/* Various kernel self-tests */ +#define GRU_KTEST _IOWR(GRU_IOCTL_NUM, 52, void *) + +#define CONTEXT_WINDOW_BYTES(th) (GRU_GSEG_PAGESIZE * (th)) +#define THREAD_POINTER(p, th) (p + GRU_GSEG_PAGESIZE * (th)) +#define GSEG_START(cb) ((void *)((unsigned long)(cb) & ~(GRU_GSEG_PAGESIZE - 1))) + +struct gru_get_gseg_statistics_req { + unsigned long gseg; + struct gru_gseg_statistics stats; +}; + +/* + * Structure used to pass TLB flush parameters to the driver + */ +struct gru_create_context_req { + unsigned long gseg; + unsigned int data_segment_bytes; + unsigned int control_blocks; + unsigned int maximum_thread_count; + unsigned int options; + unsigned char tlb_preload_count; +}; + +/* + * Structure used to pass unload context parameters to the driver + */ +struct gru_unload_context_req { + unsigned long gseg; +}; + +/* + * Structure used to set context options + */ +enum {sco_gseg_owner, sco_cch_req_slice, sco_blade_chiplet}; +struct gru_set_context_option_req { + unsigned long gseg; + int op; + int val0; + long val1; +}; + +/* + * Structure used to pass TLB flush parameters to the driver + */ +struct gru_flush_tlb_req { + unsigned long gseg; + unsigned long vaddr; + size_t len; +}; + +/* + * Structure used to pass TLB flush parameters to the driver + */ +enum {dcs_pid, dcs_gid}; +struct gru_dump_chiplet_state_req { + unsigned int op; + unsigned int gid; + int ctxnum; + char data_opt; + char lock_cch; + char flush_cbrs; + char fill[10]; + pid_t pid; + void *buf; + size_t buflen; + /* ---- output --- */ + unsigned int num_contexts; +}; + +#define GRU_DUMP_MAGIC 0x3474ab6c +struct gru_dump_context_header { + unsigned int magic; + unsigned int gid; + unsigned char ctxnum; + unsigned char cbrcnt; + unsigned char dsrcnt; + pid_t pid; + unsigned long vaddr; + int cch_locked; + unsigned long data[0]; +}; + +/* + * GRU configuration info (temp - for testing) + */ +struct gru_config_info { + int cpus; + int blades; + int nodes; + int chiplets; + int fill[16]; +}; + +#endif /* __GRULIB_H__ */ diff --git a/drivers/misc/sgi-gru/grumain.c b/drivers/misc/sgi-gru/grumain.c new file mode 100644 index 000000000..ab174f28e --- /dev/null +++ b/drivers/misc/sgi-gru/grumain.c @@ -0,0 +1,976 @@ +/* + * SN Platform GRU Driver + * + * DRIVER TABLE MANAGER + GRU CONTEXT LOAD/UNLOAD + * + * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "gru.h" +#include "grutables.h" +#include "gruhandles.h" + +unsigned long gru_options __read_mostly; + +static struct device_driver gru_driver = { + .name = "gru" +}; + +static struct device gru_device = { + .init_name = "", + .driver = &gru_driver, +}; + +struct device *grudev = &gru_device; + +/* + * Select a gru fault map to be used by the current cpu. Note that + * multiple cpus may be using the same map. + * ZZZ should be inline but did not work on emulator + */ +int gru_cpu_fault_map_id(void) +{ +#ifdef CONFIG_IA64 + return uv_blade_processor_id() % GRU_NUM_TFM; +#else + int cpu = smp_processor_id(); + int id, core; + + core = uv_cpu_core_number(cpu); + id = core + UV_MAX_INT_CORES * uv_cpu_socket_number(cpu); + return id; +#endif +} + +/*--------- ASID Management ------------------------------------------- + * + * Initially, assign asids sequentially from MIN_ASID .. MAX_ASID. + * Once MAX is reached, flush the TLB & start over. However, + * some asids may still be in use. There won't be many (percentage wise) still + * in use. Search active contexts & determine the value of the first + * asid in use ("x"s below). Set "limit" to this value. + * This defines a block of assignable asids. + * + * When "limit" is reached, search forward from limit+1 and determine the + * next block of assignable asids. + * + * Repeat until MAX_ASID is reached, then start over again. + * + * Each time MAX_ASID is reached, increment the asid generation. Since + * the search for in-use asids only checks contexts with GRUs currently + * assigned, asids in some contexts will be missed. Prior to loading + * a context, the asid generation of the GTS asid is rechecked. If it + * doesn't match the current generation, a new asid will be assigned. + * + * 0---------------x------------x---------------------x----| + * ^-next ^-limit ^-MAX_ASID + * + * All asid manipulation & context loading/unloading is protected by the + * gs_lock. + */ + +/* Hit the asid limit. Start over */ +static int gru_wrap_asid(struct gru_state *gru) +{ + gru_dbg(grudev, "gid %d\n", gru->gs_gid); + STAT(asid_wrap); + gru->gs_asid_gen++; + return MIN_ASID; +} + +/* Find the next chunk of unused asids */ +static int gru_reset_asid_limit(struct gru_state *gru, int asid) +{ + int i, gid, inuse_asid, limit; + + gru_dbg(grudev, "gid %d, asid 0x%x\n", gru->gs_gid, asid); + STAT(asid_next); + limit = MAX_ASID; + if (asid >= limit) + asid = gru_wrap_asid(gru); + gru_flush_all_tlb(gru); + gid = gru->gs_gid; +again: + for (i = 0; i < GRU_NUM_CCH; i++) { + if (!gru->gs_gts[i] || is_kernel_context(gru->gs_gts[i])) + continue; + inuse_asid = gru->gs_gts[i]->ts_gms->ms_asids[gid].mt_asid; + gru_dbg(grudev, "gid %d, gts %p, gms %p, inuse 0x%x, cxt %d\n", + gru->gs_gid, gru->gs_gts[i], gru->gs_gts[i]->ts_gms, + inuse_asid, i); + if (inuse_asid == asid) { + asid += ASID_INC; + if (asid >= limit) { + /* + * empty range: reset the range limit and + * start over + */ + limit = MAX_ASID; + if (asid >= MAX_ASID) + asid = gru_wrap_asid(gru); + goto again; + } + } + + if ((inuse_asid > asid) && (inuse_asid < limit)) + limit = inuse_asid; + } + gru->gs_asid_limit = limit; + gru->gs_asid = asid; + gru_dbg(grudev, "gid %d, new asid 0x%x, new_limit 0x%x\n", gru->gs_gid, + asid, limit); + return asid; +} + +/* Assign a new ASID to a thread context. */ +static int gru_assign_asid(struct gru_state *gru) +{ + int asid; + + gru->gs_asid += ASID_INC; + asid = gru->gs_asid; + if (asid >= gru->gs_asid_limit) + asid = gru_reset_asid_limit(gru, asid); + + gru_dbg(grudev, "gid %d, asid 0x%x\n", gru->gs_gid, asid); + return asid; +} + +/* + * Clear n bits in a word. Return a word indicating the bits that were cleared. + * Optionally, build an array of chars that contain the bit numbers allocated. + */ +static unsigned long reserve_resources(unsigned long *p, int n, int mmax, + char *idx) +{ + unsigned long bits = 0; + int i; + + while (n--) { + i = find_first_bit(p, mmax); + if (i == mmax) + BUG(); + __clear_bit(i, p); + __set_bit(i, &bits); + if (idx) + *idx++ = i; + } + return bits; +} + +unsigned long gru_reserve_cb_resources(struct gru_state *gru, int cbr_au_count, + char *cbmap) +{ + return reserve_resources(&gru->gs_cbr_map, cbr_au_count, GRU_CBR_AU, + cbmap); +} + +unsigned long gru_reserve_ds_resources(struct gru_state *gru, int dsr_au_count, + char *dsmap) +{ + return reserve_resources(&gru->gs_dsr_map, dsr_au_count, GRU_DSR_AU, + dsmap); +} + +static void reserve_gru_resources(struct gru_state *gru, + struct gru_thread_state *gts) +{ + gru->gs_active_contexts++; + gts->ts_cbr_map = + gru_reserve_cb_resources(gru, gts->ts_cbr_au_count, + gts->ts_cbr_idx); + gts->ts_dsr_map = + gru_reserve_ds_resources(gru, gts->ts_dsr_au_count, NULL); +} + +static void free_gru_resources(struct gru_state *gru, + struct gru_thread_state *gts) +{ + gru->gs_active_contexts--; + gru->gs_cbr_map |= gts->ts_cbr_map; + gru->gs_dsr_map |= gts->ts_dsr_map; +} + +/* + * Check if a GRU has sufficient free resources to satisfy an allocation + * request. Note: GRU locks may or may not be held when this is called. If + * not held, recheck after acquiring the appropriate locks. + * + * Returns 1 if sufficient resources, 0 if not + */ +static int check_gru_resources(struct gru_state *gru, int cbr_au_count, + int dsr_au_count, int max_active_contexts) +{ + return hweight64(gru->gs_cbr_map) >= cbr_au_count + && hweight64(gru->gs_dsr_map) >= dsr_au_count + && gru->gs_active_contexts < max_active_contexts; +} + +/* + * TLB manangment requires tracking all GRU chiplets that have loaded a GSEG + * context. + */ +static int gru_load_mm_tracker(struct gru_state *gru, + struct gru_thread_state *gts) +{ + struct gru_mm_struct *gms = gts->ts_gms; + struct gru_mm_tracker *asids = &gms->ms_asids[gru->gs_gid]; + unsigned short ctxbitmap = (1 << gts->ts_ctxnum); + int asid; + + spin_lock(&gms->ms_asid_lock); + asid = asids->mt_asid; + + spin_lock(&gru->gs_asid_lock); + if (asid == 0 || (asids->mt_ctxbitmap == 0 && asids->mt_asid_gen != + gru->gs_asid_gen)) { + asid = gru_assign_asid(gru); + asids->mt_asid = asid; + asids->mt_asid_gen = gru->gs_asid_gen; + STAT(asid_new); + } else { + STAT(asid_reuse); + } + spin_unlock(&gru->gs_asid_lock); + + BUG_ON(asids->mt_ctxbitmap & ctxbitmap); + asids->mt_ctxbitmap |= ctxbitmap; + if (!test_bit(gru->gs_gid, gms->ms_asidmap)) + __set_bit(gru->gs_gid, gms->ms_asidmap); + spin_unlock(&gms->ms_asid_lock); + + gru_dbg(grudev, + "gid %d, gts %p, gms %p, ctxnum %d, asid 0x%x, asidmap 0x%lx\n", + gru->gs_gid, gts, gms, gts->ts_ctxnum, asid, + gms->ms_asidmap[0]); + return asid; +} + +static void gru_unload_mm_tracker(struct gru_state *gru, + struct gru_thread_state *gts) +{ + struct gru_mm_struct *gms = gts->ts_gms; + struct gru_mm_tracker *asids; + unsigned short ctxbitmap; + + asids = &gms->ms_asids[gru->gs_gid]; + ctxbitmap = (1 << gts->ts_ctxnum); + spin_lock(&gms->ms_asid_lock); + spin_lock(&gru->gs_asid_lock); + BUG_ON((asids->mt_ctxbitmap & ctxbitmap) != ctxbitmap); + asids->mt_ctxbitmap ^= ctxbitmap; + gru_dbg(grudev, "gid %d, gts %p, gms %p, ctxnum %d, asidmap 0x%lx\n", + gru->gs_gid, gts, gms, gts->ts_ctxnum, gms->ms_asidmap[0]); + spin_unlock(&gru->gs_asid_lock); + spin_unlock(&gms->ms_asid_lock); +} + +/* + * Decrement the reference count on a GTS structure. Free the structure + * if the reference count goes to zero. + */ +void gts_drop(struct gru_thread_state *gts) +{ + if (gts && atomic_dec_return(>s->ts_refcnt) == 0) { + if (gts->ts_gms) + gru_drop_mmu_notifier(gts->ts_gms); + kfree(gts); + STAT(gts_free); + } +} + +/* + * Locate the GTS structure for the current thread. + */ +static struct gru_thread_state *gru_find_current_gts_nolock(struct gru_vma_data + *vdata, int tsid) +{ + struct gru_thread_state *gts; + + list_for_each_entry(gts, &vdata->vd_head, ts_next) + if (gts->ts_tsid == tsid) + return gts; + return NULL; +} + +/* + * Allocate a thread state structure. + */ +struct gru_thread_state *gru_alloc_gts(struct vm_area_struct *vma, + int cbr_au_count, int dsr_au_count, + unsigned char tlb_preload_count, int options, int tsid) +{ + struct gru_thread_state *gts; + struct gru_mm_struct *gms; + int bytes; + + bytes = DSR_BYTES(dsr_au_count) + CBR_BYTES(cbr_au_count); + bytes += sizeof(struct gru_thread_state); + gts = kmalloc(bytes, GFP_KERNEL); + if (!gts) + return ERR_PTR(-ENOMEM); + + STAT(gts_alloc); + memset(gts, 0, sizeof(struct gru_thread_state)); /* zero out header */ + atomic_set(>s->ts_refcnt, 1); + mutex_init(>s->ts_ctxlock); + gts->ts_cbr_au_count = cbr_au_count; + gts->ts_dsr_au_count = dsr_au_count; + gts->ts_tlb_preload_count = tlb_preload_count; + gts->ts_user_options = options; + gts->ts_user_blade_id = -1; + gts->ts_user_chiplet_id = -1; + gts->ts_tsid = tsid; + gts->ts_ctxnum = NULLCTX; + gts->ts_tlb_int_select = -1; + gts->ts_cch_req_slice = -1; + gts->ts_sizeavail = GRU_SIZEAVAIL(PAGE_SHIFT); + if (vma) { + gts->ts_mm = current->mm; + gts->ts_vma = vma; + gms = gru_register_mmu_notifier(); + if (IS_ERR(gms)) + goto err; + gts->ts_gms = gms; + } + + gru_dbg(grudev, "alloc gts %p\n", gts); + return gts; + +err: + gts_drop(gts); + return ERR_CAST(gms); +} + +/* + * Allocate a vma private data structure. + */ +struct gru_vma_data *gru_alloc_vma_data(struct vm_area_struct *vma, int tsid) +{ + struct gru_vma_data *vdata = NULL; + + vdata = kmalloc(sizeof(*vdata), GFP_KERNEL); + if (!vdata) + return NULL; + + STAT(vdata_alloc); + INIT_LIST_HEAD(&vdata->vd_head); + spin_lock_init(&vdata->vd_lock); + gru_dbg(grudev, "alloc vdata %p\n", vdata); + return vdata; +} + +/* + * Find the thread state structure for the current thread. + */ +struct gru_thread_state *gru_find_thread_state(struct vm_area_struct *vma, + int tsid) +{ + struct gru_vma_data *vdata = vma->vm_private_data; + struct gru_thread_state *gts; + + spin_lock(&vdata->vd_lock); + gts = gru_find_current_gts_nolock(vdata, tsid); + spin_unlock(&vdata->vd_lock); + gru_dbg(grudev, "vma %p, gts %p\n", vma, gts); + return gts; +} + +/* + * Allocate a new thread state for a GSEG. Note that races may allow + * another thread to race to create a gts. + */ +struct gru_thread_state *gru_alloc_thread_state(struct vm_area_struct *vma, + int tsid) +{ + struct gru_vma_data *vdata = vma->vm_private_data; + struct gru_thread_state *gts, *ngts; + + gts = gru_alloc_gts(vma, vdata->vd_cbr_au_count, + vdata->vd_dsr_au_count, + vdata->vd_tlb_preload_count, + vdata->vd_user_options, tsid); + if (IS_ERR(gts)) + return gts; + + spin_lock(&vdata->vd_lock); + ngts = gru_find_current_gts_nolock(vdata, tsid); + if (ngts) { + gts_drop(gts); + gts = ngts; + STAT(gts_double_allocate); + } else { + list_add(>s->ts_next, &vdata->vd_head); + } + spin_unlock(&vdata->vd_lock); + gru_dbg(grudev, "vma %p, gts %p\n", vma, gts); + return gts; +} + +/* + * Free the GRU context assigned to the thread state. + */ +static void gru_free_gru_context(struct gru_thread_state *gts) +{ + struct gru_state *gru; + + gru = gts->ts_gru; + gru_dbg(grudev, "gts %p, gid %d\n", gts, gru->gs_gid); + + spin_lock(&gru->gs_lock); + gru->gs_gts[gts->ts_ctxnum] = NULL; + free_gru_resources(gru, gts); + BUG_ON(test_bit(gts->ts_ctxnum, &gru->gs_context_map) == 0); + __clear_bit(gts->ts_ctxnum, &gru->gs_context_map); + gts->ts_ctxnum = NULLCTX; + gts->ts_gru = NULL; + gts->ts_blade = -1; + spin_unlock(&gru->gs_lock); + + gts_drop(gts); + STAT(free_context); +} + +/* + * Prefetching cachelines help hardware performance. + * (Strictly a performance enhancement. Not functionally required). + */ +static void prefetch_data(void *p, int num, int stride) +{ + while (num-- > 0) { + prefetchw(p); + p += stride; + } +} + +static inline long gru_copy_handle(void *d, void *s) +{ + memcpy(d, s, GRU_HANDLE_BYTES); + return GRU_HANDLE_BYTES; +} + +static void gru_prefetch_context(void *gseg, void *cb, void *cbe, + unsigned long cbrmap, unsigned long length) +{ + int i, scr; + + prefetch_data(gseg + GRU_DS_BASE, length / GRU_CACHE_LINE_BYTES, + GRU_CACHE_LINE_BYTES); + + for_each_cbr_in_allocation_map(i, &cbrmap, scr) { + prefetch_data(cb, 1, GRU_CACHE_LINE_BYTES); + prefetch_data(cbe + i * GRU_HANDLE_STRIDE, 1, + GRU_CACHE_LINE_BYTES); + cb += GRU_HANDLE_STRIDE; + } +} + +static void gru_load_context_data(void *save, void *grubase, int ctxnum, + unsigned long cbrmap, unsigned long dsrmap, + int data_valid) +{ + void *gseg, *cb, *cbe; + unsigned long length; + int i, scr; + + gseg = grubase + ctxnum * GRU_GSEG_STRIDE; + cb = gseg + GRU_CB_BASE; + cbe = grubase + GRU_CBE_BASE; + length = hweight64(dsrmap) * GRU_DSR_AU_BYTES; + gru_prefetch_context(gseg, cb, cbe, cbrmap, length); + + for_each_cbr_in_allocation_map(i, &cbrmap, scr) { + if (data_valid) { + save += gru_copy_handle(cb, save); + save += gru_copy_handle(cbe + i * GRU_HANDLE_STRIDE, + save); + } else { + memset(cb, 0, GRU_CACHE_LINE_BYTES); + memset(cbe + i * GRU_HANDLE_STRIDE, 0, + GRU_CACHE_LINE_BYTES); + } + /* Flush CBE to hide race in context restart */ + mb(); + gru_flush_cache(cbe + i * GRU_HANDLE_STRIDE); + cb += GRU_HANDLE_STRIDE; + } + + if (data_valid) + memcpy(gseg + GRU_DS_BASE, save, length); + else + memset(gseg + GRU_DS_BASE, 0, length); +} + +static void gru_unload_context_data(void *save, void *grubase, int ctxnum, + unsigned long cbrmap, unsigned long dsrmap) +{ + void *gseg, *cb, *cbe; + unsigned long length; + int i, scr; + + gseg = grubase + ctxnum * GRU_GSEG_STRIDE; + cb = gseg + GRU_CB_BASE; + cbe = grubase + GRU_CBE_BASE; + length = hweight64(dsrmap) * GRU_DSR_AU_BYTES; + + /* CBEs may not be coherent. Flush them from cache */ + for_each_cbr_in_allocation_map(i, &cbrmap, scr) + gru_flush_cache(cbe + i * GRU_HANDLE_STRIDE); + mb(); /* Let the CL flush complete */ + + gru_prefetch_context(gseg, cb, cbe, cbrmap, length); + + for_each_cbr_in_allocation_map(i, &cbrmap, scr) { + save += gru_copy_handle(save, cb); + save += gru_copy_handle(save, cbe + i * GRU_HANDLE_STRIDE); + cb += GRU_HANDLE_STRIDE; + } + memcpy(save, gseg + GRU_DS_BASE, length); +} + +void gru_unload_context(struct gru_thread_state *gts, int savestate) +{ + struct gru_state *gru = gts->ts_gru; + struct gru_context_configuration_handle *cch; + int ctxnum = gts->ts_ctxnum; + + if (!is_kernel_context(gts)) + zap_vma_ptes(gts->ts_vma, UGRUADDR(gts), GRU_GSEG_PAGESIZE); + cch = get_cch(gru->gs_gru_base_vaddr, ctxnum); + + gru_dbg(grudev, "gts %p, cbrmap 0x%lx, dsrmap 0x%lx\n", + gts, gts->ts_cbr_map, gts->ts_dsr_map); + lock_cch_handle(cch); + if (cch_interrupt_sync(cch)) + BUG(); + + if (!is_kernel_context(gts)) + gru_unload_mm_tracker(gru, gts); + if (savestate) { + gru_unload_context_data(gts->ts_gdata, gru->gs_gru_base_vaddr, + ctxnum, gts->ts_cbr_map, + gts->ts_dsr_map); + gts->ts_data_valid = 1; + } + + if (cch_deallocate(cch)) + BUG(); + unlock_cch_handle(cch); + + gru_free_gru_context(gts); +} + +/* + * Load a GRU context by copying it from the thread data structure in memory + * to the GRU. + */ +void gru_load_context(struct gru_thread_state *gts) +{ + struct gru_state *gru = gts->ts_gru; + struct gru_context_configuration_handle *cch; + int i, err, asid, ctxnum = gts->ts_ctxnum; + + cch = get_cch(gru->gs_gru_base_vaddr, ctxnum); + lock_cch_handle(cch); + cch->tfm_fault_bit_enable = + (gts->ts_user_options == GRU_OPT_MISS_FMM_POLL + || gts->ts_user_options == GRU_OPT_MISS_FMM_INTR); + cch->tlb_int_enable = (gts->ts_user_options == GRU_OPT_MISS_FMM_INTR); + if (cch->tlb_int_enable) { + gts->ts_tlb_int_select = gru_cpu_fault_map_id(); + cch->tlb_int_select = gts->ts_tlb_int_select; + } + if (gts->ts_cch_req_slice >= 0) { + cch->req_slice_set_enable = 1; + cch->req_slice = gts->ts_cch_req_slice; + } else { + cch->req_slice_set_enable =0; + } + cch->tfm_done_bit_enable = 0; + cch->dsr_allocation_map = gts->ts_dsr_map; + cch->cbr_allocation_map = gts->ts_cbr_map; + + if (is_kernel_context(gts)) { + cch->unmap_enable = 1; + cch->tfm_done_bit_enable = 1; + cch->cb_int_enable = 1; + cch->tlb_int_select = 0; /* For now, ints go to cpu 0 */ + } else { + cch->unmap_enable = 0; + cch->tfm_done_bit_enable = 0; + cch->cb_int_enable = 0; + asid = gru_load_mm_tracker(gru, gts); + for (i = 0; i < 8; i++) { + cch->asid[i] = asid + i; + cch->sizeavail[i] = gts->ts_sizeavail; + } + } + + err = cch_allocate(cch); + if (err) { + gru_dbg(grudev, + "err %d: cch %p, gts %p, cbr 0x%lx, dsr 0x%lx\n", + err, cch, gts, gts->ts_cbr_map, gts->ts_dsr_map); + BUG(); + } + + gru_load_context_data(gts->ts_gdata, gru->gs_gru_base_vaddr, ctxnum, + gts->ts_cbr_map, gts->ts_dsr_map, gts->ts_data_valid); + + if (cch_start(cch)) + BUG(); + unlock_cch_handle(cch); + + gru_dbg(grudev, "gid %d, gts %p, cbrmap 0x%lx, dsrmap 0x%lx, tie %d, tis %d\n", + gts->ts_gru->gs_gid, gts, gts->ts_cbr_map, gts->ts_dsr_map, + (gts->ts_user_options == GRU_OPT_MISS_FMM_INTR), gts->ts_tlb_int_select); +} + +/* + * Update fields in an active CCH: + * - retarget interrupts on local blade + * - update sizeavail mask + */ +int gru_update_cch(struct gru_thread_state *gts) +{ + struct gru_context_configuration_handle *cch; + struct gru_state *gru = gts->ts_gru; + int i, ctxnum = gts->ts_ctxnum, ret = 0; + + cch = get_cch(gru->gs_gru_base_vaddr, ctxnum); + + lock_cch_handle(cch); + if (cch->state == CCHSTATE_ACTIVE) { + if (gru->gs_gts[gts->ts_ctxnum] != gts) + goto exit; + if (cch_interrupt(cch)) + BUG(); + for (i = 0; i < 8; i++) + cch->sizeavail[i] = gts->ts_sizeavail; + gts->ts_tlb_int_select = gru_cpu_fault_map_id(); + cch->tlb_int_select = gru_cpu_fault_map_id(); + cch->tfm_fault_bit_enable = + (gts->ts_user_options == GRU_OPT_MISS_FMM_POLL + || gts->ts_user_options == GRU_OPT_MISS_FMM_INTR); + if (cch_start(cch)) + BUG(); + ret = 1; + } +exit: + unlock_cch_handle(cch); + return ret; +} + +/* + * Update CCH tlb interrupt select. Required when all the following is true: + * - task's GRU context is loaded into a GRU + * - task is using interrupt notification for TLB faults + * - task has migrated to a different cpu on the same blade where + * it was previously running. + */ +static int gru_retarget_intr(struct gru_thread_state *gts) +{ + if (gts->ts_tlb_int_select < 0 + || gts->ts_tlb_int_select == gru_cpu_fault_map_id()) + return 0; + + gru_dbg(grudev, "retarget from %d to %d\n", gts->ts_tlb_int_select, + gru_cpu_fault_map_id()); + return gru_update_cch(gts); +} + +/* + * Check if a GRU context is allowed to use a specific chiplet. By default + * a context is assigned to any blade-local chiplet. However, users can + * override this. + * Returns 1 if assignment allowed, 0 otherwise + */ +static int gru_check_chiplet_assignment(struct gru_state *gru, + struct gru_thread_state *gts) +{ + int blade_id; + int chiplet_id; + + blade_id = gts->ts_user_blade_id; + if (blade_id < 0) + blade_id = uv_numa_blade_id(); + + chiplet_id = gts->ts_user_chiplet_id; + return gru->gs_blade_id == blade_id && + (chiplet_id < 0 || chiplet_id == gru->gs_chiplet_id); +} + +/* + * Unload the gru context if it is not assigned to the correct blade or + * chiplet. Misassignment can occur if the process migrates to a different + * blade or if the user changes the selected blade/chiplet. + */ +void gru_check_context_placement(struct gru_thread_state *gts) +{ + struct gru_state *gru; + + /* + * If the current task is the context owner, verify that the + * context is correctly placed. This test is skipped for non-owner + * references. Pthread apps use non-owner references to the CBRs. + */ + gru = gts->ts_gru; + if (!gru || gts->ts_tgid_owner != current->tgid) + return; + + if (!gru_check_chiplet_assignment(gru, gts)) { + STAT(check_context_unload); + gru_unload_context(gts, 1); + } else if (gru_retarget_intr(gts)) { + STAT(check_context_retarget_intr); + } +} + + +/* + * Insufficient GRU resources available on the local blade. Steal a context from + * a process. This is a hack until a _real_ resource scheduler is written.... + */ +#define next_ctxnum(n) ((n) < GRU_NUM_CCH - 2 ? (n) + 1 : 0) +#define next_gru(b, g) (((g) < &(b)->bs_grus[GRU_CHIPLETS_PER_BLADE - 1]) ? \ + ((g)+1) : &(b)->bs_grus[0]) + +static int is_gts_stealable(struct gru_thread_state *gts, + struct gru_blade_state *bs) +{ + if (is_kernel_context(gts)) + return down_write_trylock(&bs->bs_kgts_sema); + else + return mutex_trylock(>s->ts_ctxlock); +} + +static void gts_stolen(struct gru_thread_state *gts, + struct gru_blade_state *bs) +{ + if (is_kernel_context(gts)) { + up_write(&bs->bs_kgts_sema); + STAT(steal_kernel_context); + } else { + mutex_unlock(>s->ts_ctxlock); + STAT(steal_user_context); + } +} + +void gru_steal_context(struct gru_thread_state *gts) +{ + struct gru_blade_state *blade; + struct gru_state *gru, *gru0; + struct gru_thread_state *ngts = NULL; + int ctxnum, ctxnum0, flag = 0, cbr, dsr; + int blade_id; + + blade_id = gts->ts_user_blade_id; + if (blade_id < 0) + blade_id = uv_numa_blade_id(); + cbr = gts->ts_cbr_au_count; + dsr = gts->ts_dsr_au_count; + + blade = gru_base[blade_id]; + spin_lock(&blade->bs_lock); + + ctxnum = next_ctxnum(blade->bs_lru_ctxnum); + gru = blade->bs_lru_gru; + if (ctxnum == 0) + gru = next_gru(blade, gru); + blade->bs_lru_gru = gru; + blade->bs_lru_ctxnum = ctxnum; + ctxnum0 = ctxnum; + gru0 = gru; + while (1) { + if (gru_check_chiplet_assignment(gru, gts)) { + if (check_gru_resources(gru, cbr, dsr, GRU_NUM_CCH)) + break; + spin_lock(&gru->gs_lock); + for (; ctxnum < GRU_NUM_CCH; ctxnum++) { + if (flag && gru == gru0 && ctxnum == ctxnum0) + break; + ngts = gru->gs_gts[ctxnum]; + /* + * We are grabbing locks out of order, so trylock is + * needed. GTSs are usually not locked, so the odds of + * success are high. If trylock fails, try to steal a + * different GSEG. + */ + if (ngts && is_gts_stealable(ngts, blade)) + break; + ngts = NULL; + } + spin_unlock(&gru->gs_lock); + if (ngts || (flag && gru == gru0 && ctxnum == ctxnum0)) + break; + } + if (flag && gru == gru0) + break; + flag = 1; + ctxnum = 0; + gru = next_gru(blade, gru); + } + spin_unlock(&blade->bs_lock); + + if (ngts) { + gts->ustats.context_stolen++; + ngts->ts_steal_jiffies = jiffies; + gru_unload_context(ngts, is_kernel_context(ngts) ? 0 : 1); + gts_stolen(ngts, blade); + } else { + STAT(steal_context_failed); + } + gru_dbg(grudev, + "stole gid %d, ctxnum %d from gts %p. Need cb %d, ds %d;" + " avail cb %ld, ds %ld\n", + gru->gs_gid, ctxnum, ngts, cbr, dsr, hweight64(gru->gs_cbr_map), + hweight64(gru->gs_dsr_map)); +} + +/* + * Assign a gru context. + */ +static int gru_assign_context_number(struct gru_state *gru) +{ + int ctxnum; + + ctxnum = find_first_zero_bit(&gru->gs_context_map, GRU_NUM_CCH); + __set_bit(ctxnum, &gru->gs_context_map); + return ctxnum; +} + +/* + * Scan the GRUs on the local blade & assign a GRU context. + */ +struct gru_state *gru_assign_gru_context(struct gru_thread_state *gts) +{ + struct gru_state *gru, *grux; + int i, max_active_contexts; + int blade_id = gts->ts_user_blade_id; + + if (blade_id < 0) + blade_id = uv_numa_blade_id(); +again: + gru = NULL; + max_active_contexts = GRU_NUM_CCH; + for_each_gru_on_blade(grux, blade_id, i) { + if (!gru_check_chiplet_assignment(grux, gts)) + continue; + if (check_gru_resources(grux, gts->ts_cbr_au_count, + gts->ts_dsr_au_count, + max_active_contexts)) { + gru = grux; + max_active_contexts = grux->gs_active_contexts; + if (max_active_contexts == 0) + break; + } + } + + if (gru) { + spin_lock(&gru->gs_lock); + if (!check_gru_resources(gru, gts->ts_cbr_au_count, + gts->ts_dsr_au_count, GRU_NUM_CCH)) { + spin_unlock(&gru->gs_lock); + goto again; + } + reserve_gru_resources(gru, gts); + gts->ts_gru = gru; + gts->ts_blade = gru->gs_blade_id; + gts->ts_ctxnum = gru_assign_context_number(gru); + atomic_inc(>s->ts_refcnt); + gru->gs_gts[gts->ts_ctxnum] = gts; + spin_unlock(&gru->gs_lock); + + STAT(assign_context); + gru_dbg(grudev, + "gseg %p, gts %p, gid %d, ctx %d, cbr %d, dsr %d\n", + gseg_virtual_address(gts->ts_gru, gts->ts_ctxnum), gts, + gts->ts_gru->gs_gid, gts->ts_ctxnum, + gts->ts_cbr_au_count, gts->ts_dsr_au_count); + } else { + gru_dbg(grudev, "failed to allocate a GTS %s\n", ""); + STAT(assign_context_failed); + } + + return gru; +} + +/* + * gru_nopage + * + * Map the user's GRU segment + * + * Note: gru segments alway mmaped on GRU_GSEG_PAGESIZE boundaries. + */ +vm_fault_t gru_fault(struct vm_fault *vmf) +{ + struct vm_area_struct *vma = vmf->vma; + struct gru_thread_state *gts; + unsigned long paddr, vaddr; + unsigned long expires; + + vaddr = vmf->address; + gru_dbg(grudev, "vma %p, vaddr 0x%lx (0x%lx)\n", + vma, vaddr, GSEG_BASE(vaddr)); + STAT(nopfn); + + /* The following check ensures vaddr is a valid address in the VMA */ + gts = gru_find_thread_state(vma, TSID(vaddr, vma)); + if (!gts) + return VM_FAULT_SIGBUS; + +again: + mutex_lock(>s->ts_ctxlock); + preempt_disable(); + + gru_check_context_placement(gts); + + if (!gts->ts_gru) { + STAT(load_user_context); + if (!gru_assign_gru_context(gts)) { + preempt_enable(); + mutex_unlock(>s->ts_ctxlock); + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(GRU_ASSIGN_DELAY); /* true hack ZZZ */ + expires = gts->ts_steal_jiffies + GRU_STEAL_DELAY; + if (time_before(expires, jiffies)) + gru_steal_context(gts); + goto again; + } + gru_load_context(gts); + paddr = gseg_physical_address(gts->ts_gru, gts->ts_ctxnum); + remap_pfn_range(vma, vaddr & ~(GRU_GSEG_PAGESIZE - 1), + paddr >> PAGE_SHIFT, GRU_GSEG_PAGESIZE, + vma->vm_page_prot); + } + + preempt_enable(); + mutex_unlock(>s->ts_ctxlock); + + return VM_FAULT_NOPAGE; +} + diff --git a/drivers/misc/sgi-gru/gruprocfs.c b/drivers/misc/sgi-gru/gruprocfs.c new file mode 100644 index 000000000..42ea2ecca --- /dev/null +++ b/drivers/misc/sgi-gru/gruprocfs.c @@ -0,0 +1,324 @@ +/* + * SN Platform GRU Driver + * + * PROC INTERFACES + * + * This file supports the /proc interfaces for the GRU driver + * + * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include +#include "gru.h" +#include "grulib.h" +#include "grutables.h" + +#define printstat(s, f) printstat_val(s, &gru_stats.f, #f) + +static void printstat_val(struct seq_file *s, atomic_long_t *v, char *id) +{ + unsigned long val = atomic_long_read(v); + + seq_printf(s, "%16lu %s\n", val, id); +} + +static int statistics_show(struct seq_file *s, void *p) +{ + printstat(s, vdata_alloc); + printstat(s, vdata_free); + printstat(s, gts_alloc); + printstat(s, gts_free); + printstat(s, gms_alloc); + printstat(s, gms_free); + printstat(s, gts_double_allocate); + printstat(s, assign_context); + printstat(s, assign_context_failed); + printstat(s, free_context); + printstat(s, load_user_context); + printstat(s, load_kernel_context); + printstat(s, lock_kernel_context); + printstat(s, unlock_kernel_context); + printstat(s, steal_user_context); + printstat(s, steal_kernel_context); + printstat(s, steal_context_failed); + printstat(s, nopfn); + printstat(s, asid_new); + printstat(s, asid_next); + printstat(s, asid_wrap); + printstat(s, asid_reuse); + printstat(s, intr); + printstat(s, intr_cbr); + printstat(s, intr_tfh); + printstat(s, intr_spurious); + printstat(s, intr_mm_lock_failed); + printstat(s, call_os); + printstat(s, call_os_wait_queue); + printstat(s, user_flush_tlb); + printstat(s, user_unload_context); + printstat(s, user_exception); + printstat(s, set_context_option); + printstat(s, check_context_retarget_intr); + printstat(s, check_context_unload); + printstat(s, tlb_dropin); + printstat(s, tlb_preload_page); + printstat(s, tlb_dropin_fail_no_asid); + printstat(s, tlb_dropin_fail_upm); + printstat(s, tlb_dropin_fail_invalid); + printstat(s, tlb_dropin_fail_range_active); + printstat(s, tlb_dropin_fail_idle); + printstat(s, tlb_dropin_fail_fmm); + printstat(s, tlb_dropin_fail_no_exception); + printstat(s, tfh_stale_on_fault); + printstat(s, mmu_invalidate_range); + printstat(s, mmu_invalidate_page); + printstat(s, flush_tlb); + printstat(s, flush_tlb_gru); + printstat(s, flush_tlb_gru_tgh); + printstat(s, flush_tlb_gru_zero_asid); + printstat(s, copy_gpa); + printstat(s, read_gpa); + printstat(s, mesq_receive); + printstat(s, mesq_receive_none); + printstat(s, mesq_send); + printstat(s, mesq_send_failed); + printstat(s, mesq_noop); + printstat(s, mesq_send_unexpected_error); + printstat(s, mesq_send_lb_overflow); + printstat(s, mesq_send_qlimit_reached); + printstat(s, mesq_send_amo_nacked); + printstat(s, mesq_send_put_nacked); + printstat(s, mesq_qf_locked); + printstat(s, mesq_qf_noop_not_full); + printstat(s, mesq_qf_switch_head_failed); + printstat(s, mesq_qf_unexpected_error); + printstat(s, mesq_noop_unexpected_error); + printstat(s, mesq_noop_lb_overflow); + printstat(s, mesq_noop_qlimit_reached); + printstat(s, mesq_noop_amo_nacked); + printstat(s, mesq_noop_put_nacked); + printstat(s, mesq_noop_page_overflow); + return 0; +} + +static ssize_t statistics_write(struct file *file, const char __user *userbuf, + size_t count, loff_t *data) +{ + memset(&gru_stats, 0, sizeof(gru_stats)); + return count; +} + +static int mcs_statistics_show(struct seq_file *s, void *p) +{ + int op; + unsigned long total, count, max; + static char *id[] = {"cch_allocate", "cch_start", "cch_interrupt", + "cch_interrupt_sync", "cch_deallocate", "tfh_write_only", + "tfh_write_restart", "tgh_invalidate"}; + + seq_printf(s, "%-20s%12s%12s%12s\n", "#id", "count", "aver-clks", "max-clks"); + for (op = 0; op < mcsop_last; op++) { + count = atomic_long_read(&mcs_op_statistics[op].count); + total = atomic_long_read(&mcs_op_statistics[op].total); + max = mcs_op_statistics[op].max; + seq_printf(s, "%-20s%12ld%12ld%12ld\n", id[op], count, + count ? total / count : 0, max); + } + return 0; +} + +static ssize_t mcs_statistics_write(struct file *file, + const char __user *userbuf, size_t count, loff_t *data) +{ + memset(mcs_op_statistics, 0, sizeof(mcs_op_statistics)); + return count; +} + +static int options_show(struct seq_file *s, void *p) +{ + seq_printf(s, "#bitmask: 1=trace, 2=statistics\n"); + seq_printf(s, "0x%lx\n", gru_options); + return 0; +} + +static ssize_t options_write(struct file *file, const char __user *userbuf, + size_t count, loff_t *data) +{ + int ret; + + ret = kstrtoul_from_user(userbuf, count, 0, &gru_options); + if (ret) + return ret; + + return count; +} + +static int cch_seq_show(struct seq_file *file, void *data) +{ + long gid = *(long *)data; + int i; + struct gru_state *gru = GID_TO_GRU(gid); + struct gru_thread_state *ts; + const char *mode[] = { "??", "UPM", "INTR", "OS_POLL" }; + + if (gid == 0) + seq_printf(file, "#%5s%5s%6s%7s%9s%6s%8s%8s\n", "gid", "bid", + "ctx#", "asid", "pid", "cbrs", "dsbytes", "mode"); + if (gru) + for (i = 0; i < GRU_NUM_CCH; i++) { + ts = gru->gs_gts[i]; + if (!ts) + continue; + seq_printf(file, " %5d%5d%6d%7d%9d%6d%8d%8s\n", + gru->gs_gid, gru->gs_blade_id, i, + is_kernel_context(ts) ? 0 : ts->ts_gms->ms_asids[gid].mt_asid, + is_kernel_context(ts) ? 0 : ts->ts_tgid_owner, + ts->ts_cbr_au_count * GRU_CBR_AU_SIZE, + ts->ts_cbr_au_count * GRU_DSR_AU_BYTES, + mode[ts->ts_user_options & + GRU_OPT_MISS_MASK]); + } + + return 0; +} + +static int gru_seq_show(struct seq_file *file, void *data) +{ + long gid = *(long *)data, ctxfree, cbrfree, dsrfree; + struct gru_state *gru = GID_TO_GRU(gid); + + if (gid == 0) { + seq_printf(file, "#%5s%5s%7s%6s%6s%8s%6s%6s\n", "gid", "nid", + "ctx", "cbr", "dsr", "ctx", "cbr", "dsr"); + seq_printf(file, "#%5s%5s%7s%6s%6s%8s%6s%6s\n", "", "", "busy", + "busy", "busy", "free", "free", "free"); + } + if (gru) { + ctxfree = GRU_NUM_CCH - gru->gs_active_contexts; + cbrfree = hweight64(gru->gs_cbr_map) * GRU_CBR_AU_SIZE; + dsrfree = hweight64(gru->gs_dsr_map) * GRU_DSR_AU_BYTES; + seq_printf(file, " %5d%5d%7ld%6ld%6ld%8ld%6ld%6ld\n", + gru->gs_gid, gru->gs_blade_id, GRU_NUM_CCH - ctxfree, + GRU_NUM_CBE - cbrfree, GRU_NUM_DSR_BYTES - dsrfree, + ctxfree, cbrfree, dsrfree); + } + + return 0; +} + +static void seq_stop(struct seq_file *file, void *data) +{ +} + +static void *seq_start(struct seq_file *file, loff_t *gid) +{ + if (*gid < gru_max_gids) + return gid; + return NULL; +} + +static void *seq_next(struct seq_file *file, void *data, loff_t *gid) +{ + (*gid)++; + if (*gid < gru_max_gids) + return gid; + return NULL; +} + +static const struct seq_operations cch_seq_ops = { + .start = seq_start, + .next = seq_next, + .stop = seq_stop, + .show = cch_seq_show +}; + +static const struct seq_operations gru_seq_ops = { + .start = seq_start, + .next = seq_next, + .stop = seq_stop, + .show = gru_seq_show +}; + +static int statistics_open(struct inode *inode, struct file *file) +{ + return single_open(file, statistics_show, NULL); +} + +static int mcs_statistics_open(struct inode *inode, struct file *file) +{ + return single_open(file, mcs_statistics_show, NULL); +} + +static int options_open(struct inode *inode, struct file *file) +{ + return single_open(file, options_show, NULL); +} + +/* *INDENT-OFF* */ +static const struct file_operations statistics_fops = { + .open = statistics_open, + .read = seq_read, + .write = statistics_write, + .llseek = seq_lseek, + .release = single_release, +}; + +static const struct file_operations mcs_statistics_fops = { + .open = mcs_statistics_open, + .read = seq_read, + .write = mcs_statistics_write, + .llseek = seq_lseek, + .release = single_release, +}; + +static const struct file_operations options_fops = { + .open = options_open, + .read = seq_read, + .write = options_write, + .llseek = seq_lseek, + .release = single_release, +}; + +static struct proc_dir_entry *proc_gru __read_mostly; + +int gru_proc_init(void) +{ + proc_gru = proc_mkdir("sgi_uv/gru", NULL); + if (!proc_gru) + return -1; + if (!proc_create("statistics", 0644, proc_gru, &statistics_fops)) + goto err; + if (!proc_create("mcs_statistics", 0644, proc_gru, &mcs_statistics_fops)) + goto err; + if (!proc_create("debug_options", 0644, proc_gru, &options_fops)) + goto err; + if (!proc_create_seq("cch_status", 0444, proc_gru, &cch_seq_ops)) + goto err; + if (!proc_create_seq("gru_status", 0444, proc_gru, &gru_seq_ops)) + goto err; + return 0; +err: + remove_proc_subtree("sgi_uv/gru", NULL); + return -1; +} + +void gru_proc_exit(void) +{ + remove_proc_subtree("sgi_uv/gru", NULL); +} diff --git a/drivers/misc/sgi-gru/grutables.h b/drivers/misc/sgi-gru/grutables.h new file mode 100644 index 000000000..3e041b6f7 --- /dev/null +++ b/drivers/misc/sgi-gru/grutables.h @@ -0,0 +1,679 @@ +/* + * SN Platform GRU Driver + * + * GRU DRIVER TABLES, MACROS, externs, etc + * + * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef __GRUTABLES_H__ +#define __GRUTABLES_H__ + +/* + * GRU Chiplet: + * The GRU is a user addressible memory accelerator. It provides + * several forms of load, store, memset, bcopy instructions. In addition, it + * contains special instructions for AMOs, sending messages to message + * queues, etc. + * + * The GRU is an integral part of the node controller. It connects + * directly to the cpu socket. In its current implementation, there are 2 + * GRU chiplets in the node controller on each blade (~node). + * + * The entire GRU memory space is fully coherent and cacheable by the cpus. + * + * Each GRU chiplet has a physical memory map that looks like the following: + * + * +-----------------+ + * |/////////////////| + * |/////////////////| + * |/////////////////| + * |/////////////////| + * |/////////////////| + * |/////////////////| + * |/////////////////| + * |/////////////////| + * +-----------------+ + * | system control | + * +-----------------+ _______ +-------------+ + * |/////////////////| / | | + * |/////////////////| / | | + * |/////////////////| / | instructions| + * |/////////////////| / | | + * |/////////////////| / | | + * |/////////////////| / |-------------| + * |/////////////////| / | | + * +-----------------+ | | + * | context 15 | | data | + * +-----------------+ | | + * | ...... | \ | | + * +-----------------+ \____________ +-------------+ + * | context 1 | + * +-----------------+ + * | context 0 | + * +-----------------+ + * + * Each of the "contexts" is a chunk of memory that can be mmaped into user + * space. The context consists of 2 parts: + * + * - an instruction space that can be directly accessed by the user + * to issue GRU instructions and to check instruction status. + * + * - a data area that acts as normal RAM. + * + * User instructions contain virtual addresses of data to be accessed by the + * GRU. The GRU contains a TLB that is used to convert these user virtual + * addresses to physical addresses. + * + * The "system control" area of the GRU chiplet is used by the kernel driver + * to manage user contexts and to perform functions such as TLB dropin and + * purging. + * + * One context may be reserved for the kernel and used for cross-partition + * communication. The GRU will also be used to asynchronously zero out + * large blocks of memory (not currently implemented). + * + * + * Tables: + * + * VDATA-VMA Data - Holds a few parameters. Head of linked list of + * GTS tables for threads using the GSEG + * GTS - Gru Thread State - contains info for managing a GSEG context. A + * GTS is allocated for each thread accessing a + * GSEG. + * GTD - GRU Thread Data - contains shadow copy of GRU data when GSEG is + * not loaded into a GRU + * GMS - GRU Memory Struct - Used to manage TLB shootdowns. Tracks GRUs + * where a GSEG has been loaded. Similar to + * an mm_struct but for GRU. + * + * GS - GRU State - Used to manage the state of a GRU chiplet + * BS - Blade State - Used to manage state of all GRU chiplets + * on a blade + * + * + * Normal task tables for task using GRU. + * - 2 threads in process + * - 2 GSEGs open in process + * - GSEG1 is being used by both threads + * - GSEG2 is used only by thread 2 + * + * task -->| + * task ---+---> mm ->------ (notifier) -------+-> gms + * | | + * |--> vma -> vdata ---> gts--->| GSEG1 (thread1) + * | | | + * | +-> gts--->| GSEG1 (thread2) + * | | + * |--> vma -> vdata ---> gts--->| GSEG2 (thread2) + * . + * . + * + * GSEGs are marked DONTCOPY on fork + * + * At open + * file.private_data -> NULL + * + * At mmap, + * vma -> vdata + * + * After gseg reference + * vma -> vdata ->gts + * + * After fork + * parent + * vma -> vdata -> gts + * child + * (vma is not copied) + * + */ + +#include +#include +#include +#include +#include +#include +#include "gru.h" +#include "grulib.h" +#include "gruhandles.h" + +extern struct gru_stats_s gru_stats; +extern struct gru_blade_state *gru_base[]; +extern unsigned long gru_start_paddr, gru_end_paddr; +extern void *gru_start_vaddr; +extern unsigned int gru_max_gids; + +#define GRU_MAX_BLADES MAX_NUMNODES +#define GRU_MAX_GRUS (GRU_MAX_BLADES * GRU_CHIPLETS_PER_BLADE) + +#define GRU_DRIVER_ID_STR "SGI GRU Device Driver" +#define GRU_DRIVER_VERSION_STR "0.85" + +/* + * GRU statistics. + */ +struct gru_stats_s { + atomic_long_t vdata_alloc; + atomic_long_t vdata_free; + atomic_long_t gts_alloc; + atomic_long_t gts_free; + atomic_long_t gms_alloc; + atomic_long_t gms_free; + atomic_long_t gts_double_allocate; + atomic_long_t assign_context; + atomic_long_t assign_context_failed; + atomic_long_t free_context; + atomic_long_t load_user_context; + atomic_long_t load_kernel_context; + atomic_long_t lock_kernel_context; + atomic_long_t unlock_kernel_context; + atomic_long_t steal_user_context; + atomic_long_t steal_kernel_context; + atomic_long_t steal_context_failed; + atomic_long_t nopfn; + atomic_long_t asid_new; + atomic_long_t asid_next; + atomic_long_t asid_wrap; + atomic_long_t asid_reuse; + atomic_long_t intr; + atomic_long_t intr_cbr; + atomic_long_t intr_tfh; + atomic_long_t intr_spurious; + atomic_long_t intr_mm_lock_failed; + atomic_long_t call_os; + atomic_long_t call_os_wait_queue; + atomic_long_t user_flush_tlb; + atomic_long_t user_unload_context; + atomic_long_t user_exception; + atomic_long_t set_context_option; + atomic_long_t check_context_retarget_intr; + atomic_long_t check_context_unload; + atomic_long_t tlb_dropin; + atomic_long_t tlb_preload_page; + atomic_long_t tlb_dropin_fail_no_asid; + atomic_long_t tlb_dropin_fail_upm; + atomic_long_t tlb_dropin_fail_invalid; + atomic_long_t tlb_dropin_fail_range_active; + atomic_long_t tlb_dropin_fail_idle; + atomic_long_t tlb_dropin_fail_fmm; + atomic_long_t tlb_dropin_fail_no_exception; + atomic_long_t tfh_stale_on_fault; + atomic_long_t mmu_invalidate_range; + atomic_long_t mmu_invalidate_page; + atomic_long_t flush_tlb; + atomic_long_t flush_tlb_gru; + atomic_long_t flush_tlb_gru_tgh; + atomic_long_t flush_tlb_gru_zero_asid; + + atomic_long_t copy_gpa; + atomic_long_t read_gpa; + + atomic_long_t mesq_receive; + atomic_long_t mesq_receive_none; + atomic_long_t mesq_send; + atomic_long_t mesq_send_failed; + atomic_long_t mesq_noop; + atomic_long_t mesq_send_unexpected_error; + atomic_long_t mesq_send_lb_overflow; + atomic_long_t mesq_send_qlimit_reached; + atomic_long_t mesq_send_amo_nacked; + atomic_long_t mesq_send_put_nacked; + atomic_long_t mesq_page_overflow; + atomic_long_t mesq_qf_locked; + atomic_long_t mesq_qf_noop_not_full; + atomic_long_t mesq_qf_switch_head_failed; + atomic_long_t mesq_qf_unexpected_error; + atomic_long_t mesq_noop_unexpected_error; + atomic_long_t mesq_noop_lb_overflow; + atomic_long_t mesq_noop_qlimit_reached; + atomic_long_t mesq_noop_amo_nacked; + atomic_long_t mesq_noop_put_nacked; + atomic_long_t mesq_noop_page_overflow; + +}; + +enum mcs_op {cchop_allocate, cchop_start, cchop_interrupt, cchop_interrupt_sync, + cchop_deallocate, tfhop_write_only, tfhop_write_restart, + tghop_invalidate, mcsop_last}; + +struct mcs_op_statistic { + atomic_long_t count; + atomic_long_t total; + unsigned long max; +}; + +extern struct mcs_op_statistic mcs_op_statistics[mcsop_last]; + +#define OPT_DPRINT 1 +#define OPT_STATS 2 + + +#define IRQ_GRU 110 /* Starting IRQ number for interrupts */ + +/* Delay in jiffies between attempts to assign a GRU context */ +#define GRU_ASSIGN_DELAY ((HZ * 20) / 1000) + +/* + * If a process has it's context stolen, min delay in jiffies before trying to + * steal a context from another process. + */ +#define GRU_STEAL_DELAY ((HZ * 200) / 1000) + +#define STAT(id) do { \ + if (gru_options & OPT_STATS) \ + atomic_long_inc(&gru_stats.id); \ + } while (0) + +#ifdef CONFIG_SGI_GRU_DEBUG +#define gru_dbg(dev, fmt, x...) \ + do { \ + if (gru_options & OPT_DPRINT) \ + printk(KERN_DEBUG "GRU:%d %s: " fmt, smp_processor_id(), __func__, x);\ + } while (0) +#else +#define gru_dbg(x...) +#endif + +/*----------------------------------------------------------------------------- + * ASID management + */ +#define MAX_ASID 0xfffff0 +#define MIN_ASID 8 +#define ASID_INC 8 /* number of regions */ + +/* Generate a GRU asid value from a GRU base asid & a virtual address. */ +#define VADDR_HI_BIT 64 +#define GRUREGION(addr) ((addr) >> (VADDR_HI_BIT - 3) & 3) +#define GRUASID(asid, addr) ((asid) + GRUREGION(addr)) + +/*------------------------------------------------------------------------------ + * File & VMS Tables + */ + +struct gru_state; + +/* + * This structure is pointed to from the mmstruct via the notifier pointer. + * There is one of these per address space. + */ +struct gru_mm_tracker { /* pack to reduce size */ + unsigned int mt_asid_gen:24; /* ASID wrap count */ + unsigned int mt_asid:24; /* current base ASID for gru */ + unsigned short mt_ctxbitmap:16;/* bitmap of contexts using + asid */ +} __attribute__ ((packed)); + +struct gru_mm_struct { + struct mmu_notifier ms_notifier; + atomic_t ms_refcnt; + spinlock_t ms_asid_lock; /* protects ASID assignment */ + atomic_t ms_range_active;/* num range_invals active */ + char ms_released; + wait_queue_head_t ms_wait_queue; + DECLARE_BITMAP(ms_asidmap, GRU_MAX_GRUS); + struct gru_mm_tracker ms_asids[GRU_MAX_GRUS]; +}; + +/* + * One of these structures is allocated when a GSEG is mmaped. The + * structure is pointed to by the vma->vm_private_data field in the vma struct. + */ +struct gru_vma_data { + spinlock_t vd_lock; /* Serialize access to vma */ + struct list_head vd_head; /* head of linked list of gts */ + long vd_user_options;/* misc user option flags */ + int vd_cbr_au_count; + int vd_dsr_au_count; + unsigned char vd_tlb_preload_count; +}; + +/* + * One of these is allocated for each thread accessing a mmaped GRU. A linked + * list of these structure is hung off the struct gru_vma_data in the mm_struct. + */ +struct gru_thread_state { + struct list_head ts_next; /* list - head at vma-private */ + struct mutex ts_ctxlock; /* load/unload CTX lock */ + struct mm_struct *ts_mm; /* mm currently mapped to + context */ + struct vm_area_struct *ts_vma; /* vma of GRU context */ + struct gru_state *ts_gru; /* GRU where the context is + loaded */ + struct gru_mm_struct *ts_gms; /* asid & ioproc struct */ + unsigned char ts_tlb_preload_count; /* TLB preload pages */ + unsigned long ts_cbr_map; /* map of allocated CBRs */ + unsigned long ts_dsr_map; /* map of allocated DATA + resources */ + unsigned long ts_steal_jiffies;/* jiffies when context last + stolen */ + long ts_user_options;/* misc user option flags */ + pid_t ts_tgid_owner; /* task that is using the + context - for migration */ + short ts_user_blade_id;/* user selected blade */ + char ts_user_chiplet_id;/* user selected chiplet */ + unsigned short ts_sizeavail; /* Pagesizes in use */ + int ts_tsid; /* thread that owns the + structure */ + int ts_tlb_int_select;/* target cpu if interrupts + enabled */ + int ts_ctxnum; /* context number where the + context is loaded */ + atomic_t ts_refcnt; /* reference count GTS */ + unsigned char ts_dsr_au_count;/* Number of DSR resources + required for contest */ + unsigned char ts_cbr_au_count;/* Number of CBR resources + required for contest */ + char ts_cch_req_slice;/* CCH packet slice */ + char ts_blade; /* If >= 0, migrate context if + ref from different blade */ + char ts_force_cch_reload; + char ts_cbr_idx[GRU_CBR_AU];/* CBR numbers of each + allocated CB */ + int ts_data_valid; /* Indicates if ts_gdata has + valid data */ + struct gru_gseg_statistics ustats; /* User statistics */ + unsigned long ts_gdata[0]; /* save area for GRU data (CB, + DS, CBE) */ +}; + +/* + * Threaded programs actually allocate an array of GSEGs when a context is + * created. Each thread uses a separate GSEG. TSID is the index into the GSEG + * array. + */ +#define TSID(a, v) (((a) - (v)->vm_start) / GRU_GSEG_PAGESIZE) +#define UGRUADDR(gts) ((gts)->ts_vma->vm_start + \ + (gts)->ts_tsid * GRU_GSEG_PAGESIZE) + +#define NULLCTX (-1) /* if context not loaded into GRU */ + +/*----------------------------------------------------------------------------- + * GRU State Tables + */ + +/* + * One of these exists for each GRU chiplet. + */ +struct gru_state { + struct gru_blade_state *gs_blade; /* GRU state for entire + blade */ + unsigned long gs_gru_base_paddr; /* Physical address of + gru segments (64) */ + void *gs_gru_base_vaddr; /* Virtual address of + gru segments (64) */ + unsigned short gs_gid; /* unique GRU number */ + unsigned short gs_blade_id; /* blade of GRU */ + unsigned char gs_chiplet_id; /* blade chiplet of GRU */ + unsigned char gs_tgh_local_shift; /* used to pick TGH for + local flush */ + unsigned char gs_tgh_first_remote; /* starting TGH# for + remote flush */ + spinlock_t gs_asid_lock; /* lock used for + assigning asids */ + spinlock_t gs_lock; /* lock used for + assigning contexts */ + + /* -- the following are protected by the gs_asid_lock spinlock ---- */ + unsigned int gs_asid; /* Next availe ASID */ + unsigned int gs_asid_limit; /* Limit of available + ASIDs */ + unsigned int gs_asid_gen; /* asid generation. + Inc on wrap */ + + /* --- the following fields are protected by the gs_lock spinlock --- */ + unsigned long gs_context_map; /* bitmap to manage + contexts in use */ + unsigned long gs_cbr_map; /* bitmap to manage CB + resources */ + unsigned long gs_dsr_map; /* bitmap used to manage + DATA resources */ + unsigned int gs_reserved_cbrs; /* Number of kernel- + reserved cbrs */ + unsigned int gs_reserved_dsr_bytes; /* Bytes of kernel- + reserved dsrs */ + unsigned short gs_active_contexts; /* number of contexts + in use */ + struct gru_thread_state *gs_gts[GRU_NUM_CCH]; /* GTS currently using + the context */ + int gs_irq[GRU_NUM_TFM]; /* Interrupt irqs */ +}; + +/* + * This structure contains the GRU state for all the GRUs on a blade. + */ +struct gru_blade_state { + void *kernel_cb; /* First kernel + reserved cb */ + void *kernel_dsr; /* First kernel + reserved DSR */ + struct rw_semaphore bs_kgts_sema; /* lock for kgts */ + struct gru_thread_state *bs_kgts; /* GTS for kernel use */ + + /* ---- the following are used for managing kernel async GRU CBRs --- */ + int bs_async_dsr_bytes; /* DSRs for async */ + int bs_async_cbrs; /* CBRs AU for async */ + struct completion *bs_async_wq; + + /* ---- the following are protected by the bs_lock spinlock ---- */ + spinlock_t bs_lock; /* lock used for + stealing contexts */ + int bs_lru_ctxnum; /* STEAL - last context + stolen */ + struct gru_state *bs_lru_gru; /* STEAL - last gru + stolen */ + + struct gru_state bs_grus[GRU_CHIPLETS_PER_BLADE]; +}; + +/*----------------------------------------------------------------------------- + * Address Primitives + */ +#define get_tfm_for_cpu(g, c) \ + ((struct gru_tlb_fault_map *)get_tfm((g)->gs_gru_base_vaddr, (c))) +#define get_tfh_by_index(g, i) \ + ((struct gru_tlb_fault_handle *)get_tfh((g)->gs_gru_base_vaddr, (i))) +#define get_tgh_by_index(g, i) \ + ((struct gru_tlb_global_handle *)get_tgh((g)->gs_gru_base_vaddr, (i))) +#define get_cbe_by_index(g, i) \ + ((struct gru_control_block_extended *)get_cbe((g)->gs_gru_base_vaddr,\ + (i))) + +/*----------------------------------------------------------------------------- + * Useful Macros + */ + +/* Given a blade# & chiplet#, get a pointer to the GRU */ +#define get_gru(b, c) (&gru_base[b]->bs_grus[c]) + +/* Number of bytes to save/restore when unloading/loading GRU contexts */ +#define DSR_BYTES(dsr) ((dsr) * GRU_DSR_AU_BYTES) +#define CBR_BYTES(cbr) ((cbr) * GRU_HANDLE_BYTES * GRU_CBR_AU_SIZE * 2) + +/* Convert a user CB number to the actual CBRNUM */ +#define thread_cbr_number(gts, n) ((gts)->ts_cbr_idx[(n) / GRU_CBR_AU_SIZE] \ + * GRU_CBR_AU_SIZE + (n) % GRU_CBR_AU_SIZE) + +/* Convert a gid to a pointer to the GRU */ +#define GID_TO_GRU(gid) \ + (gru_base[(gid) / GRU_CHIPLETS_PER_BLADE] ? \ + (&gru_base[(gid) / GRU_CHIPLETS_PER_BLADE]-> \ + bs_grus[(gid) % GRU_CHIPLETS_PER_BLADE]) : \ + NULL) + +/* Scan all active GRUs in a GRU bitmap */ +#define for_each_gru_in_bitmap(gid, map) \ + for_each_set_bit((gid), (map), GRU_MAX_GRUS) + +/* Scan all active GRUs on a specific blade */ +#define for_each_gru_on_blade(gru, nid, i) \ + for ((gru) = gru_base[nid]->bs_grus, (i) = 0; \ + (i) < GRU_CHIPLETS_PER_BLADE; \ + (i)++, (gru)++) + +/* Scan all GRUs */ +#define foreach_gid(gid) \ + for ((gid) = 0; (gid) < gru_max_gids; (gid)++) + +/* Scan all active GTSs on a gru. Note: must hold ss_lock to use this macro. */ +#define for_each_gts_on_gru(gts, gru, ctxnum) \ + for ((ctxnum) = 0; (ctxnum) < GRU_NUM_CCH; (ctxnum)++) \ + if (((gts) = (gru)->gs_gts[ctxnum])) + +/* Scan each CBR whose bit is set in a TFM (or copy of) */ +#define for_each_cbr_in_tfm(i, map) \ + for_each_set_bit((i), (map), GRU_NUM_CBE) + +/* Scan each CBR in a CBR bitmap. Note: multiple CBRs in an allocation unit */ +#define for_each_cbr_in_allocation_map(i, map, k) \ + for_each_set_bit((k), (map), GRU_CBR_AU) \ + for ((i) = (k)*GRU_CBR_AU_SIZE; \ + (i) < ((k) + 1) * GRU_CBR_AU_SIZE; (i)++) + +/* Scan each DSR in a DSR bitmap. Note: multiple DSRs in an allocation unit */ +#define for_each_dsr_in_allocation_map(i, map, k) \ + for_each_set_bit((k), (const unsigned long *)(map), GRU_DSR_AU) \ + for ((i) = (k) * GRU_DSR_AU_CL; \ + (i) < ((k) + 1) * GRU_DSR_AU_CL; (i)++) + +#define gseg_physical_address(gru, ctxnum) \ + ((gru)->gs_gru_base_paddr + ctxnum * GRU_GSEG_STRIDE) +#define gseg_virtual_address(gru, ctxnum) \ + ((gru)->gs_gru_base_vaddr + ctxnum * GRU_GSEG_STRIDE) + +/*----------------------------------------------------------------------------- + * Lock / Unlock GRU handles + * Use the "delresp" bit in the handle as a "lock" bit. + */ + +/* Lock hierarchy checking enabled only in emulator */ + +/* 0 = lock failed, 1 = locked */ +static inline int __trylock_handle(void *h) +{ + return !test_and_set_bit(1, h); +} + +static inline void __lock_handle(void *h) +{ + while (test_and_set_bit(1, h)) + cpu_relax(); +} + +static inline void __unlock_handle(void *h) +{ + clear_bit(1, h); +} + +static inline int trylock_cch_handle(struct gru_context_configuration_handle *cch) +{ + return __trylock_handle(cch); +} + +static inline void lock_cch_handle(struct gru_context_configuration_handle *cch) +{ + __lock_handle(cch); +} + +static inline void unlock_cch_handle(struct gru_context_configuration_handle + *cch) +{ + __unlock_handle(cch); +} + +static inline void lock_tgh_handle(struct gru_tlb_global_handle *tgh) +{ + __lock_handle(tgh); +} + +static inline void unlock_tgh_handle(struct gru_tlb_global_handle *tgh) +{ + __unlock_handle(tgh); +} + +static inline int is_kernel_context(struct gru_thread_state *gts) +{ + return !gts->ts_mm; +} + +/* + * The following are for Nehelem-EX. A more general scheme is needed for + * future processors. + */ +#define UV_MAX_INT_CORES 8 +#define uv_cpu_socket_number(p) ((cpu_physical_id(p) >> 5) & 1) +#define uv_cpu_ht_number(p) (cpu_physical_id(p) & 1) +#define uv_cpu_core_number(p) (((cpu_physical_id(p) >> 2) & 4) | \ + ((cpu_physical_id(p) >> 1) & 3)) +/*----------------------------------------------------------------------------- + * Function prototypes & externs + */ +struct gru_unload_context_req; + +extern const struct vm_operations_struct gru_vm_ops; +extern struct device *grudev; + +extern struct gru_vma_data *gru_alloc_vma_data(struct vm_area_struct *vma, + int tsid); +extern struct gru_thread_state *gru_find_thread_state(struct vm_area_struct + *vma, int tsid); +extern struct gru_thread_state *gru_alloc_thread_state(struct vm_area_struct + *vma, int tsid); +extern struct gru_state *gru_assign_gru_context(struct gru_thread_state *gts); +extern void gru_load_context(struct gru_thread_state *gts); +extern void gru_steal_context(struct gru_thread_state *gts); +extern void gru_unload_context(struct gru_thread_state *gts, int savestate); +extern int gru_update_cch(struct gru_thread_state *gts); +extern void gts_drop(struct gru_thread_state *gts); +extern void gru_tgh_flush_init(struct gru_state *gru); +extern int gru_kservices_init(void); +extern void gru_kservices_exit(void); +extern irqreturn_t gru0_intr(int irq, void *dev_id); +extern irqreturn_t gru1_intr(int irq, void *dev_id); +extern irqreturn_t gru_intr_mblade(int irq, void *dev_id); +extern int gru_dump_chiplet_request(unsigned long arg); +extern long gru_get_gseg_statistics(unsigned long arg); +extern int gru_handle_user_call_os(unsigned long address); +extern int gru_user_flush_tlb(unsigned long arg); +extern int gru_user_unload_context(unsigned long arg); +extern int gru_get_exception_detail(unsigned long arg); +extern int gru_set_context_option(unsigned long address); +extern void gru_check_context_placement(struct gru_thread_state *gts); +extern int gru_cpu_fault_map_id(void); +extern struct vm_area_struct *gru_find_vma(unsigned long vaddr); +extern void gru_flush_all_tlb(struct gru_state *gru); +extern int gru_proc_init(void); +extern void gru_proc_exit(void); + +extern struct gru_thread_state *gru_alloc_gts(struct vm_area_struct *vma, + int cbr_au_count, int dsr_au_count, + unsigned char tlb_preload_count, int options, int tsid); +extern unsigned long gru_reserve_cb_resources(struct gru_state *gru, + int cbr_au_count, char *cbmap); +extern unsigned long gru_reserve_ds_resources(struct gru_state *gru, + int dsr_au_count, char *dsmap); +extern vm_fault_t gru_fault(struct vm_fault *vmf); +extern struct gru_mm_struct *gru_register_mmu_notifier(void); +extern void gru_drop_mmu_notifier(struct gru_mm_struct *gms); + +extern int gru_ktest(unsigned long arg); +extern void gru_flush_tlb_range(struct gru_mm_struct *gms, unsigned long start, + unsigned long len); + +extern unsigned long gru_options; + +#endif /* __GRUTABLES_H__ */ diff --git a/drivers/misc/sgi-gru/grutlbpurge.c b/drivers/misc/sgi-gru/grutlbpurge.c new file mode 100644 index 000000000..be28f05bf --- /dev/null +++ b/drivers/misc/sgi-gru/grutlbpurge.c @@ -0,0 +1,370 @@ +/* + * SN Platform GRU Driver + * + * MMUOPS callbacks + TLB flushing + * + * This file handles emu notifier callbacks from the core kernel. The callbacks + * are used to update the TLB in the GRU as a result of changes in the + * state of a process address space. This file also handles TLB invalidates + * from the GRU driver. + * + * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "gru.h" +#include "grutables.h" +#include + +#define gru_random() get_cycles() + +/* ---------------------------------- TLB Invalidation functions -------- + * get_tgh_handle + * + * Find a TGH to use for issuing a TLB invalidate. For GRUs that are on the + * local blade, use a fixed TGH that is a function of the blade-local cpu + * number. Normally, this TGH is private to the cpu & no contention occurs for + * the TGH. For offblade GRUs, select a random TGH in the range above the + * private TGHs. A spinlock is required to access this TGH & the lock must be + * released when the invalidate is completes. This sucks, but it is the best we + * can do. + * + * Note that the spinlock is IN the TGH handle so locking does not involve + * additional cache lines. + * + */ +static inline int get_off_blade_tgh(struct gru_state *gru) +{ + int n; + + n = GRU_NUM_TGH - gru->gs_tgh_first_remote; + n = gru_random() % n; + n += gru->gs_tgh_first_remote; + return n; +} + +static inline int get_on_blade_tgh(struct gru_state *gru) +{ + return uv_blade_processor_id() >> gru->gs_tgh_local_shift; +} + +static struct gru_tlb_global_handle *get_lock_tgh_handle(struct gru_state + *gru) +{ + struct gru_tlb_global_handle *tgh; + int n; + + preempt_disable(); + if (uv_numa_blade_id() == gru->gs_blade_id) + n = get_on_blade_tgh(gru); + else + n = get_off_blade_tgh(gru); + tgh = get_tgh_by_index(gru, n); + lock_tgh_handle(tgh); + + return tgh; +} + +static void get_unlock_tgh_handle(struct gru_tlb_global_handle *tgh) +{ + unlock_tgh_handle(tgh); + preempt_enable(); +} + +/* + * gru_flush_tlb_range + * + * General purpose TLB invalidation function. This function scans every GRU in + * the ENTIRE system (partition) looking for GRUs where the specified MM has + * been accessed by the GRU. For each GRU found, the TLB must be invalidated OR + * the ASID invalidated. Invalidating an ASID causes a new ASID to be assigned + * on the next fault. This effectively flushes the ENTIRE TLB for the MM at the + * cost of (possibly) a large number of future TLBmisses. + * + * The current algorithm is optimized based on the following (somewhat true) + * assumptions: + * - GRU contexts are not loaded into a GRU unless a reference is made to + * the data segment or control block (this is true, not an assumption). + * If a DS/CB is referenced, the user will also issue instructions that + * cause TLBmisses. It is not necessary to optimize for the case where + * contexts are loaded but no instructions cause TLB misses. (I know + * this will happen but I'm not optimizing for it). + * - GRU instructions to invalidate TLB entries are SLOOOOWWW - normally + * a few usec but in unusual cases, it could be longer. Avoid if + * possible. + * - intrablade process migration between cpus is not frequent but is + * common. + * - a GRU context is not typically migrated to a different GRU on the + * blade because of intrablade migration + * - interblade migration is rare. Processes migrate their GRU context to + * the new blade. + * - if interblade migration occurs, migration back to the original blade + * is very very rare (ie., no optimization for this case) + * - most GRU instruction operate on a subset of the user REGIONS. Code + * & shared library regions are not likely targets of GRU instructions. + * + * To help improve the efficiency of TLB invalidation, the GMS data + * structure is maintained for EACH address space (MM struct). The GMS is + * also the structure that contains the pointer to the mmu callout + * functions. This structure is linked to the mm_struct for the address space + * using the mmu "register" function. The mmu interfaces are used to + * provide the callbacks for TLB invalidation. The GMS contains: + * + * - asid[maxgrus] array. ASIDs are assigned to a GRU when a context is + * loaded into the GRU. + * - asidmap[maxgrus]. bitmap to make it easier to find non-zero asids in + * the above array + * - ctxbitmap[maxgrus]. Indicates the contexts that are currently active + * in the GRU for the address space. This bitmap must be passed to the + * GRU to do an invalidate. + * + * The current algorithm for invalidating TLBs is: + * - scan the asidmap for GRUs where the context has been loaded, ie, + * asid is non-zero. + * - for each gru found: + * - if the ctxtmap is non-zero, there are active contexts in the + * GRU. TLB invalidate instructions must be issued to the GRU. + * - if the ctxtmap is zero, no context is active. Set the ASID to + * zero to force a full TLB invalidation. This is fast but will + * cause a lot of TLB misses if the context is reloaded onto the + * GRU + * + */ + +void gru_flush_tlb_range(struct gru_mm_struct *gms, unsigned long start, + unsigned long len) +{ + struct gru_state *gru; + struct gru_mm_tracker *asids; + struct gru_tlb_global_handle *tgh; + unsigned long num; + int grupagesize, pagesize, pageshift, gid, asid; + + /* ZZZ TODO - handle huge pages */ + pageshift = PAGE_SHIFT; + pagesize = (1UL << pageshift); + grupagesize = GRU_PAGESIZE(pageshift); + num = min(((len + pagesize - 1) >> pageshift), GRUMAXINVAL); + + STAT(flush_tlb); + gru_dbg(grudev, "gms %p, start 0x%lx, len 0x%lx, asidmap 0x%lx\n", gms, + start, len, gms->ms_asidmap[0]); + + spin_lock(&gms->ms_asid_lock); + for_each_gru_in_bitmap(gid, gms->ms_asidmap) { + STAT(flush_tlb_gru); + gru = GID_TO_GRU(gid); + asids = gms->ms_asids + gid; + asid = asids->mt_asid; + if (asids->mt_ctxbitmap && asid) { + STAT(flush_tlb_gru_tgh); + asid = GRUASID(asid, start); + gru_dbg(grudev, + " FLUSH gruid %d, asid 0x%x, vaddr 0x%lx, vamask 0x%x, num %ld, cbmap 0x%x\n", + gid, asid, start, grupagesize, num, asids->mt_ctxbitmap); + tgh = get_lock_tgh_handle(gru); + tgh_invalidate(tgh, start, ~0, asid, grupagesize, 0, + num - 1, asids->mt_ctxbitmap); + get_unlock_tgh_handle(tgh); + } else { + STAT(flush_tlb_gru_zero_asid); + asids->mt_asid = 0; + __clear_bit(gru->gs_gid, gms->ms_asidmap); + gru_dbg(grudev, + " CLEARASID gruid %d, asid 0x%x, cbtmap 0x%x, asidmap 0x%lx\n", + gid, asid, asids->mt_ctxbitmap, + gms->ms_asidmap[0]); + } + } + spin_unlock(&gms->ms_asid_lock); +} + +/* + * Flush the entire TLB on a chiplet. + */ +void gru_flush_all_tlb(struct gru_state *gru) +{ + struct gru_tlb_global_handle *tgh; + + gru_dbg(grudev, "gid %d\n", gru->gs_gid); + tgh = get_lock_tgh_handle(gru); + tgh_invalidate(tgh, 0, ~0, 0, 1, 1, GRUMAXINVAL - 1, 0xffff); + get_unlock_tgh_handle(tgh); +} + +/* + * MMUOPS notifier callout functions + */ +static int gru_invalidate_range_start(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long start, unsigned long end, + bool blockable) +{ + struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct, + ms_notifier); + + STAT(mmu_invalidate_range); + atomic_inc(&gms->ms_range_active); + gru_dbg(grudev, "gms %p, start 0x%lx, end 0x%lx, act %d\n", gms, + start, end, atomic_read(&gms->ms_range_active)); + gru_flush_tlb_range(gms, start, end - start); + + return 0; +} + +static void gru_invalidate_range_end(struct mmu_notifier *mn, + struct mm_struct *mm, unsigned long start, + unsigned long end) +{ + struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct, + ms_notifier); + + /* ..._and_test() provides needed barrier */ + (void)atomic_dec_and_test(&gms->ms_range_active); + + wake_up_all(&gms->ms_wait_queue); + gru_dbg(grudev, "gms %p, start 0x%lx, end 0x%lx\n", gms, start, end); +} + +static void gru_release(struct mmu_notifier *mn, struct mm_struct *mm) +{ + struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct, + ms_notifier); + + gms->ms_released = 1; + gru_dbg(grudev, "gms %p\n", gms); +} + + +static const struct mmu_notifier_ops gru_mmuops = { + .flags = MMU_INVALIDATE_DOES_NOT_BLOCK, + .invalidate_range_start = gru_invalidate_range_start, + .invalidate_range_end = gru_invalidate_range_end, + .release = gru_release, +}; + +/* Move this to the basic mmu_notifier file. But for now... */ +static struct mmu_notifier *mmu_find_ops(struct mm_struct *mm, + const struct mmu_notifier_ops *ops) +{ + struct mmu_notifier *mn, *gru_mn = NULL; + + if (mm->mmu_notifier_mm) { + rcu_read_lock(); + hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, + hlist) + if (mn->ops == ops) { + gru_mn = mn; + break; + } + rcu_read_unlock(); + } + return gru_mn; +} + +struct gru_mm_struct *gru_register_mmu_notifier(void) +{ + struct gru_mm_struct *gms; + struct mmu_notifier *mn; + int err; + + mn = mmu_find_ops(current->mm, &gru_mmuops); + if (mn) { + gms = container_of(mn, struct gru_mm_struct, ms_notifier); + atomic_inc(&gms->ms_refcnt); + } else { + gms = kzalloc(sizeof(*gms), GFP_KERNEL); + if (!gms) + return ERR_PTR(-ENOMEM); + STAT(gms_alloc); + spin_lock_init(&gms->ms_asid_lock); + gms->ms_notifier.ops = &gru_mmuops; + atomic_set(&gms->ms_refcnt, 1); + init_waitqueue_head(&gms->ms_wait_queue); + err = __mmu_notifier_register(&gms->ms_notifier, current->mm); + if (err) + goto error; + } + if (gms) + gru_dbg(grudev, "gms %p, refcnt %d\n", gms, + atomic_read(&gms->ms_refcnt)); + return gms; +error: + kfree(gms); + return ERR_PTR(err); +} + +void gru_drop_mmu_notifier(struct gru_mm_struct *gms) +{ + gru_dbg(grudev, "gms %p, refcnt %d, released %d\n", gms, + atomic_read(&gms->ms_refcnt), gms->ms_released); + if (atomic_dec_return(&gms->ms_refcnt) == 0) { + if (!gms->ms_released) + mmu_notifier_unregister(&gms->ms_notifier, current->mm); + kfree(gms); + STAT(gms_free); + } +} + +/* + * Setup TGH parameters. There are: + * - 24 TGH handles per GRU chiplet + * - a portion (MAX_LOCAL_TGH) of the handles are reserved for + * use by blade-local cpus + * - the rest are used by off-blade cpus. This usage is + * less frequent than blade-local usage. + * + * For now, use 16 handles for local flushes, 8 for remote flushes. If the blade + * has less tan or equal to 16 cpus, each cpu has a unique handle that it can + * use. + */ +#define MAX_LOCAL_TGH 16 + +void gru_tgh_flush_init(struct gru_state *gru) +{ + int cpus, shift = 0, n; + + cpus = uv_blade_nr_possible_cpus(gru->gs_blade_id); + + /* n = cpus rounded up to next power of 2 */ + if (cpus) { + n = 1 << fls(cpus - 1); + + /* + * shift count for converting local cpu# to TGH index + * 0 if cpus <= MAX_LOCAL_TGH, + * 1 if cpus <= 2*MAX_LOCAL_TGH, + * etc + */ + shift = max(0, fls(n - 1) - fls(MAX_LOCAL_TGH - 1)); + } + gru->gs_tgh_local_shift = shift; + + /* first starting TGH index to use for remote purges */ + gru->gs_tgh_first_remote = (cpus + (1 << shift) - 1) >> shift; + +} diff --git a/drivers/misc/sgi-xp/Makefile b/drivers/misc/sgi-xp/Makefile new file mode 100644 index 000000000..bbb622c19 --- /dev/null +++ b/drivers/misc/sgi-xp/Makefile @@ -0,0 +1,20 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for SGI's XP devices. +# + +obj-$(CONFIG_SGI_XP) += xp.o +xp-y := xp_main.o +xp-$(CONFIG_IA64_SGI_SN2) += xp_sn2.o xp_nofault.o +xp-$(CONFIG_IA64_GENERIC) += xp_sn2.o xp_nofault.o +xp-$(CONFIG_IA64_SGI_UV) += xp_uv.o +xp-$(CONFIG_X86_64) += xp_uv.o + +obj-$(CONFIG_SGI_XP) += xpc.o +xpc-y := xpc_main.o xpc_channel.o xpc_partition.o +xpc-$(CONFIG_IA64_SGI_SN2) += xpc_sn2.o +xpc-$(CONFIG_IA64_GENERIC) += xpc_sn2.o +xpc-$(CONFIG_IA64_SGI_UV) += xpc_uv.o +xpc-$(CONFIG_X86_64) += xpc_uv.o + +obj-$(CONFIG_SGI_XP) += xpnet.o diff --git a/drivers/misc/sgi-xp/xp.h b/drivers/misc/sgi-xp/xp.h new file mode 100644 index 000000000..b8069eec1 --- /dev/null +++ b/drivers/misc/sgi-xp/xp.h @@ -0,0 +1,368 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2004-2008 Silicon Graphics, Inc. All rights reserved. + */ + +/* + * External Cross Partition (XP) structures and defines. + */ + +#ifndef _DRIVERS_MISC_SGIXP_XP_H +#define _DRIVERS_MISC_SGIXP_XP_H + +#include + +#if defined CONFIG_X86_UV || defined CONFIG_IA64_SGI_UV +#include +#define is_uv() is_uv_system() +#endif + +#ifndef is_uv +#define is_uv() 0 +#endif + +#if defined CONFIG_IA64 +#include /* defines is_shub1() and is_shub2() */ +#define is_shub() ia64_platform_is("sn2") +#endif + +#ifndef is_shub1 +#define is_shub1() 0 +#endif + +#ifndef is_shub2 +#define is_shub2() 0 +#endif + +#ifndef is_shub +#define is_shub() 0 +#endif + +#ifdef USE_DBUG_ON +#define DBUG_ON(condition) BUG_ON(condition) +#else +#define DBUG_ON(condition) +#endif + +/* + * Define the maximum number of partitions the system can possibly support. + * It is based on the maximum number of hardware partitionable regions. The + * term 'region' in this context refers to the minimum number of nodes that + * can comprise an access protection grouping. The access protection is in + * regards to memory, IPI and IOI. + * + * The maximum number of hardware partitionable regions is equal to the + * maximum number of nodes in the entire system divided by the minimum number + * of nodes that comprise an access protection grouping. + */ +#define XP_MAX_NPARTITIONS_SN2 64 +#define XP_MAX_NPARTITIONS_UV 256 + +/* + * XPC establishes channel connections between the local partition and any + * other partition that is currently up. Over these channels, kernel-level + * `users' can communicate with their counterparts on the other partitions. + * + * If the need for additional channels arises, one can simply increase + * XPC_MAX_NCHANNELS accordingly. If the day should come where that number + * exceeds the absolute MAXIMUM number of channels possible (eight), then one + * will need to make changes to the XPC code to accommodate for this. + * + * The absolute maximum number of channels possible is limited to eight for + * performance reasons on sn2 hardware. The internal cross partition structures + * require sixteen bytes per channel, and eight allows all of this + * interface-shared info to fit in one 128-byte cacheline. + */ +#define XPC_MEM_CHANNEL 0 /* memory channel number */ +#define XPC_NET_CHANNEL 1 /* network channel number */ + +#define XPC_MAX_NCHANNELS 2 /* max #of channels allowed */ + +#if XPC_MAX_NCHANNELS > 8 +#error XPC_MAX_NCHANNELS exceeds absolute MAXIMUM possible. +#endif + +/* + * Define macro, XPC_MSG_SIZE(), is provided for the user + * that wants to fit as many msg entries as possible in a given memory size + * (e.g. a memory page). + */ +#define XPC_MSG_MAX_SIZE 128 +#define XPC_MSG_HDR_MAX_SIZE 16 +#define XPC_MSG_PAYLOAD_MAX_SIZE (XPC_MSG_MAX_SIZE - XPC_MSG_HDR_MAX_SIZE) + +#define XPC_MSG_SIZE(_payload_size) \ + ALIGN(XPC_MSG_HDR_MAX_SIZE + (_payload_size), \ + is_uv() ? 64 : 128) + + +/* + * Define the return values and values passed to user's callout functions. + * (It is important to add new value codes at the end just preceding + * xpUnknownReason, which must have the highest numerical value.) + */ +enum xp_retval { + xpSuccess = 0, + + xpNotConnected, /* 1: channel is not connected */ + xpConnected, /* 2: channel connected (opened) */ + xpRETIRED1, /* 3: (formerly xpDisconnected) */ + + xpMsgReceived, /* 4: message received */ + xpMsgDelivered, /* 5: message delivered and acknowledged */ + + xpRETIRED2, /* 6: (formerly xpTransferFailed) */ + + xpNoWait, /* 7: operation would require wait */ + xpRetry, /* 8: retry operation */ + xpTimeout, /* 9: timeout in xpc_allocate_msg_wait() */ + xpInterrupted, /* 10: interrupted wait */ + + xpUnequalMsgSizes, /* 11: message size disparity between sides */ + xpInvalidAddress, /* 12: invalid address */ + + xpNoMemory, /* 13: no memory available for XPC structures */ + xpLackOfResources, /* 14: insufficient resources for operation */ + xpUnregistered, /* 15: channel is not registered */ + xpAlreadyRegistered, /* 16: channel is already registered */ + + xpPartitionDown, /* 17: remote partition is down */ + xpNotLoaded, /* 18: XPC module is not loaded */ + xpUnloading, /* 19: this side is unloading XPC module */ + + xpBadMagic, /* 20: XPC MAGIC string not found */ + + xpReactivating, /* 21: remote partition was reactivated */ + + xpUnregistering, /* 22: this side is unregistering channel */ + xpOtherUnregistering, /* 23: other side is unregistering channel */ + + xpCloneKThread, /* 24: cloning kernel thread */ + xpCloneKThreadFailed, /* 25: cloning kernel thread failed */ + + xpNoHeartbeat, /* 26: remote partition has no heartbeat */ + + xpPioReadError, /* 27: PIO read error */ + xpPhysAddrRegFailed, /* 28: registration of phys addr range failed */ + + xpRETIRED3, /* 29: (formerly xpBteDirectoryError) */ + xpRETIRED4, /* 30: (formerly xpBtePoisonError) */ + xpRETIRED5, /* 31: (formerly xpBteWriteError) */ + xpRETIRED6, /* 32: (formerly xpBteAccessError) */ + xpRETIRED7, /* 33: (formerly xpBtePWriteError) */ + xpRETIRED8, /* 34: (formerly xpBtePReadError) */ + xpRETIRED9, /* 35: (formerly xpBteTimeOutError) */ + xpRETIRED10, /* 36: (formerly xpBteXtalkError) */ + xpRETIRED11, /* 37: (formerly xpBteNotAvailable) */ + xpRETIRED12, /* 38: (formerly xpBteUnmappedError) */ + + xpBadVersion, /* 39: bad version number */ + xpVarsNotSet, /* 40: the XPC variables are not set up */ + xpNoRsvdPageAddr, /* 41: unable to get rsvd page's phys addr */ + xpInvalidPartid, /* 42: invalid partition ID */ + xpLocalPartid, /* 43: local partition ID */ + + xpOtherGoingDown, /* 44: other side going down, reason unknown */ + xpSystemGoingDown, /* 45: system is going down, reason unknown */ + xpSystemHalt, /* 46: system is being halted */ + xpSystemReboot, /* 47: system is being rebooted */ + xpSystemPoweroff, /* 48: system is being powered off */ + + xpDisconnecting, /* 49: channel disconnecting (closing) */ + + xpOpenCloseError, /* 50: channel open/close protocol error */ + + xpDisconnected, /* 51: channel disconnected (closed) */ + + xpBteCopyError, /* 52: bte_copy() returned error */ + xpSalError, /* 53: sn SAL error */ + xpRsvdPageNotSet, /* 54: the reserved page is not set up */ + xpPayloadTooBig, /* 55: payload too large for message slot */ + + xpUnsupported, /* 56: unsupported functionality or resource */ + xpNeedMoreInfo, /* 57: more info is needed by SAL */ + + xpGruCopyError, /* 58: gru_copy_gru() returned error */ + xpGruSendMqError, /* 59: gru send message queue related error */ + + xpBadChannelNumber, /* 60: invalid channel number */ + xpBadMsgType, /* 61: invalid message type */ + xpBiosError, /* 62: BIOS error */ + + xpUnknownReason /* 63: unknown reason - must be last in enum */ +}; + +/* + * Define the callout function type used by XPC to update the user on + * connection activity and state changes via the user function registered + * by xpc_connect(). + * + * Arguments: + * + * reason - reason code. + * partid - partition ID associated with condition. + * ch_number - channel # associated with condition. + * data - pointer to optional data. + * key - pointer to optional user-defined value provided as the "key" + * argument to xpc_connect(). + * + * A reason code of xpConnected indicates that a connection has been + * established to the specified partition on the specified channel. The data + * argument indicates the max number of entries allowed in the message queue. + * + * A reason code of xpMsgReceived indicates that a XPC message arrived from + * the specified partition on the specified channel. The data argument + * specifies the address of the message's payload. The user must call + * xpc_received() when finished with the payload. + * + * All other reason codes indicate failure. The data argmument is NULL. + * When a failure reason code is received, one can assume that the channel + * is not connected. + */ +typedef void (*xpc_channel_func) (enum xp_retval reason, short partid, + int ch_number, void *data, void *key); + +/* + * Define the callout function type used by XPC to notify the user of + * messages received and delivered via the user function registered by + * xpc_send_notify(). + * + * Arguments: + * + * reason - reason code. + * partid - partition ID associated with condition. + * ch_number - channel # associated with condition. + * key - pointer to optional user-defined value provided as the "key" + * argument to xpc_send_notify(). + * + * A reason code of xpMsgDelivered indicates that the message was delivered + * to the intended recipient and that they have acknowledged its receipt by + * calling xpc_received(). + * + * All other reason codes indicate failure. + * + * NOTE: The user defined function must be callable by an interrupt handler + * and thus cannot block. + */ +typedef void (*xpc_notify_func) (enum xp_retval reason, short partid, + int ch_number, void *key); + +/* + * The following is a registration entry. There is a global array of these, + * one per channel. It is used to record the connection registration made + * by the users of XPC. As long as a registration entry exists, for any + * partition that comes up, XPC will attempt to establish a connection on + * that channel. Notification that a connection has been made will occur via + * the xpc_channel_func function. + * + * The 'func' field points to the function to call when aynchronous + * notification is required for such events as: a connection established/lost, + * or an incoming message received, or an error condition encountered. A + * non-NULL 'func' field indicates that there is an active registration for + * the channel. + */ +struct xpc_registration { + struct mutex mutex; + xpc_channel_func func; /* function to call */ + void *key; /* pointer to user's key */ + u16 nentries; /* #of msg entries in local msg queue */ + u16 entry_size; /* message queue's message entry size */ + u32 assigned_limit; /* limit on #of assigned kthreads */ + u32 idle_limit; /* limit on #of idle kthreads */ +} ____cacheline_aligned; + +#define XPC_CHANNEL_REGISTERED(_c) (xpc_registrations[_c].func != NULL) + +/* the following are valid xpc_send() or xpc_send_notify() flags */ +#define XPC_WAIT 0 /* wait flag */ +#define XPC_NOWAIT 1 /* no wait flag */ + +struct xpc_interface { + void (*connect) (int); + void (*disconnect) (int); + enum xp_retval (*send) (short, int, u32, void *, u16); + enum xp_retval (*send_notify) (short, int, u32, void *, u16, + xpc_notify_func, void *); + void (*received) (short, int, void *); + enum xp_retval (*partid_to_nasids) (short, void *); +}; + +extern struct xpc_interface xpc_interface; + +extern void xpc_set_interface(void (*)(int), + void (*)(int), + enum xp_retval (*)(short, int, u32, void *, u16), + enum xp_retval (*)(short, int, u32, void *, u16, + xpc_notify_func, void *), + void (*)(short, int, void *), + enum xp_retval (*)(short, void *)); +extern void xpc_clear_interface(void); + +extern enum xp_retval xpc_connect(int, xpc_channel_func, void *, u16, + u16, u32, u32); +extern void xpc_disconnect(int); + +static inline enum xp_retval +xpc_send(short partid, int ch_number, u32 flags, void *payload, + u16 payload_size) +{ + if (!xpc_interface.send) + return xpNotLoaded; + + return xpc_interface.send(partid, ch_number, flags, payload, + payload_size); +} + +static inline enum xp_retval +xpc_send_notify(short partid, int ch_number, u32 flags, void *payload, + u16 payload_size, xpc_notify_func func, void *key) +{ + if (!xpc_interface.send_notify) + return xpNotLoaded; + + return xpc_interface.send_notify(partid, ch_number, flags, payload, + payload_size, func, key); +} + +static inline void +xpc_received(short partid, int ch_number, void *payload) +{ + if (xpc_interface.received) + xpc_interface.received(partid, ch_number, payload); +} + +static inline enum xp_retval +xpc_partid_to_nasids(short partid, void *nasids) +{ + if (!xpc_interface.partid_to_nasids) + return xpNotLoaded; + + return xpc_interface.partid_to_nasids(partid, nasids); +} + +extern short xp_max_npartitions; +extern short xp_partition_id; +extern u8 xp_region_size; + +extern unsigned long (*xp_pa) (void *); +extern unsigned long (*xp_socket_pa) (unsigned long); +extern enum xp_retval (*xp_remote_memcpy) (unsigned long, const unsigned long, + size_t); +extern int (*xp_cpu_to_nasid) (int); +extern enum xp_retval (*xp_expand_memprotect) (unsigned long, unsigned long); +extern enum xp_retval (*xp_restrict_memprotect) (unsigned long, unsigned long); + +extern u64 xp_nofault_PIOR_target; +extern int xp_nofault_PIOR(void *); +extern int xp_error_PIOR(void); + +extern struct device *xp; +extern enum xp_retval xp_init_sn2(void); +extern enum xp_retval xp_init_uv(void); +extern void xp_exit_sn2(void); +extern void xp_exit_uv(void); + +#endif /* _DRIVERS_MISC_SGIXP_XP_H */ diff --git a/drivers/misc/sgi-xp/xp_main.c b/drivers/misc/sgi-xp/xp_main.c new file mode 100644 index 000000000..6d7f557fd --- /dev/null +++ b/drivers/misc/sgi-xp/xp_main.c @@ -0,0 +1,264 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 2004-2008 Silicon Graphics, Inc. All Rights Reserved. + */ + +/* + * Cross Partition (XP) base. + * + * XP provides a base from which its users can interact + * with XPC, yet not be dependent on XPC. + * + */ + +#include +#include +#include "xp.h" + +/* define the XP debug device structures to be used with dev_dbg() et al */ + +struct device_driver xp_dbg_name = { + .name = "xp" +}; + +struct device xp_dbg_subname = { + .init_name = "", /* set to "" */ + .driver = &xp_dbg_name +}; + +struct device *xp = &xp_dbg_subname; + +/* max #of partitions possible */ +short xp_max_npartitions; +EXPORT_SYMBOL_GPL(xp_max_npartitions); + +short xp_partition_id; +EXPORT_SYMBOL_GPL(xp_partition_id); + +u8 xp_region_size; +EXPORT_SYMBOL_GPL(xp_region_size); + +unsigned long (*xp_pa) (void *addr); +EXPORT_SYMBOL_GPL(xp_pa); + +unsigned long (*xp_socket_pa) (unsigned long gpa); +EXPORT_SYMBOL_GPL(xp_socket_pa); + +enum xp_retval (*xp_remote_memcpy) (unsigned long dst_gpa, + const unsigned long src_gpa, size_t len); +EXPORT_SYMBOL_GPL(xp_remote_memcpy); + +int (*xp_cpu_to_nasid) (int cpuid); +EXPORT_SYMBOL_GPL(xp_cpu_to_nasid); + +enum xp_retval (*xp_expand_memprotect) (unsigned long phys_addr, + unsigned long size); +EXPORT_SYMBOL_GPL(xp_expand_memprotect); +enum xp_retval (*xp_restrict_memprotect) (unsigned long phys_addr, + unsigned long size); +EXPORT_SYMBOL_GPL(xp_restrict_memprotect); + +/* + * xpc_registrations[] keeps track of xpc_connect()'s done by the kernel-level + * users of XPC. + */ +struct xpc_registration xpc_registrations[XPC_MAX_NCHANNELS]; +EXPORT_SYMBOL_GPL(xpc_registrations); + +/* + * Initialize the XPC interface to NULL to indicate that XPC isn't loaded. + */ +struct xpc_interface xpc_interface = { }; +EXPORT_SYMBOL_GPL(xpc_interface); + +/* + * XPC calls this when it (the XPC module) has been loaded. + */ +void +xpc_set_interface(void (*connect) (int), + void (*disconnect) (int), + enum xp_retval (*send) (short, int, u32, void *, u16), + enum xp_retval (*send_notify) (short, int, u32, void *, u16, + xpc_notify_func, void *), + void (*received) (short, int, void *), + enum xp_retval (*partid_to_nasids) (short, void *)) +{ + xpc_interface.connect = connect; + xpc_interface.disconnect = disconnect; + xpc_interface.send = send; + xpc_interface.send_notify = send_notify; + xpc_interface.received = received; + xpc_interface.partid_to_nasids = partid_to_nasids; +} +EXPORT_SYMBOL_GPL(xpc_set_interface); + +/* + * XPC calls this when it (the XPC module) is being unloaded. + */ +void +xpc_clear_interface(void) +{ + memset(&xpc_interface, 0, sizeof(xpc_interface)); +} +EXPORT_SYMBOL_GPL(xpc_clear_interface); + +/* + * Register for automatic establishment of a channel connection whenever + * a partition comes up. + * + * Arguments: + * + * ch_number - channel # to register for connection. + * func - function to call for asynchronous notification of channel + * state changes (i.e., connection, disconnection, error) and + * the arrival of incoming messages. + * key - pointer to optional user-defined value that gets passed back + * to the user on any callouts made to func. + * payload_size - size in bytes of the XPC message's payload area which + * contains a user-defined message. The user should make + * this large enough to hold their largest message. + * nentries - max #of XPC message entries a message queue can contain. + * The actual number, which is determined when a connection + * is established and may be less then requested, will be + * passed to the user via the xpConnected callout. + * assigned_limit - max number of kthreads allowed to be processing + * messages (per connection) at any given instant. + * idle_limit - max number of kthreads allowed to be idle at any given + * instant. + */ +enum xp_retval +xpc_connect(int ch_number, xpc_channel_func func, void *key, u16 payload_size, + u16 nentries, u32 assigned_limit, u32 idle_limit) +{ + struct xpc_registration *registration; + + DBUG_ON(ch_number < 0 || ch_number >= XPC_MAX_NCHANNELS); + DBUG_ON(payload_size == 0 || nentries == 0); + DBUG_ON(func == NULL); + DBUG_ON(assigned_limit == 0 || idle_limit > assigned_limit); + + if (XPC_MSG_SIZE(payload_size) > XPC_MSG_MAX_SIZE) + return xpPayloadTooBig; + + registration = &xpc_registrations[ch_number]; + + if (mutex_lock_interruptible(®istration->mutex) != 0) + return xpInterrupted; + + /* if XPC_CHANNEL_REGISTERED(ch_number) */ + if (registration->func != NULL) { + mutex_unlock(®istration->mutex); + return xpAlreadyRegistered; + } + + /* register the channel for connection */ + registration->entry_size = XPC_MSG_SIZE(payload_size); + registration->nentries = nentries; + registration->assigned_limit = assigned_limit; + registration->idle_limit = idle_limit; + registration->key = key; + registration->func = func; + + mutex_unlock(®istration->mutex); + + if (xpc_interface.connect) + xpc_interface.connect(ch_number); + + return xpSuccess; +} +EXPORT_SYMBOL_GPL(xpc_connect); + +/* + * Remove the registration for automatic connection of the specified channel + * when a partition comes up. + * + * Before returning this xpc_disconnect() will wait for all connections on the + * specified channel have been closed/torndown. So the caller can be assured + * that they will not be receiving any more callouts from XPC to their + * function registered via xpc_connect(). + * + * Arguments: + * + * ch_number - channel # to unregister. + */ +void +xpc_disconnect(int ch_number) +{ + struct xpc_registration *registration; + + DBUG_ON(ch_number < 0 || ch_number >= XPC_MAX_NCHANNELS); + + registration = &xpc_registrations[ch_number]; + + /* + * We've decided not to make this a down_interruptible(), since we + * figured XPC's users will just turn around and call xpc_disconnect() + * again anyways, so we might as well wait, if need be. + */ + mutex_lock(®istration->mutex); + + /* if !XPC_CHANNEL_REGISTERED(ch_number) */ + if (registration->func == NULL) { + mutex_unlock(®istration->mutex); + return; + } + + /* remove the connection registration for the specified channel */ + registration->func = NULL; + registration->key = NULL; + registration->nentries = 0; + registration->entry_size = 0; + registration->assigned_limit = 0; + registration->idle_limit = 0; + + if (xpc_interface.disconnect) + xpc_interface.disconnect(ch_number); + + mutex_unlock(®istration->mutex); + + return; +} +EXPORT_SYMBOL_GPL(xpc_disconnect); + +int __init +xp_init(void) +{ + enum xp_retval ret; + int ch_number; + + /* initialize the connection registration mutex */ + for (ch_number = 0; ch_number < XPC_MAX_NCHANNELS; ch_number++) + mutex_init(&xpc_registrations[ch_number].mutex); + + if (is_shub()) + ret = xp_init_sn2(); + else if (is_uv()) + ret = xp_init_uv(); + else + ret = 0; + + if (ret != xpSuccess) + return ret; + + return 0; +} + +module_init(xp_init); + +void __exit +xp_exit(void) +{ + if (is_shub()) + xp_exit_sn2(); + else if (is_uv()) + xp_exit_uv(); +} + +module_exit(xp_exit); + +MODULE_AUTHOR("Silicon Graphics, Inc."); +MODULE_DESCRIPTION("Cross Partition (XP) base"); +MODULE_LICENSE("GPL"); diff --git a/drivers/misc/sgi-xp/xp_nofault.S b/drivers/misc/sgi-xp/xp_nofault.S new file mode 100644 index 000000000..e38d43319 --- /dev/null +++ b/drivers/misc/sgi-xp/xp_nofault.S @@ -0,0 +1,35 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 2004-2008 Silicon Graphics, Inc. All Rights Reserved. + */ + +/* + * The xp_nofault_PIOR function takes a pointer to a remote PIO register + * and attempts to load and consume a value from it. This function + * will be registered as a nofault code block. In the event that the + * PIO read fails, the MCA handler will force the error to look + * corrected and vector to the xp_error_PIOR which will return an error. + * + * The definition of "consumption" and the time it takes for an MCA + * to surface is processor implementation specific. This code + * is sufficient on Itanium through the Montvale processor family. + * It may need to be adjusted for future processor implementations. + * + * extern int xp_nofault_PIOR(void *remote_register); + */ + + .global xp_nofault_PIOR +xp_nofault_PIOR: + mov r8=r0 // Stage a success return value + ld8.acq r9=[r32];; // PIO Read the specified register + adds r9=1,r9;; // Add to force consumption + srlz.i;; // Allow time for MCA to surface + br.ret.sptk.many b0;; // Return success + + .global xp_error_PIOR +xp_error_PIOR: + mov r8=1 // Return value of 1 + br.ret.sptk.many b0;; // Return failure diff --git a/drivers/misc/sgi-xp/xp_sn2.c b/drivers/misc/sgi-xp/xp_sn2.c new file mode 100644 index 000000000..d8e463f87 --- /dev/null +++ b/drivers/misc/sgi-xp/xp_sn2.c @@ -0,0 +1,190 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. + */ + +/* + * Cross Partition (XP) sn2-based functions. + * + * Architecture specific implementation of common functions. + */ + +#include +#include +#include +#include +#include "xp.h" + +/* + * The export of xp_nofault_PIOR needs to happen here since it is defined + * in drivers/misc/sgi-xp/xp_nofault.S. The target of the nofault read is + * defined here. + */ +EXPORT_SYMBOL_GPL(xp_nofault_PIOR); + +u64 xp_nofault_PIOR_target; +EXPORT_SYMBOL_GPL(xp_nofault_PIOR_target); + +/* + * Register a nofault code region which performs a cross-partition PIO read. + * If the PIO read times out, the MCA handler will consume the error and + * return to a kernel-provided instruction to indicate an error. This PIO read + * exists because it is guaranteed to timeout if the destination is down + * (amo operations do not timeout on at least some CPUs on Shubs <= v1.2, + * which unfortunately we have to work around). + */ +static enum xp_retval +xp_register_nofault_code_sn2(void) +{ + int ret; + u64 func_addr; + u64 err_func_addr; + + func_addr = *(u64 *)xp_nofault_PIOR; + err_func_addr = *(u64 *)xp_error_PIOR; + ret = sn_register_nofault_code(func_addr, err_func_addr, err_func_addr, + 1, 1); + if (ret != 0) { + dev_err(xp, "can't register nofault code, error=%d\n", ret); + return xpSalError; + } + /* + * Setup the nofault PIO read target. (There is no special reason why + * SH_IPI_ACCESS was selected.) + */ + if (is_shub1()) + xp_nofault_PIOR_target = SH1_IPI_ACCESS; + else if (is_shub2()) + xp_nofault_PIOR_target = SH2_IPI_ACCESS0; + + return xpSuccess; +} + +static void +xp_unregister_nofault_code_sn2(void) +{ + u64 func_addr = *(u64 *)xp_nofault_PIOR; + u64 err_func_addr = *(u64 *)xp_error_PIOR; + + /* unregister the PIO read nofault code region */ + (void)sn_register_nofault_code(func_addr, err_func_addr, + err_func_addr, 1, 0); +} + +/* + * Convert a virtual memory address to a physical memory address. + */ +static unsigned long +xp_pa_sn2(void *addr) +{ + return __pa(addr); +} + +/* + * Convert a global physical to a socket physical address. + */ +static unsigned long +xp_socket_pa_sn2(unsigned long gpa) +{ + return gpa; +} + +/* + * Wrapper for bte_copy(). + * + * dst_pa - physical address of the destination of the transfer. + * src_pa - physical address of the source of the transfer. + * len - number of bytes to transfer from source to destination. + * + * Note: xp_remote_memcpy_sn2() should never be called while holding a spinlock. + */ +static enum xp_retval +xp_remote_memcpy_sn2(unsigned long dst_pa, const unsigned long src_pa, + size_t len) +{ + bte_result_t ret; + + ret = bte_copy(src_pa, dst_pa, len, (BTE_NOTIFY | BTE_WACQUIRE), NULL); + if (ret == BTE_SUCCESS) + return xpSuccess; + + if (is_shub2()) { + dev_err(xp, "bte_copy() on shub2 failed, error=0x%x dst_pa=" + "0x%016lx src_pa=0x%016lx len=%ld\\n", ret, dst_pa, + src_pa, len); + } else { + dev_err(xp, "bte_copy() failed, error=%d dst_pa=0x%016lx " + "src_pa=0x%016lx len=%ld\\n", ret, dst_pa, src_pa, len); + } + + return xpBteCopyError; +} + +static int +xp_cpu_to_nasid_sn2(int cpuid) +{ + return cpuid_to_nasid(cpuid); +} + +static enum xp_retval +xp_expand_memprotect_sn2(unsigned long phys_addr, unsigned long size) +{ + u64 nasid_array = 0; + int ret; + + ret = sn_change_memprotect(phys_addr, size, SN_MEMPROT_ACCESS_CLASS_1, + &nasid_array); + if (ret != 0) { + dev_err(xp, "sn_change_memprotect(,, " + "SN_MEMPROT_ACCESS_CLASS_1,) failed ret=%d\n", ret); + return xpSalError; + } + return xpSuccess; +} + +static enum xp_retval +xp_restrict_memprotect_sn2(unsigned long phys_addr, unsigned long size) +{ + u64 nasid_array = 0; + int ret; + + ret = sn_change_memprotect(phys_addr, size, SN_MEMPROT_ACCESS_CLASS_0, + &nasid_array); + if (ret != 0) { + dev_err(xp, "sn_change_memprotect(,, " + "SN_MEMPROT_ACCESS_CLASS_0,) failed ret=%d\n", ret); + return xpSalError; + } + return xpSuccess; +} + +enum xp_retval +xp_init_sn2(void) +{ + BUG_ON(!is_shub()); + + xp_max_npartitions = XP_MAX_NPARTITIONS_SN2; + xp_partition_id = sn_partition_id; + xp_region_size = sn_region_size; + + xp_pa = xp_pa_sn2; + xp_socket_pa = xp_socket_pa_sn2; + xp_remote_memcpy = xp_remote_memcpy_sn2; + xp_cpu_to_nasid = xp_cpu_to_nasid_sn2; + xp_expand_memprotect = xp_expand_memprotect_sn2; + xp_restrict_memprotect = xp_restrict_memprotect_sn2; + + return xp_register_nofault_code_sn2(); +} + +void +xp_exit_sn2(void) +{ + BUG_ON(!is_shub()); + + xp_unregister_nofault_code_sn2(); +} + diff --git a/drivers/misc/sgi-xp/xp_uv.c b/drivers/misc/sgi-xp/xp_uv.c new file mode 100644 index 000000000..a0d093274 --- /dev/null +++ b/drivers/misc/sgi-xp/xp_uv.c @@ -0,0 +1,171 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. + */ + +/* + * Cross Partition (XP) uv-based functions. + * + * Architecture specific implementation of common functions. + * + */ + +#include +#include +#if defined CONFIG_X86_64 +#include +#elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV +#include +#endif +#include "../sgi-gru/grukservices.h" +#include "xp.h" + +/* + * Convert a virtual memory address to a physical memory address. + */ +static unsigned long +xp_pa_uv(void *addr) +{ + return uv_gpa(addr); +} + +/* + * Convert a global physical to socket physical address. + */ +static unsigned long +xp_socket_pa_uv(unsigned long gpa) +{ + return uv_gpa_to_soc_phys_ram(gpa); +} + +static enum xp_retval +xp_remote_mmr_read(unsigned long dst_gpa, const unsigned long src_gpa, + size_t len) +{ + int ret; + unsigned long *dst_va = __va(uv_gpa_to_soc_phys_ram(dst_gpa)); + + BUG_ON(!uv_gpa_in_mmr_space(src_gpa)); + BUG_ON(len != 8); + + ret = gru_read_gpa(dst_va, src_gpa); + if (ret == 0) + return xpSuccess; + + dev_err(xp, "gru_read_gpa() failed, dst_gpa=0x%016lx src_gpa=0x%016lx " + "len=%ld\n", dst_gpa, src_gpa, len); + return xpGruCopyError; +} + + +static enum xp_retval +xp_remote_memcpy_uv(unsigned long dst_gpa, const unsigned long src_gpa, + size_t len) +{ + int ret; + + if (uv_gpa_in_mmr_space(src_gpa)) + return xp_remote_mmr_read(dst_gpa, src_gpa, len); + + ret = gru_copy_gpa(dst_gpa, src_gpa, len); + if (ret == 0) + return xpSuccess; + + dev_err(xp, "gru_copy_gpa() failed, dst_gpa=0x%016lx src_gpa=0x%016lx " + "len=%ld\n", dst_gpa, src_gpa, len); + return xpGruCopyError; +} + +static int +xp_cpu_to_nasid_uv(int cpuid) +{ + /* ??? Is this same as sn2 nasid in mach/part bitmaps set up by SAL? */ + return UV_PNODE_TO_NASID(uv_cpu_to_pnode(cpuid)); +} + +static enum xp_retval +xp_expand_memprotect_uv(unsigned long phys_addr, unsigned long size) +{ + int ret; + +#if defined CONFIG_X86_64 + ret = uv_bios_change_memprotect(phys_addr, size, UV_MEMPROT_ALLOW_RW); + if (ret != BIOS_STATUS_SUCCESS) { + dev_err(xp, "uv_bios_change_memprotect(,, " + "UV_MEMPROT_ALLOW_RW) failed, ret=%d\n", ret); + return xpBiosError; + } + +#elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV + u64 nasid_array; + + ret = sn_change_memprotect(phys_addr, size, SN_MEMPROT_ACCESS_CLASS_1, + &nasid_array); + if (ret != 0) { + dev_err(xp, "sn_change_memprotect(,, " + "SN_MEMPROT_ACCESS_CLASS_1,) failed ret=%d\n", ret); + return xpSalError; + } +#else + #error not a supported configuration +#endif + return xpSuccess; +} + +static enum xp_retval +xp_restrict_memprotect_uv(unsigned long phys_addr, unsigned long size) +{ + int ret; + +#if defined CONFIG_X86_64 + ret = uv_bios_change_memprotect(phys_addr, size, + UV_MEMPROT_RESTRICT_ACCESS); + if (ret != BIOS_STATUS_SUCCESS) { + dev_err(xp, "uv_bios_change_memprotect(,, " + "UV_MEMPROT_RESTRICT_ACCESS) failed, ret=%d\n", ret); + return xpBiosError; + } + +#elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV + u64 nasid_array; + + ret = sn_change_memprotect(phys_addr, size, SN_MEMPROT_ACCESS_CLASS_0, + &nasid_array); + if (ret != 0) { + dev_err(xp, "sn_change_memprotect(,, " + "SN_MEMPROT_ACCESS_CLASS_0,) failed ret=%d\n", ret); + return xpSalError; + } +#else + #error not a supported configuration +#endif + return xpSuccess; +} + +enum xp_retval +xp_init_uv(void) +{ + BUG_ON(!is_uv()); + + xp_max_npartitions = XP_MAX_NPARTITIONS_UV; + xp_partition_id = sn_partition_id; + xp_region_size = sn_region_size; + + xp_pa = xp_pa_uv; + xp_socket_pa = xp_socket_pa_uv; + xp_remote_memcpy = xp_remote_memcpy_uv; + xp_cpu_to_nasid = xp_cpu_to_nasid_uv; + xp_expand_memprotect = xp_expand_memprotect_uv; + xp_restrict_memprotect = xp_restrict_memprotect_uv; + + return xpSuccess; +} + +void +xp_exit_uv(void) +{ + BUG_ON(!is_uv()); +} diff --git a/drivers/misc/sgi-xp/xpc.h b/drivers/misc/sgi-xp/xpc.h new file mode 100644 index 000000000..b94d5f767 --- /dev/null +++ b/drivers/misc/sgi-xp/xpc.h @@ -0,0 +1,1004 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 2004-2009 Silicon Graphics, Inc. All Rights Reserved. + */ + +/* + * Cross Partition Communication (XPC) structures and macros. + */ + +#ifndef _DRIVERS_MISC_SGIXP_XPC_H +#define _DRIVERS_MISC_SGIXP_XPC_H + +#include +#include +#include +#include +#include "xp.h" + +/* + * XPC Version numbers consist of a major and minor number. XPC can always + * talk to versions with same major #, and never talk to versions with a + * different major #. + */ +#define _XPC_VERSION(_maj, _min) (((_maj) << 4) | ((_min) & 0xf)) +#define XPC_VERSION_MAJOR(_v) ((_v) >> 4) +#define XPC_VERSION_MINOR(_v) ((_v) & 0xf) + +/* define frequency of the heartbeat and frequency how often it's checked */ +#define XPC_HB_DEFAULT_INTERVAL 5 /* incr HB every x secs */ +#define XPC_HB_CHECK_DEFAULT_INTERVAL 20 /* check HB every x secs */ + +/* define the process name of HB checker and the CPU it is pinned to */ +#define XPC_HB_CHECK_THREAD_NAME "xpc_hb" +#define XPC_HB_CHECK_CPU 0 + +/* define the process name of the discovery thread */ +#define XPC_DISCOVERY_THREAD_NAME "xpc_discovery" + +/* + * the reserved page + * + * SAL reserves one page of memory per partition for XPC. Though a full page + * in length (16384 bytes), its starting address is not page aligned, but it + * is cacheline aligned. The reserved page consists of the following: + * + * reserved page header + * + * The first two 64-byte cachelines of the reserved page contain the + * header (struct xpc_rsvd_page). Before SAL initialization has completed, + * SAL has set up the following fields of the reserved page header: + * SAL_signature, SAL_version, SAL_partid, and SAL_nasids_size. The + * other fields are set up by XPC. (xpc_rsvd_page points to the local + * partition's reserved page.) + * + * part_nasids mask + * mach_nasids mask + * + * SAL also sets up two bitmaps (or masks), one that reflects the actual + * nasids in this partition (part_nasids), and the other that reflects + * the actual nasids in the entire machine (mach_nasids). We're only + * interested in the even numbered nasids (which contain the processors + * and/or memory), so we only need half as many bits to represent the + * nasids. When mapping nasid to bit in a mask (or bit to nasid) be sure + * to either divide or multiply by 2. The part_nasids mask is located + * starting at the first cacheline following the reserved page header. The + * mach_nasids mask follows right after the part_nasids mask. The size in + * bytes of each mask is reflected by the reserved page header field + * 'SAL_nasids_size'. (Local partition's mask pointers are xpc_part_nasids + * and xpc_mach_nasids.) + * + * vars (ia64-sn2 only) + * vars part (ia64-sn2 only) + * + * Immediately following the mach_nasids mask are the XPC variables + * required by other partitions. First are those that are generic to all + * partitions (vars), followed on the next available cacheline by those + * which are partition specific (vars part). These are setup by XPC. + * (Local partition's vars pointers are xpc_vars and xpc_vars_part.) + * + * Note: Until 'ts_jiffies' is set non-zero, the partition XPC code has not been + * initialized. + */ +struct xpc_rsvd_page { + u64 SAL_signature; /* SAL: unique signature */ + u64 SAL_version; /* SAL: version */ + short SAL_partid; /* SAL: partition ID */ + short max_npartitions; /* value of XPC_MAX_PARTITIONS */ + u8 version; + u8 pad1[3]; /* align to next u64 in 1st 64-byte cacheline */ + unsigned long ts_jiffies; /* timestamp when rsvd pg was setup by XPC */ + union { + struct { + unsigned long vars_pa; /* phys addr */ + } sn2; + struct { + unsigned long heartbeat_gpa; /* phys addr */ + unsigned long activate_gru_mq_desc_gpa; /* phys addr */ + } uv; + } sn; + u64 pad2[9]; /* align to last u64 in 2nd 64-byte cacheline */ + u64 SAL_nasids_size; /* SAL: size of each nasid mask in bytes */ +}; + +#define XPC_RP_VERSION _XPC_VERSION(3, 0) /* version 3.0 of the reserved page */ + +/* + * Define the structures by which XPC variables can be exported to other + * partitions. (There are two: struct xpc_vars and struct xpc_vars_part) + */ + +/* + * The following structure describes the partition generic variables + * needed by other partitions in order to properly initialize. + * + * struct xpc_vars version number also applies to struct xpc_vars_part. + * Changes to either structure and/or related functionality should be + * reflected by incrementing either the major or minor version numbers + * of struct xpc_vars. + */ +struct xpc_vars_sn2 { + u8 version; + u64 heartbeat; + DECLARE_BITMAP(heartbeating_to_mask, XP_MAX_NPARTITIONS_SN2); + u64 heartbeat_offline; /* if 0, heartbeat should be changing */ + int activate_IRQ_nasid; + int activate_IRQ_phys_cpuid; + unsigned long vars_part_pa; + unsigned long amos_page_pa;/* paddr of page of amos from MSPEC driver */ + struct amo *amos_page; /* vaddr of page of amos from MSPEC driver */ +}; + +#define XPC_V_VERSION _XPC_VERSION(3, 1) /* version 3.1 of the cross vars */ + +/* + * The following structure describes the per partition specific variables. + * + * An array of these structures, one per partition, will be defined. As a + * partition becomes active XPC will copy the array entry corresponding to + * itself from that partition. It is desirable that the size of this structure + * evenly divides into a 128-byte cacheline, such that none of the entries in + * this array crosses a 128-byte cacheline boundary. As it is now, each entry + * occupies 64-bytes. + */ +struct xpc_vars_part_sn2 { + u64 magic; + + unsigned long openclose_args_pa; /* phys addr of open and close args */ + unsigned long GPs_pa; /* physical address of Get/Put values */ + + unsigned long chctl_amo_pa; /* physical address of chctl flags' amo */ + + int notify_IRQ_nasid; /* nasid of where to send notify IRQs */ + int notify_IRQ_phys_cpuid; /* CPUID of where to send notify IRQs */ + + u8 nchannels; /* #of defined channels supported */ + + u8 reserved[23]; /* pad to a full 64 bytes */ +}; + +/* + * The vars_part MAGIC numbers play a part in the first contact protocol. + * + * MAGIC1 indicates that the per partition specific variables for a remote + * partition have been initialized by this partition. + * + * MAGIC2 indicates that this partition has pulled the remote partititions + * per partition variables that pertain to this partition. + */ +#define XPC_VP_MAGIC1_SN2 0x0053524156435058L /* 'XPCVARS\0'L (little endian) */ +#define XPC_VP_MAGIC2_SN2 0x0073726176435058L /* 'XPCvars\0'L (little endian) */ + +/* the reserved page sizes and offsets */ + +#define XPC_RP_HEADER_SIZE L1_CACHE_ALIGN(sizeof(struct xpc_rsvd_page)) +#define XPC_RP_VARS_SIZE L1_CACHE_ALIGN(sizeof(struct xpc_vars_sn2)) + +#define XPC_RP_PART_NASIDS(_rp) ((unsigned long *)((u8 *)(_rp) + \ + XPC_RP_HEADER_SIZE)) +#define XPC_RP_MACH_NASIDS(_rp) (XPC_RP_PART_NASIDS(_rp) + \ + xpc_nasid_mask_nlongs) +#define XPC_RP_VARS(_rp) ((struct xpc_vars_sn2 *) \ + (XPC_RP_MACH_NASIDS(_rp) + \ + xpc_nasid_mask_nlongs)) + + +/* + * The following structure describes the partition's heartbeat info which + * will be periodically read by other partitions to determine whether this + * XPC is still 'alive'. + */ +struct xpc_heartbeat_uv { + unsigned long value; + unsigned long offline; /* if 0, heartbeat should be changing */ +}; + +/* + * Info pertinent to a GRU message queue using a watch list for irq generation. + */ +struct xpc_gru_mq_uv { + void *address; /* address of GRU message queue */ + unsigned int order; /* size of GRU message queue as a power of 2 */ + int irq; /* irq raised when message is received in mq */ + int mmr_blade; /* blade where watchlist was allocated from */ + unsigned long mmr_offset; /* offset of irq mmr located on mmr_blade */ + unsigned long mmr_value; /* value of irq mmr located on mmr_blade */ + int watchlist_num; /* number of watchlist allocatd by BIOS */ + void *gru_mq_desc; /* opaque structure used by the GRU driver */ +}; + +/* + * The activate_mq is used to send/receive GRU messages that affect XPC's + * partition active state and channel state. This is uv only. + */ +struct xpc_activate_mq_msghdr_uv { + unsigned int gru_msg_hdr; /* FOR GRU INTERNAL USE ONLY */ + short partid; /* sender's partid */ + u8 act_state; /* sender's act_state at time msg sent */ + u8 type; /* message's type */ + unsigned long rp_ts_jiffies; /* timestamp of sender's rp setup by XPC */ +}; + +/* activate_mq defined message types */ +#define XPC_ACTIVATE_MQ_MSG_SYNC_ACT_STATE_UV 0 + +#define XPC_ACTIVATE_MQ_MSG_ACTIVATE_REQ_UV 1 +#define XPC_ACTIVATE_MQ_MSG_DEACTIVATE_REQ_UV 2 + +#define XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREQUEST_UV 3 +#define XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREPLY_UV 4 +#define XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREQUEST_UV 5 +#define XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREPLY_UV 6 +#define XPC_ACTIVATE_MQ_MSG_CHCTL_OPENCOMPLETE_UV 7 + +#define XPC_ACTIVATE_MQ_MSG_MARK_ENGAGED_UV 8 +#define XPC_ACTIVATE_MQ_MSG_MARK_DISENGAGED_UV 9 + +struct xpc_activate_mq_msg_uv { + struct xpc_activate_mq_msghdr_uv hdr; +}; + +struct xpc_activate_mq_msg_activate_req_uv { + struct xpc_activate_mq_msghdr_uv hdr; + unsigned long rp_gpa; + unsigned long heartbeat_gpa; + unsigned long activate_gru_mq_desc_gpa; +}; + +struct xpc_activate_mq_msg_deactivate_req_uv { + struct xpc_activate_mq_msghdr_uv hdr; + enum xp_retval reason; +}; + +struct xpc_activate_mq_msg_chctl_closerequest_uv { + struct xpc_activate_mq_msghdr_uv hdr; + short ch_number; + enum xp_retval reason; +}; + +struct xpc_activate_mq_msg_chctl_closereply_uv { + struct xpc_activate_mq_msghdr_uv hdr; + short ch_number; +}; + +struct xpc_activate_mq_msg_chctl_openrequest_uv { + struct xpc_activate_mq_msghdr_uv hdr; + short ch_number; + short entry_size; /* size of notify_mq's GRU messages */ + short local_nentries; /* ??? Is this needed? What is? */ +}; + +struct xpc_activate_mq_msg_chctl_openreply_uv { + struct xpc_activate_mq_msghdr_uv hdr; + short ch_number; + short remote_nentries; /* ??? Is this needed? What is? */ + short local_nentries; /* ??? Is this needed? What is? */ + unsigned long notify_gru_mq_desc_gpa; +}; + +struct xpc_activate_mq_msg_chctl_opencomplete_uv { + struct xpc_activate_mq_msghdr_uv hdr; + short ch_number; +}; + +/* + * Functions registered by add_timer() or called by kernel_thread() only + * allow for a single 64-bit argument. The following macros can be used to + * pack and unpack two (32-bit, 16-bit or 8-bit) arguments into or out from + * the passed argument. + */ +#define XPC_PACK_ARGS(_arg1, _arg2) \ + ((((u64)_arg1) & 0xffffffff) | \ + ((((u64)_arg2) & 0xffffffff) << 32)) + +#define XPC_UNPACK_ARG1(_args) (((u64)_args) & 0xffffffff) +#define XPC_UNPACK_ARG2(_args) ((((u64)_args) >> 32) & 0xffffffff) + +/* + * Define a Get/Put value pair (pointers) used with a message queue. + */ +struct xpc_gp_sn2 { + s64 get; /* Get value */ + s64 put; /* Put value */ +}; + +#define XPC_GP_SIZE \ + L1_CACHE_ALIGN(sizeof(struct xpc_gp_sn2) * XPC_MAX_NCHANNELS) + +/* + * Define a structure that contains arguments associated with opening and + * closing a channel. + */ +struct xpc_openclose_args { + u16 reason; /* reason why channel is closing */ + u16 entry_size; /* sizeof each message entry */ + u16 remote_nentries; /* #of message entries in remote msg queue */ + u16 local_nentries; /* #of message entries in local msg queue */ + unsigned long local_msgqueue_pa; /* phys addr of local message queue */ +}; + +#define XPC_OPENCLOSE_ARGS_SIZE \ + L1_CACHE_ALIGN(sizeof(struct xpc_openclose_args) * \ + XPC_MAX_NCHANNELS) + + +/* + * Structures to define a fifo singly-linked list. + */ + +struct xpc_fifo_entry_uv { + struct xpc_fifo_entry_uv *next; +}; + +struct xpc_fifo_head_uv { + struct xpc_fifo_entry_uv *first; + struct xpc_fifo_entry_uv *last; + spinlock_t lock; + int n_entries; +}; + +/* + * Define a sn2 styled message. + * + * A user-defined message resides in the payload area. The max size of the + * payload is defined by the user via xpc_connect(). + * + * The size of a message entry (within a message queue) must be a 128-byte + * cacheline sized multiple in order to facilitate the BTE transfer of messages + * from one message queue to another. + */ +struct xpc_msg_sn2 { + u8 flags; /* FOR XPC INTERNAL USE ONLY */ + u8 reserved[7]; /* FOR XPC INTERNAL USE ONLY */ + s64 number; /* FOR XPC INTERNAL USE ONLY */ + + u64 payload; /* user defined portion of message */ +}; + +/* struct xpc_msg_sn2 flags */ + +#define XPC_M_SN2_DONE 0x01 /* msg has been received/consumed */ +#define XPC_M_SN2_READY 0x02 /* msg is ready to be sent */ +#define XPC_M_SN2_INTERRUPT 0x04 /* send interrupt when msg consumed */ + +/* + * The format of a uv XPC notify_mq GRU message is as follows: + * + * A user-defined message resides in the payload area. The max size of the + * payload is defined by the user via xpc_connect(). + * + * The size of a message (payload and header) sent via the GRU must be either 1 + * or 2 GRU_CACHE_LINE_BYTES in length. + */ + +struct xpc_notify_mq_msghdr_uv { + union { + unsigned int gru_msg_hdr; /* FOR GRU INTERNAL USE ONLY */ + struct xpc_fifo_entry_uv next; /* FOR XPC INTERNAL USE ONLY */ + } u; + short partid; /* FOR XPC INTERNAL USE ONLY */ + u8 ch_number; /* FOR XPC INTERNAL USE ONLY */ + u8 size; /* FOR XPC INTERNAL USE ONLY */ + unsigned int msg_slot_number; /* FOR XPC INTERNAL USE ONLY */ +}; + +struct xpc_notify_mq_msg_uv { + struct xpc_notify_mq_msghdr_uv hdr; + unsigned long payload; +}; + +/* + * Define sn2's notify entry. + * + * This is used to notify a message's sender that their message was received + * and consumed by the intended recipient. + */ +struct xpc_notify_sn2 { + u8 type; /* type of notification */ + + /* the following two fields are only used if type == XPC_N_CALL */ + xpc_notify_func func; /* user's notify function */ + void *key; /* pointer to user's key */ +}; + +/* struct xpc_notify_sn2 type of notification */ + +#define XPC_N_CALL 0x01 /* notify function provided by user */ + +/* + * Define uv's version of the notify entry. It additionally is used to allocate + * a msg slot on the remote partition into which is copied a sent message. + */ +struct xpc_send_msg_slot_uv { + struct xpc_fifo_entry_uv next; + unsigned int msg_slot_number; + xpc_notify_func func; /* user's notify function */ + void *key; /* pointer to user's key */ +}; + +/* + * Define the structure that manages all the stuff required by a channel. In + * particular, they are used to manage the messages sent across the channel. + * + * This structure is private to a partition, and is NOT shared across the + * partition boundary. + * + * There is an array of these structures for each remote partition. It is + * allocated at the time a partition becomes active. The array contains one + * of these structures for each potential channel connection to that partition. + */ + +/* + * The following is sn2 only. + * + * Each channel structure manages two message queues (circular buffers). + * They are allocated at the time a channel connection is made. One of + * these message queues (local_msgqueue) holds the locally created messages + * that are destined for the remote partition. The other of these message + * queues (remote_msgqueue) is a locally cached copy of the remote partition's + * own local_msgqueue. + * + * The following is a description of the Get/Put pointers used to manage these + * two message queues. Consider the local_msgqueue to be on one partition + * and the remote_msgqueue to be its cached copy on another partition. A + * description of what each of the lettered areas contains is included. + * + * + * local_msgqueue remote_msgqueue + * + * |/////////| |/////////| + * w_remote_GP.get --> +---------+ |/////////| + * | F | |/////////| + * remote_GP.get --> +---------+ +---------+ <-- local_GP->get + * | | | | + * | | | E | + * | | | | + * | | +---------+ <-- w_local_GP.get + * | B | |/////////| + * | | |////D////| + * | | |/////////| + * | | +---------+ <-- w_remote_GP.put + * | | |////C////| + * local_GP->put --> +---------+ +---------+ <-- remote_GP.put + * | | |/////////| + * | A | |/////////| + * | | |/////////| + * w_local_GP.put --> +---------+ |/////////| + * |/////////| |/////////| + * + * + * ( remote_GP.[get|put] are cached copies of the remote + * partition's local_GP->[get|put], and thus their values can + * lag behind their counterparts on the remote partition. ) + * + * + * A - Messages that have been allocated, but have not yet been sent to the + * remote partition. + * + * B - Messages that have been sent, but have not yet been acknowledged by the + * remote partition as having been received. + * + * C - Area that needs to be prepared for the copying of sent messages, by + * the clearing of the message flags of any previously received messages. + * + * D - Area into which sent messages are to be copied from the remote + * partition's local_msgqueue and then delivered to their intended + * recipients. [ To allow for a multi-message copy, another pointer + * (next_msg_to_pull) has been added to keep track of the next message + * number needing to be copied (pulled). It chases after w_remote_GP.put. + * Any messages lying between w_local_GP.get and next_msg_to_pull have + * been copied and are ready to be delivered. ] + * + * E - Messages that have been copied and delivered, but have not yet been + * acknowledged by the recipient as having been received. + * + * F - Messages that have been acknowledged, but XPC has not yet notified the + * sender that the message was received by its intended recipient. + * This is also an area that needs to be prepared for the allocating of + * new messages, by the clearing of the message flags of the acknowledged + * messages. + */ + +struct xpc_channel_sn2 { + struct xpc_openclose_args *local_openclose_args; /* args passed on */ + /* opening or closing of channel */ + + void *local_msgqueue_base; /* base address of kmalloc'd space */ + struct xpc_msg_sn2 *local_msgqueue; /* local message queue */ + void *remote_msgqueue_base; /* base address of kmalloc'd space */ + struct xpc_msg_sn2 *remote_msgqueue; /* cached copy of remote */ + /* partition's local message queue */ + unsigned long remote_msgqueue_pa; /* phys addr of remote partition's */ + /* local message queue */ + + struct xpc_notify_sn2 *notify_queue;/* notify queue for messages sent */ + + /* various flavors of local and remote Get/Put values */ + + struct xpc_gp_sn2 *local_GP; /* local Get/Put values */ + struct xpc_gp_sn2 remote_GP; /* remote Get/Put values */ + struct xpc_gp_sn2 w_local_GP; /* working local Get/Put values */ + struct xpc_gp_sn2 w_remote_GP; /* working remote Get/Put values */ + s64 next_msg_to_pull; /* Put value of next msg to pull */ + + struct mutex msg_to_pull_mutex; /* next msg to pull serialization */ +}; + +struct xpc_channel_uv { + void *cached_notify_gru_mq_desc; /* remote partition's notify mq's */ + /* gru mq descriptor */ + + struct xpc_send_msg_slot_uv *send_msg_slots; + void *recv_msg_slots; /* each slot will hold a xpc_notify_mq_msg_uv */ + /* structure plus the user's payload */ + + struct xpc_fifo_head_uv msg_slot_free_list; + struct xpc_fifo_head_uv recv_msg_list; /* deliverable payloads */ +}; + +struct xpc_channel { + short partid; /* ID of remote partition connected */ + spinlock_t lock; /* lock for updating this structure */ + unsigned int flags; /* general flags */ + + enum xp_retval reason; /* reason why channel is disconnect'g */ + int reason_line; /* line# disconnect initiated from */ + + u16 number; /* channel # */ + + u16 entry_size; /* sizeof each msg entry */ + u16 local_nentries; /* #of msg entries in local msg queue */ + u16 remote_nentries; /* #of msg entries in remote msg queue */ + + atomic_t references; /* #of external references to queues */ + + atomic_t n_on_msg_allocate_wq; /* #on msg allocation wait queue */ + wait_queue_head_t msg_allocate_wq; /* msg allocation wait queue */ + + u8 delayed_chctl_flags; /* chctl flags received, but delayed */ + /* action until channel disconnected */ + + atomic_t n_to_notify; /* #of msg senders to notify */ + + xpc_channel_func func; /* user's channel function */ + void *key; /* pointer to user's key */ + + struct completion wdisconnect_wait; /* wait for channel disconnect */ + + /* kthread management related fields */ + + atomic_t kthreads_assigned; /* #of kthreads assigned to channel */ + u32 kthreads_assigned_limit; /* limit on #of kthreads assigned */ + atomic_t kthreads_idle; /* #of kthreads idle waiting for work */ + u32 kthreads_idle_limit; /* limit on #of kthreads idle */ + atomic_t kthreads_active; /* #of kthreads actively working */ + + wait_queue_head_t idle_wq; /* idle kthread wait queue */ + + union { + struct xpc_channel_sn2 sn2; + struct xpc_channel_uv uv; + } sn; + +} ____cacheline_aligned; + +/* struct xpc_channel flags */ + +#define XPC_C_WASCONNECTED 0x00000001 /* channel was connected */ + +#define XPC_C_ROPENCOMPLETE 0x00000002 /* remote open channel complete */ +#define XPC_C_OPENCOMPLETE 0x00000004 /* local open channel complete */ +#define XPC_C_ROPENREPLY 0x00000008 /* remote open channel reply */ +#define XPC_C_OPENREPLY 0x00000010 /* local open channel reply */ +#define XPC_C_ROPENREQUEST 0x00000020 /* remote open channel request */ +#define XPC_C_OPENREQUEST 0x00000040 /* local open channel request */ + +#define XPC_C_SETUP 0x00000080 /* channel's msgqueues are alloc'd */ +#define XPC_C_CONNECTEDCALLOUT 0x00000100 /* connected callout initiated */ +#define XPC_C_CONNECTEDCALLOUT_MADE \ + 0x00000200 /* connected callout completed */ +#define XPC_C_CONNECTED 0x00000400 /* local channel is connected */ +#define XPC_C_CONNECTING 0x00000800 /* channel is being connected */ + +#define XPC_C_RCLOSEREPLY 0x00001000 /* remote close channel reply */ +#define XPC_C_CLOSEREPLY 0x00002000 /* local close channel reply */ +#define XPC_C_RCLOSEREQUEST 0x00004000 /* remote close channel request */ +#define XPC_C_CLOSEREQUEST 0x00008000 /* local close channel request */ + +#define XPC_C_DISCONNECTED 0x00010000 /* channel is disconnected */ +#define XPC_C_DISCONNECTING 0x00020000 /* channel is being disconnected */ +#define XPC_C_DISCONNECTINGCALLOUT \ + 0x00040000 /* disconnecting callout initiated */ +#define XPC_C_DISCONNECTINGCALLOUT_MADE \ + 0x00080000 /* disconnecting callout completed */ +#define XPC_C_WDISCONNECT 0x00100000 /* waiting for channel disconnect */ + +/* + * The channel control flags (chctl) union consists of a 64-bit variable which + * is divided up into eight bytes, ordered from right to left. Byte zero + * pertains to channel 0, byte one to channel 1, and so on. Each channel's byte + * can have one or more of the chctl flags set in it. + */ + +union xpc_channel_ctl_flags { + u64 all_flags; + u8 flags[XPC_MAX_NCHANNELS]; +}; + +/* chctl flags */ +#define XPC_CHCTL_CLOSEREQUEST 0x01 +#define XPC_CHCTL_CLOSEREPLY 0x02 +#define XPC_CHCTL_OPENREQUEST 0x04 +#define XPC_CHCTL_OPENREPLY 0x08 +#define XPC_CHCTL_OPENCOMPLETE 0x10 +#define XPC_CHCTL_MSGREQUEST 0x20 + +#define XPC_OPENCLOSE_CHCTL_FLAGS \ + (XPC_CHCTL_CLOSEREQUEST | XPC_CHCTL_CLOSEREPLY | \ + XPC_CHCTL_OPENREQUEST | XPC_CHCTL_OPENREPLY | \ + XPC_CHCTL_OPENCOMPLETE) +#define XPC_MSG_CHCTL_FLAGS XPC_CHCTL_MSGREQUEST + +static inline int +xpc_any_openclose_chctl_flags_set(union xpc_channel_ctl_flags *chctl) +{ + int ch_number; + + for (ch_number = 0; ch_number < XPC_MAX_NCHANNELS; ch_number++) { + if (chctl->flags[ch_number] & XPC_OPENCLOSE_CHCTL_FLAGS) + return 1; + } + return 0; +} + +static inline int +xpc_any_msg_chctl_flags_set(union xpc_channel_ctl_flags *chctl) +{ + int ch_number; + + for (ch_number = 0; ch_number < XPC_MAX_NCHANNELS; ch_number++) { + if (chctl->flags[ch_number] & XPC_MSG_CHCTL_FLAGS) + return 1; + } + return 0; +} + +/* + * Manage channels on a partition basis. There is one of these structures + * for each partition (a partition will never utilize the structure that + * represents itself). + */ + +struct xpc_partition_sn2 { + unsigned long remote_amos_page_pa; /* paddr of partition's amos page */ + int activate_IRQ_nasid; /* active partition's act/deact nasid */ + int activate_IRQ_phys_cpuid; /* active part's act/deact phys cpuid */ + + unsigned long remote_vars_pa; /* phys addr of partition's vars */ + unsigned long remote_vars_part_pa; /* paddr of partition's vars part */ + u8 remote_vars_version; /* version# of partition's vars */ + + void *local_GPs_base; /* base address of kmalloc'd space */ + struct xpc_gp_sn2 *local_GPs; /* local Get/Put values */ + void *remote_GPs_base; /* base address of kmalloc'd space */ + struct xpc_gp_sn2 *remote_GPs; /* copy of remote partition's local */ + /* Get/Put values */ + unsigned long remote_GPs_pa; /* phys addr of remote partition's local */ + /* Get/Put values */ + + void *local_openclose_args_base; /* base address of kmalloc'd space */ + struct xpc_openclose_args *local_openclose_args; /* local's args */ + unsigned long remote_openclose_args_pa; /* phys addr of remote's args */ + + int notify_IRQ_nasid; /* nasid of where to send notify IRQs */ + int notify_IRQ_phys_cpuid; /* CPUID of where to send notify IRQs */ + char notify_IRQ_owner[8]; /* notify IRQ's owner's name */ + + struct amo *remote_chctl_amo_va; /* addr of remote chctl flags' amo */ + struct amo *local_chctl_amo_va; /* address of chctl flags' amo */ + + struct timer_list dropped_notify_IRQ_timer; /* dropped IRQ timer */ +}; + +struct xpc_partition_uv { + unsigned long heartbeat_gpa; /* phys addr of partition's heartbeat */ + struct xpc_heartbeat_uv cached_heartbeat; /* cached copy of */ + /* partition's heartbeat */ + unsigned long activate_gru_mq_desc_gpa; /* phys addr of parititon's */ + /* activate mq's gru mq */ + /* descriptor */ + void *cached_activate_gru_mq_desc; /* cached copy of partition's */ + /* activate mq's gru mq descriptor */ + struct mutex cached_activate_gru_mq_desc_mutex; + spinlock_t flags_lock; /* protect updating of flags */ + unsigned int flags; /* general flags */ + u8 remote_act_state; /* remote partition's act_state */ + u8 act_state_req; /* act_state request from remote partition */ + enum xp_retval reason; /* reason for deactivate act_state request */ +}; + +/* struct xpc_partition_uv flags */ + +#define XPC_P_CACHED_ACTIVATE_GRU_MQ_DESC_UV 0x00000001 +#define XPC_P_ENGAGED_UV 0x00000002 + +/* struct xpc_partition_uv act_state change requests */ + +#define XPC_P_ASR_ACTIVATE_UV 0x01 +#define XPC_P_ASR_REACTIVATE_UV 0x02 +#define XPC_P_ASR_DEACTIVATE_UV 0x03 + +struct xpc_partition { + + /* XPC HB infrastructure */ + + u8 remote_rp_version; /* version# of partition's rsvd pg */ + unsigned long remote_rp_ts_jiffies; /* timestamp when rsvd pg setup */ + unsigned long remote_rp_pa; /* phys addr of partition's rsvd pg */ + u64 last_heartbeat; /* HB at last read */ + u32 activate_IRQ_rcvd; /* IRQs since activation */ + spinlock_t act_lock; /* protect updating of act_state */ + u8 act_state; /* from XPC HB viewpoint */ + enum xp_retval reason; /* reason partition is deactivating */ + int reason_line; /* line# deactivation initiated from */ + + unsigned long disengage_timeout; /* timeout in jiffies */ + struct timer_list disengage_timer; + + /* XPC infrastructure referencing and teardown control */ + + u8 setup_state; /* infrastructure setup state */ + wait_queue_head_t teardown_wq; /* kthread waiting to teardown infra */ + atomic_t references; /* #of references to infrastructure */ + + u8 nchannels; /* #of defined channels supported */ + atomic_t nchannels_active; /* #of channels that are not DISCONNECTED */ + atomic_t nchannels_engaged; /* #of channels engaged with remote part */ + struct xpc_channel *channels; /* array of channel structures */ + + /* fields used for managing channel avialability and activity */ + + union xpc_channel_ctl_flags chctl; /* chctl flags yet to be processed */ + spinlock_t chctl_lock; /* chctl flags lock */ + + void *remote_openclose_args_base; /* base address of kmalloc'd space */ + struct xpc_openclose_args *remote_openclose_args; /* copy of remote's */ + /* args */ + + /* channel manager related fields */ + + atomic_t channel_mgr_requests; /* #of requests to activate chan mgr */ + wait_queue_head_t channel_mgr_wq; /* channel mgr's wait queue */ + + union { + struct xpc_partition_sn2 sn2; + struct xpc_partition_uv uv; + } sn; + +} ____cacheline_aligned; + +struct xpc_arch_operations { + int (*setup_partitions) (void); + void (*teardown_partitions) (void); + void (*process_activate_IRQ_rcvd) (void); + enum xp_retval (*get_partition_rsvd_page_pa) + (void *, u64 *, unsigned long *, size_t *); + int (*setup_rsvd_page) (struct xpc_rsvd_page *); + + void (*allow_hb) (short); + void (*disallow_hb) (short); + void (*disallow_all_hbs) (void); + void (*increment_heartbeat) (void); + void (*offline_heartbeat) (void); + void (*online_heartbeat) (void); + void (*heartbeat_init) (void); + void (*heartbeat_exit) (void); + enum xp_retval (*get_remote_heartbeat) (struct xpc_partition *); + + void (*request_partition_activation) (struct xpc_rsvd_page *, + unsigned long, int); + void (*request_partition_reactivation) (struct xpc_partition *); + void (*request_partition_deactivation) (struct xpc_partition *); + void (*cancel_partition_deactivation_request) (struct xpc_partition *); + enum xp_retval (*setup_ch_structures) (struct xpc_partition *); + void (*teardown_ch_structures) (struct xpc_partition *); + + enum xp_retval (*make_first_contact) (struct xpc_partition *); + + u64 (*get_chctl_all_flags) (struct xpc_partition *); + void (*send_chctl_closerequest) (struct xpc_channel *, unsigned long *); + void (*send_chctl_closereply) (struct xpc_channel *, unsigned long *); + void (*send_chctl_openrequest) (struct xpc_channel *, unsigned long *); + void (*send_chctl_openreply) (struct xpc_channel *, unsigned long *); + void (*send_chctl_opencomplete) (struct xpc_channel *, unsigned long *); + void (*process_msg_chctl_flags) (struct xpc_partition *, int); + + enum xp_retval (*save_remote_msgqueue_pa) (struct xpc_channel *, + unsigned long); + + enum xp_retval (*setup_msg_structures) (struct xpc_channel *); + void (*teardown_msg_structures) (struct xpc_channel *); + + void (*indicate_partition_engaged) (struct xpc_partition *); + void (*indicate_partition_disengaged) (struct xpc_partition *); + void (*assume_partition_disengaged) (short); + int (*partition_engaged) (short); + int (*any_partition_engaged) (void); + + int (*n_of_deliverable_payloads) (struct xpc_channel *); + enum xp_retval (*send_payload) (struct xpc_channel *, u32, void *, + u16, u8, xpc_notify_func, void *); + void *(*get_deliverable_payload) (struct xpc_channel *); + void (*received_payload) (struct xpc_channel *, void *); + void (*notify_senders_of_disconnect) (struct xpc_channel *); +}; + +/* struct xpc_partition act_state values (for XPC HB) */ + +#define XPC_P_AS_INACTIVE 0x00 /* partition is not active */ +#define XPC_P_AS_ACTIVATION_REQ 0x01 /* created thread to activate */ +#define XPC_P_AS_ACTIVATING 0x02 /* activation thread started */ +#define XPC_P_AS_ACTIVE 0x03 /* xpc_partition_up() was called */ +#define XPC_P_AS_DEACTIVATING 0x04 /* partition deactivation initiated */ + +#define XPC_DEACTIVATE_PARTITION(_p, _reason) \ + xpc_deactivate_partition(__LINE__, (_p), (_reason)) + +/* struct xpc_partition setup_state values */ + +#define XPC_P_SS_UNSET 0x00 /* infrastructure was never setup */ +#define XPC_P_SS_SETUP 0x01 /* infrastructure is setup */ +#define XPC_P_SS_WTEARDOWN 0x02 /* waiting to teardown infrastructure */ +#define XPC_P_SS_TORNDOWN 0x03 /* infrastructure is torndown */ + +/* + * struct xpc_partition_sn2's dropped notify IRQ timer is set to wait the + * following interval #of seconds before checking for dropped notify IRQs. + * These can occur whenever an IRQ's associated amo write doesn't complete + * until after the IRQ was received. + */ +#define XPC_DROPPED_NOTIFY_IRQ_WAIT_INTERVAL (0.25 * HZ) + +/* number of seconds to wait for other partitions to disengage */ +#define XPC_DISENGAGE_DEFAULT_TIMELIMIT 90 + +/* interval in seconds to print 'waiting deactivation' messages */ +#define XPC_DEACTIVATE_PRINTMSG_INTERVAL 10 + +#define XPC_PARTID(_p) ((short)((_p) - &xpc_partitions[0])) + +/* found in xp_main.c */ +extern struct xpc_registration xpc_registrations[]; + +/* found in xpc_main.c */ +extern struct device *xpc_part; +extern struct device *xpc_chan; +extern struct xpc_arch_operations xpc_arch_ops; +extern int xpc_disengage_timelimit; +extern int xpc_disengage_timedout; +extern int xpc_activate_IRQ_rcvd; +extern spinlock_t xpc_activate_IRQ_rcvd_lock; +extern wait_queue_head_t xpc_activate_IRQ_wq; +extern void *xpc_kzalloc_cacheline_aligned(size_t, gfp_t, void **); +extern void xpc_activate_partition(struct xpc_partition *); +extern void xpc_activate_kthreads(struct xpc_channel *, int); +extern void xpc_create_kthreads(struct xpc_channel *, int, int); +extern void xpc_disconnect_wait(int); + +/* found in xpc_sn2.c */ +extern int xpc_init_sn2(void); +extern void xpc_exit_sn2(void); + +/* found in xpc_uv.c */ +extern int xpc_init_uv(void); +extern void xpc_exit_uv(void); + +/* found in xpc_partition.c */ +extern int xpc_exiting; +extern int xpc_nasid_mask_nlongs; +extern struct xpc_rsvd_page *xpc_rsvd_page; +extern unsigned long *xpc_mach_nasids; +extern struct xpc_partition *xpc_partitions; +extern void *xpc_kmalloc_cacheline_aligned(size_t, gfp_t, void **); +extern int xpc_setup_rsvd_page(void); +extern void xpc_teardown_rsvd_page(void); +extern int xpc_identify_activate_IRQ_sender(void); +extern int xpc_partition_disengaged(struct xpc_partition *); +extern enum xp_retval xpc_mark_partition_active(struct xpc_partition *); +extern void xpc_mark_partition_inactive(struct xpc_partition *); +extern void xpc_discovery(void); +extern enum xp_retval xpc_get_remote_rp(int, unsigned long *, + struct xpc_rsvd_page *, + unsigned long *); +extern void xpc_deactivate_partition(const int, struct xpc_partition *, + enum xp_retval); +extern enum xp_retval xpc_initiate_partid_to_nasids(short, void *); + +/* found in xpc_channel.c */ +extern void xpc_initiate_connect(int); +extern void xpc_initiate_disconnect(int); +extern enum xp_retval xpc_allocate_msg_wait(struct xpc_channel *); +extern enum xp_retval xpc_initiate_send(short, int, u32, void *, u16); +extern enum xp_retval xpc_initiate_send_notify(short, int, u32, void *, u16, + xpc_notify_func, void *); +extern void xpc_initiate_received(short, int, void *); +extern void xpc_process_sent_chctl_flags(struct xpc_partition *); +extern void xpc_connected_callout(struct xpc_channel *); +extern void xpc_deliver_payload(struct xpc_channel *); +extern void xpc_disconnect_channel(const int, struct xpc_channel *, + enum xp_retval, unsigned long *); +extern void xpc_disconnect_callout(struct xpc_channel *, enum xp_retval); +extern void xpc_partition_going_down(struct xpc_partition *, enum xp_retval); + +static inline void +xpc_wakeup_channel_mgr(struct xpc_partition *part) +{ + if (atomic_inc_return(&part->channel_mgr_requests) == 1) + wake_up(&part->channel_mgr_wq); +} + +/* + * These next two inlines are used to keep us from tearing down a channel's + * msg queues while a thread may be referencing them. + */ +static inline void +xpc_msgqueue_ref(struct xpc_channel *ch) +{ + atomic_inc(&ch->references); +} + +static inline void +xpc_msgqueue_deref(struct xpc_channel *ch) +{ + s32 refs = atomic_dec_return(&ch->references); + + DBUG_ON(refs < 0); + if (refs == 0) + xpc_wakeup_channel_mgr(&xpc_partitions[ch->partid]); +} + +#define XPC_DISCONNECT_CHANNEL(_ch, _reason, _irqflgs) \ + xpc_disconnect_channel(__LINE__, _ch, _reason, _irqflgs) + +/* + * These two inlines are used to keep us from tearing down a partition's + * setup infrastructure while a thread may be referencing it. + */ +static inline void +xpc_part_deref(struct xpc_partition *part) +{ + s32 refs = atomic_dec_return(&part->references); + + DBUG_ON(refs < 0); + if (refs == 0 && part->setup_state == XPC_P_SS_WTEARDOWN) + wake_up(&part->teardown_wq); +} + +static inline int +xpc_part_ref(struct xpc_partition *part) +{ + int setup; + + atomic_inc(&part->references); + setup = (part->setup_state == XPC_P_SS_SETUP); + if (!setup) + xpc_part_deref(part); + + return setup; +} + +/* + * The following macro is to be used for the setting of the reason and + * reason_line fields in both the struct xpc_channel and struct xpc_partition + * structures. + */ +#define XPC_SET_REASON(_p, _reason, _line) \ + { \ + (_p)->reason = _reason; \ + (_p)->reason_line = _line; \ + } + +#endif /* _DRIVERS_MISC_SGIXP_XPC_H */ diff --git a/drivers/misc/sgi-xp/xpc_channel.c b/drivers/misc/sgi-xp/xpc_channel.c new file mode 100644 index 000000000..05a890ce2 --- /dev/null +++ b/drivers/misc/sgi-xp/xpc_channel.c @@ -0,0 +1,1011 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 2004-2009 Silicon Graphics, Inc. All Rights Reserved. + */ + +/* + * Cross Partition Communication (XPC) channel support. + * + * This is the part of XPC that manages the channels and + * sends/receives messages across them to/from other partitions. + * + */ + +#include +#include "xpc.h" + +/* + * Process a connect message from a remote partition. + * + * Note: xpc_process_connect() is expecting to be called with the + * spin_lock_irqsave held and will leave it locked upon return. + */ +static void +xpc_process_connect(struct xpc_channel *ch, unsigned long *irq_flags) +{ + enum xp_retval ret; + + DBUG_ON(!spin_is_locked(&ch->lock)); + + if (!(ch->flags & XPC_C_OPENREQUEST) || + !(ch->flags & XPC_C_ROPENREQUEST)) { + /* nothing more to do for now */ + return; + } + DBUG_ON(!(ch->flags & XPC_C_CONNECTING)); + + if (!(ch->flags & XPC_C_SETUP)) { + spin_unlock_irqrestore(&ch->lock, *irq_flags); + ret = xpc_arch_ops.setup_msg_structures(ch); + spin_lock_irqsave(&ch->lock, *irq_flags); + + if (ret != xpSuccess) + XPC_DISCONNECT_CHANNEL(ch, ret, irq_flags); + else + ch->flags |= XPC_C_SETUP; + + if (ch->flags & XPC_C_DISCONNECTING) + return; + } + + if (!(ch->flags & XPC_C_OPENREPLY)) { + ch->flags |= XPC_C_OPENREPLY; + xpc_arch_ops.send_chctl_openreply(ch, irq_flags); + } + + if (!(ch->flags & XPC_C_ROPENREPLY)) + return; + + if (!(ch->flags & XPC_C_OPENCOMPLETE)) { + ch->flags |= (XPC_C_OPENCOMPLETE | XPC_C_CONNECTED); + xpc_arch_ops.send_chctl_opencomplete(ch, irq_flags); + } + + if (!(ch->flags & XPC_C_ROPENCOMPLETE)) + return; + + dev_info(xpc_chan, "channel %d to partition %d connected\n", + ch->number, ch->partid); + + ch->flags = (XPC_C_CONNECTED | XPC_C_SETUP); /* clear all else */ +} + +/* + * spin_lock_irqsave() is expected to be held on entry. + */ +static void +xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags) +{ + struct xpc_partition *part = &xpc_partitions[ch->partid]; + u32 channel_was_connected = (ch->flags & XPC_C_WASCONNECTED); + + DBUG_ON(!spin_is_locked(&ch->lock)); + + if (!(ch->flags & XPC_C_DISCONNECTING)) + return; + + DBUG_ON(!(ch->flags & XPC_C_CLOSEREQUEST)); + + /* make sure all activity has settled down first */ + + if (atomic_read(&ch->kthreads_assigned) > 0 || + atomic_read(&ch->references) > 0) { + return; + } + DBUG_ON((ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) && + !(ch->flags & XPC_C_DISCONNECTINGCALLOUT_MADE)); + + if (part->act_state == XPC_P_AS_DEACTIVATING) { + /* can't proceed until the other side disengages from us */ + if (xpc_arch_ops.partition_engaged(ch->partid)) + return; + + } else { + + /* as long as the other side is up do the full protocol */ + + if (!(ch->flags & XPC_C_RCLOSEREQUEST)) + return; + + if (!(ch->flags & XPC_C_CLOSEREPLY)) { + ch->flags |= XPC_C_CLOSEREPLY; + xpc_arch_ops.send_chctl_closereply(ch, irq_flags); + } + + if (!(ch->flags & XPC_C_RCLOSEREPLY)) + return; + } + + /* wake those waiting for notify completion */ + if (atomic_read(&ch->n_to_notify) > 0) { + /* we do callout while holding ch->lock, callout can't block */ + xpc_arch_ops.notify_senders_of_disconnect(ch); + } + + /* both sides are disconnected now */ + + if (ch->flags & XPC_C_DISCONNECTINGCALLOUT_MADE) { + spin_unlock_irqrestore(&ch->lock, *irq_flags); + xpc_disconnect_callout(ch, xpDisconnected); + spin_lock_irqsave(&ch->lock, *irq_flags); + } + + DBUG_ON(atomic_read(&ch->n_to_notify) != 0); + + /* it's now safe to free the channel's message queues */ + xpc_arch_ops.teardown_msg_structures(ch); + + ch->func = NULL; + ch->key = NULL; + ch->entry_size = 0; + ch->local_nentries = 0; + ch->remote_nentries = 0; + ch->kthreads_assigned_limit = 0; + ch->kthreads_idle_limit = 0; + + /* + * Mark the channel disconnected and clear all other flags, including + * XPC_C_SETUP (because of call to + * xpc_arch_ops.teardown_msg_structures()) but not including + * XPC_C_WDISCONNECT (if it was set). + */ + ch->flags = (XPC_C_DISCONNECTED | (ch->flags & XPC_C_WDISCONNECT)); + + atomic_dec(&part->nchannels_active); + + if (channel_was_connected) { + dev_info(xpc_chan, "channel %d to partition %d disconnected, " + "reason=%d\n", ch->number, ch->partid, ch->reason); + } + + if (ch->flags & XPC_C_WDISCONNECT) { + /* we won't lose the CPU since we're holding ch->lock */ + complete(&ch->wdisconnect_wait); + } else if (ch->delayed_chctl_flags) { + if (part->act_state != XPC_P_AS_DEACTIVATING) { + /* time to take action on any delayed chctl flags */ + spin_lock(&part->chctl_lock); + part->chctl.flags[ch->number] |= + ch->delayed_chctl_flags; + spin_unlock(&part->chctl_lock); + } + ch->delayed_chctl_flags = 0; + } +} + +/* + * Process a change in the channel's remote connection state. + */ +static void +xpc_process_openclose_chctl_flags(struct xpc_partition *part, int ch_number, + u8 chctl_flags) +{ + unsigned long irq_flags; + struct xpc_openclose_args *args = + &part->remote_openclose_args[ch_number]; + struct xpc_channel *ch = &part->channels[ch_number]; + enum xp_retval reason; + enum xp_retval ret; + int create_kthread = 0; + + spin_lock_irqsave(&ch->lock, irq_flags); + +again: + + if ((ch->flags & XPC_C_DISCONNECTED) && + (ch->flags & XPC_C_WDISCONNECT)) { + /* + * Delay processing chctl flags until thread waiting disconnect + * has had a chance to see that the channel is disconnected. + */ + ch->delayed_chctl_flags |= chctl_flags; + goto out; + } + + if (chctl_flags & XPC_CHCTL_CLOSEREQUEST) { + + dev_dbg(xpc_chan, "XPC_CHCTL_CLOSEREQUEST (reason=%d) received " + "from partid=%d, channel=%d\n", args->reason, + ch->partid, ch->number); + + /* + * If RCLOSEREQUEST is set, we're probably waiting for + * RCLOSEREPLY. We should find it and a ROPENREQUEST packed + * with this RCLOSEREQUEST in the chctl_flags. + */ + + if (ch->flags & XPC_C_RCLOSEREQUEST) { + DBUG_ON(!(ch->flags & XPC_C_DISCONNECTING)); + DBUG_ON(!(ch->flags & XPC_C_CLOSEREQUEST)); + DBUG_ON(!(ch->flags & XPC_C_CLOSEREPLY)); + DBUG_ON(ch->flags & XPC_C_RCLOSEREPLY); + + DBUG_ON(!(chctl_flags & XPC_CHCTL_CLOSEREPLY)); + chctl_flags &= ~XPC_CHCTL_CLOSEREPLY; + ch->flags |= XPC_C_RCLOSEREPLY; + + /* both sides have finished disconnecting */ + xpc_process_disconnect(ch, &irq_flags); + DBUG_ON(!(ch->flags & XPC_C_DISCONNECTED)); + goto again; + } + + if (ch->flags & XPC_C_DISCONNECTED) { + if (!(chctl_flags & XPC_CHCTL_OPENREQUEST)) { + if (part->chctl.flags[ch_number] & + XPC_CHCTL_OPENREQUEST) { + + DBUG_ON(ch->delayed_chctl_flags != 0); + spin_lock(&part->chctl_lock); + part->chctl.flags[ch_number] |= + XPC_CHCTL_CLOSEREQUEST; + spin_unlock(&part->chctl_lock); + } + goto out; + } + + XPC_SET_REASON(ch, 0, 0); + ch->flags &= ~XPC_C_DISCONNECTED; + + atomic_inc(&part->nchannels_active); + ch->flags |= (XPC_C_CONNECTING | XPC_C_ROPENREQUEST); + } + + chctl_flags &= ~(XPC_CHCTL_OPENREQUEST | XPC_CHCTL_OPENREPLY | + XPC_CHCTL_OPENCOMPLETE); + + /* + * The meaningful CLOSEREQUEST connection state fields are: + * reason = reason connection is to be closed + */ + + ch->flags |= XPC_C_RCLOSEREQUEST; + + if (!(ch->flags & XPC_C_DISCONNECTING)) { + reason = args->reason; + if (reason <= xpSuccess || reason > xpUnknownReason) + reason = xpUnknownReason; + else if (reason == xpUnregistering) + reason = xpOtherUnregistering; + + XPC_DISCONNECT_CHANNEL(ch, reason, &irq_flags); + + DBUG_ON(chctl_flags & XPC_CHCTL_CLOSEREPLY); + goto out; + } + + xpc_process_disconnect(ch, &irq_flags); + } + + if (chctl_flags & XPC_CHCTL_CLOSEREPLY) { + + dev_dbg(xpc_chan, "XPC_CHCTL_CLOSEREPLY received from partid=" + "%d, channel=%d\n", ch->partid, ch->number); + + if (ch->flags & XPC_C_DISCONNECTED) { + DBUG_ON(part->act_state != XPC_P_AS_DEACTIVATING); + goto out; + } + + DBUG_ON(!(ch->flags & XPC_C_CLOSEREQUEST)); + + if (!(ch->flags & XPC_C_RCLOSEREQUEST)) { + if (part->chctl.flags[ch_number] & + XPC_CHCTL_CLOSEREQUEST) { + + DBUG_ON(ch->delayed_chctl_flags != 0); + spin_lock(&part->chctl_lock); + part->chctl.flags[ch_number] |= + XPC_CHCTL_CLOSEREPLY; + spin_unlock(&part->chctl_lock); + } + goto out; + } + + ch->flags |= XPC_C_RCLOSEREPLY; + + if (ch->flags & XPC_C_CLOSEREPLY) { + /* both sides have finished disconnecting */ + xpc_process_disconnect(ch, &irq_flags); + } + } + + if (chctl_flags & XPC_CHCTL_OPENREQUEST) { + + dev_dbg(xpc_chan, "XPC_CHCTL_OPENREQUEST (entry_size=%d, " + "local_nentries=%d) received from partid=%d, " + "channel=%d\n", args->entry_size, args->local_nentries, + ch->partid, ch->number); + + if (part->act_state == XPC_P_AS_DEACTIVATING || + (ch->flags & XPC_C_ROPENREQUEST)) { + goto out; + } + + if (ch->flags & (XPC_C_DISCONNECTING | XPC_C_WDISCONNECT)) { + ch->delayed_chctl_flags |= XPC_CHCTL_OPENREQUEST; + goto out; + } + DBUG_ON(!(ch->flags & (XPC_C_DISCONNECTED | + XPC_C_OPENREQUEST))); + DBUG_ON(ch->flags & (XPC_C_ROPENREQUEST | XPC_C_ROPENREPLY | + XPC_C_OPENREPLY | XPC_C_CONNECTED)); + + /* + * The meaningful OPENREQUEST connection state fields are: + * entry_size = size of channel's messages in bytes + * local_nentries = remote partition's local_nentries + */ + if (args->entry_size == 0 || args->local_nentries == 0) { + /* assume OPENREQUEST was delayed by mistake */ + goto out; + } + + ch->flags |= (XPC_C_ROPENREQUEST | XPC_C_CONNECTING); + ch->remote_nentries = args->local_nentries; + + if (ch->flags & XPC_C_OPENREQUEST) { + if (args->entry_size != ch->entry_size) { + XPC_DISCONNECT_CHANNEL(ch, xpUnequalMsgSizes, + &irq_flags); + goto out; + } + } else { + ch->entry_size = args->entry_size; + + XPC_SET_REASON(ch, 0, 0); + ch->flags &= ~XPC_C_DISCONNECTED; + + atomic_inc(&part->nchannels_active); + } + + xpc_process_connect(ch, &irq_flags); + } + + if (chctl_flags & XPC_CHCTL_OPENREPLY) { + + dev_dbg(xpc_chan, "XPC_CHCTL_OPENREPLY (local_msgqueue_pa=" + "0x%lx, local_nentries=%d, remote_nentries=%d) " + "received from partid=%d, channel=%d\n", + args->local_msgqueue_pa, args->local_nentries, + args->remote_nentries, ch->partid, ch->number); + + if (ch->flags & (XPC_C_DISCONNECTING | XPC_C_DISCONNECTED)) + goto out; + + if (!(ch->flags & XPC_C_OPENREQUEST)) { + XPC_DISCONNECT_CHANNEL(ch, xpOpenCloseError, + &irq_flags); + goto out; + } + + DBUG_ON(!(ch->flags & XPC_C_ROPENREQUEST)); + DBUG_ON(ch->flags & XPC_C_CONNECTED); + + /* + * The meaningful OPENREPLY connection state fields are: + * local_msgqueue_pa = physical address of remote + * partition's local_msgqueue + * local_nentries = remote partition's local_nentries + * remote_nentries = remote partition's remote_nentries + */ + DBUG_ON(args->local_msgqueue_pa == 0); + DBUG_ON(args->local_nentries == 0); + DBUG_ON(args->remote_nentries == 0); + + ret = xpc_arch_ops.save_remote_msgqueue_pa(ch, + args->local_msgqueue_pa); + if (ret != xpSuccess) { + XPC_DISCONNECT_CHANNEL(ch, ret, &irq_flags); + goto out; + } + ch->flags |= XPC_C_ROPENREPLY; + + if (args->local_nentries < ch->remote_nentries) { + dev_dbg(xpc_chan, "XPC_CHCTL_OPENREPLY: new " + "remote_nentries=%d, old remote_nentries=%d, " + "partid=%d, channel=%d\n", + args->local_nentries, ch->remote_nentries, + ch->partid, ch->number); + + ch->remote_nentries = args->local_nentries; + } + if (args->remote_nentries < ch->local_nentries) { + dev_dbg(xpc_chan, "XPC_CHCTL_OPENREPLY: new " + "local_nentries=%d, old local_nentries=%d, " + "partid=%d, channel=%d\n", + args->remote_nentries, ch->local_nentries, + ch->partid, ch->number); + + ch->local_nentries = args->remote_nentries; + } + + xpc_process_connect(ch, &irq_flags); + } + + if (chctl_flags & XPC_CHCTL_OPENCOMPLETE) { + + dev_dbg(xpc_chan, "XPC_CHCTL_OPENCOMPLETE received from " + "partid=%d, channel=%d\n", ch->partid, ch->number); + + if (ch->flags & (XPC_C_DISCONNECTING | XPC_C_DISCONNECTED)) + goto out; + + if (!(ch->flags & XPC_C_OPENREQUEST) || + !(ch->flags & XPC_C_OPENREPLY)) { + XPC_DISCONNECT_CHANNEL(ch, xpOpenCloseError, + &irq_flags); + goto out; + } + + DBUG_ON(!(ch->flags & XPC_C_ROPENREQUEST)); + DBUG_ON(!(ch->flags & XPC_C_ROPENREPLY)); + DBUG_ON(!(ch->flags & XPC_C_CONNECTED)); + + ch->flags |= XPC_C_ROPENCOMPLETE; + + xpc_process_connect(ch, &irq_flags); + create_kthread = 1; + } + +out: + spin_unlock_irqrestore(&ch->lock, irq_flags); + + if (create_kthread) + xpc_create_kthreads(ch, 1, 0); +} + +/* + * Attempt to establish a channel connection to a remote partition. + */ +static enum xp_retval +xpc_connect_channel(struct xpc_channel *ch) +{ + unsigned long irq_flags; + struct xpc_registration *registration = &xpc_registrations[ch->number]; + + if (mutex_trylock(®istration->mutex) == 0) + return xpRetry; + + if (!XPC_CHANNEL_REGISTERED(ch->number)) { + mutex_unlock(®istration->mutex); + return xpUnregistered; + } + + spin_lock_irqsave(&ch->lock, irq_flags); + + DBUG_ON(ch->flags & XPC_C_CONNECTED); + DBUG_ON(ch->flags & XPC_C_OPENREQUEST); + + if (ch->flags & XPC_C_DISCONNECTING) { + spin_unlock_irqrestore(&ch->lock, irq_flags); + mutex_unlock(®istration->mutex); + return ch->reason; + } + + /* add info from the channel connect registration to the channel */ + + ch->kthreads_assigned_limit = registration->assigned_limit; + ch->kthreads_idle_limit = registration->idle_limit; + DBUG_ON(atomic_read(&ch->kthreads_assigned) != 0); + DBUG_ON(atomic_read(&ch->kthreads_idle) != 0); + DBUG_ON(atomic_read(&ch->kthreads_active) != 0); + + ch->func = registration->func; + DBUG_ON(registration->func == NULL); + ch->key = registration->key; + + ch->local_nentries = registration->nentries; + + if (ch->flags & XPC_C_ROPENREQUEST) { + if (registration->entry_size != ch->entry_size) { + /* the local and remote sides aren't the same */ + + /* + * Because XPC_DISCONNECT_CHANNEL() can block we're + * forced to up the registration sema before we unlock + * the channel lock. But that's okay here because we're + * done with the part that required the registration + * sema. XPC_DISCONNECT_CHANNEL() requires that the + * channel lock be locked and will unlock and relock + * the channel lock as needed. + */ + mutex_unlock(®istration->mutex); + XPC_DISCONNECT_CHANNEL(ch, xpUnequalMsgSizes, + &irq_flags); + spin_unlock_irqrestore(&ch->lock, irq_flags); + return xpUnequalMsgSizes; + } + } else { + ch->entry_size = registration->entry_size; + + XPC_SET_REASON(ch, 0, 0); + ch->flags &= ~XPC_C_DISCONNECTED; + + atomic_inc(&xpc_partitions[ch->partid].nchannels_active); + } + + mutex_unlock(®istration->mutex); + + /* initiate the connection */ + + ch->flags |= (XPC_C_OPENREQUEST | XPC_C_CONNECTING); + xpc_arch_ops.send_chctl_openrequest(ch, &irq_flags); + + xpc_process_connect(ch, &irq_flags); + + spin_unlock_irqrestore(&ch->lock, irq_flags); + + return xpSuccess; +} + +void +xpc_process_sent_chctl_flags(struct xpc_partition *part) +{ + unsigned long irq_flags; + union xpc_channel_ctl_flags chctl; + struct xpc_channel *ch; + int ch_number; + u32 ch_flags; + + chctl.all_flags = xpc_arch_ops.get_chctl_all_flags(part); + + /* + * Initiate channel connections for registered channels. + * + * For each connected channel that has pending messages activate idle + * kthreads and/or create new kthreads as needed. + */ + + for (ch_number = 0; ch_number < part->nchannels; ch_number++) { + ch = &part->channels[ch_number]; + + /* + * Process any open or close related chctl flags, and then deal + * with connecting or disconnecting the channel as required. + */ + + if (chctl.flags[ch_number] & XPC_OPENCLOSE_CHCTL_FLAGS) { + xpc_process_openclose_chctl_flags(part, ch_number, + chctl.flags[ch_number]); + } + + ch_flags = ch->flags; /* need an atomic snapshot of flags */ + + if (ch_flags & XPC_C_DISCONNECTING) { + spin_lock_irqsave(&ch->lock, irq_flags); + xpc_process_disconnect(ch, &irq_flags); + spin_unlock_irqrestore(&ch->lock, irq_flags); + continue; + } + + if (part->act_state == XPC_P_AS_DEACTIVATING) + continue; + + if (!(ch_flags & XPC_C_CONNECTED)) { + if (!(ch_flags & XPC_C_OPENREQUEST)) { + DBUG_ON(ch_flags & XPC_C_SETUP); + (void)xpc_connect_channel(ch); + } + continue; + } + + /* + * Process any message related chctl flags, this may involve + * the activation of kthreads to deliver any pending messages + * sent from the other partition. + */ + + if (chctl.flags[ch_number] & XPC_MSG_CHCTL_FLAGS) + xpc_arch_ops.process_msg_chctl_flags(part, ch_number); + } +} + +/* + * XPC's heartbeat code calls this function to inform XPC that a partition is + * going down. XPC responds by tearing down the XPartition Communication + * infrastructure used for the just downed partition. + * + * XPC's heartbeat code will never call this function and xpc_partition_up() + * at the same time. Nor will it ever make multiple calls to either function + * at the same time. + */ +void +xpc_partition_going_down(struct xpc_partition *part, enum xp_retval reason) +{ + unsigned long irq_flags; + int ch_number; + struct xpc_channel *ch; + + dev_dbg(xpc_chan, "deactivating partition %d, reason=%d\n", + XPC_PARTID(part), reason); + + if (!xpc_part_ref(part)) { + /* infrastructure for this partition isn't currently set up */ + return; + } + + /* disconnect channels associated with the partition going down */ + + for (ch_number = 0; ch_number < part->nchannels; ch_number++) { + ch = &part->channels[ch_number]; + + xpc_msgqueue_ref(ch); + spin_lock_irqsave(&ch->lock, irq_flags); + + XPC_DISCONNECT_CHANNEL(ch, reason, &irq_flags); + + spin_unlock_irqrestore(&ch->lock, irq_flags); + xpc_msgqueue_deref(ch); + } + + xpc_wakeup_channel_mgr(part); + + xpc_part_deref(part); +} + +/* + * Called by XP at the time of channel connection registration to cause + * XPC to establish connections to all currently active partitions. + */ +void +xpc_initiate_connect(int ch_number) +{ + short partid; + struct xpc_partition *part; + + DBUG_ON(ch_number < 0 || ch_number >= XPC_MAX_NCHANNELS); + + for (partid = 0; partid < xp_max_npartitions; partid++) { + part = &xpc_partitions[partid]; + + if (xpc_part_ref(part)) { + /* + * Initiate the establishment of a connection on the + * newly registered channel to the remote partition. + */ + xpc_wakeup_channel_mgr(part); + xpc_part_deref(part); + } + } +} + +void +xpc_connected_callout(struct xpc_channel *ch) +{ + /* let the registerer know that a connection has been established */ + + if (ch->func != NULL) { + dev_dbg(xpc_chan, "ch->func() called, reason=xpConnected, " + "partid=%d, channel=%d\n", ch->partid, ch->number); + + ch->func(xpConnected, ch->partid, ch->number, + (void *)(u64)ch->local_nentries, ch->key); + + dev_dbg(xpc_chan, "ch->func() returned, reason=xpConnected, " + "partid=%d, channel=%d\n", ch->partid, ch->number); + } +} + +/* + * Called by XP at the time of channel connection unregistration to cause + * XPC to teardown all current connections for the specified channel. + * + * Before returning xpc_initiate_disconnect() will wait until all connections + * on the specified channel have been closed/torndown. So the caller can be + * assured that they will not be receiving any more callouts from XPC to the + * function they registered via xpc_connect(). + * + * Arguments: + * + * ch_number - channel # to unregister. + */ +void +xpc_initiate_disconnect(int ch_number) +{ + unsigned long irq_flags; + short partid; + struct xpc_partition *part; + struct xpc_channel *ch; + + DBUG_ON(ch_number < 0 || ch_number >= XPC_MAX_NCHANNELS); + + /* initiate the channel disconnect for every active partition */ + for (partid = 0; partid < xp_max_npartitions; partid++) { + part = &xpc_partitions[partid]; + + if (xpc_part_ref(part)) { + ch = &part->channels[ch_number]; + xpc_msgqueue_ref(ch); + + spin_lock_irqsave(&ch->lock, irq_flags); + + if (!(ch->flags & XPC_C_DISCONNECTED)) { + ch->flags |= XPC_C_WDISCONNECT; + + XPC_DISCONNECT_CHANNEL(ch, xpUnregistering, + &irq_flags); + } + + spin_unlock_irqrestore(&ch->lock, irq_flags); + + xpc_msgqueue_deref(ch); + xpc_part_deref(part); + } + } + + xpc_disconnect_wait(ch_number); +} + +/* + * To disconnect a channel, and reflect it back to all who may be waiting. + * + * An OPEN is not allowed until XPC_C_DISCONNECTING is cleared by + * xpc_process_disconnect(), and if set, XPC_C_WDISCONNECT is cleared by + * xpc_disconnect_wait(). + * + * THE CHANNEL IS TO BE LOCKED BY THE CALLER AND WILL REMAIN LOCKED UPON RETURN. + */ +void +xpc_disconnect_channel(const int line, struct xpc_channel *ch, + enum xp_retval reason, unsigned long *irq_flags) +{ + u32 channel_was_connected = (ch->flags & XPC_C_CONNECTED); + + DBUG_ON(!spin_is_locked(&ch->lock)); + + if (ch->flags & (XPC_C_DISCONNECTING | XPC_C_DISCONNECTED)) + return; + + DBUG_ON(!(ch->flags & (XPC_C_CONNECTING | XPC_C_CONNECTED))); + + dev_dbg(xpc_chan, "reason=%d, line=%d, partid=%d, channel=%d\n", + reason, line, ch->partid, ch->number); + + XPC_SET_REASON(ch, reason, line); + + ch->flags |= (XPC_C_CLOSEREQUEST | XPC_C_DISCONNECTING); + /* some of these may not have been set */ + ch->flags &= ~(XPC_C_OPENREQUEST | XPC_C_OPENREPLY | + XPC_C_ROPENREQUEST | XPC_C_ROPENREPLY | + XPC_C_CONNECTING | XPC_C_CONNECTED); + + xpc_arch_ops.send_chctl_closerequest(ch, irq_flags); + + if (channel_was_connected) + ch->flags |= XPC_C_WASCONNECTED; + + spin_unlock_irqrestore(&ch->lock, *irq_flags); + + /* wake all idle kthreads so they can exit */ + if (atomic_read(&ch->kthreads_idle) > 0) { + wake_up_all(&ch->idle_wq); + + } else if ((ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) && + !(ch->flags & XPC_C_DISCONNECTINGCALLOUT)) { + /* start a kthread that will do the xpDisconnecting callout */ + xpc_create_kthreads(ch, 1, 1); + } + + /* wake those waiting to allocate an entry from the local msg queue */ + if (atomic_read(&ch->n_on_msg_allocate_wq) > 0) + wake_up(&ch->msg_allocate_wq); + + spin_lock_irqsave(&ch->lock, *irq_flags); +} + +void +xpc_disconnect_callout(struct xpc_channel *ch, enum xp_retval reason) +{ + /* + * Let the channel's registerer know that the channel is being + * disconnected. We don't want to do this if the registerer was never + * informed of a connection being made. + */ + + if (ch->func != NULL) { + dev_dbg(xpc_chan, "ch->func() called, reason=%d, partid=%d, " + "channel=%d\n", reason, ch->partid, ch->number); + + ch->func(reason, ch->partid, ch->number, NULL, ch->key); + + dev_dbg(xpc_chan, "ch->func() returned, reason=%d, partid=%d, " + "channel=%d\n", reason, ch->partid, ch->number); + } +} + +/* + * Wait for a message entry to become available for the specified channel, + * but don't wait any longer than 1 jiffy. + */ +enum xp_retval +xpc_allocate_msg_wait(struct xpc_channel *ch) +{ + enum xp_retval ret; + DEFINE_WAIT(wait); + + if (ch->flags & XPC_C_DISCONNECTING) { + DBUG_ON(ch->reason == xpInterrupted); + return ch->reason; + } + + atomic_inc(&ch->n_on_msg_allocate_wq); + prepare_to_wait(&ch->msg_allocate_wq, &wait, TASK_INTERRUPTIBLE); + ret = schedule_timeout(1); + finish_wait(&ch->msg_allocate_wq, &wait); + atomic_dec(&ch->n_on_msg_allocate_wq); + + if (ch->flags & XPC_C_DISCONNECTING) { + ret = ch->reason; + DBUG_ON(ch->reason == xpInterrupted); + } else if (ret == 0) { + ret = xpTimeout; + } else { + ret = xpInterrupted; + } + + return ret; +} + +/* + * Send a message that contains the user's payload on the specified channel + * connected to the specified partition. + * + * NOTE that this routine can sleep waiting for a message entry to become + * available. To not sleep, pass in the XPC_NOWAIT flag. + * + * Once sent, this routine will not wait for the message to be received, nor + * will notification be given when it does happen. + * + * Arguments: + * + * partid - ID of partition to which the channel is connected. + * ch_number - channel # to send message on. + * flags - see xp.h for valid flags. + * payload - pointer to the payload which is to be sent. + * payload_size - size of the payload in bytes. + */ +enum xp_retval +xpc_initiate_send(short partid, int ch_number, u32 flags, void *payload, + u16 payload_size) +{ + struct xpc_partition *part = &xpc_partitions[partid]; + enum xp_retval ret = xpUnknownReason; + + dev_dbg(xpc_chan, "payload=0x%p, partid=%d, channel=%d\n", payload, + partid, ch_number); + + DBUG_ON(partid < 0 || partid >= xp_max_npartitions); + DBUG_ON(ch_number < 0 || ch_number >= part->nchannels); + DBUG_ON(payload == NULL); + + if (xpc_part_ref(part)) { + ret = xpc_arch_ops.send_payload(&part->channels[ch_number], + flags, payload, payload_size, 0, NULL, NULL); + xpc_part_deref(part); + } + + return ret; +} + +/* + * Send a message that contains the user's payload on the specified channel + * connected to the specified partition. + * + * NOTE that this routine can sleep waiting for a message entry to become + * available. To not sleep, pass in the XPC_NOWAIT flag. + * + * This routine will not wait for the message to be sent or received. + * + * Once the remote end of the channel has received the message, the function + * passed as an argument to xpc_initiate_send_notify() will be called. This + * allows the sender to free up or re-use any buffers referenced by the + * message, but does NOT mean the message has been processed at the remote + * end by a receiver. + * + * If this routine returns an error, the caller's function will NOT be called. + * + * Arguments: + * + * partid - ID of partition to which the channel is connected. + * ch_number - channel # to send message on. + * flags - see xp.h for valid flags. + * payload - pointer to the payload which is to be sent. + * payload_size - size of the payload in bytes. + * func - function to call with asynchronous notification of message + * receipt. THIS FUNCTION MUST BE NON-BLOCKING. + * key - user-defined key to be passed to the function when it's called. + */ +enum xp_retval +xpc_initiate_send_notify(short partid, int ch_number, u32 flags, void *payload, + u16 payload_size, xpc_notify_func func, void *key) +{ + struct xpc_partition *part = &xpc_partitions[partid]; + enum xp_retval ret = xpUnknownReason; + + dev_dbg(xpc_chan, "payload=0x%p, partid=%d, channel=%d\n", payload, + partid, ch_number); + + DBUG_ON(partid < 0 || partid >= xp_max_npartitions); + DBUG_ON(ch_number < 0 || ch_number >= part->nchannels); + DBUG_ON(payload == NULL); + DBUG_ON(func == NULL); + + if (xpc_part_ref(part)) { + ret = xpc_arch_ops.send_payload(&part->channels[ch_number], + flags, payload, payload_size, XPC_N_CALL, func, key); + xpc_part_deref(part); + } + return ret; +} + +/* + * Deliver a message's payload to its intended recipient. + */ +void +xpc_deliver_payload(struct xpc_channel *ch) +{ + void *payload; + + payload = xpc_arch_ops.get_deliverable_payload(ch); + if (payload != NULL) { + + /* + * This ref is taken to protect the payload itself from being + * freed before the user is finished with it, which the user + * indicates by calling xpc_initiate_received(). + */ + xpc_msgqueue_ref(ch); + + atomic_inc(&ch->kthreads_active); + + if (ch->func != NULL) { + dev_dbg(xpc_chan, "ch->func() called, payload=0x%p " + "partid=%d channel=%d\n", payload, ch->partid, + ch->number); + + /* deliver the message to its intended recipient */ + ch->func(xpMsgReceived, ch->partid, ch->number, payload, + ch->key); + + dev_dbg(xpc_chan, "ch->func() returned, payload=0x%p " + "partid=%d channel=%d\n", payload, ch->partid, + ch->number); + } + + atomic_dec(&ch->kthreads_active); + } +} + +/* + * Acknowledge receipt of a delivered message's payload. + * + * This function, although called by users, does not call xpc_part_ref() to + * ensure that the partition infrastructure is in place. It relies on the + * fact that we called xpc_msgqueue_ref() in xpc_deliver_payload(). + * + * Arguments: + * + * partid - ID of partition to which the channel is connected. + * ch_number - channel # message received on. + * payload - pointer to the payload area allocated via + * xpc_initiate_send() or xpc_initiate_send_notify(). + */ +void +xpc_initiate_received(short partid, int ch_number, void *payload) +{ + struct xpc_partition *part = &xpc_partitions[partid]; + struct xpc_channel *ch; + + DBUG_ON(partid < 0 || partid >= xp_max_npartitions); + DBUG_ON(ch_number < 0 || ch_number >= part->nchannels); + + ch = &part->channels[ch_number]; + xpc_arch_ops.received_payload(ch, payload); + + /* the call to xpc_msgqueue_ref() was done by xpc_deliver_payload() */ + xpc_msgqueue_deref(ch); +} diff --git a/drivers/misc/sgi-xp/xpc_main.c b/drivers/misc/sgi-xp/xpc_main.c new file mode 100644 index 000000000..83fc748a9 --- /dev/null +++ b/drivers/misc/sgi-xp/xpc_main.c @@ -0,0 +1,1373 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 2004-2009 Silicon Graphics, Inc. All Rights Reserved. + */ + +/* + * Cross Partition Communication (XPC) support - standard version. + * + * XPC provides a message passing capability that crosses partition + * boundaries. This module is made up of two parts: + * + * partition This part detects the presence/absence of other + * partitions. It provides a heartbeat and monitors + * the heartbeats of other partitions. + * + * channel This part manages the channels and sends/receives + * messages across them to/from other partitions. + * + * There are a couple of additional functions residing in XP, which + * provide an interface to XPC for its users. + * + * + * Caveats: + * + * . Currently on sn2, we have no way to determine which nasid an IRQ + * came from. Thus, xpc_send_IRQ_sn2() does a remote amo write + * followed by an IPI. The amo indicates where data is to be pulled + * from, so after the IPI arrives, the remote partition checks the amo + * word. The IPI can actually arrive before the amo however, so other + * code must periodically check for this case. Also, remote amo + * operations do not reliably time out. Thus we do a remote PIO read + * solely to know whether the remote partition is down and whether we + * should stop sending IPIs to it. This remote PIO read operation is + * set up in a special nofault region so SAL knows to ignore (and + * cleanup) any errors due to the remote amo write, PIO read, and/or + * PIO write operations. + * + * If/when new hardware solves this IPI problem, we should abandon + * the current approach. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "xpc.h" + +#ifdef CONFIG_X86_64 +#include +#endif + +/* define two XPC debug device structures to be used with dev_dbg() et al */ + +struct device_driver xpc_dbg_name = { + .name = "xpc" +}; + +struct device xpc_part_dbg_subname = { + .init_name = "", /* set to "part" at xpc_init() time */ + .driver = &xpc_dbg_name +}; + +struct device xpc_chan_dbg_subname = { + .init_name = "", /* set to "chan" at xpc_init() time */ + .driver = &xpc_dbg_name +}; + +struct device *xpc_part = &xpc_part_dbg_subname; +struct device *xpc_chan = &xpc_chan_dbg_subname; + +static int xpc_kdebug_ignore; + +/* systune related variables for /proc/sys directories */ + +static int xpc_hb_interval = XPC_HB_DEFAULT_INTERVAL; +static int xpc_hb_min_interval = 1; +static int xpc_hb_max_interval = 10; + +static int xpc_hb_check_interval = XPC_HB_CHECK_DEFAULT_INTERVAL; +static int xpc_hb_check_min_interval = 10; +static int xpc_hb_check_max_interval = 120; + +int xpc_disengage_timelimit = XPC_DISENGAGE_DEFAULT_TIMELIMIT; +static int xpc_disengage_min_timelimit; /* = 0 */ +static int xpc_disengage_max_timelimit = 120; + +static struct ctl_table xpc_sys_xpc_hb_dir[] = { + { + .procname = "hb_interval", + .data = &xpc_hb_interval, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &xpc_hb_min_interval, + .extra2 = &xpc_hb_max_interval}, + { + .procname = "hb_check_interval", + .data = &xpc_hb_check_interval, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &xpc_hb_check_min_interval, + .extra2 = &xpc_hb_check_max_interval}, + {} +}; +static struct ctl_table xpc_sys_xpc_dir[] = { + { + .procname = "hb", + .mode = 0555, + .child = xpc_sys_xpc_hb_dir}, + { + .procname = "disengage_timelimit", + .data = &xpc_disengage_timelimit, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &xpc_disengage_min_timelimit, + .extra2 = &xpc_disengage_max_timelimit}, + {} +}; +static struct ctl_table xpc_sys_dir[] = { + { + .procname = "xpc", + .mode = 0555, + .child = xpc_sys_xpc_dir}, + {} +}; +static struct ctl_table_header *xpc_sysctl; + +/* non-zero if any remote partition disengage was timed out */ +int xpc_disengage_timedout; + +/* #of activate IRQs received and not yet processed */ +int xpc_activate_IRQ_rcvd; +DEFINE_SPINLOCK(xpc_activate_IRQ_rcvd_lock); + +/* IRQ handler notifies this wait queue on receipt of an IRQ */ +DECLARE_WAIT_QUEUE_HEAD(xpc_activate_IRQ_wq); + +static unsigned long xpc_hb_check_timeout; +static struct timer_list xpc_hb_timer; + +/* notification that the xpc_hb_checker thread has exited */ +static DECLARE_COMPLETION(xpc_hb_checker_exited); + +/* notification that the xpc_discovery thread has exited */ +static DECLARE_COMPLETION(xpc_discovery_exited); + +static void xpc_kthread_waitmsgs(struct xpc_partition *, struct xpc_channel *); + +static int xpc_system_reboot(struct notifier_block *, unsigned long, void *); +static struct notifier_block xpc_reboot_notifier = { + .notifier_call = xpc_system_reboot, +}; + +static int xpc_system_die(struct notifier_block *, unsigned long, void *); +static struct notifier_block xpc_die_notifier = { + .notifier_call = xpc_system_die, +}; + +struct xpc_arch_operations xpc_arch_ops; + +/* + * Timer function to enforce the timelimit on the partition disengage. + */ +static void +xpc_timeout_partition_disengage(struct timer_list *t) +{ + struct xpc_partition *part = from_timer(part, t, disengage_timer); + + DBUG_ON(time_is_after_jiffies(part->disengage_timeout)); + + (void)xpc_partition_disengaged(part); + + DBUG_ON(part->disengage_timeout != 0); + DBUG_ON(xpc_arch_ops.partition_engaged(XPC_PARTID(part))); +} + +/* + * Timer to produce the heartbeat. The timer structures function is + * already set when this is initially called. A tunable is used to + * specify when the next timeout should occur. + */ +static void +xpc_hb_beater(struct timer_list *unused) +{ + xpc_arch_ops.increment_heartbeat(); + + if (time_is_before_eq_jiffies(xpc_hb_check_timeout)) + wake_up_interruptible(&xpc_activate_IRQ_wq); + + xpc_hb_timer.expires = jiffies + (xpc_hb_interval * HZ); + add_timer(&xpc_hb_timer); +} + +static void +xpc_start_hb_beater(void) +{ + xpc_arch_ops.heartbeat_init(); + timer_setup(&xpc_hb_timer, xpc_hb_beater, 0); + xpc_hb_beater(0); +} + +static void +xpc_stop_hb_beater(void) +{ + del_timer_sync(&xpc_hb_timer); + xpc_arch_ops.heartbeat_exit(); +} + +/* + * At periodic intervals, scan through all active partitions and ensure + * their heartbeat is still active. If not, the partition is deactivated. + */ +static void +xpc_check_remote_hb(void) +{ + struct xpc_partition *part; + short partid; + enum xp_retval ret; + + for (partid = 0; partid < xp_max_npartitions; partid++) { + + if (xpc_exiting) + break; + + if (partid == xp_partition_id) + continue; + + part = &xpc_partitions[partid]; + + if (part->act_state == XPC_P_AS_INACTIVE || + part->act_state == XPC_P_AS_DEACTIVATING) { + continue; + } + + ret = xpc_arch_ops.get_remote_heartbeat(part); + if (ret != xpSuccess) + XPC_DEACTIVATE_PARTITION(part, ret); + } +} + +/* + * This thread is responsible for nearly all of the partition + * activation/deactivation. + */ +static int +xpc_hb_checker(void *ignore) +{ + int force_IRQ = 0; + + /* this thread was marked active by xpc_hb_init() */ + + set_cpus_allowed_ptr(current, cpumask_of(XPC_HB_CHECK_CPU)); + + /* set our heartbeating to other partitions into motion */ + xpc_hb_check_timeout = jiffies + (xpc_hb_check_interval * HZ); + xpc_start_hb_beater(); + + while (!xpc_exiting) { + + dev_dbg(xpc_part, "woke up with %d ticks rem; %d IRQs have " + "been received\n", + (int)(xpc_hb_check_timeout - jiffies), + xpc_activate_IRQ_rcvd); + + /* checking of remote heartbeats is skewed by IRQ handling */ + if (time_is_before_eq_jiffies(xpc_hb_check_timeout)) { + xpc_hb_check_timeout = jiffies + + (xpc_hb_check_interval * HZ); + + dev_dbg(xpc_part, "checking remote heartbeats\n"); + xpc_check_remote_hb(); + + /* + * On sn2 we need to periodically recheck to ensure no + * IRQ/amo pairs have been missed. + */ + if (is_shub()) + force_IRQ = 1; + } + + /* check for outstanding IRQs */ + if (xpc_activate_IRQ_rcvd > 0 || force_IRQ != 0) { + force_IRQ = 0; + dev_dbg(xpc_part, "processing activate IRQs " + "received\n"); + xpc_arch_ops.process_activate_IRQ_rcvd(); + } + + /* wait for IRQ or timeout */ + (void)wait_event_interruptible(xpc_activate_IRQ_wq, + (time_is_before_eq_jiffies( + xpc_hb_check_timeout) || + xpc_activate_IRQ_rcvd > 0 || + xpc_exiting)); + } + + xpc_stop_hb_beater(); + + dev_dbg(xpc_part, "heartbeat checker is exiting\n"); + + /* mark this thread as having exited */ + complete(&xpc_hb_checker_exited); + return 0; +} + +/* + * This thread will attempt to discover other partitions to activate + * based on info provided by SAL. This new thread is short lived and + * will exit once discovery is complete. + */ +static int +xpc_initiate_discovery(void *ignore) +{ + xpc_discovery(); + + dev_dbg(xpc_part, "discovery thread is exiting\n"); + + /* mark this thread as having exited */ + complete(&xpc_discovery_exited); + return 0; +} + +/* + * The first kthread assigned to a newly activated partition is the one + * created by XPC HB with which it calls xpc_activating(). XPC hangs on to + * that kthread until the partition is brought down, at which time that kthread + * returns back to XPC HB. (The return of that kthread will signify to XPC HB + * that XPC has dismantled all communication infrastructure for the associated + * partition.) This kthread becomes the channel manager for that partition. + * + * Each active partition has a channel manager, who, besides connecting and + * disconnecting channels, will ensure that each of the partition's connected + * channels has the required number of assigned kthreads to get the work done. + */ +static void +xpc_channel_mgr(struct xpc_partition *part) +{ + while (part->act_state != XPC_P_AS_DEACTIVATING || + atomic_read(&part->nchannels_active) > 0 || + !xpc_partition_disengaged(part)) { + + xpc_process_sent_chctl_flags(part); + + /* + * Wait until we've been requested to activate kthreads or + * all of the channel's message queues have been torn down or + * a signal is pending. + * + * The channel_mgr_requests is set to 1 after being awakened, + * This is done to prevent the channel mgr from making one pass + * through the loop for each request, since he will + * be servicing all the requests in one pass. The reason it's + * set to 1 instead of 0 is so that other kthreads will know + * that the channel mgr is running and won't bother trying to + * wake him up. + */ + atomic_dec(&part->channel_mgr_requests); + (void)wait_event_interruptible(part->channel_mgr_wq, + (atomic_read(&part->channel_mgr_requests) > 0 || + part->chctl.all_flags != 0 || + (part->act_state == XPC_P_AS_DEACTIVATING && + atomic_read(&part->nchannels_active) == 0 && + xpc_partition_disengaged(part)))); + atomic_set(&part->channel_mgr_requests, 1); + } +} + +/* + * Guarantee that the kzalloc'd memory is cacheline aligned. + */ +void * +xpc_kzalloc_cacheline_aligned(size_t size, gfp_t flags, void **base) +{ + /* see if kzalloc will give us cachline aligned memory by default */ + *base = kzalloc(size, flags); + if (*base == NULL) + return NULL; + + if ((u64)*base == L1_CACHE_ALIGN((u64)*base)) + return *base; + + kfree(*base); + + /* nope, we'll have to do it ourselves */ + *base = kzalloc(size + L1_CACHE_BYTES, flags); + if (*base == NULL) + return NULL; + + return (void *)L1_CACHE_ALIGN((u64)*base); +} + +/* + * Setup the channel structures necessary to support XPartition Communication + * between the specified remote partition and the local one. + */ +static enum xp_retval +xpc_setup_ch_structures(struct xpc_partition *part) +{ + enum xp_retval ret; + int ch_number; + struct xpc_channel *ch; + short partid = XPC_PARTID(part); + + /* + * Allocate all of the channel structures as a contiguous chunk of + * memory. + */ + DBUG_ON(part->channels != NULL); + part->channels = kcalloc(XPC_MAX_NCHANNELS, + sizeof(struct xpc_channel), + GFP_KERNEL); + if (part->channels == NULL) { + dev_err(xpc_chan, "can't get memory for channels\n"); + return xpNoMemory; + } + + /* allocate the remote open and close args */ + + part->remote_openclose_args = + xpc_kzalloc_cacheline_aligned(XPC_OPENCLOSE_ARGS_SIZE, + GFP_KERNEL, &part-> + remote_openclose_args_base); + if (part->remote_openclose_args == NULL) { + dev_err(xpc_chan, "can't get memory for remote connect args\n"); + ret = xpNoMemory; + goto out_1; + } + + part->chctl.all_flags = 0; + spin_lock_init(&part->chctl_lock); + + atomic_set(&part->channel_mgr_requests, 1); + init_waitqueue_head(&part->channel_mgr_wq); + + part->nchannels = XPC_MAX_NCHANNELS; + + atomic_set(&part->nchannels_active, 0); + atomic_set(&part->nchannels_engaged, 0); + + for (ch_number = 0; ch_number < part->nchannels; ch_number++) { + ch = &part->channels[ch_number]; + + ch->partid = partid; + ch->number = ch_number; + ch->flags = XPC_C_DISCONNECTED; + + atomic_set(&ch->kthreads_assigned, 0); + atomic_set(&ch->kthreads_idle, 0); + atomic_set(&ch->kthreads_active, 0); + + atomic_set(&ch->references, 0); + atomic_set(&ch->n_to_notify, 0); + + spin_lock_init(&ch->lock); + init_completion(&ch->wdisconnect_wait); + + atomic_set(&ch->n_on_msg_allocate_wq, 0); + init_waitqueue_head(&ch->msg_allocate_wq); + init_waitqueue_head(&ch->idle_wq); + } + + ret = xpc_arch_ops.setup_ch_structures(part); + if (ret != xpSuccess) + goto out_2; + + /* + * With the setting of the partition setup_state to XPC_P_SS_SETUP, + * we're declaring that this partition is ready to go. + */ + part->setup_state = XPC_P_SS_SETUP; + + return xpSuccess; + + /* setup of ch structures failed */ +out_2: + kfree(part->remote_openclose_args_base); + part->remote_openclose_args = NULL; +out_1: + kfree(part->channels); + part->channels = NULL; + return ret; +} + +/* + * Teardown the channel structures necessary to support XPartition Communication + * between the specified remote partition and the local one. + */ +static void +xpc_teardown_ch_structures(struct xpc_partition *part) +{ + DBUG_ON(atomic_read(&part->nchannels_engaged) != 0); + DBUG_ON(atomic_read(&part->nchannels_active) != 0); + + /* + * Make this partition inaccessible to local processes by marking it + * as no longer setup. Then wait before proceeding with the teardown + * until all existing references cease. + */ + DBUG_ON(part->setup_state != XPC_P_SS_SETUP); + part->setup_state = XPC_P_SS_WTEARDOWN; + + wait_event(part->teardown_wq, (atomic_read(&part->references) == 0)); + + /* now we can begin tearing down the infrastructure */ + + xpc_arch_ops.teardown_ch_structures(part); + + kfree(part->remote_openclose_args_base); + part->remote_openclose_args = NULL; + kfree(part->channels); + part->channels = NULL; + + part->setup_state = XPC_P_SS_TORNDOWN; +} + +/* + * When XPC HB determines that a partition has come up, it will create a new + * kthread and that kthread will call this function to attempt to set up the + * basic infrastructure used for Cross Partition Communication with the newly + * upped partition. + * + * The kthread that was created by XPC HB and which setup the XPC + * infrastructure will remain assigned to the partition becoming the channel + * manager for that partition until the partition is deactivating, at which + * time the kthread will teardown the XPC infrastructure and then exit. + */ +static int +xpc_activating(void *__partid) +{ + short partid = (u64)__partid; + struct xpc_partition *part = &xpc_partitions[partid]; + unsigned long irq_flags; + + DBUG_ON(partid < 0 || partid >= xp_max_npartitions); + + spin_lock_irqsave(&part->act_lock, irq_flags); + + if (part->act_state == XPC_P_AS_DEACTIVATING) { + part->act_state = XPC_P_AS_INACTIVE; + spin_unlock_irqrestore(&part->act_lock, irq_flags); + part->remote_rp_pa = 0; + return 0; + } + + /* indicate the thread is activating */ + DBUG_ON(part->act_state != XPC_P_AS_ACTIVATION_REQ); + part->act_state = XPC_P_AS_ACTIVATING; + + XPC_SET_REASON(part, 0, 0); + spin_unlock_irqrestore(&part->act_lock, irq_flags); + + dev_dbg(xpc_part, "activating partition %d\n", partid); + + xpc_arch_ops.allow_hb(partid); + + if (xpc_setup_ch_structures(part) == xpSuccess) { + (void)xpc_part_ref(part); /* this will always succeed */ + + if (xpc_arch_ops.make_first_contact(part) == xpSuccess) { + xpc_mark_partition_active(part); + xpc_channel_mgr(part); + /* won't return until partition is deactivating */ + } + + xpc_part_deref(part); + xpc_teardown_ch_structures(part); + } + + xpc_arch_ops.disallow_hb(partid); + xpc_mark_partition_inactive(part); + + if (part->reason == xpReactivating) { + /* interrupting ourselves results in activating partition */ + xpc_arch_ops.request_partition_reactivation(part); + } + + return 0; +} + +void +xpc_activate_partition(struct xpc_partition *part) +{ + short partid = XPC_PARTID(part); + unsigned long irq_flags; + struct task_struct *kthread; + + spin_lock_irqsave(&part->act_lock, irq_flags); + + DBUG_ON(part->act_state != XPC_P_AS_INACTIVE); + + part->act_state = XPC_P_AS_ACTIVATION_REQ; + XPC_SET_REASON(part, xpCloneKThread, __LINE__); + + spin_unlock_irqrestore(&part->act_lock, irq_flags); + + kthread = kthread_run(xpc_activating, (void *)((u64)partid), "xpc%02d", + partid); + if (IS_ERR(kthread)) { + spin_lock_irqsave(&part->act_lock, irq_flags); + part->act_state = XPC_P_AS_INACTIVE; + XPC_SET_REASON(part, xpCloneKThreadFailed, __LINE__); + spin_unlock_irqrestore(&part->act_lock, irq_flags); + } +} + +void +xpc_activate_kthreads(struct xpc_channel *ch, int needed) +{ + int idle = atomic_read(&ch->kthreads_idle); + int assigned = atomic_read(&ch->kthreads_assigned); + int wakeup; + + DBUG_ON(needed <= 0); + + if (idle > 0) { + wakeup = (needed > idle) ? idle : needed; + needed -= wakeup; + + dev_dbg(xpc_chan, "wakeup %d idle kthreads, partid=%d, " + "channel=%d\n", wakeup, ch->partid, ch->number); + + /* only wakeup the requested number of kthreads */ + wake_up_nr(&ch->idle_wq, wakeup); + } + + if (needed <= 0) + return; + + if (needed + assigned > ch->kthreads_assigned_limit) { + needed = ch->kthreads_assigned_limit - assigned; + if (needed <= 0) + return; + } + + dev_dbg(xpc_chan, "create %d new kthreads, partid=%d, channel=%d\n", + needed, ch->partid, ch->number); + + xpc_create_kthreads(ch, needed, 0); +} + +/* + * This function is where XPC's kthreads wait for messages to deliver. + */ +static void +xpc_kthread_waitmsgs(struct xpc_partition *part, struct xpc_channel *ch) +{ + int (*n_of_deliverable_payloads) (struct xpc_channel *) = + xpc_arch_ops.n_of_deliverable_payloads; + + do { + /* deliver messages to their intended recipients */ + + while (n_of_deliverable_payloads(ch) > 0 && + !(ch->flags & XPC_C_DISCONNECTING)) { + xpc_deliver_payload(ch); + } + + if (atomic_inc_return(&ch->kthreads_idle) > + ch->kthreads_idle_limit) { + /* too many idle kthreads on this channel */ + atomic_dec(&ch->kthreads_idle); + break; + } + + dev_dbg(xpc_chan, "idle kthread calling " + "wait_event_interruptible_exclusive()\n"); + + (void)wait_event_interruptible_exclusive(ch->idle_wq, + (n_of_deliverable_payloads(ch) > 0 || + (ch->flags & XPC_C_DISCONNECTING))); + + atomic_dec(&ch->kthreads_idle); + + } while (!(ch->flags & XPC_C_DISCONNECTING)); +} + +static int +xpc_kthread_start(void *args) +{ + short partid = XPC_UNPACK_ARG1(args); + u16 ch_number = XPC_UNPACK_ARG2(args); + struct xpc_partition *part = &xpc_partitions[partid]; + struct xpc_channel *ch; + int n_needed; + unsigned long irq_flags; + int (*n_of_deliverable_payloads) (struct xpc_channel *) = + xpc_arch_ops.n_of_deliverable_payloads; + + dev_dbg(xpc_chan, "kthread starting, partid=%d, channel=%d\n", + partid, ch_number); + + ch = &part->channels[ch_number]; + + if (!(ch->flags & XPC_C_DISCONNECTING)) { + + /* let registerer know that connection has been established */ + + spin_lock_irqsave(&ch->lock, irq_flags); + if (!(ch->flags & XPC_C_CONNECTEDCALLOUT)) { + ch->flags |= XPC_C_CONNECTEDCALLOUT; + spin_unlock_irqrestore(&ch->lock, irq_flags); + + xpc_connected_callout(ch); + + spin_lock_irqsave(&ch->lock, irq_flags); + ch->flags |= XPC_C_CONNECTEDCALLOUT_MADE; + spin_unlock_irqrestore(&ch->lock, irq_flags); + + /* + * It is possible that while the callout was being + * made that the remote partition sent some messages. + * If that is the case, we may need to activate + * additional kthreads to help deliver them. We only + * need one less than total #of messages to deliver. + */ + n_needed = n_of_deliverable_payloads(ch) - 1; + if (n_needed > 0 && !(ch->flags & XPC_C_DISCONNECTING)) + xpc_activate_kthreads(ch, n_needed); + + } else { + spin_unlock_irqrestore(&ch->lock, irq_flags); + } + + xpc_kthread_waitmsgs(part, ch); + } + + /* let registerer know that connection is disconnecting */ + + spin_lock_irqsave(&ch->lock, irq_flags); + if ((ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) && + !(ch->flags & XPC_C_DISCONNECTINGCALLOUT)) { + ch->flags |= XPC_C_DISCONNECTINGCALLOUT; + spin_unlock_irqrestore(&ch->lock, irq_flags); + + xpc_disconnect_callout(ch, xpDisconnecting); + + spin_lock_irqsave(&ch->lock, irq_flags); + ch->flags |= XPC_C_DISCONNECTINGCALLOUT_MADE; + } + spin_unlock_irqrestore(&ch->lock, irq_flags); + + if (atomic_dec_return(&ch->kthreads_assigned) == 0 && + atomic_dec_return(&part->nchannels_engaged) == 0) { + xpc_arch_ops.indicate_partition_disengaged(part); + } + + xpc_msgqueue_deref(ch); + + dev_dbg(xpc_chan, "kthread exiting, partid=%d, channel=%d\n", + partid, ch_number); + + xpc_part_deref(part); + return 0; +} + +/* + * For each partition that XPC has established communications with, there is + * a minimum of one kernel thread assigned to perform any operation that + * may potentially sleep or block (basically the callouts to the asynchronous + * functions registered via xpc_connect()). + * + * Additional kthreads are created and destroyed by XPC as the workload + * demands. + * + * A kthread is assigned to one of the active channels that exists for a given + * partition. + */ +void +xpc_create_kthreads(struct xpc_channel *ch, int needed, + int ignore_disconnecting) +{ + unsigned long irq_flags; + u64 args = XPC_PACK_ARGS(ch->partid, ch->number); + struct xpc_partition *part = &xpc_partitions[ch->partid]; + struct task_struct *kthread; + void (*indicate_partition_disengaged) (struct xpc_partition *) = + xpc_arch_ops.indicate_partition_disengaged; + + while (needed-- > 0) { + + /* + * The following is done on behalf of the newly created + * kthread. That kthread is responsible for doing the + * counterpart to the following before it exits. + */ + if (ignore_disconnecting) { + if (!atomic_inc_not_zero(&ch->kthreads_assigned)) { + /* kthreads assigned had gone to zero */ + BUG_ON(!(ch->flags & + XPC_C_DISCONNECTINGCALLOUT_MADE)); + break; + } + + } else if (ch->flags & XPC_C_DISCONNECTING) { + break; + + } else if (atomic_inc_return(&ch->kthreads_assigned) == 1 && + atomic_inc_return(&part->nchannels_engaged) == 1) { + xpc_arch_ops.indicate_partition_engaged(part); + } + (void)xpc_part_ref(part); + xpc_msgqueue_ref(ch); + + kthread = kthread_run(xpc_kthread_start, (void *)args, + "xpc%02dc%d", ch->partid, ch->number); + if (IS_ERR(kthread)) { + /* the fork failed */ + + /* + * NOTE: if (ignore_disconnecting && + * !(ch->flags & XPC_C_DISCONNECTINGCALLOUT)) is true, + * then we'll deadlock if all other kthreads assigned + * to this channel are blocked in the channel's + * registerer, because the only thing that will unblock + * them is the xpDisconnecting callout that this + * failed kthread_run() would have made. + */ + + if (atomic_dec_return(&ch->kthreads_assigned) == 0 && + atomic_dec_return(&part->nchannels_engaged) == 0) { + indicate_partition_disengaged(part); + } + xpc_msgqueue_deref(ch); + xpc_part_deref(part); + + if (atomic_read(&ch->kthreads_assigned) < + ch->kthreads_idle_limit) { + /* + * Flag this as an error only if we have an + * insufficient #of kthreads for the channel + * to function. + */ + spin_lock_irqsave(&ch->lock, irq_flags); + XPC_DISCONNECT_CHANNEL(ch, xpLackOfResources, + &irq_flags); + spin_unlock_irqrestore(&ch->lock, irq_flags); + } + break; + } + } +} + +void +xpc_disconnect_wait(int ch_number) +{ + unsigned long irq_flags; + short partid; + struct xpc_partition *part; + struct xpc_channel *ch; + int wakeup_channel_mgr; + + /* now wait for all callouts to the caller's function to cease */ + for (partid = 0; partid < xp_max_npartitions; partid++) { + part = &xpc_partitions[partid]; + + if (!xpc_part_ref(part)) + continue; + + ch = &part->channels[ch_number]; + + if (!(ch->flags & XPC_C_WDISCONNECT)) { + xpc_part_deref(part); + continue; + } + + wait_for_completion(&ch->wdisconnect_wait); + + spin_lock_irqsave(&ch->lock, irq_flags); + DBUG_ON(!(ch->flags & XPC_C_DISCONNECTED)); + wakeup_channel_mgr = 0; + + if (ch->delayed_chctl_flags) { + if (part->act_state != XPC_P_AS_DEACTIVATING) { + spin_lock(&part->chctl_lock); + part->chctl.flags[ch->number] |= + ch->delayed_chctl_flags; + spin_unlock(&part->chctl_lock); + wakeup_channel_mgr = 1; + } + ch->delayed_chctl_flags = 0; + } + + ch->flags &= ~XPC_C_WDISCONNECT; + spin_unlock_irqrestore(&ch->lock, irq_flags); + + if (wakeup_channel_mgr) + xpc_wakeup_channel_mgr(part); + + xpc_part_deref(part); + } +} + +static int +xpc_setup_partitions(void) +{ + short partid; + struct xpc_partition *part; + + xpc_partitions = kcalloc(xp_max_npartitions, + sizeof(struct xpc_partition), + GFP_KERNEL); + if (xpc_partitions == NULL) { + dev_err(xpc_part, "can't get memory for partition structure\n"); + return -ENOMEM; + } + + /* + * The first few fields of each entry of xpc_partitions[] need to + * be initialized now so that calls to xpc_connect() and + * xpc_disconnect() can be made prior to the activation of any remote + * partition. NOTE THAT NONE OF THE OTHER FIELDS BELONGING TO THESE + * ENTRIES ARE MEANINGFUL UNTIL AFTER AN ENTRY'S CORRESPONDING + * PARTITION HAS BEEN ACTIVATED. + */ + for (partid = 0; partid < xp_max_npartitions; partid++) { + part = &xpc_partitions[partid]; + + DBUG_ON((u64)part != L1_CACHE_ALIGN((u64)part)); + + part->activate_IRQ_rcvd = 0; + spin_lock_init(&part->act_lock); + part->act_state = XPC_P_AS_INACTIVE; + XPC_SET_REASON(part, 0, 0); + + timer_setup(&part->disengage_timer, + xpc_timeout_partition_disengage, 0); + + part->setup_state = XPC_P_SS_UNSET; + init_waitqueue_head(&part->teardown_wq); + atomic_set(&part->references, 0); + } + + return xpc_arch_ops.setup_partitions(); +} + +static void +xpc_teardown_partitions(void) +{ + xpc_arch_ops.teardown_partitions(); + kfree(xpc_partitions); +} + +static void +xpc_do_exit(enum xp_retval reason) +{ + short partid; + int active_part_count, printed_waiting_msg = 0; + struct xpc_partition *part; + unsigned long printmsg_time, disengage_timeout = 0; + + /* a 'rmmod XPC' and a 'reboot' cannot both end up here together */ + DBUG_ON(xpc_exiting == 1); + + /* + * Let the heartbeat checker thread and the discovery thread + * (if one is running) know that they should exit. Also wake up + * the heartbeat checker thread in case it's sleeping. + */ + xpc_exiting = 1; + wake_up_interruptible(&xpc_activate_IRQ_wq); + + /* wait for the discovery thread to exit */ + wait_for_completion(&xpc_discovery_exited); + + /* wait for the heartbeat checker thread to exit */ + wait_for_completion(&xpc_hb_checker_exited); + + /* sleep for a 1/3 of a second or so */ + (void)msleep_interruptible(300); + + /* wait for all partitions to become inactive */ + + printmsg_time = jiffies + (XPC_DEACTIVATE_PRINTMSG_INTERVAL * HZ); + xpc_disengage_timedout = 0; + + do { + active_part_count = 0; + + for (partid = 0; partid < xp_max_npartitions; partid++) { + part = &xpc_partitions[partid]; + + if (xpc_partition_disengaged(part) && + part->act_state == XPC_P_AS_INACTIVE) { + continue; + } + + active_part_count++; + + XPC_DEACTIVATE_PARTITION(part, reason); + + if (part->disengage_timeout > disengage_timeout) + disengage_timeout = part->disengage_timeout; + } + + if (xpc_arch_ops.any_partition_engaged()) { + if (time_is_before_jiffies(printmsg_time)) { + dev_info(xpc_part, "waiting for remote " + "partitions to deactivate, timeout in " + "%ld seconds\n", (disengage_timeout - + jiffies) / HZ); + printmsg_time = jiffies + + (XPC_DEACTIVATE_PRINTMSG_INTERVAL * HZ); + printed_waiting_msg = 1; + } + + } else if (active_part_count > 0) { + if (printed_waiting_msg) { + dev_info(xpc_part, "waiting for local partition" + " to deactivate\n"); + printed_waiting_msg = 0; + } + + } else { + if (!xpc_disengage_timedout) { + dev_info(xpc_part, "all partitions have " + "deactivated\n"); + } + break; + } + + /* sleep for a 1/3 of a second or so */ + (void)msleep_interruptible(300); + + } while (1); + + DBUG_ON(xpc_arch_ops.any_partition_engaged()); + + xpc_teardown_rsvd_page(); + + if (reason == xpUnloading) { + (void)unregister_die_notifier(&xpc_die_notifier); + (void)unregister_reboot_notifier(&xpc_reboot_notifier); + } + + /* clear the interface to XPC's functions */ + xpc_clear_interface(); + + if (xpc_sysctl) + unregister_sysctl_table(xpc_sysctl); + + xpc_teardown_partitions(); + + if (is_shub()) + xpc_exit_sn2(); + else if (is_uv()) + xpc_exit_uv(); +} + +/* + * This function is called when the system is being rebooted. + */ +static int +xpc_system_reboot(struct notifier_block *nb, unsigned long event, void *unused) +{ + enum xp_retval reason; + + switch (event) { + case SYS_RESTART: + reason = xpSystemReboot; + break; + case SYS_HALT: + reason = xpSystemHalt; + break; + case SYS_POWER_OFF: + reason = xpSystemPoweroff; + break; + default: + reason = xpSystemGoingDown; + } + + xpc_do_exit(reason); + return NOTIFY_DONE; +} + +/* Used to only allow one cpu to complete disconnect */ +static unsigned int xpc_die_disconnecting; + +/* + * Notify other partitions to deactivate from us by first disengaging from all + * references to our memory. + */ +static void +xpc_die_deactivate(void) +{ + struct xpc_partition *part; + short partid; + int any_engaged; + long keep_waiting; + long wait_to_print; + + if (cmpxchg(&xpc_die_disconnecting, 0, 1)) + return; + + /* keep xpc_hb_checker thread from doing anything (just in case) */ + xpc_exiting = 1; + + xpc_arch_ops.disallow_all_hbs(); /*indicate we're deactivated */ + + for (partid = 0; partid < xp_max_npartitions; partid++) { + part = &xpc_partitions[partid]; + + if (xpc_arch_ops.partition_engaged(partid) || + part->act_state != XPC_P_AS_INACTIVE) { + xpc_arch_ops.request_partition_deactivation(part); + xpc_arch_ops.indicate_partition_disengaged(part); + } + } + + /* + * Though we requested that all other partitions deactivate from us, + * we only wait until they've all disengaged or we've reached the + * defined timelimit. + * + * Given that one iteration through the following while-loop takes + * approximately 200 microseconds, calculate the #of loops to take + * before bailing and the #of loops before printing a waiting message. + */ + keep_waiting = xpc_disengage_timelimit * 1000 * 5; + wait_to_print = XPC_DEACTIVATE_PRINTMSG_INTERVAL * 1000 * 5; + + while (1) { + any_engaged = xpc_arch_ops.any_partition_engaged(); + if (!any_engaged) { + dev_info(xpc_part, "all partitions have deactivated\n"); + break; + } + + if (!keep_waiting--) { + for (partid = 0; partid < xp_max_npartitions; + partid++) { + if (xpc_arch_ops.partition_engaged(partid)) { + dev_info(xpc_part, "deactivate from " + "remote partition %d timed " + "out\n", partid); + } + } + break; + } + + if (!wait_to_print--) { + dev_info(xpc_part, "waiting for remote partitions to " + "deactivate, timeout in %ld seconds\n", + keep_waiting / (1000 * 5)); + wait_to_print = XPC_DEACTIVATE_PRINTMSG_INTERVAL * + 1000 * 5; + } + + udelay(200); + } +} + +/* + * This function is called when the system is being restarted or halted due + * to some sort of system failure. If this is the case we need to notify the + * other partitions to disengage from all references to our memory. + * This function can also be called when our heartbeater could be offlined + * for a time. In this case we need to notify other partitions to not worry + * about the lack of a heartbeat. + */ +static int +xpc_system_die(struct notifier_block *nb, unsigned long event, void *_die_args) +{ +#ifdef CONFIG_IA64 /* !!! temporary kludge */ + switch (event) { + case DIE_MACHINE_RESTART: + case DIE_MACHINE_HALT: + xpc_die_deactivate(); + break; + + case DIE_KDEBUG_ENTER: + /* Should lack of heartbeat be ignored by other partitions? */ + if (!xpc_kdebug_ignore) + break; + + /* fall through */ + case DIE_MCA_MONARCH_ENTER: + case DIE_INIT_MONARCH_ENTER: + xpc_arch_ops.offline_heartbeat(); + break; + + case DIE_KDEBUG_LEAVE: + /* Is lack of heartbeat being ignored by other partitions? */ + if (!xpc_kdebug_ignore) + break; + + /* fall through */ + case DIE_MCA_MONARCH_LEAVE: + case DIE_INIT_MONARCH_LEAVE: + xpc_arch_ops.online_heartbeat(); + break; + } +#else + struct die_args *die_args = _die_args; + + switch (event) { + case DIE_TRAP: + if (die_args->trapnr == X86_TRAP_DF) + xpc_die_deactivate(); + + if (((die_args->trapnr == X86_TRAP_MF) || + (die_args->trapnr == X86_TRAP_XF)) && + !user_mode(die_args->regs)) + xpc_die_deactivate(); + + break; + case DIE_INT3: + case DIE_DEBUG: + break; + case DIE_OOPS: + case DIE_GPF: + default: + xpc_die_deactivate(); + } +#endif + + return NOTIFY_DONE; +} + +int __init +xpc_init(void) +{ + int ret; + struct task_struct *kthread; + + dev_set_name(xpc_part, "part"); + dev_set_name(xpc_chan, "chan"); + + if (is_shub()) { + /* + * The ia64-sn2 architecture supports at most 64 partitions. + * And the inability to unregister remote amos restricts us + * further to only support exactly 64 partitions on this + * architecture, no less. + */ + if (xp_max_npartitions != 64) { + dev_err(xpc_part, "max #of partitions not set to 64\n"); + ret = -EINVAL; + } else { + ret = xpc_init_sn2(); + } + + } else if (is_uv()) { + ret = xpc_init_uv(); + + } else { + ret = -ENODEV; + } + + if (ret != 0) + return ret; + + ret = xpc_setup_partitions(); + if (ret != 0) { + dev_err(xpc_part, "can't get memory for partition structure\n"); + goto out_1; + } + + xpc_sysctl = register_sysctl_table(xpc_sys_dir); + + /* + * Fill the partition reserved page with the information needed by + * other partitions to discover we are alive and establish initial + * communications. + */ + ret = xpc_setup_rsvd_page(); + if (ret != 0) { + dev_err(xpc_part, "can't setup our reserved page\n"); + goto out_2; + } + + /* add ourselves to the reboot_notifier_list */ + ret = register_reboot_notifier(&xpc_reboot_notifier); + if (ret != 0) + dev_warn(xpc_part, "can't register reboot notifier\n"); + + /* add ourselves to the die_notifier list */ + ret = register_die_notifier(&xpc_die_notifier); + if (ret != 0) + dev_warn(xpc_part, "can't register die notifier\n"); + + /* + * The real work-horse behind xpc. This processes incoming + * interrupts and monitors remote heartbeats. + */ + kthread = kthread_run(xpc_hb_checker, NULL, XPC_HB_CHECK_THREAD_NAME); + if (IS_ERR(kthread)) { + dev_err(xpc_part, "failed while forking hb check thread\n"); + ret = -EBUSY; + goto out_3; + } + + /* + * Startup a thread that will attempt to discover other partitions to + * activate based on info provided by SAL. This new thread is short + * lived and will exit once discovery is complete. + */ + kthread = kthread_run(xpc_initiate_discovery, NULL, + XPC_DISCOVERY_THREAD_NAME); + if (IS_ERR(kthread)) { + dev_err(xpc_part, "failed while forking discovery thread\n"); + + /* mark this new thread as a non-starter */ + complete(&xpc_discovery_exited); + + xpc_do_exit(xpUnloading); + return -EBUSY; + } + + /* set the interface to point at XPC's functions */ + xpc_set_interface(xpc_initiate_connect, xpc_initiate_disconnect, + xpc_initiate_send, xpc_initiate_send_notify, + xpc_initiate_received, xpc_initiate_partid_to_nasids); + + return 0; + + /* initialization was not successful */ +out_3: + xpc_teardown_rsvd_page(); + + (void)unregister_die_notifier(&xpc_die_notifier); + (void)unregister_reboot_notifier(&xpc_reboot_notifier); +out_2: + if (xpc_sysctl) + unregister_sysctl_table(xpc_sysctl); + + xpc_teardown_partitions(); +out_1: + if (is_shub()) + xpc_exit_sn2(); + else if (is_uv()) + xpc_exit_uv(); + return ret; +} + +module_init(xpc_init); + +void __exit +xpc_exit(void) +{ + xpc_do_exit(xpUnloading); +} + +module_exit(xpc_exit); + +MODULE_AUTHOR("Silicon Graphics, Inc."); +MODULE_DESCRIPTION("Cross Partition Communication (XPC) support"); +MODULE_LICENSE("GPL"); + +module_param(xpc_hb_interval, int, 0); +MODULE_PARM_DESC(xpc_hb_interval, "Number of seconds between " + "heartbeat increments."); + +module_param(xpc_hb_check_interval, int, 0); +MODULE_PARM_DESC(xpc_hb_check_interval, "Number of seconds between " + "heartbeat checks."); + +module_param(xpc_disengage_timelimit, int, 0); +MODULE_PARM_DESC(xpc_disengage_timelimit, "Number of seconds to wait " + "for disengage to complete."); + +module_param(xpc_kdebug_ignore, int, 0); +MODULE_PARM_DESC(xpc_kdebug_ignore, "Should lack of heartbeat be ignored by " + "other partitions when dropping into kdebug."); diff --git a/drivers/misc/sgi-xp/xpc_partition.c b/drivers/misc/sgi-xp/xpc_partition.c new file mode 100644 index 000000000..519826ba1 --- /dev/null +++ b/drivers/misc/sgi-xp/xpc_partition.c @@ -0,0 +1,540 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 2004-2008 Silicon Graphics, Inc. All Rights Reserved. + */ + +/* + * Cross Partition Communication (XPC) partition support. + * + * This is the part of XPC that detects the presence/absence of + * other partitions. It provides a heartbeat and monitors the + * heartbeats of other partitions. + * + */ + +#include +#include +#include +#include "xpc.h" +#include + +/* XPC is exiting flag */ +int xpc_exiting; + +/* this partition's reserved page pointers */ +struct xpc_rsvd_page *xpc_rsvd_page; +static unsigned long *xpc_part_nasids; +unsigned long *xpc_mach_nasids; + +static int xpc_nasid_mask_nbytes; /* #of bytes in nasid mask */ +int xpc_nasid_mask_nlongs; /* #of longs in nasid mask */ + +struct xpc_partition *xpc_partitions; + +/* + * Guarantee that the kmalloc'd memory is cacheline aligned. + */ +void * +xpc_kmalloc_cacheline_aligned(size_t size, gfp_t flags, void **base) +{ + /* see if kmalloc will give us cachline aligned memory by default */ + *base = kmalloc(size, flags); + if (*base == NULL) + return NULL; + + if ((u64)*base == L1_CACHE_ALIGN((u64)*base)) + return *base; + + kfree(*base); + + /* nope, we'll have to do it ourselves */ + *base = kmalloc(size + L1_CACHE_BYTES, flags); + if (*base == NULL) + return NULL; + + return (void *)L1_CACHE_ALIGN((u64)*base); +} + +/* + * Given a nasid, get the physical address of the partition's reserved page + * for that nasid. This function returns 0 on any error. + */ +static unsigned long +xpc_get_rsvd_page_pa(int nasid) +{ + enum xp_retval ret; + u64 cookie = 0; + unsigned long rp_pa = nasid; /* seed with nasid */ + size_t len = 0; + size_t buf_len = 0; + void *buf = NULL; + void *buf_base = NULL; + enum xp_retval (*get_partition_rsvd_page_pa) + (void *, u64 *, unsigned long *, size_t *) = + xpc_arch_ops.get_partition_rsvd_page_pa; + + while (1) { + + /* !!! rp_pa will need to be _gpa on UV. + * ??? So do we save it into the architecture specific parts + * ??? of the xpc_partition structure? Do we rename this + * ??? function or have two versions? Rename rp_pa for UV to + * ??? rp_gpa? + */ + ret = get_partition_rsvd_page_pa(buf, &cookie, &rp_pa, &len); + + dev_dbg(xpc_part, "SAL returned with ret=%d, cookie=0x%016lx, " + "address=0x%016lx, len=0x%016lx\n", ret, + (unsigned long)cookie, rp_pa, len); + + if (ret != xpNeedMoreInfo) + break; + + /* !!! L1_CACHE_ALIGN() is only a sn2-bte_copy requirement */ + if (is_shub()) + len = L1_CACHE_ALIGN(len); + + if (len > buf_len) { + if (buf_base != NULL) + kfree(buf_base); + buf_len = L1_CACHE_ALIGN(len); + buf = xpc_kmalloc_cacheline_aligned(buf_len, GFP_KERNEL, + &buf_base); + if (buf_base == NULL) { + dev_err(xpc_part, "unable to kmalloc " + "len=0x%016lx\n", buf_len); + ret = xpNoMemory; + break; + } + } + + ret = xp_remote_memcpy(xp_pa(buf), rp_pa, len); + if (ret != xpSuccess) { + dev_dbg(xpc_part, "xp_remote_memcpy failed %d\n", ret); + break; + } + } + + kfree(buf_base); + + if (ret != xpSuccess) + rp_pa = 0; + + dev_dbg(xpc_part, "reserved page at phys address 0x%016lx\n", rp_pa); + return rp_pa; +} + +/* + * Fill the partition reserved page with the information needed by + * other partitions to discover we are alive and establish initial + * communications. + */ +int +xpc_setup_rsvd_page(void) +{ + int ret; + struct xpc_rsvd_page *rp; + unsigned long rp_pa; + unsigned long new_ts_jiffies; + + /* get the local reserved page's address */ + + preempt_disable(); + rp_pa = xpc_get_rsvd_page_pa(xp_cpu_to_nasid(smp_processor_id())); + preempt_enable(); + if (rp_pa == 0) { + dev_err(xpc_part, "SAL failed to locate the reserved page\n"); + return -ESRCH; + } + rp = (struct xpc_rsvd_page *)__va(xp_socket_pa(rp_pa)); + + if (rp->SAL_version < 3) { + /* SAL_versions < 3 had a SAL_partid defined as a u8 */ + rp->SAL_partid &= 0xff; + } + BUG_ON(rp->SAL_partid != xp_partition_id); + + if (rp->SAL_partid < 0 || rp->SAL_partid >= xp_max_npartitions) { + dev_err(xpc_part, "the reserved page's partid of %d is outside " + "supported range (< 0 || >= %d)\n", rp->SAL_partid, + xp_max_npartitions); + return -EINVAL; + } + + rp->version = XPC_RP_VERSION; + rp->max_npartitions = xp_max_npartitions; + + /* establish the actual sizes of the nasid masks */ + if (rp->SAL_version == 1) { + /* SAL_version 1 didn't set the nasids_size field */ + rp->SAL_nasids_size = 128; + } + xpc_nasid_mask_nbytes = rp->SAL_nasids_size; + xpc_nasid_mask_nlongs = BITS_TO_LONGS(rp->SAL_nasids_size * + BITS_PER_BYTE); + + /* setup the pointers to the various items in the reserved page */ + xpc_part_nasids = XPC_RP_PART_NASIDS(rp); + xpc_mach_nasids = XPC_RP_MACH_NASIDS(rp); + + ret = xpc_arch_ops.setup_rsvd_page(rp); + if (ret != 0) + return ret; + + /* + * Set timestamp of when reserved page was setup by XPC. + * This signifies to the remote partition that our reserved + * page is initialized. + */ + new_ts_jiffies = jiffies; + if (new_ts_jiffies == 0 || new_ts_jiffies == rp->ts_jiffies) + new_ts_jiffies++; + rp->ts_jiffies = new_ts_jiffies; + + xpc_rsvd_page = rp; + return 0; +} + +void +xpc_teardown_rsvd_page(void) +{ + /* a zero timestamp indicates our rsvd page is not initialized */ + xpc_rsvd_page->ts_jiffies = 0; +} + +/* + * Get a copy of a portion of the remote partition's rsvd page. + * + * remote_rp points to a buffer that is cacheline aligned for BTE copies and + * is large enough to contain a copy of their reserved page header and + * part_nasids mask. + */ +enum xp_retval +xpc_get_remote_rp(int nasid, unsigned long *discovered_nasids, + struct xpc_rsvd_page *remote_rp, unsigned long *remote_rp_pa) +{ + int l; + enum xp_retval ret; + + /* get the reserved page's physical address */ + + *remote_rp_pa = xpc_get_rsvd_page_pa(nasid); + if (*remote_rp_pa == 0) + return xpNoRsvdPageAddr; + + /* pull over the reserved page header and part_nasids mask */ + ret = xp_remote_memcpy(xp_pa(remote_rp), *remote_rp_pa, + XPC_RP_HEADER_SIZE + xpc_nasid_mask_nbytes); + if (ret != xpSuccess) + return ret; + + if (discovered_nasids != NULL) { + unsigned long *remote_part_nasids = + XPC_RP_PART_NASIDS(remote_rp); + + for (l = 0; l < xpc_nasid_mask_nlongs; l++) + discovered_nasids[l] |= remote_part_nasids[l]; + } + + /* zero timestamp indicates the reserved page has not been setup */ + if (remote_rp->ts_jiffies == 0) + return xpRsvdPageNotSet; + + if (XPC_VERSION_MAJOR(remote_rp->version) != + XPC_VERSION_MAJOR(XPC_RP_VERSION)) { + return xpBadVersion; + } + + /* check that both remote and local partids are valid for each side */ + if (remote_rp->SAL_partid < 0 || + remote_rp->SAL_partid >= xp_max_npartitions || + remote_rp->max_npartitions <= xp_partition_id) { + return xpInvalidPartid; + } + + if (remote_rp->SAL_partid == xp_partition_id) + return xpLocalPartid; + + return xpSuccess; +} + +/* + * See if the other side has responded to a partition deactivate request + * from us. Though we requested the remote partition to deactivate with regard + * to us, we really only need to wait for the other side to disengage from us. + */ +int +xpc_partition_disengaged(struct xpc_partition *part) +{ + short partid = XPC_PARTID(part); + int disengaged; + + disengaged = !xpc_arch_ops.partition_engaged(partid); + if (part->disengage_timeout) { + if (!disengaged) { + if (time_is_after_jiffies(part->disengage_timeout)) { + /* timelimit hasn't been reached yet */ + return 0; + } + + /* + * Other side hasn't responded to our deactivate + * request in a timely fashion, so assume it's dead. + */ + + dev_info(xpc_part, "deactivate request to remote " + "partition %d timed out\n", partid); + xpc_disengage_timedout = 1; + xpc_arch_ops.assume_partition_disengaged(partid); + disengaged = 1; + } + part->disengage_timeout = 0; + + /* cancel the timer function, provided it's not us */ + if (!in_interrupt()) + del_singleshot_timer_sync(&part->disengage_timer); + + DBUG_ON(part->act_state != XPC_P_AS_DEACTIVATING && + part->act_state != XPC_P_AS_INACTIVE); + if (part->act_state != XPC_P_AS_INACTIVE) + xpc_wakeup_channel_mgr(part); + + xpc_arch_ops.cancel_partition_deactivation_request(part); + } + return disengaged; +} + +/* + * Mark specified partition as active. + */ +enum xp_retval +xpc_mark_partition_active(struct xpc_partition *part) +{ + unsigned long irq_flags; + enum xp_retval ret; + + dev_dbg(xpc_part, "setting partition %d to ACTIVE\n", XPC_PARTID(part)); + + spin_lock_irqsave(&part->act_lock, irq_flags); + if (part->act_state == XPC_P_AS_ACTIVATING) { + part->act_state = XPC_P_AS_ACTIVE; + ret = xpSuccess; + } else { + DBUG_ON(part->reason == xpSuccess); + ret = part->reason; + } + spin_unlock_irqrestore(&part->act_lock, irq_flags); + + return ret; +} + +/* + * Start the process of deactivating the specified partition. + */ +void +xpc_deactivate_partition(const int line, struct xpc_partition *part, + enum xp_retval reason) +{ + unsigned long irq_flags; + + spin_lock_irqsave(&part->act_lock, irq_flags); + + if (part->act_state == XPC_P_AS_INACTIVE) { + XPC_SET_REASON(part, reason, line); + spin_unlock_irqrestore(&part->act_lock, irq_flags); + if (reason == xpReactivating) { + /* we interrupt ourselves to reactivate partition */ + xpc_arch_ops.request_partition_reactivation(part); + } + return; + } + if (part->act_state == XPC_P_AS_DEACTIVATING) { + if ((part->reason == xpUnloading && reason != xpUnloading) || + reason == xpReactivating) { + XPC_SET_REASON(part, reason, line); + } + spin_unlock_irqrestore(&part->act_lock, irq_flags); + return; + } + + part->act_state = XPC_P_AS_DEACTIVATING; + XPC_SET_REASON(part, reason, line); + + spin_unlock_irqrestore(&part->act_lock, irq_flags); + + /* ask remote partition to deactivate with regard to us */ + xpc_arch_ops.request_partition_deactivation(part); + + /* set a timelimit on the disengage phase of the deactivation request */ + part->disengage_timeout = jiffies + (xpc_disengage_timelimit * HZ); + part->disengage_timer.expires = part->disengage_timeout; + add_timer(&part->disengage_timer); + + dev_dbg(xpc_part, "bringing partition %d down, reason = %d\n", + XPC_PARTID(part), reason); + + xpc_partition_going_down(part, reason); +} + +/* + * Mark specified partition as inactive. + */ +void +xpc_mark_partition_inactive(struct xpc_partition *part) +{ + unsigned long irq_flags; + + dev_dbg(xpc_part, "setting partition %d to INACTIVE\n", + XPC_PARTID(part)); + + spin_lock_irqsave(&part->act_lock, irq_flags); + part->act_state = XPC_P_AS_INACTIVE; + spin_unlock_irqrestore(&part->act_lock, irq_flags); + part->remote_rp_pa = 0; +} + +/* + * SAL has provided a partition and machine mask. The partition mask + * contains a bit for each even nasid in our partition. The machine + * mask contains a bit for each even nasid in the entire machine. + * + * Using those two bit arrays, we can determine which nasids are + * known in the machine. Each should also have a reserved page + * initialized if they are available for partitioning. + */ +void +xpc_discovery(void) +{ + void *remote_rp_base; + struct xpc_rsvd_page *remote_rp; + unsigned long remote_rp_pa; + int region; + int region_size; + int max_regions; + int nasid; + unsigned long *discovered_nasids; + enum xp_retval ret; + + remote_rp = xpc_kmalloc_cacheline_aligned(XPC_RP_HEADER_SIZE + + xpc_nasid_mask_nbytes, + GFP_KERNEL, &remote_rp_base); + if (remote_rp == NULL) + return; + + discovered_nasids = kcalloc(xpc_nasid_mask_nlongs, sizeof(long), + GFP_KERNEL); + if (discovered_nasids == NULL) { + kfree(remote_rp_base); + return; + } + + /* + * The term 'region' in this context refers to the minimum number of + * nodes that can comprise an access protection grouping. The access + * protection is in regards to memory, IOI and IPI. + */ + region_size = xp_region_size; + + if (is_uv()) + max_regions = 256; + else { + max_regions = 64; + + switch (region_size) { + case 128: + max_regions *= 2; + /* fall through */ + case 64: + max_regions *= 2; + /* fall through */ + case 32: + max_regions *= 2; + region_size = 16; + DBUG_ON(!is_shub2()); + } + } + + for (region = 0; region < max_regions; region++) { + + if (xpc_exiting) + break; + + dev_dbg(xpc_part, "searching region %d\n", region); + + for (nasid = (region * region_size * 2); + nasid < ((region + 1) * region_size * 2); nasid += 2) { + + if (xpc_exiting) + break; + + dev_dbg(xpc_part, "checking nasid %d\n", nasid); + + if (test_bit(nasid / 2, xpc_part_nasids)) { + dev_dbg(xpc_part, "PROM indicates Nasid %d is " + "part of the local partition; skipping " + "region\n", nasid); + break; + } + + if (!(test_bit(nasid / 2, xpc_mach_nasids))) { + dev_dbg(xpc_part, "PROM indicates Nasid %d was " + "not on Numa-Link network at reset\n", + nasid); + continue; + } + + if (test_bit(nasid / 2, discovered_nasids)) { + dev_dbg(xpc_part, "Nasid %d is part of a " + "partition which was previously " + "discovered\n", nasid); + continue; + } + + /* pull over the rsvd page header & part_nasids mask */ + + ret = xpc_get_remote_rp(nasid, discovered_nasids, + remote_rp, &remote_rp_pa); + if (ret != xpSuccess) { + dev_dbg(xpc_part, "unable to get reserved page " + "from nasid %d, reason=%d\n", nasid, + ret); + + if (ret == xpLocalPartid) + break; + + continue; + } + + xpc_arch_ops.request_partition_activation(remote_rp, + remote_rp_pa, nasid); + } + } + + kfree(discovered_nasids); + kfree(remote_rp_base); +} + +/* + * Given a partid, get the nasids owned by that partition from the + * remote partition's reserved page. + */ +enum xp_retval +xpc_initiate_partid_to_nasids(short partid, void *nasid_mask) +{ + struct xpc_partition *part; + unsigned long part_nasid_pa; + + part = &xpc_partitions[partid]; + if (part->remote_rp_pa == 0) + return xpPartitionDown; + + memset(nasid_mask, 0, xpc_nasid_mask_nbytes); + + part_nasid_pa = (unsigned long)XPC_RP_PART_NASIDS(part->remote_rp_pa); + + return xp_remote_memcpy(xp_pa(nasid_mask), part_nasid_pa, + xpc_nasid_mask_nbytes); +} diff --git a/drivers/misc/sgi-xp/xpc_sn2.c b/drivers/misc/sgi-xp/xpc_sn2.c new file mode 100644 index 000000000..5a12d2a54 --- /dev/null +++ b/drivers/misc/sgi-xp/xpc_sn2.c @@ -0,0 +1,2459 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 2008-2009 Silicon Graphics, Inc. All Rights Reserved. + */ + +/* + * Cross Partition Communication (XPC) sn2-based functions. + * + * Architecture specific implementation of common functions. + * + */ + +#include +#include +#include +#include +#include +#include "xpc.h" + +/* + * Define the number of u64s required to represent all the C-brick nasids + * as a bitmap. The cross-partition kernel modules deal only with + * C-brick nasids, thus the need for bitmaps which don't account for + * odd-numbered (non C-brick) nasids. + */ +#define XPC_MAX_PHYSNODES_SN2 (MAX_NUMALINK_NODES / 2) +#define XP_NASID_MASK_BYTES_SN2 ((XPC_MAX_PHYSNODES_SN2 + 7) / 8) +#define XP_NASID_MASK_WORDS_SN2 ((XPC_MAX_PHYSNODES_SN2 + 63) / 64) + +/* + * Memory for XPC's amo variables is allocated by the MSPEC driver. These + * pages are located in the lowest granule. The lowest granule uses 4k pages + * for cached references and an alternate TLB handler to never provide a + * cacheable mapping for the entire region. This will prevent speculative + * reading of cached copies of our lines from being issued which will cause + * a PI FSB Protocol error to be generated by the SHUB. For XPC, we need 64 + * amo variables (based on XP_MAX_NPARTITIONS_SN2) to identify the senders of + * NOTIFY IRQs, 128 amo variables (based on XP_NASID_MASK_WORDS_SN2) to identify + * the senders of ACTIVATE IRQs, 1 amo variable to identify which remote + * partitions (i.e., XPCs) consider themselves currently engaged with the + * local XPC and 1 amo variable to request partition deactivation. + */ +#define XPC_NOTIFY_IRQ_AMOS_SN2 0 +#define XPC_ACTIVATE_IRQ_AMOS_SN2 (XPC_NOTIFY_IRQ_AMOS_SN2 + \ + XP_MAX_NPARTITIONS_SN2) +#define XPC_ENGAGED_PARTITIONS_AMO_SN2 (XPC_ACTIVATE_IRQ_AMOS_SN2 + \ + XP_NASID_MASK_WORDS_SN2) +#define XPC_DEACTIVATE_REQUEST_AMO_SN2 (XPC_ENGAGED_PARTITIONS_AMO_SN2 + 1) + +/* + * Buffer used to store a local copy of portions of a remote partition's + * reserved page (either its header and part_nasids mask, or its vars). + */ +static void *xpc_remote_copy_buffer_base_sn2; +static char *xpc_remote_copy_buffer_sn2; + +static struct xpc_vars_sn2 *xpc_vars_sn2; +static struct xpc_vars_part_sn2 *xpc_vars_part_sn2; + +static int +xpc_setup_partitions_sn2(void) +{ + /* nothing needs to be done */ + return 0; +} + +static void +xpc_teardown_partitions_sn2(void) +{ + /* nothing needs to be done */ +} + +/* SH_IPI_ACCESS shub register value on startup */ +static u64 xpc_sh1_IPI_access_sn2; +static u64 xpc_sh2_IPI_access0_sn2; +static u64 xpc_sh2_IPI_access1_sn2; +static u64 xpc_sh2_IPI_access2_sn2; +static u64 xpc_sh2_IPI_access3_sn2; + +/* + * Change protections to allow IPI operations. + */ +static void +xpc_allow_IPI_ops_sn2(void) +{ + int node; + int nasid; + + /* !!! The following should get moved into SAL. */ + if (is_shub2()) { + xpc_sh2_IPI_access0_sn2 = + (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS0)); + xpc_sh2_IPI_access1_sn2 = + (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS1)); + xpc_sh2_IPI_access2_sn2 = + (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS2)); + xpc_sh2_IPI_access3_sn2 = + (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS3)); + + for_each_online_node(node) { + nasid = cnodeid_to_nasid(node); + HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS0), + -1UL); + HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS1), + -1UL); + HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS2), + -1UL); + HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS3), + -1UL); + } + } else { + xpc_sh1_IPI_access_sn2 = + (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH1_IPI_ACCESS)); + + for_each_online_node(node) { + nasid = cnodeid_to_nasid(node); + HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH1_IPI_ACCESS), + -1UL); + } + } +} + +/* + * Restrict protections to disallow IPI operations. + */ +static void +xpc_disallow_IPI_ops_sn2(void) +{ + int node; + int nasid; + + /* !!! The following should get moved into SAL. */ + if (is_shub2()) { + for_each_online_node(node) { + nasid = cnodeid_to_nasid(node); + HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS0), + xpc_sh2_IPI_access0_sn2); + HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS1), + xpc_sh2_IPI_access1_sn2); + HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS2), + xpc_sh2_IPI_access2_sn2); + HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS3), + xpc_sh2_IPI_access3_sn2); + } + } else { + for_each_online_node(node) { + nasid = cnodeid_to_nasid(node); + HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH1_IPI_ACCESS), + xpc_sh1_IPI_access_sn2); + } + } +} + +/* + * The following set of functions are used for the sending and receiving of + * IRQs (also known as IPIs). There are two flavors of IRQs, one that is + * associated with partition activity (SGI_XPC_ACTIVATE) and the other that + * is associated with channel activity (SGI_XPC_NOTIFY). + */ + +static u64 +xpc_receive_IRQ_amo_sn2(struct amo *amo) +{ + return FETCHOP_LOAD_OP(TO_AMO((u64)&amo->variable), FETCHOP_CLEAR); +} + +static enum xp_retval +xpc_send_IRQ_sn2(struct amo *amo, u64 flag, int nasid, int phys_cpuid, + int vector) +{ + int ret = 0; + unsigned long irq_flags; + + local_irq_save(irq_flags); + + FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_OR, flag); + sn_send_IPI_phys(nasid, phys_cpuid, vector, 0); + + /* + * We must always use the nofault function regardless of whether we + * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we + * didn't, we'd never know that the other partition is down and would + * keep sending IRQs and amos to it until the heartbeat times out. + */ + ret = xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo->variable), + xp_nofault_PIOR_target)); + + local_irq_restore(irq_flags); + + return (ret == 0) ? xpSuccess : xpPioReadError; +} + +static struct amo * +xpc_init_IRQ_amo_sn2(int index) +{ + struct amo *amo = xpc_vars_sn2->amos_page + index; + + (void)xpc_receive_IRQ_amo_sn2(amo); /* clear amo variable */ + return amo; +} + +/* + * Functions associated with SGI_XPC_ACTIVATE IRQ. + */ + +/* + * Notify the heartbeat check thread that an activate IRQ has been received. + */ +static irqreturn_t +xpc_handle_activate_IRQ_sn2(int irq, void *dev_id) +{ + unsigned long irq_flags; + + spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags); + xpc_activate_IRQ_rcvd++; + spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags); + + wake_up_interruptible(&xpc_activate_IRQ_wq); + return IRQ_HANDLED; +} + +/* + * Flag the appropriate amo variable and send an IRQ to the specified node. + */ +static void +xpc_send_activate_IRQ_sn2(unsigned long amos_page_pa, int from_nasid, + int to_nasid, int to_phys_cpuid) +{ + struct amo *amos = (struct amo *)__va(amos_page_pa + + (XPC_ACTIVATE_IRQ_AMOS_SN2 * + sizeof(struct amo))); + + (void)xpc_send_IRQ_sn2(&amos[BIT_WORD(from_nasid / 2)], + BIT_MASK(from_nasid / 2), to_nasid, + to_phys_cpuid, SGI_XPC_ACTIVATE); +} + +static void +xpc_send_local_activate_IRQ_sn2(int from_nasid) +{ + unsigned long irq_flags; + struct amo *amos = (struct amo *)__va(xpc_vars_sn2->amos_page_pa + + (XPC_ACTIVATE_IRQ_AMOS_SN2 * + sizeof(struct amo))); + + /* fake the sending and receipt of an activate IRQ from remote nasid */ + FETCHOP_STORE_OP(TO_AMO((u64)&amos[BIT_WORD(from_nasid / 2)].variable), + FETCHOP_OR, BIT_MASK(from_nasid / 2)); + + spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags); + xpc_activate_IRQ_rcvd++; + spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags); + + wake_up_interruptible(&xpc_activate_IRQ_wq); +} + +/* + * Functions associated with SGI_XPC_NOTIFY IRQ. + */ + +/* + * Check to see if any chctl flags were sent from the specified partition. + */ +static void +xpc_check_for_sent_chctl_flags_sn2(struct xpc_partition *part) +{ + union xpc_channel_ctl_flags chctl; + unsigned long irq_flags; + + chctl.all_flags = xpc_receive_IRQ_amo_sn2(part->sn.sn2. + local_chctl_amo_va); + if (chctl.all_flags == 0) + return; + + spin_lock_irqsave(&part->chctl_lock, irq_flags); + part->chctl.all_flags |= chctl.all_flags; + spin_unlock_irqrestore(&part->chctl_lock, irq_flags); + + dev_dbg(xpc_chan, "received notify IRQ from partid=%d, chctl.all_flags=" + "0x%llx\n", XPC_PARTID(part), chctl.all_flags); + + xpc_wakeup_channel_mgr(part); +} + +/* + * Handle the receipt of a SGI_XPC_NOTIFY IRQ by seeing whether the specified + * partition actually sent it. Since SGI_XPC_NOTIFY IRQs may be shared by more + * than one partition, we use an amo structure per partition to indicate + * whether a partition has sent an IRQ or not. If it has, then wake up the + * associated kthread to handle it. + * + * All SGI_XPC_NOTIFY IRQs received by XPC are the result of IRQs sent by XPC + * running on other partitions. + * + * Noteworthy Arguments: + * + * irq - Interrupt ReQuest number. NOT USED. + * + * dev_id - partid of IRQ's potential sender. + */ +static irqreturn_t +xpc_handle_notify_IRQ_sn2(int irq, void *dev_id) +{ + short partid = (short)(u64)dev_id; + struct xpc_partition *part = &xpc_partitions[partid]; + + DBUG_ON(partid < 0 || partid >= XP_MAX_NPARTITIONS_SN2); + + if (xpc_part_ref(part)) { + xpc_check_for_sent_chctl_flags_sn2(part); + + xpc_part_deref(part); + } + return IRQ_HANDLED; +} + +/* + * Check to see if xpc_handle_notify_IRQ_sn2() dropped any IRQs on the floor + * because the write to their associated amo variable completed after the IRQ + * was received. + */ +static void +xpc_check_for_dropped_notify_IRQ_sn2(struct timer_list *t) +{ + struct xpc_partition *part = + from_timer(part, t, sn.sn2.dropped_notify_IRQ_timer); + + if (xpc_part_ref(part)) { + xpc_check_for_sent_chctl_flags_sn2(part); + + t->expires = jiffies + XPC_DROPPED_NOTIFY_IRQ_WAIT_INTERVAL; + add_timer(t); + xpc_part_deref(part); + } +} + +/* + * Send a notify IRQ to the remote partition that is associated with the + * specified channel. + */ +static void +xpc_send_notify_IRQ_sn2(struct xpc_channel *ch, u8 chctl_flag, + char *chctl_flag_string, unsigned long *irq_flags) +{ + struct xpc_partition *part = &xpc_partitions[ch->partid]; + struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2; + union xpc_channel_ctl_flags chctl = { 0 }; + enum xp_retval ret; + + if (likely(part->act_state != XPC_P_AS_DEACTIVATING)) { + chctl.flags[ch->number] = chctl_flag; + ret = xpc_send_IRQ_sn2(part_sn2->remote_chctl_amo_va, + chctl.all_flags, + part_sn2->notify_IRQ_nasid, + part_sn2->notify_IRQ_phys_cpuid, + SGI_XPC_NOTIFY); + dev_dbg(xpc_chan, "%s sent to partid=%d, channel=%d, ret=%d\n", + chctl_flag_string, ch->partid, ch->number, ret); + if (unlikely(ret != xpSuccess)) { + if (irq_flags != NULL) + spin_unlock_irqrestore(&ch->lock, *irq_flags); + XPC_DEACTIVATE_PARTITION(part, ret); + if (irq_flags != NULL) + spin_lock_irqsave(&ch->lock, *irq_flags); + } + } +} + +#define XPC_SEND_NOTIFY_IRQ_SN2(_ch, _ipi_f, _irq_f) \ + xpc_send_notify_IRQ_sn2(_ch, _ipi_f, #_ipi_f, _irq_f) + +/* + * Make it look like the remote partition, which is associated with the + * specified channel, sent us a notify IRQ. This faked IRQ will be handled + * by xpc_check_for_dropped_notify_IRQ_sn2(). + */ +static void +xpc_send_local_notify_IRQ_sn2(struct xpc_channel *ch, u8 chctl_flag, + char *chctl_flag_string) +{ + struct xpc_partition *part = &xpc_partitions[ch->partid]; + union xpc_channel_ctl_flags chctl = { 0 }; + + chctl.flags[ch->number] = chctl_flag; + FETCHOP_STORE_OP(TO_AMO((u64)&part->sn.sn2.local_chctl_amo_va-> + variable), FETCHOP_OR, chctl.all_flags); + dev_dbg(xpc_chan, "%s sent local from partid=%d, channel=%d\n", + chctl_flag_string, ch->partid, ch->number); +} + +#define XPC_SEND_LOCAL_NOTIFY_IRQ_SN2(_ch, _ipi_f) \ + xpc_send_local_notify_IRQ_sn2(_ch, _ipi_f, #_ipi_f) + +static void +xpc_send_chctl_closerequest_sn2(struct xpc_channel *ch, + unsigned long *irq_flags) +{ + struct xpc_openclose_args *args = ch->sn.sn2.local_openclose_args; + + args->reason = ch->reason; + XPC_SEND_NOTIFY_IRQ_SN2(ch, XPC_CHCTL_CLOSEREQUEST, irq_flags); +} + +static void +xpc_send_chctl_closereply_sn2(struct xpc_channel *ch, unsigned long *irq_flags) +{ + XPC_SEND_NOTIFY_IRQ_SN2(ch, XPC_CHCTL_CLOSEREPLY, irq_flags); +} + +static void +xpc_send_chctl_openrequest_sn2(struct xpc_channel *ch, unsigned long *irq_flags) +{ + struct xpc_openclose_args *args = ch->sn.sn2.local_openclose_args; + + args->entry_size = ch->entry_size; + args->local_nentries = ch->local_nentries; + XPC_SEND_NOTIFY_IRQ_SN2(ch, XPC_CHCTL_OPENREQUEST, irq_flags); +} + +static void +xpc_send_chctl_openreply_sn2(struct xpc_channel *ch, unsigned long *irq_flags) +{ + struct xpc_openclose_args *args = ch->sn.sn2.local_openclose_args; + + args->remote_nentries = ch->remote_nentries; + args->local_nentries = ch->local_nentries; + args->local_msgqueue_pa = xp_pa(ch->sn.sn2.local_msgqueue); + XPC_SEND_NOTIFY_IRQ_SN2(ch, XPC_CHCTL_OPENREPLY, irq_flags); +} + +static void +xpc_send_chctl_opencomplete_sn2(struct xpc_channel *ch, + unsigned long *irq_flags) +{ + XPC_SEND_NOTIFY_IRQ_SN2(ch, XPC_CHCTL_OPENCOMPLETE, irq_flags); +} + +static void +xpc_send_chctl_msgrequest_sn2(struct xpc_channel *ch) +{ + XPC_SEND_NOTIFY_IRQ_SN2(ch, XPC_CHCTL_MSGREQUEST, NULL); +} + +static void +xpc_send_chctl_local_msgrequest_sn2(struct xpc_channel *ch) +{ + XPC_SEND_LOCAL_NOTIFY_IRQ_SN2(ch, XPC_CHCTL_MSGREQUEST); +} + +static enum xp_retval +xpc_save_remote_msgqueue_pa_sn2(struct xpc_channel *ch, + unsigned long msgqueue_pa) +{ + ch->sn.sn2.remote_msgqueue_pa = msgqueue_pa; + return xpSuccess; +} + +/* + * This next set of functions are used to keep track of when a partition is + * potentially engaged in accessing memory belonging to another partition. + */ + +static void +xpc_indicate_partition_engaged_sn2(struct xpc_partition *part) +{ + unsigned long irq_flags; + struct amo *amo = (struct amo *)__va(part->sn.sn2.remote_amos_page_pa + + (XPC_ENGAGED_PARTITIONS_AMO_SN2 * + sizeof(struct amo))); + + local_irq_save(irq_flags); + + /* set bit corresponding to our partid in remote partition's amo */ + FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_OR, + BIT(sn_partition_id)); + + /* + * We must always use the nofault function regardless of whether we + * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we + * didn't, we'd never know that the other partition is down and would + * keep sending IRQs and amos to it until the heartbeat times out. + */ + (void)xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo-> + variable), + xp_nofault_PIOR_target)); + + local_irq_restore(irq_flags); +} + +static void +xpc_indicate_partition_disengaged_sn2(struct xpc_partition *part) +{ + struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2; + unsigned long irq_flags; + struct amo *amo = (struct amo *)__va(part_sn2->remote_amos_page_pa + + (XPC_ENGAGED_PARTITIONS_AMO_SN2 * + sizeof(struct amo))); + + local_irq_save(irq_flags); + + /* clear bit corresponding to our partid in remote partition's amo */ + FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_AND, + ~BIT(sn_partition_id)); + + /* + * We must always use the nofault function regardless of whether we + * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we + * didn't, we'd never know that the other partition is down and would + * keep sending IRQs and amos to it until the heartbeat times out. + */ + (void)xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo-> + variable), + xp_nofault_PIOR_target)); + + local_irq_restore(irq_flags); + + /* + * Send activate IRQ to get other side to see that we've cleared our + * bit in their engaged partitions amo. + */ + xpc_send_activate_IRQ_sn2(part_sn2->remote_amos_page_pa, + cnodeid_to_nasid(0), + part_sn2->activate_IRQ_nasid, + part_sn2->activate_IRQ_phys_cpuid); +} + +static void +xpc_assume_partition_disengaged_sn2(short partid) +{ + struct amo *amo = xpc_vars_sn2->amos_page + + XPC_ENGAGED_PARTITIONS_AMO_SN2; + + /* clear bit(s) based on partid mask in our partition's amo */ + FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_AND, + ~BIT(partid)); +} + +static int +xpc_partition_engaged_sn2(short partid) +{ + struct amo *amo = xpc_vars_sn2->amos_page + + XPC_ENGAGED_PARTITIONS_AMO_SN2; + + /* our partition's amo variable ANDed with partid mask */ + return (FETCHOP_LOAD_OP(TO_AMO((u64)&amo->variable), FETCHOP_LOAD) & + BIT(partid)) != 0; +} + +static int +xpc_any_partition_engaged_sn2(void) +{ + struct amo *amo = xpc_vars_sn2->amos_page + + XPC_ENGAGED_PARTITIONS_AMO_SN2; + + /* our partition's amo variable */ + return FETCHOP_LOAD_OP(TO_AMO((u64)&amo->variable), FETCHOP_LOAD) != 0; +} + +/* original protection values for each node */ +static u64 xpc_prot_vec_sn2[MAX_NUMNODES]; + +/* + * Change protections to allow amo operations on non-Shub 1.1 systems. + */ +static enum xp_retval +xpc_allow_amo_ops_sn2(struct amo *amos_page) +{ + enum xp_retval ret = xpSuccess; + + /* + * On SHUB 1.1, we cannot call sn_change_memprotect() since the BIST + * collides with memory operations. On those systems we call + * xpc_allow_amo_ops_shub_wars_1_1_sn2() instead. + */ + if (!enable_shub_wars_1_1()) + ret = xp_expand_memprotect(ia64_tpa((u64)amos_page), PAGE_SIZE); + + return ret; +} + +/* + * Change protections to allow amo operations on Shub 1.1 systems. + */ +static void +xpc_allow_amo_ops_shub_wars_1_1_sn2(void) +{ + int node; + int nasid; + + if (!enable_shub_wars_1_1()) + return; + + for_each_online_node(node) { + nasid = cnodeid_to_nasid(node); + /* save current protection values */ + xpc_prot_vec_sn2[node] = + (u64)HUB_L((u64 *)GLOBAL_MMR_ADDR(nasid, + SH1_MD_DQLP_MMR_DIR_PRIVEC0)); + /* open up everything */ + HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, + SH1_MD_DQLP_MMR_DIR_PRIVEC0), + -1UL); + HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, + SH1_MD_DQRP_MMR_DIR_PRIVEC0), + -1UL); + } +} + +static enum xp_retval +xpc_get_partition_rsvd_page_pa_sn2(void *buf, u64 *cookie, unsigned long *rp_pa, + size_t *len) +{ + s64 status; + enum xp_retval ret; + + status = sn_partition_reserved_page_pa((u64)buf, cookie, + (u64 *)rp_pa, (u64 *)len); + if (status == SALRET_OK) + ret = xpSuccess; + else if (status == SALRET_MORE_PASSES) + ret = xpNeedMoreInfo; + else + ret = xpSalError; + + return ret; +} + + +static int +xpc_setup_rsvd_page_sn2(struct xpc_rsvd_page *rp) +{ + struct amo *amos_page; + int i; + int ret; + + xpc_vars_sn2 = XPC_RP_VARS(rp); + + rp->sn.sn2.vars_pa = xp_pa(xpc_vars_sn2); + + /* vars_part array follows immediately after vars */ + xpc_vars_part_sn2 = (struct xpc_vars_part_sn2 *)((u8 *)XPC_RP_VARS(rp) + + XPC_RP_VARS_SIZE); + + /* + * Before clearing xpc_vars_sn2, see if a page of amos had been + * previously allocated. If not we'll need to allocate one and set + * permissions so that cross-partition amos are allowed. + * + * The allocated amo page needs MCA reporting to remain disabled after + * XPC has unloaded. To make this work, we keep a copy of the pointer + * to this page (i.e., amos_page) in the struct xpc_vars_sn2 structure, + * which is pointed to by the reserved page, and re-use that saved copy + * on subsequent loads of XPC. This amo page is never freed, and its + * memory protections are never restricted. + */ + amos_page = xpc_vars_sn2->amos_page; + if (amos_page == NULL) { + amos_page = (struct amo *)TO_AMO(uncached_alloc_page(0, 1)); + if (amos_page == NULL) { + dev_err(xpc_part, "can't allocate page of amos\n"); + return -ENOMEM; + } + + /* + * Open up amo-R/W to cpu. This is done on Shub 1.1 systems + * when xpc_allow_amo_ops_shub_wars_1_1_sn2() is called. + */ + ret = xpc_allow_amo_ops_sn2(amos_page); + if (ret != xpSuccess) { + dev_err(xpc_part, "can't allow amo operations\n"); + uncached_free_page(__IA64_UNCACHED_OFFSET | + TO_PHYS((u64)amos_page), 1); + return -EPERM; + } + } + + /* clear xpc_vars_sn2 */ + memset(xpc_vars_sn2, 0, sizeof(struct xpc_vars_sn2)); + + xpc_vars_sn2->version = XPC_V_VERSION; + xpc_vars_sn2->activate_IRQ_nasid = cpuid_to_nasid(0); + xpc_vars_sn2->activate_IRQ_phys_cpuid = cpu_physical_id(0); + xpc_vars_sn2->vars_part_pa = xp_pa(xpc_vars_part_sn2); + xpc_vars_sn2->amos_page_pa = ia64_tpa((u64)amos_page); + xpc_vars_sn2->amos_page = amos_page; /* save for next load of XPC */ + + /* clear xpc_vars_part_sn2 */ + memset((u64 *)xpc_vars_part_sn2, 0, sizeof(struct xpc_vars_part_sn2) * + XP_MAX_NPARTITIONS_SN2); + + /* initialize the activate IRQ related amo variables */ + for (i = 0; i < xpc_nasid_mask_nlongs; i++) + (void)xpc_init_IRQ_amo_sn2(XPC_ACTIVATE_IRQ_AMOS_SN2 + i); + + /* initialize the engaged remote partitions related amo variables */ + (void)xpc_init_IRQ_amo_sn2(XPC_ENGAGED_PARTITIONS_AMO_SN2); + (void)xpc_init_IRQ_amo_sn2(XPC_DEACTIVATE_REQUEST_AMO_SN2); + + return 0; +} + +static int +xpc_hb_allowed_sn2(short partid, void *heartbeating_to_mask) +{ + return test_bit(partid, heartbeating_to_mask); +} + +static void +xpc_allow_hb_sn2(short partid) +{ + DBUG_ON(xpc_vars_sn2 == NULL); + set_bit(partid, xpc_vars_sn2->heartbeating_to_mask); +} + +static void +xpc_disallow_hb_sn2(short partid) +{ + DBUG_ON(xpc_vars_sn2 == NULL); + clear_bit(partid, xpc_vars_sn2->heartbeating_to_mask); +} + +static void +xpc_disallow_all_hbs_sn2(void) +{ + DBUG_ON(xpc_vars_sn2 == NULL); + bitmap_zero(xpc_vars_sn2->heartbeating_to_mask, xp_max_npartitions); +} + +static void +xpc_increment_heartbeat_sn2(void) +{ + xpc_vars_sn2->heartbeat++; +} + +static void +xpc_offline_heartbeat_sn2(void) +{ + xpc_increment_heartbeat_sn2(); + xpc_vars_sn2->heartbeat_offline = 1; +} + +static void +xpc_online_heartbeat_sn2(void) +{ + xpc_increment_heartbeat_sn2(); + xpc_vars_sn2->heartbeat_offline = 0; +} + +static void +xpc_heartbeat_init_sn2(void) +{ + DBUG_ON(xpc_vars_sn2 == NULL); + + bitmap_zero(xpc_vars_sn2->heartbeating_to_mask, XP_MAX_NPARTITIONS_SN2); + xpc_online_heartbeat_sn2(); +} + +static void +xpc_heartbeat_exit_sn2(void) +{ + xpc_offline_heartbeat_sn2(); +} + +static enum xp_retval +xpc_get_remote_heartbeat_sn2(struct xpc_partition *part) +{ + struct xpc_vars_sn2 *remote_vars; + enum xp_retval ret; + + remote_vars = (struct xpc_vars_sn2 *)xpc_remote_copy_buffer_sn2; + + /* pull the remote vars structure that contains the heartbeat */ + ret = xp_remote_memcpy(xp_pa(remote_vars), + part->sn.sn2.remote_vars_pa, + XPC_RP_VARS_SIZE); + if (ret != xpSuccess) + return ret; + + dev_dbg(xpc_part, "partid=%d, heartbeat=%lld, last_heartbeat=%lld, " + "heartbeat_offline=%lld, HB_mask[0]=0x%lx\n", XPC_PARTID(part), + remote_vars->heartbeat, part->last_heartbeat, + remote_vars->heartbeat_offline, + remote_vars->heartbeating_to_mask[0]); + + if ((remote_vars->heartbeat == part->last_heartbeat && + !remote_vars->heartbeat_offline) || + !xpc_hb_allowed_sn2(sn_partition_id, + remote_vars->heartbeating_to_mask)) { + ret = xpNoHeartbeat; + } else { + part->last_heartbeat = remote_vars->heartbeat; + } + + return ret; +} + +/* + * Get a copy of the remote partition's XPC variables from the reserved page. + * + * remote_vars points to a buffer that is cacheline aligned for BTE copies and + * assumed to be of size XPC_RP_VARS_SIZE. + */ +static enum xp_retval +xpc_get_remote_vars_sn2(unsigned long remote_vars_pa, + struct xpc_vars_sn2 *remote_vars) +{ + enum xp_retval ret; + + if (remote_vars_pa == 0) + return xpVarsNotSet; + + /* pull over the cross partition variables */ + ret = xp_remote_memcpy(xp_pa(remote_vars), remote_vars_pa, + XPC_RP_VARS_SIZE); + if (ret != xpSuccess) + return ret; + + if (XPC_VERSION_MAJOR(remote_vars->version) != + XPC_VERSION_MAJOR(XPC_V_VERSION)) { + return xpBadVersion; + } + + return xpSuccess; +} + +static void +xpc_request_partition_activation_sn2(struct xpc_rsvd_page *remote_rp, + unsigned long remote_rp_pa, int nasid) +{ + xpc_send_local_activate_IRQ_sn2(nasid); +} + +static void +xpc_request_partition_reactivation_sn2(struct xpc_partition *part) +{ + xpc_send_local_activate_IRQ_sn2(part->sn.sn2.activate_IRQ_nasid); +} + +static void +xpc_request_partition_deactivation_sn2(struct xpc_partition *part) +{ + struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2; + unsigned long irq_flags; + struct amo *amo = (struct amo *)__va(part_sn2->remote_amos_page_pa + + (XPC_DEACTIVATE_REQUEST_AMO_SN2 * + sizeof(struct amo))); + + local_irq_save(irq_flags); + + /* set bit corresponding to our partid in remote partition's amo */ + FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_OR, + BIT(sn_partition_id)); + + /* + * We must always use the nofault function regardless of whether we + * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we + * didn't, we'd never know that the other partition is down and would + * keep sending IRQs and amos to it until the heartbeat times out. + */ + (void)xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo-> + variable), + xp_nofault_PIOR_target)); + + local_irq_restore(irq_flags); + + /* + * Send activate IRQ to get other side to see that we've set our + * bit in their deactivate request amo. + */ + xpc_send_activate_IRQ_sn2(part_sn2->remote_amos_page_pa, + cnodeid_to_nasid(0), + part_sn2->activate_IRQ_nasid, + part_sn2->activate_IRQ_phys_cpuid); +} + +static void +xpc_cancel_partition_deactivation_request_sn2(struct xpc_partition *part) +{ + unsigned long irq_flags; + struct amo *amo = (struct amo *)__va(part->sn.sn2.remote_amos_page_pa + + (XPC_DEACTIVATE_REQUEST_AMO_SN2 * + sizeof(struct amo))); + + local_irq_save(irq_flags); + + /* clear bit corresponding to our partid in remote partition's amo */ + FETCHOP_STORE_OP(TO_AMO((u64)&amo->variable), FETCHOP_AND, + ~BIT(sn_partition_id)); + + /* + * We must always use the nofault function regardless of whether we + * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we + * didn't, we'd never know that the other partition is down and would + * keep sending IRQs and amos to it until the heartbeat times out. + */ + (void)xp_nofault_PIOR((u64 *)GLOBAL_MMR_ADDR(NASID_GET(&amo-> + variable), + xp_nofault_PIOR_target)); + + local_irq_restore(irq_flags); +} + +static int +xpc_partition_deactivation_requested_sn2(short partid) +{ + struct amo *amo = xpc_vars_sn2->amos_page + + XPC_DEACTIVATE_REQUEST_AMO_SN2; + + /* our partition's amo variable ANDed with partid mask */ + return (FETCHOP_LOAD_OP(TO_AMO((u64)&amo->variable), FETCHOP_LOAD) & + BIT(partid)) != 0; +} + +/* + * Update the remote partition's info. + */ +static void +xpc_update_partition_info_sn2(struct xpc_partition *part, u8 remote_rp_version, + unsigned long *remote_rp_ts_jiffies, + unsigned long remote_rp_pa, + unsigned long remote_vars_pa, + struct xpc_vars_sn2 *remote_vars) +{ + struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2; + + part->remote_rp_version = remote_rp_version; + dev_dbg(xpc_part, " remote_rp_version = 0x%016x\n", + part->remote_rp_version); + + part->remote_rp_ts_jiffies = *remote_rp_ts_jiffies; + dev_dbg(xpc_part, " remote_rp_ts_jiffies = 0x%016lx\n", + part->remote_rp_ts_jiffies); + + part->remote_rp_pa = remote_rp_pa; + dev_dbg(xpc_part, " remote_rp_pa = 0x%016lx\n", part->remote_rp_pa); + + part_sn2->remote_vars_pa = remote_vars_pa; + dev_dbg(xpc_part, " remote_vars_pa = 0x%016lx\n", + part_sn2->remote_vars_pa); + + part->last_heartbeat = remote_vars->heartbeat - 1; + dev_dbg(xpc_part, " last_heartbeat = 0x%016llx\n", + part->last_heartbeat); + + part_sn2->remote_vars_part_pa = remote_vars->vars_part_pa; + dev_dbg(xpc_part, " remote_vars_part_pa = 0x%016lx\n", + part_sn2->remote_vars_part_pa); + + part_sn2->activate_IRQ_nasid = remote_vars->activate_IRQ_nasid; + dev_dbg(xpc_part, " activate_IRQ_nasid = 0x%x\n", + part_sn2->activate_IRQ_nasid); + + part_sn2->activate_IRQ_phys_cpuid = + remote_vars->activate_IRQ_phys_cpuid; + dev_dbg(xpc_part, " activate_IRQ_phys_cpuid = 0x%x\n", + part_sn2->activate_IRQ_phys_cpuid); + + part_sn2->remote_amos_page_pa = remote_vars->amos_page_pa; + dev_dbg(xpc_part, " remote_amos_page_pa = 0x%lx\n", + part_sn2->remote_amos_page_pa); + + part_sn2->remote_vars_version = remote_vars->version; + dev_dbg(xpc_part, " remote_vars_version = 0x%x\n", + part_sn2->remote_vars_version); +} + +/* + * Prior code has determined the nasid which generated a activate IRQ. + * Inspect that nasid to determine if its partition needs to be activated + * or deactivated. + * + * A partition is considered "awaiting activation" if our partition + * flags indicate it is not active and it has a heartbeat. A + * partition is considered "awaiting deactivation" if our partition + * flags indicate it is active but it has no heartbeat or it is not + * sending its heartbeat to us. + * + * To determine the heartbeat, the remote nasid must have a properly + * initialized reserved page. + */ +static void +xpc_identify_activate_IRQ_req_sn2(int nasid) +{ + struct xpc_rsvd_page *remote_rp; + struct xpc_vars_sn2 *remote_vars; + unsigned long remote_rp_pa; + unsigned long remote_vars_pa; + int remote_rp_version; + int reactivate = 0; + unsigned long remote_rp_ts_jiffies = 0; + short partid; + struct xpc_partition *part; + struct xpc_partition_sn2 *part_sn2; + enum xp_retval ret; + + /* pull over the reserved page structure */ + + remote_rp = (struct xpc_rsvd_page *)xpc_remote_copy_buffer_sn2; + + ret = xpc_get_remote_rp(nasid, NULL, remote_rp, &remote_rp_pa); + if (ret != xpSuccess) { + dev_warn(xpc_part, "unable to get reserved page from nasid %d, " + "which sent interrupt, reason=%d\n", nasid, ret); + return; + } + + remote_vars_pa = remote_rp->sn.sn2.vars_pa; + remote_rp_version = remote_rp->version; + remote_rp_ts_jiffies = remote_rp->ts_jiffies; + + partid = remote_rp->SAL_partid; + part = &xpc_partitions[partid]; + part_sn2 = &part->sn.sn2; + + /* pull over the cross partition variables */ + + remote_vars = (struct xpc_vars_sn2 *)xpc_remote_copy_buffer_sn2; + + ret = xpc_get_remote_vars_sn2(remote_vars_pa, remote_vars); + if (ret != xpSuccess) { + dev_warn(xpc_part, "unable to get XPC variables from nasid %d, " + "which sent interrupt, reason=%d\n", nasid, ret); + + XPC_DEACTIVATE_PARTITION(part, ret); + return; + } + + part->activate_IRQ_rcvd++; + + dev_dbg(xpc_part, "partid for nasid %d is %d; IRQs = %d; HB = " + "%lld:0x%lx\n", (int)nasid, (int)partid, + part->activate_IRQ_rcvd, + remote_vars->heartbeat, remote_vars->heartbeating_to_mask[0]); + + if (xpc_partition_disengaged(part) && + part->act_state == XPC_P_AS_INACTIVE) { + + xpc_update_partition_info_sn2(part, remote_rp_version, + &remote_rp_ts_jiffies, + remote_rp_pa, remote_vars_pa, + remote_vars); + + if (xpc_partition_deactivation_requested_sn2(partid)) { + /* + * Other side is waiting on us to deactivate even though + * we already have. + */ + return; + } + + xpc_activate_partition(part); + return; + } + + DBUG_ON(part->remote_rp_version == 0); + DBUG_ON(part_sn2->remote_vars_version == 0); + + if (remote_rp_ts_jiffies != part->remote_rp_ts_jiffies) { + + /* the other side rebooted */ + + DBUG_ON(xpc_partition_engaged_sn2(partid)); + DBUG_ON(xpc_partition_deactivation_requested_sn2(partid)); + + xpc_update_partition_info_sn2(part, remote_rp_version, + &remote_rp_ts_jiffies, + remote_rp_pa, remote_vars_pa, + remote_vars); + reactivate = 1; + } + + if (part->disengage_timeout > 0 && !xpc_partition_disengaged(part)) { + /* still waiting on other side to disengage from us */ + return; + } + + if (reactivate) + XPC_DEACTIVATE_PARTITION(part, xpReactivating); + else if (xpc_partition_deactivation_requested_sn2(partid)) + XPC_DEACTIVATE_PARTITION(part, xpOtherGoingDown); +} + +/* + * Loop through the activation amo variables and process any bits + * which are set. Each bit indicates a nasid sending a partition + * activation or deactivation request. + * + * Return #of IRQs detected. + */ +int +xpc_identify_activate_IRQ_sender_sn2(void) +{ + int l; + int b; + unsigned long nasid_mask_long; + u64 nasid; /* remote nasid */ + int n_IRQs_detected = 0; + struct amo *act_amos; + + act_amos = xpc_vars_sn2->amos_page + XPC_ACTIVATE_IRQ_AMOS_SN2; + + /* scan through activate amo variables looking for non-zero entries */ + for (l = 0; l < xpc_nasid_mask_nlongs; l++) { + + if (xpc_exiting) + break; + + nasid_mask_long = xpc_receive_IRQ_amo_sn2(&act_amos[l]); + + b = find_first_bit(&nasid_mask_long, BITS_PER_LONG); + if (b >= BITS_PER_LONG) { + /* no IRQs from nasids in this amo variable */ + continue; + } + + dev_dbg(xpc_part, "amo[%d] gave back 0x%lx\n", l, + nasid_mask_long); + + /* + * If this nasid has been added to the machine since + * our partition was reset, this will retain the + * remote nasid in our reserved pages machine mask. + * This is used in the event of module reload. + */ + xpc_mach_nasids[l] |= nasid_mask_long; + + /* locate the nasid(s) which sent interrupts */ + + do { + n_IRQs_detected++; + nasid = (l * BITS_PER_LONG + b) * 2; + dev_dbg(xpc_part, "interrupt from nasid %lld\n", nasid); + xpc_identify_activate_IRQ_req_sn2(nasid); + + b = find_next_bit(&nasid_mask_long, BITS_PER_LONG, + b + 1); + } while (b < BITS_PER_LONG); + } + return n_IRQs_detected; +} + +static void +xpc_process_activate_IRQ_rcvd_sn2(void) +{ + unsigned long irq_flags; + int n_IRQs_expected; + int n_IRQs_detected; + + spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags); + n_IRQs_expected = xpc_activate_IRQ_rcvd; + xpc_activate_IRQ_rcvd = 0; + spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags); + + n_IRQs_detected = xpc_identify_activate_IRQ_sender_sn2(); + if (n_IRQs_detected < n_IRQs_expected) { + /* retry once to help avoid missing amo */ + (void)xpc_identify_activate_IRQ_sender_sn2(); + } +} + +/* + * Setup the channel structures that are sn2 specific. + */ +static enum xp_retval +xpc_setup_ch_structures_sn2(struct xpc_partition *part) +{ + struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2; + struct xpc_channel_sn2 *ch_sn2; + enum xp_retval retval; + int ret; + int cpuid; + int ch_number; + struct timer_list *timer; + short partid = XPC_PARTID(part); + + /* allocate all the required GET/PUT values */ + + part_sn2->local_GPs = + xpc_kzalloc_cacheline_aligned(XPC_GP_SIZE, GFP_KERNEL, + &part_sn2->local_GPs_base); + if (part_sn2->local_GPs == NULL) { + dev_err(xpc_chan, "can't get memory for local get/put " + "values\n"); + return xpNoMemory; + } + + part_sn2->remote_GPs = + xpc_kzalloc_cacheline_aligned(XPC_GP_SIZE, GFP_KERNEL, + &part_sn2->remote_GPs_base); + if (part_sn2->remote_GPs == NULL) { + dev_err(xpc_chan, "can't get memory for remote get/put " + "values\n"); + retval = xpNoMemory; + goto out_1; + } + + part_sn2->remote_GPs_pa = 0; + + /* allocate all the required open and close args */ + + part_sn2->local_openclose_args = + xpc_kzalloc_cacheline_aligned(XPC_OPENCLOSE_ARGS_SIZE, + GFP_KERNEL, &part_sn2-> + local_openclose_args_base); + if (part_sn2->local_openclose_args == NULL) { + dev_err(xpc_chan, "can't get memory for local connect args\n"); + retval = xpNoMemory; + goto out_2; + } + + part_sn2->remote_openclose_args_pa = 0; + + part_sn2->local_chctl_amo_va = xpc_init_IRQ_amo_sn2(partid); + + part_sn2->notify_IRQ_nasid = 0; + part_sn2->notify_IRQ_phys_cpuid = 0; + part_sn2->remote_chctl_amo_va = NULL; + + sprintf(part_sn2->notify_IRQ_owner, "xpc%02d", partid); + ret = request_irq(SGI_XPC_NOTIFY, xpc_handle_notify_IRQ_sn2, + IRQF_SHARED, part_sn2->notify_IRQ_owner, + (void *)(u64)partid); + if (ret != 0) { + dev_err(xpc_chan, "can't register NOTIFY IRQ handler, " + "errno=%d\n", -ret); + retval = xpLackOfResources; + goto out_3; + } + + /* Setup a timer to check for dropped notify IRQs */ + timer = &part_sn2->dropped_notify_IRQ_timer; + timer_setup(timer, xpc_check_for_dropped_notify_IRQ_sn2, 0); + timer->expires = jiffies + XPC_DROPPED_NOTIFY_IRQ_WAIT_INTERVAL; + add_timer(timer); + + for (ch_number = 0; ch_number < part->nchannels; ch_number++) { + ch_sn2 = &part->channels[ch_number].sn.sn2; + + ch_sn2->local_GP = &part_sn2->local_GPs[ch_number]; + ch_sn2->local_openclose_args = + &part_sn2->local_openclose_args[ch_number]; + + mutex_init(&ch_sn2->msg_to_pull_mutex); + } + + /* + * Setup the per partition specific variables required by the + * remote partition to establish channel connections with us. + * + * The setting of the magic # indicates that these per partition + * specific variables are ready to be used. + */ + xpc_vars_part_sn2[partid].GPs_pa = xp_pa(part_sn2->local_GPs); + xpc_vars_part_sn2[partid].openclose_args_pa = + xp_pa(part_sn2->local_openclose_args); + xpc_vars_part_sn2[partid].chctl_amo_pa = + xp_pa(part_sn2->local_chctl_amo_va); + cpuid = raw_smp_processor_id(); /* any CPU in this partition will do */ + xpc_vars_part_sn2[partid].notify_IRQ_nasid = cpuid_to_nasid(cpuid); + xpc_vars_part_sn2[partid].notify_IRQ_phys_cpuid = + cpu_physical_id(cpuid); + xpc_vars_part_sn2[partid].nchannels = part->nchannels; + xpc_vars_part_sn2[partid].magic = XPC_VP_MAGIC1_SN2; + + return xpSuccess; + + /* setup of ch structures failed */ +out_3: + kfree(part_sn2->local_openclose_args_base); + part_sn2->local_openclose_args = NULL; +out_2: + kfree(part_sn2->remote_GPs_base); + part_sn2->remote_GPs = NULL; +out_1: + kfree(part_sn2->local_GPs_base); + part_sn2->local_GPs = NULL; + return retval; +} + +/* + * Teardown the channel structures that are sn2 specific. + */ +static void +xpc_teardown_ch_structures_sn2(struct xpc_partition *part) +{ + struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2; + short partid = XPC_PARTID(part); + + /* + * Indicate that the variables specific to the remote partition are no + * longer available for its use. + */ + xpc_vars_part_sn2[partid].magic = 0; + + /* in case we've still got outstanding timers registered... */ + del_timer_sync(&part_sn2->dropped_notify_IRQ_timer); + free_irq(SGI_XPC_NOTIFY, (void *)(u64)partid); + + kfree(part_sn2->local_openclose_args_base); + part_sn2->local_openclose_args = NULL; + kfree(part_sn2->remote_GPs_base); + part_sn2->remote_GPs = NULL; + kfree(part_sn2->local_GPs_base); + part_sn2->local_GPs = NULL; + part_sn2->local_chctl_amo_va = NULL; +} + +/* + * Create a wrapper that hides the underlying mechanism for pulling a cacheline + * (or multiple cachelines) from a remote partition. + * + * src_pa must be a cacheline aligned physical address on the remote partition. + * dst must be a cacheline aligned virtual address on this partition. + * cnt must be cacheline sized + */ +/* ??? Replace this function by call to xp_remote_memcpy() or bte_copy()? */ +static enum xp_retval +xpc_pull_remote_cachelines_sn2(struct xpc_partition *part, void *dst, + const unsigned long src_pa, size_t cnt) +{ + enum xp_retval ret; + + DBUG_ON(src_pa != L1_CACHE_ALIGN(src_pa)); + DBUG_ON((unsigned long)dst != L1_CACHE_ALIGN((unsigned long)dst)); + DBUG_ON(cnt != L1_CACHE_ALIGN(cnt)); + + if (part->act_state == XPC_P_AS_DEACTIVATING) + return part->reason; + + ret = xp_remote_memcpy(xp_pa(dst), src_pa, cnt); + if (ret != xpSuccess) { + dev_dbg(xpc_chan, "xp_remote_memcpy() from partition %d failed," + " ret=%d\n", XPC_PARTID(part), ret); + } + return ret; +} + +/* + * Pull the remote per partition specific variables from the specified + * partition. + */ +static enum xp_retval +xpc_pull_remote_vars_part_sn2(struct xpc_partition *part) +{ + struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2; + u8 buffer[L1_CACHE_BYTES * 2]; + struct xpc_vars_part_sn2 *pulled_entry_cacheline = + (struct xpc_vars_part_sn2 *)L1_CACHE_ALIGN((u64)buffer); + struct xpc_vars_part_sn2 *pulled_entry; + unsigned long remote_entry_cacheline_pa; + unsigned long remote_entry_pa; + short partid = XPC_PARTID(part); + enum xp_retval ret; + + /* pull the cacheline that contains the variables we're interested in */ + + DBUG_ON(part_sn2->remote_vars_part_pa != + L1_CACHE_ALIGN(part_sn2->remote_vars_part_pa)); + DBUG_ON(sizeof(struct xpc_vars_part_sn2) != L1_CACHE_BYTES / 2); + + remote_entry_pa = part_sn2->remote_vars_part_pa + + sn_partition_id * sizeof(struct xpc_vars_part_sn2); + + remote_entry_cacheline_pa = (remote_entry_pa & ~(L1_CACHE_BYTES - 1)); + + pulled_entry = (struct xpc_vars_part_sn2 *)((u64)pulled_entry_cacheline + + (remote_entry_pa & + (L1_CACHE_BYTES - 1))); + + ret = xpc_pull_remote_cachelines_sn2(part, pulled_entry_cacheline, + remote_entry_cacheline_pa, + L1_CACHE_BYTES); + if (ret != xpSuccess) { + dev_dbg(xpc_chan, "failed to pull XPC vars_part from " + "partition %d, ret=%d\n", partid, ret); + return ret; + } + + /* see if they've been set up yet */ + + if (pulled_entry->magic != XPC_VP_MAGIC1_SN2 && + pulled_entry->magic != XPC_VP_MAGIC2_SN2) { + + if (pulled_entry->magic != 0) { + dev_dbg(xpc_chan, "partition %d's XPC vars_part for " + "partition %d has bad magic value (=0x%llx)\n", + partid, sn_partition_id, pulled_entry->magic); + return xpBadMagic; + } + + /* they've not been initialized yet */ + return xpRetry; + } + + if (xpc_vars_part_sn2[partid].magic == XPC_VP_MAGIC1_SN2) { + + /* validate the variables */ + + if (pulled_entry->GPs_pa == 0 || + pulled_entry->openclose_args_pa == 0 || + pulled_entry->chctl_amo_pa == 0) { + + dev_err(xpc_chan, "partition %d's XPC vars_part for " + "partition %d are not valid\n", partid, + sn_partition_id); + return xpInvalidAddress; + } + + /* the variables we imported look to be valid */ + + part_sn2->remote_GPs_pa = pulled_entry->GPs_pa; + part_sn2->remote_openclose_args_pa = + pulled_entry->openclose_args_pa; + part_sn2->remote_chctl_amo_va = + (struct amo *)__va(pulled_entry->chctl_amo_pa); + part_sn2->notify_IRQ_nasid = pulled_entry->notify_IRQ_nasid; + part_sn2->notify_IRQ_phys_cpuid = + pulled_entry->notify_IRQ_phys_cpuid; + + if (part->nchannels > pulled_entry->nchannels) + part->nchannels = pulled_entry->nchannels; + + /* let the other side know that we've pulled their variables */ + + xpc_vars_part_sn2[partid].magic = XPC_VP_MAGIC2_SN2; + } + + if (pulled_entry->magic == XPC_VP_MAGIC1_SN2) + return xpRetry; + + return xpSuccess; +} + +/* + * Establish first contact with the remote partititon. This involves pulling + * the XPC per partition variables from the remote partition and waiting for + * the remote partition to pull ours. + */ +static enum xp_retval +xpc_make_first_contact_sn2(struct xpc_partition *part) +{ + struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2; + enum xp_retval ret; + + /* + * Register the remote partition's amos with SAL so it can handle + * and cleanup errors within that address range should the remote + * partition go down. We don't unregister this range because it is + * difficult to tell when outstanding writes to the remote partition + * are finished and thus when it is safe to unregister. This should + * not result in wasted space in the SAL xp_addr_region table because + * we should get the same page for remote_amos_page_pa after module + * reloads and system reboots. + */ + if (sn_register_xp_addr_region(part_sn2->remote_amos_page_pa, + PAGE_SIZE, 1) < 0) { + dev_warn(xpc_part, "xpc_activating(%d) failed to register " + "xp_addr region\n", XPC_PARTID(part)); + + ret = xpPhysAddrRegFailed; + XPC_DEACTIVATE_PARTITION(part, ret); + return ret; + } + + /* + * Send activate IRQ to get other side to activate if they've not + * already begun to do so. + */ + xpc_send_activate_IRQ_sn2(part_sn2->remote_amos_page_pa, + cnodeid_to_nasid(0), + part_sn2->activate_IRQ_nasid, + part_sn2->activate_IRQ_phys_cpuid); + + while ((ret = xpc_pull_remote_vars_part_sn2(part)) != xpSuccess) { + if (ret != xpRetry) { + XPC_DEACTIVATE_PARTITION(part, ret); + return ret; + } + + dev_dbg(xpc_part, "waiting to make first contact with " + "partition %d\n", XPC_PARTID(part)); + + /* wait a 1/4 of a second or so */ + (void)msleep_interruptible(250); + + if (part->act_state == XPC_P_AS_DEACTIVATING) + return part->reason; + } + + return xpSuccess; +} + +/* + * Get the chctl flags and pull the openclose args and/or remote GPs as needed. + */ +static u64 +xpc_get_chctl_all_flags_sn2(struct xpc_partition *part) +{ + struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2; + unsigned long irq_flags; + union xpc_channel_ctl_flags chctl; + enum xp_retval ret; + + /* + * See if there are any chctl flags to be handled. + */ + + spin_lock_irqsave(&part->chctl_lock, irq_flags); + chctl = part->chctl; + if (chctl.all_flags != 0) + part->chctl.all_flags = 0; + + spin_unlock_irqrestore(&part->chctl_lock, irq_flags); + + if (xpc_any_openclose_chctl_flags_set(&chctl)) { + ret = xpc_pull_remote_cachelines_sn2(part, part-> + remote_openclose_args, + part_sn2-> + remote_openclose_args_pa, + XPC_OPENCLOSE_ARGS_SIZE); + if (ret != xpSuccess) { + XPC_DEACTIVATE_PARTITION(part, ret); + + dev_dbg(xpc_chan, "failed to pull openclose args from " + "partition %d, ret=%d\n", XPC_PARTID(part), + ret); + + /* don't bother processing chctl flags anymore */ + chctl.all_flags = 0; + } + } + + if (xpc_any_msg_chctl_flags_set(&chctl)) { + ret = xpc_pull_remote_cachelines_sn2(part, part_sn2->remote_GPs, + part_sn2->remote_GPs_pa, + XPC_GP_SIZE); + if (ret != xpSuccess) { + XPC_DEACTIVATE_PARTITION(part, ret); + + dev_dbg(xpc_chan, "failed to pull GPs from partition " + "%d, ret=%d\n", XPC_PARTID(part), ret); + + /* don't bother processing chctl flags anymore */ + chctl.all_flags = 0; + } + } + + return chctl.all_flags; +} + +/* + * Allocate the local message queue and the notify queue. + */ +static enum xp_retval +xpc_allocate_local_msgqueue_sn2(struct xpc_channel *ch) +{ + struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2; + unsigned long irq_flags; + int nentries; + size_t nbytes; + + for (nentries = ch->local_nentries; nentries > 0; nentries--) { + + nbytes = nentries * ch->entry_size; + ch_sn2->local_msgqueue = + xpc_kzalloc_cacheline_aligned(nbytes, GFP_KERNEL, + &ch_sn2->local_msgqueue_base); + if (ch_sn2->local_msgqueue == NULL) + continue; + + nbytes = nentries * sizeof(struct xpc_notify_sn2); + ch_sn2->notify_queue = kzalloc(nbytes, GFP_KERNEL); + if (ch_sn2->notify_queue == NULL) { + kfree(ch_sn2->local_msgqueue_base); + ch_sn2->local_msgqueue = NULL; + continue; + } + + spin_lock_irqsave(&ch->lock, irq_flags); + if (nentries < ch->local_nentries) { + dev_dbg(xpc_chan, "nentries=%d local_nentries=%d, " + "partid=%d, channel=%d\n", nentries, + ch->local_nentries, ch->partid, ch->number); + + ch->local_nentries = nentries; + } + spin_unlock_irqrestore(&ch->lock, irq_flags); + return xpSuccess; + } + + dev_dbg(xpc_chan, "can't get memory for local message queue and notify " + "queue, partid=%d, channel=%d\n", ch->partid, ch->number); + return xpNoMemory; +} + +/* + * Allocate the cached remote message queue. + */ +static enum xp_retval +xpc_allocate_remote_msgqueue_sn2(struct xpc_channel *ch) +{ + struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2; + unsigned long irq_flags; + int nentries; + size_t nbytes; + + DBUG_ON(ch->remote_nentries <= 0); + + for (nentries = ch->remote_nentries; nentries > 0; nentries--) { + + nbytes = nentries * ch->entry_size; + ch_sn2->remote_msgqueue = + xpc_kzalloc_cacheline_aligned(nbytes, GFP_KERNEL, &ch_sn2-> + remote_msgqueue_base); + if (ch_sn2->remote_msgqueue == NULL) + continue; + + spin_lock_irqsave(&ch->lock, irq_flags); + if (nentries < ch->remote_nentries) { + dev_dbg(xpc_chan, "nentries=%d remote_nentries=%d, " + "partid=%d, channel=%d\n", nentries, + ch->remote_nentries, ch->partid, ch->number); + + ch->remote_nentries = nentries; + } + spin_unlock_irqrestore(&ch->lock, irq_flags); + return xpSuccess; + } + + dev_dbg(xpc_chan, "can't get memory for cached remote message queue, " + "partid=%d, channel=%d\n", ch->partid, ch->number); + return xpNoMemory; +} + +/* + * Allocate message queues and other stuff associated with a channel. + * + * Note: Assumes all of the channel sizes are filled in. + */ +static enum xp_retval +xpc_setup_msg_structures_sn2(struct xpc_channel *ch) +{ + struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2; + enum xp_retval ret; + + DBUG_ON(ch->flags & XPC_C_SETUP); + + ret = xpc_allocate_local_msgqueue_sn2(ch); + if (ret == xpSuccess) { + + ret = xpc_allocate_remote_msgqueue_sn2(ch); + if (ret != xpSuccess) { + kfree(ch_sn2->local_msgqueue_base); + ch_sn2->local_msgqueue = NULL; + kfree(ch_sn2->notify_queue); + ch_sn2->notify_queue = NULL; + } + } + return ret; +} + +/* + * Free up message queues and other stuff that were allocated for the specified + * channel. + */ +static void +xpc_teardown_msg_structures_sn2(struct xpc_channel *ch) +{ + struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2; + + DBUG_ON(!spin_is_locked(&ch->lock)); + + ch_sn2->remote_msgqueue_pa = 0; + + ch_sn2->local_GP->get = 0; + ch_sn2->local_GP->put = 0; + ch_sn2->remote_GP.get = 0; + ch_sn2->remote_GP.put = 0; + ch_sn2->w_local_GP.get = 0; + ch_sn2->w_local_GP.put = 0; + ch_sn2->w_remote_GP.get = 0; + ch_sn2->w_remote_GP.put = 0; + ch_sn2->next_msg_to_pull = 0; + + if (ch->flags & XPC_C_SETUP) { + dev_dbg(xpc_chan, "ch->flags=0x%x, partid=%d, channel=%d\n", + ch->flags, ch->partid, ch->number); + + kfree(ch_sn2->local_msgqueue_base); + ch_sn2->local_msgqueue = NULL; + kfree(ch_sn2->remote_msgqueue_base); + ch_sn2->remote_msgqueue = NULL; + kfree(ch_sn2->notify_queue); + ch_sn2->notify_queue = NULL; + } +} + +/* + * Notify those who wanted to be notified upon delivery of their message. + */ +static void +xpc_notify_senders_sn2(struct xpc_channel *ch, enum xp_retval reason, s64 put) +{ + struct xpc_notify_sn2 *notify; + u8 notify_type; + s64 get = ch->sn.sn2.w_remote_GP.get - 1; + + while (++get < put && atomic_read(&ch->n_to_notify) > 0) { + + notify = &ch->sn.sn2.notify_queue[get % ch->local_nentries]; + + /* + * See if the notify entry indicates it was associated with + * a message who's sender wants to be notified. It is possible + * that it is, but someone else is doing or has done the + * notification. + */ + notify_type = notify->type; + if (notify_type == 0 || + cmpxchg(¬ify->type, notify_type, 0) != notify_type) { + continue; + } + + DBUG_ON(notify_type != XPC_N_CALL); + + atomic_dec(&ch->n_to_notify); + + if (notify->func != NULL) { + dev_dbg(xpc_chan, "notify->func() called, notify=0x%p " + "msg_number=%lld partid=%d channel=%d\n", + (void *)notify, get, ch->partid, ch->number); + + notify->func(reason, ch->partid, ch->number, + notify->key); + + dev_dbg(xpc_chan, "notify->func() returned, notify=0x%p" + " msg_number=%lld partid=%d channel=%d\n", + (void *)notify, get, ch->partid, ch->number); + } + } +} + +static void +xpc_notify_senders_of_disconnect_sn2(struct xpc_channel *ch) +{ + xpc_notify_senders_sn2(ch, ch->reason, ch->sn.sn2.w_local_GP.put); +} + +/* + * Clear some of the msg flags in the local message queue. + */ +static inline void +xpc_clear_local_msgqueue_flags_sn2(struct xpc_channel *ch) +{ + struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2; + struct xpc_msg_sn2 *msg; + s64 get; + + get = ch_sn2->w_remote_GP.get; + do { + msg = (struct xpc_msg_sn2 *)((u64)ch_sn2->local_msgqueue + + (get % ch->local_nentries) * + ch->entry_size); + DBUG_ON(!(msg->flags & XPC_M_SN2_READY)); + msg->flags = 0; + } while (++get < ch_sn2->remote_GP.get); +} + +/* + * Clear some of the msg flags in the remote message queue. + */ +static inline void +xpc_clear_remote_msgqueue_flags_sn2(struct xpc_channel *ch) +{ + struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2; + struct xpc_msg_sn2 *msg; + s64 put, remote_nentries = ch->remote_nentries; + + /* flags are zeroed when the buffer is allocated */ + if (ch_sn2->remote_GP.put < remote_nentries) + return; + + put = max(ch_sn2->w_remote_GP.put, remote_nentries); + do { + msg = (struct xpc_msg_sn2 *)((u64)ch_sn2->remote_msgqueue + + (put % remote_nentries) * + ch->entry_size); + DBUG_ON(!(msg->flags & XPC_M_SN2_READY)); + DBUG_ON(!(msg->flags & XPC_M_SN2_DONE)); + DBUG_ON(msg->number != put - remote_nentries); + msg->flags = 0; + } while (++put < ch_sn2->remote_GP.put); +} + +static int +xpc_n_of_deliverable_payloads_sn2(struct xpc_channel *ch) +{ + return ch->sn.sn2.w_remote_GP.put - ch->sn.sn2.w_local_GP.get; +} + +static void +xpc_process_msg_chctl_flags_sn2(struct xpc_partition *part, int ch_number) +{ + struct xpc_channel *ch = &part->channels[ch_number]; + struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2; + int npayloads_sent; + + ch_sn2->remote_GP = part->sn.sn2.remote_GPs[ch_number]; + + /* See what, if anything, has changed for each connected channel */ + + xpc_msgqueue_ref(ch); + + if (ch_sn2->w_remote_GP.get == ch_sn2->remote_GP.get && + ch_sn2->w_remote_GP.put == ch_sn2->remote_GP.put) { + /* nothing changed since GPs were last pulled */ + xpc_msgqueue_deref(ch); + return; + } + + if (!(ch->flags & XPC_C_CONNECTED)) { + xpc_msgqueue_deref(ch); + return; + } + + /* + * First check to see if messages recently sent by us have been + * received by the other side. (The remote GET value will have + * changed since we last looked at it.) + */ + + if (ch_sn2->w_remote_GP.get != ch_sn2->remote_GP.get) { + + /* + * We need to notify any senders that want to be notified + * that their sent messages have been received by their + * intended recipients. We need to do this before updating + * w_remote_GP.get so that we don't allocate the same message + * queue entries prematurely (see xpc_allocate_msg()). + */ + if (atomic_read(&ch->n_to_notify) > 0) { + /* + * Notify senders that messages sent have been + * received and delivered by the other side. + */ + xpc_notify_senders_sn2(ch, xpMsgDelivered, + ch_sn2->remote_GP.get); + } + + /* + * Clear msg->flags in previously sent messages, so that + * they're ready for xpc_allocate_msg(). + */ + xpc_clear_local_msgqueue_flags_sn2(ch); + + ch_sn2->w_remote_GP.get = ch_sn2->remote_GP.get; + + dev_dbg(xpc_chan, "w_remote_GP.get changed to %lld, partid=%d, " + "channel=%d\n", ch_sn2->w_remote_GP.get, ch->partid, + ch->number); + + /* + * If anyone was waiting for message queue entries to become + * available, wake them up. + */ + if (atomic_read(&ch->n_on_msg_allocate_wq) > 0) + wake_up(&ch->msg_allocate_wq); + } + + /* + * Now check for newly sent messages by the other side. (The remote + * PUT value will have changed since we last looked at it.) + */ + + if (ch_sn2->w_remote_GP.put != ch_sn2->remote_GP.put) { + /* + * Clear msg->flags in previously received messages, so that + * they're ready for xpc_get_deliverable_payload_sn2(). + */ + xpc_clear_remote_msgqueue_flags_sn2(ch); + + smp_wmb(); /* ensure flags have been cleared before bte_copy */ + ch_sn2->w_remote_GP.put = ch_sn2->remote_GP.put; + + dev_dbg(xpc_chan, "w_remote_GP.put changed to %lld, partid=%d, " + "channel=%d\n", ch_sn2->w_remote_GP.put, ch->partid, + ch->number); + + npayloads_sent = xpc_n_of_deliverable_payloads_sn2(ch); + if (npayloads_sent > 0) { + dev_dbg(xpc_chan, "msgs waiting to be copied and " + "delivered=%d, partid=%d, channel=%d\n", + npayloads_sent, ch->partid, ch->number); + + if (ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) + xpc_activate_kthreads(ch, npayloads_sent); + } + } + + xpc_msgqueue_deref(ch); +} + +static struct xpc_msg_sn2 * +xpc_pull_remote_msg_sn2(struct xpc_channel *ch, s64 get) +{ + struct xpc_partition *part = &xpc_partitions[ch->partid]; + struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2; + unsigned long remote_msg_pa; + struct xpc_msg_sn2 *msg; + u32 msg_index; + u32 nmsgs; + u64 msg_offset; + enum xp_retval ret; + + if (mutex_lock_interruptible(&ch_sn2->msg_to_pull_mutex) != 0) { + /* we were interrupted by a signal */ + return NULL; + } + + while (get >= ch_sn2->next_msg_to_pull) { + + /* pull as many messages as are ready and able to be pulled */ + + msg_index = ch_sn2->next_msg_to_pull % ch->remote_nentries; + + DBUG_ON(ch_sn2->next_msg_to_pull >= ch_sn2->w_remote_GP.put); + nmsgs = ch_sn2->w_remote_GP.put - ch_sn2->next_msg_to_pull; + if (msg_index + nmsgs > ch->remote_nentries) { + /* ignore the ones that wrap the msg queue for now */ + nmsgs = ch->remote_nentries - msg_index; + } + + msg_offset = msg_index * ch->entry_size; + msg = (struct xpc_msg_sn2 *)((u64)ch_sn2->remote_msgqueue + + msg_offset); + remote_msg_pa = ch_sn2->remote_msgqueue_pa + msg_offset; + + ret = xpc_pull_remote_cachelines_sn2(part, msg, remote_msg_pa, + nmsgs * ch->entry_size); + if (ret != xpSuccess) { + + dev_dbg(xpc_chan, "failed to pull %d msgs starting with" + " msg %lld from partition %d, channel=%d, " + "ret=%d\n", nmsgs, ch_sn2->next_msg_to_pull, + ch->partid, ch->number, ret); + + XPC_DEACTIVATE_PARTITION(part, ret); + + mutex_unlock(&ch_sn2->msg_to_pull_mutex); + return NULL; + } + + ch_sn2->next_msg_to_pull += nmsgs; + } + + mutex_unlock(&ch_sn2->msg_to_pull_mutex); + + /* return the message we were looking for */ + msg_offset = (get % ch->remote_nentries) * ch->entry_size; + msg = (struct xpc_msg_sn2 *)((u64)ch_sn2->remote_msgqueue + msg_offset); + + return msg; +} + +/* + * Get the next deliverable message's payload. + */ +static void * +xpc_get_deliverable_payload_sn2(struct xpc_channel *ch) +{ + struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2; + struct xpc_msg_sn2 *msg; + void *payload = NULL; + s64 get; + + do { + if (ch->flags & XPC_C_DISCONNECTING) + break; + + get = ch_sn2->w_local_GP.get; + smp_rmb(); /* guarantee that .get loads before .put */ + if (get == ch_sn2->w_remote_GP.put) + break; + + /* There are messages waiting to be pulled and delivered. + * We need to try to secure one for ourselves. We'll do this + * by trying to increment w_local_GP.get and hope that no one + * else beats us to it. If they do, we'll we'll simply have + * to try again for the next one. + */ + + if (cmpxchg(&ch_sn2->w_local_GP.get, get, get + 1) == get) { + /* we got the entry referenced by get */ + + dev_dbg(xpc_chan, "w_local_GP.get changed to %lld, " + "partid=%d, channel=%d\n", get + 1, + ch->partid, ch->number); + + /* pull the message from the remote partition */ + + msg = xpc_pull_remote_msg_sn2(ch, get); + + if (msg != NULL) { + DBUG_ON(msg->number != get); + DBUG_ON(msg->flags & XPC_M_SN2_DONE); + DBUG_ON(!(msg->flags & XPC_M_SN2_READY)); + + payload = &msg->payload; + } + break; + } + + } while (1); + + return payload; +} + +/* + * Now we actually send the messages that are ready to be sent by advancing + * the local message queue's Put value and then send a chctl msgrequest to the + * recipient partition. + */ +static void +xpc_send_msgs_sn2(struct xpc_channel *ch, s64 initial_put) +{ + struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2; + struct xpc_msg_sn2 *msg; + s64 put = initial_put + 1; + int send_msgrequest = 0; + + while (1) { + + while (1) { + if (put == ch_sn2->w_local_GP.put) + break; + + msg = (struct xpc_msg_sn2 *)((u64)ch_sn2-> + local_msgqueue + (put % + ch->local_nentries) * + ch->entry_size); + + if (!(msg->flags & XPC_M_SN2_READY)) + break; + + put++; + } + + if (put == initial_put) { + /* nothing's changed */ + break; + } + + if (cmpxchg_rel(&ch_sn2->local_GP->put, initial_put, put) != + initial_put) { + /* someone else beat us to it */ + DBUG_ON(ch_sn2->local_GP->put < initial_put); + break; + } + + /* we just set the new value of local_GP->put */ + + dev_dbg(xpc_chan, "local_GP->put changed to %lld, partid=%d, " + "channel=%d\n", put, ch->partid, ch->number); + + send_msgrequest = 1; + + /* + * We need to ensure that the message referenced by + * local_GP->put is not XPC_M_SN2_READY or that local_GP->put + * equals w_local_GP.put, so we'll go have a look. + */ + initial_put = put; + } + + if (send_msgrequest) + xpc_send_chctl_msgrequest_sn2(ch); +} + +/* + * Allocate an entry for a message from the message queue associated with the + * specified channel. + */ +static enum xp_retval +xpc_allocate_msg_sn2(struct xpc_channel *ch, u32 flags, + struct xpc_msg_sn2 **address_of_msg) +{ + struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2; + struct xpc_msg_sn2 *msg; + enum xp_retval ret; + s64 put; + + /* + * Get the next available message entry from the local message queue. + * If none are available, we'll make sure that we grab the latest + * GP values. + */ + ret = xpTimeout; + + while (1) { + + put = ch_sn2->w_local_GP.put; + smp_rmb(); /* guarantee that .put loads before .get */ + if (put - ch_sn2->w_remote_GP.get < ch->local_nentries) { + + /* There are available message entries. We need to try + * to secure one for ourselves. We'll do this by trying + * to increment w_local_GP.put as long as someone else + * doesn't beat us to it. If they do, we'll have to + * try again. + */ + if (cmpxchg(&ch_sn2->w_local_GP.put, put, put + 1) == + put) { + /* we got the entry referenced by put */ + break; + } + continue; /* try again */ + } + + /* + * There aren't any available msg entries at this time. + * + * In waiting for a message entry to become available, + * we set a timeout in case the other side is not sending + * completion interrupts. This lets us fake a notify IRQ + * that will cause the notify IRQ handler to fetch the latest + * GP values as if an interrupt was sent by the other side. + */ + if (ret == xpTimeout) + xpc_send_chctl_local_msgrequest_sn2(ch); + + if (flags & XPC_NOWAIT) + return xpNoWait; + + ret = xpc_allocate_msg_wait(ch); + if (ret != xpInterrupted && ret != xpTimeout) + return ret; + } + + /* get the message's address and initialize it */ + msg = (struct xpc_msg_sn2 *)((u64)ch_sn2->local_msgqueue + + (put % ch->local_nentries) * + ch->entry_size); + + DBUG_ON(msg->flags != 0); + msg->number = put; + + dev_dbg(xpc_chan, "w_local_GP.put changed to %lld; msg=0x%p, " + "msg_number=%lld, partid=%d, channel=%d\n", put + 1, + (void *)msg, msg->number, ch->partid, ch->number); + + *address_of_msg = msg; + return xpSuccess; +} + +/* + * Common code that does the actual sending of the message by advancing the + * local message queue's Put value and sends a chctl msgrequest to the + * partition the message is being sent to. + */ +static enum xp_retval +xpc_send_payload_sn2(struct xpc_channel *ch, u32 flags, void *payload, + u16 payload_size, u8 notify_type, xpc_notify_func func, + void *key) +{ + enum xp_retval ret = xpSuccess; + struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2; + struct xpc_msg_sn2 *msg = msg; + struct xpc_notify_sn2 *notify = notify; + s64 msg_number; + s64 put; + + DBUG_ON(notify_type == XPC_N_CALL && func == NULL); + + if (XPC_MSG_SIZE(payload_size) > ch->entry_size) + return xpPayloadTooBig; + + xpc_msgqueue_ref(ch); + + if (ch->flags & XPC_C_DISCONNECTING) { + ret = ch->reason; + goto out_1; + } + if (!(ch->flags & XPC_C_CONNECTED)) { + ret = xpNotConnected; + goto out_1; + } + + ret = xpc_allocate_msg_sn2(ch, flags, &msg); + if (ret != xpSuccess) + goto out_1; + + msg_number = msg->number; + + if (notify_type != 0) { + /* + * Tell the remote side to send an ACK interrupt when the + * message has been delivered. + */ + msg->flags |= XPC_M_SN2_INTERRUPT; + + atomic_inc(&ch->n_to_notify); + + notify = &ch_sn2->notify_queue[msg_number % ch->local_nentries]; + notify->func = func; + notify->key = key; + notify->type = notify_type; + + /* ??? Is a mb() needed here? */ + + if (ch->flags & XPC_C_DISCONNECTING) { + /* + * An error occurred between our last error check and + * this one. We will try to clear the type field from + * the notify entry. If we succeed then + * xpc_disconnect_channel() didn't already process + * the notify entry. + */ + if (cmpxchg(¬ify->type, notify_type, 0) == + notify_type) { + atomic_dec(&ch->n_to_notify); + ret = ch->reason; + } + goto out_1; + } + } + + memcpy(&msg->payload, payload, payload_size); + + msg->flags |= XPC_M_SN2_READY; + + /* + * The preceding store of msg->flags must occur before the following + * load of local_GP->put. + */ + smp_mb(); + + /* see if the message is next in line to be sent, if so send it */ + + put = ch_sn2->local_GP->put; + if (put == msg_number) + xpc_send_msgs_sn2(ch, put); + +out_1: + xpc_msgqueue_deref(ch); + return ret; +} + +/* + * Now we actually acknowledge the messages that have been delivered and ack'd + * by advancing the cached remote message queue's Get value and if requested + * send a chctl msgrequest to the message sender's partition. + * + * If a message has XPC_M_SN2_INTERRUPT set, send an interrupt to the partition + * that sent the message. + */ +static void +xpc_acknowledge_msgs_sn2(struct xpc_channel *ch, s64 initial_get, u8 msg_flags) +{ + struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2; + struct xpc_msg_sn2 *msg; + s64 get = initial_get + 1; + int send_msgrequest = 0; + + while (1) { + + while (1) { + if (get == ch_sn2->w_local_GP.get) + break; + + msg = (struct xpc_msg_sn2 *)((u64)ch_sn2-> + remote_msgqueue + (get % + ch->remote_nentries) * + ch->entry_size); + + if (!(msg->flags & XPC_M_SN2_DONE)) + break; + + msg_flags |= msg->flags; + get++; + } + + if (get == initial_get) { + /* nothing's changed */ + break; + } + + if (cmpxchg_rel(&ch_sn2->local_GP->get, initial_get, get) != + initial_get) { + /* someone else beat us to it */ + DBUG_ON(ch_sn2->local_GP->get <= initial_get); + break; + } + + /* we just set the new value of local_GP->get */ + + dev_dbg(xpc_chan, "local_GP->get changed to %lld, partid=%d, " + "channel=%d\n", get, ch->partid, ch->number); + + send_msgrequest = (msg_flags & XPC_M_SN2_INTERRUPT); + + /* + * We need to ensure that the message referenced by + * local_GP->get is not XPC_M_SN2_DONE or that local_GP->get + * equals w_local_GP.get, so we'll go have a look. + */ + initial_get = get; + } + + if (send_msgrequest) + xpc_send_chctl_msgrequest_sn2(ch); +} + +static void +xpc_received_payload_sn2(struct xpc_channel *ch, void *payload) +{ + struct xpc_msg_sn2 *msg; + s64 msg_number; + s64 get; + + msg = container_of(payload, struct xpc_msg_sn2, payload); + msg_number = msg->number; + + dev_dbg(xpc_chan, "msg=0x%p, msg_number=%lld, partid=%d, channel=%d\n", + (void *)msg, msg_number, ch->partid, ch->number); + + DBUG_ON((((u64)msg - (u64)ch->sn.sn2.remote_msgqueue) / ch->entry_size) != + msg_number % ch->remote_nentries); + DBUG_ON(!(msg->flags & XPC_M_SN2_READY)); + DBUG_ON(msg->flags & XPC_M_SN2_DONE); + + msg->flags |= XPC_M_SN2_DONE; + + /* + * The preceding store of msg->flags must occur before the following + * load of local_GP->get. + */ + smp_mb(); + + /* + * See if this message is next in line to be acknowledged as having + * been delivered. + */ + get = ch->sn.sn2.local_GP->get; + if (get == msg_number) + xpc_acknowledge_msgs_sn2(ch, get, msg->flags); +} + +static struct xpc_arch_operations xpc_arch_ops_sn2 = { + .setup_partitions = xpc_setup_partitions_sn2, + .teardown_partitions = xpc_teardown_partitions_sn2, + .process_activate_IRQ_rcvd = xpc_process_activate_IRQ_rcvd_sn2, + .get_partition_rsvd_page_pa = xpc_get_partition_rsvd_page_pa_sn2, + .setup_rsvd_page = xpc_setup_rsvd_page_sn2, + + .allow_hb = xpc_allow_hb_sn2, + .disallow_hb = xpc_disallow_hb_sn2, + .disallow_all_hbs = xpc_disallow_all_hbs_sn2, + .increment_heartbeat = xpc_increment_heartbeat_sn2, + .offline_heartbeat = xpc_offline_heartbeat_sn2, + .online_heartbeat = xpc_online_heartbeat_sn2, + .heartbeat_init = xpc_heartbeat_init_sn2, + .heartbeat_exit = xpc_heartbeat_exit_sn2, + .get_remote_heartbeat = xpc_get_remote_heartbeat_sn2, + + .request_partition_activation = + xpc_request_partition_activation_sn2, + .request_partition_reactivation = + xpc_request_partition_reactivation_sn2, + .request_partition_deactivation = + xpc_request_partition_deactivation_sn2, + .cancel_partition_deactivation_request = + xpc_cancel_partition_deactivation_request_sn2, + + .setup_ch_structures = xpc_setup_ch_structures_sn2, + .teardown_ch_structures = xpc_teardown_ch_structures_sn2, + + .make_first_contact = xpc_make_first_contact_sn2, + + .get_chctl_all_flags = xpc_get_chctl_all_flags_sn2, + .send_chctl_closerequest = xpc_send_chctl_closerequest_sn2, + .send_chctl_closereply = xpc_send_chctl_closereply_sn2, + .send_chctl_openrequest = xpc_send_chctl_openrequest_sn2, + .send_chctl_openreply = xpc_send_chctl_openreply_sn2, + .send_chctl_opencomplete = xpc_send_chctl_opencomplete_sn2, + .process_msg_chctl_flags = xpc_process_msg_chctl_flags_sn2, + + .save_remote_msgqueue_pa = xpc_save_remote_msgqueue_pa_sn2, + + .setup_msg_structures = xpc_setup_msg_structures_sn2, + .teardown_msg_structures = xpc_teardown_msg_structures_sn2, + + .indicate_partition_engaged = xpc_indicate_partition_engaged_sn2, + .indicate_partition_disengaged = xpc_indicate_partition_disengaged_sn2, + .partition_engaged = xpc_partition_engaged_sn2, + .any_partition_engaged = xpc_any_partition_engaged_sn2, + .assume_partition_disengaged = xpc_assume_partition_disengaged_sn2, + + .n_of_deliverable_payloads = xpc_n_of_deliverable_payloads_sn2, + .send_payload = xpc_send_payload_sn2, + .get_deliverable_payload = xpc_get_deliverable_payload_sn2, + .received_payload = xpc_received_payload_sn2, + .notify_senders_of_disconnect = xpc_notify_senders_of_disconnect_sn2, +}; + +int +xpc_init_sn2(void) +{ + int ret; + size_t buf_size; + + xpc_arch_ops = xpc_arch_ops_sn2; + + if (offsetof(struct xpc_msg_sn2, payload) > XPC_MSG_HDR_MAX_SIZE) { + dev_err(xpc_part, "header portion of struct xpc_msg_sn2 is " + "larger than %d\n", XPC_MSG_HDR_MAX_SIZE); + return -E2BIG; + } + + buf_size = max(XPC_RP_VARS_SIZE, + XPC_RP_HEADER_SIZE + XP_NASID_MASK_BYTES_SN2); + xpc_remote_copy_buffer_sn2 = xpc_kmalloc_cacheline_aligned(buf_size, + GFP_KERNEL, + &xpc_remote_copy_buffer_base_sn2); + if (xpc_remote_copy_buffer_sn2 == NULL) { + dev_err(xpc_part, "can't get memory for remote copy buffer\n"); + return -ENOMEM; + } + + /* open up protections for IPI and [potentially] amo operations */ + xpc_allow_IPI_ops_sn2(); + xpc_allow_amo_ops_shub_wars_1_1_sn2(); + + /* + * This is safe to do before the xpc_hb_checker thread has started + * because the handler releases a wait queue. If an interrupt is + * received before the thread is waiting, it will not go to sleep, + * but rather immediately process the interrupt. + */ + ret = request_irq(SGI_XPC_ACTIVATE, xpc_handle_activate_IRQ_sn2, 0, + "xpc hb", NULL); + if (ret != 0) { + dev_err(xpc_part, "can't register ACTIVATE IRQ handler, " + "errno=%d\n", -ret); + xpc_disallow_IPI_ops_sn2(); + kfree(xpc_remote_copy_buffer_base_sn2); + } + return ret; +} + +void +xpc_exit_sn2(void) +{ + free_irq(SGI_XPC_ACTIVATE, NULL); + xpc_disallow_IPI_ops_sn2(); + kfree(xpc_remote_copy_buffer_base_sn2); +} diff --git a/drivers/misc/sgi-xp/xpc_uv.c b/drivers/misc/sgi-xp/xpc_uv.c new file mode 100644 index 000000000..340b44d9e --- /dev/null +++ b/drivers/misc/sgi-xp/xpc_uv.c @@ -0,0 +1,1813 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 2008-2009 Silicon Graphics, Inc. All Rights Reserved. + */ + +/* + * Cross Partition Communication (XPC) uv-based functions. + * + * Architecture specific implementation of common functions. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#if defined CONFIG_X86_64 +#include +#include +#elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV +#include +#include +#endif +#include "../sgi-gru/gru.h" +#include "../sgi-gru/grukservices.h" +#include "xpc.h" + +#if defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV +struct uv_IO_APIC_route_entry { + __u64 vector : 8, + delivery_mode : 3, + dest_mode : 1, + delivery_status : 1, + polarity : 1, + __reserved_1 : 1, + trigger : 1, + mask : 1, + __reserved_2 : 15, + dest : 32; +}; +#endif + +static struct xpc_heartbeat_uv *xpc_heartbeat_uv; + +#define XPC_ACTIVATE_MSG_SIZE_UV (1 * GRU_CACHE_LINE_BYTES) +#define XPC_ACTIVATE_MQ_SIZE_UV (4 * XP_MAX_NPARTITIONS_UV * \ + XPC_ACTIVATE_MSG_SIZE_UV) +#define XPC_ACTIVATE_IRQ_NAME "xpc_activate" + +#define XPC_NOTIFY_MSG_SIZE_UV (2 * GRU_CACHE_LINE_BYTES) +#define XPC_NOTIFY_MQ_SIZE_UV (4 * XP_MAX_NPARTITIONS_UV * \ + XPC_NOTIFY_MSG_SIZE_UV) +#define XPC_NOTIFY_IRQ_NAME "xpc_notify" + +static int xpc_mq_node = -1; + +static struct xpc_gru_mq_uv *xpc_activate_mq_uv; +static struct xpc_gru_mq_uv *xpc_notify_mq_uv; + +static int +xpc_setup_partitions_uv(void) +{ + short partid; + struct xpc_partition_uv *part_uv; + + for (partid = 0; partid < XP_MAX_NPARTITIONS_UV; partid++) { + part_uv = &xpc_partitions[partid].sn.uv; + + mutex_init(&part_uv->cached_activate_gru_mq_desc_mutex); + spin_lock_init(&part_uv->flags_lock); + part_uv->remote_act_state = XPC_P_AS_INACTIVE; + } + return 0; +} + +static void +xpc_teardown_partitions_uv(void) +{ + short partid; + struct xpc_partition_uv *part_uv; + unsigned long irq_flags; + + for (partid = 0; partid < XP_MAX_NPARTITIONS_UV; partid++) { + part_uv = &xpc_partitions[partid].sn.uv; + + if (part_uv->cached_activate_gru_mq_desc != NULL) { + mutex_lock(&part_uv->cached_activate_gru_mq_desc_mutex); + spin_lock_irqsave(&part_uv->flags_lock, irq_flags); + part_uv->flags &= ~XPC_P_CACHED_ACTIVATE_GRU_MQ_DESC_UV; + spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags); + kfree(part_uv->cached_activate_gru_mq_desc); + part_uv->cached_activate_gru_mq_desc = NULL; + mutex_unlock(&part_uv-> + cached_activate_gru_mq_desc_mutex); + } + } +} + +static int +xpc_get_gru_mq_irq_uv(struct xpc_gru_mq_uv *mq, int cpu, char *irq_name) +{ + int mmr_pnode = uv_blade_to_pnode(mq->mmr_blade); + +#if defined CONFIG_X86_64 + mq->irq = uv_setup_irq(irq_name, cpu, mq->mmr_blade, mq->mmr_offset, + UV_AFFINITY_CPU); + if (mq->irq < 0) + return mq->irq; + + mq->mmr_value = uv_read_global_mmr64(mmr_pnode, mq->mmr_offset); + +#elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV + if (strcmp(irq_name, XPC_ACTIVATE_IRQ_NAME) == 0) + mq->irq = SGI_XPC_ACTIVATE; + else if (strcmp(irq_name, XPC_NOTIFY_IRQ_NAME) == 0) + mq->irq = SGI_XPC_NOTIFY; + else + return -EINVAL; + + mq->mmr_value = (unsigned long)cpu_physical_id(cpu) << 32 | mq->irq; + uv_write_global_mmr64(mmr_pnode, mq->mmr_offset, mq->mmr_value); +#else + #error not a supported configuration +#endif + + return 0; +} + +static void +xpc_release_gru_mq_irq_uv(struct xpc_gru_mq_uv *mq) +{ +#if defined CONFIG_X86_64 + uv_teardown_irq(mq->irq); + +#elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV + int mmr_pnode; + unsigned long mmr_value; + + mmr_pnode = uv_blade_to_pnode(mq->mmr_blade); + mmr_value = 1UL << 16; + + uv_write_global_mmr64(mmr_pnode, mq->mmr_offset, mmr_value); +#else + #error not a supported configuration +#endif +} + +static int +xpc_gru_mq_watchlist_alloc_uv(struct xpc_gru_mq_uv *mq) +{ + int ret; + +#if defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV + int mmr_pnode = uv_blade_to_pnode(mq->mmr_blade); + + ret = sn_mq_watchlist_alloc(mmr_pnode, (void *)uv_gpa(mq->address), + mq->order, &mq->mmr_offset); + if (ret < 0) { + dev_err(xpc_part, "sn_mq_watchlist_alloc() failed, ret=%d\n", + ret); + return -EBUSY; + } +#elif defined CONFIG_X86_64 + ret = uv_bios_mq_watchlist_alloc(uv_gpa(mq->address), + mq->order, &mq->mmr_offset); + if (ret < 0) { + dev_err(xpc_part, "uv_bios_mq_watchlist_alloc() failed, " + "ret=%d\n", ret); + return ret; + } +#else + #error not a supported configuration +#endif + + mq->watchlist_num = ret; + return 0; +} + +static void +xpc_gru_mq_watchlist_free_uv(struct xpc_gru_mq_uv *mq) +{ + int ret; + int mmr_pnode = uv_blade_to_pnode(mq->mmr_blade); + +#if defined CONFIG_X86_64 + ret = uv_bios_mq_watchlist_free(mmr_pnode, mq->watchlist_num); + BUG_ON(ret != BIOS_STATUS_SUCCESS); +#elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV + ret = sn_mq_watchlist_free(mmr_pnode, mq->watchlist_num); + BUG_ON(ret != SALRET_OK); +#else + #error not a supported configuration +#endif +} + +static struct xpc_gru_mq_uv * +xpc_create_gru_mq_uv(unsigned int mq_size, int cpu, char *irq_name, + irq_handler_t irq_handler) +{ + enum xp_retval xp_ret; + int ret; + int nid; + int nasid; + int pg_order; + struct page *page; + struct xpc_gru_mq_uv *mq; + struct uv_IO_APIC_route_entry *mmr_value; + + mq = kmalloc(sizeof(struct xpc_gru_mq_uv), GFP_KERNEL); + if (mq == NULL) { + dev_err(xpc_part, "xpc_create_gru_mq_uv() failed to kmalloc() " + "a xpc_gru_mq_uv structure\n"); + ret = -ENOMEM; + goto out_0; + } + + mq->gru_mq_desc = kzalloc(sizeof(struct gru_message_queue_desc), + GFP_KERNEL); + if (mq->gru_mq_desc == NULL) { + dev_err(xpc_part, "xpc_create_gru_mq_uv() failed to kmalloc() " + "a gru_message_queue_desc structure\n"); + ret = -ENOMEM; + goto out_1; + } + + pg_order = get_order(mq_size); + mq->order = pg_order + PAGE_SHIFT; + mq_size = 1UL << mq->order; + + mq->mmr_blade = uv_cpu_to_blade_id(cpu); + + nid = cpu_to_node(cpu); + page = __alloc_pages_node(nid, + GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE, + pg_order); + if (page == NULL) { + dev_err(xpc_part, "xpc_create_gru_mq_uv() failed to alloc %d " + "bytes of memory on nid=%d for GRU mq\n", mq_size, nid); + ret = -ENOMEM; + goto out_2; + } + mq->address = page_address(page); + + /* enable generation of irq when GRU mq operation occurs to this mq */ + ret = xpc_gru_mq_watchlist_alloc_uv(mq); + if (ret != 0) + goto out_3; + + ret = xpc_get_gru_mq_irq_uv(mq, cpu, irq_name); + if (ret != 0) + goto out_4; + + ret = request_irq(mq->irq, irq_handler, 0, irq_name, NULL); + if (ret != 0) { + dev_err(xpc_part, "request_irq(irq=%d) returned error=%d\n", + mq->irq, -ret); + goto out_5; + } + + nasid = UV_PNODE_TO_NASID(uv_cpu_to_pnode(cpu)); + + mmr_value = (struct uv_IO_APIC_route_entry *)&mq->mmr_value; + ret = gru_create_message_queue(mq->gru_mq_desc, mq->address, mq_size, + nasid, mmr_value->vector, mmr_value->dest); + if (ret != 0) { + dev_err(xpc_part, "gru_create_message_queue() returned " + "error=%d\n", ret); + ret = -EINVAL; + goto out_6; + } + + /* allow other partitions to access this GRU mq */ + xp_ret = xp_expand_memprotect(xp_pa(mq->address), mq_size); + if (xp_ret != xpSuccess) { + ret = -EACCES; + goto out_6; + } + + return mq; + + /* something went wrong */ +out_6: + free_irq(mq->irq, NULL); +out_5: + xpc_release_gru_mq_irq_uv(mq); +out_4: + xpc_gru_mq_watchlist_free_uv(mq); +out_3: + free_pages((unsigned long)mq->address, pg_order); +out_2: + kfree(mq->gru_mq_desc); +out_1: + kfree(mq); +out_0: + return ERR_PTR(ret); +} + +static void +xpc_destroy_gru_mq_uv(struct xpc_gru_mq_uv *mq) +{ + unsigned int mq_size; + int pg_order; + int ret; + + /* disallow other partitions to access GRU mq */ + mq_size = 1UL << mq->order; + ret = xp_restrict_memprotect(xp_pa(mq->address), mq_size); + BUG_ON(ret != xpSuccess); + + /* unregister irq handler and release mq irq/vector mapping */ + free_irq(mq->irq, NULL); + xpc_release_gru_mq_irq_uv(mq); + + /* disable generation of irq when GRU mq op occurs to this mq */ + xpc_gru_mq_watchlist_free_uv(mq); + + pg_order = mq->order - PAGE_SHIFT; + free_pages((unsigned long)mq->address, pg_order); + + kfree(mq); +} + +static enum xp_retval +xpc_send_gru_msg(struct gru_message_queue_desc *gru_mq_desc, void *msg, + size_t msg_size) +{ + enum xp_retval xp_ret; + int ret; + + while (1) { + ret = gru_send_message_gpa(gru_mq_desc, msg, msg_size); + if (ret == MQE_OK) { + xp_ret = xpSuccess; + break; + } + + if (ret == MQE_QUEUE_FULL) { + dev_dbg(xpc_chan, "gru_send_message_gpa() returned " + "error=MQE_QUEUE_FULL\n"); + /* !!! handle QLimit reached; delay & try again */ + /* ??? Do we add a limit to the number of retries? */ + (void)msleep_interruptible(10); + } else if (ret == MQE_CONGESTION) { + dev_dbg(xpc_chan, "gru_send_message_gpa() returned " + "error=MQE_CONGESTION\n"); + /* !!! handle LB Overflow; simply try again */ + /* ??? Do we add a limit to the number of retries? */ + } else { + /* !!! Currently this is MQE_UNEXPECTED_CB_ERR */ + dev_err(xpc_chan, "gru_send_message_gpa() returned " + "error=%d\n", ret); + xp_ret = xpGruSendMqError; + break; + } + } + return xp_ret; +} + +static void +xpc_process_activate_IRQ_rcvd_uv(void) +{ + unsigned long irq_flags; + short partid; + struct xpc_partition *part; + u8 act_state_req; + + DBUG_ON(xpc_activate_IRQ_rcvd == 0); + + spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags); + for (partid = 0; partid < XP_MAX_NPARTITIONS_UV; partid++) { + part = &xpc_partitions[partid]; + + if (part->sn.uv.act_state_req == 0) + continue; + + xpc_activate_IRQ_rcvd--; + BUG_ON(xpc_activate_IRQ_rcvd < 0); + + act_state_req = part->sn.uv.act_state_req; + part->sn.uv.act_state_req = 0; + spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags); + + if (act_state_req == XPC_P_ASR_ACTIVATE_UV) { + if (part->act_state == XPC_P_AS_INACTIVE) + xpc_activate_partition(part); + else if (part->act_state == XPC_P_AS_DEACTIVATING) + XPC_DEACTIVATE_PARTITION(part, xpReactivating); + + } else if (act_state_req == XPC_P_ASR_REACTIVATE_UV) { + if (part->act_state == XPC_P_AS_INACTIVE) + xpc_activate_partition(part); + else + XPC_DEACTIVATE_PARTITION(part, xpReactivating); + + } else if (act_state_req == XPC_P_ASR_DEACTIVATE_UV) { + XPC_DEACTIVATE_PARTITION(part, part->sn.uv.reason); + + } else { + BUG(); + } + + spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags); + if (xpc_activate_IRQ_rcvd == 0) + break; + } + spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags); + +} + +static void +xpc_handle_activate_mq_msg_uv(struct xpc_partition *part, + struct xpc_activate_mq_msghdr_uv *msg_hdr, + int part_setup, + int *wakeup_hb_checker) +{ + unsigned long irq_flags; + struct xpc_partition_uv *part_uv = &part->sn.uv; + struct xpc_openclose_args *args; + + part_uv->remote_act_state = msg_hdr->act_state; + + switch (msg_hdr->type) { + case XPC_ACTIVATE_MQ_MSG_SYNC_ACT_STATE_UV: + /* syncing of remote_act_state was just done above */ + break; + + case XPC_ACTIVATE_MQ_MSG_ACTIVATE_REQ_UV: { + struct xpc_activate_mq_msg_activate_req_uv *msg; + + /* + * ??? Do we deal here with ts_jiffies being different + * ??? if act_state != XPC_P_AS_INACTIVE instead of + * ??? below? + */ + msg = container_of(msg_hdr, struct + xpc_activate_mq_msg_activate_req_uv, hdr); + + spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags); + if (part_uv->act_state_req == 0) + xpc_activate_IRQ_rcvd++; + part_uv->act_state_req = XPC_P_ASR_ACTIVATE_UV; + part->remote_rp_pa = msg->rp_gpa; /* !!! _pa is _gpa */ + part->remote_rp_ts_jiffies = msg_hdr->rp_ts_jiffies; + part_uv->heartbeat_gpa = msg->heartbeat_gpa; + + if (msg->activate_gru_mq_desc_gpa != + part_uv->activate_gru_mq_desc_gpa) { + spin_lock(&part_uv->flags_lock); + part_uv->flags &= ~XPC_P_CACHED_ACTIVATE_GRU_MQ_DESC_UV; + spin_unlock(&part_uv->flags_lock); + part_uv->activate_gru_mq_desc_gpa = + msg->activate_gru_mq_desc_gpa; + } + spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags); + + (*wakeup_hb_checker)++; + break; + } + case XPC_ACTIVATE_MQ_MSG_DEACTIVATE_REQ_UV: { + struct xpc_activate_mq_msg_deactivate_req_uv *msg; + + msg = container_of(msg_hdr, struct + xpc_activate_mq_msg_deactivate_req_uv, hdr); + + spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags); + if (part_uv->act_state_req == 0) + xpc_activate_IRQ_rcvd++; + part_uv->act_state_req = XPC_P_ASR_DEACTIVATE_UV; + part_uv->reason = msg->reason; + spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags); + + (*wakeup_hb_checker)++; + return; + } + case XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREQUEST_UV: { + struct xpc_activate_mq_msg_chctl_closerequest_uv *msg; + + if (!part_setup) + break; + + msg = container_of(msg_hdr, struct + xpc_activate_mq_msg_chctl_closerequest_uv, + hdr); + args = &part->remote_openclose_args[msg->ch_number]; + args->reason = msg->reason; + + spin_lock_irqsave(&part->chctl_lock, irq_flags); + part->chctl.flags[msg->ch_number] |= XPC_CHCTL_CLOSEREQUEST; + spin_unlock_irqrestore(&part->chctl_lock, irq_flags); + + xpc_wakeup_channel_mgr(part); + break; + } + case XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREPLY_UV: { + struct xpc_activate_mq_msg_chctl_closereply_uv *msg; + + if (!part_setup) + break; + + msg = container_of(msg_hdr, struct + xpc_activate_mq_msg_chctl_closereply_uv, + hdr); + + spin_lock_irqsave(&part->chctl_lock, irq_flags); + part->chctl.flags[msg->ch_number] |= XPC_CHCTL_CLOSEREPLY; + spin_unlock_irqrestore(&part->chctl_lock, irq_flags); + + xpc_wakeup_channel_mgr(part); + break; + } + case XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREQUEST_UV: { + struct xpc_activate_mq_msg_chctl_openrequest_uv *msg; + + if (!part_setup) + break; + + msg = container_of(msg_hdr, struct + xpc_activate_mq_msg_chctl_openrequest_uv, + hdr); + args = &part->remote_openclose_args[msg->ch_number]; + args->entry_size = msg->entry_size; + args->local_nentries = msg->local_nentries; + + spin_lock_irqsave(&part->chctl_lock, irq_flags); + part->chctl.flags[msg->ch_number] |= XPC_CHCTL_OPENREQUEST; + spin_unlock_irqrestore(&part->chctl_lock, irq_flags); + + xpc_wakeup_channel_mgr(part); + break; + } + case XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREPLY_UV: { + struct xpc_activate_mq_msg_chctl_openreply_uv *msg; + + if (!part_setup) + break; + + msg = container_of(msg_hdr, struct + xpc_activate_mq_msg_chctl_openreply_uv, hdr); + args = &part->remote_openclose_args[msg->ch_number]; + args->remote_nentries = msg->remote_nentries; + args->local_nentries = msg->local_nentries; + args->local_msgqueue_pa = msg->notify_gru_mq_desc_gpa; + + spin_lock_irqsave(&part->chctl_lock, irq_flags); + part->chctl.flags[msg->ch_number] |= XPC_CHCTL_OPENREPLY; + spin_unlock_irqrestore(&part->chctl_lock, irq_flags); + + xpc_wakeup_channel_mgr(part); + break; + } + case XPC_ACTIVATE_MQ_MSG_CHCTL_OPENCOMPLETE_UV: { + struct xpc_activate_mq_msg_chctl_opencomplete_uv *msg; + + if (!part_setup) + break; + + msg = container_of(msg_hdr, struct + xpc_activate_mq_msg_chctl_opencomplete_uv, hdr); + spin_lock_irqsave(&part->chctl_lock, irq_flags); + part->chctl.flags[msg->ch_number] |= XPC_CHCTL_OPENCOMPLETE; + spin_unlock_irqrestore(&part->chctl_lock, irq_flags); + + xpc_wakeup_channel_mgr(part); + } + case XPC_ACTIVATE_MQ_MSG_MARK_ENGAGED_UV: + spin_lock_irqsave(&part_uv->flags_lock, irq_flags); + part_uv->flags |= XPC_P_ENGAGED_UV; + spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags); + break; + + case XPC_ACTIVATE_MQ_MSG_MARK_DISENGAGED_UV: + spin_lock_irqsave(&part_uv->flags_lock, irq_flags); + part_uv->flags &= ~XPC_P_ENGAGED_UV; + spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags); + break; + + default: + dev_err(xpc_part, "received unknown activate_mq msg type=%d " + "from partition=%d\n", msg_hdr->type, XPC_PARTID(part)); + + /* get hb checker to deactivate from the remote partition */ + spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags); + if (part_uv->act_state_req == 0) + xpc_activate_IRQ_rcvd++; + part_uv->act_state_req = XPC_P_ASR_DEACTIVATE_UV; + part_uv->reason = xpBadMsgType; + spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags); + + (*wakeup_hb_checker)++; + return; + } + + if (msg_hdr->rp_ts_jiffies != part->remote_rp_ts_jiffies && + part->remote_rp_ts_jiffies != 0) { + /* + * ??? Does what we do here need to be sensitive to + * ??? act_state or remote_act_state? + */ + spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags); + if (part_uv->act_state_req == 0) + xpc_activate_IRQ_rcvd++; + part_uv->act_state_req = XPC_P_ASR_REACTIVATE_UV; + spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags); + + (*wakeup_hb_checker)++; + } +} + +static irqreturn_t +xpc_handle_activate_IRQ_uv(int irq, void *dev_id) +{ + struct xpc_activate_mq_msghdr_uv *msg_hdr; + short partid; + struct xpc_partition *part; + int wakeup_hb_checker = 0; + int part_referenced; + + while (1) { + msg_hdr = gru_get_next_message(xpc_activate_mq_uv->gru_mq_desc); + if (msg_hdr == NULL) + break; + + partid = msg_hdr->partid; + if (partid < 0 || partid >= XP_MAX_NPARTITIONS_UV) { + dev_err(xpc_part, "xpc_handle_activate_IRQ_uv() " + "received invalid partid=0x%x in message\n", + partid); + } else { + part = &xpc_partitions[partid]; + + part_referenced = xpc_part_ref(part); + xpc_handle_activate_mq_msg_uv(part, msg_hdr, + part_referenced, + &wakeup_hb_checker); + if (part_referenced) + xpc_part_deref(part); + } + + gru_free_message(xpc_activate_mq_uv->gru_mq_desc, msg_hdr); + } + + if (wakeup_hb_checker) + wake_up_interruptible(&xpc_activate_IRQ_wq); + + return IRQ_HANDLED; +} + +static enum xp_retval +xpc_cache_remote_gru_mq_desc_uv(struct gru_message_queue_desc *gru_mq_desc, + unsigned long gru_mq_desc_gpa) +{ + enum xp_retval ret; + + ret = xp_remote_memcpy(uv_gpa(gru_mq_desc), gru_mq_desc_gpa, + sizeof(struct gru_message_queue_desc)); + if (ret == xpSuccess) + gru_mq_desc->mq = NULL; + + return ret; +} + +static enum xp_retval +xpc_send_activate_IRQ_uv(struct xpc_partition *part, void *msg, size_t msg_size, + int msg_type) +{ + struct xpc_activate_mq_msghdr_uv *msg_hdr = msg; + struct xpc_partition_uv *part_uv = &part->sn.uv; + struct gru_message_queue_desc *gru_mq_desc; + unsigned long irq_flags; + enum xp_retval ret; + + DBUG_ON(msg_size > XPC_ACTIVATE_MSG_SIZE_UV); + + msg_hdr->type = msg_type; + msg_hdr->partid = xp_partition_id; + msg_hdr->act_state = part->act_state; + msg_hdr->rp_ts_jiffies = xpc_rsvd_page->ts_jiffies; + + mutex_lock(&part_uv->cached_activate_gru_mq_desc_mutex); +again: + if (!(part_uv->flags & XPC_P_CACHED_ACTIVATE_GRU_MQ_DESC_UV)) { + gru_mq_desc = part_uv->cached_activate_gru_mq_desc; + if (gru_mq_desc == NULL) { + gru_mq_desc = kmalloc(sizeof(struct + gru_message_queue_desc), + GFP_KERNEL); + if (gru_mq_desc == NULL) { + ret = xpNoMemory; + goto done; + } + part_uv->cached_activate_gru_mq_desc = gru_mq_desc; + } + + ret = xpc_cache_remote_gru_mq_desc_uv(gru_mq_desc, + part_uv-> + activate_gru_mq_desc_gpa); + if (ret != xpSuccess) + goto done; + + spin_lock_irqsave(&part_uv->flags_lock, irq_flags); + part_uv->flags |= XPC_P_CACHED_ACTIVATE_GRU_MQ_DESC_UV; + spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags); + } + + /* ??? Is holding a spin_lock (ch->lock) during this call a bad idea? */ + ret = xpc_send_gru_msg(part_uv->cached_activate_gru_mq_desc, msg, + msg_size); + if (ret != xpSuccess) { + smp_rmb(); /* ensure a fresh copy of part_uv->flags */ + if (!(part_uv->flags & XPC_P_CACHED_ACTIVATE_GRU_MQ_DESC_UV)) + goto again; + } +done: + mutex_unlock(&part_uv->cached_activate_gru_mq_desc_mutex); + return ret; +} + +static void +xpc_send_activate_IRQ_part_uv(struct xpc_partition *part, void *msg, + size_t msg_size, int msg_type) +{ + enum xp_retval ret; + + ret = xpc_send_activate_IRQ_uv(part, msg, msg_size, msg_type); + if (unlikely(ret != xpSuccess)) + XPC_DEACTIVATE_PARTITION(part, ret); +} + +static void +xpc_send_activate_IRQ_ch_uv(struct xpc_channel *ch, unsigned long *irq_flags, + void *msg, size_t msg_size, int msg_type) +{ + struct xpc_partition *part = &xpc_partitions[ch->partid]; + enum xp_retval ret; + + ret = xpc_send_activate_IRQ_uv(part, msg, msg_size, msg_type); + if (unlikely(ret != xpSuccess)) { + if (irq_flags != NULL) + spin_unlock_irqrestore(&ch->lock, *irq_flags); + + XPC_DEACTIVATE_PARTITION(part, ret); + + if (irq_flags != NULL) + spin_lock_irqsave(&ch->lock, *irq_flags); + } +} + +static void +xpc_send_local_activate_IRQ_uv(struct xpc_partition *part, int act_state_req) +{ + unsigned long irq_flags; + struct xpc_partition_uv *part_uv = &part->sn.uv; + + /* + * !!! Make our side think that the remote partition sent an activate + * !!! mq message our way by doing what the activate IRQ handler would + * !!! do had one really been sent. + */ + + spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags); + if (part_uv->act_state_req == 0) + xpc_activate_IRQ_rcvd++; + part_uv->act_state_req = act_state_req; + spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags); + + wake_up_interruptible(&xpc_activate_IRQ_wq); +} + +static enum xp_retval +xpc_get_partition_rsvd_page_pa_uv(void *buf, u64 *cookie, unsigned long *rp_pa, + size_t *len) +{ + s64 status; + enum xp_retval ret; + +#if defined CONFIG_X86_64 + status = uv_bios_reserved_page_pa((u64)buf, cookie, (u64 *)rp_pa, + (u64 *)len); + if (status == BIOS_STATUS_SUCCESS) + ret = xpSuccess; + else if (status == BIOS_STATUS_MORE_PASSES) + ret = xpNeedMoreInfo; + else + ret = xpBiosError; + +#elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV + status = sn_partition_reserved_page_pa((u64)buf, cookie, rp_pa, len); + if (status == SALRET_OK) + ret = xpSuccess; + else if (status == SALRET_MORE_PASSES) + ret = xpNeedMoreInfo; + else + ret = xpSalError; + +#else + #error not a supported configuration +#endif + + return ret; +} + +static int +xpc_setup_rsvd_page_uv(struct xpc_rsvd_page *rp) +{ + xpc_heartbeat_uv = + &xpc_partitions[sn_partition_id].sn.uv.cached_heartbeat; + rp->sn.uv.heartbeat_gpa = uv_gpa(xpc_heartbeat_uv); + rp->sn.uv.activate_gru_mq_desc_gpa = + uv_gpa(xpc_activate_mq_uv->gru_mq_desc); + return 0; +} + +static void +xpc_allow_hb_uv(short partid) +{ +} + +static void +xpc_disallow_hb_uv(short partid) +{ +} + +static void +xpc_disallow_all_hbs_uv(void) +{ +} + +static void +xpc_increment_heartbeat_uv(void) +{ + xpc_heartbeat_uv->value++; +} + +static void +xpc_offline_heartbeat_uv(void) +{ + xpc_increment_heartbeat_uv(); + xpc_heartbeat_uv->offline = 1; +} + +static void +xpc_online_heartbeat_uv(void) +{ + xpc_increment_heartbeat_uv(); + xpc_heartbeat_uv->offline = 0; +} + +static void +xpc_heartbeat_init_uv(void) +{ + xpc_heartbeat_uv->value = 1; + xpc_heartbeat_uv->offline = 0; +} + +static void +xpc_heartbeat_exit_uv(void) +{ + xpc_offline_heartbeat_uv(); +} + +static enum xp_retval +xpc_get_remote_heartbeat_uv(struct xpc_partition *part) +{ + struct xpc_partition_uv *part_uv = &part->sn.uv; + enum xp_retval ret; + + ret = xp_remote_memcpy(uv_gpa(&part_uv->cached_heartbeat), + part_uv->heartbeat_gpa, + sizeof(struct xpc_heartbeat_uv)); + if (ret != xpSuccess) + return ret; + + if (part_uv->cached_heartbeat.value == part->last_heartbeat && + !part_uv->cached_heartbeat.offline) { + + ret = xpNoHeartbeat; + } else { + part->last_heartbeat = part_uv->cached_heartbeat.value; + } + return ret; +} + +static void +xpc_request_partition_activation_uv(struct xpc_rsvd_page *remote_rp, + unsigned long remote_rp_gpa, int nasid) +{ + short partid = remote_rp->SAL_partid; + struct xpc_partition *part = &xpc_partitions[partid]; + struct xpc_activate_mq_msg_activate_req_uv msg; + + part->remote_rp_pa = remote_rp_gpa; /* !!! _pa here is really _gpa */ + part->remote_rp_ts_jiffies = remote_rp->ts_jiffies; + part->sn.uv.heartbeat_gpa = remote_rp->sn.uv.heartbeat_gpa; + part->sn.uv.activate_gru_mq_desc_gpa = + remote_rp->sn.uv.activate_gru_mq_desc_gpa; + + /* + * ??? Is it a good idea to make this conditional on what is + * ??? potentially stale state information? + */ + if (part->sn.uv.remote_act_state == XPC_P_AS_INACTIVE) { + msg.rp_gpa = uv_gpa(xpc_rsvd_page); + msg.heartbeat_gpa = xpc_rsvd_page->sn.uv.heartbeat_gpa; + msg.activate_gru_mq_desc_gpa = + xpc_rsvd_page->sn.uv.activate_gru_mq_desc_gpa; + xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg), + XPC_ACTIVATE_MQ_MSG_ACTIVATE_REQ_UV); + } + + if (part->act_state == XPC_P_AS_INACTIVE) + xpc_send_local_activate_IRQ_uv(part, XPC_P_ASR_ACTIVATE_UV); +} + +static void +xpc_request_partition_reactivation_uv(struct xpc_partition *part) +{ + xpc_send_local_activate_IRQ_uv(part, XPC_P_ASR_ACTIVATE_UV); +} + +static void +xpc_request_partition_deactivation_uv(struct xpc_partition *part) +{ + struct xpc_activate_mq_msg_deactivate_req_uv msg; + + /* + * ??? Is it a good idea to make this conditional on what is + * ??? potentially stale state information? + */ + if (part->sn.uv.remote_act_state != XPC_P_AS_DEACTIVATING && + part->sn.uv.remote_act_state != XPC_P_AS_INACTIVE) { + + msg.reason = part->reason; + xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg), + XPC_ACTIVATE_MQ_MSG_DEACTIVATE_REQ_UV); + } +} + +static void +xpc_cancel_partition_deactivation_request_uv(struct xpc_partition *part) +{ + /* nothing needs to be done */ + return; +} + +static void +xpc_init_fifo_uv(struct xpc_fifo_head_uv *head) +{ + head->first = NULL; + head->last = NULL; + spin_lock_init(&head->lock); + head->n_entries = 0; +} + +static void * +xpc_get_fifo_entry_uv(struct xpc_fifo_head_uv *head) +{ + unsigned long irq_flags; + struct xpc_fifo_entry_uv *first; + + spin_lock_irqsave(&head->lock, irq_flags); + first = head->first; + if (head->first != NULL) { + head->first = first->next; + if (head->first == NULL) + head->last = NULL; + + head->n_entries--; + BUG_ON(head->n_entries < 0); + + first->next = NULL; + } + spin_unlock_irqrestore(&head->lock, irq_flags); + return first; +} + +static void +xpc_put_fifo_entry_uv(struct xpc_fifo_head_uv *head, + struct xpc_fifo_entry_uv *last) +{ + unsigned long irq_flags; + + last->next = NULL; + spin_lock_irqsave(&head->lock, irq_flags); + if (head->last != NULL) + head->last->next = last; + else + head->first = last; + head->last = last; + head->n_entries++; + spin_unlock_irqrestore(&head->lock, irq_flags); +} + +static int +xpc_n_of_fifo_entries_uv(struct xpc_fifo_head_uv *head) +{ + return head->n_entries; +} + +/* + * Setup the channel structures that are uv specific. + */ +static enum xp_retval +xpc_setup_ch_structures_uv(struct xpc_partition *part) +{ + struct xpc_channel_uv *ch_uv; + int ch_number; + + for (ch_number = 0; ch_number < part->nchannels; ch_number++) { + ch_uv = &part->channels[ch_number].sn.uv; + + xpc_init_fifo_uv(&ch_uv->msg_slot_free_list); + xpc_init_fifo_uv(&ch_uv->recv_msg_list); + } + + return xpSuccess; +} + +/* + * Teardown the channel structures that are uv specific. + */ +static void +xpc_teardown_ch_structures_uv(struct xpc_partition *part) +{ + /* nothing needs to be done */ + return; +} + +static enum xp_retval +xpc_make_first_contact_uv(struct xpc_partition *part) +{ + struct xpc_activate_mq_msg_uv msg; + + /* + * We send a sync msg to get the remote partition's remote_act_state + * updated to our current act_state which at this point should + * be XPC_P_AS_ACTIVATING. + */ + xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg), + XPC_ACTIVATE_MQ_MSG_SYNC_ACT_STATE_UV); + + while (!((part->sn.uv.remote_act_state == XPC_P_AS_ACTIVATING) || + (part->sn.uv.remote_act_state == XPC_P_AS_ACTIVE))) { + + dev_dbg(xpc_part, "waiting to make first contact with " + "partition %d\n", XPC_PARTID(part)); + + /* wait a 1/4 of a second or so */ + (void)msleep_interruptible(250); + + if (part->act_state == XPC_P_AS_DEACTIVATING) + return part->reason; + } + + return xpSuccess; +} + +static u64 +xpc_get_chctl_all_flags_uv(struct xpc_partition *part) +{ + unsigned long irq_flags; + union xpc_channel_ctl_flags chctl; + + spin_lock_irqsave(&part->chctl_lock, irq_flags); + chctl = part->chctl; + if (chctl.all_flags != 0) + part->chctl.all_flags = 0; + + spin_unlock_irqrestore(&part->chctl_lock, irq_flags); + return chctl.all_flags; +} + +static enum xp_retval +xpc_allocate_send_msg_slot_uv(struct xpc_channel *ch) +{ + struct xpc_channel_uv *ch_uv = &ch->sn.uv; + struct xpc_send_msg_slot_uv *msg_slot; + unsigned long irq_flags; + int nentries; + int entry; + size_t nbytes; + + for (nentries = ch->local_nentries; nentries > 0; nentries--) { + nbytes = nentries * sizeof(struct xpc_send_msg_slot_uv); + ch_uv->send_msg_slots = kzalloc(nbytes, GFP_KERNEL); + if (ch_uv->send_msg_slots == NULL) + continue; + + for (entry = 0; entry < nentries; entry++) { + msg_slot = &ch_uv->send_msg_slots[entry]; + + msg_slot->msg_slot_number = entry; + xpc_put_fifo_entry_uv(&ch_uv->msg_slot_free_list, + &msg_slot->next); + } + + spin_lock_irqsave(&ch->lock, irq_flags); + if (nentries < ch->local_nentries) + ch->local_nentries = nentries; + spin_unlock_irqrestore(&ch->lock, irq_flags); + return xpSuccess; + } + + return xpNoMemory; +} + +static enum xp_retval +xpc_allocate_recv_msg_slot_uv(struct xpc_channel *ch) +{ + struct xpc_channel_uv *ch_uv = &ch->sn.uv; + struct xpc_notify_mq_msg_uv *msg_slot; + unsigned long irq_flags; + int nentries; + int entry; + size_t nbytes; + + for (nentries = ch->remote_nentries; nentries > 0; nentries--) { + nbytes = nentries * ch->entry_size; + ch_uv->recv_msg_slots = kzalloc(nbytes, GFP_KERNEL); + if (ch_uv->recv_msg_slots == NULL) + continue; + + for (entry = 0; entry < nentries; entry++) { + msg_slot = ch_uv->recv_msg_slots + + entry * ch->entry_size; + + msg_slot->hdr.msg_slot_number = entry; + } + + spin_lock_irqsave(&ch->lock, irq_flags); + if (nentries < ch->remote_nentries) + ch->remote_nentries = nentries; + spin_unlock_irqrestore(&ch->lock, irq_flags); + return xpSuccess; + } + + return xpNoMemory; +} + +/* + * Allocate msg_slots associated with the channel. + */ +static enum xp_retval +xpc_setup_msg_structures_uv(struct xpc_channel *ch) +{ + static enum xp_retval ret; + struct xpc_channel_uv *ch_uv = &ch->sn.uv; + + DBUG_ON(ch->flags & XPC_C_SETUP); + + ch_uv->cached_notify_gru_mq_desc = kmalloc(sizeof(struct + gru_message_queue_desc), + GFP_KERNEL); + if (ch_uv->cached_notify_gru_mq_desc == NULL) + return xpNoMemory; + + ret = xpc_allocate_send_msg_slot_uv(ch); + if (ret == xpSuccess) { + + ret = xpc_allocate_recv_msg_slot_uv(ch); + if (ret != xpSuccess) { + kfree(ch_uv->send_msg_slots); + xpc_init_fifo_uv(&ch_uv->msg_slot_free_list); + } + } + return ret; +} + +/* + * Free up msg_slots and clear other stuff that were setup for the specified + * channel. + */ +static void +xpc_teardown_msg_structures_uv(struct xpc_channel *ch) +{ + struct xpc_channel_uv *ch_uv = &ch->sn.uv; + + DBUG_ON(!spin_is_locked(&ch->lock)); + + kfree(ch_uv->cached_notify_gru_mq_desc); + ch_uv->cached_notify_gru_mq_desc = NULL; + + if (ch->flags & XPC_C_SETUP) { + xpc_init_fifo_uv(&ch_uv->msg_slot_free_list); + kfree(ch_uv->send_msg_slots); + xpc_init_fifo_uv(&ch_uv->recv_msg_list); + kfree(ch_uv->recv_msg_slots); + } +} + +static void +xpc_send_chctl_closerequest_uv(struct xpc_channel *ch, unsigned long *irq_flags) +{ + struct xpc_activate_mq_msg_chctl_closerequest_uv msg; + + msg.ch_number = ch->number; + msg.reason = ch->reason; + xpc_send_activate_IRQ_ch_uv(ch, irq_flags, &msg, sizeof(msg), + XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREQUEST_UV); +} + +static void +xpc_send_chctl_closereply_uv(struct xpc_channel *ch, unsigned long *irq_flags) +{ + struct xpc_activate_mq_msg_chctl_closereply_uv msg; + + msg.ch_number = ch->number; + xpc_send_activate_IRQ_ch_uv(ch, irq_flags, &msg, sizeof(msg), + XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREPLY_UV); +} + +static void +xpc_send_chctl_openrequest_uv(struct xpc_channel *ch, unsigned long *irq_flags) +{ + struct xpc_activate_mq_msg_chctl_openrequest_uv msg; + + msg.ch_number = ch->number; + msg.entry_size = ch->entry_size; + msg.local_nentries = ch->local_nentries; + xpc_send_activate_IRQ_ch_uv(ch, irq_flags, &msg, sizeof(msg), + XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREQUEST_UV); +} + +static void +xpc_send_chctl_openreply_uv(struct xpc_channel *ch, unsigned long *irq_flags) +{ + struct xpc_activate_mq_msg_chctl_openreply_uv msg; + + msg.ch_number = ch->number; + msg.local_nentries = ch->local_nentries; + msg.remote_nentries = ch->remote_nentries; + msg.notify_gru_mq_desc_gpa = uv_gpa(xpc_notify_mq_uv->gru_mq_desc); + xpc_send_activate_IRQ_ch_uv(ch, irq_flags, &msg, sizeof(msg), + XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREPLY_UV); +} + +static void +xpc_send_chctl_opencomplete_uv(struct xpc_channel *ch, unsigned long *irq_flags) +{ + struct xpc_activate_mq_msg_chctl_opencomplete_uv msg; + + msg.ch_number = ch->number; + xpc_send_activate_IRQ_ch_uv(ch, irq_flags, &msg, sizeof(msg), + XPC_ACTIVATE_MQ_MSG_CHCTL_OPENCOMPLETE_UV); +} + +static void +xpc_send_chctl_local_msgrequest_uv(struct xpc_partition *part, int ch_number) +{ + unsigned long irq_flags; + + spin_lock_irqsave(&part->chctl_lock, irq_flags); + part->chctl.flags[ch_number] |= XPC_CHCTL_MSGREQUEST; + spin_unlock_irqrestore(&part->chctl_lock, irq_flags); + + xpc_wakeup_channel_mgr(part); +} + +static enum xp_retval +xpc_save_remote_msgqueue_pa_uv(struct xpc_channel *ch, + unsigned long gru_mq_desc_gpa) +{ + struct xpc_channel_uv *ch_uv = &ch->sn.uv; + + DBUG_ON(ch_uv->cached_notify_gru_mq_desc == NULL); + return xpc_cache_remote_gru_mq_desc_uv(ch_uv->cached_notify_gru_mq_desc, + gru_mq_desc_gpa); +} + +static void +xpc_indicate_partition_engaged_uv(struct xpc_partition *part) +{ + struct xpc_activate_mq_msg_uv msg; + + xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg), + XPC_ACTIVATE_MQ_MSG_MARK_ENGAGED_UV); +} + +static void +xpc_indicate_partition_disengaged_uv(struct xpc_partition *part) +{ + struct xpc_activate_mq_msg_uv msg; + + xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg), + XPC_ACTIVATE_MQ_MSG_MARK_DISENGAGED_UV); +} + +static void +xpc_assume_partition_disengaged_uv(short partid) +{ + struct xpc_partition_uv *part_uv = &xpc_partitions[partid].sn.uv; + unsigned long irq_flags; + + spin_lock_irqsave(&part_uv->flags_lock, irq_flags); + part_uv->flags &= ~XPC_P_ENGAGED_UV; + spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags); +} + +static int +xpc_partition_engaged_uv(short partid) +{ + return (xpc_partitions[partid].sn.uv.flags & XPC_P_ENGAGED_UV) != 0; +} + +static int +xpc_any_partition_engaged_uv(void) +{ + struct xpc_partition_uv *part_uv; + short partid; + + for (partid = 0; partid < XP_MAX_NPARTITIONS_UV; partid++) { + part_uv = &xpc_partitions[partid].sn.uv; + if ((part_uv->flags & XPC_P_ENGAGED_UV) != 0) + return 1; + } + return 0; +} + +static enum xp_retval +xpc_allocate_msg_slot_uv(struct xpc_channel *ch, u32 flags, + struct xpc_send_msg_slot_uv **address_of_msg_slot) +{ + enum xp_retval ret; + struct xpc_send_msg_slot_uv *msg_slot; + struct xpc_fifo_entry_uv *entry; + + while (1) { + entry = xpc_get_fifo_entry_uv(&ch->sn.uv.msg_slot_free_list); + if (entry != NULL) + break; + + if (flags & XPC_NOWAIT) + return xpNoWait; + + ret = xpc_allocate_msg_wait(ch); + if (ret != xpInterrupted && ret != xpTimeout) + return ret; + } + + msg_slot = container_of(entry, struct xpc_send_msg_slot_uv, next); + *address_of_msg_slot = msg_slot; + return xpSuccess; +} + +static void +xpc_free_msg_slot_uv(struct xpc_channel *ch, + struct xpc_send_msg_slot_uv *msg_slot) +{ + xpc_put_fifo_entry_uv(&ch->sn.uv.msg_slot_free_list, &msg_slot->next); + + /* wakeup anyone waiting for a free msg slot */ + if (atomic_read(&ch->n_on_msg_allocate_wq) > 0) + wake_up(&ch->msg_allocate_wq); +} + +static void +xpc_notify_sender_uv(struct xpc_channel *ch, + struct xpc_send_msg_slot_uv *msg_slot, + enum xp_retval reason) +{ + xpc_notify_func func = msg_slot->func; + + if (func != NULL && cmpxchg(&msg_slot->func, func, NULL) == func) { + + atomic_dec(&ch->n_to_notify); + + dev_dbg(xpc_chan, "msg_slot->func() called, msg_slot=0x%p " + "msg_slot_number=%d partid=%d channel=%d\n", msg_slot, + msg_slot->msg_slot_number, ch->partid, ch->number); + + func(reason, ch->partid, ch->number, msg_slot->key); + + dev_dbg(xpc_chan, "msg_slot->func() returned, msg_slot=0x%p " + "msg_slot_number=%d partid=%d channel=%d\n", msg_slot, + msg_slot->msg_slot_number, ch->partid, ch->number); + } +} + +static void +xpc_handle_notify_mq_ack_uv(struct xpc_channel *ch, + struct xpc_notify_mq_msg_uv *msg) +{ + struct xpc_send_msg_slot_uv *msg_slot; + int entry = msg->hdr.msg_slot_number % ch->local_nentries; + + msg_slot = &ch->sn.uv.send_msg_slots[entry]; + + BUG_ON(msg_slot->msg_slot_number != msg->hdr.msg_slot_number); + msg_slot->msg_slot_number += ch->local_nentries; + + if (msg_slot->func != NULL) + xpc_notify_sender_uv(ch, msg_slot, xpMsgDelivered); + + xpc_free_msg_slot_uv(ch, msg_slot); +} + +static void +xpc_handle_notify_mq_msg_uv(struct xpc_partition *part, + struct xpc_notify_mq_msg_uv *msg) +{ + struct xpc_partition_uv *part_uv = &part->sn.uv; + struct xpc_channel *ch; + struct xpc_channel_uv *ch_uv; + struct xpc_notify_mq_msg_uv *msg_slot; + unsigned long irq_flags; + int ch_number = msg->hdr.ch_number; + + if (unlikely(ch_number >= part->nchannels)) { + dev_err(xpc_part, "xpc_handle_notify_IRQ_uv() received invalid " + "channel number=0x%x in message from partid=%d\n", + ch_number, XPC_PARTID(part)); + + /* get hb checker to deactivate from the remote partition */ + spin_lock_irqsave(&xpc_activate_IRQ_rcvd_lock, irq_flags); + if (part_uv->act_state_req == 0) + xpc_activate_IRQ_rcvd++; + part_uv->act_state_req = XPC_P_ASR_DEACTIVATE_UV; + part_uv->reason = xpBadChannelNumber; + spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags); + + wake_up_interruptible(&xpc_activate_IRQ_wq); + return; + } + + ch = &part->channels[ch_number]; + xpc_msgqueue_ref(ch); + + if (!(ch->flags & XPC_C_CONNECTED)) { + xpc_msgqueue_deref(ch); + return; + } + + /* see if we're really dealing with an ACK for a previously sent msg */ + if (msg->hdr.size == 0) { + xpc_handle_notify_mq_ack_uv(ch, msg); + xpc_msgqueue_deref(ch); + return; + } + + /* we're dealing with a normal message sent via the notify_mq */ + ch_uv = &ch->sn.uv; + + msg_slot = ch_uv->recv_msg_slots + + (msg->hdr.msg_slot_number % ch->remote_nentries) * ch->entry_size; + + BUG_ON(msg_slot->hdr.size != 0); + + memcpy(msg_slot, msg, msg->hdr.size); + + xpc_put_fifo_entry_uv(&ch_uv->recv_msg_list, &msg_slot->hdr.u.next); + + if (ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) { + /* + * If there is an existing idle kthread get it to deliver + * the payload, otherwise we'll have to get the channel mgr + * for this partition to create a kthread to do the delivery. + */ + if (atomic_read(&ch->kthreads_idle) > 0) + wake_up_nr(&ch->idle_wq, 1); + else + xpc_send_chctl_local_msgrequest_uv(part, ch->number); + } + xpc_msgqueue_deref(ch); +} + +static irqreturn_t +xpc_handle_notify_IRQ_uv(int irq, void *dev_id) +{ + struct xpc_notify_mq_msg_uv *msg; + short partid; + struct xpc_partition *part; + + while ((msg = gru_get_next_message(xpc_notify_mq_uv->gru_mq_desc)) != + NULL) { + + partid = msg->hdr.partid; + if (partid < 0 || partid >= XP_MAX_NPARTITIONS_UV) { + dev_err(xpc_part, "xpc_handle_notify_IRQ_uv() received " + "invalid partid=0x%x in message\n", partid); + } else { + part = &xpc_partitions[partid]; + + if (xpc_part_ref(part)) { + xpc_handle_notify_mq_msg_uv(part, msg); + xpc_part_deref(part); + } + } + + gru_free_message(xpc_notify_mq_uv->gru_mq_desc, msg); + } + + return IRQ_HANDLED; +} + +static int +xpc_n_of_deliverable_payloads_uv(struct xpc_channel *ch) +{ + return xpc_n_of_fifo_entries_uv(&ch->sn.uv.recv_msg_list); +} + +static void +xpc_process_msg_chctl_flags_uv(struct xpc_partition *part, int ch_number) +{ + struct xpc_channel *ch = &part->channels[ch_number]; + int ndeliverable_payloads; + + xpc_msgqueue_ref(ch); + + ndeliverable_payloads = xpc_n_of_deliverable_payloads_uv(ch); + + if (ndeliverable_payloads > 0 && + (ch->flags & XPC_C_CONNECTED) && + (ch->flags & XPC_C_CONNECTEDCALLOUT_MADE)) { + + xpc_activate_kthreads(ch, ndeliverable_payloads); + } + + xpc_msgqueue_deref(ch); +} + +static enum xp_retval +xpc_send_payload_uv(struct xpc_channel *ch, u32 flags, void *payload, + u16 payload_size, u8 notify_type, xpc_notify_func func, + void *key) +{ + enum xp_retval ret = xpSuccess; + struct xpc_send_msg_slot_uv *msg_slot = NULL; + struct xpc_notify_mq_msg_uv *msg; + u8 msg_buffer[XPC_NOTIFY_MSG_SIZE_UV]; + size_t msg_size; + + DBUG_ON(notify_type != XPC_N_CALL); + + msg_size = sizeof(struct xpc_notify_mq_msghdr_uv) + payload_size; + if (msg_size > ch->entry_size) + return xpPayloadTooBig; + + xpc_msgqueue_ref(ch); + + if (ch->flags & XPC_C_DISCONNECTING) { + ret = ch->reason; + goto out_1; + } + if (!(ch->flags & XPC_C_CONNECTED)) { + ret = xpNotConnected; + goto out_1; + } + + ret = xpc_allocate_msg_slot_uv(ch, flags, &msg_slot); + if (ret != xpSuccess) + goto out_1; + + if (func != NULL) { + atomic_inc(&ch->n_to_notify); + + msg_slot->key = key; + smp_wmb(); /* a non-NULL func must hit memory after the key */ + msg_slot->func = func; + + if (ch->flags & XPC_C_DISCONNECTING) { + ret = ch->reason; + goto out_2; + } + } + + msg = (struct xpc_notify_mq_msg_uv *)&msg_buffer; + msg->hdr.partid = xp_partition_id; + msg->hdr.ch_number = ch->number; + msg->hdr.size = msg_size; + msg->hdr.msg_slot_number = msg_slot->msg_slot_number; + memcpy(&msg->payload, payload, payload_size); + + ret = xpc_send_gru_msg(ch->sn.uv.cached_notify_gru_mq_desc, msg, + msg_size); + if (ret == xpSuccess) + goto out_1; + + XPC_DEACTIVATE_PARTITION(&xpc_partitions[ch->partid], ret); +out_2: + if (func != NULL) { + /* + * Try to NULL the msg_slot's func field. If we fail, then + * xpc_notify_senders_of_disconnect_uv() beat us to it, in which + * case we need to pretend we succeeded to send the message + * since the user will get a callout for the disconnect error + * by xpc_notify_senders_of_disconnect_uv(), and to also get an + * error returned here will confuse them. Additionally, since + * in this case the channel is being disconnected we don't need + * to put the the msg_slot back on the free list. + */ + if (cmpxchg(&msg_slot->func, func, NULL) != func) { + ret = xpSuccess; + goto out_1; + } + + msg_slot->key = NULL; + atomic_dec(&ch->n_to_notify); + } + xpc_free_msg_slot_uv(ch, msg_slot); +out_1: + xpc_msgqueue_deref(ch); + return ret; +} + +/* + * Tell the callers of xpc_send_notify() that the status of their payloads + * is unknown because the channel is now disconnecting. + * + * We don't worry about putting these msg_slots on the free list since the + * msg_slots themselves are about to be kfree'd. + */ +static void +xpc_notify_senders_of_disconnect_uv(struct xpc_channel *ch) +{ + struct xpc_send_msg_slot_uv *msg_slot; + int entry; + + DBUG_ON(!(ch->flags & XPC_C_DISCONNECTING)); + + for (entry = 0; entry < ch->local_nentries; entry++) { + + if (atomic_read(&ch->n_to_notify) == 0) + break; + + msg_slot = &ch->sn.uv.send_msg_slots[entry]; + if (msg_slot->func != NULL) + xpc_notify_sender_uv(ch, msg_slot, ch->reason); + } +} + +/* + * Get the next deliverable message's payload. + */ +static void * +xpc_get_deliverable_payload_uv(struct xpc_channel *ch) +{ + struct xpc_fifo_entry_uv *entry; + struct xpc_notify_mq_msg_uv *msg; + void *payload = NULL; + + if (!(ch->flags & XPC_C_DISCONNECTING)) { + entry = xpc_get_fifo_entry_uv(&ch->sn.uv.recv_msg_list); + if (entry != NULL) { + msg = container_of(entry, struct xpc_notify_mq_msg_uv, + hdr.u.next); + payload = &msg->payload; + } + } + return payload; +} + +static void +xpc_received_payload_uv(struct xpc_channel *ch, void *payload) +{ + struct xpc_notify_mq_msg_uv *msg; + enum xp_retval ret; + + msg = container_of(payload, struct xpc_notify_mq_msg_uv, payload); + + /* return an ACK to the sender of this message */ + + msg->hdr.partid = xp_partition_id; + msg->hdr.size = 0; /* size of zero indicates this is an ACK */ + + ret = xpc_send_gru_msg(ch->sn.uv.cached_notify_gru_mq_desc, msg, + sizeof(struct xpc_notify_mq_msghdr_uv)); + if (ret != xpSuccess) + XPC_DEACTIVATE_PARTITION(&xpc_partitions[ch->partid], ret); +} + +static struct xpc_arch_operations xpc_arch_ops_uv = { + .setup_partitions = xpc_setup_partitions_uv, + .teardown_partitions = xpc_teardown_partitions_uv, + .process_activate_IRQ_rcvd = xpc_process_activate_IRQ_rcvd_uv, + .get_partition_rsvd_page_pa = xpc_get_partition_rsvd_page_pa_uv, + .setup_rsvd_page = xpc_setup_rsvd_page_uv, + + .allow_hb = xpc_allow_hb_uv, + .disallow_hb = xpc_disallow_hb_uv, + .disallow_all_hbs = xpc_disallow_all_hbs_uv, + .increment_heartbeat = xpc_increment_heartbeat_uv, + .offline_heartbeat = xpc_offline_heartbeat_uv, + .online_heartbeat = xpc_online_heartbeat_uv, + .heartbeat_init = xpc_heartbeat_init_uv, + .heartbeat_exit = xpc_heartbeat_exit_uv, + .get_remote_heartbeat = xpc_get_remote_heartbeat_uv, + + .request_partition_activation = + xpc_request_partition_activation_uv, + .request_partition_reactivation = + xpc_request_partition_reactivation_uv, + .request_partition_deactivation = + xpc_request_partition_deactivation_uv, + .cancel_partition_deactivation_request = + xpc_cancel_partition_deactivation_request_uv, + + .setup_ch_structures = xpc_setup_ch_structures_uv, + .teardown_ch_structures = xpc_teardown_ch_structures_uv, + + .make_first_contact = xpc_make_first_contact_uv, + + .get_chctl_all_flags = xpc_get_chctl_all_flags_uv, + .send_chctl_closerequest = xpc_send_chctl_closerequest_uv, + .send_chctl_closereply = xpc_send_chctl_closereply_uv, + .send_chctl_openrequest = xpc_send_chctl_openrequest_uv, + .send_chctl_openreply = xpc_send_chctl_openreply_uv, + .send_chctl_opencomplete = xpc_send_chctl_opencomplete_uv, + .process_msg_chctl_flags = xpc_process_msg_chctl_flags_uv, + + .save_remote_msgqueue_pa = xpc_save_remote_msgqueue_pa_uv, + + .setup_msg_structures = xpc_setup_msg_structures_uv, + .teardown_msg_structures = xpc_teardown_msg_structures_uv, + + .indicate_partition_engaged = xpc_indicate_partition_engaged_uv, + .indicate_partition_disengaged = xpc_indicate_partition_disengaged_uv, + .assume_partition_disengaged = xpc_assume_partition_disengaged_uv, + .partition_engaged = xpc_partition_engaged_uv, + .any_partition_engaged = xpc_any_partition_engaged_uv, + + .n_of_deliverable_payloads = xpc_n_of_deliverable_payloads_uv, + .send_payload = xpc_send_payload_uv, + .get_deliverable_payload = xpc_get_deliverable_payload_uv, + .received_payload = xpc_received_payload_uv, + .notify_senders_of_disconnect = xpc_notify_senders_of_disconnect_uv, +}; + +static int +xpc_init_mq_node(int nid) +{ + int cpu; + + get_online_cpus(); + + for_each_cpu(cpu, cpumask_of_node(nid)) { + xpc_activate_mq_uv = + xpc_create_gru_mq_uv(XPC_ACTIVATE_MQ_SIZE_UV, nid, + XPC_ACTIVATE_IRQ_NAME, + xpc_handle_activate_IRQ_uv); + if (!IS_ERR(xpc_activate_mq_uv)) + break; + } + if (IS_ERR(xpc_activate_mq_uv)) { + put_online_cpus(); + return PTR_ERR(xpc_activate_mq_uv); + } + + for_each_cpu(cpu, cpumask_of_node(nid)) { + xpc_notify_mq_uv = + xpc_create_gru_mq_uv(XPC_NOTIFY_MQ_SIZE_UV, nid, + XPC_NOTIFY_IRQ_NAME, + xpc_handle_notify_IRQ_uv); + if (!IS_ERR(xpc_notify_mq_uv)) + break; + } + if (IS_ERR(xpc_notify_mq_uv)) { + xpc_destroy_gru_mq_uv(xpc_activate_mq_uv); + put_online_cpus(); + return PTR_ERR(xpc_notify_mq_uv); + } + + put_online_cpus(); + return 0; +} + +int +xpc_init_uv(void) +{ + int nid; + int ret = 0; + + xpc_arch_ops = xpc_arch_ops_uv; + + if (sizeof(struct xpc_notify_mq_msghdr_uv) > XPC_MSG_HDR_MAX_SIZE) { + dev_err(xpc_part, "xpc_notify_mq_msghdr_uv is larger than %d\n", + XPC_MSG_HDR_MAX_SIZE); + return -E2BIG; + } + + if (xpc_mq_node < 0) + for_each_online_node(nid) { + ret = xpc_init_mq_node(nid); + + if (!ret) + break; + } + else + ret = xpc_init_mq_node(xpc_mq_node); + + if (ret < 0) + dev_err(xpc_part, "xpc_init_mq_node() returned error=%d\n", + -ret); + + return ret; +} + +void +xpc_exit_uv(void) +{ + xpc_destroy_gru_mq_uv(xpc_notify_mq_uv); + xpc_destroy_gru_mq_uv(xpc_activate_mq_uv); +} + +module_param(xpc_mq_node, int, 0); +MODULE_PARM_DESC(xpc_mq_node, "Node number on which to allocate message queues."); diff --git a/drivers/misc/sgi-xp/xpnet.c b/drivers/misc/sgi-xp/xpnet.c new file mode 100644 index 000000000..44d750d98 --- /dev/null +++ b/drivers/misc/sgi-xp/xpnet.c @@ -0,0 +1,596 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1999-2009 Silicon Graphics, Inc. All rights reserved. + */ + +/* + * Cross Partition Network Interface (XPNET) support + * + * XPNET provides a virtual network layered on top of the Cross + * Partition communication layer. + * + * XPNET provides direct point-to-point and broadcast-like support + * for an ethernet-like device. The ethernet broadcast medium is + * replaced with a point-to-point message structure which passes + * pointers to a DMA-capable block that a remote partition should + * retrieve and pass to the upper level networking layer. + * + */ + +#include +#include +#include +#include +#include "xp.h" + +/* + * The message payload transferred by XPC. + * + * buf_pa is the physical address where the DMA should pull from. + * + * NOTE: for performance reasons, buf_pa should _ALWAYS_ begin on a + * cacheline boundary. To accomplish this, we record the number of + * bytes from the beginning of the first cacheline to the first useful + * byte of the skb (leadin_ignore) and the number of bytes from the + * last useful byte of the skb to the end of the last cacheline + * (tailout_ignore). + * + * size is the number of bytes to transfer which includes the skb->len + * (useful bytes of the senders skb) plus the leadin and tailout + */ +struct xpnet_message { + u16 version; /* Version for this message */ + u16 embedded_bytes; /* #of bytes embedded in XPC message */ + u32 magic; /* Special number indicating this is xpnet */ + unsigned long buf_pa; /* phys address of buffer to retrieve */ + u32 size; /* #of bytes in buffer */ + u8 leadin_ignore; /* #of bytes to ignore at the beginning */ + u8 tailout_ignore; /* #of bytes to ignore at the end */ + unsigned char data; /* body of small packets */ +}; + +/* + * Determine the size of our message, the cacheline aligned size, + * and then the number of message will request from XPC. + * + * XPC expects each message to exist in an individual cacheline. + */ +#define XPNET_MSG_SIZE XPC_MSG_PAYLOAD_MAX_SIZE +#define XPNET_MSG_DATA_MAX \ + (XPNET_MSG_SIZE - offsetof(struct xpnet_message, data)) +#define XPNET_MSG_NENTRIES (PAGE_SIZE / XPC_MSG_MAX_SIZE) + +#define XPNET_MAX_KTHREADS (XPNET_MSG_NENTRIES + 1) +#define XPNET_MAX_IDLE_KTHREADS (XPNET_MSG_NENTRIES + 1) + +/* + * Version number of XPNET implementation. XPNET can always talk to versions + * with same major #, and never talk to versions with a different version. + */ +#define _XPNET_VERSION(_major, _minor) (((_major) << 4) | (_minor)) +#define XPNET_VERSION_MAJOR(_v) ((_v) >> 4) +#define XPNET_VERSION_MINOR(_v) ((_v) & 0xf) + +#define XPNET_VERSION _XPNET_VERSION(1, 0) /* version 1.0 */ +#define XPNET_VERSION_EMBED _XPNET_VERSION(1, 1) /* version 1.1 */ +#define XPNET_MAGIC 0x88786984 /* "XNET" */ + +#define XPNET_VALID_MSG(_m) \ + ((XPNET_VERSION_MAJOR(_m->version) == XPNET_VERSION_MAJOR(XPNET_VERSION)) \ + && (msg->magic == XPNET_MAGIC)) + +#define XPNET_DEVICE_NAME "xp0" + +/* + * When messages are queued with xpc_send_notify, a kmalloc'd buffer + * of the following type is passed as a notification cookie. When the + * notification function is called, we use the cookie to decide + * whether all outstanding message sends have completed. The skb can + * then be released. + */ +struct xpnet_pending_msg { + struct sk_buff *skb; + atomic_t use_count; +}; + +struct net_device *xpnet_device; + +/* + * When we are notified of other partitions activating, we add them to + * our bitmask of partitions to which we broadcast. + */ +static unsigned long *xpnet_broadcast_partitions; +/* protect above */ +static DEFINE_SPINLOCK(xpnet_broadcast_lock); + +/* + * Since the Block Transfer Engine (BTE) is being used for the transfer + * and it relies upon cache-line size transfers, we need to reserve at + * least one cache-line for head and tail alignment. The BTE is + * limited to 8MB transfers. + * + * Testing has shown that changing MTU to greater than 64KB has no effect + * on TCP as the two sides negotiate a Max Segment Size that is limited + * to 64K. Other protocols May use packets greater than this, but for + * now, the default is 64KB. + */ +#define XPNET_MAX_MTU (0x800000UL - L1_CACHE_BYTES) +/* 68 comes from min TCP+IP+MAC header */ +#define XPNET_MIN_MTU 68 +/* 32KB has been determined to be the ideal */ +#define XPNET_DEF_MTU (0x8000UL) + +/* + * The partid is encapsulated in the MAC address beginning in the following + * octet and it consists of two octets. + */ +#define XPNET_PARTID_OCTET 2 + +/* Define the XPNET debug device structures to be used with dev_dbg() et al */ + +struct device_driver xpnet_dbg_name = { + .name = "xpnet" +}; + +struct device xpnet_dbg_subname = { + .init_name = "", /* set to "" */ + .driver = &xpnet_dbg_name +}; + +struct device *xpnet = &xpnet_dbg_subname; + +/* + * Packet was recevied by XPC and forwarded to us. + */ +static void +xpnet_receive(short partid, int channel, struct xpnet_message *msg) +{ + struct sk_buff *skb; + void *dst; + enum xp_retval ret; + + if (!XPNET_VALID_MSG(msg)) { + /* + * Packet with a different XPC version. Ignore. + */ + xpc_received(partid, channel, (void *)msg); + + xpnet_device->stats.rx_errors++; + + return; + } + dev_dbg(xpnet, "received 0x%lx, %d, %d, %d\n", msg->buf_pa, msg->size, + msg->leadin_ignore, msg->tailout_ignore); + + /* reserve an extra cache line */ + skb = dev_alloc_skb(msg->size + L1_CACHE_BYTES); + if (!skb) { + dev_err(xpnet, "failed on dev_alloc_skb(%d)\n", + msg->size + L1_CACHE_BYTES); + + xpc_received(partid, channel, (void *)msg); + + xpnet_device->stats.rx_errors++; + + return; + } + + /* + * The allocated skb has some reserved space. + * In order to use xp_remote_memcpy(), we need to get the + * skb->data pointer moved forward. + */ + skb_reserve(skb, (L1_CACHE_BYTES - ((u64)skb->data & + (L1_CACHE_BYTES - 1)) + + msg->leadin_ignore)); + + /* + * Update the tail pointer to indicate data actually + * transferred. + */ + skb_put(skb, (msg->size - msg->leadin_ignore - msg->tailout_ignore)); + + /* + * Move the data over from the other side. + */ + if ((XPNET_VERSION_MINOR(msg->version) == 1) && + (msg->embedded_bytes != 0)) { + dev_dbg(xpnet, "copying embedded message. memcpy(0x%p, 0x%p, " + "%lu)\n", skb->data, &msg->data, + (size_t)msg->embedded_bytes); + + skb_copy_to_linear_data(skb, &msg->data, + (size_t)msg->embedded_bytes); + } else { + dst = (void *)((u64)skb->data & ~(L1_CACHE_BYTES - 1)); + dev_dbg(xpnet, "transferring buffer to the skb->data area;\n\t" + "xp_remote_memcpy(0x%p, 0x%p, %hu)\n", dst, + (void *)msg->buf_pa, msg->size); + + ret = xp_remote_memcpy(xp_pa(dst), msg->buf_pa, msg->size); + if (ret != xpSuccess) { + /* + * !!! Need better way of cleaning skb. Currently skb + * !!! appears in_use and we can't just call + * !!! dev_kfree_skb. + */ + dev_err(xpnet, "xp_remote_memcpy(0x%p, 0x%p, 0x%hx) " + "returned error=0x%x\n", dst, + (void *)msg->buf_pa, msg->size, ret); + + xpc_received(partid, channel, (void *)msg); + + xpnet_device->stats.rx_errors++; + + return; + } + } + + dev_dbg(xpnet, "head=0x%p skb->data=0x%p skb->tail=0x%p " + "skb->end=0x%p skb->len=%d\n", (void *)skb->head, + (void *)skb->data, skb_tail_pointer(skb), skb_end_pointer(skb), + skb->len); + + skb->protocol = eth_type_trans(skb, xpnet_device); + skb->ip_summed = CHECKSUM_UNNECESSARY; + + dev_dbg(xpnet, "passing skb to network layer\n" + "\tskb->head=0x%p skb->data=0x%p skb->tail=0x%p " + "skb->end=0x%p skb->len=%d\n", + (void *)skb->head, (void *)skb->data, skb_tail_pointer(skb), + skb_end_pointer(skb), skb->len); + + xpnet_device->stats.rx_packets++; + xpnet_device->stats.rx_bytes += skb->len + ETH_HLEN; + + netif_rx_ni(skb); + xpc_received(partid, channel, (void *)msg); +} + +/* + * This is the handler which XPC calls during any sort of change in + * state or message reception on a connection. + */ +static void +xpnet_connection_activity(enum xp_retval reason, short partid, int channel, + void *data, void *key) +{ + DBUG_ON(partid < 0 || partid >= xp_max_npartitions); + DBUG_ON(channel != XPC_NET_CHANNEL); + + switch (reason) { + case xpMsgReceived: /* message received */ + DBUG_ON(data == NULL); + + xpnet_receive(partid, channel, (struct xpnet_message *)data); + break; + + case xpConnected: /* connection completed to a partition */ + spin_lock_bh(&xpnet_broadcast_lock); + __set_bit(partid, xpnet_broadcast_partitions); + spin_unlock_bh(&xpnet_broadcast_lock); + + netif_carrier_on(xpnet_device); + + dev_dbg(xpnet, "%s connected to partition %d\n", + xpnet_device->name, partid); + break; + + default: + spin_lock_bh(&xpnet_broadcast_lock); + __clear_bit(partid, xpnet_broadcast_partitions); + spin_unlock_bh(&xpnet_broadcast_lock); + + if (bitmap_empty((unsigned long *)xpnet_broadcast_partitions, + xp_max_npartitions)) { + netif_carrier_off(xpnet_device); + } + + dev_dbg(xpnet, "%s disconnected from partition %d\n", + xpnet_device->name, partid); + break; + } +} + +static int +xpnet_dev_open(struct net_device *dev) +{ + enum xp_retval ret; + + dev_dbg(xpnet, "calling xpc_connect(%d, 0x%p, NULL, %ld, %ld, %ld, " + "%ld)\n", XPC_NET_CHANNEL, xpnet_connection_activity, + (unsigned long)XPNET_MSG_SIZE, + (unsigned long)XPNET_MSG_NENTRIES, + (unsigned long)XPNET_MAX_KTHREADS, + (unsigned long)XPNET_MAX_IDLE_KTHREADS); + + ret = xpc_connect(XPC_NET_CHANNEL, xpnet_connection_activity, NULL, + XPNET_MSG_SIZE, XPNET_MSG_NENTRIES, + XPNET_MAX_KTHREADS, XPNET_MAX_IDLE_KTHREADS); + if (ret != xpSuccess) { + dev_err(xpnet, "ifconfig up of %s failed on XPC connect, " + "ret=%d\n", dev->name, ret); + + return -ENOMEM; + } + + dev_dbg(xpnet, "ifconfig up of %s; XPC connected\n", dev->name); + + return 0; +} + +static int +xpnet_dev_stop(struct net_device *dev) +{ + xpc_disconnect(XPC_NET_CHANNEL); + + dev_dbg(xpnet, "ifconfig down of %s; XPC disconnected\n", dev->name); + + return 0; +} + +/* + * Notification that the other end has received the message and + * DMA'd the skb information. At this point, they are done with + * our side. When all recipients are done processing, we + * release the skb and then release our pending message structure. + */ +static void +xpnet_send_completed(enum xp_retval reason, short partid, int channel, + void *__qm) +{ + struct xpnet_pending_msg *queued_msg = (struct xpnet_pending_msg *)__qm; + + DBUG_ON(queued_msg == NULL); + + dev_dbg(xpnet, "message to %d notified with reason %d\n", + partid, reason); + + if (atomic_dec_return(&queued_msg->use_count) == 0) { + dev_dbg(xpnet, "all acks for skb->head=-x%p\n", + (void *)queued_msg->skb->head); + + dev_kfree_skb_any(queued_msg->skb); + kfree(queued_msg); + } +} + +static void +xpnet_send(struct sk_buff *skb, struct xpnet_pending_msg *queued_msg, + u64 start_addr, u64 end_addr, u16 embedded_bytes, int dest_partid) +{ + u8 msg_buffer[XPNET_MSG_SIZE]; + struct xpnet_message *msg = (struct xpnet_message *)&msg_buffer; + u16 msg_size = sizeof(struct xpnet_message); + enum xp_retval ret; + + msg->embedded_bytes = embedded_bytes; + if (unlikely(embedded_bytes != 0)) { + msg->version = XPNET_VERSION_EMBED; + dev_dbg(xpnet, "calling memcpy(0x%p, 0x%p, 0x%lx)\n", + &msg->data, skb->data, (size_t)embedded_bytes); + skb_copy_from_linear_data(skb, &msg->data, + (size_t)embedded_bytes); + msg_size += embedded_bytes - 1; + } else { + msg->version = XPNET_VERSION; + } + msg->magic = XPNET_MAGIC; + msg->size = end_addr - start_addr; + msg->leadin_ignore = (u64)skb->data - start_addr; + msg->tailout_ignore = end_addr - (u64)skb_tail_pointer(skb); + msg->buf_pa = xp_pa((void *)start_addr); + + dev_dbg(xpnet, "sending XPC message to %d:%d\n" + "msg->buf_pa=0x%lx, msg->size=%u, " + "msg->leadin_ignore=%u, msg->tailout_ignore=%u\n", + dest_partid, XPC_NET_CHANNEL, msg->buf_pa, msg->size, + msg->leadin_ignore, msg->tailout_ignore); + + atomic_inc(&queued_msg->use_count); + + ret = xpc_send_notify(dest_partid, XPC_NET_CHANNEL, XPC_NOWAIT, msg, + msg_size, xpnet_send_completed, queued_msg); + if (unlikely(ret != xpSuccess)) + atomic_dec(&queued_msg->use_count); +} + +/* + * Network layer has formatted a packet (skb) and is ready to place it + * "on the wire". Prepare and send an xpnet_message to all partitions + * which have connected with us and are targets of this packet. + * + * MAC-NOTE: For the XPNET driver, the MAC address contains the + * destination partid. If the destination partid octets are 0xffff, + * this packet is to be broadcast to all connected partitions. + */ +static netdev_tx_t +xpnet_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct xpnet_pending_msg *queued_msg; + u64 start_addr, end_addr; + short dest_partid; + u16 embedded_bytes = 0; + + dev_dbg(xpnet, ">skb->head=0x%p skb->data=0x%p skb->tail=0x%p " + "skb->end=0x%p skb->len=%d\n", (void *)skb->head, + (void *)skb->data, skb_tail_pointer(skb), skb_end_pointer(skb), + skb->len); + + if (skb->data[0] == 0x33) { + dev_kfree_skb(skb); + return NETDEV_TX_OK; /* nothing needed to be done */ + } + + /* + * The xpnet_pending_msg tracks how many outstanding + * xpc_send_notifies are relying on this skb. When none + * remain, release the skb. + */ + queued_msg = kmalloc(sizeof(struct xpnet_pending_msg), GFP_ATOMIC); + if (queued_msg == NULL) { + dev_warn(xpnet, "failed to kmalloc %ld bytes; dropping " + "packet\n", sizeof(struct xpnet_pending_msg)); + + dev->stats.tx_errors++; + dev_kfree_skb(skb); + return NETDEV_TX_OK; + } + + /* get the beginning of the first cacheline and end of last */ + start_addr = ((u64)skb->data & ~(L1_CACHE_BYTES - 1)); + end_addr = L1_CACHE_ALIGN((u64)skb_tail_pointer(skb)); + + /* calculate how many bytes to embed in the XPC message */ + if (unlikely(skb->len <= XPNET_MSG_DATA_MAX)) { + /* skb->data does fit so embed */ + embedded_bytes = skb->len; + } + + /* + * Since the send occurs asynchronously, we set the count to one + * and begin sending. Any sends that happen to complete before + * we are done sending will not free the skb. We will be left + * with that task during exit. This also handles the case of + * a packet destined for a partition which is no longer up. + */ + atomic_set(&queued_msg->use_count, 1); + queued_msg->skb = skb; + + if (skb->data[0] == 0xff) { + /* we are being asked to broadcast to all partitions */ + for_each_set_bit(dest_partid, xpnet_broadcast_partitions, + xp_max_npartitions) { + + xpnet_send(skb, queued_msg, start_addr, end_addr, + embedded_bytes, dest_partid); + } + } else { + dest_partid = (short)skb->data[XPNET_PARTID_OCTET + 1]; + dest_partid |= (short)skb->data[XPNET_PARTID_OCTET + 0] << 8; + + if (dest_partid >= 0 && + dest_partid < xp_max_npartitions && + test_bit(dest_partid, xpnet_broadcast_partitions) != 0) { + + xpnet_send(skb, queued_msg, start_addr, end_addr, + embedded_bytes, dest_partid); + } + } + + dev->stats.tx_packets++; + dev->stats.tx_bytes += skb->len; + + if (atomic_dec_return(&queued_msg->use_count) == 0) { + dev_kfree_skb(skb); + kfree(queued_msg); + } + + return NETDEV_TX_OK; +} + +/* + * Deal with transmit timeouts coming from the network layer. + */ +static void +xpnet_dev_tx_timeout(struct net_device *dev) +{ + dev->stats.tx_errors++; +} + +static const struct net_device_ops xpnet_netdev_ops = { + .ndo_open = xpnet_dev_open, + .ndo_stop = xpnet_dev_stop, + .ndo_start_xmit = xpnet_dev_hard_start_xmit, + .ndo_tx_timeout = xpnet_dev_tx_timeout, + .ndo_set_mac_address = eth_mac_addr, + .ndo_validate_addr = eth_validate_addr, +}; + +static int __init +xpnet_init(void) +{ + int result; + + if (!is_shub() && !is_uv()) + return -ENODEV; + + dev_info(xpnet, "registering network device %s\n", XPNET_DEVICE_NAME); + + xpnet_broadcast_partitions = kcalloc(BITS_TO_LONGS(xp_max_npartitions), + sizeof(long), + GFP_KERNEL); + if (xpnet_broadcast_partitions == NULL) + return -ENOMEM; + + /* + * use ether_setup() to init the majority of our device + * structure and then override the necessary pieces. + */ + xpnet_device = alloc_netdev(0, XPNET_DEVICE_NAME, NET_NAME_UNKNOWN, + ether_setup); + if (xpnet_device == NULL) { + kfree(xpnet_broadcast_partitions); + return -ENOMEM; + } + + netif_carrier_off(xpnet_device); + + xpnet_device->netdev_ops = &xpnet_netdev_ops; + xpnet_device->mtu = XPNET_DEF_MTU; + xpnet_device->min_mtu = XPNET_MIN_MTU; + xpnet_device->max_mtu = XPNET_MAX_MTU; + + /* + * Multicast assumes the LSB of the first octet is set for multicast + * MAC addresses. We chose the first octet of the MAC to be unlikely + * to collide with any vendor's officially issued MAC. + */ + xpnet_device->dev_addr[0] = 0x02; /* locally administered, no OUI */ + + xpnet_device->dev_addr[XPNET_PARTID_OCTET + 1] = xp_partition_id; + xpnet_device->dev_addr[XPNET_PARTID_OCTET + 0] = (xp_partition_id >> 8); + + /* + * ether_setup() sets this to a multicast device. We are + * really not supporting multicast at this time. + */ + xpnet_device->flags &= ~IFF_MULTICAST; + + /* + * No need to checksum as it is a DMA transfer. The BTE will + * report an error if the data is not retrievable and the + * packet will be dropped. + */ + xpnet_device->features = NETIF_F_HW_CSUM; + + result = register_netdev(xpnet_device); + if (result != 0) { + free_netdev(xpnet_device); + kfree(xpnet_broadcast_partitions); + } + + return result; +} + +module_init(xpnet_init); + +static void __exit +xpnet_exit(void) +{ + dev_info(xpnet, "unregistering network device %s\n", + xpnet_device[0].name); + + unregister_netdev(xpnet_device); + free_netdev(xpnet_device); + kfree(xpnet_broadcast_partitions); +} + +module_exit(xpnet_exit); + +MODULE_AUTHOR("Silicon Graphics, Inc."); +MODULE_DESCRIPTION("Cross Partition Network adapter (XPNET)"); +MODULE_LICENSE("GPL"); diff --git a/drivers/misc/spear13xx_pcie_gadget.c b/drivers/misc/spear13xx_pcie_gadget.c new file mode 100644 index 000000000..ee120dcbb --- /dev/null +++ b/drivers/misc/spear13xx_pcie_gadget.c @@ -0,0 +1,797 @@ +/* + * drivers/misc/spear13xx_pcie_gadget.c + * + * Copyright (C) 2010 ST Microelectronics + * Pratyush Anand + * + * This file is licensed under the terms of the GNU General Public + * License version 2. This program is licensed "as is" without any + * warranty of any kind, whether express or implied. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define IN0_MEM_SIZE (200 * 1024 * 1024 - 1) +/* In current implementation address translation is done using IN0 only. + * So IN1 start address and IN0 end address has been kept same +*/ +#define IN1_MEM_SIZE (0 * 1024 * 1024 - 1) +#define IN_IO_SIZE (20 * 1024 * 1024 - 1) +#define IN_CFG0_SIZE (12 * 1024 * 1024 - 1) +#define IN_CFG1_SIZE (12 * 1024 * 1024 - 1) +#define IN_MSG_SIZE (12 * 1024 * 1024 - 1) +/* Keep default BAR size as 4K*/ +/* AORAM would be mapped by default*/ +#define INBOUND_ADDR_MASK (SPEAR13XX_SYSRAM1_SIZE - 1) + +#define INT_TYPE_NO_INT 0 +#define INT_TYPE_INTX 1 +#define INT_TYPE_MSI 2 +struct spear_pcie_gadget_config { + void __iomem *base; + void __iomem *va_app_base; + void __iomem *va_dbi_base; + char int_type[10]; + ulong requested_msi; + ulong configured_msi; + ulong bar0_size; + ulong bar0_rw_offset; + void __iomem *va_bar0_address; +}; + +struct pcie_gadget_target { + struct configfs_subsystem subsys; + struct spear_pcie_gadget_config config; +}; + +struct pcie_gadget_target_attr { + struct configfs_attribute attr; + ssize_t (*show)(struct spear_pcie_gadget_config *config, + char *buf); + ssize_t (*store)(struct spear_pcie_gadget_config *config, + const char *buf, + size_t count); +}; + +static void enable_dbi_access(struct pcie_app_reg __iomem *app_reg) +{ + /* Enable DBI access */ + writel(readl(&app_reg->slv_armisc) | (1 << AXI_OP_DBI_ACCESS_ID), + &app_reg->slv_armisc); + writel(readl(&app_reg->slv_awmisc) | (1 << AXI_OP_DBI_ACCESS_ID), + &app_reg->slv_awmisc); + +} + +static void disable_dbi_access(struct pcie_app_reg __iomem *app_reg) +{ + /* disable DBI access */ + writel(readl(&app_reg->slv_armisc) & ~(1 << AXI_OP_DBI_ACCESS_ID), + &app_reg->slv_armisc); + writel(readl(&app_reg->slv_awmisc) & ~(1 << AXI_OP_DBI_ACCESS_ID), + &app_reg->slv_awmisc); + +} + +static void spear_dbi_read_reg(struct spear_pcie_gadget_config *config, + int where, int size, u32 *val) +{ + struct pcie_app_reg __iomem *app_reg = config->va_app_base; + ulong va_address; + + /* Enable DBI access */ + enable_dbi_access(app_reg); + + va_address = (ulong)config->va_dbi_base + (where & ~0x3); + + *val = readl(va_address); + + if (size == 1) + *val = (*val >> (8 * (where & 3))) & 0xff; + else if (size == 2) + *val = (*val >> (8 * (where & 3))) & 0xffff; + + /* Disable DBI access */ + disable_dbi_access(app_reg); +} + +static void spear_dbi_write_reg(struct spear_pcie_gadget_config *config, + int where, int size, u32 val) +{ + struct pcie_app_reg __iomem *app_reg = config->va_app_base; + ulong va_address; + + /* Enable DBI access */ + enable_dbi_access(app_reg); + + va_address = (ulong)config->va_dbi_base + (where & ~0x3); + + if (size == 4) + writel(val, va_address); + else if (size == 2) + writew(val, va_address + (where & 2)); + else if (size == 1) + writeb(val, va_address + (where & 3)); + + /* Disable DBI access */ + disable_dbi_access(app_reg); +} + +#define PCI_FIND_CAP_TTL 48 + +static int pci_find_own_next_cap_ttl(struct spear_pcie_gadget_config *config, + u32 pos, int cap, int *ttl) +{ + u32 id; + + while ((*ttl)--) { + spear_dbi_read_reg(config, pos, 1, &pos); + if (pos < 0x40) + break; + pos &= ~3; + spear_dbi_read_reg(config, pos + PCI_CAP_LIST_ID, 1, &id); + if (id == 0xff) + break; + if (id == cap) + return pos; + pos += PCI_CAP_LIST_NEXT; + } + return 0; +} + +static int pci_find_own_next_cap(struct spear_pcie_gadget_config *config, + u32 pos, int cap) +{ + int ttl = PCI_FIND_CAP_TTL; + + return pci_find_own_next_cap_ttl(config, pos, cap, &ttl); +} + +static int pci_find_own_cap_start(struct spear_pcie_gadget_config *config, + u8 hdr_type) +{ + u32 status; + + spear_dbi_read_reg(config, PCI_STATUS, 2, &status); + if (!(status & PCI_STATUS_CAP_LIST)) + return 0; + + switch (hdr_type) { + case PCI_HEADER_TYPE_NORMAL: + case PCI_HEADER_TYPE_BRIDGE: + return PCI_CAPABILITY_LIST; + case PCI_HEADER_TYPE_CARDBUS: + return PCI_CB_CAPABILITY_LIST; + default: + return 0; + } + + return 0; +} + +/* + * Tell if a device supports a given PCI capability. + * Returns the address of the requested capability structure within the + * device's PCI configuration space or 0 in case the device does not + * support it. Possible values for @cap: + * + * %PCI_CAP_ID_PM Power Management + * %PCI_CAP_ID_AGP Accelerated Graphics Port + * %PCI_CAP_ID_VPD Vital Product Data + * %PCI_CAP_ID_SLOTID Slot Identification + * %PCI_CAP_ID_MSI Message Signalled Interrupts + * %PCI_CAP_ID_CHSWP CompactPCI HotSwap + * %PCI_CAP_ID_PCIX PCI-X + * %PCI_CAP_ID_EXP PCI Express + */ +static int pci_find_own_capability(struct spear_pcie_gadget_config *config, + int cap) +{ + u32 pos; + u32 hdr_type; + + spear_dbi_read_reg(config, PCI_HEADER_TYPE, 1, &hdr_type); + + pos = pci_find_own_cap_start(config, hdr_type); + if (pos) + pos = pci_find_own_next_cap(config, pos, cap); + + return pos; +} + +static irqreturn_t spear_pcie_gadget_irq(int irq, void *dev_id) +{ + return 0; +} + +/* + * configfs interfaces show/store functions + */ + +static struct pcie_gadget_target *to_target(struct config_item *item) +{ + return item ? + container_of(to_configfs_subsystem(to_config_group(item)), + struct pcie_gadget_target, subsys) : NULL; +} + +static ssize_t pcie_gadget_link_show(struct config_item *item, char *buf) +{ + struct pcie_app_reg __iomem *app_reg = to_target(item)->va_app_base; + + if (readl(&app_reg->app_status_1) & ((u32)1 << XMLH_LINK_UP_ID)) + return sprintf(buf, "UP"); + else + return sprintf(buf, "DOWN"); +} + +static ssize_t pcie_gadget_link_store(struct config_item *item, + const char *buf, size_t count) +{ + struct pcie_app_reg __iomem *app_reg = to_target(item)->va_app_base; + + if (sysfs_streq(buf, "UP")) + writel(readl(&app_reg->app_ctrl_0) | (1 << APP_LTSSM_ENABLE_ID), + &app_reg->app_ctrl_0); + else if (sysfs_streq(buf, "DOWN")) + writel(readl(&app_reg->app_ctrl_0) + & ~(1 << APP_LTSSM_ENABLE_ID), + &app_reg->app_ctrl_0); + else + return -EINVAL; + return count; +} + +static ssize_t pcie_gadget_int_type_show(struct config_item *item, char *buf) +{ + return sprintf(buf, "%s", to_target(item)->int_type); +} + +static ssize_t pcie_gadget_int_type_store(struct config_item *item, + const char *buf, size_t count) +{ + struct spear_pcie_gadget_config *config = to_target(item) + u32 cap, vec, flags; + ulong vector; + + if (sysfs_streq(buf, "INTA")) + spear_dbi_write_reg(config, PCI_INTERRUPT_LINE, 1, 1); + + else if (sysfs_streq(buf, "MSI")) { + vector = config->requested_msi; + vec = 0; + while (vector > 1) { + vector /= 2; + vec++; + } + spear_dbi_write_reg(config, PCI_INTERRUPT_LINE, 1, 0); + cap = pci_find_own_capability(config, PCI_CAP_ID_MSI); + spear_dbi_read_reg(config, cap + PCI_MSI_FLAGS, 1, &flags); + flags &= ~PCI_MSI_FLAGS_QMASK; + flags |= vec << 1; + spear_dbi_write_reg(config, cap + PCI_MSI_FLAGS, 1, flags); + } else + return -EINVAL; + + strcpy(config->int_type, buf); + + return count; +} + +static ssize_t pcie_gadget_no_of_msi_show(struct config_item *item, char *buf) +{ + struct spear_pcie_gadget_config *config = to_target(item) + struct pcie_app_reg __iomem *app_reg = to_target(item)->va_app_base; + u32 cap, vec, flags; + ulong vector; + + if ((readl(&app_reg->msg_status) & (1 << CFG_MSI_EN_ID)) + != (1 << CFG_MSI_EN_ID)) + vector = 0; + else { + cap = pci_find_own_capability(config, PCI_CAP_ID_MSI); + spear_dbi_read_reg(config, cap + PCI_MSI_FLAGS, 1, &flags); + flags &= ~PCI_MSI_FLAGS_QSIZE; + vec = flags >> 4; + vector = 1; + while (vec--) + vector *= 2; + } + config->configured_msi = vector; + + return sprintf(buf, "%lu", vector); +} + +static ssize_t pcie_gadget_no_of_msi_store(struct config_item *item, + const char *buf, size_t count) +{ + int ret; + + ret = kstrtoul(buf, 0, &to_target(item)->requested_msi); + if (ret) + return ret; + + if (config->requested_msi > 32) + config->requested_msi = 32; + + return count; +} + +static ssize_t pcie_gadget_inta_store(struct config_item *item, + const char *buf, size_t count) +{ + struct pcie_app_reg __iomem *app_reg = to_target(item)->va_app_base; + ulong en; + int ret; + + ret = kstrtoul(buf, 0, &en); + if (ret) + return ret; + + if (en) + writel(readl(&app_reg->app_ctrl_0) | (1 << SYS_INT_ID), + &app_reg->app_ctrl_0); + else + writel(readl(&app_reg->app_ctrl_0) & ~(1 << SYS_INT_ID), + &app_reg->app_ctrl_0); + + return count; +} + +static ssize_t pcie_gadget_send_msi_store(struct config_item *item, + const char *buf, size_t count) +{ + struct spear_pcie_gadget_config *config = to_target(item) + struct pcie_app_reg __iomem *app_reg = config->va_app_base; + ulong vector; + u32 ven_msi; + int ret; + + ret = kstrtoul(buf, 0, &vector); + if (ret) + return ret; + + if (!config->configured_msi) + return -EINVAL; + + if (vector >= config->configured_msi) + return -EINVAL; + + ven_msi = readl(&app_reg->ven_msi_1); + ven_msi &= ~VEN_MSI_FUN_NUM_MASK; + ven_msi |= 0 << VEN_MSI_FUN_NUM_ID; + ven_msi &= ~VEN_MSI_TC_MASK; + ven_msi |= 0 << VEN_MSI_TC_ID; + ven_msi &= ~VEN_MSI_VECTOR_MASK; + ven_msi |= vector << VEN_MSI_VECTOR_ID; + + /* generating interrupt for msi vector */ + ven_msi |= VEN_MSI_REQ_EN; + writel(ven_msi, &app_reg->ven_msi_1); + udelay(1); + ven_msi &= ~VEN_MSI_REQ_EN; + writel(ven_msi, &app_reg->ven_msi_1); + + return count; +} + +static ssize_t pcie_gadget_vendor_id_show(struct config_item *item, char *buf) +{ + u32 id; + + spear_dbi_read_reg(to_target(item), PCI_VENDOR_ID, 2, &id); + + return sprintf(buf, "%x", id); +} + +static ssize_t pcie_gadget_vendor_id_store(struct config_item *item, + const char *buf, size_t count) +{ + ulong id; + int ret; + + ret = kstrtoul(buf, 0, &id); + if (ret) + return ret; + + spear_dbi_write_reg(to_target(item), PCI_VENDOR_ID, 2, id); + + return count; +} + +static ssize_t pcie_gadget_device_id_show(struct config_item *item, char *buf) +{ + u32 id; + + spear_dbi_read_reg(to_target(item), PCI_DEVICE_ID, 2, &id); + + return sprintf(buf, "%x", id); +} + +static ssize_t pcie_gadget_device_id_store(struct config_item *item, + const char *buf, size_t count) +{ + ulong id; + int ret; + + ret = kstrtoul(buf, 0, &id); + if (ret) + return ret; + + spear_dbi_write_reg(to_target(item), PCI_DEVICE_ID, 2, id); + + return count; +} + +static ssize_t pcie_gadget_bar0_size_show(struct config_item *item, char *buf) +{ + return sprintf(buf, "%lx", to_target(item)->bar0_size); +} + +static ssize_t pcie_gadget_bar0_size_store(struct config_item *item, + const char *buf, size_t count) +{ + struct spear_pcie_gadget_config *config = to_target(item) + ulong size; + u32 pos, pos1; + u32 no_of_bit = 0; + int ret; + + ret = kstrtoul(buf, 0, &size); + if (ret) + return ret; + + /* min bar size is 256 */ + if (size <= 0x100) + size = 0x100; + /* max bar size is 1MB*/ + else if (size >= 0x100000) + size = 0x100000; + else { + pos = 0; + pos1 = 0; + while (pos < 21) { + pos = find_next_bit((ulong *)&size, 21, pos); + if (pos != 21) + pos1 = pos + 1; + pos++; + no_of_bit++; + } + if (no_of_bit == 2) + pos1--; + + size = 1 << pos1; + } + config->bar0_size = size; + spear_dbi_write_reg(config, PCIE_BAR0_MASK_REG, 4, size - 1); + + return count; +} + +static ssize_t pcie_gadget_bar0_address_show(struct config_item *item, + char *buf) +{ + struct pcie_app_reg __iomem *app_reg = to_target(item)->va_app_base; + + u32 address = readl(&app_reg->pim0_mem_addr_start); + + return sprintf(buf, "%x", address); +} + +static ssize_t pcie_gadget_bar0_address_store(struct config_item *item, + const char *buf, size_t count) +{ + struct spear_pcie_gadget_config *config = to_target(item) + struct pcie_app_reg __iomem *app_reg = config->va_app_base; + ulong address; + int ret; + + ret = kstrtoul(buf, 0, &address); + if (ret) + return ret; + + address &= ~(config->bar0_size - 1); + if (config->va_bar0_address) + iounmap(config->va_bar0_address); + config->va_bar0_address = ioremap(address, config->bar0_size); + if (!config->va_bar0_address) + return -ENOMEM; + + writel(address, &app_reg->pim0_mem_addr_start); + + return count; +} + +static ssize_t pcie_gadget_bar0_rw_offset_show(struct config_item *item, + char *buf) +{ + return sprintf(buf, "%lx", to_target(item)->bar0_rw_offset); +} + +static ssize_t pcie_gadget_bar0_rw_offset_store(struct config_item *item, + const char *buf, size_t count) +{ + ulong offset; + int ret; + + ret = kstrtoul(buf, 0, &offset); + if (ret) + return ret; + + if (offset % 4) + return -EINVAL; + + to_target(item)->bar0_rw_offset = offset; + + return count; +} + +static ssize_t pcie_gadget_bar0_data_show(struct config_item *item, char *buf) +{ + struct spear_pcie_gadget_config *config = to_target(item) + ulong data; + + if (!config->va_bar0_address) + return -ENOMEM; + + data = readl((ulong)config->va_bar0_address + config->bar0_rw_offset); + + return sprintf(buf, "%lx", data); +} + +static ssize_t pcie_gadget_bar0_data_store(struct config_item *item, + const char *buf, size_t count) +{ + struct spear_pcie_gadget_config *config = to_target(item) + ulong data; + int ret; + + ret = kstrtoul(buf, 0, &data); + if (ret) + return ret; + + if (!config->va_bar0_address) + return -ENOMEM; + + writel(data, (ulong)config->va_bar0_address + config->bar0_rw_offset); + + return count; +} + +CONFIGFS_ATTR(pcie_gadget_, link); +CONFIGFS_ATTR(pcie_gadget_, int_type); +CONFIGFS_ATTR(pcie_gadget_, no_of_msi); +CONFIGFS_ATTR_WO(pcie_gadget_, inta); +CONFIGFS_ATTR_WO(pcie_gadget_, send_msi); +CONFIGFS_ATTR(pcie_gadget_, vendor_id); +CONFIGFS_ATTR(pcie_gadget_, device_id); +CONFIGFS_ATTR(pcie_gadget_, bar0_size); +CONFIGFS_ATTR(pcie_gadget_, bar0_address); +CONFIGFS_ATTR(pcie_gadget_, bar0_rw_offset); +CONFIGFS_ATTR(pcie_gadget_, bar0_data); + +static struct configfs_attribute *pcie_gadget_target_attrs[] = { + &pcie_gadget_attr_link, + &pcie_gadget_attr_int_type, + &pcie_gadget_attr_no_of_msi, + &pcie_gadget_attr_inta, + &pcie_gadget_attr_send_msi, + &pcie_gadget_attr_vendor_id, + &pcie_gadget_attr_device_id, + &pcie_gadget_attr_bar0_size, + &pcie_gadget_attr_bar0_address, + &pcie_gadget_attr_bar0_rw_offset, + &pcie_gadget_attr_bar0_data, + NULL, +}; + +static struct config_item_type pcie_gadget_target_type = { + .ct_attrs = pcie_gadget_target_attrs, + .ct_owner = THIS_MODULE, +}; + +static void spear13xx_pcie_device_init(struct spear_pcie_gadget_config *config) +{ + struct pcie_app_reg __iomem *app_reg = config->va_app_base; + + /*setup registers for outbound translation */ + + writel(config->base, &app_reg->in0_mem_addr_start); + writel(app_reg->in0_mem_addr_start + IN0_MEM_SIZE, + &app_reg->in0_mem_addr_limit); + writel(app_reg->in0_mem_addr_limit + 1, &app_reg->in1_mem_addr_start); + writel(app_reg->in1_mem_addr_start + IN1_MEM_SIZE, + &app_reg->in1_mem_addr_limit); + writel(app_reg->in1_mem_addr_limit + 1, &app_reg->in_io_addr_start); + writel(app_reg->in_io_addr_start + IN_IO_SIZE, + &app_reg->in_io_addr_limit); + writel(app_reg->in_io_addr_limit + 1, &app_reg->in_cfg0_addr_start); + writel(app_reg->in_cfg0_addr_start + IN_CFG0_SIZE, + &app_reg->in_cfg0_addr_limit); + writel(app_reg->in_cfg0_addr_limit + 1, &app_reg->in_cfg1_addr_start); + writel(app_reg->in_cfg1_addr_start + IN_CFG1_SIZE, + &app_reg->in_cfg1_addr_limit); + writel(app_reg->in_cfg1_addr_limit + 1, &app_reg->in_msg_addr_start); + writel(app_reg->in_msg_addr_start + IN_MSG_SIZE, + &app_reg->in_msg_addr_limit); + + writel(app_reg->in0_mem_addr_start, &app_reg->pom0_mem_addr_start); + writel(app_reg->in1_mem_addr_start, &app_reg->pom1_mem_addr_start); + writel(app_reg->in_io_addr_start, &app_reg->pom_io_addr_start); + + /*setup registers for inbound translation */ + + /* Keep AORAM mapped at BAR0 as default */ + config->bar0_size = INBOUND_ADDR_MASK + 1; + spear_dbi_write_reg(config, PCIE_BAR0_MASK_REG, 4, INBOUND_ADDR_MASK); + spear_dbi_write_reg(config, PCI_BASE_ADDRESS_0, 4, 0xC); + config->va_bar0_address = ioremap(SPEAR13XX_SYSRAM1_BASE, + config->bar0_size); + + writel(SPEAR13XX_SYSRAM1_BASE, &app_reg->pim0_mem_addr_start); + writel(0, &app_reg->pim1_mem_addr_start); + writel(INBOUND_ADDR_MASK + 1, &app_reg->mem0_addr_offset_limit); + + writel(0x0, &app_reg->pim_io_addr_start); + writel(0x0, &app_reg->pim_io_addr_start); + writel(0x0, &app_reg->pim_rom_addr_start); + + writel(DEVICE_TYPE_EP | (1 << MISCTRL_EN_ID) + | ((u32)1 << REG_TRANSLATION_ENABLE), + &app_reg->app_ctrl_0); + /* disable all rx interrupts */ + writel(0, &app_reg->int_mask); + + /* Select INTA as default*/ + spear_dbi_write_reg(config, PCI_INTERRUPT_LINE, 1, 1); +} + +static int spear_pcie_gadget_probe(struct platform_device *pdev) +{ + struct resource *res0, *res1; + unsigned int status = 0; + int irq; + struct clk *clk; + static struct pcie_gadget_target *target; + struct spear_pcie_gadget_config *config; + struct config_item *cg_item; + struct configfs_subsystem *subsys; + + target = devm_kzalloc(&pdev->dev, sizeof(*target), GFP_KERNEL); + if (!target) { + dev_err(&pdev->dev, "out of memory\n"); + return -ENOMEM; + } + + cg_item = &target->subsys.su_group.cg_item; + sprintf(cg_item->ci_namebuf, "pcie_gadget.%d", pdev->id); + cg_item->ci_type = &pcie_gadget_target_type; + config = &target->config; + + /* get resource for application registers*/ + res0 = platform_get_resource(pdev, IORESOURCE_MEM, 0); + config->va_app_base = devm_ioremap_resource(&pdev->dev, res0); + if (IS_ERR(config->va_app_base)) { + dev_err(&pdev->dev, "ioremap fail\n"); + return PTR_ERR(config->va_app_base); + } + + /* get resource for dbi registers*/ + res1 = platform_get_resource(pdev, IORESOURCE_MEM, 1); + config->base = (void __iomem *)res1->start; + + config->va_dbi_base = devm_ioremap_resource(&pdev->dev, res1); + if (IS_ERR(config->va_dbi_base)) { + dev_err(&pdev->dev, "ioremap fail\n"); + return PTR_ERR(config->va_dbi_base); + } + + platform_set_drvdata(pdev, target); + + irq = platform_get_irq(pdev, 0); + if (irq < 0) { + dev_err(&pdev->dev, "no update irq?\n"); + return irq; + } + + status = devm_request_irq(&pdev->dev, irq, spear_pcie_gadget_irq, + 0, pdev->name, NULL); + if (status) { + dev_err(&pdev->dev, + "pcie gadget interrupt IRQ%d already claimed\n", irq); + return status; + } + + /* Register configfs hooks */ + subsys = &target->subsys; + config_group_init(&subsys->su_group); + mutex_init(&subsys->su_mutex); + status = configfs_register_subsystem(subsys); + if (status) + return status; + + /* + * init basic pcie application registers + * do not enable clock if it is PCIE0.Ideally , all controller should + * have been independent from others with respect to clock. But PCIE1 + * and 2 depends on PCIE0.So PCIE0 clk is provided during board init. + */ + if (pdev->id == 1) { + /* + * Ideally CFG Clock should have been also enabled here. But + * it is done currently during board init routne + */ + clk = clk_get_sys("pcie1", NULL); + if (IS_ERR(clk)) { + pr_err("%s:couldn't get clk for pcie1\n", __func__); + return PTR_ERR(clk); + } + status = clk_enable(clk); + if (status) { + pr_err("%s:couldn't enable clk for pcie1\n", __func__); + return status; + } + } else if (pdev->id == 2) { + /* + * Ideally CFG Clock should have been also enabled here. But + * it is done currently during board init routne + */ + clk = clk_get_sys("pcie2", NULL); + if (IS_ERR(clk)) { + pr_err("%s:couldn't get clk for pcie2\n", __func__); + return PTR_ERR(clk); + } + status = clk_enable(clk); + if (status) { + pr_err("%s:couldn't enable clk for pcie2\n", __func__); + return status; + } + } + spear13xx_pcie_device_init(config); + + return 0; +} + +static int spear_pcie_gadget_remove(struct platform_device *pdev) +{ + static struct pcie_gadget_target *target; + + target = platform_get_drvdata(pdev); + + configfs_unregister_subsystem(&target->subsys); + + return 0; +} + +static void spear_pcie_gadget_shutdown(struct platform_device *pdev) +{ +} + +static struct platform_driver spear_pcie_gadget_driver = { + .probe = spear_pcie_gadget_probe, + .remove = spear_pcie_gadget_remove, + .shutdown = spear_pcie_gadget_shutdown, + .driver = { + .name = "pcie-gadget-spear", + .bus = &platform_bus_type + }, +}; + +module_platform_driver(spear_pcie_gadget_driver); + +MODULE_ALIAS("platform:pcie-gadget-spear"); +MODULE_AUTHOR("Pratyush Anand"); +MODULE_LICENSE("GPL"); diff --git a/drivers/misc/sram-exec.c b/drivers/misc/sram-exec.c new file mode 100644 index 000000000..426ad912b --- /dev/null +++ b/drivers/misc/sram-exec.c @@ -0,0 +1,119 @@ +/* + * SRAM protect-exec region helper functions + * + * Copyright (C) 2017 Texas Instruments Incorporated - http://www.ti.com/ + * Dave Gerlach + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed "as is" WITHOUT ANY WARRANTY of any + * kind, whether express or implied; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include +#include +#include + +#include +#include + +#include "sram.h" + +static DEFINE_MUTEX(exec_pool_list_mutex); +static LIST_HEAD(exec_pool_list); + +int sram_check_protect_exec(struct sram_dev *sram, struct sram_reserve *block, + struct sram_partition *part) +{ + unsigned long base = (unsigned long)part->base; + unsigned long end = base + block->size; + + if (!PAGE_ALIGNED(base) || !PAGE_ALIGNED(end)) { + dev_err(sram->dev, + "SRAM pool marked with 'protect-exec' is not page aligned and will not be created.\n"); + return -ENOMEM; + } + + return 0; +} + +int sram_add_protect_exec(struct sram_partition *part) +{ + mutex_lock(&exec_pool_list_mutex); + list_add_tail(&part->list, &exec_pool_list); + mutex_unlock(&exec_pool_list_mutex); + + return 0; +} + +/** + * sram_exec_copy - copy data to a protected executable region of sram + * + * @pool: struct gen_pool retrieved that is part of this sram + * @dst: Destination address for the copy, that must be inside pool + * @src: Source address for the data to copy + * @size: Size of copy to perform, which starting from dst, must reside in pool + * + * Return: Address for copied data that can safely be called through function + * pointer, or NULL if problem. + * + * This helper function allows sram driver to act as central control location + * of 'protect-exec' pools which are normal sram pools but are always set + * read-only and executable except when copying data to them, at which point + * they are set to read-write non-executable, to make sure no memory is + * writeable and executable at the same time. This region must be page-aligned + * and is checked during probe, otherwise page attribute manipulation would + * not be possible. Care must be taken to only call the returned address as + * dst address is not guaranteed to be safely callable. + * + * NOTE: This function uses the fncpy macro to move code to the executable + * region. Some architectures have strict requirements for relocating + * executable code, so fncpy is a macro that must be defined by any arch + * making use of this functionality that guarantees a safe copy of exec + * data and returns a safe address that can be called as a C function + * pointer. + */ +void *sram_exec_copy(struct gen_pool *pool, void *dst, void *src, + size_t size) +{ + struct sram_partition *part = NULL, *p; + unsigned long base; + int pages; + void *dst_cpy; + + mutex_lock(&exec_pool_list_mutex); + list_for_each_entry(p, &exec_pool_list, list) { + if (p->pool == pool) + part = p; + } + mutex_unlock(&exec_pool_list_mutex); + + if (!part) + return NULL; + + if (!addr_in_gen_pool(pool, (unsigned long)dst, size)) + return NULL; + + base = (unsigned long)part->base; + pages = PAGE_ALIGN(size) / PAGE_SIZE; + + mutex_lock(&part->lock); + + set_memory_nx((unsigned long)base, pages); + set_memory_rw((unsigned long)base, pages); + + dst_cpy = fncpy(dst, src, size); + + set_memory_ro((unsigned long)base, pages); + set_memory_x((unsigned long)base, pages); + + mutex_unlock(&part->lock); + + return dst_cpy; +} +EXPORT_SYMBOL_GPL(sram_exec_copy); diff --git a/drivers/misc/sram.c b/drivers/misc/sram.c new file mode 100644 index 000000000..74b183baf --- /dev/null +++ b/drivers/misc/sram.c @@ -0,0 +1,456 @@ +/* + * Generic on-chip SRAM allocation driver + * + * Copyright (C) 2012 Philipp Zabel, Pengutronix + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + * MA 02110-1301, USA. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "sram.h" + +#define SRAM_GRANULARITY 32 + +static ssize_t sram_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, + char *buf, loff_t pos, size_t count) +{ + struct sram_partition *part; + + part = container_of(attr, struct sram_partition, battr); + + mutex_lock(&part->lock); + memcpy_fromio(buf, part->base + pos, count); + mutex_unlock(&part->lock); + + return count; +} + +static ssize_t sram_write(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, + char *buf, loff_t pos, size_t count) +{ + struct sram_partition *part; + + part = container_of(attr, struct sram_partition, battr); + + mutex_lock(&part->lock); + memcpy_toio(part->base + pos, buf, count); + mutex_unlock(&part->lock); + + return count; +} + +static int sram_add_pool(struct sram_dev *sram, struct sram_reserve *block, + phys_addr_t start, struct sram_partition *part) +{ + int ret; + + part->pool = devm_gen_pool_create(sram->dev, ilog2(SRAM_GRANULARITY), + NUMA_NO_NODE, block->label); + if (IS_ERR(part->pool)) + return PTR_ERR(part->pool); + + ret = gen_pool_add_virt(part->pool, (unsigned long)part->base, start, + block->size, NUMA_NO_NODE); + if (ret < 0) { + dev_err(sram->dev, "failed to register subpool: %d\n", ret); + return ret; + } + + return 0; +} + +static int sram_add_export(struct sram_dev *sram, struct sram_reserve *block, + phys_addr_t start, struct sram_partition *part) +{ + sysfs_bin_attr_init(&part->battr); + part->battr.attr.name = devm_kasprintf(sram->dev, GFP_KERNEL, + "%llx.sram", + (unsigned long long)start); + if (!part->battr.attr.name) + return -ENOMEM; + + part->battr.attr.mode = S_IRUSR | S_IWUSR; + part->battr.read = sram_read; + part->battr.write = sram_write; + part->battr.size = block->size; + + return device_create_bin_file(sram->dev, &part->battr); +} + +static int sram_add_partition(struct sram_dev *sram, struct sram_reserve *block, + phys_addr_t start) +{ + int ret; + struct sram_partition *part = &sram->partition[sram->partitions]; + + mutex_init(&part->lock); + part->base = sram->virt_base + block->start; + + if (block->pool) { + ret = sram_add_pool(sram, block, start, part); + if (ret) + return ret; + } + if (block->export) { + ret = sram_add_export(sram, block, start, part); + if (ret) + return ret; + } + if (block->protect_exec) { + ret = sram_check_protect_exec(sram, block, part); + if (ret) + return ret; + + ret = sram_add_pool(sram, block, start, part); + if (ret) + return ret; + + sram_add_protect_exec(part); + } + + sram->partitions++; + + return 0; +} + +static void sram_free_partitions(struct sram_dev *sram) +{ + struct sram_partition *part; + + if (!sram->partitions) + return; + + part = &sram->partition[sram->partitions - 1]; + for (; sram->partitions; sram->partitions--, part--) { + if (part->battr.size) + device_remove_bin_file(sram->dev, &part->battr); + + if (part->pool && + gen_pool_avail(part->pool) < gen_pool_size(part->pool)) + dev_err(sram->dev, "removed pool while SRAM allocated\n"); + } +} + +static int sram_reserve_cmp(void *priv, struct list_head *a, + struct list_head *b) +{ + struct sram_reserve *ra = list_entry(a, struct sram_reserve, list); + struct sram_reserve *rb = list_entry(b, struct sram_reserve, list); + + return ra->start - rb->start; +} + +static int sram_reserve_regions(struct sram_dev *sram, struct resource *res) +{ + struct device_node *np = sram->dev->of_node, *child; + unsigned long size, cur_start, cur_size; + struct sram_reserve *rblocks, *block; + struct list_head reserve_list; + unsigned int nblocks, exports = 0; + const char *label; + int ret = 0; + + INIT_LIST_HEAD(&reserve_list); + + size = resource_size(res); + + /* + * We need an additional block to mark the end of the memory region + * after the reserved blocks from the dt are processed. + */ + nblocks = (np) ? of_get_available_child_count(np) + 1 : 1; + rblocks = kcalloc(nblocks, sizeof(*rblocks), GFP_KERNEL); + if (!rblocks) + return -ENOMEM; + + block = &rblocks[0]; + for_each_available_child_of_node(np, child) { + struct resource child_res; + + ret = of_address_to_resource(child, 0, &child_res); + if (ret < 0) { + dev_err(sram->dev, + "could not get address for node %pOF\n", + child); + goto err_chunks; + } + + if (child_res.start < res->start || child_res.end > res->end) { + dev_err(sram->dev, + "reserved block %pOF outside the sram area\n", + child); + ret = -EINVAL; + goto err_chunks; + } + + block->start = child_res.start - res->start; + block->size = resource_size(&child_res); + list_add_tail(&block->list, &reserve_list); + + if (of_find_property(child, "export", NULL)) + block->export = true; + + if (of_find_property(child, "pool", NULL)) + block->pool = true; + + if (of_find_property(child, "protect-exec", NULL)) + block->protect_exec = true; + + if ((block->export || block->pool || block->protect_exec) && + block->size) { + exports++; + + label = NULL; + ret = of_property_read_string(child, "label", &label); + if (ret && ret != -EINVAL) { + dev_err(sram->dev, + "%pOF has invalid label name\n", + child); + goto err_chunks; + } + if (!label) + label = child->name; + + block->label = devm_kstrdup(sram->dev, + label, GFP_KERNEL); + if (!block->label) { + ret = -ENOMEM; + goto err_chunks; + } + + dev_dbg(sram->dev, "found %sblock '%s' 0x%x-0x%x\n", + block->export ? "exported " : "", block->label, + block->start, block->start + block->size); + } else { + dev_dbg(sram->dev, "found reserved block 0x%x-0x%x\n", + block->start, block->start + block->size); + } + + block++; + } + child = NULL; + + /* the last chunk marks the end of the region */ + rblocks[nblocks - 1].start = size; + rblocks[nblocks - 1].size = 0; + list_add_tail(&rblocks[nblocks - 1].list, &reserve_list); + + list_sort(NULL, &reserve_list, sram_reserve_cmp); + + if (exports) { + sram->partition = devm_kcalloc(sram->dev, + exports, sizeof(*sram->partition), + GFP_KERNEL); + if (!sram->partition) { + ret = -ENOMEM; + goto err_chunks; + } + } + + cur_start = 0; + list_for_each_entry(block, &reserve_list, list) { + /* can only happen if sections overlap */ + if (block->start < cur_start) { + dev_err(sram->dev, + "block at 0x%x starts after current offset 0x%lx\n", + block->start, cur_start); + ret = -EINVAL; + sram_free_partitions(sram); + goto err_chunks; + } + + if ((block->export || block->pool || block->protect_exec) && + block->size) { + ret = sram_add_partition(sram, block, + res->start + block->start); + if (ret) { + sram_free_partitions(sram); + goto err_chunks; + } + } + + /* current start is in a reserved block, so continue after it */ + if (block->start == cur_start) { + cur_start = block->start + block->size; + continue; + } + + /* + * allocate the space between the current starting + * address and the following reserved block, or the + * end of the region. + */ + cur_size = block->start - cur_start; + + dev_dbg(sram->dev, "adding chunk 0x%lx-0x%lx\n", + cur_start, cur_start + cur_size); + + ret = gen_pool_add_virt(sram->pool, + (unsigned long)sram->virt_base + cur_start, + res->start + cur_start, cur_size, -1); + if (ret < 0) { + sram_free_partitions(sram); + goto err_chunks; + } + + /* next allocation after this reserved block */ + cur_start = block->start + block->size; + } + + err_chunks: + if (child) + of_node_put(child); + + kfree(rblocks); + + return ret; +} + +static int atmel_securam_wait(void) +{ + struct regmap *regmap; + u32 val; + + regmap = syscon_regmap_lookup_by_compatible("atmel,sama5d2-secumod"); + if (IS_ERR(regmap)) + return -ENODEV; + + return regmap_read_poll_timeout(regmap, AT91_SECUMOD_RAMRDY, val, + val & AT91_SECUMOD_RAMRDY_READY, + 10000, 500000); +} + +static const struct of_device_id sram_dt_ids[] = { + { .compatible = "mmio-sram" }, + { .compatible = "atmel,sama5d2-securam", .data = atmel_securam_wait }, + {} +}; + +static int sram_probe(struct platform_device *pdev) +{ + struct sram_dev *sram; + struct resource *res; + size_t size; + int ret; + int (*init_func)(void); + + sram = devm_kzalloc(&pdev->dev, sizeof(*sram), GFP_KERNEL); + if (!sram) + return -ENOMEM; + + sram->dev = &pdev->dev; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) { + dev_err(sram->dev, "found no memory resource\n"); + return -EINVAL; + } + + size = resource_size(res); + + if (!devm_request_mem_region(sram->dev, res->start, size, pdev->name)) { + dev_err(sram->dev, "could not request region for resource\n"); + return -EBUSY; + } + + if (of_property_read_bool(pdev->dev.of_node, "no-memory-wc")) + sram->virt_base = devm_ioremap(sram->dev, res->start, size); + else + sram->virt_base = devm_ioremap_wc(sram->dev, res->start, size); + if (!sram->virt_base) + return -ENOMEM; + + sram->pool = devm_gen_pool_create(sram->dev, ilog2(SRAM_GRANULARITY), + NUMA_NO_NODE, NULL); + if (IS_ERR(sram->pool)) + return PTR_ERR(sram->pool); + + sram->clk = devm_clk_get(sram->dev, NULL); + if (IS_ERR(sram->clk)) + sram->clk = NULL; + else + clk_prepare_enable(sram->clk); + + ret = sram_reserve_regions(sram, res); + if (ret) + goto err_disable_clk; + + platform_set_drvdata(pdev, sram); + + init_func = of_device_get_match_data(&pdev->dev); + if (init_func) { + ret = init_func(); + if (ret) + goto err_free_partitions; + } + + dev_dbg(sram->dev, "SRAM pool: %zu KiB @ 0x%p\n", + gen_pool_size(sram->pool) / 1024, sram->virt_base); + + return 0; + +err_free_partitions: + sram_free_partitions(sram); +err_disable_clk: + if (sram->clk) + clk_disable_unprepare(sram->clk); + + return ret; +} + +static int sram_remove(struct platform_device *pdev) +{ + struct sram_dev *sram = platform_get_drvdata(pdev); + + sram_free_partitions(sram); + + if (gen_pool_avail(sram->pool) < gen_pool_size(sram->pool)) + dev_err(sram->dev, "removed while SRAM allocated\n"); + + if (sram->clk) + clk_disable_unprepare(sram->clk); + + return 0; +} + +static struct platform_driver sram_driver = { + .driver = { + .name = "sram", + .of_match_table = sram_dt_ids, + }, + .probe = sram_probe, + .remove = sram_remove, +}; + +static int __init sram_init(void) +{ + return platform_driver_register(&sram_driver); +} + +postcore_initcall(sram_init); diff --git a/drivers/misc/sram.h b/drivers/misc/sram.h new file mode 100644 index 000000000..c181ce4c8 --- /dev/null +++ b/drivers/misc/sram.h @@ -0,0 +1,58 @@ +/* + * Defines for the SRAM driver + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __SRAM_H +#define __SRAM_H + +struct sram_partition { + void __iomem *base; + + struct gen_pool *pool; + struct bin_attribute battr; + struct mutex lock; + struct list_head list; +}; + +struct sram_dev { + struct device *dev; + void __iomem *virt_base; + + struct gen_pool *pool; + struct clk *clk; + + struct sram_partition *partition; + u32 partitions; +}; + +struct sram_reserve { + struct list_head list; + u32 start; + u32 size; + bool export; + bool pool; + bool protect_exec; + const char *label; +}; + +#ifdef CONFIG_SRAM_EXEC +int sram_check_protect_exec(struct sram_dev *sram, struct sram_reserve *block, + struct sram_partition *part); +int sram_add_protect_exec(struct sram_partition *part); +#else +static inline int sram_check_protect_exec(struct sram_dev *sram, + struct sram_reserve *block, + struct sram_partition *part) +{ + return -ENODEV; +} + +static inline int sram_add_protect_exec(struct sram_partition *part) +{ + return -ENODEV; +} +#endif /* CONFIG_SRAM_EXEC */ +#endif /* __SRAM_H */ diff --git a/drivers/misc/ti-st/Kconfig b/drivers/misc/ti-st/Kconfig new file mode 100644 index 000000000..5bb92698b --- /dev/null +++ b/drivers/misc/ti-st/Kconfig @@ -0,0 +1,18 @@ +# +# TI's shared transport line discipline and the protocol +# drivers (BT, FM and GPS) +# +menu "Texas Instruments shared transport line discipline" +config TI_ST + tristate "Shared transport core driver" + depends on NET && TTY + depends on GPIOLIB || COMPILE_TEST + select FW_LOADER + help + This enables the shared transport core driver for TI + BT / FM and GPS combo chips. This enables protocol drivers + to register themselves with core and send data, the responses + are returned to relevant protocol drivers based on their + packet types. + +endmenu diff --git a/drivers/misc/ti-st/Makefile b/drivers/misc/ti-st/Makefile new file mode 100644 index 000000000..78d7ebb14 --- /dev/null +++ b/drivers/misc/ti-st/Makefile @@ -0,0 +1,6 @@ +# +# Makefile for TI's shared transport line discipline +# and its protocol drivers (BT, FM, GPS) +# +obj-$(CONFIG_TI_ST) += st_drv.o +st_drv-objs := st_core.o st_kim.o st_ll.o diff --git a/drivers/misc/ti-st/st_core.c b/drivers/misc/ti-st/st_core.c new file mode 100644 index 000000000..eda8d407b --- /dev/null +++ b/drivers/misc/ti-st/st_core.c @@ -0,0 +1,922 @@ +/* + * Shared Transport Line discipline driver Core + * This hooks up ST KIM driver and ST LL driver + * Copyright (C) 2009-2010 Texas Instruments + * Author: Pavan Savoy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#define pr_fmt(fmt) "(stc): " fmt +#include +#include +#include + +#include +#include + +#include + +extern void st_kim_recv(void *, const unsigned char *, long); +void st_int_recv(void *, const unsigned char *, long); +/* function pointer pointing to either, + * st_kim_recv during registration to receive fw download responses + * st_int_recv after registration to receive proto stack responses + */ +static void (*st_recv) (void *, const unsigned char *, long); + +/********************************************************************/ +static void add_channel_to_table(struct st_data_s *st_gdata, + struct st_proto_s *new_proto) +{ + pr_info("%s: id %d\n", __func__, new_proto->chnl_id); + /* list now has the channel id as index itself */ + st_gdata->list[new_proto->chnl_id] = new_proto; + st_gdata->is_registered[new_proto->chnl_id] = true; +} + +static void remove_channel_from_table(struct st_data_s *st_gdata, + struct st_proto_s *proto) +{ + pr_info("%s: id %d\n", __func__, proto->chnl_id); +/* st_gdata->list[proto->chnl_id] = NULL; */ + st_gdata->is_registered[proto->chnl_id] = false; +} + +/* + * called from KIM during firmware download. + * + * This is a wrapper function to tty->ops->write_room. + * It returns number of free space available in + * uart tx buffer. + */ +int st_get_uart_wr_room(struct st_data_s *st_gdata) +{ + struct tty_struct *tty; + if (unlikely(st_gdata == NULL || st_gdata->tty == NULL)) { + pr_err("tty unavailable to perform write"); + return -1; + } + tty = st_gdata->tty; + return tty->ops->write_room(tty); +} + +/* can be called in from + * -- KIM (during fw download) + * -- ST Core (during st_write) + * + * This is the internal write function - a wrapper + * to tty->ops->write + */ +int st_int_write(struct st_data_s *st_gdata, + const unsigned char *data, int count) +{ + struct tty_struct *tty; + if (unlikely(st_gdata == NULL || st_gdata->tty == NULL)) { + pr_err("tty unavailable to perform write"); + return -EINVAL; + } + tty = st_gdata->tty; +#ifdef VERBOSE + print_hex_dump(KERN_DEBUG, "ops->write(tty, data, count); + +} + +/* + * push the skb received to relevant + * protocol stacks + */ +static void st_send_frame(unsigned char chnl_id, struct st_data_s *st_gdata) +{ + pr_debug(" %s(prot:%d) ", __func__, chnl_id); + + if (unlikely + (st_gdata == NULL || st_gdata->rx_skb == NULL + || st_gdata->is_registered[chnl_id] == false)) { + pr_err("chnl_id %d not registered, no data to send?", + chnl_id); + kfree_skb(st_gdata->rx_skb); + return; + } + /* this cannot fail + * this shouldn't take long + * - should be just skb_queue_tail for the + * protocol stack driver + */ + if (likely(st_gdata->list[chnl_id]->recv != NULL)) { + if (unlikely + (st_gdata->list[chnl_id]->recv + (st_gdata->list[chnl_id]->priv_data, st_gdata->rx_skb) + != 0)) { + pr_err(" proto stack %d's ->recv failed", chnl_id); + kfree_skb(st_gdata->rx_skb); + return; + } + } else { + pr_err(" proto stack %d's ->recv null", chnl_id); + kfree_skb(st_gdata->rx_skb); + } + return; +} + +/** + * st_reg_complete - + * to call registration complete callbacks + * of all protocol stack drivers + * This function is being called with spin lock held, protocol drivers are + * only expected to complete their waits and do nothing more than that. + */ +static void st_reg_complete(struct st_data_s *st_gdata, int err) +{ + unsigned char i = 0; + pr_info(" %s ", __func__); + for (i = 0; i < ST_MAX_CHANNELS; i++) { + if (likely(st_gdata != NULL && + st_gdata->is_registered[i] == true && + st_gdata->list[i]->reg_complete_cb != NULL)) { + st_gdata->list[i]->reg_complete_cb + (st_gdata->list[i]->priv_data, err); + pr_info("protocol %d's cb sent %d\n", i, err); + if (err) { /* cleanup registered protocol */ + st_gdata->is_registered[i] = false; + if (st_gdata->protos_registered) + st_gdata->protos_registered--; + } + } + } +} + +static inline int st_check_data_len(struct st_data_s *st_gdata, + unsigned char chnl_id, int len) +{ + int room = skb_tailroom(st_gdata->rx_skb); + + pr_debug("len %d room %d", len, room); + + if (!len) { + /* Received packet has only packet header and + * has zero length payload. So, ask ST CORE to + * forward the packet to protocol driver (BT/FM/GPS) + */ + st_send_frame(chnl_id, st_gdata); + + } else if (len > room) { + /* Received packet's payload length is larger. + * We can't accommodate it in created skb. + */ + pr_err("Data length is too large len %d room %d", len, + room); + kfree_skb(st_gdata->rx_skb); + } else { + /* Packet header has non-zero payload length and + * we have enough space in created skb. Lets read + * payload data */ + st_gdata->rx_state = ST_W4_DATA; + st_gdata->rx_count = len; + return len; + } + + /* Change ST state to continue to process next + * packet */ + st_gdata->rx_state = ST_W4_PACKET_TYPE; + st_gdata->rx_skb = NULL; + st_gdata->rx_count = 0; + st_gdata->rx_chnl = 0; + + return 0; +} + +/** + * st_wakeup_ack - internal function for action when wake-up ack + * received + */ +static inline void st_wakeup_ack(struct st_data_s *st_gdata, + unsigned char cmd) +{ + struct sk_buff *waiting_skb; + unsigned long flags = 0; + + spin_lock_irqsave(&st_gdata->lock, flags); + /* de-Q from waitQ and Q in txQ now that the + * chip is awake + */ + while ((waiting_skb = skb_dequeue(&st_gdata->tx_waitq))) + skb_queue_tail(&st_gdata->txq, waiting_skb); + + /* state forwarded to ST LL */ + st_ll_sleep_state(st_gdata, (unsigned long)cmd); + spin_unlock_irqrestore(&st_gdata->lock, flags); + + /* wake up to send the recently copied skbs from waitQ */ + st_tx_wakeup(st_gdata); +} + +/** + * st_int_recv - ST's internal receive function. + * Decodes received RAW data and forwards to corresponding + * client drivers (Bluetooth,FM,GPS..etc). + * This can receive various types of packets, + * HCI-Events, ACL, SCO, 4 types of HCI-LL PM packets + * CH-8 packets from FM, CH-9 packets from GPS cores. + */ +void st_int_recv(void *disc_data, + const unsigned char *data, long count) +{ + char *ptr; + struct st_proto_s *proto; + unsigned short payload_len = 0; + int len = 0; + unsigned char type = 0; + unsigned char *plen; + struct st_data_s *st_gdata = (struct st_data_s *)disc_data; + unsigned long flags; + + ptr = (char *)data; + /* tty_receive sent null ? */ + if (unlikely(ptr == NULL) || (st_gdata == NULL)) { + pr_err(" received null from TTY "); + return; + } + + pr_debug("count %ld rx_state %ld" + "rx_count %ld", count, st_gdata->rx_state, + st_gdata->rx_count); + + spin_lock_irqsave(&st_gdata->lock, flags); + /* Decode received bytes here */ + while (count) { + if (st_gdata->rx_count) { + len = min_t(unsigned int, st_gdata->rx_count, count); + skb_put_data(st_gdata->rx_skb, ptr, len); + st_gdata->rx_count -= len; + count -= len; + ptr += len; + + if (st_gdata->rx_count) + continue; + + /* Check ST RX state machine , where are we? */ + switch (st_gdata->rx_state) { + /* Waiting for complete packet ? */ + case ST_W4_DATA: + pr_debug("Complete pkt received"); + /* Ask ST CORE to forward + * the packet to protocol driver */ + st_send_frame(st_gdata->rx_chnl, st_gdata); + + st_gdata->rx_state = ST_W4_PACKET_TYPE; + st_gdata->rx_skb = NULL; + continue; + /* parse the header to know details */ + case ST_W4_HEADER: + proto = st_gdata->list[st_gdata->rx_chnl]; + plen = + &st_gdata->rx_skb->data + [proto->offset_len_in_hdr]; + pr_debug("plen pointing to %x\n", *plen); + if (proto->len_size == 1)/* 1 byte len field */ + payload_len = *(unsigned char *)plen; + else if (proto->len_size == 2) + payload_len = + __le16_to_cpu(*(unsigned short *)plen); + else + pr_info("%s: invalid length " + "for id %d\n", + __func__, proto->chnl_id); + st_check_data_len(st_gdata, proto->chnl_id, + payload_len); + pr_debug("off %d, pay len %d\n", + proto->offset_len_in_hdr, payload_len); + continue; + } /* end of switch rx_state */ + } + + /* end of if rx_count */ + /* Check first byte of packet and identify module + * owner (BT/FM/GPS) */ + switch (*ptr) { + case LL_SLEEP_IND: + case LL_SLEEP_ACK: + case LL_WAKE_UP_IND: + pr_debug("PM packet"); + /* this takes appropriate action based on + * sleep state received -- + */ + st_ll_sleep_state(st_gdata, *ptr); + /* if WAKEUP_IND collides copy from waitq to txq + * and assume chip awake + */ + spin_unlock_irqrestore(&st_gdata->lock, flags); + if (st_ll_getstate(st_gdata) == ST_LL_AWAKE) + st_wakeup_ack(st_gdata, LL_WAKE_UP_ACK); + spin_lock_irqsave(&st_gdata->lock, flags); + + ptr++; + count--; + continue; + case LL_WAKE_UP_ACK: + pr_debug("PM packet"); + + spin_unlock_irqrestore(&st_gdata->lock, flags); + /* wake up ack received */ + st_wakeup_ack(st_gdata, *ptr); + spin_lock_irqsave(&st_gdata->lock, flags); + + ptr++; + count--; + continue; + /* Unknow packet? */ + default: + type = *ptr; + + /* Default case means non-HCILL packets, + * possibilities are packets for: + * (a) valid protocol - Supported Protocols within + * the ST_MAX_CHANNELS. + * (b) registered protocol - Checked by + * "st_gdata->list[type] == NULL)" are supported + * protocols only. + * Rules out any invalid protocol and + * unregistered protocols with channel ID < 16. + */ + + if ((type >= ST_MAX_CHANNELS) || + (st_gdata->list[type] == NULL)) { + pr_err("chip/interface misbehavior: " + "dropping frame starting " + "with 0x%02x\n", type); + goto done; + } + + st_gdata->rx_skb = alloc_skb( + st_gdata->list[type]->max_frame_size, + GFP_ATOMIC); + if (st_gdata->rx_skb == NULL) { + pr_err("out of memory: dropping\n"); + goto done; + } + + skb_reserve(st_gdata->rx_skb, + st_gdata->list[type]->reserve); + /* next 2 required for BT only */ + st_gdata->rx_skb->cb[0] = type; /*pkt_type*/ + st_gdata->rx_skb->cb[1] = 0; /*incoming*/ + st_gdata->rx_chnl = *ptr; + st_gdata->rx_state = ST_W4_HEADER; + st_gdata->rx_count = st_gdata->list[type]->hdr_len; + pr_debug("rx_count %ld\n", st_gdata->rx_count); + }; + ptr++; + count--; + } +done: + spin_unlock_irqrestore(&st_gdata->lock, flags); + pr_debug("done %s", __func__); + return; +} + +/** + * st_int_dequeue - internal de-Q function. + * If the previous data set was not written + * completely, return that skb which has the pending data. + * In normal cases, return top of txq. + */ +static struct sk_buff *st_int_dequeue(struct st_data_s *st_gdata) +{ + struct sk_buff *returning_skb; + + pr_debug("%s", __func__); + if (st_gdata->tx_skb != NULL) { + returning_skb = st_gdata->tx_skb; + st_gdata->tx_skb = NULL; + return returning_skb; + } + return skb_dequeue(&st_gdata->txq); +} + +/** + * st_int_enqueue - internal Q-ing function. + * Will either Q the skb to txq or the tx_waitq + * depending on the ST LL state. + * If the chip is asleep, then Q it onto waitq and + * wakeup the chip. + * txq and waitq needs protection since the other contexts + * may be sending data, waking up chip. + */ +static void st_int_enqueue(struct st_data_s *st_gdata, struct sk_buff *skb) +{ + unsigned long flags = 0; + + pr_debug("%s", __func__); + spin_lock_irqsave(&st_gdata->lock, flags); + + switch (st_ll_getstate(st_gdata)) { + case ST_LL_AWAKE: + pr_debug("ST LL is AWAKE, sending normally"); + skb_queue_tail(&st_gdata->txq, skb); + break; + case ST_LL_ASLEEP_TO_AWAKE: + skb_queue_tail(&st_gdata->tx_waitq, skb); + break; + case ST_LL_AWAKE_TO_ASLEEP: + pr_err("ST LL is illegal state(%ld)," + "purging received skb.", st_ll_getstate(st_gdata)); + kfree_skb(skb); + break; + case ST_LL_ASLEEP: + skb_queue_tail(&st_gdata->tx_waitq, skb); + st_ll_wakeup(st_gdata); + break; + default: + pr_err("ST LL is illegal state(%ld)," + "purging received skb.", st_ll_getstate(st_gdata)); + kfree_skb(skb); + break; + } + + spin_unlock_irqrestore(&st_gdata->lock, flags); + pr_debug("done %s", __func__); + return; +} + +/* + * internal wakeup function + * called from either + * - TTY layer when write's finished + * - st_write (in context of the protocol stack) + */ +static void work_fn_write_wakeup(struct work_struct *work) +{ + struct st_data_s *st_gdata = container_of(work, struct st_data_s, + work_write_wakeup); + + st_tx_wakeup((void *)st_gdata); +} +void st_tx_wakeup(struct st_data_s *st_data) +{ + struct sk_buff *skb; + unsigned long flags; /* for irq save flags */ + pr_debug("%s", __func__); + /* check for sending & set flag sending here */ + if (test_and_set_bit(ST_TX_SENDING, &st_data->tx_state)) { + pr_debug("ST already sending"); + /* keep sending */ + set_bit(ST_TX_WAKEUP, &st_data->tx_state); + return; + /* TX_WAKEUP will be checked in another + * context + */ + } + do { /* come back if st_tx_wakeup is set */ + /* woke-up to write */ + clear_bit(ST_TX_WAKEUP, &st_data->tx_state); + while ((skb = st_int_dequeue(st_data))) { + int len; + spin_lock_irqsave(&st_data->lock, flags); + /* enable wake-up from TTY */ + set_bit(TTY_DO_WRITE_WAKEUP, &st_data->tty->flags); + len = st_int_write(st_data, skb->data, skb->len); + skb_pull(skb, len); + /* if skb->len = len as expected, skb->len=0 */ + if (skb->len) { + /* would be the next skb to be sent */ + st_data->tx_skb = skb; + spin_unlock_irqrestore(&st_data->lock, flags); + break; + } + kfree_skb(skb); + spin_unlock_irqrestore(&st_data->lock, flags); + } + /* if wake-up is set in another context- restart sending */ + } while (test_bit(ST_TX_WAKEUP, &st_data->tx_state)); + + /* clear flag sending */ + clear_bit(ST_TX_SENDING, &st_data->tx_state); +} + +/********************************************************************/ +/* functions called from ST KIM +*/ +void kim_st_list_protocols(struct st_data_s *st_gdata, void *buf) +{ + seq_printf(buf, "[%d]\nBT=%c\nFM=%c\nGPS=%c\n", + st_gdata->protos_registered, + st_gdata->is_registered[0x04] == true ? 'R' : 'U', + st_gdata->is_registered[0x08] == true ? 'R' : 'U', + st_gdata->is_registered[0x09] == true ? 'R' : 'U'); +} + +/********************************************************************/ +/* + * functions called from protocol stack drivers + * to be EXPORT-ed + */ +long st_register(struct st_proto_s *new_proto) +{ + struct st_data_s *st_gdata; + long err = 0; + unsigned long flags = 0; + + st_kim_ref(&st_gdata, 0); + if (st_gdata == NULL || new_proto == NULL || new_proto->recv == NULL + || new_proto->reg_complete_cb == NULL) { + pr_err("gdata/new_proto/recv or reg_complete_cb not ready"); + return -EINVAL; + } + + if (new_proto->chnl_id >= ST_MAX_CHANNELS) { + pr_err("chnl_id %d not supported", new_proto->chnl_id); + return -EPROTONOSUPPORT; + } + + if (st_gdata->is_registered[new_proto->chnl_id] == true) { + pr_err("chnl_id %d already registered", new_proto->chnl_id); + return -EALREADY; + } + + /* can be from process context only */ + spin_lock_irqsave(&st_gdata->lock, flags); + + if (test_bit(ST_REG_IN_PROGRESS, &st_gdata->st_state)) { + pr_info(" ST_REG_IN_PROGRESS:%d ", new_proto->chnl_id); + /* fw download in progress */ + + add_channel_to_table(st_gdata, new_proto); + st_gdata->protos_registered++; + new_proto->write = st_write; + + set_bit(ST_REG_PENDING, &st_gdata->st_state); + spin_unlock_irqrestore(&st_gdata->lock, flags); + return -EINPROGRESS; + } else if (st_gdata->protos_registered == ST_EMPTY) { + pr_info(" chnl_id list empty :%d ", new_proto->chnl_id); + set_bit(ST_REG_IN_PROGRESS, &st_gdata->st_state); + st_recv = st_kim_recv; + + /* enable the ST LL - to set default chip state */ + st_ll_enable(st_gdata); + + /* release lock previously held - re-locked below */ + spin_unlock_irqrestore(&st_gdata->lock, flags); + + /* this may take a while to complete + * since it involves BT fw download + */ + err = st_kim_start(st_gdata->kim_data); + if (err != 0) { + clear_bit(ST_REG_IN_PROGRESS, &st_gdata->st_state); + if ((st_gdata->protos_registered != ST_EMPTY) && + (test_bit(ST_REG_PENDING, &st_gdata->st_state))) { + pr_err(" KIM failure complete callback "); + spin_lock_irqsave(&st_gdata->lock, flags); + st_reg_complete(st_gdata, err); + spin_unlock_irqrestore(&st_gdata->lock, flags); + clear_bit(ST_REG_PENDING, &st_gdata->st_state); + } + return -EINVAL; + } + + spin_lock_irqsave(&st_gdata->lock, flags); + + clear_bit(ST_REG_IN_PROGRESS, &st_gdata->st_state); + st_recv = st_int_recv; + + /* this is where all pending registration + * are signalled to be complete by calling callback functions + */ + if ((st_gdata->protos_registered != ST_EMPTY) && + (test_bit(ST_REG_PENDING, &st_gdata->st_state))) { + pr_debug(" call reg complete callback "); + st_reg_complete(st_gdata, 0); + } + clear_bit(ST_REG_PENDING, &st_gdata->st_state); + + /* check for already registered once more, + * since the above check is old + */ + if (st_gdata->is_registered[new_proto->chnl_id] == true) { + pr_err(" proto %d already registered ", + new_proto->chnl_id); + spin_unlock_irqrestore(&st_gdata->lock, flags); + return -EALREADY; + } + + add_channel_to_table(st_gdata, new_proto); + st_gdata->protos_registered++; + new_proto->write = st_write; + spin_unlock_irqrestore(&st_gdata->lock, flags); + return err; + } + /* if fw is already downloaded & new stack registers protocol */ + else { + add_channel_to_table(st_gdata, new_proto); + st_gdata->protos_registered++; + new_proto->write = st_write; + + /* lock already held before entering else */ + spin_unlock_irqrestore(&st_gdata->lock, flags); + return err; + } +} +EXPORT_SYMBOL_GPL(st_register); + +/* to unregister a protocol - + * to be called from protocol stack driver + */ +long st_unregister(struct st_proto_s *proto) +{ + long err = 0; + unsigned long flags = 0; + struct st_data_s *st_gdata; + + pr_debug("%s: %d ", __func__, proto->chnl_id); + + st_kim_ref(&st_gdata, 0); + if (!st_gdata || proto->chnl_id >= ST_MAX_CHANNELS) { + pr_err(" chnl_id %d not supported", proto->chnl_id); + return -EPROTONOSUPPORT; + } + + spin_lock_irqsave(&st_gdata->lock, flags); + + if (st_gdata->is_registered[proto->chnl_id] == false) { + pr_err(" chnl_id %d not registered", proto->chnl_id); + spin_unlock_irqrestore(&st_gdata->lock, flags); + return -EPROTONOSUPPORT; + } + + if (st_gdata->protos_registered) + st_gdata->protos_registered--; + + remove_channel_from_table(st_gdata, proto); + spin_unlock_irqrestore(&st_gdata->lock, flags); + + if ((st_gdata->protos_registered == ST_EMPTY) && + (!test_bit(ST_REG_PENDING, &st_gdata->st_state))) { + pr_info(" all chnl_ids unregistered "); + + /* stop traffic on tty */ + if (st_gdata->tty) { + tty_ldisc_flush(st_gdata->tty); + stop_tty(st_gdata->tty); + } + + /* all chnl_ids now unregistered */ + st_kim_stop(st_gdata->kim_data); + /* disable ST LL */ + st_ll_disable(st_gdata); + } + return err; +} + +/* + * called in protocol stack drivers + * via the write function pointer + */ +long st_write(struct sk_buff *skb) +{ + struct st_data_s *st_gdata; + long len; + + st_kim_ref(&st_gdata, 0); + if (unlikely(skb == NULL || st_gdata == NULL + || st_gdata->tty == NULL)) { + pr_err("data/tty unavailable to perform write"); + return -EINVAL; + } + + pr_debug("%d to be written", skb->len); + len = skb->len; + + /* st_ll to decide where to enqueue the skb */ + st_int_enqueue(st_gdata, skb); + /* wake up */ + st_tx_wakeup(st_gdata); + + /* return number of bytes written */ + return len; +} + +/* for protocols making use of shared transport */ +EXPORT_SYMBOL_GPL(st_unregister); + +/********************************************************************/ +/* + * functions called from TTY layer + */ +static int st_tty_open(struct tty_struct *tty) +{ + int err = 0; + struct st_data_s *st_gdata; + pr_info("%s ", __func__); + + st_kim_ref(&st_gdata, 0); + st_gdata->tty = tty; + tty->disc_data = st_gdata; + + /* don't do an wakeup for now */ + clear_bit(TTY_DO_WRITE_WAKEUP, &tty->flags); + + /* mem already allocated + */ + tty->receive_room = 65536; + /* Flush any pending characters in the driver and discipline. */ + tty_ldisc_flush(tty); + tty_driver_flush_buffer(tty); + /* + * signal to UIM via KIM that - + * installation of N_TI_WL ldisc is complete + */ + st_kim_complete(st_gdata->kim_data); + pr_debug("done %s", __func__); + return err; +} + +static void st_tty_close(struct tty_struct *tty) +{ + unsigned char i = ST_MAX_CHANNELS; + unsigned long flags = 0; + struct st_data_s *st_gdata = tty->disc_data; + + pr_info("%s ", __func__); + + /* TODO: + * if a protocol has been registered & line discipline + * un-installed for some reason - what should be done ? + */ + spin_lock_irqsave(&st_gdata->lock, flags); + for (i = ST_BT; i < ST_MAX_CHANNELS; i++) { + if (st_gdata->is_registered[i] == true) + pr_err("%d not un-registered", i); + st_gdata->list[i] = NULL; + st_gdata->is_registered[i] = false; + } + st_gdata->protos_registered = 0; + spin_unlock_irqrestore(&st_gdata->lock, flags); + /* + * signal to UIM via KIM that - + * N_TI_WL ldisc is un-installed + */ + st_kim_complete(st_gdata->kim_data); + st_gdata->tty = NULL; + /* Flush any pending characters in the driver and discipline. */ + tty_ldisc_flush(tty); + tty_driver_flush_buffer(tty); + + spin_lock_irqsave(&st_gdata->lock, flags); + /* empty out txq and tx_waitq */ + skb_queue_purge(&st_gdata->txq); + skb_queue_purge(&st_gdata->tx_waitq); + /* reset the TTY Rx states of ST */ + st_gdata->rx_count = 0; + st_gdata->rx_state = ST_W4_PACKET_TYPE; + kfree_skb(st_gdata->rx_skb); + st_gdata->rx_skb = NULL; + spin_unlock_irqrestore(&st_gdata->lock, flags); + + pr_debug("%s: done ", __func__); +} + +static void st_tty_receive(struct tty_struct *tty, const unsigned char *data, + char *tty_flags, int count) +{ +#ifdef VERBOSE + print_hex_dump(KERN_DEBUG, ">in>", DUMP_PREFIX_NONE, + 16, 1, data, count, 0); +#endif + + /* + * if fw download is in progress then route incoming data + * to KIM for validation + */ + st_recv(tty->disc_data, data, count); + pr_debug("done %s", __func__); +} + +/* wake-up function called in from the TTY layer + * inside the internal wakeup function will be called + */ +static void st_tty_wakeup(struct tty_struct *tty) +{ + struct st_data_s *st_gdata = tty->disc_data; + pr_debug("%s ", __func__); + /* don't do an wakeup for now */ + clear_bit(TTY_DO_WRITE_WAKEUP, &tty->flags); + + /* + * schedule the internal wakeup instead of calling directly to + * avoid lockup (port->lock needed in tty->ops->write is + * already taken here + */ + schedule_work(&st_gdata->work_write_wakeup); +} + +static void st_tty_flush_buffer(struct tty_struct *tty) +{ + struct st_data_s *st_gdata = tty->disc_data; + pr_debug("%s ", __func__); + + kfree_skb(st_gdata->tx_skb); + st_gdata->tx_skb = NULL; + + tty_driver_flush_buffer(tty); + return; +} + +static struct tty_ldisc_ops st_ldisc_ops = { + .magic = TTY_LDISC_MAGIC, + .name = "n_st", + .open = st_tty_open, + .close = st_tty_close, + .receive_buf = st_tty_receive, + .write_wakeup = st_tty_wakeup, + .flush_buffer = st_tty_flush_buffer, + .owner = THIS_MODULE +}; + +/********************************************************************/ +int st_core_init(struct st_data_s **core_data) +{ + struct st_data_s *st_gdata; + long err; + + err = tty_register_ldisc(N_TI_WL, &st_ldisc_ops); + if (err) { + pr_err("error registering %d line discipline %ld", + N_TI_WL, err); + return err; + } + pr_debug("registered n_shared line discipline"); + + st_gdata = kzalloc(sizeof(struct st_data_s), GFP_KERNEL); + if (!st_gdata) { + pr_err("memory allocation failed"); + err = tty_unregister_ldisc(N_TI_WL); + if (err) + pr_err("unable to un-register ldisc %ld", err); + err = -ENOMEM; + return err; + } + + /* Initialize ST TxQ and Tx waitQ queue head. All BT/FM/GPS module skb's + * will be pushed in this queue for actual transmission. + */ + skb_queue_head_init(&st_gdata->txq); + skb_queue_head_init(&st_gdata->tx_waitq); + + /* Locking used in st_int_enqueue() to avoid multiple execution */ + spin_lock_init(&st_gdata->lock); + + err = st_ll_init(st_gdata); + if (err) { + pr_err("error during st_ll initialization(%ld)", err); + kfree(st_gdata); + err = tty_unregister_ldisc(N_TI_WL); + if (err) + pr_err("unable to un-register ldisc"); + return err; + } + + INIT_WORK(&st_gdata->work_write_wakeup, work_fn_write_wakeup); + + *core_data = st_gdata; + return 0; +} + +void st_core_exit(struct st_data_s *st_gdata) +{ + long err; + /* internal module cleanup */ + err = st_ll_deinit(st_gdata); + if (err) + pr_err("error during deinit of ST LL %ld", err); + + if (st_gdata != NULL) { + /* Free ST Tx Qs and skbs */ + skb_queue_purge(&st_gdata->txq); + skb_queue_purge(&st_gdata->tx_waitq); + kfree_skb(st_gdata->rx_skb); + kfree_skb(st_gdata->tx_skb); + /* TTY ldisc cleanup */ + err = tty_unregister_ldisc(N_TI_WL); + if (err) + pr_err("unable to un-register ldisc %ld", err); + /* free the global data pointer */ + kfree(st_gdata); + } +} diff --git a/drivers/misc/ti-st/st_kim.c b/drivers/misc/ti-st/st_kim.c new file mode 100644 index 000000000..1874ac922 --- /dev/null +++ b/drivers/misc/ti-st/st_kim.c @@ -0,0 +1,868 @@ +/* + * Shared Transport Line discipline driver Core + * Init Manager module responsible for GPIO control + * and firmware download + * Copyright (C) 2009-2010 Texas Instruments + * Author: Pavan Savoy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#define pr_fmt(fmt) "(stk) :" fmt +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#define MAX_ST_DEVICES 3 /* Imagine 1 on each UART for now */ +static struct platform_device *st_kim_devices[MAX_ST_DEVICES]; + +/**********************************************************************/ +/* internal functions */ + +/** + * st_get_plat_device - + * function which returns the reference to the platform device + * requested by id. As of now only 1 such device exists (id=0) + * the context requesting for reference can get the id to be + * requested by a. The protocol driver which is registering or + * b. the tty device which is opened. + */ +static struct platform_device *st_get_plat_device(int id) +{ + return st_kim_devices[id]; +} + +/** + * validate_firmware_response - + * function to return whether the firmware response was proper + * in case of error don't complete so that waiting for proper + * response times out + */ +static void validate_firmware_response(struct kim_data_s *kim_gdata) +{ + struct sk_buff *skb = kim_gdata->rx_skb; + if (!skb) + return; + + /* these magic numbers are the position in the response buffer which + * allows us to distinguish whether the response is for the read + * version info. command + */ + if (skb->data[2] == 0x01 && skb->data[3] == 0x01 && + skb->data[4] == 0x10 && skb->data[5] == 0x00) { + /* fw version response */ + memcpy(kim_gdata->resp_buffer, + kim_gdata->rx_skb->data, + kim_gdata->rx_skb->len); + kim_gdata->rx_state = ST_W4_PACKET_TYPE; + kim_gdata->rx_skb = NULL; + kim_gdata->rx_count = 0; + } else if (unlikely(skb->data[5] != 0)) { + pr_err("no proper response during fw download"); + pr_err("data6 %x", skb->data[5]); + kfree_skb(skb); + return; /* keep waiting for the proper response */ + } + /* becos of all the script being downloaded */ + complete_all(&kim_gdata->kim_rcvd); + kfree_skb(skb); +} + +/* check for data len received inside kim_int_recv + * most often hit the last case to update state to waiting for data + */ +static inline int kim_check_data_len(struct kim_data_s *kim_gdata, int len) +{ + register int room = skb_tailroom(kim_gdata->rx_skb); + + pr_debug("len %d room %d", len, room); + + if (!len) { + validate_firmware_response(kim_gdata); + } else if (len > room) { + /* Received packet's payload length is larger. + * We can't accommodate it in created skb. + */ + pr_err("Data length is too large len %d room %d", len, + room); + kfree_skb(kim_gdata->rx_skb); + } else { + /* Packet header has non-zero payload length and + * we have enough space in created skb. Lets read + * payload data */ + kim_gdata->rx_state = ST_W4_DATA; + kim_gdata->rx_count = len; + return len; + } + + /* Change ST LL state to continue to process next + * packet */ + kim_gdata->rx_state = ST_W4_PACKET_TYPE; + kim_gdata->rx_skb = NULL; + kim_gdata->rx_count = 0; + + return 0; +} + +/** + * kim_int_recv - receive function called during firmware download + * firmware download responses on different UART drivers + * have been observed to come in bursts of different + * tty_receive and hence the logic + */ +static void kim_int_recv(struct kim_data_s *kim_gdata, + const unsigned char *data, long count) +{ + const unsigned char *ptr; + int len = 0; + unsigned char *plen; + + pr_debug("%s", __func__); + /* Decode received bytes here */ + ptr = data; + if (unlikely(ptr == NULL)) { + pr_err(" received null from TTY "); + return; + } + + while (count) { + if (kim_gdata->rx_count) { + len = min_t(unsigned int, kim_gdata->rx_count, count); + skb_put_data(kim_gdata->rx_skb, ptr, len); + kim_gdata->rx_count -= len; + count -= len; + ptr += len; + + if (kim_gdata->rx_count) + continue; + + /* Check ST RX state machine , where are we? */ + switch (kim_gdata->rx_state) { + /* Waiting for complete packet ? */ + case ST_W4_DATA: + pr_debug("Complete pkt received"); + validate_firmware_response(kim_gdata); + kim_gdata->rx_state = ST_W4_PACKET_TYPE; + kim_gdata->rx_skb = NULL; + continue; + /* Waiting for Bluetooth event header ? */ + case ST_W4_HEADER: + plen = + (unsigned char *)&kim_gdata->rx_skb->data[1]; + pr_debug("event hdr: plen 0x%02x\n", *plen); + kim_check_data_len(kim_gdata, *plen); + continue; + } /* end of switch */ + } /* end of if rx_state */ + switch (*ptr) { + /* Bluetooth event packet? */ + case 0x04: + kim_gdata->rx_state = ST_W4_HEADER; + kim_gdata->rx_count = 2; + break; + default: + pr_info("unknown packet"); + ptr++; + count--; + continue; + } + ptr++; + count--; + kim_gdata->rx_skb = + alloc_skb(1024+8, GFP_ATOMIC); + if (!kim_gdata->rx_skb) { + pr_err("can't allocate mem for new packet"); + kim_gdata->rx_state = ST_W4_PACKET_TYPE; + kim_gdata->rx_count = 0; + return; + } + skb_reserve(kim_gdata->rx_skb, 8); + kim_gdata->rx_skb->cb[0] = 4; + kim_gdata->rx_skb->cb[1] = 0; + + } + return; +} + +static long read_local_version(struct kim_data_s *kim_gdata, char *bts_scr_name) +{ + unsigned short version = 0, chip = 0, min_ver = 0, maj_ver = 0; + const char read_ver_cmd[] = { 0x01, 0x01, 0x10, 0x00 }; + long timeout; + + pr_debug("%s", __func__); + + reinit_completion(&kim_gdata->kim_rcvd); + if (4 != st_int_write(kim_gdata->core_data, read_ver_cmd, 4)) { + pr_err("kim: couldn't write 4 bytes"); + return -EIO; + } + + timeout = wait_for_completion_interruptible_timeout( + &kim_gdata->kim_rcvd, msecs_to_jiffies(CMD_RESP_TIME)); + if (timeout <= 0) { + pr_err(" waiting for ver info- timed out or received signal"); + return timeout ? -ERESTARTSYS : -ETIMEDOUT; + } + reinit_completion(&kim_gdata->kim_rcvd); + /* the positions 12 & 13 in the response buffer provide with the + * chip, major & minor numbers + */ + + version = + MAKEWORD(kim_gdata->resp_buffer[12], + kim_gdata->resp_buffer[13]); + chip = (version & 0x7C00) >> 10; + min_ver = (version & 0x007F); + maj_ver = (version & 0x0380) >> 7; + + if (version & 0x8000) + maj_ver |= 0x0008; + + sprintf(bts_scr_name, "ti-connectivity/TIInit_%d.%d.%d.bts", + chip, maj_ver, min_ver); + + /* to be accessed later via sysfs entry */ + kim_gdata->version.full = version; + kim_gdata->version.chip = chip; + kim_gdata->version.maj_ver = maj_ver; + kim_gdata->version.min_ver = min_ver; + + pr_info("%s", bts_scr_name); + return 0; +} + +static void skip_change_remote_baud(unsigned char **ptr, long *len) +{ + unsigned char *nxt_action, *cur_action; + cur_action = *ptr; + + nxt_action = cur_action + sizeof(struct bts_action) + + ((struct bts_action *) cur_action)->size; + + if (((struct bts_action *) nxt_action)->type != ACTION_WAIT_EVENT) { + pr_err("invalid action after change remote baud command"); + } else { + *ptr = *ptr + sizeof(struct bts_action) + + ((struct bts_action *)cur_action)->size; + *len = *len - (sizeof(struct bts_action) + + ((struct bts_action *)cur_action)->size); + /* warn user on not commenting these in firmware */ + pr_warn("skipping the wait event of change remote baud"); + } +} + +/** + * download_firmware - + * internal function which parses through the .bts firmware + * script file intreprets SEND, DELAY actions only as of now + */ +static long download_firmware(struct kim_data_s *kim_gdata) +{ + long err = 0; + long len = 0; + unsigned char *ptr = NULL; + unsigned char *action_ptr = NULL; + unsigned char bts_scr_name[40] = { 0 }; /* 40 char long bts scr name? */ + int wr_room_space; + int cmd_size; + unsigned long timeout; + + err = read_local_version(kim_gdata, bts_scr_name); + if (err != 0) { + pr_err("kim: failed to read local ver"); + return err; + } + err = + request_firmware(&kim_gdata->fw_entry, bts_scr_name, + &kim_gdata->kim_pdev->dev); + if (unlikely((err != 0) || (kim_gdata->fw_entry->data == NULL) || + (kim_gdata->fw_entry->size == 0))) { + pr_err(" request_firmware failed(errno %ld) for %s", err, + bts_scr_name); + return -EINVAL; + } + ptr = (void *)kim_gdata->fw_entry->data; + len = kim_gdata->fw_entry->size; + /* bts_header to remove out magic number and + * version + */ + ptr += sizeof(struct bts_header); + len -= sizeof(struct bts_header); + + while (len > 0 && ptr) { + pr_debug(" action size %d, type %d ", + ((struct bts_action *)ptr)->size, + ((struct bts_action *)ptr)->type); + + switch (((struct bts_action *)ptr)->type) { + case ACTION_SEND_COMMAND: /* action send */ + pr_debug("S"); + action_ptr = &(((struct bts_action *)ptr)->data[0]); + if (unlikely + (((struct hci_command *)action_ptr)->opcode == + 0xFF36)) { + /* ignore remote change + * baud rate HCI VS command */ + pr_warn("change remote baud" + " rate command in firmware"); + skip_change_remote_baud(&ptr, &len); + break; + } + /* + * Make sure we have enough free space in uart + * tx buffer to write current firmware command + */ + cmd_size = ((struct bts_action *)ptr)->size; + timeout = jiffies + msecs_to_jiffies(CMD_WR_TIME); + do { + wr_room_space = + st_get_uart_wr_room(kim_gdata->core_data); + if (wr_room_space < 0) { + pr_err("Unable to get free " + "space info from uart tx buffer"); + release_firmware(kim_gdata->fw_entry); + return wr_room_space; + } + mdelay(1); /* wait 1ms before checking room */ + } while ((wr_room_space < cmd_size) && + time_before(jiffies, timeout)); + + /* Timeout happened ? */ + if (time_after_eq(jiffies, timeout)) { + pr_err("Timeout while waiting for free " + "free space in uart tx buffer"); + release_firmware(kim_gdata->fw_entry); + return -ETIMEDOUT; + } + /* reinit completion before sending for the + * relevant wait + */ + reinit_completion(&kim_gdata->kim_rcvd); + + /* + * Free space found in uart buffer, call st_int_write + * to send current firmware command to the uart tx + * buffer. + */ + err = st_int_write(kim_gdata->core_data, + ((struct bts_action_send *)action_ptr)->data, + ((struct bts_action *)ptr)->size); + if (unlikely(err < 0)) { + release_firmware(kim_gdata->fw_entry); + return err; + } + /* + * Check number of bytes written to the uart tx buffer + * and requested command write size + */ + if (err != cmd_size) { + pr_err("Number of bytes written to uart " + "tx buffer are not matching with " + "requested cmd write size"); + release_firmware(kim_gdata->fw_entry); + return -EIO; + } + break; + case ACTION_WAIT_EVENT: /* wait */ + pr_debug("W"); + err = wait_for_completion_interruptible_timeout( + &kim_gdata->kim_rcvd, + msecs_to_jiffies(CMD_RESP_TIME)); + if (err <= 0) { + pr_err("response timeout/signaled during fw download "); + /* timed out */ + release_firmware(kim_gdata->fw_entry); + return err ? -ERESTARTSYS : -ETIMEDOUT; + } + reinit_completion(&kim_gdata->kim_rcvd); + break; + case ACTION_DELAY: /* sleep */ + pr_info("sleep command in scr"); + action_ptr = &(((struct bts_action *)ptr)->data[0]); + mdelay(((struct bts_action_delay *)action_ptr)->msec); + break; + } + len = + len - (sizeof(struct bts_action) + + ((struct bts_action *)ptr)->size); + ptr = + ptr + sizeof(struct bts_action) + + ((struct bts_action *)ptr)->size; + } + /* fw download complete */ + release_firmware(kim_gdata->fw_entry); + return 0; +} + +/**********************************************************************/ +/* functions called from ST core */ +/* called from ST Core, when REG_IN_PROGRESS (registration in progress) + * can be because of + * 1. response to read local version + * 2. during send/recv's of firmware download + */ +void st_kim_recv(void *disc_data, const unsigned char *data, long count) +{ + struct st_data_s *st_gdata = (struct st_data_s *)disc_data; + struct kim_data_s *kim_gdata = st_gdata->kim_data; + + /* proceed to gather all data and distinguish read fw version response + * from other fw responses when data gathering is complete + */ + kim_int_recv(kim_gdata, data, count); + return; +} + +/* to signal completion of line discipline installation + * called from ST Core, upon tty_open + */ +void st_kim_complete(void *kim_data) +{ + struct kim_data_s *kim_gdata = (struct kim_data_s *)kim_data; + complete(&kim_gdata->ldisc_installed); +} + +/** + * st_kim_start - called from ST Core upon 1st registration + * This involves toggling the chip enable gpio, reading + * the firmware version from chip, forming the fw file name + * based on the chip version, requesting the fw, parsing it + * and perform download(send/recv). + */ +long st_kim_start(void *kim_data) +{ + long err = 0; + long retry = POR_RETRY_COUNT; + struct ti_st_plat_data *pdata; + struct kim_data_s *kim_gdata = (struct kim_data_s *)kim_data; + + pr_info(" %s", __func__); + pdata = kim_gdata->kim_pdev->dev.platform_data; + + do { + /* platform specific enabling code here */ + if (pdata->chip_enable) + pdata->chip_enable(kim_gdata); + + /* Configure BT nShutdown to HIGH state */ + gpio_set_value_cansleep(kim_gdata->nshutdown, GPIO_LOW); + mdelay(5); /* FIXME: a proper toggle */ + gpio_set_value_cansleep(kim_gdata->nshutdown, GPIO_HIGH); + mdelay(100); + /* re-initialize the completion */ + reinit_completion(&kim_gdata->ldisc_installed); + /* send notification to UIM */ + kim_gdata->ldisc_install = 1; + pr_info("ldisc_install = 1"); + sysfs_notify(&kim_gdata->kim_pdev->dev.kobj, + NULL, "install"); + /* wait for ldisc to be installed */ + err = wait_for_completion_interruptible_timeout( + &kim_gdata->ldisc_installed, msecs_to_jiffies(LDISC_TIME)); + if (!err) { + /* ldisc installation timeout, + * flush uart, power cycle BT_EN */ + pr_err("ldisc installation timeout"); + err = st_kim_stop(kim_gdata); + continue; + } else { + /* ldisc installed now */ + pr_info("line discipline installed"); + err = download_firmware(kim_gdata); + if (err != 0) { + /* ldisc installed but fw download failed, + * flush uart & power cycle BT_EN */ + pr_err("download firmware failed"); + err = st_kim_stop(kim_gdata); + continue; + } else { /* on success don't retry */ + break; + } + } + } while (retry--); + return err; +} + +/** + * st_kim_stop - stop communication with chip. + * This can be called from ST Core/KIM, on the- + * (a) last un-register when chip need not be powered there-after, + * (b) upon failure to either install ldisc or download firmware. + * The function is responsible to (a) notify UIM about un-installation, + * (b) flush UART if the ldisc was installed. + * (c) reset BT_EN - pull down nshutdown at the end. + * (d) invoke platform's chip disabling routine. + */ +long st_kim_stop(void *kim_data) +{ + long err = 0; + struct kim_data_s *kim_gdata = (struct kim_data_s *)kim_data; + struct ti_st_plat_data *pdata = + kim_gdata->kim_pdev->dev.platform_data; + struct tty_struct *tty = kim_gdata->core_data->tty; + + reinit_completion(&kim_gdata->ldisc_installed); + + if (tty) { /* can be called before ldisc is installed */ + /* Flush any pending characters in the driver and discipline. */ + tty_ldisc_flush(tty); + tty_driver_flush_buffer(tty); + } + + /* send uninstall notification to UIM */ + pr_info("ldisc_install = 0"); + kim_gdata->ldisc_install = 0; + sysfs_notify(&kim_gdata->kim_pdev->dev.kobj, NULL, "install"); + + /* wait for ldisc to be un-installed */ + err = wait_for_completion_interruptible_timeout( + &kim_gdata->ldisc_installed, msecs_to_jiffies(LDISC_TIME)); + if (!err) { /* timeout */ + pr_err(" timed out waiting for ldisc to be un-installed"); + err = -ETIMEDOUT; + } + + /* By default configure BT nShutdown to LOW state */ + gpio_set_value_cansleep(kim_gdata->nshutdown, GPIO_LOW); + mdelay(1); + gpio_set_value_cansleep(kim_gdata->nshutdown, GPIO_HIGH); + mdelay(1); + gpio_set_value_cansleep(kim_gdata->nshutdown, GPIO_LOW); + + /* platform specific disable */ + if (pdata->chip_disable) + pdata->chip_disable(kim_gdata); + return err; +} + +/**********************************************************************/ +/* functions called from subsystems */ +/* called when debugfs entry is read from */ + +static int show_version(struct seq_file *s, void *unused) +{ + struct kim_data_s *kim_gdata = (struct kim_data_s *)s->private; + seq_printf(s, "%04X %d.%d.%d\n", kim_gdata->version.full, + kim_gdata->version.chip, kim_gdata->version.maj_ver, + kim_gdata->version.min_ver); + return 0; +} + +static int show_list(struct seq_file *s, void *unused) +{ + struct kim_data_s *kim_gdata = (struct kim_data_s *)s->private; + kim_st_list_protocols(kim_gdata->core_data, s); + return 0; +} + +static ssize_t show_install(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct kim_data_s *kim_data = dev_get_drvdata(dev); + return sprintf(buf, "%d\n", kim_data->ldisc_install); +} + +#ifdef DEBUG +static ssize_t store_dev_name(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct kim_data_s *kim_data = dev_get_drvdata(dev); + pr_debug("storing dev name >%s<", buf); + strncpy(kim_data->dev_name, buf, count); + pr_debug("stored dev name >%s<", kim_data->dev_name); + return count; +} + +static ssize_t store_baud_rate(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct kim_data_s *kim_data = dev_get_drvdata(dev); + pr_debug("storing baud rate >%s<", buf); + sscanf(buf, "%ld", &kim_data->baud_rate); + pr_debug("stored baud rate >%ld<", kim_data->baud_rate); + return count; +} +#endif /* if DEBUG */ + +static ssize_t show_dev_name(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct kim_data_s *kim_data = dev_get_drvdata(dev); + return sprintf(buf, "%s\n", kim_data->dev_name); +} + +static ssize_t show_baud_rate(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct kim_data_s *kim_data = dev_get_drvdata(dev); + return sprintf(buf, "%d\n", kim_data->baud_rate); +} + +static ssize_t show_flow_cntrl(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct kim_data_s *kim_data = dev_get_drvdata(dev); + return sprintf(buf, "%d\n", kim_data->flow_cntrl); +} + +/* structures specific for sysfs entries */ +static struct kobj_attribute ldisc_install = +__ATTR(install, 0444, (void *)show_install, NULL); + +static struct kobj_attribute uart_dev_name = +#ifdef DEBUG /* TODO: move this to debug-fs if possible */ +__ATTR(dev_name, 0644, (void *)show_dev_name, (void *)store_dev_name); +#else +__ATTR(dev_name, 0444, (void *)show_dev_name, NULL); +#endif + +static struct kobj_attribute uart_baud_rate = +#ifdef DEBUG /* TODO: move to debugfs */ +__ATTR(baud_rate, 0644, (void *)show_baud_rate, (void *)store_baud_rate); +#else +__ATTR(baud_rate, 0444, (void *)show_baud_rate, NULL); +#endif + +static struct kobj_attribute uart_flow_cntrl = +__ATTR(flow_cntrl, 0444, (void *)show_flow_cntrl, NULL); + +static struct attribute *uim_attrs[] = { + &ldisc_install.attr, + &uart_dev_name.attr, + &uart_baud_rate.attr, + &uart_flow_cntrl.attr, + NULL, +}; + +static const struct attribute_group uim_attr_grp = { + .attrs = uim_attrs, +}; + +/** + * st_kim_ref - reference the core's data + * This references the per-ST platform device in the arch/xx/ + * board-xx.c file. + * This would enable multiple such platform devices to exist + * on a given platform + */ +void st_kim_ref(struct st_data_s **core_data, int id) +{ + struct platform_device *pdev; + struct kim_data_s *kim_gdata; + /* get kim_gdata reference from platform device */ + pdev = st_get_plat_device(id); + if (!pdev) + goto err; + kim_gdata = platform_get_drvdata(pdev); + if (!kim_gdata) + goto err; + + *core_data = kim_gdata->core_data; + return; +err: + *core_data = NULL; +} + +static int kim_version_open(struct inode *i, struct file *f) +{ + return single_open(f, show_version, i->i_private); +} + +static int kim_list_open(struct inode *i, struct file *f) +{ + return single_open(f, show_list, i->i_private); +} + +static const struct file_operations version_debugfs_fops = { + /* version info */ + .open = kim_version_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; +static const struct file_operations list_debugfs_fops = { + /* protocols info */ + .open = kim_list_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +/**********************************************************************/ +/* functions called from platform device driver subsystem + * need to have a relevant platform device entry in the platform's + * board-*.c file + */ + +static struct dentry *kim_debugfs_dir; +static int kim_probe(struct platform_device *pdev) +{ + struct kim_data_s *kim_gdata; + struct ti_st_plat_data *pdata = pdev->dev.platform_data; + int err; + + if ((pdev->id != -1) && (pdev->id < MAX_ST_DEVICES)) { + /* multiple devices could exist */ + st_kim_devices[pdev->id] = pdev; + } else { + /* platform's sure about existence of 1 device */ + st_kim_devices[0] = pdev; + } + + kim_gdata = kzalloc(sizeof(struct kim_data_s), GFP_KERNEL); + if (!kim_gdata) { + pr_err("no mem to allocate"); + return -ENOMEM; + } + platform_set_drvdata(pdev, kim_gdata); + + err = st_core_init(&kim_gdata->core_data); + if (err != 0) { + pr_err(" ST core init failed"); + err = -EIO; + goto err_core_init; + } + /* refer to itself */ + kim_gdata->core_data->kim_data = kim_gdata; + + /* Claim the chip enable nShutdown gpio from the system */ + kim_gdata->nshutdown = pdata->nshutdown_gpio; + err = gpio_request(kim_gdata->nshutdown, "kim"); + if (unlikely(err)) { + pr_err(" gpio %d request failed ", kim_gdata->nshutdown); + goto err_sysfs_group; + } + + /* Configure nShutdown GPIO as output=0 */ + err = gpio_direction_output(kim_gdata->nshutdown, 0); + if (unlikely(err)) { + pr_err(" unable to configure gpio %d", kim_gdata->nshutdown); + goto err_sysfs_group; + } + /* get reference of pdev for request_firmware + */ + kim_gdata->kim_pdev = pdev; + init_completion(&kim_gdata->kim_rcvd); + init_completion(&kim_gdata->ldisc_installed); + + err = sysfs_create_group(&pdev->dev.kobj, &uim_attr_grp); + if (err) { + pr_err("failed to create sysfs entries"); + goto err_sysfs_group; + } + + /* copying platform data */ + strncpy(kim_gdata->dev_name, pdata->dev_name, UART_DEV_NAME_LEN); + kim_gdata->flow_cntrl = pdata->flow_cntrl; + kim_gdata->baud_rate = pdata->baud_rate; + pr_info("sysfs entries created\n"); + + kim_debugfs_dir = debugfs_create_dir("ti-st", NULL); + if (!kim_debugfs_dir) { + pr_err(" debugfs entries creation failed "); + return 0; + } + + debugfs_create_file("version", S_IRUGO, kim_debugfs_dir, + kim_gdata, &version_debugfs_fops); + debugfs_create_file("protocols", S_IRUGO, kim_debugfs_dir, + kim_gdata, &list_debugfs_fops); + return 0; + +err_sysfs_group: + st_core_exit(kim_gdata->core_data); + +err_core_init: + kfree(kim_gdata); + + return err; +} + +static int kim_remove(struct platform_device *pdev) +{ + /* free the GPIOs requested */ + struct ti_st_plat_data *pdata = pdev->dev.platform_data; + struct kim_data_s *kim_gdata; + + kim_gdata = platform_get_drvdata(pdev); + + /* Free the Bluetooth/FM/GPIO + * nShutdown gpio from the system + */ + gpio_free(pdata->nshutdown_gpio); + pr_info("nshutdown GPIO Freed"); + + debugfs_remove_recursive(kim_debugfs_dir); + sysfs_remove_group(&pdev->dev.kobj, &uim_attr_grp); + pr_info("sysfs entries removed"); + + kim_gdata->kim_pdev = NULL; + st_core_exit(kim_gdata->core_data); + + kfree(kim_gdata); + kim_gdata = NULL; + return 0; +} + +static int kim_suspend(struct platform_device *pdev, pm_message_t state) +{ + struct ti_st_plat_data *pdata = pdev->dev.platform_data; + + if (pdata->suspend) + return pdata->suspend(pdev, state); + + return 0; +} + +static int kim_resume(struct platform_device *pdev) +{ + struct ti_st_plat_data *pdata = pdev->dev.platform_data; + + if (pdata->resume) + return pdata->resume(pdev); + + return 0; +} + +/**********************************************************************/ +/* entry point for ST KIM module, called in from ST Core */ +static struct platform_driver kim_platform_driver = { + .probe = kim_probe, + .remove = kim_remove, + .suspend = kim_suspend, + .resume = kim_resume, + .driver = { + .name = "kim", + }, +}; + +module_platform_driver(kim_platform_driver); + +MODULE_AUTHOR("Pavan Savoy "); +MODULE_DESCRIPTION("Shared Transport Driver for TI BT/FM/GPS combo chips "); +MODULE_LICENSE("GPL"); diff --git a/drivers/misc/ti-st/st_ll.c b/drivers/misc/ti-st/st_ll.c new file mode 100644 index 000000000..93b4d67cc --- /dev/null +++ b/drivers/misc/ti-st/st_ll.c @@ -0,0 +1,169 @@ +/* + * Shared Transport driver + * HCI-LL module responsible for TI proprietary HCI_LL protocol + * Copyright (C) 2009-2010 Texas Instruments + * Author: Pavan Savoy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#define pr_fmt(fmt) "(stll) :" fmt +#include +#include +#include +#include + +/**********************************************************************/ +/* internal functions */ +static void send_ll_cmd(struct st_data_s *st_data, + unsigned char cmd) +{ + + pr_debug("%s: writing %x", __func__, cmd); + st_int_write(st_data, &cmd, 1); + return; +} + +static void ll_device_want_to_sleep(struct st_data_s *st_data) +{ + struct kim_data_s *kim_data; + struct ti_st_plat_data *pdata; + + pr_debug("%s", __func__); + /* sanity check */ + if (st_data->ll_state != ST_LL_AWAKE) + pr_err("ERR hcill: ST_LL_GO_TO_SLEEP_IND" + "in state %ld", st_data->ll_state); + + send_ll_cmd(st_data, LL_SLEEP_ACK); + /* update state */ + st_data->ll_state = ST_LL_ASLEEP; + + /* communicate to platform about chip asleep */ + kim_data = st_data->kim_data; + pdata = kim_data->kim_pdev->dev.platform_data; + if (pdata->chip_asleep) + pdata->chip_asleep(NULL); +} + +static void ll_device_want_to_wakeup(struct st_data_s *st_data) +{ + struct kim_data_s *kim_data; + struct ti_st_plat_data *pdata; + + /* diff actions in diff states */ + switch (st_data->ll_state) { + case ST_LL_ASLEEP: + send_ll_cmd(st_data, LL_WAKE_UP_ACK); /* send wake_ack */ + break; + case ST_LL_ASLEEP_TO_AWAKE: + /* duplicate wake_ind */ + pr_err("duplicate wake_ind while waiting for Wake ack"); + break; + case ST_LL_AWAKE: + /* duplicate wake_ind */ + pr_err("duplicate wake_ind already AWAKE"); + break; + case ST_LL_AWAKE_TO_ASLEEP: + /* duplicate wake_ind */ + pr_err("duplicate wake_ind"); + break; + } + /* update state */ + st_data->ll_state = ST_LL_AWAKE; + + /* communicate to platform about chip wakeup */ + kim_data = st_data->kim_data; + pdata = kim_data->kim_pdev->dev.platform_data; + if (pdata->chip_awake) + pdata->chip_awake(NULL); +} + +/**********************************************************************/ +/* functions invoked by ST Core */ + +/* called when ST Core wants to + * enable ST LL */ +void st_ll_enable(struct st_data_s *ll) +{ + ll->ll_state = ST_LL_AWAKE; +} + +/* called when ST Core /local module wants to + * disable ST LL */ +void st_ll_disable(struct st_data_s *ll) +{ + ll->ll_state = ST_LL_INVALID; +} + +/* called when ST Core wants to update the state */ +void st_ll_wakeup(struct st_data_s *ll) +{ + if (likely(ll->ll_state != ST_LL_AWAKE)) { + send_ll_cmd(ll, LL_WAKE_UP_IND); /* WAKE_IND */ + ll->ll_state = ST_LL_ASLEEP_TO_AWAKE; + } else { + /* don't send the duplicate wake_indication */ + pr_err(" Chip already AWAKE "); + } +} + +/* called when ST Core wants the state */ +unsigned long st_ll_getstate(struct st_data_s *ll) +{ + pr_debug(" returning state %ld", ll->ll_state); + return ll->ll_state; +} + +/* called from ST Core, when a PM related packet arrives */ +unsigned long st_ll_sleep_state(struct st_data_s *st_data, + unsigned char cmd) +{ + switch (cmd) { + case LL_SLEEP_IND: /* sleep ind */ + pr_debug("sleep indication recvd"); + ll_device_want_to_sleep(st_data); + break; + case LL_SLEEP_ACK: /* sleep ack */ + pr_err("sleep ack rcvd: host shouldn't"); + break; + case LL_WAKE_UP_IND: /* wake ind */ + pr_debug("wake indication recvd"); + ll_device_want_to_wakeup(st_data); + break; + case LL_WAKE_UP_ACK: /* wake ack */ + pr_debug("wake ack rcvd"); + st_data->ll_state = ST_LL_AWAKE; + break; + default: + pr_err(" unknown input/state "); + return -EINVAL; + } + return 0; +} + +/* Called from ST CORE to initialize ST LL */ +long st_ll_init(struct st_data_s *ll) +{ + /* set state to invalid */ + ll->ll_state = ST_LL_INVALID; + return 0; +} + +/* Called from ST CORE to de-initialize ST LL */ +long st_ll_deinit(struct st_data_s *ll) +{ + return 0; +} diff --git a/drivers/misc/tifm_7xx1.c b/drivers/misc/tifm_7xx1.c new file mode 100644 index 000000000..9ac95b48e --- /dev/null +++ b/drivers/misc/tifm_7xx1.c @@ -0,0 +1,446 @@ +/* + * tifm_7xx1.c - TI FlashMedia driver + * + * Copyright (C) 2006 Alex Dubov + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include +#include +#include + +#define DRIVER_NAME "tifm_7xx1" +#define DRIVER_VERSION "0.8" + +#define TIFM_IRQ_ENABLE 0x80000000 +#define TIFM_IRQ_SOCKMASK(x) (x) +#define TIFM_IRQ_CARDMASK(x) ((x) << 8) +#define TIFM_IRQ_FIFOMASK(x) ((x) << 16) +#define TIFM_IRQ_SETALL 0xffffffff + +static void tifm_7xx1_dummy_eject(struct tifm_adapter *fm, + struct tifm_dev *sock) +{ +} + +static void tifm_7xx1_eject(struct tifm_adapter *fm, struct tifm_dev *sock) +{ + unsigned long flags; + + spin_lock_irqsave(&fm->lock, flags); + fm->socket_change_set |= 1 << sock->socket_id; + tifm_queue_work(&fm->media_switcher); + spin_unlock_irqrestore(&fm->lock, flags); +} + +static irqreturn_t tifm_7xx1_isr(int irq, void *dev_id) +{ + struct tifm_adapter *fm = dev_id; + struct tifm_dev *sock; + unsigned int irq_status, cnt; + + spin_lock(&fm->lock); + irq_status = readl(fm->addr + FM_INTERRUPT_STATUS); + if (irq_status == 0 || irq_status == (~0)) { + spin_unlock(&fm->lock); + return IRQ_NONE; + } + + if (irq_status & TIFM_IRQ_ENABLE) { + writel(TIFM_IRQ_ENABLE, fm->addr + FM_CLEAR_INTERRUPT_ENABLE); + + for (cnt = 0; cnt < fm->num_sockets; cnt++) { + sock = fm->sockets[cnt]; + if (sock) { + if ((irq_status >> cnt) & TIFM_IRQ_FIFOMASK(1)) + sock->data_event(sock); + if ((irq_status >> cnt) & TIFM_IRQ_CARDMASK(1)) + sock->card_event(sock); + } + } + + fm->socket_change_set |= irq_status + & ((1 << fm->num_sockets) - 1); + } + writel(irq_status, fm->addr + FM_INTERRUPT_STATUS); + + if (fm->finish_me) + complete_all(fm->finish_me); + else if (!fm->socket_change_set) + writel(TIFM_IRQ_ENABLE, fm->addr + FM_SET_INTERRUPT_ENABLE); + else + tifm_queue_work(&fm->media_switcher); + + spin_unlock(&fm->lock); + return IRQ_HANDLED; +} + +static unsigned char tifm_7xx1_toggle_sock_power(char __iomem *sock_addr) +{ + unsigned int s_state; + int cnt; + + writel(0x0e00, sock_addr + SOCK_CONTROL); + + for (cnt = 16; cnt <= 256; cnt <<= 1) { + if (!(TIFM_SOCK_STATE_POWERED + & readl(sock_addr + SOCK_PRESENT_STATE))) + break; + + msleep(cnt); + } + + s_state = readl(sock_addr + SOCK_PRESENT_STATE); + if (!(TIFM_SOCK_STATE_OCCUPIED & s_state)) + return 0; + + writel(readl(sock_addr + SOCK_CONTROL) | TIFM_CTRL_LED, + sock_addr + SOCK_CONTROL); + + /* xd needs some extra time before power on */ + if (((readl(sock_addr + SOCK_PRESENT_STATE) >> 4) & 7) + == TIFM_TYPE_XD) + msleep(40); + + writel((s_state & TIFM_CTRL_POWER_MASK) | 0x0c00, + sock_addr + SOCK_CONTROL); + /* wait for power to stabilize */ + msleep(20); + for (cnt = 16; cnt <= 256; cnt <<= 1) { + if ((TIFM_SOCK_STATE_POWERED + & readl(sock_addr + SOCK_PRESENT_STATE))) + break; + + msleep(cnt); + } + + writel(readl(sock_addr + SOCK_CONTROL) & (~TIFM_CTRL_LED), + sock_addr + SOCK_CONTROL); + + return (readl(sock_addr + SOCK_PRESENT_STATE) >> 4) & 7; +} + +inline static void tifm_7xx1_sock_power_off(char __iomem *sock_addr) +{ + writel((~TIFM_CTRL_POWER_MASK) & readl(sock_addr + SOCK_CONTROL), + sock_addr + SOCK_CONTROL); +} + +inline static char __iomem * +tifm_7xx1_sock_addr(char __iomem *base_addr, unsigned int sock_num) +{ + return base_addr + ((sock_num + 1) << 10); +} + +static void tifm_7xx1_switch_media(struct work_struct *work) +{ + struct tifm_adapter *fm = container_of(work, struct tifm_adapter, + media_switcher); + struct tifm_dev *sock; + char __iomem *sock_addr; + unsigned long flags; + unsigned char media_id; + unsigned int socket_change_set, cnt; + + spin_lock_irqsave(&fm->lock, flags); + socket_change_set = fm->socket_change_set; + fm->socket_change_set = 0; + + dev_dbg(fm->dev.parent, "checking media set %x\n", + socket_change_set); + + if (!socket_change_set) { + spin_unlock_irqrestore(&fm->lock, flags); + return; + } + + for (cnt = 0; cnt < fm->num_sockets; cnt++) { + if (!(socket_change_set & (1 << cnt))) + continue; + sock = fm->sockets[cnt]; + if (sock) { + printk(KERN_INFO + "%s : demand removing card from socket %u:%u\n", + dev_name(&fm->dev), fm->id, cnt); + fm->sockets[cnt] = NULL; + sock_addr = sock->addr; + spin_unlock_irqrestore(&fm->lock, flags); + device_unregister(&sock->dev); + spin_lock_irqsave(&fm->lock, flags); + tifm_7xx1_sock_power_off(sock_addr); + writel(0x0e00, sock_addr + SOCK_CONTROL); + } + + spin_unlock_irqrestore(&fm->lock, flags); + + media_id = tifm_7xx1_toggle_sock_power( + tifm_7xx1_sock_addr(fm->addr, cnt)); + + // tifm_alloc_device will check if media_id is valid + sock = tifm_alloc_device(fm, cnt, media_id); + if (sock) { + sock->addr = tifm_7xx1_sock_addr(fm->addr, cnt); + + if (!device_register(&sock->dev)) { + spin_lock_irqsave(&fm->lock, flags); + if (!fm->sockets[cnt]) { + fm->sockets[cnt] = sock; + sock = NULL; + } + spin_unlock_irqrestore(&fm->lock, flags); + } + if (sock) + tifm_free_device(&sock->dev); + } + spin_lock_irqsave(&fm->lock, flags); + } + + writel(TIFM_IRQ_FIFOMASK(socket_change_set) + | TIFM_IRQ_CARDMASK(socket_change_set), + fm->addr + FM_CLEAR_INTERRUPT_ENABLE); + + writel(TIFM_IRQ_FIFOMASK(socket_change_set) + | TIFM_IRQ_CARDMASK(socket_change_set), + fm->addr + FM_SET_INTERRUPT_ENABLE); + + writel(TIFM_IRQ_ENABLE, fm->addr + FM_SET_INTERRUPT_ENABLE); + spin_unlock_irqrestore(&fm->lock, flags); +} + +#ifdef CONFIG_PM + +static int tifm_7xx1_suspend(struct pci_dev *dev, pm_message_t state) +{ + struct tifm_adapter *fm = pci_get_drvdata(dev); + int cnt; + + dev_dbg(&dev->dev, "suspending host\n"); + + for (cnt = 0; cnt < fm->num_sockets; cnt++) { + if (fm->sockets[cnt]) + tifm_7xx1_sock_power_off(fm->sockets[cnt]->addr); + } + + pci_save_state(dev); + pci_enable_wake(dev, pci_choose_state(dev, state), 0); + pci_disable_device(dev); + pci_set_power_state(dev, pci_choose_state(dev, state)); + return 0; +} + +static int tifm_7xx1_resume(struct pci_dev *dev) +{ + struct tifm_adapter *fm = pci_get_drvdata(dev); + int rc; + unsigned long timeout; + unsigned int good_sockets = 0, bad_sockets = 0; + unsigned long flags; + /* Maximum number of entries is 4 */ + unsigned char new_ids[4]; + DECLARE_COMPLETION_ONSTACK(finish_resume); + + if (WARN_ON(fm->num_sockets > ARRAY_SIZE(new_ids))) + return -ENXIO; + + pci_set_power_state(dev, PCI_D0); + pci_restore_state(dev); + rc = pci_enable_device(dev); + if (rc) + return rc; + pci_set_master(dev); + + dev_dbg(&dev->dev, "resuming host\n"); + + for (rc = 0; rc < fm->num_sockets; rc++) + new_ids[rc] = tifm_7xx1_toggle_sock_power( + tifm_7xx1_sock_addr(fm->addr, rc)); + spin_lock_irqsave(&fm->lock, flags); + for (rc = 0; rc < fm->num_sockets; rc++) { + if (fm->sockets[rc]) { + if (fm->sockets[rc]->type == new_ids[rc]) + good_sockets |= 1 << rc; + else + bad_sockets |= 1 << rc; + } + } + + writel(TIFM_IRQ_ENABLE | TIFM_IRQ_SOCKMASK((1 << fm->num_sockets) - 1), + fm->addr + FM_SET_INTERRUPT_ENABLE); + dev_dbg(&dev->dev, "change sets on resume: good %x, bad %x\n", + good_sockets, bad_sockets); + + fm->socket_change_set = 0; + if (good_sockets) { + fm->finish_me = &finish_resume; + spin_unlock_irqrestore(&fm->lock, flags); + timeout = wait_for_completion_timeout(&finish_resume, HZ); + dev_dbg(&dev->dev, "wait returned %lu\n", timeout); + writel(TIFM_IRQ_FIFOMASK(good_sockets) + | TIFM_IRQ_CARDMASK(good_sockets), + fm->addr + FM_CLEAR_INTERRUPT_ENABLE); + writel(TIFM_IRQ_FIFOMASK(good_sockets) + | TIFM_IRQ_CARDMASK(good_sockets), + fm->addr + FM_SET_INTERRUPT_ENABLE); + spin_lock_irqsave(&fm->lock, flags); + fm->finish_me = NULL; + fm->socket_change_set ^= good_sockets & fm->socket_change_set; + } + + fm->socket_change_set |= bad_sockets; + if (fm->socket_change_set) + tifm_queue_work(&fm->media_switcher); + + spin_unlock_irqrestore(&fm->lock, flags); + writel(TIFM_IRQ_ENABLE, + fm->addr + FM_SET_INTERRUPT_ENABLE); + + return 0; +} + +#else + +#define tifm_7xx1_suspend NULL +#define tifm_7xx1_resume NULL + +#endif /* CONFIG_PM */ + +static int tifm_7xx1_dummy_has_ms_pif(struct tifm_adapter *fm, + struct tifm_dev *sock) +{ + return 0; +} + +static int tifm_7xx1_has_ms_pif(struct tifm_adapter *fm, struct tifm_dev *sock) +{ + if (((fm->num_sockets == 4) && (sock->socket_id == 2)) + || ((fm->num_sockets == 2) && (sock->socket_id == 0))) + return 1; + + return 0; +} + +static int tifm_7xx1_probe(struct pci_dev *dev, + const struct pci_device_id *dev_id) +{ + struct tifm_adapter *fm; + int pci_dev_busy = 0; + int rc; + + rc = pci_set_dma_mask(dev, DMA_BIT_MASK(32)); + if (rc) + return rc; + + rc = pci_enable_device(dev); + if (rc) + return rc; + + pci_set_master(dev); + + rc = pci_request_regions(dev, DRIVER_NAME); + if (rc) { + pci_dev_busy = 1; + goto err_out; + } + + pci_intx(dev, 1); + + fm = tifm_alloc_adapter(dev->device == PCI_DEVICE_ID_TI_XX21_XX11_FM + ? 4 : 2, &dev->dev); + if (!fm) { + rc = -ENOMEM; + goto err_out_int; + } + + INIT_WORK(&fm->media_switcher, tifm_7xx1_switch_media); + fm->eject = tifm_7xx1_eject; + fm->has_ms_pif = tifm_7xx1_has_ms_pif; + pci_set_drvdata(dev, fm); + + fm->addr = pci_ioremap_bar(dev, 0); + if (!fm->addr) { + rc = -ENODEV; + goto err_out_free; + } + + rc = request_irq(dev->irq, tifm_7xx1_isr, IRQF_SHARED, DRIVER_NAME, fm); + if (rc) + goto err_out_unmap; + + rc = tifm_add_adapter(fm); + if (rc) + goto err_out_irq; + + writel(TIFM_IRQ_ENABLE | TIFM_IRQ_SOCKMASK((1 << fm->num_sockets) - 1), + fm->addr + FM_CLEAR_INTERRUPT_ENABLE); + writel(TIFM_IRQ_ENABLE | TIFM_IRQ_SOCKMASK((1 << fm->num_sockets) - 1), + fm->addr + FM_SET_INTERRUPT_ENABLE); + return 0; + +err_out_irq: + free_irq(dev->irq, fm); +err_out_unmap: + iounmap(fm->addr); +err_out_free: + tifm_free_adapter(fm); +err_out_int: + pci_intx(dev, 0); + pci_release_regions(dev); +err_out: + if (!pci_dev_busy) + pci_disable_device(dev); + return rc; +} + +static void tifm_7xx1_remove(struct pci_dev *dev) +{ + struct tifm_adapter *fm = pci_get_drvdata(dev); + int cnt; + + fm->eject = tifm_7xx1_dummy_eject; + fm->has_ms_pif = tifm_7xx1_dummy_has_ms_pif; + writel(TIFM_IRQ_SETALL, fm->addr + FM_CLEAR_INTERRUPT_ENABLE); + mmiowb(); + free_irq(dev->irq, fm); + + tifm_remove_adapter(fm); + + for (cnt = 0; cnt < fm->num_sockets; cnt++) + tifm_7xx1_sock_power_off(tifm_7xx1_sock_addr(fm->addr, cnt)); + + iounmap(fm->addr); + pci_intx(dev, 0); + pci_release_regions(dev); + + pci_disable_device(dev); + tifm_free_adapter(fm); +} + +static const struct pci_device_id tifm_7xx1_pci_tbl[] = { + { PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_XX21_XX11_FM, PCI_ANY_ID, + PCI_ANY_ID, 0, 0, 0 }, /* xx21 - the one I have */ + { PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_XX12_FM, PCI_ANY_ID, + PCI_ANY_ID, 0, 0, 0 }, + { PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_XX20_FM, PCI_ANY_ID, + PCI_ANY_ID, 0, 0, 0 }, + { } +}; + +static struct pci_driver tifm_7xx1_driver = { + .name = DRIVER_NAME, + .id_table = tifm_7xx1_pci_tbl, + .probe = tifm_7xx1_probe, + .remove = tifm_7xx1_remove, + .suspend = tifm_7xx1_suspend, + .resume = tifm_7xx1_resume, +}; + +module_pci_driver(tifm_7xx1_driver); +MODULE_AUTHOR("Alex Dubov"); +MODULE_DESCRIPTION("TI FlashMedia host driver"); +MODULE_LICENSE("GPL"); +MODULE_DEVICE_TABLE(pci, tifm_7xx1_pci_tbl); +MODULE_VERSION(DRIVER_VERSION); diff --git a/drivers/misc/tifm_core.c b/drivers/misc/tifm_core.c new file mode 100644 index 000000000..a511b2a71 --- /dev/null +++ b/drivers/misc/tifm_core.c @@ -0,0 +1,371 @@ +/* + * tifm_core.c - TI FlashMedia driver + * + * Copyright (C) 2006 Alex Dubov + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include +#include +#include +#include +#include + +#define DRIVER_NAME "tifm_core" +#define DRIVER_VERSION "0.8" + +static struct workqueue_struct *workqueue; +static DEFINE_IDR(tifm_adapter_idr); +static DEFINE_SPINLOCK(tifm_adapter_lock); + +static const char *tifm_media_type_name(unsigned char type, unsigned char nt) +{ + const char *card_type_name[3][3] = { + { "SmartMedia/xD", "MemoryStick", "MMC/SD" }, + { "XD", "MS", "SD"}, + { "xd", "ms", "sd"} + }; + + if (nt > 2 || type < 1 || type > 3) + return NULL; + return card_type_name[nt][type - 1]; +} + +static int tifm_dev_match(struct tifm_dev *sock, struct tifm_device_id *id) +{ + if (sock->type == id->type) + return 1; + return 0; +} + +static int tifm_bus_match(struct device *dev, struct device_driver *drv) +{ + struct tifm_dev *sock = container_of(dev, struct tifm_dev, dev); + struct tifm_driver *fm_drv = container_of(drv, struct tifm_driver, + driver); + struct tifm_device_id *ids = fm_drv->id_table; + + if (ids) { + while (ids->type) { + if (tifm_dev_match(sock, ids)) + return 1; + ++ids; + } + } + return 0; +} + +static int tifm_uevent(struct device *dev, struct kobj_uevent_env *env) +{ + struct tifm_dev *sock = container_of(dev, struct tifm_dev, dev); + + if (add_uevent_var(env, "TIFM_CARD_TYPE=%s", tifm_media_type_name(sock->type, 1))) + return -ENOMEM; + + return 0; +} + +static int tifm_device_probe(struct device *dev) +{ + struct tifm_dev *sock = container_of(dev, struct tifm_dev, dev); + struct tifm_driver *drv = container_of(dev->driver, struct tifm_driver, + driver); + int rc = -ENODEV; + + get_device(dev); + if (dev->driver && drv->probe) { + rc = drv->probe(sock); + if (!rc) + return 0; + } + put_device(dev); + return rc; +} + +static void tifm_dummy_event(struct tifm_dev *sock) +{ + return; +} + +static int tifm_device_remove(struct device *dev) +{ + struct tifm_dev *sock = container_of(dev, struct tifm_dev, dev); + struct tifm_driver *drv = container_of(dev->driver, struct tifm_driver, + driver); + + if (dev->driver && drv->remove) { + sock->card_event = tifm_dummy_event; + sock->data_event = tifm_dummy_event; + drv->remove(sock); + sock->dev.driver = NULL; + } + + put_device(dev); + return 0; +} + +#ifdef CONFIG_PM + +static int tifm_device_suspend(struct device *dev, pm_message_t state) +{ + struct tifm_dev *sock = container_of(dev, struct tifm_dev, dev); + struct tifm_driver *drv = container_of(dev->driver, struct tifm_driver, + driver); + + if (dev->driver && drv->suspend) + return drv->suspend(sock, state); + return 0; +} + +static int tifm_device_resume(struct device *dev) +{ + struct tifm_dev *sock = container_of(dev, struct tifm_dev, dev); + struct tifm_driver *drv = container_of(dev->driver, struct tifm_driver, + driver); + + if (dev->driver && drv->resume) + return drv->resume(sock); + return 0; +} + +#else + +#define tifm_device_suspend NULL +#define tifm_device_resume NULL + +#endif /* CONFIG_PM */ + +static ssize_t type_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct tifm_dev *sock = container_of(dev, struct tifm_dev, dev); + return sprintf(buf, "%x", sock->type); +} +static DEVICE_ATTR_RO(type); + +static struct attribute *tifm_dev_attrs[] = { + &dev_attr_type.attr, + NULL, +}; +ATTRIBUTE_GROUPS(tifm_dev); + +static struct bus_type tifm_bus_type = { + .name = "tifm", + .dev_groups = tifm_dev_groups, + .match = tifm_bus_match, + .uevent = tifm_uevent, + .probe = tifm_device_probe, + .remove = tifm_device_remove, + .suspend = tifm_device_suspend, + .resume = tifm_device_resume +}; + +static void tifm_free(struct device *dev) +{ + struct tifm_adapter *fm = container_of(dev, struct tifm_adapter, dev); + + kfree(fm); +} + +static struct class tifm_adapter_class = { + .name = "tifm_adapter", + .dev_release = tifm_free +}; + +struct tifm_adapter *tifm_alloc_adapter(unsigned int num_sockets, + struct device *dev) +{ + struct tifm_adapter *fm; + + fm = kzalloc(sizeof(struct tifm_adapter) + + sizeof(struct tifm_dev*) * num_sockets, GFP_KERNEL); + if (fm) { + fm->dev.class = &tifm_adapter_class; + fm->dev.parent = dev; + device_initialize(&fm->dev); + spin_lock_init(&fm->lock); + fm->num_sockets = num_sockets; + } + return fm; +} +EXPORT_SYMBOL(tifm_alloc_adapter); + +int tifm_add_adapter(struct tifm_adapter *fm) +{ + int rc; + + idr_preload(GFP_KERNEL); + spin_lock(&tifm_adapter_lock); + rc = idr_alloc(&tifm_adapter_idr, fm, 0, 0, GFP_NOWAIT); + if (rc >= 0) + fm->id = rc; + spin_unlock(&tifm_adapter_lock); + idr_preload_end(); + if (rc < 0) + return rc; + + dev_set_name(&fm->dev, "tifm%u", fm->id); + rc = device_add(&fm->dev); + if (rc) { + spin_lock(&tifm_adapter_lock); + idr_remove(&tifm_adapter_idr, fm->id); + spin_unlock(&tifm_adapter_lock); + } + + return rc; +} +EXPORT_SYMBOL(tifm_add_adapter); + +void tifm_remove_adapter(struct tifm_adapter *fm) +{ + unsigned int cnt; + + flush_workqueue(workqueue); + for (cnt = 0; cnt < fm->num_sockets; ++cnt) { + if (fm->sockets[cnt]) + device_unregister(&fm->sockets[cnt]->dev); + } + + spin_lock(&tifm_adapter_lock); + idr_remove(&tifm_adapter_idr, fm->id); + spin_unlock(&tifm_adapter_lock); + device_del(&fm->dev); +} +EXPORT_SYMBOL(tifm_remove_adapter); + +void tifm_free_adapter(struct tifm_adapter *fm) +{ + put_device(&fm->dev); +} +EXPORT_SYMBOL(tifm_free_adapter); + +void tifm_free_device(struct device *dev) +{ + struct tifm_dev *sock = container_of(dev, struct tifm_dev, dev); + kfree(sock); +} +EXPORT_SYMBOL(tifm_free_device); + +struct tifm_dev *tifm_alloc_device(struct tifm_adapter *fm, unsigned int id, + unsigned char type) +{ + struct tifm_dev *sock = NULL; + + if (!tifm_media_type_name(type, 0)) + return sock; + + sock = kzalloc(sizeof(struct tifm_dev), GFP_KERNEL); + if (sock) { + spin_lock_init(&sock->lock); + sock->type = type; + sock->socket_id = id; + sock->card_event = tifm_dummy_event; + sock->data_event = tifm_dummy_event; + + sock->dev.parent = fm->dev.parent; + sock->dev.bus = &tifm_bus_type; + sock->dev.dma_mask = fm->dev.parent->dma_mask; + sock->dev.release = tifm_free_device; + + dev_set_name(&sock->dev, "tifm_%s%u:%u", + tifm_media_type_name(type, 2), fm->id, id); + printk(KERN_INFO DRIVER_NAME + ": %s card detected in socket %u:%u\n", + tifm_media_type_name(type, 0), fm->id, id); + } + return sock; +} +EXPORT_SYMBOL(tifm_alloc_device); + +void tifm_eject(struct tifm_dev *sock) +{ + struct tifm_adapter *fm = dev_get_drvdata(sock->dev.parent); + fm->eject(fm, sock); +} +EXPORT_SYMBOL(tifm_eject); + +int tifm_has_ms_pif(struct tifm_dev *sock) +{ + struct tifm_adapter *fm = dev_get_drvdata(sock->dev.parent); + return fm->has_ms_pif(fm, sock); +} +EXPORT_SYMBOL(tifm_has_ms_pif); + +int tifm_map_sg(struct tifm_dev *sock, struct scatterlist *sg, int nents, + int direction) +{ + return pci_map_sg(to_pci_dev(sock->dev.parent), sg, nents, direction); +} +EXPORT_SYMBOL(tifm_map_sg); + +void tifm_unmap_sg(struct tifm_dev *sock, struct scatterlist *sg, int nents, + int direction) +{ + pci_unmap_sg(to_pci_dev(sock->dev.parent), sg, nents, direction); +} +EXPORT_SYMBOL(tifm_unmap_sg); + +void tifm_queue_work(struct work_struct *work) +{ + queue_work(workqueue, work); +} +EXPORT_SYMBOL(tifm_queue_work); + +int tifm_register_driver(struct tifm_driver *drv) +{ + drv->driver.bus = &tifm_bus_type; + + return driver_register(&drv->driver); +} +EXPORT_SYMBOL(tifm_register_driver); + +void tifm_unregister_driver(struct tifm_driver *drv) +{ + driver_unregister(&drv->driver); +} +EXPORT_SYMBOL(tifm_unregister_driver); + +static int __init tifm_init(void) +{ + int rc; + + workqueue = create_freezable_workqueue("tifm"); + if (!workqueue) + return -ENOMEM; + + rc = bus_register(&tifm_bus_type); + + if (rc) + goto err_out_wq; + + rc = class_register(&tifm_adapter_class); + if (!rc) + return 0; + + bus_unregister(&tifm_bus_type); + +err_out_wq: + destroy_workqueue(workqueue); + + return rc; +} + +static void __exit tifm_exit(void) +{ + class_unregister(&tifm_adapter_class); + bus_unregister(&tifm_bus_type); + destroy_workqueue(workqueue); +} + +subsys_initcall(tifm_init); +module_exit(tifm_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Alex Dubov"); +MODULE_DESCRIPTION("TI FlashMedia core driver"); +MODULE_LICENSE("GPL"); +MODULE_VERSION(DRIVER_VERSION); diff --git a/drivers/misc/tsl2550.c b/drivers/misc/tsl2550.c new file mode 100644 index 000000000..3fce3b6a3 --- /dev/null +++ b/drivers/misc/tsl2550.c @@ -0,0 +1,468 @@ +/* + * tsl2550.c - Linux kernel modules for ambient light sensor + * + * Copyright (C) 2007 Rodolfo Giometti + * Copyright (C) 2007 Eurotech S.p.A. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include +#include +#include +#include + +#define TSL2550_DRV_NAME "tsl2550" +#define DRIVER_VERSION "1.2" + +/* + * Defines + */ + +#define TSL2550_POWER_DOWN 0x00 +#define TSL2550_POWER_UP 0x03 +#define TSL2550_STANDARD_RANGE 0x18 +#define TSL2550_EXTENDED_RANGE 0x1d +#define TSL2550_READ_ADC0 0x43 +#define TSL2550_READ_ADC1 0x83 + +/* + * Structs + */ + +struct tsl2550_data { + struct i2c_client *client; + struct mutex update_lock; + + unsigned int power_state:1; + unsigned int operating_mode:1; +}; + +/* + * Global data + */ + +static const u8 TSL2550_MODE_RANGE[2] = { + TSL2550_STANDARD_RANGE, TSL2550_EXTENDED_RANGE, +}; + +/* + * Management functions + */ + +static int tsl2550_set_operating_mode(struct i2c_client *client, int mode) +{ + struct tsl2550_data *data = i2c_get_clientdata(client); + + int ret = i2c_smbus_write_byte(client, TSL2550_MODE_RANGE[mode]); + + data->operating_mode = mode; + + return ret; +} + +static int tsl2550_set_power_state(struct i2c_client *client, int state) +{ + struct tsl2550_data *data = i2c_get_clientdata(client); + int ret; + + if (state == 0) + ret = i2c_smbus_write_byte(client, TSL2550_POWER_DOWN); + else { + ret = i2c_smbus_write_byte(client, TSL2550_POWER_UP); + + /* On power up we should reset operating mode also... */ + tsl2550_set_operating_mode(client, data->operating_mode); + } + + data->power_state = state; + + return ret; +} + +static int tsl2550_get_adc_value(struct i2c_client *client, u8 cmd) +{ + int ret; + + ret = i2c_smbus_read_byte_data(client, cmd); + if (ret < 0) + return ret; + if (!(ret & 0x80)) + return -EAGAIN; + return ret & 0x7f; /* remove the "valid" bit */ +} + +/* + * LUX calculation + */ + +#define TSL2550_MAX_LUX 1846 + +static const u8 ratio_lut[] = { + 100, 100, 100, 100, 100, 100, 100, 100, + 100, 100, 100, 100, 100, 100, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 98, 98, 98, 98, 98, + 98, 98, 97, 97, 97, 97, 97, 96, + 96, 96, 96, 95, 95, 95, 94, 94, + 93, 93, 93, 92, 92, 91, 91, 90, + 89, 89, 88, 87, 87, 86, 85, 84, + 83, 82, 81, 80, 79, 78, 77, 75, + 74, 73, 71, 69, 68, 66, 64, 62, + 60, 58, 56, 54, 52, 49, 47, 44, + 42, 41, 40, 40, 39, 39, 38, 38, + 37, 37, 37, 36, 36, 36, 35, 35, + 35, 35, 34, 34, 34, 34, 33, 33, + 33, 33, 32, 32, 32, 32, 32, 31, + 31, 31, 31, 31, 30, 30, 30, 30, + 30, +}; + +static const u16 count_lut[] = { + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 18, 20, 22, 24, 26, 28, 30, + 32, 34, 36, 38, 40, 42, 44, 46, + 49, 53, 57, 61, 65, 69, 73, 77, + 81, 85, 89, 93, 97, 101, 105, 109, + 115, 123, 131, 139, 147, 155, 163, 171, + 179, 187, 195, 203, 211, 219, 227, 235, + 247, 263, 279, 295, 311, 327, 343, 359, + 375, 391, 407, 423, 439, 455, 471, 487, + 511, 543, 575, 607, 639, 671, 703, 735, + 767, 799, 831, 863, 895, 927, 959, 991, + 1039, 1103, 1167, 1231, 1295, 1359, 1423, 1487, + 1551, 1615, 1679, 1743, 1807, 1871, 1935, 1999, + 2095, 2223, 2351, 2479, 2607, 2735, 2863, 2991, + 3119, 3247, 3375, 3503, 3631, 3759, 3887, 4015, +}; + +/* + * This function is described into Taos TSL2550 Designer's Notebook + * pages 2, 3. + */ +static int tsl2550_calculate_lux(u8 ch0, u8 ch1) +{ + unsigned int lux; + + /* Look up count from channel values */ + u16 c0 = count_lut[ch0]; + u16 c1 = count_lut[ch1]; + + /* + * Calculate ratio. + * Note: the "128" is a scaling factor + */ + u8 r = 128; + + /* Avoid division by 0 and count 1 cannot be greater than count 0 */ + if (c1 <= c0) + if (c0) { + r = c1 * 128 / c0; + + /* Calculate LUX */ + lux = ((c0 - c1) * ratio_lut[r]) / 256; + } else + lux = 0; + else + return 0; + + /* LUX range check */ + return lux > TSL2550_MAX_LUX ? TSL2550_MAX_LUX : lux; +} + +/* + * SysFS support + */ + +static ssize_t tsl2550_show_power_state(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct tsl2550_data *data = i2c_get_clientdata(to_i2c_client(dev)); + + return sprintf(buf, "%u\n", data->power_state); +} + +static ssize_t tsl2550_store_power_state(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct i2c_client *client = to_i2c_client(dev); + struct tsl2550_data *data = i2c_get_clientdata(client); + unsigned long val = simple_strtoul(buf, NULL, 10); + int ret; + + if (val > 1) + return -EINVAL; + + mutex_lock(&data->update_lock); + ret = tsl2550_set_power_state(client, val); + mutex_unlock(&data->update_lock); + + if (ret < 0) + return ret; + + return count; +} + +static DEVICE_ATTR(power_state, S_IWUSR | S_IRUGO, + tsl2550_show_power_state, tsl2550_store_power_state); + +static ssize_t tsl2550_show_operating_mode(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct tsl2550_data *data = i2c_get_clientdata(to_i2c_client(dev)); + + return sprintf(buf, "%u\n", data->operating_mode); +} + +static ssize_t tsl2550_store_operating_mode(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct i2c_client *client = to_i2c_client(dev); + struct tsl2550_data *data = i2c_get_clientdata(client); + unsigned long val = simple_strtoul(buf, NULL, 10); + int ret; + + if (val > 1) + return -EINVAL; + + if (data->power_state == 0) + return -EBUSY; + + mutex_lock(&data->update_lock); + ret = tsl2550_set_operating_mode(client, val); + mutex_unlock(&data->update_lock); + + if (ret < 0) + return ret; + + return count; +} + +static DEVICE_ATTR(operating_mode, S_IWUSR | S_IRUGO, + tsl2550_show_operating_mode, tsl2550_store_operating_mode); + +static ssize_t __tsl2550_show_lux(struct i2c_client *client, char *buf) +{ + struct tsl2550_data *data = i2c_get_clientdata(client); + u8 ch0, ch1; + int ret; + + ret = tsl2550_get_adc_value(client, TSL2550_READ_ADC0); + if (ret < 0) + return ret; + ch0 = ret; + + ret = tsl2550_get_adc_value(client, TSL2550_READ_ADC1); + if (ret < 0) + return ret; + ch1 = ret; + + /* Do the job */ + ret = tsl2550_calculate_lux(ch0, ch1); + if (ret < 0) + return ret; + if (data->operating_mode == 1) + ret *= 5; + + return sprintf(buf, "%d\n", ret); +} + +static ssize_t tsl2550_show_lux1_input(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct i2c_client *client = to_i2c_client(dev); + struct tsl2550_data *data = i2c_get_clientdata(client); + int ret; + + /* No LUX data if not operational */ + if (!data->power_state) + return -EBUSY; + + mutex_lock(&data->update_lock); + ret = __tsl2550_show_lux(client, buf); + mutex_unlock(&data->update_lock); + + return ret; +} + +static DEVICE_ATTR(lux1_input, S_IRUGO, + tsl2550_show_lux1_input, NULL); + +static struct attribute *tsl2550_attributes[] = { + &dev_attr_power_state.attr, + &dev_attr_operating_mode.attr, + &dev_attr_lux1_input.attr, + NULL +}; + +static const struct attribute_group tsl2550_attr_group = { + .attrs = tsl2550_attributes, +}; + +/* + * Initialization function + */ + +static int tsl2550_init_client(struct i2c_client *client) +{ + struct tsl2550_data *data = i2c_get_clientdata(client); + int err; + + /* + * Probe the chip. To do so we try to power up the device and then to + * read back the 0x03 code + */ + err = i2c_smbus_read_byte_data(client, TSL2550_POWER_UP); + if (err < 0) + return err; + if (err != TSL2550_POWER_UP) + return -ENODEV; + data->power_state = 1; + + /* Set the default operating mode */ + err = i2c_smbus_write_byte(client, + TSL2550_MODE_RANGE[data->operating_mode]); + if (err < 0) + return err; + + return 0; +} + +/* + * I2C init/probing/exit functions + */ + +static struct i2c_driver tsl2550_driver; +static int tsl2550_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct i2c_adapter *adapter = to_i2c_adapter(client->dev.parent); + struct tsl2550_data *data; + int *opmode, err = 0; + + if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_WRITE_BYTE + | I2C_FUNC_SMBUS_READ_BYTE_DATA)) { + err = -EIO; + goto exit; + } + + data = kzalloc(sizeof(struct tsl2550_data), GFP_KERNEL); + if (!data) { + err = -ENOMEM; + goto exit; + } + data->client = client; + i2c_set_clientdata(client, data); + + /* Check platform data */ + opmode = client->dev.platform_data; + if (opmode) { + if (*opmode < 0 || *opmode > 1) { + dev_err(&client->dev, "invalid operating_mode (%d)\n", + *opmode); + err = -EINVAL; + goto exit_kfree; + } + data->operating_mode = *opmode; + } else + data->operating_mode = 0; /* default mode is standard */ + dev_info(&client->dev, "%s operating mode\n", + data->operating_mode ? "extended" : "standard"); + + mutex_init(&data->update_lock); + + /* Initialize the TSL2550 chip */ + err = tsl2550_init_client(client); + if (err) + goto exit_kfree; + + /* Register sysfs hooks */ + err = sysfs_create_group(&client->dev.kobj, &tsl2550_attr_group); + if (err) + goto exit_kfree; + + dev_info(&client->dev, "support ver. %s enabled\n", DRIVER_VERSION); + + return 0; + +exit_kfree: + kfree(data); +exit: + return err; +} + +static int tsl2550_remove(struct i2c_client *client) +{ + sysfs_remove_group(&client->dev.kobj, &tsl2550_attr_group); + + /* Power down the device */ + tsl2550_set_power_state(client, 0); + + kfree(i2c_get_clientdata(client)); + + return 0; +} + +#ifdef CONFIG_PM_SLEEP + +static int tsl2550_suspend(struct device *dev) +{ + return tsl2550_set_power_state(to_i2c_client(dev), 0); +} + +static int tsl2550_resume(struct device *dev) +{ + return tsl2550_set_power_state(to_i2c_client(dev), 1); +} + +static SIMPLE_DEV_PM_OPS(tsl2550_pm_ops, tsl2550_suspend, tsl2550_resume); +#define TSL2550_PM_OPS (&tsl2550_pm_ops) + +#else + +#define TSL2550_PM_OPS NULL + +#endif /* CONFIG_PM_SLEEP */ + +static const struct i2c_device_id tsl2550_id[] = { + { "tsl2550", 0 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, tsl2550_id); + +static const struct of_device_id tsl2550_of_match[] = { + { .compatible = "taos,tsl2550" }, + { } +}; +MODULE_DEVICE_TABLE(of, tsl2550_of_match); + +static struct i2c_driver tsl2550_driver = { + .driver = { + .name = TSL2550_DRV_NAME, + .of_match_table = tsl2550_of_match, + .pm = TSL2550_PM_OPS, + }, + .probe = tsl2550_probe, + .remove = tsl2550_remove, + .id_table = tsl2550_id, +}; + +module_i2c_driver(tsl2550_driver); + +MODULE_AUTHOR("Rodolfo Giometti "); +MODULE_DESCRIPTION("TSL2550 ambient light sensor driver"); +MODULE_LICENSE("GPL"); +MODULE_VERSION(DRIVER_VERSION); diff --git a/drivers/misc/vexpress-syscfg.c b/drivers/misc/vexpress-syscfg.c new file mode 100644 index 000000000..a3c6c773d --- /dev/null +++ b/drivers/misc/vexpress-syscfg.c @@ -0,0 +1,287 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * Copyright (C) 2014 ARM Limited + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#define SYS_CFGDATA 0x0 + +#define SYS_CFGCTRL 0x4 +#define SYS_CFGCTRL_START (1 << 31) +#define SYS_CFGCTRL_WRITE (1 << 30) +#define SYS_CFGCTRL_DCC(n) (((n) & 0xf) << 26) +#define SYS_CFGCTRL_FUNC(n) (((n) & 0x3f) << 20) +#define SYS_CFGCTRL_SITE(n) (((n) & 0x3) << 16) +#define SYS_CFGCTRL_POSITION(n) (((n) & 0xf) << 12) +#define SYS_CFGCTRL_DEVICE(n) (((n) & 0xfff) << 0) + +#define SYS_CFGSTAT 0x8 +#define SYS_CFGSTAT_ERR (1 << 1) +#define SYS_CFGSTAT_COMPLETE (1 << 0) + + +struct vexpress_syscfg { + struct device *dev; + void __iomem *base; + struct list_head funcs; +}; + +struct vexpress_syscfg_func { + struct list_head list; + struct vexpress_syscfg *syscfg; + struct regmap *regmap; + int num_templates; + u32 template[0]; /* Keep it last! */ +}; + + +static int vexpress_syscfg_exec(struct vexpress_syscfg_func *func, + int index, bool write, u32 *data) +{ + struct vexpress_syscfg *syscfg = func->syscfg; + u32 command, status; + int tries; + long timeout; + + if (WARN_ON(index >= func->num_templates)) + return -EINVAL; + + command = readl(syscfg->base + SYS_CFGCTRL); + if (WARN_ON(command & SYS_CFGCTRL_START)) + return -EBUSY; + + command = func->template[index]; + command |= SYS_CFGCTRL_START; + command |= write ? SYS_CFGCTRL_WRITE : 0; + + /* Use a canary for reads */ + if (!write) + *data = 0xdeadbeef; + + dev_dbg(syscfg->dev, "func %p, command %x, data %x\n", + func, command, *data); + writel(*data, syscfg->base + SYS_CFGDATA); + writel(0, syscfg->base + SYS_CFGSTAT); + writel(command, syscfg->base + SYS_CFGCTRL); + mb(); + + /* The operation can take ages... Go to sleep, 100us initially */ + tries = 100; + timeout = 100; + do { + if (!irqs_disabled()) { + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(usecs_to_jiffies(timeout)); + if (signal_pending(current)) + return -EINTR; + } else { + udelay(timeout); + } + + status = readl(syscfg->base + SYS_CFGSTAT); + if (status & SYS_CFGSTAT_ERR) + return -EFAULT; + + if (timeout > 20) + timeout -= 20; + } while (--tries && !(status & SYS_CFGSTAT_COMPLETE)); + if (WARN_ON_ONCE(!tries)) + return -ETIMEDOUT; + + if (!write) { + *data = readl(syscfg->base + SYS_CFGDATA); + dev_dbg(syscfg->dev, "func %p, read data %x\n", func, *data); + } + + return 0; +} + +static int vexpress_syscfg_read(void *context, unsigned int index, + unsigned int *val) +{ + struct vexpress_syscfg_func *func = context; + + return vexpress_syscfg_exec(func, index, false, val); +} + +static int vexpress_syscfg_write(void *context, unsigned int index, + unsigned int val) +{ + struct vexpress_syscfg_func *func = context; + + return vexpress_syscfg_exec(func, index, true, &val); +} + +static struct regmap_config vexpress_syscfg_regmap_config = { + .lock = vexpress_config_lock, + .unlock = vexpress_config_unlock, + .reg_bits = 32, + .val_bits = 32, + .reg_read = vexpress_syscfg_read, + .reg_write = vexpress_syscfg_write, + .reg_format_endian = REGMAP_ENDIAN_LITTLE, + .val_format_endian = REGMAP_ENDIAN_LITTLE, +}; + + +static struct regmap *vexpress_syscfg_regmap_init(struct device *dev, + void *context) +{ + int err; + struct vexpress_syscfg *syscfg = context; + struct vexpress_syscfg_func *func; + struct property *prop; + const __be32 *val = NULL; + __be32 energy_quirk[4]; + int num; + u32 site, position, dcc; + int i; + + err = vexpress_config_get_topo(dev->of_node, &site, + &position, &dcc); + if (err) + return ERR_PTR(err); + + prop = of_find_property(dev->of_node, + "arm,vexpress-sysreg,func", NULL); + if (!prop) + return ERR_PTR(-EINVAL); + + num = prop->length / sizeof(u32) / 2; + val = prop->value; + + /* + * "arm,vexpress-energy" function used to be described + * by its first device only, now it requires both + */ + if (num == 1 && of_device_is_compatible(dev->of_node, + "arm,vexpress-energy")) { + num = 2; + energy_quirk[0] = *val; + energy_quirk[2] = *val++; + energy_quirk[1] = *val; + energy_quirk[3] = cpu_to_be32(be32_to_cpup(val) + 1); + val = energy_quirk; + } + + func = kzalloc(struct_size(func, template, num), GFP_KERNEL); + if (!func) + return ERR_PTR(-ENOMEM); + + func->syscfg = syscfg; + func->num_templates = num; + + for (i = 0; i < num; i++) { + u32 function, device; + + function = be32_to_cpup(val++); + device = be32_to_cpup(val++); + + dev_dbg(dev, "func %p: %u/%u/%u/%u/%u\n", + func, site, position, dcc, + function, device); + + func->template[i] = SYS_CFGCTRL_DCC(dcc); + func->template[i] |= SYS_CFGCTRL_SITE(site); + func->template[i] |= SYS_CFGCTRL_POSITION(position); + func->template[i] |= SYS_CFGCTRL_FUNC(function); + func->template[i] |= SYS_CFGCTRL_DEVICE(device); + } + + vexpress_syscfg_regmap_config.max_register = num - 1; + + func->regmap = regmap_init(dev, NULL, func, + &vexpress_syscfg_regmap_config); + + if (IS_ERR(func->regmap)) { + void *err = func->regmap; + + kfree(func); + return err; + } + + list_add(&func->list, &syscfg->funcs); + + return func->regmap; +} + +static void vexpress_syscfg_regmap_exit(struct regmap *regmap, void *context) +{ + struct vexpress_syscfg *syscfg = context; + struct vexpress_syscfg_func *func, *tmp; + + regmap_exit(regmap); + + list_for_each_entry_safe(func, tmp, &syscfg->funcs, list) { + if (func->regmap == regmap) { + list_del(&syscfg->funcs); + kfree(func); + break; + } + } +} + +static struct vexpress_config_bridge_ops vexpress_syscfg_bridge_ops = { + .regmap_init = vexpress_syscfg_regmap_init, + .regmap_exit = vexpress_syscfg_regmap_exit, +}; + + +static int vexpress_syscfg_probe(struct platform_device *pdev) +{ + struct vexpress_syscfg *syscfg; + struct resource *res; + struct device *bridge; + + syscfg = devm_kzalloc(&pdev->dev, sizeof(*syscfg), GFP_KERNEL); + if (!syscfg) + return -ENOMEM; + syscfg->dev = &pdev->dev; + INIT_LIST_HEAD(&syscfg->funcs); + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + syscfg->base = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(syscfg->base)) + return PTR_ERR(syscfg->base); + + /* Must use dev.parent (MFD), as that's where DT phandle points at... */ + bridge = vexpress_config_bridge_register(pdev->dev.parent, + &vexpress_syscfg_bridge_ops, syscfg); + + return PTR_ERR_OR_ZERO(bridge); +} + +static const struct platform_device_id vexpress_syscfg_id_table[] = { + { "vexpress-syscfg", }, + {}, +}; + +static struct platform_driver vexpress_syscfg_driver = { + .driver.name = "vexpress-syscfg", + .id_table = vexpress_syscfg_id_table, + .probe = vexpress_syscfg_probe, +}; + +static int __init vexpress_syscfg_init(void) +{ + return platform_driver_register(&vexpress_syscfg_driver); +} +core_initcall(vexpress_syscfg_init); diff --git a/drivers/misc/vmw_balloon.c b/drivers/misc/vmw_balloon.c new file mode 100644 index 000000000..2543ef1ec --- /dev/null +++ b/drivers/misc/vmw_balloon.c @@ -0,0 +1,1236 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * VMware Balloon driver. + * + * Copyright (C) 2000-2018, VMware, Inc. All Rights Reserved. + * + * This is VMware physical memory management driver for Linux. The driver + * acts like a "balloon" that can be inflated to reclaim physical pages by + * reserving them in the guest and invalidating them in the monitor, + * freeing up the underlying machine pages so they can be allocated to + * other guests. The balloon can also be deflated to allow the guest to + * use more physical memory. Higher level policies can control the sizes + * of balloons in VMs in order to manage physical memory resources. + */ + +//#define DEBUG +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +MODULE_AUTHOR("VMware, Inc."); +MODULE_DESCRIPTION("VMware Memory Control (Balloon) Driver"); +MODULE_VERSION("1.5.0.0-k"); +MODULE_ALIAS("dmi:*:svnVMware*:*"); +MODULE_ALIAS("vmware_vmmemctl"); +MODULE_LICENSE("GPL"); + +/* + * Use __GFP_HIGHMEM to allow pages from HIGHMEM zone. We don't + * allow wait (__GFP_RECLAIM) for NOSLEEP page allocations. Use + * __GFP_NOWARN, to suppress page allocation failure warnings. + */ +#define VMW_PAGE_ALLOC_NOSLEEP (__GFP_HIGHMEM|__GFP_NOWARN) + +/* + * Use GFP_HIGHUSER when executing in a separate kernel thread + * context and allocation can sleep. This is less stressful to + * the guest memory system, since it allows the thread to block + * while memory is reclaimed, and won't take pages from emergency + * low-memory pools. + */ +#define VMW_PAGE_ALLOC_CANSLEEP (GFP_HIGHUSER) + +/* Maximum number of refused pages we accumulate during inflation cycle */ +#define VMW_BALLOON_MAX_REFUSED 16 + +/* + * Hypervisor communication port definitions. + */ +#define VMW_BALLOON_HV_PORT 0x5670 +#define VMW_BALLOON_HV_MAGIC 0x456c6d6f +#define VMW_BALLOON_GUEST_ID 1 /* Linux */ + +enum vmwballoon_capabilities { + /* + * Bit 0 is reserved and not associated to any capability. + */ + VMW_BALLOON_BASIC_CMDS = (1 << 1), + VMW_BALLOON_BATCHED_CMDS = (1 << 2), + VMW_BALLOON_BATCHED_2M_CMDS = (1 << 3), + VMW_BALLOON_SIGNALLED_WAKEUP_CMD = (1 << 4), +}; + +#define VMW_BALLOON_CAPABILITIES (VMW_BALLOON_BASIC_CMDS \ + | VMW_BALLOON_BATCHED_CMDS \ + | VMW_BALLOON_BATCHED_2M_CMDS \ + | VMW_BALLOON_SIGNALLED_WAKEUP_CMD) + +#define VMW_BALLOON_2M_SHIFT (9) +#define VMW_BALLOON_NUM_PAGE_SIZES (2) + +/* + * Backdoor commands availability: + * + * START, GET_TARGET and GUEST_ID are always available, + * + * VMW_BALLOON_BASIC_CMDS: + * LOCK and UNLOCK commands, + * VMW_BALLOON_BATCHED_CMDS: + * BATCHED_LOCK and BATCHED_UNLOCK commands. + * VMW BALLOON_BATCHED_2M_CMDS: + * BATCHED_2M_LOCK and BATCHED_2M_UNLOCK commands, + * VMW VMW_BALLOON_SIGNALLED_WAKEUP_CMD: + * VMW_BALLOON_CMD_VMCI_DOORBELL_SET command. + */ +#define VMW_BALLOON_CMD_START 0 +#define VMW_BALLOON_CMD_GET_TARGET 1 +#define VMW_BALLOON_CMD_LOCK 2 +#define VMW_BALLOON_CMD_UNLOCK 3 +#define VMW_BALLOON_CMD_GUEST_ID 4 +#define VMW_BALLOON_CMD_BATCHED_LOCK 6 +#define VMW_BALLOON_CMD_BATCHED_UNLOCK 7 +#define VMW_BALLOON_CMD_BATCHED_2M_LOCK 8 +#define VMW_BALLOON_CMD_BATCHED_2M_UNLOCK 9 +#define VMW_BALLOON_CMD_VMCI_DOORBELL_SET 10 + + +/* error codes */ +#define VMW_BALLOON_SUCCESS 0 +#define VMW_BALLOON_FAILURE -1 +#define VMW_BALLOON_ERROR_CMD_INVALID 1 +#define VMW_BALLOON_ERROR_PPN_INVALID 2 +#define VMW_BALLOON_ERROR_PPN_LOCKED 3 +#define VMW_BALLOON_ERROR_PPN_UNLOCKED 4 +#define VMW_BALLOON_ERROR_PPN_PINNED 5 +#define VMW_BALLOON_ERROR_PPN_NOTNEEDED 6 +#define VMW_BALLOON_ERROR_RESET 7 +#define VMW_BALLOON_ERROR_BUSY 8 + +#define VMW_BALLOON_SUCCESS_WITH_CAPABILITIES (0x03000000) + +/* Batch page description */ + +/* + * Layout of a page in the batch page: + * + * +-------------+----------+--------+ + * | | | | + * | Page number | Reserved | Status | + * | | | | + * +-------------+----------+--------+ + * 64 PAGE_SHIFT 6 0 + * + * The reserved field should be set to 0. + */ +#define VMW_BALLOON_BATCH_MAX_PAGES (PAGE_SIZE / sizeof(u64)) +#define VMW_BALLOON_BATCH_STATUS_MASK ((1UL << 5) - 1) +#define VMW_BALLOON_BATCH_PAGE_MASK (~((1UL << PAGE_SHIFT) - 1)) + +struct vmballoon_batch_page { + u64 pages[VMW_BALLOON_BATCH_MAX_PAGES]; +}; + +static u64 vmballoon_batch_get_pa(struct vmballoon_batch_page *batch, int idx) +{ + return batch->pages[idx] & VMW_BALLOON_BATCH_PAGE_MASK; +} + +static int vmballoon_batch_get_status(struct vmballoon_batch_page *batch, + int idx) +{ + return (int)(batch->pages[idx] & VMW_BALLOON_BATCH_STATUS_MASK); +} + +static void vmballoon_batch_set_pa(struct vmballoon_batch_page *batch, int idx, + u64 pa) +{ + batch->pages[idx] = pa; +} + + +#define VMWARE_BALLOON_CMD(cmd, arg1, arg2, result) \ +({ \ + unsigned long __status, __dummy1, __dummy2, __dummy3; \ + __asm__ __volatile__ ("inl %%dx" : \ + "=a"(__status), \ + "=c"(__dummy1), \ + "=d"(__dummy2), \ + "=b"(result), \ + "=S" (__dummy3) : \ + "0"(VMW_BALLOON_HV_MAGIC), \ + "1"(VMW_BALLOON_CMD_##cmd), \ + "2"(VMW_BALLOON_HV_PORT), \ + "3"(arg1), \ + "4" (arg2) : \ + "memory"); \ + if (VMW_BALLOON_CMD_##cmd == VMW_BALLOON_CMD_START) \ + result = __dummy1; \ + result &= -1UL; \ + __status & -1UL; \ +}) + +#ifdef CONFIG_DEBUG_FS +struct vmballoon_stats { + unsigned int timer; + unsigned int doorbell; + + /* allocation statistics */ + unsigned int alloc[VMW_BALLOON_NUM_PAGE_SIZES]; + unsigned int alloc_fail[VMW_BALLOON_NUM_PAGE_SIZES]; + unsigned int sleep_alloc; + unsigned int sleep_alloc_fail; + unsigned int refused_alloc[VMW_BALLOON_NUM_PAGE_SIZES]; + unsigned int refused_free[VMW_BALLOON_NUM_PAGE_SIZES]; + unsigned int free[VMW_BALLOON_NUM_PAGE_SIZES]; + + /* monitor operations */ + unsigned int lock[VMW_BALLOON_NUM_PAGE_SIZES]; + unsigned int lock_fail[VMW_BALLOON_NUM_PAGE_SIZES]; + unsigned int unlock[VMW_BALLOON_NUM_PAGE_SIZES]; + unsigned int unlock_fail[VMW_BALLOON_NUM_PAGE_SIZES]; + unsigned int target; + unsigned int target_fail; + unsigned int start; + unsigned int start_fail; + unsigned int guest_type; + unsigned int guest_type_fail; + unsigned int doorbell_set; + unsigned int doorbell_unset; +}; + +#define STATS_INC(stat) (stat)++ +#else +#define STATS_INC(stat) +#endif + +struct vmballoon; + +struct vmballoon_ops { + void (*add_page)(struct vmballoon *b, int idx, struct page *p); + int (*lock)(struct vmballoon *b, unsigned int num_pages, + bool is_2m_pages, unsigned int *target); + int (*unlock)(struct vmballoon *b, unsigned int num_pages, + bool is_2m_pages, unsigned int *target); +}; + +struct vmballoon_page_size { + /* list of reserved physical pages */ + struct list_head pages; + + /* transient list of non-balloonable pages */ + struct list_head refused_pages; + unsigned int n_refused_pages; +}; + +struct vmballoon { + struct vmballoon_page_size page_sizes[VMW_BALLOON_NUM_PAGE_SIZES]; + + /* supported page sizes. 1 == 4k pages only, 2 == 4k and 2m pages */ + unsigned supported_page_sizes; + + /* balloon size in pages */ + unsigned int size; + unsigned int target; + + /* reset flag */ + bool reset_required; + + unsigned long capabilities; + + struct vmballoon_batch_page *batch_page; + unsigned int batch_max_pages; + struct page *page; + + const struct vmballoon_ops *ops; + +#ifdef CONFIG_DEBUG_FS + /* statistics */ + struct vmballoon_stats stats; + + /* debugfs file exporting statistics */ + struct dentry *dbg_entry; +#endif + + struct sysinfo sysinfo; + + struct delayed_work dwork; + + struct vmci_handle vmci_doorbell; +}; + +static struct vmballoon balloon; + +/* + * Send "start" command to the host, communicating supported version + * of the protocol. + */ +static bool vmballoon_send_start(struct vmballoon *b, unsigned long req_caps) +{ + unsigned long status, capabilities, dummy = 0; + bool success; + + STATS_INC(b->stats.start); + + status = VMWARE_BALLOON_CMD(START, req_caps, dummy, capabilities); + + switch (status) { + case VMW_BALLOON_SUCCESS_WITH_CAPABILITIES: + b->capabilities = capabilities; + success = true; + break; + case VMW_BALLOON_SUCCESS: + b->capabilities = VMW_BALLOON_BASIC_CMDS; + success = true; + break; + default: + success = false; + } + + /* + * 2MB pages are only supported with batching. If batching is for some + * reason disabled, do not use 2MB pages, since otherwise the legacy + * mechanism is used with 2MB pages, causing a failure. + */ + if ((b->capabilities & VMW_BALLOON_BATCHED_2M_CMDS) && + (b->capabilities & VMW_BALLOON_BATCHED_CMDS)) + b->supported_page_sizes = 2; + else + b->supported_page_sizes = 1; + + if (!success) { + pr_debug("%s - failed, hv returns %ld\n", __func__, status); + STATS_INC(b->stats.start_fail); + } + return success; +} + +static bool vmballoon_check_status(struct vmballoon *b, unsigned long status) +{ + switch (status) { + case VMW_BALLOON_SUCCESS: + return true; + + case VMW_BALLOON_ERROR_RESET: + b->reset_required = true; + /* fall through */ + + default: + return false; + } +} + +/* + * Communicate guest type to the host so that it can adjust ballooning + * algorithm to the one most appropriate for the guest. This command + * is normally issued after sending "start" command and is part of + * standard reset sequence. + */ +static bool vmballoon_send_guest_id(struct vmballoon *b) +{ + unsigned long status, dummy = 0; + + status = VMWARE_BALLOON_CMD(GUEST_ID, VMW_BALLOON_GUEST_ID, dummy, + dummy); + + STATS_INC(b->stats.guest_type); + + if (vmballoon_check_status(b, status)) + return true; + + pr_debug("%s - failed, hv returns %ld\n", __func__, status); + STATS_INC(b->stats.guest_type_fail); + return false; +} + +static u16 vmballoon_page_size(bool is_2m_page) +{ + if (is_2m_page) + return 1 << VMW_BALLOON_2M_SHIFT; + + return 1; +} + +/* + * Retrieve desired balloon size from the host. + */ +static bool vmballoon_send_get_target(struct vmballoon *b, u32 *new_target) +{ + unsigned long status; + unsigned long target; + unsigned long limit; + unsigned long dummy = 0; + u32 limit32; + + /* + * si_meminfo() is cheap. Moreover, we want to provide dynamic + * max balloon size later. So let us call si_meminfo() every + * iteration. + */ + si_meminfo(&b->sysinfo); + limit = b->sysinfo.totalram; + + /* Ensure limit fits in 32-bits */ + limit32 = (u32)limit; + if (limit != limit32) + return false; + + /* update stats */ + STATS_INC(b->stats.target); + + status = VMWARE_BALLOON_CMD(GET_TARGET, limit, dummy, target); + if (vmballoon_check_status(b, status)) { + *new_target = target; + return true; + } + + pr_debug("%s - failed, hv returns %ld\n", __func__, status); + STATS_INC(b->stats.target_fail); + return false; +} + +/* + * Notify the host about allocated page so that host can use it without + * fear that guest will need it. Host may reject some pages, we need to + * check the return value and maybe submit a different page. + */ +static int vmballoon_send_lock_page(struct vmballoon *b, unsigned long pfn, + unsigned int *hv_status, unsigned int *target) +{ + unsigned long status, dummy = 0; + u32 pfn32; + + pfn32 = (u32)pfn; + if (pfn32 != pfn) + return -EINVAL; + + STATS_INC(b->stats.lock[false]); + + *hv_status = status = VMWARE_BALLOON_CMD(LOCK, pfn, dummy, *target); + if (vmballoon_check_status(b, status)) + return 0; + + pr_debug("%s - ppn %lx, hv returns %ld\n", __func__, pfn, status); + STATS_INC(b->stats.lock_fail[false]); + return -EIO; +} + +static int vmballoon_send_batched_lock(struct vmballoon *b, + unsigned int num_pages, bool is_2m_pages, unsigned int *target) +{ + unsigned long status; + unsigned long pfn = PHYS_PFN(virt_to_phys(b->batch_page)); + + STATS_INC(b->stats.lock[is_2m_pages]); + + if (is_2m_pages) + status = VMWARE_BALLOON_CMD(BATCHED_2M_LOCK, pfn, num_pages, + *target); + else + status = VMWARE_BALLOON_CMD(BATCHED_LOCK, pfn, num_pages, + *target); + + if (vmballoon_check_status(b, status)) + return 0; + + pr_debug("%s - batch ppn %lx, hv returns %ld\n", __func__, pfn, status); + STATS_INC(b->stats.lock_fail[is_2m_pages]); + return 1; +} + +/* + * Notify the host that guest intends to release given page back into + * the pool of available (to the guest) pages. + */ +static bool vmballoon_send_unlock_page(struct vmballoon *b, unsigned long pfn, + unsigned int *target) +{ + unsigned long status, dummy = 0; + u32 pfn32; + + pfn32 = (u32)pfn; + if (pfn32 != pfn) + return false; + + STATS_INC(b->stats.unlock[false]); + + status = VMWARE_BALLOON_CMD(UNLOCK, pfn, dummy, *target); + if (vmballoon_check_status(b, status)) + return true; + + pr_debug("%s - ppn %lx, hv returns %ld\n", __func__, pfn, status); + STATS_INC(b->stats.unlock_fail[false]); + return false; +} + +static bool vmballoon_send_batched_unlock(struct vmballoon *b, + unsigned int num_pages, bool is_2m_pages, unsigned int *target) +{ + unsigned long status; + unsigned long pfn = PHYS_PFN(virt_to_phys(b->batch_page)); + + STATS_INC(b->stats.unlock[is_2m_pages]); + + if (is_2m_pages) + status = VMWARE_BALLOON_CMD(BATCHED_2M_UNLOCK, pfn, num_pages, + *target); + else + status = VMWARE_BALLOON_CMD(BATCHED_UNLOCK, pfn, num_pages, + *target); + + if (vmballoon_check_status(b, status)) + return true; + + pr_debug("%s - batch ppn %lx, hv returns %ld\n", __func__, pfn, status); + STATS_INC(b->stats.unlock_fail[is_2m_pages]); + return false; +} + +static struct page *vmballoon_alloc_page(gfp_t flags, bool is_2m_page) +{ + if (is_2m_page) + return alloc_pages(flags, VMW_BALLOON_2M_SHIFT); + + return alloc_page(flags); +} + +static void vmballoon_free_page(struct page *page, bool is_2m_page) +{ + if (is_2m_page) + __free_pages(page, VMW_BALLOON_2M_SHIFT); + else + __free_page(page); +} + +/* + * Quickly release all pages allocated for the balloon. This function is + * called when host decides to "reset" balloon for one reason or another. + * Unlike normal "deflate" we do not (shall not) notify host of the pages + * being released. + */ +static void vmballoon_pop(struct vmballoon *b) +{ + struct page *page, *next; + unsigned is_2m_pages; + + for (is_2m_pages = 0; is_2m_pages < VMW_BALLOON_NUM_PAGE_SIZES; + is_2m_pages++) { + struct vmballoon_page_size *page_size = + &b->page_sizes[is_2m_pages]; + u16 size_per_page = vmballoon_page_size(is_2m_pages); + + list_for_each_entry_safe(page, next, &page_size->pages, lru) { + list_del(&page->lru); + vmballoon_free_page(page, is_2m_pages); + STATS_INC(b->stats.free[is_2m_pages]); + b->size -= size_per_page; + cond_resched(); + } + } + + /* Clearing the batch_page unconditionally has no adverse effect */ + free_page((unsigned long)b->batch_page); + b->batch_page = NULL; +} + +/* + * Notify the host of a ballooned page. If host rejects the page put it on the + * refuse list, those refused page are then released at the end of the + * inflation cycle. + */ +static int vmballoon_lock_page(struct vmballoon *b, unsigned int num_pages, + bool is_2m_pages, unsigned int *target) +{ + int locked, hv_status; + struct page *page = b->page; + struct vmballoon_page_size *page_size = &b->page_sizes[false]; + + /* is_2m_pages can never happen as 2m pages support implies batching */ + + locked = vmballoon_send_lock_page(b, page_to_pfn(page), &hv_status, + target); + if (locked) { + STATS_INC(b->stats.refused_alloc[false]); + + if (locked == -EIO && + (hv_status == VMW_BALLOON_ERROR_RESET || + hv_status == VMW_BALLOON_ERROR_PPN_NOTNEEDED)) { + vmballoon_free_page(page, false); + return -EIO; + } + + /* + * Place page on the list of non-balloonable pages + * and retry allocation, unless we already accumulated + * too many of them, in which case take a breather. + */ + if (page_size->n_refused_pages < VMW_BALLOON_MAX_REFUSED) { + page_size->n_refused_pages++; + list_add(&page->lru, &page_size->refused_pages); + } else { + vmballoon_free_page(page, false); + } + return locked; + } + + /* track allocated page */ + list_add(&page->lru, &page_size->pages); + + /* update balloon size */ + b->size++; + + return 0; +} + +static int vmballoon_lock_batched_page(struct vmballoon *b, + unsigned int num_pages, bool is_2m_pages, unsigned int *target) +{ + int locked, i; + u16 size_per_page = vmballoon_page_size(is_2m_pages); + + locked = vmballoon_send_batched_lock(b, num_pages, is_2m_pages, + target); + if (locked > 0) { + for (i = 0; i < num_pages; i++) { + u64 pa = vmballoon_batch_get_pa(b->batch_page, i); + struct page *p = pfn_to_page(pa >> PAGE_SHIFT); + + vmballoon_free_page(p, is_2m_pages); + } + + return -EIO; + } + + for (i = 0; i < num_pages; i++) { + u64 pa = vmballoon_batch_get_pa(b->batch_page, i); + struct page *p = pfn_to_page(pa >> PAGE_SHIFT); + struct vmballoon_page_size *page_size = + &b->page_sizes[is_2m_pages]; + + locked = vmballoon_batch_get_status(b->batch_page, i); + + switch (locked) { + case VMW_BALLOON_SUCCESS: + list_add(&p->lru, &page_size->pages); + b->size += size_per_page; + break; + case VMW_BALLOON_ERROR_PPN_PINNED: + case VMW_BALLOON_ERROR_PPN_INVALID: + if (page_size->n_refused_pages + < VMW_BALLOON_MAX_REFUSED) { + list_add(&p->lru, &page_size->refused_pages); + page_size->n_refused_pages++; + break; + } + /* Fallthrough */ + case VMW_BALLOON_ERROR_RESET: + case VMW_BALLOON_ERROR_PPN_NOTNEEDED: + vmballoon_free_page(p, is_2m_pages); + break; + default: + /* This should never happen */ + WARN_ON_ONCE(true); + } + } + + return 0; +} + +/* + * Release the page allocated for the balloon. Note that we first notify + * the host so it can make sure the page will be available for the guest + * to use, if needed. + */ +static int vmballoon_unlock_page(struct vmballoon *b, unsigned int num_pages, + bool is_2m_pages, unsigned int *target) +{ + struct page *page = b->page; + struct vmballoon_page_size *page_size = &b->page_sizes[false]; + + /* is_2m_pages can never happen as 2m pages support implies batching */ + + if (!vmballoon_send_unlock_page(b, page_to_pfn(page), target)) { + list_add(&page->lru, &page_size->pages); + return -EIO; + } + + /* deallocate page */ + vmballoon_free_page(page, false); + STATS_INC(b->stats.free[false]); + + /* update balloon size */ + b->size--; + + return 0; +} + +static int vmballoon_unlock_batched_page(struct vmballoon *b, + unsigned int num_pages, bool is_2m_pages, + unsigned int *target) +{ + int locked, i, ret = 0; + bool hv_success; + u16 size_per_page = vmballoon_page_size(is_2m_pages); + + hv_success = vmballoon_send_batched_unlock(b, num_pages, is_2m_pages, + target); + if (!hv_success) + ret = -EIO; + + for (i = 0; i < num_pages; i++) { + u64 pa = vmballoon_batch_get_pa(b->batch_page, i); + struct page *p = pfn_to_page(pa >> PAGE_SHIFT); + struct vmballoon_page_size *page_size = + &b->page_sizes[is_2m_pages]; + + locked = vmballoon_batch_get_status(b->batch_page, i); + if (!hv_success || locked != VMW_BALLOON_SUCCESS) { + /* + * That page wasn't successfully unlocked by the + * hypervisor, re-add it to the list of pages owned by + * the balloon driver. + */ + list_add(&p->lru, &page_size->pages); + } else { + /* deallocate page */ + vmballoon_free_page(p, is_2m_pages); + STATS_INC(b->stats.free[is_2m_pages]); + + /* update balloon size */ + b->size -= size_per_page; + } + } + + return ret; +} + +/* + * Release pages that were allocated while attempting to inflate the + * balloon but were refused by the host for one reason or another. + */ +static void vmballoon_release_refused_pages(struct vmballoon *b, + bool is_2m_pages) +{ + struct page *page, *next; + struct vmballoon_page_size *page_size = + &b->page_sizes[is_2m_pages]; + + list_for_each_entry_safe(page, next, &page_size->refused_pages, lru) { + list_del(&page->lru); + vmballoon_free_page(page, is_2m_pages); + STATS_INC(b->stats.refused_free[is_2m_pages]); + } + + page_size->n_refused_pages = 0; +} + +static void vmballoon_add_page(struct vmballoon *b, int idx, struct page *p) +{ + b->page = p; +} + +static void vmballoon_add_batched_page(struct vmballoon *b, int idx, + struct page *p) +{ + vmballoon_batch_set_pa(b->batch_page, idx, + (u64)page_to_pfn(p) << PAGE_SHIFT); +} + +/* + * Inflate the balloon towards its target size. Note that we try to limit + * the rate of allocation to make sure we are not choking the rest of the + * system. + */ +static void vmballoon_inflate(struct vmballoon *b) +{ + unsigned int num_pages = 0; + int error = 0; + gfp_t flags = VMW_PAGE_ALLOC_NOSLEEP; + bool is_2m_pages; + + pr_debug("%s - size: %d, target %d\n", __func__, b->size, b->target); + + /* + * First try NOSLEEP page allocations to inflate balloon. + * + * If we do not throttle nosleep allocations, we can drain all + * free pages in the guest quickly (if the balloon target is high). + * As a side-effect, draining free pages helps to inform (force) + * the guest to start swapping if balloon target is not met yet, + * which is a desired behavior. However, balloon driver can consume + * all available CPU cycles if too many pages are allocated in a + * second. Therefore, we throttle nosleep allocations even when + * the guest is not under memory pressure. OTOH, if we have already + * predicted that the guest is under memory pressure, then we + * slowdown page allocations considerably. + */ + + /* + * Start with no sleep allocation rate which may be higher + * than sleeping allocation rate. + */ + is_2m_pages = b->supported_page_sizes == VMW_BALLOON_NUM_PAGE_SIZES; + + pr_debug("%s - goal: %d", __func__, b->target - b->size); + + while (!b->reset_required && + b->size + num_pages * vmballoon_page_size(is_2m_pages) + < b->target) { + struct page *page; + + if (flags == VMW_PAGE_ALLOC_NOSLEEP) + STATS_INC(b->stats.alloc[is_2m_pages]); + else + STATS_INC(b->stats.sleep_alloc); + + page = vmballoon_alloc_page(flags, is_2m_pages); + if (!page) { + STATS_INC(b->stats.alloc_fail[is_2m_pages]); + + if (is_2m_pages) { + b->ops->lock(b, num_pages, true, &b->target); + + /* + * ignore errors from locking as we now switch + * to 4k pages and we might get different + * errors. + */ + + num_pages = 0; + is_2m_pages = false; + continue; + } + + if (flags == VMW_PAGE_ALLOC_CANSLEEP) { + /* + * CANSLEEP page allocation failed, so guest + * is under severe memory pressure. We just log + * the event, but do not stop the inflation + * due to its negative impact on performance. + */ + STATS_INC(b->stats.sleep_alloc_fail); + break; + } + + /* + * NOSLEEP page allocation failed, so the guest is + * under memory pressure. Slowing down page alloctions + * seems to be reasonable, but doing so might actually + * cause the hypervisor to throttle us down, resulting + * in degraded performance. We will count on the + * scheduler and standard memory management mechanisms + * for now. + */ + flags = VMW_PAGE_ALLOC_CANSLEEP; + continue; + } + + b->ops->add_page(b, num_pages++, page); + if (num_pages == b->batch_max_pages) { + error = b->ops->lock(b, num_pages, is_2m_pages, + &b->target); + num_pages = 0; + if (error) + break; + } + + cond_resched(); + } + + if (num_pages > 0) + b->ops->lock(b, num_pages, is_2m_pages, &b->target); + + vmballoon_release_refused_pages(b, true); + vmballoon_release_refused_pages(b, false); +} + +/* + * Decrease the size of the balloon allowing guest to use more memory. + */ +static void vmballoon_deflate(struct vmballoon *b) +{ + unsigned is_2m_pages; + + pr_debug("%s - size: %d, target %d\n", __func__, b->size, b->target); + + /* free pages to reach target */ + for (is_2m_pages = 0; is_2m_pages < b->supported_page_sizes; + is_2m_pages++) { + struct page *page, *next; + unsigned int num_pages = 0; + struct vmballoon_page_size *page_size = + &b->page_sizes[is_2m_pages]; + + list_for_each_entry_safe(page, next, &page_size->pages, lru) { + if (b->reset_required || + (b->target > 0 && + b->size - num_pages + * vmballoon_page_size(is_2m_pages) + < b->target + vmballoon_page_size(true))) + break; + + list_del(&page->lru); + b->ops->add_page(b, num_pages++, page); + + if (num_pages == b->batch_max_pages) { + int error; + + error = b->ops->unlock(b, num_pages, + is_2m_pages, &b->target); + num_pages = 0; + if (error) + return; + } + + cond_resched(); + } + + if (num_pages > 0) + b->ops->unlock(b, num_pages, is_2m_pages, &b->target); + } +} + +static const struct vmballoon_ops vmballoon_basic_ops = { + .add_page = vmballoon_add_page, + .lock = vmballoon_lock_page, + .unlock = vmballoon_unlock_page +}; + +static const struct vmballoon_ops vmballoon_batched_ops = { + .add_page = vmballoon_add_batched_page, + .lock = vmballoon_lock_batched_page, + .unlock = vmballoon_unlock_batched_page +}; + +static bool vmballoon_init_batching(struct vmballoon *b) +{ + struct page *page; + + page = alloc_page(GFP_KERNEL | __GFP_ZERO); + if (!page) + return false; + + b->batch_page = page_address(page); + return true; +} + +/* + * Receive notification and resize balloon + */ +static void vmballoon_doorbell(void *client_data) +{ + struct vmballoon *b = client_data; + + STATS_INC(b->stats.doorbell); + + mod_delayed_work(system_freezable_wq, &b->dwork, 0); +} + +/* + * Clean up vmci doorbell + */ +static void vmballoon_vmci_cleanup(struct vmballoon *b) +{ + int error; + + VMWARE_BALLOON_CMD(VMCI_DOORBELL_SET, VMCI_INVALID_ID, + VMCI_INVALID_ID, error); + STATS_INC(b->stats.doorbell_unset); + + if (!vmci_handle_is_invalid(b->vmci_doorbell)) { + vmci_doorbell_destroy(b->vmci_doorbell); + b->vmci_doorbell = VMCI_INVALID_HANDLE; + } +} + +/* + * Initialize vmci doorbell, to get notified as soon as balloon changes + */ +static int vmballoon_vmci_init(struct vmballoon *b) +{ + unsigned long error, dummy; + + if ((b->capabilities & VMW_BALLOON_SIGNALLED_WAKEUP_CMD) == 0) + return 0; + + error = vmci_doorbell_create(&b->vmci_doorbell, VMCI_FLAG_DELAYED_CB, + VMCI_PRIVILEGE_FLAG_RESTRICTED, + vmballoon_doorbell, b); + + if (error != VMCI_SUCCESS) + goto fail; + + error = VMWARE_BALLOON_CMD(VMCI_DOORBELL_SET, b->vmci_doorbell.context, + b->vmci_doorbell.resource, dummy); + + STATS_INC(b->stats.doorbell_set); + + if (error != VMW_BALLOON_SUCCESS) + goto fail; + + return 0; +fail: + vmballoon_vmci_cleanup(b); + return -EIO; +} + +/* + * Perform standard reset sequence by popping the balloon (in case it + * is not empty) and then restarting protocol. This operation normally + * happens when host responds with VMW_BALLOON_ERROR_RESET to a command. + */ +static void vmballoon_reset(struct vmballoon *b) +{ + int error; + + vmballoon_vmci_cleanup(b); + + /* free all pages, skipping monitor unlock */ + vmballoon_pop(b); + + if (!vmballoon_send_start(b, VMW_BALLOON_CAPABILITIES)) + return; + + if ((b->capabilities & VMW_BALLOON_BATCHED_CMDS) != 0) { + b->ops = &vmballoon_batched_ops; + b->batch_max_pages = VMW_BALLOON_BATCH_MAX_PAGES; + if (!vmballoon_init_batching(b)) { + /* + * We failed to initialize batching, inform the monitor + * about it by sending a null capability. + * + * The guest will retry in one second. + */ + vmballoon_send_start(b, 0); + return; + } + } else if ((b->capabilities & VMW_BALLOON_BASIC_CMDS) != 0) { + b->ops = &vmballoon_basic_ops; + b->batch_max_pages = 1; + } + + b->reset_required = false; + + error = vmballoon_vmci_init(b); + if (error) + pr_err("failed to initialize vmci doorbell\n"); + + if (!vmballoon_send_guest_id(b)) + pr_err("failed to send guest ID to the host\n"); +} + +/* + * Balloon work function: reset protocol, if needed, get the new size and + * adjust balloon as needed. Repeat in 1 sec. + */ +static void vmballoon_work(struct work_struct *work) +{ + struct delayed_work *dwork = to_delayed_work(work); + struct vmballoon *b = container_of(dwork, struct vmballoon, dwork); + unsigned int target; + + STATS_INC(b->stats.timer); + + if (b->reset_required) + vmballoon_reset(b); + + if (!b->reset_required && vmballoon_send_get_target(b, &target)) { + /* update target, adjust size */ + b->target = target; + + if (b->size < target) + vmballoon_inflate(b); + else if (target == 0 || + b->size > target + vmballoon_page_size(true)) + vmballoon_deflate(b); + } + + /* + * We are using a freezable workqueue so that balloon operations are + * stopped while the system transitions to/from sleep/hibernation. + */ + queue_delayed_work(system_freezable_wq, + dwork, round_jiffies_relative(HZ)); +} + +/* + * DEBUGFS Interface + */ +#ifdef CONFIG_DEBUG_FS + +static int vmballoon_debug_show(struct seq_file *f, void *offset) +{ + struct vmballoon *b = f->private; + struct vmballoon_stats *stats = &b->stats; + + /* format capabilities info */ + seq_printf(f, + "balloon capabilities: %#4x\n" + "used capabilities: %#4lx\n" + "is resetting: %c\n", + VMW_BALLOON_CAPABILITIES, b->capabilities, + b->reset_required ? 'y' : 'n'); + + /* format size info */ + seq_printf(f, + "target: %8d pages\n" + "current: %8d pages\n", + b->target, b->size); + + seq_printf(f, + "\n" + "timer: %8u\n" + "doorbell: %8u\n" + "start: %8u (%4u failed)\n" + "guestType: %8u (%4u failed)\n" + "2m-lock: %8u (%4u failed)\n" + "lock: %8u (%4u failed)\n" + "2m-unlock: %8u (%4u failed)\n" + "unlock: %8u (%4u failed)\n" + "target: %8u (%4u failed)\n" + "prim2mAlloc: %8u (%4u failed)\n" + "primNoSleepAlloc: %8u (%4u failed)\n" + "primCanSleepAlloc: %8u (%4u failed)\n" + "prim2mFree: %8u\n" + "primFree: %8u\n" + "err2mAlloc: %8u\n" + "errAlloc: %8u\n" + "err2mFree: %8u\n" + "errFree: %8u\n" + "doorbellSet: %8u\n" + "doorbellUnset: %8u\n", + stats->timer, + stats->doorbell, + stats->start, stats->start_fail, + stats->guest_type, stats->guest_type_fail, + stats->lock[true], stats->lock_fail[true], + stats->lock[false], stats->lock_fail[false], + stats->unlock[true], stats->unlock_fail[true], + stats->unlock[false], stats->unlock_fail[false], + stats->target, stats->target_fail, + stats->alloc[true], stats->alloc_fail[true], + stats->alloc[false], stats->alloc_fail[false], + stats->sleep_alloc, stats->sleep_alloc_fail, + stats->free[true], + stats->free[false], + stats->refused_alloc[true], stats->refused_alloc[false], + stats->refused_free[true], stats->refused_free[false], + stats->doorbell_set, stats->doorbell_unset); + + return 0; +} + +static int vmballoon_debug_open(struct inode *inode, struct file *file) +{ + return single_open(file, vmballoon_debug_show, inode->i_private); +} + +static const struct file_operations vmballoon_debug_fops = { + .owner = THIS_MODULE, + .open = vmballoon_debug_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int __init vmballoon_debugfs_init(struct vmballoon *b) +{ + int error; + + b->dbg_entry = debugfs_create_file("vmmemctl", S_IRUGO, NULL, b, + &vmballoon_debug_fops); + if (IS_ERR(b->dbg_entry)) { + error = PTR_ERR(b->dbg_entry); + pr_err("failed to create debugfs entry, error: %d\n", error); + return error; + } + + return 0; +} + +static void __exit vmballoon_debugfs_exit(struct vmballoon *b) +{ + debugfs_remove(b->dbg_entry); +} + +#else + +static inline int vmballoon_debugfs_init(struct vmballoon *b) +{ + return 0; +} + +static inline void vmballoon_debugfs_exit(struct vmballoon *b) +{ +} + +#endif /* CONFIG_DEBUG_FS */ + +static int __init vmballoon_init(void) +{ + int error; + unsigned is_2m_pages; + /* + * Check if we are running on VMware's hypervisor and bail out + * if we are not. + */ + if (x86_hyper_type != X86_HYPER_VMWARE) + return -ENODEV; + + for (is_2m_pages = 0; is_2m_pages < VMW_BALLOON_NUM_PAGE_SIZES; + is_2m_pages++) { + INIT_LIST_HEAD(&balloon.page_sizes[is_2m_pages].pages); + INIT_LIST_HEAD(&balloon.page_sizes[is_2m_pages].refused_pages); + } + + INIT_DELAYED_WORK(&balloon.dwork, vmballoon_work); + + error = vmballoon_debugfs_init(&balloon); + if (error) + return error; + + balloon.vmci_doorbell = VMCI_INVALID_HANDLE; + balloon.batch_page = NULL; + balloon.page = NULL; + balloon.reset_required = true; + + queue_delayed_work(system_freezable_wq, &balloon.dwork, 0); + + return 0; +} + +/* + * Using late_initcall() instead of module_init() allows the balloon to use the + * VMCI doorbell even when the balloon is built into the kernel. Otherwise the + * VMCI is probed only after the balloon is initialized. If the balloon is used + * as a module, late_initcall() is equivalent to module_init(). + */ +late_initcall(vmballoon_init); + +static void __exit vmballoon_exit(void) +{ + vmballoon_vmci_cleanup(&balloon); + cancel_delayed_work_sync(&balloon.dwork); + + vmballoon_debugfs_exit(&balloon); + + /* + * Deallocate all reserved memory, and reset connection with monitor. + * Reset connection before deallocating memory to avoid potential for + * additional spurious resets from guest touching deallocated pages. + */ + vmballoon_send_start(&balloon, 0); + vmballoon_pop(&balloon); +} +module_exit(vmballoon_exit); diff --git a/drivers/misc/vmw_vmci/Kconfig b/drivers/misc/vmw_vmci/Kconfig new file mode 100644 index 000000000..39c2ecadb --- /dev/null +++ b/drivers/misc/vmw_vmci/Kconfig @@ -0,0 +1,16 @@ +# +# VMware VMCI device +# + +config VMWARE_VMCI + tristate "VMware VMCI Driver" + depends on X86 && PCI + help + This is VMware's Virtual Machine Communication Interface. It enables + high-speed communication between host and guest in a virtual + environment via the VMCI virtual device. + + If unsure, say N. + + To compile this driver as a module, choose M here: the + module will be called vmw_vmci. diff --git a/drivers/misc/vmw_vmci/Makefile b/drivers/misc/vmw_vmci/Makefile new file mode 100644 index 000000000..4da9893c3 --- /dev/null +++ b/drivers/misc/vmw_vmci/Makefile @@ -0,0 +1,4 @@ +obj-$(CONFIG_VMWARE_VMCI) += vmw_vmci.o +vmw_vmci-y += vmci_context.o vmci_datagram.o vmci_doorbell.o \ + vmci_driver.o vmci_event.o vmci_guest.o vmci_handle_array.o \ + vmci_host.o vmci_queue_pair.o vmci_resource.o vmci_route.o diff --git a/drivers/misc/vmw_vmci/vmci_context.c b/drivers/misc/vmw_vmci/vmci_context.c new file mode 100644 index 000000000..26e20b091 --- /dev/null +++ b/drivers/misc/vmw_vmci/vmci_context.c @@ -0,0 +1,1225 @@ +/* + * VMware VMCI Driver + * + * Copyright (C) 2012 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "vmci_queue_pair.h" +#include "vmci_datagram.h" +#include "vmci_doorbell.h" +#include "vmci_context.h" +#include "vmci_driver.h" +#include "vmci_event.h" + +/* Use a wide upper bound for the maximum contexts. */ +#define VMCI_MAX_CONTEXTS 2000 + +/* + * List of current VMCI contexts. Contexts can be added by + * vmci_ctx_create() and removed via vmci_ctx_destroy(). + * These, along with context lookup, are protected by the + * list structure's lock. + */ +static struct { + struct list_head head; + spinlock_t lock; /* Spinlock for context list operations */ +} ctx_list = { + .head = LIST_HEAD_INIT(ctx_list.head), + .lock = __SPIN_LOCK_UNLOCKED(ctx_list.lock), +}; + +/* Used by contexts that did not set up notify flag pointers */ +static bool ctx_dummy_notify; + +static void ctx_signal_notify(struct vmci_ctx *context) +{ + *context->notify = true; +} + +static void ctx_clear_notify(struct vmci_ctx *context) +{ + *context->notify = false; +} + +/* + * If nothing requires the attention of the guest, clears both + * notify flag and call. + */ +static void ctx_clear_notify_call(struct vmci_ctx *context) +{ + if (context->pending_datagrams == 0 && + vmci_handle_arr_get_size(context->pending_doorbell_array) == 0) + ctx_clear_notify(context); +} + +/* + * Sets the context's notify flag iff datagrams are pending for this + * context. Called from vmci_setup_notify(). + */ +void vmci_ctx_check_signal_notify(struct vmci_ctx *context) +{ + spin_lock(&context->lock); + if (context->pending_datagrams) + ctx_signal_notify(context); + spin_unlock(&context->lock); +} + +/* + * Allocates and initializes a VMCI context. + */ +struct vmci_ctx *vmci_ctx_create(u32 cid, u32 priv_flags, + uintptr_t event_hnd, + int user_version, + const struct cred *cred) +{ + struct vmci_ctx *context; + int error; + + if (cid == VMCI_INVALID_ID) { + pr_devel("Invalid context ID for VMCI context\n"); + error = -EINVAL; + goto err_out; + } + + if (priv_flags & ~VMCI_PRIVILEGE_ALL_FLAGS) { + pr_devel("Invalid flag (flags=0x%x) for VMCI context\n", + priv_flags); + error = -EINVAL; + goto err_out; + } + + if (user_version == 0) { + pr_devel("Invalid suer_version %d\n", user_version); + error = -EINVAL; + goto err_out; + } + + context = kzalloc(sizeof(*context), GFP_KERNEL); + if (!context) { + pr_warn("Failed to allocate memory for VMCI context\n"); + error = -EINVAL; + goto err_out; + } + + kref_init(&context->kref); + spin_lock_init(&context->lock); + INIT_LIST_HEAD(&context->list_item); + INIT_LIST_HEAD(&context->datagram_queue); + INIT_LIST_HEAD(&context->notifier_list); + + /* Initialize host-specific VMCI context. */ + init_waitqueue_head(&context->host_context.wait_queue); + + context->queue_pair_array = + vmci_handle_arr_create(0, VMCI_MAX_GUEST_QP_COUNT); + if (!context->queue_pair_array) { + error = -ENOMEM; + goto err_free_ctx; + } + + context->doorbell_array = + vmci_handle_arr_create(0, VMCI_MAX_GUEST_DOORBELL_COUNT); + if (!context->doorbell_array) { + error = -ENOMEM; + goto err_free_qp_array; + } + + context->pending_doorbell_array = + vmci_handle_arr_create(0, VMCI_MAX_GUEST_DOORBELL_COUNT); + if (!context->pending_doorbell_array) { + error = -ENOMEM; + goto err_free_db_array; + } + + context->user_version = user_version; + + context->priv_flags = priv_flags; + + if (cred) + context->cred = get_cred(cred); + + context->notify = &ctx_dummy_notify; + context->notify_page = NULL; + + /* + * If we collide with an existing context we generate a new + * and use it instead. The VMX will determine if regeneration + * is okay. Since there isn't 4B - 16 VMs running on a given + * host, the below loop will terminate. + */ + spin_lock(&ctx_list.lock); + + while (vmci_ctx_exists(cid)) { + /* We reserve the lowest 16 ids for fixed contexts. */ + cid = max(cid, VMCI_RESERVED_CID_LIMIT - 1) + 1; + if (cid == VMCI_INVALID_ID) + cid = VMCI_RESERVED_CID_LIMIT; + } + context->cid = cid; + + list_add_tail_rcu(&context->list_item, &ctx_list.head); + spin_unlock(&ctx_list.lock); + + return context; + + err_free_db_array: + vmci_handle_arr_destroy(context->doorbell_array); + err_free_qp_array: + vmci_handle_arr_destroy(context->queue_pair_array); + err_free_ctx: + kfree(context); + err_out: + return ERR_PTR(error); +} + +/* + * Destroy VMCI context. + */ +void vmci_ctx_destroy(struct vmci_ctx *context) +{ + spin_lock(&ctx_list.lock); + list_del_rcu(&context->list_item); + spin_unlock(&ctx_list.lock); + synchronize_rcu(); + + vmci_ctx_put(context); +} + +/* + * Fire notification for all contexts interested in given cid. + */ +static int ctx_fire_notification(u32 context_id, u32 priv_flags) +{ + u32 i, array_size; + struct vmci_ctx *sub_ctx; + struct vmci_handle_arr *subscriber_array; + struct vmci_handle context_handle = + vmci_make_handle(context_id, VMCI_EVENT_HANDLER); + + /* + * We create an array to hold the subscribers we find when + * scanning through all contexts. + */ + subscriber_array = vmci_handle_arr_create(0, VMCI_MAX_CONTEXTS); + if (subscriber_array == NULL) + return VMCI_ERROR_NO_MEM; + + /* + * Scan all contexts to find who is interested in being + * notified about given contextID. + */ + rcu_read_lock(); + list_for_each_entry_rcu(sub_ctx, &ctx_list.head, list_item) { + struct vmci_handle_list *node; + + /* + * We only deliver notifications of the removal of + * contexts, if the two contexts are allowed to + * interact. + */ + if (vmci_deny_interaction(priv_flags, sub_ctx->priv_flags)) + continue; + + list_for_each_entry_rcu(node, &sub_ctx->notifier_list, node) { + if (!vmci_handle_is_equal(node->handle, context_handle)) + continue; + + vmci_handle_arr_append_entry(&subscriber_array, + vmci_make_handle(sub_ctx->cid, + VMCI_EVENT_HANDLER)); + } + } + rcu_read_unlock(); + + /* Fire event to all subscribers. */ + array_size = vmci_handle_arr_get_size(subscriber_array); + for (i = 0; i < array_size; i++) { + int result; + struct vmci_event_ctx ev; + + ev.msg.hdr.dst = vmci_handle_arr_get_entry(subscriber_array, i); + ev.msg.hdr.src = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID, + VMCI_CONTEXT_RESOURCE_ID); + ev.msg.hdr.payload_size = sizeof(ev) - sizeof(ev.msg.hdr); + ev.msg.event_data.event = VMCI_EVENT_CTX_REMOVED; + ev.payload.context_id = context_id; + + result = vmci_datagram_dispatch(VMCI_HYPERVISOR_CONTEXT_ID, + &ev.msg.hdr, false); + if (result < VMCI_SUCCESS) { + pr_devel("Failed to enqueue event datagram (type=%d) for context (ID=0x%x)\n", + ev.msg.event_data.event, + ev.msg.hdr.dst.context); + /* We continue to enqueue on next subscriber. */ + } + } + vmci_handle_arr_destroy(subscriber_array); + + return VMCI_SUCCESS; +} + +/* + * Returns the current number of pending datagrams. The call may + * also serve as a synchronization point for the datagram queue, + * as no enqueue operations can occur concurrently. + */ +int vmci_ctx_pending_datagrams(u32 cid, u32 *pending) +{ + struct vmci_ctx *context; + + context = vmci_ctx_get(cid); + if (context == NULL) + return VMCI_ERROR_INVALID_ARGS; + + spin_lock(&context->lock); + if (pending) + *pending = context->pending_datagrams; + spin_unlock(&context->lock); + vmci_ctx_put(context); + + return VMCI_SUCCESS; +} + +/* + * Queues a VMCI datagram for the appropriate target VM context. + */ +int vmci_ctx_enqueue_datagram(u32 cid, struct vmci_datagram *dg) +{ + struct vmci_datagram_queue_entry *dq_entry; + struct vmci_ctx *context; + struct vmci_handle dg_src; + size_t vmci_dg_size; + + vmci_dg_size = VMCI_DG_SIZE(dg); + if (vmci_dg_size > VMCI_MAX_DG_SIZE) { + pr_devel("Datagram too large (bytes=%zu)\n", vmci_dg_size); + return VMCI_ERROR_INVALID_ARGS; + } + + /* Get the target VM's VMCI context. */ + context = vmci_ctx_get(cid); + if (!context) { + pr_devel("Invalid context (ID=0x%x)\n", cid); + return VMCI_ERROR_INVALID_ARGS; + } + + /* Allocate guest call entry and add it to the target VM's queue. */ + dq_entry = kmalloc(sizeof(*dq_entry), GFP_KERNEL); + if (dq_entry == NULL) { + pr_warn("Failed to allocate memory for datagram\n"); + vmci_ctx_put(context); + return VMCI_ERROR_NO_MEM; + } + dq_entry->dg = dg; + dq_entry->dg_size = vmci_dg_size; + dg_src = dg->src; + INIT_LIST_HEAD(&dq_entry->list_item); + + spin_lock(&context->lock); + + /* + * We put a higher limit on datagrams from the hypervisor. If + * the pending datagram is not from hypervisor, then we check + * if enqueueing it would exceed the + * VMCI_MAX_DATAGRAM_QUEUE_SIZE limit on the destination. If + * the pending datagram is from hypervisor, we allow it to be + * queued at the destination side provided we don't reach the + * VMCI_MAX_DATAGRAM_AND_EVENT_QUEUE_SIZE limit. + */ + if (context->datagram_queue_size + vmci_dg_size >= + VMCI_MAX_DATAGRAM_QUEUE_SIZE && + (!vmci_handle_is_equal(dg_src, + vmci_make_handle + (VMCI_HYPERVISOR_CONTEXT_ID, + VMCI_CONTEXT_RESOURCE_ID)) || + context->datagram_queue_size + vmci_dg_size >= + VMCI_MAX_DATAGRAM_AND_EVENT_QUEUE_SIZE)) { + spin_unlock(&context->lock); + vmci_ctx_put(context); + kfree(dq_entry); + pr_devel("Context (ID=0x%x) receive queue is full\n", cid); + return VMCI_ERROR_NO_RESOURCES; + } + + list_add(&dq_entry->list_item, &context->datagram_queue); + context->pending_datagrams++; + context->datagram_queue_size += vmci_dg_size; + ctx_signal_notify(context); + wake_up(&context->host_context.wait_queue); + spin_unlock(&context->lock); + vmci_ctx_put(context); + + return vmci_dg_size; +} + +/* + * Verifies whether a context with the specified context ID exists. + * FIXME: utility is dubious as no decisions can be reliably made + * using this data as context can appear and disappear at any time. + */ +bool vmci_ctx_exists(u32 cid) +{ + struct vmci_ctx *context; + bool exists = false; + + rcu_read_lock(); + + list_for_each_entry_rcu(context, &ctx_list.head, list_item) { + if (context->cid == cid) { + exists = true; + break; + } + } + + rcu_read_unlock(); + return exists; +} + +/* + * Retrieves VMCI context corresponding to the given cid. + */ +struct vmci_ctx *vmci_ctx_get(u32 cid) +{ + struct vmci_ctx *c, *context = NULL; + + if (cid == VMCI_INVALID_ID) + return NULL; + + rcu_read_lock(); + list_for_each_entry_rcu(c, &ctx_list.head, list_item) { + if (c->cid == cid) { + /* + * The context owner drops its own reference to the + * context only after removing it from the list and + * waiting for RCU grace period to expire. This + * means that we are not about to increase the + * reference count of something that is in the + * process of being destroyed. + */ + context = c; + kref_get(&context->kref); + break; + } + } + rcu_read_unlock(); + + return context; +} + +/* + * Deallocates all parts of a context data structure. This + * function doesn't lock the context, because it assumes that + * the caller was holding the last reference to context. + */ +static void ctx_free_ctx(struct kref *kref) +{ + struct vmci_ctx *context = container_of(kref, struct vmci_ctx, kref); + struct vmci_datagram_queue_entry *dq_entry, *dq_entry_tmp; + struct vmci_handle temp_handle; + struct vmci_handle_list *notifier, *tmp; + + /* + * Fire event to all contexts interested in knowing this + * context is dying. + */ + ctx_fire_notification(context->cid, context->priv_flags); + + /* + * Cleanup all queue pair resources attached to context. If + * the VM dies without cleaning up, this code will make sure + * that no resources are leaked. + */ + temp_handle = vmci_handle_arr_get_entry(context->queue_pair_array, 0); + while (!vmci_handle_is_equal(temp_handle, VMCI_INVALID_HANDLE)) { + if (vmci_qp_broker_detach(temp_handle, + context) < VMCI_SUCCESS) { + /* + * When vmci_qp_broker_detach() succeeds it + * removes the handle from the array. If + * detach fails, we must remove the handle + * ourselves. + */ + vmci_handle_arr_remove_entry(context->queue_pair_array, + temp_handle); + } + temp_handle = + vmci_handle_arr_get_entry(context->queue_pair_array, 0); + } + + /* + * It is fine to destroy this without locking the callQueue, as + * this is the only thread having a reference to the context. + */ + list_for_each_entry_safe(dq_entry, dq_entry_tmp, + &context->datagram_queue, list_item) { + WARN_ON(dq_entry->dg_size != VMCI_DG_SIZE(dq_entry->dg)); + list_del(&dq_entry->list_item); + kfree(dq_entry->dg); + kfree(dq_entry); + } + + list_for_each_entry_safe(notifier, tmp, + &context->notifier_list, node) { + list_del(¬ifier->node); + kfree(notifier); + } + + vmci_handle_arr_destroy(context->queue_pair_array); + vmci_handle_arr_destroy(context->doorbell_array); + vmci_handle_arr_destroy(context->pending_doorbell_array); + vmci_ctx_unset_notify(context); + if (context->cred) + put_cred(context->cred); + kfree(context); +} + +/* + * Drops reference to VMCI context. If this is the last reference to + * the context it will be deallocated. A context is created with + * a reference count of one, and on destroy, it is removed from + * the context list before its reference count is decremented. Thus, + * if we reach zero, we are sure that nobody else are about to increment + * it (they need the entry in the context list for that), and so there + * is no need for locking. + */ +void vmci_ctx_put(struct vmci_ctx *context) +{ + kref_put(&context->kref, ctx_free_ctx); +} + +/* + * Dequeues the next datagram and returns it to caller. + * The caller passes in a pointer to the max size datagram + * it can handle and the datagram is only unqueued if the + * size is less than max_size. If larger max_size is set to + * the size of the datagram to give the caller a chance to + * set up a larger buffer for the guestcall. + */ +int vmci_ctx_dequeue_datagram(struct vmci_ctx *context, + size_t *max_size, + struct vmci_datagram **dg) +{ + struct vmci_datagram_queue_entry *dq_entry; + struct list_head *list_item; + int rv; + + /* Dequeue the next datagram entry. */ + spin_lock(&context->lock); + if (context->pending_datagrams == 0) { + ctx_clear_notify_call(context); + spin_unlock(&context->lock); + pr_devel("No datagrams pending\n"); + return VMCI_ERROR_NO_MORE_DATAGRAMS; + } + + list_item = context->datagram_queue.next; + + dq_entry = + list_entry(list_item, struct vmci_datagram_queue_entry, list_item); + + /* Check size of caller's buffer. */ + if (*max_size < dq_entry->dg_size) { + *max_size = dq_entry->dg_size; + spin_unlock(&context->lock); + pr_devel("Caller's buffer should be at least (size=%u bytes)\n", + (u32) *max_size); + return VMCI_ERROR_NO_MEM; + } + + list_del(list_item); + context->pending_datagrams--; + context->datagram_queue_size -= dq_entry->dg_size; + if (context->pending_datagrams == 0) { + ctx_clear_notify_call(context); + rv = VMCI_SUCCESS; + } else { + /* + * Return the size of the next datagram. + */ + struct vmci_datagram_queue_entry *next_entry; + + list_item = context->datagram_queue.next; + next_entry = + list_entry(list_item, struct vmci_datagram_queue_entry, + list_item); + + /* + * The following size_t -> int truncation is fine as + * the maximum size of a (routable) datagram is 68KB. + */ + rv = (int)next_entry->dg_size; + } + spin_unlock(&context->lock); + + /* Caller must free datagram. */ + *dg = dq_entry->dg; + dq_entry->dg = NULL; + kfree(dq_entry); + + return rv; +} + +/* + * Reverts actions set up by vmci_setup_notify(). Unmaps and unlocks the + * page mapped/locked by vmci_setup_notify(). + */ +void vmci_ctx_unset_notify(struct vmci_ctx *context) +{ + struct page *notify_page; + + spin_lock(&context->lock); + + notify_page = context->notify_page; + context->notify = &ctx_dummy_notify; + context->notify_page = NULL; + + spin_unlock(&context->lock); + + if (notify_page) { + kunmap(notify_page); + put_page(notify_page); + } +} + +/* + * Add remote_cid to list of contexts current contexts wants + * notifications from/about. + */ +int vmci_ctx_add_notification(u32 context_id, u32 remote_cid) +{ + struct vmci_ctx *context; + struct vmci_handle_list *notifier, *n; + int result; + bool exists = false; + + context = vmci_ctx_get(context_id); + if (!context) + return VMCI_ERROR_NOT_FOUND; + + if (VMCI_CONTEXT_IS_VM(context_id) && VMCI_CONTEXT_IS_VM(remote_cid)) { + pr_devel("Context removed notifications for other VMs not supported (src=0x%x, remote=0x%x)\n", + context_id, remote_cid); + result = VMCI_ERROR_DST_UNREACHABLE; + goto out; + } + + if (context->priv_flags & VMCI_PRIVILEGE_FLAG_RESTRICTED) { + result = VMCI_ERROR_NO_ACCESS; + goto out; + } + + notifier = kmalloc(sizeof(struct vmci_handle_list), GFP_KERNEL); + if (!notifier) { + result = VMCI_ERROR_NO_MEM; + goto out; + } + + INIT_LIST_HEAD(¬ifier->node); + notifier->handle = vmci_make_handle(remote_cid, VMCI_EVENT_HANDLER); + + spin_lock(&context->lock); + + if (context->n_notifiers < VMCI_MAX_CONTEXTS) { + list_for_each_entry(n, &context->notifier_list, node) { + if (vmci_handle_is_equal(n->handle, notifier->handle)) { + exists = true; + break; + } + } + + if (exists) { + kfree(notifier); + result = VMCI_ERROR_ALREADY_EXISTS; + } else { + list_add_tail_rcu(¬ifier->node, + &context->notifier_list); + context->n_notifiers++; + result = VMCI_SUCCESS; + } + } else { + kfree(notifier); + result = VMCI_ERROR_NO_MEM; + } + + spin_unlock(&context->lock); + + out: + vmci_ctx_put(context); + return result; +} + +/* + * Remove remote_cid from current context's list of contexts it is + * interested in getting notifications from/about. + */ +int vmci_ctx_remove_notification(u32 context_id, u32 remote_cid) +{ + struct vmci_ctx *context; + struct vmci_handle_list *notifier, *tmp; + struct vmci_handle handle; + bool found = false; + + context = vmci_ctx_get(context_id); + if (!context) + return VMCI_ERROR_NOT_FOUND; + + handle = vmci_make_handle(remote_cid, VMCI_EVENT_HANDLER); + + spin_lock(&context->lock); + list_for_each_entry_safe(notifier, tmp, + &context->notifier_list, node) { + if (vmci_handle_is_equal(notifier->handle, handle)) { + list_del_rcu(¬ifier->node); + context->n_notifiers--; + found = true; + break; + } + } + spin_unlock(&context->lock); + + if (found) { + synchronize_rcu(); + kfree(notifier); + } + + vmci_ctx_put(context); + + return found ? VMCI_SUCCESS : VMCI_ERROR_NOT_FOUND; +} + +static int vmci_ctx_get_chkpt_notifiers(struct vmci_ctx *context, + u32 *buf_size, void **pbuf) +{ + u32 *notifiers; + size_t data_size; + struct vmci_handle_list *entry; + int i = 0; + + if (context->n_notifiers == 0) { + *buf_size = 0; + *pbuf = NULL; + return VMCI_SUCCESS; + } + + data_size = context->n_notifiers * sizeof(*notifiers); + if (*buf_size < data_size) { + *buf_size = data_size; + return VMCI_ERROR_MORE_DATA; + } + + notifiers = kmalloc(data_size, GFP_ATOMIC); /* FIXME: want GFP_KERNEL */ + if (!notifiers) + return VMCI_ERROR_NO_MEM; + + list_for_each_entry(entry, &context->notifier_list, node) + notifiers[i++] = entry->handle.context; + + *buf_size = data_size; + *pbuf = notifiers; + return VMCI_SUCCESS; +} + +static int vmci_ctx_get_chkpt_doorbells(struct vmci_ctx *context, + u32 *buf_size, void **pbuf) +{ + struct dbell_cpt_state *dbells; + u32 i, n_doorbells; + + n_doorbells = vmci_handle_arr_get_size(context->doorbell_array); + if (n_doorbells > 0) { + size_t data_size = n_doorbells * sizeof(*dbells); + if (*buf_size < data_size) { + *buf_size = data_size; + return VMCI_ERROR_MORE_DATA; + } + + dbells = kzalloc(data_size, GFP_ATOMIC); + if (!dbells) + return VMCI_ERROR_NO_MEM; + + for (i = 0; i < n_doorbells; i++) + dbells[i].handle = vmci_handle_arr_get_entry( + context->doorbell_array, i); + + *buf_size = data_size; + *pbuf = dbells; + } else { + *buf_size = 0; + *pbuf = NULL; + } + + return VMCI_SUCCESS; +} + +/* + * Get current context's checkpoint state of given type. + */ +int vmci_ctx_get_chkpt_state(u32 context_id, + u32 cpt_type, + u32 *buf_size, + void **pbuf) +{ + struct vmci_ctx *context; + int result; + + context = vmci_ctx_get(context_id); + if (!context) + return VMCI_ERROR_NOT_FOUND; + + spin_lock(&context->lock); + + switch (cpt_type) { + case VMCI_NOTIFICATION_CPT_STATE: + result = vmci_ctx_get_chkpt_notifiers(context, buf_size, pbuf); + break; + + case VMCI_WELLKNOWN_CPT_STATE: + /* + * For compatibility with VMX'en with VM to VM communication, we + * always return zero wellknown handles. + */ + + *buf_size = 0; + *pbuf = NULL; + result = VMCI_SUCCESS; + break; + + case VMCI_DOORBELL_CPT_STATE: + result = vmci_ctx_get_chkpt_doorbells(context, buf_size, pbuf); + break; + + default: + pr_devel("Invalid cpt state (type=%d)\n", cpt_type); + result = VMCI_ERROR_INVALID_ARGS; + break; + } + + spin_unlock(&context->lock); + vmci_ctx_put(context); + + return result; +} + +/* + * Set current context's checkpoint state of given type. + */ +int vmci_ctx_set_chkpt_state(u32 context_id, + u32 cpt_type, + u32 buf_size, + void *cpt_buf) +{ + u32 i; + u32 current_id; + int result = VMCI_SUCCESS; + u32 num_ids = buf_size / sizeof(u32); + + if (cpt_type == VMCI_WELLKNOWN_CPT_STATE && num_ids > 0) { + /* + * We would end up here if VMX with VM to VM communication + * attempts to restore a checkpoint with wellknown handles. + */ + pr_warn("Attempt to restore checkpoint with obsolete wellknown handles\n"); + return VMCI_ERROR_OBSOLETE; + } + + if (cpt_type != VMCI_NOTIFICATION_CPT_STATE) { + pr_devel("Invalid cpt state (type=%d)\n", cpt_type); + return VMCI_ERROR_INVALID_ARGS; + } + + for (i = 0; i < num_ids && result == VMCI_SUCCESS; i++) { + current_id = ((u32 *)cpt_buf)[i]; + result = vmci_ctx_add_notification(context_id, current_id); + if (result != VMCI_SUCCESS) + break; + } + if (result != VMCI_SUCCESS) + pr_devel("Failed to set cpt state (type=%d) (error=%d)\n", + cpt_type, result); + + return result; +} + +/* + * Retrieves the specified context's pending notifications in the + * form of a handle array. The handle arrays returned are the + * actual data - not a copy and should not be modified by the + * caller. They must be released using + * vmci_ctx_rcv_notifications_release. + */ +int vmci_ctx_rcv_notifications_get(u32 context_id, + struct vmci_handle_arr **db_handle_array, + struct vmci_handle_arr **qp_handle_array) +{ + struct vmci_ctx *context; + int result = VMCI_SUCCESS; + + context = vmci_ctx_get(context_id); + if (context == NULL) + return VMCI_ERROR_NOT_FOUND; + + spin_lock(&context->lock); + + *db_handle_array = context->pending_doorbell_array; + context->pending_doorbell_array = + vmci_handle_arr_create(0, VMCI_MAX_GUEST_DOORBELL_COUNT); + if (!context->pending_doorbell_array) { + context->pending_doorbell_array = *db_handle_array; + *db_handle_array = NULL; + result = VMCI_ERROR_NO_MEM; + } + *qp_handle_array = NULL; + + spin_unlock(&context->lock); + vmci_ctx_put(context); + + return result; +} + +/* + * Releases handle arrays with pending notifications previously + * retrieved using vmci_ctx_rcv_notifications_get. If the + * notifications were not successfully handed over to the guest, + * success must be false. + */ +void vmci_ctx_rcv_notifications_release(u32 context_id, + struct vmci_handle_arr *db_handle_array, + struct vmci_handle_arr *qp_handle_array, + bool success) +{ + struct vmci_ctx *context = vmci_ctx_get(context_id); + + spin_lock(&context->lock); + if (!success) { + struct vmci_handle handle; + + /* + * New notifications may have been added while we were not + * holding the context lock, so we transfer any new pending + * doorbell notifications to the old array, and reinstate the + * old array. + */ + + handle = vmci_handle_arr_remove_tail( + context->pending_doorbell_array); + while (!vmci_handle_is_invalid(handle)) { + if (!vmci_handle_arr_has_entry(db_handle_array, + handle)) { + vmci_handle_arr_append_entry( + &db_handle_array, handle); + } + handle = vmci_handle_arr_remove_tail( + context->pending_doorbell_array); + } + vmci_handle_arr_destroy(context->pending_doorbell_array); + context->pending_doorbell_array = db_handle_array; + db_handle_array = NULL; + } else { + ctx_clear_notify_call(context); + } + spin_unlock(&context->lock); + vmci_ctx_put(context); + + if (db_handle_array) + vmci_handle_arr_destroy(db_handle_array); + + if (qp_handle_array) + vmci_handle_arr_destroy(qp_handle_array); +} + +/* + * Registers that a new doorbell handle has been allocated by the + * context. Only doorbell handles registered can be notified. + */ +int vmci_ctx_dbell_create(u32 context_id, struct vmci_handle handle) +{ + struct vmci_ctx *context; + int result; + + if (context_id == VMCI_INVALID_ID || vmci_handle_is_invalid(handle)) + return VMCI_ERROR_INVALID_ARGS; + + context = vmci_ctx_get(context_id); + if (context == NULL) + return VMCI_ERROR_NOT_FOUND; + + spin_lock(&context->lock); + if (!vmci_handle_arr_has_entry(context->doorbell_array, handle)) + result = vmci_handle_arr_append_entry(&context->doorbell_array, + handle); + else + result = VMCI_ERROR_DUPLICATE_ENTRY; + + spin_unlock(&context->lock); + vmci_ctx_put(context); + + return result; +} + +/* + * Unregisters a doorbell handle that was previously registered + * with vmci_ctx_dbell_create. + */ +int vmci_ctx_dbell_destroy(u32 context_id, struct vmci_handle handle) +{ + struct vmci_ctx *context; + struct vmci_handle removed_handle; + + if (context_id == VMCI_INVALID_ID || vmci_handle_is_invalid(handle)) + return VMCI_ERROR_INVALID_ARGS; + + context = vmci_ctx_get(context_id); + if (context == NULL) + return VMCI_ERROR_NOT_FOUND; + + spin_lock(&context->lock); + removed_handle = + vmci_handle_arr_remove_entry(context->doorbell_array, handle); + vmci_handle_arr_remove_entry(context->pending_doorbell_array, handle); + spin_unlock(&context->lock); + + vmci_ctx_put(context); + + return vmci_handle_is_invalid(removed_handle) ? + VMCI_ERROR_NOT_FOUND : VMCI_SUCCESS; +} + +/* + * Unregisters all doorbell handles that were previously + * registered with vmci_ctx_dbell_create. + */ +int vmci_ctx_dbell_destroy_all(u32 context_id) +{ + struct vmci_ctx *context; + struct vmci_handle handle; + + if (context_id == VMCI_INVALID_ID) + return VMCI_ERROR_INVALID_ARGS; + + context = vmci_ctx_get(context_id); + if (context == NULL) + return VMCI_ERROR_NOT_FOUND; + + spin_lock(&context->lock); + do { + struct vmci_handle_arr *arr = context->doorbell_array; + handle = vmci_handle_arr_remove_tail(arr); + } while (!vmci_handle_is_invalid(handle)); + do { + struct vmci_handle_arr *arr = context->pending_doorbell_array; + handle = vmci_handle_arr_remove_tail(arr); + } while (!vmci_handle_is_invalid(handle)); + spin_unlock(&context->lock); + + vmci_ctx_put(context); + + return VMCI_SUCCESS; +} + +/* + * Registers a notification of a doorbell handle initiated by the + * specified source context. The notification of doorbells are + * subject to the same isolation rules as datagram delivery. To + * allow host side senders of notifications a finer granularity + * of sender rights than those assigned to the sending context + * itself, the host context is required to specify a different + * set of privilege flags that will override the privileges of + * the source context. + */ +int vmci_ctx_notify_dbell(u32 src_cid, + struct vmci_handle handle, + u32 src_priv_flags) +{ + struct vmci_ctx *dst_context; + int result; + + if (vmci_handle_is_invalid(handle)) + return VMCI_ERROR_INVALID_ARGS; + + /* Get the target VM's VMCI context. */ + dst_context = vmci_ctx_get(handle.context); + if (!dst_context) { + pr_devel("Invalid context (ID=0x%x)\n", handle.context); + return VMCI_ERROR_NOT_FOUND; + } + + if (src_cid != handle.context) { + u32 dst_priv_flags; + + if (VMCI_CONTEXT_IS_VM(src_cid) && + VMCI_CONTEXT_IS_VM(handle.context)) { + pr_devel("Doorbell notification from VM to VM not supported (src=0x%x, dst=0x%x)\n", + src_cid, handle.context); + result = VMCI_ERROR_DST_UNREACHABLE; + goto out; + } + + result = vmci_dbell_get_priv_flags(handle, &dst_priv_flags); + if (result < VMCI_SUCCESS) { + pr_warn("Failed to get privilege flags for destination (handle=0x%x:0x%x)\n", + handle.context, handle.resource); + goto out; + } + + if (src_cid != VMCI_HOST_CONTEXT_ID || + src_priv_flags == VMCI_NO_PRIVILEGE_FLAGS) { + src_priv_flags = vmci_context_get_priv_flags(src_cid); + } + + if (vmci_deny_interaction(src_priv_flags, dst_priv_flags)) { + result = VMCI_ERROR_NO_ACCESS; + goto out; + } + } + + if (handle.context == VMCI_HOST_CONTEXT_ID) { + result = vmci_dbell_host_context_notify(src_cid, handle); + } else { + spin_lock(&dst_context->lock); + + if (!vmci_handle_arr_has_entry(dst_context->doorbell_array, + handle)) { + result = VMCI_ERROR_NOT_FOUND; + } else { + if (!vmci_handle_arr_has_entry( + dst_context->pending_doorbell_array, + handle)) { + result = vmci_handle_arr_append_entry( + &dst_context->pending_doorbell_array, + handle); + if (result == VMCI_SUCCESS) { + ctx_signal_notify(dst_context); + wake_up(&dst_context->host_context.wait_queue); + } + } else { + result = VMCI_SUCCESS; + } + } + spin_unlock(&dst_context->lock); + } + + out: + vmci_ctx_put(dst_context); + + return result; +} + +bool vmci_ctx_supports_host_qp(struct vmci_ctx *context) +{ + return context && context->user_version >= VMCI_VERSION_HOSTQP; +} + +/* + * Registers that a new queue pair handle has been allocated by + * the context. + */ +int vmci_ctx_qp_create(struct vmci_ctx *context, struct vmci_handle handle) +{ + int result; + + if (context == NULL || vmci_handle_is_invalid(handle)) + return VMCI_ERROR_INVALID_ARGS; + + if (!vmci_handle_arr_has_entry(context->queue_pair_array, handle)) + result = vmci_handle_arr_append_entry( + &context->queue_pair_array, handle); + else + result = VMCI_ERROR_DUPLICATE_ENTRY; + + return result; +} + +/* + * Unregisters a queue pair handle that was previously registered + * with vmci_ctx_qp_create. + */ +int vmci_ctx_qp_destroy(struct vmci_ctx *context, struct vmci_handle handle) +{ + struct vmci_handle hndl; + + if (context == NULL || vmci_handle_is_invalid(handle)) + return VMCI_ERROR_INVALID_ARGS; + + hndl = vmci_handle_arr_remove_entry(context->queue_pair_array, handle); + + return vmci_handle_is_invalid(hndl) ? + VMCI_ERROR_NOT_FOUND : VMCI_SUCCESS; +} + +/* + * Determines whether a given queue pair handle is registered + * with the given context. + */ +bool vmci_ctx_qp_exists(struct vmci_ctx *context, struct vmci_handle handle) +{ + if (context == NULL || vmci_handle_is_invalid(handle)) + return false; + + return vmci_handle_arr_has_entry(context->queue_pair_array, handle); +} + +/* + * vmci_context_get_priv_flags() - Retrieve privilege flags. + * @context_id: The context ID of the VMCI context. + * + * Retrieves privilege flags of the given VMCI context ID. + */ +u32 vmci_context_get_priv_flags(u32 context_id) +{ + if (vmci_host_code_active()) { + u32 flags; + struct vmci_ctx *context; + + context = vmci_ctx_get(context_id); + if (!context) + return VMCI_LEAST_PRIVILEGE_FLAGS; + + flags = context->priv_flags; + vmci_ctx_put(context); + return flags; + } + return VMCI_NO_PRIVILEGE_FLAGS; +} +EXPORT_SYMBOL_GPL(vmci_context_get_priv_flags); + +/* + * vmci_is_context_owner() - Determimnes if user is the context owner + * @context_id: The context ID of the VMCI context. + * @uid: The host user id (real kernel value). + * + * Determines whether a given UID is the owner of given VMCI context. + */ +bool vmci_is_context_owner(u32 context_id, kuid_t uid) +{ + bool is_owner = false; + + if (vmci_host_code_active()) { + struct vmci_ctx *context = vmci_ctx_get(context_id); + if (context) { + if (context->cred) + is_owner = uid_eq(context->cred->uid, uid); + vmci_ctx_put(context); + } + } + + return is_owner; +} +EXPORT_SYMBOL_GPL(vmci_is_context_owner); diff --git a/drivers/misc/vmw_vmci/vmci_context.h b/drivers/misc/vmw_vmci/vmci_context.h new file mode 100644 index 000000000..24a88e68a --- /dev/null +++ b/drivers/misc/vmw_vmci/vmci_context.h @@ -0,0 +1,182 @@ +/* + * VMware VMCI driver (vmciContext.h) + * + * Copyright (C) 2012 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + */ + +#ifndef _VMCI_CONTEXT_H_ +#define _VMCI_CONTEXT_H_ + +#include +#include +#include +#include +#include + +#include "vmci_handle_array.h" +#include "vmci_datagram.h" + +/* Used to determine what checkpoint state to get and set. */ +enum { + VMCI_NOTIFICATION_CPT_STATE = 1, + VMCI_WELLKNOWN_CPT_STATE = 2, + VMCI_DG_OUT_STATE = 3, + VMCI_DG_IN_STATE = 4, + VMCI_DG_IN_SIZE_STATE = 5, + VMCI_DOORBELL_CPT_STATE = 6, +}; + +/* Host specific struct used for signalling */ +struct vmci_host { + wait_queue_head_t wait_queue; +}; + +struct vmci_handle_list { + struct list_head node; + struct vmci_handle handle; +}; + +struct vmci_ctx { + struct list_head list_item; /* For global VMCI list. */ + u32 cid; + struct kref kref; + struct list_head datagram_queue; /* Head of per VM queue. */ + u32 pending_datagrams; + size_t datagram_queue_size; /* Size of datagram queue in bytes. */ + + /* + * Version of the code that created + * this context; e.g., VMX. + */ + int user_version; + spinlock_t lock; /* Locks callQueue and handle_arrays. */ + + /* + * queue_pairs attached to. The array of + * handles for queue pairs is accessed + * from the code for QP API, and there + * it is protected by the QP lock. It + * is also accessed from the context + * clean up path, which does not + * require a lock. VMCILock is not + * used to protect the QP array field. + */ + struct vmci_handle_arr *queue_pair_array; + + /* Doorbells created by context. */ + struct vmci_handle_arr *doorbell_array; + + /* Doorbells pending for context. */ + struct vmci_handle_arr *pending_doorbell_array; + + /* Contexts current context is subscribing to. */ + struct list_head notifier_list; + unsigned int n_notifiers; + + struct vmci_host host_context; + u32 priv_flags; + + const struct cred *cred; + bool *notify; /* Notify flag pointer - hosted only. */ + struct page *notify_page; /* Page backing the notify UVA. */ +}; + +/* VMCINotifyAddRemoveInfo: Used to add/remove remote context notifications. */ +struct vmci_ctx_info { + u32 remote_cid; + int result; +}; + +/* VMCICptBufInfo: Used to set/get current context's checkpoint state. */ +struct vmci_ctx_chkpt_buf_info { + u64 cpt_buf; + u32 cpt_type; + u32 buf_size; + s32 result; + u32 _pad; +}; + +/* + * VMCINotificationReceiveInfo: Used to recieve pending notifications + * for doorbells and queue pairs. + */ +struct vmci_ctx_notify_recv_info { + u64 db_handle_buf_uva; + u64 db_handle_buf_size; + u64 qp_handle_buf_uva; + u64 qp_handle_buf_size; + s32 result; + u32 _pad; +}; + +/* + * Utilility function that checks whether two entities are allowed + * to interact. If one of them is restricted, the other one must + * be trusted. + */ +static inline bool vmci_deny_interaction(u32 part_one, u32 part_two) +{ + return ((part_one & VMCI_PRIVILEGE_FLAG_RESTRICTED) && + !(part_two & VMCI_PRIVILEGE_FLAG_TRUSTED)) || + ((part_two & VMCI_PRIVILEGE_FLAG_RESTRICTED) && + !(part_one & VMCI_PRIVILEGE_FLAG_TRUSTED)); +} + +struct vmci_ctx *vmci_ctx_create(u32 cid, u32 flags, + uintptr_t event_hnd, int version, + const struct cred *cred); +void vmci_ctx_destroy(struct vmci_ctx *context); + +bool vmci_ctx_supports_host_qp(struct vmci_ctx *context); +int vmci_ctx_enqueue_datagram(u32 cid, struct vmci_datagram *dg); +int vmci_ctx_dequeue_datagram(struct vmci_ctx *context, + size_t *max_size, struct vmci_datagram **dg); +int vmci_ctx_pending_datagrams(u32 cid, u32 *pending); +struct vmci_ctx *vmci_ctx_get(u32 cid); +void vmci_ctx_put(struct vmci_ctx *context); +bool vmci_ctx_exists(u32 cid); + +int vmci_ctx_add_notification(u32 context_id, u32 remote_cid); +int vmci_ctx_remove_notification(u32 context_id, u32 remote_cid); +int vmci_ctx_get_chkpt_state(u32 context_id, u32 cpt_type, + u32 *num_cids, void **cpt_buf_ptr); +int vmci_ctx_set_chkpt_state(u32 context_id, u32 cpt_type, + u32 num_cids, void *cpt_buf); + +int vmci_ctx_qp_create(struct vmci_ctx *context, struct vmci_handle handle); +int vmci_ctx_qp_destroy(struct vmci_ctx *context, struct vmci_handle handle); +bool vmci_ctx_qp_exists(struct vmci_ctx *context, struct vmci_handle handle); + +void vmci_ctx_check_signal_notify(struct vmci_ctx *context); +void vmci_ctx_unset_notify(struct vmci_ctx *context); + +int vmci_ctx_dbell_create(u32 context_id, struct vmci_handle handle); +int vmci_ctx_dbell_destroy(u32 context_id, struct vmci_handle handle); +int vmci_ctx_dbell_destroy_all(u32 context_id); +int vmci_ctx_notify_dbell(u32 cid, struct vmci_handle handle, + u32 src_priv_flags); + +int vmci_ctx_rcv_notifications_get(u32 context_id, struct vmci_handle_arr + **db_handle_array, struct vmci_handle_arr + **qp_handle_array); +void vmci_ctx_rcv_notifications_release(u32 context_id, struct vmci_handle_arr + *db_handle_array, struct vmci_handle_arr + *qp_handle_array, bool success); + +static inline u32 vmci_ctx_get_id(struct vmci_ctx *context) +{ + if (!context) + return VMCI_INVALID_ID; + return context->cid; +} + +#endif /* _VMCI_CONTEXT_H_ */ diff --git a/drivers/misc/vmw_vmci/vmci_datagram.c b/drivers/misc/vmw_vmci/vmci_datagram.c new file mode 100644 index 000000000..8a4b6bbe1 --- /dev/null +++ b/drivers/misc/vmw_vmci/vmci_datagram.c @@ -0,0 +1,502 @@ +/* + * VMware VMCI Driver + * + * Copyright (C) 2012 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + */ + +#include +#include +#include +#include +#include +#include + +#include "vmci_datagram.h" +#include "vmci_resource.h" +#include "vmci_context.h" +#include "vmci_driver.h" +#include "vmci_event.h" +#include "vmci_route.h" + +/* + * struct datagram_entry describes the datagram entity. It is used for datagram + * entities created only on the host. + */ +struct datagram_entry { + struct vmci_resource resource; + u32 flags; + bool run_delayed; + vmci_datagram_recv_cb recv_cb; + void *client_data; + u32 priv_flags; +}; + +struct delayed_datagram_info { + struct datagram_entry *entry; + struct work_struct work; + bool in_dg_host_queue; + /* msg and msg_payload must be together. */ + struct vmci_datagram msg; + u8 msg_payload[]; +}; + +/* Number of in-flight host->host datagrams */ +static atomic_t delayed_dg_host_queue_size = ATOMIC_INIT(0); + +/* + * Create a datagram entry given a handle pointer. + */ +static int dg_create_handle(u32 resource_id, + u32 flags, + u32 priv_flags, + vmci_datagram_recv_cb recv_cb, + void *client_data, struct vmci_handle *out_handle) +{ + int result; + u32 context_id; + struct vmci_handle handle; + struct datagram_entry *entry; + + if ((flags & VMCI_FLAG_WELLKNOWN_DG_HND) != 0) + return VMCI_ERROR_INVALID_ARGS; + + if ((flags & VMCI_FLAG_ANYCID_DG_HND) != 0) { + context_id = VMCI_INVALID_ID; + } else { + context_id = vmci_get_context_id(); + if (context_id == VMCI_INVALID_ID) + return VMCI_ERROR_NO_RESOURCES; + } + + handle = vmci_make_handle(context_id, resource_id); + + entry = kmalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) { + pr_warn("Failed allocating memory for datagram entry\n"); + return VMCI_ERROR_NO_MEM; + } + + entry->run_delayed = (flags & VMCI_FLAG_DG_DELAYED_CB) ? true : false; + entry->flags = flags; + entry->recv_cb = recv_cb; + entry->client_data = client_data; + entry->priv_flags = priv_flags; + + /* Make datagram resource live. */ + result = vmci_resource_add(&entry->resource, + VMCI_RESOURCE_TYPE_DATAGRAM, + handle); + if (result != VMCI_SUCCESS) { + pr_warn("Failed to add new resource (handle=0x%x:0x%x), error: %d\n", + handle.context, handle.resource, result); + kfree(entry); + return result; + } + + *out_handle = vmci_resource_handle(&entry->resource); + return VMCI_SUCCESS; +} + +/* + * Internal utility function with the same purpose as + * vmci_datagram_get_priv_flags that also takes a context_id. + */ +static int vmci_datagram_get_priv_flags(u32 context_id, + struct vmci_handle handle, + u32 *priv_flags) +{ + if (context_id == VMCI_INVALID_ID) + return VMCI_ERROR_INVALID_ARGS; + + if (context_id == VMCI_HOST_CONTEXT_ID) { + struct datagram_entry *src_entry; + struct vmci_resource *resource; + + resource = vmci_resource_by_handle(handle, + VMCI_RESOURCE_TYPE_DATAGRAM); + if (!resource) + return VMCI_ERROR_INVALID_ARGS; + + src_entry = container_of(resource, struct datagram_entry, + resource); + *priv_flags = src_entry->priv_flags; + vmci_resource_put(resource); + } else if (context_id == VMCI_HYPERVISOR_CONTEXT_ID) + *priv_flags = VMCI_MAX_PRIVILEGE_FLAGS; + else + *priv_flags = vmci_context_get_priv_flags(context_id); + + return VMCI_SUCCESS; +} + +/* + * Calls the specified callback in a delayed context. + */ +static void dg_delayed_dispatch(struct work_struct *work) +{ + struct delayed_datagram_info *dg_info = + container_of(work, struct delayed_datagram_info, work); + + dg_info->entry->recv_cb(dg_info->entry->client_data, &dg_info->msg); + + vmci_resource_put(&dg_info->entry->resource); + + if (dg_info->in_dg_host_queue) + atomic_dec(&delayed_dg_host_queue_size); + + kfree(dg_info); +} + +/* + * Dispatch datagram as a host, to the host, or other vm context. This + * function cannot dispatch to hypervisor context handlers. This should + * have been handled before we get here by vmci_datagram_dispatch. + * Returns number of bytes sent on success, error code otherwise. + */ +static int dg_dispatch_as_host(u32 context_id, struct vmci_datagram *dg) +{ + int retval; + size_t dg_size; + u32 src_priv_flags; + + dg_size = VMCI_DG_SIZE(dg); + + /* Host cannot send to the hypervisor. */ + if (dg->dst.context == VMCI_HYPERVISOR_CONTEXT_ID) + return VMCI_ERROR_DST_UNREACHABLE; + + /* Check that source handle matches sending context. */ + if (dg->src.context != context_id) { + pr_devel("Sender context (ID=0x%x) is not owner of src datagram entry (handle=0x%x:0x%x)\n", + context_id, dg->src.context, dg->src.resource); + return VMCI_ERROR_NO_ACCESS; + } + + /* Get hold of privileges of sending endpoint. */ + retval = vmci_datagram_get_priv_flags(context_id, dg->src, + &src_priv_flags); + if (retval != VMCI_SUCCESS) { + pr_warn("Couldn't get privileges (handle=0x%x:0x%x)\n", + dg->src.context, dg->src.resource); + return retval; + } + + /* Determine if we should route to host or guest destination. */ + if (dg->dst.context == VMCI_HOST_CONTEXT_ID) { + /* Route to host datagram entry. */ + struct datagram_entry *dst_entry; + struct vmci_resource *resource; + + if (dg->src.context == VMCI_HYPERVISOR_CONTEXT_ID && + dg->dst.resource == VMCI_EVENT_HANDLER) { + return vmci_event_dispatch(dg); + } + + resource = vmci_resource_by_handle(dg->dst, + VMCI_RESOURCE_TYPE_DATAGRAM); + if (!resource) { + pr_devel("Sending to invalid destination (handle=0x%x:0x%x)\n", + dg->dst.context, dg->dst.resource); + return VMCI_ERROR_INVALID_RESOURCE; + } + dst_entry = container_of(resource, struct datagram_entry, + resource); + if (vmci_deny_interaction(src_priv_flags, + dst_entry->priv_flags)) { + vmci_resource_put(resource); + return VMCI_ERROR_NO_ACCESS; + } + + /* + * If a VMCI datagram destined for the host is also sent by the + * host, we always run it delayed. This ensures that no locks + * are held when the datagram callback runs. + */ + if (dst_entry->run_delayed || + dg->src.context == VMCI_HOST_CONTEXT_ID) { + struct delayed_datagram_info *dg_info; + + if (atomic_add_return(1, &delayed_dg_host_queue_size) + == VMCI_MAX_DELAYED_DG_HOST_QUEUE_SIZE) { + atomic_dec(&delayed_dg_host_queue_size); + vmci_resource_put(resource); + return VMCI_ERROR_NO_MEM; + } + + dg_info = kmalloc(sizeof(*dg_info) + + (size_t) dg->payload_size, GFP_ATOMIC); + if (!dg_info) { + atomic_dec(&delayed_dg_host_queue_size); + vmci_resource_put(resource); + return VMCI_ERROR_NO_MEM; + } + + dg_info->in_dg_host_queue = true; + dg_info->entry = dst_entry; + memcpy(&dg_info->msg, dg, dg_size); + + INIT_WORK(&dg_info->work, dg_delayed_dispatch); + schedule_work(&dg_info->work); + retval = VMCI_SUCCESS; + + } else { + retval = dst_entry->recv_cb(dst_entry->client_data, dg); + vmci_resource_put(resource); + if (retval < VMCI_SUCCESS) + return retval; + } + } else { + /* Route to destination VM context. */ + struct vmci_datagram *new_dg; + + if (context_id != dg->dst.context) { + if (vmci_deny_interaction(src_priv_flags, + vmci_context_get_priv_flags + (dg->dst.context))) { + return VMCI_ERROR_NO_ACCESS; + } else if (VMCI_CONTEXT_IS_VM(context_id)) { + /* + * If the sending context is a VM, it + * cannot reach another VM. + */ + + pr_devel("Datagram communication between VMs not supported (src=0x%x, dst=0x%x)\n", + context_id, dg->dst.context); + return VMCI_ERROR_DST_UNREACHABLE; + } + } + + /* We make a copy to enqueue. */ + new_dg = kmemdup(dg, dg_size, GFP_KERNEL); + if (new_dg == NULL) + return VMCI_ERROR_NO_MEM; + + retval = vmci_ctx_enqueue_datagram(dg->dst.context, new_dg); + if (retval < VMCI_SUCCESS) { + kfree(new_dg); + return retval; + } + } + + /* + * We currently truncate the size to signed 32 bits. This doesn't + * matter for this handler as it only support 4Kb messages. + */ + return (int)dg_size; +} + +/* + * Dispatch datagram as a guest, down through the VMX and potentially to + * the host. + * Returns number of bytes sent on success, error code otherwise. + */ +static int dg_dispatch_as_guest(struct vmci_datagram *dg) +{ + int retval; + struct vmci_resource *resource; + + resource = vmci_resource_by_handle(dg->src, + VMCI_RESOURCE_TYPE_DATAGRAM); + if (!resource) + return VMCI_ERROR_NO_HANDLE; + + retval = vmci_send_datagram(dg); + vmci_resource_put(resource); + return retval; +} + +/* + * Dispatch datagram. This will determine the routing for the datagram + * and dispatch it accordingly. + * Returns number of bytes sent on success, error code otherwise. + */ +int vmci_datagram_dispatch(u32 context_id, + struct vmci_datagram *dg, bool from_guest) +{ + int retval; + enum vmci_route route; + + BUILD_BUG_ON(sizeof(struct vmci_datagram) != 24); + + if (dg->payload_size > VMCI_MAX_DG_SIZE || + VMCI_DG_SIZE(dg) > VMCI_MAX_DG_SIZE) { + pr_devel("Payload (size=%llu bytes) too big to send\n", + (unsigned long long)dg->payload_size); + return VMCI_ERROR_INVALID_ARGS; + } + + retval = vmci_route(&dg->src, &dg->dst, from_guest, &route); + if (retval < VMCI_SUCCESS) { + pr_devel("Failed to route datagram (src=0x%x, dst=0x%x, err=%d)\n", + dg->src.context, dg->dst.context, retval); + return retval; + } + + if (VMCI_ROUTE_AS_HOST == route) { + if (VMCI_INVALID_ID == context_id) + context_id = VMCI_HOST_CONTEXT_ID; + return dg_dispatch_as_host(context_id, dg); + } + + if (VMCI_ROUTE_AS_GUEST == route) + return dg_dispatch_as_guest(dg); + + pr_warn("Unknown route (%d) for datagram\n", route); + return VMCI_ERROR_DST_UNREACHABLE; +} + +/* + * Invoke the handler for the given datagram. This is intended to be + * called only when acting as a guest and receiving a datagram from the + * virtual device. + */ +int vmci_datagram_invoke_guest_handler(struct vmci_datagram *dg) +{ + struct vmci_resource *resource; + struct datagram_entry *dst_entry; + + resource = vmci_resource_by_handle(dg->dst, + VMCI_RESOURCE_TYPE_DATAGRAM); + if (!resource) { + pr_devel("destination (handle=0x%x:0x%x) doesn't exist\n", + dg->dst.context, dg->dst.resource); + return VMCI_ERROR_NO_HANDLE; + } + + dst_entry = container_of(resource, struct datagram_entry, resource); + if (dst_entry->run_delayed) { + struct delayed_datagram_info *dg_info; + + dg_info = kmalloc(sizeof(*dg_info) + (size_t)dg->payload_size, + GFP_ATOMIC); + if (!dg_info) { + vmci_resource_put(resource); + return VMCI_ERROR_NO_MEM; + } + + dg_info->in_dg_host_queue = false; + dg_info->entry = dst_entry; + memcpy(&dg_info->msg, dg, VMCI_DG_SIZE(dg)); + + INIT_WORK(&dg_info->work, dg_delayed_dispatch); + schedule_work(&dg_info->work); + } else { + dst_entry->recv_cb(dst_entry->client_data, dg); + vmci_resource_put(resource); + } + + return VMCI_SUCCESS; +} + +/* + * vmci_datagram_create_handle_priv() - Create host context datagram endpoint + * @resource_id: The resource ID. + * @flags: Datagram Flags. + * @priv_flags: Privilege Flags. + * @recv_cb: Callback when receiving datagrams. + * @client_data: Pointer for a datagram_entry struct + * @out_handle: vmci_handle that is populated as a result of this function. + * + * Creates a host context datagram endpoint and returns a handle to it. + */ +int vmci_datagram_create_handle_priv(u32 resource_id, + u32 flags, + u32 priv_flags, + vmci_datagram_recv_cb recv_cb, + void *client_data, + struct vmci_handle *out_handle) +{ + if (out_handle == NULL) + return VMCI_ERROR_INVALID_ARGS; + + if (recv_cb == NULL) { + pr_devel("Client callback needed when creating datagram\n"); + return VMCI_ERROR_INVALID_ARGS; + } + + if (priv_flags & ~VMCI_PRIVILEGE_ALL_FLAGS) + return VMCI_ERROR_INVALID_ARGS; + + return dg_create_handle(resource_id, flags, priv_flags, recv_cb, + client_data, out_handle); +} +EXPORT_SYMBOL_GPL(vmci_datagram_create_handle_priv); + +/* + * vmci_datagram_create_handle() - Create host context datagram endpoint + * @resource_id: Resource ID. + * @flags: Datagram Flags. + * @recv_cb: Callback when receiving datagrams. + * @client_ata: Pointer for a datagram_entry struct + * @out_handle: vmci_handle that is populated as a result of this function. + * + * Creates a host context datagram endpoint and returns a handle to + * it. Same as vmci_datagram_create_handle_priv without the priviledge + * flags argument. + */ +int vmci_datagram_create_handle(u32 resource_id, + u32 flags, + vmci_datagram_recv_cb recv_cb, + void *client_data, + struct vmci_handle *out_handle) +{ + return vmci_datagram_create_handle_priv( + resource_id, flags, + VMCI_DEFAULT_PROC_PRIVILEGE_FLAGS, + recv_cb, client_data, + out_handle); +} +EXPORT_SYMBOL_GPL(vmci_datagram_create_handle); + +/* + * vmci_datagram_destroy_handle() - Destroys datagram handle + * @handle: vmci_handle to be destroyed and reaped. + * + * Use this function to destroy any datagram handles created by + * vmci_datagram_create_handle{,Priv} functions. + */ +int vmci_datagram_destroy_handle(struct vmci_handle handle) +{ + struct datagram_entry *entry; + struct vmci_resource *resource; + + resource = vmci_resource_by_handle(handle, VMCI_RESOURCE_TYPE_DATAGRAM); + if (!resource) { + pr_devel("Failed to destroy datagram (handle=0x%x:0x%x)\n", + handle.context, handle.resource); + return VMCI_ERROR_NOT_FOUND; + } + + entry = container_of(resource, struct datagram_entry, resource); + + vmci_resource_put(&entry->resource); + vmci_resource_remove(&entry->resource); + kfree(entry); + + return VMCI_SUCCESS; +} +EXPORT_SYMBOL_GPL(vmci_datagram_destroy_handle); + +/* + * vmci_datagram_send() - Send a datagram + * @msg: The datagram to send. + * + * Sends the provided datagram on its merry way. + */ +int vmci_datagram_send(struct vmci_datagram *msg) +{ + if (msg == NULL) + return VMCI_ERROR_INVALID_ARGS; + + return vmci_datagram_dispatch(VMCI_INVALID_ID, msg, false); +} +EXPORT_SYMBOL_GPL(vmci_datagram_send); diff --git a/drivers/misc/vmw_vmci/vmci_datagram.h b/drivers/misc/vmw_vmci/vmci_datagram.h new file mode 100644 index 000000000..eb4aab7f6 --- /dev/null +++ b/drivers/misc/vmw_vmci/vmci_datagram.h @@ -0,0 +1,52 @@ +/* + * VMware VMCI Driver + * + * Copyright (C) 2012 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + */ + +#ifndef _VMCI_DATAGRAM_H_ +#define _VMCI_DATAGRAM_H_ + +#include +#include + +#include "vmci_context.h" + +#define VMCI_MAX_DELAYED_DG_HOST_QUEUE_SIZE 256 + +/* + * The struct vmci_datagram_queue_entry is a queue header for the in-kernel VMCI + * datagram queues. It is allocated in non-paged memory, as the + * content is accessed while holding a spinlock. The pending datagram + * itself may be allocated from paged memory. We shadow the size of + * the datagram in the non-paged queue entry as this size is used + * while holding the same spinlock as above. + */ +struct vmci_datagram_queue_entry { + struct list_head list_item; /* For queuing. */ + size_t dg_size; /* Size of datagram. */ + struct vmci_datagram *dg; /* Pending datagram. */ +}; + +/* VMCIDatagramSendRecvInfo */ +struct vmci_datagram_snd_rcv_info { + u64 addr; + u32 len; + s32 result; +}; + +/* Datagram API for non-public use. */ +int vmci_datagram_dispatch(u32 context_id, struct vmci_datagram *dg, + bool from_guest); +int vmci_datagram_invoke_guest_handler(struct vmci_datagram *dg); + +#endif /* _VMCI_DATAGRAM_H_ */ diff --git a/drivers/misc/vmw_vmci/vmci_doorbell.c b/drivers/misc/vmw_vmci/vmci_doorbell.c new file mode 100644 index 000000000..458121034 --- /dev/null +++ b/drivers/misc/vmw_vmci/vmci_doorbell.c @@ -0,0 +1,609 @@ +/* + * VMware VMCI Driver + * + * Copyright (C) 2012 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "vmci_datagram.h" +#include "vmci_doorbell.h" +#include "vmci_resource.h" +#include "vmci_driver.h" +#include "vmci_route.h" + + +#define VMCI_DOORBELL_INDEX_BITS 6 +#define VMCI_DOORBELL_INDEX_TABLE_SIZE (1 << VMCI_DOORBELL_INDEX_BITS) +#define VMCI_DOORBELL_HASH(_idx) hash_32(_idx, VMCI_DOORBELL_INDEX_BITS) + +/* + * DoorbellEntry describes the a doorbell notification handle allocated by the + * host. + */ +struct dbell_entry { + struct vmci_resource resource; + struct hlist_node node; + struct work_struct work; + vmci_callback notify_cb; + void *client_data; + u32 idx; + u32 priv_flags; + bool run_delayed; + atomic_t active; /* Only used by guest personality */ +}; + +/* The VMCI index table keeps track of currently registered doorbells. */ +struct dbell_index_table { + spinlock_t lock; /* Index table lock */ + struct hlist_head entries[VMCI_DOORBELL_INDEX_TABLE_SIZE]; +}; + +static struct dbell_index_table vmci_doorbell_it = { + .lock = __SPIN_LOCK_UNLOCKED(vmci_doorbell_it.lock), +}; + +/* + * The max_notify_idx is one larger than the currently known bitmap index in + * use, and is used to determine how much of the bitmap needs to be scanned. + */ +static u32 max_notify_idx; + +/* + * The notify_idx_count is used for determining whether there are free entries + * within the bitmap (if notify_idx_count + 1 < max_notify_idx). + */ +static u32 notify_idx_count; + +/* + * The last_notify_idx_reserved is used to track the last index handed out - in + * the case where multiple handles share a notification index, we hand out + * indexes round robin based on last_notify_idx_reserved. + */ +static u32 last_notify_idx_reserved; + +/* This is a one entry cache used to by the index allocation. */ +static u32 last_notify_idx_released = PAGE_SIZE; + + +/* + * Utility function that retrieves the privilege flags associated + * with a given doorbell handle. For guest endpoints, the + * privileges are determined by the context ID, but for host + * endpoints privileges are associated with the complete + * handle. Hypervisor endpoints are not yet supported. + */ +int vmci_dbell_get_priv_flags(struct vmci_handle handle, u32 *priv_flags) +{ + if (priv_flags == NULL || handle.context == VMCI_INVALID_ID) + return VMCI_ERROR_INVALID_ARGS; + + if (handle.context == VMCI_HOST_CONTEXT_ID) { + struct dbell_entry *entry; + struct vmci_resource *resource; + + resource = vmci_resource_by_handle(handle, + VMCI_RESOURCE_TYPE_DOORBELL); + if (!resource) + return VMCI_ERROR_NOT_FOUND; + + entry = container_of(resource, struct dbell_entry, resource); + *priv_flags = entry->priv_flags; + vmci_resource_put(resource); + } else if (handle.context == VMCI_HYPERVISOR_CONTEXT_ID) { + /* + * Hypervisor endpoints for notifications are not + * supported (yet). + */ + return VMCI_ERROR_INVALID_ARGS; + } else { + *priv_flags = vmci_context_get_priv_flags(handle.context); + } + + return VMCI_SUCCESS; +} + +/* + * Find doorbell entry by bitmap index. + */ +static struct dbell_entry *dbell_index_table_find(u32 idx) +{ + u32 bucket = VMCI_DOORBELL_HASH(idx); + struct dbell_entry *dbell; + + hlist_for_each_entry(dbell, &vmci_doorbell_it.entries[bucket], + node) { + if (idx == dbell->idx) + return dbell; + } + + return NULL; +} + +/* + * Add the given entry to the index table. This willi take a reference to the + * entry's resource so that the entry is not deleted before it is removed from + * the * table. + */ +static void dbell_index_table_add(struct dbell_entry *entry) +{ + u32 bucket; + u32 new_notify_idx; + + vmci_resource_get(&entry->resource); + + spin_lock_bh(&vmci_doorbell_it.lock); + + /* + * Below we try to allocate an index in the notification + * bitmap with "not too much" sharing between resources. If we + * use less that the full bitmap, we either add to the end if + * there are no unused flags within the currently used area, + * or we search for unused ones. If we use the full bitmap, we + * allocate the index round robin. + */ + if (max_notify_idx < PAGE_SIZE || notify_idx_count < PAGE_SIZE) { + if (last_notify_idx_released < max_notify_idx && + !dbell_index_table_find(last_notify_idx_released)) { + new_notify_idx = last_notify_idx_released; + last_notify_idx_released = PAGE_SIZE; + } else { + bool reused = false; + new_notify_idx = last_notify_idx_reserved; + if (notify_idx_count + 1 < max_notify_idx) { + do { + if (!dbell_index_table_find + (new_notify_idx)) { + reused = true; + break; + } + new_notify_idx = (new_notify_idx + 1) % + max_notify_idx; + } while (new_notify_idx != + last_notify_idx_released); + } + if (!reused) { + new_notify_idx = max_notify_idx; + max_notify_idx++; + } + } + } else { + new_notify_idx = (last_notify_idx_reserved + 1) % PAGE_SIZE; + } + + last_notify_idx_reserved = new_notify_idx; + notify_idx_count++; + + entry->idx = new_notify_idx; + bucket = VMCI_DOORBELL_HASH(entry->idx); + hlist_add_head(&entry->node, &vmci_doorbell_it.entries[bucket]); + + spin_unlock_bh(&vmci_doorbell_it.lock); +} + +/* + * Remove the given entry from the index table. This will release() the + * entry's resource. + */ +static void dbell_index_table_remove(struct dbell_entry *entry) +{ + spin_lock_bh(&vmci_doorbell_it.lock); + + hlist_del_init(&entry->node); + + notify_idx_count--; + if (entry->idx == max_notify_idx - 1) { + /* + * If we delete an entry with the maximum known + * notification index, we take the opportunity to + * prune the current max. As there might be other + * unused indices immediately below, we lower the + * maximum until we hit an index in use. + */ + while (max_notify_idx > 0 && + !dbell_index_table_find(max_notify_idx - 1)) + max_notify_idx--; + } + + last_notify_idx_released = entry->idx; + + spin_unlock_bh(&vmci_doorbell_it.lock); + + vmci_resource_put(&entry->resource); +} + +/* + * Creates a link between the given doorbell handle and the given + * index in the bitmap in the device backend. A notification state + * is created in hypervisor. + */ +static int dbell_link(struct vmci_handle handle, u32 notify_idx) +{ + struct vmci_doorbell_link_msg link_msg; + + link_msg.hdr.dst = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID, + VMCI_DOORBELL_LINK); + link_msg.hdr.src = VMCI_ANON_SRC_HANDLE; + link_msg.hdr.payload_size = sizeof(link_msg) - VMCI_DG_HEADERSIZE; + link_msg.handle = handle; + link_msg.notify_idx = notify_idx; + + return vmci_send_datagram(&link_msg.hdr); +} + +/* + * Unlinks the given doorbell handle from an index in the bitmap in + * the device backend. The notification state is destroyed in hypervisor. + */ +static int dbell_unlink(struct vmci_handle handle) +{ + struct vmci_doorbell_unlink_msg unlink_msg; + + unlink_msg.hdr.dst = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID, + VMCI_DOORBELL_UNLINK); + unlink_msg.hdr.src = VMCI_ANON_SRC_HANDLE; + unlink_msg.hdr.payload_size = sizeof(unlink_msg) - VMCI_DG_HEADERSIZE; + unlink_msg.handle = handle; + + return vmci_send_datagram(&unlink_msg.hdr); +} + +/* + * Notify another guest or the host. We send a datagram down to the + * host via the hypervisor with the notification info. + */ +static int dbell_notify_as_guest(struct vmci_handle handle, u32 priv_flags) +{ + struct vmci_doorbell_notify_msg notify_msg; + + notify_msg.hdr.dst = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID, + VMCI_DOORBELL_NOTIFY); + notify_msg.hdr.src = VMCI_ANON_SRC_HANDLE; + notify_msg.hdr.payload_size = sizeof(notify_msg) - VMCI_DG_HEADERSIZE; + notify_msg.handle = handle; + + return vmci_send_datagram(¬ify_msg.hdr); +} + +/* + * Calls the specified callback in a delayed context. + */ +static void dbell_delayed_dispatch(struct work_struct *work) +{ + struct dbell_entry *entry = container_of(work, + struct dbell_entry, work); + + entry->notify_cb(entry->client_data); + vmci_resource_put(&entry->resource); +} + +/* + * Dispatches a doorbell notification to the host context. + */ +int vmci_dbell_host_context_notify(u32 src_cid, struct vmci_handle handle) +{ + struct dbell_entry *entry; + struct vmci_resource *resource; + + if (vmci_handle_is_invalid(handle)) { + pr_devel("Notifying an invalid doorbell (handle=0x%x:0x%x)\n", + handle.context, handle.resource); + return VMCI_ERROR_INVALID_ARGS; + } + + resource = vmci_resource_by_handle(handle, + VMCI_RESOURCE_TYPE_DOORBELL); + if (!resource) { + pr_devel("Notifying an unknown doorbell (handle=0x%x:0x%x)\n", + handle.context, handle.resource); + return VMCI_ERROR_NOT_FOUND; + } + + entry = container_of(resource, struct dbell_entry, resource); + if (entry->run_delayed) { + if (!schedule_work(&entry->work)) + vmci_resource_put(resource); + } else { + entry->notify_cb(entry->client_data); + vmci_resource_put(resource); + } + + return VMCI_SUCCESS; +} + +/* + * Register the notification bitmap with the host. + */ +bool vmci_dbell_register_notification_bitmap(u32 bitmap_ppn) +{ + int result; + struct vmci_notify_bm_set_msg bitmap_set_msg = { }; + + bitmap_set_msg.hdr.dst = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID, + VMCI_SET_NOTIFY_BITMAP); + bitmap_set_msg.hdr.src = VMCI_ANON_SRC_HANDLE; + bitmap_set_msg.hdr.payload_size = sizeof(bitmap_set_msg) - + VMCI_DG_HEADERSIZE; + bitmap_set_msg.bitmap_ppn = bitmap_ppn; + + result = vmci_send_datagram(&bitmap_set_msg.hdr); + if (result != VMCI_SUCCESS) { + pr_devel("Failed to register (PPN=%u) as notification bitmap (error=%d)\n", + bitmap_ppn, result); + return false; + } + return true; +} + +/* + * Executes or schedules the handlers for a given notify index. + */ +static void dbell_fire_entries(u32 notify_idx) +{ + u32 bucket = VMCI_DOORBELL_HASH(notify_idx); + struct dbell_entry *dbell; + + spin_lock_bh(&vmci_doorbell_it.lock); + + hlist_for_each_entry(dbell, &vmci_doorbell_it.entries[bucket], node) { + if (dbell->idx == notify_idx && + atomic_read(&dbell->active) == 1) { + if (dbell->run_delayed) { + vmci_resource_get(&dbell->resource); + if (!schedule_work(&dbell->work)) + vmci_resource_put(&dbell->resource); + } else { + dbell->notify_cb(dbell->client_data); + } + } + } + + spin_unlock_bh(&vmci_doorbell_it.lock); +} + +/* + * Scans the notification bitmap, collects pending notifications, + * resets the bitmap and invokes appropriate callbacks. + */ +void vmci_dbell_scan_notification_entries(u8 *bitmap) +{ + u32 idx; + + for (idx = 0; idx < max_notify_idx; idx++) { + if (bitmap[idx] & 0x1) { + bitmap[idx] &= ~1; + dbell_fire_entries(idx); + } + } +} + +/* + * vmci_doorbell_create() - Creates a doorbell + * @handle: A handle used to track the resource. Can be invalid. + * @flags: Flag that determines context of callback. + * @priv_flags: Privileges flags. + * @notify_cb: The callback to be ivoked when the doorbell fires. + * @client_data: A parameter to be passed to the callback. + * + * Creates a doorbell with the given callback. If the handle is + * VMCI_INVALID_HANDLE, a free handle will be assigned, if + * possible. The callback can be run immediately (potentially with + * locks held - the default) or delayed (in a kernel thread) by + * specifying the flag VMCI_FLAG_DELAYED_CB. If delayed execution + * is selected, a given callback may not be run if the kernel is + * unable to allocate memory for the delayed execution (highly + * unlikely). + */ +int vmci_doorbell_create(struct vmci_handle *handle, + u32 flags, + u32 priv_flags, + vmci_callback notify_cb, void *client_data) +{ + struct dbell_entry *entry; + struct vmci_handle new_handle; + int result; + + if (!handle || !notify_cb || flags & ~VMCI_FLAG_DELAYED_CB || + priv_flags & ~VMCI_PRIVILEGE_ALL_FLAGS) + return VMCI_ERROR_INVALID_ARGS; + + entry = kmalloc(sizeof(*entry), GFP_KERNEL); + if (entry == NULL) { + pr_warn("Failed allocating memory for datagram entry\n"); + return VMCI_ERROR_NO_MEM; + } + + if (vmci_handle_is_invalid(*handle)) { + u32 context_id = vmci_get_context_id(); + + if (context_id == VMCI_INVALID_ID) { + pr_warn("Failed to get context ID\n"); + result = VMCI_ERROR_NO_RESOURCES; + goto free_mem; + } + + /* Let resource code allocate a free ID for us */ + new_handle = vmci_make_handle(context_id, VMCI_INVALID_ID); + } else { + bool valid_context = false; + + /* + * Validate the handle. We must do both of the checks below + * because we can be acting as both a host and a guest at the + * same time. We always allow the host context ID, since the + * host functionality is in practice always there with the + * unified driver. + */ + if (handle->context == VMCI_HOST_CONTEXT_ID || + (vmci_guest_code_active() && + vmci_get_context_id() == handle->context)) { + valid_context = true; + } + + if (!valid_context || handle->resource == VMCI_INVALID_ID) { + pr_devel("Invalid argument (handle=0x%x:0x%x)\n", + handle->context, handle->resource); + result = VMCI_ERROR_INVALID_ARGS; + goto free_mem; + } + + new_handle = *handle; + } + + entry->idx = 0; + INIT_HLIST_NODE(&entry->node); + entry->priv_flags = priv_flags; + INIT_WORK(&entry->work, dbell_delayed_dispatch); + entry->run_delayed = flags & VMCI_FLAG_DELAYED_CB; + entry->notify_cb = notify_cb; + entry->client_data = client_data; + atomic_set(&entry->active, 0); + + result = vmci_resource_add(&entry->resource, + VMCI_RESOURCE_TYPE_DOORBELL, + new_handle); + if (result != VMCI_SUCCESS) { + pr_warn("Failed to add new resource (handle=0x%x:0x%x), error: %d\n", + new_handle.context, new_handle.resource, result); + goto free_mem; + } + + new_handle = vmci_resource_handle(&entry->resource); + if (vmci_guest_code_active()) { + dbell_index_table_add(entry); + result = dbell_link(new_handle, entry->idx); + if (VMCI_SUCCESS != result) + goto destroy_resource; + + atomic_set(&entry->active, 1); + } + + *handle = new_handle; + + return result; + + destroy_resource: + dbell_index_table_remove(entry); + vmci_resource_remove(&entry->resource); + free_mem: + kfree(entry); + return result; +} +EXPORT_SYMBOL_GPL(vmci_doorbell_create); + +/* + * vmci_doorbell_destroy() - Destroy a doorbell. + * @handle: The handle tracking the resource. + * + * Destroys a doorbell previously created with vmcii_doorbell_create. This + * operation may block waiting for a callback to finish. + */ +int vmci_doorbell_destroy(struct vmci_handle handle) +{ + struct dbell_entry *entry; + struct vmci_resource *resource; + + if (vmci_handle_is_invalid(handle)) + return VMCI_ERROR_INVALID_ARGS; + + resource = vmci_resource_by_handle(handle, + VMCI_RESOURCE_TYPE_DOORBELL); + if (!resource) { + pr_devel("Failed to destroy doorbell (handle=0x%x:0x%x)\n", + handle.context, handle.resource); + return VMCI_ERROR_NOT_FOUND; + } + + entry = container_of(resource, struct dbell_entry, resource); + + if (!hlist_unhashed(&entry->node)) { + int result; + + dbell_index_table_remove(entry); + + result = dbell_unlink(handle); + if (VMCI_SUCCESS != result) { + + /* + * The only reason this should fail would be + * an inconsistency between guest and + * hypervisor state, where the guest believes + * it has an active registration whereas the + * hypervisor doesn't. One case where this may + * happen is if a doorbell is unregistered + * following a hibernation at a time where the + * doorbell state hasn't been restored on the + * hypervisor side yet. Since the handle has + * now been removed in the guest, we just + * print a warning and return success. + */ + pr_devel("Unlink of doorbell (handle=0x%x:0x%x) unknown by hypervisor (error=%d)\n", + handle.context, handle.resource, result); + } + } + + /* + * Now remove the resource from the table. It might still be in use + * after this, in a callback or still on the delayed work queue. + */ + vmci_resource_put(&entry->resource); + vmci_resource_remove(&entry->resource); + + kfree(entry); + + return VMCI_SUCCESS; +} +EXPORT_SYMBOL_GPL(vmci_doorbell_destroy); + +/* + * vmci_doorbell_notify() - Ring the doorbell (and hide in the bushes). + * @dst: The handlle identifying the doorbell resource + * @priv_flags: Priviledge flags. + * + * Generates a notification on the doorbell identified by the + * handle. For host side generation of notifications, the caller + * can specify what the privilege of the calling side is. + */ +int vmci_doorbell_notify(struct vmci_handle dst, u32 priv_flags) +{ + int retval; + enum vmci_route route; + struct vmci_handle src; + + if (vmci_handle_is_invalid(dst) || + (priv_flags & ~VMCI_PRIVILEGE_ALL_FLAGS)) + return VMCI_ERROR_INVALID_ARGS; + + src = VMCI_INVALID_HANDLE; + retval = vmci_route(&src, &dst, false, &route); + if (retval < VMCI_SUCCESS) + return retval; + + if (VMCI_ROUTE_AS_HOST == route) + return vmci_ctx_notify_dbell(VMCI_HOST_CONTEXT_ID, + dst, priv_flags); + + if (VMCI_ROUTE_AS_GUEST == route) + return dbell_notify_as_guest(dst, priv_flags); + + pr_warn("Unknown route (%d) for doorbell\n", route); + return VMCI_ERROR_DST_UNREACHABLE; +} +EXPORT_SYMBOL_GPL(vmci_doorbell_notify); diff --git a/drivers/misc/vmw_vmci/vmci_doorbell.h b/drivers/misc/vmw_vmci/vmci_doorbell.h new file mode 100644 index 000000000..e4c0b1748 --- /dev/null +++ b/drivers/misc/vmw_vmci/vmci_doorbell.h @@ -0,0 +1,51 @@ +/* + * VMware VMCI Driver + * + * Copyright (C) 2012 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + */ + +#ifndef VMCI_DOORBELL_H +#define VMCI_DOORBELL_H + +#include +#include + +#include "vmci_driver.h" + +/* + * VMCINotifyResourceInfo: Used to create and destroy doorbells, and + * generate a notification for a doorbell or queue pair. + */ +struct vmci_dbell_notify_resource_info { + struct vmci_handle handle; + u16 resource; + u16 action; + s32 result; +}; + +/* + * Structure used for checkpointing the doorbell mappings. It is + * written to the checkpoint as is, so changing this structure will + * break checkpoint compatibility. + */ +struct dbell_cpt_state { + struct vmci_handle handle; + u64 bitmap_idx; +}; + +int vmci_dbell_host_context_notify(u32 src_cid, struct vmci_handle handle); +int vmci_dbell_get_priv_flags(struct vmci_handle handle, u32 *priv_flags); + +bool vmci_dbell_register_notification_bitmap(u32 bitmap_ppn); +void vmci_dbell_scan_notification_entries(u8 *bitmap); + +#endif /* VMCI_DOORBELL_H */ diff --git a/drivers/misc/vmw_vmci/vmci_driver.c b/drivers/misc/vmw_vmci/vmci_driver.c new file mode 100644 index 000000000..003bfba40 --- /dev/null +++ b/drivers/misc/vmw_vmci/vmci_driver.c @@ -0,0 +1,117 @@ +/* + * VMware VMCI Driver + * + * Copyright (C) 2012 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + */ + +#include +#include +#include +#include +#include +#include + +#include "vmci_driver.h" +#include "vmci_event.h" + +static bool vmci_disable_host; +module_param_named(disable_host, vmci_disable_host, bool, 0); +MODULE_PARM_DESC(disable_host, + "Disable driver host personality (default=enabled)"); + +static bool vmci_disable_guest; +module_param_named(disable_guest, vmci_disable_guest, bool, 0); +MODULE_PARM_DESC(disable_guest, + "Disable driver guest personality (default=enabled)"); + +static bool vmci_guest_personality_initialized; +static bool vmci_host_personality_initialized; + +/* + * vmci_get_context_id() - Gets the current context ID. + * + * Returns the current context ID. Note that since this is accessed only + * from code running in the host, this always returns the host context ID. + */ +u32 vmci_get_context_id(void) +{ + if (vmci_guest_code_active()) + return vmci_get_vm_context_id(); + else if (vmci_host_code_active()) + return VMCI_HOST_CONTEXT_ID; + + return VMCI_INVALID_ID; +} +EXPORT_SYMBOL_GPL(vmci_get_context_id); + +static int __init vmci_drv_init(void) +{ + int vmci_err; + int error; + + vmci_err = vmci_event_init(); + if (vmci_err < VMCI_SUCCESS) { + pr_err("Failed to initialize VMCIEvent (result=%d)\n", + vmci_err); + return -EINVAL; + } + + if (!vmci_disable_guest) { + error = vmci_guest_init(); + if (error) { + pr_warn("Failed to initialize guest personality (err=%d)\n", + error); + } else { + vmci_guest_personality_initialized = true; + pr_info("Guest personality initialized and is %s\n", + vmci_guest_code_active() ? + "active" : "inactive"); + } + } + + if (!vmci_disable_host) { + error = vmci_host_init(); + if (error) { + pr_warn("Unable to initialize host personality (err=%d)\n", + error); + } else { + vmci_host_personality_initialized = true; + pr_info("Initialized host personality\n"); + } + } + + if (!vmci_guest_personality_initialized && + !vmci_host_personality_initialized) { + vmci_event_exit(); + return -ENODEV; + } + + return 0; +} +module_init(vmci_drv_init); + +static void __exit vmci_drv_exit(void) +{ + if (vmci_guest_personality_initialized) + vmci_guest_exit(); + + if (vmci_host_personality_initialized) + vmci_host_exit(); + + vmci_event_exit(); +} +module_exit(vmci_drv_exit); + +MODULE_AUTHOR("VMware, Inc."); +MODULE_DESCRIPTION("VMware Virtual Machine Communication Interface."); +MODULE_VERSION("1.1.6.0-k"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/misc/vmw_vmci/vmci_driver.h b/drivers/misc/vmw_vmci/vmci_driver.h new file mode 100644 index 000000000..cee9e977d --- /dev/null +++ b/drivers/misc/vmw_vmci/vmci_driver.h @@ -0,0 +1,57 @@ +/* + * VMware VMCI Driver + * + * Copyright (C) 2012 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + */ + +#ifndef _VMCI_DRIVER_H_ +#define _VMCI_DRIVER_H_ + +#include +#include + +#include "vmci_queue_pair.h" +#include "vmci_context.h" + +enum vmci_obj_type { + VMCIOBJ_VMX_VM = 10, + VMCIOBJ_CONTEXT, + VMCIOBJ_SOCKET, + VMCIOBJ_NOT_SET, +}; + +/* For storing VMCI structures in file handles. */ +struct vmci_obj { + void *ptr; + enum vmci_obj_type type; +}; + +/* + * Needed by other components of this module. It's okay to have one global + * instance of this because there can only ever be one VMCI device. Our + * virtual hardware enforces this. + */ +extern struct pci_dev *vmci_pdev; + +u32 vmci_get_context_id(void); +int vmci_send_datagram(struct vmci_datagram *dg); + +int vmci_host_init(void); +void vmci_host_exit(void); +bool vmci_host_code_active(void); + +int vmci_guest_init(void); +void vmci_guest_exit(void); +bool vmci_guest_code_active(void); +u32 vmci_get_vm_context_id(void); + +#endif /* _VMCI_DRIVER_H_ */ diff --git a/drivers/misc/vmw_vmci/vmci_event.c b/drivers/misc/vmw_vmci/vmci_event.c new file mode 100644 index 000000000..84258a480 --- /dev/null +++ b/drivers/misc/vmw_vmci/vmci_event.c @@ -0,0 +1,225 @@ +/* + * VMware VMCI Driver + * + * Copyright (C) 2012 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "vmci_driver.h" +#include "vmci_event.h" + +#define EVENT_MAGIC 0xEABE0000 +#define VMCI_EVENT_MAX_ATTEMPTS 10 + +struct vmci_subscription { + u32 id; + u32 event; + vmci_event_cb callback; + void *callback_data; + struct list_head node; /* on one of subscriber lists */ +}; + +static struct list_head subscriber_array[VMCI_EVENT_MAX]; +static DEFINE_MUTEX(subscriber_mutex); + +int __init vmci_event_init(void) +{ + int i; + + for (i = 0; i < VMCI_EVENT_MAX; i++) + INIT_LIST_HEAD(&subscriber_array[i]); + + return VMCI_SUCCESS; +} + +void vmci_event_exit(void) +{ + int e; + + /* We free all memory at exit. */ + for (e = 0; e < VMCI_EVENT_MAX; e++) { + struct vmci_subscription *cur, *p2; + list_for_each_entry_safe(cur, p2, &subscriber_array[e], node) { + + /* + * We should never get here because all events + * should have been unregistered before we try + * to unload the driver module. + */ + pr_warn("Unexpected free events occurring\n"); + list_del(&cur->node); + kfree(cur); + } + } +} + +/* + * Find entry. Assumes subscriber_mutex is held. + */ +static struct vmci_subscription *event_find(u32 sub_id) +{ + int e; + + for (e = 0; e < VMCI_EVENT_MAX; e++) { + struct vmci_subscription *cur; + list_for_each_entry(cur, &subscriber_array[e], node) { + if (cur->id == sub_id) + return cur; + } + } + return NULL; +} + +/* + * Actually delivers the events to the subscribers. + * The callback function for each subscriber is invoked. + */ +static void event_deliver(struct vmci_event_msg *event_msg) +{ + struct vmci_subscription *cur; + struct list_head *subscriber_list; + + rcu_read_lock(); + subscriber_list = &subscriber_array[event_msg->event_data.event]; + list_for_each_entry_rcu(cur, subscriber_list, node) { + cur->callback(cur->id, &event_msg->event_data, + cur->callback_data); + } + rcu_read_unlock(); +} + +/* + * Dispatcher for the VMCI_EVENT_RECEIVE datagrams. Calls all + * subscribers for given event. + */ +int vmci_event_dispatch(struct vmci_datagram *msg) +{ + struct vmci_event_msg *event_msg = (struct vmci_event_msg *)msg; + + if (msg->payload_size < sizeof(u32) || + msg->payload_size > sizeof(struct vmci_event_data_max)) + return VMCI_ERROR_INVALID_ARGS; + + if (!VMCI_EVENT_VALID(event_msg->event_data.event)) + return VMCI_ERROR_EVENT_UNKNOWN; + + event_deliver(event_msg); + return VMCI_SUCCESS; +} + +/* + * vmci_event_subscribe() - Subscribe to a given event. + * @event: The event to subscribe to. + * @callback: The callback to invoke upon the event. + * @callback_data: Data to pass to the callback. + * @subscription_id: ID used to track subscription. Used with + * vmci_event_unsubscribe() + * + * Subscribes to the provided event. The callback specified will be + * fired from RCU critical section and therefore must not sleep. + */ +int vmci_event_subscribe(u32 event, + vmci_event_cb callback, + void *callback_data, + u32 *new_subscription_id) +{ + struct vmci_subscription *sub; + int attempts; + int retval; + bool have_new_id = false; + + if (!new_subscription_id) { + pr_devel("%s: Invalid subscription (NULL)\n", __func__); + return VMCI_ERROR_INVALID_ARGS; + } + + if (!VMCI_EVENT_VALID(event) || !callback) { + pr_devel("%s: Failed to subscribe to event (type=%d) (callback=%p) (data=%p)\n", + __func__, event, callback, callback_data); + return VMCI_ERROR_INVALID_ARGS; + } + + sub = kzalloc(sizeof(*sub), GFP_KERNEL); + if (!sub) + return VMCI_ERROR_NO_MEM; + + sub->id = VMCI_EVENT_MAX; + sub->event = event; + sub->callback = callback; + sub->callback_data = callback_data; + INIT_LIST_HEAD(&sub->node); + + mutex_lock(&subscriber_mutex); + + /* Creation of a new event is always allowed. */ + for (attempts = 0; attempts < VMCI_EVENT_MAX_ATTEMPTS; attempts++) { + static u32 subscription_id; + /* + * We try to get an id a couple of time before + * claiming we are out of resources. + */ + + /* Test for duplicate id. */ + if (!event_find(++subscription_id)) { + sub->id = subscription_id; + have_new_id = true; + break; + } + } + + if (have_new_id) { + list_add_rcu(&sub->node, &subscriber_array[event]); + retval = VMCI_SUCCESS; + } else { + retval = VMCI_ERROR_NO_RESOURCES; + } + + mutex_unlock(&subscriber_mutex); + + *new_subscription_id = sub->id; + return retval; +} +EXPORT_SYMBOL_GPL(vmci_event_subscribe); + +/* + * vmci_event_unsubscribe() - unsubscribe from an event. + * @sub_id: A subscription ID as provided by vmci_event_subscribe() + * + * Unsubscribe from given event. Removes it from list and frees it. + * Will return callback_data if requested by caller. + */ +int vmci_event_unsubscribe(u32 sub_id) +{ + struct vmci_subscription *s; + + mutex_lock(&subscriber_mutex); + s = event_find(sub_id); + if (s) + list_del_rcu(&s->node); + mutex_unlock(&subscriber_mutex); + + if (!s) + return VMCI_ERROR_NOT_FOUND; + + synchronize_rcu(); + kfree(s); + + return VMCI_SUCCESS; +} +EXPORT_SYMBOL_GPL(vmci_event_unsubscribe); diff --git a/drivers/misc/vmw_vmci/vmci_event.h b/drivers/misc/vmw_vmci/vmci_event.h new file mode 100644 index 000000000..7df9b1c0a --- /dev/null +++ b/drivers/misc/vmw_vmci/vmci_event.h @@ -0,0 +1,25 @@ +/* + * VMware VMCI Driver + * + * Copyright (C) 2012 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + */ + +#ifndef __VMCI_EVENT_H__ +#define __VMCI_EVENT_H__ + +#include + +int vmci_event_init(void); +void vmci_event_exit(void); +int vmci_event_dispatch(struct vmci_datagram *msg); + +#endif /*__VMCI_EVENT_H__ */ diff --git a/drivers/misc/vmw_vmci/vmci_guest.c b/drivers/misc/vmw_vmci/vmci_guest.c new file mode 100644 index 000000000..dd20ea4ad --- /dev/null +++ b/drivers/misc/vmw_vmci/vmci_guest.c @@ -0,0 +1,736 @@ +/* + * VMware VMCI Driver + * + * Copyright (C) 2012 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "vmci_datagram.h" +#include "vmci_doorbell.h" +#include "vmci_context.h" +#include "vmci_driver.h" +#include "vmci_event.h" + +#define PCI_DEVICE_ID_VMWARE_VMCI 0x0740 + +#define VMCI_UTIL_NUM_RESOURCES 1 + +static bool vmci_disable_msi; +module_param_named(disable_msi, vmci_disable_msi, bool, 0); +MODULE_PARM_DESC(disable_msi, "Disable MSI use in driver - (default=0)"); + +static bool vmci_disable_msix; +module_param_named(disable_msix, vmci_disable_msix, bool, 0); +MODULE_PARM_DESC(disable_msix, "Disable MSI-X use in driver - (default=0)"); + +static u32 ctx_update_sub_id = VMCI_INVALID_ID; +static u32 vm_context_id = VMCI_INVALID_ID; + +struct vmci_guest_device { + struct device *dev; /* PCI device we are attached to */ + void __iomem *iobase; + + bool exclusive_vectors; + + struct tasklet_struct datagram_tasklet; + struct tasklet_struct bm_tasklet; + + void *data_buffer; + void *notification_bitmap; + dma_addr_t notification_base; +}; + +/* vmci_dev singleton device and supporting data*/ +struct pci_dev *vmci_pdev; +static struct vmci_guest_device *vmci_dev_g; +static DEFINE_SPINLOCK(vmci_dev_spinlock); + +static atomic_t vmci_num_guest_devices = ATOMIC_INIT(0); + +bool vmci_guest_code_active(void) +{ + return atomic_read(&vmci_num_guest_devices) != 0; +} + +u32 vmci_get_vm_context_id(void) +{ + if (vm_context_id == VMCI_INVALID_ID) { + struct vmci_datagram get_cid_msg; + get_cid_msg.dst = + vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID, + VMCI_GET_CONTEXT_ID); + get_cid_msg.src = VMCI_ANON_SRC_HANDLE; + get_cid_msg.payload_size = 0; + vm_context_id = vmci_send_datagram(&get_cid_msg); + } + return vm_context_id; +} + +/* + * VM to hypervisor call mechanism. We use the standard VMware naming + * convention since shared code is calling this function as well. + */ +int vmci_send_datagram(struct vmci_datagram *dg) +{ + unsigned long flags; + int result; + + /* Check args. */ + if (dg == NULL) + return VMCI_ERROR_INVALID_ARGS; + + /* + * Need to acquire spinlock on the device because the datagram + * data may be spread over multiple pages and the monitor may + * interleave device user rpc calls from multiple + * VCPUs. Acquiring the spinlock precludes that + * possibility. Disabling interrupts to avoid incoming + * datagrams during a "rep out" and possibly landing up in + * this function. + */ + spin_lock_irqsave(&vmci_dev_spinlock, flags); + + if (vmci_dev_g) { + iowrite8_rep(vmci_dev_g->iobase + VMCI_DATA_OUT_ADDR, + dg, VMCI_DG_SIZE(dg)); + result = ioread32(vmci_dev_g->iobase + VMCI_RESULT_LOW_ADDR); + } else { + result = VMCI_ERROR_UNAVAILABLE; + } + + spin_unlock_irqrestore(&vmci_dev_spinlock, flags); + + return result; +} +EXPORT_SYMBOL_GPL(vmci_send_datagram); + +/* + * Gets called with the new context id if updated or resumed. + * Context id. + */ +static void vmci_guest_cid_update(u32 sub_id, + const struct vmci_event_data *event_data, + void *client_data) +{ + const struct vmci_event_payld_ctx *ev_payload = + vmci_event_data_const_payload(event_data); + + if (sub_id != ctx_update_sub_id) { + pr_devel("Invalid subscriber (ID=0x%x)\n", sub_id); + return; + } + + if (!event_data || ev_payload->context_id == VMCI_INVALID_ID) { + pr_devel("Invalid event data\n"); + return; + } + + pr_devel("Updating context from (ID=0x%x) to (ID=0x%x) on event (type=%d)\n", + vm_context_id, ev_payload->context_id, event_data->event); + + vm_context_id = ev_payload->context_id; +} + +/* + * Verify that the host supports the hypercalls we need. If it does not, + * try to find fallback hypercalls and use those instead. Returns + * true if required hypercalls (or fallback hypercalls) are + * supported by the host, false otherwise. + */ +static int vmci_check_host_caps(struct pci_dev *pdev) +{ + bool result; + struct vmci_resource_query_msg *msg; + u32 msg_size = sizeof(struct vmci_resource_query_hdr) + + VMCI_UTIL_NUM_RESOURCES * sizeof(u32); + struct vmci_datagram *check_msg; + + check_msg = kzalloc(msg_size, GFP_KERNEL); + if (!check_msg) { + dev_err(&pdev->dev, "%s: Insufficient memory\n", __func__); + return -ENOMEM; + } + + check_msg->dst = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID, + VMCI_RESOURCES_QUERY); + check_msg->src = VMCI_ANON_SRC_HANDLE; + check_msg->payload_size = msg_size - VMCI_DG_HEADERSIZE; + msg = (struct vmci_resource_query_msg *)VMCI_DG_PAYLOAD(check_msg); + + msg->num_resources = VMCI_UTIL_NUM_RESOURCES; + msg->resources[0] = VMCI_GET_CONTEXT_ID; + + /* Checks that hyper calls are supported */ + result = vmci_send_datagram(check_msg) == 0x01; + kfree(check_msg); + + dev_dbg(&pdev->dev, "%s: Host capability check: %s\n", + __func__, result ? "PASSED" : "FAILED"); + + /* We need the vector. There are no fallbacks. */ + return result ? 0 : -ENXIO; +} + +/* + * Reads datagrams from the data in port and dispatches them. We + * always start reading datagrams into only the first page of the + * datagram buffer. If the datagrams don't fit into one page, we + * use the maximum datagram buffer size for the remainder of the + * invocation. This is a simple heuristic for not penalizing + * small datagrams. + * + * This function assumes that it has exclusive access to the data + * in port for the duration of the call. + */ +static void vmci_dispatch_dgs(unsigned long data) +{ + struct vmci_guest_device *vmci_dev = (struct vmci_guest_device *)data; + u8 *dg_in_buffer = vmci_dev->data_buffer; + struct vmci_datagram *dg; + size_t dg_in_buffer_size = VMCI_MAX_DG_SIZE; + size_t current_dg_in_buffer_size = PAGE_SIZE; + size_t remaining_bytes; + + BUILD_BUG_ON(VMCI_MAX_DG_SIZE < PAGE_SIZE); + + ioread8_rep(vmci_dev->iobase + VMCI_DATA_IN_ADDR, + vmci_dev->data_buffer, current_dg_in_buffer_size); + dg = (struct vmci_datagram *)dg_in_buffer; + remaining_bytes = current_dg_in_buffer_size; + + while (dg->dst.resource != VMCI_INVALID_ID || + remaining_bytes > PAGE_SIZE) { + unsigned dg_in_size; + + /* + * When the input buffer spans multiple pages, a datagram can + * start on any page boundary in the buffer. + */ + if (dg->dst.resource == VMCI_INVALID_ID) { + dg = (struct vmci_datagram *)roundup( + (uintptr_t)dg + 1, PAGE_SIZE); + remaining_bytes = + (size_t)(dg_in_buffer + + current_dg_in_buffer_size - + (u8 *)dg); + continue; + } + + dg_in_size = VMCI_DG_SIZE_ALIGNED(dg); + + if (dg_in_size <= dg_in_buffer_size) { + int result; + + /* + * If the remaining bytes in the datagram + * buffer doesn't contain the complete + * datagram, we first make sure we have enough + * room for it and then we read the reminder + * of the datagram and possibly any following + * datagrams. + */ + if (dg_in_size > remaining_bytes) { + if (remaining_bytes != + current_dg_in_buffer_size) { + + /* + * We move the partial + * datagram to the front and + * read the reminder of the + * datagram and possibly + * following calls into the + * following bytes. + */ + memmove(dg_in_buffer, dg_in_buffer + + current_dg_in_buffer_size - + remaining_bytes, + remaining_bytes); + dg = (struct vmci_datagram *) + dg_in_buffer; + } + + if (current_dg_in_buffer_size != + dg_in_buffer_size) + current_dg_in_buffer_size = + dg_in_buffer_size; + + ioread8_rep(vmci_dev->iobase + + VMCI_DATA_IN_ADDR, + vmci_dev->data_buffer + + remaining_bytes, + current_dg_in_buffer_size - + remaining_bytes); + } + + /* + * We special case event datagrams from the + * hypervisor. + */ + if (dg->src.context == VMCI_HYPERVISOR_CONTEXT_ID && + dg->dst.resource == VMCI_EVENT_HANDLER) { + result = vmci_event_dispatch(dg); + } else { + result = vmci_datagram_invoke_guest_handler(dg); + } + if (result < VMCI_SUCCESS) + dev_dbg(vmci_dev->dev, + "Datagram with resource (ID=0x%x) failed (err=%d)\n", + dg->dst.resource, result); + + /* On to the next datagram. */ + dg = (struct vmci_datagram *)((u8 *)dg + + dg_in_size); + } else { + size_t bytes_to_skip; + + /* + * Datagram doesn't fit in datagram buffer of maximal + * size. We drop it. + */ + dev_dbg(vmci_dev->dev, + "Failed to receive datagram (size=%u bytes)\n", + dg_in_size); + + bytes_to_skip = dg_in_size - remaining_bytes; + if (current_dg_in_buffer_size != dg_in_buffer_size) + current_dg_in_buffer_size = dg_in_buffer_size; + + for (;;) { + ioread8_rep(vmci_dev->iobase + + VMCI_DATA_IN_ADDR, + vmci_dev->data_buffer, + current_dg_in_buffer_size); + if (bytes_to_skip <= current_dg_in_buffer_size) + break; + + bytes_to_skip -= current_dg_in_buffer_size; + } + dg = (struct vmci_datagram *)(dg_in_buffer + + bytes_to_skip); + } + + remaining_bytes = + (size_t) (dg_in_buffer + current_dg_in_buffer_size - + (u8 *)dg); + + if (remaining_bytes < VMCI_DG_HEADERSIZE) { + /* Get the next batch of datagrams. */ + + ioread8_rep(vmci_dev->iobase + VMCI_DATA_IN_ADDR, + vmci_dev->data_buffer, + current_dg_in_buffer_size); + dg = (struct vmci_datagram *)dg_in_buffer; + remaining_bytes = current_dg_in_buffer_size; + } + } +} + +/* + * Scans the notification bitmap for raised flags, clears them + * and handles the notifications. + */ +static void vmci_process_bitmap(unsigned long data) +{ + struct vmci_guest_device *dev = (struct vmci_guest_device *)data; + + if (!dev->notification_bitmap) { + dev_dbg(dev->dev, "No bitmap present in %s\n", __func__); + return; + } + + vmci_dbell_scan_notification_entries(dev->notification_bitmap); +} + +/* + * Interrupt handler for legacy or MSI interrupt, or for first MSI-X + * interrupt (vector VMCI_INTR_DATAGRAM). + */ +static irqreturn_t vmci_interrupt(int irq, void *_dev) +{ + struct vmci_guest_device *dev = _dev; + + /* + * If we are using MSI-X with exclusive vectors then we simply schedule + * the datagram tasklet, since we know the interrupt was meant for us. + * Otherwise we must read the ICR to determine what to do. + */ + + if (dev->exclusive_vectors) { + tasklet_schedule(&dev->datagram_tasklet); + } else { + unsigned int icr; + + /* Acknowledge interrupt and determine what needs doing. */ + icr = ioread32(dev->iobase + VMCI_ICR_ADDR); + if (icr == 0 || icr == ~0) + return IRQ_NONE; + + if (icr & VMCI_ICR_DATAGRAM) { + tasklet_schedule(&dev->datagram_tasklet); + icr &= ~VMCI_ICR_DATAGRAM; + } + + if (icr & VMCI_ICR_NOTIFICATION) { + tasklet_schedule(&dev->bm_tasklet); + icr &= ~VMCI_ICR_NOTIFICATION; + } + + if (icr != 0) + dev_warn(dev->dev, + "Ignoring unknown interrupt cause (%d)\n", + icr); + } + + return IRQ_HANDLED; +} + +/* + * Interrupt handler for MSI-X interrupt vector VMCI_INTR_NOTIFICATION, + * which is for the notification bitmap. Will only get called if we are + * using MSI-X with exclusive vectors. + */ +static irqreturn_t vmci_interrupt_bm(int irq, void *_dev) +{ + struct vmci_guest_device *dev = _dev; + + /* For MSI-X we can just assume it was meant for us. */ + tasklet_schedule(&dev->bm_tasklet); + + return IRQ_HANDLED; +} + +/* + * Most of the initialization at module load time is done here. + */ +static int vmci_guest_probe_device(struct pci_dev *pdev, + const struct pci_device_id *id) +{ + struct vmci_guest_device *vmci_dev; + void __iomem *iobase; + unsigned int capabilities; + unsigned long cmd; + int vmci_err; + int error; + + dev_dbg(&pdev->dev, "Probing for vmci/PCI guest device\n"); + + error = pcim_enable_device(pdev); + if (error) { + dev_err(&pdev->dev, + "Failed to enable VMCI device: %d\n", error); + return error; + } + + error = pcim_iomap_regions(pdev, 1 << 0, KBUILD_MODNAME); + if (error) { + dev_err(&pdev->dev, "Failed to reserve/map IO regions\n"); + return error; + } + + iobase = pcim_iomap_table(pdev)[0]; + + dev_info(&pdev->dev, "Found VMCI PCI device at %#lx, irq %u\n", + (unsigned long)iobase, pdev->irq); + + vmci_dev = devm_kzalloc(&pdev->dev, sizeof(*vmci_dev), GFP_KERNEL); + if (!vmci_dev) { + dev_err(&pdev->dev, + "Can't allocate memory for VMCI device\n"); + return -ENOMEM; + } + + vmci_dev->dev = &pdev->dev; + vmci_dev->exclusive_vectors = false; + vmci_dev->iobase = iobase; + + tasklet_init(&vmci_dev->datagram_tasklet, + vmci_dispatch_dgs, (unsigned long)vmci_dev); + tasklet_init(&vmci_dev->bm_tasklet, + vmci_process_bitmap, (unsigned long)vmci_dev); + + vmci_dev->data_buffer = vmalloc(VMCI_MAX_DG_SIZE); + if (!vmci_dev->data_buffer) { + dev_err(&pdev->dev, + "Can't allocate memory for datagram buffer\n"); + return -ENOMEM; + } + + pci_set_master(pdev); /* To enable queue_pair functionality. */ + + /* + * Verify that the VMCI Device supports the capabilities that + * we need. If the device is missing capabilities that we would + * like to use, check for fallback capabilities and use those + * instead (so we can run a new VM on old hosts). Fail the load if + * a required capability is missing and there is no fallback. + * + * Right now, we need datagrams. There are no fallbacks. + */ + capabilities = ioread32(vmci_dev->iobase + VMCI_CAPS_ADDR); + if (!(capabilities & VMCI_CAPS_DATAGRAM)) { + dev_err(&pdev->dev, "Device does not support datagrams\n"); + error = -ENXIO; + goto err_free_data_buffer; + } + + /* + * If the hardware supports notifications, we will use that as + * well. + */ + if (capabilities & VMCI_CAPS_NOTIFICATIONS) { + vmci_dev->notification_bitmap = dma_alloc_coherent( + &pdev->dev, PAGE_SIZE, &vmci_dev->notification_base, + GFP_KERNEL); + if (!vmci_dev->notification_bitmap) { + dev_warn(&pdev->dev, + "Unable to allocate notification bitmap\n"); + } else { + memset(vmci_dev->notification_bitmap, 0, PAGE_SIZE); + capabilities |= VMCI_CAPS_NOTIFICATIONS; + } + } + + dev_info(&pdev->dev, "Using capabilities 0x%x\n", capabilities); + + /* Let the host know which capabilities we intend to use. */ + iowrite32(capabilities, vmci_dev->iobase + VMCI_CAPS_ADDR); + + /* Set up global device so that we can start sending datagrams */ + spin_lock_irq(&vmci_dev_spinlock); + vmci_dev_g = vmci_dev; + vmci_pdev = pdev; + spin_unlock_irq(&vmci_dev_spinlock); + + /* + * Register notification bitmap with device if that capability is + * used. + */ + if (capabilities & VMCI_CAPS_NOTIFICATIONS) { + unsigned long bitmap_ppn = + vmci_dev->notification_base >> PAGE_SHIFT; + if (!vmci_dbell_register_notification_bitmap(bitmap_ppn)) { + dev_warn(&pdev->dev, + "VMCI device unable to register notification bitmap with PPN 0x%x\n", + (u32) bitmap_ppn); + error = -ENXIO; + goto err_remove_vmci_dev_g; + } + } + + /* Check host capabilities. */ + error = vmci_check_host_caps(pdev); + if (error) + goto err_remove_bitmap; + + /* Enable device. */ + + /* + * We subscribe to the VMCI_EVENT_CTX_ID_UPDATE here so we can + * update the internal context id when needed. + */ + vmci_err = vmci_event_subscribe(VMCI_EVENT_CTX_ID_UPDATE, + vmci_guest_cid_update, NULL, + &ctx_update_sub_id); + if (vmci_err < VMCI_SUCCESS) + dev_warn(&pdev->dev, + "Failed to subscribe to event (type=%d): %d\n", + VMCI_EVENT_CTX_ID_UPDATE, vmci_err); + + /* + * Enable interrupts. Try MSI-X first, then MSI, and then fallback on + * legacy interrupts. + */ + error = pci_alloc_irq_vectors(pdev, VMCI_MAX_INTRS, VMCI_MAX_INTRS, + PCI_IRQ_MSIX); + if (error < 0) { + error = pci_alloc_irq_vectors(pdev, 1, 1, + PCI_IRQ_MSIX | PCI_IRQ_MSI | PCI_IRQ_LEGACY); + if (error < 0) + goto err_remove_bitmap; + } else { + vmci_dev->exclusive_vectors = true; + } + + /* + * Request IRQ for legacy or MSI interrupts, or for first + * MSI-X vector. + */ + error = request_irq(pci_irq_vector(pdev, 0), vmci_interrupt, + IRQF_SHARED, KBUILD_MODNAME, vmci_dev); + if (error) { + dev_err(&pdev->dev, "Irq %u in use: %d\n", + pci_irq_vector(pdev, 0), error); + goto err_disable_msi; + } + + /* + * For MSI-X with exclusive vectors we need to request an + * interrupt for each vector so that we get a separate + * interrupt handler routine. This allows us to distinguish + * between the vectors. + */ + if (vmci_dev->exclusive_vectors) { + error = request_irq(pci_irq_vector(pdev, 1), + vmci_interrupt_bm, 0, KBUILD_MODNAME, + vmci_dev); + if (error) { + dev_err(&pdev->dev, + "Failed to allocate irq %u: %d\n", + pci_irq_vector(pdev, 1), error); + goto err_free_irq; + } + } + + dev_dbg(&pdev->dev, "Registered device\n"); + + atomic_inc(&vmci_num_guest_devices); + + /* Enable specific interrupt bits. */ + cmd = VMCI_IMR_DATAGRAM; + if (capabilities & VMCI_CAPS_NOTIFICATIONS) + cmd |= VMCI_IMR_NOTIFICATION; + iowrite32(cmd, vmci_dev->iobase + VMCI_IMR_ADDR); + + /* Enable interrupts. */ + iowrite32(VMCI_CONTROL_INT_ENABLE, + vmci_dev->iobase + VMCI_CONTROL_ADDR); + + pci_set_drvdata(pdev, vmci_dev); + return 0; + +err_free_irq: + free_irq(pci_irq_vector(pdev, 0), vmci_dev); + tasklet_kill(&vmci_dev->datagram_tasklet); + tasklet_kill(&vmci_dev->bm_tasklet); + +err_disable_msi: + pci_free_irq_vectors(pdev); + + vmci_err = vmci_event_unsubscribe(ctx_update_sub_id); + if (vmci_err < VMCI_SUCCESS) + dev_warn(&pdev->dev, + "Failed to unsubscribe from event (type=%d) with subscriber (ID=0x%x): %d\n", + VMCI_EVENT_CTX_ID_UPDATE, ctx_update_sub_id, vmci_err); + +err_remove_bitmap: + if (vmci_dev->notification_bitmap) { + iowrite32(VMCI_CONTROL_RESET, + vmci_dev->iobase + VMCI_CONTROL_ADDR); + dma_free_coherent(&pdev->dev, PAGE_SIZE, + vmci_dev->notification_bitmap, + vmci_dev->notification_base); + } + +err_remove_vmci_dev_g: + spin_lock_irq(&vmci_dev_spinlock); + vmci_pdev = NULL; + vmci_dev_g = NULL; + spin_unlock_irq(&vmci_dev_spinlock); + +err_free_data_buffer: + vfree(vmci_dev->data_buffer); + + /* The rest are managed resources and will be freed by PCI core */ + return error; +} + +static void vmci_guest_remove_device(struct pci_dev *pdev) +{ + struct vmci_guest_device *vmci_dev = pci_get_drvdata(pdev); + int vmci_err; + + dev_dbg(&pdev->dev, "Removing device\n"); + + atomic_dec(&vmci_num_guest_devices); + + vmci_qp_guest_endpoints_exit(); + + vmci_err = vmci_event_unsubscribe(ctx_update_sub_id); + if (vmci_err < VMCI_SUCCESS) + dev_warn(&pdev->dev, + "Failed to unsubscribe from event (type=%d) with subscriber (ID=0x%x): %d\n", + VMCI_EVENT_CTX_ID_UPDATE, ctx_update_sub_id, vmci_err); + + spin_lock_irq(&vmci_dev_spinlock); + vmci_dev_g = NULL; + vmci_pdev = NULL; + spin_unlock_irq(&vmci_dev_spinlock); + + dev_dbg(&pdev->dev, "Resetting vmci device\n"); + iowrite32(VMCI_CONTROL_RESET, vmci_dev->iobase + VMCI_CONTROL_ADDR); + + /* + * Free IRQ and then disable MSI/MSI-X as appropriate. For + * MSI-X, we might have multiple vectors, each with their own + * IRQ, which we must free too. + */ + if (vmci_dev->exclusive_vectors) + free_irq(pci_irq_vector(pdev, 1), vmci_dev); + free_irq(pci_irq_vector(pdev, 0), vmci_dev); + pci_free_irq_vectors(pdev); + + tasklet_kill(&vmci_dev->datagram_tasklet); + tasklet_kill(&vmci_dev->bm_tasklet); + + if (vmci_dev->notification_bitmap) { + /* + * The device reset above cleared the bitmap state of the + * device, so we can safely free it here. + */ + + dma_free_coherent(&pdev->dev, PAGE_SIZE, + vmci_dev->notification_bitmap, + vmci_dev->notification_base); + } + + vfree(vmci_dev->data_buffer); + + /* The rest are managed resources and will be freed by PCI core */ +} + +static const struct pci_device_id vmci_ids[] = { + { PCI_DEVICE(PCI_VENDOR_ID_VMWARE, PCI_DEVICE_ID_VMWARE_VMCI), }, + { 0 }, +}; +MODULE_DEVICE_TABLE(pci, vmci_ids); + +static struct pci_driver vmci_guest_driver = { + .name = KBUILD_MODNAME, + .id_table = vmci_ids, + .probe = vmci_guest_probe_device, + .remove = vmci_guest_remove_device, +}; + +int __init vmci_guest_init(void) +{ + return pci_register_driver(&vmci_guest_driver); +} + +void __exit vmci_guest_exit(void) +{ + pci_unregister_driver(&vmci_guest_driver); +} diff --git a/drivers/misc/vmw_vmci/vmci_handle_array.c b/drivers/misc/vmw_vmci/vmci_handle_array.c new file mode 100644 index 000000000..917e18a8a --- /dev/null +++ b/drivers/misc/vmw_vmci/vmci_handle_array.c @@ -0,0 +1,154 @@ +/* + * VMware VMCI Driver + * + * Copyright (C) 2012 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + */ + +#include +#include "vmci_handle_array.h" + +static size_t handle_arr_calc_size(u32 capacity) +{ + return VMCI_HANDLE_ARRAY_HEADER_SIZE + + capacity * sizeof(struct vmci_handle); +} + +struct vmci_handle_arr *vmci_handle_arr_create(u32 capacity, u32 max_capacity) +{ + struct vmci_handle_arr *array; + + if (max_capacity == 0 || capacity > max_capacity) + return NULL; + + if (capacity == 0) + capacity = min((u32)VMCI_HANDLE_ARRAY_DEFAULT_CAPACITY, + max_capacity); + + array = kmalloc(handle_arr_calc_size(capacity), GFP_ATOMIC); + if (!array) + return NULL; + + array->capacity = capacity; + array->max_capacity = max_capacity; + array->size = 0; + + return array; +} + +void vmci_handle_arr_destroy(struct vmci_handle_arr *array) +{ + kfree(array); +} + +int vmci_handle_arr_append_entry(struct vmci_handle_arr **array_ptr, + struct vmci_handle handle) +{ + struct vmci_handle_arr *array = *array_ptr; + + if (unlikely(array->size >= array->capacity)) { + /* reallocate. */ + struct vmci_handle_arr *new_array; + u32 capacity_bump = min(array->max_capacity - array->capacity, + array->capacity); + size_t new_size = handle_arr_calc_size(array->capacity + + capacity_bump); + + if (array->size >= array->max_capacity) + return VMCI_ERROR_NO_MEM; + + new_array = krealloc(array, new_size, GFP_ATOMIC); + if (!new_array) + return VMCI_ERROR_NO_MEM; + + new_array->capacity += capacity_bump; + *array_ptr = array = new_array; + } + + array->entries[array->size] = handle; + array->size++; + + return VMCI_SUCCESS; +} + +/* + * Handle that was removed, VMCI_INVALID_HANDLE if entry not found. + */ +struct vmci_handle vmci_handle_arr_remove_entry(struct vmci_handle_arr *array, + struct vmci_handle entry_handle) +{ + struct vmci_handle handle = VMCI_INVALID_HANDLE; + u32 i; + + for (i = 0; i < array->size; i++) { + if (vmci_handle_is_equal(array->entries[i], entry_handle)) { + handle = array->entries[i]; + array->size--; + array->entries[i] = array->entries[array->size]; + array->entries[array->size] = VMCI_INVALID_HANDLE; + break; + } + } + + return handle; +} + +/* + * Handle that was removed, VMCI_INVALID_HANDLE if array was empty. + */ +struct vmci_handle vmci_handle_arr_remove_tail(struct vmci_handle_arr *array) +{ + struct vmci_handle handle = VMCI_INVALID_HANDLE; + + if (array->size) { + array->size--; + handle = array->entries[array->size]; + array->entries[array->size] = VMCI_INVALID_HANDLE; + } + + return handle; +} + +/* + * Handle at given index, VMCI_INVALID_HANDLE if invalid index. + */ +struct vmci_handle +vmci_handle_arr_get_entry(const struct vmci_handle_arr *array, u32 index) +{ + if (unlikely(index >= array->size)) + return VMCI_INVALID_HANDLE; + + return array->entries[index]; +} + +bool vmci_handle_arr_has_entry(const struct vmci_handle_arr *array, + struct vmci_handle entry_handle) +{ + u32 i; + + for (i = 0; i < array->size; i++) + if (vmci_handle_is_equal(array->entries[i], entry_handle)) + return true; + + return false; +} + +/* + * NULL if the array is empty. Otherwise, a pointer to the array + * of VMCI handles in the handle array. + */ +struct vmci_handle *vmci_handle_arr_get_handles(struct vmci_handle_arr *array) +{ + if (array->size) + return array->entries; + + return NULL; +} diff --git a/drivers/misc/vmw_vmci/vmci_handle_array.h b/drivers/misc/vmw_vmci/vmci_handle_array.h new file mode 100644 index 000000000..0fc585978 --- /dev/null +++ b/drivers/misc/vmw_vmci/vmci_handle_array.h @@ -0,0 +1,61 @@ +/* + * VMware VMCI Driver + * + * Copyright (C) 2012 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + */ + +#ifndef _VMCI_HANDLE_ARRAY_H_ +#define _VMCI_HANDLE_ARRAY_H_ + +#include +#include +#include + +struct vmci_handle_arr { + u32 capacity; + u32 max_capacity; + u32 size; + u32 pad; + struct vmci_handle entries[]; +}; + +#define VMCI_HANDLE_ARRAY_HEADER_SIZE \ + offsetof(struct vmci_handle_arr, entries) +/* Select a default capacity that results in a 64 byte sized array */ +#define VMCI_HANDLE_ARRAY_DEFAULT_CAPACITY 6 +/* Make sure that the max array size can be expressed by a u32 */ +#define VMCI_HANDLE_ARRAY_MAX_CAPACITY \ + ((U32_MAX - VMCI_HANDLE_ARRAY_HEADER_SIZE - 1) / \ + sizeof(struct vmci_handle)) + +struct vmci_handle_arr *vmci_handle_arr_create(u32 capacity, u32 max_capacity); +void vmci_handle_arr_destroy(struct vmci_handle_arr *array); +int vmci_handle_arr_append_entry(struct vmci_handle_arr **array_ptr, + struct vmci_handle handle); +struct vmci_handle vmci_handle_arr_remove_entry(struct vmci_handle_arr *array, + struct vmci_handle + entry_handle); +struct vmci_handle vmci_handle_arr_remove_tail(struct vmci_handle_arr *array); +struct vmci_handle +vmci_handle_arr_get_entry(const struct vmci_handle_arr *array, u32 index); +bool vmci_handle_arr_has_entry(const struct vmci_handle_arr *array, + struct vmci_handle entry_handle); +struct vmci_handle *vmci_handle_arr_get_handles(struct vmci_handle_arr *array); + +static inline u32 vmci_handle_arr_get_size( + const struct vmci_handle_arr *array) +{ + return array->size; +} + + +#endif /* _VMCI_HANDLE_ARRAY_H_ */ diff --git a/drivers/misc/vmw_vmci/vmci_host.c b/drivers/misc/vmw_vmci/vmci_host.c new file mode 100644 index 000000000..83e0c95d2 --- /dev/null +++ b/drivers/misc/vmw_vmci/vmci_host.c @@ -0,0 +1,1036 @@ +/* + * VMware VMCI Driver + * + * Copyright (C) 2012 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "vmci_handle_array.h" +#include "vmci_queue_pair.h" +#include "vmci_datagram.h" +#include "vmci_doorbell.h" +#include "vmci_resource.h" +#include "vmci_context.h" +#include "vmci_driver.h" +#include "vmci_event.h" + +#define VMCI_UTIL_NUM_RESOURCES 1 + +enum { + VMCI_NOTIFY_RESOURCE_QUEUE_PAIR = 0, + VMCI_NOTIFY_RESOURCE_DOOR_BELL = 1, +}; + +enum { + VMCI_NOTIFY_RESOURCE_ACTION_NOTIFY = 0, + VMCI_NOTIFY_RESOURCE_ACTION_CREATE = 1, + VMCI_NOTIFY_RESOURCE_ACTION_DESTROY = 2, +}; + +/* + * VMCI driver initialization. This block can also be used to + * pass initial group membership etc. + */ +struct vmci_init_blk { + u32 cid; + u32 flags; +}; + +/* VMCIqueue_pairAllocInfo_VMToVM */ +struct vmci_qp_alloc_info_vmvm { + struct vmci_handle handle; + u32 peer; + u32 flags; + u64 produce_size; + u64 consume_size; + u64 produce_page_file; /* User VA. */ + u64 consume_page_file; /* User VA. */ + u64 produce_page_file_size; /* Size of the file name array. */ + u64 consume_page_file_size; /* Size of the file name array. */ + s32 result; + u32 _pad; +}; + +/* VMCISetNotifyInfo: Used to pass notify flag's address to the host driver. */ +struct vmci_set_notify_info { + u64 notify_uva; + s32 result; + u32 _pad; +}; + +/* + * Per-instance host state + */ +struct vmci_host_dev { + struct vmci_ctx *context; + int user_version; + enum vmci_obj_type ct_type; + struct mutex lock; /* Mutex lock for vmci context access */ +}; + +static struct vmci_ctx *host_context; +static bool vmci_host_device_initialized; +static atomic_t vmci_host_active_users = ATOMIC_INIT(0); + +/* + * Determines whether the VMCI host personality is + * available. Since the core functionality of the host driver is + * always present, all guests could possibly use the host + * personality. However, to minimize the deviation from the + * pre-unified driver state of affairs, we only consider the host + * device active if there is no active guest device or if there + * are VMX'en with active VMCI contexts using the host device. + */ +bool vmci_host_code_active(void) +{ + return vmci_host_device_initialized && + (!vmci_guest_code_active() || + atomic_read(&vmci_host_active_users) > 0); +} + +/* + * Called on open of /dev/vmci. + */ +static int vmci_host_open(struct inode *inode, struct file *filp) +{ + struct vmci_host_dev *vmci_host_dev; + + vmci_host_dev = kzalloc(sizeof(struct vmci_host_dev), GFP_KERNEL); + if (vmci_host_dev == NULL) + return -ENOMEM; + + vmci_host_dev->ct_type = VMCIOBJ_NOT_SET; + mutex_init(&vmci_host_dev->lock); + filp->private_data = vmci_host_dev; + + return 0; +} + +/* + * Called on close of /dev/vmci, most often when the process + * exits. + */ +static int vmci_host_close(struct inode *inode, struct file *filp) +{ + struct vmci_host_dev *vmci_host_dev = filp->private_data; + + if (vmci_host_dev->ct_type == VMCIOBJ_CONTEXT) { + vmci_ctx_destroy(vmci_host_dev->context); + vmci_host_dev->context = NULL; + + /* + * The number of active contexts is used to track whether any + * VMX'en are using the host personality. It is incremented when + * a context is created through the IOCTL_VMCI_INIT_CONTEXT + * ioctl. + */ + atomic_dec(&vmci_host_active_users); + } + vmci_host_dev->ct_type = VMCIOBJ_NOT_SET; + + kfree(vmci_host_dev); + filp->private_data = NULL; + return 0; +} + +/* + * This is used to wake up the VMX when a VMCI call arrives, or + * to wake up select() or poll() at the next clock tick. + */ +static __poll_t vmci_host_poll(struct file *filp, poll_table *wait) +{ + struct vmci_host_dev *vmci_host_dev = filp->private_data; + struct vmci_ctx *context = vmci_host_dev->context; + __poll_t mask = 0; + + if (vmci_host_dev->ct_type == VMCIOBJ_CONTEXT) { + /* Check for VMCI calls to this VM context. */ + if (wait) + poll_wait(filp, &context->host_context.wait_queue, + wait); + + spin_lock(&context->lock); + if (context->pending_datagrams > 0 || + vmci_handle_arr_get_size( + context->pending_doorbell_array) > 0) { + mask = EPOLLIN; + } + spin_unlock(&context->lock); + } + return mask; +} + +/* + * Copies the handles of a handle array into a user buffer, and + * returns the new length in userBufferSize. If the copy to the + * user buffer fails, the functions still returns VMCI_SUCCESS, + * but retval != 0. + */ +static int drv_cp_harray_to_user(void __user *user_buf_uva, + u64 *user_buf_size, + struct vmci_handle_arr *handle_array, + int *retval) +{ + u32 array_size = 0; + struct vmci_handle *handles; + + if (handle_array) + array_size = vmci_handle_arr_get_size(handle_array); + + if (array_size * sizeof(*handles) > *user_buf_size) + return VMCI_ERROR_MORE_DATA; + + *user_buf_size = array_size * sizeof(*handles); + if (*user_buf_size) + *retval = copy_to_user(user_buf_uva, + vmci_handle_arr_get_handles + (handle_array), *user_buf_size); + + return VMCI_SUCCESS; +} + +/* + * Sets up a given context for notify to work. Maps the notify + * boolean in user VA into kernel space. + */ +static int vmci_host_setup_notify(struct vmci_ctx *context, + unsigned long uva) +{ + int retval; + + if (context->notify_page) { + pr_devel("%s: Notify mechanism is already set up\n", __func__); + return VMCI_ERROR_DUPLICATE_ENTRY; + } + + /* + * We are using 'bool' internally, but let's make sure we explicit + * about the size. + */ + BUILD_BUG_ON(sizeof(bool) != sizeof(u8)); + if (!access_ok(VERIFY_WRITE, (void __user *)uva, sizeof(u8))) + return VMCI_ERROR_GENERIC; + + /* + * Lock physical page backing a given user VA. + */ + retval = get_user_pages_fast(uva, 1, 1, &context->notify_page); + if (retval != 1) { + context->notify_page = NULL; + return VMCI_ERROR_GENERIC; + } + + /* + * Map the locked page and set up notify pointer. + */ + context->notify = kmap(context->notify_page) + (uva & (PAGE_SIZE - 1)); + vmci_ctx_check_signal_notify(context); + + return VMCI_SUCCESS; +} + +static int vmci_host_get_version(struct vmci_host_dev *vmci_host_dev, + unsigned int cmd, void __user *uptr) +{ + if (cmd == IOCTL_VMCI_VERSION2) { + int __user *vptr = uptr; + if (get_user(vmci_host_dev->user_version, vptr)) + return -EFAULT; + } + + /* + * The basic logic here is: + * + * If the user sends in a version of 0 tell it our version. + * If the user didn't send in a version, tell it our version. + * If the user sent in an old version, tell it -its- version. + * If the user sent in an newer version, tell it our version. + * + * The rationale behind telling the caller its version is that + * Workstation 6.5 required that VMX and VMCI kernel module were + * version sync'd. All new VMX users will be programmed to + * handle the VMCI kernel module version. + */ + + if (vmci_host_dev->user_version > 0 && + vmci_host_dev->user_version < VMCI_VERSION_HOSTQP) { + return vmci_host_dev->user_version; + } + + return VMCI_VERSION; +} + +#define vmci_ioctl_err(fmt, ...) \ + pr_devel("%s: " fmt, ioctl_name, ##__VA_ARGS__) + +static int vmci_host_do_init_context(struct vmci_host_dev *vmci_host_dev, + const char *ioctl_name, + void __user *uptr) +{ + struct vmci_init_blk init_block; + const struct cred *cred; + int retval; + + if (copy_from_user(&init_block, uptr, sizeof(init_block))) { + vmci_ioctl_err("error reading init block\n"); + return -EFAULT; + } + + mutex_lock(&vmci_host_dev->lock); + + if (vmci_host_dev->ct_type != VMCIOBJ_NOT_SET) { + vmci_ioctl_err("received VMCI init on initialized handle\n"); + retval = -EINVAL; + goto out; + } + + if (init_block.flags & ~VMCI_PRIVILEGE_FLAG_RESTRICTED) { + vmci_ioctl_err("unsupported VMCI restriction flag\n"); + retval = -EINVAL; + goto out; + } + + cred = get_current_cred(); + vmci_host_dev->context = vmci_ctx_create(init_block.cid, + init_block.flags, 0, + vmci_host_dev->user_version, + cred); + put_cred(cred); + if (IS_ERR(vmci_host_dev->context)) { + retval = PTR_ERR(vmci_host_dev->context); + vmci_ioctl_err("error initializing context\n"); + goto out; + } + + /* + * Copy cid to userlevel, we do this to allow the VMX + * to enforce its policy on cid generation. + */ + init_block.cid = vmci_ctx_get_id(vmci_host_dev->context); + if (copy_to_user(uptr, &init_block, sizeof(init_block))) { + vmci_ctx_destroy(vmci_host_dev->context); + vmci_host_dev->context = NULL; + vmci_ioctl_err("error writing init block\n"); + retval = -EFAULT; + goto out; + } + + vmci_host_dev->ct_type = VMCIOBJ_CONTEXT; + atomic_inc(&vmci_host_active_users); + + retval = 0; + +out: + mutex_unlock(&vmci_host_dev->lock); + return retval; +} + +static int vmci_host_do_send_datagram(struct vmci_host_dev *vmci_host_dev, + const char *ioctl_name, + void __user *uptr) +{ + struct vmci_datagram_snd_rcv_info send_info; + struct vmci_datagram *dg = NULL; + u32 cid; + + if (vmci_host_dev->ct_type != VMCIOBJ_CONTEXT) { + vmci_ioctl_err("only valid for contexts\n"); + return -EINVAL; + } + + if (copy_from_user(&send_info, uptr, sizeof(send_info))) + return -EFAULT; + + if (send_info.len > VMCI_MAX_DG_SIZE) { + vmci_ioctl_err("datagram is too big (size=%d)\n", + send_info.len); + return -EINVAL; + } + + if (send_info.len < sizeof(*dg)) { + vmci_ioctl_err("datagram is too small (size=%d)\n", + send_info.len); + return -EINVAL; + } + + dg = memdup_user((void __user *)(uintptr_t)send_info.addr, + send_info.len); + if (IS_ERR(dg)) { + vmci_ioctl_err( + "cannot allocate memory to dispatch datagram\n"); + return PTR_ERR(dg); + } + + if (VMCI_DG_SIZE(dg) != send_info.len) { + vmci_ioctl_err("datagram size mismatch\n"); + kfree(dg); + return -EINVAL; + } + + pr_devel("Datagram dst (handle=0x%x:0x%x) src (handle=0x%x:0x%x), payload (size=%llu bytes)\n", + dg->dst.context, dg->dst.resource, + dg->src.context, dg->src.resource, + (unsigned long long)dg->payload_size); + + /* Get source context id. */ + cid = vmci_ctx_get_id(vmci_host_dev->context); + send_info.result = vmci_datagram_dispatch(cid, dg, true); + kfree(dg); + + return copy_to_user(uptr, &send_info, sizeof(send_info)) ? -EFAULT : 0; +} + +static int vmci_host_do_receive_datagram(struct vmci_host_dev *vmci_host_dev, + const char *ioctl_name, + void __user *uptr) +{ + struct vmci_datagram_snd_rcv_info recv_info; + struct vmci_datagram *dg = NULL; + int retval; + size_t size; + + if (vmci_host_dev->ct_type != VMCIOBJ_CONTEXT) { + vmci_ioctl_err("only valid for contexts\n"); + return -EINVAL; + } + + if (copy_from_user(&recv_info, uptr, sizeof(recv_info))) + return -EFAULT; + + size = recv_info.len; + recv_info.result = vmci_ctx_dequeue_datagram(vmci_host_dev->context, + &size, &dg); + + if (recv_info.result >= VMCI_SUCCESS) { + void __user *ubuf = (void __user *)(uintptr_t)recv_info.addr; + retval = copy_to_user(ubuf, dg, VMCI_DG_SIZE(dg)); + kfree(dg); + if (retval != 0) + return -EFAULT; + } + + return copy_to_user(uptr, &recv_info, sizeof(recv_info)) ? -EFAULT : 0; +} + +static int vmci_host_do_alloc_queuepair(struct vmci_host_dev *vmci_host_dev, + const char *ioctl_name, + void __user *uptr) +{ + struct vmci_handle handle; + int vmci_status; + int __user *retptr; + u32 cid; + + if (vmci_host_dev->ct_type != VMCIOBJ_CONTEXT) { + vmci_ioctl_err("only valid for contexts\n"); + return -EINVAL; + } + + cid = vmci_ctx_get_id(vmci_host_dev->context); + + if (vmci_host_dev->user_version < VMCI_VERSION_NOVMVM) { + struct vmci_qp_alloc_info_vmvm alloc_info; + struct vmci_qp_alloc_info_vmvm __user *info = uptr; + + if (copy_from_user(&alloc_info, uptr, sizeof(alloc_info))) + return -EFAULT; + + handle = alloc_info.handle; + retptr = &info->result; + + vmci_status = vmci_qp_broker_alloc(alloc_info.handle, + alloc_info.peer, + alloc_info.flags, + VMCI_NO_PRIVILEGE_FLAGS, + alloc_info.produce_size, + alloc_info.consume_size, + NULL, + vmci_host_dev->context); + + if (vmci_status == VMCI_SUCCESS) + vmci_status = VMCI_SUCCESS_QUEUEPAIR_CREATE; + } else { + struct vmci_qp_alloc_info alloc_info; + struct vmci_qp_alloc_info __user *info = uptr; + struct vmci_qp_page_store page_store; + + if (copy_from_user(&alloc_info, uptr, sizeof(alloc_info))) + return -EFAULT; + + handle = alloc_info.handle; + retptr = &info->result; + + page_store.pages = alloc_info.ppn_va; + page_store.len = alloc_info.num_ppns; + + vmci_status = vmci_qp_broker_alloc(alloc_info.handle, + alloc_info.peer, + alloc_info.flags, + VMCI_NO_PRIVILEGE_FLAGS, + alloc_info.produce_size, + alloc_info.consume_size, + &page_store, + vmci_host_dev->context); + } + + if (put_user(vmci_status, retptr)) { + if (vmci_status >= VMCI_SUCCESS) { + vmci_status = vmci_qp_broker_detach(handle, + vmci_host_dev->context); + } + return -EFAULT; + } + + return 0; +} + +static int vmci_host_do_queuepair_setva(struct vmci_host_dev *vmci_host_dev, + const char *ioctl_name, + void __user *uptr) +{ + struct vmci_qp_set_va_info set_va_info; + struct vmci_qp_set_va_info __user *info = uptr; + s32 result; + + if (vmci_host_dev->ct_type != VMCIOBJ_CONTEXT) { + vmci_ioctl_err("only valid for contexts\n"); + return -EINVAL; + } + + if (vmci_host_dev->user_version < VMCI_VERSION_NOVMVM) { + vmci_ioctl_err("is not allowed\n"); + return -EINVAL; + } + + if (copy_from_user(&set_va_info, uptr, sizeof(set_va_info))) + return -EFAULT; + + if (set_va_info.va) { + /* + * VMX is passing down a new VA for the queue + * pair mapping. + */ + result = vmci_qp_broker_map(set_va_info.handle, + vmci_host_dev->context, + set_va_info.va); + } else { + /* + * The queue pair is about to be unmapped by + * the VMX. + */ + result = vmci_qp_broker_unmap(set_va_info.handle, + vmci_host_dev->context, 0); + } + + return put_user(result, &info->result) ? -EFAULT : 0; +} + +static int vmci_host_do_queuepair_setpf(struct vmci_host_dev *vmci_host_dev, + const char *ioctl_name, + void __user *uptr) +{ + struct vmci_qp_page_file_info page_file_info; + struct vmci_qp_page_file_info __user *info = uptr; + s32 result; + + if (vmci_host_dev->user_version < VMCI_VERSION_HOSTQP || + vmci_host_dev->user_version >= VMCI_VERSION_NOVMVM) { + vmci_ioctl_err("not supported on this VMX (version=%d)\n", + vmci_host_dev->user_version); + return -EINVAL; + } + + if (vmci_host_dev->ct_type != VMCIOBJ_CONTEXT) { + vmci_ioctl_err("only valid for contexts\n"); + return -EINVAL; + } + + if (copy_from_user(&page_file_info, uptr, sizeof(*info))) + return -EFAULT; + + /* + * Communicate success pre-emptively to the caller. Note that the + * basic premise is that it is incumbent upon the caller not to look at + * the info.result field until after the ioctl() returns. And then, + * only if the ioctl() result indicates no error. We send up the + * SUCCESS status before calling SetPageStore() store because failing + * to copy up the result code means unwinding the SetPageStore(). + * + * It turns out the logic to unwind a SetPageStore() opens a can of + * worms. For example, if a host had created the queue_pair and a + * guest attaches and SetPageStore() is successful but writing success + * fails, then ... the host has to be stopped from writing (anymore) + * data into the queue_pair. That means an additional test in the + * VMCI_Enqueue() code path. Ugh. + */ + + if (put_user(VMCI_SUCCESS, &info->result)) { + /* + * In this case, we can't write a result field of the + * caller's info block. So, we don't even try to + * SetPageStore(). + */ + return -EFAULT; + } + + result = vmci_qp_broker_set_page_store(page_file_info.handle, + page_file_info.produce_va, + page_file_info.consume_va, + vmci_host_dev->context); + if (result < VMCI_SUCCESS) { + if (put_user(result, &info->result)) { + /* + * Note that in this case the SetPageStore() + * call failed but we were unable to + * communicate that to the caller (because the + * copy_to_user() call failed). So, if we + * simply return an error (in this case + * -EFAULT) then the caller will know that the + * SetPageStore failed even though we couldn't + * put the result code in the result field and + * indicate exactly why it failed. + * + * That says nothing about the issue where we + * were once able to write to the caller's info + * memory and now can't. Something more + * serious is probably going on than the fact + * that SetPageStore() didn't work. + */ + return -EFAULT; + } + } + + return 0; +} + +static int vmci_host_do_qp_detach(struct vmci_host_dev *vmci_host_dev, + const char *ioctl_name, + void __user *uptr) +{ + struct vmci_qp_dtch_info detach_info; + struct vmci_qp_dtch_info __user *info = uptr; + s32 result; + + if (vmci_host_dev->ct_type != VMCIOBJ_CONTEXT) { + vmci_ioctl_err("only valid for contexts\n"); + return -EINVAL; + } + + if (copy_from_user(&detach_info, uptr, sizeof(detach_info))) + return -EFAULT; + + result = vmci_qp_broker_detach(detach_info.handle, + vmci_host_dev->context); + if (result == VMCI_SUCCESS && + vmci_host_dev->user_version < VMCI_VERSION_NOVMVM) { + result = VMCI_SUCCESS_LAST_DETACH; + } + + return put_user(result, &info->result) ? -EFAULT : 0; +} + +static int vmci_host_do_ctx_add_notify(struct vmci_host_dev *vmci_host_dev, + const char *ioctl_name, + void __user *uptr) +{ + struct vmci_ctx_info ar_info; + struct vmci_ctx_info __user *info = uptr; + s32 result; + u32 cid; + + if (vmci_host_dev->ct_type != VMCIOBJ_CONTEXT) { + vmci_ioctl_err("only valid for contexts\n"); + return -EINVAL; + } + + if (copy_from_user(&ar_info, uptr, sizeof(ar_info))) + return -EFAULT; + + cid = vmci_ctx_get_id(vmci_host_dev->context); + result = vmci_ctx_add_notification(cid, ar_info.remote_cid); + + return put_user(result, &info->result) ? -EFAULT : 0; +} + +static int vmci_host_do_ctx_remove_notify(struct vmci_host_dev *vmci_host_dev, + const char *ioctl_name, + void __user *uptr) +{ + struct vmci_ctx_info ar_info; + struct vmci_ctx_info __user *info = uptr; + u32 cid; + int result; + + if (vmci_host_dev->ct_type != VMCIOBJ_CONTEXT) { + vmci_ioctl_err("only valid for contexts\n"); + return -EINVAL; + } + + if (copy_from_user(&ar_info, uptr, sizeof(ar_info))) + return -EFAULT; + + cid = vmci_ctx_get_id(vmci_host_dev->context); + result = vmci_ctx_remove_notification(cid, + ar_info.remote_cid); + + return put_user(result, &info->result) ? -EFAULT : 0; +} + +static int vmci_host_do_ctx_get_cpt_state(struct vmci_host_dev *vmci_host_dev, + const char *ioctl_name, + void __user *uptr) +{ + struct vmci_ctx_chkpt_buf_info get_info; + u32 cid; + void *cpt_buf; + int retval; + + if (vmci_host_dev->ct_type != VMCIOBJ_CONTEXT) { + vmci_ioctl_err("only valid for contexts\n"); + return -EINVAL; + } + + if (copy_from_user(&get_info, uptr, sizeof(get_info))) + return -EFAULT; + + cid = vmci_ctx_get_id(vmci_host_dev->context); + get_info.result = vmci_ctx_get_chkpt_state(cid, get_info.cpt_type, + &get_info.buf_size, &cpt_buf); + if (get_info.result == VMCI_SUCCESS && get_info.buf_size) { + void __user *ubuf = (void __user *)(uintptr_t)get_info.cpt_buf; + retval = copy_to_user(ubuf, cpt_buf, get_info.buf_size); + kfree(cpt_buf); + + if (retval) + return -EFAULT; + } + + return copy_to_user(uptr, &get_info, sizeof(get_info)) ? -EFAULT : 0; +} + +static int vmci_host_do_ctx_set_cpt_state(struct vmci_host_dev *vmci_host_dev, + const char *ioctl_name, + void __user *uptr) +{ + struct vmci_ctx_chkpt_buf_info set_info; + u32 cid; + void *cpt_buf; + int retval; + + if (vmci_host_dev->ct_type != VMCIOBJ_CONTEXT) { + vmci_ioctl_err("only valid for contexts\n"); + return -EINVAL; + } + + if (copy_from_user(&set_info, uptr, sizeof(set_info))) + return -EFAULT; + + cpt_buf = kmalloc(set_info.buf_size, GFP_KERNEL); + if (!cpt_buf) { + vmci_ioctl_err( + "cannot allocate memory to set cpt state (type=%d)\n", + set_info.cpt_type); + return -ENOMEM; + } + + if (copy_from_user(cpt_buf, (void __user *)(uintptr_t)set_info.cpt_buf, + set_info.buf_size)) { + retval = -EFAULT; + goto out; + } + + cid = vmci_ctx_get_id(vmci_host_dev->context); + set_info.result = vmci_ctx_set_chkpt_state(cid, set_info.cpt_type, + set_info.buf_size, cpt_buf); + + retval = copy_to_user(uptr, &set_info, sizeof(set_info)) ? -EFAULT : 0; + +out: + kfree(cpt_buf); + return retval; +} + +static int vmci_host_do_get_context_id(struct vmci_host_dev *vmci_host_dev, + const char *ioctl_name, + void __user *uptr) +{ + u32 __user *u32ptr = uptr; + + return put_user(VMCI_HOST_CONTEXT_ID, u32ptr) ? -EFAULT : 0; +} + +static int vmci_host_do_set_notify(struct vmci_host_dev *vmci_host_dev, + const char *ioctl_name, + void __user *uptr) +{ + struct vmci_set_notify_info notify_info; + + if (vmci_host_dev->ct_type != VMCIOBJ_CONTEXT) { + vmci_ioctl_err("only valid for contexts\n"); + return -EINVAL; + } + + if (copy_from_user(¬ify_info, uptr, sizeof(notify_info))) + return -EFAULT; + + if (notify_info.notify_uva) { + notify_info.result = + vmci_host_setup_notify(vmci_host_dev->context, + notify_info.notify_uva); + } else { + vmci_ctx_unset_notify(vmci_host_dev->context); + notify_info.result = VMCI_SUCCESS; + } + + return copy_to_user(uptr, ¬ify_info, sizeof(notify_info)) ? + -EFAULT : 0; +} + +static int vmci_host_do_notify_resource(struct vmci_host_dev *vmci_host_dev, + const char *ioctl_name, + void __user *uptr) +{ + struct vmci_dbell_notify_resource_info info; + u32 cid; + + if (vmci_host_dev->user_version < VMCI_VERSION_NOTIFY) { + vmci_ioctl_err("invalid for current VMX versions\n"); + return -EINVAL; + } + + if (vmci_host_dev->ct_type != VMCIOBJ_CONTEXT) { + vmci_ioctl_err("only valid for contexts\n"); + return -EINVAL; + } + + if (copy_from_user(&info, uptr, sizeof(info))) + return -EFAULT; + + cid = vmci_ctx_get_id(vmci_host_dev->context); + + switch (info.action) { + case VMCI_NOTIFY_RESOURCE_ACTION_NOTIFY: + if (info.resource == VMCI_NOTIFY_RESOURCE_DOOR_BELL) { + u32 flags = VMCI_NO_PRIVILEGE_FLAGS; + info.result = vmci_ctx_notify_dbell(cid, info.handle, + flags); + } else { + info.result = VMCI_ERROR_UNAVAILABLE; + } + break; + + case VMCI_NOTIFY_RESOURCE_ACTION_CREATE: + info.result = vmci_ctx_dbell_create(cid, info.handle); + break; + + case VMCI_NOTIFY_RESOURCE_ACTION_DESTROY: + info.result = vmci_ctx_dbell_destroy(cid, info.handle); + break; + + default: + vmci_ioctl_err("got unknown action (action=%d)\n", + info.action); + info.result = VMCI_ERROR_INVALID_ARGS; + } + + return copy_to_user(uptr, &info, sizeof(info)) ? -EFAULT : 0; +} + +static int vmci_host_do_recv_notifications(struct vmci_host_dev *vmci_host_dev, + const char *ioctl_name, + void __user *uptr) +{ + struct vmci_ctx_notify_recv_info info; + struct vmci_handle_arr *db_handle_array; + struct vmci_handle_arr *qp_handle_array; + void __user *ubuf; + u32 cid; + int retval = 0; + + if (vmci_host_dev->ct_type != VMCIOBJ_CONTEXT) { + vmci_ioctl_err("only valid for contexts\n"); + return -EINVAL; + } + + if (vmci_host_dev->user_version < VMCI_VERSION_NOTIFY) { + vmci_ioctl_err("not supported for the current vmx version\n"); + return -EINVAL; + } + + if (copy_from_user(&info, uptr, sizeof(info))) + return -EFAULT; + + if ((info.db_handle_buf_size && !info.db_handle_buf_uva) || + (info.qp_handle_buf_size && !info.qp_handle_buf_uva)) { + return -EINVAL; + } + + cid = vmci_ctx_get_id(vmci_host_dev->context); + + info.result = vmci_ctx_rcv_notifications_get(cid, + &db_handle_array, &qp_handle_array); + if (info.result != VMCI_SUCCESS) + return copy_to_user(uptr, &info, sizeof(info)) ? -EFAULT : 0; + + ubuf = (void __user *)(uintptr_t)info.db_handle_buf_uva; + info.result = drv_cp_harray_to_user(ubuf, &info.db_handle_buf_size, + db_handle_array, &retval); + if (info.result == VMCI_SUCCESS && !retval) { + ubuf = (void __user *)(uintptr_t)info.qp_handle_buf_uva; + info.result = drv_cp_harray_to_user(ubuf, + &info.qp_handle_buf_size, + qp_handle_array, &retval); + } + + if (!retval && copy_to_user(uptr, &info, sizeof(info))) + retval = -EFAULT; + + vmci_ctx_rcv_notifications_release(cid, + db_handle_array, qp_handle_array, + info.result == VMCI_SUCCESS && !retval); + + return retval; +} + +static long vmci_host_unlocked_ioctl(struct file *filp, + unsigned int iocmd, unsigned long ioarg) +{ +#define VMCI_DO_IOCTL(ioctl_name, ioctl_fn) do { \ + char *name = __stringify(IOCTL_VMCI_ ## ioctl_name); \ + return vmci_host_do_ ## ioctl_fn( \ + vmci_host_dev, name, uptr); \ + } while (0) + + struct vmci_host_dev *vmci_host_dev = filp->private_data; + void __user *uptr = (void __user *)ioarg; + + switch (iocmd) { + case IOCTL_VMCI_INIT_CONTEXT: + VMCI_DO_IOCTL(INIT_CONTEXT, init_context); + case IOCTL_VMCI_DATAGRAM_SEND: + VMCI_DO_IOCTL(DATAGRAM_SEND, send_datagram); + case IOCTL_VMCI_DATAGRAM_RECEIVE: + VMCI_DO_IOCTL(DATAGRAM_RECEIVE, receive_datagram); + case IOCTL_VMCI_QUEUEPAIR_ALLOC: + VMCI_DO_IOCTL(QUEUEPAIR_ALLOC, alloc_queuepair); + case IOCTL_VMCI_QUEUEPAIR_SETVA: + VMCI_DO_IOCTL(QUEUEPAIR_SETVA, queuepair_setva); + case IOCTL_VMCI_QUEUEPAIR_SETPAGEFILE: + VMCI_DO_IOCTL(QUEUEPAIR_SETPAGEFILE, queuepair_setpf); + case IOCTL_VMCI_QUEUEPAIR_DETACH: + VMCI_DO_IOCTL(QUEUEPAIR_DETACH, qp_detach); + case IOCTL_VMCI_CTX_ADD_NOTIFICATION: + VMCI_DO_IOCTL(CTX_ADD_NOTIFICATION, ctx_add_notify); + case IOCTL_VMCI_CTX_REMOVE_NOTIFICATION: + VMCI_DO_IOCTL(CTX_REMOVE_NOTIFICATION, ctx_remove_notify); + case IOCTL_VMCI_CTX_GET_CPT_STATE: + VMCI_DO_IOCTL(CTX_GET_CPT_STATE, ctx_get_cpt_state); + case IOCTL_VMCI_CTX_SET_CPT_STATE: + VMCI_DO_IOCTL(CTX_SET_CPT_STATE, ctx_set_cpt_state); + case IOCTL_VMCI_GET_CONTEXT_ID: + VMCI_DO_IOCTL(GET_CONTEXT_ID, get_context_id); + case IOCTL_VMCI_SET_NOTIFY: + VMCI_DO_IOCTL(SET_NOTIFY, set_notify); + case IOCTL_VMCI_NOTIFY_RESOURCE: + VMCI_DO_IOCTL(NOTIFY_RESOURCE, notify_resource); + case IOCTL_VMCI_NOTIFICATIONS_RECEIVE: + VMCI_DO_IOCTL(NOTIFICATIONS_RECEIVE, recv_notifications); + + case IOCTL_VMCI_VERSION: + case IOCTL_VMCI_VERSION2: + return vmci_host_get_version(vmci_host_dev, iocmd, uptr); + + default: + pr_devel("%s: Unknown ioctl (iocmd=%d)\n", __func__, iocmd); + return -EINVAL; + } + +#undef VMCI_DO_IOCTL +} + +static const struct file_operations vmuser_fops = { + .owner = THIS_MODULE, + .open = vmci_host_open, + .release = vmci_host_close, + .poll = vmci_host_poll, + .unlocked_ioctl = vmci_host_unlocked_ioctl, + .compat_ioctl = vmci_host_unlocked_ioctl, +}; + +static struct miscdevice vmci_host_miscdev = { + .name = "vmci", + .minor = MISC_DYNAMIC_MINOR, + .fops = &vmuser_fops, +}; + +int __init vmci_host_init(void) +{ + int error; + + host_context = vmci_ctx_create(VMCI_HOST_CONTEXT_ID, + VMCI_DEFAULT_PROC_PRIVILEGE_FLAGS, + -1, VMCI_VERSION, NULL); + if (IS_ERR(host_context)) { + error = PTR_ERR(host_context); + pr_warn("Failed to initialize VMCIContext (error%d)\n", + error); + return error; + } + + error = misc_register(&vmci_host_miscdev); + if (error) { + pr_warn("Module registration error (name=%s, major=%d, minor=%d, err=%d)\n", + vmci_host_miscdev.name, + MISC_MAJOR, vmci_host_miscdev.minor, + error); + pr_warn("Unable to initialize host personality\n"); + vmci_ctx_destroy(host_context); + return error; + } + + pr_info("VMCI host device registered (name=%s, major=%d, minor=%d)\n", + vmci_host_miscdev.name, MISC_MAJOR, vmci_host_miscdev.minor); + + vmci_host_device_initialized = true; + return 0; +} + +void __exit vmci_host_exit(void) +{ + vmci_host_device_initialized = false; + + misc_deregister(&vmci_host_miscdev); + vmci_ctx_destroy(host_context); + vmci_qp_broker_exit(); + + pr_debug("VMCI host driver module unloaded\n"); +} diff --git a/drivers/misc/vmw_vmci/vmci_queue_pair.c b/drivers/misc/vmw_vmci/vmci_queue_pair.c new file mode 100644 index 000000000..db433d285 --- /dev/null +++ b/drivers/misc/vmw_vmci/vmci_queue_pair.c @@ -0,0 +1,3272 @@ +/* + * VMware VMCI Driver + * + * Copyright (C) 2012 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "vmci_handle_array.h" +#include "vmci_queue_pair.h" +#include "vmci_datagram.h" +#include "vmci_resource.h" +#include "vmci_context.h" +#include "vmci_driver.h" +#include "vmci_event.h" +#include "vmci_route.h" + +/* + * In the following, we will distinguish between two kinds of VMX processes - + * the ones with versions lower than VMCI_VERSION_NOVMVM that use specialized + * VMCI page files in the VMX and supporting VM to VM communication and the + * newer ones that use the guest memory directly. We will in the following + * refer to the older VMX versions as old-style VMX'en, and the newer ones as + * new-style VMX'en. + * + * The state transition datagram is as follows (the VMCIQPB_ prefix has been + * removed for readability) - see below for more details on the transtions: + * + * -------------- NEW ------------- + * | | + * \_/ \_/ + * CREATED_NO_MEM <-----------------> CREATED_MEM + * | | | + * | o-----------------------o | + * | | | + * \_/ \_/ \_/ + * ATTACHED_NO_MEM <----------------> ATTACHED_MEM + * | | | + * | o----------------------o | + * | | | + * \_/ \_/ \_/ + * SHUTDOWN_NO_MEM <----------------> SHUTDOWN_MEM + * | | + * | | + * -------------> gone <------------- + * + * In more detail. When a VMCI queue pair is first created, it will be in the + * VMCIQPB_NEW state. It will then move into one of the following states: + * + * - VMCIQPB_CREATED_NO_MEM: this state indicates that either: + * + * - the created was performed by a host endpoint, in which case there is + * no backing memory yet. + * + * - the create was initiated by an old-style VMX, that uses + * vmci_qp_broker_set_page_store to specify the UVAs of the queue pair at + * a later point in time. This state can be distinguished from the one + * above by the context ID of the creator. A host side is not allowed to + * attach until the page store has been set. + * + * - VMCIQPB_CREATED_MEM: this state is the result when the queue pair + * is created by a VMX using the queue pair device backend that + * sets the UVAs of the queue pair immediately and stores the + * information for later attachers. At this point, it is ready for + * the host side to attach to it. + * + * Once the queue pair is in one of the created states (with the exception of + * the case mentioned for older VMX'en above), it is possible to attach to the + * queue pair. Again we have two new states possible: + * + * - VMCIQPB_ATTACHED_MEM: this state can be reached through the following + * paths: + * + * - from VMCIQPB_CREATED_NO_MEM when a new-style VMX allocates a queue + * pair, and attaches to a queue pair previously created by the host side. + * + * - from VMCIQPB_CREATED_MEM when the host side attaches to a queue pair + * already created by a guest. + * + * - from VMCIQPB_ATTACHED_NO_MEM, when an old-style VMX calls + * vmci_qp_broker_set_page_store (see below). + * + * - VMCIQPB_ATTACHED_NO_MEM: If the queue pair already was in the + * VMCIQPB_CREATED_NO_MEM due to a host side create, an old-style VMX will + * bring the queue pair into this state. Once vmci_qp_broker_set_page_store + * is called to register the user memory, the VMCIQPB_ATTACH_MEM state + * will be entered. + * + * From the attached queue pair, the queue pair can enter the shutdown states + * when either side of the queue pair detaches. If the guest side detaches + * first, the queue pair will enter the VMCIQPB_SHUTDOWN_NO_MEM state, where + * the content of the queue pair will no longer be available. If the host + * side detaches first, the queue pair will either enter the + * VMCIQPB_SHUTDOWN_MEM, if the guest memory is currently mapped, or + * VMCIQPB_SHUTDOWN_NO_MEM, if the guest memory is not mapped + * (e.g., the host detaches while a guest is stunned). + * + * New-style VMX'en will also unmap guest memory, if the guest is + * quiesced, e.g., during a snapshot operation. In that case, the guest + * memory will no longer be available, and the queue pair will transition from + * *_MEM state to a *_NO_MEM state. The VMX may later map the memory once more, + * in which case the queue pair will transition from the *_NO_MEM state at that + * point back to the *_MEM state. Note that the *_NO_MEM state may have changed, + * since the peer may have either attached or detached in the meantime. The + * values are laid out such that ++ on a state will move from a *_NO_MEM to a + * *_MEM state, and vice versa. + */ + +/* The Kernel specific component of the struct vmci_queue structure. */ +struct vmci_queue_kern_if { + struct mutex __mutex; /* Protects the queue. */ + struct mutex *mutex; /* Shared by producer and consumer queues. */ + size_t num_pages; /* Number of pages incl. header. */ + bool host; /* Host or guest? */ + union { + struct { + dma_addr_t *pas; + void **vas; + } g; /* Used by the guest. */ + struct { + struct page **page; + struct page **header_page; + } h; /* Used by the host. */ + } u; +}; + +/* + * This structure is opaque to the clients. + */ +struct vmci_qp { + struct vmci_handle handle; + struct vmci_queue *produce_q; + struct vmci_queue *consume_q; + u64 produce_q_size; + u64 consume_q_size; + u32 peer; + u32 flags; + u32 priv_flags; + bool guest_endpoint; + unsigned int blocked; + unsigned int generation; + wait_queue_head_t event; +}; + +enum qp_broker_state { + VMCIQPB_NEW, + VMCIQPB_CREATED_NO_MEM, + VMCIQPB_CREATED_MEM, + VMCIQPB_ATTACHED_NO_MEM, + VMCIQPB_ATTACHED_MEM, + VMCIQPB_SHUTDOWN_NO_MEM, + VMCIQPB_SHUTDOWN_MEM, + VMCIQPB_GONE +}; + +#define QPBROKERSTATE_HAS_MEM(_qpb) (_qpb->state == VMCIQPB_CREATED_MEM || \ + _qpb->state == VMCIQPB_ATTACHED_MEM || \ + _qpb->state == VMCIQPB_SHUTDOWN_MEM) + +/* + * In the queue pair broker, we always use the guest point of view for + * the produce and consume queue values and references, e.g., the + * produce queue size stored is the guests produce queue size. The + * host endpoint will need to swap these around. The only exception is + * the local queue pairs on the host, in which case the host endpoint + * that creates the queue pair will have the right orientation, and + * the attaching host endpoint will need to swap. + */ +struct qp_entry { + struct list_head list_item; + struct vmci_handle handle; + u32 peer; + u32 flags; + u64 produce_size; + u64 consume_size; + u32 ref_count; +}; + +struct qp_broker_entry { + struct vmci_resource resource; + struct qp_entry qp; + u32 create_id; + u32 attach_id; + enum qp_broker_state state; + bool require_trusted_attach; + bool created_by_trusted; + bool vmci_page_files; /* Created by VMX using VMCI page files */ + struct vmci_queue *produce_q; + struct vmci_queue *consume_q; + struct vmci_queue_header saved_produce_q; + struct vmci_queue_header saved_consume_q; + vmci_event_release_cb wakeup_cb; + void *client_data; + void *local_mem; /* Kernel memory for local queue pair */ +}; + +struct qp_guest_endpoint { + struct vmci_resource resource; + struct qp_entry qp; + u64 num_ppns; + void *produce_q; + void *consume_q; + struct ppn_set ppn_set; +}; + +struct qp_list { + struct list_head head; + struct mutex mutex; /* Protect queue list. */ +}; + +static struct qp_list qp_broker_list = { + .head = LIST_HEAD_INIT(qp_broker_list.head), + .mutex = __MUTEX_INITIALIZER(qp_broker_list.mutex), +}; + +static struct qp_list qp_guest_endpoints = { + .head = LIST_HEAD_INIT(qp_guest_endpoints.head), + .mutex = __MUTEX_INITIALIZER(qp_guest_endpoints.mutex), +}; + +#define INVALID_VMCI_GUEST_MEM_ID 0 +#define QPE_NUM_PAGES(_QPE) ((u32) \ + (DIV_ROUND_UP(_QPE.produce_size, PAGE_SIZE) + \ + DIV_ROUND_UP(_QPE.consume_size, PAGE_SIZE) + 2)) + + +/* + * Frees kernel VA space for a given queue and its queue header, and + * frees physical data pages. + */ +static void qp_free_queue(void *q, u64 size) +{ + struct vmci_queue *queue = q; + + if (queue) { + u64 i; + + /* Given size does not include header, so add in a page here. */ + for (i = 0; i < DIV_ROUND_UP(size, PAGE_SIZE) + 1; i++) { + dma_free_coherent(&vmci_pdev->dev, PAGE_SIZE, + queue->kernel_if->u.g.vas[i], + queue->kernel_if->u.g.pas[i]); + } + + vfree(queue); + } +} + +/* + * Allocates kernel queue pages of specified size with IOMMU mappings, + * plus space for the queue structure/kernel interface and the queue + * header. + */ +static void *qp_alloc_queue(u64 size, u32 flags) +{ + u64 i; + struct vmci_queue *queue; + size_t pas_size; + size_t vas_size; + size_t queue_size = sizeof(*queue) + sizeof(*queue->kernel_if); + u64 num_pages; + + if (size > SIZE_MAX - PAGE_SIZE) + return NULL; + num_pages = DIV_ROUND_UP(size, PAGE_SIZE) + 1; + if (num_pages > + (SIZE_MAX - queue_size) / + (sizeof(*queue->kernel_if->u.g.pas) + + sizeof(*queue->kernel_if->u.g.vas))) + return NULL; + + pas_size = num_pages * sizeof(*queue->kernel_if->u.g.pas); + vas_size = num_pages * sizeof(*queue->kernel_if->u.g.vas); + queue_size += pas_size + vas_size; + + queue = vmalloc(queue_size); + if (!queue) + return NULL; + + queue->q_header = NULL; + queue->saved_header = NULL; + queue->kernel_if = (struct vmci_queue_kern_if *)(queue + 1); + queue->kernel_if->mutex = NULL; + queue->kernel_if->num_pages = num_pages; + queue->kernel_if->u.g.pas = (dma_addr_t *)(queue->kernel_if + 1); + queue->kernel_if->u.g.vas = + (void **)((u8 *)queue->kernel_if->u.g.pas + pas_size); + queue->kernel_if->host = false; + + for (i = 0; i < num_pages; i++) { + queue->kernel_if->u.g.vas[i] = + dma_alloc_coherent(&vmci_pdev->dev, PAGE_SIZE, + &queue->kernel_if->u.g.pas[i], + GFP_KERNEL); + if (!queue->kernel_if->u.g.vas[i]) { + /* Size excl. the header. */ + qp_free_queue(queue, i * PAGE_SIZE); + return NULL; + } + } + + /* Queue header is the first page. */ + queue->q_header = queue->kernel_if->u.g.vas[0]; + + return queue; +} + +/* + * Copies from a given buffer or iovector to a VMCI Queue. Uses + * kmap()/kunmap() to dynamically map/unmap required portions of the queue + * by traversing the offset -> page translation structure for the queue. + * Assumes that offset + size does not wrap around in the queue. + */ +static int qp_memcpy_to_queue_iter(struct vmci_queue *queue, + u64 queue_offset, + struct iov_iter *from, + size_t size) +{ + struct vmci_queue_kern_if *kernel_if = queue->kernel_if; + size_t bytes_copied = 0; + + while (bytes_copied < size) { + const u64 page_index = + (queue_offset + bytes_copied) / PAGE_SIZE; + const size_t page_offset = + (queue_offset + bytes_copied) & (PAGE_SIZE - 1); + void *va; + size_t to_copy; + + if (kernel_if->host) + va = kmap(kernel_if->u.h.page[page_index]); + else + va = kernel_if->u.g.vas[page_index + 1]; + /* Skip header. */ + + if (size - bytes_copied > PAGE_SIZE - page_offset) + /* Enough payload to fill up from this page. */ + to_copy = PAGE_SIZE - page_offset; + else + to_copy = size - bytes_copied; + + if (!copy_from_iter_full((u8 *)va + page_offset, to_copy, + from)) { + if (kernel_if->host) + kunmap(kernel_if->u.h.page[page_index]); + return VMCI_ERROR_INVALID_ARGS; + } + bytes_copied += to_copy; + if (kernel_if->host) + kunmap(kernel_if->u.h.page[page_index]); + } + + return VMCI_SUCCESS; +} + +/* + * Copies to a given buffer or iovector from a VMCI Queue. Uses + * kmap()/kunmap() to dynamically map/unmap required portions of the queue + * by traversing the offset -> page translation structure for the queue. + * Assumes that offset + size does not wrap around in the queue. + */ +static int qp_memcpy_from_queue_iter(struct iov_iter *to, + const struct vmci_queue *queue, + u64 queue_offset, size_t size) +{ + struct vmci_queue_kern_if *kernel_if = queue->kernel_if; + size_t bytes_copied = 0; + + while (bytes_copied < size) { + const u64 page_index = + (queue_offset + bytes_copied) / PAGE_SIZE; + const size_t page_offset = + (queue_offset + bytes_copied) & (PAGE_SIZE - 1); + void *va; + size_t to_copy; + int err; + + if (kernel_if->host) + va = kmap(kernel_if->u.h.page[page_index]); + else + va = kernel_if->u.g.vas[page_index + 1]; + /* Skip header. */ + + if (size - bytes_copied > PAGE_SIZE - page_offset) + /* Enough payload to fill up this page. */ + to_copy = PAGE_SIZE - page_offset; + else + to_copy = size - bytes_copied; + + err = copy_to_iter((u8 *)va + page_offset, to_copy, to); + if (err != to_copy) { + if (kernel_if->host) + kunmap(kernel_if->u.h.page[page_index]); + return VMCI_ERROR_INVALID_ARGS; + } + bytes_copied += to_copy; + if (kernel_if->host) + kunmap(kernel_if->u.h.page[page_index]); + } + + return VMCI_SUCCESS; +} + +/* + * Allocates two list of PPNs --- one for the pages in the produce queue, + * and the other for the pages in the consume queue. Intializes the list + * of PPNs with the page frame numbers of the KVA for the two queues (and + * the queue headers). + */ +static int qp_alloc_ppn_set(void *prod_q, + u64 num_produce_pages, + void *cons_q, + u64 num_consume_pages, struct ppn_set *ppn_set) +{ + u32 *produce_ppns; + u32 *consume_ppns; + struct vmci_queue *produce_q = prod_q; + struct vmci_queue *consume_q = cons_q; + u64 i; + + if (!produce_q || !num_produce_pages || !consume_q || + !num_consume_pages || !ppn_set) + return VMCI_ERROR_INVALID_ARGS; + + if (ppn_set->initialized) + return VMCI_ERROR_ALREADY_EXISTS; + + produce_ppns = + kmalloc_array(num_produce_pages, sizeof(*produce_ppns), + GFP_KERNEL); + if (!produce_ppns) + return VMCI_ERROR_NO_MEM; + + consume_ppns = + kmalloc_array(num_consume_pages, sizeof(*consume_ppns), + GFP_KERNEL); + if (!consume_ppns) { + kfree(produce_ppns); + return VMCI_ERROR_NO_MEM; + } + + for (i = 0; i < num_produce_pages; i++) { + unsigned long pfn; + + produce_ppns[i] = + produce_q->kernel_if->u.g.pas[i] >> PAGE_SHIFT; + pfn = produce_ppns[i]; + + /* Fail allocation if PFN isn't supported by hypervisor. */ + if (sizeof(pfn) > sizeof(*produce_ppns) + && pfn != produce_ppns[i]) + goto ppn_error; + } + + for (i = 0; i < num_consume_pages; i++) { + unsigned long pfn; + + consume_ppns[i] = + consume_q->kernel_if->u.g.pas[i] >> PAGE_SHIFT; + pfn = consume_ppns[i]; + + /* Fail allocation if PFN isn't supported by hypervisor. */ + if (sizeof(pfn) > sizeof(*consume_ppns) + && pfn != consume_ppns[i]) + goto ppn_error; + } + + ppn_set->num_produce_pages = num_produce_pages; + ppn_set->num_consume_pages = num_consume_pages; + ppn_set->produce_ppns = produce_ppns; + ppn_set->consume_ppns = consume_ppns; + ppn_set->initialized = true; + return VMCI_SUCCESS; + + ppn_error: + kfree(produce_ppns); + kfree(consume_ppns); + return VMCI_ERROR_INVALID_ARGS; +} + +/* + * Frees the two list of PPNs for a queue pair. + */ +static void qp_free_ppn_set(struct ppn_set *ppn_set) +{ + if (ppn_set->initialized) { + /* Do not call these functions on NULL inputs. */ + kfree(ppn_set->produce_ppns); + kfree(ppn_set->consume_ppns); + } + memset(ppn_set, 0, sizeof(*ppn_set)); +} + +/* + * Populates the list of PPNs in the hypercall structure with the PPNS + * of the produce queue and the consume queue. + */ +static int qp_populate_ppn_set(u8 *call_buf, const struct ppn_set *ppn_set) +{ + memcpy(call_buf, ppn_set->produce_ppns, + ppn_set->num_produce_pages * sizeof(*ppn_set->produce_ppns)); + memcpy(call_buf + + ppn_set->num_produce_pages * sizeof(*ppn_set->produce_ppns), + ppn_set->consume_ppns, + ppn_set->num_consume_pages * sizeof(*ppn_set->consume_ppns)); + + return VMCI_SUCCESS; +} + +/* + * Allocates kernel VA space of specified size plus space for the queue + * and kernel interface. This is different from the guest queue allocator, + * because we do not allocate our own queue header/data pages here but + * share those of the guest. + */ +static struct vmci_queue *qp_host_alloc_queue(u64 size) +{ + struct vmci_queue *queue; + size_t queue_page_size; + u64 num_pages; + const size_t queue_size = sizeof(*queue) + sizeof(*(queue->kernel_if)); + + if (size > SIZE_MAX - PAGE_SIZE) + return NULL; + num_pages = DIV_ROUND_UP(size, PAGE_SIZE) + 1; + if (num_pages > (SIZE_MAX - queue_size) / + sizeof(*queue->kernel_if->u.h.page)) + return NULL; + + queue_page_size = num_pages * sizeof(*queue->kernel_if->u.h.page); + + if (queue_size + queue_page_size > KMALLOC_MAX_SIZE) + return NULL; + + queue = kzalloc(queue_size + queue_page_size, GFP_KERNEL); + if (queue) { + queue->q_header = NULL; + queue->saved_header = NULL; + queue->kernel_if = (struct vmci_queue_kern_if *)(queue + 1); + queue->kernel_if->host = true; + queue->kernel_if->mutex = NULL; + queue->kernel_if->num_pages = num_pages; + queue->kernel_if->u.h.header_page = + (struct page **)((u8 *)queue + queue_size); + queue->kernel_if->u.h.page = + &queue->kernel_if->u.h.header_page[1]; + } + + return queue; +} + +/* + * Frees kernel memory for a given queue (header plus translation + * structure). + */ +static void qp_host_free_queue(struct vmci_queue *queue, u64 queue_size) +{ + kfree(queue); +} + +/* + * Initialize the mutex for the pair of queues. This mutex is used to + * protect the q_header and the buffer from changing out from under any + * users of either queue. Of course, it's only any good if the mutexes + * are actually acquired. Queue structure must lie on non-paged memory + * or we cannot guarantee access to the mutex. + */ +static void qp_init_queue_mutex(struct vmci_queue *produce_q, + struct vmci_queue *consume_q) +{ + /* + * Only the host queue has shared state - the guest queues do not + * need to synchronize access using a queue mutex. + */ + + if (produce_q->kernel_if->host) { + produce_q->kernel_if->mutex = &produce_q->kernel_if->__mutex; + consume_q->kernel_if->mutex = &produce_q->kernel_if->__mutex; + mutex_init(produce_q->kernel_if->mutex); + } +} + +/* + * Cleans up the mutex for the pair of queues. + */ +static void qp_cleanup_queue_mutex(struct vmci_queue *produce_q, + struct vmci_queue *consume_q) +{ + if (produce_q->kernel_if->host) { + produce_q->kernel_if->mutex = NULL; + consume_q->kernel_if->mutex = NULL; + } +} + +/* + * Acquire the mutex for the queue. Note that the produce_q and + * the consume_q share a mutex. So, only one of the two need to + * be passed in to this routine. Either will work just fine. + */ +static void qp_acquire_queue_mutex(struct vmci_queue *queue) +{ + if (queue->kernel_if->host) + mutex_lock(queue->kernel_if->mutex); +} + +/* + * Release the mutex for the queue. Note that the produce_q and + * the consume_q share a mutex. So, only one of the two need to + * be passed in to this routine. Either will work just fine. + */ +static void qp_release_queue_mutex(struct vmci_queue *queue) +{ + if (queue->kernel_if->host) + mutex_unlock(queue->kernel_if->mutex); +} + +/* + * Helper function to release pages in the PageStoreAttachInfo + * previously obtained using get_user_pages. + */ +static void qp_release_pages(struct page **pages, + u64 num_pages, bool dirty) +{ + int i; + + for (i = 0; i < num_pages; i++) { + if (dirty) + set_page_dirty_lock(pages[i]); + + put_page(pages[i]); + pages[i] = NULL; + } +} + +/* + * Lock the user pages referenced by the {produce,consume}Buffer + * struct into memory and populate the {produce,consume}Pages + * arrays in the attach structure with them. + */ +static int qp_host_get_user_memory(u64 produce_uva, + u64 consume_uva, + struct vmci_queue *produce_q, + struct vmci_queue *consume_q) +{ + int retval; + int err = VMCI_SUCCESS; + + retval = get_user_pages_fast((uintptr_t) produce_uva, + produce_q->kernel_if->num_pages, 1, + produce_q->kernel_if->u.h.header_page); + if (retval < (int)produce_q->kernel_if->num_pages) { + pr_debug("get_user_pages_fast(produce) failed (retval=%d)", + retval); + if (retval > 0) + qp_release_pages(produce_q->kernel_if->u.h.header_page, + retval, false); + err = VMCI_ERROR_NO_MEM; + goto out; + } + + retval = get_user_pages_fast((uintptr_t) consume_uva, + consume_q->kernel_if->num_pages, 1, + consume_q->kernel_if->u.h.header_page); + if (retval < (int)consume_q->kernel_if->num_pages) { + pr_debug("get_user_pages_fast(consume) failed (retval=%d)", + retval); + if (retval > 0) + qp_release_pages(consume_q->kernel_if->u.h.header_page, + retval, false); + qp_release_pages(produce_q->kernel_if->u.h.header_page, + produce_q->kernel_if->num_pages, false); + err = VMCI_ERROR_NO_MEM; + } + + out: + return err; +} + +/* + * Registers the specification of the user pages used for backing a queue + * pair. Enough information to map in pages is stored in the OS specific + * part of the struct vmci_queue structure. + */ +static int qp_host_register_user_memory(struct vmci_qp_page_store *page_store, + struct vmci_queue *produce_q, + struct vmci_queue *consume_q) +{ + u64 produce_uva; + u64 consume_uva; + + /* + * The new style and the old style mapping only differs in + * that we either get a single or two UVAs, so we split the + * single UVA range at the appropriate spot. + */ + produce_uva = page_store->pages; + consume_uva = page_store->pages + + produce_q->kernel_if->num_pages * PAGE_SIZE; + return qp_host_get_user_memory(produce_uva, consume_uva, produce_q, + consume_q); +} + +/* + * Releases and removes the references to user pages stored in the attach + * struct. Pages are released from the page cache and may become + * swappable again. + */ +static void qp_host_unregister_user_memory(struct vmci_queue *produce_q, + struct vmci_queue *consume_q) +{ + qp_release_pages(produce_q->kernel_if->u.h.header_page, + produce_q->kernel_if->num_pages, true); + memset(produce_q->kernel_if->u.h.header_page, 0, + sizeof(*produce_q->kernel_if->u.h.header_page) * + produce_q->kernel_if->num_pages); + qp_release_pages(consume_q->kernel_if->u.h.header_page, + consume_q->kernel_if->num_pages, true); + memset(consume_q->kernel_if->u.h.header_page, 0, + sizeof(*consume_q->kernel_if->u.h.header_page) * + consume_q->kernel_if->num_pages); +} + +/* + * Once qp_host_register_user_memory has been performed on a + * queue, the queue pair headers can be mapped into the + * kernel. Once mapped, they must be unmapped with + * qp_host_unmap_queues prior to calling + * qp_host_unregister_user_memory. + * Pages are pinned. + */ +static int qp_host_map_queues(struct vmci_queue *produce_q, + struct vmci_queue *consume_q) +{ + int result; + + if (!produce_q->q_header || !consume_q->q_header) { + struct page *headers[2]; + + if (produce_q->q_header != consume_q->q_header) + return VMCI_ERROR_QUEUEPAIR_MISMATCH; + + if (produce_q->kernel_if->u.h.header_page == NULL || + *produce_q->kernel_if->u.h.header_page == NULL) + return VMCI_ERROR_UNAVAILABLE; + + headers[0] = *produce_q->kernel_if->u.h.header_page; + headers[1] = *consume_q->kernel_if->u.h.header_page; + + produce_q->q_header = vmap(headers, 2, VM_MAP, PAGE_KERNEL); + if (produce_q->q_header != NULL) { + consume_q->q_header = + (struct vmci_queue_header *)((u8 *) + produce_q->q_header + + PAGE_SIZE); + result = VMCI_SUCCESS; + } else { + pr_warn("vmap failed\n"); + result = VMCI_ERROR_NO_MEM; + } + } else { + result = VMCI_SUCCESS; + } + + return result; +} + +/* + * Unmaps previously mapped queue pair headers from the kernel. + * Pages are unpinned. + */ +static int qp_host_unmap_queues(u32 gid, + struct vmci_queue *produce_q, + struct vmci_queue *consume_q) +{ + if (produce_q->q_header) { + if (produce_q->q_header < consume_q->q_header) + vunmap(produce_q->q_header); + else + vunmap(consume_q->q_header); + + produce_q->q_header = NULL; + consume_q->q_header = NULL; + } + + return VMCI_SUCCESS; +} + +/* + * Finds the entry in the list corresponding to a given handle. Assumes + * that the list is locked. + */ +static struct qp_entry *qp_list_find(struct qp_list *qp_list, + struct vmci_handle handle) +{ + struct qp_entry *entry; + + if (vmci_handle_is_invalid(handle)) + return NULL; + + list_for_each_entry(entry, &qp_list->head, list_item) { + if (vmci_handle_is_equal(entry->handle, handle)) + return entry; + } + + return NULL; +} + +/* + * Finds the entry in the list corresponding to a given handle. + */ +static struct qp_guest_endpoint * +qp_guest_handle_to_entry(struct vmci_handle handle) +{ + struct qp_guest_endpoint *entry; + struct qp_entry *qp = qp_list_find(&qp_guest_endpoints, handle); + + entry = qp ? container_of( + qp, struct qp_guest_endpoint, qp) : NULL; + return entry; +} + +/* + * Finds the entry in the list corresponding to a given handle. + */ +static struct qp_broker_entry * +qp_broker_handle_to_entry(struct vmci_handle handle) +{ + struct qp_broker_entry *entry; + struct qp_entry *qp = qp_list_find(&qp_broker_list, handle); + + entry = qp ? container_of( + qp, struct qp_broker_entry, qp) : NULL; + return entry; +} + +/* + * Dispatches a queue pair event message directly into the local event + * queue. + */ +static int qp_notify_peer_local(bool attach, struct vmci_handle handle) +{ + u32 context_id = vmci_get_context_id(); + struct vmci_event_qp ev; + + ev.msg.hdr.dst = vmci_make_handle(context_id, VMCI_EVENT_HANDLER); + ev.msg.hdr.src = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID, + VMCI_CONTEXT_RESOURCE_ID); + ev.msg.hdr.payload_size = sizeof(ev) - sizeof(ev.msg.hdr); + ev.msg.event_data.event = + attach ? VMCI_EVENT_QP_PEER_ATTACH : VMCI_EVENT_QP_PEER_DETACH; + ev.payload.peer_id = context_id; + ev.payload.handle = handle; + + return vmci_event_dispatch(&ev.msg.hdr); +} + +/* + * Allocates and initializes a qp_guest_endpoint structure. + * Allocates a queue_pair rid (and handle) iff the given entry has + * an invalid handle. 0 through VMCI_RESERVED_RESOURCE_ID_MAX + * are reserved handles. Assumes that the QP list mutex is held + * by the caller. + */ +static struct qp_guest_endpoint * +qp_guest_endpoint_create(struct vmci_handle handle, + u32 peer, + u32 flags, + u64 produce_size, + u64 consume_size, + void *produce_q, + void *consume_q) +{ + int result; + struct qp_guest_endpoint *entry; + /* One page each for the queue headers. */ + const u64 num_ppns = DIV_ROUND_UP(produce_size, PAGE_SIZE) + + DIV_ROUND_UP(consume_size, PAGE_SIZE) + 2; + + if (vmci_handle_is_invalid(handle)) { + u32 context_id = vmci_get_context_id(); + + handle = vmci_make_handle(context_id, VMCI_INVALID_ID); + } + + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + if (entry) { + entry->qp.peer = peer; + entry->qp.flags = flags; + entry->qp.produce_size = produce_size; + entry->qp.consume_size = consume_size; + entry->qp.ref_count = 0; + entry->num_ppns = num_ppns; + entry->produce_q = produce_q; + entry->consume_q = consume_q; + INIT_LIST_HEAD(&entry->qp.list_item); + + /* Add resource obj */ + result = vmci_resource_add(&entry->resource, + VMCI_RESOURCE_TYPE_QPAIR_GUEST, + handle); + entry->qp.handle = vmci_resource_handle(&entry->resource); + if ((result != VMCI_SUCCESS) || + qp_list_find(&qp_guest_endpoints, entry->qp.handle)) { + pr_warn("Failed to add new resource (handle=0x%x:0x%x), error: %d", + handle.context, handle.resource, result); + kfree(entry); + entry = NULL; + } + } + return entry; +} + +/* + * Frees a qp_guest_endpoint structure. + */ +static void qp_guest_endpoint_destroy(struct qp_guest_endpoint *entry) +{ + qp_free_ppn_set(&entry->ppn_set); + qp_cleanup_queue_mutex(entry->produce_q, entry->consume_q); + qp_free_queue(entry->produce_q, entry->qp.produce_size); + qp_free_queue(entry->consume_q, entry->qp.consume_size); + /* Unlink from resource hash table and free callback */ + vmci_resource_remove(&entry->resource); + + kfree(entry); +} + +/* + * Helper to make a queue_pairAlloc hypercall when the driver is + * supporting a guest device. + */ +static int qp_alloc_hypercall(const struct qp_guest_endpoint *entry) +{ + struct vmci_qp_alloc_msg *alloc_msg; + size_t msg_size; + int result; + + if (!entry || entry->num_ppns <= 2) + return VMCI_ERROR_INVALID_ARGS; + + msg_size = sizeof(*alloc_msg) + + (size_t) entry->num_ppns * sizeof(u32); + alloc_msg = kmalloc(msg_size, GFP_KERNEL); + if (!alloc_msg) + return VMCI_ERROR_NO_MEM; + + alloc_msg->hdr.dst = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID, + VMCI_QUEUEPAIR_ALLOC); + alloc_msg->hdr.src = VMCI_ANON_SRC_HANDLE; + alloc_msg->hdr.payload_size = msg_size - VMCI_DG_HEADERSIZE; + alloc_msg->handle = entry->qp.handle; + alloc_msg->peer = entry->qp.peer; + alloc_msg->flags = entry->qp.flags; + alloc_msg->produce_size = entry->qp.produce_size; + alloc_msg->consume_size = entry->qp.consume_size; + alloc_msg->num_ppns = entry->num_ppns; + + result = qp_populate_ppn_set((u8 *)alloc_msg + sizeof(*alloc_msg), + &entry->ppn_set); + if (result == VMCI_SUCCESS) + result = vmci_send_datagram(&alloc_msg->hdr); + + kfree(alloc_msg); + + return result; +} + +/* + * Helper to make a queue_pairDetach hypercall when the driver is + * supporting a guest device. + */ +static int qp_detatch_hypercall(struct vmci_handle handle) +{ + struct vmci_qp_detach_msg detach_msg; + + detach_msg.hdr.dst = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID, + VMCI_QUEUEPAIR_DETACH); + detach_msg.hdr.src = VMCI_ANON_SRC_HANDLE; + detach_msg.hdr.payload_size = sizeof(handle); + detach_msg.handle = handle; + + return vmci_send_datagram(&detach_msg.hdr); +} + +/* + * Adds the given entry to the list. Assumes that the list is locked. + */ +static void qp_list_add_entry(struct qp_list *qp_list, struct qp_entry *entry) +{ + if (entry) + list_add(&entry->list_item, &qp_list->head); +} + +/* + * Removes the given entry from the list. Assumes that the list is locked. + */ +static void qp_list_remove_entry(struct qp_list *qp_list, + struct qp_entry *entry) +{ + if (entry) + list_del(&entry->list_item); +} + +/* + * Helper for VMCI queue_pair detach interface. Frees the physical + * pages for the queue pair. + */ +static int qp_detatch_guest_work(struct vmci_handle handle) +{ + int result; + struct qp_guest_endpoint *entry; + u32 ref_count = ~0; /* To avoid compiler warning below */ + + mutex_lock(&qp_guest_endpoints.mutex); + + entry = qp_guest_handle_to_entry(handle); + if (!entry) { + mutex_unlock(&qp_guest_endpoints.mutex); + return VMCI_ERROR_NOT_FOUND; + } + + if (entry->qp.flags & VMCI_QPFLAG_LOCAL) { + result = VMCI_SUCCESS; + + if (entry->qp.ref_count > 1) { + result = qp_notify_peer_local(false, handle); + /* + * We can fail to notify a local queuepair + * because we can't allocate. We still want + * to release the entry if that happens, so + * don't bail out yet. + */ + } + } else { + result = qp_detatch_hypercall(handle); + if (result < VMCI_SUCCESS) { + /* + * We failed to notify a non-local queuepair. + * That other queuepair might still be + * accessing the shared memory, so don't + * release the entry yet. It will get cleaned + * up by VMCIqueue_pair_Exit() if necessary + * (assuming we are going away, otherwise why + * did this fail?). + */ + + mutex_unlock(&qp_guest_endpoints.mutex); + return result; + } + } + + /* + * If we get here then we either failed to notify a local queuepair, or + * we succeeded in all cases. Release the entry if required. + */ + + entry->qp.ref_count--; + if (entry->qp.ref_count == 0) + qp_list_remove_entry(&qp_guest_endpoints, &entry->qp); + + /* If we didn't remove the entry, this could change once we unlock. */ + if (entry) + ref_count = entry->qp.ref_count; + + mutex_unlock(&qp_guest_endpoints.mutex); + + if (ref_count == 0) + qp_guest_endpoint_destroy(entry); + + return result; +} + +/* + * This functions handles the actual allocation of a VMCI queue + * pair guest endpoint. Allocates physical pages for the queue + * pair. It makes OS dependent calls through generic wrappers. + */ +static int qp_alloc_guest_work(struct vmci_handle *handle, + struct vmci_queue **produce_q, + u64 produce_size, + struct vmci_queue **consume_q, + u64 consume_size, + u32 peer, + u32 flags, + u32 priv_flags) +{ + const u64 num_produce_pages = + DIV_ROUND_UP(produce_size, PAGE_SIZE) + 1; + const u64 num_consume_pages = + DIV_ROUND_UP(consume_size, PAGE_SIZE) + 1; + void *my_produce_q = NULL; + void *my_consume_q = NULL; + int result; + struct qp_guest_endpoint *queue_pair_entry = NULL; + + if (priv_flags != VMCI_NO_PRIVILEGE_FLAGS) + return VMCI_ERROR_NO_ACCESS; + + mutex_lock(&qp_guest_endpoints.mutex); + + queue_pair_entry = qp_guest_handle_to_entry(*handle); + if (queue_pair_entry) { + if (queue_pair_entry->qp.flags & VMCI_QPFLAG_LOCAL) { + /* Local attach case. */ + if (queue_pair_entry->qp.ref_count > 1) { + pr_devel("Error attempting to attach more than once\n"); + result = VMCI_ERROR_UNAVAILABLE; + goto error_keep_entry; + } + + if (queue_pair_entry->qp.produce_size != consume_size || + queue_pair_entry->qp.consume_size != + produce_size || + queue_pair_entry->qp.flags != + (flags & ~VMCI_QPFLAG_ATTACH_ONLY)) { + pr_devel("Error mismatched queue pair in local attach\n"); + result = VMCI_ERROR_QUEUEPAIR_MISMATCH; + goto error_keep_entry; + } + + /* + * Do a local attach. We swap the consume and + * produce queues for the attacher and deliver + * an attach event. + */ + result = qp_notify_peer_local(true, *handle); + if (result < VMCI_SUCCESS) + goto error_keep_entry; + + my_produce_q = queue_pair_entry->consume_q; + my_consume_q = queue_pair_entry->produce_q; + goto out; + } + + result = VMCI_ERROR_ALREADY_EXISTS; + goto error_keep_entry; + } + + my_produce_q = qp_alloc_queue(produce_size, flags); + if (!my_produce_q) { + pr_warn("Error allocating pages for produce queue\n"); + result = VMCI_ERROR_NO_MEM; + goto error; + } + + my_consume_q = qp_alloc_queue(consume_size, flags); + if (!my_consume_q) { + pr_warn("Error allocating pages for consume queue\n"); + result = VMCI_ERROR_NO_MEM; + goto error; + } + + queue_pair_entry = qp_guest_endpoint_create(*handle, peer, flags, + produce_size, consume_size, + my_produce_q, my_consume_q); + if (!queue_pair_entry) { + pr_warn("Error allocating memory in %s\n", __func__); + result = VMCI_ERROR_NO_MEM; + goto error; + } + + result = qp_alloc_ppn_set(my_produce_q, num_produce_pages, my_consume_q, + num_consume_pages, + &queue_pair_entry->ppn_set); + if (result < VMCI_SUCCESS) { + pr_warn("qp_alloc_ppn_set failed\n"); + goto error; + } + + /* + * It's only necessary to notify the host if this queue pair will be + * attached to from another context. + */ + if (queue_pair_entry->qp.flags & VMCI_QPFLAG_LOCAL) { + /* Local create case. */ + u32 context_id = vmci_get_context_id(); + + /* + * Enforce similar checks on local queue pairs as we + * do for regular ones. The handle's context must + * match the creator or attacher context id (here they + * are both the current context id) and the + * attach-only flag cannot exist during create. We + * also ensure specified peer is this context or an + * invalid one. + */ + if (queue_pair_entry->qp.handle.context != context_id || + (queue_pair_entry->qp.peer != VMCI_INVALID_ID && + queue_pair_entry->qp.peer != context_id)) { + result = VMCI_ERROR_NO_ACCESS; + goto error; + } + + if (queue_pair_entry->qp.flags & VMCI_QPFLAG_ATTACH_ONLY) { + result = VMCI_ERROR_NOT_FOUND; + goto error; + } + } else { + result = qp_alloc_hypercall(queue_pair_entry); + if (result < VMCI_SUCCESS) { + pr_warn("qp_alloc_hypercall result = %d\n", result); + goto error; + } + } + + qp_init_queue_mutex((struct vmci_queue *)my_produce_q, + (struct vmci_queue *)my_consume_q); + + qp_list_add_entry(&qp_guest_endpoints, &queue_pair_entry->qp); + + out: + queue_pair_entry->qp.ref_count++; + *handle = queue_pair_entry->qp.handle; + *produce_q = (struct vmci_queue *)my_produce_q; + *consume_q = (struct vmci_queue *)my_consume_q; + + /* + * We should initialize the queue pair header pages on a local + * queue pair create. For non-local queue pairs, the + * hypervisor initializes the header pages in the create step. + */ + if ((queue_pair_entry->qp.flags & VMCI_QPFLAG_LOCAL) && + queue_pair_entry->qp.ref_count == 1) { + vmci_q_header_init((*produce_q)->q_header, *handle); + vmci_q_header_init((*consume_q)->q_header, *handle); + } + + mutex_unlock(&qp_guest_endpoints.mutex); + + return VMCI_SUCCESS; + + error: + mutex_unlock(&qp_guest_endpoints.mutex); + if (queue_pair_entry) { + /* The queues will be freed inside the destroy routine. */ + qp_guest_endpoint_destroy(queue_pair_entry); + } else { + qp_free_queue(my_produce_q, produce_size); + qp_free_queue(my_consume_q, consume_size); + } + return result; + + error_keep_entry: + /* This path should only be used when an existing entry was found. */ + mutex_unlock(&qp_guest_endpoints.mutex); + return result; +} + +/* + * The first endpoint issuing a queue pair allocation will create the state + * of the queue pair in the queue pair broker. + * + * If the creator is a guest, it will associate a VMX virtual address range + * with the queue pair as specified by the page_store. For compatibility with + * older VMX'en, that would use a separate step to set the VMX virtual + * address range, the virtual address range can be registered later using + * vmci_qp_broker_set_page_store. In that case, a page_store of NULL should be + * used. + * + * If the creator is the host, a page_store of NULL should be used as well, + * since the host is not able to supply a page store for the queue pair. + * + * For older VMX and host callers, the queue pair will be created in the + * VMCIQPB_CREATED_NO_MEM state, and for current VMX callers, it will be + * created in VMCOQPB_CREATED_MEM state. + */ +static int qp_broker_create(struct vmci_handle handle, + u32 peer, + u32 flags, + u32 priv_flags, + u64 produce_size, + u64 consume_size, + struct vmci_qp_page_store *page_store, + struct vmci_ctx *context, + vmci_event_release_cb wakeup_cb, + void *client_data, struct qp_broker_entry **ent) +{ + struct qp_broker_entry *entry = NULL; + const u32 context_id = vmci_ctx_get_id(context); + bool is_local = flags & VMCI_QPFLAG_LOCAL; + int result; + u64 guest_produce_size; + u64 guest_consume_size; + + /* Do not create if the caller asked not to. */ + if (flags & VMCI_QPFLAG_ATTACH_ONLY) + return VMCI_ERROR_NOT_FOUND; + + /* + * Creator's context ID should match handle's context ID or the creator + * must allow the context in handle's context ID as the "peer". + */ + if (handle.context != context_id && handle.context != peer) + return VMCI_ERROR_NO_ACCESS; + + if (VMCI_CONTEXT_IS_VM(context_id) && VMCI_CONTEXT_IS_VM(peer)) + return VMCI_ERROR_DST_UNREACHABLE; + + /* + * Creator's context ID for local queue pairs should match the + * peer, if a peer is specified. + */ + if (is_local && peer != VMCI_INVALID_ID && context_id != peer) + return VMCI_ERROR_NO_ACCESS; + + entry = kzalloc(sizeof(*entry), GFP_ATOMIC); + if (!entry) + return VMCI_ERROR_NO_MEM; + + if (vmci_ctx_get_id(context) == VMCI_HOST_CONTEXT_ID && !is_local) { + /* + * The queue pair broker entry stores values from the guest + * point of view, so a creating host side endpoint should swap + * produce and consume values -- unless it is a local queue + * pair, in which case no swapping is necessary, since the local + * attacher will swap queues. + */ + + guest_produce_size = consume_size; + guest_consume_size = produce_size; + } else { + guest_produce_size = produce_size; + guest_consume_size = consume_size; + } + + entry->qp.handle = handle; + entry->qp.peer = peer; + entry->qp.flags = flags; + entry->qp.produce_size = guest_produce_size; + entry->qp.consume_size = guest_consume_size; + entry->qp.ref_count = 1; + entry->create_id = context_id; + entry->attach_id = VMCI_INVALID_ID; + entry->state = VMCIQPB_NEW; + entry->require_trusted_attach = + !!(context->priv_flags & VMCI_PRIVILEGE_FLAG_RESTRICTED); + entry->created_by_trusted = + !!(priv_flags & VMCI_PRIVILEGE_FLAG_TRUSTED); + entry->vmci_page_files = false; + entry->wakeup_cb = wakeup_cb; + entry->client_data = client_data; + entry->produce_q = qp_host_alloc_queue(guest_produce_size); + if (entry->produce_q == NULL) { + result = VMCI_ERROR_NO_MEM; + goto error; + } + entry->consume_q = qp_host_alloc_queue(guest_consume_size); + if (entry->consume_q == NULL) { + result = VMCI_ERROR_NO_MEM; + goto error; + } + + qp_init_queue_mutex(entry->produce_q, entry->consume_q); + + INIT_LIST_HEAD(&entry->qp.list_item); + + if (is_local) { + u8 *tmp; + + entry->local_mem = kcalloc(QPE_NUM_PAGES(entry->qp), + PAGE_SIZE, GFP_KERNEL); + if (entry->local_mem == NULL) { + result = VMCI_ERROR_NO_MEM; + goto error; + } + entry->state = VMCIQPB_CREATED_MEM; + entry->produce_q->q_header = entry->local_mem; + tmp = (u8 *)entry->local_mem + PAGE_SIZE * + (DIV_ROUND_UP(entry->qp.produce_size, PAGE_SIZE) + 1); + entry->consume_q->q_header = (struct vmci_queue_header *)tmp; + } else if (page_store) { + /* + * The VMX already initialized the queue pair headers, so no + * need for the kernel side to do that. + */ + result = qp_host_register_user_memory(page_store, + entry->produce_q, + entry->consume_q); + if (result < VMCI_SUCCESS) + goto error; + + entry->state = VMCIQPB_CREATED_MEM; + } else { + /* + * A create without a page_store may be either a host + * side create (in which case we are waiting for the + * guest side to supply the memory) or an old style + * queue pair create (in which case we will expect a + * set page store call as the next step). + */ + entry->state = VMCIQPB_CREATED_NO_MEM; + } + + qp_list_add_entry(&qp_broker_list, &entry->qp); + if (ent != NULL) + *ent = entry; + + /* Add to resource obj */ + result = vmci_resource_add(&entry->resource, + VMCI_RESOURCE_TYPE_QPAIR_HOST, + handle); + if (result != VMCI_SUCCESS) { + pr_warn("Failed to add new resource (handle=0x%x:0x%x), error: %d", + handle.context, handle.resource, result); + goto error; + } + + entry->qp.handle = vmci_resource_handle(&entry->resource); + if (is_local) { + vmci_q_header_init(entry->produce_q->q_header, + entry->qp.handle); + vmci_q_header_init(entry->consume_q->q_header, + entry->qp.handle); + } + + vmci_ctx_qp_create(context, entry->qp.handle); + + return VMCI_SUCCESS; + + error: + if (entry != NULL) { + qp_host_free_queue(entry->produce_q, guest_produce_size); + qp_host_free_queue(entry->consume_q, guest_consume_size); + kfree(entry); + } + + return result; +} + +/* + * Enqueues an event datagram to notify the peer VM attached to + * the given queue pair handle about attach/detach event by the + * given VM. Returns Payload size of datagram enqueued on + * success, error code otherwise. + */ +static int qp_notify_peer(bool attach, + struct vmci_handle handle, + u32 my_id, + u32 peer_id) +{ + int rv; + struct vmci_event_qp ev; + + if (vmci_handle_is_invalid(handle) || my_id == VMCI_INVALID_ID || + peer_id == VMCI_INVALID_ID) + return VMCI_ERROR_INVALID_ARGS; + + /* + * In vmci_ctx_enqueue_datagram() we enforce the upper limit on + * number of pending events from the hypervisor to a given VM + * otherwise a rogue VM could do an arbitrary number of attach + * and detach operations causing memory pressure in the host + * kernel. + */ + + ev.msg.hdr.dst = vmci_make_handle(peer_id, VMCI_EVENT_HANDLER); + ev.msg.hdr.src = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID, + VMCI_CONTEXT_RESOURCE_ID); + ev.msg.hdr.payload_size = sizeof(ev) - sizeof(ev.msg.hdr); + ev.msg.event_data.event = attach ? + VMCI_EVENT_QP_PEER_ATTACH : VMCI_EVENT_QP_PEER_DETACH; + ev.payload.handle = handle; + ev.payload.peer_id = my_id; + + rv = vmci_datagram_dispatch(VMCI_HYPERVISOR_CONTEXT_ID, + &ev.msg.hdr, false); + if (rv < VMCI_SUCCESS) + pr_warn("Failed to enqueue queue_pair %s event datagram for context (ID=0x%x)\n", + attach ? "ATTACH" : "DETACH", peer_id); + + return rv; +} + +/* + * The second endpoint issuing a queue pair allocation will attach to + * the queue pair registered with the queue pair broker. + * + * If the attacher is a guest, it will associate a VMX virtual address + * range with the queue pair as specified by the page_store. At this + * point, the already attach host endpoint may start using the queue + * pair, and an attach event is sent to it. For compatibility with + * older VMX'en, that used a separate step to set the VMX virtual + * address range, the virtual address range can be registered later + * using vmci_qp_broker_set_page_store. In that case, a page_store of + * NULL should be used, and the attach event will be generated once + * the actual page store has been set. + * + * If the attacher is the host, a page_store of NULL should be used as + * well, since the page store information is already set by the guest. + * + * For new VMX and host callers, the queue pair will be moved to the + * VMCIQPB_ATTACHED_MEM state, and for older VMX callers, it will be + * moved to the VMCOQPB_ATTACHED_NO_MEM state. + */ +static int qp_broker_attach(struct qp_broker_entry *entry, + u32 peer, + u32 flags, + u32 priv_flags, + u64 produce_size, + u64 consume_size, + struct vmci_qp_page_store *page_store, + struct vmci_ctx *context, + vmci_event_release_cb wakeup_cb, + void *client_data, + struct qp_broker_entry **ent) +{ + const u32 context_id = vmci_ctx_get_id(context); + bool is_local = flags & VMCI_QPFLAG_LOCAL; + int result; + + if (entry->state != VMCIQPB_CREATED_NO_MEM && + entry->state != VMCIQPB_CREATED_MEM) + return VMCI_ERROR_UNAVAILABLE; + + if (is_local) { + if (!(entry->qp.flags & VMCI_QPFLAG_LOCAL) || + context_id != entry->create_id) { + return VMCI_ERROR_INVALID_ARGS; + } + } else if (context_id == entry->create_id || + context_id == entry->attach_id) { + return VMCI_ERROR_ALREADY_EXISTS; + } + + if (VMCI_CONTEXT_IS_VM(context_id) && + VMCI_CONTEXT_IS_VM(entry->create_id)) + return VMCI_ERROR_DST_UNREACHABLE; + + /* + * If we are attaching from a restricted context then the queuepair + * must have been created by a trusted endpoint. + */ + if ((context->priv_flags & VMCI_PRIVILEGE_FLAG_RESTRICTED) && + !entry->created_by_trusted) + return VMCI_ERROR_NO_ACCESS; + + /* + * If we are attaching to a queuepair that was created by a restricted + * context then we must be trusted. + */ + if (entry->require_trusted_attach && + (!(priv_flags & VMCI_PRIVILEGE_FLAG_TRUSTED))) + return VMCI_ERROR_NO_ACCESS; + + /* + * If the creator specifies VMCI_INVALID_ID in "peer" field, access + * control check is not performed. + */ + if (entry->qp.peer != VMCI_INVALID_ID && entry->qp.peer != context_id) + return VMCI_ERROR_NO_ACCESS; + + if (entry->create_id == VMCI_HOST_CONTEXT_ID) { + /* + * Do not attach if the caller doesn't support Host Queue Pairs + * and a host created this queue pair. + */ + + if (!vmci_ctx_supports_host_qp(context)) + return VMCI_ERROR_INVALID_RESOURCE; + + } else if (context_id == VMCI_HOST_CONTEXT_ID) { + struct vmci_ctx *create_context; + bool supports_host_qp; + + /* + * Do not attach a host to a user created queue pair if that + * user doesn't support host queue pair end points. + */ + + create_context = vmci_ctx_get(entry->create_id); + supports_host_qp = vmci_ctx_supports_host_qp(create_context); + vmci_ctx_put(create_context); + + if (!supports_host_qp) + return VMCI_ERROR_INVALID_RESOURCE; + } + + if ((entry->qp.flags & ~VMCI_QP_ASYMM) != (flags & ~VMCI_QP_ASYMM_PEER)) + return VMCI_ERROR_QUEUEPAIR_MISMATCH; + + if (context_id != VMCI_HOST_CONTEXT_ID) { + /* + * The queue pair broker entry stores values from the guest + * point of view, so an attaching guest should match the values + * stored in the entry. + */ + + if (entry->qp.produce_size != produce_size || + entry->qp.consume_size != consume_size) { + return VMCI_ERROR_QUEUEPAIR_MISMATCH; + } + } else if (entry->qp.produce_size != consume_size || + entry->qp.consume_size != produce_size) { + return VMCI_ERROR_QUEUEPAIR_MISMATCH; + } + + if (context_id != VMCI_HOST_CONTEXT_ID) { + /* + * If a guest attached to a queue pair, it will supply + * the backing memory. If this is a pre NOVMVM vmx, + * the backing memory will be supplied by calling + * vmci_qp_broker_set_page_store() following the + * return of the vmci_qp_broker_alloc() call. If it is + * a vmx of version NOVMVM or later, the page store + * must be supplied as part of the + * vmci_qp_broker_alloc call. Under all circumstances + * must the initially created queue pair not have any + * memory associated with it already. + */ + + if (entry->state != VMCIQPB_CREATED_NO_MEM) + return VMCI_ERROR_INVALID_ARGS; + + if (page_store != NULL) { + /* + * Patch up host state to point to guest + * supplied memory. The VMX already + * initialized the queue pair headers, so no + * need for the kernel side to do that. + */ + + result = qp_host_register_user_memory(page_store, + entry->produce_q, + entry->consume_q); + if (result < VMCI_SUCCESS) + return result; + + entry->state = VMCIQPB_ATTACHED_MEM; + } else { + entry->state = VMCIQPB_ATTACHED_NO_MEM; + } + } else if (entry->state == VMCIQPB_CREATED_NO_MEM) { + /* + * The host side is attempting to attach to a queue + * pair that doesn't have any memory associated with + * it. This must be a pre NOVMVM vmx that hasn't set + * the page store information yet, or a quiesced VM. + */ + + return VMCI_ERROR_UNAVAILABLE; + } else { + /* The host side has successfully attached to a queue pair. */ + entry->state = VMCIQPB_ATTACHED_MEM; + } + + if (entry->state == VMCIQPB_ATTACHED_MEM) { + result = + qp_notify_peer(true, entry->qp.handle, context_id, + entry->create_id); + if (result < VMCI_SUCCESS) + pr_warn("Failed to notify peer (ID=0x%x) of attach to queue pair (handle=0x%x:0x%x)\n", + entry->create_id, entry->qp.handle.context, + entry->qp.handle.resource); + } + + entry->attach_id = context_id; + entry->qp.ref_count++; + if (wakeup_cb) { + entry->wakeup_cb = wakeup_cb; + entry->client_data = client_data; + } + + /* + * When attaching to local queue pairs, the context already has + * an entry tracking the queue pair, so don't add another one. + */ + if (!is_local) + vmci_ctx_qp_create(context, entry->qp.handle); + + if (ent != NULL) + *ent = entry; + + return VMCI_SUCCESS; +} + +/* + * queue_pair_Alloc for use when setting up queue pair endpoints + * on the host. + */ +static int qp_broker_alloc(struct vmci_handle handle, + u32 peer, + u32 flags, + u32 priv_flags, + u64 produce_size, + u64 consume_size, + struct vmci_qp_page_store *page_store, + struct vmci_ctx *context, + vmci_event_release_cb wakeup_cb, + void *client_data, + struct qp_broker_entry **ent, + bool *swap) +{ + const u32 context_id = vmci_ctx_get_id(context); + bool create; + struct qp_broker_entry *entry = NULL; + bool is_local = flags & VMCI_QPFLAG_LOCAL; + int result; + + if (vmci_handle_is_invalid(handle) || + (flags & ~VMCI_QP_ALL_FLAGS) || is_local || + !(produce_size || consume_size) || + !context || context_id == VMCI_INVALID_ID || + handle.context == VMCI_INVALID_ID) { + return VMCI_ERROR_INVALID_ARGS; + } + + if (page_store && !VMCI_QP_PAGESTORE_IS_WELLFORMED(page_store)) + return VMCI_ERROR_INVALID_ARGS; + + /* + * In the initial argument check, we ensure that non-vmkernel hosts + * are not allowed to create local queue pairs. + */ + + mutex_lock(&qp_broker_list.mutex); + + if (!is_local && vmci_ctx_qp_exists(context, handle)) { + pr_devel("Context (ID=0x%x) already attached to queue pair (handle=0x%x:0x%x)\n", + context_id, handle.context, handle.resource); + mutex_unlock(&qp_broker_list.mutex); + return VMCI_ERROR_ALREADY_EXISTS; + } + + if (handle.resource != VMCI_INVALID_ID) + entry = qp_broker_handle_to_entry(handle); + + if (!entry) { + create = true; + result = + qp_broker_create(handle, peer, flags, priv_flags, + produce_size, consume_size, page_store, + context, wakeup_cb, client_data, ent); + } else { + create = false; + result = + qp_broker_attach(entry, peer, flags, priv_flags, + produce_size, consume_size, page_store, + context, wakeup_cb, client_data, ent); + } + + mutex_unlock(&qp_broker_list.mutex); + + if (swap) + *swap = (context_id == VMCI_HOST_CONTEXT_ID) && + !(create && is_local); + + return result; +} + +/* + * This function implements the kernel API for allocating a queue + * pair. + */ +static int qp_alloc_host_work(struct vmci_handle *handle, + struct vmci_queue **produce_q, + u64 produce_size, + struct vmci_queue **consume_q, + u64 consume_size, + u32 peer, + u32 flags, + u32 priv_flags, + vmci_event_release_cb wakeup_cb, + void *client_data) +{ + struct vmci_handle new_handle; + struct vmci_ctx *context; + struct qp_broker_entry *entry; + int result; + bool swap; + + if (vmci_handle_is_invalid(*handle)) { + new_handle = vmci_make_handle( + VMCI_HOST_CONTEXT_ID, VMCI_INVALID_ID); + } else + new_handle = *handle; + + context = vmci_ctx_get(VMCI_HOST_CONTEXT_ID); + entry = NULL; + result = + qp_broker_alloc(new_handle, peer, flags, priv_flags, + produce_size, consume_size, NULL, context, + wakeup_cb, client_data, &entry, &swap); + if (result == VMCI_SUCCESS) { + if (swap) { + /* + * If this is a local queue pair, the attacher + * will swap around produce and consume + * queues. + */ + + *produce_q = entry->consume_q; + *consume_q = entry->produce_q; + } else { + *produce_q = entry->produce_q; + *consume_q = entry->consume_q; + } + + *handle = vmci_resource_handle(&entry->resource); + } else { + *handle = VMCI_INVALID_HANDLE; + pr_devel("queue pair broker failed to alloc (result=%d)\n", + result); + } + vmci_ctx_put(context); + return result; +} + +/* + * Allocates a VMCI queue_pair. Only checks validity of input + * arguments. The real work is done in the host or guest + * specific function. + */ +int vmci_qp_alloc(struct vmci_handle *handle, + struct vmci_queue **produce_q, + u64 produce_size, + struct vmci_queue **consume_q, + u64 consume_size, + u32 peer, + u32 flags, + u32 priv_flags, + bool guest_endpoint, + vmci_event_release_cb wakeup_cb, + void *client_data) +{ + if (!handle || !produce_q || !consume_q || + (!produce_size && !consume_size) || (flags & ~VMCI_QP_ALL_FLAGS)) + return VMCI_ERROR_INVALID_ARGS; + + if (guest_endpoint) { + return qp_alloc_guest_work(handle, produce_q, + produce_size, consume_q, + consume_size, peer, + flags, priv_flags); + } else { + return qp_alloc_host_work(handle, produce_q, + produce_size, consume_q, + consume_size, peer, flags, + priv_flags, wakeup_cb, client_data); + } +} + +/* + * This function implements the host kernel API for detaching from + * a queue pair. + */ +static int qp_detatch_host_work(struct vmci_handle handle) +{ + int result; + struct vmci_ctx *context; + + context = vmci_ctx_get(VMCI_HOST_CONTEXT_ID); + + result = vmci_qp_broker_detach(handle, context); + + vmci_ctx_put(context); + return result; +} + +/* + * Detaches from a VMCI queue_pair. Only checks validity of input argument. + * Real work is done in the host or guest specific function. + */ +static int qp_detatch(struct vmci_handle handle, bool guest_endpoint) +{ + if (vmci_handle_is_invalid(handle)) + return VMCI_ERROR_INVALID_ARGS; + + if (guest_endpoint) + return qp_detatch_guest_work(handle); + else + return qp_detatch_host_work(handle); +} + +/* + * Returns the entry from the head of the list. Assumes that the list is + * locked. + */ +static struct qp_entry *qp_list_get_head(struct qp_list *qp_list) +{ + if (!list_empty(&qp_list->head)) { + struct qp_entry *entry = + list_first_entry(&qp_list->head, struct qp_entry, + list_item); + return entry; + } + + return NULL; +} + +void vmci_qp_broker_exit(void) +{ + struct qp_entry *entry; + struct qp_broker_entry *be; + + mutex_lock(&qp_broker_list.mutex); + + while ((entry = qp_list_get_head(&qp_broker_list))) { + be = (struct qp_broker_entry *)entry; + + qp_list_remove_entry(&qp_broker_list, entry); + kfree(be); + } + + mutex_unlock(&qp_broker_list.mutex); +} + +/* + * Requests that a queue pair be allocated with the VMCI queue + * pair broker. Allocates a queue pair entry if one does not + * exist. Attaches to one if it exists, and retrieves the page + * files backing that queue_pair. Assumes that the queue pair + * broker lock is held. + */ +int vmci_qp_broker_alloc(struct vmci_handle handle, + u32 peer, + u32 flags, + u32 priv_flags, + u64 produce_size, + u64 consume_size, + struct vmci_qp_page_store *page_store, + struct vmci_ctx *context) +{ + return qp_broker_alloc(handle, peer, flags, priv_flags, + produce_size, consume_size, + page_store, context, NULL, NULL, NULL, NULL); +} + +/* + * VMX'en with versions lower than VMCI_VERSION_NOVMVM use a separate + * step to add the UVAs of the VMX mapping of the queue pair. This function + * provides backwards compatibility with such VMX'en, and takes care of + * registering the page store for a queue pair previously allocated by the + * VMX during create or attach. This function will move the queue pair state + * to either from VMCIQBP_CREATED_NO_MEM to VMCIQBP_CREATED_MEM or + * VMCIQBP_ATTACHED_NO_MEM to VMCIQBP_ATTACHED_MEM. If moving to the + * attached state with memory, the queue pair is ready to be used by the + * host peer, and an attached event will be generated. + * + * Assumes that the queue pair broker lock is held. + * + * This function is only used by the hosted platform, since there is no + * issue with backwards compatibility for vmkernel. + */ +int vmci_qp_broker_set_page_store(struct vmci_handle handle, + u64 produce_uva, + u64 consume_uva, + struct vmci_ctx *context) +{ + struct qp_broker_entry *entry; + int result; + const u32 context_id = vmci_ctx_get_id(context); + + if (vmci_handle_is_invalid(handle) || !context || + context_id == VMCI_INVALID_ID) + return VMCI_ERROR_INVALID_ARGS; + + /* + * We only support guest to host queue pairs, so the VMX must + * supply UVAs for the mapped page files. + */ + + if (produce_uva == 0 || consume_uva == 0) + return VMCI_ERROR_INVALID_ARGS; + + mutex_lock(&qp_broker_list.mutex); + + if (!vmci_ctx_qp_exists(context, handle)) { + pr_warn("Context (ID=0x%x) not attached to queue pair (handle=0x%x:0x%x)\n", + context_id, handle.context, handle.resource); + result = VMCI_ERROR_NOT_FOUND; + goto out; + } + + entry = qp_broker_handle_to_entry(handle); + if (!entry) { + result = VMCI_ERROR_NOT_FOUND; + goto out; + } + + /* + * If I'm the owner then I can set the page store. + * + * Or, if a host created the queue_pair and I'm the attached peer + * then I can set the page store. + */ + if (entry->create_id != context_id && + (entry->create_id != VMCI_HOST_CONTEXT_ID || + entry->attach_id != context_id)) { + result = VMCI_ERROR_QUEUEPAIR_NOTOWNER; + goto out; + } + + if (entry->state != VMCIQPB_CREATED_NO_MEM && + entry->state != VMCIQPB_ATTACHED_NO_MEM) { + result = VMCI_ERROR_UNAVAILABLE; + goto out; + } + + result = qp_host_get_user_memory(produce_uva, consume_uva, + entry->produce_q, entry->consume_q); + if (result < VMCI_SUCCESS) + goto out; + + result = qp_host_map_queues(entry->produce_q, entry->consume_q); + if (result < VMCI_SUCCESS) { + qp_host_unregister_user_memory(entry->produce_q, + entry->consume_q); + goto out; + } + + if (entry->state == VMCIQPB_CREATED_NO_MEM) + entry->state = VMCIQPB_CREATED_MEM; + else + entry->state = VMCIQPB_ATTACHED_MEM; + + entry->vmci_page_files = true; + + if (entry->state == VMCIQPB_ATTACHED_MEM) { + result = + qp_notify_peer(true, handle, context_id, entry->create_id); + if (result < VMCI_SUCCESS) { + pr_warn("Failed to notify peer (ID=0x%x) of attach to queue pair (handle=0x%x:0x%x)\n", + entry->create_id, entry->qp.handle.context, + entry->qp.handle.resource); + } + } + + result = VMCI_SUCCESS; + out: + mutex_unlock(&qp_broker_list.mutex); + return result; +} + +/* + * Resets saved queue headers for the given QP broker + * entry. Should be used when guest memory becomes available + * again, or the guest detaches. + */ +static void qp_reset_saved_headers(struct qp_broker_entry *entry) +{ + entry->produce_q->saved_header = NULL; + entry->consume_q->saved_header = NULL; +} + +/* + * The main entry point for detaching from a queue pair registered with the + * queue pair broker. If more than one endpoint is attached to the queue + * pair, the first endpoint will mainly decrement a reference count and + * generate a notification to its peer. The last endpoint will clean up + * the queue pair state registered with the broker. + * + * When a guest endpoint detaches, it will unmap and unregister the guest + * memory backing the queue pair. If the host is still attached, it will + * no longer be able to access the queue pair content. + * + * If the queue pair is already in a state where there is no memory + * registered for the queue pair (any *_NO_MEM state), it will transition to + * the VMCIQPB_SHUTDOWN_NO_MEM state. This will also happen, if a guest + * endpoint is the first of two endpoints to detach. If the host endpoint is + * the first out of two to detach, the queue pair will move to the + * VMCIQPB_SHUTDOWN_MEM state. + */ +int vmci_qp_broker_detach(struct vmci_handle handle, struct vmci_ctx *context) +{ + struct qp_broker_entry *entry; + const u32 context_id = vmci_ctx_get_id(context); + u32 peer_id; + bool is_local = false; + int result; + + if (vmci_handle_is_invalid(handle) || !context || + context_id == VMCI_INVALID_ID) { + return VMCI_ERROR_INVALID_ARGS; + } + + mutex_lock(&qp_broker_list.mutex); + + if (!vmci_ctx_qp_exists(context, handle)) { + pr_devel("Context (ID=0x%x) not attached to queue pair (handle=0x%x:0x%x)\n", + context_id, handle.context, handle.resource); + result = VMCI_ERROR_NOT_FOUND; + goto out; + } + + entry = qp_broker_handle_to_entry(handle); + if (!entry) { + pr_devel("Context (ID=0x%x) reports being attached to queue pair(handle=0x%x:0x%x) that isn't present in broker\n", + context_id, handle.context, handle.resource); + result = VMCI_ERROR_NOT_FOUND; + goto out; + } + + if (context_id != entry->create_id && context_id != entry->attach_id) { + result = VMCI_ERROR_QUEUEPAIR_NOTATTACHED; + goto out; + } + + if (context_id == entry->create_id) { + peer_id = entry->attach_id; + entry->create_id = VMCI_INVALID_ID; + } else { + peer_id = entry->create_id; + entry->attach_id = VMCI_INVALID_ID; + } + entry->qp.ref_count--; + + is_local = entry->qp.flags & VMCI_QPFLAG_LOCAL; + + if (context_id != VMCI_HOST_CONTEXT_ID) { + bool headers_mapped; + + /* + * Pre NOVMVM vmx'en may detach from a queue pair + * before setting the page store, and in that case + * there is no user memory to detach from. Also, more + * recent VMX'en may detach from a queue pair in the + * quiesced state. + */ + + qp_acquire_queue_mutex(entry->produce_q); + headers_mapped = entry->produce_q->q_header || + entry->consume_q->q_header; + if (QPBROKERSTATE_HAS_MEM(entry)) { + result = + qp_host_unmap_queues(INVALID_VMCI_GUEST_MEM_ID, + entry->produce_q, + entry->consume_q); + if (result < VMCI_SUCCESS) + pr_warn("Failed to unmap queue headers for queue pair (handle=0x%x:0x%x,result=%d)\n", + handle.context, handle.resource, + result); + + qp_host_unregister_user_memory(entry->produce_q, + entry->consume_q); + + } + + if (!headers_mapped) + qp_reset_saved_headers(entry); + + qp_release_queue_mutex(entry->produce_q); + + if (!headers_mapped && entry->wakeup_cb) + entry->wakeup_cb(entry->client_data); + + } else { + if (entry->wakeup_cb) { + entry->wakeup_cb = NULL; + entry->client_data = NULL; + } + } + + if (entry->qp.ref_count == 0) { + qp_list_remove_entry(&qp_broker_list, &entry->qp); + + if (is_local) + kfree(entry->local_mem); + + qp_cleanup_queue_mutex(entry->produce_q, entry->consume_q); + qp_host_free_queue(entry->produce_q, entry->qp.produce_size); + qp_host_free_queue(entry->consume_q, entry->qp.consume_size); + /* Unlink from resource hash table and free callback */ + vmci_resource_remove(&entry->resource); + + kfree(entry); + + vmci_ctx_qp_destroy(context, handle); + } else { + qp_notify_peer(false, handle, context_id, peer_id); + if (context_id == VMCI_HOST_CONTEXT_ID && + QPBROKERSTATE_HAS_MEM(entry)) { + entry->state = VMCIQPB_SHUTDOWN_MEM; + } else { + entry->state = VMCIQPB_SHUTDOWN_NO_MEM; + } + + if (!is_local) + vmci_ctx_qp_destroy(context, handle); + + } + result = VMCI_SUCCESS; + out: + mutex_unlock(&qp_broker_list.mutex); + return result; +} + +/* + * Establishes the necessary mappings for a queue pair given a + * reference to the queue pair guest memory. This is usually + * called when a guest is unquiesced and the VMX is allowed to + * map guest memory once again. + */ +int vmci_qp_broker_map(struct vmci_handle handle, + struct vmci_ctx *context, + u64 guest_mem) +{ + struct qp_broker_entry *entry; + const u32 context_id = vmci_ctx_get_id(context); + int result; + + if (vmci_handle_is_invalid(handle) || !context || + context_id == VMCI_INVALID_ID) + return VMCI_ERROR_INVALID_ARGS; + + mutex_lock(&qp_broker_list.mutex); + + if (!vmci_ctx_qp_exists(context, handle)) { + pr_devel("Context (ID=0x%x) not attached to queue pair (handle=0x%x:0x%x)\n", + context_id, handle.context, handle.resource); + result = VMCI_ERROR_NOT_FOUND; + goto out; + } + + entry = qp_broker_handle_to_entry(handle); + if (!entry) { + pr_devel("Context (ID=0x%x) reports being attached to queue pair (handle=0x%x:0x%x) that isn't present in broker\n", + context_id, handle.context, handle.resource); + result = VMCI_ERROR_NOT_FOUND; + goto out; + } + + if (context_id != entry->create_id && context_id != entry->attach_id) { + result = VMCI_ERROR_QUEUEPAIR_NOTATTACHED; + goto out; + } + + result = VMCI_SUCCESS; + + if (context_id != VMCI_HOST_CONTEXT_ID && + !QPBROKERSTATE_HAS_MEM(entry)) { + struct vmci_qp_page_store page_store; + + page_store.pages = guest_mem; + page_store.len = QPE_NUM_PAGES(entry->qp); + + qp_acquire_queue_mutex(entry->produce_q); + qp_reset_saved_headers(entry); + result = + qp_host_register_user_memory(&page_store, + entry->produce_q, + entry->consume_q); + qp_release_queue_mutex(entry->produce_q); + if (result == VMCI_SUCCESS) { + /* Move state from *_NO_MEM to *_MEM */ + + entry->state++; + + if (entry->wakeup_cb) + entry->wakeup_cb(entry->client_data); + } + } + + out: + mutex_unlock(&qp_broker_list.mutex); + return result; +} + +/* + * Saves a snapshot of the queue headers for the given QP broker + * entry. Should be used when guest memory is unmapped. + * Results: + * VMCI_SUCCESS on success, appropriate error code if guest memory + * can't be accessed.. + */ +static int qp_save_headers(struct qp_broker_entry *entry) +{ + int result; + + if (entry->produce_q->saved_header != NULL && + entry->consume_q->saved_header != NULL) { + /* + * If the headers have already been saved, we don't need to do + * it again, and we don't want to map in the headers + * unnecessarily. + */ + + return VMCI_SUCCESS; + } + + if (NULL == entry->produce_q->q_header || + NULL == entry->consume_q->q_header) { + result = qp_host_map_queues(entry->produce_q, entry->consume_q); + if (result < VMCI_SUCCESS) + return result; + } + + memcpy(&entry->saved_produce_q, entry->produce_q->q_header, + sizeof(entry->saved_produce_q)); + entry->produce_q->saved_header = &entry->saved_produce_q; + memcpy(&entry->saved_consume_q, entry->consume_q->q_header, + sizeof(entry->saved_consume_q)); + entry->consume_q->saved_header = &entry->saved_consume_q; + + return VMCI_SUCCESS; +} + +/* + * Removes all references to the guest memory of a given queue pair, and + * will move the queue pair from state *_MEM to *_NO_MEM. It is usually + * called when a VM is being quiesced where access to guest memory should + * avoided. + */ +int vmci_qp_broker_unmap(struct vmci_handle handle, + struct vmci_ctx *context, + u32 gid) +{ + struct qp_broker_entry *entry; + const u32 context_id = vmci_ctx_get_id(context); + int result; + + if (vmci_handle_is_invalid(handle) || !context || + context_id == VMCI_INVALID_ID) + return VMCI_ERROR_INVALID_ARGS; + + mutex_lock(&qp_broker_list.mutex); + + if (!vmci_ctx_qp_exists(context, handle)) { + pr_devel("Context (ID=0x%x) not attached to queue pair (handle=0x%x:0x%x)\n", + context_id, handle.context, handle.resource); + result = VMCI_ERROR_NOT_FOUND; + goto out; + } + + entry = qp_broker_handle_to_entry(handle); + if (!entry) { + pr_devel("Context (ID=0x%x) reports being attached to queue pair (handle=0x%x:0x%x) that isn't present in broker\n", + context_id, handle.context, handle.resource); + result = VMCI_ERROR_NOT_FOUND; + goto out; + } + + if (context_id != entry->create_id && context_id != entry->attach_id) { + result = VMCI_ERROR_QUEUEPAIR_NOTATTACHED; + goto out; + } + + if (context_id != VMCI_HOST_CONTEXT_ID && + QPBROKERSTATE_HAS_MEM(entry)) { + qp_acquire_queue_mutex(entry->produce_q); + result = qp_save_headers(entry); + if (result < VMCI_SUCCESS) + pr_warn("Failed to save queue headers for queue pair (handle=0x%x:0x%x,result=%d)\n", + handle.context, handle.resource, result); + + qp_host_unmap_queues(gid, entry->produce_q, entry->consume_q); + + /* + * On hosted, when we unmap queue pairs, the VMX will also + * unmap the guest memory, so we invalidate the previously + * registered memory. If the queue pair is mapped again at a + * later point in time, we will need to reregister the user + * memory with a possibly new user VA. + */ + qp_host_unregister_user_memory(entry->produce_q, + entry->consume_q); + + /* + * Move state from *_MEM to *_NO_MEM. + */ + entry->state--; + + qp_release_queue_mutex(entry->produce_q); + } + + result = VMCI_SUCCESS; + + out: + mutex_unlock(&qp_broker_list.mutex); + return result; +} + +/* + * Destroys all guest queue pair endpoints. If active guest queue + * pairs still exist, hypercalls to attempt detach from these + * queue pairs will be made. Any failure to detach is silently + * ignored. + */ +void vmci_qp_guest_endpoints_exit(void) +{ + struct qp_entry *entry; + struct qp_guest_endpoint *ep; + + mutex_lock(&qp_guest_endpoints.mutex); + + while ((entry = qp_list_get_head(&qp_guest_endpoints))) { + ep = (struct qp_guest_endpoint *)entry; + + /* Don't make a hypercall for local queue_pairs. */ + if (!(entry->flags & VMCI_QPFLAG_LOCAL)) + qp_detatch_hypercall(entry->handle); + + /* We cannot fail the exit, so let's reset ref_count. */ + entry->ref_count = 0; + qp_list_remove_entry(&qp_guest_endpoints, entry); + + qp_guest_endpoint_destroy(ep); + } + + mutex_unlock(&qp_guest_endpoints.mutex); +} + +/* + * Helper routine that will lock the queue pair before subsequent + * operations. + * Note: Non-blocking on the host side is currently only implemented in ESX. + * Since non-blocking isn't yet implemented on the host personality we + * have no reason to acquire a spin lock. So to avoid the use of an + * unnecessary lock only acquire the mutex if we can block. + */ +static void qp_lock(const struct vmci_qp *qpair) +{ + qp_acquire_queue_mutex(qpair->produce_q); +} + +/* + * Helper routine that unlocks the queue pair after calling + * qp_lock. + */ +static void qp_unlock(const struct vmci_qp *qpair) +{ + qp_release_queue_mutex(qpair->produce_q); +} + +/* + * The queue headers may not be mapped at all times. If a queue is + * currently not mapped, it will be attempted to do so. + */ +static int qp_map_queue_headers(struct vmci_queue *produce_q, + struct vmci_queue *consume_q) +{ + int result; + + if (NULL == produce_q->q_header || NULL == consume_q->q_header) { + result = qp_host_map_queues(produce_q, consume_q); + if (result < VMCI_SUCCESS) + return (produce_q->saved_header && + consume_q->saved_header) ? + VMCI_ERROR_QUEUEPAIR_NOT_READY : + VMCI_ERROR_QUEUEPAIR_NOTATTACHED; + } + + return VMCI_SUCCESS; +} + +/* + * Helper routine that will retrieve the produce and consume + * headers of a given queue pair. If the guest memory of the + * queue pair is currently not available, the saved queue headers + * will be returned, if these are available. + */ +static int qp_get_queue_headers(const struct vmci_qp *qpair, + struct vmci_queue_header **produce_q_header, + struct vmci_queue_header **consume_q_header) +{ + int result; + + result = qp_map_queue_headers(qpair->produce_q, qpair->consume_q); + if (result == VMCI_SUCCESS) { + *produce_q_header = qpair->produce_q->q_header; + *consume_q_header = qpair->consume_q->q_header; + } else if (qpair->produce_q->saved_header && + qpair->consume_q->saved_header) { + *produce_q_header = qpair->produce_q->saved_header; + *consume_q_header = qpair->consume_q->saved_header; + result = VMCI_SUCCESS; + } + + return result; +} + +/* + * Callback from VMCI queue pair broker indicating that a queue + * pair that was previously not ready, now either is ready or + * gone forever. + */ +static int qp_wakeup_cb(void *client_data) +{ + struct vmci_qp *qpair = (struct vmci_qp *)client_data; + + qp_lock(qpair); + while (qpair->blocked > 0) { + qpair->blocked--; + qpair->generation++; + wake_up(&qpair->event); + } + qp_unlock(qpair); + + return VMCI_SUCCESS; +} + +/* + * Makes the calling thread wait for the queue pair to become + * ready for host side access. Returns true when thread is + * woken up after queue pair state change, false otherwise. + */ +static bool qp_wait_for_ready_queue(struct vmci_qp *qpair) +{ + unsigned int generation; + + qpair->blocked++; + generation = qpair->generation; + qp_unlock(qpair); + wait_event(qpair->event, generation != qpair->generation); + qp_lock(qpair); + + return true; +} + +/* + * Enqueues a given buffer to the produce queue using the provided + * function. As many bytes as possible (space available in the queue) + * are enqueued. Assumes the queue->mutex has been acquired. Returns + * VMCI_ERROR_QUEUEPAIR_NOSPACE if no space was available to enqueue + * data, VMCI_ERROR_INVALID_SIZE, if any queue pointer is outside the + * queue (as defined by the queue size), VMCI_ERROR_INVALID_ARGS, if + * an error occured when accessing the buffer, + * VMCI_ERROR_QUEUEPAIR_NOTATTACHED, if the queue pair pages aren't + * available. Otherwise, the number of bytes written to the queue is + * returned. Updates the tail pointer of the produce queue. + */ +static ssize_t qp_enqueue_locked(struct vmci_queue *produce_q, + struct vmci_queue *consume_q, + const u64 produce_q_size, + struct iov_iter *from) +{ + s64 free_space; + u64 tail; + size_t buf_size = iov_iter_count(from); + size_t written; + ssize_t result; + + result = qp_map_queue_headers(produce_q, consume_q); + if (unlikely(result != VMCI_SUCCESS)) + return result; + + free_space = vmci_q_header_free_space(produce_q->q_header, + consume_q->q_header, + produce_q_size); + if (free_space == 0) + return VMCI_ERROR_QUEUEPAIR_NOSPACE; + + if (free_space < VMCI_SUCCESS) + return (ssize_t) free_space; + + written = (size_t) (free_space > buf_size ? buf_size : free_space); + tail = vmci_q_header_producer_tail(produce_q->q_header); + if (likely(tail + written < produce_q_size)) { + result = qp_memcpy_to_queue_iter(produce_q, tail, from, written); + } else { + /* Tail pointer wraps around. */ + + const size_t tmp = (size_t) (produce_q_size - tail); + + result = qp_memcpy_to_queue_iter(produce_q, tail, from, tmp); + if (result >= VMCI_SUCCESS) + result = qp_memcpy_to_queue_iter(produce_q, 0, from, + written - tmp); + } + + if (result < VMCI_SUCCESS) + return result; + + vmci_q_header_add_producer_tail(produce_q->q_header, written, + produce_q_size); + return written; +} + +/* + * Dequeues data (if available) from the given consume queue. Writes data + * to the user provided buffer using the provided function. + * Assumes the queue->mutex has been acquired. + * Results: + * VMCI_ERROR_QUEUEPAIR_NODATA if no data was available to dequeue. + * VMCI_ERROR_INVALID_SIZE, if any queue pointer is outside the queue + * (as defined by the queue size). + * VMCI_ERROR_INVALID_ARGS, if an error occured when accessing the buffer. + * Otherwise the number of bytes dequeued is returned. + * Side effects: + * Updates the head pointer of the consume queue. + */ +static ssize_t qp_dequeue_locked(struct vmci_queue *produce_q, + struct vmci_queue *consume_q, + const u64 consume_q_size, + struct iov_iter *to, + bool update_consumer) +{ + size_t buf_size = iov_iter_count(to); + s64 buf_ready; + u64 head; + size_t read; + ssize_t result; + + result = qp_map_queue_headers(produce_q, consume_q); + if (unlikely(result != VMCI_SUCCESS)) + return result; + + buf_ready = vmci_q_header_buf_ready(consume_q->q_header, + produce_q->q_header, + consume_q_size); + if (buf_ready == 0) + return VMCI_ERROR_QUEUEPAIR_NODATA; + + if (buf_ready < VMCI_SUCCESS) + return (ssize_t) buf_ready; + + read = (size_t) (buf_ready > buf_size ? buf_size : buf_ready); + head = vmci_q_header_consumer_head(produce_q->q_header); + if (likely(head + read < consume_q_size)) { + result = qp_memcpy_from_queue_iter(to, consume_q, head, read); + } else { + /* Head pointer wraps around. */ + + const size_t tmp = (size_t) (consume_q_size - head); + + result = qp_memcpy_from_queue_iter(to, consume_q, head, tmp); + if (result >= VMCI_SUCCESS) + result = qp_memcpy_from_queue_iter(to, consume_q, 0, + read - tmp); + + } + + if (result < VMCI_SUCCESS) + return result; + + if (update_consumer) + vmci_q_header_add_consumer_head(produce_q->q_header, + read, consume_q_size); + + return read; +} + +/* + * vmci_qpair_alloc() - Allocates a queue pair. + * @qpair: Pointer for the new vmci_qp struct. + * @handle: Handle to track the resource. + * @produce_qsize: Desired size of the producer queue. + * @consume_qsize: Desired size of the consumer queue. + * @peer: ContextID of the peer. + * @flags: VMCI flags. + * @priv_flags: VMCI priviledge flags. + * + * This is the client interface for allocating the memory for a + * vmci_qp structure and then attaching to the underlying + * queue. If an error occurs allocating the memory for the + * vmci_qp structure no attempt is made to attach. If an + * error occurs attaching, then the structure is freed. + */ +int vmci_qpair_alloc(struct vmci_qp **qpair, + struct vmci_handle *handle, + u64 produce_qsize, + u64 consume_qsize, + u32 peer, + u32 flags, + u32 priv_flags) +{ + struct vmci_qp *my_qpair; + int retval; + struct vmci_handle src = VMCI_INVALID_HANDLE; + struct vmci_handle dst = vmci_make_handle(peer, VMCI_INVALID_ID); + enum vmci_route route; + vmci_event_release_cb wakeup_cb; + void *client_data; + + /* + * Restrict the size of a queuepair. The device already + * enforces a limit on the total amount of memory that can be + * allocated to queuepairs for a guest. However, we try to + * allocate this memory before we make the queuepair + * allocation hypercall. On Linux, we allocate each page + * separately, which means rather than fail, the guest will + * thrash while it tries to allocate, and will become + * increasingly unresponsive to the point where it appears to + * be hung. So we place a limit on the size of an individual + * queuepair here, and leave the device to enforce the + * restriction on total queuepair memory. (Note that this + * doesn't prevent all cases; a user with only this much + * physical memory could still get into trouble.) The error + * used by the device is NO_RESOURCES, so use that here too. + */ + + if (produce_qsize + consume_qsize < max(produce_qsize, consume_qsize) || + produce_qsize + consume_qsize > VMCI_MAX_GUEST_QP_MEMORY) + return VMCI_ERROR_NO_RESOURCES; + + retval = vmci_route(&src, &dst, false, &route); + if (retval < VMCI_SUCCESS) + route = vmci_guest_code_active() ? + VMCI_ROUTE_AS_GUEST : VMCI_ROUTE_AS_HOST; + + if (flags & (VMCI_QPFLAG_NONBLOCK | VMCI_QPFLAG_PINNED)) { + pr_devel("NONBLOCK OR PINNED set"); + return VMCI_ERROR_INVALID_ARGS; + } + + my_qpair = kzalloc(sizeof(*my_qpair), GFP_KERNEL); + if (!my_qpair) + return VMCI_ERROR_NO_MEM; + + my_qpair->produce_q_size = produce_qsize; + my_qpair->consume_q_size = consume_qsize; + my_qpair->peer = peer; + my_qpair->flags = flags; + my_qpair->priv_flags = priv_flags; + + wakeup_cb = NULL; + client_data = NULL; + + if (VMCI_ROUTE_AS_HOST == route) { + my_qpair->guest_endpoint = false; + if (!(flags & VMCI_QPFLAG_LOCAL)) { + my_qpair->blocked = 0; + my_qpair->generation = 0; + init_waitqueue_head(&my_qpair->event); + wakeup_cb = qp_wakeup_cb; + client_data = (void *)my_qpair; + } + } else { + my_qpair->guest_endpoint = true; + } + + retval = vmci_qp_alloc(handle, + &my_qpair->produce_q, + my_qpair->produce_q_size, + &my_qpair->consume_q, + my_qpair->consume_q_size, + my_qpair->peer, + my_qpair->flags, + my_qpair->priv_flags, + my_qpair->guest_endpoint, + wakeup_cb, client_data); + + if (retval < VMCI_SUCCESS) { + kfree(my_qpair); + return retval; + } + + *qpair = my_qpair; + my_qpair->handle = *handle; + + return retval; +} +EXPORT_SYMBOL_GPL(vmci_qpair_alloc); + +/* + * vmci_qpair_detach() - Detatches the client from a queue pair. + * @qpair: Reference of a pointer to the qpair struct. + * + * This is the client interface for detaching from a VMCIQPair. + * Note that this routine will free the memory allocated for the + * vmci_qp structure too. + */ +int vmci_qpair_detach(struct vmci_qp **qpair) +{ + int result; + struct vmci_qp *old_qpair; + + if (!qpair || !(*qpair)) + return VMCI_ERROR_INVALID_ARGS; + + old_qpair = *qpair; + result = qp_detatch(old_qpair->handle, old_qpair->guest_endpoint); + + /* + * The guest can fail to detach for a number of reasons, and + * if it does so, it will cleanup the entry (if there is one). + * The host can fail too, but it won't cleanup the entry + * immediately, it will do that later when the context is + * freed. Either way, we need to release the qpair struct + * here; there isn't much the caller can do, and we don't want + * to leak. + */ + + memset(old_qpair, 0, sizeof(*old_qpair)); + old_qpair->handle = VMCI_INVALID_HANDLE; + old_qpair->peer = VMCI_INVALID_ID; + kfree(old_qpair); + *qpair = NULL; + + return result; +} +EXPORT_SYMBOL_GPL(vmci_qpair_detach); + +/* + * vmci_qpair_get_produce_indexes() - Retrieves the indexes of the producer. + * @qpair: Pointer to the queue pair struct. + * @producer_tail: Reference used for storing producer tail index. + * @consumer_head: Reference used for storing the consumer head index. + * + * This is the client interface for getting the current indexes of the + * QPair from the point of the view of the caller as the producer. + */ +int vmci_qpair_get_produce_indexes(const struct vmci_qp *qpair, + u64 *producer_tail, + u64 *consumer_head) +{ + struct vmci_queue_header *produce_q_header; + struct vmci_queue_header *consume_q_header; + int result; + + if (!qpair) + return VMCI_ERROR_INVALID_ARGS; + + qp_lock(qpair); + result = + qp_get_queue_headers(qpair, &produce_q_header, &consume_q_header); + if (result == VMCI_SUCCESS) + vmci_q_header_get_pointers(produce_q_header, consume_q_header, + producer_tail, consumer_head); + qp_unlock(qpair); + + if (result == VMCI_SUCCESS && + ((producer_tail && *producer_tail >= qpair->produce_q_size) || + (consumer_head && *consumer_head >= qpair->produce_q_size))) + return VMCI_ERROR_INVALID_SIZE; + + return result; +} +EXPORT_SYMBOL_GPL(vmci_qpair_get_produce_indexes); + +/* + * vmci_qpair_get_consume_indexes() - Retrieves the indexes of the consumer. + * @qpair: Pointer to the queue pair struct. + * @consumer_tail: Reference used for storing consumer tail index. + * @producer_head: Reference used for storing the producer head index. + * + * This is the client interface for getting the current indexes of the + * QPair from the point of the view of the caller as the consumer. + */ +int vmci_qpair_get_consume_indexes(const struct vmci_qp *qpair, + u64 *consumer_tail, + u64 *producer_head) +{ + struct vmci_queue_header *produce_q_header; + struct vmci_queue_header *consume_q_header; + int result; + + if (!qpair) + return VMCI_ERROR_INVALID_ARGS; + + qp_lock(qpair); + result = + qp_get_queue_headers(qpair, &produce_q_header, &consume_q_header); + if (result == VMCI_SUCCESS) + vmci_q_header_get_pointers(consume_q_header, produce_q_header, + consumer_tail, producer_head); + qp_unlock(qpair); + + if (result == VMCI_SUCCESS && + ((consumer_tail && *consumer_tail >= qpair->consume_q_size) || + (producer_head && *producer_head >= qpair->consume_q_size))) + return VMCI_ERROR_INVALID_SIZE; + + return result; +} +EXPORT_SYMBOL_GPL(vmci_qpair_get_consume_indexes); + +/* + * vmci_qpair_produce_free_space() - Retrieves free space in producer queue. + * @qpair: Pointer to the queue pair struct. + * + * This is the client interface for getting the amount of free + * space in the QPair from the point of the view of the caller as + * the producer which is the common case. Returns < 0 if err, else + * available bytes into which data can be enqueued if > 0. + */ +s64 vmci_qpair_produce_free_space(const struct vmci_qp *qpair) +{ + struct vmci_queue_header *produce_q_header; + struct vmci_queue_header *consume_q_header; + s64 result; + + if (!qpair) + return VMCI_ERROR_INVALID_ARGS; + + qp_lock(qpair); + result = + qp_get_queue_headers(qpair, &produce_q_header, &consume_q_header); + if (result == VMCI_SUCCESS) + result = vmci_q_header_free_space(produce_q_header, + consume_q_header, + qpair->produce_q_size); + else + result = 0; + + qp_unlock(qpair); + + return result; +} +EXPORT_SYMBOL_GPL(vmci_qpair_produce_free_space); + +/* + * vmci_qpair_consume_free_space() - Retrieves free space in consumer queue. + * @qpair: Pointer to the queue pair struct. + * + * This is the client interface for getting the amount of free + * space in the QPair from the point of the view of the caller as + * the consumer which is not the common case. Returns < 0 if err, else + * available bytes into which data can be enqueued if > 0. + */ +s64 vmci_qpair_consume_free_space(const struct vmci_qp *qpair) +{ + struct vmci_queue_header *produce_q_header; + struct vmci_queue_header *consume_q_header; + s64 result; + + if (!qpair) + return VMCI_ERROR_INVALID_ARGS; + + qp_lock(qpair); + result = + qp_get_queue_headers(qpair, &produce_q_header, &consume_q_header); + if (result == VMCI_SUCCESS) + result = vmci_q_header_free_space(consume_q_header, + produce_q_header, + qpair->consume_q_size); + else + result = 0; + + qp_unlock(qpair); + + return result; +} +EXPORT_SYMBOL_GPL(vmci_qpair_consume_free_space); + +/* + * vmci_qpair_produce_buf_ready() - Gets bytes ready to read from + * producer queue. + * @qpair: Pointer to the queue pair struct. + * + * This is the client interface for getting the amount of + * enqueued data in the QPair from the point of the view of the + * caller as the producer which is not the common case. Returns < 0 if err, + * else available bytes that may be read. + */ +s64 vmci_qpair_produce_buf_ready(const struct vmci_qp *qpair) +{ + struct vmci_queue_header *produce_q_header; + struct vmci_queue_header *consume_q_header; + s64 result; + + if (!qpair) + return VMCI_ERROR_INVALID_ARGS; + + qp_lock(qpair); + result = + qp_get_queue_headers(qpair, &produce_q_header, &consume_q_header); + if (result == VMCI_SUCCESS) + result = vmci_q_header_buf_ready(produce_q_header, + consume_q_header, + qpair->produce_q_size); + else + result = 0; + + qp_unlock(qpair); + + return result; +} +EXPORT_SYMBOL_GPL(vmci_qpair_produce_buf_ready); + +/* + * vmci_qpair_consume_buf_ready() - Gets bytes ready to read from + * consumer queue. + * @qpair: Pointer to the queue pair struct. + * + * This is the client interface for getting the amount of + * enqueued data in the QPair from the point of the view of the + * caller as the consumer which is the normal case. Returns < 0 if err, + * else available bytes that may be read. + */ +s64 vmci_qpair_consume_buf_ready(const struct vmci_qp *qpair) +{ + struct vmci_queue_header *produce_q_header; + struct vmci_queue_header *consume_q_header; + s64 result; + + if (!qpair) + return VMCI_ERROR_INVALID_ARGS; + + qp_lock(qpair); + result = + qp_get_queue_headers(qpair, &produce_q_header, &consume_q_header); + if (result == VMCI_SUCCESS) + result = vmci_q_header_buf_ready(consume_q_header, + produce_q_header, + qpair->consume_q_size); + else + result = 0; + + qp_unlock(qpair); + + return result; +} +EXPORT_SYMBOL_GPL(vmci_qpair_consume_buf_ready); + +/* + * vmci_qpair_enqueue() - Throw data on the queue. + * @qpair: Pointer to the queue pair struct. + * @buf: Pointer to buffer containing data + * @buf_size: Length of buffer. + * @buf_type: Buffer type (Unused). + * + * This is the client interface for enqueueing data into the queue. + * Returns number of bytes enqueued or < 0 on error. + */ +ssize_t vmci_qpair_enqueue(struct vmci_qp *qpair, + const void *buf, + size_t buf_size, + int buf_type) +{ + ssize_t result; + struct iov_iter from; + struct kvec v = {.iov_base = (void *)buf, .iov_len = buf_size}; + + if (!qpair || !buf) + return VMCI_ERROR_INVALID_ARGS; + + iov_iter_kvec(&from, WRITE | ITER_KVEC, &v, 1, buf_size); + + qp_lock(qpair); + + do { + result = qp_enqueue_locked(qpair->produce_q, + qpair->consume_q, + qpair->produce_q_size, + &from); + + if (result == VMCI_ERROR_QUEUEPAIR_NOT_READY && + !qp_wait_for_ready_queue(qpair)) + result = VMCI_ERROR_WOULD_BLOCK; + + } while (result == VMCI_ERROR_QUEUEPAIR_NOT_READY); + + qp_unlock(qpair); + + return result; +} +EXPORT_SYMBOL_GPL(vmci_qpair_enqueue); + +/* + * vmci_qpair_dequeue() - Get data from the queue. + * @qpair: Pointer to the queue pair struct. + * @buf: Pointer to buffer for the data + * @buf_size: Length of buffer. + * @buf_type: Buffer type (Unused). + * + * This is the client interface for dequeueing data from the queue. + * Returns number of bytes dequeued or < 0 on error. + */ +ssize_t vmci_qpair_dequeue(struct vmci_qp *qpair, + void *buf, + size_t buf_size, + int buf_type) +{ + ssize_t result; + struct iov_iter to; + struct kvec v = {.iov_base = buf, .iov_len = buf_size}; + + if (!qpair || !buf) + return VMCI_ERROR_INVALID_ARGS; + + iov_iter_kvec(&to, READ | ITER_KVEC, &v, 1, buf_size); + + qp_lock(qpair); + + do { + result = qp_dequeue_locked(qpair->produce_q, + qpair->consume_q, + qpair->consume_q_size, + &to, true); + + if (result == VMCI_ERROR_QUEUEPAIR_NOT_READY && + !qp_wait_for_ready_queue(qpair)) + result = VMCI_ERROR_WOULD_BLOCK; + + } while (result == VMCI_ERROR_QUEUEPAIR_NOT_READY); + + qp_unlock(qpair); + + return result; +} +EXPORT_SYMBOL_GPL(vmci_qpair_dequeue); + +/* + * vmci_qpair_peek() - Peek at the data in the queue. + * @qpair: Pointer to the queue pair struct. + * @buf: Pointer to buffer for the data + * @buf_size: Length of buffer. + * @buf_type: Buffer type (Unused on Linux). + * + * This is the client interface for peeking into a queue. (I.e., + * copy data from the queue without updating the head pointer.) + * Returns number of bytes dequeued or < 0 on error. + */ +ssize_t vmci_qpair_peek(struct vmci_qp *qpair, + void *buf, + size_t buf_size, + int buf_type) +{ + struct iov_iter to; + struct kvec v = {.iov_base = buf, .iov_len = buf_size}; + ssize_t result; + + if (!qpair || !buf) + return VMCI_ERROR_INVALID_ARGS; + + iov_iter_kvec(&to, READ | ITER_KVEC, &v, 1, buf_size); + + qp_lock(qpair); + + do { + result = qp_dequeue_locked(qpair->produce_q, + qpair->consume_q, + qpair->consume_q_size, + &to, false); + + if (result == VMCI_ERROR_QUEUEPAIR_NOT_READY && + !qp_wait_for_ready_queue(qpair)) + result = VMCI_ERROR_WOULD_BLOCK; + + } while (result == VMCI_ERROR_QUEUEPAIR_NOT_READY); + + qp_unlock(qpair); + + return result; +} +EXPORT_SYMBOL_GPL(vmci_qpair_peek); + +/* + * vmci_qpair_enquev() - Throw data on the queue using iov. + * @qpair: Pointer to the queue pair struct. + * @iov: Pointer to buffer containing data + * @iov_size: Length of buffer. + * @buf_type: Buffer type (Unused). + * + * This is the client interface for enqueueing data into the queue. + * This function uses IO vectors to handle the work. Returns number + * of bytes enqueued or < 0 on error. + */ +ssize_t vmci_qpair_enquev(struct vmci_qp *qpair, + struct msghdr *msg, + size_t iov_size, + int buf_type) +{ + ssize_t result; + + if (!qpair) + return VMCI_ERROR_INVALID_ARGS; + + qp_lock(qpair); + + do { + result = qp_enqueue_locked(qpair->produce_q, + qpair->consume_q, + qpair->produce_q_size, + &msg->msg_iter); + + if (result == VMCI_ERROR_QUEUEPAIR_NOT_READY && + !qp_wait_for_ready_queue(qpair)) + result = VMCI_ERROR_WOULD_BLOCK; + + } while (result == VMCI_ERROR_QUEUEPAIR_NOT_READY); + + qp_unlock(qpair); + + return result; +} +EXPORT_SYMBOL_GPL(vmci_qpair_enquev); + +/* + * vmci_qpair_dequev() - Get data from the queue using iov. + * @qpair: Pointer to the queue pair struct. + * @iov: Pointer to buffer for the data + * @iov_size: Length of buffer. + * @buf_type: Buffer type (Unused). + * + * This is the client interface for dequeueing data from the queue. + * This function uses IO vectors to handle the work. Returns number + * of bytes dequeued or < 0 on error. + */ +ssize_t vmci_qpair_dequev(struct vmci_qp *qpair, + struct msghdr *msg, + size_t iov_size, + int buf_type) +{ + ssize_t result; + + if (!qpair) + return VMCI_ERROR_INVALID_ARGS; + + qp_lock(qpair); + + do { + result = qp_dequeue_locked(qpair->produce_q, + qpair->consume_q, + qpair->consume_q_size, + &msg->msg_iter, true); + + if (result == VMCI_ERROR_QUEUEPAIR_NOT_READY && + !qp_wait_for_ready_queue(qpair)) + result = VMCI_ERROR_WOULD_BLOCK; + + } while (result == VMCI_ERROR_QUEUEPAIR_NOT_READY); + + qp_unlock(qpair); + + return result; +} +EXPORT_SYMBOL_GPL(vmci_qpair_dequev); + +/* + * vmci_qpair_peekv() - Peek at the data in the queue using iov. + * @qpair: Pointer to the queue pair struct. + * @iov: Pointer to buffer for the data + * @iov_size: Length of buffer. + * @buf_type: Buffer type (Unused on Linux). + * + * This is the client interface for peeking into a queue. (I.e., + * copy data from the queue without updating the head pointer.) + * This function uses IO vectors to handle the work. Returns number + * of bytes peeked or < 0 on error. + */ +ssize_t vmci_qpair_peekv(struct vmci_qp *qpair, + struct msghdr *msg, + size_t iov_size, + int buf_type) +{ + ssize_t result; + + if (!qpair) + return VMCI_ERROR_INVALID_ARGS; + + qp_lock(qpair); + + do { + result = qp_dequeue_locked(qpair->produce_q, + qpair->consume_q, + qpair->consume_q_size, + &msg->msg_iter, false); + + if (result == VMCI_ERROR_QUEUEPAIR_NOT_READY && + !qp_wait_for_ready_queue(qpair)) + result = VMCI_ERROR_WOULD_BLOCK; + + } while (result == VMCI_ERROR_QUEUEPAIR_NOT_READY); + + qp_unlock(qpair); + return result; +} +EXPORT_SYMBOL_GPL(vmci_qpair_peekv); diff --git a/drivers/misc/vmw_vmci/vmci_queue_pair.h b/drivers/misc/vmw_vmci/vmci_queue_pair.h new file mode 100644 index 000000000..ed177f04e --- /dev/null +++ b/drivers/misc/vmw_vmci/vmci_queue_pair.h @@ -0,0 +1,173 @@ +/* + * VMware VMCI Driver + * + * Copyright (C) 2012 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + */ + +#ifndef _VMCI_QUEUE_PAIR_H_ +#define _VMCI_QUEUE_PAIR_H_ + +#include +#include + +#include "vmci_context.h" + +/* Callback needed for correctly waiting on events. */ +typedef int (*vmci_event_release_cb) (void *client_data); + +/* Guest device port I/O. */ +struct ppn_set { + u64 num_produce_pages; + u64 num_consume_pages; + u32 *produce_ppns; + u32 *consume_ppns; + bool initialized; +}; + +/* VMCIqueue_pairAllocInfo */ +struct vmci_qp_alloc_info { + struct vmci_handle handle; + u32 peer; + u32 flags; + u64 produce_size; + u64 consume_size; + u64 ppn_va; /* Start VA of queue pair PPNs. */ + u64 num_ppns; + s32 result; + u32 version; +}; + +/* VMCIqueue_pairSetVAInfo */ +struct vmci_qp_set_va_info { + struct vmci_handle handle; + u64 va; /* Start VA of queue pair PPNs. */ + u64 num_ppns; + u32 version; + s32 result; +}; + +/* + * For backwards compatibility, here is a version of the + * VMCIqueue_pairPageFileInfo before host support end-points was added. + * Note that the current version of that structure requires VMX to + * pass down the VA of the mapped file. Before host support was added + * there was nothing of the sort. So, when the driver sees the ioctl + * with a parameter that is the sizeof + * VMCIqueue_pairPageFileInfo_NoHostQP then it can infer that the version + * of VMX running can't attach to host end points because it doesn't + * provide the VA of the mapped files. + * + * The Linux driver doesn't get an indication of the size of the + * structure passed down from user space. So, to fix a long standing + * but unfiled bug, the _pad field has been renamed to version. + * Existing versions of VMX always initialize the PageFileInfo + * structure so that _pad, er, version is set to 0. + * + * A version value of 1 indicates that the size of the structure has + * been increased to include two UVA's: produce_uva and consume_uva. + * These UVA's are of the mmap()'d queue contents backing files. + * + * In addition, if when VMX is sending down the + * VMCIqueue_pairPageFileInfo structure it gets an error then it will + * try again with the _NoHostQP version of the file to see if an older + * VMCI kernel module is running. + */ + +/* VMCIqueue_pairPageFileInfo */ +struct vmci_qp_page_file_info { + struct vmci_handle handle; + u64 produce_page_file; /* User VA. */ + u64 consume_page_file; /* User VA. */ + u64 produce_page_file_size; /* Size of the file name array. */ + u64 consume_page_file_size; /* Size of the file name array. */ + s32 result; + u32 version; /* Was _pad. */ + u64 produce_va; /* User VA of the mapped file. */ + u64 consume_va; /* User VA of the mapped file. */ +}; + +/* vmci queuepair detach info */ +struct vmci_qp_dtch_info { + struct vmci_handle handle; + s32 result; + u32 _pad; +}; + +/* + * struct vmci_qp_page_store describes how the memory of a given queue pair + * is backed. When the queue pair is between the host and a guest, the + * page store consists of references to the guest pages. On vmkernel, + * this is a list of PPNs, and on hosted, it is a user VA where the + * queue pair is mapped into the VMX address space. + */ +struct vmci_qp_page_store { + /* Reference to pages backing the queue pair. */ + u64 pages; + /* Length of pageList/virtual addres range (in pages). */ + u32 len; +}; + +/* + * This data type contains the information about a queue. + * There are two queues (hence, queue pairs) per transaction model between a + * pair of end points, A & B. One queue is used by end point A to transmit + * commands and responses to B. The other queue is used by B to transmit + * commands and responses. + * + * struct vmci_queue_kern_if is a per-OS defined Queue structure. It contains + * either a direct pointer to the linear address of the buffer contents or a + * pointer to structures which help the OS locate those data pages. See + * vmciKernelIf.c for each platform for its definition. + */ +struct vmci_queue { + struct vmci_queue_header *q_header; + struct vmci_queue_header *saved_header; + struct vmci_queue_kern_if *kernel_if; +}; + +/* + * Utility function that checks whether the fields of the page + * store contain valid values. + * Result: + * true if the page store is wellformed. false otherwise. + */ +static inline bool +VMCI_QP_PAGESTORE_IS_WELLFORMED(struct vmci_qp_page_store *page_store) +{ + return page_store->len >= 2; +} + +void vmci_qp_broker_exit(void); +int vmci_qp_broker_alloc(struct vmci_handle handle, u32 peer, + u32 flags, u32 priv_flags, + u64 produce_size, u64 consume_size, + struct vmci_qp_page_store *page_store, + struct vmci_ctx *context); +int vmci_qp_broker_set_page_store(struct vmci_handle handle, + u64 produce_uva, u64 consume_uva, + struct vmci_ctx *context); +int vmci_qp_broker_detach(struct vmci_handle handle, struct vmci_ctx *context); + +void vmci_qp_guest_endpoints_exit(void); + +int vmci_qp_alloc(struct vmci_handle *handle, + struct vmci_queue **produce_q, u64 produce_size, + struct vmci_queue **consume_q, u64 consume_size, + u32 peer, u32 flags, u32 priv_flags, + bool guest_endpoint, vmci_event_release_cb wakeup_cb, + void *client_data); +int vmci_qp_broker_map(struct vmci_handle handle, + struct vmci_ctx *context, u64 guest_mem); +int vmci_qp_broker_unmap(struct vmci_handle handle, + struct vmci_ctx *context, u32 gid); + +#endif /* _VMCI_QUEUE_PAIR_H_ */ diff --git a/drivers/misc/vmw_vmci/vmci_resource.c b/drivers/misc/vmw_vmci/vmci_resource.c new file mode 100644 index 000000000..da1ee2e1b --- /dev/null +++ b/drivers/misc/vmw_vmci/vmci_resource.c @@ -0,0 +1,229 @@ +/* + * VMware VMCI Driver + * + * Copyright (C) 2012 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + */ + +#include +#include +#include +#include +#include + +#include "vmci_resource.h" +#include "vmci_driver.h" + + +#define VMCI_RESOURCE_HASH_BITS 7 +#define VMCI_RESOURCE_HASH_BUCKETS (1 << VMCI_RESOURCE_HASH_BITS) + +struct vmci_hash_table { + spinlock_t lock; + struct hlist_head entries[VMCI_RESOURCE_HASH_BUCKETS]; +}; + +static struct vmci_hash_table vmci_resource_table = { + .lock = __SPIN_LOCK_UNLOCKED(vmci_resource_table.lock), +}; + +static unsigned int vmci_resource_hash(struct vmci_handle handle) +{ + return hash_32(handle.resource, VMCI_RESOURCE_HASH_BITS); +} + +/* + * Gets a resource (if one exists) matching given handle from the hash table. + */ +static struct vmci_resource *vmci_resource_lookup(struct vmci_handle handle, + enum vmci_resource_type type) +{ + struct vmci_resource *r, *resource = NULL; + unsigned int idx = vmci_resource_hash(handle); + + rcu_read_lock(); + hlist_for_each_entry_rcu(r, + &vmci_resource_table.entries[idx], node) { + u32 cid = r->handle.context; + u32 rid = r->handle.resource; + + if (r->type == type && + rid == handle.resource && + (cid == handle.context || cid == VMCI_INVALID_ID || + handle.context == VMCI_INVALID_ID)) { + resource = r; + break; + } + } + rcu_read_unlock(); + + return resource; +} + +/* + * Find an unused resource ID and return it. The first + * VMCI_RESERVED_RESOURCE_ID_MAX are reserved so we start from + * its value + 1. + * Returns VMCI resource id on success, VMCI_INVALID_ID on failure. + */ +static u32 vmci_resource_find_id(u32 context_id, + enum vmci_resource_type resource_type) +{ + static u32 resource_id = VMCI_RESERVED_RESOURCE_ID_MAX + 1; + u32 old_rid = resource_id; + u32 current_rid; + + /* + * Generate a unique resource ID. Keep on trying until we wrap around + * in the RID space. + */ + do { + struct vmci_handle handle; + + current_rid = resource_id; + resource_id++; + if (unlikely(resource_id == VMCI_INVALID_ID)) { + /* Skip the reserved rids. */ + resource_id = VMCI_RESERVED_RESOURCE_ID_MAX + 1; + } + + handle = vmci_make_handle(context_id, current_rid); + if (!vmci_resource_lookup(handle, resource_type)) + return current_rid; + } while (resource_id != old_rid); + + return VMCI_INVALID_ID; +} + + +int vmci_resource_add(struct vmci_resource *resource, + enum vmci_resource_type resource_type, + struct vmci_handle handle) + +{ + unsigned int idx; + int result; + + spin_lock(&vmci_resource_table.lock); + + if (handle.resource == VMCI_INVALID_ID) { + handle.resource = vmci_resource_find_id(handle.context, + resource_type); + if (handle.resource == VMCI_INVALID_ID) { + result = VMCI_ERROR_NO_HANDLE; + goto out; + } + } else if (vmci_resource_lookup(handle, resource_type)) { + result = VMCI_ERROR_ALREADY_EXISTS; + goto out; + } + + resource->handle = handle; + resource->type = resource_type; + INIT_HLIST_NODE(&resource->node); + kref_init(&resource->kref); + init_completion(&resource->done); + + idx = vmci_resource_hash(resource->handle); + hlist_add_head_rcu(&resource->node, &vmci_resource_table.entries[idx]); + + result = VMCI_SUCCESS; + +out: + spin_unlock(&vmci_resource_table.lock); + return result; +} + +void vmci_resource_remove(struct vmci_resource *resource) +{ + struct vmci_handle handle = resource->handle; + unsigned int idx = vmci_resource_hash(handle); + struct vmci_resource *r; + + /* Remove resource from hash table. */ + spin_lock(&vmci_resource_table.lock); + + hlist_for_each_entry(r, &vmci_resource_table.entries[idx], node) { + if (vmci_handle_is_equal(r->handle, resource->handle)) { + hlist_del_init_rcu(&r->node); + break; + } + } + + spin_unlock(&vmci_resource_table.lock); + synchronize_rcu(); + + vmci_resource_put(resource); + wait_for_completion(&resource->done); +} + +struct vmci_resource * +vmci_resource_by_handle(struct vmci_handle resource_handle, + enum vmci_resource_type resource_type) +{ + struct vmci_resource *r, *resource = NULL; + + rcu_read_lock(); + + r = vmci_resource_lookup(resource_handle, resource_type); + if (r && + (resource_type == r->type || + resource_type == VMCI_RESOURCE_TYPE_ANY)) { + resource = vmci_resource_get(r); + } + + rcu_read_unlock(); + + return resource; +} + +/* + * Get a reference to given resource. + */ +struct vmci_resource *vmci_resource_get(struct vmci_resource *resource) +{ + kref_get(&resource->kref); + + return resource; +} + +static void vmci_release_resource(struct kref *kref) +{ + struct vmci_resource *resource = + container_of(kref, struct vmci_resource, kref); + + /* Verify the resource has been unlinked from hash table */ + WARN_ON(!hlist_unhashed(&resource->node)); + + /* Signal that container of this resource can now be destroyed */ + complete(&resource->done); +} + +/* + * Resource's release function will get called if last reference. + * If it is the last reference, then we are sure that nobody else + * can increment the count again (it's gone from the resource hash + * table), so there's no need for locking here. + */ +int vmci_resource_put(struct vmci_resource *resource) +{ + /* + * We propagate the information back to caller in case it wants to know + * whether entry was freed. + */ + return kref_put(&resource->kref, vmci_release_resource) ? + VMCI_SUCCESS_ENTRY_DEAD : VMCI_SUCCESS; +} + +struct vmci_handle vmci_resource_handle(struct vmci_resource *resource) +{ + return resource->handle; +} diff --git a/drivers/misc/vmw_vmci/vmci_resource.h b/drivers/misc/vmw_vmci/vmci_resource.h new file mode 100644 index 000000000..9190cd298 --- /dev/null +++ b/drivers/misc/vmw_vmci/vmci_resource.h @@ -0,0 +1,59 @@ +/* + * VMware VMCI Driver + * + * Copyright (C) 2012 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + */ + +#ifndef _VMCI_RESOURCE_H_ +#define _VMCI_RESOURCE_H_ + +#include +#include + +#include "vmci_context.h" + + +enum vmci_resource_type { + VMCI_RESOURCE_TYPE_ANY, + VMCI_RESOURCE_TYPE_API, + VMCI_RESOURCE_TYPE_GROUP, + VMCI_RESOURCE_TYPE_DATAGRAM, + VMCI_RESOURCE_TYPE_DOORBELL, + VMCI_RESOURCE_TYPE_QPAIR_GUEST, + VMCI_RESOURCE_TYPE_QPAIR_HOST +}; + +struct vmci_resource { + struct vmci_handle handle; + enum vmci_resource_type type; + struct hlist_node node; + struct kref kref; + struct completion done; +}; + + +int vmci_resource_add(struct vmci_resource *resource, + enum vmci_resource_type resource_type, + struct vmci_handle handle); + +void vmci_resource_remove(struct vmci_resource *resource); + +struct vmci_resource * +vmci_resource_by_handle(struct vmci_handle resource_handle, + enum vmci_resource_type resource_type); + +struct vmci_resource *vmci_resource_get(struct vmci_resource *resource); +int vmci_resource_put(struct vmci_resource *resource); + +struct vmci_handle vmci_resource_handle(struct vmci_resource *resource); + +#endif /* _VMCI_RESOURCE_H_ */ diff --git a/drivers/misc/vmw_vmci/vmci_route.c b/drivers/misc/vmw_vmci/vmci_route.c new file mode 100644 index 000000000..91090658b --- /dev/null +++ b/drivers/misc/vmw_vmci/vmci_route.c @@ -0,0 +1,226 @@ +/* + * VMware VMCI Driver + * + * Copyright (C) 2012 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + */ + +#include +#include + +#include "vmci_context.h" +#include "vmci_driver.h" +#include "vmci_route.h" + +/* + * Make a routing decision for the given source and destination handles. + * This will try to determine the route using the handles and the available + * devices. Will set the source context if it is invalid. + */ +int vmci_route(struct vmci_handle *src, + const struct vmci_handle *dst, + bool from_guest, + enum vmci_route *route) +{ + bool has_host_device = vmci_host_code_active(); + bool has_guest_device = vmci_guest_code_active(); + + *route = VMCI_ROUTE_NONE; + + /* + * "from_guest" is only ever set to true by + * IOCTL_VMCI_DATAGRAM_SEND (or by the vmkernel equivalent), + * which comes from the VMX, so we know it is coming from a + * guest. + * + * To avoid inconsistencies, test these once. We will test + * them again when we do the actual send to ensure that we do + * not touch a non-existent device. + */ + + /* Must have a valid destination context. */ + if (VMCI_INVALID_ID == dst->context) + return VMCI_ERROR_INVALID_ARGS; + + /* Anywhere to hypervisor. */ + if (VMCI_HYPERVISOR_CONTEXT_ID == dst->context) { + + /* + * If this message already came from a guest then we + * cannot send it to the hypervisor. It must come + * from a local client. + */ + if (from_guest) + return VMCI_ERROR_DST_UNREACHABLE; + + /* + * We must be acting as a guest in order to send to + * the hypervisor. + */ + if (!has_guest_device) + return VMCI_ERROR_DEVICE_NOT_FOUND; + + /* And we cannot send if the source is the host context. */ + if (VMCI_HOST_CONTEXT_ID == src->context) + return VMCI_ERROR_INVALID_ARGS; + + /* + * If the client passed the ANON source handle then + * respect it (both context and resource are invalid). + * However, if they passed only an invalid context, + * then they probably mean ANY, in which case we + * should set the real context here before passing it + * down. + */ + if (VMCI_INVALID_ID == src->context && + VMCI_INVALID_ID != src->resource) + src->context = vmci_get_context_id(); + + /* Send from local client down to the hypervisor. */ + *route = VMCI_ROUTE_AS_GUEST; + return VMCI_SUCCESS; + } + + /* Anywhere to local client on host. */ + if (VMCI_HOST_CONTEXT_ID == dst->context) { + /* + * If it is not from a guest but we are acting as a + * guest, then we need to send it down to the host. + * Note that if we are also acting as a host then this + * will prevent us from sending from local client to + * local client, but we accept that restriction as a + * way to remove any ambiguity from the host context. + */ + if (src->context == VMCI_HYPERVISOR_CONTEXT_ID) { + /* + * If the hypervisor is the source, this is + * host local communication. The hypervisor + * may send vmci event datagrams to the host + * itself, but it will never send datagrams to + * an "outer host" through the guest device. + */ + + if (has_host_device) { + *route = VMCI_ROUTE_AS_HOST; + return VMCI_SUCCESS; + } else { + return VMCI_ERROR_DEVICE_NOT_FOUND; + } + } + + if (!from_guest && has_guest_device) { + /* If no source context then use the current. */ + if (VMCI_INVALID_ID == src->context) + src->context = vmci_get_context_id(); + + /* Send it from local client down to the host. */ + *route = VMCI_ROUTE_AS_GUEST; + return VMCI_SUCCESS; + } + + /* + * Otherwise we already received it from a guest and + * it is destined for a local client on this host, or + * it is from another local client on this host. We + * must be acting as a host to service it. + */ + if (!has_host_device) + return VMCI_ERROR_DEVICE_NOT_FOUND; + + if (VMCI_INVALID_ID == src->context) { + /* + * If it came from a guest then it must have a + * valid context. Otherwise we can use the + * host context. + */ + if (from_guest) + return VMCI_ERROR_INVALID_ARGS; + + src->context = VMCI_HOST_CONTEXT_ID; + } + + /* Route to local client. */ + *route = VMCI_ROUTE_AS_HOST; + return VMCI_SUCCESS; + } + + /* + * If we are acting as a host then this might be destined for + * a guest. + */ + if (has_host_device) { + /* It will have a context if it is meant for a guest. */ + if (vmci_ctx_exists(dst->context)) { + if (VMCI_INVALID_ID == src->context) { + /* + * If it came from a guest then it + * must have a valid context. + * Otherwise we can use the host + * context. + */ + + if (from_guest) + return VMCI_ERROR_INVALID_ARGS; + + src->context = VMCI_HOST_CONTEXT_ID; + } else if (VMCI_CONTEXT_IS_VM(src->context) && + src->context != dst->context) { + /* + * VM to VM communication is not + * allowed. Since we catch all + * communication destined for the host + * above, this must be destined for a + * VM since there is a valid context. + */ + + return VMCI_ERROR_DST_UNREACHABLE; + } + + /* Pass it up to the guest. */ + *route = VMCI_ROUTE_AS_HOST; + return VMCI_SUCCESS; + } else if (!has_guest_device) { + /* + * The host is attempting to reach a CID + * without an active context, and we can't + * send it down, since we have no guest + * device. + */ + + return VMCI_ERROR_DST_UNREACHABLE; + } + } + + /* + * We must be a guest trying to send to another guest, which means + * we need to send it down to the host. We do not filter out VM to + * VM communication here, since we want to be able to use the guest + * driver on older versions that do support VM to VM communication. + */ + if (!has_guest_device) { + /* + * Ending up here means we have neither guest nor host + * device. + */ + return VMCI_ERROR_DEVICE_NOT_FOUND; + } + + /* If no source context then use the current context. */ + if (VMCI_INVALID_ID == src->context) + src->context = vmci_get_context_id(); + + /* + * Send it from local client down to the host, which will + * route it to the other guest for us. + */ + *route = VMCI_ROUTE_AS_GUEST; + return VMCI_SUCCESS; +} diff --git a/drivers/misc/vmw_vmci/vmci_route.h b/drivers/misc/vmw_vmci/vmci_route.h new file mode 100644 index 000000000..3b30e8241 --- /dev/null +++ b/drivers/misc/vmw_vmci/vmci_route.h @@ -0,0 +1,30 @@ +/* + * VMware VMCI Driver + * + * Copyright (C) 2012 VMware, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + */ + +#ifndef _VMCI_ROUTE_H_ +#define _VMCI_ROUTE_H_ + +#include + +enum vmci_route { + VMCI_ROUTE_NONE, + VMCI_ROUTE_AS_HOST, + VMCI_ROUTE_AS_GUEST, +}; + +int vmci_route(struct vmci_handle *src, const struct vmci_handle *dst, + bool from_guest, enum vmci_route *route); + +#endif /* _VMCI_ROUTE_H_ */ -- cgit v1.2.3